From d81e86cdbed8905927e162c0efc82580943288f4 Mon Sep 17 00:00:00 2001 From: Joseph Kleinhenz Date: Wed, 17 Jul 2019 16:55:57 -0400 Subject: [PATCH 1/3] complex support --- src/HDF5.jl | 54 ++++++++++++++++++++++++++++++++++++++++----------- test/plain.jl | 48 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 11 deletions(-) diff --git a/src/HDF5.jl b/src/HDF5.jl index 44a4abf13..538ac3189 100644 --- a/src/HDF5.jl +++ b/src/HDF5.jl @@ -323,6 +323,13 @@ cset(::Type{ASCIIChar}) = H5T_CSET_ASCII hdf5_type_id(::Type{C}) where {C<:CharType} = H5T_C_S1 +# global configuration for complex support +complex_support = true +complex_field_names = ("r", "i") +enable_complex_support() = global complex_support = true +disable_complex_support() = global complex_support = false +set_complex_field_names(real::String, imag::String) = global complex_field_names = (real, imag) + ## HDF5 uses a plain integer to refer to each file, group, or ## dataset. These are wrapped into special types in order to allow ## method dispatch. @@ -1175,6 +1182,16 @@ datatype(dset::HDF5Attribute) = HDF5Datatype(h5a_get_type(checkvalid(dset).id), datatype(x::HDF5Scalar) = HDF5Datatype(hdf5_type_id(typeof(x)), false) datatype(::Type{T}) where {T<:HDF5Scalar} = HDF5Datatype(hdf5_type_id(T), false) datatype(A::AbstractArray{T}) where {T<:HDF5Scalar} = HDF5Datatype(hdf5_type_id(T), false) +function datatype(::Type{Complex{T}}) where {T<:HDF5Scalar} + complex_support || error("complex support disabled. call HDF5.enable_complex_support() to enable") + dtype = h5t_create(H5T_COMPOUND, 2*sizeof(T)) + h5t_insert(dtype, complex_field_names[1], 0, hdf5_type_id(T)) + h5t_insert(dtype, complex_field_names[2], sizeof(T), hdf5_type_id(T)) + return HDF5Datatype(dtype) +end +datatype(x::Complex{T}) where {T<:HDF5Scalar} = datatype(typeof(x)) +datatype(A::AbstractArray{Complex{T}}) where {T<:HDF5Scalar} = datatype(eltype(A)) + function datatype(str::String) type_id = h5t_copy(hdf5_type_id(typeof(str))) h5t_set_size(type_id, max(sizeof(str), 1)) @@ -1203,7 +1220,8 @@ dataspace(dset::HDF5Dataset) = HDF5Dataspace(h5d_get_space(checkvalid(dset).id)) dataspace(attr::HDF5Attribute) = HDF5Dataspace(h5a_get_space(checkvalid(attr).id)) # Create a dataspace from in-memory types -dataspace(x::T) where {T<:HDF5Scalar} = HDF5Dataspace(h5s_create(H5S_SCALAR)) +dataspace(x::Union{T, Complex{T}}) where {T<:HDF5Scalar} = HDF5Dataspace(h5s_create(H5S_SCALAR)) + function _dataspace(sz::Tuple{Vararg{Int}}, max_dims::Union{Dims, Tuple{}}=()) dims = Vector{Hsize}(undef,length(sz)) any_zero = false @@ -1301,18 +1319,20 @@ function read(obj::DatasetOrAttribute) read(obj, T) end # Read scalars -function read(obj::DatasetOrAttribute, ::Type{T}) where {T<:HDF5Scalar} +function read(obj::DatasetOrAttribute, ::Type{T}) where {T<:Union{HDF5Scalar, Complex{<:HDF5Scalar}}} x = read(obj, Array{T}) x[1] end # Read array of scalars -function read(obj::DatasetOrAttribute, ::Type{Array{T}}) where {T<:HDF5Scalar} +function read(obj::DatasetOrAttribute, ::Type{Array{T}}) where {T<:Union{HDF5Scalar, Complex{<:HDF5Scalar}}} if isnull(obj) return T[] end dims = size(obj) data = Array{T}(undef,dims) - readarray(obj, hdf5_type_id(T), data) + dtype = datatype(data) + readarray(obj, dtype.id, data) + close(dtype) data end # Empty arrays @@ -1700,7 +1720,7 @@ for (privatesym, fsym, ptype) in obj, dtype end # Scalar types - ($fsym)(parent::$ptype, name::String, data::Union{T, AbstractArray{T}}, plists...) where {T<:ScalarOrString} = + ($fsym)(parent::$ptype, name::String, data::Union{T, AbstractArray{T}}, plists...) where {T<:Union{ScalarOrString, Complex{<:HDF5Scalar}}} = ($privatesym)(parent, name, data, plists...) # VLEN types ($fsym)(parent::$ptype, name::String, data::HDF5Vlen{T}, plists...) where {T<:Union{HDF5Scalar,CharType}} = @@ -1723,7 +1743,7 @@ for (privatesym, fsym, ptype, crsym) in end end # Scalar types - ($fsym)(parent::$ptype, name::String, data::Union{T, AbstractArray{T}}, plists...) where {T<:ScalarOrString} = + ($fsym)(parent::$ptype, name::String, data::Union{T, AbstractArray{T}}, plists...) where {T<:Union{ScalarOrString, Complex{<:HDF5Scalar}}} = ($privatesym)(parent, name, data, plists...) # VLEN types ($fsym)(parent::$ptype, name::String, data::HDF5Vlen{T}, plists...) where {T<:Union{HDF5Scalar,CharType}} = @@ -1732,7 +1752,7 @@ for (privatesym, fsym, ptype, crsym) in end # Write to already-created objects # Scalars -function write(obj::DatasetOrAttribute, x::Union{T, Array{T}}) where {T<:ScalarOrString} +function write(obj::DatasetOrAttribute, x::Union{T, Array{T}}) where {T<:Union{ScalarOrString, Complex{<:HDF5Scalar}}} dtype = datatype(x) try writearray(obj, dtype.id, x) @@ -1750,7 +1770,7 @@ function write(obj::DatasetOrAttribute, data::HDF5Vlen{T}) where {T<:Union{HDF5S end end # For plain files and groups, let "write(obj, name, val)" mean "d_write" -write(parent::Union{HDF5File, HDF5Group}, name::String, data::Union{T, AbstractArray{T}}, plists...) where {T<:ScalarOrString} = +write(parent::Union{HDF5File, HDF5Group}, name::String, data::Union{T, AbstractArray{T}}, plists...) where {T<:Union{ScalarOrString, Complex{<:HDF5Scalar}}} = d_write(parent, name, data, plists...) # For datasets, "write(dset, name, val)" means "a_write" write(parent::HDF5Dataset, name::String, data::Union{T, AbstractArray{T}}, plists...) where {T<:ScalarOrString} = a_write(parent, name, data, plists...) @@ -1993,7 +2013,19 @@ function hdf5_to_julia_eltype(objtype) T = HDF5Vlen{hdf5_to_julia_eltype(HDF5Datatype(super_id))} elseif class_id == H5T_COMPOUND N = Int(h5t_get_nmembers(objtype.id)) - T = HDF5Compound{N} + # check if should be interpreted as complex + if complex_support && N == 2 + membernames = ntuple(N) do i + h5t_get_member_name(objtype.id, i-1) + end + membertypes = ntuple(N) do i + h5t_get_member_type(objtype.id, i-1) |> HDF5Datatype |> hdf5_to_julia_eltype + end + iscomplex = membernames == complex_field_names && membertypes[1] == membertypes[2] && membertypes[1] <: HDF5.HDF5Scalar + T = iscomplex ? Complex{membertypes[1]} : HDF5Compound{N} + else + T = HDF5Compound{N} + end elseif class_id == H5T_ARRAY T = hdf5array(objtype) else @@ -2007,7 +2039,7 @@ end # These supply default values where possible # See also the "special handling" section below h5a_write(attr_id::Hid, mem_type_id::Hid, buf::String) = h5a_write(attr_id, mem_type_id, unsafe_wrap(Vector{UInt8}, pointer(buf), ncodeunits(buf))) -function h5a_write(attr_id::Hid, mem_type_id::Hid, x::T) where {T<:HDF5Scalar} +function h5a_write(attr_id::Hid, mem_type_id::Hid, x::T) where {T<:Union{HDF5Scalar, Complex{<:HDF5Scalar}}} tmp = Ref{T}(x) h5a_write(attr_id, mem_type_id, tmp) end @@ -2034,7 +2066,7 @@ end function h5d_write(dataset_id::Hid, memtype_id::Hid, str::String, xfer::Hid=H5P_DEFAULT) ccall((:H5Dwrite, libhdf5), Herr, (Hid, Hid, Hid, Hid, Hid, Cstring), dataset_id, memtype_id, H5S_ALL, H5S_ALL, xfer, str) end -function h5d_write(dataset_id::Hid, memtype_id::Hid, x::T, xfer::Hid=H5P_DEFAULT) where {T<:HDF5Scalar} +function h5d_write(dataset_id::Hid, memtype_id::Hid, x::T, xfer::Hid=H5P_DEFAULT) where {T<:Union{HDF5Scalar, Complex{<:HDF5Scalar}}} tmp = Ref{T}(x) h5d_write(dataset_id, memtype_id, H5S_ALL, H5S_ALL, xfer, tmp) end diff --git a/test/plain.jl b/test/plain.jl index 16242cd80..d53f13290 100644 --- a/test/plain.jl +++ b/test/plain.jl @@ -402,6 +402,54 @@ end end # testset plain +@testset "complex" begin + HDF5.enable_complex_support() + + fn = tempname() + f = h5open(fn, "w") + + f["ComplexF64"] = 1.0 + 2.0im + attrs(f["ComplexF64"])["ComplexInt64"] = 1im + + Acmplx = rand(ComplexF64, 3, 5) + write(f, "Acmplx64", convert(Matrix{ComplexF64}, Acmplx)) + write(f, "Acmplx32", convert(Matrix{ComplexF32}, Acmplx)) + + HDF5.disable_complex_support() + @test_throws ErrorException f["_ComplexF64"] = 1.0 + 2.0im + @test_throws ErrorException write(f, "_Acmplx64", convert(Matrix{ComplexF64}, Acmplx)) + @test_throws ErrorException write(f, "_Acmplx32", convert(Matrix{ComplexF32}, Acmplx)) + HDF5.enable_complex_support() + + close(f) + + fr = h5open(fn) + z = read(fr, "ComplexF64") + @test z == 1.0 + 2.0im && isa(z, ComplexF64) + z_attrs = attrs(fr["ComplexF64"]) + @test read(z_attrs["ComplexInt64"]) == 1im + + Acmplx32 = read(fr, "Acmplx32") + @test convert(Matrix{ComplexF32}, Acmplx) == Acmplx32 + @test eltype(Acmplx32) == ComplexF32 + Acmplx64 = read(fr, "Acmplx64") + @test convert(Matrix{ComplexF64}, Acmplx) == Acmplx64 + @test eltype(Acmplx64) == ComplexF64 + + HDF5.disable_complex_support() + z = read(fr, "ComplexF64") + @test isa(z, HDF5.HDF5Compound{2}) + + Acmplx32 = read(fr, "Acmplx32") + @test eltype(Acmplx32) == HDF5.HDF5Compound{2} + Acmplx64 = read(fr, "Acmplx64") + @test eltype(Acmplx64) == HDF5.HDF5Compound{2} + + close(fr) + + HDF5.enable_complex_support() +end + # test strings with null and undefined references @testset "undefined and null" begin fn = tempname() From 6d090c909e042957a76186a3e0134a6e5ae37418 Mon Sep 17 00:00:00 2001 From: Joseph Kleinhenz Date: Wed, 21 Aug 2019 12:42:48 -0400 Subject: [PATCH 2/3] update global constants to use refs --- src/HDF5.jl | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/HDF5.jl b/src/HDF5.jl index 538ac3189..0eb660e7f 100644 --- a/src/HDF5.jl +++ b/src/HDF5.jl @@ -324,11 +324,11 @@ cset(::Type{ASCIIChar}) = H5T_CSET_ASCII hdf5_type_id(::Type{C}) where {C<:CharType} = H5T_C_S1 # global configuration for complex support -complex_support = true -complex_field_names = ("r", "i") -enable_complex_support() = global complex_support = true -disable_complex_support() = global complex_support = false -set_complex_field_names(real::String, imag::String) = global complex_field_names = (real, imag) +const COMPLEX_SUPPORT = Ref(true) +const COMPLEX_FIELD_NAMES = Ref(("r", "i")) +enable_complex_support() = COMPLEX_SUPPORT[] = true +disable_complex_support() = COMPLEX_SUPPORT[] = false +set_complex_field_names(real::AbstractString, imag::AbstractString) = COMPLEX_FIELD_NAMES[] = ((real, imag)) ## HDF5 uses a plain integer to refer to each file, group, or ## dataset. These are wrapped into special types in order to allow @@ -1183,10 +1183,10 @@ datatype(x::HDF5Scalar) = HDF5Datatype(hdf5_type_id(typeof(x)), false) datatype(::Type{T}) where {T<:HDF5Scalar} = HDF5Datatype(hdf5_type_id(T), false) datatype(A::AbstractArray{T}) where {T<:HDF5Scalar} = HDF5Datatype(hdf5_type_id(T), false) function datatype(::Type{Complex{T}}) where {T<:HDF5Scalar} - complex_support || error("complex support disabled. call HDF5.enable_complex_support() to enable") + COMPLEX_SUPPORT[] || error("complex support disabled. call HDF5.enable_complex_support() to enable") dtype = h5t_create(H5T_COMPOUND, 2*sizeof(T)) - h5t_insert(dtype, complex_field_names[1], 0, hdf5_type_id(T)) - h5t_insert(dtype, complex_field_names[2], sizeof(T), hdf5_type_id(T)) + h5t_insert(dtype, COMPLEX_FIELD_NAMES[][1], 0, hdf5_type_id(T)) + h5t_insert(dtype, COMPLEX_FIELD_NAMES[][2], sizeof(T), hdf5_type_id(T)) return HDF5Datatype(dtype) end datatype(x::Complex{T}) where {T<:HDF5Scalar} = datatype(typeof(x)) @@ -2014,14 +2014,14 @@ function hdf5_to_julia_eltype(objtype) elseif class_id == H5T_COMPOUND N = Int(h5t_get_nmembers(objtype.id)) # check if should be interpreted as complex - if complex_support && N == 2 + if COMPLEX_SUPPORT[] && N == 2 membernames = ntuple(N) do i h5t_get_member_name(objtype.id, i-1) end membertypes = ntuple(N) do i h5t_get_member_type(objtype.id, i-1) |> HDF5Datatype |> hdf5_to_julia_eltype end - iscomplex = membernames == complex_field_names && membertypes[1] == membertypes[2] && membertypes[1] <: HDF5.HDF5Scalar + iscomplex = membernames == COMPLEX_FIELD_NAMES[] && membertypes[1] == membertypes[2] && membertypes[1] <: HDF5.HDF5Scalar T = iscomplex ? Complex{membertypes[1]} : HDF5Compound{N} else T = HDF5Compound{N} From 49f769574c23f69637803c1aa3137609c1da9510 Mon Sep 17 00:00:00 2001 From: Joseph Kleinhenz Date: Wed, 21 Aug 2019 15:13:48 -0400 Subject: [PATCH 3/3] cleanup + documentation --- doc/hdf5.md | 10 +++++++--- src/HDF5.jl | 4 ++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/doc/hdf5.md b/doc/hdf5.md index 869a2aae1..3755eafe2 100644 --- a/doc/hdf5.md +++ b/doc/hdf5.md @@ -102,7 +102,7 @@ write(g, "mydataset", rand(3,5)) Passing parameters ------------------------ -It is often required to pass parameters to specific routines, which are collected +It is often required to pass parameters to specific routines, which are collected in so-called property lists in HDF5. There are different property lists for different tasks, e.g. for the access/creation of files, datasets, groups. In this high level framework multiple parameters can be simply applied by @@ -114,7 +114,7 @@ g["A", "chunk", (5,5)] = A # add chunks B=h5read(fn,"mygroup/B", # two parameters "fapl_mpio", (ccomm,cinfo), # if parameter requires multiple args use tuples - "dxpl_mpio", HDF5.H5FD_MPIO_COLLECTIVE ) + "dxpl_mpio", HDF5.H5FD_MPIO_COLLECTIVE ) ``` This will automatically create the correct property lists, add the properties, @@ -198,7 +198,11 @@ This is in contrast to standard HDF5 datasets, where closing the file prevents f Supported data types -------------------- -PlainHDF5File knows how to store values of the following types: signed and unsigned integers of 8, 16, 32, and 64 bits, `Float32` and `Float64`; `Array`s of these numeric types; `ASCIIString` and `UTF8String`; and `Array`s of these two string types. `Array`s of strings are supported using HDF5's variable-length-strings facility. +`HDF5.jl` knows how to store values of the following types: signed and unsigned integers of 8, 16, 32, and 64 bits, `Float32`, `Float64`; `Complex` versions of these numeric types; `Array`s of these numeric types (including complex versions); `ASCIIString` and `UTF8String`; and `Array`s of these two string types. +`Array`s of strings are supported using HDF5's variable-length-strings facility. +By default `Complex` numbers are stored as compound types with `r` and `i` fields following the `h5py` convention. +When reading data, compound types with matching field names will be loaded as the corresponding `Complex` julia type. +These field names are configurable with the `HDF5.set_complex_field_names(real::AbstractString, imag::AbstractString)` function and complex support can be completely enabled/disabled with `HDF5.enable/disable_complex_support()`. This module also supports HDF5's VLEN, OPAQUE, and REFERENCE types, which can be used to encode more complex types. In general, you need to specify how you want to combine these more advanced facilities to represent more complex data types. For many of the data types in Julia, the JLD module implements support. You can likewise define your own file format if, for example, you need to interact with some external program that has explicit formatting requirements. diff --git a/src/HDF5.jl b/src/HDF5.jl index 0eb660e7f..03b32bd7f 100644 --- a/src/HDF5.jl +++ b/src/HDF5.jl @@ -2019,9 +2019,9 @@ function hdf5_to_julia_eltype(objtype) h5t_get_member_name(objtype.id, i-1) end membertypes = ntuple(N) do i - h5t_get_member_type(objtype.id, i-1) |> HDF5Datatype |> hdf5_to_julia_eltype + hdf5_to_julia_eltype(HDF5Datatype(h5t_get_member_type(objtype.id, i-1))) end - iscomplex = membernames == COMPLEX_FIELD_NAMES[] && membertypes[1] == membertypes[2] && membertypes[1] <: HDF5.HDF5Scalar + iscomplex = (membernames == COMPLEX_FIELD_NAMES[]) && (membertypes[1] == membertypes[2]) && (membertypes[1] <: HDF5.HDF5Scalar) T = iscomplex ? Complex{membertypes[1]} : HDF5Compound{N} else T = HDF5Compound{N}