
Commit 7fb88d3

Merge pull request #362 from astro-informatics/cg_hdf5_interface
HDF5 interface and support for stochastic reading
2 parents 2baba46 + 6b513db commit 7fb88d3

21 files changed: 583 additions & 67 deletions

.github/workflows/ci.yml

Lines changed: 12 additions & 2 deletions
@@ -124,6 +124,15 @@ jobs:
       # Fix bug in FFT3 (cf. https://github.com/FFTW/fftw3/issues/332)
       sed -i -e 's/^.*FFTW3LibraryDepends.cmake.*$//1' ${{github.workspace}}/local/lib*/cmake/*/FFTW3Config.cmake

+    - name: Install HDF5
+      run: |
+        wget https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.14/hdf5-1.14.3/src/hdf5-1.14.3.tar.gz -O- | tar --no-same-owner -xz;
+        cd hdf5-1.14.3/
+        mkdir build && cd build
+        #CC=`which mpicc` CXX=`which mpicxx` ./configure --prefix=${{github.workspace}}/local --enable-parallel --enable-shared --enable-cxx
+        CC=`which mpicc` CXX=`which mpicxx` cmake .. -DBUILD_TESTING=OFF -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -DHDF5_ENABLE_PARALLEL:BOOL=ON -DALLOW_UNSUPPORTED:BOOL=ON -DHDF5_BUILD_FORTRAN:BOOL=OFF -DHDF5_BUILD_CPP_LIB:BOOL=ON -DHDF5_ENABLE_SZIP_SUPPORT:BOOL=OFF
+        make -j2 install
+
     - name: Checkout SOPT
       uses: actions/checkout@v4
       with:
@@ -214,9 +223,10 @@ jobs:
       # Build your program with the given configuration
       run: |
         export CMAKE_PREFIX_PATH=${{github.workspace}}/local:$CMAKE_PREFIX_PATH
+        #export PATH=${{github.workspace}}/local:$PATH
        mkdir -p ${{github.workspace}}/build
        cd ${{github.workspace}}/build
-        cmake .. --fresh -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -Donnxrt=ON -Ddocasa=OFF -Ddompi=${{matrix.mpi}} -Dopenmp=${{matrix.omp}} -Dtests=ON
+        cmake .. --fresh -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -Donnxrt=ON -Dhdf5=ON -Ddocasa=OFF -Ddompi=${{matrix.mpi}} -Dopenmp=${{matrix.omp}} -Dtests=ON
        make -j$(nproc --ignore 1) install

     - name: Test
@@ -278,7 +288,7 @@ jobs:
        export CMAKE_PREFIX_PATH=${{github.workspace}}/local:$CMAKE_PREFIX_PATH
        mkdir -p ${{github.workspace}}/build
        cd ${{github.workspace}}/build
-        cmake .. --fresh -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -Donnxrt=ON -Ddompi=OFF -Dopenmp=OFF -Ddocs=ON
+        cmake .. --fresh -DCMAKE_INSTALL_PREFIX=${{github.workspace}}/local -Donnxrt=ON -Dhdf5=ON -Ddompi=OFF -Dopenmp=OFF -Ddocs=ON
        make -j$(nproc --ignore 1) install

     - name: Deploy to GH pages
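
The CI now builds HDF5 1.14.3 with the MPI compiler wrappers and HDF5_ENABLE_PARALLEL=ON before configuring purify with -Dhdf5=ON. A quick local sanity check that an install really has parallel I/O enabled could look like the sketch below (not part of this commit; it uses only the public HDF5 C API and can be compiled with h5c++ if your install provides that wrapper):

// check_hdf5.cc -- standalone sanity check, illustrative only.
#include <hdf5.h>
#include <iostream>

int main() {
  unsigned maj = 0, min = 0, rel = 0;
  H5get_libversion(&maj, &min, &rel);  // version of the library actually linked
  std::cout << "HDF5 " << maj << "." << min << "." << rel << "\n";
#ifdef H5_HAVE_PARALLEL
  std::cout << "parallel (MPI) I/O: enabled\n";
#else
  std::cout << "parallel (MPI) I/O: disabled\n";
#endif
  return 0;
}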

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -16,6 +16,7 @@ option(docasa "Enable CASA" off)
 option(docs "Build documentation" off)
 option(coverage "Build coverage" off)
 option(onnxrt "Build with ONNXruntime interface" off)
+option(hdf5 "Build with HDF5 interface" off)

 if(NOT CMAKE_BUILD_TYPE)
   message(STATUS "Setting build type to 'Release' as none was specified.")

cmake_files/LookUpHighFive.cmake

Lines changed: 40 additions & 0 deletions
@@ -0,0 +1,40 @@
+# Installs BlueBrain/HighFive into build directory
+#
+# - GIT_REPOSITORY: defaults to origin BlueBrain/HighFive repo on github
+# - GIT_TAG: defaults to master
+include(ExternalProject)
+if(HighFive_ARGUMENTS)
+  cmake_parse_arguments(HighFive "" "GIT_REPOSITORY;GIT_TAG" ""
+    ${HighFive_ARGUMENTS})
+endif()
+if(NOT HighFive_GIT_REPOSITORY)
+  set(HighFive_GIT_REPOSITORY https://github.com/BlueBrain/HighFive)
+endif()
+if(NOT HighFive_GIT_TAG)
+  set(HighFive_GIT_TAG master)
+endif()
+
+set(HighFive_DIR "${CMAKE_INSTALL_PREFIX}/external")
+ExternalProject_Add(
+  Lookup-HighFive
+  GIT_REPOSITORY ${HighFive_GIT_REPOSITORY}
+  GIT_TAG ${HighFive_GIT_TAG}
+  PREFIX "${CMAKE_BINARY_DIR}/external"
+  #INSTALL_DIR ${EXTERNAL_ROOT}
+  CMAKE_ARGS
+    -DCMAKE_INSTALL_PREFIX=${HighFive_DIR}
+    -DCMAKE_INSTALL_LIBDIR=${CMAKE_SHARED_LIBRARY_PREFIX}
+    -DHIGHFIVE_EXAMPLES=OFF
+    -DHIGHFIVE_USE_BOOST=OFF
+    -DHIGHFIVE_UNIT_TESTS=OFF
+  # Wrap download, configure and build steps in a script to log output
+  UPDATE_COMMAND ""
+  LOG_DOWNLOAD ON
+  LOG_CONFIGURE ON
+  LOG_BUILD ON
+  LOG_INSTALL ON
+)
+set(HighFive_INCLUDE_DIR "${HighFive_DIR}/include")
+#set(HighFive_LIBRARY_DIR "${HighFive_DIR}/${CMAKE_SHARED_LIBRARY_PREFIX}")
+#set(HighFive_LIBRARIES "HighFive")
+
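
HighFive is a header-only C++ wrapper around HDF5, which is why the lookup module only needs to record an include directory. As a rough illustration of the kind of access the new interface can build on (this is not the h5reader.h added by this commit, and the dataset name is a placeholder), a partial read of a 1-D dataset via a hyperslab selection looks like:

// Illustrative only; "dataset_name" and the file path are placeholders.
#include <highfive/H5DataSet.hpp>
#include <highfive/H5File.hpp>
#include <string>
#include <vector>

std::vector<double> read_slice(const std::string &path, const std::string &dataset_name,
                               std::size_t offset, std::size_t count) {
  HighFive::File file(path, HighFive::File::ReadOnly);
  HighFive::DataSet dataset = file.getDataSet(dataset_name);
  std::vector<double> out;
  // Read `count` elements starting at `offset` without loading the whole
  // dataset; partial reads like this are what make stochastic
  // (sub-sampled) reading of large visibility sets cheap.
  dataset.select({offset}, {count}).read(out);
  return out;
}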

cmake_files/dependencies.cmake

Lines changed: 23 additions & 1 deletion
@@ -94,7 +94,11 @@ find_package(fftw3 NAMES FFTW3 REQUIRED)
 set(PURIFY_MPI FALSE)
 if(dompi)
   find_package(MPI REQUIRED)
-  set(PURIFY_MPI TRUE)
+  if (${sopt_HAS_MPI})
+    set(PURIFY_MPI TRUE)
+  else()
+    message(FATAL_ERROR "SOPT built without MPI support")
+  endif()
 endif()
 find_package(TIFF REQUIRED)

@@ -122,5 +126,23 @@ if(docasa)
   set(PURIFY_CASACORE TRUE)
 endif()

+set(PURIFY_H5 FALSE)
+if(hdf5)
+  find_package(HDF5 COMPONENTS CXX REQUIRED)
+  message(STATUS "Found HDF5 include dir: ${HDF5_INCLUDE_DIR}")
+  message(STATUS "Found HDF5 CXX library: ${HDF5_CXX_LIBRARIES}")
+  message(STATUS "HDF5 parallel: ${HDF5_IS_PARALLEL}")
+  if (PURIFY_MPI AND NOT ${HDF5_IS_PARALLEL})
+    message(FATAL_ERROR "HDF5 built without MPI support")
+  endif()
+  find_package(HighFive QUIET)
+  if(NOT HighFive_FOUND)
+    message(STATUS "HighFive not found. Attempt to install...")
+    include(LookUpHighFive)
+    set(PURIFY_HIGHFIVE_LOOKUP TRUE)
+  endif()
+  set(PURIFY_H5 TRUE)
+endif()
+
 # Add script to execute to make sure libraries in the build tree can be found
 add_to_ld_path("${EXTERNAL_ROOT}/lib")
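
With -Dhdf5=ON, configuration now fails unless a parallel HDF5 is found whenever MPI is enabled, since a serial HDF5 cannot serve collective reads across ranks. A minimal sketch of what a collective open looks like through HighFive, assuming both HDF5 and HighFive were built with MPI support (the file name is a placeholder, and this is not code from this commit):

// Illustrative only; requires an HDF5 build with H5_HAVE_PARALLEL.
#include <mpi.h>
#include <highfive/H5File.hpp>

int main(int argc, char **argv) {
  MPI_Init(&argc, &argv);
  {
    HighFive::FileAccessProps fapl;
    fapl.add(HighFive::MPIOFileAccess(MPI_COMM_WORLD, MPI_INFO_NULL));
    // Every rank opens the same file collectively through MPI-IO.
    HighFive::File file("visibilities.h5", HighFive::File::ReadOnly, fapl);
  }  // file handle closed before MPI is finalised
  MPI_Finalize();
  return 0;
}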

cpp/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
@@ -23,6 +23,8 @@ add_include_dir(
   ${yaml-cpp_INCLUDE_DIR}
   ${Cubature_INCLUDE_DIR}
   ${CImg_INCLUDE_DIR}
+  ${HDF5_INCLUDE_DIR}
+  ${HighFive_INCLUDE_DIR}
 )

cpp/purify/CMakeLists.txt

Lines changed: 15 additions & 2 deletions
@@ -16,8 +16,8 @@ endfunction()
 configure_file(config.in.h "${PROJECT_BINARY_DIR}/include/purify/config.h")

 set(HEADERS
-  logging.h kernels.h pfitsio.h types.h
-  IndexMapping.h utilities.h operators.h wproj_utilities.h
+  logging.h kernels.h pfitsio.h types.h
+  IndexMapping.h utilities.h operators.h wproj_utilities.h
   cimg.h uvfits.h convolution.h measurement_operator_factory.h wavelet_operator_factory.h distribute.h
   update_factory.h
   convergence_factory.h
@@ -42,6 +42,10 @@ if(PURIFY_CASACORE)
   list(APPEND HEADERS casacore.h)
 endif()

+if(PURIFY_H5)
+  list(APPEND HEADERS h5reader.h)
+endif()
+
 if(PURIFY_MPI)
   list(APPEND HEADERS mpi_utilities.h distribute.h DistributeSparseVector.h
     random_update_factory.h
@@ -70,6 +74,11 @@ if(PURIFY_ARRAYFIRE)
   target_link_libraries(libpurify ${ArrayFire_LIBRARIES})
 endif()

+if(PURIFY_H5)
+  target_link_libraries(libpurify ${HDF5_CXX_LIBRARIES})
+  target_include_directories(libpurify SYSTEM PUBLIC ${HDF5_INCLUDE_DIR} ${HighFive_INCLUDE_DIR})
+endif()
+
 target_include_directories(libpurify PUBLIC
   $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/..>
   $<BUILD_INTERFACE:${PROJECT_BINARY_DIR}/include>
@@ -111,6 +120,10 @@ if(PURIFY_CASACORE_LOOKUP)
   add_dependencies(libpurify Lookup-CasaCore)
 endif()

+if(PURIFY_HIGHFIVE_LOOKUP)
+  add_dependencies(libpurify Lookup-HighFive)
+endif()
+
 install(FILES ${HEADERS} DESTINATION include/purify)
 install(TARGETS libpurify
   EXPORT PurifyTargets

cpp/purify/config.in.h

Lines changed: 3 additions & 0 deletions
@@ -28,6 +28,9 @@
 //! Whether PURIFY is using (and SOPT was built with) onnxrt support
 #cmakedefine PURIFY_ONNXRT

+//! Whether PURIFY is using HDF5 support
+#cmakedefine PURIFY_H5
+
 #include <cstdint>
 #include <string>
 #include <tuple>
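
The #cmakedefine line becomes #define PURIFY_H5 in the generated purify/config.h only when the hdf5 option is enabled, so downstream code can guard the HDF5 path at compile time. A minimal sketch of the usual pattern (only the macro name and the h5reader.h file name come from this commit; the helper function is hypothetical):

#include "purify/config.h"
#ifdef PURIFY_H5
#include "purify/h5reader.h"  // only available when purify was built with -Dhdf5=ON
#endif

// Hypothetical helper: report at runtime whether this build has HDF5 support.
bool hdf5_enabled() {
#ifdef PURIFY_H5
  return true;
#else
  return false;
#endif
}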

cpp/purify/distribute.cc

Lines changed: 25 additions & 20 deletions
@@ -1,16 +1,15 @@
 #include "purify/distribute.h"
 #include "purify/wide_field_utilities.h"

-namespace purify {
-namespace distribute {
+namespace purify::distribute {

 std::vector<t_int> distribute_measurements(Vector<t_real> const &u, Vector<t_real> const &v,
                                            Vector<t_real> const &w, t_int const number_of_nodes,
                                            distribute::plan const distribution_plan,
                                            t_int const &grid_size) {
   // distrubte visibilities from a measurement
   Vector<t_int> index = Vector<t_int>::LinSpaced(u.size(), 0, u.size());
-  t_int const patition_size =
+  t_int const partition_size =
       std::ceil(static_cast<t_real>(u.size()) / static_cast<t_real>(number_of_nodes));
   // return a vector of vectors of indicies for each node
   std::string plan_name = "";
@@ -41,23 +40,25 @@ std::vector<t_int> distribute_measurements(Vector<t_real> const &u, Vector<t_rea
   }
   PURIFY_DEBUG(
       "Using {} to make {} partitions from {} visibilities, with {} visibilities per a node.",
-      plan_name, number_of_nodes, index.size(), patition_size);
-  std::vector<t_int> patitions(u.size());
-  // creating patitions
-  for (t_int i = 0; i < index.size(); i++) {
-    if (std::floor(static_cast<t_real>(i) / static_cast<t_real>(patition_size)) >
-        number_of_nodes - 1) {
-      PURIFY_ERROR("Error: Probably a bug in distribution plan.");
-      throw std::runtime_error("Distributing data into too many nodes");
-    }
-    patitions[index(i)] = std::floor(static_cast<t_real>(i) / static_cast<t_real>(patition_size));
+      plan_name, number_of_nodes, index.size(), partition_size);
+  std::vector<t_int> partitions(u.size());
+  if (std::floor(static_cast<t_real>(index.size() - 1) / static_cast<t_real>(partition_size)) >
+      number_of_nodes - 1) {
+    PURIFY_ERROR("Error: Probably a bug in distribution plan.");
+    throw std::runtime_error("Distributing data into too many nodes");
+  }
+  // creating partitions
+  for (t_int i = 0; i < index.size(); ++i) {
+    partitions[index(i)] = std::floor(static_cast<t_real>(i) / static_cast<t_real>(partition_size));
   }
-  return patitions;
+  return partitions;
 }
+
 Vector<t_int> w_distribution(const Vector<t_real> &u, const Vector<t_real> &v,
                              Vector<t_real> const &w) {
   return w_distribution(w);
 }
+
 Vector<t_int> w_distribution(Vector<t_real> const &w) {
   // sort visibilities by w from w_max to w_min
   Vector<t_int> index = Vector<t_int>::LinSpaced(w.size(), 0, w.size());
@@ -98,6 +99,7 @@ Vector<t_int> equal_distribution(Vector<t_real> const &u, Vector<t_real> const &
   });
   return index;
 }
+
 std::tuple<std::vector<t_int>, std::vector<t_real>> kmeans_algo(
     const Vector<t_real> &w, const t_int number_of_nodes, const t_int iters,
     const std::function<t_real(t_real)> &cost, const t_real rel_diff) {
@@ -147,6 +149,7 @@ std::tuple<std::vector<t_int>, std::vector<t_real>> kmeans_algo(

   return std::make_tuple(w_node, w_centre);
 }
+
 #ifdef PURIFY_MPI
 std::tuple<std::vector<t_int>, std::vector<t_real>> kmeans_algo(
     const Vector<t_real> &w, const t_int number_of_nodes, const t_int iters,
@@ -164,7 +167,7 @@ std::tuple<std::vector<t_int>, std::vector<t_real>> kmeans_algo(
   for (int i = 0; i < w_centre.size(); i++)
     w_centre[i] =
         (static_cast<t_real>(i) * (wmax - wmin) / static_cast<t_real>(number_of_nodes) + wmin);
-  // lopp through even nodes to reduces w-term
+  // loop through even nodes to reduce w-term
   for (int n = 0; n < iters; n++) {
     if (comm.is_root()) PURIFY_DEBUG("clustering iteration {}", n);
     for (int i = 0; i < w.size(); i++) {
@@ -216,8 +219,9 @@ std::vector<t_int> w_support(Vector<t_real> const &w, const std::vector<t_int> &
                min_support, max_support);
   const t_real coeff_average =
       comm.all_sum_all<t_real>(coeff_total) / static_cast<t_real>(comm.size());
-  if (comm.is_root())
+  if (comm.is_root()) {
     PURIFY_DEBUG("Each node should have on average {} coefficients.", coeff_average);
+  }
   t_real coeff_sum = 0;
   t_int group = 0;
   std::vector<t_int> groups(w.size(), comm.rank());
@@ -242,22 +246,23 @@ std::vector<t_int> w_support(Vector<t_real> const &w, const std::vector<t_int> &
         groups[i] = group;
       }
     }
-    if (group > comm.size() - 1)
+    if (group > comm.size() - 1) {
       throw std::runtime_error(
           "Error distributing visibilites to even computational load for wide field imaging. Group "
           "number out of bounds.");
+    }
     coeff_sum = comm.broadcast(coeff_sum, rank);
     group = comm.broadcast(group, rank);

-    if (total != coeff_total and comm.rank() == rank)
+    if (total != coeff_total and comm.rank() == rank) {
       throw std::runtime_error(
           "Total number of coefficients calculated is not the same, loop might be broken. " +
           std::to_string(total) + " != " + std::to_string(coeff_total));
+    }
   }
   if (comm.is_root()) PURIFY_DEBUG("{} node should have {} coefficients.", group, coeff_sum);
   return groups;
 }
 #endif

-}  // namespace distribute
-}  // namespace purify
+}  // namespace purify::distribute
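
Besides the "patition" → "partition" spelling fix, the substantive change in distribute_measurements is that the "too many nodes" bounds check now runs once, against the largest loop index, instead of once per visibility inside the loop. The partition rule itself is unchanged: with partition_size = ceil(N / number_of_nodes), the i-th visibility in plan order goes to node floor(i / partition_size). A standalone sketch of that rule with plain standard-library types (not the purify API):

#include <cmath>
#include <cstdint>
#include <stdexcept>
#include <vector>

// Assign N items to `nodes` contiguous, nearly equal chunks,
// e.g. N = 10, nodes = 3 -> partition_size = 4 -> nodes {0,0,0,0, 1,1,1,1, 2,2}.
std::vector<int64_t> contiguous_partitions(int64_t N, int64_t nodes) {
  const int64_t partition_size =
      static_cast<int64_t>(std::ceil(static_cast<double>(N) / static_cast<double>(nodes)));
  // Single up-front check, as in the new code: the last item must not map past the last node.
  if ((N - 1) / partition_size > nodes - 1)
    throw std::runtime_error("Distributing data into too many nodes");
  std::vector<int64_t> partitions(N);
  for (int64_t i = 0; i < N; ++i) partitions[i] = i / partition_size;
  return partitions;
}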
