diff --git a/CMakeLists.txt b/CMakeLists.txt index dae5d5c85..047da06f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,6 +63,5 @@ endif() add_subdirectory(cpp) - # Exports Purify so other packages can access it include(export_purify) diff --git a/cmake_files/dependencies.cmake b/cmake_files/dependencies.cmake index 2d8c66a56..7df76fa9c 100644 --- a/cmake_files/dependencies.cmake +++ b/cmake_files/dependencies.cmake @@ -65,10 +65,6 @@ if(tests) # Adds ctest include(AddCatchTest) endif() -if(examples) - find_package(TIFF REQUIRED) -endif() - if(tests OR examples) file(COPY data DESTINATION .) endif() diff --git a/cmake_files/export_purify.cmake b/cmake_files/export_purify.cmake index 1b1c3c713..2abfc0bca 100644 --- a/cmake_files/export_purify.cmake +++ b/cmake_files/export_purify.cmake @@ -38,3 +38,4 @@ install(FILES ) install(EXPORT PurifyTargets DESTINATION share/cmake/purify COMPONENT dev) +install(DIRECTORY "${PROJECT_SOURCE_DIR}/data" DESTINATION "${CMAKE_INSTALL_PREFIX}") diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index d457b2271..6901524f5 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -25,8 +25,8 @@ if(dompi) add_executable(mpi_benchmark_MO_wproj main.cc utilities.cc measurement_operator_wproj.cc) target_link_libraries(mpi_benchmark_MO_wproj ${MPI_LIBRARIES} benchmark libpurify) #target_include_directories(mpi_benchmark_MO_wproj PUBLIC "${PROJECT_SOURCE_DIR}/cpp" "${CMAKE_CURRENT_BINARY_DIR}/include") - add_executable(mpi_benchmark_PADMM main.cc utilities.cc padmm_mpi.cc) - target_link_libraries(mpi_benchmark_PADMM ${MPI_LIBRARIES} benchmark libpurify) + add_executable(mpi_benchmark_algorithms main.cc utilities.cc algorithms_mpi.cc) + target_link_libraries(mpi_benchmark_algorithms ${MPI_LIBRARIES} benchmark libpurify) #target_include_directories(mpi_benchmark_PADMM PUBLIC "${PROJECT_SOURCE_DIR}/cpp" "${CMAKE_CURRENT_BINARY_DIR}/include") add_executable(mpi_benchmark_WLO main.cc utilities.cc wavelet_operator_mpi.cc) target_link_libraries(mpi_benchmark_WLO ${MPI_LIBRARIES} benchmark libpurify) diff --git a/cpp/benchmarks/algorithms.cc b/cpp/benchmarks/algorithms.cc new file mode 100644 index 000000000..4c20ff445 --- /dev/null +++ b/cpp/benchmarks/algorithms.cc @@ -0,0 +1,156 @@ +#include "purify/config.h" +#include "purify/types.h" +#include +#include +#include "benchmarks/utilities.h" +#include "purify/algorithm_factory.h" +#include "purify/directories.h" +#include "purify/measurement_operator_factory.h" +#include "purify/operators.h" +#include "purify/utilities.h" +#include "purify/wavelet_operator_factory.h" +#include +#include +#include +#include +#include + +using namespace purify; + +class AlgoFixture : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State &state) { + // Reading image from file and update related quantities + bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); + + // Generating random uv(w) coverage + bool newMeasurements = + b_utilities::updateMeasurements(state.range(1), m_uv_data, m_epsilon, newImage, m_image); + + bool newKernel = m_kernel != state.range(2); + + m_kernel = state.range(2); + // creating the measurement operator + const t_real FoV = 1; // deg + const t_real cellsize = FoV / m_imsizex * 60. * 60.; + const bool w_term = false; + m_measurements_transform = factory::measurement_operator_factory>( + factory::distributed_measurement_operator::serial, m_uv_data, m_imsizey, m_imsizex, + cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term); + + t_real const m_sigma = 0.016820222945913496 * std::sqrt(2); // see test_parameters file + } + + void TearDown(const ::benchmark::State &state) {} + + t_real m_epsilon; + t_uint m_counter; + t_real m_sigma; + std::vector> const m_sara{ + std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), + std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), + std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; + + Image m_image; + t_uint m_imsizex; + t_uint m_imsizey; + + utilities::vis_params m_uv_data; + + t_uint m_kernel; + std::shared_ptr> const> m_measurements_transform; + std::shared_ptr> m_padmm; + std::shared_ptr> m_fb; +}; + +BENCHMARK_DEFINE_F(AlgoFixture, Padmm)(benchmark::State &state) { + // Benchmark the application of the algorithm + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + m_padmm = factory::padmm_factory>( + factory::algo_distribution::serial, m_measurements_transform, wavelets, m_uv_data, m_sigma, + m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, 50, + 1.0, 1.0); + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + (*m_padmm)(); + auto end = std::chrono::high_resolution_clock::now(); + state.SetIterationTime(b_utilities::duration(start, end)); + } +} + +BENCHMARK_DEFINE_F(AlgoFixture, ForwardBackward)(benchmark::State &state) { + // Benchmark the application of the algorithm + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + + m_fb = factory::fb_factory>( + factory::algo_distribution::serial, m_measurements_transform, wavelets, m_uv_data, m_sigma, + beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, + 1e-2, 50, 1.0); + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + state.SetIterationTime(b_utilities::duration(start, end)); + } +} + +#ifdef PURIFY_ONNXRT +BENCHMARK_DEFINE_F(AlgoFixture, ForwardBackwardOnnx)(benchmark::State &state) { + // Benchmark the application of the algorithm + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + std::string tf_model_path = purify::models_directory() + "/snr_15_model_dynamic.onnx"; + + m_fb = factory::fb_factory>( + factory::algo_distribution::serial, m_measurements_transform, wavelets, m_uv_data, m_sigma, + beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, + 1e-2, 50, 1.0, tf_model_path, factory::g_proximal_type::TFGProximal); + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + state.SetIterationTime(b_utilities::duration(start, end)); + } +} + +BENCHMARK_REGISTER_F(AlgoFixture, ForwardBackwardOnnx) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10}) + ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); +#endif + +BENCHMARK_REGISTER_F(AlgoFixture, Padmm) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10}) + ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(AlgoFixture, ForwardBackward) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10}) + ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/cpp/benchmarks/algorithms_mpi.cc b/cpp/benchmarks/algorithms_mpi.cc new file mode 100644 index 000000000..cf97b72d5 --- /dev/null +++ b/cpp/benchmarks/algorithms_mpi.cc @@ -0,0 +1,268 @@ +#include "purify/types.h" +#include +#include +#include "benchmarks/utilities.h" +#include "purify/algorithm_factory.h" +#include "purify/convergence_factory.h" +#include "purify/directories.h" +#include "purify/distribute.h" +#include "purify/logging.h" +#include "purify/measurement_operator_factory.h" +#include "purify/mpi_utilities.h" +#include "purify/operators.h" +#include "purify/utilities.h" +#include "purify/wavelet_operator_factory.h" +#include +#include +#include +#include +#include +#include +#include + +using namespace purify; + +class AlgoFixtureMPI : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State &state) { + // Reading image from file and update related quantities + bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); + + // Generating random uv(w) coverage + bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_epsilon, + newImage, m_image, m_world); + + bool newKernel = m_kernel != state.range(2); + + m_kernel = state.range(2); + // Create the measurement operator for both distributed algorithms + const t_real FoV = 1; // deg + const t_real cellsize = FoV / m_imsizex * 60. * 60.; + const bool w_term = false; + if (state.range(4) == 1) { + PURIFY_INFO("Using distributed image MPI algorithm"); + m_measurements_distribute_image = factory::measurement_operator_factory>( + factory::distributed_measurement_operator::mpi_distribute_image, m_uv_data, + m_image.rows(), m_image.cols(), cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, + m_kernel, w_term); + } + + if (state.range(4) == 2) { + PURIFY_INFO("Using distributed grid MPI algorithm"); + m_measurements_distribute_grid = factory::measurement_operator_factory>( + factory::distributed_measurement_operator::mpi_distribute_grid, m_uv_data, m_image.rows(), + m_image.cols(), cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term); + } + + m_sigma = 0.016820222945913496 * std::sqrt(2); // see test_parameters file + } + + void TearDown(const ::benchmark::State &state) {} + + sopt::mpi::Communicator m_world; + + std::vector> const m_sara{ + std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), + std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), + std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; + + Image m_image; + t_uint m_imsizex; + t_uint m_imsizey; + + utilities::vis_params m_uv_data; + t_real m_epsilon; + t_real m_sigma; + t_uint m_kernel; + + std::shared_ptr> const> m_measurements_distribute_image; + std::shared_ptr> const> m_measurements_distribute_grid; + std::shared_ptr> m_padmm; + std::shared_ptr> m_fb; +}; + +BENCHMARK_DEFINE_F(AlgoFixtureMPI, PadmmDistributeImage)(benchmark::State &state) { + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); + + m_padmm = factory::padmm_factory>( + factory::algo_distribution::mpi_distributed, m_measurements_distribute_image, wavelets, + m_uv_data, m_sigma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0, 1.0); + + // Benchmark the application of the algorithm + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + auto result = (*m_padmm)(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + state.SetIterationTime(b_utilities::duration(start, end, m_world)); + } +} + +BENCHMARK_DEFINE_F(AlgoFixtureMPI, PadmmDistributeGrid)(benchmark::State &state) { + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); + + m_padmm = factory::padmm_factory>( + factory::algo_distribution::mpi_distributed, m_measurements_distribute_grid, wavelets, + m_uv_data, m_sigma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0, 1.0); + + // Benchmark the application of the algorithm + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + auto result = (*m_padmm)(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + state.SetIterationTime(b_utilities::duration(start, end, m_world)); + } +} + +BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbDistributeImage)(benchmark::State &state) { + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + + m_fb = factory::fb_factory>( + factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data, + m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0); + + // Benchmark the application of the algorithm + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + auto result = (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + state.SetIterationTime(b_utilities::duration(start, end, m_world)); + } +} + +BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbDistributeGrid)(benchmark::State &state) { + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + + m_fb = factory::fb_factory>( + factory::algo_distribution::mpi_serial, m_measurements_distribute_grid, wavelets, m_uv_data, + m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0); + + // Benchmark the application of the algorithm + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + auto result = (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + state.SetIterationTime(b_utilities::duration(start, end, m_world)); + } +} + +#ifdef PURIFY_ONNXRT +BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeImage)(benchmark::State &state) { + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. + + // TODO: Wavelets are constructed but not used in the factory method + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + + std::string tf_model_path = purify::models_directory() + "/snr_15_model_dynamic.onnx"; + + m_fb = factory::fb_factory>( + factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data, + m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0, tf_model_path, factory::g_proximal_type::TFGProximal); + + // Benchmark the application of the algorithm + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + auto result = (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + state.SetIterationTime(b_utilities::duration(start, end, m_world)); + } +} + +BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbOnnxDistributeImage) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 1}) + ->Args({1024, static_cast(1e6), 4, 10, 1}) + ->Args({1024, static_cast(1e7), 4, 10, 1}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) + ->UseManualTime() + ->MinTime(120.0) + ->MinWarmUpTime(10.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +#endif + +BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeImage) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 1}) + ->Args({1024, static_cast(1e6), 4, 10, 1}) + ->Args({1024, static_cast(1e7), 4, 10, 1}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) + ->UseManualTime() + ->MinTime(120.0) + ->MinWarmUpTime(10.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeGrid) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 2}) + ->Args({1024, static_cast(1e6), 4, 10, 2}) + ->Args({1024, static_cast(1e7), 4, 10, 2}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) + ->UseManualTime() + ->MinTime(120.0) + ->MinWarmUpTime(10.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeImage) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 1}) + ->Args({1024, static_cast(1e6), 4, 10, 1}) + ->Args({1024, static_cast(1e7), 4, 10, 1}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) + ->UseManualTime() + ->MinTime(120.0) + ->MinWarmUpTime(10.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeGrid) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 2}) + ->Args({1024, static_cast(1e6), 4, 10, 2}) + ->Args({1024, static_cast(1e7), 4, 10, 2}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) + ->UseManualTime() + ->MinTime(120.0) + ->MinWarmUpTime(10.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); diff --git a/cpp/benchmarks/main.cc b/cpp/benchmarks/main.cc index 39717d3a8..ab9a95437 100644 --- a/cpp/benchmarks/main.cc +++ b/cpp/benchmarks/main.cc @@ -1,5 +1,7 @@ #include "purify/config.h" #include +#include "purify/logging.h" +#include #include #include @@ -16,9 +18,13 @@ class NullReporter : public ::benchmark::BenchmarkReporter { // The main is rewritten to allow for MPI initializing and for selecting a // reporter according to the process rank int main(int argc, char const **argv) { + sopt::logging::set_level("info"); + purify::logging::set_level("debug"); + #ifdef PURIFY_MPI auto const session = sopt::mpi::init(argc, argv); auto const world = sopt::mpi::Communicator::World(); + PURIFY_LOW_LOG("MPI initialized"); #endif ::benchmark::Initialize(&argc, const_cast(argv)); diff --git a/cpp/benchmarks/measurement_operator_mpi.cc b/cpp/benchmarks/measurement_operator_mpi.cc index 659e7c3a3..585556102 100644 --- a/cpp/benchmarks/measurement_operator_mpi.cc +++ b/cpp/benchmarks/measurement_operator_mpi.cc @@ -17,17 +17,14 @@ class DegridOperatorCtorFixturePar : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State &state) { // Keep count of the benchmark repetitions + m_counter++; m_imsizex = state.range(0); m_imsizey = state.range(0); // Generating random uv(w) coverage - bool newMeasurements = m_uv_data.size() != state.range(1); - if (newMeasurements) { - t_real const sigma_m = constant::pi / 3; - m_uv_data = utilities::random_sample_density(state.range(1), 0, sigma_m); - } + bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_world); // Data needed for the creation of the measurement operator const t_real FoV = 1; // deg @@ -93,18 +90,12 @@ BENCHMARK_DEFINE_F(DegridOperatorCtorFixturePar, MPI)(benchmark::State &state) { class DegridOperatorFixturePar : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State &state) { - // Keep count of the benchmark repetitions - m_counter++; - // Reading image from file and create temporary image bool newImage = updateImage(state.range(0)); // Generating random uv(w) coverage - bool newMeasurements = m_uv_data.size() != state.range(1); - if (newMeasurements) { - t_real const sigma_m = constant::pi / 3; - m_uv_data = utilities::random_sample_density(state.range(1), 0, sigma_m); - } + // bool newMeasurements = m_uv_data.size() != state.range(1); + bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_world); // Create measurement operators bool newKernel = m_kernel != state.range(2); @@ -123,7 +114,6 @@ class DegridOperatorFixturePar : public ::benchmark::Fixture { virtual std::shared_ptr> const> measurementOperator( t_real cellsize, bool w_term) = 0; - t_uint m_counter; sopt::mpi::Communicator m_world; t_uint m_kernel; @@ -197,9 +187,6 @@ class DegridOperatorAdjointFixtureMPI : public DegridOperatorAdjointFixturePar { BENCHMARK_DEFINE_F(DegridOperatorDirectFixtureDistr, Apply)(benchmark::State &state) { // Benchmark the application of the distributed operator - if ((m_counter % 10) == 1) { - m_uv_data.vis = (*m_degridOperator) * Image::Map(m_image.data(), m_image.size(), 1); - } while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); m_uv_data.vis = (*m_degridOperator) * Image::Map(m_image.data(), m_image.size(), 1); @@ -213,9 +200,6 @@ BENCHMARK_DEFINE_F(DegridOperatorDirectFixtureDistr, Apply)(benchmark::State &st BENCHMARK_DEFINE_F(DegridOperatorAdjointFixtureDistr, Apply)(benchmark::State &state) { // Benchmark the application of the adjoint distributed operator - if ((m_counter % 10) == 1) { - m_image = m_degridOperator->adjoint() * m_uv_data.vis; - } while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); m_image = m_degridOperator->adjoint() * m_uv_data.vis; @@ -229,9 +213,6 @@ BENCHMARK_DEFINE_F(DegridOperatorAdjointFixtureDistr, Apply)(benchmark::State &s BENCHMARK_DEFINE_F(DegridOperatorDirectFixtureMPI, Apply)(benchmark::State &state) { // Benchmark the application of the distributed MPI operator - if ((m_counter % 10) == 1) { - m_uv_data.vis = (*m_degridOperator) * Image::Map(m_image.data(), m_image.size(), 1); - } while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); m_uv_data.vis = (*m_degridOperator) * Image::Map(m_image.data(), m_image.size(), 1); @@ -245,9 +226,6 @@ BENCHMARK_DEFINE_F(DegridOperatorDirectFixtureMPI, Apply)(benchmark::State &stat BENCHMARK_DEFINE_F(DegridOperatorAdjointFixtureMPI, Apply)(benchmark::State &state) { // Benchmark the application of the adjoint distributed MPI operator - if ((m_counter % 10) == 1) { - m_image = m_degridOperator->adjoint() * m_uv_data.vis; - } while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); m_image = m_degridOperator->adjoint() * m_uv_data.vis; @@ -282,51 +260,43 @@ BENCHMARK_REGISTER_F(DegridOperatorCtorFixturePar, MPI) BENCHMARK_REGISTER_F(DegridOperatorDirectFixtureDistr, Apply) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4}) - ->Args({1024, static_cast(5e6), 4}) ->Args({1024, static_cast(1e7), 4}) - ->Args({1024, static_cast(5e7), 4}) - ->Args({1024, static_cast(1e8), 4}) - ->Args({1024, static_cast(5e8), 4}) ->UseManualTime() - ->Repetitions(10) + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); BENCHMARK_REGISTER_F(DegridOperatorAdjointFixtureDistr, Apply) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4}) - ->Args({1024, static_cast(5e6), 4}) ->Args({1024, static_cast(1e7), 4}) - ->Args({1024, static_cast(5e7), 4}) - ->Args({1024, static_cast(1e8), 4}) - ->Args({1024, static_cast(5e8), 4}) ->UseManualTime() - ->Repetitions(10) + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); BENCHMARK_REGISTER_F(DegridOperatorDirectFixtureMPI, Apply) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4}) - ->Args({1024, static_cast(5e6), 4}) ->Args({1024, static_cast(1e7), 4}) - ->Args({1024, static_cast(5e7), 4}) - ->Args({1024, static_cast(1e8), 4}) - ->Args({1024, static_cast(5e8), 4}) ->UseManualTime() - ->Repetitions(10) + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); BENCHMARK_REGISTER_F(DegridOperatorAdjointFixtureMPI, Apply) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4}) - ->Args({1024, static_cast(5e6), 4}) ->Args({1024, static_cast(1e7), 4}) - ->Args({1024, static_cast(5e7), 4}) - ->Args({1024, static_cast(1e8), 4}) - ->Args({1024, static_cast(5e8), 4}) ->UseManualTime() - ->Repetitions(10) + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); diff --git a/cpp/benchmarks/padmm.cc b/cpp/benchmarks/padmm.cc deleted file mode 100644 index 8b6b875e5..000000000 --- a/cpp/benchmarks/padmm.cc +++ /dev/null @@ -1,104 +0,0 @@ -#include "purify/config.h" -#include "purify/types.h" -#include -#include -#include "benchmarks/utilities.h" -#include "purify/operators.h" -#include "purify/utilities.h" -#include -#include -#include -#include -#include - -using namespace purify; - -class PadmmFixture : public ::benchmark::Fixture { - public: - void SetUp(const ::benchmark::State &state) { - // Reading image from file and update related quantities - bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); - - // Generating random uv(w) coverage - bool newMeasurements = m_uv_data.size() != state.range(1); - if (newMeasurements) { - t_real const sigma_m = constant::pi / 3; - m_uv_data = utilities::random_sample_density(state.range(1), 0, sigma_m); - } - - bool newKernel = m_kernel != state.range(2); - if (newImage || newMeasurements || newKernel) { - m_kernel = state.range(2); - // creating the measurement operator - const t_real FoV = 1; // deg - const t_real cellsize = FoV / m_imsizex * 60. * 60.; - const bool w_term = false; - m_measurements_transform = measurementoperator::init_degrid_operator_2d>( - m_uv_data, m_imsizey, m_imsizex, cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, - m_kernel, w_term); - m_gamma = (m_measurements_transform->adjoint() * m_uv_data.vis).real().maxCoeff() * 1e-3; - - // create the padmm algorithm - sopt::LinearTransform> Psi = - sopt::linear_transform(m_sara, m_imsizey, m_imsizex); - m_padmm = std::make_shared>(m_uv_data.vis); - m_padmm->itermax(state.range(3) + 1) - .gamma(m_gamma) - .relative_variation(1e-3) - .l2ball_proximal_epsilon(m_epsilon) - .tight_frame(false) - .l1_proximal_tolerance(1e-2) - .l1_proximal_nu(1) - .l1_proximal_itermax(2) - .l1_proximal_positivity_constraint(true) - .l1_proximal_real_constraint(true) - .residual_convergence(m_epsilon * 1.001) - .lagrange_update_scale(0.9) - .nu(1e0) - .Psi(Psi) - .Phi(*m_measurements_transform); - } - } - - void TearDown(const ::benchmark::State &state) {} - - t_uint m_counter; - const sopt::wavelets::SARA m_sara{ - std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), - std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), - std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; - - Image m_image; - t_uint m_imsizex; - t_uint m_imsizey; - - utilities::vis_params m_uv_data; - t_real m_epsilon; - - t_uint m_kernel; - std::shared_ptr> const> m_measurements_transform; - t_real m_gamma; - std::shared_ptr> m_padmm; -}; - -BENCHMARK_DEFINE_F(PadmmFixture, Apply)(benchmark::State &state) { - // Benchmark the application of the algorithm - while (state.KeepRunning()) { - auto start = std::chrono::high_resolution_clock::now(); - (*m_padmm)(); - auto end = std::chrono::high_resolution_clock::now(); - state.SetIterationTime(b_utilities::duration(start, end)); - } - - // state.SetBytesProcessed(int64_t(state.iterations()) * (state.range(1) + m_imsizey * m_imsizex) - // * sizeof(t_complex)); -} - -BENCHMARK_REGISTER_F(PadmmFixture, Apply) - //->Apply(b_utilities::Arguments) - ->Args({128, 10000, 4, 100}) - ->UseManualTime() - ->Repetitions(1) //->ReportAggregatesOnly(true) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_MAIN(); diff --git a/cpp/benchmarks/padmm_mpi.cc b/cpp/benchmarks/padmm_mpi.cc deleted file mode 100644 index f2a7c673f..000000000 --- a/cpp/benchmarks/padmm_mpi.cc +++ /dev/null @@ -1,193 +0,0 @@ -#include "purify/types.h" -#include -#include -#include "benchmarks/utilities.h" -#include "purify/convergence_factory.h" -#include "purify/directories.h" -#include "purify/distribute.h" -#include "purify/mpi_utilities.h" -#include "purify/operators.h" -#include "purify/utilities.h" -#include -#include -#include -#include -#include -#include -#include - -using namespace purify; - -class PadmmFixtureMPI : public ::benchmark::Fixture { - public: - void SetUp(const ::benchmark::State &state) { - // Reading image from file and update related quantities - bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); - - // Generating random uv(w) coverage - bool newMeasurements = m_uv_data.size() != state.range(1); - if (newMeasurements) { - t_real const sigma_m = constant::pi / 3; - m_uv_data = utilities::random_sample_density(state.range(1), 0, sigma_m); - } - - bool newKernel = m_kernel != state.range(2); - if (newImage || newMeasurements || newKernel) { - m_kernel = state.range(2); - // creating the measurement operator - const t_real FoV = 1; // deg - const t_real cellsize = FoV / m_imsizex * 60. * 60.; - const bool w_term = false; - // algorithm 1 - if (state.range(4) == 1) - m_measurements1 = measurementoperator::init_degrid_operator_2d_mpi>( - m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, - kernels::kernel::kb, m_kernel, m_kernel, w_term); - // algorithm 3 - if (state.range(4) == 3) - m_measurements3 = measurementoperator::init_degrid_operator_2d>( - m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, - kernels::kernel::kb, m_kernel, m_kernel, w_term); - } - } - - void TearDown(const ::benchmark::State &state) {} - - sopt::mpi::Communicator m_world; - - const sopt::wavelets::SARA m_sara{ - std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), - std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), - std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; - - Image m_image; - t_uint m_imsizex; - t_uint m_imsizey; - - utilities::vis_params m_uv_data; - t_real m_epsilon; - - t_uint m_kernel; - std::shared_ptr> const> m_measurements1; - std::shared_ptr> const> m_measurements3; -}; - -BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { - // Create the algorithm - somehow doesn't work if done in the fixture... - sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); - auto const Psi = - sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); - t_real gamma = - utilities::step_size(m_uv_data.vis, m_measurements1, - std::make_shared> const>(Psi), - saraDistr.size()) * - 1e-3; - gamma = m_world.all_reduce(gamma, MPI_MAX); - - std::shared_ptr> padmm = - std::make_shared>(m_uv_data.vis); - padmm->itermax(state.range(3) + 1) - .gamma(gamma) - .relative_variation(1e-3) - .l2ball_proximal_epsilon(m_epsilon) - // communicator ensuring l1 norm in l1 proximal is global - .l1_proximal_adjoint_space_comm(m_world) - .tight_frame(false) - .l1_proximal_tolerance(1e-2) - .l1_proximal_nu(1) - .l1_proximal_itermax(2) - .l1_proximal_positivity_constraint(true) - .l1_proximal_real_constraint(true) - .residual_tolerance(m_epsilon) - .lagrange_update_scale(0.9) - .nu(1e0) - .Psi(Psi) - .Phi(*m_measurements1); - - std::weak_ptr const padmm_weak(padmm); - padmm->residual_convergence( - factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); - padmm->objective_convergence( - factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); - // Benchmark the application of the algorithm - while (state.KeepRunning()) { - auto start = std::chrono::high_resolution_clock::now(); - auto result = (*padmm)(); - auto end = std::chrono::high_resolution_clock::now(); - // std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; - state.SetIterationTime(b_utilities::duration(start, end, m_world)); - } -} - -BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo3)(benchmark::State &state) { - // Create the algorithm - somehow doesn't work if done in the fixture... - sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); - auto const Psi = - sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); - t_real gamma = - utilities::step_size(m_uv_data.vis, m_measurements3, - std::make_shared> const>(Psi), - saraDistr.size()) * - 1e-3; - gamma = m_world.all_reduce(gamma, MPI_MAX); - - std::shared_ptr> padmm = - std::make_shared>(m_uv_data.vis); - padmm->itermax(state.range(3) + 1) - .gamma(gamma) - .relative_variation(1e-3) - .l2ball_proximal_epsilon(m_epsilon) - // communicator ensuring l1 norm in l1 proximal is global - .l1_proximal_adjoint_space_comm(m_world) - .tight_frame(false) - .l1_proximal_tolerance(1e-2) - .l1_proximal_nu(1) - .l1_proximal_itermax(2) - .l1_proximal_positivity_constraint(true) - .l1_proximal_real_constraint(true) - .residual_tolerance(m_epsilon) - .lagrange_update_scale(0.9) - .nu(1e0) - .Psi(Psi) - .Phi(*m_measurements3); - - std::weak_ptr const padmm_weak(padmm); - padmm->residual_convergence( - factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); - padmm->objective_convergence( - factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); - // Benchmark the application of the algorithm - while (state.KeepRunning()) { - auto start = std::chrono::high_resolution_clock::now(); - auto result = (*padmm)(); - auto end = std::chrono::high_resolution_clock::now(); - // std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; - state.SetIterationTime(b_utilities::duration(start, end, m_world)); - } -} - -BENCHMARK_REGISTER_F(PadmmFixtureMPI, ApplyAlgo1) - //->Apply(b_utilities::Arguments) - ->Args({1024, static_cast(1e6), 4, 10, 1}) - ->Args({1024, static_cast(5e6), 4, 10, 1}) - ->Args({1024, static_cast(1e7), 4, 10, 1}) - ->Args({1024, static_cast(5e7), 4, 10, 1}) - ->Args({1024, static_cast(1e8), 4, 10, 1}) - ->Args({1024, static_cast(5e8), 4, 10, 1}) - //->Args({128, 1000, 4}) - ->UseManualTime() - ->Repetitions(3) //->ReportAggregatesOnly(true) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_REGISTER_F(PadmmFixtureMPI, ApplyAlgo3) - //->Apply(b_utilities::Arguments) - ->Args({1024, static_cast(1e6), 4, 10, 3}) - ->Args({1024, static_cast(5e6), 4, 10, 3}) - ->Args({1024, static_cast(1e7), 4, 10, 3}) - ->Args({1024, static_cast(5e7), 4, 10, 3}) - ->Args({1024, static_cast(1e8), 4, 10, 3}) - ->Args({1024, static_cast(5e8), 4, 10, 3}) - //->Args({128, 1000, 4}) - ->UseManualTime() - ->Repetitions(3) //->ReportAggregatesOnly(true) - ->Unit(benchmark::kMillisecond); diff --git a/cpp/benchmarks/utilities.cc b/cpp/benchmarks/utilities.cc index 2079a24aa..e43c8898d 100644 --- a/cpp/benchmarks/utilities.cc +++ b/cpp/benchmarks/utilities.cc @@ -3,6 +3,7 @@ #include #include "purify/directories.h" #include "purify/distribute.h" +#include "purify/logging.h" #include "purify/mpi_utilities.h" #include "purify/operators.h" #include "purify/pfitsio.h" @@ -101,9 +102,11 @@ utilities::vis_params random_measurements(t_int size, const t_real max_w, const utilities::vis_params uv_data; if (vis_file_str.good()) { + PURIFY_INFO("Reading random visibilities from file {}", vis_file); uv_data = utilities::read_visibility(vis_file, true); uv_data.units = utilities::vis_units::radians; } else { + PURIFY_INFO("Generating random visibilities and writing to {}", vis_file); t_real const sigma_m = constant::pi / 3; uv_data = utilities::random_sample_density(size, 0, sigma_m, max_w); uv_data.units = utilities::vis_units::radians; diff --git a/cpp/purify/algorithm_factory.h b/cpp/purify/algorithm_factory.h index 9ce248e3a..b5730b225 100644 --- a/cpp/purify/algorithm_factory.h +++ b/cpp/purify/algorithm_factory.h @@ -69,6 +69,7 @@ padmm_factory(const algo_distribution dist, throw std::runtime_error( "l1 proximal not consistent: You say you are using a tight frame, but you have more than " "one wavelet basis."); + PURIFY_INFO("Constructing PADMM algorithm"); auto epsilon = std::sqrt(2 * uv_data.size() + 2 * std::sqrt(4 * uv_data.size())) * sigma; auto padmm = std::make_shared(uv_data.vis); padmm->itermax(max_iterations) @@ -172,6 +173,7 @@ fb_factory(const algo_distribution dist, throw std::runtime_error( "l1 proximal not consistent: You say you are using a tight frame, but you have more than " "one wavelet basis."); + PURIFY_INFO("Constructing Forward Backward algorithm"); auto fb = std::make_shared(uv_data.vis); fb->itermax(max_iterations) .regulariser_strength(reg_parameter) @@ -265,6 +267,7 @@ primaldual_factory( const t_real relative_variation = 1e-3, const t_real residual_tolerance_scaling = 1, const t_real op_norm = 1) { typedef typename Algorithm::Scalar t_scalar; + PURIFY_INFO("Constructing Primal Dual algorithm"); auto epsilon = std::sqrt(2 * uv_data.size() + 2 * std::sqrt(4 * uv_data.size())) * sigma; auto primaldual = std::make_shared(uv_data.vis); primaldual->itermax(max_iterations)