From ef81d97b467928c37c6698f4fca6718d6fc57b5d Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 24 Oct 2024 12:12:22 +0100 Subject: [PATCH 01/30] Distribute visibilities to get strong scaling benchmarks --- cpp/benchmarks/measurement_operator_mpi.cc | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/cpp/benchmarks/measurement_operator_mpi.cc b/cpp/benchmarks/measurement_operator_mpi.cc index c04c7992e..1cc9cb875 100644 --- a/cpp/benchmarks/measurement_operator_mpi.cc +++ b/cpp/benchmarks/measurement_operator_mpi.cc @@ -18,17 +18,14 @@ class DegridOperatorCtorFixturePar : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State &state) { // Keep count of the benchmark repetitions + m_counter++; m_imsizex = state.range(0); m_imsizey = state.range(0); // Generating random uv(w) coverage - bool newMeasurements = m_uv_data.size() != state.range(1); - if (newMeasurements) { - t_real const sigma_m = constant::pi / 3; - m_uv_data = utilities::random_sample_density(state.range(1), 0, sigma_m); - } + bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_world); // Data needed for the creation of the measurement operator const t_real FoV = 1; // deg @@ -101,11 +98,8 @@ class DegridOperatorFixturePar : public ::benchmark::Fixture { bool newImage = updateImage(state.range(0)); // Generating random uv(w) coverage - bool newMeasurements = m_uv_data.size() != state.range(1); - if (newMeasurements) { - t_real const sigma_m = constant::pi / 3; - m_uv_data = utilities::random_sample_density(state.range(1), 0, sigma_m); - } + // bool newMeasurements = m_uv_data.size() != state.range(1); + bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_world); // Create measurement operators bool newKernel = m_kernel != state.range(2); From 4a2984d836dc79ef6011c3f4551b28b659272e6c Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 24 Oct 2024 13:56:03 +0100 Subject: [PATCH 
02/30] Add warmup time to benchmarks --- cpp/benchmarks/measurement_operator_mpi.cc | 48 ++++++---------------- 1 file changed, 12 insertions(+), 36 deletions(-) diff --git a/cpp/benchmarks/measurement_operator_mpi.cc b/cpp/benchmarks/measurement_operator_mpi.cc index 1cc9cb875..03f3b5df1 100644 --- a/cpp/benchmarks/measurement_operator_mpi.cc +++ b/cpp/benchmarks/measurement_operator_mpi.cc @@ -91,9 +91,6 @@ BENCHMARK_DEFINE_F(DegridOperatorCtorFixturePar, MPI)(benchmark::State &state) { class DegridOperatorFixturePar : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State &state) { - // Keep count of the benchmark repetitions - m_counter++; - // Reading image from file and create temporary image bool newImage = updateImage(state.range(0)); @@ -118,7 +115,6 @@ class DegridOperatorFixturePar : public ::benchmark::Fixture { virtual std::shared_ptr> const> measurementOperator( t_real cellsize, bool w_term) = 0; - t_uint m_counter; sopt::mpi::Communicator m_world; t_uint m_kernel; @@ -192,9 +188,6 @@ class DegridOperatorAdjointFixtureMPI : public DegridOperatorAdjointFixturePar { BENCHMARK_DEFINE_F(DegridOperatorDirectFixtureDistr, Apply)(benchmark::State &state) { // Benchmark the application of the distributed operator - if ((m_counter % 10) == 1) { - m_uv_data.vis = (*m_degridOperator) * Image::Map(m_image.data(), m_image.size(), 1); - } while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); m_uv_data.vis = (*m_degridOperator) * Image::Map(m_image.data(), m_image.size(), 1); @@ -208,9 +201,6 @@ BENCHMARK_DEFINE_F(DegridOperatorDirectFixtureDistr, Apply)(benchmark::State &st BENCHMARK_DEFINE_F(DegridOperatorAdjointFixtureDistr, Apply)(benchmark::State &state) { // Benchmark the application of the adjoint distributed operator - if ((m_counter % 10) == 1) { - m_image = m_degridOperator->adjoint() * m_uv_data.vis; - } while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); m_image = 
m_degridOperator->adjoint() * m_uv_data.vis; @@ -224,9 +214,6 @@ BENCHMARK_DEFINE_F(DegridOperatorAdjointFixtureDistr, Apply)(benchmark::State &s BENCHMARK_DEFINE_F(DegridOperatorDirectFixtureMPI, Apply)(benchmark::State &state) { // Benchmark the application of the distributed MPI operator - if ((m_counter % 10) == 1) { - m_uv_data.vis = (*m_degridOperator) * Image::Map(m_image.data(), m_image.size(), 1); - } while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); m_uv_data.vis = (*m_degridOperator) * Image::Map(m_image.data(), m_image.size(), 1); @@ -240,9 +227,6 @@ BENCHMARK_DEFINE_F(DegridOperatorDirectFixtureMPI, Apply)(benchmark::State &stat BENCHMARK_DEFINE_F(DegridOperatorAdjointFixtureMPI, Apply)(benchmark::State &state) { // Benchmark the application of the adjoint distributed MPI operator - if ((m_counter % 10) == 1) { - m_image = m_degridOperator->adjoint() * m_uv_data.vis; - } while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); m_image = m_degridOperator->adjoint() * m_uv_data.vis; @@ -277,51 +261,43 @@ BENCHMARK_REGISTER_F(DegridOperatorCtorFixturePar, MPI) BENCHMARK_REGISTER_F(DegridOperatorDirectFixtureDistr, Apply) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4}) - ->Args({1024, static_cast(5e6), 4}) ->Args({1024, static_cast(1e7), 4}) - ->Args({1024, static_cast(5e7), 4}) - ->Args({1024, static_cast(1e8), 4}) - ->Args({1024, static_cast(5e8), 4}) ->UseManualTime() - ->Repetitions(10) + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); BENCHMARK_REGISTER_F(DegridOperatorAdjointFixtureDistr, Apply) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4}) - ->Args({1024, static_cast(5e6), 4}) ->Args({1024, static_cast(1e7), 4}) - ->Args({1024, static_cast(5e7), 4}) - ->Args({1024, static_cast(1e8), 4}) - ->Args({1024, static_cast(5e8), 4}) ->UseManualTime() - ->Repetitions(10) + 
->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); BENCHMARK_REGISTER_F(DegridOperatorDirectFixtureMPI, Apply) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4}) - ->Args({1024, static_cast(5e6), 4}) ->Args({1024, static_cast(1e7), 4}) - ->Args({1024, static_cast(5e7), 4}) - ->Args({1024, static_cast(1e8), 4}) - ->Args({1024, static_cast(5e8), 4}) ->UseManualTime() - ->Repetitions(10) + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); BENCHMARK_REGISTER_F(DegridOperatorAdjointFixtureMPI, Apply) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4}) - ->Args({1024, static_cast(5e6), 4}) ->Args({1024, static_cast(1e7), 4}) - ->Args({1024, static_cast(5e7), 4}) - ->Args({1024, static_cast(1e8), 4}) - ->Args({1024, static_cast(5e8), 4}) ->UseManualTime() - ->Repetitions(10) + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); From ccddb82d05821ff3b670f3e51fdb2fe784e4124a Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 24 Oct 2024 13:57:01 +0100 Subject: [PATCH 03/30] Enable debug logging --- cpp/benchmarks/main.cc | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/cpp/benchmarks/main.cc b/cpp/benchmarks/main.cc index 39717d3a8..cd7e6888b 100644 --- a/cpp/benchmarks/main.cc +++ b/cpp/benchmarks/main.cc @@ -1,7 +1,9 @@ #include "purify/config.h" +#include "purify/logging.h" #include #include #include +#include // This reporter does nothing. 
// We can use it to disable output from all but the root process @@ -16,9 +18,14 @@ class NullReporter : public ::benchmark::BenchmarkReporter { // The main is rewritten to allow for MPI initializing and for selecting a // reporter according to the process rank int main(int argc, char const **argv) { + + sopt::logging::set_level("debug"); + purify::logging::set_level("debug"); + #ifdef PURIFY_MPI auto const session = sopt::mpi::init(argc, argv); auto const world = sopt::mpi::Communicator::World(); + PURIFY_LOW_LOG("MPI initialized"); #endif ::benchmark::Initialize(&argc, const_cast(argv)); From 10614a671f3c5456c9d3918b6978c0759b8223ae Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Mon, 28 Oct 2024 17:09:33 +0000 Subject: [PATCH 04/30] Scatter visibilities. Define parameters. --- cpp/benchmarks/padmm_mpi.cc | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/cpp/benchmarks/padmm_mpi.cc b/cpp/benchmarks/padmm_mpi.cc index f2a7c673f..1bc68d0e3 100644 --- a/cpp/benchmarks/padmm_mpi.cc +++ b/cpp/benchmarks/padmm_mpi.cc @@ -25,7 +25,8 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); // Generating random uv(w) coverage - bool newMeasurements = m_uv_data.size() != state.range(1); + bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_world); + if (newMeasurements) { t_real const sigma_m = constant::pi / 3; m_uv_data = utilities::random_sample_density(state.range(1), 0, sigma_m); @@ -80,10 +81,10 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { t_real gamma = utilities::step_size(m_uv_data.vis, m_measurements1, std::make_shared> const>(Psi), - saraDistr.size()) * - 1e-3; + saraDistr.size()) * 1e-3; gamma = m_world.all_reduce(gamma, MPI_MAX); - + auto sigma = 1.0; + m_epsilon = std::sqrt(2 * m_uv_data.size() + 2 * std::sqrt(4 * m_uv_data.size())) * sigma; 
std::shared_ptr> padmm = std::make_shared>(m_uv_data.vis); padmm->itermax(state.range(3) + 1) @@ -95,7 +96,7 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { .tight_frame(false) .l1_proximal_tolerance(1e-2) .l1_proximal_nu(1) - .l1_proximal_itermax(2) + .l1_proximal_itermax(20) .l1_proximal_positivity_constraint(true) .l1_proximal_real_constraint(true) .residual_tolerance(m_epsilon) @@ -114,7 +115,7 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { auto start = std::chrono::high_resolution_clock::now(); auto result = (*padmm)(); auto end = std::chrono::high_resolution_clock::now(); - // std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; state.SetIterationTime(b_utilities::duration(start, end, m_world)); } } @@ -130,7 +131,8 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo3)(benchmark::State &state) { saraDistr.size()) * 1e-3; gamma = m_world.all_reduce(gamma, MPI_MAX); - + auto sigma = 1.0; + m_epsilon = std::sqrt(2 * m_uv_data.size() + 2 * std::sqrt(4 * m_uv_data.size())) * sigma; std::shared_ptr> padmm = std::make_shared>(m_uv_data.vis); padmm->itermax(state.range(3) + 1) @@ -142,7 +144,7 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo3)(benchmark::State &state) { .tight_frame(false) .l1_proximal_tolerance(1e-2) .l1_proximal_nu(1) - .l1_proximal_itermax(2) + .l1_proximal_itermax(10) .l1_proximal_positivity_constraint(true) .l1_proximal_real_constraint(true) .residual_tolerance(m_epsilon) @@ -168,26 +170,20 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo3)(benchmark::State &state) { BENCHMARK_REGISTER_F(PadmmFixtureMPI, ApplyAlgo1) //->Apply(b_utilities::Arguments) - ->Args({1024, static_cast(1e6), 4, 10, 1}) - ->Args({1024, static_cast(5e6), 4, 10, 1}) - ->Args({1024, static_cast(1e7), 4, 10, 1}) - ->Args({1024, static_cast(5e7), 4, 10, 1}) - ->Args({1024, static_cast(1e8), 
4, 10, 1}) - ->Args({1024, static_cast(5e8), 4, 10, 1}) - //->Args({128, 1000, 4}) + ->Args({1024, static_cast(1e6), 4, 100, 1}) + ->Args({1024, static_cast(1e7), 4, 100, 1}) ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); BENCHMARK_REGISTER_F(PadmmFixtureMPI, ApplyAlgo3) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4, 10, 3}) - ->Args({1024, static_cast(5e6), 4, 10, 3}) ->Args({1024, static_cast(1e7), 4, 10, 3}) - ->Args({1024, static_cast(5e7), 4, 10, 3}) - ->Args({1024, static_cast(1e8), 4, 10, 3}) - ->Args({1024, static_cast(5e8), 4, 10, 3}) - //->Args({128, 1000, 4}) ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); From aa05250c78f4ee8577cba2e2cd102dcca06d23ce Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Wed, 30 Oct 2024 16:42:05 +0000 Subject: [PATCH 05/30] Clean up padmm benchmarks --- cpp/benchmarks/padmm.cc | 8 +- cpp/benchmarks/padmm_mpi.cc | 142 ++++++++++++++---------------------- 2 files changed, 62 insertions(+), 88 deletions(-) diff --git a/cpp/benchmarks/padmm.cc b/cpp/benchmarks/padmm.cc index 8b6b875e5..8dbfa32c1 100644 --- a/cpp/benchmarks/padmm.cc +++ b/cpp/benchmarks/padmm.cc @@ -37,6 +37,8 @@ class PadmmFixture : public ::benchmark::Fixture { m_uv_data, m_imsizey, m_imsizex, cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term); m_gamma = (m_measurements_transform->adjoint() * m_uv_data.vis).real().maxCoeff() * 1e-3; + auto sigma = 0.033; // roughly the value used in algo_factory test + m_epsilon = std::sqrt(2 * m_uv_data.size() + 2 * std::sqrt(4 * m_uv_data.size())) * sigma; // create the padmm algorithm sopt::LinearTransform> Psi = @@ -49,7 +51,7 @@ class PadmmFixture : public ::benchmark::Fixture { .tight_frame(false) .l1_proximal_tolerance(1e-2) .l1_proximal_nu(1) - .l1_proximal_itermax(2) + 
.l1_proximal_itermax(20) .l1_proximal_positivity_constraint(true) .l1_proximal_real_constraint(true) .residual_convergence(m_epsilon * 1.001) @@ -98,7 +100,9 @@ BENCHMARK_REGISTER_F(PadmmFixture, Apply) //->Apply(b_utilities::Arguments) ->Args({128, 10000, 4, 100}) ->UseManualTime() - ->Repetitions(1) //->ReportAggregatesOnly(true) + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); BENCHMARK_MAIN(); diff --git a/cpp/benchmarks/padmm_mpi.cc b/cpp/benchmarks/padmm_mpi.cc index 1bc68d0e3..c1976c72c 100644 --- a/cpp/benchmarks/padmm_mpi.cc +++ b/cpp/benchmarks/padmm_mpi.cc @@ -5,6 +5,7 @@ #include "purify/convergence_factory.h" #include "purify/directories.h" #include "purify/distribute.h" +#include "purify/logging.h" #include "purify/mpi_utilities.h" #include "purify/operators.h" #include "purify/utilities.h" @@ -39,16 +40,58 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { const t_real FoV = 1; // deg const t_real cellsize = FoV / m_imsizex * 60. 
* 60.; const bool w_term = false; + + auto sigma = 0.033; // roughly the value used in algo_factory test + m_epsilon = std::sqrt(2 * m_uv_data.size() + 2 * std::sqrt(4 * m_uv_data.size())) * sigma; + + sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); + auto const Psi = + sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); + // algorithm 1 - if (state.range(4) == 1) - m_measurements1 = measurementoperator::init_degrid_operator_2d_mpi>( + if (state.range(4) == 1) { + m_measurements = measurementoperator::init_degrid_operator_2d_mpi>( m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term); - // algorithm 3 - if (state.range(4) == 3) - m_measurements3 = measurementoperator::init_degrid_operator_2d>( + } else if (state.range(4) == 3) { + m_measurements = measurementoperator::init_degrid_operator_2d>( m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term); + } else { + PURIFY_ERROR("Unknown MPI algorithm", state.range(4)); + } + + // Create the algorithm + t_real gamma = utilities::step_size( + m_uv_data.vis, m_measurements, + std::make_shared> const>(Psi), + saraDistr.size()) * + 1e-3; + gamma = m_world.all_reduce(gamma, MPI_MAX); + m_padmm = std::make_shared>(m_uv_data.vis); + m_padmm->itermax(state.range(3) + 1) + .gamma(gamma) + .relative_variation(1e-3) + .l2ball_proximal_epsilon(m_epsilon) + // communicator ensuring l1 norm in l1 proximal is global + .l1_proximal_adjoint_space_comm(m_world) + .tight_frame(false) + .l1_proximal_tolerance(1e-2) + .l1_proximal_nu(1) + .l1_proximal_itermax(20) + .l1_proximal_positivity_constraint(true) + .l1_proximal_real_constraint(true) + .residual_tolerance(m_epsilon) + .lagrange_update_scale(0.9) + .nu(1e0) + .Psi(Psi) + .Phi(*m_measurements); + + std::weak_ptr const padmm_weak(m_padmm); + 
m_padmm->residual_convergence(factory::l2_convergence_factory( + factory::ConvergenceType::mpi_local, padmm_weak)); + m_padmm->objective_convergence(factory::l1_convergence_factory( + factory::ConvergenceType::mpi_local, padmm_weak)); } } @@ -67,53 +110,17 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { utilities::vis_params m_uv_data; t_real m_epsilon; - t_uint m_kernel; - std::shared_ptr> const> m_measurements1; - std::shared_ptr> const> m_measurements3; + + std::shared_ptr> const> m_measurements; + std::shared_ptr> m_padmm; }; BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { - // Create the algorithm - somehow doesn't work if done in the fixture... - sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); - auto const Psi = - sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); - t_real gamma = - utilities::step_size(m_uv_data.vis, m_measurements1, - std::make_shared> const>(Psi), - saraDistr.size()) * 1e-3; - gamma = m_world.all_reduce(gamma, MPI_MAX); - auto sigma = 1.0; - m_epsilon = std::sqrt(2 * m_uv_data.size() + 2 * std::sqrt(4 * m_uv_data.size())) * sigma; - std::shared_ptr> padmm = - std::make_shared>(m_uv_data.vis); - padmm->itermax(state.range(3) + 1) - .gamma(gamma) - .relative_variation(1e-3) - .l2ball_proximal_epsilon(m_epsilon) - // communicator ensuring l1 norm in l1 proximal is global - .l1_proximal_adjoint_space_comm(m_world) - .tight_frame(false) - .l1_proximal_tolerance(1e-2) - .l1_proximal_nu(1) - .l1_proximal_itermax(20) - .l1_proximal_positivity_constraint(true) - .l1_proximal_real_constraint(true) - .residual_tolerance(m_epsilon) - .lagrange_update_scale(0.9) - .nu(1e0) - .Psi(Psi) - .Phi(*m_measurements1); - - std::weak_ptr const padmm_weak(padmm); - padmm->residual_convergence( - factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); - padmm->objective_convergence( - 
factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); // Benchmark the application of the algorithm while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); - auto result = (*padmm)(); + auto result = (*m_padmm)(); auto end = std::chrono::high_resolution_clock::now(); std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; state.SetIterationTime(b_utilities::duration(start, end, m_world)); @@ -121,57 +128,20 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { } BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo3)(benchmark::State &state) { - // Create the algorithm - somehow doesn't work if done in the fixture... - sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); - auto const Psi = - sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); - t_real gamma = - utilities::step_size(m_uv_data.vis, m_measurements3, - std::make_shared> const>(Psi), - saraDistr.size()) * - 1e-3; - gamma = m_world.all_reduce(gamma, MPI_MAX); - auto sigma = 1.0; - m_epsilon = std::sqrt(2 * m_uv_data.size() + 2 * std::sqrt(4 * m_uv_data.size())) * sigma; - std::shared_ptr> padmm = - std::make_shared>(m_uv_data.vis); - padmm->itermax(state.range(3) + 1) - .gamma(gamma) - .relative_variation(1e-3) - .l2ball_proximal_epsilon(m_epsilon) - // communicator ensuring l1 norm in l1 proximal is global - .l1_proximal_adjoint_space_comm(m_world) - .tight_frame(false) - .l1_proximal_tolerance(1e-2) - .l1_proximal_nu(1) - .l1_proximal_itermax(10) - .l1_proximal_positivity_constraint(true) - .l1_proximal_real_constraint(true) - .residual_tolerance(m_epsilon) - .lagrange_update_scale(0.9) - .nu(1e0) - .Psi(Psi) - .Phi(*m_measurements3); - - std::weak_ptr const padmm_weak(padmm); - padmm->residual_convergence( - factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); - padmm->objective_convergence( - 
factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); // Benchmark the application of the algorithm while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); - auto result = (*padmm)(); + auto result = (*m_padmm)(); auto end = std::chrono::high_resolution_clock::now(); - // std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; state.SetIterationTime(b_utilities::duration(start, end, m_world)); } } BENCHMARK_REGISTER_F(PadmmFixtureMPI, ApplyAlgo1) //->Apply(b_utilities::Arguments) - ->Args({1024, static_cast(1e6), 4, 100, 1}) - ->Args({1024, static_cast(1e7), 4, 100, 1}) + ->Args({1024, static_cast(1e6), 4, 10, 1}) + ->Args({1024, static_cast(1e7), 4, 10, 1}) ->UseManualTime() ->MinTime(10.0) ->MinWarmUpTime(5.0) From b59fa42123e3c2621414fbecdf57325045d9078c Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 1 Nov 2024 14:59:39 +0000 Subject: [PATCH 06/30] Use factories in padmm benchmarks --- cpp/benchmarks/padmm.cc | 38 +++--- cpp/benchmarks/padmm_mpi.cc | 223 ++++++++++++++++++++---------------- 2 files changed, 140 insertions(+), 121 deletions(-) diff --git a/cpp/benchmarks/padmm.cc b/cpp/benchmarks/padmm.cc index 8b6b875e5..a8b49516e 100644 --- a/cpp/benchmarks/padmm.cc +++ b/cpp/benchmarks/padmm.cc @@ -3,8 +3,10 @@ #include #include #include "benchmarks/utilities.h" +#include "purify/algorithm_factory.h" #include "purify/operators.h" #include "purify/utilities.h" +#include "purify/wavelet_operator_factory.h" #include #include #include @@ -36,34 +38,26 @@ class PadmmFixture : public ::benchmark::Fixture { m_measurements_transform = measurementoperator::init_degrid_operator_2d>( m_uv_data, m_imsizey, m_imsizex, cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term); - m_gamma = (m_measurements_transform->adjoint() * m_uv_data.vis).real().maxCoeff() * 1e-3; 
- - // create the padmm algorithm - sopt::LinearTransform> Psi = - sopt::linear_transform(m_sara, m_imsizey, m_imsizex); - m_padmm = std::make_shared>(m_uv_data.vis); - m_padmm->itermax(state.range(3) + 1) - .gamma(m_gamma) - .relative_variation(1e-3) - .l2ball_proximal_epsilon(m_epsilon) - .tight_frame(false) - .l1_proximal_tolerance(1e-2) - .l1_proximal_nu(1) - .l1_proximal_itermax(2) - .l1_proximal_positivity_constraint(true) - .l1_proximal_real_constraint(true) - .residual_convergence(m_epsilon * 1.001) - .lagrange_update_scale(0.9) - .nu(1e0) - .Psi(Psi) - .Phi(*m_measurements_transform); + + const t_uint imsizex = m_imsizex; + const t_uint imsizey = m_imsizey; + + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + t_real const sigma = 0.016820222945913496 * std::sqrt(2); // see test_parameters file + + m_padmm = factory::padmm_factory>( + factory::algo_distribution::serial, m_measurements_transform, wavelets, m_uv_data, sigma, + m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, + 50, 1.0, 1.0); } } void TearDown(const ::benchmark::State &state) {} t_uint m_counter; - const sopt::wavelets::SARA m_sara{ + std::vector> const m_sara{ std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; diff --git a/cpp/benchmarks/padmm_mpi.cc b/cpp/benchmarks/padmm_mpi.cc index 1bc68d0e3..514cb45b3 100644 --- a/cpp/benchmarks/padmm_mpi.cc +++ b/cpp/benchmarks/padmm_mpi.cc @@ -2,12 +2,14 @@ #include #include #include "benchmarks/utilities.h" +#include "purify/algorithm_factory.h" #include "purify/convergence_factory.h" #include "purify/directories.h" #include "purify/distribute.h" #include "purify/mpi_utilities.h" #include "purify/operators.h" #include 
"purify/utilities.h" +#include "purify/wavelet_operator_factory.h" #include #include #include @@ -25,38 +27,38 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); // Generating random uv(w) coverage - bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_world); - - if (newMeasurements) { - t_real const sigma_m = constant::pi / 3; - m_uv_data = utilities::random_sample_density(state.range(1), 0, sigma_m); - } + bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_epsilon, + newImage, m_image, m_world); bool newKernel = m_kernel != state.range(2); - if (newImage || newMeasurements || newKernel) { - m_kernel = state.range(2); - // creating the measurement operator - const t_real FoV = 1; // deg - const t_real cellsize = FoV / m_imsizex * 60. * 60.; - const bool w_term = false; - // algorithm 1 - if (state.range(4) == 1) - m_measurements1 = measurementoperator::init_degrid_operator_2d_mpi>( - m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, - kernels::kernel::kb, m_kernel, m_kernel, w_term); - // algorithm 3 - if (state.range(4) == 3) - m_measurements3 = measurementoperator::init_degrid_operator_2d>( - m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, - kernels::kernel::kb, m_kernel, m_kernel, w_term); - } + + m_kernel = state.range(2); + // Create the measurement operator for both distributed algorithms + const t_real FoV = 1; // deg + const t_real cellsize = FoV / m_imsizex * 60. 
* 60.; + const bool w_term = false; + // algorithm 1 + if (state.range(4) == 1) + m_measurements1 = measurementoperator::init_degrid_operator_2d_mpi>( + m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, + kernels::kernel::kb, m_kernel, m_kernel, w_term); + // algorithm 3 + if (state.range(4) == 3) + m_measurements3 = measurementoperator::init_degrid_operator_2d>( + m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, + kernels::kernel::kb, m_kernel, m_kernel, w_term); } void TearDown(const ::benchmark::State &state) {} sopt::mpi::Communicator m_world; - const sopt::wavelets::SARA m_sara{ + // const sopt::wavelets::SARA m_sara{ + // std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), + // std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), + // std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; + + std::vector> const m_sara{ std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; @@ -74,42 +76,54 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { }; BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { - // Create the algorithm - somehow doesn't work if done in the fixture... 
- sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); - auto const Psi = - sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); - t_real gamma = - utilities::step_size(m_uv_data.vis, m_measurements1, - std::make_shared> const>(Psi), - saraDistr.size()) * 1e-3; - gamma = m_world.all_reduce(gamma, MPI_MAX); - auto sigma = 1.0; - m_epsilon = std::sqrt(2 * m_uv_data.size() + 2 * std::sqrt(4 * m_uv_data.size())) * sigma; - std::shared_ptr> padmm = - std::make_shared>(m_uv_data.vis); - padmm->itermax(state.range(3) + 1) - .gamma(gamma) - .relative_variation(1e-3) - .l2ball_proximal_epsilon(m_epsilon) - // communicator ensuring l1 norm in l1 proximal is global - .l1_proximal_adjoint_space_comm(m_world) - .tight_frame(false) - .l1_proximal_tolerance(1e-2) - .l1_proximal_nu(1) - .l1_proximal_itermax(20) - .l1_proximal_positivity_constraint(true) - .l1_proximal_real_constraint(true) - .residual_tolerance(m_epsilon) - .lagrange_update_scale(0.9) - .nu(1e0) - .Psi(Psi) - .Phi(*m_measurements1); - - std::weak_ptr const padmm_weak(padmm); - padmm->residual_convergence( - factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); - padmm->objective_convergence( - factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. 
+ + // sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); + // auto const Psi = + // sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); + // t_real gamma = + // utilities::step_size(m_uv_data.vis, m_measurements1, + // std::make_shared> const>(Psi), + // saraDistr.size()) * + // 1e-3; + // gamma = m_world.all_reduce(gamma, MPI_MAX); + // std::shared_ptr> padmm = + // std::make_shared>(m_uv_data.vis); + // padmm->itermax(state.range(3) + 1) + // .gamma(gamma) + // .relative_variation(1e-3) + // .l2ball_proximal_epsilon(m_epsilon) + // // communicator ensuring l1 norm in l1 proximal is global + // .l1_proximal_adjoint_space_comm(m_world) + // .tight_frame(false) + // .l1_proximal_tolerance(1e-2) + // .l1_proximal_nu(1) + // .l1_proximal_itermax(20) + // .l1_proximal_positivity_constraint(true) + // .l1_proximal_real_constraint(true) + // .residual_tolerance(m_epsilon) + // .lagrange_update_scale(0.9) + // .nu(1e0) + // .Psi(Psi) + // .Phi(*m_measurements1); + // std::weak_ptr const padmm_weak(padmm); + // padmm->residual_convergence( + // factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, + // padmm_weak)); + // padmm->objective_convergence( + // factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, + // padmm_weak)); + + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); + t_real const sigma = + m_world.broadcast(0.016820222945913496) * std::sqrt(2); // see test_parameters file + auto const padmm = factory::padmm_factory>( + factory::algo_distribution::mpi_distributed, m_measurements1, wavelets, m_uv_data, sigma, + m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, 50, + 1.0, 1.0); + // Benchmark the application of the algorithm while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); @@ -121,43 +135,54 @@ 
BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { } BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo3)(benchmark::State &state) { - // Create the algorithm - somehow doesn't work if done in the fixture... - sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); - auto const Psi = - sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); - t_real gamma = - utilities::step_size(m_uv_data.vis, m_measurements3, - std::make_shared> const>(Psi), - saraDistr.size()) * - 1e-3; - gamma = m_world.all_reduce(gamma, MPI_MAX); - auto sigma = 1.0; - m_epsilon = std::sqrt(2 * m_uv_data.size() + 2 * std::sqrt(4 * m_uv_data.size())) * sigma; - std::shared_ptr> padmm = - std::make_shared>(m_uv_data.vis); - padmm->itermax(state.range(3) + 1) - .gamma(gamma) - .relative_variation(1e-3) - .l2ball_proximal_epsilon(m_epsilon) - // communicator ensuring l1 norm in l1 proximal is global - .l1_proximal_adjoint_space_comm(m_world) - .tight_frame(false) - .l1_proximal_tolerance(1e-2) - .l1_proximal_nu(1) - .l1_proximal_itermax(10) - .l1_proximal_positivity_constraint(true) - .l1_proximal_real_constraint(true) - .residual_tolerance(m_epsilon) - .lagrange_update_scale(0.9) - .nu(1e0) - .Psi(Psi) - .Phi(*m_measurements3); - - std::weak_ptr const padmm_weak(padmm); - padmm->residual_convergence( - factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); - padmm->objective_convergence( - factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, padmm_weak)); + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. 
+ + // sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); + // auto const Psi = + // sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); + // t_real gamma = + // utilities::step_size(m_uv_data.vis, m_measurements3, + // std::make_shared> const>(Psi), + // saraDistr.size()) * + // 1e-3; + // gamma = m_world.all_reduce(gamma, MPI_MAX); + // std::shared_ptr> padmm = + // std::make_shared>(m_uv_data.vis); + // padmm->itermax(state.range(3) + 1) + // .gamma(gamma) + // .relative_variation(1e-3) + // .l2ball_proximal_epsilon(m_epsilon) + // // communicator ensuring l1 norm in l1 proximal is global + // .l1_proximal_adjoint_space_comm(m_world) + // .tight_frame(false) + // .l1_proximal_tolerance(1e-2) + // .l1_proximal_nu(1) + // .l1_proximal_itermax(10) + // .l1_proximal_positivity_constraint(true) + // .l1_proximal_real_constraint(true) + // .residual_tolerance(m_epsilon) + // .lagrange_update_scale(0.9) + // .nu(1e0) + // .Psi(Psi) + // .Phi(*m_measurements3); + // std::weak_ptr const padmm_weak(padmm); + // padmm->residual_convergence( + // factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, + // padmm_weak)); + // padmm->objective_convergence( + // factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, + // padmm_weak)); + + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); + t_real const sigma = + m_world.broadcast(0.016820222945913496) * std::sqrt(2); // see test_parameters file + auto const padmm = factory::padmm_factory>( + factory::algo_distribution::mpi_distributed, m_measurements3, wavelets, m_uv_data, sigma, + m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, 50, + 1.0, 1.0); + // Benchmark the application of the algorithm while (state.KeepRunning()) { auto start = std::chrono::high_resolution_clock::now(); @@ -170,8 +195,8 @@ 
BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo3)(benchmark::State &state) { BENCHMARK_REGISTER_F(PadmmFixtureMPI, ApplyAlgo1) //->Apply(b_utilities::Arguments) - ->Args({1024, static_cast(1e6), 4, 100, 1}) - ->Args({1024, static_cast(1e7), 4, 100, 1}) + ->Args({1024, static_cast(1e6), 4, 10, 1}) + ->Args({1024, static_cast(1e7), 4, 10, 1}) ->UseManualTime() ->MinTime(10.0) ->MinWarmUpTime(5.0) From 86b9c197e4a5a0453c1d059b339e05231bf44389 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 7 Nov 2024 09:57:35 +0000 Subject: [PATCH 07/30] Don't use hard coded paths --- cpp/tests/directories.in.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/directories.in.h b/cpp/tests/directories.in.h index e6026b0ec..65f6f9dd9 100644 --- a/cpp/tests/directories.in.h +++ b/cpp/tests/directories.in.h @@ -34,7 +34,7 @@ inline std::string image_filename(std::string const &filename) { } //! Visibility filename inline std::string visibility_filename(std::string const &filename) { - return scratch_filename("vis_" + filename); + return data_filename("vis_" + filename); } //! 
Specific vla data inline std::string vla_filename(std::string const &filename) { From 01e21281124bd4a14f64bd41397e3ef6b3f172cf Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 7 Nov 2024 10:01:03 +0000 Subject: [PATCH 08/30] Fix variable names --- cpp/benchmarks/padmm_mpi.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/benchmarks/padmm_mpi.cc b/cpp/benchmarks/padmm_mpi.cc index f8c5d6bb6..00bc33948 100644 --- a/cpp/benchmarks/padmm_mpi.cc +++ b/cpp/benchmarks/padmm_mpi.cc @@ -72,7 +72,8 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { t_real m_epsilon; t_uint m_kernel; - std::shared_ptr> const> m_measurements; + std::shared_ptr> const> m_measurements1; + std::shared_ptr> const> m_measurements3; std::shared_ptr> m_padmm; }; From f36b91b71bdfbf3f55cd6165065e6fd5a8d1dfe4 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 8 Nov 2024 11:55:42 +0000 Subject: [PATCH 09/30] Add some logging to report where vis data is coming from --- cpp/benchmarks/utilities.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/benchmarks/utilities.cc b/cpp/benchmarks/utilities.cc index cdb02fae9..e7fdb779f 100644 --- a/cpp/benchmarks/utilities.cc +++ b/cpp/benchmarks/utilities.cc @@ -7,6 +7,7 @@ #include "purify/operators.h" #include "purify/pfitsio.h" #include +#include "purify/logging.h" using namespace purify; using namespace purify::notinstalled; @@ -102,9 +103,11 @@ utilities::vis_params random_measurements(t_int size, const t_real max_w, const utilities::vis_params uv_data; if (vis_file_str.good()) { + PURIFY_INFO("Reading random visibilities from file", vis_file); uv_data = utilities::read_visibility(vis_file, true); uv_data.units = utilities::vis_units::radians; } else { + PURIFY_INFO("Generating random visibilities and writing to", vis_file); t_real const sigma_m = constant::pi / 3; uv_data = utilities::random_sample_density(size, 0, sigma_m, max_w); uv_data.units = utilities::vis_units::radians; From 
645d7f03c78bf84d675c7b32c6daf91d117338c0 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 8 Nov 2024 11:56:07 +0000 Subject: [PATCH 10/30] Update measurements the same way as mpi version --- cpp/benchmarks/padmm.cc | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/benchmarks/padmm.cc b/cpp/benchmarks/padmm.cc index b108c4cb6..c3150936d 100644 --- a/cpp/benchmarks/padmm.cc +++ b/cpp/benchmarks/padmm.cc @@ -22,11 +22,8 @@ class PadmmFixture : public ::benchmark::Fixture { bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); // Generating random uv(w) coverage - bool newMeasurements = m_uv_data.size() != state.range(1); - if (newMeasurements) { - t_real const sigma_m = constant::pi / 3; - m_uv_data = utilities::random_sample_density(state.range(1), 0, sigma_m); - } + bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_epsilon, + newImage, m_image); bool newKernel = m_kernel != state.range(2); if (newImage || newMeasurements || newKernel) { @@ -56,6 +53,7 @@ class PadmmFixture : public ::benchmark::Fixture { void TearDown(const ::benchmark::State &state) {} + t_real m_epsilon; t_uint m_counter; std::vector> const m_sara{ std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), @@ -67,11 +65,9 @@ class PadmmFixture : public ::benchmark::Fixture { t_uint m_imsizey; utilities::vis_params m_uv_data; - t_real m_epsilon; t_uint m_kernel; std::shared_ptr> const> m_measurements_transform; - t_real m_gamma; std::shared_ptr> m_padmm; }; @@ -90,7 +86,7 @@ BENCHMARK_DEFINE_F(PadmmFixture, Apply)(benchmark::State &state) { BENCHMARK_REGISTER_F(PadmmFixture, Apply) //->Apply(b_utilities::Arguments) - ->Args({128, 10000, 4, 100}) + ->Args({128, 10000, 4, 10}) ->UseManualTime() ->MinTime(10.0) ->MinWarmUpTime(5.0) From 06f47320349cbf56873a5dfb06ff1ced38940487 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 8 Nov 2024 11:56:57 +0000 Subject: 
[PATCH 11/30] Clarify names, call the right padmm object --- cpp/benchmarks/padmm_mpi.cc | 133 +++++++++--------------------------- 1 file changed, 31 insertions(+), 102 deletions(-) diff --git a/cpp/benchmarks/padmm_mpi.cc b/cpp/benchmarks/padmm_mpi.cc index 00bc33948..309072216 100644 --- a/cpp/benchmarks/padmm_mpi.cc +++ b/cpp/benchmarks/padmm_mpi.cc @@ -11,6 +11,7 @@ #include "purify/operators.h" #include "purify/utilities.h" #include "purify/wavelet_operator_factory.h" +#include "purify/measurement_operator_factory.h" #include #include #include @@ -38,27 +39,30 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { const t_real FoV = 1; // deg const t_real cellsize = FoV / m_imsizex * 60. * 60.; const bool w_term = false; - // algorithm 1 - if (state.range(4) == 1) - m_measurements1 = measurementoperator::init_degrid_operator_2d_mpi>( - m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, - kernels::kernel::kb, m_kernel, m_kernel, w_term); - // algorithm 3 - if (state.range(4) == 3) - m_measurements3 = measurementoperator::init_degrid_operator_2d>( - m_world, m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, + if (state.range(4) == 1) { + PURIFY_INFO("Using distributed image MPI algorithm"); + m_measurements_distribute_image = factory::measurement_operator_factory>( + factory::distributed_measurement_operator::mpi_distribute_image, + m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, + kernels::kernel::kb, m_kernel, m_kernel, w_term); + } + + if (state.range(4) == 2) { + PURIFY_INFO("Using distributed grid MPI algorithm"); + m_measurements_distribute_grid = factory::measurement_operator_factory>( + factory::distributed_measurement_operator::mpi_distribute_grid, + m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term); + } + + m_sigma = 0.016820222945913496 * std::sqrt(2); // see test_parameters file + } void TearDown(const ::benchmark::State 
&state) {} sopt::mpi::Communicator m_world; - // const sopt::wavelets::SARA m_sara{ - // std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), - // std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), - // std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; - std::vector> const m_sara{ std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), @@ -70,59 +74,22 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { utilities::vis_params m_uv_data; t_real m_epsilon; + t_real m_sigma; t_uint m_kernel; - std::shared_ptr> const> m_measurements1; - std::shared_ptr> const> m_measurements3; + std::shared_ptr> const> m_measurements_distribute_image; + std::shared_ptr> const> m_measurements_distribute_grid; std::shared_ptr> m_padmm; }; -BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { +BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeImage)(benchmark::State &state) { // Create the algorithm - has to be done there to reset the internal state. // If done in the fixture repeats would start at the solution and converge immediately. 
- - // sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); - // auto const Psi = - // sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); - // t_real gamma = - // utilities::step_size(m_uv_data.vis, m_measurements1, - // std::make_shared> const>(Psi), - // saraDistr.size()) * - // 1e-3; - // gamma = m_world.all_reduce(gamma, MPI_MAX); - // std::shared_ptr> padmm = - // std::make_shared>(m_uv_data.vis); - // padmm->itermax(state.range(3) + 1) - // .gamma(gamma) - // .relative_variation(1e-3) - // .l2ball_proximal_epsilon(m_epsilon) - // // communicator ensuring l1 norm in l1 proximal is global - // .l1_proximal_adjoint_space_comm(m_world) - // .tight_frame(false) - // .l1_proximal_tolerance(1e-2) - // .l1_proximal_nu(1) - // .l1_proximal_itermax(20) - // .l1_proximal_positivity_constraint(true) - // .l1_proximal_real_constraint(true) - // .residual_tolerance(m_epsilon) - // .lagrange_update_scale(0.9) - // .nu(1e0) - // .Psi(Psi) - // .Phi(*m_measurements1); - // std::weak_ptr const padmm_weak(padmm); - // padmm->residual_convergence( - // factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, - // padmm_weak)); - // padmm->objective_convergence( - // factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, - // padmm_weak)); - auto const wavelets = factory::wavelet_operator_factory>( factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); - t_real const sigma = - m_world.broadcast(0.016820222945913496) * std::sqrt(2); // see test_parameters file - auto const padmm = factory::padmm_factory>( - factory::algo_distribution::mpi_distributed, m_measurements1, wavelets, m_uv_data, sigma, + + m_padmm = factory::padmm_factory>( + factory::algo_distribution::mpi_distributed, m_measurements_distribute_image, wavelets, m_uv_data, m_sigma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, 50, 1.0, 1.0); @@ -136,52 +103,14 @@ 
BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo1)(benchmark::State &state) { } } -BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo3)(benchmark::State &state) { +BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeGrid)(benchmark::State &state) { // Create the algorithm - has to be done there to reset the internal state. // If done in the fixture repeats would start at the solution and converge immediately. - // sopt::wavelets::SARA saraDistr = sopt::wavelets::distribute_sara(m_sara, m_world); - // auto const Psi = - // sopt::linear_transform(saraDistr, m_image.rows(), m_image.cols(), m_world); - // t_real gamma = - // utilities::step_size(m_uv_data.vis, m_measurements3, - // std::make_shared> const>(Psi), - // saraDistr.size()) * - // 1e-3; - // gamma = m_world.all_reduce(gamma, MPI_MAX); - // std::shared_ptr> padmm = - // std::make_shared>(m_uv_data.vis); - // padmm->itermax(state.range(3) + 1) - // .gamma(gamma) - // .relative_variation(1e-3) - // .l2ball_proximal_epsilon(m_epsilon) - // // communicator ensuring l1 norm in l1 proximal is global - // .l1_proximal_adjoint_space_comm(m_world) - // .tight_frame(false) - // .l1_proximal_tolerance(1e-2) - // .l1_proximal_nu(1) - // .l1_proximal_itermax(10) - // .l1_proximal_positivity_constraint(true) - // .l1_proximal_real_constraint(true) - // .residual_tolerance(m_epsilon) - // .lagrange_update_scale(0.9) - // .nu(1e0) - // .Psi(Psi) - // .Phi(*m_measurements3); - // std::weak_ptr const padmm_weak(padmm); - // padmm->residual_convergence( - // factory::l2_convergence_factory(factory::ConvergenceType::mpi_local, - // padmm_weak)); - // padmm->objective_convergence( - // factory::l1_convergence_factory(factory::ConvergenceType::mpi_local, - // padmm_weak)); - auto const wavelets = factory::wavelet_operator_factory>( factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); - t_real const sigma = - m_world.broadcast(0.016820222945913496) * std::sqrt(2); // see test_parameters file auto const padmm = 
factory::padmm_factory>( - factory::algo_distribution::mpi_distributed, m_measurements3, wavelets, m_uv_data, sigma, + factory::algo_distribution::mpi_distributed, m_measurements_distribute_grid, wavelets, m_uv_data, m_sigma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, 50, 1.0, 1.0); @@ -195,7 +124,7 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, ApplyAlgo3)(benchmark::State &state) { } } -BENCHMARK_REGISTER_F(PadmmFixtureMPI, ApplyAlgo1) +BENCHMARK_REGISTER_F(PadmmFixtureMPI, DistributeImage) //->Apply(b_utilities::Arguments) ->Args({1024, static_cast(1e6), 4, 10, 1}) ->Args({1024, static_cast(1e7), 4, 10, 1}) @@ -205,10 +134,10 @@ BENCHMARK_REGISTER_F(PadmmFixtureMPI, ApplyAlgo1) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); -BENCHMARK_REGISTER_F(PadmmFixtureMPI, ApplyAlgo3) +BENCHMARK_REGISTER_F(PadmmFixtureMPI, DistributeGrid) //->Apply(b_utilities::Arguments) - ->Args({1024, static_cast(1e6), 4, 10, 3}) - ->Args({1024, static_cast(1e7), 4, 10, 3}) + ->Args({1024, static_cast(1e6), 4, 10, 2}) + ->Args({1024, static_cast(1e7), 4, 10, 2}) ->UseManualTime() ->MinTime(10.0) ->MinWarmUpTime(5.0) From 4e9f98271cae3c301184f3ae678071692cce4c74 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 8 Nov 2024 12:18:46 +0000 Subject: [PATCH 12/30] Add placeholders to logging command --- cpp/benchmarks/utilities.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/utilities.cc b/cpp/benchmarks/utilities.cc index e7fdb779f..8688a0082 100644 --- a/cpp/benchmarks/utilities.cc +++ b/cpp/benchmarks/utilities.cc @@ -103,11 +103,11 @@ utilities::vis_params random_measurements(t_int size, const t_real max_w, const utilities::vis_params uv_data; if (vis_file_str.good()) { - PURIFY_INFO("Reading random visibilities from file", vis_file); + PURIFY_INFO("Reading random visibilities from file {}", vis_file); uv_data = utilities::read_visibility(vis_file, true); uv_data.units = 
utilities::vis_units::radians; } else { - PURIFY_INFO("Generating random visibilities and writing to", vis_file); + PURIFY_INFO("Generating random visibilities and writing to {}", vis_file); t_real const sigma_m = constant::pi / 3; uv_data = utilities::random_sample_density(size, 0, sigma_m, max_w); uv_data.units = utilities::vis_units::radians; From 73fd8d8327c3dadf5baa79b1273036b814657b74 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 8 Nov 2024 14:47:10 +0000 Subject: [PATCH 13/30] Reduce sopt verbosity --- cpp/benchmarks/main.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/main.cc b/cpp/benchmarks/main.cc index cd7e6888b..f894b7145 100644 --- a/cpp/benchmarks/main.cc +++ b/cpp/benchmarks/main.cc @@ -19,7 +19,7 @@ class NullReporter : public ::benchmark::BenchmarkReporter { // reporter according to the process rank int main(int argc, char const **argv) { - sopt::logging::set_level("debug"); + sopt::logging::set_level("info"); purify::logging::set_level("debug"); #ifdef PURIFY_MPI From e5333f1faed08fed8583690775c6498b1aa0fd56 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 8 Nov 2024 14:48:16 +0000 Subject: [PATCH 14/30] Fix bugs and lint. 
Add smaller test for comparison with serial version --- cpp/benchmarks/padmm_mpi.cc | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/cpp/benchmarks/padmm_mpi.cc b/cpp/benchmarks/padmm_mpi.cc index 309072216..dc753aabf 100644 --- a/cpp/benchmarks/padmm_mpi.cc +++ b/cpp/benchmarks/padmm_mpi.cc @@ -7,11 +7,11 @@ #include "purify/directories.h" #include "purify/distribute.h" #include "purify/logging.h" +#include "purify/measurement_operator_factory.h" #include "purify/mpi_utilities.h" #include "purify/operators.h" #include "purify/utilities.h" #include "purify/wavelet_operator_factory.h" -#include "purify/measurement_operator_factory.h" #include #include #include @@ -42,21 +42,19 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { if (state.range(4) == 1) { PURIFY_INFO("Using distributed image MPI algorithm"); m_measurements_distribute_image = factory::measurement_operator_factory>( - factory::distributed_measurement_operator::mpi_distribute_image, - m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, - kernels::kernel::kb, m_kernel, m_kernel, w_term); + factory::distributed_measurement_operator::mpi_distribute_image, m_uv_data, + m_image.rows(), m_image.cols(), cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, + m_kernel, w_term); } if (state.range(4) == 2) { PURIFY_INFO("Using distributed grid MPI algorithm"); m_measurements_distribute_grid = factory::measurement_operator_factory>( - factory::distributed_measurement_operator::mpi_distribute_grid, - m_uv_data, m_image.rows(), m_image.cols(), cellsize, cellsize, 2, - kernels::kernel::kb, m_kernel, m_kernel, w_term); + factory::distributed_measurement_operator::mpi_distribute_grid, m_uv_data, m_image.rows(), + m_image.cols(), cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term); } m_sigma = 0.016820222945913496 * std::sqrt(2); // see test_parameters file - } void TearDown(const ::benchmark::State &state) {} @@ -89,9 +87,9 @@ 
BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeImage)(benchmark::State &state) { factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); m_padmm = factory::padmm_factory>( - factory::algo_distribution::mpi_distributed, m_measurements_distribute_image, wavelets, m_uv_data, m_sigma, - m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, 50, - 1.0, 1.0); + factory::algo_distribution::mpi_distributed, m_measurements_distribute_image, wavelets, + m_uv_data, m_sigma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0, 1.0); // Benchmark the application of the algorithm while (state.KeepRunning()) { @@ -106,13 +104,13 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeImage)(benchmark::State &state) { BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeGrid)(benchmark::State &state) { // Create the algorithm - has to be done there to reset the internal state. // If done in the fixture repeats would start at the solution and converge immediately. 
- auto const wavelets = factory::wavelet_operator_factory>( factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); - auto const padmm = factory::padmm_factory>( - factory::algo_distribution::mpi_distributed, m_measurements_distribute_grid, wavelets, m_uv_data, m_sigma, - m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, 50, - 1.0, 1.0); + + m_padmm = factory::padmm_factory>( + factory::algo_distribution::mpi_distributed, m_measurements_distribute_grid, wavelets, + m_uv_data, m_sigma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0, 1.0); // Benchmark the application of the algorithm while (state.KeepRunning()) { @@ -126,6 +124,7 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeGrid)(benchmark::State &state) { BENCHMARK_REGISTER_F(PadmmFixtureMPI, DistributeImage) //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 1}) ->Args({1024, static_cast(1e6), 4, 10, 1}) ->Args({1024, static_cast(1e7), 4, 10, 1}) ->UseManualTime() @@ -136,6 +135,7 @@ BENCHMARK_REGISTER_F(PadmmFixtureMPI, DistributeImage) BENCHMARK_REGISTER_F(PadmmFixtureMPI, DistributeGrid) //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 2}) ->Args({1024, static_cast(1e6), 4, 10, 2}) ->Args({1024, static_cast(1e7), 4, 10, 2}) ->UseManualTime() From f8ba8c09b9d3fedd469f058e93ba23630931cff0 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 8 Nov 2024 15:25:05 +0000 Subject: [PATCH 15/30] Add info to factory functions --- cpp/purify/algorithm_factory.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/purify/algorithm_factory.h b/cpp/purify/algorithm_factory.h index a279ebf6f..c776b5f05 100644 --- a/cpp/purify/algorithm_factory.h +++ b/cpp/purify/algorithm_factory.h @@ -69,6 +69,7 @@ padmm_factory(const algo_distribution dist, throw std::runtime_error( "l1 proximal not consistent: You say you are using a tight frame, but you have more than " "one wavelet 
basis."); + PURIFY_INFO("Constructing PADMM algorithm"); auto epsilon = std::sqrt(2 * uv_data.size() + 2 * std::sqrt(4 * uv_data.size())) * sigma; auto padmm = std::make_shared(uv_data.vis); padmm->itermax(max_iterations) @@ -171,6 +172,7 @@ fb_factory(const algo_distribution dist, throw std::runtime_error( "l1 proximal not consistent: You say you are using a tight frame, but you have more than " "one wavelet basis."); + PURIFY_INFO("Constructing Forward Backward algorithm"); auto fb = std::make_shared(uv_data.vis); fb->itermax(max_iterations) .gamma(reg_parameter) @@ -264,6 +266,7 @@ primaldual_factory( const t_real relative_variation = 1e-3, const t_real residual_tolerance_scaling = 1, const t_real op_norm = 1) { typedef typename Algorithm::Scalar t_scalar; + PURIFY_INFO("Constructing Primal Dual algorithm") auto epsilon = std::sqrt(2 * uv_data.size() + 2 * std::sqrt(4 * uv_data.size())) * sigma; auto primaldual = std::make_shared(uv_data.vis); primaldual->itermax(max_iterations) From 54ae8dda2de936dab65ea47908c0e0559753d2b1 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 8 Nov 2024 15:32:10 +0000 Subject: [PATCH 16/30] Add FB algorithms, rename to be more descriptive --- cpp/benchmarks/CMakeLists.txt | 4 +- .../{padmm_mpi.cc => algorithms_mpi.cc} | 77 +++++++++++++++++-- cpp/purify/algorithm_factory.h | 2 +- 3 files changed, 75 insertions(+), 8 deletions(-) rename cpp/benchmarks/{padmm_mpi.cc => algorithms_mpi.cc} (63%) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index d457b2271..6901524f5 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -25,8 +25,8 @@ if(dompi) add_executable(mpi_benchmark_MO_wproj main.cc utilities.cc measurement_operator_wproj.cc) target_link_libraries(mpi_benchmark_MO_wproj ${MPI_LIBRARIES} benchmark libpurify) #target_include_directories(mpi_benchmark_MO_wproj PUBLIC "${PROJECT_SOURCE_DIR}/cpp" "${CMAKE_CURRENT_BINARY_DIR}/include") - add_executable(mpi_benchmark_PADMM 
main.cc utilities.cc padmm_mpi.cc) - target_link_libraries(mpi_benchmark_PADMM ${MPI_LIBRARIES} benchmark libpurify) + add_executable(mpi_benchmark_algorithms main.cc utilities.cc algorithms_mpi.cc) + target_link_libraries(mpi_benchmark_algorithms ${MPI_LIBRARIES} benchmark libpurify) #target_include_directories(mpi_benchmark_PADMM PUBLIC "${PROJECT_SOURCE_DIR}/cpp" "${CMAKE_CURRENT_BINARY_DIR}/include") add_executable(mpi_benchmark_WLO main.cc utilities.cc wavelet_operator_mpi.cc) target_link_libraries(mpi_benchmark_WLO ${MPI_LIBRARIES} benchmark libpurify) diff --git a/cpp/benchmarks/padmm_mpi.cc b/cpp/benchmarks/algorithms_mpi.cc similarity index 63% rename from cpp/benchmarks/padmm_mpi.cc rename to cpp/benchmarks/algorithms_mpi.cc index dc753aabf..437213768 100644 --- a/cpp/benchmarks/padmm_mpi.cc +++ b/cpp/benchmarks/algorithms_mpi.cc @@ -22,7 +22,7 @@ using namespace purify; -class PadmmFixtureMPI : public ::benchmark::Fixture { +class AlgoFixtureMPI : public ::benchmark::Fixture { public: void SetUp(const ::benchmark::State &state) { // Reading image from file and update related quantities @@ -78,9 +78,10 @@ class PadmmFixtureMPI : public ::benchmark::Fixture { std::shared_ptr> const> m_measurements_distribute_image; std::shared_ptr> const> m_measurements_distribute_grid; std::shared_ptr> m_padmm; + std::shared_ptr> m_fb; }; -BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeImage)(benchmark::State &state) { +BENCHMARK_DEFINE_F(AlgoFixtureMPI, PadmmDistributeImage)(benchmark::State &state) { // Create the algorithm - has to be done there to reset the internal state. // If done in the fixture repeats would start at the solution and converge immediately. 
auto const wavelets = factory::wavelet_operator_factory>( @@ -101,7 +102,7 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeImage)(benchmark::State &state) { } } -BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeGrid)(benchmark::State &state) { +BENCHMARK_DEFINE_F(AlgoFixtureMPI, PadmmDistributeGrid)(benchmark::State &state) { // Create the algorithm - has to be done there to reset the internal state. // If done in the fixture repeats would start at the solution and converge immediately. auto const wavelets = factory::wavelet_operator_factory>( @@ -122,7 +123,73 @@ BENCHMARK_DEFINE_F(PadmmFixtureMPI, DistributeGrid)(benchmark::State &state) { } } -BENCHMARK_REGISTER_F(PadmmFixtureMPI, DistributeImage) +BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbDistributeImage)(benchmark::State &state) { + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + + m_fb = factory::fb_factory>( + factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, + m_uv_data, m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, + true, true, false, 1e-3, 1e-2, 50, 1.0); + + // Benchmark the application of the algorithm + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + auto result = (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + state.SetIterationTime(b_utilities::duration(start, end, m_world)); + } +} + +BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbDistributeGrid)(benchmark::State &state) { + // Create the algorithm - has to be done there to reset the internal state. 
+ // If done in the fixture repeats would start at the solution and converge immediately. + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::mpi_sara, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + + m_fb = factory::fb_factory>( + factory::algo_distribution::mpi_serial, m_measurements_distribute_grid, wavelets, + m_uv_data, m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, + true, true, false, 1e-3, 1e-2, 50, 1.0); + + // Benchmark the application of the algorithm + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + auto result = (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "Converged? " << result.good << " , niters = " << result.niters << std::endl; + state.SetIterationTime(b_utilities::duration(start, end, m_world)); + } +} + +BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeImage) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 1}) + ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeGrid) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 2}) + ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeImage) //->Apply(b_utilities::Arguments) ->Args({128, 10000, 4, 10, 1}) ->Args({1024, static_cast(1e6), 4, 10, 1}) @@ -133,7 +200,7 @@ BENCHMARK_REGISTER_F(PadmmFixtureMPI, DistributeImage) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); -BENCHMARK_REGISTER_F(PadmmFixtureMPI, DistributeGrid) +BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeGrid) //->Apply(b_utilities::Arguments) ->Args({128, 10000, 4, 10, 
2}) ->Args({1024, static_cast(1e6), 4, 10, 2}) diff --git a/cpp/purify/algorithm_factory.h b/cpp/purify/algorithm_factory.h index c776b5f05..5d8b78c42 100644 --- a/cpp/purify/algorithm_factory.h +++ b/cpp/purify/algorithm_factory.h @@ -266,7 +266,7 @@ primaldual_factory( const t_real relative_variation = 1e-3, const t_real residual_tolerance_scaling = 1, const t_real op_norm = 1) { typedef typename Algorithm::Scalar t_scalar; - PURIFY_INFO("Constructing Primal Dual algorithm") + PURIFY_INFO("Constructing Primal Dual algorithm"); auto epsilon = std::sqrt(2 * uv_data.size() + 2 * std::sqrt(4 * uv_data.size())) * sigma; auto primaldual = std::make_shared(uv_data.vis); primaldual->itermax(max_iterations) From 6c539de962560bfe64f0b1c533f9132bfc4d6216 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Wed, 13 Nov 2024 12:18:21 +0000 Subject: [PATCH 17/30] Help purify look up the ONNX runtime --- CMakeLists.txt | 1 - cmake_files/LookUpONNXRT.cmake | 54 ++++++++++++++++++++++++++++++++++ cmake_files/dependencies.cmake | 3 +- 3 files changed, 56 insertions(+), 2 deletions(-) create mode 100644 cmake_files/LookUpONNXRT.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index dae5d5c85..047da06f6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -63,6 +63,5 @@ endif() add_subdirectory(cpp) - # Exports Purify so other packages can access it include(export_purify) diff --git a/cmake_files/LookUpONNXRT.cmake b/cmake_files/LookUpONNXRT.cmake new file mode 100644 index 000000000..a02721674 --- /dev/null +++ b/cmake_files/LookUpONNXRT.cmake @@ -0,0 +1,54 @@ + +find_package(onnxruntime QUIET) + +if(${onnxruntime_FOUND}) + find_path(onnxruntime_INCLUDE_DIR NAMES onnxruntime_cxx_api.h + HINTS + ENV onnxruntime_ROOT + ENV onnxruntime_ROOT_DIR + ${CMAKE_INSTALL_PREFIX}/include + ${KDE4_INCLUDE_DIR} + PATH_SUFFIXES onnxruntime + ) + include(FindPackageHandleStandardArgs) + find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_INCLUDE_DIR TRUE) + 
set(onnxruntime_LIBRARIES onnxruntime::onnxruntime) + get_filename_component(onnxruntime_INSTALL_PREFIX "${onnxruntime_INCLUDE_DIR}/../../" ABSOLUTE) + find_library(onnxruntime_LIBRARY onnxruntime PATHS "${onnxruntime_INSTALL_PREFIX}/lib") + set_target_properties(${onnxruntime_LIBRARIES} PROPERTIES IMPORTED_LOCATION "${onnxruntime_LIBRARY}") + set_target_properties(${onnxruntime_LIBRARIES} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIR}") +endif() + +if(NOT ${onnxruntime_FOUND}) + message(STATUS "ONNXrt not found. Attempt to install...") + EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCH ) + message( STATUS "Detected architecture: ${ARCH}" ) + if ("${ARCH}" STREQUAL "x86_64") + set(ARCH "x64") + endif() + set(ORT_VERSION "1.16.3") + set(ORT_URL_BASE "https://github.com/microsoft/onnxruntime/releases/download") + set(ORT_TARNAME "onnxruntime-linux-${ARCH}-${ORT_VERSION}") + set(ORT_DEST "${CMAKE_CURRENT_BINARY_DIR}/external/${ORT_TARNAME}.tgz") + set(ORT_URL "${ORT_URL_BASE}/v${ORT_VERSION}/${ORT_TARNAME}.tgz") + # https://cmake.org/cmake/help/latest/policy/CMP0135.html + # + # CMP0135 is for solving re-building and re-downloading. + # The NEW policy suppresses warnings for some CMake versions. 
+ if(POLICY CMP0135) + cmake_policy(SET CMP0135 NEW) + endif() + set(onnxruntime_DIR "${CMAKE_INSTALL_PREFIX}/external") + file(DOWNLOAD ${ORT_URL} ${ORT_DEST} + EXPECTED_HASH SHA256=b072f989d6315ac0e22dcb4771b083c5156d974a3496ac3504c77f4062eb248e + ) + execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${ORT_DEST}) + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${ORT_TARNAME} ${onnxruntime_DIR}) + set(onnxruntime_INCLUDE_DIR "${onnxruntime_DIR}/include") + set(onnxruntime_LIBRARY_DIR "${onnxruntime_DIR}/lib") + set(onnxruntime_LIBRARIES onnxruntime::onnxruntime) + add_library(${onnxruntime_LIBRARIES} SHARED IMPORTED GLOBAL) + set_target_properties(${onnxruntime_LIBRARIES} PROPERTIES IMPORTED_LOCATION ${onnxruntime_LIBRARY_DIR}/libonnxruntime.so) + set(onnxruntime_FOUND TRUE) +endif() + diff --git a/cmake_files/dependencies.cmake b/cmake_files/dependencies.cmake index 54d9c9cee..12492c7f2 100644 --- a/cmake_files/dependencies.cmake +++ b/cmake_files/dependencies.cmake @@ -27,7 +27,8 @@ find_package(Boost COMPONENTS system filesystem REQUIRED) find_package(yaml-cpp REQUIRED) if (onnxrt) - find_package(onnxruntime REQUIRED) + #find_package(onnxruntime REQUIRED) + include(LookUpONNXRT) endif() find_package(sopt REQUIRED) From 9cab4f0f35f146c4ed6e6362e84c5ba85933be9a Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Wed, 13 Nov 2024 12:18:43 +0000 Subject: [PATCH 18/30] Install data files for TF models --- cmake_files/export_purify.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake_files/export_purify.cmake b/cmake_files/export_purify.cmake index 1b1c3c713..2abfc0bca 100644 --- a/cmake_files/export_purify.cmake +++ b/cmake_files/export_purify.cmake @@ -38,3 +38,4 @@ install(FILES ) install(EXPORT PurifyTargets DESTINATION share/cmake/purify COMPONENT dev) +install(DIRECTORY "${PROJECT_SOURCE_DIR}/data" DESTINATION "${CMAKE_INSTALL_PREFIX}") From 45d7bb495626072564f4a3c5135e805b5f3e52ba Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Wed, 
13 Nov 2024 12:19:11 +0000 Subject: [PATCH 19/30] Add FB benchmark that uses onnx rt --- cpp/benchmarks/algorithms_mpi.cc | 63 ++++++++++++++++++++++++++++---- 1 file changed, 55 insertions(+), 8 deletions(-) diff --git a/cpp/benchmarks/algorithms_mpi.cc b/cpp/benchmarks/algorithms_mpi.cc index 437213768..8ce3a7dea 100644 --- a/cpp/benchmarks/algorithms_mpi.cc +++ b/cpp/benchmarks/algorithms_mpi.cc @@ -171,21 +171,68 @@ BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbDistributeGrid)(benchmark::State &state) { } } +#ifdef PURIFY_ONNXRT +BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeImage)(benchmark::State &state) { + // Create the algorithm - has to be done there to reset the internal state. + // If done in the fixture repeats would start at the solution and converge immediately. + + // TODO: Wavelets are constructed but not used in the factory method + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + + std::string tf_model_path = + purify::notinstalled::data_directory() + "/models/snr_15_model_dynamic.onnx"; + + m_fb = factory::fb_factory>( + factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, + m_uv_data, m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, + true, true, false, 1e-3, 1e-2, 50, 1.0, tf_model_path, factory::g_proximal_type::TFGProximal); + + // Benchmark the application of the algorithm + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + auto result = (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + std::cout << "Converged? 
" << result.good << " , niters = " << result.niters << std::endl; + state.SetIterationTime(b_utilities::duration(start, end, m_world)); + } +} + +BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbOnnxDistributeImage) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10, 1}) + ->Args({1024, static_cast(1e6), 4, 10, 1}) + ->Args({1024, static_cast(1e7), 4, 10, 1}) + ->UseManualTime() + ->MinTime(9.0) + ->MinWarmUpTime(1.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +#endif + BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeImage) //->Apply(b_utilities::Arguments) ->Args({128, 10000, 4, 10, 1}) + ->Args({1024, static_cast(1e6), 4, 10, 1}) + ->Args({1024, static_cast(1e7), 4, 10, 1}) ->UseManualTime() - ->MinTime(10.0) - ->MinWarmUpTime(5.0) + ->MinTime(9.0) + ->MinWarmUpTime(1.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeGrid) //->Apply(b_utilities::Arguments) ->Args({128, 10000, 4, 10, 2}) + ->Args({1024, static_cast(1e6), 4, 10, 2}) + ->Args({1024, static_cast(1e7), 4, 10, 2}) ->UseManualTime() - ->MinTime(10.0) - ->MinWarmUpTime(5.0) + ->MinTime(9.0) + ->MinWarmUpTime(1.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); @@ -195,8 +242,8 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeImage) ->Args({1024, static_cast(1e6), 4, 10, 1}) ->Args({1024, static_cast(1e7), 4, 10, 1}) ->UseManualTime() - ->MinTime(10.0) - ->MinWarmUpTime(5.0) + ->MinTime(9.0) + ->MinWarmUpTime(1.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); @@ -206,7 +253,7 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeGrid) ->Args({1024, static_cast(1e6), 4, 10, 2}) ->Args({1024, static_cast(1e7), 4, 10, 2}) ->UseManualTime() - ->MinTime(10.0) - ->MinWarmUpTime(5.0) + ->MinTime(9.0) + ->MinWarmUpTime(1.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); From 
1aa88613bd4340af76840f082da560e855cd63fa Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 14 Nov 2024 15:26:26 +0000 Subject: [PATCH 20/30] Linting --- cpp/benchmarks/algorithms_mpi.cc | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/benchmarks/algorithms_mpi.cc b/cpp/benchmarks/algorithms_mpi.cc index 8ce3a7dea..f4b932f81 100644 --- a/cpp/benchmarks/algorithms_mpi.cc +++ b/cpp/benchmarks/algorithms_mpi.cc @@ -133,9 +133,9 @@ BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbDistributeImage)(benchmark::State &state) { t_real const gamma = 0.0001; m_fb = factory::fb_factory>( - factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, - m_uv_data, m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, - true, true, false, 1e-3, 1e-2, 50, 1.0); + factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data, + m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0); // Benchmark the application of the algorithm while (state.KeepRunning()) { @@ -157,9 +157,9 @@ BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbDistributeGrid)(benchmark::State &state) { t_real const gamma = 0.0001; m_fb = factory::fb_factory>( - factory::algo_distribution::mpi_serial, m_measurements_distribute_grid, wavelets, - m_uv_data, m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, - true, true, false, 1e-3, 1e-2, 50, 1.0); + factory::algo_distribution::mpi_serial, m_measurements_distribute_grid, wavelets, m_uv_data, + m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0); // Benchmark the application of the algorithm while (state.KeepRunning()) { @@ -187,9 +187,9 @@ BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeImage)(benchmark::State &stat purify::notinstalled::data_directory() + "/models/snr_15_model_dynamic.onnx"; m_fb = 
factory::fb_factory>( - factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, - m_uv_data, m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, - true, true, false, 1e-3, 1e-2, 50, 1.0, tf_model_path, factory::g_proximal_type::TFGProximal); + factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data, + m_sigma, beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, + false, 1e-3, 1e-2, 50, 1.0, tf_model_path, factory::g_proximal_type::TFGProximal); // Benchmark the application of the algorithm while (state.KeepRunning()) { From 21ff63f519b0f767d4ef810999f7d5e29a372b51 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 14 Nov 2024 15:27:33 +0000 Subject: [PATCH 21/30] More linting --- cpp/benchmarks/main.cc | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/benchmarks/main.cc b/cpp/benchmarks/main.cc index f894b7145..ab9a95437 100644 --- a/cpp/benchmarks/main.cc +++ b/cpp/benchmarks/main.cc @@ -1,9 +1,9 @@ #include "purify/config.h" -#include "purify/logging.h" #include +#include "purify/logging.h" +#include #include #include -#include // This reporter does nothing. 
// We can use it to disable output from all but the root process @@ -18,10 +18,9 @@ class NullReporter : public ::benchmark::BenchmarkReporter { // The main is rewritten to allow for MPI initializing and for selecting a // reporter according to the process rank int main(int argc, char const **argv) { + sopt::logging::set_level("info"); + purify::logging::set_level("debug"); - sopt::logging::set_level("info"); - purify::logging::set_level("debug"); - #ifdef PURIFY_MPI auto const session = sopt::mpi::init(argc, argv); auto const world = sopt::mpi::Communicator::World(); From 7677b1f082dd7fcc9acb37305f961ef438dee473 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 14 Nov 2024 15:28:29 +0000 Subject: [PATCH 22/30] Linting++ --- cpp/benchmarks/padmm.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/benchmarks/padmm.cc b/cpp/benchmarks/padmm.cc index c3150936d..4d0aba175 100644 --- a/cpp/benchmarks/padmm.cc +++ b/cpp/benchmarks/padmm.cc @@ -22,8 +22,8 @@ class PadmmFixture : public ::benchmark::Fixture { bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); // Generating random uv(w) coverage - bool newMeasurements = b_utilities::updateMeasurements(state.range(1), m_uv_data, m_epsilon, - newImage, m_image); + bool newMeasurements = + b_utilities::updateMeasurements(state.range(1), m_uv_data, m_epsilon, newImage, m_image); bool newKernel = m_kernel != state.range(2); if (newImage || newMeasurements || newKernel) { From ec1febd076901e04f0dd5a956859362da116dfba Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 14 Nov 2024 15:30:50 +0000 Subject: [PATCH 23/30] One more for the linter --- cpp/benchmarks/utilities.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/utilities.cc b/cpp/benchmarks/utilities.cc index 8688a0082..3d2385355 100644 --- a/cpp/benchmarks/utilities.cc +++ b/cpp/benchmarks/utilities.cc @@ -3,11 +3,11 @@ #include #include "purify/directories.h" 
#include "purify/distribute.h" +#include "purify/logging.h" #include "purify/mpi_utilities.h" #include "purify/operators.h" #include "purify/pfitsio.h" #include -#include "purify/logging.h" using namespace purify; using namespace purify::notinstalled; From 16b0face58ac4c59a920a91ef54c3b8a1cc23ee7 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 12 Dec 2024 12:58:24 +0000 Subject: [PATCH 24/30] Remove obsolete reference to notinstalled namespace --- cpp/benchmarks/algorithms_mpi.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/algorithms_mpi.cc b/cpp/benchmarks/algorithms_mpi.cc index f4b932f81..e06c508a2 100644 --- a/cpp/benchmarks/algorithms_mpi.cc +++ b/cpp/benchmarks/algorithms_mpi.cc @@ -184,7 +184,7 @@ BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeImage)(benchmark::State &stat t_real const gamma = 0.0001; std::string tf_model_path = - purify::notinstalled::data_directory() + "/models/snr_15_model_dynamic.onnx"; + purify::data_directory() + "/models/snr_15_model_dynamic.onnx"; m_fb = factory::fb_factory>( factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data, From 2810d43f3d401217c8961ca095e7a6d31a7ae1be Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 12 Dec 2024 13:00:39 +0000 Subject: [PATCH 25/30] Use correct models directory --- cpp/benchmarks/algorithms_mpi.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/benchmarks/algorithms_mpi.cc b/cpp/benchmarks/algorithms_mpi.cc index e06c508a2..6bc9911fb 100644 --- a/cpp/benchmarks/algorithms_mpi.cc +++ b/cpp/benchmarks/algorithms_mpi.cc @@ -184,7 +184,7 @@ BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeImage)(benchmark::State &stat t_real const gamma = 0.0001; std::string tf_model_path = - purify::data_directory() + "/models/snr_15_model_dynamic.onnx"; + purify::models_directory() + "/snr_15_model_dynamic.onnx"; m_fb = factory::fb_factory>( factory::algo_distribution::mpi_serial, 
m_measurements_distribute_image, wavelets, m_uv_data, From ad473fce4a3e2e9e55a6937279b2e08e4b768150 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 12 Dec 2024 13:15:32 +0000 Subject: [PATCH 26/30] remove onnxrt lookup, should be provided by sopt --- cmake_files/LookUpONNXRT.cmake | 54 ---------------------------------- 1 file changed, 54 deletions(-) delete mode 100644 cmake_files/LookUpONNXRT.cmake diff --git a/cmake_files/LookUpONNXRT.cmake b/cmake_files/LookUpONNXRT.cmake deleted file mode 100644 index a02721674..000000000 --- a/cmake_files/LookUpONNXRT.cmake +++ /dev/null @@ -1,54 +0,0 @@ - -find_package(onnxruntime QUIET) - -if(${onnxruntime_FOUND}) - find_path(onnxruntime_INCLUDE_DIR NAMES onnxruntime_cxx_api.h - HINTS - ENV onnxruntime_ROOT - ENV onnxruntime_ROOT_DIR - ${CMAKE_INSTALL_PREFIX}/include - ${KDE4_INCLUDE_DIR} - PATH_SUFFIXES onnxruntime - ) - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(onnxruntime DEFAULT_MSG onnxruntime_INCLUDE_DIR TRUE) - set(onnxruntime_LIBRARIES onnxruntime::onnxruntime) - get_filename_component(onnxruntime_INSTALL_PREFIX "${onnxruntime_INCLUDE_DIR}/../../" ABSOLUTE) - find_library(onnxruntime_LIBRARY onnxruntime PATHS "${onnxruntime_INSTALL_PREFIX}/lib") - set_target_properties(${onnxruntime_LIBRARIES} PROPERTIES IMPORTED_LOCATION "${onnxruntime_LIBRARY}") - set_target_properties(${onnxruntime_LIBRARIES} PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${onnxruntime_INCLUDE_DIR}") -endif() - -if(NOT ${onnxruntime_FOUND}) - message(STATUS "ONNXrt not found. 
Attempt to install...") - EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCH ) - message( STATUS "Detected architecture: ${ARCH}" ) - if ("${ARCH}" STREQUAL "x86_64") - set(ARCH "x64") - endif() - set(ORT_VERSION "1.16.3") - set(ORT_URL_BASE "https://github.com/microsoft/onnxruntime/releases/download") - set(ORT_TARNAME "onnxruntime-linux-${ARCH}-${ORT_VERSION}") - set(ORT_DEST "${CMAKE_CURRENT_BINARY_DIR}/external/${ORT_TARNAME}.tgz") - set(ORT_URL "${ORT_URL_BASE}/v${ORT_VERSION}/${ORT_TARNAME}.tgz") - # https://cmake.org/cmake/help/latest/policy/CMP0135.html - # - # CMP0135 is for solving re-building and re-downloading. - # The NEW policy suppresses warnings for some CMake versions. - if(POLICY CMP0135) - cmake_policy(SET CMP0135 NEW) - endif() - set(onnxruntime_DIR "${CMAKE_INSTALL_PREFIX}/external") - file(DOWNLOAD ${ORT_URL} ${ORT_DEST} - EXPECTED_HASH SHA256=b072f989d6315ac0e22dcb4771b083c5156d974a3496ac3504c77f4062eb248e - ) - execute_process(COMMAND ${CMAKE_COMMAND} -E tar xzf ${ORT_DEST}) - execute_process(COMMAND ${CMAKE_COMMAND} -E copy_directory ${ORT_TARNAME} ${onnxruntime_DIR}) - set(onnxruntime_INCLUDE_DIR "${onnxruntime_DIR}/include") - set(onnxruntime_LIBRARY_DIR "${onnxruntime_DIR}/lib") - set(onnxruntime_LIBRARIES onnxruntime::onnxruntime) - add_library(${onnxruntime_LIBRARIES} SHARED IMPORTED GLOBAL) - set_target_properties(${onnxruntime_LIBRARIES} PROPERTIES IMPORTED_LOCATION ${onnxruntime_LIBRARY_DIR}/libonnxruntime.so) - set(onnxruntime_FOUND TRUE) -endif() - From ed0bfd83948d92439b778c59e7085bd614dac715 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 12 Dec 2024 13:16:08 +0000 Subject: [PATCH 27/30] Remove duplicate --- cmake_files/dependencies.cmake | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cmake_files/dependencies.cmake b/cmake_files/dependencies.cmake index 2d8c66a56..7df76fa9c 100644 --- a/cmake_files/dependencies.cmake +++ b/cmake_files/dependencies.cmake @@ -65,10 +65,6 @@ if(tests) # Adds 
ctest include(AddCatchTest) endif() -if(examples) - find_package(TIFF REQUIRED) -endif() - if(tests OR examples) file(COPY data DESTINATION .) endif() From 430a15535e5a330999455df9bbe78a50929c9ae5 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Thu, 12 Dec 2024 16:28:13 +0000 Subject: [PATCH 28/30] Add bigger problems and more runtime --- cpp/benchmarks/algorithms_mpi.cc | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/cpp/benchmarks/algorithms_mpi.cc b/cpp/benchmarks/algorithms_mpi.cc index 6bc9911fb..b2a3aee33 100644 --- a/cpp/benchmarks/algorithms_mpi.cc +++ b/cpp/benchmarks/algorithms_mpi.cc @@ -206,9 +206,11 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbOnnxDistributeImage) ->Args({128, 10000, 4, 10, 1}) ->Args({1024, static_cast(1e6), 4, 10, 1}) ->Args({1024, static_cast(1e7), 4, 10, 1}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) ->UseManualTime() - ->MinTime(9.0) - ->MinWarmUpTime(1.0) + ->MinTime(120.0) + ->MinWarmUpTime(10.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); @@ -219,9 +221,11 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeImage) ->Args({128, 10000, 4, 10, 1}) ->Args({1024, static_cast(1e6), 4, 10, 1}) ->Args({1024, static_cast(1e7), 4, 10, 1}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) ->UseManualTime() - ->MinTime(9.0) - ->MinWarmUpTime(1.0) + ->MinTime(120.0) + ->MinWarmUpTime(10.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); @@ -230,9 +234,11 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, FbDistributeGrid) ->Args({128, 10000, 4, 10, 2}) ->Args({1024, static_cast(1e6), 4, 10, 2}) ->Args({1024, static_cast(1e7), 4, 10, 2}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) ->UseManualTime() - ->MinTime(9.0) - ->MinWarmUpTime(1.0) + ->MinTime(120.0) + ->MinWarmUpTime(10.0) ->Repetitions(3) 
//->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); @@ -241,9 +247,11 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeImage) ->Args({128, 10000, 4, 10, 1}) ->Args({1024, static_cast(1e6), 4, 10, 1}) ->Args({1024, static_cast(1e7), 4, 10, 1}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) ->UseManualTime() - ->MinTime(9.0) - ->MinWarmUpTime(1.0) + ->MinTime(120.0) + ->MinWarmUpTime(10.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); @@ -252,8 +260,10 @@ BENCHMARK_REGISTER_F(AlgoFixtureMPI, PadmmDistributeGrid) ->Args({128, 10000, 4, 10, 2}) ->Args({1024, static_cast(1e6), 4, 10, 2}) ->Args({1024, static_cast(1e7), 4, 10, 2}) + ->Args({1024, static_cast(1e8), 4, 10, 1}) + ->Args({1024, static_cast(1e9), 4, 10, 1}) ->UseManualTime() - ->MinTime(9.0) - ->MinWarmUpTime(1.0) + ->MinTime(120.0) + ->MinWarmUpTime(10.0) ->Repetitions(3) //->ReportAggregatesOnly(true) ->Unit(benchmark::kMillisecond); From f5daf32a09571a2e71d13424d7387ddb73598664 Mon Sep 17 00:00:00 2001 From: Tuomas Koskela Date: Fri, 13 Dec 2024 16:22:25 +0000 Subject: [PATCH 29/30] Linting --- cpp/benchmarks/algorithms_mpi.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/benchmarks/algorithms_mpi.cc b/cpp/benchmarks/algorithms_mpi.cc index b2a3aee33..cf97b72d5 100644 --- a/cpp/benchmarks/algorithms_mpi.cc +++ b/cpp/benchmarks/algorithms_mpi.cc @@ -183,8 +183,7 @@ BENCHMARK_DEFINE_F(AlgoFixtureMPI, FbOnnxDistributeImage)(benchmark::State &stat t_real const beta = m_sigma * m_sigma; t_real const gamma = 0.0001; - std::string tf_model_path = - purify::models_directory() + "/snr_15_model_dynamic.onnx"; + std::string tf_model_path = purify::models_directory() + "/snr_15_model_dynamic.onnx"; m_fb = factory::fb_factory>( factory::algo_distribution::mpi_serial, m_measurements_distribute_image, wavelets, m_uv_data, From 152b5183ab3137a5dcd9085d7d3447e9b75b9811 Mon Sep 17 00:00:00 2001 From: 
Tuomas Koskela Date: Fri, 13 Dec 2024 17:20:41 +0000 Subject: [PATCH 30/30] refactor + add serial fb algorithms --- cpp/benchmarks/algorithms.cc | 156 +++++++++++++++++++++++++++++++++++ cpp/benchmarks/padmm.cc | 96 --------------------- 2 files changed, 156 insertions(+), 96 deletions(-) create mode 100644 cpp/benchmarks/algorithms.cc delete mode 100644 cpp/benchmarks/padmm.cc diff --git a/cpp/benchmarks/algorithms.cc b/cpp/benchmarks/algorithms.cc new file mode 100644 index 000000000..4c20ff445 --- /dev/null +++ b/cpp/benchmarks/algorithms.cc @@ -0,0 +1,156 @@ +#include "purify/config.h" +#include "purify/types.h" +#include +#include +#include "benchmarks/utilities.h" +#include "purify/algorithm_factory.h" +#include "purify/directories.h" +#include "purify/measurement_operator_factory.h" +#include "purify/operators.h" +#include "purify/utilities.h" +#include "purify/wavelet_operator_factory.h" +#include +#include +#include +#include +#include + +using namespace purify; + +class AlgoFixture : public ::benchmark::Fixture { + public: + void SetUp(const ::benchmark::State &state) { + // Reading image from file and update related quantities + bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); + + // Generating random uv(w) coverage + bool newMeasurements = + b_utilities::updateMeasurements(state.range(1), m_uv_data, m_epsilon, newImage, m_image); + + bool newKernel = m_kernel != state.range(2); + + m_kernel = state.range(2); + // creating the measurement operator + const t_real FoV = 1; // deg + const t_real cellsize = FoV / m_imsizex * 60. 
* 60.; + const bool w_term = false; + m_measurements_transform = factory::measurement_operator_factory>( + factory::distributed_measurement_operator::serial, m_uv_data, m_imsizey, m_imsizex, + cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, m_kernel, w_term); + + m_sigma = 0.016820222945913496 * std::sqrt(2); // see test_parameters file + } + + void TearDown(const ::benchmark::State &state) {} + + t_real m_epsilon; + t_uint m_counter; + t_real m_sigma; + std::vector> const m_sara{ + std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), + std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), + std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; + + Image m_image; + t_uint m_imsizex; + t_uint m_imsizey; + + utilities::vis_params m_uv_data; + + t_uint m_kernel; + std::shared_ptr> const> m_measurements_transform; + std::shared_ptr> m_padmm; + std::shared_ptr> m_fb; +}; + +BENCHMARK_DEFINE_F(AlgoFixture, Padmm)(benchmark::State &state) { + // Benchmark the application of the algorithm + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + m_padmm = factory::padmm_factory>( + factory::algo_distribution::serial, m_measurements_transform, wavelets, m_uv_data, m_sigma, + m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, 50, + 1.0, 1.0); + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + (*m_padmm)(); + auto end = std::chrono::high_resolution_clock::now(); + state.SetIterationTime(b_utilities::duration(start, end)); + } +} + +BENCHMARK_DEFINE_F(AlgoFixture, ForwardBackward)(benchmark::State &state) { + // Benchmark the application of the algorithm + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + t_real
const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + + m_fb = factory::fb_factory>( + factory::algo_distribution::serial, m_measurements_transform, wavelets, m_uv_data, m_sigma, + beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, + 1e-2, 50, 1.0); + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + state.SetIterationTime(b_utilities::duration(start, end)); + } +} + +#ifdef PURIFY_ONNXRT +BENCHMARK_DEFINE_F(AlgoFixture, ForwardBackwardOnnx)(benchmark::State &state) { + // Benchmark the application of the algorithm + auto const wavelets = factory::wavelet_operator_factory>( + factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); + + t_real const beta = m_sigma * m_sigma; + t_real const gamma = 0.0001; + std::string tf_model_path = purify::models_directory() + "/snr_15_model_dynamic.onnx"; + + m_fb = factory::fb_factory>( + factory::algo_distribution::serial, m_measurements_transform, wavelets, m_uv_data, m_sigma, + beta, gamma, m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, + 1e-2, 50, 1.0, tf_model_path, factory::g_proximal_type::TFGProximal); + + while (state.KeepRunning()) { + auto start = std::chrono::high_resolution_clock::now(); + (*m_fb)(); + auto end = std::chrono::high_resolution_clock::now(); + state.SetIterationTime(b_utilities::duration(start, end)); + } +} + +BENCHMARK_REGISTER_F(AlgoFixture, ForwardBackwardOnnx) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10}) + ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); +#endif + +BENCHMARK_REGISTER_F(AlgoFixture, Padmm) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10}) + ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) 
//->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(AlgoFixture, ForwardBackward) + //->Apply(b_utilities::Arguments) + ->Args({128, 10000, 4, 10}) + ->UseManualTime() + ->MinTime(10.0) + ->MinWarmUpTime(5.0) + ->Repetitions(3) //->ReportAggregatesOnly(true) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_MAIN(); diff --git a/cpp/benchmarks/padmm.cc b/cpp/benchmarks/padmm.cc deleted file mode 100644 index 4d0aba175..000000000 --- a/cpp/benchmarks/padmm.cc +++ /dev/null @@ -1,96 +0,0 @@ -#include "purify/config.h" -#include "purify/types.h" -#include -#include -#include "benchmarks/utilities.h" -#include "purify/algorithm_factory.h" -#include "purify/operators.h" -#include "purify/utilities.h" -#include "purify/wavelet_operator_factory.h" -#include -#include -#include -#include -#include - -using namespace purify; - -class PadmmFixture : public ::benchmark::Fixture { - public: - void SetUp(const ::benchmark::State &state) { - // Reading image from file and update related quantities - bool newImage = b_utilities::updateImage(state.range(0), m_image, m_imsizex, m_imsizey); - - // Generating random uv(w) coverage - bool newMeasurements = - b_utilities::updateMeasurements(state.range(1), m_uv_data, m_epsilon, newImage, m_image); - - bool newKernel = m_kernel != state.range(2); - if (newImage || newMeasurements || newKernel) { - m_kernel = state.range(2); - // creating the measurement operator - const t_real FoV = 1; // deg - const t_real cellsize = FoV / m_imsizex * 60. 
* 60.; - const bool w_term = false; - m_measurements_transform = measurementoperator::init_degrid_operator_2d>( - m_uv_data, m_imsizey, m_imsizex, cellsize, cellsize, 2, kernels::kernel::kb, m_kernel, - m_kernel, w_term); - - const t_uint imsizex = m_imsizex; - const t_uint imsizey = m_imsizey; - - auto const wavelets = factory::wavelet_operator_factory>( - factory::distributed_wavelet_operator::serial, m_sara, m_imsizey, m_imsizex); - - t_real const sigma = 0.016820222945913496 * std::sqrt(2); // see test_parameters file - - m_padmm = factory::padmm_factory>( - factory::algo_distribution::serial, m_measurements_transform, wavelets, m_uv_data, sigma, - m_imsizey, m_imsizex, m_sara.size(), state.range(3) + 1, true, true, false, 1e-3, 1e-2, - 50, 1.0, 1.0); - } - } - - void TearDown(const ::benchmark::State &state) {} - - t_real m_epsilon; - t_uint m_counter; - std::vector> const m_sara{ - std::make_tuple("Dirac", 3u), std::make_tuple("DB1", 3u), std::make_tuple("DB2", 3u), - std::make_tuple("DB3", 3u), std::make_tuple("DB4", 3u), std::make_tuple("DB5", 3u), - std::make_tuple("DB6", 3u), std::make_tuple("DB7", 3u), std::make_tuple("DB8", 3u)}; - - Image m_image; - t_uint m_imsizex; - t_uint m_imsizey; - - utilities::vis_params m_uv_data; - - t_uint m_kernel; - std::shared_ptr> const> m_measurements_transform; - std::shared_ptr> m_padmm; -}; - -BENCHMARK_DEFINE_F(PadmmFixture, Apply)(benchmark::State &state) { - // Benchmark the application of the algorithm - while (state.KeepRunning()) { - auto start = std::chrono::high_resolution_clock::now(); - (*m_padmm)(); - auto end = std::chrono::high_resolution_clock::now(); - state.SetIterationTime(b_utilities::duration(start, end)); - } - - // state.SetBytesProcessed(int64_t(state.iterations()) * (state.range(1) + m_imsizey * m_imsizex) - // * sizeof(t_complex)); -} - -BENCHMARK_REGISTER_F(PadmmFixture, Apply) - //->Apply(b_utilities::Arguments) - ->Args({128, 10000, 4, 10}) - ->UseManualTime() - ->MinTime(10.0) - 
->MinWarmUpTime(5.0) - ->Repetitions(3) //->ReportAggregatesOnly(true) - ->Unit(benchmark::kMillisecond); - -BENCHMARK_MAIN();