diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 716e8b2524..60359cf0ed 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ concurrency: jobs: sharding_tests: - name: Sharding Tests - Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} + name: Sharding Tests - Julia ${{ matrix.version }} - ${{ matrix.os }} runs-on: ${{ matrix.os }} timeout-minutes: 120 strategy: @@ -35,29 +35,28 @@ jobs: matrix: version: - '1.10' - os: + os: + - ubuntu-22.04-arm - ubuntu-latest - arch: - - x64 + - macos-latest steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v2 + with: + version: ${{ matrix.version }} + - uses: julia-actions/cache@v2 - run: | touch LocalPreferences.toml echo "[Reactant]" >> LocalPreferences.toml echo "xla_runtime = \"IFRT\"" >> LocalPreferences.toml - cat LocalPreferences.toml - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: julia-actions/cache@v2 + cat LocalPreferences.toml - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 env: + XLA_FLAGS: "--xla_force_host_platform_device_count=4" JULIA_DEBUG: "Reactant, Reactant_jll" - REACTANT_TEST: true TEST_GROUP: "sharding" mpi_tripolar: @@ -110,31 +109,6 @@ jobs: env: TEST_GROUP: "turbulence_closures" - reactant: - name: Reactant - Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - runs-on: ${{ matrix.os }} - timeout-minutes: 120 - strategy: - fail-fast: false - matrix: - version: - - '1.10' - os: - - ubuntu-latest - arch: - - x64 - steps: - - uses: actions/checkout@v4 - - uses: julia-actions/setup-julia@v2 - with: - version: ${{ matrix.version }} - arch: ${{ matrix.arch }} - - uses: julia-actions/cache@v2 - - uses: julia-actions/julia-buildpkg@v1 - - uses: julia-actions/julia-runtest@v1 - env: - TEST_GROUP: "reactant" - metal: name: Metal - Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} runs-on: ${{ matrix.os }} diff --git a/ext/OceananigansReactantExt/Fields.jl b/ext/OceananigansReactantExt/Fields.jl index d3076fecc3..1d9f364641 100644 --- a/ext/OceananigansReactantExt/Fields.jl +++ b/ext/OceananigansReactantExt/Fields.jl @@ -9,6 +9,7 @@ using Oceananigans.Fields: Field, interior using KernelAbstractions: @index, @kernel import Oceananigans.Fields: set_to_field!, set_to_function!, set! +import Oceananigans.DistributedComputations: reconstruct_global_field, synchronize_communication! import ..OceananigansReactantExt: deconcretize import ..Grids: ReactantGrid @@ -17,6 +18,8 @@ import ..Grids: ShardedGrid const ReactantField{LX, LY, LZ, O} = Field{LX, LY, LZ, O, <:ReactantGrid} const ShardedDistributedField{LX, LY, LZ, O} = Field{LX, LY, LZ, O, <:ShardedGrid} +reconstruct_global_field(field::ShardedDistributedField) = field + deconcretize(field::Field{LX, LY, LZ}) where {LX, LY, LZ} = Field{LX, LY, LZ}(field.grid, deconcretize(field.data), @@ -57,6 +60,9 @@ end # keepin it simple set_to_field!(u::ReactantField, v::ReactantField) = @jit _set_to_field!(u, v) +# No need to synchronize -> it should be implicit +synchronize_communication!(::ShardedDistributedField) = nothing + function set_to_function!(u::ShardedDistributedField, f) grid = u.grid arch = grid.architecture diff --git a/ext/OceananigansReactantExt/Grids/sharded_grids.jl b/ext/OceananigansReactantExt/Grids/sharded_grids.jl index 7b4559815b..a5d184d558 100644 --- a/ext/OceananigansReactantExt/Grids/sharded_grids.jl +++ b/ext/OceananigansReactantExt/Grids/sharded_grids.jl @@ -152,7 +152,7 @@ function Oceananigans.LatitudeLongitudeGrid(arch::ShardedDistributed, z, # Intentionally not sharded Δxᶜᶜᵃ, Δxᶠᶜᵃ, Δxᶜᶠᵃ, Δxᶠᶠᵃ, Δyᶠᶜᵃ, Δyᶜᶠᵃ, - Azᶜᶜᵃ, Azᶠᶜᵃ, Azᶜᶠᵃ, Azᶠᶠᵃ, + Azᶜᶜᵃ, Azᶠᶜᵃ, Azᶜᶠᵃ, Azᶠᶠᵃ, grid.radius) end diff --git a/ext/OceananigansReactantExt/Models.jl b/ext/OceananigansReactantExt/Models.jl index 64a5fff6a2..37d9b17ff5 100644 --- a/ext/OceananigansReactantExt/Models.jl +++ b/ext/OceananigansReactantExt/Models.jl @@ -2,20 +2,24 @@ module Models import Oceananigans -import Oceananigans.Models: initialization_update_state! -import Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces: maybe_extend_halos, FixedSubstepNumber -import Oceananigans: initialize! - using Oceananigans.Architectures: ReactantState using Oceananigans.DistributedComputations: Distributed using Oceananigans.Models.HydrostaticFreeSurfaceModels: initialize_free_surface!, HydrostaticFreeSurfaceModel using ..TimeSteppers: ReactantModel using ..Grids: ReactantGrid, ReactantImmersedBoundaryGrid +using ..Grids: ShardedGrid, ShardedDistributed + +import Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces: maybe_extend_halos, FixedSubstepNumber +import Oceananigans: initialize! +import Oceananigans.Models: + initialization_update_state!, + complete_communication_and_compute_buffer!, + interior_tendency_kernel_parameters const ReactantHFSM{TS, E} = Union{ HydrostaticFreeSurfaceModel{TS, E, <:ReactantState}, - HydrostaticFreeSurfaceModel{TS, E, <:Distributed{<:ReactantState}}, + HydrostaticFreeSurfaceModel{TS, E, <:ShardedDistributed}, } initialize_immersed_boundary_grid!(grid) = nothing @@ -46,4 +50,8 @@ function initialize!(model::ReactantHFSM) return nothing end +# Undo all the pipelining for a `ShardedDistributed` architecture +complete_communication_and_compute_buffer!(model, ::ShardedGrid, ::ShardedDistributed) = nothing +interior_tendency_kernel_parameters(::ShardedDistributed, grid) = :xyz + end # module diff --git a/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl b/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl index 5fde541587..4149c12ea2 100644 --- a/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl +++ b/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl @@ -19,10 +19,8 @@ import Oceananigans.Fields: Field, validate_indices, validate_boundary_condition const DistributedTripolarGrid{FT, TX, TY, TZ, CZ, CC, FC, CF, FF, Arch} = OrthogonalSphericalShellGrid{FT, TX, TY, TZ, CZ, <:Tripolar, CC, FC, CF, FF, <:Distributed{<:Union{CPU, GPU}}} -const DistributedTripolarGridOfSomeKind = Union{ - DistributedTripolarGrid, - ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedTripolarGrid} -} +const MPITripolarGrid{FT, TX, TY, TZ, CZ, CC, FC, CF, FF, Arch} = OrthogonalSphericalShellGrid{FT, TX, TY, TZ, CZ, <:Tripolar, CC, FC, CF, FF, <:Distributed{<:Union{CPU, GPU}}} +const MPITripolarGridOfSomeKind = Union{MPITripolarGrid, ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:MPITripolarGrid}} """ TripolarGrid(arch::Distributed, FT::DataType = Float64; halo = (4, 4, 4), kwargs...) @@ -234,7 +232,7 @@ end # a distributed `TripolarGrid` needs a `ZipperBoundaryCondition` for the north boundary # only on the last rank function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions, - grid::DistributedTripolarGridOfSomeKind, + grid::MPITripolarGridOfSomeKind, field_name::Symbol, prognostic_names=nothing) @@ -274,7 +272,7 @@ end # Extension of the constructor for a `Field` on a `TRG` grid. We assumes that the north boundary is a zipper # with a sign that depends on the location of the field (revert the value of the halos if on edges, keep it if on nodes or centers) -function Field(loc::Tuple{<:LX, <:LY, <:LZ}, grid::DistributedTripolarGridOfSomeKind, data, old_bcs, indices::Tuple, op, status) where {LX, LY, LZ} +function Field(loc::Tuple{<:LX, <:LY, <:LZ}, grid::MPITripolarGridOfSomeKind, data, old_bcs, indices::Tuple, op, status) where {LX, LY, LZ} arch = architecture(grid) yrank = arch.local_index[2] - 1 @@ -327,7 +325,7 @@ function Field(loc::Tuple{<:LX, <:LY, <:LZ}, grid::DistributedTripolarGridOfSome end # Reconstruction the global tripolar grid for visualization purposes -function reconstruct_global_grid(grid::DistributedTripolarGrid) +function reconstruct_global_grid(grid::MPITripolarGrid) arch = grid.architecture @@ -354,8 +352,7 @@ function reconstruct_global_grid(grid::DistributedTripolarGrid) z) end -function with_halo(new_halo, old_grid::DistributedTripolarGrid) - +function with_halo(new_halo, old_grid::MPITripolarGrid) arch = old_grid.architecture n = size(old_grid) diff --git a/src/OrthogonalSphericalShellGrids/distributed_zipper.jl b/src/OrthogonalSphericalShellGrids/distributed_zipper.jl index 2f5dc1c9cc..27e9757d12 100644 --- a/src/OrthogonalSphericalShellGrids/distributed_zipper.jl +++ b/src/OrthogonalSphericalShellGrids/distributed_zipper.jl @@ -4,6 +4,7 @@ using Oceananigans.BoundaryConditions: fill_open_boundary_regions!, DistributedCommunication using Oceananigans.DistributedComputations: cooperative_waitall!, + AsynchronousDistributed, recv_from_buffers!, fill_corners!, loc_id, @@ -59,7 +60,7 @@ end return nothing end -function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DistributedTripolarGridOfSomeKind, buffers, args...; +function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::MPITripolarGridOfSomeKind, buffers, args...; only_local_halos=false, fill_boundary_normal_velocities=true, kwargs...) if fill_boundary_normal_velocities @@ -91,26 +92,28 @@ function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::Distributed return nothing end -function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:DistributedTripolarGridOfSomeKind}) +function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:MPITripolarGridOfSomeKind}) arch = architecture(field.grid) - # Wait for outstanding requests - if !isempty(arch.mpi_requests) - cooperative_waitall!(arch.mpi_requests) + if arch isa AsynchronousDistributed # Otherwise no need to synchonize + # Wait for outstanding requests + if !isempty(arch.mpi_requests) + cooperative_waitall!(arch.mpi_requests) + + # Reset MPI tag + arch.mpi_tag[] = 0 - # Reset MPI tag - arch.mpi_tag[] = 0 + # Reset MPI requests + empty!(arch.mpi_requests) + end - # Reset MPI requests - empty!(arch.mpi_requests) - end - - recv_from_buffers!(field.data, field.communication_buffers, field.grid) + recv_from_buffers!(field.data, field.communication_buffers, field.grid) - north_bc = field.boundary_conditions.north - instantiated_location = map(instantiate, location(field)) + north_bc = field.boundary_conditions.north + instantiated_location = map(instantiate, location(field)) - switch_north_halos!(field, north_bc, field.grid, instantiated_location) + switch_north_halos!(field, north_bc, field.grid, instantiated_location) + end return nothing end \ No newline at end of file diff --git a/src/OrthogonalSphericalShellGrids/distributed_zipper_north_tags.jl b/src/OrthogonalSphericalShellGrids/distributed_zipper_north_tags.jl index 51f8d66596..99447c3d2d 100644 --- a/src/OrthogonalSphericalShellGrids/distributed_zipper_north_tags.jl +++ b/src/OrthogonalSphericalShellGrids/distributed_zipper_north_tags.jl @@ -11,7 +11,7 @@ sides = (:west, :east, :south, :north, :southwest, :southeast, :northwest, :nor side_id = Dict(side => n-1 for (n, side) in enumerate(sides)) # Change these and we are golden! -function north_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location) +function north_recv_tag(arch, ::MPITripolarGridOfSomeKind, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -19,7 +19,7 @@ function north_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location) return parse(Int, field_id * loc_digit * side_digit) end -function north_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location) +function north_send_tag(arch, ::MPITripolarGridOfSomeKind, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -27,7 +27,7 @@ function north_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location) return parse(Int, field_id * loc_digit * side_digit) end -function northwest_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location) +function northwest_recv_tag(arch, ::MPITripolarGridOfSomeKind, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -35,7 +35,7 @@ function northwest_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location) return parse(Int, field_id * loc_digit * side_digit) end -function northwest_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location) +function northwest_send_tag(arch, ::MPITripolarGridOfSomeKind, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -43,7 +43,7 @@ function northwest_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location) return parse(Int, field_id * loc_digit * side_digit) end -function northeast_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location) +function northeast_recv_tag(arch, ::MPITripolarGridOfSomeKind, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] @@ -51,7 +51,7 @@ function northeast_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location) return parse(Int, field_id * loc_digit * side_digit) end -function northeast_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location) +function northeast_send_tag(arch, ::MPITripolarGridOfSomeKind, location) field_id = string(arch.mpi_tag[], pad=ID_DIGITS) loc_digit = string(loc_id(location...), pad=ID_DIGITS) last_rank = arch.local_index[2] == ranks(arch)[2] diff --git a/test/distributed_tests_utils.jl b/test/distributed_tests_utils.jl index 0fa31f49d1..f653b8f2ab 100644 --- a/test/distributed_tests_utils.jl +++ b/test/distributed_tests_utils.jl @@ -68,25 +68,22 @@ end # Run the distributed grid simulation and save down reconstructed results function run_distributed_tripolar_grid(arch, filename) - distributed_grid = TripolarGrid(arch; size = (40, 40, 1), z = (-1000, 0), halo = (5, 5, 5)) - distributed_grid = analytical_immersed_tripolar_grid(distributed_grid) - model = run_distributed_simulation(distributed_grid) + grid = TripolarGrid(arch; size = (40, 40, 1), z = (-1000, 0), halo = (5, 5, 5)) + grid = analytical_immersed_tripolar_grid(grid) + model = run_distributed_simulation(grid) η = reconstruct_global_field(model.free_surface.η) u = reconstruct_global_field(model.velocities.u) v = reconstruct_global_field(model.velocities.v) c = reconstruct_global_field(model.tracers.c) - if arch.local_rank == 0 + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 jldsave(filename; u = Array(interior(u, :, :, 1)), v = Array(interior(v, :, :, 1)), c = Array(interior(c, :, :, 1)), η = Array(interior(η, :, :, 1))) end - MPI.Barrier(MPI.COMM_WORLD) - MPI.Finalize() - return nothing end @@ -101,24 +98,24 @@ function run_distributed_latitude_longitude_grid(arch, filename) latitude = (-90, 90), topology = (Periodic, Bounded, Flat)) - @test isnothing(flat_distributed_grid.z) - - distributed_grid = LatitudeLongitudeGrid(arch; - size = (40, 40, 10), - longitude = (0, 360), - latitude = (-10, 10), - z = (-1000, 0), - halo = (5, 5, 5)) + # @test isnothing(flat_distributed_grid.z) - distributed_grid = ImmersedBoundaryGrid(distributed_grid, GridFittedBottom(bottom_height)) - model = run_distributed_simulation(distributed_grid) + grid = LatitudeLongitudeGrid(arch; + size=(40, 40, 10), + longitude=(0, 360), + latitude=(-10, 10), + z=(-1000, 0), + halo=(5, 5, 5)) + grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom_height)) + model = run_distributed_simulation(grid) + η = reconstruct_global_field(model.free_surface.η) u = reconstruct_global_field(model.velocities.u) v = reconstruct_global_field(model.velocities.v) c = reconstruct_global_field(model.tracers.c) - if arch.local_rank == 0 + if MPI.Comm_rank(MPI.COMM_WORLD) == 0 jldsave(filename; u = Array(interior(u, :, :, 10)), v = Array(interior(v, :, :, 10)), c = Array(interior(c, :, :, 10)), @@ -144,7 +141,7 @@ function run_distributed_simulation(grid) ηᵢ(λ, φ, z) = exp(- (φ - 90)^2 / 10^2) + exp(- φ^2 / 10^2) set!(model, c=ηᵢ, η=ηᵢ) - Δt = 5minutes + Δt = 10 # 5minutes arch = architecture(grid) if arch isa ReactantState || arch isa Distributed{<:ReactantState} @info "Compiling first_time_step..." @@ -159,7 +156,7 @@ function run_distributed_simulation(grid) @info "Running first time step..." r_first_time_step!(model, Δt) - @info "Running time steps..." + @info "Running time step..." for N in 2:100 r_time_step!(model, Δt) end diff --git a/test/run_sharding_tests.jl b/test/run_sharding_tests.jl new file mode 100644 index 0000000000..ce240d6523 --- /dev/null +++ b/test/run_sharding_tests.jl @@ -0,0 +1,32 @@ +# We need to initiate MPI for sharding because we are using a multi-host implementation: +# i.e. we are launching the tests with `mpiexec` and on Github actions the default MPI +# implementation is MPICH which requires calling MPI.Init(). In the case of OpenMPI, +# MPI.Init() is not necessary. +using MPI +MPI.Init() +include("distributed_tests_utils.jl") + +ENV["XLA_FLAGS"] = "--xla_force_host_platform_device_count=4" +ENV["JULIA_DEBUG"] = "Reactant, Reactant_jll" + +if Base.ARGS[1] == "tripolar" + run_function = run_distributed_tripolar_grid + suffix = "trg" +else + run_function = run_distributed_latitude_longitude_grid + suffix = "llg" +end + +Reactant.Distributed.initialize(; single_gpu_per_process=false) + +arch = Distributed(ReactantState(), partition = Partition(4, 1)) +filename = "distributed_xslab_$(suffix).jld2" +run_function(arch, filename) + +arch = Distributed(ReactantState(), partition = Partition(1, 4)) +filename = "distributed_yslab_$(suffix).jld2" +run_function(arch, filename) + +arch = Distributed(ReactantState(), partition = Partition(2, 2)) +filename = "distributed_pencil_$(suffix).jld2" +run_function(arch, filename) diff --git a/test/runtests.jl b/test/runtests.jl index 5c5ee49c99..ceb6deb3f0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -260,9 +260,9 @@ CUDA.allowscalar() do if group == :sharding || group == :all @testset "Sharding Reactant extension tests" begin - # Broken for the moment (trying to fix them in https://github.com/CliMA/Oceananigans.jl/pull/4293) - # include("test_sharded_lat_lon.jl") - # include("test_sharded_tripolar.jl") + include("test_sharded_lat_lon.jl") + # Tests are not correct at the moment, TODO: fix them + # include("test_sharded_tripolar.jl") end end diff --git a/test/test_sharded_lat_lon.jl b/test/test_sharded_lat_lon.jl index 5e4047b567..fe4813bb54 100644 --- a/test/test_sharded_lat_lon.jl +++ b/test/test_sharded_lat_lon.jl @@ -1,48 +1,20 @@ -using JLD2 -using Oceananigans -using Oceananigans.DistributedComputations: reconstruct_global_field, reconstruct_global_grid -using Oceananigans.Units -using Reactant -using Random -using Test - +include("dependencies_for_runtests.jl") include("distributed_tests_utils.jl") -run_xslab_distributed_grid = """ - using MPI - MPI.Init() - include("distributed_tests_utils.jl") - Reactant.Distributed.initialize(; single_gpu_per_process=false) - arch = Distributed(ReactantState(), partition = Partition(4, 1)) - run_distributed_latitude_longitude_grid(arch, "distributed_xslab_llg.jld2") -""" - -run_yslab_distributed_grid = """ - using MPI - MPI.Init() - include("distributed_tests_utils.jl") - Reactant.Distributed.initialize(; single_gpu_per_process=false) - arch = Distributed(ReactantState(), partition = Partition(1, 4)) - run_distributed_latitude_longitude_grid(arch, "distributed_yslab_llg.jld2") -""" +Nhosts = 1 -run_pencil_distributed_grid = """ - using MPI - MPI.Init() - include("distributed_tests_utils.jl") - Reactant.Distributed.initialize(; single_gpu_per_process=false) - @test_throws ArgumentError Distributed(ReactantState(), partition = Partition(3, 2)) - @test_throws ArgumentError Distributed(ReactantState(), partition = Partition(1, 2)) - arch = Distributed(ReactantState(), partition = Partition(2, 2)) - run_distributed_latitude_longitude_grid(arch, "distributed_pencil_llg.jld2") -""" - -@testset "Test distributed LatitudeLongitudeGrid simulations..." begin - # Run the serial computation +@testset "Test sharded LatitudeLongitudeGrid simulations..." begin + # Run the serial computation Random.seed!(1234) - bottom_height = - rand(40, 40, 1) .* 500 .- 500 + bottom_height = - 500 .* rand(40, 40, 1) .- 500 + + grid = LatitudeLongitudeGrid(size=(40, 40, 10), + longitude=(0, 360), + latitude=(-10, 10), + z=(-1000, 0), + halo=(5, 5, 5)) + - grid = LatitudeLongitudeGrid(size=(40, 40, 10), longitude=(0, 360), latitude=(-10, 10), z=(-1000, 0), halo=(5, 5, 5)) grid = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom_height)) model = run_distributed_simulation(grid) @@ -56,61 +28,43 @@ run_pencil_distributed_grid = """ cs = interior(cs, :, :, 10) ηs = interior(ηs, :, :, 1) - # Run the distributed grid simulation with a pencil configuration - write("distributed_xslab_llg_tests.jl", run_xslab_distributed_grid) - run(`$(mpiexec()) -n 4 $(Base.julia_cmd()) --project -O0 distributed_xslab_llg_tests.jl`) - rm("distributed_xslab_llg_tests.jl") - - # Retrieve Parallel quantities - up = jldopen("distributed_xslab_llg.jld2")["u"] - vp = jldopen("distributed_xslab_llg.jld2")["v"] - ηp = jldopen("distributed_xslab_llg.jld2")["η"] - cp = jldopen("distributed_xslab_llg.jld2")["c"] - - # rm("distributed_xslab_llg.jld2") - - @test all(us .≈ up) - @test all(vs .≈ vp) - @test all(cs .≈ cp) - @test all(ηs .≈ ηp) - - # Run the distributed grid simulation with a slab configuration - write("distributed_yslab_llg_tests.jl", run_yslab_distributed_grid) - run(`$(mpiexec()) -n 4 $(Base.julia_cmd()) --project -O0 distributed_yslab_llg_tests.jl`) - rm("distributed_yslab_llg_tests.jl") + # Run the distributed grid simulations in all the configurations + run(`$(mpiexec()) -n $(Nhosts) $(Base.julia_cmd()) --project -O0 run_sharding_tests.jl "latlon"`) # Retrieve Parallel quantities - up = jldopen("distributed_yslab_llg.jld2")["u"] - vp = jldopen("distributed_yslab_llg.jld2")["v"] - cp = jldopen("distributed_yslab_llg.jld2")["c"] - ηp = jldopen("distributed_yslab_llg.jld2")["η"] - - # rm("distributed_yslab_llg.jld2") - - # Test slab partitioning - @test all(us .≈ up) - @test all(vs .≈ vp) - @test all(cs .≈ cp) - @test all(ηs .≈ ηp) - - # We try now with more ranks in the x-direction. This is not a trivial - # test as we are now splitting, not only where the singularities are, but - # also in the middle of the north fold. This is a more challenging test - write("distributed_pencil_llg_tests.jl", run_pencil_distributed_grid) - run(`$(mpiexec()) -n 4 julia --project -O0 distributed_pencil_llg_tests.jl`) - rm("distributed_pencil_llg_tests.jl") - - # Retrieve Parallel quantities - up = jldopen("distributed_pencil_llg.jld2")["u"] - vp = jldopen("distributed_pencil_llg.jld2")["v"] - ηp = jldopen("distributed_pencil_llg.jld2")["η"] - cp = jldopen("distributed_pencil_llg.jld2")["c"] - - # rm("distributed_pencil_llg.jld2") - - @test all(us .≈ up) - @test all(vs .≈ vp) - @test all(cs .≈ cp) - @test all(ηs .≈ ηp) -end - + up1 = jldopen("distributed_xslab_llg.jld2")["u"] + vp1 = jldopen("distributed_xslab_llg.jld2")["v"] + cp1 = jldopen("distributed_xslab_llg.jld2")["c"] + ηp1 = jldopen("distributed_xslab_llg.jld2")["η"] + + up2 = jldopen("distributed_yslab_llg.jld2")["u"] + vp2 = jldopen("distributed_yslab_llg.jld2")["v"] + cp2 = jldopen("distributed_yslab_llg.jld2")["c"] + ηp2 = jldopen("distributed_yslab_llg.jld2")["η"] + + up3 = jldopen("distributed_pencil_llg.jld2")["u"] + vp3 = jldopen("distributed_pencil_llg.jld2")["v"] + cp3 = jldopen("distributed_pencil_llg.jld2")["c"] + ηp3 = jldopen("distributed_pencil_llg.jld2")["η"] + + # What does correctness mean in this case? Probably sqrt(ϵ)? + ϵ = sqrt(eps(Float64)) + + @info "Testing xslab partitioning..." + @test all(isapprox.(us, up1; atol=ϵ)) + @test all(isapprox.(vs, vp1; atol=ϵ)) + @test all(isapprox.(cs, cp1; atol=ϵ)) + @test all(isapprox.(ηs, ηp1; atol=ϵ)) + + @info "Testing yslab partitioning..." + @test all(isapprox.(us, up2; atol=ϵ)) + @test all(isapprox.(vs, vp2; atol=ϵ)) + @test all(isapprox.(cs, cp2; atol=ϵ)) + @test all(isapprox.(ηs, ηp2; atol=ϵ)) + + @info "Testing pencil partitioning..." + @test all(isapprox.(us, up2; atol=ϵ)) + @test all(isapprox.(vs, vp2; atol=ϵ)) + @test all(isapprox.(cs, cp2; atol=ϵ)) + @test all(isapprox.(ηs, ηp2; atol=ϵ)) +end \ No newline at end of file diff --git a/test/test_sharded_tripolar.jl b/test/test_sharded_tripolar.jl index fdc9cd7723..a2b4fac3e7 100644 --- a/test/test_sharded_tripolar.jl +++ b/test/test_sharded_tripolar.jl @@ -1,28 +1,8 @@ include("dependencies_for_runtests.jl") include("distributed_tests_utils.jl") -# We need to initiate MPI for sharding because we are using a multi-host implementation: -# i.e. we are launching the tests with `mpiexec` and on Github actions the default MPI -# implementation is MPICH which requires calling MPI.Init(). In the case of OpenMPI, -# MPI.Init() is not necessary. +Nhosts = 1 -run_slab_distributed_grid = """ - using MPI - MPI.Init() - include("distributed_tests_utils.jl") - Reactant.Distributed.initialize(; single_gpu_per_process=false) - arch = Distributed(ReactantState(), partition = Partition(1, 4)) #, synchronized_communication=true) - run_distributed_tripolar_grid(arch, "distributed_yslab_tripolar.jld2") -""" - -run_pencil_distributed_grid = """ - using MPI - MPI.Init() - include("distributed_tests_utils.jl") - Reactant.Distributed.initialize(; single_gpu_per_process=false) - arch = Distributed(ReactantState(), partition = Partition(2, 2)) - run_distributed_tripolar_grid(arch, "distributed_pencil_tripolar.jld2") -""" @testset "Test distributed TripolarGrid simulations..." begin # Run the serial computation @@ -37,41 +17,46 @@ run_pencil_distributed_grid = """ us = interior(us, :, :, 1) vs = interior(vs, :, :, 1) + ηs = interior(ηs, :, :, 1) cs = interior(cs, :, :, 1) - # Run the distributed grid simulation with a slab configuration - write("distributed_slab_tests.jl", run_slab_distributed_grid) - run(`$(mpiexec()) -n 4 $(Base.julia_cmd()) --project -O0 distributed_slab_tests.jl`) - rm("distributed_slab_tests.jl") - - # Retrieve Parallel quantities - up = jldopen("distributed_yslab_tripolar.jld2")["u"] - vp = jldopen("distributed_yslab_tripolar.jld2")["v"] - cp = jldopen("distributed_yslab_tripolar.jld2")["c"] - ηp = jldopen("distributed_yslab_tripolar.jld2")["η"] - - rm("distributed_yslab_tripolar.jld2") - # Test slab partitioning - @test all(us .≈ up) - @test all(vs .≈ vp) - @test all(cs .≈ cp) - @test all(ηs .≈ ηp) - - # Run the distributed grid simulation with a pencil configuration - write("distributed_tests.jl", run_pencil_distributed_grid) - run(`$(mpiexec()) -n 4 $(Base.julia_cmd()) --project -O0 distributed_tests.jl`) - rm("distributed_tests.jl") + # Run the distributed grid simulations in all the configurations + run(`$(mpiexec()) -n $(Nhosts) $(Base.julia_cmd()) --project -O0 run_sharding_tests.jl "tripolar"`) # Retrieve Parallel quantities - up = jldopen("distributed_pencil_tripolar.jld2")["u"] - vp = jldopen("distributed_pencil_tripolar.jld2")["v"] - ηp = jldopen("distributed_pencil_tripolar.jld2")["η"] - cp = jldopen("distributed_pencil_tripolar.jld2")["c"] - - rm("distributed_pencil_tripolar.jld2") - - @test all(us .≈ up) - @test all(vs .≈ vp) - @test all(cs .≈ cp) - @test all(ηs .≈ ηp) + up1 = jldopen("distributed_xslab_trg.jld2")["u"] + vp1 = jldopen("distributed_xslab_trg.jld2")["v"] + cp1 = jldopen("distributed_xslab_trg.jld2")["c"] + ηp1 = jldopen("distributed_xslab_trg.jld2")["η"] + + vp2 = jldopen("distributed_yslab_trg.jld2")["v"] + up2 = jldopen("distributed_yslab_trg.jld2")["u"] + cp2 = jldopen("distributed_yslab_trg.jld2")["c"] + ηp2 = jldopen("distributed_yslab_trg.jld2")["η"] + + vp3 = jldopen("distributed_pencil_trg.jld2")["v"] + up3 = jldopen("distributed_pencil_trg.jld2")["u"] + cp3 = jldopen("distributed_pencil_trg.jld2")["c"] + ηp3 = jldopen("distributed_pencil_trg.jld2")["η"] + + # What does correctness mean in this case? Probably sqrt(ϵ)? + ϵ = sqrt(eps(Float64)) + + @info "Testing xslab partitioning..." + @test all(isapprox.(us, up1; atol=ϵ)) + @test all(isapprox.(vs, vp1; atol=ϵ)) + @test all(isapprox.(cs, cp1; atol=ϵ)) + @test all(isapprox.(ηs, ηp1; atol=ϵ)) + + @info "Testing yslab partitioning..." + @test all(isapprox.(us, up2; atol=ϵ)) + @test all(isapprox.(vs, vp2; atol=ϵ)) + @test all(isapprox.(cs, cp2; atol=ϵ)) + @test all(isapprox.(ηs, ηp2; atol=ϵ)) + + @info "Testing pencil partitioning..." + @test all(isapprox.(us, up2; atol=ϵ)) + @test all(isapprox.(vs, vp2; atol=ϵ)) + @test all(isapprox.(cs, cp2; atol=ϵ)) + @test all(isapprox.(ηs, ηp2; atol=ϵ)) end \ No newline at end of file