diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 716e8b2524..60359cf0ed 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,7 +27,7 @@ concurrency:
 
 jobs:
   sharding_tests:
-    name: Sharding Tests - Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
+    name: Sharding Tests - Julia ${{ matrix.version }} - ${{ matrix.os }}
     runs-on: ${{ matrix.os }}
     timeout-minutes: 120
     strategy:
@@ -35,29 +35,28 @@ jobs:
       matrix:
         version:
           - '1.10'
-        os:
+        os: 
+          - ubuntu-22.04-arm
           - ubuntu-latest
-        arch:
-          - x64
+          - macos-latest
     steps:
+      - uses: actions/checkout@v4
+      - uses: julia-actions/setup-julia@v2
+        with:
+          version: ${{ matrix.version }}
+      - uses: julia-actions/cache@v2
       - run: |
           touch LocalPreferences.toml
 
           echo "[Reactant]" >> LocalPreferences.toml
           echo "xla_runtime = \"IFRT\"" >> LocalPreferences.toml
 
-          cat LocalPreferences.toml     
-      - uses: actions/checkout@v4
-      - uses: julia-actions/setup-julia@v2
-        with:
-          version: ${{ matrix.version }}
-          arch: ${{ matrix.arch }}
-      - uses: julia-actions/cache@v2
+          cat LocalPreferences.toml    
       - uses: julia-actions/julia-buildpkg@v1
       - uses: julia-actions/julia-runtest@v1
         env:
+          XLA_FLAGS: "--xla_force_host_platform_device_count=4"
           JULIA_DEBUG: "Reactant, Reactant_jll"
-          REACTANT_TEST: true
           TEST_GROUP: "sharding"
 
   mpi_tripolar:
@@ -110,31 +109,6 @@ jobs:
         env:
           TEST_GROUP: "turbulence_closures"
 
-  reactant:
-    name: Reactant - Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
-    runs-on: ${{ matrix.os }}
-    timeout-minutes: 120
-    strategy:
-      fail-fast: false
-      matrix:
-        version:
-          - '1.10'
-        os:
-          - ubuntu-latest
-        arch:
-          - x64
-    steps:
-      - uses: actions/checkout@v4
-      - uses: julia-actions/setup-julia@v2
-        with:
-          version: ${{ matrix.version }}
-          arch: ${{ matrix.arch }}
-      - uses: julia-actions/cache@v2
-      - uses: julia-actions/julia-buildpkg@v1
-      - uses: julia-actions/julia-runtest@v1
-        env:
-          TEST_GROUP: "reactant"
-
   metal:
     name: Metal - Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }}
     runs-on: ${{ matrix.os }}
diff --git a/ext/OceananigansReactantExt/Fields.jl b/ext/OceananigansReactantExt/Fields.jl
index d3076fecc3..1d9f364641 100644
--- a/ext/OceananigansReactantExt/Fields.jl
+++ b/ext/OceananigansReactantExt/Fields.jl
@@ -9,6 +9,7 @@ using Oceananigans.Fields: Field, interior
 using KernelAbstractions: @index, @kernel
 
 import Oceananigans.Fields: set_to_field!, set_to_function!, set!
+import Oceananigans.DistributedComputations: reconstruct_global_field, synchronize_communication!
 
 import ..OceananigansReactantExt: deconcretize
 import ..Grids: ReactantGrid
@@ -17,6 +18,8 @@ import ..Grids: ShardedGrid
 const ReactantField{LX, LY, LZ, O} = Field{LX, LY, LZ, O, <:ReactantGrid}
 const ShardedDistributedField{LX, LY, LZ, O} = Field{LX, LY, LZ, O, <:ShardedGrid}
 
+reconstruct_global_field(field::ShardedDistributedField) = field
+
 deconcretize(field::Field{LX, LY, LZ}) where {LX, LY, LZ} =
     Field{LX, LY, LZ}(field.grid,
                       deconcretize(field.data),
@@ -57,6 +60,9 @@ end
 # keepin it simple
 set_to_field!(u::ReactantField, v::ReactantField) = @jit _set_to_field!(u, v)
 
+# No need to synchronize -> it should be implicit
+synchronize_communication!(::ShardedDistributedField) = nothing
+
 function set_to_function!(u::ShardedDistributedField, f)
     grid = u.grid
     arch = grid.architecture
diff --git a/ext/OceananigansReactantExt/Grids/sharded_grids.jl b/ext/OceananigansReactantExt/Grids/sharded_grids.jl
index 7b4559815b..a5d184d558 100644
--- a/ext/OceananigansReactantExt/Grids/sharded_grids.jl
+++ b/ext/OceananigansReactantExt/Grids/sharded_grids.jl
@@ -152,7 +152,7 @@ function Oceananigans.LatitudeLongitudeGrid(arch::ShardedDistributed,
                                              z, # Intentionally not sharded
                                              Δxᶜᶜᵃ, Δxᶠᶜᵃ, Δxᶜᶠᵃ, Δxᶠᶠᵃ,
                                              Δyᶠᶜᵃ, Δyᶜᶠᵃ,
-                                             Azᶜᶜᵃ, Azᶠᶜᵃ, Azᶜᶠᵃ, Azᶠᶠᵃ,
+                                             Azᶜᶜᵃ, Azᶠᶜᵃ, Azᶜᶠᵃ, Azᶠᶠᵃ, 
                                              grid.radius)
 end
 
diff --git a/ext/OceananigansReactantExt/Models.jl b/ext/OceananigansReactantExt/Models.jl
index 64a5fff6a2..37d9b17ff5 100644
--- a/ext/OceananigansReactantExt/Models.jl
+++ b/ext/OceananigansReactantExt/Models.jl
@@ -2,20 +2,24 @@ module Models
 
 import Oceananigans
 
-import Oceananigans.Models: initialization_update_state!
-import Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces: maybe_extend_halos, FixedSubstepNumber
-import Oceananigans: initialize!
-
 using Oceananigans.Architectures: ReactantState
 using Oceananigans.DistributedComputations: Distributed
 using Oceananigans.Models.HydrostaticFreeSurfaceModels: initialize_free_surface!, HydrostaticFreeSurfaceModel
 
 using ..TimeSteppers: ReactantModel
 using ..Grids: ReactantGrid, ReactantImmersedBoundaryGrid
+using ..Grids: ShardedGrid, ShardedDistributed
+
+import Oceananigans.Models.HydrostaticFreeSurfaceModels.SplitExplicitFreeSurfaces: maybe_extend_halos, FixedSubstepNumber
+import Oceananigans: initialize!
+import Oceananigans.Models: 
+        initialization_update_state!, 
+        complete_communication_and_compute_buffer!, 
+        interior_tendency_kernel_parameters
 
 const ReactantHFSM{TS, E} = Union{
     HydrostaticFreeSurfaceModel{TS, E, <:ReactantState},
-    HydrostaticFreeSurfaceModel{TS, E, <:Distributed{<:ReactantState}},
+    HydrostaticFreeSurfaceModel{TS, E, <:ShardedDistributed},
 }
 
 initialize_immersed_boundary_grid!(grid) = nothing
@@ -46,4 +50,8 @@ function initialize!(model::ReactantHFSM)
     return nothing
 end
 
+# Undo all the pipelining for a `ShardedDistributed` architecture
+complete_communication_and_compute_buffer!(model, ::ShardedGrid, ::ShardedDistributed) = nothing
+interior_tendency_kernel_parameters(::ShardedDistributed, grid) = :xyz
+
 end # module
diff --git a/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl b/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl
index 5fde541587..4149c12ea2 100644
--- a/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl
+++ b/src/OrthogonalSphericalShellGrids/distributed_tripolar_grid.jl
@@ -19,10 +19,8 @@ import Oceananigans.Fields: Field, validate_indices, validate_boundary_condition
 const DistributedTripolarGrid{FT, TX, TY, TZ, CZ, CC, FC, CF, FF, Arch} =
     OrthogonalSphericalShellGrid{FT, TX, TY, TZ, CZ, <:Tripolar, CC, FC, CF, FF, <:Distributed{<:Union{CPU, GPU}}}
 
-const DistributedTripolarGridOfSomeKind = Union{
-    DistributedTripolarGrid,
-    ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:DistributedTripolarGrid}
-}
+const MPITripolarGrid{FT, TX, TY, TZ, CZ, CC, FC, CF, FF, Arch} = OrthogonalSphericalShellGrid{FT, TX, TY, TZ, CZ, <:Tripolar, CC, FC, CF, FF, <:Distributed{<:Union{CPU, GPU}}}
+const MPITripolarGridOfSomeKind = Union{MPITripolarGrid, ImmersedBoundaryGrid{<:Any, <:Any, <:Any, <:Any, <:MPITripolarGrid}}
 
 """
     TripolarGrid(arch::Distributed, FT::DataType = Float64; halo = (4, 4, 4), kwargs...)
@@ -234,7 +232,7 @@ end
 # a distributed `TripolarGrid` needs a `ZipperBoundaryCondition` for the north boundary
 # only on the last rank
 function regularize_field_boundary_conditions(bcs::FieldBoundaryConditions,
-                                              grid::DistributedTripolarGridOfSomeKind,
+                                              grid::MPITripolarGridOfSomeKind,
                                               field_name::Symbol,
                                               prognostic_names=nothing)
 
@@ -274,7 +272,7 @@ end
 
 # Extension of the constructor for a `Field` on a `TRG` grid. We assumes that the north boundary is a zipper
 # with a sign that depends on the location of the field (revert the value of the halos if on edges, keep it if on nodes or centers)
-function Field(loc::Tuple{<:LX, <:LY, <:LZ}, grid::DistributedTripolarGridOfSomeKind, data, old_bcs, indices::Tuple, op, status) where {LX, LY, LZ}
+function Field(loc::Tuple{<:LX, <:LY, <:LZ}, grid::MPITripolarGridOfSomeKind, data, old_bcs, indices::Tuple, op, status) where {LX, LY, LZ}
     arch = architecture(grid)
     yrank = arch.local_index[2] - 1
 
@@ -327,7 +325,7 @@ function Field(loc::Tuple{<:LX, <:LY, <:LZ}, grid::DistributedTripolarGridOfSome
 end
 
 # Reconstruction the global tripolar grid for visualization purposes
-function reconstruct_global_grid(grid::DistributedTripolarGrid)
+function reconstruct_global_grid(grid::MPITripolarGrid)
 
     arch = grid.architecture
 
@@ -354,8 +352,7 @@ function reconstruct_global_grid(grid::DistributedTripolarGrid)
                         z)
 end
 
-function with_halo(new_halo, old_grid::DistributedTripolarGrid)
-
+function with_halo(new_halo, old_grid::MPITripolarGrid) 
     arch = old_grid.architecture
 
     n  = size(old_grid)
diff --git a/src/OrthogonalSphericalShellGrids/distributed_zipper.jl b/src/OrthogonalSphericalShellGrids/distributed_zipper.jl
index 2f5dc1c9cc..27e9757d12 100644
--- a/src/OrthogonalSphericalShellGrids/distributed_zipper.jl
+++ b/src/OrthogonalSphericalShellGrids/distributed_zipper.jl
@@ -4,6 +4,7 @@ using Oceananigans.BoundaryConditions: fill_open_boundary_regions!,
                                        DistributedCommunication
 
 using Oceananigans.DistributedComputations: cooperative_waitall!,
+                                            AsynchronousDistributed,
                                             recv_from_buffers!,
                                             fill_corners!,
                                             loc_id,
@@ -59,7 +60,7 @@ end
     return nothing
 end
 
-function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::DistributedTripolarGridOfSomeKind, buffers, args...;
+function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::MPITripolarGridOfSomeKind, buffers, args...; 
                             only_local_halos=false, fill_boundary_normal_velocities=true, kwargs...)
 
     if fill_boundary_normal_velocities
@@ -91,26 +92,28 @@ function fill_halo_regions!(c::OffsetArray, bcs, indices, loc, grid::Distributed
     return nothing
 end
 
-function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:DistributedTripolarGridOfSomeKind})
+function synchronize_communication!(field::Field{<:Any, <:Any, <:Any, <:Any, <:MPITripolarGridOfSomeKind})
     arch = architecture(field.grid)
 
-    # Wait for outstanding requests
-    if !isempty(arch.mpi_requests)
-        cooperative_waitall!(arch.mpi_requests)
+    if arch isa AsynchronousDistributed # Otherwise no need to synchonize
+        # Wait for outstanding requests
+        if !isempty(arch.mpi_requests) 
+            cooperative_waitall!(arch.mpi_requests)
+      
+            # Reset MPI tag
+            arch.mpi_tag[] = 0
 
-        # Reset MPI tag
-        arch.mpi_tag[] = 0
+            # Reset MPI requests
+            empty!(arch.mpi_requests)
+        end
 
-        # Reset MPI requests
-        empty!(arch.mpi_requests)
-    end
-
-    recv_from_buffers!(field.data, field.communication_buffers, field.grid)
+        recv_from_buffers!(field.data, field.communication_buffers, field.grid)
 
-    north_bc = field.boundary_conditions.north
-    instantiated_location = map(instantiate, location(field))
+        north_bc = field.boundary_conditions.north
+        instantiated_location = map(instantiate, location(field))
 
-    switch_north_halos!(field, north_bc, field.grid, instantiated_location)
+        switch_north_halos!(field, north_bc, field.grid, instantiated_location)
+    end
 
     return nothing
 end
\ No newline at end of file
diff --git a/src/OrthogonalSphericalShellGrids/distributed_zipper_north_tags.jl b/src/OrthogonalSphericalShellGrids/distributed_zipper_north_tags.jl
index 51f8d66596..99447c3d2d 100644
--- a/src/OrthogonalSphericalShellGrids/distributed_zipper_north_tags.jl
+++ b/src/OrthogonalSphericalShellGrids/distributed_zipper_north_tags.jl
@@ -11,7 +11,7 @@ sides  = (:west, :east, :south, :north, :southwest, :southeast, :northwest, :nor
 side_id = Dict(side => n-1 for (n, side) in enumerate(sides))
 
 # Change these and we are golden!
-function north_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
+function north_recv_tag(arch, ::MPITripolarGridOfSomeKind, location)
     field_id   = string(arch.mpi_tag[], pad=ID_DIGITS)
     loc_digit  = string(loc_id(location...), pad=ID_DIGITS)
     last_rank  = arch.local_index[2] == ranks(arch)[2]
@@ -19,7 +19,7 @@ function north_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
     return parse(Int, field_id * loc_digit * side_digit)
 end
 
-function north_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
+function north_send_tag(arch, ::MPITripolarGridOfSomeKind, location)
     field_id   = string(arch.mpi_tag[], pad=ID_DIGITS)
     loc_digit  = string(loc_id(location...), pad=ID_DIGITS)
     last_rank  = arch.local_index[2] == ranks(arch)[2]
@@ -27,7 +27,7 @@ function north_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
     return parse(Int, field_id * loc_digit * side_digit)
 end
 
-function northwest_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
+function northwest_recv_tag(arch, ::MPITripolarGridOfSomeKind, location)
     field_id   = string(arch.mpi_tag[], pad=ID_DIGITS)
     loc_digit  = string(loc_id(location...), pad=ID_DIGITS)
     last_rank  = arch.local_index[2] == ranks(arch)[2]
@@ -35,7 +35,7 @@ function northwest_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
     return parse(Int, field_id * loc_digit * side_digit)
 end
 
-function northwest_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
+function northwest_send_tag(arch, ::MPITripolarGridOfSomeKind, location)
     field_id   = string(arch.mpi_tag[], pad=ID_DIGITS)
     loc_digit  = string(loc_id(location...), pad=ID_DIGITS)
     last_rank  = arch.local_index[2] == ranks(arch)[2]
@@ -43,7 +43,7 @@ function northwest_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
     return parse(Int, field_id * loc_digit * side_digit)
 end
 
-function northeast_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
+function northeast_recv_tag(arch, ::MPITripolarGridOfSomeKind, location)
     field_id   = string(arch.mpi_tag[], pad=ID_DIGITS)
     loc_digit  = string(loc_id(location...), pad=ID_DIGITS)
     last_rank  = arch.local_index[2] == ranks(arch)[2]
@@ -51,7 +51,7 @@ function northeast_recv_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
     return parse(Int, field_id * loc_digit * side_digit)
 end
 
-function northeast_send_tag(arch, ::DistributedTripolarGridOfSomeKind, location)
+function northeast_send_tag(arch, ::MPITripolarGridOfSomeKind, location)
     field_id   = string(arch.mpi_tag[], pad=ID_DIGITS)
     loc_digit  = string(loc_id(location...), pad=ID_DIGITS)
     last_rank  = arch.local_index[2] == ranks(arch)[2]
diff --git a/test/distributed_tests_utils.jl b/test/distributed_tests_utils.jl
index 0fa31f49d1..f653b8f2ab 100644
--- a/test/distributed_tests_utils.jl
+++ b/test/distributed_tests_utils.jl
@@ -68,25 +68,22 @@ end
 
 # Run the distributed grid simulation and save down reconstructed results
 function run_distributed_tripolar_grid(arch, filename)
-    distributed_grid = TripolarGrid(arch; size = (40, 40, 1), z = (-1000, 0), halo = (5, 5, 5))
-    distributed_grid = analytical_immersed_tripolar_grid(distributed_grid)
-    model            = run_distributed_simulation(distributed_grid)
+    grid  = TripolarGrid(arch; size = (40, 40, 1), z = (-1000, 0), halo = (5, 5, 5))
+    grid  = analytical_immersed_tripolar_grid(grid)
+    model = run_distributed_simulation(grid)
 
     η = reconstruct_global_field(model.free_surface.η)
     u = reconstruct_global_field(model.velocities.u)
     v = reconstruct_global_field(model.velocities.v)
     c = reconstruct_global_field(model.tracers.c)
 
-    if arch.local_rank == 0
+    if MPI.Comm_rank(MPI.COMM_WORLD) == 0
         jldsave(filename; u = Array(interior(u, :, :, 1)),
                           v = Array(interior(v, :, :, 1)),
                           c = Array(interior(c, :, :, 1)),
                           η = Array(interior(η, :, :, 1)))
     end
 
-    MPI.Barrier(MPI.COMM_WORLD)
-    MPI.Finalize()
-
     return nothing
 end
 
@@ -101,24 +98,24 @@ function run_distributed_latitude_longitude_grid(arch, filename)
         latitude = (-90, 90),
         topology = (Periodic, Bounded, Flat))
 
-    @test isnothing(flat_distributed_grid.z)
-
-    distributed_grid = LatitudeLongitudeGrid(arch;
-                                             size = (40, 40, 10),
-                                             longitude = (0, 360),
-                                             latitude = (-10, 10),
-                                             z = (-1000, 0),
-                                             halo = (5, 5, 5))
+    # @test isnothing(flat_distributed_grid.z)
 
-    distributed_grid = ImmersedBoundaryGrid(distributed_grid, GridFittedBottom(bottom_height))
-    model = run_distributed_simulation(distributed_grid)
+    grid = LatitudeLongitudeGrid(arch; 
+                                 size=(40, 40, 10), 
+                                 longitude=(0, 360), 
+                                 latitude=(-10, 10), 
+                                 z=(-1000, 0), 
+                                 halo=(5, 5, 5))   
 
+    grid  = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom_height))      
+    model = run_distributed_simulation(grid)
+    
     η = reconstruct_global_field(model.free_surface.η)
     u = reconstruct_global_field(model.velocities.u)
     v = reconstruct_global_field(model.velocities.v)
     c = reconstruct_global_field(model.tracers.c)
 
-    if arch.local_rank == 0
+    if MPI.Comm_rank(MPI.COMM_WORLD) == 0
         jldsave(filename; u = Array(interior(u, :, :, 10)),
                           v = Array(interior(v, :, :, 10)),
                           c = Array(interior(c, :, :, 10)),
@@ -144,7 +141,7 @@ function run_distributed_simulation(grid)
     ηᵢ(λ, φ, z) = exp(- (φ - 90)^2 / 10^2) + exp(- φ^2 / 10^2)
     set!(model, c=ηᵢ, η=ηᵢ)
 
-    Δt = 5minutes
+    Δt = 10 # 5minutes
     arch = architecture(grid)
     if arch isa ReactantState || arch isa Distributed{<:ReactantState}
         @info "Compiling first_time_step..."
@@ -159,7 +156,7 @@ function run_distributed_simulation(grid)
 
     @info "Running first time step..."
     r_first_time_step!(model, Δt)
-    @info "Running time steps..."
+    @info "Running time step..."
     for N in 2:100
         r_time_step!(model, Δt)
     end
diff --git a/test/run_sharding_tests.jl b/test/run_sharding_tests.jl
new file mode 100644
index 0000000000..ce240d6523
--- /dev/null
+++ b/test/run_sharding_tests.jl
@@ -0,0 +1,32 @@
+# We need to initiate MPI for sharding because we are using a multi-host implementation: 
+# i.e. we are launching the tests with `mpiexec` and on Github actions the default MPI 
+# implementation is MPICH which requires calling MPI.Init(). In the case of OpenMPI,
+# MPI.Init() is not necessary.
+using MPI 
+MPI.Init()
+include("distributed_tests_utils.jl")
+
+ENV["XLA_FLAGS"] = "--xla_force_host_platform_device_count=4"
+ENV["JULIA_DEBUG"] = "Reactant, Reactant_jll"
+
+if Base.ARGS[1] == "tripolar"
+    run_function = run_distributed_tripolar_grid
+    suffix = "trg"
+else
+    run_function = run_distributed_latitude_longitude_grid
+    suffix = "llg"
+end
+
+Reactant.Distributed.initialize(; single_gpu_per_process=false)
+
+arch = Distributed(ReactantState(), partition = Partition(4, 1))
+filename = "distributed_xslab_$(suffix).jld2"
+run_function(arch, filename)
+
+arch = Distributed(ReactantState(), partition = Partition(1, 4))
+filename = "distributed_yslab_$(suffix).jld2"
+run_function(arch, filename)
+
+arch = Distributed(ReactantState(), partition = Partition(2, 2))
+filename = "distributed_pencil_$(suffix).jld2"
+run_function(arch, filename)
diff --git a/test/runtests.jl b/test/runtests.jl
index 5c5ee49c99..ceb6deb3f0 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -260,9 +260,9 @@ CUDA.allowscalar() do
 
     if group == :sharding || group == :all
         @testset "Sharding Reactant extension tests" begin
-            # Broken for the moment (trying to fix them in https://github.com/CliMA/Oceananigans.jl/pull/4293)
-            # include("test_sharded_lat_lon.jl")
-            # include("test_sharded_tripolar.jl")
+            include("test_sharded_lat_lon.jl")
+            # Tests are not correct at the moment, TODO: fix them
+            # include("test_sharded_tripolar.jl") 
         end
     end
 
diff --git a/test/test_sharded_lat_lon.jl b/test/test_sharded_lat_lon.jl
index 5e4047b567..fe4813bb54 100644
--- a/test/test_sharded_lat_lon.jl
+++ b/test/test_sharded_lat_lon.jl
@@ -1,48 +1,20 @@
-using JLD2
-using Oceananigans
-using Oceananigans.DistributedComputations: reconstruct_global_field, reconstruct_global_grid
-using Oceananigans.Units
-using Reactant
-using Random
-using Test
-
+include("dependencies_for_runtests.jl")
 include("distributed_tests_utils.jl")
 
-run_xslab_distributed_grid = """
-    using MPI
-    MPI.Init()
-    include("distributed_tests_utils.jl")
-    Reactant.Distributed.initialize(; single_gpu_per_process=false)
-    arch = Distributed(ReactantState(), partition = Partition(4, 1))
-    run_distributed_latitude_longitude_grid(arch, "distributed_xslab_llg.jld2")
-"""
-
-run_yslab_distributed_grid = """
-    using MPI
-    MPI.Init()
-    include("distributed_tests_utils.jl")
-    Reactant.Distributed.initialize(; single_gpu_per_process=false)
-    arch = Distributed(ReactantState(), partition = Partition(1, 4))
-    run_distributed_latitude_longitude_grid(arch, "distributed_yslab_llg.jld2")
-"""
+Nhosts = 1
 
-run_pencil_distributed_grid = """
-    using MPI
-    MPI.Init()
-    include("distributed_tests_utils.jl")
-    Reactant.Distributed.initialize(; single_gpu_per_process=false)
-    @test_throws ArgumentError Distributed(ReactantState(), partition = Partition(3, 2))
-    @test_throws ArgumentError Distributed(ReactantState(), partition = Partition(1, 2))
-    arch = Distributed(ReactantState(), partition = Partition(2, 2))
-    run_distributed_latitude_longitude_grid(arch, "distributed_pencil_llg.jld2")
-"""
-
-@testset "Test distributed LatitudeLongitudeGrid simulations..." begin
-    # Run the serial computation
+@testset "Test sharded LatitudeLongitudeGrid simulations..." begin
+    # Run the serial computation  
     Random.seed!(1234)
-    bottom_height = - rand(40, 40, 1) .* 500 .- 500
+    bottom_height = - 500 .* rand(40, 40, 1) .- 500
+    
+    grid  = LatitudeLongitudeGrid(size=(40, 40, 10), 
+                                  longitude=(0, 360), 
+                                  latitude=(-10, 10), 
+                                  z=(-1000, 0), 
+                                  halo=(5, 5, 5))    
+                              
 
-    grid  = LatitudeLongitudeGrid(size=(40, 40, 10), longitude=(0, 360), latitude=(-10, 10), z=(-1000, 0), halo=(5, 5, 5))
     grid  = ImmersedBoundaryGrid(grid, GridFittedBottom(bottom_height))
     model = run_distributed_simulation(grid)
 
@@ -56,61 +28,43 @@ run_pencil_distributed_grid = """
     cs = interior(cs, :, :, 10)
     ηs = interior(ηs, :, :, 1)
 
-    # Run the distributed grid simulation with a pencil configuration
-    write("distributed_xslab_llg_tests.jl", run_xslab_distributed_grid)
-    run(`$(mpiexec()) -n 4 $(Base.julia_cmd()) --project -O0 distributed_xslab_llg_tests.jl`)
-    rm("distributed_xslab_llg_tests.jl")
-
-    # Retrieve Parallel quantities
-    up = jldopen("distributed_xslab_llg.jld2")["u"]
-    vp = jldopen("distributed_xslab_llg.jld2")["v"]
-    ηp = jldopen("distributed_xslab_llg.jld2")["η"]
-    cp = jldopen("distributed_xslab_llg.jld2")["c"]
-
-    # rm("distributed_xslab_llg.jld2")
-
-    @test all(us .≈ up)
-    @test all(vs .≈ vp)
-    @test all(cs .≈ cp)
-    @test all(ηs .≈ ηp)
-
-    # Run the distributed grid simulation with a slab configuration
-    write("distributed_yslab_llg_tests.jl", run_yslab_distributed_grid)
-    run(`$(mpiexec()) -n 4 $(Base.julia_cmd()) --project -O0 distributed_yslab_llg_tests.jl`)
-    rm("distributed_yslab_llg_tests.jl")
+    # Run the distributed grid simulations in all the configurations
+    run(`$(mpiexec()) -n $(Nhosts) $(Base.julia_cmd()) --project -O0 run_sharding_tests.jl "latlon"`)
 
     # Retrieve Parallel quantities
-    up = jldopen("distributed_yslab_llg.jld2")["u"]
-    vp = jldopen("distributed_yslab_llg.jld2")["v"]
-    cp = jldopen("distributed_yslab_llg.jld2")["c"]
-    ηp = jldopen("distributed_yslab_llg.jld2")["η"]
-
-    # rm("distributed_yslab_llg.jld2")
-
-    # Test slab partitioning
-    @test all(us .≈ up)
-    @test all(vs .≈ vp)
-    @test all(cs .≈ cp)
-    @test all(ηs .≈ ηp)
-
-    # We try now with more ranks in the x-direction. This is not a trivial
-    # test as we are now splitting, not only where the singularities are, but
-    # also in the middle of the north fold. This is a more challenging test
-    write("distributed_pencil_llg_tests.jl", run_pencil_distributed_grid)
-    run(`$(mpiexec()) -n 4 julia --project -O0 distributed_pencil_llg_tests.jl`)
-    rm("distributed_pencil_llg_tests.jl")
-
-    # Retrieve Parallel quantities
-    up = jldopen("distributed_pencil_llg.jld2")["u"]
-    vp = jldopen("distributed_pencil_llg.jld2")["v"]
-    ηp = jldopen("distributed_pencil_llg.jld2")["η"]
-    cp = jldopen("distributed_pencil_llg.jld2")["c"]
-
-    # rm("distributed_pencil_llg.jld2")
-
-    @test all(us .≈ up)
-    @test all(vs .≈ vp)
-    @test all(cs .≈ cp)
-    @test all(ηs .≈ ηp)
-end
-
+    up1 = jldopen("distributed_xslab_llg.jld2")["u"]
+    vp1 = jldopen("distributed_xslab_llg.jld2")["v"]
+    cp1 = jldopen("distributed_xslab_llg.jld2")["c"]
+    ηp1 = jldopen("distributed_xslab_llg.jld2")["η"]
+
+    up2 = jldopen("distributed_yslab_llg.jld2")["u"]
+    vp2 = jldopen("distributed_yslab_llg.jld2")["v"]
+    cp2 = jldopen("distributed_yslab_llg.jld2")["c"]
+    ηp2 = jldopen("distributed_yslab_llg.jld2")["η"]
+
+    up3 = jldopen("distributed_pencil_llg.jld2")["u"]
+    vp3 = jldopen("distributed_pencil_llg.jld2")["v"]
+    cp3 = jldopen("distributed_pencil_llg.jld2")["c"]
+    ηp3 = jldopen("distributed_pencil_llg.jld2")["η"]
+
+    # What does correctness mean in this case? Probably sqrt(ϵ)?
+    ϵ = sqrt(eps(Float64))
+
+    @info "Testing xslab partitioning..."
+    @test all(isapprox.(us, up1; atol=ϵ))
+    @test all(isapprox.(vs, vp1; atol=ϵ))
+    @test all(isapprox.(cs, cp1; atol=ϵ))
+    @test all(isapprox.(ηs, ηp1; atol=ϵ))
+
+    @info "Testing yslab partitioning..."
+    @test all(isapprox.(us, up2; atol=ϵ))
+    @test all(isapprox.(vs, vp2; atol=ϵ))
+    @test all(isapprox.(cs, cp2; atol=ϵ))
+    @test all(isapprox.(ηs, ηp2; atol=ϵ))
+
+    @info "Testing pencil partitioning..."
+    @test all(isapprox.(us, up2; atol=ϵ))
+    @test all(isapprox.(vs, vp2; atol=ϵ))
+    @test all(isapprox.(cs, cp2; atol=ϵ))
+    @test all(isapprox.(ηs, ηp2; atol=ϵ))
+end
\ No newline at end of file
diff --git a/test/test_sharded_tripolar.jl b/test/test_sharded_tripolar.jl
index fdc9cd7723..a2b4fac3e7 100644
--- a/test/test_sharded_tripolar.jl
+++ b/test/test_sharded_tripolar.jl
@@ -1,28 +1,8 @@
 include("dependencies_for_runtests.jl")
 include("distributed_tests_utils.jl")
 
-# We need to initiate MPI for sharding because we are using a multi-host implementation:
-# i.e. we are launching the tests with `mpiexec` and on Github actions the default MPI
-# implementation is MPICH which requires calling MPI.Init(). In the case of OpenMPI,
-# MPI.Init() is not necessary.
+Nhosts = 1
 
-run_slab_distributed_grid = """
-    using MPI
-    MPI.Init()
-    include("distributed_tests_utils.jl")
-    Reactant.Distributed.initialize(; single_gpu_per_process=false)
-    arch = Distributed(ReactantState(), partition = Partition(1, 4)) #, synchronized_communication=true)
-    run_distributed_tripolar_grid(arch, "distributed_yslab_tripolar.jld2")
-"""
-
-run_pencil_distributed_grid = """
-    using MPI
-    MPI.Init()
-    include("distributed_tests_utils.jl")
-    Reactant.Distributed.initialize(; single_gpu_per_process=false)
-    arch = Distributed(ReactantState(), partition = Partition(2, 2))
-    run_distributed_tripolar_grid(arch, "distributed_pencil_tripolar.jld2")
-"""
 
 @testset "Test distributed TripolarGrid simulations..." begin
     # Run the serial computation
@@ -37,41 +17,46 @@ run_pencil_distributed_grid = """
 
     us = interior(us, :, :, 1)
     vs = interior(vs, :, :, 1)
+    ηs = interior(ηs, :, :, 1)
     cs = interior(cs, :, :, 1)
-    # Run the distributed grid simulation with a slab configuration
-    write("distributed_slab_tests.jl", run_slab_distributed_grid)
-    run(`$(mpiexec()) -n 4 $(Base.julia_cmd()) --project -O0 distributed_slab_tests.jl`)
-    rm("distributed_slab_tests.jl")
-
-    # Retrieve Parallel quantities
-    up = jldopen("distributed_yslab_tripolar.jld2")["u"]
-    vp = jldopen("distributed_yslab_tripolar.jld2")["v"]
-    cp = jldopen("distributed_yslab_tripolar.jld2")["c"]
-    ηp = jldopen("distributed_yslab_tripolar.jld2")["η"]
-
-    rm("distributed_yslab_tripolar.jld2")
 
-    # Test slab partitioning
-    @test all(us .≈ up)
-    @test all(vs .≈ vp)
-    @test all(cs .≈ cp)
-    @test all(ηs .≈ ηp)
-
-    # Run the distributed grid simulation with a pencil configuration
-    write("distributed_tests.jl", run_pencil_distributed_grid)
-    run(`$(mpiexec()) -n 4 $(Base.julia_cmd()) --project -O0 distributed_tests.jl`)
-    rm("distributed_tests.jl")
+    # Run the distributed grid simulations in all the configurations
+    run(`$(mpiexec()) -n $(Nhosts) $(Base.julia_cmd()) --project -O0 run_sharding_tests.jl "tripolar"`)
 
     # Retrieve Parallel quantities
-    up = jldopen("distributed_pencil_tripolar.jld2")["u"]
-    vp = jldopen("distributed_pencil_tripolar.jld2")["v"]
-    ηp = jldopen("distributed_pencil_tripolar.jld2")["η"]
-    cp = jldopen("distributed_pencil_tripolar.jld2")["c"]
-
-    rm("distributed_pencil_tripolar.jld2")
-
-    @test all(us .≈ up)
-    @test all(vs .≈ vp)
-    @test all(cs .≈ cp)
-    @test all(ηs .≈ ηp)
+    up1 = jldopen("distributed_xslab_trg.jld2")["u"]
+    vp1 = jldopen("distributed_xslab_trg.jld2")["v"]
+    cp1 = jldopen("distributed_xslab_trg.jld2")["c"]
+    ηp1 = jldopen("distributed_xslab_trg.jld2")["η"]
+
+    vp2 = jldopen("distributed_yslab_trg.jld2")["v"]
+    up2 = jldopen("distributed_yslab_trg.jld2")["u"]
+    cp2 = jldopen("distributed_yslab_trg.jld2")["c"]
+    ηp2 = jldopen("distributed_yslab_trg.jld2")["η"]
+
+    vp3 = jldopen("distributed_pencil_trg.jld2")["v"]
+    up3 = jldopen("distributed_pencil_trg.jld2")["u"]
+    cp3 = jldopen("distributed_pencil_trg.jld2")["c"]
+    ηp3 = jldopen("distributed_pencil_trg.jld2")["η"]
+
+    # What does correctness mean in this case? Probably sqrt(ϵ)?
+    ϵ = sqrt(eps(Float64))
+
+    @info "Testing xslab partitioning..."
+    @test all(isapprox.(us, up1; atol=ϵ))
+    @test all(isapprox.(vs, vp1; atol=ϵ))
+    @test all(isapprox.(cs, cp1; atol=ϵ))
+    @test all(isapprox.(ηs, ηp1; atol=ϵ))
+
+    @info "Testing yslab partitioning..."
+    @test all(isapprox.(us, up2; atol=ϵ))
+    @test all(isapprox.(vs, vp2; atol=ϵ))
+    @test all(isapprox.(cs, cp2; atol=ϵ))
+    @test all(isapprox.(ηs, ηp2; atol=ϵ))
+
+    @info "Testing pencil partitioning..."
+    @test all(isapprox.(us, up2; atol=ϵ))
+    @test all(isapprox.(vs, vp2; atol=ϵ))
+    @test all(isapprox.(cs, cp2; atol=ϵ))
+    @test all(isapprox.(ηs, ηp2; atol=ϵ))
 end
\ No newline at end of file