CliMA · simone-silvestri · Apr 23, 2025 · Apr 9, 2025 · Apr 9, 2025 · Apr 9, 2025
diff --git a/.buildkite/pipeline-benchmarks.yml b/.buildkite/pipeline-benchmarks.yml
@@ -0,0 +1,97 @@
+# Environment shared by every step of the GPU benchmark pipeline.
+env:
+  # Julia release fetched by the init step; the minor version is part of the download URL.
+  JULIA_VERSION: "1.10.9"
+  JULIA_MINOR_VERSION: "1.10"
+  # Root directory holding the Julia install, the depot, and temporary files.
+  TARTARUS_HOME: "/storage5/buildkite-agent"
+  # The depot path contains the build number, so each build gets its own depot.
+  JULIA_DEPOT_PATH: "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER"
+  JULIA_PKG_SERVER_REGISTRY_PREFERENCE: eager
+  JULIA_NUM_PRECOMPILE_TASKS: 24
+  JULIA_NUM_THREADS: 8
+  # Nsight Systems executable used to profile and summarize each benchmark run.
+  NSYS: "/storage6/simone/new_nsight/bin/nsys"
+  CUDA_VISIBLE_DEVICES: "1" # Tartarus device for GPU Benchmarking
+  TMPDIR: "$TARTARUS_HOME/tmp"
+
+# Default agent queue for the pipeline.
+agents:
+  queue: "Oceananigans"
+
+steps:
+  # Fetch and unpack the pinned Julia release under the Tartarus home directory.
+  - key: "init"
+    label: "🏕️ initialize tartarus environment"
+    agents:
+      queue: "Oceananigans"
+    retry:
+      automatic:
+        - limit: 1
+          exit_status: 1
+    env:
+      TEST_GROUP: "init"
+      JULIA_BINDIR: "$TARTARUS_HOME/julia-$JULIA_VERSION/bin"
+      JULIA_DEPOT_PATH: "$TARTARUS_HOME/.julia-$BUILDKITE_BUILD_NUMBER"
+    command: |
+      # Download the Julia tarball (wget -N skips the download when the local copy is up to date)
+      wget -N -P $TARTARUS_HOME https://julialang-s3.julialang.org/bin/linux/x64/$JULIA_MINOR_VERSION/julia-$JULIA_VERSION-linux-x86_64.tar.gz
+      # Unpack it next to the tarball
+      tar xf $TARTARUS_HOME/julia-$JULIA_VERSION-linux-x86_64.tar.gz -C $TARTARUS_HOME
+
+  # Do not start the benchmarks until initialization has finished.
+  - wait
+
+  # Profile every benchmark configuration with Nsight Systems and upload the text summaries.
+  - label: "🚀 Oceananigans GPU benchmarks"
+    key: "benchmarks"
+    agents:
+      queue: "Oceananigans"
+    command: |
+      # Instantiate
+      $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia --color=yes --project --check-bounds=no -e 'using Pkg; Pkg.instantiate()'
+
+      # Run each benchmark group through the same profile -> stats -> cleanup sequence.
+      # Loop variables are written with a doubled dollar sign so that Buildkite's
+      # upload-time interpolation leaves them for the shell to expand at run time.
+      for group in periodic bounded periodic_cheap_advection bounded_cheap_advection immersed; do
+        # test/benchmark_tests.jl reads BENCHMARK_GROUP to pick the configuration
+        export BENCHMARK_GROUP="$$group"
+        $NSYS profile --output="$${group}_output" --trace=cuda $TARTARUS_HOME/julia-$JULIA_VERSION/bin/julia --color=yes --project --check-bounds=no test/benchmark_tests.jl
+        $NSYS stats "$${group}_output.nsys-rep" > "$${group}_output.txt"
+
+        # Remove generated output files; -f keeps an already-missing intermediate
+        # file from failing a run whose summary was produced successfully
+        rm -f "$${group}_output.nsys-rep" "$${group}_output.sqlite"
+      done
+
+    artifact_paths:
+      - "periodic_output.txt"
+      - "bounded_output.txt"
+      - "periodic_cheap_advection_output.txt"
+      - "bounded_cheap_advection_output.txt"
+      - "immersed_output.txt"
+    soft_fail:
+      - exit_status: 3
+
diff --git a/test/benchmark_tests.jl b/test/benchmark_tests.jl
@@ -0,0 +1,95 @@
+using Oceananigans
+using Oceananigans.Units
+using Oceananigans.Architectures: on_architecture
+using Oceananigans.TurbulenceClosures.TKEBasedVerticalDiffusivities: CATKEVerticalDiffusivity
+using SeawaterPolynomials.TEOS10
+using Random
+
+"""
+    ocean_benchmark(arch, Nx, Ny, Nz, topology, immersed, tracer_advection=WENO(order=7))
+
+Build and initialize the `HydrostaticFreeSurfaceModel` used by the GPU benchmarks.
+
+# Arguments
+- `arch`: architecture on which the grid (and therefore the model) is built.
+- `Nx`, `Ny`, `Nz`: number of grid cells in `x`, `y` and `z`.
+- `topology`: topology tuple forwarded to `RectilinearGrid`.
+- `immersed`: when `true`, the grid is wrapped in an `ImmersedBoundaryGrid` with a
+  `GridFittedBottom` whose depth is random, between -6000 and -1000.
+- `tracer_advection`: tracer advection scheme, `WENO(order=7)` by default.
+
+# Returns
+The model, with `T`, `S`, `u`, `v` set to small random perturbations and `e` set to `1e-6`.
+"""
+function ocean_benchmark(arch, Nx, Ny, Nz, topology, immersed, tracer_advection=WENO(order=7))
+
+    # Seed once up front so the bathymetry and the initial conditions are reproducible
+    # for every benchmark group, not only the immersed one.
+    Random.seed!(1234)
+
+    z_faces = collect(range(-6000, 0, length=Nz+1))
+
+    underlying_grid = RectilinearGrid(arch; size=(Nx, Ny, Nz),
+                                            halo=(7, 7, 7),
+                                            z=z_faces,
+                                            x=(-1000kilometers, 1000kilometers),
+                                            y=(-1000kilometers, 1000kilometers),
+                                            topology)
+
+    grid = if immersed
+        bottom = on_architecture(arch, -5000 .* rand(Nx, Ny) .- 1000)
+        ImmersedBoundaryGrid(underlying_grid, GridFittedBottom(bottom); active_cells_map=true)
+    else
+        underlying_grid
+    end
+
+    @info "Grid is built"
+    momentum_advection = WENOVectorInvariant()
+    buoyancy = SeawaterBuoyancy(equation_of_state=TEOS10EquationOfState())
+    free_surface = SplitExplicitFreeSurface(grid; substeps=70)
+    closure = CATKEVerticalDiffusivity()
+
+    model = HydrostaticFreeSurfaceModel(; grid,
+                                          momentum_advection,
+                                          tracer_advection,
+                                          buoyancy,
+                                          closure,
+                                          free_surface,
+                                          tracers = (:T, :S, :e))
+
+    @info "Model is built"
+
+    # Random perturbation with the same size as `field`. The dimensions must be
+    # splatted: `rand(dims_tuple)` draws one element *from* the tuple instead of
+    # building an array of that size. Sizing per field keeps this valid on `Bounded`
+    # topologies, where the velocity fields need not have the same size as the tracers.
+    perturbation(field) = 0.0001 .* rand(size(field)...)
+
+    # initialize variables with randomish values
+    Tᵢ = perturbation(model.tracers.T) .+ 20
+    Sᵢ = perturbation(model.tracers.S) .+ 35
+    uᵢ = perturbation(model.velocities.u)
+    vᵢ = perturbation(model.velocities.v)
+
+    set!(model, T=Tᵢ, S=Sᵢ, e=1e-6, u=uᵢ, v=vᵢ)
+
+    return model
+end
+
+"""
+    run_benchmark(model)
+
+Advance `model` by 15 calls to `time_step!`, each with a time step of `1.0`.
+Returns `nothing`.
+"""
+function run_benchmark(model)
+    nsteps = 15
+    Δt = 1.0
+    foreach(_ -> time_step!(model, Δt), 1:nsteps)
+    return nothing
+end
+
+# BENCHMARK_GROUP names a single benchmark configuration, or "all" (the default)
+# to run every configuration in turn.
+group = get(ENV, "BENCHMARK_GROUP", "all") |> Symbol
+
+const Nx = 500
+const Ny = 200
+const Nz = 60
+
+arch = GPU()
+
+cheap_advection = FluxFormAdvection(WENO(order=7), WENO(order=7), Centered())
+
+# For each group: the positional arguments handed to `ocean_benchmark` after
+# `arch, Nx, Ny, Nz`, i.e. `(topology, immersed[, tracer_advection])`.
+benchmark_cases = (
+    periodic                 = ((Periodic, Periodic, Bounded), false),
+    bounded                  = ((Bounded, Bounded, Bounded), false),
+    periodic_cheap_advection = ((Periodic, Periodic, Bounded), false, cheap_advection),
+    bounded_cheap_advection  = ((Bounded, Bounded, Bounded), false, cheap_advection),
+    immersed                 = ((Periodic, Periodic, Bounded), true),
+)
+
+selected_cases = if group == :all
+    keys(benchmark_cases)
+elseif haskey(benchmark_cases, group)
+    (group,)
+else
+    # Fail loudly: running nothing for a misspelled group would look like a
+    # successful benchmark run.
+    known = join(keys(benchmark_cases), ", ")
+    throw(ArgumentError("unknown BENCHMARK_GROUP \"$group\"; expected \"all\" or one of: $known"))
+end
+
+for case in selected_cases
+    @info "Running benchmark group" case
+    model = ocean_benchmark(arch, Nx, Ny, Nz, benchmark_cases[case]...)
+    run_benchmark(model)
+end