Skip to content

Commit

Permalink
Add MPI and GPU tests on buildkite
Browse files Browse the repository at this point in the history
  • Loading branch information
Sbozzolo committed May 12, 2024
1 parent e3fe65b commit 232c1f9
Show file tree
Hide file tree
Showing 6 changed files with 117 additions and 33 deletions.
19 changes: 19 additions & 0 deletions .buildkite/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Dependencies of the Julia environment stored in `.buildkite/`; the Buildkite
# pipeline selects this environment with `--project=.buildkite`.
[deps]
Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697"
Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595"
BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884"
ClimaDiagnostics = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f"
ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab"
Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79"
ProfileCanvas = "efd6af41-a80b-495e-886c-e51b0c7d77a3"
SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
49 changes: 49 additions & 0 deletions .buildkite/pipeline.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Buildkite pipeline: instantiate the `.buildkite` Julia environment once, then
# run the test suite on CPU, on GPU, and with MPI.

# Default agent settings applied to every step unless a step overrides them.
agents:
  queue: new-central
  slurm_mem: 8G
  modules: climacommon/2024_04_30

env:
  # Append the `.buildkite` directory of the checkout to the Julia load path.
  JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite"
  # Per-pipeline package depot under the Buildkite build path.
  JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/default"
  SLURM_KILL_BAD_EXIT: 1

steps:
  # Set up the environment: develop the package in the checkout into the
  # `.buildkite` project, then instantiate, precompile and print the status.
  - label: "init :computer:"
    key: "init_cpu_env"
    command:
      - "echo $$JULIA_DEPOT_PATH"

      - echo "--- Instantiate project"
      - "julia --project=.buildkite -e 'using Pkg; Pkg.develop(; path = \".\")'"
      - "julia --project=.buildkite -e 'using Pkg; Pkg.instantiate(;verbose=true)'"
      - "julia --project=.buildkite -e 'using Pkg; Pkg.precompile()'"
      - "julia --project=.buildkite -e 'using Pkg; Pkg.status()'"
    agents:
      slurm_cpus_per_task: 8
      # NOTE(review): a GPU is requested for the init step, presumably so the
      # GPU packages can be precompiled here -- confirm.
      slurm_gpus: 1

  # Do not start the test steps until the environment has been set up.
  - wait

  - label: "Run tests on CPU"
    key: "cpu_tests"
    command:
      - "julia --color=yes --project=.buildkite test/runtests.jl"

  # Same test entry point as the CPU step, with the device set to CUDA and one
  # GPU requested.
  - label: "Run tests on GPU"
    key: "gpu_tests"
    command:
      - "julia --color=yes --project=.buildkite test/runtests.jl"
    env:
      CLIMACOMMS_DEVICE: "CUDA"
    agents:
      slurm_gpus: 1

  # Only the integration test is run under MPI: two Slurm tasks, launched
  # through `srun`.
  - label: "Run tests with MPI"
    key: "mpi_tests"
    command:
      - "srun julia --color=yes --project=.buildkite test/integration_test.jl"
    env:
      CLIMACOMMS_CONTEXT: "MPI"
    agents:
      slurm_ntasks: 2
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
version:
# - '1.9'
- '1.10'
- '~1.11.0-0'
# - '~1.11.0-0'
timeout-minutes: 30
steps:
- name: Checkout
Expand Down
2 changes: 1 addition & 1 deletion test/TestTools.jl
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ function SphericalShellSpace(;
nelements = 10,
zelem = 10,
npolynomial = 4,
context = ClimaComms.SingletonCommsContext(),
context = ClimaComms.context(),
FT = Float64,
)
vertdomain = ClimaCore.Domains.IntervalDomain(
Expand Down
73 changes: 46 additions & 27 deletions test/integration_test.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ import NCDatasets

import ClimaDiagnostics

import ClimaComms
# Load the backend packages required by ClimaComms. The macro is only invoked
# when the installed ClimaComms is v0.6 or newer.
# NOTE(review): presumably the macro does not exist in older releases -- confirm.
@static if pkgversion(ClimaComms) >= v"0.6"
ClimaComms.@import_required_backends
end

# Communication context shared by the test sets in this file.
# NOTE(review): presumably chosen from the CLIMACOMMS_* environment variables --
# confirm against the ClimaComms documentation.
const context = ClimaComms.context()
# Initialize the context before any test uses it.
ClimaComms.init(context)

include("TestTools.jl")

"""
Expand All @@ -15,11 +23,11 @@ Set up a full test problem
Increasing `more_compute_diagnostics` adds more copies of a compute diagnostic with no output.
Useful to stress allocations.
"""
function setup_integrator(output_dir; more_compute_diagnostics = 0)
function setup_integrator(output_dir; context, more_compute_diagnostics = 0)
t0 = 0.0
tf = 10.0
dt = 1.0
space = SphericalShellSpace()
space = SphericalShellSpace(; context)
args, kwargs = create_problem(space; t0, tf, dt)

@info "Writing output to $output_dir"
Expand Down Expand Up @@ -95,44 +103,55 @@ end

@testset "A full problem" begin
mktempdir() do output_dir
integrator = setup_integrator(output_dir)
output_dir = ClimaComms.bcast(context, output_dir)

SciMLBase.solve!(integrator)
integrator = setup_integrator(output_dir; context)

NCDatasets.NCDataset(joinpath(output_dir, "YO_1it_inst.nc")) do nc
@test nc["YO"].attrib["short_name"] == "YO"
@test nc["YO"].attrib["long_name"] == "YO YO, Instantaneous"
@test size(nc["YO"]) == (11, 10, 5, 3)
end

NCDatasets.NCDataset(joinpath(output_dir, "YO_2it_average.nc")) do nc
@test nc["YO"].attrib["short_name"] == "YO"
@test nc["YO"].attrib["long_name"] ==
"YO YO, average within every 2 iterations"
@test size(nc["YO"]) == (5, 10, 5, 3)
end
SciMLBase.solve!(integrator)

NCDatasets.NCDataset(joinpath(output_dir, "YO_3s_inst.nc")) do nc
@test nc["YO"].attrib["short_name"] == "YO"
@test nc["YO"].attrib["long_name"] == "YO YO, Instantaneous"
@test size(nc["YO"]) == (4, 10, 5, 3)
if ClimaComms.iamroot(context)
NCDatasets.NCDataset(joinpath(output_dir, "YO_1it_inst.nc")) do nc
@test nc["YO"].attrib["short_name"] == "YO"
@test nc["YO"].attrib["long_name"] == "YO YO, Instantaneous"
@test size(nc["YO"]) == (11, 10, 5, 3)
end

NCDatasets.NCDataset(
joinpath(output_dir, "YO_2it_average.nc"),
) do nc
@test nc["YO"].attrib["short_name"] == "YO"
@test nc["YO"].attrib["long_name"] ==
"YO YO, average within every 2 iterations"
@test size(nc["YO"]) == (5, 10, 5, 3)
end

NCDatasets.NCDataset(joinpath(output_dir, "YO_3s_inst.nc")) do nc
@test nc["YO"].attrib["short_name"] == "YO"
@test nc["YO"].attrib["long_name"] == "YO YO, Instantaneous"
@test size(nc["YO"]) == (4, 10, 5, 3)
end
end
end
end

@testset "Performance" begin
mktempdir() do output_dir
output_dir = ClimaComms.bcast(context, output_dir)

# Flame
integrator = setup_integrator(output_dir)
integrator = setup_integrator(output_dir; context)
prof = Profile.@profile SciMLBase.solve!(integrator)
results = Profile.fetch()
ProfileCanvas.html_file("flame.html", results)
ClimaComms.iamroot(context) && (results = Profile.fetch())
ClimaComms.iamroot(context) &&
ProfileCanvas.html_file("flame.html", results)

# Allocations
integrator = setup_integrator(output_dir)
integrator = setup_integrator(output_dir; context)
prof = Profile.Allocs.@profile SciMLBase.solve!(integrator)
results = Profile.Allocs.fetch()
allocs = ProfileCanvas.view_allocs(results)
ProfileCanvas.html_file("allocs.html", allocs)
ClimaComms.iamroot(context) && (results = Profile.Allocs.fetch())
ClimaComms.iamroot(context) &&
(allocs = ProfileCanvas.view_allocs(results))
ClimaComms.iamroot(context) &&
ProfileCanvas.html_file("allocs.html", allocs)
end
end
5 changes: 1 addition & 4 deletions test/writers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,7 @@ include("TestTools.jl")

# The temporary directory where we write the file cannot be in /tmp; it has
# to be on disk.
output_dir = "netcdf_writer_performance_test"
Base.mkpath(output_dir)
output_dir = mktempdir(".")

@testset "DictWriter" begin
writer = Writers.DictWriter()
Expand Down Expand Up @@ -191,5 +190,3 @@ end
show(stdout, MIME"text/plain"(), timing_ncdataset)
println()
end

Base.rm(output_dir, force = true, recursive = true)

0 comments on commit 232c1f9

Please sign in to comment.