diff --git a/.buildkite/Project.toml b/.buildkite/Project.toml new file mode 100644 index 00000000..3155fbad --- /dev/null +++ b/.buildkite/Project.toml @@ -0,0 +1,19 @@ +[deps] +Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +BenchmarkTools = "6e4b80f9-dd63-53aa-95a3-0cdb28fa8baf" +CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" +ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" +ClimaCore = "d414da3d-4745-48bb-8d80-42e94e092884" +ClimaDiagnostics = "1ecacbb8-0713-4841-9a07-eb5aa8a2d53f" +ClimaTimeSteppers = "595c0a79-7f3d-439a-bc5a-b232dc3bde79" +Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" +JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899" +MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" +NCDatasets = "85f8d34a-cbdd-5861-8df4-14fed0d494ab" +Profile = "9abbd945-dff8-562f-b5e8-e1ebf5ef1b79" +ProfileCanvas = "efd6af41-a80b-495e-886c-e51b0c7d77a3" +SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f" +SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml new file mode 100644 index 00000000..f66dfad6 --- /dev/null +++ b/.buildkite/pipeline.yml @@ -0,0 +1,49 @@ +agents: + queue: new-central + slurm_mem: 8G + modules: climacommon/2024_04_30 + +env: + JULIA_LOAD_PATH: "${JULIA_LOAD_PATH}:${BUILDKITE_BUILD_CHECKOUT_PATH}/.buildkite" + JULIA_DEPOT_PATH: "${BUILDKITE_BUILD_PATH}/${BUILDKITE_PIPELINE_SLUG}/depot/default" + SLURM_KILL_BAD_EXIT: 1 + +steps: + - label: "init :computer:" + key: "init_cpu_env" + command: + - "echo $$JULIA_DEPOT_PATH" + + - echo "--- Instantiate project" + - "julia --project=.buildkite -e 'using Pkg; Pkg.develop(; path = \".\")'" + - "julia --project=.buildkite -e 'using Pkg; Pkg.instantiate(;verbose=true)'" + - "julia --project=.buildkite -e 'using Pkg; Pkg.precompile()'" + - "julia --project=.buildkite -e 'using Pkg; Pkg.status()'" + agents: + slurm_cpus_per_task: 8 + slurm_gpus: 1 + + - wait + + - label: "Run tests on CPU" + key: "cpu_tests" + command: + - "julia --color=yes --project=.buildkite test/runtests.jl" + + - label: "Run tests on GPU" + key: "gpu_tests" + command: + - "julia --color=yes --project=.buildkite test/runtests.jl" + env: + CLIMACOMMS_DEVICE: "CUDA" + agents: + slurm_gpus: 1 + + - label: "Run tests with MPI" + key: "mpi_tests" + command: + - "srun julia --color=yes --project=.buildkite test/integration_test.jl" + env: + CLIMACOMMS_CONTEXT: "MPI" + agents: + slurm_ntasks: 2 diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index a90548dd..e3c3b118 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -23,7 +23,7 @@ jobs: version: # - '1.9' - '1.10' - - '~1.11.0-0' + # - '~1.11.0-0' timeout-minutes: 30 steps: - name: Checkout diff --git a/test/TestTools.jl b/test/TestTools.jl index c4b06c9e..f49abd79 100644 --- a/test/TestTools.jl +++ b/test/TestTools.jl @@ -31,7 +31,7 @@ function SphericalShellSpace(; nelements = 10, zelem = 10, npolynomial = 4, - context = ClimaComms.SingletonCommsContext(), + context = ClimaComms.context(), FT = Float64, ) vertdomain = ClimaCore.Domains.IntervalDomain( diff --git a/test/integration_test.jl b/test/integration_test.jl index 1a3e19a1..c9373ece 100644 --- a/test/integration_test.jl +++ b/test/integration_test.jl @@ -7,6 +7,14 @@ import NCDatasets import ClimaDiagnostics +import ClimaComms +@static if pkgversion(ClimaComms) >= v"0.6" + ClimaComms.@import_required_backends +end + +const context = ClimaComms.context() +ClimaComms.init(context) + include("TestTools.jl") """ @@ -15,11 +23,11 @@ Set up a full test problem Increasing `more_compute_diagnostics` adds more copies of a compute diagnostic with no output. Useful to stress allocations. """ -function setup_integrator(output_dir; more_compute_diagnostics = 0) +function setup_integrator(output_dir; context, more_compute_diagnostics = 0) t0 = 0.0 tf = 10.0 dt = 1.0 - space = SphericalShellSpace() + space = SphericalShellSpace(; context) args, kwargs = create_problem(space; t0, tf, dt) @info "Writing output to $output_dir" @@ -95,44 +103,55 @@ end @testset "A full problem" begin mktempdir() do output_dir - integrator = setup_integrator(output_dir) + output_dir = ClimaComms.bcast(context, output_dir) - SciMLBase.solve!(integrator) + integrator = setup_integrator(output_dir; context) - NCDatasets.NCDataset(joinpath(output_dir, "YO_1it_inst.nc")) do nc - @test nc["YO"].attrib["short_name"] == "YO" - @test nc["YO"].attrib["long_name"] == "YO YO, Instantaneous" - @test size(nc["YO"]) == (11, 10, 5, 3) - end - - NCDatasets.NCDataset(joinpath(output_dir, "YO_2it_average.nc")) do nc - @test nc["YO"].attrib["short_name"] == "YO" - @test nc["YO"].attrib["long_name"] == - "YO YO, average within every 2 iterations" - @test size(nc["YO"]) == (5, 10, 5, 3) - end + SciMLBase.solve!(integrator) - NCDatasets.NCDataset(joinpath(output_dir, "YO_3s_inst.nc")) do nc - @test nc["YO"].attrib["short_name"] == "YO" - @test nc["YO"].attrib["long_name"] == "YO YO, Instantaneous" - @test size(nc["YO"]) == (4, 10, 5, 3) + if ClimaComms.iamroot(context) + NCDatasets.NCDataset(joinpath(output_dir, "YO_1it_inst.nc")) do nc + @test nc["YO"].attrib["short_name"] == "YO" + @test nc["YO"].attrib["long_name"] == "YO YO, Instantaneous" + @test size(nc["YO"]) == (11, 10, 5, 3) + end + + NCDatasets.NCDataset( + joinpath(output_dir, "YO_2it_average.nc"), + ) do nc + @test nc["YO"].attrib["short_name"] == "YO" + @test nc["YO"].attrib["long_name"] == + "YO YO, average within every 2 iterations" + @test size(nc["YO"]) == (5, 10, 5, 3) + end + + NCDatasets.NCDataset(joinpath(output_dir, "YO_3s_inst.nc")) do nc + @test nc["YO"].attrib["short_name"] == "YO" + @test nc["YO"].attrib["long_name"] == "YO YO, Instantaneous" + @test size(nc["YO"]) == (4, 10, 5, 3) + end end end end @testset "Performance" begin mktempdir() do output_dir + output_dir = ClimaComms.bcast(context, output_dir) + # Flame - integrator = setup_integrator(output_dir) + integrator = setup_integrator(output_dir; context) prof = Profile.@profile SciMLBase.solve!(integrator) - results = Profile.fetch() - ProfileCanvas.html_file("flame.html", results) + ClimaComms.iamroot(context) && (results = Profile.fetch()) + ClimaComms.iamroot(context) && + ProfileCanvas.html_file("flame.html", results) # Allocations - integrator = setup_integrator(output_dir) + integrator = setup_integrator(output_dir; context) prof = Profile.Allocs.@profile SciMLBase.solve!(integrator) - results = Profile.Allocs.fetch() - allocs = ProfileCanvas.view_allocs(results) - ProfileCanvas.html_file("allocs.html", allocs) + ClimaComms.iamroot(context) && (results = Profile.Allocs.fetch()) + ClimaComms.iamroot(context) && + (allocs = ProfileCanvas.view_allocs(results)) + ClimaComms.iamroot(context) && + ProfileCanvas.html_file("allocs.html", allocs) end end diff --git a/test/writers.jl b/test/writers.jl index cfaeb228..a7f1ab83 100644 --- a/test/writers.jl +++ b/test/writers.jl @@ -15,8 +15,7 @@ include("TestTools.jl") # The temporary directory where we write the file cannot be in /tmp, it has # to be on disk -output_dir = "netcdf_writer_performance_test" -Base.mkpath(output_dir) +output_dir = mktempdir(".") @testset "DictWriter" begin writer = Writers.DictWriter() @@ -191,5 +190,3 @@ end show(stdout, MIME"text/plain"(), timing_ncdataset) println() end - -Base.rm(output_dir, force = true, recursive = true)