diff --git a/README.md b/README.md index b15ea7a8..0a1bbd98 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,11 @@ asQ is designed to allow fast prototyping of new ParaDiag methods, while still b This is achieved using the Firedrake and PETSc libraries. The finite element models are defined by specifying the weak form using [Firedrake, "*an automated system for the portable solution of partial differential equations using the finite element method*](https://www.firedrakeproject.org/)", and the linear and nonlinear solvers required are provided by [PETSc, "*the Portable, Extensible Toolkit for Scientific Computation*"](https://petsc.org/release/). +See the arXiv paper https://arxiv.org/abs/2409.18792 for a description and demonstration of asQ. The code used for this paper can be found in the [asq_manuscript_examples](https://github.com/firedrakeproject/asQ/tree/master/asq_manuscript_examples) directory. ## ParaDiag -ParaDiag is a parallel-in-time method, meaning that is solves for multiple timesteps of a timeseries simultaneously, rather than one at a time like traditional serial-in-time methods. +ParaDiag is a parallel-in-time method, meaning that it solves for multiple timesteps of a timeseries simultaneously, rather than one at a time like traditional serial-in-time methods. This [review article](https://arxiv.org/abs/2005.09158) provides a good introduction to the method. asQ implements the ParaDiag-II family of methods based on creating a block-circulant approximation to the all-at-once system which can be block-diagonalised with the FFT and solved efficiently in parallel. @@ -22,10 +23,9 @@ To install asQ, pass the arguments `--install asQ` to the `firedrake-install` sc ## Getting started -The best place to start is the [examples directory](https://github.com/firedrakeproject/asQ/tree/master/examples). -Annotated scripts for the linear advection equation and the heat equation show how to set up a problem with asQ and solve the timeseries using ParaDiag. +The best place to start is the arXiv paper and associated examples in [this directory](https://github.com/firedrakeproject/asQ/tree/master/asq_manuscript_examples). -More advanced scripts can be found in the [case studies directory](https://github.com/firedrakeproject/asQ/tree/master/case_studies), including scripts for the shallow water equations and a model for stratigraphic evolution of the sea floor. +Other examples can be found in the [examples directory](https://github.com/firedrakeproject/asQ/tree/master/examples) and more advanced scripts can be found in the [case studies directory](https://github.com/firedrakeproject/asQ/tree/master/case_studies), including scripts for the shallow water equations and a model for stratigraphic evolution of the sea floor. ## Help and support diff --git a/asq_manuscript_examples/README.md b/asq_manuscript_examples/README.md new file mode 100644 index 00000000..b64ed27a --- /dev/null +++ b/asq_manuscript_examples/README.md @@ -0,0 +1,44 @@ +# Example scripts for "asQ: parallel-in-time finite element simulations using ParaDiag for geoscientific models and beyond" + +These are the python scripts used to generate the data for the asQ library paper https://arxiv.org/abs/2409.18792. + +## Section 3.2 "Heat equation example" script +`heat.py` is the example in Section 3.2 "A heat equation example". +It explicitly uses the `AllAtOnce*` objects to create each component of the all-at-once system. +Because it is hard-coded to four ensemble ranks (i.e. 
four MPI ranks in time), it must be run with a multiple of 4 MPI ranks, e.g.
+
+```mpiexec -np 4 python heat.py```
+
+will run with 4 ranks in time, and serial in space, whereas
+
+```mpiexec -np 8 python heat.py```
+
+will run with 4 ranks in time, and 2 ranks in each spatial communicator.
+To change the time-parallelism, change the `time_partition` list in the script.
+
+## Section 4 "Numerical Examples" scripts
+
+All scripts use `argparse` to process command line arguments, so they will print out information on how to use them if run with the `-h` flag. They do not have to be run in parallel to do this.
+
+`python <script_name> -h`
+
+All scripts will also accept a `--show_args` argument, which will print out the value of all argparse arguments at the beginning of the script.
+The default arguments for the `*_paradiag.py` scripts do not use time-parallelism, so the scripts can be run in serial.
+To specify the time-parallelism, see the help for the `--nslices` and `--slice_length` command line arguments.
+
+- The data in Section 4.1 "Advection equation" was generated with
+  - `advection_serial.py` for the serial-in-time results.
+  - `advection_paradiag.py` for the parallel-in-time results.
+- The data in Section 4.2 "Linear shallow water equations" was generated with
+  - `linear_shallow_water_serial.py` for the serial-in-time results.
+  - `linear_shallow_water_paradiag.py` for the parallel-in-time results.
+- The data in Section 4.3 "Nonlinear shallow water equations" was generated with
+  - `nonlinear_shallow_water_serial.py` for the serial-in-time results.
+  - `nonlinear_shallow_water_paradiag.py` for the parallel-in-time results.
+- The data in Section 4.4 "Compressible Euler equations" was generated with
+  - `vertical_slice_serial.py` for the serial-in-time results.
+  - `vertical_slice_paradiag.py` for the parallel-in-time results.
+
+ The `*_serial.py` scripts all use the `SerialMiniApp` class to run the serial-in-time method.
+ The parallel-in-time shallow water equation scripts use the `ShallowWaterMiniApp` to set up the all-at-once system specifically for the shallow water equations.
+ The parallel-in-time advection and vertical slice scripts use the `Paradiag` class to construct the all-at-once system without having to manually create each `AllAtOnce*` object.
diff --git a/asq_manuscript_examples/advection_paradiag.py b/asq_manuscript_examples/advection_paradiag.py
new file mode 100644
index 00000000..095046df
--- /dev/null
+++ b/asq_manuscript_examples/advection_paradiag.py
@@ -0,0 +1,253 @@
+from math import pi, cos, sin
+from utils.timing import SolverTimer
+import firedrake as fd
+from firedrake.petsc import PETSc
+import asQ
+from argparse import ArgumentParser
+from argparse_formatter import DefaultsAndRawTextFormatter
+
+parser = ArgumentParser(
+    description='ParaDiag timestepping for scalar advection of a Gaussian bump in a periodic square with DG in space and implicit-theta in time.',
+    epilog="""\
+Optional PETSc command line arguments:
+
+  -circulant_alpha :float: The circulant parameter to use in the preconditioner. Default 1e-4.
+  -ksp_rtol :float: The relative residual drop required for convergence. Default 1e-11.
+      See https://petsc.org/release/manualpages/KSP/KSPSetTolerances/
+  -ksp_type :str: The Krylov method to use for the all-at-once iterations. Default 'richardson'.
+      Alternatives include gmres or fgmres.
+ See https://petsc.org/release/manualpages/KSP/KSPSetType/ +""", + formatter_class=DefaultsAndRawTextFormatter +) +parser.add_argument('--nx', type=int, default=16, help='Number of cells along each side of the square.') +parser.add_argument('--cfl', type=float, default=0.8, help='Convective CFL number.') +parser.add_argument('--angle', type=float, default=pi/6, help='Angle of the convective velocity to the horizontal.') +parser.add_argument('--degree', type=int, default=1, help='Degree of the scalar space.') +parser.add_argument('--theta', type=float, default=0.5, help='Parameter for the implicit theta timestepping method.') +parser.add_argument('--width', type=float, default=0.2, help='Width of the Gaussian bump.') +parser.add_argument('--nwindows', type=int, default=1, help='Number of time-windows to solve.') +parser.add_argument('--nslices', type=int, default=1, help='Number of time-slices in the all-at-once system. Must divide the number of MPI ranks exactly.') +parser.add_argument('--slice_length', type=int, default=4, help='Number of timesteps per time-slice. Total number of timesteps in the all-at-once system is nslices*slice_length.') +parser.add_argument('--metrics_dir', type=str, default='metrics/advection', help='Directory to save paradiag output metrics to.') +parser.add_argument('--show_args', action='store_true', help='Print all the arguments when the script starts.') + +args = parser.parse_known_args() +args = args[0] + +if args.show_args: + PETSc.Sys.Print(args) + +# The time partition describes how many timesteps +# are included on each time-slice of the ensemble + +time_partition = tuple(args.slice_length for _ in range(args.nslices)) +window_length = sum(time_partition) +nsteps = args.nwindows*window_length + +# Calculate the timestep size dt from the CFL number +umax = 1. +dx = 1./args.nx +dt = args.cfl*dx/umax + +# The Ensemble with the spatial and time communicators +ensemble = asQ.create_ensemble(time_partition) + +# # # === --- domain --- === # # # + +# The mesh needs to be created with the spatial communicator +mesh = fd.PeriodicUnitSquareMesh(args.nx, args.nx, quadrilateral=True, comm=ensemble.comm) + +# We use a discontinuous Galerkin space for the advected scalar +V = fd.FunctionSpace(mesh, "DQ", args.degree) + +# # # === --- initial conditions --- === # # # + +x, y = fd.SpatialCoordinate(mesh) + + +# The scalar initial condition is a Gaussian bump centred at (0.5, 0.5) +def radius(x, y): + return fd.sqrt(pow(x-0.5, 2) + pow(y-0.5, 2)) + + +def gaussian(x, y): + return fd.exp(-0.5*pow(radius(x, y)/args.width, 2)) + + +q0 = fd.Function(V, name="scalar_initial") +q0.interpolate(1 + gaussian(x, y)) + +# The advecting velocity field is constant and directed at an angle to the x-axis +u = fd.Constant(fd.as_vector((umax*cos(args.angle), umax*sin(args.angle)))) + +# # # === --- finite element forms --- === # # # + + +# The time-derivative mass form for the scalar advection equation. +# asQ assumes that the mass form is linear so here +# q is a TrialFunction and phi is a TestFunction +def form_mass(q, phi): + return phi*q*fd.dx + + +# The DG advection form for the scalar advection equation. 
+# asQ assumes that the function form may be nonlinear so +# here q is a Function and phi is a TestFunction +def form_function(q, phi, t): + # upwind switch + n = fd.FacetNormal(mesh) + un = fd.Constant(0.5)*(fd.dot(u, n) + abs(fd.dot(u, n))) + + # integration over element volume + int_cell = q*fd.div(phi*u)*fd.dx + + # integration over internal facets + int_facet = (phi('+') - phi('-'))*(un('+')*q('+') - un('-')*q('-'))*fd.dS + + return int_facet - int_cell + + +# # # === --- PETSc solver parameters --- === # # # + +# The PETSc solver parameters used to solve the +# blocks in step (b) of inverting the ParaDiag matrix. +# MUMPS is a parallel direct solver so spatial parallelism can be used +block_parameters = { + 'ksp_type': 'preonly', + 'pc_type': 'lu', + 'pc_factor_mat_solver_type': 'mumps', +} + +# The PETSc solver parameters for solving the all-at-once system. +# The python preconditioner 'asQ.CirculantPC' applies the ParaDiag matrix. +# +# The equation is linear so we can either: +# a) Solve it using a preconditioned Krylov method: +# P^{-1}Au = P^{-1}b +# The solver option for this is: +# -ksp_type gmres +# b) Solve it with stationary iterations: +# Pu_{k+1} = (P - A)u_{k} + b +# The solver option for this is: +# -ksp_type richardson + +paradiag_parameters = { + 'snes_type': 'ksponly', # only solve 1 "Newton iteration" per window (i.e. a linear problem) + 'ksp_type': 'richardson', # stationary iterations + 'ksp': { + 'monitor': None, # show the residual at every iteration + 'converged_rate': None, # show the contraction rate once the linear solve has converged + 'rtol': 1e-11, # relative residual tolerance + }, + 'pc_type': 'python', + 'pc_python_type': 'asQ.CirculantPC', # the alpha-circulant preconditioner + 'circulant_alpha': 1e-4, # set other values from command line using: -circulant_alpha + 'circulant_block': block_parameters, # options dictionary for the inner solve + 'circulant_state': 'linear', # system is linear so don't update the preconditioner reference state + 'aaos_jacobian_state': 'linear', # system is linear so don't update the jacobian reference state +} + + +# # # === --- Setup ParaDiag --- === # # # + +# Give everything to the Paradiag object which will build the all-at-once system. +paradiag = asQ.Paradiag(ensemble=ensemble, + form_function=form_function, + form_mass=form_mass, + ics=q0, dt=dt, theta=args.theta, + time_partition=time_partition, + solver_parameters=paradiag_parameters) + +# create a timer to profile the calculations +timer = SolverTimer() + + +# This function will be called before paradiag solves each time-window. We can use +# this to make the output a bit easier to read, and to time the window calculation +def window_preproc(paradiag, wndw, rhs): + PETSc.Sys.Print(f'### === --- Calculating time-window {wndw} --- === ###') + PETSc.Sys.Print('') + # for now we are interested in timing only the solve, this + # makes sure we don't time any synchronisation after prints. + with PETSc.Log.Event("window_preproc.Coll_Barrier"): + paradiag.ensemble.ensemble_comm.Barrier() + timer.start_timing() + + +# This function will be called after paradiag solves each time-window. We can use +# this to finish the window calculation timing and print the result. +def window_postproc(paradiag, wndw, rhs): + timer.stop_timing() + PETSc.Sys.Print('') + PETSc.Sys.Print(f'Window solution time: {round(timer.times[-1], 5)}') + PETSc.Sys.Print('') + + +# Setup all solver objects. 
The firedrake DM and options management
+# makes it difficult to setup some preconditioners without actually
+# calling `solve`, so we just run once to set everything up.
+PETSc.Sys.Print('')
+PETSc.Sys.Print('### === --- Setting up solver and prefactoring --- === ###')
+PETSc.Sys.Print('')
+with PETSc.Log.Event("warmup_solve"):
+    paradiag.solve(1)
+PETSc.Sys.Print('')
+
+# reset solution and iteration counts for the timed solve
+paradiag.reset_diagnostics()
+aaofunc = paradiag.solver.aaofunc
+aaofunc.bcast_field(-1, aaofunc.initial_condition)
+aaofunc.assign(aaofunc.initial_condition)
+
+PETSc.Sys.Print('### === --- Solving timeseries --- === ###')
+PETSc.Sys.Print('')
+
+# Solve nwindows of the all-at-once system
+with PETSc.Log.Event("timed_solves"):
+    paradiag.solve(args.nwindows,
+                   preproc=window_preproc,
+                   postproc=window_postproc)
+
+# # # === --- Solver diagnostics --- === # # #
+
+PETSc.Sys.Print('### === --- Iteration and timing results --- === ###')
+PETSc.Sys.Print('')
+
+asQ.write_paradiag_metrics(paradiag, directory=args.metrics_dir)
+
+nw = paradiag.total_windows
+nt = paradiag.total_timesteps
+PETSc.Sys.Print(f'Total windows: {nw}')
+PETSc.Sys.Print(f'Total timesteps: {nt}')
+PETSc.Sys.Print('')
+
+# Show the parallel partition sizes.
+PETSc.Sys.Print(f'Total DoFs per window: {V.dim()*window_length}')
+PETSc.Sys.Print(f'DoFs per timestep: {V.dim()}')
+PETSc.Sys.Print(f'Total number of MPI ranks: {ensemble.global_comm.size}')
+PETSc.Sys.Print(f'Number of MPI ranks per timestep: {mesh.comm.size}')
+PETSc.Sys.Print(f'DoFs/rank: {V.dim()/mesh.comm.size}')
+PETSc.Sys.Print(f'Complex block DoFs/rank: {2*V.dim()/mesh.comm.size}')
+PETSc.Sys.Print('')
+
+# paradiag collects a few iteration counts for us
+lits = paradiag.linear_iterations
+nlits = paradiag.nonlinear_iterations
+blits = paradiag.block_iterations.data()
+
+# Number of nonlinear iterations will be 1 per window for linear problems
+PETSc.Sys.Print(f'Nonlinear iterations: {str(nlits).rjust(5)} | Iterations per window: {str(nlits/nw).rjust(5)}')
+
+# Number of linear iterations of the all-at-once system, total and per window.
+PETSc.Sys.Print(f'Linear iterations: {str(lits).rjust(5)} | Iterations per window: {str(lits/nw).rjust(5)}') + +# Number of iterations needed for each block in step-(b), total and per block solve +PETSc.Sys.Print(f'Total block linear iterations: {blits}') +PETSc.Sys.Print(f'Iterations per block solve: {blits/lits}') +PETSc.Sys.Print('') + +# Timing measurements +PETSc.Sys.Print(timer.string(timesteps_per_solve=window_length, + total_iterations=paradiag.linear_iterations, ndigits=5)) +PETSc.Sys.Print('') diff --git a/asq_manuscript_examples/advection_serial.py b/asq_manuscript_examples/advection_serial.py new file mode 100644 index 00000000..78395f87 --- /dev/null +++ b/asq_manuscript_examples/advection_serial.py @@ -0,0 +1,207 @@ +from math import pi, cos, sin +from utils.timing import SolverTimer +import firedrake as fd +from firedrake.petsc import PETSc +from utils.serial import SerialMiniApp +from argparse import ArgumentParser +from argparse_formatter import DefaultsAndRawTextFormatter + +parser = ArgumentParser( + description='Serial timestepping for scalar advection of a Gaussian bump in a periodic square with DG in space and implicit-theta in time.', + formatter_class=DefaultsAndRawTextFormatter +) +parser.add_argument('--nx', type=int, default=16, help='Number of cells along each side of the square.') +parser.add_argument('--cfl', type=float, default=0.8, help='Convective CFL number.') +parser.add_argument('--angle', type=float, default=pi/6, help='Angle of the convective velocity to the horizontal.') +parser.add_argument('--degree', type=int, default=1, help='Degree of the scalar space.') +parser.add_argument('--theta', type=float, default=0.5, help='Parameter for the implicit theta timestepping method.') +parser.add_argument('--width', type=float, default=0.2, help='Width of the Gaussian bump.') +parser.add_argument('--nt', type=int, default=4, help='Number of timesteps to solve.') +parser.add_argument('--show_args', action='store_true', help='Print all the arguments when the script starts.') + +args = parser.parse_known_args() +args = args[0] + +if args.show_args: + PETSc.Sys.Print(args) + +# Calculate the timestep from the CFL number +umax = 1. +dx = 1./args.nx +dt = args.cfl*dx/umax + +# # # === --- domain --- === # # # + +# Quadrilateral square mesh +mesh = fd.PeriodicUnitSquareMesh(args.nx, args.nx, quadrilateral=True) + +# We use a discontinuous Galerkin space for the advected scalar +V = fd.FunctionSpace(mesh, "DQ", args.degree) + +# # # === --- initial conditions --- === # # # + +x, y = fd.SpatialCoordinate(mesh) + + +# The scalar initial conditions are a Gaussian bump centred at (0.5, 0.5) +def radius(x, y): + return fd.sqrt(pow(x-0.5, 2) + pow(y-0.5, 2)) + + +def gaussian(x, y): + return fd.exp(-0.5*pow(radius(x, y)/args.width, 2)) + + +q0 = fd.Function(V, name="scalar_initial") +q0.interpolate(1 + gaussian(x, y)) + +# The advecting velocity field is constant and directed at an angle to the x-axis +u = fd.Constant(fd.as_vector((umax*cos(args.angle), umax*sin(args.angle)))) + +# # # === --- finite element forms --- === # # # + + +# The time-derivative mass form for the scalar advection equation. +# asQ assumes that the mass form is linear so here +# q is a TrialFunction and phi is a TestFunction +def form_mass(q, phi): + return phi*q*fd.dx + + +# The DG advection form for the scalar advection equation. 
+# asQ assumes that the function form is nonlinear so here +# q is a Function and phi is a TestFunction +def form_function(q, phi, t): + # upwind switch + n = fd.FacetNormal(mesh) + un = fd.Constant(0.5)*(fd.dot(u, n) + abs(fd.dot(u, n))) + + # integration over element volume + int_cell = q*fd.div(phi*u)*fd.dx + + # integration over internal facets + int_facet = (phi('+')-phi('-'))*(un('+')*q('+')-un('-')*q('-'))*fd.dS + + return int_facet - int_cell + + +# # # === --- PETSc solver parameters --- === # # # + + +# The PETSc solver parameters used to solve the +# serial-in-time blocks +block_parameters = { + 'ksp_type': 'preonly', + 'pc_type': 'lu', + 'pc_factor_mat_solver_type': 'mumps', +} + +serial_parameters = { + 'snes': { # 'ksponly' means we are solving a linear problem, and lagging prevents the block from being refactorised every timestep. + 'type': 'ksponly', + 'lag_jacobian': -2, + 'lag_jacobian_persists': None, + 'lag_preconditioner': -2, + 'lag_preconditioner_persists': None, + }, + 'ksp': { + 'monitor': None, # show the residual at every iteration + 'converged_rate': None, # show the contraction rate once the linear solve has converged + 'rtol': 1e-11, + }, +} +serial_parameters.update(block_parameters) + + +# # # === --- Setup the solver --- === # # # + + +# The SerialMiniApp class will set up the implicit-theta system +# for the serial-in-time method. +miniapp = SerialMiniApp(dt, args.theta, q0, + form_mass, form_function, + serial_parameters) + +# create a timer to profile the calculations +timer = SolverTimer() + +PETSc.Sys.Print('### === --- Timestepping loop --- === ###') +PETSc.Sys.Print('') +linear_its = 0 +nonlinear_its = 0 + + +# This function will be called before solving each timestep. We can use +# this to make the output a bit easier to read, and to time the calculation +def preproc(app, step, t): + PETSc.Sys.Print(f'### === --- Calculating timestep {step} --- === ###') + PETSc.Sys.Print('') + # for now we are interested in timing only the solve, this + # makes sure we don't time any synchronisation after prints. + with PETSc.Log.Event("timestep_preproc.Coll_Barrier"): + mesh.comm.Barrier() + timer.start_timing() + + +# This function will be called after solving each timestep. We can use +# this to finish the timestep calculation timing and print the result, +# and to record the number of iterations. +def postproc(app, step, t): + timer.stop_timing() + PETSc.Sys.Print('') + PETSc.Sys.Print(f'Timestep solution time: {round(timer.times[-1], 5)}') + PETSc.Sys.Print('') + + global linear_its + global nonlinear_its + linear_its += app.nlsolver.snes.getLinearSolveIterations() + nonlinear_its += app.nlsolver.snes.getIterationNumber() + + +# run one timestep to get all solver objects set up e.g. factorisations + +# Setup all solver objects. The firedrake DM and options management +# makes it difficult to setup some preconditioners without actually +# calling `solve`, so we just run once to set everything up. 
+PETSc.Sys.Print('')
+PETSc.Sys.Print('### === --- Setting up solver and prefactoring --- === ###')
+PETSc.Sys.Print('')
+with PETSc.Log.Event("warmup_solve"):
+    miniapp.solve(1)
+PETSc.Sys.Print('')
+
+# reset solution
+miniapp.w0.assign(q0)
+miniapp.w1.assign(q0)
+
+PETSc.Sys.Print('### === --- Solving timeseries --- === ###')
+PETSc.Sys.Print('')
+
+# Solve nt timesteps
+with PETSc.Log.Event("timed_solves"):
+    miniapp.solve(args.nt,
+                  preproc=preproc,
+                  postproc=postproc)
+
+# # # === --- Solver diagnostics --- === # # #
+
+PETSc.Sys.Print('### === --- Iteration and timing results --- === ###')
+PETSc.Sys.Print('')
+
+# parallelism
+PETSc.Sys.Print(f'DoFs per timestep: {V.dim()}')
+PETSc.Sys.Print(f'Number of MPI ranks per timestep: {mesh.comm.size}')
+PETSc.Sys.Print(f'DoFs/rank: {V.dim()/mesh.comm.size}')
+PETSc.Sys.Print('')
+
+# Number of nonlinear iterations will be 1 per timestep for linear problems
+PETSc.Sys.Print(f'Nonlinear iterations: {str(nonlinear_its).rjust(5)} | Iterations per timestep: {str(nonlinear_its/args.nt).rjust(5)}')
+
+# Number of linear iterations, total and per timestep.
+PETSc.Sys.Print(f'Linear iterations: {str(linear_its).rjust(5)} | Iterations per timestep: {str(linear_its/args.nt).rjust(5)}')
+PETSc.Sys.Print('')
+
+# Timing measurements
+PETSc.Sys.Print(timer.string(timesteps_per_solve=1,
+                             total_iterations=linear_its, ndigits=5))
+PETSc.Sys.Print('')
diff --git a/asq_manuscript_examples/argparse_formatter.py b/asq_manuscript_examples/argparse_formatter.py
new file mode 100644
index 00000000..6a8ba4e5
--- /dev/null
+++ b/asq_manuscript_examples/argparse_formatter.py
@@ -0,0 +1,10 @@
+from argparse import ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter
+
+
+class DefaultsAndRawTextFormatter(ArgumentDefaultsHelpFormatter, RawDescriptionHelpFormatter):
+    '''
+    This just combines the effect of the two subclassed argparse formatters.
+    ArgumentDefaultsHelpFormatter will print the default argument values with `-h`.
+    RawDescriptionHelpFormatter means we can format the PETSc argument help more nicely.
+ ''' + pass diff --git a/asq_manuscript_examples/heat.py b/asq_manuscript_examples/heat.py new file mode 100644 index 00000000..8d29143f --- /dev/null +++ b/asq_manuscript_examples/heat.py @@ -0,0 +1,59 @@ +from firedrake import * +import asQ + +time_partition = [2, 2, 2, 2] +ensemble = asQ.create_ensemble( + time_partition, comm=COMM_WORLD) + +mesh = SquareMesh(nx=32, ny=32, L=1, + comm=ensemble.comm) +x, y = SpatialCoordinate(mesh) + +V = FunctionSpace(mesh, "CG", 1) +u0 = Function(V) +u0.interpolate(sin(0.25*pi*x)*cos(2*pi*y)) + +aaofunc = asQ.AllAtOnceFunction( + ensemble, time_partition, V) +aaofunc.initial_condition.assign(u0) + +dt = 0.05 +theta = 1 + +bcs = [DirichletBC(V, 0, sub_domain=1)] + + +def form_mass(u, v): + return u*v*dx + + +def form_function(u, v, t): + return inner(grad(u), grad(v))*dx + + +aaoform = asQ.AllAtOnceForm( + aaofunc, dt, theta, form_mass, + form_function, bcs=bcs) + +solver_parameters = { + 'snes_type': 'ksponly', + 'mat_type': 'matfree', + 'ksp_type': 'richardson', + 'ksp_rtol': 1e-12, + 'ksp_monitor': None, + 'ksp_converged_rate': None, + 'pc_type': 'python', + 'pc_python_type': 'asQ.CirculantPC', + 'circulant_block': {'pc_type': 'lu'}, + 'circulant_alpha': 1e-4} + +aaosolver = asQ.AllAtOnceSolver( + aaoform, aaofunc, solver_parameters) + +aaofunc.assign(u0) +for i in range(6): + aaosolver.solve() + aaofunc.bcast_field( + -1, aaofunc.initial_condition) + aaofunc.assign( + aaofunc.initial_condition) diff --git a/asq_manuscript_examples/linear_shallow_water_paradiag.py b/asq_manuscript_examples/linear_shallow_water_paradiag.py new file mode 100644 index 00000000..61d9afb3 --- /dev/null +++ b/asq_manuscript_examples/linear_shallow_water_paradiag.py @@ -0,0 +1,244 @@ +import firedrake as fd +from firedrake.petsc import PETSc + +from utils.timing import SolverTimer +from utils.planets import earth +from utils import units + +from argparse import ArgumentParser +from argparse_formatter import DefaultsAndRawTextFormatter + + +# get command arguments +parser = ArgumentParser( + description='Gravity wave testcase for ParaDiag solver using fully implicit linear SWE solver.', + epilog="""\ +Optional PETSc command line arguments: + + -circulant_alpha :float: The circulant parameter to use in the preconditioner. Default 1e-4. + -ksp_rtol :float: The relative residual drop required for convergence. Default 1e-11. + See https://petsc.org/release/manualpages/KSP/KSPSetTolerances/ + -ksp_type :str: The Krylov method to use for the all-at-once iterations. Default 'fgmres'. + Alternatives include richardson or gmres. + See https://petsc.org/release/manualpages/KSP/KSPSetType/ +""", + formatter_class=DefaultsAndRawTextFormatter +) + +parser.add_argument('--ref_level', type=int, default=3, help='Refinement level of icosahedral grid. Total number of cells is 20*4^ref_level.') +parser.add_argument('--dt', type=float, default=0.25, help='Timestep in hours.') +parser.add_argument('--theta', type=float, default=0.5, help='Parameter for implicit theta method.') +parser.add_argument('--nwindows', type=int, default=1, help='Number of time-windows to solve.') +parser.add_argument('--nslices', type=int, default=1, help='Number of time-slices in the all-at-once system. Must divide the number of MPI ranks exactly.') +parser.add_argument('--slice_length', type=int, default=4, help='Number of timesteps per time-slice. 
Total number of timesteps in the all-at-once system is nslices*slice_length.') +parser.add_argument('--vtkfile', type=str, default='vtk/gravity_waves_paradiag', help='Name of output vtk files for the last timestep of each window.') +parser.add_argument('--metrics_dir', type=str, default='metrics/linear_shallow_water', help='Directory to save paradiag output metrics to.') +parser.add_argument('--show_args', action='store_true', help='Print all the arguments when the script starts.') + +args = parser.parse_known_args() +args = args[0] + +if args.show_args: + PETSc.Sys.Print(args) + +PETSc.Sys.Print('') +PETSc.Sys.Print('### === --- Setting up --- === ###') +PETSc.Sys.Print('') + +# The time partition describes how many timesteps +# are included on each time-slice of the ensemble + +time_partition = [args.slice_length for _ in range(args.nslices)] +window_length = sum(time_partition) +nsteps = args.nwindows*window_length + +dt = args.dt*units.hour + +# Parameters for the implicit diagonal solve in step-(b). +# We use hybridisation to reduce the block to a smaller +# trace finite element space defined only on the element +# facets. The 'hybridscpc_condensed_field' options define +# how this trace system is solved. + +from utils.hybridisation import HybridisedSCPC # noqa: F401 + +block_parameters = { + "mat_type": "matfree", + "ksp_type": 'preonly', + "pc_type": "python", + "pc_python_type": f"{__name__}.HybridisedSCPC", + "hybridscpc_condensed_field": { + 'ksp_type': 'preonly', + 'pc_type': 'lu', + 'pc_factor_mat_solver_type': 'mumps', + } +} + +paradiag_parameters = { + 'snes_type': 'ksponly', # only solve 1 "Newton iteration" per window (i.e. a linear problem) + 'ksp_type': 'fgmres', # fgmres requires one less preconditioner application than gmres or richardson + 'ksp': { + 'monitor': None, # show the residual at every iteration + 'converged_rate': None, # show the contraction rate once the linear solve has converged + 'rtol': 1e-11, # relative residual tolerance + }, + 'pc_type': 'python', + 'pc_python_type': 'asQ.CirculantPC', # the alpha-circulant preconditioner + 'circulant_alpha': 1e-4, # set other values from command line using: -circulant_alpha + 'circulant_block': block_parameters, # options dictionary for the inner solve + 'circulant_state': 'linear', # system is linear so don't update the preconditioner reference state + 'aaos_jacobian_state': 'linear', # system is linear so don't update the jacobian reference state +} + + +# In this script we use the ShallowWaterMiniApp class to set +# everything up for us. The miniapp creates the ensemble, so +# at this stage we don't have a spatial communicator to define +# a mesh over. Instead we provide a function to the miniapp +# to create the mesh given a communicator. +def create_mesh(comm): + distribution_parameters = { + "partition": True, + "overlap_type": (fd.DistributedMeshOverlapType.VERTEX, 2) + } + mesh = fd.IcosahedralSphereMesh( + radius=earth.radius, refinement_level=args.ref_level, + distribution_parameters=distribution_parameters, + comm=comm) + x = fd.SpatialCoordinate(mesh) + mesh.init_cell_orientations(x) + return mesh + + +# We have a variety of utilities for the shallow water equations +# in the utils module. The two used here are: +# +# - The ShallowWaterMiniApp builds the compatible finite element +# forms for the shallow water equations and sets up the Paradiag +# object for us. 
It will also record some diagnostics, for +# example the advective Courant number (not used for this linear +# example), and writing the solution at the last timestep of each +# window to a VTK file. +# +# - The gravity_bumps submodule has expressions for the initial +# conditions for the test case of: +# Schreiber & Loft, 2019, "A Parallel Time-Integrator for Solving +# the Linearized Shallow Water Equations on the Rotating Sphere" + +from utils.shallow_water import (ShallowWaterMiniApp, + gravity_bumps as gwcase) + +miniapp = ShallowWaterMiniApp( + gravity=earth.Gravity, + topography_expression=gwcase.topography_expression, + velocity_expression=gwcase.velocity_expression, + depth_expression=gwcase.depth_expression, + reference_depth=gwcase.H, + create_mesh=create_mesh, + linear=True, + dt=dt, theta=args.theta, + time_partition=time_partition, + paradiag_sparameters=paradiag_parameters, + record_diagnostics={'cfl': False, 'file': True}, + file_name=args.vtkfile) + + +timer = SolverTimer() + + +# This function will be called before paradiag solves each time-window. We can use +# this to make the output a bit easier to read, and to time the window calculation +def window_preproc(swe_app, pdg, wndw): + PETSc.Sys.Print(f'### === --- Calculating time-window {wndw} --- === ###') + PETSc.Sys.Print('') + # for now we are interested in timing only the solve, this + # makes sure we don't time any synchronisation after prints. + with PETSc.Log.Event("window_preproc.Coll_Barrier"): + pdg.ensemble.ensemble_comm.Barrier() + timer.start_timing() + + +# This function will be called after paradiag solves each time-window. We can use +# this to finish the window calculation timing and print the result. +# The time at the last timestep of the window is also printed. +def window_postproc(swe_app, pdg, wndw): + timer.stop_timing() + PETSc.Sys.Print('') + PETSc.Sys.Print(f'Window solution time: {timer.times[-1]}') + PETSc.Sys.Print('') + + if pdg.layout.is_local(miniapp.save_step): + nt = (pdg.total_windows - 1)*pdg.ntimesteps + (miniapp.save_step + 1) + time = nt*pdg.aaoform.dt + comm = miniapp.ensemble.comm + PETSc.Sys.Print(f'Hours = {float(time/units.hour)}', comm=comm) + PETSc.Sys.Print(f'Days = {float(time/earth.day)}', comm=comm) + PETSc.Sys.Print('', comm=comm) + + +paradiag = miniapp.paradiag +ics = paradiag.solver.aaofunc.initial_condition.copy(deepcopy=True) + +# Setup all solver objects. The firedrake DM and options management +# makes it difficult to setup some preconditioners without actually +# calling `solve`, so we just run once to set everything up. 
+PETSc.Sys.Print('### === --- Setting up solver and prefactoring --- === ###') +PETSc.Sys.Print('') + +with PETSc.Log.Event("warmup_solve"): + miniapp.solve(nwindows=1) + +# reset solution and iteration counts for timed solved +timer.times.clear() +paradiag.reset_diagnostics() +aaofunc = paradiag.solver.aaofunc +aaofunc.bcast_field(-1, aaofunc.initial_condition) +aaofunc.assign(aaofunc.initial_condition) + +PETSc.Sys.Print('### === --- Calculating parallel solution --- === ###') +PETSc.Sys.Print('') + +with PETSc.Log.Event("timed_solves"): + miniapp.solve(nwindows=args.nwindows, + preproc=window_preproc, + postproc=window_postproc) + +PETSc.Sys.Print('### === --- Iteration and timing results --- === ###') +PETSc.Sys.Print('') + +from asQ import write_paradiag_metrics +write_paradiag_metrics(paradiag, directory=args.metrics_dir) + +nw = paradiag.total_windows +nt = paradiag.total_timesteps +PETSc.Sys.Print(f'windows: {nw}') +PETSc.Sys.Print(f'timesteps: {nt}') +PETSc.Sys.Print('') + +# Show the parallel partition sizes. +ensemble = paradiag.ensemble +mesh = miniapp.mesh +W = miniapp.W + +PETSc.Sys.Print(f'Total DoFs per window: {W.dim()*window_length}') +PETSc.Sys.Print(f'DoFs per timestep: {W.dim()}') +PETSc.Sys.Print(f'Total number of MPI ranks: {ensemble.global_comm.size}') +PETSc.Sys.Print(f'Number of MPI ranks per timestep: {mesh.comm.size}') +PETSc.Sys.Print(f'DoFs/rank: {W.dim()/mesh.comm.size}') +PETSc.Sys.Print(f'Complex block DoFs/rank: {2*W.dim()/mesh.comm.size}') +PETSc.Sys.Print('') + +# paradiag collects a few iteration counts for us +lits = paradiag.linear_iterations +nlits = paradiag.nonlinear_iterations +blits = paradiag.block_iterations.data() + +PETSc.Sys.Print(f'Nonlinear iterations: {nlits} | Iterations per window: {nlits/nw}') +PETSc.Sys.Print(f'Linear iterations: {lits} | Iterations per window: {lits/nw}') +PETSc.Sys.Print(f'Total block linear iterations: {blits}') +PETSc.Sys.Print(f'Iterations per block solve: {blits/lits}') +PETSc.Sys.Print('') + +PETSc.Sys.Print(timer.string(timesteps_per_solve=window_length, + total_iterations=lits, ndigits=5)) +PETSc.Sys.Print('') diff --git a/asq_manuscript_examples/linear_shallow_water_serial.py b/asq_manuscript_examples/linear_shallow_water_serial.py new file mode 100644 index 00000000..5d41aec6 --- /dev/null +++ b/asq_manuscript_examples/linear_shallow_water_serial.py @@ -0,0 +1,215 @@ +import firedrake as fd +from firedrake.petsc import PETSc +from firedrake.output import VTKFile + +from utils.serial import SerialMiniApp +from utils.timing import SolverTimer +from utils.planets import earth +from utils import units + +from argparse import ArgumentParser +from argparse_formatter import DefaultsAndRawTextFormatter + + +# get command arguments +parser = ArgumentParser( + description='Gravity wave testcase for serial-in-time solver using fully implicit linear SWE.', + formatter_class=DefaultsAndRawTextFormatter +) + +parser.add_argument('--ref_level', type=int, default=3, help='Refinement level of icosahedral grid. 
Total number of cells is 20*4^ref_level.') +parser.add_argument('--nt', type=int, default=20, help='Number of time steps.') +parser.add_argument('--dt', type=float, default=0.25, help='Timestep in hours.') +parser.add_argument('--theta', type=float, default=0.5, help='Parameter for implicit theta method.') +parser.add_argument('--vtkfile', type=str, default='vtk/gravity_waves_serial', help='Name of output vtk files') +parser.add_argument('--write_freq', type=int, default=1, help='How often to write the solution to file.') +parser.add_argument('--show_args', action='store_true', help='Print all the arguments when the script starts.') + +args = parser.parse_known_args() +args = args[0] + +if args.show_args: + PETSc.Sys.Print(args) + +PETSc.Sys.Print('') +PETSc.Sys.Print('### === --- Setting up --- === ###') +PETSc.Sys.Print('') + +# Icosahedral sphere mesh +distribution_parameters = { + "partition": True, + "overlap_type": (fd.DistributedMeshOverlapType.VERTEX, 2) +} +mesh = fd.IcosahedralSphereMesh( + radius=earth.radius, refinement_level=args.ref_level, + distribution_parameters=distribution_parameters) +x = fd.SpatialCoordinate(mesh) +mesh.init_cell_orientations(x) + +# time step +dt = args.dt*units.hour + +# We have a variety of utilities for the shallow water equations +# in the utils module. The two used here are: +# +# - The function spaces, and compatible finite element forms +# for the linearisation around a state of rest. +# +# - The gravity_bumps submodule has expressions for the initial +# conditions for the test case of: +# Schreiber & Loft, 2019, "A Parallel Time-Integrator for Solving +# the Linearized Shallow Water Equations on the Rotating Sphere" + +import utils.shallow_water as swe +import utils.shallow_water.gravity_bumps as gwcase + +# shallow water equation function spaces (velocity and depth) +W = swe.default_function_space(mesh) + +# parameters +g = earth.Gravity +H = gwcase.H +f = gwcase.coriolis_expression(*x) + +# initial conditions +w_initial = fd.Function(W) +u_initial, h_initial = w_initial.subfunctions + +u_initial.project(gwcase.velocity_expression(*x)) +h_initial.project(gwcase.depth_expression(*x)) + + +# shallow water equation forms +def form_mass(u, h, v, q): + return swe.linear.form_mass(mesh, u, h, v, q) + + +def form_function(u, h, v, q, t): + return swe.linear.form_function(mesh, g, H, f, + u, h, v, q, t) + + +# solver parameters for the implicit solve + +from utils.hybridisation import HybridisedSCPC # noqa: F401 +block_parameters = { + "mat_type": "matfree", + 'ksp_type': 'preonly', + "pc_type": "python", + "pc_python_type": f"{__name__}.HybridisedSCPC", + "hybridscpc_condensed_field": { + 'ksp_type': 'preonly', + 'pc_type': 'lu', + 'pc_factor_mat_solver_type': 'mumps', + } +} + +serial_parameters = { + 'snes': { # 'ksponly' means we are solving a linear problem, and lagging prevents the block from being refactorised every timestep. + 'type': 'ksponly', + 'lag_jacobian': -2, + 'lag_jacobian_persists': None, + 'lag_preconditioner': -2, + 'lag_preconditioner_persists': None, + }, + 'ksp': { + 'monitor': None, + 'converged_rate': None, + 'rtol': 1e-11, + }, +} +serial_parameters.update(block_parameters) + +# The SerialMiniApp class will set up the implicit-theta system +# for the serial-in-time method. 
+miniapp = SerialMiniApp(dt, args.theta, w_initial, + form_mass, form_function, + serial_parameters) + +PETSc.Sys.Print('### === --- Timestepping loop --- === ###') +linear_its = 0 +nonlinear_its = 0 + +ofile = VTKFile(args.vtkfile+'.pvd') +uout = fd.Function(u_initial.function_space(), name='velocity') +hout = fd.Function(h_initial.function_space(), name='depth') + +uout.assign(u_initial) +hout.assign(h_initial - gwcase.H) +ofile.write(uout, hout, time=0) + +timer = SolverTimer() + + +# This function will be called before solving each timestep. We can use +# this to make the output a bit easier to read, and to time the calculation +def preproc(app, step, t): + PETSc.Sys.Print('') + PETSc.Sys.Print(f'=== --- Timestep {step} --- ===') + PETSc.Sys.Print('') + # for now we are interested in timing only the solve, this + # makes sure we don't time any synchronisation after prints. + with PETSc.Log.Event("timestep_preproc.Coll_Barrier"): + mesh.comm.Barrier() + timer.start_timing() + + +# This function will be called after solving each timestep. We can use +# this to finish the timestep calculation timing and print the result, +# to record the number of iterations, and to write the solution to file. +def postproc(app, step, t): + global linear_its, nonlinear_its + timer.stop_timing() + PETSc.Sys.Print('') + PETSc.Sys.Print(f'Timestep solution time: {timer.times[-1]}') + PETSc.Sys.Print('') + + linear_its += app.nlsolver.snes.getLinearSolveIterations() + nonlinear_its += app.nlsolver.snes.getIterationNumber() + + if ((step + 1) % args.write_freq) == 0: + u, h = app.w0.subfunctions + uout.assign(u) + hout.assign(h-gwcase.H) + ofile.write(uout, hout, time=t/units.hour) + + +# Setup all solver objects. The firedrake DM and options management +# makes it difficult to setup some preconditioners without actually +# calling `solve`, so we just run once to set everything up. +PETSc.Sys.Print('') +PETSc.Sys.Print('### === --- Setting up solver and prefactoring --- === ###') +PETSc.Sys.Print('') +with PETSc.Log.Event("warmup_solve"): + miniapp.solve(nt=1) +PETSc.Sys.Print('') + +PETSc.Sys.Print('### === --- Solving timeseries --- === ###') +PETSc.Sys.Print('') + +# Solve nt timesteps +with PETSc.Log.Event("timed_solves"): + miniapp.solve(nt=args.nt, + preproc=preproc, + postproc=postproc) + +PETSc.Sys.Print('### === --- Iteration counts --- === ###') +PETSc.Sys.Print('') + +# parallelism +PETSc.Sys.Print(f'DoFs per timestep: {W.dim()}') +PETSc.Sys.Print(f'Number of MPI ranks per timestep: {mesh.comm.size}') +PETSc.Sys.Print(f'DoFs/rank: {W.dim()/mesh.comm.size}') +PETSc.Sys.Print('') + +# Number of nonlinear iterations will be 1 per timestep for linear problems +PETSc.Sys.Print(f'Nonlinear iterations: {str(nonlinear_its).rjust(5)} | Iterations per window: {str(nonlinear_its/args.nt).rjust(5)}') + +# Number of linear iterations of the all-at-once system, total and per window. 
+PETSc.Sys.Print(f'Linear iterations: {str(linear_its).rjust(5)} | Iterations per window: {str(linear_its/args.nt).rjust(5)}') +PETSc.Sys.Print('') + +# Timing measurements +PETSc.Sys.Print(timer.string(timesteps_per_solve=1, + total_iterations=linear_its, ndigits=5)) +PETSc.Sys.Print('') diff --git a/asq_manuscript_examples/nonlinear_shallow_water_paradiag.py b/asq_manuscript_examples/nonlinear_shallow_water_paradiag.py new file mode 100644 index 00000000..392e4f1f --- /dev/null +++ b/asq_manuscript_examples/nonlinear_shallow_water_paradiag.py @@ -0,0 +1,270 @@ +from firedrake.petsc import PETSc + +from utils.timing import SolverTimer +from utils.planets import earth +from utils import units + +import utils.shallow_water as swe +from utils.shallow_water import galewsky + +from math import sqrt +from functools import partial + +import argparse +from argparse_formatter import DefaultsAndRawTextFormatter + + +# get command arguments +parser = argparse.ArgumentParser( + description='Galewsky testcase for ParaDiag solver using fully implicit SWE solver.', + epilog="""\ +Optional PETSc command line arguments: + + -circulant_alpha :float: The circulant parameter to use in the preconditioner. Default 1e-4. +""", + formatter_class=DefaultsAndRawTextFormatter +) + +parser.add_argument('--ref_level', type=int, default=3, help='Refinement level of icosahedral grid. Total number of cells is 20*4^ref_level.') +parser.add_argument('--base_level', type=int, default=2, help='Base refinement level for multigrid.') +parser.add_argument('--dt', type=float, default=0.5, help='Timestep in hours.') +parser.add_argument('--theta', type=float, default=0.5, help='Parameter for implicit theta method.') +parser.add_argument('--nwindows', type=int, default=1, help='Number of time-windows to solve.') +parser.add_argument('--nslices', type=int, default=1, help='Number of time-slices in the all-at-once system. Must divide the number of MPI ranks exactly.') +parser.add_argument('--slice_length', type=int, default=4, help='Number of timesteps per time-slice. 
Total number of timesteps in the all-at-once system is nslices*slice_length.') +parser.add_argument('--vtkfile', type=str, default='vtk/galewsky_paradiag', help='Name of output vtk files for the last timestep of each window.') +parser.add_argument('--metrics_dir', type=str, default='metrics/nonlinear_shallow_water', help='Directory to save paradiag output metrics to.') +parser.add_argument('--show_args', action='store_true', help='Output all the arguments.') + +args = parser.parse_known_args() +args = args[0] + +if args.show_args: + PETSc.Sys.Print(args) + +PETSc.Sys.Print('') +PETSc.Sys.Print('### === --- Setting up --- === ###') +PETSc.Sys.Print('') + +# This script broadly follows the same structure as the +# linear_shallow_water_paradiag.py script, with the main +# differences being: +# +# - a hierarchy of meshes is required for the multigrid method +# - the galewsky test case is used instead of the gravity waves case: +# Galewsky et al, 2004, "An initial-value problem for testing +# numerical models of the global shallow-water equations" +# - the options parameters specify a multigrid scheme not hybridisation + +# time steps + +time_partition = tuple((args.slice_length for _ in range(args.nslices))) +window_length = sum(time_partition) +nsteps = args.nwindows*window_length + +dt = args.dt*units.hour +H = galewsky.H0 +g = earth.Gravity + +# parameters for the implicit diagonal solve in step-(b) + +from utils.mg import ManifoldTransferManager # noqa: F401 + +block_parameters = { + 'mat_type': 'matfree', + 'ksp_type': 'fgmres', + 'ksp': { + 'rtol': 1e-3, + 'max_it': 30, + }, + 'pc_type': 'mg', + 'pc_mg_cycle_type': 'v', + 'pc_mg_type': 'multiplicative', + 'mg': { + 'transfer_manager': f'{__name__}.ManifoldTransferManager', + 'levels': { + 'ksp_type': 'gmres', + 'ksp_max_it': 3, + 'pc_type': 'python', + 'pc_python_type': 'firedrake.PatchPC', + 'patch': { + 'pc_patch': { + 'save_operators': True, + 'partition_of_unity': True, + 'sub_mat_type': 'seqdense', + 'construct_dim': 0, + 'construct_type': 'vanka', + 'local_type': 'additive', + 'precompute_element_tensors': True, + 'symmetrise_sweep': False + }, + 'sub': { + 'ksp_type': 'preonly', + 'pc_type': 'lu', + 'pc_factor_shift_type': 'nonzero', + } + } + }, + 'coarse': { + 'pc_type': 'python', + 'pc_python_type': 'firedrake.AssembledPC', + 'assembled': { + 'ksp_type': 'preonly', + 'pc_type': 'lu', + 'pc_factor_mat_solver_type': 'mumps', + }, + }, + } +} + +# atol is the same for Newton and (right-preconditioned) Krylov +atol = 1e4 +patol = sqrt(window_length)*atol +paradiag_parameters = { + 'snes': { + 'linesearch_type': 'basic', + 'monitor': None, + 'converged_reason': None, + 'atol': patol, + 'rtol': 1e-10, + 'stol': 1e-12, + 'ksp_ew': None, + 'ksp_ew_version': 1, + 'ksp_ew_rtol0': 1e-1, + 'ksp_ew_threshold': 1e-2, + }, + 'ksp_type': 'fgmres', + 'ksp': { + 'monitor': None, + 'converged_rate': None, + 'rtol': 1e-5, + 'atol': patol, + }, + 'pc_type': 'python', + 'pc_python_type': 'asQ.CirculantPC', + 'circulant_block': block_parameters, + 'circulant_alpha': 1e-4, +} + +create_mesh = partial( + swe.create_mg_globe_mesh, + ref_level=args.ref_level, + base_level=args.base_level, + coords_degree=1) + +miniapp = swe.ShallowWaterMiniApp(gravity=earth.Gravity, + topography_expression=galewsky.topography_expression, + velocity_expression=galewsky.velocity_expression, + depth_expression=galewsky.depth_expression, + reference_depth=galewsky.H0, + reference_state=True, + create_mesh=create_mesh, + dt=dt, theta=args.theta, + 
time_partition=time_partition, + paradiag_sparameters=paradiag_parameters, + file_name=args.vtkfile, + record_diagnostics={'cfl': True, 'file': True}) + +timer = SolverTimer() + +fround = lambda x: round(float(x), 2) + + +def window_preproc(swe_app, pdg, wndw): + PETSc.Sys.Print('') + PETSc.Sys.Print(f'### === --- Calculating time-window {wndw} --- === ###') + PETSc.Sys.Print('') + with PETSc.Log.Event("window_preproc.Coll_Barrier"): + pdg.ensemble.ensemble_comm.Barrier() + timer.start_timing() + + +def window_postproc(swe_app, pdg, wndw): + timer.stop_timing() + PETSc.Sys.Print('') + PETSc.Sys.Print(f'Window solution time: {timer.times[-1]}') + PETSc.Sys.Print('') + + if miniapp.layout.is_local(miniapp.save_step): + nt = (pdg.total_windows - 1)*pdg.ntimesteps + (miniapp.save_step + 1) + time = float(nt*pdg.aaoform.dt) + comm = miniapp.ensemble.comm + PETSc.Sys.Print(f'Maximum CFL = {fround(swe_app.cfl_series[wndw])}', comm=comm) + PETSc.Sys.Print(f'Hours = {fround(time/units.hour)}', comm=comm) + PETSc.Sys.Print(f'Days = {fround(time/earth.day)}', comm=comm) + + +paradiag = miniapp.paradiag +ics = paradiag.solver.aaofunc.initial_condition.copy(deepcopy=True) + +# Setup all solver objects. The firedrake DM and options management +# makes it difficult to setup some preconditioners without actually +# calling `solve`, so we just run once to set everything up. +PETSc.Sys.Print('### === --- Setting up solver and prefactoring --- === ###') + +with PETSc.Log.Event("warmup_solve"): + miniapp.solve(nwindows=1, + preproc=window_preproc, + postproc=window_postproc) +PETSc.Sys.Print('') + +# reset +timer.times.clear() +paradiag.reset_diagnostics() +paradiag.aaofunc.assign(ics) + +PETSc.Sys.Print('### === --- Calculating parallel solution --- === ###') + +with PETSc.Log.Event("timed_solves"): + miniapp.solve(nwindows=args.nwindows, + preproc=window_preproc, + postproc=window_postproc) + +PETSc.Sys.Print('### === --- Iteration counts --- === ###') +PETSc.Sys.Print('') + +from asQ import write_paradiag_metrics +write_paradiag_metrics(paradiag, directory=args.metrics_dir) + +nw = paradiag.total_windows +nt = paradiag.total_timesteps +PETSc.Sys.Print(f'windows: {nw}') +PETSc.Sys.Print(f'timesteps: {nt}') +PETSc.Sys.Print('') + +# Show the parallel partition sizes. 
+ensemble = paradiag.ensemble +mesh = miniapp.mesh +W = miniapp.W + +PETSc.Sys.Print(f'Total DoFs per window: {W.dim()*window_length}') +PETSc.Sys.Print(f'DoFs per timestep: {W.dim()}') +PETSc.Sys.Print(f'Total number of MPI ranks: {ensemble.global_comm.size}') +PETSc.Sys.Print(f'Number of MPI ranks per timestep: {mesh.comm.size}') +PETSc.Sys.Print(f'DoFs/rank: {W.dim()/mesh.comm.size}') +PETSc.Sys.Print(f'Complex block DoFs/rank: {2*W.dim()/mesh.comm.size}') +PETSc.Sys.Print('') + +# paradiag collects a few iteration counts for us +lits = paradiag.linear_iterations +nlits = paradiag.nonlinear_iterations +blits = paradiag.block_iterations.data() + +PETSc.Sys.Print(f'Nonlinear iterations: {nlits} | Iterations per window: {nlits/nw}') +PETSc.Sys.Print(f'Linear iterations: {lits} | Iterations per window: {lits/nw}') +PETSc.Sys.Print(f'Total block linear iterations: {blits}') +PETSc.Sys.Print(f'Iterations per block solve: {blits/lits}') +PETSc.Sys.Print(f'Minimum block iterations per solve: {min(blits)/lits}') +PETSc.Sys.Print(f'Maximum block iterations per solve: {max(blits)/lits}') +PETSc.Sys.Print('') + +ensemble.global_comm.Barrier() +if miniapp.layout.is_local(miniapp.save_step): + PETSc.Sys.Print(f'Maximum CFL = {max(miniapp.cfl_series)}') + PETSc.Sys.Print(f'Minimum CFL = {min(miniapp.cfl_series)}') + PETSc.Sys.Print('') +ensemble.global_comm.Barrier() + +PETSc.Sys.Print(timer.string(timesteps_per_solve=window_length, + total_iterations=lits, ndigits=5)) +PETSc.Sys.Print('') diff --git a/asq_manuscript_examples/nonlinear_shallow_water_serial.py b/asq_manuscript_examples/nonlinear_shallow_water_serial.py new file mode 100644 index 00000000..a415b78f --- /dev/null +++ b/asq_manuscript_examples/nonlinear_shallow_water_serial.py @@ -0,0 +1,252 @@ +import firedrake as fd +from firedrake.petsc import PETSc +from firedrake.output import VTKFile + +from utils.serial import SerialMiniApp +from utils.timing import SolverTimer +from utils.planets import earth +from utils import units + +from argparse import ArgumentParser +from argparse_formatter import DefaultsAndRawTextFormatter + +import utils.shallow_water as swe +from utils.shallow_water import galewsky +from utils import diagnostics + + +# get command arguments +parser = ArgumentParser( + description='Galewsky testcase for serial-in-time solver using fully implicit nonlinear SWE.', + formatter_class=DefaultsAndRawTextFormatter +) + +parser.add_argument('--ref_level', type=int, default=3, help='Refinement level of icosahedral grid. Total number of cells is 20*4^ref_level.') +parser.add_argument('--base_level', type=int, default=2, help='Base refinement level for multigrid.') +parser.add_argument('--nt', type=int, default=10, help='Number of time steps.') +parser.add_argument('--dt', type=float, default=0.5, help='Timestep in hours.') +parser.add_argument('--theta', type=float, default=0.5, help='Parameter for implicit theta method. 
0.5 for trapezium rule, 1 for backwards Euler.') +parser.add_argument('--degree', type=float, default=swe.default_degree(), help='Degree of the depth function space.') +parser.add_argument('--vtkfile', type=str, default='vtk/galewsky_serial', help='Name of output vtk files') +parser.add_argument('--write_freq', type=int, default=1, help='How often to write the solution to file.') +parser.add_argument('--show_args', action='store_true', help='Output all the arguments.') + +args = parser.parse_known_args() +args = args[0] + +if args.show_args: + PETSc.Sys.Print(args) + +PETSc.Sys.Print('') +PETSc.Sys.Print('### === --- Setting up --- === ###') +PETSc.Sys.Print('') + +# This script broadly follows the same structure as the +# linear_shallow_water_serial.py script, with the main +# differences being: +# +# - a hierarchy of meshes is required for the multigrid method +# - the galewsky test case is used instead of the gravity waves case: +# Galewsky et al, 2004, "An initial-value problem for testing +# numerical models of the global shallow-water equations" +# - the options parameters specify a multigrid scheme not hybridisation + +# hierarchy of icosahedral sphere meshes for multigrid +mesh = swe.create_mg_globe_mesh(ref_level=args.ref_level, + base_level=args.base_level, + coords_degree=1) +x = fd.SpatialCoordinate(mesh) + +# time step +dt = args.dt*units.hour + +# shallow water equation function spaces (velocity and depth) +W = swe.default_function_space(mesh, degree=args.degree) + +# parameters +g = earth.Gravity + +b = galewsky.topography_expression(*x) +f = swe.earth_coriolis_expression(*x) + +# initial conditions +w_initial = fd.Function(W) +u_initial = w_initial.subfunctions[0] +h_initial = w_initial.subfunctions[1] + +u_initial.interpolate(galewsky.velocity_expression(*x)) +h_initial.interpolate(galewsky.depth_expression(*x)) + +# current and next timestep +w0 = fd.Function(W).assign(w_initial) +w1 = fd.Function(W).assign(w_initial) + +H = galewsky.H0 + + +# shallow water equation forms +def form_function(u, h, v, q, t): + return swe.nonlinear.form_function(mesh, g, b, f, + u, h, v, q, t) + + +def form_mass(u, h, v, q): + return swe.nonlinear.form_mass(mesh, u, h, v, q) + + +# grid transfers for non-nested manifolds. 
+from utils.mg import ManifoldTransferManager # noqa: F401 + +# solver parameters for the implicit solve +mg_sparameters = { + 'mat_type': 'matfree', + 'pc_type': 'mg', + 'pc_mg_cycle_type': 'v', + 'pc_mg_type': 'multiplicative', + 'mg': { + 'transfer_manager': f'{__name__}.ManifoldTransferManager', + 'levels': { + 'ksp_type': 'gmres', + 'ksp_max_it': 3, + 'pc_type': 'python', + 'pc_python_type': 'firedrake.PatchPC', + 'patch': { + 'pc_patch': { + 'save_operators': True, + 'partition_of_unity': True, + 'sub_mat_type': 'seqdense', + 'construct_dim': 0, + 'construct_type': 'vanka', + 'local_type': 'additive', + 'precompute_element_tensors': True, + 'symmetrise_sweep': False + }, + 'sub': { + 'ksp_type': 'preonly', + 'pc_type': 'lu', + 'pc_factor_shift_type': 'nonzero', + } + } + }, + 'coarse': { + 'pc_type': 'python', + 'pc_python_type': 'firedrake.AssembledPC', + 'assembled': { + 'ksp_type': 'preonly', + 'pc_type': 'lu', + 'pc_factor_mat_solver_type': 'mumps', + }, + }, + } +} + +# atol is the same for Newton and (right-preconditioned) Krylov +atol = 1e4 +serial_parameters = { + 'snes': { + 'monitor': None, + 'converged_reason': None, + 'rtol': 1e-12, + 'atol': atol, + 'ksp_ew': None, + 'ksp_ew_version': 1, + }, + 'ksp_type': 'fgmres', + 'ksp': { + 'monitor': None, + 'converged_rate': None, + 'atol': atol, + 'rtol': 1e-5, + }, +} +serial_parameters.update(mg_sparameters) + +# The SerialMiniApp class will set up the implicit-theta system +# for the serial-in-time method. +miniapp = SerialMiniApp(dt, args.theta, w_initial, + form_mass, form_function, + serial_parameters) + +PETSc.Sys.Print('### === --- Timestepping loop --- === ###') +linear_its = 0 +nonlinear_its = 0 + +ofile = VTKFile(f"{args.vtkfile}.pvd") +uout = fd.Function(u_initial.function_space(), name='velocity') +hout = fd.Function(h_initial.function_space(), name='elevation') + +potential_vorticity = diagnostics.potential_vorticity_calculator( + u_initial.function_space(), name='vorticity') + +uout.assign(u_initial) +hout.assign(h_initial) +ofile.write(uout, hout, potential_vorticity(uout), time=0) + +timer = SolverTimer() + + +def preproc(app, step, t): + PETSc.Sys.Print('') + PETSc.Sys.Print(f'=== --- Timestep {step} --- ===') + PETSc.Sys.Print('') + with PETSc.Log.Event("timestep_preproc.Coll_Barrier"): + mesh.comm.Barrier() + timer.start_timing() + + +def postproc(app, step, t): + global linear_its, nonlinear_its + timer.stop_timing() + PETSc.Sys.Print('') + PETSc.Sys.Print(f'Timestep solution time: {timer.times[-1]}') + PETSc.Sys.Print('') + + linear_its += app.nlsolver.snes.getLinearSolveIterations() + nonlinear_its += app.nlsolver.snes.getIterationNumber() + + if ((step + 1) % args.write_freq) == 0: + uout.assign(miniapp.w0.subfunctions[0]) + hout.assign(miniapp.w0.subfunctions[1]) + ofile.write(uout, hout, potential_vorticity(uout), time=t) + + +# Setup all solver objects. The firedrake DM and options management +# makes it difficult to setup some preconditioners without actually +# calling `solve`, so we just run once to set everything up. 
+PETSc.Sys.Print('')
+PETSc.Sys.Print('### === --- Setting up solver and prefactoring --- === ###')
+PETSc.Sys.Print('')
+with PETSc.Log.Event("warmup_solve"):
+    miniapp.solve(nt=1)
+PETSc.Sys.Print('')
+
+miniapp.w0.assign(w_initial)
+miniapp.w1.assign(w_initial)
+linear_its = 0
+nonlinear_its = 0
+
+PETSc.Sys.Print('### === --- Solving timeseries --- === ###')
+PETSc.Sys.Print('')
+
+with PETSc.Log.Event("timed_solves"):
+    miniapp.solve(nt=args.nt,
+                  preproc=preproc,
+                  postproc=postproc)
+
+PETSc.Sys.Print('### === --- Iteration counts --- === ###')
+PETSc.Sys.Print('')
+
+# parallelism
+PETSc.Sys.Print(f'DoFs per timestep: {W.dim()}')
+PETSc.Sys.Print(f'Number of MPI ranks per timestep: {mesh.comm.size}')
+PETSc.Sys.Print(f'DoFs/rank: {W.dim()/mesh.comm.size}')
+PETSc.Sys.Print('')
+
+PETSc.Sys.Print(f'Nonlinear iterations: {str(nonlinear_its).rjust(5)} | Iterations per window: {str(nonlinear_its/args.nt).rjust(5)}')
+PETSc.Sys.Print(f'Linear iterations: {str(linear_its).rjust(5)} | Iterations per window: {str(linear_its/args.nt).rjust(5)}')
+PETSc.Sys.Print('')
+
+# Timing measurements
+PETSc.Sys.Print(timer.string(timesteps_per_solve=1,
+                             total_iterations=linear_its, ndigits=5))
+PETSc.Sys.Print('')
diff --git a/asq_manuscript_examples/vertical_slice_paradiag.py b/asq_manuscript_examples/vertical_slice_paradiag.py
new file mode 100644
index 00000000..7679419c
--- /dev/null
+++ b/asq_manuscript_examples/vertical_slice_paradiag.py
@@ -0,0 +1,402 @@
+import firedrake as fd
+from firedrake.petsc import PETSc
+from firedrake.output import VTKFile
+
+from utils.timing import SolverTimer
+from utils import compressible_flow as euler
+from utils.diagnostics import convective_cfl_calculator
+from math import sqrt, pi
+import asQ
+
+from argparse import ArgumentParser
+from argparse_formatter import DefaultsAndRawTextFormatter
+
+Print = PETSc.Sys.Print
+
+
+def initial_conditions(mesh, W, Vv, gas, H, perturbation=True, hydrostatic=False):
+    if hydrostatic:
+        raise NotImplementedError
+
+    x, z = fd.SpatialCoordinate(mesh)
+    V2 = W.subfunctions[1]
+    up = fd.as_vector([fd.Constant(0.0), fd.Constant(1.0)])  # up direction
+
+    Un = fd.Function(W)
+    un, rhon, thetan = Un.subfunctions
+
+    Tsurf = fd.Constant(300.)
+    thetab = Tsurf*fd.exp(gas.N**2*z/gas.g)
+
+    thetan.interpolate(thetab)
+
+    Pi = fd.Function(V2)
+
+    # background state is in hydrostatic balance
+    euler.hydrostatic_rho(Vv, V2, mesh, thetan, rhon,
+                          pi_boundary=fd.Constant(1.0),
+                          gas=gas, Up=up, top=True, Pi=Pi)
+
+    un.project(fd.as_vector([20.0, 0.0]))
+
+    Uback = Un.copy(deepcopy=True)
+
+    # temperature perturbation to initiate gravity wave
+    if perturbation:
+        a = fd.Constant(5e3)
+        dtheta = fd.Constant(1e-2)
+
+        theta_pert = dtheta*fd.sin(pi*z/H)/(1 + x**2/a**2)
+        thetan.interpolate(thetan + theta_pert)
+
+    return Un, Uback
+
+
+parser = ArgumentParser(
+    description='Nonhydrostatic gravity wave testcase for ParaDiag solver using fully implicit vertical slice.',
+    epilog="""\
+Optional PETSc command line arguments:
+
+   -circulant_alpha :float: The circulant parameter to use in the preconditioner. Default 1e-5.
+   -circulant_block_rtol :float: The relative tolerance to solve each block to. Default 1e-5.
+""", + formatter_class=DefaultsAndRawTextFormatter +) +parser.add_argument('--nlayers', type=int, default=10, help='Number of layers in the vertical direction.') +parser.add_argument('--ncolumns', type=int, default=300, help='Number of columns in the horizontal direction.') +parser.add_argument('--dt', type=float, default=12, help='Timestep in seconds.') +parser.add_argument('--theta', type=float, default=0.5, help='Parameter for implicit theta method.') +parser.add_argument('--nwindows', type=int, default=1, help='Number of time-windows to solve.') +parser.add_argument('--nslices', type=int, default=1, help='Number of time-slices in the all-at-once system. Must divide the number of MPI ranks exactly.') +parser.add_argument('--slice_length', type=int, default=4, help='Number of timesteps per time-slice. Total number of timesteps in the all-at-once system is nslices*slice_length.') +parser.add_argument('--degree', type=int, default=1, help='Degree of finite element space (the DG space). Default 1.') +parser.add_argument('--vtkfile', type=str, default='vtk/gravity_wave_paradiag', help='Name of output vtk files') +parser.add_argument('--metrics_dir', type=str, default='metrics/vertical_slice', help='Directory to save paradiag output metrics to.') +parser.add_argument('--show_args', action='store_true', help='Output all the arguments.') + +args = parser.parse_known_args() +args = args[0] + +if args.show_args: + Print(args) + +Print('') +Print('### === --- Setting up --- === ###') +Print('') + +# This script broadly follows the same structure +# as the advection_paradiag.py script, with the main +# differences being: +# +# - more complicated finite element spaces and forms are required, +# which are provided by the utils.compressible_flow module. +# - more complicated initial conditions (the function at the top of +# the file), from the test case of +# Skamarock & Klemp, 1994, "Efficiency and accuracy of the +# Klemp-Wilhelmson time-splitting technique". +# - options parameters specifying more involved block preconditioning. 
+ +# set up the ensemble communicator for space-time parallelism +time_partition = tuple((args.slice_length for _ in range(args.nslices))) +window_length = sum(time_partition) + +global_comm = fd.COMM_WORLD +ensemble = asQ.create_ensemble(time_partition, comm=global_comm) + +dt = args.dt + +# set up the mesh +L = 300e3 +H = 10e3 + +distribution_parameters = { + "partition": True, + "overlap_type": (fd.DistributedMeshOverlapType.VERTEX, 2) +} + +base_mesh = fd.PeriodicIntervalMesh( + args.ncolumns, float(L), + distribution_parameters=distribution_parameters, + comm=ensemble.comm) +base_mesh.coordinates.dat.data[:] -= float(L)/2 + +mesh = fd.ExtrudedMesh(base_mesh, + layers=args.nlayers, + layer_height=float(H/args.nlayers)) +n = fd.FacetNormal(mesh) + +# compatible function spaces + +W, Vv = euler.function_space(mesh, horizontal_degree=args.degree, + vertical_degree=args.degree, + vertical_velocity_space=True) +V1, V2, Vt = W.subfunctions # velocity, density, temperature + +Print(f'DoFs per timestep: {W.dim()}', comm=global_comm) +Print(f'Number of MPI ranks per timestep: {mesh.comm.size}', comm=global_comm) +Print(f'DoFs/rank: {W.dim()/mesh.comm.size}', comm=global_comm) +Print(f'Block DoFs/rank: {2*W.dim()/mesh.comm.size}', comm=global_comm) +Print('') + +Print("Calculating initial condiions") +Print('') + +# ideal gas properties +gas = euler.StandardAtmosphere(N=0.01) + +# initial conditions and background state +Un, Uback = initial_conditions(mesh, W, Vv, gas, H) +uback, rho_back, theta_back = Uback.subfunctions + +U0 = Uback.copy(deepcopy=True) # background state at rest +U0.subfunctions[0].assign(0) + +# finite element forms + +form_mass = euler.get_form_mass() + +up = fd.as_vector([fd.Constant(0.0), fd.Constant(1.0)]) # up direction + +form_function = euler.get_form_function( + n, up, c_pen=fd.Constant(2.0**(-7./2)), gas=gas, mu=None) + +# tangential flow boundary conditions at ground and lid +zv = fd.as_vector([fd.Constant(0.), fd.Constant(0.)]) +bcs = [fd.DirichletBC(W.sub(0), zv, "bottom"), + fd.DirichletBC(W.sub(0), zv, "top")] +for bc in bcs: + bc.apply(Un) + +# Parameters for the diag +patch_parameters = { + "pc_type": "python", + "pc_python_type": "firedrake.AssembledPC", + "assembled": { + "pc_type": "python", + "pc_python_type": "firedrake.ASMStarPC", + "pc_star": { + "construct_dim": 0, + "sub_sub_pc_type": "lu", + "sub_sub_pc_factor_mat_solver_type": 'mumps', + }, + }, +} + +aux_parameters = { + 'pc_type': 'python', + 'pc_python_type': 'asQ.AuxiliaryComplexBlockPC', + 'aux': { + 'frozen': None, # never update the factorisation + 'pc_type': 'lu', + 'pc_factor_mat_solver_type': 'mumps', + }, +} + +block_parameters = { + 'mat_type': 'matfree', + "ksp_type": "fgmres", + 'ksp': { + 'rtol': 1e-5, + 'max_it': 200, + 'converged_maxits': None, + 'gmres_restart': 200, + }, + 'pc_type': 'composite', + 'pc_composite_type': 'multiplicative', + 'pc_composite_pcs': 'ksp,ksp', + 'sub_0_ksp': aux_parameters, + 'sub_0_ksp_ksp': { + 'type': 'gmres', + 'max_it': 2, + 'convergence_test': 'skip', + 'converged_maxits': None, + }, + 'sub_1_ksp': patch_parameters, + 'sub_1_ksp_ksp': { + 'type': 'gmres', + 'max_it': 2, + 'convergence_test': 'skip', + 'converged_maxits': None, + }, +} + +# atol is the same for Newton and (right-preconditioned) Krylov +atol = 1e-4 +patol = sqrt(window_length)*atol +solver_parameters = { + "snes": { + 'linesearch_type': 'basic', + "monitor": None, + "converged_reason": None, + 'stol': 1e-100, + "atol": patol, + "ksp_ew": None, + "ksp_ew_version": 1, + "ksp_ew_rtol0": 
1e-1, + "ksp_ew_threshold": 1e-10, + "lag_preconditioner": -2, # preconditioner is constant-in-time + "lag_preconditioner_persists": None, + }, + "mat_type": "matfree", + "ksp_type": "fgmres", + "ksp": { + "monitor": None, + "converged_rate": None, + "atol": patol, + 'max_it': 10, + "min_it": 1, + 'converged_maxits': None, + }, + "pc_type": "python", + "pc_python_type": "asQ.CirculantPC", + "circulant_state": 'reference', # use the Jacobian reference state + "circulant_alpha": 1e-5, + "circulant_block": block_parameters, +} + + +# the reference state for the linearisation around a state of rest +appctx = {'block_appctx': {'aux_uref': U0}} + +paradiag = asQ.Paradiag(ensemble=ensemble, + time_partition=time_partition, + form_function=form_function, + form_mass=form_mass, + ics=Un, dt=dt, theta=args.theta, + reference_state=Uback, + bcs=bcs, appctx=appctx, + solver_parameters=solver_parameters) + +aaofunc = paradiag.aaofunc +# are we on the comm with timestep -1? If so, we are in charge of output. +is_last_slice = paradiag.layout.is_local(-1) + +# only last slice does diagnostics/output +if is_last_slice: + uout = fd.Function(V1, name='velocity') + thetaout = fd.Function(Vt, name='temperature') + rhoout = fd.Function(V2, name='density') + + ofile = VTKFile(f'{args.vtkfile}.pvd', + comm=ensemble.comm) + + def assign_out_functions(): + uout.assign(aaofunc[-1].subfunctions[0]) + rhoout.assign(aaofunc[-1].subfunctions[1]) + thetaout.assign(aaofunc[-1].subfunctions[2]) + + # output density and temperature variations, not absolute values + rhoout.assign(rhoout - rho_back) + thetaout.assign(thetaout - theta_back) + + def write_to_file(): + ofile.write(uout, rhoout, thetaout) + + cfl_calc = convective_cfl_calculator(mesh) + cfl_series = [] + + def max_cfl(u, dt): + with cfl_calc(u, dt).dat.vec_ro as v: + return v.max()[1] + + +timer = SolverTimer() + +fround = lambda x: round(float(x), 2) + + +def window_preproc(pdg, wndw, rhs): + Print('') + Print(f'### === --- Calculating time-window {wndw} --- === ###') + Print('') + with PETSc.Log.Event("window_preproc.Coll_Barrier"): + pdg.ensemble.ensemble_comm.Barrier() + timer.start_timing() + + +def window_postproc(pdg, wndw, rhs): + timer.stop_timing() + Print('', comm=global_comm) + Print(f'Window solution time: {timer.times[-1]}', comm=global_comm) + Print('', comm=global_comm) + + # postprocess this timeslice + if is_last_slice: + assign_out_functions() + write_to_file() + + cfl = max_cfl(uout, dt) + cfl_series.append(cfl) + Print(f'Maximum CFL = {cfl}', comm=ensemble.comm) + + nt = pdg.total_windows*pdg.ntimesteps - 1 + time = float(nt*pdg.aaoform.dt) + Print(f'Time = {fround(time)}', comm=ensemble.comm) + + +# Setup all solver objects. The firedrake DM and options management +# makes it difficult to setup some preconditioners without actually +# calling `solve`, so we just run once to set everything up. 
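+# As in the serial scripts, the results of this warmup window are discarded:
+# the recorded times are cleared and the all-at-once function is reset to the
+# initial conditions before the timed windows below.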
+Print('### === --- Setting up solver and prefactoring --- === ###')
+
+with PETSc.Log.Event("warmup_solve"):
+    paradiag.solve(nwindows=1,
+                   preproc=window_preproc,
+                   postproc=window_postproc)
+Print('')
+
+# reset
+timer.times.clear()
+paradiag.solver.aaofunc.assign(Un)
+paradiag.reset_diagnostics()
+
+Print('### === --- Calculating parallel solution --- === ###')
+
+# solve for each window
+with PETSc.Log.Event("timed_solves"):
+    paradiag.solve(nwindows=args.nwindows,
+                   preproc=window_preproc,
+                   postproc=window_postproc)
+
+Print('### === --- Iteration counts --- === ###')
+Print('')
+
+asQ.write_paradiag_metrics(paradiag, directory=args.metrics_dir)
+
+nw = paradiag.total_windows
+nt = paradiag.total_timesteps
+Print(f'windows: {nw}')
+Print(f'timesteps: {nt}')
+Print('')
+
+# Show the parallel partition sizes.
+Print(f'Total DoFs per window: {W.dim()*window_length}')
+Print(f'DoFs per timestep: {W.dim()}')
+Print(f'Total number of MPI ranks: {ensemble.global_comm.size}')
+Print(f'Number of MPI ranks per timestep: {mesh.comm.size}')
+Print(f'DoFs/rank: {W.dim()/mesh.comm.size}')
+Print(f'Complex block DoFs/rank: {2*W.dim()/mesh.comm.size}')
+Print('')
+
+lits = paradiag.linear_iterations
+nlits = paradiag.nonlinear_iterations
+blits = paradiag.block_iterations.data()
+
+Print(f'Nonlinear iterations: {nlits} | Iterations per window: {nlits/nw}')
+Print(f'Linear iterations: {lits} | Iterations per window: {lits/nw}')
+Print(f'Total block linear iterations: {blits}')
+Print(f'Iterations per block solve: {blits/lits}')
+Print(f'Minimum block iterations per solve: {min(blits)/lits}')
+Print(f'Maximum block iterations per solve: {max(blits)/lits}')
+Print('')
+
+ensemble.global_comm.Barrier()
+if is_last_slice:
+    Print(f'Maximum CFL = {max(cfl_series)}', comm=ensemble.comm)
+    Print(f'Minimum CFL = {min(cfl_series)}', comm=ensemble.comm)
+    Print('', comm=ensemble.comm)
+ensemble.global_comm.Barrier()
+
+Print(timer.string(timesteps_per_solve=window_length,
+                   total_iterations=lits, ndigits=5))
+Print('')
diff --git a/asq_manuscript_examples/vertical_slice_serial.py b/asq_manuscript_examples/vertical_slice_serial.py
new file mode 100644
index 00000000..8e631f29
--- /dev/null
+++ b/asq_manuscript_examples/vertical_slice_serial.py
@@ -0,0 +1,343 @@
+import firedrake as fd
+from firedrake.petsc import PETSc
+from firedrake.output import VTKFile
+
+from utils.timing import SolverTimer
+from utils.serial import SerialMiniApp
+from utils.diagnostics import convective_cfl_calculator
+from utils import compressible_flow as euler
+from math import pi
+
+from argparse import ArgumentParser
+from argparse_formatter import DefaultsAndRawTextFormatter
+
+
+def initial_conditions(mesh, W, Vv, gas, H, perturbation=True, hydrostatic=False):
+    if hydrostatic:
+        raise NotImplementedError
+
+    x, z = fd.SpatialCoordinate(mesh)
+    V2 = W.subfunctions[1]
+    up = fd.as_vector([fd.Constant(0.0), fd.Constant(1.0)])  # up direction
+
+    Un = fd.Function(W)
+    un, rhon, thetan = Un.subfunctions
+
+    Tsurf = fd.Constant(300.)
+ thetab = Tsurf*fd.exp(gas.N**2*z/gas.g) + + thetan.interpolate(thetab) + + Pi = fd.Function(V2) + + # background state is in hydrostatic balance + euler.hydrostatic_rho(Vv, V2, mesh, thetan, rhon, + pi_boundary=fd.Constant(1.0), + gas=gas, Up=up, top=True, Pi=Pi) + + un.project(fd.as_vector([20.0, 0.0])) + + Uback = Un.copy(deepcopy=True) + + # temperature perturbation to initiate gravity wave + if perturbation: + a = fd.Constant(5e3) + dtheta = fd.Constant(1e-2) + + theta_pert = dtheta*fd.sin(pi*z/H)/(1 + x**2/a**2) + thetan.interpolate(thetan + theta_pert) + + return Un, Uback + + +parser = ArgumentParser( + description='Nonhydrostatic gravity wave testcase for serial-in-time solver using fully implicit vertical slice.', + formatter_class=DefaultsAndRawTextFormatter +) +parser.add_argument('--nlayers', type=int, default=10, help='Number of layers in the vertical direction.') +parser.add_argument('--ncolumns', type=int, default=300, help='Number of columns in the horizontal direction.') +parser.add_argument('--nt', type=int, default=10, help='Number of timesteps to solve.') +parser.add_argument('--dt', type=float, default=12, help='Timestep in seconds.') +parser.add_argument('--theta', type=float, default=0.5, help='Parameter for implicit theta method. 0.5 for trapezium rule, 1 for backwards Euler.') +parser.add_argument('--degree', type=int, default=1, help='Degree of finite element space (the DG space).') +parser.add_argument('--vtkfile', type=str, default='vtk/gravity_wave_serial', help='Name of output vtk files') +parser.add_argument('--write_freq', type=int, default=1, help='How often to write the solution to file.') +parser.add_argument('--show_args', action='store_true', help='Output all the arguments.') + +args = parser.parse_known_args() +args = args[0] + +if args.show_args: + PETSc.Sys.Print(args) + +PETSc.Sys.Print('') +PETSc.Sys.Print('### === --- Setting up --- === ###') +PETSc.Sys.Print('') + +# This script broadly follows the same structure +# as the advection_serial.py script, with the main +# differences being: +# +# - more complicated finite element spaces and forms are required, +# which are provided by the utils.compressible_flow module. +# - more complicated initial conditions (the function at the top of +# the file), from the test case of +# Skamarock & Klemp, 1994, "Efficiency and accuracy of the +# Klemp-Wilhelmson time-splitting technique". +# - options parameters specifying more involved block preconditioning. 
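+
+# The serial-in-time script is parallel in space only, so the mesh is built
+# on COMM_WORLD directly rather than on the spatial communicator of an asQ
+# ensemble.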
+ +comm = fd.COMM_WORLD + +# parameters + +nt = args.nt +dt = args.dt + +# set up the mesh +L = 300e3 +H = 10e3 + +distribution_parameters = { + "partition": True, + "overlap_type": (fd.DistributedMeshOverlapType.VERTEX, 2) +} + +base_mesh = fd.PeriodicIntervalMesh( + args.ncolumns, float(L), + distribution_parameters=distribution_parameters, + comm=comm) +base_mesh.coordinates.dat.data[:] -= float(L)/2 + +mesh = fd.ExtrudedMesh(base_mesh, + layers=args.nlayers, + layer_height=float(H/args.nlayers)) +n = fd.FacetNormal(mesh) + +# compatible function spaces + +W, Vv = euler.function_space(mesh, horizontal_degree=args.degree, + vertical_degree=args.degree, + vertical_velocity_space=True) +V1, V2, Vt = W.subfunctions # velocity, density, temperature + +PETSc.Sys.Print(f"DoFs: {W.dim()}") +PETSc.Sys.Print(f"DoFs/core: {W.dim()/comm.size}") +PETSc.Sys.Print("") + +PETSc.Sys.Print("Calculating initial condiions") + +# ideal gas properties +gas = euler.StandardAtmosphere(N=0.01) + +# initial conditions and background state +Un, Uback = initial_conditions(mesh, W, Vv, gas, H) +uback, rho_back, theta_back = Uback.subfunctions + +U0 = Uback.copy(deepcopy=True) # background state at rest +U0.subfunctions[0].assign(0) + +# finite element forms + +form_mass = euler.get_form_mass() + +up = fd.as_vector([fd.Constant(0.0), fd.Constant(1.0)]) # up direction + +form_function = euler.get_form_function( + n, up, c_pen=fd.Constant(2.0**(-7./2)), gas=gas, mu=None) + +# tangential flow boundary conditions at ground and lid +zv = fd.as_vector([fd.Constant(0.), fd.Constant(0.)]) +bcs = [fd.DirichletBC(W.sub(0), zv, "bottom"), + fd.DirichletBC(W.sub(0), zv, "top")] +for bc in bcs: + bc.apply(Un) + +# Parameters for the newton iterations +patch_parameters = { + 'pc_type': 'python', + 'pc_python_type': 'firedrake.AssembledPC', + 'assembled': { + 'pc_type': 'python', + 'pc_python_type': 'firedrake.ASMStarPC', + 'pc_star': { + 'construct_dim': 0, + 'sub_sub_pc_type': 'lu', + 'sub_sub_pc_factor_mat_solver_type': 'mumps', + }, + }, +} + +aux_parameters = { + 'pc_type': 'python', + 'pc_python_type': 'asQ.AuxiliaryRealBlockPC', + 'aux': { + 'frozen': None, # never update the factorisation + 'pc_type': 'lu', + 'pc_factor_mat_solver_type': 'mumps', + } +} + +block_parameters = { + 'pc_type': 'composite', + 'pc_composite_type': 'multiplicative', + 'pc_composite_pcs': 'ksp,ksp', + 'sub_0_ksp': aux_parameters, + 'sub_0_ksp_ksp': { + 'type': 'gmres', + 'max_it': 2, + 'convergence_test': 'skip', + 'converged_maxits': None, + }, + 'sub_1_ksp': patch_parameters, + 'sub_1_ksp_ksp': { + 'type': 'gmres', + 'max_it': 2, + 'convergence_test': 'skip', + 'converged_maxits': None, + }, +} + +# atol is the same for Newton and (right-preconditioned) Krylov +atol = 1e-4 +solver_parameters = { + 'snes': { + 'linesearch_type': 'basic', + 'monitor': None, + 'converged_reason': None, + 'stol': 1e-100, + 'atol': atol, + 'ksp_ew': None, + 'ksp_ew_version': 1, + 'ksp_ew_rtol0': 1e-1, + 'ksp_ew_threshold': 1e-10, + 'lag_preconditioner': -2, # preconditioner is constant-in-time + 'lag_preconditioner_persists': None, + }, + 'mat_type': 'matfree', + 'ksp_type': 'fgmres', + 'ksp': { + 'monitor': None, + 'converged_rate': None, + 'atol': atol, + 'max_it': 30, + 'converged_maxits': None, + "min_it": 1, + }, +} +solver_parameters.update(block_parameters) + +# reference state at rest for the wave preconditioner +appctx = {'aux_uref': U0} + +miniapp = SerialMiniApp(dt=dt, theta=args.theta, w_initial=Un, + form_mass=form_mass, + form_function=form_function, + 
solver_parameters=solver_parameters, + bcs=bcs, appctx=appctx) + +PETSc.Sys.Print('### === --- Timestepping loop --- === ###') +linear_its = 0 +nonlinear_its = 0 + +uout = fd.Function(V1, name='velocity') +thetaout = fd.Function(Vt, name='temperature') +rhoout = fd.Function(V2, name='density') + +ofile = VTKFile(f'{args.vtkfile}.pvd', comm=comm) + + +def assign_out_functions(): + uout.assign(miniapp.w0.subfunctions[0]) + rhoout.assign(miniapp.w0.subfunctions[1]) + thetaout.assign(miniapp.w0.subfunctions[2]) + + # output density and temperature variations, not absolute values + rhoout.assign(rhoout - rho_back) + thetaout.assign(thetaout - theta_back) + + +def write_to_file(time): + assign_out_functions() + ofile.write(uout, rhoout, thetaout, t=time) + + +# Setup all solver objects. The firedrake DM and options management +# makes it difficult to setup some preconditioners without actually +# calling `solve`, so we just run once to set everything up. +PETSc.Sys.Print('') +PETSc.Sys.Print('### === --- Setting up solver and prefactoring --- === ###') +PETSc.Sys.Print('') +with PETSc.Log.Event("warmup_solve"): + miniapp.solve(nt=1) +miniapp.w0.assign(Un) +miniapp.w1.assign(Un) +PETSc.Sys.Print('') + +linear_its = 0 +nonlinear_its = 0 + +cfl_calc = convective_cfl_calculator(mesh) +cfl_series = [] + +timer = SolverTimer() + + +def max_cfl(u, dt): + with cfl_calc(u, dt).dat.vec_ro as v: + return v.max()[1] + + +def preproc(app, step, time): + PETSc.Sys.Print('') + PETSc.Sys.Print(f'### === --- Calculating time-step {step} --- === ###') + PETSc.Sys.Print('') + with PETSc.Log.Event("timestep_preproc.Coll_Barrier"): + comm.Barrier() + timer.start_timing() + + +def postproc(app, step, time): + global linear_its + global nonlinear_its + timer.stop_timing() + PETSc.Sys.Print('') + PETSc.Sys.Print(f'Timestep solution time: {timer.times[-1]}') + PETSc.Sys.Print('') + + linear_its += miniapp.nlsolver.snes.getLinearSolveIterations() + nonlinear_its += miniapp.nlsolver.snes.getIterationNumber() + + if ((step + 1) % args.write_freq) == 0: + write_to_file(time=time) + + cfl = max_cfl(uout, dt) + cfl_series.append(cfl) + PETSc.Sys.Print(f'Time = {time}') + PETSc.Sys.Print(f'Maximum CFL = {cfl}') + + +# solve for each window +PETSc.Sys.Print('### === --- Solving timeseries --- === ###') +PETSc.Sys.Print('') +with PETSc.Log.Event("timed_solves"): + miniapp.solve(nt=nt, + preproc=preproc, + postproc=postproc) +PETSc.Sys.Print('') + +PETSc.Sys.Print('### === --- Iteration counts --- === ###') +PETSc.Sys.Print('') +# parallelism +PETSc.Sys.Print(f'DoFs per timestep: {W.dim()}') +PETSc.Sys.Print(f'Number of MPI ranks per timestep: {mesh.comm.size}') +PETSc.Sys.Print(f'DoFs/rank: {W.dim()/mesh.comm.size}') +PETSc.Sys.Print('') + +PETSc.Sys.Print(f'Nonlinear iterations: {str(nonlinear_its).rjust(5)} | Iterations per window: {str(nonlinear_its/args.nt).rjust(5)}') +PETSc.Sys.Print(f'Linear iterations: {str(linear_its).rjust(5)} | Iterations per window: {str(linear_its/args.nt).rjust(5)}') +PETSc.Sys.Print('') + +# Timing measurements +PETSc.Sys.Print(timer.string(timesteps_per_solve=1, + total_iterations=linear_its, ndigits=5)) +PETSc.Sys.Print('')