Nonlinear Gauss-Seidel #164

Draft

JHopeCollins wants to merge 21 commits into master from gauss_siedel

Commits (21)
70e7ab9  nonlinear gauss siedel for galewsky (JHopeCollins, Jan 10, 2024)
1202047  correct tolerances for nonlinear gs (JHopeCollins, Jan 25, 2024)
75da08c  Merge branch 'master' into gauss_siedel (JHopeCollins, Jan 26, 2024)
2624c02  converge multiple chunks for nonlinear gauss siedel and output block … (JHopeCollins, Jan 26, 2024)
26c43e6  switch for nonlinear jacobi or gauss-siedel switch (JHopeCollins, Jan 29, 2024)
d3f710c  split serial/parallel gauss-siedel galewsky (JHopeCollins, Jan 30, 2024)
0180e04  dg_advection with aaos gauss-siedel (JHopeCollins, Feb 20, 2024)
4404d45  split_ensemble functions from JacobiPC branch (JHopeCollins, Feb 22, 2024)
dd275b7  continuing gauss_seidel parallelisation (JHopeCollins, Mar 11, 2024)
0242daa  I can't spell German (JHopeCollins, Mar 11, 2024)
384ec50  add ensemble file to asQ/__init__ (JHopeCollins, Mar 11, 2024)
fcacf56  initialisation loop for gauss-seidel (JHopeCollins, Mar 11, 2024)
d32e9eb  parallel gauss-seidel? (JHopeCollins, Mar 11, 2024)
7c1aa26  Merge branch 'master' into gauss_siedel (JHopeCollins, Mar 12, 2024)
e24161d  switch between serial and parallel gauss-seidel sweeps (JHopeCollins, Mar 12, 2024)
5768808  remove duplicate asQ.ensemble import (JHopeCollins, Mar 13, 2024)
1a5db26  advection gs example (JHopeCollins, Mar 13, 2024)
ddd6383  update EnsembleConnector with new pyop2.internal_comm interface (JHopeCollins, Mar 14, 2024)
707f6d9  advection gauss-seidel script with error check vs serial (JHopeCollins, Mar 14, 2024)
00704a4  chunk snes outputs to seperate files (JHopeCollins, Mar 19, 2024)
8f69ccc  Merge branch 'master' into gauss_siedel (JHopeCollins, Apr 16, 2024)
6 changes: 3 additions & 3 deletions asQ/ensemble.py
@@ -62,15 +62,15 @@ def __init__(self, global_comm, local_comm, nmembers):
             raise ValueError(msg)

         self.global_comm = global_comm
-        self._global_comm = internal_comm(self.global_comm)
+        self._comm = internal_comm(self.global_comm, self)

         self.comm = local_comm
-        self._comm = internal_comm(self.comm)
+        self._spatial_comm = internal_comm(self.comm, self)

         self.ensemble_comm = self.global_comm.Split(color=self.comm.rank,
                                                     key=global_comm.rank)

-        self._ensemble_comm = internal_comm(self.ensemble_comm)
+        self._ensemble_comm = internal_comm(self.ensemble_comm, self)

     def __del__(self):
         if hasattr(self, "ensemble_comm"):
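These three changes track the updated pyop2 internal_comm interface (commit ddd6383), which takes the owning object as a second argument so the duplicated communicator's lifetime is tied to that object. A minimal sketch of the ownership pattern, assuming the two-argument pyop2.mpi.internal_comm(comm, obj) signature used in the diff above:

from mpi4py import MPI
from pyop2.mpi import internal_comm


class CommHolder:
    def __init__(self, comm):
        self.comm = comm
        # duplicate the user comm for internal library traffic; passing `self`
        # ties the duplicate's lifetime to this object so it is freed with it
        self._comm = internal_comm(self.comm, self)


holder = CommHolder(MPI.COMM_WORLD)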
352 changes: 352 additions & 0 deletions case_studies/gauss_seidel/advection.py
@@ -0,0 +1,352 @@
from firedrake.petsc import PETSc

import asQ
import firedrake as fd

from time import sleep # noqa: F401
import numpy as np
from math import sqrt, pi, cos, sin

PETSc.Sys.popErrorHandler()

# get command arguments
import argparse
parser = argparse.ArgumentParser(
    description='DG scalar advection testcase for ParaDiag solver using a pipelined nonlinear Gauss-Seidel method.',
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

parser.add_argument('--nx', type=int, default=16, help='Number of cells along each square side.')
parser.add_argument('--cfl', type=float, default=0.8, help='Convective CFL number.')
parser.add_argument('--angle', type=float, default=pi/6, help='Angle of the convective velocity.')
parser.add_argument('--degree', type=int, default=1, help='Degree of the scalar spaces.')
parser.add_argument('--width', type=float, default=0.2, help='Width of the Gaussian bump.')
parser.add_argument('--nwindows', type=int, default=1, help='Total number of time-windows.')
parser.add_argument('--nchunks', type=int, default=4, help='Number of chunks to solve simultaneously.')
parser.add_argument('--nsweeps', type=int, default=4, help='Number of nonlinear sweeps.')
parser.add_argument('--nsmooth', type=int, default=1, help='Number of nonlinear iterations per chunk at each sweep.')
parser.add_argument('--atol', type=float, default=1e-6, help='Average atol of each timestep.')
parser.add_argument('--nslices', type=int, default=2, help='Number of time-slices per time-window.')
parser.add_argument('--slice_length', type=int, default=2, help='Number of timesteps per time-slice.')
parser.add_argument('--alpha', type=float, default=1e-1, help='Circulant coefficient.')
parser.add_argument('--theta', type=float, default=0.5, help='Parameter for the implicit theta timestepping method.')
parser.add_argument('--serial', action='store_true', help='Calculate each chunk in serial.')
parser.add_argument('--show_args', action='store_true', help='Output all the arguments.')

args = parser.parse_known_args()
args = args[0]

if args.show_args:
    PETSc.Sys.Print(args)

PETSc.Sys.Print('')
PETSc.Sys.Print('### === --- Setting up --- === ###')
PETSc.Sys.Print('')

# time steps

chunk_partition = tuple((args.slice_length for _ in range(args.nslices)))
chunk_length = sum(chunk_partition)
total_timesteps = chunk_length*args.nchunks
total_slices = args.nslices*args.nchunks

global_comm = fd.COMM_WORLD
global_time_partition = tuple((args.slice_length for _ in range(total_slices)))
global_ensemble = asQ.create_ensemble(global_time_partition, global_comm)
chunk_ensemble = asQ.split_ensemble(global_ensemble, args.nslices)

# which chunk are we?
chunk_id = global_ensemble.ensemble_comm.rank // args.nslices

# Calculate the timestep from the CFL number
umax = 1.
dx = 1./args.nx
dt = args.cfl*dx/umax
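# e.g. with the default arguments (nx=16, cfl=0.8, umax=1):
#   dx = 1/16 = 0.0625 and dt = 0.8*0.0625 = 0.05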

# # # === --- domain and FE spaces --- === # # #

mesh = fd.PeriodicUnitSquareMesh(args.nx, args.nx, quadrilateral=True, comm=chunk_ensemble.comm)

V = fd.FunctionSpace(mesh, "DQ", args.degree)

# # # === --- initial conditions --- === # # #

x, y = fd.SpatialCoordinate(mesh)


def radius(x, y):
    return fd.sqrt(pow(x-0.5, 2) + pow(y-0.5, 2))


def gaussian(x, y):
    return fd.exp(-0.5*pow(radius(x, y)/args.width, 2))


# Gaussian bump centred at (0.5, 0.5)
q0 = fd.Function(V, name="scalar_initial")
q0.interpolate(1 + gaussian(x, y))

# The advecting velocity at angle to the x-axis
u = fd.Constant(fd.as_vector((umax*cos(args.angle), umax*sin(args.angle))))

# # # === --- finite element forms --- === # # #


# The time-derivative mass form for the scalar advection equation.
# asQ assumes that the mass form is linear so here
# q is a TrialFunction and phi is a TestFunction
def form_mass(q, phi):
    return phi*q*fd.dx


# The DG advection form for the scalar advection equation.
# asQ assumes that the function form is nonlinear so here
# q is a Function and phi is a TestFunction
def form_function(q, phi, t):
    # upwind switch
    n = fd.FacetNormal(mesh)
    un = fd.Constant(0.5)*(fd.dot(u, n) + abs(fd.dot(u, n)))

    # integration over element volume
    int_cell = q*fd.div(phi*u)*fd.dx

    # integration over internal facets
    int_facet = (phi('+')-phi('-'))*(un('+')*q('+')-un('-')*q('-'))*fd.dS

    return int_facet - int_cell
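# For reference, the AllAtOnceForm built below couples these two forms with the
# implicit theta method at every timestep (a sketch of the residual, not the
# exact asQ assembly):
#   form_mass(q^{n+1} - q^n, phi)/dt
#       + theta*form_function(q^{n+1}, phi, t^{n+1})
#       + (1 - theta)*form_function(q^n, phi, t^n) = 0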


# # # === --- PETSc solver parameters --- === # # #

snes_linear_params = {
    'type': 'ksponly',
    'lag_jacobian': -2,
    'lag_jacobian_persists': None,
    'lag_preconditioner': -2,
    'lag_preconditioner_persists': None,
}

# parameters for the implicit diagonal solve in step-(b)
block_parameters = {
    'snes': snes_linear_params,
    'ksp_type': 'preonly',
    'pc_type': 'lu',
    'pc_factor_mat_solver_type': 'mumps'
}

atol = args.atol
patol = sqrt(chunk_length)*atol
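# Note: the nonlinear residual is measured over the whole chunk, so the
# per-timestep tolerance args.atol is scaled by sqrt(chunk_length) (assuming a
# 2-norm over timesteps) to give the chunk-level tolerance patol used below.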
sparameters_diag = {
    'snes': {
        'monitor': f":snes_monitor_chunk{chunk_id}.log",
        'converged_reason': f":snes_converged_reason_chunk{chunk_id}.log",
        'atol': patol,
        'rtol': 1e-10,
        'stol': 1e-12,
        'max_it': args.nsmooth,
        'convergence_test': 'skip',
    },
    'mat_type': 'matfree',
    'ksp_type': 'preonly',
    'ksp': {
        'rtol': 1e-2,
        'atol': patol,
    },
    'pc_type': 'python',
    'pc_python_type': 'asQ.CirculantPC',
    'diagfft_alpha': args.alpha,
    'diagfft_state': 'linear',
    'aaos_jacobian_state': 'linear'
}
sparameters_diag['snes'].update(snes_linear_params)

for i in range(chunk_length):
    sparameters_diag['diagfft_block_'+str(i)+'_'] = block_parameters
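# The entries above are flattened into per-block PETSc option prefixes, so each
# of the chunk_length blocks sees options like 'diagfft_block_0_pc_type': 'lu'
# (an assumption about how asQ/firedrake expand nested parameter dicts).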

# function spaces and initial conditions

# all at once solver

chunk_aaofunc = asQ.AllAtOnceFunction(chunk_ensemble, chunk_partition, V)
chunk_aaofunc.assign(q0)

theta = 0.5
chunk_aaoform = asQ.AllAtOnceForm(chunk_aaofunc, dt, theta,
                                  form_mass, form_function)

chunk_solver = asQ.AllAtOnceSolver(chunk_aaoform, chunk_aaofunc,
                                   solver_parameters=sparameters_diag)
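# chunk_aaofunc holds the chunk_length timesteps of one chunk, distributed in
# time over the args.nslices slices of chunk_ensemble; chunk_solver advances
# them all at once using the CirculantPC parameters defined above.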

# which chunk_id is holding which part of the total timeseries?
chunk_indexes = np.array([*range(args.nchunks)], dtype=int)

# we need to make this an array so we can send it via mpi
convergence_flag = np.array([False], dtype=bool)

# first mpi rank on each chunk (assumes all chunks are equal size):
chunk_root = lambda c: c*chunk_ensemble.global_comm.size

# am I the chunk with the earliest timesteps?
earliest = lambda: (chunk_id == chunk_indexes[0])

# update chunk ics from previous chunk
uprev = fd.Function(V)


def update_chunk_halos(uhalo):
    chunk_begin = chunk_aaofunc.layout.is_local(0)
    chunk_end = chunk_aaofunc.layout.is_local(-1)

    global_rank = global_ensemble.ensemble_comm.rank
    global_size = global_ensemble.ensemble_comm.size

    # ring communication so the first chunk can
    # pick up after last chunk after convergence

    # send forward last step of chunk
    if chunk_end:
        dest = (global_rank + 1) % global_size
        global_ensemble.send(chunk_aaofunc[-1], dest=dest, tag=dest)

    # recv updated ics from previous chunk
    if chunk_begin:
        source = (global_rank - 1) % global_size
        global_ensemble.recv(uhalo, source=source, tag=global_rank)

    # broadcast new ics to all ranks
    chunk_ensemble.bcast(uhalo)
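# For example, with 4 chunks in time order the halo exchange sends the last
# timestep of chunk 0 to chunk 1, chunk 1 to chunk 2, and so on, with the last
# chunk wrapping around to the first; the wrap-around value is only actually
# used once the earliest chunk has converged and is recycled to the back of the
# queue (step 5 of the sweep loop below).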


PETSc.Sys.Print('### === --- Calculating parallel solution --- === ###')
PETSc.Sys.Print('')

nconverged = 0

PETSc.Sys.Print('### === --- Initialising all chunks --- === ###')
PETSc.Sys.Print('')
sleep_time = 0.01
for j in range(args.nchunks):
    PETSc.Sys.Print(f' === --- Initial nonlinear sweep {j} --- === ')
    PETSc.Sys.Print('')

    # only smooth chunks that the first sweep has reached

    if args.serial:
        for i in range(j+1):
            PETSc.Sys.Print(f' --- Calculating chunk {i} --- ')

            global_comm.Barrier()
            sleep(sleep_time)
            if chunk_id == i:
                chunk_solver.solve()
            global_comm.Barrier()
            sleep(sleep_time)

            PETSc.Sys.Print("")
    else:
        if chunk_id < j+1:
            chunk_solver.solve()

    # propagate solution
    update_chunk_halos(uprev)

    # update initial condition guess for later chunks
    if chunk_id != 0:
        chunk_aaofunc.initial_condition.assign(uprev)

    # initial guess in front of first sweep is persistence forecast
    if chunk_id > j:
        chunk_aaofunc.assign(chunk_aaofunc.initial_condition)

PETSc.Sys.Print('### === --- All chunks initialised --- === ###')
PETSc.Sys.Print('')
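# During start-up sweep j only chunks 0..j are smoothed, so by the end of this
# phase chunk i has received (nchunks - i) smoothing applications; chunks the
# pipeline has not reached yet simply carry the persistence forecast copied
# from their initial condition.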

for j in range(args.nsweeps):
    PETSc.Sys.Print(f' === --- Calculating nonlinear sweep {j} --- === ')
    PETSc.Sys.Print('')

    # 1) one smoothing application on each chunk
    if args.serial:
        for i in range(args.nchunks):
            PETSc.Sys.Print(f' --- Calculating chunk {i} on solver {chunk_indexes[i]} --- ')

            global_comm.Barrier()
            sleep(sleep_time)
            if chunk_id == chunk_indexes[i]:
                chunk_solver.solve()
            global_comm.Barrier()
            sleep(sleep_time)

            PETSc.Sys.Print("")

    else:
        chunk_solver.solve()

    # 2) update ics of each chunk from previous chunk
    update_chunk_halos(uprev)

    # everyone uses latest ic guesses, except chunk
    # with earliest timesteps (already has 'exact' ic)
    if not earliest():
        chunk_aaofunc.initial_condition.assign(uprev)

    # 3) check convergence of earliest chunk
    if earliest():
        chunk_aaoform.assemble()
        with chunk_aaoform.F.global_vec_ro() as rvec:
            res = rvec.norm()
        convergence_flag[0] = (res < patol)

    # rank 0 on the earliest chunk tells everyone if they've converged
    global_ensemble.global_comm.Bcast(convergence_flag,
                                      root=chunk_root(chunk_indexes[0]))

    # update and report
    if convergence_flag[0]:
        nconverged += 1

        converged_time = nconverged*chunk_length*dt
        PETSc.Sys.Print('')
        PETSc.Sys.Print(f">>> Converged chunks: {nconverged}.")
        PETSc.Sys.Print(f">>> Converged time: {converged_time} hours.")
        PETSc.Sys.Print('')

    # 4) stop iterating if we've reached the end
    if nconverged >= args.nwindows:
        PETSc.Sys.Print(f"Finished iterating to {args.nwindows} windows.")
        PETSc.Sys.Print('')
        break

    # 5) shuffle and restart if we haven't reached the end
    if convergence_flag[0]:
        # earliest chunk_id becomes last chunk
        if earliest():
            chunk_aaofunc.assign(uprev)

        # update record of which chunk_id is in which position
        for i in range(args.nchunks):
            chunk_indexes[i] = (chunk_indexes[i] + 1) % args.nchunks

    global_comm.Barrier()
    sleep(sleep_time)
    if earliest() and chunk_aaofunc.layout.is_local(-1):
        from utils.serial import SerialMiniApp
        serialapp = SerialMiniApp(dt, theta, q0, form_mass, form_function, block_parameters)
        serialapp.solve(nt=nconverged*chunk_length)
        PETSc.Sys.Print(f"serial error: {fd.errornorm(serialapp.w0, chunk_aaofunc[-1])}", comm=chunk_ensemble.comm)
        PETSc.Sys.Print('')
    global_comm.Barrier()
    sleep(sleep_time)

nsweeps = j

niterations = nsweeps*args.nsmooth

PETSc.Sys.Print(f"Number of chunks: {args.nchunks}")
PETSc.Sys.Print(f"Maximum number of sweeps: {args.nsweeps}")
PETSc.Sys.Print(f"Actual number of sweeps: {nsweeps}")
PETSc.Sys.Print(f"Number of chunks converged: {int(nconverged)}")
PETSc.Sys.Print(f"Number of chunks converged per sweep: {nconverged/nsweeps}")
PETSc.Sys.Print(f"Number of sweeps per converged chunk: {nsweeps/nconverged if nconverged else 'n/a'}")
PETSc.Sys.Print(f"Number of iterations per converged chunk: {niterations/nconverged if nconverged else 'n/a'}")
PETSc.Sys.Print(f"Number of timesteps per iteration: {nconverged*chunk_length/niterations}")