From d053c8854227d5994a53406d481cc2db92288c1e Mon Sep 17 00:00:00 2001 From: milindasf Date: Fri, 20 Oct 2023 10:47:56 -0500 Subject: [PATCH 01/75] basic code to launch bte in tps code --- src/tps-time-loop.py | 266 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 239 insertions(+), 27 deletions(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 9bbc719d0..51f47379c 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -1,36 +1,244 @@ #!/usr/bin/env python3 import sys import os +from mpi4py import MPI import numpy as np +import scipy.constants +import csv +import matplotlib.pyplot as plt -from mpi4py import MPI +# set path to C++ TPS library +path = os.path.abspath(os.path.dirname(sys.argv[0])) +sys.path.append(path + "/.libs") +sys.path.append(path + "/../../boltzmann/BESolver/python") +import libtps +from bte_0d3v_batched import bte_0d3v_batched as BoltzmannSolver +import cupy as cp -class BoltzmannMockSolver: - def __init__(self): - pass +class BoltzmannSolverParams(): + sp_order = 8 # B-spline order in v-space + spline_qpts = 10 # number of Gauss-Legendre quadrature points per knot interval + Nr = 127 # number of B-splines used in radial direction + l_max = 1 # spherical modes uses, 0, to l_max + ev_max = 16 # v-space grid truncation (eV) + n_grids = 1 # number of v-space grids + + dt = 1e-2 # [] non-dimentionalized time w.r.t. oscilation period + cycles = 10 # number of max cycles to evolve + solver_type = "transient" # two modes, "transient" or "steady-state" + atol = 1e-16 # absolute tolerance + rtol = 1e-12 # relative tolerance + max_iter = 1000 # max iterations for the newton solver + ee_collisions = 0 # enable electron-electron Coulombic effects + use_gpu = 1 # enable GPU use (1)-GPU solver, (0)-CPU solver + dev_id = 0 # which GPU device to use only used when use_gpu=1 + + collisions = ["g0","g2"] # collision string g0-elastic, g2-ionization + export_csv = 1 # export the qois to csv file + plot_data = 1 + + Efreq = 0.0 #[1/s] # E-field osicllation frequency + verbose = 1 # verbose output for the BTE solver + n_pts = 10 # number of spatial points to launch the BTE solver + Te = 0.5 #[eV] # approximate electron temperature + + threads = 16 # number of threads to use to assemble operators + grid_idx = 0 + + output_dir = "batched_bte" + out_fname = output_dir + "/tps" + + # some useful units and conversion factors. + ev_to_K = (scipy.constants.electron_volt/scipy.constants.Boltzmann) + Td_fac = 1e-21 #[Vm^2] + +class TPSINDEX(): + """ + simple index map to differnt fields, from the TPS arrays + """ + ION_IDX = 0 # ion density index + ELE_IDX = 1 # electron density index + NEU_IDX = 2 # neutral density index + + EF_RE_IDX = 0 # Re(E) index + EF_IM_IDX = 1 # Im(E) index + +class Boltzmann0D2VBactchedSolver: + def __init__(self, tps): + self.tps = tps + self.param = BoltzmannSolverParams() + # overide the default params, based on the config.ini file. + self.param.Efreq = 0#tps.getRequiredInput("em/current_frequency") + self.param.solver_type = "steady-state" + #self.param.n_pts = 10 + + lm_modes = [[[l,0] for l in range(self.param.l_max+1)]] + nr = np.ones(self.param.n_grids, dtype=np.int32) * self.param.Nr + + Te = np.ones(self.param.n_grids) * self.param.Te + ev_max = np.ones(self.param.n_grids) * self.param.ev_max + self.bte_solver = BoltzmannSolver(self.param, ev_max ,Te , nr, lm_modes, self.param.n_grids, self.param.collisions) + + # compute BTE operators + grid_idx = self.param.grid_idx + self.bte_solver.assemble_operators(grid_idx) + def fetch(self, interface): - species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False) - efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False) - heavy_temperature = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + grid_idx = self.param.grid_idx + Tg = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=True) + tps_npts = len(Tg) + + Te = np.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=True) + rr = np.array(interface.HostRead(libtps.t2bIndex.ReactionRates), copy=True) + efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=True).reshape((2, tps_npts)) + species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=True).reshape(3, tps_npts) + + bte_idx = Te > (0.4 * self.param.ev_to_K) + self.param.n_pts = len(Te[bte_idx]) + + ni = species_densities[TPSINDEX.ION_IDX][bte_idx] + ne = species_densities[TPSINDEX.ELE_IDX][bte_idx] + n0 = species_densities[TPSINDEX.NEU_IDX][bte_idx] + Tg = Tg[bte_idx] + Te = Te[bte_idx] + + ne[ne<0] = 1e-16 + ni[ni<0] = 1e-16 + + eRe = efield[TPSINDEX.EF_RE_IDX][bte_idx] + eIm = efield[TPSINDEX.EF_IM_IDX][bte_idx] + eMag = np.sqrt(eRe**2 + eIm **2) + eByn0 = eMag/n0/self.param.Td_fac + + + if self.param.verbose == 1 : + print("Boltzmann Solver Inputs") + print("Efreq = %.4E [1/s]" %(self.param.Efreq)) + print("n_pts = %d" % self.param.n_pts) + # idx0 = np.argmin(eByn0) + # idx1 = np.argmax(eByn0) + # print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(eByn0[idx0], eByn0[idx1])) + # print("at E/n0 min max, Tg = %.12E [K] \t Tg = %.12E [K] "%(Tg[idx0], Tg[idx1])) + # print("at E/n0 min max, Te = %.12E [K] \t Te = %.12E [K] "%(Te[idx0], Te[idx1])) + + # print("at E/n0 min max, ne = %.12E [1/m^3] \t ne = %.12E [1/m^3] "%(ne[idx0], ne[idx1])) + # print("at E/n0 min max, ni = %.12E [1/m^3] \t ni = %.12E [1/m^3] "%(ni[idx0], ni[idx1])) + # print("at E/n0 min max, n0 = %.12E [1/m^3] \t n0 = %.12E [1/m^3] "%(n0[idx0], n0[idx1])) + print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg), np.max(Tg))) + print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te), np.max(Te))) + + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne), np.max(ne))) + print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni), np.max(ni))) + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0), np.max(n0))) + + + self.bte_solver.set_boltzmann_parameters(grid_idx, n0, ne, ni, Tg, self.param.solver_type) + self.bte_f0 = self.bte_solver.initialize(0, self.param.n_pts, "maxwellian") + if self.param.Efreq == 0: + ef_t = lambda t : eMag + else: + ef_t = lambda t : eRe * np.cos(2 * np.pi * self.param.Efreq * t) + eIm * np.sin(2 * np.pi * self.param.Efreq * t) - print("|| species_densities ||_2 = ", np.linalg.norm(species_densities) ) - print("|| efield ||_2 = ", np.linalg.norm(efield) ) - print("||heavy_temperature||_2 = ", np.linalg.norm(heavy_temperature) ) + if self.param.use_gpu==1: + dev_id = self.param.dev_id + self.bte_solver.host_to_device_setup(dev_id, 0) + + eRe_d = cp.asarray(eRe) + eIm_d = cp.asarray(eIm) + + if self.param.Efreq == 0: + ef_t = lambda t : cp.sqrt(eRe_d**2 + eIm_d**2) + else: + ef_t = lambda t : eRe_d * cp.cos(2 * cp.pi * self.param.Efreq * t) + eIm_d * cp.sin(2 * cp.pi * self.param.Efreq * t) + + self.bte_f0 = cp.asarray(self.bte_f0) + + self.bte_solver.set_efield_function(ef_t) + return def solve(self): - pass + grid_idx = self.param.grid_idx + ff , qoi = self.bte_solver.solve(grid_idx, self.bte_f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[0][1], 500) + ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) + + if self.param.use_gpu==1: + self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) + + ff_r = cp.asnumpy(ff_r) + for k, v in qoi.items(): + qoi[k] = cp.asnumpy(v) + + csv_write = self.param.export_csv + if csv_write: + fname = self.param.out_fname + with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(self.param.collisions): + header.append(str(g)) + + writer.writerow(header) + + n0 = self.bte_solver._par_bte_params[grid_idx]["n0"] + ne = self.bte_solver._par_bte_params[grid_idx]["ne"] + ni = self.bte_solver._par_bte_params[grid_idx]["ni"] + Tg = self.bte_solver._par_bte_params[grid_idx]["Tg"] + + eRe = self.bte_solver._par_ef_t(0) + eIm = 0 * self.bte_solver._par_ef_t(0) + + if self.param.use_gpu==1: + eRe = cp.asnumpy(eRe) + eIm = cp.asnumpy(eIm) + + eMag = np.sqrt(eRe**2 + eIm**2) + data = np.concatenate((n0.reshape(-1,1), ne.reshape(-1,1), ni.reshape(-1,1), Tg.reshape(-1,1), eMag.reshape(-1,1), qoi["energy"].reshape(-1,1), qoi["mobility"].reshape(-1,1), qoi["diffusion"].reshape(-1,1)), axis=1) + for col_idx, g in enumerate(self.param.collisions): + data = np.concatenate((data, qoi["rates"][col_idx].reshape(-1,1)), axis=1) + + writer.writerows(data) + + plot_data = self.param.plot_data + if plot_data: + num_sh = len(self.bte_solver._par_lm[grid_idx]) + num_subplots = num_sh + num_plt_cols = min(num_sh, 4) + num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) + fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) + plt_idx = 1 + n_pts_step = self.param.n_pts // 20 + + for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): + plt.subplot(num_plt_rows, num_plt_cols, plt_idx) + for ii in range(0, self.param.n_pts, n_pts_step): + fr = np.abs(ff_r[ii, lm_idx, :]) + plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) + + plt.xlabel(r"energy (eV)") + plt.ylabel(r"$f_%d$"%(lm[0])) + plt.grid(visible=True) + if lm_idx==0: + plt.legend(prop={'size': 6}) + + plt_idx +=1 + + #plt_idx = num_sh + plt.savefig("%s_plot.png"%(self.param.out_fname)) + + def push(self, interface): - electron_temperature = np.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) - electron_temperature[:] = 1. + pass + #electron_temperature = np.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) + #electron_temperature[:] = 1. + -# set path to C++ TPS library -path = os.path.abspath(os.path.dirname(sys.argv[0])) -sys.path.append(path + "/.libs") -import libtps comm = MPI.COMM_WORLD # TPS solver @@ -42,7 +250,7 @@ def push(self, interface): tps.chooseSolver() tps.initialize() -boltzmann = BoltzmannMockSolver() +boltzmann = Boltzmann0D2VBactchedSolver(tps) interface = libtps.Tps2Boltzmann(tps) tps.initInterface(interface) @@ -51,17 +259,21 @@ def push(self, interface): max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") print("Max Iters: ", max_iters) tps.solveBegin() +tps.solveStep() +tps.push(interface) +boltzmann.fetch(interface) +boltzmann.solve() -while it < max_iters: - tps.solveStep() - tps.push(interface) - boltzmann.fetch(interface) - boltzmann.solve() - boltzmann.push(interface) - tps.fetch(interface) +# while it < max_iters: +# tps.solveStep() +# tps.push(interface) +# boltzmann.fetch(interface) +# boltzmann.solve() +# boltzmann.push(interface) +# tps.fetch(interface) - it = it+1 - print("it, ", it) +# it = it+1 +# print("it, ", it) tps.solveEnd() From e3eb1120bf032cfb607582dbef79c2fc101022ee Mon Sep 17 00:00:00 2001 From: milindasf Date: Sat, 21 Oct 2023 00:48:48 -0500 Subject: [PATCH 02/75] initial dev. of spatally adapted v-grids --- src/tps-time-loop.py | 308 +++++++++++++++++++++++++------------------ 1 file changed, 177 insertions(+), 131 deletions(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 51f47379c..47dba0e17 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -21,14 +21,14 @@ class BoltzmannSolverParams(): Nr = 127 # number of B-splines used in radial direction l_max = 1 # spherical modes uses, 0, to l_max ev_max = 16 # v-space grid truncation (eV) - n_grids = 1 # number of v-space grids + n_grids = 4 # number of v-space grids dt = 1e-2 # [] non-dimentionalized time w.r.t. oscilation period cycles = 10 # number of max cycles to evolve solver_type = "transient" # two modes, "transient" or "steady-state" atol = 1e-16 # absolute tolerance rtol = 1e-12 # relative tolerance - max_iter = 1000 # max iterations for the newton solver + max_iter = 1000 # max iterations for the newton solver ee_collisions = 0 # enable electron-electron Coulombic effects use_gpu = 1 # enable GPU use (1)-GPU solver, (0)-CPU solver @@ -40,7 +40,6 @@ class BoltzmannSolverParams(): Efreq = 0.0 #[1/s] # E-field osicllation frequency verbose = 1 # verbose output for the BTE solver - n_pts = 10 # number of spatial points to launch the BTE solver Te = 0.5 #[eV] # approximate electron temperature threads = 16 # number of threads to use to assemble operators @@ -52,6 +51,9 @@ class BoltzmannSolverParams(): # some useful units and conversion factors. ev_to_K = (scipy.constants.electron_volt/scipy.constants.Boltzmann) Td_fac = 1e-21 #[Vm^2] + c_gamma = np.sqrt(2 * scipy.constants.elementary_charge / scipy.constants.electron_mass) #[(C/kg)^{1/2}] + me = scipy.constants.electron_mass + kB = scipy.constants.Boltzmann class TPSINDEX(): """ @@ -65,170 +67,213 @@ class TPSINDEX(): EF_IM_IDX = 1 # Im(E) index class Boltzmann0D2VBactchedSolver: + def __init__(self, tps): self.tps = tps self.param = BoltzmannSolverParams() # overide the default params, based on the config.ini file. self.param.Efreq = 0#tps.getRequiredInput("em/current_frequency") self.param.solver_type = "steady-state" - #self.param.n_pts = 10 - - lm_modes = [[[l,0] for l in range(self.param.l_max+1)]] - nr = np.ones(self.param.n_grids, dtype=np.int32) * self.param.Nr - - Te = np.ones(self.param.n_grids) * self.param.Te - ev_max = np.ones(self.param.n_grids) * self.param.ev_max - self.bte_solver = BoltzmannSolver(self.param, ev_max ,Te , nr, lm_modes, self.param.n_grids, self.param.collisions) - # compute BTE operators - grid_idx = self.param.grid_idx - self.bte_solver.assemble_operators(grid_idx) + self.xp_module = np + + def grid_setup(self, interface): + xp = self.xp_module + Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] + Te_min, Te_max = xp.min(Te), xp.max(Te) + Te_b = xp.linspace(Te_min, Te_max + 1e-12, self.param.n_grids + 1) - def fetch(self, interface): - grid_idx = self.param.grid_idx - Tg = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=True) - tps_npts = len(Tg) + grid_idx_to_spatial_pts_map = list() + for b_idx in range(self.param.n_grids): + grid_idx_to_spatial_pts_map.append(xp.argwhere(xp.logical_and(Te>= Te_b[b_idx], Te < Te_b[b_idx+1]))[:,0]) - Te = np.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=True) - rr = np.array(interface.HostRead(libtps.t2bIndex.ReactionRates), copy=True) - efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=True).reshape((2, tps_npts)) - species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=True).reshape(3, tps_npts) + self.grid_idx_to_npts = xp.array([len(a) for a in grid_idx_to_spatial_pts_map], dtype=xp.int32) + self.grid_idx_to_spatial_idx_map = grid_idx_to_spatial_pts_map - bte_idx = Te > (0.4 * self.param.ev_to_K) - self.param.n_pts = len(Te[bte_idx]) + xp.sum(self.grid_idx_to_npts) == len(Te), "[Error] : TPS spatial points for v-space grid assignment is inconsitant" + lm_modes = [[[l,0] for l in range(self.param.l_max+1)] for grid_idx in range(self.param.n_grids)] + nr = xp.ones(self.param.n_grids, dtype=np.int32) * self.param.Nr + Te = xp.array([Te_b[b_idx] for b_idx in range(self.param.n_grids)]) # xp.ones(self.param.n_grids) * self.param.Te + vth = np.sqrt(2* self.param.kB * Te * self.param.ev_to_K /self.param.me) + ev_max = (6 * vth / self.param.c_gamma)**2 + self.bte_solver = BoltzmannSolver(self.param, ev_max ,Te , nr, lm_modes, self.param.n_grids, self.param.collisions) + + if self.param.verbose==1: + print("grid energy max (eV) \n", ev_max) - ni = species_densities[TPSINDEX.ION_IDX][bte_idx] - ne = species_densities[TPSINDEX.ELE_IDX][bte_idx] - n0 = species_densities[TPSINDEX.NEU_IDX][bte_idx] - Tg = Tg[bte_idx] - Te = Te[bte_idx] + # compute BTE operators + for grid_idx in range(self.param.n_grids): + print("setting up grid %d"%(grid_idx)) + self.bte_solver.assemble_operators(grid_idx) + + return - ne[ne<0] = 1e-16 - ni[ni<0] = 1e-16 + def fetch(self, interface): + xp = self.xp_module + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - eRe = efield[TPSINDEX.EF_RE_IDX][bte_idx] - eIm = efield[TPSINDEX.EF_IM_IDX][bte_idx] - eMag = np.sqrt(eRe**2 + eIm **2) - eByn0 = eMag/n0/self.param.Td_fac + heavy_temp = xp.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + tps_npts = len(heavy_temp) + electron_temp = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) + efield = xp.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) + species_densities = xp.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, tps_npts) - if self.param.verbose == 1 : - print("Boltzmann Solver Inputs") - print("Efreq = %.4E [1/s]" %(self.param.Efreq)) - print("n_pts = %d" % self.param.n_pts) - # idx0 = np.argmin(eByn0) - # idx1 = np.argmax(eByn0) - # print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(eByn0[idx0], eByn0[idx1])) - # print("at E/n0 min max, Tg = %.12E [K] \t Tg = %.12E [K] "%(Tg[idx0], Tg[idx1])) - # print("at E/n0 min max, Te = %.12E [K] \t Te = %.12E [K] "%(Te[idx0], Te[idx1])) + for grid_idx in range(self.param.n_grids): + bte_idx = gidx_to_pidx_map[grid_idx] + ni = species_densities[TPSINDEX.ION_IDX][bte_idx] + ne = species_densities[TPSINDEX.ELE_IDX][bte_idx] + n0 = species_densities[TPSINDEX.NEU_IDX][bte_idx] + Tg = heavy_temp[bte_idx] + Te = electron_temp[bte_idx] - # print("at E/n0 min max, ne = %.12E [1/m^3] \t ne = %.12E [1/m^3] "%(ne[idx0], ne[idx1])) - # print("at E/n0 min max, ni = %.12E [1/m^3] \t ni = %.12E [1/m^3] "%(ni[idx0], ni[idx1])) - # print("at E/n0 min max, n0 = %.12E [1/m^3] \t n0 = %.12E [1/m^3] "%(n0[idx0], n0[idx1])) - print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg), np.max(Tg))) - print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te), np.max(Te))) - - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne), np.max(ne))) - print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni), np.max(ni))) - print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0), np.max(n0))) + eRe = efield[TPSINDEX.EF_RE_IDX][bte_idx] + eIm = efield[TPSINDEX.EF_IM_IDX][bte_idx] + eMag = np.sqrt(eRe**2 + eIm **2) + eByn0 = eMag/n0/self.param.Td_fac - self.bte_solver.set_boltzmann_parameters(grid_idx, n0, ne, ni, Tg, self.param.solver_type) - self.bte_f0 = self.bte_solver.initialize(0, self.param.n_pts, "maxwellian") + if self.param.verbose == 1 : + print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) + print("Efreq = %.4E [1/s]" %(self.param.Efreq)) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) + + # idx0 = np.argmin(eByn0) + # idx1 = np.argmax(eByn0) + # print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(eByn0[idx0], eByn0[idx1])) + # print("at E/n0 min max, Tg = %.12E [K] \t Tg = %.12E [K] "%(Tg[idx0], Tg[idx1])) + # print("at E/n0 min max, Te = %.12E [K] \t Te = %.12E [K] "%(Te[idx0], Te[idx1])) + + # print("at E/n0 min max, ne = %.12E [1/m^3] \t ne = %.12E [1/m^3] "%(ne[idx0], ne[idx1])) + # print("at E/n0 min max, ni = %.12E [1/m^3] \t ni = %.12E [1/m^3] "%(ni[idx0], ni[idx1])) + # print("at E/n0 min max, n0 = %.12E [1/m^3] \t n0 = %.12E [1/m^3] "%(n0[idx0], n0[idx1])) + + print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg), np.max(Tg))) + print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te), np.max(Te))) + + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne), np.max(ne))) + print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni), np.max(ni))) + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0), np.max(n0))) + + #self.bte_solver.set_boltzmann_parameters(grid_idx, n0, ne, ni, Tg, self.param.solver_type) + self.bte_solver.set_boltzmann_parameter(grid_idx, "n0", n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ne", ne) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ni", ni) + self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg", Tg) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", eRe) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", eRe) + if self.param.Efreq == 0: ef_t = lambda t : eMag else: ef_t = lambda t : eRe * np.cos(2 * np.pi * self.param.Efreq * t) + eIm * np.sin(2 * np.pi * self.param.Efreq * t) - if self.param.use_gpu==1: - dev_id = self.param.dev_id - self.bte_solver.host_to_device_setup(dev_id, 0) - - eRe_d = cp.asarray(eRe) - eIm_d = cp.asarray(eIm) - - if self.param.Efreq == 0: - ef_t = lambda t : cp.sqrt(eRe_d**2 + eIm_d**2) - else: - ef_t = lambda t : eRe_d * cp.cos(2 * cp.pi * self.param.Efreq * t) + eIm_d * cp.sin(2 * cp.pi * self.param.Efreq * t) - - self.bte_f0 = cp.asarray(self.bte_f0) - self.bte_solver.set_efield_function(ef_t) return def solve(self): - grid_idx = self.param.grid_idx - ff , qoi = self.bte_solver.solve(grid_idx, self.bte_f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) - ev = np.linspace(1e-3, self.bte_solver._par_ev_range[0][1], 500) - ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) - - if self.param.use_gpu==1: - self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) - - ff_r = cp.asnumpy(ff_r) - for k, v in qoi.items(): - qoi[k] = cp.asnumpy(v) - - csv_write = self.param.export_csv - if csv_write: - fname = self.param.out_fname - with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: - writer = csv.writer(f,delimiter=',') - # write the header - header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(self.param.collisions): - header.append(str(g)) - - writer.writerow(header) + xp = self.xp_module + for grid_idx in range(self.param.n_grids): + + if self.grid_idx_to_npts[grid_idx] ==0: + continue + + if self.param.verbose==1: + print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx])) + f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") + self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) + + if self.param.use_gpu==1: + dev_id = self.param.dev_id + self.bte_solver.host_to_device_setup(dev_id, grid_idx) - n0 = self.bte_solver._par_bte_params[grid_idx]["n0"] - ne = self.bte_solver._par_bte_params[grid_idx]["ne"] - ni = self.bte_solver._par_bte_params[grid_idx]["ni"] - Tg = self.bte_solver._par_bte_params[grid_idx]["Tg"] + with cp.cuda.Device(dev_id): + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + ef_t = lambda t : cp.sqrt(eRe_d**2 + eIm_d**2) + else: + ef_t = lambda t : eRe_d * cp.cos(2 * cp.pi * self.param.Efreq * t) + eIm_d * cp.sin(2 * cp.pi * self.param.Efreq * t) + + self.bte_solver.set_efield_function(ef_t) - eRe = self.bte_solver._par_ef_t(0) - eIm = 0 * self.bte_solver._par_ef_t(0) + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) + ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) + + if self.param.use_gpu==1: + self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) - if self.param.use_gpu==1: - eRe = cp.asnumpy(eRe) - eIm = cp.asnumpy(eIm) + with cp.cuda.Device(dev_id): + ff_r = cp.asnumpy(ff_r) + for k, v in qoi.items(): + qoi[k] = cp.asnumpy(v) + + csv_write = self.param.export_csv + if csv_write: + fname = self.param.out_fname + csv_mode = 'a' - eMag = np.sqrt(eRe**2 + eIm**2) - data = np.concatenate((n0.reshape(-1,1), ne.reshape(-1,1), ni.reshape(-1,1), Tg.reshape(-1,1), eMag.reshape(-1,1), qoi["energy"].reshape(-1,1), qoi["mobility"].reshape(-1,1), qoi["diffusion"].reshape(-1,1)), axis=1) - for col_idx, g in enumerate(self.param.collisions): - data = np.concatenate((data, qoi["rates"][col_idx].reshape(-1,1)), axis=1) + if grid_idx == 0: + csv_mode = 'w' - writer.writerows(data) + with open("%s_qoi.csv"%fname, csv_mode, encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(self.param.collisions): + header.append(str(g)) + + if grid_idx ==0: + writer.writerow(header) + + n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + + eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + eMag = np.sqrt(eRe**2 + eIm**2) + + data = np.concatenate((n0.reshape(-1,1), ne.reshape(-1,1), ni.reshape(-1,1), Tg.reshape(-1,1), eMag.reshape(-1,1), qoi["energy"].reshape(-1,1), qoi["mobility"].reshape(-1,1), qoi["diffusion"].reshape(-1,1)), axis=1) + for col_idx, g in enumerate(self.param.collisions): + data = np.concatenate((data, qoi["rates"][col_idx].reshape(-1,1)), axis=1) + + writer.writerows(data) - plot_data = self.param.plot_data - if plot_data: - num_sh = len(self.bte_solver._par_lm[grid_idx]) - num_subplots = num_sh - num_plt_cols = min(num_sh, 4) - num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) - fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) - plt_idx = 1 - n_pts_step = self.param.n_pts // 20 + plot_data = self.param.plot_data + if plot_data: + num_sh = len(self.bte_solver._par_lm[grid_idx]) + num_subplots = num_sh + num_plt_cols = min(num_sh, 4) + num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) + fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) + plt_idx = 1 + n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 - for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): - plt.subplot(num_plt_rows, num_plt_cols, plt_idx) - for ii in range(0, self.param.n_pts, n_pts_step): - fr = np.abs(ff_r[ii, lm_idx, :]) - plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) - - plt.xlabel(r"energy (eV)") - plt.ylabel(r"$f_%d$"%(lm[0])) - plt.grid(visible=True) - if lm_idx==0: - plt.legend(prop={'size': 6}) + for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): + plt.subplot(num_plt_rows, num_plt_cols, plt_idx) + for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): + fr = np.abs(ff_r[ii, lm_idx, :]) + plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) - plt_idx +=1 - - #plt_idx = num_sh - plt.savefig("%s_plot.png"%(self.param.out_fname)) + plt.xlabel(r"energy (eV)") + plt.ylabel(r"$f_%d$"%(lm[0])) + plt.grid(visible=True) + if lm_idx==0: + plt.legend(prop={'size': 6}) + + plt_idx +=1 + + #plt_idx = num_sh + plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) + plt.close() def push(self, interface): @@ -261,6 +306,7 @@ def push(self, interface): tps.solveBegin() tps.solveStep() tps.push(interface) +boltzmann.grid_setup(interface) boltzmann.fetch(interface) boltzmann.solve() From 6ed4cc0a87657e932a1a61d125fb4c070fa836f9 Mon Sep 17 00:00:00 2001 From: milindasf Date: Sun, 22 Oct 2023 08:57:37 -0500 Subject: [PATCH 03/75] multiple grids batched v-space solver --- src/tps-time-loop.py | 124 ++++++++++++++++++++++++++++--------------- 1 file changed, 80 insertions(+), 44 deletions(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 47dba0e17..27bbb8fed 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -23,11 +23,11 @@ class BoltzmannSolverParams(): ev_max = 16 # v-space grid truncation (eV) n_grids = 4 # number of v-space grids - dt = 1e-2 # [] non-dimentionalized time w.r.t. oscilation period - cycles = 10 # number of max cycles to evolve + dt = 1e-3 # [] non-dimentionalized time w.r.t. oscilation period + cycles = 3 # number of max cycles to evolve solver_type = "transient" # two modes, "transient" or "steady-state" - atol = 1e-16 # absolute tolerance - rtol = 1e-12 # relative tolerance + atol = 1e-10 # absolute tolerance + rtol = 1e-10 # relative tolerance max_iter = 1000 # max iterations for the newton solver ee_collisions = 0 # enable electron-electron Coulombic effects @@ -72,8 +72,8 @@ def __init__(self, tps): self.tps = tps self.param = BoltzmannSolverParams() # overide the default params, based on the config.ini file. - self.param.Efreq = 0#tps.getRequiredInput("em/current_frequency") - self.param.solver_type = "steady-state" + self.param.Efreq = tps.getRequiredInput("em/current_frequency") + #self.param.solver_type = "steady-state" self.xp_module = np @@ -81,11 +81,20 @@ def grid_setup(self, interface): xp = self.xp_module Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] Te_min, Te_max = xp.min(Te), xp.max(Te) - Te_b = xp.linspace(Te_min, Te_max + 1e-12, self.param.n_grids + 1) + Te_b = xp.linspace(Te_min, Te_max, self.param.n_grids, endpoint=False) + + dist_mat = xp.zeros((len(Te), self.param.n_grids)) + + for i in range(self.param.n_grids): + dist_mat[:,i] = xp.abs(Te-Te_b[i]) + + membership = xp.argmin(dist_mat, axis=1) + grid_idx_to_spatial_pts_map = list() for b_idx in range(self.param.n_grids): - grid_idx_to_spatial_pts_map.append(xp.argwhere(xp.logical_and(Te>= Te_b[b_idx], Te < Te_b[b_idx+1]))[:,0]) + #grid_idx_to_spatial_pts_map.append(xp.argwhere(xp.logical_and(Te>= Te_b[b_idx], Te < Te_b[b_idx+1]))[:,0]) + grid_idx_to_spatial_pts_map.append(xp.argwhere(membership==b_idx)[:,0]) self.grid_idx_to_npts = xp.array([len(a) for a in grid_idx_to_spatial_pts_map], dtype=xp.int32) self.grid_idx_to_spatial_idx_map = grid_idx_to_spatial_pts_map @@ -114,6 +123,7 @@ def fetch(self, interface): heavy_temp = xp.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) tps_npts = len(heavy_temp) + self.tps_npts = tps_npts electron_temp = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) efield = xp.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) @@ -173,9 +183,14 @@ def fetch(self, interface): return def solve(self): - xp = self.xp_module - for grid_idx in range(self.param.n_grids): + xp = self.xp_module + csv_write = self.param.export_csv + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + + if csv_write ==1 : + data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) + for grid_idx in range(self.param.n_grids): if self.grid_idx_to_npts[grid_idx] ==0: continue @@ -200,8 +215,12 @@ def solve(self): self.bte_solver.set_efield_function(ef_t) f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") - ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) - + try: + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + except: + print("solver failed for v-space gird no %d"%(grid_idx)) + continue + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) @@ -212,43 +231,35 @@ def solve(self): ff_r = cp.asnumpy(ff_r) for k, v in qoi.items(): qoi[k] = cp.asnumpy(v) - - csv_write = self.param.export_csv - if csv_write: - fname = self.param.out_fname - csv_mode = 'a' - - if grid_idx == 0: - csv_mode = 'w' - - with open("%s_qoi.csv"%fname, csv_mode, encoding='UTF8') as f: - writer = csv.writer(f,delimiter=',') - # write the header - header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(self.param.collisions): - header.append(str(g)) - - if grid_idx ==0: - writer.writerow(header) - n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") - - eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - eMag = np.sqrt(eRe**2 + eIm**2) - - data = np.concatenate((n0.reshape(-1,1), ne.reshape(-1,1), ni.reshape(-1,1), Tg.reshape(-1,1), eMag.reshape(-1,1), qoi["energy"].reshape(-1,1), qoi["mobility"].reshape(-1,1), qoi["diffusion"].reshape(-1,1)), axis=1) - for col_idx, g in enumerate(self.param.collisions): - data = np.concatenate((data, qoi["rates"][col_idx].reshape(-1,1)), axis=1) + if csv_write==1: + data_csv[gidx_to_pidx_map[grid_idx], 0] = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + data_csv[gidx_to_pidx_map[grid_idx], 1] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + data_csv[gidx_to_pidx_map[grid_idx], 2] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + data_csv[gidx_to_pidx_map[grid_idx], 3] = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + data_csv[gidx_to_pidx_map[grid_idx], 4] = np.sqrt(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")**2 + self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")**2) + data_csv[gidx_to_pidx_map[grid_idx], 5] = qoi["energy"] + data_csv[gidx_to_pidx_map[grid_idx], 6] = qoi["mobility"] + data_csv[gidx_to_pidx_map[grid_idx], 7] = qoi["diffusion"] + + for col_idx, g in enumerate(self.param.collisions): + data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = qoi["rates"][col_idx] - writer.writerows(data) - + + plot_data = self.param.plot_data if plot_data: + + n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + + eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + eMag = np.sqrt(eRe**2 + eIm**2) + num_sh = len(self.bte_solver._par_lm[grid_idx]) num_subplots = num_sh num_plt_cols = min(num_sh, 4) @@ -276,6 +287,31 @@ def solve(self): plt.close() + if csv_write: + fname = self.param.out_fname + with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(self.param.collisions): + header.append(str(g)) + + writer.writerow(header) + # n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + # ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + # ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + # Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + + # eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + # eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + # eMag = np.sqrt(eRe**2 + eIm**2) + + # data = np.concatenate((n0.reshape(-1,1), ne.reshape(-1,1), ni.reshape(-1,1), Tg.reshape(-1,1), eMag.reshape(-1,1), qoi["energy"].reshape(-1,1), qoi["mobility"].reshape(-1,1), qoi["diffusion"].reshape(-1,1)), axis=1) + # for col_idx, g in enumerate(self.param.collisions): + # data = np.concatenate((data, qoi["rates"][col_idx].reshape(-1,1)), axis=1) + + writer.writerows(data_csv) + def push(self, interface): pass #electron_temperature = np.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) From 7a2d75bf41cefa8aa961778b27b42748d202c778 Mon Sep 17 00:00:00 2001 From: milindasf Date: Sun, 22 Oct 2023 17:53:28 -0500 Subject: [PATCH 04/75] multiple v-space grids added, and boltzmann to tps push code added. --- src/tps-time-loop.py | 84 ++++++++++++++++++++++++++++++++------------ 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 27bbb8fed..9094fa6c5 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -6,6 +6,7 @@ import scipy.constants import csv import matplotlib.pyplot as plt +from time import perf_counter as time # set path to C++ TPS library path = os.path.abspath(os.path.dirname(sys.argv[0])) @@ -24,7 +25,7 @@ class BoltzmannSolverParams(): n_grids = 4 # number of v-space grids dt = 1e-3 # [] non-dimentionalized time w.r.t. oscilation period - cycles = 3 # number of max cycles to evolve + cycles = 10 # number of max cycles to evolve solver_type = "transient" # two modes, "transient" or "steady-state" atol = 1e-10 # absolute tolerance rtol = 1e-10 # relative tolerance @@ -45,7 +46,7 @@ class BoltzmannSolverParams(): threads = 16 # number of threads to use to assemble operators grid_idx = 0 - output_dir = "batched_bte" + output_dir = "batched_bte1" out_fname = output_dir + "/tps" # some useful units and conversion factors. @@ -72,30 +73,52 @@ def __init__(self, tps): self.tps = tps self.param = BoltzmannSolverParams() # overide the default params, based on the config.ini file. - self.param.Efreq = tps.getRequiredInput("em/current_frequency") - #self.param.solver_type = "steady-state" + self.param.Efreq = 0 #tps.getRequiredInput("em/current_frequency") + self.param.solver_type = "steady-state" self.xp_module = np + + boltzmann_dir = self.param.output_dir + isExist = os.path.exists(boltzmann_dir) + if not isExist: + # Create a new directory because it does not exist + os.makedirs(boltzmann_dir) + #print("directory %s is created!"%(dir_name)) + return + + def parse_config_file(self): + """ + add the configuaraion file parse code here, + which overides the default BoltzmannSolverParams + """ + pass def grid_setup(self, interface): + """ + Perform the boltzmann grid setup. + we generate v-space grid for each spatial point cluster in the parameter space, + where, at the moment the clustering is determined based on the electron temperature + computed from the TPS code. + """ xp = self.xp_module Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] Te_min, Te_max = xp.min(Te), xp.max(Te) Te_b = xp.linspace(Te_min, Te_max, self.param.n_grids, endpoint=False) + t1 = time() dist_mat = xp.zeros((len(Te), self.param.n_grids)) for i in range(self.param.n_grids): dist_mat[:,i] = xp.abs(Te-Te_b[i]) membership = xp.argmin(dist_mat, axis=1) - - grid_idx_to_spatial_pts_map = list() for b_idx in range(self.param.n_grids): #grid_idx_to_spatial_pts_map.append(xp.argwhere(xp.logical_and(Te>= Te_b[b_idx], Te < Te_b[b_idx+1]))[:,0]) grid_idx_to_spatial_pts_map.append(xp.argwhere(membership==b_idx)[:,0]) + np.save("%s_gidx_to_pidx.npy"%(self.param.out_fname), np.array(grid_idx_to_spatial_pts_map, dtype=object), allow_pickle=True) + self.grid_idx_to_npts = xp.array([len(a) for a in grid_idx_to_spatial_pts_map], dtype=xp.int32) self.grid_idx_to_spatial_idx_map = grid_idx_to_spatial_pts_map @@ -114,7 +137,9 @@ def grid_setup(self, interface): for grid_idx in range(self.param.n_grids): print("setting up grid %d"%(grid_idx)) self.bte_solver.assemble_operators(grid_idx) - + + t2=time() + print("time for boltzmann grid setup = %.4E"%(t2-t1)) return def fetch(self, interface): @@ -148,16 +173,6 @@ def fetch(self, interface): print("Efreq = %.4E [1/s]" %(self.param.Efreq)) print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) - # idx0 = np.argmin(eByn0) - # idx1 = np.argmax(eByn0) - # print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(eByn0[idx0], eByn0[idx1])) - # print("at E/n0 min max, Tg = %.12E [K] \t Tg = %.12E [K] "%(Tg[idx0], Tg[idx1])) - # print("at E/n0 min max, Te = %.12E [K] \t Te = %.12E [K] "%(Te[idx0], Te[idx1])) - - # print("at E/n0 min max, ne = %.12E [1/m^3] \t ne = %.12E [1/m^3] "%(ne[idx0], ne[idx1])) - # print("at E/n0 min max, ni = %.12E [1/m^3] \t ni = %.12E [1/m^3] "%(ni[idx0], ni[idx1])) - # print("at E/n0 min max, n0 = %.12E [1/m^3] \t n0 = %.12E [1/m^3] "%(n0[idx0], n0[idx1])) - print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg), np.max(Tg))) print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te), np.max(Te))) @@ -183,13 +198,19 @@ def fetch(self, interface): return def solve(self): + """ + perform the BTE solve, supports both stead-state solution (static E-field) + and time-periodic solutions for the oscillatory E-fields + """ xp = self.xp_module csv_write = self.param.export_csv gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map if csv_write ==1 : data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) - + + t1 = time() + self.qoi = list() for grid_idx in range(self.param.n_grids): if self.grid_idx_to_npts[grid_idx] ==0: continue @@ -217,9 +238,12 @@ def solve(self): f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") try: ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + self.qoi.append(qoi) except: print("solver failed for v-space gird no %d"%(grid_idx)) - continue + # self.qoi.append(None) + # continue + sys.exit(0) ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) @@ -286,6 +310,8 @@ def solve(self): plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) plt.close() + t2 = time() + print("time for boltzmann v-space solve = %.4E"%(t2- t1)) if csv_write: fname = self.param.out_fname @@ -313,10 +339,23 @@ def solve(self): writer.writerows(data_csv) def push(self, interface): - pass - #electron_temperature = np.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) - #electron_temperature[:] = 1. + Te = np.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) + rate_coeff = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((2, self.tps_npts)) + + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + + for grid_idx in range(self.param.n_grids): + Te[gidx_to_pidx_map[grid_idx]] = self.qoi[grid_idx]["energy"]/1.5 + rr = self.qoi[grid_idx]["rates"] + # here rr should be in the same ordering as the collision model prescribed to the Boltzmann solver. + + rate_coeff[0][gidx_to_pidx_map[grid_idx]] = rr[0] + rate_coeff[1][gidx_to_pidx_map[grid_idx]] = rr[1] + rate_coeff[1][rate_coeff[1]<0] = 0.0 + + return + @@ -345,6 +384,7 @@ def push(self, interface): boltzmann.grid_setup(interface) boltzmann.fetch(interface) boltzmann.solve() +boltzmann.push(interface) # while it < max_iters: # tps.solveStep() From 01e51bfba0533c32761e2bd74a4ee3ce8e3cc052 Mon Sep 17 00:00:00 2001 From: milindasf Date: Wed, 25 Oct 2023 10:44:13 -0500 Subject: [PATCH 05/75] tps bte batched solver integration with Parla. --- src/tps-time-loop.py | 358 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 315 insertions(+), 43 deletions(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 9094fa6c5..4b04c0478 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -7,6 +7,47 @@ import csv import matplotlib.pyplot as plt from time import perf_counter as time +import configparser +import cupy as cp +import enum + +class profile_t: + def __init__(self,name): + self.name = name + self.seconds=0 + self.snap=0 + self._pri_time =0 + self.iter =0 + + def __add__(self,o): + assert(self.name==o.name) + self.seconds+=o.seconds + self.snap+=o.snap + self.iter+=o.iter + return self + + def start(self): + self._pri_time = time() + + def stop(self): + self.seconds-=self._pri_time + self.snap=-self._pri_time + + self._pri_time = time() + + self.seconds +=self._pri_time + self.snap += self._pri_time + self.iter+=1 + + def reset(self): + self.seconds=0 + self.snap=0 + self._pri_time =0 + self.iter =0 + +def min_mean_max(a, comm: MPI.Comm): + return (comm.allreduce(a, MPI.MIN) , comm.allreduce(a, MPI.SUM)/comm.Get_size(), comm.allreduce(a, MPI.MAX)) + # set path to C++ TPS library path = os.path.abspath(os.path.dirname(sys.argv[0])) @@ -14,11 +55,26 @@ sys.path.append(path + "/../../boltzmann/BESolver/python") import libtps from bte_0d3v_batched import bte_0d3v_batched as BoltzmannSolver -import cupy as cp + +WITH_PARLA = 1 +if WITH_PARLA: + try: + from parla import Parla + from parla.tasks import spawn, TaskSpace + from parla.devices import cpu, gpu + except: + print("Error occured during Parla import. Please make sure Parla is installed properly.") + sys.exit(0) + + +class pp(enum.IntEnum): + SETUP = 0 + SOLVE = 1 + LAST = 2 class BoltzmannSolverParams(): - sp_order = 8 # B-spline order in v-space - spline_qpts = 10 # number of Gauss-Legendre quadrature points per knot interval + sp_order = 3 # B-spline order in v-space + spline_qpts = 5 # number of Gauss-Legendre quadrature points per knot interval Nr = 127 # number of B-splines used in radial direction l_max = 1 # spherical modes uses, 0, to l_max ev_max = 16 # v-space grid truncation (eV) @@ -69,12 +125,12 @@ class TPSINDEX(): class Boltzmann0D2VBactchedSolver: - def __init__(self, tps): + def __init__(self, tps, comm): self.tps = tps + self.comm : MPI.Comm = comm self.param = BoltzmannSolverParams() # overide the default params, based on the config.ini file. - self.param.Efreq = 0 #tps.getRequiredInput("em/current_frequency") - self.param.solver_type = "steady-state" + self.parse_config_file(sys.argv[2]) self.xp_module = np @@ -84,14 +140,52 @@ def __init__(self, tps): # Create a new directory because it does not exist os.makedirs(boltzmann_dir) #print("directory %s is created!"%(dir_name)) + + profile_tt = [None] * int(pp.LAST) + profile_nn = ["setup", "solve", "last"] + for i in range(pp.LAST): + profile_tt[i] = profile_t(profile_nn[i]) + + self.profile_tt = profile_tt + self.profile_nn = profile_nn + return - def parse_config_file(self): + def parse_config_file(self, fname): """ add the configuaraion file parse code here, which overides the default BoltzmannSolverParams """ - pass + config = configparser.ConfigParser() + print("[Boltzmann] reading configure file given by : ", fname) + config.read(fname) + + self.param.sp_order = int(config.get("boltzmannSolver", "sp_order").split("#")[0].strip()) + self.param.spline_qpts = int(config.get("boltzmannSolver", "spline_qpts").split("#")[0].strip()) + + self.param.Nr = int(config.get("boltzmannSolver", "Nr").split("#")[0].strip()) + self.param.l_max = int(config.get("boltzmannSolver", "l_max").split("#")[0].strip()) + self.param.n_grids = int(config.get("boltzmannSolver", "n_grids").split("#")[0].strip()) + self.param.dt = float(config.get("boltzmannSolver", "dt").split("#")[0].strip()) + self.param.cycles = float(config.get("boltzmannSolver", "cycles").split("#")[0].strip()) + self.param.solver_type = str(config.get("boltzmannSolver", "solver_type").split("#")[0].strip()) + self.param.atol = float(config.get("boltzmannSolver", "atol").split("#")[0].strip()) + self.param.rtol = float(config.get("boltzmannSolver", "rtol").split("#")[0].strip()) + self.param.max_iter = int(config.get("boltzmannSolver", "max_iter").split("#")[0].strip()) + self.param.ee_collisions = int(config.get("boltzmannSolver", "ee_collisions").split("#")[0].strip()) + self.param.use_gpu = int(config.get("boltzmannSolver", "use_gpu").split("#")[0].strip()) + #self.param.collisions = config.get("boltzmannSolver", "collisions").split("#")[0] + + self.param.export_csv = int(config.get("boltzmannSolver", "export_csv").split("#")[0].strip()) + self.param.plot_data = int(config.get("boltzmannSolver", "plot_data").split("#")[0].strip()) + self.param.Efreq = float(config.get("boltzmannSolver", "Efreq").split("#")[0].strip()) + self.param.verbose = int(config.get("boltzmannSolver", "verbose").split("#")[0].strip()) + self.param.Te = float(config.get("boltzmannSolver", "Te").split("#")[0].strip()) + + self.param.threads = int(config.get("boltzmannSolver", "threads").split("#")[0].strip()) + self.param.output_dir = str(config.get("boltzmannSolver", "output_dir").split("#")[0].strip()) + self.param.out_fname = self.param.output_dir + "/" + str(config.get("boltzmannSolver", "output_fname").split("#")[0].strip()) + return def grid_setup(self, interface): """ @@ -100,12 +194,14 @@ def grid_setup(self, interface): where, at the moment the clustering is determined based on the electron temperature computed from the TPS code. """ + + self.profile_tt[pp.SETUP].start() + xp = self.xp_module Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] Te_min, Te_max = xp.min(Te), xp.max(Te) Te_b = xp.linspace(Te_min, Te_max, self.param.n_grids, endpoint=False) - t1 = time() dist_mat = xp.zeros((len(Te), self.param.n_grids)) for i in range(self.param.n_grids): @@ -131,15 +227,14 @@ def grid_setup(self, interface): self.bte_solver = BoltzmannSolver(self.param, ev_max ,Te , nr, lm_modes, self.param.n_grids, self.param.collisions) if self.param.verbose==1: - print("grid energy max (eV) \n", ev_max) + print("grid energy max (eV) \n", ev_max, flush = True) # compute BTE operators for grid_idx in range(self.param.n_grids): - print("setting up grid %d"%(grid_idx)) + print("setting up grid %d"%(grid_idx), flush = True) self.bte_solver.assemble_operators(grid_idx) - t2=time() - print("time for boltzmann grid setup = %.4E"%(t2-t1)) + self.profile_tt[pp.SETUP].stop() return def fetch(self, interface): @@ -189,12 +284,6 @@ def fetch(self, interface): self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", eRe) self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", eRe) - if self.param.Efreq == 0: - ef_t = lambda t : eMag - else: - ef_t = lambda t : eRe * np.cos(2 * np.pi * self.param.Efreq * t) + eIm * np.sin(2 * np.pi * self.param.Efreq * t) - - self.bte_solver.set_efield_function(ef_t) return def solve(self): @@ -202,21 +291,34 @@ def solve(self): perform the BTE solve, supports both stead-state solution (static E-field) and time-periodic solutions for the oscillatory E-fields """ + + if WITH_PARLA==1: + self.solve_with_parla() + return + else: + self.solve_seq() + return + + def solve_seq(self): xp = self.xp_module csv_write = self.param.export_csv gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + self.qoi = [None for grid_idx in range(self.param.n_grids)] + self.ff = [None for grid_idx in range(self.param.n_grids)] + if csv_write ==1 : data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) t1 = time() - self.qoi = list() + for grid_idx in range(self.param.n_grids): + if self.grid_idx_to_npts[grid_idx] ==0: continue if self.param.verbose==1: - print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx])) + print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) @@ -227,24 +329,36 @@ def solve(self): with cp.cuda.Device(dev_id): eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - + if self.param.Efreq == 0: - ef_t = lambda t : cp.sqrt(eRe_d**2 + eIm_d**2) + ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) else: - ef_t = lambda t : eRe_d * cp.cos(2 * cp.pi * self.param.Efreq * t) + eIm_d * cp.sin(2 * cp.pi * self.param.Efreq * t) + ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - self.bte_solver.set_efield_function(ef_t) - + else: + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) + else: + ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) + + self.bte_solver.set_efield_function(grid_idx, ef_t) f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") try: ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) - self.qoi.append(qoi) + self.qoi[grid_idx] = qoi + self.ff [grid_idx] = ff except: print("solver failed for v-space gird no %d"%(grid_idx)) # self.qoi.append(None) # continue sys.exit(0) - + + if self.param.export_csv ==0 and self.param.plot_data==0: + continue + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) @@ -269,9 +383,6 @@ def solve(self): for col_idx, g in enumerate(self.param.collisions): data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = qoi["rates"][col_idx] - - - plot_data = self.param.plot_data if plot_data: @@ -323,21 +434,182 @@ def solve(self): header.append(str(g)) writer.writerow(header) - # n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - # ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - # ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - # Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + writer.writerows(data_csv) + + return + + def solve_with_parla(self): + csv_write = self.param.export_csv + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + self.qoi = [None for grid_idx in range(self.param.n_grids)] + self.ff = [None for grid_idx in range(self.param.n_grids)] + + if csv_write ==1 : + data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) + + + rank = self.comm.Get_rank() + npes = self.comm.Get_size() + + with Parla(): + num_gpus = len(gpu) + grid_to_device_map = lambda gidx : gidx % num_gpus + @spawn(placement=cpu, vcus=0) + async def __main__(): + self.profile_tt[pp.SETUP].start() + ts_0 = TaskSpace("T") + for grid_idx in range(self.param.n_grids): + @spawn(ts_0[grid_idx], placement=[cpu], vcus=0.0) + def t0(): + print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) + f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") + self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) + + if self.param.use_gpu == 1: + dev_id = grid_to_device_map(grid_idx) + self.bte_solver.host_to_device_setup(dev_id, grid_idx) + xp = cp + + with cp.cuda.Device(dev_id): + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) + else: + ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) + else: + xp = np + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) + else: + ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) + + self.bte_solver.set_efield_function(grid_idx, ef_t) + return - # eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - # eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - # eMag = np.sqrt(eRe**2 + eIm**2) + await ts_0 - # data = np.concatenate((n0.reshape(-1,1), ne.reshape(-1,1), ni.reshape(-1,1), Tg.reshape(-1,1), eMag.reshape(-1,1), qoi["energy"].reshape(-1,1), qoi["mobility"].reshape(-1,1), qoi["diffusion"].reshape(-1,1)), axis=1) - # for col_idx, g in enumerate(self.param.collisions): - # data = np.concatenate((data, qoi["rates"][col_idx].reshape(-1,1)), axis=1) + self.profile_tt[pp.SETUP].stop() + if self.param.use_gpu==1: + p1 = [gpu(grid_to_device_map(grid_idx)) for grid_idx in range(self.param.n_grids)] + else: + p1 = [cpu for grid_idx in range(self.param.n_grids)] - writer.writerows(data_csv) + self.profile_tt[pp.SOLVE].start() + ts_1 = TaskSpace("T") + for grid_idx in range(self.param.n_grids): + @spawn(ts_1[grid_idx], placement=[p1[grid_idx]], dependencies=ts_0[grid_idx], vcus=0.0) + def t1(): + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") + print("[Boltzmann] %d / %d launching grid %d on %s"%(rank, npes, grid_idx, p1[grid_idx])) + try: + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + self.ff[grid_idx] = ff + self.qoi[grid_idx] = qoi + except: + print("solver failed for v-space gird no %d"%(grid_idx)) + # self.qoi.append(None) + # continue + sys.exit(0) + + await ts_1 + self.profile_tt[pp.SOLVE].stop() + + t1 = min_mean_max(self.profile_tt[pp.SETUP].seconds, self.comm) + t2 = min_mean_max(self.profile_tt[pp.SOLVE].seconds, self.comm) + print("[Boltzmann] setup (min) = %.4E (s) setup (mean) = %.4E (s) setup (max) = %.4E (s)" % (t1[0],t1[1],t1[2])) + print("[Boltzmann] solve (min) = %.4E (s) solve (mean) = %.4E (s) solve (max) = %.4E (s)" % (t2[0],t2[1],t2[2])) + if self.param.export_csv ==0 and self.param.plot_data==0: + return + + for grid_idx in range(self.param.n_grids): + dev_id = grid_idx % num_gpus + + if self.param.use_gpu==1: + gpu_id = cp.cuda.Device(dev_id) + gpu_id.use() + + ff = self.ff[grid_idx] + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) + ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) + + if self.param.use_gpu==1: + self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) + + qoi = self.qoi[grid_idx] + with cp.cuda.Device(dev_id): + ff_r = cp.asnumpy(ff_r) + for k, v in qoi.items(): + qoi[k] = cp.asnumpy(v) + + if csv_write==1: + data_csv[gidx_to_pidx_map[grid_idx], 0] = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + data_csv[gidx_to_pidx_map[grid_idx], 1] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + data_csv[gidx_to_pidx_map[grid_idx], 2] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + data_csv[gidx_to_pidx_map[grid_idx], 3] = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + data_csv[gidx_to_pidx_map[grid_idx], 4] = np.sqrt(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")**2 + self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")**2) + data_csv[gidx_to_pidx_map[grid_idx], 5] = qoi["energy"] + data_csv[gidx_to_pidx_map[grid_idx], 6] = qoi["mobility"] + data_csv[gidx_to_pidx_map[grid_idx], 7] = qoi["diffusion"] + + for col_idx, g in enumerate(self.param.collisions): + data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = qoi["rates"][col_idx] + + plot_data = self.param.plot_data + if plot_data: + + n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + + eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + eMag = np.sqrt(eRe**2 + eIm**2) + + num_sh = len(self.bte_solver._par_lm[grid_idx]) + num_subplots = num_sh + num_plt_cols = min(num_sh, 4) + num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) + fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) + plt_idx = 1 + n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 + + for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): + plt.subplot(num_plt_rows, num_plt_cols, plt_idx) + for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): + fr = np.abs(ff_r[ii, lm_idx, :]) + plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) + + plt.xlabel(r"energy (eV)") + plt.ylabel(r"$f_%d$"%(lm[0])) + plt.grid(visible=True) + if lm_idx==0: + plt.legend(prop={'size': 6}) + + plt_idx +=1 + + #plt_idx = num_sh + plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) + plt.close() + + if csv_write: + fname = self.param.out_fname + with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(self.param.collisions): + header.append(str(g)) + + writer.writerow(header) + writer.writerows(data_csv) + def push(self, interface): Te = np.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) rate_coeff = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((2, self.tps_npts)) @@ -370,7 +642,7 @@ def push(self, interface): tps.chooseSolver() tps.initialize() -boltzmann = Boltzmann0D2VBactchedSolver(tps) +boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) interface = libtps.Tps2Boltzmann(tps) tps.initInterface(interface) From 31ec57f6714801c06275728c8e08096aaeee8791 Mon Sep 17 00:00:00 2001 From: milindasf Date: Wed, 25 Oct 2023 17:23:22 -0500 Subject: [PATCH 06/75] k-means clustering updated --- src/tps-time-loop.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 4b04c0478..bf061fc7d 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -201,9 +201,22 @@ def grid_setup(self, interface): Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] Te_min, Te_max = xp.min(Te), xp.max(Te) Te_b = xp.linspace(Te_min, Te_max, self.param.n_grids, endpoint=False) - dist_mat = xp.zeros((len(Te), self.param.n_grids)) + for iter in range(50): + #print("clustering iteration ", iter, Te_b) + for i in range(self.param.n_grids): + dist_mat[:,i] = xp.abs(Te-Te_b[i]) + + membership = xp.argmin(dist_mat, axis=1) + Te_b1 = np.array([np.mean(Te[xp.argwhere(membership==i)[:,0]]) for i in range(self.param.n_grids)]) + rel_error = np.max(np.abs(1 - Te_b1/Te_b)) + Te_b = Te_b1 + + if rel_error < 1e-4: + break + + print("K-means Te clusters ", Te_b) for i in range(self.param.n_grids): dist_mat[:,i] = xp.abs(Te-Te_b[i]) From 00f02dcfee5215ff31a154dba667a49cd87168fb Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 27 Oct 2023 12:11:32 -0500 Subject: [PATCH 07/75] Add spatial coordinate getter in tps2boltzamann --- src/tps2Boltzmann.cpp | 24 +++++++++++++++++++++++- src/tps2Boltzmann.hpp | 6 ++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index 171313f0d..5ab4564fe 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -73,6 +73,10 @@ class CPUData { size_t stride_; }; +void idenity_fun(const Vector & x, Vector & out) { + for ( int i(0); i < x.Size(); ++i ) out[i] = x[i]; +} + Tps2Boltzmann::Tps2Boltzmann(Tps *tps) : NIndexes(7), tps_(tps), all_fes_(nullptr) { // Assert we have a couple solver; assert(tps->isFlowEMCoupled()); @@ -164,6 +168,13 @@ void Tps2Boltzmann::init(M2ulPhyS *flowSolver) { scalar_interpolator_->AddDomainInterpolator(new mfem::IdentityInterpolator()); scalar_interpolator_->SetAssemblyLevel(assembly_level); scalar_interpolator_->Assemble(); + + // Spatial coordinates + spatial_coord_fes_ = new mfem::ParFiniteElementSpace(pmesh, fec_native, pmesh->Dimension(), mfem::Ordering::byNODES); + spatial_coordinates_ = new mfem::ParGridFunction(spatial_coord_fes_); + mfem::VectorFunctionCoefficient coord_fun(pmesh->Dimension(), + std::function(idenity_fun)); + spatial_coordinates_->ProjectCoefficient(coord_fun); } void Tps2Boltzmann::interpolateFromNativeFES(const ParGridFunction &input, Tps2Boltzmann::Index index) { @@ -207,6 +218,9 @@ Tps2Boltzmann::~Tps2Boltzmann() { // Delete monolithic function space delete all_fes_; + delete spatial_coord_fes_; + delete spatial_coordinates_; + // Delete finite element collection delete fec_; } @@ -251,6 +265,10 @@ void tps2bolzmann(py::module &m) { py::class_(m, "Tps2Boltzmann") .def(py::init()) + .def("HostReadSpatialCoordinates", + [](const TPS::Tps2Boltzmann &interface) { + return std::unique_ptr(new TPS::CPUDataRead(interface.SpatialCoordinates())); + }) .def("HostRead", [](const TPS::Tps2Boltzmann &interface, TPS::Tps2Boltzmann::Index index) { return std::unique_ptr(new TPS::CPUDataRead(interface.Field(index))); @@ -261,7 +279,11 @@ void tps2bolzmann(py::module &m) { }) .def("HostReadWrite", [](TPS::Tps2Boltzmann &interface, TPS::Tps2Boltzmann::Index index) { return std::unique_ptr(new TPS::CPUData(interface.Field(index), true)); - }); + }) + .def("EfieldAngularFreq", &TPS::Tps2Boltzmann::EfieldAngularFreq) + .def("Nspecies", &TPS::Tps2Boltzmann::Nspecies) + .def("NeFiledComps", &TPS::Tps2Boltzmann::NeFieldComps) + .def("nComponents", &TPS::Tps2Boltzmann::nComponents); } } // namespace tps_wrappers #endif diff --git a/src/tps2Boltzmann.hpp b/src/tps2Boltzmann.hpp index db8813458..4895c523b 100644 --- a/src/tps2Boltzmann.hpp +++ b/src/tps2Boltzmann.hpp @@ -99,6 +99,9 @@ class Tps2Boltzmann { const mfem::ParFiniteElementSpace &NativeFes(Index index) const { return *(list_native_fes_[index]); } mfem::ParFiniteElementSpace &NativeFes(Index index) { return *(list_native_fes_[index]); } + const mfem::ParGridFunction & SpatialCoordinates() const { return *spatial_coordinates_; } + mfem::ParGridFunction & SpatialCoordinates() { return *spatial_coordinates_; } + const mfem::ParGridFunction &Field(Index index) const { return *(fields_[index]); } mfem::ParGridFunction &Field(Index index) { return *(fields_[index]); } @@ -135,6 +138,8 @@ class Tps2Boltzmann { mfem::ParFiniteElementSpace *reaction_rates_fes_; mfem::ParFiniteElementSpace **list_fes_; + mfem::ParFiniteElementSpace *spatial_coord_fes_; + //! Function spaces using the native TPS fec mfem::ParFiniteElementSpace *species_densities_native_fes_; mfem::ParFiniteElementSpace *efield_native_fes_; @@ -147,6 +152,7 @@ class Tps2Boltzmann { //! array of fields see *Index for how to address this mfem::ParGridFunction **fields_; + mfem::ParGridFunction *spatial_coordinates_; double EfieldAngularFreq_; }; From 7c7f37b55e6df82657ad49a14ecf54fc5c480b5c Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 27 Oct 2023 13:16:02 -0500 Subject: [PATCH 08/75] Now the fetch method saves a Paraview file --- src/M2ulPhyS2Boltzmann.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/M2ulPhyS2Boltzmann.cpp b/src/M2ulPhyS2Boltzmann.cpp index 5ba3ee4fc..e3d60bf20 100644 --- a/src/M2ulPhyS2Boltzmann.cpp +++ b/src/M2ulPhyS2Boltzmann.cpp @@ -83,4 +83,22 @@ void M2ulPhyS::push(TPS::Tps2Boltzmann &interface) { delete electronTemperature; } -void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) { return; } +void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) { + + mfem::ParaViewDataCollection paraview_dc("interface", mesh); + paraview_dc.SetPrefixPath("BoltzmannInterface"); + paraview_dc.SetCycle(0); + paraview_dc.SetDataFormat(VTKFormat::BINARY); + paraview_dc.SetTime(0.0); + paraview_dc.RegisterField("Heavy temperature", + &interface.Field(TPS::Tps2Boltzmann::Index::HeavyTemperature)); + paraview_dc.RegisterField("Electron temperature", + &interface.Field(TPS::Tps2Boltzmann::Index::ElectronTemperature)); + paraview_dc.RegisterField("Electric field", + &interface.Field(TPS::Tps2Boltzmann::Index::ElectricField)); + paraview_dc.RegisterField("Species", + &interface.Field(TPS::Tps2Boltzmann::Index::SpeciesDensities)); + paraview_dc.RegisterField("Reaction rates", + &interface.Field(TPS::Tps2Boltzmann::Index::ReactionRates)); + paraview_dc.Save(); + } From 32877b63211535a3bec089db9b01c0410f07ed08 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 27 Oct 2023 13:23:52 -0500 Subject: [PATCH 09/75] Fix spatial coordinates fe order --- src/tps2Boltzmann.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index 5ab4564fe..a6b5800f7 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -170,7 +170,7 @@ void Tps2Boltzmann::init(M2ulPhyS *flowSolver) { scalar_interpolator_->Assemble(); // Spatial coordinates - spatial_coord_fes_ = new mfem::ParFiniteElementSpace(pmesh, fec_native, pmesh->Dimension(), mfem::Ordering::byNODES); + spatial_coord_fes_ = new mfem::ParFiniteElementSpace(pmesh, fec_, pmesh->Dimension(), mfem::Ordering::byNODES); spatial_coordinates_ = new mfem::ParGridFunction(spatial_coord_fes_); mfem::VectorFunctionCoefficient coord_fun(pmesh->Dimension(), std::function(idenity_fun)); From 0066290d690d7e4e01179d73a7c79bab407ef38d Mon Sep 17 00:00:00 2001 From: milindasf Date: Tue, 31 Oct 2023 11:01:11 -0500 Subject: [PATCH 10/75] vtk output added. --- src/tps-time-loop.py | 59 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 10 deletions(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index bf061fc7d..66bae0b18 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -10,6 +10,8 @@ import configparser import cupy as cp import enum +import pandas as pd +import scipy.interpolate class profile_t: def __init__(self,name): @@ -49,6 +51,19 @@ def min_mean_max(a, comm: MPI.Comm): return (comm.allreduce(a, MPI.MIN) , comm.allreduce(a, MPI.SUM)/comm.Get_size(), comm.allreduce(a, MPI.MAX)) +try: + df = pd.read_csv("ionization_rates.csv") + Te = np.array(df["Te[K]"]) + r_arr = np.array(df["Arr[m3/s]"]) + r_csc = np.array(df["CSC_Maxwellian[m3/s]"]) + r_arr = scipy.interpolate.interp1d(Te, r_arr,bounds_error=False, fill_value=0.0) + r_csc = scipy.interpolate.interp1d(Te, r_csc,bounds_error=False, fill_value=0.0) + print("ionization coefficient read from file ") +except: + print("ionization rate coefficient file not found!!") + r_arr = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) + r_csc = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) + # set path to C++ TPS library path = os.path.abspath(os.path.dirname(sys.argv[0])) sys.path.append(path + "/.libs") @@ -624,21 +639,41 @@ def t1(): writer.writerows(data_csv) def push(self, interface): - Te = np.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) - rate_coeff = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((2, self.tps_npts)) + xp = self.xp_module + Te_bte = xp.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) + rate_bte = xp.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((2, self.tps_npts)) + Te_tps = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + species_densities = xp.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, self.tps_npts) + ni = species_densities[TPSINDEX.ION_IDX] + n0 = species_densities[TPSINDEX.NEU_IDX] + ne = species_densities[TPSINDEX.ELE_IDX] + + rate_tps_arr = r_arr(Te_tps) + rate_tps_csc = r_csc(Te_tps) + + rr_bte = np.zeros_like(rate_tps_arr) + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map for grid_idx in range(self.param.n_grids): - Te[gidx_to_pidx_map[grid_idx]] = self.qoi[grid_idx]["energy"]/1.5 + Te_bte[gidx_to_pidx_map[grid_idx]] = (self.qoi[grid_idx]["energy"]/1.5) * self.param.ev_to_K rr = self.qoi[grid_idx]["rates"] # here rr should be in the same ordering as the collision model prescribed to the Boltzmann solver. - - rate_coeff[0][gidx_to_pidx_map[grid_idx]] = rr[0] - rate_coeff[1][gidx_to_pidx_map[grid_idx]] = rr[1] - - rate_coeff[1][rate_coeff[1]<0] = 0.0 - + rr_bte[gidx_to_pidx_map[grid_idx]] = rr[1] + + rr_bte[rr_bte<0] = 0.0 + s0 = rate_tps_arr * n0 + s1 = rate_tps_csc * n0 + + s2 = rr_bte * n0 + + tau = 1e-2 + idx = s2 > tau + rate_bte[0][:] = 0.0 + rate_bte[1][:] = 0.0 + rate_bte[0] = rr_bte + rate_bte[1][idx] = np.abs(1 - s1[idx]/s2[idx]) + return @@ -660,6 +695,9 @@ def push(self, interface): interface = libtps.Tps2Boltzmann(tps) tps.initInterface(interface) +coords = np.array(interface.HostReadSpatialCoordinates(), copy=False) +print(coords.shape) + it = 0 max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") print("Max Iters: ", max_iters) @@ -670,6 +708,7 @@ def push(self, interface): boltzmann.fetch(interface) boltzmann.solve() boltzmann.push(interface) +tps.fetch(interface) # while it < max_iters: # tps.solveStep() From a11bfa5feab64c96b3a5f849cefd2db2d44b2ead Mon Sep 17 00:00:00 2001 From: milindasf Date: Tue, 31 Oct 2023 15:11:19 -0500 Subject: [PATCH 11/75] Te clusters are sorted based on emax --- src/tps-time-loop.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 66bae0b18..db1d01b3e 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -230,7 +230,7 @@ def grid_setup(self, interface): if rel_error < 1e-4: break - + Te_b = np.sort(Te_b) print("K-means Te clusters ", Te_b) for i in range(self.param.n_grids): dist_mat[:,i] = xp.abs(Te-Te_b[i]) From 837566f0254e2cfe2680afe3b190e77414f57f9e Mon Sep 17 00:00:00 2001 From: milindasf Date: Wed, 1 Nov 2023 09:17:54 -0500 Subject: [PATCH 12/75] rel error computation updated --- src/tps-time-loop.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index db1d01b3e..7eb206148 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -652,7 +652,7 @@ def push(self, interface): rate_tps_arr = r_arr(Te_tps) rate_tps_csc = r_csc(Te_tps) - rr_bte = np.zeros_like(rate_tps_arr) + rr_bte = xp.zeros_like(rate_tps_arr) gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map for grid_idx in range(self.param.n_grids): @@ -662,17 +662,17 @@ def push(self, interface): rr_bte[gidx_to_pidx_map[grid_idx]] = rr[1] rr_bte[rr_bte<0] = 0.0 - s0 = rate_tps_arr * n0 - s1 = rate_tps_csc * n0 + s0 = rate_tps_arr * n0 * ni + s1 = rate_tps_csc * n0 * ni - s2 = rr_bte * n0 + s2 = rr_bte * n0 * ni tau = 1e-2 idx = s2 > tau rate_bte[0][:] = 0.0 rate_bte[1][:] = 0.0 rate_bte[0] = rr_bte - rate_bte[1][idx] = np.abs(1 - s1[idx]/s2[idx]) + rate_bte[1][idx] = xp.abs(s2-s1)/xp.max(s2) return From 19e08fc405892779d7821a9606a8aed5774f214e Mon Sep 17 00:00:00 2001 From: milindasf Date: Wed, 1 Nov 2023 09:36:43 -0500 Subject: [PATCH 13/75] minor fix --- src/tps-time-loop.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 7eb206148..ca6f2670f 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -667,12 +667,12 @@ def push(self, interface): s2 = rr_bte * n0 * ni - tau = 1e-2 - idx = s2 > tau + # tau = 1e-2 + # idx = s2 > tau rate_bte[0][:] = 0.0 rate_bte[1][:] = 0.0 rate_bte[0] = rr_bte - rate_bte[1][idx] = xp.abs(s2-s1)/xp.max(s2) + rate_bte[1] = xp.abs(s2-s1)/xp.max(s2) return From 6da87b18cb5732394fcd8904276a3562ecf3ac82 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 5 Jan 2024 09:32:18 -0600 Subject: [PATCH 14/75] Add the dof index to the reaction interface/rename the the python bte solver --- src/M2ulPhyS.cpp | 2 +- src/chemistry.cpp | 8 +- src/chemistry.hpp | 6 +- src/reaction.cpp | 3 + src/reaction.hpp | 4 + src/source_term.cpp | 2 +- src/tps-bte_0d3v.py | 727 +++++++++++++++++++++++++++++++++++++++++++ src/tps-time-loop.py | 710 ++---------------------------------------- test/test_table.cpp | 2 +- 9 files changed, 772 insertions(+), 692 deletions(-) create mode 100755 src/tps-bte_0d3v.py diff --git a/src/M2ulPhyS.cpp b/src/M2ulPhyS.cpp index a8aadc86d..da5d4eee7 100644 --- a/src/M2ulPhyS.cpp +++ b/src/M2ulPhyS.cpp @@ -4184,7 +4184,7 @@ void M2ulPhyS::updateVisualizationVariables() { Th = prim[1 + _nvel]; Te = (in_mix->IsTwoTemperature()) ? prim[_num_equation - 1] : Th; double kfwd[gpudata::MAXREACTIONS], kC[gpudata::MAXREACTIONS]; - in_chem->computeForwardRateCoeffs(Th, Te, kfwd); + in_chem->computeForwardRateCoeffs(Th, Te, n, kfwd); in_chem->computeEquilibriumConstants(Th, Te, kC); // get reaction rates double progressRates[gpudata::MAXREACTIONS]; diff --git a/src/chemistry.cpp b/src/chemistry.cpp index ea5e0c597..52e1744bd 100644 --- a/src/chemistry.cpp +++ b/src/chemistry.cpp @@ -105,6 +105,7 @@ MFEM_HOST_DEVICE Chemistry::~Chemistry() { } } +#if 0 void Chemistry::computeForwardRateCoeffs(const double &T_h, const double &T_e, Vector &kfwd) { kfwd.SetSize(numReactions_); computeForwardRateCoeffs(T_h, T_e, &kfwd[0]); @@ -117,19 +118,21 @@ void Chemistry::computeForwardRateCoeffs(const double &T_h, const double &T_e, V return; } +#endif -MFEM_HOST_DEVICE void Chemistry::computeForwardRateCoeffs(const double &T_h, const double &T_e, double *kfwd) { +MFEM_HOST_DEVICE void Chemistry::computeForwardRateCoeffs(const double &T_h, const double &T_e, const int & dofindex, double *kfwd) { // kfwd.SetSize(numReactions_); for (int r = 0; r < numReactions_; r++) kfwd[r] = 0.0; for (int r = 0; r < numReactions_; r++) { bool isElectronInvolved = isElectronInvolvedAt(r); - kfwd[r] = reactions_[r]->computeRateCoefficient(T_h, T_e, isElectronInvolved); + kfwd[r] = reactions_[r]->computeRateCoefficient(T_h, T_e, dofindex, isElectronInvolved); } return; } +#if 0 // NOTE: if not detailedBalance, equilibrium constant is returned as zero, though it cannot be used. void Chemistry::computeEquilibriumConstants(const double &T_h, const double &T_e, Vector &kC) { kC.SetSize(numReactions_); @@ -147,6 +150,7 @@ void Chemistry::computeEquilibriumConstants(const double &T_h, const double &T_e return; } +#endif MFEM_HOST_DEVICE void Chemistry::computeEquilibriumConstants(const double &T_h, const double &T_e, double *kC) { for (int r = 0; r < numReactions_; r++) kC[r] = 0.0; diff --git a/src/chemistry.hpp b/src/chemistry.hpp index 8a18e87b9..6b6af72f8 100644 --- a/src/chemistry.hpp +++ b/src/chemistry.hpp @@ -96,10 +96,10 @@ class Chemistry { // return Vector of reaction rate coefficients, with the size of numReaction_. // WARNING(marc) I have removed "virtual" qualifier here assuming these functions will not // change for child classes. Correct if wrong - void computeForwardRateCoeffs(const double &T_h, const double &T_e, Vector &kfwd); - MFEM_HOST_DEVICE void computeForwardRateCoeffs(const double &T_h, const double &T_e, double *kfwd); + //void computeForwardRateCoeffs(const double &T_h, const double &T_e, Vector &kfwd); + MFEM_HOST_DEVICE void computeForwardRateCoeffs(const double &T_h, const double &T_e, const int & dofindex, double *kfwd); - void computeEquilibriumConstants(const double &T_h, const double &T_e, Vector &kC); + //void computeEquilibriumConstants(const double &T_h, const double &T_e, Vector &kC); MFEM_HOST_DEVICE void computeEquilibriumConstants(const double &T_h, const double &T_e, double *kC); // return rate coefficients of (reactionIndex)-th reaction. (start from 0) diff --git a/src/reaction.cpp b/src/reaction.cpp index 54c8baa28..448d25515 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -39,6 +39,7 @@ MFEM_HOST_DEVICE Arrhenius::Arrhenius(const double &A, const double &b, const do : Reaction(), A_(A), b_(b), E_(E) {} MFEM_HOST_DEVICE double Arrhenius::computeRateCoefficient(const double &T_h, const double &T_e, + [[maybe_unused]] const int & dofindex, const bool isElectronInvolved) { double temp = (isElectronInvolved) ? T_e : T_h; @@ -49,6 +50,7 @@ MFEM_HOST_DEVICE HoffertLien::HoffertLien(const double &A, const double &b, cons : Reaction(), A_(A), b_(b), E_(E) {} MFEM_HOST_DEVICE double HoffertLien::computeRateCoefficient(const double &T_h, const double &T_e, + [[maybe_unused]] const int & dofindex, const bool isElectronInvolved) { double temp = (isElectronInvolved) ? T_e : T_h; double tempFactor = E_ / BOLTZMANNCONSTANT / temp; @@ -71,6 +73,7 @@ MFEM_HOST_DEVICE Tabulated::Tabulated(const TableInput &input) : Reaction() { MFEM_HOST_DEVICE Tabulated::~Tabulated() { delete table_; } MFEM_HOST_DEVICE double Tabulated::computeRateCoefficient(const double &T_h, const double &T_e, + [[maybe_unused]] const int & dofindex, const bool isElectronInvolved) { double temp = (isElectronInvolved) ? T_e : T_h; return table_->eval(temp); diff --git a/src/reaction.hpp b/src/reaction.hpp index a8507c2f2..4571c7161 100644 --- a/src/reaction.hpp +++ b/src/reaction.hpp @@ -58,6 +58,7 @@ class Reaction { MFEM_HOST_DEVICE virtual ~Reaction() {} MFEM_HOST_DEVICE virtual double computeRateCoefficient(const double &T_h, const double &T_e, + [[maybe_unused]] const int & dofindex, const bool isElectronInvolved = false) { printf("computeRateCoefficient not implemented"); return 0; @@ -77,6 +78,7 @@ class Arrhenius : public Reaction { MFEM_HOST_DEVICE virtual ~Arrhenius() {} MFEM_HOST_DEVICE virtual double computeRateCoefficient(const double &T_h, const double &T_e, + [[maybe_unused]] const int & dofindex, const bool isElectronInvolved = false); }; @@ -97,6 +99,7 @@ class HoffertLien : public Reaction { MFEM_HOST_DEVICE virtual ~HoffertLien() {} MFEM_HOST_DEVICE virtual double computeRateCoefficient(const double &T_h, const double &T_e, + [[maybe_unused]] const int & dofindex, const bool isElectronInvolved = false); }; @@ -110,6 +113,7 @@ class Tabulated : public Reaction { MFEM_HOST_DEVICE virtual ~Tabulated(); MFEM_HOST_DEVICE virtual double computeRateCoefficient(const double &T_h, const double &T_e, + [[maybe_unused]] const int & dofindex, const bool isElectronInvolved = false); }; diff --git a/src/source_term.cpp b/src/source_term.cpp index bbc7bcbdc..4c585f06c 100644 --- a/src/source_term.cpp +++ b/src/source_term.cpp @@ -162,7 +162,7 @@ void SourceTerm::updateTerms(mfem::Vector &in) { double progressRates[gpudata::MAXREACTIONS], creationRates[gpudata::MAXSPECIES]; if (_numSpecies > 1 && _numReactions > 0) { double kfwd[gpudata::MAXREACTIONS], kC[gpudata::MAXREACTIONS]; - _chemistry->computeForwardRateCoeffs(Th, Te, kfwd); + _chemistry->computeForwardRateCoeffs(Th, Te, n, kfwd); _chemistry->computeEquilibriumConstants(Th, Te, kC); // get reaction rates diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py new file mode 100755 index 000000000..ca6f2670f --- /dev/null +++ b/src/tps-bte_0d3v.py @@ -0,0 +1,727 @@ +#!/usr/bin/env python3 +import sys +import os +from mpi4py import MPI +import numpy as np +import scipy.constants +import csv +import matplotlib.pyplot as plt +from time import perf_counter as time +import configparser +import cupy as cp +import enum +import pandas as pd +import scipy.interpolate + +class profile_t: + def __init__(self,name): + self.name = name + self.seconds=0 + self.snap=0 + self._pri_time =0 + self.iter =0 + + def __add__(self,o): + assert(self.name==o.name) + self.seconds+=o.seconds + self.snap+=o.snap + self.iter+=o.iter + return self + + def start(self): + self._pri_time = time() + + def stop(self): + self.seconds-=self._pri_time + self.snap=-self._pri_time + + self._pri_time = time() + + self.seconds +=self._pri_time + self.snap += self._pri_time + self.iter+=1 + + def reset(self): + self.seconds=0 + self.snap=0 + self._pri_time =0 + self.iter =0 + +def min_mean_max(a, comm: MPI.Comm): + return (comm.allreduce(a, MPI.MIN) , comm.allreduce(a, MPI.SUM)/comm.Get_size(), comm.allreduce(a, MPI.MAX)) + + +try: + df = pd.read_csv("ionization_rates.csv") + Te = np.array(df["Te[K]"]) + r_arr = np.array(df["Arr[m3/s]"]) + r_csc = np.array(df["CSC_Maxwellian[m3/s]"]) + r_arr = scipy.interpolate.interp1d(Te, r_arr,bounds_error=False, fill_value=0.0) + r_csc = scipy.interpolate.interp1d(Te, r_csc,bounds_error=False, fill_value=0.0) + print("ionization coefficient read from file ") +except: + print("ionization rate coefficient file not found!!") + r_arr = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) + r_csc = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) + +# set path to C++ TPS library +path = os.path.abspath(os.path.dirname(sys.argv[0])) +sys.path.append(path + "/.libs") +sys.path.append(path + "/../../boltzmann/BESolver/python") +import libtps +from bte_0d3v_batched import bte_0d3v_batched as BoltzmannSolver + +WITH_PARLA = 1 +if WITH_PARLA: + try: + from parla import Parla + from parla.tasks import spawn, TaskSpace + from parla.devices import cpu, gpu + except: + print("Error occured during Parla import. Please make sure Parla is installed properly.") + sys.exit(0) + + +class pp(enum.IntEnum): + SETUP = 0 + SOLVE = 1 + LAST = 2 + +class BoltzmannSolverParams(): + sp_order = 3 # B-spline order in v-space + spline_qpts = 5 # number of Gauss-Legendre quadrature points per knot interval + Nr = 127 # number of B-splines used in radial direction + l_max = 1 # spherical modes uses, 0, to l_max + ev_max = 16 # v-space grid truncation (eV) + n_grids = 4 # number of v-space grids + + dt = 1e-3 # [] non-dimentionalized time w.r.t. oscilation period + cycles = 10 # number of max cycles to evolve + solver_type = "transient" # two modes, "transient" or "steady-state" + atol = 1e-10 # absolute tolerance + rtol = 1e-10 # relative tolerance + max_iter = 1000 # max iterations for the newton solver + + ee_collisions = 0 # enable electron-electron Coulombic effects + use_gpu = 1 # enable GPU use (1)-GPU solver, (0)-CPU solver + dev_id = 0 # which GPU device to use only used when use_gpu=1 + + collisions = ["g0","g2"] # collision string g0-elastic, g2-ionization + export_csv = 1 # export the qois to csv file + plot_data = 1 + + Efreq = 0.0 #[1/s] # E-field osicllation frequency + verbose = 1 # verbose output for the BTE solver + Te = 0.5 #[eV] # approximate electron temperature + + threads = 16 # number of threads to use to assemble operators + grid_idx = 0 + + output_dir = "batched_bte1" + out_fname = output_dir + "/tps" + + # some useful units and conversion factors. + ev_to_K = (scipy.constants.electron_volt/scipy.constants.Boltzmann) + Td_fac = 1e-21 #[Vm^2] + c_gamma = np.sqrt(2 * scipy.constants.elementary_charge / scipy.constants.electron_mass) #[(C/kg)^{1/2}] + me = scipy.constants.electron_mass + kB = scipy.constants.Boltzmann + +class TPSINDEX(): + """ + simple index map to differnt fields, from the TPS arrays + """ + ION_IDX = 0 # ion density index + ELE_IDX = 1 # electron density index + NEU_IDX = 2 # neutral density index + + EF_RE_IDX = 0 # Re(E) index + EF_IM_IDX = 1 # Im(E) index + +class Boltzmann0D2VBactchedSolver: + + def __init__(self, tps, comm): + self.tps = tps + self.comm : MPI.Comm = comm + self.param = BoltzmannSolverParams() + # overide the default params, based on the config.ini file. + self.parse_config_file(sys.argv[2]) + + self.xp_module = np + + boltzmann_dir = self.param.output_dir + isExist = os.path.exists(boltzmann_dir) + if not isExist: + # Create a new directory because it does not exist + os.makedirs(boltzmann_dir) + #print("directory %s is created!"%(dir_name)) + + profile_tt = [None] * int(pp.LAST) + profile_nn = ["setup", "solve", "last"] + for i in range(pp.LAST): + profile_tt[i] = profile_t(profile_nn[i]) + + self.profile_tt = profile_tt + self.profile_nn = profile_nn + + return + + def parse_config_file(self, fname): + """ + add the configuaraion file parse code here, + which overides the default BoltzmannSolverParams + """ + config = configparser.ConfigParser() + print("[Boltzmann] reading configure file given by : ", fname) + config.read(fname) + + self.param.sp_order = int(config.get("boltzmannSolver", "sp_order").split("#")[0].strip()) + self.param.spline_qpts = int(config.get("boltzmannSolver", "spline_qpts").split("#")[0].strip()) + + self.param.Nr = int(config.get("boltzmannSolver", "Nr").split("#")[0].strip()) + self.param.l_max = int(config.get("boltzmannSolver", "l_max").split("#")[0].strip()) + self.param.n_grids = int(config.get("boltzmannSolver", "n_grids").split("#")[0].strip()) + self.param.dt = float(config.get("boltzmannSolver", "dt").split("#")[0].strip()) + self.param.cycles = float(config.get("boltzmannSolver", "cycles").split("#")[0].strip()) + self.param.solver_type = str(config.get("boltzmannSolver", "solver_type").split("#")[0].strip()) + self.param.atol = float(config.get("boltzmannSolver", "atol").split("#")[0].strip()) + self.param.rtol = float(config.get("boltzmannSolver", "rtol").split("#")[0].strip()) + self.param.max_iter = int(config.get("boltzmannSolver", "max_iter").split("#")[0].strip()) + self.param.ee_collisions = int(config.get("boltzmannSolver", "ee_collisions").split("#")[0].strip()) + self.param.use_gpu = int(config.get("boltzmannSolver", "use_gpu").split("#")[0].strip()) + #self.param.collisions = config.get("boltzmannSolver", "collisions").split("#")[0] + + self.param.export_csv = int(config.get("boltzmannSolver", "export_csv").split("#")[0].strip()) + self.param.plot_data = int(config.get("boltzmannSolver", "plot_data").split("#")[0].strip()) + self.param.Efreq = float(config.get("boltzmannSolver", "Efreq").split("#")[0].strip()) + self.param.verbose = int(config.get("boltzmannSolver", "verbose").split("#")[0].strip()) + self.param.Te = float(config.get("boltzmannSolver", "Te").split("#")[0].strip()) + + self.param.threads = int(config.get("boltzmannSolver", "threads").split("#")[0].strip()) + self.param.output_dir = str(config.get("boltzmannSolver", "output_dir").split("#")[0].strip()) + self.param.out_fname = self.param.output_dir + "/" + str(config.get("boltzmannSolver", "output_fname").split("#")[0].strip()) + return + + def grid_setup(self, interface): + """ + Perform the boltzmann grid setup. + we generate v-space grid for each spatial point cluster in the parameter space, + where, at the moment the clustering is determined based on the electron temperature + computed from the TPS code. + """ + + self.profile_tt[pp.SETUP].start() + + xp = self.xp_module + Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] + Te_min, Te_max = xp.min(Te), xp.max(Te) + Te_b = xp.linspace(Te_min, Te_max, self.param.n_grids, endpoint=False) + dist_mat = xp.zeros((len(Te), self.param.n_grids)) + + for iter in range(50): + #print("clustering iteration ", iter, Te_b) + for i in range(self.param.n_grids): + dist_mat[:,i] = xp.abs(Te-Te_b[i]) + + membership = xp.argmin(dist_mat, axis=1) + Te_b1 = np.array([np.mean(Te[xp.argwhere(membership==i)[:,0]]) for i in range(self.param.n_grids)]) + rel_error = np.max(np.abs(1 - Te_b1/Te_b)) + Te_b = Te_b1 + + if rel_error < 1e-4: + break + Te_b = np.sort(Te_b) + print("K-means Te clusters ", Te_b) + for i in range(self.param.n_grids): + dist_mat[:,i] = xp.abs(Te-Te_b[i]) + + membership = xp.argmin(dist_mat, axis=1) + grid_idx_to_spatial_pts_map = list() + for b_idx in range(self.param.n_grids): + #grid_idx_to_spatial_pts_map.append(xp.argwhere(xp.logical_and(Te>= Te_b[b_idx], Te < Te_b[b_idx+1]))[:,0]) + grid_idx_to_spatial_pts_map.append(xp.argwhere(membership==b_idx)[:,0]) + + np.save("%s_gidx_to_pidx.npy"%(self.param.out_fname), np.array(grid_idx_to_spatial_pts_map, dtype=object), allow_pickle=True) + + self.grid_idx_to_npts = xp.array([len(a) for a in grid_idx_to_spatial_pts_map], dtype=xp.int32) + self.grid_idx_to_spatial_idx_map = grid_idx_to_spatial_pts_map + + xp.sum(self.grid_idx_to_npts) == len(Te), "[Error] : TPS spatial points for v-space grid assignment is inconsitant" + lm_modes = [[[l,0] for l in range(self.param.l_max+1)] for grid_idx in range(self.param.n_grids)] + nr = xp.ones(self.param.n_grids, dtype=np.int32) * self.param.Nr + Te = xp.array([Te_b[b_idx] for b_idx in range(self.param.n_grids)]) # xp.ones(self.param.n_grids) * self.param.Te + vth = np.sqrt(2* self.param.kB * Te * self.param.ev_to_K /self.param.me) + ev_max = (6 * vth / self.param.c_gamma)**2 + self.bte_solver = BoltzmannSolver(self.param, ev_max ,Te , nr, lm_modes, self.param.n_grids, self.param.collisions) + + if self.param.verbose==1: + print("grid energy max (eV) \n", ev_max, flush = True) + + # compute BTE operators + for grid_idx in range(self.param.n_grids): + print("setting up grid %d"%(grid_idx), flush = True) + self.bte_solver.assemble_operators(grid_idx) + + self.profile_tt[pp.SETUP].stop() + return + + def fetch(self, interface): + xp = self.xp_module + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + + heavy_temp = xp.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + tps_npts = len(heavy_temp) + self.tps_npts = tps_npts + + electron_temp = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) + efield = xp.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) + species_densities = xp.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, tps_npts) + + for grid_idx in range(self.param.n_grids): + bte_idx = gidx_to_pidx_map[grid_idx] + ni = species_densities[TPSINDEX.ION_IDX][bte_idx] + ne = species_densities[TPSINDEX.ELE_IDX][bte_idx] + n0 = species_densities[TPSINDEX.NEU_IDX][bte_idx] + Tg = heavy_temp[bte_idx] + Te = electron_temp[bte_idx] + + + eRe = efield[TPSINDEX.EF_RE_IDX][bte_idx] + eIm = efield[TPSINDEX.EF_IM_IDX][bte_idx] + eMag = np.sqrt(eRe**2 + eIm **2) + eByn0 = eMag/n0/self.param.Td_fac + + if self.param.verbose == 1 : + print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) + print("Efreq = %.4E [1/s]" %(self.param.Efreq)) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) + + print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg), np.max(Tg))) + print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te), np.max(Te))) + + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne), np.max(ne))) + print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni), np.max(ni))) + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0), np.max(n0))) + + #self.bte_solver.set_boltzmann_parameters(grid_idx, n0, ne, ni, Tg, self.param.solver_type) + self.bte_solver.set_boltzmann_parameter(grid_idx, "n0", n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ne", ne) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ni", ni) + self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg", Tg) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", eRe) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", eRe) + + return + + def solve(self): + """ + perform the BTE solve, supports both stead-state solution (static E-field) + and time-periodic solutions for the oscillatory E-fields + """ + + if WITH_PARLA==1: + self.solve_with_parla() + return + else: + self.solve_seq() + return + + def solve_seq(self): + xp = self.xp_module + csv_write = self.param.export_csv + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + + self.qoi = [None for grid_idx in range(self.param.n_grids)] + self.ff = [None for grid_idx in range(self.param.n_grids)] + + if csv_write ==1 : + data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) + + t1 = time() + + for grid_idx in range(self.param.n_grids): + + if self.grid_idx_to_npts[grid_idx] ==0: + continue + + if self.param.verbose==1: + print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) + f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") + self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) + + if self.param.use_gpu==1: + dev_id = self.param.dev_id + self.bte_solver.host_to_device_setup(dev_id, grid_idx) + + with cp.cuda.Device(dev_id): + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) + else: + ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) + + else: + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) + else: + ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) + + self.bte_solver.set_efield_function(grid_idx, ef_t) + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") + try: + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + self.qoi[grid_idx] = qoi + self.ff [grid_idx] = ff + except: + print("solver failed for v-space gird no %d"%(grid_idx)) + # self.qoi.append(None) + # continue + sys.exit(0) + + if self.param.export_csv ==0 and self.param.plot_data==0: + continue + + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) + ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) + + if self.param.use_gpu==1: + self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) + + with cp.cuda.Device(dev_id): + ff_r = cp.asnumpy(ff_r) + for k, v in qoi.items(): + qoi[k] = cp.asnumpy(v) + + if csv_write==1: + data_csv[gidx_to_pidx_map[grid_idx], 0] = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + data_csv[gidx_to_pidx_map[grid_idx], 1] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + data_csv[gidx_to_pidx_map[grid_idx], 2] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + data_csv[gidx_to_pidx_map[grid_idx], 3] = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + data_csv[gidx_to_pidx_map[grid_idx], 4] = np.sqrt(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")**2 + self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")**2) + data_csv[gidx_to_pidx_map[grid_idx], 5] = qoi["energy"] + data_csv[gidx_to_pidx_map[grid_idx], 6] = qoi["mobility"] + data_csv[gidx_to_pidx_map[grid_idx], 7] = qoi["diffusion"] + + for col_idx, g in enumerate(self.param.collisions): + data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = qoi["rates"][col_idx] + + plot_data = self.param.plot_data + if plot_data: + + n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + + eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + eMag = np.sqrt(eRe**2 + eIm**2) + + num_sh = len(self.bte_solver._par_lm[grid_idx]) + num_subplots = num_sh + num_plt_cols = min(num_sh, 4) + num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) + fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) + plt_idx = 1 + n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 + + for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): + plt.subplot(num_plt_rows, num_plt_cols, plt_idx) + for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): + fr = np.abs(ff_r[ii, lm_idx, :]) + plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) + + plt.xlabel(r"energy (eV)") + plt.ylabel(r"$f_%d$"%(lm[0])) + plt.grid(visible=True) + if lm_idx==0: + plt.legend(prop={'size': 6}) + + plt_idx +=1 + + #plt_idx = num_sh + plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) + plt.close() + + t2 = time() + print("time for boltzmann v-space solve = %.4E"%(t2- t1)) + + if csv_write: + fname = self.param.out_fname + with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(self.param.collisions): + header.append(str(g)) + + writer.writerow(header) + writer.writerows(data_csv) + + return + + def solve_with_parla(self): + csv_write = self.param.export_csv + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + self.qoi = [None for grid_idx in range(self.param.n_grids)] + self.ff = [None for grid_idx in range(self.param.n_grids)] + + if csv_write ==1 : + data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) + + + rank = self.comm.Get_rank() + npes = self.comm.Get_size() + + with Parla(): + num_gpus = len(gpu) + grid_to_device_map = lambda gidx : gidx % num_gpus + @spawn(placement=cpu, vcus=0) + async def __main__(): + self.profile_tt[pp.SETUP].start() + ts_0 = TaskSpace("T") + for grid_idx in range(self.param.n_grids): + @spawn(ts_0[grid_idx], placement=[cpu], vcus=0.0) + def t0(): + print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) + f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") + self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) + + if self.param.use_gpu == 1: + dev_id = grid_to_device_map(grid_idx) + self.bte_solver.host_to_device_setup(dev_id, grid_idx) + xp = cp + + with cp.cuda.Device(dev_id): + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) + else: + ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) + else: + xp = np + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) + else: + ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) + + self.bte_solver.set_efield_function(grid_idx, ef_t) + return + + await ts_0 + + self.profile_tt[pp.SETUP].stop() + if self.param.use_gpu==1: + p1 = [gpu(grid_to_device_map(grid_idx)) for grid_idx in range(self.param.n_grids)] + else: + p1 = [cpu for grid_idx in range(self.param.n_grids)] + + self.profile_tt[pp.SOLVE].start() + ts_1 = TaskSpace("T") + for grid_idx in range(self.param.n_grids): + @spawn(ts_1[grid_idx], placement=[p1[grid_idx]], dependencies=ts_0[grid_idx], vcus=0.0) + def t1(): + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") + print("[Boltzmann] %d / %d launching grid %d on %s"%(rank, npes, grid_idx, p1[grid_idx])) + try: + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + self.ff[grid_idx] = ff + self.qoi[grid_idx] = qoi + except: + print("solver failed for v-space gird no %d"%(grid_idx)) + # self.qoi.append(None) + # continue + sys.exit(0) + + await ts_1 + self.profile_tt[pp.SOLVE].stop() + + + t1 = min_mean_max(self.profile_tt[pp.SETUP].seconds, self.comm) + t2 = min_mean_max(self.profile_tt[pp.SOLVE].seconds, self.comm) + print("[Boltzmann] setup (min) = %.4E (s) setup (mean) = %.4E (s) setup (max) = %.4E (s)" % (t1[0],t1[1],t1[2])) + print("[Boltzmann] solve (min) = %.4E (s) solve (mean) = %.4E (s) solve (max) = %.4E (s)" % (t2[0],t2[1],t2[2])) + if self.param.export_csv ==0 and self.param.plot_data==0: + return + + for grid_idx in range(self.param.n_grids): + dev_id = grid_idx % num_gpus + + if self.param.use_gpu==1: + gpu_id = cp.cuda.Device(dev_id) + gpu_id.use() + + ff = self.ff[grid_idx] + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) + ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) + + if self.param.use_gpu==1: + self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) + + qoi = self.qoi[grid_idx] + with cp.cuda.Device(dev_id): + ff_r = cp.asnumpy(ff_r) + for k, v in qoi.items(): + qoi[k] = cp.asnumpy(v) + + if csv_write==1: + data_csv[gidx_to_pidx_map[grid_idx], 0] = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + data_csv[gidx_to_pidx_map[grid_idx], 1] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + data_csv[gidx_to_pidx_map[grid_idx], 2] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + data_csv[gidx_to_pidx_map[grid_idx], 3] = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + data_csv[gidx_to_pidx_map[grid_idx], 4] = np.sqrt(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")**2 + self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")**2) + data_csv[gidx_to_pidx_map[grid_idx], 5] = qoi["energy"] + data_csv[gidx_to_pidx_map[grid_idx], 6] = qoi["mobility"] + data_csv[gidx_to_pidx_map[grid_idx], 7] = qoi["diffusion"] + + for col_idx, g in enumerate(self.param.collisions): + data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = qoi["rates"][col_idx] + + plot_data = self.param.plot_data + if plot_data: + + n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") + ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") + ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") + Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + + eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + eMag = np.sqrt(eRe**2 + eIm**2) + + num_sh = len(self.bte_solver._par_lm[grid_idx]) + num_subplots = num_sh + num_plt_cols = min(num_sh, 4) + num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) + fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) + plt_idx = 1 + n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 + + for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): + plt.subplot(num_plt_rows, num_plt_cols, plt_idx) + for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): + fr = np.abs(ff_r[ii, lm_idx, :]) + plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) + + plt.xlabel(r"energy (eV)") + plt.ylabel(r"$f_%d$"%(lm[0])) + plt.grid(visible=True) + if lm_idx==0: + plt.legend(prop={'size': 6}) + + plt_idx +=1 + + #plt_idx = num_sh + plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) + plt.close() + + if csv_write: + fname = self.param.out_fname + with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(self.param.collisions): + header.append(str(g)) + + writer.writerow(header) + writer.writerows(data_csv) + + def push(self, interface): + xp = self.xp_module + Te_bte = xp.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) + rate_bte = xp.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((2, self.tps_npts)) + Te_tps = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) + + species_densities = xp.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, self.tps_npts) + ni = species_densities[TPSINDEX.ION_IDX] + n0 = species_densities[TPSINDEX.NEU_IDX] + ne = species_densities[TPSINDEX.ELE_IDX] + + rate_tps_arr = r_arr(Te_tps) + rate_tps_csc = r_csc(Te_tps) + + rr_bte = xp.zeros_like(rate_tps_arr) + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + + for grid_idx in range(self.param.n_grids): + Te_bte[gidx_to_pidx_map[grid_idx]] = (self.qoi[grid_idx]["energy"]/1.5) * self.param.ev_to_K + rr = self.qoi[grid_idx]["rates"] + # here rr should be in the same ordering as the collision model prescribed to the Boltzmann solver. + rr_bte[gidx_to_pidx_map[grid_idx]] = rr[1] + + rr_bte[rr_bte<0] = 0.0 + s0 = rate_tps_arr * n0 * ni + s1 = rate_tps_csc * n0 * ni + + s2 = rr_bte * n0 * ni + + # tau = 1e-2 + # idx = s2 > tau + rate_bte[0][:] = 0.0 + rate_bte[1][:] = 0.0 + rate_bte[0] = rr_bte + rate_bte[1] = xp.abs(s2-s1)/xp.max(s2) + + return + + + + + +comm = MPI.COMM_WORLD +# TPS solver +tps = libtps.Tps(comm) + +tps.parseCommandLineArgs(sys.argv) +tps.parseInput() +tps.chooseDevices() +tps.chooseSolver() +tps.initialize() + +boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) + +interface = libtps.Tps2Boltzmann(tps) +tps.initInterface(interface) + +coords = np.array(interface.HostReadSpatialCoordinates(), copy=False) +print(coords.shape) + +it = 0 +max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") +print("Max Iters: ", max_iters) +tps.solveBegin() +tps.solveStep() +tps.push(interface) +boltzmann.grid_setup(interface) +boltzmann.fetch(interface) +boltzmann.solve() +boltzmann.push(interface) +tps.fetch(interface) + +# while it < max_iters: +# tps.solveStep() +# tps.push(interface) +# boltzmann.fetch(interface) +# boltzmann.solve() +# boltzmann.push(interface) +# tps.fetch(interface) + +# it = it+1 +# print("it, ", it) + +tps.solveEnd() + + +sys.exit (tps.getStatus()) diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index ca6f2670f..9bbc719d0 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -1,684 +1,36 @@ #!/usr/bin/env python3 import sys import os -from mpi4py import MPI import numpy as np -import scipy.constants -import csv -import matplotlib.pyplot as plt -from time import perf_counter as time -import configparser -import cupy as cp -import enum -import pandas as pd -import scipy.interpolate -class profile_t: - def __init__(self,name): - self.name = name - self.seconds=0 - self.snap=0 - self._pri_time =0 - self.iter =0 +from mpi4py import MPI - def __add__(self,o): - assert(self.name==o.name) - self.seconds+=o.seconds - self.snap+=o.snap - self.iter+=o.iter - return self +class BoltzmannMockSolver: + def __init__(self): + pass - def start(self): - self._pri_time = time() - - def stop(self): - self.seconds-=self._pri_time - self.snap=-self._pri_time + def fetch(self, interface): + species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False) + efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False) + heavy_temperature = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) - self._pri_time = time() + print("|| species_densities ||_2 = ", np.linalg.norm(species_densities) ) + print("|| efield ||_2 = ", np.linalg.norm(efield) ) + print("||heavy_temperature||_2 = ", np.linalg.norm(heavy_temperature) ) - self.seconds +=self._pri_time - self.snap += self._pri_time - self.iter+=1 - - def reset(self): - self.seconds=0 - self.snap=0 - self._pri_time =0 - self.iter =0 + def solve(self): + pass -def min_mean_max(a, comm: MPI.Comm): - return (comm.allreduce(a, MPI.MIN) , comm.allreduce(a, MPI.SUM)/comm.Get_size(), comm.allreduce(a, MPI.MAX)) + def push(self, interface): + electron_temperature = np.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) + electron_temperature[:] = 1. -try: - df = pd.read_csv("ionization_rates.csv") - Te = np.array(df["Te[K]"]) - r_arr = np.array(df["Arr[m3/s]"]) - r_csc = np.array(df["CSC_Maxwellian[m3/s]"]) - r_arr = scipy.interpolate.interp1d(Te, r_arr,bounds_error=False, fill_value=0.0) - r_csc = scipy.interpolate.interp1d(Te, r_csc,bounds_error=False, fill_value=0.0) - print("ionization coefficient read from file ") -except: - print("ionization rate coefficient file not found!!") - r_arr = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) - r_csc = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) # set path to C++ TPS library path = os.path.abspath(os.path.dirname(sys.argv[0])) sys.path.append(path + "/.libs") -sys.path.append(path + "/../../boltzmann/BESolver/python") import libtps -from bte_0d3v_batched import bte_0d3v_batched as BoltzmannSolver - -WITH_PARLA = 1 -if WITH_PARLA: - try: - from parla import Parla - from parla.tasks import spawn, TaskSpace - from parla.devices import cpu, gpu - except: - print("Error occured during Parla import. Please make sure Parla is installed properly.") - sys.exit(0) - - -class pp(enum.IntEnum): - SETUP = 0 - SOLVE = 1 - LAST = 2 - -class BoltzmannSolverParams(): - sp_order = 3 # B-spline order in v-space - spline_qpts = 5 # number of Gauss-Legendre quadrature points per knot interval - Nr = 127 # number of B-splines used in radial direction - l_max = 1 # spherical modes uses, 0, to l_max - ev_max = 16 # v-space grid truncation (eV) - n_grids = 4 # number of v-space grids - - dt = 1e-3 # [] non-dimentionalized time w.r.t. oscilation period - cycles = 10 # number of max cycles to evolve - solver_type = "transient" # two modes, "transient" or "steady-state" - atol = 1e-10 # absolute tolerance - rtol = 1e-10 # relative tolerance - max_iter = 1000 # max iterations for the newton solver - - ee_collisions = 0 # enable electron-electron Coulombic effects - use_gpu = 1 # enable GPU use (1)-GPU solver, (0)-CPU solver - dev_id = 0 # which GPU device to use only used when use_gpu=1 - - collisions = ["g0","g2"] # collision string g0-elastic, g2-ionization - export_csv = 1 # export the qois to csv file - plot_data = 1 - - Efreq = 0.0 #[1/s] # E-field osicllation frequency - verbose = 1 # verbose output for the BTE solver - Te = 0.5 #[eV] # approximate electron temperature - - threads = 16 # number of threads to use to assemble operators - grid_idx = 0 - - output_dir = "batched_bte1" - out_fname = output_dir + "/tps" - - # some useful units and conversion factors. - ev_to_K = (scipy.constants.electron_volt/scipy.constants.Boltzmann) - Td_fac = 1e-21 #[Vm^2] - c_gamma = np.sqrt(2 * scipy.constants.elementary_charge / scipy.constants.electron_mass) #[(C/kg)^{1/2}] - me = scipy.constants.electron_mass - kB = scipy.constants.Boltzmann - -class TPSINDEX(): - """ - simple index map to differnt fields, from the TPS arrays - """ - ION_IDX = 0 # ion density index - ELE_IDX = 1 # electron density index - NEU_IDX = 2 # neutral density index - - EF_RE_IDX = 0 # Re(E) index - EF_IM_IDX = 1 # Im(E) index - -class Boltzmann0D2VBactchedSolver: - - def __init__(self, tps, comm): - self.tps = tps - self.comm : MPI.Comm = comm - self.param = BoltzmannSolverParams() - # overide the default params, based on the config.ini file. - self.parse_config_file(sys.argv[2]) - - self.xp_module = np - - boltzmann_dir = self.param.output_dir - isExist = os.path.exists(boltzmann_dir) - if not isExist: - # Create a new directory because it does not exist - os.makedirs(boltzmann_dir) - #print("directory %s is created!"%(dir_name)) - - profile_tt = [None] * int(pp.LAST) - profile_nn = ["setup", "solve", "last"] - for i in range(pp.LAST): - profile_tt[i] = profile_t(profile_nn[i]) - - self.profile_tt = profile_tt - self.profile_nn = profile_nn - - return - - def parse_config_file(self, fname): - """ - add the configuaraion file parse code here, - which overides the default BoltzmannSolverParams - """ - config = configparser.ConfigParser() - print("[Boltzmann] reading configure file given by : ", fname) - config.read(fname) - - self.param.sp_order = int(config.get("boltzmannSolver", "sp_order").split("#")[0].strip()) - self.param.spline_qpts = int(config.get("boltzmannSolver", "spline_qpts").split("#")[0].strip()) - - self.param.Nr = int(config.get("boltzmannSolver", "Nr").split("#")[0].strip()) - self.param.l_max = int(config.get("boltzmannSolver", "l_max").split("#")[0].strip()) - self.param.n_grids = int(config.get("boltzmannSolver", "n_grids").split("#")[0].strip()) - self.param.dt = float(config.get("boltzmannSolver", "dt").split("#")[0].strip()) - self.param.cycles = float(config.get("boltzmannSolver", "cycles").split("#")[0].strip()) - self.param.solver_type = str(config.get("boltzmannSolver", "solver_type").split("#")[0].strip()) - self.param.atol = float(config.get("boltzmannSolver", "atol").split("#")[0].strip()) - self.param.rtol = float(config.get("boltzmannSolver", "rtol").split("#")[0].strip()) - self.param.max_iter = int(config.get("boltzmannSolver", "max_iter").split("#")[0].strip()) - self.param.ee_collisions = int(config.get("boltzmannSolver", "ee_collisions").split("#")[0].strip()) - self.param.use_gpu = int(config.get("boltzmannSolver", "use_gpu").split("#")[0].strip()) - #self.param.collisions = config.get("boltzmannSolver", "collisions").split("#")[0] - - self.param.export_csv = int(config.get("boltzmannSolver", "export_csv").split("#")[0].strip()) - self.param.plot_data = int(config.get("boltzmannSolver", "plot_data").split("#")[0].strip()) - self.param.Efreq = float(config.get("boltzmannSolver", "Efreq").split("#")[0].strip()) - self.param.verbose = int(config.get("boltzmannSolver", "verbose").split("#")[0].strip()) - self.param.Te = float(config.get("boltzmannSolver", "Te").split("#")[0].strip()) - - self.param.threads = int(config.get("boltzmannSolver", "threads").split("#")[0].strip()) - self.param.output_dir = str(config.get("boltzmannSolver", "output_dir").split("#")[0].strip()) - self.param.out_fname = self.param.output_dir + "/" + str(config.get("boltzmannSolver", "output_fname").split("#")[0].strip()) - return - - def grid_setup(self, interface): - """ - Perform the boltzmann grid setup. - we generate v-space grid for each spatial point cluster in the parameter space, - where, at the moment the clustering is determined based on the electron temperature - computed from the TPS code. - """ - - self.profile_tt[pp.SETUP].start() - - xp = self.xp_module - Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] - Te_min, Te_max = xp.min(Te), xp.max(Te) - Te_b = xp.linspace(Te_min, Te_max, self.param.n_grids, endpoint=False) - dist_mat = xp.zeros((len(Te), self.param.n_grids)) - - for iter in range(50): - #print("clustering iteration ", iter, Te_b) - for i in range(self.param.n_grids): - dist_mat[:,i] = xp.abs(Te-Te_b[i]) - - membership = xp.argmin(dist_mat, axis=1) - Te_b1 = np.array([np.mean(Te[xp.argwhere(membership==i)[:,0]]) for i in range(self.param.n_grids)]) - rel_error = np.max(np.abs(1 - Te_b1/Te_b)) - Te_b = Te_b1 - - if rel_error < 1e-4: - break - Te_b = np.sort(Te_b) - print("K-means Te clusters ", Te_b) - for i in range(self.param.n_grids): - dist_mat[:,i] = xp.abs(Te-Te_b[i]) - - membership = xp.argmin(dist_mat, axis=1) - grid_idx_to_spatial_pts_map = list() - for b_idx in range(self.param.n_grids): - #grid_idx_to_spatial_pts_map.append(xp.argwhere(xp.logical_and(Te>= Te_b[b_idx], Te < Te_b[b_idx+1]))[:,0]) - grid_idx_to_spatial_pts_map.append(xp.argwhere(membership==b_idx)[:,0]) - - np.save("%s_gidx_to_pidx.npy"%(self.param.out_fname), np.array(grid_idx_to_spatial_pts_map, dtype=object), allow_pickle=True) - - self.grid_idx_to_npts = xp.array([len(a) for a in grid_idx_to_spatial_pts_map], dtype=xp.int32) - self.grid_idx_to_spatial_idx_map = grid_idx_to_spatial_pts_map - - xp.sum(self.grid_idx_to_npts) == len(Te), "[Error] : TPS spatial points for v-space grid assignment is inconsitant" - lm_modes = [[[l,0] for l in range(self.param.l_max+1)] for grid_idx in range(self.param.n_grids)] - nr = xp.ones(self.param.n_grids, dtype=np.int32) * self.param.Nr - Te = xp.array([Te_b[b_idx] for b_idx in range(self.param.n_grids)]) # xp.ones(self.param.n_grids) * self.param.Te - vth = np.sqrt(2* self.param.kB * Te * self.param.ev_to_K /self.param.me) - ev_max = (6 * vth / self.param.c_gamma)**2 - self.bte_solver = BoltzmannSolver(self.param, ev_max ,Te , nr, lm_modes, self.param.n_grids, self.param.collisions) - - if self.param.verbose==1: - print("grid energy max (eV) \n", ev_max, flush = True) - - # compute BTE operators - for grid_idx in range(self.param.n_grids): - print("setting up grid %d"%(grid_idx), flush = True) - self.bte_solver.assemble_operators(grid_idx) - - self.profile_tt[pp.SETUP].stop() - return - - def fetch(self, interface): - xp = self.xp_module - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - - heavy_temp = xp.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) - tps_npts = len(heavy_temp) - self.tps_npts = tps_npts - - electron_temp = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) - efield = xp.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) - species_densities = xp.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, tps_npts) - - for grid_idx in range(self.param.n_grids): - bte_idx = gidx_to_pidx_map[grid_idx] - ni = species_densities[TPSINDEX.ION_IDX][bte_idx] - ne = species_densities[TPSINDEX.ELE_IDX][bte_idx] - n0 = species_densities[TPSINDEX.NEU_IDX][bte_idx] - Tg = heavy_temp[bte_idx] - Te = electron_temp[bte_idx] - - - eRe = efield[TPSINDEX.EF_RE_IDX][bte_idx] - eIm = efield[TPSINDEX.EF_IM_IDX][bte_idx] - eMag = np.sqrt(eRe**2 + eIm **2) - eByn0 = eMag/n0/self.param.Td_fac - - if self.param.verbose == 1 : - print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) - print("Efreq = %.4E [1/s]" %(self.param.Efreq)) - print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) - - print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg), np.max(Tg))) - print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te), np.max(Te))) - - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne), np.max(ne))) - print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni), np.max(ni))) - print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0), np.max(n0))) - - #self.bte_solver.set_boltzmann_parameters(grid_idx, n0, ne, ni, Tg, self.param.solver_type) - self.bte_solver.set_boltzmann_parameter(grid_idx, "n0", n0) - self.bte_solver.set_boltzmann_parameter(grid_idx, "ne", ne) - self.bte_solver.set_boltzmann_parameter(grid_idx, "ni", ni) - self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg", Tg) - self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", eRe) - self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", eRe) - - return - - def solve(self): - """ - perform the BTE solve, supports both stead-state solution (static E-field) - and time-periodic solutions for the oscillatory E-fields - """ - - if WITH_PARLA==1: - self.solve_with_parla() - return - else: - self.solve_seq() - return - - def solve_seq(self): - xp = self.xp_module - csv_write = self.param.export_csv - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - - self.qoi = [None for grid_idx in range(self.param.n_grids)] - self.ff = [None for grid_idx in range(self.param.n_grids)] - - if csv_write ==1 : - data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) - - t1 = time() - - for grid_idx in range(self.param.n_grids): - - if self.grid_idx_to_npts[grid_idx] ==0: - continue - - if self.param.verbose==1: - print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) - f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") - self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) - - if self.param.use_gpu==1: - dev_id = self.param.dev_id - self.bte_solver.host_to_device_setup(dev_id, grid_idx) - - with cp.cuda.Device(dev_id): - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - - if self.param.Efreq == 0: - ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) - else: - ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - - else: - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - - if self.param.Efreq == 0: - ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) - else: - ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - - self.bte_solver.set_efield_function(grid_idx, ef_t) - f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") - try: - ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) - self.qoi[grid_idx] = qoi - self.ff [grid_idx] = ff - except: - print("solver failed for v-space gird no %d"%(grid_idx)) - # self.qoi.append(None) - # continue - sys.exit(0) - - if self.param.export_csv ==0 and self.param.plot_data==0: - continue - - ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) - ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) - - if self.param.use_gpu==1: - self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) - - with cp.cuda.Device(dev_id): - ff_r = cp.asnumpy(ff_r) - for k, v in qoi.items(): - qoi[k] = cp.asnumpy(v) - - if csv_write==1: - data_csv[gidx_to_pidx_map[grid_idx], 0] = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - data_csv[gidx_to_pidx_map[grid_idx], 1] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - data_csv[gidx_to_pidx_map[grid_idx], 2] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - data_csv[gidx_to_pidx_map[grid_idx], 3] = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") - data_csv[gidx_to_pidx_map[grid_idx], 4] = np.sqrt(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")**2 + self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")**2) - data_csv[gidx_to_pidx_map[grid_idx], 5] = qoi["energy"] - data_csv[gidx_to_pidx_map[grid_idx], 6] = qoi["mobility"] - data_csv[gidx_to_pidx_map[grid_idx], 7] = qoi["diffusion"] - - for col_idx, g in enumerate(self.param.collisions): - data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = qoi["rates"][col_idx] - - plot_data = self.param.plot_data - if plot_data: - - n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") - - eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - eMag = np.sqrt(eRe**2 + eIm**2) - - num_sh = len(self.bte_solver._par_lm[grid_idx]) - num_subplots = num_sh - num_plt_cols = min(num_sh, 4) - num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) - fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) - plt_idx = 1 - n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 - - for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): - plt.subplot(num_plt_rows, num_plt_cols, plt_idx) - for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): - fr = np.abs(ff_r[ii, lm_idx, :]) - plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) - - plt.xlabel(r"energy (eV)") - plt.ylabel(r"$f_%d$"%(lm[0])) - plt.grid(visible=True) - if lm_idx==0: - plt.legend(prop={'size': 6}) - - plt_idx +=1 - - #plt_idx = num_sh - plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) - plt.close() - - t2 = time() - print("time for boltzmann v-space solve = %.4E"%(t2- t1)) - - if csv_write: - fname = self.param.out_fname - with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: - writer = csv.writer(f,delimiter=',') - # write the header - header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(self.param.collisions): - header.append(str(g)) - - writer.writerow(header) - writer.writerows(data_csv) - - return - - def solve_with_parla(self): - csv_write = self.param.export_csv - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - self.qoi = [None for grid_idx in range(self.param.n_grids)] - self.ff = [None for grid_idx in range(self.param.n_grids)] - - if csv_write ==1 : - data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) - - - rank = self.comm.Get_rank() - npes = self.comm.Get_size() - - with Parla(): - num_gpus = len(gpu) - grid_to_device_map = lambda gidx : gidx % num_gpus - @spawn(placement=cpu, vcus=0) - async def __main__(): - self.profile_tt[pp.SETUP].start() - ts_0 = TaskSpace("T") - for grid_idx in range(self.param.n_grids): - @spawn(ts_0[grid_idx], placement=[cpu], vcus=0.0) - def t0(): - print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) - f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") - self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) - - if self.param.use_gpu == 1: - dev_id = grid_to_device_map(grid_idx) - self.bte_solver.host_to_device_setup(dev_id, grid_idx) - xp = cp - - with cp.cuda.Device(dev_id): - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - - if self.param.Efreq == 0: - ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) - else: - ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - else: - xp = np - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - - if self.param.Efreq == 0: - ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) - else: - ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - - self.bte_solver.set_efield_function(grid_idx, ef_t) - return - - await ts_0 - - self.profile_tt[pp.SETUP].stop() - if self.param.use_gpu==1: - p1 = [gpu(grid_to_device_map(grid_idx)) for grid_idx in range(self.param.n_grids)] - else: - p1 = [cpu for grid_idx in range(self.param.n_grids)] - - self.profile_tt[pp.SOLVE].start() - ts_1 = TaskSpace("T") - for grid_idx in range(self.param.n_grids): - @spawn(ts_1[grid_idx], placement=[p1[grid_idx]], dependencies=ts_0[grid_idx], vcus=0.0) - def t1(): - f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") - print("[Boltzmann] %d / %d launching grid %d on %s"%(rank, npes, grid_idx, p1[grid_idx])) - try: - ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) - self.ff[grid_idx] = ff - self.qoi[grid_idx] = qoi - except: - print("solver failed for v-space gird no %d"%(grid_idx)) - # self.qoi.append(None) - # continue - sys.exit(0) - - await ts_1 - self.profile_tt[pp.SOLVE].stop() - - - t1 = min_mean_max(self.profile_tt[pp.SETUP].seconds, self.comm) - t2 = min_mean_max(self.profile_tt[pp.SOLVE].seconds, self.comm) - print("[Boltzmann] setup (min) = %.4E (s) setup (mean) = %.4E (s) setup (max) = %.4E (s)" % (t1[0],t1[1],t1[2])) - print("[Boltzmann] solve (min) = %.4E (s) solve (mean) = %.4E (s) solve (max) = %.4E (s)" % (t2[0],t2[1],t2[2])) - if self.param.export_csv ==0 and self.param.plot_data==0: - return - - for grid_idx in range(self.param.n_grids): - dev_id = grid_idx % num_gpus - - if self.param.use_gpu==1: - gpu_id = cp.cuda.Device(dev_id) - gpu_id.use() - - ff = self.ff[grid_idx] - ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) - ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) - - if self.param.use_gpu==1: - self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) - - qoi = self.qoi[grid_idx] - with cp.cuda.Device(dev_id): - ff_r = cp.asnumpy(ff_r) - for k, v in qoi.items(): - qoi[k] = cp.asnumpy(v) - - if csv_write==1: - data_csv[gidx_to_pidx_map[grid_idx], 0] = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - data_csv[gidx_to_pidx_map[grid_idx], 1] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - data_csv[gidx_to_pidx_map[grid_idx], 2] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - data_csv[gidx_to_pidx_map[grid_idx], 3] = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") - data_csv[gidx_to_pidx_map[grid_idx], 4] = np.sqrt(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")**2 + self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")**2) - data_csv[gidx_to_pidx_map[grid_idx], 5] = qoi["energy"] - data_csv[gidx_to_pidx_map[grid_idx], 6] = qoi["mobility"] - data_csv[gidx_to_pidx_map[grid_idx], 7] = qoi["diffusion"] - - for col_idx, g in enumerate(self.param.collisions): - data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = qoi["rates"][col_idx] - - plot_data = self.param.plot_data - if plot_data: - - n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") - - eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - eMag = np.sqrt(eRe**2 + eIm**2) - - num_sh = len(self.bte_solver._par_lm[grid_idx]) - num_subplots = num_sh - num_plt_cols = min(num_sh, 4) - num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) - fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) - plt_idx = 1 - n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 - - for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): - plt.subplot(num_plt_rows, num_plt_cols, plt_idx) - for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): - fr = np.abs(ff_r[ii, lm_idx, :]) - plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) - - plt.xlabel(r"energy (eV)") - plt.ylabel(r"$f_%d$"%(lm[0])) - plt.grid(visible=True) - if lm_idx==0: - plt.legend(prop={'size': 6}) - - plt_idx +=1 - - #plt_idx = num_sh - plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) - plt.close() - - if csv_write: - fname = self.param.out_fname - with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: - writer = csv.writer(f,delimiter=',') - # write the header - header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(self.param.collisions): - header.append(str(g)) - - writer.writerow(header) - writer.writerows(data_csv) - - def push(self, interface): - xp = self.xp_module - Te_bte = xp.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) - rate_bte = xp.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((2, self.tps_npts)) - Te_tps = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) - - species_densities = xp.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, self.tps_npts) - ni = species_densities[TPSINDEX.ION_IDX] - n0 = species_densities[TPSINDEX.NEU_IDX] - ne = species_densities[TPSINDEX.ELE_IDX] - - rate_tps_arr = r_arr(Te_tps) - rate_tps_csc = r_csc(Te_tps) - - rr_bte = xp.zeros_like(rate_tps_arr) - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - - for grid_idx in range(self.param.n_grids): - Te_bte[gidx_to_pidx_map[grid_idx]] = (self.qoi[grid_idx]["energy"]/1.5) * self.param.ev_to_K - rr = self.qoi[grid_idx]["rates"] - # here rr should be in the same ordering as the collision model prescribed to the Boltzmann solver. - rr_bte[gidx_to_pidx_map[grid_idx]] = rr[1] - - rr_bte[rr_bte<0] = 0.0 - s0 = rate_tps_arr * n0 * ni - s1 = rate_tps_csc * n0 * ni - - s2 = rr_bte * n0 * ni - - # tau = 1e-2 - # idx = s2 > tau - rate_bte[0][:] = 0.0 - rate_bte[1][:] = 0.0 - rate_bte[0] = rr_bte - rate_bte[1] = xp.abs(s2-s1)/xp.max(s2) - - return - - - - comm = MPI.COMM_WORLD # TPS solver @@ -690,36 +42,26 @@ def push(self, interface): tps.chooseSolver() tps.initialize() -boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) +boltzmann = BoltzmannMockSolver() interface = libtps.Tps2Boltzmann(tps) tps.initInterface(interface) -coords = np.array(interface.HostReadSpatialCoordinates(), copy=False) -print(coords.shape) - it = 0 max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") print("Max Iters: ", max_iters) tps.solveBegin() -tps.solveStep() -tps.push(interface) -boltzmann.grid_setup(interface) -boltzmann.fetch(interface) -boltzmann.solve() -boltzmann.push(interface) -tps.fetch(interface) -# while it < max_iters: -# tps.solveStep() -# tps.push(interface) -# boltzmann.fetch(interface) -# boltzmann.solve() -# boltzmann.push(interface) -# tps.fetch(interface) +while it < max_iters: + tps.solveStep() + tps.push(interface) + boltzmann.fetch(interface) + boltzmann.solve() + boltzmann.push(interface) + tps.fetch(interface) -# it = it+1 -# print("it, ", it) + it = it+1 + print("it, ", it) tps.solveEnd() diff --git a/test/test_table.cpp b/test/test_table.cpp index b5fc65ecb..3a8a95e2a 100644 --- a/test/test_table.cpp +++ b/test/test_table.cpp @@ -89,7 +89,7 @@ void testTableInterpolator1D(TPS::Tps &tps, int rank) { double xtest = refValues(k, 0); double fref = refValues(k, 1); double ftest[gpudata::MAXREACTIONS]; - chem->computeForwardRateCoeffs(xtest, xtest, ftest); + chem->computeForwardRateCoeffs(xtest, xtest, k, ftest); double error = abs((fref - ftest[0]) / fref); if (error >= scalarErrorThreshold) { grvy_printf(GRVY_ERROR, "Rank %d - %.5E: %.5E\n", rank, xtest, abs((fref - ftest[0]) / fref)); From c337e6171dd793e6c670724a0c14ab5501fa6943 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 5 Jan 2024 10:01:23 -0600 Subject: [PATCH 15/75] Add GridFunctionReaction --- src/reaction.cpp | 19 +++++++++++++++++++ src/reaction.hpp | 15 +++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/src/reaction.cpp b/src/reaction.cpp index 448d25515..1f41acc10 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -78,3 +78,22 @@ MFEM_HOST_DEVICE double Tabulated::computeRateCoefficient(const double &T_h, con double temp = (isElectronInvolved) ? T_e : T_h; return table_->eval(temp); } + +MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(const mfem::GridFunction & f, int comp): +#ifdef _GPU_ +data( f.Read() + comp*f.FESpace()->GetNDofs() ) +#else +data( f.HostRead() + comp*f.FESpace()->GetNDofs() ) +#endif +{ + assert( f.Size() >= (comp+1)*f.FESpace()->GetNDofs() ); +} + +MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() { } + +MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unused]] const double &T_h, + [[maybe_unused]] const double &T_e, + const int & dofindex, + [[maybe_unused]] const bool isElectronInvolved) { + return data[dofindex]; +} diff --git a/src/reaction.hpp b/src/reaction.hpp index 4571c7161..3ff35487b 100644 --- a/src/reaction.hpp +++ b/src/reaction.hpp @@ -117,4 +117,19 @@ class Tabulated : public Reaction { const bool isElectronInvolved = false); }; +class GridFunctionReaction : public Reaction { + private: + const double * data; + + public: + MFEM_HOST_DEVICE GridFunctionReaction(const mfem::GridFunction & f, int comp); + + MFEM_HOST_DEVICE virtual ~GridFunctionReaction(); + + MFEM_HOST_DEVICE virtual double computeRateCoefficient([[maybe_unused]] const double &T_h, + [[maybe_unused]] const double &T_e, + const int & dofindex, + [[maybe_unused]] const bool isElectronInvolved = false); +}; + #endif // REACTION_HPP_ From 3ea95f1a3c5febd3f5baea0e2675db125f870b71 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 5 Jan 2024 11:57:06 -0600 Subject: [PATCH 16/75] Preparing the data structures to support the Boltzmann integration --- src/chemistry.cpp | 13 +++++++++++++ src/chemistry.hpp | 3 +++ src/dataStructures.hpp | 3 ++- src/reaction.cpp | 26 ++++++++++++++------------ src/reaction.hpp | 8 ++++++-- 5 files changed, 38 insertions(+), 15 deletions(-) diff --git a/src/chemistry.cpp b/src/chemistry.cpp index a2900fc07..30e31d292 100644 --- a/src/chemistry.cpp +++ b/src/chemistry.cpp @@ -86,6 +86,9 @@ MFEM_HOST_DEVICE Chemistry::Chemistry(GasMixture *mixture, const ChemistryInput case TABULATED_RXN: { reactions_[r] = new Tabulated(inputs.reactionInputs[r].tableInput); } break; + case GRIDFUNCTION_RXN: { + reactions_[r] = new GridFunctionReaction(inputs.reactionInputs[r].indexInput); + } break; default: printf("Unknown reactionModel."); assert(false); @@ -106,6 +109,16 @@ MFEM_HOST_DEVICE Chemistry::~Chemistry() { } } +void Chemistry::setGridFunctionRates(const mfem::GridFunction & f) +{ + for (int r = 0; r < numReactions_; r++) { + if (reactions_[r]->reactionModel == GRIDFUNCTION_RXN) { + GridFunctionReaction * rx = dynamic_cast(reactions_[r]); + rx->setGridFunctionData(f); + } + } +} + #if 0 void Chemistry::computeForwardRateCoeffs(const double &T_h, const double &T_e, Vector &kfwd) { kfwd.SetSize(numReactions_); diff --git a/src/chemistry.hpp b/src/chemistry.hpp index 653b5fe77..6b0ce6e46 100644 --- a/src/chemistry.hpp +++ b/src/chemistry.hpp @@ -95,6 +95,9 @@ class Chemistry { MFEM_HOST_DEVICE ~Chemistry(); + // Set the grid function rates for GRIDFUNCTION_RXN reaction types + void setGridFunctionRates(const mfem::GridFunction & f); + // return Vector of reaction rate coefficients, with the size of numReaction_. // WARNING(marc) I have removed "virtual" qualifier here assuming these functions will not // change for child classes. Correct if wrong diff --git a/src/dataStructures.hpp b/src/dataStructures.hpp index c3a7a6825..d881abe86 100644 --- a/src/dataStructures.hpp +++ b/src/dataStructures.hpp @@ -74,7 +74,7 @@ enum TransportModel { ARGON_MINIMAL, ARGON_MIXTURE, CONSTANT, LTE_TRANSPORT, MIX enum ChemistryModel { /* CANTERA, */ NUM_CHEMISTRYMODEL }; -enum ReactionModel { ARRHENIUS, HOFFERTLIEN, TABULATED_RXN, NUM_REACTIONMODEL }; +enum ReactionModel { ARRHENIUS, HOFFERTLIEN, TABULATED_RXN, GRIDFUNCTION_RXN, NUM_REACTIONMODEL }; enum RadiationModel { NONE_RAD, NET_EMISSION, NUM_RADIATIONMODEL }; @@ -623,6 +623,7 @@ struct ReactionInput { TableInput tableInput; // NOTE(kevin): with gpu, this pointer is only valid on the device. const double *modelParams; + int indexInput; }; struct ChemistryInput { diff --git a/src/reaction.cpp b/src/reaction.cpp index 1f41acc10..5720f0966 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -36,7 +36,7 @@ using namespace mfem; using namespace std; MFEM_HOST_DEVICE Arrhenius::Arrhenius(const double &A, const double &b, const double &E) - : Reaction(), A_(A), b_(b), E_(E) {} + : Reaction(ARRHENIUS), A_(A), b_(b), E_(E) {} MFEM_HOST_DEVICE double Arrhenius::computeRateCoefficient(const double &T_h, const double &T_e, [[maybe_unused]] const int & dofindex, @@ -47,7 +47,7 @@ MFEM_HOST_DEVICE double Arrhenius::computeRateCoefficient(const double &T_h, con } MFEM_HOST_DEVICE HoffertLien::HoffertLien(const double &A, const double &b, const double &E) - : Reaction(), A_(A), b_(b), E_(E) {} + : Reaction(HOFFERTLIEN), A_(A), b_(b), E_(E) {} MFEM_HOST_DEVICE double HoffertLien::computeRateCoefficient(const double &T_h, const double &T_e, [[maybe_unused]] const int & dofindex, @@ -58,7 +58,7 @@ MFEM_HOST_DEVICE double HoffertLien::computeRateCoefficient(const double &T_h, c return A_ * pow(temp, b_) * (tempFactor + 2.0) * exp(-tempFactor); } -MFEM_HOST_DEVICE Tabulated::Tabulated(const TableInput &input) : Reaction() { +MFEM_HOST_DEVICE Tabulated::Tabulated(const TableInput &input) : Reaction(TABULATED_RXN) { switch (input.order) { case 1: { table_ = new LinearTable(input); @@ -79,18 +79,20 @@ MFEM_HOST_DEVICE double Tabulated::computeRateCoefficient(const double &T_h, con return table_->eval(temp); } -MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(const mfem::GridFunction & f, int comp): -#ifdef _GPU_ -data( f.Read() + comp*f.FESpace()->GetNDofs() ) -#else -data( f.HostRead() + comp*f.FESpace()->GetNDofs() ) -#endif -{ - assert( f.Size() >= (comp+1)*f.FESpace()->GetNDofs() ); -} +MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp): +Reaction(GRIDFUNCTION_RXN), data( nullptr ), comp(comp) { } MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() { } +void GridFunctionReaction::setGridFunctionData(const mfem::GridFunction & f) { + assert( f.Size() >= (comp+1)*f.FESpace()->GetNDofs() ); + #ifdef _GPU_ + data = f.Read() + comp*f.FESpace()->GetNDofs(); + #else + data = f.HostRead() + comp*f.FESpace()->GetNDofs(); + #endif +} + MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unused]] const double &T_h, [[maybe_unused]] const double &T_e, const int & dofindex, diff --git a/src/reaction.hpp b/src/reaction.hpp index 3ff35487b..aae3b5789 100644 --- a/src/reaction.hpp +++ b/src/reaction.hpp @@ -53,7 +53,8 @@ using namespace std; class Reaction { protected: public: - MFEM_HOST_DEVICE Reaction() {} + const ReactionModel reactionModel; + MFEM_HOST_DEVICE Reaction(ReactionModel rm): reactionModel(rm) {} MFEM_HOST_DEVICE virtual ~Reaction() {} @@ -120,12 +121,15 @@ class Tabulated : public Reaction { class GridFunctionReaction : public Reaction { private: const double * data; + const int comp; public: - MFEM_HOST_DEVICE GridFunctionReaction(const mfem::GridFunction & f, int comp); + MFEM_HOST_DEVICE GridFunctionReaction(int comp); MFEM_HOST_DEVICE virtual ~GridFunctionReaction(); + void setGridFunctionData(const mfem::GridFunction & f); + MFEM_HOST_DEVICE virtual double computeRateCoefficient([[maybe_unused]] const double &T_h, [[maybe_unused]] const double &T_e, const int & dofindex, From 53002e6abe856482945a6be534320dcc54589418 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 5 Jan 2024 12:00:23 -0600 Subject: [PATCH 17/75] make enforcestyle --- src/M2ulPhyS2Boltzmann.cpp | 20 +++++++------------ src/chemistry.cpp | 10 +++++----- src/chemistry.hpp | 9 +++++---- src/cycle_avg_joule_coupling.cpp | 1 - src/reaction.cpp | 34 ++++++++++++++++---------------- src/reaction.hpp | 19 +++++++++--------- src/tps2Boltzmann.cpp | 16 ++++++++------- src/tps2Boltzmann.hpp | 4 ++-- 8 files changed, 54 insertions(+), 59 deletions(-) diff --git a/src/M2ulPhyS2Boltzmann.cpp b/src/M2ulPhyS2Boltzmann.cpp index e3d60bf20..6d150d3e2 100644 --- a/src/M2ulPhyS2Boltzmann.cpp +++ b/src/M2ulPhyS2Boltzmann.cpp @@ -83,22 +83,16 @@ void M2ulPhyS::push(TPS::Tps2Boltzmann &interface) { delete electronTemperature; } -void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) { - +void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) { mfem::ParaViewDataCollection paraview_dc("interface", mesh); paraview_dc.SetPrefixPath("BoltzmannInterface"); paraview_dc.SetCycle(0); paraview_dc.SetDataFormat(VTKFormat::BINARY); paraview_dc.SetTime(0.0); - paraview_dc.RegisterField("Heavy temperature", - &interface.Field(TPS::Tps2Boltzmann::Index::HeavyTemperature)); - paraview_dc.RegisterField("Electron temperature", - &interface.Field(TPS::Tps2Boltzmann::Index::ElectronTemperature)); - paraview_dc.RegisterField("Electric field", - &interface.Field(TPS::Tps2Boltzmann::Index::ElectricField)); - paraview_dc.RegisterField("Species", - &interface.Field(TPS::Tps2Boltzmann::Index::SpeciesDensities)); - paraview_dc.RegisterField("Reaction rates", - &interface.Field(TPS::Tps2Boltzmann::Index::ReactionRates)); + paraview_dc.RegisterField("Heavy temperature", &interface.Field(TPS::Tps2Boltzmann::Index::HeavyTemperature)); + paraview_dc.RegisterField("Electron temperature", &interface.Field(TPS::Tps2Boltzmann::Index::ElectronTemperature)); + paraview_dc.RegisterField("Electric field", &interface.Field(TPS::Tps2Boltzmann::Index::ElectricField)); + paraview_dc.RegisterField("Species", &interface.Field(TPS::Tps2Boltzmann::Index::SpeciesDensities)); + paraview_dc.RegisterField("Reaction rates", &interface.Field(TPS::Tps2Boltzmann::Index::ReactionRates)); paraview_dc.Save(); - } +} diff --git a/src/chemistry.cpp b/src/chemistry.cpp index 30e31d292..45eab9c9c 100644 --- a/src/chemistry.cpp +++ b/src/chemistry.cpp @@ -109,11 +109,10 @@ MFEM_HOST_DEVICE Chemistry::~Chemistry() { } } -void Chemistry::setGridFunctionRates(const mfem::GridFunction & f) -{ +void Chemistry::setGridFunctionRates(const mfem::GridFunction &f) { for (int r = 0; r < numReactions_; r++) { if (reactions_[r]->reactionModel == GRIDFUNCTION_RXN) { - GridFunctionReaction * rx = dynamic_cast(reactions_[r]); + GridFunctionReaction *rx = dynamic_cast(reactions_[r]); rx->setGridFunctionData(f); } } @@ -138,7 +137,8 @@ void Chemistry::computeForwardRateCoeffs(const double &T_h, const double &T_e, V } #endif -MFEM_HOST_DEVICE void Chemistry::computeForwardRateCoeffs(const double &T_h, const double &T_e, const int & dofindex, double *kfwd) { +MFEM_HOST_DEVICE void Chemistry::computeForwardRateCoeffs(const double &T_h, const double &T_e, const int &dofindex, + double *kfwd) { // kfwd.SetSize(numReactions_); for (int r = 0; r < numReactions_; r++) kfwd[r] = 0.0; @@ -147,7 +147,7 @@ MFEM_HOST_DEVICE void Chemistry::computeForwardRateCoeffs(const double &T_h, con for (int r = 0; r < numReactions_; r++) { bool isElectronInvolved = isElectronInvolvedAt(r); - kfwd[r] = reactions_[r]->computeRateCoefficient(Thlim , Telim, dofindex, isElectronInvolved); + kfwd[r] = reactions_[r]->computeRateCoefficient(Thlim, Telim, dofindex, isElectronInvolved); } return; diff --git a/src/chemistry.hpp b/src/chemistry.hpp index 6b0ce6e46..a80e5fb2b 100644 --- a/src/chemistry.hpp +++ b/src/chemistry.hpp @@ -96,15 +96,16 @@ class Chemistry { MFEM_HOST_DEVICE ~Chemistry(); // Set the grid function rates for GRIDFUNCTION_RXN reaction types - void setGridFunctionRates(const mfem::GridFunction & f); + void setGridFunctionRates(const mfem::GridFunction &f); // return Vector of reaction rate coefficients, with the size of numReaction_. // WARNING(marc) I have removed "virtual" qualifier here assuming these functions will not // change for child classes. Correct if wrong - //void computeForwardRateCoeffs(const double &T_h, const double &T_e, Vector &kfwd); - MFEM_HOST_DEVICE void computeForwardRateCoeffs(const double &T_h, const double &T_e, const int & dofindex, double *kfwd); + // void computeForwardRateCoeffs(const double &T_h, const double &T_e, Vector &kfwd); + MFEM_HOST_DEVICE void computeForwardRateCoeffs(const double &T_h, const double &T_e, const int &dofindex, + double *kfwd); - //void computeEquilibriumConstants(const double &T_h, const double &T_e, Vector &kC); + // void computeEquilibriumConstants(const double &T_h, const double &T_e, Vector &kC); MFEM_HOST_DEVICE void computeEquilibriumConstants(const double &T_h, const double &T_e, double *kC); // return rate coefficients of (reactionIndex)-th reaction. (start from 0) diff --git a/src/cycle_avg_joule_coupling.cpp b/src/cycle_avg_joule_coupling.cpp index f5f5c3a5c..acf78235b 100644 --- a/src/cycle_avg_joule_coupling.cpp +++ b/src/cycle_avg_joule_coupling.cpp @@ -313,7 +313,6 @@ void CycleAvgJouleCoupling::interpElectricFieldFromEMToFlow() { efieldR_->SetFromTrueDofs(interp_vals); efieldR_->HostRead(); - const ParGridFunction *efield_imag_gf = qmsa_solver_->getElectricFieldimag(); interp_em_to_flow_->Interpolate(vxyz, *efield_imag_gf, interp_vals); efieldI_->SetFromTrueDofs(interp_vals); diff --git a/src/reaction.cpp b/src/reaction.cpp index 5720f0966..e2a5c952d 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -39,7 +39,7 @@ MFEM_HOST_DEVICE Arrhenius::Arrhenius(const double &A, const double &b, const do : Reaction(ARRHENIUS), A_(A), b_(b), E_(E) {} MFEM_HOST_DEVICE double Arrhenius::computeRateCoefficient(const double &T_h, const double &T_e, - [[maybe_unused]] const int & dofindex, + [[maybe_unused]] const int &dofindex, const bool isElectronInvolved) { double temp = (isElectronInvolved) ? T_e : T_h; @@ -50,7 +50,7 @@ MFEM_HOST_DEVICE HoffertLien::HoffertLien(const double &A, const double &b, cons : Reaction(HOFFERTLIEN), A_(A), b_(b), E_(E) {} MFEM_HOST_DEVICE double HoffertLien::computeRateCoefficient(const double &T_h, const double &T_e, - [[maybe_unused]] const int & dofindex, + [[maybe_unused]] const int &dofindex, const bool isElectronInvolved) { double temp = (isElectronInvolved) ? T_e : T_h; double tempFactor = E_ / BOLTZMANNCONSTANT / temp; @@ -73,29 +73,29 @@ MFEM_HOST_DEVICE Tabulated::Tabulated(const TableInput &input) : Reaction(TABULA MFEM_HOST_DEVICE Tabulated::~Tabulated() { delete table_; } MFEM_HOST_DEVICE double Tabulated::computeRateCoefficient(const double &T_h, const double &T_e, - [[maybe_unused]] const int & dofindex, + [[maybe_unused]] const int &dofindex, const bool isElectronInvolved) { double temp = (isElectronInvolved) ? T_e : T_h; return table_->eval(temp); } -MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp): -Reaction(GRIDFUNCTION_RXN), data( nullptr ), comp(comp) { } +MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp) + : Reaction(GRIDFUNCTION_RXN), data(nullptr), comp(comp) {} -MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() { } +MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {} -void GridFunctionReaction::setGridFunctionData(const mfem::GridFunction & f) { - assert( f.Size() >= (comp+1)*f.FESpace()->GetNDofs() ); - #ifdef _GPU_ - data = f.Read() + comp*f.FESpace()->GetNDofs(); - #else - data = f.HostRead() + comp*f.FESpace()->GetNDofs(); - #endif +void GridFunctionReaction::setGridFunctionData(const mfem::GridFunction &f) { + assert(f.Size() >= (comp + 1) * f.FESpace()->GetNDofs()); +#ifdef _GPU_ + data = f.Read() + comp * f.FESpace()->GetNDofs(); +#else + data = f.HostRead() + comp * f.FESpace()->GetNDofs(); +#endif } -MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unused]] const double &T_h, - [[maybe_unused]] const double &T_e, - const int & dofindex, - [[maybe_unused]] const bool isElectronInvolved) { +MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unused]] const double &T_h, + [[maybe_unused]] const double &T_e, + const int &dofindex, + [[maybe_unused]] const bool isElectronInvolved) { return data[dofindex]; } diff --git a/src/reaction.hpp b/src/reaction.hpp index aae3b5789..b8c666219 100644 --- a/src/reaction.hpp +++ b/src/reaction.hpp @@ -54,12 +54,12 @@ class Reaction { protected: public: const ReactionModel reactionModel; - MFEM_HOST_DEVICE Reaction(ReactionModel rm): reactionModel(rm) {} + MFEM_HOST_DEVICE Reaction(ReactionModel rm) : reactionModel(rm) {} MFEM_HOST_DEVICE virtual ~Reaction() {} MFEM_HOST_DEVICE virtual double computeRateCoefficient(const double &T_h, const double &T_e, - [[maybe_unused]] const int & dofindex, + [[maybe_unused]] const int &dofindex, const bool isElectronInvolved = false) { printf("computeRateCoefficient not implemented"); return 0; @@ -79,7 +79,7 @@ class Arrhenius : public Reaction { MFEM_HOST_DEVICE virtual ~Arrhenius() {} MFEM_HOST_DEVICE virtual double computeRateCoefficient(const double &T_h, const double &T_e, - [[maybe_unused]] const int & dofindex, + [[maybe_unused]] const int &dofindex, const bool isElectronInvolved = false); }; @@ -100,7 +100,7 @@ class HoffertLien : public Reaction { MFEM_HOST_DEVICE virtual ~HoffertLien() {} MFEM_HOST_DEVICE virtual double computeRateCoefficient(const double &T_h, const double &T_e, - [[maybe_unused]] const int & dofindex, + [[maybe_unused]] const int &dofindex, const bool isElectronInvolved = false); }; @@ -114,13 +114,13 @@ class Tabulated : public Reaction { MFEM_HOST_DEVICE virtual ~Tabulated(); MFEM_HOST_DEVICE virtual double computeRateCoefficient(const double &T_h, const double &T_e, - [[maybe_unused]] const int & dofindex, + [[maybe_unused]] const int &dofindex, const bool isElectronInvolved = false); }; class GridFunctionReaction : public Reaction { private: - const double * data; + const double *data; const int comp; public: @@ -128,11 +128,10 @@ class GridFunctionReaction : public Reaction { MFEM_HOST_DEVICE virtual ~GridFunctionReaction(); - void setGridFunctionData(const mfem::GridFunction & f); + void setGridFunctionData(const mfem::GridFunction &f); - MFEM_HOST_DEVICE virtual double computeRateCoefficient([[maybe_unused]] const double &T_h, - [[maybe_unused]] const double &T_e, - const int & dofindex, + MFEM_HOST_DEVICE virtual double computeRateCoefficient([[maybe_unused]] const double &T_h, + [[maybe_unused]] const double &T_e, const int &dofindex, [[maybe_unused]] const bool isElectronInvolved = false); }; diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index a6b5800f7..8cb35d0c8 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -41,9 +41,10 @@ #include #endif +#include + #include #include -#include namespace TPS { @@ -73,8 +74,8 @@ class CPUData { size_t stride_; }; -void idenity_fun(const Vector & x, Vector & out) { - for ( int i(0); i < x.Size(); ++i ) out[i] = x[i]; +void idenity_fun(const Vector &x, Vector &out) { + for (int i(0); i < x.Size(); ++i) out[i] = x[i]; } Tps2Boltzmann::Tps2Boltzmann(Tps *tps) : NIndexes(7), tps_(tps), all_fes_(nullptr) { @@ -89,7 +90,7 @@ Tps2Boltzmann::Tps2Boltzmann(Tps *tps) : NIndexes(7), tps_(tps), all_fes_(nullpt assert(basis_type_ == 0 || basis_type_ == 1); tps->getRequiredInput("em/current_frequency", EfieldAngularFreq_); - EfieldAngularFreq_ *= 2.*M_PI; + EfieldAngularFreq_ *= 2. * M_PI; offsets.SetSize(NIndexes + 1); ncomps.SetSize(NIndexes + 1); @@ -277,9 +278,10 @@ void tps2bolzmann(py::module &m) { [](TPS::Tps2Boltzmann &interface, TPS::Tps2Boltzmann::Index index) { return std::unique_ptr(new TPS::CPUData(interface.Field(index), false)); }) - .def("HostReadWrite", [](TPS::Tps2Boltzmann &interface, TPS::Tps2Boltzmann::Index index) { - return std::unique_ptr(new TPS::CPUData(interface.Field(index), true)); - }) + .def("HostReadWrite", + [](TPS::Tps2Boltzmann &interface, TPS::Tps2Boltzmann::Index index) { + return std::unique_ptr(new TPS::CPUData(interface.Field(index), true)); + }) .def("EfieldAngularFreq", &TPS::Tps2Boltzmann::EfieldAngularFreq) .def("Nspecies", &TPS::Tps2Boltzmann::Nspecies) .def("NeFiledComps", &TPS::Tps2Boltzmann::NeFieldComps) diff --git a/src/tps2Boltzmann.hpp b/src/tps2Boltzmann.hpp index 4895c523b..91cbd1a54 100644 --- a/src/tps2Boltzmann.hpp +++ b/src/tps2Boltzmann.hpp @@ -99,8 +99,8 @@ class Tps2Boltzmann { const mfem::ParFiniteElementSpace &NativeFes(Index index) const { return *(list_native_fes_[index]); } mfem::ParFiniteElementSpace &NativeFes(Index index) { return *(list_native_fes_[index]); } - const mfem::ParGridFunction & SpatialCoordinates() const { return *spatial_coordinates_; } - mfem::ParGridFunction & SpatialCoordinates() { return *spatial_coordinates_; } + const mfem::ParGridFunction &SpatialCoordinates() const { return *spatial_coordinates_; } + mfem::ParGridFunction &SpatialCoordinates() { return *spatial_coordinates_; } const mfem::ParGridFunction &Field(Index index) const { return *(fields_[index]); } mfem::ParGridFunction &Field(Index index) { return *(fields_[index]); } From 248aa45b952eaf3277a0a175353d975c8bfdcab8 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 5 Jan 2024 12:02:12 -0600 Subject: [PATCH 18/75] make style --- src/chemistry.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chemistry.cpp b/src/chemistry.cpp index 45eab9c9c..10fa0190b 100644 --- a/src/chemistry.cpp +++ b/src/chemistry.cpp @@ -118,7 +118,7 @@ void Chemistry::setGridFunctionRates(const mfem::GridFunction &f) { } } -#if 0 +#if 0 void Chemistry::computeForwardRateCoeffs(const double &T_h, const double &T_e, Vector &kfwd) { kfwd.SetSize(numReactions_); From e45626218c2a85b3f8db1ee83467bf405932e759 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 5 Jan 2024 23:03:50 -0600 Subject: [PATCH 19/75] Complete interface --- src/M2ulPhyS2Boltzmann.cpp | 15 +++-------- src/chemistry.cpp | 2 +- src/chemistry.hpp | 2 +- src/reaction.cpp | 9 ++++--- src/reaction.hpp | 3 ++- src/tps2Boltzmann.cpp | 55 ++++++++++++++++++++++++++++++++++++-- src/tps2Boltzmann.hpp | 7 +++++ 7 files changed, 73 insertions(+), 20 deletions(-) diff --git a/src/M2ulPhyS2Boltzmann.cpp b/src/M2ulPhyS2Boltzmann.cpp index 6d150d3e2..bf1a55801 100644 --- a/src/M2ulPhyS2Boltzmann.cpp +++ b/src/M2ulPhyS2Boltzmann.cpp @@ -84,15 +84,8 @@ void M2ulPhyS::push(TPS::Tps2Boltzmann &interface) { } void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) { - mfem::ParaViewDataCollection paraview_dc("interface", mesh); - paraview_dc.SetPrefixPath("BoltzmannInterface"); - paraview_dc.SetCycle(0); - paraview_dc.SetDataFormat(VTKFormat::BINARY); - paraview_dc.SetTime(0.0); - paraview_dc.RegisterField("Heavy temperature", &interface.Field(TPS::Tps2Boltzmann::Index::HeavyTemperature)); - paraview_dc.RegisterField("Electron temperature", &interface.Field(TPS::Tps2Boltzmann::Index::ElectronTemperature)); - paraview_dc.RegisterField("Electric field", &interface.Field(TPS::Tps2Boltzmann::Index::ElectricField)); - paraview_dc.RegisterField("Species", &interface.Field(TPS::Tps2Boltzmann::Index::SpeciesDensities)); - paraview_dc.RegisterField("Reaction rates", &interface.Field(TPS::Tps2Boltzmann::Index::ReactionRates)); - paraview_dc.Save(); + mfem::ParFiniteElementSpace * reaction_rates_fes(&(interface.NativeFes(TPS::Tps2Boltzmann::Index::ReactionRates))); + std::shared_ptr reaction_rates(new mfem::ParGridFunction( reaction_rates_fes ) ); + interface.interpolateToNativeFES(*reaction_rates, TPS::Tps2Boltzmann::Index::ReactionRates); + chemistry_->setGridFunctionRates(reaction_rates); } diff --git a/src/chemistry.cpp b/src/chemistry.cpp index 10fa0190b..f233d2ed5 100644 --- a/src/chemistry.cpp +++ b/src/chemistry.cpp @@ -109,7 +109,7 @@ MFEM_HOST_DEVICE Chemistry::~Chemistry() { } } -void Chemistry::setGridFunctionRates(const mfem::GridFunction &f) { +void Chemistry::setGridFunctionRates(std::shared_ptr &f) { for (int r = 0; r < numReactions_; r++) { if (reactions_[r]->reactionModel == GRIDFUNCTION_RXN) { GridFunctionReaction *rx = dynamic_cast(reactions_[r]); diff --git a/src/chemistry.hpp b/src/chemistry.hpp index a80e5fb2b..0463f0f22 100644 --- a/src/chemistry.hpp +++ b/src/chemistry.hpp @@ -96,7 +96,7 @@ class Chemistry { MFEM_HOST_DEVICE ~Chemistry(); // Set the grid function rates for GRIDFUNCTION_RXN reaction types - void setGridFunctionRates(const mfem::GridFunction &f); + void setGridFunctionRates(std::shared_ptr &f); // return Vector of reaction rate coefficients, with the size of numReaction_. // WARNING(marc) I have removed "virtual" qualifier here assuming these functions will not diff --git a/src/reaction.cpp b/src/reaction.cpp index e2a5c952d..a8a984ef8 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -84,12 +84,13 @@ MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp) MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {} -void GridFunctionReaction::setGridFunctionData(const mfem::GridFunction &f) { - assert(f.Size() >= (comp + 1) * f.FESpace()->GetNDofs()); +void GridFunctionReaction::setGridFunctionData(std::shared_ptr &f) { + f_ = f; + assert(f->Size() >= (comp + 1) * f->FESpace()->GetNDofs()); #ifdef _GPU_ - data = f.Read() + comp * f.FESpace()->GetNDofs(); + data = f_->Read() + comp * f_->FESpace()->GetNDofs(); #else - data = f.HostRead() + comp * f.FESpace()->GetNDofs(); + data = f_->HostRead() + comp * f_->FESpace()->GetNDofs(); #endif } diff --git a/src/reaction.hpp b/src/reaction.hpp index b8c666219..daf0406ed 100644 --- a/src/reaction.hpp +++ b/src/reaction.hpp @@ -120,6 +120,7 @@ class Tabulated : public Reaction { class GridFunctionReaction : public Reaction { private: + std::shared_ptr f_; const double *data; const int comp; @@ -128,7 +129,7 @@ class GridFunctionReaction : public Reaction { MFEM_HOST_DEVICE virtual ~GridFunctionReaction(); - void setGridFunctionData(const mfem::GridFunction &f); + void setGridFunctionData(std::shared_ptr &f); MFEM_HOST_DEVICE virtual double computeRateCoefficient([[maybe_unused]] const double &T_h, [[maybe_unused]] const double &T_e, const int &dofindex, diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index 8cb35d0c8..ea554f571 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -78,7 +78,12 @@ void idenity_fun(const Vector &x, Vector &out) { for (int i(0); i < x.Size(); ++i) out[i] = x[i]; } -Tps2Boltzmann::Tps2Boltzmann(Tps *tps) : NIndexes(7), tps_(tps), all_fes_(nullptr) { +Tps2Boltzmann::Tps2Boltzmann(Tps *tps) : + NIndexes(7), + tps_(tps), + all_fes_(nullptr), + save_to_paraview_dc(false), + paraview_dc(nullptr) { // Assert we have a couple solver; assert(tps->isFlowEMCoupled()); @@ -92,6 +97,8 @@ Tps2Boltzmann::Tps2Boltzmann(Tps *tps) : NIndexes(7), tps_(tps), all_fes_(nullpt tps->getRequiredInput("em/current_frequency", EfieldAngularFreq_); EfieldAngularFreq_ *= 2. * M_PI; + tps->getInput("boltzmannInterface/save_to_paraview", save_to_paraview_dc); + offsets.SetSize(NIndexes + 1); ncomps.SetSize(NIndexes + 1); } @@ -170,12 +177,28 @@ void Tps2Boltzmann::init(M2ulPhyS *flowSolver) { scalar_interpolator_->SetAssemblyLevel(assembly_level); scalar_interpolator_->Assemble(); + scalar_interpolator_to_nativeFES_= new mfem::ParDiscreteLinearOperator(scalar_fes_, scalar_native_fes_); + scalar_interpolator_to_nativeFES_->AddDomainInterpolator(new mfem::IdentityInterpolator()); + scalar_interpolator_to_nativeFES_->SetAssemblyLevel(assembly_level); + scalar_interpolator_to_nativeFES_->Assemble(); + // Spatial coordinates spatial_coord_fes_ = new mfem::ParFiniteElementSpace(pmesh, fec_, pmesh->Dimension(), mfem::Ordering::byNODES); spatial_coordinates_ = new mfem::ParGridFunction(spatial_coord_fes_); mfem::VectorFunctionCoefficient coord_fun(pmesh->Dimension(), std::function(idenity_fun)); spatial_coordinates_->ProjectCoefficient(coord_fun); + + if(save_to_paraview_dc) { + paraview_dc = new mfem::ParaViewDataCollection("interface", pmesh); + paraview_dc->SetPrefixPath("BoltzmannInterface"); + paraview_dc->SetDataFormat(VTKFormat::BINARY); + paraview_dc->RegisterField("Heavy temperature", &(this->Field(TPS::Tps2Boltzmann::Index::HeavyTemperature))); + paraview_dc->RegisterField("Electron temperature", &(this->Field(TPS::Tps2Boltzmann::Index::ElectronTemperature))); + paraview_dc->RegisterField("Electric field", &(this->Field(TPS::Tps2Boltzmann::Index::ElectricField))); + paraview_dc->RegisterField("Species", &(this->Field(TPS::Tps2Boltzmann::Index::SpeciesDensities))); + paraview_dc->RegisterField("Reaction rates", &(this->Field(TPS::Tps2Boltzmann::Index::ReactionRates))); + } } void Tps2Boltzmann::interpolateFromNativeFES(const ParGridFunction &input, Tps2Boltzmann::Index index) { @@ -193,6 +216,29 @@ void Tps2Boltzmann::interpolateFromNativeFES(const ParGridFunction &input, Tps2B } } +void Tps2Boltzmann::interpolateToNativeFES(ParGridFunction &output, Index index) { + if (ncomps[index] == 1) { + scalar_interpolator_to_nativeFES_->Mult(*(fields_[index]), output); + } else { + const int loc_size_native = list_native_fes_[index]->GetNDofs(); + const int loc_size = list_fes_[index]->GetNDofs(); + for (int icomp(0); icomp < ncomps[index]; ++icomp) { + mfem::Vector view_output(output, icomp * loc_size_native, + loc_size_native); + mfem::Vector view_field(*(fields_[index]), icomp * loc_size, loc_size); + scalar_interpolator_to_nativeFES_->Mult(view_field, view_output); + } + } +} + +void Tps2Boltzmann::saveDataCollection(int cycle, double time) { + if ( paraview_dc) { + paraview_dc->SetCycle(cycle); + paraview_dc->SetTime(time); + paraview_dc->Save(); + } +} + Tps2Boltzmann::~Tps2Boltzmann() { // Delete views for (std::size_t i(0); i < NIndexes + 1; ++i) delete fields_[i]; @@ -200,6 +246,7 @@ Tps2Boltzmann::~Tps2Boltzmann() { delete[] fields_; // Delete interpolators + delete scalar_interpolator_to_nativeFES_; delete scalar_interpolator_; // Delete view Native Finite Element Spaces @@ -285,7 +332,11 @@ void tps2bolzmann(py::module &m) { .def("EfieldAngularFreq", &TPS::Tps2Boltzmann::EfieldAngularFreq) .def("Nspecies", &TPS::Tps2Boltzmann::Nspecies) .def("NeFiledComps", &TPS::Tps2Boltzmann::NeFieldComps) - .def("nComponents", &TPS::Tps2Boltzmann::nComponents); + .def("nComponents", &TPS::Tps2Boltzmann::nComponents) + .def("saveDataCollection", + &TPS::Tps2Boltzmann::saveDataCollection, + "Save the data collection in Paraview format", + py::arg("cycle"), py::arg("time") ); } } // namespace tps_wrappers #endif diff --git a/src/tps2Boltzmann.hpp b/src/tps2Boltzmann.hpp index 91cbd1a54..c4138b755 100644 --- a/src/tps2Boltzmann.hpp +++ b/src/tps2Boltzmann.hpp @@ -106,6 +106,7 @@ class Tps2Boltzmann { mfem::ParGridFunction &Field(Index index) { return *(fields_[index]); } void interpolateFromNativeFES(const ParGridFunction &input, Index index); + void interpolateToNativeFES(ParGridFunction &output, Index index); //! Get the angular Frequency \omega of the electrical field: //! E(t) = Er*cos(\omega t) + Ei*sin(\omega t) @@ -114,6 +115,8 @@ class Tps2Boltzmann { int NeFieldComps() const { return nEfieldComps_; } int nComponents(Index index) const { return ncomps[index]; } + void saveDataCollection(int cycle, double time); + ~Tps2Boltzmann(); private: @@ -149,12 +152,16 @@ class Tps2Boltzmann { //! Linear interpolator between native TPS fec to Interface fec mfem::ParDiscreteLinearOperator *scalar_interpolator_; + mfem::ParDiscreteLinearOperator *scalar_interpolator_to_nativeFES_; //! array of fields see *Index for how to address this mfem::ParGridFunction **fields_; mfem::ParGridFunction *spatial_coordinates_; double EfieldAngularFreq_; + + bool save_to_paraview_dc; + mfem::ParaViewDataCollection * paraview_dc; }; } // namespace TPS From baddac5de2835fc578fe5c274b3fc8d9e1ca6479 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 5 Jan 2024 23:04:58 -0600 Subject: [PATCH 20/75] make enforcestyle --- src/M2ulPhyS2Boltzmann.cpp | 4 ++-- src/tps2Boltzmann.cpp | 39 ++++++++++++++++---------------------- src/tps2Boltzmann.hpp | 2 +- 3 files changed, 19 insertions(+), 26 deletions(-) diff --git a/src/M2ulPhyS2Boltzmann.cpp b/src/M2ulPhyS2Boltzmann.cpp index bf1a55801..4fa594e5e 100644 --- a/src/M2ulPhyS2Boltzmann.cpp +++ b/src/M2ulPhyS2Boltzmann.cpp @@ -84,8 +84,8 @@ void M2ulPhyS::push(TPS::Tps2Boltzmann &interface) { } void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) { - mfem::ParFiniteElementSpace * reaction_rates_fes(&(interface.NativeFes(TPS::Tps2Boltzmann::Index::ReactionRates))); - std::shared_ptr reaction_rates(new mfem::ParGridFunction( reaction_rates_fes ) ); + mfem::ParFiniteElementSpace *reaction_rates_fes(&(interface.NativeFes(TPS::Tps2Boltzmann::Index::ReactionRates))); + std::shared_ptr reaction_rates(new mfem::ParGridFunction(reaction_rates_fes)); interface.interpolateToNativeFES(*reaction_rates, TPS::Tps2Boltzmann::Index::ReactionRates); chemistry_->setGridFunctionRates(reaction_rates); } diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index ea554f571..504c4560f 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -78,12 +78,8 @@ void idenity_fun(const Vector &x, Vector &out) { for (int i(0); i < x.Size(); ++i) out[i] = x[i]; } -Tps2Boltzmann::Tps2Boltzmann(Tps *tps) : - NIndexes(7), - tps_(tps), - all_fes_(nullptr), - save_to_paraview_dc(false), - paraview_dc(nullptr) { +Tps2Boltzmann::Tps2Boltzmann(Tps *tps) + : NIndexes(7), tps_(tps), all_fes_(nullptr), save_to_paraview_dc(false), paraview_dc(nullptr) { // Assert we have a couple solver; assert(tps->isFlowEMCoupled()); @@ -177,7 +173,7 @@ void Tps2Boltzmann::init(M2ulPhyS *flowSolver) { scalar_interpolator_->SetAssemblyLevel(assembly_level); scalar_interpolator_->Assemble(); - scalar_interpolator_to_nativeFES_= new mfem::ParDiscreteLinearOperator(scalar_fes_, scalar_native_fes_); + scalar_interpolator_to_nativeFES_ = new mfem::ParDiscreteLinearOperator(scalar_fes_, scalar_native_fes_); scalar_interpolator_to_nativeFES_->AddDomainInterpolator(new mfem::IdentityInterpolator()); scalar_interpolator_to_nativeFES_->SetAssemblyLevel(assembly_level); scalar_interpolator_to_nativeFES_->Assemble(); @@ -189,15 +185,15 @@ void Tps2Boltzmann::init(M2ulPhyS *flowSolver) { std::function(idenity_fun)); spatial_coordinates_->ProjectCoefficient(coord_fun); - if(save_to_paraview_dc) { - paraview_dc = new mfem::ParaViewDataCollection("interface", pmesh); - paraview_dc->SetPrefixPath("BoltzmannInterface"); - paraview_dc->SetDataFormat(VTKFormat::BINARY); - paraview_dc->RegisterField("Heavy temperature", &(this->Field(TPS::Tps2Boltzmann::Index::HeavyTemperature))); - paraview_dc->RegisterField("Electron temperature", &(this->Field(TPS::Tps2Boltzmann::Index::ElectronTemperature))); - paraview_dc->RegisterField("Electric field", &(this->Field(TPS::Tps2Boltzmann::Index::ElectricField))); - paraview_dc->RegisterField("Species", &(this->Field(TPS::Tps2Boltzmann::Index::SpeciesDensities))); - paraview_dc->RegisterField("Reaction rates", &(this->Field(TPS::Tps2Boltzmann::Index::ReactionRates))); + if (save_to_paraview_dc) { + paraview_dc = new mfem::ParaViewDataCollection("interface", pmesh); + paraview_dc->SetPrefixPath("BoltzmannInterface"); + paraview_dc->SetDataFormat(VTKFormat::BINARY); + paraview_dc->RegisterField("Heavy temperature", &(this->Field(TPS::Tps2Boltzmann::Index::HeavyTemperature))); + paraview_dc->RegisterField("Electron temperature", &(this->Field(TPS::Tps2Boltzmann::Index::ElectronTemperature))); + paraview_dc->RegisterField("Electric field", &(this->Field(TPS::Tps2Boltzmann::Index::ElectricField))); + paraview_dc->RegisterField("Species", &(this->Field(TPS::Tps2Boltzmann::Index::SpeciesDensities))); + paraview_dc->RegisterField("Reaction rates", &(this->Field(TPS::Tps2Boltzmann::Index::ReactionRates))); } } @@ -223,8 +219,7 @@ void Tps2Boltzmann::interpolateToNativeFES(ParGridFunction &output, Index index) const int loc_size_native = list_native_fes_[index]->GetNDofs(); const int loc_size = list_fes_[index]->GetNDofs(); for (int icomp(0); icomp < ncomps[index]; ++icomp) { - mfem::Vector view_output(output, icomp * loc_size_native, - loc_size_native); + mfem::Vector view_output(output, icomp * loc_size_native, loc_size_native); mfem::Vector view_field(*(fields_[index]), icomp * loc_size, loc_size); scalar_interpolator_to_nativeFES_->Mult(view_field, view_output); } @@ -232,7 +227,7 @@ void Tps2Boltzmann::interpolateToNativeFES(ParGridFunction &output, Index index) } void Tps2Boltzmann::saveDataCollection(int cycle, double time) { - if ( paraview_dc) { + if (paraview_dc) { paraview_dc->SetCycle(cycle); paraview_dc->SetTime(time); paraview_dc->Save(); @@ -333,10 +328,8 @@ void tps2bolzmann(py::module &m) { .def("Nspecies", &TPS::Tps2Boltzmann::Nspecies) .def("NeFiledComps", &TPS::Tps2Boltzmann::NeFieldComps) .def("nComponents", &TPS::Tps2Boltzmann::nComponents) - .def("saveDataCollection", - &TPS::Tps2Boltzmann::saveDataCollection, - "Save the data collection in Paraview format", - py::arg("cycle"), py::arg("time") ); + .def("saveDataCollection", &TPS::Tps2Boltzmann::saveDataCollection, "Save the data collection in Paraview format", + py::arg("cycle"), py::arg("time")); } } // namespace tps_wrappers #endif diff --git a/src/tps2Boltzmann.hpp b/src/tps2Boltzmann.hpp index c4138b755..71649d8d6 100644 --- a/src/tps2Boltzmann.hpp +++ b/src/tps2Boltzmann.hpp @@ -161,7 +161,7 @@ class Tps2Boltzmann { double EfieldAngularFreq_; bool save_to_paraview_dc; - mfem::ParaViewDataCollection * paraview_dc; + mfem::ParaViewDataCollection *paraview_dc; }; } // namespace TPS From 804da189125753dabc182771a3fecd66b141b3cb Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Sat, 6 Jan 2024 21:45:04 -0600 Subject: [PATCH 21/75] Small edits --- src/M2ulPhyS.cpp | 5 +++++ test/vpath.sh | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/M2ulPhyS.cpp b/src/M2ulPhyS.cpp index f529d9019..d858477ec 100644 --- a/src/M2ulPhyS.cpp +++ b/src/M2ulPhyS.cpp @@ -3319,6 +3319,11 @@ void M2ulPhyS::parseReactionInputs() { config.reactionModels[r - 1] = TABULATED_RXN; std::string inputPath(basepath + "/tabulated"); readTable(inputPath, config.chemistryInput.reactionInputs[r - 1].tableInput); + } else if (model == "bte") { + config.reactionModels[r - 1] = GRIDFUNCTION_RXN; + int index; + tpsP->getRequiredInput((basepath + "index").c_str(), index); + config.chemistryInput.reactionInputs[r - 1].indexInput = index; } else { grvy_printf(GRVY_ERROR, "\nUnknown reaction_model -> %s", model.c_str()); exit(ERROR); diff --git a/test/vpath.sh b/test/vpath.sh index a98808e8c..af1e5eead 100755 --- a/test/vpath.sh +++ b/test/vpath.sh @@ -30,7 +30,8 @@ if [ ! -d ref_solns ];then fi # necessary binaries -binaries="bats die.sh soln_differ count_gpus.sh sniff_mpirun.sh ../src/tps.py ../src/tps-time-loop.py ../test/test_tps_splitcomm.py" +binaries="bats die.sh soln_differ count_gpus.sh sniff_mpirun.sh " +binaries+="../src/tps.py ../src/tps-time-loop.py ../src/tps-bte_0d3v.py ../test/test_tps_splitcomm.py" for binary in $binaries; do if [ ! -x $binary ];then if [ -x $testDir/$binary ];then From 1bb3216091a915aa65c689e2348a1ddb5cdd9eca Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Sun, 7 Jan 2024 09:54:50 -0600 Subject: [PATCH 22/75] Let GridFunctionReaction return 0 if the gridfunction is not set.\n Use Arrhenius rates in the Mock Boltzmann solver --- src/reaction.cpp | 5 ++++- src/tps-time-loop.py | 35 +++++++++++++++++++++++------------ test/vpath.sh | 2 +- 3 files changed, 28 insertions(+), 14 deletions(-) diff --git a/src/reaction.cpp b/src/reaction.cpp index a8a984ef8..ae322c934 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -98,5 +98,8 @@ MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unu [[maybe_unused]] const double &T_e, const int &dofindex, [[maybe_unused]] const bool isElectronInvolved) { - return data[dofindex]; + if(data) + return data[dofindex]; + else + return 0.; } diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 9bbc719d0..ce5a398a6 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -5,25 +5,36 @@ from mpi4py import MPI -class BoltzmannMockSolver: +class ArrheniusSolver: def __init__(self): - pass + self.UNIVERSALGASCONSTANT = 8.3144598; # J * mol^(-1) * K^(-1) + self.species_densities = None + self.efield = None + self.heavy_temperature = None + self.reaction_rates = [None, None] + #Reaction 1: 'Ar + E => Ar.+1 + 2 E', + #Reaction 2: 'Ar.+1 + 2 E => Ar + E' + self.A = [74072.331348, 5.66683445516e-20] + self.b = [1.511, 0.368] + self.E = [1176329.772504, -377725.908714] # [J/mol] def fetch(self, interface): - species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False) - efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False) - heavy_temperature = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + self.species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False) + self.efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False) + self.heavy_temperature = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + - print("|| species_densities ||_2 = ", np.linalg.norm(species_densities) ) - print("|| efield ||_2 = ", np.linalg.norm(efield) ) - print("||heavy_temperature||_2 = ", np.linalg.norm(heavy_temperature) ) def solve(self): - pass + #A_ * pow(temp, b_) * exp(-E_ / UNIVERSALGASCONSTANT / temp); + self.reaction_rates = [A * np.pow(self.heavy_temperature, b) * + np.exp(-E/(self.UNIVERSALGASCONSTANT * self.heavy_temperature)) + for A,b,E in zip(self.A, self.b, self.E) ] def push(self, interface): - electron_temperature = np.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) - electron_temperature[:] = 1. + rates = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False) + rates[0:self.heavy_temperature.shape[0]] = self.reaction_rates[0] + rates[self.heavy_temperature.shape[0]:] = self.reaction_rates[1] @@ -42,7 +53,7 @@ def push(self, interface): tps.chooseSolver() tps.initialize() -boltzmann = BoltzmannMockSolver() +boltzmann = ArrheniusSolver() interface = libtps.Tps2Boltzmann(tps) tps.initInterface(interface) diff --git a/test/vpath.sh b/test/vpath.sh index af1e5eead..26d070bc1 100755 --- a/test/vpath.sh +++ b/test/vpath.sh @@ -31,7 +31,7 @@ fi # necessary binaries binaries="bats die.sh soln_differ count_gpus.sh sniff_mpirun.sh " -binaries+="../src/tps.py ../src/tps-time-loop.py ../src/tps-bte_0d3v.py ../test/test_tps_splitcomm.py" +binaries+="../src/tps.py ../src/tps-time-loop.py ../cdsrc/tps-bte_0d3v.py ../test/test_tps_splitcomm.py" for binary in $binaries; do if [ ! -x $binary ];then if [ -x $testDir/$binary ];then From 89c3b54fd741a01bb8dc3eeaa03d9e8d0faf95c7 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 10:34:31 -0600 Subject: [PATCH 23/75] Few extra tweaks, including computing the number of reaction involving BTE --- src/M2ulPhyS.cpp | 2 +- src/tps-time-loop.py | 14 ++++++++++---- src/tps2Boltzmann.cpp | 24 +++++++++++++++++++++--- src/tps2Boltzmann.hpp | 3 +++ test/inputs/coupled-3d-boltzmann.ini | 2 +- 5 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/M2ulPhyS.cpp b/src/M2ulPhyS.cpp index d858477ec..e441b9f08 100644 --- a/src/M2ulPhyS.cpp +++ b/src/M2ulPhyS.cpp @@ -3322,7 +3322,7 @@ void M2ulPhyS::parseReactionInputs() { } else if (model == "bte") { config.reactionModels[r - 1] = GRIDFUNCTION_RXN; int index; - tpsP->getRequiredInput((basepath + "index").c_str(), index); + tpsP->getRequiredInput((basepath + "bte/index").c_str(), index); config.chemistryInput.reactionInputs[r - 1].indexInput = index; } else { grvy_printf(GRVY_ERROR, "\nUnknown reaction_model -> %s", model.c_str()); diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index ce5a398a6..3038ac043 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -19,6 +19,9 @@ def __init__(self): self.E = [1176329.772504, -377725.908714] # [J/mol] def fetch(self, interface): + n_reactions =interface.nComponents(libtps.t2bIndex.ReactionRates) + for r in range(n_reactions): + print("Reaction ", r+1, ": ", interface.getReactionEquation(r)) self.species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False) self.efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False) self.heavy_temperature = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) @@ -27,14 +30,16 @@ def fetch(self, interface): def solve(self): #A_ * pow(temp, b_) * exp(-E_ / UNIVERSALGASCONSTANT / temp); - self.reaction_rates = [A * np.pow(self.heavy_temperature, b) * + self.reaction_rates = [A * np.power(self.heavy_temperature, b) * np.exp(-E/(self.UNIVERSALGASCONSTANT * self.heavy_temperature)) for A,b,E in zip(self.A, self.b, self.E) ] def push(self, interface): - rates = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False) - rates[0:self.heavy_temperature.shape[0]] = self.reaction_rates[0] - rates[self.heavy_temperature.shape[0]:] = self.reaction_rates[1] + n_reactions =interface.nComponents(libtps.t2bIndex.ReactionRates) + if n_reactions >= 2: + rates = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False) + rates[0:self.heavy_temperature.shape[0]] = self.reaction_rates[0] + rates[self.heavy_temperature.shape[0]:] = self.reaction_rates[1] @@ -69,6 +74,7 @@ def push(self, interface): boltzmann.fetch(interface) boltzmann.solve() boltzmann.push(interface) + interface.saveDataCollection(cycle=it, time=it) tps.fetch(interface) it = it+1 diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index 504c4560f..69eebf8f1 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -84,8 +84,7 @@ Tps2Boltzmann::Tps2Boltzmann(Tps *tps) assert(tps->isFlowEMCoupled()); tps->getRequiredInput("species/numSpecies", nspecies_); - // TODO(Umberto): Get the number of reactions for the solver - tps->getRequiredInput("boltzmannInterface/nreactions", nreactions_); + nreactions_ = _countBTEReactions(); tps->getRequiredInput("boltzmannInterface/order", order_); tps->getRequiredInput("boltzmannInterface/basisType", basis_type_); assert(basis_type_ == 0 || basis_type_ == 1); @@ -99,6 +98,24 @@ Tps2Boltzmann::Tps2Boltzmann(Tps *tps) ncomps.SetSize(NIndexes + 1); } +int Tps2Boltzmann::_countBTEReactions() { + int total_reactions(0); + int bte_reactions(0); + tps_->getRequiredInput("reactions/number_of_reactions", total_reactions); + reaction_eqs_.reserve(total_reactions); + for ( int r(0); rgetRequiredInput((basepath + "/equation").c_str(), equation); + tps_->getRequiredInput((basepath + "/model").c_str(), model); + if ( model == "bte" ) { + ++bte_reactions; + reaction_eqs_.push_back(equation); + } + } + return bte_reactions; +} + void Tps2Boltzmann::init(M2ulPhyS *flowSolver) { std::cout << "Tps2Boltzmann::init is called" << std::endl; mfem::ParMesh *pmesh(flowSolver->GetMesh()); @@ -329,7 +346,8 @@ void tps2bolzmann(py::module &m) { .def("NeFiledComps", &TPS::Tps2Boltzmann::NeFieldComps) .def("nComponents", &TPS::Tps2Boltzmann::nComponents) .def("saveDataCollection", &TPS::Tps2Boltzmann::saveDataCollection, "Save the data collection in Paraview format", - py::arg("cycle"), py::arg("time")); + py::arg("cycle"), py::arg("time")) + .def("getReactionEquation", &TPS::Tps2Boltzmann::getReactionEquation, "Return the equation of the reaction", py::arg("index")); } } // namespace tps_wrappers #endif diff --git a/src/tps2Boltzmann.hpp b/src/tps2Boltzmann.hpp index 71649d8d6..82d4e8ee7 100644 --- a/src/tps2Boltzmann.hpp +++ b/src/tps2Boltzmann.hpp @@ -114,12 +114,14 @@ class Tps2Boltzmann { int Nspecies() const { return nspecies_; } int NeFieldComps() const { return nEfieldComps_; } int nComponents(Index index) const { return ncomps[index]; } + std::string getReactionEquation(int index) const { return reaction_eqs_[index]; } void saveDataCollection(int cycle, double time); ~Tps2Boltzmann(); private: + int _countBTEReactions(); Tps *tps_; int nspecies_; @@ -162,6 +164,7 @@ class Tps2Boltzmann { bool save_to_paraview_dc; mfem::ParaViewDataCollection *paraview_dc; + std::vector reaction_eqs_; }; } // namespace TPS diff --git a/test/inputs/coupled-3d-boltzmann.ini b/test/inputs/coupled-3d-boltzmann.ini index 9764096b2..980e62cde 100644 --- a/test/inputs/coupled-3d-boltzmann.ini +++ b/test/inputs/coupled-3d-boltzmann.ini @@ -141,6 +141,6 @@ permeability = 1.25663706e-6 # m * kg / s^2 / A^2 preconditioner_background_sigma = 0.01 [boltzmannInterface] -nreactions = 1 +save_to_paraview = true order = 0 basisType = 0 \ No newline at end of file From 062a1eda649171fdfbb7a0d77b0b4bd2966831be Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 10:36:25 -0600 Subject: [PATCH 24/75] make enforcestyle --- src/reaction.cpp | 2 +- src/tps2Boltzmann.cpp | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/reaction.cpp b/src/reaction.cpp index ae322c934..215f55015 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -98,7 +98,7 @@ MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unu [[maybe_unused]] const double &T_e, const int &dofindex, [[maybe_unused]] const bool isElectronInvolved) { - if(data) + if (data) return data[dofindex]; else return 0.; diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index 69eebf8f1..9c13517e0 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -103,12 +103,12 @@ int Tps2Boltzmann::_countBTEReactions() { int bte_reactions(0); tps_->getRequiredInput("reactions/number_of_reactions", total_reactions); reaction_eqs_.reserve(total_reactions); - for ( int r(0); rgetRequiredInput((basepath + "/equation").c_str(), equation); tps_->getRequiredInput((basepath + "/model").c_str(), model); - if ( model == "bte" ) { + if (model == "bte") { ++bte_reactions; reaction_eqs_.push_back(equation); } @@ -347,7 +347,8 @@ void tps2bolzmann(py::module &m) { .def("nComponents", &TPS::Tps2Boltzmann::nComponents) .def("saveDataCollection", &TPS::Tps2Boltzmann::saveDataCollection, "Save the data collection in Paraview format", py::arg("cycle"), py::arg("time")) - .def("getReactionEquation", &TPS::Tps2Boltzmann::getReactionEquation, "Return the equation of the reaction", py::arg("index")); + .def("getReactionEquation", &TPS::Tps2Boltzmann::getReactionEquation, "Return the equation of the reaction", + py::arg("index")); } } // namespace tps_wrappers #endif From c94674c15ed192615841009fbe3092b1706eeac2 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 12:11:07 -0600 Subject: [PATCH 25/75] Bugfixes --- src/M2ulPhyS.cpp | 4 ++-- src/reaction.cpp | 16 ++++++++++------ src/reaction.hpp | 1 + src/tps-time-loop.py | 3 +++ src/tps2Boltzmann.cpp | 2 +- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/M2ulPhyS.cpp b/src/M2ulPhyS.cpp index e441b9f08..99c56f144 100644 --- a/src/M2ulPhyS.cpp +++ b/src/M2ulPhyS.cpp @@ -3322,7 +3322,7 @@ void M2ulPhyS::parseReactionInputs() { } else if (model == "bte") { config.reactionModels[r - 1] = GRIDFUNCTION_RXN; int index; - tpsP->getRequiredInput((basepath + "bte/index").c_str(), index); + tpsP->getRequiredInput((basepath + "/bte/index").c_str(), index); config.chemistryInput.reactionInputs[r - 1].indexInput = index; } else { grvy_printf(GRVY_ERROR, "\nUnknown reaction_model -> %s", model.c_str()); @@ -3442,7 +3442,7 @@ void M2ulPhyS::parseReactionInputs() { config.equilibriumConstantParams[p + r * gpudata::MAXCHEMPARAMS]; } - if (config.reactionModels[r] != TABULATED_RXN) { + if (config.reactionModels[r] == ARRHENIUS || config.reactionModels[r] == HOFFERTLIEN) { assert(rxn_param_idx < config.rxnModelParamsHost.size()); config.chemistryInput.reactionInputs[r].modelParams = config.rxnModelParamsHost[rxn_param_idx].Read(); rxn_param_idx += 1; diff --git a/src/reaction.cpp b/src/reaction.cpp index 215f55015..6ca9355f6 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -80,17 +80,19 @@ MFEM_HOST_DEVICE double Tabulated::computeRateCoefficient(const double &T_h, con } MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp) - : Reaction(GRIDFUNCTION_RXN), data(nullptr), comp(comp) {} + : Reaction(GRIDFUNCTION_RXN), data(nullptr), comp(comp), size_(0) {} MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {} void GridFunctionReaction::setGridFunctionData(std::shared_ptr &f) { f_ = f; - assert(f->Size() >= (comp + 1) * f->FESpace()->GetNDofs()); + size_ = f->FESpace()->GetNDofs(); + assert(comp < f->FESpace()->GetVDim() ); + assert(f->FESpace()->GetOrdering() == mfem::Ordering::byNodes); #ifdef _GPU_ - data = f_->Read() + comp * f_->FESpace()->GetNDofs(); + data = f_->Read() + comp * size_; #else - data = f_->HostRead() + comp * f_->FESpace()->GetNDofs(); + data = f_->HostRead() + comp * size_; #endif } @@ -98,8 +100,10 @@ MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unu [[maybe_unused]] const double &T_e, const int &dofindex, [[maybe_unused]] const bool isElectronInvolved) { - if (data) + if (data) { + assert(dofindex < size_) return data[dofindex]; - else + } + else return 0.; } diff --git a/src/reaction.hpp b/src/reaction.hpp index daf0406ed..623312ca0 100644 --- a/src/reaction.hpp +++ b/src/reaction.hpp @@ -123,6 +123,7 @@ class GridFunctionReaction : public Reaction { std::shared_ptr f_; const double *data; const int comp; + int size_; public: MFEM_HOST_DEVICE GridFunctionReaction(int comp); diff --git a/src/tps-time-loop.py b/src/tps-time-loop.py index 3038ac043..af2efc6c8 100755 --- a/src/tps-time-loop.py +++ b/src/tps-time-loop.py @@ -26,6 +26,9 @@ def fetch(self, interface): self.efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False) self.heavy_temperature = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + efieldAngularFreq = interface.EfieldAngularFreq() + print("Electric field angular frequency: ", efieldAngularFreq) + def solve(self): diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index 9c13517e0..b2cceba63 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -92,7 +92,7 @@ Tps2Boltzmann::Tps2Boltzmann(Tps *tps) tps->getRequiredInput("em/current_frequency", EfieldAngularFreq_); EfieldAngularFreq_ *= 2. * M_PI; - tps->getInput("boltzmannInterface/save_to_paraview", save_to_paraview_dc); + save_to_paraview_dc = tps->getInput("boltzmannInterface/save_to_paraview", false); offsets.SetSize(NIndexes + 1); ncomps.SetSize(NIndexes + 1); From e65671a2ca95d25f897c90262ac2b13c22f6efcb Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 12:18:55 -0600 Subject: [PATCH 26/75] Fix compilation typos --- src/reaction.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/reaction.cpp b/src/reaction.cpp index 6ca9355f6..533ee4b5e 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -88,7 +88,7 @@ void GridFunctionReaction::setGridFunctionData(std::shared_ptrFESpace()->GetNDofs(); assert(comp < f->FESpace()->GetVDim() ); - assert(f->FESpace()->GetOrdering() == mfem::Ordering::byNodes); + assert(f->FESpace()->GetOrdering() == mfem::Ordering::byNODES); #ifdef _GPU_ data = f_->Read() + comp * size_; #else @@ -101,7 +101,7 @@ MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unu const int &dofindex, [[maybe_unused]] const bool isElectronInvolved) { if (data) { - assert(dofindex < size_) + assert(dofindex < size_); return data[dofindex]; } else From 5ac56172e3b27df5023191cf31a8b81712152af6 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 12:20:56 -0600 Subject: [PATCH 27/75] make enforcestyle --- src/reaction.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/reaction.cpp b/src/reaction.cpp index 533ee4b5e..7ae59707a 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -87,7 +87,7 @@ MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {} void GridFunctionReaction::setGridFunctionData(std::shared_ptr &f) { f_ = f; size_ = f->FESpace()->GetNDofs(); - assert(comp < f->FESpace()->GetVDim() ); + assert(comp < f->FESpace()->GetVDim()); assert(f->FESpace()->GetOrdering() == mfem::Ordering::byNODES); #ifdef _GPU_ data = f_->Read() + comp * size_; @@ -103,7 +103,6 @@ MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unu if (data) { assert(dofindex < size_); return data[dofindex]; - } - else + } else return 0.; } From bb2e7c8f0bdc47517113a546c5353edcdc068cde Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 12:22:25 -0600 Subject: [PATCH 28/75] make style --- src/reaction.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/reaction.cpp b/src/reaction.cpp index 7ae59707a..ac8affe82 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -103,6 +103,7 @@ MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unu if (data) { assert(dofindex < size_); return data[dofindex]; - } else + } else { return 0.; + } } From b15431237ac963e3dc834f83d23406a6b7682060 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 14:57:02 -0600 Subject: [PATCH 29/75] Avoid compilation error on Tioga --- src/reaction.cpp | 4 ++-- src/reaction.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/reaction.cpp b/src/reaction.cpp index ac8affe82..ed2a0748f 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -79,10 +79,10 @@ MFEM_HOST_DEVICE double Tabulated::computeRateCoefficient(const double &T_h, con return table_->eval(temp); } -MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp) +GridFunctionReaction::GridFunctionReaction(int comp) : Reaction(GRIDFUNCTION_RXN), data(nullptr), comp(comp), size_(0) {} -MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {} +GridFunctionReaction::~GridFunctionReaction() {} void GridFunctionReaction::setGridFunctionData(std::shared_ptr &f) { f_ = f; diff --git a/src/reaction.hpp b/src/reaction.hpp index 623312ca0..3b3c27e73 100644 --- a/src/reaction.hpp +++ b/src/reaction.hpp @@ -126,9 +126,9 @@ class GridFunctionReaction : public Reaction { int size_; public: - MFEM_HOST_DEVICE GridFunctionReaction(int comp); + GridFunctionReaction(int comp); - MFEM_HOST_DEVICE virtual ~GridFunctionReaction(); + virtual ~GridFunctionReaction(); void setGridFunctionData(std::shared_ptr &f); From 1771a7968fd5f3da8cec8e89f9013f74852bdc84 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 15:02:55 -0600 Subject: [PATCH 30/75] Avoid compilation error on Tioga --- src/chemistry.cpp | 2 +- src/chemistry.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/chemistry.cpp b/src/chemistry.cpp index f233d2ed5..9f64284de 100644 --- a/src/chemistry.cpp +++ b/src/chemistry.cpp @@ -37,7 +37,7 @@ using namespace std; Chemistry::Chemistry(GasMixture *mixture, RunConfiguration &config) : Chemistry(mixture, config.chemistryInput) {} -MFEM_HOST_DEVICE Chemistry::Chemistry(GasMixture *mixture, const ChemistryInput &inputs) : mixture_(mixture) { +Chemistry::Chemistry(GasMixture *mixture, const ChemistryInput &inputs) : mixture_(mixture) { numEquations_ = mixture->GetNumEquations(); numSpecies_ = mixture->GetNumSpecies(); numActiveSpecies_ = mixture->GetNumActiveSpecies(); diff --git a/src/chemistry.hpp b/src/chemistry.hpp index 0463f0f22..4c190a3f6 100644 --- a/src/chemistry.hpp +++ b/src/chemistry.hpp @@ -91,7 +91,7 @@ class Chemistry { public: Chemistry(GasMixture *mixture, RunConfiguration &config); - MFEM_HOST_DEVICE Chemistry(GasMixture *mixture, const ChemistryInput &inputs); + Chemistry(GasMixture *mixture, const ChemistryInput &inputs); MFEM_HOST_DEVICE ~Chemistry(); From e7d37558b9c29fc8c72a9a887373688f66b8e11d Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 15:13:57 -0600 Subject: [PATCH 31/75] Restore MFEM_HOST_DEVICE in the constructor/deconstructor of Reaction/Chemestry --- src/chemistry.cpp | 2 +- src/chemistry.hpp | 2 +- src/reaction.cpp | 4 ++-- src/reaction.hpp | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/chemistry.cpp b/src/chemistry.cpp index 9f64284de..f233d2ed5 100644 --- a/src/chemistry.cpp +++ b/src/chemistry.cpp @@ -37,7 +37,7 @@ using namespace std; Chemistry::Chemistry(GasMixture *mixture, RunConfiguration &config) : Chemistry(mixture, config.chemistryInput) {} -Chemistry::Chemistry(GasMixture *mixture, const ChemistryInput &inputs) : mixture_(mixture) { +MFEM_HOST_DEVICE Chemistry::Chemistry(GasMixture *mixture, const ChemistryInput &inputs) : mixture_(mixture) { numEquations_ = mixture->GetNumEquations(); numSpecies_ = mixture->GetNumSpecies(); numActiveSpecies_ = mixture->GetNumActiveSpecies(); diff --git a/src/chemistry.hpp b/src/chemistry.hpp index 4c190a3f6..0463f0f22 100644 --- a/src/chemistry.hpp +++ b/src/chemistry.hpp @@ -91,7 +91,7 @@ class Chemistry { public: Chemistry(GasMixture *mixture, RunConfiguration &config); - Chemistry(GasMixture *mixture, const ChemistryInput &inputs); + MFEM_HOST_DEVICE Chemistry(GasMixture *mixture, const ChemistryInput &inputs); MFEM_HOST_DEVICE ~Chemistry(); diff --git a/src/reaction.cpp b/src/reaction.cpp index ed2a0748f..ac8affe82 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -79,10 +79,10 @@ MFEM_HOST_DEVICE double Tabulated::computeRateCoefficient(const double &T_h, con return table_->eval(temp); } -GridFunctionReaction::GridFunctionReaction(int comp) +MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp) : Reaction(GRIDFUNCTION_RXN), data(nullptr), comp(comp), size_(0) {} -GridFunctionReaction::~GridFunctionReaction() {} +MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {} void GridFunctionReaction::setGridFunctionData(std::shared_ptr &f) { f_ = f; diff --git a/src/reaction.hpp b/src/reaction.hpp index 3b3c27e73..623312ca0 100644 --- a/src/reaction.hpp +++ b/src/reaction.hpp @@ -126,9 +126,9 @@ class GridFunctionReaction : public Reaction { int size_; public: - GridFunctionReaction(int comp); + MFEM_HOST_DEVICE GridFunctionReaction(int comp); - virtual ~GridFunctionReaction(); + MFEM_HOST_DEVICE virtual ~GridFunctionReaction(); void setGridFunctionData(std::shared_ptr &f); From 65e892d900172a60d8d3499db7da62eebfea8715 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 8 Jan 2024 16:40:40 -0600 Subject: [PATCH 32/75] Fix incorrect allocation and setting of external rates data on device --- src/M2ulPhyS.hpp | 3 +++ src/M2ulPhyS2Boltzmann.cpp | 13 ++++++++++--- src/chemistry.cpp | 13 +++++++++++-- src/chemistry.hpp | 3 ++- src/gpu_constructor.cpp | 11 +++++++++++ src/gpu_constructor.hpp | 4 ++++ src/reaction.cpp | 24 ++++++++++++++---------- src/reaction.hpp | 9 +++++---- 8 files changed, 60 insertions(+), 20 deletions(-) diff --git a/src/M2ulPhyS.hpp b/src/M2ulPhyS.hpp index 934131487..345687302 100644 --- a/src/M2ulPhyS.hpp +++ b/src/M2ulPhyS.hpp @@ -250,6 +250,9 @@ class M2ulPhyS : public TPS::Solver { // ParNonlinearForm *gradUp_A; GradNonLinearForm *gradUp_A; + // Auxiliary grid function to store external reaction rates + std::unique_ptr externalReactionRates; + // Average handler Averaging *average; diff --git a/src/M2ulPhyS2Boltzmann.cpp b/src/M2ulPhyS2Boltzmann.cpp index 4fa594e5e..e54dfdb01 100644 --- a/src/M2ulPhyS2Boltzmann.cpp +++ b/src/M2ulPhyS2Boltzmann.cpp @@ -85,7 +85,14 @@ void M2ulPhyS::push(TPS::Tps2Boltzmann &interface) { void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) { mfem::ParFiniteElementSpace *reaction_rates_fes(&(interface.NativeFes(TPS::Tps2Boltzmann::Index::ReactionRates))); - std::shared_ptr reaction_rates(new mfem::ParGridFunction(reaction_rates_fes)); - interface.interpolateToNativeFES(*reaction_rates, TPS::Tps2Boltzmann::Index::ReactionRates); - chemistry_->setGridFunctionRates(reaction_rates); + externalReactionRates.reset(new mfem::ParGridFunction(reaction_rates_fes)); + interface.interpolateToNativeFES(*externalReactionRates, TPS::Tps2Boltzmann::Index::ReactionRates); + #if _GPU_ + const double * data(externalReactionRates->Read() ); + int size(externalReactionRates->FESpace()->GetNDofs() ); + assert(externalReactionRates->FESpace()->GetOrdering() == mfem::Ordering::byNODES); + gpu::deviceSetChemistryReactionData<<<1, 1>>>(data, size, chemistry_); + #else + chemistry_->setGridFunctionRates(*externalReactionRates); + #endif } diff --git a/src/chemistry.cpp b/src/chemistry.cpp index f233d2ed5..57704f345 100644 --- a/src/chemistry.cpp +++ b/src/chemistry.cpp @@ -109,11 +109,20 @@ MFEM_HOST_DEVICE Chemistry::~Chemistry() { } } -void Chemistry::setGridFunctionRates(std::shared_ptr &f) { +MFEM_HOST_DEVICE void Chemistry::setRates(const double * data, int size) { for (int r = 0; r < numReactions_; r++) { if (reactions_[r]->reactionModel == GRIDFUNCTION_RXN) { GridFunctionReaction *rx = dynamic_cast(reactions_[r]); - rx->setGridFunctionData(f); + rx->setData(data, size); + } + } +} + +void Chemistry::setGridFunctionRates(mfem::GridFunction &f) { + for (int r = 0; r < numReactions_; r++) { + if (reactions_[r]->reactionModel == GRIDFUNCTION_RXN) { + GridFunctionReaction *rx = dynamic_cast(reactions_[r]); + rx->setGridFunction(f); } } } diff --git a/src/chemistry.hpp b/src/chemistry.hpp index 0463f0f22..906d249af 100644 --- a/src/chemistry.hpp +++ b/src/chemistry.hpp @@ -96,7 +96,8 @@ class Chemistry { MFEM_HOST_DEVICE ~Chemistry(); // Set the grid function rates for GRIDFUNCTION_RXN reaction types - void setGridFunctionRates(std::shared_ptr &f); + void setGridFunctionRates(mfem::GridFunction &f); + MFEM_HOST_DEVICE void setRates(const double * data, int size); // return Vector of reaction rate coefficients, with the size of numReaction_. // WARNING(marc) I have removed "virtual" qualifier here assuming these functions will not diff --git a/src/gpu_constructor.cpp b/src/gpu_constructor.cpp index c6143c51f..af49d98b6 100644 --- a/src/gpu_constructor.cpp +++ b/src/gpu_constructor.cpp @@ -121,5 +121,16 @@ __global__ void freeDeviceRadiation(Radiation *radiation) { if (radiation != NULL) radiation->~Radiation(); } +//--------------------------------------------------- +// And finally devise setters +//--------------------------------------------------- +__global__ void deviceSetGridFunctionReactionData(const double * data, int size, GridFunctionReaction * reaction) { + reaction->setData(ata, size); +} + +__global__ void deviceSetChemistryReactionData(const double * data, int size, Chemistry * chem) { + chem->setRates(data, size); +} + #endif // cuda or hip } // namespace gpu diff --git a/src/gpu_constructor.hpp b/src/gpu_constructor.hpp index ca4695383..64e98d34d 100644 --- a/src/gpu_constructor.hpp +++ b/src/gpu_constructor.hpp @@ -160,6 +160,10 @@ __global__ void freeDeviceChemistry(Chemistry *chem); //! Explicit call to Radiation destructor on the device __global__ void freeDeviceRadiation(Radiation *radiation); +//! Set the data to a GridFunctionReaction +__global__ void deviceSetGridFunctionReactionData(const double * data, int size, GridFunctionReaction * reaction); +__global__ void deviceSetChemistryReactionData(const double * data, int size, Chemistry * chem); + #endif // cuda or hip } // namespace gpu diff --git a/src/reaction.cpp b/src/reaction.cpp index ac8affe82..5abdd1512 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -80,19 +80,23 @@ MFEM_HOST_DEVICE double Tabulated::computeRateCoefficient(const double &T_h, con } MFEM_HOST_DEVICE GridFunctionReaction::GridFunctionReaction(int comp) - : Reaction(GRIDFUNCTION_RXN), data(nullptr), comp(comp), size_(0) {} + : Reaction(GRIDFUNCTION_RXN), data_(nullptr), comp_(comp), size_(0) {} MFEM_HOST_DEVICE GridFunctionReaction::~GridFunctionReaction() {} -void GridFunctionReaction::setGridFunctionData(std::shared_ptr &f) { - f_ = f; - size_ = f->FESpace()->GetNDofs(); - assert(comp < f->FESpace()->GetVDim()); - assert(f->FESpace()->GetOrdering() == mfem::Ordering::byNODES); +MFEM_HOST_DEVICE void GridFunctionReaction::setData(const double * data, int size) { + data_ = data + comp_ * size_; + size_ = size; +} + +void GridFunctionReaction::setGridFunction(const mfem::GridFunction & f) { + size_ = f.FESpace()->GetNDofs(); + assert(comp_ < f.FESpace()->GetVDim()); + assert(f.FESpace()->GetOrdering() == mfem::Ordering::byNODES); #ifdef _GPU_ - data = f_->Read() + comp * size_; + data_ = f.Read() + comp_ * size_; #else - data = f_->HostRead() + comp * size_; + data_ = f.HostRead() + comp_ * size_; #endif } @@ -100,9 +104,9 @@ MFEM_HOST_DEVICE double GridFunctionReaction::computeRateCoefficient([[maybe_unu [[maybe_unused]] const double &T_e, const int &dofindex, [[maybe_unused]] const bool isElectronInvolved) { - if (data) { + if (data_) { assert(dofindex < size_); - return data[dofindex]; + return data_[dofindex]; } else { return 0.; } diff --git a/src/reaction.hpp b/src/reaction.hpp index 623312ca0..c563e0dd8 100644 --- a/src/reaction.hpp +++ b/src/reaction.hpp @@ -120,9 +120,8 @@ class Tabulated : public Reaction { class GridFunctionReaction : public Reaction { private: - std::shared_ptr f_; - const double *data; - const int comp; + const double *data_; + const int comp_; int size_; public: @@ -130,7 +129,9 @@ class GridFunctionReaction : public Reaction { MFEM_HOST_DEVICE virtual ~GridFunctionReaction(); - void setGridFunctionData(std::shared_ptr &f); + void setGridFunction(const mfem::GridFunction & f); + + MFEM_HOST_DEVICE void setData(const double * data, int size); MFEM_HOST_DEVICE virtual double computeRateCoefficient([[maybe_unused]] const double &T_h, [[maybe_unused]] const double &T_e, const int &dofindex, From 332e4e721ff7ecaf14062da9fec04899608c429f Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Tue, 9 Jan 2024 09:07:01 -0600 Subject: [PATCH 33/75] Fix compilation on device. No dynamic_cast --- src/chemistry.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chemistry.cpp b/src/chemistry.cpp index 57704f345..fed7512d3 100644 --- a/src/chemistry.cpp +++ b/src/chemistry.cpp @@ -112,7 +112,7 @@ MFEM_HOST_DEVICE Chemistry::~Chemistry() { MFEM_HOST_DEVICE void Chemistry::setRates(const double * data, int size) { for (int r = 0; r < numReactions_; r++) { if (reactions_[r]->reactionModel == GRIDFUNCTION_RXN) { - GridFunctionReaction *rx = dynamic_cast(reactions_[r]); + GridFunctionReaction *rx = (GridFunctionReaction *)(reactions_[r]); rx->setData(data, size); } } From 88856bc97aeab58995327990656f22d32e3cc3ff Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Tue, 9 Jan 2024 10:36:10 -0600 Subject: [PATCH 34/75] Fix variable name typo --- src/gpu_constructor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu_constructor.cpp b/src/gpu_constructor.cpp index af49d98b6..6dbb5de1e 100644 --- a/src/gpu_constructor.cpp +++ b/src/gpu_constructor.cpp @@ -125,7 +125,7 @@ __global__ void freeDeviceRadiation(Radiation *radiation) { // And finally devise setters //--------------------------------------------------- __global__ void deviceSetGridFunctionReactionData(const double * data, int size, GridFunctionReaction * reaction) { - reaction->setData(ata, size); + reaction->setData(data, size); } __global__ void deviceSetChemistryReactionData(const double * data, int size, Chemistry * chem) { From d5733d5b303edeb914c13c2fa010ad6f56184edf Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Tue, 9 Jan 2024 16:28:02 -0600 Subject: [PATCH 35/75] Bugfix for gpu-cpu build --- src/M2ulPhyS2Boltzmann.cpp | 2 +- src/reaction.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/M2ulPhyS2Boltzmann.cpp b/src/M2ulPhyS2Boltzmann.cpp index e54dfdb01..60c9f450b 100644 --- a/src/M2ulPhyS2Boltzmann.cpp +++ b/src/M2ulPhyS2Boltzmann.cpp @@ -87,7 +87,7 @@ void M2ulPhyS::fetch(TPS::Tps2Boltzmann &interface) { mfem::ParFiniteElementSpace *reaction_rates_fes(&(interface.NativeFes(TPS::Tps2Boltzmann::Index::ReactionRates))); externalReactionRates.reset(new mfem::ParGridFunction(reaction_rates_fes)); interface.interpolateToNativeFES(*externalReactionRates, TPS::Tps2Boltzmann::Index::ReactionRates); - #if _GPU_ + #if defined(_CUDA_) || defined(_HIP_) const double * data(externalReactionRates->Read() ); int size(externalReactionRates->FESpace()->GetNDofs() ); assert(externalReactionRates->FESpace()->GetOrdering() == mfem::Ordering::byNODES); diff --git a/src/reaction.cpp b/src/reaction.cpp index 5abdd1512..18efb6072 100644 --- a/src/reaction.cpp +++ b/src/reaction.cpp @@ -93,7 +93,7 @@ void GridFunctionReaction::setGridFunction(const mfem::GridFunction & f) { size_ = f.FESpace()->GetNDofs(); assert(comp_ < f.FESpace()->GetVDim()); assert(f.FESpace()->GetOrdering() == mfem::Ordering::byNODES); -#ifdef _GPU_ +#if defined(_CUDA_) || defined(_HIP_) data_ = f.Read() + comp_ * size_; #else data_ = f.HostRead() + comp_ * size_; From 7d5e17031474a8f9ca49821b3a6de050fd4fd70c Mon Sep 17 00:00:00 2001 From: milindasf Date: Wed, 24 Jan 2024 08:59:23 -0600 Subject: [PATCH 36/75] code clean up for 2w-coupling, grid setup moved to a new function. --- src/tps-bte_0d3v.py | 595 +++++++++++++++++++++++--------------------- 1 file changed, 310 insertions(+), 285 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index ca6f2670f..732bd41a6 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -145,10 +145,8 @@ def __init__(self, tps, comm): self.comm : MPI.Comm = comm self.param = BoltzmannSolverParams() # overide the default params, based on the config.ini file. - self.parse_config_file(sys.argv[2]) - + self.__parse_config_file__(sys.argv[2]) self.xp_module = np - boltzmann_dir = self.param.output_dir isExist = os.path.exists(boltzmann_dir) if not isExist: @@ -163,10 +161,13 @@ def __init__(self, tps, comm): self.profile_tt = profile_tt self.profile_nn = profile_nn + + # how to map each grid to the GPU devices on the node + self.gidx_to_device_map = lambda gidx, num_grids : gidx % 2 return - def parse_config_file(self, fname): + def __parse_config_file__(self, fname): """ add the configuaraion file parse code here, which overides the default BoltzmannSolverParams @@ -209,7 +210,7 @@ def grid_setup(self, interface): where, at the moment the clustering is determined based on the electron temperature computed from the TPS code. """ - + assert self.xp_module==np, "grid setup only supported in CPU" self.profile_tt[pp.SETUP].start() xp = self.xp_module @@ -261,48 +262,113 @@ def grid_setup(self, interface): for grid_idx in range(self.param.n_grids): print("setting up grid %d"%(grid_idx), flush = True) self.bte_solver.assemble_operators(grid_idx) + + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map + + for grid_idx in range(n_grids): + assert self.grid_idx_to_npts[grid_idx] > 0 + + print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) + f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") + self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) + + print("setting grid %d to device %d"%(grid_idx, gidx_to_device_map(grid_idx, n_grids))) + self.bte_solver.host_to_device_setup(gidx_to_device_map(grid_idx, n_grids), grid_idx) + self.xp_module = cp self.profile_tt[pp.SETUP].stop() return + + def __efield_setup__(self): + + """ + Here we set the E-field for 1-way coupling + """ + + use_gpu = self.param.use_gpu + n_grids = self.param.n_grids + xp = self.xp_module + if(use_gpu==1): + def Et(t, grid_idx): + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + with cp.cuda.Device(dev_id): + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + return xp.sqrt(eRe_d**2 + eIm_d**2) + else: + return eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) + else: + def Et(t, grid_idx): + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + + if self.param.Efreq == 0: + return xp.sqrt(eRe_d**2 + eIm_d**2) + else: + return eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) + + for grid_idx in range(n_grids): + et = lambda t, gid=grid_idx: Et(t, gid) + self.bte_solver.set_efield_function(grid_idx, et) + + return def fetch(self, interface): - xp = self.xp_module gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - heavy_temp = xp.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) tps_npts = len(heavy_temp) self.tps_npts = tps_npts - electron_temp = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) - efield = xp.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) - species_densities = xp.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, tps_npts) + electron_temp = np.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) + efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) + species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, tps_npts) + n_grids = self.param.n_grids + use_gpu = self.param.use_gpu - for grid_idx in range(self.param.n_grids): + for grid_idx in range(n_grids): bte_idx = gidx_to_pidx_map[grid_idx] + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + ni = species_densities[TPSINDEX.ION_IDX][bte_idx] ne = species_densities[TPSINDEX.ELE_IDX][bte_idx] n0 = species_densities[TPSINDEX.NEU_IDX][bte_idx] Tg = heavy_temp[bte_idx] Te = electron_temp[bte_idx] - - eRe = efield[TPSINDEX.EF_RE_IDX][bte_idx] eIm = efield[TPSINDEX.EF_IM_IDX][bte_idx] + eMag = np.sqrt(eRe**2 + eIm **2) eByn0 = eMag/n0/self.param.Td_fac - + if self.param.verbose == 1 : print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) print("Efreq = %.4E [1/s]" %(self.param.Efreq)) print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg), np.max(Tg))) - print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te), np.max(Te))) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) + print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te) , np.max(Te))) - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne), np.max(ne))) - print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni), np.max(ni))) - print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0), np.max(n0))) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) + print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni) , np.max(ni))) + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) + + if (use_gpu == 1): + with cp.cuda.Device(dev_id): + ne = cp.array(ne) + ni = cp.array(ni) + n0 = cp.array(n0) + Tg = cp.array(Tg) + Te = cp.array(Te) + eRe = cp.array(eRe) + eIm = cp.array(eIm) + + eMag = cp.sqrt(eRe**2 + eIm **2) + eByn0 = eMag/n0/self.param.Td_fac #self.bte_solver.set_boltzmann_parameters(grid_idx, n0, ne, ni, Tg, self.param.solver_type) self.bte_solver.set_boltzmann_parameter(grid_idx, "n0", n0) @@ -321,323 +387,282 @@ def solve(self): """ if WITH_PARLA==1: - self.solve_with_parla() + self.solve_w_parla() return else: - self.solve_seq() + self.solve_wo_parla() return - def solve_seq(self): - xp = self.xp_module - csv_write = self.param.export_csv - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + def solve_wo_parla(self): + xp = self.xp_module + csv_write = self.param.export_csv + plot_data = self.param.plot_data + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + use_gpu = self.param.use_gpu + dev_id = self.param.dev_id + verbose = self.param.verbose + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map self.qoi = [None for grid_idx in range(self.param.n_grids)] self.ff = [None for grid_idx in range(self.param.n_grids)] - if csv_write ==1 : + if csv_write: data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) - t1 = time() + self.__efield_setup__() - for grid_idx in range(self.param.n_grids): - - if self.grid_idx_to_npts[grid_idx] ==0: - continue - - if self.param.verbose==1: - print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) - f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") - self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) - - if self.param.use_gpu==1: - dev_id = self.param.dev_id - self.bte_solver.host_to_device_setup(dev_id, grid_idx) - - with cp.cuda.Device(dev_id): - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - + t1 = time() + for grid_idx in range(n_grids): + dev_id = gidx_to_device_map(grid_idx, n_grids) + if (use_gpu==0): + try: + eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") if self.param.Efreq == 0: ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) else: ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - + + self.bte_solver.set_efield_function(grid_idx, ef_t) + + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + self.qoi[grid_idx] = qoi + self.ff [grid_idx] = ff + except: + print("solver failed for v-space gird no %d"%(grid_idx)) + sys.exit(0) else: - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - - if self.param.Efreq == 0: - ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) - else: - ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - - self.bte_solver.set_efield_function(grid_idx, ef_t) - f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") - try: - ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) - self.qoi[grid_idx] = qoi - self.ff [grid_idx] = ff - except: - print("solver failed for v-space gird no %d"%(grid_idx)) - # self.qoi.append(None) - # continue - sys.exit(0) - - if self.param.export_csv ==0 and self.param.plot_data==0: - continue - - ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) - ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) - - if self.param.use_gpu==1: - self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) - - with cp.cuda.Device(dev_id): - ff_r = cp.asnumpy(ff_r) - for k, v in qoi.items(): - qoi[k] = cp.asnumpy(v) + with xp.cuda.Device(dev_id): + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + self.qoi[grid_idx] = qoi + self.ff [grid_idx] = ff + # try: + # f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") + # ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + # self.qoi[grid_idx] = qoi + # self.ff [grid_idx] = ff + # except: + # print("solver failed for v-space gird no %d"%(grid_idx)) + # sys.exit(0) - if csv_write==1: - data_csv[gidx_to_pidx_map[grid_idx], 0] = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - data_csv[gidx_to_pidx_map[grid_idx], 1] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - data_csv[gidx_to_pidx_map[grid_idx], 2] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - data_csv[gidx_to_pidx_map[grid_idx], 3] = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") - data_csv[gidx_to_pidx_map[grid_idx], 4] = np.sqrt(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")**2 + self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")**2) - data_csv[gidx_to_pidx_map[grid_idx], 5] = qoi["energy"] - data_csv[gidx_to_pidx_map[grid_idx], 6] = qoi["mobility"] - data_csv[gidx_to_pidx_map[grid_idx], 7] = qoi["diffusion"] + t2 = time() + print("time for boltzmann v-space solve = %.4E"%(t2- t1)) + + if (self.param.export_csv ==1 or self.param.plot_data==1): + for grid_idx in range(n_grids): + dev_id = gidx_to_device_map(grid_idx, n_grids) + ff = self.ff[grid_idx] + qoi = self.qoi[grid_idx] - for col_idx, g in enumerate(self.param.collisions): - data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = qoi["rates"][col_idx] - - plot_data = self.param.plot_data - if plot_data: + def asnumpy(a): + if cp.get_array_module(a)==cp: + with cp.cuda.Device(dev_id): + return cp.asnumpy(a) + else: + return a - n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + ff_cpu = asnumpy(ff) + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) + ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff_cpu) - eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + n0 = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "n0")) + ne = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ne")) + ni = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ni")) + Tg = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg")) + eRe = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")) + eIm = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")) eMag = np.sqrt(eRe**2 + eIm**2) - num_sh = len(self.bte_solver._par_lm[grid_idx]) - num_subplots = num_sh - num_plt_cols = min(num_sh, 4) - num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) - fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) - plt_idx = 1 - n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 - - for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): - plt.subplot(num_plt_rows, num_plt_cols, plt_idx) - for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): - fr = np.abs(ff_r[ii, lm_idx, :]) - plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) + if csv_write: + data_csv[gidx_to_pidx_map[grid_idx], 0] = n0 + data_csv[gidx_to_pidx_map[grid_idx], 1] = ne + data_csv[gidx_to_pidx_map[grid_idx], 2] = ni + data_csv[gidx_to_pidx_map[grid_idx], 3] = Tg + data_csv[gidx_to_pidx_map[grid_idx], 4] = eMag + data_csv[gidx_to_pidx_map[grid_idx], 5] = asnumpy(qoi["energy"]) + data_csv[gidx_to_pidx_map[grid_idx], 6] = asnumpy(qoi["mobility"]) + data_csv[gidx_to_pidx_map[grid_idx], 7] = asnumpy(qoi["diffusion"]) - plt.xlabel(r"energy (eV)") - plt.ylabel(r"$f_%d$"%(lm[0])) - plt.grid(visible=True) - if lm_idx==0: - plt.legend(prop={'size': 6}) + for col_idx, g in enumerate(self.param.collisions): + data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = asnumpy(qoi["rates"][col_idx]) + + if plot_data: + num_sh = len(self.bte_solver._par_lm[grid_idx]) + num_subplots = num_sh + num_plt_cols = min(num_sh, 4) + num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) + fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) + plt_idx = 1 + n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 + + for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): + plt.subplot(num_plt_rows, num_plt_cols, plt_idx) + for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): + fr = np.abs(ff_r[ii, lm_idx, :]) + plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) - plt_idx +=1 - - #plt_idx = num_sh - plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) - plt.close() - - t2 = time() - print("time for boltzmann v-space solve = %.4E"%(t2- t1)) - - if csv_write: - fname = self.param.out_fname - with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: - writer = csv.writer(f,delimiter=',') - # write the header - header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(self.param.collisions): - header.append(str(g)) - - writer.writerow(header) - writer.writerows(data_csv) + plt.xlabel(r"energy (eV)") + plt.ylabel(r"$f_%d$"%(lm[0])) + plt.grid(visible=True) + if lm_idx==0: + plt.legend(prop={'size': 6}) + + plt_idx +=1 + + plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) + plt.close() + + if csv_write: + fname = self.param.out_fname + with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(self.param.collisions): + header.append(str(g)) + + writer.writerow(header) + writer.writerows(data_csv) return - def solve_with_parla(self): - csv_write = self.param.export_csv - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + def solve_w_parla(self): + rank = self.comm.Get_rank() + npes = self.comm.Get_size() + xp = self.xp_module + csv_write = self.param.export_csv + plot_data = self.param.plot_data + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + use_gpu = self.param.use_gpu + dev_id = self.param.dev_id + verbose = self.param.verbose + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map + self.qoi = [None for grid_idx in range(self.param.n_grids)] self.ff = [None for grid_idx in range(self.param.n_grids)] - if csv_write ==1 : + if csv_write: data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) - - rank = self.comm.Get_rank() - npes = self.comm.Get_size() + self.profile_tt[pp.SETUP].start() + self.__efield_setup__() + self.profile_tt[pp.SETUP].stop() with Parla(): num_gpus = len(gpu) - grid_to_device_map = lambda gidx : gidx % num_gpus + if (use_gpu==1): + parla_placement = [gpu(gidx_to_device_map(grid_idx,n_grids)) for grid_idx in range(n_grids)] + else: + parla_placement = [cpu for grid_idx in range(n_grids)] + print(parla_placement) + @spawn(placement=cpu, vcus=0) async def __main__(): - self.profile_tt[pp.SETUP].start() - ts_0 = TaskSpace("T") - for grid_idx in range(self.param.n_grids): - @spawn(ts_0[grid_idx], placement=[cpu], vcus=0.0) - def t0(): - print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) - f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") - self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) - - if self.param.use_gpu == 1: - dev_id = grid_to_device_map(grid_idx) - self.bte_solver.host_to_device_setup(dev_id, grid_idx) - xp = cp - - with cp.cuda.Device(dev_id): - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - - if self.param.Efreq == 0: - ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) - else: - ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - else: - xp = np - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - - if self.param.Efreq == 0: - ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) - else: - ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - - self.bte_solver.set_efield_function(grid_idx, ef_t) - return - - await ts_0 - - self.profile_tt[pp.SETUP].stop() - if self.param.use_gpu==1: - p1 = [gpu(grid_to_device_map(grid_idx)) for grid_idx in range(self.param.n_grids)] - else: - p1 = [cpu for grid_idx in range(self.param.n_grids)] - self.profile_tt[pp.SOLVE].start() ts_1 = TaskSpace("T") for grid_idx in range(self.param.n_grids): - @spawn(ts_1[grid_idx], placement=[p1[grid_idx]], dependencies=ts_0[grid_idx], vcus=0.0) + @spawn(ts_1[grid_idx], placement=[parla_placement[grid_idx]], vcus=0.0) def t1(): f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") - print("[Boltzmann] %d / %d launching grid %d on %s"%(rank, npes, grid_idx, p1[grid_idx])) + print("[Boltzmann] %d / %d launching grid %d on %s"%(rank, npes, grid_idx, parla_placement[grid_idx])) try: ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) self.ff[grid_idx] = ff self.qoi[grid_idx] = qoi except: print("solver failed for v-space gird no %d"%(grid_idx)) - # self.qoi.append(None) - # continue - sys.exit(0) + sys.exit(-1) await ts_1 self.profile_tt[pp.SOLVE].stop() - t1 = min_mean_max(self.profile_tt[pp.SETUP].seconds, self.comm) t2 = min_mean_max(self.profile_tt[pp.SOLVE].seconds, self.comm) print("[Boltzmann] setup (min) = %.4E (s) setup (mean) = %.4E (s) setup (max) = %.4E (s)" % (t1[0],t1[1],t1[2])) - print("[Boltzmann] solve (min) = %.4E (s) solve (mean) = %.4E (s) solve (max) = %.4E (s)" % (t2[0],t2[1],t2[2])) - if self.param.export_csv ==0 and self.param.plot_data==0: - return + print("[Boltzmann] solve (min) = %.4E (s) solve (mean) = %.4E (s) solve (max) = %.4E (s)" % (t2[0],t2[1],t2[2])) - for grid_idx in range(self.param.n_grids): - dev_id = grid_idx % num_gpus - - if self.param.use_gpu==1: - gpu_id = cp.cuda.Device(dev_id) - gpu_id.use() - - ff = self.ff[grid_idx] - ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) - ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff) - - if self.param.use_gpu==1: - self.bte_solver.device_to_host_setup(self.param.dev_id,grid_idx) + if (self.param.export_csv ==1 or self.param.plot_data==1): + for grid_idx in range(n_grids): + dev_id = gidx_to_device_map(grid_idx, n_grids) + ff = self.ff[grid_idx] + qoi = self.qoi[grid_idx] - qoi = self.qoi[grid_idx] - with cp.cuda.Device(dev_id): - ff_r = cp.asnumpy(ff_r) - for k, v in qoi.items(): - qoi[k] = cp.asnumpy(v) - - if csv_write==1: - data_csv[gidx_to_pidx_map[grid_idx], 0] = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - data_csv[gidx_to_pidx_map[grid_idx], 1] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - data_csv[gidx_to_pidx_map[grid_idx], 2] = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - data_csv[gidx_to_pidx_map[grid_idx], 3] = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") - data_csv[gidx_to_pidx_map[grid_idx], 4] = np.sqrt(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")**2 + self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")**2) - data_csv[gidx_to_pidx_map[grid_idx], 5] = qoi["energy"] - data_csv[gidx_to_pidx_map[grid_idx], 6] = qoi["mobility"] - data_csv[gidx_to_pidx_map[grid_idx], 7] = qoi["diffusion"] - - for col_idx, g in enumerate(self.param.collisions): - data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = qoi["rates"][col_idx] - - plot_data = self.param.plot_data - if plot_data: + def asnumpy(a): + if cp.get_array_module(a)==cp: + with cp.cuda.Device(dev_id): + return cp.asnumpy(a) + else: + return a - n0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "n0") - ne = self.bte_solver.get_boltzmann_parameter(grid_idx, "ne") - ni = self.bte_solver.get_boltzmann_parameter(grid_idx, "ni") - Tg = self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg") + ff_cpu = asnumpy(ff) + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) + ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff_cpu) - eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + n0 = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "n0")) + ne = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ne")) + ni = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ni")) + Tg = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg")) + eRe = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")) + eIm = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")) eMag = np.sqrt(eRe**2 + eIm**2) - num_sh = len(self.bte_solver._par_lm[grid_idx]) - num_subplots = num_sh - num_plt_cols = min(num_sh, 4) - num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) - fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) - plt_idx = 1 - n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 - - for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): - plt.subplot(num_plt_rows, num_plt_cols, plt_idx) - for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): - fr = np.abs(ff_r[ii, lm_idx, :]) - plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) + if csv_write: + data_csv[gidx_to_pidx_map[grid_idx], 0] = n0 + data_csv[gidx_to_pidx_map[grid_idx], 1] = ne + data_csv[gidx_to_pidx_map[grid_idx], 2] = ni + data_csv[gidx_to_pidx_map[grid_idx], 3] = Tg + data_csv[gidx_to_pidx_map[grid_idx], 4] = eMag + data_csv[gidx_to_pidx_map[grid_idx], 5] = asnumpy(qoi["energy"]) + data_csv[gidx_to_pidx_map[grid_idx], 6] = asnumpy(qoi["mobility"]) + data_csv[gidx_to_pidx_map[grid_idx], 7] = asnumpy(qoi["diffusion"]) - plt.xlabel(r"energy (eV)") - plt.ylabel(r"$f_%d$"%(lm[0])) - plt.grid(visible=True) - if lm_idx==0: - plt.legend(prop={'size': 6}) + for col_idx, g in enumerate(self.param.collisions): + data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = asnumpy(qoi["rates"][col_idx]) + + if plot_data: + num_sh = len(self.bte_solver._par_lm[grid_idx]) + num_subplots = num_sh + num_plt_cols = min(num_sh, 4) + num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) + fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) + plt_idx = 1 + n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 + + for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): + plt.subplot(num_plt_rows, num_plt_cols, plt_idx) + for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): + fr = np.abs(ff_r[ii, lm_idx, :]) + plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) - plt_idx +=1 - - #plt_idx = num_sh - plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) - plt.close() - - if csv_write: - fname = self.param.out_fname - with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: - writer = csv.writer(f,delimiter=',') - # write the header - header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(self.param.collisions): - header.append(str(g)) - - writer.writerow(header) - writer.writerows(data_csv) - + plt.xlabel(r"energy (eV)") + plt.ylabel(r"$f_%d$"%(lm[0])) + plt.grid(visible=True) + if lm_idx==0: + plt.legend(prop={'size': 6}) + + plt_idx +=1 + + plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) + plt.close() + + if csv_write: + fname = self.param.out_fname + with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(self.param.collisions): + header.append(str(g)) + + writer.writerow(header) + writer.writerows(data_csv) + + return + def push(self, interface): xp = self.xp_module Te_bte = xp.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) @@ -661,18 +686,18 @@ def push(self, interface): # here rr should be in the same ordering as the collision model prescribed to the Boltzmann solver. rr_bte[gidx_to_pidx_map[grid_idx]] = rr[1] - rr_bte[rr_bte<0] = 0.0 - s0 = rate_tps_arr * n0 * ni - s1 = rate_tps_csc * n0 * ni + # rr_bte[rr_bte<0] = 0.0 + # s0 = rate_tps_arr * n0 * ni + # s1 = rate_tps_csc * n0 * ni - s2 = rr_bte * n0 * ni + # s2 = rr_bte * n0 * ni - # tau = 1e-2 - # idx = s2 > tau - rate_bte[0][:] = 0.0 - rate_bte[1][:] = 0.0 - rate_bte[0] = rr_bte - rate_bte[1] = xp.abs(s2-s1)/xp.max(s2) + # # tau = 1e-2 + # # idx = s2 > tau + # rate_bte[0][:] = 0.0 + # rate_bte[1][:] = 0.0 + # rate_bte[0] = rr_bte + # rate_bte[1] = xp.abs(s2-s1)/xp.max(s2) return @@ -707,8 +732,8 @@ def push(self, interface): boltzmann.grid_setup(interface) boltzmann.fetch(interface) boltzmann.solve() -boltzmann.push(interface) -tps.fetch(interface) +# boltzmann.push(interface) +# tps.fetch(interface) # while it < max_iters: # tps.solveStep() From 11e0733209bac159f9b3ce4f20785e3481b87508 Mon Sep 17 00:00:00 2001 From: milindasf Date: Sun, 28 Jan 2024 22:26:39 -0600 Subject: [PATCH 37/75] basic tps+ BTE coupling, main loop moved to Parla + MPI --- src/tps-bte_0d3v.py | 524 ++++++++++++++++++++++---------------------- 1 file changed, 265 insertions(+), 259 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 732bd41a6..021167d93 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -162,9 +162,12 @@ def __init__(self, tps, comm): self.profile_tt = profile_tt self.profile_nn = profile_nn + num_gpus_per_node = 1 + if self.param.use_gpu==1: + num_gpus_per_node = cp.cuda.runtime.getDeviceCount() + # how to map each grid to the GPU devices on the node - self.gidx_to_device_map = lambda gidx, num_grids : gidx % 2 - + self.gidx_to_device_map = lambda gidx, num_grids : gidx % num_gpus_per_node return def __parse_config_file__(self, fname): @@ -270,129 +273,12 @@ def grid_setup(self, interface): assert self.grid_idx_to_npts[grid_idx] > 0 print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) - f0 = self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian") - self.bte_solver.set_boltzmann_parameter(grid_idx, "f0", f0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "f_mw", self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian")) - print("setting grid %d to device %d"%(grid_idx, gidx_to_device_map(grid_idx, n_grids))) - self.bte_solver.host_to_device_setup(gidx_to_device_map(grid_idx, n_grids), grid_idx) - self.xp_module = cp self.profile_tt[pp.SETUP].stop() return - def __efield_setup__(self): - - """ - Here we set the E-field for 1-way coupling - """ - - use_gpu = self.param.use_gpu - n_grids = self.param.n_grids - xp = self.xp_module - if(use_gpu==1): - def Et(t, grid_idx): - dev_id = self.gidx_to_device_map(grid_idx, n_grids) - with cp.cuda.Device(dev_id): - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - - if self.param.Efreq == 0: - return xp.sqrt(eRe_d**2 + eIm_d**2) - else: - return eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - else: - def Et(t, grid_idx): - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - - if self.param.Efreq == 0: - return xp.sqrt(eRe_d**2 + eIm_d**2) - else: - return eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - - for grid_idx in range(n_grids): - et = lambda t, gid=grid_idx: Et(t, gid) - self.bte_solver.set_efield_function(grid_idx, et) - - return - - def fetch(self, interface): - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - - heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) - tps_npts = len(heavy_temp) - self.tps_npts = tps_npts - - electron_temp = np.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) - efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) - species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, tps_npts) - n_grids = self.param.n_grids - use_gpu = self.param.use_gpu - - for grid_idx in range(n_grids): - bte_idx = gidx_to_pidx_map[grid_idx] - dev_id = self.gidx_to_device_map(grid_idx, n_grids) - - ni = species_densities[TPSINDEX.ION_IDX][bte_idx] - ne = species_densities[TPSINDEX.ELE_IDX][bte_idx] - n0 = species_densities[TPSINDEX.NEU_IDX][bte_idx] - Tg = heavy_temp[bte_idx] - Te = electron_temp[bte_idx] - eRe = efield[TPSINDEX.EF_RE_IDX][bte_idx] - eIm = efield[TPSINDEX.EF_IM_IDX][bte_idx] - - eMag = np.sqrt(eRe**2 + eIm **2) - eByn0 = eMag/n0/self.param.Td_fac - - if self.param.verbose == 1 : - print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) - print("Efreq = %.4E [1/s]" %(self.param.Efreq)) - print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) - - print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) - print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te) , np.max(Te))) - - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) - print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni) , np.max(ni))) - print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) - - if (use_gpu == 1): - with cp.cuda.Device(dev_id): - ne = cp.array(ne) - ni = cp.array(ni) - n0 = cp.array(n0) - Tg = cp.array(Tg) - Te = cp.array(Te) - eRe = cp.array(eRe) - eIm = cp.array(eIm) - - eMag = cp.sqrt(eRe**2 + eIm **2) - eByn0 = eMag/n0/self.param.Td_fac - - #self.bte_solver.set_boltzmann_parameters(grid_idx, n0, ne, ni, Tg, self.param.solver_type) - self.bte_solver.set_boltzmann_parameter(grid_idx, "n0", n0) - self.bte_solver.set_boltzmann_parameter(grid_idx, "ne", ne) - self.bte_solver.set_boltzmann_parameter(grid_idx, "ni", ni) - self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg", Tg) - self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", eRe) - self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", eRe) - - return - - def solve(self): - """ - perform the BTE solve, supports both stead-state solution (static E-field) - and time-periodic solutions for the oscillatory E-fields - """ - - if WITH_PARLA==1: - self.solve_w_parla() - return - else: - self.solve_wo_parla() - return - def solve_wo_parla(self): xp = self.xp_module csv_write = self.param.export_csv @@ -410,43 +296,28 @@ def solve_wo_parla(self): if csv_write: data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) - self.__efield_setup__() - t1 = time() for grid_idx in range(n_grids): dev_id = gidx_to_device_map(grid_idx, n_grids) if (use_gpu==0): try: - eRe_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") - eIm_d = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") - if self.param.Efreq == 0: - ef_t = lambda t : xp.sqrt(eRe_d**2 + eIm_d**2) - else: - ef_t = lambda t : eRe_d * xp.cos(2 * xp.pi * self.param.Efreq * t) + eIm_d * xp.sin(2 * xp.pi * self.param.Efreq * t) - - self.bte_solver.set_efield_function(grid_idx, ef_t) - - f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) self.qoi[grid_idx] = qoi self.ff [grid_idx] = ff except: print("solver failed for v-space gird no %d"%(grid_idx)) - sys.exit(0) + sys.exit(-1) else: with xp.cuda.Device(dev_id): - f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") - ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) - self.qoi[grid_idx] = qoi - self.ff [grid_idx] = ff - # try: - # f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") - # ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) - # self.qoi[grid_idx] = qoi - # self.ff [grid_idx] = ff - # except: - # print("solver failed for v-space gird no %d"%(grid_idx)) - # sys.exit(0) + try: + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + self.qoi[grid_idx] = qoi + self.ff [grid_idx] = ff + except: + print("solver failed for v-space gird no %d"%(grid_idx)) + sys.exit(-1) t2 = time() print("time for boltzmann v-space solve = %.4E"%(t2- t1)) @@ -529,7 +400,157 @@ def asnumpy(a): return - def solve_w_parla(self): + async def fetch(self, interface): + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + tps_npts = len(heavy_temp) + self.tps_npts = tps_npts + electron_temp = np.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) + efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) + species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, tps_npts) + + n_grids = self.param.n_grids + use_gpu = self.param.use_gpu + n_grids = self.param.n_grids + + gidx_to_device_map = self.gidx_to_device_map + + for grid_idx in range(n_grids): + bte_idx = gidx_to_pidx_map[grid_idx] + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + + ni = species_densities[TPSINDEX.ION_IDX][bte_idx] + ne = species_densities[TPSINDEX.ELE_IDX][bte_idx] + n0 = species_densities[TPSINDEX.NEU_IDX][bte_idx] + Tg = heavy_temp[bte_idx] + Te = electron_temp[bte_idx] + eRe = efield[TPSINDEX.EF_RE_IDX][bte_idx] + eIm = efield[TPSINDEX.EF_IM_IDX][bte_idx] + + eMag = np.sqrt(eRe**2 + eIm **2) + eByn0 = eMag/n0/self.param.Td_fac + + if self.param.verbose == 1 : + print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) + print("Efreq = %.4E [1/s]" %(self.param.Efreq)) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) + + print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) + print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te) , np.max(Te))) + + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) + print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni) , np.max(ni))) + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) + + + if (use_gpu == 1): + with cp.cuda.Device(dev_id): + ne = cp.array(ne) + ni = cp.array(ni) + n0 = cp.array(n0) + Tg = cp.array(Tg) + Te = cp.array(Te) + eRe = cp.array(eRe) + eIm = cp.array(eIm) + + eMag = cp.sqrt(eRe**2 + eIm **2) + eByn0 = eMag/n0/self.param.Td_fac + + self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ne" , ne) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ni" , ni) + self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg" , Tg) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", eRe) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", eIm) + self.bte_solver.set_boltzmann_parameter(grid_idx, "E" , eMag) + + + return + + async def solve_init(self): + rank = self.comm.Get_rank() + npes = self.comm.Get_size() + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map + + ts = TaskSpace("T") + for grid_idx in range(self.param.n_grids): + @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + def t1(): + dev_id = gidx_to_device_map(grid_idx, n_grids) + print("[%d/%d] setting grid %d to device %d"%(rank, npes, grid_idx, dev_id)) + self.bte_solver.host_to_device_setup(dev_id, grid_idx) + + await ts + + def ts_op_setup(grid_idx): + xp = self.xp_module + f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + n_pts = f_mw.shape[1] + Qmat = self.bte_solver._op_qmat[grid_idx] + INr = xp.eye(Qmat.shape[1]) + self.bte_solver._op_imat_vx[grid_idx] = xp.einsum("i,jk->ijk",xp.ones(n_pts), INr) + + if(self.param.use_gpu==1): + self.xp_module = cp + ts = TaskSpace("T") + + for grid_idx in range(self.param.n_grids): + dev_id = gidx_to_device_map(grid_idx, n_grids) + @spawn(ts[grid_idx], placement=[gpu(dev_id)], vcus=0.0) + def t1(): + ts_op_setup(grid_idx) + f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw) + + + await ts + else: + self.xp_module = np + ts = TaskSpace("T") + for grid_idx in range(self.param.n_grids): + @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + def t1(): + ts_op_setup(grid_idx) + f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw) + + await ts + + return + + async def solve_step(self, time, delta_t): + """ + perform a single timestep in 0d-BTE + """ + rank = self.comm.Get_rank() + npes = self.comm.Get_size() + # xp = self.xp_module + # csv_write = self.param.export_csv + # plot_data = self.param.plot_data + # gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + # use_gpu = self.param.use_gpu + # dev_id = self.param.dev_id + # verbose = self.param.verbose + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map + + ts = TaskSpace("T") + for grid_idx in range(n_grids): + @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) + def t1(): + u0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u0") + v = self.bte_solver.step(grid_idx, u0, self.param.atol, self.param.rtol, self.param.max_iter, time, delta_t) + self.bte_solver.set_boltzmann_parameter(grid_idx, "u1", v) + await ts + + return + + async def solve(self): + """ + Can be used to compute steady-state or cycle averaged BTE solutions + """ rank = self.comm.Get_rank() npes = self.comm.Get_size() xp = self.xp_module @@ -542,43 +563,36 @@ def solve_w_parla(self): n_grids = self.param.n_grids gidx_to_device_map = self.gidx_to_device_map - self.qoi = [None for grid_idx in range(self.param.n_grids)] - self.ff = [None for grid_idx in range(self.param.n_grids)] + self.qoi = [None for grid_idx in range(self.param.n_grids)] + self.ff = [None for grid_idx in range(self.param.n_grids)] + num_gpus = len(gpu) + if (use_gpu==1): + parla_placement = [gpu(gidx_to_device_map(grid_idx,n_grids)) for grid_idx in range(n_grids)] + else: + parla_placement = [cpu for grid_idx in range(n_grids)] + if csv_write: data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) - - self.profile_tt[pp.SETUP].start() - self.__efield_setup__() - self.profile_tt[pp.SETUP].stop() - - with Parla(): - num_gpus = len(gpu) - if (use_gpu==1): - parla_placement = [gpu(gidx_to_device_map(grid_idx,n_grids)) for grid_idx in range(n_grids)] - else: - parla_placement = [cpu for grid_idx in range(n_grids)] - print(parla_placement) - @spawn(placement=cpu, vcus=0) - async def __main__(): - self.profile_tt[pp.SOLVE].start() - ts_1 = TaskSpace("T") - for grid_idx in range(self.param.n_grids): - @spawn(ts_1[grid_idx], placement=[parla_placement[grid_idx]], vcus=0.0) - def t1(): - f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f0") - print("[Boltzmann] %d / %d launching grid %d on %s"%(rank, npes, grid_idx, parla_placement[grid_idx])) - try: - ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) - self.ff[grid_idx] = ff - self.qoi[grid_idx] = qoi - except: - print("solver failed for v-space gird no %d"%(grid_idx)) - sys.exit(-1) - - await ts_1 - self.profile_tt[pp.SOLVE].stop() + self.profile_tt[pp.SOLVE].start() + ts = TaskSpace("T") + for grid_idx in range(self.param.n_grids): + @spawn(ts[grid_idx], placement=[parla_placement[grid_idx]], vcus=0.0) + def t1(): + try: + print("[Boltzmann] %d / %d launching grid %d on %s"%(rank, npes, grid_idx, parla_placement[grid_idx])) + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + self.ff[grid_idx] = ff + self.qoi[grid_idx] = qoi + except: + print("solver failed for v-space gird no %d"%(grid_idx)) + sys.exit(-1) + + await ts + self.profile_tt[pp.SOLVE].stop() + t1 = min_mean_max(self.profile_tt[pp.SETUP].seconds, self.comm) t2 = min_mean_max(self.profile_tt[pp.SOLVE].seconds, self.comm) @@ -663,90 +677,82 @@ def asnumpy(a): return - def push(self, interface): - xp = self.xp_module - Te_bte = xp.array(interface.HostWrite(libtps.t2bIndex.ElectronTemperature), copy=False) - rate_bte = xp.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((2, self.tps_npts)) - Te_tps = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) - - species_densities = xp.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, self.tps_npts) - ni = species_densities[TPSINDEX.ION_IDX] - n0 = species_densities[TPSINDEX.NEU_IDX] - ne = species_densities[TPSINDEX.ELE_IDX] - - rate_tps_arr = r_arr(Te_tps) - rate_tps_csc = r_csc(Te_tps) - - rr_bte = xp.zeros_like(rate_tps_arr) - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - - for grid_idx in range(self.param.n_grids): - Te_bte[gidx_to_pidx_map[grid_idx]] = (self.qoi[grid_idx]["energy"]/1.5) * self.param.ev_to_K - rr = self.qoi[grid_idx]["rates"] - # here rr should be in the same ordering as the collision model prescribed to the Boltzmann solver. - rr_bte[gidx_to_pidx_map[grid_idx]] = rr[1] - - # rr_bte[rr_bte<0] = 0.0 - # s0 = rate_tps_arr * n0 * ni - # s1 = rate_tps_csc * n0 * ni + async def push(self, interface): + xp = self.xp_module + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - # s2 = rr_bte * n0 * ni + heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + tps_npts = len(heavy_temp) - # # tau = 1e-2 - # # idx = s2 > tau - # rate_bte[0][:] = 0.0 - # rate_bte[1][:] = 0.0 - # rate_bte[0] = rr_bte - # rate_bte[1] = xp.abs(s2-s1)/xp.max(s2) + n_reactions = interface.nComponents(libtps.t2bIndex.ReactionRates) + rates = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((n_reactions, tps_npts)) + if(n_reactions>0): + ts = TaskSpace("T") + for grid_idx in range(n_grids): + @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) + def t1(): + qA = boltzmann.bte_solver._op_diag_dg[grid_idx] + u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") + + h_curr = xp.dot(qA, u0) + h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) + qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + + rates[0][gidx_to_pidx_map[grid_idx]] = xp.asnumpy(qoi["rates"][1]) + + + await ts return - - - - -comm = MPI.COMM_WORLD -# TPS solver -tps = libtps.Tps(comm) - -tps.parseCommandLineArgs(sys.argv) -tps.parseInput() -tps.chooseDevices() -tps.chooseSolver() -tps.initialize() - -boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) - -interface = libtps.Tps2Boltzmann(tps) -tps.initInterface(interface) - -coords = np.array(interface.HostReadSpatialCoordinates(), copy=False) -print(coords.shape) - -it = 0 -max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") -print("Max Iters: ", max_iters) -tps.solveBegin() -tps.solveStep() -tps.push(interface) -boltzmann.grid_setup(interface) -boltzmann.fetch(interface) -boltzmann.solve() -# boltzmann.push(interface) -# tps.fetch(interface) - -# while it < max_iters: -# tps.solveStep() -# tps.push(interface) -# boltzmann.fetch(interface) -# boltzmann.solve() -# boltzmann.push(interface) -# tps.fetch(interface) +if __name__=="__main__": + comm = MPI.COMM_WORLD -# it = it+1 -# print("it, ", it) - -tps.solveEnd() - + with Parla(): + # TPS solver + tps = libtps.Tps(comm) + tps.parseCommandLineArgs(sys.argv) + tps.parseInput() + tps.chooseDevices() + tps.chooseSolver() + tps.initialize() + + boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) + interface = libtps.Tps2Boltzmann(tps) + tps.initInterface(interface) + + #coords = np.array(interface.HostReadSpatialCoordinates(), copy=False) + tps.solveBegin() + tps.push(interface) + boltzmann.grid_setup(interface) + + max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") + iter = 0 + tt = 0 + dt = 1e-3 /boltzmann.param.Efreq + + @spawn(placement=cpu, vcus=0) + async def __main__(): + await boltzmann.solve_init() + xp = boltzmann.bte_solver.xp_module + + + while (iter Date: Mon, 29 Jan 2024 09:34:59 -0600 Subject: [PATCH 38/75] timing results for BTE and TPS steps added. --- src/tps-bte_0d3v.py | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 021167d93..f485e809e 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -430,18 +430,18 @@ async def fetch(self, interface): eMag = np.sqrt(eRe**2 + eIm **2) eByn0 = eMag/n0/self.param.Td_fac - if self.param.verbose == 1 : - print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) - print("Efreq = %.4E [1/s]" %(self.param.Efreq)) - print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) + # if self.param.verbose == 1 : + # print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) + # print("Efreq = %.4E [1/s]" %(self.param.Efreq)) + # print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) - print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) - print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te) , np.max(Te))) + # print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) + # print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) + # print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te) , np.max(Te))) - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) - print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni) , np.max(ni))) - print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) + # print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) + # print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni) , np.max(ni))) + # print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) if (use_gpu == 1): @@ -728,30 +728,40 @@ def t1(): tps.push(interface) boltzmann.grid_setup(interface) - max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") - iter = 0 - tt = 0 - dt = 1e-3 /boltzmann.param.Efreq - @spawn(placement=cpu, vcus=0) async def __main__(): await boltzmann.solve_init() xp = boltzmann.bte_solver.xp_module - + + max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") + iter = 0 + tt = 0 + tau = (1/boltzmann.param.Efreq) + dt = 5e-3 * tau while (iter Date: Mon, 29 Jan 2024 09:57:46 -0600 Subject: [PATCH 39/75] Export timestep and currentTime --- src/M2ulPhyS2Boltzmann.cpp | 3 +++ src/tps2Boltzmann.cpp | 2 ++ src/tps2Boltzmann.hpp | 8 ++++++++ 3 files changed, 13 insertions(+) diff --git a/src/M2ulPhyS2Boltzmann.cpp b/src/M2ulPhyS2Boltzmann.cpp index 60c9f450b..a8aba27b6 100644 --- a/src/M2ulPhyS2Boltzmann.cpp +++ b/src/M2ulPhyS2Boltzmann.cpp @@ -78,6 +78,9 @@ void M2ulPhyS::push(TPS::Tps2Boltzmann &interface) { interface.interpolateFromNativeFES(*heavyTemperature, TPS::Tps2Boltzmann::Index::HeavyTemperature); interface.interpolateFromNativeFES(*electronTemperature, TPS::Tps2Boltzmann::Index::ElectronTemperature); + interface.setTimeStep(this->dt); + interface.setCurrentTime(this->time); + delete species; delete heavyTemperature; delete electronTemperature; diff --git a/src/tps2Boltzmann.cpp b/src/tps2Boltzmann.cpp index b2cceba63..76f97c28c 100644 --- a/src/tps2Boltzmann.cpp +++ b/src/tps2Boltzmann.cpp @@ -342,6 +342,8 @@ void tps2bolzmann(py::module &m) { return std::unique_ptr(new TPS::CPUData(interface.Field(index), true)); }) .def("EfieldAngularFreq", &TPS::Tps2Boltzmann::EfieldAngularFreq) + .def("timeStep", &TPS::Tps2Boltzmann::timeStep) + .def("currentTime", &TPS::Tps2Boltzmann::currentTime) .def("Nspecies", &TPS::Tps2Boltzmann::Nspecies) .def("NeFiledComps", &TPS::Tps2Boltzmann::NeFieldComps) .def("nComponents", &TPS::Tps2Boltzmann::nComponents) diff --git a/src/tps2Boltzmann.hpp b/src/tps2Boltzmann.hpp index 82d4e8ee7..301d9ada5 100644 --- a/src/tps2Boltzmann.hpp +++ b/src/tps2Boltzmann.hpp @@ -116,6 +116,12 @@ class Tps2Boltzmann { int nComponents(Index index) const { return ncomps[index]; } std::string getReactionEquation(int index) const { return reaction_eqs_[index]; } + void setTimeStep(double dt) { timestep_=dt; } + void setCurrentTime(double time) { currentTime_=time; } + + double timeStep() const { return timestep_; } + double currentTime() const { return currentTime_; } + void saveDataCollection(int cycle, double time); ~Tps2Boltzmann(); @@ -161,6 +167,8 @@ class Tps2Boltzmann { mfem::ParGridFunction *spatial_coordinates_; double EfieldAngularFreq_; + double timestep_; + double currentTime_; bool save_to_paraview_dc; mfem::ParaViewDataCollection *paraview_dc; From ce6c857badfc1b116cbfb72dfade6a3eab8a7219 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 29 Jan 2024 10:34:39 -0600 Subject: [PATCH 40/75] Make style --- src/tps2Boltzmann.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tps2Boltzmann.hpp b/src/tps2Boltzmann.hpp index 301d9ada5..a0d041492 100644 --- a/src/tps2Boltzmann.hpp +++ b/src/tps2Boltzmann.hpp @@ -116,8 +116,8 @@ class Tps2Boltzmann { int nComponents(Index index) const { return ncomps[index]; } std::string getReactionEquation(int index) const { return reaction_eqs_[index]; } - void setTimeStep(double dt) { timestep_=dt; } - void setCurrentTime(double time) { currentTime_=time; } + void setTimeStep(double dt) { timestep_ = dt; } + void setCurrentTime(double time) { currentTime_ = time; } double timeStep() const { return timestep_; } double currentTime() const { return currentTime_; } From b6f246083c52382856fdbba2459f7f69cc463b22 Mon Sep 17 00:00:00 2001 From: milindasf Date: Tue, 6 Feb 2024 10:22:26 -0600 Subject: [PATCH 41/75] basic tps + 0d bte 2-way coupling with parla --- src/tps-bte_0d3v.py | 131 +++++++++++++++++++++++++++++++------------- 1 file changed, 93 insertions(+), 38 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index f485e809e..123cff0a8 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -12,7 +12,7 @@ import enum import pandas as pd import scipy.interpolate - +import scipy.cluster class profile_t: def __init__(self,name): self.name = name @@ -217,32 +217,43 @@ def grid_setup(self, interface): self.profile_tt[pp.SETUP].start() xp = self.xp_module + n_grids = self.param.n_grids Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] - Te_min, Te_max = xp.min(Te), xp.max(Te) - Te_b = xp.linspace(Te_min, Te_max, self.param.n_grids, endpoint=False) - dist_mat = xp.zeros((len(Te), self.param.n_grids)) - - for iter in range(50): - #print("clustering iteration ", iter, Te_b) - for i in range(self.param.n_grids): - dist_mat[:,i] = xp.abs(Te-Te_b[i]) + # Te_min, Te_max = xp.min(Te), xp.max(Te) + # Te_b = xp.linspace(Te_min, Te_max, self.param.n_grids, endpoint=False) + # dist_mat = xp.zeros((len(Te), self.param.n_grids)) + + # #scipy.cluster.vq.kmeans(scipy.cluster.vq.whiten(Te), Te_b, ) + + # for iter in range(50): + # #print("clustering iteration ", iter, Te_b) + # for i in range(self.param.n_grids): + # dist_mat[:,i] = xp.abs(Te-Te_b[i]) - membership = xp.argmin(dist_mat, axis=1) - Te_b1 = np.array([np.mean(Te[xp.argwhere(membership==i)[:,0]]) for i in range(self.param.n_grids)]) - rel_error = np.max(np.abs(1 - Te_b1/Te_b)) - Te_b = Te_b1 + # membership = xp.argmin(dist_mat, axis=1) + # Te_b1 = np.array([np.mean(Te[xp.argwhere(membership==i)[:,0]]) for i in range(self.param.n_grids)]) + # rel_error = np.max(np.abs(1 - Te_b1/Te_b)) + # Te_b = Te_b1 - if rel_error < 1e-4: - break - Te_b = np.sort(Te_b) + # if rel_error < 1e-4: + # break + # Te_b = np.sort(Te_b) + # print("K-means Te clusters ", Te_b) + # for i in range(self.param.n_grids): + # dist_mat[:,i] = xp.abs(Te-Te_b[i]) + + Tew = scipy.cluster.vq.whiten(Te) + Tecw = scipy.cluster.vq.kmeans(Tew, np.linspace(np.min(Tew), np.max(Tew), n_grids), iter=1000, thresh=1e-8)[0] + Te_b = Tecw * np.std(Te, axis=0) + dist_mat = xp.zeros((len(Te),n_grids)) + print("K-means Te clusters ", Te_b) for i in range(self.param.n_grids): - dist_mat[:,i] = xp.abs(Te-Te_b[i]) + dist_mat[:,i] = xp.abs(Tew-Tecw[i]) membership = xp.argmin(dist_mat, axis=1) grid_idx_to_spatial_pts_map = list() for b_idx in range(self.param.n_grids): - #grid_idx_to_spatial_pts_map.append(xp.argwhere(xp.logical_and(Te>= Te_b[b_idx], Te < Te_b[b_idx+1]))[:,0]) grid_idx_to_spatial_pts_map.append(xp.argwhere(membership==b_idx)[:,0]) np.save("%s_gidx_to_pidx.npy"%(self.param.out_fname), np.array(grid_idx_to_spatial_pts_map, dtype=object), allow_pickle=True) @@ -256,7 +267,7 @@ def grid_setup(self, interface): Te = xp.array([Te_b[b_idx] for b_idx in range(self.param.n_grids)]) # xp.ones(self.param.n_grids) * self.param.Te vth = np.sqrt(2* self.param.kB * Te * self.param.ev_to_K /self.param.me) ev_max = (6 * vth / self.param.c_gamma)**2 - self.bte_solver = BoltzmannSolver(self.param, ev_max ,Te , nr, lm_modes, self.param.n_grids, self.param.collisions) + self.bte_solver = BoltzmannSolver(self.param, ev_max , Te , nr, lm_modes, self.param.n_grids, self.param.collisions) if self.param.verbose==1: print("grid energy max (eV) \n", ev_max, flush = True) @@ -409,6 +420,15 @@ async def fetch(self, interface): efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, tps_npts) + # np.save("n0.npy", species_densities[TPSINDEX.NEU_IDX]) + # np.save("ne.npy", species_densities[TPSINDEX.ELE_IDX]) + # np.save("ni.npy", species_densities[TPSINDEX.ION_IDX]) + + # np.save("Te.npy", heavy_temp) + # np.save("Tg.npy", heavy_temp) + # np.save("E.npy" , np.sqrt(efield[0]**2 + efield[1]**2)) + # sys.exit(-1) + n_grids = self.param.n_grids use_gpu = self.param.use_gpu n_grids = self.param.n_grids @@ -430,18 +450,18 @@ async def fetch(self, interface): eMag = np.sqrt(eRe**2 + eIm **2) eByn0 = eMag/n0/self.param.Td_fac - # if self.param.verbose == 1 : - # print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) - # print("Efreq = %.4E [1/s]" %(self.param.Efreq)) - # print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) + if self.param.verbose == 1 : + print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) + print("Efreq = %.4E [1/s]" %(self.param.Efreq)) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) - # print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) + print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) # print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) # print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te) , np.max(Te))) # print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) - # print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni) , np.max(ni))) - # print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) + print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni) , np.max(ni))) + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) if (use_gpu == 1): @@ -695,7 +715,7 @@ async def push(self, interface): @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) def t1(): qA = boltzmann.bte_solver._op_diag_dg[grid_idx] - u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") + u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") h_curr = xp.dot(qA, u0) h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) @@ -705,6 +725,7 @@ def t1(): await ts + rates = rates.reshape((-1)) return if __name__=="__main__": @@ -735,10 +756,15 @@ async def __main__(): max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") iter = 0 - tt = 0 + tt = 0#interface.currentTime() tau = (1/boltzmann.param.Efreq) - dt = 5e-3 * tau + dt_tps = interface.timeStep() + dt_bte = boltzmann.param.dt * (dt_tps) + bte_steps = int(dt_tps/dt_bte) + n_grids = boltzmann.param.n_grids + cycle_freq = int(xp.ceil(tau/dt_tps)) + gidx_to_device_map = boltzmann.gidx_to_device_map while (iter Date: Wed, 14 Feb 2024 13:52:15 -0600 Subject: [PATCH 42/75] hierarcical clustering added for the bte solve, active grid select based on ionization threshold --- src/tps-bte_0d3v.py | 407 ++++++++++++++++++++++++++++++-------------- 1 file changed, 278 insertions(+), 129 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 123cff0a8..1d8e29095 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -94,6 +94,7 @@ class BoltzmannSolverParams(): l_max = 1 # spherical modes uses, 0, to l_max ev_max = 16 # v-space grid truncation (eV) n_grids = 4 # number of v-space grids + n_sub_clusters= 300 # number of sub-clusters dt = 1e-3 # [] non-dimentionalized time w.r.t. oscilation period cycles = 10 # number of max cycles to evolve @@ -126,6 +127,10 @@ class BoltzmannSolverParams(): c_gamma = np.sqrt(2 * scipy.constants.elementary_charge / scipy.constants.electron_mass) #[(C/kg)^{1/2}] me = scipy.constants.electron_mass kB = scipy.constants.Boltzmann + N_Avo = scipy.constants.Avogadro + + n0 = 3.22e22 #[m^{-3}] + class TPSINDEX(): """ @@ -138,6 +143,11 @@ class TPSINDEX(): EF_RE_IDX = 0 # Re(E) index EF_IM_IDX = 1 # Im(E) index + # in future we need to setup this methodically + # here key denotes the idx running from 0, nreactions-1 + # value denotes the reaction index in the qoi array + RR_IDX = {0:1} + class Boltzmann0D2VBactchedSolver: def __init__(self, tps, comm): @@ -216,34 +226,12 @@ def grid_setup(self, interface): assert self.xp_module==np, "grid setup only supported in CPU" self.profile_tt[pp.SETUP].start() - xp = self.xp_module - n_grids = self.param.n_grids - Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] - # Te_min, Te_max = xp.min(Te), xp.max(Te) - # Te_b = xp.linspace(Te_min, Te_max, self.param.n_grids, endpoint=False) - # dist_mat = xp.zeros((len(Te), self.param.n_grids)) - - # #scipy.cluster.vq.kmeans(scipy.cluster.vq.whiten(Te), Te_b, ) - - # for iter in range(50): - # #print("clustering iteration ", iter, Te_b) - # for i in range(self.param.n_grids): - # dist_mat[:,i] = xp.abs(Te-Te_b[i]) - - # membership = xp.argmin(dist_mat, axis=1) - # Te_b1 = np.array([np.mean(Te[xp.argwhere(membership==i)[:,0]]) for i in range(self.param.n_grids)]) - # rel_error = np.max(np.abs(1 - Te_b1/Te_b)) - # Te_b = Te_b1 - - # if rel_error < 1e-4: - # break - # Te_b = np.sort(Te_b) - # print("K-means Te clusters ", Te_b) - # for i in range(self.param.n_grids): - # dist_mat[:,i] = xp.abs(Te-Te_b[i]) + xp = self.xp_module + n_grids = self.param.n_grids + Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] Tew = scipy.cluster.vq.whiten(Te) - Tecw = scipy.cluster.vq.kmeans(Tew, np.linspace(np.min(Tew), np.max(Tew), n_grids), iter=1000, thresh=1e-8)[0] + Tecw = scipy.cluster.vq.kmeans(Tew, np.linspace(np.min(Tew), np.max(Tew), n_grids), iter=1000, thresh=1e-8, check_finite=False)[0] Te_b = Tecw * np.std(Te, axis=0) dist_mat = xp.zeros((len(Te),n_grids)) @@ -285,8 +273,17 @@ def grid_setup(self, interface): print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) self.bte_solver.set_boltzmann_parameter(grid_idx, "f_mw", self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian")) - + + active_grid_idx=list() + for grid_idx in range(n_grids): + spec_sp = self.bte_solver._op_spec_sp[grid_idx] + ev_max_ext = (spec_sp._basis_p._t[-1] * vth[grid_idx] / self.param.c_gamma)**2 + if ev_max_ext > 15.76: + active_grid_idx.append(grid_idx) + + self.active_grid_idx = active_grid_idx #[i for i in range(self.param.n_grids)] + self.sub_clusters_run = False self.profile_tt[pp.SETUP].stop() return @@ -411,14 +408,15 @@ def asnumpy(a): return - async def fetch(self, interface): - gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + async def fetch(self, interface, use_interp:bool): + gidx_to_pidx = self.grid_idx_to_spatial_idx_map heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) tps_npts = len(heavy_temp) self.tps_npts = tps_npts + nspecies = interface.Nspecies() electron_temp = np.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) - species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(3, tps_npts) + species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(nspecies, tps_npts) # np.save("n0.npy", species_densities[TPSINDEX.NEU_IDX]) # np.save("ne.npy", species_densities[TPSINDEX.ELE_IDX]) @@ -429,66 +427,175 @@ async def fetch(self, interface): # np.save("E.npy" , np.sqrt(efield[0]**2 + efield[1]**2)) # sys.exit(-1) - n_grids = self.param.n_grids - use_gpu = self.param.use_gpu - n_grids = self.param.n_grids + n_grids = self.param.n_grids + use_gpu = self.param.use_gpu - gidx_to_device_map = self.gidx_to_device_map + Tg = heavy_temp + n0 = species_densities[TPSINDEX.NEU_IDX] + ne = species_densities[TPSINDEX.ELE_IDX] + ni = species_densities[TPSINDEX.ION_IDX] + + Ex = efield[0] + Ey = efield[1] + + ExbyN = Ex/n0/self.param.Td_fac + EybyN = Ey/n0/self.param.Td_fac - for grid_idx in range(n_grids): - bte_idx = gidx_to_pidx_map[grid_idx] - dev_id = self.gidx_to_device_map(grid_idx, n_grids) - - ni = species_densities[TPSINDEX.ION_IDX][bte_idx] - ne = species_densities[TPSINDEX.ELE_IDX][bte_idx] - n0 = species_densities[TPSINDEX.NEU_IDX][bte_idx] - Tg = heavy_temp[bte_idx] - Te = electron_temp[bte_idx] - eRe = efield[TPSINDEX.EF_RE_IDX][bte_idx] - eIm = efield[TPSINDEX.EF_IM_IDX][bte_idx] - - eMag = np.sqrt(eRe**2 + eIm **2) - eByn0 = eMag/n0/self.param.Td_fac + Ex = ExbyN * self.param.n0 * self.param.Td_fac + Ey = EybyN * self.param.n0 * self.param.Td_fac + + ion_deg = np.zeros_like(ne) #ne/n0 + + m_bte = np.concatenate((ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1,1)) ), axis=1) + + self.sub_cluster_idx_to_pidx = None + self.sub_cluster_c = None + gidx_to_device_map = self.gidx_to_device_map + + if (use_interp == True): + n_sub_clusters = self.param.n_sub_clusters + self.sub_cluster_idx_to_pidx = [[None for i in range(n_sub_clusters)] for i in range(self.param.n_grids)] + self.sub_cluster_c = [None for i in range(self.param.n_grids)] - if self.param.verbose == 1 : - print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) - print("Efreq = %.4E [1/s]" %(self.param.Efreq)) - print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) - - print("E/n0 (min) = %.12E [Td] \t E/n0 (max) = %.12E [Td] "%(np.min(eByn0), np.max(eByn0))) - # print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) - # print("Te (min) = %.12E [K] \t Te (max) = %.12E [K] "%(np.min(Te) , np.max(Te))) + def normalize(obs): + std_obs = np.std(obs, axis=0) + std_obs[std_obs == 0.0] = 1.0 + return obs/std_obs, std_obs + + ts = TaskSpace("T") + for grid_idx in self.active_grid_idx: + @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + def t1(): + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + m = m_bte[gidx_to_pidx[grid_idx]] + mw , mw_std = normalize(m) + mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] + mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=1000, thresh=1e-8, check_finite=False)[0] + mcw0[0:mcw.shape[0], :] = mcw[:,:] + mcw = mcw0 + + mc = mcw * mw_std + dist_mat = np.array([np.linalg.norm(mw - mcw[i], axis=1) for i in range(n_sub_clusters)]).T + membership_m = np.argmin(dist_mat, axis=1) + self.sub_cluster_c[grid_idx] = mc + + for c_idx in range(n_sub_clusters): + self.sub_cluster_idx_to_pidx[grid_idx][c_idx] = np.argwhere(membership_m==c_idx)[:,0] + + # idx = self.sub_cluster_idx_to_pidx[grid_idx][c_idx] + # abs_err = np.linalg.norm(dist_mat[idx, c_idx] - np.linalg.norm(mw[idx] - mcw[c_idx], axis=1)) + # print(grid_idx, c_idx, abs_err) + + # dw_mat = np.zeros(self.param.n_sub_clusters) + # print(grid_idx,"\n" , mc) + # for c_idx in range(n_sub_clusters): + # idx = self.sub_cluster_idx_to_pidx[grid_idx][c_idx] + # if len(idx>0): + # dw_mat[c_idx] = np.max(np.linalg.norm(1 - m[idx] / mc[c_idx], axis = 1)) + + # plt.figure(figsize=(8, 8), dpi=300) + # plt.semilogy(np.array(range(self.param.n_sub_clusters)), dw_mat) + # plt.xlabel(r"cluster id") + # plt.ylabel(r"relative error") + # plt.grid(visible=True) + # plt.savefig("%s_grid_idx_%04d.png"%(self.param.out_fname, grid_idx)) + # plt.close() + + + n0 = np.ones(mc.shape[0]) * self.param.n0 + Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac + Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac + + Tg = mc[: , 2] + ne = mc[: , 3] * self.param.n0 + ni = mc[: , 3] * self.param.n0 + EMag = np.sqrt(Ex**2 + Ey**2) + + if self.param.verbose == 1 : + print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) + print("Efreq = %.4E [1/s]" %(self.param.Efreq)) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) + + print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN), np.max(ExbyN))) + print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN), np.max(EybyN))) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) + + if (use_gpu==1): + with cp.cuda.Device(dev_id): + n0 = cp.array(n0) + Ex = cp.array(Ex) + Ey = cp.array(Ey) + Tg = cp.array(Tg) + ne = cp.array(ne) + ni = cp.array(ni) + EMag = cp.sqrt(Ex**2 + Ey**2) + + + self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ne" , ne) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ni" , ni) + self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg" , Tg) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", Ex) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", Ey) + self.bte_solver.set_boltzmann_parameter(grid_idx, "E" , EMag) + + return - # print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) - print("ni (min) = %.12E [1/m^3] \t ni (max) = %.12E [1/m^3] "%(np.min(ni) , np.max(ni))) - print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) - + await ts - if (use_gpu == 1): - with cp.cuda.Device(dev_id): - ne = cp.array(ne) - ni = cp.array(ni) - n0 = cp.array(n0) - Tg = cp.array(Tg) - Te = cp.array(Te) - eRe = cp.array(eRe) - eIm = cp.array(eIm) + else: + ts = TaskSpace("T") + for grid_idx in self.active_grid_idx: + @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + def t1(): + bte_idx = gidx_to_pidx[grid_idx] + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + + mc = m_bte[bte_idx] - eMag = cp.sqrt(eRe**2 + eIm **2) - eByn0 = eMag/n0/self.param.Td_fac + n0 = np.ones(mc.shape[0]) * self.param.n0 + Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac + Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac + + Tg = mc[: , 2] + ne = mc[: , 3] * self.param.n0 + ni = mc[: , 3] * self.param.n0 + EMag = np.sqrt(Ex**2 + Ey**2) + + if self.param.verbose == 1 : + print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) + print("Efreq = %.4E [1/s]" %(self.param.Efreq)) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) + + print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN), np.max(ExbyN))) + print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN), np.max(EybyN))) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) - self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) - self.bte_solver.set_boltzmann_parameter(grid_idx, "ne" , ne) - self.bte_solver.set_boltzmann_parameter(grid_idx, "ni" , ni) - self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg" , Tg) - self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", eRe) - self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", eIm) - self.bte_solver.set_boltzmann_parameter(grid_idx, "E" , eMag) - + if (use_gpu == 1): + with cp.cuda.Device(dev_id): + n0 = cp.array(n0) + ne = cp.array(ne) + ni = cp.array(ni) + Ex = cp.array(Ex) + Ey = cp.array(Ey) + Tg = cp.array(Tg) + EMag = cp.sqrt(Ex**2 + Ey**2) + + self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ne" , ne) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ni" , ni) + self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg" , Tg) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", Ex) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", Ey) + self.bte_solver.set_boltzmann_parameter(grid_idx, "E" , EMag) + return + await ts return - async def solve_init(self): + async def solve_init(self, use_interp:bool): rank = self.comm.Get_rank() npes = self.comm.Get_size() n_grids = self.param.n_grids @@ -507,7 +614,12 @@ def t1(): def ts_op_setup(grid_idx): xp = self.xp_module f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") - n_pts = f_mw.shape[1] + + if (use_interp==True): + n_pts = self.param.n_sub_clusters + else: + n_pts = f_mw.shape[1] + Qmat = self.bte_solver._op_qmat[grid_idx] INr = xp.eye(Qmat.shape[1]) self.bte_solver._op_imat_vx[grid_idx] = xp.einsum("i,jk->ijk",xp.ones(n_pts), INr) @@ -516,25 +628,33 @@ def ts_op_setup(grid_idx): self.xp_module = cp ts = TaskSpace("T") - for grid_idx in range(self.param.n_grids): + for grid_idx in self.active_grid_idx: dev_id = gidx_to_device_map(grid_idx, n_grids) @spawn(ts[grid_idx], placement=[gpu(dev_id)], vcus=0.0) def t1(): ts_op_setup(grid_idx) f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw) + + if (use_interp==True): + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw[: , 0:self.param.n_sub_clusters]) + else: + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw) await ts else: self.xp_module = np + ts = TaskSpace("T") - for grid_idx in range(self.param.n_grids): + for grid_idx in self.active_grid_idx: @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) def t1(): ts_op_setup(grid_idx) f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw) + if (use_interp==True): + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw[: , 0:self.param.n_sub_clusters]) + else: + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw) await ts @@ -546,18 +666,11 @@ async def solve_step(self, time, delta_t): """ rank = self.comm.Get_rank() npes = self.comm.Get_size() - # xp = self.xp_module - # csv_write = self.param.export_csv - # plot_data = self.param.plot_data - # gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map - # use_gpu = self.param.use_gpu - # dev_id = self.param.dev_id - # verbose = self.param.verbose n_grids = self.param.n_grids gidx_to_device_map = self.gidx_to_device_map ts = TaskSpace("T") - for grid_idx in range(n_grids): + for grid_idx in self.active_grid_idx: @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) def t1(): u0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u0") @@ -697,7 +810,7 @@ def asnumpy(a): return - async def push(self, interface): + async def push(self, interface, use_interp:bool): xp = self.xp_module n_grids = self.param.n_grids gidx_to_device_map = self.gidx_to_device_map @@ -709,24 +822,49 @@ async def push(self, interface): n_reactions = interface.nComponents(libtps.t2bIndex.ReactionRates) rates = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((n_reactions, tps_npts)) - if(n_reactions>0): - ts = TaskSpace("T") - for grid_idx in range(n_grids): - @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) - def t1(): - qA = boltzmann.bte_solver._op_diag_dg[grid_idx] - u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - - h_curr = xp.dot(qA, u0) - h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) - qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) - - rates[0][gidx_to_pidx_map[grid_idx]] = xp.asnumpy(qoi["rates"][1]) - - - await ts - rates = rates.reshape((-1)) + if (use_interp==True): + if(n_reactions>0): + rates[:,:] = 0.0 + ts = TaskSpace("T") + for grid_idx in self.active_grid_idx: + @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) + def t1(): + qA = boltzmann.bte_solver._op_diag_dg[grid_idx] + u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + + h_curr = xp.dot(qA, u0) + h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) + qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + + rr_interp = np.zeros(len(gidx_to_pidx_map[grid_idx])) + rr_cpu = xp.asnumpy(qoi["rates"][TPSINDEX.RR_IDX[0]]) + + for c_idx in range(self.param.n_sub_clusters): + rr_interp[self.sub_cluster_idx_to_pidx[grid_idx][c_idx]] = rr_cpu[c_idx] * self.param.N_Avo + + rates[0][gidx_to_pidx_map[grid_idx]] = rr_interp + + await ts + rates = rates.reshape((-1)) + else: + if(n_reactions>0): + rates[:,:] = 0.0 + ts = TaskSpace("T") + for grid_idx in self.active_grid_idx: + @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) + def t1(): + qA = boltzmann.bte_solver._op_diag_dg[grid_idx] + u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + + h_curr = xp.dot(qA, u0) + h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) + qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + rates[0][gidx_to_pidx_map[grid_idx]] = xp.asnumpy(qoi["rates"][TPSINDEX.RR_IDX[0]]) * self.param.N_Avo + + await ts + rates = rates.reshape((-1)) return + if __name__=="__main__": comm = MPI.COMM_WORLD @@ -751,7 +889,9 @@ def t1(): @spawn(placement=cpu, vcus=0) async def __main__(): - await boltzmann.solve_init() + + bte_use_interp = True + await boltzmann.solve_init(bte_use_interp) xp = boltzmann.bte_solver.xp_module max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") @@ -759,31 +899,39 @@ async def __main__(): tt = 0#interface.currentTime() tau = (1/boltzmann.param.Efreq) dt_tps = interface.timeStep() - dt_bte = boltzmann.param.dt * (dt_tps) + dt_bte = 1e-2 * tau #boltzmann.param.dt * (dt_tps) bte_steps = int(dt_tps/dt_bte) n_grids = boltzmann.param.n_grids - cycle_freq = int(xp.ceil(tau/dt_tps)) + cycle_freq = 10 #int(xp.ceil(tau/dt_tps)) gidx_to_device_map = boltzmann.gidx_to_device_map + + tps_sper_cycle = int(xp.ceil(tau/dt_tps)) + bte_sper_cycle = int(xp.ceil(tau/dt_bte)) + + print("tps steps per cycle : ", tps_sper_cycle, "bte_steps per cycle", bte_sper_cycle) + while (iter Date: Thu, 15 Feb 2024 18:33:33 -0600 Subject: [PATCH 43/75] now we evolve the tps code until steady state once the BTE rate coefficients computed. --- src/tps-bte_0d3v.py | 232 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 205 insertions(+), 27 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 1d8e29095..2ad2ead8d 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -283,6 +283,7 @@ def grid_setup(self, interface): active_grid_idx.append(grid_idx) self.active_grid_idx = active_grid_idx #[i for i in range(self.param.n_grids)] + #self.active_grid_idx = [i for i in range(self.param.n_grids)] self.sub_clusters_run = False self.profile_tt[pp.SETUP].stop() return @@ -626,8 +627,8 @@ def ts_op_setup(grid_idx): if(self.param.use_gpu==1): self.xp_module = cp - ts = TaskSpace("T") + ts = TaskSpace("T") for grid_idx in self.active_grid_idx: dev_id = gidx_to_device_map(grid_idx, n_grids) @spawn(ts[grid_idx], placement=[gpu(dev_id)], vcus=0.0) @@ -636,15 +637,14 @@ def t1(): f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") if (use_interp==True): - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw[: , 0:self.param.n_sub_clusters]) + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", cp.copy(f_mw[: , 0:self.param.n_sub_clusters])) else: - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw) + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", cp.copy(f_mw)) await ts else: self.xp_module = np - ts = TaskSpace("T") for grid_idx in self.active_grid_idx: @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) @@ -652,9 +652,9 @@ def t1(): ts_op_setup(grid_idx) f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") if (use_interp==True): - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw[: , 0:self.param.n_sub_clusters]) + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", np.copy(f_mw[: , 0:self.param.n_sub_clusters])) else: - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", f_mw) + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", np.copy(f_mw)) await ts @@ -673,9 +673,18 @@ async def solve_step(self, time, delta_t): for grid_idx in self.active_grid_idx: @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) def t1(): + + # seting the E field for time t + dt (implicit step) + xp = self.bte_solver.xp_module + eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + Et = eRe * xp.cos(2 * xp.pi * self.param.Efreq * (time + delta_t)) + eIm * xp.sin(2 * xp.pi * self.param.Efreq * (time + delta_t)) + self.bte_solver.set_boltzmann_parameter(grid_idx, "E", Et) + u0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u0") v = self.bte_solver.step(grid_idx, u0, self.param.atol, self.param.rtol, self.param.max_iter, time, delta_t) self.bte_solver.set_boltzmann_parameter(grid_idx, "u1", v) + await ts return @@ -846,6 +855,7 @@ def t1(): await ts rates = rates.reshape((-1)) + rates[rates<0] = 0.0 else: if(n_reactions>0): rates[:,:] = 0.0 @@ -863,9 +873,96 @@ def t1(): await ts rates = rates.reshape((-1)) + rates[rates<0] = 0.0 return + def io_output_data(self, grid_idx, u0, plot_data:bool, export_csv:bool, fname:str): + xp = self.xp_module + gidx_to_device_map = self.gidx_to_device_map + n_grids = self.param.n_grids + dev_id = gidx_to_device_map(grid_idx, n_grids) + qA = self.bte_solver._op_diag_dg[grid_idx] + h_curr = xp.dot(qA, u0) + h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) + ff = h_curr + qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + + + def asnumpy(a): + if cp.get_array_module(a)==cp: + with cp.cuda.Device(dev_id): + return cp.asnumpy(a) + else: + return a + + ff_cpu = asnumpy(ff) + ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) + ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff_cpu) + + n0 = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "n0")) + ne = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ne")) + ni = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ni")) + Tg = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg")) + eRe = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")) + eIm = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")) + eMag = np.sqrt(eRe**2 + eIm**2) + data_csv = np.zeros((ne.shape[0], 8 + len((self.param.collisions)))) + + if export_csv: + data_csv[: , 0] = n0 + data_csv[: , 1] = ne + data_csv[: , 2] = ni + data_csv[: , 3] = Tg + data_csv[: , 4] = eMag + data_csv[: , 5] = asnumpy(qoi["energy"]) + data_csv[: , 6] = asnumpy(qoi["mobility"]) + data_csv[: , 7] = asnumpy(qoi["diffusion"]) + + for col_idx, g in enumerate(self.param.collisions): + data_csv[: , 8 + col_idx] = asnumpy(qoi["rates"][col_idx]) + + + with open("%s_qoi.csv"%(fname), 'w', encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(self.param.collisions): + header.append(str(g)) + + writer.writerow(header) + writer.writerows(data_csv) + + if plot_data: + n_pts = ff_cpu.shape[1] + num_sh = len(self.bte_solver._par_lm[grid_idx]) + num_subplots = num_sh + num_plt_cols = min(num_sh, 4) + num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) + fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=200, constrained_layout=True) + plt_idx = 1 + n_pts_step = n_pts // 20 + + for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): + plt.subplot(num_plt_rows, num_plt_cols, plt_idx) + for ii in range(0, n_pts, n_pts_step): + fr = np.abs(ff_r[ii, lm_idx, :]) + #plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) + + #plt.xlabel(r"energy (eV)") + #plt.ylabel(r"$f_%d$"%(lm[0])) + plt.grid(visible=True) + if lm_idx==0: + plt.legend(prop={'size': 6}) + + plt_idx +=1 + + plt.savefig("%s_plot.png"%(fname)) + plt.close() + + return + + if __name__=="__main__": comm = MPI.COMM_WORLD @@ -903,33 +1000,73 @@ async def __main__(): bte_steps = int(dt_tps/dt_bte) n_grids = boltzmann.param.n_grids - cycle_freq = 10 #int(xp.ceil(tau/dt_tps)) + cycle_freq = 1 #int(xp.ceil(tau/dt_tps)) + terminal_output_freq = -1 gidx_to_device_map = boltzmann.gidx_to_device_map tps_sper_cycle = int(xp.ceil(tau/dt_tps)) bte_sper_cycle = int(xp.ceil(tau/dt_bte)) + bte_max_cycles = 10 + tps_max_cycles = 1000 print("tps steps per cycle : ", tps_sper_cycle, "bte_steps per cycle", bte_sper_cycle) - + tps.solveStep() + tps.push(interface) + p_t1 = 0 + p_t2 = 0 while (iter0): + # print(grid_idx, " u_ptr ", u_avg[grid_idx].data, " v_ptr " , v_avg[grid_idx].data) + + bte_v[grid_idx] = xp.copy(u0) + + await ts + p_t3 = min_mean_max(p_t2-p_t1, comm) + print("[BTE] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E --- runtime = %.4E (s) "%(bte_idx, tt_bte, max(abs_error), max(rel_error), p_t3[2])) + + if max(abs_error) < boltzmann.param.atol or max(rel_error)< boltzmann.param.rtol: + break + + if bte_idx < bte_sper_cycle * bte_max_cycles: + u_avg = [0 for i in range(n_grids)] + + + if bte_idx == bte_sper_cycle * bte_max_cycles : + break + ts = TaskSpace("T") for grid_idx in boltzmann.active_grid_idx: @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) @@ -937,11 +1074,13 @@ def t1(): u_avg[grid_idx] += cycle_f1 * boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") await ts - t1 = time() + p_t1 = time() await boltzmann.solve_step(tt_bte, dt_bte) - t2 = time() - t1 = min_mean_max(t2-t1, comm) - print("[BTE] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (bte_idx, tt_bte/tau, t1[0],t1[1],t1[2])) + p_t2 = time() + + if(terminal_output_freq > 0 and bte_idx % terminal_output_freq ==0): + p_t3 = min_mean_max(p_t2-p_t1, comm) + print("[BTE] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (bte_idx, tt_bte, p_t3[0], p_t3[1], p_t3[2])) ts = TaskSpace("T") for grid_idx in boltzmann.active_grid_idx: @@ -961,10 +1100,49 @@ def t1(): await ts await boltzmann.push(interface, use_interp=bte_use_interp) - if (iter%cycle_freq==0): - interface.saveDataCollection(cycle=(iter//cycle_freq), time=tt/tau) + + ################### tps solve ###################################### tps.fetch(interface) - tt += dt_tps * tps_sper_cycle + tps_u = 0 + tps_v = 0 + tt_tps = 0 + + p_t1 = 0 + p_t2 = 0 + for tps_idx in range(tps_sper_cycle * tps_max_cycles + 1): + if (tps_idx % tps_sper_cycle == 0): + + tps.push(interface) + nspecies = interface.Nspecies() + heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + tps_npts = len(heavy_temp) + tps_u = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(nspecies, tps_npts) + # rates = np.array(interface.HostRead(libtps.t2bIndex.ReactionRates), copy=False).reshape((1, tps_npts)) + # print("rates", np.min(rates[0]), np.max(rates[0])) + + abs_error = np.linalg.norm(tps_u - tps_v, axis=1) + rel_error = abs_error / np.linalg.norm(tps_u, axis=1) + tps_v = np.copy(tps_u) + + p_t3 = min_mean_max(p_t2-p_t1, comm) + print("[TPS] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E -- runtime = %.4E (s)"%(tps_idx, tt_tps, np.max(abs_error), np.max(rel_error), p_t3[2])) + if (np.max(abs_error) < boltzmann.param.atol or np.max(rel_error) < max(1e-6,boltzmann.param.rtol)): + break + + if (tps_idx == tps_sper_cycle * tps_max_cycles): + break + + p_t1 = time() + tps.solveStep() + p_t2 = time() + if(terminal_output_freq > 0 and tps_idx % terminal_output_freq ==0): + p_t3 = min_mean_max(p_t2-p_t1, comm) + print("[TPS] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (tps_idx,tt_tps, p_t3[0],p_t3[1],p_t3[2])) + tt_tps +=dt_tps + + tps.push(interface) + + tt += dt_tps * tps_idx iter+=1 From d81feaafcefe6d372ad102e49119ee21356157c4 Mon Sep 17 00:00:00 2001 From: milindasf Date: Tue, 27 Feb 2024 14:12:41 -0600 Subject: [PATCH 44/75] 6-species collision model added for the tps batched sover, * For the data output now added additional collisions and mole fractions. * random seed parameter initialized for reproducability between runs --- src/tps-bte_0d3v.py | 377 ++++++++++++++++++++++++++------------------ 1 file changed, 225 insertions(+), 152 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 2ad2ead8d..3cfbfdc53 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -50,19 +50,18 @@ def reset(self): def min_mean_max(a, comm: MPI.Comm): return (comm.allreduce(a, MPI.MIN) , comm.allreduce(a, MPI.SUM)/comm.Get_size(), comm.allreduce(a, MPI.MAX)) - -try: - df = pd.read_csv("ionization_rates.csv") - Te = np.array(df["Te[K]"]) - r_arr = np.array(df["Arr[m3/s]"]) - r_csc = np.array(df["CSC_Maxwellian[m3/s]"]) - r_arr = scipy.interpolate.interp1d(Te, r_arr,bounds_error=False, fill_value=0.0) - r_csc = scipy.interpolate.interp1d(Te, r_csc,bounds_error=False, fill_value=0.0) - print("ionization coefficient read from file ") -except: - print("ionization rate coefficient file not found!!") - r_arr = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) - r_csc = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) +# try: +# df = pd.read_csv("ionization_rates.csv") +# Te = np.array(df["Te[K]"]) +# r_arr = np.array(df["Arr[m3/s]"]) +# r_csc = np.array(df["CSC_Maxwellian[m3/s]"]) +# r_arr = scipy.interpolate.interp1d(Te, r_arr,bounds_error=False, fill_value=0.0) +# r_csc = scipy.interpolate.interp1d(Te, r_csc,bounds_error=False, fill_value=0.0) +# print("ionization coefficient read from file ") +# except: +# print("ionization rate coefficient file not found!!") +# r_arr = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) +# r_csc = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) # set path to C++ TPS library path = os.path.abspath(os.path.dirname(sys.argv[0])) @@ -94,7 +93,7 @@ class BoltzmannSolverParams(): l_max = 1 # spherical modes uses, 0, to l_max ev_max = 16 # v-space grid truncation (eV) n_grids = 4 # number of v-space grids - n_sub_clusters= 300 # number of sub-clusters + n_sub_clusters= 200 # number of sub-clusters dt = 1e-3 # [] non-dimentionalized time w.r.t. oscilation period cycles = 10 # number of max cycles to evolve @@ -107,7 +106,7 @@ class BoltzmannSolverParams(): use_gpu = 1 # enable GPU use (1)-GPU solver, (0)-CPU solver dev_id = 0 # which GPU device to use only used when use_gpu=1 - collisions = ["g0","g2"] # collision string g0-elastic, g2-ionization + collisions = "" # collision string g0-elastic, g2-ionization export_csv = 1 # export the qois to csv file plot_data = 1 @@ -131,14 +130,16 @@ class BoltzmannSolverParams(): n0 = 3.22e22 #[m^{-3}] + rand_seed = 0 + class TPSINDEX(): """ simple index map to differnt fields, from the TPS arrays """ - ION_IDX = 0 # ion density index - ELE_IDX = 1 # electron density index - NEU_IDX = 2 # neutral density index + # ION_IDX = 0 # ion density index + # ELE_IDX = 1 # electron density index + # NEU_IDX = 2 # neutral density index EF_RE_IDX = 0 # Re(E) index EF_IM_IDX = 1 # Im(E) index @@ -146,7 +147,20 @@ class TPSINDEX(): # in future we need to setup this methodically # here key denotes the idx running from 0, nreactions-1 # value denotes the reaction index in the qoi array - RR_IDX = {0:1} + RR_IDX = {0 : 4 , 1 : 5 , 2 : 6, 3 : 7, 4 : 1 , 5 : 2, 6 : 3 } + + + ION_IDX = 3 + ELE_IDX = 4 + NEU_IDX = 5 + EX1_IDX = 0 + EX2_IDX = 1 + EX3_IDX = 2 + + MOLE_FRAC_IDX = {0: NEU_IDX, 1: EX1_IDX , 2: EX2_IDX , 3: EX3_IDX} + + + class Boltzmann0D2VBactchedSolver: @@ -195,6 +209,7 @@ def __parse_config_file__(self, fname): self.param.Nr = int(config.get("boltzmannSolver", "Nr").split("#")[0].strip()) self.param.l_max = int(config.get("boltzmannSolver", "l_max").split("#")[0].strip()) self.param.n_grids = int(config.get("boltzmannSolver", "n_grids").split("#")[0].strip()) + self.param.n_sub_clusters = int(config.get("boltzmannSolver", "n_sub_clusters").split("#")[0].strip()) self.param.dt = float(config.get("boltzmannSolver", "dt").split("#")[0].strip()) self.param.cycles = float(config.get("boltzmannSolver", "cycles").split("#")[0].strip()) self.param.solver_type = str(config.get("boltzmannSolver", "solver_type").split("#")[0].strip()) @@ -203,7 +218,7 @@ def __parse_config_file__(self, fname): self.param.max_iter = int(config.get("boltzmannSolver", "max_iter").split("#")[0].strip()) self.param.ee_collisions = int(config.get("boltzmannSolver", "ee_collisions").split("#")[0].strip()) self.param.use_gpu = int(config.get("boltzmannSolver", "use_gpu").split("#")[0].strip()) - #self.param.collisions = config.get("boltzmannSolver", "collisions").split("#")[0] + self.param.collisions = str(config.get("boltzmannSolver", "collisions").split("#")[0].strip()) self.param.export_csv = int(config.get("boltzmannSolver", "export_csv").split("#")[0].strip()) self.param.plot_data = int(config.get("boltzmannSolver", "plot_data").split("#")[0].strip()) @@ -275,15 +290,15 @@ def grid_setup(self, interface): self.bte_solver.set_boltzmann_parameter(grid_idx, "f_mw", self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian")) - active_grid_idx=list() - for grid_idx in range(n_grids): - spec_sp = self.bte_solver._op_spec_sp[grid_idx] - ev_max_ext = (spec_sp._basis_p._t[-1] * vth[grid_idx] / self.param.c_gamma)**2 - if ev_max_ext > 15.76: - active_grid_idx.append(grid_idx) + # active_grid_idx=list() + # for grid_idx in range(n_grids): + # spec_sp = self.bte_solver._op_spec_sp[grid_idx] + # ev_max_ext = (spec_sp._basis_p._t[-1] * vth[grid_idx] / self.param.c_gamma)**2 + # if ev_max_ext > 15.76: + # active_grid_idx.append(grid_idx) - self.active_grid_idx = active_grid_idx #[i for i in range(self.param.n_grids)] - #self.active_grid_idx = [i for i in range(self.param.n_grids)] + # self.active_grid_idx = active_grid_idx #[i for i in range(self.param.n_grids)] + self.active_grid_idx = [i for i in range(self.param.n_grids)] self.sub_clusters_run = False self.profile_tt[pp.SETUP].stop() return @@ -301,9 +316,11 @@ def solve_wo_parla(self): self.qoi = [None for grid_idx in range(self.param.n_grids)] self.ff = [None for grid_idx in range(self.param.n_grids)] + coll_list = self.bte_solver.get_collision_list() + coll_names = self.bte_solver.get_collision_names() if csv_write: - data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) + data_csv = np.empty((self.tps_npts, 8 + len(coll_list))) t1 = time() for grid_idx in range(n_grids): @@ -366,7 +383,7 @@ def asnumpy(a): data_csv[gidx_to_pidx_map[grid_idx], 6] = asnumpy(qoi["mobility"]) data_csv[gidx_to_pidx_map[grid_idx], 7] = asnumpy(qoi["diffusion"]) - for col_idx, g in enumerate(self.param.collisions): + for col_idx, g in enumerate(coll_list): data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = asnumpy(qoi["rates"][col_idx]) if plot_data: @@ -401,8 +418,8 @@ def asnumpy(a): writer = csv.writer(f,delimiter=',') # write the header header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(self.param.collisions): - header.append(str(g)) + for col_idx, g in enumerate(coll_list): + header.append(str(coll_names[col_idx])) writer.writerow(header) writer.writerows(data_csv) @@ -419,6 +436,11 @@ async def fetch(self, interface, use_interp:bool): efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(nspecies, tps_npts) + cs_avail_species = self.bte_solver._avail_species + + n0 = np.sum(species_densities, axis=0) - species_densities[TPSINDEX.ELE_IDX] + ns_by_n0 = np.concatenate([species_densities[TPSINDEX.MOLE_FRAC_IDX[i]]/n0 for i in range(len(cs_avail_species))]).reshape((len(cs_avail_species), tps_npts)) + # np.save("n0.npy", species_densities[TPSINDEX.NEU_IDX]) # np.save("ne.npy", species_densities[TPSINDEX.ELE_IDX]) # np.save("ni.npy", species_densities[TPSINDEX.ION_IDX]) @@ -432,10 +454,7 @@ async def fetch(self, interface, use_interp:bool): use_gpu = self.param.use_gpu Tg = heavy_temp - n0 = species_densities[TPSINDEX.NEU_IDX] - ne = species_densities[TPSINDEX.ELE_IDX] - ni = species_densities[TPSINDEX.ION_IDX] - + Ex = efield[0] Ey = efield[1] @@ -445,9 +464,8 @@ async def fetch(self, interface, use_interp:bool): Ex = ExbyN * self.param.n0 * self.param.Td_fac Ey = EybyN * self.param.n0 * self.param.Td_fac - ion_deg = np.zeros_like(ne) #ne/n0 - - m_bte = np.concatenate((ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1,1)) ), axis=1) + ion_deg = species_densities[TPSINDEX.ELE_IDX]/n0 + m_bte = np.concatenate([ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1, 1))] + [ ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])], axis=1) self.sub_cluster_idx_to_pidx = None self.sub_cluster_c = None @@ -470,6 +488,10 @@ def t1(): dev_id = self.gidx_to_device_map(grid_idx, n_grids) m = m_bte[gidx_to_pidx[grid_idx]] mw , mw_std = normalize(m) + + # to repoduce clusters + np.random.seed(self.param.rand_seed) + mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=1000, thresh=1e-8, check_finite=False)[0] mcw0[0:mcw.shape[0], :] = mcw[:,:] @@ -503,24 +525,28 @@ def t1(): # plt.close() - n0 = np.ones(mc.shape[0]) * self.param.n0 - Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac - Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac - - Tg = mc[: , 2] - ne = mc[: , 3] * self.param.n0 - ni = mc[: , 3] * self.param.n0 - EMag = np.sqrt(Ex**2 + Ey**2) + n0 = np.ones(mc.shape[0]) * self.param.n0 + Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac + Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac + Tg = mc[: , 2] + ne = mc[: , 3] * self.param.n0 + ni = mc[: , 3] * self.param.n0 + ns_by_n0 = np.transpose(mc[: , 4:]) + EMag = np.sqrt(Ex**2 + Ey**2) if self.param.verbose == 1 : print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) print("Efreq = %.4E [1/s]" %(self.param.Efreq)) print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) - print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN), np.max(ExbyN))) - print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN), np.max(EybyN))) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) + print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN) , np.max(ExbyN))) + print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN) , np.max(EybyN))) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) + + for i in range(ns_by_n0.shape[0]): + print("[%d] ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(i, np.min(ns_by_n0[i]) , np.max(ns_by_n0[i]))) if (use_gpu==1): with cp.cuda.Device(dev_id): @@ -531,8 +557,9 @@ def t1(): ne = cp.array(ne) ni = cp.array(ni) EMag = cp.sqrt(Ex**2 + Ey**2) + ns_by_n0 = cp.array(ns_by_n0) - + self.bte_solver.set_boltzmann_parameter(grid_idx, "ns_by_n0", ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "ne" , ne) self.bte_solver.set_boltzmann_parameter(grid_idx, "ni" , ni) @@ -562,6 +589,8 @@ def t1(): Tg = mc[: , 2] ne = mc[: , 3] * self.param.n0 ni = mc[: , 3] * self.param.n0 + ns_by_n0 = np.transpose(mc[: , 4:]) + EMag = np.sqrt(Ex**2 + Ey**2) if self.param.verbose == 1 : @@ -573,6 +602,9 @@ def t1(): print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN), np.max(EybyN))) print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) + + for i in range(ns_by_n0.shape[0]): + print("ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(np.min(ns_by_n0[i]) , np.max(ns_by_n0[i]))) if (use_gpu == 1): @@ -584,7 +616,9 @@ def t1(): Ey = cp.array(Ey) Tg = cp.array(Tg) EMag = cp.sqrt(Ex**2 + Ey**2) + ns_by_n0 = cp.array(ns_by_n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ns_by_n0", ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "ne" , ne) self.bte_solver.set_boltzmann_parameter(grid_idx, "ni" , ni) @@ -708,6 +742,8 @@ async def solve(self): self.qoi = [None for grid_idx in range(self.param.n_grids)] self.ff = [None for grid_idx in range(self.param.n_grids)] num_gpus = len(gpu) + coll_list = self.bte_solver.get_collision_list() + coll_names = self.bte_solver.get_collision_names() if (use_gpu==1): parla_placement = [gpu(gidx_to_device_map(grid_idx,n_grids)) for grid_idx in range(n_grids)] @@ -715,16 +751,16 @@ async def solve(self): parla_placement = [cpu for grid_idx in range(n_grids)] if csv_write: - data_csv = np.empty((self.tps_npts, 8 + len(self.param.collisions))) + data_csv = np.empty((self.tps_npts, 8 + len(coll_list))) self.profile_tt[pp.SOLVE].start() ts = TaskSpace("T") - for grid_idx in range(self.param.n_grids): + for grid_idx in self.active_grid_idx: @spawn(ts[grid_idx], placement=[parla_placement[grid_idx]], vcus=0.0) def t1(): try: print("[Boltzmann] %d / %d launching grid %d on %s"%(rank, npes, grid_idx, parla_placement[grid_idx])) - f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u0") ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) self.ff[grid_idx] = ff self.qoi[grid_idx] = qoi @@ -741,6 +777,7 @@ def t1(): print("[Boltzmann] setup (min) = %.4E (s) setup (mean) = %.4E (s) setup (max) = %.4E (s)" % (t1[0],t1[1],t1[2])) print("[Boltzmann] solve (min) = %.4E (s) solve (mean) = %.4E (s) solve (max) = %.4E (s)" % (t2[0],t2[1],t2[2])) + """ if (self.param.export_csv ==1 or self.param.plot_data==1): for grid_idx in range(n_grids): dev_id = gidx_to_device_map(grid_idx, n_grids) @@ -776,7 +813,7 @@ def asnumpy(a): data_csv[gidx_to_pidx_map[grid_idx], 6] = asnumpy(qoi["mobility"]) data_csv[gidx_to_pidx_map[grid_idx], 7] = asnumpy(qoi["diffusion"]) - for col_idx, g in enumerate(self.param.collisions): + for col_idx, g in enumerate(coll_list): data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = asnumpy(qoi["rates"][col_idx]) if plot_data: @@ -811,11 +848,12 @@ def asnumpy(a): writer = csv.writer(f,delimiter=',') # write the header header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(self.param.collisions): - header.append(str(g)) + for col_idx, g in enumerate(coll_list): + header.append(str(coll_names[col_idx])) writer.writerow(header) writer.writerows(data_csv) + """ return @@ -840,18 +878,16 @@ async def push(self, interface, use_interp:bool): def t1(): qA = boltzmann.bte_solver._op_diag_dg[grid_idx] u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - - h_curr = xp.dot(qA, u0) - h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) + h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, u0) qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + rr_cpu = xp.asnumpy(qoi["rates"]) rr_interp = np.zeros(len(gidx_to_pidx_map[grid_idx])) - rr_cpu = xp.asnumpy(qoi["rates"][TPSINDEX.RR_IDX[0]]) - - for c_idx in range(self.param.n_sub_clusters): - rr_interp[self.sub_cluster_idx_to_pidx[grid_idx][c_idx]] = rr_cpu[c_idx] * self.param.N_Avo + for r_idx in range(n_reactions): + for c_idx in range(self.param.n_sub_clusters): + rr_interp[self.sub_cluster_idx_to_pidx[grid_idx][c_idx]] = rr_cpu[TPSINDEX.RR_IDX[r_idx]][c_idx] * self.param.N_Avo - rates[0][gidx_to_pidx_map[grid_idx]] = rr_interp + rates[r_idx][gidx_to_pidx_map[grid_idx]] = rr_interp await ts rates = rates.reshape((-1)) @@ -865,11 +901,12 @@ def t1(): def t1(): qA = boltzmann.bte_solver._op_diag_dg[grid_idx] u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - - h_curr = xp.dot(qA, u0) - h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) + h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, u0) qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) - rates[0][gidx_to_pidx_map[grid_idx]] = xp.asnumpy(qoi["rates"][TPSINDEX.RR_IDX[0]]) * self.param.N_Avo + rr_cpu = xp.asnumpy(qoi["rates"]) + + for r_idx in range(n_reactions): + rates[r_idx][gidx_to_pidx_map[grid_idx]] = rr_cpu[TPSINDEX.RR_IDX[r_idx]] * self.param.N_Avo await ts rates = rates.reshape((-1)) @@ -886,8 +923,16 @@ def io_output_data(self, grid_idx, u0, plot_data:bool, export_csv:bool, fname:st h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) ff = h_curr qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) - - + coll_list = self.bte_solver.get_collision_list() + coll_names = self.bte_solver.get_collision_names() + cs_data = self.bte_solver.get_cross_section_data() + + cs_species = list() + for col_idx, (k,v) in enumerate(cs_data.items()): + cs_species.append(v["species"]) + + cs_species = list(sorted(set(cs_species), key=cs_species.index)) + def asnumpy(a): if cp.get_array_module(a)==cp: with cp.cuda.Device(dev_id): @@ -902,33 +947,36 @@ def asnumpy(a): n0 = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "n0")) ne = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ne")) ni = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ni")) + ns_by_n0 = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ns_by_n0")).T Tg = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg")) eRe = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")) eIm = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")) eMag = np.sqrt(eRe**2 + eIm**2) - data_csv = np.zeros((ne.shape[0], 8 + len((self.param.collisions)))) + data_csv = np.zeros((ne.shape[0], 7 + ns_by_n0.shape[1] + len((coll_list)))) if export_csv: data_csv[: , 0] = n0 - data_csv[: , 1] = ne - data_csv[: , 2] = ni - data_csv[: , 3] = Tg - data_csv[: , 4] = eMag - data_csv[: , 5] = asnumpy(qoi["energy"]) - data_csv[: , 6] = asnumpy(qoi["mobility"]) - data_csv[: , 7] = asnumpy(qoi["diffusion"]) + data_csv[: , 1] = ne/n0 + idx =2 + ns_by_n0.shape[1] + data_csv[: ,2:idx] = ns_by_n0[:,:] + + data_csv[: , idx] = Tg + data_csv[: , idx+1] = eMag + data_csv[: , idx+2] = asnumpy(qoi["energy"]) + data_csv[: , idx+3] = asnumpy(qoi["mobility"]) + data_csv[: , idx+4] = asnumpy(qoi["diffusion"]) - for col_idx, g in enumerate(self.param.collisions): - data_csv[: , 8 + col_idx] = asnumpy(qoi["rates"][col_idx]) + for col_idx, g in enumerate(coll_list): + data_csv[: , idx+5 + col_idx] = asnumpy(qoi["rates"][col_idx]) with open("%s_qoi.csv"%(fname), 'w', encoding='UTF8') as f: writer = csv.writer(f,delimiter=',') # write the header - header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(self.param.collisions): - header.append(str(g)) + header = ["n0", "ne/n0"] + ["(%s)/n0"%(s) for s in cs_species] + ["Tg", "E", "energy", "mobility", "diffusion"] + for col_idx, g in enumerate(coll_list): + header.append(str(coll_names[col_idx])) writer.writerow(header) writer.writerows(data_csv) @@ -946,11 +994,12 @@ def asnumpy(a): for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): plt.subplot(num_plt_rows, num_plt_cols, plt_idx) for ii in range(0, n_pts, n_pts_step): - fr = np.abs(ff_r[ii, lm_idx, :]) - #plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) + fr = np.abs(ff_r[ii, lm_idx, :]) + mf_str = " ".join([r"$%s/n0$=%.2E"%(s, ns_by_n0[ii, s_idx]) for s_idx, s in enumerate(cs_species)]) + plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td]"%(Tg[ii], eMag[ii]/n0[ii]/1e-21) + " " +mf_str) - #plt.xlabel(r"energy (eV)") - #plt.ylabel(r"$f_%d$"%(lm[0])) + plt.xlabel(r"energy (eV)") + plt.ylabel(r"$f_%d$"%(lm[0])) plt.grid(visible=True) if lm_idx==0: plt.legend(prop={'size': 6}) @@ -1021,86 +1070,110 @@ async def __main__(): ########################## BTE solve ################################################## await boltzmann.fetch(interface, use_interp=bte_use_interp) - tt_bte = 0 - bte_u = [0 for i in range(n_grids)] - bte_v = [0 for i in range(n_grids)] - - u_avg = [0 for i in range(n_grids)] - abs_error = [0 for i in range(n_grids)] - rel_error = [0 for i in range(n_grids)] - cycle_f1 = (0.5 * dt_bte/ (bte_sper_cycle * dt_bte)) - - for bte_idx in range(bte_sper_cycle * bte_max_cycles +1): + if (boltzmann.param.solver_type=="steady-state"): + await boltzmann.solve() + ts = TaskSpace("T") + for grid_idx in boltzmann.active_grid_idx: + @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) + def t1(): + boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u_avg", boltzmann.ff[grid_idx]) + await ts - if (bte_idx % bte_sper_cycle == 0): - + await boltzmann.push(interface, use_interp=bte_use_interp) + for grid_idx in boltzmann.active_grid_idx: + u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) + + else: + assert boltzmann.param.solver_type == "transient", "unknown BTE solver type" + """ + transient BTE solver (evolve until time-periodic solutions) + """ + + tt_bte = 0 + bte_u = [0 for i in range(n_grids)] + bte_v = [0 for i in range(n_grids)] + + u_avg = [0 for i in range(n_grids)] + + abs_error = [0 for i in range(n_grids)] + rel_error = [0 for i in range(n_grids)] + cycle_f1 = (0.5 * dt_bte/ (bte_sper_cycle * dt_bte)) + + for bte_idx in range(bte_sper_cycle * bte_max_cycles +1): + if (bte_idx % bte_sper_cycle == 0): + ts = TaskSpace("T") + for grid_idx in boltzmann.active_grid_idx: + @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) + def t1(): + u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") + fname = "%s_iter%04d_grid_%04d_cycle_%0d"%(boltzmann.param.out_fname, iter, grid_idx, bte_idx//bte_sper_cycle) + + #boltzmann.io_output_data(grid_idx, u0, False, True, fname) + + abs_error[grid_idx] = xp.max(xp.abs(bte_v[grid_idx]-u0)) + rel_error[grid_idx] = abs_error[grid_idx] / xp.max(xp.abs(u0)) + #print("[BTE] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E"%(bte_idx, tt_bte, abs_error[grid_idx], rel_error[grid_idx])) + + # if(bte_idx >0): + # print(grid_idx, " u_ptr ", u_avg[grid_idx].data, " v_ptr " , v_avg[grid_idx].data) + + bte_v[grid_idx] = xp.copy(u0) + + await ts + p_t3 = min_mean_max(p_t2-p_t1, comm) + print("[BTE] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E --- runtime = %.4E (s) "%(bte_idx, tt_bte, max(abs_error), max(rel_error), p_t3[2])) + + if max(abs_error) < boltzmann.param.atol or max(rel_error)< boltzmann.param.rtol: + break + + if bte_idx < bte_sper_cycle * bte_max_cycles: + u_avg = [0 for i in range(n_grids)] + + if bte_idx == bte_sper_cycle * bte_max_cycles : + break + ts = TaskSpace("T") for grid_idx in boltzmann.active_grid_idx: @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) def t1(): - u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") - fname = "%s_iter%04d_grid_%04d_cycle_%0d"%(boltzmann.param.out_fname, iter, grid_idx, bte_idx//bte_sper_cycle) - - #boltzmann.io_output_data(grid_idx, u0, False, True, fname) - - abs_error[grid_idx] = xp.max(xp.abs(bte_v[grid_idx]-u0)) - rel_error[grid_idx] = abs_error[grid_idx] / xp.max(xp.abs(u0)) - #print("[BTE] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E"%(bte_idx, tt_bte, abs_error[grid_idx], rel_error[grid_idx])) - - # if(bte_idx >0): - # print(grid_idx, " u_ptr ", u_avg[grid_idx].data, " v_ptr " , v_avg[grid_idx].data) - - bte_v[grid_idx] = xp.copy(u0) - + u_avg[grid_idx] += cycle_f1 * boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") await ts - p_t3 = min_mean_max(p_t2-p_t1, comm) - print("[BTE] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E --- runtime = %.4E (s) "%(bte_idx, tt_bte, max(abs_error), max(rel_error), p_t3[2])) - if max(abs_error) < boltzmann.param.atol or max(rel_error)< boltzmann.param.rtol: - break + p_t1 = time() + await boltzmann.solve_step(tt_bte, dt_bte) + p_t2 = time() - if bte_idx < bte_sper_cycle * bte_max_cycles: - u_avg = [0 for i in range(n_grids)] - - - if bte_idx == bte_sper_cycle * bte_max_cycles : - break + if(terminal_output_freq > 0 and bte_idx % terminal_output_freq ==0): + p_t3 = min_mean_max(p_t2-p_t1, comm) + print("[BTE] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (bte_idx, tt_bte, p_t3[0], p_t3[1], p_t3[2])) - ts = TaskSpace("T") - for grid_idx in boltzmann.active_grid_idx: - @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) - def t1(): - u_avg[grid_idx] += cycle_f1 * boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") - await ts - - p_t1 = time() - await boltzmann.solve_step(tt_bte, dt_bte) - p_t2 = time() + ts = TaskSpace("T") + for grid_idx in boltzmann.active_grid_idx: + @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) + def t1(): + u_avg[grid_idx] += cycle_f1 * boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u1") + boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u0", boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u1")) + await ts - if(terminal_output_freq > 0 and bte_idx % terminal_output_freq ==0): - p_t3 = min_mean_max(p_t2-p_t1, comm) - print("[BTE] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (bte_idx, tt_bte, p_t3[0], p_t3[1], p_t3[2])) + tt_bte += dt_bte ts = TaskSpace("T") for grid_idx in boltzmann.active_grid_idx: @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) def t1(): - u_avg[grid_idx] += cycle_f1 * boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u1") - boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u0", boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u1")) + xp = boltzmann.xp_module + qA = boltzmann.bte_solver._op_diag_dg[grid_idx] + u_avg[grid_idx] = xp.dot(qA, u_avg[grid_idx]) + boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u_avg", u_avg[grid_idx]) await ts - - tt_bte += dt_bte - - ts = TaskSpace("T") - for grid_idx in boltzmann.active_grid_idx: - @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) - def t1(): - boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u_avg", u_avg[grid_idx]) - await ts - await boltzmann.push(interface, use_interp=bte_use_interp) - - + await boltzmann.push(interface, use_interp=bte_use_interp) + + for grid_idx in boltzmann.active_grid_idx: + u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) + ################### tps solve ###################################### tps.fetch(interface) tps_u = 0 From 170af58093a97e1850c2f636736fbe86fc4062f7 Mon Sep 17 00:00:00 2001 From: milindasf Date: Fri, 1 Mar 2024 12:57:17 -0600 Subject: [PATCH 45/75] adding e-e collisions for the torch interface with lumped cs data --- src/tps-bte_0d3v.py | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 3cfbfdc53..c9aefb338 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -69,6 +69,7 @@ def min_mean_max(a, comm: MPI.Comm): sys.path.append(path + "/../../boltzmann/BESolver/python") import libtps from bte_0d3v_batched import bte_0d3v_batched as BoltzmannSolver +import utils as bte_utils WITH_PARLA = 1 if WITH_PARLA: @@ -464,7 +465,9 @@ async def fetch(self, interface, use_interp:bool): Ex = ExbyN * self.param.n0 * self.param.Td_fac Ey = EybyN * self.param.n0 * self.param.Td_fac - ion_deg = species_densities[TPSINDEX.ELE_IDX]/n0 + ion_deg = species_densities[TPSINDEX.ELE_IDX]/n0 + ion_deg[ion_deg<0] = 1e-16 + ns_by_n0[ns_by_n0<0] = 0 m_bte = np.concatenate([ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1, 1))] + [ ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])], axis=1) self.sub_cluster_idx_to_pidx = None @@ -668,7 +671,14 @@ def ts_op_setup(grid_idx): @spawn(ts[grid_idx], placement=[gpu(dev_id)], vcus=0.0) def t1(): ts_op_setup(grid_idx) - f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + + vth = self.bte_solver._par_vth[grid_idx] + qA = self.bte_solver._op_diag_dg[grid_idx] + mw = bte_utils.get_maxwellian_3d(vth, 1) + mm_op = self.bte_solver._op_mass[grid_idx] * mw(0) * vth**3 + f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + f_mw = f_mw/cp.dot(mm_op, f_mw) + f_mw = cp.dot(qA.T, f_mw) if (use_interp==True): self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", cp.copy(f_mw[: , 0:self.param.n_sub_clusters])) @@ -684,7 +694,15 @@ def t1(): @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) def t1(): ts_op_setup(grid_idx) - f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + + vth = self.bte_solver._par_vth[grid_idx] + qA = self.bte_solver._op_diag_dg[grid_idx] + mw = bte_utils.get_maxwellian_3d(vth, 1) + mm_op = self.bte_solver._op_mass[grid_idx] * mw(0) * vth**3 + f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + f_mw = f_mw/np.dot(mm_op, f_mw) + f_mw = np.dot(qA.T, f_mw) + if (use_interp==True): self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", np.copy(f_mw[: , 0:self.param.n_sub_clusters])) else: @@ -1045,7 +1063,7 @@ async def __main__(): tt = 0#interface.currentTime() tau = (1/boltzmann.param.Efreq) dt_tps = interface.timeStep() - dt_bte = 1e-2 * tau #boltzmann.param.dt * (dt_tps) + dt_bte = boltzmann.param.dt * tau bte_steps = int(dt_tps/dt_bte) n_grids = boltzmann.param.n_grids @@ -1055,8 +1073,8 @@ async def __main__(): tps_sper_cycle = int(xp.ceil(tau/dt_tps)) bte_sper_cycle = int(xp.ceil(tau/dt_bte)) - bte_max_cycles = 10 - tps_max_cycles = 1000 + bte_max_cycles = int(boltzmann.param.cycles) + tps_max_cycles = 500 print("tps steps per cycle : ", tps_sper_cycle, "bte_steps per cycle", bte_sper_cycle) tps.solveStep() @@ -1199,8 +1217,8 @@ def t1(): p_t3 = min_mean_max(p_t2-p_t1, comm) print("[TPS] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E -- runtime = %.4E (s)"%(tps_idx, tt_tps, np.max(abs_error), np.max(rel_error), p_t3[2])) - if (np.max(abs_error) < boltzmann.param.atol or np.max(rel_error) < max(1e-6,boltzmann.param.rtol)): - break + # if (np.max(abs_error) < boltzmann.param.atol or np.max(rel_error) < max(1e-6,boltzmann.param.rtol)): + # break if (tps_idx == tps_sper_cycle * tps_max_cycles): break From d5182583756456032344b01e088529c0e3beadb1 Mon Sep 17 00:00:00 2001 From: milindasf Date: Fri, 1 Mar 2024 20:29:03 -0600 Subject: [PATCH 46/75] minor changes --- src/tps-bte_0d3v.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index c9aefb338..17996ea1d 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -909,7 +909,7 @@ def t1(): await ts rates = rates.reshape((-1)) - rates[rates<0] = 0.0 + #rates[rates<0] = 0.0 else: if(n_reactions>0): rates[:,:] = 0.0 @@ -928,7 +928,7 @@ def t1(): await ts rates = rates.reshape((-1)) - rates[rates<0] = 0.0 + #rates[rates<0] = 0.0 return def io_output_data(self, grid_idx, u0, plot_data:bool, export_csv:bool, fname:str): @@ -1074,7 +1074,7 @@ async def __main__(): tps_sper_cycle = int(xp.ceil(tau/dt_tps)) bte_sper_cycle = int(xp.ceil(tau/dt_bte)) bte_max_cycles = int(boltzmann.param.cycles) - tps_max_cycles = 500 + tps_max_cycles = 100 print("tps steps per cycle : ", tps_sper_cycle, "bte_steps per cycle", bte_sper_cycle) tps.solveStep() From 4d6b6efa94ac147d1be2f35aaaa277a32f8a6d36 Mon Sep 17 00:00:00 2001 From: milindasf Date: Mon, 4 Mar 2024 09:46:01 -0600 Subject: [PATCH 47/75] negative rate coefficients are set to zero from BTE side --- src/tps-bte_0d3v.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 17996ea1d..6a07571fe 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -909,7 +909,7 @@ def t1(): await ts rates = rates.reshape((-1)) - #rates[rates<0] = 0.0 + rates[rates<0] = 0.0 else: if(n_reactions>0): rates[:,:] = 0.0 @@ -928,7 +928,7 @@ def t1(): await ts rates = rates.reshape((-1)) - #rates[rates<0] = 0.0 + rates[rates<0] = 0.0 return def io_output_data(self, grid_idx, u0, plot_data:bool, export_csv:bool, fname:str): From 09b3e2fb6bee526f05751cdbf0314e0f1ecee9f7 Mon Sep 17 00:00:00 2001 From: milindasf Date: Tue, 12 Mar 2024 08:33:37 -0500 Subject: [PATCH 48/75] io crash from multi gpu case is fixed --- src/tps-bte_0d3v.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 6a07571fe..1300d4d69 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -1099,9 +1099,13 @@ def t1(): await ts await boltzmann.push(interface, use_interp=bte_use_interp) + for grid_idx in boltzmann.active_grid_idx: - u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) + dev_id = gidx_to_device_map(grid_idx,n_grids) + with cp.cuda.Device(dev_id): + u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) + else: assert boltzmann.param.solver_type == "transient", "unknown BTE solver type" @@ -1128,8 +1132,6 @@ def t1(): u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") fname = "%s_iter%04d_grid_%04d_cycle_%0d"%(boltzmann.param.out_fname, iter, grid_idx, bte_idx//bte_sper_cycle) - #boltzmann.io_output_data(grid_idx, u0, False, True, fname) - abs_error[grid_idx] = xp.max(xp.abs(bte_v[grid_idx]-u0)) rel_error[grid_idx] = abs_error[grid_idx] / xp.max(xp.abs(u0)) #print("[BTE] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E"%(bte_idx, tt_bte, abs_error[grid_idx], rel_error[grid_idx])) @@ -1189,8 +1191,11 @@ def t1(): await boltzmann.push(interface, use_interp=bte_use_interp) for grid_idx in boltzmann.active_grid_idx: - u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) + dev_id = gidx_to_device_map(grid_idx,n_grids) + with cp.cuda.Device(dev_id): + u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) + ################### tps solve ###################################### tps.fetch(interface) From 3ef9e4639eef76e5f2a20e6b81253f5749d0fa9a Mon Sep 17 00:00:00 2001 From: milindasf Date: Wed, 13 Mar 2024 11:51:41 -0500 Subject: [PATCH 49/75] additional parameters + profile counters for tps-bte profiling --- src/tps-bte_0d3v.py | 263 ++++++++++++++++++++++---------------------- 1 file changed, 133 insertions(+), 130 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 1300d4d69..e165d5f17 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -102,6 +102,9 @@ class BoltzmannSolverParams(): atol = 1e-10 # absolute tolerance rtol = 1e-10 # relative tolerance max_iter = 1000 # max iterations for the newton solver + + tps_bte_max_iter = 5000 # max iterations for tps-bte split scheme + bte_solve_freq = 100 # run bte every x tps cycles. ee_collisions = 0 # enable electron-electron Coulombic effects use_gpu = 1 # enable GPU use (1)-GPU solver, (0)-CPU solver @@ -160,9 +163,6 @@ class TPSINDEX(): MOLE_FRAC_IDX = {0: NEU_IDX, 1: EX1_IDX , 2: EX2_IDX , 3: EX3_IDX} - - - class Boltzmann0D2VBactchedSolver: def __init__(self, tps, comm): @@ -179,14 +179,6 @@ def __init__(self, tps, comm): os.makedirs(boltzmann_dir) #print("directory %s is created!"%(dir_name)) - profile_tt = [None] * int(pp.LAST) - profile_nn = ["setup", "solve", "last"] - for i in range(pp.LAST): - profile_tt[i] = profile_t(profile_nn[i]) - - self.profile_tt = profile_tt - self.profile_nn = profile_nn - num_gpus_per_node = 1 if self.param.use_gpu==1: num_gpus_per_node = cp.cuda.runtime.getDeviceCount() @@ -230,6 +222,10 @@ def __parse_config_file__(self, fname): self.param.threads = int(config.get("boltzmannSolver", "threads").split("#")[0].strip()) self.param.output_dir = str(config.get("boltzmannSolver", "output_dir").split("#")[0].strip()) self.param.out_fname = self.param.output_dir + "/" + str(config.get("boltzmannSolver", "output_fname").split("#")[0].strip()) + + self.param.bte_solve_freq = int(config.get("boltzmannSolver", "bte_solve_freq").split("#")[0].strip()) + self.param.tps_bte_max_iter = int(config.get("boltzmannSolver", "tps_bte_max_iter").split("#")[0].strip()) + return def grid_setup(self, interface): @@ -240,8 +236,6 @@ def grid_setup(self, interface): computed from the TPS code. """ assert self.xp_module==np, "grid setup only supported in CPU" - self.profile_tt[pp.SETUP].start() - xp = self.xp_module n_grids = self.param.n_grids Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] @@ -301,7 +295,6 @@ def grid_setup(self, interface): # self.active_grid_idx = active_grid_idx #[i for i in range(self.param.n_grids)] self.active_grid_idx = [i for i in range(self.param.n_grids)] self.sub_clusters_run = False - self.profile_tt[pp.SETUP].stop() return def solve_wo_parla(self): @@ -466,9 +459,9 @@ async def fetch(self, interface, use_interp:bool): Ey = EybyN * self.param.n0 * self.param.Td_fac ion_deg = species_densities[TPSINDEX.ELE_IDX]/n0 - ion_deg[ion_deg<0] = 1e-16 - ns_by_n0[ns_by_n0<0] = 0 - m_bte = np.concatenate([ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1, 1))] + [ ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])], axis=1) + ion_deg[ion_deg<=0] = 1e-16 + ns_by_n0[ns_by_n0<=0]= 0 + m_bte = np.concatenate([ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1, 1))] + [ ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])], axis=1) self.sub_cluster_idx_to_pidx = None self.sub_cluster_c = None @@ -571,6 +564,16 @@ def t1(): self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", Ey) self.bte_solver.set_boltzmann_parameter(grid_idx, "E" , EMag) + # cp.save(self.param.out_fname + "_ns_by_n0_%02d.npy"%(grid_idx) , ns_by_n0 , grid_idx) + # cp.save(self.param.out_fname + "_n0_%02d.npy"%(grid_idx) , n0 , grid_idx) + # cp.save(self.param.out_fname + "_ne_%02d.npy"%(grid_idx) , ne , grid_idx) + # cp.save(self.param.out_fname + "_ni_%02d.npy"%(grid_idx) , ni , grid_idx) + + # cp.save(self.param.out_fname + "_Tg_%02d.npy"%(grid_idx) , Tg , grid_idx) + # cp.save(self.param.out_fname + "_eRe_%02d.npy"%(grid_idx) , Ex , grid_idx) + # cp.save(self.param.out_fname + "_eIm_%02d.npy"%(grid_idx) , Ey , grid_idx) + # cp.save(self.param.out_fname + "_E_%02d.npy"%(grid_idx) , EMag , grid_idx) + return await ts @@ -629,6 +632,17 @@ def t1(): self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", Ex) self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", Ey) self.bte_solver.set_boltzmann_parameter(grid_idx, "E" , EMag) + + # cp.save(self.param.out_fname + "_ns_by_n0_%02d.npy"%(grid_idx) , ns_by_n0 , grid_idx) + # cp.save(self.param.out_fname + "_n0_%02d.npy"%(grid_idx) , n0 , grid_idx) + # cp.save(self.param.out_fname + "_ne_%02d.npy"%(grid_idx) , ne , grid_idx) + # cp.save(self.param.out_fname + "_ni_%02d.npy"%(grid_idx) , ni , grid_idx) + + # cp.save(self.param.out_fname + "_Tg_%02d.npy"%(grid_idx) , Tg , grid_idx) + # cp.save(self.param.out_fname + "_eRe_%02d.npy"%(grid_idx) , Ex , grid_idx) + # cp.save(self.param.out_fname + "_eIm_%02d.npy"%(grid_idx) , Ey , grid_idx) + # cp.save(self.param.out_fname + "_E_%02d.npy"%(grid_idx) , EMag , grid_idx) + return await ts return @@ -771,7 +785,6 @@ async def solve(self): if csv_write: data_csv = np.empty((self.tps_npts, 8 + len(coll_list))) - self.profile_tt[pp.SOLVE].start() ts = TaskSpace("T") for grid_idx in self.active_grid_idx: @spawn(ts[grid_idx], placement=[parla_placement[grid_idx]], vcus=0.0) @@ -787,92 +800,6 @@ def t1(): sys.exit(-1) await ts - self.profile_tt[pp.SOLVE].stop() - - - t1 = min_mean_max(self.profile_tt[pp.SETUP].seconds, self.comm) - t2 = min_mean_max(self.profile_tt[pp.SOLVE].seconds, self.comm) - print("[Boltzmann] setup (min) = %.4E (s) setup (mean) = %.4E (s) setup (max) = %.4E (s)" % (t1[0],t1[1],t1[2])) - print("[Boltzmann] solve (min) = %.4E (s) solve (mean) = %.4E (s) solve (max) = %.4E (s)" % (t2[0],t2[1],t2[2])) - - """ - if (self.param.export_csv ==1 or self.param.plot_data==1): - for grid_idx in range(n_grids): - dev_id = gidx_to_device_map(grid_idx, n_grids) - ff = self.ff[grid_idx] - qoi = self.qoi[grid_idx] - - def asnumpy(a): - if cp.get_array_module(a)==cp: - with cp.cuda.Device(dev_id): - return cp.asnumpy(a) - else: - return a - - ff_cpu = asnumpy(ff) - ev = np.linspace(1e-3, self.bte_solver._par_ev_range[grid_idx][1], 500) - ff_r = self.bte_solver.compute_radial_components(grid_idx, ev, ff_cpu) - - n0 = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "n0")) - ne = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ne")) - ni = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "ni")) - Tg = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "Tg")) - eRe = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe")) - eIm = asnumpy(self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm")) - eMag = np.sqrt(eRe**2 + eIm**2) - - if csv_write: - data_csv[gidx_to_pidx_map[grid_idx], 0] = n0 - data_csv[gidx_to_pidx_map[grid_idx], 1] = ne - data_csv[gidx_to_pidx_map[grid_idx], 2] = ni - data_csv[gidx_to_pidx_map[grid_idx], 3] = Tg - data_csv[gidx_to_pidx_map[grid_idx], 4] = eMag - data_csv[gidx_to_pidx_map[grid_idx], 5] = asnumpy(qoi["energy"]) - data_csv[gidx_to_pidx_map[grid_idx], 6] = asnumpy(qoi["mobility"]) - data_csv[gidx_to_pidx_map[grid_idx], 7] = asnumpy(qoi["diffusion"]) - - for col_idx, g in enumerate(coll_list): - data_csv[gidx_to_pidx_map[grid_idx], 8 + col_idx] = asnumpy(qoi["rates"][col_idx]) - - if plot_data: - num_sh = len(self.bte_solver._par_lm[grid_idx]) - num_subplots = num_sh - num_plt_cols = min(num_sh, 4) - num_plt_rows = np.int64(np.ceil(num_subplots/num_plt_cols)) - fig = plt.figure(figsize=(num_plt_cols * 8 + 0.5*(num_plt_cols-1), num_plt_rows * 8 + 0.5*(num_plt_rows-1)), dpi=300, constrained_layout=True) - plt_idx = 1 - n_pts_step = self.grid_idx_to_npts[grid_idx] // 20 - - for lm_idx, lm in enumerate(self.bte_solver._par_lm[grid_idx]): - plt.subplot(num_plt_rows, num_plt_cols, plt_idx) - for ii in range(0, self.grid_idx_to_npts[grid_idx], n_pts_step): - fr = np.abs(ff_r[ii, lm_idx, :]) - plt.semilogy(ev, fr, label=r"$T_g$=%.2E [K], $E/n_0$=%.2E [Td], $n_e/n_0$ = %.2E "%(Tg[ii], eMag[ii]/n0[ii]/1e-21, ne[ii]/n0[ii])) - - plt.xlabel(r"energy (eV)") - plt.ylabel(r"$f_%d$"%(lm[0])) - plt.grid(visible=True) - if lm_idx==0: - plt.legend(prop={'size': 6}) - - plt_idx +=1 - - plt.savefig("%s_plot_%02d.png"%(self.param.out_fname, grid_idx)) - plt.close() - - if csv_write: - fname = self.param.out_fname - with open("%s_qoi.csv"%fname, 'w', encoding='UTF8') as f: - writer = csv.writer(f,delimiter=',') - # write the header - header = ["n0", "ne", "ni", "Tg", "E", "energy", "mobility", "diffusion"] - for col_idx, g in enumerate(coll_list): - header.append(str(coll_names[col_idx])) - - writer.writerow(header) - writer.writerows(data_csv) - """ - return async def push(self, interface, use_interp:bool): @@ -1028,19 +955,78 @@ def asnumpy(a): plt.close() return + +class pp(enum.IntEnum): + BTE_SETUP = 0 + BTE_FETCH = 1 + BTE_SOLVE = 2 + BTE_PUSH = 3 + TPS_SETUP = 4 + TPS_FETCH = 5 + TPS_SOLVE = 6 + TPS_PUSH = 7 + LAST = 8 + +profile_nn = ["bte_setup", "bte_fetch", "bte_solve", "bte_push", "tps_setup", "tps_fetch", "tps_solve", "tps_push", "last"] +profile_tt = [profile_t(profile_nn[i]) for i in range(int(pp.LAST))] + +def profile_stats(boltzmann:Boltzmann0D2VBactchedSolver, p_tt: profile_t, p_nn, fname, comm): + + Nx = boltzmann.param.n_grids * boltzmann.param.n_sub_clusters + Nv = (boltzmann.param.Nr + 1) * (boltzmann.param.l_max + 1) + + tt = list() + for i in range(len(p_tt)): + tt.append(min_mean_max(p_tt[i].seconds/p_tt[i].iter, comm)) - + header = [ "Nv", + "Nx", + "bte_setup_min", "bte_setup_mean", "bte_setup_max", + "bte_fetch_min", "bte_fetch_mean", "bte_fetch_max", + "bte_solve_min", "bte_solve_mean", "bte_solve_max", + "bte_push_min" , "bte_push_mean" , "bte_push_max", + + "tps_setup_min", "tps_setup_mean", "tps_setup_max", + "tps_fetch_min", "tps_fetch_mean", "tps_fetch_max", + "tps_solve_min", "tps_solve_mean", "tps_solve_max", + "tps_push_min" , "tps_push_mean" , "tps_push_max"] + + data = [ Nv, + Nx, + tt[pp.BTE_SETUP][0], tt[pp.BTE_SETUP][1], tt[pp.BTE_SETUP][2], + tt[pp.BTE_FETCH][0], tt[pp.BTE_FETCH][1], tt[pp.BTE_FETCH][2], + tt[pp.BTE_SOLVE][0], tt[pp.BTE_SOLVE][1], tt[pp.BTE_SOLVE][2], + tt[pp.BTE_PUSH][0] , tt[pp.BTE_PUSH][1] , tt[pp.BTE_PUSH][2] , + tt[pp.TPS_SETUP][0], tt[pp.TPS_SETUP][1], tt[pp.TPS_SETUP][2], + tt[pp.TPS_FETCH][0], tt[pp.TPS_FETCH][1], tt[pp.TPS_FETCH][2], + tt[pp.TPS_SOLVE][0], tt[pp.TPS_SOLVE][1], tt[pp.TPS_SOLVE][2], + tt[pp.TPS_PUSH][0] , tt[pp.TPS_PUSH][1] , tt[pp.TPS_PUSH][2] ] + + data_str= ["%.4E"%d for d in data] + if fname!="": + with open(fname, "a") as f: + f.write(",".join(header)+"\n") + f.write(",".join(data_str)+"\n") + f.close() + else: + print(",".join(header)) + print(",".join(data_str)) + if __name__=="__main__": comm = MPI.COMM_WORLD with Parla(): # TPS solver + profile_tt[pp.TPS_SETUP].start() + tps = libtps.Tps(comm) tps.parseCommandLineArgs(sys.argv) tps.parseInput() tps.chooseDevices() tps.chooseSolver() tps.initialize() + + profile_tt[pp.TPS_SETUP].stop() boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) interface = libtps.Tps2Boltzmann(tps) @@ -1049,7 +1035,10 @@ def asnumpy(a): #coords = np.array(interface.HostReadSpatialCoordinates(), copy=False) tps.solveBegin() tps.push(interface) + + profile_tt[pp.BTE_SETUP].start() boltzmann.grid_setup(interface) + profile_tt[pp.BTE_SETUP].stop() @spawn(placement=cpu, vcus=0) async def __main__(): @@ -1058,7 +1047,7 @@ async def __main__(): await boltzmann.solve_init(bte_use_interp) xp = boltzmann.bte_solver.xp_module - max_iters = tps.getRequiredInput("cycle-avg-joule-coupled/max-iters") + max_iters = boltzmann.param.tps_bte_max_iter iter = 0 tt = 0#interface.currentTime() tau = (1/boltzmann.param.Efreq) @@ -1074,7 +1063,7 @@ async def __main__(): tps_sper_cycle = int(xp.ceil(tau/dt_tps)) bte_sper_cycle = int(xp.ceil(tau/dt_bte)) bte_max_cycles = int(boltzmann.param.cycles) - tps_max_cycles = 100 + tps_max_cycles = boltzmann.param.bte_solve_freq print("tps steps per cycle : ", tps_sper_cycle, "bte_steps per cycle", bte_sper_cycle) tps.solveStep() @@ -1087,26 +1076,32 @@ async def __main__(): interface.saveDataCollection(cycle=(iter//cycle_freq), time=iter) ########################## BTE solve ################################################## + profile_tt[pp.BTE_FETCH].start() await boltzmann.fetch(interface, use_interp=bte_use_interp) + profile_tt[pp.BTE_FETCH].stop() if (boltzmann.param.solver_type=="steady-state"): + profile_tt[pp.BTE_SOLVE].start() await boltzmann.solve() + profile_tt[pp.BTE_SOLVE].stop() + + + profile_tt[pp.BTE_PUSH].start() ts = TaskSpace("T") for grid_idx in boltzmann.active_grid_idx: @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) def t1(): boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u_avg", boltzmann.ff[grid_idx]) await ts - await boltzmann.push(interface, use_interp=bte_use_interp) + profile_tt[pp.BTE_PUSH].stop() - for grid_idx in boltzmann.active_grid_idx: - dev_id = gidx_to_device_map(grid_idx,n_grids) - with cp.cuda.Device(dev_id): - u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) - - + if boltzmann.param.export_csv ==1: + for grid_idx in boltzmann.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + with cp.cuda.Device(dev_id): + u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) else: assert boltzmann.param.solver_type == "transient", "unknown BTE solver type" """ @@ -1142,7 +1137,7 @@ def t1(): bte_v[grid_idx] = xp.copy(u0) await ts - p_t3 = min_mean_max(p_t2-p_t1, comm) + p_t3 = min_mean_max(profile_tt[pp.BTE_SOLVE].snap, comm) print("[BTE] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E --- runtime = %.4E (s) "%(bte_idx, tt_bte, max(abs_error), max(rel_error), p_t3[2])) if max(abs_error) < boltzmann.param.atol or max(rel_error)< boltzmann.param.rtol: @@ -1161,12 +1156,12 @@ def t1(): u_avg[grid_idx] += cycle_f1 * boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") await ts - p_t1 = time() + profile_tt[pp.BTE_SOLVE].start() await boltzmann.solve_step(tt_bte, dt_bte) - p_t2 = time() + profile_tt[pp.BTE_SOLVE].stop() if(terminal_output_freq > 0 and bte_idx % terminal_output_freq ==0): - p_t3 = min_mean_max(p_t2-p_t1, comm) + p_t3 = min_mean_max(profile_tt[pp.BTE_SOLVE].snap, comm) print("[BTE] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (bte_idx, tt_bte, p_t3[0], p_t3[1], p_t3[2])) ts = TaskSpace("T") @@ -1179,6 +1174,7 @@ def t1(): tt_bte += dt_bte + profile_tt[pp.BTE_PUSH].start() ts = TaskSpace("T") for grid_idx in boltzmann.active_grid_idx: @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) @@ -1189,16 +1185,22 @@ def t1(): boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u_avg", u_avg[grid_idx]) await ts await boltzmann.push(interface, use_interp=bte_use_interp) + profile_tt[pp.BTE_PUSH].stop() - for grid_idx in boltzmann.active_grid_idx: - dev_id = gidx_to_device_map(grid_idx,n_grids) - with cp.cuda.Device(dev_id): - u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) + if boltzmann.param.export_csv ==1: + for grid_idx in boltzmann.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + with cp.cuda.Device(dev_id): + u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) + ################### tps solve ###################################### + profile_tt[pp.TPS_FETCH].start() tps.fetch(interface) + profile_tt[pp.TPS_FETCH].stop() + tps_u = 0 tps_v = 0 tt_tps = 0 @@ -1207,7 +1209,6 @@ def t1(): p_t2 = 0 for tps_idx in range(tps_sper_cycle * tps_max_cycles + 1): if (tps_idx % tps_sper_cycle == 0): - tps.push(interface) nspecies = interface.Nspecies() heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) @@ -1220,7 +1221,7 @@ def t1(): rel_error = abs_error / np.linalg.norm(tps_u, axis=1) tps_v = np.copy(tps_u) - p_t3 = min_mean_max(p_t2-p_t1, comm) + p_t3 = min_mean_max(profile_tt[pp.TPS_SOLVE].snap, comm) print("[TPS] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E -- runtime = %.4E (s)"%(tps_idx, tt_tps, np.max(abs_error), np.max(rel_error), p_t3[2])) # if (np.max(abs_error) < boltzmann.param.atol or np.max(rel_error) < max(1e-6,boltzmann.param.rtol)): # break @@ -1228,19 +1229,21 @@ def t1(): if (tps_idx == tps_sper_cycle * tps_max_cycles): break - p_t1 = time() + profile_tt[pp.TPS_SOLVE].start() tps.solveStep() - p_t2 = time() + profile_tt[pp.TPS_SOLVE].stop() if(terminal_output_freq > 0 and tps_idx % terminal_output_freq ==0): - p_t3 = min_mean_max(p_t2-p_t1, comm) + p_t3 = min_mean_max(profile_tt[pp.TPS_SOLVE].snap, comm) print("[TPS] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (tps_idx,tt_tps, p_t3[0],p_t3[1],p_t3[2])) tt_tps +=dt_tps + profile_tt[pp.TPS_PUSH].start() tps.push(interface) + profile_tt[pp.TPS_PUSH].stop() tt += dt_tps * tps_idx iter+=1 - + profile_stats(boltzmann, profile_tt, profile_nn, boltzmann.param.out_fname+"_profile.csv" , comm) tps.solveEnd() sys.exit (tps.getStatus()) \ No newline at end of file From 8f012b8d92aaafd9c69e0076571762446da74794 Mon Sep 17 00:00:00 2001 From: milindasf Date: Fri, 15 Mar 2024 22:45:00 -0500 Subject: [PATCH 50/75] ibrun -np 2 debuging code, it seems to me the crash happens when launching CPU parla tasks in the async fetch --- src/tps-bte_0d3v.py | 531 +++++++++++++++++++++++++++++--------------- 1 file changed, 353 insertions(+), 178 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index e165d5f17..0972275a7 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 +from mpi4py import MPI import sys import os -from mpi4py import MPI import numpy as np import scipy.constants import csv @@ -13,6 +13,7 @@ import pandas as pd import scipy.interpolate import scipy.cluster + class profile_t: def __init__(self,name): self.name = name @@ -50,19 +51,6 @@ def reset(self): def min_mean_max(a, comm: MPI.Comm): return (comm.allreduce(a, MPI.MIN) , comm.allreduce(a, MPI.SUM)/comm.Get_size(), comm.allreduce(a, MPI.MAX)) -# try: -# df = pd.read_csv("ionization_rates.csv") -# Te = np.array(df["Te[K]"]) -# r_arr = np.array(df["Arr[m3/s]"]) -# r_csc = np.array(df["CSC_Maxwellian[m3/s]"]) -# r_arr = scipy.interpolate.interp1d(Te, r_arr,bounds_error=False, fill_value=0.0) -# r_csc = scipy.interpolate.interp1d(Te, r_csc,bounds_error=False, fill_value=0.0) -# print("ionization coefficient read from file ") -# except: -# print("ionization rate coefficient file not found!!") -# r_arr = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) -# r_csc = lambda Te : 1.235e-13 * np.exp(-18.687 / np.abs(Te * scipy.constants.Boltzmann/scipy.constants.electron_volt)) - # set path to C++ TPS library path = os.path.abspath(os.path.dirname(sys.argv[0])) sys.path.append(path + "/.libs") @@ -82,11 +70,6 @@ def min_mean_max(a, comm: MPI.Comm): sys.exit(0) -class pp(enum.IntEnum): - SETUP = 0 - SOLVE = 1 - LAST = 2 - class BoltzmannSolverParams(): sp_order = 3 # B-spline order in v-space spline_qpts = 5 # number of Gauss-Legendre quadrature points per knot interval @@ -135,7 +118,7 @@ class BoltzmannSolverParams(): n0 = 3.22e22 #[m^{-3}] rand_seed = 0 - + use_clstr_inp = True class TPSINDEX(): """ @@ -168,6 +151,10 @@ class Boltzmann0D2VBactchedSolver: def __init__(self, tps, comm): self.tps = tps self.comm : MPI.Comm = comm + + self.rankG = self.comm.Get_rank() + self.npesG = self.comm.Get_size() + self.param = BoltzmannSolverParams() # overide the default params, based on the config.ini file. self.__parse_config_file__(sys.argv[2]) @@ -175,16 +162,19 @@ def __init__(self, tps, comm): boltzmann_dir = self.param.output_dir isExist = os.path.exists(boltzmann_dir) if not isExist: - # Create a new directory because it does not exist os.makedirs(boltzmann_dir) - #print("directory %s is created!"%(dir_name)) num_gpus_per_node = 1 if self.param.use_gpu==1: num_gpus_per_node = cp.cuda.runtime.getDeviceCount() + self.num_gpus_per_node = num_gpus_per_node + + if self.rankG==0: + print("number of GPUs detected = %d "%(num_gpus_per_node), flush=True) + # how to map each grid to the GPU devices on the node - self.gidx_to_device_map = lambda gidx, num_grids : gidx % num_gpus_per_node + self.gidx_to_device_map = lambda gidx, num_grids : self.rankG % self.num_gpus_per_node #gidx % num_gpus_per_node return def __parse_config_file__(self, fname): @@ -193,7 +183,9 @@ def __parse_config_file__(self, fname): which overides the default BoltzmannSolverParams """ config = configparser.ConfigParser() - print("[Boltzmann] reading configure file given by : ", fname) + if self.rankG==0: + print("[Boltzmann] reading configure file given by : ", fname, flush=True) + config.read(fname) self.param.sp_order = int(config.get("boltzmannSolver", "sp_order").split("#")[0].strip()) @@ -241,11 +233,18 @@ def grid_setup(self, interface): Te = xp.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) / self.param.ev_to_K # [eV] Tew = scipy.cluster.vq.whiten(Te) + Tecw0 = Tew[np.random.choice(Tew.shape[0], self.param.n_grids, replace=False)] Tecw = scipy.cluster.vq.kmeans(Tew, np.linspace(np.min(Tew), np.max(Tew), n_grids), iter=1000, thresh=1e-8, check_finite=False)[0] - Te_b = Tecw * np.std(Te, axis=0) + + Tecw0[0:len(Tecw)] = Tecw[:] + Tecw = Tecw0 + assert len(Tecw0) == self.param.n_grids + + Te_b = xp.sort(Tecw * np.std(Te, axis=0)) dist_mat = xp.zeros((len(Te),n_grids)) - print("K-means Te clusters ", Te_b) + + print("rank [%d/%d] : K-means Te clusters "%(self.rankG, self.npesG), Te_b, flush=True) for i in range(self.param.n_grids): dist_mat[:,i] = xp.abs(Tew-Tecw[i]) @@ -254,12 +253,12 @@ def grid_setup(self, interface): for b_idx in range(self.param.n_grids): grid_idx_to_spatial_pts_map.append(xp.argwhere(membership==b_idx)[:,0]) - np.save("%s_gidx_to_pidx.npy"%(self.param.out_fname), np.array(grid_idx_to_spatial_pts_map, dtype=object), allow_pickle=True) + #np.save("%s_gidx_to_pidx_rank_%08d.npy"%(self.param.out_fname, self.rankG), np.array(grid_idx_to_spatial_pts_map, dtype=object), allow_pickle=True) self.grid_idx_to_npts = xp.array([len(a) for a in grid_idx_to_spatial_pts_map], dtype=xp.int32) self.grid_idx_to_spatial_idx_map = grid_idx_to_spatial_pts_map - xp.sum(self.grid_idx_to_npts) == len(Te), "[Error] : TPS spatial points for v-space grid assignment is inconsitant" + xp.sum(self.grid_idx_to_npts) == len(Te), "[Error] : TPS spatial points for v-space grid assignment is inconsitant" lm_modes = [[[l,0] for l in range(self.param.l_max+1)] for grid_idx in range(self.param.n_grids)] nr = xp.ones(self.param.n_grids, dtype=np.int32) * self.param.Nr Te = xp.array([Te_b[b_idx] for b_idx in range(self.param.n_grids)]) # xp.ones(self.param.n_grids) * self.param.Te @@ -267,12 +266,8 @@ def grid_setup(self, interface): ev_max = (6 * vth / self.param.c_gamma)**2 self.bte_solver = BoltzmannSolver(self.param, ev_max , Te , nr, lm_modes, self.param.n_grids, self.param.collisions) - if self.param.verbose==1: - print("grid energy max (eV) \n", ev_max, flush = True) - # compute BTE operators for grid_idx in range(self.param.n_grids): - print("setting up grid %d"%(grid_idx), flush = True) self.bte_solver.assemble_operators(grid_idx) n_grids = self.param.n_grids @@ -280,11 +275,12 @@ def grid_setup(self, interface): for grid_idx in range(n_grids): assert self.grid_idx_to_npts[grid_idx] > 0 - - print("setting initial Maxwellian at %.4E eV" %(self.bte_solver._par_ap_Te[grid_idx]), flush=True) - self.bte_solver.set_boltzmann_parameter(grid_idx, "f_mw", self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian")) - + if(self.param.use_clstr_inp==True): + self.bte_solver.set_boltzmann_parameter(grid_idx, "f_mw", self.bte_solver.initialize(grid_idx, self.param.n_sub_clusters , "maxwellian")) + else: + self.bte_solver.set_boltzmann_parameter(grid_idx, "f_mw", self.bte_solver.initialize(grid_idx, self.grid_idx_to_npts[grid_idx], "maxwellian")) + self.comm.Barrier() # active_grid_idx=list() # for grid_idx in range(n_grids): # spec_sp = self.bte_solver._op_spec_sp[grid_idx] @@ -326,7 +322,7 @@ def solve_wo_parla(self): self.qoi[grid_idx] = qoi self.ff [grid_idx] = ff except: - print("solver failed for v-space gird no %d"%(grid_idx)) + print("solver failed for v-space gird no %d"%(grid_idx), flush=True) sys.exit(-1) else: with xp.cuda.Device(dev_id): @@ -336,11 +332,11 @@ def solve_wo_parla(self): self.qoi[grid_idx] = qoi self.ff [grid_idx] = ff except: - print("solver failed for v-space gird no %d"%(grid_idx)) + print("solver failed for v-space gird no %d"%(grid_idx), flush=True) sys.exit(-1) t2 = time() - print("time for boltzmann v-space solve = %.4E"%(t2- t1)) + print("time for boltzmann v-space solve = %.4E"%(t2- t1), flush=True) if (self.param.export_csv ==1 or self.param.plot_data==1): for grid_idx in range(n_grids): @@ -420,7 +416,8 @@ def asnumpy(a): return - async def fetch(self, interface, use_interp:bool): + def fetch(self, interface): + use_interp = self.param.use_clstr_inp gidx_to_pidx = self.grid_idx_to_spatial_idx_map heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) tps_npts = len(heavy_temp) @@ -429,21 +426,10 @@ async def fetch(self, interface, use_interp:bool): electron_temp = np.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(nspecies, tps_npts) - cs_avail_species = self.bte_solver._avail_species - n0 = np.sum(species_densities, axis=0) - species_densities[TPSINDEX.ELE_IDX] ns_by_n0 = np.concatenate([species_densities[TPSINDEX.MOLE_FRAC_IDX[i]]/n0 for i in range(len(cs_avail_species))]).reshape((len(cs_avail_species), tps_npts)) - # np.save("n0.npy", species_densities[TPSINDEX.NEU_IDX]) - # np.save("ne.npy", species_densities[TPSINDEX.ELE_IDX]) - # np.save("ni.npy", species_densities[TPSINDEX.ION_IDX]) - - # np.save("Te.npy", heavy_temp) - # np.save("Tg.npy", heavy_temp) - # np.save("E.npy" , np.sqrt(efield[0]**2 + efield[1]**2)) - # sys.exit(-1) - n_grids = self.param.n_grids use_gpu = self.param.use_gpu @@ -477,9 +463,7 @@ def normalize(obs): std_obs[std_obs == 0.0] = 1.0 return obs/std_obs, std_obs - ts = TaskSpace("T") for grid_idx in self.active_grid_idx: - @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) def t1(): dev_id = self.gidx_to_device_map(grid_idx, n_grids) m = m_bte[gidx_to_pidx[grid_idx]] @@ -501,26 +485,6 @@ def t1(): for c_idx in range(n_sub_clusters): self.sub_cluster_idx_to_pidx[grid_idx][c_idx] = np.argwhere(membership_m==c_idx)[:,0] - # idx = self.sub_cluster_idx_to_pidx[grid_idx][c_idx] - # abs_err = np.linalg.norm(dist_mat[idx, c_idx] - np.linalg.norm(mw[idx] - mcw[c_idx], axis=1)) - # print(grid_idx, c_idx, abs_err) - - # dw_mat = np.zeros(self.param.n_sub_clusters) - # print(grid_idx,"\n" , mc) - # for c_idx in range(n_sub_clusters): - # idx = self.sub_cluster_idx_to_pidx[grid_idx][c_idx] - # if len(idx>0): - # dw_mat[c_idx] = np.max(np.linalg.norm(1 - m[idx] / mc[c_idx], axis = 1)) - - # plt.figure(figsize=(8, 8), dpi=300) - # plt.semilogy(np.array(range(self.param.n_sub_clusters)), dw_mat) - # plt.xlabel(r"cluster id") - # plt.ylabel(r"relative error") - # plt.grid(visible=True) - # plt.savefig("%s_grid_idx_%04d.png"%(self.param.out_fname, grid_idx)) - # plt.close() - - n0 = np.ones(mc.shape[0]) * self.param.n0 Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac @@ -531,7 +495,7 @@ def t1(): EMag = np.sqrt(Ex**2 + Ey**2) if self.param.verbose == 1 : - print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) + print("rank = %d Boltzmann solver inputs for v-space grid id %d"%(self.rankG, grid_idx)) print("Efreq = %.4E [1/s]" %(self.param.Efreq)) print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) @@ -575,13 +539,10 @@ def t1(): # cp.save(self.param.out_fname + "_E_%02d.npy"%(grid_idx) , EMag , grid_idx) return - - await ts + t1() else: - ts = TaskSpace("T") for grid_idx in self.active_grid_idx: - @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) def t1(): bte_idx = gidx_to_pidx[grid_idx] dev_id = self.gidx_to_device_map(grid_idx, n_grids) @@ -600,7 +561,7 @@ def t1(): EMag = np.sqrt(Ex**2 + Ey**2) if self.param.verbose == 1 : - print("Boltzmann solver inputs for v-space grid id %d"%(grid_idx)) + print("rank = %d Boltzmann solver inputs for v-space grid id %d"%(self.rankG, grid_idx)) print("Efreq = %.4E [1/s]" %(self.param.Efreq)) print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) @@ -613,6 +574,209 @@ def t1(): print("ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(np.min(ns_by_n0[i]) , np.max(ns_by_n0[i]))) + if (use_gpu == 1): + with cp.cuda.Device(dev_id): + n0 = cp.array(n0) + ne = cp.array(ne) + ni = cp.array(ni) + Ex = cp.array(Ex) + Ey = cp.array(Ey) + Tg = cp.array(Tg) + EMag = cp.sqrt(Ex**2 + Ey**2) + ns_by_n0 = cp.array(ns_by_n0) + + self.bte_solver.set_boltzmann_parameter(grid_idx, "ns_by_n0", ns_by_n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ne" , ne) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ni" , ni) + self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg" , Tg) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", Ex) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", Ey) + self.bte_solver.set_boltzmann_parameter(grid_idx, "E" , EMag) + + # cp.save(self.param.out_fname + "_ns_by_n0_%02d.npy"%(grid_idx) , ns_by_n0 , grid_idx) + # cp.save(self.param.out_fname + "_n0_%02d.npy"%(grid_idx) , n0 , grid_idx) + # cp.save(self.param.out_fname + "_ne_%02d.npy"%(grid_idx) , ne , grid_idx) + # cp.save(self.param.out_fname + "_ni_%02d.npy"%(grid_idx) , ni , grid_idx) + + # cp.save(self.param.out_fname + "_Tg_%02d.npy"%(grid_idx) , Tg , grid_idx) + # cp.save(self.param.out_fname + "_eRe_%02d.npy"%(grid_idx) , Ex , grid_idx) + # cp.save(self.param.out_fname + "_eIm_%02d.npy"%(grid_idx) , Ey , grid_idx) + # cp.save(self.param.out_fname + "_E_%02d.npy"%(grid_idx) , EMag , grid_idx) + + return + t1() + return + + async def fetch_asnyc(self, interface): + use_interp = self.param.use_clstr_inp + gidx_to_pidx = self.grid_idx_to_spatial_idx_map + heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + tps_npts = len(heavy_temp) + self.tps_npts = tps_npts + nspecies = interface.Nspecies() + electron_temp = np.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) + efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) + species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(nspecies, tps_npts) + + cs_avail_species = self.bte_solver._avail_species + + n0 = np.sum(species_densities, axis=0) - species_densities[TPSINDEX.ELE_IDX] + ns_by_n0 = np.concatenate([species_densities[TPSINDEX.MOLE_FRAC_IDX[i]]/n0 for i in range(len(cs_avail_species))]).reshape((len(cs_avail_species), tps_npts)) + + n_grids = self.param.n_grids + use_gpu = self.param.use_gpu + + Tg = heavy_temp + + Ex = efield[0] + Ey = efield[1] + + ExbyN = Ex/n0/self.param.Td_fac + EybyN = Ey/n0/self.param.Td_fac + + Ex = ExbyN * self.param.n0 * self.param.Td_fac + Ey = EybyN * self.param.n0 * self.param.Td_fac + + ion_deg = species_densities[TPSINDEX.ELE_IDX]/n0 + ion_deg[ion_deg<=0] = 1e-16 + ns_by_n0[ns_by_n0<=0] = 0 + m_bte = np.concatenate([ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1, 1))] + [ ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])], axis=1) + + self.sub_cluster_idx_to_pidx = None + self.sub_cluster_c = None + gidx_to_device_map = self.gidx_to_device_map + + if (use_interp == True): + n_sub_clusters = self.param.n_sub_clusters + self.sub_cluster_idx_to_pidx = [[None for i in range(n_sub_clusters)] for i in range(self.param.n_grids)] + self.sub_cluster_c = [None for i in range(self.param.n_grids)] + + def normalize(obs): + std_obs = np.std(obs, axis=0) + std_obs[std_obs == 0.0] = 1.0 + return obs/std_obs, std_obs + + ts = TaskSpace("T") + for grid_idx in self.active_grid_idx: + @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + def t1(): + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + m = m_bte[gidx_to_pidx[grid_idx]] + mw , mw_std = normalize(m) + + # to repoduce clusters + np.random.seed(self.param.rand_seed) + + mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] + mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=1000, thresh=1e-8, check_finite=False)[0] + mcw0[0:mcw.shape[0], :] = mcw[:,:] + mcw = mcw0 + + assert mcw.shape[0] == self.param.n_sub_clusters + + mc = mcw * mw_std + dist_mat = np.array([np.linalg.norm(mw - mcw[i], axis=1) for i in range(n_sub_clusters)]).T + membership_m = np.argmin(dist_mat, axis=1) + self.sub_cluster_c[grid_idx] = mc + + for c_idx in range(n_sub_clusters): + self.sub_cluster_idx_to_pidx[grid_idx][c_idx] = np.argwhere(membership_m==c_idx)[:,0] + + + + n0 = np.ones(mc.shape[0]) * self.param.n0 + Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac + Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac + Tg = mc[: , 2] + ne = mc[: , 3] * self.param.n0 + ni = mc[: , 3] * self.param.n0 + ns_by_n0 = np.transpose(mc[: , 4:]) + EMag = np.sqrt(Ex**2 + Ey**2) + + if self.param.verbose == 1 : + print("rank [%d/%d] Boltzmann solver inputs for v-space grid id %d"%(self.rankG, self.npesG, grid_idx), flush=True) + print("Efreq = %.4E [1/s]" %(self.param.Efreq) , flush=True) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx], flush=True) + + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0)) , flush=True) + print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN) , np.max(ExbyN)), flush=True) + print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN) , np.max(EybyN)), flush=True) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg)) , flush=True) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne)) , flush=True) + + for i in range(ns_by_n0.shape[0]): + print("[%d] ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(i, np.min(ns_by_n0[i]) , np.max(ns_by_n0[i])), flush=True) + + if (use_gpu==1): + with cp.cuda.Device(dev_id): + n0 = cp.array(n0) + Ex = cp.array(Ex) + Ey = cp.array(Ey) + Tg = cp.array(Tg) + ne = cp.array(ne) + ni = cp.array(ni) + EMag = cp.sqrt(Ex**2 + Ey**2) + ns_by_n0 = cp.array(ns_by_n0) + + self.bte_solver.set_boltzmann_parameter(grid_idx, "ns_by_n0", ns_by_n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ne" , ne) + self.bte_solver.set_boltzmann_parameter(grid_idx, "ni" , ni) + self.bte_solver.set_boltzmann_parameter(grid_idx, "Tg" , Tg) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eRe", Ex) + self.bte_solver.set_boltzmann_parameter(grid_idx, "eIm", Ey) + self.bte_solver.set_boltzmann_parameter(grid_idx, "E" , EMag) + + # cp.save(self.param.out_fname + "_ns_by_n0_%02d.npy"%(grid_idx) , ns_by_n0 , grid_idx) + # cp.save(self.param.out_fname + "_n0_%02d.npy"%(grid_idx) , n0 , grid_idx) + # cp.save(self.param.out_fname + "_ne_%02d.npy"%(grid_idx) , ne , grid_idx) + # cp.save(self.param.out_fname + "_ni_%02d.npy"%(grid_idx) , ni , grid_idx) + + # cp.save(self.param.out_fname + "_Tg_%02d.npy"%(grid_idx) , Tg , grid_idx) + # cp.save(self.param.out_fname + "_eRe_%02d.npy"%(grid_idx) , Ex , grid_idx) + # cp.save(self.param.out_fname + "_eIm_%02d.npy"%(grid_idx) , Ey , grid_idx) + # cp.save(self.param.out_fname + "_E_%02d.npy"%(grid_idx) , EMag , grid_idx) + + return + + await ts + + else: + ts = TaskSpace("T") + for grid_idx in self.active_grid_idx: + @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + def t1(): + bte_idx = gidx_to_pidx[grid_idx] + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + + mc = m_bte[bte_idx] + + n0 = np.ones(mc.shape[0]) * self.param.n0 + Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac + Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac + + Tg = mc[: , 2] + ne = mc[: , 3] * self.param.n0 + ni = mc[: , 3] * self.param.n0 + ns_by_n0 = np.transpose(mc[: , 4:]) + + EMag = np.sqrt(Ex**2 + Ey**2) + + if self.param.verbose == 1 : + print("rank [%d/%d] Boltzmann solver inputs for v-space grid id %d"%(self.rankG, self.npesG, grid_idx), flush=True) + print("Efreq = %.4E [1/s]" %(self.param.Efreq) , flush=True) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx], flush=True) + + print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN), np.max(ExbyN)), flush=True) + print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN), np.max(EybyN)), flush=True) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg)) , flush=True) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne)) , flush=True) + + for i in range(ns_by_n0.shape[0]): + print("ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(np.min(ns_by_n0[i]) , np.max(ns_by_n0[i])), flush=True) + + if (use_gpu == 1): with cp.cuda.Device(dev_id): n0 = cp.array(n0) @@ -647,7 +811,7 @@ def t1(): await ts return - async def solve_init(self, use_interp:bool): + async def solve_init_async(self): rank = self.comm.Get_rank() npes = self.comm.Get_size() n_grids = self.param.n_grids @@ -658,7 +822,7 @@ async def solve_init(self, use_interp:bool): @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) def t1(): dev_id = gidx_to_device_map(grid_idx, n_grids) - print("[%d/%d] setting grid %d to device %d"%(rank, npes, grid_idx, dev_id)) + print("rank [%d/%d] setting grid %d to device %d"%(rank, npes, grid_idx, dev_id), flush=True) self.bte_solver.host_to_device_setup(dev_id, grid_idx) await ts @@ -666,16 +830,14 @@ def t1(): def ts_op_setup(grid_idx): xp = self.xp_module f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") - - if (use_interp==True): - n_pts = self.param.n_sub_clusters - else: - n_pts = f_mw.shape[1] - + n_pts = f_mw.shape[1] Qmat = self.bte_solver._op_qmat[grid_idx] INr = xp.eye(Qmat.shape[1]) self.bte_solver._op_imat_vx[grid_idx] = xp.einsum("i,jk->ijk",xp.ones(n_pts), INr) + if self.param.use_clstr_inp==True: + assert n_pts == self.param.n_sub_clusters + if(self.param.use_gpu==1): self.xp_module = cp @@ -694,12 +856,7 @@ def t1(): f_mw = f_mw/cp.dot(mm_op, f_mw) f_mw = cp.dot(qA.T, f_mw) - if (use_interp==True): - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", cp.copy(f_mw[: , 0:self.param.n_sub_clusters])) - else: - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", cp.copy(f_mw)) - - + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", cp.copy(f_mw)) await ts else: self.xp_module = np @@ -716,22 +873,18 @@ def t1(): f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") f_mw = f_mw/np.dot(mm_op, f_mw) f_mw = np.dot(qA.T, f_mw) + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", np.copy(f_mw)) - if (use_interp==True): - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", np.copy(f_mw[: , 0:self.param.n_sub_clusters])) - else: - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", np.copy(f_mw)) - await ts return - async def solve_step(self, time, delta_t): + async def solve_step_async(self, time, delta_t): """ perform a single timestep in 0d-BTE """ - rank = self.comm.Get_rank() - npes = self.comm.Get_size() + rank = self.rankG + npes = self.npesG n_grids = self.param.n_grids gidx_to_device_map = self.gidx_to_device_map @@ -755,12 +908,12 @@ def t1(): return - async def solve(self): + async def solve_async(self): """ Can be used to compute steady-state or cycle averaged BTE solutions """ - rank = self.comm.Get_rank() - npes = self.comm.Get_size() + rank = self.rankG + npes = self.npesG xp = self.xp_module csv_write = self.param.export_csv plot_data = self.param.plot_data @@ -774,6 +927,7 @@ async def solve(self): self.qoi = [None for grid_idx in range(self.param.n_grids)] self.ff = [None for grid_idx in range(self.param.n_grids)] num_gpus = len(gpu) + assert num_gpus == self.num_gpus_per_node, "CuPy and Parla number of GPUs per node does not match %d vs. %d"%(num_gpus, self.num_gpus_per_node) coll_list = self.bte_solver.get_collision_list() coll_names = self.bte_solver.get_collision_names() @@ -790,23 +944,24 @@ async def solve(self): @spawn(ts[grid_idx], placement=[parla_placement[grid_idx]], vcus=0.0) def t1(): try: - print("[Boltzmann] %d / %d launching grid %d on %s"%(rank, npes, grid_idx, parla_placement[grid_idx])) + print("rank [%d/%d] BTE launching grid %d on %s"%(rank, npes, grid_idx, parla_placement[grid_idx]), flush=True) f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u0") ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) self.ff[grid_idx] = ff self.qoi[grid_idx] = qoi except: - print("solver failed for v-space gird no %d"%(grid_idx)) + print("rank [%d/%d] solver failed for v-space gird no %d"%(self.rankG, self.npesG, grid_idx), flush=True) sys.exit(-1) await ts return - async def push(self, interface, use_interp:bool): + async def push_async(self, interface): xp = self.xp_module n_grids = self.param.n_grids gidx_to_device_map = self.gidx_to_device_map gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + use_interp = self.param.use_clstr_inp heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) tps_npts = len(heavy_temp) @@ -821,10 +976,10 @@ async def push(self, interface, use_interp:bool): for grid_idx in self.active_grid_idx: @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) def t1(): - qA = boltzmann.bte_solver._op_diag_dg[grid_idx] - u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, u0) - qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + qA = self.bte_solver._op_diag_dg[grid_idx] + u0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + h_curr = self.bte_solver.normalized_distribution(grid_idx, u0) + qoi = self.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) rr_cpu = xp.asnumpy(qoi["rates"]) rr_interp = np.zeros(len(gidx_to_pidx_map[grid_idx])) @@ -844,10 +999,10 @@ def t1(): for grid_idx in self.active_grid_idx: @spawn(ts[grid_idx], placement=[gpu(gidx_to_device_map(grid_idx,n_grids))], vcus=0.0) def t1(): - qA = boltzmann.bte_solver._op_diag_dg[grid_idx] - u0 = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, u0) - qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + qA = self.bte_solver._op_diag_dg[grid_idx] + u0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + h_curr = self.bte_solver.normalized_distribution(grid_idx, u0) + qoi = self.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) rr_cpu = xp.asnumpy(qoi["rates"]) for r_idx in range(n_reactions): @@ -865,9 +1020,9 @@ def io_output_data(self, grid_idx, u0, plot_data:bool, export_csv:bool, fname:st dev_id = gidx_to_device_map(grid_idx, n_grids) qA = self.bte_solver._op_diag_dg[grid_idx] h_curr = xp.dot(qA, u0) - h_curr = boltzmann.bte_solver.normalized_distribution(grid_idx, h_curr) + h_curr = self.bte_solver.normalized_distribution(grid_idx, h_curr) ff = h_curr - qoi = boltzmann.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + qoi = self.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) coll_list = self.bte_solver.get_collision_list() coll_names = self.bte_solver.get_collision_names() cs_data = self.bte_solver.get_cross_section_data() @@ -1011,45 +1166,54 @@ def profile_stats(boltzmann:Boltzmann0D2VBactchedSolver, p_tt: profile_t, p_nn, else: print(",".join(header)) print(",".join(data_str)) - -if __name__=="__main__": - comm = MPI.COMM_WORLD - - with Parla(): - # TPS solver - profile_tt[pp.TPS_SETUP].start() - - tps = libtps.Tps(comm) - tps.parseCommandLineArgs(sys.argv) - tps.parseInput() - tps.chooseDevices() - tps.chooseSolver() - tps.initialize() - - profile_tt[pp.TPS_SETUP].stop() - boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) - interface = libtps.Tps2Boltzmann(tps) - tps.initInterface(interface) - #coords = np.array(interface.HostReadSpatialCoordinates(), copy=False) - tps.solveBegin() - tps.push(interface) - - profile_tt[pp.BTE_SETUP].start() - boltzmann.grid_setup(interface) - profile_tt[pp.BTE_SETUP].stop() - +def driver_w_parla(comm): + + rank = comm.Get_rank() + npes = comm.Get_size() + + dev = rank % 3 + + with Parla(): @spawn(placement=cpu, vcus=0) async def __main__(): - bte_use_interp = True - await boltzmann.solve_init(bte_use_interp) - xp = boltzmann.bte_solver.xp_module - - max_iters = boltzmann.param.tps_bte_max_iter + # TPS solver + profile_tt[pp.TPS_SETUP].start() + tps = libtps.Tps(comm) + tps.parseCommandLineArgs(sys.argv) + tps.parseInput() + tps.chooseDevices() + tps.chooseSolver() + tps.initialize() + profile_tt[pp.TPS_SETUP].stop() + + interface = libtps.Tps2Boltzmann(tps) + tps.initInterface(interface) + tps.solveBegin() + # --- first TPS step is needed to initialize the EM fields + tps.solveStep() + tps.push(interface) + + # with cp.cuda.Device(dev): + # cp.cuda.runtime.deviceSynchronize() + # comm.Barrier() + + boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) + rank = boltzmann.comm.Get_rank() + npes = boltzmann.comm.Get_size() + + profile_tt[pp.BTE_SETUP].start() + boltzmann.grid_setup(interface) + profile_tt[pp.BTE_SETUP].stop() + + # await boltzmann.solve_init_async() + # xp = boltzmann.bte_solver.xp_module + xp = cp + max_iters = 1#boltzmann.param.tps_bte_max_iter iter = 0 - tt = 0#interface.currentTime() + tt = 0 tau = (1/boltzmann.param.Efreq) dt_tps = interface.timeStep() dt_bte = boltzmann.param.dt * tau @@ -1065,26 +1229,30 @@ async def __main__(): bte_max_cycles = int(boltzmann.param.cycles) tps_max_cycles = boltzmann.param.bte_solve_freq - print("tps steps per cycle : ", tps_sper_cycle, "bte_steps per cycle", bte_sper_cycle) - tps.solveStep() - tps.push(interface) - p_t1 = 0 - p_t2 = 0 + if (boltzmann.rankG==0): + print("tps steps per cycle : ", tps_sper_cycle, "bte_steps per cycle", bte_sper_cycle, flush=True) + while (iter 0 and bte_idx % terminal_output_freq ==0): @@ -1184,7 +1349,7 @@ def t1(): u_avg[grid_idx] = xp.dot(qA, u_avg[grid_idx]) boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u_avg", u_avg[grid_idx]) await ts - await boltzmann.push(interface, use_interp=bte_use_interp) + await boltzmann.push_async(interface) profile_tt[pp.BTE_PUSH].stop() if boltzmann.param.export_csv ==1: @@ -1192,10 +1357,14 @@ def t1(): dev_id = gidx_to_device_map(grid_idx,n_grids) with cp.cuda.Device(dev_id): u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") - boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d"%(grid_idx)) + boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d_rank_%d_npes_%d"%(grid_idx, rank, npes)) + + with cp.cuda.Device(dev): + cp.cuda.runtime.deviceSynchronize() + comm.Barrier() + """ + - - ################### tps solve ###################################### profile_tt[pp.TPS_FETCH].start() tps.fetch(interface) @@ -1204,9 +1373,6 @@ def t1(): tps_u = 0 tps_v = 0 tt_tps = 0 - - p_t1 = 0 - p_t2 = 0 for tps_idx in range(tps_sper_cycle * tps_max_cycles + 1): if (tps_idx % tps_sper_cycle == 0): tps.push(interface) @@ -1222,7 +1388,7 @@ def t1(): tps_v = np.copy(tps_u) p_t3 = min_mean_max(profile_tt[pp.TPS_SOLVE].snap, comm) - print("[TPS] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E -- runtime = %.4E (s)"%(tps_idx, tt_tps, np.max(abs_error), np.max(rel_error), p_t3[2])) + print("[TPS] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E -- runtime = %.4E (s)"%(tps_idx, tt_tps, np.max(abs_error), np.max(rel_error), p_t3[2]), flush=True) # if (np.max(abs_error) < boltzmann.param.atol or np.max(rel_error) < max(1e-6,boltzmann.param.rtol)): # break @@ -1234,7 +1400,7 @@ def t1(): profile_tt[pp.TPS_SOLVE].stop() if(terminal_output_freq > 0 and tps_idx % terminal_output_freq ==0): p_t3 = min_mean_max(profile_tt[pp.TPS_SOLVE].snap, comm) - print("[TPS] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (tps_idx,tt_tps, p_t3[0],p_t3[1],p_t3[2])) + print("[TPS] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (tps_idx,tt_tps, p_t3[0],p_t3[1],p_t3[2]), flush=True) tt_tps +=dt_tps profile_tt[pp.TPS_PUSH].start() @@ -1243,7 +1409,16 @@ def t1(): tt += dt_tps * tps_idx iter+=1 - - profile_stats(boltzmann, profile_tt, profile_nn, boltzmann.param.out_fname+"_profile.csv" , comm) - tps.solveEnd() - sys.exit (tps.getStatus()) \ No newline at end of file + comm.Barrier() + + #profile_stats(boltzmann, profile_tt, profile_nn, boltzmann.param.out_fname+"_profile.csv" , comm) + tps.solveEnd() + comm.Barrier() + return tps.getStatus() + +if __name__=="__main__": + comm = MPI.COMM_WORLD + driver_w_parla(comm) + + + \ No newline at end of file From 6d0a2cb3d8073f7699d9f2187b97dde7eee7c4c7 Mon Sep 17 00:00:00 2001 From: milindasf Date: Sat, 16 Mar 2024 17:03:16 -0500 Subject: [PATCH 51/75] tps bte minimum parla crash example --- src/tps-bte_0d3v.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 0972275a7..34fd0e6fd 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -13,6 +13,7 @@ import pandas as pd import scipy.interpolate import scipy.cluster +import threading class profile_t: def __init__(self,name): @@ -463,6 +464,7 @@ def normalize(obs): std_obs[std_obs == 0.0] = 1.0 return obs/std_obs, std_obs + thread_pool = list() for grid_idx in self.active_grid_idx: def t1(): dev_id = self.gidx_to_device_map(grid_idx, n_grids) @@ -539,9 +541,16 @@ def t1(): # cp.save(self.param.out_fname + "_E_%02d.npy"%(grid_idx) , EMag , grid_idx) return - t1() + + thread = threading.Thread(target=t1) + thread_pool.append(thread) + thread.start() + + for thread in thread_pool: + thread.join() else: + thread_pool = list() for grid_idx in self.active_grid_idx: def t1(): bte_idx = gidx_to_pidx[grid_idx] @@ -605,10 +614,25 @@ def t1(): # cp.save(self.param.out_fname + "_E_%02d.npy"%(grid_idx) , EMag , grid_idx) return - t1() + + thread = threading.Thread(target=t1) + thread_pool.append(thread) + thread.start() + + for thread in thread_pool: + thread.join() return async def fetch_asnyc(self, interface): + ts = TaskSpace("T") + for grid_idx in self.active_grid_idx: + @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + def t1(): + print("rank [%d/%d] hello from parla task %d"%(self.rankG, self.npesG, grid_idx), flush=True) + return + await ts + return + use_interp = self.param.use_clstr_inp gidx_to_pidx = self.grid_idx_to_spatial_idx_map heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) @@ -1239,8 +1263,8 @@ async def __main__(): # ########################## BTE solve ################################################## profile_tt[pp.BTE_FETCH].start() - #await boltzmann.fetch_asnyc(interface) - boltzmann.fetch(interface) + await boltzmann.fetch_asnyc(interface) + #boltzmann.fetch(interface) profile_tt[pp.BTE_FETCH].stop() """ From c7aedb0372afeaac09b8eb56e876efb0179bb75b Mon Sep 17 00:00:00 2001 From: milindasf Date: Mon, 18 Mar 2024 10:57:46 -0500 Subject: [PATCH 52/75] tps-bte with Parla multi-gpu version [working] --- src/tps-bte_0d3v.py | 332 ++++++++++++++++++++++++++++++-------------- 1 file changed, 224 insertions(+), 108 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 34fd0e6fd..8940abaf4 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -146,6 +146,51 @@ class TPSINDEX(): EX3_IDX = 2 MOLE_FRAC_IDX = {0: NEU_IDX, 1: EX1_IDX , 2: EX2_IDX , 3: EX3_IDX} + +def k_means(x, num_clusters, xi=None, max_iter=1000, thresh=1e-12, rand_seed=0, xp=np): + assert x.ndim == 2, "observations must me 2d array" + if xi is None: + xp.random.seed(rand_seed) + xi = x[xp.random.choice(x.shape[0], num_clusters, replace=False)] + + distortion_0 = xp.zeros(num_clusters) + idx = xp.arange(num_clusters) + for iter in range(max_iter): + distance = xp.linalg.norm(x[:, None, :] - xi[None, :, :], axis=2) + pred = xp.argmin(distance, axis=1) + mask = pred == idx[:, None] + # print(mask) + # print(xp.where(mask[:, :, None], x, 0)) + # print(xp.where(mask[:, :, None], x, 0).sum(axis=1)) + # print(xp.where(mask[:, :, None], x, 0).shape) + sums = xp.where(mask[:, :, None], x, 0).sum(axis=1) + counts = xp.count_nonzero(mask, axis=1) + counts[counts==0] = 1 + # print(distance) + # print(mask) + # print(xp.where(mask.T, distance, 0)) + # print(xp.where(mask.T, distance, 0).sum(axis=0).shape) + # print(counts) + distortion_1 = xp.where(mask.T, distance, 0).sum(axis=0)/counts[:,None] + rel_error = xp.linalg.norm(distortion_0-distortion_1)/xp.linalg.norm(distortion_1) + #print(iter, rel_error, distortion_1, xi) + #print(iter, rel_error, xi[0:3]) + if rel_error < thresh: + break + + xi_new = sums / counts[:,None] + distortion_0 = distortion_1 + # rel_error = xp.max(xp.linalg.norm(xi_new-xi, axis=1)/xp.linalg.norm(xi, axis=1)) + # if rel_error < thresh: + # # print(iter, rel_error) + # # print("xi_new", xi_new[0:10,:], "xi", xi[0:10,:]) + # break + + xi = xi_new + + + return xi, pred + class Boltzmann0D2VBactchedSolver: @@ -161,9 +206,11 @@ def __init__(self, tps, comm): self.__parse_config_file__(sys.argv[2]) self.xp_module = np boltzmann_dir = self.param.output_dir - isExist = os.path.exists(boltzmann_dir) - if not isExist: - os.makedirs(boltzmann_dir) + + if (self.rankG==0): + isExist = os.path.exists(boltzmann_dir) + if not isExist: + os.makedirs(boltzmann_dir) num_gpus_per_node = 1 if self.param.use_gpu==1: @@ -623,16 +670,100 @@ def t1(): thread.join() return - async def fetch_asnyc(self, interface): - ts = TaskSpace("T") - for grid_idx in self.active_grid_idx: - @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + def solve_init(self): + rank = self.comm.Get_rank() + npes = self.comm.Get_size() + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map + + thread_pool = list() + for grid_idx in range(self.param.n_grids): def t1(): - print("rank [%d/%d] hello from parla task %d"%(self.rankG, self.npesG, grid_idx), flush=True) - return - await ts - return + dev_id = gidx_to_device_map(grid_idx, n_grids) + print("rank [%d/%d] setting grid %d to device %d"%(rank, npes, grid_idx, dev_id), flush=True) + with cp.cuda.Device(dev_id): + s1 = cp.cuda.Stream(non_blocking=True) + with s1: + self.bte_solver.host_to_device_setup(dev_id, grid_idx) + s1.synchronize() + + thread = threading.Thread(target=t1) + thread_pool.append(thread) + thread.start() + for thread in thread_pool: + thread.join() + + def ts_op_setup(grid_idx): + xp = self.xp_module + f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + n_pts = f_mw.shape[1] + Qmat = self.bte_solver._op_qmat[grid_idx] + INr = xp.eye(Qmat.shape[1]) + self.bte_solver._op_imat_vx[grid_idx] = xp.einsum("i,jk->ijk",xp.ones(n_pts), INr) + + if self.param.use_clstr_inp==True: + assert n_pts == self.param.n_sub_clusters + + if(self.param.use_gpu==1): + self.xp_module = cp + + thread_pool = list() + for grid_idx in self.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx, n_grids) + def t1(): + with cp.cuda.Device(dev_id): + s1 = cp.cuda.Stream(non_blocking=True) + with s1: + ts_op_setup(grid_idx) + vth = self.bte_solver._par_vth[grid_idx] + qA = self.bte_solver._op_diag_dg[grid_idx] + mw = bte_utils.get_maxwellian_3d(vth, 1) + mm_op = self.bte_solver._op_mass[grid_idx] * mw(0) * vth**3 + f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + f_mw = f_mw/cp.dot(mm_op, f_mw) + f_mw = cp.dot(qA.T, f_mw) + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", cp.copy(f_mw)) + + s1.synchronize() + + thread = threading.Thread(target=t1) + thread_pool.append(thread) + thread.start() + + for thread in thread_pool: + thread.join() + + else: + self.xp_module = np + thread_pool = list() + + for grid_idx in self.active_grid_idx: + def t1(): + ts_op_setup(grid_idx) + + vth = self.bte_solver._par_vth[grid_idx] + qA = self.bte_solver._op_diag_dg[grid_idx] + mw = bte_utils.get_maxwellian_3d(vth, 1) + mm_op = self.bte_solver._op_mass[grid_idx] * mw(0) * vth**3 + f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + f_mw = f_mw/np.dot(mm_op, f_mw) + f_mw = np.dot(qA.T, f_mw) + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", np.copy(f_mw)) + + thread = threading.Thread(target=t1) + thread_pool.append(thread) + thread.start() + + for thread in thread_pool: + thread.join() + + + + return + + async def fetch_asnyc(self, interface): + xp = self.xp_module use_interp = self.param.use_clstr_inp gidx_to_pidx = self.grid_idx_to_spatial_idx_map heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) @@ -667,70 +798,70 @@ def t1(): ns_by_n0[ns_by_n0<=0] = 0 m_bte = np.concatenate([ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1, 1))] + [ ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])], axis=1) - self.sub_cluster_idx_to_pidx = None - self.sub_cluster_c = None - gidx_to_device_map = self.gidx_to_device_map + self.m_bte = m_bte + self.sub_cluster_c = None + self.sub_cluster_c_lbl = None + gidx_to_device_map = self.gidx_to_device_map if (use_interp == True): n_sub_clusters = self.param.n_sub_clusters - self.sub_cluster_idx_to_pidx = [[None for i in range(n_sub_clusters)] for i in range(self.param.n_grids)] self.sub_cluster_c = [None for i in range(self.param.n_grids)] + self.sub_cluster_c_lbl = [None for i in range(self.param.n_grids)] - def normalize(obs): - std_obs = np.std(obs, axis=0) + def normalize(obs, xp): + std_obs = xp.std(obs, axis=0) std_obs[std_obs == 0.0] = 1.0 return obs/std_obs, std_obs ts = TaskSpace("T") for grid_idx in self.active_grid_idx: - @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + @spawn(ts[grid_idx], placement=[gpu(dev_id)], vcus=0.0) def t1(): - dev_id = self.gidx_to_device_map(grid_idx, n_grids) - m = m_bte[gidx_to_pidx[grid_idx]] - mw , mw_std = normalize(m) + # xp = cp + # m = xp.array(m_bte[gidx_to_pidx[grid_idx]]) + # mw , mw_std = normalize(m, xp) + # mcw, membership_m = k_means(mw, num_clusters=self.param.n_sub_clusters, max_iter=1000, thresh=1e-8, rand_seed=self.param.rand_seed, xp=xp) # to repoduce clusters + xp = np np.random.seed(self.param.rand_seed) - + m = m_bte[gidx_to_pidx[grid_idx]] + mw , mw_std = normalize(m, xp) mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=1000, thresh=1e-8, check_finite=False)[0] mcw0[0:mcw.shape[0], :] = mcw[:,:] mcw = mcw0 + dist_mat = xp.linalg.norm(mw[:, None, :] - mcw[None, : , :], axis=2) + membership_m = xp.argmin(dist_mat, axis=1) - assert mcw.shape[0] == self.param.n_sub_clusters - - mc = mcw * mw_std - dist_mat = np.array([np.linalg.norm(mw - mcw[i], axis=1) for i in range(n_sub_clusters)]).T - membership_m = np.argmin(dist_mat, axis=1) - self.sub_cluster_c[grid_idx] = mc - - for c_idx in range(n_sub_clusters): - self.sub_cluster_idx_to_pidx[grid_idx][c_idx] = np.argwhere(membership_m==c_idx)[:,0] - + assert mcw.shape[0] == self.param.n_sub_clusters + mc = mcw * mw_std + self.sub_cluster_c[grid_idx] = mc + self.sub_cluster_c_lbl[grid_idx] = membership_m - - n0 = np.ones(mc.shape[0]) * self.param.n0 + n0 = xp.ones(mc.shape[0]) * self.param.n0 Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac Tg = mc[: , 2] ne = mc[: , 3] * self.param.n0 ni = mc[: , 3] * self.param.n0 - ns_by_n0 = np.transpose(mc[: , 4:]) - EMag = np.sqrt(Ex**2 + Ey**2) + ns_by_n0 = xp.transpose(mc[: , 4:]) + EMag = xp.sqrt(Ex**2 + Ey**2) if self.param.verbose == 1 : print("rank [%d/%d] Boltzmann solver inputs for v-space grid id %d"%(self.rankG, self.npesG, grid_idx), flush=True) print("Efreq = %.4E [1/s]" %(self.param.Efreq) , flush=True) print("n_pts = %d" % self.grid_idx_to_npts[grid_idx], flush=True) - print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0)) , flush=True) - print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN) , np.max(ExbyN)), flush=True) - print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN) , np.max(EybyN)), flush=True) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg)) , flush=True) - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne)) , flush=True) + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(xp.min(n0) , xp.max(n0)) , flush=True) + print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(xp.min(ExbyN) , xp.max(ExbyN)), flush=True) + print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(xp.min(EybyN) , xp.max(EybyN)), flush=True) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(xp.min(Tg) , xp.max(Tg)) , flush=True) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(xp.min(ne) , xp.max(ne)) , flush=True) for i in range(ns_by_n0.shape[0]): - print("[%d] ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(i, np.min(ns_by_n0[i]) , np.max(ns_by_n0[i])), flush=True) + print("[%d] ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(i, xp.min(ns_by_n0[i]) , xp.max(ns_by_n0[i])), flush=True) if (use_gpu==1): with cp.cuda.Device(dev_id): @@ -769,48 +900,46 @@ def t1(): else: ts = TaskSpace("T") for grid_idx in self.active_grid_idx: - @spawn(ts[grid_idx], placement=[cpu], vcus=0.0) + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + @spawn(ts[grid_idx], placement=[gpu(dev_id)], vcus=0.0) def t1(): bte_idx = gidx_to_pidx[grid_idx] - dev_id = self.gidx_to_device_map(grid_idx, n_grids) - - mc = m_bte[bte_idx] + mc = xp.array(m_bte[bte_idx]) - n0 = np.ones(mc.shape[0]) * self.param.n0 + n0 = xp.ones(mc.shape[0]) * self.param.n0 Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac Tg = mc[: , 2] ne = mc[: , 3] * self.param.n0 ni = mc[: , 3] * self.param.n0 - ns_by_n0 = np.transpose(mc[: , 4:]) - - EMag = np.sqrt(Ex**2 + Ey**2) + ns_by_n0 = xp.transpose(mc[: , 4:]) + EMag = xp.sqrt(Ex**2 + Ey**2) if self.param.verbose == 1 : print("rank [%d/%d] Boltzmann solver inputs for v-space grid id %d"%(self.rankG, self.npesG, grid_idx), flush=True) print("Efreq = %.4E [1/s]" %(self.param.Efreq) , flush=True) print("n_pts = %d" % self.grid_idx_to_npts[grid_idx], flush=True) - print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN), np.max(ExbyN)), flush=True) - print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN), np.max(EybyN)), flush=True) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg)) , flush=True) - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne)) , flush=True) + print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(xp.min(ExbyN), xp.max(ExbyN)), flush=True) + print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(xp.min(EybyN), xp.max(EybyN)), flush=True) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(xp.min(Tg) , xp.max(Tg)) , flush=True) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(xp.min(ne) , xp.max(ne)) , flush=True) for i in range(ns_by_n0.shape[0]): - print("ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(np.min(ns_by_n0[i]) , np.max(ns_by_n0[i])), flush=True) + print("ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(xp.min(ns_by_n0[i]) , xp.max(ns_by_n0[i])), flush=True) - if (use_gpu == 1): - with cp.cuda.Device(dev_id): - n0 = cp.array(n0) - ne = cp.array(ne) - ni = cp.array(ni) - Ex = cp.array(Ex) - Ey = cp.array(Ey) - Tg = cp.array(Tg) - EMag = cp.sqrt(Ex**2 + Ey**2) - ns_by_n0 = cp.array(ns_by_n0) + # if (use_gpu == 1): + # with cp.cuda.Device(dev_id): + # n0 = cp.array(n0) + # ne = cp.array(ne) + # ni = cp.array(ni) + # Ex = cp.array(Ex) + # Ey = cp.array(Ey) + # Tg = cp.array(Tg) + # EMag = cp.sqrt(Ex**2 + Ey**2) + # ns_by_n0 = cp.array(ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "ns_by_n0", ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) @@ -1005,14 +1134,17 @@ def t1(): h_curr = self.bte_solver.normalized_distribution(grid_idx, u0) qoi = self.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) rr_cpu = xp.asnumpy(qoi["rates"]) + inp_mask = xp.asnumpy(self.sub_cluster_c_lbl[grid_idx]) == np.arange(self.param.n_sub_clusters)[:, None] - rr_interp = np.zeros(len(gidx_to_pidx_map[grid_idx])) - for r_idx in range(n_reactions): - for c_idx in range(self.param.n_sub_clusters): - rr_interp[self.sub_cluster_idx_to_pidx[grid_idx][c_idx]] = rr_cpu[TPSINDEX.RR_IDX[r_idx]][c_idx] * self.param.N_Avo - - rates[r_idx][gidx_to_pidx_map[grid_idx]] = rr_interp + rr_interp = np.zeros((n_reactions, len(gidx_to_pidx_map[grid_idx]))) + for c_idx in range(self.param.n_sub_clusters): + inp_idx = inp_mask[c_idx] + for r_idx in range(n_reactions): + rr_interp[r_idx, inp_idx] = rr_cpu[TPSINDEX.RR_IDX[r_idx]][c_idx] * self.param.N_Avo + + for r_idx in range(n_reactions): + rates[r_idx][gidx_to_pidx_map[grid_idx]] = rr_interp[r_idx, :] await ts rates = rates.reshape((-1)) rates[rates<0] = 0.0 @@ -1151,6 +1283,9 @@ class pp(enum.IntEnum): def profile_stats(boltzmann:Boltzmann0D2VBactchedSolver, p_tt: profile_t, p_nn, fname, comm): + rank = comm.Get_rank() + npes = comm.Get_size() + Nx = boltzmann.param.n_grids * boltzmann.param.n_sub_clusters Nv = (boltzmann.param.Nr + 1) * (boltzmann.param.l_max + 1) @@ -1182,14 +1317,16 @@ def profile_stats(boltzmann:Boltzmann0D2VBactchedSolver, p_tt: profile_t, p_nn, tt[pp.TPS_PUSH][0] , tt[pp.TPS_PUSH][1] , tt[pp.TPS_PUSH][2] ] data_str= ["%.4E"%d for d in data] - if fname!="": - with open(fname, "a") as f: - f.write(",".join(header)+"\n") - f.write(",".join(data_str)+"\n") - f.close() - else: - print(",".join(header)) - print(",".join(data_str)) + + if rank ==0 : + if fname!="": + with open(fname, "a") as f: + f.write(",".join(header)+"\n") + f.write(",".join(data_str)+"\n") + f.close() + else: + print(",".join(header)) + print(",".join(data_str)) def driver_w_parla(comm): @@ -1197,12 +1334,10 @@ def driver_w_parla(comm): rank = comm.Get_rank() npes = comm.Get_size() - dev = rank % 3 - with Parla(): - @spawn(placement=cpu, vcus=0) + dev_id = rank % len(gpu) + @spawn(placement=[gpu(dev_id)], vcus=0) async def __main__(): - # TPS solver profile_tt[pp.TPS_SETUP].start() tps = libtps.Tps(comm) @@ -1220,10 +1355,6 @@ async def __main__(): tps.solveStep() tps.push(interface) - # with cp.cuda.Device(dev): - # cp.cuda.runtime.deviceSynchronize() - # comm.Barrier() - boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) rank = boltzmann.comm.Get_rank() npes = boltzmann.comm.Get_size() @@ -1232,10 +1363,9 @@ async def __main__(): boltzmann.grid_setup(interface) profile_tt[pp.BTE_SETUP].stop() - # await boltzmann.solve_init_async() - # xp = boltzmann.bte_solver.xp_module - xp = cp - max_iters = 1#boltzmann.param.tps_bte_max_iter + await boltzmann.solve_init_async() + xp = boltzmann.bte_solver.xp_module + max_iters = boltzmann.param.tps_bte_max_iter iter = 0 tt = 0 tau = (1/boltzmann.param.Efreq) @@ -1257,27 +1387,20 @@ async def __main__(): print("tps steps per cycle : ", tps_sper_cycle, "bte_steps per cycle", bte_sper_cycle, flush=True) while (iter Date: Mon, 18 Mar 2024 12:12:59 -0500 Subject: [PATCH 53/75] single stream no parla + mpi bte solver added --- src/tps-bte_0d3v.py | 597 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 471 insertions(+), 126 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 8940abaf4..123763d6b 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -465,6 +465,7 @@ def asnumpy(a): return def fetch(self, interface): + xp = self.xp_module use_interp = self.param.use_clstr_inp gidx_to_pidx = self.grid_idx_to_spatial_idx_map heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) @@ -474,88 +475,94 @@ def fetch(self, interface): electron_temp = np.array(interface.HostRead(libtps.t2bIndex.ElectronTemperature), copy=False) efield = np.array(interface.HostRead(libtps.t2bIndex.ElectricField), copy=False).reshape((2, tps_npts)) species_densities = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(nspecies, tps_npts) + cs_avail_species = self.bte_solver._avail_species + n0 = np.sum(species_densities, axis=0) - species_densities[TPSINDEX.ELE_IDX] ns_by_n0 = np.concatenate([species_densities[TPSINDEX.MOLE_FRAC_IDX[i]]/n0 for i in range(len(cs_avail_species))]).reshape((len(cs_avail_species), tps_npts)) - n_grids = self.param.n_grids - use_gpu = self.param.use_gpu + n_grids = self.param.n_grids + use_gpu = self.param.use_gpu - Tg = heavy_temp + Tg = heavy_temp - Ex = efield[0] - Ey = efield[1] + Ex = efield[0] + Ey = efield[1] - ExbyN = Ex/n0/self.param.Td_fac - EybyN = Ey/n0/self.param.Td_fac + ExbyN = Ex/n0/self.param.Td_fac + EybyN = Ey/n0/self.param.Td_fac - Ex = ExbyN * self.param.n0 * self.param.Td_fac - Ey = EybyN * self.param.n0 * self.param.Td_fac + Ex = ExbyN * self.param.n0 * self.param.Td_fac + Ey = EybyN * self.param.n0 * self.param.Td_fac - ion_deg = species_densities[TPSINDEX.ELE_IDX]/n0 - ion_deg[ion_deg<=0] = 1e-16 - ns_by_n0[ns_by_n0<=0]= 0 - m_bte = np.concatenate([ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1, 1))] + [ ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])], axis=1) + ion_deg = species_densities[TPSINDEX.ELE_IDX]/n0 + ion_deg[ion_deg<=0] = 1e-16 + ns_by_n0[ns_by_n0<=0] = 0 + m_bte = np.concatenate([ExbyN.reshape((-1, 1)), EybyN.reshape((-1, 1)), Tg.reshape((-1, 1)), ion_deg.reshape((-1, 1))] + [ ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])], axis=1) - self.sub_cluster_idx_to_pidx = None - self.sub_cluster_c = None - gidx_to_device_map = self.gidx_to_device_map + self.m_bte = m_bte + self.sub_cluster_c = None + self.sub_cluster_c_lbl = None + gidx_to_device_map = self.gidx_to_device_map if (use_interp == True): n_sub_clusters = self.param.n_sub_clusters - self.sub_cluster_idx_to_pidx = [[None for i in range(n_sub_clusters)] for i in range(self.param.n_grids)] self.sub_cluster_c = [None for i in range(self.param.n_grids)] + self.sub_cluster_c_lbl = [None for i in range(self.param.n_grids)] - def normalize(obs): - std_obs = np.std(obs, axis=0) + def normalize(obs, xp): + std_obs = xp.std(obs, axis=0) std_obs[std_obs == 0.0] = 1.0 return obs/std_obs, std_obs - thread_pool = list() for grid_idx in self.active_grid_idx: + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + def t1(): - dev_id = self.gidx_to_device_map(grid_idx, n_grids) - m = m_bte[gidx_to_pidx[grid_idx]] - mw , mw_std = normalize(m) + # xp = cp + # m = xp.array(m_bte[gidx_to_pidx[grid_idx]]) + # mw , mw_std = normalize(m, xp) + # mcw, membership_m = k_means(mw, num_clusters=self.param.n_sub_clusters, max_iter=1000, thresh=1e-8, rand_seed=self.param.rand_seed, xp=xp) # to repoduce clusters + xp = np np.random.seed(self.param.rand_seed) - + m = m_bte[gidx_to_pidx[grid_idx]] + mw , mw_std = normalize(m, xp) mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=1000, thresh=1e-8, check_finite=False)[0] mcw0[0:mcw.shape[0], :] = mcw[:,:] mcw = mcw0 + dist_mat = xp.linalg.norm(mw[:, None, :] - mcw[None, : , :], axis=2) + membership_m = xp.argmin(dist_mat, axis=1) - mc = mcw * mw_std - dist_mat = np.array([np.linalg.norm(mw - mcw[i], axis=1) for i in range(n_sub_clusters)]).T - membership_m = np.argmin(dist_mat, axis=1) - self.sub_cluster_c[grid_idx] = mc + assert mcw.shape[0] == self.param.n_sub_clusters + mc = mcw * mw_std + self.sub_cluster_c[grid_idx] = mc + self.sub_cluster_c_lbl[grid_idx] = membership_m - for c_idx in range(n_sub_clusters): - self.sub_cluster_idx_to_pidx[grid_idx][c_idx] = np.argwhere(membership_m==c_idx)[:,0] - - n0 = np.ones(mc.shape[0]) * self.param.n0 + n0 = xp.ones(mc.shape[0]) * self.param.n0 Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac Tg = mc[: , 2] ne = mc[: , 3] * self.param.n0 ni = mc[: , 3] * self.param.n0 - ns_by_n0 = np.transpose(mc[: , 4:]) - EMag = np.sqrt(Ex**2 + Ey**2) + ns_by_n0 = xp.transpose(mc[: , 4:]) + EMag = xp.sqrt(Ex**2 + Ey**2) if self.param.verbose == 1 : - print("rank = %d Boltzmann solver inputs for v-space grid id %d"%(self.rankG, grid_idx)) - print("Efreq = %.4E [1/s]" %(self.param.Efreq)) - print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) + print("rank [%d/%d] Boltzmann solver inputs for v-space grid id %d"%(self.rankG, self.npesG, grid_idx), flush=True) + print("Efreq = %.4E [1/s]" %(self.param.Efreq) , flush=True) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx], flush=True) - print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(np.min(n0) , np.max(n0))) - print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN) , np.max(ExbyN))) - print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN) , np.max(EybyN))) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) + print("n0 (min) = %.12E [1/m^3] \t n0 (max) = %.12E [1/m^3] "%(xp.min(n0) , xp.max(n0)) , flush=True) + print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(xp.min(ExbyN) , xp.max(ExbyN)), flush=True) + print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(xp.min(EybyN) , xp.max(EybyN)), flush=True) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(xp.min(Tg) , xp.max(Tg)) , flush=True) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(xp.min(ne) , xp.max(ne)) , flush=True) for i in range(ns_by_n0.shape[0]): - print("[%d] ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(i, np.min(ns_by_n0[i]) , np.max(ns_by_n0[i]))) + print("[%d] ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(i, xp.min(ns_by_n0[i]) , xp.max(ns_by_n0[i])), flush=True) if (use_gpu==1): with cp.cuda.Device(dev_id): @@ -589,57 +596,52 @@ def t1(): return - thread = threading.Thread(target=t1) - thread_pool.append(thread) - thread.start() - - for thread in thread_pool: - thread.join() + with cp.cuda.Device(dev_id): + t1() else: - thread_pool = list() + ts = TaskSpace("T") for grid_idx in self.active_grid_idx: + dev_id = self.gidx_to_device_map(grid_idx, n_grids) + def t1(): bte_idx = gidx_to_pidx[grid_idx] - dev_id = self.gidx_to_device_map(grid_idx, n_grids) - - mc = m_bte[bte_idx] + mc = xp.array(m_bte[bte_idx]) - n0 = np.ones(mc.shape[0]) * self.param.n0 + n0 = xp.ones(mc.shape[0]) * self.param.n0 Ex = mc[: , 0] * self.param.n0 * self.param.Td_fac Ey = mc[: , 1] * self.param.n0 * self.param.Td_fac Tg = mc[: , 2] ne = mc[: , 3] * self.param.n0 ni = mc[: , 3] * self.param.n0 - ns_by_n0 = np.transpose(mc[: , 4:]) - - EMag = np.sqrt(Ex**2 + Ey**2) + ns_by_n0 = xp.transpose(mc[: , 4:]) + EMag = xp.sqrt(Ex**2 + Ey**2) if self.param.verbose == 1 : - print("rank = %d Boltzmann solver inputs for v-space grid id %d"%(self.rankG, grid_idx)) - print("Efreq = %.4E [1/s]" %(self.param.Efreq)) - print("n_pts = %d" % self.grid_idx_to_npts[grid_idx]) + print("rank [%d/%d] Boltzmann solver inputs for v-space grid id %d"%(self.rankG, self.npesG, grid_idx), flush=True) + print("Efreq = %.4E [1/s]" %(self.param.Efreq) , flush=True) + print("n_pts = %d" % self.grid_idx_to_npts[grid_idx], flush=True) - print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(np.min(ExbyN), np.max(ExbyN))) - print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(np.min(EybyN), np.max(EybyN))) - print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(np.min(Tg) , np.max(Tg))) - print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(np.min(ne) , np.max(ne))) + print("Ex/n0 (min) = %.12E [Td] \t Ex/n0(max) = %.12E [Td] "%(xp.min(ExbyN), xp.max(ExbyN)), flush=True) + print("Ey/n0 (min) = %.12E [Td] \t Ey/n0(max) = %.12E [Td] "%(xp.min(EybyN), xp.max(EybyN)), flush=True) + print("Tg (min) = %.12E [K] \t Tg (max) = %.12E [K] "%(xp.min(Tg) , xp.max(Tg)) , flush=True) + print("ne (min) = %.12E [1/m^3] \t ne (max) = %.12E [1/m^3] "%(xp.min(ne) , xp.max(ne)) , flush=True) for i in range(ns_by_n0.shape[0]): - print("ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(np.min(ns_by_n0[i]) , np.max(ns_by_n0[i]))) + print("ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(xp.min(ns_by_n0[i]) , xp.max(ns_by_n0[i])), flush=True) - if (use_gpu == 1): - with cp.cuda.Device(dev_id): - n0 = cp.array(n0) - ne = cp.array(ne) - ni = cp.array(ni) - Ex = cp.array(Ex) - Ey = cp.array(Ey) - Tg = cp.array(Tg) - EMag = cp.sqrt(Ex**2 + Ey**2) - ns_by_n0 = cp.array(ns_by_n0) + # if (use_gpu == 1): + # with cp.cuda.Device(dev_id): + # n0 = cp.array(n0) + # ne = cp.array(ne) + # ni = cp.array(ni) + # Ex = cp.array(Ex) + # Ey = cp.array(Ey) + # Tg = cp.array(Tg) + # EMag = cp.sqrt(Ex**2 + Ey**2) + # ns_by_n0 = cp.array(ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "ns_by_n0", ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) @@ -661,13 +663,10 @@ def t1(): # cp.save(self.param.out_fname + "_E_%02d.npy"%(grid_idx) , EMag , grid_idx) return - - thread = threading.Thread(target=t1) - thread_pool.append(thread) - thread.start() - - for thread in thread_pool: - thread.join() + + with cp.cuda.Device(dev_id): + t1() + return def solve_init(self): @@ -676,23 +675,13 @@ def solve_init(self): n_grids = self.param.n_grids gidx_to_device_map = self.gidx_to_device_map - thread_pool = list() for grid_idx in range(self.param.n_grids): def t1(): dev_id = gidx_to_device_map(grid_idx, n_grids) print("rank [%d/%d] setting grid %d to device %d"%(rank, npes, grid_idx, dev_id), flush=True) - with cp.cuda.Device(dev_id): - s1 = cp.cuda.Stream(non_blocking=True) - with s1: - self.bte_solver.host_to_device_setup(dev_id, grid_idx) - s1.synchronize() - - thread = threading.Thread(target=t1) - thread_pool.append(thread) - thread.start() - - for thread in thread_pool: - thread.join() + self.bte_solver.host_to_device_setup(dev_id, grid_idx) + + t1() def ts_op_setup(grid_idx): xp = self.xp_module @@ -707,37 +696,26 @@ def ts_op_setup(grid_idx): if(self.param.use_gpu==1): self.xp_module = cp - - thread_pool = list() for grid_idx in self.active_grid_idx: dev_id = gidx_to_device_map(grid_idx, n_grids) def t1(): - with cp.cuda.Device(dev_id): - s1 = cp.cuda.Stream(non_blocking=True) - with s1: - ts_op_setup(grid_idx) - vth = self.bte_solver._par_vth[grid_idx] - qA = self.bte_solver._op_diag_dg[grid_idx] - mw = bte_utils.get_maxwellian_3d(vth, 1) - mm_op = self.bte_solver._op_mass[grid_idx] * mw(0) * vth**3 - f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") - f_mw = f_mw/cp.dot(mm_op, f_mw) - f_mw = cp.dot(qA.T, f_mw) - self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", cp.copy(f_mw)) - - s1.synchronize() - - thread = threading.Thread(target=t1) - thread_pool.append(thread) - thread.start() - - for thread in thread_pool: - thread.join() + ts_op_setup(grid_idx) + + vth = self.bte_solver._par_vth[grid_idx] + qA = self.bte_solver._op_diag_dg[grid_idx] + mw = bte_utils.get_maxwellian_3d(vth, 1) + mm_op = self.bte_solver._op_mass[grid_idx] * mw(0) * vth**3 + f_mw = self.bte_solver.get_boltzmann_parameter(grid_idx, "f_mw") + f_mw = f_mw/cp.dot(mm_op, f_mw) + f_mw = cp.dot(qA.T, f_mw) + + self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", cp.copy(f_mw)) + + with cp.cuda.Device(dev_id): + t1() else: self.xp_module = np - thread_pool = list() - for grid_idx in self.active_grid_idx: def t1(): ts_op_setup(grid_idx) @@ -751,17 +729,147 @@ def t1(): f_mw = np.dot(qA.T, f_mw) self.bte_solver.set_boltzmann_parameter(grid_idx, "u0", np.copy(f_mw)) - thread = threading.Thread(target=t1) - thread_pool.append(thread) - thread.start() + t1() - for thread in thread_pool: - thread.join() - + return + def solve_step(self, time, delta_t): + """ + perform a single timestep in 0d-BTE + """ + rank = self.rankG + npes = self.npesG + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map + for grid_idx in self.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + + def t1(): + + # seting the E field for time t + dt (implicit step) + xp = self.bte_solver.xp_module + eRe = self.bte_solver.get_boltzmann_parameter(grid_idx, "eRe") + eIm = self.bte_solver.get_boltzmann_parameter(grid_idx, "eIm") + Et = eRe * xp.cos(2 * xp.pi * self.param.Efreq * (time + delta_t)) + eIm * xp.sin(2 * xp.pi * self.param.Efreq * (time + delta_t)) + self.bte_solver.set_boltzmann_parameter(grid_idx, "E", Et) + + u0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u0") + v = self.bte_solver.step(grid_idx, u0, self.param.atol, self.param.rtol, self.param.max_iter, time, delta_t) + self.bte_solver.set_boltzmann_parameter(grid_idx, "u1", v) + + with cp.cuda.Device(dev_id): + t1() + return + + def solve(self): + """ + Can be used to compute steady-state or cycle averaged BTE solutions + """ + rank = self.rankG + npes = self.npesG + xp = self.xp_module + csv_write = self.param.export_csv + plot_data = self.param.plot_data + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + use_gpu = self.param.use_gpu + dev_id = self.param.dev_id + verbose = self.param.verbose + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map + + self.qoi = [None for grid_idx in range(self.param.n_grids)] + self.ff = [None for grid_idx in range(self.param.n_grids)] + coll_list = self.bte_solver.get_collision_list() + coll_names = self.bte_solver.get_collision_names() + + if csv_write: + data_csv = np.empty((self.tps_npts, 8 + len(coll_list))) + + for grid_idx in self.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + + def t1(): + try: + print("rank [%d/%d] BTE launching grid %d on %s"%(rank, npes, grid_idx, dev_id), flush=True) + f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u0") + ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) + self.ff[grid_idx] = ff + self.qoi[grid_idx] = qoi + except: + print("rank [%d/%d] solver failed for v-space gird no %d"%(self.rankG, self.npesG, grid_idx), flush=True) + sys.exit(-1) + + with cp.cuda.Device(dev_id): + t1() return + def push(self, interface): + xp = self.xp_module + n_grids = self.param.n_grids + gidx_to_device_map = self.gidx_to_device_map + gidx_to_pidx_map = self.grid_idx_to_spatial_idx_map + use_interp = self.param.use_clstr_inp + + heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + tps_npts = len(heavy_temp) + + n_reactions = interface.nComponents(libtps.t2bIndex.ReactionRates) + rates = np.array(interface.HostWrite(libtps.t2bIndex.ReactionRates), copy=False).reshape((n_reactions, tps_npts)) + + if (use_interp==True): + if(n_reactions>0): + rates[:,:] = 0.0 + for grid_idx in self.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + + def t1(): + qA = self.bte_solver._op_diag_dg[grid_idx] + u0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + h_curr = self.bte_solver.normalized_distribution(grid_idx, u0) + qoi = self.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + rr_cpu = xp.asnumpy(qoi["rates"]) + inp_mask = xp.asnumpy(self.sub_cluster_c_lbl[grid_idx]) == np.arange(self.param.n_sub_clusters)[:, None] + + rr_interp = np.zeros((n_reactions, len(gidx_to_pidx_map[grid_idx]))) + + for c_idx in range(self.param.n_sub_clusters): + inp_idx = inp_mask[c_idx] + for r_idx in range(n_reactions): + rr_interp[r_idx, inp_idx] = rr_cpu[TPSINDEX.RR_IDX[r_idx]][c_idx] * self.param.N_Avo + + for r_idx in range(n_reactions): + rates[r_idx][gidx_to_pidx_map[grid_idx]] = rr_interp[r_idx, :] + + with cp.cuda.Device(dev_id): + t1() + + rates = rates.reshape((-1)) + rates[rates<0] = 0.0 + else: + if(n_reactions>0): + rates[:,:] = 0.0 + + for grid_idx in self.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + + def t1(): + qA = self.bte_solver._op_diag_dg[grid_idx] + u0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + h_curr = self.bte_solver.normalized_distribution(grid_idx, u0) + qoi = self.bte_solver.compute_QoIs(grid_idx, h_curr, effective_mobility=False) + rr_cpu = xp.asnumpy(qoi["rates"]) + + for r_idx in range(n_reactions): + rates[r_idx][gidx_to_pidx_map[grid_idx]] = rr_cpu[TPSINDEX.RR_IDX[r_idx]] * self.param.N_Avo + + with cp.cuda.Device(dev_id): + t1() + + rates = rates.reshape((-1)) + rates[rates<0] = 0.0 + return + async def fetch_asnyc(self, interface): xp = self.xp_module use_interp = self.param.use_clstr_inp @@ -1556,6 +1664,243 @@ def t1(): comm.Barrier() return tps.getStatus() +def driver_wo_parla(comm): + + rank = comm.Get_rank() + npes = comm.Get_size() + + dev_id = rank % (cp.cuda.runtime.getDeviceCount()) + + + with cp.cuda.Device(dev_id): + def __main__(): + # TPS solver + profile_tt[pp.TPS_SETUP].start() + tps = libtps.Tps(comm) + tps.parseCommandLineArgs(sys.argv) + tps.parseInput() + tps.chooseDevices() + tps.chooseSolver() + tps.initialize() + profile_tt[pp.TPS_SETUP].stop() + + interface = libtps.Tps2Boltzmann(tps) + tps.initInterface(interface) + tps.solveBegin() + # --- first TPS step is needed to initialize the EM fields + tps.solveStep() + tps.push(interface) + + boltzmann = Boltzmann0D2VBactchedSolver(tps, comm) + rank = boltzmann.comm.Get_rank() + npes = boltzmann.comm.Get_size() + + profile_tt[pp.BTE_SETUP].start() + boltzmann.grid_setup(interface) + profile_tt[pp.BTE_SETUP].stop() + + boltzmann.solve_init() + xp = boltzmann.bte_solver.xp_module + max_iters = boltzmann.param.tps_bte_max_iter + iter = 0 + tt = 0 + tau = (1/boltzmann.param.Efreq) + dt_tps = interface.timeStep() + dt_bte = boltzmann.param.dt * tau + bte_steps = int(dt_tps/dt_bte) + n_grids = boltzmann.param.n_grids + + cycle_freq = 1 #int(xp.ceil(tau/dt_tps)) + terminal_output_freq = -1 + gidx_to_device_map = boltzmann.gidx_to_device_map + + tps_sper_cycle = int(xp.ceil(tau/dt_tps)) + bte_sper_cycle = int(xp.ceil(tau/dt_bte)) + bte_max_cycles = int(boltzmann.param.cycles) + tps_max_cycles = boltzmann.param.bte_solve_freq + + if (boltzmann.rankG==0): + print("tps steps per cycle : ", tps_sper_cycle, "bte_steps per cycle", bte_sper_cycle, flush=True) + + while (iter0): + # print(grid_idx, " u_ptr ", u_avg[grid_idx].data, " v_ptr " , v_avg[grid_idx].data) + + bte_v[grid_idx] = xp.copy(u0) + + with cp.cuda.Device(dev_id): + t1() + + p_t3 = min_mean_max(profile_tt[pp.BTE_SOLVE].snap, comm) + print("[BTE] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E --- runtime = %.4E (s) "%(bte_idx, tt_bte, max(abs_error), max(rel_error), p_t3[2]), flush=True) + + if max(abs_error) < boltzmann.param.atol or max(rel_error)< boltzmann.param.rtol: + break + + if bte_idx < bte_sper_cycle * bte_max_cycles: + u_avg = [0 for i in range(n_grids)] + + if bte_idx == bte_sper_cycle * bte_max_cycles : + break + + for grid_idx in boltzmann.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + def t1(): + u_avg[grid_idx] += cycle_f1 * boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u0") + + with cp.cuda.Device(dev_id): + t1() + + profile_tt[pp.BTE_SOLVE].start() + boltzmann.solve_step(tt_bte, dt_bte) + profile_tt[pp.BTE_SOLVE].stop() + + if(terminal_output_freq > 0 and bte_idx % terminal_output_freq ==0): + p_t3 = min_mean_max(profile_tt[pp.BTE_SOLVE].snap, comm) + print("[BTE] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (bte_idx, tt_bte, p_t3[0], p_t3[1], p_t3[2])) + + for grid_idx in boltzmann.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + def t1(): + u_avg[grid_idx] += cycle_f1 * boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u1") + boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u0", boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u1")) + + with cp.cuda.Device(dev_id): + t1() + + tt_bte += dt_bte + + profile_tt[pp.BTE_PUSH].start() + for grid_idx in boltzmann.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + + def t1(): + xp = boltzmann.xp_module + qA = boltzmann.bte_solver._op_diag_dg[grid_idx] + u_avg[grid_idx] = xp.dot(qA, u_avg[grid_idx]) + boltzmann.bte_solver.set_boltzmann_parameter(grid_idx, "u_avg", u_avg[grid_idx]) + + with cp.cuda.Device(dev_id): + t1() + + boltzmann.push(interface) + profile_tt[pp.BTE_PUSH].stop() + + if boltzmann.param.export_csv ==1: + for grid_idx in boltzmann.active_grid_idx: + dev_id = gidx_to_device_map(grid_idx,n_grids) + with cp.cuda.Device(dev_id): + u_vec = boltzmann.bte_solver.get_boltzmann_parameter(grid_idx, "u_avg") + boltzmann.io_output_data(grid_idx, u_vec, plot_data=True, export_csv=True, fname=boltzmann.param.out_fname+"_grid_%02d_rank_%d_npes_%d"%(grid_idx, rank, npes)) + + ################### tps solve ###################################### + profile_tt[pp.TPS_FETCH].start() + tps.fetch(interface) + profile_tt[pp.TPS_FETCH].stop() + + tps_u = 0 + tps_v = 0 + tt_tps = 0 + for tps_idx in range(tps_sper_cycle * tps_max_cycles + 1): + if (tps_idx % tps_sper_cycle == 0): + tps.push(interface) + nspecies = interface.Nspecies() + heavy_temp = np.array(interface.HostRead(libtps.t2bIndex.HeavyTemperature), copy=False) + tps_npts = len(heavy_temp) + tps_u = np.array(interface.HostRead(libtps.t2bIndex.SpeciesDensities), copy=False).reshape(nspecies, tps_npts) + # rates = np.array(interface.HostRead(libtps.t2bIndex.ReactionRates), copy=False).reshape((1, tps_npts)) + # print("rates", np.min(rates[0]), np.max(rates[0])) + + abs_error = np.linalg.norm(tps_u - tps_v, axis=1) + rel_error = abs_error / np.linalg.norm(tps_u, axis=1) + tps_v = np.copy(tps_u) + + p_t3 = min_mean_max(profile_tt[pp.TPS_SOLVE].snap, comm) + print("[TPS] step = %04d time = %.4E ||u1 - u0|| = %.4E ||u0 - u1|| / ||u0|| = %.4E -- runtime = %.4E (s)"%(tps_idx, tt_tps, np.max(abs_error), np.max(rel_error), p_t3[2]), flush=True) + # if (np.max(abs_error) < boltzmann.param.atol or np.max(rel_error) < max(1e-6,boltzmann.param.rtol)): + # break + + if (tps_idx == tps_sper_cycle * tps_max_cycles): + break + + profile_tt[pp.TPS_SOLVE].start() + tps.solveStep() + profile_tt[pp.TPS_SOLVE].stop() + if(terminal_output_freq > 0 and tps_idx % terminal_output_freq ==0): + p_t3 = min_mean_max(profile_tt[pp.TPS_SOLVE].snap, comm) + print("[TPS] %04d simulation time = %.4E cycle step (min) = %.4E (s) step (mean) = %.4E (s) step (max) = %.4E (s)" % (tps_idx,tt_tps, p_t3[0],p_t3[1],p_t3[2]), flush=True) + tt_tps +=dt_tps + + profile_tt[pp.TPS_PUSH].start() + tps.push(interface) + profile_tt[pp.TPS_PUSH].stop() + + tt += dt_tps * tps_idx + iter+=1 + + profile_stats(boltzmann, profile_tt, profile_nn, boltzmann.param.out_fname+"_profile.csv" , comm) + tps.solveEnd() + comm.Barrier() + return tps.getStatus() + + __main__() + if __name__=="__main__": comm = MPI.COMM_WORLD driver_w_parla(comm) From 3e180b8cbb1b7db571f1c5f2192f020ac3573705 Mon Sep 17 00:00:00 2001 From: milindasf Date: Tue, 19 Mar 2024 10:21:29 -0500 Subject: [PATCH 54/75] datetime stamp and parameters dump added to the timing output file --- src/tps-bte_0d3v.py | 48 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index 123763d6b..fb4608ce8 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -14,6 +14,9 @@ import scipy.interpolate import scipy.cluster import threading +import datetime +# Use asynchronous stream ordered memory +#cp.cuda.set_allocator(cp.cuda.MemoryAsyncPool().malloc) class profile_t: def __init__(self,name): @@ -1205,11 +1208,13 @@ async def solve_async(self): @spawn(ts[grid_idx], placement=[parla_placement[grid_idx]], vcus=0.0) def t1(): try: + cp.cuda.nvtx.RangePush("bte_solve") print("rank [%d/%d] BTE launching grid %d on %s"%(rank, npes, grid_idx, parla_placement[grid_idx]), flush=True) f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u0") ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) self.ff[grid_idx] = ff self.qoi[grid_idx] = qoi + cp.cuda.nvtx.RangePop() except: print("rank [%d/%d] solver failed for v-space gird no %d"%(self.rankG, self.npesG, grid_idx), flush=True) sys.exit(-1) @@ -1375,6 +1380,42 @@ def asnumpy(a): return + def params_dump(self): + params_dict = dict() + params_dict["sp_order"] = self.param.sp_order + params_dict["spline_qpts"] = self.param.spline_qpts + params_dict["Nr"] = self.param.Nr + params_dict["l_max"] = self.param.l_max + params_dict["ev_max"] = self.param.ev_max + params_dict["n_grids"] = self.param.n_grids + params_dict["n_sub_clusters"] = self.param.n_sub_clusters + params_dict["dt"] = self.param.dt + params_dict["cycles"] = self.param.cycles + params_dict["solver_type"] = self.param.solver_type + params_dict["atol"] = self.param.atol + params_dict["rtol"] = self.param.rtol + params_dict["max_iter"] = self.param.max_iter + params_dict["tps_bte_max_iter"] = self.param.tps_bte_max_iter + params_dict["bte_solve_freq"] = self.param.bte_solve_freq + params_dict["ee_collisions"] = self.param.ee_collisions + params_dict["use_gpu"] = self.param.use_gpu + params_dict["dev_id"] = self.param.dev_id + params_dict["collisions"] = self.param.collisions + params_dict["export_csv"] = self.param.export_csv + params_dict["plot_data"] = self.param.plot_data + params_dict["Efreq"] = self.param.Efreq + params_dict["verbose"] = self.param.verbose + params_dict["Te"] = self.param.Te + params_dict["threads"] = self.param.threads + params_dict["grid_idx"] = self.param.grid_idx + params_dict["output_dir"] = self.param.output_dir + params_dict["out_fname"] = self.param.out_fname + params_dict["rand_seed"] = self.param.rand_seed + params_dict["use_clstr_inp"] = self.param.use_clstr_inp + + return params_dict + + class pp(enum.IntEnum): BTE_SETUP = 0 BTE_FETCH = 1 @@ -1429,8 +1470,11 @@ def profile_stats(boltzmann:Boltzmann0D2VBactchedSolver, p_tt: profile_t, p_nn, if rank ==0 : if fname!="": with open(fname, "a") as f: + f.write(datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")+"\n") + f.write(""+str(boltzmann.params_dump())+"\n") f.write(",".join(header)+"\n") f.write(",".join(data_str)+"\n") + f.write("---" + "\n") f.close() else: print(",".join(header)) @@ -1903,6 +1947,10 @@ def t1(): if __name__=="__main__": comm = MPI.COMM_WORLD + # print("running without parla") + # driver_wo_parla(comm) + + print("running with parla") driver_w_parla(comm) From acfcab5c335c3d2d3052b68f15e50a0d988cc7be Mon Sep 17 00:00:00 2001 From: milindasf Date: Tue, 19 Mar 2024 22:58:27 -0500 Subject: [PATCH 55/75] minor changes --- src/tps-bte_0d3v.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index fb4608ce8..b00705cc6 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -63,7 +63,7 @@ def min_mean_max(a, comm: MPI.Comm): from bte_0d3v_batched import bte_0d3v_batched as BoltzmannSolver import utils as bte_utils -WITH_PARLA = 1 +WITH_PARLA = 0 if WITH_PARLA: try: from parla import Parla @@ -1208,15 +1208,15 @@ async def solve_async(self): @spawn(ts[grid_idx], placement=[parla_placement[grid_idx]], vcus=0.0) def t1(): try: - cp.cuda.nvtx.RangePush("bte_solve") + #cp.cuda.nvtx.RangePush("bte_solve") print("rank [%d/%d] BTE launching grid %d on %s"%(rank, npes, grid_idx, parla_placement[grid_idx]), flush=True) f0 = self.bte_solver.get_boltzmann_parameter(grid_idx, "u0") ff , qoi = self.bte_solver.solve(grid_idx, f0, self.param.atol, self.param.rtol, self.param.max_iter, self.param.solver_type) self.ff[grid_idx] = ff self.qoi[grid_idx] = qoi - cp.cuda.nvtx.RangePop() - except: - print("rank [%d/%d] solver failed for v-space gird no %d"%(self.rankG, self.npesG, grid_idx), flush=True) + #cp.cuda.nvtx.RangePop() + except Exception as e: + print("rank [%d/%d] solver failed for v-space gird no %d with error = %s"%(self.rankG, self.npesG, grid_idx, str(e)), flush=True) sys.exit(-1) await ts @@ -1947,11 +1947,11 @@ def t1(): if __name__=="__main__": comm = MPI.COMM_WORLD - # print("running without parla") - # driver_wo_parla(comm) + print("running without parla") + driver_wo_parla(comm) - print("running with parla") - driver_w_parla(comm) + # print("running with parla") + # driver_w_parla(comm) From 38b69be020d9d902843c5b8326e11538c35d520b Mon Sep 17 00:00:00 2001 From: milindasf Date: Thu, 21 Mar 2024 09:16:24 -0500 Subject: [PATCH 56/75] nprocs added to the profile file --- src/tps-bte_0d3v.py | 56 +++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index b00705cc6..f00aa7dcc 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -121,8 +121,10 @@ class BoltzmannSolverParams(): n0 = 3.22e22 #[m^{-3}] - rand_seed = 0 - use_clstr_inp = True + rand_seed = 0 + use_clstr_inp = True + clstr_maxiter = 10 + clstr_threshold = 1e-3 class TPSINDEX(): """ @@ -525,7 +527,7 @@ def t1(): # xp = cp # m = xp.array(m_bte[gidx_to_pidx[grid_idx]]) # mw , mw_std = normalize(m, xp) - # mcw, membership_m = k_means(mw, num_clusters=self.param.n_sub_clusters, max_iter=1000, thresh=1e-8, rand_seed=self.param.rand_seed, xp=xp) + # mcw, membership_m = k_means(mw, num_clusters=self.param.n_sub_clusters, max_iter=self.param.clstr_maxiter, thresh=self.param.clstr_threshold, rand_seed=self.param.rand_seed, xp=xp) # to repoduce clusters xp = np @@ -533,7 +535,7 @@ def t1(): m = m_bte[gidx_to_pidx[grid_idx]] mw , mw_std = normalize(m, xp) mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] - mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=1000, thresh=1e-8, check_finite=False)[0] + mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=self.param.clstr_maxiter, thresh=self.param.clstr_threshold, check_finite=False)[0] mcw0[0:mcw.shape[0], :] = mcw[:,:] mcw = mcw0 dist_mat = xp.linalg.norm(mw[:, None, :] - mcw[None, : , :], axis=2) @@ -635,16 +637,16 @@ def t1(): print("ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(xp.min(ns_by_n0[i]) , xp.max(ns_by_n0[i])), flush=True) - # if (use_gpu == 1): - # with cp.cuda.Device(dev_id): - # n0 = cp.array(n0) - # ne = cp.array(ne) - # ni = cp.array(ni) - # Ex = cp.array(Ex) - # Ey = cp.array(Ey) - # Tg = cp.array(Tg) - # EMag = cp.sqrt(Ex**2 + Ey**2) - # ns_by_n0 = cp.array(ns_by_n0) + if (use_gpu == 1): + with cp.cuda.Device(dev_id): + n0 = cp.array(n0) + ne = cp.array(ne) + ni = cp.array(ni) + Ex = cp.array(Ex) + Ey = cp.array(Ey) + Tg = cp.array(Tg) + EMag = cp.sqrt(Ex**2 + Ey**2) + ns_by_n0 = cp.array(ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "ns_by_n0", ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) @@ -932,7 +934,7 @@ def t1(): # xp = cp # m = xp.array(m_bte[gidx_to_pidx[grid_idx]]) # mw , mw_std = normalize(m, xp) - # mcw, membership_m = k_means(mw, num_clusters=self.param.n_sub_clusters, max_iter=1000, thresh=1e-8, rand_seed=self.param.rand_seed, xp=xp) + # mcw, membership_m = k_means(mw, num_clusters=self.param.n_sub_clusters, max_iter=self.param.clstr_maxiter, thresh=1e-8, rand_seed=self.param.rand_seed, xp=xp) # to repoduce clusters xp = np @@ -940,7 +942,7 @@ def t1(): m = m_bte[gidx_to_pidx[grid_idx]] mw , mw_std = normalize(m, xp) mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] - mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=1000, thresh=1e-8, check_finite=False)[0] + mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=self.param.clstr_maxiter, thresh=self.param.clstr_threshold, check_finite=False)[0] mcw0[0:mcw.shape[0], :] = mcw[:,:] mcw = mcw0 dist_mat = xp.linalg.norm(mw[:, None, :] - mcw[None, : , :], axis=2) @@ -1041,16 +1043,16 @@ def t1(): print("ns/n0 (min) = %.12E \t ns/n0(max) = %.12E "%(xp.min(ns_by_n0[i]) , xp.max(ns_by_n0[i])), flush=True) - # if (use_gpu == 1): - # with cp.cuda.Device(dev_id): - # n0 = cp.array(n0) - # ne = cp.array(ne) - # ni = cp.array(ni) - # Ex = cp.array(Ex) - # Ey = cp.array(Ey) - # Tg = cp.array(Tg) - # EMag = cp.sqrt(Ex**2 + Ey**2) - # ns_by_n0 = cp.array(ns_by_n0) + if (use_gpu == 1): + with cp.cuda.Device(dev_id): + n0 = cp.array(n0) + ne = cp.array(ne) + ni = cp.array(ni) + Ex = cp.array(Ex) + Ey = cp.array(Ey) + Tg = cp.array(Tg) + EMag = cp.sqrt(Ex**2 + Ey**2) + ns_by_n0 = cp.array(ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "ns_by_n0", ns_by_n0) self.bte_solver.set_boltzmann_parameter(grid_idx, "n0" , n0) @@ -1470,7 +1472,7 @@ def profile_stats(boltzmann:Boltzmann0D2VBactchedSolver, p_tt: profile_t, p_nn, if rank ==0 : if fname!="": with open(fname, "a") as f: - f.write(datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S")+"\n") + f.write("nprocs: %d timestamp %s \n"%(npes, datetime.datetime.now().strftime("%m/%d/%Y, %H:%M:%S"))) f.write(""+str(boltzmann.params_dump())+"\n") f.write(",".join(header)+"\n") f.write(",".join(data_str)+"\n") From ba72be043e80134763c4dfd6572658fa532ac015 Mon Sep 17 00:00:00 2001 From: milindasf Date: Thu, 21 Mar 2024 12:30:48 -0500 Subject: [PATCH 57/75] bte fetch when cluster samples < total tps points we need to do sampling with replacement --- src/tps-bte_0d3v.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index f00aa7dcc..d6c596a9f 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -534,7 +534,12 @@ def t1(): np.random.seed(self.param.rand_seed) m = m_bte[gidx_to_pidx[grid_idx]] mw , mw_std = normalize(m, xp) - mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] + + if mw.shape[0] >= self.param.n_sub_clusters: + mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] + else: + mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=True)] + mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=self.param.clstr_maxiter, thresh=self.param.clstr_threshold, check_finite=False)[0] mcw0[0:mcw.shape[0], :] = mcw[:,:] mcw = mcw0 @@ -941,7 +946,12 @@ def t1(): np.random.seed(self.param.rand_seed) m = m_bte[gidx_to_pidx[grid_idx]] mw , mw_std = normalize(m, xp) - mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] + + if mw.shape[0] >= self.param.n_sub_clusters: + mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=False)] + else: + mcw0 = mw[np.random.choice(mw.shape[0], self.param.n_sub_clusters, replace=True)] + mcw = scipy.cluster.vq.kmeans(mw, mcw0, iter=self.param.clstr_maxiter, thresh=self.param.clstr_threshold, check_finite=False)[0] mcw0[0:mcw.shape[0], :] = mcw[:,:] mcw = mcw0 From 94d3b9a30a08774918252b7dda179fa7e33e1341 Mon Sep 17 00:00:00 2001 From: "Todd A. Oliver" Date: Wed, 20 Mar 2024 21:23:36 -0500 Subject: [PATCH 58/75] Add logic to read distance fcn from restart if requested --- src/M2ulPhyS.cpp | 15 ++++++++++++++- src/run_configuration.hpp | 1 + 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/src/M2ulPhyS.cpp b/src/M2ulPhyS.cpp index 99c56f144..c67746904 100644 --- a/src/M2ulPhyS.cpp +++ b/src/M2ulPhyS.cpp @@ -405,7 +405,14 @@ void M2ulPhyS::initVariables() { serial_mesh->GetNodes(coordinates); // Evaluate the distance function - evaluateDistanceSerial(*serial_mesh, wall_patch_list, coordinates, *serial_distance); + if (!config.read_distance || !config.GetRestartCycle()) { + if (rank0_) grvy_printf(ginfo, "Computing distance function\n"); + evaluateDistanceSerial(*serial_mesh, wall_patch_list, coordinates, *serial_distance); + } else { + // If distance function is read from restart, this will be overwritten later + if (rank0_) grvy_printf(ginfo, "Distance function to be read from restart\n"); + *serial_distance = 0.0; + } delete tmp_dfes; } @@ -617,6 +624,11 @@ void M2ulPhyS::initVariables() { #endif initSolutionAndVisualizationVectors(); + if (distance_ != NULL) { + ioData.registerIOFamily("Distance function", "/distance", distance_, false, config.read_distance); + ioData.registerIOVar("/distance", "distance", 0, config.read_distance); + } + average = new Averaging(Up, mesh, fec, fes, dfes, vfes, eqSystem, d_mixture, num_equation, dim, config, groupsMPI); average->read_meanANDrms_restart_files(); @@ -2657,6 +2669,7 @@ void M2ulPhyS::parseFlowOptions() { } tpsP->getInput("flow/refinement_levels", config.ref_levels, 0); tpsP->getInput("flow/computeDistance", config.compute_distance, false); + tpsP->getInput("flow/readDistance", config.read_distance, false); std::string type; tpsP->getInput("flow/sgsModel", type, std::string("none")); diff --git a/src/run_configuration.hpp b/src/run_configuration.hpp index 6cd5e0e5f..1a6aa00c5 100644 --- a/src/run_configuration.hpp +++ b/src/run_configuration.hpp @@ -286,6 +286,7 @@ class RunConfiguration { PostProcessInput postprocessInput; bool compute_distance; + bool read_distance; RunConfiguration(); ~RunConfiguration(); From 9611699dad4c37e17bd11343cda74a682870165e Mon Sep 17 00:00:00 2001 From: milindasf Date: Thu, 21 Mar 2024 16:20:56 -0500 Subject: [PATCH 59/75] nvtx tags added for ncu profiling --- src/tps.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/tps.py b/src/tps.py index 46b69aa14..e9281a23b 100755 --- a/src/tps.py +++ b/src/tps.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import sys import os - +import cupy as cp from mpi4py import MPI # set path to C++ TPS library @@ -18,6 +18,12 @@ tps.chooseDevices() tps.chooseSolver() tps.initialize() -tps.solve() +tps.solveStep() + +#cp.profiler.start() +cp.cuda.nvtx.RangePush("tpsStep") +tps.solveStep() +cp.cuda.nvtx.RangePop() +#tps.solve() sys.exit (tps.getStatus()) From 1a3f5b45597ff521a871227019e22a031f79b9f0 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 22 Mar 2024 14:56:01 -0500 Subject: [PATCH 60/75] Bugfix in test/vpath.sh so that tps-bte_0d3v.py is now also dynamically linked --- test/vpath.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/vpath.sh b/test/vpath.sh index 26d070bc1..af1e5eead 100755 --- a/test/vpath.sh +++ b/test/vpath.sh @@ -31,7 +31,7 @@ fi # necessary binaries binaries="bats die.sh soln_differ count_gpus.sh sniff_mpirun.sh " -binaries+="../src/tps.py ../src/tps-time-loop.py ../cdsrc/tps-bte_0d3v.py ../test/test_tps_splitcomm.py" +binaries+="../src/tps.py ../src/tps-time-loop.py ../src/tps-bte_0d3v.py ../test/test_tps_splitcomm.py" for binary in $binaries; do if [ ! -x $binary ];then if [ -x $testDir/$binary ];then From 239a90442ae8ca59fda168ea3a18293b6b9395fd Mon Sep 17 00:00:00 2001 From: "Todd A. Oliver" Date: Sat, 23 Mar 2024 14:43:55 -0500 Subject: [PATCH 61/75] Exchange face neighbor data before we compute primitives After restart, make sure all data valid before using it. I don't think this could ever cause an incorrect result for a time step b/c the info should be exchanged prior to all calculations that influence the step. But, it can lead to (and has recently led to) to failing asserts in the primitive variable calculations. --- src/M2ulPhyS.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/M2ulPhyS.cpp b/src/M2ulPhyS.cpp index c67746904..73febee17 100644 --- a/src/M2ulPhyS.cpp +++ b/src/M2ulPhyS.cpp @@ -1942,6 +1942,10 @@ void M2ulPhyS::projectInitialSolution() { initGradUp(); + // Exchange before computing primitives + U->ParFESpace()->ExchangeFaceNbrData(); + U->ExchangeFaceNbrData(); + updatePrimitives(); // update pressure grid function From f3b5a0ed257f7cbf50d0db9cc5566b9c72d6b6ad Mon Sep 17 00:00:00 2001 From: "Todd A. Oliver" Date: Sat, 23 Mar 2024 14:49:00 -0500 Subject: [PATCH 62/75] Bug fix: move mesh->ExchangeFaceNbrNodes() and mesh->ExchangeFaceNbrData() out of conditional These are necessary in the boundary data section of M2ulPhyS::initIndirectionArrays(). However, previously they were called inside of the conditional if (NumBCelems > 0), which leads to a problem there are some mpi ranks that have no boundary elements. --- src/M2ulPhyS.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/M2ulPhyS.cpp b/src/M2ulPhyS.cpp index 73febee17..f9d48b304 100644 --- a/src/M2ulPhyS.cpp +++ b/src/M2ulPhyS.cpp @@ -1077,6 +1077,12 @@ void M2ulPhyS::initIndirectionArrays() { // See #199 for more info. const int NumBCelems = fes->GetNBE(); + // NB: *Must* call this here, as otherwise some faces are + // erroneously included as boundary faces and asserts below may + // fail + mesh->ExchangeFaceNbrNodes(); + mesh->ExchangeFaceNbrData(); + if (NumBCelems > 0) { bdry_face_data.shape.UseDevice(true); bdry_face_data.shape.SetSize(NumBCelems * maxIntPoints * maxDofs); @@ -1120,12 +1126,6 @@ void M2ulPhyS::initIndirectionArrays() { FaceElementTransformations *tr; // Mesh *mesh = fes->GetMesh(); - // NB: *Must* call this here, as otherwise some faces are - // erroneously included as boundary faces and asserts below may - // fail - mesh->ExchangeFaceNbrNodes(); - mesh->ExchangeFaceNbrData(); - std::vector uniqueElems; uniqueElems.clear(); From d8df437f115ad4b1c8b8543133d92c31aa7da371 Mon Sep 17 00:00:00 2001 From: "Todd A. Oliver" Date: Sat, 23 Mar 2024 14:52:20 -0500 Subject: [PATCH 63/75] Fix U->HostWrite that should be U->HostRead in M2ulPhyS::updatePrimitives --- src/M2ulPhyS.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/M2ulPhyS.cpp b/src/M2ulPhyS.cpp index f9d48b304..78d411d2b 100644 --- a/src/M2ulPhyS.cpp +++ b/src/M2ulPhyS.cpp @@ -4019,8 +4019,7 @@ void M2ulPhyS::checkSolverOptions() const { } void M2ulPhyS::updatePrimitives() { - // U.V.: should this be U->HostRead() instead? U->HostWrite() does not sync memory before returning the pointer. - double *data = U->HostWrite(); + const double *data = U->HostRead(); double *dataUp = Up->HostWrite(); int dof = vfes->GetNDofs(); From b3e801cc72953e0723b1a9bbb37eca12f9dbdf55 Mon Sep 17 00:00:00 2001 From: "Todd A. Oliver" Date: Sat, 23 Mar 2024 17:06:53 -0500 Subject: [PATCH 64/75] Fix uninitialized species in outlet BC ctor Can cause problems when mixture->GetConservativesFromPrimitives(iUp, iState) gets called below. --- src/outletBC.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/outletBC.cpp b/src/outletBC.cpp index 705a33598..e7d50ea39 100644 --- a/src/outletBC.cpp +++ b/src/outletBC.cpp @@ -74,6 +74,12 @@ OutletBC::OutletBC(MPI_Groups *_groupsMPI, Equations _eqSystem, RiemannSolver *_ hmeanUp[1 + nvel_] = 300.0; // 101300; if (eqSystem == NS_PASSIVE) hmeanUp[num_equation_ - 1] = 0.; + if (mixture->GetNumActiveSpecies() > 0) { + for (int sp = 0; sp < mixture->GetNumActiveSpecies() > 0; sp++) { + hmeanUp[nvel_ + 2 + sp] = 0.0; + } + } + area_ = 0.; parallelAreaComputed = false; From d772a0248577f8d2d36b64b0817b99ce6f934988 Mon Sep 17 00:00:00 2001 From: milindasf Date: Sun, 24 Mar 2024 18:04:38 -0500 Subject: [PATCH 65/75] thesholding E to avoid E=0 case for the steady-state solver, because when E=0 the steady-state solution would be the delta function --- src/tps-bte_0d3v.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index d6c596a9f..f6d1e4e13 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -126,6 +126,8 @@ class BoltzmannSolverParams(): clstr_maxiter = 10 clstr_threshold = 1e-3 + EMag_threshold = 1e-10 + class TPSINDEX(): """ simple index map to differnt fields, from the TPS arrays @@ -493,10 +495,16 @@ def fetch(self, interface): Ex = efield[0] Ey = efield[1] - + + EMag = np.sqrt(Ex**2 + Ey**2) + e_idx = EMag Date: Mon, 25 Mar 2024 09:25:10 -0700 Subject: [PATCH 66/75] Restore tps.py and add tps-ntvx-profile.py --- src/tps-ntvx-profile.py | 29 +++++++++++++++++++++++++++++ src/tps.py | 8 +------- 2 files changed, 30 insertions(+), 7 deletions(-) create mode 100755 src/tps-ntvx-profile.py diff --git a/src/tps-ntvx-profile.py b/src/tps-ntvx-profile.py new file mode 100755 index 000000000..e9281a23b --- /dev/null +++ b/src/tps-ntvx-profile.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python3 +import sys +import os +import cupy as cp +from mpi4py import MPI + +# set path to C++ TPS library +path = os.path.abspath(os.path.dirname(sys.argv[0])) +sys.path.append(path + "/.libs") +import libtps as tps + +comm = MPI.COMM_WORLD +# TPS solver +tps = tps.Tps(comm) + +tps.parseCommandLineArgs(sys.argv) +tps.parseInput() +tps.chooseDevices() +tps.chooseSolver() +tps.initialize() +tps.solveStep() + +#cp.profiler.start() +cp.cuda.nvtx.RangePush("tpsStep") +tps.solveStep() +cp.cuda.nvtx.RangePop() +#tps.solve() + +sys.exit (tps.getStatus()) diff --git a/src/tps.py b/src/tps.py index e9281a23b..c92f658ef 100755 --- a/src/tps.py +++ b/src/tps.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import sys import os -import cupy as cp from mpi4py import MPI # set path to C++ TPS library @@ -19,11 +18,6 @@ tps.chooseSolver() tps.initialize() tps.solveStep() - -#cp.profiler.start() -cp.cuda.nvtx.RangePush("tpsStep") -tps.solveStep() -cp.cuda.nvtx.RangePop() -#tps.solve() +tps.solve() sys.exit (tps.getStatus()) From f7b63eec8d88a4a58afeaa37fdb63fd204c8c874 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Mon, 25 Mar 2024 11:38:44 -0500 Subject: [PATCH 67/75] Update gpu_constructor.cpp spelling --- src/gpu_constructor.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gpu_constructor.cpp b/src/gpu_constructor.cpp index 6dbb5de1e..bf58b88a4 100644 --- a/src/gpu_constructor.cpp +++ b/src/gpu_constructor.cpp @@ -122,7 +122,7 @@ __global__ void freeDeviceRadiation(Radiation *radiation) { } //--------------------------------------------------- -// And finally devise setters +// And finally device setters //--------------------------------------------------- __global__ void deviceSetGridFunctionReactionData(const double * data, int size, GridFunctionReaction * reaction) { reaction->setData(data, size); From aafa6da3bbf7a85f4e153708272d20a2d61c70c0 Mon Sep 17 00:00:00 2001 From: Umberto Emanuele Villa Date: Mon, 25 Mar 2024 11:02:57 -0700 Subject: [PATCH 68/75] enable mpirun --- test/mms.euler.test | 24 ++++++++++++++++++++++-- test/mms.ternary_2d.test | 13 ++++++++++++- test/mms.ternary_2d_inout.test | 15 ++++++++++++++- test/mms.ternary_2d_wall.test | 4 +++- 4 files changed, 51 insertions(+), 5 deletions(-) diff --git a/test/mms.euler.test b/test/mms.euler.test index 4e0ec5a08..dd5e6bd6f 100755 --- a/test/mms.euler.test +++ b/test/mms.euler.test @@ -5,17 +5,31 @@ TEST="mms/euler" RUNFILE_1="inputs/mms.euler.3d.r1.ini" RUNFILE_2="inputs/mms.euler.3d.r2.ini" +setup() { + NUM_GPUS=`./count_gpus.sh` + MPIRUN=`./sniff_mpirun.sh` +} + @test "[$TEST] run tps with input -> $RUNFILE_1" { - mpirun -np 2 ../src/tps --runFile $RUNFILE_1 >& euler_mms_r1.log + [ $NUM_GPUS -ge 2 ] || skip "Two GPUs not available" + [ "x$MPIRUN" != "x" ] || skip "Cannot launch parallel job" + + $MPIRUN -n 2 ../src/tps --runFile $RUNFILE_1 >& euler_mms_r1.log } @test "[$TEST] run tps with input -> $RUNFILE_2" { + [ $NUM_GPUS -ge 2 ] || skip "Two GPUs not available" + [ "x$MPIRUN" != "x" ] || skip "Cannot launch parallel job" + rm -f $SOLN_FILE touch DIE - mpirun -np 2 ../src/tps --runFile $RUNFILE_2 >& euler_mms_r2.log + $MPIRUN -n 2 ../src/tps --runFile $RUNFILE_2 >& euler_mms_r2.log } @test "[$TEST] verify tps density convergence rate for Euler MMS with linear elems" { + [ $NUM_GPUS -ge 2 ] || skip "Two GPUs not available" + [ "x$MPIRUN" != "x" ] || skip "Cannot launch parallel job" + rho_err_r1=$(cat euler_mms_r1.log | grep "time step: 300" | head -1 | awk '{print $9}') rho_err_r2=$(cat euler_mms_r2.log | grep "time step: 600" | head -1 | awk '{print $9}') @@ -35,6 +49,9 @@ RUNFILE_2="inputs/mms.euler.3d.r2.ini" } @test "[$TEST] verify tps velocity convergence rate for Euler MMS with linear elems" { + [ $NUM_GPUS -ge 2 ] || skip "Two GPUs not available" + [ "x$MPIRUN" != "x" ] || skip "Cannot launch parallel job" + vel_err_r1=$(cat euler_mms_r1.log | grep "time step: 300" | head -1 | awk '{print $11}') vel_err_r2=$(cat euler_mms_r2.log | grep "time step: 600" | head -1 | awk '{print $11}') @@ -54,6 +71,9 @@ RUNFILE_2="inputs/mms.euler.3d.r2.ini" } @test "[$TEST] verify tps pressure convergence rate for Euler MMS with linear elems" { + [ $NUM_GPUS -ge 2 ] || skip "Two GPUs not available" + [ "x$MPIRUN" != "x" ] || skip "Cannot launch parallel job" + pre_err_r1=$(cat euler_mms_r1.log | grep "time step: 300" | head -1 | awk '{print $13}') pre_err_r2=$(cat euler_mms_r2.log | grep "time step: 600" | head -1 | awk '{print $13}') diff --git a/test/mms.ternary_2d.test b/test/mms.ternary_2d.test index c2654e2bd..e14120010 100755 --- a/test/mms.ternary_2d.test +++ b/test/mms.ternary_2d.test @@ -8,6 +8,9 @@ setup() { SOLN_FILE=restart_argon_output.sol.h5 MESH_FILE=beam-quad-o3-s1-r1-p.mesh TOL=2e-4 + + NUM_GPUS=`./count_gpus.sh` + MPIRUN=`./sniff_mpirun.sh` } @test "[$TEST] check for input file $RUNFILE" { @@ -21,10 +24,18 @@ setup() { } @test "[$TEST] run tps with input -> $RUNFILE" { - mpirun -np 2 ../src/tps --runFile $RUNFILE >& plasma_ternary_mms.log + + [ $NUM_GPUS -ge 2 ] || skip "Two GPUs not available" + [ "x$MPIRUN" != "x" ] || skip "Cannot launch parallel job" + + $MPIRUN -n 2 ../src/tps --runFile $RUNFILE >& plasma_ternary_mms.log } @test "[$TEST] check if the relative error is similar to the reported value" { + + [ $NUM_GPUS -ge 2 ] || skip "Two GPUs not available" + [ "x$MPIRUN" != "x" ] || skip "Cannot launch parallel job" + while IFS=$'\t' read -r nx e0 e1 e2 e3 e4 e5; do test $nx -eq 100 diff --git a/test/mms.ternary_2d_inout.test b/test/mms.ternary_2d_inout.test index 361cf354a..fce5de441 100755 --- a/test/mms.ternary_2d_inout.test +++ b/test/mms.ternary_2d_inout.test @@ -7,6 +7,13 @@ RUNFILE="inputs/mms.ternary_plasma.2d.inout.ini" setup() { SOLN_FILE=restart_argon_output.sol.h5 MESH_FILE=beam-quad-o3-s1-r1-yp.mesh + + SKIP="ASPEED" + NUM_GPUS=`./count_gpus.sh` + MPIRUN=`./sniff_mpirun.sh` + + echo Number of GPUS: $NUM_GPUS + echo mpirun: $MPIRUN } @test "[$TEST] check for input file $RUNFILE" { @@ -20,10 +27,16 @@ setup() { } @test "[$TEST] run tps with input -> $RUNFILE" { - mpirun -np 2 ../src/tps --runFile $RUNFILE >& plasma_ternary_mms.inout.log + + [ "x$MPIRUN" != "x" ] || skip "Cannot launch parallel job" + + $MPIRUN -n 2 ../src/tps --runFile $RUNFILE >& plasma_ternary_mms.inout.log } @test "[$TEST] check if the relative error is similar to the reported value" { + + [ "x$MPIRUN" != "x" ] || skip "Cannot launch parallel job" + while IFS=$'\t' read -r nx e0 e1 e2 e3 e4 e5; do test $nx -eq 100 diff --git a/test/mms.ternary_2d_wall.test b/test/mms.ternary_2d_wall.test index e71c962c1..8902fbe6a 100755 --- a/test/mms.ternary_2d_wall.test +++ b/test/mms.ternary_2d_wall.test @@ -7,6 +7,8 @@ RUNFILE="inputs/mms.ternary_plasma.2d.wall.ini" setup() { SOLN_FILE=restart_argon_output.sol.h5 MESH_FILE=beam-quad-o3-s1-r1-xp.mesh + + MPIRUN=`./sniff_mpirun.sh` } @test "[$TEST] check for input file $RUNFILE" { @@ -20,7 +22,7 @@ setup() { } @test "[$TEST] run tps with input -> $RUNFILE" { - mpirun -np 2 ../src/tps --runFile $RUNFILE >& plasma_ternary_mms.wall.log + $MPIRUN -n 2 ../src/tps --runFile $RUNFILE >& plasma_ternary_mms.wall.log } @test "[$TEST] check if the relative error is similar to the reported value" { From e45aaf3dab2934d23dcf96470a9846d079b06829 Mon Sep 17 00:00:00 2001 From: Umberto Emanuele Villa Date: Mon, 25 Mar 2024 11:56:19 -0700 Subject: [PATCH 69/75] Add a command line parameter for GPU-aware MPI --- src/tps.cpp | 24 ++++++++++++++++++++---- src/tps.hpp | 1 + 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/tps.cpp b/src/tps.cpp index 35bdcff95..8691dfd92 100644 --- a/src/tps.cpp +++ b/src/tps.cpp @@ -138,7 +138,9 @@ void Tps::parseCommandLineArgs(int argc, char *argv[]) { args.AddOption(&debugMode, "-d", "--debug", "", "--no-debug", "Launch in debug mode for gdb attach."); args.AddOption(&visualMode, "-visual", "--visualization", "", "--no-visualization", "Launch post-process visualization."); - + gpu_aware_mpi_=false; + args.AddOption(&gpu_aware_mpi_, "-ga", "--gpu-aware-mpi", "", "--no-gpu-aware-mpi", + "Set GPU-aware MPI."); args.Parse(); if (!args.Good()) { @@ -193,11 +195,25 @@ void Tps::chooseDevices() { int mpi_gpu_aware = 0; // false; #if _CUDA_ && defined(MPIX_CUDA_AWARE_SUPPORT) - // check for cuda-aware mpi (if possible) + // check for cuda-aware mpi (if possible) and overwrite flag if needed + // Trust the command line flag if MPIX_Query_cuda_support is not available mpi_gpu_aware = MPIX_Query_cuda_support(); + + if (mpi_gpu_aware == 1 && gpu_aware_mpi_ == false) { + if (isRank0_) { + grvy_printf(GRVY_WARNING, "Cuda-aware MPI detected, but flag is false") + } + gpu_aware_mpi_=true; + } else if (mpi_gpu_aware == 0 && gpu_aware_mpi_ == true) { + if (isRank0_) { + grvy_printf(GRVY_WARNING, "No cuda-aware MPI detected, but flag is true") + } + gpu_aware_mpi_=false; + } + #endif - device_.SetGPUAwareMPI(mpi_gpu_aware); + device_.SetGPUAwareMPI(gpu_aware_mpi_); #endif if (isRank0_) { @@ -209,7 +225,7 @@ void Tps::chooseDevices() { #ifdef _GPU_ if (isRank0_) { - if (mpi_gpu_aware) { + if (gpu_aware_mpi_) { grvy_printf(GRVY_INFO, "\nTPS is using GPU-aware MPI.\n"); } else { grvy_printf(GRVY_INFO, "\nTPS is using non-GPU-aware MPI.\n"); diff --git a/src/tps.hpp b/src/tps.hpp index 544c21fe8..9b40ff6aa 100644 --- a/src/tps.hpp +++ b/src/tps.hpp @@ -81,6 +81,7 @@ class Tps { std::string iFile_; // name of runtime input file (new ini format) std::string input_solver_type_; // choice of desired solver int numGpusPerRank_; // number of GPUs to use per MPI rank + bool gpu_aware_mpi_; // whether MPI is gpu-aware (default: false) // execution device controls std::string deviceConfig_; From c3c0e6c0c42e79f9e63890e5fd95f08a4f4fb46d Mon Sep 17 00:00:00 2001 From: "Todd A. Oliver" Date: Thu, 28 Mar 2024 19:41:06 -0700 Subject: [PATCH 70/75] Ensure SpongeZone::sigma is properly initialized In the SpongeZone ctor we have sigma = new ParGridFunction(&fes); *sigma = 0.; double *hSigma = sigma->HostWrite(); and the subsequent loop (prior to this commit) only set some entries in hSigma. On systems where a device is available, this process can lead to uninitialized values in the sigma field, b/c *sigma = 0 initializes the device memory and `sigma->HostWrite` immediately invalidates that (i.e., no copy is done) but then hSigma isn't fully initialized. In this commit, we eliminate the *sigma = 0. initialization in favor of setting all entries within the loop. --- src/forcing_terms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/forcing_terms.cpp b/src/forcing_terms.cpp index 3bd2da71a..c7f1603c7 100644 --- a/src/forcing_terms.cpp +++ b/src/forcing_terms.cpp @@ -540,7 +540,6 @@ SpongeZone::SpongeZone(const int &_dim, const int &_num_equation, const int &_or ParFiniteElementSpace fes(mesh, fec); sigma = new ParGridFunction(&fes); - *sigma = 0.; double *hSigma = sigma->HostWrite(); ParGridFunction coords(&dfes); @@ -560,6 +559,7 @@ SpongeZone::SpongeZone(const int &_dim, const int &_num_equation, const int &_or Vector Xn(dim); for (int d = 0; d < dim; d++) Xn[d] = coords[n + d * ndofs]; + hSigma[n] = 0.0; if (szData.szType == SpongeZoneType::PLANAR) { // distance to the mix-out plane double distInit = 0.; From 64e0d1e791cf07fdde77203df2b365636d28075a Mon Sep 17 00:00:00 2001 From: milindasf Date: Tue, 9 Apr 2024 13:09:46 -0500 Subject: [PATCH 71/75] Ex, Ey component output for csv files. --- src/tps-bte_0d3v.py | 45 ++++++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/tps-bte_0d3v.py b/src/tps-bte_0d3v.py index f6d1e4e13..bc1976955 100755 --- a/src/tps-bte_0d3v.py +++ b/src/tps-bte_0d3v.py @@ -122,7 +122,7 @@ class BoltzmannSolverParams(): n0 = 3.22e22 #[m^{-3}] rand_seed = 0 - use_clstr_inp = True + use_clstr_inp = False clstr_maxiter = 10 clstr_threshold = 1e-3 @@ -344,7 +344,7 @@ def grid_setup(self, interface): # active_grid_idx.append(grid_idx) # self.active_grid_idx = active_grid_idx #[i for i in range(self.param.n_grids)] - self.active_grid_idx = [i for i in range(self.param.n_grids)] + self.active_grid_idx = [2,3]#[i for i in range(self.param.n_grids)] self.sub_clusters_run = False return @@ -496,6 +496,29 @@ def fetch(self, interface): Ex = efield[0] Ey = efield[1] + ne = species_densities[TPSINDEX.ELE_IDX] + coll_list = self.bte_solver.get_collision_list() + coll_names = self.bte_solver.get_collision_names() + cs_data = self.bte_solver.get_cross_section_data() + + cs_species = list() + for col_idx, (k,v) in enumerate(cs_data.items()): + cs_species.append(v["species"]) + + cs_species = list(sorted(set(cs_species), key=cs_species.index)) + data_csv = np.concatenate([(Ex).reshape((-1, 1)), + (Ey).reshape((-1, 1)), + (Tg).reshape((-1, 1)), + (ne/n0).reshape((-1, 1))] + [ns_by_n0[i].reshape((-1, 1)) for i in range(ns_by_n0.shape[0])] + [n0.reshape(-1, 1)], axis=1) + + for grid_idx in self.active_grid_idx: + with open("%s/%s.csv"%(self.param.output_dir, "tps_fetch_grid_%02d_rank_%02d_npes_%02d"%(grid_idx, self.rankG, self.npesG)), 'w', encoding='UTF8') as f: + writer = csv.writer(f,delimiter=',') + # write the header + header = ["eRe", "eIm", "Tg", "ne/n0"] + ["(%s)/n0"%(s) for s in cs_species] + ["n0"] + writer.writerow(header) + writer.writerows(data_csv[gidx_to_pidx[grid_idx]]) + EMag = np.sqrt(Ex**2 + Ey**2) e_idx = EMag Date: Fri, 12 Apr 2024 10:30:35 -0500 Subject: [PATCH 72/75] Update outletBC.cpp --- src/outletBC.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/outletBC.cpp b/src/outletBC.cpp index 47299cb65..26293c996 100644 --- a/src/outletBC.cpp +++ b/src/outletBC.cpp @@ -75,7 +75,7 @@ OutletBC::OutletBC(MPI_Groups *_groupsMPI, Equations _eqSystem, RiemannSolver *_ if (eqSystem == NS_PASSIVE) hmeanUp[num_equation_ - 1] = 0.; if (mixture->GetNumActiveSpecies() > 0) { - for (int sp = 0; sp < mixture->GetNumActiveSpecies() > 0; sp++) { + for (int sp = 0; sp < mixture->GetNumActiveSpecies(); sp++) { hmeanUp[nvel_ + 2 + sp] = 0.0; } } From dc6e8dcf245447df082dd1fdf55988f164413b71 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 12 Apr 2024 10:36:12 -0500 Subject: [PATCH 73/75] Update tps.cpp Formatting --- src/tps.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tps.cpp b/src/tps.cpp index 5f5ab7c9d..1db059897 100644 --- a/src/tps.cpp +++ b/src/tps.cpp @@ -139,9 +139,9 @@ void Tps::parseCommandLineArgs(int argc, char *argv[]) { args.AddOption(&debugMode, "-d", "--debug", "", "--no-debug", "Launch in debug mode for gdb attach."); args.AddOption(&visualMode, "-visual", "--visualization", "", "--no-visualization", "Launch post-process visualization."); - gpu_aware_mpi_=false; + gpu_aware_mpi_ = false; args.AddOption(&gpu_aware_mpi_, "-ga", "--gpu-aware-mpi", "", "--no-gpu-aware-mpi", - "Set GPU-aware MPI."); + "Set GPU-aware MPI."); args.Parse(); if (!args.Good()) { @@ -204,12 +204,12 @@ void Tps::chooseDevices() { if (isRank0_) { grvy_printf(GRVY_WARNING, "Cuda-aware MPI detected, but flag is false") } - gpu_aware_mpi_=true; + gpu_aware_mpi_ = true; } else if (mpi_gpu_aware == 0 && gpu_aware_mpi_ == true) { if (isRank0_) { grvy_printf(GRVY_WARNING, "No cuda-aware MPI detected, but flag is true") } - gpu_aware_mpi_=false; + gpu_aware_mpi_ = false; } #endif From 1030494dfd468b2e73cfe69e85dbe4c573a41335 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 12 Apr 2024 10:51:05 -0500 Subject: [PATCH 74/75] Update tps.cpp --- src/tps.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/tps.cpp b/src/tps.cpp index 1db059897..81636fc4a 100644 --- a/src/tps.cpp +++ b/src/tps.cpp @@ -194,11 +194,10 @@ void Tps::chooseDevices() { #ifdef _GPU_ device_.Configure(deviceConfig_, rank_ % numGpusPerRank_); - int mpi_gpu_aware = 0; // false; #if _CUDA_ && defined(MPIX_CUDA_AWARE_SUPPORT) // check for cuda-aware mpi (if possible) and overwrite flag if needed // Trust the command line flag if MPIX_Query_cuda_support is not available - mpi_gpu_aware = MPIX_Query_cuda_support(); + int mpi_gpu_aware = MPIX_Query_cuda_support(); if (mpi_gpu_aware == 1 && gpu_aware_mpi_ == false) { if (isRank0_) { From 75938601cfeefcc69145ec77a4de06379eb6eb63 Mon Sep 17 00:00:00 2001 From: Umberto Villa Date: Fri, 12 Apr 2024 11:59:20 -0500 Subject: [PATCH 75/75] Update tps.cpp Addresses compilation error on Lassen --- src/tps.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tps.cpp b/src/tps.cpp index 81636fc4a..b82b7caf3 100644 --- a/src/tps.cpp +++ b/src/tps.cpp @@ -201,12 +201,12 @@ void Tps::chooseDevices() { if (mpi_gpu_aware == 1 && gpu_aware_mpi_ == false) { if (isRank0_) { - grvy_printf(GRVY_WARNING, "Cuda-aware MPI detected, but flag is false") + grvy_printf(GRVY_WARN, "Cuda-aware MPI detected, but flag is false"); } gpu_aware_mpi_ = true; } else if (mpi_gpu_aware == 0 && gpu_aware_mpi_ == true) { if (isRank0_) { - grvy_printf(GRVY_WARNING, "No cuda-aware MPI detected, but flag is true") + grvy_printf(GRVY_WARN, "No cuda-aware MPI detected, but flag is true"); } gpu_aware_mpi_ = false; }