Skip to content

Commit

Permalink
Merge pull request #95 from GEOS-ESM/feature/sshakoor/slurmify
Browse files Browse the repository at this point in the history
remap_restarts uses SLURM for lake/landice/saltwater (Resolution for issue #85)
  • Loading branch information
sdrabenh authored Sep 13, 2024
2 parents 31a14df + 7fbbd82 commit 7820b59
Showing 1 changed file with 216 additions and 45 deletions.
261 changes: 216 additions & 45 deletions pre/remap_restart/remap_lake_landice_saltwater.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import ruamel.yaml
import shlex
from remap_base import remap_base
from remap_utils import get_label
from remap_utils import *
from remap_utils import get_geomdir
from remap_bin2nc import bin2nc

Expand Down Expand Up @@ -47,13 +47,9 @@ def remap(self):
ogrid = config['input']['shared']['ogrid']
omodel = config['input']['shared']['omodel']
stretch = config['input']['shared']['stretch']

in_geomdir = get_geomdir(in_bc_base, in_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch)
in_tile_file = glob.glob(in_geomdir+ '/*-Pfafstetter.til')[0]

agrid = config['output']['shared']['agrid']
ogrid = config['output']['shared']['ogrid']
omodel = config['output']['shared']['omodel']
stretch = config['output']['shared']['stretch']
out_geomdir = get_geomdir(out_bc_base, out_bc_version, agrid=agrid, ogrid=ogrid, omodel=omodel, stretch=stretch)
out_tile_file = glob.glob(out_geomdir+ '/*-Pfafstetter.til')[0]

Expand Down Expand Up @@ -170,6 +166,180 @@ def remap(self):
print('cd ' + cwdir)
os.chdir(cwdir)


agrid = config['output']['shared']['agrid']
if agrid[0].upper() == 'C':
imout = int(agrid[1:])
else:
exit("Only support cs grid so far")

if (imout <90):
NPE = 12; nwrit = 1
elif (imout<=180):
NPE = 24; nwrit = 1
elif (imout<=540):
NPE = 96; nwrit = 1
elif (imout<=720):
NPE = 192; nwrit = 2
elif (imout<=1080):
NPE = 384; nwrit = 2
elif (imout<=1440):
NPE = 576; nwrit = 2
elif (imout< 2880):
NPE = 768; nwrit = 2
elif (imout>=2880):
NPE = 5400; nwrit = 6

PARTITION =''
QOS = config['slurm_pbs']['qos']
TIME = "1:00:00"
if NPE > 532:
assert config['slurm_pbs']['qos'] != 'debug', "qos should be 'allnccs' for NCCS or 'normal' for NAS"
TIME = "12:00:00"
NNODE = ''
job=''
if GEOS_SITE == "NAS":
CONSTRAINT = 'cas_ait'
NNODE = (NPE-1)//40 + 1
job='PBS'
else:
job='SLURM'
partition = config['slurm_pbs']['partition']
if (partition != ''):
PARTITION = "#SBATCH --partition=" + partition

CONSTRAINT = '"[cas|sky]"'
if BUILT_ON_SLES15:
CONSTRAINT = 'mil'


remap_template = job_directive[job] + \
"""
source {Bin}/g5_modules
limit stacksize unlimited
cd {out_dir}/upper_data
/bin/touch input.nml
# The MERRA fvcore_internal_restarts don't include W or DZ, but we can add them by setting
# HYDROSTATIC = 0 which means HYDROSTATIC = FALSE
if ($?I_MPI_ROOT) then
# intel scaling suggestions
#--------------------------
setenv I_MPI_ADJUST_ALLREDUCE 12
setenv I_MPI_ADJUST_GATHERV 3
setenv I_MPI_SHM_HEAP_VSIZE 512
setenv PSM2_MEMORY large
setenv I_MPI_EXTRA_FILESYSTEM 1
setenv I_MPI_EXTRA_FILESYSTEM_FORCE gpfs
setenv ROMIO_FSTYPE_FORCE "gpfs:"
endif
set infiles = ()
set outfils = ()
foreach infile ( *_restart_in )
if ( $infile == fvcore_internal_restart_in ) continue
if ( $infile == moist_internal_restart_in ) continue
set infiles = ( $infiles $infile )
set outfil = `echo $infile | sed "s/restart_in/rst_out/"`
set outfils = ($outfils $outfil)
end
set interp_restartsX = {Bin}/interp_restarts.x
if ( $#infiles ) then
set ioflag = "-input_files $infiles -output_files $outfils"
set ftype = `file -Lb --mime-type fvcore_internal_restart_in`
if ($ftype =~ *stream*) then
set interp_restartsX = {Bin}/interp_restarts_bin.x
endif
else
set ioflag = ""
endif
set drymassFLG = {drymassFLG}
if ($drymassFLG) then
set dmflag = ""
else
set dmflag = "-scalers F"
endif
{Bin}/esma_mpirun -np {NPE} $interp_restartsX -im {imout} -lm {nlevel} \\
-do_hydro {hydrostatic} $ioflag $dmflag -nwriter {nwrit} {stretch_str}
"""
account = config['slurm_pbs']['account']
drymassFLG = config['input']['air']['drymass']
hydrostatic = config['input']['air']['hydrostatic']
nlevel = config['output']['air']['nlevel']
log_name = out_dir+'/remap_lake_landice_saltwater_log'
job_name = 'remap_lake_landice_saltwater'
stretch_str = ''
remap_script = remap_template.format(Bin=bindir, account = account, \
out_dir = out_dir, log_name = log_name, job_name= job_name, drymassFLG = drymassFLG, \
imout = imout, nwrit = nwrit, NPE = NPE, NNODE = NNODE, \
QOS = QOS, TIME = TIME, CONSTRAINT = CONSTRAINT, PARTITION = PARTITION, nlevel = nlevel, hydrostatic = hydrostatic,
stretch_str = stretch_str)

script_name = './remap_lake_landice_saltwater.j'

lake_landice_saltwater = open(script_name, 'wt')
lake_landice_saltwater.write(remap_script)
lake_landice_saltwater.close()

in_bc_base = config['input']['shared']['bc_base']
if "gmao_SIteam/ModelData" in in_bc_base:
assert GEOS_SITE == "NAS", "wrong site to run the package"

interactive = None
if GEOS_SITE == 'NAS':
interactive = os.getenv('PBS_JOBID', default = None)
else:
interactive = os.getenv('SLURM_JOB_ID', default = None)

if (interactive) :
print('interactive mode\n')
if GEOS_SITE != 'NAS':
ntasks = os.getenv('SLURM_NTASKS', default = None)
if ( not ntasks):
nnodes = int(os.getenv('SLURM_NNODES', default = '1'))
ncpus = int(os.getenv('SLURM_CPUS_ON_NODE', default = '28'))
ntasks = nnodes * ncpus
ntasks = int(ntasks)
if (ntasks < NPE ):
print("\nYou should have at least {NPE} cores. Now you only have {ntasks} cores ".format(NPE=NPE, ntasks=ntasks))

subprocess.call(['chmod', '755', script_name])
print(script_name+ ' 1>' + log_name + ' 2>&1')
os.system(script_name + ' 1>' + log_name+ ' 2>&1')
elif GEOS_SITE == "NAS" :
print('qsub -W block=true '+ script_name +'\n')
subprocess.call(['qsub', '-W','block=true', script_name])
else:
print('sbatch -W '+ script_name +'\n')
subprocess.call(['sbatch', '-W', script_name])

#
# post process
#
suffix = '_rst.' + suffix

for out_rst in glob.glob("*_rst*"):
filename = expid + os.path.basename(out_rst).split('_rst')[0].split('.')[-1]+suffix
print('\n Move ' + out_rst + ' to ' + out_dir+"/"+filename)
shutil.move(out_rst, out_dir+"/"+filename)

print('\n Move remap_lake_landice_saltwater.j to ' + out_dir)
shutil.move('remap_lake_landice_saltwater.j', out_dir+"/remap_lake_landice_saltwater.j")
with open(out_dir+'/cap_restart', 'w') as f:
yyyymmddhh_ = str(config['input']['shared']['yyyymmddhh'])
time = yyyymmddhh_[0:8]+' '+yyyymmddhh_[8:10]+'0000'
print('Create cap_restart')
f.write(time)
print('cd ' + cwdir)
os.chdir(cwdir)

self.remove_merra2()

def run_and_log(self, cmd, log_name):
Expand All @@ -185,14 +355,15 @@ def run_and_log(self, cmd, log_name):
log_.write(stdout)
log_.write(stderr)


def find_rst(self):
surf_restarts =[
"route_internal_rst" ,
"lake_internal_rst" ,
"landice_internal_rst" ,
"openwater_internal_rst" ,
"saltwater_internal_rst" ,
"seaicethermo_internal_rst"]
"route_internal_rst" ,
"lake_internal_rst" ,
"landice_internal_rst" ,
"openwater_internal_rst" ,
"saltwater_internal_rst" ,
"seaicethermo_internal_rst"]

rst_dir = self.config['input']['shared']['rst_dir']
yyyymmddhh_ = str(self.config['input']['shared']['yyyymmddhh'])
Expand All @@ -201,48 +372,48 @@ def find_rst(self):
for f in surf_restarts :
files = glob.glob(rst_dir+ '/*'+f+'*'+time+'*')
if len(files) >0:
restarts_in.append(files[0])
restarts_in.append(files[0])
if (len(restarts_in) == 0) :
print("\n try restart file names without time stamp\n")
for f in surf_restarts :
fname = rst_dir+ '/'+f
if os.path.exists(fname):
restarts_in.append(fname)
restarts_in.append(fname)

return restarts_in

def copy_merra2(self):
    """Stage MERRA-2 surface restarts in the input restart directory as nc4.

    Does nothing unless ``config['input']['shared']['MERRA-2']`` is truthy.
    Otherwise the lake, landice and saltwater binary restarts for the
    configured ``expid`` and ``yyyymmddhh`` are copied from the hard-coded
    MERRA-2 archive path into ``config['input']['shared']['rst_dir']``
    (created if missing), converted with ``bin2nc`` using the yaml
    descriptions located next to this script, and the copied binary file
    is removed once it has been converted.

    Returns:
        None.

    Raises:
        Whatever ``shutil.copy``, ``bin2nc`` or ``os.remove`` raise, e.g.
        when an archive file is not available; nothing is caught here.
    """
    shared = self.config['input']['shared']
    if not shared['MERRA-2']:
        return

    expid = shared['expid']
    yyyymmddhh_ = str(shared['yyyymmddhh'])
    yyyy_ = yyyymmddhh_[0:4]
    mm_ = yyyymmddhh_[4:6]
    hh_ = yyyymmddhh_[8:10]

    # Archived file names end in <yyyymmdd>_<hh>z.bin
    suffix = yyyymmddhh_[0:8] + '_' + hh_ + 'z.bin'
    merra_2_rst_dir = '/archive/users/gmao_ops/MERRA2/gmao_ops/GEOSadas-5_12_4/' + expid + '/rs/Y' + yyyy_ + '/M' + mm_ + '/'
    rst_dir = shared['rst_dir'] + '/'
    os.makedirs(rst_dir, exist_ok=True)
    print(' Copy MERRA-2 surface restarts \n from \n ' + merra_2_rst_dir + '\n to\n ' + rst_dir + '\n')

    # Each restart is paired with the yaml file bin2nc needs to convert it;
    # keeping them in one table avoids two parallel lists drifting apart.
    restart_specs = (
        ('lake_internal_rst', 'bin2nc_merra2_lake.yaml'),
        ('landice_internal_rst', 'bin2nc_merra2_landice.yaml'),
        ('saltwater_internal_rst', 'bin2nc_merra2_salt.yaml'),
    )
    bin_path = os.path.dirname(os.path.realpath(__file__))
    for rst_name, yaml_name in restart_specs:
        fname = expid + '.' + rst_name + '.' + suffix
        src = merra_2_rst_dir + fname
        # os.path.join avoids the doubled '/' (rst_dir already ends in one).
        dest = os.path.join(rst_dir, fname)
        print("Copy file " + src + " to " + rst_dir)
        shutil.copy(src, dest)
        # Swap only the file extension; str.replace would also rewrite a
        # 'z.bin' that happened to occur in the directory part of the path.
        ncdest = os.path.splitext(dest)[0] + '.nc4'
        yaml_file = os.path.join(bin_path, yaml_name)
        print('Convert bin to nc4:' + dest + ' to \n' + ncdest + '\n')
        bin2nc(dest, ncdest, yaml_file)
        # The binary copy is only an intermediate product.
        os.remove(dest)

if __name__ == '__main__' :
lls = lake_landice_saltwater(params_file='remap_params.yaml')
Expand Down

0 comments on commit 7820b59

Please sign in to comment.