Skip to content

Commit

Permalink
run validator
Browse files Browse the repository at this point in the history
  • Loading branch information
JordanLaserGit committed Jan 8, 2024
1 parent 4e2210b commit 906e5a7
Showing 1 changed file with 138 additions and 0 deletions.
138 changes: 138 additions & 0 deletions python/run_validator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import os, argparse
from ngen.config.realization import NgenRealization
from ngen.config.hydrofabric import CatchmentGeoJSON , NexusGeoJSON
from ngen.config.validate import validate_paths
import re
import geopandas

def validate(catchments,realization_file=None):
    """Validate a realization file and the per-catchment forcing file names.

    The realization is parsed with ``NgenRealization``, its paths are resolved
    relative to the directory two levels above the realization file, and
    ``validate_paths`` is used to confirm that the referenced paths exist.
    Each catchment is then paired *positionally* with a forcing file (both
    lists sorted) and the file name is matched against the realization's
    forcing ``file_pattern`` with ``{{id}}`` replaced by the catchment's
    first run of digits.

    Args:
        catchments: Iterable of catchment id strings; each must contain at
            least one digit.
        realization_file: Path to the realization file. Must be a path even
            though the default is ``None``.

    Raises:
        Exception: If a path referenced by the realization does not exist, or
            there is no forcing file left to pair with a catchment.
        AssertionError: If a forcing file name does not match the pattern.
    """
    relative_dir = os.path.dirname(os.path.dirname(realization_file))

    print(f'Done\nValidating {realization_file}')
    serialized_realization = NgenRealization.parse_file(realization_file)
    serialized_realization.resolve_paths(relative_to=relative_dir)
    val = validate_paths(serialized_realization)
    if len(val) > 0:
        raise Exception(f'{val[0].value} does not exist!')

    print(f'Done\nValidating individual catchment forcing paths')
    forcing = serialized_realization.global_config.forcing
    forcing_dir = os.path.join(relative_dir, forcing.path)
    # Only the files directly inside the forcing directory are considered.
    forcing_files = sorted([x for _, _, x in os.walk(forcing_dir)][0])
    nforcing = len(forcing_files)
    ncatchments = len(catchments)
    catchments = sorted(catchments)
    pattern = forcing.file_pattern  # loop invariant, looked up once
    write_int = 1000
    for j, jcatch in enumerate(catchments):
        # Parenthesised on purpose: `j + 1 % write_int` parses as
        # `j + (1 % write_int)` and would never be zero.
        if (j + 1) % write_int == 0:
            print(f'{100 * (j + 1) / ncatchments:.1f}%')
        jid = re.findall(r'\d+', jcatch)[0]
        jcatch_pattern = pattern.replace('{{id}}', jid)
        compiled = re.compile(jcatch_pattern)

        if j >= nforcing:
            raise Exception(
                f"{jcatch} -> No forcing file found, only {nforcing} forcing files "
                f"exist for {ncatchments} catchments"
            )
        jfile = forcing_files[j]
        # Raised explicitly (not via `assert`) so the check survives `python -O`.
        if not compiled.match(jfile):
            raise AssertionError(
                f"{jcatch} -> Forcing file {jfile} does not match pattern specified {pattern}"
            )

    print(f'\nNGen run folder is valid\n')

def validate_data_dir(data_dir):
    """Locate the configuration files under *data_dir* and validate them.

    Every file whose full path contains ``config`` is classified by substring:
    ``catchments``, ``nexus``, ``realization`` or ``.gpkg``. At most one file
    of each kind is allowed. The spatial domain must be given either as a
    geopackage or as a catchment/nexus pair, never both. The catchment ids are
    then read and handed, with the realization file, to ``validate``.

    Args:
        data_dir: Root of the run directory to walk.

    Raises:
        Exception: If a kind of configuration file appears more than once, if
            both a geopackage and catchment/nexus files are present, or if a
            required configuration file is missing.
    """
    # (substring searched in the path, label used in the error message)
    config_kinds = (
        ('catchments', 'catchment'),
        ('nexus', 'nexus'),
        ('realization', 'realization'),
        ('.gpkg', 'geopackage'),
    )
    found = {}
    for path, _, files in os.walk(data_dir):
        for jfile in files:
            jfile_path = os.path.join(path, jfile)
            if 'config' not in jfile_path:
                continue
            # A single path may match several kinds; each is checked in turn.
            for key, label in config_kinds:
                if key not in jfile_path:
                    continue
                if label in found:
                    raise Exception(f'This run directory contains more than a single {label} file, remove all but one.')
                found[label] = jfile_path

    catchment_file = found.get('catchment')
    nexus_file = found.get('nexus')
    realization_file = found.get('realization')
    geopackage_file = found.get('geopackage')

    if not geopackage_file:
        file_list = [catchment_file, nexus_file, realization_file]
    else:
        file_list = [geopackage_file, realization_file]
        if catchment_file or nexus_file:
            raise Exception('The spatial domain must only be defined with either a geopackage, or catchment/nexus files. Not both.')
    if any(x is None for x in file_list):
        raise Exception('Missing configuration file!')

    print(f'Configurations found! Retrieving catchment data...')

    if geopackage_file:
        catchments = geopandas.read_file(geopackage_file, layer='divides')
        catchment_list = list(catchments['divide_id'])
        # Nexus validation?
    else:
        print(f'Validating {catchment_file}')
        serialized_catchments = CatchmentGeoJSON.parse_file(catchment_file)
        catchment_list = []
        for jfeat in serialized_catchments.features:
            feature_id = jfeat.id
            if feature_id is None:
                # discrepancy between geopandas and pydantic
                feature_id = jfeat.properties.id
            catchment_list.append(feature_id)

        print(f'Done\nValidating {nexus_file}')
        NexusGeoJSON.parse_file(nexus_file)

    validate(catchment_list, realization_file)

if __name__ == "__main__":
    # Script entry point: validate either an existing ngen input data folder
    # (--data_dir) or a tarball (--tarball) that is unpacked into a scratch
    # directory first and removed again afterwards.
    import shutil
    import subprocess

    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--data_dir",
        dest="data_dir",
        type=str,
        help="Path to the ngen input data folder",
        required=False
    )
    parser.add_argument(
        "--tarball",
        dest="tarball",
        type=str,
        help="Path to tarball to be validated as ngen input data folder",
        required=False
    )
    args = parser.parse_args()

    # The "both given" case must be tested first; otherwise the --data_dir
    # branch wins and the conflict is never reported.
    if args.data_dir and args.tarball:
        raise Exception('Must specify either data folder path or tarball path, not both.')
    elif args.data_dir:
        data_dir = args.data_dir
        ii_delete_folder = False
    elif args.tarball:
        data_dir = '/tmp/ngen_data_dir'
        ii_delete_folder = True
    else:
        raise Exception('No options set!')

    try:
        if ii_delete_folder:
            # Start from an empty scratch directory.
            if os.path.isdir(data_dir) and not os.path.islink(data_dir):
                shutil.rmtree(data_dir)
            elif os.path.lexists(data_dir):
                os.remove(data_dir)
            os.mkdir(data_dir)
            # Argument list instead of a shell string: paths with spaces or
            # shell metacharacters are passed through verbatim, and a failed
            # extraction raises instead of being silently ignored.
            subprocess.run(['tar', '-xzf', args.tarball, '-C', data_dir], check=True)

        if not os.path.exists(data_dir):
            raise Exception(f"{data_dir} is an invalid directory")

        validate_data_dir(data_dir)
    finally:
        # Clean up the scratch directory even when validation fails.
        if ii_delete_folder:
            shutil.rmtree('/tmp/ngen_data_dir', ignore_errors=True)

0 comments on commit 906e5a7

Please sign in to comment.