From 302800115a914b4ef275e7fe90c11b7e89f712cc Mon Sep 17 00:00:00 2001 From: Josh Cunningham Date: Wed, 4 Sep 2024 14:41:59 -0500 Subject: [PATCH] Gage crosswalk (#41) * improve gage to cat crosswalk * set geopandas minimum version to 1.0.0 * fix gage cli --- modules/data_processing/gpkg_utils.py | 40 +++++++++++++++++++-------- modules/ngiab_data_cli/__main__.py | 2 +- pyproject.toml | 2 +- 3 files changed, 31 insertions(+), 13 deletions(-) diff --git a/modules/data_processing/gpkg_utils.py b/modules/data_processing/gpkg_utils.py index ce0803a..56c2632 100644 --- a/modules/data_processing/gpkg_utils.py +++ b/modules/data_processing/gpkg_utils.py @@ -312,16 +312,34 @@ def get_cat_from_gage_id(gage_id: str, gpkg: Path = file_paths.conus_hydrofabric """ gage_id = "".join([x for x in gage_id if x.isdigit()]) - logger.info(f"Getting catid for {gage_id}, in {gpkg}") - with sqlite3.connect(gpkg) as con: - sql_query = f"SELECT id FROM hydrolocations WHERE hl_uri = 'Gages-{gage_id}'" - result = con.execute(sql_query).fetchone() - if result is None: - raise IndexError(f"No nexus found for gage ID {gage_id}") - nex_id = con.execute(sql_query).fetchone()[0] - sql_query = f"SELECT divide_id FROM network WHERE toid = '{nex_id}'" - cat_id = con.execute(sql_query).fetchall() - cat_ids = [str(x[0]) for x in cat_id] - return cat_ids + if len(gage_id) < 8: + logger.warning(f"Gages in the hydrofabric are at least 8 digits {gage_id}") + old_gage_id = gage_id + gage_id = f"{int(gage_id):08d}" + logger.warning(f"Converted {old_gage_id} to {gage_id}") + logger.info(f"Getting catid for {gage_id}, in {gpkg}") + + # the hydrolocations table seems to have a bunch of errors in it + # use flowpath_attributes instead + # both have errors, cross reference them + with sqlite3.connect(gpkg) as con: + sql_query = f"""SELECT f.id + FROM flowpaths AS f + JOIN hydrolocations AS h ON f.toid = h.id + JOIN flowpath_attributes AS fa ON f.id = fa.id + WHERE h.hl_uri = 'Gages-{gage_id}' + AND fa.rl_gages LIKE '%{gage_id}%'""" + result = con.execute(sql_query).fetchall() + if len(result) == 0: + logger.critical(f"Gage ID {gage_id} is not associated with any waterbodies") + raise IndexError(f"Could not find a waterbody for {gage_id}") + if len(result) > 1: + logger.critical(f"Gage ID {gage_id} is associated with multiple waterbodies") + raise IndexError(f"Could not find a unique waterbody for {gage_id}") + + wb_id = result[0][0] + cat_id = wb_id.replace("wb", "cat") + + return cat_id diff --git a/modules/ngiab_data_cli/__main__.py b/modules/ngiab_data_cli/__main__.py index c86aa17..52f2c9c 100644 --- a/modules/ngiab_data_cli/__main__.py +++ b/modules/ngiab_data_cli/__main__.py @@ -303,7 +303,7 @@ def get_cat_ids_from_gage_ids(input_file: Path) -> List[str]: cat_ids = [] for gage_id in gage_ids: cat_id = get_cat_from_gage_id(gage_id) - cat_ids.extend(cat_id) + cat_ids.append(cat_id) logging.info(f"Converted {len(gage_ids)} gage IDs to {len(cat_ids)} catchment IDs") return cat_ids diff --git a/pyproject.toml b/pyproject.toml index 70ba0a5..06cc12a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ dependencies = [ "pyproj==3.6.1", "Flask==3.0.2", "Flask-Cors==4.0.1", - "geopandas==0.14.3", + "geopandas>=1.0.0", "requests==2.32.2", "igraph==0.11.4", "s3fs==2024.3.1",