Commit
towards commandline-ifying
jsta committed Apr 3, 2024
1 parent c7becce commit 1a90045
Showing 1 changed file with 196 additions and 150 deletions.
346 changes: 196 additions & 150 deletions hydropop/dev/end_to_end_new.py
@@ -1,3 +1,6 @@
# python hydropop/dev/end_to_end_new.py --pop_breaks -11 -10 -4 -1 1 2 100 --hthi_breaks -0.01 0.4 0.7 1.01 --run_name coarse_coarse_small


import os
import sys
import rabpro
@@ -12,160 +15,203 @@
import gee_stats as gee
import rivgraph_ports as wg

### """ Adjustable parameters """
## HPU creation parameters
# fmt: off
pop_breaks = [-11, -10, -4, 0, 100] # coarse = [-11, -10, -4, 0, 100], fine = [-11, -10, -4, -1, 1, 2, 100]
hthi_breaks = [-.01, .4, .7, 1.01] # coarse = [-.01, .4, .7, 1.01], fine = [-.01, 0.3, 0.55, 0.75, 0.9, 1.01]
# fmt: on
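# The break lists are class-bin edges for the population and HTHI rasters; the
# end members are padded just past the data range (e.g. -0.01 and 1.01 bracket
# an index on [0, 1]) so that every pixel falls into a bin.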
min_hpu_size = 20  # in pixels - each HPU will have at least this many pixels
target_hpu_size = 300  # in pixels - not guaranteed, but will try to make each HPU this size

## Path parameters
path_bounding_box = r"data/roi_small.gpkg" # r"data/roi.gpkg"
path_results = r"results" # folder to store results
run_name = "toronto_new_method" # string to prepend to exports
gee_asset = "projects/cimmid/assets/toronto_coarse_hpus" # the asset path to the hydropop shapefile--this might not be known beforehand but is created upon asset loading to GEE
gdrive_folder_name = "CIMMID_{}".format(run_name)

## Pseudo-fixed parameters/variables
# Paths to data
path_hthi = r"data/hydrotopo_hab_index.tif"
path_pop = r"data/pop_density_americas.tif"
path_gee_csvs = r"results/toronto_new_hpu_method/gee"

## Here we go
paths = hut.prepare_export_paths(path_results, run_name)

# Ensure results folder exists
if not os.path.isdir(path_results):
os.mkdir(path_results)

""" Generate HPUs """
# Instantiate hpu class - can take a while to load images and do some preprocessing
hpugen = hpc.hpu(path_pop, path_hthi, bounding=path_bounding_box)

# Compute classes
breaks = {"hthi": hthi_breaks, "pop": pop_breaks}
hpugen.compute_hp_classes_ranges(breaks)

# Simplify classes
hpugen.simplify_hpu_classes(min_class_size=min_hpu_size)

# Compute HPUs from classes image
hpugen.compute_hpus(target_hpu_size, min_hpu_size)

# Export adjacency
adj_df = hpugen.compute_adjacency()
adj_df.to_csv(paths["adjacency"], index=False)

# Export HPU rasters
hpugen.export_raster("hpu_simplified", paths["hpu_raster"])
hpugen.export_raster("hpu_class_simplified", paths["hpu_class_raster"])

# Export classes as polygons for plotting
classes = hut.polygonize_hpu(hpugen.I["hpu_class_simplified"], hpugen.gt, hpugen.wkt)
classes.to_file(paths["hpu_class_gpkg"], driver="GPKG")

# Compute areagrid required for computing HP unit areas
agrid = hut.areagrid(paths["hpu_raster"])
gdobj = gdal.Open(paths["hpu_raster"])
wg.write_geotiff(
agrid,
gdobj.GetGeoTransform(),
gdobj.GetProjection(),
paths["areagrid"],
dtype=gdal.GDT_Float32,
)
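# The areagrid stores each pixel's surface area, so summing it over an HPU
# (the "area" entry of do_stats below) yields the unit's total area.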

""" Compute statistics for HPUs """
# First, we do zonal stats on the locally-available rasters
# HPU stats and properties
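# Each entry maps an output attribute name to [raster path, list of zonal stats].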
do_stats = {
"hthi": [path_hthi, ["mean"]],
"pop": [path_pop, ["mean"]],
"area": [paths["areagrid"], ["sum"]],
"hpu_class": [paths["hpu_class_raster"], ["majority"]],
}
hpugen.compute_hpu_stats(do_stats)
# Export the geopackage that contains all the HPU attributes
hpugen.hpus.to_file(paths["hpu_gpkg"], driver="GPKG")
# For the shapefile export, we only need the HPU id and the polygon
hpus_shp = gpd.GeoDataFrame(hpugen.hpus[["hpu_id", "geometry"]])
hpus_shp.crs = hpugen.hpus.crs
hpus_shp.to_file(paths["hpu_shapefile"]) # shapefile needed to upload to GEE

""" STOP. Here you need to upload the hpu shapefile as a GEE asset. """
is_uploaded_to_gee = input(
"Next step, upload the following shapefile (and its components) to GEE (Y/n)"
)
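# "Components" means the shapefile sidecar files (.shp, .shx, .dbf, .prj) that
# GEE expects together when ingesting a table asset.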
while is_uploaded_to_gee is "n":

def end_to_end_new(
    pop_breaks=[-11, -10, -4, 0, 100],
    hthi_breaks=[-0.01, 0.4, 0.7, 1.01],
    min_hpu_size=20,
    target_hpu_size=300,
    path_bounding_box="data/roi.gpkg",
    run_name="toronto_new_method",
):
    """ Adjustable parameters """
    ## HPU creation parameters
    # fmt: off
    # pop_breaks = [-11, -10, -4, 0, 100] # coarse = [-11, -10, -4, 0, 100], fine = [-11, -10, -4, -1, 1, 2, 100]
    # hthi_breaks = [-.01, .4, .7, 1.01] # coarse = [-.01, .4, .7, 1.01], fine = [-.01, 0.3, 0.55, 0.75, 0.9, 1.01]
    # fmt: on
    # min_hpu_size = 20 # in pixels - each HPU will have at least this many pixels
    # target_hpu_size = 300 # in pixels - not guaranteed, but will try to make each HPU this size

    ## Path parameters
    # path_bounding_box = r"data/roi_small.gpkg" # r"data/roi.gpkg"
    path_results = r"results"  # folder to store results
    # run_name = "toronto_new_method" # string to prepend to exports
    gee_asset = "projects/cimmid/assets/toronto_coarse_hpus"  # the asset path to the hydropop shapefile--this might not be known beforehand but is created upon asset loading to GEE
    gdrive_folder_name = "CIMMID_{}".format(run_name)

    ## Pseudo-fixed parameters/variables
    # Paths to data
    path_hthi = r"data/hydrotopo_hab_index.tif"
    path_pop = r"data/pop_density_americas.tif"
    path_gee_csvs = r"results/toronto_new_hpu_method/gee"

    ## Here we go
    paths = hut.prepare_export_paths(path_results, run_name)

    # Ensure results folder exists
    if not os.path.isdir(path_results):
        os.mkdir(path_results)

    """ Generate HPUs """
    # Instantiate hpu class - can take a while to load images and do some preprocessing
    hpugen = hpc.hpu(path_pop, path_hthi, bounding=path_bounding_box)

    # Compute classes
    breaks = {"hthi": hthi_breaks, "pop": pop_breaks}
    hpugen.compute_hp_classes_ranges(breaks)

    # Simplify classes
    hpugen.simplify_hpu_classes(min_class_size=min_hpu_size)

    # Compute HPUs from classes image
    hpugen.compute_hpus(target_hpu_size, min_hpu_size)

    # Export adjacency
    adj_df = hpugen.compute_adjacency()
    adj_df.to_csv(paths["adjacency"], index=False)

    # Export HPU rasters
    hpugen.export_raster("hpu_simplified", paths["hpu_raster"])
    hpugen.export_raster("hpu_class_simplified", paths["hpu_class_raster"])

    # Export classes as polygons for plotting
    classes = hut.polygonize_hpu(
        hpugen.I["hpu_class_simplified"], hpugen.gt, hpugen.wkt
    )
    classes.to_file(paths["hpu_class_gpkg"], driver="GPKG")

    # Compute areagrid required for computing HP unit areas
    agrid = hut.areagrid(paths["hpu_raster"])
    gdobj = gdal.Open(paths["hpu_raster"])
    wg.write_geotiff(
        agrid,
        gdobj.GetGeoTransform(),
        gdobj.GetProjection(),
        paths["areagrid"],
        dtype=gdal.GDT_Float32,
    )

    """ Compute statistics for HPUs """
    # First, we do zonal stats on the locally-available rasters
    # HPU stats and properties
    do_stats = {
        "hthi": [path_hthi, ["mean"]],
        "pop": [path_pop, ["mean"]],
        "area": [paths["areagrid"], ["sum"]],
        "hpu_class": [paths["hpu_class_raster"], ["majority"]],
    }
    hpugen.compute_hpu_stats(do_stats)
    # Export the geopackage that contains all the HPU attributes
    hpugen.hpus.to_file(paths["hpu_gpkg"], driver="GPKG")
    # For the shapefile export, we only need the HPU id and the polygon
    hpus_shp = gpd.GeoDataFrame(hpugen.hpus[["hpu_id", "geometry"]])
    hpus_shp.crs = hpugen.hpus.crs
    hpus_shp.to_file(paths["hpu_shapefile"])  # shapefile needed to upload to GEE

    """ STOP. Here you need to upload the hpu shapefile as a GEE asset. """
    is_uploaded_to_gee = input(
        "Next step, upload the following shapefile (and its components) to GEE (Y/n)"
    )
    while is_uploaded_to_gee == "n":
        is_uploaded_to_gee = input(
            "Next step, upload the following shapefile (and its components) to GEE (Y/n)"
        )
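    # One way to do this manual step (a sketch; assumes the Earth Engine CLI is
    # installed and authenticated, and that the shapefile has been staged to a
    # Cloud Storage bucket -- the bucket path is hypothetical):
    #   earthengine upload table --asset_id=projects/cimmid/assets/toronto_coarse_hpus \
    #       gs://<your-bucket>/toronto_coarse_hpus.shp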

""" Update the gee_asset variable. """
datasets, Datasets = gee.generate_datasets()

# check and do fmax
if "fmax" in datasets.keys():
filename_out = "fmax"
gee.export_fmax(gee_asset, filename_out, gdrive_folder_name)

# Spin up other datasets
urls, tasks = rabpro.basin_stats.compute(
Datasets, gee_feature_path=gee_asset, folder=gdrive_folder_name
)

""" Update the gee_asset variable. """
datasets, Datasets = gee.generate_datasets()

# check and do fmax
if "fmax" in datasets.keys():
filename_out = "fmax"
gee.export_fmax(gee_asset, filename_out, gdrive_folder_name)

# Spin up other datasets
urls, tasks = rabpro.basin_stats.compute(
Datasets, gee_feature_path=gee_asset, folder=gdrive_folder_name
)
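    # compute() kicks off the GEE export tasks; the resulting csvs are written
    # to gdrive_folder_name on Google Drive and must be downloaded to
    # path_gee_csvs before the next step (`tasks` can be polled for completion).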

""" STOP. Download the GEE exports (csvs) to path_gee_csvs """
hpus = gpd.read_file(paths["hpu_gpkg"])
gee_csvs = os.listdir(path_gee_csvs)
for key in datasets.keys():

# Find the csv associated with a dataset
if key == "fmax":
look_for = "fmax"
else:
look_for = datasets[key]["path"]
if datasets[key]["band"] != "None":
look_for = look_for + "__" + datasets[key]["band"]
look_for = look_for.replace("/", "-")
this_csv = [c for c in gee_csvs if look_for in c][0]

# Ingest it
csv = pd.read_csv(os.path.join(path_gee_csvs, this_csv))

# Handle special cases first
if key == "fmax":
csv = csv[["fmax", "hpu_id"]]
elif key == "land_use":
csv = csv[["histogram", "hpu_id"]]
csv = gee.format_lc_type1(csv, fractionalize=True, prepend="lc_")
else:
keepcols = ["hpu_id"]
renamer = {}
if "mean" in datasets[key]["stats"]:
keepcols.append("mean")
renamer.update({"mean": key + "_mean"})
if "std" in datasets[key]["stats"] or "stdDev" in datasets[key]["stats"]:
keepcols.append("stdDev")
renamer.update({"stdDev": key + "_std"})
csv = csv[keepcols]
csv = csv.rename({"mean": key + "_mean"}, axis=1)

hpus = pd.merge(hpus, csv, left_on="hpu_id", right_on="hpu_id")

hpus.to_file(paths["hpu_gpkg"], driver="GPKG")

# Export watershed/gage information - keep out of class since this is somewhat
# external...for now
path_watersheds = r"X:\Research\CIMMID\Data\Watersheds\Toronto\initial_basins.gpkg"
hpus = gpd.read_file(paths["hpu_gpkg"])
watersheds = gpd.read_file(path_watersheds)
df = hut.overlay_watersheds(hpus, watersheds)
df.to_csv(paths["gages"], index=False)
""" STOP. Download the GEE exports (csvs) to path_gee_csvs """
hpus = gpd.read_file(paths["hpu_gpkg"])
gee_csvs = os.listdir(path_gee_csvs)
for key in datasets.keys():

# Find the csv associated with a dataset
if key == "fmax":
look_for = "fmax"
else:
look_for = datasets[key]["path"]
if datasets[key]["band"] != "None":
look_for = look_for + "__" + datasets[key]["band"]
look_for = look_for.replace("/", "-")
this_csv = [c for c in gee_csvs if look_for in c][0]

# Ingest it
csv = pd.read_csv(os.path.join(path_gee_csvs, this_csv))

# Handle special cases first
if key == "fmax":
csv = csv[["fmax", "hpu_id"]]
elif key == "land_use":
csv = csv[["histogram", "hpu_id"]]
csv = gee.format_lc_type1(csv, fractionalize=True, prepend="lc_")
else:
keepcols = ["hpu_id"]
renamer = {}
if "mean" in datasets[key]["stats"]:
keepcols.append("mean")
renamer.update({"mean": key + "_mean"})
if "std" in datasets[key]["stats"] or "stdDev" in datasets[key]["stats"]:
keepcols.append("stdDev")
renamer.update({"stdDev": key + "_std"})
csv = csv[keepcols]
csv = csv.rename({"mean": key + "_mean"}, axis=1)

hpus = pd.merge(hpus, csv, left_on="hpu_id", right_on="hpu_id")

hpus.to_file(paths["hpu_gpkg"], driver="GPKG")

# Export watershed/gage information - keep out of class since this is somewhat
# external...for now
path_watersheds = r"X:\Research\CIMMID\Data\Watersheds\Toronto\initial_basins.gpkg"
hpus = gpd.read_file(paths["hpu_gpkg"])
watersheds = gpd.read_file(path_watersheds)
df = hut.overlay_watersheds(hpus, watersheds)
df.to_csv(paths["gages"], index=False)


if __name__ == "__main__":

parser = argparse.ArgumentParser()

parser.add_argument("--pop_breaks", nargs="*")
parser.add_argument("--hthi_breaks", nargs="*")
parser.add_argument("--min_hpu_size", nargs=1, default=20, type=float)
parser.add_argument("--target_hpu_size", nargs=1, default=300, type=float)
parser.add_argument("--path_bounding_box", nargs=1, type=str)
parser.add_argument("--run_name", nargs=1, type=str)

args = vars(parser.parse_args())

pop_breaks = [int(x) for x in args["pop_breaks"]]
hthi_breaks = [float(x) for x in args["hthi_breaks"]]
min_hpu_size = args["min_hpu_size"]
target_hpu_size = args["target_hpu_size"]
path_bounding_box = args["path_bounding_box"]
run_name = args["run_name"]

end_to_end_new(
pop_breaks=pop_breaks,
hthi_breaks=hthi_breaks,
min_hpu_size=min_hpu_size,
target_hpu_size=target_hpu_size,
path_bounding_box=path_bounding_box,
run_name=run_name,
)
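# Example invocation (same as the comment at the top of the file):
# python hydropop/dev/end_to_end_new.py --pop_breaks -11 -10 -4 -1 1 2 100 \
#     --hthi_breaks -0.01 0.4 0.7 1.01 --run_name coarse_coarse_small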

# from matplotlib import pyplot as plt
# """
