Commit
towards commandline-ifying
jsta committed Apr 3, 2024
1 parent c7becce commit 1a90045
Showing 1 changed file with 196 additions and 150 deletions.
346 changes: 196 additions & 150 deletions hydropop/dev/end_to_end_new.py
@@ -1,3 +1,6 @@
# python hydropop/dev/end_to_end_new.py --pop_breaks -11 -10 -4 -1 1 2 100 --hthi_breaks -0.01 0.4 0.7 1.01 --run_name coarse_coarse_small


import os
import sys
import rabpro
@@ -12,160 +15,203 @@
import gee_stats as gee
import rivgraph_ports as wg

### """ Adjustable parameters """
## HPU creation parameters
# fmt: off
pop_breaks = [-11, -10, -4, 0, 100] # coarse = [-11, -10, -4, 0, 100], fine = [-11, -10, -4, -1, 1, 2, 100]
hthi_breaks = [-.01, .4, .7, 1.01] # coarse = [-.01, .4, .7, 1.01], fine = [-.01, 0.3, 0.55, 0.75, 0.9, 1.01]
# fmt: on
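# The break lists are class-bin edges for the population and HTHI rasters; the
# end members are padded just past the data range (e.g. -0.01 and 1.01 bracket
# an index on [0, 1]) so that every pixel falls into a bin.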
min_hpu_size = 20  # in pixels - each HPU will have at least this many pixels
target_hpu_size = 300  # in pixels - not guaranteed, but will try to make each HPU this size

## Path parameters
path_bounding_box = r"data/roi_small.gpkg" # r"data/roi.gpkg"
path_results = r"results" # folder to store results
run_name = "toronto_new_method" # string to prepend to exports
gee_asset = "projects/cimmid/assets/toronto_coarse_hpus" # the asset path to the hydropop shapefile--this might not be known beforehand but is created upon asset loading to GEE
gdrive_folder_name = "CIMMID_{}".format(run_name)

## Pseudo-fixed parameters/variables
# Paths to data
path_hthi = r"data/hydrotopo_hab_index.tif"
path_pop = r"data/pop_density_americas.tif"
path_gee_csvs = r"results/toronto_new_hpu_method/gee"

## Here we go
paths = hut.prepare_export_paths(path_results, run_name)

# Ensure results folder exists
if not os.path.isdir(path_results):
os.mkdir(path_results)

""" Generate HPUs """
# Instantiate hpu class - can take a while to load images and do some preprocessing
hpugen = hpc.hpu(path_pop, path_hthi, bounding=path_bounding_box)

# Compute classes
breaks = {"hthi": hthi_breaks, "pop": pop_breaks}
hpugen.compute_hp_classes_ranges(breaks)

# Simplify classes
hpugen.simplify_hpu_classes(min_class_size=min_hpu_size)

# Compute HPUs from classes image
hpugen.compute_hpus(target_hpu_size, min_hpu_size)

# Export adjacency
adj_df = hpugen.compute_adjacency()
adj_df.to_csv(paths["adjacency"], index=False)

# Export HPU rasters
hpugen.export_raster("hpu_simplified", paths["hpu_raster"])
hpugen.export_raster("hpu_class_simplified", paths["hpu_class_raster"])

# Export classes as polygons for plotting
classes = hut.polygonize_hpu(hpugen.I["hpu_class_simplified"], hpugen.gt, hpugen.wkt)
classes.to_file(paths["hpu_class_gpkg"], driver="GPKG")

# Compute areagrid required for computing HP unit areas
agrid = hut.areagrid(paths["hpu_raster"])
gdobj = gdal.Open(paths["hpu_raster"])
wg.write_geotiff(
agrid,
gdobj.GetGeoTransform(),
gdobj.GetProjection(),
paths["areagrid"],
dtype=gdal.GDT_Float32,
)
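# The areagrid stores each pixel's surface area, so summing it over an HPU
# (the "area" entry of do_stats below) yields the unit's total area.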

""" Compute statistics for HPUs """
# First, we do zonal stats on the locally-available rasters
# HPU stats and properties
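# Each entry maps an output attribute name to [raster path, list of zonal stats].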
do_stats = {
"hthi": [path_hthi, ["mean"]],
"pop": [path_pop, ["mean"]],
"area": [paths["areagrid"], ["sum"]],
"hpu_class": [paths["hpu_class_raster"], ["majority"]],
}
hpugen.compute_hpu_stats(do_stats)
# Export the geopackage that contains all the HPU attributes
hpugen.hpus.to_file(paths["hpu_gpkg"], driver="GPKG")
# For the shapefile export, we only need the HPU id and the polygon
hpus_shp = gpd.GeoDataFrame(hpugen.hpus[["hpu_id", "geometry"]])
hpus_shp.crs = hpugen.hpus.crs
hpus_shp.to_file(paths["hpu_shapefile"]) # shapefile needed to upload to GEE

""" STOP. Here you need to upload the hpu shapefile as a GEE asset. """
is_uploaded_to_gee = input(
"Next step, upload the following shapefile (and its components) to GEE (Y/n)"
)
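# "Components" means the shapefile sidecar files (.shp, .shx, .dbf, .prj) that
# GEE expects together when ingesting a table asset.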
while is_uploaded_to_gee is "n":

def end_to_end_new(
    pop_breaks=[-11, -10, -4, 0, 100],
    hthi_breaks=[-0.01, 0.4, 0.7, 1.01],
    min_hpu_size=20,
    target_hpu_size=300,
    path_bounding_box="data/roi.gpkg",
    run_name="toronto_new_method",
):
    """ Adjustable parameters """
    ## HPU creation parameters
    # fmt: off
    # pop_breaks = [-11, -10, -4, 0, 100] # coarse = [-11, -10, -4, 0, 100], fine = [-11, -10, -4, -1, 1, 2, 100]
    # hthi_breaks = [-.01, .4, .7, 1.01] # coarse = [-.01, .4, .7, 1.01], fine = [-.01, 0.3, 0.55, 0.75, 0.9, 1.01]
    # fmt: on
    # min_hpu_size = 20 # in pixels - each HPU will have at least this many pixels
    # target_hpu_size = 300 # in pixels - not guaranteed, but will try to make each HPU this size

    ## Path parameters
    # path_bounding_box = r"data/roi_small.gpkg" # r"data/roi.gpkg"
    path_results = r"results"  # folder to store results
    # run_name = "toronto_new_method" # string to prepend to exports
    gee_asset = "projects/cimmid/assets/toronto_coarse_hpus"  # the asset path to the hydropop shapefile--this might not be known beforehand but is created upon asset loading to GEE
    gdrive_folder_name = "CIMMID_{}".format(run_name)

    ## Pseudo-fixed parameters/variables
    # Paths to data
    path_hthi = r"data/hydrotopo_hab_index.tif"
    path_pop = r"data/pop_density_americas.tif"
    path_gee_csvs = r"results/toronto_new_hpu_method/gee"

    ## Here we go
    paths = hut.prepare_export_paths(path_results, run_name)

    # Ensure results folder exists
    if not os.path.isdir(path_results):
        os.mkdir(path_results)

    """ Generate HPUs """
    # Instantiate hpu class - can take a while to load images and do some preprocessing
    hpugen = hpc.hpu(path_pop, path_hthi, bounding=path_bounding_box)

    # Compute classes
    breaks = {"hthi": hthi_breaks, "pop": pop_breaks}
    hpugen.compute_hp_classes_ranges(breaks)

    # Simplify classes
    hpugen.simplify_hpu_classes(min_class_size=min_hpu_size)

    # Compute HPUs from classes image
    hpugen.compute_hpus(target_hpu_size, min_hpu_size)

    # Export adjacency
    adj_df = hpugen.compute_adjacency()
    adj_df.to_csv(paths["adjacency"], index=False)

    # Export HPU rasters
    hpugen.export_raster("hpu_simplified", paths["hpu_raster"])
    hpugen.export_raster("hpu_class_simplified", paths["hpu_class_raster"])

    # Export classes as polygons for plotting
    classes = hut.polygonize_hpu(
        hpugen.I["hpu_class_simplified"], hpugen.gt, hpugen.wkt
    )
    classes.to_file(paths["hpu_class_gpkg"], driver="GPKG")

    # Compute areagrid required for computing HP unit areas
    agrid = hut.areagrid(paths["hpu_raster"])
    gdobj = gdal.Open(paths["hpu_raster"])
    wg.write_geotiff(
        agrid,
        gdobj.GetGeoTransform(),
        gdobj.GetProjection(),
        paths["areagrid"],
        dtype=gdal.GDT_Float32,
    )

    """ Compute statistics for HPUs """
    # First, we do zonal stats on the locally-available rasters
    # HPU stats and properties
    do_stats = {
        "hthi": [path_hthi, ["mean"]],
        "pop": [path_pop, ["mean"]],
        "area": [paths["areagrid"], ["sum"]],
        "hpu_class": [paths["hpu_class_raster"], ["majority"]],
    }
    hpugen.compute_hpu_stats(do_stats)
    # Export the geopackage that contains all the HPU attributes
    hpugen.hpus.to_file(paths["hpu_gpkg"], driver="GPKG")
    # For the shapefile export, we only need the HPU id and the polygon
    hpus_shp = gpd.GeoDataFrame(hpugen.hpus[["hpu_id", "geometry"]])
    hpus_shp.crs = hpugen.hpus.crs
    hpus_shp.to_file(paths["hpu_shapefile"])  # shapefile needed to upload to GEE

    """ STOP. Here you need to upload the hpu shapefile as a GEE asset. """
    is_uploaded_to_gee = input(
        "Next step, upload the following shapefile (and its components) to GEE (Y/n)"
    )
    while is_uploaded_to_gee == "n":
        is_uploaded_to_gee = input(
            "Next step, upload the following shapefile (and its components) to GEE (Y/n)"
        )
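    # One way to do this manual step (a sketch; assumes the Earth Engine CLI is
    # installed and authenticated, and that the shapefile has been staged to a
    # Cloud Storage bucket -- the bucket path is hypothetical):
    #   earthengine upload table --asset_id=projects/cimmid/assets/toronto_coarse_hpus \
    #       gs://<your-bucket>/toronto_coarse_hpus.shp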

""" Update the gee_asset variable. """
datasets, Datasets = gee.generate_datasets()

# check and do fmax
if "fmax" in datasets.keys():
filename_out = "fmax"
gee.export_fmax(gee_asset, filename_out, gdrive_folder_name)

# Spin up other datasets
urls, tasks = rabpro.basin_stats.compute(
Datasets, gee_feature_path=gee_asset, folder=gdrive_folder_name
)

""" Update the gee_asset variable. """
datasets, Datasets = gee.generate_datasets()

# check and do fmax
if "fmax" in datasets.keys():
filename_out = "fmax"
gee.export_fmax(gee_asset, filename_out, gdrive_folder_name)

# Spin up other datasets
urls, tasks = rabpro.basin_stats.compute(
Datasets, gee_feature_path=gee_asset, folder=gdrive_folder_name
)
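    # compute() kicks off the GEE export tasks; the resulting csvs are written
    # to gdrive_folder_name on Google Drive and must be downloaded to
    # path_gee_csvs before the next step (`tasks` can be polled for completion).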

""" STOP. Download the GEE exports (csvs) to path_gee_csvs """
hpus = gpd.read_file(paths["hpu_gpkg"])
gee_csvs = os.listdir(path_gee_csvs)
for key in datasets.keys():

# Find the csv associated with a dataset
if key == "fmax":
look_for = "fmax"
else:
look_for = datasets[key]["path"]
if datasets[key]["band"] != "None":
look_for = look_for + "__" + datasets[key]["band"]
look_for = look_for.replace("/", "-")
this_csv = [c for c in gee_csvs if look_for in c][0]

# Ingest it
csv = pd.read_csv(os.path.join(path_gee_csvs, this_csv))

# Handle special cases first
if key == "fmax":
csv = csv[["fmax", "hpu_id"]]
elif key == "land_use":
csv = csv[["histogram", "hpu_id"]]
csv = gee.format_lc_type1(csv, fractionalize=True, prepend="lc_")
else:
keepcols = ["hpu_id"]
renamer = {}
if "mean" in datasets[key]["stats"]:
keepcols.append("mean")
renamer.update({"mean": key + "_mean"})
if "std" in datasets[key]["stats"] or "stdDev" in datasets[key]["stats"]:
keepcols.append("stdDev")
renamer.update({"stdDev": key + "_std"})
csv = csv[keepcols]
csv = csv.rename({"mean": key + "_mean"}, axis=1)

hpus = pd.merge(hpus, csv, left_on="hpu_id", right_on="hpu_id")

hpus.to_file(paths["hpu_gpkg"], driver="GPKG")

# Export watershed/gage information - keep out of class since this is somewhat
# external...for now
path_watersheds = r"X:\Research\CIMMID\Data\Watersheds\Toronto\initial_basins.gpkg"
hpus = gpd.read_file(paths["hpu_gpkg"])
watersheds = gpd.read_file(path_watersheds)
df = hut.overlay_watersheds(hpus, watersheds)
df.to_csv(paths["gages"], index=False)
""" STOP. Download the GEE exports (csvs) to path_gee_csvs """
hpus = gpd.read_file(paths["hpu_gpkg"])
gee_csvs = os.listdir(path_gee_csvs)
for key in datasets.keys():

# Find the csv associated with a dataset
if key == "fmax":
look_for = "fmax"
else:
look_for = datasets[key]["path"]
if datasets[key]["band"] != "None":
look_for = look_for + "__" + datasets[key]["band"]
look_for = look_for.replace("/", "-")
this_csv = [c for c in gee_csvs if look_for in c][0]

# Ingest it
csv = pd.read_csv(os.path.join(path_gee_csvs, this_csv))

# Handle special cases first
if key == "fmax":
csv = csv[["fmax", "hpu_id"]]
elif key == "land_use":
csv = csv[["histogram", "hpu_id"]]
csv = gee.format_lc_type1(csv, fractionalize=True, prepend="lc_")
else:
keepcols = ["hpu_id"]
renamer = {}
if "mean" in datasets[key]["stats"]:
keepcols.append("mean")
renamer.update({"mean": key + "_mean"})
if "std" in datasets[key]["stats"] or "stdDev" in datasets[key]["stats"]:
keepcols.append("stdDev")
renamer.update({"stdDev": key + "_std"})
csv = csv[keepcols]
csv = csv.rename({"mean": key + "_mean"}, axis=1)

hpus = pd.merge(hpus, csv, left_on="hpu_id", right_on="hpu_id")

hpus.to_file(paths["hpu_gpkg"], driver="GPKG")

# Export watershed/gage information - keep out of class since this is somewhat
# external...for now
path_watersheds = r"X:\Research\CIMMID\Data\Watersheds\Toronto\initial_basins.gpkg"
hpus = gpd.read_file(paths["hpu_gpkg"])
watersheds = gpd.read_file(path_watersheds)
df = hut.overlay_watersheds(hpus, watersheds)
df.to_csv(paths["gages"], index=False)


if __name__ == "__main__":

parser = argparse.ArgumentParser()

parser.add_argument("--pop_breaks", nargs="*")
parser.add_argument("--hthi_breaks", nargs="*")
parser.add_argument("--min_hpu_size", nargs=1, default=20, type=float)
parser.add_argument("--target_hpu_size", nargs=1, default=300, type=float)
parser.add_argument("--path_bounding_box", nargs=1, type=str)
parser.add_argument("--run_name", nargs=1, type=str)

args = vars(parser.parse_args())

pop_breaks = [int(x) for x in args["pop_breaks"]]
hthi_breaks = [float(x) for x in args["hthi_breaks"]]
min_hpu_size = args["min_hpu_size"]
target_hpu_size = args["target_hpu_size"]
path_bounding_box = args["path_bounding_box"]
run_name = args["run_name"]

end_to_end_new(
pop_breaks=pop_breaks,
hthi_breaks=hthi_breaks,
min_hpu_size=min_hpu_size,
target_hpu_size=target_hpu_size,
path_bounding_box=path_bounding_box,
run_name=run_name,
)
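# Example invocation (same as the comment at the top of the file):
# python hydropop/dev/end_to_end_new.py --pop_breaks -11 -10 -4 -1 1 2 100 \
#     --hthi_breaks -0.01 0.4 0.7 1.01 --run_name coarse_coarse_small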

# from matplotlib import pyplot as plt
# """
