Expand pre-commit format logic
alukach committed Sep 4, 2024
1 parent 4b51bb4 commit af8dbfc
Showing 22 changed files with 1,124 additions and 704 deletions.
12 changes: 10 additions & 2 deletions .pre-commit-config.yaml
@@ -1,13 +1,21 @@
repos:
- repo: https://github.com/PyCQA/isort
rev: 5.13.2
hooks:
- id: isort
language_version: python
args: ["-m", "3", "--trailing-comma", "-l", "88"]

- repo: https://github.com/charliermarsh/ruff-pre-commit
rev: v0.5.5
rev: v0.5.5
hooks:
- id: ruff
args: [--fix]
files: ^space2stats_api/
- id: ruff-format

- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.11.1
rev: v1.11.1
hooks:
- id: mypy
args: [--ignore-missing-imports]
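For context, the isort arguments added above ("-m", "3", "--trailing-comma", "-l", "88") select multi-line output mode 3 (vertical hanging indent), append a trailing comma to wrapped imports, and cap lines at 88 characters, which keeps isort consistent with the 88-character default used by ruff-format. A minimal sketch of an import wrapped under those settings (standard-library names, purely illustrative):

# Illustrative only: how isort -m 3 --trailing-comma -l 88 wraps a long import
from collections import (
    OrderedDict,
    defaultdict,
    namedtuple,
)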
14 changes: 7 additions & 7 deletions data/config/fill_location.py
@@ -1,21 +1,21 @@
import json

# Load data from iso3.json
with open('iso3.json', 'r') as f:
with open("iso3.json", "r") as f:
iso3_data = json.load(f)

# Load the template
with open('location_template.json', 'r') as f:
with open("location_template.json", "r") as f:
location_template = json.load(f)

# Create a filled configuration
location_filled = {"locations": []}
for iso3, country_name in iso3_data.items():
location = location_template['locations'][0].copy()
location['ISO3'] = iso3
location['country_name'] = country_name
location_filled['locations'].append(location)
location = location_template["locations"][0].copy()
location["ISO3"] = iso3
location["country_name"] = country_name
location_filled["locations"].append(location)

# Save the filled configuration
with open('location_filled.json', 'w') as f:
with open("location_filled.json", "w") as f:
json.dump(location_filled, f, indent=2)
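To make the transformation above concrete, here is a minimal, self-contained sketch with hypothetical inputs (the real iso3.json and location_template.json are not part of this diff, so their exact structure is assumed):

# Hypothetical stand-ins for iso3.json and location_template.json
iso3_data = {"KEN": "Kenya", "PER": "Peru"}
location_template = {"locations": [{"ISO3": "", "country_name": ""}]}

location_filled = {"locations": []}
for iso3, country_name in iso3_data.items():
    location = location_template["locations"][0].copy()
    location["ISO3"] = iso3
    location["country_name"] = country_name
    location_filled["locations"].append(location)

# location_filled now contains one entry per country, e.g.
# {"locations": [{"ISO3": "KEN", "country_name": "Kenya"},
#                {"ISO3": "PER", "country_name": "Peru"}]}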
@@ -1,78 +1,88 @@
import sys, os, multiprocessing
import multiprocessing
import os
import sys

import pandas as pd
import geopandas as gpd
#import numpy as np

from h3 import h3

import GOSTrocks.rasterMisc as rMisc
import GOSTrocks.ntlMisc as ntl
import GOSTrocks.rasterMisc as rMisc
import pandas as pd
from GOSTrocks.misc import tPrint
from h3 import h3

# import numpy as np


sys.path.append("../../src")
import h3_helper

AWS_S3_BUCKET = 'wbg-geography01'
AWS_S3_BUCKET = "wbg-geography01"
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN")


def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False):
cName = f'{os.path.basename(os.path.dirname(out_file))}-{os.path.basename(cur_raster_file)}'
cName = f"{os.path.basename(os.path.dirname(out_file))}-{os.path.basename(cur_raster_file)}"
if verbose:
tPrint(f'Starting {cName}')
tPrint(f"Starting {cName}")
if buffer0:
gdf['geometry'] = gdf['geometry'].buffer(0)
gdf["geometry"] = gdf["geometry"].buffer(0)
res = rMisc.zonalStats(gdf, cur_raster_file, minVal=0, verbose=False)
res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])
res['id'] = gdf['id'].values
res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"])
res["id"] = gdf["id"].values
if verbose:
tPrint(f'**** finished {cName}')
return({out_file:res})
tPrint(f"**** finished {cName}")
return {out_file: res}


if __name__ == "__main__":
multiprocess=True
multiprocess = True
verbose = True
tPrint("Starting")
h3_level = 6
data_prefix = "VIIRS_ANNUAL_EOG"

# Get list of nighttime lights VIIRS data
# ntl_files = ntl.aws_search_ntl()
ntl_folder = "/home/public/Data/GLOBAL/NighttimeLights/VIIRS_ANNUAL_EOG_V21"
ntl_files = [os.path.join(ntl_folder, x) for x in os.listdir(ntl_folder) if x.endswith(".tif")]

ntl_files = [
os.path.join(ntl_folder, x)
for x in os.listdir(ntl_folder)
if x.endswith(".tif")
]

# h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False)
admin_bounds = "/home/wb411133/data/Global/ADMIN/Admin2_Polys.shp"
# Generate a list from the global admin boundaries
inA = gpd.read_file(admin_bounds)
inA['id'] = list(inA.index)
inA["id"] = list(inA.index)
h3_0_list = {}
for region, countries in inA.groupby("WB_REGION"):
h3_0_list[region] = countries

if verbose:
tPrint("H3_0 list generated")
# set up mp arguments

for h3_0_key, cur_gdf in h3_0_list.items():
arg_list = []
processed_list = []
processed_list = []
for pop_file in ntl_files:
filename = os.path.basename(f'{pop_file.replace(".tif", "")}_zonal.csv')
out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_0_key}/{filename}'
out_s3_key = f'Space2Stats/h3_stats_data/ADM_GLOBAL/{data_prefix}/{h3_0_key}/{filename}'
full_path = os.path.join("s3://", AWS_S3_BUCKET, out_s3_key)
out_s3_key = (
f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_0_key}/{filename}"
)
out_s3_key = f"Space2Stats/h3_stats_data/ADM_GLOBAL/{data_prefix}/{h3_0_key}/{filename}"
full_path = os.path.join("s3://", AWS_S3_BUCKET, out_s3_key)
try:
tempPD = pd.read_csv(full_path)
processed_list.append(filename)
except:
arg_list.append([cur_gdf, pop_file, out_s3_key, True, verbose])

if multiprocess:
with multiprocessing.Pool(processes=min([70,len(ntl_files)])) as pool:
results = pool.starmap(run_zonal, arg_list)
with multiprocessing.Pool(processes=min([70, len(ntl_files)])) as pool:
results = pool.starmap(run_zonal, arg_list)
else:
for a in arg_list:
results = run_zonal(*a)
@@ -88,4 +98,4 @@ def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False):
"secret": AWS_SECRET_ACCESS_KEY,
"token": AWS_SESSION_TOKEN,
},
)
)
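The truncated tail of this script passes AWS credentials through a storage_options mapping; if that call is pandas' DataFrame.to_csv (which accepts storage_options when fsspec/s3fs are installed), the write pattern would look roughly like the sketch below, with placeholder bucket and key names:

import os

import pandas as pd

# Placeholder results frame standing in for a run_zonal() output
df = pd.DataFrame({"id": [1, 2], "SUM": [10.0, 20.0]})
df.to_csv(
    "s3://example-bucket/Space2Stats/example_zonal.csv",  # placeholder S3 path
    storage_options={
        "key": os.getenv("AWS_ACCESS_KEY_ID"),
        "secret": os.getenv("AWS_SECRET_ACCESS_KEY"),
        "token": os.getenv("AWS_SESSION_TOKEN"),
    },
)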
121 changes: 84 additions & 37 deletions notebooks/MP_SCRIPTS/zonal_fathom.py
@@ -1,22 +1,25 @@
import sys, os, multiprocessing
import multiprocessing
import os
import sys

import pandas as pd
#import geopandas as gpd
#import numpy as np

from h3 import h3

# import geopandas as gpd
# import numpy as np


sys.path.insert(0, "/home/wb411133/Code/GOSTrocks/src")
import GOSTrocks.rasterMisc as rMisc
import GOSTrocks.ntlMisc as ntl
import GOSTrocks.dataMisc as dMisc
import GOSTrocks.ntlMisc as ntl
import GOSTrocks.rasterMisc as rMisc
from GOSTrocks.misc import tPrint

sys.path.append("../../src")
import h3_helper
import global_zonal
import h3_helper

AWS_S3_BUCKET = 'wbg-geography01'
AWS_S3_BUCKET = "wbg-geography01"
AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY")
AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN")
@@ -28,58 +31,98 @@
h3_level = 6
data_prefix_flood = "Flood"
data_prefix_pop = "Flood_Pop"
flood_reclass_dict = { 0: [-9999, 0],
1: [0, 10],
2: [10.1, 50],
3: [50, 100000.0],}

flood_reclass_dict = {
0: [-9999, 0],
1: [0, 10],
2: [10.1, 50],
3: [50, 100000.0],
}

# Define input layers
pop_layer = r"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020ppp_2020_1km_Aggregated.tif"
# Select layer to download
flood_type = ["PLUVIAL","FLUVIAL","COASTAL"]
flood_type = ["PLUVIAL", "FLUVIAL", "COASTAL"]
defence = ["DEFENDED"]
return_period = ['1in100']
return_period = ["1in100"]
climate_model = ["PERCENTILE50"]
year = ["2020"]

all_vrts = dMisc.get_fathom_vrts(True)
sel_images = all_vrts.loc[(all_vrts['FLOOD_TYPE'].isin(flood_type)) & (all_vrts['DEFENCE'].isin(defence)) &
(all_vrts['RETURN'].isin(return_period)) & (all_vrts['CLIMATE_MODEL'].isin(climate_model))]
fathom_vrt_path = sel_images['PATH'].iloc[0]
sel_images = all_vrts.loc[
(all_vrts["FLOOD_TYPE"].isin(flood_type))
& (all_vrts["DEFENCE"].isin(defence))
& (all_vrts["RETURN"].isin(return_period))
& (all_vrts["CLIMATE_MODEL"].isin(climate_model))
]
fathom_vrt_path = sel_images["PATH"].iloc[0]

# h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False, read_pickle=True)
h3_1_list = h3_helper.generate_lvl1_lists(h3_level, return_gdf=True, buffer0=False, read_pickle=True)
h3_1_list = h3_helper.generate_lvl1_lists(
h3_level, return_gdf=True, buffer0=False, read_pickle=True
)
if verbose:
tPrint("H3_1 list generated")
# set up arguments for zonal processing
flood_depth_args = []
flood_pop_args = []
for h3_1_key, cur_gdf in h3_1_list.items():
for fathom_index, fathom_row in sel_images.iterrows():
fathom_path = fathom_row['PATH']
fathom_file = "_".join([fathom_row['FLOOD_TYPE'], fathom_row['RETURN'], fathom_row['CLIMATE_MODEL'], fathom_row['YEAR']])

flood_pop_filename = f'FATHOM_total_pop_{fathom_file}.csv'
pop_out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{flood_pop_filename}'
fathom_path = fathom_row["PATH"]
fathom_file = "_".join(
[
fathom_row["FLOOD_TYPE"],
fathom_row["RETURN"],
fathom_row["CLIMATE_MODEL"],
fathom_row["YEAR"],
]
)

flood_pop_filename = f"FATHOM_total_pop_{fathom_file}.csv"
pop_out_s3_key = f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{flood_pop_filename}"
full_path_pop = os.path.join("s3://", AWS_S3_BUCKET, pop_out_s3_key)
try:
tempPD = pd.read_csv(full_path_pop)
tempPD = pd.read_csv(full_path_pop)
except:
flood_pop_args.append([cur_gdf, "shape_id", pop_layer, fathom_path, pop_out_s3_key,
None, flood_reclass_dict,
True, 0, 10000000, verbose])
total_flood_filename = f'FATHOM_total_depth_{fathom_file}.csv'
depth_out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{total_flood_filename}'
flood_pop_args.append(
[
cur_gdf,
"shape_id",
pop_layer,
fathom_path,
pop_out_s3_key,
None,
flood_reclass_dict,
True,
0,
10000000,
verbose,
]
)
total_flood_filename = f"FATHOM_total_depth_{fathom_file}.csv"
depth_out_s3_key = f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{total_flood_filename}"
full_path_depth = os.path.join("s3://", AWS_S3_BUCKET, depth_out_s3_key)
try:
tempPD = pd.read_csv(full_path_depth)
tempPD = pd.read_csv(full_path_depth)
except:
flood_depth_args.append([cur_gdf, "shape_id", fathom_path, depth_out_s3_key, True, 0, 1000, verbose])
flood_depth_args.append(
[
cur_gdf,
"shape_id",
fathom_path,
depth_out_s3_key,
True,
0,
1000,
verbose,
]
)
tPrint("Arguments generated")
# Multiprocess flood population results
if multiprocess:
with multiprocessing.Pool(multiprocessing.cpu_count()-2) as pool:
pop_results = pool.starmap(global_zonal.zonal_stats_categorical, flood_pop_args)
with multiprocessing.Pool(multiprocessing.cpu_count() - 2) as pool:
pop_results = pool.starmap(
global_zonal.zonal_stats_categorical, flood_pop_args
)
else:
pop_results = []
for a in flood_pop_args:
@@ -101,8 +144,12 @@

# Multiprocess flood depth results
if multiprocess:
with multiprocessing.Pool(processes=min([multiprocessing.cpu_count()-2,len(arg_list)])) as pool:
depth_results = pool.starmap(global_zonal.zonal_stats_numerical, flood_depth_args)
with multiprocessing.Pool(
processes=min([multiprocessing.cpu_count() - 2, len(arg_list)])
) as pool:
depth_results = pool.starmap(
global_zonal.zonal_stats_numerical, flood_depth_args
)
else:
depth_results = []
for a in flood_depth_args:
@@ -120,4 +167,4 @@
"secret": AWS_SECRET_ACCESS_KEY,
"token": AWS_SESSION_TOKEN,
},
)
)
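For readers skimming the reclassification logic, flood_reclass_dict maps a class label to a [lower, upper] depth range. The snippet below is only an illustration of binning depths with such a dictionary, assuming half-open (lower, upper] intervals; it is not the global_zonal.zonal_stats_categorical implementation:

import numpy as np

flood_reclass_dict = {
    0: [-9999, 0],
    1: [0, 10],
    2: [10.1, 50],
    3: [50, 100000.0],
}

depths = np.array([0.0, 0.5, 12.0, 75.0])  # example flood depths
classes = np.full(depths.shape, -1, dtype=int)  # -1 = unclassified
for flood_class, (low, high) in flood_reclass_dict.items():
    mask = (depths > low) & (depths <= high)
    classes[mask] = flood_class

print(classes)  # [0 1 2 3] for these example depths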