From af8dbfc8b9a0902677c639047488d8cfcc7bcc41 Mon Sep 17 00:00:00 2001 From: Anthony Lukach Date: Tue, 3 Sep 2024 21:47:19 -0700 Subject: [PATCH] Expand pre-commit format logic --- .pre-commit-config.yaml | 12 +- data/config/fill_location.py | 14 +- .../zonal_pop_NTL_VIIRS_EOG.py | 66 +- notebooks/MP_SCRIPTS/zonal_fathom.py | 121 ++-- .../MP_SCRIPTS/zonal_pop_NTL_VIIRS_LEN.py | 66 +- notebooks/MP_SCRIPTS/zonal_pop_by_gender.py | 83 +-- notebooks/MP_SCRIPTS/zonal_urbanization.py | 80 +-- postgres/chunk_parquet.py | 10 +- postgres/nyc_sample.py | 29 +- space2stats_api/cdk/app.py | 3 +- space2stats_api/cdk/aws_stack.py | 26 +- space2stats_api/cdk/settings.py | 1 + space2stats_api/src/space2stats/__main__.py | 6 +- space2stats_api/src/space2stats/app.py | 9 +- space2stats_api/src/space2stats/handler.py | 2 +- space2stats_api/src/space2stats/main.py | 12 +- space2stats_api/src/tests/test_api.py | 19 +- space2stats_api/src/tests/test_h3_utils.py | 1 - src/country_zonal.py | 565 +++++++++++------- src/global_zonal.py | 332 ++++++---- src/h3_helper.py | 353 +++++++---- src/space2stats_data_config.py | 18 +- 22 files changed, 1124 insertions(+), 704 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d8ad7b7..2b6fdaf 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,13 +1,21 @@ repos: + - repo: https://github.com/PyCQA/isort + rev: 5.13.2 + hooks: + - id: isort + language_version: python + args: ["-m", "3", "--trailing-comma", "-l", "88"] + - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.5.5 + rev: v0.5.5 hooks: - id: ruff args: [--fix] files: ^space2stats_api/ + - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.11.1 + rev: v1.11.1 hooks: - id: mypy args: [--ignore-missing-imports] diff --git a/data/config/fill_location.py b/data/config/fill_location.py index 588b14d..b0e14c5 100644 --- a/data/config/fill_location.py +++ b/data/config/fill_location.py @@ -1,21 +1,21 @@ import json # Load data from iso3.json -with open('iso3.json', 'r') as f: +with open("iso3.json", "r") as f: iso3_data = json.load(f) # Load the template -with open('location_template.json', 'r') as f: +with open("location_template.json", "r") as f: location_template = json.load(f) # Create a filled configuration location_filled = {"locations": []} for iso3, country_name in iso3_data.items(): - location = location_template['locations'][0].copy() - location['ISO3'] = iso3 - location['country_name'] = country_name - location_filled['locations'].append(location) + location = location_template["locations"][0].copy() + location["ISO3"] = iso3 + location["country_name"] = country_name + location_filled["locations"].append(location) # Save the filled configuration -with open('location_filled.json', 'w') as f: +with open("location_filled.json", "w") as f: json.dump(location_filled, f, indent=2) diff --git a/notebooks/IMPLEMENTATIONS/ZON_MNACE_Compile_NTL/zonal_pop_NTL_VIIRS_EOG.py b/notebooks/IMPLEMENTATIONS/ZON_MNACE_Compile_NTL/zonal_pop_NTL_VIIRS_EOG.py index 4667dd3..0bda932 100644 --- a/notebooks/IMPLEMENTATIONS/ZON_MNACE_Compile_NTL/zonal_pop_NTL_VIIRS_EOG.py +++ b/notebooks/IMPLEMENTATIONS/ZON_MNACE_Compile_NTL/zonal_pop_NTL_VIIRS_EOG.py @@ -1,69 +1,79 @@ -import sys, os, multiprocessing +import multiprocessing +import os +import sys -import pandas as pd import geopandas as gpd -#import numpy as np - -from h3 import h3 - -import GOSTrocks.rasterMisc as rMisc import GOSTrocks.ntlMisc as ntl +import GOSTrocks.rasterMisc as rMisc +import 
pandas as pd from GOSTrocks.misc import tPrint +from h3 import h3 + +# import numpy as np + sys.path.append("../../src") import h3_helper -AWS_S3_BUCKET = 'wbg-geography01' +AWS_S3_BUCKET = "wbg-geography01" AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN") + def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False): - cName = f'{os.path.basename(os.path.dirname(out_file))}-{os.path.basename(cur_raster_file)}' + cName = f"{os.path.basename(os.path.dirname(out_file))}-{os.path.basename(cur_raster_file)}" if verbose: - tPrint(f'Starting {cName}') + tPrint(f"Starting {cName}") if buffer0: - gdf['geometry'] = gdf['geometry'].buffer(0) + gdf["geometry"] = gdf["geometry"].buffer(0) res = rMisc.zonalStats(gdf, cur_raster_file, minVal=0, verbose=False) - res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN']) - res['id'] = gdf['id'].values + res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) + res["id"] = gdf["id"].values if verbose: - tPrint(f'**** finished {cName}') - return({out_file:res}) + tPrint(f"**** finished {cName}") + return {out_file: res} + if __name__ == "__main__": - multiprocess=True + multiprocess = True verbose = True tPrint("Starting") h3_level = 6 data_prefix = "VIIRS_ANNUAL_EOG" - + # Get list of nighttime lights VIIRS data # ntl_files = ntl.aws_search_ntl() ntl_folder = "/home/public/Data/GLOBAL/NighttimeLights/VIIRS_ANNUAL_EOG_V21" - ntl_files = [os.path.join(ntl_folder, x) for x in os.listdir(ntl_folder) if x.endswith(".tif")] - + ntl_files = [ + os.path.join(ntl_folder, x) + for x in os.listdir(ntl_folder) + if x.endswith(".tif") + ] + # h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False) admin_bounds = "/home/wb411133/data/Global/ADMIN/Admin2_Polys.shp" # Generate a list from the global admin boundaries inA = gpd.read_file(admin_bounds) - inA['id'] = list(inA.index) + inA["id"] = list(inA.index) h3_0_list = {} for region, countries in inA.groupby("WB_REGION"): h3_0_list[region] = countries - + if verbose: tPrint("H3_0 list generated") # set up mp arguments for h3_0_key, cur_gdf in h3_0_list.items(): arg_list = [] - processed_list = [] + processed_list = [] for pop_file in ntl_files: filename = os.path.basename(f'{pop_file.replace(".tif", "")}_zonal.csv') - out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_0_key}/{filename}' - out_s3_key = f'Space2Stats/h3_stats_data/ADM_GLOBAL/{data_prefix}/{h3_0_key}/{filename}' - full_path = os.path.join("s3://", AWS_S3_BUCKET, out_s3_key) + out_s3_key = ( + f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_0_key}/{filename}" + ) + out_s3_key = f"Space2Stats/h3_stats_data/ADM_GLOBAL/{data_prefix}/{h3_0_key}/{filename}" + full_path = os.path.join("s3://", AWS_S3_BUCKET, out_s3_key) try: tempPD = pd.read_csv(full_path) processed_list.append(filename) @@ -71,8 +81,8 @@ def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False): arg_list.append([cur_gdf, pop_file, out_s3_key, True, verbose]) if multiprocess: - with multiprocessing.Pool(processes=min([70,len(ntl_files)])) as pool: - results = pool.starmap(run_zonal, arg_list) + with multiprocessing.Pool(processes=min([70, len(ntl_files)])) as pool: + results = pool.starmap(run_zonal, arg_list) else: for a in arg_list: results = run_zonal(*a) @@ -88,4 +98,4 @@ def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False): "secret": AWS_SECRET_ACCESS_KEY, "token": 
AWS_SESSION_TOKEN, }, - ) \ No newline at end of file + ) diff --git a/notebooks/MP_SCRIPTS/zonal_fathom.py b/notebooks/MP_SCRIPTS/zonal_fathom.py index c174280..2d74ab8 100644 --- a/notebooks/MP_SCRIPTS/zonal_fathom.py +++ b/notebooks/MP_SCRIPTS/zonal_fathom.py @@ -1,22 +1,25 @@ -import sys, os, multiprocessing +import multiprocessing +import os +import sys import pandas as pd -#import geopandas as gpd -#import numpy as np - from h3 import h3 +# import geopandas as gpd +# import numpy as np + + sys.path.insert(0, "/home/wb411133/Code/GOSTrocks/src") -import GOSTrocks.rasterMisc as rMisc -import GOSTrocks.ntlMisc as ntl import GOSTrocks.dataMisc as dMisc +import GOSTrocks.ntlMisc as ntl +import GOSTrocks.rasterMisc as rMisc from GOSTrocks.misc import tPrint sys.path.append("../../src") -import h3_helper import global_zonal +import h3_helper -AWS_S3_BUCKET = 'wbg-geography01' +AWS_S3_BUCKET = "wbg-geography01" AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN") @@ -28,27 +31,35 @@ h3_level = 6 data_prefix_flood = "Flood" data_prefix_pop = "Flood_Pop" - flood_reclass_dict = { 0: [-9999, 0], - 1: [0, 10], - 2: [10.1, 50], - 3: [50, 100000.0],} - + flood_reclass_dict = { + 0: [-9999, 0], + 1: [0, 10], + 2: [10.1, 50], + 3: [50, 100000.0], + } + # Define input layers pop_layer = r"/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020ppp_2020_1km_Aggregated.tif" # Select layer to downlaod - flood_type = ["PLUVIAL","FLUVIAL","COASTAL"] + flood_type = ["PLUVIAL", "FLUVIAL", "COASTAL"] defence = ["DEFENDED"] - return_period = ['1in100'] + return_period = ["1in100"] climate_model = ["PERCENTILE50"] year = ["2020"] all_vrts = dMisc.get_fathom_vrts(True) - sel_images = all_vrts.loc[(all_vrts['FLOOD_TYPE'].isin(flood_type)) & (all_vrts['DEFENCE'].isin(defence)) & - (all_vrts['RETURN'].isin(return_period)) & (all_vrts['CLIMATE_MODEL'].isin(climate_model))] - fathom_vrt_path = sel_images['PATH'].iloc[0] + sel_images = all_vrts.loc[ + (all_vrts["FLOOD_TYPE"].isin(flood_type)) + & (all_vrts["DEFENCE"].isin(defence)) + & (all_vrts["RETURN"].isin(return_period)) + & (all_vrts["CLIMATE_MODEL"].isin(climate_model)) + ] + fathom_vrt_path = sel_images["PATH"].iloc[0] # h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False, read_pickle=True) - h3_1_list = h3_helper.generate_lvl1_lists(h3_level, return_gdf=True, buffer0=False, read_pickle=True) + h3_1_list = h3_helper.generate_lvl1_lists( + h3_level, return_gdf=True, buffer0=False, read_pickle=True + ) if verbose: tPrint("H3_1 list generated") # set up arguments for zonal processing @@ -56,30 +67,62 @@ flood_pop_args = [] for h3_1_key, cur_gdf in h3_1_list.items(): for fathom_index, fathom_row in sel_images.iterrows(): - fathom_path = fathom_row['PATH'] - fathom_file = "_".join([fathom_row['FLOOD_TYPE'], fathom_row['RETURN'], fathom_row['CLIMATE_MODEL'], fathom_row['YEAR']]) - - flood_pop_filename = f'FATHOM_total_pop_{fathom_file}.csv' - pop_out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{flood_pop_filename}' + fathom_path = fathom_row["PATH"] + fathom_file = "_".join( + [ + fathom_row["FLOOD_TYPE"], + fathom_row["RETURN"], + fathom_row["CLIMATE_MODEL"], + fathom_row["YEAR"], + ] + ) + + flood_pop_filename = f"FATHOM_total_pop_{fathom_file}.csv" + pop_out_s3_key = f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{flood_pop_filename}" full_path_pop = os.path.join("s3://", 
AWS_S3_BUCKET, pop_out_s3_key) try: - tempPD = pd.read_csv(full_path_pop) + tempPD = pd.read_csv(full_path_pop) except: - flood_pop_args.append([cur_gdf, "shape_id", pop_layer, fathom_path, pop_out_s3_key, - None, flood_reclass_dict, - True, 0, 10000000, verbose]) - total_flood_filename = f'FATHOM_total_depth_{fathom_file}.csv' - depth_out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{total_flood_filename}' + flood_pop_args.append( + [ + cur_gdf, + "shape_id", + pop_layer, + fathom_path, + pop_out_s3_key, + None, + flood_reclass_dict, + True, + 0, + 10000000, + verbose, + ] + ) + total_flood_filename = f"FATHOM_total_depth_{fathom_file}.csv" + depth_out_s3_key = f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{total_flood_filename}" full_path_depth = os.path.join("s3://", AWS_S3_BUCKET, depth_out_s3_key) try: - tempPD = pd.read_csv(full_path_depth) + tempPD = pd.read_csv(full_path_depth) except: - flood_depth_args.append([cur_gdf, "shape_id", fathom_path, depth_out_s3_key, True, 0, 1000, verbose]) + flood_depth_args.append( + [ + cur_gdf, + "shape_id", + fathom_path, + depth_out_s3_key, + True, + 0, + 1000, + verbose, + ] + ) tPrint("Arguments generated") # Multiprocess flood population results if multiprocess: - with multiprocessing.Pool(multiprocessing.cpu_count()-2) as pool: - pop_results = pool.starmap(global_zonal.zonal_stats_categorical, flood_pop_args) + with multiprocessing.Pool(multiprocessing.cpu_count() - 2) as pool: + pop_results = pool.starmap( + global_zonal.zonal_stats_categorical, flood_pop_args + ) else: pop_results = [] for a in flood_pop_args: @@ -101,8 +144,12 @@ # Multiprocess flood depth results if multiprocess: - with multiprocessing.Pool(processes=min([multiprocessing.cpu_count()-2,len(arg_list)])) as pool: - depth_results = pool.starmap(global_zonal.zonal_stats_numerical, flood_depth_args) + with multiprocessing.Pool( + processes=min([multiprocessing.cpu_count() - 2, len(arg_list)]) + ) as pool: + depth_results = pool.starmap( + global_zonal.zonal_stats_numerical, flood_depth_args + ) else: depth_results = [] for a in flood_depth_args: @@ -120,4 +167,4 @@ "secret": AWS_SECRET_ACCESS_KEY, "token": AWS_SESSION_TOKEN, }, - ) \ No newline at end of file + ) diff --git a/notebooks/MP_SCRIPTS/zonal_pop_NTL_VIIRS_LEN.py b/notebooks/MP_SCRIPTS/zonal_pop_NTL_VIIRS_LEN.py index 4667dd3..0bda932 100755 --- a/notebooks/MP_SCRIPTS/zonal_pop_NTL_VIIRS_LEN.py +++ b/notebooks/MP_SCRIPTS/zonal_pop_NTL_VIIRS_LEN.py @@ -1,69 +1,79 @@ -import sys, os, multiprocessing +import multiprocessing +import os +import sys -import pandas as pd import geopandas as gpd -#import numpy as np - -from h3 import h3 - -import GOSTrocks.rasterMisc as rMisc import GOSTrocks.ntlMisc as ntl +import GOSTrocks.rasterMisc as rMisc +import pandas as pd from GOSTrocks.misc import tPrint +from h3 import h3 + +# import numpy as np + sys.path.append("../../src") import h3_helper -AWS_S3_BUCKET = 'wbg-geography01' +AWS_S3_BUCKET = "wbg-geography01" AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN") + def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False): - cName = f'{os.path.basename(os.path.dirname(out_file))}-{os.path.basename(cur_raster_file)}' + cName = f"{os.path.basename(os.path.dirname(out_file))}-{os.path.basename(cur_raster_file)}" if verbose: - tPrint(f'Starting {cName}') + tPrint(f"Starting {cName}") if buffer0: - gdf['geometry'] = 
gdf['geometry'].buffer(0) + gdf["geometry"] = gdf["geometry"].buffer(0) res = rMisc.zonalStats(gdf, cur_raster_file, minVal=0, verbose=False) - res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN']) - res['id'] = gdf['id'].values + res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) + res["id"] = gdf["id"].values if verbose: - tPrint(f'**** finished {cName}') - return({out_file:res}) + tPrint(f"**** finished {cName}") + return {out_file: res} + if __name__ == "__main__": - multiprocess=True + multiprocess = True verbose = True tPrint("Starting") h3_level = 6 data_prefix = "VIIRS_ANNUAL_EOG" - + # Get list of nighttime lights VIIRS data # ntl_files = ntl.aws_search_ntl() ntl_folder = "/home/public/Data/GLOBAL/NighttimeLights/VIIRS_ANNUAL_EOG_V21" - ntl_files = [os.path.join(ntl_folder, x) for x in os.listdir(ntl_folder) if x.endswith(".tif")] - + ntl_files = [ + os.path.join(ntl_folder, x) + for x in os.listdir(ntl_folder) + if x.endswith(".tif") + ] + # h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False) admin_bounds = "/home/wb411133/data/Global/ADMIN/Admin2_Polys.shp" # Generate a list from the global admin boundaries inA = gpd.read_file(admin_bounds) - inA['id'] = list(inA.index) + inA["id"] = list(inA.index) h3_0_list = {} for region, countries in inA.groupby("WB_REGION"): h3_0_list[region] = countries - + if verbose: tPrint("H3_0 list generated") # set up mp arguments for h3_0_key, cur_gdf in h3_0_list.items(): arg_list = [] - processed_list = [] + processed_list = [] for pop_file in ntl_files: filename = os.path.basename(f'{pop_file.replace(".tif", "")}_zonal.csv') - out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_0_key}/{filename}' - out_s3_key = f'Space2Stats/h3_stats_data/ADM_GLOBAL/{data_prefix}/{h3_0_key}/{filename}' - full_path = os.path.join("s3://", AWS_S3_BUCKET, out_s3_key) + out_s3_key = ( + f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_0_key}/{filename}" + ) + out_s3_key = f"Space2Stats/h3_stats_data/ADM_GLOBAL/{data_prefix}/{h3_0_key}/{filename}" + full_path = os.path.join("s3://", AWS_S3_BUCKET, out_s3_key) try: tempPD = pd.read_csv(full_path) processed_list.append(filename) @@ -71,8 +81,8 @@ def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False): arg_list.append([cur_gdf, pop_file, out_s3_key, True, verbose]) if multiprocess: - with multiprocessing.Pool(processes=min([70,len(ntl_files)])) as pool: - results = pool.starmap(run_zonal, arg_list) + with multiprocessing.Pool(processes=min([70, len(ntl_files)])) as pool: + results = pool.starmap(run_zonal, arg_list) else: for a in arg_list: results = run_zonal(*a) @@ -88,4 +98,4 @@ def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False): "secret": AWS_SECRET_ACCESS_KEY, "token": AWS_SESSION_TOKEN, }, - ) \ No newline at end of file + ) diff --git a/notebooks/MP_SCRIPTS/zonal_pop_by_gender.py b/notebooks/MP_SCRIPTS/zonal_pop_by_gender.py index aa77aef..e48f1e6 100755 --- a/notebooks/MP_SCRIPTS/zonal_pop_by_gender.py +++ b/notebooks/MP_SCRIPTS/zonal_pop_by_gender.py @@ -1,86 +1,97 @@ -import sys, os, importlib, math, multiprocessing -import rasterio, geojson +import importlib +import math +import multiprocessing +import os +import sys -import pandas as pd +import geojson import geopandas as gpd +import GOSTrocks.rasterMisc as rMisc import numpy as np - +import pandas as pd +import rasterio +from GOSTrocks.misc import tPrint from h3 import h3 -from tqdm import tqdm from shapely.geometry import Polygon - -import 
GOSTrocks.rasterMisc as rMisc -from GOSTrocks.misc import tPrint +from tqdm import tqdm sys.path.append("../../src") import h3_helper -AWS_S3_BUCKET = 'wbg-geography01' +AWS_S3_BUCKET = "wbg-geography01" AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN") + def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False): - cName = f'{os.path.basename(os.path.dirname(out_file))}-{os.path.basename(cur_raster_file)}' + cName = f"{os.path.basename(os.path.dirname(out_file))}-{os.path.basename(cur_raster_file)}" if verbose: - tPrint(f'Starting {cName}') + tPrint(f"Starting {cName}") if buffer0: - gdf['geometry'] = gdf['geometry'].buffer(0) + gdf["geometry"] = gdf["geometry"].buffer(0) res = rMisc.zonalStats(gdf, cur_raster_file, minVal=0, verbose=False) - res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN']) - res['id'] = gdf['id'].values + res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) + res["id"] = gdf["id"].values if verbose: - tPrint(f'**** finished {cName}') - return({out_file:res}) + tPrint(f"**** finished {cName}") + return {out_file: res} + if __name__ == "__main__": - multiprocess=True + multiprocess = True verbose = True tPrint("Starting") h3_level = 6 data_prefix = "WorldPop_2020_Demographics" - + admin_bounds = "/home/wb411133/data/Global/ADMIN/Admin2_Polys.shp" - - ''' + + """ global_urban = "/home/public/Data/GLOBAL/GHSL/SMOD/GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif" - ''' + """ # Define input raster variables - population_folder = "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/GLOBAL_1km_Demographics" - pop_files = [os.path.join(population_folder, x) for x in os.listdir(population_folder) if x.endswith("1km.tif")] + population_folder = ( + "/home/public/Data/GLOBAL/Population/WorldPop_PPP_2020/GLOBAL_1km_Demographics" + ) + pop_files = [ + os.path.join(population_folder, x) + for x in os.listdir(population_folder) + if x.endswith("1km.tif") + ] # h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False) - + # Generate a list from the global admin boundaries inA = gpd.read_file(admin_bounds) - inA['id'] = list(inA.index) + inA["id"] = list(inA.index) h3_0_list = {} for region, countries in inA.groupby("WB_REGION"): h3_0_list[region] = countries - + if verbose: - tPrint("H3_0 list generated") - + tPrint("H3_0 list generated") + # set up mp arguments for h3_0_key, cur_gdf in h3_0_list.items(): arg_list = [] - processed_list = [] + processed_list = [] for pop_file in pop_files: filename = os.path.basename(f'{pop_file.replace(".tif", "")}_zonal.csv') # out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_0_key}/{filename}' - out_s3_key = f'Space2Stats/h3_stats_data/ADM_GLOBAL/{data_prefix}/{h3_0_key}/{filename}' - full_path = os.path.join("s3://", AWS_S3_BUCKET, out_s3_key) - ''' + out_s3_key = f"Space2Stats/h3_stats_data/ADM_GLOBAL/{data_prefix}/{h3_0_key}/{filename}" + full_path = os.path.join("s3://", AWS_S3_BUCKET, out_s3_key) + """ try: tempPD = pd.read_csv(full_path) processed_list.append(filename) except: - ''' + """ arg_list.append([cur_gdf, pop_file, out_s3_key, True, verbose]) if multiprocess: - with multiprocessing.Pool(processes=min([70,len(pop_files)])) as pool: - results = pool.starmap(run_zonal, arg_list) + with multiprocessing.Pool(processes=min([70, len(pop_files)])) as pool: + results = pool.starmap(run_zonal, arg_list) else: for a in arg_list: results = run_zonal(*a) @@ 
-96,4 +107,4 @@ def run_zonal(gdf, cur_raster_file, out_file, buffer0=False, verbose=False): "secret": AWS_SECRET_ACCESS_KEY, "token": AWS_SESSION_TOKEN, }, - ) \ No newline at end of file + ) diff --git a/notebooks/MP_SCRIPTS/zonal_urbanization.py b/notebooks/MP_SCRIPTS/zonal_urbanization.py index 67f5e32..9b5b014 100644 --- a/notebooks/MP_SCRIPTS/zonal_urbanization.py +++ b/notebooks/MP_SCRIPTS/zonal_urbanization.py @@ -1,73 +1,83 @@ -import sys, os, multiprocessing +import multiprocessing +import os +import sys +import GOSTrocks.ntlMisc as ntl +import GOSTrocks.rasterMisc as rMisc import pandas as pd -#import geopandas as gpd -#import numpy as np - +from GOSTrocks.misc import tPrint from h3 import h3 -import GOSTrocks.rasterMisc as rMisc -import GOSTrocks.ntlMisc as ntl -from GOSTrocks.misc import tPrint +# import geopandas as gpd +# import numpy as np + sys.path.append("../../src") -import h3_helper import global_zonal +import h3_helper -AWS_S3_BUCKET = 'wbg-geography01' +AWS_S3_BUCKET = "wbg-geography01" AWS_ACCESS_KEY_ID = os.getenv("AWS_ACCESS_KEY_ID") AWS_SECRET_ACCESS_KEY = os.getenv("AWS_SECRET_ACCESS_KEY") AWS_SESSION_TOKEN = os.getenv("AWS_SESSION_TOKEN") if __name__ == "__main__": - multiprocess=True + multiprocess = True verbose = True run_urban = True run_urban_pop = False - + tPrint("Starting") h3_level = 6 data_prefix = "Urbanization" data_prefix_pop = "Urbanization_Pop" - + # Urbanization layers - unq_urban = [11,12,13,21,22,23,30] + unq_urban = [11, 12, 13, 21, 22, 23, 30] ghsl_folder = "/home/public/Data/GLOBAL/GHSL/" - ghs_smod = os.path.join(ghsl_folder, "SMOD", "GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif") - ghs_pop = os.path.join(ghsl_folder, "POP", "GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif") - - #h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False) - #if verbose: + ghs_smod = os.path.join( + ghsl_folder, "SMOD", "GHS_SMOD_E2020_GLOBE_R2023A_54009_1000_V1_0.tif" + ) + ghs_pop = os.path.join( + ghsl_folder, "POP", "GHS_POP_E2020_GLOBE_R2023A_54009_100_V1_0.tif" + ) + + # h3_0_list = h3_helper.generate_lvl0_lists(h3_level, return_gdf=True, buffer0=False) + # if verbose: # tPrint("H3_0 list generated") - - h3_1_list = h3_helper.generate_lvl1_lists(h3_level, return_gdf=True, buffer0=True, read_pickle=True, write_pickle=False) + + h3_1_list = h3_helper.generate_lvl1_lists( + h3_level, return_gdf=True, buffer0=True, read_pickle=True, write_pickle=False + ) if verbose: tPrint("H3_1 list generated") - + urban_pop_args = [] urban_args = [] for h3_1_key, cur_gdf in h3_1_list.items(): - # Set up mp arguments for urban population - pop_filename = 'GHS_POP_2020_Urban_Breakdown.csv' - pop_out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{pop_filename}' + # Set up mp arguments for urban population + pop_filename = "GHS_POP_2020_Urban_Breakdown.csv" + pop_out_s3_key = f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix_pop}/{h3_1_key}/{pop_filename}" pop_full_path = os.path.join("s3://", AWS_S3_BUCKET, pop_out_s3_key) try: tempPD = pd.read_csv(pop_full_path) except: - urban_pop_args.append([cur_gdf, "shape_id", ghs_pop, ghs_smod, pop_full_path, unq_urban]) - + urban_pop_args.append( + [cur_gdf, "shape_id", ghs_pop, ghs_smod, pop_full_path, unq_urban] + ) + # set up mp arguments for urban summary - urban_filename = 'GHS_SMOD_2020.csv' - urban_out_s3_key = f'Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_1_key}/{urban_filename}' + urban_filename = "GHS_SMOD_2020.csv" + urban_out_s3_key = 
f"Space2Stats/h3_stats_data/GLOBAL/{data_prefix}/{h3_1_key}/{urban_filename}" urban_full_path = os.path.join("s3://", AWS_S3_BUCKET, urban_out_s3_key) urban_args.append([cur_gdf, "shape_id", ghs_smod, unq_urban, urban_full_path]) - + if run_urban: tPrint(f"Running calculations on urban: {len(urban_args)} processes") # Run multi processing on urban if multiprocess: - with multiprocessing.Pool(processes=min([70,len(urban_args)])) as pool: - results = pool.starmap(global_zonal.zonal_stats_categories, urban_args) + with multiprocessing.Pool(processes=min([70, len(urban_args)])) as pool: + results = pool.starmap(global_zonal.zonal_stats_categories, urban_args) else: for a in arg_list: results = run_zonal(*a) @@ -87,8 +97,10 @@ if run_urban_pop: # Run multi processing on urban_pop_calculations if multiprocess: - with multiprocessing.Pool(processes=min([70,len(urban_pop_args)])) as pool: - results = pool.starmap(global_zonal.zonal_stats_categorical, urban_pop_args) + with multiprocessing.Pool(processes=min([70, len(urban_pop_args)])) as pool: + results = pool.starmap( + global_zonal.zonal_stats_categorical, urban_pop_args + ) else: for a in arg_list: results = run_zonal(*a) @@ -103,4 +115,4 @@ "secret": AWS_SECRET_ACCESS_KEY, "token": AWS_SESSION_TOKEN, }, - ) \ No newline at end of file + ) diff --git a/postgres/chunk_parquet.py b/postgres/chunk_parquet.py index f6806e6..30db620 100644 --- a/postgres/chunk_parquet.py +++ b/postgres/chunk_parquet.py @@ -3,14 +3,16 @@ import pandas as pd chunk_dir = "parquet_chunks" -df = pd.read_parquet('space2stats_updated.parquet') +df = pd.read_parquet("space2stats_updated.parquet") chunk_size = 100000 # Number of rows per chunk if not os.path.exists(chunk_dir): os.mkdir(chunk_dir) for i in range(0, len(df), chunk_size): - chunk = df.iloc[i:i + chunk_size] - chunk.to_parquet(os.path.join(chunk_dir, f'space2stats_part_{i // chunk_size}.parquet')) + chunk = df.iloc[i : i + chunk_size] + chunk.to_parquet( + os.path.join(chunk_dir, f"space2stats_part_{i // chunk_size}.parquet") + ) -print("Parquet file split into smaller chunks.") \ No newline at end of file +print("Parquet file split into smaller chunks.") diff --git a/postgres/nyc_sample.py b/postgres/nyc_sample.py index cdaddd1..185c19b 100644 --- a/postgres/nyc_sample.py +++ b/postgres/nyc_sample.py @@ -1,29 +1,30 @@ -import pandas as pd import h3 - +import pandas as pd # Load the full dataset -df = pd.read_parquet('space2stats.parquet') +df = pd.read_parquet("space2stats.parquet") # Define the bounding box for New York City (approximate values) as a GeoJSON polygon nyc_polygon = { "type": "Polygon", - "coordinates": [[ - [-74.259090, 40.477399], - [-73.700272, 40.477399], - [-73.700272, 40.917577], - [-74.259090, 40.917577], - [-74.259090, 40.477399] - ]] + "coordinates": [ + [ + [-74.259090, 40.477399], + [-73.700272, 40.477399], + [-73.700272, 40.917577], + [-74.259090, 40.917577], + [-74.259090, 40.477399], + ] + ], } # Generate H3 indices for the bounding box using polyfill -resolution = 6 +resolution = 6 nyc_hexagons = h3.polyfill(nyc_polygon, resolution, geo_json_conformant=True) # Filter the dataframe for New York City H3 indices -nyc_df = df[df['hex_id'].isin(nyc_hexagons)] +nyc_df = df[df["hex_id"].isin(nyc_hexagons)] -nyc_df.to_parquet('nyc_sample.parquet') +nyc_df.to_parquet("nyc_sample.parquet") -print("Filtered file for New York City.") \ No newline at end of file +print("Filtered file for New York City.") diff --git a/space2stats_api/cdk/app.py b/space2stats_api/cdk/app.py index fdf3e7d..a531357 
100644 --- a/space2stats_api/cdk/app.py +++ b/space2stats_api/cdk/app.py @@ -5,8 +5,7 @@ settings = DeploymentSettings(_env_file="aws_deployment.env") env = Environment( - account=settings.CDK_DEFAULT_ACCOUNT, - region=settings.CDK_DEFAULT_REGION + account=settings.CDK_DEFAULT_ACCOUNT, region=settings.CDK_DEFAULT_REGION ) app = App() diff --git a/space2stats_api/cdk/aws_stack.py b/space2stats_api/cdk/aws_stack.py index 208f967..e0e3009 100644 --- a/space2stats_api/cdk/aws_stack.py +++ b/space2stats_api/cdk/aws_stack.py @@ -16,38 +16,40 @@ def __init__(self, scope: Construct, id: str, **kwargs) -> None: deployment_settings = DeploymentSettings(_env_file="./aws_deployment.env") lambda_function = PythonFunction( - self, "Space2StatsFunction", + self, + "Space2StatsFunction", entry="../src", runtime=_lambda.Runtime.PYTHON_3_11, index="space2stats/handler.py", timeout=Duration.seconds(120), handler="handler", environment=app_settings.model_dump(), - memory_size=1024 + memory_size=1024, ) certificate = acm.Certificate.from_certificate_arn( - self, "Certificate", - deployment_settings.CDK_CERTIFICATE_ARN + self, "Certificate", deployment_settings.CDK_CERTIFICATE_ARN ) domain_name = apigatewayv2.DomainName( - self, "DomainName", + self, + "DomainName", domain_name=deployment_settings.CDK_DOMAIN_NAME, - certificate=certificate + certificate=certificate, ) http_api = apigatewayv2.HttpApi( - self, "Space2StatsHttpApi", + self, + "Space2StatsHttpApi", default_integration=integrations.HttpLambdaIntegration( - "LambdaIntegration", - handler=lambda_function - ) + "LambdaIntegration", handler=lambda_function + ), ) apigatewayv2.ApiMapping( - self, "ApiMapping", + self, + "ApiMapping", api=http_api, domain_name=domain_name, - stage=http_api.default_stage + stage=http_api.default_stage, ) diff --git a/space2stats_api/cdk/settings.py b/space2stats_api/cdk/settings.py index fd22267..706189c 100644 --- a/space2stats_api/cdk/settings.py +++ b/space2stats_api/cdk/settings.py @@ -9,6 +9,7 @@ class AppSettings(BaseSettings): DB_PASSWORD: str DB_TABLE_NAME: str + class DeploymentSettings(BaseSettings): CDK_DEFAULT_ACCOUNT: str CDK_DEFAULT_REGION: str diff --git a/space2stats_api/src/space2stats/__main__.py b/space2stats_api/src/space2stats/__main__.py index edd875a..feb354d 100644 --- a/space2stats_api/src/space2stats/__main__.py +++ b/space2stats_api/src/space2stats/__main__.py @@ -1,4 +1,3 @@ - import os from .app import app @@ -11,8 +10,9 @@ if __name__ == "__main__": - - assert uvicorn is not None, "uvicorn must be installed: `python -m pip install 'space2stats[server]'`" + assert ( + uvicorn is not None + ), "uvicorn must be installed: `python -m pip install 'space2stats[server]'`" uvicorn.run( app=app, diff --git a/space2stats_api/src/space2stats/app.py b/space2stats_api/src/space2stats/app.py index f60b19f..ac640ba 100644 --- a/space2stats_api/src/space2stats/app.py +++ b/space2stats_api/src/space2stats/app.py @@ -1,16 +1,15 @@ from contextlib import asynccontextmanager +from typing import Any, Dict, List from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import ORJSONResponse - -from typing import Any, Dict, List - from starlette.requests import Request from starlette_cramjam.middleware import CompressionMiddleware -from .db import connect_to_db, close_db_connection -from .main import get_summaries_from_geom, get_available_fields, SummaryRequest +from .db import close_db_connection, connect_to_db +from .main import SummaryRequest, get_available_fields, 
get_summaries_from_geom + @asynccontextmanager async def lifespan(app: FastAPI): diff --git a/space2stats_api/src/space2stats/handler.py b/space2stats_api/src/space2stats/handler.py index 8884899..e9a7c33 100644 --- a/space2stats_api/src/space2stats/handler.py +++ b/space2stats_api/src/space2stats/handler.py @@ -5,8 +5,8 @@ from mangum import Mangum -from .db import connect_to_db from .app import app +from .db import connect_to_db @app.on_event("startup") diff --git a/space2stats_api/src/space2stats/main.py b/space2stats_api/src/space2stats/main.py index 93ad60e..2141c9d 100644 --- a/space2stats_api/src/space2stats/main.py +++ b/space2stats_api/src/space2stats/main.py @@ -1,21 +1,19 @@ - - - from typing import Dict, List, Literal, Optional import psycopg as pg +from geojson_pydantic import Feature, Polygon from psycopg import Connection from pydantic import BaseModel -from geojson_pydantic import Feature, Polygon from typing_extensions import TypeAlias -from .h3_utils import generate_h3_ids, generate_h3_geometries +from .h3_utils import generate_h3_geometries, generate_h3_ids from .settings import Settings settings = Settings() AoiModel: TypeAlias = Feature[Polygon, Dict] + class SummaryRequest(BaseModel): aoi: AoiModel spatial_join_method: Literal["touches", "centroid", "within"] @@ -74,9 +72,7 @@ def get_summaries_from_geom( # Format Summaries summaries: List[Dict] = [] - geometries = ( - generate_h3_geometries(h3_ids, geometry) if geometry else None - ) + geometries = generate_h3_geometries(h3_ids, geometry) if geometry else None for idx, row in enumerate(rows): summary = {"hex_id": row[0]} diff --git a/space2stats_api/src/tests/test_api.py b/space2stats_api/src/tests/test_api.py index 3d87a2a..75dabe7 100644 --- a/space2stats_api/src/tests/test_api.py +++ b/space2stats_api/src/tests/test_api.py @@ -1,10 +1,8 @@ from unittest.mock import patch import pytest -from pytest_postgresql.janitor import DatabaseJanitor - from fastapi.testclient import TestClient - +from pytest_postgresql.janitor import DatabaseJanitor aoi = { "type": "Feature", @@ -23,6 +21,7 @@ "properties": {}, } + @pytest.fixture(scope="session") def database(postgresql_proc): """Fake Database.""" @@ -62,7 +61,7 @@ def test_read_root(client): def test_get_summary(mock_get_summaries, client): mock_get_summaries.return_value = ( [("hex_1", 100, 200)], - ["hex_id", "sum_pop_2020", "sum_pop_f_10_2020"] + ["hex_id", "sum_pop_2020", "sum_pop_f_10_2020"], ) request_payload = { @@ -89,7 +88,7 @@ def test_get_summary(mock_get_summaries, client): def test_get_summary_with_geometry_polygon(mock_get_summaries, client): mock_get_summaries.return_value = ( [("hex_1", 100, 200)], - ["hex_id", "sum_pop_2020", "sum_pop_f_10_2020"] + ["hex_id", "sum_pop_2020", "sum_pop_f_10_2020"], ) request_payload = { @@ -119,7 +118,7 @@ def test_get_summary_with_geometry_polygon(mock_get_summaries, client): def test_get_summary_with_geometry_point(mock_get_summaries, client): mock_get_summaries.return_value = ( [("hex_1", 100, 200)], - ["hex_id", "sum_pop_2020", "sum_pop_f_10_2020"] + ["hex_id", "sum_pop_2020", "sum_pop_f_10_2020"], ) request_payload = { @@ -147,9 +146,11 @@ def test_get_summary_with_geometry_point(mock_get_summaries, client): @patch("space2stats.app.get_available_fields") def test_get_fields(mock_get_available_fields, client): - mock_get_available_fields.return_value = ["sum_pop_2020", - "sum_pop_f_10_2020", - "field3"] + mock_get_available_fields.return_value = [ + "sum_pop_2020", + "sum_pop_f_10_2020", + "field3", + ] response = 
client.get("/fields") diff --git a/space2stats_api/src/tests/test_h3_utils.py b/space2stats_api/src/tests/test_h3_utils.py index b7b8082..04881d9 100644 --- a/space2stats_api/src/tests/test_h3_utils.py +++ b/space2stats_api/src/tests/test_h3_utils.py @@ -1,6 +1,5 @@ import pytest from shapely.geometry import Polygon, mapping - from space2stats.h3_utils import generate_h3_geometries, generate_h3_ids polygon_coords = [ diff --git a/src/country_zonal.py b/src/country_zonal.py index ed7ee08..7f838ee 100755 --- a/src/country_zonal.py +++ b/src/country_zonal.py @@ -1,29 +1,42 @@ -import sys, os, importlib, json -import folium, shapely, rasterio, matplotlib +import importlib +import json +import os +import sys +from urllib.request import urlopen import contextily as ctx -import matplotlib.pyplot as plt -import matplotlib.patches as mpatches -import pandas as pd +import folium import geopandas as gpd +import GOSTRocks.ntlMisc as ntl +import GOSTRocks.rasterMisc as rMisc +import matplotlib +import matplotlib.patches as mpatches +import matplotlib.pyplot as plt import numpy as np - -from rasterio.crs import CRS -from mpl_toolkits.axes_grid1 import make_axes_locatable +import pandas as pd +import rasterio +import shapely +from GOSTRocks.misc import tPrint from h3 import h3 -from shapely.geometry import Polygon, Point, mapping +from mpl_toolkits.axes_grid1 import make_axes_locatable +from rasterio.crs import CRS +from shapely.geometry import Point, Polygon, mapping from shapely.ops import unary_union -from urllib.request import urlopen from tqdm import tqdm import h3_helper -import GOSTRocks.rasterMisc as rMisc -import GOSTRocks.ntlMisc as ntl -from GOSTRocks.misc import tPrint -def calculate_value(in_shp, zonal_res, h3_level, feat_id, fractional_res=True, - zonal_res_id='shape_id', default_sum='SUM'): - ''' tabulate hexabin stats for all bins that intersect shape in_shp + +def calculate_value( + in_shp, + zonal_res, + h3_level, + feat_id, + fractional_res=True, + zonal_res_id="shape_id", + default_sum="SUM", +): + """tabulate hexabin stats for all bins that intersect shape in_shp :param in_shp: shape of boundary to intersect with hexabins :type in_shp: shapely polygon @@ -42,25 +55,29 @@ def calculate_value(in_shp, zonal_res, h3_level, feat_id, fractional_res=True, :return: dictionary of results summarized based on type (SUM, MIN, MEAN, MAX) :rtype: Dictionary - ''' + """ + def get_intersection(admin_shp, hex_shp): - ''' get fraction of hex_shp that is inside admin_shp - ''' + """get fraction of hex_shp that is inside admin_shp""" if admin_shp.contains(hex_shp): - return(1) + return 1 else: - return(admin_shp.intersection(hex_shp).area/hex_shp.area) - - res = {'id':feat_id} + return admin_shp.intersection(hex_shp).area / hex_shp.area + + res = {"id": feat_id} process_h3 = True # Generate h3 cells that intersect current shape; if none are generated first time through, buffer # the geometry by a little bit, and then search again - while process_h3: + while process_h3: if in_shp.geom_type == "Polygon": - sel_h3 = h3.polyfill(in_shp.__geo_interface__, h3_level, geo_json_conformant=True) + sel_h3 = h3.polyfill( + in_shp.__geo_interface__, h3_level, geo_json_conformant=True + ) elif in_shp.geom_type == "MultiPolygon": for cPoly in in_shp: - temp_h3 = h3.polyfill(cPoly.__geo_interface__, h3_level, geo_json_conformant=True) + temp_h3 = h3.polyfill( + cPoly.__geo_interface__, h3_level, geo_json_conformant=True + ) try: sel_h3 = sel_h3.union(temp_h3) except: @@ -70,45 +87,67 @@ def get_intersection(admin_shp, 
hex_shp): if len(sel_h3) > 0: hex_poly = lambda hex_id: Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True)) - all_polys = gpd.GeoSeries(list(map(hex_poly, sel_h3)), index=sel_h3, crs="EPSG:4326") - all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=['geometry']) - all_polys['shape_id'] = list(all_polys.index) + all_polys = gpd.GeoSeries( + list(map(hex_poly, sel_h3)), index=sel_h3, crs="EPSG:4326" + ) + all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=["geometry"]) + all_polys["shape_id"] = list(all_polys.index) if fractional_res: - all_polys['inter_area'] = all_polys['geometry'].apply(lambda x: get_intersection(in_shp, x)) + all_polys["inter_area"] = all_polys["geometry"].apply( + lambda x: get_intersection(in_shp, x) + ) else: - all_polys['inter_area'] = 1 - all_polys = pd.merge(all_polys, zonal_res, left_on='shape_id', right_on=zonal_res_id) - for col in all_polys.columns: - if not col in ['inter_area','geometry','shape_id']: + all_polys["inter_area"] = 1 + all_polys = pd.merge( + all_polys, zonal_res, left_on="shape_id", right_on=zonal_res_id + ) + for col in all_polys.columns: + if not col in ["inter_area", "geometry", "shape_id"]: calc_type = default_sum - if "SUM" in col: calc_type = "SUM" - if "MIN" in col: calc_type = "MIN" - if "MAX" in col: calc_type = "MAX" - if "MEAN" in col: calc_type = "MEAN" + if "SUM" in col: + calc_type = "SUM" + if "MIN" in col: + calc_type = "MIN" + if "MAX" in col: + calc_type = "MAX" + if "MEAN" in col: + calc_type = "MEAN" try: - if calc_type == "SUM": # For sum columns, multiply column by inter_area and sum results - cur_val = sum(all_polys[col] * all_polys['inter_area']) + if ( + calc_type == "SUM" + ): # For sum columns, multiply column by inter_area and sum results + cur_val = sum(all_polys[col] * all_polys["inter_area"]) elif calc_type == "MIN": cur_val = all_polys[col].min() elif calc_type == "MAX": cur_val = all_polys[col].max() elif calc_type == "MEAN": - cur_val = sum(all_polys[col] * all_polys['inter_area'])/sum(all_polys['inter_area']) + cur_val = sum(all_polys[col] * all_polys["inter_area"]) / sum( + all_polys["inter_area"] + ) res[col] = cur_val except: pass try: - del(cur_val) + del cur_val except: pass else: pass - return(res) + return res + + +def connect_polygons_h3_stats( + inA, + stats_df, + h3_level, + id_col, + fractional_res=True, + zonal_res_id="shape_id", + default_sum="SUM", +): + """merge stats from hexabin stats dataframe (stats_df) with the inA geodataframe -def connect_polygons_h3_stats(inA, stats_df, h3_level, id_col, fractional_res=True, - zonal_res_id='shape_id', default_sum='SUM'): - ''' merge stats from hexabin stats dataframe (stats_df) with the inA geodataframe - :param inA: input boundary dataset :type inA: geopandas.GeoDataFrame :param stats_df: input hexabin stats dataset @@ -123,180 +162,214 @@ def connect_polygons_h3_stats(inA, stats_df, h3_level, id_col, fractional_res=Tr function sill pick up [SUM,MIN,MAX,MEAN], defaults to sum :type default_sum: string, optional - + :return: pandas dataframe with attached statistics and matching id from id_col :rtype: geopandas.GeoDataFrame - ''' + """ all_res = [] - for idx, row in inA.iterrows(): + for idx, row in inA.iterrows(): try: - all_res.append(calculate_value(row['geometry'], stats_df, h3_level, row[id_col], fractional_res, zonal_res_id, default_sum)) + all_res.append( + calculate_value( + row["geometry"], + stats_df, + h3_level, + row[id_col], + fractional_res, + zonal_res_id, + default_sum, + ) + ) except: - print(f'Error processing {idx}') - - 
return(pd.DataFrame(all_res)) - -class country_h3_zonal(): - ''' Generate h3 grid at prescribed level; intersect with admin boundary; run zonal stats - - :param iso3: Country ISO3 code - :type iso3: string - :param adm_bounds: admin boundaries for joining with h3 grid - :type adm_bounds: geopandas.GeoDataFrame - :param adm_bounds_id: column in adm_bounds used as unique ID - :type adm_bounds_id: string - :param h3_level: size of h3 grid to create; we suggest starting with 6 or 5 (5 is larger) - :type h3_level: int - ''' - def __init__(self, iso3, adm_bounds, adm_bounds_id, h3_level, out_folder, h3_grid = ''): + print(f"Error processing {idx}") + + return pd.DataFrame(all_res) + + +class country_h3_zonal: + """Generate h3 grid at prescribed level; intersect with admin boundary; run zonal stats + + :param iso3: Country ISO3 code + :type iso3: string + :param adm_bounds: admin boundaries for joining with h3 grid + :type adm_bounds: geopandas.GeoDataFrame + :param adm_bounds_id: column in adm_bounds used as unique ID + :type adm_bounds_id: string + :param h3_level: size of h3 grid to create; we suggest starting with 6 or 5 (5 is larger) + :type h3_level: int + """ + + def __init__( + self, iso3, adm_bounds, adm_bounds_id, h3_level, out_folder, h3_grid="" + ): self.iso3 = iso3 self.adm_bounds = adm_bounds self.adm_bounds_id = adm_bounds_id self.h3_level = h3_level self.out_folder = out_folder - - #define output variables - if h3_grid != '': - self.out_h3_grid = os.path.join(out_folder, f'h3_level_{h3_level}.geojson') + + # define output variables + if h3_grid != "": + self.out_h3_grid = os.path.join(out_folder, f"h3_level_{h3_level}.geojson") else: self.out_h3_grid = h3_grid try: self.h3_cells = gpd.read_file(self.out_h3_grid) except: pass - self.out_admin = os.path.join(out_folder, 'admin_bounds.geojson') + self.out_admin = os.path.join(out_folder, "admin_bounds.geojson") try: self.adm_bounds_h3 = gpd.read_file(self.out_admin) except: pass - - + def generate_h3_grid(self, cols_to_include=[], attach_admin=False): - ''' Generate the h3 grid and join to the admin boundaries - - :param cols_to_include: list of columns to include from adm_bounds in joined output - :type cols_to_include: list of strings - ''' + """Generate the h3 grid and join to the admin boundaries + + :param cols_to_include: list of columns to include from adm_bounds in joined output + :type cols_to_include: list of strings + """ selA = self.adm_bounds try: - return(self.h3_cells) - except: + return self.h3_cells + except: pass - + try: h3_cells = self.h3_cells.copy() - h3_cells = h3_cells.loc[:,['shape_id','geometry']] + h3_cells = h3_cells.loc[:, ["shape_id", "geometry"]] except: h3_cells = h3_helper.generate_h3_gdf(self.adm_bounds, self.h3_level) - - h3_cells['centroid'] = h3_cells['geometry'].apply(lambda x: x.centroid) - h3_centroids = h3_cells.set_geometry('centroid') + + h3_cells["centroid"] = h3_cells["geometry"].apply(lambda x: x.centroid) + h3_centroids = h3_cells.set_geometry("centroid") cols_to_include.append("geometry") cols_to_include.append(self.adm_bounds_id) cols_to_include = list(set(cols_to_include)) - h3_joined = gpd.sjoin(h3_centroids, selA.loc[:,cols_to_include], how='left') - if attach_admin: - h3_pivot = pd.pivot_table(h3_joined, index=self.adm_bounds_id, aggfunc={cols_to_include[0]:len}) - h3_pivot.columns = [*h3_pivot.columns[:-1], 'h3_count'] + h3_joined = gpd.sjoin(h3_centroids, selA.loc[:, cols_to_include], how="left") + if attach_admin: + h3_pivot = pd.pivot_table( + h3_joined, 
index=self.adm_bounds_id, aggfunc={cols_to_include[0]: len} + ) + h3_pivot.columns = [*h3_pivot.columns[:-1], "h3_count"] h3_pivot = h3_pivot.reset_index() - h3_pivot = selA.loc[:,cols_to_include].merge(h3_pivot, how='left', on=self.adm_bounds_id) + h3_pivot = selA.loc[:, cols_to_include].merge( + h3_pivot, how="left", on=self.adm_bounds_id + ) self.adm_bounds_h3 = h3_pivot - h3_joined = h3_joined.set_geometry("geometry").drop(['centroid'], axis=1) + h3_joined = h3_joined.set_geometry("geometry").drop(["centroid"], axis=1) h3_joined = h3_joined.reset_index() self.h3_cells = h3_joined - - return(h3_joined) - + + return h3_joined + def summarize_adm_h3_join(self, verbose=False): - ''' Summarize the join between the adm bounds and the h3 grid: - 1. Number of h3 cells - 2. Number of adm bounds - 3. Number of adm bounds with 0 h3 centroids - 4. Number of adm bounds with 0 - 1 h3 centroids - 5. Number of adm bounds with 2 - 5 h3 centroids - ''' + """Summarize the join between the adm bounds and the h3 grid: + 1. Number of h3 cells + 2. Number of adm bounds + 3. Number of adm bounds with 0 h3 centroids + 4. Number of adm bounds with 0 - 1 h3 centroids + 5. Number of adm bounds with 2 - 5 h3 centroids + """ try: inD = self.adm_bounds_h3.copy() except: self.generate_h3_grid() inD = self.adm_bounds_h3.copy() - + n_h3 = self.h3_cells.shape[0] n_adm = inD.shape[0] - n_adm_0 = inD.loc[inD['h3_count'].isna()].shape[0] - n_adm_1 = inD.loc[inD['h3_count'] == 1].shape[0] - n_adm_2 = inD.loc[(inD['h3_count'] < 6) & (inD['h3_count'] > 1)].shape[0] - + n_adm_0 = inD.loc[inD["h3_count"].isna()].shape[0] + n_adm_1 = inD.loc[inD["h3_count"] == 1].shape[0] + n_adm_2 = inD.loc[(inD["h3_count"] < 6) & (inD["h3_count"] > 1)].shape[0] + if verbose: - tPrint(f"{self.iso3}: H3 [{n_h3}], ADM [{n_adm}], ADM0 [{n_adm_0}], ADM1 [{n_adm_1}], ADM2 [{n_adm_2}]") - return([n_h3, n_adm, n_adm_0, n_adm_1, n_adm_2]) - + tPrint( + f"{self.iso3}: H3 [{n_h3}], ADM [{n_adm}], ADM0 [{n_adm_0}], ADM1 [{n_adm_1}], ADM2 [{n_adm_2}]" + ) + return [n_h3, n_adm, n_adm_0, n_adm_1, n_adm_2] + def write_output(self, write_h3=True, write_admin=False): - ''' write geospatial data to disk - - ''' + """write geospatial data to disk""" if write_h3: self.h3_cells.to_file(self.out_h3_grid, driver="GeoJSON") if write_admin: self.adm_bounds_h3.to_file(self.out_admin, driver="GeoJSON") - def zonal_raster(self, in_raster, minVal='', maxVal='', all_touched=False, weighted=False): - ''' + def zonal_raster( + self, in_raster, minVal="", maxVal="", all_touched=False, weighted=False + ): + """ - :param in_raster: string path to raster file for calculations - :type in_raster: string - :param minVal: minimum value in in_raster to pass to zonal function; everything below is considered 0. Default is no threshold - :type minVal: numeric - ''' + :param in_raster: string path to raster file for calculations + :type in_raster: string + :param minVal: minimum value in in_raster to pass to zonal function; everything below is considered 0. 
Default is no threshold + :type minVal: numeric + """ h3_grid = self.generate_h3_grid() - + if isinstance(in_raster, str): - inR = rasterio.open(in_raster, 'r') + inR = rasterio.open(in_raster, "r") else: inR = in_raster - + # Run zonal statistics on pop_raster - res = rMisc.zonalStats(h3_grid, inR, reProj=True, minVal=minVal, maxVal=maxVal, - allTouched=all_touched, weighted=weighted) - res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) - res['shape_id'] = h3_grid['shape_id'].astype(object) - return(res) - - - - def zonal_raster_population(self, in_raster, pop_raster, raster_thresh, thresh_label='thresh', - resampling_type="sum", minVal='', maxVal='', all_touched=False, weighted=False): - ''' extract raster data from in_raster, urban_raster for selected country, standardize urban_raster to in_raster - - :param in_raster: string path to raster file for calculations - :type in_raster: string - :param pop_raster: string path to population file for summarizing calculations - :type pop_raster: string - :param raster_thresh: value to threshold in_raster in order to summarize population - :type raster_thresh: number - - :param thresh_label: label to append to thresholded summaries in output table, default is to 'thresh' - :type thresh_label: string - :param resampling_type: how to re-sample in_raster to pop_raster, using rasterio resampling options, default is to 'SUM' - :type resampling_type: string - :param minVal: minimum value in in_raster to pass to zonal function; everything below is considered 0. Default is no threshold - :type minVal: numeric - :param urban_mask_val: list of values in urban_raster to be used for mask - :type urban_mask_val: list of int - :param unqVals: - :type unqVals: - - - ''' + res = rMisc.zonalStats( + h3_grid, + inR, + reProj=True, + minVal=minVal, + maxVal=maxVal, + allTouched=all_touched, + weighted=weighted, + ) + res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) + res["shape_id"] = h3_grid["shape_id"].astype(object) + return res + + def zonal_raster_population( + self, + in_raster, + pop_raster, + raster_thresh, + thresh_label="thresh", + resampling_type="sum", + minVal="", + maxVal="", + all_touched=False, + weighted=False, + ): + """extract raster data from in_raster, urban_raster for selected country, standardize urban_raster to in_raster + + :param in_raster: string path to raster file for calculations + :type in_raster: string + :param pop_raster: string path to population file for summarizing calculations + :type pop_raster: string + :param raster_thresh: value to threshold in_raster in order to summarize population + :type raster_thresh: number + + :param thresh_label: label to append to thresholded summaries in output table, default is to 'thresh' + :type thresh_label: string + :param resampling_type: how to re-sample in_raster to pop_raster, using rasterio resampling options, default is to 'SUM' + :type resampling_type: string + :param minVal: minimum value in in_raster to pass to zonal function; everything below is considered 0. 
Default is no threshold + :type minVal: numeric + :param urban_mask_val: list of values in urban_raster to be used for mask + :type urban_mask_val: list of int + :param unqVals: + :type unqVals: + + + """ h3_grid = self.generate_h3_grid() - + if isinstance(in_raster, str): - inR = rasterio.open(in_raster, 'r') + inR = rasterio.open(in_raster, "r") else: inR = in_raster if isinstance(pop_raster, str): - popR = rasterio.open(pop_raster, 'r') + popR = rasterio.open(pop_raster, "r") else: popR = pop_raster @@ -308,90 +381,148 @@ def zonal_raster_population(self, in_raster, pop_raster, raster_thresh, thresh_l with rMisc.create_rasterio_inmemory(profile1, inN) as tempR: # Run zonal statistics on pop_raster - res = rMisc.zonalStats(h3_grid, tempR, reProj=True, minVal=minVal, maxVal=maxVal, - allTouched=all_touched, weighted=weighted) - res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) - res['shape_id'] = h3_grid['shape_id'].astype(object) + res = rMisc.zonalStats( + h3_grid, + tempR, + reProj=True, + minVal=minVal, + maxVal=maxVal, + allTouched=all_touched, + weighted=weighted, + ) + res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) + res["shape_id"] = h3_grid["shape_id"].astype(object) # Standardize in_raster to pop_raster with rMisc.create_rasterio_inmemory(profile2, inD) as tempD: - inD, profile2 = rMisc.standardizeInputRasters(tempD, tempR, resampling_type=resampling_type) + inD, profile2 = rMisc.standardizeInputRasters( + tempD, tempR, resampling_type=resampling_type + ) # threhsold in raster to create binary - inR_thresh = (inD >= raster_thresh) + inR_thresh = inD >= raster_thresh pop_thresh = inN * inR_thresh - + # Summarize thresholded populatino with rMisc.create_rasterio_inmemory(profile1, pop_thresh) as urbanR: - resU = rMisc.zonalStats(h3_grid, urbanR, reProj=True, minVal=minVal, maxVal=maxVal, - allTouched=all_touched, weighted=weighted) - resU = pd.DataFrame(resU, columns=[f"SUM_{thresh_label}", f"MIN_{thresh_label}", f"MAX_{thresh_label}", f"MEAN_{thresh_label}"]) + resU = rMisc.zonalStats( + h3_grid, + urbanR, + reProj=True, + minVal=minVal, + maxVal=maxVal, + allTouched=all_touched, + weighted=weighted, + ) + resU = pd.DataFrame( + resU, + columns=[ + f"SUM_{thresh_label}", + f"MIN_{thresh_label}", + f"MAX_{thresh_label}", + f"MEAN_{thresh_label}", + ], + ) resU = resU.astype(float) - resU['shape_id'] = h3_grid['shape_id'] - res_final = res.merge(resU, on='shape_id') + resU["shape_id"] = h3_grid["shape_id"] + res_final = res.merge(resU, on="shape_id") return res_final + def zonal_raster_urban( + self, + in_raster, + urban_raster, + resampling_type="nearest", + minVal="", + maxVal="", + rastType="N", + urban_mask_val=[21, 22, 23, 30], + unqVals=[], + all_touched=False, + weighted=False, + ): + """extract raster data from in_raster, urban_raster for selected country, standardize urban_raster to in_raster + :param in_raster: string path to raster file for calculations + :type in_raster: string + :param in_raster: string path to urban file tiering calculations + :type in_raster: string + :param minVal: minimum value in in_raster to pass to zonal function; everything below is considered 0. Default is no threshold + :type minVal: numeric + :param rastType: define the input data in the in_raster. 
Options are N (for numeric, default) or C (categorical) + :type rastType: string + :param urban_mask_val: list of values in urban_raster to be used for mask + :type urban_mask_val: list of int + :param unqVals: + :type unqVals: - def zonal_raster_urban(self, in_raster, urban_raster, resampling_type="nearest", minVal='', maxVal='', rastType='N', - urban_mask_val=[21,22,23,30], unqVals=[], all_touched=False, weighted=False): - ''' extract raster data from in_raster, urban_raster for selected country, standardize urban_raster to in_raster - - :param in_raster: string path to raster file for calculations - :type in_raster: string - :param in_raster: string path to urban file tiering calculations - :type in_raster: string - :param minVal: minimum value in in_raster to pass to zonal function; everything below is considered 0. Default is no threshold - :type minVal: numeric - :param rastType: define the input data in the in_raster. Options are N (for numeric, default) or C (categorical) - :type rastType: string - :param urban_mask_val: list of values in urban_raster to be used for mask - :type urban_mask_val: list of int - :param unqVals: - :type unqVals: - - :return: dictionary of rasterio objects for in_raster and urban_raster - :rtype: dictionary of 'in_raster': rasterio.DatasetReader, 'urban_raster': rasterio.DatasetReader - ''' + :return: dictionary of rasterio objects for in_raster and urban_raster + :rtype: dictionary of 'in_raster': rasterio.DatasetReader, 'urban_raster': rasterio.DatasetReader + """ h3_grid = self.generate_h3_grid() - + if isinstance(in_raster, str): - inR = rasterio.open(in_raster, 'r') + inR = rasterio.open(in_raster, "r") else: inR = in_raster - + if isinstance(urban_raster, str): - inU = rasterio.open(urban_raster, 'r') + inU = rasterio.open(urban_raster, "r") else: inU = urban_raster # Clip inR to extent of country inN, profile1 = rMisc.clipRaster(inR, self.adm_bounds, crop=False) with rMisc.create_rasterio_inmemory(profile1, inN) as tempR: - if rastType == 'N': + if rastType == "N": # Run zonal statistics on in_raster - res = rMisc.zonalStats(h3_grid, tempR, rastType=rastType, reProj=True, minVal=minVal, maxVal=maxVal, - allTouched=all_touched, weighted=weighted) - res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) - res['shape_id'] = h3_grid['shape_id'].astype(object) + res = rMisc.zonalStats( + h3_grid, + tempR, + rastType=rastType, + reProj=True, + minVal=minVal, + maxVal=maxVal, + allTouched=all_touched, + weighted=weighted, + ) + res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"]) + res["shape_id"] = h3_grid["shape_id"].astype(object) # Standardize in_urban raster to clippedR - outU, profile2 = rMisc.standardizeInputRasters(inU, tempR, resampling_type=resampling_type) + outU, profile2 = rMisc.standardizeInputRasters( + inU, tempR, resampling_type=resampling_type + ) # Isolate values in in_raster that are urban - inN_urban = np.isin(outU, urban_mask_val) + inN_urban = np.isin(outU, urban_mask_val) with rMisc.create_rasterio_inmemory(profile2, inN_urban) as urbanR: - resU = rMisc.zonalStats(h3_grid, urbanR, rastType=rastType, reProj=True, minVal=minVal, maxVal=maxVal, - allTouched=all_touched, weighted=weighted) - resU = pd.DataFrame(resU, columns=["SUM_urban", "MIN_urban", "MAX_urban", "MEAN_urban"]) + resU = rMisc.zonalStats( + h3_grid, + urbanR, + rastType=rastType, + reProj=True, + minVal=minVal, + maxVal=maxVal, + allTouched=all_touched, + weighted=weighted, + ) + resU = pd.DataFrame( + resU, + columns=["SUM_urban", "MIN_urban", 
"MAX_urban", "MEAN_urban"], + ) resU = resU.astype(float) - resU['shape_id'] = h3_grid['shape_id'] - res_final = res.merge(resU, on='shape_id') - elif rastType == 'C': + resU["shape_id"] = h3_grid["shape_id"] + res_final = res.merge(resU, on="shape_id") + elif rastType == "C": # Run zonal statistics on in_raster - res_final = rMisc.zonalStats(h3_grid, tempR, rastType=rastType, reProj=True, unqVals=unqVals, - allTouched=all_touched, weighted=weighted) - res_final = pd.DataFrame(res_final, columns=[f'c_x' for x in unqVals]) - res_final['shape_id'] = h3_grid['shape_id'].astype(object) - - return(res_final) - - - \ No newline at end of file + res_final = rMisc.zonalStats( + h3_grid, + tempR, + rastType=rastType, + reProj=True, + unqVals=unqVals, + allTouched=all_touched, + weighted=weighted, + ) + res_final = pd.DataFrame(res_final, columns=[f"c_x" for x in unqVals]) + res_final["shape_id"] = h3_grid["shape_id"].astype(object) + + return res_final diff --git a/src/global_zonal.py b/src/global_zonal.py index cfeba2f..31aceed 100755 --- a/src/global_zonal.py +++ b/src/global_zonal.py @@ -1,23 +1,30 @@ -import boto3, os +import os +from urllib.request import urlopen -import pandas as pd +import boto3 import geopandas as gpd +import GOSTrocks.ntlMisc as ntl +import GOSTrocks.rasterMisc as rMisc import numpy as np - -from rasterio.crs import CRS +import pandas as pd +from GOSTrocks.misc import tPrint from h3 import h3 -from shapely.geometry import Polygon, Point, mapping +from rasterio.crs import CRS +from shapely.geometry import Point, Polygon, mapping from shapely.ops import unary_union -from urllib.request import urlopen from tqdm import tqdm import h3_helper -import GOSTrocks.rasterMisc as rMisc -import GOSTrocks.ntlMisc as ntl -from GOSTrocks.misc import tPrint -def get_global_table_from_s3(variable, bucket='wbg-geography01', prefix='Space2Stats/h3_stats_data/GLOBAL/', verbose=False, read_data=True): - """ Get pandas dataframe of all csv files in S3 bucket that match the variable name + +def get_global_table_from_s3( + variable, + bucket="wbg-geography01", + prefix="Space2Stats/h3_stats_data/GLOBAL/", + verbose=False, + read_data=True, +): + """Get pandas dataframe of all csv files in S3 bucket that match the variable name Parameters ---------- @@ -33,10 +40,10 @@ def get_global_table_from_s3(variable, bucket='wbg-geography01', prefix='Space2S If True, return results as pandas data frames for each sub-value in variable, otherwise returns a list of s3 prefixes for each sub-value, by default True """ - - s3client = boto3.client('s3') - - # Loop through the S3 bucket and get all the keys for files that are .tif + + s3client = boto3.client("s3") + + # Loop through the S3 bucket and get all the keys for files that are .tif prefix = f"{prefix}{variable}" more_results = True loops = 0 @@ -45,20 +52,22 @@ def get_global_table_from_s3(variable, bucket='wbg-geography01', prefix='Space2S if verbose: print(f"Completed loop: {loops}") if loops > 0: - objects = s3client.list_objects_v2(Bucket=bucket, Prefix=prefix, ContinuationToken=token) + objects = s3client.list_objects_v2( + Bucket=bucket, Prefix=prefix, ContinuationToken=token + ) else: objects = s3client.list_objects_v2(Bucket=bucket, Prefix=prefix) - more_results = objects['IsTruncated'] + more_results = objects["IsTruncated"] if more_results: - token = objects['NextContinuationToken'] + token = objects["NextContinuationToken"] loops += 1 - for res in objects['Contents']: - if res['Key'].endswith('csv'): - cur_variable = 
os.path.basename(res['Key']).replace(".csv", "") + for res in objects["Contents"]: + if res["Key"].endswith("csv"): + cur_variable = os.path.basename(res["Key"]).replace(".csv", "") try: - good_res[cur_variable].append(res['Key']) + good_res[cur_variable].append(res["Key"]) except: - good_res[cur_variable] = [res['Key']] + good_res[cur_variable] = [res["Key"]] if read_data: for key, value in good_res.items(): for idx, val in enumerate(value): @@ -67,12 +76,19 @@ def get_global_table_from_s3(variable, bucket='wbg-geography01', prefix='Space2S else: cur_df = pd.concat([cur_df, pd.read_csv(f"s3://{bucket}/{val}")]) good_res[key] = cur_df - return(good_res) + return good_res -def calculate_value(in_shp, zonal_res, h3_level, feat_id, fractional_res=True, - zonal_res_id='id', default_sum='SUM'): - ''' tabulate hexabin stats for all bins that intersect shape in_shp +def calculate_value( + in_shp, + zonal_res, + h3_level, + feat_id, + fractional_res=True, + zonal_res_id="id", + default_sum="SUM", +): + """tabulate hexabin stats for all bins that intersect shape in_shp :param in_shp: shape of boundary to intersect with hexabins :type in_shp: shapely polygon @@ -91,25 +107,29 @@ def calculate_value(in_shp, zonal_res, h3_level, feat_id, fractional_res=True, :return: dictionary of results summarized based on type (SUM, MIN, MEAN, MAX) :rtype: Dictionary - ''' + """ + def get_intersection(admin_shp, hex_shp): - ''' get fraction of hex_shp that is inside admin_shp - ''' + """get fraction of hex_shp that is inside admin_shp""" if admin_shp.contains(hex_shp): - return(1) + return 1 else: - return(admin_shp.intersection(hex_shp).area/hex_shp.area) - - res = {'id':feat_id} + return admin_shp.intersection(hex_shp).area / hex_shp.area + + res = {"id": feat_id} process_h3 = True # Generate h3 cells that intersect current shape; if none are generated first time through, buffer # the geometry by a little bit, and then search again - while process_h3: - if in_shp.geom_type == 'Polygon': - sel_h3 = h3.polyfill(in_shp.__geo_interface__, h3_level, geo_json_conformant=True) + while process_h3: + if in_shp.geom_type == "Polygon": + sel_h3 = h3.polyfill( + in_shp.__geo_interface__, h3_level, geo_json_conformant=True + ) else: for cPoly in in_shp: - temp_h3 = h3.polyfill(cPoly.__geo_interface__, h3_level, geo_json_conformant=True) + temp_h3 = h3.polyfill( + cPoly.__geo_interface__, h3_level, geo_json_conformant=True + ) try: sel_h3 = sel_h3.union(temp_h3) except: @@ -119,45 +139,67 @@ def get_intersection(admin_shp, hex_shp): if len(sel_h3) > 0: hex_poly = lambda hex_id: Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True)) - all_polys = gpd.GeoSeries(list(map(hex_poly, sel_h3)), index=sel_h3, crs="EPSG:4326") - all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=['geometry']) - all_polys['shape_id'] = list(all_polys.index) + all_polys = gpd.GeoSeries( + list(map(hex_poly, sel_h3)), index=sel_h3, crs="EPSG:4326" + ) + all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=["geometry"]) + all_polys["shape_id"] = list(all_polys.index) if fractional_res: - all_polys['inter_area'] = all_polys['geometry'].apply(lambda x: get_intersection(in_shp, x)) + all_polys["inter_area"] = all_polys["geometry"].apply( + lambda x: get_intersection(in_shp, x) + ) else: - all_polys['inter_area'] = 1 - all_polys = pd.merge(all_polys, zonal_res, left_on='shape_id', right_on=zonal_res_id) - for col in all_polys.columns: - if not col in ['inter_area','geometry','shape_id']: + all_polys["inter_area"] = 1 + all_polys = pd.merge( + 
all_polys, zonal_res, left_on="shape_id", right_on=zonal_res_id + ) + for col in all_polys.columns: + if not col in ["inter_area", "geometry", "shape_id"]: calc_type = default_sum - if "SUM" in col: calc_type = "SUM" - if "MIN" in col: calc_type = "MIN" - if "MAX" in col: calc_type = "MAX" - if "MEAN" in col: calc_type = "MEAN" + if "SUM" in col: + calc_type = "SUM" + if "MIN" in col: + calc_type = "MIN" + if "MAX" in col: + calc_type = "MAX" + if "MEAN" in col: + calc_type = "MEAN" try: - if calc_type == "SUM": # For sum columns, multiply column by inter_area and sum results - cur_val = sum(all_polys[col] * all_polys['inter_area']) + if ( + calc_type == "SUM" + ): # For sum columns, multiply column by inter_area and sum results + cur_val = sum(all_polys[col] * all_polys["inter_area"]) elif calc_type == "MIN": cur_val = all_polys[col].min() elif calc_type == "MAX": cur_val = all_polys[col].max() elif calc_type == "MEAN": - cur_val = sum(all_polys[col] * all_polys['inter_area'])/sum(all_polys['inter_area']) + cur_val = sum(all_polys[col] * all_polys["inter_area"]) / sum( + all_polys["inter_area"] + ) res[col] = cur_val except: pass try: - del(cur_val) + del cur_val except: pass else: pass - return(res) + return res + + +def connect_polygons_h3_stats( + inA, + stats_df, + h3_level, + id_col, + fractional_res=True, + zonal_res_id="id", + default_sum="SUM", +): + """merge stats from hexabin stats dataframe (stats_df) with the inA geodataframe -def connect_polygons_h3_stats(inA, stats_df, h3_level, id_col, fractional_res=True, - zonal_res_id='id', default_sum='SUM'): - ''' merge stats from hexabin stats dataframe (stats_df) with the inA geodataframe - :param inA: input boundary dataset :type inA: geopandas.GeoDataFrame :param stats_df: input hexabin stats dataset @@ -172,24 +214,35 @@ def connect_polygons_h3_stats(inA, stats_df, h3_level, id_col, fractional_res=Tr function sill pick up [SUM,MIN,MAX,MEAN], defaults to sum :type default_sum: string, optional - + :return: pandas dataframe with attached statistics and matching id from id_col :rtype: geopandas.GeoDataFrame - ''' + """ all_res = [] - for idx, row in inA.iterrows(): - all_res.append(calculate_value(row['geometry'], stats_df, h3_level, row[id_col], fractional_res, zonal_res_id, default_sum)) - ''' + for idx, row in inA.iterrows(): + all_res.append( + calculate_value( + row["geometry"], + stats_df, + h3_level, + row[id_col], + fractional_res, + zonal_res_id, + default_sum, + ) + ) + """ try: all_res.append(calculate_value(row['geometry'], stats_df, h3_level, row[id_col], fractional_res, zonal_res_id, default_sum)) except: print(f'Error processing {idx}') - ''' - - return(pd.DataFrame(all_res)) + """ + + return pd.DataFrame(all_res) + def generate_lvl0_lists(h3_lvl): - """ generate a dictionary with keys as lvl0 codes with all children at h3_lvl level as values + """generate a dictionary with keys as lvl0 codes with all children at h3_lvl level as values Parameters ---------- @@ -211,6 +264,7 @@ def generate_lvl0_lists(h3_lvl): return h3_lvl0_children + def calculate_zonal_h3_list(h3_list, raster_data, output_file=""): """_summary_ @@ -225,63 +279,86 @@ def calculate_zonal_h3_list(h3_list, raster_data, output_file=""): """ # Convert list of h3 cells to geometry hex_poly = lambda hex_id: Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True)) - + all_polys = gpd.GeoSeries(list(map(hex_poly, h3_list)), index=h3_list, crs=4326) - all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=['geometry']) - all_polys['shape_id'] = 
list(all_polys.index)
+    all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=["geometry"])
+    all_polys["shape_id"] = list(all_polys.index)

     res = rMisc.zonalStats(all_polys, raster_data)
-    res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])
+    res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"])
     if output_file != "":
         res.to_csv(output_file)
-
-    return(res)

-def zonal_stats_numerical(gdf, gdf_id, raster_file, out_file,
-                            buffer0=False, minVal=None, maxVal=None, verbose=False):
-    ''' Run zonal stats on a continuous raster file using a list of h3 cells
-    '''
+    return res
+
+
+def zonal_stats_numerical(
+    gdf,
+    gdf_id,
+    raster_file,
+    out_file,
+    buffer0=False,
+    minVal=None,
+    maxVal=None,
+    verbose=False,
+):
+    """Run zonal stats on a continuous raster file using a list of h3 cells"""
     if verbose:
-        tPrint(f'Starting zonal stats on {raster_file}')
+        tPrint(f"Starting zonal stats on {raster_file}")
     if buffer0:
-        gdf['geometry'] = gdf['geometry'].buffer(0)
-    res = rMisc.zonalStats(gdf, raster_file, minVal=minVal, maxVal=maxVal, verbose=verbose, reProj=True)
-    res = pd.DataFrame(res, columns=['SUM', 'MIN', 'MAX', 'MEAN'])
-    res['id'] = gdf[gdf_id].values
+        gdf["geometry"] = gdf["geometry"].buffer(0)
+    res = rMisc.zonalStats(
+        gdf, raster_file, minVal=minVal, maxVal=maxVal, verbose=verbose, reProj=True
+    )
+    res = pd.DataFrame(res, columns=["SUM", "MIN", "MAX", "MEAN"])
+    res["id"] = gdf[gdf_id].values
     if verbose:
-        tPrint(f'**** finished {cName}')
-    return({out_file:res})
+        tPrint(f"**** finished {raster_file}")
+    return {out_file: res}

-def zonal_stats_categories(gdf, gdf_id, raster_file, categories, out_file,
-                            buffer0=False, verbose=False):
-    ''' Run zonal stats on a categorical raster file using a list of h3 cells
-    '''
+
+def zonal_stats_categories(
+    gdf, gdf_id, raster_file, categories, out_file, buffer0=False, verbose=False
+):
+    """Run zonal stats on a categorical raster file using a list of h3 cells"""
     if verbose:
-        tPrint(f'Starting zonal stats on {raster_file}')
+        tPrint(f"Starting zonal stats on {raster_file}")
     if buffer0:
-        gdf['geometry'] = gdf['geometry'].buffer(0)
-    res = rMisc.zonalStats(gdf, raster_file, rastType="C", unqVals=categories, verbose=verbose, reProj=True)
-    res = pd.DataFrame(res, columns=[f'c_{x}' for x in categories])
-    res['id'] = gdf[gdf_id].values
+        gdf["geometry"] = gdf["geometry"].buffer(0)
+    res = rMisc.zonalStats(
+        gdf, raster_file, rastType="C", unqVals=categories, verbose=verbose, reProj=True
+    )
+    res = pd.DataFrame(res, columns=[f"c_{x}" for x in categories])
+    res["id"] = gdf[gdf_id].values
     if verbose:
-        tPrint(f'**** finished {cName}')
-    return({out_file:res})
-
-
-def zonal_stats_categorical(gdf, gdf_id, raster_file, category_raster_file, out_file, categories=None, reclass_dict=None,
-                            buffer0=False, minVal=None, maxVal=None, verbose=False):
-    ''' Run zonal stats on a continuous raster file using a matching categorical raster
-        file and a list of h3 cells. For each defined category in the categorical
-        raster file, calculate the sum, min, max, mean for that category.
-    '''
-
-    tPrint(f'Starting zonal stats on {out_file}')
+        tPrint(f"**** finished {raster_file}")
+    return {out_file: res}
+
+
+def zonal_stats_categorical(
+    gdf,
+    gdf_id,
+    raster_file,
+    category_raster_file,
+    out_file,
+    categories=None,
+    reclass_dict=None,
+    buffer0=False,
+    minVal=None,
+    maxVal=None,
+    verbose=False,
+):
+    """Run zonal stats on a continuous raster file using a matching categorical raster
+    file and a list of h3 cells. 
For each defined category in the categorical + raster file, calculate the sum, min, max, mean for that category. + """ + + tPrint(f"Starting zonal stats on {out_file}") if buffer0: - gdf['geometry'] = gdf['geometry'].buffer(0) - - #extract category raster to gdf extent + gdf["geometry"] = gdf["geometry"].buffer(0) + + # extract category raster to gdf extent cat_d, cat_profile = rMisc.clipRaster(category_raster_file, gdf) # reclasify if necessary if not reclass_dict is None: @@ -291,23 +368,42 @@ def zonal_stats_categorical(gdf, gdf_id, raster_file, category_raster_file, out_ categories.append(key) # extract raster to gdf extent rast_d, rast_profile = rMisc.clipRaster(raster_file, gdf) - + # standardize categorical raster to zonal raster final_zonal_res = [] with rMisc.create_rasterio_inmemory(rast_profile, rast_d) as rast_src: with rMisc.create_rasterio_inmemory(cat_profile, cat_d) as cat_src: - cat_d, cat_profile = rMisc.standardizeInputRasters(cat_src, rast_src, resampling_type='nearest') + cat_d, cat_profile = rMisc.standardizeInputRasters( + cat_src, rast_src, resampling_type="nearest" + ) # Loop through each category for cur_cat in categories: cur_cat_d = (cat_d == cur_cat) * 1 cur_rast_d = rast_d * cur_cat_d - with rMisc.create_rasterio_inmemory(rast_profile, cur_rast_d) as cur_rast_src: - res = rMisc.zonalStats(gdf, cur_rast_src, minVal=minVal, maxVal=maxVal, verbose=verbose, reProj=True) - res = pd.DataFrame(res, columns=[f'{cur_cat}_SUM', f'{cur_cat}_MIN', f'{cur_cat}_MAX', f'{cur_cat}_MEAN']) - res['id'] = gdf[gdf_id].values - res.set_index('id', inplace=True) + with rMisc.create_rasterio_inmemory( + rast_profile, cur_rast_d + ) as cur_rast_src: + res = rMisc.zonalStats( + gdf, + cur_rast_src, + minVal=minVal, + maxVal=maxVal, + verbose=verbose, + reProj=True, + ) + res = pd.DataFrame( + res, + columns=[ + f"{cur_cat}_SUM", + f"{cur_cat}_MIN", + f"{cur_cat}_MAX", + f"{cur_cat}_MEAN", + ], + ) + res["id"] = gdf[gdf_id].values + res.set_index("id", inplace=True) final_zonal_res.append(res) ret = pd.concat(final_zonal_res, axis=1) if verbose: - tPrint(f'**** finished') - return({out_file:ret}) \ No newline at end of file + tPrint(f"**** finished") + return {out_file: ret} diff --git a/src/h3_helper.py b/src/h3_helper.py index 7682ca2..abc5a6f 100755 --- a/src/h3_helper.py +++ b/src/h3_helper.py @@ -1,39 +1,53 @@ -import sys, os, importlib, json, pickle -import folium, shapely, rasterio, matplotlib +import importlib +import json +import os +import pickle +import sys +from urllib.request import urlopen import contextily as ctx -import matplotlib.pyplot as plt +import folium +import geopandas as gpd +import matplotlib import matplotlib.patches as mpatches +import matplotlib.pyplot as plt import pandas as pd -import geopandas as gpd - -from rasterio.crs import CRS -from mpl_toolkits.axes_grid1 import make_axes_locatable +import rasterio +import shapely +from GOSTrocks.misc import tPrint from h3 import h3 +from mpl_toolkits.axes_grid1 import make_axes_locatable +from rasterio.crs import CRS from shapely.geometry import Polygon, mapping from shapely.ops import unary_union -from urllib.request import urlopen from tqdm import tqdm -from GOSTrocks.misc import tPrint def generate_h3_gdf(in_gdf, h3_level=7): - ''' Generate a GeoDataFrame of h3 grid cells from an input geodataframe - - :param in_gdf: geodataframe from which to create h3 cells - :type in_gdf: geopandas.GeoDataFrame - ''' + """Generate a GeoDataFrame of h3 grid cells from an input geodataframe + + :param in_gdf: geodataframe from 
which to create h3 cells + :type in_gdf: geopandas.GeoDataFrame + """ try: del final_hexs except: pass try: - final_hexs = list(h3.polyfill(in_gdf.unary_union.__geo_interface__, h3_level, geo_json_conformant=True)) + final_hexs = list( + h3.polyfill( + in_gdf.unary_union.__geo_interface__, h3_level, geo_json_conformant=True + ) + ) except: - for cPoly in tqdm(in_gdf.unary_union, desc=f"Generating h3 grid level {h3_level}"): - all_hexs = list(h3.polyfill(cPoly.__geo_interface__, h3_level, geo_json_conformant=True)) - try: + for cPoly in tqdm( + in_gdf.unary_union, desc=f"Generating h3 grid level {h3_level}" + ): + all_hexs = list( + h3.polyfill(cPoly.__geo_interface__, h3_level, geo_json_conformant=True) + ) + try: final_hexs = final_hexs + all_hexs except: final_hexs = all_hexs @@ -41,14 +55,22 @@ def generate_h3_gdf(in_gdf, h3_level=7): final_hexs = list(set(final_hexs)) hex_poly = lambda hex_id: Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True)) - all_polys = gpd.GeoSeries(list(map(hex_poly, final_hexs)), index=final_hexs, crs="EPSG:4326") - all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=['geometry']) - all_polys['shape_id'] = list(all_polys.index) - return(all_polys) + all_polys = gpd.GeoSeries( + list(map(hex_poly, final_hexs)), index=final_hexs, crs="EPSG:4326" + ) + all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=["geometry"]) + all_polys["shape_id"] = list(all_polys.index) + return all_polys -def generate_lvl0_lists(h3_lvl, return_gdf=False, buffer0=False, - read_pickle=True, pickle_file = "h0_dictionary_of_h{lvl}_geodata_frames.pickle"): - """ generate a dictionary with keys as lvl0 codes with all children at h3_lvl level as values + +def generate_lvl0_lists( + h3_lvl, + return_gdf=False, + buffer0=False, + read_pickle=True, + pickle_file="h0_dictionary_of_h{lvl}_geodata_frames.pickle", +): + """generate a dictionary with keys as lvl0 codes with all children at h3_lvl level as values Parameters ---------- @@ -59,7 +81,7 @@ def generate_lvl0_lists(h3_lvl, return_gdf=False, buffer0=False, buffer0 : bool, optional buffer the h3 lvl 0 cells by 0 to fix inherent topological errors, by default False read_pickle : bool, optional - Optionally choose the read resulting data from a [ickle file defined by pickle_file, by default True. If pickle + Optionally choose the read resulting data from a [ickle file defined by pickle_file, by default True. 
If pickle file is not present, function will continue to generate results as if flag was set to False pickle_file : str, optional Path of pickle file to read if read_pickle is set to True @@ -72,15 +94,24 @@ def generate_lvl0_lists(h3_lvl, return_gdf=False, buffer0=False, if read_pickle: try: pickle_file = pickle_file.format(lvl=h3_lvl) - pickle_path = os.path.join(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))), pickle_file) - print(f"Loading pickle file {pickle_file}: it exists {os.path.exists(pickle_path)}") - with open(pickle_path, 'rb') as handle: + pickle_path = os.path.join( + os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))), + pickle_file, + ) + print( + f"Loading pickle file {pickle_file}: it exists {os.path.exists(pickle_path)}" + ) + with open(pickle_path, "rb") as handle: xx = pickle.load(handle) - return(xx) + return xx except: - #print("Could not load pickle file, continuing to process h0 manually") - raise(ValueError("Could not load pickle file, continuing to process h0 manually")) - + # print("Could not load pickle file, continuing to process h0 manually") + raise ( + ValueError( + "Could not load pickle file, continuing to process h0 manually" + ) + ) + # Get list of all h3 lvl 0 cells h3_lvl0 = list(h3.get_res0_indexes()) @@ -89,21 +120,34 @@ def generate_lvl0_lists(h3_lvl, return_gdf=False, buffer0=False, for h3_0 in h3_lvl0: h3_children = list(h3.h3_to_children(h3_0, h3_lvl)) if return_gdf: - hex_poly = lambda hex_id: Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True)) - all_polys = gpd.GeoSeries(list(map(hex_poly, h3_children)), index=h3_children, crs=4326) - all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=['geometry']) + hex_poly = lambda hex_id: Polygon( + h3.h3_to_geo_boundary(hex_id, geo_json=True) + ) + all_polys = gpd.GeoSeries( + list(map(hex_poly, h3_children)), index=h3_children, crs=4326 + ) + all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=["geometry"]) if buffer0: - all_polys['geometry'] = all_polys['geometry'].apply(lambda x: x.buffer(0)) - all_polys['shape_id'] = list(all_polys.index) - + all_polys["geometry"] = all_polys["geometry"].apply( + lambda x: x.buffer(0) + ) + all_polys["shape_id"] = list(all_polys.index) + h3_lvl0_children[h3_0] = all_polys else: h3_lvl0_children[h3_0] = h3_children return h3_lvl0_children -def generate_lvl1_lists(h3_lvl, return_gdf=False, buffer0=False, - read_pickle=True, pickle_file = "h1_dictionary_of_h{lvl}_geodata_frames.pickle", write_pickle=False): - """ generate a dictionary with keys as lvl1 codes with all children at h3_lvl level as values + +def generate_lvl1_lists( + h3_lvl, + return_gdf=False, + buffer0=False, + read_pickle=True, + pickle_file="h1_dictionary_of_h{lvl}_geodata_frames.pickle", + write_pickle=False, +): + """generate a dictionary with keys as lvl1 codes with all children at h3_lvl level as values Parameters ---------- @@ -114,7 +158,7 @@ def generate_lvl1_lists(h3_lvl, return_gdf=False, buffer0=False, buffer0 : bool, optional buffer the h3 lvl 0 cells by 0 to fix inherent topological errors, by default False read_pickle : bool, optional - Optionally choose the read resulting data from a [ickle file defined by pickle_file, by default True. If pickle + Optionally choose the read resulting data from a [ickle file defined by pickle_file, by default True. 
If pickle file is not present, function will continue to generate results as if flag was set to False pickle_file : str, optional Path of pickle file to read if read_pickle is set to True @@ -125,113 +169,160 @@ def generate_lvl1_lists(h3_lvl, return_gdf=False, buffer0=False, dictionary with keys as lvl0 codes with all children at h3_lvl level as values; returns a GeoDataFrame if return_gdf is True """ pickle_file = pickle_file.format(lvl=h3_lvl) - pickle_path = os.path.join(os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))), pickle_file) + pickle_path = os.path.join( + os.path.realpath(os.path.join(os.getcwd(), os.path.dirname(__file__))), + pickle_file, + ) if read_pickle: try: - print(f"Loading pickle file {pickle_file}: it exists {os.path.exists(pickle_path)}") - with open(pickle_path, 'rb') as handle: + print( + f"Loading pickle file {pickle_file}: it exists {os.path.exists(pickle_path)}" + ) + with open(pickle_path, "rb") as handle: xx = pickle.load(handle) - return(xx) + return xx except: print("Could not load pickle file, continuing to process h1 manually") - raise(ValueError("Could not load pickle file, exiting. Set read_pickle to False to generate list")) - + raise ( + ValueError( + "Could not load pickle file, exiting. Set read_pickle to False to generate list" + ) + ) + # Get list of all h3 lvl 0 cells h3_lvl0 = list(h3.get_res0_indexes()) # Generate list of all children of h3 lvl 1 cells h3_lvl1_children = {} - for h3_0 in h3_lvl0: # Identify all lvl 0 cells + for h3_0 in h3_lvl0: # Identify all lvl 0 cells h3_children = list(h3.h3_to_children(h3_0, 1)) - for h3_1 in h3_children: # For current lvl 0 cell, loop through all level 1 children + for ( + h3_1 + ) in h3_children: # For current lvl 0 cell, loop through all level 1 children h3_children_1 = list(h3.h3_to_children(h3_1, h3_lvl)) if return_gdf: - hex_poly = lambda hex_id: Polygon(h3.h3_to_geo_boundary(hex_id, geo_json=True)) - all_polys = gpd.GeoSeries(list(map(hex_poly, h3_children_1)), index=h3_children_1, crs=4326) - all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=['geometry']) + hex_poly = lambda hex_id: Polygon( + h3.h3_to_geo_boundary(hex_id, geo_json=True) + ) + all_polys = gpd.GeoSeries( + list(map(hex_poly, h3_children_1)), index=h3_children_1, crs=4326 + ) + all_polys = gpd.GeoDataFrame(all_polys, crs=4326, columns=["geometry"]) if buffer0: - all_polys['geometry'] = all_polys['geometry'].apply(lambda x: x.buffer(0)) - all_polys['shape_id'] = list(all_polys.index) - + all_polys["geometry"] = all_polys["geometry"].apply( + lambda x: x.buffer(0) + ) + all_polys["shape_id"] = list(all_polys.index) + h3_lvl1_children[h3_1] = all_polys else: h3_lvl1_children[h3_1] = h3_children_1 - + if write_pickle: if not os.path.exists(pickle_path): - with open(pickle_path, 'wb') as handle: + with open(pickle_path, "wb") as handle: pickle.dump(h3_lvl1_children, handle, protocol=pickle.HIGHEST_PROTOCOL) - + return h3_lvl1_children -def map_choropleth(sub, map_column, thresh=[], colour_ramp = 'Reds', invert=False, map_epsg=3857, legend_loc='upper right'): - ''' generate a static map of variables in GeoDataFrame sub - - :param sub: GeoDataFrame with geometry and column to map - :type sub: GeoPandas.GeoDataFrame - :param map_column: Name of column in sub to map - :type map_column: string - :param thresh: list of values to classify data in map_column - :type thresh: list of ints - ''' - try: - sub = sub.to_crs(map_epsg) - except: - sub.crs = 4326 - sub = sub.to_crs(map_epsg) - - thresh=[] - map_sub = 
sub.copy() - cmap = matplotlib.cm.get_cmap(colour_ramp) - fig, ax = plt.subplots(figsize=(15,15)) - proj = CRS.from_epsg(map_epsg) - - # create map column in sub, based on re-mapping of column map_column - if len(thresh) == 0: - split = [0,0.2,0.4,0.6,0.8,1] - thresh = [x for x in map_sub[map_column].quantile(split).values] - thresh.insert(0,0) - - map_sub['map'] = pd.cut(map_sub[map_column], thresh, labels=list(range(0, len(thresh)-1))) - - # [x/max(thresh) for x in thresh] - cmap_divisions = [x/100 for x in list(range(0,101,20))] - # map features not included in grouping - sel_mixed = map_sub.loc[map_sub['map'].isna()] - mismatch_color = 'azure' - mismatch_edge = 'darkblue' - cur_patch = mpatches.Patch(facecolor=mismatch_color, edgecolor=mismatch_edge, hatch="///", label=f"Mismatch [{sel_mixed.shape[0]}]") - all_labels = [cur_patch] - for lbl, data in map_sub.groupby('map'): - cur_color = cmap(cmap_divisions[int(lbl)]) - if invert: - cur_color = cmap(1 - cmap_divisions[int(lbl)]) - data.plot(color=cur_color, ax=ax, linewidth=0.1) - cur_patch = mpatches.Patch(color=cur_color, label=f'{data[map_column].min()} - {data[map_column].max()} [{data.shape[0]}]') - all_labels.append(cur_patch) - - sel_mixed.plot(color=mismatch_color, edgecolor=mismatch_edge, hatch="//////", ax=ax, label=False, linewidth=2) - - ctx.add_basemap(ax, source=ctx.providers.Stamen.TonerBackground, crs=proj) #zorder=-10, 'EPSG:4326' - ax.legend(handles=all_labels, loc=legend_loc) - ax = ax.set_axis_off() - - return(ax) - -def static_map_h3(sub, map_epsg=3857, legend_loc='upper right'): - ''' generate a static map of the h3 grid in sub - ''' - try: - sub = sub.to_crs(map_epsg) - except: - sub.crs = 4326 - sub = sub.to_crs(map_epsg) - - fig, ax = plt.subplots(figsize=(15,15)) - proj = CRS.from_epsg(map_epsg) - - sub.plot(color='grey', ax=ax, linewidth=0.1) - - ctx.add_basemap(ax, source=ctx.providers.Stamen.TonerBackground, crs=proj) #zorder=-10, 'EPSG:4326' - ax = ax.set_axis_off() - return(ax) \ No newline at end of file + +def map_choropleth( + sub, + map_column, + thresh=[], + colour_ramp="Reds", + invert=False, + map_epsg=3857, + legend_loc="upper right", +): + """generate a static map of variables in GeoDataFrame sub + + :param sub: GeoDataFrame with geometry and column to map + :type sub: GeoPandas.GeoDataFrame + :param map_column: Name of column in sub to map + :type map_column: string + :param thresh: list of values to classify data in map_column + :type thresh: list of ints + """ + try: + sub = sub.to_crs(map_epsg) + except: + sub.crs = 4326 + sub = sub.to_crs(map_epsg) + + thresh = [] + map_sub = sub.copy() + cmap = matplotlib.cm.get_cmap(colour_ramp) + fig, ax = plt.subplots(figsize=(15, 15)) + proj = CRS.from_epsg(map_epsg) + + # create map column in sub, based on re-mapping of column map_column + if len(thresh) == 0: + split = [0, 0.2, 0.4, 0.6, 0.8, 1] + thresh = [x for x in map_sub[map_column].quantile(split).values] + thresh.insert(0, 0) + + map_sub["map"] = pd.cut( + map_sub[map_column], thresh, labels=list(range(0, len(thresh) - 1)) + ) + + # [x/max(thresh) for x in thresh] + cmap_divisions = [x / 100 for x in list(range(0, 101, 20))] + # map features not included in grouping + sel_mixed = map_sub.loc[map_sub["map"].isna()] + mismatch_color = "azure" + mismatch_edge = "darkblue" + cur_patch = mpatches.Patch( + facecolor=mismatch_color, + edgecolor=mismatch_edge, + hatch="///", + label=f"Mismatch [{sel_mixed.shape[0]}]", + ) + all_labels = [cur_patch] + for lbl, data in map_sub.groupby("map"): + cur_color 
= cmap(cmap_divisions[int(lbl)]) + if invert: + cur_color = cmap(1 - cmap_divisions[int(lbl)]) + data.plot(color=cur_color, ax=ax, linewidth=0.1) + cur_patch = mpatches.Patch( + color=cur_color, + label=f"{data[map_column].min()} - {data[map_column].max()} [{data.shape[0]}]", + ) + all_labels.append(cur_patch) + + sel_mixed.plot( + color=mismatch_color, + edgecolor=mismatch_edge, + hatch="//////", + ax=ax, + label=False, + linewidth=2, + ) + + ctx.add_basemap( + ax, source=ctx.providers.Stamen.TonerBackground, crs=proj + ) # zorder=-10, 'EPSG:4326' + ax.legend(handles=all_labels, loc=legend_loc) + ax = ax.set_axis_off() + + return ax + + +def static_map_h3(sub, map_epsg=3857, legend_loc="upper right"): + """generate a static map of the h3 grid in sub""" + try: + sub = sub.to_crs(map_epsg) + except: + sub.crs = 4326 + sub = sub.to_crs(map_epsg) + + fig, ax = plt.subplots(figsize=(15, 15)) + proj = CRS.from_epsg(map_epsg) + + sub.plot(color="grey", ax=ax, linewidth=0.1) + + ctx.add_basemap( + ax, source=ctx.providers.Stamen.TonerBackground, crs=proj + ) # zorder=-10, 'EPSG:4326' + ax = ax.set_axis_off() + return ax diff --git a/src/space2stats_data_config.py b/src/space2stats_data_config.py index 1797cfb..daee6a6 100755 --- a/src/space2stats_data_config.py +++ b/src/space2stats_data_config.py @@ -1,21 +1,24 @@ -import sys, os, json +import json +import os +import sys + import geojson class s2s_geo_data: def __init__(self, json_path): - """ Extract metatdata and processing information for input geospatial layers + """Extract metatdata and processing information for input geospatial layers Args: json_path (string): path to json file to process """ - with open(json_path, 'r') as in_data: + with open(json_path, "r") as in_data: in_json = json.load(in_data) self.data_info = in_json - def get_path(self, yyyy='', mm='', dd=''): - """ Get path to geospatial data for processing + def get_path(self, yyyy="", mm="", dd=""): + """Get path to geospatial data for processing Args: yyyy (str, optional): specific year to process. Defaults to ''. @@ -24,5 +27,6 @@ def get_path(self, yyyy='', mm='', dd=''): """ inD = self.data_info.copy() - s3_path = os.path.join(inD['s3_bucket_base'], ) - + s3_path = os.path.join( + inD["s3_bucket_base"], + )
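The helpers touched above are intended to be chained: h3_helper.generate_h3_gdf builds the hexagon grid for a boundary layer, global_zonal.zonal_stats_numerical summarizes a raster over that grid, and global_zonal.connect_polygons_h3_stats folds the per-cell statistics back onto the input polygons. A minimal usage sketch follows; the file paths and the ADM2_CODE id column are illustrative placeholders, not inputs defined by this change.

import geopandas as gpd

import global_zonal
import h3_helper

# 1. Build a level-6 H3 grid covering the admin boundaries (placeholder path)
admin = gpd.read_file("admin_boundaries.geojson")
h3_grid = h3_helper.generate_h3_gdf(admin, h3_level=6)

# 2. Summarize a population raster over the H3 cells (placeholder raster);
#    the out_file string is only used as the key of the returned dictionary
res = global_zonal.zonal_stats_numerical(
    h3_grid, "shape_id", "population.tif", "h3_population_stats.csv", minVal=0
)
stats_df = res["h3_population_stats.csv"]

# 3. Re-aggregate the cell-level statistics onto the admin polygons
admin_stats = global_zonal.connect_polygons_h3_stats(
    admin, stats_df, h3_level=6, id_col="ADM2_CODE"
)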