diff --git a/.gitignore b/.gitignore index 141203c..287ecaa 100644 --- a/.gitignore +++ b/.gitignore @@ -53,7 +53,8 @@ utils/fragility_utils/data/ *test* # env files -*.env +*.env* +*NSI/.env* # build files *pycache* \ No newline at end of file diff --git a/NSI/.env b/NSI/.env new file mode 100644 index 0000000..e802907 --- /dev/null +++ b/NSI/.env @@ -0,0 +1,12 @@ +# database related +DB_URL= +DB_PORT=5432 +DB_NAME=nsi_raw +DB_USERNAME= +DB_PASSWORD= + +# NSI related +NSI_PREFIX=nsi_2022_ +NSI_URL_STATE=https://nsi.sec.usace.army.mil/downloads/nsi_2022/ +NSI_URL_FIPS=https://nsi.sec.usace.army.mil/nsiapi/structures?fips= +NSI_URL_FIPS_INTERNAL=https://nsi.sec.usace.army.mil/internal/nsiapi/structures?fips= \ No newline at end of file diff --git a/NSI/README.md b/NSI/README.md new file mode 100644 index 0000000..518c9db --- /dev/null +++ b/NSI/README.md @@ -0,0 +1,8 @@ +## Data Preparation + +### Download NSI +- curl + +### Data Conversion to Building Inventory + +### Publish to PostgreSQL database \ No newline at end of file diff --git a/NSI/config.py b/NSI/config.py new file mode 100644 index 0000000..760d29b --- /dev/null +++ b/NSI/config.py @@ -0,0 +1,30 @@ +# Copyright (c) 2022 University of Illinois and others. All rights reserved. 
+#
+# This program and the accompanying materials are made available under the
+# terms of the Mozilla Public License v2.0 which accompanies this distribution,
+# and is available at https://www.mozilla.org/en-US/MPL/2.0/
+
+# configs file
+import os
+from dotenv import load_dotenv
+
+# Load .env file
+load_dotenv()
+
+
+class Config:
+    """
+    Settings for the NSI scripts, read from environment variables / the .env file.
+    """
+    # database parameters ("or" also covers keys left empty in .env)
+    DB_URL = os.getenv('DB_URL') or 'localhost'
+    DB_PORT = os.getenv('DB_PORT') or '5432'
+    DB_NAME = os.getenv('DB_NAME')
+    DB_USERNAME = os.getenv('DB_USERNAME')
+    DB_PASSWORD = os.getenv('DB_PASSWORD')
+
+    # NSI parameters
+    NSI_URL_STATE = os.getenv('NSI_URL_STATE')
+    NSI_PREFIX = os.getenv('NSI_PREFIX')
+    NSI_URL_FIPS = os.getenv('NSI_URL_FIPS')
+    NSI_URL_FIPS_INTERNAL = os.getenv('NSI_URL_FIPS_INTERNAL')
diff --git a/NSI/nsitobuildinginventory.py b/NSI/nsitobuildinginventory.py
new file mode 100644
index 0000000..5e27a11
--- /dev/null
+++ b/NSI/nsitobuildinginventory.py
@@ -0,0 +1,46 @@
+import os
+
+import pandas as pd
+
+from nsiutils import NsiUtils
+
+# number of counties, taken from the top of the fips file, handled per run
+MAX_COUNTIES = 2
+
+
+def main():
+    """Download NSI structures for the first counties and upload them."""
+    in_fips_file = os.path.join("data", "us_county_fips_2018.csv")
+
+    # create fips list from fips file
+    fips_list = create_county_fips_from_file(in_fips_file)
+
+    # get feature collection from NSI api; slicing (not indexing) so that a
+    # file with fewer rows than MAX_COUNTIES does not raise IndexError
+    frames = [NsiUtils.get_features_by_fips(fips)
+              for fips in fips_list[:MAX_COUNTIES]]
+    if not frames:
+        print("no county fips found in " + in_fips_file)
+        return
+
+    # upload_postgres_gdf replaces the table on every call, so the counties
+    # are merged and uploaded once; per-county uploads kept only the last one
+    gdf = pd.concat(frames, ignore_index=True)
+
+    # upload geodataframe to database
+    if not NsiUtils.upload_postgres_gdf(gdf):
+        print("upload of " + str(len(gdf)) + " structures failed")
+
+
+def create_county_fips_from_file(infile):
+    """Return the GEOID column of the county csv file as a list of strings.
+
+    GEOID is read as text so codes keep their leading zeros.
+    """
+    df = pd.read_csv(infile, dtype={'GEOID': str})
+
+    return df['GEOID'].tolist()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/NSI/nsiutils.py b/NSI/nsiutils.py
new file mode 100644
index 0000000..b4e013e
--- /dev/null
+++ b/NSI/nsiutils.py
@@ -0,0 +1,127 @@
+import fiona
+import uuid
+import os
+import geopandas as gpd
+import sqlalchemy
+import requests
+
+from sqlalchemy import create_engine
+from geojson import FeatureCollection
+from config import Config as cfg
+
+
+class NsiUtils():
+    """Helpers to download NSI data, tag it and publish it to PostgreSQL."""
+
+    @staticmethod
+    def get_features_by_fips(state_county_fips):
+        """Return the structures of one county (e.g. fips 15005) in EPSG:4326."""
+        print("request data for " + str(state_county_fips) + " from NSI endpoint")
+        json_url = cfg.NSI_URL_FIPS + str(state_county_fips)
+        result = requests.get(json_url)
+        result.raise_for_status()
+        result_json = result.json()
+        collection = FeatureCollection(result_json['features'])
+        gdf = gpd.GeoDataFrame.from_features(collection['features'])
+        gdf = gdf.set_crs(epsg=4326)
+
+        return NsiUtils.add_columns_to_gdf(gdf, state_county_fips)
+
+    @staticmethod
+    def download_nsi_data_state_file(state_fips):
+        """Download a state zip (e.g. Missouri: 29) to "data"; returns its path."""
+        file_name = cfg.NSI_PREFIX + str(state_fips) + ".gpkg.zip"
+        # the configured base url may already end with "/"
+        file_url = "%s/%s" % (cfg.NSI_URL_STATE.rstrip("/"), file_name)
+        print("Downloading NSI data for the state: " + str(state_fips))
+
+        os.makedirs("data", exist_ok=True)
+        download_filename = os.path.join("data", file_name)
+
+        with requests.get(file_url, stream=True) as r:
+            # stop before an error page gets saved as the zip file
+            r.raise_for_status()
+            with open(download_filename, "wb") as zipfile:
+                for chunk in r.iter_content(chunk_size=1024):
+                    if chunk:
+                        zipfile.write(chunk)
+
+        return download_filename
+
+    @staticmethod
+    def read_geopkg_to_gdf(infile):
+        """Read a GeoPackage; with several layers the last one is returned."""
+        print("read GeoPackage")
+        layers = fiona.listlayers(infile)
+        if not layers:
+            return None
+
+        # only the last layer was ever kept, so the others are not read
+        return gpd.read_file(infile, layer=layers[-1], crs='EPSG:4326')
+
+    @staticmethod
+    def add_guid_to_gdf(gdf):
+        """Add a "guid" column with one random uuid string per row."""
+        print("create guid column")
+        # one column assignment instead of a cell write per row
+        gdf['guid'] = [str(uuid.uuid4()) for _ in range(len(gdf))]
+
+        return gdf
+
+    @staticmethod
+    def add_columns_to_gdf(gdf, fips):
+        """Add guid, fips, statefips and countyfips columns to the rows."""
+        print("create fips column")
+        # numeric input (e.g. 1001) has no leading zero; pad to 5 characters
+        fips = str(fips).zfill(5)
+        gdf['guid'] = [str(uuid.uuid4()) for _ in range(len(gdf))]
+        gdf['fips'] = fips
+        gdf['statefips'] = fips[:2]
+        gdf['countyfips'] = fips[2:]
+
+        return gdf
+
+    @staticmethod
+    def df_to_geopkg(gdf, outfile):
+        """Save the GeoDataFrame as a new GeoPackage file."""
+        print("create output geopackage")
+        gdf.to_file(outfile, driver="GPKG")
+
+    @staticmethod
+    def upload_postgres_from_gpk(infile):
+        """Read a GeoPackage file and upload it; returns True on success."""
+        gpkgpd = NsiUtils.read_geopkg_to_gdf(infile)
+        if gpkgpd is None:
+            print("no layer found in " + str(infile))
+            return False
+
+        return NsiUtils.upload_postgres_gdf(gpkgpd)
+
+    @staticmethod
+    def upload_postgres_gdf(gdf, table_name="nsi_raw", if_exists="replace"):
+        """Write the rows that have a geometry to a PostgreSQL table.
+
+        if_exists is passed to to_postgis. Returns False on connection errors.
+        """
+        # URL.create escapes special characters in user name and password
+        db_connection_url = sqlalchemy.engine.URL.create(
+            "postgresql", username=cfg.DB_USERNAME, password=cfg.DB_PASSWORD,
+            host=cfg.DB_URL, port=int(cfg.DB_PORT) if cfg.DB_PORT else None,
+            database=cfg.DB_NAME)
+        con = create_engine(db_connection_url)
+        try:
+            # Drop nulls in the geometry column
+            print('Dropping ' + str(gdf.geometry.isna().sum()) + ' nulls.')
+            gdf = gdf.dropna(subset=[gdf.geometry.name])
+
+            # Push the geodataframe to postgresql
+            print('uploading GeoPackage to database')
+            gdf.to_postgis(table_name, con, index=False, if_exists=if_exists)
+            print('uploading to database has finished.')
+            return True
+        except sqlalchemy.exc.OperationalError:
+            print("Error in connecting database server")
+            return False
+        finally:
+            # release the connection also when the upload fails
+            con.dispose()
diff --git a/NSI/query.py b/NSI/query.py
new file mode 100644
index 0000000..1ac3589
--- /dev/null
+++ b/NSI/query.py
@@ -0,0 +1,67 @@
+import os
+
+import geopandas as gp
+
+from sqlalchemy import create_engine
+from sqlalchemy import text
+from config import Config as cfg
+from nsiutils import NsiUtils
+
+# create the sqlalchemy connection url
+db_connection_url = "postgresql://%s:%s@%s:%s/%s" % \
+    (cfg.DB_USERNAME, cfg.DB_PASSWORD, cfg.DB_URL, cfg.DB_PORT, cfg.DB_NAME)
+table_name = "public.nsi_raw"
+
+
+def connection_test_raw():
+    """Check that the database answers and the raw table can be queried."""
+    con = create_engine(db_connection_url)
+    try:
+        # Engine.execute() was removed in SQLAlchemy 2.0, a Connection works
+        # in 1.4 and 2.0; first() fetches at most one row for the check
+        with con.connect() as connection:
+            connection.execute(
+                text("SELECT 1 FROM " + table_name + " LIMIT 1;")).first()
+    finally:
+        con.dispose()
+
+
+def _query_to_geopkg(query_str, params, outfile):
+    """Run a parameterized query and save the result as a GeoPackage."""
+    con = create_engine(db_connection_url)
+    try:
+        gdf = gp.GeoDataFrame.from_postgis(
+            query_str, con, geom_col='geometry', index_col='fd_id',
+            coerce_float=True, params=params)
+    finally:
+        con.dispose()
+
+    NsiUtils.df_to_geopkg(gdf, outfile)
+
+
+def query_test_fips():
+    """Export the structures of one test county to a GeoPackage."""
+    test_fips = '21017'
+    # the value is bound by the driver instead of pasted into the statement
+    query_str = "SELECT * FROM " + table_name + " WHERE fips = %(fips)s;"
+    outfile = os.path.join("data", "test_" + test_fips + ".gpkg")
+
+    _query_to_geopkg(query_str, {'fips': test_fips}, outfile)
+
+
+def query_test_bbox():
+    """Export the structures inside a test polygon to a GeoPackage."""
+    bbox = '-84.297,38.246,-84.297,38.261,-84.123,38.261,-84.123,38.246,-84.297,38.246'
+    # "bbox=<unquoted coordinate list>" was not valid SQL; the ring is turned
+    # into a WKT polygon and used as a PostGIS spatial filter instead
+    values = bbox.split(',')
+    ring = ', '.join(x + ' ' + y for x, y in zip(values[0::2], values[1::2]))
+    query_str = "SELECT * FROM " + table_name + \
+        " WHERE ST_Intersects(geometry, ST_GeomFromText(%(wkt)s, 4326));"
+    outfile = os.path.join("data", "test_bbox.gpkg")
+
+    _query_to_geopkg(query_str, {'wkt': 'POLYGON((' + ring + '))'}, outfile)
+
+
+if __name__ == '__main__':
+    query_test_bbox()
diff --git a/NSI/requirements.txt b/NSI/requirements.txt
new file mode 100644
index 0000000..6925fdd
--- /dev/null
+++ b/NSI/requirements.txt
@@ -0,0 +1,9 @@
+gdal>=3.4.2
+rasterio>=1.2.10
+fiona>=1.8.21
+geopandas>=0.10.2
+SQLAlchemy>=1.4.42
+GeoAlchemy2==0.12.5
+requests>=2.20.0
+Shapely>=1.8.2
+geojson>=2.5.0
\ No newline at end of file