Skip to content

5 create nsi related scripts #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ utils/fragility_utils/data/
*test*

# env files
*.env
*.env*
*NSI/.env*

# build files
*pycache*
12 changes: 12 additions & 0 deletions NSI/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# database related
DB_URL=
DB_PORT=5432
DB_NAME=nsi_raw
DB_USERNAME=
DB_PASSWORD=

# NSI related
NSI_PREFIX=nsi_2022_
NSI_URL_STATE=https://nsi.sec.usace.army.mil/downloads/nsi_2022/
NSI_URL_FIPS=https://nsi.sec.usace.army.mil/nsiapi/structures?fips=
NSI_URL_FIPS_INTERNAL=https://nsi.sec.usace.army.mil/internal/nsiapi/structures?fips=
8 changes: 8 additions & 0 deletions NSI/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
## Data Preparation

### Download NSI
- curl

### Data Conversion to Building Inventory

### Publish to PostgreSQL database
30 changes: 30 additions & 0 deletions NSI/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2022 University of Illinois and others. All rights reserved.
#
# This program and the accompanying materials are made available under the
# terms of the Mozilla Public License v2.0 which accompanies this distribution,
# and is available at https://www.mozilla.org/en-US/MPL/2.0/

# configs file
import os
from dotenv import load_dotenv

# Load .env file
load_dotenv()


class Config:
    """
    Configuration settings for the NSI download and database publishing scripts.

    Every value is read from the process environment once, when this class
    body is executed. Settings without a fallback are ``None`` when the
    corresponding variable is not defined.
    """
    # database parameters
    # `or` is used instead of a getenv default so that a variable that is
    # defined but blank (e.g. a `DB_URL=` line in the .env file) still falls
    # back to the default instead of yielding an empty string
    DB_URL = os.getenv('DB_URL') or 'localhost'
    DB_PORT = os.getenv('DB_PORT') or '5432'
    DB_NAME = os.getenv('DB_NAME')
    DB_USERNAME = os.getenv('DB_USERNAME')
    DB_PASSWORD = os.getenv('DB_PASSWORD')

    # NSI parameters
    # base url of the per-state download files
    NSI_URL_STATE = os.getenv('NSI_URL_STATE')
    # file name prefix of the per-state download files
    NSI_PREFIX = os.getenv('NSI_PREFIX')
    # structures endpoint; a county fips code is appended to it
    NSI_URL_FIPS = os.getenv('NSI_URL_FIPS')
    NSI_URL_FIPS_INTERNAL = os.getenv('NSI_URL_FIPS_INTERNAL')
36 changes: 36 additions & 0 deletions NSI/nsitobuildinginventory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import pandas as pd

from nsiutils import NsiUtils


def main():
    """
    Download NSI structures for the first two counties listed in the county
    fips file and upload them to the database.

    The per-county results are combined and uploaded with a single call,
    because NsiUtils.upload_postgres_gdf replaces the target table on every
    call; uploading county by county would keep only the last county.
    """
    # local import: this script's import block does not bring in os
    import os

    # os.path.join keeps the path valid on non-Windows hosts as well
    in_fips_file = os.path.join("data", "us_county_fips_2018.csv")

    # create fips list from fips file
    fips_list = create_county_fips_from_file(in_fips_file)

    # only the first two counties are processed; slicing (instead of
    # indexing with range(2)) also copes with a file that has fewer rows
    # get feature collection from NSI api for each county
    frames = [NsiUtils.get_features_by_fips(fips) for fips in fips_list[:2]]

    if not frames:
        print("no county fips found in " + in_fips_file)
        return

    # combine the counties and upload the geodataframe to database once
    gdf = pd.concat(frames, ignore_index=True)
    NsiUtils.upload_postgres_gdf(gdf)

    # NsiUtils.df_to_geopkg(gdf, <output path>) can be used here instead
    # to save the result to a geopackage file


def create_county_fips_from_file(infile):
    """
    Return the values of the GEOID column of a csv file as a list.

    The column is read as text so that codes keep their leading zeros.

    :param infile: path of the csv file; it must contain a GEOID column
    :return: list of GEOID strings, in file order
    """
    county_table = pd.read_csv(infile, dtype={'GEOID': str})
    return county_table['GEOID'].tolist()


# run the download/upload workflow only when executed as a script
if __name__ == '__main__':
    main()
127 changes: 127 additions & 0 deletions NSI/nsiutils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
import fiona
import uuid
import os
import geopandas as gpd
import sqlalchemy
import requests

from sqlalchemy import create_engine
from geojson import FeatureCollection
from config import Config as cfg


class NsiUtils():
    """
    Helper functions to download NSI data, convert it to geodataframes and
    publish it to a PostgreSQL/PostGIS database.
    """
    # (connect, read) timeout in seconds handed to requests, so that a
    # stalled NSI endpoint cannot block the script forever
    REQUEST_TIMEOUT = (10, 300)

    @staticmethod
    def get_features_by_fips(state_county_fips):
        """
        Download the feature collection json of a county and return it as a
        geodataframe with guid, fips, statefips and countyfips columns added.

        :param state_county_fips: county fips code, e.g. 15005
        :return: geodataframe with its crs set to EPSG:4326
        :raises requests.HTTPError: when the endpoint answers with an error status
        """
        print("request data for " + str(state_county_fips) + " from NSI endpoint")
        json_url = cfg.NSI_URL_FIPS + str(state_county_fips)
        result = requests.get(json_url, timeout=NsiUtils.REQUEST_TIMEOUT)
        result.raise_for_status()
        result_json = result.json()

        collection = FeatureCollection(result_json['features'])

        gdf = gpd.GeoDataFrame.from_features(collection['features'])
        gdf = gdf.set_crs(epsg=4326)

        gdf = NsiUtils.add_columns_to_gdf(gdf, state_county_fips)

        return gdf

    @staticmethod
    def download_nsi_data_state_file(state_fips):
        """
        Download the zipped geopackage of a state into the "data" folder.

        :param state_fips: state fips code, e.g. 29 for Missouri
        :return: path of the downloaded zip file
        :raises requests.HTTPError: when the server answers with an error status
        """
        file_name = cfg.NSI_PREFIX + str(state_fips) + ".gpkg.zip"
        # the configured base url may already end with "/"; strip it so the
        # joined url does not contain "//"
        file_url = "%s/%s" % (cfg.NSI_URL_STATE.rstrip("/"), file_name)
        print("Downloading NSI data for the state: " + str(state_fips))

        os.makedirs("data", exist_ok=True)
        download_filename = os.path.join("data", file_name)

        # the context manager releases the connection even on failure
        with requests.get(file_url, stream=True,
                          timeout=NsiUtils.REQUEST_TIMEOUT) as response:
            # fail before writing, otherwise an error page would be saved
            # under the zip file name
            response.raise_for_status()
            with open(download_filename, "wb") as out_file:
                # writing one chunk at a time to the zip file
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        out_file.write(chunk)

        return download_filename

    @staticmethod
    def read_geopkg_to_gdf(infile):
        """
        Convert a geopackage file to a geodataframe.

        Every layer is read in turn and only the last one is returned.

        :param infile: path of the geopackage file
        :return: geodataframe of the last layer, or None when there is no layer
        """
        print("read GeoPackage")
        gpkgpd = None
        for layername in fiona.listlayers(infile):
            gpkgpd = gpd.read_file(infile, layer=layername, crs='EPSG:4326')

        return gpkgpd

    @staticmethod
    def add_guid_to_gdf(gdf):
        """
        Add a guid column holding a new random uuid for every row.

        :param gdf: geodataframe, modified in place
        :return: the same geodataframe
        """
        print("create guid column")
        # one assignment for the whole column instead of a per-row loop
        gdf['guid'] = [str(uuid.uuid4()) for _ in range(len(gdf))]

        return gdf

    @staticmethod
    def add_columns_to_gdf(gdf, fips):
        """
        Add guid, fips, statefips and countyfips columns to a geodataframe.

        statefips is the first two characters of the fips code and
        countyfips is the rest.

        :param gdf: geodataframe, modified in place
        :param fips: county fips code; a non-string value is converted with
            str(), so pass a string when leading zeros have to be kept
        :return: the same geodataframe
        """
        print("create fips column")
        # slicing below needs text; callers may hand in a number
        fips = str(fips)
        statefips = fips[:2]
        countyfips = fips[2:]

        gdf['guid'] = [str(uuid.uuid4()) for _ in range(len(gdf))]
        gdf['fips'] = fips
        gdf['statefips'] = statefips
        gdf['countyfips'] = countyfips

        return gdf

    @staticmethod
    def df_to_geopkg(gdf, outfile):
        """
        Save a geodataframe as a new geopackage file.

        :param gdf: geodataframe to save
        :param outfile: path of the geopackage file to write
        """
        print("create output geopackage")
        gdf.to_file(outfile, driver="GPKG")

    @staticmethod
    def upload_postgres_from_gpk(infile):
        """
        Read a geopackage file and upload it to postgres.

        :param infile: path of the geopackage file
        :return: True when the upload succeeded, False otherwise
        """
        # read in the data
        gpkgpd = NsiUtils.read_geopkg_to_gdf(infile)
        if gpkgpd is None:
            print("no layer found in " + str(infile))
            return False

        return NsiUtils.upload_postgres_gdf(gpkgpd)

    @staticmethod
    def upload_postgres_gdf(gdf, if_exists='replace'):
        """
        Upload a geodataframe to the nsi_raw table in postgres.

        Rows without geometry are dropped before the upload.

        :param gdf: geodataframe to upload
        :param if_exists: passed on to to_postgis; the default 'replace'
            replaces an existing table on every call, use 'append' to add
            rows to it
        :return: True when the upload succeeded, False when the database
            raised an OperationalError
        """
        # build the url from its parts so that special characters in the
        # user name or password are escaped properly
        db_connection_url = sqlalchemy.engine.URL.create(
            "postgresql",
            username=cfg.DB_USERNAME,
            password=cfg.DB_PASSWORD,
            host=cfg.DB_URL,
            port=int(cfg.DB_PORT) if cfg.DB_PORT else None,
            database=cfg.DB_NAME)

        # create the sqlalchemy connection engine
        con = create_engine(db_connection_url)

        try:
            # Drop nulls in the geometry column
            print('Dropping ' + str(gdf.geometry.isna().sum()) + ' nulls.')
            gdf = gdf.dropna(subset=['geometry'])

            # Push the geodataframe to postgresql
            print('uploading GeoPackage to database')
            gdf.to_postgis("nsi_raw", con, index=False, if_exists=if_exists)

            print('uploading to database has finished.')

            return True

        except sqlalchemy.exc.OperationalError:
            print("Error in connecting database server")

            return False

        finally:
            # release the connection pool on failure as well
            con.dispose()
45 changes: 45 additions & 0 deletions NSI/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import geopandas as gp

from sqlalchemy import create_engine
from config import Config as cfg
from nsiutils import NsiUtils

# connection url and table shared by the test queries below
_db_url_parts = (
    cfg.DB_USERNAME,
    cfg.DB_PASSWORD,
    cfg.DB_URL,
    cfg.DB_PORT,
    cfg.DB_NAME,
)
db_connection_url = "postgresql://%s:%s@%s:%s/%s" % _db_url_parts
table_name = "public.nsi_raw"

def connection_test_raw():
    """
    Check that the database can be reached and the raw NSI table can be read.

    :return: True when the table returned a row, False when it is empty
    """
    # local import: only this check needs text()
    from sqlalchemy import text

    engine = create_engine(db_connection_url)
    try:
        # Engine.execute() no longer exists in SQLAlchemy 2.0, so the query
        # is run on an explicit connection
        with engine.connect() as connection:
            # one row is enough to check the connection; selecting the whole
            # table is not needed
            row = connection.execute(
                text("SELECT * FROM " + table_name + " LIMIT 1;")).first()
    finally:
        # release the connection pool even when the query fails
        engine.dispose()

    return row is not None


def query_test_fips(test_fips='21017'):
    """
    Query the structures of one county from the database and save them to
    data/test_<fips>.gpkg.

    :param test_fips: county fips code to query, '21017' by default
    """
    # local import: this script's import block does not bring in os
    import os

    test_fips = str(test_fips)

    # the fips value is handed over as a bound parameter (psycopg2 style
    # %(name)s placeholder) instead of being pasted into the sql text
    query_str = "SELECT * FROM " + table_name + " WHERE fips = %(fips)s;"
    gdf = gp.GeoDataFrame.from_postgis(
        query_str, db_connection_url, geom_col='geometry', index_col='fd_id',
        coerce_float=True, params={'fips': test_fips})

    # os.path.join keeps the path valid on non-Windows hosts as well
    outfile = os.path.join("data", "test_" + test_fips + ".gpkg")

    NsiUtils.df_to_geopkg(gdf, outfile)


def query_test_bbox():
    """
    Query the structures inside a test bounding box from the database and
    save them to data/test_bbox.gpkg.

    The bounding box is given as a comma separated ring of coordinate pairs.
    It is reduced to its envelope and used as a PostGIS spatial filter on the
    geometry column; the table has no bbox column to compare against.
    """
    # local import: this script's import block does not bring in os
    import os

    bbox = '-84.297,38.246,-84.297,38.261,-84.123,38.261,-84.123,38.246,-84.297,38.246'

    # NOTE(review): the pairs are assumed to be in longitude,latitude order
    # and in EPSG:4326, the crs set on the data when it is downloaded - confirm
    coords = [float(value) for value in bbox.split(',')]
    lons = coords[0::2]
    lats = coords[1::2]
    envelope = {
        'xmin': min(lons),
        'ymin': min(lats),
        'xmax': max(lons),
        'ymax': max(lats),
    }

    # the corner values are handed over as bound parameters (psycopg2 style
    # %(name)s placeholders)
    query_str = (
        "SELECT * FROM " + table_name +
        " WHERE ST_Intersects(geometry, "
        "ST_MakeEnvelope(%(xmin)s, %(ymin)s, %(xmax)s, %(ymax)s, 4326));")
    gdf = gp.GeoDataFrame.from_postgis(
        query_str, db_connection_url, geom_col='geometry', index_col='fd_id',
        coerce_float=True, params=envelope)

    # os.path.join keeps the path valid on non-Windows hosts as well
    outfile = os.path.join("data", "test_bbox.gpkg")

    NsiUtils.df_to_geopkg(gdf, outfile)


# run the bounding box test query only when executed as a script
if __name__ == '__main__':
    query_test_bbox()
9 changes: 9 additions & 0 deletions NSI/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
gdal>=3.4.2
rasterio>=1.2.10
fiona>=1.8.21
geopandas>=0.10.2
SQLAlchemy>=1.4.42
GeoAlchemy2==0.12.5
requests>=2.20.0
Shapely>=1.8.2
geojson>=2.5.0