
Commit

Merge branch 'staging' into codewritingcow/624-add-aria-label-to-saved-button
CodeWritingCow committed Jun 5, 2024
2 parents 71c248b + d09fda3 commit 10f5a55
Showing 31 changed files with 704 additions and 181 deletions.
3 changes: 3 additions & 0 deletions data/src/Pipfile
@@ -26,6 +26,9 @@ data-diff = {extras = ["postgresql"], version = "*"}
future = "*"
slack-sdk = "*"
pytest = "*"
networkx = "*"
libpysal = "*"
jenkspy = "*"

[dev-packages]

19 changes: 9 additions & 10 deletions data/src/classes/featurelayer.py
@@ -5,35 +5,34 @@
import geopandas as gpd
import pandas as pd
import requests
from config.config import FORCE_RELOAD, USE_CRS
from config.psql import conn
from esridump.dumper import EsriDumper
from google.cloud import storage
from google.cloud.storage.bucket import Bucket
from shapely import Point, wkb

from config.config import FORCE_RELOAD, USE_CRS


# Configure Google
def google_cloud_bucket() -> Bucket:
"""Build the google cloud bucket with name configured in your environ or default of cleanandgreenphl
Returns:
Bucket: the gcp bucket
"""
credentials_path = os.path.expanduser("/app/service-account-key.json")
credentials_path = os.path.expanduser("/app/account-service-key.json")

if not os.path.exists(credentials_path):
raise FileNotFoundError(f"Credentials file not found at {credentials_path}")

os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_path
bucket_name = (
os.environ["GOOGLE_CLOUD_BUCKET_NAME"]
if os.environ["GOOGLE_CLOUD_BUCKET_NAME"] is not None
else "cleanandgreenphl"
)
bucket_name = os.getenv("GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl")

storage_client = storage.Client(project="clean-and-green-philly")
return storage_client.bucket(bucket_name)


bucket = google_cloud_bucket()


class FeatureLayer:
"""
FeatureLayer is a class to represent a GIS dataset. It can be initialized with a URL to an Esri Feature Service, a SQL query to Carto, or a GeoDataFrame.
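The rewritten helper above replaces a subscript on os.environ plus an is-not-None check with os.getenv and a default. A small standalone sketch, not part of the commit, of why that matters: subscripting raises KeyError when the variable is unset, so the old None check never actually ran, while os.getenv simply falls back.

import os

# Simulate an environment where the bucket name is not configured.
os.environ.pop("GOOGLE_CLOUD_BUCKET_NAME", None)

# Old pattern: the subscript raises KeyError before the "is not None" check can help.
try:
    bucket_name = os.environ["GOOGLE_CLOUD_BUCKET_NAME"]
except KeyError:
    bucket_name = None
print(bucket_name)  # None

# New pattern: os.getenv returns the default without raising.
print(os.getenv("GOOGLE_CLOUD_BUCKET_NAME", "cleanandgreenphl"))  # cleanandgreenphl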
25 changes: 25 additions & 0 deletions data/src/constants/services.py
@@ -22,6 +22,10 @@
"https://services2.arcgis.com/qjOOiLCYeUtwT7x7/arcgis/rest/services/PHS_NGT_Supported_Current_view/FeatureServer/0/"
]

PPR_PROPERTIES_TO_LOAD = [
"https://services.arcgis.com/fLeGjb7u4uXqeF9q/ArcGIS/rest/services/PPR_Properties/FeatureServer/0"
]

one_year_ago = (datetime.datetime.now() - datetime.timedelta(days=365)).strftime(
"%Y-%m-%d"
)
@@ -43,4 +47,25 @@

IMMINENT_DANGER_BUILDINGS_QUERY = "SELECT * FROM imm_dang"

PERMITS_QUERY = f"""
SELECT
address,
addressobjectid,
approvedscopeofwork,
commercialorresidential,
opa_account_num,
permittype,
status,
unit_num,
unit_type,
permitissuedate,
typeofwork,
the_geom,
ST_AsGeoJSON(the_geom)::json AS the_geom_geojson
FROM permits
WHERE permitissuedate >= '{one_year_ago}'
"""

NBHOODS_URL = "https://raw.githubusercontent.com/opendataphilly/open-geo-data/master/philadelphia-neighborhoods/philadelphia-neighborhoods.geojson"

CENSUS_BGS_URL = "https://opendata.arcgis.com/datasets/2f982bada233478ea0100528227febce_0.geojson"
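A minimal sketch, not part of the commit, of running PERMITS_QUERY against the Carto SQL API endpoint used later in this commit (see dev_probability.py); passing the query via requests' params argument handles URL encoding of the multi-line SQL, and the import assumes data/src is on PYTHONPATH.

import geopandas as gpd
import requests
from constants.services import PERMITS_QUERY

CARTO_SQL_URL = "https://phl.carto.com/api/v2/sql"

# Let requests build and encode the query string.
response = requests.get(CARTO_SQL_URL, params={"q": PERMITS_QUERY, "format": "GeoJSON"})
response.raise_for_status()

permits_gdf = gpd.GeoDataFrame.from_features(response.json(), crs="EPSG:4326")
print(len(permits_gdf), "permits issued in the last year")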
15 changes: 9 additions & 6 deletions data/src/data_utils/community_gardens.py
@@ -8,18 +8,21 @@ def community_gardens(primary_featurelayer):
name="Community Gardens", esri_rest_urls=COMMUNITY_GARDENS_TO_LOAD
)

community_gardens.gdf = community_gardens.gdf[["site_Name", "geometry"]]

community_gardens.gdf = community_gardens.gdf[["Site_Name", "geometry"]]
primary_featurelayer.spatial_join(community_gardens)

# Create a boolean mask where 'Site Name' is not null
mask = primary_featurelayer.gdf["site_Name"].notnull()
# Create a boolean mask where 'Site_Name' is not null
mask = primary_featurelayer.gdf["Site_Name"].notnull()

count_dropped = mask.sum()
print(f"Number of community gardens being dropped: {count_dropped}")

# Use this mask to drop rows where 'Site Name' is not null
# Use this mask to drop rows where 'Site_Name' is not null
primary_featurelayer.gdf = primary_featurelayer.gdf.drop(
primary_featurelayer.gdf[mask].index
)

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["site_Name"])
primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["Site_Name"])

return primary_featurelayer
25 changes: 25 additions & 0 deletions data/src/data_utils/contig_neighbors.py
@@ -0,0 +1,25 @@
import warnings

import geopandas as gpd
import networkx as nx
from libpysal.weights import Queen


def contig_neighbors(primary_featurelayer):

parcels = primary_featurelayer.gdf

with warnings.catch_warnings():
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning, message="The weights matrix is not fully connected")

w = Queen.from_dataframe(parcels)

g = w.to_networkx()

# Calculate the number of contiguous neighbors for each feature in parcels
n_contiguous = [len(nx.node_connected_component(g, i)) for i in range(len(parcels))]

primary_featurelayer.gdf['n_contiguous'] = n_contiguous

return primary_featurelayer
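A small standalone check, not part of the commit, of the contiguity-count logic above: three touching unit squares form one Queen-contiguous component of size 3, while an isolated square reports 1. The geometries are toy stand-ins for the parcel layer.

import geopandas as gpd
import networkx as nx
from libpysal.weights import Queen
from shapely.geometry import box

# Three adjacent unit squares plus one far-away square.
parcels = gpd.GeoDataFrame(
    geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1), box(2, 0, 3, 1), box(10, 10, 11, 11)]
)

# libpysal will warn that the weights matrix is not fully connected; expected here.
w = Queen.from_dataframe(parcels)  # queen contiguity: shared edge or corner
g = w.to_networkx()                # contiguity graph, node i corresponds to row i

n_contiguous = [len(nx.node_connected_component(g, i)) for i in range(len(parcels))]
print(n_contiguous)  # [3, 3, 3, 1]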
56 changes: 56 additions & 0 deletions data/src/data_utils/dev_probability.py
@@ -0,0 +1,56 @@
from datetime import datetime, timedelta

import geopandas as gpd
import jenkspy
import pandas as pd
import requests
from classes.featurelayer import FeatureLayer
from constants.services import CENSUS_BGS_URL, PERMITS_QUERY

from config.config import USE_CRS


def dev_probability(primary_featurelayer):

census_bgs_gdf = gpd.read_file(CENSUS_BGS_URL)
census_bgs_gdf = census_bgs_gdf.to_crs(USE_CRS)

base_url = "https://phl.carto.com/api/v2/sql"
response = requests.get(f"{base_url}?q={PERMITS_QUERY}&format=GeoJSON")

if response.status_code == 200:
try:
permits_gdf = gpd.GeoDataFrame.from_features(response.json(), crs='EPSG:4326')
print("GeoDataFrame created successfully.")
except Exception as e:
print(f"Failed to convert response to GeoDataFrame: {e}")
return primary_featurelayer
else:
truncated_response = response.content[:500]
print(f"Failed to fetch permits data. HTTP status code: {response.status_code}. Response text: {truncated_response}")
return primary_featurelayer

permits_gdf = permits_gdf.to_crs(USE_CRS)

joined_gdf = gpd.sjoin(permits_gdf, census_bgs_gdf, how="inner", predicate='within')

permit_counts = joined_gdf.groupby('index_right').size()
census_bgs_gdf['permit_count'] = census_bgs_gdf.index.map(permit_counts)
census_bgs_gdf['permit_count'] = census_bgs_gdf['permit_count'].fillna(0)

# Classify development probability using Jenks natural breaks
breaks = jenkspy.jenks_breaks(census_bgs_gdf['permit_count'], n_classes=3)
census_bgs_gdf['dev_rank'] = pd.cut(census_bgs_gdf['permit_count'], bins=breaks, labels=['Low', 'Medium', 'High'])

updated_census_bgs = FeatureLayer(
name="Updated Census Block Groups",
gdf=census_bgs_gdf[['permit_count', 'dev_rank', 'geometry']],
use_wkb_geom_field="geometry",
cols=["permit_count", "dev_rank"]
)

updated_census_bgs.gdf = updated_census_bgs.gdf.to_crs(USE_CRS)

primary_featurelayer.spatial_join(updated_census_bgs)

return primary_featurelayer
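A minimal sketch, not part of the commit, of the Jenks classification step used above: break a toy permit-count series into three natural-breaks classes and label them Low, Medium, and High.

import jenkspy
import pandas as pd

permit_counts = pd.Series([0, 0, 1, 2, 2, 3, 8, 9, 25, 30])

# jenks_breaks returns n_classes + 1 ascending bin edges, e.g. [0.0, 3.0, 9.0, 30.0].
breaks = jenkspy.jenks_breaks(permit_counts, n_classes=3)

dev_rank = pd.cut(
    permit_counts,
    bins=breaks,
    labels=["Low", "Medium", "High"],
    include_lowest=True,  # keep the minimum value in the "Low" bin
)
print(dev_rank.value_counts())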
25 changes: 14 additions & 11 deletions data/src/data_utils/drug_crimes.py
@@ -1,12 +1,13 @@
from constants.services import DRUGCRIME_SQL_QUERY
from config.config import USE_CRS
from classes.featurelayer import FeatureLayer
import numpy as np
import mapclassify
import matplotlib.pyplot as plt
from awkde.awkde import GaussianKDE
import numpy as np
import rasterio
from awkde.awkde import GaussianKDE
from classes.featurelayer import FeatureLayer
from constants.services import DRUGCRIME_SQL_QUERY
from rasterio.transform import Affine
import mapclassify

from config.config import USE_CRS


def drug_crimes(primary_featurelayer):
@@ -28,15 +28,17 @@ def drug_crimes(primary_featurelayer):
X = np.array(list(zip(x, y)))

# Generate grid for plotting
x_grid, y_grid = np.linspace(x.min(), x.max(), 1000), np.linspace(
y.min(), y.max(), 1000
grid_length = 2500

x_grid, y_grid = np.linspace(x.min(), x.max(), grid_length), np.linspace(
y.min(), y.max(), grid_length
)
xx, yy = np.meshgrid(x_grid, y_grid)
grid_points = np.array([xx.ravel(), yy.ravel()]).T

# Compute adaptive KDE values
print("fitting KDE for drug crime data")
kde = GaussianKDE(glob_bw="silverman", alpha=0.999, diag_cov=True)
kde = GaussianKDE(glob_bw=0.1, alpha=0.999, diag_cov=True)
kde.fit(X)

z = kde.predict(grid_points)
@@ -58,7 +61,7 @@

# Export as raster
with rasterio.open(
"tmp/output.tif",
"tmp/drug_crimes.tif",
"w",
driver="GTiff",
height=zz.shape[0],
@@ -82,7 +85,7 @@

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["centroid"])

src = rasterio.open("tmp/output.tif")
src = rasterio.open("tmp/drug_crimes.tif")
sampled_values = [x[0] for x in src.sample(coord_list)]

primary_featurelayer.gdf["drugcrime_density"] = sampled_values
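A minimal sketch, not part of the commit, of the raster-sampling pattern at the end of this file: open the density GeoTIFF written above and sample band 1 at parcel centroids. The parcels path is hypothetical, and the parcels are assumed to share the raster's CRS.

import geopandas as gpd
import rasterio

parcels = gpd.read_file("tmp/parcels.geojson")  # hypothetical input in the raster's CRS

with rasterio.open("tmp/drug_crimes.tif") as src:
    coord_list = [(pt.x, pt.y) for pt in parcels.geometry.centroid]
    # src.sample yields one array of band values per (x, y) pair; take band 1.
    parcels["drugcrime_density"] = [vals[0] for vals in src.sample(coord_list)]

print(parcels["drugcrime_density"].describe())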
25 changes: 14 additions & 11 deletions data/src/data_utils/gun_crimes.py
@@ -1,12 +1,13 @@
from constants.services import GUNCRIME_SQL_QUERY
from config.config import USE_CRS
from classes.featurelayer import FeatureLayer
import numpy as np
import mapclassify
import matplotlib.pyplot as plt
from awkde.awkde import GaussianKDE
import numpy as np
import rasterio
from awkde.awkde import GaussianKDE
from classes.featurelayer import FeatureLayer
from constants.services import GUNCRIME_SQL_QUERY
from rasterio.transform import Affine
import mapclassify

from config.config import USE_CRS


def gun_crimes(primary_featurelayer):
@@ -26,15 +27,17 @@ def gun_crimes(primary_featurelayer):
X = np.array(list(zip(x, y)))

# Generate grid for plotting
x_grid, y_grid = np.linspace(x.min(), x.max(), 1000), np.linspace(
y.min(), y.max(), 1000
grid_length = 2500

x_grid, y_grid = np.linspace(x.min(), x.max(), grid_length), np.linspace(
y.min(), y.max(), grid_length
)
xx, yy = np.meshgrid(x_grid, y_grid)
grid_points = np.array([xx.ravel(), yy.ravel()]).T

# Compute adaptive KDE values
print("fitting KDE for gun crime data")
kde = GaussianKDE(glob_bw="silverman", alpha=0.999, diag_cov=True)
kde = GaussianKDE(glob_bw=0.1, alpha=0.999, diag_cov=True)
kde.fit(X)

z = kde.predict(grid_points)
@@ -56,7 +59,7 @@

# Export as raster
with rasterio.open(
"tmp/output.tif",
"tmp/gun_crimes.tif",
"w",
driver="GTiff",
height=zz.shape[0],
@@ -80,7 +83,7 @@

primary_featurelayer.gdf = primary_featurelayer.gdf.drop(columns=["centroid"])

src = rasterio.open("tmp/output.tif")
src = rasterio.open("tmp/gun_crimes.tif")
sampled_values = [x[0] for x in src.sample(coord_list)]

primary_featurelayer.gdf["guncrime_density"] = sampled_values
17 changes: 11 additions & 6 deletions data/src/data_utils/nbhoods.py
@@ -1,20 +1,25 @@
import geopandas as gpd
from classes.featurelayer import FeatureLayer
from config.config import USE_CRS
from constants.services import NBHOODS_URL

from config.config import USE_CRS


def nbhoods(primary_featurelayer):
phl_nbhoods = gpd.read_file(NBHOODS_URL)
phl_nbhoods.rename(columns={"mapname": "neighborhood"}, inplace=True)

# Rename the uppercase 'MAPNAME' column to 'neighborhood' if present
if 'MAPNAME' in phl_nbhoods.columns:
phl_nbhoods.rename(columns={"MAPNAME": "neighborhood"}, inplace=True)

phl_nbhoods = phl_nbhoods.to_crs(USE_CRS)

nbhoods = FeatureLayer("Neighborhoods")
nbhoods.gdf = phl_nbhoods

red_cols_to_keep = ["neighborhood", "geometry"]
nbhoods.gdf = nbhoods.gdf[red_cols_to_keep]

primary_featurelayer.spatial_join(nbhoods)

return primary_featurelayer