diff --git a/api/webscraper/__pycache__/database_constants.cpython-312.pyc b/api/webscraper/__pycache__/database_constants.cpython-312.pyc
index ba453e1..da7845e 100644
Binary files a/api/webscraper/__pycache__/database_constants.cpython-312.pyc and b/api/webscraper/__pycache__/database_constants.cpython-312.pyc differ
diff --git a/api/webscraper/__pycache__/nyiso_scraper.cpython-312.pyc b/api/webscraper/__pycache__/nyiso_scraper.cpython-312.pyc
index 7b91aaa..dcee55d 100644
Binary files a/api/webscraper/__pycache__/nyiso_scraper.cpython-312.pyc and b/api/webscraper/__pycache__/nyiso_scraper.cpython-312.pyc differ
diff --git a/api/webscraper/__pycache__/nyserda_scraper.cpython-312.pyc b/api/webscraper/__pycache__/nyserda_scraper.cpython-312.pyc
index 545bde4..cd283b4 100644
Binary files a/api/webscraper/__pycache__/nyserda_scraper.cpython-312.pyc and b/api/webscraper/__pycache__/nyserda_scraper.cpython-312.pyc differ
diff --git a/api/webscraper/database.py b/api/webscraper/database.py
index c84c752..d0c02b5 100644
--- a/api/webscraper/database.py
+++ b/api/webscraper/database.py
@@ -12,6 +12,7 @@
     filter_nyiso_cluster_sheet,
     filter_nyiso_in_service_sheet,
 )
+from ores_scraper import query_ores_noi, query_ores_under_review, query_ores_permitted
 from utils.scraper_utils import (
     create_update_object,
     update_kdm,
@@ -413,6 +414,162 @@ def nyiso_in_service_to_database():
                 print(exception)
 
 
+def ores_noi_to_database():
+    database = []
+    database.extend(query_ores_noi())
+    for project in database:
+        existing_data = (
+            supabase.table("Projects_duplicate")
+            .select("*")
+            .eq("project_name", project["project_name"])
+            .execute()
+        )
+        if len(existing_data.data) > 0:
+            existing_project = existing_data.data[0]
+            update_object = create_update_object(existing_project, project)
+            try:
+                response = (
+                    supabase.table("Projects_duplicate")
+                    .update(update_object)
+                    .eq(
+                        "project_name",
+                        project["project_name"],
+                    )
+                    .execute()
+                )
+                print("UPDATE", response, "\n")
+            except Exception as exception:
+                print(exception)
+        else:
+            try:
+                response = (
+                    supabase.table("Projects_duplicate").insert(project).execute()
+                )
+                print("INSERT", response, "\n")
+            except Exception as exception:
+                print(exception)
+
+
+def ores_under_review_to_database():
+    database = []
+    database.extend(query_ores_under_review())
+    for project in database:
+        existing_data = (
+            supabase.table("Projects_duplicate")
+            .select("*")
+            .eq("project_name", project["project_name"])
+            .execute()
+        )
+        if len(existing_data.data) > 0:
+            existing_project = existing_data.data[0]
+            update_object = create_update_object(existing_project, project)
+            # if the existing project has no kdms, add the dict first
+            if (
+                existing_project["key_development_milestones"] is None
+                or len(existing_project["key_development_milestones"]) == 0
+            ):
+                update_object["key_development_milestones"] = initial_kdm_dict
+            else:
+                update_object["key_development_milestones"] = existing_project[
+                    "key_development_milestones"
+                ]
+
+            # update kdm for ores projects under review
+            update_object["key_development_milestones"] = update_kdm(
+                milestoneTitle="Application for permit to ORES",
+                completed=True,
+                date=None,
+                kdm=update_object["key_development_milestones"],
+            )
+            try:
+                response = (
+                    supabase.table("Projects_duplicate")
+                    .update(update_object)
+                    .eq(
+                        "project_name",
+                        project["project_name"],
+                    )
+                    .execute()
+                )
+                print("UPDATE", response, "\n")
+            except Exception as exception:
+                print(exception)
+        else:
+            project["key_development_milestones"] = update_kdm(
+                milestoneTitle="Application for permit to ORES",
+                completed=True,
+                date=None,
+                kdm=project["key_development_milestones"],
+            )
+            try:
+                response = (
+                    supabase.table("Projects_duplicate").insert(project).execute()
+                )
+                print("INSERT", response, "\n")
+            except Exception as exception:
+                print(exception)
+
+
+def ores_permitted_to_database():
+    database = []
+    database.extend(query_ores_permitted())
+    for project in database:
+        existing_data = (
+            supabase.table("Projects_duplicate")
+            .select("*")
+            .eq("project_name", project["project_name"])
+            .execute()
+        )
+        if len(existing_data.data) > 0:
+            existing_project = existing_data.data[0]
+            update_object = create_update_object(existing_project, project)
+            # if the existing project has no kdms, add the dict first
+            if (
+                existing_project["key_development_milestones"] is None
+                or len(existing_project["key_development_milestones"]) == 0
+            ):
+                update_object["key_development_milestones"] = initial_kdm_dict
+            else:
+                update_object["key_development_milestones"] = existing_project[
+                    "key_development_milestones"
+                ]
+
+            # update kdm for permitted ores projects
+            update_object["key_development_milestones"] = update_kdm(
+                milestoneTitle="Issuance of permit from ORES",
+                completed=True,
+                date=None,
+                kdm=update_object["key_development_milestones"],
+            )
+            try:
+                response = (
+                    supabase.table("Projects_duplicate")
+                    .update(update_object)
+                    .eq(
+                        "project_name",
+                        project["project_name"],
+                    )
+                    .execute()
+                )
+                print("UPDATE", response, "\n")
+            except Exception as exception:
+                print(exception)
+        else:
+            project["key_development_milestones"] = update_kdm(
+                milestoneTitle="Issuance of permit from ORES",
+                completed=True,
+                date=None,
+                kdm=project["key_development_milestones"],
+            )
+            try:
+                response = (
+                    supabase.table("Projects_duplicate").insert(project).execute()
+                )
+                print("INSERT", response, "\n")
+            except Exception as exception:
+                print(exception)
+
+
 """
 For testing
 """
@@ -420,3 +577,6 @@ def nyiso_in_service_to_database():
 # nyserda_solar_to_database()
 # nyiso_to_database()
 # nyiso_in_service_to_database()
+# ores_noi_to_database()
+# ores_under_review_to_database()
+# ores_permitted_to_database()
diff --git a/api/webscraper/database_constants.py b/api/webscraper/database_constants.py
index 1c4f9af..1df8137 100644
--- a/api/webscraper/database_constants.py
+++ b/api/webscraper/database_constants.py
@@ -40,4 +40,14 @@
         "date": None,
     },
     {"milestoneTitle": "Start of operations", "completed": False, "date": None},
+    {
+        "milestoneTitle": "Application for permit to ORES",
+        "completed": False,
+        "date": None,
+    },
+    {
+        "milestoneTitle": "Issuance of permit from ORES",
+        "completed": False,
+        "date": None,
+    },
 ]
diff --git a/api/webscraper/nyserda_scraper.py b/api/webscraper/nyserda_scraper.py
index 195f2cc..686a705 100644
--- a/api/webscraper/nyserda_scraper.py
+++ b/api/webscraper/nyserda_scraper.py
@@ -76,8 +76,6 @@ def write_large_to_json():
         file.write("\n")
 
 
-write_large_to_json()
-
 """
 This scrapes data from the NYSERDA Statewide Distributed Solar Projects database.
 We filter for specific columns from the database's API and save them to a json file.
diff --git a/api/webscraper/ores_scraper.py b/api/webscraper/ores_scraper.py
new file mode 100644
index 0000000..9c830df
--- /dev/null
+++ b/api/webscraper/ores_scraper.py
@@ -0,0 +1,180 @@
+import requests
+from bs4 import BeautifulSoup
+import pandas as pd
+from io import StringIO
+from utils.scraper_utils import geocode_lat_long, update_kdm
+from database_constants import initial_kdm_dict
+
+# url = "https://dps.ny.gov/ores-permit-applications"
+# page = requests.get(url)
+
+# soup = BeautifulSoup(page.content, "html.parser")
+# tables = soup.find_all("table")
+
+# notices_of_intent = pd.read_html(StringIO(tables[0].prettify()))[0]
+# noi_dict = notices_of_intent.to_dict(orient="records")
+
+# # Complete Applications Under Review
+# under_review = pd.read_html(StringIO(tables[3].prettify()))[0]
+# under_review_dict = under_review.to_dict(orient="records")
+
+# # Permitted Applications
+# permitted = pd.read_html(StringIO(tables[4].prettify()))[0]
+# permitted_dict = permitted.to_dict(orient="records")
+
+"""
+All the descriptions of the ORES data describe the location of the project in the following format:
+... Located in the Towns of ALTONA, CLINTON, ELLENBURG, and MOOERS, CLINTON COUNTY.
+"""
+
+
+def parse_for_location(description):
+    # finds the index in the description where the phrase "Town(s) of..." appears
+    town_index = description.find("Town")
+    town_string = description[town_index:]
+    # splits town_string by the comma
+    town_split = town_string.split(",")
+    # town is the last word of the second-to-last comma-separated segment (the last town listed)
+    town = town_split[-2].split(" ")[-1].strip()
+    # county is the last comma-separated segment of the location string
+    county = town_split[-1].strip()
+
+    # removes the period from the end of county if it exists
+    index = county.find(".")
+    if index != -1:
+        while county.find(".", index + 1) != -1:
+            index = county.find(".", index + 1)
+        county = county[:index]
+
+    # capitalize first letter of each word in town/county name
+    if town:
+        town = " ".join([word.capitalize() for word in town.split(" ")])
+    if county:
+        county = " ".join([word.capitalize() for word in county.split(" ")])
+    return (town, county)
+
+
+# ORES notice of intent
+def filter_noi(data: list) -> list:
+    """
+    params: data - list of dictionaries representing rows in the ORES Notices of Intent table
+    Parses description to find town, county of project
+    Geocodes the town to latitude and longitude
+    Returns list of projects with data filtered to include the desired fields
+    """
+    filtered_list = []
+    for row in data:
+        town, county = parse_for_location(row["Description"])
+        lat, long = geocode_lat_long(f"{town}, NY")
+        project_dict = {
+            "permit_application_number": row.get("Permit Application Number", None),
+            "project_name": row.get("Project Name", None),
+            "town": town if town else None,
+            "county": county if county else None,
+            "latitude": lat if lat else None,
+            "longitude": long if long else None,
+            "key_development_milestones": initial_kdm_dict,
+        }
+        filtered_list.append(project_dict)
+
+    return filtered_list
+
+
+def filter_under_review(data: list) -> list:
+    """
+    params: data - list of dictionaries representing rows in the ORES Complete Applications Under Review table
+    Parses description to find town, county of project
+    Geocodes the town to latitude and longitude
+    Returns list of projects with data filtered to include the desired fields
+    """
+    filtered_list = []
+    for row in data:
+        town, county = parse_for_location(row["Description"])
+        lat, long = geocode_lat_long(f"{town}, NY")
+        project_dict = {
+            "permit_application_number": row.get("Permit Application Number", None),
+            "project_name": row.get("Project Name", None),
+            "town": town if town else None,
+            "county": county if county else None,
+            "latitude": lat if lat else None,
+            "longitude": long if long else None,
+            "key_development_milestones": initial_kdm_dict,
+        }
+        project_dict["key_development_milestones"] = update_kdm(
+            "Application for permit to ORES",
+            date=None,
+            completed=True,
+            kdm=project_dict.get("key_development_milestones"),
+        )
+        filtered_list.append(project_dict)
+    return filtered_list
+
+
+def filter_permitted(data):
+    """
+    params: data - list of dictionaries representing rows in the ORES Permitted Applications table
+    Parses description to find town, county of project
+    Geocodes the town to latitude and longitude
+    Returns list of projects with data filtered to include the desired fields
+    """
+    filtered_list = []
+    for row in data:
+        town, county = parse_for_location(row["Description"])
+        lat, long = geocode_lat_long(f"{town}, NY")
+        project_dict = {
+            "permit_application_number": row.get("Permit Application Number", None),
+            "project_name": row.get("Project Name", None),
+            "town": town if town else None,
+            "county": county if county else None,
+            "latitude": lat if lat else None,
+            "longitude": long if long else None,
+            "key_development_milestones": initial_kdm_dict,
+        }
+        project_dict["key_development_milestones"] = update_kdm(
+            "Issuance of permit from ORES",
+            date=None,
+            completed=True,
+            kdm=project_dict.get("key_development_milestones"),
+        )
+        filtered_list.append(project_dict)
+    return filtered_list
+
+
+# ORES notice of intent
+def query_ores_noi():
+    url = "https://dps.ny.gov/ores-permit-applications"
+    page = requests.get(url)
+
+    soup = BeautifulSoup(page.content, "html.parser")
+    tables = soup.find_all("table")
+
+    notices_of_intent = pd.read_html(StringIO(tables[0].prettify()))[0]
+    noi_dict = notices_of_intent.to_dict(orient="records")
+    response = filter_noi(noi_dict)
+    return response
+
+
+def query_ores_under_review():
+    url = "https://dps.ny.gov/ores-permit-applications"
+    page = requests.get(url)
+
+    soup = BeautifulSoup(page.content, "html.parser")
+    tables = soup.find_all("table")
+
+    under_review = pd.read_html(StringIO(tables[3].prettify()))[0]
+    under_review_dict = under_review.to_dict(orient="records")
+    response = filter_under_review(under_review_dict)
+    return response
+
+
+def query_ores_permitted():
+    url = "https://dps.ny.gov/ores-permit-applications"
+    page = requests.get(url)
+
+    soup = BeautifulSoup(page.content, "html.parser")
+    tables = soup.find_all("table")
+
+    permitted = pd.read_html(StringIO(tables[4].prettify()))[0]
+    permitted_dict = permitted.to_dict(orient="records")
+    response = filter_permitted(permitted_dict)
+    return response
diff --git a/api/webscraper/utils/__pycache__/__init__.cpython-312.pyc b/api/webscraper/utils/__pycache__/__init__.cpython-312.pyc
index 229c244..492d236 100644
Binary files a/api/webscraper/utils/__pycache__/__init__.cpython-312.pyc and b/api/webscraper/utils/__pycache__/__init__.cpython-312.pyc differ
diff --git a/package.json b/package.json
index efc172d..5b51f3c 100644
--- a/package.json
+++ b/package.json
@@ -28,6 +28,7 @@
   },
   "devDependencies": {
     "@ianvs/prettier-plugin-sort-imports": "^4.3.1",
+    "@types/google.maps": "^3.58.1",
     "@types/node": "^20.17.2",
     "@types/react": "^18.3.12",
     "@types/react-dom": "^18.3.1",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index ff31001..fc0581b 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -45,6 +45,9 @@ importers:
       '@ianvs/prettier-plugin-sort-imports':
         specifier: ^4.3.1
        version: 4.3.1(prettier@3.3.3)
+      '@types/google.maps':
+        specifier: ^3.58.1
+        version: 3.58.1
       '@types/node':
         specifier: ^20.17.2
         version: 20.17.2
@@ -2572,7 +2575,7 @@ snapshots:
       debug: 4.3.7
       enhanced-resolve: 5.17.1
       eslint: 8.57.1
-      eslint-module-utils: 2.12.0(@typescript-eslint/parser@7.2.0(eslint@8.57.1)(typescript@5.6.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.1)(typescript@5.6.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0)(eslint@8.57.1))(eslint@8.57.1)
+      eslint-module-utils: 2.12.0(@typescript-eslint/parser@7.2.0(eslint@8.57.1)(typescript@5.6.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1)
       fast-glob: 3.3.2
       get-tsconfig: 4.8.1
       is-bun-module: 1.2.1
@@ -2585,7 +2588,7 @@
       - eslint-import-resolver-webpack
       - supports-color
 
-  eslint-module-utils@2.12.0(@typescript-eslint/parser@7.2.0(eslint@8.57.1)(typescript@5.6.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.1)(typescript@5.6.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0)(eslint@8.57.1))(eslint@8.57.1):
+  eslint-module-utils@2.12.0(@typescript-eslint/parser@7.2.0(eslint@8.57.1)(typescript@5.6.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1):
     dependencies:
       debug: 3.2.7
     optionalDependencies:
@@ -2607,7 +2610,7 @@
       doctrine: 2.1.0
       eslint: 8.57.1
       eslint-import-resolver-node: 0.3.9
-      eslint-module-utils: 2.12.0(@typescript-eslint/parser@7.2.0(eslint@8.57.1)(typescript@5.6.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3(@typescript-eslint/parser@7.2.0(eslint@8.57.1)(typescript@5.6.3))(eslint-import-resolver-node@0.3.9)(eslint-plugin-import@2.31.0)(eslint@8.57.1))(eslint@8.57.1)
+      eslint-module-utils: 2.12.0(@typescript-eslint/parser@7.2.0(eslint@8.57.1)(typescript@5.6.3))(eslint-import-resolver-node@0.3.9)(eslint-import-resolver-typescript@3.6.3)(eslint@8.57.1)
       hasown: 2.0.2
       is-core-module: 2.15.1
       is-glob: 4.0.3
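
Reviewer note on the ORES description parsing added in ores_scraper.py: the sketch below is a minimal way to sanity-check parse_for_location against the description format documented at the top of that module. The expected values in the assertion are what the parsing logic above should produce for that sample string (the last listed town plus the county), not data captured from the live ORES page, and the import assumes the snippet is run from api/webscraper/ so ores_scraper is importable.

    from ores_scraper import parse_for_location

    description = (
        "... Located in the Towns of ALTONA, CLINTON, ELLENBURG, and MOOERS, "
        "CLINTON COUNTY."
    )
    town, county = parse_for_location(description)
    # parse_for_location keeps only the last town listed before the county
    assert (town, county) == ("Mooers", "Clinton County")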