From 225e1da94ffbf5ad781cb613d7cd128753cbc260 Mon Sep 17 00:00:00 2001 From: Basavaraj Kalloli Date: Fri, 17 Jan 2025 15:15:51 +0000 Subject: [PATCH 1/2] Adding support for London Borough of Richmond Upon Thames --- uk_bin_collection/tests/input.json | 6 + .../LondonBoroughOfRichmondUponThames.py | 126 ++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 07b2c1bd19..893c943327 100755 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -1030,6 +1030,12 @@ "wiki_name": "London Borough Ealing", "wiki_note": "Pass the UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)." }, + "LondonBoroughOfRichmondUponThames": { + "house_number": "March Road", + "skip_get_url": true, + "url": "https://www.richmond.gov.uk/services/waste_and_recycling/collection_days/", + "wiki_name": "London Borough Of Richmond Upon Thames" + }, "LondonBoroughHarrow": { "url": "https://www.harrow.gov.uk", "wiki_command_url_override": "https://www.harrow.gov.uk", diff --git a/uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py b/uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py new file mode 100644 index 0000000000..063e033b3f --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py @@ -0,0 +1,126 @@ +from bs4 import BeautifulSoup +from selenium.webdriver.common.by import By +from selenium.webdriver.support import expected_conditions as EC +from selenium.webdriver.support.ui import Select +from selenium.webdriver.support.wait import WebDriverWait +from selenium.webdriver.common.keys import Keys + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass + + +# 
import the wonderful Beautiful Soup and the URL grabber +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + # Make a BS4 object + print(f"Arguments are f{kwargs}") + driver = None + try: + page = kwargs["url"] + street_name = kwargs.get("house_number") + web_driver = kwargs.get("web_driver") + headless = kwargs.get("headless") + + driver = create_webdriver(web_driver, headless, None, __name__) + driver.get(page) + + wait = WebDriverWait(driver, 60) + + dismiss_cookie_banner = wait.until( + EC.visibility_of_element_located( + (By.ID, "ccc-dismiss-button") + ) + ) + + dismiss_cookie_banner.send_keys(Keys.ENTER) + + inputElement_postcodesearch = wait.until( + EC.visibility_of_element_located( + (By.ID, "Street") + ) + ) + + inputElement_postcodesearch.send_keys(street_name) + + main_content_submit_button = wait.until( + EC.element_to_be_clickable( + (By.XPATH, ".//div[@id='maincontent']//input[@type='submit']") + ) + ) + + main_content_submit_button.send_keys(Keys.ENTER) + + collection_row = driver.find_element( + By.CLASS_NAME, "lb-table-row-highlight" + ) + + table = driver.find_element(By.XPATH, ".//div[@id='maincontent']//table") + table_rows = table.find_elements(by=By.TAG_NAME, value="tr") + headerRow = table_rows[0] + table_info_row = table_rows[1] + + bin_types = headerRow.find_elements(by=By.TAG_NAME, value ="th")[2:] + collection_days = table_info_row.find_elements(by=By.TAG_NAME, value ="td")[2:] + + for index, bin in enumerate(bin_types): + if index == 0: + next_collection_date = collection_days[index].text + print(f"{bin.text} - {collection_days[index].text}") + + # + # # Now create a Select object based on the found element + # dropdown = Select(dropdown_element) + # + # # Select the option by visible text + # 
dropdown.select_by_visible_text(house_number) + # + # results = wait.until( + # EC.element_to_be_clickable( + # (By.CLASS_NAME, "bin-collection-dates-container") + # ) + # ) + # + # soup = BeautifulSoup(driver.page_source, features="html.parser") + # soup.prettify() + + # Extract data from the table + bin_collection_data = [] + # rows = soup.find( + # "table", class_="defaultgeneral bin-collection-dates" + # ).find_all("tr") + # for row in rows: + # cells = row.find_all("td") + # if cells: + # date_str = cells[0].text.strip() + # bin_type = cells[1].text.strip() + # # Convert date string to the required format DD/MM/YYYY + # date_obj = datetime.strptime(date_str, "%d %B %Y") + # date_formatted = date_obj.strftime(date_format) + # bin_collection_data.append( + # {"collectionDate": date_formatted, "type": bin_type} + # ) + + # Convert to JSON + json_data = {"bins": bin_collection_data} + + except Exception as e: + # Here you can log the exception if needed + print(f"An error occurred: {e}") + # Optionally, re-raise the exception if you want it to propagate + raise + finally: + # This block ensures that the driver is closed regardless of an exception + if driver: + driver.quit() + return json_data + + +print("Hello World!") +CouncilClass().parse_data("", url="https://www.richmond.gov.uk/services/waste_and_recycling/collection_days/", + house_number="March Road", headless=True) From b62b39e5429328f878c48a290f4438e71d110cb9 Mon Sep 17 00:00:00 2001 From: Basavaraj Kalloli Date: Fri, 17 Jan 2025 15:15:51 +0000 Subject: [PATCH 2/2] Implement parsing for getting collection dates for the council --- uk_bin_collection/tests/input.json | 3 +- .../LondonBoroughOfRichmondUponThames.py | 152 +++++++----------- 2 files changed, 64 insertions(+), 91 deletions(-) diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index 893c943327..2a925ebc7a 100755 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -1034,7 +1034,8 
@@ "house_number": "March Road", "skip_get_url": true, "url": "https://www.richmond.gov.uk/services/waste_and_recycling/collection_days/", - "wiki_name": "London Borough Of Richmond Upon Thames" + "wiki_name": "London Borough Of Richmond Upon Thames", + "wiki_note": "Pass ONLY the name of the street in the house number parameter; unfortunately, postcodes are not allowed." }, "LondonBoroughHarrow": { "url": "https://www.harrow.gov.uk", diff --git a/uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py b/uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py index 063e033b3f..83d4576af2 100644 --- a/uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py +++ b/uk_bin_collection/uk_bin_collection/councils/LondonBoroughOfRichmondUponThames.py @@ -1,29 +1,20 @@ -from bs4 import BeautifulSoup from selenium.webdriver.common.by import By from selenium.webdriver.support import expected_conditions as EC -from selenium.webdriver.support.ui import Select +from datetime import datetime from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.common.keys import Keys - from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass -# import the wonderful Beautiful Soup and the URL grabber class CouncilClass(AbstractGetBinDataClass): - """ - Concrete classes have to implement all abstract operations of the - base class. They can also override some operations with a default - implementation. 
- """ def parse_data(self, page: str, **kwargs) -> dict: - # Make a BS4 object print(f"Arguments are f{kwargs}") driver = None try: page = kwargs["url"] - street_name = kwargs.get("house_number") + street_name = kwargs.get("paon") web_driver = kwargs.get("web_driver") headless = kwargs.get("headless") @@ -32,82 +23,13 @@ def parse_data(self, page: str, **kwargs) -> dict: wait = WebDriverWait(driver, 60) - dismiss_cookie_banner = wait.until( - EC.visibility_of_element_located( - (By.ID, "ccc-dismiss-button") - ) - ) - - dismiss_cookie_banner.send_keys(Keys.ENTER) - - inputElement_postcodesearch = wait.until( - EC.visibility_of_element_located( - (By.ID, "Street") - ) - ) - - inputElement_postcodesearch.send_keys(street_name) - - main_content_submit_button = wait.until( - EC.element_to_be_clickable( - (By.XPATH, ".//div[@id='maincontent']//input[@type='submit']") - ) - ) + self.dismiss_cookie_banner(wait) + self.input_street_name(street_name, wait) + self.submit(wait) + bin_types, collection_days = self.get_bins(driver) + bindata = self.get_collection_days(bin_types, collection_days) - main_content_submit_button.send_keys(Keys.ENTER) - - collection_row = driver.find_element( - By.CLASS_NAME, "lb-table-row-highlight" - ) - - table = driver.find_element(By.XPATH, ".//div[@id='maincontent']//table") - table_rows = table.find_elements(by=By.TAG_NAME, value="tr") - headerRow = table_rows[0] - table_info_row = table_rows[1] - - bin_types = headerRow.find_elements(by=By.TAG_NAME, value ="th")[2:] - collection_days = table_info_row.find_elements(by=By.TAG_NAME, value ="td")[2:] - - for index, bin in enumerate(bin_types): - if index == 0: - next_collection_date = collection_days[index].text - print(f"{bin.text} - {collection_days[index].text}") - - # - # # Now create a Select object based on the found element - # dropdown = Select(dropdown_element) - # - # # Select the option by visible text - # dropdown.select_by_visible_text(house_number) - # - # results = wait.until( - # 
EC.element_to_be_clickable( - # (By.CLASS_NAME, "bin-collection-dates-container") - # ) - # ) - # - # soup = BeautifulSoup(driver.page_source, features="html.parser") - # soup.prettify() - - # Extract data from the table - bin_collection_data = [] - # rows = soup.find( - # "table", class_="defaultgeneral bin-collection-dates" - # ).find_all("tr") - # for row in rows: - # cells = row.find_all("td") - # if cells: - # date_str = cells[0].text.strip() - # bin_type = cells[1].text.strip() - # # Convert date string to the required format DD/MM/YYYY - # date_obj = datetime.strptime(date_str, "%d %B %Y") - # date_formatted = date_obj.strftime(date_format) - # bin_collection_data.append( - # {"collectionDate": date_formatted, "type": bin_type} - # ) - - # Convert to JSON - json_data = {"bins": bin_collection_data} + print(bindata) except Exception as e: # Here you can log the exception if needed @@ -118,9 +40,59 @@ def parse_data(self, page: str, **kwargs) -> dict: # This block ensures that the driver is closed regardless of an exception if driver: driver.quit() - return json_data + return bindata + + def get_collection_days(self, bin_types, collection_days): + bindata = {"bins": []} + WEEKLY_COLLECTION = 0 + GARDEN_COLLECTION = 1 + + for index, bin_type in enumerate(bin_types): + # currently only handled weekly and garden collection, special collections like Christmas Day need to be added + if index == WEEKLY_COLLECTION: + next_collection_date = get_next_day_of_week(collection_days[index].text.strip(), date_format) + elif index == GARDEN_COLLECTION: + split_date_part = collection_days[index].text.split("More dates")[0] + next_collection_date = datetime.strptime(split_date_part.strip(), "%d %B %Y").strftime(date_format) + else: + next_collection_date = datetime.strptime(collection_days[index].text.strip(), "%d %B %Y").strftime(date_format) + + dict_data = { + "type": bin_type.text.strip(), + "collectionDate": next_collection_date, + } + bindata["bins"].append(dict_data) + 
return bindata + + def get_bins(self, driver): + table = driver.find_element(By.XPATH, ".//div[@id='maincontent']//table") + table_rows = table.find_elements(by=By.TAG_NAME, value="tr") + headerRow = table_rows[0] + table_info_row = table_rows[1] + bin_types = headerRow.find_elements(by=By.TAG_NAME, value="th")[2:] + collection_days = table_info_row.find_elements(by=By.TAG_NAME, value="td")[2:] + return bin_types, collection_days + + def submit(self, wait): + main_content_submit_button = wait.until( + EC.element_to_be_clickable( + (By.XPATH, ".//div[@id='maincontent']//input[@type='submit']") + ) + ) + main_content_submit_button.send_keys(Keys.ENTER) + def input_street_name(self, street_name, wait): + input_element_postcodesearch = wait.until( + EC.visibility_of_element_located( + (By.ID, "Street") + ) + ) + input_element_postcodesearch.send_keys(street_name) -print("Hello World!") -CouncilClass().parse_data("", url="https://www.richmond.gov.uk/services/waste_and_recycling/collection_days/", - house_number="March Road", headless=True) + def dismiss_cookie_banner(self, wait): + cookie_banner = wait.until( + EC.visibility_of_element_located( + (By.ID, "ccc-dismiss-button") + ) + ) + cookie_banner.send_keys(Keys.ENTER)