From 8f53010a3c432075f6dd614d8c465944e0bede77 Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Sun, 5 Nov 2023 22:25:23 +0000 Subject: [PATCH 01/11] Create MidAndEastAntrimBoroughCouncil initial commit for mid and east antrim bin collection data --- .../councils/MidAndEastAntrimBoroughCouncil | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil diff --git a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil new file mode 100644 index 0000000000..ce44567581 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil @@ -0,0 +1,74 @@ +import pandas as pd +import time +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support.ui import Select + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import \ + AbstractGetBinDataClass + + +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. 
+ """ + + def get_data(self, df) -> dict: + # Create dictionary of data to be returned + data = {"bins": []} + + # Output collection data into dictionary + for i, row in df.iterrows(): + dict_data = { + "type": row["Collection Name"], + "collectionDate": row["Next Collection Due"], + } + + data["bins"].append(dict_data) + + return data + + def parse_data(self, page: str, **kwargs) -> dict: + page = "https://www.midandeastantrim.gov.uk/resident/waste-recycling/collection-dates/" + + # Assign user info + user_postcode = kwargs.get("postcode") + user_paon = kwargs.get("paon") + web_driver = kwargs.get("web_driver") + + # Create Selenium webdriver + driver = create_webdriver(web_driver) + driver.get(page) + + # Enter postcode in text box and wait + inputElement_pc = driver.find_element( + By.ID, "txtAjaxSearch" + ) + inputElement_pc.send_keys(user_postcode) + + time.sleep(4) + + # Submit address information and wait - selecting the top one only + inputElement_bn = driver.find_element( + By.ID, "show-button-0" + ).click() + + time.sleep(4) + + # Read next collection information into Pandas + table = driver.find_element( + By.ID, "divCalendarGraphics" + ).get_attribute("outerHTML") + df = pd.read_html(table, header=[1]) + df = df[0] + + # Quit Selenium webdriver to release session + driver.quit() + + # Parse data into dict + data = self.get_data(df) + + return data From 44ecaeaa63b3a045aeeab1f9fd6a9449b8be5d66 Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Sun, 12 Nov 2023 20:44:16 +0000 Subject: [PATCH 02/11] Update MidAndEastAntrimBoroughCouncil update extraction of date from html --- .../councils/MidAndEastAntrimBoroughCouncil | 72 +++++++++++++------ 1 file changed, 49 insertions(+), 23 deletions(-) diff --git a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil index ce44567581..f75cfb2e6f 100644 --- 
a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil +++ b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil @@ -1,4 +1,4 @@ -import pandas as pd +from bs4 import BeautifulSoup import time from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys @@ -16,21 +16,6 @@ class CouncilClass(AbstractGetBinDataClass): implementation. """ - def get_data(self, df) -> dict: - # Create dictionary of data to be returned - data = {"bins": []} - - # Output collection data into dictionary - for i, row in df.iterrows(): - dict_data = { - "type": row["Collection Name"], - "collectionDate": row["Next Collection Due"], - } - - data["bins"].append(dict_data) - - return data - def parse_data(self, page: str, **kwargs) -> dict: page = "https://www.midandeastantrim.gov.uk/resident/waste-recycling/collection-dates/" @@ -58,17 +43,58 @@ class CouncilClass(AbstractGetBinDataClass): time.sleep(4) - # Read next collection information into Pandas - table = driver.find_element( + # Read next collection information + page = driver.find_element( By.ID, "divCalendarGraphics" ).get_attribute("outerHTML") - df = pd.read_html(table, header=[1]) - df = df[0] + + # Make a BS4 object - remove bold tags + soup = BeautifulSoup(page.text.strip().replace("", "").replace("", ""), features="html.parser") + soup.prettify() + + # Data to return + data = {"bins": []} + + # Valid bin types + binTypes = [ + "Refuse", + "Garden", + ] + + # Value to create dict for bin values + keys, values = [], [] + + # Loop though html for text containing bins + # example of html (bold tags removed above) + #
+ # <br> Refuse: Tue 14 Nov then every alternate Tue<br>Recycling: No Recycling waste collection for this address<br>Garden: Tue 21 Nov then every alternate Tue<br>
spacer + # split by br tag and take first 4 splits + lines = soup.text.split('
',4) + for line in lines: + keys.append(line.text.split(':')[0].strip()) + # strip out the day and month from the text + values.append(line.text.split(':')[1].strip().split(' ')[1:2] + + # Create dict for bin name and string dates + binDict = dict(zip(keys, values)) + + # Process dict for valid bin types + for bin in list(binDict): + if bin in binTypes: + # Convert date - no year value so take it from todays date + date = datetime.strptime(binDict[bin], "%d %b").replace(year=datetime.today().year) + # if the date is in the past then it means the collection is next year so add a year + if date < today(): + date = date + relativedelta(years=1) + + # Set bin data + dict_data = { + "type": bin, + "collectionDate": date.strftime(date_format), + } + data["bins"].append(dict_data) # Quit Selenium webdriver to release session driver.quit() - # Parse data into dict - data = self.get_data(df) - return data From 88eba76a06a999235fd5a9fe29937e80af38532b Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Mon, 13 Nov 2023 22:38:43 +0000 Subject: [PATCH 03/11] Create MidAndEastAntrimBoroughCouncil.py first cut following offline test --- .../MidAndEastAntrimBoroughCouncil.py | 117 ++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py diff --git a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py new file mode 100644 index 0000000000..9b03dac964 --- /dev/null +++ b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py @@ -0,0 +1,117 @@ +from bs4 import BeautifulSoup +import time +from dateutil.relativedelta import relativedelta +from selenium.webdriver.common.by import By +from selenium.webdriver.common.keys import Keys +from selenium.webdriver.support.ui import Select +from selenium.common.exceptions 
import NoSuchElementException + +from uk_bin_collection.uk_bin_collection.common import * +from uk_bin_collection.uk_bin_collection.get_bin_data import \ + AbstractGetBinDataClass + +class CouncilClass(AbstractGetBinDataClass): + """ + Concrete classes have to implement all abstract operations of the + base class. They can also override some operations with a default + implementation. + """ + + def parse_data(self, page: str, **kwargs) -> dict: + page = "https://www.midandeastantrim.gov.uk/resident/waste-recycling/collection-dates/" + + # Assign user info + user_postcode = kwargs.get("postcode") + user_paon = kwargs.get("paon") + web_driver = kwargs.get("web_driver") + + # Create Selenium webdriver + options = webdriver.ChromeOptions() + options.add_experimental_option('excludeSwitches', ['enable-logging']) + driver = create_webdriver(web_driver) + + driver.get(page) + + time.sleep(5) + number=0 + driver.switch_to.frame(number) + # Enter postcode in text box and wait + inputElement_pc = driver.find_element( + By.ID, "txtAjaxSearch" + ) + inputElement_pc.send_keys(user_postcode) + + time.sleep(5) + + # Submit address information and wait - selecting the top one only + # if it is an exact match then it will go straight to the results + try: + button = driver.find_element( + By.XPATH, '//*[@id="show-button-0"]' + ) + driver.execute_script("arguments[0].click();", button) + except NoSuchElementException: + pass + + time.sleep(4) + + # Read next collection information + page = driver.find_element( + By.ID, "divCalendarGraphics" + ).get_attribute("outerHTML") + + # Make a BS4 object - remove bold tags and add @ so we can split the lines later + soup = BeautifulSoup(page.strip().replace("", "").replace("", "").replace("
", "@"), features="html.parser") + soup.prettify() + + # Data to return + data = {"bins": []} + + # Valid bin types + binTypes = [ + "Refuse", + "Garden" + ] + + # Value to create dict for bin values + keys, values = [], [] + + # Loop though html for text containing bins + # example of html (bold tags removed above) + #
+ # <br> Refuse: Tue 14 Nov then every alternate Tue<br>Recycling: No Recycling waste collection for this address<br>Garden: Tue 21 Nov then every alternate Tue<br>
spacer + # split by br tag and take first 4 splits + lines = soup.text.split('@',4) + for line in lines[1:4]: + keys.append(line.split(':')[0].strip()) + # strip out the day and month from the text + values.append(line.split(':')[1].strip().split(' ')[:3]) + + # Create dict for bin name and string dates + binDict = dict(zip(keys, values)) + + # Process dict for valid bin types + for bin in list(binDict): + if bin in binTypes: + # Convert date - no year value so take it from todays date + if binDict[bin][0] == "Tomorrow": + date = datetime.today() + relativedelta(days=1) + elif binDict[bin][0] == "Today": + date = datetime.today() + else: + date = datetime.strptime(' '.join(binDict[bin][1:]), "%d %b").replace(year=datetime.today().year) + # if the date is in the past then it means the collection is next year so add a year + if date < datetime.today(): + date = date + relativedelta(years=1) + + # Set bin data + dict_data = { + "type": bin, + "collectionDate": date.strftime(date_format), + } + data["bins"].append(dict_data) + + # Quit Selenium webdriver to release session + driver.quit() + + return data From 836c8f5545938a09a09377963dcc04ee980e5104 Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Mon, 13 Nov 2023 22:43:40 +0000 Subject: [PATCH 04/11] Update MidAndEastAntrimBoroughCouncil remove contents --- .../councils/MidAndEastAntrimBoroughCouncil | 99 ------------------- 1 file changed, 99 deletions(-) diff --git a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil index f75cfb2e6f..8b13789179 100644 --- a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil +++ b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil @@ -1,100 +1 @@ -from bs4 import BeautifulSoup -import time -from selenium.webdriver.common.by import By -from selenium.webdriver.common.keys import Keys 
-from selenium.webdriver.support.ui import Select -from uk_bin_collection.uk_bin_collection.common import * -from uk_bin_collection.uk_bin_collection.get_bin_data import \ - AbstractGetBinDataClass - - -class CouncilClass(AbstractGetBinDataClass): - """ - Concrete classes have to implement all abstract operations of the - base class. They can also override some operations with a default - implementation. - """ - - def parse_data(self, page: str, **kwargs) -> dict: - page = "https://www.midandeastantrim.gov.uk/resident/waste-recycling/collection-dates/" - - # Assign user info - user_postcode = kwargs.get("postcode") - user_paon = kwargs.get("paon") - web_driver = kwargs.get("web_driver") - - # Create Selenium webdriver - driver = create_webdriver(web_driver) - driver.get(page) - - # Enter postcode in text box and wait - inputElement_pc = driver.find_element( - By.ID, "txtAjaxSearch" - ) - inputElement_pc.send_keys(user_postcode) - - time.sleep(4) - - # Submit address information and wait - selecting the top one only - inputElement_bn = driver.find_element( - By.ID, "show-button-0" - ).click() - - time.sleep(4) - - # Read next collection information - page = driver.find_element( - By.ID, "divCalendarGraphics" - ).get_attribute("outerHTML") - - # Make a BS4 object - remove bold tags - soup = BeautifulSoup(page.text.strip().replace("", "").replace("", ""), features="html.parser") - soup.prettify() - - # Data to return - data = {"bins": []} - - # Valid bin types - binTypes = [ - "Refuse", - "Garden", - ] - - # Value to create dict for bin values - keys, values = [], [] - - # Loop though html for text containing bins - # example of html (bold tags removed above) - #
- # <br> Refuse: Tue 14 Nov then every alternate Tue<br>Recycling: No Recycling waste collection for this address<br>Garden: Tue 21 Nov then every alternate Tue<br>
spacer - # split by br tag and take first 4 splits - lines = soup.text.split('
',4) - for line in lines: - keys.append(line.text.split(':')[0].strip()) - # strip out the day and month from the text - values.append(line.text.split(':')[1].strip().split(' ')[1:2] - - # Create dict for bin name and string dates - binDict = dict(zip(keys, values)) - - # Process dict for valid bin types - for bin in list(binDict): - if bin in binTypes: - # Convert date - no year value so take it from todays date - date = datetime.strptime(binDict[bin], "%d %b").replace(year=datetime.today().year) - # if the date is in the past then it means the collection is next year so add a year - if date < today(): - date = date + relativedelta(years=1) - - # Set bin data - dict_data = { - "type": bin, - "collectionDate": date.strftime(date_format), - } - data["bins"].append(dict_data) - - # Quit Selenium webdriver to release session - driver.quit() - - return data From d09a3aababcf9ba42b9d8165c3c4d099b2c6b824 Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Tue, 14 Nov 2023 20:02:57 +0000 Subject: [PATCH 05/11] Delete uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil wrong file type --- .../uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil | 1 - 1 file changed, 1 deletion(-) delete mode 100644 uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil diff --git a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil deleted file mode 100644 index 8b13789179..0000000000 --- a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil +++ /dev/null @@ -1 +0,0 @@ - From 4c0cffb310c33dc5e73831157382972cd0cbd2a5 Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Tue, 14 Nov 2023 21:40:13 +0000 Subject: [PATCH 06/11] Update validate_council_outputs.feature Add Mid and East Antrim --- 
.../tests/features/validate_council_outputs.feature | 1 + 1 file changed, 1 insertion(+) diff --git a/uk_bin_collection/tests/features/validate_council_outputs.feature b/uk_bin_collection/tests/features/validate_council_outputs.feature index fd3a3d9216..c5abff3acf 100644 --- a/uk_bin_collection/tests/features/validate_council_outputs.feature +++ b/uk_bin_collection/tests/features/validate_council_outputs.feature @@ -56,6 +56,7 @@ Feature: Test each council output matches expected results | MalvernHillsDC | None | None | | ManchesterCityCouncil | None | None | | MertonCouncil | None | None | + | MidAndEastAntrimBoroughCouncil | http://selenium:4444 | local | | MidSussexDistrictCouncil | None | None | | MiltonKeynesCityCouncil | None | None | | NeathPortTalbotCouncil | http://selenium:4444 | local | From 5f38c7d509f1bd1fcc62f7cc110584b786913691 Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Tue, 14 Nov 2023 21:56:38 +0000 Subject: [PATCH 07/11] Update input.json Add Mid and East Antrim --- uk_bin_collection/tests/input.json | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index c9df4c0e60..0b8b761575 100644 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -324,6 +324,14 @@ "wiki_name": "Merton Council", "wiki_note": "Follow the instructions [here](https://myneighbourhood.merton.gov.uk/Wasteservices/WasteServicesSearch.aspx) until you get the \"Your recycling and rubbish collection days\" page then copy the URL and replace the URL in the command (the Address parameter is optional)." 
}, + "MidAndEastAntrimBoroughCouncil": { + "postcode": "100 Galgorm Road", + "skip_get_url": true, + "url": "https://www.midandeastantrim.gov.uk/resident/waste-recycling/collection-dates/", + "web_driver": "http://selenium:4444", + "wiki_name": "Mid and East Antrim Borough Council", + "wiki_note": "Pass the house name/number plus the name of the street with the postcode parameter, wrapped in double quotes. Check the address in the web site first. This version will only pick the first SHOW button returned by the search or if it is fully unique. The search is not very predictable (e.g. house number 4 returns 14,24,4,44 etc.)." + }, "MidSussexDistrictCouncil": { "house_number": "OAKLANDS", "postcode": "RH16 1SS", @@ -668,4 +676,4 @@ "url": "https://waste-api.york.gov.uk/api/Collections/GetBinCollectionDataForUprn/", "wiki_name": "York Council" } -} \ No newline at end of file +} From 710c60a40fb15ab5497c8e1f341751f89e80ca4a Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Wed, 15 Nov 2023 20:06:27 +0000 Subject: [PATCH 08/11] Update MidAndEastAntrimBoroughCouncil.py remove paon line as only use postcode but it needs to contain the address --- .../councils/MidAndEastAntrimBoroughCouncil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py index 9b03dac964..8c61bbccd2 100644 --- a/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/MidAndEastAntrimBoroughCouncil.py @@ -22,7 +22,7 @@ def parse_data(self, page: str, **kwargs) -> dict: # Assign user info user_postcode = kwargs.get("postcode") - user_paon = kwargs.get("paon") + # not used: user_paon = kwargs.get("paon") web_driver = kwargs.get("web_driver") # Create Selenium webdriver From 
abc099166389c6fc96ac46e4de7b466fe8a4607d Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Tue, 21 Nov 2023 18:04:59 +0000 Subject: [PATCH 09/11] feat: MidAndEastAntrimBoroughCouncil.py From def85079d0c301287a1c16db551f54b7c915662f Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Tue, 21 Nov 2023 18:08:09 +0000 Subject: [PATCH 10/11] feat: Add Mid and east antrim council From 8a71e5422d9c6567ba291d51847f82595c7267b4 Mon Sep 17 00:00:00 2001 From: PhilDB-cloud <55487883+PhilDB-cloud@users.noreply.github.com> Date: Tue, 21 Nov 2023 18:43:25 +0000 Subject: [PATCH 11/11] feat: Add Mid and east antrim council - validate_council_outputs.feature