Merge pull request #1198 from geekball/feat-herefordshire-council

feat: Add Herefordshire Council (closes: #1023)
robbrad · Jan 29, 2025 · ef44aad · ef44aad
2 parents a9f29b5 + 447565e
commit ef44aad
Show file tree

Hide file tree

Showing 2 changed files with 59 additions and 0 deletions.
diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json
@@ -882,6 +882,12 @@
         "wiki_name": "Hartlepool Borough Council",
         "wiki_note": "You will need to use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your UPRN."
     },
+    "HerefordshireCouncil": {
+        "url": "https://www.herefordshire.gov.uk/rubbish-recycling/check-bin-collection-day?blpu_uprn=10096232662",
+        "wiki_command_url_override": "https://www.herefordshire.gov.uk/rubbish-recycling/check-bin-collection-day?blpu_uprn=XXXXXXXXXXXX",
+        "wiki_name": "Herefordshire Council",
+        "wiki_note": "Replace 'XXXXXXXXXXXX' with your property's UPRN. You can find it using [FindMyAddress](https://www.findmyaddress.co.uk/search)."
+    },
     "HertsmereBoroughCouncil": {
         "house_number": "1",
         "postcode": "WD7 9HZ",

diff --git a/uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py b/uk_bin_collection/uk_bin_collection/councils/HerefordshireCouncil.py
@@ -0,0 +1,53 @@
+import logging
+
+from bs4 import BeautifulSoup
+
+from uk_bin_collection.uk_bin_collection.common import *
+from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
+
+
+# import the wonderful Beautiful Soup and the URL grabber
+class CouncilClass(AbstractGetBinDataClass):
+    """
+    Scraper for Herefordshire Council's "check bin collection day" page.
+
+    Subclass of ``AbstractGetBinDataClass`` that provides ``parse_data``.
+    """
+
+    def parse_data(self, page: str, **kwargs) -> dict:
+        """
+        Extract the next collection date for each bin type from the page.
+
+        Args:
+            page: Fetched page. Annotated ``str`` as in the original
+                signature, but only its ``.text`` attribute (the HTML) is read.
+            **kwargs: Unused here.
+
+        Returns:
+            ``{"bins": [...]}`` where each entry has a ``"type"`` (the block's
+            ``h4`` heading) and a ``"collectionDate"`` formatted with
+            ``date_format``.
+
+        Raises:
+            ValueError: If the selected-address paragraph or the collection
+                dates container is missing, or a date does not match
+                ``"%A %d %B %Y"``.
+        """
+        soup = BeautifulSoup(page.text, features="html.parser")
+
+        # A missing address paragraph is treated as a failed address lookup.
+        if soup.find("p", id="selectedAddressResult") is None:
+            raise ValueError("Address/UPRN not found")
+
+        collections = soup.find("div", id="wasteCollectionDates")
+        if collections is None:
+            # Fail with a clear message instead of an AttributeError below.
+            raise ValueError("Collection dates not found")
+
+        data = {"bins": []}
+
+        for island in collections.select('div[class*="hc-island"]'):
+            heading = island.h4
+            if heading is None:
+                logging.warning("Skipping collection block without a heading")
+                continue
+            bin_type = heading.get_text(strip=True)
+
+            # Last div.hc-island is the calendar link, skip it
+            if bin_type == "Calendar":
+                continue
+
+            # The date is the first span matched by "div > p > span"; its text
+            # is expected to look like "<date> (<extra>)" and only the text
+            # before the trailing parenthesised part is kept.
+            date_span = island.select_one("div > p > span")
+            date_match = (
+                re.search(r"(.*) \(.*\)", date_span.get_text(strip=True))
+                if date_span is not None
+                else None
+            )
+            if date_match is None:
+                # Skip this block rather than crash on unexpected markup.
+                logging.warning(
+                    "No collection date found for bin type: %s", bin_type
+                )
+                continue
+
+            bin_collection = date_match.group(1)
+            if bin_collection:
+                logging.info(
+                    "Bin type: %s - Collection date: %s", bin_type, bin_collection
+                )
+                data["bins"].append(
+                    {
+                        "type": bin_type,
+                        "collectionDate": datetime.strptime(
+                            bin_collection, "%A %d %B %Y"
+                        ).strftime(date_format),
+                    }
+                )
+
+        return data