
Commit

Merge pull request #1144 from m26dvd/master
robbrad authored Jan 16, 2025
2 parents 77789a9 + c43d47b commit e6031a5
Showing 12 changed files with 125 additions and 78 deletions.
15 changes: 3 additions & 12 deletions uk_bin_collection/tests/input.json
@@ -1208,8 +1208,8 @@
"wiki_note": "Pass the postcode and UPRN. This parser requires a Selenium webdriver."
},
"NewarkAndSherwoodDC": {
"url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529&nc=1",
"wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX&nc=1",
"url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529",
"wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX",
"wiki_name": "Newark and Sherwood District Council",
"wiki_note": "Replace XXXXXXXX with your UPRN."
},
@@ -1409,15 +1409,6 @@
"wiki_name": "Portsmouth City Council",
"wiki_note": "Pass the postcode and UPRN. This parser requires a Selenium webdriver."
},
"PowysCouncil": {
"house_number": "LANE COTTAGE",
"postcode": "HR3 5JS",
"skip_get_url": true,
"url": "https://www.powys.gov.uk",
"web_driver": "http://selenium:4444",
"wiki_name": "Powys Council",
"wiki_note": "Pass the house name/number and postcode in their respective parameters. This parser requires a Selenium webdriver."
},
"PowysCouncil": {
"house_number": "LANE COTTAGE",
"postcode": "HR3 5JS",
@@ -1768,7 +1759,7 @@
"wiki_name": "Sunderland City Council",
"wiki_note": "Provide your house number (without quotes) and postcode (wrapped in double quotes with a space)."
},
"SurreyHeathBoroughCouncil": {
"SurreyHeathBoroughCouncil": {
"house_number": "36",
"postcode": "GU20 6PN",
"skip_get_url": true,
@@ -34,10 +34,10 @@ def parse_data(self, page: str, **kwargs) -> dict:

# Find the next collection date
date_tag = container.find(class_="font11 text-center")
if date_tag.text.strip() == "":
continue
else:
if date_tag:
collection_date = date_tag.text.strip()
else:
continue

dict_data = {
"type": bin_type,
25 changes: 18 additions & 7 deletions uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
@@ -1,3 +1,5 @@
import datetime

from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
@@ -32,15 +34,24 @@ def parse_data(self, page: str, **kwargs) -> dict:
for p in ps:
collection = p.text.strip().replace("Your next ", "").split(".")[0]
bin_type = collection.split(" day is")[0]
collection_date = datetime.strptime(
remove_ordinal_indicator_from_date_string(collection).split("day is ")[
1
],
"%A %d %B %Y",
)
collection_date = remove_ordinal_indicator_from_date_string(
collection
).split("day is ")[1]
if collection_date == "Today":
collection_date = datetime.today().strftime(date_format)
elif collection_date == "Tomorrow":
collection_date = (datetime.today() + timedelta(days=1)).strftime(
date_format
)
print(collection_date)
else:
collection_date = datetime.strptime(
collection_date,
"%A %d %B %Y",
).strftime(date_format)
dict_data = {
"type": bin_type,
"collectionDate": collection_date.strftime(date_format),
"collectionDate": collection_date,
}
data["bins"].append(dict_data)

4 changes: 2 additions & 2 deletions uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py
@@ -47,10 +47,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
bin_type = cells[0].get_text().strip()
# Date is on the second cell, second paragraph, wrapped in p
collectionDate = None
for date_format in possible_formats:
for format in possible_formats:
try:
collectionDate = datetime.strptime(
cells[1].select("p > b")[2].get_text(strip=True), date_format
cells[1].select("p > b")[2].get_text(strip=True), format
)
break # Exit the loop if parsing is successful
except ValueError:
17 changes: 10 additions & 7 deletions uk_bin_collection/uk_bin_collection/councils/PowysCouncil.py
@@ -127,12 +127,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
li.text for li in garden_waste_section.find_next("ul").find_all("li")
]
for date in garden_waste_dates:
dict_data = {
"type": "Garden Waste",
"collectionDate": datetime.strptime(
remove_ordinal_indicator_from_date_string(date), "%d %B %Y"
).strftime(date_format),
}
data["bins"].append(dict_data)
try:
dict_data = {
"type": "Garden Waste",
"collectionDate": datetime.strptime(
remove_ordinal_indicator_from_date_string(date), "%d %B %Y"
).strftime(date_format),
}
data["bins"].append(dict_data)
except:
continue

return data
@@ -1,5 +1,6 @@
import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

@@ -54,6 +55,9 @@ def parse_data(self, page: str, **kwargs) -> dict:

data = {"bins": []}

current_year = datetime.now().year
next_year = current_year + 1

# Page has slider info side by side, which are two instances of this class
for bin in soup.find_all("div", {"class": "binextra"}):
bin_info = list(bin.stripped_strings)
@@ -62,27 +66,32 @@
if contains_date(bin_info[0]):
bin_date = get_next_occurrence_from_day_month(
datetime.strptime(
bin_info[0] + " " + datetime.today().strftime("%Y"),
"%A %d %B - %Y",
bin_info[0],
"%A %d %B -",
)
).strftime(date_format)
bin_type = str.capitalize(' '.join(bin_info[1:]))
)
bin_type = str.capitalize(" ".join(bin_info[1:]))
# On exceptional collection schedule (e.g. around English Bank Holidays), date will be contained in the second stripped string
else:
bin_date = get_next_occurrence_from_day_month(
datetime.strptime(
bin_info[1] + " " + datetime.today().strftime("%Y"),
"%A %d %B - %Y",
bin_info[1],
"%A %d %B -",
)
).strftime(date_format)
str.capitalize(' '.join(bin_info[2:]))
except Exception as ex:
raise ValueError(f"Error parsing bin data: {ex}")
)
str.capitalize(" ".join(bin_info[2:]))
except:
continue

if (datetime.now().month == 12) and (bin_date.month == 1):
bin_date = bin_date.replace(year=next_year)
else:
bin_date = bin_date.replace(year=current_year)

# Build data dict for each entry
dict_data = {
"type": bin_type,
"collectionDate": bin_date,
"collectionDate": bin_date.strftime(date_format),
}
data["bins"].append(dict_data)

@@ -30,12 +30,12 @@ def parse_data(self, page: str, **kwargs) -> dict:
driver.get("https://www.staffsmoorlands.gov.uk/findyourbinday")

# Close cookies banner
cookieAccept = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(
(By.CSS_SELECTOR, ".cookiemessage__link--close")
)
)
cookieAccept.click()
# cookieAccept = WebDriverWait(driver, 10).until(
# EC.presence_of_element_located(
# (By.CSS_SELECTOR, ".cookiemessage__link--close")
# )
# )
# cookieAccept.click()

# Wait for the postcode field to appear then populate it
inputElement_postcode = WebDriverWait(driver, 30).until(
@@ -1,8 +1,10 @@
from datetime import datetime, timedelta
from typing import Any, Dict

from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
from datetime import datetime, timedelta
from typing import Dict, Any


class CouncilClass(AbstractGetBinDataClass):
@@ -78,7 +80,10 @@ def parse_data(self, page: Any, **kwargs: Any) -> Dict[str, Any]:
formatted_date = self.get_next_weekday(day_name)
else:
# Convert date format from "Tuesday 28 May 2024" to "28/05/2024"
date_obj = datetime.strptime(value, "%A %d %B %Y")
try:
date_obj = datetime.strptime(value, "%A %d %B %Y")
except:
continue
formatted_date = date_obj.strftime("%d/%m/%Y")

bin_entry = {
@@ -1,4 +1,5 @@
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

@@ -31,12 +32,12 @@ def parse_data(self, page: str, **kwargs) -> dict:
params = {
"RequestType": "LocalInfo",
"ms": "ValeOfGlamorgan/AllMaps",
"group": "Community and Living|Refuse HIDE2",
"type": "json",
"group": "Waste|new_refuse",
"type": "jsonp",
"callback": "AddressInfoCallback",
"uid": user_uprn,
"import": "jQuery35108514154283927682_1673022974838",
"_": "1673022974840",
"import": "jQuery35107288886041176057_1736292844067",
"_": "1736292844068",
}

# Get a response from the council
@@ -46,13 +47,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
headers=headers,
).text

response = response.replace("AddressInfoCallback(", "").rstrip(");")

# Load the JSON and seek out the bin week text, then add it to the calendar URL. Also take the weekly
# collection type and generate dates for it. Then make a GET request for the calendar
bin_week = str(
json.loads(response)["Results"]["Refuse_HIDE2"]["Your_Refuse_round_is"]
json.loads(response)["Results"]["waste"]["roundday_residual"]
).replace(" ", "-")
weekly_collection = str(
json.loads(response)["Results"]["Refuse_HIDE2"]["Recycling__type"]
json.loads(response)["Results"]["waste"]["recycling_code"]
).capitalize()
weekly_dates = get_weekday_dates_in_period(
datetime.now(), days_of_week.get(bin_week.split("-")[0].strip()), amount=48
@@ -53,6 +53,9 @@ def parse_data(self, page: str, **kwargs) -> dict:

data = {"bins": []}

current_year = datetime.now().year
next_year = current_year + 1

# Page has slider info side by side, which are two instances of this class
for bin in soup.find_all("div", {"class": "bintxt"}):
try:
@@ -74,23 +77,31 @@ def parse_data(self, page: str, **kwargs) -> dict:
if contains_date(bin_date_info[0]):
bin_date = get_next_occurrence_from_day_month(
datetime.strptime(
bin_date_info[0] + " " + datetime.today().strftime("%Y"),
"%A %d %B - %Y",
bin_date_info[0],
"%A %d %B -",
)
).strftime(date_format)
)
# On exceptional collection schedule (e.g. around English Bank Holidays), date will be contained in the second stripped string
else:
bin_date = get_next_occurrence_from_day_month(
datetime.strptime(
bin_date_info[1] + " " + datetime.today().strftime("%Y"),
"%A %d %B - %Y",
bin_date_info[1],
"%A %d %B -",
)
).strftime(date_format)
)
except Exception as ex:
raise ValueError(f"Error parsing bin data: {ex}")

if (datetime.now().month == 12) and (bin_date.month == 1):
bin_date = bin_date.replace(year=next_year)
else:
bin_date = bin_date.replace(year=current_year)

# Build data dict for each entry
dict_data = {"type": bin_type, "collectionDate": bin_date}
dict_data = {
"type": bin_type,
"collectionDate": bin_date.strftime(date_format),
}
data["bins"].append(dict_data)

data["bins"].sort(
@@ -42,22 +42,22 @@ def parse_data(self, page: str, **kwargs) -> dict:
wait = WebDriverWait(driver, 60)
address_entry_field = wait.until(
EC.presence_of_element_located(
(By.XPATH, '//*[@id="combobox-input-19"]')
(By.XPATH, '//*[@id="combobox-input-20"]')
)
)

address_entry_field.send_keys(str(full_address))

address_entry_field = wait.until(
EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-19"]'))
EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-20"]'))
)
address_entry_field.click()
address_entry_field.send_keys(Keys.BACKSPACE)
address_entry_field.send_keys(str(full_address[len(full_address) - 1]))

first_found_address = wait.until(
EC.element_to_be_clickable(
(By.XPATH, '//*[@id="dropdown-element-19"]/ul')
(By.XPATH, '//*[@id="dropdown-element-20"]/ul')
)
)
