diff --git a/uk_bin_collection/tests/input.json b/uk_bin_collection/tests/input.json index d124a0bfcf..07b2c1bd19 100755 --- a/uk_bin_collection/tests/input.json +++ b/uk_bin_collection/tests/input.json @@ -1208,8 +1208,8 @@ "wiki_note": "Pass the postcode and UPRN. This parser requires a Selenium webdriver." }, "NewarkAndSherwoodDC": { - "url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529&nc=1", - "wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX&nc=1", + "url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529", + "wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX", "wiki_name": "Newark and Sherwood District Council", "wiki_note": "Replace XXXXXXXX with your UPRN." }, @@ -1409,15 +1409,6 @@ "wiki_name": "Portsmouth City Council", "wiki_note": "Pass the postcode and UPRN. This parser requires a Selenium webdriver." }, - "PowysCouncil": { - "house_number": "LANE COTTAGE", - "postcode": "HR3 5JS", - "skip_get_url": true, - "url": "https://www.powys.gov.uk", - "web_driver": "http://selenium:4444", - "wiki_name": "Powys Council", - "wiki_note": "Pass the house name/number and postcode in their respective parameters. This parser requires a Selenium webdriver." - }, "PowysCouncil": { "house_number": "LANE COTTAGE", "postcode": "HR3 5JS", @@ -1768,7 +1759,7 @@ "wiki_name": "Sunderland City Council", "wiki_note": "Provide your house number (without quotes) and postcode (wrapped in double quotes with a space)." }, - "SurreyHeathBoroughCouncil": { + "SurreyHeathBoroughCouncil": { "house_number": "36", "postcode": "GU20 6PN", "skip_get_url": true, diff --git a/uk_bin_collection/uk_bin_collection/councils/CarmarthenshireCountyCouncil.py b/uk_bin_collection/uk_bin_collection/councils/CarmarthenshireCountyCouncil.py index 9bedb1ca6b..22814d82ad 100644 --- a/uk_bin_collection/uk_bin_collection/councils/CarmarthenshireCountyCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/CarmarthenshireCountyCouncil.py @@ -34,10 +34,10 @@ def parse_data(self, page: str, **kwargs) -> dict: # Find the next collection date date_tag = container.find(class_="font11 text-center") - if date_tag.text.strip() == "": - continue - else: + if date_tag: collection_date = date_tag.text.strip() + else: + continue dict_data = { "type": bin_type, diff --git a/uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py b/uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py index 4c0eaaa78d..559b9bdcc6 100644 --- a/uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py @@ -1,3 +1,5 @@ +import datetime + from bs4 import BeautifulSoup from uk_bin_collection.uk_bin_collection.common import * @@ -32,15 +34,24 @@ def parse_data(self, page: str, **kwargs) -> dict: for p in ps: collection = p.text.strip().replace("Your next ", "").split(".")[0] bin_type = collection.split(" day is")[0] - collection_date = datetime.strptime( - remove_ordinal_indicator_from_date_string(collection).split("day is ")[ - 1 - ], - "%A %d %B %Y", - ) + collection_date = remove_ordinal_indicator_from_date_string( + collection + ).split("day is ")[1] + if collection_date == "Today": + collection_date = datetime.today().strftime(date_format) + elif collection_date == "Tomorrow": + collection_date = (datetime.today() + timedelta(days=1)).strftime( + date_format + ) + print(collection_date) + else: + collection_date = datetime.strptime( + collection_date, + "%A %d %B %Y", + ).strftime(date_format) dict_data = { "type": bin_type, - "collectionDate": collection_date.strftime(date_format), + "collectionDate": collection_date, } data["bins"].append(dict_data) diff --git a/uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py b/uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py index 0912ce3aee..e84066b009 100644 --- a/uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py @@ -47,10 +47,10 @@ def parse_data(self, page: str, **kwargs) -> dict: bin_type = cells[0].get_text().strip() # Date is on the second cell, second paragraph, wrapped in p collectionDate = None - for date_format in possible_formats: + for format in possible_formats: try: collectionDate = datetime.strptime( - cells[1].select("p > b")[2].get_text(strip=True), date_format + cells[1].select("p > b")[2].get_text(strip=True), format ) break # Exit the loop if parsing is successful except ValueError: diff --git a/uk_bin_collection/uk_bin_collection/councils/PowysCouncil.py b/uk_bin_collection/uk_bin_collection/councils/PowysCouncil.py index b5980279cd..d5c68dc97b 100644 --- a/uk_bin_collection/uk_bin_collection/councils/PowysCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/PowysCouncil.py @@ -127,12 +127,15 @@ def parse_data(self, page: str, **kwargs) -> dict: li.text for li in garden_waste_section.find_next("ul").find_all("li") ] for date in garden_waste_dates: - dict_data = { - "type": "Garden Waste", - "collectionDate": datetime.strptime( - remove_ordinal_indicator_from_date_string(date), "%d %B %Y" - ).strftime(date_format), - } - data["bins"].append(dict_data) + try: + dict_data = { + "type": "Garden Waste", + "collectionDate": datetime.strptime( + remove_ordinal_indicator_from_date_string(date), "%d %B %Y" + ).strftime(date_format), + } + data["bins"].append(dict_data) + except: + continue return data diff --git a/uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py b/uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py index 3d368616be..268b89aa33 100644 --- a/uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/SouthOxfordshireCouncil.py @@ -1,5 +1,6 @@ import requests from bs4 import BeautifulSoup + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass @@ -54,6 +55,9 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} + current_year = datetime.now().year + next_year = current_year + 1 + # Page has slider info side by side, which are two instances of this class for bin in soup.find_all("div", {"class": "binextra"}): bin_info = list(bin.stripped_strings) @@ -62,27 +66,32 @@ def parse_data(self, page: str, **kwargs) -> dict: if contains_date(bin_info[0]): bin_date = get_next_occurrence_from_day_month( datetime.strptime( - bin_info[0] + " " + datetime.today().strftime("%Y"), - "%A %d %B - %Y", + bin_info[0], + "%A %d %B -", ) - ).strftime(date_format) - bin_type = str.capitalize(' '.join(bin_info[1:])) + ) + bin_type = str.capitalize(" ".join(bin_info[1:])) # On exceptional collection schedule (e.g. around English Bank Holidays), date will be contained in the second stripped string else: bin_date = get_next_occurrence_from_day_month( datetime.strptime( - bin_info[1] + " " + datetime.today().strftime("%Y"), - "%A %d %B - %Y", + bin_info[1], + "%A %d %B -", ) - ).strftime(date_format) - str.capitalize(' '.join(bin_info[2:])) - except Exception as ex: - raise ValueError(f"Error parsing bin data: {ex}") + ) + str.capitalize(" ".join(bin_info[2:])) + except: + continue + + if (datetime.now().month == 12) and (bin_date.month == 1): + bin_date = bin_date.replace(year=next_year) + else: + bin_date = bin_date.replace(year=current_year) # Build data dict for each entry dict_data = { "type": bin_type, - "collectionDate": bin_date, + "collectionDate": bin_date.strftime(date_format), } data["bins"].append(dict_data) diff --git a/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py index 6b36d8c280..89c0d4c11e 100644 --- a/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/StaffordshireMoorlandsDistrictCouncil.py @@ -30,12 +30,12 @@ def parse_data(self, page: str, **kwargs) -> dict: driver.get("https://www.staffsmoorlands.gov.uk/findyourbinday") # Close cookies banner - cookieAccept = WebDriverWait(driver, 10).until( - EC.presence_of_element_located( - (By.CSS_SELECTOR, ".cookiemessage__link--close") - ) - ) - cookieAccept.click() + # cookieAccept = WebDriverWait(driver, 10).until( + # EC.presence_of_element_located( + # (By.CSS_SELECTOR, ".cookiemessage__link--close") + # ) + # ) + # cookieAccept.click() # Wait for the postcode field to appear then populate it inputElement_postcode = WebDriverWait(driver, 30).until( diff --git a/uk_bin_collection/uk_bin_collection/councils/StroudDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/StroudDistrictCouncil.py index 480f1bc5fa..6d580311a2 100644 --- a/uk_bin_collection/uk_bin_collection/councils/StroudDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/StroudDistrictCouncil.py @@ -1,8 +1,10 @@ +from datetime import datetime, timedelta +from typing import Any, Dict + from bs4 import BeautifulSoup + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass -from datetime import datetime, timedelta -from typing import Dict, Any class CouncilClass(AbstractGetBinDataClass): @@ -78,7 +80,10 @@ def parse_data(self, page: Any, **kwargs: Any) -> Dict[str, Any]: formatted_date = self.get_next_weekday(day_name) else: # Convert date format from "Tuesday 28 May 2024" to "28/05/2024" - date_obj = datetime.strptime(value, "%A %d %B %Y") + try: + date_obj = datetime.strptime(value, "%A %d %B %Y") + except: + continue formatted_date = date_obj.strftime("%d/%m/%Y") bin_entry = { diff --git a/uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py b/uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py index 876e0b4a76..c5bdd48e5c 100644 --- a/uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/ValeofGlamorganCouncil.py @@ -1,4 +1,5 @@ from bs4 import BeautifulSoup + from uk_bin_collection.uk_bin_collection.common import * from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass @@ -31,12 +32,12 @@ def parse_data(self, page: str, **kwargs) -> dict: params = { "RequestType": "LocalInfo", "ms": "ValeOfGlamorgan/AllMaps", - "group": "Community and Living|Refuse HIDE2", - "type": "json", + "group": "Waste|new_refuse", + "type": "jsonp", "callback": "AddressInfoCallback", "uid": user_uprn, - "import": "jQuery35108514154283927682_1673022974838", - "_": "1673022974840", + "import": "jQuery35107288886041176057_1736292844067", + "_": "1736292844068", } # Get a response from the council @@ -46,13 +47,15 @@ def parse_data(self, page: str, **kwargs) -> dict: headers=headers, ).text + response = response.replace("AddressInfoCallback(", "").rstrip(");") + # Load the JSON and seek out the bin week text, then add it to the calendar URL. Also take the weekly # collection type and generate dates for it. Then make a GET request for the calendar bin_week = str( - json.loads(response)["Results"]["Refuse_HIDE2"]["Your_Refuse_round_is"] + json.loads(response)["Results"]["waste"]["roundday_residual"] ).replace(" ", "-") weekly_collection = str( - json.loads(response)["Results"]["Refuse_HIDE2"]["Recycling__type"] + json.loads(response)["Results"]["waste"]["recycling_code"] ).capitalize() weekly_dates = get_weekday_dates_in_period( datetime.now(), days_of_week.get(bin_week.split("-")[0].strip()), amount=48 diff --git a/uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py b/uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py index 3f9f73a35c..86df3218f7 100644 --- a/uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/ValeofWhiteHorseCouncil.py @@ -53,6 +53,9 @@ def parse_data(self, page: str, **kwargs) -> dict: data = {"bins": []} + current_year = datetime.now().year + next_year = current_year + 1 + # Page has slider info side by side, which are two instances of this class for bin in soup.find_all("div", {"class": "bintxt"}): try: @@ -74,23 +77,31 @@ def parse_data(self, page: str, **kwargs) -> dict: if contains_date(bin_date_info[0]): bin_date = get_next_occurrence_from_day_month( datetime.strptime( - bin_date_info[0] + " " + datetime.today().strftime("%Y"), - "%A %d %B - %Y", + bin_date_info[0], + "%A %d %B -", ) - ).strftime(date_format) + ) # On exceptional collection schedule (e.g. around English Bank Holidays), date will be contained in the second stripped string else: bin_date = get_next_occurrence_from_day_month( datetime.strptime( - bin_date_info[1] + " " + datetime.today().strftime("%Y"), - "%A %d %B - %Y", + bin_date_info[1], + "%A %d %B -", ) - ).strftime(date_format) + ) except Exception as ex: raise ValueError(f"Error parsing bin data: {ex}") + if (datetime.now().month == 12) and (bin_date.month == 1): + bin_date = bin_date.replace(year=next_year) + else: + bin_date = bin_date.replace(year=current_year) + # Build data dict for each entry - dict_data = {"type": bin_type, "collectionDate": bin_date} + dict_data = { + "type": bin_type, + "collectionDate": bin_date.strftime(date_format), + } data["bins"].append(dict_data) data["bins"].sort( diff --git a/uk_bin_collection/uk_bin_collection/councils/WestOxfordshireDistrictCouncil.py b/uk_bin_collection/uk_bin_collection/councils/WestOxfordshireDistrictCouncil.py index 99b39edbd7..7a18c76448 100644 --- a/uk_bin_collection/uk_bin_collection/councils/WestOxfordshireDistrictCouncil.py +++ b/uk_bin_collection/uk_bin_collection/councils/WestOxfordshireDistrictCouncil.py @@ -42,14 +42,14 @@ def parse_data(self, page: str, **kwargs) -> dict: wait = WebDriverWait(driver, 60) address_entry_field = wait.until( EC.presence_of_element_located( - (By.XPATH, '//*[@id="combobox-input-19"]') + (By.XPATH, '//*[@id="combobox-input-20"]') ) ) address_entry_field.send_keys(str(full_address)) address_entry_field = wait.until( - EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-19"]')) + EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-20"]')) ) address_entry_field.click() address_entry_field.send_keys(Keys.BACKSPACE) @@ -57,7 +57,7 @@ def parse_data(self, page: str, **kwargs) -> dict: first_found_address = wait.until( EC.element_to_be_clickable( - (By.XPATH, '//*[@id="dropdown-element-19"]/ul') + (By.XPATH, '//*[@id="dropdown-element-20"]/ul') ) ) diff --git a/wiki/Councils.md b/wiki/Councils.md index 494a9a08d7..af3a0a17fe 100644 --- a/wiki/Councils.md +++ b/wiki/Councils.md @@ -69,8 +69,8 @@ This document is still a work in progress, don't worry if your council isn't lis - [Conwy County Borough Council](#conwy-county-borough-council) - [Copeland Borough Council](#copeland-borough-council) - [Cornwall Council](#cornwall-council) -- [Coventry City Council](#coventry-city-council) - [Cotswold District Council](#cotswold-district-council) +- [Coventry City Council](#coventry-city-council) - [Crawley Borough Council](#crawley-borough-council) - [Croydon Council](#croydon-council) - [Cumberland Borough Council](#cumberland-borough-council) @@ -242,6 +242,7 @@ This document is still a work in progress, don't worry if your council isn't lis - [Stratford Upon Avon Council](#stratford-upon-avon-council) - [Stroud District Council](#stroud-district-council) - [Sunderland City Council](#sunderland-city-council) +- [Surrey Heath Borough Council / Joint Waste Solutions](#surrey-heath-borough-council-/-joint-waste-solutions) - [Swale Borough Council](#swale-borough-council) - [Swansea Council](#swansea-council) - [Swindon Borough Council](#swindon-borough-council) @@ -1019,15 +1020,6 @@ Note: Use [FindMyAddress](https://www.findmyaddress.co.uk/search) to find your U --- -### Coventry City Council -```commandline -python collect_data.py CoventryCityCouncil https://www.coventry.gov.uk/directory_record/XXXXXX/XXXXXX -``` - -Note: Follow the instructions [here](https://www.coventry.gov.uk/bin-collection-calendar) until you get the page that shows the weekly collections for your address then copy the URL and replace the URL in the command. - ---- - ### Cotswold District Council ```commandline python collect_data.py CotswoldDistrictCouncil https://community.cotswold.gov.uk/s/waste-collection-enquiry -s -p "XXXX XXX" -n XX -w http://HOST:PORT/ @@ -1042,6 +1034,15 @@ Note: Pass the full address in the house number and postcode in --- +### Coventry City Council +```commandline +python collect_data.py CoventryCityCouncil https://www.coventry.gov.uk/directory_record/XXXXXX/XXXXXX +``` + +Note: Follow the instructions [here](https://www.coventry.gov.uk/bin-collection-calendar) until you get the page that shows the weekly collections for your address then copy the URL and replace the URL in the command. + +--- + ### Crawley Borough Council ```commandline python collect_data.py CrawleyBoroughCouncil https://my.crawley.gov.uk/ -s -u XXXXXXXX -n XX @@ -2199,7 +2200,7 @@ Note: Pass the postcode and UPRN. This parser requires a Selenium webdriver. ### Newark and Sherwood District Council ```commandline -python collect_data.py NewarkAndSherwoodDC http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX&nc=1 +python collect_data.py NewarkAndSherwoodDC http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX ``` Note: Replace XXXXXXXX with your UPRN. @@ -3074,6 +3075,19 @@ Note: Provide your house number (without quotes) and postcode (wrapped in double --- +### Surrey Heath Borough Council / Joint Waste Solutions +```commandline +python collect_data.py SurreyHeathBoroughCouncil https://asjwsw-wrpsurreyheathmunicipal-live.whitespacews.com/ -s -p "XXXX XXX" -n XX +``` +Additional parameters: +- `-s` - skip get URL +- `-p` - postcode +- `-n` - house number + +Note: Provide your house number in the `house_number` parameter and postcode in the `postcode` parameter. + +--- + ### Swale Borough Council ```commandline python collect_data.py SwaleBoroughCouncil https://swale.gov.uk/bins-littering-and-the-environment/bins/collection-days -s -p "XXXX XXX" -n XX -w http://HOST:PORT/