
Commit

Merge pull request #1144 from m26dvd/master
robbrad authored Jan 16, 2025
2 parents 77789a9 + c43d47b commit e6031a5
Showing 12 changed files with 125 additions and 78 deletions.
15 changes: 3 additions & 12 deletions uk_bin_collection/tests/input.json
@@ -1208,8 +1208,8 @@
"wiki_note": "Pass the postcode and UPRN. This parser requires a Selenium webdriver."
},
"NewarkAndSherwoodDC": {
"url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529&nc=1",
"wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX&nc=1",
"url": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=200004258529",
"wiki_command_url_override": "http://app.newark-sherwooddc.gov.uk/bincollection/calendar?pid=XXXXXXXX",
"wiki_name": "Newark and Sherwood District Council",
"wiki_note": "Replace XXXXXXXX with your UPRN."
},
@@ -1409,15 +1409,6 @@
"wiki_name": "Portsmouth City Council",
"wiki_note": "Pass the postcode and UPRN. This parser requires a Selenium webdriver."
},
"PowysCouncil": {
"house_number": "LANE COTTAGE",
"postcode": "HR3 5JS",
"skip_get_url": true,
"url": "https://www.powys.gov.uk",
"web_driver": "http://selenium:4444",
"wiki_name": "Powys Council",
"wiki_note": "Pass the house name/number and postcode in their respective parameters. This parser requires a Selenium webdriver."
},
"PowysCouncil": {
"house_number": "LANE COTTAGE",
"postcode": "HR3 5JS",
@@ -1768,7 +1759,7 @@
"wiki_name": "Sunderland City Council",
"wiki_note": "Provide your house number (without quotes) and postcode (wrapped in double quotes with a space)."
},
"SurreyHeathBoroughCouncil": {
"SurreyHeathBoroughCouncil": {
"house_number": "36",
"postcode": "GU20 6PN",
"skip_get_url": true,
@@ -34,10 +34,10 @@ def parse_data(self, page: str, **kwargs) -> dict:

# Find the next collection date
date_tag = container.find(class_="font11 text-center")
if date_tag.text.strip() == "":
continue
else:
if date_tag:
collection_date = date_tag.text.strip()
else:
continue

dict_data = {
"type": bin_type,
25 changes: 18 additions & 7 deletions uk_bin_collection/uk_bin_collection/councils/GlasgowCityCouncil.py
@@ -1,3 +1,5 @@
import datetime

from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
@@ -32,15 +34,24 @@ def parse_data(self, page: str, **kwargs) -> dict:
for p in ps:
collection = p.text.strip().replace("Your next ", "").split(".")[0]
bin_type = collection.split(" day is")[0]
collection_date = datetime.strptime(
remove_ordinal_indicator_from_date_string(collection).split("day is ")[
1
],
"%A %d %B %Y",
)
collection_date = remove_ordinal_indicator_from_date_string(
collection
).split("day is ")[1]
if collection_date == "Today":
collection_date = datetime.today().strftime(date_format)
elif collection_date == "Tomorrow":
collection_date = (datetime.today() + timedelta(days=1)).strftime(
date_format
)
print(collection_date)
else:
collection_date = datetime.strptime(
collection_date,
"%A %d %B %Y",
).strftime(date_format)
dict_data = {
"type": bin_type,
"collectionDate": collection_date.strftime(date_format),
"collectionDate": collection_date,
}
data["bins"].append(dict_data)

4 changes: 2 additions & 2 deletions uk_bin_collection/uk_bin_collection/councils/MertonCouncil.py
@@ -47,10 +47,10 @@ def parse_data(self, page: str, **kwargs) -> dict:
bin_type = cells[0].get_text().strip()
# Date is on the second cell, second paragraph, wrapped in p
collectionDate = None
for date_format in possible_formats:
for format in possible_formats:
try:
collectionDate = datetime.strptime(
cells[1].select("p > b")[2].get_text(strip=True), date_format
cells[1].select("p > b")[2].get_text(strip=True), format
)
break # Exit the loop if parsing is successful
except ValueError:
17 changes: 10 additions & 7 deletions uk_bin_collection/uk_bin_collection/councils/PowysCouncil.py
@@ -127,12 +127,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
li.text for li in garden_waste_section.find_next("ul").find_all("li")
]
for date in garden_waste_dates:
dict_data = {
"type": "Garden Waste",
"collectionDate": datetime.strptime(
remove_ordinal_indicator_from_date_string(date), "%d %B %Y"
).strftime(date_format),
}
data["bins"].append(dict_data)
try:
dict_data = {
"type": "Garden Waste",
"collectionDate": datetime.strptime(
remove_ordinal_indicator_from_date_string(date), "%d %B %Y"
).strftime(date_format),
}
data["bins"].append(dict_data)
except:
continue

return data
@@ -1,5 +1,6 @@
import requests
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

@@ -54,6 +55,9 @@ def parse_data(self, page: str, **kwargs) -> dict:

data = {"bins": []}

current_year = datetime.now().year
next_year = current_year + 1

# Page has slider info side by side, which are two instances of this class
for bin in soup.find_all("div", {"class": "binextra"}):
bin_info = list(bin.stripped_strings)
@@ -62,27 +66,32 @@
if contains_date(bin_info[0]):
bin_date = get_next_occurrence_from_day_month(
datetime.strptime(
bin_info[0] + " " + datetime.today().strftime("%Y"),
"%A %d %B - %Y",
bin_info[0],
"%A %d %B -",
)
).strftime(date_format)
bin_type = str.capitalize(' '.join(bin_info[1:]))
)
bin_type = str.capitalize(" ".join(bin_info[1:]))
# On exceptional collection schedule (e.g. around English Bank Holidays), date will be contained in the second stripped string
else:
bin_date = get_next_occurrence_from_day_month(
datetime.strptime(
bin_info[1] + " " + datetime.today().strftime("%Y"),
"%A %d %B - %Y",
bin_info[1],
"%A %d %B -",
)
).strftime(date_format)
str.capitalize(' '.join(bin_info[2:]))
except Exception as ex:
raise ValueError(f"Error parsing bin data: {ex}")
)
str.capitalize(" ".join(bin_info[2:]))
except:
continue

if (datetime.now().month == 12) and (bin_date.month == 1):
bin_date = bin_date.replace(year=next_year)
else:
bin_date = bin_date.replace(year=current_year)

# Build data dict for each entry
dict_data = {
"type": bin_type,
"collectionDate": bin_date,
"collectionDate": bin_date.strftime(date_format),
}
data["bins"].append(dict_data)

@@ -30,12 +30,12 @@ def parse_data(self, page: str, **kwargs) -> dict:
driver.get("https://www.staffsmoorlands.gov.uk/findyourbinday")

# Close cookies banner
cookieAccept = WebDriverWait(driver, 10).until(
EC.presence_of_element_located(
(By.CSS_SELECTOR, ".cookiemessage__link--close")
)
)
cookieAccept.click()
# cookieAccept = WebDriverWait(driver, 10).until(
# EC.presence_of_element_located(
# (By.CSS_SELECTOR, ".cookiemessage__link--close")
# )
# )
# cookieAccept.click()

# Wait for the postcode field to appear then populate it
inputElement_postcode = WebDriverWait(driver, 30).until(
@@ -1,8 +1,10 @@
from datetime import datetime, timedelta
from typing import Any, Dict

from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass
from datetime import datetime, timedelta
from typing import Dict, Any


class CouncilClass(AbstractGetBinDataClass):
@@ -78,7 +80,10 @@ def parse_data(self, page: Any, **kwargs: Any) -> Dict[str, Any]:
formatted_date = self.get_next_weekday(day_name)
else:
# Convert date format from "Tuesday 28 May 2024" to "28/05/2024"
date_obj = datetime.strptime(value, "%A %d %B %Y")
try:
date_obj = datetime.strptime(value, "%A %d %B %Y")
except:
continue
formatted_date = date_obj.strftime("%d/%m/%Y")

bin_entry = {
@@ -1,4 +1,5 @@
from bs4 import BeautifulSoup

from uk_bin_collection.uk_bin_collection.common import *
from uk_bin_collection.uk_bin_collection.get_bin_data import AbstractGetBinDataClass

@@ -31,12 +32,12 @@ def parse_data(self, page: str, **kwargs) -> dict:
params = {
"RequestType": "LocalInfo",
"ms": "ValeOfGlamorgan/AllMaps",
"group": "Community and Living|Refuse HIDE2",
"type": "json",
"group": "Waste|new_refuse",
"type": "jsonp",
"callback": "AddressInfoCallback",
"uid": user_uprn,
"import": "jQuery35108514154283927682_1673022974838",
"_": "1673022974840",
"import": "jQuery35107288886041176057_1736292844067",
"_": "1736292844068",
}

# Get a response from the council
@@ -46,13 +47,15 @@ def parse_data(self, page: str, **kwargs) -> dict:
headers=headers,
).text

response = response.replace("AddressInfoCallback(", "").rstrip(");")

# Load the JSON and seek out the bin week text, then add it to the calendar URL. Also take the weekly
# collection type and generate dates for it. Then make a GET request for the calendar
bin_week = str(
json.loads(response)["Results"]["Refuse_HIDE2"]["Your_Refuse_round_is"]
json.loads(response)["Results"]["waste"]["roundday_residual"]
).replace(" ", "-")
weekly_collection = str(
json.loads(response)["Results"]["Refuse_HIDE2"]["Recycling__type"]
json.loads(response)["Results"]["waste"]["recycling_code"]
).capitalize()
weekly_dates = get_weekday_dates_in_period(
datetime.now(), days_of_week.get(bin_week.split("-")[0].strip()), amount=48
@@ -53,6 +53,9 @@ def parse_data(self, page: str, **kwargs) -> dict:

data = {"bins": []}

current_year = datetime.now().year
next_year = current_year + 1

# Page has slider info side by side, which are two instances of this class
for bin in soup.find_all("div", {"class": "bintxt"}):
try:
@@ -74,23 +77,31 @@ def parse_data(self, page: str, **kwargs) -> dict:
if contains_date(bin_date_info[0]):
bin_date = get_next_occurrence_from_day_month(
datetime.strptime(
bin_date_info[0] + " " + datetime.today().strftime("%Y"),
"%A %d %B - %Y",
bin_date_info[0],
"%A %d %B -",
)
).strftime(date_format)
)
# On exceptional collection schedule (e.g. around English Bank Holidays), date will be contained in the second stripped string
else:
bin_date = get_next_occurrence_from_day_month(
datetime.strptime(
bin_date_info[1] + " " + datetime.today().strftime("%Y"),
"%A %d %B - %Y",
bin_date_info[1],
"%A %d %B -",
)
).strftime(date_format)
)
except Exception as ex:
raise ValueError(f"Error parsing bin data: {ex}")

if (datetime.now().month == 12) and (bin_date.month == 1):
bin_date = bin_date.replace(year=next_year)
else:
bin_date = bin_date.replace(year=current_year)

# Build data dict for each entry
dict_data = {"type": bin_type, "collectionDate": bin_date}
dict_data = {
"type": bin_type,
"collectionDate": bin_date.strftime(date_format),
}
data["bins"].append(dict_data)

data["bins"].sort(
@@ -42,22 +42,22 @@ def parse_data(self, page: str, **kwargs) -> dict:
wait = WebDriverWait(driver, 60)
address_entry_field = wait.until(
EC.presence_of_element_located(
(By.XPATH, '//*[@id="combobox-input-19"]')
(By.XPATH, '//*[@id="combobox-input-20"]')
)
)

address_entry_field.send_keys(str(full_address))

address_entry_field = wait.until(
EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-19"]'))
EC.element_to_be_clickable((By.XPATH, '//*[@id="combobox-input-20"]'))
)
address_entry_field.click()
address_entry_field.send_keys(Keys.BACKSPACE)
address_entry_field.send_keys(str(full_address[len(full_address) - 1]))

first_found_address = wait.until(
EC.element_to_be_clickable(
(By.XPATH, '//*[@id="dropdown-element-19"]/ul')
(By.XPATH, '//*[@id="dropdown-element-20"]/ul')
)
)
