-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Monterey scraping #166
Monterey scraping #166
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,20 @@ | ||
from chalice import Blueprint | ||
|
||
from chalicelib.utils.monterey_county.scrape_hearings import scrape_hearings | ||
|
||
monterey_county_blueprint = Blueprint(__name__) | ||
|
||
|
||
@monterey_county_blueprint.route("/monterey_county")
def hello_world():
    """Placeholder endpoint for the Monterey County blueprint; returns a fixed greeting."""
    greeting = "Hello World!"
    return {"message": greeting}
|
||
|
||
@monterey_county_blueprint.route("/monterey_county/hearings", cors=True)
def get_hearings():
    """
    Trigger a scrape of the Monterey County hearings calendar.

    The scraped hearings are only printed for now; the response body is still
    the placeholder message regardless of whether the scrape succeeded.
    """
    try:
        hearings = scrape_hearings()
        print(hearings)
    except Exception as e:
        # Keep the endpoint best-effort, but surface the failure instead of
        # silently discarding it so broken scrapes are visible in the logs.
        print(f"Failed to scrape Monterey County hearings: {e}")
    return {"message": "Hello World!"}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,57 @@ | ||
import requests | ||
import time | ||
from bs4 import BeautifulSoup | ||
from datetime import datetime | ||
|
||
from mongodb import get_mongo_client | ||
|
||
def scrape_hearings():
    """
    Scrape the upcoming hearing links and dates from the Monterey County Legistar site.

    Each usable calendar row becomes a dict with keys "id" (Unix timestamp
    derived from the hearing date), "link" (href of the first anchor in the
    row) and "date" (the date text as shown on the page). Hearings whose id is
    not yet in the "monterey"."hearings" collection are inserted when a
    database connection is available.

    Returns:
        list: the hearing dicts scraped from the page; empty when the request
        fails or the events table is missing.
    """
    # NOTE(review, from the PR thread): incomplete / untested. Still unsure
    # which fields can be extracted from the public hearings table.

    # Default to "no persistence" so a missing client or a failed lookup does
    # not leave `collection` unbound further down.
    collection = None
    client = get_mongo_client()
    if client:
        try:
            collection = client["monterey"]["hearings"]
        except Exception as e:
            print(f"Failed to connect to hearings collection: {e}")

    url = "https://monterey.legistar.com/Calendar.aspx"
    # A timeout keeps the request from hanging indefinitely.
    response = requests.get(url, timeout=30)
    if not response.ok:
        print(f"Failed to fetch hearings calendar: HTTP {response.status_code}")
        return []

    soup = BeautifulSoup(response.content, "html.parser")
    upcoming_events_table = soup.find("table", class_="rgMasterTable")

    upcoming_hearings = []

    if not upcoming_events_table:
        print("Table not found or empty.")
        return upcoming_hearings

    # NOTE(review): only rows with class "rgRow" are read; if the grid styles
    # alternating rows with a different class they are skipped -- confirm.
    events = upcoming_events_table.find_all("tr", class_="rgRow")

    for event in events:
        date_cell = event.find("td", class_="rgSorted")
        link = event.find("a")
        # Skip rows lacking a date cell or a link instead of crashing or
        # reusing the previous row's link.
        if date_cell is None or link is None or not link.get("href"):
            continue

        date_string = date_cell.get_text(strip=True)
        try:
            date_time = datetime.strptime(date_string, "%m/%d/%Y %I:%M %p")
        except ValueError:
            print(f"Skipping hearing with unparseable date: {date_string!r}")
            continue
        # mktime interprets the parsed time in the host's local timezone.
        date_unix = int(time.mktime(date_time.timetuple()))

        hearing = {"id": date_unix, "link": link["href"], "date": date_string}
        upcoming_hearings.append(hearing)

        # Compare against None explicitly: pymongo collections do not support
        # truth-value testing.
        if collection is not None and not in_db(date_unix, collection):
            # Insert a copy so the driver's added "_id" field does not leak
            # into the dict that is returned to the caller.
            collection.insert_one(dict(hearing))

    return upcoming_hearings
|
||
|
||
def in_db(id: int, collection) -> bool:
    """
    Given a meeting id, return if the meeting is in the database.

    Args:
        id: meeting id to look up (stored under the "id" field).
        collection: object exposing ``find_one(filter)``; it is queried with
            the filter ``{"id": id}``.

    Returns:
        True when a document with that id exists, otherwise False.
    """
    # The filter must be a mapping of field -> value. The previous
    # {"id: id"} was a set containing a single string, not a query.
    result = collection.find_one({"id": id})
    return result is not None
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,7 @@ | ||
import connectDB from "@/database/db"; | ||
import Project from "@/database/projectSchema"; | ||
import { NextResponse } from "next/server"; | ||
import { getProjectModel } from "@/database/projectSchema"; | ||
|
||
export async function GET(req: Request) { | ||
// check if query params are passed | ||
|
@@ -9,7 +10,8 @@ export async function GET(req: Request) { | |
|
||
try { | ||
await connectDB(); | ||
// TODO: get project by countyFileNumber | ||
// TODO: get county from query params and use getProjectModel to get the correct model | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. For the base route that does a simple fetch-all of the project data: since we now have multiple counties, we must separate the […comment truncated in the page capture]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. NOTE: the query parameter should normally be supplied, but it is optional: if it is missing, the route can simply default to the SLO County database. |
||
// const Project = getProjectModel(county); | ||
if (county_file_number) { | ||
console.log(county_file_number); | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,6 +4,8 @@ import Link from "next/link"; | |
import { signOut } from "next-auth/react"; | ||
import { useSession } from "next-auth/react"; | ||
import { useState } from "react"; | ||
import Image from "next/image"; | ||
import Ecologistics from "../../public/ecologistics-logo.svg"; | ||
import "@/styles/globals.css"; | ||
|
||
export default function Navbar() { | ||
|
@@ -19,8 +21,7 @@ export default function Navbar() { | |
<div className="flex bg-secondary px-16 h-16 items-center justify-between"> | ||
<div> | ||
<Link href="/" className="flex items-left"> | ||
<div className="font-bold text-3xl">ECOLO</div> | ||
<div className="text-primary text-3xl">GISTICS Web Scraper</div> | ||
<Image src={Ecologistics} alt="Ecologistics Logo" /> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Adding the official logo to the navbar and replacing our makeshift one |
||
</Link> | ||
</div> | ||
<div className="flex flex-row gap-10"> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ import { | |
Select, | ||
SelectContent, | ||
SelectItem, | ||
SelectGroup, | ||
SelectTrigger, | ||
SelectValue, | ||
} from "@/components/ui/select"; | ||
|
@@ -69,6 +70,7 @@ import { columns } from "../lib/tableColumns"; | |
import { IProject, ReformattedProject } from "@/database/projectSchema"; | ||
import { DialogClose } from "@radix-ui/react-dialog"; | ||
import { useToast } from "@/components/ui/use-toast"; | ||
import { SelectLabel } from "@radix-ui/react-select"; | ||
|
||
const reviewStatusColors: Record<any, string> = { | ||
Unreviewed: "#EC7590", | ||
|
@@ -79,12 +81,14 @@ const reviewStatusColors: Record<any, string> = { | |
interface DataTableProps<TData, TValue> { | ||
columns: ColumnDef<TData, TValue>[]; | ||
data: TData[]; | ||
numProjects: number; | ||
fetchProjectData: () => Promise<never[] | undefined>; | ||
} | ||
|
||
function DataTable<TData, TValue>({ | ||
columns, | ||
data, | ||
numProjects, | ||
fetchProjectData, | ||
}: DataTableProps<TData, TValue>) { | ||
const { toast } = useToast(); | ||
|
@@ -96,6 +100,7 @@ function DataTable<TData, TValue>({ | |
[], | ||
); | ||
const [columnToFilter, setColumnToFilter] = useState("countyFileNumber"); | ||
const [county, setCounty] = useState("San Luis Obispo County"); | ||
|
||
let table = useReactTable({ | ||
data, | ||
|
@@ -177,19 +182,79 @@ function DataTable<TData, TValue>({ | |
} | ||
}; | ||
|
||
useEffect(() => { | ||
console.log(county); // log the county state | ||
}, [county]); | ||
|
||
useEffect(() => { | ||
console.log(columnToFilter); // log the columnToFilter state | ||
}, [columnToFilter]); | ||
|
||
const handleCountyChange = (selectedCounty: string) => { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Handler that will be primarily responsible for re-fetching the data (via the |
||
console.log("changing county"); | ||
setCounty(selectedCounty); | ||
// refresh table data after changing county | ||
// TODO: | ||
// * call fetchProjectData with the selected county | ||
// * update the table data with the new data | ||
// * update the numProjects with the new number of projects | ||
// * refactor database schemas for each county | ||
// * rewrite fetchProjectData to fetch data for the selected county | ||
// * reset filter columns for the new county | ||
}; | ||
|
||
return ( | ||
<div> | ||
<div className="text-xl font-bold"> | ||
{county} ({numProjects}) | ||
</div> | ||
<div className="flex items-center py-4"> | ||
<Select | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Component that allows the user to select between the 3 different counties, to which their respective tables will be displayed |
||
value={county} | ||
onValueChange={(value) => { | ||
handleCountyChange(value); | ||
}} | ||
> | ||
<SelectTrigger className="w-60 mr-2"> | ||
<SelectValue placeholder={county} /> | ||
<SelectContent side="right"> | ||
<SelectGroup> | ||
<SelectLabel className="text-center">Counties</SelectLabel> | ||
<SelectItem | ||
key="sloCounty" | ||
value={"San Luis Obispo County"} | ||
onClick={() => handleCountyChange("San Luis Obispo County")} | ||
> | ||
San Luis Obispo County | ||
</SelectItem> | ||
<SelectItem | ||
key="montereyCounty" | ||
value={"Monterey County"} | ||
onClick={() => handleCountyChange("Monterey County")} | ||
> | ||
Monterey County | ||
</SelectItem> | ||
<SelectItem | ||
key="santaBarbaraCounty" | ||
value={"Santa Barbara County"} | ||
onClick={() => handleCountyChange("Santa Barbara County")} | ||
> | ||
Santa Barbara County | ||
</SelectItem> | ||
</SelectGroup> | ||
</SelectContent> | ||
</SelectTrigger> | ||
</Select> | ||
<Select | ||
value={columnToFilter} | ||
onValueChange={(value) => { | ||
setColumnToFilter(value); | ||
}} | ||
> | ||
<SelectTrigger className="h-10 w-32 flex justify-start items-center"> | ||
<SelectTrigger className="w-60"> | ||
<SelectValue placeholder={columnToFilter} /> | ||
</SelectTrigger> | ||
<SelectContent side="top"> | ||
<SelectContent side="right"> | ||
{table.getHeaderGroups().map((headerGroup) => ( | ||
<React.Fragment key={headerGroup.id}> | ||
{headerGroup.headers.map((header) => { | ||
|
@@ -591,12 +656,10 @@ export function ProjectTable({ | |
}) { | ||
return ( | ||
<div className="container mx-auto py-5"> | ||
<div className="text-xl font-bold"> | ||
San Luis Obispo County ({numProjects}) | ||
</div> | ||
<DataTable | ||
columns={columns} | ||
data={projectData} | ||
numProjects={numProjects} | ||
fetchProjectData={fetchProjectData} | ||
/> | ||
</div> | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -48,6 +48,12 @@ export const projectSchema = new Schema<IProject>({ | |
additional_notes: { type: String, required: false, default: "N/A" }, | ||
}); | ||
|
||
// retrieve correct collection based on county database | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This will be the function used in the |
||
/**
 * Returns the `projects` model bound to the database named by `county`.
 *
 * @param county - Name of the database to switch to; it is passed to
 *   `useDb` unchanged, so it must already be a valid database name.
 * @returns The `projects` model registered on that database's connection.
 */
export const getProjectModel = (county: string) => {
  // useCache makes repeated calls for the same county reuse one connection
  // object instead of creating a new one (and new listeners) every request.
  const db = mongoose.connection.useDb(county, { useCache: true });
  // Reuse the model if this connection already registered it, so repeated
  // calls do not try to compile "projects" again.
  return db.models["projects"] ?? db.model("projects", projectSchema);
};
|
||
// defining the collection and model and creating one if doesn't exist | ||
|
||
const Project = | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Incomplete — haven't tested scrape_hearings()