Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(cal-ag): Fix Cal AG #881

Merged
merged 7 commits into from
Jan 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 42 additions & 46 deletions juriscraper/opinions/united_states/state/calag.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,58 +5,54 @@

import datetime

from juriscraper.lib.string_utils import convert_date_string
from juriscraper.OpinionSite import OpinionSite
from lxml.html import HtmlElement

from juriscraper.OpinionSiteLinear import OpinionSiteLinear

class Site(OpinionSite):

class Site(OpinionSiteLinear):
def __init__(self, *args, **kwargs):
    """Configure the scraper for the current year's opinion index.

    All positional and keyword arguments are forwarded unchanged to the
    parent class constructor.
    """
    super().__init__(*args, **kwargs)
    self.court_id = self.__module__

    current_year = datetime.date.today().year
    self.year = current_year

    # The yearly index takes the target year as the final query value.
    self.url_base = "https://oag.ca.gov/opinions/yearly-index?conclusion-year[value][year]="
    self.url = f"{self.url_base}{current_year}"

    # One entry per year, from 1985 up to and including the current year.
    self.back_scrape_iterable = list(range(1985, current_year + 1))

    # Rows whose first cell links to a PDF; cell_path still expects a
    # 1-based column number to be substituted for ``%d``.
    self.rows_path = '//tbody/tr[contains(./td[1]//a/@href, ".pdf")]'
    self.cell_path = f"{self.rows_path}/td[%d]"

def _get_case_names(self):
    """Return one placeholder name per matched row (no case names available)."""
    row_count = len(self.html.xpath(self.rows_path))
    return [
        "Untitled California Attorney General Opinion"
        for _ in range(row_count)
    ]

def _get_download_urls(self):
    """Return the href of every link inside the first cell of the matched rows."""
    first_cell = self.cell_path % 1
    return list(self.html.xpath(f"{first_cell}//a/@href"))

def _get_case_dates(self):
    """Return the parsed date from the fourth cell of every matched row.

    :return: List holding one ``convert_date_string`` result per cell
    """
    dates = []
    for cell in self.html.xpath(self.cell_path % 4):
        # Collapse newlines and any other whitespace runs into single
        # spaces before parsing. A raw-string pattern such as r"\n" only
        # matches a literal backslash followed by "n", so it cannot be
        # used to remove real line breaks.
        date_raw = " ".join(cell.text_content().split())
        dates.append(convert_date_string(date_raw))
    return dates

def _get_docket_numbers(self):
    """Return the stripped text of each first-column cell as the docket number."""
    docket_numbers = []
    for first_cell in self.html.xpath(self.cell_path % 1):
        docket_numbers.append(first_cell.text_content().strip())
    return docket_numbers

def _get_precedential_statuses(self):
    """Return a "Published" status for each entry in ``self.case_names``."""
    case_count = len(self.case_names)
    return ["Published" for _ in range(case_count)]

def _get_summaries(self):
"""Combine the Questions and Conclusions Columns"""
summaries = []
for row in self.html.xpath(self.rows_path):
questions = row.xpath("./td[2]")[0].text_content()
conclusions = row.xpath("./td[3]")[0].text_content()
summaries.append(
f"QUESTIONS: {questions} CONCLUSIONS: {conclusions}"
self.cipher = "ECDHE-RSA-AES128-GCM-SHA256"
self.set_custom_adapter(self.cipher)
self.status = "Published"

def build_summaries(self, row: HtmlElement) -> str:
    """Combine a row's questions and conclusions cells into one summary.

    :param row: Table row whose second and third cells are read
    :return: Text in the form "QUESTIONS: ... CONCLUSIONS: ..."
    """
    questions, conclusions = (
        row.xpath(cell_path)[0].text_content()
        for cell_path in ("./td[2]", "./td[3]")
    )
    return f"QUESTIONS: {questions} CONCLUSIONS: {conclusions}"

def _process_html(self) -> None:
    """Collect one case dictionary per linked table row.

    Every table-body row that contains a link is appended to
    ``self.cases`` with its url, citation, docket, date, name and summary.

    :return: None
    """
    linked_rows = self.html.xpath("//table/tbody/tr[.//a]")
    for row in linked_rows:
        # The docket number is reused to build the case name.
        docket = row.xpath(".//a//strong/text()")[0].strip()
        url = row.xpath(".//a/@href")[0]
        citation = row.xpath(".//strong/em/text()")[0]
        date = row.xpath(".//td/span/text()")[0].strip()
        case = {
            "url": url,
            "citation": citation,
            "docket": docket,
            "date": date,
            "name": f"California Attorney General Opinion {docket}",
            "summary": self.build_summaries(row),
        }
        self.cases.append(case)
return summaries

def _download_backwards(self, year):
    """Point the scraper at the given year's index page and download it.

    :param year: Year appended to ``self.url_base`` to form the page URL
    """
    self.url = f"{self.url_base}{year!s}"
    self.html = self._download()
def _download_backwards(self, year: str) -> None:
"""Download backwards

:param year: The year to scrape
:return: None
"""
self.url = f"https://oag.ca.gov/opinions/yearly-index?conclusion-year[value][year]={year}"
Loading
Loading