Skip to content

Commit

Permalink
fix(fladistctapp): correct wrong pagination xpath
Browse files Browse the repository at this point in the history
Solves freelawproject#870

The pagination check evaluated to true even when no pagination menu was present, which caused the scraper to keep paginating to nonexistent pages
  • Loading branch information
grossir committed Feb 1, 2024
1 parent da04f9c commit 3bbb8c6
Showing 1 changed file with 28 additions and 6 deletions.
34 changes: 28 additions & 6 deletions juriscraper/opinions/united_states/state/fladistctapp.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,41 @@ def __init__(self, *args, **kwargs):
self.base = "https://1dca.flcourts.gov"
self.update_url()

def update_url(self):
""""""
def update_url(self) -> None:
"""Scrape last 7 days of opinions
:return: none
"""
today = datetime.now().strftime("%m/%d/%Y")
prev = (datetime.now() - timedelta(days=7)).strftime("%m/%d/%Y")

self.url = f"{self.base}/search?sort=opinion/disposition_date%20desc,%20opinion/case_number%20asc&view=full&searchtype=opinions&limit=10&scopes[]={self.number}_district_court_of_appeal&type[]=pca&type[]=written&startdate={prev}&enddate={today}&date[year]=&date[month]=&date[day]=&query=&offset={self.offset}"
self.url = "".join(
[
self.base,
"/search?sort=opinion/disposition_date%20desc,%20opinion/case_number%20asc&view=full",
"&searchtype=opinions",
"&limit=10",
f"&scopes[]={self.number}_district_court_of_appeal",
"&type[]=pca",
"&type[]=written",
f"&startdate={prev}",
f"&enddate={today}",
"&date[year]=",
"&date[month]=",
"&date[day]=",
"&query=",
f"&offset={self.offset}",
]
)

def _process_html(self) -> None:
"""Process the html and extract out the opinions
Paginates if necessary
:return: None
"""
for row in self.html.xpath("//tr"):
if not len(row.xpath(".//td")):
if not row.xpath(".//td"):
continue
(
link,
Expand All @@ -52,8 +73,9 @@ def _process_html(self) -> None:
}
)

if not self.html.xpath('.//li[@class="next disabled"]'):
# If pagniation continues loop
paginator_exists = self.html.xpath("//ul[@class='pagination']")
paginator_disabled = self.html.xpath('//li[@class="next disabled"]')
if paginator_exists and not paginator_disabled:
self.offset = self.offset + 10
self.update_url()
self.html = super()._download()
Expand Down

0 comments on commit 3bbb8c6

Please sign in to comment.