fix(fladistctapp): correct wrong pagination xpath

Solves freelawproject#870. The pagination test evaluated to true when no pagination menu was available, which led the scraper to paginate to nonexistent pages.
grossir · Feb 1, 2024 · 3bbb8c6 · 3bbb8c6
1 parent da04f9c
commit 3bbb8c6
Showing 1 changed file with 28 additions and 6 deletions.
diff --git a/juriscraper/opinions/united_states/state/fladistctapp.py b/juriscraper/opinions/united_states/state/fladistctapp.py
@@ -17,20 +17,41 @@ def __init__(self, *args, **kwargs):
         self.base = "https://1dca.flcourts.gov"
         self.update_url()
 
-    def update_url(self):
-        """"""
+    def update_url(self) -> None:
+        """Scrape last 7 days of opinions
+
+        :return: none
+        """
         today = datetime.now().strftime("%m/%d/%Y")
         prev = (datetime.now() - timedelta(days=7)).strftime("%m/%d/%Y")
 
-        self.url = f"{self.base}/search?sort=opinion/disposition_date%20desc,%20opinion/case_number%20asc&view=full&searchtype=opinions&limit=10&scopes[]={self.number}_district_court_of_appeal&type[]=pca&type[]=written&startdate={prev}&enddate={today}&date[year]=&date[month]=&date[day]=&query=&offset={self.offset}"
+        self.url = "".join(
+            [
+                self.base,
+                "/search?sort=opinion/disposition_date%20desc,%20opinion/case_number%20asc&view=full",
+                "&searchtype=opinions",
+                "&limit=10",
+                f"&scopes[]={self.number}_district_court_of_appeal",
+                "&type[]=pca",
+                "&type[]=written",
+                f"&startdate={prev}",
+                f"&enddate={today}",
+                "&date[year]=",
+                "&date[month]=",
+                "&date[day]=",
+                "&query=",
+                f"&offset={self.offset}",
+            ]
+        )
 
     def _process_html(self) -> None:
         """Process the html and extract out the opinions
+        Paginates if necessary
 
         :return: None
         """
         for row in self.html.xpath("//tr"):
-            if not len(row.xpath(".//td")):
+            if not row.xpath(".//td"):
                 continue
             (
                 link,
@@ -52,8 +73,9 @@ def _process_html(self) -> None:
                 }
             )
 
-        if not self.html.xpath('.//li[@class="next disabled"]'):
-            # If pagniation continues loop
+        paginator_exists = self.html.xpath("//ul[@class='pagination']")
+        paginator_disabled = self.html.xpath('//li[@class="next disabled"]')
+        if paginator_exists and not paginator_disabled:
             self.offset = self.offset + 10
             self.update_url()
             self.html = super()._download()