From 41c98d697ab7897103cb9f568c44d9d7e0c76574 Mon Sep 17 00:00:00 2001
From: daijro <72637910+daijro@users.noreply.github.com>
Date: Mon, 28 Mar 2022 18:10:36 -0500
Subject: [PATCH] Fix #1 again - Fix regex (and make it future proof)

---
 scraper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scraper.py b/scraper.py
index ab91d9d..876ce90 100644
--- a/scraper.py
+++ b/scraper.py
@@ -133,7 +133,7 @@ def set_initial_information(self):
         self.numberDataRsid, self.dataRSID, self.linkPath
         """
         try:
-            linkPath = pot_url = re.findall('url\\(\\/doc-asset\\/bg[\\/a-z0-9\\.\\-]+\\);', self._resp.text)[0][4:-2]
+            linkPath = pot_url = re.findall(r'\/doc-asset\/bg\/[a-z0-9\.\-]+\/splits\/\d+\/split\-[a-z0-9\.\-]+\.jpg', self._resp.text)[0]
         except IndexError as e:
             raise self.exceptions.TooManyRequests from e
         except Exception as e: