From 7144156d2d9f8e3584d518e1b99b952a5c29803a Mon Sep 17 00:00:00 2001 From: Alex Boisvert Date: Sun, 5 Nov 2023 09:56:32 -0800 Subject: [PATCH] js2py for amuselabs --- requirements.txt | 1 + xword_dl/downloader/amuselabsdownloader.py | 96 +++------------------- xword_dl/version | 2 +- 3 files changed, 13 insertions(+), 86 deletions(-) diff --git a/requirements.txt b/requirements.txt index b2f22a9..4962fc7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,4 @@ Unidecode==1.3.6 pyyaml==6.0.1 xmltodict==0.13.0 lxml==4.9.2 +Js2Py==0.74 diff --git a/xword_dl/downloader/amuselabsdownloader.py b/xword_dl/downloader/amuselabsdownloader.py index 830a2c0..46aa66d 100644 --- a/xword_dl/downloader/amuselabsdownloader.py +++ b/xword_dl/downloader/amuselabsdownloader.py @@ -11,6 +11,8 @@ from bs4 import BeautifulSoup from html2text import html2text +import js2py + from .basedownloader import BaseDownloader from ..util import * @@ -86,92 +88,17 @@ def fetch_data(self, solver_url): js_url_fragment = m1.groups()[0] js_url = urllib.parse.urljoin(solver_url, js_url_fragment) - # get the "key" from the URL + # get the decryption function from the JS URL res2 = requests.get(js_url) + js_text = res2.text + re_match = re.search(r'rawc\;try\{(var n=function.*?n.join\(""\)\})', js_text) + jsFunc = re_match.groups()[0] + context = js2py.EvalJs() + context.execute(jsFunc) - # matches a 7-digit hex string preceded by `="` and followed by `"` - m2 = re.search(r'="([0-9a-f]{7})"', res2.text) - if m2: - # in this format, add 2 to each digit - amuseKey = [int(c,16)+2 for c in m2.groups()[0]] - else: - # otherwise, grab the new format key and do not add 2 - amuseKey = [int(x) for x in - re.findall(r'=\[\]\).push\(([0-9]{1,2})\)', res2.text)] - - # But now that might not be the right key, and there's another one - # that we need to try! - # (current as of 10/26/2023) - key_2_order_regex = r'i=(\d+);i