From 488a2a1138a217989d2ee73f90d089f9dd035acb Mon Sep 17 00:00:00 2001
From: Zach K
Date: Tue, 3 Sep 2024 02:59:30 -0400
Subject: [PATCH] add url spoofing and support 4399

---
 fpcurator.py   |  32 ++++++++---
 pyproject.toml |   2 +-
 sites/c4399.py | 147 +++++++++++++++++++++++++++++++++++++++++++++++++
 sites/defs.txt |   5 +-
 4 files changed, 176 insertions(+), 10 deletions(-)
 create mode 100644 sites/c4399.py

diff --git a/fpcurator.py b/fpcurator.py
index 44f19ae..47b7166 100644
--- a/fpcurator.py
+++ b/fpcurator.py
@@ -91,6 +91,7 @@ def toggle_console():
     • Keep URLVars - When checked, the downloader will append url vars present on links being downloaded to the end of the html file. This is only necessary when you have two links to the same webpage that generate different html due to the url vars.
     • Clear Done URLs - When checked, the downloader will clear any urls in the list when they are downloaded. Errored urls will remain in the list.
     • Notify When Done - When checked, the downloader will show a message box when it is done downloading.
+    • Spoof Referrer - When checked, the downloader will spoof the referrer of the urls to be the url itself.
 
     Here are some basic usage steps:
 
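In practice, the new Spoof Referrer option means each request is sent with its Referer header pointing back at the url being fetched, which some hosts check before serving game assets. A minimal sketch of that idea using plain requests; this is illustrative only, not fpclib's internal implementation, and fetch_with_spoofed_referrer is a hypothetical helper name:

import requests

def fetch_with_spoofed_referrer(url, extra_headers=None):
    # Point the Referer back at the url itself, then merge in any
    # user-supplied headers on top of it.
    headers = {"Referer": url}
    if extra_headers:
        headers.update(extra_headers)
    with requests.get(url, headers=headers) as resp:
        resp.raise_for_status()
        return resp.content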
@@ -225,8 +226,8 @@ def toggle_console():
 
 # This uuid uniquely defines fpcurator. (there is a 0 on the end after the text)
 UUID = '51be8a01-3307-4103-8913-c2f70e64d83'
-TITLE = "fpcurator v1.7.0"
-ABOUT = "Created by Zach K - v1.7.0"
+TITLE = "fpcurator v1.7.1"
+ABOUT = "Created by Zach K - v1.7.1"
 VER = 7
 
 SITES_FOLDER = "sites"
@@ -454,6 +455,7 @@ def save(self):
         downloader["keep_vars"] = self.downloader.keep_vars.get()
         downloader["clear"] = self.downloader.clear.get()
         downloader["show_done"] = self.downloader.show_done.get()
+        downloader["spoof"] = self.downloader.spoof.get()
 
         downloader["urls"] = self.downloader.stxt.txt.get("0.0", "end").strip()
 
@@ -519,6 +521,7 @@ def load(self):
         self.downloader.keep_vars.set(downloader["keep_vars"])
         self.downloader.clear.set(downloader["clear"])
         self.downloader.show_done.set(downloader["show_done"])
+        self.downloader.spoof.set(downloader["spoof"])
 
         txt = self.downloader.stxt.txt
         txt.delete("0.0", "end")
@@ -826,6 +829,8 @@ def __init__(self, parent):
         self.original.set(True)
         self.replace_https = tk.BooleanVar()
         self.replace_https.set(True)
+        self.spoof = tk.BooleanVar()
+        self.spoof.set(True)
 
         original = tk.Checkbutton(cframe, bg="white", text='Rename "web.archive.org"', var=self.original) # pyright: ignore [reportCallIssue] # tkinter does have "var"
         original.pack(side="left")
@@ -835,17 +840,25 @@ def __init__(self, parent):
         clear.pack(side="left")
         show_done = tk.Checkbutton(cframe, bg="white", text='Notify When Done', var=self.show_done) # pyright: ignore [reportCallIssue] # tkinter does have "var"
         show_done.pack(side="left", padx=5)
+        spoof = tk.Checkbutton(cframe, bg="white", text='Spoof Referrer', var=self.spoof) # pyright: ignore [reportCallIssue] # tkinter does have "var"
+        spoof.pack(side="left")
 
         Tooltip(original, text="When checked, the downloader will put all urls downloaded from the web archive back into their original domains.")
         Tooltip(keep_vars, text="When checked, the downloader will append url vars present on links being downloaded to the end of the html file. This is only necessary when you have two links to the same webpage that generate different html due to the url vars.")
         Tooltip(clear, text="When checked, the downloader will clear any urls in the list when they are downloaded. Errored urls will remain in the list.")
         Tooltip(show_done, text="When checked, the downloader will show a message box when it is done downloading.")
+        Tooltip(spoof, text="When checked, the downloader will spoof the referrer of the urls to be the url itself.")
 
-        # Create panel for inputting urls to download
-        lbl = tk.Label(self, bg="white", text=" Put URLs to download in this box:")
+        # Panels
+        lbl = tk.Label(self, bg="white", text="Put URLs to download at the top and headers at the bottom.")
         lbl.pack(fill="x")
-        self.stxt = ScrolledText(self, width=10, height=10, wrap="none")
-        self.stxt.pack(expand=True, fill="both", padx=5, pady=5)
+        txts = tk.Frame(self, bg="white")
+        txts.pack(expand=True, fill="both", padx=5, pady=(0, 5))
+
+        self.stxt = ScrolledText(txts, width=10, height=10, wrap="none")
+        self.stxt.pack(side="top", expand=True, fill="both")
+        self.stxt_headers = ScrolledText(txts, width=10, height=10, wrap="none")
+        self.stxt_headers.pack(side="top", expand=False, fill="both")
 
     def folder(self):
         # For changing the output directory
@@ -856,10 +869,15 @@ def folder(self):
 
     def i_download(self):
         txt = self.stxt.txt
+        htxt = self.stxt_headers.txt
         try:
+            headers = {}
+            for key, value in [i.strip().split("=", 1) for i in htxt.get("0.0", "end").replace("\r\n", "\n").replace("\r", "\n").split("\n") if i.strip()]:
+                headers[key.strip()] = value.strip()
+
             links = [i.strip() for i in txt.get("0.0", "end").replace("\r\n", "\n").replace("\r", "\n").split("\n") if i.strip()]
             if links:
-                errs = fpclib.download_all(links, self.output.get() or "output", not self.original.get(), self.keep_vars.get(), True)
+                errs = fpclib.download_all(links, self.output.get() or "output", not self.original.get(), self.keep_vars.get(), True, spoof=self.spoof.get(), headers=headers)
                 if self.show_done.get():
                     if errs:
                         if len(errs) == len(links):
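The new headers box feeds the headers dict built above: one header per line in Name=value form, split only on the first "=" so values may themselves contain "=". A standalone sketch of the same parsing, assuming a plain string rather than the Tk text widget:

def parse_headers(text):
    # One "Name=value" pair per line; blank lines are skipped and only
    # the first "=" separates the name from the value.
    headers = {}
    for line in text.replace("\r\n", "\n").replace("\r", "\n").split("\n"):
        line = line.strip()
        if not line:
            continue
        key, value = line.split("=", 1)
        headers[key.strip()] = value.strip()
    return headers

print(parse_headers("Referer=https://www.4399.com/\nUser-Agent=fpcurator"))
# {'Referer': 'https://www.4399.com/', 'User-Agent': 'fpcurator'}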
authors = ["mathgeniuszach "] readme = "README.md" diff --git a/sites/c4399.py b/sites/c4399.py new file mode 100644 index 0000000..dbf23d6 --- /dev/null +++ b/sites/c4399.py @@ -0,0 +1,147 @@ +import fpclib +import requests +import re +import bs4 + +regex = '4399.com' +ver = 7 + +GAME_URL = re.compile(r"_\d+.htm$") +IS_HTML5 = re.compile(r"var\s+isHTML5\s*=\s*(\d+)", re.IGNORECASE) +SCREENSHOT = re.compile(r'var\s+\w+GamePic\s*=\s*"(.*?)"', re.IGNORECASE) +GAMEPATH = re.compile(r'var\s+\w+GamePath\s*=\s*"(.*?)"', re.IGNORECASE) +DIMS = re.compile(r'var\s+_w\s*=\s*(\d+);?\s*var\s+_h\s*=\s*(\d+)') + +HTML_EMBED = """ + + + +""" +FLASH_EMBED = """ + + + + + + + + + +""" + +class c4399(fpclib.Curation): + def soupify(self): + # Correct URL if not on the actual game page + with requests.get(self.src) as resp: + soup = bs4.BeautifulSoup(resp.content, "html.parser") + if not GAME_URL.search(self.src): + self.src = "https://wwww.4399.com" + soup.select_one(".play > a")["href"] + with requests.get(self.src) as resp: + soup = bs4.BeautifulSoup(resp.content, "html.parser") + return soup + + def parse(self, soup): + # Basic metadata + self.title = soup.select_one(".game-des > .name > a").text.strip() + self.date = soup.select_one(".game-des > .sorts.cf > em:last-of-type").text.strip()[3:] + self.lang = 'zh' + self.pub = "4399" + + # Description transformation + box = soup.select_one("#playmethod > .box-l") + has_ptex = bool(box.select_one("#p-tex")) + desc = [] + for tag in box.children: + # Skip random strings + if isinstance(tag, bs4.element.NavigableString): continue + # Grab header elements as is + if tag.name == "b": desc.append(tag.text.strip() + "\n") + # Grab content elements as is + if "content" in tag.get("class"): desc.append(tag.text.strip() + "\n" + "\n") + + # Transform control information (but only if a direct description is not provided) + if tag.get("id") == "GameKey" and not has_ptex: + for ul in tag.children: + # Skip random strings + if isinstance(ul, bs4.element.NavigableString): continue + # Loop over each list + for li in ul.children: + # Skip random strings + if isinstance(ul, bs4.element.NavigableString): continue + + for elem in li.children: + # Add text as is + if isinstance(elem, bs4.element.NavigableString): + desc.append(str(elem)+" ") + continue + + # Only Span elements have their class-name translated to text + if elem.name != "span": continue + + cs = elem.get("class")[0] + if not cs: continue + if cs.startswith("player"): + desc.append("玩家" + cs[6:] + " ") + elif cs == "ico_c_arrows": + desc.append("Arrow Keys ") + elif cs == "ico_c_wasd": + desc.append("WASD ") + elif cs.startswith("ico_c_"): + desc.append(cs[6:].title() + " ") + elif elem.text: + desc.append(elem.text + " ") + + # After a list ends add a newline for the next list. 
+                    desc.append("\n")
+
+        self.desc = ''.join(desc)
+
+        headtxt = str(soup.head)
+
+        # Screenshot
+        try:
+            self.ss = "https:" + SCREENSHOT.search(headtxt)[1].strip()
+        except:
+            fpclib.debug("Screenshot not found", 1, pre="[WARN] ")
+
+        # Platform detection (Flash, Unity, and HTML5)
+        try:
+            is_html = bool(int(IS_HTML5.search(headtxt)[1]))
+        except:
+            is_html = False
+        self.embed = fpclib.normalize(self.src, False)
+        self.cdn = "http://sda.4399.com/4399swf" + GAMEPATH.search(headtxt)[1]
+
+        dims = DIMS.search(headtxt)
+        self.dims = (dims[1], dims[2])
+
+        if is_html:
+            self.platform = "HTML5"
+            self.app = fpclib.FPNAVIGATOR
+            self.cmd = self.embed
+        elif self.cdn.endswith(".swf"):
+            self.platform = "Flash"
+            self.app = fpclib.FLASH
+            self.cmd = self.cdn
+            self.add_app("Embedded Page", self.embed, fpclib.FPNAVIGATOR)
+        else:
+            self.platform = "Unity"
+            self.app = fpclib.UNITY
+            self.cmd = self.embed
+
+    def get_files(self):
+        # Create embed file
+        if self.platform == "Flash":
+            html = FLASH_EMBED % (self.dims[0], self.dims[1], self.cdn)
+        else:
+            html = HTML_EMBED % (self.dims[0], self.dims[1], self.cdn)
+        fpclib.write(self.embed[self.embed.index("://")+3:], html)
+
+        # Download the game's true embedded file
+        fpclib.download_all((self.cdn,), spoof=True)
diff --git a/sites/defs.txt b/sites/defs.txt
index 55d875e..d69b85a 100644
--- a/sites/defs.txt
+++ b/sites/defs.txt
@@ -1,4 +1,4 @@
-1725250543.8975272
+1725346764.5213957
 AddictingGames.py
 Construct.py
 CoolmathGames.py
@@ -15,4 +15,5 @@ Miniclip.py
 Newgrounds.py
 Therese.py
 Unknown.py
-Y8.py
\ No newline at end of file
+Y8.py
+c4399.py
\ No newline at end of file
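For reference, the platform decision in c4399.parse() comes down to two values scraped from the page head: an isHTML5 flag and a game path appended to the 4399 CDN prefix. A standalone sketch of just that decision, using the same regexes as the definition; the detect_platform helper and the sample head snippet are invented for illustration, and the real definition additionally sets fpclib application constants and the launch command:

import re

IS_HTML5 = re.compile(r"var\s+isHTML5\s*=\s*(\d+)", re.IGNORECASE)
GAMEPATH = re.compile(r'var\s+\w+GamePath\s*=\s*"(.*?)"', re.IGNORECASE)

def detect_platform(headtxt):
    # Mirrors the branch in c4399.parse(): an isHTML5 flag wins,
    # otherwise a ".swf" game path means Flash, and anything else
    # is treated as Unity.
    try:
        is_html = bool(int(IS_HTML5.search(headtxt)[1]))
    except (TypeError, ValueError):
        is_html = False
    cdn = "http://sda.4399.com/4399swf" + GAMEPATH.search(headtxt)[1]
    if is_html:
        return "HTML5", cdn
    if cdn.endswith(".swf"):
        return "Flash", cdn
    return "Unity", cdn

# Hypothetical head snippet, purely for illustration.
sample = 'var isHTML5 = 0; var strGamePath = "/upload_swf/example/game.swf";'
print(detect_platform(sample))
# ('Flash', 'http://sda.4399.com/4399swf/upload_swf/example/game.swf')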