Add referrer spoofing and custom download headers; add 4399 site support

mathgeniuszach · mathgeniuszach · commit 488a2a1138a2 · 2024-09-03T03:00:06.000-04:00
diff --git a/fpcurator.py b/fpcurator.py
@@ -91,6 +91,7 @@ def toggle_console():
         <li><b>Keep URLVars</b> - When checked, the downloader will append url vars present on links being downloaded to the end of the html file. This is only necessary when you have two links to the same webpage that generate different html due to the url vars.</li>
         <li><b>Clear Done URLs</b> - When checked, the downloader will clear any urls in the list when they are downloaded. Errored urls will remain in the list.</li>
         <li><b>Notify When Done</b> - When checked, the downloader will show a message box when it is done downloading.</li>
+        <li><b>Spoof Referrer</b> - When checked, the downloader will spoof the referrer of the urls to be the url itself.</li>
     </ul>
     Here are some basic usage steps:
     <ol>
@@ -225,8 +226,8 @@ def toggle_console():
 # This uuid uniquely defines fpcurator. (there is a 0 on the end after the text)
 UUID = '51be8a01-3307-4103-8913-c2f70e64d83'
 
-TITLE = "fpcurator v1.7.0"
-ABOUT = "Created by Zach K - v1.7.0"
+TITLE = "fpcurator v1.7.1"
+ABOUT = "Created by Zach K - v1.7.1"
 VER = 7
 
 SITES_FOLDER = "sites"
@@ -454,6 +455,7 @@ def save(self):
         downloader["keep_vars"] = self.downloader.keep_vars.get()
         downloader["clear"] = self.downloader.clear.get()
         downloader["show_done"] = self.downloader.show_done.get()
+        downloader["spoof"] = self.downloader.spoof.get()
 
         downloader["urls"] = self.downloader.stxt.txt.get("0.0", "end").strip()
 
@@ -519,6 +521,7 @@ def load(self):
             self.downloader.keep_vars.set(downloader["keep_vars"])
             self.downloader.clear.set(downloader["clear"])
             self.downloader.show_done.set(downloader["show_done"])
+            self.downloader.spoof.set(downloader["spoof"])
 
             txt = self.downloader.stxt.txt
             txt.delete("0.0", "end")
@@ -826,6 +829,8 @@ def __init__(self, parent):
         self.original.set(True)
         self.replace_https = tk.BooleanVar()
         self.replace_https.set(True)
+        self.spoof = tk.BooleanVar()
+        self.spoof.set(True)
 
         original = tk.Checkbutton(cframe, bg="white", text='Rename "web.archive.org"', var=self.original)  # pyright: ignore [reportCallIssue] # tkinter does have "var"
         original.pack(side="left")
@@ -835,17 +840,25 @@ def __init__(self, parent):
         clear.pack(side="left")
         show_done = tk.Checkbutton(cframe, bg="white", text='Notify When Done', var=self.show_done)  # pyright: ignore [reportCallIssue] # tkinter does have "var"
         show_done.pack(side="left", padx=5)
+        spoof = tk.Checkbutton(cframe, bg="white", text='Spoof Referrer', var=self.spoof)  # pyright: ignore [reportCallIssue] # tkinter does have "var"
+        spoof.pack(side="left")
 
         Tooltip(original, text="When checked, the downloader will put all urls downloaded from the web archive back into their original domains.")
         Tooltip(keep_vars, text="When checked, the downloader will append url vars present on links being downloaded to the end of the html file. This is only necessary when you have two links to the same webpage that generate different html due to the url vars.")
         Tooltip(clear, text="When checked, the downloader will clear any urls in the list when they are downloaded. Errored urls will remain in the list.")
         Tooltip(show_done, text="When checked, the downloader will show a message box when it is done downloading.")
+        Tooltip(spoof, text="When checked, the downloader will spoof the referrer of the urls to be the url itself.")
 
-        # Create panel for inputting urls to download
-        lbl = tk.Label(self, bg="white", text="  Put URLs to download in this box:")
+        # Panels
+        lbl = tk.Label(self, bg="white", text="Put URLs to download at the top and headers at the bottom.")
         lbl.pack(fill="x")
-        self.stxt = ScrolledText(self, width=10, height=10, wrap="none")
-        self.stxt.pack(expand=True, fill="both", padx=5, pady=5)
+        txts = tk.Frame(self, bg="white")
+        txts.pack(expand=True, fill="both", padx=5, pady=(0, 5))
+
+        self.stxt = ScrolledText(txts, width=10, height=10, wrap="none")
+        self.stxt.pack(side="top", expand=True, fill="both")
+        self.stxt_headers = ScrolledText(txts, width=10, height=10, wrap="none")
+        self.stxt_headers.pack(side="top", expand=False, fill="both")
 
     def folder(self):
         # For changing the output directory
@@ -856,10 +869,15 @@ def folder(self):
 
     def i_download(self):
         txt = self.stxt.txt
+        htxt = self.stxt_headers.txt
         try:
+            headers = {}
+            for key, value in [i.strip().split("=", 1) for i in htxt.get("0.0", "end").replace("\r\n", "\n").replace("\r", "\n").split("\n") if i.strip()]:
+                headers[key.strip()] = value.strip()
+
             links = [i.strip() for i in txt.get("0.0", "end").replace("\r\n", "\n").replace("\r", "\n").split("\n") if i.strip()]
             if links:
-                errs = fpclib.download_all(links, self.output.get() or "output", not self.original.get(), self.keep_vars.get(), True)
+                errs = fpclib.download_all(links, self.output.get() or "output", not self.original.get(), self.keep_vars.get(), True, spoof=self.spoof.get(), headers=headers)
                 if self.show_done.get():
                     if errs:
                         if len(errs) == len(links):
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "fpcurator"
-version = "1.7.0"
+version = "1.7.1"
 description = "fpcurator is a Python and fpclib powered tool for downloading urls, auto-generating curations, bulk searching for already curated games, and listing tags/platforms/games/animations for Flashpoint."
 authors = ["mathgeniuszach <huntingmanzach@gmail.com>"]
 readme = "README.md"
diff --git a/sites/c4399.py b/sites/c4399.py
@@ -0,0 +1,147 @@
+import fpclib
+import requests
+import re
+import bs4
+
+regex = '4399.com'
+ver = 7
+
+GAME_URL = re.compile(r"_\d+\.htm$")  # game pages end in "_<digits>.htm"; the dot is escaped so it only matches a literal "."
+IS_HTML5 = re.compile(r"var\s+isHTML5\s*=\s*(\d+)", re.IGNORECASE)  # group 1: numeric isHTML5 flag
+SCREENSHOT = re.compile(r'var\s+\w+GamePic\s*=\s*"(.*?)"', re.IGNORECASE)  # group 1: quoted *GamePic value
+GAMEPATH = re.compile(r'var\s+\w+GamePath\s*=\s*"(.*?)"', re.IGNORECASE)  # group 1: quoted *GamePath value
+DIMS = re.compile(r'var\s+_w\s*=\s*(\d+);?\s*var\s+_h\s*=\s*(\d+)')  # groups 1 and 2: _w and _h
+
+HTML_EMBED = """<body>
+    <style>
+        body { background-color: #16202c; height: 100%%; margin: 0; }
+        iframe { position: absolute; top: 0; bottom: 0; left: 0; right: 0; margin: auto; }
+    </style>
+    <iframe width="%s" height="%s" src="%s"></iframe>
+</body>
+"""
+FLASH_EMBED = """<body>
+    <style>
+        body { background-color: #16202c; height: 100%%; margin: 0; }
+        object { position: absolute; top: 0; bottom: 0; left: 0; right: 0; margin: auto; }
+    </style>
+    <object type="application/x-shockwave-flash" width="%s" height="%s" data="%s">
+        <param name="allowscriptaccess" value="always">
+        <param name="allowfullscreen" value="true">
+        <param name="allowfullscreeninteractive" value="true">
+        <param name="allownetworking" value="all">
+        <param name="wmode" value="direct">
+    </object>
+</body>
+"""
+
+class c4399(fpclib.Curation):
+    def soupify(self):
+        """Return the parsed game page; a src that is not a game page is first replaced by its ".play > a" link target."""
+        with requests.get(self.src) as resp:
+            soup = bs4.BeautifulSoup(resp.content, "html.parser")
+        if not GAME_URL.search(self.src):
+            self.src = "https://www.4399.com" + soup.select_one(".play > a")["href"]
+            with requests.get(self.src) as resp:
+                soup = bs4.BeautifulSoup(resp.content, "html.parser")
+        return soup
+
+    def parse(self, soup):
+        """Fill in title, date, language, publisher, description, screenshot, dimensions and launch data from the game page soup."""
+        self.title = soup.select_one(".game-des > .name > a").text.strip()
+        self.date = soup.select_one(".game-des > .sorts.cf > em:last-of-type").text.strip()[3:]
+        self.lang = 'zh'
+        self.pub = "4399"
+
+        # Description transformation
+        box = soup.select_one("#playmethod > .box-l")
+        has_ptex = bool(box.select_one("#p-tex"))
+        desc = []
+        for tag in box.children:
+            # Skip random strings
+            if isinstance(tag, bs4.element.NavigableString): continue
+            # Grab header elements as is
+            if tag.name == "b": desc.append(tag.text.strip() + "\n")
+            # Grab content elements as is (get("class") is None when the tag has no class attribute)
+            if "content" in (tag.get("class") or []): desc.append(tag.text.strip() + "\n" + "\n")
+
+            # Transform control information (but only if a direct description is not provided)
+            if tag.get("id") == "GameKey" and not has_ptex:
+                for ul in tag.children:
+                    # Skip random strings
+                    if isinstance(ul, bs4.element.NavigableString): continue
+                    # Loop over each list
+                    for li in ul.children:
+                        # Skip random strings (the check must be on li, the item being iterated)
+                        if isinstance(li, bs4.element.NavigableString): continue
+
+                        for elem in li.children:
+                            # Add text as is
+                            if isinstance(elem, bs4.element.NavigableString):
+                                desc.append(str(elem)+" ")
+                                continue
+
+                            # Only Span elements have their class-name translated to text
+                            if elem.name != "span": continue
+
+                            cs = (elem.get("class") or [""])[0]  # spans without a class are skipped below
+                            if not cs: continue
+                            if cs.startswith("player"):
+                                desc.append("玩家" + cs[6:] + " ")
+                            elif cs == "ico_c_arrows":
+                                desc.append("Arrow Keys ")
+                            elif cs == "ico_c_wasd":
+                                desc.append("WASD ")
+                            elif cs.startswith("ico_c_"):
+                                desc.append(cs[6:].title() + " ")
+                            elif elem.text:
+                                desc.append(elem.text + " ")
+
+                    # After a list ends add a newline for the next list.
+                    desc.append("\n")
+
+        self.desc = ''.join(desc)
+
+        headtxt = str(soup.head)
+
+        # Screenshot
+        try:
+            self.ss = "https:" + SCREENSHOT.search(headtxt)[1].strip()
+        except Exception:  # best-effort: a missing screenshot only warns
+            fpclib.debug("Screenshot not found", 1, pre="[WARN] ")
+
+        # Platform detection (Flash, Unity, and HTML5)
+        try:
+            is_html = bool(int(IS_HTML5.search(headtxt)[1]))
+        except Exception:  # no isHTML5 variable in the head: treat as not HTML5
+            is_html = False
+        self.embed = fpclib.normalize(self.src, False)
+        self.cdn = "http://sda.4399.com/4399swf" + GAMEPATH.search(headtxt)[1]
+
+        dims = DIMS.search(headtxt)
+        self.dims = (dims[1], dims[2])
+
+        if is_html:
+            self.platform = "HTML5"
+            self.app = fpclib.FPNAVIGATOR
+            self.cmd = self.embed
+        elif self.cdn.endswith(".swf"):
+            self.platform = "Flash"
+            self.app = fpclib.FLASH
+            self.cmd = self.cdn
+            self.add_app("Embedded Page", self.embed, fpclib.FPNAVIGATOR)
+        else:
+            self.platform = "Unity"
+            self.app = fpclib.UNITY
+            self.cmd = self.embed
+
+    def get_files(self):
+        # Write the embed page (an <object> for Flash, otherwise an <iframe>) that points at self.cdn
+        if self.platform == "Flash":
+            html = FLASH_EMBED % (self.dims[0], self.dims[1], self.cdn)
+        else:
+            html = HTML_EMBED % (self.dims[0], self.dims[1], self.cdn)
+        fpclib.write(self.embed[self.embed.index("://")+3:], html)  # path is self.embed without its "scheme://" prefix
+
+        # Download the game's true embedded file (self.cdn) with spoof=True
+        fpclib.download_all((self.cdn,), spoof=True)
diff --git a/sites/defs.txt b/sites/defs.txt
@@ -1,4 +1,4 @@
-1725250543.8975272
+1725346764.5213957
 AddictingGames.py
 Construct.py
 CoolmathGames.py
@@ -15,4 +15,5 @@ Miniclip.py
 Newgrounds.py
 Therese.py
 Unknown.py
-Y8.py
+Y8.py
+c4399.py