Skip to content

Commit 488a2a1

Browse files
add url spoofing and support 4399
1 parent c489695 commit 488a2a1

File tree

4 files changed

+176
-10
lines changed

4 files changed

+176
-10
lines changed

fpcurator.py

Lines changed: 25 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ def toggle_console():
9191
<li><b>Keep URLVars</b> - When checked, the downloader will append url vars present on links being downloaded to the end of the html file. This is only necessary when you have two links to the same webpage that generate different html due to the url vars.</li>
9292
<li><b>Clear Done URLs</b> - When checked, the downloader will clear any urls in the list when they are downloaded. Errored urls will remain in the list.</li>
9393
<li><b>Notify When Done</b> - When checked, the downloader will show a message box when it is done downloading.</li>
94+
<li><b>Spoof Referrer</b> - When checked, the downloader will spoof the referrer of the urls to be the url itself.</li>
9495
</ul>
9596
Here are some basic usage steps:
9697
<ol>
@@ -225,8 +226,8 @@ def toggle_console():
225226
# This uuid uniquely defines fpcurator. (there is a 0 on the end after the text)
226227
UUID = '51be8a01-3307-4103-8913-c2f70e64d83'
227228

228-
TITLE = "fpcurator v1.7.0"
229-
ABOUT = "Created by Zach K - v1.7.0"
229+
TITLE = "fpcurator v1.7.1"
230+
ABOUT = "Created by Zach K - v1.7.1"
230231
VER = 7
231232

232233
SITES_FOLDER = "sites"
@@ -454,6 +455,7 @@ def save(self):
454455
downloader["keep_vars"] = self.downloader.keep_vars.get()
455456
downloader["clear"] = self.downloader.clear.get()
456457
downloader["show_done"] = self.downloader.show_done.get()
458+
downloader["spoof"] = self.downloader.spoof.get()
457459

458460
downloader["urls"] = self.downloader.stxt.txt.get("0.0", "end").strip()
459461

@@ -519,6 +521,7 @@ def load(self):
519521
self.downloader.keep_vars.set(downloader["keep_vars"])
520522
self.downloader.clear.set(downloader["clear"])
521523
self.downloader.show_done.set(downloader["show_done"])
524+
self.downloader.spoof.set(downloader["spoof"])
522525

523526
txt = self.downloader.stxt.txt
524527
txt.delete("0.0", "end")
@@ -826,6 +829,8 @@ def __init__(self, parent):
826829
self.original.set(True)
827830
self.replace_https = tk.BooleanVar()
828831
self.replace_https.set(True)
832+
self.spoof = tk.BooleanVar()
833+
self.spoof.set(True)
829834

830835
original = tk.Checkbutton(cframe, bg="white", text='Rename "web.archive.org"', var=self.original) # pyright: ignore [reportCallIssue] # tkinter does have "var"
831836
original.pack(side="left")
@@ -835,17 +840,25 @@ def __init__(self, parent):
835840
clear.pack(side="left")
836841
show_done = tk.Checkbutton(cframe, bg="white", text='Notify When Done', var=self.show_done) # pyright: ignore [reportCallIssue] # tkinter does have "var"
837842
show_done.pack(side="left", padx=5)
843+
spoof = tk.Checkbutton(cframe, bg="white", text='Spoof Referrer', var=self.spoof) # pyright: ignore [reportCallIssue] # tkinter does have "var"
844+
spoof.pack(side="left")
838845

839846
Tooltip(original, text="When checked, the downloader will put all urls downloaded from the web archive back into their original domains.")
840847
Tooltip(keep_vars, text="When checked, the downloader will append url vars present on links being downloaded to the end of the html file. This is only necessary when you have two links to the same webpage that generate different html due to the url vars.")
841848
Tooltip(clear, text="When checked, the downloader will clear any urls in the list when they are downloaded. Errored urls will remain in the list.")
842849
Tooltip(show_done, text="When checked, the downloader will show a message box when it is done downloading.")
850+
Tooltip(spoof, text="When checked, the downloader will spoof the referrer of the urls to be the url itself.")
843851

844-
# Create panel for inputting urls to download
845-
lbl = tk.Label(self, bg="white", text=" Put URLs to download in this box:")
852+
# Panels
853+
lbl = tk.Label(self, bg="white", text="Put URLs to download at the top and headers at the bottom.")
846854
lbl.pack(fill="x")
847-
self.stxt = ScrolledText(self, width=10, height=10, wrap="none")
848-
self.stxt.pack(expand=True, fill="both", padx=5, pady=5)
855+
txts = tk.Frame(self, bg="white")
856+
txts.pack(expand=True, fill="both", padx=5, pady=(0, 5))
857+
858+
self.stxt = ScrolledText(txts, width=10, height=10, wrap="none")
859+
self.stxt.pack(side="top", expand=True, fill="both")
860+
self.stxt_headers = ScrolledText(txts, width=10, height=10, wrap="none")
861+
self.stxt_headers.pack(side="top", expand=False, fill="both")
849862

850863
def folder(self):
851864
# For changing the output directory
@@ -856,10 +869,15 @@ def folder(self):
856869

857870
def i_download(self):
858871
txt = self.stxt.txt
872+
htxt = self.stxt_headers.txt
859873
try:
874+
headers = {}
875+
for key, value in [i.strip().split("=", 1) for i in htxt.get("0.0", "end").replace("\r\n", "\n").replace("\r", "\n").split("\n") if i.strip()]:
876+
headers[key.strip()] = value.strip()
877+
860878
links = [i.strip() for i in txt.get("0.0", "end").replace("\r\n", "\n").replace("\r", "\n").split("\n") if i.strip()]
861879
if links:
862-
errs = fpclib.download_all(links, self.output.get() or "output", not self.original.get(), self.keep_vars.get(), True)
880+
errs = fpclib.download_all(links, self.output.get() or "output", not self.original.get(), self.keep_vars.get(), True, spoof=self.spoof.get(), headers=headers)
863881
if self.show_done.get():
864882
if errs:
865883
if len(errs) == len(links):

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "fpcurator"
3-
version = "1.7.0"
3+
version = "1.7.1"
44
description = "fpcurator is a Python and fpclib powered tool for downloading urls, auto-generating curations, bulk searching for already curated games, and listing tags/platforms/games/animations for Flashpoint."
55
authors = ["mathgeniuszach <[email protected]>"]
66
readme = "README.md"

sites/c4399.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,147 @@
1+
import fpclib
2+
import requests
3+
import re
4+
import bs4
5+
6+
# URL pattern and definition version for this site module.
# NOTE(review): presumably consumed by fpcurator when picking a site definition — confirm against the loader.
regex = '4399.com'
ver = 7

# Game page URLs end in "_<digits>.htm". The dot is escaped so that only a
# literal "." is accepted (an unescaped "." would match any character).
GAME_URL = re.compile(r"_\d+\.htm$")
# Inline-script variables scraped from the page <head>.
IS_HTML5 = re.compile(r"var\s+isHTML5\s*=\s*(\d+)", re.IGNORECASE)
SCREENSHOT = re.compile(r'var\s+\w+GamePic\s*=\s*"(.*?)"', re.IGNORECASE)
GAMEPATH = re.compile(r'var\s+\w+GamePath\s*=\s*"(.*?)"', re.IGNORECASE)
# Captures the width (_w) and height (_h) declared back to back.
DIMS = re.compile(r'var\s+_w\s*=\s*(\d+);?\s*var\s+_h\s*=\s*(\d+)')

# %-format templates taking (width, height, source url). "%%" renders as a
# literal percent sign in the CSS.
HTML_EMBED = """<body>
<style>
body { background-color: #16202c; height: 100%%; margin: 0; }
iframe { position: absolute; top: 0; bottom: 0; left: 0; right: 0; margin: auto; }
</style>
<iframe width="%s" height="%s" src="%s"></iframe>
</body>
"""
FLASH_EMBED = """<body>
<style>
body { background-color: #16202c; height: 100%%; margin: 0; }
object { position: absolute; top: 0; bottom: 0; left: 0; right: 0; margin: auto; }
</style>
<object type="application/x-shockwave-flash" width="%s" height="%s" data="%s">
<param name="allowscriptaccess" value="always">
<param name="allowfullscreen" value="true">
<param name="allowfullscreeninteractive" value="true">
<param name="allownetworking" value="all">
<param name="wmode" value="direct">
</object>
</body>
"""
37+
38+
class c4399(fpclib.Curation):
    """Curation definition for game pages on 4399.com."""

    def soupify(self):
        """Download ``self.src`` and return it parsed with BeautifulSoup.

        When ``self.src`` does not look like a game page (per ``GAME_URL``),
        the link found under ``.play > a`` is followed instead and
        ``self.src`` is updated to that address before re-downloading.
        """
        with requests.get(self.src) as resp:
            soup = bs4.BeautifulSoup(resp.content, "html.parser")
        if not GAME_URL.search(self.src):
            # The href is appended to the site root, so it is presumably
            # site-relative (e.g. "/flash/..."); verify against a live page.
            self.src = "https://www.4399.com" + soup.select_one(".play > a")["href"]
            with requests.get(self.src) as resp:
                soup = bs4.BeautifulSoup(resp.content, "html.parser")
        return soup

    def parse(self, soup):
        """Fill in the curation's metadata from the parsed game page.

        Sets title, date, language, publisher, description, screenshot,
        platform, application path and launch command. A missing screenshot
        only logs a warning. A missing game path or missing dimensions raise
        ``TypeError`` (subscripting the ``None`` returned by ``re.search``).
        """
        # Basic metadata
        self.title = soup.select_one(".game-des > .name > a").text.strip()
        # The first three characters of the field are discarded
        # (presumably a label prefix — confirm against a live page).
        self.date = soup.select_one(".game-des > .sorts.cf > em:last-of-type").text.strip()[3:]
        self.lang = 'zh'
        self.pub = "4399"

        # Description transformation
        box = soup.select_one("#playmethod > .box-l")
        has_ptex = bool(box.select_one("#p-tex"))
        desc = []
        for tag in box.children:
            # Skip bare strings between elements
            if isinstance(tag, bs4.element.NavigableString):
                continue
            # Header elements are copied as is
            if tag.name == "b":
                desc.append(tag.text.strip() + "\n")
            # Content elements are copied as is; get("class") returns None
            # for a tag without a class attribute, so fall back to empty.
            if "content" in (tag.get("class") or ()):
                desc.append(tag.text.strip() + "\n" + "\n")

            # Control information is only transformed when the page does not
            # provide a direct description (#p-tex).
            if tag.get("id") != "GameKey" or has_ptex:
                continue
            for ul in tag.children:
                if isinstance(ul, bs4.element.NavigableString):
                    continue
                for li in ul.children:
                    # Test the item itself: strings have no child elements.
                    if isinstance(li, bs4.element.NavigableString):
                        continue

                    for elem in li.children:
                        # Text is added as is
                        if isinstance(elem, bs4.element.NavigableString):
                            desc.append(str(elem) + " ")
                            continue

                        # Only span elements have their class name
                        # translated to text
                        if elem.name != "span":
                            continue

                        classes = elem.get("class") or []
                        cs = classes[0] if classes else ""
                        if not cs:
                            continue
                        if cs.startswith("player"):
                            desc.append("玩家" + cs[6:] + " ")
                        elif cs == "ico_c_arrows":
                            desc.append("Arrow Keys ")
                        elif cs == "ico_c_wasd":
                            desc.append("WASD ")
                        elif cs.startswith("ico_c_"):
                            desc.append(cs[6:].title() + " ")
                        elif elem.text:
                            desc.append(elem.text + " ")

                # After a list ends add a newline for the next list.
                # NOTE(review): placement (per <ul>) is reconstructed from the
                # comments; confirm it was not meant to run per <li>.
                desc.append("\n")

        self.desc = ''.join(desc)

        headtxt = str(soup.head)

        # Screenshot (optional: warn instead of failing)
        shot = SCREENSHOT.search(headtxt)
        if shot:
            self.ss = "https:" + shot[1].strip()
        else:
            fpclib.debug("Screenshot not found", 1, pre="[WARN] ")

        # Platform detection (Flash, Unity, and HTML5)
        html5_flag = IS_HTML5.search(headtxt)
        is_html = bool(int(html5_flag[1])) if html5_flag else False

        self.embed = fpclib.normalize(self.src, False)
        self.cdn = "http://sda.4399.com/4399swf" + GAMEPATH.search(headtxt)[1]

        dims = DIMS.search(headtxt)
        self.dims = (dims[1], dims[2])

        if is_html:
            self.platform = "HTML5"
            self.app = fpclib.FPNAVIGATOR
            self.cmd = self.embed
        elif self.cdn.endswith(".swf"):
            self.platform = "Flash"
            self.app = fpclib.FLASH
            self.cmd = self.cdn
            self.add_app("Embedded Page", self.embed, fpclib.FPNAVIGATOR)
        else:
            self.platform = "Unity"
            self.app = fpclib.UNITY
            self.cmd = self.embed

    def get_files(self):
        """Write the embed page and download the game's embedded file."""
        # Flash games get an <object> wrapper; everything else an <iframe>.
        template = FLASH_EMBED if self.platform == "Flash" else HTML_EMBED
        html = template % (self.dims[0], self.dims[1], self.cdn)
        # The embed page is written to the embed URL with its scheme removed.
        fpclib.write(self.embed[self.embed.index("://")+3:], html)

        # Download the game's true embedded file
        fpclib.download_all((self.cdn,), spoof=True)

sites/defs.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
1725250543.8975272
1+
1725346764.5213957
22
AddictingGames.py
33
Construct.py
44
CoolmathGames.py
@@ -15,4 +15,5 @@ Miniclip.py
1515
Newgrounds.py
1616
Therese.py
1717
Unknown.py
18-
Y8.py
18+
Y8.py
19+
c4399.py

0 commit comments

Comments
 (0)