From b9ba99e189d9273b7f7451e7046f021d240265ac Mon Sep 17 00:00:00 2001 From: VisoTC Date: Sat, 2 Jul 2022 20:35:54 +0800 Subject: [PATCH] Fixed Zimuku provider to bypass yunsuo protection --- libs/subliminal_patch/providers/zimuku.py | 65 +++++++++++++++++------ 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/libs/subliminal_patch/providers/zimuku.py b/libs/subliminal_patch/providers/zimuku.py index ec3fa0864..469160315 100644 --- a/libs/subliminal_patch/providers/zimuku.py +++ b/libs/subliminal_patch/providers/zimuku.py @@ -25,7 +25,7 @@ from subliminal.subtitle import ( SUBTITLE_EXTENSIONS, fix_line_ending - ) +) from subliminal_patch.subtitle import ( Subtitle, guess_matches @@ -88,7 +88,7 @@ class ZimukuProvider(Provider): logger.info(str(supported_languages)) server_url = "http://zimuku.org" - search_url = "/search?q={}" + search_url = "/search?q={}&vertoken={}" download_url = "http://zimuku.org/" subtitle_class = ZimukuSubtitle @@ -96,6 +96,39 @@ class ZimukuProvider(Provider): def __init__(self): self.session = None + def stringToHex(self, s): + val = "" + for i in s: + val += hex(ord(i))[2:] + return val + vertoken = "" + location_re = re.compile( + r'self\.location = "(.*)" \+ stringToHex\(screendate\)') + + def yunsuo_bypass(self, url, *args, **kwargs): + i = -1 + while True: + i += 1 + r = self.session.get(url, *args, **kwargs) + if(r.status_code == 404): + tr = self.location_re.findall(r.text) + self.session.cookies.set("srcurl", self.stringToHex(r.url)) + if(tr): + verify_resp = self.session.get( + self.server_url+tr[0]+self.stringToHex("1080,1920"), allow_redirects=False) + if(verify_resp.status_code == 302 and self.session.cookies.get("security_session_verify") != None): + pass + continue + if len(self.location_re.findall(r.text)) == 0: + if(r.headers.get("Content-Type") == "text/html; charset=utf-8"): + v = ParserBeautifulSoup( + r.content.decode("utf-8", "ignore"), ["html.parser"] + ).find( + "input", attrs={'name': 'vertoken'}) + 
if(v): + self.vertoken = v.get("value") + return r + def initialize(self): self.session = Session() self.session.headers["User-Agent"] = AGENT_LIST[randint(0, len(AGENT_LIST) - 1)] @@ -104,7 +137,7 @@ def terminate(self): self.session.close() def _parse_episode_page(self, link, year): - r = self.session.get(link) + r = self.yunsuo_bypass(link) bs_obj = ParserBeautifulSoup( r.content.decode("utf-8", "ignore"), ["html.parser"] ) @@ -122,16 +155,16 @@ def _parse_episode_page(self, link, year): if ( "china" in img.attrs["src"] and "hongkong" in img.attrs["src"] - ): + ): language = Language("zho").add(Language('zho', 'TW', None)) logger.debug("language:"+str(language)) - elif ( + elif ( "china" in img.attrs["src"] or "jollyroger" in img.attrs["src"] ): language = Language("zho") elif "hongkong" in img.attrs["src"]: - language = Language('zho', 'TW', None) + language = Language('zho', 'TW', None) break sub_page_link = urljoin(self.server_url, a.attrs["href"]) backup_session = copy.deepcopy(self.session) @@ -144,6 +177,8 @@ def _parse_episode_page(self, link, year): return subs def query(self, keyword, season=None, episode=None, year=None): + if self.vertoken == "": + self.yunsuo_bypass(self.server_url + '/') params = keyword if season: params += ".S{season:02d}".format(season=season) @@ -152,9 +187,9 @@ def query(self, keyword, season=None, episode=None, year=None): logger.debug("Searching subtitles %r", params) subtitles = [] - search_link = self.server_url + text_type(self.search_url).format(params) - - r = self.session.get(search_link, timeout=30) + search_link = self.server_url + text_type(self.search_url).format(params, self.vertoken) + + r = self.yunsuo_bypass(search_link, timeout=30) r.raise_for_status() if not r.content: @@ -169,7 +204,7 @@ def query(self, keyword, season=None, episode=None, year=None): while parts: parts.reverse() redirect_url = urljoin(self.server_url, "".join(parts)) - r = self.session.get(redirect_url, timeout=30) + r = 
self.yunsuo_bypass(redirect_url, timeout=30) html = r.content.decode("utf-8", "ignore") parts = re.findall(pattern, html) logger.debug("search url located: " + redirect_url) @@ -238,14 +273,14 @@ def list_subtitles(self, video, languages): return subtitles def download_subtitle(self, subtitle): - def _get_archive_dowload_link(session, sub_page_link): - r = session.get(sub_page_link) + def _get_archive_dowload_link(yunsuopass, sub_page_link): + r = yunsuopass(sub_page_link) bs_obj = ParserBeautifulSoup( r.content.decode("utf-8", "ignore"), ["html.parser"] ) down_page_link = bs_obj.find("a", {"id": "down1"}).attrs["href"] down_page_link = urljoin(sub_page_link, down_page_link) - r = session.get(down_page_link) + r = yunsuopass(down_page_link) bs_obj = ParserBeautifulSoup( r.content.decode("utf-8", "ignore"), ["html.parser"] ) @@ -257,8 +292,8 @@ def _get_archive_dowload_link(session, sub_page_link): # download the subtitle logger.info("Downloading subtitle %r", subtitle) self.session = subtitle.session - download_link = _get_archive_dowload_link(self.session, subtitle.page_link) - r = self.session.get(download_link, headers={'Referer': subtitle.page_link}, timeout=30) + download_link = _get_archive_dowload_link(self.yunsuo_bypass, subtitle.page_link) + r = self.yunsuo_bypass(download_link, headers={'Referer': subtitle.page_link}, timeout=30) r.raise_for_status() try: filename = r.headers["Content-Disposition"]