From 41fe0ece85f54a71b3e902e3da401f4f3fdf3c03 Mon Sep 17 00:00:00 2001 From: eight Date: Sun, 24 Dec 2023 16:18:01 +0800 Subject: [PATCH] Fix: seemh server detection (#362) * Fix: seemh server detection * Fix: support tw --- comiccrawler/mods/seemh.py | 6 ++++-- comiccrawler/util.py | 41 +++++++++++++++++++++++++++++++++----- 2 files changed, 40 insertions(+), 7 deletions(-) diff --git a/comiccrawler/mods/seemh.py b/comiccrawler/mods/seemh.py index d828f96..06c8f9d 100644 --- a/comiccrawler/mods/seemh.py +++ b/comiccrawler/mods/seemh.py @@ -13,6 +13,7 @@ from lzstring import LZString from ..core import Episode, grabhtml +from ..util import balance domain = ["seemh.com", "ikanman.com", "manhuagui.com", "www.mhgui.com"] name = "看漫畫" @@ -107,8 +108,9 @@ def get_images(html, url): corejs = grabhtml(urljoin(url, corejs_url), referer=url) # cache server list - servs = re.search(r"var servs=(.+)", configjs).group(1) - servs = eval(servs) + m = re.search(r"自动|自動", configjs) + s = balance(configjs, m.start(), "[", "]") + servs = eval(s) servs = [host["h"] for category in servs for host in category["hosts"]] global servers diff --git a/comiccrawler/util.py b/comiccrawler/util.py index 5686460..31db32f 100644 --- a/comiccrawler/util.py +++ b/comiccrawler/util.py @@ -33,15 +33,15 @@ def create_safefilepath_table(): ":": ":", "\"": """, "*": "*" - }) + }) table.update({ c: None for c in set(chr(i) for i in range(128)).difference(string.printable) - }) + }) table.update({ chr(i): " " for i in range(32) if chr(i) not in table - }) + }) return str.maketrans(table) - + safefilepath_table = create_safefilepath_table() dot_table = str.maketrans({".": "."}) @@ -71,8 +71,39 @@ def clean_tags(html): class MinimumAny: def __le__(self, other): return True - + def __eq__(self, other): return self is other MIN = MinimumAny() + +def balance(s: str, index: int, left="(", right=")", skip=0): + """Return the string inside (including) matched left and right brackets.""" + # backward search + count = 0 + for i in range(index, -1, -1): + if s[i] == right: + count += 1 + elif s[i] == left: + if count == -skip: + break + count -= 1 + else: + raise ValueError(f"Unbalanced brackets: {s}") + start = i + + # forward search + count = 0 + for j in range(index, len(s)): + if s[j] == left: + count += 1 + elif s[j] == right: + if count == -skip: + break + count -= 1 + else: + raise ValueError(f"Unbalanced brackets: {s}") + end = j + 1 + + return s[start:end] +