From 1e378823aadda6179079950e8fff56eb0d534da3 Mon Sep 17 00:00:00 2001 From: eight Date: Mon, 22 Jan 2024 16:32:32 +0800 Subject: [PATCH] Fix: some posts are missing in twitter (#371) --- comiccrawler/mods/twitter.py | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/comiccrawler/mods/twitter.py b/comiccrawler/mods/twitter.py index 4a46aeb..8b1804a 100644 --- a/comiccrawler/mods/twitter.py +++ b/comiccrawler/mods/twitter.py @@ -79,19 +79,34 @@ def get_episodes(html, url): def tweet_result_to_episode(tweet_result): try: - all_media = tweet_result["legacy"]["entities"]["media"] + tweet_result["legacy"]["extended_entities"]["media"] + legacy = tweet_result["legacy"] + except KeyError: + legacy = tweet_result["tweet"]["legacy"] + + try: + all_media = legacy["entities"]["media"] + legacy["extended_entities"]["media"] except KeyError: return None imgs = [find_media_source(m) for m in all_media] imgs = list(OrderedDict.fromkeys(imgs).keys()) # remove dup result = None + try: result = tweet_result["legacy"]["retweeted_status_result"]["result"] except KeyError: result = tweet_result - - screen_name = result["core"]["user_results"]["result"]["legacy"]["screen_name"] - id_str = result["legacy"]["id_str"] + try: + core = result["core"] + except KeyError: + core = result["tweet"]["core"] + screen_name = core["user_results"]["result"]["legacy"]["screen_name"] + + try: + legacy = result["legacy"] + except KeyError: + legacy = result["tweet"]["legacy"] + id_str = legacy["id_str"] + ep_url = f"https://twitter.com/{screen_name}/status/{id_str}" return Episode(