From 628ddd51912ade4c57faf9a7df63e2041d376c24 Mon Sep 17 00:00:00 2001 From: miro Date: Tue, 31 Dec 2024 04:48:27 +0000 Subject: [PATCH] improve skipping of disambiguation pages --- __init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/__init__.py b/__init__.py index d3fa6cf..3fac0f3 100755 --- a/__init__.py +++ b/__init__.py @@ -73,12 +73,15 @@ def get_page_data(self, pid: str, lang: str): response = requests.get(url, timeout=5).json() page = response["query"]["pages"][pid] summary = rm_parentheses(page.get("extract", "")) + if "commonly refers to:" in summary: + return None, None, None # disambiguation list page img = None if "thumbnail" in page: thumbnail = page["thumbnail"]["source"] parts = thumbnail.split("/")[:-1] img = "/".join(part for part in parts if part != "thumb") ans = flatten_list([sentence_tokenize(s) for s in summary.split("\n")]) + return page["title"], ans, img except Exception as e: LOG.error(f"Error fetching page data for PID {pid}: {e}") @@ -379,6 +382,7 @@ def stop_session(self, sess): print(s.wiki.get_spoken_answer("venus", "en")) print(s.wiki.get_spoken_answer("elon musk", "en")) + print(s.wiki.get_spoken_answer("mercury", "en")) exit() # full answer