From ac5bc2f915b9fef5e76acadcf3080e69d35bdb57 Mon Sep 17 00:00:00 2001 From: miro Date: Tue, 31 Dec 2024 02:17:16 +0000 Subject: [PATCH 1/7] refactor: new common query decorators companion to https://github.com/OpenVoiceOS/OVOS-workshop/pull/315 --- __init__.py | 51 ++++++++++++++++++++++----------------------------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/__init__.py b/__init__.py index f47e79c..f9ac401 100755 --- a/__init__.py +++ b/__init__.py @@ -11,8 +11,7 @@ # limitations under the License. import os.path import re -from os.path import dirname, join -from typing import Optional +from typing import Optional, Tuple import requests from ovos_bus_client.session import SessionManager, Session @@ -22,9 +21,9 @@ from ovos_utils.gui import can_use_gui from ovos_utils.log import LOG from ovos_utils.process_utils import RuntimeRequirements -from ovos_workshop.decorators import intent_handler +from ovos_workshop.decorators import intent_handler, common_query from ovos_workshop.intents import IntentBuilder -from ovos_workshop.skills.common_query_skill import CommonQuerySkill, CQSMatchLevel +from ovos_workshop.skills.ovos import OVOSSkill from padacioso import IntentContainer from padacioso.bracket_expansion import expand_parentheses from quebra_frases import sentence_tokenize @@ -135,7 +134,7 @@ def get_expanded_answer(self, query: str, return steps -class WikipediaSkill(CommonQuerySkill): +class WikipediaSkill(OVOSSkill): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.session_results = {} @@ -221,9 +220,21 @@ def handle_tell_more(self, message): self.speak_result(sess) # common query - def CQS_match_query_phrase(self, phrase): + def cq_callback(self, utterance: str, answer: str, lang: str): + """ If selected show gui """ sess = SessionManager.get() - query = self.wiki.extract_keyword(phrase, lang=sess.lang) + self.display_ddg(sess) + if sess.session_id in self.session_results: + self.display_wiki_entry() + else: + LOG.error(f"{sess.session_id} not in " + f"{list(self.session_results.keys())}") + self.set_context("WikiKnows", utterance) + + @common_query(callback=cq_callback) + def match_common_query(self, phrase: str, lang: str) -> Tuple[str, float]: + sess = SessionManager.get() + query = self.wiki.extract_keyword(phrase, lang=lang) if not query: # doesnt look like a question we can answer at all return None @@ -232,7 +243,7 @@ def CQS_match_query_phrase(self, phrase): "query": query, "results": [], "idx": 0, - "lang": sess.lang, + "lang": lang, "title": phrase, "image": None } @@ -241,26 +252,7 @@ def CQS_match_query_phrase(self, phrase): self.log.info(f"Wikipedia answer: {summary}") self.session_results[sess.session_id]["idx"] += 1 # spoken by common query self.session_results[sess.session_id]["title"] = title or phrase - return ( - phrase, - CQSMatchLevel.GENERAL, - summary, {"query": phrase, - "image": self.session_results[sess.session_id].get("image"), - "title": title, - "answer": summary}, - ) - - def CQS_action(self, phrase, data): - """If selected show gui""" - - sess = SessionManager.get() - if sess.session_id in self.session_results: - self.display_wiki_entry() - else: - LOG.error(f"{sess.session_id} not in " - f"{list(self.session_results.keys())}") - - self.set_context("WikiKnows", data.get("title") or phrase) + return summary, 0.6 # wikipedia def ask_the_wiki(self, sess: Session): @@ -329,7 +321,7 @@ def stop_session(self, sess): from ovos_utils.fakebus import FakeBus s = WikipediaSkill(bus=FakeBus(), skill_id="wiki.skill") - 
print(s.CQS_match_query_phrase("quem é Elon Musk")) + print(s.match_common_query("quem é Elon Musk", "pt")) # ('who is Elon Musk', , 'The Musk family is a wealthy family of South African origin that is largely active in the United States and Canada.', # {'query': 'who is Elon Musk', 'image': None, 'title': 'Musk Family', # 'answer': 'The Musk family is a wealthy family of South African origin that is largely active in the United States and Canada.'}) @@ -337,6 +329,7 @@ def stop_session(self, sess): d = WikipediaSolver() query = "who is Isaac Newton" + print(d.extract_keyword(query, "en-us")) assert d.extract_keyword(query, "en-us") == "Isaac Newton" # full answer From 0ccf788024ae54036d29dedf7f61312fb58f9674 Mon Sep 17 00:00:00 2001 From: miro Date: Tue, 31 Dec 2024 03:20:38 +0000 Subject: [PATCH 2/7] fix: locale lang folder matching + improve disambiguation --- __init__.py | 72 +++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 50 insertions(+), 22 deletions(-) diff --git a/__init__.py b/__init__.py index f9ac401..9033bc1 100755 --- a/__init__.py +++ b/__init__.py @@ -11,13 +11,14 @@ # limitations under the License. import os.path import re -from typing import Optional, Tuple +from typing import Optional, Tuple, List import requests +from langcodes import closest_supported_match from ovos_bus_client.session import SessionManager, Session from ovos_plugin_manager.templates.solvers import QuestionSolver -from ovos_utils import classproperty -from ovos_utils import flatten_list +from ovos_utils import classproperty, flatten_list +from ovos_utils.bracket_expansion import expand_template from ovos_utils.gui import can_use_gui from ovos_utils.log import LOG from ovos_utils.process_utils import RuntimeRequirements @@ -25,16 +26,18 @@ from ovos_workshop.intents import IntentBuilder from ovos_workshop.skills.ovos import OVOSSkill from padacioso import IntentContainer -from padacioso.bracket_expansion import expand_parentheses from quebra_frases import sentence_tokenize -def rm_parentheses(text: str): +def rm_parentheses(text: str) -> str: """helper to remove the text between paranthesis in a wikipedia summary, makes the text more natural and speakable""" return re.sub(r"\((.*?)\)", "", text).replace(" ", " ") +WikiMatches = List[Tuple[str, List[str], Optional[str]]] # (title, sentences, img_url) + + class WikipediaSolver(QuestionSolver): priority = 40 enable_tx = False @@ -81,14 +84,18 @@ def get_data(self, query: str, LOG.debug(f"WikiSolver Fallback, new query: {q2}") return self.get_data(q2, lang=lang, units=units) + LOG.debug(f"matched {len(res)} wikipedia pages") + summaries: WikiMatches = [] for r in res: title = r["title"] + if "(disambiguation)" in title: + continue pid = str(r["pageid"]) results_url = f"https://{lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts|pageimages&exintro&explaintext&redirects=1&pageids=" + pid r = requests.get(results_url).json() summary = r['query']['pages'][pid]['extract'] - img = None + img: Optional[str] = None if "thumbnail" in r['query']['pages'][pid]: thumbnail = r['query']['pages'][pid]['thumbnail']['source'] parts = thumbnail.split("/")[:-1] @@ -97,8 +104,24 @@ def get_data(self, query: str, summary = rm_parentheses(summary) # normalize to make more speakable - ans = flatten_list([sentence_tokenize(s) for s in summary.split("\n")]) - return {"title": title, "short_answer": ans[0], "summary": summary, "img": img} + ans: List[str] = flatten_list([sentence_tokenize(s) for s in summary.split("\n")]) + 
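+            # each page contributes a (title, sentences, image) tuple; when the
+            # search returns several pages, their first sentences get merged into
+            # a single disambiguation-style answer further down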
summaries.append((title, ans, img)) + + if summaries: + if len(summaries) == 1: + return {"title": summaries[0][0], + "short_answer": summaries[0][1][0], + "summary": "\n".join(summaries[0][1]), + "img": summaries[0][-1]} + + final_ans = "\n".join([sentences[0] for title, sentences, _ in summaries[:3]]) + final_sum = "\n\n".join([title + " - " + ".\n".join(sents) + for title, sents, img in summaries]) + return {"title": query, + "short_answer": final_ans, + "summary": final_sum, + "img": summaries[0][-1]} + return {} def get_spoken_answer(self, query: str, @@ -143,20 +166,24 @@ def __init__(self, *args, **kwargs): def register_kw_xtract(self): """internal padacioso intents for kw extraction""" + supported = os.listdir(f"{self.root_dir}/locale") for lang in self.native_langs: - filename = f"{self.root_dir}/locale/{lang}/query.intent" + + lang2 = closest_supported_match(lang, supported, 10) + if not lang2: + LOG.warning(f"'{self.root_dir}/locale/{lang}' directory not found! wikipedia will be disabled for '{lang}'") + continue + + filename = f"{self.root_dir}/locale/{lang2}/query.intent" if not os.path.isfile(filename): - LOG.warning(f"{filename} not found! wikipedia common QA will be disabled for '{lang}'") + LOG.warning(f"{filename} not found! wikipedia will be disabled for '{lang}'") continue samples = [] with open(filename) as f: for l in f.read().split("\n"): if not l.strip() or l.startswith("#"): continue - if "(" in l: - samples += expand_parentheses(l) - else: - samples.append(l) + samples += expand_template(l) self.wiki.register_kw_extractors(samples, lang=lang) @classproperty @@ -320,20 +347,21 @@ def stop_session(self, sess): if __name__ == "__main__": from ovos_utils.fakebus import FakeBus + # print(WikipediaSolver().get_spoken_answer("venus", "en")) + # print(WikipediaSolver().get_spoken_answer("elon musk", "en")) + s = WikipediaSkill(bus=FakeBus(), skill_id="wiki.skill") print(s.match_common_query("quem é Elon Musk", "pt")) # ('who is Elon Musk', , 'The Musk family is a wealthy family of South African origin that is largely active in the United States and Canada.', # {'query': 'who is Elon Musk', 'image': None, 'title': 'Musk Family', # 'answer': 'The Musk family is a wealthy family of South African origin that is largely active in the United States and Canada.'}) - d = WikipediaSolver() - query = "who is Isaac Newton" - print(d.extract_keyword(query, "en-us")) - assert d.extract_keyword(query, "en-us") == "Isaac Newton" + print(s.wiki.extract_keyword(query, "en-us")) + assert s.wiki.extract_keyword(query, "en-us") == "Isaac Newton" # full answer - ans = d.spoken_answer(query) + ans = s.wiki.spoken_answer(query) print(ans) # Sir Isaac Newton (25 December 1642 – 20 March 1726/27) was an English mathematician, physicist, astronomer, alchemist, theologian, and author (described in his time as a "natural philosopher") widely recognised as one of the greatest mathematicians and physicists of all time and among the most influential scientists. He was a key figure in the philosophical revolution known as the Enlightenment. His book Philosophiæ Naturalis Principia Mathematica (Mathematical Principles of Natural Philosophy), first published in 1687, established classical mechanics. Newton also made seminal contributions to optics, and shares credit with German mathematician Gottfried Wilhelm Leibniz for developing infinitesimal calculus. 
# In the Principia, Newton formulated the laws of motion and universal gravitation that formed the dominant scientific viewpoint until it was superseded by the theory of relativity. Newton used his mathematical description of gravity to derive Kepler's laws of planetary motion, account for tides, the trajectories of comets, the precession of the equinoxes and other phenomena, eradicating doubt about the Solar System's heliocentricity. He demonstrated that the motion of objects on Earth and celestial bodies could be accounted for by the same principles. Newton's inference that the Earth is an oblate spheroid was later confirmed by the geodetic measurements of Maupertuis, La Condamine, and others, convincing most European scientists of the superiority of Newtonian mechanics over earlier systems. @@ -342,7 +370,7 @@ def stop_session(self, sess): query = "venus" # chunked answer, "tell me more" - for sentence in d.long_answer(query): + for sentence in s.wiki.long_answer(query): print(sentence["title"]) print(sentence["summary"]) print(sentence.get("img")) @@ -408,8 +436,8 @@ def stop_session(self, sess): # lang support query = "Quem é Isaac Newton" - sentence = d.spoken_answer(query, context={"lang": "pt"}) - assert d.extract_keyword(query, "pt") == "Isaac Newton" + sentence = s.wiki.spoken_answer(query, context={"lang": "pt"}) + assert s.wiki.extract_keyword(query, "pt") == "Isaac Newton" print(sentence) # Sir Isaac Newton (25 de dezembro de 1642 - 20 de março de 1726/27) foi um matemático, físico, astrônomo, alquimista, teólogo e autor (descrito em seu tempo como um "filósofo natural") amplamente reconhecido como um dos maiores matemáticos e físicos de todos os tempos e entre os cientistas mais influentes. Ele era uma figura chave na revolução filosófica conhecida como Iluminismo. Seu livro Philosophiæ Naturalis Principia Mathematica (Princípios matemáticos da Filosofia Natural), publicado pela primeira vez em 1687, estabeleceu a mecânica clássica. Newton também fez contribuições seminais para a óptica, e compartilha crédito com o matemático alemão Gottfried Wilhelm Leibniz para desenvolver cálculo infinitesimal. # No Principia, Newton formulou as leis do movimento e da gravitação universal que formaram o ponto de vista científico dominante até ser superado pela teoria da relatividade. Newton usou sua descrição matemática da gravidade para derivar as leis de Kepler do movimento planetário, conta para as marés, as trajetórias dos cometas, a precessão dos equinócios e outros fenômenos, erradicando dúvidas sobre a heliocentricidade do Sistema Solar. Ele demonstrou que o movimento de objetos na Terra e corpos celestes poderia ser contabilizado pelos mesmos princípios. A inferência de Newton de que a Terra é um esferóide oblate foi mais tarde confirmada pelas medidas geodésicas de Maupertuis, La Condamine, e outros, convencendo a maioria dos cientistas europeus da superioridade da mecânica newtoniana sobre sistemas anteriores. From d16b32597693aba3b1dfff4dea821ff73d78f0a7 Mon Sep 17 00:00:00 2001 From: miro Date: Tue, 31 Dec 2024 03:38:08 +0000 Subject: [PATCH 3/7] speed up --- __init__.py | 118 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 69 insertions(+), 49 deletions(-) diff --git a/__init__.py b/__init__.py index 9033bc1..8a13cd9 100755 --- a/__init__.py +++ b/__init__.py @@ -9,9 +9,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
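# The gist of this "speed up" commit: instead of requesting each matched page's
# extract one HTTP call at a time, get_data() now fans the per-page lookups out
# to a thread pool and reassembles them in the original search order. A minimal
# sketch of that pattern, with illustrative names only:
#
#   import concurrent.futures
#
#   def fetch_all(pids, fetch_one):
#       results = [None] * len(pids)
#       with concurrent.futures.ThreadPoolExecutor() as pool:
#           futures = {pool.submit(fetch_one, pid): idx for idx, pid in enumerate(pids)}
#           for fut in concurrent.futures.as_completed(futures):
#               results[futures[fut]] = fut.result()
#       return [r for r in results if r is not None]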
+import concurrent.futures import os.path import re -from typing import Optional, Tuple, List +from typing import Optional, Tuple import requests from langcodes import closest_supported_match @@ -35,15 +36,12 @@ def rm_parentheses(text: str) -> str: return re.sub(r"\((.*?)\)", "", text).replace(" ", " ") -WikiMatches = List[Tuple[str, List[str], Optional[str]]] # (title, sentences, img_url) - - class WikipediaSolver(QuestionSolver): priority = 40 enable_tx = False kw_matchers = {} - # utils to extract keyword from text + # Utils to extract keywords from text @classmethod def register_kw_extractors(cls, samples: list, lang: str): lang = lang.split("-")[0] @@ -65,54 +63,75 @@ def extract_keyword(cls, utterance: str, lang: str): LOG.debug(f"Could not extract search keyword for '{lang}' from '{utterance}'") return kw - # abstract Solver methods to implement - def get_data(self, query: str, - lang: Optional[str] = None, - units: Optional[str] = None): - """ - query assured to be in self.default_lang - return a dict response - """ - LOG.debug(f"WikiSolver query: {query}") - lang = lang or self.default_lang - lang = lang.split("-")[0] - url = f"https://{lang}.wikipedia.org/w/api.php?action=query&list=search&srsearch={query}&format=json" - res = requests.get(url).json()["query"]["search"] - if not res: - q2 = self.extract_keyword(query, lang) - if q2 and q2 != query: - LOG.debug(f"WikiSolver Fallback, new query: {q2}") - return self.get_data(q2, lang=lang, units=units) - - LOG.debug(f"matched {len(res)} wikipedia pages") - summaries: WikiMatches = [] - for r in res: - title = r["title"] - if "(disambiguation)" in title: - continue - pid = str(r["pageid"]) - results_url = f"https://{lang}.wikipedia.org/w/api.php?format=json&action=query&prop=extracts|pageimages&exintro&explaintext&redirects=1&pageids=" + pid - r = requests.get(results_url).json() - - summary = r['query']['pages'][pid]['extract'] - img: Optional[str] = None - if "thumbnail" in r['query']['pages'][pid]: - thumbnail = r['query']['pages'][pid]['thumbnail']['source'] + def get_page_data(self, pid: str, lang: str): + """Fetch detailed data for a single Wikipedia page.""" + url = ( + f"https://{lang}.wikipedia.org/w/api.php?format=json&action=query&" + f"prop=extracts|pageimages&exintro&explaintext&redirects=1&pageids={pid}" + ) + try: + response = requests.get(url, timeout=5).json() + page = response["query"]["pages"][pid] + summary = rm_parentheses(page.get("extract", "")) + img = None + if "thumbnail" in page: + thumbnail = page["thumbnail"]["source"] parts = thumbnail.split("/")[:-1] - img = '/'.join((part for part in parts if part != 'thumb')) - LOG.debug(f"Found image: {img}") - - summary = rm_parentheses(summary) # normalize to make more speakable + img = "/".join(part for part in parts if part != "thumb") + ans = flatten_list([sentence_tokenize(s) for s in summary.split("\n")]) + return page["title"], ans, img + except Exception as e: + LOG.error(f"Error fetching page data for PID {pid}: {e}") + return None, None, None + + def get_data(self, query: str, lang: Optional[str] = None, units: Optional[str] = None): + """Fetch Wikipedia search results and detailed data concurrently.""" + LOG.debug(f"WikiSolver query: {query}") + lang = (lang or self.default_lang).split("-")[0] + search_url = ( + f"https://{lang}.wikipedia.org/w/api.php?action=query&list=search&" + f"srsearch={query}&format=json" + ) - ans: List[str] = flatten_list([sentence_tokenize(s) for s in summary.split("\n")]) - summaries.append((title, ans, img)) + try: + 
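+            # the list=search endpoint only returns page ids and titles; the full
+            # extract for each hit is fetched below via get_page_data(), in parallel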
search_results = requests.get(search_url, timeout=5).json().get("query", {}).get("search", []) + except Exception as e: + LOG.error(f"Error fetching search results: {e}") + search_results = [] + + if not search_results: + fallback_query = self.extract_keyword(query, lang) + if fallback_query and fallback_query != query: + LOG.debug(f"WikiSolver Fallback, new query: {fallback_query}") + return self.get_data(fallback_query, lang=lang, units=units) + return {} + + LOG.debug(f"Matched {len(search_results)} Wikipedia pages") + + # Prepare for parallel fetch and maintain original order + summaries = [None] * len(search_results) # List to hold results in original order + with concurrent.futures.ThreadPoolExecutor() as executor: + future_to_idx = { + executor.submit(self.get_page_data, str(r["pageid"]), lang): idx + for idx, r in enumerate(search_results) + if "(disambiguation)" not in r["title"] + } + + for future in concurrent.futures.as_completed(future_to_idx): + idx = future_to_idx[future] # Get original index from future + title, ans, img = future.result() + if title and ans: + summaries[idx] = (title, ans, img) + + # Filter out None entries and sort based on original order + summaries = [entry for entry in summaries if entry is not None] if summaries: if len(summaries) == 1: return {"title": summaries[0][0], "short_answer": summaries[0][1][0], "summary": "\n".join(summaries[0][1]), - "img": summaries[0][-1]} + "img": summaries[0][2]} final_ans = "\n".join([sentences[0] for title, sentences, _ in summaries[:3]]) final_sum = "\n\n".join([title + " - " + ".\n".join(sents) @@ -120,7 +139,7 @@ def get_data(self, query: str, return {"title": query, "short_answer": final_ans, "summary": final_sum, - "img": summaries[0][-1]} + "img": summaries[0][2]} return {} @@ -347,9 +366,6 @@ def stop_session(self, sess): if __name__ == "__main__": from ovos_utils.fakebus import FakeBus - # print(WikipediaSolver().get_spoken_answer("venus", "en")) - # print(WikipediaSolver().get_spoken_answer("elon musk", "en")) - s = WikipediaSkill(bus=FakeBus(), skill_id="wiki.skill") print(s.match_common_query("quem é Elon Musk", "pt")) # ('who is Elon Musk', , 'The Musk family is a wealthy family of South African origin that is largely active in the United States and Canada.', @@ -360,6 +376,10 @@ def stop_session(self, sess): print(s.wiki.extract_keyword(query, "en-us")) assert s.wiki.extract_keyword(query, "en-us") == "Isaac Newton" + print(s.wiki.get_spoken_answer("venus", "en")) + print(s.wiki.get_spoken_answer("elon musk", "en")) + + exit() # full answer ans = s.wiki.spoken_answer(query) print(ans) From 599f452aad3a7532d334ea7f7f601f5d83d0c848 Mon Sep 17 00:00:00 2001 From: miro Date: Tue, 31 Dec 2024 03:52:17 +0000 Subject: [PATCH 4/7] fix --- __init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/__init__.py b/__init__.py index 8a13cd9..88edb8d 100755 --- a/__init__.py +++ b/__init__.py @@ -269,7 +269,6 @@ def handle_tell_more(self, message): def cq_callback(self, utterance: str, answer: str, lang: str): """ If selected show gui """ sess = SessionManager.get() - self.display_ddg(sess) if sess.session_id in self.session_results: self.display_wiki_entry() else: From 4f7e8f420a06758bdd4dbe1f570e0190aa8330df Mon Sep 17 00:00:00 2001 From: JarbasAI <33701864+JarbasAl@users.noreply.github.com> Date: Tue, 31 Dec 2024 04:24:03 +0000 Subject: [PATCH 5/7] Update requirements.txt --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index 
3054e07..93dd3da 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ ovos-utils>=0.0.38,<1.0.0 -ovos_workshop>=0.0.15,<4.0.0 +ovos_workshop>=3.3.0,<4.0.0 ovos-plugin-manager>=0.0.26,<1.0.0 -ovos-bus-client>=1.0.1 \ No newline at end of file +ovos-bus-client>=1.0.1 From 913d65c8de580be4450ce76b5324222167d26d25 Mon Sep 17 00:00:00 2001 From: miro Date: Tue, 31 Dec 2024 04:39:57 +0000 Subject: [PATCH 6/7] fix --- __init__.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/__init__.py b/__init__.py index 88edb8d..d3fa6cf 100755 --- a/__init__.py +++ b/__init__.py @@ -275,6 +275,10 @@ def cq_callback(self, utterance: str, answer: str, lang: str): LOG.error(f"{sess.session_id} not in " f"{list(self.session_results.keys())}") self.set_context("WikiKnows", utterance) + # cache long answer for "tell me more" + self.ask_the_wiki(sess) + self.session_results[sess.session_id]["idx"] += 1 # spoken by common query + self.session_results[sess.session_id]["title"] = utterance @common_query(callback=cq_callback) def match_common_query(self, phrase: str, lang: str) -> Tuple[str, float]: @@ -292,11 +296,9 @@ def match_common_query(self, phrase: str, lang: str) -> Tuple[str, float]: "title": phrase, "image": None } - title, summary = self.ask_the_wiki(sess) + summary = self.wiki.get_spoken_answer(query, lang=sess.lang, units=sess.system_unit) if summary: self.log.info(f"Wikipedia answer: {summary}") - self.session_results[sess.session_id]["idx"] += 1 # spoken by common query - self.session_results[sess.session_id]["title"] = title or phrase return summary, 0.6 # wikipedia @@ -366,7 +368,7 @@ def stop_session(self, sess): from ovos_utils.fakebus import FakeBus s = WikipediaSkill(bus=FakeBus(), skill_id="wiki.skill") - print(s.match_common_query("quem é Elon Musk", "pt")) + print(s.wiki.get_spoken_answer("quem é Elon Musk", "pt")) # ('who is Elon Musk', , 'The Musk family is a wealthy family of South African origin that is largely active in the United States and Canada.', # {'query': 'who is Elon Musk', 'image': None, 'title': 'Musk Family', # 'answer': 'The Musk family is a wealthy family of South African origin that is largely active in the United States and Canada.'}) From 628ddd51912ade4c57faf9a7df63e2041d376c24 Mon Sep 17 00:00:00 2001 From: miro Date: Tue, 31 Dec 2024 04:48:27 +0000 Subject: [PATCH 7/7] improve skipping of disambiguation pages --- __init__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/__init__.py b/__init__.py index d3fa6cf..3fac0f3 100755 --- a/__init__.py +++ b/__init__.py @@ -73,12 +73,15 @@ def get_page_data(self, pid: str, lang: str): response = requests.get(url, timeout=5).json() page = response["query"]["pages"][pid] summary = rm_parentheses(page.get("extract", "")) + if "commonly refers to:" in summary: + return None, None, None # disambiguation list page img = None if "thumbnail" in page: thumbnail = page["thumbnail"]["source"] parts = thumbnail.split("/")[:-1] img = "/".join(part for part in parts if part != "thumb") ans = flatten_list([sentence_tokenize(s) for s in summary.split("\n")]) + return page["title"], ans, img except Exception as e: LOG.error(f"Error fetching page data for PID {pid}: {e}") @@ -379,6 +382,7 @@ def stop_session(self, sess): print(s.wiki.get_spoken_answer("venus", "en")) print(s.wiki.get_spoken_answer("elon musk", "en")) + print(s.wiki.get_spoken_answer("mercury", "en")) exit() # full answer
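    # rough sanity check for the disambiguation handling above, reusing the
    # "mercury" query added to this demo (needs network access; sketch only):
    #   data = s.wiki.get_data("mercury", lang="en")
    #   assert "commonly refers to:" not in data.get("summary", "")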