Skip to content

Commit

Permalink
improve skipping of disambiguation pages
Browse files: browse the repository at this point in the history
  • Loading branch information
JarbasAl committed Dec 31, 2024
1 parent 913d65c commit 628ddd5
Showing 1 changed file with 4 additions and 0 deletions.
4 changes: 4 additions & 0 deletions __init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,15 @@ def get_page_data(self, pid: str, lang: str):
response = requests.get(url, timeout=5).json()
page = response["query"]["pages"][pid]
summary = rm_parentheses(page.get("extract", ""))
if "commonly refers to:" in summary:
return None, None, None # disambiguation list page
img = None
if "thumbnail" in page:
thumbnail = page["thumbnail"]["source"]
parts = thumbnail.split("/")[:-1]
img = "/".join(part for part in parts if part != "thumb")
ans = flatten_list([sentence_tokenize(s) for s in summary.split("\n")])

return page["title"], ans, img
except Exception as e:
LOG.error(f"Error fetching page data for PID {pid}: {e}")
Expand Down Expand Up @@ -379,6 +382,7 @@ def stop_session(self, sess):

print(s.wiki.get_spoken_answer("venus", "en"))
print(s.wiki.get_spoken_answer("elon musk", "en"))
print(s.wiki.get_spoken_answer("mercury", "en"))

exit()
# full answer
Expand Down

0 comments on commit 628ddd5

Please sign in to comment.