Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Append default override page JSON file to the "--override" option #373

Merged
1 commit merged on
Oct 20, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file removed overrides/.gitignore
Empty file.
File renamed without changes.
File renamed without changes.
22 changes: 19 additions & 3 deletions src/wiktextract/wiktwords.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@
from pathlib import Path
from typing import TextIO

if sys.version_info < (3, 10):
from importlib_resources import files
else:
from importlib.resources import files

from wikitextprocessor import Wtp
from wikitextprocessor.dumpparser import analyze_and_overwrite_pages

Expand Down Expand Up @@ -246,8 +251,7 @@ def main():
"--override",
type=str,
action="append",
help="Override module(s) by one in file or files in directory "
"(for debugging)",
help="Path of JSON file contains override page data",
)
parser.add_argument(
"--use-thesaurus",
Expand Down Expand Up @@ -439,8 +443,20 @@ def main():
pr = cProfile.Profile()
pr.enable()

skip_extract_dump = wxr.wtp.saved_page_nums() > 0
default_override_json_path = (
files("wiktextract")
/ "data"
/ "overrides"
/ f"{args.dump_file_language_code}.json"
)
if default_override_json_path.exists() and not skip_extract_dump:
if args.override is None:
args.override = [default_override_json_path]
elif default_override_json_path not in args.override:
args.override.append(default_override_json_path)

try:
skip_extract_dump = wxr.wtp.saved_page_nums() > 0
if args.path is not None:
namespace_ids = {
wxr.wtp.NAMESPACE_DATA.get(name, {}).get("id")
Expand Down