Skip to content

Commit

Permalink
Merge pull request #367 from empiriker/master
Browse files (browse the repository at this point in the history)
Add support for "capture_language_codes" in de and fr extractors
  • Loading branch information
xxyzz authored Oct 18, 2023
2 parents: b1cb0dd + 57b71bd; commit 2a54f0c
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 2 deletions.
5 changes: 5 additions & 0 deletions src/wiktextract/extractor/de/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,11 @@ def parse_page(
sortid="extractor/de/page/parse_page/76",
)
continue
if (
wxr.config.capture_language_codes
and lang_code not in wxr.config.capture_language_codes
):
continue

base_data = defaultdict(
list,
Expand Down
5 changes: 5 additions & 0 deletions src/wiktextract/extractor/fr/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,11 @@ def parse_page(
if subtitle_template.template_name == "langue":
categories_and_links = defaultdict(list)
lang_code = subtitle_template.template_parameters.get(1)
if (
wxr.config.capture_language_codes
and lang_code not in wxr.config.capture_language_codes
):
continue
lang_name = clean_node(
wxr, categories_and_links, subtitle_template
)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_de_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ class DePageTests(unittest.TestCase):
def setUp(self):
conf1 = WiktionaryConfig(
dump_file_lang_code="de",
# capture_language_codes=None,
# capture_translations=True,
capture_language_codes=None,
capture_translations=True,
# capture_pronunciation=True,
# capture_linkages=True,
# capture_compounds=True,
Expand Down

0 comments on commit 2a54f0c

Please sign in to comment.