Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Testsuite sync #228

Merged
merged 6 commits into from
Dec 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
2 changes: 1 addition & 1 deletion .github/workflows/check-on-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,4 +73,4 @@ jobs:
working-directory: ./py-usfm-parser
run:
# pytest -k "not compare_usx_with_testsuite_samples and not testsuite_usx_with_rnc_grammar and not samples-from-wild and not 57-TIT.partial" -n auto
pytest -k "not compare_usx_with_testsuite_samples and not testsuite_usx_with_rnc_grammar and not generated_usx_with_rnc_grammar and not samples-from-wild and not 57-TIT.partial" -n auto
pytest -k "not compare_usx_with_testsuite_samples and not testsuite_usx_with_rnc_grammar and not generated_usx_with_rnc_grammar and not samples-from-wild" -n auto
2 changes: 1 addition & 1 deletion py-usfm-parser/src/usfm_grammar/usfm_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def to_usj(self,
"\nUse ignore_errors=True, to generate output inspite of errors")
json_root_obj = {
"type": "USJ",
"version": "0.1.0",
"version": "0.2.0",
"content":[]
}
try:
Expand Down
2 changes: 1 addition & 1 deletion py-usfm-parser/src/usfm_grammar/usj_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def __init__(self, tree_sitter_language_obj, usfm_bytes, usj_root_obj=None):
if usj_root_obj is None:
self.json_root_obj = {
"type": "USJ",
"version": "0.1.0",
"version": "0.2.0",
"content":[]
}
else:
Expand Down
23 changes: 12 additions & 11 deletions py-usfm-parser/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,6 @@ def find_all_markers(usfm_path, keep_id=False, keep_number=True):
f"{TEST_DIR}/specExamples/cross-ref/origin.usfm": "fail",
f"{TEST_DIR}/paratextTests/MarkersMissingSpace/origin.usfm": "fail",
f"{TEST_DIR}/paratextTests/NestingInCrossReferences/origin.usfm": "fail",
f"{TEST_DIR}/usfmjsTests/acts-1-20.aligned.crammed.oldformat/origin.usfm": "fail",
f"{TEST_DIR}/special-cases/empty-para/origin.usfm": "fail",
f"{TEST_DIR}/special-cases/sp/origin.usfm": "fail",
f"{TEST_DIR}/specExamples/extended/sidebars/origin.usfm":"fail",
Expand All @@ -190,43 +189,45 @@ def find_all_markers(usfm_path, keep_id=False, keep_number=True):
# WordlistMarkerMissingFromGlossaryCitationForms from paratext. Something to do with \k or \w
f"{TEST_DIR}/paratextTests/WordlistMarkerMissingFromGlossaryCitationForms/origin.usfm": "pass",

f"{TEST_DIR}/usfmjsTests/ts/origin.usfm": "pass", # Committee thinks these should fail though
f"{TEST_DIR}/usfmjsTests/chunk_footnote/origin.usfm": "pass", # Committee thinks these should fail though
f"{TEST_DIR}/usfmjsTests/ts_2/origin.usfm": "pass", # Committee thinks these should fail though
f"{TEST_DIR}/special-cases/newline-attributes/origin.usfm": "pass", # Committee thinks these should fail though
f"{TEST_DIR}/special-cases/empty-attributes5/origin.usfm": "pass", # Committee thinks these should fail though

# no content in ide, rem, toc1, ip etc
f"{TEST_DIR}/paratextTests/NoErrorsPartiallyEmptyBook/origin.usfm": "fail",
f"{TEST_DIR}/paratextTests/NoErrorsEmptyBook/origin.usfm": "fail",
f"{TEST_DIR}/usfmjsTests/57-TIT.greek.oldformat/origin.usfm": "fail",
f"{TEST_DIR}/usfmjsTests/57-TIT.greek/origin.usfm": "fail",
f"{TEST_DIR}/paratextTests/EmptyMarkers/origin.usfm": "fail",
f"{TEST_DIR}/usfmjsTests/45-ACT.ugnt.oldformat/origin.usfm": "fail", # also \k-s not closed, + not used with \w inside \f

# no \p (usually after \s)
f"{TEST_DIR}/usfmjsTests/usfmBodyTestD/origin.usfm": "fail", # has \s5
f"{TEST_DIR}/usfmjsTests/missing_verses/origin.usfm": "fail", # has \s5
f"{TEST_DIR}/usfmjsTests/isa_verse_span/origin.usfm": "fail", # has \s5
f"{TEST_DIR}/usfmjsTests/isa_footnote/origin.usfm": "fail", # has \s5
f"{TEST_DIR}/usfmjsTests/tit_extra_space_after_chapter/origin.usfm": "fail", # has \s5
f"{TEST_DIR}/usfmjsTests/1ch_verse_span/origin.usfm": "fail", # has \s5
f"{TEST_DIR}/usfmjsTests/acts_1_milestone.oldformat/origin.usfm": "fail", # has \s5
f"{TEST_DIR}/usfmjsTests/nb/origin.usfm": "fail",
f"{TEST_DIR}/usfmjsTests/usfmIntroTest/origin.usfm": "fail",
f"{TEST_DIR}/usfmjsTests/usfm-body-testF/origin.usfm": "fail",
f"{TEST_DIR}/usfmjsTests/out_of_sequence_verses/origin.usfm": "fail",
f"{TEST_DIR}/usfmjsTests/acts_1_milestone/origin.usfm": "fail",
f"{TEST_DIR}/usfmjsTests/luk_quotes/origin.usfm": "fail",
f"{TEST_DIR}/biblica/BlankLinesWithFigures/origin.usfm": "fail", #\fig used without \p, only \b

# no use of nesting (eg: \w within \f) correct usage: usfmjsTests/acts_8-37-ugnt-footnote
f"{TEST_DIR}/usfmjsTests/45-ACT.ugnt/origin.usfm": "fail", # also has \toc1 without content


f"{TEST_DIR}/biblica/PublishingVersesWithFormatting/origin.usfm": "fail", # \c without number

f"{TEST_DIR}/specExamples/extended/contentCatogories1/origin.usfm": "fail", # cat inside footnote

f'{TEST_DIR}/special-cases/figure_with_quotes_in_desc/origin.usfm': "fail", # quote within quote
f'{TEST_DIR}/specExamples/poetry/origin.usfm': "fail", # \b not followed by a \p or \q

f'{TEST_DIR}/paratextTests/InvalidMilestone_MissingEnd/origin.usfm': "fail", # committee now thinks start/end milestones is a semantic check not syntactic
########### Need to be fixed #######################
f"{TEST_DIR}/paratextTests/NoErrorsShort/origin.usfm": "pass", # \c is mandatory!
f"{TEST_DIR}/usfmjsTests/gn_headers/origin.usfm": "fail", # what is the valid position for mte and imt
# f"{TEST_DIR}/usfmjsTests/gn_headers/origin.usfm": "fail", # what is the valid position for mte and imt
f"{TEST_DIR}/usfmjsTests/acts_8-37-ugnt-footnote/origin.usfm": "fail", # no clue why it fails

f"{TEST_DIR}/advanced/periph/origin.usfm": "fail", # Peripherals not implemented
}

negative_tests = []
Expand Down
20 changes: 8 additions & 12 deletions py-usfm-parser/tests/test_json_conversion.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'''Test the to_dict or json conversion API'''
import pytest
import json
import re
from jsonschema import validate

from tests import all_usfm_files, initialise_parser, doubtful_usfms, negative_tests,\
Expand All @@ -23,7 +24,7 @@ def test_usj_converions_without_filter(file_path):
assert not test_parser.errors, test_parser.errors
usfm_dict = test_parser.to_usj()
assert isinstance(usfm_dict, dict)
# usj_file_path = file_path.replace("origin.usfm", "origin-usj.json")
# usj_file_path = file_path.replace("origin.usfm", "origin.json")
# with open(usj_file_path, 'w', encoding='utf-8') as usj_file:
# json.dump(usfm_dict, usj_file, indent=2 )

Expand Down Expand Up @@ -71,12 +72,12 @@ def get_types(element):
if 'marker' in element:
types.append(element['marker'])
if "altnumber" in element:
if "c" in element['type']:
if element['marker'] == "c":
types.append("ca")
else:
types.append("va")
if "pubnumber" in element:
if "c" in element['type']:
if element['marker'] == "c":
types.append("cp")
else:
types.append("vp")
Expand Down Expand Up @@ -140,21 +141,16 @@ def remove_newlines_in_text(usj_dict):
for i,item in enumerate(usj_dict["content"]):
if isinstance(item, str):
usj_dict['content'][i] = item.replace("\n", " ")
usj_dict['content'][i] = usj_dict['content'][i].replace(" ", " ")
usj_dict['content'][i] = usj_dict['content'][i].replace(" ", " ")
usj_dict['content'][i] = re.sub(r" +", " ", usj_dict['content'][i])
continue
if "sid" in item and "PSA 09" in item['sid']: # for /usfmjsTests/tstudio/origin.usfm
item['sid'] = item['sid'].replace("PSA 091:01", "PSA 91:1")
item['sid'] = item['sid'].replace("PSA 091:02", "PSA 91:2")
item['sid'] = item['sid'].replace("PSA 09", "PSA 9")
remove_newlines_in_text(item)

def strip_default_attrib_value(usj_dict):
'''The USX samples in test suite have space in lemma values when given as default attribute'''
if "content" in usj_dict:
for item in usj_dict["content"]:
if isinstance(item, dict):
if item['type'] == "char:w":
if item['type'] == "char" and item['marker'] == "w":
if "lemma" in item:
item['lemma'] = item['lemma'].strip()
strip_default_attrib_value(item)
Expand All @@ -169,9 +165,9 @@ def test_compare_usj_with_testsuite_samples(file_path):
usx_file_path = file_path.replace("origin.usfm", "origin.xml")
if usx_file_path not in exclude_USX_files:
usj_dict = test_parser.to_usj()
# remove_newlines_in_text(usj_dict) # need this if using USJ generated from tcdocs
remove_newlines_in_text(usj_dict) # need this if using USJ generated from tcdocs
try:
usj_file_path = file_path.replace("origin.usfm", "origin-usj.json")
usj_file_path = file_path.replace("origin.usfm", "origin.json")
with open(usj_file_path, 'r', encoding='utf-8') as usj_file:
origin_usj = json.load(usj_file)
assert usj_dict == origin_usj, f"generated USJ:\n{usj_dict}\n"+\
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"type": "USJ",
"version": "0.1.0",
"version": "0.2.0",
"content": [
{
"type": "book",
"marker": "id",
"content": [],
"code": "GEN"
"code": "GEN",
"content": []
},
{
"type": "chapter",
Expand Down Expand Up @@ -35,32 +35,32 @@
{
"type": "char",
"marker": "w",
"x-myattr": "metadata",
"content": [
"gracious"
],
"x-myattr": "metadata"
]
}
]
},
{
"type": "para",
"marker": "q1",
"content": [
"\u201cSomeone is shouting in the desert,"
"“Someone is shouting in the desert,"
]
},
{
"type": "para",
"marker": "q2",
"content": [
"\u2018Prepare a road for the Lord;"
"‘Prepare a road for the Lord;"
]
},
{
"type": "para",
"marker": "q2",
"content": [
"make a straight path for him to travel!\u2019 \u201d"
"make a straight path for him to travel!’ ”"
]
},
{
Expand All @@ -70,8 +70,8 @@
{
"type": "char",
"marker": "jmp",
"content": [],
"link-id": "article-john_the_baptist"
"link-id": "article-john_the_baptist",
"content": []
},
"John the Baptist"
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"type": "USJ",
"version": "0.1.0",
"version": "0.2.0",
"content": [
{
"type": "book",
"marker": "id",
"content": [],
"code": "GEN"
"code": "GEN",
"content": []
},
{
"type": "chapter",
Expand Down Expand Up @@ -35,10 +35,10 @@
{
"type": "char",
"marker": "w",
"lemma": "grace",
"content": [
"gracious"
],
"lemma": "grace"
]
}
]
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
{
"type": "USJ",
"version": "0.1.0",
"version": "0.2.0",
"content": [
{
"type": "book",
"marker": "id",
"code": "MRK",
"content": [
"41MRKGNT92.SFM, Good News Translation, June 2003"
],
"code": "MRK"
]
},
{
"type": "para",
Expand Down Expand Up @@ -96,7 +96,7 @@
"type": "char",
"marker": "ior",
"content": [
"(1.14\u20139.50)"
"(1.14–9.50)"
]
}
]
Expand Down Expand Up @@ -132,7 +132,7 @@
"type": "para",
"marker": "mr",
"content": [
"(Psalms 1\u201341)"
"(Psalms 1–41)"
]
},
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"type": "USJ",
"version": "0.1.0",
"version": "0.2.0",
"content": [
{
"type": "book",
"marker": "id",
"content": [],
"code": "GEN"
"code": "GEN",
"content": []
},
{
"type": "chapter",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"type": "USJ",
"version": "0.1.0",
"version": "0.2.0",
"content": [
{
"type": "book",
"marker": "id",
"content": [],
"code": "GEN"
"code": "GEN",
"content": []
},
{
"type": "chapter",
Expand Down Expand Up @@ -44,7 +44,7 @@
"sid": "qt_123",
"who": "Pilate"
},
"\u201cAre you the king of the Jews?\u201d",
"“Are you the king of the Jews?",
{
"type": "ms",
"marker": "qt-e",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
{
"type": "USJ",
"version": "0.1.0",
"version": "0.2.0",
"content": [
{
"type": "book",
"marker": "id",
"content": [],
"code": "GEN"
"code": "GEN",
"content": []
},
{
"type": "chapter",
Expand Down Expand Up @@ -54,7 +54,7 @@
"'s Battles"
]
},
"speaks of \u201c...the town of Waheb in the area of Suphah"
"speaks of ...the town of Waheb in the area of Suphah"
]
}
]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<test-metadata>
<description>t4t file2</description>
<description>Peripheral section in FRT</description>
<validated>pass</validated>
<tags></tags>
</test-metadata>
26 changes: 26 additions & 0 deletions tests/advanced/periph/origin.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"type": "USJ",
"version": "0.2.0",
"content": [
{
"type": "book",
"marker": "id",
"code": "FRT",
"content": []
},
{
"type": "periph",
"alt": "My Title",
"id": "title",
"content": [
{
"type": "para",
"marker": "p",
"content": [
"this is the title details"
]
}
]
}
]
}
3 changes: 3 additions & 0 deletions tests/advanced/periph/origin.usfm
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
\id FRT
\periph My Title|id="title"
\p this is the title details
Loading
Loading