Skip to content

Commit

Permalink
resolve conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
kavitharaju committed Oct 5, 2023
2 parents 7c50c78 + e4347d7 commit e7bbea7
Show file tree
Hide file tree
Showing 249 changed files with 438,232 additions and 787,295 deletions.
9 changes: 6 additions & 3 deletions py-usfm-parser/src/usfm_grammar/usj_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,9 @@ def node_2_usj_para(self, node, parent_json_obj):
para_tag_cap = self.usfm_language.query(
"(paragraph (_) @para-marker)").captures(node)[0]
para_marker = para_tag_cap[0].type
if not para_marker.endswith("Block"):
if para_marker == "b":
self.node_2_usj_special(para_tag_cap[0], parent_json_obj)
elif not para_marker.endswith("Block"):
para_json_obj = {"type": f"para:{para_marker}", "content":[]}
for child in para_tag_cap[0].children[1:]:
self.node_2_usj(child, para_json_obj)
Expand Down Expand Up @@ -323,14 +325,15 @@ def node_2_usj(self, node, parent_json_obj): # pylint: disable= too-many-branche
self.node_2_usj_attrib(node, parent_json_obj)
elif node.type == 'text':
text_val = self.usfm[node.start_byte:node.end_byte].decode('utf-8').strip()
parent_json_obj['content'].append(text_val)
if text_val != "":
parent_json_obj['content'].append(text_val)
elif node.type in ["table", "tr"]+ self.TABLE_CELL_MARKERS:
self.node_2_usj_table(node, parent_json_obj)
elif node.type == "milestone":
self.node_2_usj_milestone(node, parent_json_obj)
elif node.type == "zNameSpace":
self.node_2_usj_milestone(node, parent_json_obj)
elif node.type in ["esb", "cat", "fig", "b", "usfm"]:
elif node.type in ["esb", "cat", "fig", "usfm"]:
self.node_2_usj_special(node, parent_json_obj)
elif (node.type in self.PARA_STYLE_MARKERS or
node.type.replace("\\","").strip() in self.PARA_STYLE_MARKERS):
Expand Down
33 changes: 31 additions & 2 deletions py-usfm-parser/tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,9 @@ def find_all_markers(usfm_path, keep_id=False, keep_number=True):
f"{TEST_DIR}/biblica/PublishingVersesWithFormatting/origin.usfm": "fail", # \c without number

f"{TEST_DIR}/specExamples/extended/contentCatogories1/origin.usfm": "fail", # cat inside footnote

f'{TEST_DIR}/special-cases/figure_with_quotes_in_desc/origin.usfm': "fail", # quote within quote
f'{TEST_DIR}/specExamples/poetry/origin.usfm': "fail", # \b not followed by a \p or \q
########### Need to be fixed #######################
f"{TEST_DIR}/paratextTests/NoErrorsShort/origin.usfm": "pass", # \c is mandatory!
f"{TEST_DIR}/usfmjsTests/gn_headers/origin.usfm": "fail", # what is the valid position for mte and imt
Expand All @@ -233,10 +236,36 @@ def find_all_markers(usfm_path, keep_id=False, keep_number=True):
negative_tests.append(file_path)

# USX samples (origin.xml paths) that are excluded from comparison.
# The comment AFTER each entry (or group of entries) states why it is excluded.
# Fully commented-out entries are not excluded at present.
exclude_USX_files = [
# f'{TEST_DIR}/specExamples/chapter-verse/origin.usx',
# f'{TEST_DIR}/specExamples/chapter-verse/origin.xml',
# # ca is added as attribute to cl not chapter node
# f'{TEST_DIR}/specExamples/milestone/origin.usx',
# f'{TEST_DIR}/specExamples/milestone/origin.xml',
# # zNameSpace not represented properly. There are also no docs for it on https://ubsicap.github.io/usx
# f'{TEST_DIR}/advanced/table/origin.xml',
# # There is no verse end node at the end (in the last row of the table)
f'{TEST_DIR}/specExamples/extended/contentCatogories2/origin.xml',
# \ef not treated as inline content of paragraph
f'{TEST_DIR}/specExamples/extended/sectionIntroductions/origin.xml',
# the sample has verse number="+"
f'{TEST_DIR}/specExamples/character/origin.xml',
# lit element treated as a body paragraph enclosing a verse
f'{TEST_DIR}/usfmjsTests/esb/origin.xml',
# last verse text given outside of paragraph.
f'{TEST_DIR}/special-cases/nbsp/origin.xml',
# ~ not being replaced by nbsp in usfm-grammar
f'{TEST_DIR}/special-cases/empty-attributes/origin.xml',
# attributes treated as text content of marker
]

# Also exclude every sample whose origin.xml is missing or is itself
# flagged with status="invalid".
invalid_usxs = []
for file_path in all_usfm_files:
    usx_path = file_path.replace("origin.usfm", "origin.xml")
    try:
        with open(usx_path, 'r', encoding='utf-8') as usx_file:
            usx_text = usx_file.read()
    except FileNotFoundError as exe:
        # A sample without a USX counterpart cannot be compared; report and exclude it.
        print(exe)
        invalid_usxs.append(usx_path)
    else:
        if 'status="invalid"' in usx_text:
            invalid_usxs.append(usx_path)

exclude_USX_files += invalid_usxs
55 changes: 55 additions & 0 deletions py-usfm-parser/tests/test_json_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
from jsonschema import validate

# Merge of both sides of the conflict: the round-tripping tests need
# generate_USFM_from_USJ / parse_USFM_string, and the test-suite comparison
# needs exclude_USX_files.
from tests import (
    all_usfm_files,
    initialise_parser,
    doubtful_usfms,
    negative_tests,
    find_all_markers,
    Filter,
    generate_USFM_from_USJ,
    parse_USFM_string,
    exclude_USX_files,
)

all_valid_markers = []
for member in Filter:
Expand Down Expand Up @@ -130,3 +134,54 @@ def test_usj_round_tripping(file_path):
assert not test_parser2.errors, str(test_parser2.errors)#+"\n"+ generated_USFM

# assert test_parser1.to_syntax_tree() == test_parser2.to_syntax_tree(), generated_USFM

def remove_newlines_in_text(usj_dict):
    '''Flatten whitespace in the text of a USJ tree, in place.

    The samples in the test suite do not preserve new lines in text, but
    usfm-grammar does, so they are removed here purely for comparison.

    For every string in ``usj_dict["content"]`` new lines become spaces and
    double spaces are reduced to single ones. For every nested object the
    zero-padded Psalm references are rewritten and the object is processed
    recursively. Objects without a "content" key are left untouched.
    Nothing is returned.
    '''
    if "content" not in usj_dict:
        return
    content = usj_dict["content"]
    for index, item in enumerate(content):
        if isinstance(item, str):
            flattened = item.replace("\n", " ")
            # Two passes of "double space -> single space": enough to fully
            # collapse runs of up to four spaces.
            flattened = flattened.replace("  ", " ").replace("  ", " ")
            content[index] = flattened
            continue
        if "sid" in item and "PSA 09" in item['sid']:  # for /usfmjsTests/tstudio/origin.usfm
            sid = item['sid']
            sid = sid.replace("PSA 091:01", "PSA 91:1")
            sid = sid.replace("PSA 091:02", "PSA 91:2")
            sid = sid.replace("PSA 09", "PSA 9")
            item['sid'] = sid
        remove_newlines_in_text(item)

def strip_default_attrib_value(usj_dict):
    '''Strip surrounding whitespace from the lemma of every "char:w" object, in place.

    The USX samples in the test suite have a space in lemma values when the
    lemma is given as the default attribute, so it is trimmed before comparing.

    Walks ``usj_dict["content"]`` recursively. String items are skipped, and
    objects without a "type" key are only descended into. Nothing is returned.
    '''
    for item in usj_dict.get("content", []):
        if not isinstance(item, dict):
            continue
        # .get(): an object lacking "type" must not abort the whole walk.
        if item.get('type') == "char:w" and "lemma" in item:
            item['lemma'] = item['lemma'].strip()
        strip_default_attrib_value(item)


@pytest.mark.parametrize('file_path', test_files)
@pytest.mark.timeout(30)
def test_compare_usj_with_testsuite_samples(file_path):
    '''Compare the generated USJ with the origin-usj.json in the test suite.

    The USFM must always parse without errors. The comparison itself is
    skipped when the sample's origin.xml is listed in exclude_USX_files or
    when the sample has no origin-usj.json next to it.
    '''
    test_parser = initialise_parser(file_path)
    assert not test_parser.errors, test_parser.errors

    usx_file_path = file_path.replace("origin.usfm", "origin.xml")
    if usx_file_path in exclude_USX_files:
        return

    usj_dict = test_parser.to_usj()
    remove_newlines_in_text(usj_dict)

    usj_file_path = file_path.replace("origin.usfm", "origin-usj.json")
    try:
        with open(usj_file_path, 'r', encoding='utf-8') as usj_file:
            origin_usj = json.load(usj_file)
    except FileNotFoundError:
        # Not every sample ships a USJ file; nothing to compare against.
        return

    if usj_dict != origin_usj:
        # Second chance: the samples may carry stray spaces in default
        # (lemma) attribute values, so normalise those before failing.
        strip_default_attrib_value(origin_usj)
    assert usj_dict == origin_usj, (
        f"generated USJ:\n{usj_dict}\n"
        f"USJ in testsuite:\n{origin_usj}\n syntax tree: {test_parser.to_syntax_tree()}")
135 changes: 66 additions & 69 deletions tests/advanced/custom-attributes/origin-usj.json
Original file line number Diff line number Diff line change
@@ -1,78 +1,75 @@
{
"type": "USJ",
"version": "0.1.0",
"content": [
"type": "USJ",
"version": "0.1.0",
"content": [
{
"type": "book:id",
"code": "GEN",
"content": []
},
{
"type": "chapter:c",
"number": "1",
"sid": "GEN 1"
},
{
"type": "para:p",
"content": [
{
"type": "book:id",
"content": [],
"code": "GEN"
"type": "verse:v",
"number": "1",
"sid": "GEN 1:1"
},
"the first verse",
{
"type": "chapter:c",
"number": "1",
"sid": "GEN 1"
"type": "verse:v",
"number": "2",
"sid": "GEN 1:2"
},
"the second verse",
{
"type": "para:p",
"content": [
{
"type": "verse:v",
"number": "1",
"sid": "GEN 1:1"
},
"the first verse",
{
"type": "verse:v",
"number": "2",
"sid": "GEN 1:2"
},
"the second verse",
{
"type": "char:w",
"content": [
"gracious"
],
"x-myattr": "metadata"
},
""
]
},
{
"type": "para:q1",
"content": [
"“Someone is shouting in the desert,"
]
},
{
"type": "para:q2",
"content": [
"‘Prepare a road for the Lord;"
]
},
{
"type": "para:q2",
"content": [
"make a straight path for him to travel!’ ”"
]
},
"type": "char:w",
"x-myattr": "metadata",
"content": [
"gracious"
]
}
]
},
{
"type": "para:q1",
"content": [
"“Someone is shouting in the desert,"
]
},
{
"type": "para:q2",
"content": [
"‘Prepare a road for the Lord;"
]
},
{
"type": "para:q2",
"content": [
"make a straight path for him to travel!’ ”"
]
},
{
"type": "para:s",
"content": [
{
"type": "para:s",
"content": [
{
"type": "char:jmp",
"content": [
""
],
"link-id": "article-john_the_baptist"
},
"John the Baptist"
]
"type": "char:jmp",
"link-id": "article-john_the_baptist",
"content": []
},
{
"type": "para:p",
"content": [
"John is sometimes called..."
]
}
]
"John the Baptist"
]
},
{
"type": "para:p",
"content": [
"John is sometimes called..."
]
}
]
}
64 changes: 32 additions & 32 deletions tests/advanced/default-attributes/origin-usj.json
Original file line number Diff line number Diff line change
@@ -1,40 +1,40 @@
{
"type": "USJ",
"version": "0.1.0",
"content": [
"type": "USJ",
"version": "0.1.0",
"content": [
{
"type": "book:id",
"code": "GEN",
"content": []
},
{
"type": "chapter:c",
"number": "1",
"sid": "GEN 1"
},
{
"type": "para:p",
"content": [
{
"type": "book:id",
"content": [],
"code": "GEN"
"type": "verse:v",
"number": "1",
"sid": "GEN 1:1"
},
"the first verse",
{
"type": "chapter:c",
"number": "1",
"sid": "GEN 1"
"type": "verse:v",
"number": "2",
"sid": "GEN 1:2"
},
"the second verse",
{
"type": "para:p",
"content": [
{
"type": "verse:v",
"number": "1",
"sid": "GEN 1:1"
},
"the first verse",
{
"type": "verse:v",
"number": "2",
"sid": "GEN 1:2"
},
"the second verse",
{
"type": "char:w",
"content": [
"gracious"
],
"lemma": "grace"
}
]
"type": "char:w",
"lemma": "grace",
"content": [
"gracious"
]
}
]
]
}
]
}
Loading

0 comments on commit e7bbea7

Please sign in to comment.