Bridgeconn · joelthe1 · Dec 1, 2023 · Dec 1, 2023 · Dec 1, 2023 · Dec 1, 2023
diff --git a/.github/workflows/check-on-push.yml b/.github/workflows/check-on-push.yml
@@ -73,4 +73,4 @@ jobs:
         working-directory: ./py-usfm-parser
         run:
           # pytest -k "not compare_usx_with_testsuite_samples and not testsuite_usx_with_rnc_grammar and not samples-from-wild and not 57-TIT.partial" -n auto
-          pytest -k "not compare_usx_with_testsuite_samples and not testsuite_usx_with_rnc_grammar and not generated_usx_with_rnc_grammar and not samples-from-wild and not 57-TIT.partial" -n auto
+          pytest -k "not compare_usx_with_testsuite_samples and not testsuite_usx_with_rnc_grammar and not generated_usx_with_rnc_grammar and not samples-from-wild" -n auto
diff --git a/py-usfm-parser/src/usfm_grammar/usfm_parser.py b/py-usfm-parser/src/usfm_grammar/usfm_parser.py
@@ -138,7 +138,7 @@ def to_usj(self,
                 "\nUse ignore_errors=True, to generate output inspite of errors")
         json_root_obj = {
                 "type": "USJ",
-                "version": "0.1.0",
+                "version": "0.2.0",
                 "content":[]
             }
         try:

diff --git a/py-usfm-parser/src/usfm_grammar/usj_generator.py b/py-usfm-parser/src/usfm_grammar/usj_generator.py
@@ -22,7 +22,7 @@ def __init__(self, tree_sitter_language_obj, usfm_bytes, usj_root_obj=None):
         if usj_root_obj is None:
             self.json_root_obj = {
                 "type": "USJ",
-                "version": "0.1.0",
+                "version": "0.2.0",
                 "content":[]
             }
         else:

diff --git a/py-usfm-parser/tests/__init__.py b/py-usfm-parser/tests/__init__.py
@@ -179,7 +179,6 @@ def find_all_markers(usfm_path, keep_id=False, keep_number=True):
     f"{TEST_DIR}/specExamples/cross-ref/origin.usfm": "fail",
     f"{TEST_DIR}/paratextTests/MarkersMissingSpace/origin.usfm": "fail",
     f"{TEST_DIR}/paratextTests/NestingInCrossReferences/origin.usfm": "fail",
-    f"{TEST_DIR}/usfmjsTests/acts-1-20.aligned.crammed.oldformat/origin.usfm": "fail",
     f"{TEST_DIR}/special-cases/empty-para/origin.usfm": "fail",
     f"{TEST_DIR}/special-cases/sp/origin.usfm": "fail",
     f"{TEST_DIR}/specExamples/extended/sidebars/origin.usfm":"fail",
@@ -190,43 +189,45 @@ def find_all_markers(usfm_path, keep_id=False, keep_number=True):
     # WordlistMarkerMissingFromGlossaryCitationForms from paratext. Something to do with \k or \w
     f"{TEST_DIR}/paratextTests/WordlistMarkerMissingFromGlossaryCitationForms/origin.usfm": "pass",
 
+    f"{TEST_DIR}/usfmjsTests/ts/origin.usfm": "pass", # Committee thinks these should fail though
+    f"{TEST_DIR}/usfmjsTests/chunk_footnote/origin.usfm": "pass", # Committee thinks these should fail though
+    f"{TEST_DIR}/usfmjsTests/ts_2/origin.usfm": "pass", # Committee thinks these should fail though
+    f"{TEST_DIR}/special-cases/newline-attributes/origin.usfm": "pass", # Committee thinks these should fail though
+    f"{TEST_DIR}/special-cases/empty-attributes5/origin.usfm": "pass", # Committee thinks these should fail though
+
     # no content in ide, rem, toc1, ip etc
     f"{TEST_DIR}/paratextTests/NoErrorsPartiallyEmptyBook/origin.usfm": "fail",
     f"{TEST_DIR}/paratextTests/NoErrorsEmptyBook/origin.usfm": "fail",
-    f"{TEST_DIR}/usfmjsTests/57-TIT.greek.oldformat/origin.usfm": "fail",
     f"{TEST_DIR}/usfmjsTests/57-TIT.greek/origin.usfm": "fail",
     f"{TEST_DIR}/paratextTests/EmptyMarkers/origin.usfm": "fail",
-    f"{TEST_DIR}/usfmjsTests/45-ACT.ugnt.oldformat/origin.usfm": "fail", #  also \k-s not closed, + not used with \w inside \f
 
     # no \p (usually after \s)
-    f"{TEST_DIR}/usfmjsTests/usfmBodyTestD/origin.usfm": "fail", # has \s5
     f"{TEST_DIR}/usfmjsTests/missing_verses/origin.usfm": "fail", # has \s5
     f"{TEST_DIR}/usfmjsTests/isa_verse_span/origin.usfm": "fail", # has \s5
     f"{TEST_DIR}/usfmjsTests/isa_footnote/origin.usfm": "fail", # has \s5
     f"{TEST_DIR}/usfmjsTests/tit_extra_space_after_chapter/origin.usfm": "fail", # has \s5
     f"{TEST_DIR}/usfmjsTests/1ch_verse_span/origin.usfm": "fail", # has \s5
-    f"{TEST_DIR}/usfmjsTests/acts_1_milestone.oldformat/origin.usfm": "fail", # has \s5
-    f"{TEST_DIR}/usfmjsTests/nb/origin.usfm": "fail", 
     f"{TEST_DIR}/usfmjsTests/usfmIntroTest/origin.usfm": "fail",
-    f"{TEST_DIR}/usfmjsTests/usfm-body-testF/origin.usfm": "fail",
     f"{TEST_DIR}/usfmjsTests/out_of_sequence_verses/origin.usfm": "fail",
     f"{TEST_DIR}/usfmjsTests/acts_1_milestone/origin.usfm": "fail",
     f"{TEST_DIR}/usfmjsTests/luk_quotes/origin.usfm": "fail",
     f"{TEST_DIR}/biblica/BlankLinesWithFigures/origin.usfm": "fail", #\fig used without \p, only \b
 
-    # no use of nesting (eg: \w within \f) correct usage: usfmjsTests/acts_8-37-ugnt-footnote
-    f"{TEST_DIR}/usfmjsTests/45-ACT.ugnt/origin.usfm": "fail", # also has \toc1 without content
-
+
     f"{TEST_DIR}/biblica/PublishingVersesWithFormatting/origin.usfm": "fail", # \c without number
 
     f"{TEST_DIR}/specExamples/extended/contentCatogories1/origin.usfm": "fail", # cat inside footnote
 
     f'{TEST_DIR}/special-cases/figure_with_quotes_in_desc/origin.usfm': "fail", # quote within quote
     f'{TEST_DIR}/specExamples/poetry/origin.usfm': "fail", # \b not followed by a \p or \q
+
+    f'{TEST_DIR}/paratextTests/InvalidMilestone_MissingEnd/origin.usfm': "fail", # committee now thinks start/end milestone matching is a semantic check, not a syntactic one
     ########### Need to be fixed #######################
     f"{TEST_DIR}/paratextTests/NoErrorsShort/origin.usfm": "pass", # \c is mandatory!
-    f"{TEST_DIR}/usfmjsTests/gn_headers/origin.usfm": "fail", # what is the valid position for mte and imt
+    # f"{TEST_DIR}/usfmjsTests/gn_headers/origin.usfm": "fail", # what is the valid position for mte and imt
     f"{TEST_DIR}/usfmjsTests/acts_8-37-ugnt-footnote/origin.usfm": "fail", # no clue why it fails
+
+    f"{TEST_DIR}/advanced/periph/origin.usfm": "fail", # Peripherals not implemented
 }
 
 negative_tests = []

diff --git a/py-usfm-parser/tests/test_json_conversion.py b/py-usfm-parser/tests/test_json_conversion.py
@@ -1,6 +1,7 @@
 '''Test the to_dict or json conversion API'''
 import pytest
 import json
+import re
 from jsonschema import validate
 
 from tests import all_usfm_files, initialise_parser, doubtful_usfms, negative_tests,\
@@ -23,7 +24,7 @@ def test_usj_converions_without_filter(file_path):
     assert not test_parser.errors, test_parser.errors
     usfm_dict = test_parser.to_usj()
     assert isinstance(usfm_dict, dict)
-    # usj_file_path = file_path.replace("origin.usfm", "origin-usj.json")
+    # usj_file_path = file_path.replace("origin.usfm", "origin.json")
     # with open(usj_file_path, 'w', encoding='utf-8') as usj_file:
     #     json.dump(usfm_dict, usj_file, indent=2 )
 
@@ -71,12 +72,12 @@ def get_types(element):
         if 'marker' in element:
             types.append(element['marker'])
         if "altnumber" in element:
-            if "c" in element['type']:
+            if element['marker'] == "c":
                 types.append("ca")
             else:
                 types.append("va")
         if "pubnumber" in element:
-            if "c" in element['type']:
+            if element['marker'] == "c":
                 types.append("cp")
             else:
                 types.append("vp")
@@ -140,21 +141,16 @@ def remove_newlines_in_text(usj_dict):
         for i,item in enumerate(usj_dict["content"]):
             if isinstance(item, str):
                 usj_dict['content'][i] = item.replace("\n", " ")
-                usj_dict['content'][i] = usj_dict['content'][i].replace("  ", " ")
-                usj_dict['content'][i] = usj_dict['content'][i].replace("     ", " ")
+                usj_dict['content'][i] = re.sub(r" +", " ", usj_dict['content'][i])
                 continue
-            if "sid" in item and "PSA 09" in item['sid']: # for /usfmjsTests/tstudio/origin.usfm
-                item['sid'] = item['sid'].replace("PSA 091:01", "PSA 91:1")
-                item['sid'] = item['sid'].replace("PSA 091:02", "PSA 91:2")
-                item['sid'] = item['sid'].replace("PSA 09", "PSA 9")
             remove_newlines_in_text(item)
 
 def strip_default_attrib_value(usj_dict):
     '''The USX samples in test suite have space in lemma values when given as default attribute'''
     if "content" in usj_dict:
         for item in usj_dict["content"]:
             if isinstance(item, dict):
-                if item['type'] == "char:w":
+                if item['type'] == "char" and item['marker'] == "w":
                     if "lemma" in item:
                         item['lemma'] = item['lemma'].strip()
             strip_default_attrib_value(item)
@@ -169,9 +165,9 @@ def test_compare_usj_with_testsuite_samples(file_path):
     usx_file_path = file_path.replace("origin.usfm", "origin.xml")
     if usx_file_path not in exclude_USX_files:
         usj_dict = test_parser.to_usj()
-        # remove_newlines_in_text(usj_dict) # need this if using USJ generated from tcdocs
+        remove_newlines_in_text(usj_dict) # need this if using USJ generated from tcdocs
         try:
-            usj_file_path = file_path.replace("origin.usfm", "origin-usj.json")
+            usj_file_path = file_path.replace("origin.usfm", "origin.json")
             with open(usj_file_path, 'r', encoding='utf-8') as usj_file:
                 origin_usj = json.load(usj_file)
             assert usj_dict == origin_usj, f"generated USJ:\n{usj_dict}\n"+\

diff --git a/tests/advanced/custom-attributes/origin-usj.json → tests/advanced/custom-attributes/origin.json b/tests/advanced/custom-attributes/origin-usj.json → tests/advanced/custom-attributes/origin.json
@@ -1,12 +1,12 @@
 {
   "type": "USJ",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "content": [
     {
       "type": "book",
       "marker": "id",
-      "content": [],
-      "code": "GEN"
+      "code": "GEN",
+      "content": []
     },
     {
       "type": "chapter",
@@ -35,32 +35,32 @@
         {
           "type": "char",
           "marker": "w",
+          "x-myattr": "metadata",
           "content": [
             "gracious"
-          ],
-          "x-myattr": "metadata"
+          ]
         }
       ]
     },
     {
       "type": "para",
       "marker": "q1",
       "content": [
-        "\u201cSomeone is shouting in the desert,"
+        "“Someone is shouting in the desert,"
       ]
     },
     {
       "type": "para",
       "marker": "q2",
       "content": [
-        "\u2018Prepare a road for the Lord;"
+        "‘Prepare a road for the Lord;"
       ]
     },
     {
       "type": "para",
       "marker": "q2",
       "content": [
-        "make a straight path for him to travel!\u2019 \u201d"
+        "make a straight path for him to travel!’ ”"
       ]
     },
     {
@@ -70,8 +70,8 @@
         {
           "type": "char",
           "marker": "jmp",
-          "content": [],
-          "link-id": "article-john_the_baptist"
+          "link-id": "article-john_the_baptist",
+          "content": []
         },
         "John the Baptist"
       ]

diff --git a/tests/basic/attributes/origin-usj.json → tests/advanced/default-attributes/origin.json b/tests/basic/attributes/origin-usj.json → tests/advanced/default-attributes/origin.json
@@ -1,12 +1,12 @@
 {
   "type": "USJ",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "content": [
     {
       "type": "book",
       "marker": "id",
-      "content": [],
-      "code": "GEN"
+      "code": "GEN",
+      "content": []
     },
     {
       "type": "chapter",
@@ -35,10 +35,10 @@
         {
           "type": "char",
           "marker": "w",
+          "lemma": "grace",
           "content": [
             "gracious"
-          ],
-          "lemma": "grace"
+          ]
         }
       ]
     }

diff --git a/tests/advanced/header/origin-usj.json → tests/advanced/header/origin.json b/tests/advanced/header/origin-usj.json → tests/advanced/header/origin.json
@@ -1,14 +1,14 @@
 {
   "type": "USJ",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "content": [
     {
       "type": "book",
       "marker": "id",
+      "code": "MRK",
       "content": [
         "41MRKGNT92.SFM, Good News Translation, June 2003"
-      ],
-      "code": "MRK"
+      ]
     },
     {
       "type": "para",
@@ -96,7 +96,7 @@
           "type": "char",
           "marker": "ior",
           "content": [
-            "(1.14\u20139.50)"
+            "(1.14–9.50)"
           ]
         }
       ]
@@ -132,7 +132,7 @@
       "type": "para",
       "marker": "mr",
       "content": [
-        "(Psalms 1\u201341)"
+        "(Psalms 1–41)"
       ]
     },
     {

diff --git a/tests/advanced/list/origin-usj.json → tests/advanced/list/origin.json b/tests/advanced/list/origin-usj.json → tests/advanced/list/origin.json
@@ -1,12 +1,12 @@
 {
   "type": "USJ",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "content": [
     {
       "type": "book",
       "marker": "id",
-      "content": [],
-      "code": "GEN"
+      "code": "GEN",
+      "content": []
     },
     {
       "type": "chapter",

diff --git a/tests/advanced/milestones/origin-usj.json → tests/advanced/milestones/origin.json b/tests/advanced/milestones/origin-usj.json → tests/advanced/milestones/origin.json
@@ -1,12 +1,12 @@
 {
   "type": "USJ",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "content": [
     {
       "type": "book",
       "marker": "id",
-      "content": [],
-      "code": "GEN"
+      "code": "GEN",
+      "content": []
     },
     {
       "type": "chapter",
@@ -44,7 +44,7 @@
           "sid": "qt_123",
           "who": "Pilate"
         },
-        "\u201cAre you the king of the Jews?\u201d",
+        "“Are you the king of the Jews?”",
         {
           "type": "ms",
           "marker": "qt-e",

diff --git a/tests/advanced/nesting/origin-usj.json → tests/advanced/nesting/origin.json b/tests/advanced/nesting/origin-usj.json → tests/advanced/nesting/origin.json
@@ -1,12 +1,12 @@
 {
   "type": "USJ",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "content": [
     {
       "type": "book",
       "marker": "id",
-      "content": [],
-      "code": "GEN"
+      "code": "GEN",
+      "content": []
     },
     {
       "type": "chapter",
@@ -54,7 +54,7 @@
             "'s Battles"
           ]
         },
-        "speaks of \u201c...the town of Waheb in the area of Suphah"
+        "speaks of “...the town of Waheb in the area of Suphah"
       ]
     }
   ]

diff --git a/tests/samples-from-wild/t4t2/metadata.xml → tests/advanced/periph/metadata.xml b/tests/samples-from-wild/t4t2/metadata.xml → tests/advanced/periph/metadata.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="utf-8"?>
 <test-metadata>
-        <description>t4t file2</description>
+        <description>Peripheral section in FRT</description>
         <validated>pass</validated>
         <tags></tags>
 </test-metadata>
diff --git a/tests/advanced/periph/origin.json b/tests/advanced/periph/origin.json
@@ -0,0 +1,26 @@
+{
+  "type": "USJ",
+  "version": "0.2.0",
+  "content": [
+    {
+      "type": "book",
+      "marker": "id",
+      "code": "FRT",
+      "content": []
+    },
+    {
+      "type": "periph",
+      "alt": "My Title",
+      "id": "title",
+      "content": [
+        {
+          "type": "para",
+          "marker": "p",
+          "content": [
+            "this is the title details"
+          ]
+        }
+      ]
+    }
+  ]
+}
diff --git a/tests/advanced/periph/origin.usfm b/tests/advanced/periph/origin.usfm
@@ -0,0 +1,3 @@
+\id FRT
+\periph My Title|id="title"
+\p this is the title details