From 6a488a17207c67e1d7c1edc2e2ef29f04b7e102e Mon Sep 17 00:00:00 2001 From: Kay Robbins <1189050+VisLab@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:16:44 -0500 Subject: [PATCH] Updated the definition of nonascii to exclude C1 control --- docs/source/02_Terminology.md | 86 ++++++++++++------------- docs/source/Appendix_A.md | 3 + docs/source/Appendix_B.md | 4 +- docs/source/conf.py | 2 +- tests/javascript_tests.json | 15 ++++- tests/json_tests/CHARACTER_INVALID.json | 7 +- tests/python_tests.json | 15 ++++- tests/run_consolidate_tests.py | 6 +- tests/test_summarize_testdata.py | 8 --- 9 files changed, 86 insertions(+), 60 deletions(-) diff --git a/docs/source/02_Terminology.md b/docs/source/02_Terminology.md index 8fd7612..aa2b0ba 100644 --- a/docs/source/02_Terminology.md +++ b/docs/source/02_Terminology.md @@ -142,46 +142,46 @@ is fixed or noted. Starting with HED standard schema versions 8.3.0 and above, HED will allow UTF-8 characters in various settings. The types of characters referred to in this specification are: -| Name | Description | -|-----------------| ----------- | -| `alphanumeric` | `letters` and/or `digits` | -| `ampersand` | ASCII code 38 | -| `ascii` | utf-8 codes 0 to 127 (single byte) | -| `asterisk` | ASCII code 42 | -| `at-sign` | ASCII code 64 | -| `backslash` | ASCII code 92 | -| `blank` | ASCII code 32 | -| `caret` | ASCII code 94 | -| `colon` | ASCII code 58 | -| `comma` | ASCII code 44 | -| `dollar` | ASCII code 36 | -| `digits` | 0-9 | -| `double-quote` | ASCII code 34 | -| `equals` | ASCII code 61 | -| `exclamation` | ASCII code 33 | -| `forward-slash` | ASCII code 47 | -| `greater-than` | ASCII code 62 | -| `hyphen` | ASCII code 45 | -| `left-paren` | ASCII code 40 | -| `less-than` | ASCII code 60 | -| `letters` | `lowercase` and/or `uppercase` | -| `lowercase` | ASCII characters a-z | -| `name` | `alphanumeric`, `hyphen`, `period`, `underscore`, `nonascii` | -| `newline` | ASCII code 10 (linefeed) | -| `nonascii` | utf-8 codes 
greater than 128 (multi-byte) | -| `number-sign` | ASCII code 35 | -| `numeric` | digits, period, hyphen, plus, caret, E, e | -| `percent-sign` | ASCII code 37 | -| `period` | ASCII code 46 | -| `plus` | ASCII code 43 | -| `printable` | ASCII 32 <= code < 127 | -| `question-mark` | ASCII code 63 | -| `right-paren` | ASCII code 41 | -| `semicolon` | ASCII code 59 | -| `single-quote` | ASCII code 39 | -| `tab` | ASCII code 09 | -| `text` | `printable` and/or `nonascii` excluding comma and curly braces.| -| `tilde` | ASCII code 126 | -| `underscore` | ASCII code 95 | -| `uppercase` | ASCII characters A-Z | -| `vertical-bar` | ASCII code 124 | +| Name | Description | +|-----------------|-----------------------------------------------------------------| +| `alphanumeric` | `letters` and/or `digits` | +| `ampersand` | ASCII code 38 | +| `ascii` | utf-8 codes 0 to 127 (single byte) | +| `asterisk` | ASCII code 42 | +| `at-sign` | ASCII code 64 | +| `backslash` | ASCII code 92 | +| `blank` | ASCII code 32 | +| `caret` | ASCII code 94 | +| `colon` | ASCII code 58 | +| `comma` | ASCII code 44 | +| `dollar` | ASCII code 36 | +| `digits` | 0-9 | +| `double-quote` | ASCII code 34 | +| `equals` | ASCII code 61 | +| `exclamation` | ASCII code 33 | +| `forward-slash` | ASCII code 47 | +| `greater-than` | ASCII code 62 | +| `hyphen` | ASCII code 45 | +| `left-paren` | ASCII code 40 | +| `less-than` | ASCII code 60 | +| `letters` | `lowercase` and/or `uppercase` | +| `lowercase` | ASCII characters a-z | +| `name` | `alphanumeric`, `hyphen`, `period`, `underscore`, `nonascii` | +| `newline` | ASCII code 10 (linefeed) | +| `nonascii` | utf-8 codes >= 160 (multi-byte) | +| `number-sign` | ASCII code 35 | +| `numeric` | digits, period, hyphen, plus, caret, E, e | +| `percent-sign` | ASCII code 37 | +| `period` | ASCII code 46 | +| `plus` | ASCII code 43 | +| `printable` | ASCII 32 <= code < 127 | +| `question-mark` | ASCII code 63 | +| `right-paren` | ASCII code 41 | +| `semicolon` | 
ASCII code 59 | +| `single-quote` | ASCII code 39 | +| `tab` | ASCII code 09 | +| `text` | `printable` and/or `nonascii` excluding comma and curly braces. | +| `tilde` | ASCII code 126 | +| `underscore` | ASCII code 95 | +| `uppercase` | ASCII characters A-Z | +| `vertical-bar` | ASCII code 124 | diff --git a/docs/source/Appendix_A.md b/docs/source/Appendix_A.md index 721e19b..594ab26 100644 --- a/docs/source/Appendix_A.md +++ b/docs/source/Appendix_A.md @@ -199,6 +199,9 @@ behavior of certain value classes (for example the `numericClass` value class). - Valid International Resource Identifier as standardized by [rfc3987](https://datatracker.ietf.org/doc/html/rfc3987). `````` +See [**2.2 Character sets and restrictions**](./02_Terminology.md#22-character-sets-and-restrictions) for +definitions of the various character classes. + ````{admonition} Notes on rules for allowed characters in the HED schema. :class: tip diff --git a/docs/source/Appendix_B.md b/docs/source/Appendix_B.md index bb6920b..3cfca05 100644 --- a/docs/source/Appendix_B.md +++ b/docs/source/Appendix_B.md @@ -25,7 +25,7 @@ of errors keyed to the HED specification. A HED string contains an invalid character. -**a.** A non-printable character (ASCII code < 32 or == 127) appears in a HED string. +**a.** An invalid character (character code < 32 or 127 <= character code < 160) appears in a HED string. **b.** Curly braces appear in a HED string not in a sidecar. @@ -33,7 +33,7 @@ A HED string contains an invalid character. - Starting with HED 8.3.0, HED supports UTF-8 encoding. - Different parts of a HED string have different rules for acceptable characters. -See +See also: [**3.2.4 Tags that take values**](03_HED_formats.md#324-tags-that-take-values) and [**3.2.5: Tag extensions**](03_HED_formats.md#325-tag-extensions) for an explanation of the rules for tag values and extensions.
diff --git a/docs/source/conf.py b/docs/source/conf.py index 1ddbf06..e43fcba 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -95,4 +95,4 @@ html_static_path = ['_static'] html_css_files = [ 'custom.css', -] \ No newline at end of file +] diff --git a/tests/javascript_tests.json b/tests/javascript_tests.json index c95ccc2..84f8e45 100644 --- a/tests/javascript_tests.json +++ b/tests/javascript_tests.json @@ -17,7 +17,8 @@ "tests": { "string_tests": { "fails": [ - "Item/Bl\b" + "Item/Bl\b", + "Item/ABC\u009e" ], "passes": [ "Red, Blue, Description/Red", @@ -57,6 +58,18 @@ 0, "Item/Bl\b" ] + ], + [ + [ + "onset", + "duration", + "HED" + ], + [ + 4.5, + 0, + "Item/{abc}" + ] ] ], "passes": [ diff --git a/tests/json_tests/CHARACTER_INVALID.json b/tests/json_tests/CHARACTER_INVALID.json index 6a2449f..ea602c9 100644 --- a/tests/json_tests/CHARACTER_INVALID.json +++ b/tests/json_tests/CHARACTER_INVALID.json @@ -10,7 +10,8 @@ "tests": { "string_tests": { "fails": [ - "Item/Bl\b" + "Item/Bl\b", + "Item/ABC\u009E" ], "passes": [ "Red, Blue, Description/Red", @@ -42,6 +43,10 @@ [ ["onset", "duration", "HED"], [ 4.5, 0, "Item/Bl\b"] + ], + [ + ["onset", "duration", "HED"], + [ 4.5, 0, "Item/{abc}"] ] ], "passes": [ diff --git a/tests/python_tests.json b/tests/python_tests.json index ac9125f..eec6b88 100644 --- a/tests/python_tests.json +++ b/tests/python_tests.json @@ -17,7 +17,8 @@ "tests": { "string_tests": { "fails": [ - "Item/Bl\b" + "Item/Bl\b", + "Item/ABC\u009e" ], "passes": [ "Red, Blue, Description/Red", @@ -57,6 +58,18 @@ 0, "Item/Bl\b" ] + ], + [ + [ + "onset", + "duration", + "HED" + ], + [ + 4.5, + 0, + "Item/{abc}" + ] ] ], "passes": [ diff --git a/tests/run_consolidate_tests.py b/tests/run_consolidate_tests.py index f6a6879..11d28d7 100644 --- a/tests/run_consolidate_tests.py +++ b/tests/run_consolidate_tests.py @@ -20,8 +20,8 @@ def combine_tests(test_names, test_dir, output_path): def main(exclude_names=[], out_name='temp.json'): relative_dir = 
"json_tests" # relative directory to read - script_dir = os.path.dirname(os.path.abspath(__file__)) # directory of this script - target_dir = os.path.join(script_dir, relative_dir) # full path of the + script_dir = os.path.dirname(os.path.abspath(__file__)) # directory of this script + target_dir = os.path.join(script_dir, relative_dir) # full path of the # Write the indicated files file_names = [f for f in os.listdir(target_dir) if os.path.isfile(os.path.join(target_dir, f))] @@ -30,7 +30,7 @@ def main(exclude_names=[], out_name='temp.json'): if __name__ == '__main__': - exclude_names =['SCHEMA', 'TAG_NAMESPACE', 'VERSION_DEPRECATED'] + exclude_names = ['SCHEMA', 'TAG_NAMESPACE', 'VERSION_DEPRECATED'] javascript_name = "javascript_tests.json" main(exclude_names, javascript_name) diff --git a/tests/test_summarize_testdata.py b/tests/test_summarize_testdata.py index 6af0262..cbe0ac1 100644 --- a/tests/test_summarize_testdata.py +++ b/tests/test_summarize_testdata.py @@ -11,7 +11,6 @@ def setUpClass(cls): cls.test_files = [os.path.join(test_dir, f) for f in os.listdir(test_dir) if os.path.isfile(os.path.join(test_dir, f))] - @staticmethod def get_test_info(test_file, details=True): indent = " " @@ -55,13 +54,6 @@ def test_summary(self): print(out_str) self.assertEqual(True, True) # add assertion here - # def test_summary_full(self): - # for test_file in self.test_files: - # print(test_file) - # out_str = self.get_test_info(test_file, details=True) - # print(out_str + '\n') - # - # self.assertEqual(True, True) # add assertion here if __name__ == '__main__':