From d398c2955f835a0b64297b0f6748f09e3553615b Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko <debian@onerussian.com>
Date: Thu, 3 Oct 2024 10:40:35 -0400
Subject: [PATCH] Add "hed-validator" to run validation on BIDS dataset

Unlike the other hed_ scripts, this one is prefixed with hed- to be more
consistent with conventions such as git-COMMAND, bids-validator, etc.

I have not yet managed to trigger any errors, so I could not check how they
would look or whether they would serialize into JSON. The hed-examples datasets
are all clean, and the sample dataset I had crashes the validator (separate
issue to follow).
---
 hed/scripts/hed_validator.py | 76 ++++++++++++++++++++++++++++++++++++
 pyproject.toml               |  1 +
 2 files changed, 77 insertions(+)
 create mode 100644 hed/scripts/hed_validator.py

diff --git a/hed/scripts/hed_validator.py b/hed/scripts/hed_validator.py
new file mode 100644
index 00000000..385ff611
--- /dev/null
+++ b/hed/scripts/hed_validator.py
@@ -0,0 +1,76 @@
+import argparse
+import json
+import sys
+
+
+def main():
+    """Command-line entry point: parse the arguments and validate the dataset.
+
+    Returns:
+        int: 1 if validation reported any issues, 0 otherwise (usable as an exit status).
+    """
+    cli = argparse.ArgumentParser(description="Validate an HED BIDS dataset.")
+    cli.add_argument("dataset_path", help="Path to the dataset directory")
+    cli.add_argument(
+        "-f", "--format",
+        choices=["text", "json", "json_pp"],
+        default="text",
+        help="Output format: 'text' (default) or 'json' ('json_pp' for pretty-printed json)",
+    )
+    cli.add_argument(
+        "-o", "--output-file",
+        help="File to save the output. If not provided, output is printed to the screen",
+    )
+    cli.add_argument(
+        "--check-for-warnings",
+        action="store_true",
+        help="Enable checking for warnings during validation",
+    )
+    found_issues = validate_dataset(cli.parse_args())
+    return 1 if found_issues else 0
+
+
+def validate_dataset(args):
+    """Validate the BIDS dataset at args.dataset_path and emit the HED issues in args.format.
+
+    The report is written to args.output_file when set, otherwise printed; in "text" format the
+    version banner and the issue count are always printed to stdout.
+
+    Returns the issue list from BidsDataset.validate(); raises ValueError on an unknown format.
+    """
+    # Delayed imports to speed up --help
+    from hed.errors import get_printable_issue_string
+    from hed.tools import BidsDataset
+    from hed import _version as vr
+
+    # Validate the dataset
+    bids = BidsDataset(args.dataset_path)
+    issue_list = bids.validate(check_for_warnings=args.check_for_warnings)
+    # Output based on format
+    if args.format in ("json", "json_pp"):
+        kw = {"indent": 4} if args.format == "json_pp" else {}  # plain "json" stays compact
+        output = json.dumps(
+            {"issues": issue_list, "hedtools_version": str(vr.get_versions())},
+            **kw)
+    elif args.format == "text":
+        print(f"Using HEDTOOLS version: {str(vr.get_versions())}")
+
+        if issue_list:
+            output = get_printable_issue_string(issue_list, "HED validation errors: ", skip_filename=False)
+            print(f"Number of issues: {len(issue_list)}")
+        else:
+            output = "No HED validation errors"
+    else:
+        raise ValueError(args.format)
+    # Output to file or print to screen
+    if args.output_file:
+        with open(args.output_file, 'w', encoding="utf-8") as fp:
+            fp.write(output)
+    else:
+        print(output)
+    return issue_list
+
+
+if __name__ == "__main__":  # only when executed as a script, not when imported
+    sys.exit(main())  # use main()'s return value as the process exit status
+
diff --git a/pyproject.toml b/pyproject.toml
index 233be6c2..4ce1ef0a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,7 @@ dependencies = [
 run_remodel = "hed.tools.remodeling.cli.run_remodel:main"
 run_remodel_backup = "hed.tools.remodeling.cli.run_remodel_backup:main"
 run_remodel_restore = "hed.tools.remodeling.cli.run_remodel_restore:main"
+hed-validator = "hed.scripts.hed_validator:main"
 hed_validate_schemas = "hed.scripts.validate_schemas:main"
 hed_update_schemas = "hed.scripts.convert_and_update_schema:main"
 hed_add_ids = "hed.scripts.add_hed_ids:main"