From d398c2955f835a0b64297b0f6748f09e3553615b Mon Sep 17 00:00:00 2001
From: Yaroslav Halchenko
Date: Thu, 3 Oct 2024 10:40:35 -0400
Subject: [PATCH] Add "hed-validator" to run validation on BIDS dataset

Unlike other hed_ scripts I prefixed this as hed- to be more consistent
with conventions like git-COMMAND, bids-validator, etc.

I have not yet managed to trigger any errors, so I could not see how they
would look or whether they would serialize into JSON. hed-examples are all
clean. The sample dataset I had crashes the validator (separate issue to
follow).
---
 hed/scripts/hed_validator.py | 77 ++++++++++++++++++++++++++++++++++++
 pyproject.toml               |  1 +
 2 files changed, 78 insertions(+)
 create mode 100644 hed/scripts/hed_validator.py

diff --git a/hed/scripts/hed_validator.py b/hed/scripts/hed_validator.py
new file mode 100644
index 00000000..385ff611
--- /dev/null
+++ b/hed/scripts/hed_validator.py
@@ -0,0 +1,77 @@
+"""Command-line tool that runs HED validation on a BIDS dataset."""
+
+import argparse
+import json
+import sys
+
+
+def main(argv=None):
+    """Parse the command line, validate the dataset, and return an exit status.
+
+    Parameters:
+        argv (list or None): Arguments to parse; None makes argparse read sys.argv[1:].
+
+    Returns:
+        int: 1 if validation reported any issues, 0 otherwise.
+    """
+    parser = argparse.ArgumentParser(description="Validate an HED BIDS dataset.")
+    parser.add_argument("dataset_path", help="Path to the dataset directory")
+    parser.add_argument("-f", "--format", choices=["text", "json", "json_pp"], default="text",
+                        help="Output format: 'text' (default) or 'json' ('json_pp' for pretty-printed json)")
+    parser.add_argument("-o", "--output-file",
+                        help="File to save the output. If not provided, output is printed to the screen")
+    parser.add_argument("--check-for-warnings", action="store_true",
+                        help="Enable checking for warnings during validation")
+    args = parser.parse_args(argv)
+
+    issue_list = validate_dataset(args)
+
+    # main() is handed to sys.exit(), so a non-zero value flags validation issues.
+    return int(bool(issue_list))
+
+
+def validate_dataset(args):
+    """Validate the BIDS dataset named in args and emit a report.
+
+    Parameters:
+        args (argparse.Namespace): Needs dataset_path, format, output_file and check_for_warnings.
+
+    Returns:
+        The issues returned by BidsDataset.validate().
+
+    Raises:
+        ValueError: If args.format is not 'text', 'json' or 'json_pp'.
+    """
+    # Delayed imports to speed up --help
+    from hed.errors import get_printable_issue_string
+    from hed.tools import BidsDataset
+    from hed import _version as vr
+
+    bids = BidsDataset(args.dataset_path)
+    issue_list = bids.validate(check_for_warnings=args.check_for_warnings)
+
+    if args.format in ("json", "json_pp"):
+        # NOTE(review): assumes every issue is JSON-serializable -- not yet confirmed on real errors.
+        kw = {"indent": 4} if args.format == "json_pp" else {}
+        output = json.dumps({"issues": issue_list, "hedtools_version": str(vr.get_versions())}, **kw)
+    elif args.format == "text":
+        # The version banner and the issue count go to stdout even when --output-file is given.
+        print(f"Using HEDTOOLS version: {str(vr.get_versions())}")
+        if issue_list:
+            output = get_printable_issue_string(issue_list, "HED validation errors: ", skip_filename=False)
+            print(f"Number of issues: {len(issue_list)}")
+        else:
+            output = "No HED validation errors"
+    else:
+        raise ValueError(args.format)
+
+    if args.output_file:
+        with open(args.output_file, "w", encoding="utf-8") as fp:
+            fp.write(output)
+    else:
+        print(output)
+    return issue_list
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/pyproject.toml b/pyproject.toml
index 233be6c2..4ce1ef0a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -55,6 +55,7 @@ dependencies = [
 run_remodel = "hed.tools.remodeling.cli.run_remodel:main"
 run_remodel_backup = "hed.tools.remodeling.cli.run_remodel_backup:main"
 run_remodel_restore = "hed.tools.remodeling.cli.run_remodel_restore:main"
+hed-validator = "hed.scripts.hed_validator:main"
 hed_validate_schemas = "hed.scripts.validate_schemas:main"
 hed_update_schemas = "hed.scripts.convert_and_update_schema:main"
 hed_add_ids = "hed.scripts.add_hed_ids:main"