From f91e6514baf54442ee41a7ca8681d58b9714a1e0 Mon Sep 17 00:00:00 2001 From: Chris Markiewicz Date: Fri, 24 May 2024 10:40:41 -0400 Subject: [PATCH] feat(schema): Add allequal(x: array, y: array) to expression language (#1837) --- src/schema/README.md | 4 ++++ src/schema/meta/expression_tests.yaml | 6 ++++++ src/schema/rules/checks/dataset.yaml | 7 ++++--- src/schema/rules/checks/events.yaml | 2 +- src/schema/rules/checks/mri.yaml | 4 ++-- 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/src/schema/README.md b/src/schema/README.md index e8cb197757..5f1abb4595 100644 --- a/src/schema/README.md +++ b/src/schema/README.md @@ -265,6 +265,7 @@ The following functions should be defined by an interpreter: | `exists(arg: str \| array, rule: str) -> int` | Count of files in an array that exist in the dataset. String is array with length 1. Rules include `"bids-uri"`, `"dataset"`, `"subject"` and `"stimuli"`. | `exists(sidecar.IntendedFor, "subject")` | True if all files in `IntendedFor` exist, relative to the subject directory. 
| | `index(arg: array, val: any)` | Index of first element in an array equal to `val`, `null` if not found | `index(["i", "j", "k"], axis)` | The number, from 0-2 corresponding to the string `axis` | | `intersects(a: array, b: array) -> bool` | `true` if arguments contain any shared elements | `intersects(dataset.modalities, ["pet", "mri"])` | True if either PET or MRI data is found in dataset | +| `allequal(a: array, b: array) -> bool` | `true` if arrays have the same length and paired elements are equal | `allequal(sorted(columns.onset), columns.onset)` | True if the values in the onset column are already in sorted order | | `length(arg: array) -> int` | Number of elements in an array | `length(columns.onset) > 0` | True if there is at least one value in the onset column | | `match(arg: str, pattern: str) -> bool` | `true` if `arg` matches the regular expression `pattern` (anywhere in string) | `match(extension, ".gz$")` | True if the file extension ends with `.gz` | | `max(arg: array) -> number` | The largest non-`n/a` value in an array | `max(columns.onset)` | The time of the last onset in an events.tsv file | @@ -294,6 +295,9 @@ Most operations involving `null` simply resolve to `null`: | `null / 1` | `null` | | `match(null, pattern)` | `null` | | `intersects(list, null)` | `null` | +| `intersects(null, list)` | `null` | +| `allequal(list, null)` | `null` | +| `allequal(null, list)` | `null` | | `substr(null, 0, 1)` | `null` | | `substr(str, null, 1)` | `null` | | `substr(str, 0, null)` | `null` | diff --git a/src/schema/meta/expression_tests.yaml b/src/schema/meta/expression_tests.yaml index e1f06fdedc..5ebb7dcec0 100644 --- a/src/schema/meta/expression_tests.yaml +++ b/src/schema/meta/expression_tests.yaml @@ -26,6 +26,10 @@ result: false - expression: intersects(null, []) result: false +- expression: allequal([], null) + result: false +- expression: allequal(null, []) + result: false - expression: match(null, 'pattern') result: null - expression: match('string', null) @@ 
-106,6 +110,8 @@ result: null - expression: sorted([3, 2, 1]) result: [1, 2, 3] +- expression: allequal(sorted([3, 2, 1]), [1, 2, 3]) + result: true - expression: min([-1, "n/a", 1]) result: -1 - expression: max([-1, "n/a", 1]) diff --git a/src/schema/rules/checks/dataset.yaml b/src/schema/rules/checks/dataset.yaml index a569bc1f97..91704d32e4 100644 --- a/src/schema/rules/checks/dataset.yaml +++ b/src/schema/rules/checks/dataset.yaml @@ -14,7 +14,7 @@ SubjectFolders: - length(dataset.subjects.sub_dirs) > 0 # 49 -ParticipantIDMismtach: +ParticipantIDMismatch: issue: code: PARTICIPANT_ID_MISMATCH message: | @@ -24,7 +24,7 @@ ParticipantIDMismtach: selectors: - path == '/participants.tsv' checks: - - sorted(columns.participant_label) == sorted(dataset.subjects.sub_dirs) + - allequal(sorted(columns.participant_id), sorted(dataset.subjects.sub_dirs)) # 51 PhenotypeSubjectsMissing: @@ -35,8 +35,9 @@ PhenotypeSubjectsMissing: level: error selectors: - path == '/dataset_description.json' + - type(dataset.subjects.phenotype) != 'null' checks: - - sorted(dataset.subjects.phenotype) == sorted(dataset.subjects.sub_dirs) + - allequal(sorted(dataset.subjects.phenotype), sorted(dataset.subjects.sub_dirs)) # 214 SamplesTSVMissing: diff --git a/src/schema/rules/checks/events.yaml b/src/schema/rules/checks/events.yaml index 1d6c52584c..234b9d078d 100644 --- a/src/schema/rules/checks/events.yaml +++ b/src/schema/rules/checks/events.yaml @@ -39,4 +39,4 @@ SortedOnsets: - extension == ".tsv" checks: # n/a values will likely cause false alarms if encountered. Consider alternatives. 
- - sorted(columns.onset) == columns.onset + - allequal(sorted(columns.onset), columns.onset) diff --git a/src/schema/rules/checks/mri.yaml b/src/schema/rules/checks/mri.yaml index 8837f5faa9..e2efc0216d 100644 --- a/src/schema/rules/checks/mri.yaml +++ b/src/schema/rules/checks/mri.yaml @@ -98,7 +98,7 @@ VolumeTimingNotMonotonicallyIncreasing: - modality == "mri" - sidecar.VolumeTiming != null checks: - - sorted(sidecar.VolumeTiming) == sidecar.VolumeTiming + - allequal(sorted(sidecar.VolumeTiming), sidecar.VolumeTiming) # 192 BolusCutOffDelayTimeNotMonotonicallyIncreasing: @@ -111,7 +111,7 @@ BolusCutOffDelayTimeNotMonotonicallyIncreasing: - modality == "mri" - sidecar.BolusCutoffDelayTime != null checks: - - sorted(sidecar.BolusCutoffDelayTime) == sidecar.BolusCutoffDelayTime + - allequal(sorted(sidecar.BolusCutoffDelayTime), sidecar.BolusCutoffDelayTime) # 201 RepetitionTimePreparationNotConsistent: