From f91e6514baf54442ee41a7ca8681d58b9714a1e0 Mon Sep 17 00:00:00 2001
From: Chris Markiewicz <markiewicz@stanford.edu>
Date: Fri, 24 May 2024 10:40:41 -0400
Subject: [PATCH] feat(schema): Add allequal(x: array, y: array) to expression
 language (#1837)

---
 src/schema/README.md                  | 4 ++++
 src/schema/meta/expression_tests.yaml | 6 ++++++
 src/schema/rules/checks/dataset.yaml  | 7 ++++---
 src/schema/rules/checks/events.yaml   | 2 +-
 src/schema/rules/checks/mri.yaml      | 4 ++--
 5 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/src/schema/README.md b/src/schema/README.md
index e8cb197757..5f1abb4595 100644
--- a/src/schema/README.md
+++ b/src/schema/README.md
@@ -265,6 +265,7 @@ The following functions should be defined by an interpreter:
 | `exists(arg: str \| array, rule: str) -> int`   | Count of files in an array that exist in the dataset. String is array with length 1. Rules include `"bids-uri"`, `"dataset"`, `"subject"` and `"stimuli"`. | `exists(sidecar.IntendedFor, "subject")`               | True if all files in `IntendedFor` exist, relative to the subject directory.   |
 | `index(arg: array, val: any)`                   | Index of first element in an array equal to `val`, `null` if not found                                                                                     | `index(["i", "j", "k"], axis)`                         | The number, from 0-2 corresponding to the string `axis`                        |
 | `intersects(a: array, b: array) -> bool`        | `true` if arguments contain any shared elements                                                                                                            | `intersects(dataset.modalities, ["pet", "mri"])`       | True if either PET or MRI data is found in dataset                             |
+| `allequal(a: array, b: array) -> bool`          | `true` if arrays have the same length and paired elements are equal                                                                                        | `allequal(sorted(columns.onset), columns.onset)`       | True if the onset column is sorted in ascending order                          |
 | `length(arg: array) -> int`                     | Number of elements in an array                                                                                                                             | `length(columns.onset) > 0`                            | True if there is at least one value in the onset column                        |
 | `match(arg: str, pattern: str) -> bool`         | `true` if `arg` matches the regular expression `pattern` (anywhere in string)                                                                              | `match(extension, ".gz$")`                             | True if the file extension ends with `.gz`                                     |
 | `max(arg: array) -> number`                     | The largest non-`n/a` value in an array                                                                                                                    | `max(columns.onset)`                                   | The time of the last onset in an events.tsv file                               |
@@ -294,6 +295,9 @@ Most operations involving `null` simply resolve to `null`:
 | `null / 1`                 | `null` |
 | `match(null, pattern)`     | `null` |
 | `intersects(list, null)`   | `null` |
+| `intersects(null, list)`   | `null` |
+| `allequal(list, null)`     | `false` |
+| `allequal(null, list)`     | `false` |
 | `substr(null, 0, 1)`       | `null` |
 | `substr(str, null, 1)`     | `null` |
 | `substr(str, 0, null)`     | `null` |
diff --git a/src/schema/meta/expression_tests.yaml b/src/schema/meta/expression_tests.yaml
index e1f06fdedc..5ebb7dcec0 100644
--- a/src/schema/meta/expression_tests.yaml
+++ b/src/schema/meta/expression_tests.yaml
@@ -26,6 +26,10 @@
   result: false
 - expression: intersects(null, [])
   result: false
+- expression: allequal([], null)
+  result: false
+- expression: allequal(null, [])
+  result: false
 - expression: match(null, 'pattern')
   result: null
 - expression: match('string', null)
@@ -106,6 +110,8 @@
   result: null
 - expression: sorted([3, 2, 1])
   result: [1, 2, 3]
+- expression: allequal(sorted([3, 2, 1]), [1, 2, 3])
+  result: true
 - expression: min([-1, "n/a", 1])
   result: -1
 - expression: max([-1, "n/a", 1])
diff --git a/src/schema/rules/checks/dataset.yaml b/src/schema/rules/checks/dataset.yaml
index a569bc1f97..91704d32e4 100644
--- a/src/schema/rules/checks/dataset.yaml
+++ b/src/schema/rules/checks/dataset.yaml
@@ -14,7 +14,7 @@ SubjectFolders:
     - length(dataset.subjects.sub_dirs) > 0
 
 # 49
-ParticipantIDMismtach:
+ParticipantIDMismatch:
   issue:
     code: PARTICIPANT_ID_MISMATCH
     message: |
@@ -24,7 +24,7 @@ ParticipantIDMismtach:
   selectors:
     - path == '/participants.tsv'
   checks:
-    - sorted(columns.participant_label) == sorted(dataset.subjects.sub_dirs)
+    - allequal(sorted(columns.participant_id), sorted(dataset.subjects.sub_dirs))
 
 # 51
 PhenotypeSubjectsMissing:
@@ -35,8 +35,9 @@ PhenotypeSubjectsMissing:
     level: error
   selectors:
     - path == '/dataset_description.json'
+    - type(dataset.subjects.phenotype) != 'null'
   checks:
-    - sorted(dataset.subjects.phenotype) == sorted(dataset.subjects.sub_dirs)
+    - allequal(sorted(dataset.subjects.phenotype), sorted(dataset.subjects.sub_dirs))
 
 # 214
 SamplesTSVMissing:
diff --git a/src/schema/rules/checks/events.yaml b/src/schema/rules/checks/events.yaml
index 1d6c52584c..234b9d078d 100644
--- a/src/schema/rules/checks/events.yaml
+++ b/src/schema/rules/checks/events.yaml
@@ -39,4 +39,4 @@ SortedOnsets:
     - extension == ".tsv"
   checks:
     # n/a values will likely cause false alarms if encountered. Consider alternatives.
-    - sorted(columns.onset) == columns.onset
+    - allequal(sorted(columns.onset), columns.onset)
diff --git a/src/schema/rules/checks/mri.yaml b/src/schema/rules/checks/mri.yaml
index 8837f5faa9..e2efc0216d 100644
--- a/src/schema/rules/checks/mri.yaml
+++ b/src/schema/rules/checks/mri.yaml
@@ -98,7 +98,7 @@ VolumeTimingNotMonotonicallyIncreasing:
     - modality == "mri"
     - sidecar.VolumeTiming != null
   checks:
-    - sorted(sidecar.VolumeTiming) == sidecar.VolumeTiming
+    - allequal(sorted(sidecar.VolumeTiming), sidecar.VolumeTiming)
 
 # 192
 BolusCutOffDelayTimeNotMonotonicallyIncreasing:
@@ -111,7 +111,7 @@ BolusCutOffDelayTimeNotMonotonicallyIncreasing:
     - modality == "mri"
     - sidecar.BolusCutoffDelayTime != null
   checks:
-    - sorted(sidecar.BolusCutoffDelayTime) == sidecar.BolusCutoffDelayTime
+    - allequal(sorted(sidecar.BolusCutoffDelayTime), sidecar.BolusCutoffDelayTime)
 
 # 201
 RepetitionTimePreparationNotConsistent: