Merge branch 'bids-standard:master' into master

markmikkelsen · May 28, 2024 · 70117ad · 70117ad
2 parents d9f31da + ed53091
commit 70117ad
Show file tree

Hide file tree

Showing 23 changed files with 219 additions and 59 deletions.
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -174,8 +174,7 @@ jobs:
     docker:
       - image: cimg/base:stable
     steps:
-      - setup_remote_docker:
-          version: 17.11.0-ce
+      - setup_remote_docker
       # checkout code to default ~/project
       - checkout
       - attach_workspace:

diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
@@ -2,27 +2,52 @@
 
 See the [CONTRIBUTING](https://github.com/bids-standard/bids-specification/blob/master/CONTRIBUTING.md) guide. Specifically:
 
-- Please keep the title of your Pull Request (PR) short but informative - it will
-  appear in the changelog.
+- Please keep the title of your Pull Request (PR) short but informative - it will appear in the changelog.
 - If you do **not** want a PR to appear in the changelog, it must receive the `exclude-from-changelog` label.
-- Please ensure your name is credited on our [Contributors appendix](https://github.com/bids-standard/bids-specification/blob/master/src/appendices/contributors.md).
-  To add your name, please edit our [Contributors wiki](https://github.com/bids-standard/bids-specification/wiki/Contributors) and add your name with the type of contribution.
+
+- Please ensure your name is credited
+  on our [Contributors appendix](https://github.com/bids-standard/bids-specification/blob/master/src/appendices/contributors.md).
+  To add your name, please edit our [Contributors wiki](https://github.com/bids-standard/bids-specification/wiki/Contributors)
+  and add your name with the type of contribution.
   For assistance, please tag @bids-standard/maintainers.
+
 - Use one of the following prefixes in the title of your PR:
-  - `[ENH]` - enhancement of the specification that adds a new feature or
-    support for a new data type
+  - `[ENH]` - enhancement of the specification that adds a new feature or support for a new data type
   - `[FIX]` - fix of a typo or language clarification
-  - `[INFRA]` - changes to the infrastructure automating the specification
-    release (for example building HTML docs)
+  - `[INFRA]` - changes to the infrastructure automating the specification release (for example building HTML docs)
   - `[SCHEMA]` - changes to the BIDS schema and/or related code
-  - `[MISC]` - everything else including changes to the file listing
-    contributors
-- If you are opening a PR to obtain early feedback, but the changes
-  are not ready to be merged (also known as a "Work in Progress" PR), please
-  use a [Draft PR](https://github.blog/2019-02-14-introducing-draft-pull-requests/).
-- After opening the PR, our continuous integration services will automatically check your contribution  for formatting errors and render a preview of the BIDS specification with your changes.
+  - `[MISC]` - everything else including changes to the file listing contributors
+
+- If you are opening a PR to obtain early feedback,
+  but the changes are not ready to be merged (also known as a "Work in Progress" PR),
+  please use a [Draft PR](https://github.blog/2019-02-14-introducing-draft-pull-requests/).
+
+- After opening the PR, our continuous integration services will automatically check your contribution
+  for formatting errors and render a preview of the BIDS specification with your changes.
   To see the checks and preview, scroll down and click on the `show all checks` link.
-  From the list, select the `Details` link of the `ci/circleci: build_docs artifact` check to see the preview of the BIDS specification.
-- If you are updating the schema *and* you need to subsequently make changes to the bidsschematools code (validation, tests, rendering), this means your PR probably introduces a compatibility breaking change and you should increment the minor version (the second number) in `bids-specification/src/schema/SCHEMA_VERSION`.
+  From the list:
+    - select the `Details` link of the `docs/readthedocs.org:bids-specification` check to see the HTML preview of the BIDS specification.
+    - select the `Details` link of the `Check the rendered PDF version here! ` check to see the PDF preview of the BIDS specification.
+
+- If you are updating the schema *and* you need to subsequently make changes to the bidsschematools code (validation, tests, rendering),
+  this means your PR probably introduces a compatibility breaking change
+  and you should increment the minor version (the second number) in `bids-specification/src/schema/SCHEMA_VERSION`.
+
+- If you are opening a PR for a BIDS extension proposal (BEP),
+  make sure that your top message contains the following notes
+
+> [!Note]
+>
+> **We meet regularly to discuss this BEP**
+>
+> Next meeting: **insert date** on **URL to join**
+>
+> Communication channel on github repo / matrix / slack / discord : **insert URL to join**
+>
+
+> [!Tip]
+>
+> [**HTML preview of this BEP**](insert URL to HTML preview once available)
+>
 
 --- PLEASE READ AND DELETE THE TEXT ABOVE BEFORE OPENING THE PULL REQUEST ---
diff --git a/.github/workflows/schemacode_ci.yml b/.github/workflows/schemacode_ci.yml
@@ -33,7 +33,7 @@ jobs:
       - name: "Install build dependencies"
         run: pip install --upgrade build twine
       - name: "Install test dependencies on tag"
-        run: pip install --upgrade pytest pyyaml pandas tabulate markdown-it-py pyparsing
+        run: pip install --upgrade tools/schemacode[test]
         if: ${{ startsWith(github.ref, 'refs/tags/schema-') }}
       - name: "Build archive on tag"
         run: pytest tools/schemacode/bidsschematools -k make_archive

diff --git a/src/common-principles.md b/src/common-principles.md
@@ -658,6 +658,21 @@ for more information.
 
 ## The Inheritance Principle
 
+In some circumstances, there can be multiple data files for which
+all or a subset of the relevant metadata is precisely equivalent.
+Where this occurs,
+it may be preferable to define those metadata *only once*,
+in metadata files placed on the filesystem in such a way that those files
+are deemed to be *applicable* to each relevant data file individually,
+but are *not* erroneously associated with other data files
+to which the metadata contained within are not applicable.
+The Inheritance Principle defines a systematized set of rules
+to determine which metadata files to associate with which data files.
+Further, because multiple metadata files may apply to an individual data file,
+the Principle defines the *order of precedence* of such metadata files' contents.
+
+### Rules
+
 1.  Any metadata file (such as `.json`, `.bvec` or `.tsv`) MAY be defined at any directory level.
 
 1.  For a given data file, any metadata file is applicable to that data file if:
@@ -687,7 +702,7 @@ for more information.
         same key present in another metadata file at a lower level
         (though it is RECOMMENDED to minimize the extent of such overrides).
 
-Corollaries:
+### Corollaries
 
 1.  As per rule 3, metadata files applicable only to a specific participant / session
     MUST be defined in or below the directory corresponding to that participant / session;
@@ -704,6 +719,8 @@ Corollaries:
     a key-value in a later file does not imply the "unsetting" of that field
     (indeed removal of existing fields is not possible).
 
+### Examples
+
 Example 1: Demonstration of inheritance principle
 
 <!-- This block generates a file tree.

diff --git a/src/longitudinal-and-multi-site-studies.md b/src/longitudinal-and-multi-site-studies.md
@@ -80,7 +80,7 @@ for practical guidance when curating a new longitudinal dataset.
 
 This version of the BIDS specification does not explicitly cover studies with
 data coming from multiple sites or multiple centers (such extension is planned
-in [BIDS `2.0`](https://github.com/bids-standard/bids-2-devel).
+in [BIDS `2.0`](https://github.com/bids-standard/bids-2-devel/issues/11)).
 There are however ways to model your data without any loss in terms of metadata.
 
 ### Option 1: Treat each site/center as a separate dataset
@@ -92,11 +92,21 @@ Apps and everything should just work.
 
 ### Option 2: Combining sites/centers into one dataset
 
-Alternatively you can combine data from all sites into one dataset. To identify
-which site each subjects comes from you can add a `site` column in the
+Alternatively you can combine data from all sites into one dataset.
+This can be done in two ways:
+
+
+#### Option 2.a: Collate sites at subject level
+
+To identify which site each subject comes from you can add a `site` column in the
 `participants.tsv` file indicating the source site. This solution allows you to
-analyze all of the subjects together in one dataset. One caveat is that subjects
+analyze all subjects together in one dataset. One caveat is that subjects
 from all sites will have to have unique labels. To enforce that and improve
 readability you can use a subject label prefix identifying the site. For example
 `sub-NUY001`, `sub-MIT002`, `sub-MPG002` and so on. Remember that hyphens and
 underscores are not allowed in subject labels.
+
+#### Option 2.b: Use different sessions for different sites
+
+In the case of studies such as "Traveling Human Phantom", it is possible to incorporate the site within the session label.
+For example `sub-human1/ses-NUY`, `sub-human1/ses-MIT`, `sub-phantom1/ses-NUY`, `sub-phantom1/ses-MIT` and so on.
diff --git a/src/metaschema.json b/src/metaschema.json
@@ -751,6 +751,10 @@
       "type": "object",
       "properties": {
         "level": { "enum": ["optional", "recommended", "required"] },
+        "datatypes": {
+          "type": "array",
+          "items": { "pattern": "^[a-z]+$" }
+        },
         "stem": { "type": "string" },
         "extensions": { "type": "array", "items": { "type": "string" } }
       },

diff --git a/src/modality-specific-files/magnetic-resonance-imaging-data.md b/src/modality-specific-files/magnetic-resonance-imaging-data.md
@@ -656,13 +656,18 @@ The definitions of these fields can be found in
 and a guide for using macros can be found at
  https://github.com/bids-standard/bids-specification/blob/master/macros_doc.md
 -->
-{{ MACROS___make_suffix_table(
-      [
-         "dwi",
-         "sbref",
-      ]
-   )
-}}
+{{ MACROS___make_suffix_table(["dwi", "sbref"]) }}
+
+Additionally, the following suffixes are used for scanner-generated images:
+
+<!--
+This block generates a suffix table.
+The definitions of these fields can be found in
+  src/schema/rules/files/raw
+and a guide for using macros can be found at
+ https://github.com/bids-standard/bids-specification/blob/master/macros_doc.md
+-->
+{{ MACROS___make_suffix_table(["ADC", "TRACE"]) }}
 
 <!--
 This block generates filename templates.
@@ -691,6 +696,13 @@ In such a case, two files could have the following names:
 The user is free to choose any other label than `singleband` and
 `multiband`, as long as they are consistent across subjects and sessions.
 
+Scanner-generated TRACE and ADC volumes MAY be included using the
+`TRACE` and `ADC` suffixes.
+If TRACE or ADC volume filenames match a diffusion series with all applicable entities,
+such volumes SHOULD be computed from that series.
+Otherwise, some entity, such as [`acq-<label>`](../appendices/entities.md#acq),
+SHOULD be used to indicate that the files are unrelated.
+
 ### REQUIRED gradient orientation information
 
 The REQUIRED gradient orientation information corresponding to a DWI acquisition

diff --git a/src/schema/README.md b/src/schema/README.md
@@ -265,6 +265,7 @@ The following functions should be defined by an interpreter:
 | `exists(arg: str \| array, rule: str) -> int`   | Count of files in an array that exist in the dataset. String is array with length 1. Rules include `"bids-uri"`, `"dataset"`, `"subject"` and `"stimuli"`. | `exists(sidecar.IntendedFor, "subject")`               | True if all files in `IntendedFor` exist, relative to the subject directory.   |
 | `index(arg: array, val: any)`                   | Index of first element in an array equal to `val`, `null` if not found                                                                                     | `index(["i", "j", "k"], axis)`                         | The number, from 0-2 corresponding to the string `axis`                        |
 | `intersects(a: array, b: array) -> bool`        | `true` if arguments contain any shared elements                                                                                                            | `intersects(dataset.modalities, ["pet", "mri"])`       | True if either PET or MRI data is found in dataset                             |
+| `allequal(a: array, b: array) -> bool`          | `true` if arrays have the same length and paired elements are equal                                                                                        | `allequal(sorted(columns.onset), columns.onset)`       | True if the onset column is already sorted                                     |
 | `length(arg: array) -> int`                     | Number of elements in an array                                                                                                                             | `length(columns.onset) > 0`                            | True if there is at least one value in the onset column                        |
 | `match(arg: str, pattern: str) -> bool`         | `true` if `arg` matches the regular expression `pattern` (anywhere in string)                                                                              | `match(extension, ".gz$")`                             | True if the file extension ends with `.gz`                                     |
 | `max(arg: array) -> number`                     | The largest non-`n/a` value in an array                                                                                                                    | `max(columns.onset)`                                   | The time of the last onset in an events.tsv file                               |
@@ -294,6 +295,9 @@ Most operations involving `null` simply resolve to `null`:
 | `null / 1`                 | `null` |
 | `match(null, pattern)`     | `null` |
 | `intersects(list, null)`   | `null` |
+| `intersects(null, list)`   | `null` |
+| `allequal(list, null)`     | `null` |
+| `allequal(null, list)`     | `null` |
 | `substr(null, 0, 1)`       | `null` |
 | `substr(str, null, 1)`     | `null` |
 | `substr(str, 0, null)`     | `null` |

diff --git a/src/schema/SCHEMA_VERSION b/src/schema/SCHEMA_VERSION
@@ -1 +1 @@
-0.8.2-dev
+0.9.1-dev
diff --git a/src/schema/meta/expression_tests.yaml b/src/schema/meta/expression_tests.yaml
@@ -26,6 +26,10 @@
   result: false
 - expression: intersects(null, [])
   result: false
+- expression: allequal([], null)
+  result: false
+- expression: allequal(null, [])
+  result: false
 - expression: match(null, 'pattern')
   result: null
 - expression: match('string', null)
@@ -106,6 +110,8 @@
   result: null
 - expression: sorted([3, 2, 1])
   result: [1, 2, 3]
+- expression: allequal(sorted([3, 2, 1]), [1, 2, 3])
+  result: true
 - expression: min([-1, "n/a", 1])
   result: -1
 - expression: max([-1, "n/a", 1])

diff --git a/src/schema/objects/suffixes.yaml b/src/schema/objects/suffixes.yaml
@@ -6,6 +6,11 @@ TwoPE:
   display_name: 2-photon excitation microscopy
   description: |
     2-photon excitation microscopy imaging data
+ADC:
+  value: ADC
+  display_name: Apparent diffusion coefficient (ADC)
+  description:
+    Apparent diffusion coefficient (ADC) map
 BF:
   value: BF
   display_name: Bright-field microscopy
@@ -460,6 +465,11 @@ TEM:
   display_name: Transmission electron microscopy
   description: |
     Transmission electron microscopy imaging data
+TRACE:
+  value: TRACE
+  display_name: Trace diffusion weighted image
+  description: |
+    Diffusion images proportional to the trace of the diffusion tensor
 UNIT1:
   value: UNIT1
   display_name: Homogeneous (flat) T1-weighted MP2RAGE image

diff --git a/src/schema/rules/checks/dataset.yaml b/src/schema/rules/checks/dataset.yaml
@@ -9,22 +9,22 @@ SubjectFolders:
       There are no subject directories (labeled "sub-*") in the root of this dataset.
     level: error
   selectors:
-    - path == 'dataset_description.json'
+    - path == '/dataset_description.json'
   checks:
     - length(dataset.subjects.sub_dirs) > 0
 
 # 49
-ParticipantIDMismtach:
+ParticipantIDMismatch:
   issue:
     code: PARTICIPANT_ID_MISMATCH
     message: |
       Participant labels found in this dataset did not match the values in participant_id column
       found in the participants.tsv file.
     level: error
   selectors:
-    - path == 'participants.tsv'
+    - path == '/participants.tsv'
   checks:
-    - sorted(columns.participant_label) == sorted(dataset.subjects.sub_dirs)
+    - allequal(sorted(columns.participant_id), sorted(dataset.subjects.sub_dirs))
 
 # 51
 PhenotypeSubjectsMissing:
@@ -34,9 +34,10 @@ PhenotypeSubjectsMissing:
       A phenotype/ .tsv file lists subjects that were not found in the dataset.
     level: error
   selectors:
-    - path == 'dataset_description.json'
+    - path == '/dataset_description.json'
+    - type(dataset.subjects.phenotype) != 'null'
   checks:
-    - sorted(dataset.subjects.phenotype) == sorted(dataset.subjects.sub_dirs)
+    - allequal(sorted(dataset.subjects.phenotype), sorted(dataset.subjects.sub_dirs))
 
 # 214
 SamplesTSVMissing:
@@ -47,7 +48,7 @@ SamplesTSVMissing:
       See 'Modality agnostic files' section of the BIDS specification.
     level: error
   selectors:
-    - path == 'dataset_description.json'
+    - path == '/dataset_description.json'
     - '"micr" in dataset.modalities'
   checks:
     - "'samples.tsv' in dataset.files"
@@ -60,7 +61,7 @@ UnknownVersion:
       The BIDS Schema used for validation may be out of date.
     level: warning
   selectors:
-    - path == 'dataset_description.json'
+    - path == '/dataset_description.json'
   checks:
     - intersects([json.BIDSVersion], schema.meta.versions)
 
@@ -72,7 +73,7 @@ SingleSourceAuthors:
       'CITATION.cff' file found. The "Authors" field of 'dataset_description.json'
       must be removed to avoid inconsistency.
   selectors:
-    - path == 'CITATION.cff'
+    - path == '/CITATION.cff'
   checks:
     - '!("Authors" in dataset.dataset_description)'
 
@@ -85,7 +86,7 @@ SingleSourceCitationFields:
       The "HowToAcknowledge", "License", and "ReferencesAndLinks" fields of
       'dataset_description.json' should be removed to avoid inconsistency.
   selectors:
-    - path == 'CITATION.cff'
+    - path == '/CITATION.cff'
   checks:
     - '!("HowToAcknowledge" in dataset.dataset_description)'
     - '!("License" in dataset.dataset_description)'

diff --git a/src/schema/rules/checks/events.yaml b/src/schema/rules/checks/events.yaml
@@ -39,4 +39,4 @@ SortedOnsets:
     - extension == ".tsv"
   checks:
     # n/a values will likely cause false alarms if encountered. Consider alternatives.
-    - sorted(columns.onset) == columns.onset
+    - allequal(sorted(columns.onset), columns.onset)
diff --git a/src/schema/rules/checks/general.yaml b/src/schema/rules/checks/general.yaml
@@ -20,6 +20,6 @@ ReadmeFileSmall:
       Please consider expanding it with additional information about the dataset.
     level: warning
   selectors:
-    - match(path, '^README')
+    - match(path, '^/README')
   checks:
     - size > 150
diff --git a/src/schema/rules/checks/mri.yaml b/src/schema/rules/checks/mri.yaml
@@ -98,7 +98,7 @@ VolumeTimingNotMonotonicallyIncreasing:
     - modality == "mri"
     - sidecar.VolumeTiming != null
   checks:
-    - sorted(sidecar.VolumeTiming) == sidecar.VolumeTiming
+    - allequal(sorted(sidecar.VolumeTiming), sidecar.VolumeTiming)
 
 # 192
 BolusCutOffDelayTimeNotMonotonicallyIncreasing:
@@ -111,7 +111,7 @@ BolusCutOffDelayTimeNotMonotonicallyIncreasing:
     - modality == "mri"
     - sidecar.BolusCutoffDelayTime != null
   checks:
-    - sorted(sidecar.BolusCutoffDelayTime) == sidecar.BolusCutoffDelayTime
+    - allequal(sorted(sidecar.BolusCutoffDelayTime), sidecar.BolusCutoffDelayTime)
 
 # 201
 RepetitionTimePreparationNotConsistent: