Skip to content

Commit

Permalink
[FIX] Updates to data dictionary model schema (#166)
Browse files Browse the repository at this point in the history
* add test that partially annotated cols fail schema validation

* make Transformation attr required, add IdentifierNeurobagel column class

* refactor validations of data dict alone into separate function

* add new ToolNeurobagel column class

* add required 'Identifies' attr to example data dicts
  • Loading branch information
alyssadai authored Jun 16, 2023
1 parent 4f655c9 commit b6d15e9
Show file tree
Hide file tree
Showing 19 changed files with 127 additions and 45 deletions.
43 changes: 32 additions & 11 deletions bagel/dictionary_models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Dict, Optional, Union

from pydantic import BaseModel, Field, conlist
from pydantic import BaseModel, Extra, Field, conlist


class Identifier(BaseModel):
Expand Down Expand Up @@ -33,12 +33,9 @@ class Neurobagel(BaseModel):
"invalid responses, typos, or missing data",
alias="MissingValues",
)
isPartOf: Optional[Identifier] = Field(
None,
description="If the column is a subscale or item of an assessment tool "
"then the assessment tool should be specified here.",
alias="IsPartOf",
)

class Config:
extra = Extra.forbid


class CategoricalNeurobagel(Neurobagel):
Expand All @@ -57,7 +54,7 @@ class ContinuousNeurobagel(Neurobagel):
"""A Neurobagel annotation for a continuous column"""

transformation: Identifier = Field(
None,
...,
description="For continuous columns this field can be used to describe"
"a transformation that can be applied to the values in this"
"column in order to match the desired format of a standardized"
Expand All @@ -66,6 +63,27 @@ class ContinuousNeurobagel(Neurobagel):
)


class IdentifierNeurobagel(Neurobagel):
    """A Neurobagel annotation for an identifier column"""

    # Required field (declared with `...`, so there is no default) that is
    # read from / written to the data dictionary under the alias "Identifies".
    # Annotated with the builtin `str` directly; a quoted forward reference
    # is unnecessary for a builtin type.
    identifies: str = Field(
        ...,
        description="For identifier columns, the type of observation uniquely identified by this column.",
        alias="Identifies",
    )


class ToolNeurobagel(Neurobagel):
    """A Neurobagel annotation for an assessment tool column"""

    # Required AND non-null. Combining `Optional[...]` with `Field(...)` would
    # make the key mandatory while still accepting an explicit null, which
    # would let a column with `"IsPartOf": null` validate as a tool
    # annotation without naming any assessment tool.
    isPartOf: Identifier = Field(
        ...,
        description="If the column is a subscale or item of an assessment tool "
        "then the assessment tool should be specified here.",
        alias="IsPartOf",
    )


class Column(BaseModel):
"""The base model for a BIDS column description"""

Expand All @@ -74,9 +92,12 @@ class Column(BaseModel):
description="Free-form natural language description",
alias="Description",
)
annotations: Union[CategoricalNeurobagel, ContinuousNeurobagel] = Field(
None, description="Semantic annotations", alias="Annotations"
)
annotations: Union[
CategoricalNeurobagel,
ContinuousNeurobagel,
IdentifierNeurobagel,
ToolNeurobagel,
] = Field(None, description="Semantic annotations", alias="Annotations")


class CategoricalColumn(Column):
Expand Down
8 changes: 6 additions & 2 deletions bagel/pheno_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,7 @@ def get_rows_with_empty_strings(df: pd.DataFrame, columns: list) -> list:
return list(empty_row[empty_row].index)


def validate_inputs(data_dict: dict, pheno_df: pd.DataFrame) -> None:
"""Determines whether input data are valid"""
def validate_data_dict(data_dict: dict) -> None:
try:
jsonschema.validate(data_dict, DICTIONARY_SCHEMA)
except jsonschema.ValidationError as e:
Expand Down Expand Up @@ -369,6 +368,11 @@ def validate_inputs(data_dict: dict, pheno_df: pd.DataFrame) -> None:
f"The data dictionary contains columns with mismatched levels between the BIDS and Neurobagel annotations: {mismatched_cols}"
)


def validate_inputs(data_dict: dict, pheno_df: pd.DataFrame) -> None:
"""Determines whether input data are valid"""
validate_data_dict(data_dict)

if not are_inputs_compatible(data_dict, pheno_df):
raise LookupError(
"The provided data dictionary and phenotypic file are individually valid, "
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example1.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example10.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example11.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example12.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example13.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"pheno_age": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example14.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
3 changes: 2 additions & 1 deletion bagel/tests/data/example15.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example2.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example4.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example5.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example6.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example7.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
9 changes: 6 additions & 3 deletions bagel/tests/data/example8.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"alt_participant_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -23,7 +25,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example9.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"IsAbout": {
"TermURL": "nb:ParticipantID",
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
Expand All @@ -14,7 +15,8 @@
"IsAbout": {
"TermURL": "nb:SessionID",
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
6 changes: 4 additions & 2 deletions bagel/tests/data/example_invalid.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@
"Annotations": {
"IsAbout": {
"Label": "Unique participant identifier"
}
},
"Identifies": "participant"
}
},
"session_id": {
"Description": "A session ID",
"Annotations": {
"IsAbout": {
"Label": "Unique session identifier"
}
},
"Identifies": "session"
}
},
"group": {
Expand Down
Loading

0 comments on commit b6d15e9

Please sign in to comment.