Skip to content

Commit

Permalink
fix: individual gender and vital status ingestion
Browse files Browse the repository at this point in the history
  • Loading branch information
v-rocheleau committed Apr 24, 2024
1 parent dc98d37 commit 0fd53c6
Show file tree
Hide file tree
Showing 7 changed files with 70 additions and 3 deletions.
13 changes: 11 additions & 2 deletions chord_metadata_service/chord/ingest/phenopackets.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from chord_metadata_service.phenopackets import models as pm
from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA, VRS_REF_REGISTRY
from chord_metadata_service.phenopackets.utils import time_element_to_years
from chord_metadata_service.patients.models import VitalStatus
from chord_metadata_service.patients.values import KaryotypicSex
from chord_metadata_service.restapi.schema_utils import patch_project_schemas
from chord_metadata_service.restapi.types import ExtensionSchemaDict
Expand Down Expand Up @@ -93,7 +94,7 @@ def update_or_create_subject(subject: dict) -> pm.Individual:
# - Be a bit flexible with the subject date_of_birth field for Signature; convert blank strings to None.
subject["date_of_birth"] = subject.get("date_of_birth") or None
subject_query = query_and_check_nulls(subject, "date_of_birth", transform=isoparse)
for k in ("alternate_ids", "time_at_last_encounter", "sex", "taxonomy"):
for k in ("alternate_ids", "time_at_last_encounter", "sex", "taxonomy", "gender"):
subject_query.update(query_and_check_nulls(subject, k))

# --------------------------------------------------------------------------------------------------------------
Expand All @@ -103,13 +104,19 @@ def update_or_create_subject(subject: dict) -> pm.Individual:
if "time_at_last_encounter" in subject:
age_numeric_value, age_unit_value = time_element_to_years(subject["time_at_last_encounter"])

vital_status: VitalStatus | None = None
if vital_status_data := subject.get("vital_status"):
vital_status, _ = VitalStatus.objects.get_or_create(**vital_status_data)

# Check if subject already exists
existing_extra_properties: dict[str, Any]
try:
existing_subject = pm.Individual.objects.get(id=subject["id"])
existing_extra_properties = existing_subject.extra_properties
existing_vital_status = existing_subject.vital_status
except pm.Individual.DoesNotExist:
existing_extra_properties = extra_properties
existing_vital_status = vital_status
pass

# --------------------------------------------------------------------------------------------------------------
Expand All @@ -121,12 +128,14 @@ def update_or_create_subject(subject: dict) -> pm.Individual:
age_numeric=age_numeric_value,
age_unit=age_unit_value if age_unit_value else "",
extra_properties=existing_extra_properties,
vital_status=existing_vital_status,
**subject_query
)

if not subject_obj_created:
# Add any new extra properties to subject if they already exist
# Add any new extra properties or vital status change to subject if they already exist
subject_obj.extra_properties = extra_properties
subject_obj.vital_status = vital_status
subject_obj.save()

return subject_obj
Expand Down
15 changes: 15 additions & 0 deletions chord_metadata_service/chord/tests/example_phenopacket_2_v2.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,21 @@
"date_of_birth": "1964-03-15T00:00:00Z",
"sex": "MALE",
"karyotypic_sex": "UNKNOWN_KARYOTYPE",
"gender": {
"id": "SIG:ID",
"label": "a self-identified gender"
},
"vital_status": {
"status": "DECEASED",
"time_of_death": {
"timestamp": "2024-03-15T00:00:00Z"
},
"cause_of_death": {
"id": "NCIT:C36263",
"label": "Metastatic Malignant Neoplasm"
},
"survival_time_in_days": 800
},
"extra_properties": {
"cool_guy": true,
"smoker": false,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,13 @@
"date_of_birth": "1964-03-15T00:00:00Z",
"sex": "MALE",
"karyotypic_sex": "UNKNOWN_KARYOTYPE",
"gender": {
"id": "SIG:ID",
"label": "a self-identified gender"
},
"vital_status": {
"status": "ALIVE"
},
"extra_properties": {
"cool_guy": true,
"smoker": false,
Expand Down
14 changes: 13 additions & 1 deletion chord_metadata_service/chord/tests/test_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,11 +117,17 @@ def test_ingesting_phenopackets_json(self):
self.assert_model_fields_equal(
db_obj=p.subject,
ground_truth=EXAMPLE_INGEST_PHENOPACKET["subject"],
ignore_fields=IGNORE_COMMON_FIELDS + ["date_of_birth"] # DOB needs parsing
ignore_fields=IGNORE_COMMON_FIELDS + ["date_of_birth", "vital_status"] # DOB needs parsing
)
self.assertIn("__computed", EXAMPLE_INGEST_PHENOPACKET["subject"]["extra_properties"])
self.assertNotIn("__computed", p.subject.extra_properties) # Explicitly test computed extra_properties

self.assert_model_fields_equal(
db_obj=p.subject.vital_status,
ground_truth=EXAMPLE_INGEST_PHENOPACKET["subject"]["vital_status"],
ignore_fields=IGNORE_COMMON_FIELDS
)

# Phenotypic Features
pfs = list(p.phenotypic_features.all().order_by("created"))
self.assert_model_fields_list_equal(
Expand Down Expand Up @@ -209,6 +215,12 @@ def test_reingesting_updating_phenopackets_json(self):
for m1, m2 in zip(p.meta_data.resources.all().order_by("id"), p2.meta_data.resources.all().order_by("id")):
self.assertEqual(m1.id, m2.id)

self.assert_model_fields_equal(
p2.subject.vital_status,
ground_truth=EXAMPLE_INGEST_PHENOPACKET_UPDATE["subject"]["vital_status"],
ignore_fields=IGNORE_COMMON_FIELDS
)

def test_phenopackets_validation(self):
# check invalid phenopacket, must fail validation & validate_phenopacket must raise

Expand Down
3 changes: 3 additions & 0 deletions chord_metadata_service/discovery/tests/test_censorship.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class CensorshipGetThresholdTest(TestCase):
def test_get_threshold_no_censorship(self):
    # With low-count censorship switched off, the threshold is expected to be 0.
    threshold = get_threshold(low_counts_censored=False)
    self.assertEqual(threshold, 0)

@override_settings(CONFIG_PUBLIC={})
def test_get_threshold_no_config(self):
    # CONFIG_PUBLIC is overridden to an empty dict (no public config configured);
    # with censorship on, the threshold is expected to be sys.maxsize.
    threshold = get_threshold(low_counts_censored=True)
    self.assertEqual(threshold, sys.maxsize)

Expand All @@ -28,6 +29,7 @@ def test_get_threshold_configured(self):
def test_thresholded_count_no_censorship(self):
    # With low-count censorship switched off, a count of 1 is returned unchanged.
    count = thresholded_count(1, low_counts_censored=False)
    self.assertEqual(count, 1)

@override_settings(CONFIG_PUBLIC={})
def test_thresholded_count_no_config(self):
    # CONFIG_PUBLIC is overridden to an empty dict (no public config configured);
    # with censorship on, even a count of 100000 is expected to come back as 0.
    count = thresholded_count(100000, low_counts_censored=True)
    self.assertEqual(count, 0)

Expand All @@ -44,6 +46,7 @@ class CensorshipGetMaxQueryParametersTest(TestCase):
def test_get_max_query_parameters_no_censorship(self):
    # With low-count censorship switched off, the maximum is expected to be sys.maxsize.
    max_params = get_max_query_parameters(low_counts_censored=False)
    self.assertEqual(max_params, sys.maxsize)

@override_settings(CONFIG_PUBLIC={})
def test_get_max_query_parameters_no_config(self):
    # CONFIG_PUBLIC is overridden to an empty dict; with censorship on,
    # the maximum number of query parameters is expected to be 0.
    max_params = get_max_query_parameters(low_counts_censored=True)
    self.assertEqual(max_params, 0)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Generated by Django 4.2.11 on 2024-04-24 19:52

import chord_metadata_service.restapi.validators
from django.db import migrations, models


# Schema migration for the `patients` app: adds a `gender` JSON field to the `individual` model.
class Migration(migrations.Migration):

# Depends on the patients app's 0006_v6_0_0 migration.
dependencies = [
('patients', '0006_v6_0_0'),
]

operations = [
# Add `individual.gender`: an optional JSON value (blank=True, null=True) validated
# against the inlined ontology-class schema, which requires string `id` and `label`
# keys and rejects any additional properties.
migrations.AddField(
model_name='individual',
name='gender',
field=models.JSONField(blank=True, help_text='Self-identified gender', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/restapi/ontology_class', '$schema': 'http://json-schema.org/draft-07/schema#', 'additionalProperties': False, 'description': 'An ontology term.', 'help': 'An ontology term.', 'properties': {'id': {'$id': '/chord_metadata_service/restapi/ontology_class/id', 'description': 'A CURIE-style identifier for an ontology term.', 'help': 'A CURIE-style identifier for an ontology term.', 'type': 'string'}, 'label': {'$id': '/chord_metadata_service/restapi/ontology_class/label', 'description': 'A human readable class name for an ontology term.', 'help': 'A human readable class name for an ontology term.', 'type': 'string'}}, 'required': ['id', 'label'], 'title': 'Ontology class schema', 'type': 'object'}, formats=None)]),
),
]
2 changes: 2 additions & 0 deletions chord_metadata_service/patients/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@ def get_project_id(self) -> str | None:
help_text='The karyotypic sex of the individual.')
taxonomy = JSONField(blank=True, null=True, validators=[ontology_validator],
help_text='Ontology resource representing the species (e.g., NCBITaxon:9615).')
gender = JSONField(blank=True, null=True, validators=[ontology_validator],
help_text='Self-identified gender')

# FHIR specific
active = models.BooleanField(default=False, help_text='Whether this patient\'s record is in active use.')
Expand Down

0 comments on commit 0fd53c6

Please sign in to comment.