Skip to content

Commit

Permalink
read library selection, add doc
Browse files Browse the repository at this point in the history
  • Loading branch information
v-rocheleau committed Sep 14, 2023
1 parent d8d9a85 commit d030cb6
Show file tree
Hide file tree
Showing 6 changed files with 64 additions and 12 deletions.
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,21 @@ Katsu Metadata Service is a service to store epigenomic metadata.
7. REST API service handles all generic functionality shared among other services


## Schemas
### Clinical Data

Katsu implements the [Phenopacket V1.0.0](https://phenopacket-schema.readthedocs.io/en/1.0.0/) schema for clinical data.

The schema definition for the phenopacket object is located in [chord_metadata_service/phenopackets/schemas.py](https://github.com/bento-platform/katsu/blob/4ab3c55d6052994ef69b188fb872261c47de24e0/chord_metadata_service/phenopackets/schemas.py#L336).

### Experiments

Katsu's experiment schemas are based on the IHEC [schema](https://github.com/IHEC/ihec-ecosystems/blob/master/docs/metadata/2.0/Ihec_metadata_specification.md#experiments), which is itself based on the EBI/SRA schemas.

The value options for `library_strategy` and `library_selection` are read from [chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml](./chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml), downloaded from the EBI's [SRA v1.5 database](http://ftp.ebi.ac.uk/pub/databases/ena/doc/xsd/sra_1_5/).

The `SRA.experiment.xsd.xml` file is licensed under the Apache License, Version 2.0; the full copyright text is included in the file's header.

## REST API highlights

* Swagger schema docs can be found
Expand Down
31 changes: 26 additions & 5 deletions chord_metadata_service/experiments/migrations/0009_v4_1_0.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,38 @@
from typing import List
from django.db import migrations

LIB_STRATEGY_CONVERSIONS: List[tuple[str, str]] = [
# Convert WES -> WXS ...
("WES", "WXS"),
("Other", "OTHER")
]

def set_experiment_library_strategy(apps, _schema_editor):
LIB_SELECTION_CONVERIONS: List[tuple[str, str]] = [
("Random", "RANDOM"),
("Random PCR", "RANDOM PCR"),
("Exome capture", "other"), # 'Exome capture' no longer supported
("Other", "other"),
]

def set_experiment_library(apps, _schema_editor):
Experiment = apps.get_model("experiments", "Experiment")
for exp in Experiment.objects.filter(library_strategy="WES"):
exp.library_strategy = "WXS"
exp.save()
for (old_val, new_val) in LIB_STRATEGY_CONVERSIONS:
# Modify library_strategy if necessary
for exp in Experiment.objects.filter(library_strategy=old_val):
exp.library_strategy = new_val
exp.save()

for (old_val, new_val) in LIB_SELECTION_CONVERIONS:
# Modify library_selection if necessary
for exp in Experiment.objects.filter(library_selection=old_val):
exp.library_selection = new_val
exp.save()

class Migration(migrations.Migration):
dependencies = [
('experiments', '0007_v4_0_0'),
]

operations = [
migrations.RunPython(set_experiment_library_strategy)
migrations.RunPython(set_experiment_library)
]
17 changes: 12 additions & 5 deletions chord_metadata_service/experiments/schemas.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT
from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT
from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe
from chord_metadata_service.ontologies import read_xsd_simple_type_values

from chord_metadata_service.ontologies import read_xsd_simple_type_values, SRA_EXPERIMENT_FILE_NAME

__all__ = ["EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA"]

# Experiment library strategy options are read from the EBI xsd file
LIBRARY_STRATEGIES = read_xsd_simple_type_values(
'chord_metadata_service/ontologies/xsd/SRA.experiment.xsd.xml',
'typeLibraryStrategy',
SRA_EXPERIMENT_FILE_NAME,
"typeLibraryStrategy",
)


# Experiment library selection options are read from the EBI xsd file
LIBRARY_SELECTION = read_xsd_simple_type_values(
SRA_EXPERIMENT_FILE_NAME,
"typeLibrarySelection",
)

EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({
Expand Down Expand Up @@ -118,7 +125,7 @@
},
"library_selection": {
"type": "string",
"enum": ["Random", "PCR", "Random PCR", "RT-PCR", "MF", "Exome capture", "Other"]
"enum": LIBRARY_SELECTION
},
"library_layout": {
"type": "string",
Expand Down
3 changes: 2 additions & 1 deletion chord_metadata_service/ontologies/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .utils import read_xsd_simple_type_values
from .utils import read_xsd_simple_type_values, SRA_EXPERIMENT_FILE_NAME

__all__ = [
"read_xsd_simple_type_values",
"SRA_EXPERIMENT_FILE_NAME",
]
9 changes: 8 additions & 1 deletion chord_metadata_service/ontologies/utils.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,21 @@
import os
from typing import List
import xmltodict
from pathlib import Path

XSD_ONTOLOGIES_PATH = Path("chord_metadata_service/ontologies/xsd/")
SRA_EXPERIMENT_FILE_NAME = "SRA.experiment.xsd.xml"

def read_xsd_simple_type_values(xsd_file_path: str, type_name: str) -> List[str]:

def read_xsd_simple_type_values(xsd_file_name: str, type_name: str) -> List[str]:
"""Reads an XML Schema Definition (XSD) file and returns a type's values.
The XSD file is parsed using xmltodict following this spec:
https://www.xml.com/pub/a/2006/05/31/converting-between-xml-and-json.html
"""
xsd_file_path = os.path.join(XSD_ONTOLOGIES_PATH, xsd_file_name)
with open(xsd_file_path, 'r') as file:
xsd_file = file.read()

xsd_data = xmltodict.parse(xsd_file, namespaces={'xs': None})
simple_types = {sp["@name"]: sp for sp in xsd_data["schema"]["simpleType"]}
target_type = simple_types[type_name]
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ include = [
"chord_metadata_service/chord/tests/*.json",
"chord_metadata_service/dats/*",
"chord_metadata_service/mcode/tests/*.json",
"chord_metadata_service/ontologies/xsd/*.xml",
"chord_metadata_service/restapi/tests/*.json",
]
repository = "https://github.com/bento-platform/katsu"
Expand Down

0 comments on commit d030cb6

Please sign in to comment.