Skip to content

Commit

Permalink
feat: remove v from ontology_info.json (#196)
Browse files Browse the repository at this point in the history
## Reason for Change

Census currently uses x.x.x for [encoding the CxG dataset schema
version](https://github.com/chanzuckerberg/cellxgene-census/blob/main/docs/cellxgene_census_schema.md#census-metadata--census_objcensus_infosummary--somadataframe),
e.g. 5.0.0, whereas the service uses vx.x.x, e.g. v5.0.0. I think we should
probably update the ontology service to adopt the Census semantics since
there are plenty of Census data releases out there (which are
immutable).

## Changes

- Update the ontology_info.json to not require the leading "v" in the
cxg_schema version.
- Use get_latest_schema_version from the API in
all_ontology_generator.py.
- updated tests and notebooks.

## Testing steps

- All unit tests pass
- Run all_ontology_generator.py locally using the updated API and it
works.

## Notes for Reviewer
*Updates to Ontology Files / ontology-processing (push)* will fail
until we release the API. It also doesn't need to run for this PR.
  • Loading branch information
Bento007 authored Apr 11, 2024
1 parent 96c7213 commit a1292a3
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 75 deletions.
37 changes: 8 additions & 29 deletions api/python/notebooks/Python API.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
"outputs": [],
"source": [
"# Init a CXGSchema object to track what ontology versions are supported\n",
"cxg_schema = CXGSchema(version=\"v5.0.0\")"
"cxg_schema = CXGSchema(version=\"5.0.0\")"
]
},
{
Expand All @@ -104,10 +104,7 @@
"end_time": "2024-04-11T18:01:55.813250Z",
"start_time": "2024-04-11T18:01:55.808657Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand Down Expand Up @@ -135,10 +132,7 @@
"end_time": "2024-04-11T18:02:03.459302Z",
"start_time": "2024-04-11T18:02:03.454016Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand All @@ -165,10 +159,7 @@
"end_time": "2024-04-11T18:02:04.680816Z",
"start_time": "2024-04-11T18:02:04.677350Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [],
"source": [
Expand Down Expand Up @@ -567,10 +558,7 @@
"end_time": "2024-04-11T18:29:24.902116Z",
"start_time": "2024-04-11T18:29:24.897976Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand All @@ -597,10 +585,7 @@
"end_time": "2024-04-11T18:29:49.816735Z",
"start_time": "2024-04-11T18:29:49.812574Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand All @@ -627,10 +612,7 @@
"end_time": "2024-04-11T18:34:30.054568Z",
"start_time": "2024-04-11T18:34:29.903171Z"
},
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [
{
Expand Down Expand Up @@ -970,10 +952,7 @@
"execution_count": null,
"id": "6056479e1dc03237",
"metadata": {
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
"collapsed": false
},
"outputs": [],
"source": []
Expand Down
33 changes: 21 additions & 12 deletions api/python/src/cellxgene_ontology_guide/supported_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,20 @@ def get_latest_schema_version(versions: List[str]) -> str:
"""Given a list of schema versions, return the latest version.
:param versions: List[str] list of schema versions. Versions can be in the format "v5.0.0" or "5.0.0"
:return: str latest version with a "v" prefix
:return: str latest version without the leading "v"
"""

def _coerce(v: str) -> Version:
return Version.coerce(v[1:]) if v[0] == "v" else Version.coerce(v)
return str(sorted([coerce_version(version) for version in versions])[-1])

return "v" + str(sorted([_coerce(version) for version in versions])[-1])

def coerce_version(version: str) -> Version:
    """Coerce a version string into a semantic_version.Version object.

    A single leading "v" is stripped before coercion, so "v5.0.0" and "5.0.0"
    both produce the same Version.

    :param version: str version string to coerce, with or without a leading "v"
    :return: Version coerced version object
    """
    # startswith() is safe on an empty string, whereas indexing version[0]
    # would raise IndexError before Version.coerce could reject the input.
    v = version[1:] if version.startswith("v") else version
    return Version.coerce(v)


def load_supported_versions() -> Any:
Expand All @@ -60,18 +67,20 @@ def __init__(self, version: Optional[str] = None):
"""
ontology_info = load_supported_versions()
if version is None:
version = get_latest_schema_version(ontology_info.keys())
elif version not in ontology_info:
raise ValueError(f"Schema version {version} is not supported in this package version.")

self.version = version
self.supported_ontologies = ontology_info[version]["ontologies"]
_version = get_latest_schema_version(ontology_info.keys())
else:
_version = str(coerce_version(version))
if str(_version) not in ontology_info:
raise ValueError(f"Schema version {_version} is not supported in this package version.")

self.version = _version
self.supported_ontologies = ontology_info[_version]["ontologies"]
self.ontology_file_names: Dict[str, str] = {}
self.deprecated_on = ontology_info[version].get("deprecated_on")
self.deprecated_on = ontology_info[_version].get("deprecated_on")
if self.deprecated_on:
parsed_date = datetime.strptime(self.deprecated_on, "%Y-%m-%d")
warnings.warn(
f"Schema version {version} is deprecated as of {parsed_date}. It will be removed in a future version.",
f"Schema version {_version} is deprecated as of {parsed_date}. It will be removed in a future version.",
DeprecationWarning,
stacklevel=1,
)
Expand Down
4 changes: 1 addition & 3 deletions api/python/tests/test_ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,7 @@ def ontology_dict():
@pytest.fixture
def mock_CXGSchema(ontology_dict, mock_load_supported_versions, mock_load_ontology_file):
mock_load_supported_versions.return_value = {
"v5.0.0": {
"ontologies": {"CL": {"version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl"}}
}
"5.0.0": {"ontologies": {"CL": {"version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl"}}}
}
cxg_schema = CXGSchema()
cxg_schema.ontology_file_names = {"CL": "CL-ontology-2024-01-01.json.gz"}
Expand Down
34 changes: 20 additions & 14 deletions api/python/tests/test_supported_versions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
@pytest.fixture
def initialized_CXGSchemaInfo(mock_load_supported_versions):
mock_load_supported_versions.return_value = {
"v5.0.0": {
"5.0.0": {
"ontologies": {"CL": {"version": "v2024-01-01", "source": "http://example.com", "filename": "cl.owl"}}
}
}
Expand All @@ -26,7 +26,7 @@ def initialized_CXGSchemaInfo(mock_load_supported_versions):

@pytest.mark.parametrize("versions, expected", [(["v5.0.0", "v0.0.1"], "v5.0.0"), (["5.0.0", "0.0.1"], "v5.0.0")])
def test__get_latest_schema_version__OK(versions, expected):
assert get_latest_schema_version(versions) == "v5.0.0"
assert get_latest_schema_version(versions) == "5.0.0"


@pytest.fixture
Expand Down Expand Up @@ -71,36 +71,42 @@ def test__load_supported_versions__OK(tmpdir):
assert load_supported_versions() == file_contents


@pytest.mark.parametrize("version, expected", [("v5.0.0", "5.0.0"), ("5.0.0", "5.0.0")])
def test_coerce_version(version, expected):
    """coerce_version accepts a version with or without a leading "v"."""
    # Imported locally so the test does not rely on the module-level import list.
    from cellxgene_ontology_guide.supported_versions import coerce_version

    # Call coerce_version directly; going through get_latest_schema_version
    # would only cover it indirectly.
    assert str(coerce_version(version)) == expected


class TestCXGSchema:
def test__init__defaults(self, mock_load_supported_versions):
support_versions = {"v5.0.0": {"ontologies": {}}, "v0.0.1": {"ontologies": {}}}
support_versions = {"5.0.0": {"ontologies": {}}, "0.0.1": {"ontologies": {}}}
mock_load_supported_versions.return_value = support_versions
cxgs = CXGSchema()
assert cxgs.version == "v5.0.0"
assert cxgs.supported_ontologies == support_versions["v5.0.0"]["ontologies"]
assert cxgs.version == "5.0.0"
assert cxgs.supported_ontologies == support_versions["5.0.0"]["ontologies"]

def test__init__specific_version(self, mock_load_supported_versions):
support_versions = {"v5.0.0": {"ontologies": {}}, "v0.0.1": {"ontologies": {}}}
@pytest.mark.parametrize("version", ["v0.0.1", "0.0.1"])
def test__init__specific_version(self, version, mock_load_supported_versions):
support_versions = {"5.0.0": {"ontologies": {}}, "0.0.1": {"ontologies": {}}}
mock_load_supported_versions.return_value = support_versions
cxgs = CXGSchema(version="v0.0.1")
assert cxgs.version == "v0.0.1"
assert cxgs.supported_ontologies == support_versions["v0.0.1"]["ontologies"]
cxgs = CXGSchema(version=version)
assert cxgs.version == "0.0.1"
assert cxgs.supported_ontologies == support_versions["0.0.1"]["ontologies"]

def test__init__deprecated_version(self, mock_load_supported_versions):
support_versions = {"v5.0.0": {"ontologies": {}}, "v0.0.1": {"ontologies": {}, "deprecated_on": "2024-01-01"}}
support_versions = {"5.0.0": {"ontologies": {}}, "0.0.1": {"ontologies": {}, "deprecated_on": "2024-01-01"}}
mock_load_supported_versions.return_value = support_versions
# catch the deprecation warning
with pytest.warns(DeprecationWarning) as record:
CXGSchema(version="v0.0.1")
CXGSchema(version="0.0.1")
warning = record.pop()
assert warning.message.args[0] == (
"Schema version v0.0.1 is deprecated as of 2024-01-01 00:00:00. It will be removed in a " "future version."
"Schema version 0.0.1 is deprecated as of 2024-01-01 00:00:00. It will be removed in a " "future version."
)

def test__init__unsupported_version(self, mock_load_supported_versions):
mock_load_supported_versions.return_value = {}
with pytest.raises(ValueError):
CXGSchema(version="v5.0.1")
CXGSchema(version="5.0.1")

def test__ontology__unsupported_ontology_by_package(self, initialized_CXGSchemaInfo, mock_load_ontology_file):
with pytest.raises(ValueError):
Expand Down
2 changes: 1 addition & 1 deletion asset-schemas/ontology_info_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"description": "A schema for the set of valid ontology reference files mapping to a CZ CellXGene Dataset Schema Versions",
"type": "object",
"patternProperties": {
"^v[0-9]+\\.[0-9]+\\.[0-9]+$": {
"^[0-9]+\\.[0-9]+\\.[0-9]+$": {
"description": "The version of CellxGene schema that maps to this set of ontology versions",
"type": "object",
"properties": {
Expand Down
2 changes: 1 addition & 1 deletion ontology-assets/ontology_info.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"v5.0.0": {
"5.0.0": {
"ontologies": {
"CL": {
"version": "v2024-01-04",
Expand Down
8 changes: 2 additions & 6 deletions tools/ontology-builder/src/all_ontology_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,10 @@

import env
import owlready2
import semantic_version
from cellxgene_ontology_guide.supported_versions import get_latest_schema_version
from validate_json_schemas import register_schemas, verify_json


def _get_latest_version(versions: List[str]) -> str:
return "v" + str(sorted([semantic_version.Version.coerce(version[1:]) for version in versions])[-1])


def get_ontology_info_file(ontology_info_file: str = env.ONTOLOGY_INFO_FILE) -> Any:
"""
Read ontology information from file
Expand Down Expand Up @@ -345,7 +341,7 @@ def list_expired_cellxgene_schema_version(ontology_info: Dict[str, Any]) -> List
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
ontology_info = get_ontology_info_file()
current_version = _get_latest_version(ontology_info.keys())
current_version = get_latest_schema_version(ontology_info.keys())
latest_ontology_version = ontology_info[current_version]
latest_ontologies = latest_ontology_version["ontologies"]
_download_ontologies(latest_ontologies)
Expand Down
9 changes: 0 additions & 9 deletions tools/ontology-builder/tests/test_all_ontology_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
import pytest
from all_ontology_generator import (
_download_ontologies,
_get_latest_version,
_parse_ontologies,
deprecate_previous_cellxgene_schema_versions,
get_ontology_info_file,
Expand Down Expand Up @@ -46,14 +45,6 @@ def mock_raw_ontology_dir(tmpdir):
return str(sub_dir)


def test_get_latest_version():
# Call the function
latest_version = _get_latest_version(versions=["v1", "v2.0", "v3.0.0", "v3.0.1", "v3.1.0"])

# Assertion
assert latest_version == "v3.1.0"


def test_get_ontology_info_file_default(mock_ontology_info_file):
# Call the function
ontology_info = get_ontology_info_file(ontology_info_file=mock_ontology_info_file)
Expand Down

0 comments on commit a1292a3

Please sign in to comment.