From cfb7c1f5bb1d34aa1919c4aea5c53e041c13ec29 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 18 Apr 2024 19:00:40 +0000 Subject: [PATCH 01/38] schemas --- chord_metadata_service/chord/models.py | 3 + chord_metadata_service/chord/schemas.py | 88 ++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/chord_metadata_service/chord/models.py b/chord_metadata_service/chord/models.py index 3b9168125..1e15da057 100644 --- a/chord_metadata_service/chord/models.py +++ b/chord_metadata_service/chord/models.py @@ -6,6 +6,7 @@ from chord_metadata_service.patients.models import Individual from chord_metadata_service.phenopackets.models import Biosample, Phenopacket from chord_metadata_service.resources.models import Resource +from chord_metadata_service.restapi.validators import JsonSchemaValidator from ..restapi.models import SchemaType @@ -34,6 +35,8 @@ class Project(models.Model): created = models.DateTimeField(auto_now_add=True) updated = models.DateTimeField(auto_now=True) + discovery = models.JSONField(blank=True, null=True, help_text="Discovery configuration", + validators=[JsonSchemaValidator()]) def __str__(self): return f"{self.title} (ID: {self.identifier})" diff --git a/chord_metadata_service/chord/schemas.py b/chord_metadata_service/chord/schemas.py index 89a225b79..b033353ce 100644 --- a/chord_metadata_service/chord/schemas.py +++ b/chord_metadata_service/chord/schemas.py @@ -1,5 +1,7 @@ from pathlib import Path -from chord_metadata_service.restapi.schema_utils import get_schema_app_id, sub_schema_uri +from chord_metadata_service.restapi.schema_utils import ( + get_schema_app_id, sub_schema_uri, array_of, base_type, SchemaTypes, enum_of +) # e.g. PATCH # { @@ -57,3 +59,87 @@ "required": ["object_type", "object_id", "format"], "additionalProperties": False } + +DISCOVERY_FIELD_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery_named_fields"), + "description": "Field configuration", + "type": "object", + "properties": { + "mapping": base_type(SchemaTypes.STRING), + "mapping_for_search_filter": base_type(SchemaTypes.STRING), + "title": base_type(SchemaTypes.STRING), + "description": base_type(SchemaTypes.STRING), + "datatype": enum_of(["number", "string"]), + "config": { + "type": "object", + "properties": { + # datatype == string + "enum": array_of(base_type(SchemaTypes.STRING)), + # datatype == number + "bins": array_of(base_type(SchemaTypes.NUMBER)), + "bin_size": base_type(SchemaTypes.NUMBER), + "taper_left": base_type(SchemaTypes.NUMBER), + "taper_right": base_type(SchemaTypes.NUMBER), + "units": base_type(SchemaTypes.STRING), + "minimum": base_type(SchemaTypes.NUMBER), + "maximum": base_type(SchemaTypes.NUMBER), + # JSONField array specific + "group_by": base_type(SchemaTypes.STRING), + "group_by_value": base_type(SchemaTypes.STRING), + "value_mapping": base_type(SchemaTypes.STRING), + } + } + }, + "additionalProperties": False +} + +DISCOVERY_NAMED_FIELDS_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery_named_fields"), + "description": "Intermediate schema, enforces field schema with flexible names.", + "type": "object", + "patternProperties": { + "^.*$": { DISCOVERY_FIELD_SCHEMA } + }, + "additionalProperties": False +} + +DISCOVERY_OVERVIEW_CHART_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery_overview_chart"), + "description": "Associates a field name with a chart type for overview display", + "type": "object", + "properties": { + "field": base_type(SchemaTypes.STRING), + "chart_type": enum_of(["bar", "pie"]) + }, + "additionalProperties": False +} + +DISCOVERY_OVERVIEW_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery_overview"), + "description": "An overview section containing charts", + "type": "object", + "properties": { + "section_title": base_type(SchemaTypes.STRING), + "charts": array_of(DISCOVERY_OVERVIEW_CHART_SCHEMA) + }, + "additionalProperties": False +} + +DISCOVERY_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery"), + "description": "Discovery configuration for public fields/search", + "type": "object", + "properties": { + "overview": array_of(DISCOVERY_OVERVIEW_SCHEMA, "List of overview sections"), + "search": array_of(), + "fields": DISCOVERY_NAMED_FIELDS_SCHEMA, + "rules": { + "type": "object", + "properties": { + "count_threshold": base_type(SchemaTypes.INTEGER), + "max_query_parameters": base_type(SchemaTypes.INTEGER) + } + } + }, + "additionalProperties": False +} From 392e253c251346f4a5a858ad2582df75e85a7bc4 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 18 Apr 2024 20:36:45 +0000 Subject: [PATCH 02/38] discovery schemas and fields --- ...008_dataset_discovery_project_discovery.py | 24 +++++ chord_metadata_service/chord/models.py | 7 +- chord_metadata_service/chord/schemas.py | 88 +-------------- chord_metadata_service/discovery/schemas.py | 101 ++++++++++++++++++ .../discovery/tests/test_censorship.py | 3 + 5 files changed, 134 insertions(+), 89 deletions(-) create mode 100644 chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py create mode 100644 chord_metadata_service/discovery/schemas.py diff --git a/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py b/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py new file mode 100644 index 000000000..2aca2d86f --- /dev/null +++ b/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py @@ -0,0 +1,24 @@ +# Generated by Django 4.2.11 on 2024-04-18 20:31 + +import chord_metadata_service.restapi.validators +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('chord', '0007_v7_0_0'), + ] + + operations = [ + migrations.AddField( + model_name='dataset', + name='discovery', + field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'items': {'type': 'string'}, 'type': 'array'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string'], 'type': 'string'}, 'description': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), + ), + migrations.AddField( + model_name='project', + name='discovery', + field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'items': {'type': 'string'}, 'type': 'array'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string'], 'type': 'string'}, 'description': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), + ), + ] diff --git a/chord_metadata_service/chord/models.py b/chord_metadata_service/chord/models.py index 1e15da057..f09add395 100644 --- a/chord_metadata_service/chord/models.py +++ b/chord_metadata_service/chord/models.py @@ -7,7 +7,8 @@ from chord_metadata_service.phenopackets.models import Biosample, Phenopacket from chord_metadata_service.resources.models import Resource from chord_metadata_service.restapi.validators import JsonSchemaValidator -from ..restapi.models import SchemaType +from chord_metadata_service.restapi.models import SchemaType +from chord_metadata_service.discovery.schemas import DISCOVERY_SCHEMA __all__ = ["Project", "Dataset", "ProjectJsonSchema"] @@ -36,7 +37,7 @@ class Project(models.Model): created = models.DateTimeField(auto_now_add=True) updated = models.DateTimeField(auto_now=True) discovery = models.JSONField(blank=True, null=True, help_text="Discovery configuration", - validators=[JsonSchemaValidator()]) + validators=[JsonSchemaValidator(DISCOVERY_SCHEMA)]) def __str__(self): return f"{self.title} (ID: {self.identifier})" @@ -152,6 +153,8 @@ def resources(self): extra_properties = models.JSONField(blank=True, null=True, help_text="Extra properties that do not fit in the previous " "specified attributes.") + discovery = models.JSONField(blank=True, null=True, help_text="Discovery configuration", + validators=[JsonSchemaValidator(DISCOVERY_SCHEMA)]) # ------------------------------------------------------------------------- diff --git a/chord_metadata_service/chord/schemas.py b/chord_metadata_service/chord/schemas.py index b033353ce..89a225b79 100644 --- a/chord_metadata_service/chord/schemas.py +++ b/chord_metadata_service/chord/schemas.py @@ -1,7 +1,5 @@ from pathlib import Path -from chord_metadata_service.restapi.schema_utils import ( - get_schema_app_id, sub_schema_uri, array_of, base_type, SchemaTypes, enum_of -) +from chord_metadata_service.restapi.schema_utils import get_schema_app_id, sub_schema_uri # e.g. PATCH # { @@ -59,87 +57,3 @@ "required": ["object_type", "object_id", "format"], "additionalProperties": False } - -DISCOVERY_FIELD_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_named_fields"), - "description": "Field configuration", - "type": "object", - "properties": { - "mapping": base_type(SchemaTypes.STRING), - "mapping_for_search_filter": base_type(SchemaTypes.STRING), - "title": base_type(SchemaTypes.STRING), - "description": base_type(SchemaTypes.STRING), - "datatype": enum_of(["number", "string"]), - "config": { - "type": "object", - "properties": { - # datatype == string - "enum": array_of(base_type(SchemaTypes.STRING)), - # datatype == number - "bins": array_of(base_type(SchemaTypes.NUMBER)), - "bin_size": base_type(SchemaTypes.NUMBER), - "taper_left": base_type(SchemaTypes.NUMBER), - "taper_right": base_type(SchemaTypes.NUMBER), - "units": base_type(SchemaTypes.STRING), - "minimum": base_type(SchemaTypes.NUMBER), - "maximum": base_type(SchemaTypes.NUMBER), - # JSONField array specific - "group_by": base_type(SchemaTypes.STRING), - "group_by_value": base_type(SchemaTypes.STRING), - "value_mapping": base_type(SchemaTypes.STRING), - } - } - }, - "additionalProperties": False -} - -DISCOVERY_NAMED_FIELDS_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_named_fields"), - "description": "Intermediate schema, enforces field schema with flexible names.", - "type": "object", - "patternProperties": { - "^.*$": { DISCOVERY_FIELD_SCHEMA } - }, - "additionalProperties": False -} - -DISCOVERY_OVERVIEW_CHART_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_overview_chart"), - "description": "Associates a field name with a chart type for overview display", - "type": "object", - "properties": { - "field": base_type(SchemaTypes.STRING), - "chart_type": enum_of(["bar", "pie"]) - }, - "additionalProperties": False -} - -DISCOVERY_OVERVIEW_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_overview"), - "description": "An overview section containing charts", - "type": "object", - "properties": { - "section_title": base_type(SchemaTypes.STRING), - "charts": array_of(DISCOVERY_OVERVIEW_CHART_SCHEMA) - }, - "additionalProperties": False -} - -DISCOVERY_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery"), - "description": "Discovery configuration for public fields/search", - "type": "object", - "properties": { - "overview": array_of(DISCOVERY_OVERVIEW_SCHEMA, "List of overview sections"), - "search": array_of(), - "fields": DISCOVERY_NAMED_FIELDS_SCHEMA, - "rules": { - "type": "object", - "properties": { - "count_threshold": base_type(SchemaTypes.INTEGER), - "max_query_parameters": base_type(SchemaTypes.INTEGER) - } - } - }, - "additionalProperties": False -} diff --git a/chord_metadata_service/discovery/schemas.py b/chord_metadata_service/discovery/schemas.py new file mode 100644 index 000000000..16b24e1c3 --- /dev/null +++ b/chord_metadata_service/discovery/schemas.py @@ -0,0 +1,101 @@ +from pathlib import Path +from chord_metadata_service.restapi.schema_utils import ( + get_schema_app_id, sub_schema_uri, array_of, base_type, SchemaTypes, enum_of +) + +base_uri = get_schema_app_id(Path(__file__).parent.name) + +DISCOVERY_FIELD_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery_named_fields"), + "description": "Field configuration", + "type": "object", + "properties": { + "mapping": base_type(SchemaTypes.STRING), + "mapping_for_search_filter": base_type(SchemaTypes.STRING), + "title": base_type(SchemaTypes.STRING), + "description": base_type(SchemaTypes.STRING), + "datatype": enum_of(["number", "string"]), + "config": { + "type": "object", + "properties": { + # datatype == string + "enum": array_of(base_type(SchemaTypes.STRING)), + # datatype == number + "bins": array_of(base_type(SchemaTypes.NUMBER)), + "bin_size": base_type(SchemaTypes.NUMBER), + "taper_left": base_type(SchemaTypes.NUMBER), + "taper_right": base_type(SchemaTypes.NUMBER), + "units": base_type(SchemaTypes.STRING), + "minimum": base_type(SchemaTypes.NUMBER), + "maximum": base_type(SchemaTypes.NUMBER), + # JSONField array specific + "group_by": base_type(SchemaTypes.STRING), + "group_by_value": base_type(SchemaTypes.STRING), + "value_mapping": base_type(SchemaTypes.STRING), + } + } + }, + "additionalProperties": False +} + +DISCOVERY_NAMED_FIELDS_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery_named_fields"), + "description": "Intermediate schema, enforces field schema with flexible names.", + "type": "object", + "patternProperties": { + "^.*$": DISCOVERY_FIELD_SCHEMA + }, + "additionalProperties": False +} + +DISCOVERY_OVERVIEW_CHART_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery_overview_chart"), + "description": "Associates a field name with a chart type for overview display", + "type": "object", + "properties": { + "field": base_type(SchemaTypes.STRING), + "chart_type": enum_of(["bar", "pie"]) + }, + "additionalProperties": False +} + +DISCOVERY_OVERVIEW_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery_overview"), + "description": "An overview section containing charts", + "type": "object", + "properties": { + "section_title": base_type(SchemaTypes.STRING), + "charts": array_of(DISCOVERY_OVERVIEW_CHART_SCHEMA) + }, + "additionalProperties": False +} + +DISCOVERY_SEARCH_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery_search"), + "description": "Groups search fields by section.", + "type": "object", + "properties": { + "section_title": base_type(SchemaTypes.STRING), + "fields": array_of(base_type(SchemaTypes.STRING)) + }, + "additionalProperties": False +} + +DISCOVERY_SCHEMA = { + "$id": sub_schema_uri(base_uri, "discovery"), + "description": "Discovery configuration for public fields/search", + "type": "object", + "properties": { + "overview": array_of(DISCOVERY_OVERVIEW_SCHEMA, "List of overview sections"), + "search": array_of(DISCOVERY_SEARCH_SCHEMA), + "fields": DISCOVERY_NAMED_FIELDS_SCHEMA, + "rules": { + "type": "object", + "properties": { + "count_threshold": base_type(SchemaTypes.INTEGER), + "max_query_parameters": base_type(SchemaTypes.INTEGER) + } + } + }, + "additionalProperties": False +} diff --git a/chord_metadata_service/discovery/tests/test_censorship.py b/chord_metadata_service/discovery/tests/test_censorship.py index a6e84ec4d..bdf757637 100644 --- a/chord_metadata_service/discovery/tests/test_censorship.py +++ b/chord_metadata_service/discovery/tests/test_censorship.py @@ -12,6 +12,7 @@ class CensorshipGetThresholdTest(TestCase): def test_get_threshold_no_censorship(self): self.assertEqual(get_threshold(low_counts_censored=False), 0) + @override_settings(CONFIG_PUBLIC={}) def test_get_threshold_no_config(self): # no public config configured self.assertEqual(get_threshold(low_counts_censored=True), sys.maxsize) @@ -28,6 +29,7 @@ def test_get_threshold_configured(self): def test_thresholded_count_no_censorship(self): self.assertEqual(thresholded_count(1, low_counts_censored=False), 1) + @override_settings(CONFIG_PUBLIC={}) def test_thresholded_count_no_config(self): # no public config configured self.assertEqual(thresholded_count(100000, low_counts_censored=True), 0) @@ -44,6 +46,7 @@ class CensorshipGetMaxQueryParametersTest(TestCase): def test_get_max_query_parameters_no_censorship(self): self.assertEqual(get_max_query_parameters(low_counts_censored=False), sys.maxsize) + @override_settings(CONFIG_PUBLIC={}) def test_get_max_query_parameters_no_config(self): self.assertEqual(get_max_query_parameters(low_counts_censored=True), 0) From 472bfd6137b897aa91e0216533fb613f505307c5 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 18 Apr 2024 21:08:09 +0000 Subject: [PATCH 03/38] discovery schema endpoint --- chord_metadata_service/discovery/api_views.py | 7 +++++++ chord_metadata_service/discovery/schemas.py | 2 +- chord_metadata_service/restapi/urls.py | 9 +++++++-- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index 87bf2e4fc..fa4571d63 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -16,6 +16,7 @@ from .fields import get_field_options, get_range_stats, get_categorical_stats, get_date_stats from .model_lookups import PUBLIC_MODEL_NAMES_TO_MODEL +from .schemas import DISCOVERY_SCHEMA @extend_schema( @@ -190,3 +191,9 @@ async def public_dataset(_request: DrfRequest): return Response({ "datasets": datasets }) + + +@api_view(["GET"]) +@permission_classes([AllowAny]) +async def discovery_schema(_request: DrfRequest): + return Response(DISCOVERY_SCHEMA) diff --git a/chord_metadata_service/discovery/schemas.py b/chord_metadata_service/discovery/schemas.py index 16b24e1c3..345665a94 100644 --- a/chord_metadata_service/discovery/schemas.py +++ b/chord_metadata_service/discovery/schemas.py @@ -6,7 +6,7 @@ base_uri = get_schema_app_id(Path(__file__).parent.name) DISCOVERY_FIELD_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_named_fields"), + "$id": sub_schema_uri(base_uri, "discovery_field"), "description": "Field configuration", "type": "object", "properties": { diff --git a/chord_metadata_service/restapi/urls.py b/chord_metadata_service/restapi/urls.py index 62c3b23b1..1769037ca 100644 --- a/chord_metadata_service/restapi/urls.py +++ b/chord_metadata_service/restapi/urls.py @@ -2,7 +2,12 @@ from rest_framework import routers from chord_metadata_service.chord import api_views as chord_views -from chord_metadata_service.discovery.api_views import public_search_fields, public_overview, public_dataset +from chord_metadata_service.discovery.api_views import ( + public_search_fields, + public_overview, + public_dataset, + discovery_schema +) from chord_metadata_service.experiments import api_views as experiment_views from chord_metadata_service.patients import api_views as individual_views from chord_metadata_service.phenopackets import api_views as phenopacket_views @@ -60,7 +65,7 @@ name="chord-phenopacket-schema"), path('experiment_schema', experiment_views.get_experiment_schema, name="experiment-schema"), - + path('discovery_schema', discovery_schema, name="discovery-schema"), # extra properties schema types path('extra_properties_schema_types', extra_properties_schema_types, name="extra-properties-schema-types"), From 39a005a993f9d72be9ce5962c6fbfa199efa408b Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Tue, 23 Apr 2024 16:00:14 -0400 Subject: [PATCH 04/38] discovery config serializing, schema fixes --- ...008_dataset_discovery_project_discovery.py | 6 +++--- chord_metadata_service/chord/serializers.py | 6 ++++++ chord_metadata_service/discovery/schemas.py | 19 ++++++++++++++----- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py b/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py index 2aca2d86f..3837b77d3 100644 --- a/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py +++ b/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.11 on 2024-04-18 20:31 +# Generated by Django 4.2.11 on 2024-04-23 19:47 import chord_metadata_service.restapi.validators from django.db import migrations, models @@ -14,11 +14,11 @@ class Migration(migrations.Migration): migrations.AddField( model_name='dataset', name='discovery', - field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'items': {'type': 'string'}, 'type': 'array'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string'], 'type': 'string'}, 'description': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), + field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_field', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'oneOf': [{'items': {'type': 'string'}, 'type': 'array'}, {'type': 'null'}]}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string'], 'type': 'string'}, 'description': {'type': 'string'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), ), migrations.AddField( model_name='project', name='discovery', - field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'items': {'type': 'string'}, 'type': 'array'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string'], 'type': 'string'}, 'description': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), + field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_field', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'oneOf': [{'items': {'type': 'string'}, 'type': 'array'}, {'type': 'null'}]}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string'], 'type': 'string'}, 'description': {'type': 'string'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), ), ] diff --git a/chord_metadata_service/chord/serializers.py b/chord_metadata_service/chord/serializers.py index cf6b60dc9..a786ecd62 100644 --- a/chord_metadata_service/chord/serializers.py +++ b/chord_metadata_service/chord/serializers.py @@ -34,6 +34,7 @@ class DatasetSerializer(GenericSerializer): "linked_field_sets", "dats_file", "project", + "discovery", ) # noinspection PyMethodMayBeStatic @@ -146,6 +147,11 @@ class Meta: class ProjectSerializer(serializers.ModelSerializer): # Don't inherit GenericSerializer to not pop empty fields + always_include = ( + "title", + "description", + "discovery", + ) datasets = DatasetSerializer(read_only=True, many=True, exclude_when_nested=["project"]) project_schemas = ProjectJsonSchemaSerializer(read_only=True, many=True) diff --git a/chord_metadata_service/discovery/schemas.py b/chord_metadata_service/discovery/schemas.py index 345665a94..95db64e2f 100644 --- a/chord_metadata_service/discovery/schemas.py +++ b/chord_metadata_service/discovery/schemas.py @@ -12,6 +12,9 @@ "properties": { "mapping": base_type(SchemaTypes.STRING), "mapping_for_search_filter": base_type(SchemaTypes.STRING), + "group_by": base_type(SchemaTypes.STRING), + "group_by_value": base_type(SchemaTypes.STRING), + "value_mapping": base_type(SchemaTypes.STRING), "title": base_type(SchemaTypes.STRING), "description": base_type(SchemaTypes.STRING), "datatype": enum_of(["number", "string"]), @@ -19,7 +22,13 @@ "type": "object", "properties": { # datatype == string - "enum": array_of(base_type(SchemaTypes.STRING)), + "enum": { + "oneOf": [ + # either an array of strings, or null + array_of(base_type(SchemaTypes.STRING)), + base_type(SchemaTypes.NULL) + ] + }, # datatype == number "bins": array_of(base_type(SchemaTypes.NUMBER)), "bin_size": base_type(SchemaTypes.NUMBER), @@ -28,10 +37,6 @@ "units": base_type(SchemaTypes.STRING), "minimum": base_type(SchemaTypes.NUMBER), "maximum": base_type(SchemaTypes.NUMBER), - # JSONField array specific - "group_by": base_type(SchemaTypes.STRING), - "group_by_value": base_type(SchemaTypes.STRING), - "value_mapping": base_type(SchemaTypes.STRING), } } }, @@ -99,3 +104,7 @@ }, "additionalProperties": False } + +# import json +# with open("discovery.json", "w") as schema_file: +# json.dump(DISCOVERY_SCHEMA, schema_file) From d19552f7588b6d6d2ecb6d0fecfcc6c9cc888763 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 24 Apr 2024 14:11:41 +0000 Subject: [PATCH 05/38] discovery field schema date datatype --- .../migrations/0008_dataset_discovery_project_discovery.py | 6 +++--- chord_metadata_service/discovery/schemas.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py b/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py index 3837b77d3..edf876e5d 100644 --- a/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py +++ b/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.11 on 2024-04-23 19:47 +# Generated by Django 4.2.11 on 2024-04-24 14:10 import chord_metadata_service.restapi.validators from django.db import migrations, models @@ -14,11 +14,11 @@ class Migration(migrations.Migration): migrations.AddField( model_name='dataset', name='discovery', - field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_field', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'oneOf': [{'items': {'type': 'string'}, 'type': 'array'}, {'type': 'null'}]}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string'], 'type': 'string'}, 'description': {'type': 'string'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), + field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_field', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'oneOf': [{'items': {'type': 'string'}, 'type': 'array'}, {'type': 'null'}]}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string', 'date'], 'type': 'string'}, 'description': {'type': 'string'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), ), migrations.AddField( model_name='project', name='discovery', - field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_field', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'oneOf': [{'items': {'type': 'string'}, 'type': 'array'}, {'type': 'null'}]}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string'], 'type': 'string'}, 'description': {'type': 'string'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), + field=models.JSONField(blank=True, help_text='Discovery configuration', null=True, validators=[chord_metadata_service.restapi.validators.JsonSchemaValidator({'$id': '/chord_metadata_service/discovery/discovery', 'additionalProperties': False, 'description': 'Discovery configuration for public fields/search', 'properties': {'fields': {'$id': '/chord_metadata_service/discovery/discovery_named_fields', 'additionalProperties': False, 'description': 'Intermediate schema, enforces field schema with flexible names.', 'patternProperties': {'^.*$': {'$id': '/chord_metadata_service/discovery/discovery_field', 'additionalProperties': False, 'description': 'Field configuration', 'properties': {'config': {'properties': {'bin_size': {'type': 'number'}, 'bins': {'items': {'type': 'number'}, 'type': 'array'}, 'enum': {'oneOf': [{'items': {'type': 'string'}, 'type': 'array'}, {'type': 'null'}]}, 'maximum': {'type': 'number'}, 'minimum': {'type': 'number'}, 'taper_left': {'type': 'number'}, 'taper_right': {'type': 'number'}, 'units': {'type': 'string'}}, 'type': 'object'}, 'datatype': {'enum': ['number', 'string', 'date'], 'type': 'string'}, 'description': {'type': 'string'}, 'group_by': {'type': 'string'}, 'group_by_value': {'type': 'string'}, 'mapping': {'type': 'string'}, 'mapping_for_search_filter': {'type': 'string'}, 'title': {'type': 'string'}, 'value_mapping': {'type': 'string'}}, 'type': 'object'}}, 'type': 'object'}, 'overview': {'description': 'List of overview sections', 'items': {'$id': '/chord_metadata_service/discovery/discovery_overview', 'additionalProperties': False, 'description': 'An overview section containing charts', 'properties': {'charts': {'items': {'$id': '/chord_metadata_service/discovery/discovery_overview_chart', 'additionalProperties': False, 'description': 'Associates a field name with a chart type for overview display', 'properties': {'chart_type': {'enum': ['bar', 'pie'], 'type': 'string'}, 'field': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}, 'rules': {'properties': {'count_threshold': {'type': 'integer'}, 'max_query_parameters': {'type': 'integer'}}, 'type': 'object'}, 'search': {'items': {'$id': '/chord_metadata_service/discovery/discovery_search', 'additionalProperties': False, 'description': 'Groups search fields by section.', 'properties': {'fields': {'items': {'type': 'string'}, 'type': 'array'}, 'section_title': {'type': 'string'}}, 'type': 'object'}, 'type': 'array'}}, 'type': 'object'}, formats=None)]), ), ] diff --git a/chord_metadata_service/discovery/schemas.py b/chord_metadata_service/discovery/schemas.py index 95db64e2f..df897c406 100644 --- a/chord_metadata_service/discovery/schemas.py +++ b/chord_metadata_service/discovery/schemas.py @@ -17,7 +17,7 @@ "value_mapping": base_type(SchemaTypes.STRING), "title": base_type(SchemaTypes.STRING), "description": base_type(SchemaTypes.STRING), - "datatype": enum_of(["number", "string"]), + "datatype": enum_of(["number", "string", "date"]), "config": { "type": "object", "properties": { From 85fc6118fe0ce4e6aa9f444d9f71c64391023b8a Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 24 Apr 2024 17:35:47 +0000 Subject: [PATCH 06/38] enable public config scoping --- chord_metadata_service/discovery/api_views.py | 41 +++++++++++++++---- 1 file changed, 33 insertions(+), 8 deletions(-) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index fa4571d63..dbdf69a63 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -19,6 +19,35 @@ from .schemas import DISCOVERY_SCHEMA +async def get_project_discovery(project_id: str = None, project: cm.Project = None): + if not project and project_id: + project = await cm.Project.objects.aget(identifier=project_id) + if not project.discovery: + return settings.CONFIG_PUBLIC + return project.discovery + + +async def get_dataset_discovery(dataset_id: str): + dataset = await cm.Dataset.objects.aget(identifier=dataset_id) + if not dataset.discovery: + project = await cm.Project.objects.aget(datasets=dataset_id) + return await get_project_discovery(project=project) + return dataset.discovery + + +async def get_discovery(request: DrfRequest): + dataset_id = request.query_params.get("dataset") + project_id = request.query_params.get("project") + if dataset_id: + # get dataset's discovery config if dataset_id is passed + return await get_dataset_discovery(dataset_id) + if project_id and not dataset_id: + # get project's discovery config if project_id is passed and dataset_id is not + return await get_project_discovery(project_id) + # fallback to config.json when no dataset or project is in the request + return settings.CONFIG_PUBLIC + + @extend_schema( description="Public search fields with their configuration", responses={ @@ -34,15 +63,13 @@ ) @api_view(["GET"]) @permission_classes([AllowAny]) -async def public_search_fields(_request: DrfRequest): +async def public_search_fields(request: DrfRequest): """ get: Return public search fields with their configuration """ - # TODO: should be project-scoped - - config_public = settings.CONFIG_PUBLIC + config_public = await get_discovery(request) if not config_public: return Response(dres.NO_PUBLIC_FIELDS_CONFIGURED, status=status.HTTP_404_NOT_FOUND) @@ -91,19 +118,17 @@ async def _counts_for_model_name(mn: str) -> tuple[str, int]: ) @api_view(["GET"]) # Don't use BentoAllowAny, we want to be more careful of cases here. @permission_classes([AllowAny]) -async def public_overview(_request: DrfRequest): +async def public_overview(request: DrfRequest): """ get: Overview of all public data in the database """ - config_public = settings.CONFIG_PUBLIC + config_public = await get_discovery(request) if not config_public: return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND) - # TODO: public overviews SHOULD be project-scoped at least. - # Predefined counts counts = dict(await asyncio.gather(*map(_counts_for_model_name, PUBLIC_MODEL_NAMES_TO_MODEL))) From 8c0ea155872d7cb812abd0ac9cba67af30fc11cc Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Tue, 30 Apr 2024 16:19:46 +0000 Subject: [PATCH 07/38] public datasets discovery value --- chord_metadata_service/discovery/api_views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index dbdf69a63..362ab59fe 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -210,7 +210,7 @@ async def public_dataset(_request: DrfRequest): "dimensions", "primary_publications", "citations", "produced_by", "creators", "licenses", "acknowledges", "keywords", "version", "dats_file", - "extra_properties", "identifier" + "extra_properties", "identifier", "discovery" ) return Response({ From 5753bf2ca32a2c2772b19f3d72b63352e949e27e Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 1 May 2024 22:03:07 +0000 Subject: [PATCH 08/38] project-dataset scoping for discovery api views --- chord_metadata_service/discovery/api_views.py | 24 +++++++++++++++---- chord_metadata_service/discovery/fields.py | 15 ++++++------ .../discovery/fields_utils.py | 6 +++++ .../discovery/model_lookups.py | 15 ++++++++++++ chord_metadata_service/discovery/stats.py | 20 ++++++++++++++-- 5 files changed, 66 insertions(+), 14 deletions(-) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index 362ab59fe..f643a6bf6 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -15,7 +15,7 @@ from ..logger import logger from .fields import get_field_options, get_range_stats, get_categorical_stats, get_date_stats -from .model_lookups import PUBLIC_MODEL_NAMES_TO_MODEL +from .model_lookups import PUBLIC_MODEL_NAMES_TO_MODEL, PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS from .schemas import DISCOVERY_SCHEMA @@ -125,12 +125,26 @@ async def public_overview(request: DrfRequest): """ config_public = await get_discovery(request) + dataset_id = request.query_params.get("dataset") + project_id = request.query_params.get("project") if not config_public: return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND) + async def _counts_for_scoped_model_name(mn: str) -> tuple[str, int]: + if project_id and not dataset_id: + scope = "project" + value = project_id + elif project_id and dataset_id: + scope = "dataset" + value = dataset_id + else: + # cannot scope + return await _counts_for_model_name(mn) + return mn, await PUBLIC_MODEL_NAMES_TO_MODEL[mn].objects.filter(**{PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[mn][scope]: value}).acount() + # Predefined counts - counts = dict(await asyncio.gather(*map(_counts_for_model_name, PUBLIC_MODEL_NAMES_TO_MODEL))) + counts = dict(await asyncio.gather(*map(_counts_for_scoped_model_name, PUBLIC_MODEL_NAMES_TO_MODEL))) # Get the rules config - because we used get_config_public_and_field_set_permissions with no arguments, it'll choose # these values based on if we have access to ALL public fields or not. @@ -165,11 +179,11 @@ async def public_overview(request: DrfRequest): async def _get_field_response(field_id: str, field_props: dict) -> dict: stats: list[BinWithValue] | None if field_props["datatype"] == "string": - stats = await get_categorical_stats(field_props, low_counts_censored=True) + stats = await get_categorical_stats(field_props, project_id, dataset_id, low_counts_censored=True) elif field_props["datatype"] == "number": - stats = await get_range_stats(field_props, low_counts_censored=True) + stats = await get_range_stats(field_props, project_id, dataset_id, low_counts_censored=True) elif field_props["datatype"] == "date": - stats = await get_date_stats(field_props, low_counts_censored=True) + stats = await get_date_stats(field_props, project_id, dataset_id, low_counts_censored=True) else: raise NotImplementedError() diff --git a/chord_metadata_service/discovery/fields.py b/chord_metadata_service/discovery/fields.py index 125e8666e..cd4c02468 100644 --- a/chord_metadata_service/discovery/fields.py +++ b/chord_metadata_service/discovery/fields.py @@ -10,7 +10,7 @@ from . import fields_utils as f_utils from .censorship import get_threshold, thresholded_count -from .stats import stats_for_field +from .stats import stats_for_field, get_scoped_queryset from .types import BinWithValue, DiscoveryFieldProps LENGTH_Y_M = 4 + 1 + 2 # dates stored as yyyy-mm-dd @@ -164,7 +164,7 @@ async def get_month_date_range(field_props: DiscoveryFieldProps) -> tuple[str | return start, end -async def get_range_stats(field_props: DiscoveryFieldProps, low_counts_censored: bool = True) -> list[BinWithValue]: +async def get_range_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, low_counts_censored: bool = True) -> list[BinWithValue]: model, field = f_utils.get_model_and_field(field_props["mapping"]) # JSONField array specific field props @@ -193,7 +193,7 @@ async def get_range_stats(field_props: DiscoveryFieldProps, low_counts_censored: ] query_set = ( - model.objects + get_scoped_queryset(model, project_id, dataset_id) .values(label=Case(*whens, default=Value("missing"), output_field=CharField())) .annotate(total=Count("label")) ) @@ -215,7 +215,7 @@ async def get_range_stats(field_props: DiscoveryFieldProps, low_counts_censored: return bins -async def get_categorical_stats(field_props: DiscoveryFieldProps, low_counts_censored: bool) -> list[BinWithValue]: +async def get_categorical_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, low_counts_censored: bool) -> list[BinWithValue]: """ Fetches statistics for a given categorical field and apply privacy policies """ @@ -228,7 +228,8 @@ async def get_categorical_stats(field_props: DiscoveryFieldProps, low_counts_cen # 1 <= this field <= threshold given it being present at all. # - stats_for_field(...) handles this! stats: Mapping[str, int] = await stats_for_field(model, field_name, low_counts_censored, - add_missing=True, group_by=field_props.get("group_by")) + add_missing=True, group_by=field_props.get("group_by"), + project_id=project_id, dataset_id=dataset_id) # Enforce values order from config and apply policies labels: list[str] | None = field_props["config"].get("enum") @@ -255,7 +256,7 @@ async def get_categorical_stats(field_props: DiscoveryFieldProps, low_counts_cen ] -async def get_date_stats(field_props: DiscoveryFieldProps, low_counts_censored: bool = True) -> list[BinWithValue]: +async def get_date_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, low_counts_censored: bool = True) -> list[BinWithValue]: """ Fetches statistics for a given date field, fill the gaps in the date range and apply privacy policies. @@ -277,7 +278,7 @@ async def get_date_stats(field_props: DiscoveryFieldProps, low_counts_censored: # Note: lexical sort works on ISO dates query_set = ( - model.objects + get_scoped_queryset(model, project_id, dataset_id) .values(field_name) .order_by(field_name) .annotate(total=Count(field_name)) diff --git a/chord_metadata_service/discovery/fields_utils.py b/chord_metadata_service/discovery/fields_utils.py index b2196d39f..050bb3b0e 100644 --- a/chord_metadata_service/discovery/fields_utils.py +++ b/chord_metadata_service/discovery/fields_utils.py @@ -47,6 +47,12 @@ def get_model_and_field(field_id: str) -> tuple[Type[Model], str]: field_name = "__".join(field_path) return model, field_name +def get_public_model_name(model: Type[Model]): + model_name = [key for key, m in PUBLIC_MODEL_NAMES_TO_MODEL.items() if m == model] + if len(model_name) != 1: + raise ValueError(f"Provided model {model} is not available for public.") + return model_name[0] + def parse_duration(duration: str | dict): """ Returns years integer. """ diff --git a/chord_metadata_service/discovery/model_lookups.py b/chord_metadata_service/discovery/model_lookups.py index 3e86aded7..196394aaa 100644 --- a/chord_metadata_service/discovery/model_lookups.py +++ b/chord_metadata_service/discovery/model_lookups.py @@ -19,3 +19,18 @@ "biosample": DATA_TYPE_PHENOPACKET, "experiment": DATA_TYPE_EXPERIMENT, } + +PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS = { + "individual": { + "project": "phenopackets__dataset__project__identifier", + "dataset": "phenopackets__dataset__identifier", + }, + "biosample": { + "project": "phenopacket__dataset__project__identifier", + "dataset": "phenopacket__dataset__identifier", + }, + "experiment": { + "project": "dataset__project__identifier", + "dataset": "dataset__identifier", + }, +} diff --git a/chord_metadata_service/discovery/stats.py b/chord_metadata_service/discovery/stats.py index 8fe4eecef..b2a5fb91e 100644 --- a/chord_metadata_service/discovery/stats.py +++ b/chord_metadata_service/discovery/stats.py @@ -2,9 +2,11 @@ from typing import Mapping, Type +from chord_metadata_service.discovery.model_lookups import PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS + from .censorship import thresholded_count from .types import BinWithValue -from .fields_utils import get_jsonb_path_query +from .fields_utils import get_jsonb_path_query, get_public_model_name __all__ = [ "individual_experiment_type_stats", @@ -70,6 +72,17 @@ async def bento_public_format_count_and_stats_list( return thresholded_count(total, low_counts_censored), stats_list +def get_scoped_queryset(model: Type[Model], project_id: str, dataset_id: str): + if project_id and not dataset_id: + scope = "project" + value = project_id + elif project_id and dataset_id: + scope = "dataset" + value = dataset_id + else: + return model.objects.all() + model_name = get_public_model_name(model) + return model.objects.filter(**{PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[model_name][scope]: value}) async def stats_for_field( model: Type[Model], @@ -77,13 +90,16 @@ async def stats_for_field( low_counts_censored: bool, add_missing: bool = False, group_by=None, + project_id: str = None, + dataset_id: str = None, ) -> Mapping[str, int]: """ Computes counts of distinct values for a given field. Mainly applicable to char fields representing categories """ + qs = get_scoped_queryset(model, project_id, dataset_id) return await queryset_stats_for_field( - model.objects.all(), field, low_counts_censored=low_counts_censored, add_missing=add_missing, group_by=group_by) + qs, field, low_counts_censored=low_counts_censored, add_missing=add_missing, group_by=group_by) async def queryset_stats_for_field( From a1443b4d7e13d689b473a544c5d1a40b2ee9a855 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 27 May 2024 14:48:59 +0000 Subject: [PATCH 09/38] lint --- chord_metadata_service/discovery/api_views.py | 4 +++- chord_metadata_service/discovery/fields.py | 9 ++++++--- chord_metadata_service/discovery/fields_utils.py | 1 + chord_metadata_service/discovery/stats.py | 2 ++ 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index f643a6bf6..432821e63 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -141,7 +141,9 @@ async def _counts_for_scoped_model_name(mn: str) -> tuple[str, int]: else: # cannot scope return await _counts_for_model_name(mn) - return mn, await PUBLIC_MODEL_NAMES_TO_MODEL[mn].objects.filter(**{PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[mn][scope]: value}).acount() + return mn, await PUBLIC_MODEL_NAMES_TO_MODEL[mn].objects.filter( + **{PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[mn][scope]: value} + ).acount() # Predefined counts counts = dict(await asyncio.gather(*map(_counts_for_scoped_model_name, PUBLIC_MODEL_NAMES_TO_MODEL))) diff --git a/chord_metadata_service/discovery/fields.py b/chord_metadata_service/discovery/fields.py index cd4c02468..1aa35b32e 100644 --- a/chord_metadata_service/discovery/fields.py +++ b/chord_metadata_service/discovery/fields.py @@ -164,7 +164,8 @@ async def get_month_date_range(field_props: DiscoveryFieldProps) -> tuple[str | return start, end -async def get_range_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, low_counts_censored: bool = True) -> list[BinWithValue]: +async def get_range_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, + low_counts_censored: bool = True) -> list[BinWithValue]: model, field = f_utils.get_model_and_field(field_props["mapping"]) # JSONField array specific field props @@ -215,7 +216,8 @@ async def get_range_stats(field_props: DiscoveryFieldProps, project_id: str, dat return bins -async def get_categorical_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, low_counts_censored: bool) -> list[BinWithValue]: +async def get_categorical_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, + low_counts_censored: bool) -> list[BinWithValue]: """ Fetches statistics for a given categorical field and apply privacy policies """ @@ -256,7 +258,8 @@ async def get_categorical_stats(field_props: DiscoveryFieldProps, project_id: st ] -async def get_date_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, low_counts_censored: bool = True) -> list[BinWithValue]: +async def get_date_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, + low_counts_censored: bool = True) -> list[BinWithValue]: """ Fetches statistics for a given date field, fill the gaps in the date range and apply privacy policies. diff --git a/chord_metadata_service/discovery/fields_utils.py b/chord_metadata_service/discovery/fields_utils.py index 050bb3b0e..d8a5c5d3b 100644 --- a/chord_metadata_service/discovery/fields_utils.py +++ b/chord_metadata_service/discovery/fields_utils.py @@ -47,6 +47,7 @@ def get_model_and_field(field_id: str) -> tuple[Type[Model], str]: field_name = "__".join(field_path) return model, field_name + def get_public_model_name(model: Type[Model]): model_name = [key for key, m in PUBLIC_MODEL_NAMES_TO_MODEL.items() if m == model] if len(model_name) != 1: diff --git a/chord_metadata_service/discovery/stats.py b/chord_metadata_service/discovery/stats.py index b2a5fb91e..712901247 100644 --- a/chord_metadata_service/discovery/stats.py +++ b/chord_metadata_service/discovery/stats.py @@ -72,6 +72,7 @@ async def bento_public_format_count_and_stats_list( return thresholded_count(total, low_counts_censored), stats_list + def get_scoped_queryset(model: Type[Model], project_id: str, dataset_id: str): if project_id and not dataset_id: scope = "project" @@ -84,6 +85,7 @@ def get_scoped_queryset(model: Type[Model], project_id: str, dataset_id: str): model_name = get_public_model_name(model) return model.objects.filter(**{PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[model_name][scope]: value}) + async def stats_for_field( model: Type[Model], field: str, From bb124471b42bffe5e7cfe5d4b3382b1ddd1a8a83 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 27 May 2024 16:13:17 +0000 Subject: [PATCH 10/38] tests --- chord_metadata_service/discovery/fields.py | 6 ++--- .../discovery/tests/test_fields.py | 25 ++++++++++++++----- 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/chord_metadata_service/discovery/fields.py b/chord_metadata_service/discovery/fields.py index 1aa35b32e..46e998f16 100644 --- a/chord_metadata_service/discovery/fields.py +++ b/chord_metadata_service/discovery/fields.py @@ -216,8 +216,8 @@ async def get_range_stats(field_props: DiscoveryFieldProps, project_id: str, dat return bins -async def get_categorical_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, - low_counts_censored: bool) -> list[BinWithValue]: +async def get_categorical_stats(field_props: DiscoveryFieldProps, project_id: str = "", dataset_id: str = "", + low_counts_censored: bool = True) -> list[BinWithValue]: """ Fetches statistics for a given categorical field and apply privacy policies """ @@ -258,7 +258,7 @@ async def get_categorical_stats(field_props: DiscoveryFieldProps, project_id: st ] -async def get_date_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, +async def get_date_stats(field_props: DiscoveryFieldProps, project_id: str = "", dataset_id: str = "", low_counts_censored: bool = True) -> list[BinWithValue]: """ Fetches statistics for a given date field, fill the gaps in the date range diff --git a/chord_metadata_service/discovery/tests/test_fields.py b/chord_metadata_service/discovery/tests/test_fields.py index 12c1fb1b5..e9a8e017e 100644 --- a/chord_metadata_service/discovery/tests/test_fields.py +++ b/chord_metadata_service/discovery/tests/test_fields.py @@ -1,6 +1,7 @@ from django.test import TransactionTestCase, override_settings from rest_framework.test import APITestCase +from chord_metadata_service.chord.tests.helpers import ProjectTestCase from chord_metadata_service.patients import models as pa_m from chord_metadata_service.phenopackets.tests import constants as ph_c from chord_metadata_service.phenopackets import models as ph_m @@ -37,7 +38,7 @@ async def test_get_field_options_not_impl(self): await get_field_options({**self.field_some_prop, "datatype": "made_up"}, low_counts_censored=False) -class TestGetCategoricalStats(TransactionTestCase): +class TestGetCategoricalStats(ProjectTestCase): f_sex = { "mapping": "individual/sex", @@ -51,14 +52,23 @@ class TestGetCategoricalStats(TransactionTestCase): def setUp(self): self.individual_1 = pa_m.Individual.objects.create(**ph_c.VALID_INDIVIDUAL_1) + self.meta_data = ph_m.MetaData.objects.create(**ph_c.VALID_META_DATA_1) + self.phenopacket = ph_m.Phenopacket.objects.create( + id="phenopacket_id:1", + subject=self.individual_1, + dataset=self.dataset, + meta_data=self.meta_data, + ) async def test_categorical_stats_lcf(self): - res = await get_categorical_stats(self.f_sex, low_counts_censored=False) + res = await get_categorical_stats(self.f_sex, project_id=self.project.identifier, + dataset_id=self.dataset.identifier, low_counts_censored=False) self.assertListEqual(res, [{"label": "MALE", "value": 1}, {"label": "missing", "value": 0}]) @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) async def test_categorical_stats_lct(self): - res = await get_categorical_stats(self.f_sex, low_counts_censored=True) + res = await get_categorical_stats(self.f_sex, project_id=self.project.identifier, + dataset_id=self.dataset.identifier, low_counts_censored=True) self.assertListEqual(res, [{"label": "missing", "value": 0}]) @@ -101,7 +111,7 @@ async def test_wrong_field_config(self): await get_month_date_range(fp) -class TestJsonFieldArrayStats(TransactionTestCase): +class TestJsonFieldArrayStats(ProjectTestCase): tumor_lengths = range(1, 50, 5) dm_fp = CONFIG_PUBLIC_TEST["fields"]["diagnostic_markers"] @@ -117,12 +127,14 @@ def setUp(self) -> None: subject=self.individual, measurements=self.tumors, meta_data=self.meta_data, + dataset=self.dataset, ) self.phenopacket.biosamples.set([self.biosample]) @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) async def test_json_categorical_stats_lcf(self): - res = await get_categorical_stats(self.dm_fp, low_counts_censored=False) + res = await get_categorical_stats(self.dm_fp, project_id=self.project.identifier, + dataset_id=self.dataset.identifier, low_counts_censored=False) ground_truth = [ {"label": "Genetic Testing", "value": 1}, {"label": "Hematology Test", "value": 1}, @@ -132,7 +144,8 @@ async def test_json_categorical_stats_lcf(self): @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) async def test_json_categorical_stats_lct(self): - res = await get_categorical_stats(self.dm_fp, low_counts_censored=True) + res = await get_categorical_stats(self.dm_fp, project_id=self.project.identifier, + dataset_id=self.dataset.identifier, low_counts_censored=True) ground_truth = [ {"label": "missing", "value": 0}, ] From 83ea79f0e75ce12cbc86cc3de40caa1ec48cb966 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 30 May 2024 21:13:27 +0000 Subject: [PATCH 11/38] feat: dats schema endpoint --- chord_metadata_service/restapi/api_views.py | 12 ++++++++++++ chord_metadata_service/restapi/urls.py | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 9f5b2afa2..5476b4f2e 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -1,4 +1,6 @@ import asyncio +import json +import os from adrf.decorators import api_view from django.db.models import QuerySet @@ -16,6 +18,7 @@ from chord_metadata_service.metadata.service_info import get_service_info from chord_metadata_service.phenopackets import models as pheno_models from chord_metadata_service.phenopackets.summaries import dt_phenopacket_summary +from chord_metadata_service.restapi.dats_schemas import DATS_PATH from chord_metadata_service.restapi.models import SchemaType @@ -105,3 +108,12 @@ async def search_overview(request: DrfRequest): # hack-y mess of passing IDs around. return await build_overview_response(phenopackets, experiments) + + +@api_view(["GET"]) +async def dats_schema(request: DrfRequest): + dats_file_path = os.path.join(DATS_PATH, 'dataset_schema.json') + dats_schema = None + with open(dats_file_path) as dats_file: + dats_schema = json.loads(dats_file.read()) + return Response(dats_schema) diff --git a/chord_metadata_service/restapi/urls.py b/chord_metadata_service/restapi/urls.py index 1769037ca..10e79b0d7 100644 --- a/chord_metadata_service/restapi/urls.py +++ b/chord_metadata_service/restapi/urls.py @@ -17,7 +17,7 @@ BiosampleSampledTissueAutocomplete ) from chord_metadata_service.resources import api_views as resources_views -from chord_metadata_service.restapi.api_views import overview, search_overview, extra_properties_schema_types +from chord_metadata_service.restapi.api_views import dats_schema, overview, search_overview, extra_properties_schema_types from chord_metadata_service.restapi.routers import BatchListRouter __all__ = ["router", "batch_router", "urlpatterns"] @@ -66,6 +66,7 @@ path('experiment_schema', experiment_views.get_experiment_schema, name="experiment-schema"), path('discovery_schema', discovery_schema, name="discovery-schema"), + path('dats_schema', dats_schema, name="dats-schema"), # extra properties schema types path('extra_properties_schema_types', extra_properties_schema_types, name="extra-properties-schema-types"), From ddc9b0e1f8bd2380beaca43338acb03ff73bd2d1 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 30 May 2024 21:31:05 +0000 Subject: [PATCH 12/38] propper dats schema id --- chord_metadata_service/restapi/api_views.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 5476b4f2e..4b55ba0fe 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -115,5 +115,7 @@ async def dats_schema(request: DrfRequest): dats_file_path = os.path.join(DATS_PATH, 'dataset_schema.json') dats_schema = None with open(dats_file_path) as dats_file: - dats_schema = json.loads(dats_file.read()) + dats_schema: dict = json.loads(dats_file.read()) + if schema_id := dats_schema.pop("id"): + dats_schema["$id"] = schema_id return Response(dats_schema) From 83fe43c13fe180fce054b4dbe7715ef38eb614d3 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 3 Jun 2024 21:45:23 +0000 Subject: [PATCH 13/38] feat: json schemas retrieval links --- ...ry_project_discovery.py => 0008_v8_0_0.py} | 0 chord_metadata_service/discovery/api_views.py | 4 +- chord_metadata_service/discovery/fields.py | 24 +++-- .../discovery/fields_utils.py | 2 +- chord_metadata_service/discovery/schemas.py | 18 ++-- chord_metadata_service/discovery/stats.py | 20 ++-- .../experiments/api_views.py | 13 ++- chord_metadata_service/experiments/schemas.py | 26 +++-- .../experiments/tests/test_api.py | 2 +- chord_metadata_service/metadata/settings.py | 2 + chord_metadata_service/patients/schemas.py | 11 ++- .../phenopackets/api_views.py | 13 ++- .../phenopackets/schemas.py | 98 +++++++++++-------- chord_metadata_service/restapi/api_views.py | 14 --- chord_metadata_service/restapi/schemas.py | 34 ++++--- chord_metadata_service/restapi/urls.py | 12 ++- 16 files changed, 177 insertions(+), 116 deletions(-) rename chord_metadata_service/chord/migrations/{0008_dataset_discovery_project_discovery.py => 0008_v8_0_0.py} (100%) diff --git a/chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py b/chord_metadata_service/chord/migrations/0008_v8_0_0.py similarity index 100% rename from chord_metadata_service/chord/migrations/0008_dataset_discovery_project_discovery.py rename to chord_metadata_service/chord/migrations/0008_v8_0_0.py diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index 432821e63..3f62ec72f 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -125,8 +125,8 @@ async def public_overview(request: DrfRequest): """ config_public = await get_discovery(request) - dataset_id = request.query_params.get("dataset") - project_id = request.query_params.get("project") + dataset_id = request.query_params.get("dataset", None) + project_id = request.query_params.get("project", None) if not config_public: return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND) diff --git a/chord_metadata_service/discovery/fields.py b/chord_metadata_service/discovery/fields.py index 46e998f16..724951498 100644 --- a/chord_metadata_service/discovery/fields.py +++ b/chord_metadata_service/discovery/fields.py @@ -164,8 +164,12 @@ async def get_month_date_range(field_props: DiscoveryFieldProps) -> tuple[str | return start, end -async def get_range_stats(field_props: DiscoveryFieldProps, project_id: str, dataset_id: str, - low_counts_censored: bool = True) -> list[BinWithValue]: +async def get_range_stats( + field_props: DiscoveryFieldProps, + project_id: str | None = None, + dataset_id: str | None = None, + low_counts_censored: bool = True +) -> list[BinWithValue]: model, field = f_utils.get_model_and_field(field_props["mapping"]) # JSONField array specific field props @@ -216,8 +220,12 @@ async def get_range_stats(field_props: DiscoveryFieldProps, project_id: str, dat return bins -async def get_categorical_stats(field_props: DiscoveryFieldProps, project_id: str = "", dataset_id: str = "", - low_counts_censored: bool = True) -> list[BinWithValue]: +async def get_categorical_stats( + field_props: DiscoveryFieldProps, + project_id: str | None = None, + dataset_id: str | None = None, + low_counts_censored: bool = True +) -> list[BinWithValue]: """ Fetches statistics for a given categorical field and apply privacy policies """ @@ -258,8 +266,12 @@ async def get_categorical_stats(field_props: DiscoveryFieldProps, project_id: st ] -async def get_date_stats(field_props: DiscoveryFieldProps, project_id: str = "", dataset_id: str = "", - low_counts_censored: bool = True) -> list[BinWithValue]: +async def get_date_stats( + field_props: DiscoveryFieldProps, + project_id: str | None = None, + dataset_id: str | None = None, + low_counts_censored: bool = True, +) -> list[BinWithValue]: """ Fetches statistics for a given date field, fill the gaps in the date range and apply privacy policies. diff --git a/chord_metadata_service/discovery/fields_utils.py b/chord_metadata_service/discovery/fields_utils.py index d8a5c5d3b..91387dabe 100644 --- a/chord_metadata_service/discovery/fields_utils.py +++ b/chord_metadata_service/discovery/fields_utils.py @@ -48,7 +48,7 @@ def get_model_and_field(field_id: str) -> tuple[Type[Model], str]: return model, field_name -def get_public_model_name(model: Type[Model]): +def get_public_model_name(model: Type[Model]) -> str: model_name = [key for key, m in PUBLIC_MODEL_NAMES_TO_MODEL.items() if m == model] if len(model_name) != 1: raise ValueError(f"Provided model {model} is not available for public.") diff --git a/chord_metadata_service/discovery/schemas.py b/chord_metadata_service/discovery/schemas.py index df897c406..41f577276 100644 --- a/chord_metadata_service/discovery/schemas.py +++ b/chord_metadata_service/discovery/schemas.py @@ -1,12 +1,15 @@ +from django.conf import settings from pathlib import Path from chord_metadata_service.restapi.schema_utils import ( - get_schema_app_id, sub_schema_uri, array_of, base_type, SchemaTypes, enum_of + sub_schema_uri, array_of, base_type, SchemaTypes, enum_of, get_schema_app_id ) -base_uri = get_schema_app_id(Path(__file__).parent.name) +if settings.SCHEMAS_BASE_URL: + base_uri = settings.SCHEMAS_BASE_URL +else: + base_uri = get_schema_app_id(Path(__file__).parent.name) DISCOVERY_FIELD_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_field"), "description": "Field configuration", "type": "object", "properties": { @@ -44,7 +47,6 @@ } DISCOVERY_NAMED_FIELDS_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_named_fields"), "description": "Intermediate schema, enforces field schema with flexible names.", "type": "object", "patternProperties": { @@ -54,7 +56,6 @@ } DISCOVERY_OVERVIEW_CHART_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_overview_chart"), "description": "Associates a field name with a chart type for overview display", "type": "object", "properties": { @@ -65,7 +66,6 @@ } DISCOVERY_OVERVIEW_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_overview"), "description": "An overview section containing charts", "type": "object", "properties": { @@ -76,7 +76,6 @@ } DISCOVERY_SEARCH_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery_search"), "description": "Groups search fields by section.", "type": "object", "properties": { @@ -105,6 +104,5 @@ "additionalProperties": False } -# import json -# with open("discovery.json", "w") as schema_file: -# json.dump(DISCOVERY_SCHEMA, schema_file) + +# DATS diff --git a/chord_metadata_service/discovery/stats.py b/chord_metadata_service/discovery/stats.py index 712901247..01d26de9a 100644 --- a/chord_metadata_service/discovery/stats.py +++ b/chord_metadata_service/discovery/stats.py @@ -2,11 +2,10 @@ from typing import Mapping, Type -from chord_metadata_service.discovery.model_lookups import PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS - from .censorship import thresholded_count -from .types import BinWithValue from .fields_utils import get_jsonb_path_query, get_public_model_name +from .model_lookups import PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS +from .types import BinWithValue __all__ = [ "individual_experiment_type_stats", @@ -14,6 +13,7 @@ "bento_public_format_count_and_stats_list", "stats_for_field", "queryset_stats_for_field", + "get_scoped_queryset", ] @@ -73,7 +73,11 @@ async def bento_public_format_count_and_stats_list( return thresholded_count(total, low_counts_censored), stats_list -def get_scoped_queryset(model: Type[Model], project_id: str, dataset_id: str): +def get_scoped_queryset( + model: Type[Model], + project_id: str | None = None, + dataset_id: str | None = None, +): if project_id and not dataset_id: scope = "project" value = project_id @@ -91,9 +95,9 @@ async def stats_for_field( field: str, low_counts_censored: bool, add_missing: bool = False, - group_by=None, - project_id: str = None, - dataset_id: str = None, + group_by: str | None = None, + project_id: str | None = None, + dataset_id: str | None = None, ) -> Mapping[str, int]: """ Computes counts of distinct values for a given field. Mainly applicable to @@ -105,7 +109,7 @@ async def stats_for_field( async def queryset_stats_for_field( - queryset: QuerySet, field: str, low_counts_censored: bool, add_missing: bool = False, group_by: str = None + queryset: QuerySet, field: str, low_counts_censored: bool, add_missing: bool = False, group_by: str | None = None ) -> Mapping[str, int]: """ Computes counts of distinct values for a queryset. diff --git a/chord_metadata_service/experiments/api_views.py b/chord_metadata_service/experiments/api_views.py index 9ce53ec8b..2ad6f44b9 100644 --- a/chord_metadata_service/experiments/api_views.py +++ b/chord_metadata_service/experiments/api_views.py @@ -8,7 +8,7 @@ from .serializers import ExperimentSerializer, ExperimentResultSerializer from .models import Experiment, ExperimentResult -from .schemas import EXPERIMENT_SCHEMA +from .schemas import EXPERIMENT_SCHEMA, experiment_resolver, experiment_base_uri from .filters import ExperimentFilter, ExperimentResultFilter from chord_metadata_service.restapi.constants import MODEL_ID_PATTERN from chord_metadata_service.restapi.pagination import LargeResultsSetPagination, BatchResultsSetPagination @@ -147,3 +147,14 @@ def get_experiment_schema(_request): Experiment schema """ return Response(EXPERIMENT_SCHEMA) + + +@api_view(["GET"]) +@permission_classes([AllowAny]) +def get_experiment_subschema(_request, subschema: str): + """ + get: + Experiment sub-schema + """ + schema = experiment_resolver.lookup(ref=f"{experiment_base_uri}/{subschema}").contents + return Response(schema) diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 49db0470f..42b3f6e4e 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -1,4 +1,7 @@ from pathlib import Path + +from django.conf import settings +from referencing import Registry, Resource from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT from chord_metadata_service.restapi.constants import MODEL_ID_PATTERN from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT @@ -13,11 +16,15 @@ "INSTRUMENT_SCHEMA", ] -base_uri = get_schema_app_id(Path(__file__).parent.name) +if settings.SCHEMAS_BASE_URL: + base_uri = settings.SCHEMAS_BASE_URL +else: + base_uri = get_schema_app_id(Path(__file__).parent.name) +experiment_base_uri = sub_schema_uri(base_uri, "experiment") DATA_FILE_OR_RECORD_URL_SCHEMA = { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": sub_schema_uri(base_uri, "data_file_or_record_url"), + "$id": sub_schema_uri(experiment_base_uri, "data_file_or_record_url"), "title": "Data file or record URL", "description": "A URL of a particular scheme, pointing to a data file OR a DRS record which itself points to a " "data file.", @@ -29,7 +36,7 @@ EXPERIMENT_RESULT_FILE_INDEX_SCHEMA = { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": sub_schema_uri(base_uri, "experiment_result_file_index"), + "$id": sub_schema_uri(experiment_base_uri, "experiment_result_file_index"), "title": "Experiment result file index schema", "description": "Schema for describing an object representing an index of an experiment result file.", "type": "object", @@ -51,7 +58,7 @@ } EXPERIMENT_RESULT_FILE_INDEX_LIST_SCHEMA = { "$schema": "http://json-schema.org/draft-07/schema#", - "$id": sub_schema_uri(base_uri, "experiment_result_file_index_list"), + "$id": sub_schema_uri(experiment_base_uri, "experiment_result_file_index_list"), "title": "Experiment result file index list schema", "description": "Schema for describing a list of object representing an indices of an experiment result file.", "type": "array", @@ -60,7 +67,7 @@ EXPERIMENT_RESULT_SCHEMA = tag_ids_and_describe({ "$schema": "http://json-schema.org/draft-07/schema#", - "$id": sub_schema_uri(base_uri, "experiment_result"), + "$id": sub_schema_uri(experiment_base_uri, "experiment_result"), "title": "Experiment result schema", "description": "Schema for describing information about analysis of sequencing data in a file format.", "type": "object", @@ -105,7 +112,7 @@ INSTRUMENT_SCHEMA = tag_ids_and_describe({ "$schema": "http://json-schema.org/draft-07/schema#", - "$id": sub_schema_uri(base_uri, "instrument"), + "$id": sub_schema_uri(experiment_base_uri, "instrument"), "title": "Instrument schema", "description": "Schema for describing an instrument used for a sequencing experiment.", "type": "object", @@ -129,7 +136,7 @@ EXPERIMENT_SCHEMA = tag_ids_and_describe({ "$schema": "http://json-schema.org/draft-07/schema#", - "$id": sub_schema_uri(base_uri, "experiment"), + "$id": experiment_base_uri, "title": "Experiment schema", "description": "Schema for describing an experiment.", "type": "object", @@ -198,3 +205,8 @@ }, "required": ["id", "experiment_type"] }, EXPERIMENT) + +# URI referencing +experiment_resource = Resource.from_contents(EXPERIMENT_SCHEMA) +experiment_registry = Registry().with_resource(uri=experiment_base_uri, resource=experiment_resource) +experiment_resolver = experiment_registry.resolver(base_uri=experiment_base_uri) diff --git a/chord_metadata_service/experiments/tests/test_api.py b/chord_metadata_service/experiments/tests/test_api.py index 2a3d7e73a..b4c594965 100644 --- a/chord_metadata_service/experiments/tests/test_api.py +++ b/chord_metadata_service/experiments/tests/test_api.py @@ -54,7 +54,7 @@ def test_get_experiment_one(self): self.assertEqual(response_data['id'], 'katsu.experiment:1') def test_get_experiment_schema(self): - response = self.client.get('/api/experiment_schema') + response = self.client.get('/api/schemas/experiment') self.assertEqual(response.status_code, status.HTTP_200_OK) response_data = response.json() Draft7Validator.check_schema(response_data) diff --git a/chord_metadata_service/metadata/settings.py b/chord_metadata_service/metadata/settings.py index 0bd6747d9..2c2d63407 100644 --- a/chord_metadata_service/metadata/settings.py +++ b/chord_metadata_service/metadata/settings.py @@ -50,6 +50,8 @@ BENTO_CONTAINER_LOCAL = os.environ.get("BENTO_CONTAINER_LOCAL", "false").lower() == "true" CHORD_URL = os.environ.get("CHORD_URL") # Leave None if not specified, for running in other contexts +if CHORD_URL: + SCHEMAS_BASE_URL = CHORD_URL + "/api/metadata/api/schemas" # SECURITY WARNING: Don't run with CHORD_PERMISSIONS turned off in production, # unless an alternative permissions system is in place. diff --git a/chord_metadata_service/patients/schemas.py b/chord_metadata_service/patients/schemas.py index 5443c3f14..4accd6816 100644 --- a/chord_metadata_service/patients/schemas.py +++ b/chord_metadata_service/patients/schemas.py @@ -1,3 +1,4 @@ +from django.conf import settings from chord_metadata_service.restapi.constants import MODEL_ID_PATTERN from chord_metadata_service.restapi.schema_utils import ( DATE_TIME, @@ -17,11 +18,15 @@ from .values import Sex, KaryotypicSex -base_uri = get_schema_app_id(Path(__file__).parent.name) +if settings.SCHEMAS_BASE_URL: + base_uri = settings.SCHEMAS_BASE_URL +else: + base_uri = get_schema_app_id(Path(__file__).parent.name) +phenopacket_base_uri = sub_schema_uri(base_uri=base_uri, name="phenopacket") VITAL_STATUS_SCHEMA = tag_ids_and_describe({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "vital_status"), + "$id": sub_schema_uri(phenopacket_base_uri, "vital_status"), "type": "object", "properties": { "status": enum_of(["UNKNOWN_STATUS", "ALIVE", "DECEASED"]), @@ -35,7 +40,7 @@ INDIVIDUAL_SCHEMA = tag_ids_and_describe({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "individual"), + "$id": sub_schema_uri(phenopacket_base_uri, "individual"), "type": "object", "properties": { # Phenopacket V2 Individual fields diff --git a/chord_metadata_service/phenopackets/api_views.py b/chord_metadata_service/phenopackets/api_views.py index 9f29722fc..a67380314 100644 --- a/chord_metadata_service/phenopackets/api_views.py +++ b/chord_metadata_service/phenopackets/api_views.py @@ -11,7 +11,7 @@ from chord_metadata_service.restapi.constants import MODEL_ID_PATTERN from chord_metadata_service.restapi.pagination import LargeResultsSetPagination, BatchResultsSetPagination from chord_metadata_service.restapi.negociation import FormatInPostContentNegotiation -from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA +from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA, phenopacket_resolver, phenopacket_base_uri from . import models as m, serializers as s, filters as f from drf_spectacular.utils import extend_schema, inline_serializer from rest_framework import serializers, status @@ -231,3 +231,14 @@ def get_chord_phenopacket_schema(_request): Chord phenopacket schema that can be shared with data providers. """ return Response(PHENOPACKET_SCHEMA) + + +@api_view(["GET"]) +@permission_classes([AllowAny]) +def get_chord_phenopacket_subschema(_request, subschema: str): + """ + get: + Chord phenopacket schema that can be shared with data providers. + """ + schema = phenopacket_resolver.lookup(ref=f"{phenopacket_base_uri}/{subschema}").contents + return Response(schema) diff --git a/chord_metadata_service/phenopackets/schemas.py b/chord_metadata_service/phenopackets/schemas.py index b35f90882..5c006669d 100644 --- a/chord_metadata_service/phenopackets/schemas.py +++ b/chord_metadata_service/phenopackets/schemas.py @@ -1,6 +1,7 @@ # Individual schemas for validation of JSONField values import json from pathlib import Path +from django.conf import settings from referencing import Registry, Resource from referencing.jsonschema import DRAFT7 from chord_metadata_service.patients.schemas import INDIVIDUAL_SCHEMA @@ -58,9 +59,14 @@ "VCF_RECORD_SCHEMA", "VRS_REF_REGISTRY", "VRS_VARIATION_SCHEMA", + "phenopacket_resolver", ] -base_uri = get_schema_app_id(Path(__file__).parent.name) +if settings.SCHEMAS_BASE_URL: + base_uri = settings.SCHEMAS_BASE_URL +else: + base_uri = get_schema_app_id(Path(__file__).parent.name) +phenopacket_base_uri = sub_schema_uri(base_uri=base_uri, name="phenopacket") with open("chord_metadata_service/vrs/schema/vrs.json", "r") as file: vrs_schema_definitions = json.load(file) @@ -70,7 +76,7 @@ PHENOPACKET_EXTERNAL_REFERENCE_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "external_reference"), + "$id": sub_schema_uri(phenopacket_base_uri, "external_reference"), "title": "External reference schema", "type": "object", "properties": { @@ -82,7 +88,7 @@ PHENOPACKET_UPDATE_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "update"), + "$id": sub_schema_uri(phenopacket_base_uri, "update"), "title": "Updates schema", "type": "object", "properties": { @@ -98,7 +104,7 @@ # noinspection PyProtectedMember PHENOPACKET_META_DATA_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "meta_data"), + "$id": sub_schema_uri(phenopacket_base_uri, "meta_data"), "type": "object", "properties": { "created": DATE_TIME, @@ -114,7 +120,7 @@ PHENOPACKET_EVIDENCE_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "evidence"), + "$id": sub_schema_uri(phenopacket_base_uri, "evidence"), "title": "Evidence schema", "type": "object", "properties": { @@ -128,7 +134,7 @@ PHENOPACKET_PHENOTYPIC_FEATURE_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "phenotypic_feature"), + "$id": sub_schema_uri(phenopacket_base_uri, "phenotypic_feature"), "type": "object", "properties": { "description": base_type(SchemaTypes.STRING), @@ -146,7 +152,7 @@ # TODO: search PHENOPACKET_GENE_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "gene"), + "$id": sub_schema_uri(phenopacket_base_uri, "gene"), "type": "object", "properties": { "id": base_type(SchemaTypes.STRING), @@ -159,7 +165,7 @@ PHENOPACKET_PROCEDURE_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "procedure"), + "$id": sub_schema_uri(phenopacket_base_uri, "procedure"), "title": "Procedure schema", "type": "object", "properties": { @@ -172,7 +178,7 @@ REFERENCE_RANGE_SCHEMA = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "reference_range"), + "$id": sub_schema_uri(phenopacket_base_uri, "reference_range"), "title": "Reference range schema", "type": "object", "properties": { @@ -185,7 +191,7 @@ PHENOPACKET_QUANTITY_SCHEMA = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "quantity"), + "$id": sub_schema_uri(phenopacket_base_uri, "quantity"), "title": "Quantity schema", "type": "object", "properties": { @@ -198,7 +204,7 @@ PHENOPACKET_TYPED_QUANTITY_SCHEMA = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "typed_quantity"), + "$id": sub_schema_uri(phenopacket_base_uri, "typed_quantity"), "title": "Quantity schema", "type": "object", "properties": { @@ -210,7 +216,7 @@ PHENOPACKET_VALUE_SCHEMA = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "value"), + "$id": sub_schema_uri(phenopacket_base_uri, "value"), "title": "Value schema", "type": "object", "oneOf": [ @@ -221,7 +227,7 @@ PHENOPACKET_COMPLEX_VALUE_SCHEMA = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "complex_value"), + "$id": sub_schema_uri(phenopacket_base_uri, "complex_value"), "title": "Complex value schema", "type": "object", "properties": { @@ -232,7 +238,7 @@ PHENOPACKET_MEASUREMENT_VALUE_SCHEMA = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "measurement_value"), + "$id": sub_schema_uri(phenopacket_base_uri, "measurement_value"), "title": "Measurement value schema", "type": "object", "oneOf": [ @@ -243,7 +249,7 @@ PHENOPACKET_MEASUREMENT_SCHEMA = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "measurement"), + "$id": sub_schema_uri(phenopacket_base_uri, "measurement"), "title": "Measurement schema", "type": "object", "properties": { @@ -261,7 +267,7 @@ FILE_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "file"), + "$id": sub_schema_uri(phenopacket_base_uri, "file"), "title": "Phenopacket file schema", "description": "The File message allows a Phenopacket to link the structured phenotypic data it " + "contains to external files which can be used to inform analyses.", @@ -276,7 +282,7 @@ # noinspection PyProtectedMember PHENOPACKET_BIOSAMPLE_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "biosample"), + "$id": sub_schema_uri(phenopacket_base_uri, "biosample"), "type": "object", "properties": { "id": string_with_pattern(MODEL_ID_PATTERN), @@ -310,7 +316,7 @@ PHENOPACKET_DISEASE_ONSET_SCHEMA = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "disease_onset"), + "$id": sub_schema_uri(phenopacket_base_uri, "disease_onset"), "title": "Onset age", "description": "Schema for the age of the onset of the disease.", "type": "object", @@ -323,7 +329,7 @@ PHENOPACKET_DISEASE_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "disease"), + "$id": sub_schema_uri(phenopacket_base_uri, "disease"), "title": "Disease schema", "type": "object", "properties": { @@ -341,7 +347,7 @@ DOSE_INTERVAL = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "dose_interval"), + "$id": sub_schema_uri(phenopacket_base_uri, "dose_interval"), "title": "Phenopacket dose interval for treatment", "description": "Represents the dose intervals of a treatment.", "type": "object", @@ -355,7 +361,7 @@ PHENOPACKET_TREATMENT = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "treatment"), + "$id": sub_schema_uri(phenopacket_base_uri, "treatment"), "title": "Phenopacket treatment", "description": "Represents a treatment with an agent, such as a drug.", "type": "object", @@ -376,7 +382,7 @@ PHENOPACKET_RADIATION_THERAPY = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "radiation_therapy"), + "$id": sub_schema_uri(phenopacket_base_uri, "radiation_therapy"), "title": "Phenopacket radiation therapy", "description": "Radiation therapy (or radiotherapy) uses ionizing radiation, generally as part of cancer " "treatment to control or kill malignant cells.", @@ -392,7 +398,7 @@ PHENOPACKET_THERAPEUTIC_REGIMEN = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "therapeutic_regimen"), + "$id": sub_schema_uri(phenopacket_base_uri, "therapeutic_regimen"), "title": "Phenopacket therapeutic regimen", "description": "This element represents a therapeutic regimen which will involve a specified set of treatments " "for a particular condition.", @@ -411,7 +417,7 @@ ONE_OF_MEDICAL_ACTION = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "one_of_medical_actions"), + "$id": sub_schema_uri(phenopacket_base_uri, "one_of_medical_actions"), "title": "Supported Phenopacket medical actions", "description": "One-of schema for supported medical action schemas", "type": "object", @@ -425,7 +431,7 @@ PHENOPACKET_MEDICAL_ACTION_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "medical_action"), + "$id": sub_schema_uri(phenopacket_base_uri, "medical_action"), "title": "Phenopacket medical action schema", "description": "Describes a medical action", "type": "object", @@ -447,7 +453,7 @@ GENE_DESCRIPTOR = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "gene_descriptor"), + "$id": sub_schema_uri(phenopacket_base_uri, "gene_descriptor"), "title": "Gene descriptor schema", "description": "Schema used to describe genes", "type": "object", @@ -466,7 +472,7 @@ EXPRESSION_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "expression"), + "$id": sub_schema_uri(phenopacket_base_uri, "expression"), "title": "Expression schema", "description": "Enables description of an object based on a nomenclature", "type": "object", @@ -481,7 +487,7 @@ EXTENSION_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "extension"), + "$id": sub_schema_uri(phenopacket_base_uri, "extension"), "title": "Extension schema", "description": "The Extension class provides a means to extend descriptions with other attributes unique to a " "content provider", @@ -495,7 +501,7 @@ VCF_RECORD_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "vcf_record"), + "$id": sub_schema_uri(phenopacket_base_uri, "vcf_record"), "title": "VCF record schema", "description": "This element is used to describe variants using the Variant Call Format.", "type": "object", @@ -516,38 +522,38 @@ VARIATION_ONE_OF_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "variation"), + "$id": sub_schema_uri(phenopacket_base_uri, "variation"), "title": "VRS Variation object", "description": "Provides a computable representation of variation", "type": "object", "oneOf": [ named_one_of("absolute_copy_numer", { - "$ref": sub_schema_uri(base_uri, "vrs#/definitions/AbsoluteCopyNumber") + "$ref": sub_schema_uri(phenopacket_base_uri, "vrs#/definitions/AbsoluteCopyNumber") }), named_one_of("allele", { - "$ref": sub_schema_uri(base_uri, "vrs#/definitions/Allele") + "$ref": sub_schema_uri(phenopacket_base_uri, "vrs#/definitions/Allele") }), named_one_of("genotype", { - "$ref": sub_schema_uri(base_uri, "#/definitions/Genotype") + "$ref": sub_schema_uri(phenopacket_base_uri, "#/definitions/Genotype") }), named_one_of("haplotype", { - "$ref": sub_schema_uri(base_uri, "#/definitions/Haplotype") + "$ref": sub_schema_uri(phenopacket_base_uri, "#/definitions/Haplotype") }), named_one_of("relative_copy_number", { - "$ref": sub_schema_uri(base_uri, "#/definitions/RelativeCopyNumber") + "$ref": sub_schema_uri(phenopacket_base_uri, "#/definitions/RelativeCopyNumber") }), named_one_of("text", { - "$ref": sub_schema_uri(base_uri, "#/definitions/Text") + "$ref": sub_schema_uri(phenopacket_base_uri, "#/definitions/Text") }), named_one_of("variation_set", { - "$ref": sub_schema_uri(base_uri, "#/definitions/VariationSet") + "$ref": sub_schema_uri(phenopacket_base_uri, "#/definitions/VariationSet") }), ] }, {}) VARIANT_DESCRIPTOR = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "variant_descriptor"), + "$id": sub_schema_uri(phenopacket_base_uri, "variant_descriptor"), "title": "Variant descriptor schema", "description": "Schema used to describe variants", "type": "object", @@ -573,7 +579,7 @@ PHENOPACKET_VARIANT_INTERPRETATION = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "variant_interpretation"), + "$id": sub_schema_uri(phenopacket_base_uri, "variant_interpretation"), "title": "Phenopacket variant interpretation schema", "description": "This element represents the interpretation of a variant according to the American College of " "Medical Genetics (ACMG) guidelines.", @@ -590,7 +596,7 @@ PHENOPACKET_GENOMIC_INTERPRETATION = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "genomic_interpretation"), + "$id": sub_schema_uri(phenopacket_base_uri, "genomic_interpretation"), "title": "Phenopacket genomic interpretation schema", "description": "Describes the interpretation for an individual variant or gene", "type": "object", @@ -608,7 +614,7 @@ PHENOPACKET_DIAGNOSIS_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "diagnosis"), + "$id": sub_schema_uri(phenopacket_base_uri, "diagnosis"), "title": "Phenopacket diagnosis schema", "description": "Refers to a disease and its genomic interpretations", "type": "object", @@ -621,7 +627,7 @@ PHENOPACKET_INTERPRETATION_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "interpretation"), + "$id": sub_schema_uri(phenopacket_base_uri, "interpretation"), "title": "Phenopacket interpretation schema", "description": "This message intends to represent the interpretation of a genomic analysis, such as the report " "from a diagnostic laboratory.", @@ -644,7 +650,7 @@ PHENOPACKET_SCHEMA = describe_schema({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "phenopacket"), + "$id": phenopacket_base_uri, "title": "Phenopacket schema", "description": "Schema for metadata service datasets", "type": "object", @@ -664,6 +670,12 @@ "required": ["id", "meta_data"], }, descriptions.PHENOPACKET) +# Phenopacket schema URI referencing +phenopacket_resource = Resource.from_contents(PHENOPACKET_SCHEMA) +phenopacket_registry = Registry().with_resource(uri=phenopacket_base_uri, resource=phenopacket_resource) +phenopacket_resolver = phenopacket_registry.resolver(base_uri=phenopacket_base_uri) + +# VRS referencing schema resolving VRS_REF_RESOURCE = Resource.from_contents(contents=vrs_schema_definitions, default_specification=DRAFT7) VRS_REF_REGISTRY = VRS_REF_RESOURCE @ Registry() diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 4b55ba0fe..9f5b2afa2 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -1,6 +1,4 @@ import asyncio -import json -import os from adrf.decorators import api_view from django.db.models import QuerySet @@ -18,7 +16,6 @@ from chord_metadata_service.metadata.service_info import get_service_info from chord_metadata_service.phenopackets import models as pheno_models from chord_metadata_service.phenopackets.summaries import dt_phenopacket_summary -from chord_metadata_service.restapi.dats_schemas import DATS_PATH from chord_metadata_service.restapi.models import SchemaType @@ -108,14 +105,3 @@ async def search_overview(request: DrfRequest): # hack-y mess of passing IDs around. return await build_overview_response(phenopackets, experiments) - - -@api_view(["GET"]) -async def dats_schema(request: DrfRequest): - dats_file_path = os.path.join(DATS_PATH, 'dataset_schema.json') - dats_schema = None - with open(dats_file_path) as dats_file: - dats_schema: dict = json.loads(dats_file.read()) - if schema_id := dats_schema.pop("id"): - dats_schema["$id"] = schema_id - return Response(dats_schema) diff --git a/chord_metadata_service/restapi/schemas.py b/chord_metadata_service/restapi/schemas.py index afbb1d1fc..a5b69690e 100644 --- a/chord_metadata_service/restapi/schemas.py +++ b/chord_metadata_service/restapi/schemas.py @@ -1,4 +1,6 @@ from pathlib import Path + +from django.conf import settings from . import descriptions from .description_utils import EXTRA_PROPERTIES, ONTOLOGY_CLASS as ONTOLOGY_CLASS_DESC from .schema_utils import DATE_TIME, DRAFT_07, SchemaTypes, base_type, tag_ids_and_describe, \ @@ -25,11 +27,15 @@ # ======================== Phenopackets based schemas ========================= -base_uri = get_schema_app_id(Path(__file__).parent.name) +if settings.SCHEMAS_BASE_URL: + base_uri = settings.SCHEMAS_BASE_URL +else: + base_uri = get_schema_app_id(Path(__file__).parent.name) +phenopacket_base_uri = sub_schema_uri(base_uri=base_uri, name="phenopacket") ONTOLOGY_CLASS = tag_ids_and_describe({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "ontology_class"), + "$id": sub_schema_uri(phenopacket_base_uri, "ontology_class"), "title": "Ontology class schema", "type": "object", "properties": { @@ -42,7 +48,7 @@ ONTOLOGY_CLASS_LIST = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "ontology_class_list"), + "$id": sub_schema_uri(phenopacket_base_uri, "ontology_class_list"), "title": "Ontology class list", "description": "Ontology class list", "type": "array", @@ -51,7 +57,7 @@ CURIE_SCHEMA = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "curie"), + "$id": sub_schema_uri(phenopacket_base_uri, "curie"), "title": "Curie style string", "description": ("A [W3C Compact URI](https://www.w3.org/TR/curie/) formatted string. " "A CURIE string has the structure ``prefix``:``reference``, as defined by the W3C syntax."), @@ -62,7 +68,7 @@ KEY_VALUE_OBJECT = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "key_value_object"), + "$id": sub_schema_uri(phenopacket_base_uri, "key_value_object"), "title": "Key-value object", "description": "The schema represents a key-value object.", "type": "object", @@ -74,20 +80,20 @@ EXTRA_PROPERTIES_SCHEMA = tag_ids_and_describe({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "extra_properties"), + "$id": sub_schema_uri(phenopacket_base_uri, "extra_properties"), "type": "object" }, EXTRA_PROPERTIES) AGE_STRING = tag_ids_and_describe({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "age_string"), + "$id": sub_schema_uri(phenopacket_base_uri, "age_string"), "type": "string" }, descriptions.AGE) AGE = tag_ids_and_describe({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "age"), + "$id": sub_schema_uri(phenopacket_base_uri, "age"), "title": "Age schema", "type": "object", "properties": { @@ -100,7 +106,7 @@ AGE_RANGE = tag_ids_and_describe({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "age_range"), + "$id": sub_schema_uri(phenopacket_base_uri, "age_range"), "title": "Age range schema", "type": "object", "properties": { @@ -114,7 +120,7 @@ AGE_OR_AGE_RANGE = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "age_or_age_range"), + "$id": sub_schema_uri(phenopacket_base_uri, "age_or_age_range"), "title": "Age schema", "description": "An age object describing the age of the individual at the time of collection of biospecimens or " "phenotypic observations.", @@ -128,7 +134,7 @@ TIME_INTERVAL = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "time_interval"), + "$id": sub_schema_uri(phenopacket_base_uri, "time_interval"), "title": "Age schema", "description": "An age object describing the age of the individual at the time of collection of biospecimens or " "phenotypic observations.", @@ -144,7 +150,7 @@ DISEASE_ONSET = { "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "disease_onset"), + "$id": sub_schema_uri(phenopacket_base_uri, "disease_onset"), "title": "Onset age", "description": "Schema for the age of the onset of the disease.", "type": "object", @@ -158,7 +164,7 @@ GESTATIONAL_AGE = tag_ids_and_describe({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "gestational_age"), + "$id": sub_schema_uri(phenopacket_base_uri, "gestational_age"), "title": "Gestational age schema", "type": "object", "properties": { @@ -171,7 +177,7 @@ TIME_ELEMENT_SCHEMA = tag_ids_and_describe({ "$schema": DRAFT_07, - "$id": sub_schema_uri(base_uri, "time_element"), + "$id": sub_schema_uri(phenopacket_base_uri, "time_element"), "title": "Time element schema", "type": "object", "oneOf": [ diff --git a/chord_metadata_service/restapi/urls.py b/chord_metadata_service/restapi/urls.py index 10e79b0d7..032cac947 100644 --- a/chord_metadata_service/restapi/urls.py +++ b/chord_metadata_service/restapi/urls.py @@ -17,7 +17,7 @@ BiosampleSampledTissueAutocomplete ) from chord_metadata_service.resources import api_views as resources_views -from chord_metadata_service.restapi.api_views import dats_schema, overview, search_overview, extra_properties_schema_types +from chord_metadata_service.restapi.api_views import overview, search_overview, extra_properties_schema_types from chord_metadata_service.restapi.routers import BatchListRouter __all__ = ["router", "batch_router", "urlpatterns"] @@ -61,12 +61,14 @@ path('', include(batch_router.urls)), # apps schemas - path('chord_phenopacket_schema', phenopacket_views.get_chord_phenopacket_schema, + path('schemas/phenopacket', phenopacket_views.get_chord_phenopacket_schema, name="chord-phenopacket-schema"), - path('experiment_schema', experiment_views.get_experiment_schema, + path('schemas/phenopacket/', phenopacket_views.get_chord_phenopacket_subschema, + name="chord-phenopacket-subschema"), + path('schemas/experiment', experiment_views.get_experiment_schema, name="experiment-schema"), - path('discovery_schema', discovery_schema, name="discovery-schema"), - path('dats_schema', dats_schema, name="dats-schema"), + path('schemas/experiment/', experiment_views.get_experiment_subschema, name="experiment-subschema"), + path('schemas/discovery', discovery_schema, name="discovery-schema"), # extra properties schema types path('extra_properties_schema_types', extra_properties_schema_types, name="extra-properties-schema-types"), From 96f03e55ea25f328f413bc0f477b0fba5e35d25b Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Tue, 4 Jun 2024 10:53:46 -0400 Subject: [PATCH 14/38] fix settings schemas base url --- chord_metadata_service/metadata/settings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chord_metadata_service/metadata/settings.py b/chord_metadata_service/metadata/settings.py index 2c2d63407..7ade5b174 100644 --- a/chord_metadata_service/metadata/settings.py +++ b/chord_metadata_service/metadata/settings.py @@ -52,6 +52,8 @@ CHORD_URL = os.environ.get("CHORD_URL") # Leave None if not specified, for running in other contexts if CHORD_URL: SCHEMAS_BASE_URL = CHORD_URL + "/api/metadata/api/schemas" +else: + SCHEMAS_BASE_URL = None # SECURITY WARNING: Don't run with CHORD_PERMISSIONS turned off in production, # unless an alternative permissions system is in place. From 0e895eaea0268b0c7435b7a353c8cdfc79427596 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Tue, 4 Jun 2024 18:50:39 +0000 Subject: [PATCH 15/38] prefetch related on scoped queries --- chord_metadata_service/discovery/api_views.py | 6 ++- .../discovery/fields_utils.py | 4 +- .../discovery/model_lookups.py | 52 +++++++++++++++---- chord_metadata_service/discovery/stats.py | 6 ++- 4 files changed, 52 insertions(+), 16 deletions(-) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index 3f62ec72f..830426ac6 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -141,8 +141,10 @@ async def _counts_for_scoped_model_name(mn: str) -> tuple[str, int]: else: # cannot scope return await _counts_for_model_name(mn) - return mn, await PUBLIC_MODEL_NAMES_TO_MODEL[mn].objects.filter( - **{PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[mn][scope]: value} + filter_query = PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[mn][scope]["filter"] + prefetch = PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[mn][scope]["prefetch_related"] + return mn, await PUBLIC_MODEL_NAMES_TO_MODEL[mn].objects.prefetch_related(*prefetch).filter( + **{filter_query: value} ).acount() # Predefined counts diff --git a/chord_metadata_service/discovery/fields_utils.py b/chord_metadata_service/discovery/fields_utils.py index 91387dabe..3ff4ca43d 100644 --- a/chord_metadata_service/discovery/fields_utils.py +++ b/chord_metadata_service/discovery/fields_utils.py @@ -1,7 +1,7 @@ from typing import Any, Iterator, Type from django.db.models import Q, Func, BooleanField, F, Value, Model, JSONField -from chord_metadata_service.discovery.model_lookups import PUBLIC_MODEL_NAMES_TO_MODEL +from chord_metadata_service.discovery.model_lookups import PUBLIC_MODEL_NAMES_TO_MODEL, PublicModelNames MAPPING_SEPARATOR = "/" JSON_PATH_ACCESSOR = "." @@ -48,7 +48,7 @@ def get_model_and_field(field_id: str) -> tuple[Type[Model], str]: return model, field_name -def get_public_model_name(model: Type[Model]) -> str: +def get_public_model_name(model: Type[Model]) -> PublicModelNames: model_name = [key for key, m in PUBLIC_MODEL_NAMES_TO_MODEL.items() if m == model] if len(model_name) != 1: raise ValueError(f"Provided model {model} is not available for public.") diff --git a/chord_metadata_service/discovery/model_lookups.py b/chord_metadata_service/discovery/model_lookups.py index 196394aaa..dc3e5dfe8 100644 --- a/chord_metadata_service/discovery/model_lookups.py +++ b/chord_metadata_service/discovery/model_lookups.py @@ -1,5 +1,5 @@ from django.db.models import Model -from typing import Type +from typing import Literal, Type, TypedDict from chord_metadata_service.chord.data_types import DATA_TYPE_PHENOPACKET, DATA_TYPE_EXPERIMENT from chord_metadata_service.experiments import models as exp_models @@ -8,29 +8,61 @@ __all__ = ["PUBLIC_MODEL_NAMES_TO_MODEL", "PUBLIC_MODEL_NAMES_TO_DATA_TYPE"] -PUBLIC_MODEL_NAMES_TO_MODEL: dict[str, Type[Model]] = { +PublicModelNames = Literal["individual", "biosample", "experiment"] + +PUBLIC_MODEL_NAMES_TO_MODEL: dict[PublicModelNames, Type[Model]] = { "individual": patient_models.Individual, "biosample": pheno_models.Biosample, "experiment": exp_models.Experiment, } -PUBLIC_MODEL_NAMES_TO_DATA_TYPE = { +PUBLIC_MODEL_NAMES_TO_DATA_TYPE: dict[PublicModelNames, str] = { "individual": DATA_TYPE_PHENOPACKET, "biosample": DATA_TYPE_PHENOPACKET, "experiment": DATA_TYPE_EXPERIMENT, } -PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS = { + +class ScopeFilter(TypedDict): + filter: str + prefetch_related: tuple[str] + select_related: tuple[str] + + +class ProjectDatasetScopeFilters(TypedDict): + project: ScopeFilter + dataset: ScopeFilter + + +PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS: dict[PublicModelNames, ProjectDatasetScopeFilters] = { "individual": { - "project": "phenopackets__dataset__project__identifier", - "dataset": "phenopackets__dataset__identifier", + "project": { + "filter": "phenopackets__dataset__project__identifier", + "prefetch_related": ("phenopackets__dataset__project") + }, + "dataset": { + "filter": "phenopackets__dataset__identifier", + "prefetch_related": ("phenopackets__dataset") + }, }, "biosample": { - "project": "phenopacket__dataset__project__identifier", - "dataset": "phenopacket__dataset__identifier", + "project": { + "filter": "phenopacket__dataset__project__identifier", + "prefetch_related": ("phenopacket__dataset__project"), + }, + "dataset": { + "filter": "phenopacket__dataset__identifier", + "prefetch_related": ("phenopacket__dataset"), + }, }, "experiment": { - "project": "dataset__project__identifier", - "dataset": "dataset__identifier", + "project": { + "filter": "dataset__project__identifier", + "prefetch_related": ("dataset__project"), + }, + "dataset": { + "filter": "dataset__identifier", + "prefetch_related": ("dataset"), + }, }, } diff --git a/chord_metadata_service/discovery/stats.py b/chord_metadata_service/discovery/stats.py index 01d26de9a..431d3caad 100644 --- a/chord_metadata_service/discovery/stats.py +++ b/chord_metadata_service/discovery/stats.py @@ -77,7 +77,7 @@ def get_scoped_queryset( model: Type[Model], project_id: str | None = None, dataset_id: str | None = None, -): +) -> QuerySet: if project_id and not dataset_id: scope = "project" value = project_id @@ -87,7 +87,9 @@ def get_scoped_queryset( else: return model.objects.all() model_name = get_public_model_name(model) - return model.objects.filter(**{PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[model_name][scope]: value}) + prefetch = PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[model_name][scope]["prefetch_related"] + filter_query = PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[model_name][scope]["filter"] + return model.objects.prefetch_related(*prefetch).filter(**{filter_query: value}) async def stats_for_field( From ce033fa320172f83274c4114dabb47e1bbc24305 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 5 Jun 2024 17:44:37 +0000 Subject: [PATCH 16/38] tests for scoped config fields endpoint --- chord_metadata_service/discovery/api_views.py | 16 +- .../discovery/tests/constants.py | 157 +++++++++--------- .../discovery/tests/test_api.py | 89 ++++++++-- .../discovery/tests/test_censorship.py | 8 +- .../discovery/tests/test_fields.py | 16 +- .../discovery/tests/test_fields_utils.py | 4 +- .../patients/tests/test_api.py | 56 ++++--- chord_metadata_service/restapi/urls.py | 2 +- 8 files changed, 210 insertions(+), 138 deletions(-) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index 830426ac6..deb32e324 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -19,7 +19,7 @@ from .schemas import DISCOVERY_SCHEMA -async def get_project_discovery(project_id: str = None, project: cm.Project = None): +async def _get_project_discovery(project_id: str = None, project: cm.Project = None): if not project and project_id: project = await cm.Project.objects.aget(identifier=project_id) if not project.discovery: @@ -27,23 +27,23 @@ async def get_project_discovery(project_id: str = None, project: cm.Project = No return project.discovery -async def get_dataset_discovery(dataset_id: str): +async def _get_dataset_discovery(dataset_id: str): dataset = await cm.Dataset.objects.aget(identifier=dataset_id) if not dataset.discovery: project = await cm.Project.objects.aget(datasets=dataset_id) - return await get_project_discovery(project=project) + return await _get_project_discovery(project=project) return dataset.discovery -async def get_discovery(request: DrfRequest): +async def _get_discovery(request: DrfRequest): dataset_id = request.query_params.get("dataset") project_id = request.query_params.get("project") if dataset_id: # get dataset's discovery config if dataset_id is passed - return await get_dataset_discovery(dataset_id) + return await _get_dataset_discovery(dataset_id) if project_id and not dataset_id: # get project's discovery config if project_id is passed and dataset_id is not - return await get_project_discovery(project_id) + return await _get_project_discovery(project_id) # fallback to config.json when no dataset or project is in the request return settings.CONFIG_PUBLIC @@ -69,7 +69,7 @@ async def public_search_fields(request: DrfRequest): Return public search fields with their configuration """ - config_public = await get_discovery(request) + config_public = await _get_discovery(request) if not config_public: return Response(dres.NO_PUBLIC_FIELDS_CONFIGURED, status=status.HTTP_404_NOT_FOUND) @@ -124,7 +124,7 @@ async def public_overview(request: DrfRequest): Overview of all public data in the database """ - config_public = await get_discovery(request) + config_public = await _get_discovery(request) dataset_id = request.query_params.get("dataset", None) project_id = request.query_params.get("project", None) diff --git a/chord_metadata_service/discovery/tests/constants.py b/chord_metadata_service/discovery/tests/constants.py index 369f9c8ff..975fa74fa 100644 --- a/chord_metadata_service/discovery/tests/constants.py +++ b/chord_metadata_service/discovery/tests/constants.py @@ -1,6 +1,6 @@ from copy import deepcopy -CONFIG_PUBLIC_TEST = { +DISCOVERY_CONFIG_TEST = { "overview": [ { "section_title": "First Section", @@ -11,20 +11,6 @@ }, { "section_title": "Second Section", - "charts": [ - {"field": "date_of_consent", "chart_type": "bar"}, - {"field": "smoking", "chart_type": "bar"}, - {"field": "baseline_creatinine", "chart_type": "bar"}, - ] - }, - { - "section_title": "Third Section", - "charts": [ - {"field": "lab_test_result_value", "chart_type": "bar"}, - ] - }, - { - "section_title": "Fourth Section", "charts": [ {"field": "diagnostic_markers", "chart_type": "pie"}, {"field": "measurement_tumor_length", "chart_type": "bar"} @@ -34,11 +20,7 @@ "search": [ { "section_title": "First Section", - "fields": [ - "sex", "age", "smoking", "covidstatus", "death_dc", - "lab_test_result_value", "baseline_creatinine", "date_of_consent", - "tissues" - ] + "fields": ["sex", "age", "tissues"] } ], "fields": { @@ -65,7 +47,53 @@ "maximum": 100 } }, - "smoking": { + "tissues": { + "mapping": "biosample/sampled_tissue/label", + "mapping_for_search_filter": "individual/biosamples/sampled_tissue/label", + "title": "Tissue", + "description": "Tissue from which the biosample was extracted", + "datatype": "string", + "config": { + "enum": None + } + }, + "diagnostic_markers": { + "mapping": "individual/phenopackets/biosamples/diagnostic_markers", + "group_by": "label", + "title": "Diagnostic Markers", + "description": "Markers used for diagnosis", + "datatype": "string", + "config": { + "enum": None + } + }, + "measurement_tumor_length": { + "mapping": "individual/phenopackets/measurements", + "group_by": "assay/id", + "group_by_value": "NCIT:C200479", + "value_mapping": "value/quantity/value", + "title": "Tumor lengths", + "description": "measured tumor lengths in millimeters", + "datatype": "number", + "config": { + "minimum": 0, + "maximum": 200, + "bin_size": 20, + "taper_left": 0, + "taper_right": 200, + "units": "mm" + } + }, + }, + "rules": { + "count_threshold": 5, + "max_query_parameters": 2 + } +} + +DISCOVERY_CONFIG_EXTRA_PROPERTIES = deepcopy(DISCOVERY_CONFIG_TEST) +DISCOVERY_CONFIG_EXTRA_PROPERTIES["fields"].update([ + ("smoking", { "mapping": "individual/extra_properties/smoking", "title": "Smoking", "description": "Smoking exposure", @@ -79,8 +107,8 @@ "Not specified" ] } - }, - "covidstatus": { + }), + ("covidstatus", { "mapping": "individual/extra_properties/covidstatus", "title": "Covid status", "description": "Covid status", @@ -92,8 +120,8 @@ "Indeterminate" ] } - }, - "death_dc": { + }), + ("death_dc", { "mapping": "individual/extra_properties/death_dc", "title": "Death", "description": "Death status", @@ -104,8 +132,8 @@ "Deceased" ] } - }, - "lab_test_result_value": { + }), + ("lab_test_result_value", { "mapping": "individual/extra_properties/lab_test_result_value", "title": "Lab Test Result", "description": "This acts as a placeholder for numeric values", @@ -115,8 +143,8 @@ "minimum": 0, "units": "mg/L" } - }, - "baseline_creatinine": { + }), + ("baseline_creatinine", { "mapping": "individual/extra_properties/baseline_creatinine", "title": "Creatinine", "description": "Baseline Creatinine", @@ -129,8 +157,8 @@ "maximum": 600, "units": "mg/L" } - }, - "date_of_consent": { + }), + ("date_of_consent", { "mapping": "individual/extra_properties/date_of_consent", "title": "Verbal consent date", "description": "Date of initial verbal consent(participant, legal representative or tutor), yyyy-mm-dd", @@ -138,55 +166,32 @@ "config": { "bin_by": "month" } - }, - "tissues": { - "mapping": "biosample/sampled_tissue/label", - "mapping_for_search_filter": "individual/biosamples/sampled_tissue/label", - "title": "Tissue", - "description": "Tissue from which the biosample was extracted", - "datatype": "string", - "config": { - "enum": None - } - }, - "diagnostic_markers": { - "mapping": "individual/phenopackets/biosamples/diagnostic_markers", - "group_by": "label", - "title": "Diagnostic Markers", - "description": "Markers used for diagnosis", - "datatype": "string", - "config": { - "enum": None - } - }, - "measurement_tumor_length": { - "mapping": "individual/phenopackets/measurements", - "group_by": "assay/id", - "group_by_value": "NCIT:C200479", - "value_mapping": "value/quantity/value", - "title": "Tumor lengths", - "description": "measured tumor lengths in millimeters", - "datatype": "number", - "config": { - "minimum": 0, - "maximum": 200, - "bin_size": 20, - "taper_left": 0, - "taper_right": 200, - "units": "mm" - } - }, + }) +]) +DISCOVERY_CONFIG_EXTRA_PROPERTIES["search"][0]["fields"].extend( + ["smoking", "covidstatus", "death_dc", "lab_test_result_value", "baseline_creatinine", "date_of_consent"] +) +DISCOVERY_CONFIG_EXTRA_PROPERTIES["overview"].extend([ + { + "section_title": "Dataset individual exta properties specific Section", + "charts": [ + {"field": "date_of_consent", "chart_type": "bar"}, + {"field": "smoking", "chart_type": "bar"}, + {"field": "baseline_creatinine", "chart_type": "bar"}, + ] }, - "rules": { - "count_threshold": 5, - "max_query_parameters": 2 - } -} + { + "section_title": "Lab test results section", + "charts": [ + {"field": "lab_test_result_value", "chart_type": "bar"}, + ] + }, +]) -CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY = deepcopy(CONFIG_PUBLIC_TEST) +CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY = deepcopy(DISCOVERY_CONFIG_TEST) CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY["search"][0]["fields"] = ["sex"] -CONFIG_PUBLIC_TEST_SEARCH_UNSET_FIELDS = deepcopy(CONFIG_PUBLIC_TEST) +CONFIG_PUBLIC_TEST_SEARCH_UNSET_FIELDS = deepcopy(DISCOVERY_CONFIG_TEST) CONFIG_PUBLIC_TEST_SEARCH_UNSET_FIELDS["fields"].update([ ("unset_date", { diff --git a/chord_metadata_service/discovery/tests/test_api.py b/chord_metadata_service/discovery/tests/test_api.py index b7332fc2a..32b223a6e 100644 --- a/chord_metadata_service/discovery/tests/test_api.py +++ b/chord_metadata_service/discovery/tests/test_api.py @@ -3,6 +3,7 @@ from copy import deepcopy from django.conf import settings +from django.http import HttpResponse from django.urls import reverse from django.test import override_settings from rest_framework import status @@ -21,18 +22,51 @@ INDIVIDUALS_NOT_ACCEPTED_DATA_TYPES_LIST, INDIVIDUALS_NOT_ACCEPTED_DATA_TYPES_DICT ) -from .constants import CONFIG_PUBLIC_TEST, CONFIG_PUBLIC_TEST_SEARCH_UNSET_FIELDS +from .constants import ( + CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY, + DISCOVERY_CONFIG_TEST, + CONFIG_PUBLIC_TEST_SEARCH_UNSET_FIELDS, + DISCOVERY_CONFIG_EXTRA_PROPERTIES +) class PublicSearchFieldsTest(APITestCase): def setUp(self) -> None: + # create 2 projects, only one with a dataset and data + self.project_a = ch_m.Project.objects.create( + title="Test project A", + description="test description", + # Should use node's discovery config + discovery={}, + ) + self.dataset_a = ch_m.Dataset.objects.create( + title="Dataset 1", + description="Test dataset", + contact_info="Test contact info", + types=["test type 1", "test type 2"], + privacy="Open", + keywords=["test keyword 1", "test keyword 2"], + data_use=ch_c.VALID_DATA_USE_1, + project=self.project_a, + dats_file={}, + # Should use provided discovery config + discovery=DISCOVERY_CONFIG_EXTRA_PROPERTIES, + ) + + self.project_b = ch_m.Project.objects.create( + title="Test project B", + description="test description", + # Should use provided discovery config + discovery=CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY, + ) # create 2 phenopackets for 2 individuals; each individual has 1 biosample; # one of phenopackets has 1 phenotypic feature and 1 disease self.individual_1 = ph_m.Individual.objects.create(**ph_c.VALID_INDIVIDUAL_1) self.metadata_1 = ph_m.MetaData.objects.create(**ph_c.VALID_META_DATA_1) self.phenopacket_1 = ph_m.Phenopacket.objects.create( - **ph_c.valid_phenopacket(subject=self.individual_1, meta_data=self.metadata_1) + **ph_c.valid_phenopacket(subject=self.individual_1, meta_data=self.metadata_1), + dataset=self.dataset_a ) self.disease = ph_m.Disease.objects.create(**ph_c.VALID_DISEASE_1) self.biosample_1 = ph_m.Biosample.objects.create(**ph_c.valid_biosample_1(self.individual_1)) @@ -48,15 +82,44 @@ def setUp(self) -> None: self.experiment_result = exp_m.ExperimentResult.objects.create(**exp_c.valid_experiment_result()) self.experiment.experiment_results.set([self.experiment_result]) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) - def test_public_search_fields_configured(self): - response = self.client.get(reverse("public-search-fields"), content_type="application/json") - self.assertEqual(response.status_code, status.HTTP_200_OK) + def assert_response_section_fields(self, response: HttpResponse, config: dict): response_obj = response.json() self.assertSetEqual( set(field["id"] for section in response_obj["sections"] for field in section["fields"]), - set(field for section in settings.CONFIG_PUBLIC["search"] for field in section["fields"]) + set(field for section in config["search"] for field in section["fields"]) + ) + + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) + def test_public_search_fields_configured(self): + search_fields_url = reverse("public-search-fields") + # SCOPE: whole node + response = self.client.get(search_fields_url, content_type="application/json") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assert_response_section_fields(response, settings.CONFIG_PUBLIC) + + # SCOPE: project_a (same discovery as whole node) + response_p_a = self.client.get( + f"{search_fields_url}?project={str(self.project_a.identifier)}", + content_type="application/json", + ) + self.assertEqual(response_p_a.status_code, status.HTTP_200_OK) + self.assert_response_section_fields(response_p_a, settings.CONFIG_PUBLIC) + + # SCOPE: project_b (discovery search sex only) + response_p_b = self.client.get( + f"{search_fields_url}?project={str(self.project_b.identifier)}", + content_type="application/json", + ) + self.assertEqual(response_p_b.status_code, status.HTTP_200_OK) + self.assert_response_section_fields(response_p_b, CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY) + + # SCOPE: dataset_a (discovery with dataset specific extra_properties) + response_d_a = self.client.get( + f"{search_fields_url}?dataset={str(self.dataset_a.identifier)}", + content_type="application/json", ) + self.assertEqual(response_d_a.status_code, status.HTTP_200_OK) + self.assert_response_section_fields(response_d_a, DISCOVERY_CONFIG_EXTRA_PROPERTIES) @override_settings(CONFIG_PUBLIC={}) def test_public_search_fields_not_configured(self): @@ -101,7 +164,7 @@ def setUp(self) -> None: experiment_2["id"] = "experiment:2" self.experiment = exp_m.Experiment.objects.create(**experiment_2) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_overview(self): response = self.client.get('/api/public_overview') response_obj = response.json() @@ -110,7 +173,7 @@ def test_overview(self): self.assertIsInstance(response_obj, dict) self.assertEqual(response_obj["counts"]["individuals"], db_count) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_overview_bins(self): # test that there is the correct number of data entries for number # histograms, vs. number of bins @@ -140,7 +203,7 @@ def setUp(self) -> None: for ind in VALID_INDIVIDUALS[:2]: ph_m.Individual.objects.create(**ind) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_overview_response(self): # test overview response when individuals count < threshold response = self.client.get('/api/public_overview') @@ -165,7 +228,7 @@ def setUp(self) -> None: for ind in INDIVIDUALS_NOT_ACCEPTED_DATA_TYPES_LIST: ph_m.Individual.objects.create(**ind) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_overview_response(self): # test overview response with passing TypeError exception response = self.client.get('/api/public_overview') @@ -190,7 +253,7 @@ def setUp(self) -> None: for ind in INDIVIDUALS_NOT_ACCEPTED_DATA_TYPES_DICT: ph_m.Individual.objects.create(**ind) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_overview_response(self): # test overview response with passing TypeError exception response = self.client.get('/api/public_overview') @@ -223,7 +286,7 @@ def setUp(self) -> None: dats_file=dats_content ) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_public_dataset(self): response = self.client.get(reverse("public-dataset")) response_obj = response.json() diff --git a/chord_metadata_service/discovery/tests/test_censorship.py b/chord_metadata_service/discovery/tests/test_censorship.py index bdf757637..a3f3478a8 100644 --- a/chord_metadata_service/discovery/tests/test_censorship.py +++ b/chord_metadata_service/discovery/tests/test_censorship.py @@ -2,7 +2,7 @@ from django.test import TestCase, override_settings from chord_metadata_service.discovery.censorship import get_threshold, thresholded_count, get_max_query_parameters -from .constants import CONFIG_PUBLIC_TEST +from .constants import DISCOVERY_CONFIG_TEST class CensorshipGetThresholdTest(TestCase): @@ -19,7 +19,7 @@ def test_get_threshold_no_config(self): # no public config configured class CensorshipThresholdedCountTest(TestCase): - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_get_threshold_configured(self): self.assertEqual(get_threshold(low_counts_censored=False), 0) self.assertEqual(get_threshold(low_counts_censored=True), 5) @@ -33,7 +33,7 @@ def test_thresholded_count_no_censorship(self): def test_thresholded_count_no_config(self): # no public config configured self.assertEqual(thresholded_count(100000, low_counts_censored=True), 0) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_thresholded_count_configured(self): self.assertEqual(thresholded_count(5, low_counts_censored=False), 5) self.assertEqual(thresholded_count(5, low_counts_censored=True), 0) @@ -50,7 +50,7 @@ def test_get_max_query_parameters_no_censorship(self): def test_get_max_query_parameters_no_config(self): self.assertEqual(get_max_query_parameters(low_counts_censored=True), 0) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_get_max_query_parameters_configured(self): self.assertEqual(get_max_query_parameters(low_counts_censored=False), sys.maxsize) self.assertEqual(get_max_query_parameters(low_counts_censored=True), 2) diff --git a/chord_metadata_service/discovery/tests/test_fields.py b/chord_metadata_service/discovery/tests/test_fields.py index e9a8e017e..615d1b18b 100644 --- a/chord_metadata_service/discovery/tests/test_fields.py +++ b/chord_metadata_service/discovery/tests/test_fields.py @@ -6,7 +6,7 @@ from chord_metadata_service.phenopackets.tests import constants as ph_c from chord_metadata_service.phenopackets import models as ph_m -from .constants import CONFIG_PUBLIC_TEST +from .constants import DISCOVERY_CONFIG_TEST from ..fields import ( get_field_options, get_categorical_stats, @@ -65,7 +65,7 @@ async def test_categorical_stats_lcf(self): dataset_id=self.dataset.identifier, low_counts_censored=False) self.assertListEqual(res, [{"label": "MALE", "value": 1}, {"label": "missing", "value": 0}]) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_categorical_stats_lct(self): res = await get_categorical_stats(self.f_sex, project_id=self.project.identifier, dataset_id=self.dataset.identifier, low_counts_censored=True) @@ -74,7 +74,7 @@ async def test_categorical_stats_lct(self): class TestDateStatsExcept(APITestCase): - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_wrong_bin_config(self): fp = { "title": "Date of Consent", @@ -92,7 +92,7 @@ async def test_wrong_bin_config(self): with self.assertRaises(NotImplementedError): await get_month_date_range(fp) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_wrong_field_config(self): fp = { "title": "Date of Consent", @@ -114,8 +114,8 @@ async def test_wrong_field_config(self): class TestJsonFieldArrayStats(ProjectTestCase): tumor_lengths = range(1, 50, 5) - dm_fp = CONFIG_PUBLIC_TEST["fields"]["diagnostic_markers"] - mtl_fp = CONFIG_PUBLIC_TEST["fields"]["measurement_tumor_length"] + dm_fp = DISCOVERY_CONFIG_TEST["fields"]["diagnostic_markers"] + mtl_fp = DISCOVERY_CONFIG_TEST["fields"]["measurement_tumor_length"] def setUp(self) -> None: self.tumors = [ph_c.valid_measurement_tumor_length(length) for length in self.tumor_lengths] @@ -131,7 +131,7 @@ def setUp(self) -> None: ) self.phenopacket.biosamples.set([self.biosample]) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_json_categorical_stats_lcf(self): res = await get_categorical_stats(self.dm_fp, project_id=self.project.identifier, dataset_id=self.dataset.identifier, low_counts_censored=False) @@ -142,7 +142,7 @@ async def test_json_categorical_stats_lcf(self): ] self.assertListEqual(res, ground_truth) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_json_categorical_stats_lct(self): res = await get_categorical_stats(self.dm_fp, project_id=self.project.identifier, dataset_id=self.dataset.identifier, low_counts_censored=True) diff --git a/chord_metadata_service/discovery/tests/test_fields_utils.py b/chord_metadata_service/discovery/tests/test_fields_utils.py index b944ce7d1..4f651d66b 100644 --- a/chord_metadata_service/discovery/tests/test_fields_utils.py +++ b/chord_metadata_service/discovery/tests/test_fields_utils.py @@ -2,7 +2,7 @@ from django.db.models import Q from django.db.models.base import ModelBase -from chord_metadata_service.discovery.tests.constants import CONFIG_PUBLIC_TEST +from chord_metadata_service.discovery.tests.constants import DISCOVERY_CONFIG_TEST from ..fields_utils import ( get_jsonb_path_query, get_json_range_condition, @@ -201,7 +201,7 @@ def test_get_nested_json_condition(self): }) def test_get_json_range_condition(self): - field_props = CONFIG_PUBLIC_TEST["fields"]["measurement_tumor_length"] + field_props = DISCOVERY_CONFIG_TEST["fields"]["measurement_tumor_length"] # GTE 0 an LT 20 json_range_condition_0_20 = get_json_range_condition(field_props, min=0, max=20) diff --git a/chord_metadata_service/patients/tests/test_api.py b/chord_metadata_service/patients/tests/test_api.py index 1f3791986..f4643509f 100644 --- a/chord_metadata_service/patients/tests/test_api.py +++ b/chord_metadata_service/patients/tests/test_api.py @@ -8,7 +8,11 @@ from rest_framework import status from rest_framework.test import APITestCase from chord_metadata_service.discovery import responses as dres -from chord_metadata_service.discovery.tests.constants import CONFIG_PUBLIC_TEST, CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY +from chord_metadata_service.discovery.tests.constants import ( + DISCOVERY_CONFIG_EXTRA_PROPERTIES, + DISCOVERY_CONFIG_TEST, + CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY +) from chord_metadata_service.patients.models import Individual from chord_metadata_service.phenopackets import models as ph_m from chord_metadata_service.phenopackets.tests import constants as ph_c @@ -16,7 +20,7 @@ from . import constants as c -CONFIG_PUBLIC_TEST_NO_THRESHOLD = deepcopy(CONFIG_PUBLIC_TEST) +CONFIG_PUBLIC_TEST_NO_THRESHOLD = deepcopy(DISCOVERY_CONFIG_TEST) CONFIG_PUBLIC_TEST_NO_THRESHOLD["rules"]["count_threshold"] = 0 @@ -297,7 +301,7 @@ def setUp(self): for individual in individuals: Individual.objects.create(**individual) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_public_get(self): # no filters GET request to /api/public, returns count or INSUFFICIENT_DATA_AVAILABLE response = self.client.get('/api/public') @@ -329,7 +333,7 @@ def test_public_get_no_config(self): class PublicFilteringIndividualsTest(APITestCase): """ Test for api/public GET filtering """ - response_threshold = CONFIG_PUBLIC_TEST["rules"]["count_threshold"] + response_threshold = DISCOVERY_CONFIG_TEST["rules"]["count_threshold"] num_individuals = 137 random_seed = 341 # do not change this please :)) @@ -348,7 +352,7 @@ def setUp(self): random.seed(self.random_seed) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_public_filtering_sex(self): # sex field search response = self.client.get('/api/public?sex=female') @@ -364,7 +368,7 @@ def test_public_filtering_sex(self): else: self.assertEqual(nb_female, response_obj['count']) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_2_fields(self): # sex and extra_properties string search # test GET query string search for extra_properties field @@ -390,7 +394,7 @@ def test_public_filtering_2_fields_config_empty(self): self.assertIsInstance(response_obj, dict) self.assertEqual(response_obj, dres.NO_PUBLIC_DATA_AVAILABLE) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_1(self): # extra_properties string search (multiple values) response = self.client.get('/api/public?smoking=Non-smoker&death_dc=Deceased') @@ -416,7 +420,7 @@ def test_public_filtering_extra_properties_1_config_empty(self): self.assertIsInstance(response_obj, dict) self.assertEqual(response_obj, dres.NO_PUBLIC_DATA_AVAILABLE) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_2(self): # extra_properties string search (multiple values) response = self.client.get( @@ -426,7 +430,7 @@ def test_public_filtering_extra_properties_2(self): response_obj = response.json() self.assertEqual(response_obj["code"], 400) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_invalid_3(self): # if GET query string list has various data types Error response = self.client.get('/api/public?extra_properties=[{"smoking": "Non-smoker"}, 5, "Test"]') @@ -434,7 +438,7 @@ def test_public_filtering_extra_properties_invalid_3(self): response_obj = response.json() self.assertEqual(response_obj["code"], 400) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_range_1(self): # extra_properties range search (both min and max ranges, single value) response = self.client.get( @@ -453,7 +457,7 @@ def test_public_filtering_extra_properties_range_1(self): else: self.assertEqual(db_count, response_obj["count"]) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_range_2(self): # extra_properties range search (above taper, single value) response = self.client.get( @@ -471,7 +475,7 @@ def test_public_filtering_extra_properties_range_2(self): else: self.assertEqual(db_count, response_obj['count']) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_range_3(self): # extra_properties range search (below taper, single value) response = self.client.get( @@ -489,7 +493,7 @@ def test_public_filtering_extra_properties_range_3(self): else: self.assertEqual(db_count, response_obj['count']) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_wrong_range(self): # extra_properties range search, unauthorized range response = self.client.get( @@ -499,7 +503,7 @@ def test_public_filtering_extra_properties_wrong_range(self): response_obj = response.json() self.assertEqual(response_obj["code"], 400) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_range_string_1(self): # sex string search and extra_properties range search response = self.client.get( @@ -519,7 +523,7 @@ def test_public_filtering_extra_properties_range_string_1(self): else: self.assertEqual(db_count, response_obj['count']) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_range_string_2(self): # extra_properties range search and extra_properties string search (single value) response = self.client.get( @@ -539,7 +543,7 @@ def test_public_filtering_extra_properties_range_string_2(self): else: self.assertEqual(db_count, response_obj['count']) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_multiple_ranges_1(self): # extra_properties range search (both min and max range, multiple values) response = self.client.get( @@ -560,7 +564,7 @@ def test_public_filtering_extra_properties_multiple_ranges_1(self): else: self.assertEqual(db_count, response_obj['count']) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_date_range_1(self): # extra_properties date range search (only after or before, single value) # Testing with a date of consent from 1 year ago @@ -579,7 +583,7 @@ def test_public_filtering_extra_properties_date_range_1(self): else: self.assertEqual(db_count, response_obj['count']) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_public_filtering_extra_properties_date_range_and_other_range(self): # extra_properties date range search (both after and before, single value) and other number range search # Testing with a date of consent from 2 years ago @@ -608,7 +612,7 @@ def test_public_filtering_mapping_for_search_filter(self): response_obj = response.json() self.assertEqual(1, response_obj["count"]) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_public_overview_sex(self): response = self.client.get('/api/public_search_fields') self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -622,7 +626,7 @@ class PublicFilteringIndividualsTestSmallCellCount(PublicFilteringIndividualsTes num_individuals = 3 # below configured test threshold # rest of the methods are inherited - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_public_overview_sex(self): response = self.client.get('/api/public_search_fields') self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -658,7 +662,7 @@ def setUp(self): individual.age_unit = age_unit if age_unit else "" individual.save() - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_public_filtering_age_range(self): # age valid range search response = self.client.get('/api/public?age=[20, 30)') @@ -671,7 +675,7 @@ def test_public_filtering_age_range(self): else: self.assertEqual(db_count, response_obj['count']) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_public_filtering_age_invalid_range(self): # age invalid range max search response = self.client.get('/api/public?age=[10, 50)') @@ -708,7 +712,7 @@ def setUp(self): Individual.objects.create(**individual) # test beacon formatted response - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_beacon_search_response(self): response = self.client.get('/api/beacon_search?sex=MALE') male_count = Individual.objects.filter(sex="MALE").count() @@ -722,17 +726,17 @@ def test_beacon_search_response_no_config(self): response = self.client.get('/api/beacon_search?sex=MALE') self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_beacon_search_response_invalid_search_key(self): response = self.client.get('/api/beacon_search?birdwatcher=yes') self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_beacon_search_response_invalid_search_value(self): response = self.client.get('/api/beacon_search?smoking=on_Sundays') self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) - @override_settings(CONFIG_PUBLIC=CONFIG_PUBLIC_TEST) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_beacon_search_more_params_than_censorship_limit(self): response = self.client.get('/api/beacon_search?sex=MALE&smoking=Non-smoker&death_dc=Deceased') self.assertEqual(response.status_code, status.HTTP_200_OK) diff --git a/chord_metadata_service/restapi/urls.py b/chord_metadata_service/restapi/urls.py index 032cac947..415e98ac7 100644 --- a/chord_metadata_service/restapi/urls.py +++ b/chord_metadata_service/restapi/urls.py @@ -86,7 +86,7 @@ # public endpoints (no confidential information leak) path('public', individual_views.PublicListIndividuals.as_view(), name='public',), - path('public_search_fields', public_search_fields, name='public-search-fields',), + path('public_search_fields', public_search_fields, name='public-search-fields'), path('public_overview', public_overview, name='public-overview',), path('public_dataset', public_dataset, name='public-dataset'), From 1b02f9823266148f10bfd794f8a74d8690f4bb10 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 6 Jun 2024 21:07:08 +0000 Subject: [PATCH 17/38] discovery config error handling and tests --- chord_metadata_service/discovery/api_views.py | 44 ++++++++--- .../discovery/exceptions.py | 21 +++++ .../discovery/tests/test_api.py | 77 +++++++++++++++---- 3 files changed, 116 insertions(+), 26 deletions(-) create mode 100644 chord_metadata_service/discovery/exceptions.py diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index deb32e324..5b384d1e3 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -2,6 +2,7 @@ from adrf.decorators import api_view from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist from drf_spectacular.utils import extend_schema, inline_serializer from rest_framework import serializers, status from rest_framework.decorators import permission_classes @@ -9,6 +10,8 @@ from rest_framework.request import Request as DrfRequest from rest_framework.response import Response +from chord_metadata_service.discovery.exceptions import DiscoveryConfigException + from . import responses as dres from .types import BinWithValue from ..chord import models as cm @@ -19,15 +22,17 @@ from .schemas import DISCOVERY_SCHEMA -async def _get_project_discovery(project_id: str = None, project: cm.Project = None): +async def _get_project_discovery(project_id: str = None, project: cm.Project = None) -> dict: if not project and project_id: + # retrieve project by ID if not provided project = await cm.Project.objects.aget(identifier=project_id) if not project.discovery: + # fallback on global discovery config if project has none return settings.CONFIG_PUBLIC return project.discovery -async def _get_dataset_discovery(dataset_id: str): +async def _get_dataset_discovery(dataset_id: str) -> dict: dataset = await cm.Dataset.objects.aget(identifier=dataset_id) if not dataset.discovery: project = await cm.Project.objects.aget(datasets=dataset_id) @@ -35,15 +40,26 @@ async def _get_dataset_discovery(dataset_id: str): return dataset.discovery -async def _get_discovery(request: DrfRequest): +async def _get_discovery(request: DrfRequest) -> dict: dataset_id = request.query_params.get("dataset") project_id = request.query_params.get("project") - if dataset_id: - # get dataset's discovery config if dataset_id is passed - return await _get_dataset_discovery(dataset_id) - if project_id and not dataset_id: - # get project's discovery config if project_id is passed and dataset_id is not - return await _get_project_discovery(project_id) + if dataset_id and project_id: + # check if the dataset belongs to the project + is_scope_valid = await cm.Dataset.objects.filter( + identifier=dataset_id, + project__identifier=project_id, + ).aexists() + if not is_scope_valid: + raise DiscoveryConfigException(dataset_id, project_id) + try: + if dataset_id: + # get dataset's discovery config if dataset_id is passed + return await _get_dataset_discovery(dataset_id) + elif project_id: + # get project's discovery config if project_id is passed and dataset_id is not + return await _get_project_discovery(project_id=project_id) + except ObjectDoesNotExist: + raise DiscoveryConfigException(dataset_id, project_id) # fallback to config.json when no dataset or project is in the request return settings.CONFIG_PUBLIC @@ -69,7 +85,10 @@ async def public_search_fields(request: DrfRequest): Return public search fields with their configuration """ - config_public = await _get_discovery(request) + try: + config_public = await _get_discovery(request) + except DiscoveryConfigException as e: + return Response(e.message, status=status.HTTP_404_NOT_FOUND) if not config_public: return Response(dres.NO_PUBLIC_FIELDS_CONFIGURED, status=status.HTTP_404_NOT_FOUND) @@ -124,7 +143,10 @@ async def public_overview(request: DrfRequest): Overview of all public data in the database """ - config_public = await _get_discovery(request) + try: + config_public = await _get_discovery(request) + except DiscoveryConfigException as e: + return Response(e.message, status=status.HTTP_404_NOT_FOUND) dataset_id = request.query_params.get("dataset", None) project_id = request.query_params.get("project", None) diff --git a/chord_metadata_service/discovery/exceptions.py b/chord_metadata_service/discovery/exceptions.py new file mode 100644 index 000000000..c72186218 --- /dev/null +++ b/chord_metadata_service/discovery/exceptions.py @@ -0,0 +1,21 @@ +__all__ = [ + "DiscoveryConfigException" +] + + +class DiscoveryConfigException(Exception): + + def __init__(self, dataset_id: str | None = None, project_id: str | None = None, *args: object) -> None: + self.dataset_id = dataset_id + self.project_id = project_id + + message = "Error retrieving {0} scoped discovery config: {0} {1} does not exist." + if dataset_id and project_id: + message = message.format("project-dataset", f"({project_id}, {dataset_id}) pair") + elif dataset_id: + message = message.format("dataset", dataset_id) + elif project_id: + message = message.format("project", project_id) + self.message = {"message": message} + + super().__init__(*args) diff --git a/chord_metadata_service/discovery/tests/test_api.py b/chord_metadata_service/discovery/tests/test_api.py index 32b223a6e..c715c7e87 100644 --- a/chord_metadata_service/discovery/tests/test_api.py +++ b/chord_metadata_service/discovery/tests/test_api.py @@ -1,9 +1,9 @@ import json import os from copy import deepcopy +import uuid from django.conf import settings -from django.http import HttpResponse from django.urls import reverse from django.test import override_settings from rest_framework import status @@ -37,7 +37,7 @@ def setUp(self) -> None: self.project_a = ch_m.Project.objects.create( title="Test project A", description="test description", - # Should use node's discovery config + # Should fallback on the node's discovery config discovery={}, ) self.dataset_a = ch_m.Dataset.objects.create( @@ -50,16 +50,29 @@ def setUp(self) -> None: data_use=ch_c.VALID_DATA_USE_1, project=self.project_a, dats_file={}, - # Should use provided discovery config + # Should use provided dataset discovery config discovery=DISCOVERY_CONFIG_EXTRA_PROPERTIES, ) self.project_b = ch_m.Project.objects.create( title="Test project B", description="test description", - # Should use provided discovery config + # Should use provided project discovery config discovery=CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY, ) + self.dataset_b = ch_m.Dataset.objects.create( + title="Dataset 2", + description="Test dataset 2", + contact_info="Test contact info", + types=["test type 1", "test type 2"], + privacy="Open", + keywords=["test keyword 1", "test keyword 2"], + data_use=ch_c.VALID_DATA_USE_1, + project=self.project_b, + dats_file={}, + # Should fallback on project's discovery config + discovery={}, + ) # create 2 phenopackets for 2 individuals; each individual has 1 biosample; # one of phenopackets has 1 phenotypic feature and 1 disease self.individual_1 = ph_m.Individual.objects.create(**ph_c.VALID_INDIVIDUAL_1) @@ -82,8 +95,7 @@ def setUp(self) -> None: self.experiment_result = exp_m.ExperimentResult.objects.create(**exp_c.valid_experiment_result()) self.experiment.experiment_results.set([self.experiment_result]) - def assert_response_section_fields(self, response: HttpResponse, config: dict): - response_obj = response.json() + def assert_response_section_fields(self, response_obj: dict, config: dict): self.assertSetEqual( set(field["id"] for section in response_obj["sections"] for field in section["fields"]), set(field for section in config["search"] for field in section["fields"]) @@ -95,7 +107,7 @@ def test_public_search_fields_configured(self): # SCOPE: whole node response = self.client.get(search_fields_url, content_type="application/json") self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assert_response_section_fields(response, settings.CONFIG_PUBLIC) + self.assert_response_section_fields(response.json(), settings.CONFIG_PUBLIC) # SCOPE: project_a (same discovery as whole node) response_p_a = self.client.get( @@ -103,7 +115,7 @@ def test_public_search_fields_configured(self): content_type="application/json", ) self.assertEqual(response_p_a.status_code, status.HTTP_200_OK) - self.assert_response_section_fields(response_p_a, settings.CONFIG_PUBLIC) + self.assert_response_section_fields(response_p_a.json(), settings.CONFIG_PUBLIC) # SCOPE: project_b (discovery search sex only) response_p_b = self.client.get( @@ -111,7 +123,7 @@ def test_public_search_fields_configured(self): content_type="application/json", ) self.assertEqual(response_p_b.status_code, status.HTTP_200_OK) - self.assert_response_section_fields(response_p_b, CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY) + self.assert_response_section_fields(response_p_b.json(), CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY) # SCOPE: dataset_a (discovery with dataset specific extra_properties) response_d_a = self.client.get( @@ -119,7 +131,46 @@ def test_public_search_fields_configured(self): content_type="application/json", ) self.assertEqual(response_d_a.status_code, status.HTTP_200_OK) - self.assert_response_section_fields(response_d_a, DISCOVERY_CONFIG_EXTRA_PROPERTIES) + self.assert_response_section_fields(response_d_a.json(), DISCOVERY_CONFIG_EXTRA_PROPERTIES) + + # SCOPE: non existing dataset + response_d_invalid = self.client.get( + f"{search_fields_url}?dataset={uuid.uuid4()}", + content_type="application/json", + ) + self.assertEqual(response_d_invalid.status_code, status.HTTP_404_NOT_FOUND) + + # SCOPE: non existing project + response_p_invalid = self.client.get( + f"{search_fields_url}?project={uuid.uuid4()}", + content_type="application/json", + ) + self.assertEqual(response_p_invalid.status_code, status.HTTP_404_NOT_FOUND) + + # SCOPE: dataset_b + response_d_b = self.client.get( + f"{search_fields_url}?dataset={self.dataset_b.identifier}", + content_type="application/json", + ) + self.assertEqual(response_d_b.status_code, status.HTTP_200_OK) + # fallback on project's config, responses should be the same + self.assertEqual(response_d_b.json(), response_p_b.json()) + + # SCOPE: project_a + dataset_b (invalid) + response_pd_invalid = self.client.get( + f"{search_fields_url}?project={str(self.project_a.identifier)}&dataset={self.dataset_b.identifier}", + content_type="application/json", + ) + self.assertEqual(response_pd_invalid.status_code, status.HTTP_404_NOT_FOUND) + + # SCOPE: project_a + dataset_a (valid) + response_pd_valid = self.client.get( + f"{search_fields_url}?project={str(self.project_a.identifier)}&dataset={self.dataset_a.identifier}", + content_type="application/json", + ) + self.assertEqual(response_pd_valid.status_code, status.HTTP_200_OK) + # same as dataset_a + self.assertEqual(response_pd_valid.json(), response_d_a.json()) @override_settings(CONFIG_PUBLIC={}) def test_public_search_fields_not_configured(self): @@ -133,11 +184,7 @@ def test_public_search_fields_not_configured(self): def test_public_search_fields_missing_extra_properties(self): response = self.client.get(reverse("public-search-fields"), content_type="application/json") self.assertEqual(response.status_code, status.HTTP_200_OK) - response_obj = response.json() - self.assertSetEqual( - set(field["id"] for section in response_obj["sections"] for field in section["fields"]), - set(field for section in settings.CONFIG_PUBLIC["search"] for field in section["fields"]) - ) + self.assert_response_section_fields(response.json(), settings.CONFIG_PUBLIC) class PublicOverviewTest(APITestCase): From fe77915b50311fe3dead9ca7ba685c60dedcf562 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 6 Jun 2024 21:35:54 +0000 Subject: [PATCH 18/38] scoped discovery test case class --- .../discovery/tests/test_api.py | 60 +++++++++++-------- 1 file changed, 36 insertions(+), 24 deletions(-) diff --git a/chord_metadata_service/discovery/tests/test_api.py b/chord_metadata_service/discovery/tests/test_api.py index c715c7e87..aa0eab2af 100644 --- a/chord_metadata_service/discovery/tests/test_api.py +++ b/chord_metadata_service/discovery/tests/test_api.py @@ -5,7 +5,7 @@ from django.conf import settings from django.urls import reverse -from django.test import override_settings +from django.test import TestCase, override_settings from rest_framework import status from rest_framework.test import APITestCase @@ -30,49 +30,44 @@ ) -class PublicSearchFieldsTest(APITestCase): +class ScopedDiscoveryTestCase(TestCase): - def setUp(self) -> None: - # create 2 projects, only one with a dataset and data - self.project_a = ch_m.Project.objects.create( + @classmethod + def setUpTestData(cls) -> None: + cls.project_a = ch_m.Project.objects.create( title="Test project A", description="test description", # Should fallback on the node's discovery config discovery={}, ) - self.dataset_a = ch_m.Dataset.objects.create( + cls.dataset_a = ch_m.Dataset.objects.create( title="Dataset 1", description="Test dataset", - contact_info="Test contact info", - types=["test type 1", "test type 2"], - privacy="Open", - keywords=["test keyword 1", "test keyword 2"], data_use=ch_c.VALID_DATA_USE_1, - project=self.project_a, - dats_file={}, + project=cls.project_a, # Should use provided dataset discovery config discovery=DISCOVERY_CONFIG_EXTRA_PROPERTIES, ) - self.project_b = ch_m.Project.objects.create( + cls.project_b = ch_m.Project.objects.create( title="Test project B", description="test description", # Should use provided project discovery config discovery=CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY, ) - self.dataset_b = ch_m.Dataset.objects.create( + cls.dataset_b = ch_m.Dataset.objects.create( title="Dataset 2", description="Test dataset 2", - contact_info="Test contact info", - types=["test type 1", "test type 2"], - privacy="Open", - keywords=["test keyword 1", "test keyword 2"], data_use=ch_c.VALID_DATA_USE_1, - project=self.project_b, - dats_file={}, + project=cls.project_b, # Should fallback on project's discovery config discovery={}, ) + + +class PublicSearchFieldsTest(APITestCase, ScopedDiscoveryTestCase): + + def setUp(self) -> None: # create 2 phenopackets for 2 individuals; each individual has 1 biosample; # one of phenopackets has 1 phenotypic feature and 1 disease self.individual_1 = ph_m.Individual.objects.create(**ph_c.VALID_INDIVIDUAL_1) @@ -91,7 +86,9 @@ def setUp(self) -> None: # experiments self.instrument = exp_m.Instrument.objects.create(**exp_c.valid_instrument()) - self.experiment = exp_m.Experiment.objects.create(**exp_c.valid_experiment(self.biosample_1, self.instrument)) + self.experiment = exp_m.Experiment.objects.create( + **exp_c.valid_experiment(self.biosample_1, self.instrument, dataset=self.dataset_a) + ) self.experiment_result = exp_m.ExperimentResult.objects.create(**exp_c.valid_experiment_result()) self.experiment.experiment_results.set([self.experiment_result]) @@ -187,13 +184,24 @@ def test_public_search_fields_missing_extra_properties(self): self.assert_response_section_fields(response.json(), settings.CONFIG_PUBLIC) -class PublicOverviewTest(APITestCase): +class PublicOverviewTest(APITestCase, ScopedDiscoveryTestCase): def setUp(self) -> None: # individuals (count 8) individuals = { f"individual_{i}": ph_m.Individual.objects.create(**ind) for i, ind in enumerate(VALID_INDIVIDUALS, start=1) } + # all individuals are in phenopackets that belong to dataset_a + phenopackets = { + f"phenopacket_{ind_label}": ph_m.Phenopacket.objects.create( + **ph_c.valid_phenopacket( + subject=ind, + meta_data=ph_m.MetaData.objects.create(**ph_c.VALID_META_DATA_1), + id=f"phenopacket_{ind_label}" + ), + dataset=self.dataset_a + ) for ind_label, ind in individuals.items() + } # biosamples self.biosample_1 = ph_m.Biosample.objects.create( **ph_c.valid_biosample_1(individuals["individual_1"]) @@ -201,13 +209,17 @@ def setUp(self) -> None: self.biosample_2 = ph_m.Biosample.objects.create( **ph_c.valid_biosample_2(individuals["individual_2"]) ) + phenopackets["phenopacket_individual_1"].biosamples.set([self.biosample_1]) + phenopackets["phenopacket_individual_2"].biosamples.set([self.biosample_2]) # experiments self.instrument = exp_m.Instrument.objects.create(**exp_c.valid_instrument()) - self.experiment = exp_m.Experiment.objects.create(**exp_c.valid_experiment(self.biosample_1, self.instrument)) + self.experiment = exp_m.Experiment.objects.create( + **exp_c.valid_experiment(self.biosample_1, self.instrument, dataset=self.dataset_a) + ) self.experiment_result = exp_m.ExperimentResult.objects.create(**exp_c.valid_experiment_result()) self.experiment.experiment_results.set([self.experiment_result]) # make a copy and create experiment 2 - experiment_2 = deepcopy(exp_c.valid_experiment(self.biosample_2, self.instrument)) + experiment_2 = deepcopy(exp_c.valid_experiment(self.biosample_2, self.instrument, dataset=self.dataset_a)) experiment_2["id"] = "experiment:2" self.experiment = exp_m.Experiment.objects.create(**experiment_2) From efdf9fb9a96df44a681fb8d67f741b5e86b2e425 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Fri, 7 Jun 2024 16:47:58 +0000 Subject: [PATCH 19/38] scoped overview tests --- .../discovery/model_lookups.py | 6 +- .../discovery/tests/constants.py | 3 +- .../discovery/tests/test_api.py | 70 ++++++++++++++++--- 3 files changed, 67 insertions(+), 12 deletions(-) diff --git a/chord_metadata_service/discovery/model_lookups.py b/chord_metadata_service/discovery/model_lookups.py index dc3e5dfe8..6b632b115 100644 --- a/chord_metadata_service/discovery/model_lookups.py +++ b/chord_metadata_service/discovery/model_lookups.py @@ -6,7 +6,11 @@ from chord_metadata_service.patients import models as patient_models from chord_metadata_service.phenopackets import models as pheno_models -__all__ = ["PUBLIC_MODEL_NAMES_TO_MODEL", "PUBLIC_MODEL_NAMES_TO_DATA_TYPE"] +__all__ = [ + "PUBLIC_MODEL_NAMES_TO_MODEL", + "PUBLIC_MODEL_NAMES_TO_DATA_TYPE", + "PublicModelNames" +] PublicModelNames = Literal["individual", "biosample", "experiment"] diff --git a/chord_metadata_service/discovery/tests/constants.py b/chord_metadata_service/discovery/tests/constants.py index 975fa74fa..95dcae2e2 100644 --- a/chord_metadata_service/discovery/tests/constants.py +++ b/chord_metadata_service/discovery/tests/constants.py @@ -13,7 +13,8 @@ "section_title": "Second Section", "charts": [ {"field": "diagnostic_markers", "chart_type": "pie"}, - {"field": "measurement_tumor_length", "chart_type": "bar"} + {"field": "measurement_tumor_length", "chart_type": "bar"}, + {"field": "tissues", "chart_type": "pie"} ] } ], diff --git a/chord_metadata_service/discovery/tests/test_api.py b/chord_metadata_service/discovery/tests/test_api.py index aa0eab2af..aed6bd056 100644 --- a/chord_metadata_service/discovery/tests/test_api.py +++ b/chord_metadata_service/discovery/tests/test_api.py @@ -34,33 +34,33 @@ class ScopedDiscoveryTestCase(TestCase): @classmethod def setUpTestData(cls) -> None: + # fallback on the node's discovery config cls.project_a = ch_m.Project.objects.create( title="Test project A", description="test description", - # Should fallback on the node's discovery config discovery={}, ) + # use provided dataset discovery config cls.dataset_a = ch_m.Dataset.objects.create( title="Dataset 1", description="Test dataset", data_use=ch_c.VALID_DATA_USE_1, project=cls.project_a, - # Should use provided dataset discovery config discovery=DISCOVERY_CONFIG_EXTRA_PROPERTIES, ) + # use provided project discovery config cls.project_b = ch_m.Project.objects.create( title="Test project B", description="test description", - # Should use provided project discovery config discovery=CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY, ) + # Should fallback on project's discovery config cls.dataset_b = ch_m.Dataset.objects.create( title="Dataset 2", description="Test dataset 2", data_use=ch_c.VALID_DATA_USE_1, project=cls.project_b, - # Should fallback on project's discovery config discovery={}, ) @@ -223,14 +223,64 @@ def setUp(self) -> None: experiment_2["id"] = "experiment:2" self.experiment = exp_m.Experiment.objects.create(**experiment_2) + self.data_type_counts: dict[str, int] = { + "individuals": ph_m.Individual.objects.all().count(), + "biosamples": ph_m.Biosample.objects.all().count(), + "experiments": exp_m.Experiment.objects.all().count() + } + + def assert_counts_censored(self, overview_response: dict, discovery: dict): + count_threshold = discovery["rules"]["count_threshold"] + for data_type in self.data_type_counts.keys(): + response_count = overview_response["counts"][data_type] + if response_count < count_threshold: + self.assertEqual(response_count, 0) + else: + self.assertEqual(response_count, self.data_type_counts[data_type]) + + def assert_scoped_fields(self, overview_response: dict, discovery: dict): + self.assertSetEqual( + set(field for field in overview_response["fields"].keys()), + set(chart["field"] for section in discovery["overview"] for chart in section["charts"]) + ) + + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_overview(self): - response = self.client.get('/api/public_overview') - response_obj = response.json() - db_count = ph_m.Individual.objects.all().count() - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertIsInstance(response_obj, dict) - self.assertEqual(response_obj["counts"]["individuals"], db_count) + node_discovery = settings.CONFIG_PUBLIC + public_overview_url = '/api/public_overview' + # SCOPE: whole node + response_whole = self.client.get(public_overview_url) + response_whole_obj = response_whole.json() + self.assertEqual(response_whole.status_code, status.HTTP_200_OK) + self.assertIsInstance(response_whole_obj, dict) + self.assert_counts_censored(response_whole_obj, node_discovery) + self.assert_scoped_fields(response_whole_obj, node_discovery) + + # SCOPE: project_a (whole node fallback) + response_p_a = self.client.get(f"{public_overview_url}?project={self.project_a.identifier}") + self.assertEqual(response_p_a.status_code, status.HTTP_200_OK) + self.assertEqual(response_p_a.json(), response_whole_obj) + self.assert_counts_censored(response_whole_obj, node_discovery) + self.assert_scoped_fields(response_whole_obj, node_discovery) + + # SCOPE: dataset_a + response_d_a = self.client.get(f"{public_overview_url}?dataset={self.dataset_a.identifier}") + self.assertEqual(response_d_a.status_code, status.HTTP_200_OK) + self.assert_counts_censored(response_d_a.json(), self.dataset_a.discovery) + self.assert_scoped_fields(response_d_a.json(), self.dataset_a.discovery) + + # SCOPE: project_b + response_p_b = self.client.get(f"{public_overview_url}?project={self.project_b.identifier}") + self.assertEqual(response_p_b.status_code, status.HTTP_200_OK) + self.assert_counts_censored(response_p_b.json(), self.project_b.discovery) + self.assert_scoped_fields(response_p_b.json(), self.project_b.discovery) + + # SCOPE: dataset_b (project_b fallback) + response_d_b = self.client.get(f"{public_overview_url}?dataset={self.dataset_b.identifier}") + self.assertEqual(response_d_b.status_code, status.HTTP_200_OK) + self.assert_counts_censored(response_d_b.json(), self.project_b.discovery) + self.assert_scoped_fields(response_d_b.json(), self.project_b.discovery) @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_overview_bins(self): From 1c11e2fddb82d6dc0585dd739bd97977f5cfe318 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Fri, 7 Jun 2024 13:57:01 -0400 Subject: [PATCH 20/38] test discovery schema api --- chord_metadata_service/discovery/tests/test_api.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/chord_metadata_service/discovery/tests/test_api.py b/chord_metadata_service/discovery/tests/test_api.py index aed6bd056..7c409963b 100644 --- a/chord_metadata_service/discovery/tests/test_api.py +++ b/chord_metadata_service/discovery/tests/test_api.py @@ -12,6 +12,7 @@ from chord_metadata_service.chord import models as ch_m from chord_metadata_service.chord.tests import constants as ch_c from chord_metadata_service.discovery import responses as dres +from chord_metadata_service.discovery.schemas import DISCOVERY_SCHEMA from chord_metadata_service.phenopackets import models as ph_m from chord_metadata_service.phenopackets.tests import constants as ph_c from chord_metadata_service.experiments import models as exp_m @@ -416,3 +417,10 @@ def test_public_dataset_response_no_config(self): response_obj = response.json() self.assertIsInstance(response_obj, dict) self.assertEqual(response_obj, dres.NO_PUBLIC_DATA_AVAILABLE) + +class DiscoverySchemaTest(APITestCase): + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) + def test_discover_schema(self): + response = self.client.get(reverse("discovery-schema")) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json(), DISCOVERY_SCHEMA) From 75e8227c71a09217de29c1d7297336dd46b49abb Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Fri, 7 Jun 2024 14:13:47 -0400 Subject: [PATCH 21/38] test discovery overview project dataset --- .../discovery/tests/test_api.py | 24 ++++++++++++++----- 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/chord_metadata_service/discovery/tests/test_api.py b/chord_metadata_service/discovery/tests/test_api.py index 7c409963b..dec8c3e0d 100644 --- a/chord_metadata_service/discovery/tests/test_api.py +++ b/chord_metadata_service/discovery/tests/test_api.py @@ -188,6 +188,8 @@ def test_public_search_fields_missing_extra_properties(self): class PublicOverviewTest(APITestCase, ScopedDiscoveryTestCase): def setUp(self) -> None: + self.public_overview_url = '/api/public_overview' + # individuals (count 8) individuals = { f"individual_{i}": ph_m.Individual.objects.create(**ind) for i, ind in enumerate(VALID_INDIVIDUALS, start=1) @@ -249,9 +251,8 @@ def assert_scoped_fields(self, overview_response: dict, discovery: dict): @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_overview(self): node_discovery = settings.CONFIG_PUBLIC - public_overview_url = '/api/public_overview' # SCOPE: whole node - response_whole = self.client.get(public_overview_url) + response_whole = self.client.get(self.public_overview_url) response_whole_obj = response_whole.json() self.assertEqual(response_whole.status_code, status.HTTP_200_OK) self.assertIsInstance(response_whole_obj, dict) @@ -259,30 +260,41 @@ def test_overview(self): self.assert_scoped_fields(response_whole_obj, node_discovery) # SCOPE: project_a (whole node fallback) - response_p_a = self.client.get(f"{public_overview_url}?project={self.project_a.identifier}") + response_p_a = self.client.get(f"{self.public_overview_url}?project={self.project_a.identifier}") self.assertEqual(response_p_a.status_code, status.HTTP_200_OK) self.assertEqual(response_p_a.json(), response_whole_obj) self.assert_counts_censored(response_whole_obj, node_discovery) self.assert_scoped_fields(response_whole_obj, node_discovery) # SCOPE: dataset_a - response_d_a = self.client.get(f"{public_overview_url}?dataset={self.dataset_a.identifier}") + response_d_a = self.client.get(f"{self.public_overview_url}?dataset={self.dataset_a.identifier}") self.assertEqual(response_d_a.status_code, status.HTTP_200_OK) self.assert_counts_censored(response_d_a.json(), self.dataset_a.discovery) self.assert_scoped_fields(response_d_a.json(), self.dataset_a.discovery) # SCOPE: project_b - response_p_b = self.client.get(f"{public_overview_url}?project={self.project_b.identifier}") + response_p_b = self.client.get(f"{self.public_overview_url}?project={self.project_b.identifier}") self.assertEqual(response_p_b.status_code, status.HTTP_200_OK) self.assert_counts_censored(response_p_b.json(), self.project_b.discovery) self.assert_scoped_fields(response_p_b.json(), self.project_b.discovery) # SCOPE: dataset_b (project_b fallback) - response_d_b = self.client.get(f"{public_overview_url}?dataset={self.dataset_b.identifier}") + response_d_b = self.client.get(f"{self.public_overview_url}?dataset={self.dataset_b.identifier}") self.assertEqual(response_d_b.status_code, status.HTTP_200_OK) self.assert_counts_censored(response_d_b.json(), self.project_b.discovery) self.assert_scoped_fields(response_d_b.json(), self.project_b.discovery) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) + def test_overview_project_dataset(self): + # SCOPE: project_a + dataset_a + response_valid = self.client.get(f"{self.public_overview_url}?project={self.project_a.identifier}&dataset={self.dataset_a.identifier}") + self.assertEqual(response_valid.status_code, status.HTTP_200_OK) + self.assert_scoped_fields(response_valid.json(), self.dataset_a.discovery) + + # SCOPE: project_a + dataset_b (invalid) + response_invalid = self.client.get(f"{self.public_overview_url}?project={self.project_a.identifier}&dataset={self.dataset_b.identifier}") + self.assertEqual(response_invalid.status_code, status.HTTP_404_NOT_FOUND) + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_overview_bins(self): # test that there is the correct number of data entries for number From 15e85813fed6499c971c9a0d905a2c13243ef1e9 Mon Sep 17 00:00:00 2001 From: v-rocheleau Date: Fri, 7 Jun 2024 14:29:56 -0400 Subject: [PATCH 22/38] lint --- .../discovery/tests/test_api.py | 47 ++++++++++--------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/chord_metadata_service/discovery/tests/test_api.py b/chord_metadata_service/discovery/tests/test_api.py index dec8c3e0d..ece1cc4a7 100644 --- a/chord_metadata_service/discovery/tests/test_api.py +++ b/chord_metadata_service/discovery/tests/test_api.py @@ -41,6 +41,7 @@ def setUpTestData(cls) -> None: description="test description", discovery={}, ) + cls.id_proj_a = cls.project_a.identifier # use provided dataset discovery config cls.dataset_a = ch_m.Dataset.objects.create( title="Dataset 1", @@ -49,6 +50,7 @@ def setUpTestData(cls) -> None: project=cls.project_a, discovery=DISCOVERY_CONFIG_EXTRA_PROPERTIES, ) + cls.id_ds_a = cls.dataset_a.identifier # use provided project discovery config cls.project_b = ch_m.Project.objects.create( @@ -56,6 +58,7 @@ def setUpTestData(cls) -> None: description="test description", discovery=CONFIG_PUBLIC_TEST_SEARCH_SEX_ONLY, ) + cls.id_proj_b = cls.project_b.identifier # Should fallback on project's discovery config cls.dataset_b = ch_m.Dataset.objects.create( title="Dataset 2", @@ -64,6 +67,7 @@ def setUpTestData(cls) -> None: project=cls.project_b, discovery={}, ) + cls.id_ds_b = cls.dataset_b.identifier class PublicSearchFieldsTest(APITestCase, ScopedDiscoveryTestCase): @@ -109,7 +113,7 @@ def test_public_search_fields_configured(self): # SCOPE: project_a (same discovery as whole node) response_p_a = self.client.get( - f"{search_fields_url}?project={str(self.project_a.identifier)}", + f"{search_fields_url}?project={str(self.id_proj_a)}", content_type="application/json", ) self.assertEqual(response_p_a.status_code, status.HTTP_200_OK) @@ -117,7 +121,7 @@ def test_public_search_fields_configured(self): # SCOPE: project_b (discovery search sex only) response_p_b = self.client.get( - f"{search_fields_url}?project={str(self.project_b.identifier)}", + f"{search_fields_url}?project={str(self.id_proj_b)}", content_type="application/json", ) self.assertEqual(response_p_b.status_code, status.HTTP_200_OK) @@ -125,7 +129,7 @@ def test_public_search_fields_configured(self): # SCOPE: dataset_a (discovery with dataset specific extra_properties) response_d_a = self.client.get( - f"{search_fields_url}?dataset={str(self.dataset_a.identifier)}", + f"{search_fields_url}?dataset={str(self.id_ds_a)}", content_type="application/json", ) self.assertEqual(response_d_a.status_code, status.HTTP_200_OK) @@ -147,7 +151,7 @@ def test_public_search_fields_configured(self): # SCOPE: dataset_b response_d_b = self.client.get( - f"{search_fields_url}?dataset={self.dataset_b.identifier}", + f"{search_fields_url}?dataset={self.id_ds_b}", content_type="application/json", ) self.assertEqual(response_d_b.status_code, status.HTTP_200_OK) @@ -156,14 +160,14 @@ def test_public_search_fields_configured(self): # SCOPE: project_a + dataset_b (invalid) response_pd_invalid = self.client.get( - f"{search_fields_url}?project={str(self.project_a.identifier)}&dataset={self.dataset_b.identifier}", + f"{search_fields_url}?project={str(self.id_proj_a)}&dataset={self.id_ds_b}", content_type="application/json", ) self.assertEqual(response_pd_invalid.status_code, status.HTTP_404_NOT_FOUND) # SCOPE: project_a + dataset_a (valid) response_pd_valid = self.client.get( - f"{search_fields_url}?project={str(self.project_a.identifier)}&dataset={self.dataset_a.identifier}", + f"{search_fields_url}?project={str(self.id_proj_a)}&dataset={self.id_ds_a}", content_type="application/json", ) self.assertEqual(response_pd_valid.status_code, status.HTTP_200_OK) @@ -188,7 +192,7 @@ def test_public_search_fields_missing_extra_properties(self): class PublicOverviewTest(APITestCase, ScopedDiscoveryTestCase): def setUp(self) -> None: - self.public_overview_url = '/api/public_overview' + self.url = '/api/public_overview' # individuals (count 8) individuals = { @@ -246,13 +250,12 @@ def assert_scoped_fields(self, overview_response: dict, discovery: dict): set(field for field in overview_response["fields"].keys()), set(chart["field"] for section in discovery["overview"] for chart in section["charts"]) ) - @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_overview(self): node_discovery = settings.CONFIG_PUBLIC # SCOPE: whole node - response_whole = self.client.get(self.public_overview_url) + response_whole = self.client.get(self.url) response_whole_obj = response_whole.json() self.assertEqual(response_whole.status_code, status.HTTP_200_OK) self.assertIsInstance(response_whole_obj, dict) @@ -260,26 +263,26 @@ def test_overview(self): self.assert_scoped_fields(response_whole_obj, node_discovery) # SCOPE: project_a (whole node fallback) - response_p_a = self.client.get(f"{self.public_overview_url}?project={self.project_a.identifier}") + response_p_a = self.client.get(f"{self.url}?project={self.id_proj_a}") self.assertEqual(response_p_a.status_code, status.HTTP_200_OK) self.assertEqual(response_p_a.json(), response_whole_obj) self.assert_counts_censored(response_whole_obj, node_discovery) self.assert_scoped_fields(response_whole_obj, node_discovery) # SCOPE: dataset_a - response_d_a = self.client.get(f"{self.public_overview_url}?dataset={self.dataset_a.identifier}") + response_d_a = self.client.get(f"{self.url}?dataset={self.id_ds_a}") self.assertEqual(response_d_a.status_code, status.HTTP_200_OK) self.assert_counts_censored(response_d_a.json(), self.dataset_a.discovery) self.assert_scoped_fields(response_d_a.json(), self.dataset_a.discovery) # SCOPE: project_b - response_p_b = self.client.get(f"{self.public_overview_url}?project={self.project_b.identifier}") + response_p_b = self.client.get(f"{self.url}?project={self.id_proj_b}") self.assertEqual(response_p_b.status_code, status.HTTP_200_OK) self.assert_counts_censored(response_p_b.json(), self.project_b.discovery) self.assert_scoped_fields(response_p_b.json(), self.project_b.discovery) # SCOPE: dataset_b (project_b fallback) - response_d_b = self.client.get(f"{self.public_overview_url}?dataset={self.dataset_b.identifier}") + response_d_b = self.client.get(f"{self.url}?dataset={self.id_ds_b}") self.assertEqual(response_d_b.status_code, status.HTTP_200_OK) self.assert_counts_censored(response_d_b.json(), self.project_b.discovery) self.assert_scoped_fields(response_d_b.json(), self.project_b.discovery) @@ -287,19 +290,19 @@ def test_overview(self): @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_overview_project_dataset(self): # SCOPE: project_a + dataset_a - response_valid = self.client.get(f"{self.public_overview_url}?project={self.project_a.identifier}&dataset={self.dataset_a.identifier}") - self.assertEqual(response_valid.status_code, status.HTTP_200_OK) - self.assert_scoped_fields(response_valid.json(), self.dataset_a.discovery) + response = self.client.get(f"{self.url}?project={self.id_proj_a}&dataset={self.id_ds_a}") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assert_scoped_fields(response.json(), self.dataset_a.discovery) # SCOPE: project_a + dataset_b (invalid) - response_invalid = self.client.get(f"{self.public_overview_url}?project={self.project_a.identifier}&dataset={self.dataset_b.identifier}") + response_invalid = self.client.get(f"{self.url}?project={self.id_proj_a}&dataset={self.id_ds_b}") self.assertEqual(response_invalid.status_code, status.HTTP_404_NOT_FOUND) @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_EXTRA_PROPERTIES) def test_overview_bins(self): # test that there is the correct number of data entries for number # histograms, vs. number of bins - response = self.client.get('/api/public_overview') + response = self.client.get(self.url) response_obj = response.json() self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIsInstance(response_obj, dict) @@ -312,7 +315,7 @@ def test_overview_bins(self): @override_settings(CONFIG_PUBLIC={}) def test_overview_no_config(self): - response = self.client.get('/api/public_overview') + response = self.client.get(self.url) response_obj = response.json() self.assertIsInstance(response_obj, dict) self.assertEqual(response_obj, dres.NO_PUBLIC_DATA_AVAILABLE) @@ -321,6 +324,7 @@ def test_overview_no_config(self): class PublicOverviewTest2(APITestCase): def setUp(self) -> None: + self.url = '/api/public_overview' # create only 2 individuals for ind in VALID_INDIVIDUALS[:2]: ph_m.Individual.objects.create(**ind) @@ -328,7 +332,7 @@ def setUp(self) -> None: @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_overview_response(self): # test overview response when individuals count < threshold - response = self.client.get('/api/public_overview') + response = self.client.get(self.url) response_obj = response.json() self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertIsInstance(response_obj, dict) @@ -337,7 +341,7 @@ def test_overview_response(self): @override_settings(CONFIG_PUBLIC={}) def test_overview_response_no_config(self): # test overview response when individuals count < threshold - response = self.client.get('/api/public_overview') + response = self.client.get(self.url) response_obj = response.json() self.assertIsInstance(response_obj, dict) self.assertEqual(response_obj, dres.NO_PUBLIC_DATA_AVAILABLE) @@ -430,6 +434,7 @@ def test_public_dataset_response_no_config(self): self.assertIsInstance(response_obj, dict) self.assertEqual(response_obj, dres.NO_PUBLIC_DATA_AVAILABLE) + class DiscoverySchemaTest(APITestCase): @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_discover_schema(self): From e39c17e61cbfd146c9743a469b643caf57ff5edf Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Fri, 7 Jun 2024 18:53:48 +0000 Subject: [PATCH 23/38] fix scoped overview counts early return --- chord_metadata_service/discovery/api_views.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index 5b384d1e3..34cd861da 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -154,14 +154,13 @@ async def public_overview(request: DrfRequest): return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND) async def _counts_for_scoped_model_name(mn: str) -> tuple[str, int]: - if project_id and not dataset_id: - scope = "project" - value = project_id - elif project_id and dataset_id: + if dataset_id: scope = "dataset" value = dataset_id - else: - # cannot scope + elif project_id and not dataset_id: + scope = "project" + value = project_id + elif not project_id and not dataset_id: return await _counts_for_model_name(mn) filter_query = PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[mn][scope]["filter"] prefetch = PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS[mn][scope]["prefetch_related"] From e26af09a100bbd115f3e4c9fd690fc135c273170 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Fri, 7 Jun 2024 20:35:42 +0000 Subject: [PATCH 24/38] phenopacket schema and subschema api test --- .../phenopackets/tests/test_api.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/chord_metadata_service/phenopackets/tests/test_api.py b/chord_metadata_service/phenopackets/tests/test_api.py index 08e512fa7..1f24e942c 100644 --- a/chord_metadata_service/phenopackets/tests/test_api.py +++ b/chord_metadata_service/phenopackets/tests/test_api.py @@ -5,6 +5,8 @@ from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase + +from chord_metadata_service.phenopackets.schemas import PHENOPACKET_SCHEMA from . import constants as c from .. import models as m, serializers as s @@ -432,3 +434,23 @@ def test_get_phenopackets_with_authz_dataset_5(self): self.assertEqual(response.status_code, status.HTTP_200_OK) response_data = response.json() self.assertEqual(len(response_data["results"]), 0) + + +class PhenopacketSchema(APITestCase): + + def test_get_phenopacket_schema(self): + response = self.client.get("/api/schemas/phenopacket") + schema = response.json() + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(schema, PHENOPACKET_SCHEMA) + + def test_get_phenopacket_subschemas(self): + response_biosample = self.client.get("/api/schemas/phenopacket/biosample") + biosample_schema = response_biosample.json() + self.assertEqual(response_biosample.status_code, status.HTTP_200_OK) + self.assertEqual(biosample_schema, PHENOPACKET_SCHEMA["properties"]["biosamples"]["items"]) + + response_individual = self.client.get("/api/schemas/phenopacket/individual") + individual_schema = response_individual.json() + self.assertEqual(response_individual.status_code, status.HTTP_200_OK) + self.assertEqual(individual_schema, PHENOPACKET_SCHEMA["properties"]["subject"]) From bc2282279dda8dc6e12d3f2da74a77d5616e3d4c Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Sat, 8 Jun 2024 00:35:55 +0000 Subject: [PATCH 25/38] fix url schema resolving --- chord_metadata_service/experiments/schemas.py | 10 ++-------- chord_metadata_service/metadata/settings.py | 2 +- chord_metadata_service/patients/schemas.py | 8 +------- chord_metadata_service/phenopackets/schemas.py | 12 +++--------- 4 files changed, 7 insertions(+), 25 deletions(-) diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 42b3f6e4e..c4545b1ea 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -1,11 +1,9 @@ -from pathlib import Path - from django.conf import settings from referencing import Registry, Resource from .descriptions import EXPERIMENT, EXPERIMENT_RESULT, INSTRUMENT from chord_metadata_service.restapi.constants import MODEL_ID_PATTERN from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS_LIST, KEY_VALUE_OBJECT -from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe, get_schema_app_id, sub_schema_uri +from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe, sub_schema_uri __all__ = [ "DATA_FILE_OR_RECORD_URL_SCHEMA", @@ -16,11 +14,7 @@ "INSTRUMENT_SCHEMA", ] -if settings.SCHEMAS_BASE_URL: - base_uri = settings.SCHEMAS_BASE_URL -else: - base_uri = get_schema_app_id(Path(__file__).parent.name) -experiment_base_uri = sub_schema_uri(base_uri, "experiment") +experiment_base_uri = f"{settings.SCHEMAS_BASE_URL}/experiment" DATA_FILE_OR_RECORD_URL_SCHEMA = { "$schema": "http://json-schema.org/draft-07/schema#", diff --git a/chord_metadata_service/metadata/settings.py b/chord_metadata_service/metadata/settings.py index 7ade5b174..78b2b796c 100644 --- a/chord_metadata_service/metadata/settings.py +++ b/chord_metadata_service/metadata/settings.py @@ -53,7 +53,7 @@ if CHORD_URL: SCHEMAS_BASE_URL = CHORD_URL + "/api/metadata/api/schemas" else: - SCHEMAS_BASE_URL = None + SCHEMAS_BASE_URL = "/chord_metadata_service/schemas" # SECURITY WARNING: Don't run with CHORD_PERMISSIONS turned off in production, # unless an alternative permissions system is in place. diff --git a/chord_metadata_service/patients/schemas.py b/chord_metadata_service/patients/schemas.py index 4accd6816..148cc3571 100644 --- a/chord_metadata_service/patients/schemas.py +++ b/chord_metadata_service/patients/schemas.py @@ -9,20 +9,14 @@ string_with_pattern, enum_of, tag_ids_and_describe, - get_schema_app_id, sub_schema_uri, ) from chord_metadata_service.restapi.schemas import ONTOLOGY_CLASS, EXTRA_PROPERTIES_SCHEMA, TIME_ELEMENT_SCHEMA -from pathlib import Path from .descriptions import INDIVIDUAL, VITAL_STATUS from .values import Sex, KaryotypicSex -if settings.SCHEMAS_BASE_URL: - base_uri = settings.SCHEMAS_BASE_URL -else: - base_uri = get_schema_app_id(Path(__file__).parent.name) -phenopacket_base_uri = sub_schema_uri(base_uri=base_uri, name="phenopacket") +phenopacket_base_uri = f"{settings.SCHEMAS_BASE_URL}/phenopacket" VITAL_STATUS_SCHEMA = tag_ids_and_describe({ "$schema": DRAFT_07, diff --git a/chord_metadata_service/phenopackets/schemas.py b/chord_metadata_service/phenopackets/schemas.py index 5c006669d..298f1d6ce 100644 --- a/chord_metadata_service/phenopackets/schemas.py +++ b/chord_metadata_service/phenopackets/schemas.py @@ -1,6 +1,5 @@ # Individual schemas for validation of JSONField values import json -from pathlib import Path from django.conf import settings from referencing import Registry, Resource from referencing.jsonschema import DRAFT7 @@ -26,7 +25,6 @@ named_one_of, sub_schema_uri, describe_schema, - get_schema_app_id ) from . import descriptions @@ -62,15 +60,11 @@ "phenopacket_resolver", ] -if settings.SCHEMAS_BASE_URL: - base_uri = settings.SCHEMAS_BASE_URL -else: - base_uri = get_schema_app_id(Path(__file__).parent.name) -phenopacket_base_uri = sub_schema_uri(base_uri=base_uri, name="phenopacket") +phenopacket_base_uri = f"{settings.SCHEMAS_BASE_URL}/phenopacket" with open("chord_metadata_service/vrs/schema/vrs.json", "r") as file: vrs_schema_definitions = json.load(file) - vrs_schema_definitions["$id"] = sub_schema_uri(base_uri, "vrs") + vrs_schema_definitions["$id"] = sub_schema_uri(phenopacket_base_uri, "vrs") file.close() @@ -680,4 +674,4 @@ VRS_REF_REGISTRY = VRS_REF_RESOURCE @ Registry() resolver = VRS_REF_REGISTRY.resolver() -VRS_VARIATION_SCHEMA = resolver.lookup(sub_schema_uri(base_uri, "vrs#/definitions/Variation")).contents +VRS_VARIATION_SCHEMA = resolver.lookup(sub_schema_uri(phenopacket_base_uri, "vrs#/definitions/Variation")).contents From 0ae80a31f85275a4895cf01c9c657c7973b04a98 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Wed, 12 Jun 2024 17:33:07 -0400 Subject: [PATCH 26/38] extend discovery config to censorship/summaries/individuals, tests, rules endpoint --- chord_metadata_service/chord/views_search.py | 26 ++++- chord_metadata_service/discovery/api_views.py | 96 ++++++------------- .../discovery/censorship.py | 18 ++-- chord_metadata_service/discovery/fields.py | 60 +++++++----- chord_metadata_service/discovery/stats.py | 27 ++++-- .../discovery/tests/test_api.py | 45 ++++++++- .../discovery/tests/test_censorship.py | 29 +++--- .../discovery/tests/test_fields.py | 70 ++++++++------ .../discovery/tests/test_stats.py | 4 +- chord_metadata_service/discovery/types.py | 14 ++- chord_metadata_service/discovery/utils.py | 57 +++++++++++ .../experiments/summaries.py | 53 +++++----- chord_metadata_service/patients/api_views.py | 40 +++++--- chord_metadata_service/patients/summaries.py | 13 +-- .../phenopackets/summaries.py | 42 ++++---- chord_metadata_service/restapi/api_views.py | 25 +++-- chord_metadata_service/restapi/urls.py | 2 + 17 files changed, 387 insertions(+), 234 deletions(-) create mode 100644 chord_metadata_service/discovery/utils.py diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py index fcc2d860d..affdb0a33 100644 --- a/chord_metadata_service/chord/views_search.py +++ b/chord_metadata_service/chord/views_search.py @@ -23,6 +23,9 @@ from typing import Callable from chord_metadata_service.chord.permissions import OverrideOrSuperUserOnly, ReadOnly +from chord_metadata_service.discovery.exceptions import DiscoveryConfigException +from chord_metadata_service.discovery.types import DiscoveryConfig +from chord_metadata_service.discovery.utils import get_discovery from chord_metadata_service.logger import logger from chord_metadata_service.experiments.api_views import EXPERIMENT_SELECT_REL, EXPERIMENT_PREFETCH @@ -47,12 +50,20 @@ OUTPUT_FORMAT_BENTO_SEARCH_RESULT = "bento_search_result" -async def experiment_dataset_summary(_request: DrfRequest, dataset): - return await dt_experiment_summary(Experiment.objects.filter(dataset=dataset), low_counts_censored=False) +async def experiment_dataset_summary(_request: DrfRequest, dataset: Dataset, discovery: DiscoveryConfig): + return await dt_experiment_summary( + Experiment.objects.filter(dataset=dataset), + discovery, + low_counts_censored=False + ) -async def phenopacket_dataset_summary(_request: DrfRequest, dataset: Dataset): - return await dt_phenopacket_summary(Phenopacket.objects.filter(dataset=dataset), low_counts_censored=False) +async def phenopacket_dataset_summary(_request: DrfRequest, dataset: Dataset, discovery: DiscoveryConfig): + return await dt_phenopacket_summary( + Phenopacket.objects.filter(dataset=dataset), + discovery, + low_counts_censored=False + ) # TODO: CHORD-standardized logging @@ -550,10 +561,15 @@ def private_dataset_search(request: DrfRequest, dataset_id: str): async def dataset_summary(request: DrfRequest, dataset_id: str): # TODO: PERMISSIONS + try: + discovery = await get_discovery(dataset_id=dataset_id) + except DiscoveryConfigException as e: + return Response(e.message, status=status.HTTP_404_NOT_FOUND) + dataset = await Dataset.objects.aget(identifier=dataset_id) summaries = await asyncio.gather( - *map(lambda dt: DATASET_DATA_TYPE_SUMMARY_FUNCTIONS[dt](request, dataset), + *map(lambda dt: DATASET_DATA_TYPE_SUMMARY_FUNCTIONS[dt](request, dataset, discovery), DATASET_DATA_TYPE_SUMMARY_FUNCTIONS.keys()) ) diff --git a/chord_metadata_service/discovery/api_views.py b/chord_metadata_service/discovery/api_views.py index 34cd861da..8145f3680 100644 --- a/chord_metadata_service/discovery/api_views.py +++ b/chord_metadata_service/discovery/api_views.py @@ -2,7 +2,6 @@ from adrf.decorators import api_view from django.conf import settings -from django.core.exceptions import ObjectDoesNotExist from drf_spectacular.utils import extend_schema, inline_serializer from rest_framework import serializers, status from rest_framework.decorators import permission_classes @@ -10,7 +9,9 @@ from rest_framework.request import Request as DrfRequest from rest_framework.response import Response +from chord_metadata_service.discovery.censorship import RULES_NO_PERMISSIONS from chord_metadata_service.discovery.exceptions import DiscoveryConfigException +from chord_metadata_service.discovery.utils import get_request_discovery from . import responses as dres from .types import BinWithValue @@ -22,48 +23,6 @@ from .schemas import DISCOVERY_SCHEMA -async def _get_project_discovery(project_id: str = None, project: cm.Project = None) -> dict: - if not project and project_id: - # retrieve project by ID if not provided - project = await cm.Project.objects.aget(identifier=project_id) - if not project.discovery: - # fallback on global discovery config if project has none - return settings.CONFIG_PUBLIC - return project.discovery - - -async def _get_dataset_discovery(dataset_id: str) -> dict: - dataset = await cm.Dataset.objects.aget(identifier=dataset_id) - if not dataset.discovery: - project = await cm.Project.objects.aget(datasets=dataset_id) - return await _get_project_discovery(project=project) - return dataset.discovery - - -async def _get_discovery(request: DrfRequest) -> dict: - dataset_id = request.query_params.get("dataset") - project_id = request.query_params.get("project") - if dataset_id and project_id: - # check if the dataset belongs to the project - is_scope_valid = await cm.Dataset.objects.filter( - identifier=dataset_id, - project__identifier=project_id, - ).aexists() - if not is_scope_valid: - raise DiscoveryConfigException(dataset_id, project_id) - try: - if dataset_id: - # get dataset's discovery config if dataset_id is passed - return await _get_dataset_discovery(dataset_id) - elif project_id: - # get project's discovery config if project_id is passed and dataset_id is not - return await _get_project_discovery(project_id=project_id) - except ObjectDoesNotExist: - raise DiscoveryConfigException(dataset_id, project_id) - # fallback to config.json when no dataset or project is in the request - return settings.CONFIG_PUBLIC - - @extend_schema( description="Public search fields with their configuration", responses={ @@ -86,25 +45,23 @@ async def public_search_fields(request: DrfRequest): """ try: - config_public = await _get_discovery(request) + discovery = await get_request_discovery(request) except DiscoveryConfigException as e: return Response(e.message, status=status.HTTP_404_NOT_FOUND) - if not config_public: + if not discovery: return Response(dres.NO_PUBLIC_FIELDS_CONFIGURED, status=status.HTTP_404_NOT_FOUND) - field_conf = config_public["fields"] - # Note: the array is wrapped in a dictionary structure to help with JSON # processing by some services. async def _get_field_response(field) -> dict | None: - field_props = field_conf[field] + field_props = discovery.get("fields", {}).get(field, {}) return { **field_props, "id": field, - "options": await get_field_options(field_props, low_counts_censored=True), + "options": await get_field_options(field, discovery=discovery, low_counts_censored=True), } async def _get_section_response(section) -> dict: @@ -114,7 +71,7 @@ async def _get_section_response(section) -> dict: } return Response({ - "sections": await asyncio.gather(*map(_get_section_response, config_public["search"])), + "sections": await asyncio.gather(*map(_get_section_response, discovery["search"])), }) @@ -144,13 +101,13 @@ async def public_overview(request: DrfRequest): """ try: - config_public = await _get_discovery(request) + discovery = await get_request_discovery(request) except DiscoveryConfigException as e: return Response(e.message, status=status.HTTP_404_NOT_FOUND) dataset_id = request.query_params.get("dataset", None) project_id = request.query_params.get("project", None) - if not config_public: + if not discovery: return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND) async def _counts_for_scoped_model_name(mn: str) -> tuple[str, int]: @@ -173,7 +130,7 @@ async def _counts_for_scoped_model_name(mn: str) -> tuple[str, int]: # Get the rules config - because we used get_config_public_and_field_set_permissions with no arguments, it'll choose # these values based on if we have access to ALL public fields or not. - rules_config = config_public["rules"] + rules_config = discovery["rules"] count_threshold = rules_config["count_threshold"] # Set counts to 0 if they're under the count threshold, and we don't have full data access permissions for the @@ -184,42 +141,40 @@ async def _counts_for_scoped_model_name(mn: str) -> tuple[str, int]: counts[public_model_name] = 0 response = { - "layout": config_public["overview"], + "layout": discovery["overview"], "fields": {}, "counts": { "individuals": counts["individual"], "biosamples": counts["biosample"], "experiments": counts["experiment"], }, - # TODO: remove these in favour of public_rules endpoint - "max_query_parameters": rules_config["max_query_parameters"], - "count_threshold": count_threshold, } # Parse the public config to gather data for each field defined in the overview - fields = [chart["field"] for section in config_public["overview"] for chart in section["charts"]] - field_conf = config_public["fields"] + fields = [chart["field"] for section in discovery["overview"] for chart in section["charts"]] + field_conf = discovery["fields"] - async def _get_field_response(field_id: str, field_props: dict) -> dict: + async def _get_field_response(field: str) -> dict: + field_props = field_conf.get(field, {"datatype": None}) stats: list[BinWithValue] | None if field_props["datatype"] == "string": - stats = await get_categorical_stats(field_props, project_id, dataset_id, low_counts_censored=True) + stats = await get_categorical_stats(field, discovery, project_id, dataset_id, low_counts_censored=True) elif field_props["datatype"] == "number": - stats = await get_range_stats(field_props, project_id, dataset_id, low_counts_censored=True) + stats = await get_range_stats(field, discovery, project_id, dataset_id, low_counts_censored=True) elif field_props["datatype"] == "date": - stats = await get_date_stats(field_props, project_id, dataset_id, low_counts_censored=True) + stats = await get_date_stats(field, discovery, project_id, dataset_id, low_counts_censored=True) else: raise NotImplementedError() return { **field_props, - "id": field_id, + "id": field, **({"data": stats} if stats is not None else {}), } # Parallel async collection of field responses for public overview - field_responses = await asyncio.gather(*(_get_field_response(field, field_conf[field]) for field in fields)) + field_responses = await asyncio.gather(*(_get_field_response(field) for field in fields)) for field, field_res in zip(fields, field_responses): response["fields"][field] = field_res @@ -261,3 +216,14 @@ async def public_dataset(_request: DrfRequest): @permission_classes([AllowAny]) async def discovery_schema(_request: DrfRequest): return Response(DISCOVERY_SCHEMA) + + +@api_view(["GET"]) +@permission_classes([AllowAny]) +async def public_rules(request: DrfRequest): + try: + discovery = await get_request_discovery(request) + except DiscoveryConfigException as e: + return Response(e.message, status=status.HTTP_404_NOT_FOUND) + rules = discovery["rules"] if discovery and "rules" in discovery else RULES_NO_PERMISSIONS + return Response(rules, status=status.HTTP_200_OK) diff --git a/chord_metadata_service/discovery/censorship.py b/chord_metadata_service/discovery/censorship.py index 85cb34484..05924d750 100644 --- a/chord_metadata_service/discovery/censorship.py +++ b/chord_metadata_service/discovery/censorship.py @@ -1,6 +1,6 @@ import sys -from django.conf import settings +from chord_metadata_service.discovery.types import DiscoveryConfig __all__ = [ "RULES_NO_PERMISSIONS", @@ -16,27 +16,27 @@ } -def get_threshold(low_counts_censored: bool) -> int: +def get_threshold(discovery: DiscoveryConfig, low_counts_censored: bool) -> int: """ Gets the maximum count threshold for hiding censored data (i.e., rounding to 0). """ if not low_counts_censored: return 0 - if not settings.CONFIG_PUBLIC: + if not discovery: return RULES_NO_PERMISSIONS["count_threshold"] - return settings.CONFIG_PUBLIC["rules"]["count_threshold"] + return discovery["rules"]["count_threshold"] -def thresholded_count(c: int, low_counts_censored: bool) -> int: - return 0 if c <= get_threshold(low_counts_censored) else c +def thresholded_count(c: int, discovery: DiscoveryConfig, low_counts_censored: bool) -> int: + return 0 if c <= get_threshold(discovery, low_counts_censored) else c -def get_max_query_parameters(low_counts_censored: bool) -> int: +def get_max_query_parameters(discovery: DiscoveryConfig, low_counts_censored: bool) -> int: """ Gets the maximum number of query parameters allowed for censored discovery. """ if not low_counts_censored: return sys.maxsize - if not settings.CONFIG_PUBLIC: + if not discovery: return RULES_NO_PERMISSIONS["max_query_parameters"] - return settings.CONFIG_PUBLIC["rules"]["max_query_parameters"] + return discovery["rules"]["max_query_parameters"] diff --git a/chord_metadata_service/discovery/fields.py b/chord_metadata_service/discovery/fields.py index 724951498..2928d8f40 100644 --- a/chord_metadata_service/discovery/fields.py +++ b/chord_metadata_service/discovery/fields.py @@ -11,7 +11,7 @@ from . import fields_utils as f_utils from .censorship import get_threshold, thresholded_count from .stats import stats_for_field, get_scoped_queryset -from .types import BinWithValue, DiscoveryFieldProps +from .types import BinWithValue, DiscoveryConfig, DiscoveryFieldProps LENGTH_Y_M = 4 + 1 + 2 # dates stored as yyyy-mm-dd @@ -31,11 +31,12 @@ async def get_field_bins(query_set: QuerySet, field: str, bin_size: int): return stats -async def get_field_options(field_props: DiscoveryFieldProps, low_counts_censored: bool) -> list[Any]: +async def get_field_options(field: str, discovery: DiscoveryConfig, low_counts_censored: bool) -> list[Any]: """ Given properties for a public field, return the list of authorized options for querying this field. """ + field_props = discovery.get("fields", {}).get(field) if field_props["datatype"] == "string": options = field_props["config"].get("enum") # Special case: no list of values specified @@ -43,7 +44,7 @@ async def get_field_options(field_props: DiscoveryFieldProps, low_counts_censore # We must be careful here not to leak 'small cell' values as options # - e.g., if there are three individuals with sex=UNKNOWN_SEX, this # should be treated as if the field isn't in the database at all. - options = await get_distinct_field_values(field_props, low_counts_censored) + options = await get_distinct_field_values(field, discovery, low_counts_censored) elif field_props["datatype"] == "number": options = [label for floor, ceil, label in f_utils.labelled_range_generator(field_props)] elif field_props["datatype"] == "date": @@ -59,20 +60,20 @@ async def get_field_options(field_props: DiscoveryFieldProps, low_counts_censore return options -async def get_distinct_field_values(field_props: DiscoveryFieldProps, low_counts_censored: bool) -> list[Any]: +async def get_distinct_field_values(field: str, discovery: DiscoveryConfig, low_counts_censored: bool) -> list[Any]: # We must be careful here not to leak 'small cell' values as options # - e.g., if there are three individuals with sex=UNKNOWN_SEX, this # should be treated as if the field isn't in the database at all. + field_props = discovery.get("fields", {}).get(field) + model, mapping_field = f_utils.get_model_and_field(field_props["mapping"]) + threshold = get_threshold(discovery, low_counts_censored) - model, field = f_utils.get_model_and_field(field_props["mapping"]) - threshold = get_threshold(low_counts_censored) - - field_query = field + field_query = mapping_field if group_by := field_props.get("group_by"): # JSONField containing an array # use jsonb_path_query field expression - field_query = f_utils.get_jsonb_path_query(field, group_by) - values_with_counts = model.objects.values_list(field_query).annotate(count=Count(field)) + field_query = f_utils.get_jsonb_path_query(mapping_field, group_by) + values_with_counts = model.objects.values_list(field_query).annotate(count=Count(mapping_field)) return [ val @@ -103,7 +104,12 @@ async def compute_binned_ages(individual_queryset: QuerySet, bin_size: int) -> l return binned_ages -async def get_age_numeric_binned(individual_queryset: QuerySet, bin_size: int, low_counts_censored: bool) -> dict: +async def get_age_numeric_binned( + individual_queryset: QuerySet, + bin_size: int, + discovery: DiscoveryConfig, + low_counts_censored: bool +) -> dict: """ age_numeric is computed at ingestion time of phenopackets. On some instances it might be unavailable and as a fallback must be computed from the age JSON field which @@ -121,7 +127,7 @@ async def get_age_numeric_binned(individual_queryset: QuerySet, bin_size: int, l ) return { - b: thresholded_count(bv, low_counts_censored) + b: thresholded_count(bv, discovery, low_counts_censored) for b, bv in individuals_age.items() } @@ -165,12 +171,14 @@ async def get_month_date_range(field_props: DiscoveryFieldProps) -> tuple[str | async def get_range_stats( - field_props: DiscoveryFieldProps, + field: str, + discovery: DiscoveryConfig, project_id: str | None = None, dataset_id: str | None = None, low_counts_censored: bool = True ) -> list[BinWithValue]: - model, field = f_utils.get_model_and_field(field_props["mapping"]) + field_props = discovery.get("fields", {}).get(field) + model, field_mapping = f_utils.get_model_and_field(field_props["mapping"]) # JSONField array specific field props group_by = field_props.get("group_by") @@ -190,8 +198,8 @@ async def get_range_stats( else: whens = [ When( - **{f"{field}__gte": floor} if floor is not None else {}, - **{f"{field}__lt": ceil} if ceil is not None else {}, + **{f"{field_mapping}__gte": floor} if floor is not None else {}, + **{f"{field_mapping}__lt": ceil} if ceil is not None else {}, then=Value(label), ) for floor, ceil, label in f_utils.labelled_range_generator(field_props) @@ -206,7 +214,7 @@ async def get_range_stats( # Maximum number of entries needed to round a count from its true value down to 0 (censored discovery) stats: dict[str, int] = dict() async for item in query_set: - stats[item["label"]] = thresholded_count(item["total"], low_counts_censored) + stats[item["label"]] = thresholded_count(item["total"], discovery, low_counts_censored) # All the bins between start and end must be represented and ordered bins: list[BinWithValue] = [ @@ -221,7 +229,8 @@ async def get_range_stats( async def get_categorical_stats( - field_props: DiscoveryFieldProps, + field: str, + discovery: DiscoveryConfig, project_id: str | None = None, dataset_id: str | None = None, low_counts_censored: bool = True @@ -229,7 +238,7 @@ async def get_categorical_stats( """ Fetches statistics for a given categorical field and apply privacy policies """ - + field_props = discovery.get("fields", {}).get(field) model, field_name = f_utils.get_model_and_field(field_props["mapping"]) # Collect stats for the field, censoring low cell counts along the way @@ -237,7 +246,7 @@ async def get_categorical_stats( # database - i.e., if the label is pulled from the values in the database, someone could otherwise learn # 1 <= this field <= threshold given it being present at all. # - stats_for_field(...) handles this! - stats: Mapping[str, int] = await stats_for_field(model, field_name, low_counts_censored, + stats: Mapping[str, int] = await stats_for_field(model, field_name, discovery, low_counts_censored, add_missing=True, group_by=field_props.get("group_by"), project_id=project_id, dataset_id=dataset_id) @@ -267,7 +276,8 @@ async def get_categorical_stats( async def get_date_stats( - field_props: DiscoveryFieldProps, + field: str, + discovery: DiscoveryConfig, project_id: str | None = None, dataset_id: str | None = None, low_counts_censored: bool = True, @@ -280,6 +290,10 @@ async def get_date_stats( regular fields when needed. TODO: for now only dates binned by month are handled """ + field_props = discovery.get("fields", {}).get(field) + if not field_props: + msg = f"Field {field} is not in the provided discovery config." + raise NotImplementedError(msg) if (bin_by := field_props["config"]["bin_by"]) != "month": msg = f"Binning dates by `{bin_by}` method not implemented" @@ -324,12 +338,12 @@ async def get_date_stats( label = f"{month_abbr[month].capitalize()} {year}" # convert key as yyyy-mm to `abbreviated month yyyy` bins.append({ "label": label, - "value": thresholded_count(stats.get(key, 0), low_counts_censored), + "value": thresholded_count(stats.get(key, 0), discovery, low_counts_censored), }) # Append missing items at the end if any if "missing" in stats: - bins.append({"label": "missing", "value": thresholded_count(stats["missing"], low_counts_censored)}) + bins.append({"label": "missing", "value": thresholded_count(stats["missing"], discovery, low_counts_censored)}) return bins diff --git a/chord_metadata_service/discovery/stats.py b/chord_metadata_service/discovery/stats.py index 431d3caad..9200e393c 100644 --- a/chord_metadata_service/discovery/stats.py +++ b/chord_metadata_service/discovery/stats.py @@ -5,7 +5,7 @@ from .censorship import thresholded_count from .fields_utils import get_jsonb_path_query, get_public_model_name from .model_lookups import PUBLIC_MODEL_NAMES_TO_SCOPE_FILTERS -from .types import BinWithValue +from .types import BinWithValue, DiscoveryConfig __all__ = [ "individual_experiment_type_stats", @@ -18,7 +18,7 @@ async def individual_experiment_type_stats( - queryset: QuerySet, low_counts_censored: bool + queryset: QuerySet, discovery: DiscoveryConfig, low_counts_censored: bool ) -> tuple[int, list[BinWithValue]]: """ Used for a fixed-response public API and beacon. @@ -29,12 +29,13 @@ async def individual_experiment_type_stats( queryset .values(label=F("phenopackets__biosamples__experiment__experiment_type")) .annotate(value=Count("phenopackets__biosamples__experiment", distinct=True)), + discovery, low_counts_censored, ) async def individual_biosample_tissue_stats( - queryset: QuerySet, low_counts_censored: bool + queryset: QuerySet, discovery: DiscoveryConfig, low_counts_censored: bool ) -> tuple[int, list[BinWithValue]]: """ Used for a fixed-response public API and beacon. @@ -44,12 +45,14 @@ async def individual_biosample_tissue_stats( queryset .values(label=F("phenopackets__biosamples__sampled_tissue__label")) .annotate(value=Count("phenopackets__biosamples", distinct=True)), + discovery, low_counts_censored, ) async def bento_public_format_count_and_stats_list( annotated_queryset: QuerySet, + discovery: DiscoveryConfig, low_counts_censored: bool, ) -> tuple[int, list[BinWithValue]]: stats_list: list[BinWithValue] = [] @@ -57,7 +60,7 @@ async def bento_public_format_count_and_stats_list( async for q in annotated_queryset: label = q["label"] - value = thresholded_count(int(q["value"]), low_counts_censored) + value = thresholded_count(int(q["value"]), discovery, low_counts_censored) # Be careful not to leak values if they're in the database but below threshold if value == 0: @@ -70,7 +73,7 @@ async def bento_public_format_count_and_stats_list( total += value stats_list.append({"label": label, "value": value}) - return thresholded_count(total, low_counts_censored), stats_list + return thresholded_count(total, discovery, low_counts_censored), stats_list def get_scoped_queryset( @@ -95,6 +98,7 @@ def get_scoped_queryset( async def stats_for_field( model: Type[Model], field: str, + discovery: DiscoveryConfig, low_counts_censored: bool, add_missing: bool = False, group_by: str | None = None, @@ -107,11 +111,16 @@ async def stats_for_field( """ qs = get_scoped_queryset(model, project_id, dataset_id) return await queryset_stats_for_field( - qs, field, low_counts_censored=low_counts_censored, add_missing=add_missing, group_by=group_by) + qs, field, discovery, low_counts_censored=low_counts_censored, add_missing=add_missing, group_by=group_by) async def queryset_stats_for_field( - queryset: QuerySet, field: str, low_counts_censored: bool, add_missing: bool = False, group_by: str | None = None + queryset: QuerySet, + field: str, + discovery: DiscoveryConfig, + low_counts_censored: bool, + add_missing: bool = False, + group_by: str | None = None ) -> Mapping[str, int]: """ Computes counts of distinct values for a queryset. @@ -143,12 +152,12 @@ async def queryset_stats_for_field( # Censor low cell counts if necessary - we don't want to betray that the value even exists in the database if # we have a low count for it. - if thresholded_count(item["total"], low_counts_censored) == 0: + if thresholded_count(item["total"], discovery, low_counts_censored) == 0: continue stats[key] = item["total"] if add_missing: - stats["missing"] = thresholded_count(num_missing, low_counts_censored) + stats["missing"] = thresholded_count(num_missing, discovery, low_counts_censored) return stats diff --git a/chord_metadata_service/discovery/tests/test_api.py b/chord_metadata_service/discovery/tests/test_api.py index ece1cc4a7..e60b836f9 100644 --- a/chord_metadata_service/discovery/tests/test_api.py +++ b/chord_metadata_service/discovery/tests/test_api.py @@ -12,7 +12,9 @@ from chord_metadata_service.chord import models as ch_m from chord_metadata_service.chord.tests import constants as ch_c from chord_metadata_service.discovery import responses as dres +from chord_metadata_service.discovery.censorship import RULES_NO_PERMISSIONS from chord_metadata_service.discovery.schemas import DISCOVERY_SCHEMA +from chord_metadata_service.discovery.types import DiscoveryConfig from chord_metadata_service.phenopackets import models as ph_m from chord_metadata_service.phenopackets.tests import constants as ph_c from chord_metadata_service.experiments import models as exp_m @@ -236,7 +238,7 @@ def setUp(self) -> None: "experiments": exp_m.Experiment.objects.all().count() } - def assert_counts_censored(self, overview_response: dict, discovery: dict): + def assert_counts_censored(self, overview_response: dict, discovery: DiscoveryConfig): count_threshold = discovery["rules"]["count_threshold"] for data_type in self.data_type_counts.keys(): response_count = overview_response["counts"][data_type] @@ -245,7 +247,7 @@ def assert_counts_censored(self, overview_response: dict, discovery: dict): else: self.assertEqual(response_count, self.data_type_counts[data_type]) - def assert_scoped_fields(self, overview_response: dict, discovery: dict): + def assert_scoped_fields(self, overview_response: dict, discovery: DiscoveryConfig): self.assertSetEqual( set(field for field in overview_response["fields"].keys()), set(chart["field"] for section in discovery["overview"] for chart in section["charts"]) @@ -441,3 +443,42 @@ def test_discover_schema(self): response = self.client.get(reverse("discovery-schema")) self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(response.json(), DISCOVERY_SCHEMA) + + +class DiscoveryRulesTest(ScopedDiscoveryTestCase): + @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) + def test_discovery_rules(self): + # Node scope + url = reverse("public-rules") + response = self.client.get(url) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json(), DISCOVERY_CONFIG_TEST["rules"]) + + # PROJECTS + response_p_a = self.client.get(f"{url}?project={self.id_proj_a}") + self.assertEqual(response_p_a.status_code, status.HTTP_200_OK) + self.assertEqual(response_p_a.json(), DISCOVERY_CONFIG_TEST["rules"]) # node discovery fallback + + response_p_b = self.client.get(f"{url}?project={self.id_proj_b}") + self.assertEqual(response_p_b.status_code, status.HTTP_200_OK) + self.assertEqual(response_p_b.json(), self.project_b.discovery["rules"]) + + # Dataset scope + response_d_a = self.client.get(f"{url}?dataset={self.id_ds_a}") + self.assertEqual(response_d_a.status_code, status.HTTP_200_OK) + self.assertEqual(response_d_a.json(), self.dataset_a.discovery["rules"]) + + response_d_b = self.client.get(f"{url}?dataset={self.id_ds_b}") + self.assertEqual(response_d_b.status_code, status.HTTP_200_OK) + self.assertEqual(response_d_b.json(), self.project_b.discovery["rules"]) + + @override_settings(CONFIG_PUBLIC={}) + def test_discovery_exp(self): + # Node scope not configured + url = reverse("public-rules") + response = self.client.get(url) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json(), RULES_NO_PERMISSIONS) + + response_exp = self.client.get(f"{url}?project={self.id_proj_a}&dataset={self.id_ds_b}") + self.assertEqual(response_exp.status_code, status.HTTP_404_NOT_FOUND) diff --git a/chord_metadata_service/discovery/tests/test_censorship.py b/chord_metadata_service/discovery/tests/test_censorship.py index a3f3478a8..04719cccc 100644 --- a/chord_metadata_service/discovery/tests/test_censorship.py +++ b/chord_metadata_service/discovery/tests/test_censorship.py @@ -10,33 +10,30 @@ class CensorshipGetThresholdTest(TestCase): # get_threshold(...) def test_get_threshold_no_censorship(self): - self.assertEqual(get_threshold(low_counts_censored=False), 0) + self.assertEqual(get_threshold({}, low_counts_censored=False), 0) @override_settings(CONFIG_PUBLIC={}) def test_get_threshold_no_config(self): # no public config configured - self.assertEqual(get_threshold(low_counts_censored=True), sys.maxsize) + self.assertEqual(get_threshold({}, low_counts_censored=True), sys.maxsize) class CensorshipThresholdedCountTest(TestCase): - @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_get_threshold_configured(self): - self.assertEqual(get_threshold(low_counts_censored=False), 0) - self.assertEqual(get_threshold(low_counts_censored=True), 5) + self.assertEqual(get_threshold(DISCOVERY_CONFIG_TEST, low_counts_censored=False), 0) + self.assertEqual(get_threshold(DISCOVERY_CONFIG_TEST, low_counts_censored=True), 5) # thresholded_count(...) def test_thresholded_count_no_censorship(self): - self.assertEqual(thresholded_count(1, low_counts_censored=False), 1) + self.assertEqual(thresholded_count(1, {}, low_counts_censored=False), 1) - @override_settings(CONFIG_PUBLIC={}) - def test_thresholded_count_no_config(self): # no public config configured - self.assertEqual(thresholded_count(100000, low_counts_censored=True), 0) + def test_thresholded_count_no_config(self): + self.assertEqual(thresholded_count(100000, {}, low_counts_censored=True), 0) - @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_thresholded_count_configured(self): - self.assertEqual(thresholded_count(5, low_counts_censored=False), 5) - self.assertEqual(thresholded_count(5, low_counts_censored=True), 0) + self.assertEqual(thresholded_count(5, DISCOVERY_CONFIG_TEST, low_counts_censored=False), 5) + self.assertEqual(thresholded_count(5, DISCOVERY_CONFIG_TEST, low_counts_censored=True), 0) class CensorshipGetMaxQueryParametersTest(TestCase): @@ -44,13 +41,13 @@ class CensorshipGetMaxQueryParametersTest(TestCase): # get_max_query_parameters(...) def test_get_max_query_parameters_no_censorship(self): - self.assertEqual(get_max_query_parameters(low_counts_censored=False), sys.maxsize) + self.assertEqual(get_max_query_parameters({}, low_counts_censored=False), sys.maxsize) @override_settings(CONFIG_PUBLIC={}) def test_get_max_query_parameters_no_config(self): - self.assertEqual(get_max_query_parameters(low_counts_censored=True), 0) + self.assertEqual(get_max_query_parameters({}, low_counts_censored=True), 0) @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) def test_get_max_query_parameters_configured(self): - self.assertEqual(get_max_query_parameters(low_counts_censored=False), sys.maxsize) - self.assertEqual(get_max_query_parameters(low_counts_censored=True), 2) + self.assertEqual(get_max_query_parameters(DISCOVERY_CONFIG_TEST, low_counts_censored=False), sys.maxsize) + self.assertEqual(get_max_query_parameters(DISCOVERY_CONFIG_TEST, low_counts_censored=True), 2) diff --git a/chord_metadata_service/discovery/tests/test_fields.py b/chord_metadata_service/discovery/tests/test_fields.py index 615d1b18b..8dc905b30 100644 --- a/chord_metadata_service/discovery/tests/test_fields.py +++ b/chord_metadata_service/discovery/tests/test_fields.py @@ -1,7 +1,9 @@ from django.test import TransactionTestCase, override_settings from rest_framework.test import APITestCase +from copy import deepcopy from chord_metadata_service.chord.tests.helpers import ProjectTestCase +from chord_metadata_service.discovery.types import DiscoveryConfig from chord_metadata_service.patients import models as pa_m from chord_metadata_service.phenopackets.tests import constants as ph_c from chord_metadata_service.phenopackets import models as ph_m @@ -18,38 +20,34 @@ class TestGetFieldOptions(TransactionTestCase): - - field_some_prop = { - "datatype": "string", - "mapping": "individual/extra_properties/some_prop", - "title": "Some Prop", - "description": "Some property", - "config": { - "enum": ["a", "b"], - }, + discovery: DiscoveryConfig = { + "fields": { + "some_prop": { + "datatype": "string", + "mapping": "individual/extra_properties/some_prop", + "title": "Some Prop", + "description": "Some property", + "config": { + "enum": ["a", "b"], + }, + } + } } async def test_get_string_options(self): - self.assertListEqual(await get_field_options(self.field_some_prop, low_counts_censored=False), ["a", "b"]) + self.assertListEqual(await get_field_options("some_prop", self.discovery, False), ["a", "b"]) async def test_get_field_options_not_impl(self): + # {**self.field_some_prop, "datatype": "made_up"} + invalid_discovery = deepcopy(self.discovery) + invalid_discovery["fields"]["some_prop"]["datatype"] = "made_up" with self.assertRaises(NotImplementedError): # noinspection PyTypeChecker - await get_field_options({**self.field_some_prop, "datatype": "made_up"}, low_counts_censored=False) + await get_field_options("some_prop", invalid_discovery, low_counts_censored=False) class TestGetCategoricalStats(ProjectTestCase): - f_sex = { - "mapping": "individual/sex", - "datatype": "string", - "title": "Sex", - "description": "Sex", - "config": { - "enum": None, - }, - } - def setUp(self): self.individual_1 = pa_m.Individual.objects.create(**ph_c.VALID_INDIVIDUAL_1) self.meta_data = ph_m.MetaData.objects.create(**ph_c.VALID_META_DATA_1) @@ -61,13 +59,13 @@ def setUp(self): ) async def test_categorical_stats_lcf(self): - res = await get_categorical_stats(self.f_sex, project_id=self.project.identifier, + res = await get_categorical_stats("sex", DISCOVERY_CONFIG_TEST, project_id=self.project.identifier, dataset_id=self.dataset.identifier, low_counts_censored=False) self.assertListEqual(res, [{"label": "MALE", "value": 1}, {"label": "missing", "value": 0}]) @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_categorical_stats_lct(self): - res = await get_categorical_stats(self.f_sex, project_id=self.project.identifier, + res = await get_categorical_stats("sex", DISCOVERY_CONFIG_TEST, project_id=self.project.identifier, dataset_id=self.dataset.identifier, low_counts_censored=True) self.assertListEqual(res, [{"label": "missing", "value": 0}]) @@ -87,7 +85,7 @@ async def test_wrong_bin_config(self): } with self.assertRaises(NotImplementedError): - await get_date_stats(fp) + await get_date_stats("date_of_consent", DISCOVERY_CONFIG_TEST) with self.assertRaises(NotImplementedError): await get_month_date_range(fp) @@ -105,7 +103,7 @@ async def test_wrong_field_config(self): } with self.assertRaises(NotImplementedError): - await get_date_stats(fp) + await get_date_stats("date_of_consent", DISCOVERY_CONFIG_TEST) with self.assertRaises(NotImplementedError): await get_month_date_range(fp) @@ -133,8 +131,13 @@ def setUp(self) -> None: @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_json_categorical_stats_lcf(self): - res = await get_categorical_stats(self.dm_fp, project_id=self.project.identifier, - dataset_id=self.dataset.identifier, low_counts_censored=False) + res = await get_categorical_stats( + "diagnostic_markers", + DISCOVERY_CONFIG_TEST, + project_id=self.project.identifier, + dataset_id=self.dataset.identifier, + low_counts_censored=False + ) ground_truth = [ {"label": "Genetic Testing", "value": 1}, {"label": "Hematology Test", "value": 1}, @@ -144,8 +147,13 @@ async def test_json_categorical_stats_lcf(self): @override_settings(CONFIG_PUBLIC=DISCOVERY_CONFIG_TEST) async def test_json_categorical_stats_lct(self): - res = await get_categorical_stats(self.dm_fp, project_id=self.project.identifier, - dataset_id=self.dataset.identifier, low_counts_censored=True) + res = await get_categorical_stats( + "diagnostic_markers", + DISCOVERY_CONFIG_TEST, + project_id=self.project.identifier, + dataset_id=self.dataset.identifier, + low_counts_censored=True + ) ground_truth = [ {"label": "missing", "value": 0}, ] @@ -182,10 +190,10 @@ def test_filter_queryset_field_value_number(self): self.assertEqual(qs.count(), 0) async def test_get_distinct_values(self): - dm_values = await get_distinct_field_values(self.dm_fp, False) + dm_values = await get_distinct_field_values("diagnostic_markers", DISCOVERY_CONFIG_TEST, False) self.assertEqual(len(dm_values), 2) self.assertTrue("Genetic Testing" in dm_values) self.assertTrue("Hematology Test" in dm_values) - dm_values_censored = await get_distinct_field_values(self.dm_fp, True) + dm_values_censored = await get_distinct_field_values("diagnostic_markers", DISCOVERY_CONFIG_TEST, True) self.assertListEqual(dm_values_censored, []) diff --git a/chord_metadata_service/discovery/tests/test_stats.py b/chord_metadata_service/discovery/tests/test_stats.py index 5796d0eff..3425f3849 100644 --- a/chord_metadata_service/discovery/tests/test_stats.py +++ b/chord_metadata_service/discovery/tests/test_stats.py @@ -29,11 +29,11 @@ def setUp(self): self.experiment.experiment_results.set([self.experiment_result]) async def test_individual_biosample_tissue_stats(self): - count, res = await individual_biosample_tissue_stats(pa_m.Individual.objects.all(), low_counts_censored=False) + count, res = await individual_biosample_tissue_stats(pa_m.Individual.objects.all(), None, False) self.assertEqual(count, 1) self.assertListEqual(res, [{"label": "wall of urinary bladder", "value": 1}]) async def individual_experiment_type_stats(self): - count, res = await individual_experiment_type_stats(pa_m.Individual.objects.all(), low_counts_censored=False) + count, res = await individual_experiment_type_stats(pa_m.Individual.objects.all(), None, False) self.assertEqual(count, 1) self.assertListEqual(res, [{"label": "DNA Methylation", "value": 1}]) diff --git a/chord_metadata_service/discovery/types.py b/chord_metadata_service/discovery/types.py index ee41bfd81..f6677c739 100644 --- a/chord_metadata_service/discovery/types.py +++ b/chord_metadata_service/discovery/types.py @@ -1,11 +1,12 @@ from typing import Any, Literal, TypedDict __all__ = [ - "OverviewSectionChart", - "OverviewSection", + "BinWithValue", "DiscoveryFieldProps", "DiscoveryRules", - "BinWithValue", + "DiscoveryConfig", + "OverviewSectionChart", + "OverviewSection", ] @@ -38,6 +39,13 @@ class DiscoveryRules(TypedDict): count_threshold: int +class DiscoveryConfig(TypedDict): + overview: list[OverviewSection] + search: list[SearchSection] + fields: dict[str, DiscoveryFieldProps] + rules: DiscoveryRules + + class BinWithValue(TypedDict): label: str value: int diff --git a/chord_metadata_service/discovery/utils.py b/chord_metadata_service/discovery/utils.py new file mode 100644 index 000000000..0563073aa --- /dev/null +++ b/chord_metadata_service/discovery/utils.py @@ -0,0 +1,57 @@ +from django.conf import settings +from django.core.exceptions import ObjectDoesNotExist +from rest_framework.request import Request as DrfRequest + +from chord_metadata_service.discovery.exceptions import DiscoveryConfigException +from chord_metadata_service.discovery.types import DiscoveryConfig +from ..chord import models as cm + +__all__ = [ + "get_request_discovery" +] + + +async def _get_project_discovery(project_id: str = None, project: cm.Project = None) -> dict: + if not project and project_id: + # retrieve project by ID if not provided + project = await cm.Project.objects.aget(identifier=project_id) + if not project.discovery: + # fallback on global discovery config if project has none + return settings.CONFIG_PUBLIC + return project.discovery + + +async def _get_dataset_discovery(dataset_id: str) -> dict: + dataset = await cm.Dataset.objects.aget(identifier=dataset_id) + if not dataset.discovery: + project = await cm.Project.objects.aget(datasets=dataset_id) + return await _get_project_discovery(project=project) + return dataset.discovery + + +async def get_discovery(project_id: str = None, dataset_id: str = None) -> DiscoveryConfig: + if dataset_id and project_id: + # check if the dataset belongs to the project + is_scope_valid = await cm.Dataset.objects.filter( + identifier=dataset_id, + project__identifier=project_id, + ).aexists() + if not is_scope_valid: + raise DiscoveryConfigException(dataset_id, project_id) + try: + if dataset_id: + # get dataset's discovery config if dataset_id is passed + return await _get_dataset_discovery(dataset_id) + elif project_id: + # get project's discovery config if project_id is passed and dataset_id is not + return await _get_project_discovery(project_id=project_id) + except ObjectDoesNotExist: + raise DiscoveryConfigException(dataset_id, project_id) + # fallback to config.json when no dataset or project is in the request + return settings.CONFIG_PUBLIC + + +async def get_request_discovery(request: DrfRequest) -> DiscoveryConfig: + dataset_id = request.query_params.get("dataset") + project_id = request.query_params.get("project") + return await get_discovery(project_id, dataset_id) diff --git a/chord_metadata_service/experiments/summaries.py b/chord_metadata_service/experiments/summaries.py index 261c832d3..2cdc907ed 100644 --- a/chord_metadata_service/experiments/summaries.py +++ b/chord_metadata_service/experiments/summaries.py @@ -4,6 +4,7 @@ from chord_metadata_service.discovery.censorship import thresholded_count from chord_metadata_service.discovery.stats import queryset_stats_for_field +from chord_metadata_service.discovery.types import DiscoveryConfig from . import models __all__ = [ @@ -14,7 +15,7 @@ ] -async def experiment_summary(experiments: QuerySet, low_counts_censored: bool) -> dict: +async def experiment_summary(experiments: QuerySet, discovery: DiscoveryConfig, low_counts_censored: bool) -> dict: # TODO: limit to authorized field list if we're in censored discovery mode - based on discovery config ( @@ -29,18 +30,18 @@ async def experiment_summary(experiments: QuerySet, low_counts_censored: bool) - extraction_protocol, ) = await asyncio.gather( experiments.acount(), - queryset_stats_for_field(experiments, "study_type", low_counts_censored), - queryset_stats_for_field(experiments, "experiment_type", low_counts_censored), - queryset_stats_for_field(experiments, "molecule", low_counts_censored), - queryset_stats_for_field(experiments, "library_strategy", low_counts_censored), - queryset_stats_for_field(experiments, "library_source", low_counts_censored), - queryset_stats_for_field(experiments, "library_selection", low_counts_censored), - queryset_stats_for_field(experiments, "library_layout", low_counts_censored), - queryset_stats_for_field(experiments, "extraction_protocol", low_counts_censored), + queryset_stats_for_field(experiments, "study_type", discovery, low_counts_censored), + queryset_stats_for_field(experiments, "experiment_type", discovery, low_counts_censored), + queryset_stats_for_field(experiments, "molecule", discovery, low_counts_censored), + queryset_stats_for_field(experiments, "library_strategy", discovery, low_counts_censored), + queryset_stats_for_field(experiments, "library_source", discovery, low_counts_censored), + queryset_stats_for_field(experiments, "library_selection", discovery, low_counts_censored), + queryset_stats_for_field(experiments, "library_layout", discovery, low_counts_censored), + queryset_stats_for_field(experiments, "extraction_protocol", discovery, low_counts_censored), ) return { - "count": thresholded_count(count, low_counts_censored), + "count": thresholded_count(count, discovery, low_counts_censored), "study_type": study_type, "experiment_type": experiment_type, "molecule": molecule, @@ -52,7 +53,11 @@ async def experiment_summary(experiments: QuerySet, low_counts_censored: bool) - } -async def experiment_result_summary(experiments: QuerySet, low_counts_censored: bool) -> dict: +async def experiment_result_summary( + experiments: QuerySet, + discovery: DiscoveryConfig, + low_counts_censored: bool +) -> dict: experiment_results = models.ExperimentResult.objects.filter(experiment__in=experiments) ( @@ -62,36 +67,36 @@ async def experiment_result_summary(experiments: QuerySet, low_counts_censored: usage, ) = await asyncio.gather( experiment_results.acount(), - queryset_stats_for_field(experiment_results, "file_format", low_counts_censored), - queryset_stats_for_field(experiment_results, "data_output_type", low_counts_censored), - queryset_stats_for_field(experiment_results, "usage", low_counts_censored), + queryset_stats_for_field(experiment_results, "file_format", discovery, low_counts_censored), + queryset_stats_for_field(experiment_results, "data_output_type", discovery, low_counts_censored), + queryset_stats_for_field(experiment_results, "usage", discovery, low_counts_censored), ) return { - "count": thresholded_count(count, low_counts_censored), + "count": thresholded_count(count, discovery, low_counts_censored), "file_format": file_format, "data_output_type": data_output_type, "usage": usage, } -async def instrument_summary(experiments: QuerySet, low_counts_censored: bool) -> dict: +async def instrument_summary(experiments: QuerySet, discovery: DiscoveryConfig, low_counts_censored: bool) -> dict: instruments = models.Instrument.objects.filter(experiment__in=experiments).distinct() count, platform, model = await asyncio.gather( instruments.acount(), - queryset_stats_for_field(instruments, "platform", low_counts_censored), - queryset_stats_for_field(instruments, "model", low_counts_censored), + queryset_stats_for_field(instruments, "platform", discovery, low_counts_censored), + queryset_stats_for_field(instruments, "model", discovery, low_counts_censored), ) return { - "count": thresholded_count(count, low_counts_censored), + "count": thresholded_count(count, discovery, low_counts_censored), "platform": platform, "model": model, } -async def dt_experiment_summary(experiments: QuerySet, low_counts_censored: bool) -> dict: +async def dt_experiment_summary(experiments: QuerySet, discovery: DiscoveryConfig, low_counts_censored: bool) -> dict: # Parallel-gather all statistics we may need for this response ( experiments_count, @@ -100,13 +105,13 @@ async def dt_experiment_summary(experiments: QuerySet, low_counts_censored: bool instrument_summary_val, ) = await asyncio.gather( experiments.acount(), - experiment_summary(experiments, low_counts_censored), - experiment_result_summary(experiments, low_counts_censored), - instrument_summary(experiments, low_counts_censored), + experiment_summary(experiments, discovery, low_counts_censored), + experiment_result_summary(experiments, discovery, low_counts_censored), + instrument_summary(experiments, discovery, low_counts_censored), ) return { - "count": thresholded_count(experiments_count, low_counts_censored), + "count": thresholded_count(experiments_count, discovery, low_counts_censored), "data_type_specific": { "experiments": experiment_summary_val, "experiment_results": exp_res_summary_val, diff --git a/chord_metadata_service/patients/api_views.py b/chord_metadata_service/patients/api_views.py index bfef00691..0a6a8b984 100644 --- a/chord_metadata_service/patients/api_views.py +++ b/chord_metadata_service/patients/api_views.py @@ -5,14 +5,13 @@ from bento_lib.responses import errors from bento_lib.search import build_search_response from datetime import datetime -from django.conf import settings from django.contrib.postgres.aggregates import ArrayAgg from django.core.exceptions import ValidationError from django.db.models import Count, F, Q, QuerySet from django.db.models.functions import Coalesce from django_filters.rest_framework import DjangoFilterBackend from drf_spectacular.utils import extend_schema, inline_serializer -from rest_framework import viewsets, filters, mixins, serializers +from rest_framework import viewsets, filters, mixins, serializers, status from rest_framework.decorators import action from rest_framework.request import Request as DrfRequest from rest_framework.response import Response @@ -20,8 +19,10 @@ from chord_metadata_service.discovery import responses as dres from chord_metadata_service.discovery.censorship import get_max_query_parameters, get_threshold, thresholded_count +from chord_metadata_service.discovery.exceptions import DiscoveryConfigException from chord_metadata_service.discovery.fields import get_field_options, filter_queryset_field_value from chord_metadata_service.discovery.stats import individual_biosample_tissue_stats, individual_experiment_type_stats +from chord_metadata_service.discovery.utils import get_request_discovery from chord_metadata_service.logger import logger from chord_metadata_service.phenopackets.api_views import BIOSAMPLE_PREFETCH, PHENOPACKET_PREFETCH from chord_metadata_service.phenopackets.models import Phenopacket @@ -149,11 +150,12 @@ def get_queryset(self): async def public_discovery_filter_queryset(request: DrfRequest, queryset: QuerySet, low_counts_censored: bool): # Check query parameters validity qp = request.query_params - if len(qp) > get_max_query_parameters(low_counts_censored=low_counts_censored): + discovery = await get_request_discovery(request) + if len(qp) > get_max_query_parameters(discovery, low_counts_censored): raise ValidationError(f"Wrong number of fields: {len(qp)}") - search_conf = settings.CONFIG_PUBLIC["search"] - field_conf = settings.CONFIG_PUBLIC["fields"] + search_conf = discovery["search"] + field_conf = discovery["fields"] queryable_fields = { f"{f}": field_conf[f] for section in search_conf for f in section["fields"] } @@ -163,7 +165,7 @@ async def public_discovery_filter_queryset(request: DrfRequest, queryset: QueryS raise ValidationError(f"Unsupported field used in query: {field}") field_props = queryable_fields[field] - options = await get_field_options(field_props, low_counts_censored=low_counts_censored) + options = await get_field_options(field, discovery, low_counts_censored) if ( value not in options and not ( @@ -203,7 +205,12 @@ class PublicListIndividuals(APIView): """ async def get(self, request, *_args, **_kwargs): - if not settings.CONFIG_PUBLIC: + try: + discovery = await get_request_discovery(request) + except DiscoveryConfigException as e: + return Response(e.message, status=status.HTTP_404_NOT_FOUND) + + if not discovery: return Response(dres.NO_PUBLIC_DATA_AVAILABLE) base_qs = Individual.objects.all() @@ -214,17 +221,17 @@ async def get(self, request, *_args, **_kwargs): *(e.error_list if hasattr(e, "error_list") else e.error_dict.items()), )) - qct = thresholded_count(await filtered_qs.acount(), low_counts_censored=True) + qct = thresholded_count(await filtered_qs.acount(), discovery, low_counts_censored=True) if qct == 0: logger.info( f"Public individuals endpoint recieved query params {request.query_params} which resulted in " - f"sub-threshold count: {qct} <= {get_threshold(True)}") + f"sub-threshold count: {qct} <= {get_threshold(discovery, low_counts_censored=True)}") return Response(dres.INSUFFICIENT_DATA_AVAILABLE) (tissues_count, sampled_tissues), (experiments_count, experiment_types) = await asyncio.gather( - individual_biosample_tissue_stats(filtered_qs, low_counts_censored=True), - individual_experiment_type_stats(filtered_qs, low_counts_censored=True), + individual_biosample_tissue_stats(filtered_qs, discovery, low_counts_censored=True), + individual_experiment_type_stats(filtered_qs, discovery, low_counts_censored=True), ) return Response({ @@ -248,7 +255,12 @@ class BeaconListIndividuals(APIView): """ async def get(self, request, *_args, **_kwargs): - if not settings.CONFIG_PUBLIC: + try: + discovery = await get_request_discovery(request) + except DiscoveryConfigException as e: + return Response(e.message, status=status.HTTP_404_NOT_FOUND) + + if not discovery: return Response(dres.NO_PUBLIC_DATA_AVAILABLE, status=404) base_qs = Individual.objects.all() @@ -259,8 +271,8 @@ async def get(self, request, *_args, **_kwargs): *(e.error_list if hasattr(e, "error_list") else e.error_dict.items())), status=400) (tissues_count, sampled_tissues), (experiments_count, experiment_types) = await asyncio.gather( - individual_biosample_tissue_stats(filtered_qs, low_counts_censored=False), - individual_experiment_type_stats(filtered_qs, low_counts_censored=False), + individual_biosample_tissue_stats(filtered_qs, discovery, low_counts_censored=False), + individual_experiment_type_stats(filtered_qs, discovery, low_counts_censored=False), ) return Response({ diff --git a/chord_metadata_service/patients/summaries.py b/chord_metadata_service/patients/summaries.py index a775971ac..025664c16 100644 --- a/chord_metadata_service/patients/summaries.py +++ b/chord_metadata_service/patients/summaries.py @@ -5,6 +5,7 @@ from chord_metadata_service.discovery.censorship import thresholded_count from chord_metadata_service.discovery.fields import get_age_numeric_binned from chord_metadata_service.discovery.stats import queryset_stats_for_field +from chord_metadata_service.discovery.types import DiscoveryConfig from . import models __all__ = ["individual_summary"] @@ -13,7 +14,7 @@ OVERVIEW_AGE_BIN_SIZE = 10 # TODO: configurable -async def individual_summary(phenopackets: QuerySet | None, low_counts_censored: bool): +async def individual_summary(phenopackets: QuerySet | None, discovery: DiscoveryConfig, low_counts_censored: bool): individuals = ( models.Individual.objects.all() if phenopackets is None else models.Individual.objects.filter(phenopackets__in=phenopackets).distinct() @@ -23,16 +24,16 @@ async def individual_summary(phenopackets: QuerySet | None, low_counts_censored: individuals.acount(), # - Sex related fields stats are precomputed here and post processed later # to include missing values inferred from the schema - queryset_stats_for_field(individuals, "sex", low_counts_censored), - queryset_stats_for_field(individuals, "karyotypic_sex", low_counts_censored), + queryset_stats_for_field(individuals, "sex", discovery, low_counts_censored), + queryset_stats_for_field(individuals, "karyotypic_sex", discovery, low_counts_censored), # - Age - get_age_numeric_binned(individuals, OVERVIEW_AGE_BIN_SIZE, low_counts_censored), + get_age_numeric_binned(individuals, OVERVIEW_AGE_BIN_SIZE, discovery, low_counts_censored), # - Taxonomy - queryset_stats_for_field(individuals, "taxonomy__label", low_counts_censored), + queryset_stats_for_field(individuals, "taxonomy__label", discovery, low_counts_censored), ) return { - "count": thresholded_count(individual_count, low_counts_censored), + "count": thresholded_count(individual_count, discovery, low_counts_censored), "sex": {k: individual_sex.get(k, 0) for k in (s[0] for s in models.Individual.SEX)}, "karyotypic_sex": {k: individual_k_sex.get(k, 0) for k in (s[0] for s in models.Individual.KARYOTYPIC_SEX)}, "age": individual_age, diff --git a/chord_metadata_service/phenopackets/summaries.py b/chord_metadata_service/phenopackets/summaries.py index c180d2c32..ede42944f 100644 --- a/chord_metadata_service/phenopackets/summaries.py +++ b/chord_metadata_service/phenopackets/summaries.py @@ -4,6 +4,7 @@ from chord_metadata_service.discovery.censorship import thresholded_count from chord_metadata_service.discovery.stats import stats_for_field, queryset_stats_for_field +from chord_metadata_service.discovery.types import DiscoveryConfig from chord_metadata_service.patients.summaries import individual_summary from . import models @@ -16,7 +17,7 @@ ] -async def biosample_summary(phenopackets: QuerySet, low_counts_censored: bool): +async def biosample_summary(phenopackets: QuerySet, discovery: DiscoveryConfig, low_counts_censored: bool): biosamples = models.Biosample.objects.filter(phenopacket__in=phenopackets) ( @@ -27,14 +28,14 @@ async def biosample_summary(phenopackets: QuerySet, low_counts_censored: bool): biosamples_is_control_sample, ) = await asyncio.gather( biosamples.acount(), - queryset_stats_for_field(biosamples, "taxonomy__label", low_counts_censored), - queryset_stats_for_field(biosamples, "sampled_tissue__label", low_counts_censored), - queryset_stats_for_field(biosamples, "histological_diagnosis__label", low_counts_censored), - queryset_stats_for_field(biosamples, "is_control_sample", low_counts_censored), + queryset_stats_for_field(biosamples, "taxonomy__label", discovery, low_counts_censored), + queryset_stats_for_field(biosamples, "sampled_tissue__label", discovery, low_counts_censored), + queryset_stats_for_field(biosamples, "histological_diagnosis__label", discovery, low_counts_censored), + queryset_stats_for_field(biosamples, "is_control_sample", discovery, low_counts_censored), ) return { - "count": thresholded_count(biosamples_count, low_counts_censored), + "count": thresholded_count(biosamples_count, discovery, low_counts_censored), "taxonomy": biosamples_taxonomy, "sampled_tissue": biosamples_sampled_tissue, "histological_diagnosis": biosamples_histological_diagnosis, @@ -42,28 +43,33 @@ async def biosample_summary(phenopackets: QuerySet, low_counts_censored: bool): } -async def disease_summary(phenopackets: QuerySet, low_counts_censored: bool): - disease_stats = await queryset_stats_for_field(phenopackets, "diseases__term__label", low_counts_censored) +async def disease_summary(phenopackets: QuerySet, discovery: DiscoveryConfig, low_counts_censored: bool): + disease_stats = await queryset_stats_for_field( + queryset=phenopackets, + field="diseases__term__label", + discovery=discovery, + low_counts_censored=low_counts_censored + ) return { # count is a number of unique disease terms (not all diseases in the database) - "count": thresholded_count(len(disease_stats), low_counts_censored), + "count": thresholded_count(len(disease_stats), discovery, low_counts_censored), "term": disease_stats, } -async def phenotypic_feature_summary(phenopackets: QuerySet, low_counts_censored: bool): +async def phenotypic_feature_summary(phenopackets: QuerySet, discovery: DiscoveryConfig, low_counts_censored: bool): phenotypic_features_count, phenotypic_features_type = await asyncio.gather( models.PhenotypicFeature.objects.filter(phenopacket__in=phenopackets).distinct('pftype').acount(), - stats_for_field(models.PhenotypicFeature, "pftype__label", low_counts_censored), + stats_for_field(models.PhenotypicFeature, "pftype__label", discovery, low_counts_censored), ) return { # count is a number of unique phenotypic feature types, not all phenotypic features in the database. - "count": thresholded_count(phenotypic_features_count, low_counts_censored), + "count": thresholded_count(phenotypic_features_count, discovery, low_counts_censored), "type": phenotypic_features_type, } -async def dt_phenopacket_summary(phenopackets: QuerySet, low_counts_censored: bool) -> dict: +async def dt_phenopacket_summary(phenopackets: QuerySet, discovery: DiscoveryConfig, low_counts_censored: bool) -> dict: # Parallel-gather all statistics we may need for this response ( phenopackets_count, @@ -73,14 +79,14 @@ async def dt_phenopacket_summary(phenopackets: QuerySet, low_counts_censored: bo pf_summary_val, ) = await asyncio.gather( phenopackets.acount(), - biosample_summary(phenopackets, low_counts_censored), - individual_summary(phenopackets, low_counts_censored), - disease_summary(phenopackets, low_counts_censored), - phenotypic_feature_summary(phenopackets, low_counts_censored), + biosample_summary(phenopackets, discovery, low_counts_censored), + individual_summary(phenopackets, discovery, low_counts_censored), + disease_summary(phenopackets, discovery, low_counts_censored), + phenotypic_feature_summary(phenopackets, discovery, low_counts_censored), ) return { - "count": thresholded_count(phenopackets_count, low_counts_censored), + "count": thresholded_count(phenopackets_count, discovery, low_counts_censored), "data_type_specific": { "biosamples": biosample_summary_val, "diseases": disease_summary_val, diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 9f5b2afa2..8f5dd2f06 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -3,7 +3,7 @@ from adrf.decorators import api_view from django.db.models import QuerySet from drf_spectacular.utils import extend_schema, inline_serializer -from rest_framework import serializers +from rest_framework import serializers, status from rest_framework.decorators import permission_classes from rest_framework.permissions import AllowAny from rest_framework.request import Request as DrfRequest @@ -11,6 +11,9 @@ from chord_metadata_service.chord.data_types import DATA_TYPE_PHENOPACKET, DATA_TYPE_EXPERIMENT from chord_metadata_service.chord.permissions import OverrideOrSuperUserOnly +from chord_metadata_service.discovery.exceptions import DiscoveryConfigException +from chord_metadata_service.discovery.types import DiscoveryConfig +from chord_metadata_service.discovery.utils import get_request_discovery from chord_metadata_service.experiments import models as experiments_models from chord_metadata_service.experiments.summaries import dt_experiment_summary from chord_metadata_service.metadata.service_info import get_service_info @@ -33,10 +36,10 @@ async def service_info(_request: DrfRequest): return Response(await get_service_info()) -async def build_overview_response(phenopackets: QuerySet, experiments: QuerySet): +async def build_overview_response(phenopackets: QuerySet, experiments: QuerySet, discovery: DiscoveryConfig): phenopackets_summary, experiments_summary = await asyncio.gather( - dt_phenopacket_summary(phenopackets, low_counts_censored=False), - dt_experiment_summary(experiments, low_counts_censored=False), + dt_phenopacket_summary(phenopackets, discovery, low_counts_censored=False), + dt_experiment_summary(experiments, discovery, low_counts_censored=False), ) return Response({ @@ -59,18 +62,22 @@ async def build_overview_response(phenopackets: QuerySet, experiments: QuerySet) ) @api_view(["GET"]) @permission_classes([OverrideOrSuperUserOnly]) -async def overview(_request: DrfRequest): +async def overview(request: DrfRequest): """ get: Overview of all Phenopackets and experiments in the database - private endpoint """ # TODO: permissions based on project - this endpoint should be scrapped / completely rethought + try: + discovery = await get_request_discovery(request) + except DiscoveryConfigException as e: + return Response(e.message, status=status.HTTP_404_NOT_FOUND) phenopackets = pheno_models.Phenopacket.objects.all() experiments = experiments_models.Experiment.objects.all() - return await build_overview_response(phenopackets, experiments) + return await build_overview_response(phenopackets, experiments, discovery) @api_view(["GET"]) @@ -94,6 +101,10 @@ async def search_overview(request: DrfRequest): """ # TODO: this should be project / dataset-scoped and probably shouldn't even exist as-is + try: + discovery = await get_request_discovery(request) + except DiscoveryConfigException as e: + return Response(e.message, status=status.HTTP_404_NOT_FOUND) individual_ids = request.GET.getlist("id") if request.method == "GET" else request.data.get("id", []) phenopackets = pheno_models.Phenopacket.objects.all().filter(subject_id__in=individual_ids) @@ -104,4 +115,4 @@ async def search_overview(request: DrfRequest): # - in general, this endpoint is less than ideal and should be derived from search results themselves vs. this # hack-y mess of passing IDs around. - return await build_overview_response(phenopackets, experiments) + return await build_overview_response(phenopackets, experiments, discovery) diff --git a/chord_metadata_service/restapi/urls.py b/chord_metadata_service/restapi/urls.py index 415e98ac7..e3e2f738e 100644 --- a/chord_metadata_service/restapi/urls.py +++ b/chord_metadata_service/restapi/urls.py @@ -3,6 +3,7 @@ from chord_metadata_service.chord import api_views as chord_views from chord_metadata_service.discovery.api_views import ( + public_rules, public_search_fields, public_overview, public_dataset, @@ -89,6 +90,7 @@ path('public_search_fields', public_search_fields, name='public-search-fields'), path('public_overview', public_overview, name='public-overview',), path('public_dataset', public_dataset, name='public-dataset'), + path('public_rules', public_rules, name='public-rules'), # uncensored endpoint for beacon search using fields from config.json path('beacon_search', individual_views.BeaconListIndividuals.as_view(), name='beacon-search'), From c39ad0b5a1feee7ed4ae03c360d619ed8589057b Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 17 Jun 2024 18:06:13 +0000 Subject: [PATCH 27/38] test get_public_model_name --- chord_metadata_service/discovery/fields_utils.py | 2 +- .../discovery/tests/test_fields_utils.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/chord_metadata_service/discovery/fields_utils.py b/chord_metadata_service/discovery/fields_utils.py index 3ff4ca43d..bfb096fe9 100644 --- a/chord_metadata_service/discovery/fields_utils.py +++ b/chord_metadata_service/discovery/fields_utils.py @@ -51,7 +51,7 @@ def get_model_and_field(field_id: str) -> tuple[Type[Model], str]: def get_public_model_name(model: Type[Model]) -> PublicModelNames: model_name = [key for key, m in PUBLIC_MODEL_NAMES_TO_MODEL.items() if m == model] if len(model_name) != 1: - raise ValueError(f"Provided model {model} is not available for public.") + raise NotImplementedError(f"Provided model {model} is not available for public.") return model_name[0] diff --git a/chord_metadata_service/discovery/tests/test_fields_utils.py b/chord_metadata_service/discovery/tests/test_fields_utils.py index 4f651d66b..d20407152 100644 --- a/chord_metadata_service/discovery/tests/test_fields_utils.py +++ b/chord_metadata_service/discovery/tests/test_fields_utils.py @@ -3,10 +3,12 @@ from django.db.models.base import ModelBase from chord_metadata_service.discovery.tests.constants import DISCOVERY_CONFIG_TEST +from chord_metadata_service.discovery.model_lookups import PUBLIC_MODEL_NAMES_TO_MODEL from ..fields_utils import ( get_jsonb_path_query, get_json_range_condition, get_model_and_field, + get_public_model_name, labelled_range_generator, get_nested_json_condition ) @@ -30,6 +32,14 @@ def test_get_model_nested_field(self): def test_get_wrong_model(self): self.assertRaises(NotImplementedError, get_model_and_field, "junk/age_numeric") + def test_get_public_model_name(self): + for name, model in PUBLIC_MODEL_NAMES_TO_MODEL.items(): + model_name = get_public_model_name(model) + self.assertEqual(name, model_name) + + def test_get_public_model_name_wrong(self): + self.assertRaises(NotImplementedError, get_public_model_name, ModelBase) + class TestLabelledRangeGenerator(TestCase): def setUp(self): From af1ca4fd936fd4b3f12dc0ffcfc4b57816731e03 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 17 Jun 2024 19:50:07 +0000 Subject: [PATCH 28/38] full subschema URI patching for pheno and exp --- .../discovery/model_lookups.py | 4 ++-- chord_metadata_service/discovery/schemas.py | 3 --- chord_metadata_service/experiments/schemas.py | 5 ++++- .../experiments/tests/test_api.py | 17 +++++++++++++++++ .../phenopackets/schemas.py | 3 ++- .../phenopackets/tests/test_api.py | 16 ++++++---------- .../restapi/schema_utils.py | 19 ++++++++++++++----- chord_metadata_service/restapi/urls.py | 4 ++-- 8 files changed, 47 insertions(+), 24 deletions(-) diff --git a/chord_metadata_service/discovery/model_lookups.py b/chord_metadata_service/discovery/model_lookups.py index 6b632b115..0dc1ce13d 100644 --- a/chord_metadata_service/discovery/model_lookups.py +++ b/chord_metadata_service/discovery/model_lookups.py @@ -29,8 +29,8 @@ class ScopeFilter(TypedDict): filter: str - prefetch_related: tuple[str] - select_related: tuple[str] + prefetch_related: tuple[str, ...] + select_related: tuple[str, ...] class ProjectDatasetScopeFilters(TypedDict): diff --git a/chord_metadata_service/discovery/schemas.py b/chord_metadata_service/discovery/schemas.py index 41f577276..808f2edb4 100644 --- a/chord_metadata_service/discovery/schemas.py +++ b/chord_metadata_service/discovery/schemas.py @@ -103,6 +103,3 @@ }, "additionalProperties": False } - - -# DATS diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index c4545b1ea..0b78fc760 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -12,6 +12,9 @@ "EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA", + "experiment_resource", + "experiment_registry", + "experiment_resolver", ] experiment_base_uri = f"{settings.SCHEMAS_BASE_URL}/experiment" @@ -198,7 +201,7 @@ "instrument": INSTRUMENT_SCHEMA }, "required": ["id", "experiment_type"] -}, EXPERIMENT) +}, EXPERIMENT, override_children=True) # URI referencing experiment_resource = Resource.from_contents(EXPERIMENT_SCHEMA) diff --git a/chord_metadata_service/experiments/tests/test_api.py b/chord_metadata_service/experiments/tests/test_api.py index b4c594965..2687d82e6 100644 --- a/chord_metadata_service/experiments/tests/test_api.py +++ b/chord_metadata_service/experiments/tests/test_api.py @@ -1,6 +1,8 @@ +from django.urls import reverse from jsonschema.validators import Draft7Validator from django.test import TestCase +from chord_metadata_service.experiments.schemas import EXPERIMENT_SCHEMA from chord_metadata_service.restapi.api_renderers import ExperimentCSVRenderer import csv import io @@ -188,3 +190,18 @@ def test_csv_render_with_empty_data(self): for row in reader: for key in row: self.assertEqual(row[key], '') + + +class TestExperimentSchema(APITestCase): + + def test_experiment_schema(self): + response = self.client.get(reverse("experiment-schema")) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json(), EXPERIMENT_SCHEMA) + + def test_experiment_subschema(self): + for subschema in EXPERIMENT_SCHEMA["properties"].values(): + property = subschema["$id"].split('/')[-1] + response = self.client.get(reverse("experiment-subschema", kwargs={"subschema": property})) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json(), subschema) diff --git a/chord_metadata_service/phenopackets/schemas.py b/chord_metadata_service/phenopackets/schemas.py index 298f1d6ce..849a0d3cd 100644 --- a/chord_metadata_service/phenopackets/schemas.py +++ b/chord_metadata_service/phenopackets/schemas.py @@ -25,6 +25,7 @@ named_one_of, sub_schema_uri, describe_schema, + tag_ids_and_describe, ) from . import descriptions @@ -642,7 +643,7 @@ "required": ["id", "progress_status"] }, descriptions=descriptions.INTERPRETATION) -PHENOPACKET_SCHEMA = describe_schema({ +PHENOPACKET_SCHEMA = tag_ids_and_describe({ "$schema": DRAFT_07, "$id": phenopacket_base_uri, "title": "Phenopacket schema", diff --git a/chord_metadata_service/phenopackets/tests/test_api.py b/chord_metadata_service/phenopackets/tests/test_api.py index 1f24e942c..f3b17fa87 100644 --- a/chord_metadata_service/phenopackets/tests/test_api.py +++ b/chord_metadata_service/phenopackets/tests/test_api.py @@ -444,13 +444,9 @@ def test_get_phenopacket_schema(self): self.assertEqual(response.status_code, status.HTTP_200_OK) self.assertEqual(schema, PHENOPACKET_SCHEMA) - def test_get_phenopacket_subschemas(self): - response_biosample = self.client.get("/api/schemas/phenopacket/biosample") - biosample_schema = response_biosample.json() - self.assertEqual(response_biosample.status_code, status.HTTP_200_OK) - self.assertEqual(biosample_schema, PHENOPACKET_SCHEMA["properties"]["biosamples"]["items"]) - - response_individual = self.client.get("/api/schemas/phenopacket/individual") - individual_schema = response_individual.json() - self.assertEqual(response_individual.status_code, status.HTTP_200_OK) - self.assertEqual(individual_schema, PHENOPACKET_SCHEMA["properties"]["subject"]) + def test_get_pheno_subschemas(self): + for subschema in PHENOPACKET_SCHEMA["properties"].values(): + prop_key = subschema["$id"].split('/')[-1] + response = self.client.get(reverse("chord-phenopacket-subschema", kwargs={"subschema": prop_key})) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertEqual(response.json(), subschema) diff --git a/chord_metadata_service/restapi/schema_utils.py b/chord_metadata_service/restapi/schema_utils.py index 41189af09..6b5c78589 100644 --- a/chord_metadata_service/restapi/schema_utils.py +++ b/chord_metadata_service/restapi/schema_utils.py @@ -195,7 +195,7 @@ def tag_schema_with_search_properties(schema, search_descriptions: dict | None): return schema_with_search -def tag_schema_with_nested_ids(schema: dict): +def tag_schema_with_nested_ids(schema: dict, override_children: bool = False): if "$id" not in schema: raise ValueError("Schema to tag with nested IDs must have $id") @@ -206,7 +206,10 @@ def tag_schema_with_nested_ids(schema: dict): return { **schema, "properties": { - k: tag_schema_with_nested_ids({**v, "$id": f"{schema_id}/{k}"} if "$id" not in v else v) + k: tag_schema_with_nested_ids( + schema={**v, "$id": f"{schema_id}/{k}"} if override_children or "$id" not in v else v, + override_children=override_children + ) for k, v in schema["properties"].items() }, } if "properties" in schema else schema @@ -217,15 +220,21 @@ def tag_schema_with_nested_ids(schema: dict): "items": tag_schema_with_nested_ids({ **schema["items"], "$id": f"{schema_id}/item", - } if "$id" not in schema["items"] else schema["items"]), + } if override_children or "$id" not in schema["items"] else schema["items"], override_children), } if "items" in schema else schema # If nothing to tag, return itself (base case) return schema -def tag_ids_and_describe(schema: dict, descriptions: dict): - return tag_schema_with_nested_ids(describe_schema(schema, descriptions)) +def tag_ids_and_describe(schema: dict, descriptions: dict, override_children: bool = False): + """ + Adds descriptions and generates $id fields on a given schema. + Set override_children to True to ignore existing children IDs and use the parent's structure. + Helps to make sure that all the properties of a schema have a resolvable URIs, even if reusing definitions. + e.g. ontology_list may need to be resolved from an experiment or phenopacket URI. + """ + return tag_schema_with_nested_ids(describe_schema(schema, descriptions), override_children) def customize_schema(first_typeof: dict, second_typeof: dict, first_property: str, second_property: str, diff --git a/chord_metadata_service/restapi/urls.py b/chord_metadata_service/restapi/urls.py index e3e2f738e..5b1bf087e 100644 --- a/chord_metadata_service/restapi/urls.py +++ b/chord_metadata_service/restapi/urls.py @@ -64,11 +64,11 @@ # apps schemas path('schemas/phenopacket', phenopacket_views.get_chord_phenopacket_schema, name="chord-phenopacket-schema"), - path('schemas/phenopacket/', phenopacket_views.get_chord_phenopacket_subschema, + path('schemas/phenopacket/', phenopacket_views.get_chord_phenopacket_subschema, name="chord-phenopacket-subschema"), path('schemas/experiment', experiment_views.get_experiment_schema, name="experiment-schema"), - path('schemas/experiment/', experiment_views.get_experiment_subschema, name="experiment-subschema"), + path('schemas/experiment/', experiment_views.get_experiment_subschema, name="experiment-subschema"), path('schemas/discovery', discovery_schema, name="discovery-schema"), # extra properties schema types path('extra_properties_schema_types', extra_properties_schema_types, name="extra-properties-schema-types"), From a0b116bef80e1ee2aeef904b71648af12cb34268 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 17 Jun 2024 21:15:46 +0000 Subject: [PATCH 29/38] use SERVICE_URL_BASE_PATH for base schemas URIs --- chord_metadata_service/experiments/schemas.py | 6 +++--- chord_metadata_service/metadata/settings.py | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/chord_metadata_service/experiments/schemas.py b/chord_metadata_service/experiments/schemas.py index 0b78fc760..cad38f7c4 100644 --- a/chord_metadata_service/experiments/schemas.py +++ b/chord_metadata_service/experiments/schemas.py @@ -6,15 +6,15 @@ from chord_metadata_service.restapi.schema_utils import tag_ids_and_describe, sub_schema_uri __all__ = [ + "experiment_resource", + "experiment_registry", + "experiment_resolver", "DATA_FILE_OR_RECORD_URL_SCHEMA", "EXPERIMENT_RESULT_FILE_INDEX_SCHEMA", "EXPERIMENT_RESULT_FILE_INDEX_LIST_SCHEMA", "EXPERIMENT_SCHEMA", "EXPERIMENT_RESULT_SCHEMA", "INSTRUMENT_SCHEMA", - "experiment_resource", - "experiment_registry", - "experiment_resolver", ] experiment_base_uri = f"{settings.SCHEMAS_BASE_URL}/experiment" diff --git a/chord_metadata_service/metadata/settings.py b/chord_metadata_service/metadata/settings.py index 78b2b796c..b5e5b8923 100644 --- a/chord_metadata_service/metadata/settings.py +++ b/chord_metadata_service/metadata/settings.py @@ -50,8 +50,10 @@ BENTO_CONTAINER_LOCAL = os.environ.get("BENTO_CONTAINER_LOCAL", "false").lower() == "true" CHORD_URL = os.environ.get("CHORD_URL") # Leave None if not specified, for running in other contexts -if CHORD_URL: - SCHEMAS_BASE_URL = CHORD_URL + "/api/metadata/api/schemas" + +SERVICE_URL_BASE_PATH = os.environ.get("SERVICE_URL_BASE_PATH") +if SERVICE_URL_BASE_PATH: + SCHEMAS_BASE_URL = SERVICE_URL_BASE_PATH + "/api/schemas" else: SCHEMAS_BASE_URL = "/chord_metadata_service/schemas" From cd61cbef95df29bfcf14138522f1be4d47f52468 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 17 Jun 2024 21:18:43 +0000 Subject: [PATCH 30/38] schema scoping todos --- chord_metadata_service/experiments/api_views.py | 2 ++ chord_metadata_service/phenopackets/api_views.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/chord_metadata_service/experiments/api_views.py b/chord_metadata_service/experiments/api_views.py index 2ad6f44b9..2f67b9e44 100644 --- a/chord_metadata_service/experiments/api_views.py +++ b/chord_metadata_service/experiments/api_views.py @@ -146,6 +146,7 @@ def get_experiment_schema(_request): get: Experiment schema """ + # TODO: project-scope return Response(EXPERIMENT_SCHEMA) @@ -156,5 +157,6 @@ def get_experiment_subschema(_request, subschema: str): get: Experiment sub-schema """ + # TODO: project-scope schema = experiment_resolver.lookup(ref=f"{experiment_base_uri}/{subschema}").contents return Response(schema) diff --git a/chord_metadata_service/phenopackets/api_views.py b/chord_metadata_service/phenopackets/api_views.py index a67380314..37c3ea871 100644 --- a/chord_metadata_service/phenopackets/api_views.py +++ b/chord_metadata_service/phenopackets/api_views.py @@ -230,6 +230,7 @@ def get_chord_phenopacket_schema(_request): get: Chord phenopacket schema that can be shared with data providers. """ + # TODO: project-scope return Response(PHENOPACKET_SCHEMA) @@ -240,5 +241,6 @@ def get_chord_phenopacket_subschema(_request, subschema: str): get: Chord phenopacket schema that can be shared with data providers. """ + # TODO: project-scope schema = phenopacket_resolver.lookup(ref=f"{phenopacket_base_uri}/{subschema}").contents return Response(schema) From af6dbb7376efbf6c58c6dc26b117b89c305060fc Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Tue, 18 Jun 2024 19:21:01 +0000 Subject: [PATCH 31/38] schemas base url refact --- chord_metadata_service/discovery/schemas.py | 10 +++------- chord_metadata_service/restapi/schemas.py | 9 ++------- 2 files changed, 5 insertions(+), 14 deletions(-) diff --git a/chord_metadata_service/discovery/schemas.py b/chord_metadata_service/discovery/schemas.py index 808f2edb4..9fa5aac7d 100644 --- a/chord_metadata_service/discovery/schemas.py +++ b/chord_metadata_service/discovery/schemas.py @@ -1,13 +1,9 @@ from django.conf import settings -from pathlib import Path from chord_metadata_service.restapi.schema_utils import ( - sub_schema_uri, array_of, base_type, SchemaTypes, enum_of, get_schema_app_id + sub_schema_uri, array_of, base_type, SchemaTypes, enum_of ) -if settings.SCHEMAS_BASE_URL: - base_uri = settings.SCHEMAS_BASE_URL -else: - base_uri = get_schema_app_id(Path(__file__).parent.name) +discovery_base_uri = sub_schema_uri(settings.SCHEMAS_BASE_URL, "discovery") DISCOVERY_FIELD_SCHEMA = { "description": "Field configuration", @@ -86,7 +82,7 @@ } DISCOVERY_SCHEMA = { - "$id": sub_schema_uri(base_uri, "discovery"), + "$id": discovery_base_uri, "description": "Discovery configuration for public fields/search", "type": "object", "properties": { diff --git a/chord_metadata_service/restapi/schemas.py b/chord_metadata_service/restapi/schemas.py index a5b69690e..48ebe90cf 100644 --- a/chord_metadata_service/restapi/schemas.py +++ b/chord_metadata_service/restapi/schemas.py @@ -1,10 +1,8 @@ -from pathlib import Path - from django.conf import settings from . import descriptions from .description_utils import EXTRA_PROPERTIES, ONTOLOGY_CLASS as ONTOLOGY_CLASS_DESC from .schema_utils import DATE_TIME, DRAFT_07, SchemaTypes, base_type, tag_ids_and_describe, \ - tag_schema_with_nested_ids, named_one_of, get_schema_app_id, sub_schema_uri + tag_schema_with_nested_ids, named_one_of, sub_schema_uri # Individual schemas for validation of JSONField values @@ -24,13 +22,10 @@ "TIME_ELEMENT_SCHEMA" ] +base_uri = settings.SCHEMAS_BASE_URL # ======================== Phenopackets based schemas ========================= -if settings.SCHEMAS_BASE_URL: - base_uri = settings.SCHEMAS_BASE_URL -else: - base_uri = get_schema_app_id(Path(__file__).parent.name) phenopacket_base_uri = sub_schema_uri(base_uri=base_uri, name="phenopacket") ONTOLOGY_CLASS = tag_ids_and_describe({ From 5f2b09b322ce0ffd2c0b1efdbf9995efda1e4f99 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 20 Jun 2024 15:33:05 +0000 Subject: [PATCH 32/38] remove discovery scoping from private dataset_summary endpoint --- chord_metadata_service/chord/views_search.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py index affdb0a33..73858eff0 100644 --- a/chord_metadata_service/chord/views_search.py +++ b/chord_metadata_service/chord/views_search.py @@ -50,18 +50,18 @@ OUTPUT_FORMAT_BENTO_SEARCH_RESULT = "bento_search_result" -async def experiment_dataset_summary(_request: DrfRequest, dataset: Dataset, discovery: DiscoveryConfig): +async def experiment_dataset_summary(dataset: Dataset): return await dt_experiment_summary( Experiment.objects.filter(dataset=dataset), - discovery, + discovery=None, low_counts_censored=False ) -async def phenopacket_dataset_summary(_request: DrfRequest, dataset: Dataset, discovery: DiscoveryConfig): +async def phenopacket_dataset_summary(dataset: Dataset): return await dt_phenopacket_summary( Phenopacket.objects.filter(dataset=dataset), - discovery, + discovery=None, low_counts_censored=False ) @@ -561,15 +561,10 @@ def private_dataset_search(request: DrfRequest, dataset_id: str): async def dataset_summary(request: DrfRequest, dataset_id: str): # TODO: PERMISSIONS - try: - discovery = await get_discovery(dataset_id=dataset_id) - except DiscoveryConfigException as e: - return Response(e.message, status=status.HTTP_404_NOT_FOUND) - dataset = await Dataset.objects.aget(identifier=dataset_id) summaries = await asyncio.gather( - *map(lambda dt: DATASET_DATA_TYPE_SUMMARY_FUNCTIONS[dt](request, dataset, discovery), + *map(lambda dt: DATASET_DATA_TYPE_SUMMARY_FUNCTIONS[dt](dataset), DATASET_DATA_TYPE_SUMMARY_FUNCTIONS.keys()) ) From 8eb3d1b6b1c80fd2cc22d5fa4a429e1b1cdfcc0b Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Thu, 20 Jun 2024 15:34:30 +0000 Subject: [PATCH 33/38] lint --- chord_metadata_service/chord/views_search.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py index 73858eff0..ea3f22593 100644 --- a/chord_metadata_service/chord/views_search.py +++ b/chord_metadata_service/chord/views_search.py @@ -23,9 +23,6 @@ from typing import Callable from chord_metadata_service.chord.permissions import OverrideOrSuperUserOnly, ReadOnly -from chord_metadata_service.discovery.exceptions import DiscoveryConfigException -from chord_metadata_service.discovery.types import DiscoveryConfig -from chord_metadata_service.discovery.utils import get_discovery from chord_metadata_service.logger import logger from chord_metadata_service.experiments.api_views import EXPERIMENT_SELECT_REL, EXPERIMENT_PREFETCH From e862abc9aa0e4b9462c0d07bdba5140b039631e2 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 15 Jul 2024 19:26:10 +0000 Subject: [PATCH 34/38] address comments --- chord_metadata_service/chord/views_search.py | 6 +++--- .../discovery/model_lookups.py | 12 ++++++------ .../restapi/tests/test_api.py | 18 ++++++++++++++++-- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/chord_metadata_service/chord/views_search.py b/chord_metadata_service/chord/views_search.py index ea3f22593..4c8c50ad1 100644 --- a/chord_metadata_service/chord/views_search.py +++ b/chord_metadata_service/chord/views_search.py @@ -47,7 +47,7 @@ OUTPUT_FORMAT_BENTO_SEARCH_RESULT = "bento_search_result" -async def experiment_dataset_summary(dataset: Dataset): +async def experiment_dataset_summary(request: DrfRequest, dataset: Dataset): return await dt_experiment_summary( Experiment.objects.filter(dataset=dataset), discovery=None, @@ -55,7 +55,7 @@ async def experiment_dataset_summary(dataset: Dataset): ) -async def phenopacket_dataset_summary(dataset: Dataset): +async def phenopacket_dataset_summary(request: DrfRequest, dataset: Dataset): return await dt_phenopacket_summary( Phenopacket.objects.filter(dataset=dataset), discovery=None, @@ -561,7 +561,7 @@ async def dataset_summary(request: DrfRequest, dataset_id: str): dataset = await Dataset.objects.aget(identifier=dataset_id) summaries = await asyncio.gather( - *map(lambda dt: DATASET_DATA_TYPE_SUMMARY_FUNCTIONS[dt](dataset), + *map(lambda dt: DATASET_DATA_TYPE_SUMMARY_FUNCTIONS[dt](request, dataset), DATASET_DATA_TYPE_SUMMARY_FUNCTIONS.keys()) ) diff --git a/chord_metadata_service/discovery/model_lookups.py b/chord_metadata_service/discovery/model_lookups.py index 0dc1ce13d..40b09358b 100644 --- a/chord_metadata_service/discovery/model_lookups.py +++ b/chord_metadata_service/discovery/model_lookups.py @@ -42,31 +42,31 @@ class ProjectDatasetScopeFilters(TypedDict): "individual": { "project": { "filter": "phenopackets__dataset__project__identifier", - "prefetch_related": ("phenopackets__dataset__project") + "prefetch_related": ("phenopackets__dataset__project",) }, "dataset": { "filter": "phenopackets__dataset__identifier", - "prefetch_related": ("phenopackets__dataset") + "prefetch_related": ("phenopackets__dataset",) }, }, "biosample": { "project": { "filter": "phenopacket__dataset__project__identifier", - "prefetch_related": ("phenopacket__dataset__project"), + "prefetch_related": ("phenopacket__dataset__project",), }, "dataset": { "filter": "phenopacket__dataset__identifier", - "prefetch_related": ("phenopacket__dataset"), + "prefetch_related": ("phenopacket__dataset",), }, }, "experiment": { "project": { "filter": "dataset__project__identifier", - "prefetch_related": ("dataset__project"), + "prefetch_related": ("dataset__project",), }, "dataset": { "filter": "dataset__identifier", - "prefetch_related": ("dataset"), + "prefetch_related": ("dataset",), }, }, } diff --git a/chord_metadata_service/restapi/tests/test_api.py b/chord_metadata_service/restapi/tests/test_api.py index a74d8b31f..a4e9a00e3 100644 --- a/chord_metadata_service/restapi/tests/test_api.py +++ b/chord_metadata_service/restapi/tests/test_api.py @@ -1,10 +1,12 @@ import json +import uuid from asgiref.sync import async_to_sync from django.urls import reverse from rest_framework import status from rest_framework.test import APITestCase +from chord_metadata_service.chord.tests.helpers import ProjectTestCase from chord_metadata_service.metadata.service_info import get_service_info from chord_metadata_service.phenopackets import models as ph_m from chord_metadata_service.phenopackets.tests import constants as ph_c @@ -38,7 +40,7 @@ def test_extra_properties_schema_types(self): self.assertEqual(response_obj, expected_response) -class OverviewTest(APITestCase): +class OverviewTest(APITestCase, ProjectTestCase): def setUp(self) -> None: # create 2 phenopackets for 2 individuals; each individual has 1 biosample; @@ -48,7 +50,8 @@ def setUp(self) -> None: self.metadata_1 = ph_m.MetaData.objects.create(**ph_c.VALID_META_DATA_1) self.metadata_2 = ph_m.MetaData.objects.create(**ph_c.VALID_META_DATA_2) self.phenopacket_1 = ph_m.Phenopacket.objects.create( - **ph_c.valid_phenopacket(subject=self.individual_1, meta_data=self.metadata_1) + **ph_c.valid_phenopacket(subject=self.individual_1, meta_data=self.metadata_1), + dataset=self.dataset, ) self.phenopacket_2 = ph_m.Phenopacket.objects.create( id='phenopacket:2', subject=self.individual_2, meta_data=self.metadata_2 @@ -110,6 +113,17 @@ def test_overview(self): self.assertEqual(experiment_res['data_type_specific']['instruments']['platform']['Illumina'], 2) self.assertEqual(experiment_res['data_type_specific']['instruments']['model']['Illumina HiSeq 4000'], 2) + def test_scoped_overview(self): + # OK request + response = self.client.get(f'/api/overview?project={self.project.identifier}') + response_obj = response.json() + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertIsInstance(response_obj, dict) + + # 404 request: wrong dataset + response = self.client.get(f'/api/overview?project={self.project.identifier}&dataset={uuid.uuid4()}') + self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + def test_search_overview(self): payload = json.dumps({'id': [ph_c.VALID_INDIVIDUAL_1['id']]}) response = self.client.post(reverse('search-overview'), payload, content_type='application/json') From c6a3abedd4e4de2b9cab1afe90d5d3d77f2a02f0 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 15 Jul 2024 19:49:58 +0000 Subject: [PATCH 35/38] unscope private overview --- chord_metadata_service/discovery/utils.py | 3 ++- chord_metadata_service/restapi/api_views.py | 8 +++----- chord_metadata_service/restapi/tests/test_api.py | 12 ------------ 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/chord_metadata_service/discovery/utils.py b/chord_metadata_service/discovery/utils.py index 0563073aa..a03d38c8b 100644 --- a/chord_metadata_service/discovery/utils.py +++ b/chord_metadata_service/discovery/utils.py @@ -7,7 +7,8 @@ from ..chord import models as cm __all__ = [ - "get_request_discovery" + "get_discovery", + "get_request_discovery", ] diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index 8f5dd2f06..a0929373e 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -13,7 +13,7 @@ from chord_metadata_service.chord.permissions import OverrideOrSuperUserOnly from chord_metadata_service.discovery.exceptions import DiscoveryConfigException from chord_metadata_service.discovery.types import DiscoveryConfig -from chord_metadata_service.discovery.utils import get_request_discovery +from chord_metadata_service.discovery.utils import get_discovery, get_request_discovery from chord_metadata_service.experiments import models as experiments_models from chord_metadata_service.experiments.summaries import dt_experiment_summary from chord_metadata_service.metadata.service_info import get_service_info @@ -69,10 +69,8 @@ async def overview(request: DrfRequest): """ # TODO: permissions based on project - this endpoint should be scrapped / completely rethought - try: - discovery = await get_request_discovery(request) - except DiscoveryConfigException as e: - return Response(e.message, status=status.HTTP_404_NOT_FOUND) + # use node level discovery config for private overview + discovery = await get_discovery() phenopackets = pheno_models.Phenopacket.objects.all() experiments = experiments_models.Experiment.objects.all() diff --git a/chord_metadata_service/restapi/tests/test_api.py b/chord_metadata_service/restapi/tests/test_api.py index a4e9a00e3..a06f3be5b 100644 --- a/chord_metadata_service/restapi/tests/test_api.py +++ b/chord_metadata_service/restapi/tests/test_api.py @@ -1,5 +1,4 @@ import json -import uuid from asgiref.sync import async_to_sync from django.urls import reverse @@ -113,17 +112,6 @@ def test_overview(self): self.assertEqual(experiment_res['data_type_specific']['instruments']['platform']['Illumina'], 2) self.assertEqual(experiment_res['data_type_specific']['instruments']['model']['Illumina HiSeq 4000'], 2) - def test_scoped_overview(self): - # OK request - response = self.client.get(f'/api/overview?project={self.project.identifier}') - response_obj = response.json() - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertIsInstance(response_obj, dict) - - # 404 request: wrong dataset - response = self.client.get(f'/api/overview?project={self.project.identifier}&dataset={uuid.uuid4()}') - self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) - def test_search_overview(self): payload = json.dumps({'id': [ph_c.VALID_INDIVIDUAL_1['id']]}) response = self.client.post(reverse('search-overview'), payload, content_type='application/json') From 4d537abfa9351f7252dcb30f479308d3acc1d3fe Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Mon, 15 Jul 2024 20:08:01 +0000 Subject: [PATCH 36/38] unscope private search discovery --- chord_metadata_service/restapi/api_views.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/chord_metadata_service/restapi/api_views.py b/chord_metadata_service/restapi/api_views.py index a0929373e..7f7e4f5de 100644 --- a/chord_metadata_service/restapi/api_views.py +++ b/chord_metadata_service/restapi/api_views.py @@ -3,7 +3,7 @@ from adrf.decorators import api_view from django.db.models import QuerySet from drf_spectacular.utils import extend_schema, inline_serializer -from rest_framework import serializers, status +from rest_framework import serializers from rest_framework.decorators import permission_classes from rest_framework.permissions import AllowAny from rest_framework.request import Request as DrfRequest @@ -11,9 +11,8 @@ from chord_metadata_service.chord.data_types import DATA_TYPE_PHENOPACKET, DATA_TYPE_EXPERIMENT from chord_metadata_service.chord.permissions import OverrideOrSuperUserOnly -from chord_metadata_service.discovery.exceptions import DiscoveryConfigException from chord_metadata_service.discovery.types import DiscoveryConfig -from chord_metadata_service.discovery.utils import get_discovery, get_request_discovery +from chord_metadata_service.discovery.utils import get_discovery from chord_metadata_service.experiments import models as experiments_models from chord_metadata_service.experiments.summaries import dt_experiment_summary from chord_metadata_service.metadata.service_info import get_service_info @@ -99,10 +98,8 @@ async def search_overview(request: DrfRequest): """ # TODO: this should be project / dataset-scoped and probably shouldn't even exist as-is - try: - discovery = await get_request_discovery(request) - except DiscoveryConfigException as e: - return Response(e.message, status=status.HTTP_404_NOT_FOUND) + # use node level discovery config for private search overview + discovery = await get_discovery() individual_ids = request.GET.getlist("id") if request.method == "GET" else request.data.get("id", []) phenopackets = pheno_models.Phenopacket.objects.all().filter(subject_id__in=individual_ids) From fd9625032ae4a6f72c934ee1be019b4bccb1bde1 Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Tue, 16 Jul 2024 15:31:41 +0000 Subject: [PATCH 37/38] type hints --- chord_metadata_service/discovery/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chord_metadata_service/discovery/utils.py b/chord_metadata_service/discovery/utils.py index a03d38c8b..8cfc9ac42 100644 --- a/chord_metadata_service/discovery/utils.py +++ b/chord_metadata_service/discovery/utils.py @@ -12,7 +12,7 @@ ] -async def _get_project_discovery(project_id: str = None, project: cm.Project = None) -> dict: +async def _get_project_discovery(project_id: str | None = None, project: cm.Project | None = None) -> dict: if not project and project_id: # retrieve project by ID if not provided project = await cm.Project.objects.aget(identifier=project_id) @@ -30,7 +30,7 @@ async def _get_dataset_discovery(dataset_id: str) -> dict: return dataset.discovery -async def get_discovery(project_id: str = None, dataset_id: str = None) -> DiscoveryConfig: +async def get_discovery(project_id: str | None = None, dataset_id: str | None = None) -> DiscoveryConfig: if dataset_id and project_id: # check if the dataset belongs to the project is_scope_valid = await cm.Dataset.objects.filter( From 7ea3925d18e2b363e322b1f987d3303921099ffc Mon Sep 17 00:00:00 2001 From: Victor Rocheleau Date: Tue, 16 Jul 2024 18:39:10 +0000 Subject: [PATCH 38/38] public discovery authz todo --- chord_metadata_service/patients/api_views.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chord_metadata_service/patients/api_views.py b/chord_metadata_service/patients/api_views.py index 0a6a8b984..d60ed5c9f 100644 --- a/chord_metadata_service/patients/api_views.py +++ b/chord_metadata_service/patients/api_views.py @@ -151,6 +151,7 @@ async def public_discovery_filter_queryset(request: DrfRequest, queryset: QueryS # Check query parameters validity qp = request.query_params discovery = await get_request_discovery(request) + # TODO: allow exceeding max query parameters for authorized requests if len(qp) > get_max_query_parameters(discovery, low_counts_censored): raise ValidationError(f"Wrong number of fields: {len(qp)}")