From a64af85b238cd82db8d390b319b7da3044b9362b Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 30 Sep 2024 13:22:00 -0400 Subject: [PATCH 1/4] feat(chord): allow downloading DATS JSON as attachment --- chord_metadata_service/chord/api_views.py | 8 ++-- chord_metadata_service/patients/api_views.py | 19 +++++---- chord_metadata_service/restapi/utils.py | 41 +++++++++++++++++++- 3 files changed, 54 insertions(+), 14 deletions(-) diff --git a/chord_metadata_service/chord/api_views.py b/chord_metadata_service/chord/api_views.py index 8038dce89..ff8fea820 100644 --- a/chord_metadata_service/chord/api_views.py +++ b/chord_metadata_service/chord/api_views.py @@ -28,12 +28,13 @@ from chord_metadata_service.resources.serializers import ResourceSerializer from chord_metadata_service.restapi.api_renderers import PhenopacketsRenderer, JSONLDDatasetRenderer, RDFDatasetRenderer from chord_metadata_service.restapi.pagination import LargeResultsSetPagination +from chord_metadata_service.restapi.utils import response_optionally_as_attachment from .models import Project, Dataset, ProjectJsonSchema from .serializers import ( ProjectJsonSchemaSerializer, ProjectSerializer, - DatasetSerializer + DatasetSerializer, ) from .filters import AuthorizedDatasetFilter @@ -142,7 +143,7 @@ class DatasetViewSet(CHORDPublicModelViewSet): queryset = Dataset.objects.all().order_by("title") @action(detail=True, methods=['get']) - def dats(self, request, *_args, **_kwargs): + def dats(self, request: DrfRequest, *_args, **_kwargs): """ Retrieve a specific DATS file for a given dataset. @@ -154,7 +155,8 @@ def dats(self, request, *_args, **_kwargs): return not_found(request) # side effect: sets authz done flag authz.mark_authz_done(request) - return Response(dataset.dats_file) + + return response_optionally_as_attachment(dataset.dats_file, f"{dataset.identifier}_dats.json") @action(detail=True, methods=["get"]) def resources(self, request, *_args, **_kwargs): diff --git a/chord_metadata_service/patients/api_views.py b/chord_metadata_service/patients/api_views.py index 2c38d5eac..8af66534e 100644 --- a/chord_metadata_service/patients/api_views.py +++ b/chord_metadata_service/patients/api_views.py @@ -1,5 +1,4 @@ import asyncio -import re from adrf.views import APIView from bento_lib.responses import errors @@ -48,7 +47,11 @@ from chord_metadata_service.restapi.constants import MODEL_ID_PATTERN from chord_metadata_service.restapi.pagination import LargeResultsSetPagination, BatchResultsSetPagination from chord_metadata_service.restapi.negociation import FormatInPostContentNegotiation -from chord_metadata_service.restapi.utils import build_experiments_by_subject, get_biosamples_with_experiment_details +from chord_metadata_service.restapi.utils import ( + build_experiments_by_subject, + get_biosamples_with_experiment_details, + response_optionally_as_attachment, +) from .filters import IndividualFilter from .models import Individual @@ -121,8 +124,7 @@ def list(self, request, *args, **kwargs): return super().list(request, *args, **kwargs) @action(detail=True, methods=["GET", "POST"]) - def phenopackets(self, request, *_args, **_kwargs): - as_attachment = request.query_params.get("attachment", "") in ("1", "true", "yes") + def phenopackets(self, request: DrfRequest, *_args, **_kwargs): individual = self.get_object() phenopackets = ( @@ -132,13 +134,10 @@ def phenopackets(self, request, *_args, **_kwargs): .order_by("id") ) - filename_safe_id = re.sub(r"[\\/:*?\"<>|]", "_", individual.id) - return Response( + return response_optionally_as_attachment( + request, PhenopacketSerializer(phenopackets, many=True).data, - headers=( - {"Content-Disposition": f"attachment; filename=\"{filename_safe_id}_phenopackets.json\""} - if as_attachment else {} - ), + f"{individual.id}_phenopackets.json" ) diff --git a/chord_metadata_service/restapi/utils.py b/chord_metadata_service/restapi/utils.py index 64f236d44..29c63a823 100644 --- a/chord_metadata_service/restapi/utils.py +++ b/chord_metadata_service/restapi/utils.py @@ -1,9 +1,28 @@ from __future__ import annotations -from chord_metadata_service.phenopackets.models import Biosample + +import re + from collections import defaultdict from django.db.models import F +from rest_framework.request import Request as DrfRequest +from rest_framework.response import Response from typing import Any +from chord_metadata_service.phenopackets.models import Biosample + +__all__ = [ + "COMPUTED_PROPERTY_PREFIX", + "camel_case_field_names", + "transform_keys", + "computed_property", + "remove_computed_properties", + "get_biosamples_with_experiment_details", + "build_experiments_by_subject", + "response_as_attachment", + "attachment_content_disposition", + "response_optionally_as_attachment", +] + COMPUTED_PROPERTY_PREFIX = "__" @@ -88,3 +107,23 @@ def build_experiments_by_subject(biosamples_experiments_details: list[dict]) -> } }) return experiments_with_biosamples + + +def response_as_attachment(request: DrfRequest) -> bool: + """ + Helper function for processing the as_attachment query parameter, for consistent behaviour when returning JSON + responses as file attachments. + """ + return request.query_params.get("attachment", "").strip().lower() in ("1", "true", "yes") + + +FILENAME_REPLACE_PATTERN = re.compile(r"[\\/:*?\"<>|]") + + +def attachment_content_disposition(filename: str) -> dict[str, str]: + filename_safe = FILENAME_REPLACE_PATTERN.sub("_", filename) + return {"Content-Disposition": f"attachment; filename=\"{filename_safe}\""} + + +def response_optionally_as_attachment(request: DrfRequest, data, filename: str) -> Response: + return Response(data, headers=attachment_content_disposition(filename) if response_as_attachment(request) else {}) From e979cc550fd968e7fd8e00e35f7b094c8440bf69 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 30 Sep 2024 13:25:42 -0400 Subject: [PATCH 2/4] fix --- chord_metadata_service/chord/api_views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chord_metadata_service/chord/api_views.py b/chord_metadata_service/chord/api_views.py index ff8fea820..31e3dde86 100644 --- a/chord_metadata_service/chord/api_views.py +++ b/chord_metadata_service/chord/api_views.py @@ -156,7 +156,7 @@ def dats(self, request: DrfRequest, *_args, **_kwargs): authz.mark_authz_done(request) - return response_optionally_as_attachment(dataset.dats_file, f"{dataset.identifier}_dats.json") + return response_optionally_as_attachment(request, dataset.dats_file, f"{dataset.identifier}_dats.json") @action(detail=True, methods=["get"]) def resources(self, request, *_args, **_kwargs): From 07150101b22167b1e59f3d116a7d1ca741e2b2e2 Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 30 Sep 2024 13:47:40 -0400 Subject: [PATCH 3/4] test: dats as attachment --- chord_metadata_service/chord/tests/test_api.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/chord_metadata_service/chord/tests/test_api.py b/chord_metadata_service/chord/tests/test_api.py index d50391001..212c826e0 100644 --- a/chord_metadata_service/chord/tests/test_api.py +++ b/chord_metadata_service/chord/tests/test_api.py @@ -184,6 +184,24 @@ def test_dats(self): response = self.client.get("/api/datasets/does-not-exist/dats") self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND) + def test_dats_as_attachment(self): + payload = {**self.dats_valid_payload, 'dats_file': {}} + + r = self.one_authz_post('/api/datasets', data=json.dumps(payload)) + + self.assertEqual(r.status_code, status.HTTP_201_CREATED) + dataset_id = Dataset.objects.first().identifier + + response = self.client.get(f"/api/datasets/{dataset_id}/dats?attachment=true") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertDictEqual(response.data, payload['dats_file']) + self.assertEqual(response.headers["Content-Disposition"], f"attachment; filename=\"{dataset_id}_dats.json\"") + + response = self.client.get(f"/api/datasets/{dataset_id}/dats?attachment=false") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertDictEqual(response.data, payload['dats_file']) + self.assertNotIn("Content-Disposition", response.headers) + def test_resources(self): resource = { "id": "NCBITaxon:2023-09-14", From e5a3a8e29cf9e0bf7149bd9dfba4ada616564fbc Mon Sep 17 00:00:00 2001 From: David Lougheed Date: Mon, 30 Sep 2024 14:39:58 -0400 Subject: [PATCH 4/4] address review comments --- .../chord/tests/test_api.py | 26 +++++++++++++------ chord_metadata_service/restapi/utils.py | 6 +---- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/chord_metadata_service/chord/tests/test_api.py b/chord_metadata_service/chord/tests/test_api.py index 212c826e0..e7c5676d7 100644 --- a/chord_metadata_service/chord/tests/test_api.py +++ b/chord_metadata_service/chord/tests/test_api.py @@ -192,15 +192,25 @@ def test_dats_as_attachment(self): self.assertEqual(r.status_code, status.HTTP_201_CREATED) dataset_id = Dataset.objects.first().identifier - response = self.client.get(f"/api/datasets/{dataset_id}/dats?attachment=true") - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertDictEqual(response.data, payload['dats_file']) - self.assertEqual(response.headers["Content-Disposition"], f"attachment; filename=\"{dataset_id}_dats.json\"") + subtest_params = [ + ("?attachment=true", True), + ("?attachment=false", False), + ("?attachment=", False), + ("", False), + ] - response = self.client.get(f"/api/datasets/{dataset_id}/dats?attachment=false") - self.assertEqual(response.status_code, status.HTTP_200_OK) - self.assertDictEqual(response.data, payload['dats_file']) - self.assertNotIn("Content-Disposition", response.headers) + for params in subtest_params: + with self.subTest(params=params): + response = self.client.get(f"/api/datasets/{dataset_id}/dats{params[0]}") + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.assertDictEqual(response.data, payload['dats_file']) + if params[1]: + self.assertEqual( + response.headers["Content-Disposition"], + f"attachment; filename=\"{dataset_id}_dats.json\"" + ) + else: + self.assertNotIn("Content-Disposition", response.headers) def test_resources(self): resource = { diff --git a/chord_metadata_service/restapi/utils.py b/chord_metadata_service/restapi/utils.py index 29c63a823..442d3d303 100644 --- a/chord_metadata_service/restapi/utils.py +++ b/chord_metadata_service/restapi/utils.py @@ -11,8 +11,6 @@ from chord_metadata_service.phenopackets.models import Biosample __all__ = [ - "COMPUTED_PROPERTY_PREFIX", - "camel_case_field_names", "transform_keys", "computed_property", "remove_computed_properties", @@ -25,6 +23,7 @@ COMPUTED_PROPERTY_PREFIX = "__" +FILENAME_REPLACE_PATTERN = re.compile(r"[\\/:*?\"<>|]") def camel_case_field_names(string) -> str: @@ -117,9 +116,6 @@ def response_as_attachment(request: DrfRequest) -> bool: return request.query_params.get("attachment", "").strip().lower() in ("1", "true", "yes") -FILENAME_REPLACE_PATTERN = re.compile(r"[\\/:*?\"<>|]") - - def attachment_content_disposition(filename: str) -> dict[str, str]: filename_safe = FILENAME_REPLACE_PATTERN.sub("_", filename) return {"Content-Disposition": f"attachment; filename=\"{filename_safe}\""}