begin work adding authz to discovery endpoints + asyncifying

bento-platform · Oct 18, 2023 · 8958858 · 8958858
1 parent 01b9b17
commit 8958858
Show file tree

Hide file tree

Showing 5 changed files with 520 additions and 276 deletions.
diff --git a/chord_metadata_service/authz/counts.py b/chord_metadata_service/authz/counts.py
@@ -1,11 +1,8 @@
 from django.http import HttpRequest
+from typing import overload
 
-from .constants import (
-    PERMISSION_QUERY_DATA,
-    PERMISSION_QUERY_PROJECT_LEVEL_COUNTS,
-    PERMISSION_QUERY_DATASET_LEVEL_COUNTS,
-)
-from .middleware import authz_middleware
+from .constants import PERMISSION_QUERY_PROJECT_LEVEL_COUNTS, PERMISSION_QUERY_DATASET_LEVEL_COUNTS
+from .queries import query_permission, can_query_data
 from .utils import create_resource
 
 
@@ -22,26 +19,48 @@ def get_counts_permission(dataset_level: bool) -> str:
     return PERMISSION_QUERY_PROJECT_LEVEL_COUNTS  # We don't have a node-level counts permission
 
 
-async def can_see_counts(request: HttpRequest, resource: dict) -> bool:
-    return await authz_middleware.async_authz_post(request, "/policy/evaluate", {
-        "requested_resource": resource,
-        "required_permissions": [get_counts_permission(resource.get("dataset") is not None)],
-    })["result"] or (
-        # If we don't have a count permission, we may still have a query:data permission (no cascade)
-        await authz_middleware.async_authz_post(request, "/policy/evaluate", {
-            "requested_resource": resource,
-            "required_permissions": [PERMISSION_QUERY_DATA],
-        })["result"]
+@overload
+async def can_see_counts(request: HttpRequest, resource: dict, dataset_level: bool) -> bool:
+    ...
+
+
+@overload
+async def can_see_counts(request: HttpRequest, resource: list[dict], dataset_level: bool) -> tuple[bool, ...]:
+    ...
+
+
+async def can_see_counts(
+    request: HttpRequest, resource: dict | list[dict], dataset_level: bool
+) -> bool | tuple[bool, ...]:
+    # Either the level's counts permission or, failing that, query:data (no cascade) lets us see counts.
+    counts = await query_permission(request, resource, get_counts_permission(dataset_level))
+    if not isinstance(resource, list):
+        return counts or await can_query_data(request, resource)  # or-shortcut: only runs if it needs checking.
+    fallback = counts if all(counts) else await can_query_data(request, resource)  # skip the call if not needed
+    return tuple(
+        c or f for c, f in zip(counts, fallback)  # per resource: a non-empty tuple as a whole is always truthy
     )
 
 
 async def has_counts_permission_for_data_types(
-    request: HttpRequest, project: str, dataset: str, data_types: list[str]
+    request: HttpRequest, project: str | None, dataset: str | None, data_types: list[str]
 ) -> list[bool]:
-    has_permission: bool = await can_see_counts(request, create_resource(project, dataset, None))
+    # Counts are checked with the dataset-level permission only if a dataset is given (project-level otherwise).
+    dataset_level: bool = dataset is not None
+    has_permission: bool = await can_see_counts(
+        request, create_resource(project, dataset, None), dataset_level)
 
     return [
-        # Either we have permission for all (saves many calls) or we have for a specific data type
-        has_permission or (await can_see_counts(request, create_resource(project, dataset, dt_id)))
-        for dt_id in data_types
+        # Either we have permission for all (saves many calls via or-shortcutting) or we have for a specific data type:
+        has_permission or await can_see_counts(request, create_resource(project, dataset, dt_id), dataset_level)
+        for dt_id in data_types  # data_types holds plain ID strings, so there is nothing to tuple-unpack here
     ]
+
+
+async def has_counts_permission_for_data_types_bulk_resources(
+    request: HttpRequest,
+    resource_tuples: tuple[tuple[str | None, str | None], ...],
+    data_types: list[str],
+    dataset_level: bool,
+):
+    pass  # TODO: unimplemented stub (returns None); presumably a bulk variant covering many resources - confirm
diff --git a/chord_metadata_service/authz/queries.py b/chord_metadata_service/authz/queries.py
@@ -0,0 +1,58 @@
+from django.http import HttpRequest
+from typing import overload
+
+from .constants import PERMISSION_QUERY_DATA
+from .middleware import authz_middleware
+from .utils import create_resource
+
+__all__ = [
+    "query_permission",
+    "can_query_data",
+    "has_query_data_permission_for_data_types",
+]
+
+
+@overload
+async def query_permission(request: HttpRequest, resource: dict, permission: str) -> bool:
+    ...
+
+
+@overload
+async def query_permission(request: HttpRequest, resource: list[dict], permission: str) -> tuple[bool, ...]:
+    ...
+
+
+async def query_permission(
+    request: HttpRequest, resource: dict | list[dict], permission: str
+) -> bool | tuple[bool, ...]:
+    # `await` binds looser than subscription, so the call must be parenthesized before indexing into its response.
+    res = (await authz_middleware.async_authz_post(request, "/policy/evaluate", {
+        "requested_resource": resource,
+        "required_permissions": [permission],
+    }))["result"]
+    return tuple(res) if isinstance(resource, list) else res  # one result per resource only for bulk (list) queries
+
+
+@overload
+async def can_query_data(request: HttpRequest, resource: dict) -> bool: ...
+
+
+@overload
+async def can_query_data(request: HttpRequest, resource: list[dict]) -> tuple[bool, ...]: ...
+
+
+async def can_query_data(request: HttpRequest, resource: dict | list[dict]) -> bool | tuple[bool, ...]:
+    """Evaluate PERMISSION_QUERY_DATA for `resource` by delegating to query_permission."""
+    has_query_data = await query_permission(request, resource, permission=PERMISSION_QUERY_DATA)
+    return has_query_data
+
+
+async def has_query_data_permission_for_data_types(
+    request: HttpRequest, project: str | None, dataset: str | None, data_types: list[str]
+) -> list[bool]:
+    # Holding the permission on the data-type-less resource settles every data type at once (saves many calls).
+    if await can_query_data(request, create_resource(project, dataset, None)):
+        return [True for _ in data_types]
+    # Otherwise, each data type has to be checked on its own resource.
+    dt_resources = (create_resource(project, dataset, dt_id) for dt_id in data_types)
+    return [await can_query_data(request, dt_res) for dt_res in dt_resources]
diff --git a/chord_metadata_service/patients/api_views.py b/chord_metadata_service/patients/api_views.py
@@ -1,8 +1,8 @@
 import re
 
+from asgiref.sync import async_to_sync
 from datetime import datetime
-
-from rest_framework import viewsets, filters, mixins, serializers
+from rest_framework import filters, mixins, serializers, status, viewsets
 from rest_framework.decorators import action
 from rest_framework.response import Response
 from rest_framework.settings import api_settings
@@ -22,6 +22,7 @@
 from .serializers import IndividualSerializer
 from .models import Individual
 from .filters import IndividualFilter
+from chord_metadata_service.authz.middleware import authz_middleware
 from chord_metadata_service.logger import logger
 from chord_metadata_service.phenopackets.api_views import BIOSAMPLE_PREFETCH, PHENOPACKET_PREFETCH
 from chord_metadata_service.phenopackets.models import Phenopacket
@@ -35,10 +36,11 @@
 )
 from chord_metadata_service.restapi.pagination import LargeResultsSetPagination, BatchResultsSetPagination
 from chord_metadata_service.restapi.utils import (
+    get_threshold,
     get_field_options,
     filter_queryset_field_value,
     biosample_tissue_stats,
-    experiment_type_stats
+    experiment_type_stats,
 )
 from chord_metadata_service.restapi.negociation import FormatInPostContentNegotiation
 
@@ -168,14 +170,16 @@ class PublicListIndividuals(APIView):
     View to return only count of all individuals after filtering.
     """
 
-    def filter_queryset(self, queryset):
+    async def filter_queryset(self, queryset, can_query_data: bool):
         # Check query parameters validity
         qp = self.request.query_params
-        if len(qp) > settings.CONFIG_PUBLIC["rules"]["max_query_parameters"]:
+        config_public = settings.CONFIG_PUBLIC
+
+        if not can_query_data and len(qp) > config_public["rules"]["max_query_parameters"]:
             raise ValidationError(f"Wrong number of fields: {len(qp)}")
 
-        search_conf = settings.CONFIG_PUBLIC["search"]
-        field_conf = settings.CONFIG_PUBLIC["fields"]
+        search_conf = config_public["search"]
+        field_conf = config_public["fields"]
         queryable_fields = {
             f"{f}": field_conf[f] for section in search_conf for f in section["fields"]
         }
@@ -185,7 +189,7 @@ def filter_queryset(self, queryset):
                 raise ValidationError(f"Unsupported field used in query: {field}")
 
             field_props = queryable_fields[field]
-            options = get_field_options(field_props)
+            options = await get_field_options(field_props, low_counts_censored=not can_query_data)
             if value not in options \
                     and not (
                         # case-insensitive search on categories
@@ -204,38 +208,46 @@ def filter_queryset(self, queryset):
 
         return queryset
 
-    def get(self, request, *args, **kwargs):
+    # TODO: should be project-scoped
+
+    @async_to_sync
+    async def get(self, request, *_args, **_kwargs):
         if not settings.CONFIG_PUBLIC:
-            return Response(settings.NO_PUBLIC_DATA_AVAILABLE)
+            authz_middleware.mark_authz_done(request)
+            return Response(settings.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND)
+
+        # TODO: permissions - until these are checked, filter as if the requester lacks query:data (most restrictive).
 
         base_qs = Individual.objects.all()
         try:
-            filtered_qs = self.filter_queryset(base_qs)
+            filtered_qs = await self.filter_queryset(base_qs, can_query_data=False)
         except ValidationError as e:
-            return Response(errors.bad_request_error(
-                *(e.error_list if hasattr(e, "error_list") else e.error_dict.items()),
-            ))
+            return Response(
+                errors.bad_request_error(*(e.error_list if hasattr(e, "error_list") else e.error_dict.items())),
+                status=status.HTTP_400_BAD_REQUEST,
+            )
 
-        qct = filtered_qs.count()
+        qct = await filtered_qs.acount()  # count() is synchronous and returns a plain int, which cannot be awaited
 
-        if qct <= (threshold := settings.CONFIG_PUBLIC["rules"]["count_threshold"]):
+        if qct <= (threshold := get_threshold()):  # TODO: permissions
+            authz_middleware.mark_authz_done(request)
             logger.info(
                 f"Public individuals endpoint recieved query params {request.query_params} which resulted in "
                 f"sub-threshold count: {qct} <= {threshold}")
             return Response(settings.INSUFFICIENT_DATA_AVAILABLE)
 
-        tissues_count, sampled_tissues = biosample_tissue_stats(filtered_qs)
-        experiments_count, experiment_types = experiment_type_stats(filtered_qs)
+        tissues_count, sampled_tissues = await biosample_tissue_stats(filtered_qs)
+        experiments_count, experiment_types = await experiment_type_stats(filtered_qs)
 
         return Response({
             "count": qct,
             "biosamples": {
                 "count": tissues_count,
-                "sampled_tissue": sampled_tissues
+                "sampled_tissue": sampled_tissues,
             },
             "experiments": {
                 "count": experiments_count,
-                "experiment_type": experiment_types
+                "experiment_type": experiment_types,
             }
         })
 
@@ -245,14 +257,16 @@ class BeaconListIndividuals(APIView):
     View to return lists of individuals filtered using search terms from katsu's config.json.
     Uncensored equivalent of PublicListIndividuals.
     """
-    def filter_queryset(self, queryset):
+    async def filter_queryset(self, queryset, can_query_data: bool):
         # Check query parameters validity
         qp = self.request.query_params
-        if len(qp) > settings.CONFIG_PUBLIC["rules"]["max_query_parameters"]:
+        config_public = settings.CONFIG_PUBLIC
+
+        if not can_query_data and len(qp) > config_public["rules"]["max_query_parameters"]:
             raise ValidationError(f"Wrong number of fields: {len(qp)}")
 
-        search_conf = settings.CONFIG_PUBLIC["search"]
-        field_conf = settings.CONFIG_PUBLIC["fields"]
+        search_conf = config_public["search"]
+        field_conf = config_public["fields"]
         queryable_fields = {
             f: field_conf[f] for section in search_conf for f in section["fields"]
         }
@@ -262,7 +276,7 @@ def filter_queryset(self, queryset):
                 raise ValidationError(f"Unsupported field used in query: {field}")
 
             field_props = queryable_fields[field]
-            options = get_field_options(field_props)
+            options = await get_field_options(field_props, low_counts_censored=not can_query_data)
             if value not in options \
                     and not (
                         # case-insensitive search on categories
@@ -281,28 +295,40 @@ def filter_queryset(self, queryset):
 
         return queryset
 
-    def get(self, request, *args, **kwargs):
+    @async_to_sync
+    async def get(self, request, *_args, **_kwargs):
         if not settings.CONFIG_PUBLIC:
-            return Response(settings.NO_PUBLIC_DATA_AVAILABLE, status=404)
+            authz_middleware.mark_authz_done(request)
+            return Response(settings.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND)
+
+        # Steps for permissions
+        #  - Obtain all datasets
+        #  - Do a bulk request to authz for permissions to see counts for the data types for each...
 
         base_qs = Individual.objects.all()
+
+        # TODO: permissions - until these are checked, filter as if the requester lacks query:data (most restrictive).
+
         try:
-            filtered_qs = self.filter_queryset(base_qs)
+            filtered_qs = await self.filter_queryset(base_qs, can_query_data=False)
         except ValidationError as e:
-            return Response(errors.bad_request_error(
-                *(e.error_list if hasattr(e, "error_list") else e.error_dict.items())), status=400)
+            authz_middleware.mark_authz_done(request)
+            return Response(
+                errors.bad_request_error(*(e.error_list if hasattr(e, "error_list") else e.error_dict.items())),
+                status=status.HTTP_400_BAD_REQUEST,
+            )
 
-        tissues_count, sampled_tissues = biosample_tissue_stats(filtered_qs)
-        experiments_count, experiment_types = experiment_type_stats(filtered_qs)
+        tissues_count, sampled_tissues = await biosample_tissue_stats(filtered_qs)
+        experiments_count, experiment_types = await experiment_type_stats(filtered_qs)
 
         return Response({
-            "matches": filtered_qs.values_list("id", flat=True),
+            "matches": [pk async for pk in filtered_qs.values_list("id", flat=True)],  # a QuerySet is not awaitable
             "biosamples": {
                 "count": tissues_count,
-                "sampled_tissue": sampled_tissues
+                "sampled_tissue": sampled_tissues,
             },
             "experiments": {
                 "count": experiments_count,
-                "experiment_type": experiment_types
+                "experiment_type": experiment_types,
             }
         })