Skip to content

Commit

Permalink
begin work adding authz to discovery endpoints + asyncifying
Browse files Browse the repository at this point in the history
  • Loading branch information
davidlougheed committed Oct 18, 2023
1 parent 01b9b17 commit 8958858
Show file tree
Hide file tree
Showing 5 changed files with 520 additions and 276 deletions.
61 changes: 40 additions & 21 deletions chord_metadata_service/authz/counts.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
from django.http import HttpRequest
from typing import overload

from .constants import (
PERMISSION_QUERY_DATA,
PERMISSION_QUERY_PROJECT_LEVEL_COUNTS,
PERMISSION_QUERY_DATASET_LEVEL_COUNTS,
)
from .middleware import authz_middleware
from .constants import PERMISSION_QUERY_PROJECT_LEVEL_COUNTS, PERMISSION_QUERY_DATASET_LEVEL_COUNTS
from .queries import query_permission, can_query_data
from .utils import create_resource


Expand All @@ -22,26 +19,48 @@ def get_counts_permission(dataset_level: bool) -> str:
return PERMISSION_QUERY_PROJECT_LEVEL_COUNTS # We don't have a node-level counts permission


async def can_see_counts(request: HttpRequest, resource: dict) -> bool:
return await authz_middleware.async_authz_post(request, "/policy/evaluate", {
"requested_resource": resource,
"required_permissions": [get_counts_permission(resource.get("dataset") is not None)],
})["result"] or (
# If we don't have a count permission, we may still have a query:data permission (no cascade)
await authz_middleware.async_authz_post(request, "/policy/evaluate", {
"requested_resource": resource,
"required_permissions": [PERMISSION_QUERY_DATA],
})["result"]
@overload
async def can_see_counts(request: HttpRequest, resource: dict, dataset_level: bool) -> bool:
    ...


@overload
async def can_see_counts(request: HttpRequest, resource: list[dict], dataset_level: bool) -> tuple[bool, ...]:
    ...


async def can_see_counts(
    request: HttpRequest, resource: dict | list[dict], dataset_level: bool
) -> bool | tuple[bool, ...]:
    """
    Check whether the requester may see counts for one resource, or for each resource in a list.

    The counts permission (project- or dataset-level, chosen via ``dataset_level``) is checked first. Holding the
    query:data permission also grants access to counts, but that permission does not cascade from the counts
    permission, so it is checked separately and only when the counts check alone did not grant access.

    :param request: The incoming request, forwarded to the permission queries.
    :param resource: A single resource dict, or a list of resource dicts for a bulk check.
    :param dataset_level: Whether to check the dataset-level (rather than project-level) counts permission.
    :return: A single truthy/falsy result for a single resource; a tuple with one bool per resource for a list.
    """

    counts_permission = get_counts_permission(dataset_level)

    if not isinstance(resource, list):
        return (
            await query_permission(request, resource, counts_permission)
            or await can_query_data(request, resource)  # or-shortcut means this only runs if it needs to be checked.
        )

    # Bulk case: `or` on two tuples is NOT element-wise (any non-empty tuple is truthy, even if every entry is
    # False), so the per-resource results have to be combined explicitly.
    has_counts = tuple(await query_permission(request, resource, counts_permission))
    if all(has_counts):
        # Every resource is already covered (or the list is empty); skip the second authorization call.
        return has_counts

    has_query_data = await can_query_data(request, resource)
    return tuple(bool(c or q) for c, q in zip(has_counts, has_query_data))


async def has_counts_permission_for_data_types(
    request: HttpRequest, project: str | None, dataset: str | None, data_types: list[str]
) -> list[bool]:
    """
    Check, for each data type ID, whether the requester may see counts within the given project/dataset scope.

    A single check without a data type is made first; if it succeeds, every data type is permitted and no further
    authorization calls are made. Otherwise each data type is checked individually.

    :param request: The incoming request, forwarded to the permission checks.
    :param project: Project identifier passed to ``create_resource``; may be None.
    :param dataset: Dataset identifier passed to ``create_resource``; when not None, the dataset-level counts
                    permission is used instead of the project-level one.
    :param data_types: Data type IDs to check, as plain strings.
    :return: One entry per item of ``data_types``, in the same order.
    """

    dataset_level: bool = dataset is not None

    has_permission: bool = await can_see_counts(
        request, create_resource(project, dataset, None), dataset_level)

    return [
        # Either we have permission for all (saves many calls via or-shortcutting) or we have for a specific data type.
        # data_types holds plain ID strings, so each item is bound to a single name (no tuple unpacking).
        has_permission or await can_see_counts(request, create_resource(project, dataset, dt_id), dataset_level)
        for dt_id in data_types
    ]


async def has_counts_permission_for_data_types_bulk_resources(
    request: HttpRequest,
    resource_tuples: tuple[tuple[str | None, str | None], ...],
    data_types: list[str],
    dataset_level: bool,
):
    """
    Placeholder for a bulk counts-permission check across several resources and data types.

    Not implemented yet: no authorization call is made and None is always returned.

    NOTE(review): resource_tuples presumably holds (project, dataset) pairs - confirm when implementing.
    """
    # TODO: implement the bulk permission check.
    return None
58 changes: 58 additions & 0 deletions chord_metadata_service/authz/queries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
from django.http import HttpRequest
from typing import overload

from .constants import PERMISSION_QUERY_DATA
from .middleware import authz_middleware
from .utils import create_resource

__all__ = [
"query_permission",
"can_query_data",
"has_query_data_permission_for_data_types",
]


@overload
async def query_permission(request: HttpRequest, resource: dict, permission: str) -> bool:
    ...


@overload
async def query_permission(request: HttpRequest, resource: list[dict], permission: str) -> tuple[bool, ...]:
    ...


async def query_permission(
    request: HttpRequest, resource: dict | list[dict], permission: str
) -> bool | tuple[bool, ...]:
    """
    Ask the authorization service whether the requester holds a permission on one or more resources.

    Posts to the "/policy/evaluate" endpoint with the resource(s) and a single required permission, then reads the
    "result" key of the response.

    :param request: The incoming request, forwarded to the authorization middleware.
    :param resource: A single resource dict, or a list of resource dicts for a bulk check.
    :param permission: The permission to evaluate.
    :return: The service's result for a single resource; a tuple of per-resource results for a list.
    """

    # The call must be awaited BEFORE subscripting: `await f(...)["result"]` parses as `await (f(...)["result"])`,
    # which tries to index the un-awaited coroutine object and raises TypeError.
    response = await authz_middleware.async_authz_post(request, "/policy/evaluate", {
        "requested_resource": resource,
        "required_permissions": [permission],
    })
    result = response["result"]

    if isinstance(resource, list):
        # NOTE(review): assumes the service returns one entry per requested resource for a list - confirm.
        return tuple(result)

    # Single resource: hand back the result as-is (wrapping a lone bool in tuple() would raise TypeError).
    return result


@overload
async def can_query_data(request: HttpRequest, resource: dict) -> bool:
    ...


@overload
async def can_query_data(request: HttpRequest, resource: list[dict]) -> tuple[bool, ...]:
    ...


async def can_query_data(request: HttpRequest, resource: dict | list[dict]) -> bool | tuple[bool, ...]:
    """
    Check the query:data permission for a single resource or a list of resources.

    Thin wrapper which delegates to ``query_permission`` with ``PERMISSION_QUERY_DATA`` and returns its result
    unchanged.
    """
    verdict = await query_permission(request, resource, PERMISSION_QUERY_DATA)
    return verdict


async def has_query_data_permission_for_data_types(
    request: HttpRequest, project: str | None, dataset: str | None, data_types: list[str]
) -> list[bool]:
    """
    Check, for each data type ID, whether the requester holds the query:data permission in the given scope.

    One check is made without a data type first. If it is granted, it is reused for every data type and no further
    calls are made; otherwise each data type is checked on its own, in order.

    :return: One entry per item of ``data_types``, in the same order.
    """
    blanket = await can_query_data(request, create_resource(project, dataset, None))

    if blanket:
        # Permission for the whole scope covers every data type (saves many calls).
        return [blanket for _ in data_types]

    per_data_type = []
    for data_type_id in data_types:
        scoped = create_resource(project, dataset, data_type_id)
        per_data_type.append(await can_query_data(request, scoped))
    return per_data_type
96 changes: 61 additions & 35 deletions chord_metadata_service/patients/api_views.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import re

from asgiref.sync import async_to_sync
from datetime import datetime

from rest_framework import viewsets, filters, mixins, serializers
from rest_framework import filters, mixins, serializers, status, viewsets
from rest_framework.decorators import action
from rest_framework.response import Response
from rest_framework.settings import api_settings
Expand All @@ -22,6 +22,7 @@
from .serializers import IndividualSerializer
from .models import Individual
from .filters import IndividualFilter
from chord_metadata_service.authz.middleware import authz_middleware
from chord_metadata_service.logger import logger
from chord_metadata_service.phenopackets.api_views import BIOSAMPLE_PREFETCH, PHENOPACKET_PREFETCH
from chord_metadata_service.phenopackets.models import Phenopacket
Expand All @@ -35,10 +36,11 @@
)
from chord_metadata_service.restapi.pagination import LargeResultsSetPagination, BatchResultsSetPagination
from chord_metadata_service.restapi.utils import (
get_threshold,
get_field_options,
filter_queryset_field_value,
biosample_tissue_stats,
experiment_type_stats
experiment_type_stats,
)
from chord_metadata_service.restapi.negociation import FormatInPostContentNegotiation

Expand Down Expand Up @@ -168,14 +170,16 @@ class PublicListIndividuals(APIView):
View to return only count of all individuals after filtering.
"""

def filter_queryset(self, queryset):
async def filter_queryset(self, queryset, can_query_data: bool):
# Check query parameters validity
qp = self.request.query_params
if len(qp) > settings.CONFIG_PUBLIC["rules"]["max_query_parameters"]:
config_public = settings.CONFIG_PUBLIC

if not can_query_data and len(qp) > config_public["rules"]["max_query_parameters"]:
raise ValidationError(f"Wrong number of fields: {len(qp)}")

search_conf = settings.CONFIG_PUBLIC["search"]
field_conf = settings.CONFIG_PUBLIC["fields"]
search_conf = config_public["search"]
field_conf = config_public["fields"]
queryable_fields = {
f"{f}": field_conf[f] for section in search_conf for f in section["fields"]
}
Expand All @@ -185,7 +189,7 @@ def filter_queryset(self, queryset):
raise ValidationError(f"Unsupported field used in query: {field}")

field_props = queryable_fields[field]
options = get_field_options(field_props)
options = await get_field_options(field_props, low_counts_censored=not can_query_data)
if value not in options \
and not (
# case-insensitive search on categories
Expand All @@ -204,38 +208,46 @@ def filter_queryset(self, queryset):

return queryset

def get(self, request, *args, **kwargs):
# TODO: should be project-scoped

@async_to_sync
async def get(self, request, *_args, **_kwargs):
if not settings.CONFIG_PUBLIC:
return Response(settings.NO_PUBLIC_DATA_AVAILABLE)
authz_middleware.mark_authz_done(request)
return Response(settings.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND)

# TODO: permissions

base_qs = Individual.objects.all()
try:
filtered_qs = self.filter_queryset(base_qs)
filtered_qs = await self.filter_queryset(base_qs)
except ValidationError as e:
return Response(errors.bad_request_error(
*(e.error_list if hasattr(e, "error_list") else e.error_dict.items()),
))
return Response(
errors.bad_request_error(*(e.error_list if hasattr(e, "error_list") else e.error_dict.items())),
status=status.HTTP_400_BAD_REQUEST,
)

qct = filtered_qs.count()
qct = await filtered_qs.count()

if qct <= (threshold := settings.CONFIG_PUBLIC["rules"]["count_threshold"]):
if qct <= (threshold := get_threshold()): # TODO: permissions
authz_middleware.mark_authz_done(request)
logger.info(
f"Public individuals endpoint recieved query params {request.query_params} which resulted in "
f"sub-threshold count: {qct} <= {threshold}")
return Response(settings.INSUFFICIENT_DATA_AVAILABLE)

tissues_count, sampled_tissues = biosample_tissue_stats(filtered_qs)
experiments_count, experiment_types = experiment_type_stats(filtered_qs)
tissues_count, sampled_tissues = await biosample_tissue_stats(filtered_qs)
experiments_count, experiment_types = await experiment_type_stats(filtered_qs)

return Response({
"count": qct,
"biosamples": {
"count": tissues_count,
"sampled_tissue": sampled_tissues
"sampled_tissue": sampled_tissues,
},
"experiments": {
"count": experiments_count,
"experiment_type": experiment_types
"experiment_type": experiment_types,
}
})

Expand All @@ -245,14 +257,16 @@ class BeaconListIndividuals(APIView):
View to return lists of individuals filtered using search terms from katsu's config.json.
Uncensored equivalent of PublicListIndividuals.
"""
def filter_queryset(self, queryset):
async def filter_queryset(self, queryset, can_query_data: bool):
# Check query parameters validity
qp = self.request.query_params
if len(qp) > settings.CONFIG_PUBLIC["rules"]["max_query_parameters"]:
config_public = settings.CONFIG_PUBLIC

if not can_query_data and len(qp) > config_public["rules"]["max_query_parameters"]:
raise ValidationError(f"Wrong number of fields: {len(qp)}")

search_conf = settings.CONFIG_PUBLIC["search"]
field_conf = settings.CONFIG_PUBLIC["fields"]
search_conf = config_public["search"]
field_conf = config_public["fields"]
queryable_fields = {
f: field_conf[f] for section in search_conf for f in section["fields"]
}
Expand All @@ -262,7 +276,7 @@ def filter_queryset(self, queryset):
raise ValidationError(f"Unsupported field used in query: {field}")

field_props = queryable_fields[field]
options = get_field_options(field_props)
options = await get_field_options(field_props, low_counts_censored=not can_query_data)
if value not in options \
and not (
# case-insensitive search on categories
Expand All @@ -281,28 +295,40 @@ def filter_queryset(self, queryset):

return queryset

def get(self, request, *args, **kwargs):
@async_to_sync
async def get(self, request, *_args, **_kwargs):
if not settings.CONFIG_PUBLIC:
return Response(settings.NO_PUBLIC_DATA_AVAILABLE, status=404)
authz_middleware.mark_authz_done(request)
return Response(settings.NO_PUBLIC_DATA_AVAILABLE, status=status.HTTP_404_NOT_FOUND)

# Steps for permissions
# - Obtain all datasets
# - Do a bulk request to authz for permissions to see counts for the data types for each...

base_qs = Individual.objects.all()

# TODO: permissions

try:
filtered_qs = self.filter_queryset(base_qs)
filtered_qs = await self.filter_queryset(base_qs)
except ValidationError as e:
return Response(errors.bad_request_error(
*(e.error_list if hasattr(e, "error_list") else e.error_dict.items())), status=400)
authz_middleware.mark_authz_done(request)
return Response(
errors.bad_request_error(*(e.error_list if hasattr(e, "error_list") else e.error_dict.items())),
status=status.HTTP_400_BAD_REQUEST,
)

tissues_count, sampled_tissues = biosample_tissue_stats(filtered_qs)
experiments_count, experiment_types = experiment_type_stats(filtered_qs)
tissues_count, sampled_tissues = await biosample_tissue_stats(filtered_qs)
experiments_count, experiment_types = await experiment_type_stats(filtered_qs)

return Response({
"matches": filtered_qs.values_list("id", flat=True),
"matches": await filtered_qs.values_list("id", flat=True),
"biosamples": {
"count": tissues_count,
"sampled_tissue": sampled_tissues
"sampled_tissue": sampled_tissues,
},
"experiments": {
"count": experiments_count,
"experiment_type": experiment_types
"experiment_type": experiment_types,
}
})
Loading

0 comments on commit 8958858

Please sign in to comment.