Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Api shared datamodels #134

Merged
merged 12 commits into from
Aug 5, 2024
3 changes: 2 additions & 1 deletion api/.vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
{
"python.testing.pytestArgs": [
"."
".",
"-vv"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true,
Expand Down
12 changes: 5 additions & 7 deletions api/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,14 @@ WORKDIR /bia-integrator
# only add poetry.lock if it exists (building on local)
ADD ./api/poetry.lock* api/poetry.lock
ADD ./api/pyproject.toml api/pyproject.toml

# add the actual project, which is what is often changed in between two different container builds
ADD . .
ADD bia-shared-datamodels bia-shared-datamodels

WORKDIR /bia-integrator/api

RUN poetry lock
RUN poetry install

# Everything up to here should be reused most times


CMD ["poetry", "run", "uvicorn", "--workers", "4", "--port", "8080", "--log-config", ".api/src/log_config.yml", "--host", "0.0.0.0", "api.app:app"]
# add the actual project, which is what is often changed in between two different container builds
ADD . /bia-integrator

CMD ["poetry", "run", "uvicorn", "--workers", "4", "--port", "8080", "--log-config", "./src/log_config.yml", "--host", "0.0.0.0", "api.app:app"]
Empty file added api/api/__init__.py
Empty file.
31 changes: 31 additions & 0 deletions api/api/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from . import public
from . import private
from .models.repository import repository_create, Repository


from fastapi import FastAPI
from typing import AsyncGenerator


async def repository_dependency() -> AsyncGenerator[Repository, None]:
    """Yield a ``Repository`` and release its Mongo client once the consumer is done.

    Written as an async generator so it can be used as a yield-style
    dependency: the code after ``yield`` runs as cleanup.
    """
    db = await repository_create(init=False)
    try:
        yield db
    finally:
        # Repository itself has no close(); the Motor client it stores in
        # `connection` is the object that owns the underlying sockets.
        db.connection.close()


# Application instance; the public and private routers are attached further down.
app = FastAPI(
    # Use the bare route name as the generated unique id for each route
    generate_unique_id_function=lambda route: route.name,
    # Setting this to true results in duplicated client classes (into *Input and *Output) where the api model has default values
    # See https://fastapi.tiangolo.com/how-to/separate-openapi-schemas/#do-not-separate-schemas
    separate_input_output_schemas=False,
    # NOTE(review): debug is hard-coded on — confirm this is intended outside local development
    debug=True,
)

app.openapi_version = "3.0.2"

# app.include_router(private.router, prefix="/v2")
# routes applied in the order they are declared
app.include_router(public.router, prefix="/v2")
app.include_router(private.router, prefix="/v2")
2 changes: 2 additions & 0 deletions api/api/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
OPENAPI_TAG_PUBLIC = "public"
OPENAPI_TAG_PRIVATE = "private"
16 changes: 16 additions & 0 deletions api/api/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from fastapi import HTTPException

# HTTP status returned when a write conflicts with what is already stored
STATUS_INVALID_UPDATE = 409


class InvalidUpdateException(HTTPException):
    """HTTP error carrying ``STATUS_INVALID_UPDATE`` for rejected writes."""

    def __init__(self, detail, **kwargs) -> None:
        # Remaining keyword arguments are forwarded untouched to HTTPException
        super().__init__(
            status_code=STATUS_INVALID_UPDATE,
            detail=detail,
            **kwargs,
        )


# HTTP status returned when a requested document is missing
STATUS_DOCUMENT_NOT_FOUND = 404


class DocumentNotFound(HTTPException):
    """HTTP error carrying ``STATUS_DOCUMENT_NOT_FOUND`` for missing documents."""

    def __init__(self, detail, **kwargs) -> None:
        # Remaining keyword arguments are forwarded untouched to HTTPException
        super().__init__(
            status_code=STATUS_DOCUMENT_NOT_FOUND,
            detail=detail,
            **kwargs,
        )
126 changes: 126 additions & 0 deletions api/api/models/repository.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
from motor.motor_asyncio import (
AsyncIOMotorClient,
AsyncIOMotorCollection,
AsyncIOMotorDatabase,
)
import os
from enum import Enum
import bia_shared_datamodels.bia_data_model as shared_data_models
import pymongo
from typing import Any
from .. import exceptions
import datetime

from bson.codec_options import CodecOptions
from bson.datetime_ms import DatetimeMS
from bson.codec_options import TypeCodec, TypeRegistry
from bson.binary import UuidRepresentation


DB_NAME = os.environ["DB_NAME"]
COLLECTION_BIA_INTEGRATOR = "bia_integrator"
COLLECTION_USERS = "users"
COLLECTION_OME_METADATA = "ome_metadata"


class DateCodec(TypeCodec):
    """Codec translating between ``datetime.date`` and BSON ``DatetimeMS``.

    Encoding turns a date into a ``DatetimeMS`` at 00:00 of the same day;
    decoding keeps only the calendar date of the stored value.
    """

    # the Python type acted upon by this type codec
    python_type = datetime.date
    # the BSON type acted upon by this type codec
    bson_type = DatetimeMS

    def transform_python(self, value: datetime.date) -> DatetimeMS:
        """Encode *value* as midnight of that day."""
        midnight = datetime.datetime.combine(value, datetime.time.min)
        return DatetimeMS(value=midnight)

    def transform_bson(self, value: DatetimeMS) -> datetime.date:
        """Decode *value* by dropping its time-of-day component."""
        as_datetime = value.as_datetime()
        return as_datetime.date()


# Policy consulted by Repository.persist_doc when an insert hits a duplicate key.
class OverwriteMode(str, Enum):
    # Every duplicate-key insert is rejected
    FAIL = "fail"
    # A duplicate-key insert is accepted when the stored document equals the one being inserted
    ALLOW_IDEMPOTENT = "allow_idempotent"


class Repository:
    """Async MongoDB access layer for the BIA document collections.

    Each instance opens its own Motor client from the ``MONGO_CONNSTRING``
    environment variable and exposes the ``users``, ``bia_integrator`` and
    ``ome_metadata`` collections of the ``DB_NAME`` database.
    """

    connection: AsyncIOMotorClient
    db: AsyncIOMotorDatabase
    users: AsyncIOMotorCollection
    biaint: AsyncIOMotorCollection
    ome_metadata: AsyncIOMotorCollection
    overwrite_mode: OverwriteMode = OverwriteMode.FAIL

    def __init__(self) -> None:
        mongo_connstring = os.environ["MONGO_CONNSTRING"]
        self.connection = AsyncIOMotorClient(
            mongo_connstring, uuidRepresentation="standard", maxPoolSize=10
        )
        self.db = self.connection.get_database(
            DB_NAME,
            # Looks like explicitly setting codec_options excludes settings from the client
            # so uuid_representation needs to be defined even if already defined in connection
            codec_options=CodecOptions(
                type_registry=TypeRegistry([DateCodec()]),
                uuid_representation=UuidRepresentation.STANDARD,
            ),
        )
        self.users = self.db[COLLECTION_USERS]
        self.biaint = self.db[COLLECTION_BIA_INTEGRATOR]
        self.ome_metadata = self.db[COLLECTION_OME_METADATA]

    async def persist_doc(self, model_doc: shared_data_models.DocumentMixin):
        """Insert *model_doc* into the ``bia_integrator`` collection.

        Returns the insert result, or ``None`` when the insert hit a duplicate
        key but ``overwrite_mode`` allows idempotent writes and the stored
        document equals the one being inserted.

        Raises:
            exceptions.InvalidUpdateException: for any other duplicate-key
                failure.
        """
        try:
            return await self.biaint.insert_one(model_doc.model_dump())
        except pymongo.errors.DuplicateKeyError as e:
            if (
                (e.details["code"] == 11000)
                and (self.overwrite_mode == OverwriteMode.ALLOW_IDEMPOTENT)
                and (await self._model_doc_exists(model_doc))
            ):
                return None

            # Keep the driver error as the cause so it shows up in tracebacks
            raise exceptions.InvalidUpdateException(str(e)) from e

    async def get_doc(self, uuid: shared_data_models.UUID, doc_type):
        """Fetch the document with *uuid* and build a *doc_type* from it.

        Raises:
            exceptions.DocumentNotFound: when no document has that uuid.
        """
        doc = await self._get_doc_raw(uuid=uuid)

        if doc is None:
            # This method serves every document type, so the message must not
            # claim the missing document is a Study.
            raise exceptions.DocumentNotFound(
                f"Document with uuid {uuid} does not exist"
            )

        return doc_type(**doc)

    async def _model_doc_exists(
        self, doc_model: shared_data_models.DocumentMixin
    ) -> bool:
        """Return whether an identical copy of *doc_model* is already stored."""
        return await self._doc_exists(doc_model.model_dump())

    async def _doc_exists(self, doc: dict) -> bool:
        """Return whether the stored document with ``doc["uuid"]`` equals *doc*.

        ``_id`` is ignored on both sides of the comparison; note that it is
        removed from *doc* in place.
        """
        result = await self._get_doc_raw(uuid=doc["uuid"])
        if result is None:
            return False

        # usually documents we attempt to insert/modify don't have ids, but pymongo modifies the passed dict and adds _id
        # so if doing insert(doc), then on failure calling this, doc will actually have _id even if it didn't have it before insert
        doc.pop("_id", None)

        return result == doc

    async def _get_doc_raw(self, **kwargs) -> Any:
        """Return the first document matching *kwargs* without ``_id``, or ``None``."""
        doc = await self.biaint.find_one(kwargs)
        if doc is None:
            # No match: let callers see None instead of failing on None.pop
            return None

        doc.pop("_id", None)

        return doc


async def repository_create(init: bool) -> Repository:
    """Build a ``Repository``; *init* is accepted but currently has no effect."""
    repo = Repository()
    if not init:
        return repo

    # TODO
    # await repository._init_collection_biaint()
    # await repository._init_collection_users()
    # await repository._init_collection_ome_metadata()
    return repo
48 changes: 48 additions & 0 deletions api/api/private.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from fastapi import APIRouter
from pydantic.alias_generators import to_snake

# ?
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Haha, must have added it as a note because I wasn't sure if importing directly would work out, deleted now

import bia_shared_datamodels.bia_data_model as shared_data_models
from .models.repository import Repository
from . import constants
from fastapi import APIRouter, Depends, status

router = APIRouter(
prefix="/private",
# dependencies=[Depends(get_current_user)], TODO
tags=[constants.OPENAPI_TAG_PRIVATE],
)
models_private = [
shared_data_models.Study,
shared_data_models.FileReference,
shared_data_models.ImageRepresentation,
shared_data_models.ExperimentalImagingDataset,
shared_data_models.Specimen,
shared_data_models.ExperimentallyCapturedImage,
shared_data_models.ImageAcquisition,
shared_data_models.SpecimenImagingPrepartionProtocol,
shared_data_models.SpecimenGrowthProtocol,
shared_data_models.BioSample,
shared_data_models.ImageAnnotationDataset,
shared_data_models.AnnotationFileReference,
shared_data_models.DerivedImage,
shared_data_models.AnnotationMethod,
]


def make_post_item(t):
    """Return an async POST handler whose ``doc`` parameter is annotated with model *t*.

    Going through a factory gives every generated handler its own binding of
    *t*, so each handler keeps the model it was created for.
    """

    # No docstring on the inner handler on purpose: presumably FastAPI would
    # surface it as the operation description — TODO confirm.
    # NOTE(review): a bare Depends() looks like it instantiates Repository per
    # call, and nothing here closes it — confirm whether
    # app.repository_dependency was meant to be used instead.
    async def post_item(doc: t, db: Repository = Depends()) -> None:
        # Store the document; persist_doc raises on conflicting duplicates
        await db.persist_doc(doc)

    return post_item


# Register one "create" endpoint per private model, e.g. POST /private/study
for t in models_private:
    router.add_api_route(
        f"/{to_snake(t.__name__)}",
        make_post_item(t),
        methods=["POST"],
        status_code=status.HTTP_201_CREATED,
        operation_id=f"post{t.__name__}",
        summary=f"Create {t.__name__}",
    )
47 changes: 47 additions & 0 deletions api/api/public.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
from fastapi import APIRouter, Depends
from pydantic.alias_generators import to_snake

# ?
import bia_shared_datamodels.bia_data_model as shared_data_models
from . import constants
from .models.repository import Repository


# Tag the public routes the same way private.py tags its router, so both
# groups are labelled in the generated OpenAPI document.
router = APIRouter(tags=[constants.OPENAPI_TAG_PUBLIC])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be using the public tag, like the private one is?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, I think I missed it, fixed

models_public = [
shared_data_models.Study,
shared_data_models.FileReference,
shared_data_models.ImageRepresentation,
shared_data_models.ExperimentalImagingDataset,
shared_data_models.Specimen,
shared_data_models.ExperimentallyCapturedImage,
shared_data_models.ImageAcquisition,
shared_data_models.SpecimenImagingPrepartionProtocol,
shared_data_models.SpecimenGrowthProtocol,
shared_data_models.BioSample,
shared_data_models.ImageAnnotationDataset,
shared_data_models.AnnotationFileReference,
shared_data_models.DerivedImage,
shared_data_models.AnnotationMethod,
]


def make_get_item(t):
    """Return an async GET handler that loads a document by uuid as model *t*."""
    # variables are function-scoped => add wrapper to bind each value of t
    # https://eev.ee/blog/2011/04/24/gotcha-python-scoping-closures/

    # @TODO: nicer wrapper?
    # NOTE(review): a bare Depends() looks like it instantiates Repository per
    # call, and nothing here closes it — confirm whether
    # app.repository_dependency was meant to be used instead.
    async def get_item(uuid: shared_data_models.UUID, db: Repository = Depends()) -> t:
        # get_doc raises DocumentNotFound when nothing is stored under this uuid
        return await db.get_doc(uuid, t)

    return get_item


for t in models_public:
Copy link
Contributor

@sherwoodf sherwoodf Aug 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Am I right in thinking that this gets run when app.py imports the public and private endpoints? Just wondering whether it would be better Python practice to have methods that get explicitly called in app.py? (This is a question, not a suggestion; I'm genuinely not sure about the implications.)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agree it would be nice to not run code in global scope at import, was just handwaving the problem until I get an example of a non-generated handler too, so I can see how I'd like to do it but still keep router global (to avoid nesting custom handlers in the factory), and make "the correct way" to use a router easy to tell (e.g. no import first and then a function to attach the generation functionality)

Added a fix, but I'm not sure we'll keep it like this once we have more functionality

router.add_api_route(
f"/{to_snake(t.__name__)}/{{uuid}}",
response_model=t,
operation_id=f"get{t.__name__}",
summary=f"Get {t.__name__}",
methods=["GET"],
endpoint=make_get_item(t),
)
Empty file added api/api/tests/__init__.py
Empty file.
89 changes: 89 additions & 0 deletions api/api/tests/test_minimal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""
WIP minimal tests
"""

from fastapi.testclient import TestClient
import pytest
import uuid as uuid_lib


TEST_SERVER_BASE_URL = "http://localhost.com/v2"


def get_uuid() -> str:
    """Return a freshly generated random UUID in its string form."""
    # @TODO: make this constant and require mongo to always be clean?
    return str(uuid_lib.uuid4())


def get_client(**kwargs) -> TestClient:
    """Create a ``TestClient`` for the app, rooted at ``TEST_SERVER_BASE_URL``.

    Before the client is built, a catch-all handler is registered on the app
    that answers any unhandled exception with a 500 response whose JSON body
    is the formatted traceback. Extra keyword arguments go to ``TestClient``.
    """
    import traceback

    from fastapi import Request
    from fastapi.responses import JSONResponse

    from ..app import app

    @app.exception_handler(Exception)
    def generic_exception_handler(request: Request, exc: Exception):
        formatted = traceback.format_exception(type(exc), exc, exc.__traceback__)
        return JSONResponse(status_code=500, content=formatted)

    return TestClient(app, base_url=TEST_SERVER_BASE_URL, **kwargs)


@pytest.fixture(scope="module")
def api_client() -> TestClient:
    """Module-scoped client built with ``raise_server_exceptions=False``."""
    # authenticate_client(client) # @TODO: DELETEME
    return get_client(raise_server_exceptions=False)


def test_create_study(api_client: TestClient):
    """Create a study through the private API and read it back unchanged."""
    study_uuid = get_uuid()
    study = {
        "uuid": study_uuid,
        "version": 0,
        "release_date": "2023-01-31",
        "accession_id": study_uuid,
        "title": "Test BIA study",
        "description": "description",
        "licence": "CC_BY_4.0",
        "see_also": [],
        "model": {"type_name": "Study", "version": 1},
        "acknowledgement": "test",
        "funding_statement": "test",
        "grant": [],
        "keyword": [],
        "related_publication": [],
        "author": [
            {
                "address": None,
                "display_name": "Test name",
                # Must be a well-formed address; the value round-trips through the API
                "contact_email": "test@example.com",
                "orcid": None,
                "role": None,
                "rorid": None,
                "website": None,
                "affiliation": [
                    {
                        "display_name": "Test",
                        "address": None,
                        "rorid": None,
                        "website": None,
                    }
                ],
            }
        ],
        "attribute": {},
    }

    rsp = api_client.post("private/study", json=study)
    assert rsp.status_code == 201, rsp.json()

    rsp = api_client.get(f"study/{study['uuid']}")
    assert rsp.status_code == 200, rsp.text
    assert rsp.json() == study
Loading
Loading