Get tests to pass #34

Merged · 17 commits · Jun 18, 2024
4 changes: 2 additions & 2 deletions .github/workflows/build-and-push.yml
@@ -16,11 +16,11 @@ jobs:
      - name: set up Python
        uses: actions/setup-python@v2
        with:
-          python-version: '3.11'
+          python-version: '3.12'
      - name: install poetry
        run: |
          python -m pip install --no-cache-dir poetry==1.8 supervisor
      - name: set up docker
        run: |
          make gha-setup
4 changes: 2 additions & 2 deletions .github/workflows/build-and-test.yml
@@ -20,11 +20,11 @@ jobs:
      - name: set up Python
        uses: actions/setup-python@v2
        with:
-          python-version: '3.11'
+          python-version: '3.12'
      - name: install poetry
        run: |
          python -m pip install --no-cache-dir poetry==1.8 supervisor
      - name: set up docker
        run: make gha-setup

3 changes: 3 additions & 0 deletions .gitignore
@@ -257,3 +257,6 @@ test_app/test_infra/*

# temp files
/tmp/*
+
+# build artifacts
+requirements.txt
2 changes: 2 additions & 0 deletions Dockerfile
@@ -4,6 +4,8 @@ RUN apt update && \
    apt install -y make gcc && \
    apt clean

+COPY requirements.txt .
+RUN pip install -r requirements.txt
COPY dist/platformics-0.1.0-py3-none-any.whl /tmp/platformics-0.1.0-py3-none-any.whl
RUN cd /tmp/ && pip install platformics-0.1.0-py3-none-any.whl && rm -rf /tmp/*.whl

9 changes: 5 additions & 4 deletions Makefile
@@ -20,10 +20,6 @@ help: ## display help for this makefile
	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
	@echo "### SHARED FUNCTIONS END ###"

-.PHONY: codegen
-codegen:
-	$(MAKE_TEST_APP) codegen
-
.PHONY: codegen
codegen: ## Run codegen to convert the LinkML schema to a GQL API
	$(docker_compose_run) $(CONTAINER) python3 -m platformics.cli.main api generate --schemafile ./schema/schema.yaml --output-prefix .

@@ -75,7 +71,12 @@ gha-setup: ## Set up the environment in CI
build:
	rm -rf dist/*.whl
	poetry build
+	# Export poetry dependency list as a requirements.txt, which makes Docker builds
+	# faster by not having to reinstall all dependencies every time we build a new wheel.
+	poetry export --without-hashes --format=requirements.txt > requirements.txt
	$(docker_compose) build
	$(MAKE_TEST_APP) build
+	rm requirements.txt

.PHONY: dev ## Launch a container suitable for developing the platformics library
dev:
2 changes: 1 addition & 1 deletion platformics/api/core/gql_loaders.py
@@ -9,7 +9,7 @@

import platformics.database.models as db
from platformics.api.core.errors import PlatformicsError
-from platformics.api.core.helpers import get_aggregate_db_query, get_db_query, get_db_rows
+from platformics.api.core.query_builder import get_aggregate_db_query, get_db_query, get_db_rows
from platformics.database.connect import AsyncDB
from platformics.security.authorization import CerbosAction

platformics/api/core/{helpers.py → query_builder.py}
@@ -18,15 +18,15 @@

import platformics.database.models as db
from platformics.api.core.errors import PlatformicsError
-from platformics.api.core.gql_to_sql import OrderBy, aggregator_map, operator_map
+from platformics.api.core.query_input_types import aggregator_map, operator_map, orderBy
from platformics.database.models.base import Base
from platformics.security.authorization import CerbosAction, get_resource_query

E = typing.TypeVar("E", db.File, db.Entity)
T = typing.TypeVar("T")


-def apply_order_by(field: str, direction: OrderBy, query: Select) -> Select:
+def apply_order_by(field: str, direction: orderBy, query: Select) -> Select:
    match direction.value:
        case "asc":
            query = query.order_by(getattr(query.selected_columns, field).asc())
@@ -44,9 +44,9 @@ def apply_order_by(field: str, direction: OrderBy, query: Select) -> Select:


class IndexedOrderByClause(TypedDict):
-    field: dict[str, OrderBy] | dict[str, dict[str, Any]]
+    field: dict[str, orderBy] | dict[str, dict[str, Any]]
    index: int
-    sort: OrderBy
+    sort: orderBy


def convert_where_clauses_to_sql(
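The `match` on `direction.value` in `apply_order_by` maps each enum member onto the corresponding SQLAlchemy ordering modifier. A minimal runnable sketch of the same idea, assuming SQLAlchemy 2.x (the `samples` table and `name` column are illustrative, not from the PR):

```python
import enum

import sqlalchemy as sa


class orderBy(enum.Enum):
    asc = "asc"
    asc_nulls_first = "asc_nulls_first"
    desc = "desc"


metadata = sa.MetaData()
samples = sa.Table("samples", metadata, sa.Column("name", sa.String))


def apply_order_by(field: str, direction: orderBy, query: sa.Select) -> sa.Select:
    # Look up the selected column by name, then attach the requested ordering.
    col = getattr(query.selected_columns, field)
    match direction.value:
        case "asc":
            return query.order_by(col.asc())
        case "asc_nulls_first":
            return query.order_by(col.asc().nulls_first())
        case "desc":
            return query.order_by(col.desc())
    return query


print(apply_order_by("name", orderBy.asc, sa.select(samples)))
# SELECT samples.name FROM samples ORDER BY samples.name ASC
```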
platformics/api/core/{gql_to_sql.py → query_input_types.py}
@@ -42,7 +42,7 @@


@strawberry.enum
-class OrderBy(enum.Enum):
+class orderBy(enum.Enum):  # noqa
    # defaults to nulls last
    asc = "asc"
    asc_nulls_first = "asc_nulls_first"
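The `# noqa` is likely there because the class name deliberately breaks PEP 8 naming: with `@strawberry.enum`, the Python class name becomes the GraphQL type name, so a lowercase `orderBy` keeps the schema's camelCase convention. A small sketch of the effect (the `Query` type here is invented for illustration):

```python
import enum

import strawberry


@strawberry.enum
class orderBy(enum.Enum):  # noqa
    asc = "asc"
    desc = "desc"


@strawberry.type
class Query:
    direction: orderBy = orderBy.asc


schema = strawberry.Schema(query=Query)
print(schema)  # the printed SDL contains: enum orderBy { asc desc }
```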
7 changes: 4 additions & 3 deletions platformics/api/files.py
@@ -33,8 +33,8 @@
    require_auth_principal,
    require_system_user,
)
-from platformics.api.core.gql_to_sql import EnumComparators, IntComparators, StrComparators, UUIDComparators
-from platformics.api.core.helpers import get_db_rows
+from platformics.api.core.query_builder import get_db_rows
+from platformics.api.core.query_input_types import EnumComparators, IntComparators, StrComparators, UUIDComparators
from platformics.api.core.strawberry_extensions import DependencyExtension
from platformics.api.types.entities import Entity
from platformics.security.authorization import CerbosAction, get_resource_query
@@ -262,7 +262,8 @@ async def validate_file(

    # Validate data
    try:
-        validator.validate(client=s3_client, bucket=file.namespace, file_path=file.path)
+        validator(s3_client, file.namespace, file.path).validate()
+
        file_size = s3_client.head_object(Bucket=file.namespace, Key=file.path)["ContentLength"]
    except:  # noqa
        file.status = db.FileStatus.FAILED
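The call change reflects the reworked handler API in `platformics/support/format_handlers.py` (further down in this diff): validators are now instantiated with the S3 client, bucket, and key, and `validate()` takes no arguments. A hedged usage sketch — the boto3 client setup, bucket, and key are placeholders, not values from the PR:

```python
import boto3

from platformics.support.format_handlers import get_validator

s3_client = boto3.client("s3")

# Pick a handler class by declared format, then validate the object in S3.
handler_cls = get_validator("fastq")  # -> FastqHandler
handler_cls(s3_client, "local-data", "uploads/sample.fastq.gz").validate()
```

Because `contents()` sniffs the `.gz` suffix on the key, the same call works for gzipped and plain-text uploads.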
7 changes: 5 additions & 2 deletions platformics/api/setup.py
@@ -12,9 +12,10 @@
from strawberry.schema.config import StrawberryConfig
from strawberry.schema.name_converter import HasGraphQLName, NameConverter

-from platformics.api.core.deps import get_auth_principal, get_cerbos_client, get_db_module, get_engine
+from platformics.api.core.deps import get_auth_principal, get_cerbos_client, get_db_module, get_engine, get_s3_client
from platformics.api.core.gql_loaders import EntityLoader
from platformics.database.connect import AsyncDB
+from platformics.database.models.file import File
from platformics.settings import APISettings

# ------------------------------------------------------------------------------
@@ -53,10 +54,12 @@ def get_app(settings: APISettings, schema: strawberry.Schema, db_module: typing.
    """
    Make sure tests can get their own instances of the app.
    """
+    File.set_settings(settings)
+    File.set_s3_client(get_s3_client(settings))
    settings = APISettings.model_validate({})  # Workaround for https://github.com/pydantic/pydantic/issues/3753

    title = settings.SERVICE_NAME
-    graphql_app: GraphQLRouter = GraphQLRouter(schema, context_getter=get_context, graphiql=True)
+    graphql_app: GraphQLRouter = GraphQLRouter(schema, context_getter=get_context)
    _app = FastAPI(title=title, debug=settings.DEBUG)
    _app.include_router(graphql_app, prefix="/graphql")
    # Add a global settings object to the app that we can use as a dependency

Review comment from the author on the removed graphiql=True argument: graphql_ide is set to graphiql by default.
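As that review comment notes, dropping `graphiql=True` is safe because the IDE stays enabled by default. A minimal sketch, assuming a current strawberry-graphql release where the relevant parameter is `graphql_ide` (the `Query` type is invented for illustration):

```python
import strawberry
from strawberry.fastapi import GraphQLRouter


@strawberry.type
class Query:
    ping: str = "pong"


schema = strawberry.Schema(query=Query)

# Equivalent routers: graphql_ide defaults to "graphiql" when not specified.
default_router = GraphQLRouter(schema)
explicit_router = GraphQLRouter(schema, graphql_ide="graphiql")
```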
4 changes: 2 additions & 2 deletions platformics/codegen/templates/api/types/class_name.py.j2
@@ -17,7 +17,7 @@ import platformics.database.models as base_db
import database.models as db
import strawberry
import datetime
-from platformics.api.core.helpers import get_db_rows, get_aggregate_db_rows
+from platformics.api.core.query_builder import get_db_rows, get_aggregate_db_rows
from api.validators.{{cls.snake_name}} import {{cls.name}}CreateInputValidator, {{cls.name}}UpdateInputValidator
{%- if render_files %}
from platformics.api.files import File, FileWhereClause
@@ -34,7 +34,7 @@ from cerbos.sdk.model import Principal, Resource
from fastapi import Depends
from platformics.api.core.errors import PlatformicsError
from platformics.api.core.deps import get_cerbos_client, get_db_session, require_auth_principal, is_system_user
-from platformics.api.core.gql_to_sql import aggregator_map, orderBy, EnumComparators, DatetimeComparators, IntComparators, FloatComparators, StrComparators, UUIDComparators, BoolComparators
+from platformics.api.core.query_input_types import aggregator_map, orderBy, EnumComparators, DatetimeComparators, IntComparators, FloatComparators, StrComparators, UUIDComparators, BoolComparators
from platformics.api.core.strawberry_extensions import DependencyExtension
from platformics.security.authorization import CerbosAction, get_resource_query
from sqlalchemy import inspect
119 changes: 85 additions & 34 deletions platformics/support/format_handlers.py
@@ -3,8 +3,8 @@
"""

import gzip
-import tempfile
-import typing
+import io
+import json
from abc import abstractmethod
from typing import Protocol

@@ -17,62 +17,113 @@ class FileFormatHandler(Protocol):
    Interface for a file format handler
    """

-    @classmethod
+    s3client: S3Client
+    bucket: str
+    key: str
+
+    def __init__(self, s3client: S3Client, bucket: str, key: str):
+        self.s3client = s3client
+        self.bucket = bucket
+        self.key = key
+
+    def contents(self) -> str:
+        """
+        Get the contents of the file
+        """
+        body = self.s3client.get_object(Bucket=self.bucket, Key=self.key, Range="bytes=0-1000000")["Body"]
+        if self.key.endswith(".gz"):
+            with gzip.GzipFile(fileobj=body) as fp:
+                return fp.read().decode("utf-8")
+        return body.read().decode("utf-8")
+
    @abstractmethod
-    def validate(cls, client: S3Client, bucket: str, file_path: str) -> None:
+    def validate(self) -> None:
        raise NotImplementedError


+class FastaHandler(FileFormatHandler):
+    """
+    Validate FASTA files. Note that even truncated FASTA files are supported:
+    ">" is a valid FASTA file, and so is ">abc" (without a sequence).
+    """
+
+    def validate(self) -> None:
+        sequences = 0
+        for _ in SeqIO.parse(io.StringIO(self.contents()), "fasta"):
+            sequences += 1
+        assert sequences > 0
+
+
class FastqHandler(FileFormatHandler):
    """
-    Validate FASTQ files (contain sequencing reads)
+    Validate FASTQ files. Can't use biopython directly because large files would be truncated.
+    This removes truncated FASTQ records by assuming 1 read = 4 lines.
    """

-    @classmethod
-    def validate(cls, client: S3Client, bucket: str, file_path: str) -> None:
-        for fp in get_file_preview(client, bucket, file_path):
-            assert len(list(SeqIO.parse(fp, "fastq"))) > 0
+    def validate(self) -> None:
+        # Load file and only keep non-truncated FASTQ records (4 lines per record)
+        fastq = self.contents().split("\n")
+        fastq = fastq[: len(fastq) - (len(fastq) % 4)]
+
+        # Validate it with SeqIO
+        reads = 0
+        for _ in SeqIO.parse(io.StringIO("\n".join(fastq)), "fastq"):
+            reads += 1
+        assert reads > 0


-class FastaHandler(FileFormatHandler):
+class BedHandler(FileFormatHandler):
    """
-    Validate FASTA files (contain sequences)
+    Validate BED files using basic checks.
    """

-    @classmethod
-    def validate(cls, client: S3Client, bucket: str, file_path: str) -> None:
-        for fp in get_file_preview(client, bucket, file_path):
-            assert len(list(SeqIO.parse(fp, "fasta"))) > 0
+    def validate(self) -> None:
+        # Ignore last line since it could be truncated
+        records = self.contents().split("\n")[:-1]
+        assert len(records) > 0
+
+        # BED files must have at least 3 columns - error out if the file incorrectly uses spaces instead of tabs
+        num_cols = -1
+        for record in records:
+            assert len(record.split("\t")) >= 3
+            # All rows should have the same number of columns
+            if num_cols == -1:
+                num_cols = len(record.split("\t"))
+            else:
+                assert num_cols == len(record.split("\t"))


-def get_file_preview(
-    client: S3Client,
-    bucket: str,
-    file_path: str,
-) -> typing.Generator[typing.TextIO, typing.Any, typing.Any]:
+class JsonHandler(FileFormatHandler):
    """
-    Get first 1MB of a file and save it in a temporary file
+    Validate JSON files
    """
-    data = client.get_object(Bucket=bucket, Key=file_path, Range="bytes=0-1000000")["Body"].read()
-    fp = tempfile.NamedTemporaryFile("w+b")
-    fp.write(data)
-    fp.flush()

-    try:
-        data.decode("utf-8")
-        with open(fp.name, "r") as fh:
-            yield fh
-    except UnicodeDecodeError:
-        return gzip.open(fp.name, "rt")
+    def validate(self) -> None:
+        json.loads(self.contents())  # throws an exception for invalid JSON
+
+
+class ZipHandler(FileFormatHandler):
+    """
+    Validate ZIP files
+    """
+
+    def validate(self) -> None:
+        assert self.key.endswith(".zip")  # raises AssertionError if the key lacks a .zip extension


def get_validator(format: str) -> type[FileFormatHandler]:
    """
    Returns the validator for a given file format
    """
-    if format == "fastq":
-        return FastqHandler
-    elif format == "fasta":
+    if format in ["fa", "fasta"]:
        return FastaHandler
+    elif format == "fastq":
+        return FastqHandler
+    elif format == "bed":
+        return BedHandler
+    elif format == "json":
+        return JsonHandler
+    elif format == "zip":
+        return ZipHandler
    else:
        raise Exception(f"Unknown file format '{format}'")
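The modulo-4 trim in `FastqHandler.validate` deserves a worked example: since `contents()` only reads the first megabyte, the last record may be cut off mid-read, and trimming the line count down to a multiple of four drops exactly that partial record. A self-contained sketch of the arithmetic (the reads are invented):

```python
# Two records, but the second is truncated after its sequence line.
text = "@read1\nACGT\n+\nFFFF\n@read2\nAC"
lines = text.split("\n")               # 6 lines
keep = len(lines) - (len(lines) % 4)   # 6 - 2 = 4
trimmed = "\n".join(lines[:keep])
assert trimmed == "@read1\nACGT\n+\nFFFF"  # only the complete record survives
```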
4 changes: 4 additions & 0 deletions test_app/Makefile
@@ -69,6 +69,10 @@ codegen: ## Run codegen to convert the LinkML schema to a GQL API
test: init ## Run tests
$(docker_compose) exec $(APP_CONTAINER) pytest -vvv

+.PHONY: test-file
+test-file: init ## Run tests for a specific file, ex: make test-file FILE=tests/test_file.py
+	$(docker_compose) exec $(APP_CONTAINER) pytest -vvv $(FILE)
+
.PHONY: restart
restart: ## Restart the GQL service
$(docker_compose_run) $(APP_CONTAINER) supervisorctl restart graphql_api
4 changes: 2 additions & 2 deletions test_app/main.py
@@ -5,15 +5,15 @@
import strawberry
import uvicorn
from platformics.api.setup import get_app, get_strawberry_config
+from platformics.api.core.error_handler import HandleErrors
from platformics.settings import APISettings
from database import models

from api.mutations import Mutation
from api.queries import Query

settings = APISettings.model_validate({})  # Workaround for https://github.com/pydantic/pydantic/issues/3753
-strawberry_config = get_strawberry_config()
-schema = strawberry.Schema(query=Query, mutation=Mutation, config=strawberry_config)
+schema = strawberry.Schema(query=Query, mutation=Mutation, config=get_strawberry_config(), extensions=[HandleErrors()])


# Create and run app
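`HandleErrors` comes from the new `platformics.api.core.error_handler` module, whose internals are not shown in this diff. As a hedged illustration of the mechanism it plugs into, here is a minimal strawberry `SchemaExtension` that inspects execution errors — the `LogErrors` class is invented for this sketch, not the PR's implementation:

```python
import strawberry
from strawberry.extensions import SchemaExtension


class LogErrors(SchemaExtension):
    # Runs around each operation; errors are available on the execution context.
    def on_operation(self):
        yield
        result = self.execution_context.result
        if result is not None and result.errors:
            for error in result.errors:
                print(f"GraphQL error: {error}")


@strawberry.type
class Query:
    ok: bool = True


schema = strawberry.Schema(query=Query, extensions=[LogErrors()])
```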
2 changes: 1 addition & 1 deletion test_app/schema/README.md
@@ -333,4 +333,4 @@ classes:
      range: string
    description:
      range: string
-```
+```