From b135e6b4f3bd3360400d8208f491bf2190afb467 Mon Sep 17 00:00:00 2001 From: Robert Aboukhalil Date: Fri, 22 Sep 2023 10:10:46 -0700 Subject: [PATCH 1/3] Add tests --- entities/api/tests/test_file_writes.py | 53 +++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/entities/api/tests/test_file_writes.py b/entities/api/tests/test_file_writes.py index 31943d19..f73a136b 100644 --- a/entities/api/tests/test_file_writes.py +++ b/entities/api/tests/test_file_writes.py @@ -4,7 +4,7 @@ from platformics.database.connect import SyncDB from test_infra import factories as fa from mypy_boto3_s3.client import S3Client -from database.models import File +from database.models import File, SequencingRead import sqlalchemy as sa @@ -80,3 +80,54 @@ async def test_invalid_fastq( res = await gql_client.query(query, member_projects=[project1_id]) fileinfo = res["data"]["markUploadComplete"] assert fileinfo["status"] == "FAILED" + +# Test creating files +@pytest.mark.asyncio +@pytest.mark.parametrize( + "member_projects,project_id,entity_field", + [ + ([456], 123, "sequence_file"), # Can't create file for entity you don't have access to + ([123], 123, "does_not_exist"), # Can't create file for entity that isn't connected to a valid file type + ([123], 123, "sequence_file"), # Can create file for entity you have access to + ], +) +async def test_create_file( + member_projects: list[int], + project_id: int, + entity_field: str, + sync_db: SyncDB, + gql_client: GQLTestClient, +) -> None: + user_id = 12345 + + # Create mock data + with sync_db.session() as session: + fa.SessionStorage.set_session(session) + fa.SequencingReadFactory.create(owner_user_id=user_id, collection_id=project_id) + fa.FileFactory.update_file_ids() + session.commit() + + sequencing_read = session.execute(sa.select(SequencingRead)).scalars().one() + entity_id = sequencing_read.entity_id + + # Try creating a file + mutation = f""" + mutation MyQuery {{ + createFile(entityId: "{entity_id}", entityFieldName: "{entity_field}", fileName: "test.fastq", fileSize: 123, fileFormat: "fastq") {{ + url + expiration + method + protocol + fields + }} + }} + """ + output = await gql_client.query(mutation, member_projects=member_projects) + + # If don't have access to this entity, or trying to link an entity with a made up file type, should get an error + if project_id not in member_projects or entity_field == "does_not_exist": + assert output["data"] is None + assert output["errors"] is not None + return + + assert output["data"]["createFile"]["url"] == "https://local-bucket.s3.amazonaws.com/" From fc3a521dca995c8735cc27f6c2655ccb1f8c3287 Mon Sep 17 00:00:00 2001 From: Robert Aboukhalil Date: Fri, 22 Sep 2023 10:15:12 -0700 Subject: [PATCH 2/3] Fix lint --- entities/api/tests/test_file_writes.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/entities/api/tests/test_file_writes.py b/entities/api/tests/test_file_writes.py index f73a136b..2a8baa51 100644 --- a/entities/api/tests/test_file_writes.py +++ b/entities/api/tests/test_file_writes.py @@ -81,14 +81,15 @@ async def test_invalid_fastq( fileinfo = res["data"]["markUploadComplete"] assert fileinfo["status"] == "FAILED" + # Test creating files @pytest.mark.asyncio @pytest.mark.parametrize( "member_projects,project_id,entity_field", [ - ([456], 123, "sequence_file"), # Can't create file for entity you don't have access to - ([123], 123, "does_not_exist"), # Can't create file for entity that isn't connected to a valid file type - ([123], 123, "sequence_file"), # Can create file for entity you have access to + ([456], 123, "sequence_file"), # Can't create file for entity you don't have access to + ([123], 123, "does_not_exist"), # Can't create file for entity that isn't connected to a valid file type + ([123], 123, "sequence_file"), # Can create file for entity you have access to ], ) async def test_create_file( @@ -113,7 +114,8 @@ async def test_create_file( # Try creating a file mutation = f""" mutation MyQuery {{ - createFile(entityId: "{entity_id}", entityFieldName: "{entity_field}", fileName: "test.fastq", fileSize: 123, fileFormat: "fastq") {{ + createFile(entityId: "{entity_id}", entityFieldName: "{entity_field}", + fileName: "test.fastq", fileSize: 123, fileFormat: "fastq") {{ url expiration method From a99809fa19779844a60ac96f386099bbe147579c Mon Sep 17 00:00:00 2001 From: Robert Aboukhalil Date: Fri, 22 Sep 2023 10:57:46 -0700 Subject: [PATCH 3/3] Remove lstrip and update seed script --- entities/api/files.py | 6 +++--- entities/api/tests/test_file_writes.py | 4 ++-- entities/test_infra/factories.py | 10 ++++++++-- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/entities/api/files.py b/entities/api/files.py index 7a55450c..685aa15e 100644 --- a/entities/api/files.py +++ b/entities/api/files.py @@ -38,7 +38,7 @@ def download_link( ) -> typing.Optional[SignedURL]: if not self.path: # type: ignore return None - key = self.path.lstrip("/") # type: ignore + key = self.path # type: ignore bucket_name = self.namespace # type: ignore url = s3_client.generate_presigned_url( ClientMethod="get_object", Params={"Bucket": bucket_name, "Key": key}, ExpiresIn=expiration @@ -62,7 +62,7 @@ async def mark_upload_complete( validator = get_validator(file.file_format) try: - file_size = validator.validate(s3_client, file.namespace, file.path.lstrip("/")) + file_size = validator.validate(s3_client, file.namespace, file.path) except: # noqa file.status = db.FileStatus.FAILED else: @@ -122,7 +122,7 @@ async def create_file( await session.commit() # Create a signed URL - response = s3_client.generate_presigned_post(Bucket=file.namespace, Key=file.path.lstrip("/"), ExpiresIn=expiration) + response = s3_client.generate_presigned_post(Bucket=file.namespace, Key=file.path, ExpiresIn=expiration) return SignedURL( url=response["url"], fields=response["fields"], protocol="https", method="POST", expiration=expiration ) diff --git a/entities/api/tests/test_file_writes.py b/entities/api/tests/test_file_writes.py index 2a8baa51..2c2d8990 100644 --- a/entities/api/tests/test_file_writes.py +++ b/entities/api/tests/test_file_writes.py @@ -27,7 +27,7 @@ async def test_file_validation( file = session.execute(sa.select(File)).scalars().one() valid_fastq_file = "test_infra/fixtures/test1.fastq" - moto_client.put_object(Bucket=file.namespace, Key=file.path.lstrip("/"), Body=open(valid_fastq_file, "rb")) + moto_client.put_object(Bucket=file.namespace, Key=file.path, Body=open(valid_fastq_file, "rb")) # Mark upload complete query = f""" @@ -64,7 +64,7 @@ async def test_invalid_fastq( session.commit() file = session.execute(sa.select(File)).scalars().one() - moto_client.put_object(Bucket=file.namespace, Key=file.path.lstrip("/"), Body="this is not a fastq file") + moto_client.put_object(Bucket=file.namespace, Key=file.path, Body="this is not a fastq file") # Mark upload complete query = f""" diff --git a/entities/test_infra/factories.py b/entities/test_infra/factories.py index 9795281b..9098ff42 100644 --- a/entities/test_infra/factories.py +++ b/entities/test_infra/factories.py @@ -1,4 +1,5 @@ import factory +import faker import sqlalchemy as sa from database.models import File, FileStatus, Sample, SequencingRead from factory import Faker, fuzzy @@ -11,6 +12,12 @@ Faker.add_provider(EnumProvider) +def generate_relative_file_path(obj) -> str: # type: ignore + fake = faker.Faker() + # Can't use absolute=True param because that requires newer version of faker than faker-biology supports + return fake.file_path(depth=3, extension=obj.file_format).lstrip("/") + + # TODO, this is a lame singleton to prevent this library from # requiring an active SA session at import-time. We should try # to refactor it out when we know more about factoryboy @@ -50,8 +57,7 @@ class Meta: status = factory.Faker("enum", enum_cls=FileStatus) protocol = fuzzy.FuzzyChoice(["S3", "GCP"]) namespace = fuzzy.FuzzyChoice(["local-bucket", "remote-bucket"]) - # path = factory.LazyAttribute(lambda o: {factory.Faker("file_path", depth=3, extension=o.file_format)}) - path = factory.Faker("file_path", depth=3) + path = factory.LazyAttribute(lambda o: generate_relative_file_path(o)) file_format = fuzzy.FuzzyChoice(["fasta", "fastq", "bam"]) compression_type = fuzzy.FuzzyChoice(["gz", "bz2", "xz"]) size = fuzzy.FuzzyInteger(1024, 1024 * 1024 * 1024) # Between 1k and 1G