-
Notifications
You must be signed in to change notification settings - Fork 2
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CZID-8457] Create a file - part 2 #65
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4,7 +4,7 @@ | |
from platformics.database.connect import SyncDB | ||
from test_infra import factories as fa | ||
from mypy_boto3_s3.client import S3Client | ||
from database.models import File | ||
from database.models import File, SequencingRead | ||
import sqlalchemy as sa | ||
|
||
|
||
|
@@ -27,7 +27,7 @@ async def test_file_validation( | |
file = session.execute(sa.select(File)).scalars().one() | ||
|
||
valid_fastq_file = "test_infra/fixtures/test1.fastq" | ||
moto_client.put_object(Bucket=file.namespace, Key=file.path.lstrip("/"), Body=open(valid_fastq_file, "rb")) | ||
moto_client.put_object(Bucket=file.namespace, Key=file.path, Body=open(valid_fastq_file, "rb")) | ||
|
||
# Mark upload complete | ||
query = f""" | ||
|
@@ -64,7 +64,7 @@ async def test_invalid_fastq( | |
session.commit() | ||
file = session.execute(sa.select(File)).scalars().one() | ||
|
||
moto_client.put_object(Bucket=file.namespace, Key=file.path.lstrip("/"), Body="this is not a fastq file") | ||
moto_client.put_object(Bucket=file.namespace, Key=file.path, Body="this is not a fastq file") | ||
|
||
# Mark upload complete | ||
query = f""" | ||
|
@@ -80,3 +80,56 @@ async def test_invalid_fastq( | |
res = await gql_client.query(query, member_projects=[project1_id]) | ||
fileinfo = res["data"]["markUploadComplete"] | ||
assert fileinfo["status"] == "FAILED" | ||
|
||
|
||
# Test creating files | ||
@pytest.mark.asyncio | ||
@pytest.mark.parametrize( | ||
"member_projects,project_id,entity_field", | ||
[ | ||
([456], 123, "sequence_file"), # Can't create file for entity you don't have access to | ||
([123], 123, "does_not_exist"), # Can't create file for entity that isn't connected to a valid file type | ||
([123], 123, "sequence_file"), # Can create file for entity you have access to | ||
], | ||
) | ||
async def test_create_file( | ||
member_projects: list[int], | ||
project_id: int, | ||
entity_field: str, | ||
sync_db: SyncDB, | ||
gql_client: GQLTestClient, | ||
) -> None: | ||
user_id = 12345 | ||
|
||
# Create mock data | ||
with sync_db.session() as session: | ||
fa.SessionStorage.set_session(session) | ||
fa.SequencingReadFactory.create(owner_user_id=user_id, collection_id=project_id) | ||
fa.FileFactory.update_file_ids() | ||
session.commit() | ||
|
||
sequencing_read = session.execute(sa.select(SequencingRead)).scalars().one() | ||
entity_id = sequencing_read.entity_id | ||
|
||
# Try creating a file | ||
mutation = f""" | ||
mutation MyQuery {{ | ||
createFile(entityId: "{entity_id}", entityFieldName: "{entity_field}", | ||
fileName: "test.fastq", fileSize: 123, fileFormat: "fastq") {{ | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do we have (/want/need?) to get the file size info as part of the 'create' call? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure, actually; I don't think we need the file size to be stored in the DB for the app to work. The only use case I can think of is: we want basic stats about how much space certain uploaded files take up on S3, and querying the DB would be much faster than querying S3 (but not guaranteed to be accurate). |
||
url | ||
expiration | ||
method | ||
protocol | ||
fields | ||
}} | ||
}} | ||
""" | ||
output = await gql_client.query(mutation, member_projects=member_projects) | ||
|
||
# If we don't have access to this entity, or are trying to link an entity with a made-up file type, we should get an error | ||
if project_id not in member_projects or entity_field == "does_not_exist": | ||
assert output["data"] is None | ||
assert output["errors"] is not None | ||
return | ||
|
||
assert output["data"]["createFile"]["url"] == "https://local-bucket.s3.amazonaws.com/" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can we return a fully formed signed URL instead of all the parts of one? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I tried putting it all in a URL, but it didn't work for some reason; I had to send the fields in the body of the POST request. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,5 @@ | ||
import factory | ||
import faker | ||
import sqlalchemy as sa | ||
from database.models import File, FileStatus, Sample, SequencingRead | ||
from factory import Faker, fuzzy | ||
|
@@ -11,6 +12,12 @@ | |
Faker.add_provider(EnumProvider) | ||
|
||
|
||
def generate_relative_file_path(obj) -> str: # type: ignore | ||
fake = faker.Faker() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think we need to use factory.Faker() here. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. |
||
# Can't use absolute=True param because that requires newer version of faker than faker-biology supports | ||
return fake.file_path(depth=3, extension=obj.file_format).lstrip("/") | ||
|
||
|
||
# TODO, this is a lame singleton to prevent this library from | ||
# requiring an active SA session at import-time. We should try | ||
# to refactor it out when we know more about factoryboy | ||
|
@@ -50,8 +57,7 @@ class Meta: | |
status = factory.Faker("enum", enum_cls=FileStatus) | ||
protocol = fuzzy.FuzzyChoice(["S3", "GCP"]) | ||
namespace = fuzzy.FuzzyChoice(["local-bucket", "remote-bucket"]) | ||
# path = factory.LazyAttribute(lambda o: {factory.Faker("file_path", depth=3, extension=o.file_format)}) | ||
path = factory.Faker("file_path", depth=3) | ||
path = factory.LazyAttribute(lambda o: generate_relative_file_path(o)) | ||
file_format = fuzzy.FuzzyChoice(["fasta", "fastq", "bam"]) | ||
compression_type = fuzzy.FuzzyChoice(["gz", "bz2", "xz"]) | ||
size = fuzzy.FuzzyInteger(1024, 1024 * 1024 * 1024) # Between 1k and 1G | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While I'm at it, I'm removing all lstrip calls and making sure the file paths generated in the seed script don't start with `/`.