Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CZID-8699] Multipart uploads #87

Merged
merged 6 commits into from
Oct 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
101 changes: 86 additions & 15 deletions entities/api/files.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,26 @@
import json
import typing
import database.models as db
import strawberry
import uuid
import uuid6
from fastapi import Depends
from mypy_boto3_s3.client import S3Client
from mypy_boto3_sts.client import STSClient
from platformics.api.core.deps import get_s3_client
from platformics.api.core.strawberry_extensions import DependencyExtension
from api.strawberry import strawberry_sqlalchemy_mapper
from strawberry.scalars import JSON

from cerbos.sdk.client import CerbosClient
from cerbos.sdk.model import Principal
from platformics.api.core.deps import get_cerbos_client, get_db_session, require_auth_principal, get_settings
from platformics.api.core.deps import (
get_cerbos_client,
get_db_session,
require_auth_principal,
get_settings,
get_sts_client,
)
from platformics.api.core.settings import APISettings
from sqlalchemy.ext.asyncio import AsyncSession
from platformics.security.authorization import CerbosAction, get_resource_query
Expand All @@ -33,6 +41,17 @@ class SignedURL:
fields: typing.Optional[JSON] = None # type: ignore


# GraphQL type carrying temporary credentials that let a client upload one file
# directly to object storage. Built by generate_multipart_upload_token() and
# returned by the upload_file mutation.
@strawberry.type
class MultipartUploadCredentials:
    # Storage protocol identifier; generate_multipart_upload_token() sets this to "S3".
    protocol: str
    # Copied from the file record's `namespace`; used as the bucket part of the S3 ARN.
    namespace: str
    # Copied from the file record's `path`; used as the object key part of the S3 ARN.
    path: str
    # The three values below come from the "Credentials" section of the STS response.
    access_key_id: str
    secret_access_key: str
    session_token: str
    # Expiry timestamp of the credentials, serialized with datetime.isoformat().
    expiration: str


# Define graphQL input types so we can pass a "file" JSON to mutations.
# Keep them separate so we can control which fields are required.
@strawberry.input()
Expand Down Expand Up @@ -71,7 +90,7 @@ def download_link(


# ------------------------------------------------------------------------------
# Mutations
# Utilities
# ------------------------------------------------------------------------------


Expand All @@ -91,6 +110,49 @@ async def validate_file(
await session.commit()


def generate_multipart_upload_token(
    new_file: db.File,
    expiration: int = 3600,
    sts_client: STSClient = Depends(get_sts_client),
) -> MultipartUploadCredentials:
    """Issue temporary AWS credentials scoped to a single file's S3 object.

    Requests a federation token from STS with an inline session policy that
    only allows object-level upload/read actions on
    ``arn:aws:s3:::{new_file.namespace}/{new_file.path}``, so the caller can
    perform a multipart upload of that one object and nothing else.

    Args:
        new_file: File record whose ``namespace`` (bucket) and ``path`` (key)
            identify the upload destination.
        expiration: Requested credential lifetime in seconds; passed to STS as
            ``DurationSeconds``.
        sts_client: STS client used to mint the token.

    Returns:
        The temporary credentials plus the destination they are valid for.
    """
    # Design note (from the review of this change): single-part presigned-POST
    # uploads (limited to 5 GB) were intentionally dropped so there is one upload
    # flow. They may be added back if clients have trouble with multipart uploads.
    #
    # NOTE(review): "s3:CreateMultipartUpload" does not seem to be listed among the
    # S3 IAM actions (initiating a multipart upload appears to be authorized by
    # "s3:PutObject") -- confirm against the S3 permissions docs before relying on it.
    policy = {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Sid": "AllowSampleUploads",
                "Effect": "Allow",
                "Action": [
                    "s3:GetObject",
                    "s3:PutObject",
                    "s3:CreateMultipartUpload",
                    "s3:AbortMultipartUpload",
                    "s3:ListMultipartUploadParts",
                ],
                "Resource": f"arn:aws:s3:::{new_file.namespace}/{new_file.path}",
            }
        ],
    }

    # Generate an STS token to allow users to upload this file directly to S3.
    # STS limits the federated user name to 32 characters, so use a short prefix
    # with the compact hex form of the UUID and truncate. The UUIDv7 prefix that
    # survives still carries the timestamp plus random bits.
    token_name = f"upload-{uuid6.uuid7().hex}"[:32]
    response = sts_client.get_federation_token(
        Name=token_name,
        Policy=json.dumps(policy),
        DurationSeconds=expiration,
    )
    credentials = response["Credentials"]

    return MultipartUploadCredentials(
        protocol="S3",
        namespace=new_file.namespace,
        path=new_file.path,
        access_key_id=credentials["AccessKeyId"],
        secret_access_key=credentials["SecretAccessKey"],
        session_token=credentials["SessionToken"],
        expiration=credentials["Expiration"].isoformat(),
    )


# ------------------------------------------------------------------------------
# Mutations
# ------------------------------------------------------------------------------


@strawberry.mutation(extensions=[DependencyExtension()])
async def mark_upload_complete(
file_id: uuid.UUID,
Expand Down Expand Up @@ -120,12 +182,13 @@ async def create_file(
cerbos_client: CerbosClient = Depends(get_cerbos_client),
principal: Principal = Depends(require_auth_principal),
s3_client: S3Client = Depends(get_s3_client),
sts_client: STSClient = Depends(get_sts_client),
settings: APISettings = Depends(get_settings),
) -> db.File:
new_file = await create_or_upload_file(
entity_id, entity_field_name, file, -1, session, cerbos_client, principal, s3_client, settings
entity_id, entity_field_name, file, -1, session, cerbos_client, principal, s3_client, sts_client, settings
)
assert isinstance(new_file, db.File) # this is to reassure mypy that we are in fact returning db.File
assert isinstance(new_file, db.File) # reassure mypy that we're returning the right type
return new_file


Expand All @@ -139,13 +202,23 @@ async def upload_file(
cerbos_client: CerbosClient = Depends(get_cerbos_client),
principal: Principal = Depends(require_auth_principal),
s3_client: S3Client = Depends(get_s3_client),
sts_client: STSClient = Depends(get_sts_client),
settings: APISettings = Depends(get_settings),
) -> SignedURL:
new_file = await create_or_upload_file(
entity_id, entity_field_name, file, expiration, session, cerbos_client, principal, s3_client, settings
) -> MultipartUploadCredentials:
credentials = await create_or_upload_file(
entity_id,
entity_field_name,
file,
expiration,
session,
cerbos_client,
principal,
s3_client,
sts_client,
settings,
)
assert isinstance(new_file, SignedURL) # this is to reassure mypy that we are in fact returning a SignedURL
return new_file
assert isinstance(credentials, MultipartUploadCredentials) # reassure mypy that we're returning the right type
return credentials


async def create_or_upload_file(
Expand All @@ -157,8 +230,9 @@ async def create_or_upload_file(
cerbos_client: CerbosClient = Depends(get_cerbos_client),
principal: Principal = Depends(require_auth_principal),
s3_client: S3Client = Depends(get_s3_client),
sts_client: STSClient = Depends(get_sts_client),
settings: APISettings = Depends(get_settings),
) -> db.File | SignedURL:
) -> db.File | MultipartUploadCredentials:
# Basic validation
if "/" in file.name:
raise Exception("File name should not contain /")
Expand Down Expand Up @@ -210,9 +284,6 @@ async def create_or_upload_file(
await validate_file(new_file, session, s3_client)
return new_file

# If new file, create a signed URL
# If new file, create an STS token for multipart upload
else:
response = s3_client.generate_presigned_post(Bucket=new_file.namespace, Key=new_file.path, ExpiresIn=expiration)
return SignedURL(
url=response["url"], fields=response["fields"], protocol="https", method="POST", expiration=expiration
)
return generate_multipart_upload_token(new_file, expiration, sts_client)
4 changes: 2 additions & 2 deletions entities/api/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from platformics.database.connect import AsyncDB
from strawberry.fastapi import GraphQLRouter
from api.strawberry import strawberry_sqlalchemy_mapper
from api.files import File, SignedURL, mark_upload_complete, create_file, upload_file
from api.files import File, MultipartUploadCredentials, mark_upload_complete, create_file, upload_file

######################
# Strawberry-GraphQL #
Expand Down Expand Up @@ -75,7 +75,7 @@ class Mutation:

# File management
create_file: File = create_file
upload_file: SignedURL = upload_file
upload_file: MultipartUploadCredentials = upload_file
mark_upload_complete: File = mark_upload_complete


Expand Down
15 changes: 11 additions & 4 deletions entities/api/schema.graphql

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

118 changes: 96 additions & 22 deletions entities/api/schema.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading