Skip to content

Commit

Permalink
M2-5315: Add conversion support
Browse files Browse the repository at this point in the history
  • Loading branch information
ibogretsov committed Feb 22, 2024
1 parent a096cbe commit 27cb2c7
Show file tree
Hide file tree
Showing 7 changed files with 179 additions and 22 deletions.
1 change: 1 addition & 0 deletions .env.default
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ CDN__ACCESS_KEY=minioaccess
CDN__REGION=
CDN__BUCKET_ANSWER=media
CDN__BUCKET=media
CDN__BUCKET_OPERATIONS=media
CDN__STORAGE_ADDRESS=http://localhost:9000
CDN__LEGACY_REGION=
CDN__LEGACY_BUCKET=
Expand Down
62 changes: 51 additions & 11 deletions src/apps/file/api/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import uuid
from functools import partial
from typing import cast
from urllib.parse import quote

import aiofiles
Expand Down Expand Up @@ -38,7 +39,9 @@
from apps.users.services.user import UserService
from apps.workspaces.crud.user_applet_access import UserAppletAccessCRUD
from apps.workspaces.domain.constants import Role
from apps.workspaces.domain.workspace import WorkspaceArbitrary
from apps.workspaces.errors import AnswerViewAccessDenied
from apps.workspaces.service import workspace
from apps.workspaces.service.user_access import UserAccessService
from config import settings
from infrastructure.database.deps import get_session
Expand Down Expand Up @@ -148,6 +151,33 @@ async def convert_not_supported_image(file: UploadFile):
return None


def _get_keys_and_bucket_for_media(orig_key: str) -> tuple[str, str, str]:
if orig_key.endswith(".webm"):
target_key = orig_key + ".mp3"
upload_key = f"{settings.cdn.bucket}/{orig_key}"
bucket = settings.cdn.bucket_operations
else:
target_key = upload_key = orig_key
bucket = settings.cdn.bucket
bucket = cast(str, bucket)
return target_key, upload_key, bucket


def _get_keys_and_bucket_for_image(
orig_key: str, arb_info: WorkspaceArbitrary | None, cdn_client
) -> tuple[str, str, str]:
if orig_key.endswith(".heic"):
target_key = orig_key + ".jpg"
prefix = f"arbitrary-{arb_info.id}/" if arb_info else f"{cdn_client.config.bucket}/"
upload_key = f"{prefix}/{orig_key}"
bucket = settings.cdn.bucket_operations
else:
target_key = upload_key = orig_key
bucket = cdn_client.config.bucket
bucket = cast(str, bucket)
return target_key, upload_key, bucket


async def download(
request: FileDownloadRequest = Body(...),
user: User = Depends(get_current_user),
Expand Down Expand Up @@ -253,27 +283,30 @@ async def check_file_uploaded(
):
raise AnswerViewAccessDenied()

workspace_srv = workspace.WorkspaceService(session, user.id)
arb_info = await workspace_srv.get_arbitrary_info(applet_id)
cdn_client = await select_storage(applet_id, session)
results: list[FileExistenceResponse] = []

for file_id in schema.files:
cleaned_file_id = file_id.strip()

unique = f"{user.id}/{applet_id}"
key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id)
orig_key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id)

target_key, upload_key, bucket = _get_keys_and_bucket_for_image(orig_key, arb_info, cdn_client)

file_existence_factory = partial(
FileExistenceResponse,
key=key,
file_id=file_id,
)

try:
await cdn_client.check_existence(key)
await cdn_client.check_existence(bucket, upload_key)
results.append(
file_existence_factory(
uploaded=True,
url=cdn_client.generate_private_url(key),
url=cdn_client.generate_private_url(target_key),
)
)
except NotFoundError:
Expand Down Expand Up @@ -362,11 +395,12 @@ async def generate_presigned_media_url(
user: User = Depends(get_current_user),
cdn_client: CDNClient = Depends(get_media_bucket),
) -> Response[PresignedUrl]:
key = cdn_client.generate_key(FileScopeEnum.CONTENT, user.id, f"{uuid.uuid4()}/{body.file_name}")
data = cdn_client.generate_presigned_post(key)
orig_key = cdn_client.generate_key(FileScopeEnum.CONTENT, user.id, f"{uuid.uuid4()}/{body.file_name}")
target_key, upload_key, bucket = _get_keys_and_bucket_for_media(orig_key)
data = cdn_client.generate_presigned_post(bucket, upload_key)
return Response(
result=PresignedUrl(
upload_url=data["url"], fields=data["fields"], url=quote(settings.cdn.url.format(key=key), "/:")
upload_url=data["url"], fields=data["fields"], url=quote(settings.cdn.url.format(key=target_key), "/:")
)
)

Expand All @@ -383,13 +417,19 @@ async def generate_presigned_answer_url(
[Role.OWNER, Role.MANAGER, Role.REVIEWER, Role.RESPONDENT],
):
raise AnswerViewAccessDenied()
# TODO: Refactor this part when file storage is covered by tests. Get arbitrary info only once.
workspace_srv = workspace.WorkspaceService(session, user.id)
arb_info = await workspace_srv.get_arbitrary_info(applet_id)
cdn_client = await select_storage(applet_id, session)
unique = f"{user.id}/{applet_id}"
cleaned_file_id = body.file_id.strip()
key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id)
data = cdn_client.generate_presigned_post(key)
orig_key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id)
target_key, upload_key, bucket = _get_keys_and_bucket_for_image(orig_key, arb_info, cdn_client)
data = cdn_client.generate_presigned_post(bucket, upload_key)
return Response(
result=PresignedUrl(upload_url=data["url"], fields=data["fields"], url=cdn_client.generate_private_url(key))
result=PresignedUrl(
upload_url=data["url"], fields=data["fields"], url=cdn_client.generate_private_url(target_key)
)
)


Expand All @@ -401,7 +441,7 @@ async def generate_presigned_logs_url(
) -> Response[PresignedUrl]:
service = LogFileService(user.id, cdn_client)
key = f"{service.device_key_prefix(device_id=device_id)}/{body.file_id}"
data = cdn_client.generate_presigned_post(key)
data = cdn_client.generate_presigned_post(settings.cdn.bucket, key)
return Response(
result=PresignedUrl(upload_url=data["url"], fields=data["fields"], url=cdn_client.generate_private_url(key))
)
2 changes: 1 addition & 1 deletion src/apps/file/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ class FileCheckRequest(PublicModel):


class FileExistenceResponse(PublicModel):
key: str
uploaded: bool
url: str | None = None
file_id: str | None = None
Expand All @@ -33,6 +32,7 @@ class FilePresignRequest(PublicModel):

class LogFileExistenceResponse(FileExistenceResponse):
file_size: int | None # file size in bytes
key: str


class FileNameRequest(PublicModel):
Expand Down
121 changes: 118 additions & 3 deletions src/apps/file/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,18 @@
from infrastructure.utility.cdn_client import CDNClient
from infrastructure.utility.cdn_config import CdnConfig

# This id is getting from JSON fixtures for answers
WORKSPACE_ARBITRARY_ID = uuid.UUID("8b83d791-0d27-42c5-8b1d-e0c8d7faf808")
ARBITRARY_BUCKET_NAME = "arbitrary_bucket"


async def set_storage_type(storage_type: str, session: AsyncSession):
workspace_id = uuid.UUID("8b83d791-0d27-42c5-8b1d-e0c8d7faf808")
query: Query = update(UserWorkspaceSchema)
query = query.where(UserWorkspaceSchema.id == workspace_id)
query = query.values(storage_type=storage_type)
query = query.where(UserWorkspaceSchema.id == WORKSPACE_ARBITRARY_ID)
if storage_type == StorageType.AWS:
query = query.values(storage_type=storage_type, storage_bucket=ARBITRARY_BUCKET_NAME)
else:
query = query.values(storage_type=storage_type)
await session.execute(query)


Expand Down Expand Up @@ -228,3 +234,112 @@ async def test_generate_presigned_log_url(self, client: TestClient, device_tom:
),
).key(device_tom, file_name)
assert key == expected_key

@pytest.mark.usefixtures("mock_presigned_post")
async def test_generate_presigned_media_for_webm_file__conveted_in_url__upload_url_has_operations_bucket(
self, client: TestClient
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

file_name = "test.webm"
settings.cdn.bucket_operations = "bucket_operations"
settings.cdn.bucket = "bucket_media"

resp = await client.post(self.upload_media_url, data={"file_name": file_name})
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".mp3"
assert result["fields"]["key"].endswith(file_name)
assert result["url"].endswith(exp_converted_file_name)
assert result["fields"]["key"].startswith(settings.cdn.bucket)
assert settings.cdn.bucket_operations in result["uploadUrl"]

@pytest.mark.usefixtures("mock_presigned_post")
async def test_generate_presigned_for_answer_for_heic_format_not_arbitrary(
self, client: TestClient, mocker: MockerFixture, applet_one: AppletFull
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

file_name = "answer.heic"
settings.cdn.bucket_operations = "bucket_operations"
settings.cdn.bucket_answer = "bucket_answer"
mocker.patch("apps.workspaces.service.workspace.WorkspaceService.get_arbitrary_info", return_value=None)

resp = await client.post(self.answer_upload_url.format(applet_id=applet_one.id), data={"file_id": file_name})
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".jpg"
assert result["fields"]["key"].endswith(file_name)
assert result["fields"]["key"].startswith(settings.cdn.bucket_answer)
assert result["url"].endswith(exp_converted_file_name)
assert settings.cdn.bucket_answer in result["url"]
assert settings.cdn.bucket_operations in result["uploadUrl"]

@pytest.mark.usefixtures("mock_presigned_post")
async def test_generate_presigned_for_answer_for_heic_format_arbitrary_workspace(
self, client: TestClient, applet_one: AppletFull, session: AsyncSession
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

file_name = "answer.heic"
settings.cdn.bucket_operations = "bucket_operations"
await set_storage_type(StorageType.AWS, session)
resp = await client.post(self.answer_upload_url.format(applet_id=applet_one.id), data={"file_id": file_name})
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".jpg"
assert result["fields"]["key"].endswith(file_name)
assert result["fields"]["key"].startswith(f"arbitrary-{WORKSPACE_ARBITRARY_ID}")
assert result["url"].endswith(exp_converted_file_name)
assert ARBITRARY_BUCKET_NAME in result["url"]
assert settings.cdn.bucket_operations in result["uploadUrl"]

@pytest.mark.usefixtures("mock_presigned_post")
async def test_answer_existance_for_heic_format_not_arbitrary(
self, client: TestClient, tom: User, applet_one: AppletFull, mocker: MockerFixture
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

file_name = "answer.heic"
settings.cdn.bucket_operations = "bucket_operations"
settings.cdn.bucket_answer = "bucket_answer"
mock_check_existance = mocker.patch("infrastructure.utility.cdn_client.CDNClient.check_existence")
mocker.patch("apps.workspaces.service.workspace.WorkspaceService.get_arbitrary_info", return_value=None)
resp = await client.post(
self.existance_url.format(applet_id=applet_one.id),
data={"files": [file_name]},
)
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".jpg"
check_key = CDNClient.generate_key(FileScopeEnum.ANSWER, f"{tom.id}/{applet_one.id}", file_name)
exp_check_key = f"{settings.cdn.bucket_answer}/{check_key}"

mock_check_existance.assert_awaited_once_with(settings.cdn.bucket_operations, exp_check_key)
assert result[0]["url"].endswith(exp_converted_file_name)
assert settings.cdn.bucket_answer in result[0]["url"]

@pytest.mark.usefixtures("mock_presigned_post")
async def test_answer_existance_for_heic_format_for_arbitrary(
self, client: TestClient, tom: User, applet_one: AppletFull, session: AsyncSession, mocker: MockerFixture
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

file_name = "answer.heic"
settings.cdn.bucket_operations = "bucket_operations"
settings.cdn.bucket_answer = "bucket_answer"
mock_check_existance = mocker.patch("infrastructure.utility.cdn_arbitrary.ArbitraryS3CdnClient.check_existence")
await set_storage_type(StorageType.AWS, session)
resp = await client.post(
self.existance_url.format(applet_id=applet_one.id),
data={"files": [file_name]},
)
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".jpg"
check_key = CDNClient.generate_key(FileScopeEnum.ANSWER, f"{tom.id}/{applet_one.id}", file_name)
exp_check_key = f"arbitrary-{WORKSPACE_ARBITRARY_ID}/{check_key}"

mock_check_existance.assert_awaited_once_with(settings.cdn.bucket_operations, exp_check_key)
assert result[0]["url"].endswith(exp_converted_file_name)
assert ARBITRARY_BUCKET_NAME in result[0]["url"]
1 change: 1 addition & 0 deletions src/config/cdn.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class CDNSettings(BaseModel):
region: str | None
bucket: str | None
bucket_answer: str | None
bucket_operations: str | None
secret_key: str | None
access_key: str | None

Expand Down
2 changes: 1 addition & 1 deletion src/infrastructure/utility/cdn_arbitrary.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def _upload(self, path, body: BinaryIO):
blob_client = self.client.get_blob_client(blob=path)
blob_client.upload_blob(body)

def _check_existence(self, key: str):
def _check_existence(self, bucket: str, key: str):
blob_client = self.client.get_blob_client(self.default_container_name, blob=key)
return blob_client.exists()

Expand Down
12 changes: 6 additions & 6 deletions src/infrastructure/utility/cdn_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,17 +57,17 @@ async def upload(self, path, body: BinaryIO):
future = executor.submit(self._upload, path, body)
await asyncio.wrap_future(future)

def _check_existence(self, key: str):
def _check_existence(self, bucket: str, key: str):
try:
return self.client.head_object(Bucket=self.config.bucket, Key=key)
return self.client.head_object(Bucket=bucket, Key=key)
except ClientError:
raise NotFoundError

async def check_existence(self, key: str):
async def check_existence(self, bucket: str, key: str):
if self.env == "testing":
return
with ThreadPoolExecutor() as executor:
future = executor.submit(self._check_existence, key)
future = executor.submit(self._check_existence, bucket, key)
return await asyncio.wrap_future(future)

def download(self, key):
Expand Down Expand Up @@ -110,6 +110,6 @@ async def list_object(self, key: str):
result = await asyncio.wrap_future(future)
return result.get("Contents", [])

def generate_presigned_post(self, key):
def generate_presigned_post(self, bucket, key):
# Not needed ThreadPoolExecutor because there is no any IO operation (no API calls to s3)
return self.client.generate_presigned_post(self.config.bucket, key, ExpiresIn=self.config.ttl_signed_urls)
return self.client.generate_presigned_post(bucket, key, ExpiresIn=self.config.ttl_signed_urls)

0 comments on commit 27cb2c7

Please sign in to comment.