From 27cb2c7c241f2f0baaf5010f1f5176d2048c31c7 Mon Sep 17 00:00:00 2001 From: ibogretsov Date: Tue, 20 Feb 2024 19:19:22 +0300 Subject: [PATCH] M2-5315: Add conversion support --- .env.default | 1 + src/apps/file/api/file.py | 62 ++++++++-- src/apps/file/domain.py | 2 +- src/apps/file/test_file.py | 121 +++++++++++++++++++- src/config/cdn.py | 1 + src/infrastructure/utility/cdn_arbitrary.py | 2 +- src/infrastructure/utility/cdn_client.py | 12 +- 7 files changed, 179 insertions(+), 22 deletions(-) diff --git a/.env.default b/.env.default index c373273ae24..1305aaa5384 100644 --- a/.env.default +++ b/.env.default @@ -60,6 +60,7 @@ CDN__ACCESS_KEY=minioaccess CDN__REGION= CDN__BUCKET_ANSWER=media CDN__BUCKET=media +CDN__BUCKET_OPERATIONS=media CDN__STORAGE_ADDRESS=http://localhost:9000 CDN__LEGACY_REGION= CDN__LEGACY_BUCKET= diff --git a/src/apps/file/api/file.py b/src/apps/file/api/file.py index 47312bf99c1..dad6936765e 100644 --- a/src/apps/file/api/file.py +++ b/src/apps/file/api/file.py @@ -4,6 +4,7 @@ import os import uuid from functools import partial +from typing import cast from urllib.parse import quote import aiofiles @@ -38,7 +39,9 @@ from apps.users.services.user import UserService from apps.workspaces.crud.user_applet_access import UserAppletAccessCRUD from apps.workspaces.domain.constants import Role +from apps.workspaces.domain.workspace import WorkspaceArbitrary from apps.workspaces.errors import AnswerViewAccessDenied +from apps.workspaces.service import workspace from apps.workspaces.service.user_access import UserAccessService from config import settings from infrastructure.database.deps import get_session @@ -148,6 +151,33 @@ async def convert_not_supported_image(file: UploadFile): return None +def _get_keys_and_bucket_for_media(orig_key: str) -> tuple[str, str, str]: + if orig_key.endswith(".webm"): + target_key = orig_key + ".mp3" + upload_key = f"{settings.cdn.bucket}/{orig_key}" + bucket = settings.cdn.bucket_operations + else: + target_key = upload_key = orig_key + bucket = settings.cdn.bucket + bucket = cast(str, bucket) + return target_key, upload_key, bucket + + +def _get_keys_and_bucket_for_image( + orig_key: str, arb_info: WorkspaceArbitrary | None, cdn_client +) -> tuple[str, str, str]: + if orig_key.endswith(".heic"): + target_key = orig_key + ".jpg" + prefix = f"arbitrary-{arb_info.id}/" if arb_info else f"{cdn_client.config.bucket}/" + upload_key = f"{prefix}/{orig_key}" + bucket = settings.cdn.bucket_operations + else: + target_key = upload_key = orig_key + bucket = cdn_client.config.bucket + bucket = cast(str, bucket) + return target_key, upload_key, bucket + + async def download( request: FileDownloadRequest = Body(...), user: User = Depends(get_current_user), @@ -253,6 +283,8 @@ async def check_file_uploaded( ): raise AnswerViewAccessDenied() + workspace_srv = workspace.WorkspaceService(session, user.id) + arb_info = await workspace_srv.get_arbitrary_info(applet_id) cdn_client = await select_storage(applet_id, session) results: list[FileExistenceResponse] = [] @@ -260,20 +292,21 @@ async def check_file_uploaded( cleaned_file_id = file_id.strip() unique = f"{user.id}/{applet_id}" - key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id) + orig_key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id) + + target_key, upload_key, bucket = _get_keys_and_bucket_for_image(orig_key, arb_info, cdn_client) file_existence_factory = partial( FileExistenceResponse, - key=key, file_id=file_id, ) try: - await cdn_client.check_existence(key) + await cdn_client.check_existence(bucket, upload_key) results.append( file_existence_factory( uploaded=True, - url=cdn_client.generate_private_url(key), + url=cdn_client.generate_private_url(target_key), ) ) except NotFoundError: @@ -362,11 +395,12 @@ async def generate_presigned_media_url( user: User = Depends(get_current_user), cdn_client: CDNClient = Depends(get_media_bucket), ) -> Response[PresignedUrl]: - key = cdn_client.generate_key(FileScopeEnum.CONTENT, user.id, f"{uuid.uuid4()}/{body.file_name}") - data = cdn_client.generate_presigned_post(key) + orig_key = cdn_client.generate_key(FileScopeEnum.CONTENT, user.id, f"{uuid.uuid4()}/{body.file_name}") + target_key, upload_key, bucket = _get_keys_and_bucket_for_media(orig_key) + data = cdn_client.generate_presigned_post(bucket, upload_key) return Response( result=PresignedUrl( - upload_url=data["url"], fields=data["fields"], url=quote(settings.cdn.url.format(key=key), "/:") + upload_url=data["url"], fields=data["fields"], url=quote(settings.cdn.url.format(key=target_key), "/:") ) ) @@ -383,13 +417,19 @@ async def generate_presigned_answer_url( [Role.OWNER, Role.MANAGER, Role.REVIEWER, Role.RESPONDENT], ): raise AnswerViewAccessDenied() + # TODO: Refactor this part when file storage is covered by tests. Get arbitrary info only once. + workspace_srv = workspace.WorkspaceService(session, user.id) + arb_info = await workspace_srv.get_arbitrary_info(applet_id) cdn_client = await select_storage(applet_id, session) unique = f"{user.id}/{applet_id}" cleaned_file_id = body.file_id.strip() - key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id) - data = cdn_client.generate_presigned_post(key) + orig_key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id) + target_key, upload_key, bucket = _get_keys_and_bucket_for_image(orig_key, arb_info, cdn_client) + data = cdn_client.generate_presigned_post(bucket, upload_key) return Response( - result=PresignedUrl(upload_url=data["url"], fields=data["fields"], url=cdn_client.generate_private_url(key)) + result=PresignedUrl( + upload_url=data["url"], fields=data["fields"], url=cdn_client.generate_private_url(target_key) + ) ) @@ -401,7 +441,7 @@ async def generate_presigned_logs_url( ) -> Response[PresignedUrl]: service = LogFileService(user.id, cdn_client) key = f"{service.device_key_prefix(device_id=device_id)}/{body.file_id}" - data = cdn_client.generate_presigned_post(key) + data = cdn_client.generate_presigned_post(settings.cdn.bucket, key) return Response( result=PresignedUrl(upload_url=data["url"], fields=data["fields"], url=cdn_client.generate_private_url(key)) ) diff --git a/src/apps/file/domain.py b/src/apps/file/domain.py index 634180ca923..30c0deeb6fd 100644 --- a/src/apps/file/domain.py +++ b/src/apps/file/domain.py @@ -21,7 +21,6 @@ class FileCheckRequest(PublicModel): class FileExistenceResponse(PublicModel): - key: str uploaded: bool url: str | None = None file_id: str | None = None @@ -33,6 +32,7 @@ class FilePresignRequest(PublicModel): class LogFileExistenceResponse(FileExistenceResponse): file_size: int | None # file size in bytes + key: str class FileNameRequest(PublicModel): diff --git a/src/apps/file/test_file.py b/src/apps/file/test_file.py index 4aebfc8e6de..3be6a7e14d7 100644 --- a/src/apps/file/test_file.py +++ b/src/apps/file/test_file.py @@ -22,12 +22,18 @@ from infrastructure.utility.cdn_client import CDNClient from infrastructure.utility.cdn_config import CdnConfig +# This id is getting from JSON fixtures for answers +WORKSPACE_ARBITRARY_ID = uuid.UUID("8b83d791-0d27-42c5-8b1d-e0c8d7faf808") +ARBITRARY_BUCKET_NAME = "arbitrary_bucket" + async def set_storage_type(storage_type: str, session: AsyncSession): - workspace_id = uuid.UUID("8b83d791-0d27-42c5-8b1d-e0c8d7faf808") query: Query = update(UserWorkspaceSchema) - query = query.where(UserWorkspaceSchema.id == workspace_id) - query = query.values(storage_type=storage_type) + query = query.where(UserWorkspaceSchema.id == WORKSPACE_ARBITRARY_ID) + if storage_type == StorageType.AWS: + query = query.values(storage_type=storage_type, storage_bucket=ARBITRARY_BUCKET_NAME) + else: + query = query.values(storage_type=storage_type) await session.execute(query) @@ -228,3 +234,112 @@ async def test_generate_presigned_log_url(self, client: TestClient, device_tom: ), ).key(device_tom, file_name) assert key == expected_key + + @pytest.mark.usefixtures("mock_presigned_post") + async def test_generate_presigned_media_for_webm_file__conveted_in_url__upload_url_has_operations_bucket( + self, client: TestClient + ) -> None: + await client.login(self.login_url, "tom@mindlogger.com", "Test1234!") + + file_name = "test.webm" + settings.cdn.bucket_operations = "bucket_operations" + settings.cdn.bucket = "bucket_media" + + resp = await client.post(self.upload_media_url, data={"file_name": file_name}) + assert resp.status_code == http.HTTPStatus.OK + result = resp.json()["result"] + exp_converted_file_name = file_name + ".mp3" + assert result["fields"]["key"].endswith(file_name) + assert result["url"].endswith(exp_converted_file_name) + assert result["fields"]["key"].startswith(settings.cdn.bucket) + assert settings.cdn.bucket_operations in result["uploadUrl"] + + @pytest.mark.usefixtures("mock_presigned_post") + async def test_generate_presigned_for_answer_for_heic_format_not_arbitrary( + self, client: TestClient, mocker: MockerFixture, applet_one: AppletFull + ) -> None: + await client.login(self.login_url, "tom@mindlogger.com", "Test1234!") + + file_name = "answer.heic" + settings.cdn.bucket_operations = "bucket_operations" + settings.cdn.bucket_answer = "bucket_answer" + mocker.patch("apps.workspaces.service.workspace.WorkspaceService.get_arbitrary_info", return_value=None) + + resp = await client.post(self.answer_upload_url.format(applet_id=applet_one.id), data={"file_id": file_name}) + assert resp.status_code == http.HTTPStatus.OK + result = resp.json()["result"] + exp_converted_file_name = file_name + ".jpg" + assert result["fields"]["key"].endswith(file_name) + assert result["fields"]["key"].startswith(settings.cdn.bucket_answer) + assert result["url"].endswith(exp_converted_file_name) + assert settings.cdn.bucket_answer in result["url"] + assert settings.cdn.bucket_operations in result["uploadUrl"] + + @pytest.mark.usefixtures("mock_presigned_post") + async def test_generate_presigned_for_answer_for_heic_format_arbitrary_workspace( + self, client: TestClient, applet_one: AppletFull, session: AsyncSession + ) -> None: + await client.login(self.login_url, "tom@mindlogger.com", "Test1234!") + + file_name = "answer.heic" + settings.cdn.bucket_operations = "bucket_operations" + await set_storage_type(StorageType.AWS, session) + resp = await client.post(self.answer_upload_url.format(applet_id=applet_one.id), data={"file_id": file_name}) + assert resp.status_code == http.HTTPStatus.OK + result = resp.json()["result"] + exp_converted_file_name = file_name + ".jpg" + assert result["fields"]["key"].endswith(file_name) + assert result["fields"]["key"].startswith(f"arbitrary-{WORKSPACE_ARBITRARY_ID}") + assert result["url"].endswith(exp_converted_file_name) + assert ARBITRARY_BUCKET_NAME in result["url"] + assert settings.cdn.bucket_operations in result["uploadUrl"] + + @pytest.mark.usefixtures("mock_presigned_post") + async def test_answer_existance_for_heic_format_not_arbitrary( + self, client: TestClient, tom: User, applet_one: AppletFull, mocker: MockerFixture + ) -> None: + await client.login(self.login_url, "tom@mindlogger.com", "Test1234!") + + file_name = "answer.heic" + settings.cdn.bucket_operations = "bucket_operations" + settings.cdn.bucket_answer = "bucket_answer" + mock_check_existance = mocker.patch("infrastructure.utility.cdn_client.CDNClient.check_existence") + mocker.patch("apps.workspaces.service.workspace.WorkspaceService.get_arbitrary_info", return_value=None) + resp = await client.post( + self.existance_url.format(applet_id=applet_one.id), + data={"files": [file_name]}, + ) + assert resp.status_code == http.HTTPStatus.OK + result = resp.json()["result"] + exp_converted_file_name = file_name + ".jpg" + check_key = CDNClient.generate_key(FileScopeEnum.ANSWER, f"{tom.id}/{applet_one.id}", file_name) + exp_check_key = f"{settings.cdn.bucket_answer}/{check_key}" + + mock_check_existance.assert_awaited_once_with(settings.cdn.bucket_operations, exp_check_key) + assert result[0]["url"].endswith(exp_converted_file_name) + assert settings.cdn.bucket_answer in result[0]["url"] + + @pytest.mark.usefixtures("mock_presigned_post") + async def test_answer_existance_for_heic_format_for_arbitrary( + self, client: TestClient, tom: User, applet_one: AppletFull, session: AsyncSession, mocker: MockerFixture + ) -> None: + await client.login(self.login_url, "tom@mindlogger.com", "Test1234!") + + file_name = "answer.heic" + settings.cdn.bucket_operations = "bucket_operations" + settings.cdn.bucket_answer = "bucket_answer" + mock_check_existance = mocker.patch("infrastructure.utility.cdn_arbitrary.ArbitraryS3CdnClient.check_existence") + await set_storage_type(StorageType.AWS, session) + resp = await client.post( + self.existance_url.format(applet_id=applet_one.id), + data={"files": [file_name]}, + ) + assert resp.status_code == http.HTTPStatus.OK + result = resp.json()["result"] + exp_converted_file_name = file_name + ".jpg" + check_key = CDNClient.generate_key(FileScopeEnum.ANSWER, f"{tom.id}/{applet_one.id}", file_name) + exp_check_key = f"arbitrary-{WORKSPACE_ARBITRARY_ID}/{check_key}" + + mock_check_existance.assert_awaited_once_with(settings.cdn.bucket_operations, exp_check_key) + assert result[0]["url"].endswith(exp_converted_file_name) + assert ARBITRARY_BUCKET_NAME in result[0]["url"] diff --git a/src/config/cdn.py b/src/config/cdn.py index c92dbf02976..3b5fdbfacb8 100644 --- a/src/config/cdn.py +++ b/src/config/cdn.py @@ -12,6 +12,7 @@ class CDNSettings(BaseModel): region: str | None bucket: str | None bucket_answer: str | None + bucket_operations: str | None secret_key: str | None access_key: str | None diff --git a/src/infrastructure/utility/cdn_arbitrary.py b/src/infrastructure/utility/cdn_arbitrary.py index b3f6e350179..920fc8b9a8b 100644 --- a/src/infrastructure/utility/cdn_arbitrary.py +++ b/src/infrastructure/utility/cdn_arbitrary.py @@ -60,7 +60,7 @@ def _upload(self, path, body: BinaryIO): blob_client = self.client.get_blob_client(blob=path) blob_client.upload_blob(body) - def _check_existence(self, key: str): + def _check_existence(self, bucket: str, key: str): blob_client = self.client.get_blob_client(self.default_container_name, blob=key) return blob_client.exists() diff --git a/src/infrastructure/utility/cdn_client.py b/src/infrastructure/utility/cdn_client.py index 0cac62c7d83..133de59bc03 100644 --- a/src/infrastructure/utility/cdn_client.py +++ b/src/infrastructure/utility/cdn_client.py @@ -57,17 +57,17 @@ async def upload(self, path, body: BinaryIO): future = executor.submit(self._upload, path, body) await asyncio.wrap_future(future) - def _check_existence(self, key: str): + def _check_existence(self, bucket: str, key: str): try: - return self.client.head_object(Bucket=self.config.bucket, Key=key) + return self.client.head_object(Bucket=bucket, Key=key) except ClientError: raise NotFoundError - async def check_existence(self, key: str): + async def check_existence(self, bucket: str, key: str): if self.env == "testing": return with ThreadPoolExecutor() as executor: - future = executor.submit(self._check_existence, key) + future = executor.submit(self._check_existence, bucket, key) return await asyncio.wrap_future(future) def download(self, key): @@ -110,6 +110,6 @@ async def list_object(self, key: str): result = await asyncio.wrap_future(future) return result.get("Contents", []) - def generate_presigned_post(self, key): + def generate_presigned_post(self, bucket, key): # Not needed ThreadPoolExecutor because there is no any IO operation (no API calls to s3) - return self.client.generate_presigned_post(self.config.bucket, key, ExpiresIn=self.config.ttl_signed_urls) + return self.client.generate_presigned_post(bucket, key, ExpiresIn=self.config.ttl_signed_urls)