Skip to content

Commit

Permalink
M2-5315: Add conversion support (#1115)
Browse files Browse the repository at this point in the history
* Add conversion support.
* Add use_arbitrary check to the get_arbitrary_info.
  • Loading branch information
ibogretsov authored Feb 22, 2024
1 parent 65636b7 commit 8d42fcc
Show file tree
Hide file tree
Showing 14 changed files with 201 additions and 36 deletions.
1 change: 1 addition & 0 deletions .env.default
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ CDN__ACCESS_KEY=minioaccess
CDN__REGION=
CDN__BUCKET_ANSWER=media
CDN__BUCKET=media
CDN__BUCKET_OPERATIONS=media
CDN__STORAGE_ADDRESS=http://localhost:9000
CDN__LEGACY_REGION=
CDN__LEGACY_BUCKET=
Expand Down
8 changes: 4 additions & 4 deletions src/apps/answers/deps/preprocess_arbitrary.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
async def get_arbitrary_info(applet_id: uuid.UUID | None, session: AsyncSession) -> str | None:
if applet_id:
service = WorkspaceService(session, uuid.uuid4())
server_info = await service.get_arbitrary_info(applet_id)
if server_info and server_info.use_arbitrary:
server_info = await service.get_arbitrary_info_if_use_arbitrary(applet_id)
if server_info:
return server_info.database_uri
return None

Expand Down Expand Up @@ -57,8 +57,8 @@ async def get_answer_session_by_owner_id(
session: AsyncSession = Depends(get_session),
):
service = WorkspaceService(session, uuid.uuid4())
server_info = await service.get_arbitrary_info_by_owner_id(owner_id)
if server_info and server_info.use_arbitrary:
server_info = await service.get_arbitrary_info_by_owner_id_if_use_arbitrary(owner_id)
if server_info:
url = server_info.database_uri
if not url:
yield None
Expand Down
62 changes: 51 additions & 11 deletions src/apps/file/api/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import os
import uuid
from functools import partial
from typing import cast
from urllib.parse import quote

import aiofiles
Expand Down Expand Up @@ -38,7 +39,9 @@
from apps.users.services.user import UserService
from apps.workspaces.crud.user_applet_access import UserAppletAccessCRUD
from apps.workspaces.domain.constants import Role
from apps.workspaces.domain.workspace import WorkspaceArbitrary
from apps.workspaces.errors import AnswerViewAccessDenied
from apps.workspaces.service import workspace
from apps.workspaces.service.user_access import UserAccessService
from config import settings
from infrastructure.database.deps import get_session
Expand Down Expand Up @@ -148,6 +151,33 @@ async def convert_not_supported_image(file: UploadFile):
return None


def _get_keys_and_bucket_for_media(orig_key: str) -> tuple[str, str, str]:
if orig_key.lower().endswith(".webm"):
target_key = orig_key + ".mp3"
upload_key = f"{settings.cdn.bucket}/{orig_key}"
bucket = settings.cdn.bucket_operations
else:
target_key = upload_key = orig_key
bucket = settings.cdn.bucket
bucket = cast(str, bucket)
return target_key, upload_key, bucket


def _get_keys_and_bucket_for_image(
orig_key: str, arb_info: WorkspaceArbitrary | None, cdn_client
) -> tuple[str, str, str]:
if orig_key.lower().endswith(".heic"):
target_key = orig_key + ".jpg"
prefix = f"arbitrary-{arb_info.id}" if arb_info else cdn_client.config.bucket
upload_key = f"{prefix}/{orig_key}"
bucket = settings.cdn.bucket_operations
else:
target_key = upload_key = orig_key
bucket = cdn_client.config.bucket
bucket = cast(str, bucket)
return target_key, upload_key, bucket


async def download(
request: FileDownloadRequest = Body(...),
user: User = Depends(get_current_user),
Expand Down Expand Up @@ -253,27 +283,30 @@ async def check_file_uploaded(
):
raise AnswerViewAccessDenied()

workspace_srv = workspace.WorkspaceService(session, user.id)
arb_info = await workspace_srv.get_arbitrary_info_if_use_arbitrary(applet_id)
cdn_client = await select_storage(applet_id, session)
results: list[FileExistenceResponse] = []

for file_id in schema.files:
cleaned_file_id = file_id.strip()

unique = f"{user.id}/{applet_id}"
key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id)
orig_key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id)

target_key, upload_key, bucket = _get_keys_and_bucket_for_image(orig_key, arb_info, cdn_client)

file_existence_factory = partial(
FileExistenceResponse,
key=key,
file_id=file_id,
)

try:
await cdn_client.check_existence(key)
await cdn_client.check_existence(bucket, upload_key)
results.append(
file_existence_factory(
uploaded=True,
url=cdn_client.generate_private_url(key),
url=cdn_client.generate_private_url(target_key),
)
)
except NotFoundError:
Expand Down Expand Up @@ -362,11 +395,12 @@ async def generate_presigned_media_url(
user: User = Depends(get_current_user),
cdn_client: CDNClient = Depends(get_media_bucket),
) -> Response[PresignedUrl]:
key = cdn_client.generate_key(FileScopeEnum.CONTENT, user.id, f"{uuid.uuid4()}/{body.file_name}")
data = cdn_client.generate_presigned_post(key)
orig_key = cdn_client.generate_key(FileScopeEnum.CONTENT, user.id, f"{uuid.uuid4()}/{body.file_name}")
target_key, upload_key, bucket = _get_keys_and_bucket_for_media(orig_key)
data = cdn_client.generate_presigned_post(bucket, upload_key)
return Response(
result=PresignedUrl(
upload_url=data["url"], fields=data["fields"], url=quote(settings.cdn.url.format(key=key), "/:")
upload_url=data["url"], fields=data["fields"], url=quote(settings.cdn.url.format(key=target_key), "/:")
)
)

Expand All @@ -383,13 +417,19 @@ async def generate_presigned_answer_url(
[Role.OWNER, Role.MANAGER, Role.REVIEWER, Role.RESPONDENT],
):
raise AnswerViewAccessDenied()
# TODO: Refactor this part when file storage is covered by tests. Get arbitrary info only once.
workspace_srv = workspace.WorkspaceService(session, user.id)
arb_info = await workspace_srv.get_arbitrary_info_if_use_arbitrary(applet_id)
cdn_client = await select_storage(applet_id, session)
unique = f"{user.id}/{applet_id}"
cleaned_file_id = body.file_id.strip()
key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id)
data = cdn_client.generate_presigned_post(key)
orig_key = cdn_client.generate_key(FileScopeEnum.ANSWER, unique, cleaned_file_id)
target_key, upload_key, bucket = _get_keys_and_bucket_for_image(orig_key, arb_info, cdn_client)
data = cdn_client.generate_presigned_post(bucket, upload_key)
return Response(
result=PresignedUrl(upload_url=data["url"], fields=data["fields"], url=cdn_client.generate_private_url(key))
result=PresignedUrl(
upload_url=data["url"], fields=data["fields"], url=cdn_client.generate_private_url(target_key)
)
)


Expand All @@ -401,7 +441,7 @@ async def generate_presigned_logs_url(
) -> Response[PresignedUrl]:
service = LogFileService(user.id, cdn_client)
key = f"{service.device_key_prefix(device_id=device_id)}/{body.file_id}"
data = cdn_client.generate_presigned_post(key)
data = cdn_client.generate_presigned_post(settings.cdn.bucket, key)
return Response(
result=PresignedUrl(upload_url=data["url"], fields=data["fields"], url=cdn_client.generate_private_url(key))
)
2 changes: 1 addition & 1 deletion src/apps/file/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ class FileCheckRequest(PublicModel):


class FileExistenceResponse(PublicModel):
key: str
uploaded: bool
url: str | None = None
file_id: str | None = None
Expand All @@ -33,6 +32,7 @@ class FilePresignRequest(PublicModel):

class LogFileExistenceResponse(FileExistenceResponse):
file_size: int | None # file size in bytes
key: str


class FileNameRequest(PublicModel):
Expand Down
2 changes: 1 addition & 1 deletion src/apps/file/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ async def _presign(self, url: str | None):
return url
key = self._get_key(url)
wsp_service = workspace.WorkspaceService(self.session, self.user_id)
arbitrary_info = await wsp_service.get_arbitrary_info(self.applet_id)
arbitrary_info = await wsp_service.get_arbitrary_info_if_use_arbitrary(self.applet_id)
legacy_cdn_client = await self.get_legacy_client(arbitrary_info)
return await legacy_cdn_client.generate_presigned_url(key)
elif self._is_regular_file_url_format(url):
Expand Down
2 changes: 1 addition & 1 deletion src/apps/file/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ async def select_storage(
session: AsyncSession,
):
service = workspace.WorkspaceService(session, uuid.uuid4())
info = await service.get_arbitrary_info(applet_id)
info = await service.get_arbitrary_info_if_use_arbitrary(applet_id)
if not info:
config_cdn = CdnConfig(
endpoint_url=settings.cdn.endpoint_url,
Expand Down
129 changes: 125 additions & 4 deletions src/apps/file/test_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,18 @@
from infrastructure.utility.cdn_client import CDNClient
from infrastructure.utility.cdn_config import CdnConfig

# This id is getting from JSON fixtures for answers
WORKSPACE_ARBITRARY_ID = uuid.UUID("8b83d791-0d27-42c5-8b1d-e0c8d7faf808")
ARBITRARY_BUCKET_NAME = "arbitrary_bucket"


async def set_storage_type(storage_type: str, session: AsyncSession):
workspace_id = uuid.UUID("8b83d791-0d27-42c5-8b1d-e0c8d7faf808")
query: Query = update(UserWorkspaceSchema)
query = query.where(UserWorkspaceSchema.id == workspace_id)
query = query.values(storage_type=storage_type)
query = query.where(UserWorkspaceSchema.id == WORKSPACE_ARBITRARY_ID)
if storage_type == StorageType.AWS:
query = query.values(storage_type=storage_type, storage_bucket=ARBITRARY_BUCKET_NAME)
else:
query = query.values(storage_type=storage_type)
await session.execute(query)


Expand Down Expand Up @@ -205,7 +211,9 @@ async def test_generate_presigned_url_for_answers(
bucket_name = "bucket"
settings.cdn.bucket = bucket_name
settings.cdn.bucket_answer = bucket_name
mocker.patch("apps.workspaces.service.workspace.WorkspaceService.get_arbitrary_info", return_value=None)
mocker.patch(
"apps.workspaces.service.workspace.WorkspaceService.get_arbitrary_info_if_use_arbitrary", return_value=None
)
resp = await client.post(self.answer_upload_url.format(applet_id=applet_one.id), data={"file_id": file_id})
assert resp.status_code == http.HTTPStatus.OK
assert resp.json()["result"]["fields"]["key"] == expected_key
Expand All @@ -228,3 +236,116 @@ async def test_generate_presigned_log_url(self, client: TestClient, device_tom:
),
).key(device_tom, file_name)
assert key == expected_key

@pytest.mark.usefixtures("mock_presigned_post")
@pytest.mark.parametrize("file_name", ("test.webm", "test.WEBM"))
async def test_generate_presigned_media_for_webm_file__conveted_in_url__upload_url_has_operations_bucket(
self, client: TestClient, file_name
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

settings.cdn.bucket_operations = "bucket_operations"
settings.cdn.bucket = "bucket_media"

resp = await client.post(self.upload_media_url, data={"file_name": file_name})
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".mp3"
assert result["fields"]["key"].endswith(file_name)
assert result["url"].endswith(exp_converted_file_name)
assert result["fields"]["key"].startswith(settings.cdn.bucket)
assert settings.cdn.bucket_operations in result["uploadUrl"]

@pytest.mark.usefixtures("mock_presigned_post")
@pytest.mark.parametrize("file_name", ("answer.heic", "answer.HEIC"))
async def test_generate_presigned_for_answer_for_heic_format_not_arbitrary(
self, client: TestClient, mocker: MockerFixture, applet_one: AppletFull, file_name: str
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

settings.cdn.bucket_operations = "bucket_operations"
settings.cdn.bucket_answer = "bucket_answer"
mocker.patch(
"apps.workspaces.service.workspace.WorkspaceService.get_arbitrary_info_if_use_arbitrary", return_value=None
)

resp = await client.post(self.answer_upload_url.format(applet_id=applet_one.id), data={"file_id": file_name})
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".jpg"
assert result["fields"]["key"].endswith(file_name)
assert result["fields"]["key"].startswith(settings.cdn.bucket_answer)
assert result["url"].endswith(exp_converted_file_name)
assert settings.cdn.bucket_answer in result["url"]
assert settings.cdn.bucket_operations in result["uploadUrl"]

@pytest.mark.usefixtures("mock_presigned_post")
async def test_generate_presigned_for_answer_for_heic_format_arbitrary_workspace(
self, client: TestClient, applet_one: AppletFull, session: AsyncSession
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

file_name = "answer.heic"
settings.cdn.bucket_operations = "bucket_operations"
await set_storage_type(StorageType.AWS, session)
resp = await client.post(self.answer_upload_url.format(applet_id=applet_one.id), data={"file_id": file_name})
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".jpg"
assert result["fields"]["key"].endswith(file_name)
assert result["fields"]["key"].startswith(f"arbitrary-{WORKSPACE_ARBITRARY_ID}")
assert result["url"].endswith(exp_converted_file_name)
assert ARBITRARY_BUCKET_NAME in result["url"]
assert settings.cdn.bucket_operations in result["uploadUrl"]

@pytest.mark.usefixtures("mock_presigned_post")
async def test_answer_existance_for_heic_format_not_arbitrary(
self, client: TestClient, tom: User, applet_one: AppletFull, mocker: MockerFixture
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

file_name = "answer.heic"
settings.cdn.bucket_operations = "bucket_operations"
settings.cdn.bucket_answer = "bucket_answer"
mock_check_existance = mocker.patch("infrastructure.utility.cdn_client.CDNClient.check_existence")
mocker.patch(
"apps.workspaces.service.workspace.WorkspaceService.get_arbitrary_info_if_use_arbitrary", return_value=None
)
resp = await client.post(
self.existance_url.format(applet_id=applet_one.id),
data={"files": [file_name]},
)
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".jpg"
check_key = CDNClient.generate_key(FileScopeEnum.ANSWER, f"{tom.id}/{applet_one.id}", file_name)
exp_check_key = f"{settings.cdn.bucket_answer}/{check_key}"

mock_check_existance.assert_awaited_once_with(settings.cdn.bucket_operations, exp_check_key)
assert result[0]["url"].endswith(exp_converted_file_name)
assert settings.cdn.bucket_answer in result[0]["url"]

@pytest.mark.usefixtures("mock_presigned_post")
async def test_answer_existance_for_heic_format_for_arbitrary(
self, client: TestClient, tom: User, applet_one: AppletFull, session: AsyncSession, mocker: MockerFixture
) -> None:
await client.login(self.login_url, "[email protected]", "Test1234!")

file_name = "answer.heic"
settings.cdn.bucket_operations = "bucket_operations"
settings.cdn.bucket_answer = "bucket_answer"
mock_check_existance = mocker.patch("infrastructure.utility.cdn_arbitrary.ArbitraryS3CdnClient.check_existence")
await set_storage_type(StorageType.AWS, session)
resp = await client.post(
self.existance_url.format(applet_id=applet_one.id),
data={"files": [file_name]},
)
assert resp.status_code == http.HTTPStatus.OK
result = resp.json()["result"]
exp_converted_file_name = file_name + ".jpg"
check_key = CDNClient.generate_key(FileScopeEnum.ANSWER, f"{tom.id}/{applet_one.id}", file_name)
exp_check_key = f"arbitrary-{WORKSPACE_ARBITRARY_ID}/{check_key}"

mock_check_existance.assert_awaited_once_with(settings.cdn.bucket_operations, exp_check_key)
assert result[0]["url"].endswith(exp_converted_file_name)
assert ARBITRARY_BUCKET_NAME in result[0]["url"]
4 changes: 3 additions & 1 deletion src/apps/shared/commands/patch_commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,9 @@ async def exec_patch(patch: Patch, owner_id: Optional[uuid.UUID]):
async with atomic(session):
if owner_id:
try:
arbitrary = await WorkspaceService(session, owner_id).get_arbitrary_info_by_owner_id(owner_id)
arbitrary = await WorkspaceService(
session, owner_id
).get_arbitrary_info_by_owner_id_if_use_arbitrary(owner_id)
if not arbitrary:
raise WorkspaceNotFoundError("Workspace not found")

Expand Down
2 changes: 1 addition & 1 deletion src/apps/workspaces/commands/arbitrary_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ async def show(
session_maker = session_manager.get_session()
async with session_maker() as session:
if owner_id:
data = await WorkspaceService(session, owner_id).get_arbitrary_info_by_owner_id(owner_id)
data = await WorkspaceService(session, owner_id).get_arbitrary_info_by_owner_id_if_use_arbitrary(owner_id)
if not data:
print("[bold green]" "Arbitrary server not configured" "[/bold green]")
return
Expand Down
8 changes: 4 additions & 4 deletions src/apps/workspaces/service/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,18 +241,18 @@ async def get_applets_roles_by_priority(self, owner_id, applet_ids):
owner_id, self._user_id, applet_ids
)

async def get_arbitrary_info(self, applet_id: uuid.UUID) -> WorkspaceArbitrary | None:
async def get_arbitrary_info_if_use_arbitrary(self, applet_id: uuid.UUID) -> WorkspaceArbitrary | None:
schema = await UserWorkspaceCRUD(self.session).get_by_applet_id(applet_id)
if not schema:
if not schema or not schema.use_arbitrary:
return None
try:
return WorkspaceArbitrary.from_orm(schema) if schema else None
except ValidationError:
return None

async def get_arbitrary_info_by_owner_id(self, owner_id: uuid.UUID) -> WorkspaceArbitrary | None:
async def get_arbitrary_info_by_owner_id_if_use_arbitrary(self, owner_id: uuid.UUID) -> WorkspaceArbitrary | None:
schema = await UserWorkspaceCRUD(self.session).get_by_user_id(owner_id)
if not schema:
if not schema or not schema.use_arbitrary:
return None
try:
return WorkspaceArbitrary.from_orm(schema) if schema else None
Expand Down
1 change: 1 addition & 0 deletions src/config/cdn.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ class CDNSettings(BaseModel):
region: str | None
bucket: str | None
bucket_answer: str | None
bucket_operations: str | None
secret_key: str | None
access_key: str | None

Expand Down
Loading

0 comments on commit 8d42fcc

Please sign in to comment.