Skip to content

Commit

Permalink
Feature/fr optimize (#32)
Browse files Browse the repository at this point in the history
* add ! handling for the case where no face regions are detected in an image
  • Loading branch information
vitali-yanushchyk-valor authored Jun 14, 2024
1 parent 6e1ac70 commit 9e8440e
Show file tree
Hide file tree
Showing 33 changed files with 1,196 additions and 725 deletions.
19 changes: 16 additions & 3 deletions src/hope_dedup_engine/apps/api/serializers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from typing import Any

from rest_framework import serializers

from hope_dedup_engine.apps.api.models import DeduplicationSet
from hope_dedup_engine.apps.api.models.deduplication import Duplicate, IgnoredKeyPair, Image
from hope_dedup_engine.apps.api.models.deduplication import (
Duplicate,
IgnoredKeyPair,
Image,
)


class DeduplicationSetSerializer(serializers.ModelSerializer):
Expand All @@ -10,7 +16,14 @@ class DeduplicationSetSerializer(serializers.ModelSerializer):
class Meta:
model = DeduplicationSet
exclude = ("deleted",)
read_only_fields = "external_system", "created_at", "created_by", "deleted", "updated_at", "updated_by"
read_only_fields = (
"external_system",
"created_at",
"created_by",
"deleted",
"updated_at",
"updated_by",
)


class ImageSerializer(serializers.ModelSerializer):
Expand All @@ -24,7 +37,7 @@ class EntrySerializer(serializers.Serializer):
reference_pk = serializers.SerializerMethodField()
filename = serializers.SerializerMethodField()

def __init__(self, prefix: str, *args, **kwargs) -> None:
def __init__(self, prefix: str, *args: Any, **kwargs: Any) -> None:
self._prefix = prefix
super().__init__(*args, **kwargs)

Expand Down
88 changes: 69 additions & 19 deletions src/hope_dedup_engine/apps/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,16 @@
HDETokenAuthentication,
UserAndDeduplicationSetAreOfTheSameSystem,
)
from hope_dedup_engine.apps.api.const import DEDUPLICATION_SET_FILTER, DEDUPLICATION_SET_PARAM
from hope_dedup_engine.apps.api.const import (
DEDUPLICATION_SET_FILTER,
DEDUPLICATION_SET_PARAM,
)
from hope_dedup_engine.apps.api.models import DeduplicationSet
from hope_dedup_engine.apps.api.models.deduplication import Duplicate, IgnoredKeyPair, Image
from hope_dedup_engine.apps.api.models.deduplication import (
Duplicate,
IgnoredKeyPair,
Image,
)
from hope_dedup_engine.apps.api.serializers import (
DeduplicationSetSerializer,
DuplicateSerializer,
Expand All @@ -36,17 +43,29 @@


class DeduplicationSetViewSet(
mixins.ListModelMixin, mixins.CreateModelMixin, mixins.DestroyModelMixin, viewsets.GenericViewSet
mixins.ListModelMixin,
mixins.CreateModelMixin,
mixins.DestroyModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = DeduplicationSetSerializer

def get_queryset(self) -> QuerySet:
return DeduplicationSet.objects.filter(external_system=self.request.user.external_system, deleted=False)
return DeduplicationSet.objects.filter(
external_system=self.request.user.external_system, deleted=False
)

def perform_create(self, serializer: Serializer) -> None:
serializer.save(created_by=self.request.user, external_system=self.request.user.external_system)
serializer.save(
created_by=self.request.user,
external_system=self.request.user.external_system,
)

def perform_destroy(self, instance: DeduplicationSet) -> None:
instance.updated_by = self.request.user
Expand All @@ -70,18 +89,24 @@ def process(self, request: Request, pk: UUID | None = None) -> Response:
self._start_processing(deduplication_set)
return Response({MESSAGE: STARTED})
case DeduplicationSet.State.PROCESSING:
return Response({MESSAGE: ALREADY_PROCESSING}, status=status.HTTP_400_BAD_REQUEST)
return Response(
{MESSAGE: ALREADY_PROCESSING}, status=status.HTTP_400_BAD_REQUEST
)


class ImageViewSet(
nested_viewsets.NestedViewSetMixin,
nested_viewsets.NestedViewSetMixin[Image],
mixins.ListModelMixin,
mixins.CreateModelMixin,
mixins.DestroyModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = ImageSerializer
queryset = Image.objects.all()
parent_lookup_kwargs = {
Expand All @@ -105,22 +130,26 @@ def perform_destroy(self, instance: Image) -> None:

@dataclass
class ListDataWrapper:
data: list[dict]
data: list[dict[str, Any]]

def __setitem__(self, key: str, value: Any) -> None:
for item in self.data:
item[key] = value


class WrapRequestDataMixin:
def initialize_request(self, request: Request, *args: Any, **kwargs: Any) -> Request:
def initialize_request(
self, request: Request, *args: Any, **kwargs: Any
) -> Request:
request = super().initialize_request(request, *args, **kwargs)
request._full_data = ListDataWrapper(request.data)
return request


class UnwrapRequestDataMixin:
def initialize_request(self, request: Request, *args: Any, **kwargs: Any) -> Request:
def initialize_request(
self, request: Request, *args: Any, **kwargs: Any
) -> Request:
request = super().initialize_request(request, *args, **kwargs)
request._full_data = request._full_data.data
return request
Expand All @@ -130,13 +159,17 @@ def initialize_request(self, request: Request, *args: Any, **kwargs: Any) -> Req
# UnwrapRequestDataMixin, and ListDataWrapper to make it work with list of objects
class BulkImageViewSet(
UnwrapRequestDataMixin,
nested_viewsets.NestedViewSetMixin,
nested_viewsets.NestedViewSetMixin[Image],
WrapRequestDataMixin,
mixins.CreateModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = ImageSerializer
queryset = Image.objects.all()
parent_lookup_kwargs = {
Expand All @@ -148,7 +181,9 @@ def get_serializer(self, *args: Any, **kwargs: Any) -> Serializer:

def perform_create(self, serializer: Serializer) -> None:
super().perform_create(serializer)
if deduplication_set := serializer.instance[0].deduplication_set if serializer.instance else None:
if deduplication_set := (
serializer.instance[0].deduplication_set if serializer.instance else None
):
deduplication_set.updated_by = self.request.user
deduplication_set.save()

Expand All @@ -161,9 +196,17 @@ def clear(self, request: Request, deduplication_set_pk: str) -> Response:
return Response(status=status.HTTP_204_NO_CONTENT)


class DuplicateViewSet(nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin, viewsets.GenericViewSet):
class DuplicateViewSet(
nested_viewsets.NestedViewSetMixin[Duplicate],
mixins.ListModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = DuplicateSerializer
queryset = Duplicate.objects.all()
parent_lookup_kwargs = {
Expand All @@ -172,10 +215,17 @@ class DuplicateViewSet(nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin


class IgnoredKeyPairViewSet(
nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin, mixins.CreateModelMixin, viewsets.GenericViewSet
nested_viewsets.NestedViewSetMixin[IgnoredKeyPair],
mixins.ListModelMixin,
mixins.CreateModelMixin,
viewsets.GenericViewSet,
):
authentication_classes = (HDETokenAuthentication,)
permission_classes = IsAuthenticated, AssignedToExternalSystem, UserAndDeduplicationSetAreOfTheSameSystem
permission_classes = (
IsAuthenticated,
AssignedToExternalSystem,
UserAndDeduplicationSetAreOfTheSameSystem,
)
serializer_class = IgnoredKeyPairSerializer
queryset = IgnoredKeyPair.objects.all()
parent_lookup_kwargs = {
Expand Down
16 changes: 9 additions & 7 deletions src/hope_dedup_engine/apps/core/storage.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Any

from django.conf import settings
from django.core.files.storage import FileSystemStorage

Expand All @@ -16,7 +18,7 @@ class CV2DNNStorage(UniqueStorageMixin, FileSystemStorage):


class HDEAzureStorage(UniqueStorageMixin, AzureStorage):
def __init__(self, *args, **kwargs):
def __init__(self, *args: Any, **kwargs: Any) -> None:
self.account_name = settings.AZURE_ACCOUNT_NAME
self.account_key = settings.AZURE_ACCOUNT_KEY
self.custom_domain = settings.AZURE_CUSTOM_DOMAIN
Expand All @@ -26,20 +28,20 @@ def __init__(self, *args, **kwargs):


class HOPEAzureStorage(HDEAzureStorage):
def __init__(self, *args, **kwargs):
def __init__(self, *args: Any, **kwargs: Any) -> None:
super().__init__(*args, **kwargs)
self.azure_container = settings.AZURE_CONTAINER_HOPE

def delete(self, name):
def delete(self, name: str) -> None:
raise RuntimeError("This storage cannot delete files")

def open(self, name, mode="rb"):
def open(self, name: str, mode: str = "rb") -> Any:
if "w" in mode:
raise RuntimeError("This storage cannot open files in write mode")
return super().open(name, mode="rb")

def save(self, name, content, max_length=None):
def save(self, name: str, content: Any, max_length: int | None = None) -> None:
raise RuntimeError("This storage cannot save files")

def listdir(self, path=""):
return []
def listdir(self, path: str = "") -> tuple[list[str], list[str]]:
return ([], [])
17 changes: 13 additions & 4 deletions src/hope_dedup_engine/apps/faces/celery_tasks.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,21 @@
import traceback

from celery import shared_task, states
from celery import Task, shared_task, states

from hope_dedup_engine.apps.faces.services.duplication_detector import (
DuplicationDetector,
)
from hope_dedup_engine.apps.faces.utils.celery_utils import task_lifecycle
from hope_dedup_engine.apps.faces.utils.duplication_detector import DuplicationDetector


@shared_task(bind=True, soft_time_limit=0.5 * 60 * 60, time_limit=1 * 60 * 60)
@task_lifecycle(name="Deduplicate", ttl=1 * 60 * 60)
# TODO: Use DeduplicationSet objects as input to deduplication pipeline
def deduplicate(self, filenames: tuple[str], ignore_pairs: tuple[tuple[str, str]] = tuple()) -> tuple[tuple[str]]:
def deduplicate(
self: Task,
filenames: tuple[str],
ignore_pairs: tuple[tuple[str, str], ...] = tuple(),
) -> tuple[tuple[str, ...], ...]:
"""
Deduplicate a set of filenames, ignoring any specified pairs of filenames.
Expand All @@ -25,5 +31,8 @@ def deduplicate(self, filenames: tuple[str], ignore_pairs: tuple[tuple[str, str]
dd = DuplicationDetector(filenames, ignore_pairs)
return dd.find_duplicates()
except Exception as e:
self.update_state(state=states.FAILURE, meta={"exc_message": str(e), "traceback": traceback.format_exc()})
self.update_state(
state=states.FAILURE,
meta={"exc_message": str(e), "traceback": traceback.format_exc()},
)
raise e
8 changes: 8 additions & 0 deletions src/hope_dedup_engine/apps/faces/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class StorageKeyError(Exception):
    """
    Exception raised when the storage key does not exist.

    Attributes:
        key: The storage key that could not be found.
    """

    def __init__(self, key: str) -> None:
        """
        Args:
            key (str): The storage key that does not exist.
        """
        self.key = key
        super().__init__(f"Storage key '{key}' does not exist.")

    def __reduce__(self) -> tuple[type, tuple[str]]:
        """
        Rebuild the exception from its key rather than from ``self.args``.

        ``self.args`` holds the formatted message, so the default exception
        reduction would call ``__init__`` with that message as ``key`` when
        the instance is unpickled or copied, corrupting ``key`` and wrapping
        the message a second time.
        """
        return (self.__class__, (self.key,))
26 changes: 26 additions & 0 deletions src/hope_dedup_engine/apps/faces/forms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from django.forms import CharField, ValidationError


class MeanValuesTupleField(CharField):
    """
    Form field for a tuple of three mean values entered as text.

    The text form is three floats separated by ", " (a comma followed by a
    space), e.g. "0.0, 0.0, 0.0". Each value must be between -255 and 255,
    inclusive.
    """

    default_error_messages = {
        "invalid": (
            "Enter a valid tuple of three float values separated by commas and "
            "spaces, e.g. '0.0, 0.0, 0.0'. Each value must be between -255 and 255."
        ),
    }

    def to_python(self, value: str) -> tuple[float, float, float]:
        """
        Parse the submitted text into a tuple of three floats.

        Args:
            value (str): Text such as "104.0, 177.0, 123.0".

        Returns:
            tuple[float, float, float]: The parsed values.

        Raises:
            ValidationError: If the value is not a string, cannot be parsed
                as floats, does not contain exactly three elements, or has an
                element outside the range -255..255.
        """
        try:
            values = tuple(map(float, value.split(", ")))
            if len(values) != 3:
                raise ValueError("The tuple must have exactly three elements.")
            if not all(-255 <= v <= 255 for v in values):
                raise ValueError(
                    "Each value in the tuple must be between -255 and 255."
                )
            return values
        # AttributeError/TypeError: value is not a string (e.g. None);
        # ValueError: unparsable float, wrong length, or out-of-range value.
        except (AttributeError, TypeError, ValueError) as e:
            raise ValidationError(
                self.error_messages["invalid"], code="invalid"
            ) from e

    def prepare_value(self, value: tuple[float, ...] | list[float]) -> str:
        """
        Render a stored value in the text form accepted by ``to_python``.

        Args:
            value: A tuple or list of numbers; any other value is passed to
                the parent implementation unchanged.

        Returns:
            str: The elements joined with ", " for tuples and lists,
            otherwise whatever the parent ``prepare_value`` returns.
        """
        # Lists are accepted as well as tuples so that a value which lost its
        # tuple type still renders in a form that to_python can parse back.
        if isinstance(value, (tuple, list)):
            return ", ".join(map(str, value))
        return super().prepare_value(value)
Empty file.
38 changes: 38 additions & 0 deletions src/hope_dedup_engine/apps/faces/managers/net.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from django.conf import settings

from constance import config
from cv2 import dnn, dnn_Net

from hope_dedup_engine.apps.core.storage import CV2DNNStorage


class DNNInferenceManager:
    """
    Loads a Caffe network through OpenCV's DNN module and configures it.

    The prototxt and caffemodel file names are read from Django settings and
    resolved to paths by the given storage. The preferred backend and target
    are read from the constance configuration.
    """

    def __init__(self, storage: CV2DNNStorage) -> None:
        """
        Read the network from storage and apply the configured backend/target.

        Args:
            storage (CV2DNNStorage): Storage used to resolve the paths of the
                model definition and weights files.
        """
        prototxt_path = storage.path(settings.PROTOTXT_FILE)
        caffemodel_path = storage.path(settings.CAFFEMODEL_FILE)
        self.net = dnn.readNetFromCaffe(prototxt_path, caffemodel_path)

        # Configuration values are converted with int() before being handed
        # to OpenCV.
        backend_id = int(config.DNN_BACKEND)
        self.net.setPreferableBackend(backend_id)
        target_id = int(config.DNN_TARGET)
        self.net.setPreferableTarget(target_id)

    def get_model(self) -> dnn_Net:
        """
        Return the network created in ``__init__``.

        Returns:
            cv2.dnn_Net: The loaded and configured network held by this manager.
        """
        return self.net
Loading

0 comments on commit 9e8440e

Please sign in to comment.