Skip to content

Commit

Permalink
Merge pull request #17 from unicef/feature/duplicate-search-results-api
Browse files Browse the repository at this point in the history
Add duplicate search result API
  • Loading branch information
saxix authored May 24, 2024
2 parents 376c029 + c8c6e85 commit 4f6996c
Show file tree
Hide file tree
Showing 27 changed files with 166 additions and 90 deletions.
3 changes: 3 additions & 0 deletions src/hope_dedup_engine/apps/api/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@
BULK = "bulk"
BULK_IMAGE = f"{IMAGE}_{BULK}"
BULK_IMAGE_LIST = f"{IMAGE_LIST}_{BULK}"

# Names for the duplicates resource. DUPLICATE_LIST is passed to the nested
# router as both the URL prefix and the basename of the duplicate viewset.
DUPLICATE = "duplicate"
DUPLICATE_LIST = f"{DUPLICATE}s"
48 changes: 22 additions & 26 deletions src/hope_dedup_engine/apps/api/migrations/0001_initial.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Generated by Django 5.0.6 on 2024-05-20 12:50
# Generated by Django 5.0.6 on 2024-05-24 11:45

import django.db.models.deletion
import uuid
Expand All @@ -21,17 +21,17 @@ class Migration(migrations.Migration):
fields=[
("id", models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
("name", models.CharField(max_length=100)),
("reference_pk", models.IntegerField()),
("reference_pk", models.CharField(max_length=100)),
(
"state",
models.IntegerField(
choices=[(0, "Clean"), (1, "Dirty"), (2, "Processing"), (3, "Error")], default=0
),
),
("deleted", models.BooleanField(default=False, verbose_name="deleted")),
("deleted", models.BooleanField(default=False)),
("error", models.CharField(blank=True, max_length=255, null=True)),
("created_at", models.DateTimeField(auto_now_add=True, verbose_name="created at")),
("updated_at", models.DateTimeField(auto_now=True, verbose_name="updated at")),
("created_at", models.DateTimeField(auto_now_add=True)),
("updated_at", models.DateTimeField(auto_now=True)),
(
"created_by",
models.ForeignKey(
Expand All @@ -58,6 +58,21 @@ class Migration(migrations.Migration):
),
],
),
migrations.CreateModel(
name="Duplicate",
fields=[
("id", models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name="ID")),
("first_reference_pk", models.CharField(max_length=100)),
("first_filename", models.CharField(max_length=255)),
("second_reference_pk", models.CharField(max_length=100)),
("second_filename", models.CharField(max_length=255)),
("score", models.FloatField()),
(
"deduplication_set",
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="api.deduplicationset"),
),
],
),
migrations.CreateModel(
name="HDEToken",
fields=[
Expand All @@ -69,7 +84,6 @@ class Migration(migrations.Migration):
on_delete=django.db.models.deletion.CASCADE,
related_name="auth_tokens",
to=settings.AUTH_USER_MODEL,
verbose_name="User",
),
),
],
Expand All @@ -79,27 +93,13 @@ class Migration(migrations.Migration):
"abstract": False,
},
),
migrations.CreateModel(
name="Duplicate",
fields=[
("id", models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
("filename", models.CharField(max_length=255)),
(
"deduplication_set",
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="api.deduplicationset"),
),
],
options={
"abstract": False,
"unique_together": {("deduplication_set", "filename")},
},
),
migrations.CreateModel(
name="Image",
fields=[
("id", models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False)),
("reference_pk", models.CharField(max_length=100)),
("filename", models.CharField(max_length=255)),
("created_at", models.DateTimeField(auto_now_add=True, verbose_name="created at")),
("created_at", models.DateTimeField(auto_now_add=True)),
(
"created_by",
models.ForeignKey(
Expand All @@ -115,9 +115,5 @@ class Migration(migrations.Migration):
models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, to="api.deduplicationset"),
),
],
options={
"abstract": False,
"unique_together": {("deduplication_set", "filename")},
},
),
]
5 changes: 1 addition & 4 deletions src/hope_dedup_engine/apps/api/models/auth.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
from django.conf import settings
from django.db import models
from django.utils.translation import gettext_lazy as _

from rest_framework.authtoken.models import Token


class HDEToken(Token):
user = models.ForeignKey(
settings.AUTH_USER_MODEL, related_name="auth_tokens", on_delete=models.CASCADE, verbose_name=_("User")
)
user = models.ForeignKey(settings.AUTH_USER_MODEL, related_name="auth_tokens", on_delete=models.CASCADE)
47 changes: 22 additions & 25 deletions src/hope_dedup_engine/apps/api/models/deduplication.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,57 +2,54 @@

from django.conf import settings
from django.db import models
from django.utils.translation import gettext_lazy as _

from hope_dedup_engine.apps.security.models import ExternalSystem

REFERENCE_PK_LENGTH = 100


class DeduplicationSet(models.Model):
class State(models.IntegerChoices):
CLEAN = 0, _("Clean") # Deduplication set is created or already processed
DIRTY = 1, _("Dirty") # Images are added to deduplication set, but not yet processed
PROCESSING = 2, _("Processing") # Images are being processed
ERROR = 3, _("Error") # Error occurred
CLEAN = 0, "Clean" # Deduplication set is created or already processed
DIRTY = 1, "Dirty" # Images are added to deduplication set, but not yet processed
PROCESSING = 2, "Processing" # Images are being processed
ERROR = 3, "Error" # Error occurred

id = models.UUIDField(primary_key=True, default=uuid4)
name = models.CharField(max_length=100)
reference_pk = models.IntegerField()
reference_pk = models.CharField(max_length=REFERENCE_PK_LENGTH)
state = models.IntegerField(
choices=State.choices,
default=State.CLEAN,
)
deleted = models.BooleanField(_("deleted"), null=False, blank=False, default=False)
deleted = models.BooleanField(null=False, blank=False, default=False)
external_system = models.ForeignKey(ExternalSystem, on_delete=models.CASCADE)
error = models.CharField(max_length=255, null=True, blank=True)
created_by = models.ForeignKey(
settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True, related_name="+"
)
created_at = models.DateTimeField(_("created at"), auto_now_add=True)
created_at = models.DateTimeField(auto_now_add=True)
updated_by = models.ForeignKey(
settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True, related_name="+"
)
updated_at = models.DateTimeField(_("updated at"), auto_now=True)
updated_at = models.DateTimeField(auto_now=True)


class ImagePath(models.Model):
class Image(models.Model):
id = models.UUIDField(primary_key=True, default=uuid4)
deduplication_set = models.ForeignKey(DeduplicationSet, on_delete=models.CASCADE)
reference_pk = models.CharField(max_length=REFERENCE_PK_LENGTH)
filename = models.CharField(max_length=255)

class Meta:
abstract = True
unique_together = ("deduplication_set", "filename")


class Duplicate(ImagePath):
pass


class Image(ImagePath):
created_by = models.ForeignKey(
settings.AUTH_USER_MODEL, on_delete=models.CASCADE, null=True, blank=True, related_name="+"
)
created_at = models.DateTimeField(_("created at"), auto_now_add=True)
#
# class Meta:
# unique_together = ("deduplication_set", "filename")
created_at = models.DateTimeField(auto_now_add=True)


# One duplicate-search result: two entries of the same deduplication set,
# each identified by a reference key and a filename, plus a score.
class Duplicate(models.Model):
# Owning deduplication set; rows are deleted together with it (CASCADE).
deduplication_set = models.ForeignKey(DeduplicationSet, on_delete=models.CASCADE)
# Reference key and filename of the first entry of the pair.
first_reference_pk = models.CharField(max_length=REFERENCE_PK_LENGTH)
first_filename = models.CharField(max_length=255)
# Reference key and filename of the second entry of the pair.
second_reference_pk = models.CharField(max_length=REFERENCE_PK_LENGTH)
second_filename = models.CharField(max_length=255)
# NOTE(review): scale and direction of the score are not visible here - confirm
# with the code that writes these rows.
score = models.FloatField()
22 changes: 21 additions & 1 deletion src/hope_dedup_engine/apps/api/serializers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from rest_framework import serializers

from hope_dedup_engine.apps.api.models import DeduplicationSet
from hope_dedup_engine.apps.api.models.deduplication import Image
from hope_dedup_engine.apps.api.models.deduplication import Duplicate, Image


class DeduplicationSetSerializer(serializers.ModelSerializer):
Expand All @@ -18,3 +18,23 @@ class Meta:
model = Image
fields = "__all__"
read_only_fields = "created_by", "created_at"


class EntrySerializer(serializers.Serializer):
    """Serialize one side of a ``Duplicate`` pair as ``reference_pk`` and ``filename``.

    The values are read from the ``<prefix>_reference_pk`` and
    ``<prefix>_filename`` attributes of the object being serialized, so one
    class can render either side of the pair.
    """

    reference_pk = serializers.SerializerMethodField()
    filename = serializers.SerializerMethodField()

    def __init__(self, prefix: str, *args, **kwargs) -> None:
        """Store the attribute prefix and forward everything else to DRF.

        Args:
            prefix: Attribute-name prefix selecting the entry, e.g. ``"first"``.
            *args: Positional arguments forwarded to ``serializers.Serializer``.
            **kwargs: Keyword arguments forwarded to ``serializers.Serializer``.
        """
        self._prefix = prefix
        super().__init__(*args, **kwargs)

    def get_reference_pk(self, duplicate: Duplicate) -> str:
        """Return the ``<prefix>_reference_pk`` value of *duplicate*.

        The return type is ``str`` because the ``*_reference_pk`` model
        fields are ``CharField`` columns, not integers.
        """
        return getattr(duplicate, f"{self._prefix}_reference_pk")

    def get_filename(self, duplicate: Duplicate) -> str:
        """Return the ``<prefix>_filename`` value of *duplicate*."""
        return getattr(duplicate, f"{self._prefix}_filename")


class DuplicateSerializer(serializers.Serializer):
    """Render a ``Duplicate`` as two nested entries, ``first`` and ``second``."""

    # source="*" passes the whole object to each nested serializer; the prefix
    # then selects which pair of attributes that serializer reads.
    # NOTE(review): Duplicate.score is not part of the output - confirm this is intended.
    first = EntrySerializer(source="*", prefix="first")
    second = EntrySerializer(source="*", prefix="second")
11 changes: 9 additions & 2 deletions src/hope_dedup_engine/apps/api/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,21 @@
from rest_framework import routers
from rest_framework_nested import routers as nested_routers

from hope_dedup_engine.apps.api.const import BULK_IMAGE_LIST, DEDUPLICATION_SET, DEDUPLICATION_SET_LIST, IMAGE_LIST
from hope_dedup_engine.apps.api.views import BulkImageViewSet, DeduplicationSetViewSet, ImageViewSet
from hope_dedup_engine.apps.api.const import (
BULK_IMAGE_LIST,
DEDUPLICATION_SET,
DEDUPLICATION_SET_LIST,
DUPLICATE_LIST,
IMAGE_LIST,
)
from hope_dedup_engine.apps.api.views import BulkImageViewSet, DeduplicationSetViewSet, DuplicateViewSet, ImageViewSet

router = routers.SimpleRouter()
router.register(DEDUPLICATION_SET_LIST, DeduplicationSetViewSet, basename=DEDUPLICATION_SET_LIST)

deduplication_sets_router = nested_routers.NestedSimpleRouter(router, DEDUPLICATION_SET_LIST, lookup=DEDUPLICATION_SET)
deduplication_sets_router.register(IMAGE_LIST, ImageViewSet, basename=IMAGE_LIST)
deduplication_sets_router.register(BULK_IMAGE_LIST, BulkImageViewSet, basename=BULK_IMAGE_LIST)
deduplication_sets_router.register(DUPLICATE_LIST, DuplicateViewSet, basename=DUPLICATE_LIST)

urlpatterns = [path("", include(router.urls)), path("", include(deduplication_sets_router.urls))]
23 changes: 19 additions & 4 deletions src/hope_dedup_engine/apps/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
)
from hope_dedup_engine.apps.api.const import DEDUPLICATION_SET_FILTER, DEDUPLICATION_SET_PARAM
from hope_dedup_engine.apps.api.models import DeduplicationSet
from hope_dedup_engine.apps.api.models.deduplication import Image
from hope_dedup_engine.apps.api.serializers import DeduplicationSetSerializer, ImageSerializer
from hope_dedup_engine.apps.api.models.deduplication import Duplicate, Image
from hope_dedup_engine.apps.api.serializers import DeduplicationSetSerializer, DuplicateSerializer, ImageSerializer
from hope_dedup_engine.apps.api.utils import delete_model_data, start_processing

MESSAGE = "message"
Expand Down Expand Up @@ -49,15 +49,20 @@ def perform_destroy(self, instance: DeduplicationSet) -> None:
instance.save()
delete_model_data(instance)

@staticmethod
def _start_processing(deduplication_set: DeduplicationSet) -> None:
    """Delete the set's stored ``Duplicate`` rows, then call ``start_processing``.

    Args:
        deduplication_set: The deduplication set to (re)process.
    """
    # Rows from an earlier run are removed before the new run is started.
    previous_results = Duplicate.objects.filter(deduplication_set=deduplication_set)
    previous_results.delete()
    start_processing(deduplication_set)

@action(detail=True, methods=(HTTPMethod.POST,))
def process(self, request: Request, pk: UUID | None = None) -> Response:
deduplication_set = DeduplicationSet.objects.get(pk=pk)
match deduplication_set.state:
case DeduplicationSet.State.CLEAN | DeduplicationSet.State.ERROR:
start_processing(deduplication_set)
self._start_processing(deduplication_set)
return Response({MESSAGE: RETRYING})
case DeduplicationSet.State.DIRTY:
start_processing(deduplication_set)
self._start_processing(deduplication_set)
return Response({MESSAGE: STARTED})
case DeduplicationSet.State.PROCESSING:
return Response({MESSAGE: ALREADY_PROCESSING}, status=status.HTTP_400_BAD_REQUEST)
Expand Down Expand Up @@ -149,3 +154,13 @@ def clear(self, request: Request, deduplication_set_pk: str) -> Response:
deduplication_set.updated_by = request.user
deduplication_set.save()
return Response(status=status.HTTP_204_NO_CONTENT)


# List-only endpoint for Duplicate rows, registered under a deduplication set.
# Only ListModelMixin is mixed in, so no create/retrieve/update/delete actions exist.
class DuplicateViewSet(nested_viewsets.NestedViewSetMixin, mixins.ListModelMixin, viewsets.GenericViewSet):
    queryset = Duplicate.objects.all()
    serializer_class = DuplicateSerializer
    authentication_classes = (HDETokenAuthentication,)
    permission_classes = (
        IsAuthenticated,
        AssignedToExternalSystem,
        UserAndDeduplicationSetAreOfTheSameSystem,
    )
    # Maps the parent URL kwarg to the queryset filter used by NestedViewSetMixin.
    parent_lookup_kwargs = {DEDUPLICATION_SET_PARAM: DEDUPLICATION_SET_FILTER}
4 changes: 1 addition & 3 deletions src/hope_dedup_engine/apps/core/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,7 @@ def get_available_name(self, name: str, max_length: int | None = None) -> str:


class CV2DNNStorage(UniqueStorageMixin, FileSystemStorage):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
self.location = settings.CV2DNN_PATH
pass


class HDEAzureStorage(UniqueStorageMixin, AzureStorage):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def __init__(self, filename: str) -> None:

self.storages = {
"images": HOPEAzureStorage(),
"cv2dnn": CV2DNNStorage(),
"cv2dnn": CV2DNNStorage(settings.CV2DNN_PATH),
"encoded": HDEAzureStorage(),
}

Expand Down
3 changes: 2 additions & 1 deletion tests/api/const.py → tests/api/api_const.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from hope_dedup_engine.apps.api.const import BULK_IMAGE_LIST, DEDUPLICATION_SET_LIST, IMAGE_LIST
from hope_dedup_engine.apps.api.const import BULK_IMAGE_LIST, DEDUPLICATION_SET_LIST, DUPLICATE_LIST, IMAGE_LIST

JSON = "json"
DEDUPLICATION_SET_LIST_VIEW = f"{DEDUPLICATION_SET_LIST}-list"
Expand All @@ -8,3 +8,4 @@
IMAGE_DETAIL_VIEW = f"{IMAGE_LIST}-detail"
BULK_IMAGE_LIST_VIEW = f"{BULK_IMAGE_LIST}-list"
BULK_IMAGE_CLEAR_VIEW = f"{BULK_IMAGE_LIST}-clear"
DUPLICATE_LIST_VIEW = f"{DUPLICATE_LIST}-list"
3 changes: 2 additions & 1 deletion tests/api/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from pytest_factoryboy import LazyFixture, register
from pytest_mock import MockerFixture
from rest_framework.test import APIClient
from testutils.factories.api import DeduplicationSetFactory, ImageFactory, TokenFactory
from testutils.factories.api import DeduplicationSetFactory, DuplicateFactory, ImageFactory, TokenFactory
from testutils.factories.user import ExternalSystemFactory, UserFactory

from hope_dedup_engine.apps.api.models import HDEToken
Expand All @@ -15,6 +15,7 @@
register(UserFactory)
register(DeduplicationSetFactory, external_system=LazyFixture("external_system"))
# The override keyword must match the factory attribute name exactly
# (lowercase ``deduplication_set``) for images to use the shared fixture.
register(ImageFactory, deduplication_set=LazyFixture("deduplication_set"))
register(DuplicateFactory, deduplication_set=LazyFixture("deduplication_set"))


@fixture
Expand Down
4 changes: 2 additions & 2 deletions tests/api/test_auth.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
from typing import Any
from uuid import uuid4

from conftest import get_auth_headers
from const import (
from api_const import (
BULK_IMAGE_CLEAR_VIEW,
BULK_IMAGE_LIST_VIEW,
DEDUPLICATION_SET_DETAIL_VIEW,
Expand All @@ -12,6 +11,7 @@
IMAGE_LIST_VIEW,
JSON,
)
from conftest import get_auth_headers
from pytest import mark
from rest_framework import status
from rest_framework.reverse import reverse
Expand Down
Loading

0 comments on commit 4f6996c

Please sign in to comment.