Skip to content

Commit

Permalink
Make lock optional
Browse files Browse the repository at this point in the history
  • Loading branch information
sergey-misuk-valor committed Sep 16, 2024
1 parent 1ec464d commit 347b9a2
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/hope_dedup_engine/apps/api/deduplication/lock.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from hope_dedup_engine.apps.api.models import DeduplicationSet

DELIMITER: Final[str] = "|"
# Placeholder value that start_processing() passes to the find_duplicates task
# in place of a serialized lock when settings.DEDUPLICATION_SET_LOCK_ENABLED
# is false.
LOCK_IS_NOT_ENABLED = "LOCK_IS_NOT_ENABLED"


class DeduplicationSetLock:
Expand Down
22 changes: 16 additions & 6 deletions src/hope_dedup_engine/apps/api/deduplication/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
)
from hope_dedup_engine.apps.api.models import DeduplicationSet, Duplicate
from hope_dedup_engine.apps.api.utils import send_notification
from hope_dedup_engine.config import settings


def _sort_keys(pair: DuplicateKeyPair) -> DuplicateKeyPair:
Expand All @@ -19,6 +20,7 @@ def _save_duplicates(
finder: DuplicateFinder,
deduplication_set: DeduplicationSet,
ignored_key_pairs: frozenset[tuple[str, str]],
lock_enabled: bool,
lock: DeduplicationSetLock,
) -> None:
for first, second, score in map(_sort_keys, finder.run()):
Expand All @@ -30,7 +32,8 @@ def _save_duplicates(
)
duplicate.score += score * finder.weight
duplicate.save()
lock.refresh()
if lock_enabled:
lock.refresh()


HOUR = 60 * 60
Expand All @@ -40,10 +43,14 @@ def _save_duplicates(
def find_duplicates(deduplication_set_id: str, serialized_lock: str) -> None:
deduplication_set = DeduplicationSet.objects.get(pk=deduplication_set_id)
try:
lock = DeduplicationSetLock.from_string(serialized_lock)
lock_enabled = settings.DEDUPLICATION_SET_LOCK_ENABLED
lock = (
DeduplicationSetLock.from_string(serialized_lock) if lock_enabled else None
)

# refresh lock in case we spent much time waiting in queue
lock.refresh()
if lock_enabled:
# refresh lock in case we spent much time waiting in queue
lock.refresh()

# clean results
Duplicate.objects.filter(deduplication_set=deduplication_set).delete()
Expand All @@ -56,7 +63,9 @@ def find_duplicates(deduplication_set_id: str, serialized_lock: str) -> None:

weight_total = 0
for finder in get_finders(deduplication_set):
_save_duplicates(finder, deduplication_set, ignored_key_pairs, lock)
_save_duplicates(
finder, deduplication_set, ignored_key_pairs, lock_enabled, lock
)
weight_total += finder.weight

for duplicate in deduplication_set.duplicate_set.all():
Expand All @@ -66,7 +75,8 @@ def find_duplicates(deduplication_set_id: str, serialized_lock: str) -> None:
deduplication_set.state = deduplication_set.State.CLEAN
deduplication_set.save()

lock.release()
if lock_enabled:
lock.release()

except Exception as e:
deduplication_set.state = DeduplicationSet.State.ERROR
Expand Down
9 changes: 8 additions & 1 deletion src/hope_dedup_engine/apps/api/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from django.conf import settings

import requests
from rest_framework import status
from rest_framework.exceptions import APIException

from hope_dedup_engine.apps.api.deduplication.lock import LOCK_IS_NOT_ENABLED
from hope_dedup_engine.apps.api.models import DeduplicationSet


Expand All @@ -16,7 +19,11 @@ def start_processing(deduplication_set: DeduplicationSet) -> None:
from hope_dedup_engine.apps.api.deduplication.process import find_duplicates

try:
lock = DeduplicationSetLock.for_deduplication_set(deduplication_set)
lock = (
DeduplicationSetLock.for_deduplication_set(deduplication_set)
if settings.DEDUPLICATION_SET_LOCK_ENABLED
else LOCK_IS_NOT_ENABLED
)
deduplication_set.state = DeduplicationSet.State.PROCESSING
deduplication_set.save()
find_duplicates.delay(str(deduplication_set.pk), str(lock))
Expand Down
1 change: 1 addition & 0 deletions src/hope_dedup_engine/config/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@
EMAIL_PORT = env("EMAIL_PORT", default=25)
EMAIL_USE_TLS = env("EMAIL_USE_TLS", default=False)
EMAIL_USE_SSL = env("EMAIL_USE_SSL", default=False)
# Feature flag for deduplication set locking. When false (the default), the
# processing code does not create, refresh or release a DeduplicationSetLock.
# NOTE(review): confirm env() parses this variable as a boolean — a raw
# non-empty string such as "False" would be truthy.
DEDUPLICATION_SET_LOCK_ENABLED = env("DEDUPLICATION_SET_LOCK_ENABLED", default=False)
DEDUPLICATION_SET_LAST_ACTION_TIMEOUT = env(
"DEDUPLICATION_SET_LAST_ACTION_TIMEOUT", default=60
)
Expand Down

0 comments on commit 347b9a2

Please sign in to comment.