Skip to content

Commit

Permalink
fix ! tuple/list (#33)
Browse files Browse the repository at this point in the history
  • Loading branch information
vitali-yanushchyk-valor authored Jun 18, 2024
1 parent 9e8440e commit 8add1ce
Show file tree
Hide file tree
Showing 6 changed files with 21 additions and 20 deletions.
4 changes: 2 additions & 2 deletions src/hope_dedup_engine/apps/faces/celery_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def deduplicate(
self: Task,
filenames: tuple[str],
ignore_pairs: tuple[tuple[str, str], ...] = tuple(),
) -> tuple[tuple[str, ...], ...]:
) -> list[list[str]]:
"""
Deduplicate a set of filenames, ignoring any specified pairs of filenames.
Expand All @@ -25,7 +25,7 @@ def deduplicate(
a pair of filenames to be ignored in the duplication check.
Returns:
tuple[tuple[str]]: A tuple of tuples, where each inner tuple represents a group of duplicates.
list[list[str]]: A list of lists, where each inner list represents a group of duplicates.
"""
try:
dd = DuplicationDetector(filenames, ignore_pairs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,12 @@ def _load_encodings_all(self) -> dict[str, list[np.ndarray[np.float32, Any]]]:
raise e
return data

def find_duplicates(self) -> tuple[tuple[str, ...], ...]:
def find_duplicates(self) -> list[list[str]]:
"""
Find and return a list of duplicate images based on face encodings.
Returns:
tuple[tuple[str, ...], ...]: A tuple of tuples, where each inner tuple contains
list[list[str]]: A list of lists, where each inner list contains
the filenames of duplicate images.
"""
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@

class DuplicateGroupsBuilder:
@staticmethod
def build(checked: set[tuple[str, str, float]]) -> tuple[tuple[str, ...], ...]:
def build(checked: set[tuple[str, str, float]]) -> list[list[str]]:
"""
Transform a set of tuples with distances into a list of grouped duplicate paths.
Args:
checked (set[tuple[str, str, float]]): A set of tuples containing the paths and their distances.
Returns:
tuple[tuple[str, ...], ...]: A tuple of grouped duplicate paths.
list[list[str]]: A list of grouped duplicate paths.
"""
# Dictionary to store connections between paths where distances are less than the threshold
groups = []
Expand Down Expand Up @@ -41,4 +41,4 @@ def build(checked: set[tuple[str, str, float]]) -> tuple[tuple[str, ...], ...]:
)
# Add the newly formed group to the list of groups
groups.append(new_group)
return tuple(map(tuple, groups))
return list(map(list, groups))
9 changes: 4 additions & 5 deletions src/hope_dedup_engine/apps/faces/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,23 @@ def validate(ignore: tuple[tuple[str, str], ...]) -> set[tuple[str, str]]:
if not ignore:
return set()
if not (
isinstance(ignore, tuple)
isinstance(ignore, list)
and all(
all(
(
isinstance(pair, tuple),
isinstance(pair, list),
len(pair) == 2,
all(isinstance(item, str) and item for item in pair),
)
)
for pair in ignore
)
):
raise ValidationError(
"Invalid format. Expected a tuple of tuples, each containing exactly two strings."
)
raise ValidationError("Invalid format for ignore pairs.")

result_set = set()
for pair in ignore:
pair = tuple(pair)
result_set.add(pair)
result_set.add((pair[1], pair[0]))
return result_set
8 changes: 4 additions & 4 deletions tests/faces/faces_const.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
FILENAME_ENCODED: Final[str] = "test_file.jpg.npy"
FILENAME_ENCODED_FORMAT: Final[str] = "{}.npy"
FILENAMES: Final[list[str]] = ["test_file.jpg", "test_file2.jpg", "test_file3.jpg"]
IGNORE_PAIRS: Final[tuple[tuple[str, str]]] = (
("ignore_file.jpg", "ignore_file2.jpg"),
("ignore_file4.jpg", "ignore_file3.jpg"),
)
IGNORE_PAIRS: Final[list[list[str, str]]] = [
["ignore_file.jpg", "ignore_file2.jpg"],
["ignore_file4.jpg", "ignore_file3.jpg"],
]

CELERY_TASK_NAME: Final[str] = "Deduplicate"
CELERY_TASK_TTL: Final[int] = 1 * 60 * 60
Expand Down
10 changes: 6 additions & 4 deletions tests/faces/test_duplication_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,19 @@ def test_init_successful(mock_dd):
@pytest.mark.parametrize(
"ignore_input, expected_output",
[
(tuple(), set()),
(list(), set()),
(
(("file1.jpg", "file2.jpg"),),
[
["file1.jpg", "file2.jpg"],
],
{("file1.jpg", "file2.jpg"), ("file2.jpg", "file1.jpg")},
),
(
(("file1.jpg", "file2.jpg"), ("file2.jpg", "file1.jpg")),
[["file1.jpg", "file2.jpg"], ["file2.jpg", "file1.jpg"]],
{("file1.jpg", "file2.jpg"), ("file2.jpg", "file1.jpg")},
),
(
(("file1.jpg", "file3.jpg"), ("file2.jpg", "file3.jpg")),
[["file1.jpg", "file3.jpg"], ["file2.jpg", "file3.jpg"]],
{
("file1.jpg", "file3.jpg"),
("file3.jpg", "file1.jpg"),
Expand Down

0 comments on commit 8add1ce

Please sign in to comment.