Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix ! tuple/list #33

Merged
merged 1 commit into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions src/hope_dedup_engine/apps/faces/celery_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def deduplicate(
self: Task,
filenames: tuple[str],
ignore_pairs: tuple[tuple[str, str], ...] = tuple(),
) -> tuple[tuple[str, ...], ...]:
) -> list[list[str]]:
"""
Deduplicate a set of filenames, ignoring any specified pairs of filenames.

Expand All @@ -25,7 +25,7 @@ def deduplicate(
a pair of filenames to be ignored in the duplication check.

Returns:
tuple[tuple[str]]: A tuple of tuples, where each inner tuple represents a group of duplicates.
list[list[str]]: A list of lists, where each inner list represents a group of duplicates.
"""
try:
dd = DuplicationDetector(filenames, ignore_pairs)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,12 +81,12 @@ def _load_encodings_all(self) -> dict[str, list[np.ndarray[np.float32, Any]]]:
raise e
return data

def find_duplicates(self) -> tuple[tuple[str, ...], ...]:
def find_duplicates(self) -> list[list[str]]:
"""
Find and return a list of duplicate images based on face encodings.

Returns:
tuple[tuple[str, ...], ...]: A tuple of tuples, where each inner tuple contains
list[list[str]]: A list of lists, where each inner list contains
the filenames of duplicate images.
"""
try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@

class DuplicateGroupsBuilder:
@staticmethod
def build(checked: set[tuple[str, str, float]]) -> tuple[tuple[str, ...], ...]:
def build(checked: set[tuple[str, str, float]]) -> list[list[str]]:
"""
Transform a set of tuples with distances into a list of grouped duplicate paths.

Args:
checked (set[tuple[str, str, float]]): A set of tuples containing the paths and their distances.

Returns:
tuple[tuple[str, ...], ...]: A tuple of grouped duplicate paths.
list[list[str]]: A list of grouped duplicate paths.
"""
# List to store groups of connected paths where distances are less than the threshold
groups = []
Expand Down Expand Up @@ -41,4 +41,4 @@ def build(checked: set[tuple[str, str, float]]) -> tuple[tuple[str, ...], ...]:
)
# Add the newly formed group to the list of groups
groups.append(new_group)
return tuple(map(tuple, groups))
return list(map(list, groups))
9 changes: 4 additions & 5 deletions src/hope_dedup_engine/apps/faces/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,24 +7,23 @@ def validate(ignore: tuple[tuple[str, str], ...]) -> set[tuple[str, str]]:
if not ignore:
return set()
if not (
isinstance(ignore, tuple)
isinstance(ignore, list)
and all(
all(
(
isinstance(pair, tuple),
isinstance(pair, list),
len(pair) == 2,
all(isinstance(item, str) and item for item in pair),
)
)
for pair in ignore
)
):
raise ValidationError(
"Invalid format. Expected a tuple of tuples, each containing exactly two strings."
)
raise ValidationError("Invalid format for ignore pairs.")

result_set = set()
for pair in ignore:
pair = tuple(pair)
result_set.add(pair)
result_set.add((pair[1], pair[0]))
return result_set
8 changes: 4 additions & 4 deletions tests/faces/faces_const.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
FILENAME_ENCODED: Final[str] = "test_file.jpg.npy"
FILENAME_ENCODED_FORMAT: Final[str] = "{}.npy"
FILENAMES: Final[list[str]] = ["test_file.jpg", "test_file2.jpg", "test_file3.jpg"]
IGNORE_PAIRS: Final[tuple[tuple[str, str]]] = (
("ignore_file.jpg", "ignore_file2.jpg"),
("ignore_file4.jpg", "ignore_file3.jpg"),
)
# Filename pairs to be ignored in the duplication check, given as a list of
# two-item lists. `list[...]` accepts a single type argument, so the pair
# length cannot be expressed in the annotation (unlike `tuple[str, str]`).
IGNORE_PAIRS: Final[list[list[str]]] = [
    ["ignore_file.jpg", "ignore_file2.jpg"],
    ["ignore_file4.jpg", "ignore_file3.jpg"],
]

CELERY_TASK_NAME: Final[str] = "Deduplicate"
CELERY_TASK_TTL: Final[int] = 1 * 60 * 60
Expand Down
10 changes: 6 additions & 4 deletions tests/faces/test_duplication_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,19 @@ def test_init_successful(mock_dd):
@pytest.mark.parametrize(
"ignore_input, expected_output",
[
(tuple(), set()),
(list(), set()),
(
(("file1.jpg", "file2.jpg"),),
[
["file1.jpg", "file2.jpg"],
],
{("file1.jpg", "file2.jpg"), ("file2.jpg", "file1.jpg")},
),
(
(("file1.jpg", "file2.jpg"), ("file2.jpg", "file1.jpg")),
[["file1.jpg", "file2.jpg"], ["file2.jpg", "file1.jpg"]],
{("file1.jpg", "file2.jpg"), ("file2.jpg", "file1.jpg")},
),
(
(("file1.jpg", "file3.jpg"), ("file2.jpg", "file3.jpg")),
[["file1.jpg", "file3.jpg"], ["file2.jpg", "file3.jpg"]],
{
("file1.jpg", "file3.jpg"),
("file3.jpg", "file1.jpg"),
Expand Down
Loading