diff --git a/pdm.lock b/pdm.lock index 235662af..e04b9db8 100644 --- a/pdm.lock +++ b/pdm.lock @@ -317,48 +317,48 @@ files = [ [[package]] name = "coverage" -version = "7.5.2" +version = "7.5.3" requires_python = ">=3.8" summary = "Code coverage measurement for Python" groups = ["dev"] files = [ - {file = "coverage-7.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9a42970ce74c88bdf144df11c52c5cf4ad610d860de87c0883385a1c9d9fa4ab"}, - {file = "coverage-7.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26716a1118c6ce2188283b4b60a898c3be29b480acbd0a91446ced4fe4e780d8"}, - {file = "coverage-7.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60b66b0363c5a2a79fba3d1cd7430c25bbd92c923d031cae906bdcb6e054d9a2"}, - {file = "coverage-7.5.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d22eba19273b2069e4efeff88c897a26bdc64633cbe0357a198f92dca94268"}, - {file = "coverage-7.5.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bb5b92a0ab3d22dfdbfe845e2fef92717b067bdf41a5b68c7e3e857c0cff1a4"}, - {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1aef719b6559b521ae913ddeb38f5048c6d1a3d366865e8b320270b7bc4693c2"}, - {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8809c0ea0e8454f756e3bd5c36d04dddf222989216788a25bfd6724bfcee342c"}, - {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1acc2e2ef098a1d4bf535758085f508097316d738101a97c3f996bccba963ea5"}, - {file = "coverage-7.5.2-cp312-cp312-win32.whl", hash = "sha256:97de509043d3f0f2b2cd171bdccf408f175c7f7a99d36d566b1ae4dd84107985"}, - {file = "coverage-7.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:8941e35a0e991a7a20a1fa3e3182f82abe357211f2c335a9e6007067c3392fcf"}, - {file = "coverage-7.5.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:40dbb8e7727560fe8ab65efcddfec1ae25f30ef02e2f2e5d78cfb52a66781ec5"}, - {file = "coverage-7.5.2.tar.gz", hash = "sha256:13017a63b0e499c59b5ba94a8542fb62864ba3016127d1e4ef30d354fc2b00e9"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"}, + {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = 
"sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"}, + {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"}, + {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"}, + {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"}, ] [[package]] name = "coverage" -version = "7.5.2" +version = "7.5.3" extras = ["toml"] requires_python = ">=3.8" summary = "Code coverage measurement for Python" groups = ["dev"] dependencies = [ - "coverage==7.5.2", + "coverage==7.5.3", ] files = [ - {file = "coverage-7.5.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9a42970ce74c88bdf144df11c52c5cf4ad610d860de87c0883385a1c9d9fa4ab"}, - {file = "coverage-7.5.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:26716a1118c6ce2188283b4b60a898c3be29b480acbd0a91446ced4fe4e780d8"}, - {file = "coverage-7.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60b66b0363c5a2a79fba3d1cd7430c25bbd92c923d031cae906bdcb6e054d9a2"}, - {file = "coverage-7.5.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e5d22eba19273b2069e4efeff88c897a26bdc64633cbe0357a198f92dca94268"}, - {file = "coverage-7.5.2-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3bb5b92a0ab3d22dfdbfe845e2fef92717b067bdf41a5b68c7e3e857c0cff1a4"}, - {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:1aef719b6559b521ae913ddeb38f5048c6d1a3d366865e8b320270b7bc4693c2"}, - {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8809c0ea0e8454f756e3bd5c36d04dddf222989216788a25bfd6724bfcee342c"}, - {file = "coverage-7.5.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:1acc2e2ef098a1d4bf535758085f508097316d738101a97c3f996bccba963ea5"}, - {file = "coverage-7.5.2-cp312-cp312-win32.whl", hash = "sha256:97de509043d3f0f2b2cd171bdccf408f175c7f7a99d36d566b1ae4dd84107985"}, - {file = "coverage-7.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:8941e35a0e991a7a20a1fa3e3182f82abe357211f2c335a9e6007067c3392fcf"}, - {file = "coverage-7.5.2-pp38.pp39.pp310-none-any.whl", hash = "sha256:40dbb8e7727560fe8ab65efcddfec1ae25f30ef02e2f2e5d78cfb52a66781ec5"}, - {file = "coverage-7.5.2.tar.gz", hash = "sha256:13017a63b0e499c59b5ba94a8542fb62864ba3016127d1e4ef30d354fc2b00e9"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:296a7d9bbc598e8744c00f7a6cecf1da9b30ae9ad51c566291ff1314e6cbbed8"}, + {file = "coverage-7.5.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:34d6d21d8795a97b14d503dcaf74226ae51eb1f2bd41015d3ef332a24d0a17b3"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e317953bb4c074c06c798a11dbdd2cf9979dbcaa8ccc0fa4701d80042d4ebf1"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:705f3d7c2b098c40f5b81790a5fedb274113373d4d1a69e65f8b68b0cc26f6db"}, + {file = "coverage-7.5.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1196e13c45e327d6cd0b6e471530a1882f1017eb83c6229fc613cd1a11b53cd"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = 
"sha256:015eddc5ccd5364dcb902eaecf9515636806fa1e0d5bef5769d06d0f31b54523"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:fd27d8b49e574e50caa65196d908f80e4dff64d7e592d0c59788b45aad7e8b35"}, + {file = "coverage-7.5.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:33fc65740267222fc02975c061eb7167185fef4cc8f2770267ee8bf7d6a42f84"}, + {file = "coverage-7.5.3-cp312-cp312-win32.whl", hash = "sha256:7b2a19e13dfb5c8e145c7a6ea959485ee8e2204699903c88c7d25283584bfc08"}, + {file = "coverage-7.5.3-cp312-cp312-win_amd64.whl", hash = "sha256:0bbddc54bbacfc09b3edaec644d4ac90c08ee8ed4844b0f86227dcda2d428fcb"}, + {file = "coverage-7.5.3-pp38.pp39.pp310-none-any.whl", hash = "sha256:3538d8fb1ee9bdd2e2692b3b18c22bb1c19ffbefd06880f5ac496e42d7bb3884"}, + {file = "coverage-7.5.3.tar.gz", hash = "sha256:04aefca5190d1dc7a53a4c1a5a7f8568811306d7a8ee231c42fb69215571944f"}, ] [[package]] @@ -919,7 +919,7 @@ files = [ [[package]] name = "faker" -version = "25.2.0" +version = "25.3.0" requires_python = ">=3.8" summary = "Faker is a Python package that generates fake data for you." groups = ["dev"] @@ -927,8 +927,8 @@ dependencies = [ "python-dateutil>=2.4", ] files = [ - {file = "Faker-25.2.0-py3-none-any.whl", hash = "sha256:cfe97c4857c4c36ee32ea4aaabef884895992e209bae4cbd26807cf3e05c6918"}, - {file = "Faker-25.2.0.tar.gz", hash = "sha256:45b84f47ff1ef86e3d1a8d11583ca871ecf6730fad0660edadc02576583a2423"}, + {file = "Faker-25.3.0-py3-none-any.whl", hash = "sha256:0158d47e955b6ec22134c0a74ebb7ed34fe600896208bafbf1008db831b17f04"}, + {file = "Faker-25.3.0.tar.gz", hash = "sha256:bcbe31eee5ef4bbf87ce36c4eba53c01e2a1d912fde2a4d3528b430d2beb784f"}, ] [[package]] @@ -1358,16 +1358,13 @@ files = [ [[package]] name = "nodeenv" -version = "1.8.0" -requires_python = ">=2.7,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*" +version = "1.9.0" +requires_python = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" summary = "Node.js virtual environment builder" groups = ["dev"] -dependencies = [ - "setuptools", -] files = [ - {file = "nodeenv-1.8.0-py2.py3-none-any.whl", hash = "sha256:df865724bb3c3adc86b3876fa209771517b0cfe596beff01a92700e0e8be4cec"}, - {file = "nodeenv-1.8.0.tar.gz", hash = "sha256:d51e0c37e64fbf47d017feac3145cdbb58836d7eee8c6f6d3b6880c5456227d2"}, + {file = "nodeenv-1.9.0-py2.py3-none-any.whl", hash = "sha256:508ecec98f9f3330b636d4448c0f1a56fc68017c68f1e7857ebc52acf0eb879a"}, + {file = "nodeenv-1.9.0.tar.gz", hash = "sha256:07f144e90dae547bf0d4ee8da0ee42664a42a04e02ed68e06324348dafe4bdb1"}, ] [[package]] diff --git a/pdm.toml b/pdm.toml deleted file mode 100644 index 0e5b99fa..00000000 --- a/pdm.toml +++ /dev/null @@ -1,5 +0,0 @@ -[python] -use_venv = true - -[venv] -in_project = true diff --git a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py index a0618035..b1ec8fd1 100644 --- a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py +++ b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py @@ -1,21 +1,31 @@ import logging import os -from typing import Dict, List, Tuple +import re from django.conf import settings import cv2 import face_recognition import numpy as np +from constance import config from hope_dedup_engine.apps.core.storage import CV2DNNStorage, HDEAzureStorage, HOPEAzureStorage class DuplicationDetector: - def __init__(self, filename: str) -> None: - self.logger = logging.getLogger(__name__) + """ + A class to detect and process duplicate faces 
in images. + """ - self.storages = { + def __init__(self, filename: str) -> None: + """ + Initialize the DuplicationDetector with the given filename. + + Args: + filename (str): The filename of the image to process. + """ + self.logger: logging.Logger = logging.getLogger(__name__) + self.storages: dict[str, CV2DNNStorage | HDEAzureStorage | HOPEAzureStorage] = { "images": HOPEAzureStorage(), "cv2dnn": CV2DNNStorage(settings.CV2DNN_PATH), "encoded": HDEAzureStorage(), @@ -25,48 +35,88 @@ def __init__(self, filename: str) -> None: if not self.storages.get("cv2dnn").exists(file): raise FileNotFoundError(f"File {file} does not exist in storage.") - self.net = cv2.dnn.readNetFromCaffe( - self.storages.get("cv2dnn").path(settings.PROTOTXT_FILE), - self.storages.get("cv2dnn").path(settings.CAFFEMODEL_FILE), - ) - - self.net.setPreferableBackend(settings.DNN_BACKEND) - self.net.setPreferableTarget(settings.DNN_TARGET) + self.shape: dict[str, int] = self._get_shape() + self.net: cv2.dnn_Net = self._set_net(self.storages.get("cv2dnn")) self.filename: str = filename - self.encodings_filename = f"{self.filename}.npy" - - self.confidence: float = settings.FACE_DETECTION_CONFIDENCE - self.threshold: float = settings.DISTANCE_THRESHOLD + self.encodings_filename: str = f"{self.filename}.npy" + self.scale_factor: float = config.BLOB_FROM_IMAGE_SCALE_FACTOR + self.mean_values: tuple[float, float, float] = tuple(map(float, config.BLOB_FROM_IMAGE_MEAN_VALUES.split(", "))) + self.face_detection_confidence: float = config.FACE_DETECTION_CONFIDENCE + self.face_encodings_model: str = config.FACE_ENCODINGS_MODEL + self.face_encodings_num_jitters: int = config.FACE_ENCODINGS_NUM_JITTERS + self.distance_threshold: float = config.FACE_DISTANCE_THRESHOLD + self.nms_threshold: float = config.NMS_THRESHOLD @property def has_encodings(self) -> bool: return self.storages["encoded"].exists(self.encodings_filename) - def _get_face_detections_dnn(self) -> List[Tuple[int, int, int, int]]: - # TODO: Implement case if face regions for image are not detected - face_regions: List[Tuple[int, int, int, int]] = [] + def _set_net(self, storage: CV2DNNStorage) -> cv2.dnn_Net: + net = cv2.dnn.readNetFromCaffe( + storage.path(settings.PROTOTXT_FILE), + storage.path(settings.CAFFEMODEL_FILE), + ) + net.setPreferableBackend(int(config.DNN_BACKEND)) + net.setPreferableTarget(int(config.DNN_TARGET)) + return net + + def _get_shape(self) -> dict[str, int]: + pattern = r"input_shape\s*\{\s*" r"dim:\s*(\d+)\s*" r"dim:\s*(\d+)\s*" r"dim:\s*(\d+)\s*" r"dim:\s*(\d+)\s*\}" + with open(settings.PROTOTXT_FILE, "r") as file: + if match := re.search(pattern, file.read()): + return { + "batch_size": int(match.group(1)), + "channels": int(match.group(2)), + "height": int(match.group(3)), + "width": int(match.group(4)), + } + else: + raise ValueError("Could not find input_shape in prototxt file.") + + def _get_face_detections_dnn(self) -> list[tuple[int, int, int, int]]: + face_regions: list[tuple[int, int, int, int]] = [] try: with self.storages["images"].open(self.filename, "rb") as img_file: img_array = np.frombuffer(img_file.read(), dtype=np.uint8) + # Decode image from binary buffer to 3D numpy array (height, width, channels of the Blue-Green-Red color space) image = cv2.imdecode(img_array, cv2.IMREAD_COLOR) (h, w) = image.shape[:2] + # Create a blob (4D tensor) from the image blob = cv2.dnn.blobFromImage( - image=cv2.resize(image, dsize=(300, 300)), scalefactor=1.0, size=(300, 300), mean=(104.0,
177.0, 123.0) + image=cv2.resize(image, dsize=(self.shape["height"], self.shape["width"])), + size=(self.shape["height"], self.shape["width"]), + scalefactor=self.scale_factor, + mean=self.mean_values, ) self.net.setInput(blob) + # Forward pass to get output with shape (1, 1, N, 7), + # where N is the number of faces and 7 are the detection values: + # 1st: image index (0), 2nd: class label (0), 3rd: confidence (0-1), + # 4th-7th: normalized corner coordinates of the bounding box (start_x, start_y, end_x, end_y) detections = self.net.forward() - for i in range(0, detections.shape[2]): + boxes, confidences = [], [] + for i in range(detections.shape[2]): confidence = detections[0, 0, i, 2] - if confidence > self.confidence: - box = detections[0, 0, i, 3:7] * np.array([w, h, w, h]) - face_regions.append(tuple(box.astype("int").tolist())) + # Filter out weak detections by ensuring the confidence is greater than the minimum confidence + if confidence > self.face_detection_confidence: + box = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype("int") + boxes.append(box) + confidences.append(confidence) + if boxes: + # Apply non-maximum suppression to discard weak, overlapping bounding boxes + indices = cv2.dnn.NMSBoxes(boxes, confidences, self.face_detection_confidence, self.nms_threshold) + if indices is not None: + for i in indices: + face_regions.append(tuple(boxes[i])) except Exception as e: - self.logger.exception(f"Error processing face detection for image {self.filename}", exc_info=e) + self.logger.exception("Error processing face detection for image %s", self.filename) + raise e return face_regions - def _load_encodings_all(self) -> Dict[str, List[np.ndarray]]: - data: Dict[str, List[np.ndarray]] = {} + def _load_encodings_all(self) -> dict[str, list[np.ndarray]]: + data: dict[str, list[np.ndarray]] = {} try: _, files = self.storages["encoded"].listdir("") for file in files: @@ -74,7 +124,8 @@ def _load_encodings_all(self) -> Dict[str, List[np.ndarray]]: with self.storages["encoded"].open(file, "rb") as f: data[os.path.splitext(file)[0]] = np.load(f, allow_pickle=False) except Exception as e: - self.logger.exception(f"Error loading encodings: {e}", exc_info=True) + self.logger.exception("Error loading encodings.") + raise e return data def _encode_face(self) -> None: @@ -83,20 +134,36 @@ def _encode_face(self) -> None: image = face_recognition.load_image_file(img_file) encodings: list = [] face_regions = self._get_face_detections_dnn() - for region in face_regions: - if isinstance(region, (list, tuple)) and len(region) == 4: - top, right, bottom, left = region - face_encodings = face_recognition.face_encodings(image, [(top, right, bottom, left)], model="hog") - encodings.extend(face_encodings) - else: - self.logger.error(f"Invalid face region {region}") - with self.storages["encoded"].open(self.encodings_filename, "wb") as f: - np.save(f, encodings) + if not face_regions: + self.logger.error("No face regions detected in image %s", self.filename) + else: + for region in face_regions: + if isinstance(region, (list, tuple)) and len(region) == 4: + top, right, bottom, left = region + # Compute the face encodings for the face regions in the image + face_encodings = face_recognition.face_encodings( + image, + [(top, right, bottom, left)], + num_jitters=self.face_encodings_num_jitters, + model=self.face_encodings_model, + ) + encodings.extend(face_encodings) + else: + self.logger.error("Invalid face region.") + with self.storages["encoded"].open(self.encodings_filename, "wb") as f: + np.save(f, encodings) except Exception as e:
- self.logger.exception(f"Error processing face encodings for image {self.filename}", exc_info=e) + self.logger.exception("Error processing face encodings for image %s", self.filename) + raise e + + def find_duplicates(self) -> tuple[str]: + """ + Find and return a list of duplicate images based on face encodings. - def find_duplicates(self) -> Tuple[str]: - duplicated_images = set() + Returns: + tuple[str]: A tuple of filenames of duplicate images. + """ + duplicated_images: set[str] = set() path1 = self.filename try: if not self.has_encodings: @@ -104,17 +171,23 @@ def find_duplicates(self) -> Tuple[str]: encodings_all = self._load_encodings_all() encodings1 = encodings_all[path1] + checked_pairs = set() for path2, encodings2 in encodings_all.items(): if path1 != path2: for encoding1 in encodings1: for encoding2 in encodings2: + if (path1, path2, tuple(encoding1), tuple(encoding2)) in checked_pairs: + continue + distance = face_recognition.face_distance([encoding1], encoding2) - if distance < settings.DISTANCE_THRESHOLD: + if distance < self.distance_threshold: duplicated_images.update([path1, path2]) break + + checked_pairs.add((path1, path2, tuple(encoding1), tuple(encoding2))) if path2 in duplicated_images: break return tuple(duplicated_images) except Exception as e: - self.logger.exception(f"Error finding duplicates for image {path1}", exc_info=e) - return tuple(duplicated_images) + self.logger.exception("Error finding duplicates for image %s", path1) + raise e diff --git a/src/hope_dedup_engine/apps/faces/validators.py b/src/hope_dedup_engine/apps/faces/validators.py new file mode 100644 index 00000000..893275ba --- /dev/null +++ b/src/hope_dedup_engine/apps/faces/validators.py @@ -0,0 +1,24 @@ +from django.forms import CharField, ValidationError + + +class MeanValuesTupleField(CharField): + def to_python(self, value): + try: + values = tuple(map(float, value.split(", "))) + if len(values) != 3: + raise ValueError("The tuple must have exactly three elements.") + if not all(-255 <= v <= 255 for v in values): + raise ValueError("Each value in the tuple must be between -255 and 255.") + return values + except Exception as e: + raise ValidationError( + """ + Enter a valid tuple of three float values separated by commas and spaces, e.g. '0.0, 0.0, 0.0'. + Each value must be between -255 and 255. + """ + ) from e + + def prepare_value(self, value): + if isinstance(value, tuple): + return ", ".join(map(str, value)) + return value diff --git a/src/hope_dedup_engine/config/fragments/constance.py b/src/hope_dedup_engine/config/fragments/constance.py index a4f2c8fe..555dbc49 100644 --- a/src/hope_dedup_engine/config/fragments/constance.py +++ b/src/hope_dedup_engine/config/fragments/constance.py @@ -1,15 +1,132 @@ +import cv2 + from hope_dedup_engine.apps.security.constants import DEFAULT_GROUP_NAME CONSTANCE_BACKEND = "constance.backends.database.DatabaseBackend" +CONSTANCE_CONFIG = { + "NEW_USER_IS_STAFF": (False, "Set any new user as staff", bool), + "NEW_USER_DEFAULT_GROUP": (DEFAULT_GROUP_NAME, "Group to assign to any new user", str), + "DNN_BACKEND": ( + cv2.dnn.DNN_BACKEND_OPENCV, + "Specifies the computation backend to be used by OpenCV for deep learning inference.", + "dnn_backend", + ), + "DNN_TARGET": ( + cv2.dnn.DNN_TARGET_CPU, + "Specifies the target device on which OpenCV will perform the deep learning computations.", + "dnn_target", + ), + "BLOB_FROM_IMAGE_SCALE_FACTOR": ( + 1.0, + """Specifies the scaling factor applied to all pixel values when converting an image to a blob. 
Typically + it equals 1.0 (no scaling) or 1.0/255.0, which normalizes pixel values to the [0, 1] range. + Note that the scale factor is applied after the mean values are subtracted. Both the scale factor and the mean + values must match those used during training to get correct results. + """, + float, + ), + "BLOB_FROM_IMAGE_MEAN_VALUES": ( + "104.0, 177.0, 123.0", + """Specifies the mean BGR values used in image preprocessing to normalize pixel values by subtracting + the mean values of the training dataset. This helps in reducing model bias and improving accuracy. + The specified mean values are subtracted from each channel (Blue, Green, Red) of the input image. + Note that the mean values are subtracted before the scale factor is applied. Both the scale factor and the mean + values must match those used during training to get correct results. + """, + "tuple_field", + ), + "FACE_DETECTION_CONFIDENCE": ( + 0.7, + """ + Specifies the minimum confidence score required for a detected face to be considered valid. Detections + with confidence scores below this threshold are discarded as likely false positives. + """, + float, + ), + "NMS_THRESHOLD": ( + 0.4, + """ + Specifies the Intersection over Union (IoU) threshold used in Non-Maximum Suppression (NMS) to filter out + overlapping bounding boxes. If the IoU between two boxes exceeds this threshold, the box with the lower + confidence score is suppressed. Lower values result in fewer, more distinct boxes; higher values allow more + overlapping boxes to remain. + """, + float, + ), + "FACE_ENCODINGS_NUM_JITTERS": ( + 1, + """ + Specifies the number of times to re-sample the face when calculating the encoding. Higher values increase + accuracy but are computationally more expensive and slower. For example, setting 'num_jitters' to 100 makes + the process 100 times slower. + """, + int, + ), + "FACE_ENCODINGS_MODEL": ( + "small", + """ + Specifies the model type used for encoding face landmarks. It can be either 'small' which is faster and + detects only 5 key facial landmarks, or 'large' which is more precise and identifies 68 key facial landmarks + but requires more computational resources. + """, + "face_encodings_model", + ), + "FACE_DISTANCE_THRESHOLD": ( + 0.5, + """ + Specifies the maximum allowable distance between two face embeddings for them to be considered a match. It helps + determine whether two faces belong to the same person by setting a threshold for similarity. Lower values result in + stricter matching, while higher values allow for more lenient matches.
+ """, + float, + ), +} + + +CONSTANCE_CONFIG_FIELDSETS = { + "User settings": { + "fields": ("NEW_USER_IS_STAFF", "NEW_USER_DEFAULT_GROUP"), + "collapse": False, + }, + "Face recognition settings": { + "fields": ( + "DNN_BACKEND", + "DNN_TARGET", + "BLOB_FROM_IMAGE_SCALE_FACTOR", + "BLOB_FROM_IMAGE_MEAN_VALUES", + "FACE_DETECTION_CONFIDENCE", + "NMS_THRESHOLD", + "FACE_ENCODINGS_NUM_JITTERS", + "FACE_ENCODINGS_MODEL", + "FACE_DISTANCE_THRESHOLD", + ), + "collapse": False, + }, +} + CONSTANCE_ADDITIONAL_FIELDS = { "email": [ "django.forms.EmailField", {}, ], -} - -CONSTANCE_CONFIG = { - "NEW_USER_IS_STAFF": (False, "Set any new user as staff", bool), - "NEW_USER_DEFAULT_GROUP": (DEFAULT_GROUP_NAME, "Group to assign to any new user", str), + "dnn_backend": [ + "django.forms.ChoiceField", + { + "choices": ((cv2.dnn.DNN_BACKEND_OPENCV, "DNN_BACKEND_OPENCV"),), + }, + ], + "dnn_target": [ + "django.forms.ChoiceField", + { + "choices": ((cv2.dnn.DNN_TARGET_CPU, "DNN_TARGET_CPU"),), + }, + ], + "face_encodings_model": [ + "django.forms.ChoiceField", + { + "choices": (("small", "SMALL"), ("large", "LARGE")), + }, + ], + "tuple_field": ["hope_dedup_engine.apps.faces.validators.MeanValuesTupleField", {}], } diff --git a/src/hope_dedup_engine/config/fragments/recognition.py b/src/hope_dedup_engine/config/fragments/recognition.py deleted file mode 100644 index 7b349441..00000000 --- a/src/hope_dedup_engine/config/fragments/recognition.py +++ /dev/null @@ -1,7 +0,0 @@ -import cv2 - -DNN_BACKEND = cv2.dnn.DNN_TARGET_CPU -DNN_TARGET = cv2.dnn.DNN_TARGET_CPU - -FACE_DETECTION_CONFIDENCE = 0.5 -DISTANCE_THRESHOLD = 0.4 diff --git a/src/hope_dedup_engine/config/settings.py b/src/hope_dedup_engine/config/settings.py index d1e75080..ef57a797 100644 --- a/src/hope_dedup_engine/config/settings.py +++ b/src/hope_dedup_engine/config/settings.py @@ -190,7 +190,6 @@ from .fragments.csp import * # noqa from .fragments.debug_toolbar import * # noqa from .fragments.flags import * # noqa -from .fragments.recognition import * # noqa from .fragments.rest_framework import * # noqa from .fragments.root import * # noqa from .fragments.sentry import * # noqa diff --git a/tests/faces/faces_const.py b/tests/faces/faces_const.py index 0d7df597..7a506a4e 100644 --- a/tests/faces/faces_const.py +++ b/tests/faces/faces_const.py @@ -2,3 +2,28 @@ FILENAME: Final[str] = "test_file.jpg" FILENAMES: Final[list[str]] = ["test_file.jpg", "test_file2.jpg"] +DEPLOY_PROTO_CONTENT: Final[str] = "input_shape { dim: 1 dim: 3 dim: 300 dim: 300 }" +DEPLOY_PROTO_SHAPE: Final[dict[str, int]] = {"batch_size": 1, "channels": 3, "height": 300, "width": 300} +FACE_REGIONS_INVALID: Final[list[list[tuple[int, int, int, int]]]] = [[], [(0, 0, 10)]] +FACE_REGIONS_VALID: Final[list[tuple[int, int, int, int]]] = [ + (10, 10, 20, 20), + (30, 30, 40, 40), +] +FACE_DETECTION_CONFIDENCE: Final[float] = 0.7 +FACE_DETECTIONS: Final[list[tuple[float]]] = [ + (0, 0, 0.95, 0.1, 0.1, 0.2, 0.2), # with confidence 0.95 -> valid detection + (0, 0, 0.75, 0.3, 0.3, 0.4, 0.4), # with confidence 0.75 -> valid detection + (0, 0, 0.15, 0.1, 0.1, 0.2, 0.2), # with confidence 0.15 -> invalid detection +] +IMAGE_SIZE: Final[tuple[int, int, int]] = (100, 100, 3) # Size of the image after decoding (h, w, number of channels) +RESIZED_IMAGE_SIZE: Final[tuple[int, int, int]] = ( + 300, + 300, + 3, +) # Size of the image after resizing for processing (h, w, number of channels) +BLOB_SHAPE: Final[tuple[int, int, int, int]] = ( + 1, + 3, + 300, + 300, +) # Shape of the blob (4D tensor) 
for input to the neural network (batch_size, channels, h, w) diff --git a/tests/faces/fixtures/duplication_detector.py b/tests/faces/fixtures/duplication_detector.py index 46a6f39a..af547724 100644 --- a/tests/faces/fixtures/duplication_detector.py +++ b/tests/faces/fixtures/duplication_detector.py @@ -1,6 +1,7 @@ from io import BytesIO -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, mock_open, patch +import cv2 import numpy as np import pytest from PIL import Image @@ -8,66 +9,55 @@ from hope_dedup_engine.apps.core.storage import CV2DNNStorage, HDEAzureStorage, HOPEAzureStorage from hope_dedup_engine.apps.faces.utils.duplication_detector import DuplicationDetector -from ..faces_const import FILENAME +from ..faces_const import ( + BLOB_SHAPE, + DEPLOY_PROTO_CONTENT, + FACE_DETECTIONS, + FACE_REGIONS_VALID, + FILENAME, + IMAGE_SIZE, + RESIZED_IMAGE_SIZE, +) -@pytest.fixture(scope="module", autouse=True) -def dd(mock_hope_azure_storage, mock_cv2dnn_storage, mock_hde_azure_storage): +@pytest.fixture +def dd(mock_hope_azure_storage, mock_cv2dnn_storage, mock_hde_azure_storage, mock_prototxt_file, db): with ( patch("hope_dedup_engine.apps.faces.utils.duplication_detector.CV2DNNStorage", mock_cv2dnn_storage), patch("hope_dedup_engine.apps.faces.utils.duplication_detector.HOPEAzureStorage", mock_hope_azure_storage), patch("hope_dedup_engine.apps.faces.utils.duplication_detector.HDEAzureStorage", mock_hde_azure_storage), + patch("builtins.open", mock_prototxt_file), ): - mock_cv2dnn_storage.exists.return_value = False - detector = DuplicationDetector(FILENAME) - mock_logger = MagicMock() - detector.logger = mock_logger - return detector + return DuplicationDetector(FILENAME) + + +@pytest.fixture +def mock_prototxt_file(): + return mock_open(read_data=DEPLOY_PROTO_CONTENT) -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture def mock_cv2dnn_storage(): - storage = MagicMock(spec=CV2DNNStorage) - storage.exists.return_value = True - storage.path.side_effect = lambda filename: FILENAME - return storage + return MagicMock(spec=CV2DNNStorage) -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture def mock_hde_azure_storage(): - storage = MagicMock(spec=HDEAzureStorage) - storage.exists.return_value = True - # storage.listdir.return_value = (None, FILENAMES) - storage.open.return_value.__enter__.return_value.read.return_value = b"binary image data" - return storage + return MagicMock(spec=HDEAzureStorage) -@pytest.fixture(scope="module", autouse=True) +@pytest.fixture def mock_hope_azure_storage(): - storage = MagicMock(spec=HOPEAzureStorage) - storage.exists.return_value = True - storage.open.return_value.__enter__.return_value.read.return_value = b"binary image data" - return storage + return MagicMock(spec=HOPEAzureStorage) @pytest.fixture def image_bytes_io(dd): - # Create an image and save it to a BytesIO object - image = Image.new("RGB", (100, 100), color="red") img_byte_arr = BytesIO() + image = Image.new("RGB", (100, 100), color="red") image.save(img_byte_arr, format="JPEG") img_byte_arr.seek(0) - - def fake_open(file, mode="rb", *args, **kwargs): - if "rb" in mode and file == dd.filename: - # Return a new BytesIO object with image data each time to avoid file closure - return BytesIO(img_byte_arr.getvalue()) - else: - # Return a MagicMock for other cases to simulate other file behaviors - return MagicMock() - - img_byte_arr.fake_open = fake_open - + img_byte_arr.fake_open = lambda *_: BytesIO(img_byte_arr.getvalue()) return img_byte_arr @@ 
-80,21 +70,11 @@ def mock_open_context_manager(image_bytes_io): @pytest.fixture def mock_net(): - mock_net = MagicMock() # Mocking the neural network object - mock_detections = np.array( - [ - [ - [ - [0, 0, 0.95, 0.1, 0.1, 0.2, 0.2], # with confidence 0.95 - [0, 0, 0.15, 0.1, 0.1, 0.2, 0.2], # with confidence 0.15 - ] - ] - ], - dtype=np.float32, - ) # Mocking the detections array - expected_regions = [(10, 10, 20, 20)] # Mocking the expected regions + mock_net = MagicMock(spec=cv2.dnn_Net) # Mocking the neural network object + mock_detections = np.array([[FACE_DETECTIONS]], dtype=np.float32) # Mocking the detections array + mock_expected_regions = FACE_REGIONS_VALID mock_net.forward.return_value = mock_detections # Setting up the forward method of the mock network - mock_imdecode = MagicMock(return_value=np.ones((100, 100, 3), dtype=np.uint8)) - mock_resize = MagicMock(return_value=np.ones((300, 300, 3), dtype=np.uint8)) - mock_blob = np.zeros((1, 3, 300, 300)) - return mock_net, mock_imdecode, mock_resize, mock_blob, expected_regions + mock_imdecode = MagicMock(return_value=np.ones(IMAGE_SIZE, dtype=np.uint8)) + mock_resize = MagicMock(return_value=np.ones(RESIZED_IMAGE_SIZE, dtype=np.uint8)) + mock_blob = np.zeros(BLOB_SHAPE) + return mock_net, mock_imdecode, mock_resize, mock_blob, mock_expected_regions diff --git a/tests/faces/test_celery_tasks.py b/tests/faces/test_celery_tasks.py index dc986b64..a468ef11 100644 --- a/tests/faces/test_celery_tasks.py +++ b/tests/faces/test_celery_tasks.py @@ -48,5 +48,6 @@ def test_deduplicate_task_exception_handling( assert task.traceback is not None mock_find.assert_called_once() + # Check that the Redis lock was acquired and then released mock_set.assert_called_once_with(f"Deduplicate_{FILENAME}", "true", nx=True, ex=3600) mock_delete.assert_called_once_with(f"Deduplicate_{FILENAME}") # Lock is released diff --git a/tests/faces/test_duplication_detector.py b/tests/faces/test_duplication_detector.py index d63d6c7a..25b92b1f 100644 --- a/tests/faces/test_duplication_detector.py +++ b/tests/faces/test_duplication_detector.py @@ -6,43 +6,72 @@ import cv2 import numpy as np import pytest -from faces_const import FILENAME, FILENAMES +from constance import config +from faces_const import DEPLOY_PROTO_SHAPE, FACE_REGIONS_INVALID, FILENAME, FILENAMES from hope_dedup_engine.apps.faces.utils.duplication_detector import DuplicationDetector def test_duplication_detector_initialization(dd): assert isinstance(dd.net, cv2.dnn_Net) - assert isinstance(dd.logger, MagicMock) - assert dd.confidence == settings.FACE_DETECTION_CONFIDENCE - assert dd.threshold == settings.DISTANCE_THRESHOLD + assert dd.face_detection_confidence == config.FACE_DETECTION_CONFIDENCE + assert dd.distance_threshold == config.FACE_DISTANCE_THRESHOLD assert dd.filename == FILENAME assert dd.encodings_filename == f"{FILENAME}.npy" + assert dd.scale_factor == config.BLOB_FROM_IMAGE_SCALE_FACTOR + assert dd.mean_values == tuple(map(float, config.BLOB_FROM_IMAGE_MEAN_VALUES.split(", "))) + assert dd.face_encodings_model == config.FACE_ENCODINGS_MODEL + assert dd.face_encodings_num_jitters == config.FACE_ENCODINGS_NUM_JITTERS + assert dd.nms_threshold == config.NMS_THRESHOLD + assert dd.shape == DEPLOY_PROTO_SHAPE + + +def test_get_shape(dd, mock_prototxt_file): + with patch("builtins.open", mock_prototxt_file): + shape = dd._get_shape() + assert shape == DEPLOY_PROTO_SHAPE + + +def test_set_net(dd, mock_cv2dnn_storage, mock_net): + mock_net_instance, *_ = mock_net + with 
patch("cv2.dnn.readNetFromCaffe", return_value=mock_net_instance) as mock_read_net: + net = dd._set_net(mock_cv2dnn_storage) + mock_read_net.assert_called_once_with( + mock_cv2dnn_storage.path(settings.PROTOTXT_FILE), + mock_cv2dnn_storage.path(settings.CAFFEMODEL_FILE), + ) + + assert net == mock_net_instance + mock_net_instance.setPreferableBackend.assert_called_once_with(int(config.DNN_BACKEND)) + mock_net_instance.setPreferableTarget.assert_called_once_with(int(config.DNN_TARGET)) + for storage_name, storage in dd.storages.items(): assert isinstance(storage, MagicMock) if storage_name == "cv2dnn": storage.exists.assert_any_call(settings.PROTOTXT_FILE) storage.exists.assert_any_call(settings.CAFFEMODEL_FILE) - storage.path.assert_any_call(settings.CAFFEMODEL_FILE) + storage.path.assert_any_call(settings.PROTOTXT_FILE) storage.path.assert_any_call(settings.CAFFEMODEL_FILE) -def test_missing_files_in_storage(dd, mock_cv2dnn_storage): +@pytest.mark.parametrize("missing_file", [settings.PROTOTXT_FILE, settings.CAFFEMODEL_FILE]) +def test_initialization_missing_files_in_cv2dnn_storage(mock_cv2dnn_storage, missing_file): with patch( - "hope_dedup_engine.apps.faces.utils.duplication_detector.CV2DNNStorage", new=lambda _: mock_cv2dnn_storage + "hope_dedup_engine.apps.faces.utils.duplication_detector.CV2DNNStorage", return_value=mock_cv2dnn_storage ): - mock_cv2dnn_storage.exists.return_value = False + mock_cv2dnn_storage.exists.side_effect = lambda filename: filename != missing_file with pytest.raises(FileNotFoundError): DuplicationDetector(FILENAME) + mock_cv2dnn_storage.exists.assert_any_call(missing_file) def test_has_encodings_false(dd): - dd.storages["encoded"].exists = MagicMock(return_value=False) + dd.storages["encoded"].exists.return_value = False assert not dd.has_encodings def test_has_encodings_true(dd): - dd.storages["encoded"].exists = MagicMock(return_value=True) + dd.storages["encoded"].exists.return_value = True assert dd.has_encodings @@ -51,37 +80,36 @@ def test_get_face_detections_dnn_no_detections(dd, mock_open_context_manager): patch.object(dd.storages["images"], "open", return_value=mock_open_context_manager), patch.object(dd, "_get_face_detections_dnn", return_value=[]), ): - face_regions = dd._get_face_detections_dnn() - assert len(face_regions) == 0 # Assuming no faces are detected + assert len(face_regions) == 0 def test_get_face_detections_dnn_with_detections(dd, mock_net, mock_open_context_manager): - net, imdecode, resize, blob, expected_regions = mock_net + net, imdecode, resize, _, expected_regions = mock_net with ( patch.object(dd.storages["images"], "open", return_value=mock_open_context_manager), patch("cv2.imdecode", imdecode), patch("cv2.resize", resize), + patch.object(dd, "net", net), ): - - dd.net.setInput(blob) - dd.net = net face_regions = dd._get_face_detections_dnn() assert face_regions == expected_regions - assert len(face_regions) == 1 # Assuming one face is detected - assert isinstance(face_regions[0], tuple) # Each detected face region should be a tuple - assert len(face_regions[0]) == 4 # Each tuple should have four elements (coordinates of the bounding box) + for region in face_regions: + assert isinstance(region, tuple) + assert len(region) == 4 def test_get_face_detections_dnn_exception_handling(dd): - with patch("builtins.open", side_effect=Exception("Test exception")): - try: + with ( + patch.object(dd.storages["images"], "open", side_effect=Exception("Test exception")) as mock_storage_open, + patch.object(dd.logger, "exception") as 
mock_logger_exception, + ): + with pytest.raises(Exception, match="Test exception"): dd._get_face_detections_dnn() - except Exception: - ... - dd.logger.exception.assert_called_once() - dd.logger.reset_mock() + + mock_storage_open.assert_called_once_with(dd.filename, "rb") + mock_logger_exception.assert_called_once() def test_load_encodings_all_no_files(dd): @@ -91,77 +119,94 @@ def test_load_encodings_all_no_files(dd): def test_load_encodings_all_with_files(dd): - mock_encoded_data = {f"{filename}.npy": [np.array([1, 2, 3]), np.array([4, 5, 6])] for filename in FILENAMES} + mock_encoded_data = {f"{filename}.npy": np.array([1, 2, 3]) for filename in FILENAMES} encoded_data = {os.path.splitext(key)[0]: value for key, value in mock_encoded_data.items()} - print(f"\n{mock_encoded_data=}\n{encoded_data=}") - # Mock the storage's listdir method to return the file names with patch.object( - dd.storages["encoded"], - "listdir", - return_value=(None, [f"{filename}.npy" for filename in FILENAMES]), + dd.storages["encoded"], "listdir", return_value=(None, [f"{filename}.npy" for filename in FILENAMES]) + ): + with patch("builtins.open", mock_open()) as mocked_open: + for filename, data in mock_encoded_data.items(): + mocked_file = mock_open(read_data=data.tobytes()).return_value + mocked_open.side_effect = lambda f, mode="rb", mocked_file=mocked_file, filename=filename: ( + mocked_file if f.endswith(filename) else MagicMock() + ) + with patch("numpy.load", return_value=data): + result = dd._load_encodings_all() + + for key, value in encoded_data.items(): + assert np.array_equal(result[key], value) + + +def test_load_encodings_all_exception_handling_listdir(dd): + with ( + patch.object(dd.storages["encoded"], "listdir", side_effect=Exception("Test exception")) as mock_listdir, + patch.object(dd.logger, "exception") as mock_logger_exception, ): - print(f"{dd.storages['encoded'].listdir()[1]=}") - # Mock the storage's open method to return the data for each file - with patch( - "builtins.open", - side_effect=lambda f: mock_open(read_data=np.save(mock_encoded_data[f])).return_value, - ): + with pytest.raises(Exception, match="Test exception"): dd._load_encodings_all() - # Assert that the returned encodings match the expected data - # TODO: Fix - # assert all(np.array_equal(encodings[key], value) for key, value in encoded_data.items()) + mock_listdir.assert_called_once_with("") -def test_load_encodings_all_exception_handling(dd): - with patch("builtins.open", side_effect=Exception("Test exception")): - try: + mock_logger_exception.assert_called_once() + + +def test_load_encodings_all_exception_handling_open(dd): + with ( + patch.object(dd.storages["encoded"], "listdir", return_value=(None, [f"{FILENAME}.npy"])) as mock_listdir, + patch.object(dd.storages["encoded"], "open", side_effect=Exception("Test exception")) as mock_open, + patch.object(dd.logger, "exception") as mock_logger_exception, + ): + with pytest.raises(Exception, match="Test exception"): dd._load_encodings_all() - except Exception: - ... 
- dd.logger.reset_mock() + mock_listdir.assert_called_once_with("") + mock_open.assert_called_once_with(f"{FILENAME}.npy", "rb") -def test_encode_face_successful(dd, image_bytes_io): + mock_logger_exception.assert_called_once() + + +def test_encode_face_successful(dd, image_bytes_io, mock_net): + mock_net, *_ = mock_net with ( - patch("builtins.open", new_callable=lambda: image_bytes_io.fake_open), patch.object(dd.storages["images"], "open", side_effect=image_bytes_io.fake_open) as mocked_image_open, + patch.object(dd, "net", mock_net), ): dd._encode_face() - # Checks that the file was opened correctly and in binary read mode - print(f"{mocked_image_open.assert_called_with(dd.filename, 'rb')=}") - assert mocked_image_open.called, "The open function should be called" + mocked_image_open.assert_called_with(dd.filename, "rb") + assert mocked_image_open.side_effect == image_bytes_io.fake_open + assert mocked_image_open.called -def test_encode_face_invalid_region(dd, image_bytes_io): - # Mock _get_face_detections_dnn to return an invalid region +@pytest.mark.parametrize("face_regions", FACE_REGIONS_INVALID) +def test_encode_face_error(dd, image_bytes_io, face_regions): with ( - patch("builtins.open", new_callable=lambda: image_bytes_io.fake_open), - patch.object(dd.storages["images"], "open", side_effect=image_bytes_io.fake_open), - patch.object(dd, "_get_face_detections_dnn", return_value=[(0, 0, 10)]), + patch.object(dd.storages["images"], "open", side_effect=image_bytes_io.fake_open) as mock_storage_open, + patch.object(dd, "_get_face_detections_dnn", return_value=face_regions) as mock_get_face_detections_dnn, patch.object(dd.logger, "error") as mock_error_logger, ): - - # Invoke the _encode_face method, expecting an error log due to an invalid region dd._encode_face() - # Check that the error was logged with the correct message - mock_error_logger.assert_called_once_with(f"Invalid face region {(0, 0, 10)}") - dd.logger.reset_mock() + mock_storage_open.assert_called_with(dd.filename, "rb") + mock_get_face_detections_dnn.assert_called_once() + + mock_error_logger.assert_called_once() def test_encode_face_exception_handling(dd): - with patch("builtins.open", side_effect=Exception("Test exception")): - try: + with ( + patch.object(dd.storages["images"], "open", side_effect=Exception("Test exception")) as mock_storage_open, + patch.object(dd.logger, "exception") as mock_logger_exception, + ): + with pytest.raises(Exception, match="Test exception"): dd._encode_face() - except Exception: - ... 
- dd.logger.exception.assert_called_once() - dd.logger.reset_mock() + mock_storage_open.assert_called_with(dd.filename, "rb") + mock_logger_exception.assert_called_once() -def test_find_duplicates_successful(dd, mock_hde_azure_storage): + +def test_find_duplicates_successful_when_encoded(dd, mock_hde_azure_storage): # Generate mock return values dynamically based on FILENAMES mock_encodings = {filename: [np.array([0.1, 0.2, 0.3 + i * 0.001])] for i, filename in enumerate(FILENAMES)} @@ -185,23 +230,21 @@ def test_find_duplicates_successful(dd, mock_hde_azure_storage): def test_find_duplicates_calls_encode_face_when_no_encodings(dd): - # Prepare a mock for the 'exists' method used in the 'has_encodings' property with ( - patch.object(dd.storages["encoded"], "exists", return_value=False), patch.object(dd, "_encode_face") as mock_encode_face, + patch.object(dd, "_load_encodings_all", return_value={FILENAME: [MagicMock()]}), ): - + dd.storages["encoded"].exists.return_value = False dd.find_duplicates() - mock_encode_face.assert_called_once() - dd.logger.reset_mock() def test_find_duplicates_exception_handling(dd): - with patch.object(dd, "_load_encodings_all", side_effect=Exception("Test exception")): - try: + with ( + patch.object(dd, "_load_encodings_all", side_effect=Exception("Test exception")), + patch.object(dd.logger, "exception") as mock_logger_exception, + ): + with pytest.raises(Exception, match="Test exception"): dd.find_duplicates() - except Exception: - ... - dd.logger.exception.assert_called_once() - dd.logger.reset_mock() + + mock_logger_exception.assert_called_once() diff --git a/tests/faces/test_validators.py b/tests/faces/test_validators.py new file mode 100644 index 00000000..79b3e0df --- /dev/null +++ b/tests/faces/test_validators.py @@ -0,0 +1,41 @@ +from django.forms import ValidationError + +import pytest + +from hope_dedup_engine.apps.faces.validators import MeanValuesTupleField + + +def test_to_python_valid_tuple(): + field = MeanValuesTupleField() + assert field.to_python("104.0, 177.0, 123.0") == (104.0, 177.0, 123.0) + + +def test_to_python_invalid_length(): + field = MeanValuesTupleField() + with pytest.raises(ValidationError) as exc_info: + field.to_python("104.0, 177.0") + assert "Enter a valid tuple of three float values separated by commas and spaces" in str(exc_info.value) + + +def test_to_python_value_out_of_range(): + field = MeanValuesTupleField() + with pytest.raises(ValidationError) as exc_info: + field.to_python("104.0, 177.0, 256.0") + assert "Each value must be between -255 and 255." in str(exc_info.value) + + +def test_to_python_non_numeric_value(): + field = MeanValuesTupleField() + with pytest.raises(ValidationError) as exc_info: + field.to_python("104.0, abc, 123.0") + assert "Enter a valid tuple of three float values separated by commas and spaces" in str(exc_info.value) + + +def test_prepare_value_with_tuple(): + field = MeanValuesTupleField() + assert field.prepare_value((104.0, 177.0, 123.0)) == "104.0, 177.0, 123.0" + + +def test_prepare_value_with_string(): + field = MeanValuesTupleField() + assert field.prepare_value("104.0, 177.0, 123.0") == "104.0, 177.0, 123.0"
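
Reviewer note (illustrative sketch, not part of the patch): the following minimal, self-contained Python distills the detection path added in _get_face_detections_dnn, with the new CONSTANCE_CONFIG defaults inlined as plain values; all names are local to the sketch. One deviation is flagged in the comments: OpenCV documents the bboxes argument of cv2.dnn.NMSBoxes as (x, y, width, height) rectangles, whereas the patched code passes the SSD corner coordinates through unchanged, so the sketch converts corners to that layout before suppression.

import cv2
import numpy as np

# Defaults mirroring CONSTANCE_CONFIG above.
SCALE_FACTOR = 1.0
MEAN_VALUES = tuple(map(float, "104.0, 177.0, 123.0".split(", ")))
FACE_DETECTION_CONFIDENCE = 0.7
NMS_THRESHOLD = 0.4


def detect_faces(net: cv2.dnn_Net, image: np.ndarray) -> list[tuple[int, int, int, int]]:
    """Return (x1, y1, x2, y2) face boxes for a BGR image."""
    h, w = image.shape[:2]
    # blobFromImage resizes, subtracts the BGR mean, then multiplies by the scale factor.
    blob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)),
        scalefactor=SCALE_FACTOR,
        size=(300, 300),
        mean=MEAN_VALUES,
    )
    net.setInput(blob)
    # Output shape is (1, 1, N, 7); column 2 is confidence, columns 3:7 are normalized corners.
    detections = net.forward()
    boxes, scores = [], []
    for i in range(detections.shape[2]):
        confidence = float(detections[0, 0, i, 2])
        if confidence > FACE_DETECTION_CONFIDENCE:
            x1, y1, x2, y2 = (detections[0, 0, i, 3:7] * np.array([w, h, w, h])).astype(int)
            boxes.append((int(x1), int(y1), int(x2), int(y2)))
            scores.append(confidence)
    regions: list[tuple[int, int, int, int]] = []
    if boxes:
        # NMSBoxes expects (x, y, width, height) boxes, so convert from corners first.
        xywh = [(x1, y1, x2 - x1, y2 - y1) for x1, y1, x2, y2 in boxes]
        indices = cv2.dnn.NMSBoxes(xywh, scores, FACE_DETECTION_CONFIDENCE, NMS_THRESHOLD)
        regions = [boxes[int(i)] for i in np.asarray(indices).flatten()]
    return regions

Loading the network with cv2.dnn.readNetFromCaffe(prototxt_path, caffemodel_path) (both paths hypothetical here) and calling detect_faces(net, cv2.imread("photo.jpg")) reproduces the patched method's behaviour under these default settings, modulo the corner-to-(x, y, w, h) conversion noted above.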