From 3c185e880483aeb26ef46381b3a1e2164534cb31 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Sat, 27 Apr 2024 16:41:25 -0400 Subject: [PATCH] feat(VisualReplayStrategy): compute image similarity to avoid unnecessary segmentation * remove sct_image from Screenshot; fix typo * add Image.cropped_image * add experiments/imagesimilarity.py * bugfix: sct_image -> image * find_similar_image_segmentation * fix test_crop_active_window --- README.md | 1 - experiments/imagesimilarity.py | 351 +++++++++++++++++++++++++++++++++ openadapt/events.py | 6 +- openadapt/models.py | 74 ++++--- openadapt/strategies/visual.py | 106 +++++++++- openadapt/utils.py | 2 +- poetry.lock | 84 +++++++- pyproject.toml | 1 + tests/openadapt/test_crop.py | 6 +- 9 files changed, 572 insertions(+), 59 deletions(-) create mode 100644 experiments/imagesimilarity.py diff --git a/README.md b/README.md index 3db408af1..1eeda7eaa 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,6 @@ poetry install poetry shell alembic upgrade head poetry run install-dashbaord - pytest ``` diff --git a/experiments/imagesimilarity.py b/experiments/imagesimilarity.py new file mode 100644 index 000000000..28361592d --- /dev/null +++ b/experiments/imagesimilarity.py @@ -0,0 +1,351 @@ +"""This module calculates image similarities using various methods.""" + +from typing import Callable +import time + +from matplotlib.offsetbox import OffsetImage, AnnotationBbox +from PIL import Image, ImageOps +from skimage.metrics import structural_similarity as ssim +from sklearn.manifold import MDS +import imagehash +import matplotlib.gridspec as gridspec +import matplotlib.pyplot as plt +import numpy as np + +from openadapt.db import crud + + +SHOW_SSIM = False + + +def calculate_ssim(im1: Image.Image, im2: Image.Image) -> float: + """Calculate the Structural Similarity Index (SSIM) between two images. + + Args: + im1 (Image.Image): The first image. + im2 (Image.Image): The second image. + + Returns: + float: The SSIM index between the two images. + """ + # Calculate aspect ratios + aspect_ratio1 = im1.width / im1.height + aspect_ratio2 = im2.width / im2.height + # Use the smaller image as the base for resizing to maintain the aspect ratio + if aspect_ratio1 < aspect_ratio2: + base_width = min(im1.width, im2.width) + base_height = int(base_width / aspect_ratio1) + else: + base_height = min(im1.height, im2.height) + base_width = int(base_height * aspect_ratio2) + + # Resize images to a common base while maintaining aspect ratio + im1 = im1.resize((base_width, base_height), Image.LANCZOS) + im2 = im2.resize((base_width, base_height), Image.LANCZOS) + + # Convert images to grayscale + im1_gray = np.array(im1.convert("L")) + im2_gray = np.array(im2.convert("L")) + + mssim, grad, S = ssim( + im1_gray, + im2_gray, + data_range=im2_gray.max() - im2_gray.min(), + gradient=True, + full=True, + ) + + if SHOW_SSIM: + # Normalize the gradient for visualization + grad_normalized = (grad - grad.min()) / (grad.max() - grad.min()) + im_grad = Image.fromarray((grad_normalized * 255).astype(np.uint8)) + + # Convert full SSIM image to uint8 + im_S = Image.fromarray((S * 255).astype(np.uint8)) + + # Create a figure to display the images + fig, axs = plt.subplots(1, 4, figsize=(20, 5)) # 1 row, 4 columns + + # Display each image in the subplot + axs[0].imshow(im1, cmap="gray") + axs[0].set_title("Image 1") + axs[0].axis("off") + + axs[1].imshow(im2, cmap="gray") + axs[1].set_title("Image 2") + axs[1].axis("off") + + axs[2].imshow(im_grad, cmap="gray") + axs[2].set_title("Gradient of SSIM") + axs[2].axis("off") + + axs[3].imshow(im_S, cmap="gray") + axs[3].set_title("SSIM Image") + axs[3].axis("off") + + plt.show(block=False) + + return 1 - mssim + + +def calculate_dynamic_threshold( + im1: Image.Image, + im2: Image.Image, + k: float = 1.0, +) -> float: + """Calculate a dynamic threshold for image difference. + + Based on the standard deviation of the pixel differences. + + Args: + im1 (Image.Image): The first image. + im2 (Image.Image): The second image. + k (float): The multiplier for the standard deviation to set the threshold. + + Returns: + float: The dynamically calculated threshold. + """ + # Convert images to numpy arrays + arr1 = np.array(im1) + arr2 = np.array(im2) + + # Calculate the absolute difference between the images + diff = np.abs(arr1 - arr2) + + # Calculate mean and standard deviation of the differences + mean_diff = np.mean(diff) + std_diff = np.std(diff) + + # Calculate the threshold as mean plus k times the standard deviation + threshold = mean_diff + k * std_diff + + return threshold + + +def thresholded_difference(im1: Image.Image, im2: Image.Image, k: float = 1.0) -> int: + """Return number of pixels differing by at least a dynamically calculated threshold. + + Args: + im1 (Image.Image): The first image. + im2 (Image.Image): The second image. + k (float): Multiplier for the standard deviation to set the dynamic threshold. + + Returns: + int: The number of pixels differing by at least the dynamically calculated + threshold. + """ + common_size = (min(im1.width, im2.width), min(im1.height, im2.height)) + im1 = im1.resize(common_size) + im2 = im2.resize(common_size) + + # Calculate the dynamic threshold + difference_threshold = calculate_dynamic_threshold(im1, im2, k) + + # Convert images to numpy arrays + arr1 = np.array(im1) + arr2 = np.array(im2) + + # Calculate the absolute difference between the images + diff = np.abs(arr1 - arr2) + + # Count pixels with a difference above the dynamically calculated threshold + count = np.sum(diff >= difference_threshold) + + return count + + +def prepare_image( + img: Image.Image, + size: tuple[int, int] = (128, 128), + border: int = 2, + color: str = "red", +) -> Image.Image: + """Resize an image to a common size, add a border to it. + + Args: + img (Image.Image): The original image to prepare. + size (tuple[int, int]): The size to which the images should be resized. + border (int): The width of the border around the image. + color (str): The color of the border. + + Returns: + Image.Image: The processed image. + """ + # Resize image + img = img.resize(size, Image.ANTIALIAS) + + # Add border to the image + img_with_border = ImageOps.expand(img, border=border, fill=color) + + return img_with_border + + +def plot_images_with_mds( + images: list[Image.Image], + distance_matrix: np.ndarray, + title: str, + hash_func: Callable, +) -> None: + """Plot images on a scatter plot based on the provided distance matrix. + + Args: + images (list[Image.Image]): list of images to plot. + distance_matrix (np.ndarray): A distance matrix of image differences. + title (str): Title of the plot. + hash_func (Callable): The hashing function to compute hash values. + + Returns: + None + """ + # Prepare images by resizing and adding a border + prepared_images = [prepare_image(img) for img in images] + + # Compute hash values for each image + hash_values = [str(hash_func(img)) if hash_func else "" for img in images] + + # Initialize MDS and fit the distance matrix to get the 2D embedding + mds = MDS(n_components=2, dissimilarity="precomputed", random_state=0) + positions = mds.fit_transform(distance_matrix) + + # Create a scatter plot with the MDS results + fig, ax = plt.subplots(figsize=(15, 10)) + ax.scatter(positions[:, 0], positions[:, 1], alpha=0) + + # Define properties for the bounding box + bbox_props = dict(boxstyle="round,pad=0.3", ec="b", lw=2, fc="white", alpha=0.7) + + # Loop through images, positions, and hash values to create annotations + for img, hash_val, (x, y) in zip(prepared_images, hash_values, positions): + im = OffsetImage(np.array(img), zoom=0.5) + ab = AnnotationBbox( + im, + (x, y), + xycoords="data", + frameon=True, + bboxprops=bbox_props, + ) + ax.add_artist(ab) + # Display the hash value beside the image + ax.text(x, y - 0.05, hash_val, fontsize=9, ha="center") + + # Remove the x and y ticks + ax.set_xticks([]) + ax.set_yticks([]) + + plt.title(title) + plt.show() + + +def display_distance_matrix_with_images( + distance_matrix: np.ndarray, + images: list[Image.Image], + func_name: str, + thumbnail_size: tuple[int, int] = (32, 32), +) -> None: + """Display the distance matrix as an image with thumbnails along the top and left. + + Args: + distance_matrix (np.ndarray): A square matrix with distance values. + images (list[Image.Image]): list of images corresponding to matrix rows/cols. + thumbnail_size (tuple[int, int]): Size to which thumbnails will be resized. + + Returns: + None + """ + # Number of images + n = len(images) + # Create a figure with subplots + fig = plt.figure(figsize=(10, 10)) + # GridSpec layout for the thumbnails and the distance matrix + gs = gridspec.GridSpec(n + 1, n + 1, figure=fig) + + # Place the distance matrix + ax_matrix = fig.add_subplot(gs[1:, 1:]) + ax_matrix.imshow(distance_matrix, cmap="viridis") + ax_matrix.set_xticks([]) + ax_matrix.set_yticks([]) + + # Annotate each cell with the distance value + for (i, j), val in np.ndenumerate(distance_matrix): + ax_matrix.text(j, i, f"{val:.4f}", ha="center", va="center", color="white") + + # Resize images to thumbnails + thumbnails = [img.resize(thumbnail_size, Image.ANTIALIAS) for img in images] + + # Plot images on the top row + for i, img in enumerate(thumbnails): + ax_img_top = fig.add_subplot(gs[0, i + 1]) + ax_img_top.imshow(np.array(img)) + ax_img_top.axis("off") # Hide axes + + # Plot images on the left column + for i, img in enumerate(thumbnails): + ax_img_left = fig.add_subplot(gs[i + 1, 0]) + ax_img_left.imshow(np.array(img)) + ax_img_left.axis("off") # Hide axes + + plt.suptitle(func_name) + plt.show() + + +def main() -> None: + """Main function to process images and display similarity metrics.""" + recording = crud.get_latest_recording() + action_events = recording.processed_action_events + images = [action_event.screenshot.cropped_image for action_event in action_events] + + similarity_funcs = { + "ssim": calculate_ssim, + "thresholded_difference": thresholded_difference, + "average_hash": lambda im1, im2: ( + imagehash.average_hash(im1) - imagehash.average_hash(im2) + ), + "dhash": lambda im1, im2: (imagehash.dhash(im1) - imagehash.dhash(im2)), + "phash": lambda im1, im2: (imagehash.phash(im1) - imagehash.phash(im2)), + "crop_resistant_hash": lambda im1, im2: ( + imagehash.crop_resistant_hash(im1) - imagehash.crop_resistant_hash(im2) + ), + "colorhash": lambda im1, im2: ( + imagehash.colorhash(im1) - imagehash.colorhash(im2) + ), + "whash": lambda im1, im2: imagehash.whash(im1) - imagehash.whash(im2), + } + + # Process each similarity function + for func_name, func in similarity_funcs.items(): + hash_func = { + "average_hash": imagehash.average_hash, + "dhash": imagehash.dhash, + "phash": imagehash.phash, + "crop_resistant_hash": imagehash.crop_resistant_hash, + "colorhash": imagehash.colorhash, + "whash": imagehash.whash, + }.get(func_name, None) + + # Create a matrix to store all pairwise distances + n = len(images) + distance_matrix = np.zeros((n, n)) + durations = [] + for i in range(n): + for j in range(i + 1, n): + start_time = time.time() + distance = abs(func(images[i], images[j])) + duration = time.time() - start_time + durations.append(duration) + distance_matrix[i, j] = distance + distance_matrix[j, i] = distance + mean_duration = sum(durations) / len(durations) + print(f"{func_name=}") + print(f"distance_matrix=\n{distance_matrix}") + print(f"{mean_duration=}") + display_distance_matrix_with_images(distance_matrix, images, func_name) + plot_images_with_mds( + images, + distance_matrix, + f"Image layout based on {func_name} ({mean_duration=:.4f}s)", + hash_func, + ) + + +if __name__ == "__main__": + main() diff --git a/openadapt/events.py b/openadapt/events.py index 19caaa065..09bd77b30 100644 --- a/openadapt/events.py +++ b/openadapt/events.py @@ -357,13 +357,13 @@ def get_timestamp_mappings( "double_click_distance_pixels", utils.get_double_click_distance_pixels, ) - logger.info(f"{double_click_distance=}") + logger.debug(f"{double_click_distance=}") double_click_interval = get_recording_attr( to_merge[0], "double_click_interval_seconds", utils.get_double_click_interval_seconds, ) - logger.info(f"{double_click_interval=}") + logger.debug(f"{double_click_interval=}") press_to_press_t = {} press_to_release_t = {} prev_pressed_event = None @@ -770,7 +770,7 @@ def discard_unused_events( ] num_referred_events_after = len(referred_events) num_referred_events_removed = num_referred_events_before - num_referred_events_after - logger.info(f"{referred_timestamp_key=} {num_referred_events_removed=}") + logger.debug(f"{referred_timestamp_key=} {num_referred_events_removed=}") return referred_events diff --git a/openadapt/models.py b/openadapt/models.py index 54f26a7e7..9110de076 100644 --- a/openadapt/models.py +++ b/openadapt/models.py @@ -6,7 +6,6 @@ from loguru import logger from oa_pynput import keyboard from PIL import Image, ImageChops -import mss.base import numpy as np import sqlalchemy as sa @@ -368,6 +367,7 @@ class Screenshot(db.Base): png_data = sa.Column(sa.LargeBinary) png_diff_data = sa.Column(sa.LargeBinary, nullable=True) png_diff_mask_data = sa.Column(sa.LargeBinary, nullable=True) + # cropped_png_data = sa.Column(sa.LargeBinary, nullable=True) recording = sa.orm.relationship("Recording", back_populates="screenshots") action_event = sa.orm.relationship("ActionEvent", back_populates="screenshot") @@ -375,55 +375,58 @@ class Screenshot(db.Base): def __init__( self, *args: tuple, - sct_img: mss.base.ScreenShot | None = None, + image: Image.Image | None = None, **kwargs: dict, ) -> None: """Initialize.""" super().__init__(*args, **kwargs) self.initialize_instance_attributes() - self.sct_img = sct_img + self._image = image @sa.orm.reconstructor def initialize_instance_attributes(self) -> None: """Initialize attributes for both new and loaded objects.""" # TODO: convert to png_data on save - self.sct_img = None - # TODO: replace prev with prev_timestamp? self.prev = None self._image = None - self._image_history = [] + self._cropped_image = None self._diff = None self._diff_mask = None self._base64 = None @property - def image(self) -> Image: + def image(self) -> Image.Image: """Get the image associated with the screenshot.""" if not self._image: - if self.sct_img: - self._image = Image.frombytes( - "RGB", - self.sct_img.size, - self.sct_img.bgra, - "raw", - "BGRX", - ) - else: - self._image = self.convert_binary_to_png(self.png_data) + self._image = self.convert_binary_to_png(self.png_data) return self._image + @property + def cropped_image(self) -> Image.Image: + """Return screenshot image cropped to corresponding action's active window.""" + if not self._cropped_image: + # if events have been merged, the last event will be the parent, e.g. + # ipdb> [(action.name, action.timestamp) for action in self.action_event] + # [('move', 1714142176.1630979), ('click', 1714142174.4848516), + # ('singleclick', 1714142174.4537418)] + # TODO: verify (e.g. assert) + # TODO: rename action_event -> action_events? + action_event = self.action_event[-1] + self._cropped_image = self.crop_active_window(action_event) + # TODO: save? + # self.cropped_png_data = self.convert_png_to_binary(self._cropped_image) + return self._cropped_image + @property def base64(self) -> str: """Return data URI of JPEG encoded base64.""" if not self._base64: - from openadapt import utils - self._base64 = utils.image2utf8(self.image) return self._base64 @property - def diff(self) -> Image: + def diff(self) -> Image.Image: """Get the difference between the current and previous screenshot.""" if self.png_diff_data: return self.convert_binary_to_png(self.png_diff_data) @@ -433,7 +436,7 @@ def diff(self) -> Image: return self._diff @property - def diff_mask(self) -> Image: + def diff_mask(self) -> Image.Image: """Get the difference mask between the current and previous screenshot.""" if self.png_diff_mask_data: return self.convert_binary_to_png(self.png_diff_mask_data) @@ -450,18 +453,12 @@ def array(self) -> np.ndarray: @classmethod def take_screenshot(cls: "Screenshot") -> "Screenshot": """Capture a screenshot.""" - # avoid circular import - from openadapt import utils - - sct_img = utils.take_screenshot() - screenshot = Screenshot(sct_img=sct_img) + image = utils.take_screenshot() + screenshot = Screenshot(image=image) return screenshot def crop_active_window(self, action_event: ActionEvent) -> None: """Crop the screenshot to the active window defined by the action event.""" - # avoid circular import - from openadapt import utils - window_event = action_event.window_event width_ratio, height_ratio = utils.get_scale_ratios(action_event) @@ -471,17 +468,10 @@ def crop_active_window(self, action_event: ActionEvent) -> None: y1 = y0 + window_event.height * height_ratio box = (x0, y0, x1, y1) - self._image_history.append(self.image) - self._image = self._image.crop(box) - - @property - def original_image(self) -> Image: - """Get the original image (before any cropping).""" - if self._image_history: - return self._image_history[0] - return self.image + cropped_image = self._image.crop(box) + return cropped_image - def convert_binary_to_png(self, image_binary: bytes) -> Image: + def convert_binary_to_png(self, image_binary: bytes) -> Image.Image: """Convert a binary image to a PNG image. Args: @@ -493,7 +483,7 @@ def convert_binary_to_png(self, image_binary: bytes) -> Image: buffer = io.BytesIO(image_binary) return Image.open(buffer) - def convert_png_to_binary(self, image: Image) -> bytes: + def convert_png_to_binary(self, image: Image.Image) -> bytes: """Convert a PNG image to binary image data. Args: @@ -529,3 +519,7 @@ class MemoryStat(db.Base): recording_timestamp = sa.Column(sa.Integer) memory_usage_bytes = sa.Column(ForceFloat) timestamp = sa.Column(ForceFloat) + + +# avoid circular import +from openadapt import utils # noqa diff --git a/openadapt/strategies/visual.py b/openadapt/strategies/visual.py index fb37674c8..a4421b89c 100644 --- a/openadapt/strategies/visual.py +++ b/openadapt/strategies/visual.py @@ -47,12 +47,16 @@ from loguru import logger from PIL import Image, ImageDraw +from skimage.metrics import structural_similarity as ssim +import numpy as np from openadapt import adapters, common, models, strategies, utils, vision DEBUG = False DEBUG_REPLAY = False +SEGMENTATIONS = [] # TODO: store to db +MAX_SSIM = 0.9 # threshold for considering an image as similar @dataclass @@ -60,6 +64,7 @@ class Segmentation: """A data class to encapsulate segmentation data of images. Attributes: + image: The original image used to generate segments. masked_images: A list of PIL Image objects that have been masked based on segmentation. descriptions: Descriptions of each segmented region, correlating with each @@ -72,6 +77,7 @@ class Segmentation: centroid of each segmented region. """ + image: Image.Image masked_images: list[Image.Image] descriptions: list[str] bounding_boxes: list[dict[str, float]] # "top", "left", "height", "width" @@ -362,6 +368,78 @@ def get_active_segment( return active_index +def get_image_similarity(im1: Image.Image, im2: Image.Image) -> tuple[float, np.array]: + """Calculate the structural similarity index (SSIM) between two images. + + This function first resizes the images to a common size maintaining their aspect + ratios. It then converts the resized images to grayscale and computes the SSIM. + + Args: + im1 (Image.Image): The first image to compare. + im2 (Image.Image): The second image to compare. + + Returns: + tuple[float, np.array]: A tuple containing the SSIM and the difference image. + """ + # Calculate aspect ratios + aspect_ratio1 = im1.width / im1.height + aspect_ratio2 = im2.width / im2.height + # Use the smaller image as the base for resizing to maintain the aspect ratio + if aspect_ratio1 < aspect_ratio2: + base_width = min(im1.width, im2.width) + base_height = int(base_width / aspect_ratio1) + else: + base_height = min(im1.height, im2.height) + base_width = int(base_height * aspect_ratio2) + + # Resize images to a common base while maintaining aspect ratio + im1 = im1.resize((base_width, base_height), Image.LANCZOS) + im2 = im2.resize((base_width, base_height), Image.LANCZOS) + + # Convert images to grayscale + im1_gray = np.array(im1.convert("L")) + im2_gray = np.array(im2.convert("L")) + + data_range = im2_gray.max() - im2_gray.min() + mssim, diff_image = ssim(im1_gray, im2_gray, data_range=data_range, full=True) + + return mssim, diff_image + + +def find_similar_image_segmentation( + image: Image.Image, + max_ssim: float = MAX_SSIM, +) -> tuple[Segmentation, np.ndarray] | tuple[None, None]: + """Identify a similar image in the cache based on the SSIM comparison. + + This function iterates through a global list of image segmentations, + comparing each against a given image using the SSIM index calculated by + get_image_similarity. + It logs and updates the best match found above a specified SSIM threshold. + + Args: + image (Image.Image): The image to compare against the cache. + max_ssim (float): The minimum SSIM threshold for considering a match. + + Returns: + tuple[Segmentation, np.ndarray] | tuple[None, None]: The best matching + segmentation and its difference image if a match is found; + otherwise, None for both. + """ + similar_segmentation = None + similar_segmentation_diff = None + + for segmentation in SEGMENTATIONS: + similarity_index, ssim_image = get_image_similarity(image, segmentation.image) + if similarity_index > max_ssim: + logger.info(f"{similarity_index=}") + max_ssim = similarity_index + similar_segmentation = segmentation + similar_segmentation_diff = ssim_image + + return similar_segmentation, similar_segmentation_diff + + def get_window_segmentation( action_event: models.ActionEvent, exceptions: list[Exception] | None = None, @@ -373,23 +451,35 @@ def get_window_segmentation( exceptions: list of exceptions previously raised, added to prompt. Returns: - Segmnetation object containing detailed segmentation information. + Segmentation object containing detailed segmentation information. """ screenshot = action_event.screenshot - screenshot.crop_active_window(action_event) - original_image = screenshot.image + original_image = screenshot.cropped_image if DEBUG: original_image.show() + segmentation_adapter = adapters.get_default_segmentation_adapter() segmented_image = segmentation_adapter.fetch_segmented_image(original_image) if DEBUG: segmented_image.show() + + similar_segmentation, similar_segmentation_diff = find_similar_image_segmentation( + original_image, + ) + if similar_segmentation: + # TODO XXX: create copy of similar_segmentation, but overwrite with segments of + # regions of new image where segments of similar_segmentation overlap non-zero + # regions of similar_segmentation_diff + return similar_segmentation + masks = vision.process_image_for_masks(segmented_image) if DEBUG: vision.display_binary_images_grid(masks) + refined_masks = vision.refine_masks(masks) if DEBUG: vision.display_binary_images_grid(refined_masks) + masked_images = vision.extract_masked_images(original_image, refined_masks) original_image_base64 = screenshot.base64 @@ -408,9 +498,17 @@ def get_window_segmentation( len(descriptions), len(centroids), ) - segmentation = Segmentation(masked_images, descriptions, bounding_boxes, centroids) + segmentation = Segmentation( + original_image, + masked_images, + descriptions, + bounding_boxes, + centroids, + ) if DEBUG: vision.display_images_table_with_titles(masked_images, descriptions) + + SEGMENTATIONS.append(segmentation) return segmentation diff --git a/openadapt/utils.py b/openadapt/utils.py index aa40c574a..5380d2def 100644 --- a/openadapt/utils.py +++ b/openadapt/utils.py @@ -479,7 +479,7 @@ def get_scale_ratios(action_event: ActionEvent) -> tuple[float, float]: float: The height ratio. """ recording = action_event.recording - image = action_event.screenshot.original_image + image = action_event.screenshot.image width_ratio = image.width / recording.monitor_width height_ratio = image.height / recording.monitor_height return width_ratio, height_ratio diff --git a/poetry.lock b/poetry.lock index 78db08024..91d5d3a6c 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "aiofiles" @@ -2259,6 +2259,23 @@ files = [ {file = "ifaddr-0.2.0.tar.gz", hash = "sha256:cc0cbfcaabf765d44595825fb96a99bb12c79716b73b44330ea38ee2b0c4aed4"}, ] +[[package]] +name = "imagehash" +version = "4.3.1" +description = "Image Hashing library" +optional = false +python-versions = "*" +files = [ + {file = "ImageHash-4.3.1-py2.py3-none-any.whl", hash = "sha256:5ad9a5cde14fe255745a8245677293ac0d67f09c330986a351f34b614ba62fb5"}, + {file = "ImageHash-4.3.1.tar.gz", hash = "sha256:7038d1b7f9e0585beb3dd8c0a956f02b95a346c0b5f24a9e8cc03ebadaf0aa70"}, +] + +[package.dependencies] +numpy = "*" +pillow = "*" +PyWavelets = "*" +scipy = "*" + [[package]] name = "imageio" version = "2.34.0" @@ -3195,7 +3212,10 @@ files = [ decorator = ">=4.0.2,<5.0" imageio = {version = ">=2.5,<3.0", markers = "python_version >= \"3.4\""} imageio_ffmpeg = {version = ">=0.2.0", markers = "python_version >= \"3.4\""} -numpy = {version = ">=1.17.3", markers = "python_version > \"2.7\""} +numpy = [ + {version = ">=1.17.3", markers = "python_version != \"2.7\""}, + {version = "*", markers = "python_version >= \"2.7\""}, +] proglog = "<=1.0.0" requests = ">=2.8.1,<3.0" tqdm = ">=4.11.2,<5.0" @@ -3680,8 +3700,11 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.21.2", markers = "python_version >= \"3.10\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""}, - {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""}, + {version = ">=1.19.3", markers = "python_version >= \"3.6\" and platform_system == \"Linux\" and platform_machine == \"aarch64\" or python_version >= \"3.9\""}, + {version = ">=1.17.0", markers = "python_version >= \"3.7\""}, + {version = ">=1.17.3", markers = "python_version >= \"3.8\""}, ] [[package]] @@ -3702,8 +3725,11 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.21.2", markers = "python_version >= \"3.10\""}, {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""}, - {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""}, + {version = ">=1.19.3", markers = "python_version >= \"3.6\" and platform_system == \"Linux\" and platform_machine == \"aarch64\" or python_version >= \"3.9\""}, + {version = ">=1.17.0", markers = "python_version >= \"3.7\""}, + {version = ">=1.17.3", markers = "python_version >= \"3.8\""}, ] [[package]] @@ -5320,6 +5346,51 @@ files = [ {file = "pytz-2024.1.tar.gz", hash = "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812"}, ] +[[package]] +name = "pywavelets" +version = "1.6.0" +description = "PyWavelets, wavelet transform module" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pywavelets-1.6.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ddc1ff5ad706313d930f857f9656f565dfb81b85bbe58a9db16ad8fa7d1537c5"}, + {file = "pywavelets-1.6.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:78feab4e0c25fa32034b6b64cb854c6ce15663b4f0ffb25d8f0ee58915300f9b"}, + {file = "pywavelets-1.6.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be36f08efe9bc3abf40cf40cd2ee0aa0db26e4894e13ce5ac178442864161e8c"}, + {file = "pywavelets-1.6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0595c51472c9c5724fe087cb73e2797053fd25c788d6553fdad6ff61abc60e91"}, + {file = "pywavelets-1.6.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:058a750477dde633ac53b8806f835af3559d52db6532fb2b93c1f4b5441365b8"}, + {file = "pywavelets-1.6.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:538795d9c4181152b414285b5a7f72ac52581ecdcdce74b6cca3fa0b8a5ab0aa"}, + {file = "pywavelets-1.6.0-cp310-cp310-win32.whl", hash = "sha256:47de024ba4f9df97e98b5f540340e1a9edd82d2c477450bef8c9b5381487128e"}, + {file = "pywavelets-1.6.0-cp310-cp310-win_amd64.whl", hash = "sha256:e2c44760c0906ddf2176920a2613287f6eea947f166ce7eee9546081b06a6835"}, + {file = "pywavelets-1.6.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d91aaaf6de53b758bcdc96c81cdb5a8607758602be49f691188c0e108cf1e738"}, + {file = "pywavelets-1.6.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3b5302edb6d1d1ff6636d37c9ff29c4892f2a3648d736cc1df01f3f36e25c8cf"}, + {file = "pywavelets-1.6.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c5e655446e37a3c87213d5c6386b86f65c4d61736b4432d720171e7dd6523d6a"}, + {file = "pywavelets-1.6.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ec7d69b746a0eaa327b829a3252a63619f2345e263177be5dd9bf30d7933c8d"}, + {file = "pywavelets-1.6.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:97ea9613bd6b7108ebb44b709060adc7e2d5fac73be7152342bdd5513d75f84e"}, + {file = "pywavelets-1.6.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:48b3813c6d1a7a8194f37dbb5dbbdf2fe1112152c91445ea2e54f64ff6350c36"}, + {file = "pywavelets-1.6.0-cp311-cp311-win32.whl", hash = "sha256:4ffb484d096a5eb10af7121e0203546a03e1369328df321a33ef91f67bac40cf"}, + {file = "pywavelets-1.6.0-cp311-cp311-win_amd64.whl", hash = "sha256:274bc47b289585383aa65519b3fcae5b4dee5e31db3d4198d4fad701a70e59f7"}, + {file = "pywavelets-1.6.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:d6ec113386a432e04103f95e351d2657b42145bd1e1ed26513423391bcb5f011"}, + {file = "pywavelets-1.6.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ab652112d3932d21f020e281e06926a751354c2b5629fb716f5eb9d0104b84e5"}, + {file = "pywavelets-1.6.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47b0314a22616c5f3f08760f0e00b4a15b7c7dadca5e39bb701cf7869a4207c5"}, + {file = "pywavelets-1.6.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:138471513bc0a4cd2ddc4e50c7ec04e3468c268e101a0d02f698f6aedd1d5e79"}, + {file = "pywavelets-1.6.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:67936491ae3e5f957c428e34fdaed21f131535b8d60c7c729a1b539ce8864837"}, + {file = "pywavelets-1.6.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:dd798cee3d28fb3d32a26a00d9831a20bf316c36d685e4ced01b4e4a8f36f5ce"}, + {file = "pywavelets-1.6.0-cp312-cp312-win32.whl", hash = "sha256:e772f7f0c16bfc3be8ac3cd10d29a9920bb7a39781358856223c491b899e6e79"}, + {file = "pywavelets-1.6.0-cp312-cp312-win_amd64.whl", hash = "sha256:4ef15a63a72afa67ae9f4f3b06c95c5382730fb3075e668d49a880e65f2f089c"}, + {file = "pywavelets-1.6.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:627df378e63e9c789b6f2e7060cb4264ebae6f6b0efc1da287a2c060de454a1f"}, + {file = "pywavelets-1.6.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a413b51dc19e05243fe0b0864a8e8a16b5ca9bf2e4713da00a95b1b5747a5367"}, + {file = "pywavelets-1.6.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be615c6c1873e189c265d4a76d1751ec49b17e29725e6dd2e9c74f1868f590b7"}, + {file = "pywavelets-1.6.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4021ef69ec9f3862f66580fc4417be728bd78722914394594b48212fd1fcaf21"}, + {file = "pywavelets-1.6.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:8fbf7b61b28b5457693c034e58a01622756d1fd60a80ae13ac5888b1d3e57e80"}, + {file = "pywavelets-1.6.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:f58ddbb0a6cd243928876edfc463b990763a24fb94498607d6fea690e32cca4c"}, + {file = "pywavelets-1.6.0-cp39-cp39-win32.whl", hash = "sha256:42a22e68e345b6de7d387ef752111ab4530c98048d2b4bdac8ceefb078b4ead6"}, + {file = "pywavelets-1.6.0-cp39-cp39-win_amd64.whl", hash = "sha256:32198de321892743c1a3d1957fe1cd8a8ecc078bfbba6b8f3982518e897271d7"}, + {file = "pywavelets-1.6.0.tar.gz", hash = "sha256:ea027c70977122c5fc27b2510f0a0d9528f9c3df6ea3e4c577ca55fd00325a5b"}, +] + +[package.dependencies] +numpy = ">=1.22.4,<3" + [[package]] name = "pywebview" version = "4.4.1" @@ -5424,7 +5495,6 @@ files = [ {file = "PyYAML-6.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:bf07ee2fef7014951eeb99f56f39c9bb4af143d8aa3c21b1677805985307da34"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:855fb52b0dc35af121542a76b9a84f8d1cd886ea97c84703eaa6d88e37a2ad28"}, {file = "PyYAML-6.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:40df9b996c2b73138957fe23a16a4f0ba614f4c0efce1e9406a184b6d07fa3a9"}, - {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a08c6f0fe150303c1c6b71ebcd7213c2858041a7e01975da3a99aed1e7a378ef"}, {file = "PyYAML-6.0.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6c22bec3fbe2524cde73d7ada88f6566758a8f7227bfbf93a408a9d86bcc12a0"}, {file = "PyYAML-6.0.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8d4e9c88387b0f5c7d5f281e55304de64cf7f9c0021a3525bd3b1c542da3b0e4"}, {file = "PyYAML-6.0.1-cp312-cp312-win32.whl", hash = "sha256:d483d2cdf104e7c9fa60c544d92981f12ad66a457afae824d146093b8c294c54"}, @@ -6631,7 +6701,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +greenlet = {version = "!=0.4.17", markers = "python_version >= \"3\" and (platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\")"} [package.extras] aiomysql = ["aiomysql", "greenlet (!=0.4.17)"] @@ -7968,4 +8038,4 @@ testing = ["coverage (>=5.0.3)", "zope.event", "zope.testing"] [metadata] lock-version = "2.0" python-versions = "3.10.x" -content-hash = "c89aabf0d879d19b441855c57f81dbe688f69547969ec1c2655a0d92c866a029" +content-hash = "890771696b49b9e034aa81375222ac315c7e97bb2ebf99d87d296b5cc01f70da" diff --git a/pyproject.toml b/pyproject.toml index d48d4b2a3..078729527 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -97,6 +97,7 @@ gradio-client = "0.15.0" google-generativeai = "^0.5.0" uvicorn = {extras = ["standard"], version = "^0.22"} ultralytics = "^8.1.47" +imagehash = "^4.3.1" [tool.pytest.ini_options] filterwarnings = [ diff --git a/tests/openadapt/test_crop.py b/tests/openadapt/test_crop.py index 7ce80b179..31f7cd12d 100644 --- a/tests/openadapt/test_crop.py +++ b/tests/openadapt/test_crop.py @@ -41,9 +41,9 @@ def test_crop_active_window() -> None: original_size = screenshot._image.size # Perform the cropping operation - screenshot.crop_active_window(action_event=action_event_mock) + cropped_image = screenshot.crop_active_window(action_event=action_event_mock) # Verify that the image size has been reduced - assert (screenshot._image.size[0] < original_size[0]) or ( - screenshot._image.size[1] < original_size[1] + assert (cropped_image.size[0] < original_size[0]) or ( + cropped_image.size[1] < original_size[1] )