From 91213050cddbcf6f840cb1f79cea06c15809b06c Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Sun, 6 Apr 2025 00:54:04 -0400 Subject: [PATCH 1/4] feat: Implement tracker matching logic using scipy Replaces the placeholder _match_elements method in SimpleElementTracker with an implementation based on scipy.optimize.linear_sum_assignment. - Matches elements based on type and squared Euclidean distance between centers. - Applies configured distance threshold and type constraints to the cost matrix. - Basic unit tests in test_tracking.py pass with this implementation. Completes the core logic for SimpleElementTracker as specified in Issue #8. --- omnimcp/tracking.py | 169 ++++++++++++++------------------------------ 1 file changed, 54 insertions(+), 115 deletions(-) diff --git a/omnimcp/tracking.py b/omnimcp/tracking.py index ebcac2a..fe5f99f 100644 --- a/omnimcp/tracking.py +++ b/omnimcp/tracking.py @@ -1,53 +1,30 @@ # omnimcp/tracking.py from typing import List, Dict, Optional, Tuple -# Use typing_extensions for Self if needed for older Python versions -# from typing_extensions import Self - -# Added Scipy for matching import numpy as np - -try: - from scipy.optimize import linear_sum_assignment - from scipy.spatial.distance import cdist - - SCIPY_AVAILABLE = True -except ImportError: - SCIPY_AVAILABLE = False - # Fallback or warning needed if scipy is critical - import warnings - - warnings.warn( - "Scipy not found. Tracking matching will be disabled or use a fallback." - ) - +from scipy.optimize import linear_sum_assignment +from scipy.spatial.distance import cdist +from loguru import logger # Assuming UIElement and ElementTrack are defined in omnimcp.types try: from omnimcp.types import UIElement, ElementTrack, Bounds except ImportError: - print("Warning: Could not import types from omnimcp.types") + # This allows standalone testing/linting but relies on installation for runtime + logger.warning( + "Could not import types directly from omnimcp.types. Relying on installed package." + ) UIElement = dict # type: ignore ElementTrack = dict # type: ignore Bounds = tuple # type: ignore -# Assuming logger is setup elsewhere and accessible, or use standard logging -# from omnimcp.utils import logger -import logging - -logger = logging.getLogger(__name__) - -# Helper Function (can stay here or move to utils) def _get_bounds_center(bounds: Bounds) -> Optional[Tuple[float, float]]: - """Calculate the center (relative coords) of a bounding box.""" + """Calculate the center (relative coords 0.0-1.0) of a bounding box.""" if not isinstance(bounds, (list, tuple)) or len(bounds) != 4: - logger.warning( - f"Invalid bounds format received: {bounds}. Cannot calculate center." - ) + logger.warning(f"Invalid bounds format: {bounds}. Cannot calculate center.") return None x, y, w, h = bounds - # Ensure w and h are non-negative if w < 0 or h < 0: logger.warning( f"Invalid bounds dimensions (w={w}, h={h}). Cannot calculate center." @@ -58,130 +35,90 @@ def _get_bounds_center(bounds: Bounds) -> Optional[Tuple[float, float]]: class SimpleElementTracker: """ - Basic element tracking across frames based on type and proximity using optimal assignment. + Tracks UI elements across frames using optimal assignment based on type and proximity. Assigns persistent track_ids. 
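+    Minimal usage sketch (illustrative; assumes UIElement-like objects with
+    `id`, `type`, and relative `bounds=(x, y, w, h)` as defined in
+    omnimcp.types):
+
+        tracker = SimpleElementTracker(miss_threshold=3, matching_threshold=0.1)
+        for frame_number, elements in enumerate(frames):
+            tracks = tracker.update(elements, frame_number)
+            # Each ElementTrack keeps its track_id across frames.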
""" - def __init__( - self, miss_threshold: int = 3, matching_threshold: float = 0.1 - ): # Increased threshold slightly + def __init__(self, miss_threshold: int = 3, matching_threshold: float = 0.1): """ Args: - miss_threshold: How many consecutive misses before pruning a track. + miss_threshold: Number of consecutive misses before pruning a track. matching_threshold: Relative distance threshold for matching centers. """ - if not SCIPY_AVAILABLE: - # Optionally raise an error or disable tracking features - logger.error( - "Scipy is required for SimpleElementTracker matching logic but not installed." - ) - # raise ImportError("Scipy is required for SimpleElementTracker") self.tracked_elements: Dict[str, ElementTrack] = {} # track_id -> ElementTrack self.next_track_id_counter: int = 0 self.miss_threshold = miss_threshold - # Store squared threshold for efficiency - self.match_threshold_sq = matching_threshold**2 + self.match_threshold_sq = ( + matching_threshold**2 + ) # Use squared distance for efficiency logger.info( f"SimpleElementTracker initialized (miss_thresh={miss_threshold}, match_dist_sq={self.match_threshold_sq:.4f})." ) def _generate_track_id(self) -> str: - """Generates a unique track ID.""" + """Generates a unique, sequential track ID.""" track_id = f"track_{self.next_track_id_counter}" self.next_track_id_counter += 1 return track_id def _match_elements(self, current_elements: List[UIElement]) -> Dict[int, str]: """ - Performs optimal assignment matching between current elements and active tracks. + Performs optimal assignment matching between current elements and active tracks + using the Hungarian algorithm (linear_sum_assignment). Args: current_elements: List of UIElements detected in the current frame. Returns: - Dict[int, str]: A mapping from current_element.id to matched track_id. - Only includes elements that were successfully matched. + Dict[int, str]: Mapping from current_element.id to matched track_id. 
""" - if not SCIPY_AVAILABLE: - logger.warning("Scipy not available, skipping matching.") - return {} - if not current_elements or not self.tracked_elements: - return {} # Nothing to match - - # --- Prepare Data for Matching --- + # Filter out invalid elements and prepare data for matching + valid_current_elements = [ + el for el in current_elements if _get_bounds_center(el.bounds) is not None + ] active_tracks = [ track for track in self.tracked_elements.values() - if track.latest_element is not None # Only match tracks currently visible + if track.latest_element is not None + and _get_bounds_center(track.latest_element.bounds) is not None ] - if not active_tracks: - return {} # No active tracks to match against - # current_element_map = {el.id: el for el in current_elements} - # track_map = {track.track_id: track for track in active_tracks} + if not valid_current_elements or not active_tracks: + logger.debug("No valid current elements or active tracks to match.") + return {} - # Get centers and types for cost calculation + current_ids = [el.id for el in valid_current_elements] + current_types = [el.type for el in valid_current_elements] current_centers = np.array( - [ - _get_bounds_center(el.bounds) - for el in current_elements - if _get_bounds_center(el.bounds) is not None # Filter invalid bounds - ] + [_get_bounds_center(el.bounds) for el in valid_current_elements] ) - current_types = [ - el.type - for el in current_elements - if _get_bounds_center(el.bounds) is not None - ] - current_ids_valid = [ - el.id - for el in current_elements - if _get_bounds_center(el.bounds) is not None - ] - track_centers = np.array( - [ - _get_bounds_center(track.latest_element.bounds) - for track in active_tracks - if track.latest_element - and _get_bounds_center(track.latest_element.bounds) is not None - ] - ) + track_ids = [track.track_id for track in active_tracks] track_types = [ - track.latest_element.type - for track in active_tracks - if track.latest_element - and _get_bounds_center(track.latest_element.bounds) is not None - ] - track_ids_valid = [ - track.track_id - for track in active_tracks - if track.latest_element - and _get_bounds_center(track.latest_element.bounds) is not None - ] - - if current_centers.size == 0 or track_centers.size == 0: - logger.debug("No valid centers for matching.") - return {} # Cannot match if no valid centers + track.latest_element.type for track in active_tracks + ] # Assumes latest_element is not None + track_centers = np.array( + [_get_bounds_center(track.latest_element.bounds) for track in active_tracks] + ) # Assumes latest_element is not None - # --- Calculate Cost Matrix (Squared Euclidean Distance) --- - # Cost matrix: rows = current elements, cols = active tracks + # Calculate Cost Matrix (Squared Euclidean Distance) + # Rows: current elements, Cols: active tracks cost_matrix = cdist(current_centers, track_centers, metric="sqeuclidean") - # --- Apply Constraints (Type Mismatch & Distance Threshold) --- - infinity_cost = float("inf") + # Apply Constraints (Type Mismatch & Distance Threshold) + infinity_cost = 1e8 # Use a large number num_current, num_tracks = cost_matrix.shape for i in range(num_current): for j in range(num_tracks): - # Infinite cost if types don't match + # High cost if types don't match if current_types[i] != track_types[j]: cost_matrix[i, j] = infinity_cost - # Infinite cost if distance exceeds threshold + # High cost if distance exceeds threshold elif cost_matrix[i, j] > self.match_threshold_sq: cost_matrix[i, j] = infinity_cost - # --- 
Optimal Assignment using Hungarian Algorithm --- + # Optimal Assignment using Hungarian Algorithm try: row_ind, col_ind = linear_sum_assignment(cost_matrix) except ValueError as e: @@ -190,18 +127,20 @@ def _match_elements(self, current_elements: List[UIElement]) -> Dict[int, str]: ) return {} - # --- Create Mapping from Valid Assignments --- + # Create Mapping from Valid Assignments assignment_mapping: Dict[int, str] = {} # current_element_id -> track_id valid_matches_count = 0 for r, c in zip(row_ind, col_ind): - # Check if the assignment cost is valid (not infinity) + # Check if the assignment cost is valid (below infinity_cost) if cost_matrix[r, c] < infinity_cost: - current_element_id = current_ids_valid[r] - track_id = track_ids_valid[c] + current_element_id = current_ids[r] + track_id = track_ids[c] assignment_mapping[current_element_id] = track_id valid_matches_count += 1 - logger.debug(f"Matching: Found {valid_matches_count} valid assignments.") + logger.debug( + f"Matching: Found {valid_matches_count} valid assignments using linear_sum_assignment." + ) return assignment_mapping def update( @@ -248,7 +187,7 @@ def update( track.consecutive_misses = 0 track.last_seen_frame = frame_number else: - # Match found in assignment but element missing from map (should not happen ideally) + # Defensive coding for edge cases where mapping might be inconsistent logger.warning( f"Track {track_id} matched but element ID {matched_elem_id} not found in current_element_map. Treating as miss." ) @@ -273,8 +212,8 @@ def update( # Prune tracks marked for deletion for track_id in tracks_to_prune: logger.debug( - f"Pruning track {track_id} after {self.tracked_elements[track_id].consecutive_misses} misses." - ) + f"Pruning track {track_id} after {self.tracked_elements.get(track_id, ElementTrack(track_id=track_id)).consecutive_misses} misses." + ) # Safely access misses if track_id in self.tracked_elements: del self.tracked_elements[track_id] From 5d556cc36336979e65db04d333d3849134c5eed2 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Sun, 6 Apr 2025 01:00:31 -0400 Subject: [PATCH 2/4] refactor(tracking): Add size similarity check to matching logic Enhances the `_match_elements` method in `SimpleElementTracker` to improve matching robustness. - Incorporates a check for relative width and height differences between elements, using a configurable `size_rel_threshold`. This adds size similarity as a constraint alongside type and proximity. - Cost matrix for `linear_sum_assignment` now penalizes matches where elements differ significantly in size. - Removes defensive try/except blocks for `scipy`/`types` imports, assuming dependencies are met. Basic unit tests in `test_tracking.py` continue to pass with this refined matching logic. --- omnimcp/tracking.py | 136 +++++++++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 53 deletions(-) diff --git a/omnimcp/tracking.py b/omnimcp/tracking.py index fe5f99f..2270dcc 100644 --- a/omnimcp/tracking.py +++ b/omnimcp/tracking.py @@ -1,4 +1,5 @@ # omnimcp/tracking.py + from typing import List, Dict, Optional, Tuple import numpy as np @@ -6,17 +7,7 @@ from scipy.spatial.distance import cdist from loguru import logger -# Assuming UIElement and ElementTrack are defined in omnimcp.types -try: - from omnimcp.types import UIElement, ElementTrack, Bounds -except ImportError: - # This allows standalone testing/linting but relies on installation for runtime - logger.warning( - "Could not import types directly from omnimcp.types. 
Relying on installed package." - ) - UIElement = dict # type: ignore - ElementTrack = dict # type: ignore - Bounds = tuple # type: ignore +from omnimcp.types import UIElement, ElementTrack, Bounds def _get_bounds_center(bounds: Bounds) -> Optional[Tuple[float, float]]: @@ -35,24 +26,31 @@ def _get_bounds_center(bounds: Bounds) -> Optional[Tuple[float, float]]: class SimpleElementTracker: """ - Tracks UI elements across frames using optimal assignment based on type and proximity. - Assigns persistent track_ids. + Tracks UI elements across frames using optimal assignment based on type, + center proximity, and size similarity. Assigns persistent track_ids. """ - def __init__(self, miss_threshold: int = 3, matching_threshold: float = 0.1): + def __init__( + self, + miss_threshold: int = 3, + matching_threshold: float = 0.1, + size_rel_threshold: float = 0.3, + ): """ Args: miss_threshold: Number of consecutive misses before pruning a track. matching_threshold: Relative distance threshold for matching centers. + size_rel_threshold: Relative size difference threshold for width/height. """ self.tracked_elements: Dict[str, ElementTrack] = {} # track_id -> ElementTrack self.next_track_id_counter: int = 0 self.miss_threshold = miss_threshold - self.match_threshold_sq = ( - matching_threshold**2 - ) # Use squared distance for efficiency + self.match_threshold_sq = matching_threshold**2 # Use squared distance + self.size_rel_threshold = size_rel_threshold logger.info( - f"SimpleElementTracker initialized (miss_thresh={miss_threshold}, match_dist_sq={self.match_threshold_sq:.4f})." + f"SimpleElementTracker initialized (miss_thresh={miss_threshold}, " + f"match_dist_sq={self.match_threshold_sq:.4f}, " + f"size_rel_thresh={self.size_rel_threshold:.2f})." ) def _generate_track_id(self) -> str: @@ -72,51 +70,81 @@ def _match_elements(self, current_elements: List[UIElement]) -> Dict[int, str]: Returns: Dict[int, str]: Mapping from current_element.id to matched track_id. 
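+        Size-gate example: a current element with w=0.10 matched against a
+        track with w=0.12 gives rel_width_diff = 0.02 / 0.12 ≈ 0.17, which
+        passes the default size_rel_threshold of 0.3; a current width of 0.20
+        against the same track gives 0.08 / 0.20 = 0.40 and is rejected.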
""" - # Filter out invalid elements and prepare data for matching - valid_current_elements = [ - el for el in current_elements if _get_bounds_center(el.bounds) is not None - ] - active_tracks = [ - track - for track in self.tracked_elements.values() - if track.latest_element is not None - and _get_bounds_center(track.latest_element.bounds) is not None - ] + # Filter out elements with invalid bounds and prepare data + valid_current_elements = [] + current_centers_list = [] + for el in current_elements: + center = _get_bounds_center(el.bounds) + if center is not None: + valid_current_elements.append(el) + current_centers_list.append(center) + + active_tracks = [] + track_centers_list = [] + for track in self.tracked_elements.values(): + if track.latest_element: # Check if track has a valid last known element + center = _get_bounds_center(track.latest_element.bounds) + if center is not None: + active_tracks.append(track) + track_centers_list.append(center) if not valid_current_elements or not active_tracks: logger.debug("No valid current elements or active tracks to match.") return {} + # Extract properties for cost calculation current_ids = [el.id for el in valid_current_elements] current_types = [el.type for el in valid_current_elements] - current_centers = np.array( - [_get_bounds_center(el.bounds) for el in valid_current_elements] - ) + current_bounds_list = [el.bounds for el in valid_current_elements] + current_centers = np.array(current_centers_list) track_ids = [track.track_id for track in active_tracks] track_types = [ track.latest_element.type for track in active_tracks - ] # Assumes latest_element is not None - track_centers = np.array( - [_get_bounds_center(track.latest_element.bounds) for track in active_tracks] - ) # Assumes latest_element is not None + ] # Safe due to filtering above + track_bounds_list = [ + track.latest_element.bounds for track in active_tracks + ] # Safe due to filtering above + track_centers = np.array(track_centers_list) # Calculate Cost Matrix (Squared Euclidean Distance) - # Rows: current elements, Cols: active tracks cost_matrix = cdist(current_centers, track_centers, metric="sqeuclidean") - # Apply Constraints (Type Mismatch & Distance Threshold) - infinity_cost = 1e8 # Use a large number + # Apply Constraints (Type Mismatch, Distance Threshold, Size Threshold) + infinity_cost = 1e8 # Use a large number for invalid assignments num_current, num_tracks = cost_matrix.shape + epsilon = 1e-6 # Avoid division by zero for i in range(num_current): for j in range(num_tracks): - # High cost if types don't match + # --- Type Constraint --- if current_types[i] != track_types[j]: cost_matrix[i, j] = infinity_cost - # High cost if distance exceeds threshold - elif cost_matrix[i, j] > self.match_threshold_sq: + continue + + # --- Distance Constraint --- + # Check if distance cost already exceeds threshold (slightly redundant but explicit) + if cost_matrix[i, j] > self.match_threshold_sq: cost_matrix[i, j] = infinity_cost + continue + + # --- Size Constraint --- + curr_w, curr_h = current_bounds_list[i][2], current_bounds_list[i][3] + track_w, track_h = track_bounds_list[j][2], track_bounds_list[j][3] + + # Use max dimensions for relative comparison denominator + max_w = max(curr_w, track_w, epsilon) + max_h = max(curr_h, track_h, epsilon) + + rel_width_diff = abs(curr_w - track_w) / max_w + rel_height_diff = abs(curr_h - track_h) / max_h + + if ( + rel_width_diff > self.size_rel_threshold + or rel_height_diff > self.size_rel_threshold + ): + cost_matrix[i, j] = 
infinity_cost + continue # Element size differs too much # Optimal Assignment using Hungarian Algorithm try: @@ -131,7 +159,7 @@ def _match_elements(self, current_elements: List[UIElement]) -> Dict[int, str]: assignment_mapping: Dict[int, str] = {} # current_element_id -> track_id valid_matches_count = 0 for r, c in zip(row_ind, col_ind): - # Check if the assignment cost is valid (below infinity_cost) + # Check if the assignment cost is valid if cost_matrix[r, c] < infinity_cost: current_element_id = current_ids[r] track_id = track_ids[c] @@ -148,19 +176,17 @@ def update( ) -> List[ElementTrack]: """ Updates tracks based on current detections using optimal assignment matching. + Includes logic for handling misses and pruning tracks. Args: current_elements: List of UIElements detected in the current frame. frame_number: The current step/frame number. Returns: - A list of all currently active ElementTrack objects (including missed ones). + A list of all currently active ElementTrack objects. """ current_element_map = {el.id: el for el in current_elements} - - # Get the mapping: current_element_id -> track_id assignment_mapping = self._match_elements(current_elements) - matched_current_element_ids = set(assignment_mapping.keys()) matched_track_ids = set(assignment_mapping.values()) @@ -187,7 +213,7 @@ def update( track.consecutive_misses = 0 track.last_seen_frame = frame_number else: - # Defensive coding for edge cases where mapping might be inconsistent + # Defensive coding for edge cases logger.warning( f"Track {track_id} matched but element ID {matched_elem_id} not found in current_element_map. Treating as miss." ) @@ -210,14 +236,18 @@ def update( tracks_to_prune.append(track_id) # Prune tracks marked for deletion + pruned_count = 0 for track_id in tracks_to_prune: - logger.debug( - f"Pruning track {track_id} after {self.tracked_elements.get(track_id, ElementTrack(track_id=track_id)).consecutive_misses} misses." - ) # Safely access misses if track_id in self.tracked_elements: + misses = self.tracked_elements[track_id].consecutive_misses del self.tracked_elements[track_id] + logger.debug(f"Pruning track {track_id} after {misses} misses.") + pruned_count += 1 + if pruned_count > 0: + logger.info(f"Pruned {pruned_count} tracks.") # Add tracks for new, unmatched elements + new_count = 0 for element_id, element in current_element_map.items(): if element_id not in matched_current_element_ids: # Ensure element has valid bounds before creating track @@ -235,9 +265,9 @@ def update( last_seen_frame=frame_number, ) self.tracked_elements[new_track_id] = new_track - logger.debug( - f"Created new track {new_track_id} for element ID {element_id}" - ) + new_count += 1 + if new_count > 0: + logger.debug(f"Created {new_count} new tracks.") # Return the current list of all tracked elements' state return list(self.tracked_elements.values()) From 4d01293f218e3c2d6f25ffcb67002bbc89c8428b Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Sun, 6 Apr 2025 02:16:28 -0400 Subject: [PATCH 3/4] test(core): Fix planner mocks and prompt assertions - Updates mocks in test_core.py to return the correct LLMAnalysisAndDecision structure expected by the refactored plan_action_for_ui. - Corrects assertion strings checking for prompt headings to exactly match the PROMPT_TEMPLATE content. Resolves test failures introduced during planner refactoring. All tests now pass. 
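
For reference, a minimal sketch of the mock shape the updated tests assume.
Field names follow the JSON spec in core.py's PROMPT_TEMPLATE; the exact
Pydantic field types and the mock target name are assumptions here, not code
from this patch:

```python
from unittest.mock import MagicMock

from omnimcp.types import ActionDecision, LLMAnalysisAndDecision, ScreenAnalysis

# Hypothetical mock for call_llm_api; assumes ActionDecision.parameters
# accepts a plain dict.
mock_call_llm_api = MagicMock()
mock_call_llm_api.return_value = LLMAnalysisAndDecision(
    screen_analysis=ScreenAnalysis(
        reasoning="Search field appeared after Cmd+Space.",
        disappeared_elements=[],
        temporarily_missing_elements=[],
        new_elements=["track_3"],
        critical_elements_status={"track_3": "Visible"},
    ),
    action_decision=ActionDecision(
        analysis_reasoning="Type the app name into the visible search field.",
        action_type="type",
        target_element_id=3,
        parameters={"text_to_type": "Calculator"},
        is_goal_complete=False,
    ),
)
```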
--- omnimcp/agent_executor.py | 696 ++++++++++++++++++++++++++--------- omnimcp/core.py | 265 ++++++++----- omnimcp/types.py | 11 + omnimcp/visual_state.py | 324 ++++++++++------ tests/test_agent_executor.py | 374 ++++++++++++------- tests/test_core.py | 201 +++++++--- 6 files changed, 1306 insertions(+), 565 deletions(-) diff --git a/omnimcp/agent_executor.py b/omnimcp/agent_executor.py index c8716b5..42866eb 100644 --- a/omnimcp/agent_executor.py +++ b/omnimcp/agent_executor.py @@ -3,52 +3,85 @@ import datetime import os import time -from typing import Callable, List, Optional, Tuple, Protocol, Dict +from typing import Callable, List, Optional, Tuple, Protocol, Dict, Any +import json from PIL import Image - +from loguru import logger # Use loguru from omnimcp import config, setup_run_logging -from omnimcp.types import LLMActionPlan, UIElement + +# Import necessary types from omnimcp.types +from omnimcp.types import ( + LLMActionPlan, + UIElement, + ElementTrack, + LoggedStep, + ScreenAnalysis, + ActionDecision, # Placeholders for future use/logging +) + +# SimpleElementTracker is used within VisualState, not directly here +# from omnimcp.tracking import SimpleElementTracker from omnimcp.utils import ( denormalize_coordinates, draw_action_highlight, draw_bounding_boxes, get_scaling_factor, - logger, - take_screenshot, + take_screenshot, # Keep for final screenshot ) +# --- Interface Definitions --- + class PerceptionInterface(Protocol): - elements: List[UIElement] + """Defines the expected interface for the perception component.""" + + elements: List[UIElement] # Current raw elements from parser + tracked_elements_view: List[ + ElementTrack + ] # Current tracked elements view from tracker screen_dimensions: Optional[Tuple[int, int]] _last_screenshot: Optional[Image.Image] + frame_counter: int # The current frame/step number managed by perception - def update(self) -> None: ... + def update(self) -> None: ... # Updates all state including tracked_elements_view class ExecutionInterface(Protocol): + """Defines the expected interface for the execution component.""" + def click(self, x: int, y: int, click_type: str = "single") -> bool: ... def type_text(self, text: str) -> bool: ... def execute_key_string(self, key_info_str: str) -> bool: ... def scroll(self, dx: int, dy: int) -> bool: ... +# Updated PlannerCallable signature to accept tracking info PlannerCallable = Callable[ - [List[UIElement], str, List[str], int, str], + [ # Inputs: + List[UIElement], # Current raw elements for context + str, # User goal + List[str], # Action history descriptions + int, # Current step number + Optional[List[ElementTrack]], # Tracking info (list of current tracks) + ], + # Outputs: + # Assume for now planner internally handles ActionDecision and converts back + # to this tuple for compatibility with existing handlers. + # This will change when core.py is fully reworked. Tuple[LLMActionPlan, Optional[UIElement]], ] -ImageProcessorCallable = Callable[..., Image.Image] +ImageProcessorCallable = Callable[..., Image.Image] # --- Core Agent Executor --- class AgentExecutor: """ - Orchestrates the perceive-plan-act loop for UI automation tasks. - Refactored to use action handlers for clarity. + Orchestrates the perceive-plan-act loop, integrating perception with tracking, + planning, execution, and structured logging. 
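+    Typical wiring (names illustrative; any objects satisfying
+    PerceptionInterface and ExecutionInterface work):
+
+        executor = AgentExecutor(
+            perception=visual_state,
+            planner=plan_action_for_ui,
+            execution=input_controller,
+        )
+        achieved = executor.run(goal="Open Calculator", max_steps=10)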
""" def __init__( @@ -59,23 +92,40 @@ def __init__( box_drawer: Optional[ImageProcessorCallable] = draw_bounding_boxes, highlighter: Optional[ImageProcessorCallable] = draw_action_highlight, ): + """Initializes the AgentExecutor.""" self._perception = perception self._planner = planner self._execution = execution self._box_drawer = box_drawer self._highlighter = highlighter self.action_history: List[str] = [] - - # Map action names to their handler methods self._action_handlers: Dict[str, Callable[..., bool]] = { "click": self._execute_click, "type": self._execute_type, "press_key": self._execute_press_key, "scroll": self._execute_scroll, + # TODO: Add handlers for 'finish', 'wait' if added to action space + } + # Initialize metrics and structured log storage + self.metrics: Dict[str, List[Any]] = self._reset_metrics() + self.run_log_data: List[Dict] = [] + logger.info("AgentExecutor initialized.") + + def _reset_metrics(self) -> Dict[str, List[Any]]: + """Helper to initialize/reset metrics dictionary for a run.""" + return { + "step_times_s": [], + "perception_times_s": [], + "planning_times_s": [], + "execution_times_s": [], + "elements_per_step": [], + "active_tracks_per_step": [], # Added metric + "action_results": [], # Boolean success/fail } - logger.info("AgentExecutor initialized with action handlers.") # --- Private Action Handlers --- + # These currently consume LLMActionPlan. They might need updates + # later if the planner starts returning ActionDecision directly to executor. def _execute_click( self, @@ -87,24 +137,27 @@ def _execute_click( """Handles the 'click' action.""" if not target_element: logger.error( - f"Click action requires target element ID {plan.element_id}, but it's missing." + f"Click planned for element ID {plan.element_id} but element not found by planner." 
) - return False # Should have been caught earlier, but safety check + return False + if not screen_dims: + logger.error("Cannot execute click without screen dimensions.") + return False - screen_w, screen_h = screen_dims - # Denormalize to get PHYSICAL PIXEL coordinates for center + # Denormalize using actual screen dimensions from perception abs_x, abs_y = denormalize_coordinates( target_element.bounds[0], target_element.bounds[1], - screen_w, - screen_h, + screen_dims[0], + screen_dims[1], target_element.bounds[2], target_element.bounds[3], ) - # Convert to LOGICAL points for execution component logical_x = int(abs_x / scaling_factor) logical_y = int(abs_y / scaling_factor) - logger.debug(f"Executing click at logical coords: ({logical_x}, {logical_y})") + logger.debug( + f"Executing click at logical coords: ({logical_x}, {logical_y}) on Element ID {target_element.id}" + ) return self._execution.click(logical_x, logical_y, click_type="single") def _execute_type( @@ -117,28 +170,28 @@ def _execute_type( """Handles the 'type' action.""" if plan.text_to_type is None: logger.error("Action 'type' planned but text_to_type is null.") - return False # Should be caught by Pydantic validation + return False - if target_element: # Click target element first if specified - screen_w, screen_h = screen_dims + # Optional: Click target element first if specified + if target_element and screen_dims: abs_x, abs_y = denormalize_coordinates( target_element.bounds[0], target_element.bounds[1], - screen_w, - screen_h, + screen_dims[0], + screen_dims[1], target_element.bounds[2], target_element.bounds[3], ) logical_x = int(abs_x / scaling_factor) logical_y = int(abs_y / scaling_factor) logger.debug( - f"Clicking target element {target_element.id} at logical ({logical_x},{logical_y}) before typing..." + f"Clicking target Element ID {target_element.id} before typing..." ) if not self._execution.click(logical_x, logical_y): logger.warning( "Failed to click target before typing, attempting type anyway." 
) - time.sleep(0.2) # Pause after click + time.sleep(0.2) # Short pause after click logger.debug(f"Executing type: '{plan.text_to_type[:50]}...'") return self._execution.type_text(plan.text_to_type) @@ -146,28 +199,28 @@ def _execute_type( def _execute_press_key( self, plan: LLMActionPlan, - target_element: Optional[UIElement], # Unused, but maintains handler signature - screen_dims: Tuple[int, int], # Unused - scaling_factor: int, # Unused + target_element: Optional[UIElement], + screen_dims: Tuple[int, int], + scaling_factor: int, ) -> bool: """Handles the 'press_key' action.""" if not plan.key_info: logger.error("Action 'press_key' planned but key_info is null.") - return False # Should be caught by Pydantic validation + return False logger.debug(f"Executing press_key: '{plan.key_info}'") return self._execution.execute_key_string(plan.key_info) def _execute_scroll( self, plan: LLMActionPlan, - target_element: Optional[UIElement], # Unused - screen_dims: Tuple[int, int], # Unused - scaling_factor: int, # Unused + target_element: Optional[UIElement], + screen_dims: Tuple[int, int], + scaling_factor: int, ) -> bool: """Handles the 'scroll' action.""" - # Basic scroll logic based on reasoning hint + # Basic scroll logic based on reasoning hint (can be improved) scroll_dir = plan.reasoning.lower() - scroll_amount_steps = 3 + scroll_amount_steps = 3 # Arbitrary amount scroll_dy = ( -scroll_amount_steps if "down" in scroll_dir @@ -188,60 +241,36 @@ def _execute_scroll( return self._execution.scroll(scroll_dx, scroll_dy) else: logger.warning( - "Scroll planned but direction/amount unclear, skipping scroll." + "Scroll planned but direction unclear from reasoning, skipping scroll." ) return True # No action needed counts as success - # Comparison Note: - # This `run` method implements an explicit, sequential perceive-plan-act loop. - # Alternative agent architectures exist, such as: - # - ReAct (Reasoning-Acting): Where the LLM explicitly decides between - # reasoning steps and action steps. - # - Callback-driven: Where UI events or timers might trigger agent actions. - # - More complex state machines or graph-based execution flows. - # This simple sequential loop provides a clear baseline. Future work might explore - # these alternatives for more complex or reactive tasks. - def run( self, goal: str, max_steps: int = 10, output_base_dir: Optional[str] = None ) -> bool: - """ - Runs the main perceive-plan-act loop to achieve the goal. - - Args: - goal: The natural language goal for the agent. - max_steps: Maximum number of steps to attempt. - output_base_dir: Base directory to save run artifacts (timestamped). - If None, uses config.RUN_OUTPUT_DIR. - - Returns: - True if the goal was achieved, False otherwise (error or max steps reached). - """ - - # Use configured output dir if none provided + """Runs the main perceive-plan-act loop to achieve the goal.""" + # --- Setup --- if output_base_dir is None: output_base_dir = config.RUN_OUTPUT_DIR - run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") run_output_dir = os.path.join(output_base_dir, run_timestamp) - try: os.makedirs(run_output_dir, exist_ok=True) - - # Configure run-specific logging log_path = setup_run_logging(run_output_dir) - - logger.info(f"Starting agent run. 
Goal: '{goal}'") - logger.info(f"Saving outputs to: {run_output_dir}") - logger.info(f"Run log file: {log_path}") - except OSError as e: - logger.error(f"Failed to create output directory {run_output_dir}: {e}") + except Exception as setup_e: + logger.error(f"Failed during run setup (directory/logging): {setup_e}") return False + logger.info(f"Starting agent run. Goal: '{goal}'") + logger.info(f"Saving outputs to: {run_output_dir}") + logger.info(f"Run log file: {log_path}") self.action_history = [] + self.metrics = self._reset_metrics() + self.run_log_data = [] goal_achieved = False final_step_success = True last_step_completed = -1 + # --- End Setup --- try: scaling_factor = get_scaling_factor() @@ -252,102 +281,217 @@ def run( # --- Main Loop --- for step in range(max_steps): - logger.info(f"\n--- Step {step + 1}/{max_steps} ---") step_start_time = time.time() + logger.info(f"\n--- Step {step + 1}/{max_steps} ---") step_img_prefix = f"step_{step + 1}" + + # --- Initialize Step Variables --- current_image: Optional[Image.Image] = None current_elements: List[UIElement] = [] + tracked_elements_view: List[ElementTrack] = [] screen_dimensions: Optional[Tuple[int, int]] = None - - # 1. Perceive State + tracking_info_for_log: Optional[List[Dict]] = None + perception_duration = 0.0 + llm_plan: Optional[LLMActionPlan] = None # Assumed output for now + llm_analysis_log: Optional[Dict] = None # Placeholder + llm_decision_log: Optional[Dict] = None # Placeholder + target_element: Optional[UIElement] = None + planning_duration = 0.0 + action_success = False + executed_action_type = "none" + executed_params: Dict[str, Any] = {} + executed_target_id: Optional[int] = None + execution_duration = 0.0 + step_screenshot_path: Optional[str] = None + # --- End Initialize Step Variables --- + + # 1. Perceive State (including Tracking) + perception_start_time = time.time() try: - logger.debug("Perceiving current screen state...") - self._perception.update() - current_elements = self._perception.elements or [] + logger.debug("Updating visual state and tracking...") + self._perception.update() # This now internally calls the tracker + + # Retrieve results from the perception interface + current_elements = self._perception.elements + tracked_elements_view = self._perception.tracked_elements_view current_image = self._perception._last_screenshot screen_dimensions = self._perception.screen_dimensions + perception_duration = time.time() - perception_start_time if not current_image or not screen_dimensions: - raise RuntimeError("Failed to get valid screenshot or dimensions.") - logger.info(f"Perceived state with {len(current_elements)} elements.") + raise RuntimeError( + "Failed to get valid screenshot or dimensions during perception." + ) + + logger.info( + f"Perceived state: {len(current_elements)} raw elements, " + f"{len(tracked_elements_view)} active tracks. " + f"Time: {perception_duration:.2f}s." 
+ ) + # Prepare tracking info for structured logging + tracking_info_for_log = [ + t.model_dump(mode="json") for t in tracked_elements_view + ] except Exception as perceive_e: logger.error(f"Perception failed: {perceive_e}", exc_info=True) final_step_success = False + # Log partial metrics + self.metrics["perception_times_s"].append( + round(time.time() - perception_start_time, 3) + ) + self.metrics["elements_per_step"].append(0) + self.metrics["active_tracks_per_step"].append(0) + # Attempt to log step failure before breaking + step_duration = time.time() - step_start_time + self._log_step_data( + step, + goal, + step_screenshot_path, + current_elements, + tracking_info_for_log, + None, + None, + None, + "perception_error", + None, + {}, + False, + perception_duration, + 0.0, + 0.0, + step_duration, + ) break - # 2. Save State Artifacts (Unchanged) + # Log perception metrics on success + self.metrics["perception_times_s"].append(round(perception_duration, 3)) + self.metrics["elements_per_step"].append(len(current_elements)) + self.metrics["active_tracks_per_step"].append(len(tracked_elements_view)) + + # 2. Save State Artifacts raw_state_path = os.path.join( run_output_dir, f"{step_img_prefix}_state_raw.png" ) + step_screenshot_path = ( + os.path.relpath(raw_state_path, start=run_output_dir) + if run_output_dir + else raw_state_path + ) try: - current_image.save(raw_state_path) - logger.debug(f"Saved raw state image to {raw_state_path}") - except Exception as save_raw_e: - logger.warning(f"Could not save raw state image: {save_raw_e}") - - if self._box_drawer: - parsed_state_path = os.path.join( - run_output_dir, f"{step_img_prefix}_state_parsed.png" - ) - try: - img_with_boxes = self._box_drawer( - current_image, current_elements, color="lime", show_ids=True - ) - img_with_boxes.save(parsed_state_path) - logger.debug( - f"Saved parsed state visualization to {parsed_state_path}" - ) - except Exception as draw_boxes_e: - logger.warning(f"Could not save parsed state image: {draw_boxes_e}") + if current_image: + current_image.save(raw_state_path) + logger.debug(f"Saved raw state image to {raw_state_path}") + if self._box_drawer: + # Draw boxes on raw elements for current frame visualization + parsed_state_path = os.path.join( + run_output_dir, f"{step_img_prefix}_state_parsed.png" + ) + try: + img_with_boxes = self._box_drawer( + current_image, + current_elements, + color="lime", + show_ids=True, + ) + img_with_boxes.save(parsed_state_path) + except Exception as draw_e: + logger.warning( + f"Could not save parsed state image: {draw_e}" + ) + else: + step_screenshot_path = None + except Exception as save_e: + logger.warning(f"Could not save state image(s): {save_e}") + step_screenshot_path = None - # 3. Plan Action (Unchanged) - llm_plan: Optional[LLMActionPlan] = None - target_element: Optional[UIElement] = None + # 3. 
Plan Action + planning_start_time = time.time() try: logger.debug("Planning next action...") + # Pass the tracked elements view to the planner llm_plan, target_element = self._planner( - elements=current_elements, + elements=current_elements, # Raw elements for context user_goal=goal, action_history=self.action_history, - step=step, # 0-based index + step=step, + tracking_info=tracked_elements_view, # Pass tracked view ) - # (Logging of plan details remains here) - logger.info(f"LLM Reasoning: {llm_plan.reasoning}") - logger.info( - f"LLM Plan: Action={llm_plan.action}, TargetID={llm_plan.element_id}, GoalComplete={llm_plan.is_goal_complete}" - ) - if llm_plan.text_to_type: - logger.info(f"LLM Plan: Text='{llm_plan.text_to_type[:50]}...'") - if llm_plan.key_info: - logger.info(f"LLM Plan: KeyInfo='{llm_plan.key_info}'") + planning_duration = time.time() - planning_start_time + logger.info(f"Planning completed in {planning_duration:.2f}s.") + + if llm_plan: + # Log details from the plan + logger.info(f"LLM Reasoning: {llm_plan.reasoning}") + logger.info( + f"LLM Plan: Action={llm_plan.action}, TargetID={llm_plan.element_id}, " + f"GoalComplete={llm_plan.is_goal_complete}" + ) + # Set execution details based on plan + executed_action_type = llm_plan.action + executed_target_id = llm_plan.element_id + executed_params = {} + if llm_plan.text_to_type is not None: + executed_params["text_to_type"] = llm_plan.text_to_type + if llm_plan.key_info is not None: + executed_params["key_info"] = llm_plan.key_info + else: + raise ValueError("Planner returned None for LLMActionPlan") except Exception as plan_e: logger.error(f"Planning failed: {plan_e}", exc_info=True) final_step_success = False + self.metrics["planning_times_s"].append( + round(time.time() - planning_start_time, 3) + ) + step_duration = time.time() - step_start_time + self._log_step_data( + step, + goal, + step_screenshot_path, + current_elements, + tracking_info_for_log, + None, + None, + None, + "planning_error", + None, + {}, + False, + perception_duration, + planning_duration, + 0.0, + step_duration, + ) break - # 4. Check Goal Completion (Before Action) (Unchanged) + self.metrics["planning_times_s"].append(round(planning_duration, 3)) + + # 4. Check Goal Completion if llm_plan.is_goal_complete: logger.success("LLM determined the goal is achieved!") goal_achieved = True - last_step_completed = step - break - - # 5. Validate Action Requirements (Unchanged) - if llm_plan.action == "click" and target_element is None: + # Log step data before potential break + + # 5. Validate Action Requirements + if ( + llm_plan.action == "click" + and target_element is None + and not goal_achieved + ): logger.error( f"Action 'click' planned for element ID {llm_plan.element_id}, but element not found. Stopping." ) final_step_success = False - break + # Log step data before potential break - # 6. Visualize Planned Action (Unchanged) - if self._highlighter and current_image: + # 6. 
Visualize Planned Action + if self._highlighter and current_image and llm_plan: highlight_img_path = os.path.join( run_output_dir, f"{step_img_prefix}_action_highlight.png" ) try: + # Target element might be None if action doesn't require it highlighted_image = self._highlighter( current_image, element=target_element, @@ -356,92 +500,284 @@ def run( width=3, ) highlighted_image.save(highlight_img_path) - logger.debug(f"Saved action visualization to {highlight_img_path}") except Exception as draw_highlight_e: logger.warning( f"Could not save action visualization image: {draw_highlight_e}" ) - # 7. Update Action History (Before Execution) (Unchanged) + # 7. Update Action History (Append before execution) action_desc = f"Step {step + 1}: Planned {llm_plan.action}" if target_element: - action_desc += ( - f" on ID {target_element.id} ('{target_element.content[:30]}...')" - ) - if llm_plan.text_to_type: - action_desc += f" Text='{llm_plan.text_to_type[:20]}...'" - if llm_plan.key_info: - action_desc += f" Key='{llm_plan.key_info}'" + action_desc += f" on ElemID {target_element.id}" + if "text_to_type" in executed_params: + action_desc += f" Text='{executed_params['text_to_type'][:20]}...'" + if "key_info" in executed_params: + action_desc += f" Key='{executed_params['key_info']}'" self.action_history.append(action_desc) logger.debug(f"Added to history: {action_desc}") - # 8. Execute Action (Refactored) - logger.info(f"Executing action: {llm_plan.action}...") - action_success = False - try: - handler = self._action_handlers.get(llm_plan.action) - if handler: - # Pass necessary arguments to the handler - action_success = handler( - plan=llm_plan, - target_element=target_element, - screen_dims=screen_dimensions, - scaling_factor=scaling_factor, - ) - else: + # 8. Execute Action + execution_start_time = time.time() + if ( + not goal_achieved and final_step_success + ): # Only execute if needed and possible + logger.info(f"Executing action: {executed_action_type}...") + try: + handler = self._action_handlers.get(executed_action_type) + if handler: + action_success = handler( + plan=llm_plan, + target_element=target_element, + screen_dims=screen_dimensions, + scaling_factor=scaling_factor, + ) + else: + logger.error( + f"Execution handler for '{executed_action_type}' not found." + ) + action_success = False + + if not action_success: + logger.error( + f"Action '{executed_action_type}' execution failed." + ) + final_step_success = False + else: + logger.success("Action executed successfully.") + + except Exception as exec_e: logger.error( - f"Execution handler for action type '{llm_plan.action}' not found." 
+ f"Exception during action execution: {exec_e}", exc_info=True ) action_success = False - - # Check execution result - if not action_success: - logger.error(f"Action '{llm_plan.action}' execution failed.") final_step_success = False - break - else: - logger.success("Action executed successfully.") + else: + # Goal already met or prior failure, skip execution + action_success = True # Treat skipped step as 'successful' non-action + logger.info(f"Skipping execution for step {step + 1}.") + + execution_duration = time.time() - execution_start_time + + # --- Log Execution Metrics and Action Result --- + self.metrics["execution_times_s"].append(round(execution_duration, 3)) + self.metrics["action_results"].append(action_success) + + # --- Log Step Data to Protocol --- + step_duration = time.time() - step_start_time + self.metrics["step_times_s"].append(round(step_duration, 3)) + self._log_step_data( + step, + goal, + step_screenshot_path, + current_elements, + tracking_info_for_log, + llm_analysis_log, + llm_decision_log, + llm_plan, + executed_action_type, + executed_target_id, + executed_params, + action_success, + perception_duration, + planning_duration, + execution_duration, + step_duration, + ) - except Exception as exec_e: - logger.error( - f"Exception during action execution: {exec_e}", exc_info=True - ) - final_step_success = False + # Check if run should terminate based on this step's outcome + if goal_achieved or not final_step_success: + last_step_completed = step break - # Mark step as fully completed (Unchanged) + # Mark step completed if loop continues last_step_completed = step - # Wait for UI to settle (Unchanged) - time.sleep(1.5) - logger.debug( - f"Step {step + 1} duration: {time.time() - step_start_time:.2f}s" - ) + # Wait for UI to settle + time.sleep(1.0) # Make configurable or dynamic later - # --- End of Loop --- (Rest of the method remains the same) + # --- End of Loop --- logger.info("\n--- Agent Run Finished ---") if goal_achieved: - logger.success("Overall goal marked as achieved by LLM.") - elif final_step_success and last_step_completed == max_steps - 1: - logger.warning( - f"Reached maximum steps ({max_steps}) without goal completion." - ) + logger.success("Overall goal marked as achieved.") elif not final_step_success: - logger.error( - f"Execution stopped prematurely after Step {last_step_completed + 1} due to an error." + logger.error(f"Run failed at Step {last_step_completed + 1}.") + else: + logger.warning( + f"Run finished after {max_steps} steps without achieving goal." 
) - logger.info("Capturing final screen state...") - final_state_img_path = os.path.join(run_output_dir, "final_state.png") + # Save final metrics and logs + self._save_run_outputs( + run_output_dir, goal_achieved, final_step_success, last_step_completed + ) + + # Capture final screen state try: + final_state_img_path = os.path.join(run_output_dir, "final_state.png") final_image = take_screenshot() if final_image: final_image.save(final_state_img_path) - logger.info(f"Saved final screen state to {final_state_img_path}") - else: - logger.warning("Could not capture final screenshot.") + logger.info(f"Saved final screen state to {final_state_img_path}") except Exception as save_final_e: logger.warning(f"Could not save final state image: {save_final_e}") - logger.info(f"Run artifacts saved in: {run_output_dir}") return goal_achieved + + def _log_step_data( + self, + step_index, + goal, + screenshot_path, + elements, + tracking_context, + analysis, + decision, + raw_plan, + exec_action, + exec_target_id, + exec_params, + success, + perc_time, + plan_time, + exec_time, + step_time, + ): + """Helper to create and store the structured log entry for a step.""" + try: + # Convert Pydantic models to dicts for logging if they are not None + analysis_dict = ( + analysis.model_dump(mode="json") + if isinstance(analysis, (ScreenAnalysis)) + else analysis + ) + decision_dict = ( + decision.model_dump(mode="json") + if isinstance(decision, (ActionDecision)) + else decision + ) + raw_plan_dict = ( + raw_plan.model_dump(mode="json") + if isinstance(raw_plan, (LLMActionPlan)) + else raw_plan + ) + + step_log_entry = LoggedStep( + step_index=step_index, + goal=goal, + screenshot_path=screenshot_path, + input_elements_count=len(elements), + tracking_context=tracking_context, # Assumes already list of dicts + action_history_at_step=list(self.action_history), # Use current history + llm_analysis=analysis_dict, + llm_decision=decision_dict, + raw_llm_action_plan=raw_plan_dict, + executed_action=exec_action, + executed_target_element_id=exec_target_id, + executed_parameters=exec_params, + action_success=success, + perception_time_s=round(perc_time, 3), + planning_time_s=round(plan_time, 3), + execution_time_s=round(exec_time, 3), + step_time_s=round(step_time, 3), + ) + self.run_log_data.append(step_log_entry.model_dump(mode="json")) + except Exception as log_err: + logger.warning( + f"Failed to create or store structured log for step {step_index + 1}: {log_err}" + ) + + def _save_run_outputs( + self, run_output_dir, goal_achieved, final_step_success, last_step_completed + ): + """Helper to save metrics and structured log data at the end of a run.""" + # Save Metrics + metrics_path = os.path.join(run_output_dir, "run_metrics.json") + try: + # Calculate summary stats (handle empty lists) + valid_step_times = [ + t for t in self.metrics["step_times_s"] if isinstance(t, (int, float)) + ] + valid_perc_times = [ + t + for t in self.metrics["perception_times_s"] + if isinstance(t, (int, float)) + ] + valid_plan_times = [ + t + for t in self.metrics["planning_times_s"] + if isinstance(t, (int, float)) + ] + valid_exec_times = [ + t + for t in self.metrics["execution_times_s"] + if isinstance(t, (int, float)) + ] + valid_elem_counts = [ + c for c in self.metrics["elements_per_step"] if isinstance(c, int) + ] + valid_track_counts = [ + c for c in self.metrics["active_tracks_per_step"] if isinstance(c, int) + ] + + summary_metrics = { + "total_steps_attempted": len(self.metrics["step_times_s"]), + "last_step_completed": 
last_step_completed + 1, + "goal_achieved": goal_achieved, + "final_step_success": final_step_success, + "avg_step_time_s": round( + sum(valid_step_times) / len(valid_step_times), 3 + ) + if valid_step_times + else 0, + "avg_perception_time_s": round( + sum(valid_perc_times) / len(valid_perc_times), 3 + ) + if valid_perc_times + else 0, + "avg_planning_time_s": round( + sum(valid_plan_times) / len(valid_plan_times), 3 + ) + if valid_plan_times + else 0, + "avg_execution_time_s": round( + sum(valid_exec_times) / len(valid_exec_times), 3 + ) + if valid_exec_times + else 0, + "avg_elements_per_step": round( + sum(valid_elem_counts) / len(valid_elem_counts), 1 + ) + if valid_elem_counts + else 0, + "avg_active_tracks_per_step": round( + sum(valid_track_counts) / len(valid_track_counts), 1 + ) + if valid_track_counts + else 0, + "successful_actions": sum( + 1 for r in self.metrics["action_results"] if r is True + ), + "failed_actions": sum( + 1 for r in self.metrics["action_results"] if r is False + ), + } + full_metrics_data = {"summary": summary_metrics, "details": self.metrics} + with open(metrics_path, "w") as f: + json.dump(full_metrics_data, f, indent=4) + logger.info(f"Saved run metrics to {metrics_path}") + logger.info(f"Metrics Summary: {summary_metrics}") + except Exception as metrics_e: + logger.warning(f"Could not save or summarize metrics: {metrics_e}") + + # Save Structured Log Data + log_protocol_path = os.path.join(run_output_dir, "run_log.jsonl") + try: + with open(log_protocol_path, "w") as f: + for step_data_dict in self.run_log_data: + # Ensure complex objects within are serializable; model_dump helps + f.write( + json.dumps(step_data_dict, default=str) + "\n" + ) # Use default=str as fallback + logger.info(f"Saved structured run log to {log_protocol_path}") + except Exception as log_protocol_e: + logger.warning(f"Could not save structured run log: {log_protocol_e}") diff --git a/omnimcp/core.py b/omnimcp/core.py index 3391f38..ac9cd15 100644 --- a/omnimcp/core.py +++ b/omnimcp/core.py @@ -1,110 +1,147 @@ # omnimcp/core.py -from typing import List, Tuple, Optional - +from typing import List, Tuple, Optional # Added Dict, Any import platform -# Assuming these imports are correct -from .types import UIElement +# Import necessary types +from .types import ( + UIElement, + ElementTrack, # Added + LLMActionPlan, # Still needed for temporary return value + LLMAnalysisAndDecision, # Added +) from .utils import ( render_prompt, logger, -) # Assuming render_prompt handles template creation +) from .completions import call_llm_api -from .types import LLMActionPlan +from .config import config # Import config if needed, e.g., for model name +# --- Updated Prompt Template --- PROMPT_TEMPLATE = """ -You are an expert UI automation assistant. Your task is to determine the single next best action to take on a user interface (UI) to achieve a given user goal, and assess if the goal is already complete. +You are an expert UI automation assistant. Your task is to analyze the current UI state, including changes from the previous step, and then decide the single best next action to achieve a given goal. 
**Operating System:** {{ platform }} **User Goal:** {{ user_goal }} -**Previous Actions Taken:** +**Previous Actions Taken (up to last 5):** {% if action_history %} -{% for action_desc in action_history %} +{% for action_desc in action_history[-5:] %} {# Show only recent history #} - {{ action_desc }} {% endfor %} {% else %} - None {% endif %} -**Current UI Elements:** -Here is a list of UI elements currently visible on the screen (showing first 50 if many). +**Current UI Elements (Raw Detections - Max 50):** +``` +{% for element in elements[:50] %} +{{ element.to_prompt_repr() }} {# Uses per-frame ID #} +{% endfor %} +``` +**Tracked Elements Context (Persistent View - Max 50):** +This shows elements being tracked across frames. Status 'VISIBLE' means seen this frame. 'MISSING(n)' means missed for n consecutive frames. ``` -{% for element in elements %} -{{ element.to_prompt_repr() }} +{% if tracking_info %} +{% for track in tracking_info[:50] %} +- {{ track.short_repr() }} {# Uses persistent TrackID and status #} {% endfor %} +{% else %} +- (No tracking info available or first frame) +{% endif %} ``` **Instructions:** -1. **Analyze:** Review the user goal, previous actions, and the current UI elements. Check if the goal is already achieved based on the current state. -2. **Reason:** If the goal is not complete, explain your step-by-step plan. -3. **App Launch Sequence Logic:** - * If the goal requires an application (like 'calculator') that is *not* visible, and the previous action was *not* pressing the OS search key ("Cmd+Space" or "Win"), then the next action is to press the OS search key: `action: "press_key"`, `key_info: "Cmd+Space"` (or "Win" depending on OS). - * **IMPORTANT:** If the previous action *was* pressing the OS search key, AND a search input field is now visible in the **Current UI Elements**, then the next action is to type the application name: `action: "type"`, `text_to_type: "Calculator"` (or the specific app name needed), `element_id: `. - * If the previous action was typing the application name into search, the next action is to press Enter: `action: "press_key"`, `key_info: "Enter"`. -4. **General Action Selection & Output Format Rules:** - * Identify the most relevant visible UI element for the next logical step based on your reasoning. - * **Rule 1:** If `action` is 'click', `element_id` MUST be the integer ID of a visible element from the list. `text_to_type` and `key_info` MUST be null. - * **Rule 2:** If `action` is 'type', `text_to_type` MUST be the string to type. `key_info` MUST be null. `element_id` SHOULD be the ID of the target field if identifiable, otherwise null (if typing into a general area like Spotlight). - * **Rule 3:** If `action` is 'press_key', `key_info` MUST be the key/shortcut string (e.g., 'Enter', 'Cmd+Space', 'a', '*'). `element_id` and `text_to_type` MUST be null. - * **Rule 4:** If `action` is 'scroll', provide scroll details if possible (or default to generic scroll). `element_id`, `text_to_type`, `key_info` MUST be null. - * **Rule 5:** If the desired element for the next logical step (e.g., the '*' button) is **not found** in the 'Current UI Elements', DO NOT choose `action: "click"` with `element_id: null`. Instead, consider if an alternative valid action like `action: "press_key"` (e.g., with `key_info: "*"`) can achieve the result. 
If no suitable action exists, explain this in the reasoning and select an action like waiting or reporting failure if appropriate (though the current actions don't support waiting/failure reporting well). - * **Rule 6:** Ensure your entire output is ONLY the single, valid JSON object conforming to the structure, with no extra text or markdown. -5. **Goal Completion:** If the goal is fully achieved, set `is_goal_complete: true`. Otherwise, set `is_goal_complete: false`. -6. **Output Format:** Respond ONLY with a valid JSON object matching the structure below. Do NOT include ```json markdown. + +1. **Analyze State:** Carefully review the Goal, History, Raw Elements, and especially the Tracked Elements Context. Reason about what changed since the last step (newly appeared elements? previously visible elements now missing? critical elements still present?). Consider if missing elements are temporary (e.g., due to UI transition) or permanent. Note any critical elements needed for the goal and their current status. +2. **Decide Action:** Based on your analysis, determine the single best action to take next towards the goal. This could be interacting with a visible element, handling a missing element (e.g., waiting, using a keyboard shortcut if applicable), or finishing if the goal is complete. +3. **Output Format:** Respond ONLY with a single valid JSON object containing two keys: "screen_analysis" and "action_decision". + * The value for "screen_analysis" MUST be a JSON object conforming to the `ScreenAnalysis` structure below. + * The value for "action_decision" MUST be a JSON object conforming to the `ActionDecision` structure below. + * Do NOT include any text outside this main JSON object (e.g., no ```json markdown). + +**JSON Output Structure:** ```json { - "reasoning": "Your step-by-step thinking process here...", - "action": "click | type | scroll | press_key", - "element_id": , - "text_to_type": "", - "key_info": "", - "is_goal_complete": true | false + "screen_analysis": { + "reasoning": "Your detailed step-by-step analysis of the current state, changes from the previous state using tracking context, and assessment relevant to the goal.", + "disappeared_elements": ["list", "of", "track_ids", "considered", "permanently", "gone"], + "temporarily_missing_elements": ["list", "of", "track_ids", "likely", "to", "reappear"], + "new_elements": ["list", "of", "track_ids", "for", "newly", "appeared", "elements"], + "critical_elements_status": { + "track_id_example_1": "Visible", + "track_id_example_2": "Missing" + } + }, + "action_decision": { + "analysis_reasoning": "Brief summary connecting the screen analysis to the chosen action.", + "action_type": "click | type | scroll | press_key | wait | finish", + "target_element_id": , + "parameters": { + "text_to_type": "", + "key_info": "", + "wait_duration_s": + # Add other parameters as needed + }, + "is_goal_complete": + } } + ``` + +**Action Rules (Apply to `action_decision` fields):** +* If `action_type` is 'click', `target_element_id` MUST be the integer ID (from Current UI Elements) of a visible element. `parameters` should be empty or contain only non-essential info like `click_type`. +* If `action_type` is 'type', `parameters.text_to_type` MUST be the string to type. `target_element_id` SHOULD be the ID of the target field if identifiable. +* If `action_type` is 'press_key', `parameters.key_info` MUST be the key/shortcut string. `target_element_id` MUST be null. 
+* If `action_type` is 'scroll', specify direction/amount in `analysis_reasoning` or `parameters` if possible. `target_element_id` MUST be null. +* If `action_type` is 'wait', specify `parameters.wait_duration_s`. `target_element_id` MUST be null. +* If `action_type` is 'finish', `is_goal_complete` MUST be true. `target_element_id` and `parameters` should generally be null/empty. +* If a required element is missing (use Tracked Elements Context), choose an appropriate action like 'wait' or 'press_key' if a keyboard alternative exists, or explain the issue in `screen_analysis.reasoning` and potentially choose 'finish' with `is_goal_complete: false` if stuck. Do NOT hallucinate `target_element_id` for missing elements. """ +# --- Updated Planner Function --- + -# --- Core Logic Function plan_action_for_ui (remains the same as previous version) --- -# Includes the temporary debug logging for elements on step 2 def plan_action_for_ui( elements: List[UIElement], user_goal: str, action_history: List[str] | None = None, - # Add step parameter for conditional logging (adjust call in demo.py) step: int = 0, -) -> Tuple[LLMActionPlan, Optional[UIElement]]: + tracking_info: Optional[List[ElementTrack]] = None, # Accept list of ElementTrack +) -> Tuple[ + LLMActionPlan, Optional[UIElement] +]: # Still return LLMActionPlan temporarily """ - Uses an LLM to plan the next UI action based on elements, goal, and history. + Uses an LLM to analyze UI state with tracking and plan the next action. + + Args: + elements: Raw UI elements detected in the current frame. + user_goal: The overall goal description. + action_history: Descriptions of previous actions taken. + step: The current step number. + tracking_info: List of ElementTrack objects from the tracker. + + Returns: + A tuple containing an LLMActionPlan (converted from ActionDecision) + and the targeted UIElement (if any) found in the current frame. """ action_history = action_history or [] logger.info( - f"Planning action for goal: '{user_goal}' with {len(elements)} elements. History: {len(action_history)} steps." + f"Planning action for goal: '{user_goal}' with {len(elements)} raw elements. " + f"History: {len(action_history)} steps. Tracking: {len(tracking_info or [])} active tracks." ) - MAX_ELEMENTS_IN_PROMPT = 1000 - if len(elements) > MAX_ELEMENTS_IN_PROMPT: - logger.warning( - f"Too many elements ({len(elements)}), truncating to {MAX_ELEMENTS_IN_PROMPT} for prompt." 
- ) - elements_for_prompt = elements[:MAX_ELEMENTS_IN_PROMPT] - else: - elements_for_prompt = elements - - # --- Temporary logging to inspect elements --- - # Log elements specifically for the step *after* the first Cmd+Space - if step == 1: # Note: Step index starts at 0 in the demo loop - try: - elements_repr = [el.to_prompt_repr() for el in elements_for_prompt[:10]] - logger.debug(f"Elements for planning (Step {step + 1}): {elements_repr}") - except Exception as log_e: - logger.warning(f"Could not log elements representation: {log_e}") - # --- End temporary logging --- + # Limit elements and tracks passed to the prompt for brevity + MAX_ELEMENTS_IN_PROMPT = 50 + MAX_TRACKS_IN_PROMPT = 50 + elements_for_prompt = elements[:MAX_ELEMENTS_IN_PROMPT] + tracking_info_for_prompt = ( + tracking_info[:MAX_TRACKS_IN_PROMPT] if tracking_info else None + ) prompt = render_prompt( PROMPT_TEMPLATE, @@ -112,43 +149,99 @@ def plan_action_for_ui( elements=elements_for_prompt, action_history=action_history, platform=platform.system(), + tracking_info=tracking_info_for_prompt, # Pass tracking info ) - system_prompt = "You are an AI assistant. Respond ONLY with valid JSON that conforms to the provided structure. Do not include any explanatory text before or after the JSON block." + # System prompt reinforcing the JSON structure + system_prompt = ( + "You are an AI assistant. Respond ONLY with a single valid JSON object " + "containing the keys 'screen_analysis' and 'action_decision', conforming " + "to the specified Pydantic models. Do not include any explanatory text " + "before or after the JSON block, and do not use markdown code fences like ```json." + ) messages = [{"role": "user", "content": prompt}] try: - llm_plan = call_llm_api(messages, LLMActionPlan, system_prompt=system_prompt) + # Call LLM expecting the combined analysis and decision structure + llm_output = call_llm_api( + messages, + LLMAnalysisAndDecision, # Expect the combined model + system_prompt=system_prompt, + model=config.ANTHROPIC_DEFAULT_MODEL, # Use configured model + ) + # Log the structured analysis and decision for debugging + logger.debug( + f"LLM Screen Analysis: {llm_output.screen_analysis.model_dump_json(indent=2)}" + ) + logger.debug( + f"LLM Action Decision: {llm_output.action_decision.model_dump_json(indent=2)}" + ) + except (ValueError, Exception) as e: - logger.error(f"Failed to get valid action plan from LLM: {e}") + logger.error( + f"Failed to get valid analysis/decision from LLM: {e}", exc_info=True + ) + # Fallback or re-raise? Re-raise for now to halt execution on planning failure. raise - target_element = None - if llm_plan.element_id is not None: - target_element = next( - (el for el in elements if el.id == llm_plan.element_id), None + # --- Temporary Conversion back to LLMActionPlan --- + # This allows AgentExecutor handlers to work without immediate refactoring. + # TODO: Refactor AgentExecutor later to consume ActionDecision directly. 
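As context for the conversion that follows, here is a minimal sketch of the combined response model the planner now expects back from the LLM. It assumes the Pydantic v2 API (`model_validate_json`) consistent with the `model_dump_json`/`model_validate` calls already used in this diff, and the field names shown in the JSON structure above; the payload values are illustrative, not a captured response:

```python
# Sketch only: exercising the combined response model from omnimcp.types.
# The JSON payload is an illustrative example, not a real LLM response.
from omnimcp.types import LLMAnalysisAndDecision

raw = """{
  "screen_analysis": {
    "reasoning": "Search field appeared after pressing Cmd+Space.",
    "disappeared_elements": [],
    "temporarily_missing_elements": [],
    "new_elements": ["track_3"],
    "critical_elements_status": {"track_3": "Visible"}
  },
  "action_decision": {
    "analysis_reasoning": "Type the app name into the search field.",
    "action_type": "type",
    "target_element_id": 3,
    "parameters": {"text_to_type": "Calculator"},
    "is_goal_complete": false
  }
}"""

combined = LLMAnalysisAndDecision.model_validate_json(raw)
assert combined.action_decision.action_type == "type"
assert combined.action_decision.parameters["text_to_type"] == "Calculator"
```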
+ analysis = llm_output.screen_analysis + decision = llm_output.action_decision + + # Combine reasoning (can be refined) + combined_reasoning = f"Analysis: {analysis.reasoning}\nDecision Justification: {decision.analysis_reasoning}" + + # Extract parameters for LLMActionPlan + # Ensure parameters is not None before accessing .get() + parameters = decision.parameters or {} + text_param = parameters.get("text_to_type") + key_param = parameters.get("key_info") + # Add handling for 'wait' action type if needed by LLMActionPlan later + # wait_param = parameters.get("wait_duration_s") + + # Handle potential new action types like 'wait' or 'finish' if LLMActionPlan + # doesn't support them directly yet. For now, map 'finish'/'wait' to a state? + # Let's assume LLMActionPlan.action can hold the new types for now. + action_type = decision.action_type + + converted_plan = LLMActionPlan( + reasoning=combined_reasoning, + action=action_type, # Pass action_type directly + element_id=decision.target_element_id, # Pass the current frame ID + text_to_type=text_param, + key_info=key_param, + is_goal_complete=decision.is_goal_complete, + ) + # Validate the converted plan (optional, but good practice) + try: + # Re-validate the object created from ActionDecision fields + # This ensures the LLM followed rules that map to LLMActionPlan + # Note: This validation might fail if action_type is 'wait' or 'finish' + # We might need to adjust LLMActionPlan or skip validation for new types. + # For now, let's try validating. + LLMActionPlan.model_validate(converted_plan.model_dump()) + except Exception as validation_err: + logger.warning( + f"Converted LLMActionPlan failed validation (potentially due to new action types like '{action_type}'): {validation_err}" ) + # Don't raise, just warn for now, as the ActionDecision was likely valid. - # Logging Logic - if llm_plan.is_goal_complete: - logger.info("LLM determined the goal is complete.") - elif llm_plan.action in ["click", "type"]: - if target_element: - logger.info( - f"LLM planned action: '{llm_plan.action}' on element ID {llm_plan.element_id} ('{target_element.content[:30]}...')" - ) - elif llm_plan.action == "click": # Click always needs a target + # Find the target UIElement based on the element_id from the decision + target_ui_element = None + if converted_plan.element_id is not None: + target_ui_element = next( + (el for el in elements if el.id == converted_plan.element_id), None + ) + if target_ui_element is None: logger.warning( - f"LLM planned 'click' on element ID {llm_plan.element_id}, but no such element was found." + f"LLM targeted element ID {converted_plan.element_id}, but it was not found in the current raw elements." 
) - # else: Typing without element_id might be okay (e.g., search bar) - - else: # press_key or scroll - action_details = f"'{llm_plan.action}'" - if llm_plan.key_info: - action_details += f" with key_info: '{llm_plan.key_info}'" - logger.info( - f"LLM planned action: {action_details} (no specific element target)" - ) + # Keep element_id in plan, but target_ui_element remains None + + logger.info( + f"Planner returning action: {converted_plan.action}, Target Elem ID: {converted_plan.element_id}, Goal Complete: {converted_plan.is_goal_complete}" + ) - return llm_plan, target_element + return converted_plan, target_ui_element # Return converted plan and element diff --git a/omnimcp/types.py b/omnimcp/types.py index 2a04192..f7bc664 100644 --- a/omnimcp/types.py +++ b/omnimcp/types.py @@ -351,3 +351,14 @@ class LoggedStep(BaseModel): planning_time_s: float execution_time_s: float step_time_s: float + + +class LLMAnalysisAndDecision(BaseModel): + """Defines the full structured output expected from the LLM, combining analysis and decision.""" + + screen_analysis: ScreenAnalysis = Field( + description="The LLM's analysis of the current screen state and element tracks." + ) + action_decision: ActionDecision = Field( + description="The LLM's decision on the next action based on the analysis." + ) diff --git a/omnimcp/visual_state.py b/omnimcp/visual_state.py index 1cd3486..f4b9ea0 100644 --- a/omnimcp/visual_state.py +++ b/omnimcp/visual_state.py @@ -1,7 +1,8 @@ # omnimcp/visual_state.py """ -Manages the perceived state of the UI using screenshots and OmniParser. +Manages the perceived state of the UI using screenshots, OmniParser, +and element tracking across updates. """ import time @@ -10,43 +11,62 @@ from PIL import Image from loguru import logger +# Required imports from omnimcp.config import config from omnimcp.omniparser.client import OmniParserClient -from omnimcp.types import Bounds, UIElement + +# Import necessary types and the tracker +from omnimcp.types import Bounds, UIElement, ElementTrack +from omnimcp.tracking import SimpleElementTracker from omnimcp.utils import take_screenshot, downsample_image class VisualState: """ - Manages the perceived state of the UI using screenshots and OmniParser. - Includes optional screenshot downsampling for performance via config. + Manages the perceived state of the UI using screenshots, OmniParser, + and element tracking across updates. 
""" def __init__(self, parser_client: OmniParserClient): """Initialize the visual state manager.""" - self.elements: List[UIElement] = [] - self.timestamp: Optional[float] = None - self.screen_dimensions: Optional[Tuple[int, int]] = ( - None # Stores ORIGINAL dimensions - ) - self._last_screenshot: Optional[Image.Image] = ( - None # Stores ORIGINAL screenshot - ) self._parser_client = parser_client if not self._parser_client: logger.critical("VisualState initialized without a valid parser_client!") raise ValueError("VisualState requires a valid OmniParserClient instance.") - logger.info("VisualState initialized.") + + # State attributes + self.elements: List[UIElement] = [] # Raw elements from current frame's parse + self.tracked_elements_view: List[ + ElementTrack + ] = [] # Tracker's view of elements + self.timestamp: Optional[float] = ( + None # Timestamp of the last successful update + ) + self.screen_dimensions: Optional[Tuple[int, int]] = ( + None # Original screen dimensions + ) + self._last_screenshot: Optional[Image.Image] = None # Original screenshot + + # Internal components + self.element_tracker = SimpleElementTracker() # Instantiate the tracker + self.frame_counter: int = 0 # Track update calls for tracker state + + logger.info("VisualState initialized with SimpleElementTracker.") def update(self) -> None: """ - Update visual state: take screenshot, optionally downsample, - parse via client, map results. Updates self.elements, self.timestamp, - self.screen_dimensions (original), self._last_screenshot (original). + Update visual state: capture screen, parse elements via OmniParser, + and update element tracks. Populates self.elements, self.timestamp, + self.screen_dimensions, self._last_screenshot, and self.tracked_elements_view. """ - logger.info("VisualState update requested...") + self.frame_counter += 1 + logger.info(f"VisualState update requested (Frame: {self.frame_counter})...") start_time = time.time() - screenshot: Optional[Image.Image] = None # Define screenshot outside try + screenshot: Optional[Image.Image] = None + parsed_elements: List[ + UIElement + ] = [] # Store result before assigning to self.elements + try: # 1. Capture screenshot logger.debug("Taking screenshot...") @@ -60,20 +80,18 @@ def update(self) -> None: self.screen_dimensions = original_dimensions logger.debug(f"Screenshot taken: original dimensions={original_dimensions}") - # 2. Optionally Downsample before sending to parser (Read config here) + # 2. Optionally Downsample before sending to parser image_to_parse = screenshot scale_factor = config.OMNIPARSER_DOWNSAMPLE_FACTOR - # Validate factor before calling downsample utility + # Validate factor if not (0.0 < scale_factor <= 1.0): logger.warning( - f"Invalid OMNIPARSER_DOWNSAMPLE_FACTOR ({scale_factor}). Must be > 0 and <= 1.0. Using original." + f"Invalid OMNIPARSER_DOWNSAMPLE_FACTOR: {scale_factor}. Using 1.0." ) - scale_factor = 1.0 # Reset to 1.0 if invalid + scale_factor = 1.0 if scale_factor < 1.0: - # Call the utility function from utils.py image_to_parse = downsample_image(screenshot, scale_factor) - # Logging is now handled within downsample_image # 3. Process with UI parser client if not self._parser_client.server_url: @@ -81,176 +99,276 @@ def update(self) -> None: "OmniParser client server URL not available. Cannot parse." 
) self.elements = [] + self.tracked_elements_view = [] # Clear tracks too self.timestamp = time.time() - return + return # Exit update early logger.debug( - f"Parsing image (input size: {image_to_parse.size}) via {self._parser_client.server_url}..." + f"Parsing image (size: {image_to_parse.size}) via {self._parser_client.server_url}..." ) + parser_start_time = time.time() parser_result = self._parser_client.parse_image(image_to_parse) + parser_duration = time.time() - parser_start_time + logger.debug(f"Parsing completed in {parser_duration:.2f}s.") + + # 4. Map parser results to UIElement objects + logger.debug("Mapping parser results to UIElements...") + mapping_start_time = time.time() + # Use helper method to get the list of UIElements for this frame + parsed_elements = self._parse_and_map_elements(parser_result) + mapping_duration = time.time() - mapping_start_time + logger.debug( + f"Mapped {len(parsed_elements)} elements in {mapping_duration:.2f}s." + ) - # 4. Update elements list using the mapping logic - logger.debug("Mapping parser results...") - self._update_elements_from_parser(parser_result) - self.timestamp = time.time() + # Assign mapped elements to state for this frame + self.elements = parsed_elements + + # 5. Update Element Tracker + logger.debug("Updating element tracker...") + tracking_start_time = time.time() + # Pass the newly parsed elements and current frame number + self.tracked_elements_view = self.element_tracker.update( + self.elements, self.frame_counter + ) + tracking_duration = time.time() - tracking_start_time logger.info( - f"VisualState update complete. Found {len(self.elements)} " - f"elements. Took {time.time() - start_time:.2f}s." + f"Tracker updated in {tracking_duration:.2f}s. Active tracks: {len(self.tracked_elements_view)}" + ) + + # Update timestamp only on full success + self.timestamp = time.time() + total_duration = time.time() - start_time + logger.success( + f"VisualState update complete for Frame {self.frame_counter}. " + f"Found {len(self.elements)} raw elements. Active tracks: {len(self.tracked_elements_view)}. " + f"Total time: {total_duration:.2f}s." ) except Exception as e: - logger.error(f"Failed to update visual state: {e}", exc_info=True) + logger.error( + f"Failed to update visual state (Frame {self.frame_counter}): {e}", + exc_info=True, + ) + # Reset state on failure self.elements = [] - self.timestamp = time.time() - # Ensure dimensions reflect original even on error if possible + self.tracked_elements_view = [] # Also clear tracker view on error + self.timestamp = time.time() # Record time of failure + # Attempt to keep screen dimensions if screenshot was taken if screenshot: self.screen_dimensions = screenshot.size else: self.screen_dimensions = None - def _update_elements_from_parser(self, parser_json: Dict): - """Maps the raw JSON output from OmniParser to UIElement objects.""" + def _parse_and_map_elements(self, parser_json: Dict) -> List[UIElement]: + """ + Helper method to map raw JSON output from OmniParser to UIElement objects. + Assigns sequential per-frame IDs. + """ new_elements: List[UIElement] = [] - element_id_counter = 0 + element_id_counter = 0 # Assign sequential IDs per-frame if not isinstance(parser_json, dict): logger.error( f"Parser result is not a dictionary: {type(parser_json)}. Cannot map." 
) - self.elements = new_elements - return + return new_elements if "error" in parser_json: logger.error(f"Parser returned an error: {parser_json['error']}") - self.elements = new_elements - return + return new_elements raw_elements: List[Dict[str, Any]] = parser_json.get("parsed_content_list", []) if not isinstance(raw_elements, list): logger.error( f"Expected 'parsed_content_list' to be a list, got: {type(raw_elements)}" ) - self.elements = new_elements - return + return new_elements - logger.debug(f"Mapping {len(raw_elements)} raw elements from OmniParser.") + logger.debug(f"Mapping {len(raw_elements)} raw items from OmniParser.") for item in raw_elements: + # Pass counter to assign ID ui_element = self._convert_to_ui_element(item, element_id_counter) if ui_element: new_elements.append(ui_element) - element_id_counter += 1 - logger.debug(f"Successfully mapped {len(new_elements)} valid UIElements.") - self.elements = new_elements + element_id_counter += 1 # Increment only for valid elements + + logger.debug( + f"Successfully mapped {len(new_elements)} valid UIElements for this frame." + ) + return new_elements def _convert_to_ui_element( self, item: Dict[str, Any], element_id: int ) -> Optional[UIElement]: - """Converts a single item from OmniParser result to a UIElement.""" + """ + Converts a single item from OmniParser result to a validated UIElement. + Returns None if item is invalid. + """ try: if not isinstance(item, dict): - logger.warning(f"Skipping non-dict item: {item}") - return None + # logger.warning(f"Skipping non-dict item in parser result: {item}") + return None # Silently skip non-dicts + + bbox_rel = item.get("bbox") or item.get("box") # Check common keys - bbox_rel = item.get("bbox") if not isinstance(bbox_rel, list) or len(bbox_rel) != 4: - logger.debug( - f"Skipping element (id={element_id}) invalid/missing bbox: {item.get('content')}" + # logger.debug(f"Skipping element due to invalid/missing bbox: Content='{item.get('content', '')[:50]}...'") + return None # Silently skip items without valid bbox structure + + # Attempt conversion, handle potential non-numeric values + try: + x_min, y_min, x_max, y_max = map(float, bbox_rel) + except (ValueError, TypeError) as map_err: + logger.warning( + f"Could not map bbox values to float: {bbox_rel} - Error: {map_err}" ) return None - x_min, y_min, x_max, y_max = map(float, bbox_rel) + # Calculate x, y, w, h from x_min, y_min, x_max, y_max x, y, w, h = x_min, y_min, x_max - x_min, y_max - y_min - # Validate and clamp bounds (0.0 to 1.0) - tolerance = 0.001 + # Validate bounds (relative 0.0 to 1.0, non-negative w/h) + tolerance = 0.001 # Allow slight float inaccuracies near boundaries if not ( (-tolerance <= x <= 1.0 + tolerance) and (-tolerance <= y <= 1.0 + tolerance) - and w > 0.0 - and h > 0.0 + and (w >= 0.0) # Check non-negative first + and (h >= 0.0) and (x + w) <= 1.0 + tolerance and (y + h) <= 1.0 + tolerance ): - logger.warning( - f"Skipping element (id={element_id}) invalid relative bounds: {item.get('content')} - Bounds: ({x:.3f}, {y:.3f}, {w:.3f}, {h:.3f})" - ) - return None + # logger.warning(f"Skipping element with invalid relative bounds: xywh=({x:.3f}, {y:.3f}, {w:.3f}, {h:.3f}) Content='{item.get('content', '')[:50]}...'") + return None # Silently skip invalid bounds - x, y = max(0.0, min(1.0, x)), max(0.0, min(1.0, y)) - w, h = max(0.0, min(1.0 - x, w)), max(0.0, min(1.0 - y, h)) - if w <= 0.0 or h <= 0.0: - logger.warning( - f"Skipping element (id={element_id}) zero w/h after clamp: {item.get('content')}" - ) - 
return None + # Clamp coordinates strictly between 0.0 and 1.0 + x = max(0.0, min(1.0, x)) + y = max(0.0, min(1.0, y)) + w = max(0.0, min(1.0 - x, w)) # Clamp width based on clamped x + h = max(0.0, min(1.0 - y, h)) # Clamp height based on clamped y + + # Filter elements with effectively zero area after clamping + min_dim_threshold = 1e-5 + if w < min_dim_threshold or h < min_dim_threshold: + # logger.debug(f"Skipping element with near-zero dimensions after clamping: w={w:.4f}, h={h:.4f}. Content='{item.get('content', '')[:50]}...'") + return None # Silently skip zero-area elements bounds: Bounds = (x, y, w, h) - # Optional tiny element filter + # Filter tiny elements based on absolute pixel size if dimensions available if self.screen_dimensions: img_width, img_height = self.screen_dimensions - min_pixel_size = 3 + min_pixel_size = 3 # Minimum width or height in pixels if (w * img_width < min_pixel_size) or ( h * img_height < min_pixel_size ): - logger.debug( - f"Skipping tiny element (id={element_id}): {item.get('content')}" - ) - return None + # logger.debug(f"Skipping tiny element (pixels < {min_pixel_size}): w={w*img_width:.1f}, h={h*img_height:.1f}. Content='{item.get('content', '')[:50]}...'") + return None # Silently skip tiny elements + # Extract other fields safely element_type = ( str(item.get("type", "unknown")).lower().strip().replace(" ", "_") ) content = str(item.get("content", "")).strip() - + # Ensure confidence is float, default to 0.0 if invalid + try: + confidence = float(item.get("confidence", 0.0)) + except (ValueError, TypeError): + confidence = 0.0 + attributes = item.get("attributes", {}) + if not isinstance(attributes, dict): # Ensure attributes is a dict + attributes = {} + + # Use the passed-in sequential per-frame ID return UIElement( id=element_id, type=element_type, content=content, bounds=bounds, - confidence=float(item.get("confidence", 0.0)), - attributes=item.get("attributes", {}) or {}, - ) - except (ValueError, TypeError, KeyError) as e: - logger.warning( - f"Skipping element (id={element_id}) mapping error: {item.get('content')} - {e}" + confidence=confidence, + attributes=attributes, ) - return None except Exception as unexpected_e: + # Catch any other unexpected errors during conversion logger.error( - f"Unexpected error mapping element (id={element_id}): {item.get('content')} - {unexpected_e}", + f"Unexpected error mapping element: {item.get('content', '')[:50]}... - {unexpected_e}", exc_info=True, ) return None def find_element(self, description: str) -> Optional[UIElement]: - """Finds the best matching element using basic keyword matching.""" - logger.debug(f"Finding element: '{description}' using basic matching.") + """ + Finds the best matching element using keyword matching with improved scoring + on the current frame's elements. 
+ """ + logger.debug( + f"Finding element by description: '{description}' (using current frame elements: {len(self.elements)})" + ) if not self.elements: return None + + # Prepare search terms from the description search_terms = [term for term in description.lower().split() if term] if not search_terms: + logger.warning("Empty search terms provided to find_element.") return None - best_match = None - highest_score = 0 - for element in self.elements: - content_lower = element.content.lower() - type_lower = element.type.lower() - # Simple scoring: 2 points for term in content, 1 for term in type - score = sum(2 for term in search_terms if term in content_lower) + sum( - 1 for term in search_terms if term in type_lower - ) + best_match: Optional[UIElement] = None + highest_score: float = ( + -1.0 + ) # Use float, start below any potential positive score - if score > highest_score: - highest_score = score + for element in self.elements: + content_lower = element.content.lower() if element.content else "" + type_lower = element.type.lower() if element.type else "" + current_score: float = 0.0 + + # --- Scoring Logic --- + # 1. Exact Content Match (High Score) + # Check if the *entire* description matches the element content exactly + if description.lower() == content_lower: + current_score += 10.0 + + # 2. Term-based scoring + for term in search_terms: + # Type Match Bonus (Medium-High Score) + # Check if term exactly matches the normalized type + if term == type_lower: + current_score += 5.0 + # Check if term matches part of a multi-word type (e.g., "field" in "text_field") + elif "_" in type_lower and term in type_lower.split("_"): + current_score += 2.0 # Lower score for partial type match + + # Content Match Bonuses + # Check for whole word match in content (Medium Score) + # Simple split, might need more robust tokenization later + content_words = content_lower.split() + if term in content_words: + current_score += 3.0 + # Check for substring match in content (Low Score) + # Award only if whole word didn't match, to avoid double counting + elif term in content_lower: + current_score += 1.0 + + # --- Update Best Match --- + if current_score > highest_score: + highest_score = current_score best_match = element - - if best_match: + # --- Tie-breaking (Optional but can help) --- + # If scores are equal, maybe prefer smaller elements or specific types? + # Keep it simple for now: first element with highest score wins. + + # --- Return Result --- + # Define a minimum score threshold to avoid weak matches + # Tunable parameter - start with >= 2.0? 
+ min_score_threshold = 2.0 + if best_match and highest_score >= min_score_threshold: logger.info( - f"Found best match (score={highest_score}) for '{description}': ID={best_match.id}" + f"Found best match (score={highest_score:.1f}) for '{description}': " + f"ID={best_match.id}, Type={best_match.type}, Content='{best_match.content[:30]}...'" ) + return best_match else: logger.warning( - f"No element found with positive match score for: '{description}'" + f"No suitable element found with score >= {min_score_threshold} for: '{description}' (Highest score: {highest_score:.1f})" ) - return best_match + return None # Explicitly return None if no good match found diff --git a/tests/test_agent_executor.py b/tests/test_agent_executor.py index 3e6e1b1..448ab2a 100644 --- a/tests/test_agent_executor.py +++ b/tests/test_agent_executor.py @@ -1,122 +1,185 @@ # tests/test_agent_executor.py -import os -from typing import List, Optional, Tuple -from unittest.mock import MagicMock - import pytest +import os # Import added +from unittest.mock import MagicMock # Added patch for later if needed from PIL import Image +from typing import List, Optional, Tuple # Added Callable -from omnimcp.agent_executor import ( - AgentExecutor, - PerceptionInterface, - ExecutionInterface, - PlannerCallable, -) -from omnimcp import agent_executor -from omnimcp.types import LLMActionPlan, UIElement +# Necessary type imports +from omnimcp.types import UIElement, ElementTrack, LLMActionPlan +# Imports from the module under test +from omnimcp.agent_executor import AgentExecutor, PlannerCallable -class MockPerception(PerceptionInterface): - def __init__( - self, - elements: List[UIElement], - dims: Optional[Tuple[int, int]], - image: Optional[Image.Image], - ): - self.elements = elements - self.screen_dimensions = dims - self._last_screenshot = image - self.update_call_count = 0 - self.fail_on_update = False # Flag to simulate failure +# Import module itself for patching module-level functions +from omnimcp import agent_executor +from loguru import logger - def update(self) -> None: - if ( - self.fail_on_update and self.update_call_count > 0 - ): # Fail on second+ call if requested - raise ConnectionError("Mock perception failure") - self.update_call_count += 1 - # Simulate state update if needed, or keep static for simple tests +# --- Mock Execution Class --- +class MockExecution: + """Mocks the ExecutionInterface.""" -class MockExecution(ExecutionInterface): def __init__(self): self.calls = [] - self.fail_on_action: Optional[str] = None # e.g., "click" to make click fail + self.fail_on_action: Optional[str] = None # For testing failures def click(self, x: int, y: int, click_type: str = "single") -> bool: self.calls.append(("click", x, y, click_type)) - return not (self.fail_on_action == "click") + logger.debug(f"MockExecution: click({x}, {y}, '{click_type}')") + return self.fail_on_action != "click" def type_text(self, text: str) -> bool: self.calls.append(("type_text", text)) - return not (self.fail_on_action == "type") + logger.debug(f"MockExecution: type_text('{text[:50]}...')") + return self.fail_on_action != "type" def execute_key_string(self, key_info_str: str) -> bool: self.calls.append(("execute_key_string", key_info_str)) - return not (self.fail_on_action == "press_key") + logger.debug(f"MockExecution: execute_key_string('{key_info_str}')") + return self.fail_on_action != "press_key" def scroll(self, dx: int, dy: int) -> bool: self.calls.append(("scroll", dx, dy)) - return not (self.fail_on_action == "scroll") + 
logger.debug(f"MockExecution: scroll({dx}, {dy})") + return self.fail_on_action != "scroll" -# --- Pytest Fixtures --- +# --- Mock Perception Class --- +class MockPerception: + """Mocks the PerceptionInterface for testing AgentExecutor.""" + # Required attributes matching PerceptionInterface + elements: List[UIElement] + tracked_elements_view: List[ElementTrack] + screen_dimensions: Optional[Tuple[int, int]] + _last_screenshot: Optional[Image.Image] + frame_counter: int + update_call_count: int # Renamed from update_calls + fail_on_update: bool = False # For testing failures -@pytest.fixture -def mock_image() -> Image.Image: - return Image.new("RGB", (200, 100), color="gray") # Slightly larger default + def __init__( + self, + elements_to_return: Optional[List[UIElement]] = None, + dims: Optional[Tuple[int, int]] = (200, 100), + ): + """Initializes the mock perception component.""" + self.elements_to_return = ( + elements_to_return if elements_to_return is not None else [] + ) + self.screen_dimensions = dims + # Initialize state variables + self.elements = [] + self.tracked_elements_view = [] + self.frame_counter = 0 + self._last_screenshot = Image.new("RGB", dims) if dims else None + self.update_call_count = 0 # Use the correct name + self.fail_on_update = False + logger.debug("MockPerception initialized.") + def update(self) -> None: + """Simulates updating the perception state.""" + self.update_call_count += 1 # Increment correct counter + self.frame_counter += 1 -@pytest.fixture -def mock_element() -> UIElement: - return UIElement(id=0, type="button", content="OK", bounds=(0.1, 0.1, 0.2, 0.1)) + # Simulate failure if configured + if ( + self.fail_on_update and self.update_call_count > 1 + ): # Fail on second call typically + logger.error("MockPerception: Simulating perception failure.") + raise RuntimeError("Simulated perception failure") + + # Set the elements that the mock should "perceive" + self.elements = self.elements_to_return + # Simulate tracker update (returns empty list as mock doesn't track) + self.tracked_elements_view = [] + # Simulate screenshot/dims update based on init values + if self.screen_dimensions: + self._last_screenshot = Image.new("RGB", self.screen_dimensions) + else: + self._last_screenshot = None + logger.debug( + f"MockPerception updated (call {self.update_call_count}, frame {self.frame_counter})" + ) +# --- Fixtures --- @pytest.fixture -def mock_perception_component(mock_element, mock_image) -> MockPerception: - return MockPerception([mock_element], (200, 100), mock_image) +def mock_perception_component() -> MockPerception: + """Provides a default MockPerception instance.""" + # Provide a default element for tests that expect one + return MockPerception( + elements_to_return=[ + UIElement( + id=0, + type="button", + content="OK", + bounds=(0.1, 0.1, 0.2, 0.1), + confidence=1.0, + attributes={}, + ) + ] + ) @pytest.fixture def mock_execution_component() -> MockExecution: + """Provides a MockExecution instance.""" return MockExecution() @pytest.fixture -def mock_box_drawer() -> MagicMock: - return MagicMock(return_value=Image.new("RGB", (1, 1))) # Return dummy image +def mock_element() -> UIElement: + """Provides a sample UIElement for tests.""" + return UIElement( + id=0, + type="button", + content="OK", + bounds=(0.1, 0.1, 0.2, 0.1), # w=0.2, h=0.1 + ) @pytest.fixture -def mock_highlighter() -> MagicMock: - return MagicMock(return_value=Image.new("RGB", (1, 1))) # Return dummy image +def temp_output_dir(tmp_path) -> str: + """Creates a temporary directory 
for test run outputs.""" + run_dir = tmp_path / "test_runs" + run_dir.mkdir(exist_ok=True) # Use exist_ok=True + return str(run_dir) @pytest.fixture -def temp_output_dir(tmp_path) -> str: - """Create a temporary directory for test run outputs.""" - # tmp_path is a pytest fixture providing a Path object to a unique temp dir - output_dir = tmp_path / "test_runs" - output_dir.mkdir() - return str(output_dir) +def mock_box_drawer() -> MagicMock: + """Provides a mock for the draw_bounding_boxes utility.""" + return MagicMock(return_value=Image.new("RGB", (10, 10))) # Return dummy image + + +@pytest.fixture +def mock_highlighter() -> MagicMock: + """Provides a mock for the draw_action_highlight utility.""" + return MagicMock(return_value=Image.new("RGB", (10, 10))) # Return dummy image # --- Mock Planners --- def planner_completes_on_step(n: int) -> PlannerCallable: - """Factory for a planner that completes on step index `n`.""" + """Factory for a planner that completes on step index `n-1`.""" def mock_planner( - elements: List[UIElement], user_goal: str, action_history: List[str], step: int + elements: List[UIElement], + user_goal: str, + action_history: List[str], + step: int, + tracking_info: Optional[List[ElementTrack]] = None, # Accept tracking_info ) -> Tuple[LLMActionPlan, Optional[UIElement]]: target_element = elements[0] if elements else None - is_complete = step == n - action = "click" if not is_complete else "press_key" # Vary action + # Goal completes when current step index is n-1 + is_complete = step == (n - 1) + # Example: Click first, then signal completion with a different action + action = "click" if not is_complete else "press_key" element_id = target_element.id if target_element and action == "click" else None - key_info = "Enter" if is_complete else None + key_info = "Enter" if is_complete else None # Example final action plan = LLMActionPlan( reasoning=f"Mock reasoning step {step + 1} for goal '{user_goal}'", @@ -125,36 +188,46 @@ def mock_planner( key_info=key_info, is_goal_complete=is_complete, ) + logger.debug( + f"Mock Planner (complete on {n}): Step {step}, Complete: {is_complete}, Action: {action}" + ) return plan, target_element return mock_planner def planner_never_completes() -> PlannerCallable: - """Planner that never signals goal completion.""" + """Factory for a planner that never signals goal completion.""" def mock_planner( - elements: List[UIElement], user_goal: str, action_history: List[str], step: int + elements: List[UIElement], + user_goal: str, + action_history: List[str], + step: int, + tracking_info: Optional[List[ElementTrack]] = None, # Accept tracking_info ) -> Tuple[LLMActionPlan, Optional[UIElement]]: target_element = elements[0] if elements else None element_id = target_element.id if target_element else None plan = LLMActionPlan( - reasoning=f"Mock reasoning step {step + 1} for goal '{user_goal}', goal not complete", - action="click", + reasoning=f"Mock reasoning step {step + 1}, goal not complete", + action="click", # Always clicks the first element if present element_id=element_id, text_to_type=None, key_info=None, is_goal_complete=False, ) + logger.debug(f"Mock Planner (never complete): Step {step}, Action: click") return plan, target_element return mock_planner def planner_fails() -> PlannerCallable: - """Planner that raises an exception.""" + """Factory for a planner that raises an exception.""" + # Use *args, **kwargs to accept any arguments including tracking_info def failing_planner(*args, **kwargs): + logger.error("Mock Planner: 
Simulating planning failure.") raise ValueError("Mock planning failure") return failing_planner @@ -163,26 +236,39 @@ def failing_planner(*args, **kwargs): # --- Test Functions --- +def test_agent_executor_init(mock_perception_component, mock_execution_component): + """Test basic initialization.""" + planner = MagicMock() + executor = AgentExecutor( + perception=mock_perception_component, + planner=planner, + execution=mock_execution_component, + ) + assert executor._perception is mock_perception_component + assert executor._planner is planner + assert executor._execution is mock_execution_component + assert executor.action_history == [] + assert isinstance(executor.metrics, dict) + + def test_run_completes_goal( mock_perception_component: MockPerception, mock_execution_component: MockExecution, mock_box_drawer: MagicMock, mock_highlighter: MagicMock, temp_output_dir: str, - mocker, # Add mocker fixture + mocker, ): - """Test a successful run where the goal is completed on the second step.""" - # --- Add Mock for take_screenshot to avoid $DISPLAY error in CI --- - mock_final_image = Image.new("RGB", (50, 50), color="green") # Dummy image + """Test a successful run where the goal is completed on the second step (index 1).""" + mock_final_image = Image.new("RGB", (50, 50), color="green") mocker.patch.object( agent_executor, "take_screenshot", return_value=mock_final_image ) - # --- End Mock --- - complete_step_index = 1 + complete_step_n = 2 # Complete ON step 2 (index 1) executor = AgentExecutor( perception=mock_perception_component, - planner=planner_completes_on_step(complete_step_index), + planner=planner_completes_on_step(complete_step_n), # Pass N execution=mock_execution_component, box_drawer=mock_box_drawer, highlighter=mock_highlighter, @@ -193,22 +279,25 @@ def test_run_completes_goal( ) assert result is True, "Should return True when goal is completed." 
- assert ( - mock_perception_component.update_call_count == complete_step_index + 1 - ) # Called for steps 0, 1 - assert ( - len(mock_execution_component.calls) == complete_step_index - ) # Executed only for step 0 + # Perception called for step 0 and step 1 (n=2 steps total) + assert mock_perception_component.update_call_count == complete_step_n + # Execution called only for step 0 (before completion) + assert len(mock_execution_component.calls) == complete_step_n - 1 assert mock_execution_component.calls[0][0] == "click" # Action in step 0 - assert len(executor.action_history) == complete_step_index + # History includes step 0's planned action, step 1's planned action + assert len(executor.action_history) == complete_step_n + # Check output files run_dirs = os.listdir(temp_output_dir) assert len(run_dirs) == 1 run_dir_path = os.path.join(temp_output_dir, run_dirs[0]) assert os.path.exists(os.path.join(run_dir_path, "step_1_state_raw.png")) + assert os.path.exists(os.path.join(run_dir_path, "step_2_state_raw.png")) assert os.path.exists(os.path.join(run_dir_path, "final_state.png")) - assert mock_box_drawer.call_count == complete_step_index + 1 - assert mock_highlighter.call_count == complete_step_index + assert os.path.exists(os.path.join(run_dir_path, "run_metrics.json")) + assert os.path.exists(os.path.join(run_dir_path, "run_log.jsonl")) + assert mock_box_drawer.call_count == complete_step_n + assert mock_highlighter.call_count == complete_step_n def test_run_reaches_max_steps( @@ -217,15 +306,13 @@ def test_run_reaches_max_steps( mock_box_drawer: MagicMock, mock_highlighter: MagicMock, temp_output_dir: str, - mocker, # Add mocker fixture for consistency, patch take_screenshot here too + mocker, ): """Test reaching max_steps without completing the goal.""" - # --- Add Mock for take_screenshot to avoid $DISPLAY error in CI --- - mock_final_image = Image.new("RGB", (50, 50), color="blue") # Dummy image + mock_final_image = Image.new("RGB", (50, 50), color="blue") mocker.patch.object( agent_executor, "take_screenshot", return_value=mock_final_image ) - # --- End Mock --- max_steps = 3 executor = AgentExecutor( @@ -241,12 +328,15 @@ def test_run_reaches_max_steps( ) assert result is False, "Should return False when max steps reached." 
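Since the run artifacts asserted above (`run_metrics.json`, `run_log.jsonl`) are plain JSON, a finished run can be inspected with nothing but the standard library. A sketch, where the run directory name and the record keys are hypothetical examples:

```python
# Sketch: post-hoc inspection of a run's artifacts.
# Directory name and record keys ("step", "action") are hypothetical.
import json
from pathlib import Path

run_dir = Path("test_runs/run_2025-04-07_21-16-35")  # hypothetical run directory

metrics = json.loads((run_dir / "run_metrics.json").read_text())
print(metrics)

# run_log.jsonl holds one JSON object per line (LoggedStep-shaped records).
for line in (run_dir / "run_log.jsonl").read_text().splitlines():
    record = json.loads(line)
    print(record.get("step"), record.get("action"))
```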
+ # Perception called for each step assert mock_perception_component.update_call_count == max_steps + # Execution called for each step assert len(mock_execution_component.calls) == max_steps assert len(executor.action_history) == max_steps + # Visualizers called for each step assert mock_box_drawer.call_count == max_steps assert mock_highlighter.call_count == max_steps - # Also check final state image existence here + # Check final state image existence run_dirs = os.listdir(temp_output_dir) assert len(run_dirs) == 1 run_dir_path = os.path.join(temp_output_dir, run_dirs[0]) @@ -257,20 +347,19 @@ def test_run_perception_failure( mock_perception_component: MockPerception, mock_execution_component: MockExecution, temp_output_dir: str, - mocker, # Add mocker fixture + mocker, ): - """Test that the loop stops if perception fails on the second step.""" - # --- Add Mock for take_screenshot to avoid $DISPLAY error in CI --- - mock_final_image = Image.new("RGB", (50, 50), color="red") # Dummy image + """Test that the loop stops if perception fails (e.g., on the second step).""" + mock_final_image = Image.new("RGB", (50, 50), color="red") mocker.patch.object( agent_executor, "take_screenshot", return_value=mock_final_image ) - # --- End Mock --- - mock_perception_component.fail_on_update = True # Configure mock to fail + # Configure mock to fail on the second update call + mock_perception_component.fail_on_update = True executor = AgentExecutor( perception=mock_perception_component, - planner=planner_never_completes(), + planner=planner_never_completes(), # Planner that would normally continue execution=mock_execution_component, ) @@ -278,13 +367,13 @@ def test_run_perception_failure( goal="Test Perception Fail", max_steps=5, output_base_dir=temp_output_dir ) - assert result is False - assert ( - mock_perception_component.update_call_count == 1 - ) # First call ok, fails during second - assert len(mock_execution_component.calls) == 1 # Only first step executed - assert len(executor.action_history) == 1 - # Check final state image existence + assert result is False # Run fails + # Update called twice: first succeeds, second raises exception + assert mock_perception_component.update_call_count == 2 + # Execution only happens for the first step (step 0) + assert len(mock_execution_component.calls) == 1 + assert len(executor.action_history) == 1 # Only history for step 1 planned + # Check final state image existence (should still be saved) run_dirs = os.listdir(temp_output_dir) assert len(run_dirs) == 1 run_dir_path = os.path.join(temp_output_dir, run_dirs[0]) @@ -295,19 +384,17 @@ def test_run_planning_failure( mock_perception_component: MockPerception, mock_execution_component: MockExecution, temp_output_dir: str, - mocker, # Add mocker fixture + mocker, ): """Test that the loop stops if planning fails.""" - # --- Add Mock for take_screenshot to avoid $DISPLAY error in CI --- - mock_final_image = Image.new("RGB", (50, 50), color="yellow") # Dummy image + mock_final_image = Image.new("RGB", (50, 50), color="yellow") mocker.patch.object( agent_executor, "take_screenshot", return_value=mock_final_image ) - # --- End Mock --- executor = AgentExecutor( perception=mock_perception_component, - planner=planner_fails(), + planner=planner_fails(), # Use the planner that raises an exception execution=mock_execution_component, ) @@ -315,11 +402,13 @@ def test_run_planning_failure( goal="Test Planning Fail", max_steps=5, output_base_dir=temp_output_dir ) - assert result is False - assert ( - 
mock_perception_component.update_call_count == 1 - ) # Perception called once before planning - assert len(mock_execution_component.calls) == 0 # Execution never reached + assert result is False # Run fails + # Perception called once before planning fails + assert mock_perception_component.update_call_count == 1 + # Execution never reached + assert len(mock_execution_component.calls) == 0 + # Action history not updated as planning fails before history update + assert len(executor.action_history) == 0 # Check final state image existence run_dirs = os.listdir(temp_output_dir) assert len(run_dirs) == 1 @@ -331,20 +420,19 @@ def test_run_execution_failure( mock_perception_component: MockPerception, mock_execution_component: MockExecution, temp_output_dir: str, - mocker, # Add mocker fixture + mocker, ): """Test that the loop stops if execution fails.""" - # --- Add Mock for take_screenshot to avoid $DISPLAY error in CI --- - mock_final_image = Image.new("RGB", (50, 50), color="purple") # Dummy image + mock_final_image = Image.new("RGB", (50, 50), color="purple") mocker.patch.object( agent_executor, "take_screenshot", return_value=mock_final_image ) - # --- End Mock --- - mock_execution_component.fail_on_action = "click" # Make the click action fail + # Configure execution mock to fail on 'click' + mock_execution_component.fail_on_action = "click" executor = AgentExecutor( perception=mock_perception_component, - planner=planner_never_completes(), # Planner plans 'click' first + planner=planner_never_completes(), # Planner plans 'click' on step 0 execution=mock_execution_component, ) @@ -352,12 +440,14 @@ def test_run_execution_failure( goal="Test Execution Fail", max_steps=5, output_base_dir=temp_output_dir ) - assert result is False + assert result is False # Run fails + # Perception called once for the first step assert mock_perception_component.update_call_count == 1 - assert len(mock_execution_component.calls) == 1 # Execution was attempted - assert executor.action_history[0].startswith( - "Step 1: Planned click" - ) # History includes planned action + # Execution was attempted once (the click that failed) + assert len(mock_execution_component.calls) == 1 + # History includes the planned action before execution failed + assert len(executor.action_history) == 1 + assert executor.action_history[0].startswith("Step 1: Planned click") # Check final state image existence run_dirs = os.listdir(temp_output_dir) assert len(run_dirs) == 1 @@ -375,14 +465,15 @@ def test_coordinate_scaling_for_click( scaling_factor: int, ): """Verify coordinate scaling is applied before calling execution.click.""" - # --- Add Mock for take_screenshot to avoid $DISPLAY error in CI --- - # (Not strictly necessary here as loop only runs 1 step, but good practice) - mock_final_image = Image.new("RGB", (50, 50), color="orange") # Dummy image + mock_final_image = Image.new("RGB", (50, 50), color="orange") mocker.patch.object( agent_executor, "take_screenshot", return_value=mock_final_image ) - # --- End Mock --- + mocker.patch.object( + agent_executor, "get_scaling_factor", return_value=scaling_factor + ) + # Use MagicMock directly for the planner in this test planner_click = MagicMock( return_value=( LLMActionPlan( @@ -391,38 +482,43 @@ def test_coordinate_scaling_for_click( element_id=mock_element.id, is_goal_complete=False, ), - mock_element, + mock_element, # Return the mock element as the target ) ) - # Patch get_scaling_factor within the agent_executor module - mocker.patch.object( - agent_executor, 
"get_scaling_factor", return_value=scaling_factor - ) executor = AgentExecutor( perception=mock_perception_component, - planner=planner_click, + planner=planner_click, # Use MagicMock planner execution=mock_execution_component, ) executor.run(goal="Test Scaling", max_steps=1, output_base_dir=temp_output_dir) - # Dims: W=200, H=100 - # Bounds: x=0.1, y=0.1, w=0.2, h=0.1 - # Center physical x = (0.1 + 0.2 / 2) * 200 = 40 - # Center physical y = (0.1 + 0.1 / 2) * 100 = 15 + # Verify planner was called correctly (including the new tracking_info arg) + planner_click.assert_called_once() + call_args, call_kwargs = planner_click.call_args + assert call_kwargs["tracking_info"] == [] # MockPerception returns empty list + + # Verify execution call + # MockPerception dims: W=200, H=100 + # MockElement bounds: x=0.1, y=0.1, w=0.2, h=0.1 + # Center physical x = (0.1 + 0.2 / 2) * 200 = 0.2 * 200 = 40 + # Center physical y = (0.1 + 0.1 / 2) * 100 = 0.15 * 100 = 15 expected_logical_x = int(40 / scaling_factor) expected_logical_y = int(15 / scaling_factor) - assert len(mock_execution_component.calls) == 1 + assert len(mock_execution_component.calls) == 1, ( + "Execution component should have been called once" + ) assert mock_execution_component.calls[0] == ( "click", expected_logical_x, expected_logical_y, "single", - ) + ), f"Click coordinates incorrect for scaling factor {scaling_factor}" + # Check final state image existence run_dirs = os.listdir(temp_output_dir) - assert len(run_dirs) == 1 + assert len(run_dirs) == 1, "Expected one run directory" run_dir_path = os.path.join(temp_output_dir, run_dirs[0]) assert os.path.exists(os.path.join(run_dir_path, "final_state.png")) diff --git a/tests/test_core.py b/tests/test_core.py index 36f267d..54833ae 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,9 +1,21 @@ # tests/test_core.py import pytest -# Assuming imports work based on installation/path -from omnimcp.core import plan_action_for_ui, LLMActionPlan -from omnimcp.types import UIElement, Bounds +# Import types from the project +from omnimcp.types import ( + UIElement, + Bounds, + LLMActionPlan, + ElementTrack, + ScreenAnalysis, + ActionDecision, + LLMAnalysisAndDecision, # Added new types +) + +# Import the function to test +from omnimcp.core import plan_action_for_ui + +# Assuming pytest-mock (mocker fixture) is available # --- Fixture for Sample Elements --- @@ -11,121 +23,196 @@ @pytest.fixture def sample_elements() -> list[UIElement]: """Provides a sample list of UIElements similar to the login screen.""" - # Simplified bounds for brevity - bounds: Bounds = (0.1, 0.1, 0.2, 0.05) + # Use slightly more distinct bounds for testing + bounds_tf1: Bounds = (0.1, 0.1, 0.8, 0.05) + bounds_tf2: Bounds = (0.1, 0.2, 0.8, 0.05) # Below first field + bounds_cb: Bounds = (0.1, 0.3, 0.3, 0.05) + bounds_link: Bounds = (0.5, 0.3, 0.4, 0.05) + bounds_btn: Bounds = (0.4, 0.4, 0.2, 0.08) # Centered below + return [ UIElement( id=0, type="text_field", content="", - bounds=bounds, + bounds=bounds_tf1, attributes={"label": "Username:"}, + confidence=0.95, ), UIElement( id=1, type="text_field", content="", - bounds=bounds, + bounds=bounds_tf2, attributes={"is_password": True, "label": "Password:"}, + confidence=0.95, ), UIElement( id=2, type="checkbox", content="Remember Me", - bounds=bounds, + bounds=bounds_cb, attributes={"checked": False}, + confidence=0.90, + ), + UIElement( + id=3, + type="link", + content="Forgot Password?", + bounds=bounds_link, + confidence=0.92, + ), + UIElement( + id=4, 
type="button", content="Login", bounds=bounds_btn, confidence=0.98 ), - UIElement(id=3, type="link", content="Forgot Password?", bounds=bounds), - UIElement(id=4, type="button", content="Login", bounds=bounds), ] # --- Tests for plan_action_for_ui --- -# Use pytest-mock's 'mocker' fixture def test_plan_action_step1_type_user(mocker, sample_elements): """Test planning the first step: typing username.""" user_goal = "Log in as testuser with password pass" action_history = [] + step = 0 + tracking_info = None # No tracking info on first step # Mock the LLM API call within the core module mock_llm_api = mocker.patch("omnimcp.core.call_llm_api") - # Configure the mock to return a specific plan - mock_plan_step1 = LLMActionPlan( - reasoning="Need to type username first.", - action="type", - element_id=0, - text_to_type="testuser", + # --- Setup Mock Response --- + # Configure the mock to return the NEW structure (LLMAnalysisAndDecision) + mock_analysis = ScreenAnalysis( + reasoning="Goal is log in. History empty. Need username in field ID 0.", + new_elements=[ + f"track_{i}" for i in range(len(sample_elements)) + ], # Assume all new + critical_elements_status={"track_0": "Visible"}, + ) + mock_decision = ActionDecision( + analysis_reasoning="Typing username into field ID 0.", + action_type="type", + target_element_id=0, # Target the first text field (ID 0) + parameters={"text_to_type": "testuser"}, is_goal_complete=False, ) - mock_llm_api.return_value = mock_plan_step1 + mock_combined_output = LLMAnalysisAndDecision( + screen_analysis=mock_analysis, action_decision=mock_decision + ) + mock_llm_api.return_value = mock_combined_output + # --- End Mock Response Setup --- # Call the function under test + # plan_action_for_ui internally converts result back to LLMActionPlan for now llm_plan_result, target_element_result = plan_action_for_ui( - elements=sample_elements, user_goal=user_goal, action_history=action_history + elements=sample_elements, + user_goal=user_goal, + action_history=action_history, + step=step, + tracking_info=tracking_info, ) - # Assertions - mock_llm_api.assert_called_once() # Check API was called + # --- Assertions --- + mock_llm_api.assert_called_once() call_args, call_kwargs = mock_llm_api.call_args - # Check prompt content (basic check) + # Check arguments passed to call_llm_api messages = call_args[0] - assert user_goal in messages[0]["content"] + response_model_passed = call_args[1] assert ( - sample_elements[0].to_prompt_repr() in messages[0]["content"] - ) # Check element rendering - # assert "Previous Actions Taken:\n- None" in messages[0]['content'] # Check history rendering - # Check prompt content (basic check) - messages = call_args[0] - prompt_text = messages[0]["content"] # Get the rendered prompt text + response_model_passed is LLMAnalysisAndDecision + ) # Check correct model was expected + + # Check basic prompt content + prompt_text = messages[0]["content"] assert user_goal in prompt_text - assert sample_elements[0].to_prompt_repr() in prompt_text # Check element rendering - # Check history rendering more robustly - assert "**Previous Actions Taken:**" in prompt_text - assert "- None" in prompt_text # Check that '- None' appears when history is empty - # Check returned values - assert llm_plan_result == mock_plan_step1 - assert target_element_result is not None - assert target_element_result.id == 0 + assert sample_elements[0].to_prompt_repr() in prompt_text + assert "**Previous Actions Taken" in prompt_text + assert "**Tracked Elements Context" in prompt_text 
+ assert ( + "(No tracking info available or first frame)" in prompt_text + ) # Check tracking section rendering + + # Check returned values (should be converted LLMActionPlan) + assert isinstance(llm_plan_result, LLMActionPlan) + assert llm_plan_result.action == "type" + assert llm_plan_result.element_id == 0 + assert llm_plan_result.text_to_type == "testuser" + assert llm_plan_result.key_info is None + assert llm_plan_result.is_goal_complete is False + assert "Typing username into field ID 0" in llm_plan_result.reasoning + assert target_element_result is sample_elements[0] # Check correct element returned def test_plan_action_step3_click_login(mocker, sample_elements): """Test planning the third step: clicking login and completing goal.""" user_goal = "Log in as testuser with password pass" - # Simulate state where fields are filled - sample_elements[0].content = "testuser" - sample_elements[1].content = "pass" # Content updated internally - action_history = ["Action: type 'testuser'...", "Action: type 'pass'..."] + # Simulate state where fields are filled (by updating content) + sample_elements[0].content = "testuser" # Username field filled + sample_elements[1].content = "********" # Password field filled (masked) + action_history = [ + "Step 1: Planned type on ElemID 0 Text='testuser'", + "Step 2: Planned type on ElemID 1 Text='********'", + ] + step = 2 # 3rd step (0-indexed) + # Simulate tracking info (assume all elements are persistent and visible) + mock_tracking_info = [ + ElementTrack(track_id=f"track_{el.id}", latest_element=el, last_seen_frame=step) + for el in sample_elements + ] # Mock the LLM API call mock_llm_api = mocker.patch("omnimcp.core.call_llm_api") - # Configure mock for step 3 response - mock_plan_step3 = LLMActionPlan( - reasoning="Fields filled, clicking Login.", - action="click", - element_id=4, - text_to_type=None, - is_goal_complete=True, # Goal completes on this step + # --- Setup Mock Response --- + mock_analysis_step3 = ScreenAnalysis( + reasoning="Username and password seem entered based on history. Login button (TrackID track_4) is visible. 
Ready to click.", + critical_elements_status={"track_4": "Visible"}, + # Assume no new/disappeared elements for simplicity in this mock + ) + mock_decision_step3 = ActionDecision( + analysis_reasoning="Clicking Login button to attempt login.", + action_type="click", + target_element_id=4, # Target the Login button (ID 4) + parameters={}, + is_goal_complete=True, # Assume LLM thinks goal completes after click + ) + mock_combined_output_step3 = LLMAnalysisAndDecision( + screen_analysis=mock_analysis_step3, action_decision=mock_decision_step3 ) - mock_llm_api.return_value = mock_plan_step3 + mock_llm_api.return_value = mock_combined_output_step3 + # --- End Mock Response Setup --- # Call the function llm_plan_result, target_element_result = plan_action_for_ui( - elements=sample_elements, user_goal=user_goal, action_history=action_history + elements=sample_elements, + user_goal=user_goal, + action_history=action_history, + step=step, + tracking_info=mock_tracking_info, # Pass the mock tracking info ) - # Assertions + # --- Assertions --- mock_llm_api.assert_called_once() call_args, call_kwargs = mock_llm_api.call_args messages = call_args[0] - # Check history rendering in prompt - assert action_history[0] in messages[0]["content"] - assert action_history[1] in messages[0]["content"] - # Check results + response_model_passed = call_args[1] + assert response_model_passed is LLMAnalysisAndDecision + + # Check history and tracking rendering in prompt + prompt_text = messages[0]["content"] + assert action_history[0] in prompt_text + assert action_history[1] in prompt_text + assert "**Tracked Elements Context" in prompt_text + assert "TrackID track_4" in prompt_text # Check a specific track mentioned + assert "Status: VISIBLE" in prompt_text # Check status rendering + + # Check results (converted LLMActionPlan) + assert isinstance(llm_plan_result, LLMActionPlan) assert llm_plan_result.is_goal_complete is True assert llm_plan_result.action == "click" - assert target_element_result is not None - assert target_element_result.id == 4 + assert llm_plan_result.element_id == 4 + assert llm_plan_result.text_to_type is None + assert llm_plan_result.key_info is None + assert "Clicking Login button" in llm_plan_result.reasoning + assert target_element_result is sample_elements[4] # Check correct element returned From 363fcc88f64617160fc5e1aa75b73214fcd89093 Mon Sep 17 00:00:00 2001 From: Richard Abrich Date: Mon, 7 Apr 2025 21:16:35 -0400 Subject: [PATCH 4/4] feat: Implement and integrate element tracking and structured planning Implements the SimpleElementTracker (Issue #8) and integrates it throughout the perception-planning-execution loop to provide temporal context. Refactors the planner (core.py) to utilize tracking context and output a structured ActionDecision (aligning with Issue #26), improving maintainability by using PydanticPrompt for schema generation instead of hardcoding in the prompt. Key Changes: Core Functionality: - Add SimpleElementTracker (tracking.py) with scipy-based matching (type, proximity, size). - Integrate tracker into VisualState.update (visual_state.py). - Refactor plan_action_for_ui (core.py) to accept tracking_info, update prompt template to use tracking and generated schemas, call LLM expecting LLMAnalysisAndDecision, and return ActionDecision directly. - Refactor AgentExecutor (agent_executor.py) to pass tracking_info to planner, handle ActionDecision return type, add wait/finish action handlers, and update existing handlers. 
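
  Illustrative sketch (not part of the diff; assumes a configured VisualState
  instance named `visual_state` and an AgentExecutor named `executor`) of the
  refactored planner contract described in the bullets above:

      from omnimcp.core import plan_action_for_ui

      # The planner now consumes tracking context and returns an ActionDecision
      # plus the resolved target UIElement (or None) for the current frame.
      decision, target = plan_action_for_ui(
          elements=visual_state.elements,
          user_goal="Log in as testuser",
          action_history=[],
          step=0,
          tracking_info=visual_state.tracked_elements_view,
      )
      if decision.action_type == "click" and target is not None:
          # Dispatch the way AgentExecutor._action_handlers does below.
          executor._action_handlers["click"](
              decision=decision,
              target_element=target,
              screen_dims=visual_state.screen_dimensions,
              scaling_factor=1,
          )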
Data Structures & Schemas (types.py): - Add ElementTrack, ScreenAnalysis, ActionDecision, LLMAnalysisAndDecision, LoggedStep Pydantic models. - Add PydanticPrompt decorator and docstrings to generate LLM schema documentation. Observability (agent_executor.py, utils.py, __init__.py): - Implement structured metrics collection saved to run_metrics.json. - Implement structured step logging (LoggedStep format) saved to run_log.jsonl. - Refactor setup_run_logging function into utils.py. Testing (tests/): - Add unit tests for SimpleElementTracker (test_tracking.py). - Update tests for core, agent_executor, visual_state to reflect refactoring and fix issues. All tests pass. Environment & Dependencies: - Update Python requirement to >=3.11 in pyproject.toml. - Add pydantic-prompt, scipy, numpy dependencies. - Update uv.lock. - Update CI workflow for Python 3.11/3.12. --- .github/workflows/ci.yml | 4 +- omnimcp/__init__.py | 56 ++-- omnimcp/agent_executor.py | 562 +++++++++++++++++++---------------- omnimcp/core.py | 121 +++----- omnimcp/types.py | 87 +++--- omnimcp/utils.py | 81 +++++ pyproject.toml | 3 +- tests/test_agent_executor.py | 200 ++++++------- tests/test_core.py | 150 ++++------ uv.lock | 184 +----------- 10 files changed, 668 insertions(+), 780 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f7bd27d..026f4db 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -17,10 +17,10 @@ jobs: uses: actions/checkout@v4 # --- 2. Set up Python --- - - name: Set up Python 3.10 + - name: Set up Python 3.11 uses: actions/setup-python@v5 with: - python-version: '3.10' + python-version: '3.11' # --- 3. Install uv --- - name: Install uv diff --git a/omnimcp/__init__.py b/omnimcp/__init__.py index d5b28ba..ba422b2 100644 --- a/omnimcp/__init__.py +++ b/omnimcp/__init__.py @@ -1,45 +1,31 @@ +# omnimcp/__init__.py import sys -import os from loguru import logger -from omnimcp.config import config +# Import config first as it might be needed by others +from .config import config -# Remove default handler -logger.remove() - -# Add stderr handler (keep this functionality) -logger.add(sys.stderr, level=config.LOG_LEVEL.upper() if config.LOG_LEVEL else "INFO") - - -# Define a function to configure run-specific logging -def setup_run_logging(run_dir=None): - """ - Configure additional logging for a specific run. - - Args: - run_dir: Directory to store run-specific logs. If None, logs go to default logs directory. 
+# Now import the setup function from its new location +from .utils import setup_run_logging - Returns: - The log file path - """ - # Determine log file location - if run_dir: - os.makedirs(run_dir, exist_ok=True) - log_file_path = os.path.join(run_dir, "run.log") - else: - log_dir = config.LOG_DIR or "logs" - os.makedirs(log_dir, exist_ok=True) - log_file_path = os.path.join(log_dir, "run_{time:YYYY-MM-DD_HH-mm-ss}.log") +# Remove default handler added by loguru at import time +logger.remove() - # Add run-specific log handler +# --- Initial Setup --- +# Configure base logging (stderr + optional default file) +# This ensures logging works even if AgentExecutor isn't run immediately +if not config.DISABLE_DEFAULT_LOGGING: + setup_run_logging() # Call without run_dir to set up defaults +else: + # If default is disabled, still add stderr at least logger.add( - log_file_path, rotation="50 MB", level="DEBUG", encoding="utf8", enqueue=True + sys.stderr, level=config.LOG_LEVEL.upper() if config.LOG_LEVEL else "INFO" ) + logger.info("Default file logging disabled via config. Stderr logging enabled.") - logger.info(f"Run logging configured. Log path: {log_file_path}") - return log_file_path +logger.info(f"OmniMCP package initialized. Log level: {config.LOG_LEVEL.upper()}") - -# Set up default logging (for non-run use) -if not config.DISABLE_DEFAULT_LOGGING: - setup_run_logging() +# Optionally expose key classes/functions at the package level +# from .agent_executor import AgentExecutor +# from .visual_state import VisualState +# etc. diff --git a/omnimcp/agent_executor.py b/omnimcp/agent_executor.py index 42866eb..adb5105 100644 --- a/omnimcp/agent_executor.py +++ b/omnimcp/agent_executor.py @@ -7,28 +7,24 @@ import json from PIL import Image -from loguru import logger # Use loguru +from loguru import logger -from omnimcp import config, setup_run_logging +# Local imports using relative paths within the package +from . import config -# Import necessary types from omnimcp.types -from omnimcp.types import ( - LLMActionPlan, +from .types import ( UIElement, ElementTrack, LoggedStep, - ScreenAnalysis, - ActionDecision, # Placeholders for future use/logging + ActionDecision, ) - -# SimpleElementTracker is used within VisualState, not directly here -# from omnimcp.tracking import SimpleElementTracker -from omnimcp.utils import ( +from .utils import ( denormalize_coordinates, draw_action_highlight, draw_bounding_boxes, get_scaling_factor, - take_screenshot, # Keep for final screenshot + take_screenshot, + setup_run_logging, ) # --- Interface Definitions --- @@ -37,15 +33,13 @@ class PerceptionInterface(Protocol): """Defines the expected interface for the perception component.""" - elements: List[UIElement] # Current raw elements from parser - tracked_elements_view: List[ - ElementTrack - ] # Current tracked elements view from tracker + elements: List[UIElement] + tracked_elements_view: List[ElementTrack] screen_dimensions: Optional[Tuple[int, int]] _last_screenshot: Optional[Image.Image] - frame_counter: int # The current frame/step number managed by perception + frame_counter: int - def update(self) -> None: ... # Updates all state including tracked_elements_view + def update(self) -> None: ... class ExecutionInterface(Protocol): @@ -57,20 +51,17 @@ def execute_key_string(self, key_info_str: str) -> bool: ... def scroll(self, dx: int, dy: int) -> bool: ... 
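
A minimal sketch (not part of the diff) of how these Protocol interfaces are
satisfied: under PEP 544 structural typing, any object with matching method
signatures type-checks without inheriting from the Protocol, which is what lets
the mock doubles in tests/test_agent_executor.py stand in for real components.
The NoOpExecution name is hypothetical; signatures are assumed to match the
mocks defined later in this patch.

    class NoOpExecution:
        """Hypothetical stub; satisfies ExecutionInterface by shape alone."""

        def click(self, x: int, y: int, click_type: str = "single") -> bool:
            return True

        def type_text(self, text: str) -> bool:
            return True

        def execute_key_string(self, key_info_str: str) -> bool:
            return True

        def scroll(self, dx: int, dy: int) -> bool:
            return True

    execution: ExecutionInterface = NoOpExecution()  # accepted structurally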
-# Updated PlannerCallable signature to accept tracking info +# PlannerCallable expects ActionDecision as the primary return type now PlannerCallable = Callable[ [ # Inputs: - List[UIElement], # Current raw elements for context - str, # User goal - List[str], # Action history descriptions - int, # Current step number - Optional[List[ElementTrack]], # Tracking info (list of current tracks) + List[UIElement], + str, + List[str], + int, + Optional[List[ElementTrack]], ], # Outputs: - # Assume for now planner internally handles ActionDecision and converts back - # to this tuple for compatibility with existing handlers. - # This will change when core.py is fully reworked. - Tuple[LLMActionPlan, Optional[UIElement]], + Tuple[ActionDecision, Optional[UIElement]], ] ImageProcessorCallable = Callable[..., Image.Image] @@ -81,7 +72,7 @@ def scroll(self, dx: int, dy: int) -> bool: ... class AgentExecutor: """ Orchestrates the perceive-plan-act loop, integrating perception with tracking, - planning, execution, and structured logging. + planning (using ActionDecision), execution, and structured logging. """ def __init__( @@ -97,19 +88,22 @@ def __init__( self._planner = planner self._execution = execution self._box_drawer = box_drawer - self._highlighter = highlighter + self._highlighter = highlighter # Visualizer for planned action self.action_history: List[str] = [] + + # Map action names to their handler methods, including new actions self._action_handlers: Dict[str, Callable[..., bool]] = { "click": self._execute_click, "type": self._execute_type, "press_key": self._execute_press_key, "scroll": self._execute_scroll, - # TODO: Add handlers for 'finish', 'wait' if added to action space + "wait": self._execute_wait, + "finish": self._execute_finish, } # Initialize metrics and structured log storage self.metrics: Dict[str, List[Any]] = self._reset_metrics() - self.run_log_data: List[Dict] = [] - logger.info("AgentExecutor initialized.") + self.run_log_data: List[Dict] = [] # Stores LoggedStep data as dicts + logger.info("AgentExecutor initialized with updated action handlers.") def _reset_metrics(self) -> Dict[str, List[Any]]: """Helper to initialize/reset metrics dictionary for a run.""" @@ -119,32 +113,31 @@ def _reset_metrics(self) -> Dict[str, List[Any]]: "planning_times_s": [], "execution_times_s": [], "elements_per_step": [], - "active_tracks_per_step": [], # Added metric + "active_tracks_per_step": [], "action_results": [], # Boolean success/fail } - # --- Private Action Handlers --- - # These currently consume LLMActionPlan. They might need updates - # later if the planner starts returning ActionDecision directly to executor. + # --- Private Action Handlers (Updated to use ActionDecision) --- def _execute_click( self, - plan: LLMActionPlan, + decision: ActionDecision, target_element: Optional[UIElement], - screen_dims: Tuple[int, int], + screen_dims: Optional[Tuple[int, int]], scaling_factor: int, ) -> bool: - """Handles the 'click' action.""" + """Handles the 'click' action based on ActionDecision.""" if not target_element: + # The planner should have found the element if target_element_id was set. + # If it's None here, the planner failed to find the element ID specified in the decision. logger.error( - f"Click planned for element ID {plan.element_id} but element not found by planner." + f"Click planned for ElemID {decision.target_element_id} but element could not be resolved in current frame." 
) return False if not screen_dims: logger.error("Cannot execute click without screen dimensions.") return False - # Denormalize using actual screen dimensions from perception abs_x, abs_y = denormalize_coordinates( target_element.bounds[0], target_element.bounds[1], @@ -155,24 +148,30 @@ def _execute_click( ) logical_x = int(abs_x / scaling_factor) logical_y = int(abs_y / scaling_factor) + click_type = decision.parameters.get( + "click_type", "single" + ) # Get optional param logger.debug( - f"Executing click at logical coords: ({logical_x}, {logical_y}) on Element ID {target_element.id}" + f"Executing {click_type} click at logical coords: ({logical_x}, {logical_y}) on Element ID {target_element.id}" ) - return self._execution.click(logical_x, logical_y, click_type="single") + return self._execution.click(logical_x, logical_y, click_type=click_type) def _execute_type( self, - plan: LLMActionPlan, + decision: ActionDecision, target_element: Optional[UIElement], - screen_dims: Tuple[int, int], + screen_dims: Optional[Tuple[int, int]], scaling_factor: int, ) -> bool: - """Handles the 'type' action.""" - if plan.text_to_type is None: - logger.error("Action 'type' planned but text_to_type is null.") + """Handles the 'type' action based on ActionDecision.""" + text_to_type = decision.parameters.get("text_to_type") + if text_to_type is None: # Check for None specifically, empty string is allowed + logger.error( + "Action 'type' planned but 'text_to_type' missing in parameters." + ) return False - # Optional: Click target element first if specified + # Optional: Click target element first if specified by target_element_id and found if target_element and screen_dims: abs_x, abs_y = denormalize_coordinates( target_element.bounds[0], @@ -191,93 +190,141 @@ def _execute_type( logger.warning( "Failed to click target before typing, attempting type anyway." ) - time.sleep(0.2) # Short pause after click + time.sleep(0.2) # Short pause after potential click - logger.debug(f"Executing type: '{plan.text_to_type[:50]}...'") - return self._execution.type_text(plan.text_to_type) + logger.debug(f"Executing type: '{text_to_type[:50]}...'") + return self._execution.type_text(text_to_type) def _execute_press_key( self, - plan: LLMActionPlan, + decision: ActionDecision, target_element: Optional[UIElement], - screen_dims: Tuple[int, int], + screen_dims: Optional[Tuple[int, int]], scaling_factor: int, ) -> bool: - """Handles the 'press_key' action.""" - if not plan.key_info: - logger.error("Action 'press_key' planned but key_info is null.") + """Handles the 'press_key' action based on ActionDecision.""" + key_info = decision.parameters.get("key_info") + if not key_info: + logger.error( + "Action 'press_key' planned but 'key_info' missing in parameters." 
+            )
             return False
-        logger.debug(f"Executing press_key: '{plan.key_info}'")
-        return self._execution.execute_key_string(plan.key_info)
+        logger.debug(f"Executing press_key: '{key_info}'")
+        return self._execution.execute_key_string(key_info)
 
     def _execute_scroll(
         self,
-        plan: LLMActionPlan,
+        decision: ActionDecision,
         target_element: Optional[UIElement],
-        screen_dims: Tuple[int, int],
+        screen_dims: Optional[Tuple[int, int]],
         scaling_factor: int,
     ) -> bool:
-        """Handles the 'scroll' action."""
-        # Basic scroll logic based on reasoning hint (can be improved)
-        scroll_dir = plan.reasoning.lower()
-        scroll_amount_steps = 3  # Arbitrary amount
-        scroll_dy = (
-            -scroll_amount_steps
-            if "down" in scroll_dir
-            else scroll_amount_steps
-            if "up" in scroll_dir
-            else 0
-        )
-        scroll_dx = (
-            -scroll_amount_steps
-            if "left" in scroll_dir
-            else scroll_amount_steps
-            if "right" in scroll_dir
-            else 0
-        )
+        """Handles the 'scroll' action based on ActionDecision."""
+        # Attempt to get explicit scroll deltas from parameters first
+        scroll_dx = decision.parameters.get("scroll_dx", 0)
+        scroll_dy = decision.parameters.get("scroll_dy", 0)
+        scroll_dir = decision.parameters.get("scroll_direction", "").lower()
+        scroll_steps = decision.parameters.get("scroll_steps", 3)
+
+        # Fallback to reasoning hint if parameters are missing
+        if scroll_dx == 0 and scroll_dy == 0 and not scroll_dir:
+            scroll_dir_reasoning = decision.analysis_reasoning.lower()
+            if "down" in scroll_dir_reasoning:
+                scroll_dy = -scroll_steps
+            elif "up" in scroll_dir_reasoning:
+                scroll_dy = scroll_steps
+            if "left" in scroll_dir_reasoning:
+                scroll_dx = -scroll_steps
+            elif "right" in scroll_dir_reasoning:
+                scroll_dx = scroll_steps
+        elif scroll_dx == 0 and scroll_dy == 0:  # Derive deltas from direction string
+            if "down" in scroll_dir:
+                scroll_dy = -scroll_steps
+            elif "up" in scroll_dir:
+                scroll_dy = scroll_steps
+            if "left" in scroll_dir:
+                scroll_dx = -scroll_steps
+            elif "right" in scroll_dir:
+                scroll_dx = scroll_steps
 
         if scroll_dx != 0 or scroll_dy != 0:
             logger.debug(f"Executing scroll: dx={scroll_dx}, dy={scroll_dy}")
             return self._execution.scroll(scroll_dx, scroll_dy)
         else:
             logger.warning(
-                "Scroll planned but direction unclear from reasoning, skipping scroll."
+                "Scroll planned but direction/amount unclear, skipping scroll."
             )
             return True  # No action needed counts as success
 
+    def _execute_wait(
+        self,
+        decision: ActionDecision,
+        target_element: Optional[UIElement],
+        screen_dims: Optional[Tuple[int, int]],
+        scaling_factor: int,
+    ) -> bool:
+        """Handles the 'wait' action."""
+        wait_duration = decision.parameters.get("wait_duration_s", 1.0)  # Default 1s
+        try:
+            wait_duration = float(wait_duration)
+            if wait_duration < 0:
+                wait_duration = 0
+        except (ValueError, TypeError):
+            logger.warning(
+                f"Invalid wait_duration '{wait_duration}', defaulting to 1.0s."
+            )
+            wait_duration = 1.0
+        # Define a reasonable maximum wait to prevent infinite loops
+        max_wait = 30.0
+        wait_duration = min(wait_duration, max_wait)
+        logger.info(f"Executing wait for {wait_duration:.1f} seconds...")
+        time.sleep(wait_duration)
+        return True
+
+    def _execute_finish(
+        self,
+        decision: ActionDecision,
+        target_element: Optional[UIElement],
+        screen_dims: Optional[Tuple[int, int]],
+        scaling_factor: int,
+    ) -> bool:
+        """Handles the 'finish' action (no-op, loop breaks)."""
+        logger.info(
+            "Executing finish action planned by LLM (indicates goal met or stuck)."
+        )
+        # The main loop checks the is_goal_complete flag from the decision.
+ return True # The action itself succeeds trivially + + # --- Main Execution Loop --- + + # This `run` method implements an explicit, sequential perceive-plan-act loop. + # Alternative agent architectures exist, such as: + # - ReAct (Reasoning-Acting): Where the LLM explicitly decides between + # reasoning steps and action steps. + # - Callback-driven: Where UI events or timers might trigger agent actions. + # - More complex state machines or graph-based execution flows. + # This simple sequential loop provides a clear baseline. Future work might explore + # these alternatives for more complex or reactive tasks. def run( self, goal: str, max_steps: int = 10, output_base_dir: Optional[str] = None ) -> bool: """Runs the main perceive-plan-act loop to achieve the goal.""" - # --- Setup --- - if output_base_dir is None: - output_base_dir = config.RUN_OUTPUT_DIR - run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") - run_output_dir = os.path.join(output_base_dir, run_timestamp) - try: - os.makedirs(run_output_dir, exist_ok=True) - log_path = setup_run_logging(run_output_dir) - except Exception as setup_e: - logger.error(f"Failed during run setup (directory/logging): {setup_e}") - return False - logger.info(f"Starting agent run. Goal: '{goal}'") - logger.info(f"Saving outputs to: {run_output_dir}") - logger.info(f"Run log file: {log_path}") + run_output_dir, log_path = self._setup_run(goal, output_base_dir) + if not run_output_dir: + return False # Exit if setup failed self.action_history = [] self.metrics = self._reset_metrics() self.run_log_data = [] goal_achieved = False - final_step_success = True + final_step_success = True # Tracks if any step failed critically last_step_completed = -1 - # --- End Setup --- - + scaling_factor = 1 try: scaling_factor = get_scaling_factor() logger.info(f"Using display scaling factor: {scaling_factor}") except Exception as e: logger.error(f"Failed to get scaling factor: {e}. Assuming 1.") - scaling_factor = 1 # --- Main Loop --- for step in range(max_steps): @@ -285,33 +332,31 @@ def run( logger.info(f"\n--- Step {step + 1}/{max_steps} ---") step_img_prefix = f"step_{step + 1}" - # --- Initialize Step Variables --- + # Initialize Step Variables current_image: Optional[Image.Image] = None current_elements: List[UIElement] = [] tracked_elements_view: List[ElementTrack] = [] screen_dimensions: Optional[Tuple[int, int]] = None tracking_info_for_log: Optional[List[Dict]] = None perception_duration = 0.0 - llm_plan: Optional[LLMActionPlan] = None # Assumed output for now - llm_analysis_log: Optional[Dict] = None # Placeholder - llm_decision_log: Optional[Dict] = None # Placeholder + action_decision: Optional[ActionDecision] = None # Use ActionDecision type + analysis_log: Optional[Dict] = None # For logging analysis part + decision_log: Optional[Dict] = None # For logging decision part target_element: Optional[UIElement] = None planning_duration = 0.0 - action_success = False + action_success = False # Result of the current step's action executed_action_type = "none" executed_params: Dict[str, Any] = {} executed_target_id: Optional[int] = None execution_duration = 0.0 step_screenshot_path: Optional[str] = None - # --- End Initialize Step Variables --- - # 1. Perceive State (including Tracking) + # 1. 
Perceive State & Update Tracking perception_start_time = time.time() try: logger.debug("Updating visual state and tracking...") - self._perception.update() # This now internally calls the tracker + self._perception.update() # Assumes this updates internal tracker - # Retrieve results from the perception interface current_elements = self._perception.elements tracked_elements_view = self._perception.tracked_elements_view current_image = self._perception._last_screenshot @@ -320,7 +365,7 @@ def run( if not current_image or not screen_dimensions: raise RuntimeError( - "Failed to get valid screenshot or dimensions during perception." + "Perception failed: Missing image or dimensions." ) logger.info( @@ -328,28 +373,29 @@ def run( f"{len(tracked_elements_view)} active tracks. " f"Time: {perception_duration:.2f}s." ) - # Prepare tracking info for structured logging - tracking_info_for_log = [ - t.model_dump(mode="json") for t in tracked_elements_view - ] + try: + tracking_info_for_log = [ + t.model_dump(mode="json") for t in tracked_elements_view + ] + except Exception as track_dump_err: + logger.warning( + f"Could not serialize tracking info for log: {track_dump_err}" + ) + tracking_info_for_log = [{"error": "serialization failed"}] except Exception as perceive_e: logger.error(f"Perception failed: {perceive_e}", exc_info=True) final_step_success = False - # Log partial metrics - self.metrics["perception_times_s"].append( - round(time.time() - perception_start_time, 3) - ) + perc_time = round(time.time() - perception_start_time, 3) + self.metrics["perception_times_s"].append(perc_time) self.metrics["elements_per_step"].append(0) self.metrics["active_tracks_per_step"].append(0) - # Attempt to log step failure before breaking step_duration = time.time() - step_start_time self._log_step_data( step, goal, - step_screenshot_path, - current_elements, - tracking_info_for_log, + None, + [], None, None, None, @@ -357,14 +403,13 @@ def run( None, {}, False, - perception_duration, + perc_time, 0.0, 0.0, step_duration, ) - break + break # Stop run - # Log perception metrics on success self.metrics["perception_times_s"].append(round(perception_duration, 3)) self.metrics["elements_per_step"].append(len(current_elements)) self.metrics["active_tracks_per_step"].append(len(tracked_elements_view)) @@ -381,9 +426,7 @@ def run( try: if current_image: current_image.save(raw_state_path) - logger.debug(f"Saved raw state image to {raw_state_path}") if self._box_drawer: - # Draw boxes on raw elements for current frame visualization parsed_state_path = os.path.join( run_output_dir, f"{step_img_prefix}_state_parsed.png" ) @@ -409,41 +452,41 @@ def run( planning_start_time = time.time() try: logger.debug("Planning next action...") - # Pass the tracked elements view to the planner - llm_plan, target_element = self._planner( - elements=current_elements, # Raw elements for context + # Planner now returns ActionDecision and target element + action_decision, target_element = self._planner( + elements=current_elements, user_goal=goal, action_history=self.action_history, step=step, - tracking_info=tracked_elements_view, # Pass tracked view + tracking_info=tracked_elements_view, ) planning_duration = time.time() - planning_start_time logger.info(f"Planning completed in {planning_duration:.2f}s.") - if llm_plan: - # Log details from the plan - logger.info(f"LLM Reasoning: {llm_plan.reasoning}") - logger.info( - f"LLM Plan: Action={llm_plan.action}, TargetID={llm_plan.element_id}, " - f"GoalComplete={llm_plan.is_goal_complete}" - ) - 
# Set execution details based on plan - executed_action_type = llm_plan.action - executed_target_id = llm_plan.element_id - executed_params = {} - if llm_plan.text_to_type is not None: - executed_params["text_to_type"] = llm_plan.text_to_type - if llm_plan.key_info is not None: - executed_params["key_info"] = llm_plan.key_info - else: - raise ValueError("Planner returned None for LLMActionPlan") + if not action_decision: + raise ValueError("Planner returned None for ActionDecision") + + # Log details & set execution vars from ActionDecision + logger.info( + f"LLM Decision: Action={action_decision.action_type}, TargetElemID={action_decision.target_element_id}, Params={action_decision.parameters}, GoalComplete={action_decision.is_goal_complete}" + ) + logger.info( + f"LLM Analysis Reasoning: {action_decision.analysis_reasoning}" + ) + executed_action_type = action_decision.action_type + executed_target_id = ( + action_decision.target_element_id + ) # Current frame ID + executed_params = action_decision.parameters or {} + # Store dict representation for logging + decision_log = action_decision.model_dump(mode="json") + # analysis_log would come from the ScreenAnalysis part if returned separately by planner except Exception as plan_e: logger.error(f"Planning failed: {plan_e}", exc_info=True) final_step_success = False - self.metrics["planning_times_s"].append( - round(time.time() - planning_start_time, 3) - ) + plan_time = round(time.time() - planning_start_time, 3) + self.metrics["planning_times_s"].append(plan_time) step_duration = time.time() - step_start_time self._log_step_data( step, @@ -453,13 +496,12 @@ def run( tracking_info_for_log, None, None, - None, "planning_error", None, {}, False, perception_duration, - planning_duration, + plan_time, 0.0, step_duration, ) @@ -467,66 +509,64 @@ def run( self.metrics["planning_times_s"].append(round(planning_duration, 3)) - # 4. Check Goal Completion - if llm_plan.is_goal_complete: + # 4. Check Goal Completion (use ActionDecision) + if action_decision.is_goal_complete: logger.success("LLM determined the goal is achieved!") goal_achieved = True - # Log step data before potential break - # 5. Validate Action Requirements + # 5. Validate Action Requirements (use ActionDecision) if ( - llm_plan.action == "click" + not goal_achieved + and action_decision.action_type == "click" and target_element is None - and not goal_achieved ): logger.error( - f"Action 'click' planned for element ID {llm_plan.element_id}, but element not found. Stopping." + f"Action 'click' planned for element ID {action_decision.target_element_id}, but planner did not find element. Stopping." ) final_step_success = False - # Log step data before potential break - - # 6. Visualize Planned Action - if self._highlighter and current_image and llm_plan: - highlight_img_path = os.path.join( - run_output_dir, f"{step_img_prefix}_action_highlight.png" - ) - try: - # Target element might be None if action doesn't require it - highlighted_image = self._highlighter( - current_image, - element=target_element, - plan=llm_plan, - color="red", - width=3, - ) - highlighted_image.save(highlight_img_path) - except Exception as draw_highlight_e: - logger.warning( - f"Could not save action visualization image: {draw_highlight_e}" - ) - - # 7. Update Action History (Append before execution) - action_desc = f"Step {step + 1}: Planned {llm_plan.action}" + # Log step data before breaking loop + + # 6. 
Visualize Planned Action (TODO: Needs update for ActionDecision) + if self._highlighter and current_image and action_decision: + # highlight_img_path = os.path.join( + # run_output_dir, f"{step_img_prefix}_action_highlight.png" + # ) + # try: + # # Needs draw_action_highlight updated to accept ActionDecision + # # highlighted_image = self._highlighter( + # # current_image, element=target_element, plan=action_decision, color="red", width=3 + # # ) + # # highlighted_image.save(highlight_img_path) + # logger.debug("Skipping action highlight visualization until updated for ActionDecision.") + # except Exception as draw_highlight_e: + # logger.warning(f"Could not save action visualization image: {draw_highlight_e}") + pass # Skip highlighting for now + + # 7. Update Action History (use ActionDecision) + action_desc = f"Step {step + 1}: Planned {action_decision.action_type}" if target_element: - action_desc += f" on ElemID {target_element.id}" + action_desc += f" on ElemID {target_element.id} ('{target_element.content[:20]}...')" + elif executed_target_id is not None: + action_desc += f" on ElemID {executed_target_id} (not found)" if "text_to_type" in executed_params: action_desc += f" Text='{executed_params['text_to_type'][:20]}...'" if "key_info" in executed_params: action_desc += f" Key='{executed_params['key_info']}'" + if "wait_duration_s" in executed_params: + action_desc += f" Wait={executed_params['wait_duration_s']}s" self.action_history.append(action_desc) logger.debug(f"Added to history: {action_desc}") - # 8. Execute Action + # 8. Execute Action (if needed and possible) execution_start_time = time.time() - if ( - not goal_achieved and final_step_success - ): # Only execute if needed and possible + if not goal_achieved and final_step_success: logger.info(f"Executing action: {executed_action_type}...") try: handler = self._action_handlers.get(executed_action_type) if handler: + # Pass ActionDecision to handlers action_success = handler( - plan=llm_plan, + decision=action_decision, target_element=target_element, screen_dims=screen_dimensions, scaling_factor=scaling_factor, @@ -541,7 +581,7 @@ def run( logger.error( f"Action '{executed_action_type}' execution failed." 
) - final_step_success = False + final_step_success = False # Mark run as failed else: logger.success("Action executed successfully.") @@ -550,30 +590,25 @@ def run( f"Exception during action execution: {exec_e}", exc_info=True ) action_success = False - final_step_success = False + final_step_success = False # Mark run as failed else: - # Goal already met or prior failure, skip execution action_success = True # Treat skipped step as 'successful' non-action logger.info(f"Skipping execution for step {step + 1}.") - execution_duration = time.time() - execution_start_time - # --- Log Execution Metrics and Action Result --- + # --- Log Metrics & Step Data --- self.metrics["execution_times_s"].append(round(execution_duration, 3)) self.metrics["action_results"].append(action_success) - - # --- Log Step Data to Protocol --- step_duration = time.time() - step_start_time self.metrics["step_times_s"].append(round(step_duration, 3)) - self._log_step_data( + self._log_step_data( # Log full step data step, goal, step_screenshot_path, current_elements, tracking_info_for_log, - llm_analysis_log, - llm_decision_log, - llm_plan, + analysis_log, + decision_log, # Pass logs executed_action_type, executed_target_id, executed_params, @@ -584,23 +619,25 @@ def run( step_duration, ) - # Check if run should terminate based on this step's outcome + # --- Check Termination Conditions --- if goal_achieved or not final_step_success: last_step_completed = step - break + logger.info( + f"Run ending at step {step + 1} (Goal achieved: {goal_achieved}, Step Success: {final_step_success})" + ) + break # Exit the loop - # Mark step completed if loop continues - last_step_completed = step + last_step_completed = step # Mark step completed if loop continues # Wait for UI to settle - time.sleep(1.0) # Make configurable or dynamic later + time.sleep(1.0) # Make configurable later # --- End of Loop --- logger.info("\n--- Agent Run Finished ---") if goal_achieved: logger.success("Overall goal marked as achieved.") elif not final_step_success: - logger.error(f"Run failed at Step {last_step_completed + 1}.") + logger.error(f"Run failed critically at Step {last_step_completed + 1}.") else: logger.warning( f"Run finished after {max_steps} steps without achieving goal." @@ -617,12 +654,36 @@ def run( final_image = take_screenshot() if final_image: final_image.save(final_state_img_path) - logger.info(f"Saved final screen state to {final_state_img_path}") + logger.info(f"Saved final screen state to {final_state_img_path}") except Exception as save_final_e: logger.warning(f"Could not save final state image: {save_final_e}") return goal_achieved + def _setup_run( + self, goal: str, output_base_dir: Optional[str] + ) -> Tuple[Optional[str], Optional[str]]: + """Sets up directories and logging for a new run.""" + if output_base_dir is None: + output_base_dir = config.RUN_OUTPUT_DIR + run_timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") + run_output_dir = os.path.join(output_base_dir, run_timestamp) + log_path = None # Initialize + try: + os.makedirs(run_output_dir, exist_ok=True) + # Call the utility function to configure run-specific logging + log_path = setup_run_logging( + run_dir=run_output_dir + ) # Pass the specific dir + logger.info(f"Starting agent run. 
Goal: '{goal}'") + logger.info(f"Saving outputs to: {run_output_dir}") + if log_path: + logger.info(f"Run log file: {log_path}") + return run_output_dir, log_path + except Exception as setup_e: + logger.critical(f"Failed during run setup (directory/logging): {setup_e}") + return None, None # Return None tuple on failure + def _log_step_data( self, step_index, @@ -630,9 +691,8 @@ def _log_step_data( screenshot_path, elements, tracking_context, - analysis, - decision, - raw_plan, + analysis_log, + decision_log, # Expect dicts exec_action, exec_target_id, exec_params, @@ -642,35 +702,20 @@ def _log_step_data( exec_time, step_time, ): - """Helper to create and store the structured log entry for a step.""" + """Helper to create and store the structured log entry using LoggedStep.""" try: - # Convert Pydantic models to dicts for logging if they are not None - analysis_dict = ( - analysis.model_dump(mode="json") - if isinstance(analysis, (ScreenAnalysis)) - else analysis - ) - decision_dict = ( - decision.model_dump(mode="json") - if isinstance(decision, (ActionDecision)) - else decision - ) - raw_plan_dict = ( - raw_plan.model_dump(mode="json") - if isinstance(raw_plan, (LLMActionPlan)) - else raw_plan - ) - + # We pass decision_log which is already a dict from model_dump + # analysis_log is currently None, raw_llm_action_plan is None step_log_entry = LoggedStep( step_index=step_index, goal=goal, screenshot_path=screenshot_path, input_elements_count=len(elements), - tracking_context=tracking_context, # Assumes already list of dicts + tracking_context=tracking_context, action_history_at_step=list(self.action_history), # Use current history - llm_analysis=analysis_dict, - llm_decision=decision_dict, - raw_llm_action_plan=raw_plan_dict, + llm_analysis=analysis_log, # Placeholder for now + llm_decision=decision_log, # Log the ActionDecision dict + raw_llm_action_plan=None, # No longer using this format executed_action=exec_action, executed_target_element_id=exec_target_id, executed_parameters=exec_params, @@ -680,10 +725,12 @@ def _log_step_data( execution_time_s=round(exec_time, 3), step_time_s=round(step_time, 3), ) + # Append the dictionary representation to the list self.run_log_data.append(step_log_entry.model_dump(mode="json")) except Exception as log_err: logger.warning( - f"Failed to create or store structured log for step {step_index + 1}: {log_err}" + f"Failed to create/store structured log for step {step_index + 1}: {log_err}", + exc_info=True, ) def _save_run_outputs( @@ -693,37 +740,42 @@ def _save_run_outputs( # Save Metrics metrics_path = os.path.join(run_output_dir, "run_metrics.json") try: - # Calculate summary stats (handle empty lists) + # Calculate summary stats safely handling potential empty lists + metrics_details = self.metrics valid_step_times = [ - t for t in self.metrics["step_times_s"] if isinstance(t, (int, float)) + t + for t in metrics_details["step_times_s"] + if isinstance(t, (int, float)) ] valid_perc_times = [ t - for t in self.metrics["perception_times_s"] + for t in metrics_details["perception_times_s"] if isinstance(t, (int, float)) ] valid_plan_times = [ t - for t in self.metrics["planning_times_s"] + for t in metrics_details["planning_times_s"] if isinstance(t, (int, float)) ] valid_exec_times = [ t - for t in self.metrics["execution_times_s"] + for t in metrics_details["execution_times_s"] if isinstance(t, (int, float)) ] valid_elem_counts = [ - c for c in self.metrics["elements_per_step"] if isinstance(c, int) + c for c in 
metrics_details["elements_per_step"] if isinstance(c, int) ] valid_track_counts = [ - c for c in self.metrics["active_tracks_per_step"] if isinstance(c, int) + c + for c in metrics_details["active_tracks_per_step"] + if isinstance(c, int) ] summary_metrics = { - "total_steps_attempted": len(self.metrics["step_times_s"]), - "last_step_completed": last_step_completed + 1, + "total_steps_attempted": len(metrics_details["step_times_s"]), + "last_step_completed": last_step_completed + 1, # 1-based index "goal_achieved": goal_achieved, - "final_step_success": final_step_success, + "final_step_success": final_step_success, # Did any step fail critically? "avg_step_time_s": round( sum(valid_step_times) / len(valid_step_times), 3 ) @@ -755,29 +807,31 @@ def _save_run_outputs( if valid_track_counts else 0, "successful_actions": sum( - 1 for r in self.metrics["action_results"] if r is True + 1 for r in metrics_details["action_results"] if r is True ), "failed_actions": sum( - 1 for r in self.metrics["action_results"] if r is False + 1 for r in metrics_details["action_results"] if r is False ), } - full_metrics_data = {"summary": summary_metrics, "details": self.metrics} + full_metrics_data = {"summary": summary_metrics, "details": metrics_details} with open(metrics_path, "w") as f: json.dump(full_metrics_data, f, indent=4) logger.info(f"Saved run metrics to {metrics_path}") - logger.info(f"Metrics Summary: {summary_metrics}") + logger.info(f"Metrics Summary: {json.dumps(summary_metrics)}") except Exception as metrics_e: - logger.warning(f"Could not save or summarize metrics: {metrics_e}") + logger.warning( + f"Could not save or summarize metrics: {metrics_e}", exc_info=True + ) - # Save Structured Log Data + # Save Structured Log Data (JSON Lines format) log_protocol_path = os.path.join(run_output_dir, "run_log.jsonl") try: with open(log_protocol_path, "w") as f: for step_data_dict in self.run_log_data: - # Ensure complex objects within are serializable; model_dump helps - f.write( - json.dumps(step_data_dict, default=str) + "\n" - ) # Use default=str as fallback + # Use default=str as a fallback for non-serializable types + f.write(json.dumps(step_data_dict, default=str) + "\n") logger.info(f"Saved structured run log to {log_protocol_path}") except Exception as log_protocol_e: - logger.warning(f"Could not save structured run log: {log_protocol_e}") + logger.warning( + f"Could not save structured run log: {log_protocol_e}", exc_info=True + ) diff --git a/omnimcp/core.py b/omnimcp/core.py index ac9cd15..4167296 100644 --- a/omnimcp/core.py +++ b/omnimcp/core.py @@ -1,23 +1,22 @@ # omnimcp/core.py -from typing import List, Tuple, Optional # Added Dict, Any + +from typing import List, Tuple, Optional import platform -# Import necessary types -from .types import ( +from omnimcp.types import ( UIElement, - ElementTrack, # Added - LLMActionPlan, # Still needed for temporary return value - LLMAnalysisAndDecision, # Added + ElementTrack, + ActionDecision, + LLMAnalysisAndDecision, ) -from .utils import ( +from omnimcp.utils import ( render_prompt, logger, ) -from .completions import call_llm_api -from .config import config # Import config if needed, e.g., for model name +from omnimcp.completions import call_llm_api +from omnimcp.config import config -# --- Updated Prompt Template --- PROMPT_TEMPLATE = """ You are an expert UI automation assistant. Your task is to analyze the current UI state, including changes from the previous step, and then decide the single best next action to achieve a given goal. 
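
For context, a sketch (values illustrative, not from a real run) of the combined
object this template asks the LLM to emit, validated against the
LLMAnalysisAndDecision model added to types.py later in this patch:

    from omnimcp.types import LLMAnalysisAndDecision

    example = {
        "screen_analysis": {
            "reasoning": "Login form visible; username field (track_0) present.",
            "disappeared_elements": [],
            "temporarily_missing_elements": [],
            "new_elements": ["track_0", "track_1"],
            "critical_elements_status": {"track_0": "Visible"},
        },
        "action_decision": {
            "analysis_reasoning": "Type the username into field ID 0.",
            "action_type": "type",
            "target_element_id": 0,
            "parameters": {"text_to_type": "testuser"},
            "is_goal_complete": False,
        },
    }
    validated = LLMAnalysisAndDecision.model_validate(example)  # pydantic v2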
@@ -85,7 +84,7 @@
       "text_to_type": "",
       "key_info": "",
       "wait_duration_s": <float>
-      # Add other parameters as needed
+      # Add other parameters as needed (e.g., scroll_direction, scroll_steps)
     },
     "is_goal_complete": <true/false>
 }
@@ -103,7 +102,7 @@
 * If a required element is missing (use Tracked Elements Context), choose an appropriate action like 'wait' or 'press_key' if a keyboard alternative exists, or explain the issue in `screen_analysis.reasoning` and potentially choose 'finish' with `is_goal_complete: false` if stuck. Do NOT hallucinate `target_element_id` for missing elements.
 """
 
-# --- Updated Planner Function ---
+# --- Planner Function ---
 
 
 def plan_action_for_ui(
@@ -111,10 +110,8 @@ def plan_action_for_ui(
     user_goal: str,
     action_history: List[str] | None = None,
     step: int = 0,
-    tracking_info: Optional[List[ElementTrack]] = None,  # Accept list of ElementTrack
-) -> Tuple[
-    LLMActionPlan, Optional[UIElement]
-]:  # Still return LLMActionPlan temporarily
+    tracking_info: Optional[List[ElementTrack]] = None,
+) -> Tuple[ActionDecision, Optional[UIElement]]:  # Updated return type
     """
     Uses an LLM to analyze UI state with tracking and plan the next action.
 
@@ -126,7 +123,7 @@ def plan_action_for_ui(
         tracking_info: List of ElementTrack objects from the tracker.
 
     Returns:
-        A tuple containing an LLMActionPlan (converted from ActionDecision)
+        A tuple containing the ActionDecision object from the LLM
         and the targeted UIElement (if any) found in the current frame.
     """
     action_history = action_history or []
@@ -135,7 +132,7 @@ def plan_action_for_ui(
         f"History: {len(action_history)} steps. Tracking: {len(tracking_info or [])} active tracks."
     )
 
-    # Limit elements and tracks passed to the prompt for brevity
+    # Limit elements and tracks passed to the prompt for performance/context window
    MAX_ELEMENTS_IN_PROMPT = 50
    MAX_TRACKS_IN_PROMPT = 50
    elements_for_prompt = elements[:MAX_ELEMENTS_IN_PROMPT]
@@ -143,16 +140,17 @@ def plan_action_for_ui(
         tracking_info[:MAX_TRACKS_IN_PROMPT] if tracking_info else None
     )
 
+    # Render the prompt using the template and current context
     prompt = render_prompt(
         PROMPT_TEMPLATE,
         user_goal=user_goal,
         elements=elements_for_prompt,
         action_history=action_history,
         platform=platform.system(),
-        tracking_info=tracking_info_for_prompt,  # Pass tracking info
+        tracking_info=tracking_info_for_prompt,  # Include tracking info
     )
 
-    # System prompt reinforcing the JSON structure
+    # Define the system prompt guiding the LLM's output format
     system_prompt = (
         "You are an AI assistant. 
Respond ONLY with a single valid JSON object " "containing the keys 'screen_analysis' and 'action_decision', conforming " @@ -162,86 +160,47 @@ def plan_action_for_ui( messages = [{"role": "user", "content": prompt}] try: - # Call LLM expecting the combined analysis and decision structure - llm_output = call_llm_api( + # Call the LLM API expecting the combined analysis and decision structure + llm_output: LLMAnalysisAndDecision = call_llm_api( messages, - LLMAnalysisAndDecision, # Expect the combined model + LLMAnalysisAndDecision, # Expect the combined model for validation system_prompt=system_prompt, - model=config.ANTHROPIC_DEFAULT_MODEL, # Use configured model - ) - # Log the structured analysis and decision for debugging - logger.debug( - f"LLM Screen Analysis: {llm_output.screen_analysis.model_dump_json(indent=2)}" - ) - logger.debug( - f"LLM Action Decision: {llm_output.action_decision.model_dump_json(indent=2)}" + model=config.ANTHROPIC_DEFAULT_MODEL, # Use model from config ) + # Log the structured analysis and decision for debugging purposes + # Use model_dump_json for pretty printing if desired, or just log the object + logger.debug(f"LLM Screen Analysis Received: {llm_output.screen_analysis}") + logger.debug(f"LLM Action Decision Received: {llm_output.action_decision}") except (ValueError, Exception) as e: logger.error( f"Failed to get valid analysis/decision from LLM: {e}", exc_info=True ) - # Fallback or re-raise? Re-raise for now to halt execution on planning failure. + # Propagate the error to halt execution on planning failure raise - # --- Temporary Conversion back to LLMActionPlan --- - # This allows AgentExecutor handlers to work without immediate refactoring. - # TODO: Refactor AgentExecutor later to consume ActionDecision directly. - analysis = llm_output.screen_analysis + # Extract the decision part to be returned decision = llm_output.action_decision - # Combine reasoning (can be refined) - combined_reasoning = f"Analysis: {analysis.reasoning}\nDecision Justification: {decision.analysis_reasoning}" - - # Extract parameters for LLMActionPlan - # Ensure parameters is not None before accessing .get() - parameters = decision.parameters or {} - text_param = parameters.get("text_to_type") - key_param = parameters.get("key_info") - # Add handling for 'wait' action type if needed by LLMActionPlan later - # wait_param = parameters.get("wait_duration_s") - - # Handle potential new action types like 'wait' or 'finish' if LLMActionPlan - # doesn't support them directly yet. For now, map 'finish'/'wait' to a state? - # Let's assume LLMActionPlan.action can hold the new types for now. - action_type = decision.action_type - - converted_plan = LLMActionPlan( - reasoning=combined_reasoning, - action=action_type, # Pass action_type directly - element_id=decision.target_element_id, # Pass the current frame ID - text_to_type=text_param, - key_info=key_param, - is_goal_complete=decision.is_goal_complete, - ) - # Validate the converted plan (optional, but good practice) - try: - # Re-validate the object created from ActionDecision fields - # This ensures the LLM followed rules that map to LLMActionPlan - # Note: This validation might fail if action_type is 'wait' or 'finish' - # We might need to adjust LLMActionPlan or skip validation for new types. - # For now, let's try validating. 
-        LLMActionPlan.model_validate(converted_plan.model_dump())
-    except Exception as validation_err:
-        logger.warning(
-            f"Converted LLMActionPlan failed validation (potentially due to new action types like '{action_type}'): {validation_err}"
-        )
-        # Don't raise, just warn for now, as the ActionDecision was likely valid.
-
-    # Find the target UIElement based on the element_id from the decision
+    # Find the target UIElement in the current frame based on the ID from the decision
     target_ui_element = None
-    if converted_plan.element_id is not None:
+    if decision.target_element_id is not None:
+        # Search through the raw elements detected in *this* frame
         target_ui_element = next(
-            (el for el in elements if el.id == converted_plan.element_id), None
+            (el for el in elements if el.id == decision.target_element_id), None
         )
         if target_ui_element is None:
             logger.warning(
-                f"LLM targeted element ID {converted_plan.element_id}, but it was not found in the current raw elements."
+                f"LLM targeted element ID {decision.target_element_id} in action decision, "
+                f"but it was not found in the current raw elements list ({len(elements)} elements)."
             )
-            # Keep element_id in plan, but target_ui_element remains None
+            # The target_ui_element remains None, AgentExecutor action handlers must check for this
 
     logger.info(
-        f"Planner returning action: {converted_plan.action}, Target Elem ID: {converted_plan.element_id}, Goal Complete: {converted_plan.is_goal_complete}"
+        f"Planner returning action_type: {decision.action_type}, "
+        f"Target Elem ID: {decision.target_element_id}, "
+        f"Goal Complete: {decision.is_goal_complete}"
     )
 
-    return converted_plan, target_ui_element  # Return converted plan and element
+    # Return the validated ActionDecision object and the resolved target element
+    return decision, target_ui_element
diff --git a/omnimcp/types.py b/omnimcp/types.py
index f7bc664..892a434 100644
--- a/omnimcp/types.py
+++ b/omnimcp/types.py
@@ -6,6 +6,7 @@
 from loguru import logger
 from pydantic import BaseModel, Field, field_validator, ValidationInfo
+from pydantic_prompt import prompt_schema
 
 # Define Bounds (assuming normalized coordinates 0.0-1.0)
 Bounds = Tuple[float, float, float, float]  # (x, y, width, height)
@@ -154,7 +155,7 @@ class LLMActionPlan(BaseModel):
     reasoning: str = Field(
         ..., description="Step-by-step thinking process leading to the chosen action."
     )
-    action: Literal["click", "type", "scroll", "press_key"] = Field(
+    action: Literal["click", "type", "scroll", "press_key", "wait"] = Field(
         ..., description="The single next action to perform."
     )
     is_goal_complete: bool = Field(
@@ -264,50 +265,50 @@ def short_repr(self) -> str:
         return f"TrackID {self.track_id} (Type Unknown) - Status: {status}, LastSeen: {self.last_seen_frame}"
 
 
+@prompt_schema
 class ScreenAnalysis(BaseModel):
     """LLM's analysis of the current UI state with tracking information."""
 
-    reasoning: str = Field(
-        description="Detailed reasoning about the UI state, changes, and tracked elements relevant to the goal."
- ) - disappeared_elements: List[str] = Field( - default_factory=list, - description="List of track_ids considered permanently gone.", - ) - temporarily_missing_elements: List[str] = Field( - default_factory=list, - description="List of track_ids considered temporarily missing but likely to reappear.", - ) - new_elements: List[str] = Field( - default_factory=list, - description="List of track_ids for newly appeared elements.", - ) - critical_elements_status: Dict[str, str] = Field( - default_factory=dict, - description="Status (e.g., 'Visible', 'Missing', 'Gone') of track_ids deemed critical for the current goal/step.", - ) + reasoning: str + """Detailed reasoning about the UI state, changes from the previous state using tracking context, and assessment relevant to the goal.""" + + disappeared_elements: List[str] = Field(default_factory=list) + """List of track_ids considered permanently gone since the last visible frame.""" + temporarily_missing_elements: List[str] = Field(default_factory=list) + """List of track_ids considered temporarily missing (e.g., due to UI transition) but likely to reappear.""" + new_elements: List[str] = Field(default_factory=list) + """List of track_ids for newly appeared elements this frame.""" + + critical_elements_status: Dict[str, str] = Field(default_factory=dict) + """Dictionary mapping track_ids of elements deemed critical for the current goal/step to their status (e.g., 'Visible', 'Missing', 'Gone').""" + + +@prompt_schema class ActionDecision(BaseModel): """LLM's decision on the next action based on its analysis.""" - analysis_reasoning: str = Field( - description="Reference or summary of the reasoning from ScreenAnalysis leading to this action." - ) - action_type: str = Field( - description="The type of action to perform (e.g., 'click', 'type', 'press_key', 'wait', 'finish')." - ) - target_element_id: Optional[int] = Field( - None, - description="The CURRENT per-frame 'id' of the target UIElement, if applicable and visible.", - ) - parameters: Dict[str, Any] = Field( - default_factory=dict, - description="Action parameters, e.g., {'text_to_type': 'hello', 'key_info': 'Enter'}", - ) - is_goal_complete: bool = Field( - False, description="Set to true if the overall user goal is now complete." - ) + analysis_reasoning: str + """Brief summary connecting the screen analysis to the chosen action.""" + + action_type: Literal["click", "type", "scroll", "press_key", "wait", "finish"] + """The type of action to perform.""" + + target_element_id: Optional[int] = Field(default=None) + """The CURRENT per-frame 'id' of the target UIElement, IF the action applies to a specific visible element (e.g., 'click', 'type'). Must be null otherwise.""" + + parameters: Dict[str, Any] = Field(default_factory=dict) + """Action-specific parameters. 
Examples: + - For 'type': {'text_to_type': 'string'} + - For 'press_key': {'key_info': 'key_string'} + - For 'wait': {'wait_duration_s': float} + - For 'scroll': {'scroll_direction': 'up'/'down'/'left'/'right', 'scroll_steps': int} + - For 'click': {'click_type': 'single'/'double'} (Optional) + """ + + is_goal_complete: bool = Field(default=False) + """Set to true if the overall user goal is fully achieved after this action decision.""" # --- Model for Structured Step Logging --- @@ -353,12 +354,12 @@ class LoggedStep(BaseModel): step_time_s: float +@prompt_schema class LLMAnalysisAndDecision(BaseModel): """Defines the full structured output expected from the LLM, combining analysis and decision.""" - screen_analysis: ScreenAnalysis = Field( - description="The LLM's analysis of the current screen state and element tracks." - ) - action_decision: ActionDecision = Field( - description="The LLM's decision on the next action based on the analysis." - ) + screen_analysis: ScreenAnalysis + """The LLM's analysis of the current screen state and element tracks.""" + + action_decision: ActionDecision + """The LLM's decision on the next action based on the analysis.""" diff --git a/omnimcp/utils.py b/omnimcp/utils.py index 98b15dc..c65f90a 100644 --- a/omnimcp/utils.py +++ b/omnimcp/utils.py @@ -6,6 +6,7 @@ from io import BytesIO from typing import Any, Callable, List, Tuple, Union, Optional import base64 +import os import sys import threading import time @@ -636,3 +637,83 @@ def downsample_image(image: Image.Image, factor: float) -> Image.Image: except Exception as resize_err: logger.warning(f"Failed to downsample image, returning original: {resize_err}") return image # Fallback to original on error + + +def setup_run_logging(run_dir=None): + """ + Configure stderr and optional file logging for a specific run. + + Removes default handlers and sets up new ones based on config and run_dir. + + Args: + run_dir: Directory to store run-specific logs. If None, only stderr logging + at the configured level is guaranteed (unless default file logging is enabled). + + Returns: + The log file path if file logging was configured, otherwise None. + """ + from omnimcp.config import config + + # Remove default loguru handler to avoid duplicate messages if added by default + try: + logger.remove(0) # Attempt to remove the default handler ID 0 + except ValueError: + logger.warning( + "Could not remove default logger handler (ID 0). May already be removed." + ) + pass # Ignore if it was already removed or never added + + # Configure stderr logging based on config + stderr_level = config.LOG_LEVEL.upper() if config.LOG_LEVEL else "INFO" + logger.add(sys.stderr, level=stderr_level) + logger.debug(f"Configured stderr logging level: {stderr_level}") + + # Configure file logging if run_dir is provided + log_file_path = None + if run_dir: + try: + os.makedirs(run_dir, exist_ok=True) + log_file_path = os.path.join(run_dir, "run.log") + # Add run-specific log handler (level DEBUG for file) + logger.add( + log_file_path, + rotation="50 MB", + level="DEBUG", + encoding="utf8", + enqueue=True, + ) + logger.info( + f"Configured run-specific file logging. 
Log path: {log_file_path}" + ) + except Exception as e: + logger.error( + f"Failed to configure file logging for run_dir '{run_dir}': {e}" + ) + log_file_path = None # Ensure path is None on failure + elif not config.DISABLE_DEFAULT_LOGGING and config.LOG_DIR: + # Fallback to default log directory if run_dir is None and default logging not disabled + try: + default_log_dir = config.LOG_DIR + os.makedirs(default_log_dir, exist_ok=True) + log_file_path = os.path.join( + default_log_dir, "omnimcp_{time:YYYY-MM-DD}.log" + ) # Daily default log + logger.add( + log_file_path, + rotation="1 day", + level="DEBUG", + encoding="utf8", + enqueue=True, + ) + logger.info( + f"Configured default file logging. Log path pattern: {log_file_path}" + ) + except Exception as e: + logger.error( + f"Failed to configure default file logging in '{config.LOG_DIR}': {e}" + ) + log_file_path = None + else: + logger.info("File logging disabled or no directory specified.") + + return log_file_path # Return path or None diff --git a/pyproject.toml b/pyproject.toml index 9dd22b5..06ca370 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ version = "0.1.0" description = "OmniMCP - OmniParser with Model Control Protocol for UI Automation" readme = "README.md" # Allow <3.13 as upper bound seems reasonable unless specific 3.12+ features are needed -requires-python = ">=3.10,<3.13" +requires-python = ">=3.11,<3.13" # Use SPDX identifier string for license license = "MIT" authors = [ @@ -37,6 +37,7 @@ dependencies = [ # Add platform-specific dependency for macOS "pyobjc-framework-Cocoa; sys_platform == 'darwin'", "scipy>=1.15.2", + "pydantic-prompt>=0.1.0", ] [project.scripts] diff --git a/tests/test_agent_executor.py b/tests/test_agent_executor.py index 448ab2a..8294aca 100644 --- a/tests/test_agent_executor.py +++ b/tests/test_agent_executor.py @@ -1,13 +1,17 @@ # tests/test_agent_executor.py import pytest -import os # Import added -from unittest.mock import MagicMock # Added patch for later if needed +import os # Import os +from unittest.mock import MagicMock from PIL import Image -from typing import List, Optional, Tuple # Added Callable +from typing import List, Optional, Tuple # Necessary type imports -from omnimcp.types import UIElement, ElementTrack, LLMActionPlan +from omnimcp.types import ( + UIElement, + ElementTrack, + ActionDecision, +) # Imports from the module under test from omnimcp.agent_executor import AgentExecutor, PlannerCallable @@ -23,7 +27,7 @@ class MockExecution: def __init__(self): self.calls = [] - self.fail_on_action: Optional[str] = None # For testing failures + self.fail_on_action: Optional[str] = None def click(self, x: int, y: int, click_type: str = "single") -> bool: self.calls.append(("click", x, y, click_type)) @@ -50,14 +54,13 @@ def scroll(self, dx: int, dy: int) -> bool: class MockPerception: """Mocks the PerceptionInterface for testing AgentExecutor.""" - # Required attributes matching PerceptionInterface elements: List[UIElement] tracked_elements_view: List[ElementTrack] screen_dimensions: Optional[Tuple[int, int]] _last_screenshot: Optional[Image.Image] frame_counter: int - update_call_count: int # Renamed from update_calls - fail_on_update: bool = False # For testing failures + update_call_count: int # Correct attribute name + fail_on_update: bool = False def __init__( self, @@ -69,12 +72,11 @@ def __init__( elements_to_return if elements_to_return is not None else [] ) self.screen_dimensions = dims - # Initialize state variables self.elements = [] 
self.tracked_elements_view = [] self.frame_counter = 0 self._last_screenshot = Image.new("RGB", dims) if dims else None - self.update_call_count = 0 # Use the correct name + self.update_call_count = 0 # Initialize correctly self.fail_on_update = False logger.debug("MockPerception initialized.") @@ -83,18 +85,12 @@ def update(self) -> None: self.update_call_count += 1 # Increment correct counter self.frame_counter += 1 - # Simulate failure if configured - if ( - self.fail_on_update and self.update_call_count > 1 - ): # Fail on second call typically + if self.fail_on_update and self.update_call_count > 1: logger.error("MockPerception: Simulating perception failure.") raise RuntimeError("Simulated perception failure") - # Set the elements that the mock should "perceive" self.elements = self.elements_to_return - # Simulate tracker update (returns empty list as mock doesn't track) - self.tracked_elements_view = [] - # Simulate screenshot/dims update based on init values + self.tracked_elements_view = [] # Mock returns empty tracking view if self.screen_dimensions: self._last_screenshot = Image.new("RGB", self.screen_dimensions) else: @@ -107,8 +103,7 @@ def update(self) -> None: # --- Fixtures --- @pytest.fixture def mock_perception_component() -> MockPerception: - """Provides a default MockPerception instance.""" - # Provide a default element for tests that expect one + """Provides a default MockPerception instance with one element.""" return MockPerception( elements_to_return=[ UIElement( @@ -132,35 +127,30 @@ def mock_execution_component() -> MockExecution: @pytest.fixture def mock_element() -> UIElement: """Provides a sample UIElement for tests.""" - return UIElement( - id=0, - type="button", - content="OK", - bounds=(0.1, 0.1, 0.2, 0.1), # w=0.2, h=0.1 - ) + return UIElement(id=0, type="button", content="OK", bounds=(0.1, 0.1, 0.2, 0.1)) @pytest.fixture def temp_output_dir(tmp_path) -> str: """Creates a temporary directory for test run outputs.""" run_dir = tmp_path / "test_runs" - run_dir.mkdir(exist_ok=True) # Use exist_ok=True + run_dir.mkdir(exist_ok=True) return str(run_dir) @pytest.fixture def mock_box_drawer() -> MagicMock: """Provides a mock for the draw_bounding_boxes utility.""" - return MagicMock(return_value=Image.new("RGB", (10, 10))) # Return dummy image + return MagicMock(return_value=Image.new("RGB", (10, 10))) @pytest.fixture def mock_highlighter() -> MagicMock: """Provides a mock for the draw_action_highlight utility.""" - return MagicMock(return_value=Image.new("RGB", (10, 10))) # Return dummy image + return MagicMock(return_value=Image.new("RGB", (10, 10))) -# --- Mock Planners --- +# --- Mock Planners (Updated to return ActionDecision) --- def planner_completes_on_step(n: int) -> PlannerCallable: @@ -171,27 +161,27 @@ def mock_planner( user_goal: str, action_history: List[str], step: int, - tracking_info: Optional[List[ElementTrack]] = None, # Accept tracking_info - ) -> Tuple[LLMActionPlan, Optional[UIElement]]: + tracking_info: Optional[List[ElementTrack]] = None, + ) -> Tuple[ActionDecision, Optional[UIElement]]: # Return ActionDecision target_element = elements[0] if elements else None - # Goal completes when current step index is n-1 - is_complete = step == (n - 1) - # Example: Click first, then signal completion with a different action - action = "click" if not is_complete else "press_key" - element_id = target_element.id if target_element and action == "click" else None - key_info = "Enter" if is_complete else None # Example final action - - plan = 
LLMActionPlan( - reasoning=f"Mock reasoning step {step + 1} for goal '{user_goal}'", - action=action, - element_id=element_id, - key_info=key_info, + is_complete = step == (n - 1) # Complete on index n-1 + action_type = "click" if not is_complete else "finish" # Use 'finish' action + target_element_id = ( + target_element.id if target_element and action_type == "click" else None + ) + params = {} + + decision = ActionDecision( + analysis_reasoning=f"Mock analysis step {step + 1}. Complete={is_complete}", + action_type=action_type, + target_element_id=target_element_id, + parameters=params, is_goal_complete=is_complete, ) logger.debug( - f"Mock Planner (complete on {n}): Step {step}, Complete: {is_complete}, Action: {action}" + f"Mock Planner (complete on {n}): Step {step}, Returning ActionDecision: {decision.action_type}" ) - return plan, target_element + return decision, target_element return mock_planner @@ -204,20 +194,22 @@ def mock_planner( user_goal: str, action_history: List[str], step: int, - tracking_info: Optional[List[ElementTrack]] = None, # Accept tracking_info - ) -> Tuple[LLMActionPlan, Optional[UIElement]]: + tracking_info: Optional[List[ElementTrack]] = None, + ) -> Tuple[ActionDecision, Optional[UIElement]]: # Return ActionDecision target_element = elements[0] if elements else None - element_id = target_element.id if target_element else None - plan = LLMActionPlan( - reasoning=f"Mock reasoning step {step + 1}, goal not complete", - action="click", # Always clicks the first element if present - element_id=element_id, - text_to_type=None, - key_info=None, + target_element_id = target_element.id if target_element else None + + decision = ActionDecision( + analysis_reasoning=f"Mock analysis step {step + 1}, goal not complete.", + action_type="click", # Always plans click + target_element_id=target_element_id, + parameters={}, is_goal_complete=False, ) - logger.debug(f"Mock Planner (never complete): Step {step}, Action: click") - return plan, target_element + logger.debug( + f"Mock Planner (never complete): Step {step}, Returning ActionDecision: {decision.action_type}" + ) + return decision, target_element return mock_planner @@ -225,15 +217,14 @@ def mock_planner( def planner_fails() -> PlannerCallable: """Factory for a planner that raises an exception.""" - # Use *args, **kwargs to accept any arguments including tracking_info - def failing_planner(*args, **kwargs): + def failing_planner(*args, **kwargs): # Accept any args logger.error("Mock Planner: Simulating planning failure.") raise ValueError("Mock planning failure") - return failing_planner + return failing_planner # type: ignore -# --- Test Functions --- +# --- Test Functions (Updated Assertions) --- def test_agent_executor_init(mock_perception_component, mock_execution_component): @@ -259,16 +250,16 @@ def test_run_completes_goal( temp_output_dir: str, mocker, ): - """Test a successful run where the goal is completed on the second step (index 1).""" + """Test a successful run completing on the second step (index 1).""" mock_final_image = Image.new("RGB", (50, 50), color="green") mocker.patch.object( agent_executor, "take_screenshot", return_value=mock_final_image ) - complete_step_n = 2 # Complete ON step 2 (index 1) + complete_step_n = 2 # Completes ON step 2 (index 1) executor = AgentExecutor( perception=mock_perception_component, - planner=planner_completes_on_step(complete_step_n), # Pass N + planner=planner_completes_on_step(complete_step_n), execution=mock_execution_component, box_drawer=mock_box_drawer, 
highlighter=mock_highlighter, @@ -279,12 +270,12 @@ def test_run_completes_goal( ) assert result is True, "Should return True when goal is completed." - # Perception called for step 0 and step 1 (n=2 steps total) + # Perception called for step 0 and step 1 (Total: 2) assert mock_perception_component.update_call_count == complete_step_n - # Execution called only for step 0 (before completion) + # Execution called only for step 0 (Click before completion on step 1) assert len(mock_execution_component.calls) == complete_step_n - 1 assert mock_execution_component.calls[0][0] == "click" # Action in step 0 - # History includes step 0's planned action, step 1's planned action + # History includes plan for step 0 and step 1 (Total: 2) assert len(executor.action_history) == complete_step_n # Check output files @@ -296,8 +287,11 @@ def test_run_completes_goal( assert os.path.exists(os.path.join(run_dir_path, "final_state.png")) assert os.path.exists(os.path.join(run_dir_path, "run_metrics.json")) assert os.path.exists(os.path.join(run_dir_path, "run_log.jsonl")) + # Visualizers called for each step before potential break assert mock_box_drawer.call_count == complete_step_n - assert mock_highlighter.call_count == complete_step_n + assert ( + mock_highlighter.call_count == 0 + ) # Highlighter call is currently commented out def test_run_reaches_max_steps( @@ -328,15 +322,13 @@ def test_run_reaches_max_steps( ) assert result is False, "Should return False when max steps reached." - # Perception called for each step assert mock_perception_component.update_call_count == max_steps - # Execution called for each step assert len(mock_execution_component.calls) == max_steps assert len(executor.action_history) == max_steps - # Visualizers called for each step assert mock_box_drawer.call_count == max_steps - assert mock_highlighter.call_count == max_steps - # Check final state image existence + assert ( + mock_highlighter.call_count == 0 + ) # Highlighter call is currently commented out run_dirs = os.listdir(temp_output_dir) assert len(run_dirs) == 1 run_dir_path = os.path.join(temp_output_dir, run_dirs[0]) @@ -349,17 +341,16 @@ def test_run_perception_failure( temp_output_dir: str, mocker, ): - """Test that the loop stops if perception fails (e.g., on the second step).""" + """Test that the loop stops if perception fails on the second step.""" mock_final_image = Image.new("RGB", (50, 50), color="red") mocker.patch.object( agent_executor, "take_screenshot", return_value=mock_final_image ) - # Configure mock to fail on the second update call - mock_perception_component.fail_on_update = True + mock_perception_component.fail_on_update = True # Configure mock to fail executor = AgentExecutor( perception=mock_perception_component, - planner=planner_never_completes(), # Planner that would normally continue + planner=planner_never_completes(), execution=mock_execution_component, ) @@ -367,13 +358,12 @@ def test_run_perception_failure( goal="Test Perception Fail", max_steps=5, output_base_dir=temp_output_dir ) - assert result is False # Run fails + assert result is False # Update called twice: first succeeds, second raises exception assert mock_perception_component.update_call_count == 2 # Execution only happens for the first step (step 0) assert len(mock_execution_component.calls) == 1 - assert len(executor.action_history) == 1 # Only history for step 1 planned - # Check final state image existence (should still be saved) + assert len(executor.action_history) == 1 run_dirs = os.listdir(temp_output_dir) assert 
len(run_dirs) == 1 run_dir_path = os.path.join(temp_output_dir, run_dirs[0]) @@ -402,14 +392,10 @@ def test_run_planning_failure( goal="Test Planning Fail", max_steps=5, output_base_dir=temp_output_dir ) - assert result is False # Run fails - # Perception called once before planning fails + assert result is False assert mock_perception_component.update_call_count == 1 - # Execution never reached assert len(mock_execution_component.calls) == 0 - # Action history not updated as planning fails before history update assert len(executor.action_history) == 0 - # Check final state image existence run_dirs = os.listdir(temp_output_dir) assert len(run_dirs) == 1 run_dir_path = os.path.join(temp_output_dir, run_dirs[0]) @@ -428,7 +414,6 @@ def test_run_execution_failure( agent_executor, "take_screenshot", return_value=mock_final_image ) - # Configure execution mock to fail on 'click' mock_execution_component.fail_on_action = "click" executor = AgentExecutor( perception=mock_perception_component, @@ -440,15 +425,11 @@ def test_run_execution_failure( goal="Test Execution Fail", max_steps=5, output_base_dir=temp_output_dir ) - assert result is False # Run fails - # Perception called once for the first step + assert result is False assert mock_perception_component.update_call_count == 1 - # Execution was attempted once (the click that failed) - assert len(mock_execution_component.calls) == 1 - # History includes the planned action before execution failed + assert len(mock_execution_component.calls) == 1 # Execution was attempted assert len(executor.action_history) == 1 assert executor.action_history[0].startswith("Step 1: Planned click") - # Check final state image existence run_dirs = os.listdir(temp_output_dir) assert len(run_dirs) == 1 run_dir_path = os.path.join(temp_output_dir, run_dirs[0]) @@ -473,37 +454,38 @@ def test_coordinate_scaling_for_click( agent_executor, "get_scaling_factor", return_value=scaling_factor ) - # Use MagicMock directly for the planner in this test + # Configure MagicMock planner to return ActionDecision tuple + mock_decision_for_click = ActionDecision( + analysis_reasoning="Click test", + action_type="click", + target_element_id=mock_element.id, + parameters={}, + is_goal_complete=False, + ) planner_click = MagicMock( return_value=( - LLMActionPlan( - reasoning="Click test", - action="click", - element_id=mock_element.id, - is_goal_complete=False, - ), - mock_element, # Return the mock element as the target - ) + mock_decision_for_click, + mock_element, + ) # Return ActionDecision tuple ) executor = AgentExecutor( perception=mock_perception_component, - planner=planner_click, # Use MagicMock planner + planner=planner_click, execution=mock_execution_component, ) executor.run(goal="Test Scaling", max_steps=1, output_base_dir=temp_output_dir) - # Verify planner was called correctly (including the new tracking_info arg) + # Verify planner call arguments planner_click.assert_called_once() call_args, call_kwargs = planner_click.call_args - assert call_kwargs["tracking_info"] == [] # MockPerception returns empty list + assert call_kwargs.get("tracking_info") == [] # Check tracking info passed # Verify execution call - # MockPerception dims: W=200, H=100 - # MockElement bounds: x=0.1, y=0.1, w=0.2, h=0.1 - # Center physical x = (0.1 + 0.2 / 2) * 200 = 0.2 * 200 = 40 - # Center physical y = (0.1 + 0.1 / 2) * 100 = 0.15 * 100 = 15 + # MockPerception dims: W=200, H=100 ; MockElement bounds: x=0.1,y=0.1,w=0.2,h=0.1 + # Center physical x = (0.1 + 0.2 / 2) * 200 = 40 + # Center physical 
y = (0.1 + 0.1 / 2) * 100 = 15
     expected_logical_x = int(40 / scaling_factor)
     expected_logical_y = int(15 / scaling_factor)
 
@@ -517,7 +499,7 @@ def test_coordinate_scaling_for_click(
         "single",
     ), f"Click coordinates incorrect for scaling factor {scaling_factor}"
 
-    # Check final state image existence
+    # Check output files
     run_dirs = os.listdir(temp_output_dir)
     assert len(run_dirs) == 1, "Expected one run directory"
     run_dir_path = os.path.join(temp_output_dir, run_dirs[0])
diff --git a/tests/test_core.py b/tests/test_core.py
index 54833ae..f2eca4a 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -5,11 +5,10 @@
 from omnimcp.types import (
     UIElement,
     Bounds,
-    LLMActionPlan,
     ElementTrack,
     ScreenAnalysis,
     ActionDecision,
-    LLMAnalysisAndDecision,  # Added new types
+    LLMAnalysisAndDecision,
 )
 
 # Import the function to test
@@ -17,19 +16,16 @@
 
 # Assuming pytest-mock (mocker fixture) is available
 
-# --- Fixture for Sample Elements ---
-
+# --- Fixture for Sample Elements ---
 @pytest.fixture
 def sample_elements() -> list[UIElement]:
-    """Provides a sample list of UIElements similar to the login screen."""
-    # Use slightly more distinct bounds for testing
+    """Sample login-screen UIElements with distinct bounds for matching tests."""
     bounds_tf1: Bounds = (0.1, 0.1, 0.8, 0.05)
-    bounds_tf2: Bounds = (0.1, 0.2, 0.8, 0.05)  # Below first field
+    bounds_tf2: Bounds = (0.1, 0.2, 0.8, 0.05)
     bounds_cb: Bounds = (0.1, 0.3, 0.3, 0.05)
     bounds_link: Bounds = (0.5, 0.3, 0.4, 0.05)
-    bounds_btn: Bounds = (0.4, 0.4, 0.2, 0.08)  # Centered below
-
+    bounds_btn: Bounds = (0.4, 0.4, 0.2, 0.08)
     return [
         UIElement(
             id=0,
@@ -76,24 +72,20 @@ def test_plan_action_step1_type_user(mocker, sample_elements):
     user_goal = "Log in as testuser with password pass"
     action_history = []
     step = 0
-    tracking_info = None  # No tracking info on first step
+    tracking_info = None
 
-    # Mock the LLM API call within the core module
     mock_llm_api = mocker.patch("omnimcp.core.call_llm_api")
 
-    # --- Setup Mock Response ---
-    # Configure the mock to return the NEW structure (LLMAnalysisAndDecision)
+    # Setup Mock Response (returning LLMAnalysisAndDecision)
     mock_analysis = ScreenAnalysis(
-        reasoning="Goal is log in. History empty. 
Need username in field ID 0.",
-        new_elements=[
-            f"track_{i}" for i in range(len(sample_elements))
-        ],  # Assume all new
+        reasoning="Need username",
+        new_elements=[f"track_{i}" for i in range(len(sample_elements))],
         critical_elements_status={"track_0": "Visible"},
     )
     mock_decision = ActionDecision(
-        analysis_reasoning="Typing username into field ID 0.",
+        analysis_reasoning="Typing username",
         action_type="type",
-        target_element_id=0,  # Target the first text field (ID 0)
+        target_element_id=0,
         parameters={"text_to_type": "testuser"},
         is_goal_complete=False,
     )
@@ -101,11 +93,9 @@
         screen_analysis=mock_analysis, action_decision=mock_decision
     )
     mock_llm_api.return_value = mock_combined_output
-    # --- End Mock Response Setup ---
 
-    # Call the function under test
-    # plan_action_for_ui internally converts result back to LLMActionPlan for now
-    llm_plan_result, target_element_result = plan_action_for_ui(
+    # Call the function under test - now returns ActionDecision
+    action_decision_result, target_element_result = plan_action_for_ui(
         elements=sample_elements,
         user_goal=user_goal,
         action_history=action_history,
@@ -113,106 +103,94 @@
         tracking_info=tracking_info,
     )
 
-    # --- Assertions ---
+    # Assertions
     mock_llm_api.assert_called_once()
     call_args, call_kwargs = mock_llm_api.call_args
-    # Check arguments passed to call_llm_api
-    messages = call_args[0]
-    response_model_passed = call_args[1]
+    assert call_args[1] is LLMAnalysisAndDecision  # Check correct model expected
+
+    prompt_text = call_args[0][0]["content"]
     assert (
-        response_model_passed is LLMAnalysisAndDecision
-    )  # Check correct model was expected
-
-    # Check basic prompt content
-    prompt_text = messages[0]["content"]
-    assert user_goal in prompt_text
-    assert sample_elements[0].to_prompt_repr() in prompt_text
-    assert "**Previous Actions Taken" in prompt_text
-    assert "**Tracked Elements Context" in prompt_text
+        "**Previous Actions Taken (up to last 5):**" in prompt_text
+    )  # Exact header emitted by the prompt template
     assert (
-        "(No tracking info available or first frame)" in prompt_text
-    )  # Check tracking section rendering
-
-    # Check returned values (should be converted LLMActionPlan)
-    assert isinstance(llm_plan_result, LLMActionPlan)
-    assert llm_plan_result.action == "type"
-    assert llm_plan_result.element_id == 0
-    assert llm_plan_result.text_to_type == "testuser"
-    assert llm_plan_result.key_info is None
-    assert llm_plan_result.is_goal_complete is False
-    assert "Typing username into field ID 0" in llm_plan_result.reasoning
-    assert target_element_result is sample_elements[0]  # Check correct element returned
+        "**Tracked Elements Context (Persistent View - Max 50):**" in prompt_text
+    )  # Exact header emitted by the prompt template
+    assert "(No tracking info available or first frame)" in prompt_text
+
+    # Assert against the ActionDecision fields returned by the planner
+    assert isinstance(action_decision_result, ActionDecision)
+    assert action_decision_result.action_type == "type"
+    assert action_decision_result.target_element_id == 0
+    assert action_decision_result.parameters.get("text_to_type") == "testuser"
+    assert action_decision_result.parameters.get("key_info") is None
+    assert action_decision_result.is_goal_complete is False
+    assert (
+        action_decision_result.analysis_reasoning == "Typing username"
+    )  # Check reasoning part
+    # Planner should also return the matched target element
+    assert target_element_result is sample_elements[0]
 
 
 def test_plan_action_step3_click_login(mocker, 
sample_elements):
     """Test planning the third step: clicking login and completing goal."""
     user_goal = "Log in as testuser with password pass"
-    # Simulate state where fields are filled (by updating content)
-    sample_elements[0].content = "testuser"  # Username field filled
-    sample_elements[1].content = "********"  # Password field filled (masked)
-    action_history = [
-        "Step 1: Planned type on ElemID 0 Text='testuser'",
-        "Step 2: Planned type on ElemID 1 Text='********'",
-    ]
-    step = 2  # 3rd step (0-indexed)
-    # Simulate tracking info (assume all elements are persistent and visible)
+    sample_elements[0].content = "testuser"
+    sample_elements[1].content = "********"
+    action_history = ["Step 1...", "Step 2..."]
+    step = 2
     mock_tracking_info = [
         ElementTrack(track_id=f"track_{el.id}", latest_element=el, last_seen_frame=step)
         for el in sample_elements
     ]
 
-    # Mock the LLM API call
     mock_llm_api = mocker.patch("omnimcp.core.call_llm_api")
 
-    # --- Setup Mock Response ---
+    # Setup Mock Response (returning LLMAnalysisAndDecision)
     mock_analysis_step3 = ScreenAnalysis(
-        reasoning="Username and password seem entered based on history. Login button (TrackID track_4) is visible. Ready to click.",
+        reasoning="Ready to click Login.",
         critical_elements_status={"track_4": "Visible"},
-        # Assume no new/disappeared elements for simplicity in this mock
     )
     mock_decision_step3 = ActionDecision(
-        analysis_reasoning="Clicking Login button to attempt login.",
+        analysis_reasoning="Clicking Login button.",
         action_type="click",
-        target_element_id=4,  # Target the Login button (ID 4)
+        target_element_id=4,
         parameters={},
-        is_goal_complete=True,  # Assume LLM thinks goal completes after click
+        is_goal_complete=True,
     )
     mock_combined_output_step3 = LLMAnalysisAndDecision(
         screen_analysis=mock_analysis_step3, action_decision=mock_decision_step3
     )
     mock_llm_api.return_value = mock_combined_output_step3
-    # --- End Mock Response Setup ---
 
     # Call the function
-    llm_plan_result, target_element_result = plan_action_for_ui(
+    action_decision_result, target_element_result = plan_action_for_ui(
         elements=sample_elements,
         user_goal=user_goal,
         action_history=action_history,
         step=step,
-        tracking_info=mock_tracking_info,  # Pass the mock tracking info
+        tracking_info=mock_tracking_info,
     )
 
-    # --- Assertions ---
+    # Assertions
     mock_llm_api.assert_called_once()
     call_args, call_kwargs = mock_llm_api.call_args
-    messages = call_args[0]
-    response_model_passed = call_args[1]
-    assert response_model_passed is LLMAnalysisAndDecision
+    assert call_args[1] is LLMAnalysisAndDecision
 
-    # Check history and tracking rendering in prompt
-    prompt_text = messages[0]["content"]
+    prompt_text = call_args[0][0]["content"]
     assert action_history[0] in prompt_text
     assert action_history[1] in prompt_text
-    assert "**Tracked Elements Context" in prompt_text
-    assert "TrackID track_4" in prompt_text  # Check a specific track mentioned
-    assert "Status: VISIBLE" in prompt_text  # Check status rendering
-
-    # Check results (converted LLMActionPlan)
-    assert isinstance(llm_plan_result, LLMActionPlan)
-    assert llm_plan_result.is_goal_complete is True
-    assert llm_plan_result.action == "click"
-    assert llm_plan_result.element_id == 4
-    assert llm_plan_result.text_to_type is None
-    assert llm_plan_result.key_info is None
-    assert "Clicking Login button" in llm_plan_result.reasoning
-    assert target_element_result is sample_elements[4]  # Check correct element returned
+    assert (
+        "**Tracked Elements Context (Persistent View - Max 50):**" in prompt_text
+    )  # Exact header emitted by the prompt template
+    assert "TrackID track_4" in prompt_text
+    assert "Status: VISIBLE" in prompt_text
+
+    # Assert against the ActionDecision fields returned by the planner
+    assert isinstance(action_decision_result, ActionDecision)
+    assert action_decision_result.is_goal_complete is True
+    assert action_decision_result.action_type == "click"
+    assert action_decision_result.target_element_id == 4
+    assert action_decision_result.parameters == {}
+    assert action_decision_result.analysis_reasoning == "Clicking Login button."
+    # Planner should also return the matched target element
+    assert target_element_result is sample_elements[4]
diff --git a/uv.lock b/uv.lock
index 7e084e8..013892d 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1,5 +1,5 @@
 version = 1
-requires-python = ">=3.10, <3.13"
+requires-python = ">=3.11, <3.13"
 
 [[package]]
 name = "annotated-types"
@@ -33,7 +33,6 @@ name = "anyio"
 version = "4.9.0"
 source = { registry = "https://pypi.org/simple" }
 dependencies = [
-    { name = "exceptiongroup", marker = "python_full_version < '3.11'" },
     { name = "idna" },
     { name = "sniffio" },
     { name = "typing-extensions" },
@@ -77,10 +76,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/40/f2/71b4ed65ce38982ecdda0ff20c3ad1b15e71949c78b2c053df53629ce940/bcrypt-4.3.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:79e70b8342a33b52b55d93b3a59223a844962bef479f6a0ea318ebbcadf71505", size = 363128 },
     { url = "https://files.pythonhosted.org/packages/11/99/12f6a58eca6dea4be992d6c681b7ec9410a1d9f5cf368c61437e31daa879/bcrypt-4.3.0-cp39-abi3-win32.whl", hash = "sha256:b4d4e57f0a63fd0b358eb765063ff661328f69a04494427265950c71b992a39a", size = 160598 },
     { url = "https://files.pythonhosted.org/packages/a9/cf/45fb5261ece3e6b9817d3d82b2f343a505fd58674a92577923bc500bd1aa/bcrypt-4.3.0-cp39-abi3-win_amd64.whl", hash = "sha256:e53e074b120f2877a35cc6c736b8eb161377caae8925c17688bd46ba56daaa5b", size = 152799 },
-    { url = "https://files.pythonhosted.org/packages/55/2d/0c7e5ab0524bf1a443e34cdd3926ec6f5879889b2f3c32b2f5074e99ed53/bcrypt-4.3.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c950d682f0952bafcceaf709761da0a32a942272fad381081b51096ffa46cea1", size = 275367 },
-    { url = "https://files.pythonhosted.org/packages/10/4f/f77509f08bdff8806ecc4dc472b6e187c946c730565a7470db772d25df70/bcrypt-4.3.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:107d53b5c67e0bbc3f03ebf5b030e0403d24dda980f8e244795335ba7b4a027d", size = 280644 },
-    { url = "https://files.pythonhosted.org/packages/35/18/7d9dc16a3a4d530d0a9b845160e9e5d8eb4f00483e05d44bb4116a1861da/bcrypt-4.3.0-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:b693dbb82b3c27a1604a3dff5bfc5418a7e6a781bb795288141e5f80cf3a3492", size = 274881 },
-    { url = "https://files.pythonhosted.org/packages/df/c4/ae6921088adf1e37f2a3a6a688e72e7d9e45fdd3ae5e0bc931870c1ebbda/bcrypt-4.3.0-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:b6354d3760fcd31994a14c89659dee887f1351a06e5dac3c1142307172a79f90", size = 280203 },
     { url = "https://files.pythonhosted.org/packages/4c/b1/1289e21d710496b88340369137cc4c5f6ee036401190ea116a7b4ae6d32a/bcrypt-4.3.0-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a839320bf27d474e52ef8cb16449bb2ce0ba03ca9f44daba6d93fa1d8828e48a", size = 275103 },
     { url = "https://files.pythonhosted.org/packages/94/41/19be9fe17e4ffc5d10b7b67f10e459fc4eee6ffe9056a88de511920cfd8d/bcrypt-4.3.0-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:bdc6a24e754a555d7316fa4774e64c6c3997d27ed2d1964d55920c7c227bc4ce", size = 280513 },
     { url = 
"https://files.pythonhosted.org/packages/aa/73/05687a9ef89edebdd8ad7474c16d8af685eb4591c3c38300bb6aad4f0076/bcrypt-4.3.0-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:55a935b8e9a1d2def0626c4269db3fcd26728cbff1e84f0341465c31c4ee56d8", size = 274685 }, @@ -133,18 +128,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/fc/97/c783634659c2920c3fc70419e3af40972dbaf758daa229a7d6ea6135c90d/cffi-1.17.1.tar.gz", hash = "sha256:1c39c6016c32bc48dd54561950ebd6836e1670f2ae46128f67cf49e789c52824", size = 516621 } wheels = [ - { url = "https://files.pythonhosted.org/packages/90/07/f44ca684db4e4f08a3fdc6eeb9a0d15dc6883efc7b8c90357fdbf74e186c/cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14", size = 182191 }, - { url = "https://files.pythonhosted.org/packages/08/fd/cc2fedbd887223f9f5d170c96e57cbf655df9831a6546c1727ae13fa977a/cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67", size = 178592 }, - { url = "https://files.pythonhosted.org/packages/de/cc/4635c320081c78d6ffc2cab0a76025b691a91204f4aa317d568ff9280a2d/cffi-1.17.1-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edae79245293e15384b51f88b00613ba9f7198016a5948b5dddf4917d4d26382", size = 426024 }, - { url = "https://files.pythonhosted.org/packages/b6/7b/3b2b250f3aab91abe5f8a51ada1b717935fdaec53f790ad4100fe2ec64d1/cffi-1.17.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45398b671ac6d70e67da8e4224a065cec6a93541bb7aebe1b198a61b58c7b702", size = 448188 }, - { url = "https://files.pythonhosted.org/packages/d3/48/1b9283ebbf0ec065148d8de05d647a986c5f22586b18120020452fff8f5d/cffi-1.17.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ad9413ccdeda48c5afdae7e4fa2192157e991ff761e7ab8fdd8926f40b160cc3", size = 455571 }, - { url = "https://files.pythonhosted.org/packages/40/87/3b8452525437b40f39ca7ff70276679772ee7e8b394934ff60e63b7b090c/cffi-1.17.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5da5719280082ac6bd9aa7becb3938dc9f9cbd57fac7d2871717b1feb0902ab6", size = 436687 }, - { url = "https://files.pythonhosted.org/packages/8d/fb/4da72871d177d63649ac449aec2e8a29efe0274035880c7af59101ca2232/cffi-1.17.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2bb1a08b8008b281856e5971307cc386a8e9c5b625ac297e853d36da6efe9c17", size = 446211 }, - { url = "https://files.pythonhosted.org/packages/ab/a0/62f00bcb411332106c02b663b26f3545a9ef136f80d5df746c05878f8c4b/cffi-1.17.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:045d61c734659cc045141be4bae381a41d89b741f795af1dd018bfb532fd0df8", size = 461325 }, - { url = "https://files.pythonhosted.org/packages/36/83/76127035ed2e7e27b0787604d99da630ac3123bfb02d8e80c633f218a11d/cffi-1.17.1-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6883e737d7d9e4899a8a695e00ec36bd4e5e4f18fabe0aca0efe0a4b44cdb13e", size = 438784 }, - { url = "https://files.pythonhosted.org/packages/21/81/a6cd025db2f08ac88b901b745c163d884641909641f9b826e8cb87645942/cffi-1.17.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6b8b4a92e1c65048ff98cfe1f735ef8f1ceb72e3d5f0c25fdb12087a23da22be", size = 461564 }, - { url = "https://files.pythonhosted.org/packages/f8/fe/4d41c2f200c4a457933dbd98d3cf4e911870877bd94d9656cc0fcb390681/cffi-1.17.1-cp310-cp310-win32.whl", hash = 
"sha256:c9c3d058ebabb74db66e431095118094d06abf53284d9c81f27300d0e0d8bc7c", size = 171804 }, - { url = "https://files.pythonhosted.org/packages/d1/b6/0b0f5ab93b0df4acc49cae758c81fe4e5ef26c3ae2e10cc69249dfd8b3ab/cffi-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:0f048dcf80db46f0098ccac01132761580d28e28bc0f78ae0d58048063317e15", size = 181299 }, { url = "https://files.pythonhosted.org/packages/6b/f4/927e3a8899e52a27fa57a48607ff7dc91a9ebe97399b357b85a0c7892e00/cffi-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a45e3c6913c5b87b3ff120dcdc03f6131fa0065027d0ed7ee6190736a74cd401", size = 182264 }, { url = "https://files.pythonhosted.org/packages/6c/f5/6c3a8efe5f503175aaddcbea6ad0d2c96dad6f5abb205750d1b3df44ef29/cffi-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:30c5e0cb5ae493c04c8b42916e52ca38079f1b235c2f8ae5f4527b963c401caf", size = 178651 }, { url = "https://files.pythonhosted.org/packages/94/dd/a3f0118e688d1b1a57553da23b16bdade96d2f9bcda4d32e7d2838047ff7/cffi-1.17.1-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f75c7ab1f9e4aca5414ed4d8e5c0e303a34f4421f8a0d47a4d019ceff0ab6af4", size = 445259 }, @@ -176,19 +159,6 @@ version = "3.4.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/16/b0/572805e227f01586461c80e0fd25d65a2115599cc9dad142fee4b747c357/charset_normalizer-3.4.1.tar.gz", hash = "sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3", size = 123188 } wheels = [ - { url = "https://files.pythonhosted.org/packages/0d/58/5580c1716040bc89206c77d8f74418caf82ce519aae06450393ca73475d1/charset_normalizer-3.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de", size = 198013 }, - { url = "https://files.pythonhosted.org/packages/d0/11/00341177ae71c6f5159a08168bcb98c6e6d196d372c94511f9f6c9afe0c6/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176", size = 141285 }, - { url = "https://files.pythonhosted.org/packages/01/09/11d684ea5819e5a8f5100fb0b38cf8d02b514746607934134d31233e02c8/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037", size = 151449 }, - { url = "https://files.pythonhosted.org/packages/08/06/9f5a12939db324d905dc1f70591ae7d7898d030d7662f0d426e2286f68c9/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f", size = 143892 }, - { url = "https://files.pythonhosted.org/packages/93/62/5e89cdfe04584cb7f4d36003ffa2936681b03ecc0754f8e969c2becb7e24/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a", size = 146123 }, - { url = "https://files.pythonhosted.org/packages/a9/ac/ab729a15c516da2ab70a05f8722ecfccc3f04ed7a18e45c75bbbaa347d61/charset_normalizer-3.4.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a", size = 147943 }, - { url = 
"https://files.pythonhosted.org/packages/03/d2/3f392f23f042615689456e9a274640c1d2e5dd1d52de36ab8f7955f8f050/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247", size = 142063 }, - { url = "https://files.pythonhosted.org/packages/f2/e3/e20aae5e1039a2cd9b08d9205f52142329f887f8cf70da3650326670bddf/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408", size = 150578 }, - { url = "https://files.pythonhosted.org/packages/8d/af/779ad72a4da0aed925e1139d458adc486e61076d7ecdcc09e610ea8678db/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb", size = 153629 }, - { url = "https://files.pythonhosted.org/packages/c2/b6/7aa450b278e7aa92cf7732140bfd8be21f5f29d5bf334ae987c945276639/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d", size = 150778 }, - { url = "https://files.pythonhosted.org/packages/39/f4/d9f4f712d0951dcbfd42920d3db81b00dd23b6ab520419626f4023334056/charset_normalizer-3.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807", size = 146453 }, - { url = "https://files.pythonhosted.org/packages/49/2b/999d0314e4ee0cff3cb83e6bc9aeddd397eeed693edb4facb901eb8fbb69/charset_normalizer-3.4.1-cp310-cp310-win32.whl", hash = "sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f", size = 95479 }, - { url = "https://files.pythonhosted.org/packages/2d/ce/3cbed41cff67e455a386fb5e5dd8906cdda2ed92fbc6297921f2e4419309/charset_normalizer-3.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f", size = 102790 }, { url = "https://files.pythonhosted.org/packages/72/80/41ef5d5a7935d2d3a773e3eaebf0a9350542f2cab4eac59a7a4741fbbbbe/charset_normalizer-3.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125", size = 194995 }, { url = "https://files.pythonhosted.org/packages/7a/28/0b9fefa7b8b080ec492110af6d88aa3dea91c464b17d53474b6e9ba5d2c5/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1", size = 139471 }, { url = "https://files.pythonhosted.org/packages/71/64/d24ab1a997efb06402e3fc07317e94da358e2585165930d9d59ad45fcae2/charset_normalizer-3.4.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3", size = 149831 }, @@ -272,12 +242,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b3/9f/6a3e0391957cc0c5f84aef9fbdd763035f2b52e998a53f99345e3ac69312/cryptography-44.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:6f101b1f780f7fc613d040ca4bdf835c6ef3b00e9bd7125a4255ec574c7916e4", size = 4298631 }, { url = "https://files.pythonhosted.org/packages/e2/a5/5bc097adb4b6d22a24dea53c51f37e480aaec3465285c253098642696423/cryptography-44.0.2-cp39-abi3-win32.whl", hash = "sha256:3dc62975e31617badc19a906481deacdeb80b4bb454394b4098e3f2525a488c5", size = 2773792 }, { url = "https://files.pythonhosted.org/packages/33/cf/1f7649b8b9a3543e042d3f348e398a061923ac05b507f3f4d95f11938aa9/cryptography-44.0.2-cp39-abi3-win_amd64.whl", hash = 
"sha256:5f6f90b72d8ccadb9c6e311c775c8305381db88374c65fa1a68250aa8a9cb3a6", size = 3210957 }, - { url = "https://files.pythonhosted.org/packages/99/10/173be140714d2ebaea8b641ff801cbcb3ef23101a2981cbf08057876f89e/cryptography-44.0.2-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:af4ff3e388f2fa7bff9f7f2b31b87d5651c45731d3e8cfa0944be43dff5cfbdb", size = 3396886 }, - { url = "https://files.pythonhosted.org/packages/2f/b4/424ea2d0fce08c24ede307cead3409ecbfc2f566725d4701b9754c0a1174/cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0529b1d5a0105dd3731fa65680b45ce49da4d8115ea76e9da77a875396727b41", size = 3892387 }, - { url = "https://files.pythonhosted.org/packages/28/20/8eaa1a4f7c68a1cb15019dbaad59c812d4df4fac6fd5f7b0b9c5177f1edd/cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7ca25849404be2f8e4b3c59483d9d3c51298a22c1c61a0e84415104dacaf5562", size = 4109922 }, - { url = "https://files.pythonhosted.org/packages/11/25/5ed9a17d532c32b3bc81cc294d21a36c772d053981c22bd678396bc4ae30/cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:268e4e9b177c76d569e8a145a6939eca9a5fec658c932348598818acf31ae9a5", size = 3895715 }, - { url = "https://files.pythonhosted.org/packages/63/31/2aac03b19c6329b62c45ba4e091f9de0b8f687e1b0cd84f101401bece343/cryptography-44.0.2-pp310-pypy310_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:9eb9d22b0a5d8fd9925a7764a054dca914000607dff201a24c791ff5c799e1fa", size = 4109876 }, - { url = "https://files.pythonhosted.org/packages/99/ec/6e560908349843718db1a782673f36852952d52a55ab14e46c42c8a7690a/cryptography-44.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2bf7bf75f7df9715f810d1b038870309342bff3069c5bd8c6b96128cb158668d", size = 3131719 }, { url = "https://files.pythonhosted.org/packages/d6/d7/f30e75a6aa7d0f65031886fa4a1485c2fbfe25a1896953920f6a9cfe2d3b/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:909c97ab43a9c0c0b0ada7a1281430e4e5ec0458e6d9244c0e821bbf152f061d", size = 3887513 }, { url = "https://files.pythonhosted.org/packages/9c/b4/7a494ce1032323ca9db9a3661894c66e0d7142ad2079a4249303402d8c71/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:96e7a5e9d6e71f9f4fca8eebfd603f8e86c5225bb18eb621b2c1e50b290a9471", size = 4107432 }, { url = "https://files.pythonhosted.org/packages/45/f8/6b3ec0bc56123b344a8d2b3264a325646d2dcdbdd9848b5e6f3d37db90b3/cryptography-44.0.2-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:d1b3031093a366ac767b3feb8bcddb596671b3aaff82d4050f984da0c248b615", size = 3891421 }, @@ -299,15 +263,6 @@ version = "1.9.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/d1/99/4d24bb6db12fc170a5f209f4c9108054a2c84d289d1e7f743e979b202023/evdev-1.9.1.tar.gz", hash = "sha256:dc640a064cb1c9fe1f8b970dc2039945a2a275d7b7ee62284bf427238abe45ee", size = 33349 } -[[package]] -name = "exceptiongroup" -version = "1.2.2" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/09/35/2495c4ac46b980e4ca1f6ad6db102322ef3ad2410b79fdde159a4b0f3b92/exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc", size = 28883 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/02/cc/b7e31358aac6ed1ef2bb790a9746ac2c69bcb3c8588b41616914eb106eaf/exceptiongroup-1.2.2-py3-none-any.whl", hash = 
"sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b", size = 16453 }, -] - [[package]] name = "fire" version = "0.7.0" @@ -399,18 +354,6 @@ version = "0.9.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/1e/c2/e4562507f52f0af7036da125bb699602ead37a2332af0788f8e0a3417f36/jiter-0.9.0.tar.gz", hash = "sha256:aadba0964deb424daa24492abc3d229c60c4a31bfee205aedbf1acc7639d7893", size = 162604 } wheels = [ - { url = "https://files.pythonhosted.org/packages/b0/82/39f7c9e67b3b0121f02a0b90d433626caa95a565c3d2449fea6bcfa3f5f5/jiter-0.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:816ec9b60fdfd1fec87da1d7ed46c66c44ffec37ab2ef7de5b147b2fce3fd5ad", size = 314540 }, - { url = "https://files.pythonhosted.org/packages/01/07/7bf6022c5a152fca767cf5c086bb41f7c28f70cf33ad259d023b53c0b858/jiter-0.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9b1d3086f8a3ee0194ecf2008cf81286a5c3e540d977fa038ff23576c023c0ea", size = 321065 }, - { url = "https://files.pythonhosted.org/packages/6c/b2/de3f3446ecba7c48f317568e111cc112613da36c7b29a6de45a1df365556/jiter-0.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1339f839b91ae30b37c409bf16ccd3dc453e8b8c3ed4bd1d6a567193651a4a51", size = 341664 }, - { url = "https://files.pythonhosted.org/packages/13/cf/6485a4012af5d407689c91296105fcdb080a3538e0658d2abf679619c72f/jiter-0.9.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ffba79584b3b670fefae66ceb3a28822365d25b7bf811e030609a3d5b876f538", size = 364635 }, - { url = "https://files.pythonhosted.org/packages/0d/f7/4a491c568f005553240b486f8e05c82547340572d5018ef79414b4449327/jiter-0.9.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cfc7d0a8e899089d11f065e289cb5b2daf3d82fbe028f49b20d7b809193958d", size = 406288 }, - { url = "https://files.pythonhosted.org/packages/d3/ca/f4263ecbce7f5e6bded8f52a9f1a66540b270c300b5c9f5353d163f9ac61/jiter-0.9.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e00a1a2bbfaaf237e13c3d1592356eab3e9015d7efd59359ac8b51eb56390a12", size = 397499 }, - { url = "https://files.pythonhosted.org/packages/ac/a2/522039e522a10bac2f2194f50e183a49a360d5f63ebf46f6d890ef8aa3f9/jiter-0.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d1d9870561eb26b11448854dce0ff27a9a27cb616b632468cafc938de25e9e51", size = 352926 }, - { url = "https://files.pythonhosted.org/packages/b1/67/306a5c5abc82f2e32bd47333a1c9799499c1c3a415f8dde19dbf876f00cb/jiter-0.9.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9872aeff3f21e437651df378cb75aeb7043e5297261222b6441a620218b58708", size = 384506 }, - { url = "https://files.pythonhosted.org/packages/0f/89/c12fe7b65a4fb74f6c0d7b5119576f1f16c79fc2953641f31b288fad8a04/jiter-0.9.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1fd19112d1049bdd47f17bfbb44a2c0001061312dcf0e72765bfa8abd4aa30e5", size = 520621 }, - { url = "https://files.pythonhosted.org/packages/c4/2b/d57900c5c06e6273fbaa76a19efa74dbc6e70c7427ab421bf0095dfe5d4a/jiter-0.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6ef5da104664e526836070e4a23b5f68dec1cc673b60bf1edb1bfbe8a55d0678", size = 512613 }, - { url = "https://files.pythonhosted.org/packages/89/05/d8b90bfb21e58097d5a4e0224f2940568366f68488a079ae77d4b2653500/jiter-0.9.0-cp310-cp310-win32.whl", hash = "sha256:cb12e6d65ebbefe5518de819f3eda53b73187b7089040b2d17f5b39001ff31c4", size = 206613 }, - { url = 
"https://files.pythonhosted.org/packages/2c/1d/5767f23f88e4f885090d74bbd2755518050a63040c0f59aa059947035711/jiter-0.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:c43ca669493626d8672be3b645dbb406ef25af3f4b6384cfd306da7eb2e70322", size = 208371 }, { url = "https://files.pythonhosted.org/packages/23/44/e241a043f114299254e44d7e777ead311da400517f179665e59611ab0ee4/jiter-0.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6c4d99c71508912a7e556d631768dcdef43648a93660670986916b297f1c54af", size = 314654 }, { url = "https://files.pythonhosted.org/packages/fb/1b/a7e5e42db9fa262baaa9489d8d14ca93f8663e7f164ed5e9acc9f467fc00/jiter-0.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8f60fb8ce7df529812bf6c625635a19d27f30806885139e367af93f6e734ef58", size = 320909 }, { url = "https://files.pythonhosted.org/packages/60/bf/8ebdfce77bc04b81abf2ea316e9c03b4a866a7d739cf355eae4d6fd9f6fe/jiter-0.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51c4e1a4f8ea84d98b7b98912aa4290ac3d1eabfde8e3c34541fae30e9d1f08b", size = 341733 }, @@ -477,16 +420,6 @@ version = "3.0.2" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/b2/97/5d42485e71dfc078108a86d6de8fa46db44a1a9295e89c5d6d4a06e23a62/markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0", size = 20537 } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/90/d08277ce111dd22f77149fd1a5d4653eeb3b3eaacbdfcbae5afb2600eebd/MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8", size = 14357 }, - { url = "https://files.pythonhosted.org/packages/04/e1/6e2194baeae0bca1fae6629dc0cbbb968d4d941469cbab11a3872edff374/MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158", size = 12393 }, - { url = "https://files.pythonhosted.org/packages/1d/69/35fa85a8ece0a437493dc61ce0bb6d459dcba482c34197e3efc829aa357f/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:38a9ef736c01fccdd6600705b09dc574584b89bea478200c5fbf112a6b0d5579", size = 21732 }, - { url = "https://files.pythonhosted.org/packages/22/35/137da042dfb4720b638d2937c38a9c2df83fe32d20e8c8f3185dbfef05f7/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bbcb445fa71794da8f178f0f6d66789a28d7319071af7a496d4d507ed566270d", size = 20866 }, - { url = "https://files.pythonhosted.org/packages/29/28/6d029a903727a1b62edb51863232152fd335d602def598dade38996887f0/MarkupSafe-3.0.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57cb5a3cf367aeb1d316576250f65edec5bb3be939e9247ae594b4bcbc317dfb", size = 20964 }, - { url = "https://files.pythonhosted.org/packages/cc/cd/07438f95f83e8bc028279909d9c9bd39e24149b0d60053a97b2bc4f8aa51/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:3809ede931876f5b2ec92eef964286840ed3540dadf803dd570c3b7e13141a3b", size = 21977 }, - { url = "https://files.pythonhosted.org/packages/29/01/84b57395b4cc062f9c4c55ce0df7d3108ca32397299d9df00fedd9117d3d/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e07c3764494e3776c602c1e78e298937c3315ccc9043ead7e685b7f2b8d47b3c", size = 21366 }, - { url = 
"https://files.pythonhosted.org/packages/bd/6e/61ebf08d8940553afff20d1fb1ba7294b6f8d279df9fd0c0db911b4bbcfd/MarkupSafe-3.0.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b424c77b206d63d500bcb69fa55ed8d0e6a3774056bdc4839fc9298a7edca171", size = 21091 }, - { url = "https://files.pythonhosted.org/packages/11/23/ffbf53694e8c94ebd1e7e491de185124277964344733c45481f32ede2499/MarkupSafe-3.0.2-cp310-cp310-win32.whl", hash = "sha256:fcabf5ff6eea076f859677f5f0b6b5c1a51e70a376b0579e0eadef8db48c6b50", size = 15065 }, - { url = "https://files.pythonhosted.org/packages/44/06/e7175d06dd6e9172d4a69a72592cb3f7a996a9c396eee29082826449bbc3/MarkupSafe-3.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:6af100e168aa82a50e186c82875a5893c5597a0c1ccdb0d8b40240b1f28b969a", size = 15514 }, { url = "https://files.pythonhosted.org/packages/6b/28/bbf83e3f76936960b850435576dd5e67034e200469571be53f69174a2dfd/MarkupSafe-3.0.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9025b4018f3a1314059769c7bf15441064b2207cb3f065e6ea1e7359cb46db9d", size = 14353 }, { url = "https://files.pythonhosted.org/packages/6c/30/316d194b093cde57d448a4c3209f22e3046c5bb2fb0820b118292b334be7/MarkupSafe-3.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:93335ca3812df2f366e80509ae119189886b0f3c2b81325d39efdb84a1e2ae93", size = 12392 }, { url = "https://files.pythonhosted.org/packages/f2/96/9cdafba8445d3a53cae530aaf83c38ec64c4d5427d975c974084af5bc5d2/MarkupSafe-3.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2cb8438c3cbb25e220c2ab33bb226559e7afb3baec11c4f218ffa7308603c832", size = 23984 }, @@ -558,16 +491,6 @@ version = "2.2.4" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/e1/78/31103410a57bc2c2b93a3597340a8119588571f6a4539067546cb9a0bfac/numpy-2.2.4.tar.gz", hash = "sha256:9ba03692a45d3eef66559efe1d1096c4b9b75c0986b5dff5530c378fb8331d4f", size = 20270701 } wheels = [ - { url = "https://files.pythonhosted.org/packages/04/89/a79e86e5c1433926ed7d60cb267fb64aa578b6101ab645800fd43b4801de/numpy-2.2.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:8146f3550d627252269ac42ae660281d673eb6f8b32f113538e0cc2a9aed42b9", size = 21250661 }, - { url = "https://files.pythonhosted.org/packages/79/c2/f50921beb8afd60ed9589ad880332cfefdb805422210d327fb48f12b7a81/numpy-2.2.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e642d86b8f956098b564a45e6f6ce68a22c2c97a04f5acd3f221f57b8cb850ae", size = 14389926 }, - { url = "https://files.pythonhosted.org/packages/c7/b9/2c4e96130b0b0f97b0ef4a06d6dae3b39d058b21a5e2fa2decd7fd6b1c8f/numpy-2.2.4-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:a84eda42bd12edc36eb5b53bbcc9b406820d3353f1994b6cfe453a33ff101775", size = 5428329 }, - { url = "https://files.pythonhosted.org/packages/7f/a5/3d7094aa898f4fc5c84cdfb26beeae780352d43f5d8bdec966c4393d644c/numpy-2.2.4-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:4ba5054787e89c59c593a4169830ab362ac2bee8a969249dc56e5d7d20ff8df9", size = 6963559 }, - { url = "https://files.pythonhosted.org/packages/4c/22/fb1be710a14434c09080dd4a0acc08939f612ec02efcb04b9e210474782d/numpy-2.2.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7716e4a9b7af82c06a2543c53ca476fa0b57e4d760481273e09da04b74ee6ee2", size = 14368066 }, - { url = "https://files.pythonhosted.org/packages/c2/07/2e5cc71193e3ef3a219ffcf6ca4858e46ea2be09c026ddd480d596b32867/numpy-2.2.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:adf8c1d66f432ce577d0197dceaac2ac00c0759f573f28516246351c58a85020", size = 16417040 }, - { url = "https://files.pythonhosted.org/packages/1a/97/3b1537776ad9a6d1a41813818343745e8dd928a2916d4c9edcd9a8af1dac/numpy-2.2.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:218f061d2faa73621fa23d6359442b0fc658d5b9a70801373625d958259eaca3", size = 15879862 }, - { url = "https://files.pythonhosted.org/packages/b0/b7/4472f603dd45ef36ff3d8e84e84fe02d9467c78f92cc121633dce6da307b/numpy-2.2.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:df2f57871a96bbc1b69733cd4c51dc33bea66146b8c63cacbfed73eec0883017", size = 18206032 }, - { url = "https://files.pythonhosted.org/packages/0d/bd/6a092963fb82e6c5aa0d0440635827bbb2910da229545473bbb58c537ed3/numpy-2.2.4-cp310-cp310-win32.whl", hash = "sha256:a0258ad1f44f138b791327961caedffbf9612bfa504ab9597157806faa95194a", size = 6608517 }, - { url = "https://files.pythonhosted.org/packages/01/e3/cb04627bc2a1638948bc13e818df26495aa18e20d5be1ed95ab2b10b6847/numpy-2.2.4-cp310-cp310-win_amd64.whl", hash = "sha256:0d54974f9cf14acf49c60f0f7f4084b6579d24d439453d5fc5805d46a165b542", size = 12943498 }, { url = "https://files.pythonhosted.org/packages/16/fb/09e778ee3a8ea0d4dc8329cca0a9c9e65fed847d08e37eba74cb7ed4b252/numpy-2.2.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e9e0a277bb2eb5d8a7407e14688b85fd8ad628ee4e0c7930415687b6564207a4", size = 21254989 }, { url = "https://files.pythonhosted.org/packages/a2/0a/1212befdbecab5d80eca3cde47d304cad986ad4eec7d85a42e0b6d2cc2ef/numpy-2.2.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9eeea959168ea555e556b8188da5fa7831e21d91ce031e95ce23747b7609f8a4", size = 14425910 }, { url = "https://files.pythonhosted.org/packages/2b/3e/e7247c1d4f15086bb106c8d43c925b0b2ea20270224f5186fa48d4fb5cbd/numpy-2.2.4-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:bd3ad3b0a40e713fc68f99ecfd07124195333f1e689387c180813f0e94309d6f", size = 5426490 }, @@ -588,10 +511,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/1e/f8bb88f6157045dd5d9b27ccf433d016981032690969aa5c19e332b138c0/numpy-2.2.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:11c43995255eb4127115956495f43e9343736edb7fcdb0d973defd9de14cd84f", size = 17898106 }, { url = "https://files.pythonhosted.org/packages/2b/93/df59a5a3897c1f036ae8ff845e45f4081bb06943039ae28a3c1c7c780f22/numpy-2.2.4-cp312-cp312-win32.whl", hash = "sha256:65ef3468b53269eb5fdb3a5c09508c032b793da03251d5f8722b1194f1790c00", size = 6311190 }, { url = "https://files.pythonhosted.org/packages/46/69/8c4f928741c2a8efa255fdc7e9097527c6dc4e4df147e3cadc5d9357ce85/numpy-2.2.4-cp312-cp312-win_amd64.whl", hash = "sha256:2aad3c17ed2ff455b8eaafe06bcdae0062a1db77cb99f4b9cbb5f4ecb13c5146", size = 12644305 }, - { url = "https://files.pythonhosted.org/packages/b2/5c/f09c33a511aff41a098e6ef3498465d95f6360621034a3d95f47edbc9119/numpy-2.2.4-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:7051ee569db5fbac144335e0f3b9c2337e0c8d5c9fee015f259a5bd70772b7e8", size = 21081956 }, - { url = "https://files.pythonhosted.org/packages/ba/30/74c48b3b6494c4b820b7fa1781d441e94d87a08daa5b35d222f06ba41a6f/numpy-2.2.4-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:ab2939cd5bec30a7430cbdb2287b63151b77cf9624de0532d629c9a1c59b1d5c", size = 6827143 }, - { url = "https://files.pythonhosted.org/packages/54/f5/ab0d2f48b490535c7a80e05da4a98902b632369efc04f0e47bb31ca97d8f/numpy-2.2.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:d0f35b19894a9e08639fd60a1ec1978cb7f5f7f1eace62f38dd36be8aecdef4d", size = 16233350 }, - { url = "https://files.pythonhosted.org/packages/3b/3a/2f6d8c1f8e45d496bca6baaec93208035faeb40d5735c25afac092ec9a12/numpy-2.2.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b4adfbbc64014976d2f91084915ca4e626fbf2057fb81af209c1a6d776d23e3d", size = 12857565 }, ] [[package]] @@ -611,6 +530,7 @@ dependencies = [ { name = "paramiko" }, { name = "pillow" }, { name = "pydantic" }, + { name = "pydantic-prompt" }, { name = "pydantic-settings" }, { name = "pynput" }, { name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, @@ -643,6 +563,7 @@ requires-dist = [ { name = "paramiko", specifier = ">=3.5.1" }, { name = "pillow", specifier = ">=10.0.0" }, { name = "pydantic", specifier = ">=2.10.6" }, + { name = "pydantic-prompt", specifier = ">=0.1.0" }, { name = "pydantic-settings", specifier = ">=2.8.1" }, { name = "pynput", specifier = ">=1.7.6" }, { name = "pyobjc-framework-cocoa", marker = "sys_platform == 'darwin'" }, @@ -684,17 +605,6 @@ version = "11.1.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/f3/af/c097e544e7bd278333db77933e535098c259609c4eb3b85381109602fb5b/pillow-11.1.0.tar.gz", hash = "sha256:368da70808b36d73b4b390a8ffac11069f8a5c85f29eff1f1b01bcf3ef5b2a20", size = 46742715 } wheels = [ - { url = "https://files.pythonhosted.org/packages/50/1c/2dcea34ac3d7bc96a1fd1bd0a6e06a57c67167fec2cff8d95d88229a8817/pillow-11.1.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:e1abe69aca89514737465752b4bcaf8016de61b3be1397a8fc260ba33321b3a8", size = 3229983 }, - { url = "https://files.pythonhosted.org/packages/14/ca/6bec3df25e4c88432681de94a3531cc738bd85dea6c7aa6ab6f81ad8bd11/pillow-11.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c640e5a06869c75994624551f45e5506e4256562ead981cce820d5ab39ae2192", size = 3101831 }, - { url = "https://files.pythonhosted.org/packages/d4/2c/668e18e5521e46eb9667b09e501d8e07049eb5bfe39d56be0724a43117e6/pillow-11.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a07dba04c5e22824816b2615ad7a7484432d7f540e6fa86af60d2de57b0fcee2", size = 4314074 }, - { url = "https://files.pythonhosted.org/packages/02/80/79f99b714f0fc25f6a8499ecfd1f810df12aec170ea1e32a4f75746051ce/pillow-11.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e267b0ed063341f3e60acd25c05200df4193e15a4a5807075cd71225a2386e26", size = 4394933 }, - { url = "https://files.pythonhosted.org/packages/81/aa/8d4ad25dc11fd10a2001d5b8a80fdc0e564ac33b293bdfe04ed387e0fd95/pillow-11.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:bd165131fd51697e22421d0e467997ad31621b74bfc0b75956608cb2906dda07", size = 4353349 }, - { url = "https://files.pythonhosted.org/packages/84/7a/cd0c3eaf4a28cb2a74bdd19129f7726277a7f30c4f8424cd27a62987d864/pillow-11.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:abc56501c3fd148d60659aae0af6ddc149660469082859fa7b066a298bde9482", size = 4476532 }, - { url = "https://files.pythonhosted.org/packages/8f/8b/a907fdd3ae8f01c7670dfb1499c53c28e217c338b47a813af8d815e7ce97/pillow-11.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:54ce1c9a16a9561b6d6d8cb30089ab1e5eb66918cb47d457bd996ef34182922e", size = 4279789 }, - { url = "https://files.pythonhosted.org/packages/6f/9a/9f139d9e8cccd661c3efbf6898967a9a337eb2e9be2b454ba0a09533100d/pillow-11.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:73ddde795ee9b06257dac5ad42fcb07f3b9b813f8c1f7f870f402f4dc54b5269", size = 4413131 }, - { url = "https://files.pythonhosted.org/packages/a8/68/0d8d461f42a3f37432203c8e6df94da10ac8081b6d35af1c203bf3111088/pillow-11.1.0-cp310-cp310-win32.whl", hash = "sha256:3a5fe20a7b66e8135d7fd617b13272626a28278d0e578c98720d9ba4b2439d49", size = 2291213 }, - { url = "https://files.pythonhosted.org/packages/14/81/d0dff759a74ba87715509af9f6cb21fa21d93b02b3316ed43bda83664db9/pillow-11.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:b6123aa4a59d75f06e9dd3dac5bf8bc9aa383121bb3dd9a7a612e05eabc9961a", size = 2625725 }, - { url = "https://files.pythonhosted.org/packages/ce/1f/8d50c096a1d58ef0584ddc37e6f602828515219e9d2428e14ce50f5ecad1/pillow-11.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:a76da0a31da6fcae4210aa94fd779c65c75786bc9af06289cd1c184451ef7a65", size = 2375213 }, { url = "https://files.pythonhosted.org/packages/dd/d6/2000bfd8d5414fb70cbbe52c8332f2283ff30ed66a9cde42716c8ecbe22c/pillow-11.1.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e06695e0326d05b06833b40b7ef477e475d0b1ba3a6d27da1bb48c23209bf457", size = 3229968 }, { url = "https://files.pythonhosted.org/packages/d9/45/3fe487010dd9ce0a06adf9b8ff4f273cc0a44536e234b0fad3532a42c15b/pillow-11.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:96f82000e12f23e4f29346e42702b6ed9a2f2fea34a740dd5ffffcc8c539eb35", size = 3101806 }, { url = "https://files.pythonhosted.org/packages/e3/72/776b3629c47d9d5f1c160113158a7a7ad177688d3a1159cd3b62ded5a33a/pillow-11.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3cd561ded2cf2bbae44d4605837221b987c216cff94f49dfeed63488bb228d2", size = 4322283 }, @@ -717,13 +627,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/c4/fc6e86750523f367923522014b821c11ebc5ad402e659d8c9d09b3c9d70c/pillow-11.1.0-cp312-cp312-win32.whl", hash = "sha256:cfd5cd998c2e36a862d0e27b2df63237e67273f2fc78f47445b14e73a810e7e6", size = 2291630 }, { url = "https://files.pythonhosted.org/packages/08/5c/2104299949b9d504baf3f4d35f73dbd14ef31bbd1ddc2c1b66a5b7dfda44/pillow-11.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:a697cd8ba0383bba3d2d3ada02b34ed268cb548b369943cd349007730c92bddf", size = 2626369 }, { url = "https://files.pythonhosted.org/packages/37/f3/9b18362206b244167c958984b57c7f70a0289bfb59a530dd8af5f699b910/pillow-11.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:4dd43a78897793f60766563969442020e90eb7847463eca901e41ba186a7d4a5", size = 2375240 }, - { url = "https://files.pythonhosted.org/packages/fa/c5/389961578fb677b8b3244fcd934f720ed25a148b9a5cc81c91bdf59d8588/pillow-11.1.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:8c730dc3a83e5ac137fbc92dfcfe1511ce3b2b5d7578315b63dbbb76f7f51d90", size = 3198345 }, - { url = "https://files.pythonhosted.org/packages/c4/fa/803c0e50ffee74d4b965229e816af55276eac1d5806712de86f9371858fd/pillow-11.1.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:7d33d2fae0e8b170b6a6c57400e077412240f6f5bb2a342cf1ee512a787942bb", size = 3072938 }, - { url = "https://files.pythonhosted.org/packages/dc/67/2a3a5f8012b5d8c63fe53958ba906c1b1d0482ebed5618057ef4d22f8076/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a8d65b38173085f24bc07f8b6c505cbb7418009fa1a1fcb111b1f4961814a442", size = 3400049 }, - { url = "https://files.pythonhosted.org/packages/e5/a0/514f0d317446c98c478d1872497eb92e7cde67003fed74f696441e647446/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", 
hash = "sha256:015c6e863faa4779251436db398ae75051469f7c903b043a48f078e437656f83", size = 3422431 }, - { url = "https://files.pythonhosted.org/packages/cd/00/20f40a935514037b7d3f87adfc87d2c538430ea625b63b3af8c3f5578e72/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d44ff19eea13ae4acdaaab0179fa68c0c6f2f45d66a4d8ec1eda7d6cecbcc15f", size = 3446208 }, - { url = "https://files.pythonhosted.org/packages/28/3c/7de681727963043e093c72e6c3348411b0185eab3263100d4490234ba2f6/pillow-11.1.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d3d8da4a631471dfaf94c10c85f5277b1f8e42ac42bade1ac67da4b4a7359b73", size = 3509746 }, - { url = "https://files.pythonhosted.org/packages/41/67/936f9814bdd74b2dfd4822f1f7725ab5d8ff4103919a1664eb4874c58b2f/pillow-11.1.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:4637b88343166249fe8aa94e7c4a62a180c4b3898283bb5d3d2fd5fe10d8e4e0", size = 2626353 }, ] [[package]] @@ -767,19 +670,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/fc/01/f3e5ac5e7c25833db5eb555f7b7ab24cd6f8c322d3a3ad2d67a952dc0abc/pydantic_core-2.27.2.tar.gz", hash = "sha256:eb026e5a4c1fee05726072337ff51d1efb6f59090b7da90d30ea58625b1ffb39", size = 413443 } wheels = [ - { url = "https://files.pythonhosted.org/packages/3a/bc/fed5f74b5d802cf9a03e83f60f18864e90e3aed7223adaca5ffb7a8d8d64/pydantic_core-2.27.2-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2d367ca20b2f14095a8f4fa1210f5a7b78b8a20009ecced6b12818f455b1e9fa", size = 1895938 }, - { url = "https://files.pythonhosted.org/packages/71/2a/185aff24ce844e39abb8dd680f4e959f0006944f4a8a0ea372d9f9ae2e53/pydantic_core-2.27.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:491a2b73db93fab69731eaee494f320faa4e093dbed776be1a829c2eb222c34c", size = 1815684 }, - { url = "https://files.pythonhosted.org/packages/c3/43/fafabd3d94d159d4f1ed62e383e264f146a17dd4d48453319fd782e7979e/pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7969e133a6f183be60e9f6f56bfae753585680f3b7307a8e555a948d443cc05a", size = 1829169 }, - { url = "https://files.pythonhosted.org/packages/a2/d1/f2dfe1a2a637ce6800b799aa086d079998959f6f1215eb4497966efd2274/pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3de9961f2a346257caf0aa508a4da705467f53778e9ef6fe744c038119737ef5", size = 1867227 }, - { url = "https://files.pythonhosted.org/packages/7d/39/e06fcbcc1c785daa3160ccf6c1c38fea31f5754b756e34b65f74e99780b5/pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e2bb4d3e5873c37bb3dd58714d4cd0b0e6238cebc4177ac8fe878f8b3aa8e74c", size = 2037695 }, - { url = "https://files.pythonhosted.org/packages/7a/67/61291ee98e07f0650eb756d44998214231f50751ba7e13f4f325d95249ab/pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:280d219beebb0752699480fe8f1dc61ab6615c2046d76b7ab7ee38858de0a4e7", size = 2741662 }, - { url = "https://files.pythonhosted.org/packages/32/90/3b15e31b88ca39e9e626630b4c4a1f5a0dfd09076366f4219429e6786076/pydantic_core-2.27.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:47956ae78b6422cbd46f772f1746799cbb862de838fd8d1fbd34a82e05b0983a", size = 1993370 }, - { url = "https://files.pythonhosted.org/packages/ff/83/c06d333ee3a67e2e13e07794995c1535565132940715931c1c43bfc85b11/pydantic_core-2.27.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:14d4a5c49d2f009d62a2a7140d3064f686d17a5d1a268bc641954ba181880236", size = 1996813 }, - { url = "https://files.pythonhosted.org/packages/7c/f7/89be1c8deb6e22618a74f0ca0d933fdcb8baa254753b26b25ad3acff8f74/pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:337b443af21d488716f8d0b6164de833e788aa6bd7e3a39c005febc1284f4962", size = 2005287 }, - { url = "https://files.pythonhosted.org/packages/b7/7d/8eb3e23206c00ef7feee17b83a4ffa0a623eb1a9d382e56e4aa46fd15ff2/pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:03d0f86ea3184a12f41a2d23f7ccb79cdb5a18e06993f8a45baa8dfec746f0e9", size = 2128414 }, - { url = "https://files.pythonhosted.org/packages/4e/99/fe80f3ff8dd71a3ea15763878d464476e6cb0a2db95ff1c5c554133b6b83/pydantic_core-2.27.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:7041c36f5680c6e0f08d922aed302e98b3745d97fe1589db0a3eebf6624523af", size = 2155301 }, - { url = "https://files.pythonhosted.org/packages/2b/a3/e50460b9a5789ca1451b70d4f52546fa9e2b420ba3bfa6100105c0559238/pydantic_core-2.27.2-cp310-cp310-win32.whl", hash = "sha256:50a68f3e3819077be2c98110c1f9dcb3817e93f267ba80a2c05bb4f8799e2ff4", size = 1816685 }, - { url = "https://files.pythonhosted.org/packages/57/4c/a8838731cb0f2c2a39d3535376466de6049034d7b239c0202a64aaa05533/pydantic_core-2.27.2-cp310-cp310-win_amd64.whl", hash = "sha256:e0fd26b16394ead34a424eecf8a31a1f5137094cabe84a1bcb10fa6ba39d3d31", size = 1982876 }, { url = "https://files.pythonhosted.org/packages/c2/89/f3450af9d09d44eea1f2c369f49e8f181d742f28220f88cc4dfaae91ea6e/pydantic_core-2.27.2-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:8e10c99ef58cfdf2a66fc15d66b16c4a04f62bca39db589ae8cba08bc55331bc", size = 1893421 }, { url = "https://files.pythonhosted.org/packages/9e/e3/71fe85af2021f3f386da42d291412e5baf6ce7716bd7101ea49c810eda90/pydantic_core-2.27.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:26f32e0adf166a84d0cb63be85c562ca8a6fa8de28e5f0d92250c6b7e9e2aff7", size = 1814998 }, { url = "https://files.pythonhosted.org/packages/a6/3c/724039e0d848fd69dbf5806894e26479577316c6f0f112bacaf67aa889ac/pydantic_core-2.27.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c19d1ea0673cd13cc2f872f6c9ab42acc4e4f492a7ca9d3795ce2b112dd7e15", size = 1826167 }, @@ -808,15 +698,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/67/4e197c300976af185b7cef4c02203e175fb127e414125916bf1128b639a9/pydantic_core-2.27.2-cp312-cp312-win32.whl", hash = "sha256:1e2cb691ed9834cd6a8be61228471d0a503731abfb42f82458ff27be7b2186fc", size = 1834064 }, { url = "https://files.pythonhosted.org/packages/1f/ea/cd7209a889163b8dcca139fe32b9687dd05249161a3edda62860430457a5/pydantic_core-2.27.2-cp312-cp312-win_amd64.whl", hash = "sha256:cc3f1a99a4f4f9dd1de4fe0312c114e740b5ddead65bb4102884b384c15d8bc9", size = 1989046 }, { url = "https://files.pythonhosted.org/packages/bc/49/c54baab2f4658c26ac633d798dab66b4c3a9bbf47cff5284e9c182f4137a/pydantic_core-2.27.2-cp312-cp312-win_arm64.whl", hash = "sha256:3911ac9284cd8a1792d3cb26a2da18f3ca26c6908cc434a18f730dc0db7bfa3b", size = 1885092 }, - { url = "https://files.pythonhosted.org/packages/46/72/af70981a341500419e67d5cb45abe552a7c74b66326ac8877588488da1ac/pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2bf14caea37e91198329b828eae1618c068dfb8ef17bb33287a7ad4b61ac314e", size = 1891159 }, - { url = 
"https://files.pythonhosted.org/packages/ad/3d/c5913cccdef93e0a6a95c2d057d2c2cba347815c845cda79ddd3c0f5e17d/pydantic_core-2.27.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:b0cb791f5b45307caae8810c2023a184c74605ec3bcbb67d13846c28ff731ff8", size = 1768331 }, - { url = "https://files.pythonhosted.org/packages/f6/f0/a3ae8fbee269e4934f14e2e0e00928f9346c5943174f2811193113e58252/pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:688d3fd9fcb71f41c4c015c023d12a79d1c4c0732ec9eb35d96e3388a120dcf3", size = 1822467 }, - { url = "https://files.pythonhosted.org/packages/d7/7a/7bbf241a04e9f9ea24cd5874354a83526d639b02674648af3f350554276c/pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d591580c34f4d731592f0e9fe40f9cc1b430d297eecc70b962e93c5c668f15f", size = 1979797 }, - { url = "https://files.pythonhosted.org/packages/4f/5f/4784c6107731f89e0005a92ecb8a2efeafdb55eb992b8e9d0a2be5199335/pydantic_core-2.27.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:82f986faf4e644ffc189a7f1aafc86e46ef70372bb153e7001e8afccc6e54133", size = 1987839 }, - { url = "https://files.pythonhosted.org/packages/6d/a7/61246562b651dff00de86a5f01b6e4befb518df314c54dec187a78d81c84/pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:bec317a27290e2537f922639cafd54990551725fc844249e64c523301d0822fc", size = 1998861 }, - { url = "https://files.pythonhosted.org/packages/86/aa/837821ecf0c022bbb74ca132e117c358321e72e7f9702d1b6a03758545e2/pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:0296abcb83a797db256b773f45773da397da75a08f5fcaef41f2044adec05f50", size = 2116582 }, - { url = "https://files.pythonhosted.org/packages/81/b0/5e74656e95623cbaa0a6278d16cf15e10a51f6002e3ec126541e95c29ea3/pydantic_core-2.27.2-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:0d75070718e369e452075a6017fbf187f788e17ed67a3abd47fa934d001863d9", size = 2151985 }, - { url = "https://files.pythonhosted.org/packages/63/37/3e32eeb2a451fddaa3898e2163746b0cffbbdbb4740d38372db0490d67f3/pydantic_core-2.27.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:7e17b560be3c98a8e3aa66ce828bdebb9e9ac6ad5466fba92eb74c4c95cb1151", size = 2004715 }, +] + +[[package]] +name = "pydantic-prompt" +version = "0.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pydantic" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bf/65/eee334e13d88a0010a5050ecada582d69bd81bd5546b1791b721059c22d9/pydantic_prompt-0.1.0.tar.gz", hash = "sha256:f7df8263b47fcee10f9d12f257e03aa801d54c8ca3a0795da41d6a0261297a0b", size = 33429 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1f/df/3b608438e78356e3b2e4880b01ea0f6ce7d5f88e7b254dc2f96b0fe65b61/pydantic_prompt-0.1.0-py3-none-any.whl", hash = "sha256:983cb9b9288f4b6a4523d29cbcfb0d46023959c557f780186824846cb6adef5d", size = 7369 }, ] [[package]] @@ -882,7 +775,6 @@ version = "11.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5c/94/a111239b98260869780a5767e5d74bfd3a8c13a40457f479c28dcd91f89d/pyobjc_core-11.0.tar.gz", hash = "sha256:63bced211cb8a8fb5c8ff46473603da30e51112861bd02c438fbbbc8578d9a70", size = 994931 } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/21/ccc992b38670176a615fb67686d709e03be989511da687f6f49ddc4ff6c8/pyobjc_core-11.0-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:10866b3a734d47caf48e456eea0d4815c2c9b21856157db5917b61dee06893a1", size = 732162 }, { url = "https://files.pythonhosted.org/packages/52/05/fa97309c3b1bc1ec90d701db89902e0bd5e1024023aa2c5387b889458b1b/pyobjc_core-11.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:50675c0bb8696fe960a28466f9baf6943df2928a1fd85625d678fa2f428bd0bd", size = 727295 }, { url = "https://files.pythonhosted.org/packages/56/ce/bf3ff9a9347721a398c3dfb83e29b43fb166b7ef590f3f7b7ddcd283df39/pyobjc_core-11.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:a03061d4955c62ddd7754224a80cdadfdf17b6b5f60df1d9169a3b1b02923f0b", size = 739750 }, ] @@ -899,7 +791,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/ba/fb/4e42573b0d3baa3fa18ec53614cf979f951313f1451e8f2e17df9429da1f/pyobjc_framework_applicationservices-11.0.tar.gz", hash = "sha256:d6ea18dfc7d5626a3ecf4ac72d510405c0d3a648ca38cae8db841acdebecf4d2", size = 224334 } wheels = [ - { url = "https://files.pythonhosted.org/packages/29/2e/23d996e8294cc4d4ac719c410b1d210dfb1f64eecf87170d5e72c966592a/pyobjc_framework_ApplicationServices-11.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:bc8f34b5b59ffd3c210ae883d794345c1197558ff3da0f5800669cf16435271e", size = 30839 }, { url = "https://files.pythonhosted.org/packages/99/37/3d4dc6c004aaeb67bd43f7261d7c169ff45b8fc0eefbc7ba8cd6b0c881bc/pyobjc_framework_ApplicationServices-11.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:61a99eef23abb704257310db4f5271137707e184768f6407030c01de4731b67b", size = 30846 }, { url = "https://files.pythonhosted.org/packages/74/a9/7a45a67e126d32c61ea22ffd80e87ff7e05b4acf32bede6cce071fbfffc8/pyobjc_framework_ApplicationServices-11.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:5fbeb425897d6129471d451ec61a29ddd5b1386eb26b1dd49cb313e34616ee21", size = 30908 }, ] @@ -913,7 +804,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/c5/32/53809096ad5fc3e7a2c5ddea642590a5f2cb5b81d0ad6ea67fdb2263d9f9/pyobjc_framework_cocoa-11.0.tar.gz", hash = "sha256:00346a8cb81ad7b017b32ff7bf596000f9faa905807b1bd234644ebd47f692c5", size = 6173848 } wheels = [ - { url = "https://files.pythonhosted.org/packages/37/16/905a32c5241848ddd91d94bae346342750f28f49fadb3746e9e796f929f3/pyobjc_framework_Cocoa-11.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:fbc65f260d617d5463c7fb9dbaaffc23c9a4fabfe3b1a50b039b61870b8daefd", size = 385509 }, { url = "https://files.pythonhosted.org/packages/23/97/81fd41ad90e9c241172110aa635a6239d56f50d75923aaedbbe351828580/pyobjc_framework_Cocoa-11.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3ea7be6e6dd801b297440de02d312ba3fa7fd3c322db747ae1cb237e975f5d33", size = 385534 }, { url = "https://files.pythonhosted.org/packages/5b/8d/0e2558447c26b3ba64f7c9776a5a6c9d2ae8abf9d34308b174ae0934402e/pyobjc_framework_Cocoa-11.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:280a577b83c68175a28b2b7138d1d2d3111f2b2b66c30e86f81a19c2b02eae71", size = 385811 }, ] @@ -929,7 +819,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/9d/e8/9b68dc788828e38143a3e834e66346713751cb83d7f0955016323005c1a2/pyobjc_framework_coretext-11.0.tar.gz", hash = "sha256:a68437153e627847e3898754dd3f13ae0cb852246b016a91f9c9cbccb9f91a43", size = 274222 } wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/af/aa4ab3e029a9f539e782eab894c57590791700d892cda73a324fe22e09a6/pyobjc_framework_CoreText-11.0-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:6939b4ea745b349b5c964823a2071f155f5defdc9b9fc3a13f036d859d7d0439", size = 30395 }, { url = "https://files.pythonhosted.org/packages/f6/20/b8a967101b585a2425ffe645135f8618edd51e1430aeb668373475a07d1f/pyobjc_framework_CoreText-11.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:56a4889858308b0d9f147d568b4d91c441cc0ffd332497cb4f709bb1990450c1", size = 30397 }, { url = "https://files.pythonhosted.org/packages/0d/14/d300b8bf18acd1d98d40820d2a9b5c5b6cf96325bdfc5020bc963218e001/pyobjc_framework_CoreText-11.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:fb90e7f370b3fd7cb2fb442e3dc63fedf0b4af6908db1c18df694d10dc94669d", size = 30456 }, ] @@ -944,7 +833,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/a5/ad/f00f3f53387c23bbf4e0bb1410e11978cbf87c82fa6baff0ee86f74c5fb6/pyobjc_framework_quartz-11.0.tar.gz", hash = "sha256:3205bf7795fb9ae34747f701486b3db6dfac71924894d1f372977c4d70c3c619", size = 3952463 } wheels = [ - { url = "https://files.pythonhosted.org/packages/bd/b3/75fccb0406aac00eecbd14f278a9b6e6fc0e4483220d57eb3aff68666fb1/pyobjc_framework_Quartz-11.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:da3ab13c9f92361959b41b0ad4cdd41ae872f90a6d8c58a9ed699bc08ab1c45c", size = 212343 }, { url = "https://files.pythonhosted.org/packages/a3/6a/68957c8c5e8f0128d4d419728bac397d48fa7ad7a66e82b70e64d129ffca/pyobjc_framework_Quartz-11.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:d251696bfd8e8ef72fbc90eb29fec95cb9d1cc409008a183d5cc3246130ae8c2", size = 212349 }, { url = "https://files.pythonhosted.org/packages/60/5d/df827b78dcb5140652ad08af8038c9ddd7e01e6bdf84462bfee644e6e661/pyobjc_framework_Quartz-11.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:cb4a9f2d9d580ea15e25e6b270f47681afb5689cafc9e25712445ce715bcd18e", size = 212061 }, ] @@ -955,11 +843,9 @@ version = "8.3.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ae/3c/c9d525a414d506893f0cd8a8d0de7706446213181570cdbd766691164e40/pytest-8.3.5.tar.gz", hash = "sha256:f4efe70cc14e511565ac476b57c279e12a855b11f48f212af1080ef2263d3845", size = 1450891 } wheels = [ @@ -1045,7 +931,6 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "markdown-it-py" }, { name = "pygments" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/a1/53/830aa4c3066a8ab0ae9a9955976fb770fe9c6102117c8ec4ab3ea62d89e8/rich-14.0.0.tar.gz", hash = "sha256:82f1bc23a6a21ebca4ae0c45af9bdbc492ed20231dcb63f297d6d1021a9d5725", size = 224078 } wheels = [ @@ -1098,15 +983,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/b7/b9/31ba9cd990e626574baf93fbc1ac61cf9ed54faafd04c479117517661637/scipy-1.15.2.tar.gz", hash = "sha256:cd58a314d92838f7e6f755c8a2167ead4f27e1fd5c1251fd54289569ef3495ec", size = 59417316 } wheels = [ - { url = "https://files.pythonhosted.org/packages/95/df/ef233fff6838fe6f7840d69b5ef9f20d2b5c912a8727b21ebf876cb15d54/scipy-1.15.2-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a2ec871edaa863e8213ea5df811cd600734f6400b4af272e1c011e69401218e9", size = 38692502 }, - { url = 
"https://files.pythonhosted.org/packages/5c/20/acdd4efb8a68b842968f7bc5611b1aeb819794508771ad104de418701422/scipy-1.15.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:6f223753c6ea76983af380787611ae1291e3ceb23917393079dcc746ba60cfb5", size = 30085508 }, - { url = "https://files.pythonhosted.org/packages/42/55/39cf96ca7126f1e78ee72a6344ebdc6702fc47d037319ad93221063e6cf4/scipy-1.15.2-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:ecf797d2d798cf7c838c6d98321061eb3e72a74710e6c40540f0e8087e3b499e", size = 22359166 }, - { url = "https://files.pythonhosted.org/packages/51/48/708d26a4ab8a1441536bf2dfcad1df0ca14a69f010fba3ccbdfc02df7185/scipy-1.15.2-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:9b18aa747da280664642997e65aab1dd19d0c3d17068a04b3fe34e2559196cb9", size = 25112047 }, - { url = "https://files.pythonhosted.org/packages/dd/65/f9c5755b995ad892020381b8ae11f16d18616208e388621dfacc11df6de6/scipy-1.15.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:87994da02e73549dfecaed9e09a4f9d58a045a053865679aeb8d6d43747d4df3", size = 35536214 }, - { url = "https://files.pythonhosted.org/packages/de/3c/c96d904b9892beec978562f64d8cc43f9cca0842e65bd3cd1b7f7389b0ba/scipy-1.15.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69ea6e56d00977f355c0f84eba69877b6df084516c602d93a33812aa04d90a3d", size = 37646981 }, - { url = "https://files.pythonhosted.org/packages/3d/74/c2d8a24d18acdeae69ed02e132b9bc1bb67b7bee90feee1afe05a68f9d67/scipy-1.15.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:888307125ea0c4466287191e5606a2c910963405ce9671448ff9c81c53f85f58", size = 37230048 }, - { url = "https://files.pythonhosted.org/packages/42/19/0aa4ce80eca82d487987eff0bc754f014dec10d20de2f66754fa4ea70204/scipy-1.15.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9412f5e408b397ff5641080ed1e798623dbe1ec0d78e72c9eca8992976fa65aa", size = 40010322 }, - { url = "https://files.pythonhosted.org/packages/d0/d2/f0683b7e992be44d1475cc144d1f1eeae63c73a14f862974b4db64af635e/scipy-1.15.2-cp310-cp310-win_amd64.whl", hash = "sha256:b5e025e903b4f166ea03b109bb241355b9c42c279ea694d8864d033727205e65", size = 41233385 }, { url = "https://files.pythonhosted.org/packages/40/1f/bf0a5f338bda7c35c08b4ed0df797e7bafe8a78a97275e9f439aceb46193/scipy-1.15.2-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:92233b2df6938147be6fa8824b8136f29a18f016ecde986666be5f4d686a91a4", size = 38703651 }, { url = "https://files.pythonhosted.org/packages/de/54/db126aad3874601048c2c20ae3d8a433dbfd7ba8381551e6f62606d9bd8e/scipy-1.15.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:62ca1ff3eb513e09ed17a5736929429189adf16d2d740f44e53270cc800ecff1", size = 30102038 }, { url = "https://files.pythonhosted.org/packages/61/d8/84da3fffefb6c7d5a16968fe5b9f24c98606b165bb801bb0b8bc3985200f/scipy-1.15.2-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:4c6676490ad76d1c2894d77f976144b41bd1a4052107902238047fb6a473e971", size = 22375518 }, @@ -1197,35 +1073,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7f/be/df630c387a0a054815d60be6a97eb4e8f17385d5d6fe660e1c02750062b4/termcolor-2.5.0-py3-none-any.whl", hash = "sha256:37b17b5fc1e604945c2642c872a3764b5d547a48009871aea3edd3afa180afb8", size = 7755 }, ] -[[package]] -name = "tomli" -version = "2.2.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/18/87/302344fed471e44a87289cf4967697d07e532f2421fdaf868a303cbae4ff/tomli-2.2.1.tar.gz", hash = 
"sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff", size = 17175 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/43/ca/75707e6efa2b37c77dadb324ae7d9571cb424e61ea73fad7c56c2d14527f/tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249", size = 131077 }, - { url = "https://files.pythonhosted.org/packages/c7/16/51ae563a8615d472fdbffc43a3f3d46588c264ac4f024f63f01283becfbb/tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6", size = 123429 }, - { url = "https://files.pythonhosted.org/packages/f1/dd/4f6cd1e7b160041db83c694abc78e100473c15d54620083dbd5aae7b990e/tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a", size = 226067 }, - { url = "https://files.pythonhosted.org/packages/a9/6b/c54ede5dc70d648cc6361eaf429304b02f2871a345bbdd51e993d6cdf550/tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee", size = 236030 }, - { url = "https://files.pythonhosted.org/packages/1f/47/999514fa49cfaf7a92c805a86c3c43f4215621855d151b61c602abb38091/tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e", size = 240898 }, - { url = "https://files.pythonhosted.org/packages/73/41/0a01279a7ae09ee1573b423318e7934674ce06eb33f50936655071d81a24/tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4", size = 229894 }, - { url = "https://files.pythonhosted.org/packages/55/18/5d8bc5b0a0362311ce4d18830a5d28943667599a60d20118074ea1b01bb7/tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106", size = 245319 }, - { url = "https://files.pythonhosted.org/packages/92/a3/7ade0576d17f3cdf5ff44d61390d4b3febb8a9fc2b480c75c47ea048c646/tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8", size = 238273 }, - { url = "https://files.pythonhosted.org/packages/72/6f/fa64ef058ac1446a1e51110c375339b3ec6be245af9d14c87c4a6412dd32/tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff", size = 98310 }, - { url = "https://files.pythonhosted.org/packages/6a/1c/4a2dcde4a51b81be3530565e92eda625d94dafb46dbeb15069df4caffc34/tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b", size = 108309 }, - { url = "https://files.pythonhosted.org/packages/52/e1/f8af4c2fcde17500422858155aeb0d7e93477a0d59a98e56cbfe75070fd0/tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea", size = 132762 }, - { url = "https://files.pythonhosted.org/packages/03/b8/152c68bb84fc00396b83e7bbddd5ec0bd3dd409db4195e2a9b3e398ad2e3/tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8", size = 123453 }, - { url = 
"https://files.pythonhosted.org/packages/c8/d6/fc9267af9166f79ac528ff7e8c55c8181ded34eb4b0e93daa767b8841573/tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192", size = 233486 }, - { url = "https://files.pythonhosted.org/packages/5c/51/51c3f2884d7bab89af25f678447ea7d297b53b5a3b5730a7cb2ef6069f07/tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222", size = 242349 }, - { url = "https://files.pythonhosted.org/packages/ab/df/bfa89627d13a5cc22402e441e8a931ef2108403db390ff3345c05253935e/tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77", size = 252159 }, - { url = "https://files.pythonhosted.org/packages/9e/6e/fa2b916dced65763a5168c6ccb91066f7639bdc88b48adda990db10c8c0b/tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6", size = 237243 }, - { url = "https://files.pythonhosted.org/packages/b4/04/885d3b1f650e1153cbb93a6a9782c58a972b94ea4483ae4ac5cedd5e4a09/tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd", size = 259645 }, - { url = "https://files.pythonhosted.org/packages/9c/de/6b432d66e986e501586da298e28ebeefd3edc2c780f3ad73d22566034239/tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e", size = 244584 }, - { url = "https://files.pythonhosted.org/packages/1c/9a/47c0449b98e6e7d1be6cbac02f93dd79003234ddc4aaab6ba07a9a7482e2/tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98", size = 98875 }, - { url = "https://files.pythonhosted.org/packages/ef/60/9b9638f081c6f1261e2688bd487625cd1e660d0a85bd469e91d8db969734/tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4", size = 109418 }, - { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257 }, -] - [[package]] name = "typer" version = "0.15.2" @@ -1266,7 +1113,6 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/4b/4d/938bd85e5bf2edeec766267a5015ad969730bb91e31b44021dfe8b22df6c/uvicorn-0.34.0.tar.gz", hash = "sha256:404051050cd7e905de2c9a7e61790943440b3416f49cb409f965d9dcd0fa73e9", size = 76568 } wheels = [