From fe3f5bb3de01b817530f0055872195e6e455158b Mon Sep 17 00:00:00 2001 From: Le Date: Thu, 6 Feb 2025 16:14:52 +0300 Subject: [PATCH 1/3] fixed misprint in `quick example` --- README.md | 58 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 52baa83..0b74bdf 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ Code: ```python # content of tests/test_weather.py -import invariant.testing.testing.functional as F +import invariant.testing.functional as F from invariant.testing import Trace, assert_equals def test_weather(): @@ -388,31 +388,37 @@ This section provides a detailed overview of the analyzer's components, includin **Table of Contents** -- [Use Cases](#use-cases) -- [Why Agent Debugging Matters](#why-agent-debugging-matters) -- [Why Agent Security Matters](#why-agent-security-matters) -- [Features](#features) - - [Getting Started](#getting-started) -- [Use Cases](#use-cases-1) - - [Debugging Coding Agents](#debugging-coding-agents) - - [Prevent Data Leaks In Your Productivity Agent](#prevent-data-leaks-in-your-productivity-agent) - - [Detect Vulnerabilities in Your Code Generation Agent](#detect-vulnerabilities-in-your-code-generation-agent) - - [Enforce Access Control In Your RAG-based Chat Agent](#enforce-access-control-in-your-rag-based-chat-agent) -- [Documentation](#documentation) - - [Policy Language](#policy-language) - - [Example Rule](#example-rule) - - [Trace Format](#trace-format) - - [Trace Example](#trace-example) - - [Debugging and Printing Inputs](#debugging-and-printing-inputs) - - [Custom Error Types](#custom-error-types) - - [Predicates](#predicates) - - [Semantic Tool Call Matching](#semantic-tool-call-matching) - - [Integration](#integration) - - [Analyzing Agent Traces](#analyzing-agent-traces) - - [Real-Time Monitoring of an OpenAI Agent](#real-time-monitoring-of-an-openai-agent) - - [Real-Time Monitoring of a `langchain` 
Agent](#real-time-monitoring-of-a-langchain-agent) - - [Automatic Issue Resolution (Handlers)](#automatic-issue-resolution-handlers) - - [Roadmap](#roadmap) +- [Quickstart](#quickstart) +- [Table Of Contents](#table-of-contents) + - [Testing](#testing) + - [A quick example](#a-quick-example) + - [Testing Features](#testing-features) + - [Explorer](#explorer) + - [Analyzer](#analyzer) + - [Use Cases](#use-cases) + - [Why Agent Debugging Matters](#why-agent-debugging-matters) + - [Why Agent Security Matters](#why-agent-security-matters) + - [Analyzer Features](#analyzer-features) + - [Getting Started](#getting-started) + - [Use Cases](#use-cases-1) + - [Debugging Coding Agents](#debugging-coding-agents) + - [Prevent Data Leaks In Your Productivity Agent](#prevent-data-leaks-in-your-productivity-agent) + - [Detect Vulnerabilities in Your Code Generation Agent](#detect-vulnerabilities-in-your-code-generation-agent) + - [Enforce Access Control In Your RAG-based Chat Agent](#enforce-access-control-in-your-rag-based-chat-agent) + - [Analyzer Documentation](#analyzer-documentation) + - [Policy Language](#policy-language) + - [Example Rule](#example-rule) + - [Trace Format](#trace-format) + - [Trace Example](#trace-example) + - [Debugging and Printing Inputs](#debugging-and-printing-inputs) + - [Custom Error Types](#custom-error-types) + - [Predicates](#predicates) + - [Semantic Tool Call Matching](#semantic-tool-call-matching) + - [Integration](#integration) + - [Analyzing Agent Traces](#analyzing-agent-traces) + - [Error Localization](#error-localization) + - [Real-Time Monitoring of an OpenAI Agent](#real-time-monitoring-of-an-openai-agent) + - [Real-Time Monitoring of a `langchain` Agent](#real-time-monitoring-of-a-langchain-agent) ### Policy Language From 99a6a2cba693093b39333f668ecf9ab81c495783 Mon Sep 17 00:00:00 2001 From: Le Date: Thu, 6 Feb 2025 17:44:58 +0300 Subject: [PATCH 2/3] added client_kwargs for Detector --- 
invariant/testing/custom_types/invariant_string.py | 9 ++++++++- invariant/testing/scorers/llm/clients/client_factory.py | 4 ++-- invariant/testing/scorers/llm/clients/open_ai_client.py | 4 ++-- invariant/testing/scorers/llm/detector.py | 8 +++++--- 4 files changed, 17 insertions(+), 8 deletions(-) diff --git a/invariant/testing/custom_types/invariant_string.py b/invariant/testing/custom_types/invariant_string.py index a508f61..94ca710 100644 --- a/invariant/testing/custom_types/invariant_string.py +++ b/invariant/testing/custom_types/invariant_string.py @@ -8,6 +8,7 @@ from typing import Any, Literal, Union from _pytest.python_api import ApproxBase + from invariant.testing.scorers.code import execute, is_valid_json, is_valid_python from invariant.testing.scorers.llm.classifier import Classifier from invariant.testing.scorers.llm.detector import Detector @@ -308,6 +309,7 @@ def extract( model: str = "gpt-4o", client: str = "OpenAI", use_cached_result: bool = True, + client_kwargs={}, ) -> list[InvariantString]: """Extract values from the underlying string using an LLM. @@ -322,7 +324,12 @@ def extract( use_cached_result (bool): Whether to use a cached result if available. 
""" - llm_detector = Detector(predicate_rule=predicate, model=model, client=client) + llm_detector = Detector( + predicate_rule=predicate, + model=model, + client=client, + client_kwargs=client_kwargs, + ) detections = llm_detector.detect(self.value, use_cached_result) ret = [] for substr, r in detections: diff --git a/invariant/testing/scorers/llm/clients/client_factory.py b/invariant/testing/scorers/llm/clients/client_factory.py index 2fa2124..3d957fb 100644 --- a/invariant/testing/scorers/llm/clients/client_factory.py +++ b/invariant/testing/scorers/llm/clients/client_factory.py @@ -9,10 +9,10 @@ class ClientFactory: """Factory for creating LLM clients.""" @staticmethod - def get(client_name: str) -> LLMClient: + def get(client_name: str, client_kwargs: dict) -> LLMClient: """Get an LLM client by name.""" if client_name == SupportedClients.OPENAI: - return OpenAIClient() + return OpenAIClient(client_kwargs) if client_name == SupportedClients.ANTHROPIC: return AnthropicClient() raise ValueError(f"Invalid client name: {client_name}") diff --git a/invariant/testing/scorers/llm/clients/open_ai_client.py b/invariant/testing/scorers/llm/clients/open_ai_client.py index f7f2279..d2348d4 100644 --- a/invariant/testing/scorers/llm/clients/open_ai_client.py +++ b/invariant/testing/scorers/llm/clients/open_ai_client.py @@ -10,9 +10,9 @@ class OpenAIClient(LLMClient): """Client for interacting with OpenAI.""" - def __init__(self): + def __init__(self, client_kwargs: dict): # Add OPENAI_API_KEY to your environment variables. 
- self.client = openai.OpenAI() + self.client = openai.OpenAI(**client_kwargs) def get_name(self) -> str: return "OpenAI" diff --git a/invariant/testing/scorers/llm/detector.py b/invariant/testing/scorers/llm/detector.py index 5d5b6d8..c86bd0c 100644 --- a/invariant/testing/scorers/llm/detector.py +++ b/invariant/testing/scorers/llm/detector.py @@ -4,11 +4,12 @@ import logging from typing import Any, Tuple -from invariant.testing.cache import CacheManager -from invariant.testing.custom_types.addresses import Range from openai.types.chat.parsed_chat_completion import ParsedChatCompletion from pydantic import BaseModel +from invariant.testing.cache import CacheManager +from invariant.testing.custom_types.addresses import Range + from .clients.anthropic_client import AnthropicClient from .clients.client import SupportedClients from .clients.client_factory import ClientFactory @@ -99,6 +100,7 @@ def __init__( predicate_rule: str, model: str = "gpt-4o", client: str = "OpenAI", + client_kwargs: dict = {}, ): """Instantiate Detector object. 
@@ -114,7 +116,7 @@ def __init__( """ self.model = model self.prompt = self._get_prompt(predicate_rule, client) - self.client = ClientFactory.get(client) + self.client = ClientFactory.get(client, client_kwargs) self.cache_manager = CacheManager( CACHE_DIRECTORY_LLM_DETECTOR, expiry=CACHE_TIMEOUT ) From 0a258e9ed988fed3ce808cff6c8c172faf9d281e Mon Sep 17 00:00:00 2001 From: Le Date: Thu, 6 Feb 2025 17:45:15 +0300 Subject: [PATCH 3/3] changed prompt for Detector --- invariant/testing/scorers/llm/detector.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/invariant/testing/scorers/llm/detector.py b/invariant/testing/scorers/llm/detector.py index c86bd0c..2b104bb 100644 --- a/invariant/testing/scorers/llm/detector.py +++ b/invariant/testing/scorers/llm/detector.py @@ -29,6 +29,16 @@ Detections: [("1", "Zurich"), ("2", "Geneva"), ("2", "Bern"), ("3", "Bern")] +Your response must be in the following format: +{{ + "detections": [ + {{"line": 1, "substring": "Zurich"}}, + {{"line": 2, "substring": "Geneva"}}, + {{"line": 2, "substring": "Bern"}}, + {{"line": 3, "substring": "Bern"}} + ] +}} + Use the following predicate rule to find the detections in the next user message: {predicate_rule} """