From 0b51b22deda491fce37b468fe75d6bd53c4b2c65 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Wed, 11 Sep 2024 14:26:21 -0400 Subject: [PATCH 1/7] Initial implementation --- council/llm/llm_function.py | 54 +------- council/llm/llm_response_parser.py | 116 ++++++++++++++++++ tests/integration/llm/test_llm_function.py | 8 +- ....py => test_llm_response_parser_blocks.py} | 68 +++++----- 4 files changed, 159 insertions(+), 87 deletions(-) create mode 100644 council/llm/llm_response_parser.py rename tests/unit/llm/{test_code_blocks_response_parser.py => test_llm_response_parser_blocks.py} (69%) diff --git a/council/llm/llm_function.py b/council/llm/llm_function.py index f86623c5..3caf9027 100644 --- a/council/llm/llm_function.py +++ b/council/llm/llm_function.py @@ -1,61 +1,11 @@ -from dataclasses import dataclass, is_dataclass -from typing import Any, Callable, Dict, Generic, Iterable, List, Optional, Sequence, Type, TypeVar, Union +from typing import Any, Generic, Iterable, List, Optional, Sequence, Union from council.contexts import LLMContext -from ..utils import CodeParser from .llm_answer import LLMParsingException from .llm_base import LLMBase, LLMMessage from .llm_middleware import LLMMiddleware, LLMMiddlewareChain, LLMRequest, LLMResponse - -T_Response = TypeVar("T_Response") -LLMResponseParser = Callable[[LLMResponse], T_Response] - -T_Dataclass = TypeVar("T_Dataclass") - - -def code_blocks_response_parser(cls: Type[T_Dataclass]) -> Type[T_Dataclass]: - """ - Decorator providing an automatic parsing of LLMResponse translating code blocks content into class fields. - Implement validate(self) to provide additional validation functionality. - """ - if not is_dataclass(cls): - cls = dataclass(cls) - - def from_response(response: LLMResponse) -> T_Dataclass: - llm_response = response.value - parsed_blocks: Dict[str, Any] = {} - - for field_name, field in cls.__dataclass_fields__.items(): # type: ignore - block = CodeParser.find_first(field_name, llm_response) - if block is None: - raise LLMParsingException(f"`{field_name}` block is not found") - - field_type = field.type - value = block.code.strip() - if field_type is str: - parsed_blocks[field_name] = value - elif field_type is bool: - if value.lower() not in ["true", "false"]: - raise LLMParsingException(f"Cannot convert value `{value}` to bool for field `{field_name}`") - parsed_blocks[field_name] = value.lower() == "true" - elif field_type in [int, float]: - try: - parsed_blocks[field_name] = field_type(value) - except ValueError: - raise LLMParsingException( - f"Cannot convert value `{value}` to {field_type.__name__} for field `{field_name}`" - ) - else: - raise ValueError(f"Unsupported type `{field_type.__name__}` for field `{field_name}`") - - instance = cls(**parsed_blocks) # code blocks in LLM response template must match class fields - if hasattr(instance, "validate"): - instance.validate() - return instance - - setattr(cls, "from_response", staticmethod(from_response)) - return cls +from .llm_response_parser import LLMResponseParser, T_Response class LLMFunctionError(Exception): diff --git a/council/llm/llm_response_parser.py b/council/llm/llm_response_parser.py new file mode 100644 index 00000000..64dbe875 --- /dev/null +++ b/council/llm/llm_response_parser.py @@ -0,0 +1,116 @@ +import json +import re +from typing import Any, Callable, Dict, Type, TypeVar + +import yaml +from pydantic import BaseModel, ValidationError + +from ..utils import CodeParser +from .llm_answer import LLMParsingException +from .llm_middleware import LLMResponse + +T_Response = TypeVar("T_Response") +LLMResponseParser = Callable[[LLMResponse], T_Response] + +T = TypeVar("T", bound="BaseModelResponseParser") + + +class BaseModelResponseParser(BaseModel): + @classmethod + def from_response(cls: Type[T], response: LLMResponse) -> T: + """Implement parsing functionality""" + raise NotImplementedError() + + def validator(self) -> None: + """Implement custom validation functionality - raise LLMParsingException to trigger local correction""" + pass + + @classmethod + def create_and_validate(cls: Type[T], **kwargs) -> T: + instance = cls._try_create(**kwargs) + instance.validator() + return instance + + @classmethod + def _try_create(cls: Type[T], **kwargs) -> T: + """Try to create BaseModel object instance and raise LLMParsingException if any ValidationError occurs""" + + try: + return cls(**kwargs) + except ValidationError as e: + # LLM-friendlier version of pydantic error message without "For further information visit..." + clean_exception_message = re.sub(r"For further information visit.*", "", str(e)) + raise LLMParsingException(clean_exception_message) + + +class CodeBlocksResponseParser(BaseModelResponseParser): + @classmethod + def from_response(cls: Type[T], response: LLMResponse) -> T: + llm_response = response.value + parsed_blocks: Dict[str, Any] = {} + + for field_name in cls.model_fields.keys(): + block = CodeParser.find_first(field_name, llm_response) + if block is None: + raise LLMParsingException(f"`{field_name}` block is not found") + parsed_blocks[field_name] = block.code.strip() + + return cls.create_and_validate(**parsed_blocks) + + +class YAMLBlockResponseParser(BaseModelResponseParser): + @classmethod + def from_response(cls: Type[T], response: LLMResponse) -> T: + llm_response = response.value + + yaml_block = CodeParser.find_first("yaml", llm_response) + if yaml_block is None: + raise LLMParsingException("yaml block is not found") + + yaml_content = YAMLResponseParser.parse(yaml_block.code) + return cls.create_and_validate(**yaml_content) + + +class YAMLResponseParser(BaseModelResponseParser): + @classmethod + def from_response(cls: Type[T], response: LLMResponse) -> T: + llm_response = response.value + + yaml_content = YAMLResponseParser.parse(llm_response) + return cls.create_and_validate(**yaml_content) + + @staticmethod + def parse(content: str) -> Dict[str, Any]: + try: + return yaml.safe_load(content) + except yaml.YAMLError as e: + raise LLMParsingException(f"Error while parsing yaml: {e}") + + +class JSONBlockResponseParser(BaseModelResponseParser): + @classmethod + def from_response(cls: Type[T], response: LLMResponse) -> T: + llm_response = response.value + + json_block = CodeParser.find_first("json", llm_response) + if json_block is None: + raise LLMParsingException("json block is not found") + + json_content = JSONResponseParser.parse(json_block.code) + return cls.create_and_validate(**json_content) + + +class JSONResponseParser(BaseModelResponseParser): + @classmethod + def from_response(cls: Type[T], response: LLMResponse) -> T: + llm_response = response.value + + json_content = JSONResponseParser.parse(llm_response) + return cls.create_and_validate(**json_content) + + @staticmethod + def parse(content: str) -> Dict[str, Any]: + try: + return json.loads(content) + except json.JSONDecodeError as e: + raise LLMParsingException(f"Error while parsing json: {e}") diff --git a/tests/integration/llm/test_llm_function.py b/tests/integration/llm/test_llm_function.py index aac05e43..0e021dcf 100644 --- a/tests/integration/llm/test_llm_function.py +++ b/tests/integration/llm/test_llm_function.py @@ -4,7 +4,8 @@ from council import AzureLLM from council.llm import LLMParsingException, LLMMessage -from council.llm.llm_function import LLMFunction, code_blocks_response_parser +from council.llm.llm_function import LLMFunction +from council.llm.llm_response_parser import CodeBlocksResponseParser from council.prompt import LLMPromptConfigObject from tests import get_data_filename from tests.unit import LLMPrompts @@ -14,13 +15,12 @@ USER = prompt_config.get_user_prompt_template("default") -@code_blocks_response_parser -class SQLResult: +class SQLResult(CodeBlocksResponseParser): solved: bool explanation: str sql: str - def validate(self) -> None: + def validator(self) -> None: if "limit" not in self.sql.lower(): raise LLMParsingException("Generated SQL query should contain a LIMIT clause") diff --git a/tests/unit/llm/test_code_blocks_response_parser.py b/tests/unit/llm/test_llm_response_parser_blocks.py similarity index 69% rename from tests/unit/llm/test_code_blocks_response_parser.py rename to tests/unit/llm/test_llm_response_parser_blocks.py index fc88fa4f..14e64250 100644 --- a/tests/unit/llm/test_code_blocks_response_parser.py +++ b/tests/unit/llm/test_llm_response_parser_blocks.py @@ -1,31 +1,31 @@ import unittest +from pydantic import field_validator + from council.llm import LLMParsingException -from council.llm.llm_function import ( - code_blocks_response_parser, - LLMFunction, - FunctionOutOfRetryError, -) +from council.llm.llm_function import LLMFunction, FunctionOutOfRetryError +from council.llm.llm_response_parser import CodeBlocksResponseParser from council.mocks import MockLLM, MockMultipleResponses -@code_blocks_response_parser -class Response: +class Response(CodeBlocksResponseParser): text: str flag: bool age: int number: float - def validate(self) -> None: + @field_validator("text") + @classmethod + def n(cls, text: str) -> str: + if text == "incorrect": + raise ValueError(f"Incorrect `text` value: `{text}`") + return text + + def validator(self) -> None: if self.age < 0: raise LLMParsingException(f"Age must be a positive number; got `{self.age}`") -@code_blocks_response_parser -class BadResponse: - complex_type: Response - - def format_response(text: str, flag: str, age: str, number: str) -> str: return f""" ```text @@ -65,41 +65,47 @@ def test_wrong_bool(self): with self.assertRaises(FunctionOutOfRetryError) as e: _ = execute_mock_llm_func(llm, Response.from_response) - assert str(e.exception).strip().endswith("Cannot convert value `not-a-bool` to bool for field `flag`") + assert ( + "Input should be a valid boolean, unable to interpret input " + "[type=bool_parsing, input_value='not-a-bool', input_type=str]" + ) in str(e.exception) def test_wrong_int(self): llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="not-an-int", number="3.14")) with self.assertRaises(FunctionOutOfRetryError) as e: _ = execute_mock_llm_func(llm, Response.from_response) - assert str(e.exception).strip().endswith("Cannot convert value `not-an-int` to int for field `age`") + assert ( + "Input should be a valid integer, unable to parse string as an integer " + "[type=int_parsing, input_value='not-an-int', input_type=str]" + ) in str(e.exception) - def test_validate_int(self): - llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="-5", number="3.14")) + def test_wrong_float(self): + llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="34", number="not-a-float")) with self.assertRaises(FunctionOutOfRetryError) as e: _ = execute_mock_llm_func(llm, Response.from_response) - assert str(e.exception).strip().endswith("Age must be a positive number; got `-5`") + assert ( + "Input should be a valid number, unable to parse string as a number " + "[type=float_parsing, input_value='not-a-float', input_type=str]" + ) in str(e.exception) - def test_wrong_float(self): - llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="34", number="not-a-float")) + def test_pydentic_validation(self): + llm = MockLLM.from_response(format_response(text="incorrect", flag="true", age="34", number="3.14")) with self.assertRaises(FunctionOutOfRetryError) as e: _ = execute_mock_llm_func(llm, Response.from_response) - assert str(e.exception).strip().endswith("Cannot convert value `not-a-float` to float for field `number`") + assert ( + "Value error, Incorrect `text` value: `incorrect` " + "[type=value_error, input_value='incorrect', input_type=str]" + ) in str(e.exception) - def test_wrong_type(self): - llm = MockLLM.from_response( - """ -```complex_type -Some text -``` -""" - ) + def test_custom_validation(self): + llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="-5", number="3.14")) with self.assertRaises(FunctionOutOfRetryError) as e: - _ = execute_mock_llm_func(llm, BadResponse.from_response) + _ = execute_mock_llm_func(llm, Response.from_response) - assert str(e.exception).strip().endswith("Unsupported type `Response` for field `complex_type`") + assert str(e.exception).strip().endswith("Age must be a positive number; got `-5`") def test_correct(self): llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="34", number="3.14")) From 8dbb21f8d74581fd66afcccb24b5284a70a2ea4d Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Wed, 11 Sep 2024 14:59:06 -0400 Subject: [PATCH 2/7] Add unit tests for yaml and json --- .../llm/test_llm_response_parser_blocks.py | 28 +-- .../llm/test_llm_response_parser_yaml_json.py | 184 ++++++++++++++++++ 2 files changed, 200 insertions(+), 12 deletions(-) create mode 100644 tests/unit/llm/test_llm_response_parser_yaml_json.py diff --git a/tests/unit/llm/test_llm_response_parser_blocks.py b/tests/unit/llm/test_llm_response_parser_blocks.py index 14e64250..1c6f392c 100644 --- a/tests/unit/llm/test_llm_response_parser_blocks.py +++ b/tests/unit/llm/test_llm_response_parser_blocks.py @@ -65,40 +65,44 @@ def test_wrong_bool(self): with self.assertRaises(FunctionOutOfRetryError) as e: _ = execute_mock_llm_func(llm, Response.from_response) - assert ( + self.assertIn( "Input should be a valid boolean, unable to interpret input " - "[type=bool_parsing, input_value='not-a-bool', input_type=str]" - ) in str(e.exception) + "[type=bool_parsing, input_value='not-a-bool', input_type=str]", + str(e.exception), + ) def test_wrong_int(self): llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="not-an-int", number="3.14")) with self.assertRaises(FunctionOutOfRetryError) as e: _ = execute_mock_llm_func(llm, Response.from_response) - assert ( + self.assertIn( "Input should be a valid integer, unable to parse string as an integer " - "[type=int_parsing, input_value='not-an-int', input_type=str]" - ) in str(e.exception) + "[type=int_parsing, input_value='not-an-int', input_type=str]", + str(e.exception), + ) def test_wrong_float(self): llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="34", number="not-a-float")) with self.assertRaises(FunctionOutOfRetryError) as e: _ = execute_mock_llm_func(llm, Response.from_response) - assert ( + self.assertIn( "Input should be a valid number, unable to parse string as a number " - "[type=float_parsing, input_value='not-a-float', input_type=str]" - ) in str(e.exception) + "[type=float_parsing, input_value='not-a-float', input_type=str]", + str(e.exception), + ) def test_pydentic_validation(self): llm = MockLLM.from_response(format_response(text="incorrect", flag="true", age="34", number="3.14")) with self.assertRaises(FunctionOutOfRetryError) as e: _ = execute_mock_llm_func(llm, Response.from_response) - assert ( + self.assertIn( "Value error, Incorrect `text` value: `incorrect` " - "[type=value_error, input_value='incorrect', input_type=str]" - ) in str(e.exception) + "[type=value_error, input_value='incorrect', input_type=str]", + str(e.exception), + ) def test_custom_validation(self): llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="-5", number="3.14")) diff --git a/tests/unit/llm/test_llm_response_parser_yaml_json.py b/tests/unit/llm/test_llm_response_parser_yaml_json.py new file mode 100644 index 00000000..e12d33c9 --- /dev/null +++ b/tests/unit/llm/test_llm_response_parser_yaml_json.py @@ -0,0 +1,184 @@ +import json +import unittest +from typing import Dict, List, Any, Literal, Optional + +import yaml +from pydantic import BaseModel + +from council.llm.llm_function import FunctionOutOfRetryError +from council.llm.llm_response_parser import YAMLBlockResponseParser, JSONBlockResponseParser +from council.mocks import MockLLM +from tests.unit.llm.test_llm_response_parser_blocks import execute_mock_llm_func + + +class NumberReasoningPair(BaseModel): + number: float + reasoning: str + + +class YAMLComplexResponse(YAMLBlockResponseParser): + mode: Literal["mode_one", "mode_two"] + pairs: List[NumberReasoningPair] + nested_dict: Dict[str, Any] + value_with_default: int = 48 + + +class JSONComplexResponse(JSONBlockResponseParser): + mode: Literal["mode_one", "mode_two"] + pairs: List[NumberReasoningPair] + nested_dict: Dict[str, Any] + value_with_default: int = 48 + + +def format_dict(mode: str, pairs: Optional[List[Dict[str, Any]]], value_with_default: Optional[int]) -> Dict[str, Any]: + data: Dict[str, Any] = {"mode": mode, "nested_dict": {"abc": "xyz", "inner_list": [1, 2, 3]}} + if pairs is not None: + data["pairs"] = pairs + if value_with_default is not None: + data["value_with_default"] = value_with_default + + return data + + +def format_response_yaml( + mode: str, pairs: Optional[List[Dict[str, Any]]] = None, value_with_default: Optional[int] = None +) -> str: + + return f"```yaml\n{yaml.dump(format_dict(mode, pairs, value_with_default))}\n```" + + +def format_response_json( + mode: str, pairs: Optional[List[Dict[str, Any]]] = None, value_with_default: Optional[int] = None +) -> str: + + return f"```json\n{json.dumps(format_dict(mode, pairs, value_with_default))}\n```" + + +class TestYAMLBlockResponseParser(unittest.TestCase): + def test_no_block(self): + llm = MockLLM.from_response("") + + with self.assertRaises(FunctionOutOfRetryError) as e: + _ = execute_mock_llm_func(llm, YAMLComplexResponse.from_response) + + assert str(e.exception).strip().endswith("yaml block is not found") + + def test_incorrect_yaml(self): + llm = MockLLM.from_response( + """ +```yaml +this_yaml: is + not: parsable +``` +""" + ) + + with self.assertRaises(FunctionOutOfRetryError) as e: + _ = execute_mock_llm_func(llm, YAMLComplexResponse.from_response) + + assert "Error while parsing yaml:" in str(e.exception).strip() + + def test_invalid_mode(self): + llm = MockLLM.from_response( + format_response_yaml(mode="invalid_mode", pairs=[{"number": 1.0, "reasoning": "test"}]) + ) + + with self.assertRaises(FunctionOutOfRetryError) as e: + _ = execute_mock_llm_func(llm, YAMLComplexResponse.from_response) + + self.assertIn("Input should be 'mode_one' or 'mode_two'", str(e.exception)) + + def test_missing_required_field(self): + llm = MockLLM.from_response(format_response_yaml(mode="mode_one")) + + with self.assertRaises(FunctionOutOfRetryError) as e: + _ = execute_mock_llm_func(llm, YAMLComplexResponse.from_response) + + self.assertIn("Field required", str(e.exception)) + self.assertIn("pairs", str(e.exception)) + + def test_invalid_number_in_pair(self): + llm = MockLLM.from_response( + format_response_yaml(mode="mode_one", pairs=[{"number": "not_a_number", "reasoning": "test"}]) + ) + + with self.assertRaises(FunctionOutOfRetryError) as e: + _ = execute_mock_llm_func(llm, YAMLComplexResponse.from_response) + + self.assertIn("Input should be a valid number", str(e.exception)) + self.assertIn("not_a_number", str(e.exception)) + + def test_correct(self): + llm = MockLLM.from_response( + format_response_yaml( + mode="mode_one", + pairs=[{"number": 3.14, "reasoning": "some text"}, {"number": 5.4, "reasoning": "more text"}], + value_with_default=123, + ) + ) + response = execute_mock_llm_func(llm, YAMLComplexResponse.from_response) + + self.assertIsInstance(response, YAMLComplexResponse) + self.assertEqual(response.mode, "mode_one") + self.assertIsInstance(response.pairs[0], NumberReasoningPair) + self.assertEqual( + response.pairs, + [ + NumberReasoningPair(number=3.14, reasoning="some text"), + NumberReasoningPair(number=5.4, reasoning="more text"), + ], + ) + self.assertEqual(response.nested_dict["abc"], "xyz") + self.assertEqual(response.nested_dict["inner_list"], [1, 2, 3]) + self.assertEqual(response.value_with_default, 123) + + def test_default_value(self): + llm = MockLLM.from_response( + format_response_yaml( + mode="mode_one", + pairs=[{"number": 3.14, "reasoning": "some text"}, {"number": 5.4, "reasoning": "more text"}], + ) + ) + response = execute_mock_llm_func(llm, YAMLComplexResponse.from_response) + + self.assertIsInstance(response, YAMLComplexResponse) + self.assertEqual(response.value_with_default, 48) + + +class TestJSONBlockResponseParser(unittest.TestCase): + def test_no_block(self): + llm = MockLLM.from_response("") + + with self.assertRaises(FunctionOutOfRetryError) as e: + _ = execute_mock_llm_func(llm, JSONComplexResponse.from_response) + + assert str(e.exception).strip().endswith("json block is not found") + + def test_incorrect_yaml(self): + llm = MockLLM.from_response( + """ +```json +this is non parsable +``` +""" + ) + + with self.assertRaises(FunctionOutOfRetryError) as e: + _ = execute_mock_llm_func(llm, JSONComplexResponse.from_response) + + assert "Error while parsing json:" in str(e.exception).strip() + + def test_correct(self): + llm = MockLLM.from_response( + format_response_json(mode="mode_one", pairs=[{"number": 7.0, "reasoning": "some text"}]) + ) + response = execute_mock_llm_func(llm, JSONComplexResponse.from_response) + + self.assertIsInstance(response, JSONComplexResponse) + self.assertEqual(response.mode, "mode_one") + self.assertIsInstance(response.pairs[0], NumberReasoningPair) + self.assertEqual( + response.pairs, + [NumberReasoningPair(number=7.0, reasoning="some text")], + ) + self.assertEqual(response.value_with_default, 48) From 05555d97d0dbd50ac5704d885b65d910178f23e8 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Wed, 11 Sep 2024 14:59:57 -0400 Subject: [PATCH 3/7] Update requirements.txt --- requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/requirements.txt b/requirements.txt index f4c07fec..6b9d5dc9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,3 +22,5 @@ GoogleNews>=1.6.10 pymediawiki~=0.7.3 beautifulsoup4~=4.12.2 +# Response Parsers +pydantic==2.8.* \ No newline at end of file From 1410a8b5825286fc0daccd5a3faab69d292f2249 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Wed, 11 Sep 2024 15:10:08 -0400 Subject: [PATCH 4/7] Update docstrings --- council/llm/llm_response_parser.py | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/council/llm/llm_response_parser.py b/council/llm/llm_response_parser.py index 64dbe875..2bef4337 100644 --- a/council/llm/llm_response_parser.py +++ b/council/llm/llm_response_parser.py @@ -16,13 +16,23 @@ class BaseModelResponseParser(BaseModel): + """Base class for parsing LLM responses into structured data models""" + @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: - """Implement parsing functionality""" + """ + Parse an LLM response into a structured data model. + Must be implemented by subclasses to define specific parsing logic. + """ raise NotImplementedError() def validator(self) -> None: - """Implement custom validation functionality - raise LLMParsingException to trigger local correction""" + """ + Implement custom validation logic for the parsed data. + Can be overridden by subclasses to add specific validation rules. + Raise LLMParsingException to trigger local correction. + Alternatively, use pydantic validation. + """ pass @classmethod @@ -33,7 +43,10 @@ def create_and_validate(cls: Type[T], **kwargs) -> T: @classmethod def _try_create(cls: Type[T], **kwargs) -> T: - """Try to create BaseModel object instance and raise LLMParsingException if any ValidationError occurs""" + """ + Attempt to create a BaseModel object instance. + Raises an LLMParsingException if a ValidationError occurs during instantiation. + """ try: return cls(**kwargs) @@ -44,6 +57,8 @@ def _try_create(cls: Type[T], **kwargs) -> T: class CodeBlocksResponseParser(BaseModelResponseParser): + """Parser for responses containing multiple named code blocks""" + @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: llm_response = response.value @@ -59,6 +74,8 @@ def from_response(cls: Type[T], response: LLMResponse) -> T: class YAMLBlockResponseParser(BaseModelResponseParser): + """Parser for responses containing a single YAML code block""" + @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: llm_response = response.value @@ -72,6 +89,8 @@ def from_response(cls: Type[T], response: LLMResponse) -> T: class YAMLResponseParser(BaseModelResponseParser): + """Parser for responses containing raw YAML content""" + @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: llm_response = response.value @@ -88,6 +107,8 @@ def parse(content: str) -> Dict[str, Any]: class JSONBlockResponseParser(BaseModelResponseParser): + """Parser for responses containing a single JSON code block""" + @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: llm_response = response.value @@ -101,6 +122,8 @@ def from_response(cls: Type[T], response: LLMResponse) -> T: class JSONResponseParser(BaseModelResponseParser): + """Parser for responses containing raw JSON content""" + @classmethod def from_response(cls: Type[T], response: LLMResponse) -> T: llm_response = response.value From 1d31f8cd4799e377d45f078150f0019413493bf0 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Wed, 18 Sep 2024 13:01:43 -0400 Subject: [PATCH 5/7] Add non_empty_validator() as an alternative validation method --- council/llm/llm_response_parser.py | 8 ++++++++ tests/unit/llm/test_llm_response_parser_blocks.py | 14 +++++++++++++- .../unit/llm/test_llm_response_parser_yaml_json.py | 2 -- 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/council/llm/llm_response_parser.py b/council/llm/llm_response_parser.py index 2bef4337..c3b7b2c5 100644 --- a/council/llm/llm_response_parser.py +++ b/council/llm/llm_response_parser.py @@ -4,6 +4,7 @@ import yaml from pydantic import BaseModel, ValidationError +from pydantic_core.core_schema import ValidationInfo from ..utils import CodeParser from .llm_answer import LLMParsingException @@ -137,3 +138,10 @@ def parse(content: str) -> Dict[str, Any]: return json.loads(content) except json.JSONDecodeError as e: raise LLMParsingException(f"Error while parsing json: {e}") + + +def non_empty_validator(value: str, v: ValidationInfo) -> str: + """pydantic field validator for non-empty strings""" + if not value.strip(): + raise ValueError(f"`{v.field_name}` string must not be empty") + return value diff --git a/tests/unit/llm/test_llm_response_parser_blocks.py b/tests/unit/llm/test_llm_response_parser_blocks.py index 1c6f392c..b15e41c8 100644 --- a/tests/unit/llm/test_llm_response_parser_blocks.py +++ b/tests/unit/llm/test_llm_response_parser_blocks.py @@ -4,7 +4,7 @@ from council.llm import LLMParsingException from council.llm.llm_function import LLMFunction, FunctionOutOfRetryError -from council.llm.llm_response_parser import CodeBlocksResponseParser +from council.llm.llm_response_parser import CodeBlocksResponseParser, non_empty_validator from council.mocks import MockLLM, MockMultipleResponses @@ -14,6 +14,8 @@ class Response(CodeBlocksResponseParser): age: int number: float + _text = field_validator("text")(non_empty_validator) + @field_validator("text") @classmethod def n(cls, text: str) -> str: @@ -104,6 +106,16 @@ def test_pydentic_validation(self): str(e.exception), ) + def test_non_empty_validator(self): + llm = MockLLM.from_response(format_response(text=" ", flag="true", age="34", number="3.14")) + with self.assertRaises(FunctionOutOfRetryError) as e: + _ = execute_mock_llm_func(llm, Response.from_response) + + self.assertIn( + "`text` string must not be empty", + str(e.exception), + ) + def test_custom_validation(self): llm = MockLLM.from_response(format_response(text="Some text", flag="true", age="-5", number="3.14")) with self.assertRaises(FunctionOutOfRetryError) as e: diff --git a/tests/unit/llm/test_llm_response_parser_yaml_json.py b/tests/unit/llm/test_llm_response_parser_yaml_json.py index e12d33c9..155c4289 100644 --- a/tests/unit/llm/test_llm_response_parser_yaml_json.py +++ b/tests/unit/llm/test_llm_response_parser_yaml_json.py @@ -43,14 +43,12 @@ def format_dict(mode: str, pairs: Optional[List[Dict[str, Any]]], value_with_def def format_response_yaml( mode: str, pairs: Optional[List[Dict[str, Any]]] = None, value_with_default: Optional[int] = None ) -> str: - return f"```yaml\n{yaml.dump(format_dict(mode, pairs, value_with_default))}\n```" def format_response_json( mode: str, pairs: Optional[List[Dict[str, Any]]] = None, value_with_default: Optional[int] = None ) -> str: - return f"```json\n{json.dumps(format_dict(mode, pairs, value_with_default))}\n```" From 02b17612d5ef05017e96ef04512e203668f1387e Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Wed, 18 Sep 2024 15:39:48 -0400 Subject: [PATCH 6/7] Replace non_empty_validator() with Field --- council/llm/llm_response_parser.py | 8 -------- tests/unit/llm/test_llm_response_parser_blocks.py | 10 ++++------ 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/council/llm/llm_response_parser.py b/council/llm/llm_response_parser.py index c3b7b2c5..2bef4337 100644 --- a/council/llm/llm_response_parser.py +++ b/council/llm/llm_response_parser.py @@ -4,7 +4,6 @@ import yaml from pydantic import BaseModel, ValidationError -from pydantic_core.core_schema import ValidationInfo from ..utils import CodeParser from .llm_answer import LLMParsingException @@ -138,10 +137,3 @@ def parse(content: str) -> Dict[str, Any]: return json.loads(content) except json.JSONDecodeError as e: raise LLMParsingException(f"Error while parsing json: {e}") - - -def non_empty_validator(value: str, v: ValidationInfo) -> str: - """pydantic field validator for non-empty strings""" - if not value.strip(): - raise ValueError(f"`{v.field_name}` string must not be empty") - return value diff --git a/tests/unit/llm/test_llm_response_parser_blocks.py b/tests/unit/llm/test_llm_response_parser_blocks.py index b15e41c8..798685ff 100644 --- a/tests/unit/llm/test_llm_response_parser_blocks.py +++ b/tests/unit/llm/test_llm_response_parser_blocks.py @@ -1,21 +1,19 @@ import unittest -from pydantic import field_validator +from pydantic import field_validator, Field from council.llm import LLMParsingException from council.llm.llm_function import LLMFunction, FunctionOutOfRetryError -from council.llm.llm_response_parser import CodeBlocksResponseParser, non_empty_validator +from council.llm.llm_response_parser import CodeBlocksResponseParser from council.mocks import MockLLM, MockMultipleResponses class Response(CodeBlocksResponseParser): - text: str + text: str = Field(..., min_length=1) flag: bool age: int number: float - _text = field_validator("text")(non_empty_validator) - @field_validator("text") @classmethod def n(cls, text: str) -> str: @@ -112,7 +110,7 @@ def test_non_empty_validator(self): _ = execute_mock_llm_func(llm, Response.from_response) self.assertIn( - "`text` string must not be empty", + "String should have at least 1 character [type=string_too_short, input_value='', input_type=str]", str(e.exception), ) From 03a76207f572406d8a9e4c01c8a9e1a28b1b7334 Mon Sep 17 00:00:00 2001 From: Dmytro Nikolaiev Date: Thu, 19 Sep 2024 10:59:24 -0400 Subject: [PATCH 7/7] Add LLMFunctionWithPrompt to __init__ --- council/llm/__init__.py | 1 + tests/integration/llm/test_llm_function_with_prompt.py | 2 +- tests/unit/llm/test_llm_response_parser_yaml_json.py | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/council/llm/__init__.py b/council/llm/__init__.py index a8b1e56e..c19d55f8 100644 --- a/council/llm/__init__.py +++ b/council/llm/__init__.py @@ -20,6 +20,7 @@ ExecuteLLMRequest, ) from .llm_function import LLMFunction, LLMFunctionError, FunctionOutOfRetryError +from .llm_function_with_prompt import LLMFunctionWithPrompt from .monitored_llm import MonitoredLLM from .chat_gpt_configuration import ChatGPTConfigurationBase diff --git a/tests/integration/llm/test_llm_function_with_prompt.py b/tests/integration/llm/test_llm_function_with_prompt.py index e19118b2..827c2546 100644 --- a/tests/integration/llm/test_llm_function_with_prompt.py +++ b/tests/integration/llm/test_llm_function_with_prompt.py @@ -3,7 +3,7 @@ import dotenv from council import AzureLLM -from council.llm.llm_function_with_prompt import LLMFunctionWithPrompt +from council.llm import LLMFunctionWithPrompt from council.prompt import LLMPromptConfigObject from tests import get_data_filename from tests.integration.llm.test_llm_function import SQLResult diff --git a/tests/unit/llm/test_llm_response_parser_yaml_json.py b/tests/unit/llm/test_llm_response_parser_yaml_json.py index 155c4289..94dd207b 100644 --- a/tests/unit/llm/test_llm_response_parser_yaml_json.py +++ b/tests/unit/llm/test_llm_response_parser_yaml_json.py @@ -152,7 +152,7 @@ def test_no_block(self): assert str(e.exception).strip().endswith("json block is not found") - def test_incorrect_yaml(self): + def test_incorrect_json(self): llm = MockLLM.from_response( """ ```json