-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feature initial response parsers (#165)
* Initial implementation * Add unit tests for yaml and json * Update requirements.txt * Update docstrings * Add non_empty_validator() as an alternative validation method * Replace non_empty_validator() with Field * Add LLMFunctionWithPrompt to __init__
- Loading branch information
1 parent
3ad8e47
commit d84ffb8
Showing
8 changed files
with
382 additions
and
88 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
import json | ||
import re | ||
from typing import Any, Callable, Dict, Type, TypeVar | ||
|
||
import yaml | ||
from pydantic import BaseModel, ValidationError | ||
|
||
from ..utils import CodeParser | ||
from .llm_answer import LLMParsingException | ||
from .llm_middleware import LLMResponse | ||
|
||
# Type variable for the value a response parser callable returns.
T_Response = TypeVar("T_Response") | ||
# Signature of a response parser: a callable taking an LLMResponse and returning a T_Response.
LLMResponseParser = Callable[[LLMResponse], T_Response] | ||
|
||
# Type variable bound to BaseModelResponseParser; the classmethods below annotate
# `cls` as Type[T] and return T, so the return type follows the calling subclass.
T = TypeVar("T", bound="BaseModelResponseParser") | ||
|
||
|
||
class BaseModelResponseParser(BaseModel):
    """Base class for parsing LLM responses into structured data models.

    Subclasses implement ``from_response`` to extract keyword arguments from
    an ``LLMResponse`` and then call ``create_and_validate`` to build the
    model instance.
    """

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Parse an LLM response into a structured data model.
        Must be implemented by subclasses to define specific parsing logic.

        Raises:
            NotImplementedError: always, in this base implementation.
        """
        raise NotImplementedError()

    def validator(self) -> None:
        """
        Implement custom validation logic for the parsed data.
        Can be overridden by subclasses to add specific validation rules.
        Raise LLMParsingException to trigger local correction.
        Alternatively, use pydantic validation.

        The base implementation accepts every instance.
        """

    @classmethod
    def create_and_validate(cls: Type[T], **kwargs) -> T:
        """
        Instantiate the model from ``kwargs`` and run ``validator`` on it.

        Returns:
            The newly created instance, after ``validator`` has been called.

        Raises:
            LLMParsingException: if pydantic rejects ``kwargs`` during
                instantiation, or if ``validator`` raises it.
        """
        instance = cls._try_create(**kwargs)
        instance.validator()
        return instance

    @classmethod
    def _try_create(cls: Type[T], **kwargs) -> T:
        """
        Attempt to create a BaseModel object instance.
        Raises an LLMParsingException if a ValidationError occurs during instantiation.
        """
        try:
            return cls(**kwargs)
        except ValidationError as e:
            # LLM-friendlier version of pydantic error message without "For further information visit..."
            clean_exception_message = re.sub(r"For further information visit.*", "", str(e))
            # Chain explicitly so the original pydantic error stays attached as __cause__.
            raise LLMParsingException(clean_exception_message) from e
|
||
|
||
class CodeBlocksResponseParser(BaseModelResponseParser):
    """Parser for responses that carry one code block per model field.

    For every field declared on the model, ``CodeParser.find_first`` is asked
    for a block using the field name; the stripped ``code`` of that block
    becomes the field's value.
    """

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Build the model from the per-field code blocks in ``response.value``.

        Raises:
            LLMParsingException: for the first field whose block is not found,
                or if model creation/validation fails.
        """
        text = response.value
        field_values: Dict[str, Any] = {}

        for field_name in cls.model_fields:
            found = CodeParser.find_first(field_name, text)
            if found is not None:
                field_values[field_name] = found.code.strip()
                continue
            # Stop at the first missing block, same as the field iteration order.
            raise LLMParsingException(f"`{field_name}` block is not found")

        return cls.create_and_validate(**field_values)
|
||
|
||
class YAMLBlockResponseParser(BaseModelResponseParser):
    """Parser for responses whose payload is one ``yaml`` code block."""

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Locate the first ``yaml`` block in ``response.value``, parse it with
        ``YAMLResponseParser.parse`` and build the model from the result.

        Raises:
            LLMParsingException: if no yaml block is found, or if parsing or
                model creation/validation fails.
        """
        block = CodeParser.find_first("yaml", response.value)
        if block is not None:
            fields = YAMLResponseParser.parse(block.code)
            return cls.create_and_validate(**fields)

        raise LLMParsingException("yaml block is not found")
|
||
|
||
class YAMLResponseParser(BaseModelResponseParser):
    """Parser for responses containing raw YAML content"""

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Parse ``response.value`` as YAML and build the model from the
        resulting top-level mapping.

        Raises:
            LLMParsingException: if the YAML is invalid, is not a mapping with
                string keys, or if model creation/validation fails.
        """
        llm_response = response.value

        yaml_content = YAMLResponseParser.parse(llm_response)
        return cls.create_and_validate(**yaml_content)

    @staticmethod
    def parse(content: str) -> Dict[str, Any]:
        """
        Safely load ``content`` as YAML and return its top-level mapping.

        ``yaml.safe_load`` returns ``None`` for empty input and may return a
        list or scalar; those cannot be unpacked into model keyword arguments,
        so they are reported as parsing errors instead of surfacing later as a
        ``TypeError``.

        Raises:
            LLMParsingException: if the YAML is malformed, the top-level value
                is not a mapping, or a top-level key is not a string.
        """
        try:
            parsed = yaml.safe_load(content)
        except yaml.YAMLError as e:
            raise LLMParsingException(f"Error while parsing yaml: {e}") from e

        if not isinstance(parsed, dict):
            raise LLMParsingException(
                "Error while parsing yaml: expected a mapping at the top level, "
                f"got {type(parsed).__name__}"
            )

        # Callers unpack the result with ``**``, which requires string keys.
        bad_keys = [key for key in parsed if not isinstance(key, str)]
        if bad_keys:
            raise LLMParsingException(
                f"Error while parsing yaml: top-level keys must be strings, got {bad_keys!r}"
            )

        return parsed
|
||
|
||
class JSONBlockResponseParser(BaseModelResponseParser):
    """Parser for responses whose payload is one ``json`` code block."""

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Locate the first ``json`` block in ``response.value``, parse it with
        ``JSONResponseParser.parse`` and build the model from the result.

        Raises:
            LLMParsingException: if no json block is found, or if parsing or
                model creation/validation fails.
        """
        block = CodeParser.find_first("json", response.value)
        if block is not None:
            fields = JSONResponseParser.parse(block.code)
            return cls.create_and_validate(**fields)

        raise LLMParsingException("json block is not found")
|
||
|
||
class JSONResponseParser(BaseModelResponseParser):
    """Parser for responses containing raw JSON content"""

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Parse ``response.value`` as JSON and build the model from the
        resulting top-level object.

        Raises:
            LLMParsingException: if the JSON is invalid, is not an object at
                the top level, or if model creation/validation fails.
        """
        llm_response = response.value

        json_content = JSONResponseParser.parse(llm_response)
        return cls.create_and_validate(**json_content)

    @staticmethod
    def parse(content: str) -> Dict[str, Any]:
        """
        Decode ``content`` as JSON and return its top-level object.

        ``json.loads`` also accepts arrays, strings, numbers and ``null`` at
        the top level; those cannot be unpacked into model keyword arguments,
        so they are reported as parsing errors instead of surfacing later as a
        ``TypeError``.

        Raises:
            LLMParsingException: if the JSON is malformed or the top-level
                value is not an object.
        """
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            raise LLMParsingException(f"Error while parsing json: {e}") from e

        if not isinstance(parsed, dict):
            raise LLMParsingException(
                "Error while parsing json: expected an object at the top level, "
                f"got {type(parsed).__name__}"
            )

        return parsed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,3 +22,5 @@ GoogleNews>=1.6.10 | |
pymediawiki~=0.7.3 | ||
beautifulsoup4~=4.12.2 | ||
|
||
# Response Parsers | ||
pydantic==2.8.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.