-
Notifications
You must be signed in to change notification settings - Fork 35
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature initial response parsers #165
Merged
Merged
Changes from all commits
Commits
Show all changes
9 commits
Select commit
Hold shift + click to select a range
0b51b22
Initial implementation
Winston-503 8dbb21f
Add unit tests for yaml and json
Winston-503 05555d9
Update requirements.txt
Winston-503 1410a8b
Update docstrings
Winston-503 1d31f8c
Add non_empty_validator() as an alternative validation method
Winston-503 02b1761
Replace non_empty_validator() with Field
Winston-503 582fdfe
Merge branch 'main' into feature-initial-response-parsers
Winston-503 03a7620
Add LLMFunctionWithPrompt to __init__
Winston-503 7d4b9a6
Merge branch 'main' into feature-initial-response-parsers
Winston-503 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,139 @@ | ||
import json | ||
import re | ||
from typing import Any, Callable, Dict, Type, TypeVar | ||
|
||
import yaml | ||
from pydantic import BaseModel, ValidationError | ||
|
||
from ..utils import CodeParser | ||
from .llm_answer import LLMParsingException | ||
from .llm_middleware import LLMResponse | ||
|
||
# Type variable for the value a response-parser callable produces. | ||
T_Response = TypeVar("T_Response") | ||
# A response parser is any callable that takes an LLMResponse and returns a T_Response. | ||
LLMResponseParser = Callable[[LLMResponse], T_Response] | ||
|
||
# Bound to BaseModelResponseParser; used as `cls: Type[T]` -> T on the parser classmethods below. | ||
T = TypeVar("T", bound="BaseModelResponseParser") | ||
|
||
|
||
class BaseModelResponseParser(BaseModel):
    """Base class for parsing LLM responses into structured data models.

    Subclasses declare their fields as a regular pydantic model and override
    `from_response` to extract those fields from the LLM response.
    """

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Parse an LLM response into a structured data model.
        Must be implemented by subclasses to define specific parsing logic.

        Raises:
            NotImplementedError: always, unless a subclass overrides this method.
        """
        raise NotImplementedError()

    def validator(self) -> None:
        """
        Implement custom validation logic for the parsed data.
        Can be overridden by subclasses to add specific validation rules.
        Raise LLMParsingException to trigger local correction.
        Alternatively, use pydantic validation.

        The base implementation accepts every instance.
        """

    @classmethod
    def create_and_validate(cls: Type[T], **kwargs) -> T:
        """
        Instantiate the model from `kwargs` and run `validator()` on the result.

        Returns:
            The created instance, after `validator()` returned without raising.

        Raises:
            LLMParsingException: if pydantic rejects `kwargs` (see `_try_create`).
            Whatever `validator()` raises is propagated unchanged.
        """
        instance = cls._try_create(**kwargs)
        instance.validator()
        return instance

    @classmethod
    def _try_create(cls: Type[T], **kwargs) -> T:
        """
        Attempt to create a BaseModel object instance.
        Raises an LLMParsingException if a ValidationError occurs during instantiation.
        """
        try:
            return cls(**kwargs)
        except ValidationError as e:
            # LLM-friendlier version of pydantic error message without "For further information visit..."
            clean_exception_message = re.sub(r"For further information visit.*", "", str(e))
            # Chain explicitly so the original pydantic error is kept as __cause__ for debugging.
            raise LLMParsingException(clean_exception_message) from e
|
||
|
||
class CodeBlocksResponseParser(BaseModelResponseParser):
    """Parser for responses made of several code blocks, one per model field."""

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Build the model from the code blocks found in the response.

        For every field declared on the model, the block returned by
        `CodeParser.find_first(<field name>, <response text>)` is looked up and
        its stripped code becomes the value of that field.

        Raises:
            LLMParsingException: as soon as a field has no matching block, or
                if the collected values fail model creation.
        """
        text = response.value
        collected: Dict[str, Any] = {}

        for name in cls.model_fields:
            found = CodeParser.find_first(name, text)
            if found is None:
                raise LLMParsingException(f"`{name}` block is not found")
            collected[name] = found.code.strip()

        return cls.create_and_validate(**collected)
|
||
|
||
class YAMLBlockResponseParser(BaseModelResponseParser):
    """Parser for responses whose payload sits in a `yaml` code block."""

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Build the model from the first `yaml` block of the response.

        The block is located with `CodeParser.find_first("yaml", ...)`, its
        code is parsed by `YAMLResponseParser.parse`, and the parsed content
        is passed as keyword arguments to `create_and_validate`.

        Raises:
            LLMParsingException: if no yaml block is found; errors raised by
                parsing or model creation propagate.
        """
        block = CodeParser.find_first("yaml", response.value)
        if block is None:
            raise LLMParsingException("yaml block is not found")

        fields = YAMLResponseParser.parse(block.code)
        return cls.create_and_validate(**fields)
|
||
|
||
class YAMLResponseParser(BaseModelResponseParser):
    """Parser for responses containing raw YAML content"""

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Parse the whole response text as YAML and build the model from it.

        Raises:
            LLMParsingException: if the text is not valid YAML, is not a
                mapping with string keys, or fails model creation.
        """
        yaml_content = YAMLResponseParser.parse(response.value)
        return cls.create_and_validate(**yaml_content)

    @staticmethod
    def parse(content: str) -> Dict[str, Any]:
        """
        Load `content` with `yaml.safe_load` and return it as a dict.

        Raises:
            LLMParsingException: if the YAML is malformed, if the top-level
                value is not a mapping (e.g. empty input, plain text, a list),
                or if any top-level key is not a string.
        """
        try:
            parsed = yaml.safe_load(content)
        except yaml.YAMLError as e:
            raise LLMParsingException(f"Error while parsing yaml: {e}") from e

        # safe_load happily returns None / str / list / scalars; callers unpack the
        # result with `**`, which would otherwise fail with an unhandled TypeError.
        if not isinstance(parsed, dict):
            raise LLMParsingException(
                "Error while parsing yaml: expected a mapping of fields at the top level, "
                f"got `{type(parsed).__name__}`"
            )

        # YAML allows non-string keys (e.g. `1: a`), which cannot be keyword arguments.
        invalid_keys = [key for key in parsed if not isinstance(key, str)]
        if invalid_keys:
            raise LLMParsingException(
                f"Error while parsing yaml: top-level keys must be strings, got {invalid_keys!r}"
            )

        return parsed
|
||
|
||
class JSONBlockResponseParser(BaseModelResponseParser):
    """Parser for responses whose payload sits in a `json` code block."""

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Build the model from the first `json` block of the response.

        The block is located with `CodeParser.find_first("json", ...)`, its
        code is parsed by `JSONResponseParser.parse`, and the parsed content
        is passed as keyword arguments to `create_and_validate`.

        Raises:
            LLMParsingException: if no json block is found; errors raised by
                parsing or model creation propagate.
        """
        block = CodeParser.find_first("json", response.value)
        if block is None:
            raise LLMParsingException("json block is not found")

        fields = JSONResponseParser.parse(block.code)
        return cls.create_and_validate(**fields)
|
||
|
||
class JSONResponseParser(BaseModelResponseParser):
    """Parser for responses containing raw JSON content"""

    @classmethod
    def from_response(cls: Type[T], response: LLMResponse) -> T:
        """
        Parse the whole response text as JSON and build the model from it.

        Raises:
            LLMParsingException: if the text is not valid JSON, is not a JSON
                object, or fails model creation.
        """
        json_content = JSONResponseParser.parse(response.value)
        return cls.create_and_validate(**json_content)

    @staticmethod
    def parse(content: str) -> Dict[str, Any]:
        """
        Decode `content` with `json.loads` and return it as a dict.

        Raises:
            LLMParsingException: if the JSON is malformed or the top-level
                value is not an object (e.g. a list, string, number or null).
        """
        try:
            parsed = json.loads(content)
        except json.JSONDecodeError as e:
            raise LLMParsingException(f"Error while parsing json: {e}") from e

        # Valid JSON is not necessarily an object; callers unpack the result with
        # `**`, which would otherwise fail with an unhandled TypeError.
        if not isinstance(parsed, dict):
            raise LLMParsingException(
                "Error while parsing json: expected an object at the top level, "
                f"got `{type(parsed).__name__}`"
            )

        return parsed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,3 +22,5 @@ GoogleNews>=1.6.10 | |
pymediawiki~=0.7.3 | ||
beautifulsoup4~=4.12.2 | ||
|
||
# Response Parsers | ||
pydantic==2.8.* |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
👍