Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature response parser to response template #194

Merged
merged 10 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions council/llm/data/response_hints.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# YAML
yaml_hints_common: |
- Make sure you respect YAML syntax, particularly for lists and dictionaries.
- All keys must be present in the response, even when their values are empty.
- For empty values, include empty quotes ("") rather than leaving them blank.
- Always wrap string values in double quotes (") to ensure proper parsing, except when using the YAML pipe operator (|) for multi-line strings.
yaml_parser_hints_start: |
- Provide your response as a parsable YAML.
yaml_parser_hints_end: Only respond with parsable YAML. Do not output anything else. Do not wrap your response in ```yaml```.
yaml_block_parser_hints_start: |
- Provide your response in a single YAML code block.

# JSON
json_hints_common: |
- Make sure you respect JSON syntax, particularly for lists and dictionaries.
- All keys must be present in the response, even when their values are empty.
- For empty values, include empty quotes ("") rather than leaving them blank.
json_parser_hints_start: |
- Provide your response as a parsable JSON.
json_parser_hints_end: Only respond with parsable JSON. Do not output anything else.
json_block_parser_hints_start: |
- Provide your response in a single JSON code block.
181 changes: 161 additions & 20 deletions council/llm/llm_response_parser.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
from __future__ import annotations

import abc
import json
import os
import re
from typing import Any, Callable, Dict, Type, TypeVar
from typing import Any, Callable, Dict, Final, Type, TypeVar

import yaml
from pydantic import BaseModel, ValidationError
from pydantic import BaseModel, ConfigDict, ValidationError

from ..utils import CodeParser
from .llm_answer import LLMParsingException
Expand All @@ -15,6 +19,38 @@

T = TypeVar("T", bound="BaseModelResponseParser")

# Path to the hints file, resolved relative to this module's own directory (under "data/").
RESPONSE_HINTS_FILE_PATH: Final[str] = os.path.join(os.path.dirname(__file__), "data", "response_hints.yaml")


class ResponseHintsHelper:
    """Hold the formatting hints for one response format, selected by key prefix.

    The hints mapping is expected to contain four entries for the given prefix:
    ``{prefix}_hints_common``, ``{prefix}_parser_hints_start``,
    ``{prefix}_parser_hints_end`` and ``{prefix}_block_parser_hints_start``.
    A missing entry raises ``KeyError``.
    """

    def __init__(self, hints: Dict[str, str], prefix: str):
        def lookup(suffix: str) -> str:
            # Every hint key is namespaced by the format prefix.
            return hints[f"{prefix}_{suffix}"]

        self.hints_common = lookup("hints_common")
        self.parser_hints_start = lookup("parser_hints_start")
        self.parser_hints_end = lookup("parser_hints_end")
        self.block_parser_hints_start = lookup("block_parser_hints_start")

    @classmethod
    def from_yaml(cls, path: str, prefix: str) -> ResponseHintsHelper:
        """Build a helper from the YAML file at ``path`` using the entries for ``prefix``."""
        with open(path, "r", encoding="utf-8") as stream:
            loaded_hints = yaml.safe_load(stream)
        return cls(loaded_hints, prefix)

    @property
    def parser(self) -> str:
        """Opening hints for a raw (non-block) response, followed by the common hints."""
        opening = self.parser_hints_start
        return opening + self.hints_common

    @property
    def block_parser(self) -> str:
        """Opening hints for a code-block response, followed by the common hints."""
        opening = self.block_parser_hints_start
        return opening + self.hints_common

    @property
    def parser_end(self) -> str:
        """Closing hint for a raw (non-block) response."""
        return self.parser_hints_end


# Shared hint sets, one per response format, both read from RESPONSE_HINTS_FILE_PATH.
# Each call opens and parses the file separately, selecting entries by the "yaml" / "json" prefix.
yaml_response_hints = ResponseHintsHelper.from_yaml(RESPONSE_HINTS_FILE_PATH, prefix="yaml")
json_response_hints = ResponseHintsHelper.from_yaml(RESPONSE_HINTS_FILE_PATH, prefix="json")


class EchoResponseParser:
@staticmethod
Expand All @@ -30,10 +66,13 @@ def from_response(response: LLMResponse) -> str:
return response.value


class BaseModelResponseParser(BaseModel):
class BaseModelResponseParser(BaseModel, abc.ABC):
"""Base class for parsing LLM responses into structured data models"""

model_config = ConfigDict(frozen=True) # to preserve field order

@classmethod
@abc.abstractmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""
Parse an LLM response into a structured data model.
Expand Down Expand Up @@ -88,7 +127,40 @@ def from_response(cls: Type[T], response: LLMResponse) -> T:
return cls.create_and_validate(**parsed_blocks)


class YAMLBlockResponseParser(BaseModelResponseParser):
T_YAMLResponseParserBase = TypeVar("T_YAMLResponseParserBase", bound="YAMLResponseParserBase")


class YAMLResponseParserBase(BaseModelResponseParser, abc.ABC):
    """Shared helpers for YAML-based response parsers: template generation and YAML parsing."""

    @classmethod
    def _to_response_template(cls: Type[T]) -> str:
        """
        Generate a YAML response template based on the model's fields and their descriptions.

        A ``str`` field whose description spans several lines is rendered as a YAML
        pipe block (``field: |``) with one stripped description line per row; every
        other field is rendered as ``field: # description``.

        Raises:
            ValueError: If any field has no description.
        """
        template_parts = []

        for field_name, field in cls.model_fields.items():
            description = field.description
            if description is None:
                raise ValueError(f"Description is required for field `{field_name}` in {cls.__name__}")

            is_multiline = "\n" in description

            if field.annotation is str and is_multiline:
                template_parts.append(f"{field_name}: |")
                for line in description.split("\n"):
                    template_parts.append(f" {line.strip()}")
            else:
                template_parts.append(f"{field_name}: # {description}")

        return "\n".join(template_parts)

    @staticmethod
    def parse(content: str) -> Dict[str, Any]:
        """
        Parse YAML text into a dictionary.

        Args:
            content: Raw YAML text.

        Returns:
            The parsed top-level mapping.

        Raises:
            LLMParsingException: If the text is not valid YAML, or if it parses to
                anything other than a mapping (e.g. empty content, a scalar or a list).
        """
        try:
            parsed = yaml.safe_load(content)
        except yaml.YAMLError as e:
            raise LLMParsingException(f"Error while parsing yaml: {e}") from e

        # safe_load yields None for empty input and scalars/lists for non-mapping YAML;
        # callers unpack the result with ** so anything but a dict must be rejected here.
        if not isinstance(parsed, dict):
            raise LLMParsingException(
                f"Error while parsing yaml: expected a mapping, got {type(parsed).__name__}"
            )
        return parsed


class YAMLBlockResponseParser(YAMLResponseParserBase):

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
Expand All @@ -99,29 +171,79 @@ def from_response(cls: Type[T], response: LLMResponse) -> T:
if yaml_block is None:
raise LLMParsingException("yaml block is not found")

yaml_content = YAMLResponseParser.parse(yaml_block.code)
yaml_content = YAMLResponseParserBase.parse(yaml_block.code)
return cls.create_and_validate(**yaml_content)

@classmethod
def to_response_template(cls: Type[T_YAMLResponseParserBase], include_hints: bool = True) -> str:
"""
Generate YAML block response template based on the model's fields and their descriptions.

Args:
include_hints: If True, returned template will include universal YAML block formatting hints.
"""
template_parts = [yaml_response_hints.block_parser] if include_hints else []
template_parts.extend(["```yaml", cls._to_response_template(), "```"])
return "\n".join(template_parts)


class YAMLResponseParser(BaseModelResponseParser):
class YAMLResponseParser(YAMLResponseParserBase):

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing raw YAML content"""
llm_response = response.value

yaml_content = YAMLResponseParser.parse(llm_response)
yaml_content = YAMLResponseParserBase.parse(llm_response)
return cls.create_and_validate(**yaml_content)

@classmethod
def to_response_template(cls: Type[T_YAMLResponseParserBase], include_hints: bool = True) -> str:
"""
Generate YAML response template based on the model's fields and their descriptions.

Args:
include_hints: If True, returned template will include universal YAML formatting hints.
"""
template_parts = [yaml_response_hints.parser] if include_hints else []
template_parts.append(cls._to_response_template())
if include_hints:
template_parts.extend(["", yaml_response_hints.parser_end])
return "\n".join(template_parts)


T_JSONResponseParserBase = TypeVar("T_JSONResponseParserBase", bound="JSONResponseParserBase")


class JSONResponseParserBase(BaseModelResponseParser, abc.ABC):
@classmethod
def _to_response_template(cls: Type[T]) -> str:
"""Generate a JSON response template based on the model's fields and their descriptions."""
template_dict = {}

for field_name, field in cls.model_fields.items():
description = field.description
if description is None:
raise ValueError(f"Description is required for field `{field_name}` in {cls.__name__}")

is_multiline = "\n" in description

if field.annotation is str and is_multiline:
template_dict[field_name] = "\n".join(line.strip() for line in description.split("\n"))
else:
template_dict[field_name] = description

return json.dumps(template_dict, indent=2)

@staticmethod
def parse(content: str) -> Dict[str, Any]:
try:
return yaml.safe_load(content)
except yaml.YAMLError as e:
raise LLMParsingException(f"Error while parsing yaml: {e}")
return json.loads(content)
except json.JSONDecodeError as e:
raise LLMParsingException(f"Error while parsing json: {e}")


class JSONBlockResponseParser(BaseModelResponseParser):
class JSONBlockResponseParser(JSONResponseParserBase):

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
Expand All @@ -132,23 +254,42 @@ def from_response(cls: Type[T], response: LLMResponse) -> T:
if json_block is None:
raise LLMParsingException("json block is not found")

json_content = JSONResponseParser.parse(json_block.code)
json_content = JSONResponseParserBase.parse(json_block.code)
return cls.create_and_validate(**json_content)

@classmethod
def to_response_template(cls: Type[T_JSONResponseParserBase], include_hints: bool = True) -> str:
"""
Generate JSON block response template based on the model's fields and their descriptions.

class JSONResponseParser(BaseModelResponseParser):
Args:
include_hints: If True, returned template will include universal JSON block formatting hints.
"""
template_parts = [json_response_hints.block_parser] if include_hints else []
template_parts.extend(["```json", cls._to_response_template(), "```"])
return "\n".join(template_parts)


class JSONResponseParser(JSONResponseParserBase):

@classmethod
def from_response(cls: Type[T], response: LLMResponse) -> T:
"""LLMFunction ResponseParser for response containing raw JSON content"""
llm_response = response.value

json_content = JSONResponseParser.parse(llm_response)
json_content = JSONResponseParserBase.parse(llm_response)
return cls.create_and_validate(**json_content)

@staticmethod
def parse(content: str) -> Dict[str, Any]:
try:
return json.loads(content)
except json.JSONDecodeError as e:
raise LLMParsingException(f"Error while parsing json: {e}")
@classmethod
def to_response_template(cls: Type[T_JSONResponseParserBase], include_hints: bool = True) -> str:
"""
Generate JSON response template based on the model's fields and their descriptions.

Args:
include_hints: If True, returned template will include universal JSON formatting hints.
"""
template_parts = [json_response_hints.parser] if include_hints else []
template_parts.append(cls._to_response_template())
if include_hints:
template_parts.extend(["", json_response_hints.parser_end])
return "\n".join(template_parts)
54 changes: 26 additions & 28 deletions docs/source/reference/llm/llm_response_parser.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,38 +83,37 @@ print(response.sql)
import os
from typing import Literal

# !pip install council-ai==0.0.24
# !pip install council-ai==0.0.27

from council import OpenAILLM
from council.llm.llm_function import LLMFunction
from council.llm.llm_response_parser import YAMLBlockResponseParser
from pydantic import Field

SYSTEM_PROMPT = """
Output RPG character info in the following YAML block:
Generate RPG character:

```yaml
character_class: # character's class (Warrior, Mage, Rogue, Bard or Tech Support)
name: # character's name
description: # character's tragic backstory, 50 chars minimum
health: # character's health, integer, from 1 to 100 points
```
{response_template}
"""


class RPGCharacterFromYAMLBlock(YAMLBlockResponseParser):
name: str
character_class: Literal["Warrior", "Mage", "Rogue", "Bard", "Tech Support"]
description: str = Field(..., min_length=50)
health: int = Field(..., ge=1, le=100)
character_class: Literal["Warrior", "Mage", "Rogue", "Bard", "Tech Support"] = Field(
..., description="Character's class (Warrior, Mage, Rogue, Bard or Tech Support)"
)
name: str = Field(..., min_length=3, description="Character's name")
description: str = Field(..., min_length=50, description="Character's tragic backstory, 50 chars minimum")
health: int = Field(..., ge=1, le=100, description="Character's health, integer, from 1 to 100 points")


os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE"
os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18"
llm = OpenAILLM.from_env()

llm_function: LLMFunction[RPGCharacterFromYAMLBlock] = LLMFunction(
llm, RPGCharacterFromYAMLBlock.from_response, SYSTEM_PROMPT
llm,
RPGCharacterFromYAMLBlock.from_response,
SYSTEM_PROMPT.format(response_template=RPGCharacterFromYAMLBlock.to_response_template()),
)

character = llm_function.execute(user_message="Create some wise mage")
Expand Down Expand Up @@ -155,42 +154,41 @@ Usage example with OpenAI json mode:
import os
from typing import Literal

# !pip install council-ai==0.0.24
# !pip install council-ai==0.0.27

from council import OpenAILLM
from council.llm.llm_function import LLMFunction
from council.llm.llm_response_parser import JSONResponseParser
from pydantic import Field

SYSTEM_PROMPT = """
Output RPG character info in the following JSON format:

{
character_class: # character's class (Warrior, Mage, Rogue, Bard or Tech Support)
name: # character's name
description: # character's tragic backstory, 50 chars minimum
health: # character's health, integer, from 1 to 100 points
}
Generate RPG character:

{response_template}
"""


class RPGCharacterFromJSON(JSONResponseParser):
name: str
character_class: Literal["Warrior", "Mage", "Rogue", "Bard", "Tech Support"]
description: str = Field(..., min_length=50)
health: int = Field(..., ge=1, le=100)
character_class: Literal["Warrior", "Mage", "Rogue", "Bard", "Tech Support"] = Field(
..., description="Character's class (Warrior, Mage, Rogue, Bard or Tech Support)"
)
name: str = Field(..., min_length=3, description="Character's name")
description: str = Field(..., min_length=50, description="Character's tragic backstory, 50 chars minimum")
health: int = Field(..., ge=1, le=100, description="Character's health, integer, from 1 to 100 points")


os.environ["OPENAI_API_KEY"] = "sk-YOUR-KEY-HERE"
os.environ["OPENAI_LLM_MODEL"] = "gpt-4o-mini-2024-07-18"
llm = OpenAILLM.from_env()

llm_function: LLMFunction[RPGCharacterFromJSON] = LLMFunction(
llm, RPGCharacterFromJSON.from_response, SYSTEM_PROMPT
llm,
RPGCharacterFromJSON.from_response,
SYSTEM_PROMPT.format(response_template=RPGCharacterFromJSON.to_response_template()),
)

character = llm_function.execute(
user_message="Create some wise mage",
user_message="Create some strong warrior",
response_format={"type": "json_object"} # using OpenAI's json mode
)
print(type(character))
Expand Down
Loading
Loading