Added automatic json extraction from the response #21

Merged · 4 commits · Mar 22, 2024
7 changes: 6 additions & 1 deletion allms/domain/response.py
@@ -5,13 +5,18 @@
from allms.domain.input_data import InputData


class ResponseParsingOutput(BaseModel):
response: typing.Optional[typing.Any]
error_message: typing.Optional[str]


class ResponseData(BaseModel):
response: typing.Optional[typing.Any] = None
input_data: typing.Optional[InputData] = None

number_of_prompt_tokens: typing.Optional[int] = None
number_of_generated_tokens: typing.Optional[int] = None
error: typing.Optional[typing.Union[str, Exception]] = None
error: typing.Optional[str] = None

# Without this, only classes inheriting from the pydantic BaseModel are allowed as field types. Exception isn't
# such a class and that's why we need it.
45 changes: 10 additions & 35 deletions allms/models/abstract.py
@@ -13,7 +13,6 @@
from langchain.chat_models.base import BaseChatModel
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import ChatPromptTemplate
from langchain.schema import OutputParserException
from langchain_core.language_models.llms import create_base_retry_decorator
from langchain_core.prompts import HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain_core.prompts.prompt import PromptTemplate
@@ -34,6 +33,7 @@
from allms.domain.prompt_dto import SummaryOutputClass, KeywordsOutputClass
from allms.domain.response import ResponseData
from allms.utils.long_text_processing_utils import get_max_allowed_number_of_tokens
from allms.utils.response_parsing_utils import ResponseParser

logger = logging.getLogger(__name__)

@@ -58,6 +58,8 @@ def __init__(
self._is_long_text_bypass_enabled: bool = False # Should be false till we fully implement support for long sequences in our package
self._aggregation_strategy: AggregationLogicForLongInputData = AggregationLogicForLongInputData.SIMPLE_CONCATENATION
self._parser: typing.Optional[PydanticOutputParser] = None
self._json_pattern = re.compile(r"{.*?}", re.DOTALL)
self._is_json_format_injected_into_prompt: bool = True

if max_output_tokens >= model_total_max_tokens:
raise ValueError("max_output_tokens has to be lower than model_total_max_tokens")
@@ -103,38 +105,9 @@ def generate(
)

if output_data_model_class:
return self._parse_model_output(model_responses)
return ResponseParser(self._parser).parse_model_output(model_responses)
return model_responses

def _parse_response(self, model_response_data: ResponseData) -> typing.Tuple[str, typing.Optional[str]]:
try:
return self._parser.parse(model_response_data.response), None
except OutputParserException as output_parser_exception:
return None, OutputParserException(
f"An OutputParserException has occurred for "
f"The response from model: {model_response_data.response}\n"
f"The exception message: {output_parser_exception}"
)

def _parse_model_output(self, model_responses_data: typing.List[ResponseData]) -> typing.List[ResponseData]:
parsed_responses = []
for model_response_data in model_responses_data:
if not model_response_data.error:
response, error_message = self._parse_response(model_response_data)

parsed_responses.append(ResponseData(
input_data=model_response_data.input_data,
response=response,
error=error_message,
number_of_prompt_tokens=model_response_data.number_of_prompt_tokens,
number_of_generated_tokens=model_response_data.number_of_generated_tokens

))
else:
parsed_responses.append(model_response_data)

return parsed_responses

async def _generate(
self,
prompt: str,
@@ -155,10 +128,12 @@

if output_data_model_class:
self._parser = PydanticOutputParser(pydantic_object=output_data_model_class)
prompt_template_args[PromptConstants.PARTIAL_VARIABLES_STR] = {
PromptConstants.OUTPUT_DATA_MODEL: self._parser.get_format_instructions(),
}
prompt_template_args[PromptConstants.TEMPLATE_STR] = self._add_output_data_format(prompt=prompt)

if self._is_json_format_injected_into_prompt:
prompt_template_args[PromptConstants.PARTIAL_VARIABLES_STR] = {
PromptConstants.OUTPUT_DATA_MODEL: self._parser.get_format_instructions(),
}
prompt_template_args[PromptConstants.TEMPLATE_STR] = self._add_output_data_format(prompt=prompt)

chat_prompts = await self._build_chat_prompts(prompt_template_args, system_prompt)

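A minimal usage sketch of the `generate` flow this file implements: `ReviewSummary`, `summarize_reviews`, and the prompt text are made-up names for illustration, and `model` is assumed to be an already-configured `allms` model instance. When `output_data_model_class` is passed, the raw responses are now routed through `ResponseParser`.

```python
import typing

from pydantic import BaseModel

from allms.domain.response import ResponseData


class ReviewSummary(BaseModel):
    # Hypothetical output schema used only for this sketch.
    summary: str
    keywords: typing.List[str]


def summarize_reviews(model) -> typing.List[ResponseData]:
    # `model` is any configured allms model (e.g. AzureOpenAIModel); its construction
    # is omitted here because it depends on deployment-specific configuration.
    responses = model.generate(
        prompt="Summarize this review and list its keywords: great phone, but the battery drains fast.",
        output_data_model_class=ReviewSummary,
    )
    for response in responses:
        if response.error is None:
            parsed: ReviewSummary = response.response  # already parsed into ReviewSummary
        else:
            print(response.error)  # generation or parsing failed; the reason is kept as a string
    return responses
```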
6 changes: 6 additions & 0 deletions allms/models/azure_llama2.py
@@ -1,11 +1,15 @@
import typing
from asyncio import AbstractEventLoop
from typing import List, Type

from langchain_community.chat_models.azureml_endpoint import LlamaChatContentFormatter
from pydantic import BaseModel

from allms.defaults.azure_defaults import AzureLlama2Defaults
from allms.defaults.general_defaults import GeneralDefaults
from allms.domain.configuration import AzureSelfDeployedConfiguration
from allms.domain.input_data import InputData
from allms.domain.response import ResponseData
from allms.models.abstract import AbstractModel
from allms.models.azure_base import AzureMLOnlineEndpointAsync

@@ -35,6 +39,8 @@ def __init__(
event_loop=event_loop
)

self._is_json_format_injected_into_prompt = False

def _create_llm(self) -> AzureMLOnlineEndpointAsync:
model_kwargs = {"max_new_tokens": self._max_output_tokens, "top_p": self._top_p, "do_sample": False}
if self._temperature > 0:
2 changes: 2 additions & 0 deletions allms/models/azure_mistral.py
@@ -35,6 +35,8 @@ def __init__(
event_loop=event_loop
)

self._is_json_format_injected_into_prompt = False

def _create_llm(self) -> AzureMLOnlineEndpointAsync:
model_kwargs = {
"max_new_tokens": self._max_output_tokens, "top_p": self._top_p, "do_sample": False,
2 changes: 2 additions & 0 deletions allms/models/vertexai_gemma.py
@@ -38,6 +38,8 @@ def __init__(
event_loop=event_loop
)

self._is_json_format_injected_into_prompt = False

def _create_llm(self) -> VertexAIModelGarden:
return VertexAIModelGardenWrapper(
model_name=GemmaModelDefaults.GCP_MODEL_NAME,
70 changes: 70 additions & 0 deletions allms/utils/response_parsing_utils.py
@@ -0,0 +1,70 @@
import re
import typing

from langchain.output_parsers import PydanticOutputParser
from langchain.schema import OutputParserException

from allms.domain.response import ResponseData, ResponseParsingOutput


class ResponseParser:
def __init__(self, parser: PydanticOutputParser) -> None:
self._json_pattern = re.compile(r"{.*?}", re.DOTALL)
self._parser = parser

def _clean_extracted_json(self, extracted_json: str) -> str:
json_without_newlines = extracted_json.replace("\\n", "")
json_without_backslashes = json_without_newlines.replace("\\", "")

return json_without_backslashes

def _extract_json_from_response(self, model_response_data: ResponseData) -> str:
search_results = self._json_pattern.findall(model_response_data.response)

if len(search_results) == 0:
return model_response_data.response

return self._clean_extracted_json(search_results[0])

def _parse_response(
self,
model_response_data: ResponseData
) -> ResponseParsingOutput:
raw_response = self._extract_json_from_response(model_response_data)

try:
return ResponseParsingOutput(
response=self._parser.parse(raw_response),
error_message=None
)
except OutputParserException as output_parser_exception:
return ResponseParsingOutput(
response=None,
error_message=f"""
An OutputParserException has occurred for the model response: {raw_response}
The exception message: {output_parser_exception}
"""
)

def parse_model_output(
self,
model_responses_data: typing.List[ResponseData]
) -> typing.List[ResponseData]:
parsed_responses = []

for model_response_data in model_responses_data:
if not model_response_data.error:
response_with_error = self._parse_response(model_response_data)

parsed_responses.append(ResponseData(
input_data=model_response_data.input_data,
response=response_with_error.response,
error=response_with_error.error_message,
number_of_prompt_tokens=model_response_data.number_of_prompt_tokens,
number_of_generated_tokens=model_response_data.number_of_generated_tokens

))
else:
parsed_responses.append(model_response_data)

return parsed_responses
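A short sketch of how this parser behaves on a typical "chatty" completion. `KeywordsOutput` is an illustrative schema (the package ships similar classes in `allms.domain.prompt_dto`); the regex extracts the first `{...}` span from the response before handing it to the pydantic parser.

```python
import typing

from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel

from allms.domain.response import ResponseData
from allms.utils.response_parsing_utils import ResponseParser


class KeywordsOutput(BaseModel):
    # Illustrative schema; flat JSON objects like this match the non-greedy {.*?} pattern.
    keywords: typing.List[str]


parser = ResponseParser(PydanticOutputParser(pydantic_object=KeywordsOutput))

# The JSON object is wrapped in extra prose, which the regex-based extraction strips away.
noisy_response = 'Sure, here is the JSON you asked for: {"keywords": ["fast", "cheap"]}'
parsed = parser.parse_model_output([ResponseData(response=noisy_response)])

assert parsed[0].error is None
print(parsed[0].response)  # a KeywordsOutput instance with keywords=["fast", "cheap"]
```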
3 changes: 1 addition & 2 deletions docs/api/models/azure_llama2_model.md
@@ -39,8 +39,7 @@ generate(
- `input_data` (`Optional[List[InputData]]`): If the prompt contains symbolic variables, you can use this parameter to
generate model responses for a batch of examples. Each symbolic variable from the prompt should have a mapping provided
in the `input_mappings` of `InputData`.
- `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the
format defined by the passed class. Generated response is automatically parsed to this class.
- `output_data_model_class` (`Optional[Type[BaseModel]]`): The generated response is automatically parsed into this class. WARNING: you need to provide the JSON format instructions in the prompt manually; they are not injected for this model (see the sketch below).
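A minimal sketch of such a manually provided instruction. The `Opinion` schema, the prompt wording, and the `azure_llama2_model` / `reviews` variables are placeholders; curly brackets in the JSON example are doubled so they are not treated as prompt variables.

```python
from pydantic import BaseModel


class Opinion(BaseModel):
    summary: str
    is_positive: bool


# Format instructions are written by hand because they are not injected for this model.
prompt = (
    "Summarize the review and say whether it is positive. "
    'Answer with a JSON object only, e.g. {{"summary": "...", "is_positive": true}}.\n\n'
    "Review: {review}"
)

responses = azure_llama2_model.generate(
    prompt=prompt,
    input_data=reviews,                # InputData examples providing a mapping for {review}
    output_data_model_class=Opinion,   # the response is still parsed into this class
)
```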

#### Returns
`List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data`
3 changes: 1 addition & 2 deletions docs/api/models/azure_mistral_model.md
@@ -37,8 +37,7 @@ generate(
- `input_data` (`Optional[List[InputData]]`): If the prompt contains symbolic variables, you can use this parameter to
generate model responses for a batch of examples. Each symbolic variable from the prompt should have a mapping provided
in the `input_mappings` of `InputData`.
- `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the
format defined by the passed class. Generated response is automatically parsed to this class.
- `output_data_model_class` (`Optional[Type[BaseModel]]`): The generated response is automatically parsed into this class. WARNING: you need to provide the JSON format instructions in the prompt manually; they are not injected for this model.

Note that Mistral-based models currently don't support system prompts.

3 changes: 1 addition & 2 deletions docs/api/models/vertexai_gemma.md
@@ -44,8 +44,7 @@ generate(
- `input_data` (`Optional[List[InputData]]`): If the prompt contains symbolic variables, you can use this parameter to
generate model responses for a batch of examples. Each symbolic variable from the prompt should have a mapping provided
in the `input_mappings` of `InputData`.
- `output_data_model_class` (`Optional[Type[BaseModel]]`): If provided forces the model to generate output in the
format defined by the passed class. Generated response is automatically parsed to this class.
- `output_data_model_class` (`Optional[Type[BaseModel]]`): The generated response is automatically parsed into this class. WARNING: you need to provide the JSON format instructions in the prompt manually; they are not injected for this model.

#### Returns
`List[ResponseData]`: Each `ResponseData` contains the response for a single example from `input_data`. If `input_data`
11 changes: 4 additions & 7 deletions docs/usage/forcing_response_format.md
@@ -66,13 +66,10 @@ False

## What to do when output formatting doesn't work?

The feature described above works best with advanced proprietary models like GPT and PaLM/Gemini. Less capable models like Llama2 or Mistral
may not able to understand instructions passed as output_dataclasses, and in most cases the returned response won't be compatible
with the defined format, resulting in an unexpected response.
The feature described above works only with advanced proprietary models like GPT and PaLM/Gemini. Less capable models like Llama2 or Mistral
are unable to understand instructions passed as output_dataclasses.

In such cases, we recommend to address the issue by specifying in the prompt how the response should look like. Using
few-shot learning techniques is also advisable. In the case of JSON-like output, use double curly brackets to escape them in order
to use them in the JSON example.
For these less capable models, you need to manually specify in the prompt what the response should look like. You can then pass `output_data_model_class` to parse the output into that class. Using few-shot learning techniques is also advisable. In the case of JSON-like output, use double curly brackets instead of single ones, e.g. `{{"key": "value"}}` instead of `{"key": "value"}`, as shown in the sketch below.
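For example, a sketch of such a prompt (the keyword-extraction task and the wording are only an illustration):

```python
from langchain.prompts import PromptTemplate

# Doubled curly brackets keep the JSON example literal, while {text} remains a prompt variable.
prompt = (
    "Extract the keywords from the text below.\n"
    'Answer with JSON only, for example: {{"keywords": ["pizza", "delivery"]}}\n\n'
    "Text: {text}"
)

# Rendering the template shows the doubled brackets collapsing back into a plain JSON example.
print(PromptTemplate.from_template(prompt).format(text="Great pasta, slow service."))
```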

## How does forcing the response format work under the hood?
To force the model to provide output in a desired format, under the hood `allms` automatically adds a description
@@ -90,7 +87,7 @@ Here is the output schema:
```
````

This feature is really helpful, but you have to bear in mind that by using it you increase the number or prompt tokens
This feature is really helpful, but you have to keep in mind that by using it you increase the number of prompt tokens,
so it'll make the requests more costly (if you're using a model with per-token pricing).

If the model returns an output that doesn't conform to the defined data model, the raw model response will be returned
21 changes: 19 additions & 2 deletions poetry.lock


3 changes: 2 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "allms"
version = "1.0.1"
version = "1.0.2"
description = ""
authors = ["Allegro Opensource <[email protected]>"]
readme = "README.md"
@@ -17,6 +17,7 @@ langchain = "^0.0.351"
aioresponses = "^0.7.6"
tiktoken = "^0.6.0"
openai = "^0.27.8"
pytest-mock = "^3.14.0"

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
10 changes: 6 additions & 4 deletions tests/conftest.py
@@ -27,7 +27,7 @@ class GenerativeModels:
vertex_palm: typing.Optional[VertexAIPalmModel] = None


class VertexAIMock(FakeListLLM):
class ModelWithoutAsyncRequestsMock(FakeListLLM):
def __init__(self, *args, **kwargs):
super().__init__(responses=["{}"])

@@ -37,9 +37,11 @@
event_loop = asyncio.new_event_loop()

with (
patch("allms.models.vertexai_palm.CustomVertexAI", VertexAIMock),
patch("allms.models.vertexai_gemini.CustomVertexAI", VertexAIMock),
patch("allms.models.vertexai_gemma.VertexAIModelGardenWrapper", VertexAIMock)
patch("allms.models.vertexai_palm.CustomVertexAI", ModelWithoutAsyncRequestsMock),
patch("allms.models.vertexai_gemini.CustomVertexAI", ModelWithoutAsyncRequestsMock),
patch("allms.models.vertexai_gemma.VertexAIModelGardenWrapper", ModelWithoutAsyncRequestsMock),
patch("allms.models.azure_llama2.AzureMLOnlineEndpointAsync", ModelWithoutAsyncRequestsMock),
patch("allms.models.azure_mistral.AzureMLOnlineEndpointAsync", ModelWithoutAsyncRequestsMock)
):
return {
"azure_open_ai": AzureOpenAIModel(