Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added automatic json extraction from the response #21

Merged
merged 4 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion allms/domain/response.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,18 @@
from allms.domain.input_data import InputData


class ResponseWithError(BaseModel):
    """Pairs a parsed model response with an optional parsing-error message.

    Produced by the output-parsing step: on success `response` holds the
    parsed output and `error_message` is None; on parser failure `response`
    is None and `error_message` describes what went wrong.
    """
    # Parsed output of the model (whatever type the parser produces), or None on failure.
    response: typing.Optional[typing.Any]
    # Human-readable parsing-failure description, or None on success.
    error_message: typing.Optional[str]


class ResponseData(BaseModel):
response: typing.Optional[typing.Any] = None
input_data: typing.Optional[InputData] = None

number_of_prompt_tokens: typing.Optional[int] = None
number_of_generated_tokens: typing.Optional[int] = None
error: typing.Optional[typing.Union[str, Exception]] = None
error: typing.Optional[str] = None

# Without this, only classes inheriting from the pydantic BaseModel are allowed as field types. Exception isn't
# such a class and that's why we need it.
Expand Down
45 changes: 34 additions & 11 deletions allms/models/abstract.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
from allms.domain.enumerables import AggregationLogicForLongInputData, LanguageModelTask
from allms.domain.input_data import InputData
from allms.domain.prompt_dto import SummaryOutputClass, KeywordsOutputClass
from allms.domain.response import ResponseData
from allms.domain.response import ResponseWithError, ResponseData
from allms.utils.long_text_processing_utils import get_max_allowed_number_of_tokens

logger = logging.getLogger(__name__)
Expand All @@ -58,6 +58,7 @@ def __init__(
self._is_long_text_bypass_enabled: bool = False # Should be false till we fully implement support for long sequences in our package
self._aggregation_strategy: AggregationLogicForLongInputData = AggregationLogicForLongInputData.SIMPLE_CONCATENATION
self._parser: typing.Optional[PydanticOutputParser] = None
self._json_pattern = re.compile(r"{.*?}", re.DOTALL)

if max_output_tokens >= model_total_max_tokens:
raise ValueError("max_output_tokens has to be lower than model_total_max_tokens")
Expand Down Expand Up @@ -105,27 +106,49 @@ def generate(
if output_data_model_class:
return self._parse_model_output(model_responses)
return model_responses

def _extract_json_from_response(self, model_response_data: ResponseData) -> str:
search_results = self._json_pattern.findall(model_response_data.response)

if len(search_results) == 0:
return model_response_data.response

return search_results[0]

def _parse_response(
        self,
        model_response_data: ResponseData
) -> ResponseWithError:
    """Parse one raw model response into the target output model.

    First extracts the embedded JSON object from the response text, then
    runs the configured pydantic output parser on it. Parser failures are
    caught and converted into a ResponseWithError carrying the error text
    (including the raw response, for debuggability) instead of raising.
    """
    raw_response = self._extract_json_from_response(model_response_data)

    try:
        return ResponseWithError(
            response=self._parser.parse(raw_response),
            error_message=None
        )
    except OutputParserException as output_parser_exception:
        # Keep the raw text in the message so parsing failures are debuggable.
        return ResponseWithError(
            response=None,
            error_message=f"""
An OutputParserException has occurred for the model response: {raw_response}
The exception message: {output_parser_exception}
"""
        )

def _parse_model_output(self, model_responses_data: typing.List[ResponseData]) -> typing.List[ResponseData]:
def _parse_model_output(
riccardo-alle marked this conversation as resolved.
Show resolved Hide resolved
self,
model_responses_data: typing.List[ResponseData]
) -> typing.List[ResponseData]:
parsed_responses = []

for model_response_data in model_responses_data:
if not model_response_data.error:
response, error_message = self._parse_response(model_response_data)
response_with_error = self._parse_response(model_response_data)

parsed_responses.append(ResponseData(
input_data=model_response_data.input_data,
response=response,
error=error_message,
response=response_with_error.response,
error=response_with_error.error_message,
number_of_prompt_tokens=model_response_data.number_of_prompt_tokens,
number_of_generated_tokens=model_response_data.number_of_generated_tokens

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "allms"
version = "1.0.2"
description = ""
authors = ["Allegro Opensource <[email protected]>"]
readme = "README.md"
Expand Down
37 changes: 35 additions & 2 deletions tests/test_output_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,40 @@ def test_output_parser_returns_error_when_model_output_returns_different_field(s
# WHEN & THEN
for model in models.values():
model_response = model.generate(prompt, input_data, SummaryOutputClass)
assert type(model_response[0].error) == OutputParserException
assert "OutputParserException" in model_response[0].error
assert model_response[0].response is None

@patch("langchain.chains.base.Chain.arun")
@patch("langchain_community.llms.vertexai.VertexAI.get_num_tokens")
def test_output_parser_extracts_json_from_response(self, tokens_mock, chain_run_mock, models):
    # GIVEN: the model wraps the JSON payload in conversational filler text.
    text_output = "This is the model output"
    expected_model_response = json.dumps({"summary": text_output})
    chain_run_mock.return_value = f"Sure! Here's the JSON you wanted: {expected_model_response} Have a nice day!"
    tokens_mock.return_value = 1

    input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
    prompt = "Some Dummy Prompt {text}"

    # WHEN & THEN: the JSON is extracted and parsed despite the surrounding prose.
    for model in models.values():
        model_response = model.generate(prompt, input_data, SummaryOutputClass)
        assert model_response[0].response == SummaryOutputClass(summary=text_output)

@patch("langchain.chains.base.Chain.arun")
@patch("langchain_community.llms.vertexai.VertexAI.get_num_tokens")
def test_output_parser_returns_error_when_json_is_garbled(self, tokens_mock, chain_run_mock, models):
    # GIVEN: malformed JSON (missing closing quote after "summary") in the response.
    chain_run_mock.return_value = "Sure! Here's the JSON you wanted: {\"summary: \"text\"}"
    tokens_mock.return_value = 1

    input_data = [InputData(input_mappings={"text": "Some dummy text"}, id="1")]
    prompt = "Some Dummy Prompt {text}"

    # WHEN & THEN: parsing fails; the error message names the exception type
    # and no parsed response is returned.
    for model in models.values():
        model_response = model.generate(prompt, input_data, SummaryOutputClass)
        assert "OutputParserException" in model_response[0].error
        assert model_response[0].response is None

@patch("langchain.chains.base.Chain.arun")
Expand Down Expand Up @@ -94,4 +127,4 @@ def test_model_output_when_input_data_is_empty(self, tokens_mock, chain_run_mock
for model in models.values():
model_response = model.generate(prompt, None, KeywordsOutputClass)
assert model_response[0].response is None
assert type(model_response[0].error) == OutputParserException
assert "OutputParserException" in model_response[0].error
Loading