Skip to content

Commit

Permalink
Fixed sonnet json formatting issue (#293)
Browse files Browse the repository at this point in the history
* Fixed sonnet json formatting issue

* PR comments - added notes and types
  • Loading branch information
whitead authored Jun 25, 2024
1 parent 6581202 commit f16240a
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 1 deletion.
11 changes: 11 additions & 0 deletions paperqa/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,4 +184,15 @@ def llm_read_json(text: str) -> dict:
text = "{" + text.split("{", 1)[-1]
# split anything after the last }
text = text.rsplit("}", 1)[0] + "}"

# escape new lines within strings
def replace_newlines(match: re.Match) -> str:
return match.group(0).replace("\n", "\\n")

# Match anything between double quotes
# including escaped quotes and other escaped characters.
# https://regex101.com/r/VFcDmB/1
pattern = r'"(?:[^"\\]|\\.)*"'
text = re.sub(pattern, replace_newlines, text)

return json.loads(text)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ name = "paper-qa"
readme = "README.md"
requires-python = ">=3.8"
urls = {repository = "https://github.com/whitead/paper-qa"}
version = "4.8.0"
version = "4.8.1"

[tool.codespell]
check-filenames = true
Expand Down
18 changes: 18 additions & 0 deletions tests/test_paperqa.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import pickle
import tempfile
import textwrap
from io import BytesIO
from pathlib import Path

Expand Down Expand Up @@ -457,6 +458,23 @@ def test_llm_read_json(example: str):
assert llm_read_json(example) == {"example": "json"}


def test_llm_read_json_newlines():
    """Make sure that newlines in json are preserved and escaped."""
    # The "summary" value deliberately spans raw line breaks, including a
    # blank line, which strict JSON does not allow inside a string literal.
    example = textwrap.dedent(
        """
        {
        "summary": "A line

        Another line",
        "relevance_score": 7
        }"""
    )
    # Two raw newlines separate the two text lines above, so the parsed
    # value must contain exactly "\n\n" between them.
    assert llm_read_json(example) == {
        "summary": "A line\n\nAnother line",
        "relevance_score": 7,
    }


@pytest.mark.asyncio()
async def test_chain_completion():
client = AsyncOpenAI()
Expand Down

0 comments on commit f16240a

Please sign in to comment.