From 9cad1d1c6d88e18cb13978755fd4eaedc5e05064 Mon Sep 17 00:00:00 2001 From: zhan_hong_soh Date: Thu, 18 Jul 2024 16:52:36 +0800 Subject: [PATCH] Fix: drop text before first '{' in clean_up_json; decode token chunks before embedding --- graphrag/index/utils/json.py | 1 + graphrag/query/llm/oai/embedding.py | 1 + 2 files changed, 2 insertions(+) diff --git a/graphrag/index/utils/json.py b/graphrag/index/utils/json.py index ed6c06665d..ffe46065f6 100644 --- a/graphrag/index/utils/json.py +++ b/graphrag/index/utils/json.py @@ -6,6 +6,7 @@ def clean_up_json(json_str: str): """Clean up json string.""" + json_str = json_str[json_str.index('{'):] json_str = ( json_str.replace("\\n", "") .replace("\n", "") diff --git a/graphrag/query/llm/oai/embedding.py b/graphrag/query/llm/oai/embedding.py index f40372dbce..1599a5e385 100644 --- a/graphrag/query/llm/oai/embedding.py +++ b/graphrag/query/llm/oai/embedding.py @@ -82,6 +82,7 @@ def embed(self, text: str, **kwargs: Any) -> list[float]: chunk_lens = [] for chunk in token_chunks: try: + chunk = self.token_encoder.decode(chunk) embedding, chunk_len = self._embed_with_retry(chunk, **kwargs) chunk_embeddings.append(embedding) chunk_lens.append(chunk_len)