From a2f2cad8ce729652af3a6e2fd61a36c91f8acf9b Mon Sep 17 00:00:00 2001 From: CN-P5 Date: Sat, 11 Jan 2025 15:45:31 +0800 Subject: [PATCH 1/2] fix: Use ser.iloc[pos] for compatibility with pandas~=2.2.2 --- api/controllers/console/datasets/datasets_segments.py | 4 ++-- api/core/rag/index_processor/processor/qa_index_processor.py | 2 +- api/services/annotation_service.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/api/controllers/console/datasets/datasets_segments.py b/api/controllers/console/datasets/datasets_segments.py index 96654c09fd0223..d48dbe1772c353 100644 --- a/api/controllers/console/datasets/datasets_segments.py +++ b/api/controllers/console/datasets/datasets_segments.py @@ -368,9 +368,9 @@ def post(self, dataset_id, document_id): result = [] for index, row in df.iterrows(): if document.doc_form == "qa_model": - data = {"content": row[0], "answer": row[1]} + data = {"content": row.iloc[0], "answer": row.iloc[1]} else: - data = {"content": row[0]} + data = {"content": row.iloc[0]} result.append(data) if len(result) == 0: raise ValueError("The CSV file is empty.") diff --git a/api/core/rag/index_processor/processor/qa_index_processor.py b/api/core/rag/index_processor/processor/qa_index_processor.py index 58b50a9fcbc67e..0055625e136c79 100644 --- a/api/core/rag/index_processor/processor/qa_index_processor.py +++ b/api/core/rag/index_processor/processor/qa_index_processor.py @@ -112,7 +112,7 @@ def format_by_template(self, file: FileStorage, **kwargs) -> list[Document]: df = pd.read_csv(file) text_docs = [] for index, row in df.iterrows(): - data = Document(page_content=row[0], metadata={"answer": row[1]}) + data = Document(page_content=row.iloc[0], metadata={"answer": row.iloc[1]}) text_docs.append(data) if len(text_docs) == 0: raise ValueError("The CSV file is empty.") diff --git a/api/services/annotation_service.py b/api/services/annotation_service.py index a946405c955cec..45ec1e9b5aec61 100644 --- a/api/services/annotation_service.py +++ 
b/api/services/annotation_service.py @@ -286,7 +286,7 @@ def batch_import_app_annotations(cls, app_id, file: FileStorage) -> dict: df = pd.read_csv(file) result = [] for index, row in df.iterrows(): - content = {"question": row[0], "answer": row[1]} + content = {"question": row.iloc[0], "answer": row.iloc[1]} result.append(content) if len(result) == 0: raise ValueError("The CSV file is empty.") From 5f20c14f071c7638f6a3d2bb4d69a0e190f87e99 Mon Sep 17 00:00:00 2001 From: CN-P5 Date: Sat, 11 Jan 2025 21:43:04 +0800 Subject: [PATCH 2/2] Fix failed Q&A format import into the knowledge base (#12637) Fixes the failure that occurred when importing Q&A format data into the knowledge base (#12637). --- api/tasks/batch_create_segment_to_index_task.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/tasks/batch_create_segment_to_index_task.py b/api/tasks/batch_create_segment_to_index_task.py index 05a0f0a407f5f3..dbef6b708e4f29 100644 --- a/api/tasks/batch_create_segment_to_index_task.py +++ b/api/tasks/batch_create_segment_to_index_task.py @@ -77,8 +77,8 @@ def batch_create_segment_to_index_task( index_node_id=doc_id, index_node_hash=segment_hash, position=max_position + 1 if max_position else 1, - content=content, - word_count=len(content), + content=content_str, + word_count=len(content_str), tokens=tokens, created_by=user_id, indexing_at=datetime.datetime.now(datetime.UTC).replace(tzinfo=None),