From 88dec6ef2b33f03302f1a9405c3662b85aef1c6b Mon Sep 17 00:00:00 2001 From: Zixuan Cheng <61724187+Theysua@users.noreply.github.com> Date: Thu, 24 Oct 2024 07:13:06 -0700 Subject: [PATCH] Added description for .ppt, specify the reason for unstructured.io (#9452) Co-authored-by: crazywoola <427733928@qq.com> --- api/core/rag/extractor/extract_processor.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/api/core/rag/extractor/extract_processor.py b/api/core/rag/extractor/extract_processor.py index 6a23f3cfefd051..a0b1aa4cefbd1f 100644 --- a/api/core/rag/extractor/extract_processor.py +++ b/api/core/rag/extractor/extract_processor.py @@ -121,6 +121,8 @@ def extract( extractor = UnstructuredEmailExtractor(file_path, unstructured_api_url, unstructured_api_key) elif file_extension == ".ppt": extractor = UnstructuredPPTExtractor(file_path, unstructured_api_url, unstructured_api_key) + # Note for the .ppt branch above: the Unstructured API key must be configured first, + # because unstructured_api_key is necessary to parse .ppt documents elif file_extension == ".pptx": extractor = UnstructuredPPTXExtractor(file_path, unstructured_api_url, unstructured_api_key) elif file_extension == ".xml":