diff --git a/pipelines/examples/FAQ/dense_faq_example.py b/pipelines/examples/FAQ/dense_faq_example.py index 73decdf5a5fa..9289ae4accaf 100644 --- a/pipelines/examples/FAQ/dense_faq_example.py +++ b/pipelines/examples/FAQ/dense_faq_example.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -### 城市百科知识智能问答系统 +# 城市百科知识智能问答系统 import argparse -import logging import os -import paddle from pipelines.document_stores import FAISSDocumentStore -from pipelines.utils import convert_files_to_dicts, fetch_archive_from_http, print_documents -from pipelines.nodes import ErnieRanker, DensePassageRetriever +from pipelines.nodes import DensePassageRetriever, ErnieRanker +from pipelines.utils import ( + convert_files_to_dicts, + fetch_archive_from_http, + print_documents, +) # yapf: disable parser = argparse.ArgumentParser() @@ -82,10 +84,10 @@ def dense_faq_pipeline(): # save index document_store.save(args.index_name) - ### Ranker + # Ranker ranker = ErnieRanker(model_name_or_path="rocketqa-zh-dureader-cross-encoder", use_gpu=use_gpu) - # ### Pipeline + # Pipeline from pipelines import SemanticSearchPipeline pipe = SemanticSearchPipeline(retriever, ranker) diff --git a/pipelines/examples/document-intelligence/docprompt_example.py b/pipelines/examples/document-intelligence/docprompt_example.py index 8618282a3f66..1522e1b96fe4 100644 --- a/pipelines/examples/document-intelligence/docprompt_example.py +++ b/pipelines/examples/document-intelligence/docprompt_example.py @@ -13,12 +13,9 @@ # limitations under the License. import argparse -import logging -import os -import paddle -from pipelines.nodes import DocOCRProcessor, DocPrompter from pipelines import DocPipeline +from pipelines.nodes import DocOCRProcessor, DocPrompter # yapf: disable parser = argparse.ArgumentParser() diff --git a/pipelines/examples/question-answering/dense_qa_example.py b/pipelines/examples/question-answering/dense_qa_example.py index e0a6e346a35b..e14c98bd8fc4 100644 --- a/pipelines/examples/question-answering/dense_qa_example.py +++ b/pipelines/examples/question-answering/dense_qa_example.py @@ -12,15 +12,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -### 城市百科知识智能问答系统 +# 城市百科知识智能问答系统 import argparse -import logging import os -import paddle from pipelines.document_stores import FAISSDocumentStore -from pipelines.utils import convert_files_to_dicts, fetch_archive_from_http, print_answers -from pipelines.nodes import ErnieReader, ErnieRanker, DensePassageRetriever +from pipelines.nodes import DensePassageRetriever, ErnieRanker, ErnieReader +from pipelines.utils import ( + convert_files_to_dicts, + fetch_archive_from_http, + print_answers, +) # yapf: disable parser = argparse.ArgumentParser() @@ -82,14 +84,14 @@ def dense_qa_pipeline(): # save index document_store.save(args.index_name) - ### Ranker + # Ranker ranker = ErnieRanker(model_name_or_path="rocketqa-zh-dureader-cross-encoder", use_gpu=use_gpu) reader = ErnieReader( model_name_or_path="ernie-gram-zh-finetuned-dureader-robust", use_gpu=use_gpu, num_processes=1 ) - # ### Pipeline + # Pipeline from pipelines import ExtractiveQAPipeline pipe = ExtractiveQAPipeline(reader, ranker, retriever) diff --git a/pipelines/examples/text_to_image/text_to_image_example.py b/pipelines/examples/text_to_image/text_to_image_example.py index 23541e428757..ef6c96263d72 100644 --- a/pipelines/examples/text_to_image/text_to_image_example.py +++ b/pipelines/examples/text_to_image/text_to_image_example.py @@ -12,12 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os import argparse -import paddle -from pipelines.nodes import ErnieTextToImageGenerator from pipelines import TextToImagePipeline +from pipelines.nodes import ErnieTextToImageGenerator # yapf: disable parser = argparse.ArgumentParser() @@ -26,8 +24,7 @@ parser.add_argument("--prompt_text", default='宁静的小镇', type=str, help="The prompt_text.") parser.add_argument("--output_dir", default='ernievilg_output', type=str, help="The output path.") parser.add_argument("--style", default='探索无限', type=str, help="The style text.") -parser.add_argument("--size", default='1024*1024', - choices=['1024*1024', '1024*1536', '1536*1024'], help="Size of the generation images") +parser.add_argument("--size", default='1024*1024', choices=['1024*1024', '1024*1536', '1536*1024'], help="Size of the generation images") parser.add_argument("--topk", default=5, type=int, help="The top k images.") args = parser.parse_args() # yapf: enable @@ -47,6 +44,7 @@ def text_to_image(): } }, ) + print(prediction) pipe.save_to_yaml("text_to_image.yaml") diff --git a/pipelines/examples/unsupervised-question-answering/offline_question_answer_pairs_generation.py b/pipelines/examples/unsupervised-question-answering/offline_question_answer_pairs_generation.py index 84b081fd5bf5..ab8f3f7f2f45 100644 --- a/pipelines/examples/unsupervised-question-answering/offline_question_answer_pairs_generation.py +++ b/pipelines/examples/unsupervised-question-answering/offline_question_answer_pairs_generation.py @@ -13,16 +13,10 @@ # limitations under the License. import argparse -import logging import os -from pprint import pprint -import paddle from pipelines.nodes import AnswerExtractor, QAFilter, QuestionGenerator -from pipelines.nodes import ErnieRanker, DensePassageRetriever -from pipelines.document_stores import FAISSDocumentStore -from pipelines.utils import convert_files_to_dicts, fetch_archive_from_http, print_documents -from pipelines.pipelines import QAGenerationPipeline, SemanticSearchPipeline +from pipelines.pipelines import QAGenerationPipeline # yapf: disable parser = argparse.ArgumentParser() diff --git a/pipelines/examples/unsupervised-question-answering/unsupervised_question_answering_example.py b/pipelines/examples/unsupervised-question-answering/unsupervised_question_answering_example.py index bbfcc06cee91..34c8f2a8723e 100644 --- a/pipelines/examples/unsupervised-question-answering/unsupervised_question_answering_example.py +++ b/pipelines/examples/unsupervised-question-answering/unsupervised_question_answering_example.py @@ -13,16 +13,19 @@ # limitations under the License. import argparse -import logging import os from pprint import pprint -import paddle -from pipelines.nodes import AnswerExtractor, QAFilter, QuestionGenerator -from pipelines.nodes import ErnieRanker, DensePassageRetriever from pipelines.document_stores import FAISSDocumentStore -from pipelines.utils import convert_files_to_dicts, fetch_archive_from_http, print_documents +from pipelines.nodes import ( + AnswerExtractor, + DensePassageRetriever, + ErnieRanker, + QAFilter, + QuestionGenerator, +) from pipelines.pipelines import QAGenerationPipeline, SemanticSearchPipeline +from pipelines.utils import convert_files_to_dicts, print_documents # yapf: disable parser = argparse.ArgumentParser() @@ -84,7 +87,7 @@ def dense_faq_pipeline(): # save index document_store.save(args.index_name) - ### Ranker + # Ranker ranker = ErnieRanker(model_name_or_path="rocketqa-zh-dureader-cross-encoder", use_gpu=use_gpu) pipe = SemanticSearchPipeline(retriever, ranker) diff --git a/pipelines/pipelines/utils/export_utils.py b/pipelines/pipelines/utils/export_utils.py index 1003bef94429..36ed698b406a 100644 --- a/pipelines/pipelines/utils/export_utils.py +++ b/pipelines/pipelines/utils/export_utils.py @@ -16,7 +16,7 @@ import logging import pprint from collections import defaultdict -from typing import Any, Dict, List, Optional +from typing import Optional import pandas as pd @@ -161,7 +161,7 @@ def export_answers_to_csv(agg_results: list, output_file): assert "query" in agg_results[0], f"Wrong format used for {agg_results[0]}" assert "answers" in agg_results[0], f"Wrong format used for {agg_results[0]}" - data = {} # type: Dict[str, List[Any]] + data = {} data["query"] = [] data["prediction"] = [] data["prediction_rank"] = [] @@ -193,7 +193,7 @@ def convert_labels_to_squad(labels_file: str): for label in labels: labels_grouped_by_documents[label["document_id"]].append(label) - labels_in_squad_format = {"data": []} # type: Dict[str, Any] + labels_in_squad_format = {"data": []} for document_id, labels in labels_grouped_by_documents.items(): qas = [] for label in labels: diff --git a/pipelines/rest_api/application.py b/pipelines/rest_api/application.py index 37287191504c..3a01be82490d 100644 --- a/pipelines/rest_api/application.py +++ b/pipelines/rest_api/application.py @@ -13,29 +13,29 @@ # See the License for the specific language governing permissions and # limitations under the License. -import sys import logging - -sys.path.append(".") - -logging.basicConfig(format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p") -logger = logging.getLogger(__name__) -logging.getLogger("elasticsearch").setLevel(logging.WARNING) -logging.getLogger("pipelines").setLevel(logging.INFO) +import sys import uvicorn from fastapi import FastAPI, HTTPException -from fastapi.routing import APIRoute from fastapi.openapi.utils import get_openapi +from fastapi.routing import APIRoute from starlette.middleware.cors import CORSMiddleware +# flake8: noqa +sys.path.append(".") +from rest_api.config import ROOT_PATH from rest_api.controller.errors.http_error import http_error_handler -from rest_api.config import ROOT_PATH, PIPELINE_YAML_PATH from rest_api.controller.router import router as api_router +logging.basicConfig(format="%(asctime)s %(message)s", datefmt="%m/%d/%Y %I:%M:%S %p") +logger = logging.getLogger(__name__) +logging.getLogger("elasticsearch").setLevel(logging.WARNING) +logging.getLogger("pipelines").setLevel(logging.INFO) + try: from pipelines import __version__ as pipelines_version -except: +except Exception: # For development pipelines_version = "0.0.0" diff --git a/pipelines/ui/webapp_docprompt_gradio.py b/pipelines/ui/webapp_docprompt_gradio.py index 35cdfcddd242..aac83dd48bed 100644 --- a/pipelines/ui/webapp_docprompt_gradio.py +++ b/pipelines/ui/webapp_docprompt_gradio.py @@ -14,19 +14,17 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import json +import argparse import base64 -from io import BytesIO -from PIL import Image import traceback -import argparse +from io import BytesIO -import requests -import numpy as np -import gradio as gr -import fitz import cv2 +import fitz +import gradio as gr +import numpy as np +import requests +from PIL import Image fitz_tools = fitz.Tools() @@ -171,7 +169,7 @@ def read_content(file_path: str) -> str: padding-bottom: 2px !important; padding-left: 8px !important; padding-right: 8px !important; - margin-top: 10px; + margin-top: 10px; } .gradio-container .gr-button-primary { background: linear-gradient(180deg, #CDF9BE 0%, #AFF497 100%); diff --git a/pipelines/ui/webapp_faq.py b/pipelines/ui/webapp_faq.py index b91fa5f683d4..4fb1478df27a 100644 --- a/pipelines/ui/webapp_faq.py +++ b/pipelines/ui/webapp_faq.py @@ -13,18 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import sys -import logging -import pandas as pd from json import JSONDecodeError from pathlib import Path + +import pandas as pd import streamlit as st -from annotated_text import annotation from markdown import markdown - -sys.path.append("ui") -from utils import pipelines_is_ready, semantic_search, send_feedback, upload_doc, pipelines_version, get_backlink +from utils import pipelines_is_ready, semantic_search, upload_doc # Adjust to a question that you would like users to see in the search bar when they load the UI: DEFAULT_QUESTION_AT_STARTUP = os.getenv("DEFAULT_QUESTION_AT_STARTUP", "如何办理企业养老保险?") @@ -58,7 +56,7 @@ def upload(): for data_file in data_files: # Upload file if data_file and data_file.name not in st.session_state.upload_files["uploaded_files"]: - raw_json = upload_doc(data_file) + upload_doc(data_file) st.session_state.upload_files["uploaded_files"].append(data_file.name) # Save the uploaded files st.session_state.upload_files["uploaded_files"] = list(set(st.session_state.upload_files["uploaded_files"])) @@ -115,16 +113,11 @@ def reset_results(*args): for data_file in st.session_state.upload_files["uploaded_files"]: st.sidebar.write(str(data_file) + "    ✅ ") - hs_version = "" - try: - hs_version = f" (v{pipelines_version()})" - except Exception: - pass # Load csv into pandas dataframe try: df = pd.read_csv(EVAL_LABELS, sep=";") except Exception: - st.error(f"The eval file was not found.") + st.error("The eval file was not found.") sys.exit(f"The eval file was not found under `{EVAL_LABELS}`.") # Search bar @@ -181,7 +174,7 @@ def reset_results(*args): st.session_state.results, st.session_state.raw_json = semantic_search( question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever ) - except JSONDecodeError as je: + except JSONDecodeError: st.error("👓    An error occurred reading the results. Is the document store working?") return except Exception as e: diff --git a/pipelines/ui/webapp_question_answering.py b/pipelines/ui/webapp_question_answering.py index 5b6ed006963f..0dada51fcec1 100644 --- a/pipelines/ui/webapp_question_answering.py +++ b/pipelines/ui/webapp_question_answering.py @@ -13,16 +13,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import sys -import logging -import pandas as pd from json import JSONDecodeError from pathlib import Path + +import pandas as pd import streamlit as st from annotated_text import annotation from markdown import markdown -from ui.utils import pipelines_is_ready, query, send_feedback, upload_doc, pipelines_version, get_backlink +from ui.utils import get_backlink, pipelines_is_ready, query, upload_doc # Adjust to a question that you would like users to see in the search bar when they load the UI: DEFAULT_QUESTION_AT_STARTUP = os.getenv("DEFAULT_QUESTION_AT_STARTUP", "中国的首都在哪里?") @@ -54,7 +55,7 @@ def upload(): for data_file in data_files: # Upload file if data_file and data_file.name not in st.session_state.upload_files["uploaded_files"]: - raw_json = upload_doc(data_file) + upload_doc(data_file) st.session_state.upload_files["uploaded_files"].append(data_file.name) # Save the uploaded files st.session_state.upload_files["uploaded_files"] = list(set(st.session_state.upload_files["uploaded_files"])) @@ -109,7 +110,7 @@ def reset_results(*args): try: df = pd.read_csv(EVAL_LABELS, sep=";") except Exception: - st.error(f"The eval file was not found.") + st.error("The eval file was not found.") sys.exit(f"The eval file was not found under `{EVAL_LABELS}`.") # File upload block @@ -122,11 +123,6 @@ def reset_results(*args): st.sidebar.button("文件上传", on_click=upload) for data_file in st.session_state.upload_files["uploaded_files"]: st.sidebar.write(str(data_file) + "    ✅ ") - hs_version = "" - try: - hs_version = f" (v{pipelines_version()})" - except Exception: - pass # Search bar question = st.text_input( @@ -185,7 +181,7 @@ def reset_results(*args): st.session_state.results, st.session_state.raw_json = query( question, top_k_reader=top_k_reader, top_k_ranker=top_k_ranker, top_k_retriever=top_k_retriever ) - except JSONDecodeError as je: + except JSONDecodeError: st.error("👓    An error occurred reading the results. Is the document store working?") return except Exception as e: diff --git a/pipelines/ui/webapp_semantic_search.py b/pipelines/ui/webapp_semantic_search.py index 918acb192526..17f0bd18cc08 100644 --- a/pipelines/ui/webapp_semantic_search.py +++ b/pipelines/ui/webapp_semantic_search.py @@ -13,20 +13,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import sys -import logging -import pandas as pd from json import JSONDecodeError from pathlib import Path + +import pandas as pd import streamlit as st -from annotated_text import annotation from markdown import markdown -import socket - -sys.path.append("ui") -from utils import pipelines_is_ready, semantic_search, send_feedback, upload_doc, pipelines_version, get_backlink -from utils import pipelines_files +from utils import pipelines_files, pipelines_is_ready, semantic_search, upload_doc # Adjust to a question that you would like users to see in the search bar when they load the UI: DEFAULT_QUESTION_AT_STARTUP = os.getenv("DEFAULT_QUESTION_AT_STARTUP", "衡量酒水的价格的因素有哪些?") @@ -57,7 +53,7 @@ def upload(): for data_file in data_files: # Upload file if data_file and data_file.name not in st.session_state.upload_files["uploaded_files"]: - raw_json = upload_doc(data_file) + upload_doc(data_file) st.session_state.upload_files["uploaded_files"].append(data_file.name) # Save the uploaded files st.session_state.upload_files["uploaded_files"] = list(set(st.session_state.upload_files["uploaded_files"])) @@ -114,16 +110,11 @@ def reset_results(*args): for data_file in st.session_state.upload_files["uploaded_files"]: st.sidebar.write(str(data_file) + "    ✅ ") - hs_version = "" - try: - hs_version = f" (v{pipelines_version()})" - except Exception: - pass # Load csv into pandas dataframe try: df = pd.read_csv(EVAL_LABELS, sep=";") except Exception: - st.error(f"The eval file was not found.") + st.error("The eval file was not found.") sys.exit(f"The eval file was not found under `{EVAL_LABELS}`.") # Search bar @@ -179,7 +170,7 @@ def reset_results(*args): st.session_state.results, st.session_state.raw_json = semantic_search( question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever ) - except JSONDecodeError as je: + except JSONDecodeError: st.error("👓    An error occurred reading the results. Is the document store working?") return except Exception as e: diff --git a/pipelines/ui/webapp_text_to_image.py b/pipelines/ui/webapp_text_to_image.py index 8946f9faf8d5..5b4a6ce7129a 100644 --- a/pipelines/ui/webapp_text_to_image.py +++ b/pipelines/ui/webapp_text_to_image.py @@ -12,13 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -import re -import os import argparse -from PIL import Image -from utils import text_to_image_search import gradio as gr +from utils import text_to_image_search # yapf: disable parser = argparse.ArgumentParser() diff --git a/pipelines/ui/webapp_unsupervised_question_answering.py b/pipelines/ui/webapp_unsupervised_question_answering.py index a850015a24c4..4d42e9492986 100644 --- a/pipelines/ui/webapp_unsupervised_question_answering.py +++ b/pipelines/ui/webapp_unsupervised_question_answering.py @@ -13,26 +13,21 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging import os import sys -import logging -import pandas as pd from json import JSONDecodeError from pathlib import Path + +import pandas as pd import streamlit as st -from annotated_text import annotation from markdown import markdown - from ui.utils import ( + file_upload_qa_generate, + offline_ann, pipelines_is_ready, semantic_search, - send_feedback, - upload_doc, - file_upload_qa_generate, - pipelines_version, - get_backlink, text_to_qa_pair_search, - offline_ann, ) # Adjust to a question that you would like users to see in the search bar when they load the UI: @@ -77,8 +72,7 @@ def upload(): for data_file in data_files: # Upload file if data_file and data_file.name not in st.session_state.upload_files["uploaded_files"]: - # raw_json = upload_doc(data_file) - raw_json = file_upload_qa_generate(data_file) + file_upload_qa_generate(data_file) st.session_state.upload_files["uploaded_files"].append(data_file.name) # Save the uploaded files st.session_state.upload_files["uploaded_files"] = list(set(st.session_state.upload_files["uploaded_files"])) @@ -148,19 +142,14 @@ def reset_results_qag(*args): for data_file in st.session_state.upload_files["uploaded_files"]: st.sidebar.write(str(data_file) + "    ✅ ") - hs_version = "" - try: - hs_version = f" (v{pipelines_version()})" - except Exception: - pass # Load csv into pandas dataframe try: df = pd.read_csv(EVAL_LABELS, sep=";") except Exception: - st.error(f"The eval file was not found.") + st.error("The eval file was not found.") sys.exit(f"The eval file was not found under `{EVAL_LABELS}`.") - ## QA pairs generation + # QA pairs generation # Search bar st.write("### 问答对生成:") context = st.text_input( @@ -213,7 +202,7 @@ def reset_results_qag(*args): st.session_state.qag_results, st.session_state.qag_raw_json = text_to_qa_pair_search( context, is_filter=True if is_filter == "是" else False ) - except JSONDecodeError as je: + except JSONDecodeError: st.error("👓    An error occurred reading the results. Is the document store working?") return except Exception as e: @@ -229,9 +218,9 @@ def reset_results_qag(*args): for count, result in enumerate(st.session_state.qag_results): context = result["context"] synthetic_answer = result["synthetic_answer"] - synthetic_answer_probability = result["synthetic_answer_probability"] + # synthetic_answer_probability = result["synthetic_answer_probability"] synthetic_question = result["synthetic_question"] - synthetic_question_probability = result["synthetic_question_probability"] + # synthetic_question_probability = result["synthetic_question_probability"] st.write( markdown(context), unsafe_allow_html=True, @@ -247,7 +236,7 @@ def reset_results_qag(*args): st.write("___") - ## QA search + # QA search # Search bar st.write("### 问答检索:") question = st.text_input( @@ -303,7 +292,7 @@ def reset_results_qag(*args): st.session_state.results, st.session_state.raw_json = semantic_search( question, top_k_reader=top_k_reader, top_k_retriever=top_k_retriever ) - except JSONDecodeError as je: + except JSONDecodeError: st.error("👓    An error occurred reading the results. Is the document store working?") return except Exception as e: