From 9d617af20f9cc9d1f454b7fbca1f19e676fbc8f1 Mon Sep 17 00:00:00 2001
From: xuyang1 <xuyang1@microsoft.com>
Date: Mon, 3 Jun 2024 10:05:53 +0000
Subject: [PATCH 1/3] run the code

---
 .github/workflows/ci.yml                      |   6 -
 Makefile                                      |   8 +-
 pyproject.toml                                |   3 +-
 rdagent/app/CI/run.py                         | 107 ++++++++++++------
 .../factor_extract_and_implement.py           |  11 +-
 rdagent/core/conf.py                          |   2 +-
 rdagent/core/evolving_framework.py            |   3 +-
 rdagent/document_process/document_analysis.py |  19 +---
 .../evolving/evaluators.py                    |  12 +-
 .../evolving/evolving_strategy.py             |  16 +--
 .../factor_implementation_evolving_cli.py     |  31 +++--
 .../evolving/knowledge_management.py          |  44 +++----
 .../share_modules/evaluator.py                |  14 +--
 .../share_modules/factor.py                   |  13 ++-
 ...onf.py => factor_implementation_config.py} |   2 +-
 ...tils.py => factor_implementation_utils.py} |   2 +-
 rdagent/knowledge_management/graph.py         |  34 ++++--
 rdagent/oai/llm_utils.py                      |   9 +-
 test/oai/test_completion.py                   |  48 ++++++++
 19 files changed, 241 insertions(+), 143 deletions(-)
 rename rdagent/factor_implementation/share_modules/{conf.py => factor_implementation_config.py} (97%)
 rename rdagent/factor_implementation/share_modules/{utils.py => factor_implementation_utils.py} (94%)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index f94fa6bf..a18f9cf3 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -20,22 +20,16 @@ jobs:
       - run: env | sort
       - run: make dev
       - env:
-          AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT: ${{ secrets.AZURE_DOCUMENT_INTELLIGENCE_ENDPOINT }}
-          AZURE_DOCUMENT_INTELLIGENCE_KEY: ${{ secrets.AZURE_DOCUMENT_INTELLIGENCE_KEY }}
           CHAT_AZURE_API_BASE: ${{ secrets.CHAT_AZURE_API_BASE }}
           CHAT_AZURE_API_VERSION: ${{ secrets.CHAT_AZURE_API_VERSION }}
           CHAT_MAX_TOKENS: ${{ secrets.CHAT_MAX_TOKENS }}
           CHAT_MODEL: ${{ secrets.CHAT_MODEL }}
           CHAT_OPENAI_API_KEY: ${{ secrets.CHAT_OPENAI_API_KEY }}
           CHAT_TEMPERATURE: ${{ secrets.CHAT_TEMPERATURE }}
-          CONTINOUS_MODE: ${{ secrets.CONTINOUS_MODE }}
           EMBEDDING_AZURE_API_BASE: ${{ secrets.CHAT_AZURE_API_BASE }}
           EMBEDDING_AZURE_API_VERSION: ${{ secrets.CHAT_AZURE_API_VERSION }}
           EMBEDDING_MODEL: ${{ secrets.EMBEDDING_MODEL }}
           EMBEDDING_OPENAI_API_KEY: ${{ secrets.CHAT_OPENAI_API_KEY }}
-          MAX_RETRY: ${{ secrets.MAX_RETRY }}
-          RETRY_WAIT_SECONDS: ${{ secrets.RETRY_WAIT_SECONDS }}
-          USE_AZURE: ${{ secrets.USE_AZURE }}
         name: lint test docs and build
         run: make lint test docs build
     strategy:
diff --git a/Makefile b/Makefile
index a0726a32..cbd5b8e8 100644
--- a/Makefile
+++ b/Makefile
@@ -81,11 +81,11 @@ constraints: deepclean
 
 # Check lint with black.
 black:
-	$(PIPRUN) python -m black --check . --exclude finco --extend-exclude test/scripts --extend-exclude git_ignore_folder -l 120
+	$(PIPRUN) python -m black --check . --extend-exclude test/scripts --extend-exclude git_ignore_folder -l 120
 
 # Check lint with isort.
 isort:
-	$(PIPRUN) python -m isort --check . -s FinCo -s finco -s git_ignore_folder -s test/scripts
+	$(PIPRUN) python -m isort --check . -s git_ignore_folder -s test/scripts
 
 # Check lint with mypy.
 mypy:
@@ -93,14 +93,14 @@ mypy:
 
 # Check lint with ruff.
 ruff:
-	$(PIPRUN) ruff check .  --exclude FinCo,finco,rdagent/scripts,test/scripts,git_ignore_folder
+	$(PIPRUN) ruff check .  --exclude FinCo,finco,rdagent/scripts,test/scripts,git_ignore_folder --line-length 120
 
 # Check lint with toml-sort.
 toml-sort:
 	$(PIPRUN) toml-sort --check pyproject.toml
 
 # Check lint with all linters.
-lint: mypy ruff toml-sort
+lint: black isort mypy ruff toml-sort
 
 # Run pre-commit with autofix against all files.
 pre-commit:
diff --git a/pyproject.toml b/pyproject.toml
index 00634562..da4392e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,6 +4,7 @@ requires = [
   "setuptools",
   "setuptools-scm",
 ]
+root= "rdagent"
 
 [project]
 authors = [
@@ -33,8 +34,6 @@ name = "rdagent"
 readme = "README.md"
 requires-python = ">=3.8"
 
-[project.scripts]
-rdagent-cli = "rdagent.cli:app"
 
 [project.urls]
 homepage = "https://github.com/microsoft/RD-Agent/"
diff --git a/rdagent/app/CI/run.py b/rdagent/app/CI/run.py
index b47bf15f..2a89835c 100644
--- a/rdagent/app/CI/run.py
+++ b/rdagent/app/CI/run.py
@@ -11,6 +11,14 @@
 from pathlib import Path
 from typing import Dict, List, Tuple, Union, cast
 
+from rich import print
+from rich.panel import Panel
+from rich.prompt import Prompt
+from rich.rule import Rule
+from rich.syntax import Syntax
+from rich.table import Table
+from rich.text import Text
+
 from rdagent.core.evolving_framework import (
     Evaluator,
     EvoAgent,
@@ -21,13 +29,6 @@
     Knowledge,
 )
 from rdagent.oai.llm_utils import APIBackend
-from rich import print
-from rich.panel import Panel
-from rich.prompt import Prompt
-from rich.rule import Rule
-from rich.syntax import Syntax
-from rich.table import Table
-from rich.text import Text
 
 from .prompts import (
     linting_system_prompt_template,
@@ -64,7 +65,6 @@ def __init__(self, path: Union[Path, str]):
         self.path = Path(path)
         self.load()
 
-
     def load(self) -> None:
         code = self.path.read_text(encoding="utf-8")
         self.code_lines = code.split("\n")
@@ -76,32 +76,31 @@ def load(self) -> None:
         for i, code_line in enumerate(self.code_lines):
             self.code_lines_with_lineno.append(f"{i+1: >{self.lineno_width}} | {code_line}")
 
-
-    def get(self, start = 0, end = None, add_line_number: bool = False, return_list: bool = False) -> Union[List[str], str]:
+    def get(self, start=0, end=None, add_line_number: bool = False, return_list: bool = False) -> Union[List[str], str]:
         start -= 1
-        if start < 0: start = 0
-        end = self.lineno if end is None else end-1
+        if start < 0:
+            start = 0
+        end = self.lineno if end is None else end - 1
 
         res = self.code_lines_with_lineno[start:end] if add_line_number else self.code_lines[start:end]
 
         return res if return_list else "\n".join(res)
 
-
     def apply_changes(self, changes: List[Tuple[int, int, str]]) -> None:
         offset = 0
         for start, end, code in changes:
             start -= 1
-            if start < 0: start = 0
+            if start < 0:
+                start = 0
             end -= 1
 
             new_code = code.split("\n")
-            self.code_lines[start+offset:end+offset] = new_code
+            self.code_lines[start + offset : end + offset] = new_code
             offset += len(new_code) - (end - start)
 
         self.path.write_text("\n".join(self.code_lines), encoding="utf-8")
         self.load()
 
-
     def __str__(self):
         return f"{self.path}"
 
@@ -151,6 +150,7 @@ class RuffRule:
         "preview": false
     }
     """
+
     name: str
     code: str
     linter: str
@@ -172,7 +172,6 @@ def __init__(self, command: str = None):
         else:
             self.command = command
 
-
     def explain_rule(self, error_code: str) -> RuffRule:
         explain_command = "ruff rule {error_code} --output-format json"
         try:
@@ -186,7 +185,6 @@ def explain_rule(self, error_code: str) -> RuffRule:
 
         return json.loads(out.decode())
 
-
     def evaluate(self, evo: Repo, **kwargs) -> CIFeedback:
         """Simply run ruff to get the feedbacks."""
         try:
@@ -217,19 +215,21 @@ def evaluate(self, evo: Repo, **kwargs) -> CIFeedback:
         errors = defaultdict(list)
         for match in matches:
             raw_str, file_path, line_number, column_number, error_code, error_message, error_hint = match
-            error = CIError(raw_str=raw_str,
-                            file_path=file_path,
-                            line=int(line_number),
-                            column=int(column_number),
-                            code=error_code,
-                            msg=error_message,
-                            hint=error_hint)
+            error = CIError(
+                raw_str=raw_str,
+                file_path=file_path,
+                line=int(line_number),
+                column=int(column_number),
+                code=error_code,
+                msg=error_message,
+                hint=error_hint,
+            )
             errors[file_path].append(error)
 
         return CIFeedback(errors=errors)
 
-class MypyEvaluator(Evaluator):
 
+class MypyEvaluator(Evaluator):
     def __init__(self, command: str = None):
         if command is None:
             self.command = "mypy . --explicit-package-bases"
@@ -251,7 +251,6 @@ def evaluate(self, evo: Repo, **kwargs) -> CIFeedback:
 
 
 class CIEvoStr(EvolvingStrategy):
-
     def evolve(
         self,
         evo: Repo,
@@ -302,9 +301,23 @@ def evolve(
 
                     errors_str = "\n".join([f"{error.raw_str}\n" for error in group])
 
-                    print(Panel.fit(Syntax("\n".join([f"{error.line}: {error.msg}" for error in group]), lexer="python", background_color="default"), title=f"{len(group)} Errors"))
+                    print(
+                        Panel.fit(
+                            Syntax(
+                                "\n".join([f"{error.line}: {error.msg}" for error in group]),
+                                lexer="python",
+                                background_color="default",
+                            ),
+                            title=f"{len(group)} Errors",
+                        )
+                    )
                     # print(f"[bold yellow]original code:[/bold yellow]\n\n{code_snippet_with_lineno}")
-                    print(Panel.fit(Syntax(code_snippet_with_lineno, lexer="python", background_color="default"), title="Original Code"))
+                    print(
+                        Panel.fit(
+                            Syntax(code_snippet_with_lineno, lexer="python", background_color="default"),
+                            title="Original Code",
+                        )
+                    )
                     user_prompt = session_normal_template.format(
                         code=code_snippet_with_lineno,
                         lint_info=errors_str,
@@ -321,10 +334,14 @@ def evolve(
                         table = Table(show_header=False, box=None)
                         table.add_column()
                         for i in diff:
-                            if i.startswith("+"): table.add_row(Text(i, style="green"))
-                            elif i.startswith("-"): table.add_row(Text(i, style="red"))
-                            elif i.startswith("?"): table.add_row(Text(i, style="yellow"))
-                            else: table.add_row(Syntax(i, lexer="python", background_color="default"))
+                            if i.startswith("+"):
+                                table.add_row(Text(i, style="green"))
+                            elif i.startswith("-"):
+                                table.add_row(Text(i, style="red"))
+                            elif i.startswith("?"):
+                                table.add_row(Text(i, style="yellow"))
+                            else:
+                                table.add_row(Syntax(i, lexer="python", background_color="default"))
                         print(Panel.fit(table, title="Repair Status"))
 
                         operation = input("Input your operation: ")
@@ -407,13 +424,29 @@ def evolve(
 
     total_errors_count = skipped_errors_count + directly_fixed_errors_count + manually_fixed_errors_count
     table.add_row("Total Errors", "", str(total_errors_count), "")
-    table.add_row("Skipped Errors", skipped_errors_statistics, str(skipped_errors_count), f"{skipped_errors_count / total_errors_count:.2%}")
-    table.add_row("Directly Fixed Errors", directly_fixed_errors_statistics, str(directly_fixed_errors_count), f"{directly_fixed_errors_count / total_errors_count:.2%}")
-    table.add_row("Manually Fixed Errors", manually_fixed_errors_statistics, str(manually_fixed_errors_count), f"{manually_fixed_errors_count / total_errors_count:.2%}")
+    table.add_row(
+        "Skipped Errors",
+        skipped_errors_statistics,
+        str(skipped_errors_count),
+        f"{skipped_errors_count / total_errors_count:.2%}",
+    )
+    table.add_row(
+        "Directly Fixed Errors",
+        directly_fixed_errors_statistics,
+        str(directly_fixed_errors_count),
+        f"{directly_fixed_errors_count / total_errors_count:.2%}",
+    )
+    table.add_row(
+        "Manually Fixed Errors",
+        manually_fixed_errors_statistics,
+        str(manually_fixed_errors_count),
+        f"{manually_fixed_errors_count / total_errors_count:.2%}",
+    )
 
     print(table)
     operation = Prompt.ask("Start next round? (y/n): ", choices=["y", "n"])
-    if operation == "n": break
+    if operation == "n":
+        break
 
 
 end_time = time.time()
diff --git a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
index b995c47b..b11b9185 100644
--- a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
+++ b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
@@ -2,14 +2,16 @@
 import json
 from pathlib import Path
 
-from dotenv import load_dotenv
-
 from document_process.document_analysis import (
     check_factor_dict_viability,
     deduplicate_factors_several_times,
     extract_factors_from_report_dict_and_classify_result,
 )
-from document_process.document_reader import classify_report_from_dict, load_and_process_pdfs_by_langchain
+from document_process.document_reader import (
+    classify_report_from_dict,
+    load_and_process_pdfs_by_langchain,
+)
+from dotenv import load_dotenv
 from oai.llm_utils import APIBackend
 
 
@@ -31,7 +33,8 @@ def extract_factors_and_implement(report_file_path: str):
     for factor_name in factor_dict:
         if len(factor_dict[factor_name]) > 1:
             factor_dict_simple_deduplication[factor_name] = max(
-                factor_dict[factor_name], key=lambda x: len(x["formulation"]),
+                factor_dict[factor_name],
+                key=lambda x: len(x["formulation"]),
             )
         else:
             factor_dict_simple_deduplication[factor_name] = factor_dict[factor_name][0]
diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py
index cbf033b0..596b9d4b 100644
--- a/rdagent/core/conf.py
+++ b/rdagent/core/conf.py
@@ -60,7 +60,7 @@ class FincoSettings(BaseSettings):
 
     # fincov2 llama2 endpoint
     use_gcr_endpoint: bool = False
-    gcr_endpoint_type: str = "llama2_70b" # or "llama3_70b", "phi2", "phi3_4k", "phi3_128k"
+    gcr_endpoint_type: str = "llama2_70b"  # or "llama3_70b", "phi2", "phi3_4k", "phi3_128k"
 
     llama2_70b_endpoint: str = ""
     llama2_70b_endpoint_key: str = ""
diff --git a/rdagent/core/evolving_framework.py b/rdagent/core/evolving_framework.py
index 1bc90044..f48954cb 100644
--- a/rdagent/core/evolving_framework.py
+++ b/rdagent/core/evolving_framework.py
@@ -33,7 +33,8 @@ def clone(self) -> EvolvableSubjects:
         return copy.deepcopy(self)
 
 
-class QlibEvolvableSubjects(EvolvableSubjects): ...
+class QlibEvolvableSubjects(EvolvableSubjects):
+    ...
 
 
 class Evaluator(ABC):
diff --git a/rdagent/document_process/document_analysis.py b/rdagent/document_process/document_analysis.py
index 724c6f73..bb74edc1 100644
--- a/rdagent/document_process/document_analysis.py
+++ b/rdagent/document_process/document_analysis.py
@@ -12,15 +12,14 @@
 import yaml
 from azure.ai.formrecognizer import DocumentAnalysisClient
 from azure.core.credentials import AzureKeyCredential
+from core.conf import FincoSettings as Config
+from core.log import FinCoLog
 from jinja2 import Template
+from oai.llm_utils import APIBackend, create_embedding_with_multiprocessing
 from sklearn.cluster import KMeans
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.preprocessing import normalize
 
-from core.conf import FincoSettings as Config
-from core.log import FinCoLog
-from oai.llm_utils import APIBackend, create_embedding_with_multiprocessing
-
 if TYPE_CHECKING:
     from langchain_core.documents import Document
 
@@ -228,9 +227,7 @@ def __extract_factors_name_and_desc_from_content(
         except json.JSONDecodeError:
             parse_success = False
         if ret_json_str is None or not parse_success:
-            current_user_prompt = (
-                "Your response didn't follow the instruction might be wrong json format. Try again."
-            )
+            current_user_prompt = "Your response didn't follow the instruction might be wrong json format. Try again."
         else:
             factors = ret_dict["factors"]
             if len(factors) == 0:
@@ -272,9 +269,7 @@ def __extract_factors_formulation_from_content(
         except json.JSONDecodeError:
             parse_success = False
         if ret_json_str is None or not parse_success:
-            current_user_prompt = (
-                "Your response didn't follow the instruction might be wrong json format. Try again."
-            )
+            current_user_prompt = "Your response didn't follow the instruction might be wrong json format. Try again."
         else:
             for name, formulation_and_description in ret_dict.items():
                 if name in factor_dict:
@@ -392,9 +387,7 @@ def check_factor_dict_viability_simulate_json_mode(
         except json.JSONDecodeError:
             parse_success = False
         if ret_json_str is None or not parse_success:
-            current_user_prompt = (
-                "Your response didn't follow the instruction might be wrong json format. Try again."
-            )
+            current_user_prompt = "Your response didn't follow the instruction might be wrong json format. Try again."
         else:
             return ret_dict
     return {}
diff --git a/rdagent/factor_implementation/evolving/evaluators.py b/rdagent/factor_implementation/evolving/evaluators.py
index 421f9eef..e7e17bf9 100644
--- a/rdagent/factor_implementation/evolving/evaluators.py
+++ b/rdagent/factor_implementation/evolving/evaluators.py
@@ -3,16 +3,11 @@
 import re
 from typing import List
 
-from pandas.core.api import DataFrame as DataFrame
-
 from core.evolving_framework import Evaluator as EvolvingEvaluator
 from core.evolving_framework import Feedback, QueriedKnowledge
 from core.log import FinCoLog
 from core.utils import multiprocessing_wrapper
-from factor_implementation.evolving.evolvable_subjects import (
-    FactorImplementationList,
-)
-from factor_implementation.share_modules.conf import FactorImplementSettings
+from factor_implementation.evolving.evolvable_subjects import FactorImplementationList
 from factor_implementation.share_modules.evaluator import (
     Evaluator as FactorImplementationEvaluator,
 )
@@ -25,6 +20,11 @@
     FactorImplementation,
     FactorImplementationTask,
 )
+from pandas.core.api import DataFrame as DataFrame
+
+from rdagent.factor_implementation.share_modules.factor_implementation_config import (
+    FactorImplementSettings,
+)
 
 
 class FactorImplementationSingleFeedback:
diff --git a/rdagent/factor_implementation/evolving/evolving_strategy.py b/rdagent/factor_implementation/evolving/evolving_strategy.py
index b414f258..1f2cf4b2 100644
--- a/rdagent/factor_implementation/evolving/evolving_strategy.py
+++ b/rdagent/factor_implementation/evolving/evolving_strategy.py
@@ -6,22 +6,24 @@
 from copy import deepcopy
 from typing import TYPE_CHECKING
 
-from jinja2 import Template
-
 from core.evolving_framework import EvolvingStrategy, QueriedKnowledge
 from core.utils import multiprocessing_wrapper
-from factor_implementation.share_modules.conf import FactorImplementSettings
 from factor_implementation.share_modules.factor import (
     FactorImplementation,
     FactorImplementationTask,
     FileBasedFactorImplementation,
 )
-from factor_implementation.share_modules.prompt import (
-    FactorImplementationPrompts,
-)
-from factor_implementation.share_modules.utils import get_data_folder_intro
+from factor_implementation.share_modules.prompt import FactorImplementationPrompts
+from jinja2 import Template
 from oai.llm_utils import APIBackend
 
+from rdagent.factor_implementation.share_modules.factor_implementation_config import (
+    FactorImplementSettings,
+)
+from rdagent.factor_implementation.share_modules.factor_implementation_utils import (
+    get_data_folder_intro,
+)
+
 if TYPE_CHECKING:
     from factor_implementation.evolving.evolvable_subjects import (
         FactorImplementationList,
diff --git a/rdagent/factor_implementation/evolving/factor_implementation_evolving_cli.py b/rdagent/factor_implementation/evolving/factor_implementation_evolving_cli.py
index 705b6730..a8946e87 100644
--- a/rdagent/factor_implementation/evolving/factor_implementation_evolving_cli.py
+++ b/rdagent/factor_implementation/evolving/factor_implementation_evolving_cli.py
@@ -4,18 +4,13 @@
 from pathlib import Path
 
 import pandas as pd
-from fire.core import Fire
-from tqdm import tqdm
-
 from core.evolving_framework import EvoAgent, KnowledgeBase
 from core.utils import multiprocessing_wrapper
 from factor_implementation.evolving.evaluators import (
     FactorImplementationEvaluatorV1,
     FactorImplementationsMultiEvaluator,
 )
-from factor_implementation.evolving.evolvable_subjects import (
-    FactorImplementationList,
-)
+from factor_implementation.evolving.evolvable_subjects import FactorImplementationList
 from factor_implementation.evolving.evolving_strategy import (
     FactorEvolvingStrategy,
     FactorEvolvingStrategyWithGraph,
@@ -30,6 +25,8 @@
     FactorImplementationTask,
     FileBasedFactorImplementation,
 )
+from fire.core import Fire
+from tqdm import tqdm
 
 ALPHA101_INIT_COMPONENTS = [
     "1. abs(): absolute value to certain columns",
@@ -107,11 +104,17 @@ def run_evolving_framework(
     def load_or_init_knowledge_base(self, former_knowledge_base_path: Path = None, component_init_list: list = []):
         if former_knowledge_base_path is not None and former_knowledge_base_path.exists():
             factor_knowledge_base = pickle.load(open(former_knowledge_base_path, "rb"))
-            if self.evolving_version == 1 and not isinstance(
-                factor_knowledge_base, FactorImplementationKnowledgeBaseV1,
-            ) or self.evolving_version == 2 and not isinstance(
-                factor_knowledge_base,
-                FactorImplementationGraphKnowledgeBase,
+            if (
+                self.evolving_version == 1
+                and not isinstance(
+                    factor_knowledge_base,
+                    FactorImplementationKnowledgeBaseV1,
+                )
+                or self.evolving_version == 2
+                and not isinstance(
+                    factor_knowledge_base,
+                    FactorImplementationGraphKnowledgeBase,
+                )
             ):
                 raise ValueError("The former knowledge base is not compatible with the current version")
         else:
@@ -259,7 +262,11 @@ def implement_alpha101(
         print([feedback.final_decision if feedback is not None else None for feedback in feedbacks].count(True))
 
     def implement_amc(
-        self, evo_sub_path_str, former_knowledge_base_path_str, implementation_dump_path_str, slice_index,
+        self,
+        evo_sub_path_str,
+        former_knowledge_base_path_str,
+        implementation_dump_path_str,
+        slice_index,
     ):
         factor_implementations: FactorImplementationList = pickle.load(open(evo_sub_path_str, "rb"))
         factor_implementations.target_factor_tasks = factor_implementations.target_factor_tasks[
diff --git a/rdagent/factor_implementation/evolving/knowledge_management.py b/rdagent/factor_implementation/evolving/knowledge_management.py
index bfd8e7fc..b20e7920 100644
--- a/rdagent/factor_implementation/evolving/knowledge_management.py
+++ b/rdagent/factor_implementation/evolving/knowledge_management.py
@@ -8,9 +8,6 @@
 from pathlib import Path
 from typing import Union
 
-from finco.graph import UndirectedGraph, UndirectedNode
-from jinja2 import Template
-
 from core.evolving_framework import (
     EvolvableSubjects,
     EvoStep,
@@ -20,19 +17,20 @@
     RAGStrategy,
 )
 from core.log import FinCoLog
-from factor_implementation.evolving.evaluators import (
-    FactorImplementationSingleFeedback,
-)
-from factor_implementation.share_modules.conf import FactorImplementSettings
+from factor_implementation.evolving.evaluators import FactorImplementationSingleFeedback
 from factor_implementation.share_modules.factor import (
     FactorImplementation,
     FactorImplementationTask,
 )
-from factor_implementation.share_modules.prompt import (
-    FactorImplementationPrompts,
-)
+from factor_implementation.share_modules.prompt import FactorImplementationPrompts
+from finco.graph import UndirectedGraph, UndirectedNode
+from jinja2 import Template
 from oai.llm_utils import APIBackend, calculate_embedding_distance_between_str_list
 
+from rdagent.factor_implementation.share_modules.factor_implementation_config import (
+    FactorImplementSettings,
+)
+
 
 class FactorImplementationKnowledge(Knowledge):
     def __init__(
@@ -147,9 +145,9 @@ def query(
         for target_factor_task in evo.target_factor_tasks:
             target_factor_task_information = target_factor_task.get_factor_information()
             if target_factor_task_information in self.knowledgebase.success_task_info_set:
-                queried_knowledge.success_task_to_knowledge_dict[target_factor_task_information] = (
-                    self.knowledgebase.implementation_trace[target_factor_task_information][-1]
-                )
+                queried_knowledge.success_task_to_knowledge_dict[
+                    target_factor_task_information
+                ] = self.knowledgebase.implementation_trace[target_factor_task_information][-1]
             elif (
                 len(
                     self.knowledgebase.implementation_trace.setdefault(
@@ -161,12 +159,14 @@ def query(
             ):
                 queried_knowledge.failed_task_info_set.add(target_factor_task_information)
             else:
-                queried_knowledge.working_task_to_former_failed_knowledge_dict[target_factor_task_information] = (
-                    self.knowledgebase.implementation_trace.setdefault(
-                        target_factor_task_information,
-                        [],
-                    )[-v1_query_former_trace_limit:]
-                )
+                queried_knowledge.working_task_to_former_failed_knowledge_dict[
+                    target_factor_task_information
+                ] = self.knowledgebase.implementation_trace.setdefault(
+                    target_factor_task_information,
+                    [],
+                )[
+                    -v1_query_former_trace_limit:
+                ]
 
                 knowledge_base_success_task_list = list(
                     self.knowledgebase.success_task_info_set,
@@ -417,9 +417,9 @@ def former_trace_query(
                     else:
                         current_index += 1
 
-                factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = (
-                    former_trace_knowledge[-v2_query_former_trace_limit:]
-                )
+                factor_implementation_queried_graph_knowledge.former_traces[
+                    target_factor_task_information
+                ] = former_trace_knowledge[-v2_query_former_trace_limit:]
             else:
                 factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = []
 
diff --git a/rdagent/factor_implementation/share_modules/evaluator.py b/rdagent/factor_implementation/share_modules/evaluator.py
index 5f985c46..41c14210 100644
--- a/rdagent/factor_implementation/share_modules/evaluator.py
+++ b/rdagent/factor_implementation/share_modules/evaluator.py
@@ -3,19 +3,19 @@
 from typing import Tuple
 
 import pandas as pd
-from finco.log import FinCoLog
-from jinja2 import Template
-
-from factor_implementation.share_modules.conf import FactorImplementSettings
 from factor_implementation.share_modules.factor import (
     FactorImplementation,
     FactorImplementationTask,
 )
-from factor_implementation.share_modules.prompt import (
-    FactorImplementationPrompts,
-)
+from factor_implementation.share_modules.prompt import FactorImplementationPrompts
+from finco.log import FinCoLog
+from jinja2 import Template
 from oai.llm_utils import APIBackend
 
+from rdagent.factor_implementation.share_modules.factor_implementation_config import (
+    FactorImplementSettings,
+)
+
 
 class Evaluator(ABC):
     @abstractmethod
diff --git a/rdagent/factor_implementation/share_modules/factor.py b/rdagent/factor_implementation/share_modules/factor.py
index 1b81af93..29c8eae1 100644
--- a/rdagent/factor_implementation/share_modules/factor.py
+++ b/rdagent/factor_implementation/share_modules/factor.py
@@ -6,17 +6,19 @@
 from typing import Tuple, Union
 
 import pandas as pd
-from filelock import FileLock
-from finco.log import FinCoLog
-
-from factor_implementation.share_modules.conf import FactorImplementSettings
 from factor_implementation.share_modules.exception import (
     CodeFormatException,
     NoOutputException,
     RuntimeErrorException,
 )
+from filelock import FileLock
+from finco.log import FinCoLog
 from oai.llm_utils import md5_hash
 
+from rdagent.factor_implementation.share_modules.factor_implementation_config import (
+    FactorImplementSettings,
+)
+
 
 class FactorImplementationTask:
     # TODO: remove the factor_ prefix may be better
@@ -122,7 +124,8 @@ def execute(self, store_result: bool = False) -> Tuple[str, pd.DataFrame]:
                 raise ValueError(self.FB_CODE_NOT_SET)
         with FileLock(self.workspace_path / "execution.lock"):
             (Path.cwd() / "git_ignore_folder" / "factor_implementation_execution_cache").mkdir(
-                exist_ok=True, parents=True,
+                exist_ok=True,
+                parents=True,
             )
             if FactorImplementSettings().enable_execution_cache:
                 # NOTE: cache the result for the same code
diff --git a/rdagent/factor_implementation/share_modules/conf.py b/rdagent/factor_implementation/share_modules/factor_implementation_config.py
similarity index 97%
rename from rdagent/factor_implementation/share_modules/conf.py
rename to rdagent/factor_implementation/share_modules/factor_implementation_config.py
index 574c5dfc..45d13294 100644
--- a/rdagent/factor_implementation/share_modules/conf.py
+++ b/rdagent/factor_implementation/share_modules/factor_implementation_config.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from finco.conf import FincoSettings
+from core.conf import FincoSettings
 
 
 class FactorImplementSettings(FincoSettings):
diff --git a/rdagent/factor_implementation/share_modules/utils.py b/rdagent/factor_implementation/share_modules/factor_implementation_utils.py
similarity index 94%
rename from rdagent/factor_implementation/share_modules/utils.py
rename to rdagent/factor_implementation/share_modules/factor_implementation_utils.py
index 0f22aa4f..4538fb12 100644
--- a/rdagent/factor_implementation/share_modules/utils.py
+++ b/rdagent/factor_implementation/share_modules/factor_implementation_utils.py
@@ -5,7 +5,7 @@
 # render it with jinja
 from jinja2 import Template
 
-from factor_implementation.share_modules.conf import FIS
+from rdagent.factor_implementation.share_modules.factor_implementation_config import FIS
 
 TPL = """
 {{file_name}}
diff --git a/rdagent/knowledge_management/graph.py b/rdagent/knowledge_management/graph.py
index b3f4bf19..a30e5390 100644
--- a/rdagent/knowledge_management/graph.py
+++ b/rdagent/knowledge_management/graph.py
@@ -238,7 +238,8 @@ def get_nodes_within_steps(
                 result.append(node)
 
                 for neighbor in sorted(
-                    list(self.get_node(node.id).neighbors), key=lambda x: x.content,
+                    list(self.get_node(node.id).neighbors),
+                    key=lambda x: x.content,
                 ):  # to make sure the result is deterministic
                     if neighbor not in visited:
                         if not (block and neighbor.label not in constraint_labels):
@@ -275,12 +276,16 @@ def get_nodes_intersection(
         for node in nodes:
             if intersection is None:
                 intersection = self.get_nodes_within_steps(
-                    node, steps=steps, constraint_labels=constraint_labels,
+                    node,
+                    steps=steps,
+                    constraint_labels=constraint_labels,
                 )
             intersection = self.intersection(
                 nodes1=intersection,
                 nodes2=self.get_nodes_within_steps(
-                    node, steps=steps, constraint_labels=constraint_labels,
+                    node,
+                    steps=steps,
+                    constraint_labels=constraint_labels,
                 ),
             )
 
@@ -393,7 +398,9 @@ def query_by_content(
         res_list = []
         for query in content:
             similar_nodes = self.semantic_search(
-                content=query, topk_k=topk_k, similarity_threshold=similarity_threshold,
+                content=query,
+                topk_k=topk_k,
+                similarity_threshold=similarity_threshold,
             )
 
             connected_nodes = []
@@ -407,11 +414,7 @@ def query_by_content(
                     block=block,
                 )
                 connected_nodes.extend(
-                    [
-                        node
-                        for node in graph_query_node_res
-                        if node not in connected_nodes
-                    ],
+                    [node for node in graph_query_node_res if node not in connected_nodes],
                 )
                 if len(connected_nodes) >= topk_k:
                     break
@@ -455,7 +458,9 @@ def graph_to_edges(graph: Dict[str, List[str]]):
 
 
 def assign_random_coordinate_to_node(
-    nodes: List, scope: float = 1.0, origin: Tuple = (0.0, 0.0),
+    nodes: List,
+    scope: float = 1.0,
+    origin: Tuple = (0.0, 0.0),
 ) -> Dict:
     coordinates = {}
 
@@ -468,7 +473,10 @@ def assign_random_coordinate_to_node(
 
 
 def assign_isometric_coordinate_to_node(
-    nodes: List, x_step: float = 1.0, x_origin: float = 0.0, y_origin: float = 0.0,
+    nodes: List,
+    x_step: float = 1.0,
+    x_origin: float = 0.0,
+    y_origin: float = 0.0,
 ) -> Dict:
     coordinates = {}
 
@@ -481,7 +489,9 @@ def assign_isometric_coordinate_to_node(
 
 
 def curly_node_coordinate(
-    coordinates: Dict, center_y: float = 1.0, r: float = 1.0,
+    coordinates: Dict,
+    center_y: float = 1.0,
+    r: float = 1.0,
 ) -> Dict:
     # noto: this method can only curly < 90 degree, and the curl line is circle.
     # the original funtion is: x**2 + (y-m)**2 = r**2
diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py
index 2e6b5e1e..8e0c69e9 100644
--- a/rdagent/oai/llm_utils.py
+++ b/rdagent/oai/llm_utils.py
@@ -15,6 +15,7 @@
 
 import numpy as np
 import tiktoken
+
 from rdagent.core.conf import FincoSettings as Config
 from rdagent.core.log import FinCoLog, LogColors
 from rdagent.core.utils import SingletonBaseClass
@@ -199,7 +200,9 @@ def build_chat_completion(self, user_prompt, **kwargs):
         messages = self.build_chat_completion_message(user_prompt, **kwargs)
 
         response = self.api_backend._try_create_chat_completion_or_embedding(
-            messages=messages, chat_completion=True, **kwargs,
+            messages=messages,
+            chat_completion=True,
+            **kwargs,
         )
         messages.append(
             {
@@ -397,7 +400,9 @@ def create_embedding(self, input_content, **kwargs):
         elif isinstance(input_content, list):
             input_content_list = input_content
         resp = self._try_create_chat_completion_or_embedding(
-            input_content_list=input_content_list, embedding=True, **kwargs,
+            input_content_list=input_content_list,
+            embedding=True,
+            **kwargs,
         )
         if isinstance(input_content, str):
             return resp[0]
diff --git a/test/oai/test_completion.py b/test/oai/test_completion.py
index e69de29b..8b518244 100644
--- a/test/oai/test_completion.py
+++ b/test/oai/test_completion.py
@@ -0,0 +1,53 @@
+import pickle
+import unittest
+from pathlib import Path
+import json
+import random
+
+from rdagent.oai.llm_utils import APIBackend
+
+
+class TestChatCompletion(unittest.TestCase):
+    """Checks on the chat-completion entry points of APIBackend."""
+
+    def test_chat_completion(self):
+        """A single-turn completion returns a string."""
+        system_prompt = "You are a helpful assistant."
+        user_prompt = "What is your name?"
+        response = APIBackend().build_messages_and_create_chat_completion(
+            system_prompt=system_prompt, user_prompt=user_prompt
+        )
+        assert response is not None
+        assert isinstance(response, str)
+
+    def test_chat_completion_json_mode(self):
+        """With json_mode=True the returned string must parse as JSON."""
+        system_prompt = "You are a helpful assistant. answer in Json format."
+        user_prompt = "What is your name?"
+        response = APIBackend().build_messages_and_create_chat_completion(
+            system_prompt=system_prompt, user_prompt=user_prompt, json_mode=True
+        )
+        assert response is not None
+        assert isinstance(response, str)
+        json.loads(response)  # raises, and so fails the test, when the reply is not valid JSON
+
+    def test_chat_multi_round(self):
+        """A chat session recalls in its second turn the fruit named in its first turn."""
+        system_prompt = "You are a helpful assistant."
+        fruit_name = random.choice(["apple", "banana", "orange", "grape", "watermelon"])
+        user_prompt_1 = f"I will tell you a name of fruit, please remember them and tell me later. The name is {fruit_name}. Once you remembeer it, please answer OK."
+        user_prompt_2 = "What is the name of the fruit I told you before?"
+
+        session = APIBackend().build_chat_session(session_system_prompt=system_prompt)
+
+        response_1 = session.build_chat_completion(user_prompt=user_prompt_1)
+        assert response_1 is not None
+        assert "ok" in response_1.lower()
+
+        response2 = session.build_chat_completion(user_prompt=user_prompt_2)
+        assert response2 is not None
+        assert fruit_name in response2.lower()
+
+
+if __name__ == "__main__":
+    unittest.main()

From 2545277902ca548dd493c299656b98eee9c9214d Mon Sep 17 00:00:00 2001
From: xuyang1 <xuyang1@microsoft.com>
Date: Wed, 5 Jun 2024 05:56:01 +0000
Subject: [PATCH 2/3] update code

---
 pyproject.toml                                |   2 +-
 .../factor_extract_and_implement.py           | 142 ++------------
 rdagent/core/conf.py                          |   1 +
 .../prompt.py => core/prompts.py}             |  14 +-
 rdagent/core/utils.py                         |  14 +-
 rdagent/document_process/__init__.py          |   0
 rdagent/document_process/document_analysis.py | 111 ++++++++---
 rdagent/document_process/document_reader.py   |  12 +-
 rdagent/document_process/prompts.yaml         | 182 ++++++++++++++++++
 .../evolving/evaluators.py                    |  16 +-
 .../evolving/evolvable_subjects.py            |   6 +-
 .../evolving/evolving_strategy.py             |  39 ++--
 .../factor_implementation_evolving_cli.py     |  14 +-
 .../evolving/knowledge_management.py          |  48 +++--
 .../{share_modules => }/prompts.yaml          |   0
 rdagent/oai/llm_utils.py                      |  44 +++--
 requirements.txt                              |   3 +
 test/oai/test_embedding.py                    |   0
 test/oai/test_embedding_and_similarity.py     |  25 +++
 test/oai/test_embedding_similarity.py         |   0
 20 files changed, 428 insertions(+), 245 deletions(-)
 rename rdagent/{factor_implementation/share_modules/prompt.py => core/prompts.py} (52%)
 delete mode 100644 rdagent/document_process/__init__.py
 create mode 100644 rdagent/document_process/prompts.yaml
 rename rdagent/factor_implementation/{share_modules => }/prompts.yaml (100%)
 delete mode 100644 test/oai/test_embedding.py
 create mode 100644 test/oai/test_embedding_and_similarity.py
 delete mode 100644 test/oai/test_embedding_similarity.py

diff --git a/pyproject.toml b/pyproject.toml
index da4392e3..f497268d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -98,7 +98,7 @@ select = ["ALL"]
 "test/*" = ["S101"]
 
 [tool.setuptools]
-py-modules = ["rdagent"]
+packages = ["rdagent"]
 
 [tool.setuptools.dynamic]
 dependencies = {file = ["requirements.txt"]}
diff --git a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
index b11b9185..8277c36e 100644
--- a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
+++ b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
@@ -2,143 +2,29 @@
 import json
 from pathlib import Path
 
-from document_process.document_analysis import (
-    check_factor_dict_viability,
+from rdagent.document_process.document_analysis import (
+    filter_factor_by_viability,
     deduplicate_factors_several_times,
-    extract_factors_from_report_dict_and_classify_result,
-)
-from document_process.document_reader import (
-    classify_report_from_dict,
-    load_and_process_pdfs_by_langchain,
+    extract_factors_from_report_dict,
+    merge_file_to_factor_dict_to_factor_dict,
 )
+from rdagent.document_process.document_reader import load_and_process_pdfs_by_langchain
+from rdagent.document_process.document_analysis import classify_report_from_dict
 from dotenv import load_dotenv
-from oai.llm_utils import APIBackend
 
 
 def extract_factors_and_implement(report_file_path: str):
     assert load_dotenv()
-    api = APIBackend()
-    docs_dict_select = load_and_process_pdfs_by_langchain(Path(report_file_path))
-
-    selected_report_dict = classify_report_from_dict(report_dict=docs_dict_select, api=api, vote_time=1)
-    file_to_factor_result = extract_factors_from_report_dict_and_classify_result(docs_dict_select, selected_report_dict)
-
-    factor_dict = {}
-    for file_name in file_to_factor_result:
-        for factor_name in file_to_factor_result[file_name]:
-            factor_dict.setdefault(factor_name, [])
-            factor_dict[factor_name].append(file_to_factor_result[file_name][factor_name])
-
-    factor_dict_simple_deduplication = {}
-    for factor_name in factor_dict:
-        if len(factor_dict[factor_name]) > 1:
-            factor_dict_simple_deduplication[factor_name] = max(
-                factor_dict[factor_name],
-                key=lambda x: len(x["formulation"]),
-            )
-        else:
-            factor_dict_simple_deduplication[factor_name] = factor_dict[factor_name][0]
-    # %%
-
-    factor_viability = check_factor_dict_viability(factor_dict_simple_deduplication)
-    # json.dump(
-    #     factor_viability,
-    #     open(
-    #         "factor_viability_all_reports.json",
-    #         "w",
-    #     ),
-    #     indent=4,
-    # )
-
-    # factor_viability = json.load(
-    #     open(
-    #         "factor_viability_all_reports.json"
-    #     )
-    # )
-
-    # %%
-
-    duplication_names_list = deduplicate_factors_several_times(factor_dict_simple_deduplication)
-    duplication_names_list = sorted(duplication_names_list, key=lambda x: len(x), reverse=True)
-    json.dump(duplication_names_list, open("duplication_names_list.json", "w"), indent=4)
-
-    # %%
-    factor_dict_viable = {
-        factor_name: factor_dict_simple_deduplication[factor_name]
-        for factor_name in factor_dict_simple_deduplication
-        if factor_viability[factor_name]["viability"]
-    }
-
-    to_replace_dict = {}
-    for duplication_names in duplication_names_list:
-        for duplication_factor_name in duplication_names[1:]:
-            to_replace_dict[duplication_factor_name] = duplication_names[0]
-
-    added_lower_name_set = set()
-    factor_dict_deduplication_with_llm = dict()
-    for factor_name in factor_dict_simple_deduplication:
-        if factor_name not in to_replace_dict and factor_name.lower() not in added_lower_name_set:
-            added_lower_name_set.add(factor_name.lower())
-            factor_dict_deduplication_with_llm[factor_name] = factor_dict_simple_deduplication[factor_name]
-
-    to_replace_viable_dict = {}
-    for duplication_names in duplication_names_list:
-        viability_list = [factor_viability[name]["viability"] for name in duplication_names]
-        if True not in viability_list:
-            continue
-        target_factor_name = duplication_names[viability_list.index(True)]
-        for duplication_factor_name in duplication_names:
-            if duplication_factor_name == target_factor_name:
-                continue
-            to_replace_viable_dict[duplication_factor_name] = target_factor_name
-
-    added_lower_name_set = set()
-    factor_dict_deduplication_with_llm_and_viable = dict()
-    for factor_name in factor_dict_viable:
-        if factor_name not in to_replace_viable_dict and factor_name.lower() not in added_lower_name_set:
-            added_lower_name_set.add(factor_name.lower())
-            factor_dict_deduplication_with_llm_and_viable[factor_name] = factor_dict_simple_deduplication[factor_name]
-
-    # %%
+    docs_dict = load_and_process_pdfs_by_langchain(Path(report_file_path))
 
-    dump_md_list = [
-        [factor_dict_simple_deduplication, "final_factor_book"],
-        [factor_dict_viable, "final_viable_factor_book"],
-        [factor_dict_deduplication_with_llm, "final_deduplicated_factor_book"],
-        [factor_dict_deduplication_with_llm_and_viable, "final_deduplicated_viable_factor_book"],
-    ]
+    selected_report_dict = classify_report_from_dict(report_dict=docs_dict, vote_time=1)
+    file_to_factor_result = extract_factors_from_report_dict(docs_dict, selected_report_dict)
+    factor_dict = merge_file_to_factor_dict_to_factor_dict(file_to_factor_result)
 
-    for dump_md in dump_md_list:
-        factor_name_set = set()
-        current_index = 1
-        target_dict = dump_md[0]
-        json.dump(target_dict, open(f"{dump_md[1]}.json", "w"), indent=4)
-        with open(
-            rf"{dump_md[1]}.md",
-            "w",
-        ) as fw:
-            for factor_name in target_dict:
-                formulation = target_dict[factor_name]["formulation"]
-                if factor_name in formulation:
-                    target_factor_name = factor_name.replace("_", r"\_")
-                    formulation = formulation.replace(factor_name, target_factor_name)
-                for variable in target_dict[factor_name]["variables"]:
-                    if variable in formulation:
-                        target_variable = variable.replace("_", r"\_")
-                        formulation = formulation.replace(variable, target_variable)
+    factor_dict_viable, factor_viability = filter_factor_by_viability(factor_dict)
 
-                fw.write(f"## {current_index}. 因子名称：{factor_name}\n")
-                fw.write(f"### Viability: {target_dict[factor_name]['viability']}\n")
-                fw.write(f"### Viability Reason: {target_dict[factor_name]['viability_reason']}\n")
-                fw.write(f"### description: {target_dict[factor_name]['description']}\n")
-                fw.write(f"### formulation: $$ {formulation} $$\n")
-                fw.write(f"### formulation string: {formulation}\n")
-                # write a table of variable and its description
+    factor_dict, duplication_names_list = deduplicate_factors_several_times(factor_dict, factor_viability)
 
-                fw.write("### variable tables: \n")
-                fw.write("| variable | description |\n")
-                fw.write("| -------- | ----------- |\n")
-                for variable in target_dict[factor_name]["variables"]:
-                    fw.write(f"| {variable} | {target_dict[factor_name]['variables'][variable]} |\n")
 
-                current_index += 1
+if __name__ == "__main__":
+    extract_factors_and_implement("/home/xuyang1/workspace/report.pdf")
diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py
index 596b9d4b..12b526b2 100644
--- a/rdagent/core/conf.py
+++ b/rdagent/core/conf.py
@@ -13,6 +13,7 @@
 
 class FincoSettings(BaseSettings):
     use_azure: bool = True
+    use_azure_token_provider: bool = False
     max_retry: int = 10
     retry_wait_seconds: int = 1
     continuous_mode: bool = False
diff --git a/rdagent/factor_implementation/share_modules/prompt.py b/rdagent/core/prompts.py
similarity index 52%
rename from rdagent/factor_implementation/share_modules/prompt.py
rename to rdagent/core/prompts.py
index 33cea23b..e138d953 100644
--- a/rdagent/factor_implementation/share_modules/prompt.py
+++ b/rdagent/core/prompts.py
@@ -2,21 +2,21 @@
 from typing import Dict
 
 import yaml
-from finco.utils import SingletonBaseClass
+from rdagent.core.utils import SingletonBaseClass
 
 
-class FactorImplementationPrompts(Dict, SingletonBaseClass):
-    def __init__(self):
-        super().__init__()
-        prompt_yaml_path = Path(__file__).parent / "prompts.yaml"
-
+class Prompts(Dict, SingletonBaseClass):
+    def __init__(self, file_path: Path):
         prompt_yaml_dict = yaml.load(
             open(
-                prompt_yaml_path,
+                file_path,
                 encoding="utf8",
             ),
             Loader=yaml.FullLoader,
         )
 
+        if prompt_yaml_dict is None:
+            raise ValueError(f"Failed to load prompts from {file_path}")
+
         for key, value in prompt_yaml_dict.items():
             self[key] = value
diff --git a/rdagent/core/utils.py b/rdagent/core/utils.py
index 85628358..f0e058f6 100644
--- a/rdagent/core/utils.py
+++ b/rdagent/core/utils.py
@@ -14,17 +14,21 @@
 from fuzzywuzzy import fuzz
 
 
-class FincoException(Exception):
+class RDAgentException(Exception):
     pass
 
 
 class SingletonMeta(type):
-    _instance = None
+    _instance_dict = {}
 
     def __call__(cls, *args, **kwargs):
-        if cls._instance is None:
-            cls._instance = super(SingletonMeta, cls).__call__(*args, **kwargs)
-        return cls._instance
+        # Only kwargs are accepted: positional and keyword spellings of a call are hard to map to one key.
+        if len(args) > 0:
+            raise RDAgentException("Please only use kwargs in Singleton to avoid misunderstanding.")
+        kwargs_hash = hash((cls, tuple(sorted(kwargs.items()))))  # the dict is shared by all classes: key on cls
+        if kwargs_hash not in cls._instance_dict:
+            cls._instance_dict[kwargs_hash] = super(SingletonMeta, cls).__call__(*args, **kwargs)
+        return cls._instance_dict[kwargs_hash]
 
 
 class SingletonBaseClass(metaclass=SingletonMeta):
diff --git a/rdagent/document_process/__init__.py b/rdagent/document_process/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/rdagent/document_process/document_analysis.py b/rdagent/document_process/document_analysis.py
index bb74edc1..97e85683 100644
--- a/rdagent/document_process/document_analysis.py
+++ b/rdagent/document_process/document_analysis.py
@@ -12,10 +12,11 @@
 import yaml
 from azure.ai.formrecognizer import DocumentAnalysisClient
 from azure.core.credentials import AzureKeyCredential
-from core.conf import FincoSettings as Config
-from core.log import FinCoLog
+from rdagent.core.conf import FincoSettings as Config
+from rdagent.core.log import FinCoLog
+from rdagent.core.prompts import Prompts
 from jinja2 import Template
-from oai.llm_utils import APIBackend, create_embedding_with_multiprocessing
+from rdagent.oai.llm_utils import APIBackend, create_embedding_with_multiprocessing
 from sklearn.cluster import KMeans
 from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.preprocessing import normalize
@@ -25,10 +26,7 @@
 
 from langchain.document_loaders import PyPDFDirectoryLoader, PyPDFLoader
 
-with (Path(__file__).parent / "util_prompt.yaml").open(encoding="utf8") as f:
-    UTIL_PROMPT = yaml.safe_load(
-        f,
-    )
+document_process_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
 
 
 def load_documents_by_langchain(path: Path) -> list:
@@ -121,7 +119,6 @@ def load_and_process_pdfs_by_azure_document_intelligence(path: Path) -> dict[str
 
 def classify_report_from_dict(
     report_dict: Mapping[str, str],
-    api: APIBackend,
     input_max_token: int = 128000,
     vote_time: int = 1,
     substrings: tuple[str] = (),
@@ -131,7 +128,6 @@ def classify_report_from_dict(
     - report_dict (Dict[str, str]):
       A dictionary where the key is the path of the report (ending with .pdf),
       and the value is either the report content as a string.
-    - api (APIBackend): An instance of the APIBackend class.
     - input_max_token (int): Specifying the maximum number of input tokens.
     - vote_time (int): An integer specifying how many times to vote.
     - substrings (list(str)): List of hardcode substrings.
@@ -154,7 +150,7 @@ def classify_report_from_dict(
         )
 
     res_dict = {}
-    classify_prompt = UTIL_PROMPT["classify_system"]
+    classify_prompt = document_process_prompts["classify_system"]
     enc = tiktoken.encoding_for_model("gpt-4-turbo")
 
     for key, value in report_dict.items():
@@ -182,7 +178,7 @@ def classify_report_from_dict(
             for _ in range(vote_time):
                 user_prompt = content
                 system_prompt = classify_prompt
-                res = api.build_messages_and_create_chat_completion(
+                res = APIBackend().build_messages_and_create_chat_completion(
                     user_prompt=user_prompt,
                     system_prompt=system_prompt,
                     json_mode=True,
@@ -208,7 +204,7 @@ def __extract_factors_name_and_desc_from_content(
     content: str,
 ) -> dict[str, dict[str, str]]:
     session = APIBackend().build_chat_session(
-        session_system_prompt=UTIL_PROMPT["extract_factors_system"],
+        session_system_prompt=document_process_prompts["extract_factors_system"],
     )
 
     extracted_factor_dict = {}
@@ -234,7 +230,7 @@ def __extract_factors_name_and_desc_from_content(
                 break
             for factor_name, factor_description in factors.items():
                 extracted_factor_dict[factor_name] = factor_description
-            current_user_prompt = UTIL_PROMPT["extract_factors_follow_user"]
+            current_user_prompt = document_process_prompts["extract_factors_follow_user"]
 
     return extracted_factor_dict
 
@@ -248,9 +244,9 @@ def __extract_factors_formulation_from_content(
         columns=["factor_name", "factor_description"],
     )
 
-    system_prompt = UTIL_PROMPT["extract_factor_formulation_system"]
+    system_prompt = document_process_prompts["extract_factor_formulation_system"]
     current_user_prompt = Template(
-        UTIL_PROMPT["extract_factor_formulation_user"],
+        document_process_prompts["extract_factor_formulation_user"],
     ).render(report_content=content, factor_dict=factor_dict_df.to_string())
 
     session = APIBackend().build_chat_session(session_system_prompt=system_prompt)
@@ -288,7 +284,7 @@ def __extract_factors_formulation_from_content(
     return factor_to_formulation
 
 
-def extract_factor_and_formulation_from_one_report(
+def __extract_factor_and_formulation_from_one_report(
     content: str,
 ) -> dict[str, dict[str, str]]:
     final_factor_dict_to_one_report = {}
@@ -299,6 +295,9 @@ def extract_factor_and_formulation_from_one_report(
             factor_dict,
         )
     for factor_name in factor_dict:
+        if factor_name not in factor_to_formulation:
+            continue
+
         final_factor_dict_to_one_report.setdefault(factor_name, {})
         final_factor_dict_to_one_report[factor_name]["description"] = factor_dict[factor_name]
 
@@ -318,7 +317,7 @@ def extract_factor_and_formulation_from_one_report(
     return final_factor_dict_to_one_report
 
 
-def extract_factors_from_report_dict_and_classify_result(
+def extract_factors_from_report_dict(
     report_dict: dict[str, str],
     useful_no_dict: dict[str, dict[str, str]],
     n_proc: int = 11,
@@ -334,9 +333,7 @@ def extract_factors_from_report_dict_and_classify_result(
     final_report_factor_dict = {}
     # for file_name, content in useful_report_dict.items():
     #     final_report_factor_dict.setdefault(file_name, {})
-    #     final_report_factor_dict[
-    #         file_name
-    #     ] = extract_factor_and_formulation_from_one_report(content)
+    #     final_report_factor_dict[file_name] = __extract_factor_and_formulation_from_one_report(content)
 
     while len(final_report_factor_dict) != len(useful_report_dict):
         pool = mp.Pool(n_proc)
@@ -348,7 +345,7 @@ def extract_factors_from_report_dict_and_classify_result(
             file_names.append(file_name)
             pool_result_list.append(
                 pool.apply_async(
-                    extract_factor_and_formulation_from_one_report,
+                    __extract_factor_and_formulation_from_one_report,
                     (content,),
                 ),
             )
@@ -366,11 +363,32 @@ def extract_factors_from_report_dict_and_classify_result(
     return final_report_factor_dict
 
 
-def check_factor_dict_viability_simulate_json_mode(
+def merge_file_to_factor_dict_to_factor_dict(
+    file_to_factor_dict: dict[str, dict],
+) -> dict:
+    """Flatten per-file factor dicts into one dict keyed by factor name.
+
+    A name found in several files keeps the entry with the longest "formulation".
+    """
+    grouped = {}
+    for factors_of_file in file_to_factor_dict.values():
+        for name, info in factors_of_file.items():
+            grouped.setdefault(name, []).append(info)
+
+    merged = {}
+    for name, candidates in grouped.items():
+        # A lone candidate is taken as is; its "formulation" is never read.
+        merged[name] = (
+            max(candidates, key=lambda c: len(c["formulation"])) if len(candidates) > 1 else candidates[0]
+        )
+    return merged
+
+
+def __check_factor_dict_viability_simulate_json_mode(
     factor_df_string: str,
 ) -> dict[str, dict[str, str]]:
     session = APIBackend().build_chat_session(
-        session_system_prompt=UTIL_PROMPT["factor_viability_system"],
+        session_system_prompt=document_process_prompts["factor_viability_system"],
     )
     current_user_prompt = factor_df_string
 
@@ -393,9 +411,9 @@ def check_factor_dict_viability_simulate_json_mode(
     return {}
 
 
-def check_factor_dict_viability(
+def filter_factor_by_viability(
     factor_dict: dict[str, dict[str, str]],
-) -> dict[str, dict[str, str]]:
+) -> tuple[dict[str, dict[str, str]], dict[str, dict[str, str]]]:
     factor_viability_dict = {}
 
     factor_df = pd.DataFrame(factor_dict).T
@@ -410,7 +428,7 @@ def check_factor_dict_viability(
 
             result_list.append(
                 pool.apply_async(
-                    check_factor_dict_viability_simulate_json_mode,
+                    __check_factor_dict_viability_simulate_json_mode,
                     (target_factor_df_string,),
                 ),
             )
@@ -425,14 +443,20 @@ def check_factor_dict_viability(
 
         factor_df = factor_df[~factor_df.index.isin(factor_viability_dict)]
 
-    return factor_viability_dict
+    filtered_factor_dict = {
+        factor_name: factor_dict[factor_name]
+        for factor_name in factor_dict
+        if factor_viability_dict[factor_name]["viability"]
+    }
+
+    return filtered_factor_dict, factor_viability_dict
 
 
 def check_factor_duplication_simulate_json_mode(
     factor_df: pd.DataFrame,
 ) -> list[list[str]]:
     session = APIBackend().build_chat_session(
-        session_system_prompt=UTIL_PROMPT["factor_duplicate_system"],
+        session_system_prompt=document_process_prompts["factor_duplicate_system"],
     )
     current_user_prompt = factor_df.to_string()
 
@@ -588,6 +612,7 @@ def deduplicate_factor_dict(factor_dict: dict[str, dict[str, str]]) -> list[list
 
 def deduplicate_factors_several_times(
     factor_dict: dict[str, dict[str, str]],
+    factor_viability_dict: dict[str, dict[str, str]] = None,
 ) -> list[list[str]]:
     final_duplication_names_list = []
     current_round_factor_dict = factor_dict
@@ -604,5 +629,31 @@ def deduplicate_factors_several_times(
         if len(new_round_names) != 0:
             current_round_factor_dict = {factor_name: factor_dict[factor_name] for factor_name in new_round_names}
         else:
-            return final_duplication_names_list
-    return []
+            break
+
+    final_duplication_names_list = sorted(final_duplication_names_list, key=lambda x: len(x), reverse=True)
+
+    to_replace_dict = {}
+    for duplication_names in duplication_names_list:
+        if factor_viability_dict is not None:
+            viability_list = [factor_viability_dict[name]["viability"] for name in duplication_names]
+            if True not in viability_list:
+                continue
+            target_factor_name = duplication_names[viability_list.index(True)]
+        else:
+            target_factor_name = duplication_names[0]
+        for duplication_factor_name in duplication_names:
+            if duplication_factor_name == target_factor_name:
+                continue
+            to_replace_dict[duplication_factor_name] = target_factor_name
+
+    llm_deduplicated_factor_dict = dict()
+    added_lower_name_set = set()
+    for factor_name in factor_dict:
+        if factor_name not in to_replace_dict and factor_name.lower() not in added_lower_name_set:
+            if factor_viability_dict is not None and not factor_viability_dict[factor_name]["viability"]:
+                continue
+            added_lower_name_set.add(factor_name.lower())
+            llm_deduplicated_factor_dict[factor_name] = factor_dict[factor_name]
+
+    return llm_deduplicated_factor_dict, final_duplication_names_list
diff --git a/rdagent/document_process/document_reader.py b/rdagent/document_process/document_reader.py
index 6a93eb1c..2e8ad630 100644
--- a/rdagent/document_process/document_reader.py
+++ b/rdagent/document_process/document_reader.py
@@ -5,18 +5,12 @@
 import yaml
 from azure.ai.formrecognizer import DocumentAnalysisClient
 from azure.core.credentials import AzureKeyCredential
-from finco.conf import FincoSettings as Config
-
-if TYPE_CHECKING:
-    from langchain_core.documents import Document
+from rdagent.core.conf import FincoSettings as Config
+from rdagent.core.prompts import Prompts
 
+from langchain_core.documents import Document
 from langchain.document_loaders import PyPDFDirectoryLoader, PyPDFLoader
 
-with (Path(__file__).parent / "util_prompt.yaml").open(encoding="utf8") as f:
-    UTIL_PROMPT = yaml.safe_load(
-        f,
-    )
-
 
 def load_documents_by_langchain(path: Path) -> list:
     """Load documents from the specified path.
diff --git a/rdagent/document_process/prompts.yaml b/rdagent/document_process/prompts.yaml
new file mode 100644
index 00000000..f08f0048
--- /dev/null
+++ b/rdagent/document_process/prompts.yaml
@@ -0,0 +1,182 @@
+extract_factors_system: |-
+    用户会提供一篇金融工程研报，其中包括了量化因子和模型研究，请按照要求抽取以下信息:
+    1. 概述这篇研报的主要研究思路;
+    2. 抽取出所有的因子，并概述因子的计算过程，请注意有些因子可能存在于表格中，请不要遗漏，因子的名称请使用英文，不能包含空格，可用下划线连接，研报中可能不含有因子，若没有请返回空字典;
+    3. 抽取研报里面的所有模型，并概述模型的计算过程，可以分步骤描述模型搭建或计算的过程，研报中可能不含有模型，若没有请返回空字典;
+
+    The user will treat your factor name as the key to store the factor, so don't put any interaction message in the content. Just respond with the output, without any interaction or explanation.
+    All names should be in English.
+    Respond with your analysis in JSON format. The JSON schema should include:
+    ```json
+    {
+        "summary": "The summary of this report",
+        "factors": {
+            "Name of factor 1": "Description to factor 1",
+            "Name of factor 2": "Description to factor 2"
+        },
+        "models": {
+            "Name of model 1": "Description to model 1",
+            "Name of model 2": "Description to model 2"
+        }
+    }
+    ```
+
+extract_factors_follow_user: |-
+    Please continue extracting the factors. Please ignore factors that appeared in former messages. If no factor is found, please return an empty dict.
+    Notice: You should not miss any factor in the report! Some factors might appear several times in the report. You can repeat them to avoid missing other factors.
+    Respond with your analysis in JSON format. The JSON schema should include:
+    ```json
+    {
+        "factors": {
+            "Name of factor 1": "Description to factor 1",
+            "Name of factor 2": "Description to factor 2"
+        }
+    }
+    ```
+
+extract_factor_formulation_system: |-
+    用户会提供一篇金融工程研报，和用户从中提取到的因子列表，请结合文章和用户提供的因子名称和因子描述，按照要求抽取：
+    1. 因子的计算公式，使用latex格式，公式中的变量名称不能包含空格，可用下划线连接，公式中的因子名称与用户提供的因子名称保持一致;
+    2. 因子公式中的变量和函数解释，请使用英文描述，变量名和函数名请与公式中的名称对齐
+
+    User has several source data:
+    1. The Stock Trade Data Table containing information about stock trades, such as daily open, close, high, low, vwap prices, volume, and turnover;
+    2. The Financial Data Table containing company financial statements such as the balance sheet, income statement, and cash flow statement;
+    3. The Stock Fundamental Data Table containing basic information about stocks, like total shares outstanding, free float shares, industry classification, market classification, etc;
+    4. The high frequency data containing price and volume of each stock containing open close high low volume vwap in each minute.
+    Please try to expand the formulation to using the source data provided by user.
+
+    The user will treat your factor name as the key to store the factor, so don't put any interaction message in the content. Just respond with the output, without any interaction or explanation.
+    You can extract only part of the user's input factors if the token budget is not enough. To avoid responding in an invalid format, don't extract more than thirty factors in one response.
+    Be cautious with the "\" in your formulation, because in JSON certain characters like the backslash need to be escaped with another backslash. In particular, _ and \_ are different in latex, so use \_ to represent _ in latex.
+    Respond with your analysis in JSON format. The JSON schema should include:
+    ```json
+    {
+        "name of factor 1": {
+            "formulation": "latex formulation of factor 1",
+            "variables": {
+                "Name to variable or function 1": "Description to variable or function 1",
+                "Name to variable or function 2": "Description to variable or function 2"
+            }
+        },
+        "name of factor 2": {
+            "formulation": "latex formulation of factor 2",
+            "variables": {
+                "Name to variable or function 1": "Description to variable or function 1",
+                "Name to variable or function 2": "Description to variable or function 2"
+            }
+        }
+    }
+    ```
+
+extract_factor_formulation_user: |-
+    ===========================Report content:=============================
+    {{ report_content }}
+    ===========================Factor list in dataframe=============================
+    {{ factor_dict }}
+
+classify_system: |-
+    你是一个研报分类助手。用户会输入一篇金融研报。请按照要求回答：
+    因子指能够解释资产收益率或价格等的变量；而模型则指机器学习或深度学习模型，利用因子等变量来预测价格或收益率变化。
+
+    请你对研报进行分类，考虑两个条件：
+        1. 是金工量化领域中选股（需与择时，选基等严格区分开）方面的研报;
+        2. 涉及了因子或模型的构成，或者是测试了它们的表现。
+    如果研报同时满足上述两个条件，请输出1；若没有，请输出0。
+
+    请使用json进行回答。json key为：class
+
+factor_viability_system: |-
+    User has designed several factors in quant investment. Please help the user to check the viability of these factors.
+    These factors are used to build a daily frequency strategy in China A-share market.
+
+    User will provide a pandas dataframe like table containing following information:
+    1. The name of the factor;
+    2. The simple description of the factor;
+    3. The formulation of the factor in latex format;
+    4. The description to the variables and functions in the formulation of the factor.
+
+    User has several source data:
+    1. The Stock Trade Data Table containing information about stock trades, such as daily open, close, high, low, vwap prices, volume, and turnover;
+    2. The Financial Data Table containing company financial statements such as the balance sheet, income statement, and cash flow statement;
+    3. The Stock Fundamental Data Table containing basic information about stocks, like total shares outstanding, free float shares, industry classification, market classification, etc;
+    4. The high frequency data containing price and volume of each stock containing open close high low volume vwap in each minute;
+    5. The Consensus Expectations Factor containing the consensus expectations of the analysts about the future performance of the company.
+
+
+    A viable factor should satisfy the following conditions:
+    1. The factor should be able to be calculated in daily frequency;
+    2. The factor should be able to be calculated based on each stock;
+    3. The factor should be able to be calculated based on the source data provided by user.
+
+    You should give a decision for each factor provided by the user. You should only reject a factor for a very solid reason.
+    Please return true to the viable factor and false to the non-viable factor.
+
+    Notice, you can just return part of the factors due to token limit. Your factor name should be the same as the user's factor name.
+
+    Please respond with your decision in JSON format. Just respond the output json string without any interaction and explanation.
+    The JSON schema should include:
+    ```json
+    {
+        "Name to factor 1":
+        {
+            "viability": true,
+            "reason": "The reason to the viability of this factor"
+        },
+        "Name to factor 2":
+        {
+            "viability": false,
+            "reason": "The reason to the non-viability of this factor"
+        },
+        "Name to factor 3":
+        {
+            "viability": true,
+            "reason": "The reason to the viability of this factor"
+        }
+    }
+    ```
+
+
+factor_duplicate_system: |-
+    User has designed several factors in quant investment. Please help the user to deduplicate these factors.
+    These factors are used to build a daily frequency strategy in China A-share market.
+
+    User will provide a pandas dataframe like table containing following information:
+    1. The name of the factor;
+    2. The simple description of the factor;
+    3. The formulation of the factor in latex format;
+    4. The description to the variables and functions in the formulation of the factor.
+
+    User wants to find whether there are duplicated groups. The factors in a duplicate group should satisfy the following conditions:
+    1. They might differ in the name, description, formulation, or the description to the variables and functions in the formulation, some upper or lower case difference is included;
+    2. They should be talking about exactly the same factor;
+    3. If horizon information like 1 day, 5 days, 10 days, etc is provided, the horizon information should be the same.
+
+    To make your response valid, we have some very important constraint for you to follow! Listed here:
+    1. You should be very confident to put duplicated factors into a group;
+    2. A group should contain at least two factors;
+    3. If a factor has no duplicate, don't put it into your response;
+    4. To avoid merging too many similar factors, don't put more than ten factors into a group!
+    You should always follow the above constraints to make your response valid. 
+
+    Your response JSON schema should include:
+    ```json
+    [
+        [
+            "factor name 1",
+            "factor name 2"
+        ],
+        [
+            "factor name 5",
+            "factor name 6"
+        ],
+        [
+            "factor name 7",
+            "factor name 8",
+            "factor name 9"
+        ]
+    ]
+    ```
+    Your response is a list of lists. Each list represents a duplicate group containing all the factor names in this group. 
+    The factor names in the list should be unique and the factor names should be the same as the user's factor name.
+    To avoid reaching the token limit, don't respond with more than fifty groups in one response. You should respond with the output json string without any interaction or explanation.
\ No newline at end of file
diff --git a/rdagent/factor_implementation/evolving/evaluators.py b/rdagent/factor_implementation/evolving/evaluators.py
index e7e17bf9..930a9fee 100644
--- a/rdagent/factor_implementation/evolving/evaluators.py
+++ b/rdagent/factor_implementation/evolving/evaluators.py
@@ -3,20 +3,20 @@
 import re
 from typing import List
 
-from core.evolving_framework import Evaluator as EvolvingEvaluator
-from core.evolving_framework import Feedback, QueriedKnowledge
-from core.log import FinCoLog
-from core.utils import multiprocessing_wrapper
-from factor_implementation.evolving.evolvable_subjects import FactorImplementationList
-from factor_implementation.share_modules.evaluator import (
+from rdagent.core.evolving_framework import Evaluator as EvolvingEvaluator
+from rdagent.core.evolving_framework import Feedback, QueriedKnowledge
+from rdagent.core.log import FinCoLog
+from rdagent.core.utils import multiprocessing_wrapper
+from rdagent.factor_implementation.evolving.evolvable_subjects import FactorImplementationList
+from rdagent.factor_implementation.share_modules.evaluator import (
     Evaluator as FactorImplementationEvaluator,
 )
-from factor_implementation.share_modules.evaluator import (
+from rdagent.factor_implementation.share_modules.evaluator import (
     FactorImplementationCodeEvaluator,
     FactorImplementationFinalDecisionEvaluator,
     FactorImplementationValueEvaluator,
 )
-from factor_implementation.share_modules.factor import (
+from rdagent.factor_implementation.share_modules.factor import (
     FactorImplementation,
     FactorImplementationTask,
 )
diff --git a/rdagent/factor_implementation/evolving/evolvable_subjects.py b/rdagent/factor_implementation/evolving/evolvable_subjects.py
index 43e91c0e..5c5ab22d 100644
--- a/rdagent/factor_implementation/evolving/evolvable_subjects.py
+++ b/rdagent/factor_implementation/evolving/evolvable_subjects.py
@@ -1,8 +1,8 @@
 from __future__ import annotations
 
-from core.evolving_framework import EvolvableSubjects
-from core.log import FinCoLog
-from factor_implementation.share_modules.factor import (
+from rdagent.core.evolving_framework import EvolvableSubjects
+from rdagent.core.log import FinCoLog
+from rdagent.factor_implementation.share_modules.factor import (
     FactorImplementation,
     FactorImplementationTask,
 )
diff --git a/rdagent/factor_implementation/evolving/evolving_strategy.py b/rdagent/factor_implementation/evolving/evolving_strategy.py
index 1f2cf4b2..7a1c19ed 100644
--- a/rdagent/factor_implementation/evolving/evolving_strategy.py
+++ b/rdagent/factor_implementation/evolving/evolving_strategy.py
@@ -1,21 +1,22 @@
 from __future__ import annotations
 
 import json
+from pathlib import Path
 import random
 from abc import abstractmethod
 from copy import deepcopy
 from typing import TYPE_CHECKING
 
-from core.evolving_framework import EvolvingStrategy, QueriedKnowledge
-from core.utils import multiprocessing_wrapper
-from factor_implementation.share_modules.factor import (
+from rdagent.core.evolving_framework import EvolvingStrategy, QueriedKnowledge
+from rdagent.core.utils import multiprocessing_wrapper
+from rdagent.factor_implementation.share_modules.factor import (
     FactorImplementation,
     FactorImplementationTask,
     FileBasedFactorImplementation,
 )
-from factor_implementation.share_modules.prompt import FactorImplementationPrompts
+from rdagent.core.prompts import Prompts
 from jinja2 import Template
-from oai.llm_utils import APIBackend
+from rdagent.oai.llm_utils import APIBackend
 
 from rdagent.factor_implementation.share_modules.factor_implementation_config import (
     FactorImplementSettings,
@@ -117,7 +118,9 @@ def implement_one_factor(
             queried_former_failed_knowledge_to_render = queried_former_failed_knowledge
 
             system_prompt = Template(
-                FactorImplementationPrompts()["evolving_strategy_factor_implementation_v1_system"],
+                Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")[
+                    "evolving_strategy_factor_implementation_v1_system"
+                ],
             ).render(
                 data_info=get_data_folder_intro(),
                 queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
@@ -130,7 +133,9 @@ def implement_one_factor(
             while True:
                 user_prompt = (
                     Template(
-                        FactorImplementationPrompts()["evolving_strategy_factor_implementation_v1_user"],
+                        Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")[
+                            "evolving_strategy_factor_implementation_v1_user"
+                        ],
                     )
                     .render(
                         factor_information_str=factor_information_str,
@@ -204,7 +209,9 @@ def implement_one_factor(
             queried_former_failed_knowledge_to_render = queried_former_failed_knowledge
 
             system_prompt = Template(
-                FactorImplementationPrompts()["evolving_strategy_factor_implementation_v1_system"],
+                Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")[
+                    "evolving_strategy_factor_implementation_v1_system"
+                ],
             ).render(
                 data_info=get_data_folder_intro(),
                 queried_former_failed_knowledge=queried_former_failed_knowledge_to_render,
@@ -224,7 +231,11 @@ def implement_one_factor(
                     and len(queried_former_failed_knowledge_to_render) != 0
                 ):
                     error_summary_system_prompt = (
-                        Template(FactorImplementationPrompts()["evolving_strategy_error_summary_v2_system"])
+                        Template(
+                            Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")[
+                                "evolving_strategy_error_summary_v2_system"
+                            ]
+                        )
                         .render(
                             factor_information_str=target_factor_task_information,
                             code_and_feedback=queried_former_failed_knowledge_to_render[
@@ -238,7 +249,11 @@ def implement_one_factor(
                     )
                     while True:
                         error_summary_user_prompt = (
-                            Template(FactorImplementationPrompts()["evolving_strategy_error_summary_v2_user"])
+                            Template(
+                                Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")[
+                                    "evolving_strategy_error_summary_v2_user"
+                                ]
+                            )
                             .render(
                                 queried_similar_component_knowledge=queried_similar_component_knowledge_to_render,
                             )
@@ -258,7 +273,9 @@ def implement_one_factor(
 
                 user_prompt = (
                     Template(
-                        FactorImplementationPrompts()["evolving_strategy_factor_implementation_v2_user"],
+                        Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")[
+                            "evolving_strategy_factor_implementation_v2_user"
+                        ],
                     )
                     .render(
                         factor_information_str=target_factor_task_information,
diff --git a/rdagent/factor_implementation/evolving/factor_implementation_evolving_cli.py b/rdagent/factor_implementation/evolving/factor_implementation_evolving_cli.py
index a8946e87..2b410eab 100644
--- a/rdagent/factor_implementation/evolving/factor_implementation_evolving_cli.py
+++ b/rdagent/factor_implementation/evolving/factor_implementation_evolving_cli.py
@@ -4,24 +4,24 @@
 from pathlib import Path
 
 import pandas as pd
-from core.evolving_framework import EvoAgent, KnowledgeBase
-from core.utils import multiprocessing_wrapper
-from factor_implementation.evolving.evaluators import (
+from rdagent.core.evolving_framework import EvoAgent, KnowledgeBase
+from rdagent.core.utils import multiprocessing_wrapper
+from rdagent.factor_implementation.evolving.evaluators import (
     FactorImplementationEvaluatorV1,
     FactorImplementationsMultiEvaluator,
 )
-from factor_implementation.evolving.evolvable_subjects import FactorImplementationList
-from factor_implementation.evolving.evolving_strategy import (
+from rdagent.factor_implementation.evolving.evolvable_subjects import FactorImplementationList
+from rdagent.factor_implementation.evolving.evolving_strategy import (
     FactorEvolvingStrategy,
     FactorEvolvingStrategyWithGraph,
 )
-from factor_implementation.evolving.knowledge_management import (
+from rdagent.factor_implementation.evolving.knowledge_management import (
     FactorImplementationGraphKnowledgeBase,
     FactorImplementationGraphRAGStrategy,
     FactorImplementationKnowledgeBaseV1,
     FactorImplementationRAGStrategyV1,
 )
-from factor_implementation.share_modules.factor import (
+from rdagent.factor_implementation.share_modules.factor import (
     FactorImplementationTask,
     FileBasedFactorImplementation,
 )
diff --git a/rdagent/factor_implementation/evolving/knowledge_management.py b/rdagent/factor_implementation/evolving/knowledge_management.py
index b20e7920..d79f5a81 100644
--- a/rdagent/factor_implementation/evolving/knowledge_management.py
+++ b/rdagent/factor_implementation/evolving/knowledge_management.py
@@ -8,7 +8,7 @@
 from pathlib import Path
 from typing import Union
 
-from core.evolving_framework import (
+from rdagent.core.evolving_framework import (
     EvolvableSubjects,
     EvoStep,
     Knowledge,
@@ -16,16 +16,16 @@
     QueriedKnowledge,
     RAGStrategy,
 )
-from core.log import FinCoLog
-from factor_implementation.evolving.evaluators import FactorImplementationSingleFeedback
-from factor_implementation.share_modules.factor import (
+from rdagent.core.log import FinCoLog
+from rdagent.factor_implementation.evolving.evaluators import FactorImplementationSingleFeedback
+from rdagent.factor_implementation.share_modules.factor import (
     FactorImplementation,
     FactorImplementationTask,
 )
-from factor_implementation.share_modules.prompt import FactorImplementationPrompts
-from finco.graph import UndirectedGraph, UndirectedNode
+from rdagent.core.prompts import Prompts
+from rdagent.knowledge_management.graph import UndirectedGraph, UndirectedNode
 from jinja2 import Template
-from oai.llm_utils import APIBackend, calculate_embedding_distance_between_str_list
+from rdagent.oai.llm_utils import APIBackend, calculate_embedding_distance_between_str_list
 
 from rdagent.factor_implementation.share_modules.factor_implementation_config import (
     FactorImplementSettings,
@@ -145,9 +145,9 @@ def query(
         for target_factor_task in evo.target_factor_tasks:
             target_factor_task_information = target_factor_task.get_factor_information()
             if target_factor_task_information in self.knowledgebase.success_task_info_set:
-                queried_knowledge.success_task_to_knowledge_dict[
-                    target_factor_task_information
-                ] = self.knowledgebase.implementation_trace[target_factor_task_information][-1]
+                queried_knowledge.success_task_to_knowledge_dict[target_factor_task_information] = (
+                    self.knowledgebase.implementation_trace[target_factor_task_information][-1]
+                )
             elif (
                 len(
                     self.knowledgebase.implementation_trace.setdefault(
@@ -159,14 +159,12 @@ def query(
             ):
                 queried_knowledge.failed_task_info_set.add(target_factor_task_information)
             else:
-                queried_knowledge.working_task_to_former_failed_knowledge_dict[
-                    target_factor_task_information
-                ] = self.knowledgebase.implementation_trace.setdefault(
-                    target_factor_task_information,
-                    [],
-                )[
-                    -v1_query_former_trace_limit:
-                ]
+                queried_knowledge.working_task_to_former_failed_knowledge_dict[target_factor_task_information] = (
+                    self.knowledgebase.implementation_trace.setdefault(
+                        target_factor_task_information,
+                        [],
+                    )[-v1_query_former_trace_limit:]
+                )
 
                 knowledge_base_success_task_list = list(
                     self.knowledgebase.success_task_info_set,
@@ -187,9 +185,9 @@ def query(
                     )[-1]
                     for index in similar_indexes
                 ]
-                queried_knowledge.working_task_to_similar_successful_knowledge_dict[
-                    target_factor_task_information
-                ] = similar_successful_knowledge
+                queried_knowledge.working_task_to_similar_successful_knowledge_dict[target_factor_task_information] = (
+                    similar_successful_knowledge
+                )
         return queried_knowledge
 
 
@@ -212,7 +210,7 @@ class FactorImplementationGraphRAGStrategy(RAGStrategy):
     def __init__(self, knowledgebase: FactorImplementationGraphKnowledgeBase) -> None:
         super().__init__(knowledgebase)
         self.current_generated_trace_count = 0
-        self.prompt = FactorImplementationPrompts()
+        self.prompt = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
 
     def generate_knowledge(
         self,
@@ -417,9 +415,9 @@ def former_trace_query(
                     else:
                         current_index += 1
 
-                factor_implementation_queried_graph_knowledge.former_traces[
-                    target_factor_task_information
-                ] = former_trace_knowledge[-v2_query_former_trace_limit:]
+                factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = (
+                    former_trace_knowledge[-v2_query_former_trace_limit:]
+                )
             else:
                 factor_implementation_queried_graph_knowledge.former_traces[target_factor_task_information] = []
 
diff --git a/rdagent/factor_implementation/share_modules/prompts.yaml b/rdagent/factor_implementation/prompts.yaml
similarity index 100%
rename from rdagent/factor_implementation/share_modules/prompts.yaml
rename to rdagent/factor_implementation/prompts.yaml
diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py
index 8e0c69e9..71d7d859 100644
--- a/rdagent/oai/llm_utils.py
+++ b/rdagent/oai/llm_utils.py
@@ -31,6 +31,11 @@ def md5_hash(input_string):
     return hashed_string
 
 
+try:
+    from azure.identity import DefaultAzureCredential, get_bearer_token_provider
+except ImportError:
+    FinCoLog().warning("azure.identity is not installed.")
+
 try:
     import openai
 except ImportError:
@@ -285,6 +290,7 @@ def __init__(
             self.encoder = None
         else:
             self.use_azure = self.cfg.use_azure
+            self.use_azure_token_provider = self.cfg.use_azure_token_provider
 
             self.chat_api_key = self.cfg.chat_openai_api_key if chat_api_key is None else chat_api_key
             self.chat_model = self.cfg.chat_model if chat_model is None else chat_model
@@ -306,16 +312,32 @@ def __init__(
             )
 
             if self.use_azure:
-                self.chat_client = openai.AzureOpenAI(
-                    api_key=self.chat_api_key,
-                    api_version=self.chat_api_version,
-                    azure_endpoint=self.chat_api_base,
-                )
-                self.embedding_client = openai.AzureOpenAI(
-                    api_key=self.embedding_api_key,
-                    api_version=self.embedding_api_version,
-                    azure_endpoint=self.embedding_api_base,
-                )
+                if self.use_azure_token_provider:
+                    credential = DefaultAzureCredential()
+                    token_provider = get_bearer_token_provider(
+                        credential, "https://cognitiveservices.azure.com/.default"
+                    )
+                    self.chat_client = openai.AzureOpenAI(
+                        azure_ad_token_provider=token_provider,
+                        api_version=self.chat_api_version,
+                        azure_endpoint=self.chat_api_base,
+                    )
+                    self.embedding_client = openai.AzureOpenAI(
+                        azure_ad_token_provider=token_provider,
+                        api_version=self.embedding_api_version,
+                        azure_endpoint=self.embedding_api_base,
+                    )
+                else:
+                    self.chat_client = openai.AzureOpenAI(
+                        api_key=self.chat_api_key,
+                        api_version=self.chat_api_version,
+                        azure_endpoint=self.chat_api_base,
+                    )
+                    self.embedding_client = openai.AzureOpenAI(
+                        api_key=self.embedding_api_key,
+                        api_version=self.embedding_api_version,
+                        azure_endpoint=self.embedding_api_base,
+                    )
             else:
                 self.chat_client = openai.OpenAI(api_key=self.chat_api_key)
                 self.embedding_client = openai.OpenAI(api_key=self.embedding_api_key)
@@ -328,7 +350,7 @@ def __init__(
         self.use_embedding_cache = self.cfg.use_embedding_cache if use_embedding_cache is None else use_embedding_cache
         if self.dump_chat_cache or self.use_chat_cache or self.dump_embedding_cache or self.use_embedding_cache:
             self.cache_file_location = self.cfg.prompt_cache_path
-            self.cache = SQliteLazyCache(self.cache_file_location)
+            self.cache = SQliteLazyCache(cache_location=self.cache_file_location)
 
         # transfer the config to the class if the config is not supposed to change during the runtime
         self.use_llama2 = self.cfg.use_llama2
diff --git a/requirements.txt b/requirements.txt
index 9b68b3cc..13c937bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -24,3 +24,6 @@ azure-ai-formrecognizer
 
 # factor implementations
 tables
+
+# azure identity related
+azure-identity
\ No newline at end of file
diff --git a/test/oai/test_embedding.py b/test/oai/test_embedding.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/test/oai/test_embedding_and_similarity.py b/test/oai/test_embedding_and_similarity.py
new file mode 100644
index 00000000..8e426d4d
--- /dev/null
+++ b/test/oai/test_embedding_and_similarity.py
@@ -0,0 +1,25 @@
+import pickle
+import unittest
+from pathlib import Path
+import json
+import random
+
+from rdagent.oai.llm_utils import APIBackend, calculate_embedding_distance_between_str_list
+
+
+class TestEmbedding(unittest.TestCase):
+    def test_embedding(self):
+        emb = APIBackend().create_embedding("hello")
+        assert emb is not None
+        assert type(emb) == list
+        assert len(emb) > 0
+
+    def test_embedding_similarity(self):
+        similarity = calculate_embedding_distance_between_str_list(["Hello"], ["Hi"])[0][0]
+        assert similarity is not None
+        assert type(similarity) == float
+        assert similarity >= 0.8
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/oai/test_embedding_similarity.py b/test/oai/test_embedding_similarity.py
deleted file mode 100644
index e69de29b..00000000

From 268e7188158ebd9ce9bc256c0a7e8466339f30ee Mon Sep 17 00:00:00 2001
From: xuyang1 <xuyang1@microsoft.com>
Date: Wed, 5 Jun 2024 07:34:15 +0000
Subject: [PATCH 3/3] remove some redundant code

---
 .../factor_extract_and_implement.py           |   8 +-
 rdagent/document_process/document_analysis.py | 116 +++---------------
 2 files changed, 18 insertions(+), 106 deletions(-)

diff --git a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
index 8277c36e..0de00127 100644
--- a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
+++ b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
@@ -3,8 +3,8 @@
 from pathlib import Path
 
 from rdagent.document_process.document_analysis import (
-    filter_factor_by_viability,
-    deduplicate_factors_several_times,
+    check_factor_viability,
+    deduplicate_factors_by_llm,
     extract_factors_from_report_dict,
     merge_file_to_factor_dict_to_factor_dict,
 )
@@ -21,9 +21,9 @@ def extract_factors_and_implement(report_file_path: str):
     file_to_factor_result = extract_factors_from_report_dict(docs_dict, selected_report_dict)
     factor_dict = merge_file_to_factor_dict_to_factor_dict(file_to_factor_result)
 
-    factor_dict_viable, factor_viability = filter_factor_by_viability(factor_dict)
+    factor_viability = check_factor_viability(factor_dict)
 
-    factor_dict, duplication_names_list = deduplicate_factors_several_times(factor_dict, factor_viability)
+    factor_dict, duplication_names_list = deduplicate_factors_by_llm(factor_dict, factor_viability)
 
 
 if __name__ == "__main__":
diff --git a/rdagent/document_process/document_analysis.py b/rdagent/document_process/document_analysis.py
index 97e85683..dc390a09 100644
--- a/rdagent/document_process/document_analysis.py
+++ b/rdagent/document_process/document_analysis.py
@@ -29,94 +29,6 @@
 document_process_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
 
 
-def load_documents_by_langchain(path: Path) -> list:
-    """Load documents from the specified path.
-
-    Args:
-        path (str): The path to the directory or file containing the documents.
-
-    Returns:
-        list: A list of loaded documents.
-    """
-    loader = PyPDFDirectoryLoader(str(path), silent_errors=True) if path.is_dir() else PyPDFLoader(str(path))
-    return loader.load()
-
-
-def process_documents_by_langchain(docs: list[Document]) -> dict[str, str]:
-    """Process a list of documents and group them by document name.
-
-    Args:
-        docs (list): A list of documents.
-
-    Returns:
-        dict: A dictionary where the keys are document names and the values are
-        the concatenated content of the documents.
-    """
-    content_dict = {}
-
-    for doc in docs:
-        doc_name = str(Path(doc.metadata["source"]).resolve())
-        doc_content = doc.page_content
-
-        if doc_name not in content_dict:
-            content_dict[str(doc_name)] = doc_content
-        else:
-            content_dict[str(doc_name)] += doc_content
-
-    return content_dict
-
-
-def load_and_process_pdfs_by_langchain(path: Path) -> dict[str, str]:
-    return process_documents_by_langchain(load_documents_by_langchain(path))
-
-
-def load_and_process_one_pdf_by_azure_document_intelligence(
-    path: Path,
-    key: str,
-    endpoint: str,
-) -> str:
-    pages = len(PyPDFLoader(str(path)).load())
-    document_analysis_client = DocumentAnalysisClient(
-        endpoint=endpoint,
-        credential=AzureKeyCredential(key),
-    )
-
-    with path.open("rb") as file:
-        result = document_analysis_client.begin_analyze_document(
-            "prebuilt-document",
-            file,
-            pages=f"1-{pages}",
-        ).result()
-    return result.content
-
-
-def load_and_process_pdfs_by_azure_document_intelligence(path: Path) -> dict[str, str]:
-    config = Config()
-
-    assert config.azure_document_intelligence_key is not None
-    assert config.azure_document_intelligence_endpoint is not None
-
-    content_dict = {}
-    ab_path = path.resolve()
-    if ab_path.is_file():
-        assert ".pdf" in ab_path.suffixes, "The file must be a PDF file."
-        proc = load_and_process_one_pdf_by_azure_document_intelligence
-        content_dict[str(ab_path)] = proc(
-            ab_path,
-            config.azure_document_intelligence_key,
-            config.azure_document_intelligence_endpoint,
-        )
-    else:
-        for file_path in ab_path.rglob("*"):
-            if file_path.is_file() and ".pdf" in file_path.suffixes:
-                content_dict[str(file_path)] = load_and_process_one_pdf_by_azure_document_intelligence(
-                    file_path,
-                    config.azure_document_intelligence_key,
-                    config.azure_document_intelligence_endpoint,
-                )
-    return content_dict
-
-
 def classify_report_from_dict(
     report_dict: Mapping[str, str],
     input_max_token: int = 128000,
@@ -411,7 +323,7 @@ def __check_factor_dict_viability_simulate_json_mode(
     return {}
 
 
-def filter_factor_by_viability(
+def check_factor_viability(
     factor_dict: dict[str, dict[str, str]],
 ) -> tuple[dict[str, dict[str, str]], dict[str, dict[str, str]]]:
     factor_viability_dict = {}
@@ -443,16 +355,16 @@ def filter_factor_by_viability(
 
         factor_df = factor_df[~factor_df.index.isin(factor_viability_dict)]
 
-    filtered_factor_dict = {
-        factor_name: factor_dict[factor_name]
-        for factor_name in factor_dict
-        if factor_viability_dict[factor_name]["viability"]
-    }
+    # filtered_factor_dict = {
+    #     factor_name: factor_dict[factor_name]
+    #     for factor_name in factor_dict
+    #     if factor_viability_dict[factor_name]["viability"]
+    # }
 
-    return filtered_factor_dict, factor_viability_dict
+    return factor_viability_dict
 
 
-def check_factor_duplication_simulate_json_mode(
+def __check_factor_duplication_simulate_json_mode(
     factor_df: pd.DataFrame,
 ) -> list[list[str]]:
     session = APIBackend().build_chat_session(
@@ -491,7 +403,7 @@ def check_factor_duplication_simulate_json_mode(
     return generated_duplicated_groups
 
 
-def kmeans_embeddings(embeddings: np.ndarray, k: int = 20) -> list[list[str]]:
+def __kmeans_embeddings(embeddings: np.ndarray, k: int = 20) -> list[list[str]]:
     x_normalized = normalize(embeddings)
 
     kmeans = KMeans(
@@ -545,7 +457,7 @@ def find_closest_cluster_cosine_similarity(
     )
 
 
-def deduplicate_factor_dict(factor_dict: dict[str, dict[str, str]]) -> list[list[str]]:
+def __deduplicate_factor_dict(factor_dict: dict[str, dict[str, str]]) -> list[list[str]]:
     factor_df = pd.DataFrame(factor_dict).T
     factor_df.index.names = ["factor_name"]
 
@@ -576,7 +488,7 @@ def deduplicate_factor_dict(factor_dict: dict[str, dict[str, str]]) -> list[list
             len(full_str_list) // Config().max_input_duplicate_factor_group,
             30,
         ):
-            kmeans_index_group = kmeans_embeddings(embeddings=embeddings, k=k)
+            kmeans_index_group = __kmeans_embeddings(embeddings=embeddings, k=k)
             if len(kmeans_index_group[0]) < Config().max_input_duplicate_factor_group:
                 target_k = k
                 FinCoLog().info(f"K-means group number: {k}")
@@ -589,7 +501,7 @@ def deduplicate_factor_dict(factor_dict: dict[str, dict[str, str]]) -> list[list
     result_list = []
     result_list = [
         pool.apply_async(
-            check_factor_duplication_simulate_json_mode,
+            __check_factor_duplication_simulate_json_mode,
             (factor_df.loc[factor_name_group, :],),
         )
         for factor_name_group in factor_name_groups
@@ -610,14 +522,14 @@ def deduplicate_factor_dict(factor_dict: dict[str, dict[str, str]]) -> list[list
     return duplication_names_list
 
 
-def deduplicate_factors_several_times(
+def deduplicate_factors_by_llm(
     factor_dict: dict[str, dict[str, str]],
     factor_viability_dict: dict[str, dict[str, str]] = None,
 ) -> list[list[str]]:
     final_duplication_names_list = []
     current_round_factor_dict = factor_dict
     for _ in range(10):
-        duplication_names_list = deduplicate_factor_dict(current_round_factor_dict)
+        duplication_names_list = __deduplicate_factor_dict(current_round_factor_dict)
 
         new_round_names = []
         for duplication_names in duplication_names_list: