From a7123650638f1725a0bd47666491580502e3f5de Mon Sep 17 00:00:00 2001 From: Xu Yang Date: Sat, 3 Aug 2024 02:07:08 +0000 Subject: [PATCH] use mp to execute all feature and fix a small bug in config --- rdagent/app/qlib_rd_loop/factor_from_report_sh.py | 3 ++- .../components/coder/factor_coder/CoSTEER/__init__.py | 2 ++ rdagent/components/coder/factor_coder/config.py | 2 +- rdagent/components/coder/model_coder/conf.py | 2 +- rdagent/scenarios/qlib/developer/factor_runner.py | 11 ++++++++--- 5 files changed, 14 insertions(+), 6 deletions(-) diff --git a/rdagent/app/qlib_rd_loop/factor_from_report_sh.py b/rdagent/app/qlib_rd_loop/factor_from_report_sh.py index a36d00c0..f8289c52 100644 --- a/rdagent/app/qlib_rd_loop/factor_from_report_sh.py +++ b/rdagent/app/qlib_rd_loop/factor_from_report_sh.py @@ -114,6 +114,7 @@ def propose_hypo_exp(self, prev_out: dict[str, Any]): if self.pdf_file_index > 100: break report_file_path = self.judge_pdf_data_items[self.pdf_file_index] + logger.info(f"Processing number {self.pdf_file_index} report: {report_file_path}") self.pdf_file_index += 1 exp, hypothesis = extract_hypothesis_and_exp_from_reports(str(report_file_path)) if exp is None: @@ -153,7 +154,7 @@ def main(path=None, step_n=None): .. 
code-block:: python - dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_sh.py $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is a optional paramter + dotenv run -- python rdagent/app/qlib_rd_loop/factor_from_report_sh.py $LOG_PATH/__session__/1/0_propose --step_n 1 # `step_n` is an optional parameter """ if path is None: diff --git a/rdagent/components/coder/factor_coder/CoSTEER/__init__.py b/rdagent/components/coder/factor_coder/CoSTEER/__init__.py index 2155be49..58bb8c39 100644 --- a/rdagent/components/coder/factor_coder/CoSTEER/__init__.py +++ b/rdagent/components/coder/factor_coder/CoSTEER/__init__.py @@ -24,6 +24,7 @@ from rdagent.core.developer import Developer from rdagent.core.evolving_agent import RAGEvoAgent from rdagent.core.scenario import Scenario +from rdagent.log import rdagent_logger as logger class FactorCoSTEER(Developer[FactorExperiment]): @@ -107,5 +108,6 @@ def develop(self, exp: FactorExperiment) -> FactorExperiment: # save new knowledge base if self.new_knowledge_base_path is not None: pickle.dump(factor_knowledge_base, open(self.new_knowledge_base_path, "wb")) + logger.info(f"New knowledge base saved to {self.new_knowledge_base_path}") exp.sub_workspace_list = factor_experiment.sub_workspace_list return exp diff --git a/rdagent/components/coder/factor_coder/config.py b/rdagent/components/coder/factor_coder/config.py index 053936bf..9a64ff70 100644 --- a/rdagent/components/coder/factor_coder/config.py +++ b/rdagent/components/coder/factor_coder/config.py @@ -44,7 +44,7 @@ class Config: max_loop: int = 10 knowledge_base_path: Union[str, None] = None - new_knowledge_base_path: Union[str, None] = knowledge_base_path + new_knowledge_base_path: Union[str, None] = None python_bin: str = "python" diff --git a/rdagent/components/coder/model_coder/conf.py b/rdagent/components/coder/model_coder/conf.py index af4c07b9..ea45041b 100644 --- a/rdagent/components/coder/model_coder/conf.py +++ 
b/rdagent/components/coder/model_coder/conf.py @@ -15,7 +15,7 @@ class Config: ) knowledge_base_path: Union[str, None] = None - new_knowledge_base_path: Union[str, None] = knowledge_base_path + new_knowledge_base_path: Union[str, None] = None max_loop: int = 10 diff --git a/rdagent/scenarios/qlib/developer/factor_runner.py b/rdagent/scenarios/qlib/developer/factor_runner.py index c060e51f..9b893514 100644 --- a/rdagent/scenarios/qlib/developer/factor_runner.py +++ b/rdagent/scenarios/qlib/developer/factor_runner.py @@ -5,6 +5,9 @@ import pandas as pd from pandarallel import pandarallel +from rdagent.core.conf import RD_AGENT_SETTINGS +from rdagent.core.utils import multiprocessing_wrapper + pandarallel.initialize(verbose=1) from rdagent.components.runner import CachedRunner @@ -139,9 +142,11 @@ def process_factor_data(self, exp_or_list: List[QlibFactorExperiment] | QlibFact # Collect all exp's dataframes for exp in exp_or_list: # Iterate over sub-implementations and execute them to get each factor data - for implementation in exp.sub_workspace_list: - message, df = implementation.execute(data_type="All") - + message_and_df_list = multiprocessing_wrapper( + [(implementation.execute, (False, "All")) for implementation in exp.sub_workspace_list], + n=RD_AGENT_SETTINGS.multi_proc_n, + ) + for message, df in message_and_df_list: # Check if factor generation was successful if df is not None and "datetime" in df.index.names: time_diff = df.index.get_level_values("datetime").to_series().diff().dropna().unique()