diff --git a/.gitignore b/.gitignore index b2c38a81f..5972b5b43 100644 --- a/.gitignore +++ b/.gitignore @@ -153,3 +153,6 @@ git_ignore_folder/ # DB files *.db + +# Docker +env_factor/ \ No newline at end of file diff --git a/rdagent/app/qlib_rd_loop/conf.py b/rdagent/app/qlib_rd_loop/conf.py index 7ae8fc8de..d1b7d9cf3 100644 --- a/rdagent/app/qlib_rd_loop/conf.py +++ b/rdagent/app/qlib_rd_loop/conf.py @@ -21,7 +21,7 @@ class PropSetting(BaseSettings): qlib_model_runner: str = "" qlib_model_summarizer: str = "" - evolving_n: int = 10 + evolving_n: int = 1 # changed from 10 to 1 for debug PROP_SETTING = PropSetting() diff --git a/rdagent/app/qlib_rd_loop/factor.py b/rdagent/app/qlib_rd_loop/factor.py index 2a7905973..71efa5b92 100644 --- a/rdagent/app/qlib_rd_loop/factor.py +++ b/rdagent/app/qlib_rd_loop/factor.py @@ -29,11 +29,28 @@ trace = Trace(scen=scen) -for _ in range(PROP_SETTING.evolving_n): - hypothesis = hypothesis_gen.gen(trace) - exp = hypothesis2experiment.convert(hypothesis, trace) - exp = qlib_factor_coder.generate(exp) - exp = qlib_factor_runner.generate(exp) - feedback = qlib_factor_summarizer.generateFeedback(exp, hypothesis, trace) - - trace.hist.append((hypothesis, exp, feedback)) +hypothesis = hypothesis_gen.gen(trace) +# exp = hypothesis2experiment.convert(hypothesis, trace) +# exp = qlib_factor_coder.generate(exp) + + +import pickle +file_path = '/home/finco/v-yuanteli/RD-Agent/git_ignore_folder/factor_data_output/exp_new.pkl' +with open(file_path, 'rb') as file: + exp = pickle.load(file) +# with open('/home/finco/v-yuanteli/RD-Agent/git_ignore_folder/factor_data_output/exp_new.pkl', 'wb') as file: +# pickle.dump(exp, file) + +exp = qlib_factor_runner.generate(exp) +feedback = qlib_factor_summarizer.generateFeedback(exp, hypothesis, trace) + +# print(exp.based_experiments[-1]) + +# for _ in range(PROP_SETTING.evolving_n): +# hypothesis = hypothesis_gen.gen(trace) +# exp = hypothesis2experiment.convert(hypothesis, trace) +# exp = 
qlib_factor_coder.generate(exp) +# exp = qlib_factor_runner.generate(exp) +# feedback = qlib_factor_summarizer.generateFeedback(exp, hypothesis, trace) + +# trace.hist.append((hypothesis, exp, feedback)) diff --git a/rdagent/scenarios/qlib/prompts.yaml b/rdagent/scenarios/qlib/prompts.yaml index 6f48af232..8d7f4459a 100644 --- a/rdagent/scenarios/qlib/prompts.yaml +++ b/rdagent/scenarios/qlib/prompts.yaml @@ -32,3 +32,30 @@ experiment_output_format: |- } # Don't add ellipsis (...) or any filler text that might cause JSON parsing errors here! } + +data_feedback_generation: + system: |- + You are a professional result analysis assistant. You will receive a hypothesis, multiple tasks with their factors, and some results. + Your feedback should specify whether the current result supports or refutes the hypothesis, compare it with previous results, and suggest improvements or new directions. + Please provide detailed and constructive feedback. + Example JSON Structure for Result Analysis: + { + "Observations": "Your overall observations here", + "Feedback for Hypothesis": "Observations related to the hypothesis", + "New Hypothesis": "Put your new hypothesis here.", + "Reasoning": "Provide reasoning for the hypothesis here.", + "Replace Best Result": "yes or no" + } + user: |- + We are conducting an experiment to validate or reject hypotheses, aiming to generate a powerful factor. + Given the following hypothesis, tasks, factors, and current result, provide feedback on how well the result supports or refutes the hypothesis. + Hypothesis: {hypothesis_text}\n + Tasks and Factors:\n{task_details}\n + Current Result: {current_result}\n + SOTA Result: {sota_result}\n + Analyze the current result in the context of its ability to: + 1. Support or refute the hypothesis. + 2. Show improvement or deterioration compared to the last experiment. + 3. Demonstrate positive or negative effects when compared to Alpha158. 
import pickle

import pandas as pd


def generate(
    self,
    exp: "QlibFactorExperiment",
    result_path: str = "/home/finco/test_result2.pkl",
) -> "QlibFactorExperiment":
    """Build the factor table for ``exp`` and attach a backtest result to it.

    The factors of the most recent entry in ``exp.based_experiments`` (the
    SOTA baseline, if any) are combined with the factors produced by ``exp``
    itself.  The real Docker backtest is not wired in yet, so the result is
    currently read from a pickled placeholder file.

    Args:
        exp: Experiment whose ``sub_implementations`` produce factor data.
        result_path: Pickle file holding the placeholder backtest result.
            Defaults to the path the original implementation hard-coded, so
            existing callers are unaffected.

    Returns:
        The same ``exp`` object with ``exp.result`` set.

    Raises:
        FileNotFoundError: If ``result_path`` does not exist.
    """
    # The last based experiment is treated as the SOTA baseline.
    if exp.based_experiments:
        sota_factors = self.process_factor_data(exp.based_experiments[-1])
    else:
        sota_factors = None

    new_factors = self.process_factor_data(exp)

    if sota_factors is not None:
        combined_factors = self.combine_factors(sota_factors, new_factors)
    else:
        combined_factors = new_factors

    print("Success in processing and combining factor data.")

    # TODO: pass `combined_factors` to Docker and generate the backtest result
    # there; until then the table is computed but not consumed.
    # NOTE(security): pickle.load can execute arbitrary code - only point
    # `result_path` at files produced by a trusted run.
    try:
        with open(result_path, "rb") as f:
            exp.result = pickle.load(f)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Placeholder backtest result not found at {result_path!r}; "
            "pass `result_path` explicitly until the Docker backtest is implemented."
        ) from err
    return exp


def process_factor_data(self, exp: "QlibFactorExperiment") -> pd.DataFrame:
    """Execute every sub-implementation of ``exp`` and merge the factor data.

    Args:
        exp: Experiment whose ``sub_implementations`` each expose an
            ``execute()`` method returning a ``(message, data)`` pair.

    Returns:
        The successfully generated factors concatenated column-wise with every
        row containing a NaN removed, or an empty DataFrame when no
        implementation produced usable data.
    """
    success_marker = "Execution succeeded without error.\nExpected output file found."
    factor_dfs = []

    for implementation in exp.sub_implementations:
        # A missing implementation cannot be executed; skip it instead of
        # crashing the whole run.
        if implementation is None:
            continue
        message, df = implementation.execute()

        # Keep the output only when execution reported success AND actually
        # returned tabular data.
        if message and success_marker in message and isinstance(df, (pd.DataFrame, pd.Series)):
            factor_dfs.append(df)

    if not factor_dfs:
        print("No valid factor data found to merge.")
        return pd.DataFrame()

    # Column-wise merge; rows where any factor is NaN are dropped.
    return pd.concat(factor_dfs, axis=1).dropna()


def combine_factors(self, SOTA_factor: pd.DataFrame, new_factors: pd.DataFrame) -> pd.DataFrame:
    """Combine the SOTA factor data with the new factor data.

    Args:
        SOTA_factor: Factor data of the SOTA experiment (may be empty or None).
        new_factors: Factor data of the current experiment (may be empty or None).

    Returns:
        Both tables joined column-wise on their shared index with NaN rows
        removed.  When a factor name occurs in both inputs, the column from
        ``new_factors`` is kept.
    """
    # An empty/missing side contributes nothing; joining against it would
    # otherwise wipe out the other side.
    frames = [df for df in (SOTA_factor, new_factors) if df is not None and not df.empty]
    if not frames:
        return pd.DataFrame()

    if len(frames) == 1:
        combined_factors = frames[0].dropna()
    else:
        # Inner join keeps only index entries present in both tables.
        combined_factors = pd.concat(frames, axis=1, join="inner").dropna()

    # Prefer the newest definition of a factor that appears more than once.
    combined_factors = combined_factors.loc[:, ~combined_factors.columns.duplicated(keep="last")]
    print("Combined Factors:\n", combined_factors.head())
    return combined_factors
import json
import pickle
from pathlib import Path

import pandas as pd

from rdagent.core.experiment import Experiment
from rdagent.core.prompts import Prompts
from rdagent.core.proposal import Hypothesis
from rdagent.core.proposal import HypothesisExperiment2Feedback
from rdagent.core.proposal import HypothesisFeedback
from rdagent.core.proposal import Trace
from rdagent.oai.llm_utils import APIBackend
from rdagent.utils.env import QTDockerEnv

feedback_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")

DIRNAME = Path(__file__).absolute().resolve().parent
# Derived from DIRNAME (instead of a machine-specific absolute path) so the
# lookup matches the `env_factor` directory the backtest is launched in.
MLRUNS_DIR = DIRNAME / "env_factor" / "mlruns" / "1"


class QlibFactorHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
    """Turn a finished factor experiment into LLM-written hypothesis feedback."""

    def generateFeedback(self, exp, hypothesis, trace):
        """
        Generate feedback for the given experiment and hypothesis.

        The current result is compared against a baseline: the result of the
        last entry in ``exp.based_experiments`` when there is one, otherwise
        the Alpha158 result fetched via ``FetchAlpha158ResultFromDocker``.

        Args:
            exp (QlibFactorExperiment): The experiment to generate feedback for.
            hypothesis (QlibFactorHypothesis): The hypothesis to generate feedback for.
            trace (Trace): The trace of the experiment (currently unused).

        Returns:
            HypothesisFeedback: Feedback parsed from the LLM response.  If the
            LLM call or the response parsing fails, an empty feedback with
            ``decision=False`` is returned instead of raising.
        """
        print("Generating feedback...")

        if exp.based_experiments:
            sota_result = exp.based_experiments[-1].result
        else:
            # No earlier experiment to compare with: use Alpha158 as SOTA.
            sota_result = self.FetchAlpha158ResultFromDocker()

        task_details = "\n".join(
            f"Task: {task.factor_name}, Factor: {task.factor_description}"
            for task in exp.sub_tasks
        )

        prompt_cfg = feedback_prompts["data_feedback_generation"]
        sys_prompt = prompt_cfg["system"]
        usr_prompt = prompt_cfg["user"].format(
            hypothesis_text=hypothesis.hypothesis,
            task_details=task_details,
            current_result=exp.result,
            sota_result=sota_result,
        )

        try:
            response = APIBackend().build_messages_and_create_chat_completion(
                user_prompt=usr_prompt,
                system_prompt=sys_prompt,
                json_mode=True,
            )
            response_json = json.loads(response)
        except json.JSONDecodeError as e:
            print("Error parsing JSON response from LLM for hypothesis feedback:", e)
            return self._empty_feedback()
        except Exception as e:
            # Deliberate best-effort boundary: a failed LLM call must not
            # abort the surrounding loop.
            print("An unexpected error occurred while generating hypothesis feedback:", e)
            return self._empty_feedback()

        if not isinstance(response_json, dict):
            print(
                "An unexpected error occurred while generating hypothesis feedback:",
                f"expected a JSON object, got {type(response_json).__name__}",
            )
            return self._empty_feedback()

        observations = response_json.get("Observations", "No observations provided")
        hypothesis_evaluation = response_json.get("Feedback for Hypothesis", "No feedback provided")
        new_hypothesis = response_json.get("New Hypothesis", "No new hypothesis provided")
        reason = response_json.get("Reasoning", "No reasoning provided")
        decision = self._parse_decision(response_json.get("Replace Best Result", "no"))

        hypothesis_feedback = HypothesisFeedback(
            observations=observations,
            hypothesis_evaluation=hypothesis_evaluation,
            new_hypothesis=new_hypothesis,
            reason=reason,
            decision=decision,
        )

        print("Generated Hypothesis Feedback:")
        print(f"Observations: {observations}")
        print(f"Feedback for Hypothesis: {hypothesis_evaluation}")
        print(f"New Hypothesis: {new_hypothesis}")
        print(f"Reason: {reason}")
        print(f"Replace Best Result: {'Yes' if decision else 'No'}")

        return hypothesis_feedback

    @staticmethod
    def _parse_decision(raw):
        """Interpret the "Replace Best Result" field as a boolean.

        ``str()`` first, because a JSON-mode reply may carry a real boolean
        rather than the string "yes"/"no".
        """
        return str(raw).strip().lower() in ("yes", "true")

    @staticmethod
    def _empty_feedback():
        """Return the neutral feedback used when no LLM feedback is available."""
        return HypothesisFeedback(
            observations="",
            hypothesis_evaluation="",
            new_hypothesis="",
            reason="",
            decision=False,
        )

    def FetchAlpha158ResultFromDocker(self):
        """
        Run the Qlib backtest in Docker and load the alpha158 result.

        Returns:
            pd.DataFrame | None: The non-empty portfolio-analysis DataFrame
            written by the run, or None when no run directory or result file
            is found or the pickled object is not a non-empty DataFrame.
        """
        qtde = QTDockerEnv()
        qtde.prepare()  # Preparing the environment

        env_dir = str(DIRNAME / "env_factor")
        # Clear earlier runs; `-f` keeps this from failing when `mlruns` does
        # not exist yet (e.g. on the first run).
        qtde.run(local_path=env_dir, entry="rm -rf mlruns", env={"PYTHONPATH": "./"})
        # Run the Qlib backtest
        qtde.run(local_path=env_dir, entry="qrun conf.yaml", env={"PYTHONPATH": "./"})

        if not MLRUNS_DIR.is_dir():
            print(f"Directory {MLRUNS_DIR} does not exist.")
            return None

        run_dirs = [d for d in MLRUNS_DIR.iterdir() if d.is_dir()]
        if not run_dirs:
            print(f"No run directory found in {MLRUNS_DIR}.")
            return None

        # Choose the newest run deterministically rather than an arbitrary one.
        latest_run = max(run_dirs, key=lambda d: d.stat().st_mtime)
        pkl_path = latest_run / "artifacts" / "portfolio_analysis" / "port_analysis_1day.pkl"

        if not pkl_path.exists():
            print(f"File {pkl_path} does not exist.")
            return None

        # NOTE(security): pickle.load can execute arbitrary code; the file is
        # expected to come from the backtest run started above.
        with open(pkl_path, "rb") as f:
            result = pickle.load(f)

        if not isinstance(result, pd.DataFrame):
            print("Data format error.")
            return None
        if result.empty:
            print("Result DataFrame is empty.")
            return None

        print("Successfully retrieved alpha158 result.")
        return result
--- /dev/null +++ b/test/utils/README.md @@ -0,0 +1,116 @@ +# 🐳 Run Docker & Qlib +--- + +## 📄 Description +This guide explains how to run the Qlib Docker test file located at `test/utils/test_env.py` in the RD-Agent repository. + +--- + +## 🚀 Running Instructions + +### 1. Install the required Python libraries +- Ensure that the `docker` Python library is installed: + ```sh + pip install docker + ``` + +### 2. Run the test script +- Execute the test script to verify the Docker environment setup: + ```sh + python test/utils/test_env.py + ``` + +### Troubleshooting +- **PermissionError: [Errno 13] Permission denied.** + > This error occurs when the current user does not have the necessary permissions to access the Docker socket. To resolve this issue, follow these steps: + +1. **Add the current user to the `docker` group** +Docker requires root or `docker` group user permissions to access the Docker socket. Add the current user to the `docker` group: + ```sh + sudo usermod -aG docker $USER + ``` + +2. **Refresh group changes** +To apply the group changes, log out and log back in, or use the following command: + ```sh + newgrp docker + ``` + +3. **Verify Docker access** +Run the following command to ensure that Docker can be accessed: + ```sh + docker run hello-world + ``` + +4. **Rerun the test script** + After completing these steps, rerun the test script: + ```sh + python test/utils/test_env.py + ``` +--- +## 🛠️ Detailed Qlib Docker Function Framework + +Here, we provide an overview of the specific functions within the Qlib Docker framework, their purposes, and examples of how to call them. + +### QTDockerEnv Class in `env.py` + +The `QTDockerEnv` class is responsible for setting up and running Docker environments for Qlib experiments. + +#### Methods: + +1. **prepare()** + - **Purpose**: Prepares the Docker environment for running experiments. This includes building the Docker image if necessary. 
+ - **Example**: + ```python + qtde = QTDockerEnv() + qtde.prepare() + ``` + +2. **run(local_path: str, entry: str) -> str** + - **Purpose**: Runs a specified entry point (e.g., a configuration file) in the prepared Docker environment. + - **Parameters**: + - `local_path`: Path to the local directory to mount into the Docker container. + - `entry`: Command or entry point to run inside the Docker container. + - **Returns**: The stdout output from the Docker container. + - **Example**: + ```python + result = qtde.run(local_path="/path/to/env_tpl", entry="qrun conf.yaml") + ``` +--- +### 📊 Expected Output + +Upon successful execution, the test script will produce analysis results of benchmark returns and various risk metrics. The expected output should be similar to: + +``` +'The following are analysis results of benchmark return (1 day).' +risk +mean 0.000477 +std 0.012295 +annualized_return 0.113561 +information_ratio 0.598699 +max_drawdown -0.370479 + +'The following are analysis results of the excess return without cost (1 day).' +risk +mean 0.000530 +std 0.005718 +annualized_return 0.126029 +information_ratio 1.428574 +max_drawdown -0.072310 + +'The following are analysis results of the excess return with cost (1 day).' +risk +mean 0.000339 +std 0.005717 +annualized_return 0.080654 +information_ratio 0.914486 +max_drawdown -0.086083 + +'The following are analysis results of indicators (1 day).' +value +ffr 1.0 +pa 0.0 +pos 0.0 +``` + +By following these steps and using the provided functions, you should be able to run the Qlib Docker tests and obtain the expected analysis results. \ No newline at end of file