Skip to content

Commit

Permalink
Refactor QlibFactorExperiment to process and combine factor data, dyn…
Browse files Browse the repository at this point in the history
…amically handle SOTA factors, and integrate Docker&Qlib-based backtest results.
  • Loading branch information
WinstonLiyt committed Jul 10, 2024
1 parent 44e5610 commit b8d119c
Show file tree
Hide file tree
Showing 7 changed files with 393 additions and 10 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -153,3 +153,6 @@ git_ignore_folder/

# DB files
*.db

# Docker
env_factor/
2 changes: 1 addition & 1 deletion rdagent/app/qlib_rd_loop/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class PropSetting(BaseSettings):
qlib_model_runner: str = ""
qlib_model_summarizer: str = ""

evolving_n: int = 10
evolving_n: int = 1 # changed from 10 to 1 for debug


PROP_SETTING = PropSetting()
33 changes: 25 additions & 8 deletions rdagent/app/qlib_rd_loop/factor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,28 @@


trace = Trace(scen=scen)
for _ in range(PROP_SETTING.evolving_n):
hypothesis = hypothesis_gen.gen(trace)
exp = hypothesis2experiment.convert(hypothesis, trace)
exp = qlib_factor_coder.generate(exp)
exp = qlib_factor_runner.generate(exp)
feedback = qlib_factor_summarizer.generateFeedback(exp, hypothesis, trace)

trace.hist.append((hypothesis, exp, feedback))
# Generate a hypothesis from the current trace. The convert/coder steps that
# would normally build the experiment are commented out below.
hypothesis = hypothesis_gen.gen(trace)
# exp = hypothesis2experiment.convert(hypothesis, trace)
# exp = qlib_factor_coder.generate(exp)


import pickle
# NOTE(review): debugging shortcut -- the experiment is restored from a pickle at
# an absolute, machine-specific path instead of being produced by the steps above.
# pickle.load executes arbitrary code from the file; only load trusted files.
file_path = '/home/finco/v-yuanteli/RD-Agent/git_ignore_folder/factor_data_output/exp_new.pkl'
with open(file_path, 'rb') as file:
    exp = pickle.load(file)
# Counterpart used to create the pickle above (kept for reference):
# with open('/home/finco/v-yuanteli/RD-Agent/git_ignore_folder/factor_data_output/exp_new.pkl', 'wb') as file:
# pickle.dump(exp, file)

# Run the factor runner on the restored experiment, then ask the summarizer for feedback.
exp = qlib_factor_runner.generate(exp)
feedback = qlib_factor_summarizer.generateFeedback(exp, hypothesis, trace)

# print(exp.based_experiments[-1])

# for _ in range(PROP_SETTING.evolving_n):
# hypothesis = hypothesis_gen.gen(trace)
# exp = hypothesis2experiment.convert(hypothesis, trace)
# exp = qlib_factor_coder.generate(exp)
# exp = qlib_factor_runner.generate(exp)
# feedback = qlib_factor_summarizer.generateFeedback(exp, hypothesis, trace)

# trace.hist.append((hypothesis, exp, feedback))
27 changes: 27 additions & 0 deletions rdagent/scenarios/qlib/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,30 @@ experiment_output_format: |-
}
# Don't add ellipsis (...) or any filler text that might cause JSON parsing errors here!
}
data_feedback_generation:
system: |-
You are a professional result analysis assistant. You will receive a hypothesis, multiple tasks with their factors, and some results.
Your feedback should specify whether the current result supports or refutes the hypothesis, compare it with previous results, and suggest improvements or new directions.
Please provide detailed and constructive feedback.
Example JSON Structure for Result Analysis:
{
"Observations": "Your overall observations here",
"Feedback for Hypothesis": "Observations related to the hypothesis",
"New Hypothesis": "Put your new hypothesis here.",
"Reasoning": "Provide reasoning for the hypothesis here.",
"Replace Best Result": "yes or no"
}
user: |-
We are conducting an experiment to validate or reject hypotheses, aiming to generate a powerful factor.
Given the following hypothesis, tasks, factors, and current result, provide feedback on how well the result supports or refutes the hypothesis.
Hypothesis: {hypothesis_text}\n
Tasks and Factors:\n{task_details}\n
Current Result: {current_result}\n
SOTA Result: {sota_result}\n
Analyze the current result in the context of its ability to:
1. Support or refute the hypothesis.
2. Show improvement or deterioration compared to the last experiment.
3. Demonstrate positive or negative effects when compared to Alpha158.
Provide detailed feedback and recommend whether to replace the best result if the new factor proves superior.
83 changes: 83 additions & 0 deletions rdagent/scenarios/qlib/task_generator/data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pandas as pd
import pickle
from rdagent.core.task_generator import TaskGenerator
from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment

Expand All @@ -15,4 +17,85 @@ class QlibFactorRunner(TaskGenerator[QlibFactorExperiment]):
"""

def generate(self, exp: QlibFactorExperiment) -> QlibFactorExperiment:
    """
    Process the experiment's factor data and attach a backtest result to it.

    The factors of the most recent based experiment (if any) are treated as
    the SOTA factors and merged with the factors produced by ``exp``.

    NOTE: the Docker/Qlib backtest is not wired in yet. The merged factors are
    computed but not consumed; the result is currently read from a fixed
    pickle file on disk.

    Args:
        exp (QlibFactorExperiment): The experiment to run.

    Returns:
        QlibFactorExperiment: The same ``exp`` object with ``result`` set.
    """
    # SOTA factors come from the last based experiment, when there is one.
    sota_factor = self.process_factor_data(exp.based_experiments[-1]) if exp.based_experiments else None

    # Factors produced by the experiment under evaluation.
    new_factors = self.process_factor_data(exp)

    # Merge only when SOTA factors exist; otherwise the new factors stand alone.
    combined_factors = new_factors if sota_factor is None else self.combine_factors(sota_factor, new_factors)

    print("Success in processing and combining factor data.")

    # TODO: Call Docker, pass the combined factors to Docker, and generate backtest results
    # result = self.test_docker()
    # print(result)
    # Placeholder: load a previously stored backtest result from a fixed location.
    result_path = "/home/finco/test_result2.pkl"
    with open(result_path, 'rb') as f:
        exp.result = pickle.load(f)

    return exp  # TODO IMPLEMENT THIS


def process_factor_data(self, exp: QlibFactorExperiment) -> pd.DataFrame:
    """
    Execute every factor implementation of an experiment and merge the outputs.

    Each entry of ``exp.sub_implementations`` is executed; only the frames of
    implementations whose message reports success are kept. The kept frames
    are concatenated column-wise and rows containing any NaN are dropped.

    Args:
        exp (QlibFactorExperiment): The experiment whose sub-implementations
            produce the factor data.

    Returns:
        pd.DataFrame: Combined factor data without NaN values, or an empty
        DataFrame when no implementation produced usable data.
    """
    # Marker that must appear in the execution message for the run to count as successful.
    success_marker = 'Execution succeeded without error.\nExpected output file found.'
    factor_dfs = []

    for implementation in exp.sub_implementations:
        # A missing implementation has nothing to execute; skip it instead of crashing.
        if implementation is None:
            continue

        message, df = implementation.execute()

        # Keep the frame only when the run succeeded and actually returned data;
        # a None frame would make pd.concat raise.
        if df is not None and message and success_marker in message:
            factor_dfs.append(df)

    if not factor_dfs:
        print("No valid factor data found to merge.")
        return pd.DataFrame()  # Return an empty DataFrame if no valid data

    # Align the factors on their index and keep only fully populated rows.
    return pd.concat(factor_dfs, axis=1).dropna()


def combine_factors(self, SOTA_factor: pd.DataFrame, new_factors: pd.DataFrame) -> pd.DataFrame:
    """
    Combine the SOTA factor data with the new factor data.

    The two frames are concatenated column-wise (aligned on their index).
    When a factor name occurs in both frames, only the last occurrence --
    the one from ``new_factors`` -- is kept, so the result never carries
    duplicate column labels. Rows with any NaN are then dropped, which also
    removes rows present in only one of the two frames.

    Args:
        SOTA_factor (pd.DataFrame): The DataFrame containing the SOTA factor data.
        new_factors (pd.DataFrame): The DataFrame containing the new factors data.

    Returns:
        pd.DataFrame: Combined factor data with unique columns and no NaN values.
    """
    combined_factors = pd.concat([SOTA_factor, new_factors], axis=1)

    # Drop superseded duplicates before dropna so that NaNs in a stale SOTA
    # column do not discard rows the replacing column has data for.
    combined_factors = combined_factors.loc[:, ~combined_factors.columns.duplicated(keep="last")]

    combined_factors = combined_factors.dropna()
    print("Combined Factors:\n", combined_factors.head())
    return combined_factors
139 changes: 138 additions & 1 deletion rdagent/scenarios/qlib/task_generator/feedback.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,144 @@
# TODO:
# Implement to feedback.

from pathlib import Path
from rdagent.core.prompts import Prompts
from rdagent.core.proposal import HypothesisExperiment2Feedback
from rdagent.core.proposal import Trace
from rdagent.core.experiment import Experiment
from rdagent.core.proposal import Hypothesis, HypothesisFeedback
from rdagent.oai.llm_utils import APIBackend
import json
import pandas as pd
import pickle
from rdagent.utils.env import QTDockerEnv

feedback_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")

class QlibFactorHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): ...
# Directory that contains this module; the Docker working directory "env_factor" is resolved against it.
DIRNAME = Path(__file__).absolute().resolve().parent
# Where the backtest run writes its MLflow output. Derived from DIRNAME (instead of a
# developer-specific absolute path) so it matches the "env_factor" directory the Docker
# commands run in. The trailing "1" is presumably the MLflow experiment id -- confirm.
MLRUNS_DIR = DIRNAME / "env_factor" / "mlruns" / "1"


class QlibFactorHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
    """Ask the LLM to judge a factor experiment's result against a hypothesis and a SOTA result."""

    def generateFeedback(self, exp, hypothesis, trace):
        """
        Generate feedback for the given experiment and hypothesis.

        The current result is compared with the result of the most recent
        based experiment; when there is none, an Alpha158 baseline is
        fetched via Docker and used as the SOTA result instead.

        Args:
            exp (QlibFactorExperiment): The experiment to generate feedback for.
            hypothesis (QlibFactorHypothesis): The hypothesis to generate feedback for.
            trace (Trace): The trace of the experiment (currently unused).

        Returns:
            HypothesisFeedback: The parsed LLM feedback, or a feedback object with
            empty fields and ``decision=False`` when the LLM call or parsing fails.
        """
        print("Generating feedback...")
        hypothesis_text = hypothesis.hypothesis
        current_result = exp.result

        # One line per task so the LLM sees every factor with its description.
        task_details = "\n".join(
            f"Task: {task.factor_name}, Factor: {task.factor_description}" for task in exp.sub_tasks
        )

        # Without a previous experiment, fall back to the Alpha158 baseline as SOTA.
        if not exp.based_experiments:
            sota_result = self.FetchAlpha158ResultFromDocker()
        else:
            sota_result = exp.based_experiments[-1].result

        sys_prompt = feedback_prompts["data_feedback_generation"]["system"]
        usr_prompt = feedback_prompts["data_feedback_generation"]["user"].format(
            hypothesis_text=hypothesis_text,
            task_details=task_details,
            current_result=current_result,
            sota_result=sota_result,
        )

        try:
            # Call the APIBackend to generate the response for hypothesis feedback
            response = APIBackend().build_messages_and_create_chat_completion(
                user_prompt=usr_prompt,
                system_prompt=sys_prompt,
                json_mode=True,
            )
            response_json = json.loads(response)

            observations = response_json.get("Observations", "No observations provided")
            hypothesis_evaluation = response_json.get("Feedback for Hypothesis", "No feedback provided")
            new_hypothesis = response_json.get("New Hypothesis", "No new hypothesis provided")
            reason = response_json.get("Reasoning", "No reasoning provided")

            # The model may answer with a JSON boolean or a padded/capitalised string;
            # normalise instead of calling .lower() on a possibly non-string value,
            # which would discard the whole (otherwise valid) response.
            raw_decision = response_json.get("Replace Best Result", "no")
            decision = raw_decision is True or str(raw_decision).strip().lower() == "yes"

            hypothesis_feedback = HypothesisFeedback(
                observations=observations,
                hypothesis_evaluation=hypothesis_evaluation,
                new_hypothesis=new_hypothesis,
                reason=reason,
                decision=decision,
            )

            print("Generated Hypothesis Feedback:")
            print(f"Observations: {observations}")
            print(f"Feedback for Hypothesis: {hypothesis_evaluation}")
            print(f"New Hypothesis: {new_hypothesis}")
            print(f"Reason: {reason}")
            print(f"Replace Best Result: {'Yes' if decision else 'No'}")

            return hypothesis_feedback

        except json.JSONDecodeError as e:
            print("Error parsing JSON response from LLM for hypothesis feedback:", e)
        except Exception as e:
            # Deliberate best-effort boundary: feedback generation must not abort the loop.
            print("An unexpected error occurred while generating hypothesis feedback:", e)

        # Every failure path ends here so callers always receive a HypothesisFeedback.
        return HypothesisFeedback(observations="", hypothesis_evaluation="", new_hypothesis="", reason="", decision=False)

    def FetchAlpha158ResultFromDocker(self):
        """
        Run the Qlib backtest in Docker and load the resulting Alpha158 analysis.

        Returns:
            pd.DataFrame | None: The portfolio analysis DataFrame, or ``None`` when
            no run output is found or the stored object is not a non-empty DataFrame.
        """
        qtde = QTDockerEnv()
        qtde.prepare()  # Preparing the environment

        env_dir = str(DIRNAME / "env_factor")
        # Remove stale runs first so the only run directory afterwards is the fresh one.
        qtde.run(local_path=env_dir, entry="rm -r mlruns", env={"PYTHONPATH": "./"})
        # Run the Qlib backtest
        qtde.run(local_path=env_dir, entry="qrun conf.yaml", env={"PYTHONPATH": "./"})

        # The backtest may have failed before writing anything.
        if not MLRUNS_DIR.is_dir():
            print(f"Directory {MLRUNS_DIR} does not exist.")
            return None

        run_dirs = [d for d in MLRUNS_DIR.iterdir() if d.is_dir()]
        if not run_dirs:
            print(f"No run directory found in {MLRUNS_DIR}.")
            return None

        # Pick the most recently modified run deterministically if several exist.
        latest_run = max(run_dirs, key=lambda d: d.stat().st_mtime)
        pkl_path = latest_run / 'artifacts/portfolio_analysis/port_analysis_1day.pkl'

        if not pkl_path.exists():
            print(f"File {pkl_path} does not exist.")
            return None

        # NOTE: pickle executes code on load; this file is produced by our own
        # backtest run and must never be replaced by untrusted input.
        with open(pkl_path, 'rb') as f:
            result = pickle.load(f)

        # Check if the result is valid and is a DataFrame
        if not isinstance(result, pd.DataFrame):
            print("Data format error.")
            return None
        if result.empty:
            print("Result DataFrame is empty.")
            return None

        print("Successfully retrieved alpha158 result.")
        return result
Loading

0 comments on commit b8d119c

Please sign in to comment.