Skip to content

Commit

Permalink
First version of factor idea proposal (#46)
Browse files Browse the repository at this point in the history
* update all code

* save code

* update first version of factor proposal

* change a comment

* remove a useless comment

---------

Co-authored-by: xuyang1 <[email protected]>
  • Loading branch information
peteryang1 and peteryangms authored Jul 4, 2024
1 parent fc91f36 commit 637ad10
Show file tree
Hide file tree
Showing 17 changed files with 354 additions and 98 deletions.
12 changes: 0 additions & 12 deletions rdagent/app/model_proposal/conf.py

This file was deleted.

17 changes: 17 additions & 0 deletions rdagent/app/qlib_rd_loop/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from pydantic_settings import BaseSettings


class PropSetting(BaseSettings):
    """Settings for the Qlib factor proposal R&D loop.

    Every ``str`` field is the dotted import path of a class. The loop script
    (``rdagent/app/qlib_rd_loop/factor.py``) passes each path to ``import_class``
    and instantiates the result. Because this is a pydantic-settings
    ``BaseSettings`` subclass, the defaults below can be overridden from the
    environment.
    """

    # Scenario class; instantiated with no arguments and handed to the hypothesis generator and the trace.
    scen: str = "rdagent.scenarios.qlib.experiment.factor_experiment.QlibFactorScenario"
    # Hypothesis generator; instantiated with the scenario, its `gen(trace)` is called once per iteration.
    hypothesis_gen: str = "rdagent.scenarios.qlib.factor_proposal.QlibFactorHypothesisGen"
    # Converter whose `convert(hs)` produces the experiment for the iteration.
    hypothesis2experiment: str = "rdagent.scenarios.qlib.factor_proposal.QlibFactorHypothesis2Experiment"
    # First task generator applied to the experiment (`generate(exp)`).
    qlib_factor_coder: str = "rdagent.components.task_implementation.factor_implementation.CoSTEER.CoSTEERFG"
    # Second task generator applied to the experiment (`generate(exp)`), after the coder.
    qlib_factor_runner: str = "rdagent.scenarios.qlib.task_generator.data.QlibFactorRunner"
    # Component whose `summarize(exp)` yields the feedback stored in the trace.
    qlib_factor_summarizer: str = "rdagent.scenarios.qlib.task_generator.feedback.QlibFactorExperiment2Feedback"

    # Number of iterations the loop script runs.
    evolving_n: int = 10


PROP_SETTING = PropSetting()
38 changes: 38 additions & 0 deletions rdagent/app/qlib_rd_loop/factor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
"""
TODO: Factor Structure RD-Loop
"""

# import_from
from rdagent.app.qlib_rd_loop.conf import PROP_SETTING
from rdagent.core.proposal import (
Experiment2Feedback,
Hypothesis2Experiment,
HypothesisGen,
HypothesisSet,
Trace,
)
from rdagent.core.task_generator import TaskGenerator
from rdagent.core.utils import import_class

# Build every loop component from the dotted class paths configured in PROP_SETTING.
scen = import_class(PROP_SETTING.scen)()

# The hypothesis generator is the only component constructed with the scenario.
hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.hypothesis_gen)(scen)

hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.hypothesis2experiment)()

qlib_factor_coder: TaskGenerator = import_class(PROP_SETTING.qlib_factor_coder)()
qlib_factor_runner: TaskGenerator = import_class(PROP_SETTING.qlib_factor_runner)()

qlib_factor_summarizer: Experiment2Feedback = import_class(PROP_SETTING.qlib_factor_summarizer)()


# The trace accumulates one (hypothesis, experiment, feedback) tuple per iteration;
# the hypothesis set wraps that same trace object.
trace = Trace(scen=scen)
hs = HypothesisSet(trace=trace)
for _ in range(PROP_SETTING.evolving_n):
    hypothesis = hypothesis_gen.gen(trace)
    # NOTE(review): `hypothesis` is never added to `hs.hypothesis_list`, so `convert`
    # only sees it through `hs.trace` after a later iteration appended it below.
    # Confirm whether `hs` should be updated with the fresh hypothesis here.
    exp = hypothesis2experiment.convert(hs)
    # The experiment is passed through the coder first, then the runner.
    exp = qlib_factor_coder.generate(exp)
    exp = qlib_factor_runner.generate(exp)
    feedback = qlib_factor_summarizer.summarize(exp)

    # Record the iteration so the next `gen(trace)` call can see it.
    trace.hist.append((hypothesis, exp, feedback))
File renamed without changes.
68 changes: 52 additions & 16 deletions rdagent/components/idea_proposal/factor_proposal.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,18 @@
from abc import abstractmethod
from pathlib import Path
from typing import Tuple

from jinja2 import Environment, StrictUndefined

from rdagent.components.task_implementation.factor_implementation.factor import (
FactorExperiment,
)
from rdagent.core.prompts import Prompts
from rdagent.core.proposal import (
Hypothesis,
Hypothesis2Task,
Hypothesis2Experiment,
HypothesisGen,
HypothesisSet,
Scenario,
Trace,
)
Expand All @@ -22,40 +27,71 @@
class FactorHypothesisGen(HypothesisGen):
def __init__(self, scen: Scenario):
super().__init__(scen)
self.gen_context_flag = False
self.gen_context_dict = None
self.gen_json_flag = False

# The following methods are scenario related so they should be implemented in the subclass
@abstractmethod
def prepare_gen_context(self, trace: Trace) -> None: ...
def prepare_context(self, trace: Trace) -> Tuple[dict, bool]: ...

@abstractmethod
def gen_response_to_hypothesis_list(self, response: str) -> FactorHypothesis: ...
def convert_response(self, response: str) -> FactorHypothesis: ...

def gen(self, trace: Trace) -> FactorHypothesis:
assert self.gen_context_flag, "Please call prepare_gen_context before calling gen."
self.gen_context_flag = False # reset the flag
context_dict, json_flag = self.prepare_context(trace)

system_prompt = (
Environment(undefined=StrictUndefined)
.from_string(prompt_dict["factor_hypothesis_gen"]["system_prompt"])
.render(scenario=self.scen.get_scenario_all_desc())
.render(
scenario=self.scen.get_scenario_all_desc(),
hypothesis_output_format=context_dict["hypothesis_output_format"],
)
)
user_prompt = (
Environment(undefined=StrictUndefined)
.from_string(prompt_dict["factor_hypothesis_gen"]["user_prompt"])
.render(self.gen_context_dict)
.render(
hypothesis_and_feedback=context_dict["hypothesis_and_feedback"],
RAG=context_dict["RAG"],
)
)

resp = APIBackend().build_messages_and_create_chat_completion(
user_prompt, system_prompt, json_mode=self.gen_json_flag
)
resp = APIBackend().build_messages_and_create_chat_completion(user_prompt, system_prompt, json_mode=json_flag)

hypothesis = self.gen_response_to_hypothesis_list(resp)
hypothesis = self.convert_response(resp)

return hypothesis


class FactorHypothesis2Task(Hypothesis2Task):
def convert(self, bs: FactorHypothesis) -> None: ...
class FactorHypothesis2Experiment(Hypothesis2Experiment[FactorExperiment]):
    """Turn a hypothesis set into a ``FactorExperiment`` through one chat completion.

    Subclasses provide the scenario-specific parts: the template context
    (``prepare_context``) and the parsing of the model reply
    (``convert_response``).
    """

    def __init__(self) -> None:
        super().__init__()

    @abstractmethod
    def prepare_context(self, hs: HypothesisSet) -> Tuple[dict, bool]:
        """Return the template context dict and the JSON-mode flag.

        ``convert`` reads the keys ``experiment_output_format``,
        ``hypothesis_and_feedback``, ``factor_list`` and ``RAG`` from the dict
        and forwards the flag as ``json_mode`` to the completion call.
        """

    @abstractmethod
    def convert_response(self, response: str) -> FactorExperiment:
        """Build the ``FactorExperiment`` from the raw completion text."""

    def convert(self, hs: HypothesisSet) -> FactorExperiment:
        """Render the system and user prompts, query the backend, parse the reply."""
        context, json_flag = self.prepare_context(hs)

        # System prompt: scenario description plus the expected output format.
        system_template = Environment(undefined=StrictUndefined).from_string(
            prompt_dict["factor_hypothesis2experiment"]["system_prompt"]
        )
        system_vars = {
            "scenario": hs.trace.scen.get_scenario_all_desc(),
            "experiment_output_format": context["experiment_output_format"],
        }
        system_prompt = system_template.render(**system_vars)

        # User prompt: history, previously proposed factors and retrieved material.
        user_template = Environment(undefined=StrictUndefined).from_string(
            prompt_dict["factor_hypothesis2experiment"]["user_prompt"]
        )
        user_vars = {
            "hypothesis_and_feedback": context["hypothesis_and_feedback"],
            "factor_list": context["factor_list"],
            "RAG": context["RAG"],
        }
        user_prompt = user_template.render(**user_vars)

        backend = APIBackend()
        reply = backend.build_messages_and_create_chat_completion(user_prompt, system_prompt, json_mode=json_flag)

        return self.convert_response(reply)
28 changes: 22 additions & 6 deletions rdagent/components/idea_proposal/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,36 @@ factor_hypothesis_gen:
The user is trying to generate new hypothesis on the factors in data-driven research and development.
The factors are used in a certain scenario, the scenario is as follows:
{{ scenario }}
The user has made several hypothesis on this sencario and did several evaluation on them. The user will provide this information to you.
The user has made several hypotheses on this scenario and performed several evaluations on them. The user will provide this information to you.
To help you generate new hypothesis, the user has prepared some additional information for you. You should use this information to help generate new factors.
Please generate the output following the format below:
{{ hypothesis_output_format }}
user_prompt: |-
The user has made several hypothesis on this sencario and did several evaluation on them.
The user has made several hypotheses on this scenario and performed several evaluations on them.
The former hypothesis and the corresponding feedbacks are as follows:
{{ hypothesis_and_feedback }}
To help you generate new factors, we have prepared the following information for you:
{{ RAG }}
Please generate the new hypothesis based on the information above and generate the output following the format below:
{{ factor_output_format }}
Please generate the new hypothesis based on the information above.
factor_hypothesis_to_tasks:
factor_hypothesis2experiment:
system_prompt: |-
The user is trying to generate new factors based on the hypothesis generated in the previous step.
The factors are used in a certain scenario, the scenario is as follows:
{{ scenario }}
The user will use the factors generated to do some experiments. The user will provide this information to you:
1. The hypothesis generated in the previous steps and their corresponding feedbacks.
2. Former proposed factors on similar hypothesis.
3. Some additional information to help you generate new factors.
Please generate the output following the format below:
{{ experiment_output_format }}
user_prompt: |-
The user has made several hypotheses on this scenario and performed several evaluations on them.
The former hypothesis and the corresponding feedbacks are as follows:
{{ hypothesis_and_feedback }}
The former proposed factors on similar hypothesis are as follows:
{{ factor_list }}
To help you generate new factors, we have prepared the following information for you:
{{ RAG }}
Please generate the new factors based on the information above.
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from rdagent.core.log import RDAgentLog


class FactorEvolvingItem(FactorExperiment[FactorTask, FileBasedFactorImplementation], EvolvableSubjects):
class FactorEvolvingItem(FactorExperiment, EvolvableSubjects):
"""
Intermediate item of factor implementation.
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,4 @@ def from_folder(task: FactorTask, path: Union[str, Path], **kwargs):
return FileBasedFactorImplementation(task, code=code, **kwargs)


FactorExperiment = Experiment
class FactorExperiment(Experiment[FactorTask, FileBasedFactorImplementation]): ...
2 changes: 1 addition & 1 deletion rdagent/core/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ class Experiment(ABC, Generic[ASpecificTask, ASpecificImp]):
The experiment is a sequence of tasks and the implementations of the tasks after generated by the TaskGenerator.
"""

def __init__(self, sub_tasks: Sequence[Task]) -> None:
def __init__(self, sub_tasks: Sequence[ASpecificTask]) -> None:
self.sub_tasks = sub_tasks
self.sub_implementations: Sequence[ASpecificImp] = [None for _ in self.sub_tasks]

Expand Down
59 changes: 42 additions & 17 deletions rdagent/core/proposal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
"""

from typing import Dict, List, Tuple
from abc import ABC, abstractmethod
from typing import Dict, Generic, List, Tuple, TypeVar

from rdagent.core.evolving_framework import Feedback
from rdagent.core.experiment import Experiment, Implementation, Loader, Task
Expand All @@ -18,35 +19,53 @@ class Hypothesis:
- Belief
"""

hypothesis: str = None
reason: str = None
def __init__(self, hypothesis: str, reason: str) -> None:
    """Store the hypothesis text and its accompanying ``reason`` on the instance."""
    self.hypothesis: str = hypothesis
    self.reason: str = reason

# source: data_ana | model_nan = None


# Origin(path of repo/data/feedback) => view/summarization => generated Hypothesis


class Scenario:
def get_repo_path(self):
"""codebase"""
class Scenario(ABC):

def get_data(self):
""" "data info"""
@property
@abstractmethod
def background(self):
"""Background information"""

def get_env(self):
"""env description"""
@property
@abstractmethod
def source_data(self):
"""Source data description"""

@property
@abstractmethod
def interface(self):
"""Interface description about how to run the code"""

@property
@abstractmethod
def simulator(self):
"""Simulator description"""

@abstractmethod
def get_scenario_all_desc(self) -> str:
"""Combine all the description together"""


class HypothesisFeedback(Feedback): ...


class Trace:
scen: Scenario
hist: list[Tuple[Hypothesis, Experiment, HypothesisFeedback]]
ASpecificScen = TypeVar("ASpecificScen", bound=Scenario)


class Trace(Generic[ASpecificScen]):
    """A scenario together with the history of what has been tried in it."""

    def __init__(self, scen: ASpecificScen) -> None:
        # Scenario this trace belongs to.
        self.scen: ASpecificScen = scen
        # (hypothesis, experiment, feedback) tuples; starts empty and is appended to by the caller.
        self.hist: list[Tuple[Hypothesis, Experiment, HypothesisFeedback]] = []


class HypothesisGen:
Expand Down Expand Up @@ -74,16 +93,21 @@ class HypothesisSet:
true_hypothesis or false_hypothesis
"""

hypothesis_list: list[Hypothesis]
trace: Trace
def __init__(self, trace: Trace, hypothesis_list: "list[Hypothesis] | None" = None) -> None:
    """Create a hypothesis set bound to ``trace``.

    Args:
        trace: The trace the hypotheses belong to.
        hypothesis_list: Initial hypotheses. When omitted, the instance gets
            its own new empty list. A list passed in explicitly is stored
            as-is (not copied), as before.
    """
    # A literal ``[]`` default is evaluated once at definition time, so every
    # instance built without an explicit list would share, and mutate, the
    # same list object. Use ``None`` as the sentinel and allocate per instance.
    self.hypothesis_list: list[Hypothesis] = [] if hypothesis_list is None else hypothesis_list
    self.trace: Trace = trace


ASpecificExp = TypeVar("ASpecificExp", bound=Experiment)


class Hypothesis2Experiment(Loader[Experiment]):
class Hypothesis2Experiment(ABC, Generic[ASpecificExp]):
"""
[Abstract description => concrete description] => Code implement
"""

def convert(self, bs: HypothesisSet) -> Experiment:
@abstractmethod
def convert(self, hs: HypothesisSet) -> ASpecificExp:
"""Connect the idea proposal to implementation"""
...

Expand All @@ -99,3 +123,4 @@ def summarize(self, ti: Experiment) -> HypothesisFeedback:
The `ti` should be executed and the results should be included.
For example: `mlflow` of Qlib will be included.
"""
return HypothesisFeedback()
38 changes: 38 additions & 0 deletions rdagent/scenarios/qlib/experiment/factor_experiment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,43 @@
from pathlib import Path

from rdagent.components.task_implementation.factor_implementation.factor import (
FactorExperiment,
)
from rdagent.components.task_implementation.factor_implementation.utils import (
get_data_folder_intro,
)
from rdagent.core.prompts import Prompts
from rdagent.core.proposal import Scenario

prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts.yaml")

QlibFactorExperiment = FactorExperiment


class QlibFactorScenario(Scenario):
    """Scenario for Qlib factor research.

    Three of the four descriptions are read from the module-level
    ``prompt_dict`` (loaded from the ``prompts.yaml`` next to this file); the
    source-data description comes from ``get_data_folder_intro()``.
    """

    @property
    def background(self) -> str:
        """Return the ``qlib_factor_background`` prompt entry."""
        return prompt_dict["qlib_factor_background"]

    @property
    def source_data(self) -> str:
        """Return the data description produced by ``get_data_folder_intro()``."""
        return get_data_folder_intro()

    @property
    def interface(self) -> str:
        """Return the ``qlib_factor_interface`` prompt entry."""
        return prompt_dict["qlib_factor_interface"]

    @property
    def simulator(self) -> str:
        """Return the ``qlib_factor_simulator`` prompt entry."""
        return prompt_dict["qlib_factor_simulator"]

    def get_scenario_all_desc(self) -> str:
        """Return one text block with the four descriptions, each under a heading line.

        The order is background, source data, interface, simulator.
        """
        return f"""Background of the scenario:
{self.background}
The source data you can use:
{self.source_data}
The interface you should follow to write the runnable code:
{self.interface}
The simulator user can use to test your factor:
{self.simulator}
"""
Loading

0 comments on commit 637ad10

Please sign in to comment.