Skip to content

Commit

Permalink
fix some bugs.
Browse files Browse the repository at this point in the history
  • Loading branch information
WinstonLiyt committed Jul 29, 2024
1 parent 9c64f14 commit 787450c
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 78 deletions.
115 changes: 58 additions & 57 deletions rdagent/app/qlib_rd_loop/factor_from_report_sh.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from dotenv import load_dotenv
from jinja2 import Environment, StrictUndefined

from rdagent.app.qlib_rd_loop.conf import PROP_SETTING
from rdagent.app.qlib_rd_loop.conf import FACTOR_PROP_SETTING
from rdagent.components.document_reader.document_reader import (
extract_first_page_screenshot_from_pdf,
load_and_process_pdfs_by_langchain,
Expand Down Expand Up @@ -37,33 +37,33 @@

assert load_dotenv()

scen: Scenario = import_class(PROP_SETTING.factor_scen)()
scen: Scenario = import_class(FACTOR_PROP_SETTING.scen)()

hypothesis_gen: HypothesisGen = import_class(PROP_SETTING.factor_hypothesis_gen)(scen)
hypothesis_gen: HypothesisGen = import_class(FACTOR_PROP_SETTING.hypothesis_gen)(scen)

hypothesis2experiment: Hypothesis2Experiment = import_class(PROP_SETTING.factor_hypothesis2experiment)()
hypothesis2experiment: Hypothesis2Experiment = import_class(FACTOR_PROP_SETTING.hypothesis2experiment)()

qlib_factor_coder: Developer = import_class(PROP_SETTING.factor_coder)(scen)
qlib_factor_coder: Developer = import_class(FACTOR_PROP_SETTING.coder)(scen)

qlib_factor_runner: Developer = import_class(PROP_SETTING.factor_runner)(scen)
qlib_factor_runner: Developer = import_class(FACTOR_PROP_SETTING.runner)(scen)

qlib_factor_summarizer: HypothesisExperiment2Feedback = import_class(PROP_SETTING.factor_summarizer)(scen)
qlib_factor_summarizer: HypothesisExperiment2Feedback = import_class(FACTOR_PROP_SETTING.summarizer)(scen)

with open(PROP_SETTING.report_result_json_file_path, "r") as f:
with open(FACTOR_PROP_SETTING.report_result_json_file_path, "r") as f:
judge_pdf_data = json.load(f)

prompts_path = Path(__file__).parent / "prompts.yaml"
prompts = Prompts(file_path=prompts_path)


def save_progress(trace, current_index):
with open(PROP_SETTING.progress_file_path, "wb") as f:
with open(FACTOR_PROP_SETTING.progress_file_path, "wb") as f:
pickle.dump((trace, current_index), f)


def load_progress():
if Path(PROP_SETTING.progress_file_path).exists():
with open(PROP_SETTING.progress_file_path, "rb") as f:
if Path(FACTOR_PROP_SETTING.progress_file_path).exists():
with open(FACTOR_PROP_SETTING.progress_file_path, "rb") as f:
return pickle.load(f)
return Trace(scen=scen), 0

Expand All @@ -87,8 +87,9 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
response_json = json.loads(response)
hypothesis_text = response_json.get("hypothesis", "No hypothesis generated.")
reason_text = response_json.get("reason", "No reason provided.")
concise_reason_text = response_json.get("concise_reason", "No concise reason provided.")

return Hypothesis(hypothesis=hypothesis_text, reason=reason_text)
return Hypothesis(hypothesis=hypothesis_text, reason=reason_text, concise_reason=concise_reason_text)


def extract_factors_and_implement(report_file_path: str) -> tuple:
Expand Down Expand Up @@ -124,48 +125,48 @@ def extract_factors_and_implement(report_file_path: str) -> tuple:

trace, start_index = load_progress()

try:
judge_pdf_data_items = list(judge_pdf_data.items())
for index in range(start_index, len(judge_pdf_data_items)):
if index > 1000:
break
file_path, attributes = judge_pdf_data_items[index]
if attributes["class"] == 1:
report_file_path = Path(file_path.replace(PROP_SETTING.origin_report_path, PROP_SETTING.local_report_path))
if report_file_path.exists():
logger.info(f"Processing {report_file_path}")

with logger.tag("r"):
exp, hypothesis = extract_factors_and_implement(str(report_file_path))
if exp is None:
continue
exp.based_experiments = [t[1] for t in trace.hist if t[2]]
if len(exp.based_experiments) == 0:
exp.based_experiments.append(QlibFactorExperiment(sub_tasks=[]))
logger.log_object(hypothesis, tag="hypothesis generation")
logger.log_object(exp.sub_tasks, tag="experiment generation")

with logger.tag("d"):
exp = qlib_factor_coder.develop(exp)
logger.log_object(exp.sub_workspace_list)

with logger.tag("ef"):
exp = qlib_factor_runner.develop(exp)
if exp is None:
logger.error(f"Factor extraction failed for {report_file_path}. Skipping to the next report.")
continue
logger.log_object(exp, tag="factor runner result")
feedback = qlib_factor_summarizer.generate_feedback(exp, hypothesis, trace)
logger.log_object(feedback, tag="feedback")

trace.hist.append((hypothesis, exp, feedback))
logger.info(f"Processed {report_file_path}: Result: {exp}")

# Save progress after processing each report
save_progress(trace, index + 1)
else:
logger.error(f"File not found: {report_file_path}")
except Exception as e:
logger.error(f"An error occurred: {e}")
save_progress(trace, index)
raise
# try:
judge_pdf_data_items = list(judge_pdf_data.items())
for index in range(start_index, len(judge_pdf_data_items)):
if index > 1000:
break
file_path, attributes = judge_pdf_data_items[index]
if attributes["class"] == 1:
report_file_path = Path(file_path.replace(FACTOR_PROP_SETTING.origin_report_path, FACTOR_PROP_SETTING.local_report_path))
if report_file_path.exists():
logger.info(f"Processing {report_file_path}")

with logger.tag("r"):
exp, hypothesis = extract_factors_and_implement(str(report_file_path))
if exp is None:
continue
exp.based_experiments = [t[1] for t in trace.hist if t[2]]
if len(exp.based_experiments) == 0:
exp.based_experiments.append(QlibFactorExperiment(sub_tasks=[]))
logger.log_object(hypothesis, tag="hypothesis generation")
logger.log_object(exp.sub_tasks, tag="experiment generation")

with logger.tag("d"):
exp = qlib_factor_coder.develop(exp)
logger.log_object(exp.sub_workspace_list)

with logger.tag("ef"):
exp = qlib_factor_runner.develop(exp)
if exp is None:
logger.error(f"Factor extraction failed for {report_file_path}. Skipping to the next report.")
continue
logger.log_object(exp, tag="factor runner result")
feedback = qlib_factor_summarizer.generate_feedback(exp, hypothesis, trace)
logger.log_object(feedback, tag="feedback")

trace.hist.append((hypothesis, exp, feedback))
logger.info(f"Processed {report_file_path}: Result: {exp}")

# Save progress after processing each report
save_progress(trace, index + 1)
else:
logger.error(f"File not found: {report_file_path}")
# except Exception as e:
# logger.error(f"An error occurred: {e}")
# save_progress(trace, index)
# raise
14 changes: 13 additions & 1 deletion rdagent/app/qlib_rd_loop/factor_w_sc.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,27 @@
Factor workflow with session control
"""

from typing import Any

import fire

from rdagent.app.qlib_rd_loop.conf import FACTOR_PROP_SETTING
from rdagent.components.workflow.rd_loop import RDLoop
from rdagent.core.exception import FactorEmptyError
from rdagent.log import rdagent_logger as logger


class FactorRDLoop(RDLoop):
    """Factor R&D loop whose ``running`` step raises when the runner yields no experiment.

    ``skip_loop_error`` lists ``FactorEmptyError``; presumably the base loop
    skips the current iteration when a step raises one of these exceptions --
    TODO confirm against ``RDLoop``.
    """

    skip_loop_error = (FactorEmptyError,)

    def running(self, prev_out: dict[str, Any]):
        """Run the coded experiment and return the runner's result.

        Args:
            prev_out: Outputs of earlier steps (presumably keyed by step
                name); only ``prev_out["coding"]`` is read here.

        Returns:
            The object returned by ``self.runner.develop``.

        Raises:
            FactorEmptyError: If ``self.runner.develop`` returns ``None``.
        """
        with logger.tag("ef"):  # evaluate and feedback
            exp = self.runner.develop(prev_out["coding"])
            if exp is None:
                # Plain string literal: the message has no placeholders, so
                # an f-string prefix is unnecessary.
                logger.error("Factor extraction failed.")
                raise FactorEmptyError("Factor extraction failed.")
            logger.log_object(exp, tag="runner result")
        return exp


def main(path=None, step_n=None):
Expand All @@ -30,4 +42,4 @@ def main(path=None, step_n=None):


if __name__ == "__main__":
fire.Fire(main)
fire.Fire(main)
3 changes: 2 additions & 1 deletion rdagent/app/qlib_rd_loop/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ hypothesis_generation:
Please ensure your response is in JSON format as shown below:
{
"hypothesis": "A clear and concise hypothesis based on the provided information.",
"reason": "A detailed explanation supporting the generated hypothesis."
"reason": "A detailed explanation supporting the generated hypothesis.",
"concise_reason": "One line summary that focuses on the justification for the change that leads to the hypothesis (like a piece of the knowledge that we are building)."
}
user: |-
Expand Down
4 changes: 2 additions & 2 deletions rdagent/components/benchmark/eval_method.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@
from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace
from rdagent.core.conf import RD_AGENT_SETTINGS
from rdagent.core.developer import Developer
from rdagent.core.exception import CoderException, RunnerException
from rdagent.core.exception import CoderError
from rdagent.core.experiment import Task, Workspace
from rdagent.core.scenario import Scenario
from rdagent.core.utils import multiprocessing_wrapper

EVAL_RES = Dict[
str,
List[Tuple[FactorEvaluator, Union[object, RunnerException]]],
List[Tuple[FactorEvaluator, Union[object, CoderError]]],
]


Expand Down
15 changes: 0 additions & 15 deletions rdagent/scenarios/qlib/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,6 @@ model_hypothesis_specification: |-
6th Round Hypothesis (If fourth round didn't work): The model should be a CNN. The CNN should have 5 convolutional layers. Use Leaky ReLU activation for all layers. Use dropout regularization with a rate of 0.3. (Reasoning: As regularisation rate of 0.5 didn't work, we only change a new regularisation and keep the other elements that worked. This means making changes in the current level.)
factor_hypothesis_specification: |-
Additional Specifications:
Hypotheses should grow and evolve based on the previous hypothesis. If there is no previous hypothesis, start with something simple. Gradually build up upon previous hypotheses and feedback.
Ensure that the hypothesis focuses on the creation and selection of factors in quantitative finance. Each hypothesis should address specific factor characteristics such as type (momentum, value, quality), calculation methods, or inclusion criteria. Avoid hypotheses related to model architecture or optimization processes.
Sample Hypotheses (Only learn from the format as these are not the knowledge):
- "Include a momentum factor based on the last 12 months' returns."
- "Add a value factor calculated as the book-to-market ratio."
- "Incorporate a quality factor derived from return on equity (ROE)."
- "Use a volatility factor based on the standard deviation of returns over the past 6 months."
- "Include a sentiment factor derived from news sentiment scores."
- "The momentum factor should be calculated using a 6-month look-back period."
- "Combine value and momentum factors using a weighted average approach."
- "Filter stocks by market capitalization before calculating the factors."
factor_hypothesis_specification: |-
Specifications:
- Hypotheses should grow and evolve based on the previous hypothesis. If there is no previous hypothesis, start with something simple.
Expand Down
7 changes: 5 additions & 2 deletions rdagent/utils/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,10 @@ def __new__(cls, clsname, bases, attrs):
steps = LoopMeta._get_steps(bases) # all the base classes of parents
for name, attr in attrs.items():
if not name.startswith("__") and isinstance(attr, Callable):
steps.append(name)
if name not in steps:
# NOTE: a name already collected from the base classes is an override of an
# existing step, not a new step, so it is not appended a second time.
steps.append(name)
attrs["steps"] = steps
return super().__new__(cls, clsname, bases, attrs)

Expand Down Expand Up @@ -125,4 +128,4 @@ def load(cls, path: str | Path):

max_loop = max(session.loop_trace.keys())
logger.storage.truncate(time=session.loop_trace[max_loop][-1].end)
return session
return session

0 comments on commit 787450c

Please sign in to comment.