Skip to content

Commit

Permalink
fix: fix some small bugs in report-factor loop (#152)
Browse files Browse the repository at this point in the history
* Init todo

* update all code

* update

* Extract factors from financial reports loop finished

* Fix two small bugs.

* Delete rdagent/app/qlib_rd_loop/run_script.sh

* Minor mod

* Delete rdagent/app/qlib_rd_loop/nohup.out

* Fix a small bug in file reading.

* some updates

* Update the detailed process and prompt of factor loop.

* Evaluation & dataset

* Optimize the prompt for generating hypotheses and feedback in the factor loop.

* Generate new data

* dataset generation

* Performed further optimizations on the factor loop and report extraction loop, added log handling for both processes, and implemented a screenshot feature for report extraction.

* Update rdagent/components/coder/factor_coder/CoSTEER/evaluators.py

* Update package.txt for fitz.

* add the result

* Performed further optimizations on the factor loop and report extraction loop, added log handling for both processes, and implemented a screenshot feature for report extraction. (#100) (#102)

- Performed further optimizations on the factor loop and report extraction loop.
- Added log handling for both processes.
- Implemented a screenshot feature for report extraction.

* Analysis

* Optimized log output.

* Factor update

* A draft of the "Quick Start" section for README

* Add scenario descriptions.

* Updates

* Adjust content

* Enable logging of backtesting in Qlib and store rich-text descriptions in Trace. Support one-step debugging for factor extraction.

* Reformat analysis.py

* CI fix

* Refactor

* remove useless code

* fix bugs (#111)

* Fix two small bugs.

* Fix a merge bug.

* Fix two small bugs.

* fix some bugs.

* Fix some format bugs.

* Restore a file.

* Fix a format bug.

* draft renew of evaluators

* fix a small bug.

* fix a small bug

* Support Factor Report Loop

* Update framework for extracting factors from research reports.

* Refactor report-based factor extraction and fix minor bugs.

* fix a small bug of log.

* change some prompts

* improve factor_runner

* fix a small bug

* change some prompts

* cancel some comments

* cancel some comments and fix some bugs

* fix some bugs in factor from reports loop

---------

Co-authored-by: Young <[email protected]>
Co-authored-by: you-n-g <[email protected]>
Co-authored-by: Taozhi Wang <[email protected]>
Co-authored-by: Suhan Cui <[email protected]>
  • Loading branch information
5 people authored Aug 2, 2024
1 parent 529f935 commit a79f9f9
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 31 deletions.
40 changes: 14 additions & 26 deletions rdagent/app/qlib_rd_loop/factor_from_report_sh.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,15 @@ def generate_hypothesis(factor_result: dict, report_content: str) -> str:
)

response_json = json.loads(response)
hypothesis_text = response_json.get("hypothesis", "No hypothesis generated.")
reason_text = response_json.get("reason", "No reason provided.")
concise_reason_text = response_json.get("concise_reason", "No concise reason provided.")

return Hypothesis(hypothesis=hypothesis_text, reason=reason_text, concise_reason=concise_reason_text)
return Hypothesis(
hypothesis=response_json.get("hypothesis", "No hypothesis provided"),
reason=response_json.get("reason", "No reason provided"),
concise_reason=response_json.get("concise_reason", "No concise reason provided"),
concise_observation=response_json.get("concise_observation", "No concise observation provided"),
concise_justification=response_json.get("concise_justification", "No concise justification provided"),
concise_knowledge=response_json.get("concise_knowledge", "No concise knowledge provided"),
)


def extract_factors_and_implement(report_file_path: str) -> tuple:
Expand Down Expand Up @@ -118,7 +122,7 @@ def __init__(self, PROP_SETTING: BasePropSetting):
self.trace = Trace(scen=scen)

self.judge_pdf_data_items = judge_pdf_data
self.index = 0
self.pdf_file_index = 0
self.hypo_exp_cache = (
pickle.load(open(FACTOR_PROP_SETTING.report_extract_result, "rb"))
if Path(FACTOR_PROP_SETTING.report_extract_result).exists()
Expand All @@ -129,28 +133,12 @@ def __init__(self, PROP_SETTING: BasePropSetting):
def propose_hypo_exp(self, prev_out: dict[str, Any]):
with logger.tag("r"):
while True:
if self.index > 100:
break
report_file_path = self.judge_pdf_data_items[self.index]
self.index += 1
if report_file_path in self.hypo_exp_cache:
hypothesis, exp = self.hypo_exp_cache[report_file_path]
exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [
t[1] for t in self.trace.hist if t[2]
]
else:
report_file_path = self.judge_pdf_data_items[self.pdf_file_index]
self.pdf_file_index += 1
exp, hypothesis = extract_factors_and_implement(str(report_file_path))
if exp is None:
continue
# else:
# exp, hypothesis = extract_factors_and_implement(str(report_file_path))
# if exp is None:
# continue
# exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
# self.hypo_exp_cache[report_file_path] = (hypothesis, exp)
# pickle.dump(self.hypo_exp_cache, open(FACTOR_PROP_SETTING.report_extract_result, "wb"))
with logger.tag("extract_factors_and_implement"):
with logger.tag("load_pdf_screenshot"):
pdf_screenshot = extract_first_page_screenshot_from_pdf(report_file_path)
logger.log_object(pdf_screenshot)
exp.based_experiments = [QlibFactorExperiment(sub_tasks=[])] + [t[1] for t in self.trace.hist if t[2]]
exp.sub_workspace_list = exp.sub_workspace_list[: FACTOR_PROP_SETTING.max_factor_per_report]
exp.sub_tasks = exp.sub_tasks[: FACTOR_PROP_SETTING.max_factor_per_report]
logger.log_object(hypothesis, tag="hypothesis generation")
Expand Down
5 changes: 4 additions & 1 deletion rdagent/app/qlib_rd_loop/prompts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,10 @@ hypothesis_generation:
{
"hypothesis": "A clear and concise hypothesis based on the provided information.",
"reason": "A detailed explanation supporting the generated hypothesis.",
"concise_reason": One line summary that focuses on the justification for the change that leads to the hypothesis (like a part of a knowledge that we are building)
"concise_reason": "One line summary that focuses on the justification for the change that leads to the hypothesis (like a part of a knowledge that we are building)",
"concise_observation": "One line summary. It focuses on the observation of the given scenario, data characteristics, or previous experiences (failures & successes).",
"concise_justification": "One line summary. It focuses on the justification for the change in new hypothesis and the route of exploration supporting the growth of the hypothesis, based on the observation. ",
"concise_knowledge": "One line summary. It focuses on a transferable knowledge that comes with the new hypothesis. Use conditional grammar, e.g. 'If ..., ...; When ..., ...; etc.'"
}
user: |-
Expand Down
3 changes: 1 addition & 2 deletions rdagent/scenarios/qlib/developer/factor_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,5 +152,4 @@ def process_factor_data(self, exp_or_list: List[QlibFactorExperiment] | QlibFact
if factor_dfs:
return pd.concat(factor_dfs, axis=1)
else:
logger.error("No valid factor data found to merge.")
return pd.DataFrame() # Return an empty DataFrame if no valid data
raise FactorEmptyError("No valid factor data found to merge.")
2 changes: 0 additions & 2 deletions rdagent/scenarios/qlib/developer/feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,8 +97,6 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac
.render(
hypothesis_text=hypothesis_text,
task_details=tasks_factors,
# current_result=current_result,
# sota_result=sota_result,
combined_result=combined_result,
)
)
Expand Down

0 comments on commit a79f9f9

Please sign in to comment.