diff --git a/rdagent/app/model_extraction_and_code/GeneralModel.py b/rdagent/app/model_extraction_and_code/GeneralModel.py index 62795ace3..7d417326e 100644 --- a/rdagent/app/model_extraction_and_code/GeneralModel.py +++ b/rdagent/app/model_extraction_and_code/GeneralModel.py @@ -1,13 +1,12 @@ from pathlib import Path -from rdagent.components.coder.model_coder.model import ( - ModelExperiment, -) +from rdagent.components.coder.model_coder.model import ModelExperiment from rdagent.core.prompts import Prompts from rdagent.core.scenario import Scenario prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + class GeneralModelScenario(Scenario): @property def background(self) -> str: @@ -28,10 +27,10 @@ def interface(self) -> str: @property def simulator(self) -> str: return prompt_dict["general_model_simulator"] - + @property - def rich_style_description(self)->str: - return ''' + def rich_style_description(self) -> str: + return """ # General Model Scenario ## Overview @@ -61,7 +60,7 @@ def rich_style_description(self)->str: - **Time-Series Data:** Sequential data points indexed in time order, useful for forecasting and temporal pattern recognition. - **Graph Data:** Data structured as nodes and edges, suitable for network analysis and relational tasks. 
- ''' + """ def get_scenario_all_desc(self) -> str: return f"""Background of the scenario: diff --git a/rdagent/app/model_extraction_and_code/model_extraction_and_implementation.py b/rdagent/app/model_extraction_and_code/model_extraction_and_implementation.py index 3fcf42dc2..d227029aa 100644 --- a/rdagent/app/model_extraction_and_code/model_extraction_and_implementation.py +++ b/rdagent/app/model_extraction_and_code/model_extraction_and_implementation.py @@ -3,18 +3,24 @@ load_dotenv(override=True) +import fire + +from rdagent.app.model_extraction_and_code.GeneralModel import GeneralModelScenario from rdagent.components.coder.model_coder.task_loader import ( ModelExperimentLoaderFromPDFfiles, ) -from rdagent.scenarios.qlib.developer.model_coder import QlibModelCoSTEER -from rdagent.app.model_extraction_and_code.GeneralModel import GeneralModelScenario -from rdagent.components.document_reader.document_reader import extract_first_page_screenshot_from_pdf +from rdagent.components.document_reader.document_reader import ( + extract_first_page_screenshot_from_pdf, +) from rdagent.log import rdagent_logger as logger -import fire +from rdagent.scenarios.qlib.developer.model_coder import QlibModelCoSTEER -def extract_models_and_implement(report_file_path: str = "/home/v-xisenwang/RD-Agent/rdagent/app/model_extraction_and_code/test_doc1.pdf") -> None: + +def extract_models_and_implement( + report_file_path: str = "/home/v-xisenwang/RD-Agent/rdagent/app/model_extraction_and_code/test_doc1.pdf", +) -> None: with logger.tag("r"): - # Save Relevant Images + # Save Relevant Images img = extract_first_page_screenshot_from_pdf(report_file_path) logger.log_object(img, tag="pdf_image") scenario = GeneralModelScenario() @@ -26,5 +32,6 @@ def extract_models_and_implement(report_file_path: str = "/home/v-xisenwang/RD-A logger.log_object(exp, tag="developed_experiment") return exp + if __name__ == "__main__": fire.Fire(extract_models_and_implement) diff --git 
a/rdagent/app/quant_factor_benchmark/analysis.py b/rdagent/app/quant_factor_benchmark/analysis.py index 3164860ed..6167ab678 100644 --- a/rdagent/app/quant_factor_benchmark/analysis.py +++ b/rdagent/app/quant_factor_benchmark/analysis.py @@ -41,6 +41,7 @@ def process_results(self, results): processed_data = self.analyze_data(summarized_data) final_res[experiment] = processed_data.iloc[-1, :] return final_res + def reformat_succ_rate(self, display_df): new_idx = [] display_df = display_df[display_df.index.isin(self.index_map.keys())] @@ -166,6 +167,7 @@ def plot_data(data, file_name): plt.title("Comparison of Different Methods") plt.savefig(file_name) + if __name__ == "__main__": settings = BenchmarkSettings() benchmark = BenchmarkAnalyzer(settings) diff --git a/rdagent/app/quant_factor_benchmark/eval.py b/rdagent/app/quant_factor_benchmark/eval.py index b46cf744d..e98b6a2f3 100644 --- a/rdagent/app/quant_factor_benchmark/eval.py +++ b/rdagent/app/quant_factor_benchmark/eval.py @@ -15,12 +15,6 @@ FactorTestCaseLoaderFromJsonFile, ) -from rdagent.core.utils import import_class -from rdagent.core.scenario import Scenario -from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorScenario - -from pprint import pprint - # 1.read the settings bs = BenchmarkSettings() diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py index 7817595a1..bb5fd8174 100644 --- a/rdagent/components/benchmark/eval_method.py +++ b/rdagent/components/benchmark/eval_method.py @@ -35,6 +35,7 @@ List[Tuple[FactorEvaluator, Union[object, RunnerException]]], ] + class TestCase: def __init__( self, diff --git a/rdagent/core/proposal.py b/rdagent/core/proposal.py index a6c05b8d9..e01ab71ed 100644 --- a/rdagent/core/proposal.py +++ b/rdagent/core/proposal.py @@ -112,7 +112,7 @@ def gen(self, trace: Trace) -> Hypothesis: class Hypothesis2Experiment(ABC, Generic[ASpecificExp]): """ - [Abstract description => concrete description] => Code 
implementation Card + [Abstract description => concrete description] => Code implementation Card """ @abstractmethod diff --git a/rdagent/log/storage.py b/rdagent/log/storage.py index 7ed014c52..870d83edf 100644 --- a/rdagent/log/storage.py +++ b/rdagent/log/storage.py @@ -90,22 +90,28 @@ def iter_msg(self, watch: bool = False) -> Generator[Message, None, None]: message_end = next_match.start() if next_match else len(content) message_content = content[message_start:message_end].strip() + if "Logging object in" in message_content: + continue + m = Message( tag=tag, level=level, timestamp=timestamp, caller=caller, pid_trace=pid, content=message_content ) - if isinstance(m.content, str) and "Logging object in" in m.content: - absolute_p = m.content.split("Logging object in ")[1] - relative_p = "." + absolute_p.split(self.path.name)[1] - pkl_path = self.path / relative_p - try: - with pkl_path.open("rb") as f: - m.content = pickle.load(f) - except: - continue - msg_l.append(m) + for file in self.path.glob("**/*.pkl"): + tag = ".".join(str(file.relative_to(self.path)).replace("/", ".").split(".")[:-3]) + pid = file.parent.name + + with file.open("rb") as f: + content = pickle.load(f) + + timestamp = datetime.strptime(file.stem, "%Y-%m-%d_%H-%M-%S-%f").replace(tzinfo=timezone.utc) + + m = Message(tag=tag, level="INFO", timestamp=timestamp, caller="", pid_trace=pid, content=content) + + msg_l.append(m) + msg_l.sort(key=lambda x: x.timestamp) for m in msg_l: yield m diff --git a/rdagent/log/ui/app.py b/rdagent/log/ui/app.py index 52060c681..7f22d8dd6 100644 --- a/rdagent/log/ui/app.py +++ b/rdagent/log/ui/app.py @@ -1,26 +1,478 @@ -import pickle -from pathlib import Path - -from rdagent.core.proposal import Trace -from rdagent.log.storage import FileStorage, Message -from rdagent.log.ui.web import ( - SimpleTraceWindow, - TraceObjWindow, - TraceWindow, - WebView, - mock_msg, +import time +from collections import defaultdict +from datetime import datetime, timezone +from 
typing import Callable, Type + +import pandas as pd +import plotly.express as px +import plotly.graph_objects as go +import streamlit as st +from plotly.subplots import make_subplots +from streamlit import session_state as state +from streamlit.delta_generator import DeltaGenerator + +from rdagent.components.coder.factor_coder.CoSTEER.evaluators import ( + FactorSingleFeedback, +) +from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace, FactorTask +from rdagent.components.coder.model_coder.CoSTEER.evaluators import ModelCoderFeedback +from rdagent.components.coder.model_coder.model import ModelFBWorkspace, ModelTask +from rdagent.core.proposal import Hypothesis, HypothesisFeedback +from rdagent.log.base import Message +from rdagent.log.storage import FileStorage +from rdagent.log.ui.qlib_report_figure import report_figure +from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment +from rdagent.scenarios.qlib.experiment.model_experiment import ( + QlibModelExperiment, + QlibModelScenario, ) -# show logs folder -WebView(TraceWindow()).display(FileStorage("/data/home/bowen/workspace/RD-Agent/log/yuante/2024-07-24_04-03-33-691119")) -# WebView(TraceWindow()).display(FileStorage("/data/home/bowen/workspace/RD-Agent/log/2024-07-22_03-01-12-021659")) -# WebView(TraceWindow()).display(FileStorage("./log/2024-07-18_08-37-00-477228")) +st.set_page_config(layout="wide") + + +if "log_path" not in state: + state.log_path = "" + +if "log_type" not in state: + state.log_type = "qlib_model" + +if "fs" not in state: + state.fs = None + +if "msgs" not in state: + state.msgs = defaultdict(lambda: defaultdict(list)) + +if "last_msg" not in state: + state.last_msg = None + +if "current_tags" not in state: + state.current_tags = [] + +if "lround" not in state: + state.lround = 0 # RD Loop Round + +if "erounds" not in state: + state.erounds = defaultdict(int) # Evolving Rounds in each RD Loop + +# Summary Info +if "hypotheses" not in state: 
+ # Hypotheses in each RD Loop + state.hypotheses = defaultdict(None) + +if "h_decisions" not in state: + state.h_decisions = defaultdict(bool) + +if "metric_series" not in state: + state.metric_series = [] + + +def refresh(): + state.fs = FileStorage(state.log_path).iter_msg() + state.msgs = defaultdict(lambda: defaultdict(list)) + state.lround = 0 + state.erounds = defaultdict(int) + state.hypotheses = defaultdict(None) + state.h_decisions = defaultdict(bool) + state.metric_series = [] + state.last_msg = None + state.current_tags = [] + + +def should_display(msg: Message): + for t in state.excluded_tags: + if t in msg.tag.split("."): + return False + + if type(msg.content).__name__ in state.excluded_types: + return False + + return True + + +def get_msgs_until(end_func: Callable[[Message], bool] = lambda _: True): + if state.fs: + while True: + try: + msg = next(state.fs) + if should_display(msg): + tags = msg.tag.split(".") + if "r" not in state.current_tags and "r" in tags: + state.lround += 1 + if "evolving code" not in state.current_tags and "evolving code" in tags: + state.erounds[state.lround] += 1 + + state.current_tags = tags + state.last_msg = msg + state.msgs[state.lround][msg.tag].append(msg) + + # Update Summary Info + if "model runner result" in tags or "factor runner result" in tags or "runner result" in tags: + if msg.content.result is None: + state.metric_series.append(pd.Series([None], index=["AUROC"])) + else: + if msg.content.result.name == "AUROC": + ps = msg.content.result + ps.index = ["AUROC"] + state.metric_series.append(ps) + else: + state.metric_series.append( + msg.content.result.loc[ + [ + "IC", + "1day.excess_return_without_cost.annualized_return", + "1day.excess_return_without_cost.information_ratio", + "1day.excess_return_without_cost.max_drawdown", + ] + ] + ) + elif "hypothesis generation" in tags: + state.hypotheses[state.lround] = msg.content + elif "ef" in tags and "feedback" in tags: + state.h_decisions[state.lround] = 
msg.content.decision + + # Stop Getting Logs + if end_func(msg): + break + except StopIteration: + break + + +# Config Sidebar +with st.sidebar: + st.text_input("log path", key="log_path", on_change=refresh) + st.selectbox("trace type", ["qlib_model", "qlib_factor", "model_extraction_and_implementation"], key="log_type") + + st.multiselect("excluded log tags", ["llm_messages"], ["llm_messages"], key="excluded_tags") + st.multiselect("excluded log types", ["str", "dict", "list"], ["str"], key="excluded_types") + + if st.button("refresh"): + refresh() + debug = st.checkbox("debug", value=False) + + if debug: + if st.button("Single Step Run"): + get_msgs_until() + + +# Debug Info Window +if debug: + with st.expander(":red[**Debug Info**]", expanded=True): + dcol1, dcol2 = st.columns([1, 3]) + with dcol1: + st.markdown( + f"**trace type**: {state.log_type}\n\n" + f"**log path**: {state.log_path}\n\n" + f"**excluded tags**: {state.excluded_tags}\n\n" + f"**excluded types**: {state.excluded_types}\n\n" + f":blue[**message id**]: {sum(sum(len(tmsgs) for tmsgs in rmsgs.values()) for rmsgs in state.msgs.values())}\n\n" + f":blue[**round**]: {state.lround}\n\n" + f":blue[**evolving round**]: {state.erounds[state.lround]}\n\n" + ) + with dcol2: + if state.last_msg: + st.write(state.last_msg) + if isinstance(state.last_msg.content, list): + st.write(state.last_msg.content[0]) + elif not isinstance(state.last_msg.content, str): + st.write(state.last_msg.content) + + +# Main Window + +# Project Info +with st.container(): + image_c, toc_c = st.columns([3, 3], vertical_alignment="center") + with image_c: + st.image("./docs/_static/scen.jpg") + with toc_c: + st.markdown( + """ +# RD-Agent🤖 +## [Scenario Description](#_scenario) +## [Summary](#_summary) +## [RD-Loops](#_rdloops) +### [Research](#_research) +### [Development](#_development) +### [Feedback](#_feedback) +""" + ) +with st.container(border=True): + st.header("Scenario Description📖", divider=True, anchor="_scenario") + # 
TODO: other scenarios + if state.log_type == "qlib_model": + st.markdown(QlibModelScenario().rich_style_description) + elif state.log_type == "model_extraction_and_implementation": + st.markdown( + """ +# General Model Scenario + +## Overview + +This demo automates the extraction and iterative development of models from academic papers, ensuring functionality and correctness. + +### Scenario: Auto-Developing Model Code from Academic Papers + +#### Overview + +This scenario automates the development of PyTorch models by reading academic papers or other sources. It supports various data types, including tabular, time-series, and graph data. The primary workflow involves two main components: the Reader and the Coder. + +#### Workflow Components + +1. **Reader** +- Parses and extracts relevant model information from academic papers or sources, including architectures, parameters, and implementation details. +- Uses Large Language Models to convert content into a structured format for the Coder. + +2. **Evolving Coder** +- Translates structured information from the Reader into executable PyTorch code. +- Utilizes an evolving coding mechanism to ensure correct tensor shapes, verified with sample input tensors. +- Iteratively refines the code to align with source material specifications. + +#### Supported Data Types + +- **Tabular Data:** Structured data with rows and columns, such as spreadsheets or databases. +- **Time-Series Data:** Sequential data points indexed in time order, useful for forecasting and temporal pattern recognition. +- **Graph Data:** Data structured as nodes and edges, suitable for network analysis and relational tasks. 
+""" + ) + + +# Summary Window +@st.experimental_fragment() +def summary_window(): + if state.log_type in ["qlib_model", "qlib_factor"]: + with st.container(): + st.header("Summary📊", divider=True, anchor="_summary") + hypotheses_c, chart_c = st.columns([2, 3]) + # TODO: not fixed height + with hypotheses_c.container(height=600): + st.markdown("**Hypotheses🏅**") + h_str = "\n".join( + f"{id}. :green[**{h.hypothesis}**]\n\t>:green-background[*{h.__dict__.get('concise_reason', '')}*]" + if state.h_decisions[id] + else f"{id}. {h.hypothesis}\n\t>*{h.__dict__.get('concise_reason', '')}*" + for id, h in state.hypotheses.items() + ) + st.markdown(h_str) + with chart_c.container(height=600): + mt_c, ms_c = st.columns(2, vertical_alignment="center") + with mt_c: + st.markdown("**Metrics📈**") + with ms_c: + show_true_only = st.checkbox("True Decisions Only", value=False) + + labels = [f"Round {i}" for i in range(1, len(state.metric_series) + 1)] + df = pd.DataFrame(state.metric_series, index=labels) + if show_true_only and len(state.hypotheses) >= len(state.metric_series): + df = df.iloc[[i for i in range(df.shape[0]) if state.h_decisions[i + 1]]] + if df.shape[0] == 1: + st.table(df.iloc[0]) + elif df.shape[0] > 1: + # TODO: figure label + # TODO: separate into different figures + if df.shape[1] == 1: + # suhan's scenario + fig = px.line(df, x=df.index, y=df.columns, markers=True) + fig.update_layout(legend_title_text="Metrics", xaxis_title="Loop Round", yaxis_title=None) + else: + # 2*2 figure + fig = make_subplots(rows=2, cols=2, subplot_titles=df.columns) + for ci, col in enumerate(df.columns): + row = ci // 2 + 1 + col_num = ci % 2 + 1 + fig.add_trace( + go.Scatter(x=df.index, y=df[col], mode="lines+markers", name=col), row=row, col=col_num + ) + fig.update_layout(title_text="Metrics", showlegend=False) + st.plotly_chart(fig) + + +summary_window() + +# R&D Loops Window +st.header("R&D Loops♾️", divider=True, anchor="_rdloops") +button_c1, button_c2, round_s_c = 
st.columns([2, 3, 18], vertical_alignment="center") +with button_c1: + if st.button("Run One Loop"): + get_msgs_until(lambda m: "ef.feedback" in m.tag) +with button_c2: + if st.button("Run One Evolving Step"): + get_msgs_until(lambda m: "d.evolving feedback" in m.tag) + +if len(state.msgs) > 1: + with round_s_c: + round = st.select_slider("Select RDLoop Round", options=state.msgs.keys(), value=state.lround) +else: + round = 1 + +rf_c, d_c = st.columns([2, 2]) + +# Research & Feedback Window +with rf_c: + if state.log_type in ["qlib_model", "qlib_factor"]: + # Research Window + with st.container(border=True): + st.subheader("Research🔍", divider=True, anchor="_research") + # pdf image + if pim := state.msgs[round]["r.extract_factors_and_implement.load_pdf_screenshot"]: + for i in range(min(2, len(pim))): + st.image(pim[i].content) + + # Hypothesis + if hg := state.msgs[round]["r.hypothesis generation"]: + st.markdown("**Hypothesis💡**") # 🧠 + h: Hypothesis = hg[0].content + st.markdown( + f""" +- **Hypothesis**: {h.hypothesis} +- **Reason**: {h.reason}""" + ) + + if eg := state.msgs[round]["r.experiment generation"]: + if isinstance(eg[0].content[0], FactorTask): + st.markdown("**Factor Tasks**") + fts = eg[0].content + tabs = st.tabs([f.factor_name for f in fts]) + for i, ft in enumerate(fts): + with tabs[i]: + # st.markdown(f"**Factor Name**: {ft.factor_name}") + st.markdown(f"**Description**: {ft.factor_description}") + st.latex(f"Formulation: {ft.factor_formulation}") + + variables_df = pd.DataFrame(ft.variables, index=["Description"]).T + variables_df.index.name = "Variable" + st.table(variables_df) + elif isinstance(eg[0].content[0], ModelTask): + st.markdown("**Model Tasks**") + mts = eg[0].content + tabs = st.tabs([m.name for m in mts]) + for i, mt in enumerate(mts): + with tabs[i]: + # st.markdown(f"**Model Name**: {mt.name}") + st.markdown(f"**Model Type**: {mt.model_type}") + st.markdown(f"**Description**: {mt.description}") + st.latex(f"Formulation: 
{mt.formulation}") + + variables_df = pd.DataFrame(mt.variables, index=["Value"]).T + variables_df.index.name = "Variable" + st.table(variables_df) + + # Feedback Window + with st.container(border=True): + st.subheader("Feedback📝", divider=True, anchor="_feedback") + if fbr := state.msgs[round]["ef.Quantitative Backtesting Chart"]: + st.markdown("**Returns📈**") + fig = report_figure(fbr[0].content) + st.plotly_chart(fig) + if fb := state.msgs[round]["ef.feedback"]: + st.markdown("**Hypothesis Feedback🔍**") + h: HypothesisFeedback = fb[0].content + st.markdown( + f""" +- **Observations**: {h.observations} +- **Hypothesis Evaluation**: {h.hypothesis_evaluation} +- **New Hypothesis**: {h.new_hypothesis} +- **Decision**: {h.decision} +- **Reason**: {h.reason}""" + ) + + elif state.log_type == "model_extraction_and_implementation": + # Research Window + with st.container(border=True): + # pdf image + st.subheader("Research🔍", divider=True, anchor="_research") + if pim := state.msgs[round]["r.pdf_image"]: + for i in range(len(pim)): + st.image(pim[i].content) + + # loaded model exp + if mem := state.msgs[round]["d.load_experiment"]: + me: QlibModelExperiment = mem[0].content + mts: list[ModelTask] = me.sub_tasks + tabs = st.tabs([m.name for m in mts]) + for i, mt in enumerate(mts): + with tabs[i]: + # st.markdown(f"**Model Name**: {mt.name}") + st.markdown(f"**Model Type**: {mt.model_type}") + st.markdown(f"**Description**: {mt.description}") + st.latex(f"Formulation: {mt.formulation}") + + variables_df = pd.DataFrame(mt.variables, index=["Value"]).T + variables_df.index.name = "Variable" + st.table(variables_df) + + # Feedback Window + with st.container(border=True): + st.subheader("Feedback📝", divider=True, anchor="_feedback") + if fbr := state.msgs[round]["d.developed_experiment"]: + st.markdown("**Returns📈**") + result_df = fbr[0].content.result + if result_df: + fig = report_figure(result_df) + st.plotly_chart(fig) + else: + st.markdown("Returns is None") + + +# 
Development Window (Evolving) +with d_c.container(border=True): + st.subheader("Development🛠️", divider=True, anchor="_development") + # Evolving Tabs + if state.erounds[round] > 0: + etabs = st.tabs([str(i) for i in range(1, state.erounds[round] + 1)]) + + for i in range(0, state.erounds[round]): + with etabs[i]: + ws: list[FactorFBWorkspace | ModelFBWorkspace] = state.msgs[round]["d.evolving code"][i].content + ws = [w for w in ws if w] + # All Tasks + tab_names = [ + w.target_task.factor_name if isinstance(w.target_task, FactorTask) else w.target_task.name for w in ws + ] + wtabs = st.tabs(tab_names) + for j, w in enumerate(ws): + with wtabs[j]: + # Evolving Code + for k, v in w.code_dict.items(): + with st.expander(f":green[`{k}`]", expanded=True): + st.code(v, language="python") -# load Trace obj -# with Path('./log/step_trace.pkl').open('rb') as f: -# obj = pickle.load(f) -# trace: Trace = obj[-1] + # Evolving Feedback + if len(state.msgs[round]["d.evolving feedback"]) > i: + wsf: list[FactorSingleFeedback | ModelCoderFeedback] = state.msgs[round]["d.evolving feedback"][ + i + ].content[j] + if isinstance(wsf, FactorSingleFeedback): + st.markdown( + f"""#### :blue[Factor Execution Feedback] +{wsf.execution_feedback} +#### :blue[Factor Code Feedback] +{wsf.code_feedback} +#### :blue[Factor Value Feedback] +{wsf.factor_value_feedback} +#### :blue[Factor Final Feedback] +{wsf.final_feedback} +#### :blue[Factor Final Decision] +This implementation is {'SUCCESS' if wsf.final_decision else 'FAIL'}. +""" + ) + elif isinstance(wsf, ModelCoderFeedback): + st.markdown( + f"""#### :blue[Model Execution Feedback] +{wsf.execution_feedback} +#### :blue[Model Shape Feedback] +{wsf.shape_feedback} +#### :blue[Model Value Feedback] +{wsf.value_feedback} +#### :blue[Model Code Feedback] +{wsf.code_feedback} +#### :blue[Model Final Feedback] +{wsf.final_feedback} +#### :blue[Model Final Decision] +This implementation is {'SUCCESS' if wsf.final_decision else 'FAIL'}. 
+""" + ) -# show Trace obj -# TraceObjWindow().consume_msg(mock_msg(trace)) +# TODO: evolving tabs -> slider +# TODO: multi tasks SUCCESS/FAIL +# TODO: evolving progress bar, diff colors diff --git a/rdagent/log/ui/qlib_report_figure.py b/rdagent/log/ui/qlib_report_figure.py new file mode 100644 index 000000000..cd4f94180 --- /dev/null +++ b/rdagent/log/ui/qlib_report_figure.py @@ -0,0 +1,445 @@ +import importlib +import math + +import pandas as pd +import plotly.graph_objs as go +from plotly.subplots import make_subplots + + +class BaseGraph: + _name = None + + def __init__( + self, df: pd.DataFrame = None, layout: dict = None, graph_kwargs: dict = None, name_dict: dict = None, **kwargs + ): + """ + + :param df: + :param layout: + :param graph_kwargs: + :param name_dict: + :param kwargs: + layout: dict + go.Layout parameters + graph_kwargs: dict + Graph parameters, eg: go.Bar(**graph_kwargs) + """ + self._df = df + + self._layout = dict() if layout is None else layout + self._graph_kwargs = dict() if graph_kwargs is None else graph_kwargs + self._name_dict = name_dict + + self.data = None + + self._init_parameters(**kwargs) + self._init_data() + + def _init_data(self): + """ + + :return: + """ + if self._df.empty: + raise ValueError("df is empty.") + + self.data = self._get_data() + + def _init_parameters(self, **kwargs): + """ + + :param kwargs + """ + + # Instantiate graphics parameters + self._graph_type = self._name.lower().capitalize() + + # Displayed column name + if self._name_dict is None: + self._name_dict = {_item: _item for _item in self._df.columns} + + @staticmethod + def get_instance_with_graph_parameters(graph_type: str = None, **kwargs): + """ + + :param graph_type: + :param kwargs: + :return: + """ + try: + _graph_module = importlib.import_module("plotly.graph_objs") + _graph_class = getattr(_graph_module, graph_type) + except AttributeError: + _graph_module = importlib.import_module("qlib.contrib.report.graph") + _graph_class = 
getattr(_graph_module, graph_type) + return _graph_class(**kwargs) + + def _get_layout(self) -> go.Layout: + """ + + :return: + """ + return go.Layout(**self._layout) + + def _get_data(self) -> list: + """ + + :return: + """ + + _data = [ + self.get_instance_with_graph_parameters( + graph_type=self._graph_type, x=self._df.index, y=self._df[_col], name=_name, **self._graph_kwargs + ) + for _col, _name in self._name_dict.items() + ] + return _data + + @property + def figure(self) -> go.Figure: + """ + + :return: + """ + _figure = go.Figure(data=self.data, layout=self._get_layout()) + # NOTE: Use the default theme from plotly version 3.x, template=None + _figure["layout"].update(template=None) + return _figure + + +class SubplotsGraph: + """Create subplots same as df.plot(subplots=True) + + Simple package for `plotly.tools.subplots` + """ + + def __init__( + self, + df: pd.DataFrame = None, + kind_map: dict = None, + layout: dict = None, + sub_graph_layout: dict = None, + sub_graph_data: list = None, + subplots_kwargs: dict = None, + **kwargs, + ): + """ + + :param df: pd.DataFrame + + :param kind_map: dict, subplots graph kind and kwargs + eg: dict(kind='Scatter', kwargs=dict()) + + :param layout: `go.Layout` parameters + + :param sub_graph_layout: Layout of each graphic, similar to 'layout' + + :param sub_graph_data: Instantiation parameters for each sub-graphic + eg: [(column_name, instance_parameters), ] + + column_name: str or go.Figure + + Instance_parameters: + + - row: int, the row where the graph is located + + - col: int, the col where the graph is located + + - name: str, show name, default column_name in 'df' + + - kind: str, graph kind, default `kind` param, eg: bar, scatter, ... 
+ + - graph_kwargs: dict, graph kwargs, default {}, used in `go.Bar(**graph_kwargs)` + + :param subplots_kwargs: `plotly.tools.make_subplots` original parameters + + - shared_xaxes: bool, default False + + - shared_yaxes: bool, default False + + - vertical_spacing: float, default 0.3 / rows + + - subplot_titles: list, default [] + If `sub_graph_data` is None, will generate 'subplot_titles' according to `df.columns`, + this field will be discarded + + + - specs: list, see `make_subplots` docs + + - rows: int, Number of rows in the subplot grid, default 1 + If `sub_graph_data` is None, will generate 'rows' according to `df`, this field will be discarded + + - cols: int, Number of cols in the subplot grid, default 1 + If `sub_graph_data` is None, will generate 'cols' according to `df`, this field will be discarded + + + :param kwargs: + + """ + + self._df = df + self._layout = layout + self._sub_graph_layout = sub_graph_layout + + self._kind_map = kind_map + if self._kind_map is None: + self._kind_map = dict(kind="Scatter", kwargs=dict()) + + self._subplots_kwargs = subplots_kwargs + if self._subplots_kwargs is None: + self._init_subplots_kwargs() + + self.__cols = self._subplots_kwargs.get("cols", 2) # pylint: disable=W0238 + self.__rows = self._subplots_kwargs.get( # pylint: disable=W0238 + "rows", math.ceil(len(self._df.columns) / self.__cols) + ) + + self._sub_graph_data = sub_graph_data + if self._sub_graph_data is None: + self._init_sub_graph_data() + + self._init_figure() + + def _init_sub_graph_data(self): + """ + + :return: + """ + self._sub_graph_data = [] + self._subplot_titles = [] + + for i, column_name in enumerate(self._df.columns): + row = math.ceil((i + 1) / self.__cols) + _temp = (i + 1) % self.__cols + col = _temp if _temp else self.__cols + res_name = column_name.replace("_", " ") + _temp_row_data = ( + column_name, + dict( + row=row, + col=col, + name=res_name, + kind=self._kind_map["kind"], + graph_kwargs=self._kind_map["kwargs"], + ), + ) + 
self._sub_graph_data.append(_temp_row_data) + self._subplot_titles.append(res_name) + + def _init_subplots_kwargs(self): + """ + + :return: + """ + # Default cols, rows + _cols = 2 + _rows = math.ceil(len(self._df.columns) / 2) + self._subplots_kwargs = dict() + self._subplots_kwargs["rows"] = _rows + self._subplots_kwargs["cols"] = _cols + self._subplots_kwargs["shared_xaxes"] = False + self._subplots_kwargs["shared_yaxes"] = False + self._subplots_kwargs["vertical_spacing"] = 0.3 / _rows + self._subplots_kwargs["print_grid"] = False + self._subplots_kwargs["subplot_titles"] = self._df.columns.tolist() + + def _init_figure(self): + """ + + :return: + """ + self._figure = make_subplots(**self._subplots_kwargs) + + for column_name, column_map in self._sub_graph_data: + if isinstance(column_name, go.Figure): + _graph_obj = column_name + elif isinstance(column_name, str): + temp_name = column_map.get("name", column_name.replace("_", " ")) + kind = column_map.get("kind", self._kind_map.get("kind", "Scatter")) + _graph_kwargs = column_map.get("graph_kwargs", self._kind_map.get("kwargs", {})) + _graph_obj = BaseGraph.get_instance_with_graph_parameters( + kind, + **dict( + x=self._df.index, + y=self._df[column_name], + name=temp_name, + **_graph_kwargs, + ), + ) + else: + raise TypeError() + + row = column_map["row"] + col = column_map["col"] + + self._figure.add_trace(_graph_obj, row=row, col=col) + + if self._sub_graph_layout is not None: + for k, v in self._sub_graph_layout.items(): + self._figure["layout"][k].update(v) + + # NOTE: Use the default theme from plotly version 3.x: template=None + self._figure["layout"].update(template=None) + self._figure["layout"].update(self._layout) + + @property + def figure(self): + return self._figure + + +def _calculate_maximum(df: pd.DataFrame, is_ex: bool = False): + """ + + :param df: + :param is_ex: + :return: + """ + if is_ex: + end_date = df["cum_ex_return_wo_cost_mdd"].idxmin() + start_date = df.loc[df.index <= 
def _calculate_mdd(series):
    """
    Calculate the running drawdown of a cumulative series.

    :param series: cumulative values (e.g. a cumulative return curve); the result
        is only meaningful when the rows are in chronological order
    :return: ``series - series.cummax()`` -- 0 wherever the series sits at its
        running maximum, negative by the distance below that maximum otherwise
    """
    return series - series.cummax()


def _calculate_report_data(raw_df: pd.DataFrame) -> pd.DataFrame:
    """
    Build the cumulative return / drawdown table that ``report_figure`` plots.

    :param raw_df: frame with ``return``, ``bench``, ``cost`` and ``turnover``
        columns whose index supports ``strftime`` (e.g. a ``DatetimeIndex``).
        It is not modified; a deep copy is used.
    :return: frame indexed by ``"%Y-%m-%d"`` strings in ascending order (original
        index names restored) with cumulative benchmark / return / excess-return
        columns, their drawdown (``*_mdd``) columns and ``turnover``
    """
    df = raw_df.copy(deep=True)
    index_names = df.index.names
    df.index = df.index.strftime("%Y-%m-%d")
    # Sort BEFORE cumulating: cumsum/cummax depend on row order, so sorting only
    # the finished table (as was done previously) yields wrong curves whenever
    # the input is not already chronological. A stable sort keeps the relative
    # order of rows that share a date.
    df = df.sort_index(ascending=True, kind="mergesort")

    report_df = pd.DataFrame()

    report_df["cum_bench"] = df["bench"].cumsum()
    report_df["cum_return_wo_cost"] = df["return"].cumsum()
    report_df["cum_return_w_cost"] = (df["return"] - df["cost"]).cumsum()
    # Drawdown = distance of each cumulative curve below its running maximum.
    report_df["return_wo_mdd"] = _calculate_mdd(report_df["cum_return_wo_cost"])
    report_df["return_w_cost_mdd"] = _calculate_mdd(report_df["cum_return_w_cost"])

    report_df["cum_ex_return_wo_cost"] = (df["return"] - df["bench"]).cumsum()
    report_df["cum_ex_return_w_cost"] = (df["return"] - df["bench"] - df["cost"]).cumsum()
    report_df["cum_ex_return_wo_cost_mdd"] = _calculate_mdd(report_df["cum_ex_return_wo_cost"])
    # Same subtraction order as before (cost first, then bench) so the floating
    # point result of this column is unchanged.
    report_df["cum_ex_return_w_cost_mdd"] = _calculate_mdd((df["return"] - df["cost"] - df["bench"]).cumsum())

    report_df["turnover"] = df["turnover"]

    report_df.index.names = index_names
    return report_df


def report_figure(df: pd.DataFrame) -> list | tuple:
    """
    Build the seven-row report figure (cumulative returns, drawdowns, excess
    returns and turnover) for a backtest report frame.

    :param df: raw report frame; see ``_calculate_report_data`` for the columns
        that are read
    :return: the ``figure`` attribute of the ``SubplotsGraph`` built from the
        report data
    """

    # Get data
    report_df = _calculate_report_data(df)

    # Maximum drawdown windows (plain return, then excess return); they are
    # shaded as grey rectangles in the layout below.
    max_start_date, max_end_date = _calculate_maximum(report_df)
    ex_max_start_date, ex_max_end_date = _calculate_maximum(report_df, True)

    # Prepend an all-zero "T0" row so every curve starts from 0:
    #   1. append a scratch row (label -1) at the bottom,
    #   2. shift everything down one position (the scratch row falls off the
    #      end, the first row becomes empty),
    #   3. label the now-empty first row "T0" and fill it with zeros.
    # NOTE(review): relies on the index having a name, which becomes the column
    # created by reset_index() -- confirm callers always pass a named index.
    index_name = report_df.index.name
    _temp_df = report_df.reset_index()
    _temp_df.loc[-1] = 0
    _temp_df = _temp_df.shift(1)
    _temp_df.loc[0, index_name] = "T0"
    _temp_df.set_index(index_name, inplace=True)
    _temp_df.iloc[0] = 0
    report_df = _temp_df

    # Create figure
    _default_kind_map = dict(kind="Scatter", kwargs={"mode": "lines+markers"})
    _temp_fill_args = {"fill": "tozeroy", "mode": "lines+markers"}
    # (column name, subplot placement) pairs; drawdown columns are drawn filled.
    _column_row_col_dict = [
        ("cum_bench", dict(row=1, col=1)),
        ("cum_return_wo_cost", dict(row=1, col=1)),
        ("cum_return_w_cost", dict(row=1, col=1)),
        ("return_wo_mdd", dict(row=2, col=1, graph_kwargs=_temp_fill_args)),
        ("return_w_cost_mdd", dict(row=3, col=1, graph_kwargs=_temp_fill_args)),
        ("cum_ex_return_wo_cost", dict(row=4, col=1)),
        ("cum_ex_return_w_cost", dict(row=4, col=1)),
        ("turnover", dict(row=5, col=1)),
        ("cum_ex_return_w_cost_mdd", dict(row=6, col=1, graph_kwargs=_temp_fill_args)),
        ("cum_ex_return_wo_cost_mdd", dict(row=7, col=1, graph_kwargs=_temp_fill_args)),
    ]

    _subplot_layout = {}
    for i in range(1, 8):
        # yaxis
        _subplot_layout[f"yaxis{i}"] = dict(zeroline=True, showline=True, showticklabels=True)
        # Only the last (7th) x axis draws its axis line.
        _show_line = i == 7
        _subplot_layout[f"xaxis{i}"] = dict(showline=_show_line, type="category", tickangle=45)

    _layout_style = dict(
        height=1200,
        title=" ",
        shapes=[
            # Return drawdown window: upper part of the figure (paper y 0.55..1).
            {
                "type": "rect",
                "xref": "x",
                "yref": "paper",
                "x0": max_start_date,
                "y0": 0.55,
                "x1": max_end_date,
                "y1": 1,
                "fillcolor": "#d3d3d3",
                "opacity": 0.3,
                "line": {
                    "width": 0,
                },
            },
            # Excess-return drawdown window: lower part (paper y 0..0.55).
            {
                "type": "rect",
                "xref": "x",
                "yref": "paper",
                "x0": ex_max_start_date,
                "y0": 0,
                "x1": ex_max_end_date,
                "y1": 0.55,
                "fillcolor": "#d3d3d3",
                "opacity": 0.3,
                "line": {
                    "width": 0,
                },
            },
        ],
    )

    _subplot_kwargs = dict(
        shared_xaxes=True,
        vertical_spacing=0.01,
        rows=7,
        cols=1,
        row_width=[1, 1, 1, 3, 1, 1, 3],
        print_grid=False,
    )
    figure = SubplotsGraph(
        df=report_df,
        layout=_layout_style,
        sub_graph_data=_column_row_col_dict,
        subplots_kwargs=_subplot_kwargs,
        kind_map=_default_kind_map,
        sub_graph_layout=_subplot_layout,
    ).figure
    return figure
# inner_class=WorkspaceWindow, - # mapper=lambda x: x.target_task.factor_name).consume_msg(msg) - # elif isinstance(msg.content[0], ModelFBWorkspace): - # ObjectsTabsWindow(self.container.expander('Model Workspaces'), - # inner_class=WorkspaceWindow, - # mapper=lambda x: x.target_task.name).consume_msg(msg) class FeedbackWindow(StWindow): @@ -552,7 +549,11 @@ def __init__(self, container: "DeltaGenerator"): self.container = container def consume_msg(self, msg: Message): - if isinstance(msg.content, HypothesisFeedback): + if msg.tag.endswith("returns"): + fig = px.line(msg.content) + self.container.markdown("**Returns📈**") + self.container.plotly_chart(fig) + elif isinstance(msg.content, HypothesisFeedback): HypothesisFeedbackWindow(self.container.container(border=True)).consume_msg(msg) elif isinstance(msg.content, QlibModelExperiment): QlibModelExpWindow(self.container.container(border=True)).consume_msg(msg) @@ -584,13 +585,15 @@ def __init__( ): self.show_llm = show_llm self.show_common_logs = show_common_logs - + image_c, scen_c = container.columns([2, 3], vertical_alignment="center") + image_c.image("scen.jpg") + scen_c.container(border=True).markdown(QlibModelScenario().rich_style_description) top_container = container.container() col1, col2 = top_container.columns([2, 3]) - chart_c = col2.container(border=True, height=300) + chart_c = col2.container(border=True, height=500) chart_c.markdown("**Metrics📈**") self.chart_c = chart_c.empty() - hypothesis_status_c = col1.container(border=True, height=300) + hypothesis_status_c = col1.container(border=True, height=500) hypothesis_status_c.markdown("**Hypotheses🏅**") self.summary_c = hypothesis_status_c.empty() @@ -602,7 +605,7 @@ def __init__( ) self.hypothesis_decisions = defaultdict(bool) - self.current_hypothesis = None + self.hypotheses: list[Hypothesis] = [] self.results = [] @@ -614,12 +617,14 @@ def consume_msg(self, msg: Message): if isinstance(msg.content, dict): return if msg.tag.endswith("hypothesis 
generation"): - self.current_hypothesis = msg.content.hypothesis + self.hypotheses.append(msg.content) elif msg.tag.endswith("ef.feedback"): - self.hypothesis_decisions[self.current_hypothesis] = msg.content.decision + self.hypothesis_decisions[self.hypotheses[-1]] = msg.content.decision self.summary_c.markdown( "\n".join( - f"{id+1}. :green[{h}]\n" if d else f"{id+1}. {h}\n" + f"{id+1}. :green[{self.hypotheses[id].hypothesis}]\n\t>*{self.hypotheses[id].concise_reason}*" + if d + else f"{id+1}. {self.hypotheses[id].hypothesis}\n\t>*{self.hypotheses[id].concise_reason}*" for id, (h, d) in enumerate(self.hypothesis_decisions.items()) ) ) diff --git a/rdagent/scenarios/qlib/experiment/model_experiment.py b/rdagent/scenarios/qlib/experiment/model_experiment.py index 582f73c92..f1c99c8d2 100644 --- a/rdagent/scenarios/qlib/experiment/model_experiment.py +++ b/rdagent/scenarios/qlib/experiment/model_experiment.py @@ -40,8 +40,8 @@ def simulator(self) -> str: return prompt_dict["qlib_model_simulator"] @property - def rich_style_description(self)->str: - return ''' + def rich_style_description(self) -> str: + return """ ### Qlib Model Evolving Automatic R&D Demo #### Overview @@ -75,7 +75,7 @@ def rich_style_description(self)->str: To demonstrate the dynamic evolution of models through the Qlib platform, emphasizing how each iteration enhances the accuracy and reliability of the resulting models. - ''' + """ def get_scenario_all_desc(self) -> str: return f"""Background of the scenario: