diff --git a/rdagent/app/model_extraction_and_code/GeneralModel.py b/rdagent/app/model_extraction_and_code/GeneralModel.py new file mode 100644 index 000000000..62795ace3 --- /dev/null +++ b/rdagent/app/model_extraction_and_code/GeneralModel.py @@ -0,0 +1,75 @@ +from pathlib import Path + +from rdagent.components.coder.model_coder.model import ( + ModelExperiment, +) +from rdagent.core.prompts import Prompts +from rdagent.core.scenario import Scenario + +prompt_dict = Prompts(file_path=Path(__file__).parent / "prompts.yaml") + +class GeneralModelScenario(Scenario): + @property + def background(self) -> str: + return prompt_dict["general_model_background"] + + @property + def source_data(self) -> str: + raise NotImplementedError("source_data of GeneralModelScenario is not implemented") + + @property + def output_format(self) -> str: + return prompt_dict["general_model_output_format"] + + @property + def interface(self) -> str: + return prompt_dict["general_model_interface"] + + @property + def simulator(self) -> str: + return prompt_dict["general_model_simulator"] + + @property + def rich_style_description(self) -> str: + return ''' +# General Model Scenario +
+## Overview
+
+This demo automates the extraction and iterative development of models from academic papers, ensuring functionality and correctness.
+
+### Scenario: Auto-Developing Model Code from Academic Papers
+
+#### Overview
+
+This scenario automates the development of PyTorch models by reading academic papers or other sources. It supports various data types, including tabular, time-series, and graph data. The primary workflow involves two main components: the Reader and the Coder.
+
+#### Workflow Components
+
+1. **Reader**
+   - Parses and extracts relevant model information from academic papers or sources, including architectures, parameters, and implementation details.
+   - Uses Large Language Models to convert content into a structured format for the Coder.
+
+2. **Evolving Coder**
+   - Translates structured information from the Reader into executable PyTorch code.
+   - Utilizes an evolving coding mechanism to ensure correct tensor shapes, verified with sample input tensors.
+   - Iteratively refines the code to align with source material specifications.
+
+#### Supported Data Types
+
+- **Tabular Data:** Structured data with rows and columns, such as spreadsheets or databases.
+- **Time-Series Data:** Sequential data points indexed in time order, useful for forecasting and temporal pattern recognition.
+- **Graph Data:** Data structured as nodes and edges, suitable for network analysis and relational tasks.
+ + ''' + + def get_scenario_all_desc(self) -> str: + return f"""Background of the scenario: +{self.background} +The interface you should follow to write the runnable code: +{self.interface} +The output of your code should be in the format: +{self.output_format} +The simulator the user can use to test your model: +{self.simulator} +""" diff --git a/rdagent/app/model_extraction_and_code/model_extraction_and_implementation.py b/rdagent/app/model_extraction_and_code/model_extraction_and_implementation.py index 1114cf13d..3fcf42dc2 100644 --- a/rdagent/app/model_extraction_and_code/model_extraction_and_implementation.py +++ b/rdagent/app/model_extraction_and_code/model_extraction_and_implementation.py @@ -1,21 +1,30 @@ # %% from dotenv import load_dotenv +load_dotenv(override=True) + from rdagent.components.coder.model_coder.task_loader import ( ModelExperimentLoaderFromPDFfiles, ) from rdagent.scenarios.qlib.developer.model_coder import QlibModelCoSTEER -from rdagent.scenarios.qlib.experiment.model_experiment import QlibModelScenario - +from rdagent.app.model_extraction_and_code.GeneralModel import GeneralModelScenario +from rdagent.components.document_reader.document_reader import extract_first_page_screenshot_from_pdf +from rdagent.log import rdagent_logger as logger +import fire -def extract_models_and_implement(report_file_path: str = "../test_doc") -> None: - scenario = QlibModelScenario() - exp = ModelExperimentLoaderFromPDFfiles().load(report_file_path) - exp = QlibModelCoSTEER(scenario).develop(exp) +def extract_models_and_implement(report_file_path: str = "/home/v-xisenwang/RD-Agent/rdagent/app/model_extraction_and_code/test_doc1.pdf") -> None: + with logger.tag("r"): + # Save relevant images + img = extract_first_page_screenshot_from_pdf(report_file_path) + logger.log_object(img, tag="pdf_image") + scenario = GeneralModelScenario() + logger.log_object(scenario, tag="scenario") + with logger.tag("d"): + exp = ModelExperimentLoaderFromPDFfiles().load(report_file_path) + logger.log_object(exp, tag="load_experiment") + exp = QlibModelCoSTEER(scenario).develop(exp) + logger.log_object(exp, tag="developed_experiment") return exp - -import fire - if __name__ == "__main__": fire.Fire(extract_models_and_implement) diff --git a/rdagent/app/model_extraction_and_code/prompts.yaml b/rdagent/app/model_extraction_and_code/prompts.yaml new file mode 100644 index 000000000..ddd01e50c --- /dev/null +++ b/rdagent/app/model_extraction_and_code/prompts.yaml @@ -0,0 +1,74 @@ +general_model_background: |- + The general model is a flexible and comprehensive framework designed to integrate factor-based, model-based, and graph-based approaches in quantitative investment. It allows users to define custom models that leverage various financial factors to predict the returns and risks of portfolios or single assets. These models are central to many advanced quantitative investment strategies and can be adapted to a wide range of use cases, from factor-based alpha generation to complex deep learning predictions. + + Each general model incorporates the following components: + 1. Name: The name of the model. + 2. Description: A detailed description of the model. + 3. Factors: The financial factors used as inputs, including their definitions and formulations. + 4. Architecture: The structure of the machine learning, deep learning, or graph-based model. + 5. Hyperparameters: The hyperparameters used in the model, such as the learning rate, number of epochs, etc. + 6.
ModelType: The type of the model, "Tabular" for tabular data, "TimeSeries" for time series data, or "Graph" for graph data. + The general model should provide clear and detailed documentation of its factors, architecture, and hyperparameters. Each model should have a fixed architecture and fixed hyperparameters to ensure reproducibility and consistency. + +general_model_interface: |- + Your Python code should follow the interface below so that it interacts correctly with the user's system. It should be a PyTorch model. + Your code should contain several parts: + 1. The import part: import the necessary libraries. + 2. A class that is a subclass of torch.nn.Module. This class should have an init function and a forward function that takes a tensor as input and outputs a tensor. + 3. Set a variable called "model_cls" to the class you defined. + + The user will save your code into a Python file called "model.py" and will then import model_cls from "model.py" after setting the cwd to that directory: + ```python + from model import model_cls + ``` + + So your Python code should follow the pattern: + + class XXXModel(torch.nn.Module): + ... + model_cls = XXXModel + + The model has three types: "Tabular" for tabular data, "TimeSeries" for time series data, and "Graph" for graph data. + + The input shape to a tabular model is (batch_size, num_features). + The input shape to a time series model is (batch_size, num_features, num_timesteps). + The input to a graph model consists of two tensors. + node_feature: a tensor of shape (batch_size, num_nodes, num_features) + edge_index: a tensor of shape (2, num_edges) + + The batch_size is a dynamic value determined by the input to the forward function. + + The output shape of the model should be (batch_size, 1). + + The "num_features", "num_timesteps", and "num_nodes" are static and will be provided to the model through the init function. + + The user will initialize the tabular model with the following code: + + model = model_cls(num_features=num_features) + + The user will initialize the time series model with the following code: + + model = model_cls(num_features=num_features, num_timesteps=num_timesteps) + + The user will initialize the graph model with the following code: + + model = model_cls(num_features=num_features, num_nodes=num_nodes) + + No other parameters will be passed to the model, so give any other parameters a default value or make them static. + + When dealing with a time series model, remember to permute the input tensor, since the input arrives in the shape (batch_size, num_features, num_timesteps) while a typical time series model expects the shape (batch_size, num_timesteps, num_features). + + Don't write any try-except blocks in your Python code. The user will catch the exception message and provide the feedback to you. Also, don't write a main function in your Python code. The user will call the forward method of model_cls to get the output tensor. + + Please note that your model should only use current features as input. The user will provide the input tensor to the model's forward function. + +general_model_output_format: |- + Your output should be a tensor with shape (batch_size, 1). + The output tensor should be saved in a file named "output.pth" in the same directory as your Python file. + The user will evaluate the shape of the output tensor, so the tensor read from "output.pth" should contain exactly 8 numbers (a batch of 8 predictions with shape (8, 1)). + +general_model_simulator: |- + The models are not loaded or backtested here, so pay close attention to the architecture itself.
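To make the interface above concrete, here is a minimal sketch of a time-series model that satisfies the contract; it is an illustration only, not part of the diff. The class name SketchTimeSeriesModel and the hidden_dim default are hypothetical, and the batch size of 8 in the check below mirrors the 8-number expectation in general_model_output_format.

```python
import torch


class SketchTimeSeriesModel(torch.nn.Module):
    # Hypothetical illustration of the interface; only num_features and
    # num_timesteps are supplied by the user, so hidden_dim needs a default.
    def __init__(self, num_features: int, num_timesteps: int, hidden_dim: int = 32):
        super().__init__()
        self.rnn = torch.nn.GRU(input_size=num_features, hidden_size=hidden_dim, batch_first=True)
        self.head = torch.nn.Linear(hidden_dim, 1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Input arrives as (batch_size, num_features, num_timesteps); permute to
        # (batch_size, num_timesteps, num_features), which the GRU expects.
        out, _ = self.rnn(x.permute(0, 2, 1))
        return self.head(out[:, -1, :])  # (batch_size, 1)


model_cls = SketchTimeSeriesModel
```

The user-side harness described by the prompts would then exercise the model roughly as follows (again a sketch; the feature and timestep counts are arbitrary, and only the batch of 8 is dictated by the output check):

```python
import torch

from model import model_cls  # assumes the sketch above was saved as model.py

model = model_cls(num_features=10, num_timesteps=20)
output = model(torch.randn(8, 10, 20))  # batch_size=8 to match the 8-number check
assert output.shape == (8, 1)
torch.save(output, "output.pth")  # the file the evaluator reads back
```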
+ + + \ No newline at end of file diff --git a/rdagent/app/model_extraction_and_code/test_doc1.pdf b/rdagent/app/model_extraction_and_code/test_doc1.pdf new file mode 100644 index 000000000..e8346d679 Binary files /dev/null and b/rdagent/app/model_extraction_and_code/test_doc1.pdf differ diff --git a/rdagent/app/model_extraction_and_code/test_doc2.pdf b/rdagent/app/model_extraction_and_code/test_doc2.pdf new file mode 100644 index 000000000..c3fccd8fc Binary files /dev/null and b/rdagent/app/model_extraction_and_code/test_doc2.pdf differ diff --git a/rdagent/app/model_extraction_and_code/test_doc3.pdf b/rdagent/app/model_extraction_and_code/test_doc3.pdf new file mode 100644 index 000000000..f66de0547 Binary files /dev/null and b/rdagent/app/model_extraction_and_code/test_doc3.pdf differ diff --git a/rdagent/app/model_extraction_and_code/test_doc4.pdf b/rdagent/app/model_extraction_and_code/test_doc4.pdf new file mode 100644 index 000000000..e53adaf3f Binary files /dev/null and b/rdagent/app/model_extraction_and_code/test_doc4.pdf differ diff --git a/rdagent/app/model_extraction_and_code/test_doc5.pdf b/rdagent/app/model_extraction_and_code/test_doc5.pdf new file mode 100644 index 000000000..bffc098a6 Binary files /dev/null and b/rdagent/app/model_extraction_and_code/test_doc5.pdf differ diff --git a/rdagent/app/quant_factor_benchmark/analysis.py b/rdagent/app/quant_factor_benchmark/analysis.py index 6167ab678..3164860ed 100644 --- a/rdagent/app/quant_factor_benchmark/analysis.py +++ b/rdagent/app/quant_factor_benchmark/analysis.py @@ -41,7 +41,6 @@ def process_results(self, results): processed_data = self.analyze_data(summarized_data) final_res[experiment] = processed_data.iloc[-1, :] return final_res - def reformat_succ_rate(self, display_df): new_idx = [] display_df = display_df[display_df.index.isin(self.index_map.keys())] @@ -167,7 +166,6 @@ def plot_data(data, file_name): plt.title("Comparison of Different Methods") plt.savefig(file_name) - if __name__ == "__main__": settings = BenchmarkSettings() benchmark = BenchmarkAnalyzer(settings) diff --git a/rdagent/app/quant_factor_benchmark/eval.py b/rdagent/app/quant_factor_benchmark/eval.py index 2f3cf2752..b46cf744d 100644 --- a/rdagent/app/quant_factor_benchmark/eval.py +++ b/rdagent/app/quant_factor_benchmark/eval.py @@ -15,6 +15,12 @@ FactorTestCaseLoaderFromJsonFile, ) +from rdagent.core.utils import import_class +from rdagent.core.scenario import Scenario +from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorScenario + +from pprint import pprint + # 1.read the settings bs = BenchmarkSettings() @@ -25,7 +31,6 @@ scen: Scenario = import_class(PROP_SETTING.factor_scen)() generate_method = import_class(bs.bench_method_cls)(scen=scen) - # 4.declare the eval method and pass the arguments. 
eval_method = FactorImplementEval( method=generate_method, diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py index c94a0e3e4..7817595a1 100644 --- a/rdagent/components/benchmark/eval_method.py +++ b/rdagent/components/benchmark/eval_method.py @@ -30,6 +30,11 @@ ] +EVAL_RES = Dict[ + str, + List[Tuple[FactorEvaluator, Union[object, RunnerException]]], +] + class TestCase: def __init__( self, diff --git a/rdagent/components/coder/model_coder/task_loader.py b/rdagent/components/coder/model_coder/task_loader.py index ad047c1a4..0e0a06cef 100644 --- a/rdagent/components/coder/model_coder/task_loader.py +++ b/rdagent/components/coder/model_coder/task_loader.py @@ -107,6 +107,7 @@ def load(self, model_dict: dict) -> list: formulation=model_data["formulation"], architecture=model_data["architecture"], variables=model_data["variables"], + hyperparameters=model_data["hyperparameters"], model_type=model_data["model_type"], ) task_l.append(task) diff --git a/rdagent/core/proposal.py b/rdagent/core/proposal.py index c1e598eda..a6c05b8d9 100644 --- a/rdagent/core/proposal.py +++ b/rdagent/core/proposal.py @@ -32,7 +32,9 @@ def __init__(self, hypothesis: str, reason: str, concise_reason: str) -> None: def __str__(self) -> str: return f"""Hypothesis: {self.hypothesis} -Reason: {self.reason}""" + Reason: {self.reason} + Concise Reason & Knowledge: {self.concise_reason} + """ # source: data_ana | model_nan = None @@ -110,7 +112,7 @@ def gen(self, trace: Trace) -> Hypothesis: class Hypothesis2Experiment(ABC, Generic[ASpecificExp]): """ - [Abstract description => concrete description] => Code implement + [Abstract description => concrete description] => Code implementation Card """ @abstractmethod diff --git a/rdagent/scenarios/qlib/developer/feedback.py b/rdagent/scenarios/qlib/developer/feedback.py index 7e62071f9..6711a87f2 100644 --- a/rdagent/scenarios/qlib/developer/feedback.py +++ b/rdagent/scenarios/qlib/developer/feedback.py @@ -147,6 +147,7 @@ def generate_feedback(self, exp: Experiment, hypothesis: Hypothesis, trace: Trac context=context, last_hypothesis=SOTA_hypothesis, last_task=SOTA_experiment.sub_tasks[0].get_task_information() if SOTA_hypothesis else None, + last_code=SOTA_experiment.sub_workspace_list[0].code_dict.get("model.py") if SOTA_hypothesis else None, last_result=SOTA_experiment.result if SOTA_hypothesis else None, hypothesis=hypothesis, exp=exp, diff --git a/rdagent/scenarios/qlib/experiment/model_experiment.py b/rdagent/scenarios/qlib/experiment/model_experiment.py index f1c99c8d2..582f73c92 100644 --- a/rdagent/scenarios/qlib/experiment/model_experiment.py +++ b/rdagent/scenarios/qlib/experiment/model_experiment.py @@ -40,8 +40,8 @@ def simulator(self) -> str: return prompt_dict["qlib_model_simulator"] @property - def rich_style_description(self) -> str: - return """ + def rich_style_description(self)->str: + return ''' ### Qlib Model Evolving Automatic R&D Demo #### Overview @@ -75,7 +75,7 @@ def rich_style_description(self) -> str: To demonstrate the dynamic evolution of models through the Qlib platform, emphasizing how each iteration enhances the accuracy and reliability of the resulting models. 
- """ + ''' def get_scenario_all_desc(self) -> str: return f"""Background of the scenario: diff --git a/rdagent/scenarios/qlib/prompts.yaml b/rdagent/scenarios/qlib/prompts.yaml index eec734025..63a7db8d6 100644 --- a/rdagent/scenarios/qlib/prompts.yaml +++ b/rdagent/scenarios/qlib/prompts.yaml @@ -1,6 +1,7 @@ hypothesis_and_feedback: |- {% for hypothesis, experiment, feedback in trace.hist %} Hypothesis {{ loop.index }}: {{ hypothesis }} + Corresponding Code (that leads to the difference in performance): {{experiment.sub_workspace_list[0].code_dict.get("model.py")}} Observation on the result with the hypothesis: {{ feedback.observations }} Feedback on the original hypothesis: {{ feedback.hypothesis_evaluation }} New Feedback for Context (For you to agree or improve upon): {{ feedback.new_hypothesis }} @@ -11,9 +12,9 @@ hypothesis_and_feedback: |- hypothesis_output_format: |- The output should follow JSON format. The schema is as follows: { - "hypothesis": "The new hypothesis generated based on the information provided.", + "hypothesis": "The new hypothesis generated based on the information provided.", Note that this should focus on model architecture, not training process or feature engineering or anything else "reason": "The reason why you generate this hypothesis.", - "concise_reason": One line summary that focuses on the justification for the change that leads to the hypothesis (like a part of a knowledge that we are building), + "concise_reason": Two line summary. First line focuses on the a concise justification for the change. 2nd Line learns from first line and previous experiences (hypothesis & experiments & code & feedbacks) to generalise a knowledge statement (use tend to/because/if/generally/etc. ). } model_hypothesis_specification: |- @@ -251,6 +252,7 @@ model_feedback_generation: Last Round Information: Hypothesis: {{last_hypothesis.hypothesis}} Task: {{last_task}} + Code Implemented: {{last_code}} Result: {{last_result}} {% else %} This is the first round. No previous information available. As long as the performance is not too negative (eg.ICIR is greater than 0), treat it as successful. Do not set the threshold too high. @@ -258,6 +260,8 @@ model_feedback_generation: Now let's come to this round. You will receive the result and you will evaluate if the performance increases or decreases. Hypothesis: {{hypothesis.hypothesis}} + Experiment Setup: {{exp.sub_tasks[0]}} + Code Implemented: {{exp.sub_workspace_list[0].code_dict.get("model.py")}} Relevant Reasoning: {{hypothesis.reason}} Result: {{exp.result}}