Commit message:

* add benchmark png
* Create benchmark.rst
* Update benchmark.rst
* Draft
* rename file
* add autopydantic_settings
* remove ignored file
* add example
* sort imports
* sort imports
* format code with black

Co-authored-by: Young <[email protected]>
Showing 6 changed files with 135 additions and 10 deletions.
==============================
Benchmark
==============================

Introduction
=============

Benchmarking R&D capabilities is an important research problem in this area.

We are continuously exploring how best to benchmark them.

The current benchmarks are listed on this page.
Development Capability Benchmarking
===================================

The benchmark is used to evaluate the effectiveness of factor implementations with fixed data.

It mainly includes the following steps:

1. :ref:`read and prepare the eval_data <data>`

2. :ref:`declare the method to be tested and pass the arguments <config>`

3. :ref:`declare the eval method and pass the arguments <config>`

4. :ref:`run the eval <run>`

5. :ref:`save and show the result <show>`
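The five steps above can be sketched as a minimal, self-contained loop. All names here (``run_benchmark``, the stub ``eval_data`` and ``method``) are illustrative only and are not RD-Agent's actual API.

```python
import pickle

def run_benchmark(eval_data, method, test_round=2):
    """Steps 1-4: run `method` on every factor for `test_round` rounds."""
    results = []
    for _ in range(test_round):
        for name, data in eval_data.items():
            results.append(method(name, data))
    return results

# Stub eval data and a stub method under test (illustrative only).
eval_data = {"factor_a": {"description": "an example factor"}}
method = lambda name, data: {"factor": name, "passed": True}

results = run_benchmark(eval_data, method, test_round=2)
blob = pickle.dumps(results)  # step 5: the real runner saves results as a pkl file
print(len(results))  # → 2
```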
Configuration
-------------
.. _config:

.. autopydantic_settings:: rdagent.components.benchmark.conf.BenchmarkSettings

Example
++++++++
.. _example:

The default value of ``bench_test_round`` is 10, and running 10 rounds takes about 2 hours.
To change it, for example from ``10`` to ``1``, add an environment variable to the ``.env`` file as shown below.

.. code-block:: properties

   BENCHMARK_BENCH_TEST_ROUND=1
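As a rough, stdlib-only illustration of what the ``BENCHMARK_`` prefix does (pydantic-settings performs this mapping internally; this is not its real implementation):

```python
import os

def read_setting(name, default, prefix="BENCHMARK_", cast=int):
    """Look up PREFIX + NAME (upper-cased) in the environment, else default."""
    raw = os.environ.get(prefix + name.upper())
    return cast(raw) if raw is not None else default

os.environ["BENCHMARK_BENCH_TEST_ROUND"] = "1"  # as set in the .env file
print(read_setting("bench_test_round", default=10))    # → 1
print(read_setting("bench_test_case_n", default=None)) # unset, falls back to None
```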
Data Format
-------------
.. _data:

The sample data in ``bench_data_path`` is a dictionary where each key represents a factor name.

The value associated with each key is the factor data, containing the following fields:

- **description**: A textual description of the factor.
- **formulation**: A LaTeX formula representing the factor's formulation.
- **variables**: A dictionary of the variables involved in the factor.
- **Category**: The category or classification of the factor.
- **Difficulty**: The difficulty level of implementing or understanding the factor.
- **gt_code**: The ground-truth implementation code for the factor.

Here is an example of this data format:

.. literalinclude:: ../../rdagent/components/benchmark/example.json
   :language: json
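A hypothetical entry in this format, plus a small validity check; the factor name and field values below are invented for illustration and do not come from ``example.json``.

```python
# Invented sample entry with the fields described above.
sample = {
    "Turnover_Rate_Factor": {
        "description": "A factor based on turnover rate.",
        "formulation": r"\frac{Volume_t}{SharesOutstanding_t}",
        "variables": {"Volume_t": "trading volume on day t"},
        "Category": "Fundamentals",
        "Difficulty": "Easy",
        "gt_code": "def factor(df): ...",
    }
}

REQUIRED_FIELDS = {"description", "formulation", "variables",
                   "Category", "Difficulty", "gt_code"}

def validate(bench_data):
    """Check that each factor entry carries every required field."""
    for name, entry in bench_data.items():
        missing = REQUIRED_FIELDS - entry.keys()
        if missing:
            raise ValueError(f"{name} is missing fields: {sorted(missing)}")
    return True

print(validate(sample))  # → True
```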
Run Benchmark
-------------
.. _run:

Start the benchmark after finishing the :doc:`../installation_and_configuration`.

.. code-block:: bash

   python rdagent/app/quant_factor_benchmark/eval.py

Once completed, a pkl file will be generated, and its path will be printed on the last line of the console output.
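Loading the generated pkl file might look like the following sketch. The file name is a stand-in for whatever path ``eval.py`` prints, and the snippet fabricates a tiny result so it is self-contained.

```python
import pickle
from pathlib import Path

result_path = Path("benchmark_result.pkl")  # stand-in for the printed path

# Fabricate a tiny saved result so this snippet is self-contained.
result_path.write_bytes(pickle.dumps({"factor_a": {"passed": True}}))

with result_path.open("rb") as f:
    result = pickle.load(f)

print(sorted(result))  # → ['factor_a']
result_path.unlink()   # remove the fabricated file
```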
Show Result
-------------
.. _show:

The ``analysis.py`` script reads the data from the pkl file and converts it into an image.
Modify the Python code in ``rdagent/app/quant_factor_benchmark/analysis.py`` to specify the path to the pkl file and the output path for the png file.

.. code-block:: bash

   python rdagent/app/quant_factor_benchmark/analysis.py

A png file will be saved to the designated path, as shown below.

.. image:: ../_static/benchmark.png
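As a simplified, stdlib-only stand-in for what ``analysis.py`` produces: the real script renders a png, while this sketch merely aggregates a hypothetical result dict into per-factor success rates.

```python
def summarize(result):
    """Return {factor: fraction of rounds that passed} for a hypothetical result."""
    return {
        name: sum(rounds) / len(rounds)
        for name, rounds in result.items()
    }

# Invented per-round pass/fail records for two factors.
result = {"factor_a": [True, True], "factor_b": [True, False]}
summary = summarize(result)
print(summary)  # → {'factor_a': 1.0, 'factor_b': 0.5}
```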
===========
Research
===========

.. TODO: xiao will add this

.. toctree::
   :maxdepth: 1
   :caption: Doctree:
   :hidden:

   benchmark.rst
The resulting rdagent/components/benchmark/conf.py after this commit:

from dataclasses import field
from pathlib import Path
from typing import Optional

from dotenv import load_dotenv
from pydantic_settings import BaseSettings

# Load environment variables
load_dotenv(verbose=True, override=True)


DIRNAME = Path("./")


class BenchmarkSettings(BaseSettings):
    class Config:
        env_prefix = "BENCHMARK_"
        """Use `BENCHMARK_` as prefix for environment variables"""

    ground_truth_dir: Path = DIRNAME / "ground_truth"
    """ground truth dir"""

    bench_data_path: Path = DIRNAME / "example.json"
    """data for benchmark"""

    bench_test_round: int = 10
    """how many rounds to run, each round may cost 10 minutes"""

    bench_test_case_n: Optional[int] = None
    """how many test cases to run; If not given, all test cases will be run"""

    bench_method_cls: str = "rdagent.components.coder.factor_coder.CoSTEER.FactorCoSTEER"
    """method to be used for test cases"""

    bench_method_extra_kwargs: dict = field(
        default_factory=dict,
    )
    """extra kwargs for the method to be tested except the task list"""

    bench_result_path: Path = DIRNAME / "result"
    """result save path"""