diff --git a/docs/_static/benchmark.png b/docs/_static/benchmark.png new file mode 100644 index 00000000..428be9c7 Binary files /dev/null and b/docs/_static/benchmark.png differ diff --git a/docs/conf.py b/docs/conf.py index 65fb9388..6222a046 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -15,9 +15,7 @@ # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration -extensions = [ - "sphinx.ext.autodoc", -] +extensions = ["sphinx.ext.autodoc", "sphinxcontrib.autodoc_pydantic"] autodoc_member_order = "bysource" diff --git a/docs/index.rst b/docs/index.rst index a0bc7649..715546af 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -14,6 +14,7 @@ Welcome to RDAgent's documentation! installation_and_configuration scens/catalog project_framework_introduction + research/research development api_reference policy diff --git a/docs/research/benchmark.rst b/docs/research/benchmark.rst new file mode 100644 index 00000000..6fe1923e --- /dev/null +++ b/docs/research/benchmark.rst @@ -0,0 +1,99 @@ +============================== +Benchmark +============================== + +Introduction +============= + + +Benchmarking the capabilities of R&D is an important research problem in this area. + +Currently we are continuously exploring how to benchmark them. + +The current benchmarks are listed on this page. + + +Development Capability Benchmarking +=================================== + + +Benchmark is used to evaluate the effectiveness of factors with fixed data. + +It mainly includes the following steps: + +1. :ref:`read and prepare the eval_data <data>` + +2. :ref:`declare the method to be tested and pass the arguments <config>` + +3. :ref:`declare the eval method and pass the arguments <config>` + +4. :ref:`run the eval <run>` + +5. :ref:`save and show the result <show>` + +Configuration +------------- +.. _config: + +.. 
autopydantic_settings:: rdagent.components.benchmark.conf.BenchmarkSettings + +Example +++++++++ +.. _example: + +The default value for ``bench_test_round`` is 10, and it will take about 2 hours to run 10 rounds. +To change it, for example from ``10`` to ``1``, add the environment variable to the .env file as shown below. + +.. code-block:: Properties + + BENCHMARK_BENCH_TEST_ROUND=1 + +Data Format +------------- +.. _data: + +The sample data in ``bench_data_path`` is a dictionary where each key represents a factor name. + +The value associated with each key is factor data containing the following information: + +- **description**: A textual description of the factor. +- **formulation**: A LaTeX formula representing the factor's formulation. +- **variables**: A dictionary of variables involved in the factor. +- **Category**: The category or classification of the factor. +- **Difficulty**: The difficulty level of implementing or understanding the factor. +- **gt_code**: A piece of code associated with the factor. + +Here is an example of this data format: + +.. literalinclude:: ../../rdagent/components/benchmark/example.json + :language: json + +Run Benchmark +------------- +.. _run: + +Start the benchmark after finishing the :doc:`../installation_and_configuration`. + +.. code-block:: Properties + + python rdagent/app/quant_factor_benchmark/eval.py + + + +Once completed, a pkl file will be generated, and its path will be printed on the last line of the console. + +Show Result +------------- +.. _show: + +The ``analysis.py`` script reads the data from the pkl file and converts it to an image. +Modify the python code in ``rdagent/app/quant_factor_benchmark/analysis.py`` to specify the path to the pkl file and the output path for the png file. + +.. code-block:: Properties + + python rdagent/app/quant_factor_benchmark/analysis.py + +A png file will be saved to the designated path as shown below. + +.. 
image:: ../_static/benchmark.png + diff --git a/docs/research/research.rst b/docs/research/research.rst new file mode 100644 index 00000000..b27165a9 --- /dev/null +++ b/docs/research/research.rst @@ -0,0 +1,15 @@ +=========== +Research +=========== + + + +.. TODO: xiao will add this + + +.. toctree:: + :maxdepth: 1 + :caption: Doctree: + :hidden: + + benchmark.rst diff --git a/rdagent/components/benchmark/conf.py b/rdagent/components/benchmark/conf.py index a05bbe39..095212a7 100644 --- a/rdagent/components/benchmark/conf.py +++ b/rdagent/components/benchmark/conf.py @@ -1,29 +1,41 @@ -from dotenv import load_dotenv - -load_dotenv(verbose=True, override=True) from dataclasses import field from pathlib import Path from typing import Optional +from dotenv import load_dotenv from pydantic_settings import BaseSettings -DIRNAME = Path(__file__).absolute().resolve().parent +# Load environment variables +load_dotenv(verbose=True, override=True) + + +DIRNAME = Path("./") class BenchmarkSettings(BaseSettings): class Config: - env_prefix = "BENCHMARK_" # Use BENCHMARK_ as prefix for environment variables + env_prefix = "BENCHMARK_" + """Use `BENCHMARK_` as prefix for environment variables""" ground_truth_dir: Path = DIRNAME / "ground_truth" + """ground truth dir""" bench_data_path: Path = DIRNAME / "example.json" + """data for benchmark""" bench_test_round: int = 10 - bench_test_case_n: Optional[int] = None # how many test cases to run; If not given, all test cases will be run + """how many rounds to run, each round may cost 10 minutes""" + + bench_test_case_n: Optional[int] = None + """how many test cases to run; If not given, all test cases will be run""" bench_method_cls: str = "rdagent.components.coder.factor_coder.CoSTEER.FactorCoSTEER" + """method to be used for test cases""" + bench_method_extra_kwargs: dict = field( default_factory=dict, - ) # extra kwargs for the method to be tested except the task list + ) + """extra kwargs for the method to be tested except the 
task list""" bench_result_path: Path = DIRNAME / "result" + """result save path"""