Skip to content

Commit

Permalink
New Framework for idea proposal and implementation on RD-Agent (#34)
Browse files Browse the repository at this point in the history
* Commit init framework

* Co-authored-by: Yuante Li (FESCO Adecco Human Resources) <[email protected]>
Co-authored-by: XianBW <[email protected]>

* add an import

* refine the whole framework

* benchmark related framework

* fix black and isort errors

* move requirements to folder

* fix black again

---------

Co-authored-by: Young <[email protected]>
Co-authored-by: xuyang1 <[email protected]>
  • Loading branch information
3 people authored Jun 28, 2024
1 parent da05927 commit 5ff0b66
Show file tree
Hide file tree
Showing 63 changed files with 649 additions and 1,115 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ toml-sort:
$(PIPRUN) toml-sort --check pyproject.toml

# Check lint with all linters.
lint: mypy ruff toml-sort
lint: black isort mypy ruff toml-sort

# Run pre-commit with autofix against all files.
pre-commit:
Expand Down
2 changes: 1 addition & 1 deletion constraints/3.8.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,4 +151,4 @@ typing_extensions==4.9.0
tzdata==2023.4
urllib3==2.1.0
yarl==1.9.4
zipp==3.17.0
zipp==3.17.0
23 changes: 10 additions & 13 deletions rdagent/app/CI/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@
from typing import Any, Literal

import tree_sitter_python
from rich import print
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn
from rich.prompt import Prompt
from rich.rule import Rule
from rich.syntax import Syntax
from rich.table import Table
from rich.text import Text
from tree_sitter import Language, Node, Parser

from rdagent.core.evolving_framework import (
Evaluator,
EvoAgent,
Expand All @@ -24,15 +34,6 @@
)
from rdagent.core.prompts import Prompts
from rdagent.oai.llm_utils import APIBackend
from rich import print
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, TimeElapsedColumn
from rich.prompt import Prompt
from rich.rule import Rule
from rich.syntax import Syntax
from rich.table import Table
from rich.text import Text
from tree_sitter import Language, Node, Parser

py_parser = Parser(Language(tree_sitter_python.language()))
CI_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
Expand Down Expand Up @@ -355,7 +356,6 @@ def evaluate(self, evo: Repo, **kwargs: Any) -> CIFeedback: # noqa: ARG002


class MypyEvaluator(Evaluator):

def __init__(self, command: str | None = None) -> None:
if command is None:
self.command = "mypy . --pretty --no-error-summary --show-column-numbers"
Expand Down Expand Up @@ -411,12 +411,10 @@ def evaluate(self, evo: Repo, **kwargs: Any) -> CIFeedback: # noqa: ARG002


class MultiEvaluator(Evaluator):

def __init__(self, *evaluators: Evaluator) -> None:
self.evaluators = evaluators

def evaluate(self, evo: Repo, **kwargs: Any) -> CIFeedback:

all_errors = defaultdict(list)
for evaluator in self.evaluators:
feedback: CIFeedback = evaluator.evaluate(evo, **kwargs)
Expand All @@ -438,7 +436,6 @@ def evolve( # noqa: C901, PLR0912, PLR0915
knowledge_l: list[Knowledge] | None = None, # noqa: ARG002
**kwargs: Any, # noqa: ARG002
) -> Repo:

@dataclass
class CodeFixGroup:
start_line: int
Expand Down
5 changes: 5 additions & 0 deletions rdagent/app/data mining/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@



# TODO
If we have more capacity, we will include more scenarios.
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
# %%
from dotenv import load_dotenv
from rdagent.factor_implementation.CoSTEER import CoSTEERFG
from rdagent.factor_implementation.task_loader.pdf_loader import FactorImplementationTaskLoaderFromPDFfiles

from rdagent.scenarios.qlib.factor_task_implementation import (
COSTEERFG_QUANT_FACTOR_IMPLEMENTATION,
)
from rdagent.scenarios.qlib.factor_task_loader.pdf_loader import (
FactorImplementationTaskLoaderFromPDFfiles,
)

assert load_dotenv()


def extract_factors_and_implement(report_file_path: str) -> None:
factor_tasks = FactorImplementationTaskLoaderFromPDFfiles().load(report_file_path)
implementation_result = CoSTEERFG().generate(factor_tasks)
implementation_result = COSTEERFG_QUANT_FACTOR_IMPLEMENTATION().generate(factor_tasks)
# Qlib to run the implementation
return implementation_result


Expand Down
2 changes: 1 addition & 1 deletion rdagent/app/model_implementation/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
# TODO: Align it with the benchmark framework after @wenjun refines the evaluation part.
# Currently, we just handcraft a workflow for fast evaluation.

mil = ModelImpLoader(DIRNAME.parent.parent / "model_implementation" / "benchmark" / "gt_code")
mil = ModelImpLoader(DIRNAME.parent.parent / "model_implementation" / "benchmark" / "gt_code")

mie = ModelImpValEval()
# Evaluation:
Expand Down
12 changes: 12 additions & 0 deletions rdagent/app/model_proposal/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from pydantic_settings import BaseSettings


class ModelPropSetting(BaseSettings):
    """Settings object for the model-proposal application.

    Built on pydantic-settings' ``BaseSettings``. Only ``scen`` is declared
    here; it has no default, so a value must be supplied when the settings
    object is created (presumably through the environment — TODO confirm).
    """

    scen: str  # scenario class reference, written as a.b.c:XXXClass
    # TODO: initial keywords should be included in the settings
    ...


MODEL_PROP_SETTING = ModelPropSetting()
36 changes: 36 additions & 0 deletions rdagent/app/model_proposal/run.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
"""
TODO: Model Structure RD-Loop
TODO: move the following code to a new class: Model_RD_Agent
"""

# import_from
from rdagent.app.model_proposal.conf import MODEL_PROP_SETTING
from rdagent.core.implementation import TaskGenerator
from rdagent.core.proposal import Belief2Task, BeliefSet, Imp2Feedback, Trace

# NOTE(review): `load_from_cls_uri` is called below but is not imported anywhere
# in this module, so the script raises NameError as written.
# TODO: import the helper once its location is confirmed.
# NOTE(review): the settings attributes read below (`belief_gen`, `belief2task`,
# `task_gen`, `imp2feedback`, `iter_n`) must exist on MODEL_PROP_SETTING —
# verify they are declared on the settings class.


# Instantiate the scenario from the class reference stored in the settings.
scen = load_from_cls_uri(MODEL_PROP_SETTING.scen)()

# Component that produces a belief from the trace; constructed with the scenario.
belief_gen = load_from_cls_uri(MODEL_PROP_SETTING.belief_gen)(scen)

# Component that converts a belief into a task; constructed without arguments.
belief2task: Belief2Task = load_from_cls_uri(MODEL_PROP_SETTING.belief2task)()

task_gen: TaskGenerator = load_from_cls_uri(MODEL_PROP_SETTING.task_gen)(scen)  # for implementation

imp2feedback: Imp2Feedback = load_from_cls_uri(MODEL_PROP_SETTING.imp2feedback)(scen)  # for implementation


# Number of loop iterations to run.
iter_n = MODEL_PROP_SETTING.iter_n

# History container; each iteration appends a (belief, feedback) pair to `trace.hist`.
trace = Trace()

# NOTE(review): `belief_set` is created but not referenced again in this script.
belief_set = BeliefSet()
for _ in range(iter_n):
    # One iteration: belief -> task -> implementation -> execution -> feedback.
    belief = belief_gen.gen(trace)
    task = belief2task.convert(belief)
    imp = task_gen.gen(task)
    imp.execute()
    feedback = imp2feedback.summarize(imp)
    # Record the outcome so the next call to `belief_gen.gen` receives it via `trace`.
    trace.hist.append((belief, feedback))
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from rdagent.benchmark.conf import BenchmarkSettings
from rdagent.components.benchmark.conf import BenchmarkSettings
from rdagent.components.benchmark.eval_method import FactorImplementEval
from rdagent.core.utils import import_class
from rdagent.benchmark.eval_method import FactorImplementEval
from rdagent.factor_implementation.task_loader.json_loader import FactorTestCaseLoaderFromJsonFile
from rdagent.scenarios.qlib.factor_task_loader.json_loader import (
FactorTestCaseLoaderFromJsonFile,
)

# 1. Read the settings
bs = BenchmarkSettings()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from dotenv import load_dotenv

load_dotenv(verbose=True, override=True)
from dataclasses import field
from pathlib import Path
from typing import Optional
from typing import Optional

from pydantic_settings import BaseSettings

DIRNAME = Path(__file__).absolute().resolve().parent

class BenchmarkSettings(BaseSettings):

class BenchmarkSettings(BaseSettings):
ground_truth_dir: Path = DIRNAME / "ground_truth"

bench_data_path: Path = DIRNAME / "example.json"
Expand All @@ -22,4 +23,4 @@ class BenchmarkSettings(BaseSettings):
default_factory=dict,
) # extra kwargs for the method to be tested except the task list

bench_result_path: Path = DIRNAME / "result"
bench_result_path: Path = DIRNAME / "result"
Original file line number Diff line number Diff line change
@@ -1,27 +1,29 @@
from collections import defaultdict
from pathlib import Path
from typing import List, Tuple, Union

from tqdm import tqdm
from collections import defaultdict
from rdagent.factor_implementation.share_modules.factor_implementation_config import FACTOR_IMPLEMENT_SETTINGS
from rdagent.core.exception import ImplementRunException
from rdagent.core.task import (
TaskImplementation,
TestCase,
)
from rdagent.factor_implementation.evolving.evaluators import (

from rdagent.components.task_implementation.factor_implementation.evolving.evaluators import (
FactorImplementationCorrelationEvaluator,
FactorImplementationEvaluator,
FactorImplementationIndexEvaluator,
FactorImplementationIndexFormatEvaluator,
FactorImplementationMissingValuesEvaluator,
FactorImplementationRowCountEvaluator,
FactorImplementationSingleColumnEvaluator,
FactorImplementationValuesEvaluator,
FactorImplementationEvaluator,
)
from rdagent.components.task_implementation.factor_implementation.evolving.factor import (
FileBasedFactorImplementation,
)
from rdagent.components.task_implementation.factor_implementation.share_modules.factor_implementation_config import (
FACTOR_IMPLEMENT_SETTINGS,
)
from rdagent.core.exception import ImplementRunException
from rdagent.core.implementation import TaskGenerator
from rdagent.core.task import TaskImplementation, TestCase
from rdagent.core.utils import multiprocessing_wrapper
from rdagent.factor_implementation.evolving.factor import FileBasedFactorImplementation


class BaseEval:
Expand Down Expand Up @@ -109,14 +111,14 @@ def __init__(
**kwargs,
):
online_evaluator_l = [
FactorImplementationSingleColumnEvaluator(),
FactorImplementationIndexFormatEvaluator(),
FactorImplementationRowCountEvaluator(),
FactorImplementationIndexEvaluator(),
FactorImplementationMissingValuesEvaluator(),
FactorImplementationValuesEvaluator(),
FactorImplementationCorrelationEvaluator(hard_check=False),
]
FactorImplementationSingleColumnEvaluator(),
FactorImplementationIndexFormatEvaluator(),
FactorImplementationRowCountEvaluator(),
FactorImplementationIndexEvaluator(),
FactorImplementationMissingValuesEvaluator(),
FactorImplementationValuesEvaluator(),
FactorImplementationCorrelationEvaluator(hard_check=False),
]
super().__init__(online_evaluator_l, test_cases, method, *args, **kwargs)
self.test_round = test_round

Expand Down
File renamed without changes.
File renamed without changes.
Empty file.
Empty file.
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,13 @@
from pathlib import Path
from typing import Any, NoReturn

from rdagent.components.knowledge_management.vector_base import (
KnowledgeMetaData,
PDVectorBase,
VectorBase,
cosine,
)
from rdagent.oai.llm_utils import APIBackend
from rdagent.knowledge_management.vector_base import KnowledgeMetaData, PDVectorBase, VectorBase, cosine

Node = KnowledgeMetaData

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import pandas as pd
from scipy.spatial.distance import cosine

from rdagent.oai.llm_utils import APIBackend
from rdagent.core.log import RDAgentLog
from rdagent.oai.llm_utils import APIBackend


class KnowledgeMetaData:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,23 +1,29 @@
import pickle
from pathlib import Path
from typing import List
from rdagent.core.implementation import TaskGenerator
from rdagent.core.task import TaskImplementation
from rdagent.factor_implementation.evolving.knowledge_management import FactorImplementationKnowledgeBaseV1
from rdagent.factor_implementation.evolving.factor import FactorImplementTask, FactorEvovlingItem
from rdagent.factor_implementation.evolving.knowledge_management import (

from rdagent.components.task_implementation.factor_implementation.evolving.evaluators import (
FactorImplementationEvaluatorV1,
FactorImplementationsMultiEvaluator,
)
from rdagent.components.task_implementation.factor_implementation.evolving.evolving_strategy import (
FactorEvolvingStrategyWithGraph,
)
from rdagent.components.task_implementation.factor_implementation.evolving.factor import (
FactorEvovlingItem,
FactorImplementTask,
)
from rdagent.components.task_implementation.factor_implementation.evolving.knowledge_management import (
FactorImplementationGraphKnowledgeBase,
FactorImplementationGraphRAGStrategy,
FactorImplementationKnowledgeBaseV1,
)
from rdagent.factor_implementation.evolving.evolving_strategy import FactorEvolvingStrategyWithGraph
from rdagent.factor_implementation.evolving.evaluators import (
FactorImplementationsMultiEvaluator,
FactorImplementationEvaluatorV1,
)
from rdagent.core.evolving_agent import RAGEvoAgent
from rdagent.factor_implementation.share_modules.factor_implementation_config import (
from rdagent.components.task_implementation.factor_implementation.share_modules.factor_implementation_config import (
FACTOR_IMPLEMENT_SETTINGS,
)
from rdagent.core.evolving_agent import RAGEvoAgent
from rdagent.core.implementation import TaskGenerator
from rdagent.core.task import TaskImplementation


class CoSTEERFG(TaskGenerator):
Expand Down Expand Up @@ -47,7 +53,6 @@ def __init__(
self.evolving_version = 2

def load_or_init_knowledge_base(self, former_knowledge_base_path: Path = None, component_init_list: list = []):

if former_knowledge_base_path is not None and former_knowledge_base_path.exists():
factor_knowledge_base = pickle.load(open(former_knowledge_base_path, "rb"))
if self.evolving_version == 1 and not isinstance(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,26 +1,27 @@
import json
import re

from abc import abstractmethod
from typing import Tuple
from pathlib import Path
from typing import List, Tuple

import pandas as pd
from jinja2 import Template

from rdagent.oai.llm_utils import APIBackend
from rdagent.core.log import RDAgentLog
from rdagent.factor_implementation.evolving.evolving_strategy import FactorImplementTask, FactorEvovlingItem
from rdagent.core.task import (
TaskImplementation,
from rdagent.components.task_implementation.factor_implementation.evolving.evolving_strategy import (
FactorEvovlingItem,
FactorImplementTask,
)
from typing import List, Tuple
from rdagent.core.evolving_framework import QueriedKnowledge, Feedback
from rdagent.components.task_implementation.factor_implementation.share_modules.factor_implementation_config import (
FACTOR_IMPLEMENT_SETTINGS,
)
from rdagent.core.conf import RD_AGENT_SETTINGS
from rdagent.core.evaluation import Evaluator
from rdagent.core.evolving_framework import Feedback, QueriedKnowledge
from rdagent.core.log import RDAgentLog
from rdagent.core.prompts import Prompts
from rdagent.core.conf import RD_AGENT_SETTINGS
from rdagent.factor_implementation.share_modules.factor_implementation_config import FACTOR_IMPLEMENT_SETTINGS
from rdagent.core.task import TaskImplementation
from rdagent.core.utils import multiprocessing_wrapper
from pathlib import Path
from rdagent.oai.llm_utils import APIBackend

evaluate_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")

Expand Down
Loading

0 comments on commit 5ff0b66

Please sign in to comment.