From a4443caaf885578a87e6a0aa03e36c10565b34da Mon Sep 17 00:00:00 2001 From: Taozhi Wang Date: Fri, 26 Jul 2024 06:28:29 +0000 Subject: [PATCH 01/11] Openai api & eval debug --- rdagent/app/quant_factor_benchmark/eval.py | 4 +-- rdagent/components/benchmark/eval_method.py | 4 +-- rdagent/oai/llm_utils.py | 36 ++++++++++++++++----- 3 files changed, 32 insertions(+), 12 deletions(-) diff --git a/rdagent/app/quant_factor_benchmark/eval.py b/rdagent/app/quant_factor_benchmark/eval.py index 2f3cf2752..c2bbb65d7 100644 --- a/rdagent/app/quant_factor_benchmark/eval.py +++ b/rdagent/app/quant_factor_benchmark/eval.py @@ -4,7 +4,7 @@ from pathlib import Path from pprint import pprint -from rdagent.app.qlib_rd_loop.conf import PROP_SETTING +from rdagent.app.qlib_rd_loop.conf import FACTOR_PROP_SETTING from rdagent.components.benchmark.conf import BenchmarkSettings from rdagent.components.benchmark.eval_method import FactorImplementEval from rdagent.core.scenario import Scenario @@ -23,7 +23,7 @@ # 3.declare the method to be tested and pass the arguments. -scen: Scenario = import_class(PROP_SETTING.factor_scen)() +scen: Scenario = import_class(FACTOR_PROP_SETTING.scen)() generate_method = import_class(bs.bench_method_cls)(scen=scen) # 4.declare the eval method and pass the arguments. diff --git a/rdagent/components/benchmark/eval_method.py b/rdagent/components/benchmark/eval_method.py index b1b4632d3..c94a0e3e4 100644 --- a/rdagent/components/benchmark/eval_method.py +++ b/rdagent/components/benchmark/eval_method.py @@ -19,14 +19,14 @@ from rdagent.components.coder.factor_coder.factor import FactorFBWorkspace from rdagent.core.conf import RD_AGENT_SETTINGS from rdagent.core.developer import Developer -from rdagent.core.exception import CoderException, RunnerException +from rdagent.core.exception import CoderError from rdagent.core.experiment import Task, Workspace from rdagent.core.scenario import Scenario from rdagent.core.utils import multiprocessing_wrapper EVAL_RES = Dict[ str, - List[Tuple[FactorEvaluator, Union[object, RunnerException]]], + List[Tuple[FactorEvaluator, Union[object, CoderError]]], ] diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index 75e41928d..e238ca2fb 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -640,14 +640,34 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 presence_penalty=presence_penalty, ) else: - response = self.chat_client.chat.completions.create( - model=self.chat_model, - messages=messages, - stream=self.chat_stream, - seed=self.chat_seed, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - ) + if json_mode: + if add_json_in_prompt: + for message in messages[::-1]: + message["content"] = message["content"] + "\nPlease respond in json format." + if message["role"] == "system": + break + response = self.chat_client.chat.completions.create( + model=self.chat_model, + messages=messages, + max_tokens=max_tokens, + temperature=temperature, + response_format={"type": "json_object"}, + stream=self.chat_stream, + seed=self.chat_seed, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + ) + else: + response = self.chat_client.chat.completions.create( + model=self.chat_model, + messages=messages, + max_tokens=max_tokens, + temperature=temperature, + stream=self.chat_stream, + seed=self.chat_seed, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + ) if self.chat_stream: resp = "" # TODO: with logger.config(stream=self.chat_stream): and add a `stream_start` flag to add timestamp for first message. From fafea0f7ee3052f691500620307c4121d3c4f1d4 Mon Sep 17 00:00:00 2001 From: Young Date: Fri, 26 Jul 2024 06:41:39 +0000 Subject: [PATCH 02/11] Only Draft!! --- rdagent/oai/llm_utils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index e238ca2fb..51bd72e8b 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -639,6 +639,19 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 frequency_penalty=frequency_penalty, presence_penalty=presence_penalty, ) + kwargs = dict( + model=self.chat_model, + messages=messages, + max_tokens=max_tokens, + temperature=temperature, + stream=self.chat_stream, + seed=self.chat_seed, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + ) + if json_mode: + kwargs["response_format"]={"type": "json_object"} + response = self.chat_client.chat.completions.create(**kwargs) else: if json_mode: if add_json_in_prompt: From aa922cd12584aaf2bae87ef9d20dcfe8b6ac745b Mon Sep 17 00:00:00 2001 From: Taozhi Wang Date: Fri, 26 Jul 2024 08:27:37 +0000 Subject: [PATCH 03/11] CI issue --- rdagent/oai/llm_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index 51bd72e8b..a5f8047f3 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -650,7 +650,7 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 presence_penalty=presence_penalty, ) if json_mode: - kwargs["response_format"]={"type": "json_object"} + kwargs["response_format"] = {"type": "json_object"} response = self.chat_client.chat.completions.create(**kwargs) else: if json_mode: From d17d0f08edfe3b8a5b7c54b82d574088dfdfed76 Mon Sep 17 00:00:00 2001 From: Taozhi Wang Date: Fri, 26 Jul 2024 08:46:17 +0000 Subject: [PATCH 04/11] Updates --- rdagent/oai/llm_utils.py | 90 +++++++++------------------------------- 1 file changed, 19 insertions(+), 71 deletions(-) diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index a5f8047f3..a302b83c7 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -610,77 +610,25 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 if self.cfg.log_llm_chat_content: logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages") else: - if self.use_azure: - if json_mode: - if add_json_in_prompt: - for message in messages[::-1]: - message["content"] = message["content"] + "\nPlease respond in json format." - if message["role"] == "system": - break - response = self.chat_client.chat.completions.create( - model=self.chat_model, - messages=messages, - max_tokens=max_tokens, - temperature=temperature, - response_format={"type": "json_object"}, - stream=self.chat_stream, - seed=self.chat_seed, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - ) - else: - response = self.chat_client.chat.completions.create( - model=self.chat_model, - messages=messages, - max_tokens=max_tokens, - temperature=temperature, - stream=self.chat_stream, - seed=self.chat_seed, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - ) - kwargs = dict( - model=self.chat_model, - messages=messages, - max_tokens=max_tokens, - temperature=temperature, - stream=self.chat_stream, - seed=self.chat_seed, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - ) - if json_mode: - kwargs["response_format"] = {"type": "json_object"} - response = self.chat_client.chat.completions.create(**kwargs) - else: - if json_mode: - if add_json_in_prompt: - for message in messages[::-1]: - message["content"] = message["content"] + "\nPlease respond in json format." - if message["role"] == "system": - break - response = self.chat_client.chat.completions.create( - model=self.chat_model, - messages=messages, - max_tokens=max_tokens, - temperature=temperature, - response_format={"type": "json_object"}, - stream=self.chat_stream, - seed=self.chat_seed, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - ) - else: - response = self.chat_client.chat.completions.create( - model=self.chat_model, - messages=messages, - max_tokens=max_tokens, - temperature=temperature, - stream=self.chat_stream, - seed=self.chat_seed, - frequency_penalty=frequency_penalty, - presence_penalty=presence_penalty, - ) + kwargs = dict( + model=self.chat_model, + messages=messages, + max_tokens=max_tokens, + temperature=temperature, + stream=self.chat_stream, + seed=self.chat_seed, + frequency_penalty=frequency_penalty, + presence_penalty=presence_penalty, + ) + if json_mode: + if add_json_in_prompt: + for message in messages[::-1]: + message["content"] = message["content"] + "\nPlease respond in json format." + if message["role"] == "system": + break + kwargs["response_format"] = {"type": "json_object"} + response = self.chat_client.chat.completions.create(**kwargs) + if self.chat_stream: resp = "" # TODO: with logger.config(stream=self.chat_stream): and add a `stream_start` flag to add timestamp for first message. From c2a2e924061d410591239c5deacd418e63c11a46 Mon Sep 17 00:00:00 2001 From: Taozhi Wang Date: Mon, 29 Jul 2024 06:00:26 +0000 Subject: [PATCH 05/11] add docs --- docs/installation.rst | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index d1adfabb7..b226bf250 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -12,20 +12,45 @@ For different scenarios Configuration ============= -Quick configuration +To run the application, please create a `.env` file in the root directory of the project and add the following environment variables according to your requirements. +OpenAI API +------------ + +If you are using the OpenAI API, here are the related environment variables that you need to set: + + .. code-block:: Properties + + EMBEDDING_OPENAI_API_KEY= + EMBEDDING_OPENAI_MODEL=text-embedding-3-small + CHAT_MODEL=gpt-4-turbo Azure OpenAI ------------ +The following environment variables are standard configuration options for the user using the OpenAI API. + + .. code-block:: Properties + + USE_AZURE=True + + EMBEDDING_OPENAI_API_KEY= + EMBEDDING_OPENAI_MODEL=text-embedding-3-small + EMBEDDING_AZURE_API_BASE= # The base URL for the Azure OpenAI API. + EMBEDDING_AZURE_API_VERSION = # The version of the Azure OpenAI API. + + CHAT_MODEL=gpt-4-turbo + CHAT_AZURE_API_VERSION = # The version of the Azure OpenAI API. +Use Azure Token Provider +------------------------ -USE_AZURE_TOKEN_PROVIDER -~~~~~~~~~~~~~~~~~~~~~~~~ +If you are using the Azure token provider, you need to set the `USE_AZURE_TOKEN_PROVIDER` environment variable to `True`. then +use the environment variables provided in the [Azure Configuration section](#azure-configuration). -### ☁️ Azure Configuration +☁️ Azure Configuration - Install Azure CLI: ```sh From 085fdfae9d21bcd60bcaef0227ca48b7f75d7350 Mon Sep 17 00:00:00 2001 From: Taozhi Wang Date: Mon, 29 Jul 2024 09:05:55 +0000 Subject: [PATCH 06/11] fix rst link --- docs/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.rst b/docs/installation.rst index b226bf250..61e89303e 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -48,7 +48,7 @@ Use Azure Token Provider ------------------------ If you are using the Azure token provider, you need to set the `USE_AZURE_TOKEN_PROVIDER` environment variable to `True`. then -use the environment variables provided in the [Azure Configuration section](#azure-configuration). +use the environment variables provided in the `Azure Configuration section `_. ☁️ Azure Configuration - Install Azure CLI: From 6fbdb99e990d13b791076dc00b211f797b8221cb Mon Sep 17 00:00:00 2001 From: Taozhi Wang Date: Tue, 30 Jul 2024 09:30:06 +0000 Subject: [PATCH 07/11] Enrionment variables modification --- .env.example | 15 +++------------ docs/installation.rst | 10 +++++++--- rdagent/core/conf.py | 1 + rdagent/oai/llm_utils.py | 12 ++++++++---- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.env.example b/.env.example index 8b504ad1c..94a7ff1b0 100644 --- a/.env.example +++ b/.env.example @@ -1,27 +1,18 @@ # Global configs: -USE_AZURE=True MAX_RETRY=10 RETRY_WAIT_SECONDS=20 DUMP_CHAT_CACHE=True USE_CHAT_CACHE=True DUMP_EMBEDDING_CACHE=True USE_EMBEDDING_CACHE=True -LOG_LLM_CHAT_CONTENT=False -CHAT_FREQUENCY_PENALTY=0.0 -CHAT_PRESENCE_PENALTY=0.0 -LOG_TRACE_PATH=log_traces + +# api key +OPENAI_API_KEY=your_api_key # embedding model configs: -EMBEDDING_OPENAI_API_KEY=your_api_key -EMBEDDING_AZURE_API_BASE=your_api_base -EMBEDDING_AZURE_API_VERSION=your_api_version EMBEDDING_MODEL=text-embedding-3-small - # chat model configs: -CHAT_OPENAI_API_KEY=your_api_key # 5c -CHAT_AZURE_API_BASE=your_api_base -CHAT_AZURE_API_VERSION=your_api_version CHAT_MODEL=your_model_version CHAT_MAX_TOKENS=3000 CHAT_TEMPERATURE=0.7 diff --git a/docs/installation.rst b/docs/installation.rst index 61e89303e..658bd6cbc 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -12,19 +12,23 @@ For different scenarios Configuration ============= -To run the application, please create a `.env` file in the root directory of the project and add the following environment variables according to your requirements. +To run the application, please create a `.env` file in the root directory of the project and add environment variables according to your requirements. +The standard configuration options for the user using the OpenAI API are provided in the `.env.example` file. + +Here are some other configuration options that you can use: OpenAI API ------------ -If you are using the OpenAI API, here are the related environment variables that you need to set: +You can use different OpenAI API keys for embedding model and chat model. .. code-block:: Properties EMBEDDING_OPENAI_API_KEY= EMBEDDING_OPENAI_MODEL=text-embedding-3-small + CHAT_OPENAI_API_KEY= CHAT_MODEL=gpt-4-turbo Azure OpenAI @@ -36,7 +40,7 @@ The following environment variables are standard configuration options for the u USE_AZURE=True - EMBEDDING_OPENAI_API_KEY= + OPENAI_API_KEY= EMBEDDING_OPENAI_MODEL=text-embedding-3-small EMBEDDING_AZURE_API_BASE= # The base URL for the Azure OpenAI API. EMBEDDING_AZURE_API_VERSION = # The version of the Azure OpenAI API. diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py index ea152e382..91e3d5e82 100644 --- a/rdagent/core/conf.py +++ b/rdagent/core/conf.py @@ -34,6 +34,7 @@ class RDAgentSettings(BaseSettings): max_past_message_include: int = 10 # Chat configs + openai_api_key: str = "" chat_openai_api_key: str = "" chat_azure_api_base: str = "" chat_azure_api_version: str = "" diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index a302b83c7..fdd30a19e 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -298,7 +298,14 @@ def __init__( # noqa: C901, PLR0912, PLR0915 self.use_azure_token_provider = self.cfg.use_azure_token_provider self.managed_identity_client_id = self.cfg.managed_identity_client_id - self.chat_api_key = self.cfg.chat_openai_api_key if chat_api_key is None else chat_api_key + if self.cfg.openai_api_key: + self.chat_api_key = self.cfg.openai_api_key + self.embedding_api_key = self.cfg.openai_api_key + else: + self.chat_api_key = self.cfg.chat_openai_api_key if chat_api_key is None else chat_api_key + self.embedding_api_key = ( + self.cfg.embedding_openai_api_key if embedding_api_key is None else embedding_api_key + ) self.chat_model = self.cfg.chat_model if chat_model is None else chat_model self.encoder = tiktoken.encoding_for_model(self.chat_model) self.chat_api_base = self.cfg.chat_azure_api_base if chat_api_base is None else chat_api_base @@ -306,9 +313,6 @@ def __init__( # noqa: C901, PLR0912, PLR0915 self.chat_stream = self.cfg.chat_stream self.chat_seed = self.cfg.chat_seed - self.embedding_api_key = ( - self.cfg.embedding_openai_api_key if embedding_api_key is None else embedding_api_key - ) self.embedding_model = self.cfg.embedding_model if embedding_model is None else embedding_model self.embedding_api_base = ( self.cfg.embedding_azure_api_base if embedding_api_base is None else embedding_api_base From 716c482ef0f3f272f3c29ffc5cdec11a30078bb5 Mon Sep 17 00:00:00 2001 From: Taozhi Wang Date: Tue, 30 Jul 2024 09:40:13 +0000 Subject: [PATCH 08/11] Readme & simplify example .env --- .env.example | 4 ---- README.md | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/.env.example b/.env.example index 94a7ff1b0..9a80a884b 100644 --- a/.env.example +++ b/.env.example @@ -1,10 +1,6 @@ # Global configs: MAX_RETRY=10 RETRY_WAIT_SECONDS=20 -DUMP_CHAT_CACHE=True -USE_CHAT_CACHE=True -DUMP_EMBEDDING_CACHE=True -USE_EMBEDDING_CACHE=True # api key OPENAI_API_KEY=your_api_key diff --git a/README.md b/README.md index 595fc7bd4..4b4d833e0 100644 --- a/README.md +++ b/README.md @@ -60,8 +60,8 @@ TODO: use docker in quick start intead. ### ⚙️ Environment Configuration - Place the `.env` file in the same directory as the `.env.example` file. - - TOOD: please refer to ... for the detailed explanation of the `.env` - - TODO: simplify `.env.example` only keep OpenAI or Azure Azure OpenAI + - The `.env.example` file contains the environment variables required for users using the OpenAI API + - please refer to [Configuration](docs/build/html/installation.html#azure-openai) for the detailed explanation of the `.env` - Export each variable in the `.env` file: ```sh export $(grep -v '^#' .env | xargs) From e7854dcd057867f6c303a22dfe807bf44a138763 Mon Sep 17 00:00:00 2001 From: Taozhi Wang Date: Tue, 30 Jul 2024 09:45:20 +0000 Subject: [PATCH 09/11] fix rst file --- docs/installation.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/installation.rst b/docs/installation.rst index 658bd6cbc..25725ad36 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -26,7 +26,7 @@ You can use different OpenAI API keys for embedding model and chat model. .. code-block:: Properties EMBEDDING_OPENAI_API_KEY= - EMBEDDING_OPENAI_MODEL=text-embedding-3-small + EMBEDDING_MODEL=text-embedding-3-small CHAT_OPENAI_API_KEY= CHAT_MODEL=gpt-4-turbo @@ -41,7 +41,8 @@ The following environment variables are standard configuration options for the u USE_AZURE=True OPENAI_API_KEY= - EMBEDDING_OPENAI_MODEL=text-embedding-3-small + + EMBEDDING_MODEL=text-embedding-3-small EMBEDDING_AZURE_API_BASE= # The base URL for the Azure OpenAI API. EMBEDDING_AZURE_API_VERSION = # The version of the Azure OpenAI API. From f0300d3362a4f11aa96b24bad9fba52d8d376d59 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Tue, 30 Jul 2024 17:54:57 +0800 Subject: [PATCH 10/11] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 4b4d833e0..5efccee52 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ TODO: use docker in quick start intead. ### ⚙️ Environment Configuration - Place the `.env` file in the same directory as the `.env.example` file. - - The `.env.example` file contains the environment variables required for users using the OpenAI API + - The `.env.example` file contains the environment variables required for users using the OpenAI API (Please note that `.env.example` is an example file. `.env` is the one that will be finally used.) - please refer to [Configuration](docs/build/html/installation.html#azure-openai) for the detailed explanation of the `.env` - Export each variable in the `.env` file: ```sh From bdd4c2c3fda36ba2b95bfcda470437be7f191a88 Mon Sep 17 00:00:00 2001 From: you-n-g Date: Tue, 30 Jul 2024 17:59:47 +0800 Subject: [PATCH 11/11] Update rdagent/core/conf.py --- rdagent/core/conf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py index 91e3d5e82..975cad47b 100644 --- a/rdagent/core/conf.py +++ b/rdagent/core/conf.py @@ -34,7 +34,7 @@ class RDAgentSettings(BaseSettings): max_past_message_include: int = 10 # Chat configs - openai_api_key: str = "" + openai_api_key: str = "" # TODO: simplify the key design. chat_openai_api_key: str = "" chat_azure_api_base: str = "" chat_azure_api_version: str = ""