From 9eb403c646e7eab199c92db1805f59351c6f0423 Mon Sep 17 00:00:00 2001 From: bowen xian Date: Thu, 4 Jul 2024 12:09:25 +0000 Subject: [PATCH 01/14] remove ruff comment in log.py --- rdagent/core/log.py | 48 --------------------------------------------- 1 file changed, 48 deletions(-) diff --git a/rdagent/core/log.py b/rdagent/core/log.py index 9c6e2b74..e789863c 100644 --- a/rdagent/core/log.py +++ b/rdagent/core/log.py @@ -43,27 +43,9 @@ def render(self, text: str, color: str = "", style: str = "") -> str: colors = self.get_all_colors() # Perhaps color and font should be distinguished here. if color and color in colors: - # Changes to accommodate ruff checks. - # Original code: - # raise ValueError(f"color should be in: {colors} but now is: {color}") - # Description of the problem: - # TRY003 Avoid specifying long messages outside the exception class - # EM102 Exception must not use an f-string literal, assign to variable first - # References: - # https://docs.astral.sh/ruff/rules/raise-vanilla-args/ - # https://docs.astral.sh/ruff/rules/f-string-in-exception/ error_message = f"color should be in: {colors} but now is: {color}" raise ValueError(error_message) if style and style in colors: - # Changes to accommodate ruff checks. - # Original code: - # raise ValueError(f"style should be in: {colors} but now is: {style}") - # Description of the problem: - # TRY003 Avoid specifying long messages outside the exception class - # EM102 Exception must not use an f-string literal, assign to variable first - # References: - # https://docs.astral.sh/ruff/rules/raise-vanilla-args/ - # https://docs.astral.sh/ruff/rules/f-string-in-exception/ error_message = f"style should be in: {colors} but now is: {style}" raise ValueError(error_message) @@ -94,18 +76,6 @@ def __setstate__(self, _: str) -> None: def plain_info(self, *args: Sequence) -> None: for arg in args: - # Changes to accommodate ruff checks. - # Original code: - # self.logger.info( - # f""" - # {LogColors.YELLOW}{LogColors.BOLD} - # Info:{LogColors.END}{LogColors.WHITE}{arg}{LogColors.END} - # """, - # ) - # Description of the problem: - # G004 Logging statement uses f-string - # References: - # https://docs.astral.sh/ruff/rules/logging-f-string/ info = f""" {LogColors.YELLOW}{LogColors.BOLD} Info:{LogColors.END}{LogColors.WHITE}{arg}{LogColors.END} @@ -114,28 +84,10 @@ def plain_info(self, *args: Sequence) -> None: def warning(self, *args: Sequence) -> None: for arg in args: - # Changes to accommodate ruff checks. - # Original code: - # self.logger.warning( - # f"{LogColors.BLUE}{LogColors.BOLD}Warning:{LogColors.END}{arg}", - # ) - # Description of the problem: - # G004 Logging statement uses f-string - # References: - # https://docs.astral.sh/ruff/rules/logging-f-string/ info = f"{LogColors.BLUE}{LogColors.BOLD}Warning:{LogColors.END}{arg}" self.logger.warning(info) def error(self, *args: Sequence) -> None: for arg in args: - # Changes to accommodate ruff checks. - # Original code: - # self.logger.error( - # f"{LogColors.RED}{LogColors.BOLD}Error:{LogColors.END}{arg}", - # ) - # Description of the problem: - # G004 Logging statement uses f-string - # References: - # https://docs.astral.sh/ruff/rules/logging-f-string/ info = f"{LogColors.RED}{LogColors.BOLD}Error:{LogColors.END}{arg}" self.logger.error(info) From cce68ad81f7d487ea3aa9fe74efa2512c3bb260c Mon Sep 17 00:00:00 2001 From: bowen xian Date: Tue, 9 Jul 2024 04:04:42 +0000 Subject: [PATCH 02/14] change log framework and fix llm_utils.py's logs --- rdagent/core/conf.py | 5 +- rdagent/core/log.py | 220 +++++++++++++++++++++++++++++++++------ rdagent/oai/llm_utils.py | 69 ++++++------ 3 files changed, 226 insertions(+), 68 deletions(-) diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py index 120cb8f8..a926d54c 100644 --- a/rdagent/core/conf.py +++ b/rdagent/core/conf.py @@ -16,6 +16,10 @@ class RDAgentSettings(BaseSettings): # TODO: (xiao) I think most of the config should be in oai.config + # Log configs + log_trace_path: str | None = None + log_llm_chat_content: bool = True + use_azure: bool = True use_azure_token_provider: bool = False managed_identity_client_id: str | None = None @@ -28,7 +32,6 @@ class RDAgentSettings(BaseSettings): prompt_cache_path: str = str(Path.cwd() / "prompt_cache.db") session_cache_folder_location: str = str(Path.cwd() / "session_cache_folder/") max_past_message_include: int = 10 - log_llm_chat_content: bool = True # Chat configs chat_openai_api_key: str = "" diff --git a/rdagent/core/log.py b/rdagent/core/log.py index e789863c..5b2e5371 100644 --- a/rdagent/core/log.py +++ b/rdagent/core/log.py @@ -1,11 +1,138 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Sequence +import re +import sys +import pickle +import json +import inspect +from typing import TYPE_CHECKING, Sequence, Literal + +if TYPE_CHECKING: + from loguru import Logger, Message, Record from loguru import logger +from abc import abstractmethod +from datetime import datetime, timezone +from pathlib import Path +from functools import partial -if TYPE_CHECKING: - from loguru import Logger +from rdagent.core.conf import RD_AGENT_SETTINGS +from rdagent.core.utils import SingletonBaseClass + + +def get_caller_info(): + # Get the current stack information + stack = inspect.stack() + # The second element is usually the caller's information + caller_info = stack[2] + frame = caller_info[0] + info = { + 'line': caller_info.lineno, + 'name': frame.f_globals['__name__'], # Get the module name from the frame's globals + } + return info + + +class Storage: + """ + Basic storage to support saving objects; + + # Usage: + + The storage has mainly two kind of users: + - The logging end: you can choose any of the following method to use the object + - We can use it directly with the native logging storage + - We can use it with other logging tools; For example, serve as a handler for loggers + - The view end: + - Mainly for the subclass of `logging.base.View` + - It should provide two kind of ways to provide content + - offline content provision. + - online content preovision. + """ + + @abstractmethod + def log(self, obj: object, name: str = "", **kwargs: dict) -> str | Path | None: + """ + + Parameters + ---------- + obj : object + The object for logging. + name : str + The name of the object. For example "a.b.c" + We may log a lot of objects to a same name + """ + ... + + +class View: + """ + Motivation: + + Display the content in the storage + """ + + +class FileStorage(Storage): + """ + The info are logginged to the file systems + + TODO: describe the storage format + """ + + def __init__(self, path: str = "./log/") -> None: + self.path = Path(path) + self.path.mkdir(parents=True, exist_ok=True) + + def log(self, + obj: object, + name: str = "", + save_type: Literal["json", "text", "pkl", "short-text"] = "short-text", + timestamp: datetime | None = None, + split_name: bool = True, + ) -> Path: + if timestamp is None: + timestamp = datetime.now(timezone.utc) + else: + timestamp = timestamp.astimezone(timezone.utc) + + cur_p = self.path + if split_name: + uri_l = name.split(".") + for u in uri_l: + cur_p = cur_p / u + else: + cur_p = cur_p / name + cur_p.mkdir(parents=True, exist_ok=True) + + path = cur_p / f"{timestamp.strftime('%Y-%m-%d_%H-%M-%S-%f')}.log" + + if save_type == "json": + path = path.with_suffix(".json") + with path.open("w") as f: + try: + json.dump(obj, f) + except TypeError: + json.dump(json.loads(str(obj)), f) + return path + elif save_type == "pkl": + path = path.with_suffix(".pkl") + with path.open("wb") as f: + pickle.dump(obj, f) + return path + elif save_type == "text": + obj = str(obj) + with path.open("w") as f: + f.write(obj) + return path + else: + obj = str(obj).strip() + if obj == "": + return + path = cur_p / "common_logs.log" + with path.open("a") as f: + f.write(f"{timestamp.isoformat()}: {obj}\n\n") # add a new line to separate logs + return path class LogColors: @@ -54,40 +181,67 @@ def render(self, text: str, color: str = "", style: str = "") -> str: return f"{style}{text}{self.END}" -class RDAgentLog: - # logger.add(loguru_handler, level="INFO") # you can add use storage as a loguru handler +def loguru2storage_handler(storage: Storage, record: Message) -> None: + msg = f"{record.record['level']} | {record.record['name']}:{record.record['line']} - {RDAgentLog.remove_ansi_codes(record.record['message'])}" + storage.log(msg, timestamp=record.record["time"], save_type="short-text") + + +class RDAgentLog(SingletonBaseClass): + + def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path) -> None: + if log_trace_path is None: + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S-%f") + log_trace_path: Path = Path.cwd() / "log" / timestamp + log_trace_path.mkdir(parents=True, exist_ok=True) + + self.storage = FileStorage(log_trace_path) + + # add handler to save log to storage + logger.add(partial(loguru2storage_handler, self.storage)) + + self.log_stream = self.LogStreamContextManager(self.storage) + + @staticmethod + def remove_ansi_codes(s: str) -> str: + ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') + return ansi_escape.sub('', s) + + class LogStreamContextManager: + def __init__(self, storage: Storage) -> None: + self.captured_logs = [] + self.storage = storage + + def capture(self, message: Message) -> None: + self.captured_logs.append(message.record["message"]) + + def __enter__(self): + logger.remove() + logger.add(sys.stderr, format=lambda x: x["message"]) + logger.add(self.capture) - def __init__(self) -> None: - self.logger: Logger = logger + def __exit__(self, exc_type, exc_value, traceback) -> None: + logger.info('\n') + logger.remove() + logger.add(partial(loguru2storage_handler, self.storage)) + logger.info("[stream log] " + "".join(self.captured_logs)) + logger.add(sys.stderr) - def info(self, *args: Sequence, plain: bool = False) -> None: - if plain: - return self.plain_info(*args) - for arg in args: - info = f"{LogColors.WHITE}{arg}{LogColors.END}" - self.logger.info(info) - return None + def log_objects(self, *objs: Sequence[object]) -> None: + caller_info = get_caller_info() + for obj in objs: + logp = self.storage.log(obj, name=f"{type(obj).__module__}.{type(obj).__name__}", save_type="pkl", split_name=False) - def __getstate__(self) -> dict: - return {} + logger.patch(lambda r: r.update(caller_info)).info(f"Logging object in {logp.absolute()}") - def __setstate__(self, _: str) -> None: - self.logger = logger + def info(self, msg: str) -> None: + caller_info = get_caller_info() + logger.patch(lambda r: r.update(caller_info)).info(msg) - def plain_info(self, *args: Sequence) -> None: - for arg in args: - info = f""" - {LogColors.YELLOW}{LogColors.BOLD} - Info:{LogColors.END}{LogColors.WHITE}{arg}{LogColors.END} - """ - self.logger.info(info) + def warning(self, msg: str) -> None: + caller_info = get_caller_info() + logger.patch(lambda r: r.update(caller_info)).warning(msg) - def warning(self, *args: Sequence) -> None: - for arg in args: - info = f"{LogColors.BLUE}{LogColors.BOLD}Warning:{LogColors.END}{arg}" - self.logger.warning(info) + def error(self, msg: str) -> None: + caller_info = get_caller_info() + logger.patch(lambda r: r.update(caller_info)).error(msg) - def error(self, *args: Sequence) -> None: - for arg in args: - info = f"{LogColors.RED}{LogColors.BOLD}Error:{LogColors.END}{arg}" - self.logger.error(info) diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index 72ab564c..2c5abc56 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -24,6 +24,7 @@ DEFAULT_QLIB_DOT_PATH = Path("./") +logger: RDAgentLog = RDAgentLog() def md5_hash(input_string: str) -> str: hash_md5 = hashlib.md5(usedforsecurity=False) @@ -35,17 +36,17 @@ def md5_hash(input_string: str) -> str: try: from azure.identity import DefaultAzureCredential, get_bearer_token_provider except ImportError: - RDAgentLog().warning("azure.identity is not installed.") + logger.warning("azure.identity is not installed.") try: import openai except ImportError: - RDAgentLog().warning("openai is not installed.") + logger.warning("openai is not installed.") try: from llama import Llama except ImportError: - RDAgentLog().warning("llama is not installed.") + logger.warning("llama is not installed.") class ConvManager: @@ -150,11 +151,11 @@ def __init__(self) -> None: self.session_cache_location = Path(self.cfg.session_cache_folder_location) self.cache = {} if not self.session_cache_location.exists(): - RDAgentLog().warning(f"Directory {self.session_cache_location} does not exist.") + logger.warning(f"Directory {self.session_cache_location} does not exist.") self.session_cache_location.mkdir(parents=True, exist_ok=True) json_files = [f for f in self.session_cache_location.iterdir() if f.suffix == ".json"] if not json_files: - RDAgentLog().info(f"No JSON files found in {self.session_cache_location}.") + logger.info(f"No JSON files found in {self.session_cache_location}.") for file_path in json_files: conversation_id = file_path.stem with file_path.open("r") as f: @@ -478,8 +479,8 @@ def _try_create_chat_completion_or_embedding( if chat_completion: return self._create_chat_completion_auto_continue(**kwargs) except openai.BadRequestError as e: # noqa: PERF203 - RDAgentLog().warning(e) - RDAgentLog().warning(f"Retrying {i+1}th time...") + logger.warning(e) + logger.warning(f"Retrying {i+1}th time...") if "'messages' must contain the word 'json' in some form" in e.message: kwargs["add_json_in_prompt"] = True elif embedding and "maximum context length" in e.message: @@ -487,8 +488,8 @@ def _try_create_chat_completion_or_embedding( content[: len(content) // 2] for content in kwargs.get("input_content_list", []) ] except Exception as e: # noqa: BLE001 - RDAgentLog().warning(e) - RDAgentLog().warning(f"Retrying {i+1}th time...") + logger.warning(e) + logger.warning(f"Retrying {i+1}th time...") time.sleep(self.retry_wait_seconds) error_message = f"Failed to create chat completion after {max_retry} retries." raise RuntimeError(error_message) @@ -526,7 +527,7 @@ def _create_embedding_inner_function( self.cache.embedding_set(content_to_embedding_dict) return [content_to_embedding_dict[content] for content in input_content_list] - def _build_messages(self, messages: list[dict]) -> str: + def _build_log_messages(self, messages: list[dict]) -> str: log_messages = "" for m in messages: log_messages += ( @@ -537,16 +538,12 @@ def _build_messages(self, messages: list[dict]) -> str: ) return log_messages - def log_messages(self, messages: list[dict]) -> None: - if self.cfg.log_llm_chat_content: - RDAgentLog().info(self._build_messages(messages)) - def log_response(self, response: str | None = None, *, stream: bool = False) -> None: if self.cfg.log_llm_chat_content: if stream: - RDAgentLog().info(f"\n{LogColors.CYAN}Response:{LogColors.END}") + logger.info(f"\n{LogColors.CYAN}Response:{LogColors.END}") else: - RDAgentLog().info(f"\n{LogColors.CYAN}Response:{response}{LogColors.END}") + logger.info(f"\n{LogColors.CYAN}Response:{response}{LogColors.END}") def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 self, @@ -560,7 +557,8 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 json_mode: bool = False, add_json_in_prompt: bool = False, ) -> str: - self.log_messages(messages) + if self.cfg.log_llm_chat_content: + logger.info(self._build_log_messages(messages)) # TODO: fail to use loguru adaptor due to stream response input_content_json = json.dumps(messages) input_content_json = ( @@ -588,7 +586,8 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 temperature=temperature, ) resp = response[0]["generation"]["content"] - self.log_response(resp) + if self.cfg.log_llm_chat_content: + logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}") elif self.use_gcr_endpoint: body = str.encode( json.dumps( @@ -609,7 +608,8 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 req = urllib.request.Request(self.gcr_endpoint, body, self.headers) # noqa: S310 response = urllib.request.urlopen(req) # noqa: S310 resp = json.loads(response.read().decode())["output"] - self.log_response(resp) + if self.cfg.log_llm_chat_content: + logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}") else: if self.use_azure: if json_mode: @@ -650,33 +650,34 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 presence_penalty=presence_penalty, ) if self.chat_stream: - self.log_response(stream=True) resp = "" - for chunk in response: - content = ( - chunk.choices[0].delta.content - if len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None - else "" - ) - if self.cfg.log_llm_chat_content: - print(LogColors.CYAN + content, end="") - resp += content - if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None: - finish_reason = chunk.choices[0].finish_reason + with logger.log_stream: + logger.info(f"{LogColors.CYAN}Response:{LogColors.END}") + for chunk in response: + content = ( + chunk.choices[0].delta.content + if len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None + else "" + ) + if self.cfg.log_llm_chat_content: + logger.info(LogColors.CYAN + content + LogColors.END) + resp += content + if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None: + finish_reason = chunk.choices[0].finish_reason else: resp = response.choices[0].message.content finish_reason = response.choices[0].finish_reason - self.log_response(resp) + if self.cfg.log_llm_chat_content: + logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}") if json_mode: json.loads(resp) if self.dump_chat_cache: self.cache.chat_set(input_content_json, resp) - # TODO: fail to use loguru adaptor due to stream response return resp, finish_reason def calculate_token_from_messages(self, messages: list[dict]) -> int: if self.use_llama2 or self.use_gcr_endpoint: - RDAgentLog().warning("num_tokens_from_messages() is not implemented for model llama2.") + logger.warning("num_tokens_from_messages() is not implemented for model llama2.") return 0 # TODO implement this function for llama2 if "gpt4" in self.chat_model or "gpt-4" in self.chat_model: From d2c1b95cbd47767fd785dfb68dcf4fa11654a53d Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 9 Jul 2024 06:01:36 +0000 Subject: [PATCH 03/14] Some thoughts for logging --- rdagent/core/log.py | 13 +++++++++++ rdagent/log/__init__.py | 3 +++ rdagent/log/ui/__init__.py | 47 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) create mode 100644 rdagent/log/__init__.py create mode 100644 rdagent/log/ui/__init__.py diff --git a/rdagent/core/log.py b/rdagent/core/log.py index 5b2e5371..4dd1f8c7 100644 --- a/rdagent/core/log.py +++ b/rdagent/core/log.py @@ -71,6 +71,19 @@ class View: Display the content in the storage """ + # TODO: pleas fix me + @abstractmethod + def display(s: Storage, watch: bool=False): + """ + + Parameters + ---------- + s : Storage + + watch : bool + should we watch the new content and display them + """ + ... class FileStorage(Storage): diff --git a/rdagent/log/__init__.py b/rdagent/log/__init__.py new file mode 100644 index 00000000..250e32d4 --- /dev/null +++ b/rdagent/log/__init__.py @@ -0,0 +1,3 @@ +""" +Here is a more reasonable place +""" diff --git a/rdagent/log/ui/__init__.py b/rdagent/log/ui/__init__.py new file mode 100644 index 00000000..69f018ad --- /dev/null +++ b/rdagent/log/ui/__init__.py @@ -0,0 +1,47 @@ +""" +UI is a kind of view for user. + +We are not sure how generality of the UI, we can't make decision among following options: +- in general folder like rdagent/log/ui +- It is for specific scenario rdagent/scenarios/ +""" + + +from rdagent.core.log import View + + +class WebView(View): + r""" + + We have tree structure for sequence + + session + | \ + ... defined by user ... + | \ + info1 -> info2 -> ... -> info3 -> ... overtime. + + Some design principles: + session1.module(e.g. implement). + `s.log(a.b.1.c) s.log(a.b.2.c)` should not handed over to users. + + An display example: + + W1 write factor + W2 evaluate factor + W3 backtest + + W123 + R + RX + RXX + RX + + W4 + trace r1 r2 r3 r4 + + What to do next? + 1. Data structure + 2. Map path like `a.b.c` to frontend components + 3. Display logic + """ From 0cb7b597dcfce8cb9bdac8618d5910dd6a945053 Mon Sep 17 00:00:00 2001 From: bowen xian Date: Wed, 10 Jul 2024 15:14:28 +0000 Subject: [PATCH 04/14] fix SingletonMeta's definition, maintain an instance dict for each class that inherits it --- rdagent/core/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rdagent/core/utils.py b/rdagent/core/utils.py index e72b17eb..9688e84a 100644 --- a/rdagent/core/utils.py +++ b/rdagent/core/utils.py @@ -19,7 +19,10 @@ class RDAgentException(Exception): # noqa: N818 class SingletonMeta(type): - _instance_dict: ClassVar[dict] = {} + # _instance_dict: ClassVar[dict] = {} + def __init__(cls, *args, **kwargs): + cls._instance_dict = {} # 为每个类创建一个唯一的_instance_dict字典 + super().__init__(*args, **kwargs) def __call__(cls, *args: Any, **kwargs: Any) -> Any: # Since it's hard to align the difference call using args and kwargs, we strictly ask to use kwargs in Singleton From f32d85b9d4d74d9aa83593aa2f7d32ae2e309155 Mon Sep 17 00:00:00 2001 From: bowen xian Date: Tue, 16 Jul 2024 04:59:45 +0000 Subject: [PATCH 05/14] adjust log codes directory, add some tag for factor implementation logging --- .env.example | 1 + .../factor_extract_and_implement.py | 11 +- .../knowledge_management/vector_base.py | 7 +- .../evolving/evaluators.py | 9 +- .../evolving/evolvable_subjects.py | 4 +- .../evolving/knowledge_management.py | 7 +- .../evolving/scheduler.py | 4 +- .../factor_implementation/factor.py | 4 +- .../model_implementation/task_loader.py | 5 +- rdagent/core/log.py | 260 ------------------ rdagent/log/__init__.py | 7 +- rdagent/log/base.py | 62 +++++ rdagent/log/logger.py | 122 ++++++++ rdagent/log/storage.py | 53 ++++ rdagent/log/ui/__init__.py | 40 --- rdagent/log/ui/web.py | 59 ++++ rdagent/log/utils.py | 67 +++++ rdagent/oai/llm_utils.py | 51 ++-- .../factor_experiment_loader/pdf_loader.py | 14 +- 19 files changed, 430 insertions(+), 357 deletions(-) delete mode 100644 rdagent/core/log.py create mode 100644 rdagent/log/base.py create mode 100644 rdagent/log/logger.py create mode 100644 rdagent/log/storage.py create mode 100644 rdagent/log/ui/web.py create mode 100644 rdagent/log/utils.py diff --git a/.env.example b/.env.example index b7244697..8b504ad1 100644 --- a/.env.example +++ b/.env.example @@ -9,6 +9,7 @@ USE_EMBEDDING_CACHE=True LOG_LLM_CHAT_CONTENT=False CHAT_FREQUENCY_PENALTY=0.0 CHAT_PRESENCE_PENALTY=0.0 +LOG_TRACE_PATH=log_traces # embedding model configs: EMBEDDING_OPENAI_API_KEY=your_api_key diff --git a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py index 830e8216..418014fb 100644 --- a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py +++ b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py @@ -7,16 +7,23 @@ from rdagent.scenarios.qlib.factor_task_implementation import ( COSTEERFG_QUANT_FACTOR_IMPLEMENTATION, ) +from rdagent.log import rdagent_logger as logger assert load_dotenv() def extract_factors_and_implement(report_file_path: str) -> None: - factor_tasks = FactorImplementationExperimentLoaderFromPDFfiles().load(report_file_path) - implementation_result = COSTEERFG_QUANT_FACTOR_IMPLEMENTATION().generate(factor_tasks) + with logger.tag('factor'): + with logger.tag('load'): + factor_tasks = FactorImplementationExperimentLoaderFromPDFfiles().load(report_file_path) + with logger.tag('implementation'): + implementation_result = COSTEERFG_QUANT_FACTOR_IMPLEMENTATION().generate(factor_tasks) + logger.log_object(implementation_result, tag="results") # Qlib to run the implementation return implementation_result if __name__ == "__main__": extract_factors_and_implement("/home/xuyang1/workspace/report.pdf") + +# %% diff --git a/rdagent/components/knowledge_management/vector_base.py b/rdagent/components/knowledge_management/vector_base.py index 9322a510..17ca514b 100644 --- a/rdagent/components/knowledge_management/vector_base.py +++ b/rdagent/components/knowledge_management/vector_base.py @@ -5,10 +5,9 @@ import pandas as pd from scipy.spatial.distance import cosine -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.oai.llm_utils import APIBackend - class KnowledgeMetaData: def __init__(self, content: str = "", label: str = None, embedding=None, identity=None): self.label = label @@ -127,7 +126,7 @@ def __init__(self, vector_df_path: Union[str, Path] = None): else: self.vector_df = pd.DataFrame(columns=["id", "label", "content", "embedding"]) - RDAgentLog().info(f"VectorBase loaded, shape={self.vector_df.shape}") + logger.info(f"VectorBase loaded, shape={self.vector_df.shape}") def shape(self): return self.vector_df.shape @@ -205,4 +204,4 @@ def load(self, vector_df_path, **kwargs): def save(self, vector_df_path, **kwargs): self.vector_df.to_pickle(vector_df_path) - RDAgentLog().info(f"Save vectorBase vector_df to: {vector_df_path}") + logger.info(f"Save vectorBase vector_df to: {vector_df_path}") diff --git a/rdagent/components/task_implementation/factor_implementation/evolving/evaluators.py b/rdagent/components/task_implementation/factor_implementation/evolving/evaluators.py index 19289031..e86985b6 100644 --- a/rdagent/components/task_implementation/factor_implementation/evolving/evaluators.py +++ b/rdagent/components/task_implementation/factor_implementation/evolving/evaluators.py @@ -20,14 +20,13 @@ from rdagent.core.evaluation import Evaluator from rdagent.core.evolving_framework import Feedback, QueriedKnowledge from rdagent.core.experiment import Implementation -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.prompts import Prompts from rdagent.core.utils import multiprocessing_wrapper from rdagent.oai.llm_utils import APIBackend evaluate_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml") - class FactorImplementationEvaluator(Evaluator): # TODO: # I think we should have unified interface for all evaluates, for examples. @@ -457,7 +456,7 @@ def evaluate( ) except Exception as e: - RDAgentLog().warning(f"Error occurred when calculating the correlation: {str(e)}") + logger.warning(f"Error occurred when calculating the correlation: {str(e)}") conclusions.append( f"Some error occurred when calculating the correlation. Investigate the factors that might be causing the discrepancies and ensure that the logic of the factor calculation is consistent. Error: {e}", ) @@ -661,7 +660,7 @@ def evaluate( value_decision, ) = self.value_evaluator.evaluate(source_df=source_df, gt_df=gt_df) except Exception as e: - RDAgentLog().warning("Value evaluation failed with exception: %s", e) + logger.warning("Value evaluation failed with exception: %s", e) factor_feedback.factor_value_feedback = "Value evaluation failed." value_decision = False @@ -745,6 +744,6 @@ def evaluate( None if single_feedback is None else single_feedback.final_decision for single_feedback in multi_implementation_feedback ] - RDAgentLog().info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") + logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") return multi_implementation_feedback diff --git a/rdagent/components/task_implementation/factor_implementation/evolving/evolvable_subjects.py b/rdagent/components/task_implementation/factor_implementation/evolving/evolvable_subjects.py index a79d96a6..ae10aa7f 100644 --- a/rdagent/components/task_implementation/factor_implementation/evolving/evolvable_subjects.py +++ b/rdagent/components/task_implementation/factor_implementation/evolving/evolvable_subjects.py @@ -4,7 +4,7 @@ FileBasedFactorImplementation, ) from rdagent.core.evolving_framework import EvolvableSubjects -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger class FactorEvolvingItem(FactorExperiment, EvolvableSubjects): @@ -23,7 +23,7 @@ def __init__( sub_gt_implementations, ) != len(self.sub_tasks): self.sub_gt_implementations = None - RDAgentLog().warning( + logger.warning( "The length of sub_gt_implementations is not equal to the length of sub_tasks, set sub_gt_implementations to None", ) else: diff --git a/rdagent/components/task_implementation/factor_implementation/evolving/knowledge_management.py b/rdagent/components/task_implementation/factor_implementation/evolving/knowledge_management.py index 400be6ef..528a57eb 100644 --- a/rdagent/components/task_implementation/factor_implementation/evolving/knowledge_management.py +++ b/rdagent/components/task_implementation/factor_implementation/evolving/knowledge_management.py @@ -32,14 +32,13 @@ RAGStrategy, ) from rdagent.core.experiment import Implementation -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.prompts import Prompts from rdagent.oai.llm_utils import ( APIBackend, calculate_embedding_distance_between_str_list, ) - class FactorImplementationKnowledge(Knowledge): def __init__( self, @@ -340,7 +339,7 @@ def analyze_component( )["component_no_list"] return [all_component_nodes[index - 1] for index in sorted(list(set(component_no_list)))] except: - RDAgentLog().warning("Error when analyzing components.") + logger.warning("Error when analyzing components.") analyze_component_user_prompt = "Your response is not a valid component index list." return [] @@ -718,7 +717,7 @@ def __init__(self, init_component_list=None) -> None: Load knowledge, offer brief information of knowledge and common handle interfaces """ self.graph: UndirectedGraph = UndirectedGraph.load(Path.cwd() / "graph.pkl") - RDAgentLog().info(f"Knowledge Graph loaded, size={self.graph.size()}") + logger.info(f"Knowledge Graph loaded, size={self.graph.size()}") if init_component_list: for component in init_component_list: diff --git a/rdagent/components/task_implementation/factor_implementation/evolving/scheduler.py b/rdagent/components/task_implementation/factor_implementation/evolving/scheduler.py index ded011ee..36f38be6 100644 --- a/rdagent/components/task_implementation/factor_implementation/evolving/scheduler.py +++ b/rdagent/components/task_implementation/factor_implementation/evolving/scheduler.py @@ -10,7 +10,7 @@ get_data_folder_intro, ) from rdagent.core.conf import RD_AGENT_SETTINGS -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.prompts import Prompts from rdagent.oai.llm_utils import APIBackend @@ -25,7 +25,7 @@ def RandomSelect(to_be_finished_task_index, implementation_factors_per_round): implementation_factors_per_round, ) - RDAgentLog().info(f"The random selection is: {to_be_finished_task_index}") + logger.info(f"The random selection is: {to_be_finished_task_index}") return to_be_finished_task_index diff --git a/rdagent/components/task_implementation/factor_implementation/factor.py b/rdagent/components/task_implementation/factor_implementation/factor.py index 91232693..4def1393 100644 --- a/rdagent/components/task_implementation/factor_implementation/factor.py +++ b/rdagent/components/task_implementation/factor_implementation/factor.py @@ -18,7 +18,7 @@ RuntimeErrorException, ) from rdagent.core.experiment import Experiment, FBImplementation, Task -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.oai.llm_utils import md5_hash @@ -77,7 +77,7 @@ def __init__( super().__init__(target_task) self.code = code self.executed_factor_value_dataframe = executed_factor_value_dataframe - self.logger = RDAgentLog() + self.logger = logger self.raise_exception = raise_exception self.workspace_path = Path( FACTOR_IMPLEMENT_SETTINGS.file_based_execution_workspace, diff --git a/rdagent/components/task_implementation/model_implementation/task_loader.py b/rdagent/components/task_implementation/model_implementation/task_loader.py index f11a1f33..8da65f6a 100644 --- a/rdagent/components/task_implementation/model_implementation/task_loader.py +++ b/rdagent/components/task_implementation/model_implementation/task_loader.py @@ -13,13 +13,12 @@ ModelImplementationTaskLoaderFromDict, ModelTask, ) -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.prompts import Prompts from rdagent.oai.llm_utils import APIBackend document_process_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") - def extract_model_from_doc(doc_content: str) -> dict: """ Extract model information from document content. @@ -66,7 +65,7 @@ def extract_model_from_doc(doc_content: str) -> dict: else: break - RDAgentLog().info(f"已经完成{len(model_dict)}个模型的提取") + logger.info(f"已经完成{len(model_dict)}个模型的提取") return model_dict diff --git a/rdagent/core/log.py b/rdagent/core/log.py deleted file mode 100644 index 4dd1f8c7..00000000 --- a/rdagent/core/log.py +++ /dev/null @@ -1,260 +0,0 @@ -from __future__ import annotations - -import re -import sys -import pickle -import json -import inspect -from typing import TYPE_CHECKING, Sequence, Literal - -if TYPE_CHECKING: - from loguru import Logger, Message, Record - -from loguru import logger -from abc import abstractmethod -from datetime import datetime, timezone -from pathlib import Path -from functools import partial - -from rdagent.core.conf import RD_AGENT_SETTINGS -from rdagent.core.utils import SingletonBaseClass - - -def get_caller_info(): - # Get the current stack information - stack = inspect.stack() - # The second element is usually the caller's information - caller_info = stack[2] - frame = caller_info[0] - info = { - 'line': caller_info.lineno, - 'name': frame.f_globals['__name__'], # Get the module name from the frame's globals - } - return info - - -class Storage: - """ - Basic storage to support saving objects; - - # Usage: - - The storage has mainly two kind of users: - - The logging end: you can choose any of the following method to use the object - - We can use it directly with the native logging storage - - We can use it with other logging tools; For example, serve as a handler for loggers - - The view end: - - Mainly for the subclass of `logging.base.View` - - It should provide two kind of ways to provide content - - offline content provision. - - online content preovision. - """ - - @abstractmethod - def log(self, obj: object, name: str = "", **kwargs: dict) -> str | Path | None: - """ - - Parameters - ---------- - obj : object - The object for logging. - name : str - The name of the object. For example "a.b.c" - We may log a lot of objects to a same name - """ - ... - - -class View: - """ - Motivation: - - Display the content in the storage - """ - # TODO: pleas fix me - @abstractmethod - def display(s: Storage, watch: bool=False): - """ - - Parameters - ---------- - s : Storage - - watch : bool - should we watch the new content and display them - """ - ... - - -class FileStorage(Storage): - """ - The info are logginged to the file systems - - TODO: describe the storage format - """ - - def __init__(self, path: str = "./log/") -> None: - self.path = Path(path) - self.path.mkdir(parents=True, exist_ok=True) - - def log(self, - obj: object, - name: str = "", - save_type: Literal["json", "text", "pkl", "short-text"] = "short-text", - timestamp: datetime | None = None, - split_name: bool = True, - ) -> Path: - if timestamp is None: - timestamp = datetime.now(timezone.utc) - else: - timestamp = timestamp.astimezone(timezone.utc) - - cur_p = self.path - if split_name: - uri_l = name.split(".") - for u in uri_l: - cur_p = cur_p / u - else: - cur_p = cur_p / name - cur_p.mkdir(parents=True, exist_ok=True) - - path = cur_p / f"{timestamp.strftime('%Y-%m-%d_%H-%M-%S-%f')}.log" - - if save_type == "json": - path = path.with_suffix(".json") - with path.open("w") as f: - try: - json.dump(obj, f) - except TypeError: - json.dump(json.loads(str(obj)), f) - return path - elif save_type == "pkl": - path = path.with_suffix(".pkl") - with path.open("wb") as f: - pickle.dump(obj, f) - return path - elif save_type == "text": - obj = str(obj) - with path.open("w") as f: - f.write(obj) - return path - else: - obj = str(obj).strip() - if obj == "": - return - path = cur_p / "common_logs.log" - with path.open("a") as f: - f.write(f"{timestamp.isoformat()}: {obj}\n\n") # add a new line to separate logs - return path - - -class LogColors: - """ - ANSI color codes for use in console output. - """ - - RED = "\033[91m" - GREEN = "\033[92m" - YELLOW = "\033[93m" - BLUE = "\033[94m" - MAGENTA = "\033[95m" - CYAN = "\033[96m" - WHITE = "\033[97m" - GRAY = "\033[90m" - BLACK = "\033[30m" - - BOLD = "\033[1m" - ITALIC = "\033[3m" - - END = "\033[0m" - - @classmethod - def get_all_colors(cls: type[LogColors]) -> list: - names = dir(cls) - names = [name for name in names if not name.startswith("__") and not callable(getattr(cls, name))] - return [getattr(cls, name) for name in names] - - def render(self, text: str, color: str = "", style: str = "") -> str: - """ - render text by input color and style. - It's not recommend that input text is already rendered. - """ - # This method is called too frequently, which is not good. - colors = self.get_all_colors() - # Perhaps color and font should be distinguished here. - if color and color in colors: - error_message = f"color should be in: {colors} but now is: {color}" - raise ValueError(error_message) - if style and style in colors: - error_message = f"style should be in: {colors} but now is: {style}" - raise ValueError(error_message) - - text = f"{color}{text}{self.END}" - - return f"{style}{text}{self.END}" - - -def loguru2storage_handler(storage: Storage, record: Message) -> None: - msg = f"{record.record['level']} | {record.record['name']}:{record.record['line']} - {RDAgentLog.remove_ansi_codes(record.record['message'])}" - storage.log(msg, timestamp=record.record["time"], save_type="short-text") - - -class RDAgentLog(SingletonBaseClass): - - def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path) -> None: - if log_trace_path is None: - timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S-%f") - log_trace_path: Path = Path.cwd() / "log" / timestamp - log_trace_path.mkdir(parents=True, exist_ok=True) - - self.storage = FileStorage(log_trace_path) - - # add handler to save log to storage - logger.add(partial(loguru2storage_handler, self.storage)) - - self.log_stream = self.LogStreamContextManager(self.storage) - - @staticmethod - def remove_ansi_codes(s: str) -> str: - ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') - return ansi_escape.sub('', s) - - class LogStreamContextManager: - def __init__(self, storage: Storage) -> None: - self.captured_logs = [] - self.storage = storage - - def capture(self, message: Message) -> None: - self.captured_logs.append(message.record["message"]) - - def __enter__(self): - logger.remove() - logger.add(sys.stderr, format=lambda x: x["message"]) - logger.add(self.capture) - - def __exit__(self, exc_type, exc_value, traceback) -> None: - logger.info('\n') - logger.remove() - logger.add(partial(loguru2storage_handler, self.storage)) - logger.info("[stream log] " + "".join(self.captured_logs)) - logger.add(sys.stderr) - - def log_objects(self, *objs: Sequence[object]) -> None: - caller_info = get_caller_info() - for obj in objs: - logp = self.storage.log(obj, name=f"{type(obj).__module__}.{type(obj).__name__}", save_type="pkl", split_name=False) - - logger.patch(lambda r: r.update(caller_info)).info(f"Logging object in {logp.absolute()}") - - def info(self, msg: str) -> None: - caller_info = get_caller_info() - logger.patch(lambda r: r.update(caller_info)).info(msg) - - def warning(self, msg: str) -> None: - caller_info = get_caller_info() - logger.patch(lambda r: r.update(caller_info)).warning(msg) - - def error(self, msg: str) -> None: - caller_info = get_caller_info() - logger.patch(lambda r: r.update(caller_info)).error(msg) - diff --git a/rdagent/log/__init__.py b/rdagent/log/__init__.py index 250e32d4..ff6f158d 100644 --- a/rdagent/log/__init__.py +++ b/rdagent/log/__init__.py @@ -1,3 +1,4 @@ -""" -Here is a more reasonable place -""" +from .logger import RDAgentLog +from .utils import LogColors + +rdagent_logger: RDAgentLog = RDAgentLog() \ No newline at end of file diff --git a/rdagent/log/base.py b/rdagent/log/base.py new file mode 100644 index 00000000..d99c6a81 --- /dev/null +++ b/rdagent/log/base.py @@ -0,0 +1,62 @@ +from __future__ import annotations + +from abc import abstractmethod +from pathlib import Path + + +class Storage: + """ + Basic storage to support saving objects; + + # Usage: + + The storage has mainly two kind of users: + - The logging end: you can choose any of the following method to use the object + - We can use it directly with the native logging storage + - We can use it with other logging tools; For example, serve as a handler for loggers + - The view end: + - Mainly for the subclass of `logging.base.View` + - It should provide two kind of ways to provide content + - offline content provision. + - online content preovision. + """ + + @abstractmethod + def log(self, obj: object, name: str = "", **kwargs: dict) -> str | Path: + """ + + Parameters + ---------- + obj : object + The object for logging. + name : str + The name of the object. For example "a.b.c" + We may log a lot of objects to a same name + + Returns + ------- + str | Path + The storage identifier of the object. + """ + ... + + +class View: + """ + Motivation: + + Display the content in the storage + """ + # TODO: pleas fix me + @abstractmethod + def display(s: Storage, watch: bool=False): + """ + + Parameters + ---------- + s : Storage + + watch : bool + should we watch the new content and display them + """ + ... \ No newline at end of file diff --git a/rdagent/log/logger.py b/rdagent/log/logger.py new file mode 100644 index 00000000..566acf4f --- /dev/null +++ b/rdagent/log/logger.py @@ -0,0 +1,122 @@ +import sys +import os + +from loguru import logger +from rdagent.core.utils import SingletonBaseClass +from rdagent.core.conf import RD_AGENT_SETTINGS +from pathlib import Path +from psutil import Process +from datetime import datetime, timezone +from functools import partial +from contextlib import contextmanager +from multiprocessing import Pipe +from multiprocessing.connection import Connection + +from .storage import FileStorage +from .utils import get_caller_info, remove_ansi_codes + + +class RDAgentLog(SingletonBaseClass): + _tag: str = "" + + def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path) -> None: + + if log_trace_path is None: + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S-%f") + log_trace_path: Path = Path.cwd() / "log" / timestamp + + self.log_trace_path = Path(log_trace_path) + self.log_trace_path.mkdir(parents=True, exist_ok=True) + + self.storage = FileStorage(log_trace_path) + + self.outter_conn, self.inner_conn = Pipe() + + self.main_pid = os.getpid() + + @property + def stream(self) -> Connection: + return self.outter_conn + + + @contextmanager + def tag(self, tag: str): + if tag.strip() == "": + raise ValueError("Tag cannot be empty.") + if self._tag != "": + tag = "." + tag + + self._tag = self._tag + tag + yield + self._tag = self._tag[:-len(tag)] + + + def get_pids(self) -> str: + ''' + Returns a string of pids from the current process to the main process. + Split by '-'. + ''' + pid = os.getpid() + process = Process(pid) + pid_chain = f"{pid}" + while process.pid != self.main_pid: + parent_pid = process.ppid() + parent_process = Process(parent_pid) + pid_chain = f"{parent_pid}-{pid_chain}" + process = parent_process + return pid_chain + + + def file_format(self, record, raw: bool=False): + record["message"] = remove_ansi_codes(record["message"]) + if raw: + return "{message}" + return "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} - {message}\n" + + + def log_object(self, obj: object, *, tag: str = "") -> None: + caller_info = get_caller_info() + tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}" + + logp = self.storage.log(obj, name=tag, save_type="pkl") + + file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format) + logger.patch(lambda r: r.update(caller_info)).info(f"Logging object in {logp.absolute()}") + logger.remove(file_handler_id) + + + def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None: + caller_info = get_caller_info() + if raw: + logger.remove() + logger.add(sys.stderr, format=lambda r: "{message}") + + tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}" + log_file_path = self.log_trace_path / tag.replace('.','/') /"common_logs.log" + if raw: + file_handler_id = logger.add(log_file_path, format=partial(self.file_format, raw=True)) + else: + file_handler_id = logger.add(log_file_path, format=self.file_format) + + logger.patch(lambda r: r.update(caller_info)).info(msg) + logger.remove(file_handler_id) + + if raw: + logger.remove() + logger.add(sys.stderr) + + def warning(self, msg: str, *, tag: str = "") -> None: + caller_info = get_caller_info() + + tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}" + file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format) + logger.patch(lambda r: r.update(caller_info)).warning(msg) + logger.remove(file_handler_id) + + def error(self, msg: str, *, tag: str = "") -> None: + caller_info = get_caller_info() + + tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}" + file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format) + logger.patch(lambda r: r.update(caller_info)).error(msg) + logger.remove(file_handler_id) diff --git a/rdagent/log/storage.py b/rdagent/log/storage.py new file mode 100644 index 00000000..262f640e --- /dev/null +++ b/rdagent/log/storage.py @@ -0,0 +1,53 @@ +import json +import pickle +from typing import Literal + +from pathlib import Path +from datetime import datetime, timezone +from .base import Storage + +class FileStorage(Storage): + """ + The info are logginged to the file systems + + TODO: describe the storage format + """ + + def __init__(self, path: str = "./log/") -> None: + self.path = Path(path) + self.path.mkdir(parents=True, exist_ok=True) + + def log(self, + obj: object, + name: str = "", + save_type: Literal["json", "text", "pkl"] = "text", + timestamp: datetime | None = None, + ) -> Path: + if timestamp is None: + timestamp = datetime.now(timezone.utc) + else: + timestamp = timestamp.astimezone(timezone.utc) + + cur_p = self.path / name.replace(".", "/") + cur_p.mkdir(parents=True, exist_ok=True) + + path = cur_p / f"{timestamp.strftime('%Y-%m-%d_%H-%M-%S-%f')}.log" + + if save_type == "json": + path = path.with_suffix(".json") + with path.open("w") as f: + try: + json.dump(obj, f) + except TypeError: + json.dump(json.loads(str(obj)), f) + return path + elif save_type == "pkl": + path = path.with_suffix(".pkl") + with path.open("wb") as f: + pickle.dump(obj, f) + return path + elif save_type == "text": + obj = str(obj) + with path.open("w") as f: + f.write(obj) + return path \ No newline at end of file diff --git a/rdagent/log/ui/__init__.py b/rdagent/log/ui/__init__.py index 69f018ad..6040d81b 100644 --- a/rdagent/log/ui/__init__.py +++ b/rdagent/log/ui/__init__.py @@ -5,43 +5,3 @@ - in general folder like rdagent/log/ui - It is for specific scenario rdagent/scenarios/ """ - - -from rdagent.core.log import View - - -class WebView(View): - r""" - - We have tree structure for sequence - - session - | \ - ... defined by user ... - | \ - info1 -> info2 -> ... -> info3 -> ... overtime. - - Some design principles: - session1.module(e.g. implement). - `s.log(a.b.1.c) s.log(a.b.2.c)` should not handed over to users. - - An display example: - - W1 write factor - W2 evaluate factor - W3 backtest - - W123 - R - RX - RXX - RX - - W4 - trace r1 r2 r3 r4 - - What to do next? - 1. Data structure - 2. Map path like `a.b.c` to frontend components - 3. Display logic - """ diff --git a/rdagent/log/ui/web.py b/rdagent/log/ui/web.py new file mode 100644 index 00000000..2cc3ad1c --- /dev/null +++ b/rdagent/log/ui/web.py @@ -0,0 +1,59 @@ +from rdagent.log.base import View, Storage +from pathlib import Path + +class ProcessView(View): + def __init__(self, trace_path: Path): + + + # Save logs to your desired data structure + # ... + pass + + + def display(s: Storage, watch: bool = False): + pass + + + +class WebView(View): + r""" + + We have tree structure for sequence + + session + | \ + ... defined by user ... + | \ + info1 -> info2 -> ... -> info3 -> ... overtime. + + Some design principles: + session1.module(e.g. implement). + `s.log(a.b.1.c) s.log(a.b.2.c)` should not handed over to users. + + An display example: + + W1 write factor + W2 evaluate factor + W3 backtest + + W123 + R + RX + RXX + RX + + W4 + trace r1 r2 r3 r4 + + What to do next? + 1. Data structure + 2. Map path like `a.b.c` to frontend components + 3. Display logic + """ + def __init__(self, trace_path: Path): + pass + # Save logs to your desired data structure + # ... + + def display(s: Storage, watch: bool = False): + pass \ No newline at end of file diff --git a/rdagent/log/utils.py b/rdagent/log/utils.py new file mode 100644 index 00000000..1631b2c7 --- /dev/null +++ b/rdagent/log/utils.py @@ -0,0 +1,67 @@ +import inspect +import re + + +class LogColors: + """ + ANSI color codes for use in console output. + """ + + RED = "\033[91m" + GREEN = "\033[92m" + YELLOW = "\033[93m" + BLUE = "\033[94m" + MAGENTA = "\033[95m" + CYAN = "\033[96m" + WHITE = "\033[97m" + GRAY = "\033[90m" + BLACK = "\033[30m" + + BOLD = "\033[1m" + ITALIC = "\033[3m" + + END = "\033[0m" + + @classmethod + def get_all_colors(cls: type['LogColors']) -> list: + names = dir(cls) + names = [name for name in names if not name.startswith("__") and not callable(getattr(cls, name))] + return [getattr(cls, name) for name in names] + + def render(self, text: str, color: str = "", style: str = "") -> str: + """ + render text by input color and style. + It's not recommend that input text is already rendered. + """ + # This method is called too frequently, which is not good. + colors = self.get_all_colors() + # Perhaps color and font should be distinguished here. + if color and color in colors: + error_message = f"color should be in: {colors} but now is: {color}" + raise ValueError(error_message) + if style and style in colors: + error_message = f"style should be in: {colors} but now is: {style}" + raise ValueError(error_message) + + text = f"{color}{text}{self.END}" + + return f"{style}{text}{self.END}" + + +def remove_ansi_codes(s: str) -> str: + ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') + return ansi_escape.sub('', s) + + +def get_caller_info(): + # Get the current stack information + stack = inspect.stack() + # The second element is usually the caller's information + caller_info = stack[2] + frame = caller_info[0] + info = { + 'line': caller_info.lineno, + 'name': frame.f_globals['__name__'], # Get the module name from the frame's globals + 'function': frame.f_code.co_name, # Get the caller's function name + } + return info \ No newline at end of file diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index 2c5abc56..6c3e0431 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -19,13 +19,11 @@ import tiktoken from rdagent.core.conf import RD_AGENT_SETTINGS -from rdagent.core.log import LogColors, RDAgentLog +from rdagent.log import LogColors, rdagent_logger as logger from rdagent.core.utils import SingletonBaseClass DEFAULT_QLIB_DOT_PATH = Path("./") -logger: RDAgentLog = RDAgentLog() - def md5_hash(input_string: str) -> str: hash_md5 = hashlib.md5(usedforsecurity=False) input_bytes = input_string.encode("utf-8") @@ -36,17 +34,17 @@ def md5_hash(input_string: str) -> str: try: from azure.identity import DefaultAzureCredential, get_bearer_token_provider except ImportError: - logger.warning("azure.identity is not installed.") + logger.warning("azure.identity is not installed.", tag="gpt") try: import openai except ImportError: - logger.warning("openai is not installed.") + logger.warning("openai is not installed.", tag="gpt") try: from llama import Llama except ImportError: - logger.warning("llama is not installed.") + logger.warning("llama is not installed.", tag="gpt") class ConvManager: @@ -204,12 +202,14 @@ def build_chat_completion(self, user_prompt: str, **kwargs: Any) -> str: user prompt should always be provided """ messages = self.build_chat_completion_message(user_prompt) + + with logger.tag(self.conversation_id): + response = self.api_backend._try_create_chat_completion_or_embedding( # noqa: SLF001 + messages=messages, + chat_completion=True, + **kwargs, + ) - response = self.api_backend._try_create_chat_completion_or_embedding( # noqa: SLF001 - messages=messages, - chat_completion=True, - **kwargs, - ) messages.append( { "role": "assistant", @@ -651,19 +651,24 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 ) if self.chat_stream: resp = "" - with logger.log_stream: + if self.cfg.log_llm_chat_content: logger.info(f"{LogColors.CYAN}Response:{LogColors.END}") - for chunk in response: - content = ( - chunk.choices[0].delta.content - if len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None - else "" - ) - if self.cfg.log_llm_chat_content: - logger.info(LogColors.CYAN + content + LogColors.END) - resp += content - if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None: - finish_reason = chunk.choices[0].finish_reason + + for chunk in response: + content = ( + chunk.choices[0].delta.content + if len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None + else "" + ) + if self.cfg.log_llm_chat_content: + logger.info(LogColors.CYAN + content + LogColors.END, raw=True) + resp += content + if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None: + finish_reason = chunk.choices[0].finish_reason + + if self.cfg.log_llm_chat_content: + logger.info("\n", raw=True) + else: resp = response.choices[0].message.content finish_reason = response.choices[0].finish_reason diff --git a/rdagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py b/rdagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py index 1161d010..881fb513 100644 --- a/rdagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py +++ b/rdagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py @@ -18,7 +18,7 @@ ) from rdagent.components.loader.experiment_loader import FactorExperimentLoader from rdagent.core.conf import RD_AGENT_SETTINGS -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.prompts import Prompts from rdagent.oai.llm_utils import APIBackend, create_embedding_with_multiprocessing from rdagent.scenarios.qlib.factor_experiment_loader.json_loader import ( @@ -27,7 +27,6 @@ document_process_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml") - def classify_report_from_dict( report_dict: Mapping[str, str], vote_time: int = 1, @@ -69,7 +68,7 @@ def classify_report_from_dict( if isinstance(value, str): content = value else: - RDAgentLog().warning(f"输入格式不符合要求: {file_name}") + logger.warning(f"输入格式不符合要求: {file_name}") res_dict[file_name] = {"class": 0} continue @@ -101,7 +100,7 @@ def classify_report_from_dict( res = json.loads(res) vote_list.append(int(res["class"])) except json.JSONDecodeError: - RDAgentLog().warning(f"返回值无法解析: {file_name}") + logger.warning(f"返回值无法解析: {file_name}") res_dict[file_name] = {"class": 0} count_0 = vote_list.count(0) count_1 = vote_list.count(1) @@ -246,7 +245,7 @@ def extract_factors_from_report_dict( if int(value.get("class")) == 1: useful_report_dict[key] = report_dict[key] else: - RDAgentLog().warning(f"Invalid input format: {key}") + logger.warning(f"Invalid input format: {key}") final_report_factor_dict = {} # for file_name, content in useful_report_dict.items(): @@ -276,7 +275,7 @@ def extract_factors_from_report_dict( file_name = file_names[index] final_report_factor_dict.setdefault(file_name, {}) final_report_factor_dict[file_name] = result.get() - RDAgentLog().info(f"已经完成{len(final_report_factor_dict)}个报告的因子提取") + logger.info(f"已经完成{len(final_report_factor_dict)}个报告的因子提取") return final_report_factor_dict @@ -499,7 +498,7 @@ def __deduplicate_factor_dict(factor_dict: dict[str, dict[str, str]]) -> list[li kmeans_index_group = __kmeans_embeddings(embeddings=embeddings, k=k) if len(kmeans_index_group[0]) < RD_AGENT_SETTINGS.max_input_duplicate_factor_group: target_k = k - RDAgentLog().info(f"K-means group number: {k}") + logger.info(f"K-means group number: {k}") break factor_name_groups = [[factor_names[index] for index in index_group] for index_group in kmeans_index_group] @@ -589,4 +588,5 @@ def load(self, file_or_folder_path: Path) -> dict: factor_viability = check_factor_viability(factor_dict) factor_dict, duplication_names_list = deduplicate_factors_by_llm(factor_dict, factor_viability) + logger.log_object(factor_dict, tag="factor_dict") return FactorImplementationExperimentLoaderFromDict().load(factor_dict) From bdffbdb308bd2b8f63955c064b8eb8beee2172fc Mon Sep 17 00:00:00 2001 From: you-n-g Date: Tue, 16 Jul 2024 13:50:11 +0800 Subject: [PATCH 06/14] Update rdagent/core/conf.py --- rdagent/core/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py index a926d54c..1873e117 100644 --- a/rdagent/core/conf.py +++ b/rdagent/core/conf.py @@ -17,6 +17,7 @@ class RDAgentSettings(BaseSettings): # TODO: (xiao) I think most of the config should be in oai.config # Log configs + # TODO: (xiao) think it can be a seperate config. log_trace_path: str | None = None log_llm_chat_content: bool = True From 3bb181defe7537c930059be69ffe4f41c6d818ab Mon Sep 17 00:00:00 2001 From: bowen xian Date: Tue, 16 Jul 2024 05:58:12 +0000 Subject: [PATCH 07/14] fix factor task app & log --- .../factor_extract_and_implement.py | 15 +++++++-------- rdagent/oai/llm_utils.py | 7 ------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py index 4004409d..de5048d9 100644 --- a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py +++ b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py @@ -1,20 +1,19 @@ # %% from dotenv import load_dotenv -from rdagent.scenarios.qlib.factor_experiment_loader.pdf_loader import ( - FactorExperimentLoaderFromPDFfiles, -) -from rdagent.scenarios.qlib.factor_task_implementation import ( - COSTEERFG_QUANT_FACTOR_IMPLEMENTATION, -) +from rdagent.scenarios.qlib.factor_experiment_loader.pdf_loader import FactorExperimentLoaderFromPDFfiles +from rdagent.scenarios.qlib.factor_task_implementation import QlibFactorCoSTEER from rdagent.log import rdagent_logger as logger assert load_dotenv() def extract_factors_and_implement(report_file_path: str) -> None: - factor_tasks = FactorExperimentLoaderFromPDFfiles().load(report_file_path) - implementation_result = COSTEERFG_QUANT_FACTOR_IMPLEMENTATION().generate(factor_tasks) + with logger.tag('extract_factors_and_implement'): + with logger.tag('load_factor_tasks'): + factor_tasks = FactorExperimentLoaderFromPDFfiles().load(report_file_path) + with logger.tag('implement_factors'): + implementation_result = QlibFactorCoSTEER().generate(factor_tasks) # Qlib to run the implementation return implementation_result diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index 6c3e0431..9f742603 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -538,13 +538,6 @@ def _build_log_messages(self, messages: list[dict]) -> str: ) return log_messages - def log_response(self, response: str | None = None, *, stream: bool = False) -> None: - if self.cfg.log_llm_chat_content: - if stream: - logger.info(f"\n{LogColors.CYAN}Response:{LogColors.END}") - else: - logger.info(f"\n{LogColors.CYAN}Response:{response}{LogColors.END}") - def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 self, messages: list[dict], From f9f89ee0f04d1861ec8c13bf7283462e12977af7 Mon Sep 17 00:00:00 2001 From: bowen xian Date: Tue, 16 Jul 2024 06:24:28 +0000 Subject: [PATCH 08/14] fix log import --- .../coder/factor_coder/CoSTEER/evaluators.py | 6 +++--- .../components/coder/model_coder/CoSTEER/evaluators.py | 4 ++-- .../coder/model_coder/CoSTEER/evolvable_subjects.py | 4 ++-- rdagent/scenarios/qlib/task_generator/data.py | 3 +-- rdagent/scenarios/qlib/task_generator/feedback.py | 3 +-- rdagent/scenarios/qlib/task_generator/model.py | 4 ++-- rdagent/utils/env.py | 10 +++++----- 7 files changed, 16 insertions(+), 18 deletions(-) diff --git a/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py b/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py index 9b8a0fe3..5a2869f6 100644 --- a/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py +++ b/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py @@ -17,7 +17,7 @@ from rdagent.core.evaluation import Evaluator from rdagent.core.evolving_framework import Feedback, QueriedKnowledge from rdagent.core.experiment import Implementation, Task -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.prompts import Prompts from rdagent.core.utils import multiprocessing_wrapper from rdagent.oai.llm_utils import APIBackend @@ -585,7 +585,7 @@ def evaluate( value_feedback=factor_feedback.factor_value_feedback, code_feedback=factor_feedback.code_feedback, ) - RDAgentLog().info(factor_feedback.final_decision) + logger.info(factor_feedback.final_decision) return factor_feedback @@ -640,7 +640,7 @@ def evaluate( None if single_feedback is None else single_feedback.final_decision for single_feedback in multi_implementation_feedback ] - RDAgentLog().info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") + logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") return multi_implementation_feedback diff --git a/rdagent/components/coder/model_coder/CoSTEER/evaluators.py b/rdagent/components/coder/model_coder/CoSTEER/evaluators.py index c3e5f468..9ae642d4 100644 --- a/rdagent/components/coder/model_coder/CoSTEER/evaluators.py +++ b/rdagent/components/coder/model_coder/CoSTEER/evaluators.py @@ -16,7 +16,7 @@ from rdagent.core.evaluation import Evaluator from rdagent.core.evolving_framework import QueriedKnowledge from rdagent.core.experiment import Implementation, Task -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.prompts import Prompts from rdagent.core.utils import multiprocessing_wrapper from rdagent.oai.llm_utils import APIBackend @@ -328,6 +328,6 @@ def evaluate( None if single_feedback is None else single_feedback.final_decision for single_feedback in multi_implementation_feedback ] - RDAgentLog().info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") + logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}") return multi_implementation_feedback diff --git a/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py b/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py index 70e485b3..8ed10dd5 100644 --- a/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py +++ b/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py @@ -4,7 +4,7 @@ ModelTask, ) from rdagent.core.evolving_framework import EvolvableSubjects -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger class ModelEvolvingItem(ModelExperiment, EvolvableSubjects): @@ -22,7 +22,7 @@ def __init__( sub_gt_implementations, ) != len(self.sub_tasks): self.sub_gt_implementations = None - RDAgentLog().warning( + logger.warning( "The length of sub_gt_implementations is not equal to the length of sub_tasks, set sub_gt_implementations to None", ) else: diff --git a/rdagent/scenarios/qlib/task_generator/data.py b/rdagent/scenarios/qlib/task_generator/data.py index 2203b43d..4995bcf0 100644 --- a/rdagent/scenarios/qlib/task_generator/data.py +++ b/rdagent/scenarios/qlib/task_generator/data.py @@ -7,7 +7,7 @@ from rdagent.components.runner import CachedRunner from rdagent.components.runner.conf import RUNNER_SETTINGS -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.task_generator import TaskGenerator from rdagent.oai.llm_utils import md5_hash from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment @@ -15,7 +15,6 @@ DIRNAME = Path(__file__).absolute().resolve().parent DIRNAME_local = Path.cwd() -logger = RDAgentLog() # class QlibFactorExpWorkspace: diff --git a/rdagent/scenarios/qlib/task_generator/feedback.py b/rdagent/scenarios/qlib/task_generator/feedback.py index 8ce1e08e..339ab7f0 100644 --- a/rdagent/scenarios/qlib/task_generator/feedback.py +++ b/rdagent/scenarios/qlib/task_generator/feedback.py @@ -7,7 +7,7 @@ from jinja2 import Environment, StrictUndefined from rdagent.core.experiment import Experiment -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.prompts import Prompts from rdagent.core.proposal import ( Hypothesis, @@ -19,7 +19,6 @@ feedback_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml") DIRNAME = Path(__file__).absolute().resolve().parent -logger = RDAgentLog() class QlibFactorHypothesisExperiment2Feedback(HypothesisExperiment2Feedback): diff --git a/rdagent/scenarios/qlib/task_generator/model.py b/rdagent/scenarios/qlib/task_generator/model.py index 47586924..88c7ac33 100644 --- a/rdagent/scenarios/qlib/task_generator/model.py +++ b/rdagent/scenarios/qlib/task_generator/model.py @@ -10,7 +10,7 @@ ) from rdagent.components.runner import CachedRunner from rdagent.components.runner.conf import RUNNER_SETTINGS -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger from rdagent.core.task_generator import TaskGenerator from rdagent.utils.env import QTDockerEnv @@ -72,7 +72,7 @@ def generate(self, exp: ModelExperiment) -> ModelExperiment: csv_path = exp.exp_ws.workspace_path / "qlib_res.csv" if not csv_path.exists(): - RDAgentLog().error(f"File {csv_path} does not exist.") + logger.error(f"File {csv_path} does not exist.") return None result = pd.read_csv(csv_path, index_col=0).iloc[:,0] diff --git a/rdagent/utils/env.py b/rdagent/utils/env.py index 8b1503b9..d90becf1 100644 --- a/rdagent/utils/env.py +++ b/rdagent/utils/env.py @@ -19,7 +19,7 @@ from pydantic import BaseModel from pydantic_settings import BaseSettings -from rdagent.core.log import RDAgentLog +from rdagent.log import rdagent_logger as logger ASpecificBaseModel = TypeVar("ASpecificBaseModel", bound=BaseModel) @@ -146,11 +146,11 @@ def prepare(self): """ client = docker.from_env() if self.conf.build_from_dockerfile and self.conf.dockerfile_folder_path.exists(): - RDAgentLog().info(f"Building the image from dockerfile: {self.conf.dockerfile_folder_path}") + logger.info(f"Building the image from dockerfile: {self.conf.dockerfile_folder_path}") image, logs = client.images.build( path=str(self.conf.dockerfile_folder_path), tag=self.conf.image, network_mode=self.conf.network ) - RDAgentLog().info(f"Finished building the image from dockerfile: {self.conf.dockerfile_folder_path}") + logger.info(f"Finished building the image from dockerfile: {self.conf.dockerfile_folder_path}") try: client.images.get(self.conf.image) except docker.errors.ImageNotFound: @@ -216,8 +216,8 @@ def prepare(self): super().prepare() qlib_data_path = next(iter(self.conf.extra_volumes.keys())) if not (Path(qlib_data_path) / "qlib_data" / "cn_data").exists(): - RDAgentLog().info("We are downloading!") + logger.info("We are downloading!") cmd = "python -m qlib.run.get_data qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn --interval 1d --delete_old False" self.run(entry=cmd) else: - RDAgentLog().info("Data already exists. Download skipped.") + logger.info("Data already exists. Download skipped.") From 9c762e55e21bd30b38acf21653a4d4fc777522e5 Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 16 Jul 2024 09:02:25 +0000 Subject: [PATCH 09/14] Streamlet framework --- rdagent/log/ui/web.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/rdagent/log/ui/web.py b/rdagent/log/ui/web.py index 2cc3ad1c..ad5aada4 100644 --- a/rdagent/log/ui/web.py +++ b/rdagent/log/ui/web.py @@ -26,6 +26,11 @@ class WebView(View): | \ info1 -> info2 -> ... -> info3 -> ... overtime. + + | | - dispatch according to uri(e.g. `a.b.c. ...`) + Frontend is composed of windows. + Each window can individually display the message flow. + Some design principles: session1.module(e.g. implement). `s.log(a.b.1.c) s.log(a.b.2.c)` should not handed over to users. @@ -56,4 +61,32 @@ def __init__(self, trace_path: Path): # ... def display(s: Storage, watch: bool = False): - pass \ No newline at end of file + ui = STLUI() + for msg in s.iter_msg(): # iterate overtime + # NOTE: iter_msg will correctly seperate the information. + # TODO: msg may support streaming mode. + ui.dispatch(msg) + pass + + +# TODO: Implement the following classes +class STLWindow: + ... + + def consume_msg(self, msg): + ... # update it's view + + +class STLUI: + wd_l: list[STLWindow] + + def __init__(self): + self.build_ui() + + def build_ui(self): + # control the dispaly of windows + ... + + def dispatch(self, msg): + # map the message to a specific window + ... From 82583fa30a10f112a334b3731d24a503b6085185 Mon Sep 17 00:00:00 2001 From: bowen xian Date: Tue, 16 Jul 2024 09:44:01 +0000 Subject: [PATCH 10/14] fix log tag to path logic --- rdagent/log/logger.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/rdagent/log/logger.py b/rdagent/log/logger.py index 566acf4f..98ff27a5 100644 --- a/rdagent/log/logger.py +++ b/rdagent/log/logger.py @@ -76,7 +76,7 @@ def file_format(self, record, raw: bool=False): def log_object(self, obj: object, *, tag: str = "") -> None: caller_info = get_caller_info() - tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}" + tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.') logp = self.storage.log(obj, name=tag, save_type="pkl") @@ -91,8 +91,8 @@ def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None: logger.remove() logger.add(sys.stderr, format=lambda r: "{message}") - tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}" - log_file_path = self.log_trace_path / tag.replace('.','/') /"common_logs.log" + tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.') + log_file_path = self.log_trace_path / tag.replace('.','/') / "common_logs.log" if raw: file_handler_id = logger.add(log_file_path, format=partial(self.file_format, raw=True)) else: @@ -108,7 +108,7 @@ def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None: def warning(self, msg: str, *, tag: str = "") -> None: caller_info = get_caller_info() - tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}" + tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.') file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format) logger.patch(lambda r: r.update(caller_info)).warning(msg) logger.remove(file_handler_id) @@ -116,7 +116,7 @@ def warning(self, msg: str, *, tag: str = "") -> None: def error(self, msg: str, *, tag: str = "") -> None: caller_info = get_caller_info() - tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}" + tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.') file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format) logger.patch(lambda r: r.update(caller_info)).error(msg) logger.remove(file_handler_id) From 4105ef3d97504e0dc1835b90b7714d3528ed44d7 Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 16 Jul 2024 10:30:56 +0000 Subject: [PATCH 11/14] Add todos --- rdagent/core/conf.py | 1 + rdagent/core/utils.py | 5 +++-- rdagent/log/logger.py | 26 ++++++++++++++------------ rdagent/log/storage.py | 3 ++- rdagent/log/utils.py | 13 ++++++++----- rdagent/oai/llm_utils.py | 3 +++ 6 files changed, 31 insertions(+), 20 deletions(-) diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py index 1873e117..f709d575 100644 --- a/rdagent/core/conf.py +++ b/rdagent/core/conf.py @@ -15,6 +15,7 @@ class RDAgentSettings(BaseSettings): + # TODO: (xiao) I think LLMSetting may be a better name. # TODO: (xiao) I think most of the config should be in oai.config # Log configs # TODO: (xiao) think it can be a seperate config. diff --git a/rdagent/core/utils.py b/rdagent/core/utils.py index 9688e84a..efec9184 100644 --- a/rdagent/core/utils.py +++ b/rdagent/core/utils.py @@ -19,14 +19,15 @@ class RDAgentException(Exception): # noqa: N818 class SingletonMeta(type): - # _instance_dict: ClassVar[dict] = {} def __init__(cls, *args, **kwargs): - cls._instance_dict = {} # 为每个类创建一个唯一的_instance_dict字典 + cls._instance_dict: dict = {} + # This must be the class variable instead of sharing one in all classes to avoid confliction like `A()`, `B()` super().__init__(*args, **kwargs) def __call__(cls, *args: Any, **kwargs: Any) -> Any: # Since it's hard to align the difference call using args and kwargs, we strictly ask to use kwargs in Singleton if args: + # TODO: this restriction can be solved. exception_message = "Please only use kwargs in Singleton to avoid misunderstanding." raise RDAgentException(exception_message) kwargs_hash = hash(tuple(sorted(kwargs.items()))) diff --git a/rdagent/log/logger.py b/rdagent/log/logger.py index 98ff27a5..5e7329ac 100644 --- a/rdagent/log/logger.py +++ b/rdagent/log/logger.py @@ -13,10 +13,17 @@ from multiprocessing.connection import Connection from .storage import FileStorage -from .utils import get_caller_info, remove_ansi_codes +from .utils import LogColors, get_caller_info class RDAgentLog(SingletonBaseClass): + # TODO: Simplify it to introduce less concepts ( We may merge RDAgentLog, Storage &) + # Solution: Storage => PipeLog, View => PipeLogView, RDAgentLog is an instance of PipeLogger + # PipeLogger.info(...) , PipeLogger.get_resp() to get feedback from frontend. + # def f(): + # logger = PipeLog() + # logger.info("") + # feedback = logger.get_reps() _tag: str = "" def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path) -> None: @@ -30,15 +37,8 @@ def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path self.storage = FileStorage(log_trace_path) - self.outter_conn, self.inner_conn = Pipe() - self.main_pid = os.getpid() - @property - def stream(self) -> Connection: - return self.outter_conn - - @contextmanager def tag(self, tag: str): if tag.strip() == "": @@ -46,11 +46,11 @@ def tag(self, tag: str): if self._tag != "": tag = "." + tag + # TODO: It may result in error in mutithreading or co-routine self._tag = self._tag + tag yield self._tag = self._tag[:-len(tag)] - def get_pids(self) -> str: ''' Returns a string of pids from the current process to the main process. @@ -66,14 +66,12 @@ def get_pids(self) -> str: process = parent_process return pid_chain - def file_format(self, record, raw: bool=False): - record["message"] = remove_ansi_codes(record["message"]) + record["message"] = LogColors.remove_ansi_codes(record["message"]) if raw: return "{message}" return "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} - {message}\n" - def log_object(self, obj: object, *, tag: str = "") -> None: caller_info = get_caller_info() tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.') @@ -86,6 +84,7 @@ def log_object(self, obj: object, *, tag: str = "") -> None: def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None: + # TODO: too much duplicated. due to we have no logger with stream context; caller_info = get_caller_info() if raw: logger.remove() @@ -106,6 +105,9 @@ def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None: logger.add(sys.stderr) def warning(self, msg: str, *, tag: str = "") -> None: + # TODO: reuse code + # _log(self, msg: str, *, tag: str = "", level=Literal["warning", "error", ..]) -> None: + # getattr(logger.patch(lambda r: r.update(caller_info)), level)(msg) caller_info = get_caller_info() tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.') diff --git a/rdagent/log/storage.py b/rdagent/log/storage.py index 262f640e..50418a24 100644 --- a/rdagent/log/storage.py +++ b/rdagent/log/storage.py @@ -23,6 +23,7 @@ def log(self, save_type: Literal["json", "text", "pkl"] = "text", timestamp: datetime | None = None, ) -> Path: + # TODO: We can remove the timestamp after we implement PipeLog if timestamp is None: timestamp = datetime.now(timezone.utc) else: @@ -50,4 +51,4 @@ def log(self, obj = str(obj) with path.open("w") as f: f.write(obj) - return path \ No newline at end of file + return path diff --git a/rdagent/log/utils.py b/rdagent/log/utils.py index 1631b2c7..1459f888 100644 --- a/rdagent/log/utils.py +++ b/rdagent/log/utils.py @@ -47,10 +47,13 @@ def render(self, text: str, color: str = "", style: str = "") -> str: return f"{style}{text}{self.END}" - -def remove_ansi_codes(s: str) -> str: - ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') - return ansi_escape.sub('', s) + @staticmethod + def remove_ansi_codes(s: str) -> str: + """ + It is for removing ansi ctrl characters in the string(e.g. colored text) + """ + ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]') + return ansi_escape.sub('', s) def get_caller_info(): @@ -64,4 +67,4 @@ def get_caller_info(): 'name': frame.f_globals['__name__'], # Get the module name from the frame's globals 'function': frame.f_code.co_name, # Get the caller's function name } - return info \ No newline at end of file + return info diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index 9f742603..b44c05b8 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -87,6 +87,7 @@ def __init__(self, cache_location: str) -> None: super().__init__() self.cache_location = cache_location db_file_exist = Path(cache_location).exists() + # TODO: sqlite3 does not support multiprocessing. self.conn = sqlite3.connect(cache_location) self.c = self.conn.cursor() if not db_file_exist: @@ -550,6 +551,7 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 json_mode: bool = False, add_json_in_prompt: bool = False, ) -> str: + # TODO: we can add this function back to avoid so much `self.cfg.log_llm_chat_content` if self.cfg.log_llm_chat_content: logger.info(self._build_log_messages(messages)) # TODO: fail to use loguru adaptor due to stream response @@ -644,6 +646,7 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 ) if self.chat_stream: resp = "" + # TODO: with logger.config(stream=self.chat_stream): and add a `stream_start` flag to add timestamp for first message. if self.cfg.log_llm_chat_content: logger.info(f"{LogColors.CYAN}Response:{LogColors.END}") From 97a5598491688b1e3e00282060ddeea617bb9fe6 Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 16 Jul 2024 10:44:09 +0000 Subject: [PATCH 12/14] Add example in docstring --- rdagent/log/logger.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/rdagent/log/logger.py b/rdagent/log/logger.py index 5e7329ac..2b9b3ead 100644 --- a/rdagent/log/logger.py +++ b/rdagent/log/logger.py @@ -17,6 +17,27 @@ class RDAgentLog(SingletonBaseClass): + """ + The files are organized based on the tag & PID + Here is an example tag + + .. code-block:: + + a + - b + - c + - 123 + - common_logs.log + - 1322 + - common_logs.log + - 1233 + - .pkl + - d + - 1233-673 ... + - 1233-4563 ... + - 1233-365 ... + + """ # TODO: Simplify it to introduce less concepts ( We may merge RDAgentLog, Storage &) # Solution: Storage => PipeLog, View => PipeLogView, RDAgentLog is an instance of PipeLogger # PipeLogger.info(...) , PipeLogger.get_resp() to get feedback from frontend. From 3ed6b42ffad47b5c61cf4d640da7edf3ff41590a Mon Sep 17 00:00:00 2001 From: bowen xian Date: Tue, 16 Jul 2024 10:50:16 +0000 Subject: [PATCH 13/14] add log tag for llm_utils.py --- rdagent/oai/llm_utils.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index 9f742603..01ed453a 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -34,17 +34,17 @@ def md5_hash(input_string: str) -> str: try: from azure.identity import DefaultAzureCredential, get_bearer_token_provider except ImportError: - logger.warning("azure.identity is not installed.", tag="gpt") + logger.warning("azure.identity is not installed.") try: import openai except ImportError: - logger.warning("openai is not installed.", tag="gpt") + logger.warning("openai is not installed.") try: from llama import Llama except ImportError: - logger.warning("llama is not installed.", tag="gpt") + logger.warning("llama is not installed.") class ConvManager: @@ -203,7 +203,7 @@ def build_chat_completion(self, user_prompt: str, **kwargs: Any) -> str: """ messages = self.build_chat_completion_message(user_prompt) - with logger.tag(self.conversation_id): + with logger.tag(f"session_{self.conversation_id}"): response = self.api_backend._try_create_chat_completion_or_embedding( # noqa: SLF001 messages=messages, chat_completion=True, @@ -551,7 +551,7 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 add_json_in_prompt: bool = False, ) -> str: if self.cfg.log_llm_chat_content: - logger.info(self._build_log_messages(messages)) + logger.info(self._build_log_messages(messages), tag="llm_messages") # TODO: fail to use loguru adaptor due to stream response input_content_json = json.dumps(messages) input_content_json = ( @@ -580,7 +580,7 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 ) resp = response[0]["generation"]["content"] if self.cfg.log_llm_chat_content: - logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}") + logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages") elif self.use_gcr_endpoint: body = str.encode( json.dumps( @@ -602,7 +602,7 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 response = urllib.request.urlopen(req) # noqa: S310 resp = json.loads(response.read().decode())["output"] if self.cfg.log_llm_chat_content: - logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}") + logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages") else: if self.use_azure: if json_mode: @@ -645,7 +645,7 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 if self.chat_stream: resp = "" if self.cfg.log_llm_chat_content: - logger.info(f"{LogColors.CYAN}Response:{LogColors.END}") + logger.info(f"{LogColors.CYAN}Response:{LogColors.END}", tag="llm_messages") for chunk in response: content = ( @@ -654,19 +654,19 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 else "" ) if self.cfg.log_llm_chat_content: - logger.info(LogColors.CYAN + content + LogColors.END, raw=True) + logger.info(LogColors.CYAN + content + LogColors.END, raw=True, tag="llm_messages") resp += content if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None: finish_reason = chunk.choices[0].finish_reason if self.cfg.log_llm_chat_content: - logger.info("\n", raw=True) + logger.info("\n", raw=True, tag="llm_messages") else: resp = response.choices[0].message.content finish_reason = response.choices[0].finish_reason if self.cfg.log_llm_chat_content: - logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}") + logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages") if json_mode: json.loads(resp) if self.dump_chat_cache: From 1f8081f6e68e9d526f2eb3636490f086dee94f33 Mon Sep 17 00:00:00 2001 From: Young Date: Tue, 16 Jul 2024 12:34:59 +0000 Subject: [PATCH 14/14] Capture lost content --- rdagent/oai/llm_utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py index 4ad88c7b..35fb0fb5 100644 --- a/rdagent/oai/llm_utils.py +++ b/rdagent/oai/llm_utils.py @@ -562,6 +562,8 @@ def _create_chat_completion_inner_function( # noqa: C901, PLR0912, PLR0915 if self.use_chat_cache: cache_result = self.cache.chat_get(input_content_json) if cache_result is not None: + if self.cfg.log_llm_chat_content: + logger.info(f"{LogColors.CYAN}Response:{cache_result}{LogColors.END}", tag="llm_messages") return cache_result, None if temperature is None: