From 9eb403c646e7eab199c92db1805f59351c6f0423 Mon Sep 17 00:00:00 2001
From: bowen xian <xianbowen@outlook.com>
Date: Thu, 4 Jul 2024 12:09:25 +0000
Subject: [PATCH 01/14] remove ruff comment in log.py

---
 rdagent/core/log.py | 48 ---------------------------------------------
 1 file changed, 48 deletions(-)

diff --git a/rdagent/core/log.py b/rdagent/core/log.py
index 9c6e2b74..e789863c 100644
--- a/rdagent/core/log.py
+++ b/rdagent/core/log.py
@@ -43,27 +43,9 @@ def render(self, text: str, color: str = "", style: str = "") -> str:
         colors = self.get_all_colors()
         # Perhaps color and font should be distinguished here.
         if color and color in colors:
-            # Changes to accommodate ruff checks.
-            # Original code:
-            # raise ValueError(f"color should be in: {colors} but now is: {color}")
-            # Description of the problem:
-            # TRY003 Avoid specifying long messages outside the exception class
-            # EM102 Exception must not use an f-string literal, assign to variable first
-            # References:
-            # https://docs.astral.sh/ruff/rules/raise-vanilla-args/
-            # https://docs.astral.sh/ruff/rules/f-string-in-exception/
             error_message = f"color should be in: {colors} but now is: {color}"
             raise ValueError(error_message)
         if style and style in colors:
-            # Changes to accommodate ruff checks.
-            # Original code:
-            # raise ValueError(f"style should be in: {colors} but now is: {style}")
-            # Description of the problem:
-            # TRY003 Avoid specifying long messages outside the exception class
-            # EM102 Exception must not use an f-string literal, assign to variable first
-            # References:
-            # https://docs.astral.sh/ruff/rules/raise-vanilla-args/
-            # https://docs.astral.sh/ruff/rules/f-string-in-exception/
             error_message = f"style should be in: {colors} but now is: {style}"
             raise ValueError(error_message)
 
@@ -94,18 +76,6 @@ def __setstate__(self, _: str) -> None:
 
     def plain_info(self, *args: Sequence) -> None:
         for arg in args:
-            # Changes to accommodate ruff checks.
-            # Original code:
-            # self.logger.info(
-            #   f"""
-            #   {LogColors.YELLOW}{LogColors.BOLD}
-            #   Info:{LogColors.END}{LogColors.WHITE}{arg}{LogColors.END}
-            #   """,
-            # )
-            # Description of the problem:
-            # G004 Logging statement uses f-string
-            # References:
-            # https://docs.astral.sh/ruff/rules/logging-f-string/
             info = f"""
                 {LogColors.YELLOW}{LogColors.BOLD}
                 Info:{LogColors.END}{LogColors.WHITE}{arg}{LogColors.END}
@@ -114,28 +84,10 @@ def plain_info(self, *args: Sequence) -> None:
 
     def warning(self, *args: Sequence) -> None:
         for arg in args:
-            # Changes to accommodate ruff checks.
-            # Original code:
-            # self.logger.warning(
-            #   f"{LogColors.BLUE}{LogColors.BOLD}Warning:{LogColors.END}{arg}",
-            # )
-            # Description of the problem:
-            # G004 Logging statement uses f-string
-            # References:
-            # https://docs.astral.sh/ruff/rules/logging-f-string/
             info = f"{LogColors.BLUE}{LogColors.BOLD}Warning:{LogColors.END}{arg}"
             self.logger.warning(info)
 
     def error(self, *args: Sequence) -> None:
         for arg in args:
-            # Changes to accommodate ruff checks.
-            # Original code:
-            # self.logger.error(
-            #   f"{LogColors.RED}{LogColors.BOLD}Error:{LogColors.END}{arg}",
-            # )
-            # Description of the problem:
-            # G004 Logging statement uses f-string
-            # References:
-            # https://docs.astral.sh/ruff/rules/logging-f-string/
             info = f"{LogColors.RED}{LogColors.BOLD}Error:{LogColors.END}{arg}"
             self.logger.error(info)

From cce68ad81f7d487ea3aa9fe74efa2512c3bb260c Mon Sep 17 00:00:00 2001
From: bowen xian <xianbowen@outlook.com>
Date: Tue, 9 Jul 2024 04:04:42 +0000
Subject: [PATCH 02/14] change log framework and fix llm_utils.py's logs

---
 rdagent/core/conf.py     |   5 +-
 rdagent/core/log.py      | 220 +++++++++++++++++++++++++++++++++------
 rdagent/oai/llm_utils.py |  69 ++++++------
 3 files changed, 226 insertions(+), 68 deletions(-)

diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py
index 120cb8f8..a926d54c 100644
--- a/rdagent/core/conf.py
+++ b/rdagent/core/conf.py
@@ -16,6 +16,10 @@
 
 class RDAgentSettings(BaseSettings):
     # TODO: (xiao) I think most of the config should be in oai.config
+    # Log configs
+    log_trace_path: str | None = None
+    log_llm_chat_content: bool = True
+
     use_azure: bool = True
     use_azure_token_provider: bool = False
     managed_identity_client_id: str | None = None
@@ -28,7 +32,6 @@ class RDAgentSettings(BaseSettings):
     prompt_cache_path: str = str(Path.cwd() / "prompt_cache.db")
     session_cache_folder_location: str = str(Path.cwd() / "session_cache_folder/")
     max_past_message_include: int = 10
-    log_llm_chat_content: bool = True
 
     # Chat configs
     chat_openai_api_key: str = ""
diff --git a/rdagent/core/log.py b/rdagent/core/log.py
index e789863c..5b2e5371 100644
--- a/rdagent/core/log.py
+++ b/rdagent/core/log.py
@@ -1,11 +1,138 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Sequence
+import re
+import sys
+import pickle
+import json
+import inspect
+from typing import TYPE_CHECKING, Sequence, Literal
+
+if TYPE_CHECKING:
+    from loguru import Logger, Message, Record
 
 from loguru import logger
+from abc import abstractmethod
+from datetime import datetime, timezone
+from pathlib import Path
+from functools import partial
 
-if TYPE_CHECKING:
-    from loguru import Logger
+from rdagent.core.conf import RD_AGENT_SETTINGS
+from rdagent.core.utils import SingletonBaseClass
+
+
+def get_caller_info():
+    # Get the current stack information
+    stack = inspect.stack()
+    # The second element is usually the caller's information
+    caller_info = stack[2]
+    frame = caller_info[0]
+    info = {
+        'line': caller_info.lineno,
+        'name': frame.f_globals['__name__'],  # Get the module name from the frame's globals
+    }
+    return info
+
+
+class Storage:
+    """
+    Basic storage to support saving objects;
+
+    # Usage:
+
+    The storage has mainly two kind of users:
+    - The logging end: you can choose any of the following method to use the object
+        - We can use it directly with the native logging storage
+        - We can use it with other logging tools; For example, serve as a handler for loggers
+    - The view end:
+        - Mainly for the subclass of `logging.base.View`
+        - It should provide two kind of ways to provide content
+            - offline content provision.
+            - online content preovision.
+    """
+
+    @abstractmethod
+    def log(self, obj: object, name: str = "", **kwargs: dict) -> str | Path | None:
+        """
+
+        Parameters
+        ----------
+        obj : object
+            The object for logging.
+        name : str
+            The name of the object.  For example "a.b.c"
+            We may log a lot of objects to a same name
+        """
+        ...
+
+
+class View:
+    """
+    Motivation:
+
+    Display the content in the storage
+    """
+
+
+class FileStorage(Storage):
+    """
+    The info are logginged to the file systems
+
+    TODO: describe the storage format
+    """
+
+    def __init__(self, path: str = "./log/") -> None:
+        self.path = Path(path)
+        self.path.mkdir(parents=True, exist_ok=True)
+
+    def log(self,
+            obj: object,
+            name: str = "",
+            save_type: Literal["json", "text", "pkl", "short-text"] = "short-text",
+            timestamp: datetime | None = None,
+            split_name: bool = True,
+            ) -> Path:
+        if timestamp is None:
+            timestamp = datetime.now(timezone.utc)
+        else:
+            timestamp = timestamp.astimezone(timezone.utc)
+        
+        cur_p = self.path
+        if split_name:
+            uri_l = name.split(".")
+            for u in uri_l:
+                cur_p = cur_p / u
+        else:
+            cur_p = cur_p / name
+        cur_p.mkdir(parents=True, exist_ok=True)
+
+        path = cur_p / f"{timestamp.strftime('%Y-%m-%d_%H-%M-%S-%f')}.log"
+
+        if save_type == "json":
+            path = path.with_suffix(".json")
+            with path.open("w") as f:
+                try:
+                    json.dump(obj, f)
+                except TypeError:
+                    json.dump(json.loads(str(obj)), f)
+            return path
+        elif save_type == "pkl":
+            path = path.with_suffix(".pkl")
+            with path.open("wb") as f:
+                pickle.dump(obj, f)
+            return path
+        elif save_type == "text":
+            obj = str(obj)
+            with path.open("w") as f:
+                f.write(obj)
+            return path
+        else:
+            obj = str(obj).strip()
+            if obj == "":
+                return
+            path = cur_p / "common_logs.log"
+            with path.open("a") as f:
+                f.write(f"{timestamp.isoformat()}: {obj}\n\n") # add a new line to separate logs
+            return path
 
 
 class LogColors:
@@ -54,40 +181,67 @@ def render(self, text: str, color: str = "", style: str = "") -> str:
         return f"{style}{text}{self.END}"
 
 
-class RDAgentLog:
-    # logger.add(loguru_handler, level="INFO")   # you can add use storage as a loguru handler
+def loguru2storage_handler(storage: Storage, record: Message) -> None:
+    msg = f"{record.record['level']} | {record.record['name']}:{record.record['line']} - {RDAgentLog.remove_ansi_codes(record.record['message'])}"
+    storage.log(msg, timestamp=record.record["time"], save_type="short-text")
+
+
+class RDAgentLog(SingletonBaseClass):
+
+    def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path) -> None:
+        if log_trace_path is None:
+            timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S-%f")
+            log_trace_path: Path = Path.cwd() / "log" / timestamp
+            log_trace_path.mkdir(parents=True, exist_ok=True)
+        
+        self.storage = FileStorage(log_trace_path)
+
+        # add handler to save log to storage
+        logger.add(partial(loguru2storage_handler, self.storage))
+
+        self.log_stream = self.LogStreamContextManager(self.storage)
+
+    @staticmethod
+    def remove_ansi_codes(s: str) -> str:
+        ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')
+        return ansi_escape.sub('', s)
+
+    class LogStreamContextManager:
+        def __init__(self, storage: Storage) -> None:
+            self.captured_logs = []
+            self.storage = storage
+
+        def capture(self, message: Message) -> None:
+            self.captured_logs.append(message.record["message"])
+
+        def __enter__(self):
+            logger.remove()
+            logger.add(sys.stderr, format=lambda x: x["message"])
+            logger.add(self.capture)
 
-    def __init__(self) -> None:
-        self.logger: Logger = logger
+        def __exit__(self, exc_type, exc_value, traceback) -> None:
+            logger.info('\n')
+            logger.remove()
+            logger.add(partial(loguru2storage_handler, self.storage))
+            logger.info("[stream log] " + "".join(self.captured_logs))
+            logger.add(sys.stderr)
 
-    def info(self, *args: Sequence, plain: bool = False) -> None:
-        if plain:
-            return self.plain_info(*args)
-        for arg in args:
-            info = f"{LogColors.WHITE}{arg}{LogColors.END}"
-            self.logger.info(info)
-        return None
+    def log_objects(self, *objs: Sequence[object]) -> None:
+        caller_info = get_caller_info()
+        for obj in objs:
+            logp = self.storage.log(obj, name=f"{type(obj).__module__}.{type(obj).__name__}", save_type="pkl", split_name=False)
 
-    def __getstate__(self) -> dict:
-        return {}
+            logger.patch(lambda r: r.update(caller_info)).info(f"Logging object in {logp.absolute()}")
 
-    def __setstate__(self, _: str) -> None:
-        self.logger = logger
+    def info(self, msg: str) -> None:
+        caller_info = get_caller_info()
+        logger.patch(lambda r: r.update(caller_info)).info(msg)
 
-    def plain_info(self, *args: Sequence) -> None:
-        for arg in args:
-            info = f"""
-                {LogColors.YELLOW}{LogColors.BOLD}
-                Info:{LogColors.END}{LogColors.WHITE}{arg}{LogColors.END}
-            """
-            self.logger.info(info)
+    def warning(self, msg: str) -> None:
+        caller_info = get_caller_info()
+        logger.patch(lambda r: r.update(caller_info)).warning(msg)
 
-    def warning(self, *args: Sequence) -> None:
-        for arg in args:
-            info = f"{LogColors.BLUE}{LogColors.BOLD}Warning:{LogColors.END}{arg}"
-            self.logger.warning(info)
+    def error(self, msg: str) -> None:
+        caller_info = get_caller_info()
+        logger.patch(lambda r: r.update(caller_info)).error(msg)
 
-    def error(self, *args: Sequence) -> None:
-        for arg in args:
-            info = f"{LogColors.RED}{LogColors.BOLD}Error:{LogColors.END}{arg}"
-            self.logger.error(info)
diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py
index 72ab564c..2c5abc56 100644
--- a/rdagent/oai/llm_utils.py
+++ b/rdagent/oai/llm_utils.py
@@ -24,6 +24,7 @@
 
 DEFAULT_QLIB_DOT_PATH = Path("./")
 
+logger: RDAgentLog = RDAgentLog()
 
 def md5_hash(input_string: str) -> str:
     hash_md5 = hashlib.md5(usedforsecurity=False)
@@ -35,17 +36,17 @@ def md5_hash(input_string: str) -> str:
 try:
     from azure.identity import DefaultAzureCredential, get_bearer_token_provider
 except ImportError:
-    RDAgentLog().warning("azure.identity is not installed.")
+    logger.warning("azure.identity is not installed.")
 
 try:
     import openai
 except ImportError:
-    RDAgentLog().warning("openai is not installed.")
+    logger.warning("openai is not installed.")
 
 try:
     from llama import Llama
 except ImportError:
-    RDAgentLog().warning("llama is not installed.")
+    logger.warning("llama is not installed.")
 
 
 class ConvManager:
@@ -150,11 +151,11 @@ def __init__(self) -> None:
         self.session_cache_location = Path(self.cfg.session_cache_folder_location)
         self.cache = {}
         if not self.session_cache_location.exists():
-            RDAgentLog().warning(f"Directory {self.session_cache_location} does not exist.")
+            logger.warning(f"Directory {self.session_cache_location} does not exist.")
             self.session_cache_location.mkdir(parents=True, exist_ok=True)
         json_files = [f for f in self.session_cache_location.iterdir() if f.suffix == ".json"]
         if not json_files:
-            RDAgentLog().info(f"No JSON files found in {self.session_cache_location}.")
+            logger.info(f"No JSON files found in {self.session_cache_location}.")
         for file_path in json_files:
             conversation_id = file_path.stem
             with file_path.open("r") as f:
@@ -478,8 +479,8 @@ def _try_create_chat_completion_or_embedding(
                 if chat_completion:
                     return self._create_chat_completion_auto_continue(**kwargs)
             except openai.BadRequestError as e:  # noqa: PERF203
-                RDAgentLog().warning(e)
-                RDAgentLog().warning(f"Retrying {i+1}th time...")
+                logger.warning(e)
+                logger.warning(f"Retrying {i+1}th time...")
                 if "'messages' must contain the word 'json' in some form" in e.message:
                     kwargs["add_json_in_prompt"] = True
                 elif embedding and "maximum context length" in e.message:
@@ -487,8 +488,8 @@ def _try_create_chat_completion_or_embedding(
                         content[: len(content) // 2] for content in kwargs.get("input_content_list", [])
                     ]
             except Exception as e:  # noqa: BLE001
-                RDAgentLog().warning(e)
-                RDAgentLog().warning(f"Retrying {i+1}th time...")
+                logger.warning(e)
+                logger.warning(f"Retrying {i+1}th time...")
                 time.sleep(self.retry_wait_seconds)
         error_message = f"Failed to create chat completion after {max_retry} retries."
         raise RuntimeError(error_message)
@@ -526,7 +527,7 @@ def _create_embedding_inner_function(
                 self.cache.embedding_set(content_to_embedding_dict)
         return [content_to_embedding_dict[content] for content in input_content_list]
 
-    def _build_messages(self, messages: list[dict]) -> str:
+    def _build_log_messages(self, messages: list[dict]) -> str:
         log_messages = ""
         for m in messages:
             log_messages += (
@@ -537,16 +538,12 @@ def _build_messages(self, messages: list[dict]) -> str:
             )
         return log_messages
 
-    def log_messages(self, messages: list[dict]) -> None:
-        if self.cfg.log_llm_chat_content:
-            RDAgentLog().info(self._build_messages(messages))
-
     def log_response(self, response: str | None = None, *, stream: bool = False) -> None:
         if self.cfg.log_llm_chat_content:
             if stream:
-                RDAgentLog().info(f"\n{LogColors.CYAN}Response:{LogColors.END}")
+                logger.info(f"\n{LogColors.CYAN}Response:{LogColors.END}")
             else:
-                RDAgentLog().info(f"\n{LogColors.CYAN}Response:{response}{LogColors.END}")
+                logger.info(f"\n{LogColors.CYAN}Response:{response}{LogColors.END}")
 
     def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
         self,
@@ -560,7 +557,8 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
         json_mode: bool = False,
         add_json_in_prompt: bool = False,
     ) -> str:
-        self.log_messages(messages)
+        if self.cfg.log_llm_chat_content:
+            logger.info(self._build_log_messages(messages))
         # TODO: fail to use loguru adaptor due to stream response
         input_content_json = json.dumps(messages)
         input_content_json = (
@@ -588,7 +586,8 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
                 temperature=temperature,
             )
             resp = response[0]["generation"]["content"]
-            self.log_response(resp)
+            if self.cfg.log_llm_chat_content:
+                logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}")
         elif self.use_gcr_endpoint:
             body = str.encode(
                 json.dumps(
@@ -609,7 +608,8 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
             req = urllib.request.Request(self.gcr_endpoint, body, self.headers)  # noqa: S310
             response = urllib.request.urlopen(req)  # noqa: S310
             resp = json.loads(response.read().decode())["output"]
-            self.log_response(resp)
+            if self.cfg.log_llm_chat_content:
+                logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}")
         else:
             if self.use_azure:
                 if json_mode:
@@ -650,33 +650,34 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
                     presence_penalty=presence_penalty,
                 )
             if self.chat_stream:
-                self.log_response(stream=True)
                 resp = ""
-                for chunk in response:
-                    content = (
-                        chunk.choices[0].delta.content
-                        if len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None
-                        else ""
-                    )
-                    if self.cfg.log_llm_chat_content:
-                        print(LogColors.CYAN + content, end="")
-                    resp += content
-                    if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None:
-                        finish_reason = chunk.choices[0].finish_reason
+                with logger.log_stream:
+                    logger.info(f"{LogColors.CYAN}Response:{LogColors.END}")
+                    for chunk in response:
+                        content = (
+                            chunk.choices[0].delta.content
+                            if len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None
+                            else ""
+                        )
+                        if self.cfg.log_llm_chat_content:
+                            logger.info(LogColors.CYAN + content + LogColors.END)
+                        resp += content
+                        if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None:
+                            finish_reason = chunk.choices[0].finish_reason
             else:
                 resp = response.choices[0].message.content
                 finish_reason = response.choices[0].finish_reason
-                self.log_response(resp)
+                if self.cfg.log_llm_chat_content:
+                    logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}")
             if json_mode:
                 json.loads(resp)
         if self.dump_chat_cache:
             self.cache.chat_set(input_content_json, resp)
-        # TODO: fail to use loguru adaptor due to stream response
         return resp, finish_reason
 
     def calculate_token_from_messages(self, messages: list[dict]) -> int:
         if self.use_llama2 or self.use_gcr_endpoint:
-            RDAgentLog().warning("num_tokens_from_messages() is not implemented for model llama2.")
+            logger.warning("num_tokens_from_messages() is not implemented for model llama2.")
             return 0  # TODO implement this function for llama2
 
         if "gpt4" in self.chat_model or "gpt-4" in self.chat_model:

From d2c1b95cbd47767fd785dfb68dcf4fa11654a53d Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Tue, 9 Jul 2024 06:01:36 +0000
Subject: [PATCH 03/14] Some thoughts for logging

---
 rdagent/core/log.py        | 13 +++++++++++
 rdagent/log/__init__.py    |  3 +++
 rdagent/log/ui/__init__.py | 47 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 63 insertions(+)
 create mode 100644 rdagent/log/__init__.py
 create mode 100644 rdagent/log/ui/__init__.py

diff --git a/rdagent/core/log.py b/rdagent/core/log.py
index 5b2e5371..4dd1f8c7 100644
--- a/rdagent/core/log.py
+++ b/rdagent/core/log.py
@@ -71,6 +71,19 @@ class View:
 
     Display the content in the storage
     """
+    # TODO: pleas fix me
+    @abstractmethod
+    def display(s: Storage, watch: bool=False):
+        """
+
+        Parameters
+        ----------
+        s : Storage
+            
+        watch : bool
+            should we watch the new content and display them
+        """
+        ...
 
 
 class FileStorage(Storage):
diff --git a/rdagent/log/__init__.py b/rdagent/log/__init__.py
new file mode 100644
index 00000000..250e32d4
--- /dev/null
+++ b/rdagent/log/__init__.py
@@ -0,0 +1,3 @@
+"""
+Here is a more reasonable place
+"""
diff --git a/rdagent/log/ui/__init__.py b/rdagent/log/ui/__init__.py
new file mode 100644
index 00000000..69f018ad
--- /dev/null
+++ b/rdagent/log/ui/__init__.py
@@ -0,0 +1,47 @@
+"""
+UI is a kind of view for user.
+
+We are not sure how generality of the UI, we can't make decision among following options:
+- in general folder like rdagent/log/ui
+- It is for specific scenario rdagent/scenarios/
+"""
+
+
+from rdagent.core.log import View
+
+
+class WebView(View):
+    r"""
+
+    We have tree structure for sequence
+
+    session
+    |      \
+    ... defined by user ...
+    |                              \
+    info1 -> info2 -> ... -> info3 -> ...  overtime.
+
+    Some design principles:
+        session1.module(e.g. implement).
+        `s.log(a.b.1.c) s.log(a.b.2.c)` should not handed over to users.
+
+    An display example:
+
+        W1 write factor
+        W2 evaluate factor
+        W3 backtest
+
+        W123
+            R
+            RX
+            RXX
+            RX
+
+        W4
+            trace r1 r2 r3 r4
+
+    What to do next?
+    1. Data structure
+    2. Map path like `a.b.c` to frontend components
+    3. Display logic
+    """

From 0cb7b597dcfce8cb9bdac8618d5910dd6a945053 Mon Sep 17 00:00:00 2001
From: bowen xian <xianbowen@outlook.com>
Date: Wed, 10 Jul 2024 15:14:28 +0000
Subject: [PATCH 04/14] fix SingletonMeta's definition, maintain an instance
 dict for each class that inherits it

---
 rdagent/core/utils.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/rdagent/core/utils.py b/rdagent/core/utils.py
index e72b17eb..9688e84a 100644
--- a/rdagent/core/utils.py
+++ b/rdagent/core/utils.py
@@ -19,7 +19,10 @@ class RDAgentException(Exception):  # noqa: N818
 
 
 class SingletonMeta(type):
-    _instance_dict: ClassVar[dict] = {}
+    # _instance_dict: ClassVar[dict] = {}
+    def __init__(cls, *args, **kwargs):
+        cls._instance_dict = {}  # 为每个类创建一个唯一的_instance_dict字典
+        super().__init__(*args, **kwargs)
 
     def __call__(cls, *args: Any, **kwargs: Any) -> Any:
         # Since it's hard to align the difference call using args and kwargs, we strictly ask to use kwargs in Singleton

From f32d85b9d4d74d9aa83593aa2f7d32ae2e309155 Mon Sep 17 00:00:00 2001
From: bowen xian <xianbowen@outlook.com>
Date: Tue, 16 Jul 2024 04:59:45 +0000
Subject: [PATCH 05/14] adjust log codes directory, add some tag for factor
 implementation logging

---
 .env.example                                  |   1 +
 .../factor_extract_and_implement.py           |  11 +-
 .../knowledge_management/vector_base.py       |   7 +-
 .../evolving/evaluators.py                    |   9 +-
 .../evolving/evolvable_subjects.py            |   4 +-
 .../evolving/knowledge_management.py          |   7 +-
 .../evolving/scheduler.py                     |   4 +-
 .../factor_implementation/factor.py           |   4 +-
 .../model_implementation/task_loader.py       |   5 +-
 rdagent/core/log.py                           | 260 ------------------
 rdagent/log/__init__.py                       |   7 +-
 rdagent/log/base.py                           |  62 +++++
 rdagent/log/logger.py                         | 122 ++++++++
 rdagent/log/storage.py                        |  53 ++++
 rdagent/log/ui/__init__.py                    |  40 ---
 rdagent/log/ui/web.py                         |  59 ++++
 rdagent/log/utils.py                          |  67 +++++
 rdagent/oai/llm_utils.py                      |  51 ++--
 .../factor_experiment_loader/pdf_loader.py    |  14 +-
 19 files changed, 430 insertions(+), 357 deletions(-)
 delete mode 100644 rdagent/core/log.py
 create mode 100644 rdagent/log/base.py
 create mode 100644 rdagent/log/logger.py
 create mode 100644 rdagent/log/storage.py
 create mode 100644 rdagent/log/ui/web.py
 create mode 100644 rdagent/log/utils.py

diff --git a/.env.example b/.env.example
index b7244697..8b504ad1 100644
--- a/.env.example
+++ b/.env.example
@@ -9,6 +9,7 @@ USE_EMBEDDING_CACHE=True
 LOG_LLM_CHAT_CONTENT=False
 CHAT_FREQUENCY_PENALTY=0.0
 CHAT_PRESENCE_PENALTY=0.0
+LOG_TRACE_PATH=log_traces
 
 # embedding model configs:
 EMBEDDING_OPENAI_API_KEY=your_api_key
diff --git a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
index 830e8216..418014fb 100644
--- a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
+++ b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
@@ -7,16 +7,23 @@
 from rdagent.scenarios.qlib.factor_task_implementation import (
     COSTEERFG_QUANT_FACTOR_IMPLEMENTATION,
 )
+from rdagent.log import rdagent_logger as logger
 
 assert load_dotenv()
 
 
 def extract_factors_and_implement(report_file_path: str) -> None:
-    factor_tasks = FactorImplementationExperimentLoaderFromPDFfiles().load(report_file_path)
-    implementation_result = COSTEERFG_QUANT_FACTOR_IMPLEMENTATION().generate(factor_tasks)
+    with logger.tag('factor'):
+        with logger.tag('load'):
+            factor_tasks = FactorImplementationExperimentLoaderFromPDFfiles().load(report_file_path)
+        with logger.tag('implementation'):
+            implementation_result = COSTEERFG_QUANT_FACTOR_IMPLEMENTATION().generate(factor_tasks)
+            logger.log_object(implementation_result, tag="results")
     # Qlib to run the implementation
     return implementation_result
 
 
 if __name__ == "__main__":
     extract_factors_and_implement("/home/xuyang1/workspace/report.pdf")
+
+# %%
diff --git a/rdagent/components/knowledge_management/vector_base.py b/rdagent/components/knowledge_management/vector_base.py
index 9322a510..17ca514b 100644
--- a/rdagent/components/knowledge_management/vector_base.py
+++ b/rdagent/components/knowledge_management/vector_base.py
@@ -5,10 +5,9 @@
 import pandas as pd
 from scipy.spatial.distance import cosine
 
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.oai.llm_utils import APIBackend
 
-
 class KnowledgeMetaData:
     def __init__(self, content: str = "", label: str = None, embedding=None, identity=None):
         self.label = label
@@ -127,7 +126,7 @@ def __init__(self, vector_df_path: Union[str, Path] = None):
         else:
             self.vector_df = pd.DataFrame(columns=["id", "label", "content", "embedding"])
 
-        RDAgentLog().info(f"VectorBase loaded, shape={self.vector_df.shape}")
+        logger.info(f"VectorBase loaded, shape={self.vector_df.shape}")
 
     def shape(self):
         return self.vector_df.shape
@@ -205,4 +204,4 @@ def load(self, vector_df_path, **kwargs):
 
     def save(self, vector_df_path, **kwargs):
         self.vector_df.to_pickle(vector_df_path)
-        RDAgentLog().info(f"Save vectorBase vector_df to: {vector_df_path}")
+        logger.info(f"Save vectorBase vector_df to: {vector_df_path}")
diff --git a/rdagent/components/task_implementation/factor_implementation/evolving/evaluators.py b/rdagent/components/task_implementation/factor_implementation/evolving/evaluators.py
index 19289031..e86985b6 100644
--- a/rdagent/components/task_implementation/factor_implementation/evolving/evaluators.py
+++ b/rdagent/components/task_implementation/factor_implementation/evolving/evaluators.py
@@ -20,14 +20,13 @@
 from rdagent.core.evaluation import Evaluator
 from rdagent.core.evolving_framework import Feedback, QueriedKnowledge
 from rdagent.core.experiment import Implementation
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.prompts import Prompts
 from rdagent.core.utils import multiprocessing_wrapper
 from rdagent.oai.llm_utils import APIBackend
 
 evaluate_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
 
-
 class FactorImplementationEvaluator(Evaluator):
     # TODO:
     # I think we should have unified interface for all evaluates, for examples.
@@ -457,7 +456,7 @@ def evaluate(
                         )
 
                 except Exception as e:
-                    RDAgentLog().warning(f"Error occurred when calculating the correlation: {str(e)}")
+                    logger.warning(f"Error occurred when calculating the correlation: {str(e)}")
                     conclusions.append(
                         f"Some error occurred when calculating the correlation. Investigate the factors that might be causing the discrepancies and ensure that the logic of the factor calculation is consistent. Error: {e}",
                     )
@@ -661,7 +660,7 @@ def evaluate(
                         value_decision,
                     ) = self.value_evaluator.evaluate(source_df=source_df, gt_df=gt_df)
                 except Exception as e:
-                    RDAgentLog().warning("Value evaluation failed with exception: %s", e)
+                    logger.warning("Value evaluation failed with exception: %s", e)
                     factor_feedback.factor_value_feedback = "Value evaluation failed."
                     value_decision = False
 
@@ -745,6 +744,6 @@ def evaluate(
             None if single_feedback is None else single_feedback.final_decision
             for single_feedback in multi_implementation_feedback
         ]
-        RDAgentLog().info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}")
+        logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}")
 
         return multi_implementation_feedback
diff --git a/rdagent/components/task_implementation/factor_implementation/evolving/evolvable_subjects.py b/rdagent/components/task_implementation/factor_implementation/evolving/evolvable_subjects.py
index a79d96a6..ae10aa7f 100644
--- a/rdagent/components/task_implementation/factor_implementation/evolving/evolvable_subjects.py
+++ b/rdagent/components/task_implementation/factor_implementation/evolving/evolvable_subjects.py
@@ -4,7 +4,7 @@
     FileBasedFactorImplementation,
 )
 from rdagent.core.evolving_framework import EvolvableSubjects
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 
 
 class FactorEvolvingItem(FactorExperiment, EvolvableSubjects):
@@ -23,7 +23,7 @@ def __init__(
             sub_gt_implementations,
         ) != len(self.sub_tasks):
             self.sub_gt_implementations = None
-            RDAgentLog().warning(
+            logger.warning(
                 "The length of sub_gt_implementations is not equal to the length of sub_tasks, set sub_gt_implementations to None",
             )
         else:
diff --git a/rdagent/components/task_implementation/factor_implementation/evolving/knowledge_management.py b/rdagent/components/task_implementation/factor_implementation/evolving/knowledge_management.py
index 400be6ef..528a57eb 100644
--- a/rdagent/components/task_implementation/factor_implementation/evolving/knowledge_management.py
+++ b/rdagent/components/task_implementation/factor_implementation/evolving/knowledge_management.py
@@ -32,14 +32,13 @@
     RAGStrategy,
 )
 from rdagent.core.experiment import Implementation
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.prompts import Prompts
 from rdagent.oai.llm_utils import (
     APIBackend,
     calculate_embedding_distance_between_str_list,
 )
 
-
 class FactorImplementationKnowledge(Knowledge):
     def __init__(
         self,
@@ -340,7 +339,7 @@ def analyze_component(
             )["component_no_list"]
             return [all_component_nodes[index - 1] for index in sorted(list(set(component_no_list)))]
         except:
-            RDAgentLog().warning("Error when analyzing components.")
+            logger.warning("Error when analyzing components.")
             analyze_component_user_prompt = "Your response is not a valid component index list."
 
         return []
@@ -718,7 +717,7 @@ def __init__(self, init_component_list=None) -> None:
         Load knowledge, offer brief information of knowledge and common handle interfaces
         """
         self.graph: UndirectedGraph = UndirectedGraph.load(Path.cwd() / "graph.pkl")
-        RDAgentLog().info(f"Knowledge Graph loaded, size={self.graph.size()}")
+        logger.info(f"Knowledge Graph loaded, size={self.graph.size()}")
 
         if init_component_list:
             for component in init_component_list:
diff --git a/rdagent/components/task_implementation/factor_implementation/evolving/scheduler.py b/rdagent/components/task_implementation/factor_implementation/evolving/scheduler.py
index ded011ee..36f38be6 100644
--- a/rdagent/components/task_implementation/factor_implementation/evolving/scheduler.py
+++ b/rdagent/components/task_implementation/factor_implementation/evolving/scheduler.py
@@ -10,7 +10,7 @@
     get_data_folder_intro,
 )
 from rdagent.core.conf import RD_AGENT_SETTINGS
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.prompts import Prompts
 from rdagent.oai.llm_utils import APIBackend
 
@@ -25,7 +25,7 @@ def RandomSelect(to_be_finished_task_index, implementation_factors_per_round):
         implementation_factors_per_round,
     )
 
-    RDAgentLog().info(f"The random selection is: {to_be_finished_task_index}")
+    logger.info(f"The random selection is: {to_be_finished_task_index}")
     return to_be_finished_task_index
 
 
diff --git a/rdagent/components/task_implementation/factor_implementation/factor.py b/rdagent/components/task_implementation/factor_implementation/factor.py
index 91232693..4def1393 100644
--- a/rdagent/components/task_implementation/factor_implementation/factor.py
+++ b/rdagent/components/task_implementation/factor_implementation/factor.py
@@ -18,7 +18,7 @@
     RuntimeErrorException,
 )
 from rdagent.core.experiment import Experiment, FBImplementation, Task
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.oai.llm_utils import md5_hash
 
 
@@ -77,7 +77,7 @@ def __init__(
         super().__init__(target_task)
         self.code = code
         self.executed_factor_value_dataframe = executed_factor_value_dataframe
-        self.logger = RDAgentLog()
+        self.logger = logger
         self.raise_exception = raise_exception
         self.workspace_path = Path(
             FACTOR_IMPLEMENT_SETTINGS.file_based_execution_workspace,
diff --git a/rdagent/components/task_implementation/model_implementation/task_loader.py b/rdagent/components/task_implementation/model_implementation/task_loader.py
index f11a1f33..8da65f6a 100644
--- a/rdagent/components/task_implementation/model_implementation/task_loader.py
+++ b/rdagent/components/task_implementation/model_implementation/task_loader.py
@@ -13,13 +13,12 @@
     ModelImplementationTaskLoaderFromDict,
     ModelTask,
 )
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.prompts import Prompts
 from rdagent.oai.llm_utils import APIBackend
 
 document_process_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
 
-
 def extract_model_from_doc(doc_content: str) -> dict:
     """
     Extract model information from document content.
@@ -66,7 +65,7 @@ def extract_model_from_doc(doc_content: str) -> dict:
             else:
                 break
 
-    RDAgentLog().info(f"已经完成{len(model_dict)}个模型的提取")
+    logger.info(f"已经完成{len(model_dict)}个模型的提取")
 
     return model_dict
 
diff --git a/rdagent/core/log.py b/rdagent/core/log.py
deleted file mode 100644
index 4dd1f8c7..00000000
--- a/rdagent/core/log.py
+++ /dev/null
@@ -1,260 +0,0 @@
-from __future__ import annotations
-
-import re
-import sys
-import pickle
-import json
-import inspect
-from typing import TYPE_CHECKING, Sequence, Literal
-
-if TYPE_CHECKING:
-    from loguru import Logger, Message, Record
-
-from loguru import logger
-from abc import abstractmethod
-from datetime import datetime, timezone
-from pathlib import Path
-from functools import partial
-
-from rdagent.core.conf import RD_AGENT_SETTINGS
-from rdagent.core.utils import SingletonBaseClass
-
-
-def get_caller_info():
-    # Get the current stack information
-    stack = inspect.stack()
-    # The second element is usually the caller's information
-    caller_info = stack[2]
-    frame = caller_info[0]
-    info = {
-        'line': caller_info.lineno,
-        'name': frame.f_globals['__name__'],  # Get the module name from the frame's globals
-    }
-    return info
-
-
-class Storage:
-    """
-    Basic storage to support saving objects;
-
-    # Usage:
-
-    The storage has mainly two kind of users:
-    - The logging end: you can choose any of the following method to use the object
-        - We can use it directly with the native logging storage
-        - We can use it with other logging tools; For example, serve as a handler for loggers
-    - The view end:
-        - Mainly for the subclass of `logging.base.View`
-        - It should provide two kind of ways to provide content
-            - offline content provision.
-            - online content preovision.
-    """
-
-    @abstractmethod
-    def log(self, obj: object, name: str = "", **kwargs: dict) -> str | Path | None:
-        """
-
-        Parameters
-        ----------
-        obj : object
-            The object for logging.
-        name : str
-            The name of the object.  For example "a.b.c"
-            We may log a lot of objects to a same name
-        """
-        ...
-
-
-class View:
-    """
-    Motivation:
-
-    Display the content in the storage
-    """
-    # TODO: pleas fix me
-    @abstractmethod
-    def display(s: Storage, watch: bool=False):
-        """
-
-        Parameters
-        ----------
-        s : Storage
-            
-        watch : bool
-            should we watch the new content and display them
-        """
-        ...
-
-
-class FileStorage(Storage):
-    """
-    The info are logginged to the file systems
-
-    TODO: describe the storage format
-    """
-
-    def __init__(self, path: str = "./log/") -> None:
-        self.path = Path(path)
-        self.path.mkdir(parents=True, exist_ok=True)
-
-    def log(self,
-            obj: object,
-            name: str = "",
-            save_type: Literal["json", "text", "pkl", "short-text"] = "short-text",
-            timestamp: datetime | None = None,
-            split_name: bool = True,
-            ) -> Path:
-        if timestamp is None:
-            timestamp = datetime.now(timezone.utc)
-        else:
-            timestamp = timestamp.astimezone(timezone.utc)
-        
-        cur_p = self.path
-        if split_name:
-            uri_l = name.split(".")
-            for u in uri_l:
-                cur_p = cur_p / u
-        else:
-            cur_p = cur_p / name
-        cur_p.mkdir(parents=True, exist_ok=True)
-
-        path = cur_p / f"{timestamp.strftime('%Y-%m-%d_%H-%M-%S-%f')}.log"
-
-        if save_type == "json":
-            path = path.with_suffix(".json")
-            with path.open("w") as f:
-                try:
-                    json.dump(obj, f)
-                except TypeError:
-                    json.dump(json.loads(str(obj)), f)
-            return path
-        elif save_type == "pkl":
-            path = path.with_suffix(".pkl")
-            with path.open("wb") as f:
-                pickle.dump(obj, f)
-            return path
-        elif save_type == "text":
-            obj = str(obj)
-            with path.open("w") as f:
-                f.write(obj)
-            return path
-        else:
-            obj = str(obj).strip()
-            if obj == "":
-                return
-            path = cur_p / "common_logs.log"
-            with path.open("a") as f:
-                f.write(f"{timestamp.isoformat()}: {obj}\n\n") # add a new line to separate logs
-            return path
-
-
-class LogColors:
-    """
-    ANSI color codes for use in console output.
-    """
-
-    RED = "\033[91m"
-    GREEN = "\033[92m"
-    YELLOW = "\033[93m"
-    BLUE = "\033[94m"
-    MAGENTA = "\033[95m"
-    CYAN = "\033[96m"
-    WHITE = "\033[97m"
-    GRAY = "\033[90m"
-    BLACK = "\033[30m"
-
-    BOLD = "\033[1m"
-    ITALIC = "\033[3m"
-
-    END = "\033[0m"
-
-    @classmethod
-    def get_all_colors(cls: type[LogColors]) -> list:
-        names = dir(cls)
-        names = [name for name in names if not name.startswith("__") and not callable(getattr(cls, name))]
-        return [getattr(cls, name) for name in names]
-
-    def render(self, text: str, color: str = "", style: str = "") -> str:
-        """
-        render text by input color and style.
-        It's not recommend that input text is already rendered.
-        """
-        # This method is called too frequently, which is not good.
-        colors = self.get_all_colors()
-        # Perhaps color and font should be distinguished here.
-        if color and color in colors:
-            error_message = f"color should be in: {colors} but now is: {color}"
-            raise ValueError(error_message)
-        if style and style in colors:
-            error_message = f"style should be in: {colors} but now is: {style}"
-            raise ValueError(error_message)
-
-        text = f"{color}{text}{self.END}"
-
-        return f"{style}{text}{self.END}"
-
-
-def loguru2storage_handler(storage: Storage, record: Message) -> None:
-    msg = f"{record.record['level']} | {record.record['name']}:{record.record['line']} - {RDAgentLog.remove_ansi_codes(record.record['message'])}"
-    storage.log(msg, timestamp=record.record["time"], save_type="short-text")
-
-
-class RDAgentLog(SingletonBaseClass):
-
-    def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path) -> None:
-        if log_trace_path is None:
-            timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S-%f")
-            log_trace_path: Path = Path.cwd() / "log" / timestamp
-            log_trace_path.mkdir(parents=True, exist_ok=True)
-        
-        self.storage = FileStorage(log_trace_path)
-
-        # add handler to save log to storage
-        logger.add(partial(loguru2storage_handler, self.storage))
-
-        self.log_stream = self.LogStreamContextManager(self.storage)
-
-    @staticmethod
-    def remove_ansi_codes(s: str) -> str:
-        ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')
-        return ansi_escape.sub('', s)
-
-    class LogStreamContextManager:
-        def __init__(self, storage: Storage) -> None:
-            self.captured_logs = []
-            self.storage = storage
-
-        def capture(self, message: Message) -> None:
-            self.captured_logs.append(message.record["message"])
-
-        def __enter__(self):
-            logger.remove()
-            logger.add(sys.stderr, format=lambda x: x["message"])
-            logger.add(self.capture)
-
-        def __exit__(self, exc_type, exc_value, traceback) -> None:
-            logger.info('\n')
-            logger.remove()
-            logger.add(partial(loguru2storage_handler, self.storage))
-            logger.info("[stream log] " + "".join(self.captured_logs))
-            logger.add(sys.stderr)
-
-    def log_objects(self, *objs: Sequence[object]) -> None:
-        caller_info = get_caller_info()
-        for obj in objs:
-            logp = self.storage.log(obj, name=f"{type(obj).__module__}.{type(obj).__name__}", save_type="pkl", split_name=False)
-
-            logger.patch(lambda r: r.update(caller_info)).info(f"Logging object in {logp.absolute()}")
-
-    def info(self, msg: str) -> None:
-        caller_info = get_caller_info()
-        logger.patch(lambda r: r.update(caller_info)).info(msg)
-
-    def warning(self, msg: str) -> None:
-        caller_info = get_caller_info()
-        logger.patch(lambda r: r.update(caller_info)).warning(msg)
-
-    def error(self, msg: str) -> None:
-        caller_info = get_caller_info()
-        logger.patch(lambda r: r.update(caller_info)).error(msg)
-
diff --git a/rdagent/log/__init__.py b/rdagent/log/__init__.py
index 250e32d4..ff6f158d 100644
--- a/rdagent/log/__init__.py
+++ b/rdagent/log/__init__.py
@@ -1,3 +1,4 @@
-"""
-Here is a more reasonable place
-"""
+from .logger import RDAgentLog
+from .utils import LogColors
+
+rdagent_logger: RDAgentLog = RDAgentLog()
\ No newline at end of file
diff --git a/rdagent/log/base.py b/rdagent/log/base.py
new file mode 100644
index 00000000..d99c6a81
--- /dev/null
+++ b/rdagent/log/base.py
@@ -0,0 +1,62 @@
+from __future__ import annotations
+
+from abc import abstractmethod
+from pathlib import Path
+
+
+class Storage:
+    """
+    Basic storage to support saving objects;
+
+    # Usage:
+
+    The storage has mainly two kind of users:
+    - The logging end: you can choose any of the following method to use the object
+        - We can use it directly with the native logging storage
+        - We can use it with other logging tools; For example, serve as a handler for loggers
+    - The view end:
+        - Mainly for the subclass of `logging.base.View`
+        - It should provide two kind of ways to provide content
+            - offline content provision.
+            - online content preovision.
+    """
+
+    @abstractmethod
+    def log(self, obj: object, name: str = "", **kwargs: dict) -> str | Path:
+        """
+
+        Parameters
+        ----------
+        obj : object
+            The object for logging.
+        name : str
+            The name of the object.  For example "a.b.c"
+            We may log a lot of objects to a same name
+
+        Returns
+        -------
+        str | Path
+            The storage identifier of the object.
+        """
+        ...
+
+
+class View:
+    """
+    Motivation:
+
+    Display the content in the storage
+    """
+    # TODO: pleas fix me
+    @abstractmethod
+    def display(s: Storage, watch: bool=False):
+        """
+
+        Parameters
+        ----------
+        s : Storage
+            
+        watch : bool
+            should we watch the new content and display them
+        """
+        ...
\ No newline at end of file
diff --git a/rdagent/log/logger.py b/rdagent/log/logger.py
new file mode 100644
index 00000000..566acf4f
--- /dev/null
+++ b/rdagent/log/logger.py
@@ -0,0 +1,122 @@
+import sys
+import os
+
+from loguru import logger
+from rdagent.core.utils import SingletonBaseClass
+from rdagent.core.conf import RD_AGENT_SETTINGS
+from pathlib import Path
+from psutil import Process
+from datetime import datetime, timezone
+from functools import partial
+from contextlib import contextmanager
+from multiprocessing import Pipe
+from multiprocessing.connection import Connection
+
+from .storage import FileStorage
+from .utils import get_caller_info, remove_ansi_codes
+
+
+class RDAgentLog(SingletonBaseClass):
+    _tag: str = ""
+
+    def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path) -> None:
+        
+        if log_trace_path is None:
+            timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d_%H-%M-%S-%f")
+            log_trace_path: Path = Path.cwd() / "log" / timestamp
+        
+        self.log_trace_path = Path(log_trace_path)
+        self.log_trace_path.mkdir(parents=True, exist_ok=True)
+        
+        self.storage = FileStorage(log_trace_path)
+        
+        self.outter_conn, self.inner_conn = Pipe()
+        
+        self.main_pid = os.getpid()
+
+    @property
+    def stream(self) -> Connection:
+        return self.outter_conn
+
+
+    @contextmanager
+    def tag(self, tag: str):
+        if tag.strip() == "":
+            raise ValueError("Tag cannot be empty.")
+        if self._tag != "":
+            tag = "." + tag
+
+        self._tag = self._tag + tag
+        yield
+        self._tag = self._tag[:-len(tag)]
+
+
+    def get_pids(self) -> str:
+        '''
+        Returns a string of pids from the current process to the main process.
+        Split by '-'.
+        '''
+        pid = os.getpid()
+        process = Process(pid)
+        pid_chain = f"{pid}"
+        while process.pid != self.main_pid:
+            parent_pid = process.ppid()
+            parent_process = Process(parent_pid)
+            pid_chain = f"{parent_pid}-{pid_chain}"
+            process = parent_process
+        return pid_chain
+
+
+    def file_format(self, record, raw: bool=False):
+        record["message"] = remove_ansi_codes(record["message"])
+        if raw:
+            return "{message}"
+        return "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} - {message}\n"
+
+
+    def log_object(self, obj: object, *, tag: str = "") -> None:
+        caller_info = get_caller_info()
+        tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}"
+
+        logp = self.storage.log(obj, name=tag, save_type="pkl")
+
+        file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format)
+        logger.patch(lambda r: r.update(caller_info)).info(f"Logging object in {logp.absolute()}")
+        logger.remove(file_handler_id)
+
+
+    def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None:
+        caller_info = get_caller_info()
+        if raw:
+            logger.remove()
+            logger.add(sys.stderr, format=lambda r: "{message}")
+
+        tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}"
+        log_file_path = self.log_trace_path / tag.replace('.','/') /"common_logs.log"
+        if raw:
+            file_handler_id = logger.add(log_file_path, format=partial(self.file_format, raw=True))
+        else:
+            file_handler_id = logger.add(log_file_path, format=self.file_format)
+
+        logger.patch(lambda r: r.update(caller_info)).info(msg)
+        logger.remove(file_handler_id)
+
+        if raw:
+            logger.remove()
+            logger.add(sys.stderr)
+
+    def warning(self, msg: str, *, tag: str = "") -> None:
+        caller_info = get_caller_info()
+
+        tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}"
+        file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format)
+        logger.patch(lambda r: r.update(caller_info)).warning(msg)
+        logger.remove(file_handler_id)
+
+    def error(self, msg: str, *, tag: str = "") -> None:
+        caller_info = get_caller_info()
+        
+        tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}"
+        file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format)
+        logger.patch(lambda r: r.update(caller_info)).error(msg)
+        logger.remove(file_handler_id)
diff --git a/rdagent/log/storage.py b/rdagent/log/storage.py
new file mode 100644
index 00000000..262f640e
--- /dev/null
+++ b/rdagent/log/storage.py
@@ -0,0 +1,53 @@
+import json
+import pickle
+from typing import Literal
+
+from pathlib import Path
+from datetime import datetime, timezone
+from .base import Storage
+
+class FileStorage(Storage):
+    """
+    The info are logginged to the file systems
+
+    TODO: describe the storage format
+    """
+
+    def __init__(self, path: str = "./log/") -> None:
+        self.path = Path(path)
+        self.path.mkdir(parents=True, exist_ok=True)
+
+    def log(self,
+            obj: object,
+            name: str = "",
+            save_type: Literal["json", "text", "pkl"] = "text",
+            timestamp: datetime | None = None,
+            ) -> Path:
+        if timestamp is None:
+            timestamp = datetime.now(timezone.utc)
+        else:
+            timestamp = timestamp.astimezone(timezone.utc)
+        
+        cur_p = self.path / name.replace(".", "/")
+        cur_p.mkdir(parents=True, exist_ok=True)
+
+        path = cur_p / f"{timestamp.strftime('%Y-%m-%d_%H-%M-%S-%f')}.log"
+
+        if save_type == "json":
+            path = path.with_suffix(".json")
+            with path.open("w") as f:
+                try:
+                    json.dump(obj, f)
+                except TypeError:
+                    json.dump(json.loads(str(obj)), f)
+            return path
+        elif save_type == "pkl":
+            path = path.with_suffix(".pkl")
+            with path.open("wb") as f:
+                pickle.dump(obj, f)
+            return path
+        elif save_type == "text":
+            obj = str(obj)
+            with path.open("w") as f:
+                f.write(obj)
+            return path
\ No newline at end of file
diff --git a/rdagent/log/ui/__init__.py b/rdagent/log/ui/__init__.py
index 69f018ad..6040d81b 100644
--- a/rdagent/log/ui/__init__.py
+++ b/rdagent/log/ui/__init__.py
@@ -5,43 +5,3 @@
 - in general folder like rdagent/log/ui
 - It is for specific scenario rdagent/scenarios/
 """
-
-
-from rdagent.core.log import View
-
-
-class WebView(View):
-    r"""
-
-    We have tree structure for sequence
-
-    session
-    |      \
-    ... defined by user ...
-    |                              \
-    info1 -> info2 -> ... -> info3 -> ...  overtime.
-
-    Some design principles:
-        session1.module(e.g. implement).
-        `s.log(a.b.1.c) s.log(a.b.2.c)` should not handed over to users.
-
-    An display example:
-
-        W1 write factor
-        W2 evaluate factor
-        W3 backtest
-
-        W123
-            R
-            RX
-            RXX
-            RX
-
-        W4
-            trace r1 r2 r3 r4
-
-    What to do next?
-    1. Data structure
-    2. Map path like `a.b.c` to frontend components
-    3. Display logic
-    """
diff --git a/rdagent/log/ui/web.py b/rdagent/log/ui/web.py
new file mode 100644
index 00000000..2cc3ad1c
--- /dev/null
+++ b/rdagent/log/ui/web.py
@@ -0,0 +1,59 @@
+from rdagent.log.base import View, Storage
+from pathlib import Path
+
+class ProcessView(View):
+    def __init__(self, trace_path: Path):
+
+        
+        # Save logs to your desired data structure
+        # ...
+        pass
+
+
+    def display(s: Storage, watch: bool = False):
+        pass
+
+
+
+class WebView(View):
+    r"""
+
+    We have tree structure for sequence
+
+    session
+    |      \
+    ... defined by user ...
+    |                              \
+    info1 -> info2 -> ... -> info3 -> ...  overtime.
+
+    Some design principles:
+        session1.module(e.g. implement).
+        `s.log(a.b.1.c) s.log(a.b.2.c)` should not handed over to users.
+
+    An display example:
+
+        W1 write factor
+        W2 evaluate factor
+        W3 backtest
+
+        W123
+            R
+            RX
+            RXX
+            RX
+
+        W4
+            trace r1 r2 r3 r4
+
+    What to do next?
+    1. Data structure
+    2. Map path like `a.b.c` to frontend components
+    3. Display logic
+    """
+    def __init__(self, trace_path: Path):
+        pass
+        # Save logs to your desired data structure
+        # ...
+
+    def display(s: Storage, watch: bool = False):
+        pass
\ No newline at end of file
diff --git a/rdagent/log/utils.py b/rdagent/log/utils.py
new file mode 100644
index 00000000..1631b2c7
--- /dev/null
+++ b/rdagent/log/utils.py
@@ -0,0 +1,67 @@
+import inspect
+import re
+
+
+class LogColors:
+    """
+    ANSI color codes for use in console output.
+    """
+
+    RED = "\033[91m"
+    GREEN = "\033[92m"
+    YELLOW = "\033[93m"
+    BLUE = "\033[94m"
+    MAGENTA = "\033[95m"
+    CYAN = "\033[96m"
+    WHITE = "\033[97m"
+    GRAY = "\033[90m"
+    BLACK = "\033[30m"
+
+    BOLD = "\033[1m"
+    ITALIC = "\033[3m"
+
+    END = "\033[0m"
+
+    @classmethod
+    def get_all_colors(cls: type['LogColors']) -> list:
+        names = dir(cls)
+        names = [name for name in names if not name.startswith("__") and not callable(getattr(cls, name))]
+        return [getattr(cls, name) for name in names]
+
+    def render(self, text: str, color: str = "", style: str = "") -> str:
+        """
+        render text by input color and style.
+        It's not recommend that input text is already rendered.
+        """
+        # This method is called too frequently, which is not good.
+        colors = self.get_all_colors()
+        # Perhaps color and font should be distinguished here.
+        if color and color in colors:
+            error_message = f"color should be in: {colors} but now is: {color}"
+            raise ValueError(error_message)
+        if style and style in colors:
+            error_message = f"style should be in: {colors} but now is: {style}"
+            raise ValueError(error_message)
+
+        text = f"{color}{text}{self.END}"
+
+        return f"{style}{text}{self.END}"
+
+
+def remove_ansi_codes(s: str) -> str:
+    ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')
+    return ansi_escape.sub('', s)
+
+
+def get_caller_info():
+    # Get the current stack information
+    stack = inspect.stack()
+    # The second element is usually the caller's information
+    caller_info = stack[2]
+    frame = caller_info[0]
+    info = {
+        'line': caller_info.lineno,
+        'name': frame.f_globals['__name__'],  # Get the module name from the frame's globals
+        'function': frame.f_code.co_name,  # Get the caller's function name
+    }
+    return info
\ No newline at end of file
diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py
index 2c5abc56..6c3e0431 100644
--- a/rdagent/oai/llm_utils.py
+++ b/rdagent/oai/llm_utils.py
@@ -19,13 +19,11 @@
 import tiktoken
 
 from rdagent.core.conf import RD_AGENT_SETTINGS
-from rdagent.core.log import LogColors, RDAgentLog
+from rdagent.log import LogColors, rdagent_logger as logger
 from rdagent.core.utils import SingletonBaseClass
 
 DEFAULT_QLIB_DOT_PATH = Path("./")
 
-logger: RDAgentLog = RDAgentLog()
-
 def md5_hash(input_string: str) -> str:
     hash_md5 = hashlib.md5(usedforsecurity=False)
     input_bytes = input_string.encode("utf-8")
@@ -36,17 +34,17 @@ def md5_hash(input_string: str) -> str:
 try:
     from azure.identity import DefaultAzureCredential, get_bearer_token_provider
 except ImportError:
-    logger.warning("azure.identity is not installed.")
+    logger.warning("azure.identity is not installed.", tag="gpt")
 
 try:
     import openai
 except ImportError:
-    logger.warning("openai is not installed.")
+    logger.warning("openai is not installed.", tag="gpt")
 
 try:
     from llama import Llama
 except ImportError:
-    logger.warning("llama is not installed.")
+    logger.warning("llama is not installed.", tag="gpt")
 
 
 class ConvManager:
@@ -204,12 +202,14 @@ def build_chat_completion(self, user_prompt: str, **kwargs: Any) -> str:
         user prompt should always be provided
         """
         messages = self.build_chat_completion_message(user_prompt)
+        
+        with logger.tag(self.conversation_id):
+            response = self.api_backend._try_create_chat_completion_or_embedding(  # noqa: SLF001
+                messages=messages,
+                chat_completion=True,
+                **kwargs,
+            )
 
-        response = self.api_backend._try_create_chat_completion_or_embedding(  # noqa: SLF001
-            messages=messages,
-            chat_completion=True,
-            **kwargs,
-        )
         messages.append(
             {
                 "role": "assistant",
@@ -651,19 +651,24 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
                 )
             if self.chat_stream:
                 resp = ""
-                with logger.log_stream:
+                if self.cfg.log_llm_chat_content:
                     logger.info(f"{LogColors.CYAN}Response:{LogColors.END}")
-                    for chunk in response:
-                        content = (
-                            chunk.choices[0].delta.content
-                            if len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None
-                            else ""
-                        )
-                        if self.cfg.log_llm_chat_content:
-                            logger.info(LogColors.CYAN + content + LogColors.END)
-                        resp += content
-                        if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None:
-                            finish_reason = chunk.choices[0].finish_reason
+                
+                for chunk in response:
+                    content = (
+                        chunk.choices[0].delta.content
+                        if len(chunk.choices) > 0 and chunk.choices[0].delta.content is not None
+                        else ""
+                    )
+                    if self.cfg.log_llm_chat_content:
+                        logger.info(LogColors.CYAN + content + LogColors.END, raw=True)
+                    resp += content
+                    if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None:
+                        finish_reason = chunk.choices[0].finish_reason
+                
+                if self.cfg.log_llm_chat_content:
+                    logger.info("\n", raw=True)
+                
             else:
                 resp = response.choices[0].message.content
                 finish_reason = response.choices[0].finish_reason
diff --git a/rdagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py b/rdagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py
index 1161d010..881fb513 100644
--- a/rdagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py
+++ b/rdagent/scenarios/qlib/factor_experiment_loader/pdf_loader.py
@@ -18,7 +18,7 @@
 )
 from rdagent.components.loader.experiment_loader import FactorExperimentLoader
 from rdagent.core.conf import RD_AGENT_SETTINGS
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.prompts import Prompts
 from rdagent.oai.llm_utils import APIBackend, create_embedding_with_multiprocessing
 from rdagent.scenarios.qlib.factor_experiment_loader.json_loader import (
@@ -27,7 +27,6 @@
 
 document_process_prompts = Prompts(file_path=Path(__file__).parent / "prompts.yaml")
 
-
 def classify_report_from_dict(
     report_dict: Mapping[str, str],
     vote_time: int = 1,
@@ -69,7 +68,7 @@ def classify_report_from_dict(
         if isinstance(value, str):
             content = value
         else:
-            RDAgentLog().warning(f"输入格式不符合要求: {file_name}")
+            logger.warning(f"输入格式不符合要求: {file_name}")
             res_dict[file_name] = {"class": 0}
             continue
 
@@ -101,7 +100,7 @@ def classify_report_from_dict(
                 res = json.loads(res)
                 vote_list.append(int(res["class"]))
             except json.JSONDecodeError:
-                RDAgentLog().warning(f"返回值无法解析: {file_name}")
+                logger.warning(f"返回值无法解析: {file_name}")
                 res_dict[file_name] = {"class": 0}
             count_0 = vote_list.count(0)
             count_1 = vote_list.count(1)
@@ -246,7 +245,7 @@ def extract_factors_from_report_dict(
             if int(value.get("class")) == 1:
                 useful_report_dict[key] = report_dict[key]
         else:
-            RDAgentLog().warning(f"Invalid input format: {key}")
+            logger.warning(f"Invalid input format: {key}")
 
     final_report_factor_dict = {}
     # for file_name, content in useful_report_dict.items():
@@ -276,7 +275,7 @@ def extract_factors_from_report_dict(
                 file_name = file_names[index]
                 final_report_factor_dict.setdefault(file_name, {})
                 final_report_factor_dict[file_name] = result.get()
-        RDAgentLog().info(f"已经完成{len(final_report_factor_dict)}个报告的因子提取")
+        logger.info(f"已经完成{len(final_report_factor_dict)}个报告的因子提取")
 
     return final_report_factor_dict
 
@@ -499,7 +498,7 @@ def __deduplicate_factor_dict(factor_dict: dict[str, dict[str, str]]) -> list[li
             kmeans_index_group = __kmeans_embeddings(embeddings=embeddings, k=k)
             if len(kmeans_index_group[0]) < RD_AGENT_SETTINGS.max_input_duplicate_factor_group:
                 target_k = k
-                RDAgentLog().info(f"K-means group number: {k}")
+                logger.info(f"K-means group number: {k}")
                 break
     factor_name_groups = [[factor_names[index] for index in index_group] for index_group in kmeans_index_group]
 
@@ -589,4 +588,5 @@ def load(self, file_or_folder_path: Path) -> dict:
 
         factor_viability = check_factor_viability(factor_dict)
         factor_dict, duplication_names_list = deduplicate_factors_by_llm(factor_dict, factor_viability)
+        logger.log_object(factor_dict, tag="factor_dict")
         return FactorImplementationExperimentLoaderFromDict().load(factor_dict)

From bdffbdb308bd2b8f63955c064b8eb8beee2172fc Mon Sep 17 00:00:00 2001
From: you-n-g <you-n-g@users.noreply.github.com>
Date: Tue, 16 Jul 2024 13:50:11 +0800
Subject: [PATCH 06/14] Update rdagent/core/conf.py

---
 rdagent/core/conf.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py
index a926d54c..1873e117 100644
--- a/rdagent/core/conf.py
+++ b/rdagent/core/conf.py
@@ -17,6 +17,7 @@
 class RDAgentSettings(BaseSettings):
     # TODO: (xiao) I think most of the config should be in oai.config
     # Log configs
+    # TODO: (xiao) think it can be a seperate config.
     log_trace_path: str | None = None
     log_llm_chat_content: bool = True
 

From 3bb181defe7537c930059be69ffe4f41c6d818ab Mon Sep 17 00:00:00 2001
From: bowen xian <xianbowen@outlook.com>
Date: Tue, 16 Jul 2024 05:58:12 +0000
Subject: [PATCH 07/14] fix factor task app & log

---
 .../factor_extract_and_implement.py               | 15 +++++++--------
 rdagent/oai/llm_utils.py                          |  7 -------
 2 files changed, 7 insertions(+), 15 deletions(-)

diff --git a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
index 4004409d..de5048d9 100644
--- a/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
+++ b/rdagent/app/factor_extraction_and_implementation/factor_extract_and_implement.py
@@ -1,20 +1,19 @@
 # %%
 from dotenv import load_dotenv
 
-from rdagent.scenarios.qlib.factor_experiment_loader.pdf_loader import (
-    FactorExperimentLoaderFromPDFfiles,
-)
-from rdagent.scenarios.qlib.factor_task_implementation import (
-    COSTEERFG_QUANT_FACTOR_IMPLEMENTATION,
-)
+from rdagent.scenarios.qlib.factor_experiment_loader.pdf_loader import FactorExperimentLoaderFromPDFfiles
+from rdagent.scenarios.qlib.factor_task_implementation import QlibFactorCoSTEER
 from rdagent.log import rdagent_logger as logger
 
 assert load_dotenv()
 
 
 def extract_factors_and_implement(report_file_path: str) -> None:
-    factor_tasks = FactorExperimentLoaderFromPDFfiles().load(report_file_path)
-    implementation_result = COSTEERFG_QUANT_FACTOR_IMPLEMENTATION().generate(factor_tasks)
+    with logger.tag('extract_factors_and_implement'):
+        with logger.tag('load_factor_tasks'):
+            factor_tasks = FactorExperimentLoaderFromPDFfiles().load(report_file_path)
+        with logger.tag('implement_factors'):
+            implementation_result = QlibFactorCoSTEER().generate(factor_tasks)
     # Qlib to run the implementation
     return implementation_result
 
diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py
index 6c3e0431..9f742603 100644
--- a/rdagent/oai/llm_utils.py
+++ b/rdagent/oai/llm_utils.py
@@ -538,13 +538,6 @@ def _build_log_messages(self, messages: list[dict]) -> str:
             )
         return log_messages
 
-    def log_response(self, response: str | None = None, *, stream: bool = False) -> None:
-        if self.cfg.log_llm_chat_content:
-            if stream:
-                logger.info(f"\n{LogColors.CYAN}Response:{LogColors.END}")
-            else:
-                logger.info(f"\n{LogColors.CYAN}Response:{response}{LogColors.END}")
-
     def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
         self,
         messages: list[dict],

From f9f89ee0f04d1861ec8c13bf7283462e12977af7 Mon Sep 17 00:00:00 2001
From: bowen xian <xianbowen@outlook.com>
Date: Tue, 16 Jul 2024 06:24:28 +0000
Subject: [PATCH 08/14] fix log import

---
 .../coder/factor_coder/CoSTEER/evaluators.py           |  6 +++---
 .../components/coder/model_coder/CoSTEER/evaluators.py |  4 ++--
 .../coder/model_coder/CoSTEER/evolvable_subjects.py    |  4 ++--
 rdagent/scenarios/qlib/task_generator/data.py          |  3 +--
 rdagent/scenarios/qlib/task_generator/feedback.py      |  3 +--
 rdagent/scenarios/qlib/task_generator/model.py         |  4 ++--
 rdagent/utils/env.py                                   | 10 +++++-----
 7 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py b/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py
index 9b8a0fe3..5a2869f6 100644
--- a/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py
+++ b/rdagent/components/coder/factor_coder/CoSTEER/evaluators.py
@@ -17,7 +17,7 @@
 from rdagent.core.evaluation import Evaluator
 from rdagent.core.evolving_framework import Feedback, QueriedKnowledge
 from rdagent.core.experiment import Implementation, Task
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.prompts import Prompts
 from rdagent.core.utils import multiprocessing_wrapper
 from rdagent.oai.llm_utils import APIBackend
@@ -585,7 +585,7 @@ def evaluate(
                     value_feedback=factor_feedback.factor_value_feedback,
                     code_feedback=factor_feedback.code_feedback,
                 )
-            RDAgentLog().info(factor_feedback.final_decision)
+            logger.info(factor_feedback.final_decision)
             return factor_feedback
 
 
@@ -640,7 +640,7 @@ def evaluate(
             None if single_feedback is None else single_feedback.final_decision
             for single_feedback in multi_implementation_feedback
         ]
-        RDAgentLog().info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}")
+        logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}")
 
         return multi_implementation_feedback
 
diff --git a/rdagent/components/coder/model_coder/CoSTEER/evaluators.py b/rdagent/components/coder/model_coder/CoSTEER/evaluators.py
index c3e5f468..9ae642d4 100644
--- a/rdagent/components/coder/model_coder/CoSTEER/evaluators.py
+++ b/rdagent/components/coder/model_coder/CoSTEER/evaluators.py
@@ -16,7 +16,7 @@
 from rdagent.core.evaluation import Evaluator
 from rdagent.core.evolving_framework import QueriedKnowledge
 from rdagent.core.experiment import Implementation, Task
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.prompts import Prompts
 from rdagent.core.utils import multiprocessing_wrapper
 from rdagent.oai.llm_utils import APIBackend
@@ -328,6 +328,6 @@ def evaluate(
             None if single_feedback is None else single_feedback.final_decision
             for single_feedback in multi_implementation_feedback
         ]
-        RDAgentLog().info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}")
+        logger.info(f"Final decisions: {final_decision} True count: {final_decision.count(True)}")
 
         return multi_implementation_feedback
diff --git a/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py b/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py
index 70e485b3..8ed10dd5 100644
--- a/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py
+++ b/rdagent/components/coder/model_coder/CoSTEER/evolvable_subjects.py
@@ -4,7 +4,7 @@
     ModelTask,
 )
 from rdagent.core.evolving_framework import EvolvableSubjects
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 
 
 class ModelEvolvingItem(ModelExperiment, EvolvableSubjects):
@@ -22,7 +22,7 @@ def __init__(
             sub_gt_implementations,
         ) != len(self.sub_tasks):
             self.sub_gt_implementations = None
-            RDAgentLog().warning(
+            logger.warning(
                 "The length of sub_gt_implementations is not equal to the length of sub_tasks, set sub_gt_implementations to None",
             )
         else:
diff --git a/rdagent/scenarios/qlib/task_generator/data.py b/rdagent/scenarios/qlib/task_generator/data.py
index 2203b43d..4995bcf0 100644
--- a/rdagent/scenarios/qlib/task_generator/data.py
+++ b/rdagent/scenarios/qlib/task_generator/data.py
@@ -7,7 +7,7 @@
 
 from rdagent.components.runner import CachedRunner
 from rdagent.components.runner.conf import RUNNER_SETTINGS
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.task_generator import TaskGenerator
 from rdagent.oai.llm_utils import md5_hash
 from rdagent.scenarios.qlib.experiment.factor_experiment import QlibFactorExperiment
@@ -15,7 +15,6 @@
 
 DIRNAME = Path(__file__).absolute().resolve().parent
 DIRNAME_local = Path.cwd()
-logger = RDAgentLog()
 
 # class QlibFactorExpWorkspace:
 
diff --git a/rdagent/scenarios/qlib/task_generator/feedback.py b/rdagent/scenarios/qlib/task_generator/feedback.py
index 8ce1e08e..339ab7f0 100644
--- a/rdagent/scenarios/qlib/task_generator/feedback.py
+++ b/rdagent/scenarios/qlib/task_generator/feedback.py
@@ -7,7 +7,7 @@
 from jinja2 import Environment, StrictUndefined
 
 from rdagent.core.experiment import Experiment
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.prompts import Prompts
 from rdagent.core.proposal import (
     Hypothesis,
@@ -19,7 +19,6 @@
 
 feedback_prompts = Prompts(file_path=Path(__file__).parent.parent / "prompts.yaml")
 DIRNAME = Path(__file__).absolute().resolve().parent
-logger = RDAgentLog()
 
 
 class QlibFactorHypothesisExperiment2Feedback(HypothesisExperiment2Feedback):
diff --git a/rdagent/scenarios/qlib/task_generator/model.py b/rdagent/scenarios/qlib/task_generator/model.py
index 47586924..88c7ac33 100644
--- a/rdagent/scenarios/qlib/task_generator/model.py
+++ b/rdagent/scenarios/qlib/task_generator/model.py
@@ -10,7 +10,7 @@
 )
 from rdagent.components.runner import CachedRunner
 from rdagent.components.runner.conf import RUNNER_SETTINGS
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 from rdagent.core.task_generator import TaskGenerator
 from rdagent.utils.env import QTDockerEnv
 
@@ -72,7 +72,7 @@ def generate(self, exp: ModelExperiment) -> ModelExperiment:
         csv_path = exp.exp_ws.workspace_path / "qlib_res.csv"
 
         if not csv_path.exists():
-            RDAgentLog().error(f"File {csv_path} does not exist.")
+            logger.error(f"File {csv_path} does not exist.")
             return None
 
         result = pd.read_csv(csv_path, index_col=0).iloc[:,0]
diff --git a/rdagent/utils/env.py b/rdagent/utils/env.py
index 8b1503b9..d90becf1 100644
--- a/rdagent/utils/env.py
+++ b/rdagent/utils/env.py
@@ -19,7 +19,7 @@
 from pydantic import BaseModel
 from pydantic_settings import BaseSettings
 
-from rdagent.core.log import RDAgentLog
+from rdagent.log import rdagent_logger as logger
 
 ASpecificBaseModel = TypeVar("ASpecificBaseModel", bound=BaseModel)
 
@@ -146,11 +146,11 @@ def prepare(self):
         """
         client = docker.from_env()
         if self.conf.build_from_dockerfile and self.conf.dockerfile_folder_path.exists():
-            RDAgentLog().info(f"Building the image from dockerfile: {self.conf.dockerfile_folder_path}")
+            logger.info(f"Building the image from dockerfile: {self.conf.dockerfile_folder_path}")
             image, logs = client.images.build(
                 path=str(self.conf.dockerfile_folder_path), tag=self.conf.image, network_mode=self.conf.network
             )
-            RDAgentLog().info(f"Finished building the image from dockerfile: {self.conf.dockerfile_folder_path}")
+            logger.info(f"Finished building the image from dockerfile: {self.conf.dockerfile_folder_path}")
         try:
             client.images.get(self.conf.image)
         except docker.errors.ImageNotFound:
@@ -216,8 +216,8 @@ def prepare(self):
         super().prepare()
         qlib_data_path = next(iter(self.conf.extra_volumes.keys()))
         if not (Path(qlib_data_path) / "qlib_data" / "cn_data").exists():
-            RDAgentLog().info("We are downloading!")
+            logger.info("We are downloading!")
             cmd = "python -m qlib.run.get_data qlib_data --target_dir ~/.qlib/qlib_data/cn_data --region cn --interval 1d --delete_old False"
             self.run(entry=cmd)
         else:
-            RDAgentLog().info("Data already exists. Download skipped.")
+            logger.info("Data already exists. Download skipped.")

From 9c762e55e21bd30b38acf21653a4d4fc777522e5 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Tue, 16 Jul 2024 09:02:25 +0000
Subject: [PATCH 09/14] Streamlet framework

---
 rdagent/log/ui/web.py | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/rdagent/log/ui/web.py b/rdagent/log/ui/web.py
index 2cc3ad1c..ad5aada4 100644
--- a/rdagent/log/ui/web.py
+++ b/rdagent/log/ui/web.py
@@ -26,6 +26,11 @@ class WebView(View):
     |                              \
     info1 -> info2 -> ... -> info3 -> ...  overtime.
 
+    <message dispature>
+          |  | -  dispatch according to uri(e.g. `a.b.c. ...`)
+    Frontend is composed of windows.
+    Each window can individually display the message flow.
+
     Some design principles:
         session1.module(e.g. implement).
         `s.log(a.b.1.c) s.log(a.b.2.c)` should not handed over to users.
@@ -56,4 +61,32 @@ def __init__(self, trace_path: Path):
         # ...
 
     def display(s: Storage, watch: bool = False):
-        pass
\ No newline at end of file
+        ui = STLUI()
+        for msg in s.iter_msg():  # iterate overtime
+            # NOTE:  iter_msg will correctly seperate the information.
+            # TODO: msg may support streaming mode.
+            ui.dispatch(msg)
+        pass
+
+
+# TODO: Implement the following classes
+class STLWindow:
+    ...
+
+    def consume_msg(self, msg):
+        ... # update it's view
+
+
+class STLUI:
+    wd_l: list[STLWindow]
+
+    def __init__(self):
+        self.build_ui()
+
+    def build_ui(self):
+        # control the dispaly of windows
+        ...
+
+    def dispatch(self, msg):
+        # map the message to a specific window
+        ...

From 82583fa30a10f112a334b3731d24a503b6085185 Mon Sep 17 00:00:00 2001
From: bowen xian <xianbowen@outlook.com>
Date: Tue, 16 Jul 2024 09:44:01 +0000
Subject: [PATCH 10/14] fix log tag to path logic

---
 rdagent/log/logger.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/rdagent/log/logger.py b/rdagent/log/logger.py
index 566acf4f..98ff27a5 100644
--- a/rdagent/log/logger.py
+++ b/rdagent/log/logger.py
@@ -76,7 +76,7 @@ def file_format(self, record, raw: bool=False):
 
     def log_object(self, obj: object, *, tag: str = "") -> None:
         caller_info = get_caller_info()
-        tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}"
+        tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.')
 
         logp = self.storage.log(obj, name=tag, save_type="pkl")
 
@@ -91,8 +91,8 @@ def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None:
             logger.remove()
             logger.add(sys.stderr, format=lambda r: "{message}")
 
-        tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}"
-        log_file_path = self.log_trace_path / tag.replace('.','/') /"common_logs.log"
+        tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.')
+        log_file_path = self.log_trace_path / tag.replace('.','/') / "common_logs.log"
         if raw:
             file_handler_id = logger.add(log_file_path, format=partial(self.file_format, raw=True))
         else:
@@ -108,7 +108,7 @@ def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None:
     def warning(self, msg: str, *, tag: str = "") -> None:
         caller_info = get_caller_info()
 
-        tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}"
+        tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.')
         file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format)
         logger.patch(lambda r: r.update(caller_info)).warning(msg)
         logger.remove(file_handler_id)
@@ -116,7 +116,7 @@ def warning(self, msg: str, *, tag: str = "") -> None:
     def error(self, msg: str, *, tag: str = "") -> None:
         caller_info = get_caller_info()
         
-        tag = f"{self._tag}.{tag}".strip('.') + f".{self.get_pids()}"
+        tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.')
         file_handler_id = logger.add(self.log_trace_path / tag.replace('.','/') / "common_logs.log", format=self.file_format)
         logger.patch(lambda r: r.update(caller_info)).error(msg)
         logger.remove(file_handler_id)

From 4105ef3d97504e0dc1835b90b7714d3528ed44d7 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Tue, 16 Jul 2024 10:30:56 +0000
Subject: [PATCH 11/14] Add todos

---
 rdagent/core/conf.py     |  1 +
 rdagent/core/utils.py    |  5 +++--
 rdagent/log/logger.py    | 26 ++++++++++++++------------
 rdagent/log/storage.py   |  3 ++-
 rdagent/log/utils.py     | 13 ++++++++-----
 rdagent/oai/llm_utils.py |  3 +++
 6 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/rdagent/core/conf.py b/rdagent/core/conf.py
index 1873e117..f709d575 100644
--- a/rdagent/core/conf.py
+++ b/rdagent/core/conf.py
@@ -15,6 +15,7 @@
 
 
 class RDAgentSettings(BaseSettings):
+    # TODO: (xiao) I think LLMSetting may be a better name.
     # TODO: (xiao) I think most of the config should be in oai.config
     # Log configs
     # TODO: (xiao) think it can be a seperate config.
diff --git a/rdagent/core/utils.py b/rdagent/core/utils.py
index 9688e84a..efec9184 100644
--- a/rdagent/core/utils.py
+++ b/rdagent/core/utils.py
@@ -19,14 +19,15 @@ class RDAgentException(Exception):  # noqa: N818
 
 
 class SingletonMeta(type):
-    # _instance_dict: ClassVar[dict] = {}
     def __init__(cls, *args, **kwargs):
-        cls._instance_dict = {}  # 为每个类创建一个唯一的_instance_dict字典
+        cls._instance_dict: dict = {}
+        # This must be the class variable instead of sharing one in all classes to avoid confliction like `A()`, `B()`
         super().__init__(*args, **kwargs)
 
     def __call__(cls, *args: Any, **kwargs: Any) -> Any:
         # Since it's hard to align the difference call using args and kwargs, we strictly ask to use kwargs in Singleton
         if args:
+            # TODO: this restriction can be solved.
             exception_message = "Please only use kwargs in Singleton to avoid misunderstanding."
             raise RDAgentException(exception_message)
         kwargs_hash = hash(tuple(sorted(kwargs.items())))
diff --git a/rdagent/log/logger.py b/rdagent/log/logger.py
index 98ff27a5..5e7329ac 100644
--- a/rdagent/log/logger.py
+++ b/rdagent/log/logger.py
@@ -13,10 +13,17 @@
 from multiprocessing.connection import Connection
 
 from .storage import FileStorage
-from .utils import get_caller_info, remove_ansi_codes
+from .utils import LogColors, get_caller_info
 
 
 class RDAgentLog(SingletonBaseClass):
+    # TODO: Simplify it to introduce less concepts ( We may merge RDAgentLog, Storage &)
+    # Solution:  Storage => PipeLog, View => PipeLogView, RDAgentLog is an instance of PipeLogger
+    # PipeLogger.info(...) ,  PipeLogger.get_resp() to get feedback from frontend.
+    # def f():
+    #   logger = PipeLog()
+    #   logger.info("<code>")
+    #   feedback = logger.get_reps()
     _tag: str = ""
 
     def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path) -> None:
@@ -30,15 +37,8 @@ def __init__(self, log_trace_path: str | None = RD_AGENT_SETTINGS.log_trace_path
         
         self.storage = FileStorage(log_trace_path)
         
-        self.outter_conn, self.inner_conn = Pipe()
-        
         self.main_pid = os.getpid()
 
-    @property
-    def stream(self) -> Connection:
-        return self.outter_conn
-
-
     @contextmanager
     def tag(self, tag: str):
         if tag.strip() == "":
@@ -46,11 +46,11 @@ def tag(self, tag: str):
         if self._tag != "":
             tag = "." + tag
 
+        # TODO: It may result in error in mutithreading or co-routine
         self._tag = self._tag + tag
         yield
         self._tag = self._tag[:-len(tag)]
 
-
     def get_pids(self) -> str:
         '''
         Returns a string of pids from the current process to the main process.
@@ -66,14 +66,12 @@ def get_pids(self) -> str:
             process = parent_process
         return pid_chain
 
-
     def file_format(self, record, raw: bool=False):
-        record["message"] = remove_ansi_codes(record["message"])
+        record["message"] = LogColors.remove_ansi_codes(record["message"])
         if raw:
             return "{message}"
         return "{time:YYYY-MM-DD HH:mm:ss.SSS} | {level: <8} | {name}:{function}:{line} - {message}\n"
 
-
     def log_object(self, obj: object, *, tag: str = "") -> None:
         caller_info = get_caller_info()
         tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.')
@@ -86,6 +84,7 @@ def log_object(self, obj: object, *, tag: str = "") -> None:
 
 
     def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None:
+        # TODO: too much duplicated. due to we have no logger with stream context;
         caller_info = get_caller_info()
         if raw:
             logger.remove()
@@ -106,6 +105,9 @@ def info(self, msg: str, *, tag: str = "", raw: bool=False) -> None:
             logger.add(sys.stderr)
 
     def warning(self, msg: str, *, tag: str = "") -> None:
+        # TODO: reuse code
+        # _log(self, msg: str, *, tag: str = "", level=Literal["warning", "error", ..]) -> None:
+        # getattr(logger.patch(lambda r: r.update(caller_info)), level)(msg)
         caller_info = get_caller_info()
 
         tag = f"{self._tag}.{tag}.{self.get_pids()}".strip('.')
diff --git a/rdagent/log/storage.py b/rdagent/log/storage.py
index 262f640e..50418a24 100644
--- a/rdagent/log/storage.py
+++ b/rdagent/log/storage.py
@@ -23,6 +23,7 @@ def log(self,
             save_type: Literal["json", "text", "pkl"] = "text",
             timestamp: datetime | None = None,
             ) -> Path:
+        # TODO: We can remove the timestamp after we implement PipeLog
         if timestamp is None:
             timestamp = datetime.now(timezone.utc)
         else:
@@ -50,4 +51,4 @@ def log(self,
             obj = str(obj)
             with path.open("w") as f:
                 f.write(obj)
-            return path
\ No newline at end of file
+            return path
diff --git a/rdagent/log/utils.py b/rdagent/log/utils.py
index 1631b2c7..1459f888 100644
--- a/rdagent/log/utils.py
+++ b/rdagent/log/utils.py
@@ -47,10 +47,13 @@ def render(self, text: str, color: str = "", style: str = "") -> str:
 
         return f"{style}{text}{self.END}"
 
-
-def remove_ansi_codes(s: str) -> str:
-    ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')
-    return ansi_escape.sub('', s)
+    @staticmethod
+    def remove_ansi_codes(s: str) -> str:
+        """
+        It is for removing ansi ctrl characters in the string(e.g. colored text)
+        """
+        ansi_escape = re.compile(r'\x1B\[[0-?]*[ -/]*[@-~]')
+        return ansi_escape.sub('', s)
 
 
 def get_caller_info():
@@ -64,4 +67,4 @@ def get_caller_info():
         'name': frame.f_globals['__name__'],  # Get the module name from the frame's globals
         'function': frame.f_code.co_name,  # Get the caller's function name
     }
-    return info
\ No newline at end of file
+    return info
diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py
index 9f742603..b44c05b8 100644
--- a/rdagent/oai/llm_utils.py
+++ b/rdagent/oai/llm_utils.py
@@ -87,6 +87,7 @@ def __init__(self, cache_location: str) -> None:
         super().__init__()
         self.cache_location = cache_location
         db_file_exist = Path(cache_location).exists()
+        # TODO: sqlite3 does not support multiprocessing.
         self.conn = sqlite3.connect(cache_location)
         self.c = self.conn.cursor()
         if not db_file_exist:
@@ -550,6 +551,7 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
         json_mode: bool = False,
         add_json_in_prompt: bool = False,
     ) -> str:
+        # TODO: we can add this function back to avoid so much `self.cfg.log_llm_chat_content`
         if self.cfg.log_llm_chat_content:
             logger.info(self._build_log_messages(messages))
         # TODO: fail to use loguru adaptor due to stream response
@@ -644,6 +646,7 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
                 )
             if self.chat_stream:
                 resp = ""
+                # TODO: with logger.config(stream=self.chat_stream): and add a `stream_start` flag to add timestamp for first message.
                 if self.cfg.log_llm_chat_content:
                     logger.info(f"{LogColors.CYAN}Response:{LogColors.END}")
                 

From 97a5598491688b1e3e00282060ddeea617bb9fe6 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Tue, 16 Jul 2024 10:44:09 +0000
Subject: [PATCH 12/14] Add example in docstring

---
 rdagent/log/logger.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/rdagent/log/logger.py b/rdagent/log/logger.py
index 5e7329ac..2b9b3ead 100644
--- a/rdagent/log/logger.py
+++ b/rdagent/log/logger.py
@@ -17,6 +17,27 @@
 
 
 class RDAgentLog(SingletonBaseClass):
+    """
+    The files are organized based on the tag & PID
+    Here is an example tag
+
+    .. code-block::
+        
+        a
+        - b
+        - c
+            - 123
+              - common_logs.log
+            - 1322
+              - common_logs.log
+            - 1233
+              - <timestamp>.pkl
+            - d
+                - 1233-673 ...
+                - 1233-4563 ...
+                - 1233-365 ...
+
+    """
     # TODO: Simplify it to introduce less concepts ( We may merge RDAgentLog, Storage &)
     # Solution:  Storage => PipeLog, View => PipeLogView, RDAgentLog is an instance of PipeLogger
     # PipeLogger.info(...) ,  PipeLogger.get_resp() to get feedback from frontend.

From 3ed6b42ffad47b5c61cf4d640da7edf3ff41590a Mon Sep 17 00:00:00 2001
From: bowen xian <xianbowen@outlook.com>
Date: Tue, 16 Jul 2024 10:50:16 +0000
Subject: [PATCH 13/14] add log tag for llm_utils.py

---
 rdagent/oai/llm_utils.py | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py
index 9f742603..01ed453a 100644
--- a/rdagent/oai/llm_utils.py
+++ b/rdagent/oai/llm_utils.py
@@ -34,17 +34,17 @@ def md5_hash(input_string: str) -> str:
 try:
     from azure.identity import DefaultAzureCredential, get_bearer_token_provider
 except ImportError:
-    logger.warning("azure.identity is not installed.", tag="gpt")
+    logger.warning("azure.identity is not installed.")
 
 try:
     import openai
 except ImportError:
-    logger.warning("openai is not installed.", tag="gpt")
+    logger.warning("openai is not installed.")
 
 try:
     from llama import Llama
 except ImportError:
-    logger.warning("llama is not installed.", tag="gpt")
+    logger.warning("llama is not installed.")
 
 
 class ConvManager:
@@ -203,7 +203,7 @@ def build_chat_completion(self, user_prompt: str, **kwargs: Any) -> str:
         """
         messages = self.build_chat_completion_message(user_prompt)
         
-        with logger.tag(self.conversation_id):
+        with logger.tag(f"session_{self.conversation_id}"):
             response = self.api_backend._try_create_chat_completion_or_embedding(  # noqa: SLF001
                 messages=messages,
                 chat_completion=True,
@@ -551,7 +551,7 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
         add_json_in_prompt: bool = False,
     ) -> str:
         if self.cfg.log_llm_chat_content:
-            logger.info(self._build_log_messages(messages))
+            logger.info(self._build_log_messages(messages), tag="llm_messages")
         # TODO: fail to use loguru adaptor due to stream response
         input_content_json = json.dumps(messages)
         input_content_json = (
@@ -580,7 +580,7 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
             )
             resp = response[0]["generation"]["content"]
             if self.cfg.log_llm_chat_content:
-                logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}")
+                logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages")
         elif self.use_gcr_endpoint:
             body = str.encode(
                 json.dumps(
@@ -602,7 +602,7 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
             response = urllib.request.urlopen(req)  # noqa: S310
             resp = json.loads(response.read().decode())["output"]
             if self.cfg.log_llm_chat_content:
-                logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}")
+                logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages")
         else:
             if self.use_azure:
                 if json_mode:
@@ -645,7 +645,7 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
             if self.chat_stream:
                 resp = ""
                 if self.cfg.log_llm_chat_content:
-                    logger.info(f"{LogColors.CYAN}Response:{LogColors.END}")
+                    logger.info(f"{LogColors.CYAN}Response:{LogColors.END}", tag="llm_messages")
                 
                 for chunk in response:
                     content = (
@@ -654,19 +654,19 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
                         else ""
                     )
                     if self.cfg.log_llm_chat_content:
-                        logger.info(LogColors.CYAN + content + LogColors.END, raw=True)
+                        logger.info(LogColors.CYAN + content + LogColors.END, raw=True, tag="llm_messages")
                     resp += content
                     if len(chunk.choices) > 0 and chunk.choices[0].finish_reason is not None:
                         finish_reason = chunk.choices[0].finish_reason
                 
                 if self.cfg.log_llm_chat_content:
-                    logger.info("\n", raw=True)
+                    logger.info("\n", raw=True, tag="llm_messages")
                 
             else:
                 resp = response.choices[0].message.content
                 finish_reason = response.choices[0].finish_reason
                 if self.cfg.log_llm_chat_content:
-                    logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}")
+                    logger.info(f"{LogColors.CYAN}Response:{resp}{LogColors.END}", tag="llm_messages")
             if json_mode:
                 json.loads(resp)
         if self.dump_chat_cache:

From 1f8081f6e68e9d526f2eb3636490f086dee94f33 Mon Sep 17 00:00:00 2001
From: Young <afe.young@gmail.com>
Date: Tue, 16 Jul 2024 12:34:59 +0000
Subject: [PATCH 14/14] Capture lost content

---
 rdagent/oai/llm_utils.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/rdagent/oai/llm_utils.py b/rdagent/oai/llm_utils.py
index 4ad88c7b..35fb0fb5 100644
--- a/rdagent/oai/llm_utils.py
+++ b/rdagent/oai/llm_utils.py
@@ -562,6 +562,8 @@ def _create_chat_completion_inner_function(  # noqa: C901, PLR0912, PLR0915
         if self.use_chat_cache:
             cache_result = self.cache.chat_get(input_content_json)
             if cache_result is not None:
+                if self.cfg.log_llm_chat_content:
+                    logger.info(f"{LogColors.CYAN}Response:{cache_result}{LogColors.END}", tag="llm_messages")
                 return cache_result, None
 
         if temperature is None: