fix: done adapting
plutoless committed Jan 7, 2025
1 parent 10e3338 · commit aea809d
Showing 8 changed files with 59 additions and 43 deletions.
20 changes: 11 additions & 9 deletions agents/examples/default/property.json
@@ -698,7 +698,7 @@
"max_memory_length": 10,
"max_tokens": 512,
"model": "${env:OPENAI_MODEL}",
"prompt": "You are an ai agent bot producing child picture books. Each response should be short and no more than 50 words as it's for child. \nFor each response, you will use the 'image_generate' tool to create an image based on the description or key moment in that part of the story. The story should be set in a fantasy world. Try asking questions relevant to the story to decide how the story should proceed. Each response should include rich, vivid descriptions that will guide the 'image_generate' tool to produce an image that aligns with the scene or mood.\n Whether it’s the setting, a character’s expression, or a dramatic moment, the paragraph should give enough detail for a meaningful visual representation.",
"prompt": "You are an ai agent bot producing child picture books. Each response should be short and no more than 50 words as it's for child. \nFor every response relevant to the story-telling, you will use the 'image_generate' tool to create an image based on the description or key moment in that part of the story. \n The story should be set in a fantasy world. Try asking questions relevant to the story to decide how the story should proceed. Every response should include rich, vivid descriptions that will guide the 'image_generate' tool to produce an image that aligns with the scene or mood.\n Whether it’s the setting, a character’s expression, or a dramatic moment, the paragraph should give enough detail for a meaningful visual representation.",
"proxy_url": "${env:OPENAI_PROXY_URL}"
}
},
@@ -845,14 +845,6 @@
"extension": "message_collector"
}
]
},
{
"name": "raw_text_data",
"dest": [
{
"extension": "message_collector2"
}
]
}
]
},
@@ -952,6 +944,16 @@
}
]
}
],
"data": [
{
"name": "raw_text_data",
"dest": [
{
"extension": "message_collector2"
}
]
}
]
}
]
15 changes: 7 additions & 8 deletions agents/ten_packages/extension/message_collector/src/extension.py
@@ -32,8 +32,6 @@
TEXT_DATA_STREAM_ID_FIELD = "stream_id"
TEXT_DATA_END_OF_SEGMENT_FIELD = "end_of_segment"

# record the cached text data for each stream id
cached_text_map = {}
MAX_CHUNK_SIZE_BYTES = 1024


@@ -104,6 +102,7 @@ def __init__(self, name: str):
super().__init__(name)
self.queue = asyncio.Queue()
self.loop = None
self.cached_text_map = {}

def on_init(self, ten_env: TenEnv) -> None:
ten_env.log_info("on_init")
@@ -191,15 +190,15 @@ def on_data(self, ten_env: TenEnv, data: Data) -> None:
# We cache all final text data and append the non-final text data to the cached data
# until the end of the segment.
if end_of_segment:
if stream_id in cached_text_map:
text = cached_text_map[stream_id] + text
del cached_text_map[stream_id]
if stream_id in self.cached_text_map:
text = self.cached_text_map[stream_id] + text
del self.cached_text_map[stream_id]
else:
if final:
if stream_id in cached_text_map:
text = cached_text_map[stream_id] + text
if stream_id in self.cached_text_map:
text = self.cached_text_map[stream_id] + text

cached_text_map[stream_id] = text
self.cached_text_map[stream_id] = text

# Generate a unique message ID for this batch of parts
message_id = str(uuid.uuid4())[:8]
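
The hunks above replace the module-level cached_text_map with a per-instance attribute, so two collector instances no longer share cached segments. A minimal, self-contained sketch of the same caching policy, with class and method names that are illustrative rather than the extension's real API:

class StreamTextCache:
    """Per-instance cache of partial text, keyed by stream id (sketch only)."""

    def __init__(self):
        self.cached_text_map = {}  # stream_id -> accumulated final text

    def collect(self, stream_id: int, text: str, final: bool, end_of_segment: bool) -> str:
        if end_of_segment:
            # Segment finished: prepend whatever was cached and clear the entry.
            if stream_id in self.cached_text_map:
                text = self.cached_text_map.pop(stream_id) + text
        elif final:
            # Final chunk but segment still open: keep accumulating.
            if stream_id in self.cached_text_map:
                text = self.cached_text_map[stream_id] + text
            self.cached_text_map[stream_id] = text
        return text

# Two instances keep independent caches, unlike the old module-level dict:
# a, b = StreamTextCache(), StreamTextCache()
# a.collect(1, "Hello ", final=True, end_of_segment=False)
# a.collect(1, "world", final=True, end_of_segment=True)     # -> "Hello world"
# b.collect(1, "unrelated", final=True, end_of_segment=True)  # -> "unrelated"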
@@ -378,5 +378,8 @@ def message_to_dict(self, message: LLMChatCompletionMessageParam):

def _append_memory(self, message: str):
if len(self.memory) > self.config.max_memory_length:
self.memory.pop(0)
removed_item = self.memory.pop(0)
# Remove tool calls from memory
if removed_item.get("tool_calls") and self.memory[0].get("role") == "tool":
self.memory.pop(0)
self.memory.append(message)
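
This change keeps the trimmed history valid for the chat completions API: a tool message must be preceded by the assistant turn that issued the tool call, so when the evicted item carried tool_calls its now-orphaned tool result is evicted as well. A small sketch of the same eviction policy on a plain list (the dict shapes mirror the diff; the helper itself is illustrative and adds an emptiness guard):

def append_with_trim(memory: list[dict], message: dict, max_memory_length: int) -> None:
    # Sketch of the eviction policy shown in the hunk above.
    if len(memory) > max_memory_length:
        removed_item = memory.pop(0)
        # If the evicted assistant turn carried tool_calls, also drop the
        # orphaned tool result that immediately followed it.
        if removed_item.get("tool_calls") and memory and memory[0].get("role") == "tool":
            memory.pop(0)
    memory.append(message)

# Example: evicting the tool_calls turn also evicts its tool result.
# history = [
#     {"role": "assistant", "tool_calls": [{"id": "call_1"}]},
#     {"role": "tool", "tool_call_id": "call_1", "content": "{\"success\": true}"},
#     {"role": "user", "content": "continue the story"},
# ]
# append_with_trim(history, {"role": "assistant", "content": "Once upon a time..."}, max_memory_length=2)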
@@ -85,14 +85,6 @@
"type": "string"
}
}
},
{
"name": "raw_text_data",
"property": {
"text": {
"type": "string"
}
}
}
],
"cmd_in": [
@@ -3,14 +3,17 @@
# Licensed under the Apache License, Version 2.0.
# See the LICENSE file for more information.
#
import asyncio
import json
from ten import (
Data,
TenEnv,
AsyncTenEnv,
)
from ten_ai_base import (
AsyncLLMToolBaseExtension, LLMToolMetadata, LLMToolResult
)
from ten_ai_base.const import DATA_OUT_PROPERTY_END_OF_SEGMENT, DATA_OUT_PROPERTY_TEXT, RAW_DATA_OUT_NAME
from ten_ai_base.types import LLMChatCompletionContentPartImageParam, LLMToolMetadataParameter, LLMToolResultNormal
from .openai import OpenAIImageGenerateClient, OpenAIImageGenerateToolConfig

@@ -52,6 +55,27 @@ def get_tool_metadata(self, ten_env: TenEnv) -> list[LLMToolMetadata]:
)
]

async def send_image(self, async_ten_env: AsyncTenEnv, image_url: str) -> None:
# Implement this method to send the image to the chat.
async_ten_env.log_info(f"Sending image: {image_url}")
try:
sentence = json.dumps({"data":{"image_url": image_url}, "type": "image_url"})
output_data = Data.create(RAW_DATA_OUT_NAME)
output_data.set_property_string(
DATA_OUT_PROPERTY_TEXT,
sentence
)
output_data.set_property_bool(
DATA_OUT_PROPERTY_END_OF_SEGMENT, True
)
asyncio.create_task(async_ten_env.send_data(output_data))
async_ten_env.log_info(
f"sent sentence [{sentence}]"
)
except Exception as err:
async_ten_env.log_warn(f"send sentence [{sentence}] failed, err: {err}")


async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult | None:
ten_env.log_info(f"run_tool {name} {args}")
if name == "image_generate":
@@ -62,8 +86,9 @@ async def run_tool(self, ten_env: AsyncTenEnv, name: str, args: dict) -> LLMToolResult | None:
# call OpenAIImageGenerateClient to generate images
response_url = await self.client.generate_images(prompt)
ten_env.log_info(f"Generated image: {response_url}")
await self.send_image(ten_env, response_url)
result = LLMToolResultNormal(
type="normal",
content={"data":{"image_url": response_url}, "type": "image_url"},
content=json.dumps({"success": True}),
)
return result
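
With this change the tool no longer hands the image URL back to the LLM; it pushes a JSON payload onto the raw text data channel (presumably routed to message_collector2 by the property.json change above) and returns only a success flag. A rough sketch of the payload round trip using plain json, leaving out the framework's Data API:

import json

def build_image_message(image_url: str) -> str:
    # Shape matches the `sentence` built in send_image above.
    return json.dumps({"data": {"image_url": image_url}, "type": "image_url"})

def handle_collected_message(sentence: str) -> None:
    # Sketch of what a downstream consumer (e.g. the playground client) might do.
    msg = json.loads(sentence)
    if msg.get("type") == "image_url":
        image_url = msg["data"]["image_url"]
        print(f"render image: {image_url}")

# handle_collected_message(build_image_message("https://example.com/cat.png"))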
@@ -102,6 +102,16 @@
}
}
}
],
"data_out": [
{
"name": "raw_text_data",
"property": {
"text": {
"type": "string"
}
}
}
]
}
}
@@ -117,22 +117,6 @@ async def flush_input_items(self, async_ten_env: AsyncTenEnv):
async_ten_env.log_info("Cancelling the current task during flush.")
self.current_task.cancel()

def send_raw_text_output(
self, async_ten_env: AsyncTenEnv, sentence: str, end_of_segment: bool
):
try:
output_data = Data.create(RAW_DATA_OUT_NAME)
output_data.set_property_string(DATA_OUT_PROPERTY_TEXT, sentence)
output_data.set_property_bool(
DATA_OUT_PROPERTY_END_OF_SEGMENT, end_of_segment
)
asyncio.create_task(async_ten_env.send_data(output_data))
async_ten_env.log_info(
f"{'end of segment ' if end_of_segment else ''}sent raw sentence [{sentence}]"
)
except Exception as err:
async_ten_env.log_warn(f"send sentence [{sentence}] failed, err: {err}")

def send_text_output(
self, async_ten_env: AsyncTenEnv, sentence: str, end_of_segment: bool
):
1 change: 1 addition & 0 deletions playground/src/manager/rtc/rtc.ts
@@ -233,6 +233,7 @@ export class RtcManager extends AGEventEmitter<RtcEvents> {
const { stream_id, is_final, text, text_ts, data_type } = JSON.parse(
atob(completeMessage)
);
console.log(`[test] message_id: ${message_id} stream_id: ${stream_id}, text: ${text}, data_type: ${data_type}`);
const isAgent = Number(stream_id) != Number(this.userId)
let textItem: IChatItem = {
type: isAgent ? EMessageType.AGENT : EMessageType.USER,