From 2bb514cd782715c2d072612e986ac0243c6bddbe Mon Sep 17 00:00:00 2001
From: Dzmitry Hramyka <grom.dima.grom@gmail.com>
Date: Sun, 12 Jan 2025 23:45:55 +0100
Subject: [PATCH] wip

---
 src/agent.py                                  | 48 +++++++------
 src/core/config.py                            |  6 ++
 src/core/defs.py                              | 18 ++++-
 src/execution/__init__.py                     |  3 +
 src/execution/execution.py                    | 38 +++++++++++
 src/feedback/__init__.py                      |  3 +
 .../{feedback_module.py => feedback.py}       |  0
 src/memory/__init__.py                        |  3 +
 src/memory/{memory_module.py => memory.py}    |  0
 src/planning/__init__.py                      |  3 +
 src/planning/planning.py                      | 67 +++++++++++++++++++
 src/workflows/analyze_signal.py               |  2 +-
 tests/feedback/test_feedback_module.py        |  2 +-
 13 files changed, 168 insertions(+), 25 deletions(-)
 create mode 100644 src/execution/__init__.py
 create mode 100644 src/execution/execution.py
 rename src/feedback/{feedback_module.py => feedback.py} (100%)
 rename src/memory/{memory_module.py => memory.py} (100%)
 create mode 100644 src/planning/planning.py

diff --git a/src/agent.py b/src/agent.py
index 90a84ad..838c797 100644
--- a/src/agent.py
+++ b/src/agent.py
@@ -5,9 +5,10 @@
 
 from src.core.config import settings
 from src.core.defs import AgentAction, AgentState
-from src.feedback.feedback_module import FeedbackModule
-from src.memory.memory_module import get_memory_module
-from src.planning.planning_module import PlanningModule
+from src.execution import ExecutionModule
+from src.feedback import FeedbackModule
+from src.memory import get_memory_module
+from src.planning import PlanningModule
 from src.workflows.analyze_signal import analyze_signal
 from src.workflows.research_news import analyze_news_workflow
 
@@ -35,10 +36,10 @@ def __init__(self):
         self.memory_module = get_memory_module()
 
         #: Initialize Planning Module with persistent Q-table
-        self.planning_module = PlanningModule(
-            actions=list(AgentAction),
-            q_table_path=settings.PERSISTENT_Q_TABLE_PATH,  # Persistent Q-table file
-        )
+        self.planning_module = PlanningModule()
+
+        #: Initialize Execution Module
+        self.execution_module = ExecutionModule()
 
         #: Initialize Feedback Module
         self.feedback_module = FeedbackModule()
@@ -73,10 +74,6 @@ def _update_planning_policy(
         """Update the Q-learning table in the PlanningModule."""
         self.planning_module.update_q_table(state, action, reward, next_state)
 
-    def _collect_feedback(self, action: str, outcome: Optional[Any]) -> float:
-        """Collect feedback for the action & outcome in the FeedbackModule."""
-        return self.feedback_module.collect_feedback(action, outcome)
-
     # --------------------------------------------------------------
     # RL-based PLANNING & EXECUTION
     # --------------------------------------------------------------
@@ -130,23 +127,30 @@ async def start_runtime_loop(self) -> None:
                 # 1. Choose an action
                 #    You might treat the entire system as one "state", or define states.
                 logger.info(f"Current state: {self.state.value}")
-                action_name = self.planning_module.get_action(self.state)
-                logger.info(f"Action chosen: {action_name.value}")
+                next_action = await self.planning_module.get_next_action(self.state)
+                logger.info(f"Action chosen: {next_action.value}")
 
                 # 2. Perform that action
-                outcome = await self._perform_planned_action(action_name)
-                logger.info(f"Outcome: {outcome}")
+                result = await self.execution_module.execute_action(next_action)
+                logger.info(f"Outcome: {result}")
 
                 # 3. Collect feedback
-                reward = self._collect_feedback(action_name.value, outcome)
+                reward = self.feedback_module.collect_feedback(
+                    next_action.value, result.get("outcome")
+                )
                 logger.info(f"Reward: {reward}")
 
-                # 4. Update the planning policy
-                next_state = self.state
-                logger.info(f"Next state: {next_state.value}")
-                self._update_planning_policy(self.state, action_name, reward, next_state)
-
-                # 4. Sleep or yield
+                # 4. Update state and memory
+                self.state = AgentState.from_action(next_action)
+                # self.memory_module.store(
+                #     {
+                #         "state": self.state.value,
+                #         "action": next_action.value,
+                #         "outcome": result.get("outcome", "Unknown"),
+                #     }
+                # )
+
+                # 5. Sleep or yield
                 logger.info("Let's rest a bit...")
                 await asyncio.sleep(settings.AGENT_REST_TIME)
 
diff --git a/src/core/config.py b/src/core/config.py
index 44ba532..f8af68f 100644
--- a/src/core/config.py
+++ b/src/core/config.py
@@ -27,6 +27,12 @@ class Settings(BaseSettings):
 
     # --- Planning settings ---
 
+    #: Planning API URL
+    PLANNING_API_URL: str = "http://localhost:11434/api/generate"
+
+    #: Planning model
+    PLANNING_MODEL: str = "llama3.2"
+
     #: Path to the persistent Q-table file
     PERSISTENT_Q_TABLE_PATH: str = "persistent_q_table.json"
 
diff --git a/src/core/defs.py b/src/core/defs.py
index c76f324..1bfe7a2 100644
--- a/src/core/defs.py
+++ b/src/core/defs.py
@@ -24,10 +24,26 @@ class AgentState(Enum):
 
     DEFAULT = "default"
     IDLE = "idle"
-    WAITING_FOR_NEWS = "waiting_for_news"
     JUST_ANALYZED_NEWS = "just_analyzed_news"
     JUST_ANALYZED_SIGNAL = "just_analyzed_signal"
 
+    @classmethod
+    def from_action(cls, action: AgentAction) -> "AgentState":
+        """Convert an AgentAction to the corresponding AgentState.
+
+        Args:
+            action: The AgentAction to convert
+
+        Returns:
+            The corresponding AgentState
+        """
+        action_to_state = {
+            AgentAction.IDLE: cls.IDLE,
+            AgentAction.ANALYZE_NEWS: cls.JUST_ANALYZED_NEWS,
+            AgentAction.CHECK_SIGNAL: cls.JUST_ANALYZED_SIGNAL,
+        }
+        return action_to_state.get(action, cls.DEFAULT)
+
 
 class MemoryBackendType(str, Enum):
     """Available memory backend types."""
diff --git a/src/execution/__init__.py b/src/execution/__init__.py
new file mode 100644
index 0000000..3bcb73a
--- /dev/null
+++ b/src/execution/__init__.py
@@ -0,0 +1,3 @@
+from src.execution.execution import ExecutionModule
+
+__all__ = ["ExecutionModule"]
diff --git a/src/execution/execution.py b/src/execution/execution.py
new file mode 100644
index 0000000..a04636b
--- /dev/null
+++ b/src/execution/execution.py
@@ -0,0 +1,38 @@
+from loguru import logger
+
+from src.core.defs import AgentAction
+from src.workflows.analyze_signal import analyze_signal
+from src.workflows.research_news import analyze_news_workflow
+
+
+class ExecutionModule:
+    """Module to execute agent actions."""
+
+    async def execute_action(self, action: AgentAction) -> dict:
+        """
+        Execute the given action and return the result.
+
+        Args:
+            action (AgentAction): The action to execute.
+            context (dict): Context to pass to the action.
+
+        Returns:
+            dict: Outcome and status of the action.
+        """
+        try:
+            if action == AgentAction.CHECK_SIGNAL:
+                result = await analyze_signal()
+                return {"status": "success", "outcome": result or "No signal"}
+
+            elif action == AgentAction.ANALYZE_NEWS:
+                recent_news = "Placeholder"
+                result = await analyze_news_workflow(recent_news)
+                return {"status": "success", "outcome": result or "No outcome"}
+
+            elif action == AgentAction.IDLE:
+                logger.info("Agent is idling.")
+                return {"status": "success", "outcome": "Idling"}
+
+        except Exception as e:
+            logger.error(f"Error executing action {action}: {e}")
+            return {"status": "error", "outcome": str(e)}
diff --git a/src/feedback/__init__.py b/src/feedback/__init__.py
index e69de29..ef24fe1 100644
--- a/src/feedback/__init__.py
+++ b/src/feedback/__init__.py
@@ -0,0 +1,3 @@
+from src.feedback.feedback import FeedbackModule
+
+__all__ = ["FeedbackModule"]
diff --git a/src/feedback/feedback_module.py b/src/feedback/feedback.py
similarity index 100%
rename from src/feedback/feedback_module.py
rename to src/feedback/feedback.py
diff --git a/src/memory/__init__.py b/src/memory/__init__.py
index e69de29..8344761 100644
--- a/src/memory/__init__.py
+++ b/src/memory/__init__.py
@@ -0,0 +1,3 @@
+from src.memory.memory import MemoryModule, get_memory_module
+
+__all__ = ["MemoryModule", "get_memory_module"]
diff --git a/src/memory/memory_module.py b/src/memory/memory.py
similarity index 100%
rename from src/memory/memory_module.py
rename to src/memory/memory.py
diff --git a/src/planning/__init__.py b/src/planning/__init__.py
index e69de29..4aab5e1 100644
--- a/src/planning/__init__.py
+++ b/src/planning/__init__.py
@@ -0,0 +1,3 @@
+from src.planning.planning import PlanningModule
+
+__all__ = ["PlanningModule"]
diff --git a/src/planning/planning.py b/src/planning/planning.py
new file mode 100644
index 0000000..c565024
--- /dev/null
+++ b/src/planning/planning.py
@@ -0,0 +1,67 @@
+import json
+
+import httpx
+from loguru import logger
+
+from src.core.config import settings
+from src.core.defs import AgentAction, AgentState
+
+
+class PlanningModule:
+    """Centralized Planning Module using LLM for action selection."""
+
+    def __init__(
+        self, api_url: str = settings.PLANNING_API_URL, model: str = settings.PLANNING_MODEL
+    ):
+        """
+        Initialize the Planning Module.
+
+        Args:
+            api_url (str): The URL for the LLM API.
+            model (str): The model name (e.g., 'llama3.2').
+        """
+        self.api_url = api_url
+        self.model = model
+
+    async def get_next_action(self, state: AgentState) -> AgentAction:
+        """
+        Call the LLM to decide the next action.
+
+        Args:
+            context (dict): Context including state, previous actions, and outcomes.
+
+        Returns:
+            AgentAction: The next action to perform.
+        """
+        prompt = (
+            "You are a helpful assistant, who only decides the next action to perform. "
+            f"Following states are possible: {AgentState.__members__}. "
+            f"You have the following possible actions: {AgentAction.__members__}. "
+            "You need to decide the next action to perform. "
+            "Return only the name of the action to perform and nothing else!"
+            f"Current state of the agent: {state.value}. "
+        )
+        try:
+            payload = {
+                "model": self.model,
+                "prompt": prompt,
+                "stream": False,
+            }
+            async with httpx.AsyncClient() as client:
+                logger.debug(f"Sending payload: {payload}")
+                response = await client.post(self.api_url, json=payload)
+                logger.debug(f"Received response: {response}")
+                response.raise_for_status()
+
+            data = response.json()
+            logger.debug(f"LLM response: {data}")
+            next_action = data.get("response")
+            if not next_action or next_action not in AgentAction.__members__:
+                logger.warning(f"Invalid action returned by LLM: {next_action}")
+                return AgentAction.IDLE
+
+            logger.info(f"LLM suggested action: {next_action}")
+            return AgentAction[next_action]
+        except Exception as e:
+            logger.error(f"Failed to get next action from LLM: {e}")
+            return AgentAction.IDLE
diff --git a/src/workflows/analyze_signal.py b/src/workflows/analyze_signal.py
index ffcfc94..2368825 100644
--- a/src/workflows/analyze_signal.py
+++ b/src/workflows/analyze_signal.py
@@ -3,7 +3,7 @@
 from loguru import logger
 
 from src.llm.llm import LLM
-from src.memory.memory_module import MemoryModule, get_memory_module
+from src.memory import MemoryModule, get_memory_module
 from src.tools.get_signal import fetch_signal
 from src.tools.twitter import post_twitter_thread
 
diff --git a/tests/feedback/test_feedback_module.py b/tests/feedback/test_feedback_module.py
index 57b872b..3153a7a 100644
--- a/tests/feedback/test_feedback_module.py
+++ b/tests/feedback/test_feedback_module.py
@@ -1,6 +1,6 @@
 import pytest
 
-from src.feedback.feedback_module import FeedbackModule
+from src.feedback import FeedbackModule
 
 
 @pytest.fixture