From 7ba390902e0734ae55ee8363d881664d13d10e29 Mon Sep 17 00:00:00 2001
From: Marvin Weigand <marvin@gottfrieds.de>
Date: Tue, 13 Aug 2024 09:48:47 +0200
Subject: [PATCH] feat: Containerized application and reinstated code execution
 functionality for Ollama usage

- Containerized the application as a docker-compose stack
- Reinstated and refined code execution functionality for Ollama, with support for customizable hosts.

feat: Isolated network using Tinyproxy

- Implemented network isolation by configuring Tinyproxy, to avoid malicious side effects on the host machine.

feat: Added simple web interface for easy interaction with CLI application in the container

- Added a basic web interface using ttyd to facilitate interaction with the CLI application running within the container.

feat: Restricted file operations to mounted data directory

- Ensured all file operations are confined to the `data` directory to enhance security and data management.
---
 .env.example                                  |   5 +
 .gitignore                                    |   2 +
 claude-engineer/Dockerfile                    |  24 +++
 main.py => claude-engineer/main.py            |   3 +
 .../ollama-eng.py                             | 181 ++++++++++++++++--
 .../requirements.txt                          |   2 +
 docker-compose.yml                            |  41 ++++
 tinyproxy/Dockerfile                          |  19 ++
 tinyproxy/start.sh                            |  12 ++
 tinyproxy/tinyproxy.conf                      |  19 ++
 10 files changed, 290 insertions(+), 18 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 claude-engineer/Dockerfile
 rename main.py => claude-engineer/main.py (99%)
 rename ollama-eng.py => claude-engineer/ollama-eng.py (87%)
 rename requirements.txt => claude-engineer/requirements.txt (82%)
 create mode 100644 docker-compose.yml
 create mode 100644 tinyproxy/Dockerfile
 create mode 100644 tinyproxy/start.sh
 create mode 100644 tinyproxy/tinyproxy.conf

diff --git a/.env.example b/.env.example
index 83052419..eb5641e1 100644
--- a/.env.example
+++ b/.env.example
@@ -1,2 +1,7 @@
 ANTHROPIC_API_KEY="YOUR API KEY"
 TAVILY_API_KEY="YOUR API KEY"
+OLLAMA_HOST="YOUR URL"
+OLLAMA_MAIN_MODEL="llama3.1:8b-instruct-q8_0"
+OLLAMA_TOOLCHECKER_MODEL="llama3.1:8b-instruct-q8_0"
+OLLAMA_CODE_MODEL="deepseek-coder-v2"
+OLLAMA_CTX_WINDOW=128000
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..6c5156be
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+data
+.env
diff --git a/claude-engineer/Dockerfile b/claude-engineer/Dockerfile
new file mode 100644
index 00000000..0309b41b
--- /dev/null
+++ b/claude-engineer/Dockerfile
@@ -0,0 +1,24 @@
+# Use an official Python runtime as a parent image
+FROM python:3.11-slim
+
+# Set the working directory inside the container
+WORKDIR /app
+
+# Install download tools; skip recommended extras and drop the apt lists to keep the layer small
+RUN apt-get update && apt-get install -y --no-install-recommends ca-certificates curl unzip && rm -rf /var/lib/apt/lists/*
+
+# Download the ttyd binary (-f: fail the build on an HTTP error instead of saving the error page)
+RUN curl -fLo /usr/local/bin/ttyd https://github.com/tsl0922/ttyd/releases/download/1.6.3/ttyd.x86_64 \
+    && chmod +x /usr/local/bin/ttyd
+
+# Copy the current directory contents into the container at /app
+COPY . /app
+
+# Install the dependencies specified in requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Set environment variables
+ENV PYTHONUNBUFFERED=1
+
+# Define the command to run ttyd with the application
+CMD ["ttyd", "python", "ollama-eng.py"]
diff --git a/main.py b/claude-engineer/main.py
similarity index 99%
rename from main.py
rename to claude-engineer/main.py
index 1a2f460f..6dc41529 100644
--- a/main.py
+++ b/claude-engineer/main.py
@@ -34,6 +34,9 @@ async def get_user_input(prompt="You: "):
 from typing import Tuple, Optional
 
 
+# Change the current working directory to the specific directory
+os.chdir('data')
+
 def setup_virtual_environment() -> Tuple[str, str]:
     venv_name = "code_execution_env"
     venv_path = os.path.join(os.getcwd(), venv_name)
diff --git a/ollama-eng.py b/claude-engineer/ollama-eng.py
similarity index 87%
rename from ollama-eng.py
rename to claude-engineer/ollama-eng.py
index 71ff1040..51fef191 100644
--- a/ollama-eng.py
+++ b/claude-engineer/ollama-eng.py
@@ -1,4 +1,8 @@
 import os
+import venv
+import subprocess
+import sys
+import signal
 from dotenv import load_dotenv
 import json
 from tavily import TavilyClient
@@ -17,6 +21,9 @@
 import aiohttp
 from prompt_toolkit import PromptSession
 from prompt_toolkit.styles import Style
+from typing import Tuple, Optional
+
+os.chdir('data')
 
 async def get_user_input(prompt="You: "):
     style = Style.from_dict({
@@ -30,7 +37,7 @@ async def get_user_input(prompt="You: "):
 load_dotenv()
 
 # Initialize the Ollama client
-client = ollama.AsyncClient()
+client = ollama.AsyncClient(host=os.getenv("OLLAMA_HOST"))
 
 # Initialize the Tavily client
 tavily_api_key = os.getenv("TAVILY_API_KEY")
@@ -66,15 +73,15 @@ async def get_user_input(prompt="You: "):
 # Constants
 CONTINUATION_EXIT_PHRASE = "AUTOMODE_COMPLETE"
 MAX_CONTINUATION_ITERATIONS = 25
-MAX_CONTEXT_TOKENS = 200000  # Reduced to 200k tokens for context window
+MAX_CONTEXT_TOKENS = int(os.getenv("OLLAMA_CTX_WINDOW") or 200000)  # getenv() yields str/None; keep the old 200k default as an int
 
 # Models
 # Models that maintain context memory across interactions
-MAINMODEL = "mistral-nemo"  # Maintains conversation history and file contents
+MAINMODEL = os.getenv("OLLAMA_MAIN_MODEL") or "mistral-nemo"  # Maintains conversation history and file contents
 
 # Models that don't maintain context (memory is reset after each call)
-TOOLCHECKERMODEL = "mistral-nemo"
-CODEEDITORMODEL = "mistral-nemo"
+TOOLCHECKERMODEL = os.getenv("OLLAMA_TOOLCHECKER_MODEL") or "mistral-nemo"  # unset/empty env falls back to the old default
+CODEEDITORMODEL = os.getenv("OLLAMA_CODE_MODEL") or "mistral-nemo"
 
 # System prompts
 BASE_SYSTEM_PROMPT = """
@@ -174,7 +181,95 @@ async def get_user_input(prompt="You: "):
 Remember: Focus on completing the established goals efficiently and effectively. Avoid unnecessary conversations or requests for additional tasks.
 """
 
+def setup_virtual_environment() -> Tuple[str, str]:
+    """Create the "code_execution_env" venv in the cwd if it is missing; return (venv_path, activate_script)."""
+    venv_name = "code_execution_env"
+    venv_path = os.path.join(os.getcwd(), venv_name)
+    try:
+        if not os.path.exists(venv_path):
+            venv.create(venv_path, with_pip=True)
+        # Only the activation script's path is computed here; nothing is activated in this process
+        if sys.platform == "win32":
+            activate_script = os.path.join(venv_path, "Scripts", "activate.bat")
+        else:
+            activate_script = os.path.join(venv_path, "bin", "activate")
+        
+        return venv_path, activate_script
+    except Exception as e:
+        logging.error(f"Error setting up virtual environment: {str(e)}")
+        raise
+    
+def restrict_to_data_directory(path: str, base_dir: str = "data") -> str:
+    """Resolve *path* inside *base_dir* (itself resolved against the cwd) and return it as an absolute path.
+
+    Raises ValueError if the result lies outside *base_dir*. One leading "/" is dropped so the path joins as relative.
+    NOTE(review): this module chdir()s into "data" at import, so the default base resolves to data/data - confirm intended.
+    """
+    if path.startswith("/"):
+        path = path[1:]
+    base_dir = os.path.abspath(base_dir)
+    full_path = os.path.abspath(os.path.join(base_dir, path))
+    if os.path.commonpath([base_dir, full_path]) != base_dir:  # component-wise; a str prefix test would accept "<base_dir>_x"
+        raise ValueError(f"Operation not allowed outside the {base_dir} directory")
+    return full_path
 
+async def execute_code(code, timeout=10):
+    """Run *code* with the virtual environment's Python in a shell subprocess.
+
+    Returns (process_id, report). If the process is still running after *timeout*
+    seconds the report says so and the process is left running for stop_process().
+    """
+    global running_processes
+    venv_path, activate_script = setup_virtual_environment()
+
+    # The ID doubles as the script's file name, written to the current working directory
+    process_id = f"process_{len(running_processes)}"
+    with open(f"{process_id}.py", "w") as f:
+        f.write(code)
+
+    if sys.platform == "win32":
+        command = f'"{activate_script}" && python {process_id}.py'
+    else:
+        # POSIX "." rather than bash-only "source": the shell is /bin/sh, which is dash on Debian images
+        command = f'. "{activate_script}" && python3 {process_id}.py'
+
+    process = await asyncio.create_subprocess_shell(
+        command,
+        stdout=asyncio.subprocess.PIPE,
+        stderr=asyncio.subprocess.PIPE,
+        shell=True,
+        preexec_fn=None if sys.platform == "win32" else os.setsid
+    )
+    # Track every started process so stop_process() can find it by ID
+    running_processes[process_id] = process
+
+    try:
+        stdout, stderr = await asyncio.wait_for(process.communicate(), timeout=timeout)
+        stdout = stdout.decode()
+        stderr = stderr.decode()
+        return_code = process.returncode
+    except asyncio.TimeoutError:
+        # If we timeout, it means the process is still running
+        stdout = "Process started and running in the background."
+        stderr = ""
+        return_code = "Running"
+
+    execution_result = f"Process ID: {process_id}\n\nStdout:\n{stdout}\n\nStderr:\n{stderr}\n\nReturn Code: {return_code}"
+    return process_id, execution_result
+
+def stop_process(process_id):
+    """Signal the tracked process *process_id* to stop (its whole process group on POSIX) and untrack it."""
+    process = running_processes.get(process_id)
+    if process is None:
+        return f"No running process found with ID {process_id}."
+    if process.returncode is None:  # skip the signal once it has exited: its PID/group no longer exists
+        if sys.platform == "win32":
+            process.terminate()
+        else:
+            os.killpg(os.getpgid(process.pid), signal.SIGTERM)
+    del running_processes[process_id]
+    return f"Process {process_id} has been stopped."
+    
 def update_system_prompt(current_iteration: Optional[int] = None, max_iterations: Optional[int] = None) -> str:
     global file_contents
     chain_of_thought_prompt = """
@@ -197,18 +292,24 @@ def update_system_prompt(current_iteration: Optional[int] = None, max_iterations
 
 def create_folder(path):
     try:
-        os.makedirs(path, exist_ok=True)
-        return f"Folder created: {path}"
+        # Resolve path inside the data directory; a ValueError for an escaping path is reported by the except below
+        safe_path = restrict_to_data_directory(path)
+        os.makedirs(safe_path, exist_ok=True)
+        return f"Folder created: {safe_path}"
     except Exception as e:
         return f"Error creating folder: {str(e)}"
 
 def create_file(path, content=""):
     global file_contents
     try:
-        with open(path, 'w') as f:
+        # Resolve path inside the data directory; file_contents is keyed by this resolved absolute path
+        safe_path = restrict_to_data_directory(path)
+        # Create missing parent folders first so open() does not fail on a new sub-directory
+        os.makedirs(os.path.dirname(safe_path), exist_ok=True)
+        with open(safe_path, 'w') as f:
             f.write(content)
-        file_contents[path] = content
-        return f"File created and added to system prompt: {path}"
+        file_contents[safe_path] = content
+        return f"File created and added to system prompt: {safe_path}"
     except Exception as e:
         return f"Error creating file: {str(e)}"
 
@@ -475,10 +576,12 @@ def generate_diff(original, new, path):
 def read_file(path):
     global file_contents
     try:
-        with open(path, 'r') as f:
+        # Resolve path inside the data directory; a rejected path raises ValueError, returned as an error string below
+        safe_path = restrict_to_data_directory(path)
+        with open(safe_path, 'r') as f:
             content = f.read()
-        file_contents[path] = content
-        return f"File '{path}' has been read and stored in the system prompt."
+        file_contents[safe_path] = content
+        return f"File '{safe_path}' has been read and stored in the system prompt."
     except Exception as e:
         return f"Error reading file: {str(e)}"
 
@@ -486,18 +589,21 @@ def read_multiple_files(paths):
     global file_contents
     results = []
     for path in paths:
         try:
+            safe_path = restrict_to_data_directory(path)  # inside the try: one rejected path must not abort the batch
-            with open(path, 'r') as f:
+            with open(safe_path, 'r') as f:
                 content = f.read()
-            file_contents[path] = content
-            results.append(f"File '{path}' has been read and stored in the system prompt.")
+            file_contents[safe_path] = content
+            results.append(f"File '{safe_path}' has been read and stored in the system prompt.")
         except Exception as e:
             results.append(f"Error reading file '{path}': {str(e)}")
     return "\n".join(results)
 
 def list_files(path="."):
     try:
-        files = os.listdir(path)
+        # Resolve path inside the data directory; the default "." resolves to that base directory itself
+        safe_path = restrict_to_data_directory(path)
+        files = os.listdir(safe_path)
         return "\n".join(files)
     except Exception as e:
         return f"Error listing files: {str(e)}"
@@ -626,6 +732,40 @@ def tavily_search(query):
             }
         }
     },
+    {
+        "type": "function",
+        "function": {
+            "name": "execute_code",
+            "description": "Execute Python code in the 'code_execution_env' virtual environment and return the output",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "code": {
+                        "type": "string",
+                        "description": "The Python code to execute"
+                    }
+                },
+                "required": ["code"]
+            }
+        }
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "stop_process",
+            "description": "Stop a running process by its ID",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "process_id": {
+                        "type": "string",
+                        "description": "The ID of the process to stop"
+                    }
+                },
+                "required": ["process_id"]
+            }
+        }
+    },    
     {
         "type": "function",
         "function": {
@@ -687,6 +827,11 @@ async def execute_tool(tool_call: Dict[str, Any]) -> Dict[str, Any]:
             result = read_multiple_files(tool_input["paths"])
         elif tool_name == "list_files":
             result = list_files(tool_input.get("path", "."))
+        elif tool_name == "execute_code":
+            process_id, execution_result = await execute_code(tool_input["code"])
+            result = f"{execution_result}\n\nNote: Use 'stop_process' tool if you need to terminate a running process."
+        elif tool_name == "stop_process":
+            result = stop_process(tool_input["process_id"])
         elif tool_name == "tavily_search":
             result = tavily_search(tool_input["query"])
         else:
diff --git a/requirements.txt b/claude-engineer/requirements.txt
similarity index 82%
rename from requirements.txt
rename to claude-engineer/requirements.txt
index 45e7f3fb..38cf47df 100644
--- a/requirements.txt
+++ b/claude-engineer/requirements.txt
@@ -5,3 +5,5 @@ Pillow
 rich
 aiohttp
 prompt_toolkit
+pymongo
+ollama
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 00000000..03ab1d04
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,41 @@
+version: '3.8'
+
+services:
+  tinyproxy:
+    build: ./tinyproxy
+    networks:
+      - proxy_network
+    environment:
+      - ALLOW=host.docker.internal
+      - OLLAMA_HOST
+
+  claude-engineer:
+    build: ./claude-engineer
+    volumes:
+      - ./data:/app/data
+    networks:
+      - isolated_network
+      - proxy_network
+    ports:
+      - "8080:7681"  # Expose ttyd on port 8080 (or any other port you choose)
+    depends_on:
+      - tinyproxy
+    environment:
+      - HTTP_PROXY=http://tinyproxy:8888
+      - HTTPS_PROXY=http://tinyproxy:8888
+      - OLLAMA_MAIN_MODEL
+      - OLLAMA_TOOLCHECKER_MODEL
+      - OLLAMA_CODE_MODEL
+      - OLLAMA_CTX_WINDOW
+      - ANTHROPIC_API_KEY
+      - TAVILY_API_KEY
+      - OLLAMA_HOST
+    tty: true
+    stdin_open: true
+
+networks:
+  isolated_network:
+    driver: bridge
+
+  proxy_network:
+    driver: bridge
diff --git a/tinyproxy/Dockerfile b/tinyproxy/Dockerfile
new file mode 100644
index 00000000..53fa4f87
--- /dev/null
+++ b/tinyproxy/Dockerfile
@@ -0,0 +1,19 @@
+# Dockerfile
+
+FROM alpine:3.13
+
+RUN apk add --no-cache tinyproxy
+
+# Copy the tinyproxy configuration file
+COPY tinyproxy.conf /etc/tinyproxy/tinyproxy.conf
+
+# Copy the start script to the container
+COPY start.sh /usr/local/bin/start.sh
+
+# Make the start script executable
+RUN chmod +x /usr/local/bin/start.sh
+
+EXPOSE 8888
+
+# Run the start script when the container starts
+CMD ["/usr/local/bin/start.sh"]
diff --git a/tinyproxy/start.sh b/tinyproxy/start.sh
new file mode 100644
index 00000000..dadd4ee7
--- /dev/null
+++ b/tinyproxy/start.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+
+# Extract the host name or IP from OLLAMA_HOST: strip any scheme, port and path, then escape dots for the regex filter
+OLLAMA_HOST_IP=$(printf '%s' "$OLLAMA_HOST" | sed 's|^[a-zA-Z]*://||' | cut -d ':' -f 1 | cut -d '/' -f 1 | sed 's|\.|\\.|g')
+
+# Write the allow-list; patterns are anchored with escaped dots so look-alike hosts do not match
+printf '^%s$\n' "$OLLAMA_HOST_IP" > /etc/tinyproxy/filter
+printf '%s\n' '^api\.anthropic\.com$' >> /etc/tinyproxy/filter
+printf '%s\n' '^api\.tavily\.com$' >> /etc/tinyproxy/filter
+
+# Start tinyproxy
+exec tinyproxy -d
\ No newline at end of file
diff --git a/tinyproxy/tinyproxy.conf b/tinyproxy/tinyproxy.conf
new file mode 100644
index 00000000..35541f13
--- /dev/null
+++ b/tinyproxy/tinyproxy.conf
@@ -0,0 +1,19 @@
+# tinyproxy.conf
+
+Port 8888
+
+Listen 0.0.0.0
+
+MaxClients 100
+StartServers 5
+
+# Allow Docker internal network
+Allow 172.16.0.0/12
+Allow 127.0.0.1
+Allow host.docker.internal
+
+# Use a filter to restrict access
+Filter "/etc/tinyproxy/filter"
+
+# Default deny all other URLs
+FilterDefaultDeny Yes