ipa-lab · andreashappe · Jun 19, 2024 · May 10, 2024 · andreashappe · Jun 19, 2024
@@ -32,7 +32,8 @@ dependencies = [
 	'instructor == 1.2.2',
 	'PyYAML == 6.0.1',
 	'python-dotenv == 1.0.1',
-	'pypsexec == 0.3.0'
+	'pypsexec == 0.3.0',
+	'openai == 1.28.0',
 ]
 
 [project.urls]

@@ -1,7 +1,10 @@
 import abc
 import inspect
-from typing import Union, Type, Dict, Callable, Any
+from typing import Union, Type, Dict, Callable, Any, Iterable
 
+import openai
+from openai.types.chat import ChatCompletionToolParam
+from openai.types.chat.completion_create_params import Function
 from pydantic import create_model, BaseModel
 
 
@@ -46,7 +49,7 @@ def to_model(self) -> BaseModel:
         the `__call__` method can then be accessed by calling the `execute` method of the model.
         """
         sig = inspect.signature(self.__call__)
-        fields = {param: (param_info.annotation, ...) for param, param_info in sig.parameters.items()}
+        fields = {param: (param_info.annotation, param_info.default if param_info.default is not inspect._empty else ...) for param, param_info in sig.parameters.items()}
         model_type = create_model(self.__class__.__name__, __doc__=self.describe(), **fields)
 
         def execute(model):
@@ -170,3 +173,26 @@ def default_capability_parser(text: str) -> SimpleTextHandlerResult:
         resolved_parser = default_capability_parser
 
     return capability_descriptions, resolved_parser
+
+
+def capabilities_to_functions(capabilities: Dict[str, Capability]) -> Iterable[openai.types.chat.completion_create_params.Function]:
+    """
+    This function takes a dictionary of capabilities and returns a dictionary of functions, that can be called with the
+    parameters of the respective capabilities.
+    """
+    return [
+        Function(name=name, description=capability.describe(), parameters=capability.to_model().model_json_schema())
+        for name, capability in capabilities.items()
+    ]
+
+
+def capabilities_to_tools(capabilities: Dict[str, Capability]) -> Iterable[openai.types.chat.completion_create_params.ChatCompletionToolParam]:
+    """
+    This function takes a dictionary of capabilities and returns a dictionary of functions, that can be called with the
+    parameters of the respective capabilities.
+    """
+    return [
+        ChatCompletionToolParam(type="function", function=Function(name=name, description=capability.describe(), parameters=capability.to_model().model_json_schema()))
+        for name, capability in capabilities.items()
+    ]
+
@@ -5,6 +5,7 @@
 
 from . import Capability
 
+
 @dataclass
 class HTTPRequest(Capability):
     host: str
@@ -18,7 +19,17 @@ def __post_init__(self):
             self._client = requests
 
     def describe(self) -> str:
-        return f"Sends a request to the host {self.host} and returns the response."
+        description = (f"Sends a request to the host {self.host} using the python requests library and returns the response. The schema and host are fixed and do not need to be provided.\n"
+                       f"Make sure that you send a Content-Type header if you are sending a body.")
+        if self.use_cookie_jar:
+            description += "\nThe cookie jar is used for storing cookies between requests."
+        else:
+            description += "\nCookies are not automatically stored, and need to be provided as header manually every time."
+        if self.follow_redirects:
+            description += "\nRedirects are followed."
+        else:
+            description += "\nRedirects are not followed."
+        return description
 
     def __call__(self,
                  method: Literal["GET", "HEAD", "POST", "PUT", "DELETE", "OPTION", "PATCH"],
@@ -31,18 +42,18 @@ def __call__(self,
         if body is not None and body_is_base64:
             body = base64.b64decode(body).decode()
 
-        resp = self._client.request(
-            method,
-            self.host + path,
-            params=query,
-            data=body,
-            headers=headers,
-            allow_redirects=self.follow_redirects,
-        )
         try:
-            resp.raise_for_status()
-        except requests.exceptions.HTTPError as e:
-            return str(e)
+            resp = self._client.request(
+                method,
+                self.host + path,
+                params=query,
+                data=body,
+                headers=headers,
+                allow_redirects=self.follow_redirects,
+            )
+        except requests.exceptions.RequestException as e:
+            url = self.host + ("" if path.startswith("/") else "/") + path + ("?{query}" if query else "")
+            return f"Could not request '{url}': {e}"
 
         headers = "\r\n".join(f"{k}: {v}" for k, v in resp.headers.items())
 

@@ -1 +1,2 @@
 from .simple import MinimalWebTesting
+from .with_explanation import WebTestingWithExplanation
@@ -0,0 +1,80 @@
+import time
+from dataclasses import dataclass, field
+from typing import List, Any, Union, Dict
+
+from openai.types.chat import ChatCompletionMessageParam, ChatCompletionMessage
+from rich.panel import Panel
+
+from hackingBuddyGPT.capabilities import Capability
+from hackingBuddyGPT.capabilities.http_request import HTTPRequest
+from hackingBuddyGPT.capabilities.submit_flag import SubmitFlag
+from hackingBuddyGPT.utils import LLMResult, tool_message
+from hackingBuddyGPT.usecases import use_case
+from hackingBuddyGPT.usecases.common_patterns import RoundBasedUseCase
+from hackingBuddyGPT.utils.configurable import parameter
+from hackingBuddyGPT.utils.openai.openai_lib import OpenAILib
+
+
+Prompt = List[Union[ChatCompletionMessage, ChatCompletionMessageParam]]
+Context = Any
+
+
+@use_case("web_test_with_explanation", "Minimal implementation of a web testing use case while allowing the llm to 'talk'")
+@dataclass
+class WebTestingWithExplanation(RoundBasedUseCase):
+    llm: OpenAILib
+    host: str = parameter(desc="The host to test", default="http://localhost")
+    flag_format_description: str = parameter(desc="Description of the flag provided to the LLM", default="a string starting with 'FLAG.' and ending with '.GALF'")
+    flag_template: str = parameter(desc="The template of the flags, whereby {flag} is replaced with the flags", default="FLAG.{flag}.GALF")
+    flags: str = parameter(desc="A comma (,) separated list of flags to find", default="hostname,dir,username,rootfile,secretfile,adminpass")
+
+    _prompt_history: Prompt = field(default_factory=list)
+    _context: Context = field(default_factory=lambda: {"notes": list()})
+    _capabilities: Dict[str, Capability] = field(default_factory=dict)
+    _all_flags_found: bool = False
+
+    def init(self):
+        super().init()
+        self._prompt_history.append(
+            {
+                "role": "system",
+                "content": f"You are a senior penetration tester, having received a new project of testing a website at {self.host}."
+                           f"Your goal is first to explore the website, building up your knowledge about its parts and inner workings, and then try to plan and execute possible exploitations.\n"
+                           f"Think step by step and say everything out loud that you might be doing.\n"
+                           f"Give a list of assumptions you base your tool calls on, before you start a tool call, and reason whether they held or not after the call has completed.\n"
+                           f"Do not brute-force anything, all information that you might need can be found on the webserver.\n"
+                           f"THE MOST IMPORTANT THING TO DO IS, that if you see a flag ({self.flag_format_description}), you should submit it immediately."})
+        self._context["host"] = self.host
+        self._capabilities = {
+            "submit_flag": SubmitFlag(self.flag_format_description, set(self.flag_template.format(flag=flag) for flag in self.flags.split(",")), success_function=self.all_flags_found),
+            "http_request": HTTPRequest(self.host),
+        }
+
+    def all_flags_found(self):
+        self.console.print(Panel("All flags found! Congratulations!", title="system"))
+        self._all_flags_found = True
+
+    def perform_round(self, turn: int):
+        prompt = self._prompt_history  # TODO: in the future, this should do some context truncation
+
+        result: LLMResult = None
+        stream = self.llm.stream_response(prompt, self.console, capabilities=self._capabilities)
+        for part in stream:
+            result = part
+
+        message: ChatCompletionMessage = result.result
+        message_id = self.log_db.add_log_message(self._run_id, message.role, message.content, result.tokens_query, result.tokens_response, result.duration)
+        self._prompt_history.append(result.result)
+
+        if message.tool_calls is not None:
+            for tool_call in message.tool_calls:
+                tic = time.perf_counter()
+                tool_call_result = self._capabilities[tool_call.function.name].to_model().model_validate_json(tool_call.function.arguments).execute()
+                toc = time.perf_counter()
+
+                self.console.print(f"\n[bold green on gray3]{' '*self.console.width}\nTOOL RESPONSE:[/bold green on gray3]")
+                self.console.print(tool_call_result)
+                self._prompt_history.append(tool_message(tool_call_result, tool_call.id))
+                self.log_db.add_log_tool_call(self._run_id, message_id, tool_call.id, tool_call.function.name, tool_call.function.arguments, tool_call_result, toc - tic)
+
+        return self._all_flags_found
@@ -30,11 +30,51 @@ def insert_or_select_cmd(self, name: str) -> int:
 
     def setup_db(self):
         # create tables
-        self.cursor.execute(
-            "CREATE TABLE IF NOT EXISTS runs (id INTEGER PRIMARY KEY, model text, context_size INTEGER, state TEXT, tag TEXT, started_at text, stopped_at text, rounds INTEGER, configuration TEXT)")
-        self.cursor.execute("CREATE TABLE IF NOT EXISTS commands (id INTEGER PRIMARY KEY, name string unique)")
-        self.cursor.execute(
-            "CREATE TABLE IF NOT EXISTS queries (run_id INTEGER, round INTEGER, cmd_id INTEGER, query TEXT, response TEXT, duration REAL, tokens_query INTEGER, tokens_response INTEGER, prompt TEXT, answer TEXT)")
+        self.cursor.execute("""CREATE TABLE IF NOT EXISTS runs (
+            id INTEGER PRIMARY KEY,
+            model text,
+            context_size INTEGER,
+            state TEXT,
+            tag TEXT,
+            started_at text,
+            stopped_at text,
+            rounds INTEGER,
+            configuration TEXT
+        )""")
+        self.cursor.execute("""CREATE TABLE IF NOT EXISTS commands (
+            id INTEGER PRIMARY KEY,
+            name string unique
+        )""")
+        self.cursor.execute("""CREATE TABLE IF NOT EXISTS queries (
+            run_id INTEGER,
+            round INTEGER,
+            cmd_id INTEGER,
+            query TEXT,
+            response TEXT,
+            duration REAL,
+            tokens_query INTEGER,
+            tokens_response INTEGER,
+            prompt TEXT,
+            answer TEXT
+        )""")
+        self.cursor.execute("""CREATE TABLE IF NOT EXISTS messages (
+            run_id INTEGER,
+            message_id INTEGER,
+            role TEXT,
+            content TEXT,
+            duration REAL,
+            tokens_query INTEGER,
+            tokens_response INTEGER
+        )""")
+        self.cursor.execute("""CREATE TABLE IF NOT EXISTS tool_calls (
+            run_id INTEGER,
+            message_id INTEGER,
+            tool_call_id INTEGER,
+            function_name TEXT,
+            arguments TEXT,
+            result_text TEXT,
+            duration REAL
+        )""")
 
         # insert commands
         self.query_cmd_id = self.insert_or_select_cmd('query_cmd')
@@ -72,6 +112,18 @@ def add_log_update_state(self, run_id, round, cmd, result, answer):
                 "INSERT INTO queries (run_id, round, cmd_id, query, response, duration, tokens_query, tokens_response, prompt, answer) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
                 (run_id, round, self.state_update_id, cmd, result, 0, 0, 0, '', ''))
 
+    def add_log_message(self, run_id: int, role: str, content: str, tokens_query: int, tokens_response: int, duration):
+        self.cursor.execute(
+            "INSERT INTO messages (run_id, message_id, role, content, tokens_query, tokens_response, duration) VALUES (?, (SELECT COALESCE(MAX(message_id), 0) + 1 FROM messages WHERE run_id = ?), ?, ?, ?, ?, ?)",
+            (run_id, run_id, role, content, tokens_query, tokens_response, duration))
+        self.cursor.execute("SELECT MAX(message_id) FROM messages WHERE run_id = ?", (run_id,))
+        return self.cursor.fetchone()[0]
+
+    def add_log_tool_call(self, run_id: int, message_id: int, tool_call_id: str, function_name: str, arguments: str, result_text: str, duration):
+        self.cursor.execute(
+            "INSERT INTO tool_calls (run_id, message_id, tool_call_id, function_name, arguments, result_text, duration) VALUES (?, ?, ?, ?, ?, ?, ?)",
+            (run_id, message_id, tool_call_id, function_name, arguments, result_text, duration))
+
     def get_round_data(self, run_id, round, explanation, status_update):
         rows = self.cursor.execute(
             "select cmd_id, query, response, duration, tokens_query, tokens_response from queries where run_id = ? and round = ?",
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		from .simple import MinimalWebTesting
		from .with_explanation import WebTestingWithExplanation