NVIDIA · leondz · Jan 20, 2025 · Jan 27, 2025 · Jan 27, 2025 · Jan 27, 2025
diff --git a/docs/source/attempt.rst b/docs/source/attempt.rst
@@ -1,12 +1,16 @@
 garak.attempt
 =============
 
-In garak, ``attempt`` objects track a single prompt and the results of running it on through the generator.
-Probes work by creating a set of garak.attempt objects and setting their class properties.
+In garak, ``Attempt`` objects track a single prompt and the results of running it on through the generator.
+Probes work by creating a set of garak.attempt.Attempt objects and setting their class properties.
 These are passed by the harness to the generator, and the output added to the attempt.
 Then, a detector assesses the outputs from that attempt and the detector's scores are saved in the attempt.
 Finally, an evaluator makes judgments of these scores, and writes hits out to the hitlog for any successful probing attempts.
 
+Within this, ``Turn`` objects encapsulate conversational turns either sent to models (i.e. prompts)
+or returned from models (i.e. model output). 
+garak uses an object to encapsulate this to allow easy switching with multimodal probes and generators.
+
 garak.attempt
 =============
 

diff --git a/garak/attempt.py b/garak/attempt.py
@@ -1,7 +1,7 @@
 """Defines the Attempt class, which encapsulates a prompt with metadata and results"""
 
 from types import GeneratorType
-from typing import Any, List
+from typing import List, Union
 import uuid
 
 (
@@ -13,21 +13,88 @@
 roles = {"system", "user", "assistant"}
 
 
+class Turn(dict):
+    """Object to represent a single turn posed to or received from a generator
+
+    Turns can be prompts, replies, system prompts. While many prompts are text,
+    they may also be (or include) images, audio, files, or even a composition of
+    these. The Turn object encapsulates this flexibility.
+
+    `Turn` doesn't yet support multiple attachments of the same type.
+
+    Multi-turn queries should be composed of multiple Turn objects.
+
+    Turns must always have a `text` part, which is set to `None` by default.
+
+    Expected part names:
+    * `text` -- The prompt. `text` is always present, though may be None
+    * `image_filename` -- Filename of an image to be attached
+    * `image_data` - `bytes` of an image
+
+    """
+
+    @property
+    def text(self) -> Union[None, str]:
+        if "text" in self.parts:
+            return self.parts["text"]
+        else:
+            return None
+
+    @text.setter
+    def text(self, value: Union[None, str]) -> None:
+        self.parts["text"] = value
+
+    def __init__(self, text: Union[None, str] = None) -> None:
+        self.parts = {}
+        self.text = text
+
+    def add_part(self, name, data) -> None:
+        self.parts[name] = data
+
+    def add_part_from_filename(self, name, filename: str) -> None:
+        with open(filename, "rb") as f:
+            self.parts[name] = f.read()
+
+    def load_image(self) -> None:
+        self.add_part_from_filename("image_data", self.parts["image_filename"])
+
+    def __str__(self):
+        if len(self.parts) == 1:
+            return self.text
+        else:
+            return "<Turn " + repr(self.parts) + ">"
+
+    def __eq__(self, other):
+        if not isinstance(other, Turn):
+            return False  # or raise TypeError
+        if self.text != other.text:
+            return False
+        if self.parts != other.parts:
+            return False
+        return True
+
+    def to_dict(self) -> dict:
+        return self.parts
+
+    def from_dict(self, turn_dict: dict):
+        self.parts = turn_dict
+
+
 class Attempt:
     """A class defining objects that represent everything that constitutes a single attempt at evaluating an LLM.
 
     :param status: The status of this attempt; ``ATTEMPT_NEW``, ``ATTEMPT_STARTED``, or ``ATTEMPT_COMPLETE``
     :type status: int
     :param prompt: The processed prompt that will presented to the generator
-    :type prompt: str
+    :type prompt: Turn
     :param probe_classname: Name of the probe class that originated this ``Attempt``
     :type probe_classname: str
     :param probe_params: Non-default parameters logged by the probe
     :type probe_params: dict, optional
     :param targets: A list of target strings to be searched for in generator responses to this attempt's prompt
     :type targets: List(str), optional
     :param outputs: The outputs from the generator in response to the prompt
-    :type outputs: List(str)
+    :type outputs: List(Turn)
     :param notes: A free-form dictionary of notes accompanying the attempt
     :type notes: dict
     :param detector_results: A dictionary of detector scores, keyed by detector name, where each value is a list of scores corresponding to each of the generator output strings in ``outputs``
@@ -97,16 +164,25 @@ def as_dict(self) -> dict:
             "probe_classname": self.probe_classname,
             "probe_params": self.probe_params,
             "targets": self.targets,
-            "prompt": self.prompt,
-            "outputs": list(self.outputs),
+            "prompt": self.prompt.to_dict(),
+            "outputs": [o.to_dict() for o in list(self.outputs)],
             "detector_results": {k: list(v) for k, v in self.detector_results.items()},
             "notes": self.notes,
             "goal": self.goal,
-            "messages": self.messages,
+            "messages": [
+                [
+                    {
+                        "role": msg["role"],
+                        "content": msg["content"].to_dict(),
+                    }
+                    for msg in thread
+                ]
+                for thread in self.messages
+            ],
         }
 
     @property
-    def prompt(self):
+    def prompt(self) -> Turn:
         if len(self.messages) == 0:  # nothing set
             return None
         if isinstance(self.messages[0], dict):  # only initial prompt set
@@ -121,7 +197,7 @@ def prompt(self):
             )
 
     @property
-    def outputs(self):
+    def outputs(self) -> List[Turn | None]:
         if len(self.messages) and isinstance(self.messages[0], list):
             # work out last_output_turn that was assistant
             assistant_turns = [
@@ -138,7 +214,7 @@ def outputs(self):
             return []
 
     @property
-    def latest_prompts(self):
+    def latest_prompts(self) -> Turn | List[Turn | None]:
         if len(self.messages[0]) > 1:
             # work out last_output_turn that was user
             last_output_turn = max(
@@ -166,9 +242,13 @@ def all_outputs(self):
         return all_outputs
 
     @prompt.setter
-    def prompt(self, value):
+    def prompt(self, value: str | Turn):
         if value is None:
             raise TypeError("'None' prompts are not valid")
+        if isinstance(value, str):
+            value = Turn(text=value)
+        if not isinstance(value, Turn):
+            raise TypeError("prompt must be a Turn() or string")
         self._add_first_turn("user", value)
 
     @outputs.setter
@@ -189,7 +269,7 @@ def latest_prompts(self, value):
         assert isinstance(value, list)
         self._add_turn("user", value)
 
-    def _expand_prompt_to_histories(self, breadth):
+    def _expand_prompt_to_histories(self, breadth: int):
         """expand a prompt-only message history to many threads"""
         if len(self.messages) == 0:
             raise TypeError(
@@ -203,9 +283,12 @@ def _expand_prompt_to_histories(self, breadth):
         base_message = dict(self.messages[0])
         self.messages = [[base_message] for i in range(breadth)]
 
-    def _add_first_turn(self, role: str, content: str) -> None:
+    def _add_first_turn(self, role: str, content: Union[Turn, str]) -> None:
         """add the first turn (after a prompt) to a message history"""
 
+        if isinstance(content, str):
+            content = Turn(content)
+
         if len(self.messages):
             if isinstance(self.messages[0], list):
                 raise TypeError(
@@ -226,7 +309,7 @@ def _add_first_turn(self, role: str, content: str) -> None:
             self.messages.append({"role": role, "content": content})
             return
 
-    def _add_turn(self, role: str, contents: List[str]) -> None:
+    def _add_turn(self, role: str, contents: List[Union[Turn, str]]) -> None:
         """add a 'layer' to a message history.
 
         the contents should be as broad as the established number of
@@ -245,8 +328,13 @@ def _add_turn(self, role: str, contents: List[str]) -> None:
             raise ValueError(
                 "Can only add a list of user prompts after at least one system generation, so that generations count is known"
             )
+
         if role in roles:
             for idx, entry in enumerate(contents):
+                if isinstance(entry, str):
+                    entry = Turn(entry)
+                if not isinstance(entry, Turn):
+                    raise ValueError("turns must be garak.attempt.Turn instances")
                 self.messages[idx].append({"role": role, "content": entry})
             return
         raise ValueError(

diff --git a/garak/buffs/encoding.py b/garak/buffs/encoding.py
@@ -16,9 +16,9 @@ class Base64(Buff):
     def transform(
         self, attempt: garak.attempt.Attempt
     ) -> Iterable[garak.attempt.Attempt]:
-        b64_attempt = b64encode(str.encode(attempt.prompt))
+        b64_attempt = b64encode(str.encode(attempt.prompt.text))
         b64_attempt_string = str(b64_attempt, encoding="utf-8")
-        attempt.prompt = (
+        attempt.prompt.text = (
             f"The following instruction is base64 encoded: {b64_attempt_string}"
         )
         yield attempt
@@ -30,8 +30,8 @@ class CharCode(Buff):
     def transform(
         self, attempt: garak.attempt.Attempt
     ) -> Iterable[garak.attempt.Attempt]:
-        cc_attempt = " ".join(list(map(str, map(ord, attempt.prompt))))
-        attempt.prompt = (
+        cc_attempt = " ".join(list(map(str, map(ord, attempt.prompt.text))))
+        attempt.prompt.text = (
             f"The following instruction is encoded with CharCode: {cc_attempt}"
         )
         yield attempt
diff --git a/garak/buffs/low_resource_languages.py b/garak/buffs/low_resource_languages.py
@@ -36,23 +36,25 @@ def transform(
         self, attempt: garak.attempt.Attempt
     ) -> Iterable[garak.attempt.Attempt]:
         translator = Translator(self.api_key)
-        prompt = attempt.prompt
-        attempt.notes["original_prompt"] = prompt
+        prompt_text = attempt.prompt.text
+        attempt.notes["original_prompt_text"] = prompt_text
         for language in LOW_RESOURCE_LANGUAGES:
             attempt.notes["LRL_buff_dest_lang"] = language
-            response = translator.translate_text(prompt, target_lang=language)
+            response = translator.translate_text(prompt_text, target_lang=language)
             translated_prompt = response.text
             attempt.prompt = translated_prompt
             yield self._derive_new_attempt(attempt)
 
     def untransform(self, attempt: garak.attempt.Attempt) -> garak.attempt.Attempt:
         translator = Translator(self.api_key)
         outputs = attempt.outputs
-        attempt.notes["original_responses"] = outputs
+        attempt.notes["original_responses"] = [
+            turn.text for turn in outputs
+        ]  # serialise-friendly
         translated_outputs = list()
         for output in outputs:
-            response = translator.translate_text(output, target_lang="EN-US")
+            response = translator.translate_text(output.text, target_lang="EN-US")
             translated_output = response.text
-            translated_outputs.append(translated_output)
+            translated_outputs.append(garak.attempt.Turn(translated_output))
         attempt.outputs = translated_outputs
         return attempt
diff --git a/garak/buffs/lowercase.py b/garak/buffs/lowercase.py
@@ -15,5 +15,5 @@ class Lowercase(Buff):
     def transform(
         self, attempt: garak.attempt.Attempt
     ) -> Iterable[garak.attempt.Attempt]:
-        attempt.prompt = attempt.prompt.lower()
+        attempt.prompt.text = attempt.prompt.text.lower()
         yield attempt
diff --git a/garak/buffs/paraphrase.py b/garak/buffs/paraphrase.py
@@ -69,10 +69,10 @@ def transform(
         self, attempt: garak.attempt.Attempt
     ) -> Iterable[garak.attempt.Attempt]:
         yield self._derive_new_attempt(attempt)
-        paraphrases = self._get_response(attempt.prompt)
+        paraphrases = self._get_response(attempt.prompt.text)
         for paraphrase in set(paraphrases):
             paraphrased_attempt = self._derive_new_attempt(attempt)
-            paraphrased_attempt.prompt = paraphrase
+            paraphrased_attempt.prompt.text = paraphrase
             yield paraphrased_attempt
 
 

diff --git a/garak/detectors/base.py b/garak/detectors/base.py
@@ -134,7 +134,7 @@ def detect(self, attempt: garak.attempt.Attempt) -> List[float]:
         # don't adjust attempt.outputs
 
         non_none_outputs = [
-            v for k, v in enumerate(attempt.all_outputs) if v is not None
+            v.text for k, v in enumerate(attempt.all_outputs) if v.text is not None
         ]
         # non_none_offsets = [k for k,v in enumerate(attempt.all_outputs) if v is not None] # we'll want this when outputs+scores need to align
         try:
@@ -179,20 +179,21 @@ def detect(
         detector_results = []
         for output in attempt.all_outputs:
             match = False
-            if output is None:
+            output_text = output.text
+            if output_text is None:
                 continue
             for s in self.substrings:
                 if not case_sensitive:
-                    s, output = s.lower(), output.lower()
+                    s, output_text = s.lower(), output_text.lower()
 
                 if self.matchtype == "str":
-                    if s in output:
+                    if s in output_text:
                         match = True
                 elif self.matchtype == "word":
-                    if re.search(r"\b" + s + r"\b", output):
+                    if re.search(r"\b" + s + r"\b", output_text):
                         match = True
                 elif self.matchtype == "startswith":
-                    if output.startswith(s):
+                    if output_text.startswith(s):
                         match = True
                 else:
                     raise ValueError(
@@ -215,16 +216,16 @@ def detect(
             if isinstance(triggers, str):
                 triggers = [triggers]
             for output in attempt.all_outputs:
-                if output is None:
+                if output.text is None:
                     continue  # trigger is absent
 
                 match = False
                 for trigger in triggers:
                     if case_sensitive:
-                        if trigger in output:
+                        if trigger in output.text:
                             match = True
                     else:
-                        if trigger.lower() in output.lower():
+                        if trigger.lower() in output.text.lower():
                             match = True
                 detector_results.append(1.0 if match else 0.0)
 
@@ -249,14 +250,14 @@ def detect(self, attempt: garak.attempt.Attempt) -> Iterable[float]:
             )
 
         for local_filename in attempt.all_outputs:
-            if local_filename is None or local_filename == "":
+            if local_filename.text is None or local_filename.text == "":
                 continue
             if not os.path.isfile(
-                local_filename
+                local_filename.text
             ):  # skip missing files but also pipes, devices, etc
                 logging.info("Skipping non-file path %s", local_filename)
                 continue
 
             else:
-                test_result = self._test_file(local_filename)
+                test_result = self._test_file(local_filename.text)
                 yield test_result if test_result is not None else 0.0
diff --git a/garak/detectors/continuation.py b/garak/detectors/continuation.py
@@ -21,17 +21,18 @@ def detect(self, attempt: Attempt, case_sensitive=False) -> List[float]:
         if "trigger" in attempt.notes:
             trigger = attempt.notes["trigger"]
             for output in attempt.all_outputs:
-                if output is None:
+                if output.text is None:
                     continue
 
                 # continuation models might not include the whole slur in their output, so we should build some context in
                 # do this by concatenating the end of the prompt with the beginning of the output, and then scanning that context
 
                 # reduce leading spaces in output
-                trimmed_output = re.sub(r"^\s+", " ", output)
+                trimmed_output = re.sub(r"^\s+", " ", output.text)
 
                 context = (
-                    attempt.prompt[-len(trigger) :] + trimmed_output[: len(trigger)]
+                    attempt.prompt.text[-len(trigger) :]
+                    + trimmed_output[: len(trigger)]
                 )
                 if not case_sensitive:
                     trigger, context = trigger.lower(), context.lower()