From 7002186e8ca19ab07605748cef1f3493dddbc547 Mon Sep 17 00:00:00 2001
From: Martin
Date: Tue, 3 Sep 2024 22:16:07 +0200
Subject: [PATCH 01/11] Add 2 new Ollama generators and tests

The tests are skipped when no Ollama server can be found.
---
 garak/generators/ollama.py      | 83 +++++++++++++++++++++++++++++++
 pyproject.toml                  |  1 +
 requirements.txt                |  1 +
 tests/generators/test_ollama.py | 86 +++++++++++++++++++++++++++++++++
 4 files changed, 171 insertions(+)
 create mode 100644 garak/generators/ollama.py
 create mode 100644 tests/generators/test_ollama.py

diff --git a/garak/generators/ollama.py b/garak/generators/ollama.py
new file mode 100644
index 00000000..8580e40f
--- /dev/null
+++ b/garak/generators/ollama.py
@@ -0,0 +1,83 @@
+"""Ollama interface"""
+
+from typing import List, Union
+
+import backoff
+import ollama
+
+from garak import _config
+from garak.generators.base import Generator
+
+
+def _give_up(error):
+    return isinstance(error, ollama.ResponseError) and error.status_code == 404
+
+
+class OllamaGenerator(Generator):
+    """Interface for Ollama endpoints
+
+    Model names can be passed in short form like "llama2", or as specific versions or sizes like "gemma:7b" or "llama2:latest"
+    """
+
+    DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | {
+        "timeout": 30,  # A 30-second timeout; without it, Ollama can hang forever on failures
+        "host": "127.0.0.1:11434",  # The default host of an Ollama server. This should maybe be loaded from a config file somewhere
+    }
+
+    active = True
+    generator_family_name = "Ollama"
+
+    def __init__(self, name="", config_root=_config):
+        super().__init__(name, config_root)  # Sets the name and generations
+
+        self.client = ollama.Client(
+            self.DEFAULT_PARAMS["host"], timeout=self.DEFAULT_PARAMS["timeout"]
+        )  # Instantiates the client with the timeout
+
+    @backoff.on_exception(
+        backoff.fibo,
+        (TimeoutError, ollama.ResponseError),
+        max_value=70,
+        giveup=_give_up,
+    )
+    @backoff.on_predicate(
+        backoff.fibo, lambda ans: ans is None or len(ans) == 0, max_tries=3
+    )  # Ollama sometimes returns empty responses. Only 3 retries, so generations that expect empty responses are not delayed too much
+    def _call_model(
+        self, prompt: str, generations_this_call: int = 1
+    ) -> List[Union[str, None]]:
+        response = self.client.generate(self.name, prompt)
+        return [response["response"]]
+
+
+class OllamaGeneratorChat(OllamaGenerator):
+    """Interface for Ollama endpoints, using the chat functionality
+
+    Model names can be passed in short form like "llama2", or as specific versions or sizes like "gemma:7b" or "llama2:latest"
+    """
+
+    @backoff.on_exception(
+        backoff.fibo,
+        (TimeoutError, ollama.ResponseError),
+        max_value=70,
+        giveup=_give_up,
+    )
+    @backoff.on_predicate(
+        backoff.fibo, lambda ans: ans is None or len(ans) == 0, max_tries=3
+    )  # Ollama sometimes returns empty responses. Only 3 retries, so generations that expect empty responses are not delayed too much
+    def _call_model(
+        self, prompt: str, generations_this_call: int = 1
+    ) -> List[Union[str, None]]:
+        response = self.client.chat(
+            model=self.name,
+            messages=[
+                {
+                    "role": "user",
+                    "content": prompt,
+                },
+            ],
+        )
+        return [response["message"]["content"]]
+
+
+DEFAULT_CLASS = "OllamaGeneratorChat"
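The stacked backoff decorators above do two jobs at once: retry on transient errors with Fibonacci waits, and retry (at most twice more) when the model returns nothing. A minimal standalone sketch of the pattern for readers unfamiliar with the backoff library; the names here are hypothetical, not part of the patch:

    import backoff

    def _permanent(error):
        # giveup hook: stop retrying on errors that will never succeed (e.g. HTTP 404)
        return getattr(error, "status_code", None) == 404

    @backoff.on_exception(
        backoff.fibo,       # wait 1, 1, 2, 3, 5... seconds between attempts
        ConnectionError,    # retry this exception type
        max_value=70,       # cap the Fibonacci wait at 70 seconds
        giveup=_permanent,  # unless the failure is permanent
    )
    @backoff.on_predicate(
        backoff.fibo,
        lambda ans: not ans,  # also retry when the result is empty or falsy
        max_tries=3,          # but with at most 3 attempts in total
    )
    def flaky_call():
        ...  # stand-in for the Ollama request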
diff --git a/pyproject.toml b/pyproject.toml
index 50864c67..e7a19525 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -73,6 +73,7 @@ dependencies = [
   "lorem==0.1.1",
   "xdg-base-dirs>=6.0.1",
   "wn==0.9.5",
+  "ollama>=0.1.7"
 ]
 
 [project.optional-dependencies]
diff --git a/requirements.txt b/requirements.txt
index 7bda3a64..156c02c6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -34,6 +34,7 @@ python-magic>=0.4.21; sys_platform != "win32"
 lorem==0.1.1
 xdg-base-dirs>=6.0.1
 wn==0.9.5
+ollama>=0.1.7
 # tests
 pytest>=8.0
 requests-mock==1.12.1
diff --git a/tests/generators/test_ollama.py b/tests/generators/test_ollama.py
new file mode 100644
index 00000000..2e3d6e41
--- /dev/null
+++ b/tests/generators/test_ollama.py
@@ -0,0 +1,86 @@
+import pytest
+import ollama
+from httpx import ConnectError
+from garak.generators.ollama import OllamaGeneratorChat, OllamaGenerator
+
+PINGED_OLLAMA_SERVER = (
+    False  # Avoid calling the server multiple times if it is not running
+)
+OLLAMA_SERVER_UP = False
+
+
+def ollama_is_running():
+    global PINGED_OLLAMA_SERVER
+    global OLLAMA_SERVER_UP
+
+    if not PINGED_OLLAMA_SERVER:
+        try:
+            ollama.list()  # Gets a list of all pulled models. Used as a ping
+            OLLAMA_SERVER_UP = True
+        except ConnectError:
+            OLLAMA_SERVER_UP = False
+        finally:
+            PINGED_OLLAMA_SERVER = True
+    return OLLAMA_SERVER_UP
+
+
+def no_models():
+    return len(ollama.list()) == 0 or len(ollama.list()["models"]) == 0
+
+
+@pytest.mark.skipif(
+    not ollama_is_running(),
+    reason="Ollama server is not currently running",
+)
+def test_error_on_nonexistent_model_chat():
+    model_name = "nonexistent-model"
+    gen = OllamaGeneratorChat(model_name)
+    with pytest.raises(ollama.ResponseError):
+        gen.generate("This shouldn't work")
+
+
+@pytest.mark.skipif(
+    not ollama_is_running(),
+    reason="Ollama server is not currently running",
+)
+def test_error_on_nonexistent_model():
+    model_name = "nonexistent-model"
+    gen = OllamaGenerator(model_name)
+    with pytest.raises(ollama.ResponseError):
+        gen.generate("This shouldn't work")
+
+
+@pytest.mark.skipif(
+    not ollama_is_running(),
+    reason="Ollama server is not currently running",
+)
+@pytest.mark.skipif(
+    not ollama_is_running() or no_models(),  # Avoid checking models if no server
+    reason="No Ollama models pulled",
+)
+# This test might fail if the GPU is busy, and the generation takes more than 30 seconds
+def test_generation_on_pulled_model_chat():
+    model_name = ollama.list()["models"][0]["name"]
+    gen = OllamaGeneratorChat(model_name)
+    responses = gen.generate('Say "Hello!"')
+    assert len(responses) == 1
+    assert all(isinstance(response, str) for response in responses)
+    assert all(len(response) > 0 for response in responses)
+
+
+@pytest.mark.skipif(
+    not ollama_is_running(),
+    reason="Ollama server is not currently running",
+)
+@pytest.mark.skipif(
+    not ollama_is_running() or no_models(),  # Avoid checking models if no server
+    reason="No Ollama models pulled",
+)
+# This test might fail if the GPU is busy, and the generation takes more than 30 seconds
+def test_generation_on_pulled_model():
+    model_name = ollama.list()["models"][0]["name"]
+    gen = OllamaGenerator(model_name)
+    responses = gen.generate('Say "Hello!"')
+    assert len(responses) == 1
+    assert all(isinstance(response, str) for response in responses)
+    assert all(len(response) > 0 for response in responses)
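With this patch applied, the generators can also be exercised by hand. A quick sketch (not part of the patch), assuming a local Ollama server on the default 127.0.0.1:11434 with a "llama2" model already pulled:

    from garak.generators.ollama import OllamaGenerator, OllamaGeneratorChat

    chat_gen = OllamaGeneratorChat("llama2")  # uses the /api/chat endpoint
    print(chat_gen.generate('Say "Hello!"'))  # a list with one response string

    plain_gen = OllamaGenerator("llama2")     # uses the /api/generate endpoint
    print(plain_gen.generate("The capital of France is"))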
From 87abde501689e53ac9c76aad555dee5c2319e9bc Mon Sep 17 00:00:00 2001
From: Martin <31307962+martinebl@users.noreply.github.com>
Date: Thu, 12 Sep 2024 19:51:29 +0200
Subject: [PATCH 02/11] Update garak/generators/ollama.py

Co-authored-by: Jeffrey Martin
Signed-off-by: Martin <31307962+martinebl@users.noreply.github.com>
---
 garak/generators/ollama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/garak/generators/ollama.py b/garak/generators/ollama.py
index 8580e40f..c40e7150 100644
--- a/garak/generators/ollama.py
+++ b/garak/generators/ollama.py
@@ -31,7 +31,7 @@ def __init__(self, name="", config_root=_config):
         super().__init__(name, config_root)  # Sets the name and generations
 
         self.client = ollama.Client(
-            self.DEFAULT_PARAMS["host"], timeout=self.DEFAULT_PARAMS["timeout"]
+            self.host, timeout=self.timeout
         )  # Instantiates the client with the timeout
 
     @backoff.on_exception(

From f1a660cb14cf9112271f8a5006cc9780ef5d825e Mon Sep 17 00:00:00 2001
From: Martin
Date: Thu, 12 Sep 2024 20:01:04 +0200
Subject: [PATCH 03/11] Avoid missing key errors on empty responses, and
 trigger the backoff predicate instead
---
 garak/generators/ollama.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/garak/generators/ollama.py b/garak/generators/ollama.py
index c40e7150..4e7ee632 100644
--- a/garak/generators/ollama.py
+++ b/garak/generators/ollama.py
@@ -26,6 +26,7 @@ class OllamaGenerator(Generator):
 
     active = True
     generator_family_name = "Ollama"
+    parallel_capable = False
 
     def __init__(self, name="", config_root=_config):
         super().__init__(name, config_root)  # Sets the name and generations
@@ -41,13 +42,13 @@ def __init__(self, name="", config_root=_config):
         giveup=_give_up,
     )
     @backoff.on_predicate(
-        backoff.fibo, lambda ans: ans is None or len(ans) == 0, max_tries=3
+        backoff.fibo, lambda ans: ans == [None] or len(ans) == 0, max_tries=3
     )  # Ollama sometimes returns empty responses. Only 3 retries, so generations that expect empty responses are not delayed too much
     def _call_model(
         self, prompt: str, generations_this_call: int = 1
     ) -> List[Union[str, None]]:
         response = self.client.generate(self.name, prompt)
-        return [response["response"]]
+        return [response.get("response", None)]
 
 
 class OllamaGeneratorChat(OllamaGenerator):
@@ -63,7 +64,7 @@ class OllamaGeneratorChat(OllamaGenerator):
         giveup=_give_up,
     )
     @backoff.on_predicate(
-        backoff.fibo, lambda ans: ans is None or len(ans) == 0, max_tries=3
+        backoff.fibo, lambda ans: ans == [None] or len(ans) == 0, max_tries=3
     )  # Ollama sometimes returns empty responses. Only 3 retries, so generations that expect empty responses are not delayed too much
     def _call_model(
         self, prompt: str, generations_this_call: int = 1
@@ -77,7 +78,7 @@ def _call_model(
                 },
             ],
         )
-        return [response["message"]["content"]]
+        return [response.get("message", {}).get("content", None)]  # Return the response or None
 
 
 DEFAULT_CLASS = "OllamaGeneratorChat"
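The switch from direct indexing to chained `.get()` calls is what ties empty-response handling to the backoff predicate. A small illustration of the resulting flow:

    # An empty or malformed reply no longer raises KeyError:
    reply = {}  # e.g. Ollama returned a body without the expected keys
    result = [reply.get("message", {}).get("content", None)]
    assert result == [None]  # matches `ans == [None]`, so on_predicate retries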
From 48000361177d8eae7dd323d7f4c61b4a4119f96c Mon Sep 17 00:00:00 2001
From: Martin
Date: Thu, 12 Sep 2024 21:40:29 +0200
Subject: [PATCH 04/11] Add mock tests for the happy paths of the Ollama
 generators
---
 tests/generators/test_ollama.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/tests/generators/test_ollama.py b/tests/generators/test_ollama.py
index 2e3d6e41..684412bb 100644
--- a/tests/generators/test_ollama.py
+++ b/tests/generators/test_ollama.py
@@ -1,5 +1,7 @@
 import pytest
 import ollama
+import respx
+import httpx
 from httpx import ConnectError
 from garak.generators.ollama import OllamaGeneratorChat, OllamaGenerator
@@ -84,3 +86,33 @@ def test_generation_on_pulled_model():
     assert len(responses) == 1
     assert all(isinstance(response, str) for response in responses)
     assert all(len(response) > 0 for response in responses)
+
+@pytest.mark.respx(base_url="http://" + OllamaGenerator.DEFAULT_PARAMS["host"])
+def test_ollama_generation_mocked(respx_mock):
+    mock_response = {
+        'model': 'mistral',
+        'response': 'Hello how are you?'
+    }
+    respx_mock.post('/api/generate').mock(
+        return_value=httpx.Response(200, json=mock_response)
+    )
+    gen = OllamaGenerator("mistral")
+    generation = gen.generate("Bla bla")
+    assert generation == ['Hello how are you?']
+
+
+@pytest.mark.respx(base_url="http://" + OllamaGenerator.DEFAULT_PARAMS["host"])
+def test_ollama_generation_chat_mocked(respx_mock):
+    mock_response = {
+        'model': 'mistral',
+        'message': {
+            'role': 'assistant',
+            'content': 'Hello how are you?'
+        }
+    }
+    respx_mock.post('/api/chat').mock(
+        return_value=httpx.Response(200, json=mock_response)
+    )
+    gen = OllamaGeneratorChat("mistral")
+    generation = gen.generate("Bla bla")
+    assert generation == ['Hello how are you?']
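These tests lean on respx's pytest integration: the `respx_mock` fixture intercepts all httpx traffic, and the `ollama` client is httpx-based, so no live server is needed. The same pattern works outside pytest as a context manager; a sketch under that assumption:

    import httpx
    import respx

    with respx.mock(base_url="http://127.0.0.1:11434") as mock:
        route = mock.post("/api/generate").mock(
            return_value=httpx.Response(200, json={"response": "hi"})
        )
        # Any httpx request to the mocked route gets the canned response
        reply = httpx.post("http://127.0.0.1:11434/api/generate", json={})
        assert route.called
        assert reply.json()["response"] == "hi"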
From 7b0a1530d023bd6740ff1c147ab1bc69054fc3e3 Mon Sep 17 00:00:00 2001
From: Martin
Date: Sun, 22 Sep 2024 12:53:41 +0200
Subject: [PATCH 05/11] Add mocked test of model not found flow
---
 tests/generators/test_ollama.py | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/tests/generators/test_ollama.py b/tests/generators/test_ollama.py
index 684412bb..49a55770 100644
--- a/tests/generators/test_ollama.py
+++ b/tests/generators/test_ollama.py
@@ -5,9 +5,7 @@
 from httpx import ConnectError
 from garak.generators.ollama import OllamaGeneratorChat, OllamaGenerator
 
-PINGED_OLLAMA_SERVER = (
-    False  # Avoid calling the server multiple times if it is not running
-)
+PINGED_OLLAMA_SERVER = False  # Avoid calling the server multiple times if it is not running
 OLLAMA_SERVER_UP = False
 
 
@@ -116,3 +114,31 @@ def test_ollama_generation_chat_mocked(respx_mock):
     gen = OllamaGeneratorChat("mistral")
     generation = gen.generate("Bla bla")
     assert generation == ['Hello how are you?']
+
+
+@pytest.mark.respx(base_url="http://" + OllamaGenerator.DEFAULT_PARAMS["host"])
+def test_error_on_nonexistent_model_mocked(respx_mock):
+    mock_response = {
+        'error': "No such model"
+    }
+    respx_mock.post('/api/generate').mock(
+        return_value=httpx.Response(404, json=mock_response)
+    )
+    model_name = "nonexistent-model"
+    gen = OllamaGenerator(model_name)
+    with pytest.raises(ollama.ResponseError):
+        gen.generate("This shouldn't work")
+
+
+@pytest.mark.respx(base_url="http://" + OllamaGenerator.DEFAULT_PARAMS["host"])
+def test_error_on_nonexistent_model_chat_mocked(respx_mock):
+    mock_response = {
+        'error': "No such model"
+    }
+    respx_mock.post('/api/chat').mock(
+        return_value=httpx.Response(404, json=mock_response)
+    )
+    model_name = "nonexistent-model"
+    gen = OllamaGeneratorChat(model_name)
+    with pytest.raises(ollama.ResponseError):
+        gen.generate("This shouldn't work")
\ No newline at end of file

From bd110dd69a632eddc19269e4ced6e41692196d95 Mon Sep 17 00:00:00 2001
From: Martin
Date: Sun, 22 Sep 2024 12:55:04 +0200
Subject: [PATCH 06/11] Add ollama docs file
---
 docs/source/garak.generators.ollama.rst | 8 ++++++++
 1 file changed, 8 insertions(+)
 create mode 100644 docs/source/garak.generators.ollama.rst

diff --git a/docs/source/garak.generators.ollama.rst b/docs/source/garak.generators.ollama.rst
new file mode 100644
index 00000000..900961ce
--- /dev/null
+++ b/docs/source/garak.generators.ollama.rst
@@ -0,0 +1,8 @@
+garak.generators.ollama
+========================
+
+.. automodule:: garak.generators.ollama
+   :members:
+   :undoc-members:
+   :show-inheritance:
+

From 3f917e034f893f01e86d20e297b46d9973b47663 Mon Sep 17 00:00:00 2001
From: Martin
Date: Sun, 22 Sep 2024 12:57:55 +0200
Subject: [PATCH 07/11] Change a comment
---
 garak/generators/ollama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/garak/generators/ollama.py b/garak/generators/ollama.py
index 4e7ee632..8b2c4332 100644
--- a/garak/generators/ollama.py
+++ b/garak/generators/ollama.py
@@ -21,7 +21,7 @@ class OllamaGenerator(Generator):
 
     DEFAULT_PARAMS = Generator.DEFAULT_PARAMS | {
         "timeout": 30,  # A 30-second timeout; without it, Ollama can hang forever on failures
-        "host": "127.0.0.1:11434",  # The default host of an Ollama server. This should maybe be loaded from a config file somewhere
+        "host": "127.0.0.1:11434",  # The default host of an Ollama server. This can be overwritten with a passed config or generator config file.
     }
 
     active = True
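With the comment now accurate, `host` and `timeout` are resolved through garak's plugin configuration, so a run can point at a remote Ollama server without code changes. Roughly, an override might look like the YAML below; this is a sketch with hypothetical values, and the exact schema should be checked against garak's configuration docs:

    plugins:
      generators:
        ollama:
          OllamaGenerator:
            host: "10.0.0.5:11434"  # hypothetical remote Ollama server
            timeout: 60             # allow slower generations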
From 9e79354c742f1885b085f8f65ff877c36713222e Mon Sep 17 00:00:00 2001
From: Martin
Date: Tue, 24 Sep 2024 19:57:19 +0200
Subject: [PATCH 08/11] Fix missing docs entry that was causing tests to fail
---
 docs/source/generators.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docs/source/generators.rst b/docs/source/generators.rst
index 48986758..80d4a7b4 100644
--- a/docs/source/generators.rst
+++ b/docs/source/generators.rst
@@ -20,6 +20,7 @@ For a detailed oversight into how a generator operates, see :ref:`garak.generato
    garak.generators.langchain_serve
    garak.generators.litellm
    garak.generators.octo
+   garak.generators.ollama
    garak.generators.openai
    garak.generators.nemo
    garak.generators.nim

From 3ffc4ada18097d9a6c84f59bb67a13313a938dfa Mon Sep 17 00:00:00 2001
From: Martin <31307962+martinebl@users.noreply.github.com>
Date: Mon, 30 Sep 2024 18:43:11 +0200
Subject: [PATCH 09/11] Update garak/generators/ollama.py

Co-authored-by: Jeffrey Martin
Signed-off-by: Martin <31307962+martinebl@users.noreply.github.com>
---
 garak/generators/ollama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/garak/generators/ollama.py b/garak/generators/ollama.py
index 8b2c4332..1badde5a 100644
--- a/garak/generators/ollama.py
+++ b/garak/generators/ollama.py
@@ -37,7 +37,7 @@ def __init__(self, name="", config_root=_config):
 
     @backoff.on_exception(
         backoff.fibo,
-        (TimeoutError, ollama.ResponseError),
+        (TimeoutException, ollama.ResponseError),
         max_value=70,
         giveup=_give_up,
     )

From 7935a229ae07e65f1aac1b5d57a01a00acb585dc Mon Sep 17 00:00:00 2001
From: Martin
Date: Mon, 30 Sep 2024 18:47:03 +0200
Subject: [PATCH 10/11] Add missing import
---
 garak/generators/ollama.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/garak/generators/ollama.py b/garak/generators/ollama.py
index 1badde5a..b6b8ee53 100644
--- a/garak/generators/ollama.py
+++ b/garak/generators/ollama.py
@@ -7,6 +7,7 @@
 
 from garak import _config
 from garak.generators.base import Generator
+from httpx import TimeoutException
 
 
 def _give_up(error):

From c090a4d26607b2d20c5587c5b1f40cb719c9c888 Mon Sep 17 00:00:00 2001
From: Martin
Date: Mon, 30 Sep 2024 18:49:32 +0200
Subject: [PATCH 11/11] Swap second TimeoutError to TimeoutException as well
---
 garak/generators/ollama.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/garak/generators/ollama.py b/garak/generators/ollama.py
index b6b8ee53..69520388 100644
--- a/garak/generators/ollama.py
+++ b/garak/generators/ollama.py
@@ -60,7 +60,7 @@ class OllamaGeneratorChat(OllamaGenerator):
 
     @backoff.on_exception(
         backoff.fibo,
-        (TimeoutError, ollama.ResponseError),
+        (TimeoutException, ollama.ResponseError),
        max_value=70,
         giveup=_give_up,
     )
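The TimeoutError-to-TimeoutException swap in patches 09-11 matters because the ollama client sits on top of httpx, whose timeout errors derive from `httpx.TimeoutException` rather than the builtin `TimeoutError`; with the old tuple, the backoff decorator never fired on timeouts. A quick illustration of the hierarchy:

    import httpx

    # httpx timeout errors all share one httpx-specific base class...
    assert issubclass(httpx.ReadTimeout, httpx.TimeoutException)
    assert issubclass(httpx.ConnectTimeout, httpx.TimeoutException)
    # ...which is unrelated to the builtin TimeoutError the decorator used to catch
    assert not issubclass(httpx.TimeoutException, TimeoutError)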