From cd093042f0a17d756c737d53c140904d1dffd81e Mon Sep 17 00:00:00 2001 From: Nikhil Varghese Date: Thu, 29 Feb 2024 22:29:56 -0500 Subject: [PATCH 1/2] Add Got It AI's Truthchecker --- README.md | 5 +- docs/user_guides/guardrails-library.md | 23 +++++ docs/user_guides/llm-support.md | 1 + examples/sample_config.yml | 1 + nemoguardrails/library/gotitai/__init__.py | 14 +++ nemoguardrails/library/gotitai/actions.py | 84 +++++++++++++++++ nemoguardrails/library/gotitai/flows.co | 7 ++ .../gotitai_truthchecker/config.yml | 8 ++ .../gotitai_truthchecker/truthcheck.co | 9 ++ tests/test_gotitai_output_rail.py | 90 +++++++++++++++++++ 10 files changed, 240 insertions(+), 2 deletions(-) create mode 100644 nemoguardrails/library/gotitai/__init__.py create mode 100644 nemoguardrails/library/gotitai/actions.py create mode 100644 nemoguardrails/library/gotitai/flows.co create mode 100644 tests/test_configs/gotitai_truthchecker/config.yml create mode 100644 tests/test_configs/gotitai_truthchecker/truthcheck.co create mode 100644 tests/test_gotitai_output_rail.py diff --git a/README.md b/README.md index 8c5470331..1485d80bb 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,7 @@ rails: - self check facts - self check hallucination - activefence moderation + - gotitai rag truthcheck config: # Configure the types of entities that should be masked on user input. @@ -208,7 +209,7 @@ NeMo Guardrails comes with a set of [built-in guardrails](docs/user_guides/guard > **NOTE**: The built-in guardrails are only intended to enable you to get started quickly with NeMo Guardrails. For production use cases, further development and testing of the rails are needed. -Currently, the guardrails library includes guardrails for: [jailbreak detection](docs/user_guides/guardrails-library.md#jailbreak-detection), [output moderation](docs/user_guides/guardrails-library.md#output-moderation), [fact-checking](docs/user_guides/guardrails-library.md#fact-checking), [sensitive data detection](docs/user_guides/guardrails-library.md#sensitive-data-detection), [hallucination detection](docs/user_guides/guardrails-library.md#hallucination-detection) and [input moderation using ActiveFence](docs/user_guides/guardrails-library.md#active-fence). +Currently, the guardrails library includes guardrails for: [jailbreak detection](docs/user_guides/guardrails-library.md#jailbreak-detection), [output moderation](docs/user_guides/guardrails-library.md#output-moderation), [fact-checking](docs/user_guides/guardrails-library.md#fact-checking), [sensitive data detection](docs/user_guides/guardrails-library.md#sensitive-data-detection), [hallucination detection](docs/user_guides/guardrails-library.md#hallucination-detection), [input moderation using ActiveFence](docs/user_guides/guardrails-library.md#active-fence) and [hallucination detection for RAG applications using Got It AI's TruthChecker API](docs/user_guides/guardrails-library.md#got-it-ai). ## CLI @@ -271,7 +272,7 @@ Evaluating the safety of a LLM-based conversational application is a complex tas ## How is this different? -There are many ways guardrails can be added to an LLM-based conversational application. For example: explicit moderation endpoints (e.g., OpenAI, ActiveFence), critique chains (e.g. constitutional chain), parsing the output (e.g. guardrails.ai), individual guardrails (e.g., LLM-Guard). +There are many ways guardrails can be added to an LLM-based conversational application. For example: explicit moderation endpoints (e.g., OpenAI, ActiveFence), critique chains (e.g. 
constitutional chain), parsing the output (e.g. guardrails.ai), individual guardrails (e.g., LLM-Guard), hallucination detection for RAG applications (e.g., Got It AI).

NeMo Guardrails aims to provide a flexible toolkit that can integrate all these complementary approaches into a cohesive LLM guardrails layer. For example, the toolkit provides out-of-the-box integration with ActiveFence, AlignScore and LangChain chains.

diff --git a/docs/user_guides/guardrails-library.md b/docs/user_guides/guardrails-library.md
index 04236b192..1ed728bd6 100644
--- a/docs/user_guides/guardrails-library.md
+++ b/docs/user_guides/guardrails-library.md
@@ -16,6 +16,7 @@ NeMo Guardrails comes with a library of built-in guardrails that you can easily
 3. Third-Party APIs
    - [ActiveFence Moderation](#activefence)
+   - [Got It AI RAG TruthChecker](#got-it-ai)
    - OpenAI Moderation API - *[COMING SOON]*

 4. Other
@@ -685,6 +686,28 @@ define bot inform cannot engage in abusive or harmful behavior
   "I will not engage in any abusive or harmful behavior."
 ```
+
+### Got It AI
+
+Got It AI's Hallucination Manager helps you detect and manage hallucinations in your AI models.
+The [TruthChecker API for RAG applications](https://www.app.got-it.ai/hallucination-manager) is part of the Hallucination Manager suite of APIs.
+
+Existing fact-checking methods are not sufficient to detect hallucinations in AI models for real-world RAG applications. The TruthChecker API performs two checks to determine whether a response is a `hallucination`:
+1. Check for faithfulness of the generated response to the retrieved knowledge chunks.
+2. Check for the relevance of the response to the user query and the conversation history.
+
+The TruthChecker API can be configured to work for open-domain use cases or for a specific domain or knowledge base. By default, the TruthChecker API is configured for open-domain use and we expect it to deliver strong performance on specific domains. However, for an enhanced experience for a specific domain or knowledge base, you can fine-tune the model on the knowledge base and unlock benefits like secure on-premise model deployments.
+
+Please [contact the Got It AI team](https://www.app.got-it.ai/) for more information on how to fine-tune the TruthChecker API for your specific domain or knowledge base.
+
+[Got It AI's TruthChecker API for RAG applications](https://www.app.got-it.ai/hallucination-manager) can be used in NeMo Guardrails as an output rail out-of-the-box (you need to have the `GOTITAI_API_KEY` environment variable set).
+
+```yaml
+rails:
+  output:
+    flows:
+      - gotitai rag truthcheck
+```
+
 ## Other

 ### Jailbreak Detection Heuristics

diff --git a/docs/user_guides/llm-support.md b/docs/user_guides/llm-support.md
index 1d773fa73..b9c7ed55e 100644
--- a/docs/user_guides/llm-support.md
+++ b/docs/user_guides/llm-support.md
@@ -34,6 +34,7 @@ If you want to use an LLM and you cannot see a prompt in the [prompts folder](..
| AlignScore fact-checking _(LLM independent)_ | :heavy_check_mark: (0.89) | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | ActiveFence moderation _(LLM independent)_ | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | | Llama Guard moderation _(LLM independent)_ | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | +| Got It AI RAG TruthChecker _(LLM independent)_ | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | :heavy_check_mark: | Table legend: - :heavy_check_mark: - Supported (_The feature is fully supported by the LLM based on our experiments and tests_) diff --git a/examples/sample_config.yml b/examples/sample_config.yml index e99a47a1e..fe383bd67 100644 --- a/examples/sample_config.yml +++ b/examples/sample_config.yml @@ -37,6 +37,7 @@ rails: - check hallucination - activefence moderation - check sensitive data + - gotitai rag truthcheck # Execution rails are triggered before and after an action is invoked # TODO diff --git a/nemoguardrails/library/gotitai/__init__.py b/nemoguardrails/library/gotitai/__init__.py new file mode 100644 index 000000000..9ba9d4310 --- /dev/null +++ b/nemoguardrails/library/gotitai/__init__.py @@ -0,0 +1,14 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/nemoguardrails/library/gotitai/actions.py b/nemoguardrails/library/gotitai/actions.py new file mode 100644 index 000000000..efd44857e --- /dev/null +++ b/nemoguardrails/library/gotitai/actions.py @@ -0,0 +1,84 @@ +# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import json
+import logging
+import os
+from typing import Optional
+
+import aiohttp
+
+from nemoguardrails.actions import action
+
+log = logging.getLogger(__name__)
+
+
+@action(name="call gotitai truthchecker api", is_system_action=True)
+async def call_gotitai_truthchecker_api(context: Optional[dict] = None):
+    api_key = os.environ.get("GOTITAI_API_KEY")
+
+    if api_key is None:
+        raise ValueError("GOTITAI_API_KEY environment variable not set.")
+
+    if context is None:
+        raise ValueError(
+            "Context is empty. `user_message`, `bot_message` and `relevant_chunks` keys are required to call the Got It AI TruthChecker API."
+        )
+
+    user_message = context.get("user_message", "")
+    response = context.get("bot_message", "")
+    knowledge = context.get("relevant_chunks", [])
+
+    if not isinstance(knowledge, list):
+        raise ValueError("`relevant_chunks` must be a list of knowledge.")
+
+    if not knowledge:
+        raise ValueError("At least 1 relevant chunk is required.")
+
+    url = "https://api.got-it.ai/api/v1/hallucination-manager/truthchecker"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": "Bearer " + api_key,
+    }
+    data = {
+        "knowledge": [
+            {
+                "text": chunk,
+            }
+            for chunk in knowledge
+        ],
+        "prompt": user_message,
+        "generated_text": response,
+        # Messages is empty for now since there is no standard way to get them.
+        # This should be updated once 0.8.0 is released.
+        # Reference: https://github.com/NVIDIA/NeMo-Guardrails/issues/246
+        "messages": [],
+    }
+
+    async with aiohttp.ClientSession() as session:
+        async with session.post(
+            url=url,
+            headers=headers,
+            json=data,
+        ) as response:
+            if response.status != 200:
+                raise ValueError(
+                    f"GotItAI TruthChecking call failed with status code {response.status}.\n"
+                    f"Details: {await response.json()}"
+                )
+            response_json = await response.json()
+            log.info(json.dumps(response_json, indent=True))
+            hallucination = response_json["hallucination"]
+
+    return {"hallucination": hallucination}
diff --git a/nemoguardrails/library/gotitai/flows.co b/nemoguardrails/library/gotitai/flows.co
new file mode 100644
index 000000000..1d7685ce0
--- /dev/null
+++ b/nemoguardrails/library/gotitai/flows.co
@@ -0,0 +1,7 @@
+define subflow gotitai rag truthcheck
+  """Guardrail based on the maximum risk score."""
+  $result = execute call gotitai truthchecker api
+
+  if $result.hallucination == "yes"
+    bot inform answer unknown
+    stop
diff --git a/tests/test_configs/gotitai_truthchecker/config.yml b/tests/test_configs/gotitai_truthchecker/config.yml
new file mode 100644
index 000000000..fa7b5b4f5
--- /dev/null
+++ b/tests/test_configs/gotitai_truthchecker/config.yml
@@ -0,0 +1,8 @@
+models:
+  - type: main
+    engine: openai
+    model: gpt-3.5-turbo-instruct
+rails:
+  output:
+    flows:
+      - gotitai rag truthcheck
diff --git a/tests/test_configs/gotitai_truthchecker/truthcheck.co b/tests/test_configs/gotitai_truthchecker/truthcheck.co
new file mode 100644
index 000000000..1f3b74931
--- /dev/null
+++ b/tests/test_configs/gotitai_truthchecker/truthcheck.co
@@ -0,0 +1,9 @@
+define user ask general question
+  "Do you ship within 2 days?"
+
+define flow
+  user ask general question
+  bot provide answer
+
+define bot inform answer unknown
+  "I don't know the answer to that."
diff --git a/tests/test_gotitai_output_rail.py b/tests/test_gotitai_output_rail.py
new file mode 100644
index 000000000..e93a4b7dc
--- /dev/null
+++ b/tests/test_gotitai_output_rail.py
@@ -0,0 +1,90 @@
+# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES.
All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +import pytest +from aioresponses import aioresponses + +from nemoguardrails import RailsConfig +from nemoguardrails.actions.actions import ActionResult, action +from tests.utils import TestChat + +CONFIGS_FOLDER = os.path.join(os.path.dirname(__file__), ".", "test_configs") + +GOTITAI_API_URL = "https://api.got-it.ai/api/v1/hallucination-manager/truthchecker" + + +@action(is_system_action=True) +async def retrieve_relevant_chunks(): + """Retrieve relevant chunks from the knowledge base and add them to the context.""" + context_updates = {} + context_updates["relevant_chunks"] = ["Shipping takes at least 3 days."] + + return ActionResult( + return_value=context_updates["relevant_chunks"], + context_updates=context_updates, + ) + + +@pytest.mark.asyncio +async def test_hallucination(monkeypatch): + monkeypatch.setenv("GOTITAI_API_KEY", "xxx") + config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "gotitai_truthchecker")) + chat = TestChat( + config, + llm_completions=[ + "user ask general question", # user intent + "Yes, shipping can be done in 2 days.", # bot response that will be intercepted + ], + ) + + with aioresponses() as m: + chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks") + m.post( + GOTITAI_API_URL, + payload={ + "hallucination": "yes", + }, + ) + + chat >> "Do you ship within 2 days?" + await chat.bot_async("I don't know the answer to that.") + + +@pytest.mark.asyncio +async def test_not_hallucination(monkeypatch): + monkeypatch.setenv("GOTITAI_API_KEY", "xxx") + config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "gotitai_truthchecker")) + chat = TestChat( + config, + llm_completions=[ + # " express greeting", + "user ask general question", # user intent + "No, shipping takes at least 3 days.", # bot response that will not be intercepted + ], + ) + + with aioresponses() as m: + chat.app.register_action(retrieve_relevant_chunks, "retrieve_relevant_chunks") + m.post( + GOTITAI_API_URL, + payload={ + "hallucination": "no", + }, + ) + + chat >> "Do you ship within 2 days?" 
+ await chat.bot_async("No, shipping takes at least 3 days.") From fcb9d5cb1b33e3747251481eab68174945133787 Mon Sep 17 00:00:00 2001 From: Nikhil Varghese Date: Wed, 20 Mar 2024 16:05:16 -0400 Subject: [PATCH 2/2] Use check_facts as a qualifying condition to run the got it ai guardrail --- docs/user_guides/guardrails-library.md | 9 ++++++ nemoguardrails/library/gotitai/actions.py | 19 ++++++++---- nemoguardrails/library/gotitai/flows.co | 11 ++++--- .../gotitai_truthchecker/truthcheck.co | 1 + tests/test_gotitai_output_rail.py | 29 +++++++++++++++++-- 5 files changed, 58 insertions(+), 11 deletions(-) diff --git a/docs/user_guides/guardrails-library.md b/docs/user_guides/guardrails-library.md index 1ed728bd6..94545b9eb 100644 --- a/docs/user_guides/guardrails-library.md +++ b/docs/user_guides/guardrails-library.md @@ -708,6 +708,15 @@ rails: - gotitai rag truthcheck ``` +To trigger the fact-checking rail, you have to set the `$check_facts` context variable to `True` before a bot message that requires fact-checking, e.g.: + +```colang +define flow + user ask about report + $check_facts = True + bot provide report answer +``` + ## Other ### Jailbreak Detection Heuristics diff --git a/nemoguardrails/library/gotitai/actions.py b/nemoguardrails/library/gotitai/actions.py index efd44857e..a5cba7d1b 100644 --- a/nemoguardrails/library/gotitai/actions.py +++ b/nemoguardrails/library/gotitai/actions.py @@ -38,13 +38,21 @@ async def call_gotitai_truthchecker_api(context: Optional[dict] = None): user_message = context.get("user_message", "") response = context.get("bot_message", "") - knowledge = context.get("relevant_chunks", []) + knowledge = context.get("relevant_chunks_sep", []) + + retval = {"hallucination": None} # in case the api call is skipped if not isinstance(knowledge, list): - raise ValueError("`relevant_chunks` must be a list of knowledge.") + log.error( + "Could not run Got It AI Truthchecker. `relevant_chunks_sep` must be a list of knowledge." + ) + return retval if not knowledge: - raise ValueError("At least 1 relevant chunk is required.") + log.error( + "Could not run Got It AI Truthchecker. At least 1 relevant chunk is required." 
+ ) + return retval url = "https://api.got-it.ai/api/v1/hallucination-manager/truthchecker" headers = { @@ -73,12 +81,13 @@ async def call_gotitai_truthchecker_api(context: Optional[dict] = None): json=data, ) as response: if response.status != 200: - raise ValueError( + log.error( f"GotItAI TruthChecking call failed with status code {response.status}.\n" f"Details: {await response.json()}" ) response_json = await response.json() log.info(json.dumps(response_json, indent=True)) hallucination = response_json["hallucination"] + retval = {"hallucination": hallucination} - return {"hallucination": hallucination} + return retval diff --git a/nemoguardrails/library/gotitai/flows.co b/nemoguardrails/library/gotitai/flows.co index 1d7685ce0..4b1409b6c 100644 --- a/nemoguardrails/library/gotitai/flows.co +++ b/nemoguardrails/library/gotitai/flows.co @@ -1,7 +1,10 @@ define subflow gotitai rag truthcheck """Guardrail based on the maximum risk score.""" - $result = execute call gotitai truthchecker api + if $check_facts == True + $check_facts = False - if $result.hallucination == "yes" - bot inform answer unknown - stop + $result = execute call gotitai truthchecker api + + if $result.hallucination == "yes" + bot inform answer unknown + stop diff --git a/tests/test_configs/gotitai_truthchecker/truthcheck.co b/tests/test_configs/gotitai_truthchecker/truthcheck.co index 1f3b74931..7dc12308f 100644 --- a/tests/test_configs/gotitai_truthchecker/truthcheck.co +++ b/tests/test_configs/gotitai_truthchecker/truthcheck.co @@ -3,6 +3,7 @@ define user ask general question define flow user ask general question + $check_facts = True bot provide answer define bot inform answer unknown diff --git a/tests/test_gotitai_output_rail.py b/tests/test_gotitai_output_rail.py index e93a4b7dc..483f2abf4 100644 --- a/tests/test_gotitai_output_rail.py +++ b/tests/test_gotitai_output_rail.py @@ -31,10 +31,10 @@ async def retrieve_relevant_chunks(): """Retrieve relevant chunks from the knowledge base and add them to the context.""" context_updates = {} - context_updates["relevant_chunks"] = ["Shipping takes at least 3 days."] + context_updates["relevant_chunks_sep"] = ["Shipping takes at least 3 days."] return ActionResult( - return_value=context_updates["relevant_chunks"], + return_value=context_updates["relevant_chunks_sep"], context_updates=context_updates, ) @@ -88,3 +88,28 @@ async def test_not_hallucination(monkeypatch): chat >> "Do you ship within 2 days?" await chat.bot_async("No, shipping takes at least 3 days.") + + +@pytest.mark.asyncio +async def test_no_context(monkeypatch): + monkeypatch.setenv("GOTITAI_API_KEY", "xxx") + config = RailsConfig.from_path(os.path.join(CONFIGS_FOLDER, "gotitai_truthchecker")) + chat = TestChat( + config, + llm_completions=[ + # " express greeting", + "user ask general question", # user intent + "Yes, shipping can be done in 2 days.", # bot response that will not be intercepted + ], + ) + + with aioresponses() as m: + m.post( + GOTITAI_API_URL, + payload={ + "hallucination": None, + }, + ) + + chat >> "Do you ship within 2 days?" + await chat.bot_async("Yes, shipping can be done in 2 days.")
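---

For readers who want to exercise the TruthChecker endpoint outside of a guardrails flow, below is a minimal standalone sketch (not part of this patch) that mirrors the request `call_gotitai_truthchecker_api` builds: same endpoint, bearer auth, and `knowledge` / `prompt` / `generated_text` / `messages` payload fields as in `nemoguardrails/library/gotitai/actions.py`. The example prompt, generated text, and knowledge chunk are illustrative only, and `GOTITAI_API_KEY` is assumed to be set in the environment.

```python
# Standalone sketch: call the Got It AI TruthChecker endpoint with the same
# payload shape used in nemoguardrails/library/gotitai/actions.py.
# Example values are illustrative only.
import asyncio
import os
from typing import List, Optional

import aiohttp

GOTITAI_API_URL = "https://api.got-it.ai/api/v1/hallucination-manager/truthchecker"


async def truthcheck(prompt: str, generated_text: str, knowledge: List[str]) -> Optional[str]:
    """Return the `hallucination` verdict ("yes"/"no"), or None if the call fails."""
    api_key = os.environ["GOTITAI_API_KEY"]  # assumed to be set, as the rail requires
    payload = {
        "knowledge": [{"text": chunk} for chunk in knowledge],
        "prompt": prompt,
        "generated_text": generated_text,
        "messages": [],  # conversation history is not sent yet (see issue #246)
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + api_key,
    }
    async with aiohttp.ClientSession() as session:
        async with session.post(GOTITAI_API_URL, headers=headers, json=payload) as resp:
            if resp.status != 200:
                return None
            body = await resp.json()
            return body.get("hallucination")


if __name__ == "__main__":
    verdict = asyncio.run(
        truthcheck(
            prompt="Do you ship within 2 days?",
            generated_text="Yes, shipping can be done in 2 days.",
            knowledge=["Shipping takes at least 3 days."],
        )
    )
    print(verdict)
```

As in the `gotitai rag truthcheck` subflow, a `"yes"` verdict is the signal to suppress the generated answer and respond with `bot inform answer unknown` instead.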