Skip to content

Commit

Permalink
Merge pull request #27 from alphagov/bedrock-basic-answer-strat
Browse files Browse the repository at this point in the history
Bedrock basic answer strategy
  • Loading branch information
chaecramb authored Feb 6, 2025
2 parents 9106b19 + 40b1146 commit 6fc80a5
Show file tree
Hide file tree
Showing 17 changed files with 383 additions and 13 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ ruby "~> #{File.read('.ruby-version').strip}"

gem "rails", "8.0.1"

gem "aws-sdk-bedrockruntime"
gem "bootsnap"
gem "chartkick"
gem "csv"
Expand Down
14 changes: 14 additions & 0 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,18 @@ GEM
public_suffix (>= 2.0.2, < 7.0)
amq-protocol (2.3.2)
ast (2.4.2)
aws-eventstream (1.3.0)
aws-partitions (1.1043.0)
aws-sdk-bedrockruntime (1.36.0)
aws-sdk-core (~> 3, >= 3.216.0)
aws-sigv4 (~> 1.5)
aws-sdk-core (3.217.0)
aws-eventstream (~> 1, >= 1.3.0)
aws-partitions (~> 1, >= 1.992.0)
aws-sigv4 (~> 1.9)
jmespath (~> 1, >= 1.6.1)
aws-sigv4 (1.11.0)
aws-eventstream (~> 1, >= 1.0.2)
base64 (0.2.0)
bcrypt (3.1.20)
benchmark (0.4.0)
Expand Down Expand Up @@ -293,6 +305,7 @@ GEM
pp (>= 0.6.0)
rdoc (>= 4.0.0)
reline (>= 0.4.2)
jmespath (1.6.2)
json (2.9.1)
json-schema (4.3.1)
addressable (>= 2.8)
Expand Down Expand Up @@ -865,6 +878,7 @@ PLATFORMS
x86_64-linux

DEPENDENCIES
aws-sdk-bedrockruntime
bootsnap
brakeman
chartkick
Expand Down
2 changes: 1 addition & 1 deletion app/models/answer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ module CannedResponses
CONTEXT_LENGTH_EXCEEDED_RESPONSE = "Sorry, your last question was too complex for me to answer. " \
"Could you make your question more specific? You can also try splitting it into multiple " \
"smaller questions and asking them separately.".freeze
OPENAI_CLIENT_ERROR_RESPONSE = <<~MESSAGE.freeze
ANSWER_SERVICE_ERROR_RESPONSE = <<~MESSAGE.freeze
Sorry, something went wrong while trying to answer your question. Try again later.
We saved your conversation. Check [GOV.UK guidance for businesses](https://www.gov.uk/browse/business) if you need information now.
Expand Down
1 change: 1 addition & 0 deletions app/models/form/create_question.rb
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def submit
validate!

question = Question.create!(
answer_strategy: Rails.configuration.answer_strategy,
message: @sanitised_user_question,
unsanitised_message: (@unsanitised_user_question if @sanitised_user_question != @unsanitised_user_question),
conversation:,
Expand Down
1 change: 1 addition & 0 deletions app/models/question.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ class Question < ApplicationRecord
{
open_ai_rag_completion: "open_ai_rag_completion", # legacy strategy - no longer used
openai_structured_answer: "openai_structured_answer",
claude_structured_answer: "claude_structured_answer",
},
prefix: true

Expand Down
2 changes: 2 additions & 0 deletions config/application.rb
Original file line number Diff line number Diff line change
Expand Up @@ -103,5 +103,7 @@ class Application < Rails::Application
config.conversation_js_progressive_disclosure_delay = nil

config.bigquery_dataset_id = ENV["BIGQUERY_DATASET"]

config.answer_strategy = ENV.fetch("ANSWER_STRATEGY", "openai_structured_answer")
end
end
6 changes: 5 additions & 1 deletion lib/answer_composition/composer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def compose_answer

case answer_strategy
when "openai_structured_answer"
OpenAIAnswer.call(question:, pipeline: [
PipelineRunner.call(question:, pipeline: [
Pipeline::JailbreakGuardrails,
Pipeline::QuestionRephraser,
Pipeline::QuestionRouter,
Expand All @@ -52,6 +52,10 @@ def compose_answer
Pipeline::OpenAIStructuredAnswerComposer,
Pipeline::AnswerGuardrails,
])
when "claude_structured_answer"
PipelineRunner.call(question:, pipeline: [
Pipeline::Claude::StructuredAnswerComposer,
])
else
raise "Answer strategy #{answer_strategy} not configured"
end
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
module AnswerComposition::Pipeline::Claude
  # Pipeline step that answers the user's question with Anthropic Claude via
  # the AWS Bedrock Converse API. Tool use is forced so the model must reply
  # through the "answer_confidence" tool, giving us a structured
  # answer/confidence payload instead of free text.
  class StructuredAnswerComposer
    BEDROCK_MODEL = "eu.anthropic.claude-3-5-sonnet-20240620-v1:0".freeze

    def self.call(...) = new(...).call

    # @param context [Object] pipeline context carrying the question and the
    #   answer record this step populates
    def initialize(context)
      @context = context
    end

    # Calls Bedrock, then records three things on the context's answer:
    # the raw LLM response, the extracted answer text (status "answered"),
    # and duration/token metrics for the call.
    def call
      started_at = Clock.monotonic_time
      response = converse
      answer = context.answer

      answer.assign_llm_response("structured_answer", response.to_h)
      answer.assign_attributes(
        message: response.dig("output", "message", "content", 0, "tool_use", "input", "answer"),
        status: "answered",
      )
      answer.assign_metrics("structured_answer", build_metrics(started_at, response))
    end

    private

    attr_reader :context

    # The Bedrock Converse request itself, with forced tool choice.
    def converse
      bedrock_client.converse(
        system: [{ text: system_prompt }],
        model_id: BEDROCK_MODEL,
        messages: messages,
        inference_config: inference_config,
        tool_config: tool_config,
      )
    end

    def bedrock_client
      @bedrock_client ||= Aws::BedrockRuntime::Client.new
    end

    # Single-turn conversation containing only the user's question.
    def messages
      [{ role: "user", content: [{ text: context.question_message }] }]
    end

    # Deterministic output (temperature 0) with a bounded response length.
    def inference_config
      { max_tokens: 1000, temperature: 0.0 }
    end

    def system_prompt
      <<~PROMPT
        You are a chat assistant for the UK government, designed to provide helpful and contextually relevant responses to user queries.
        Provide concise responses based on the content on the GOV.UK website.
      PROMPT
    end

    # Forces the model to respond via the answer_confidence tool so the
    # output always matches the tool's input schema.
    def tool_config
      { tools:, tool_choice: { tool: { name: "answer_confidence" } } }
    end

    def tools
      schema = {
        type: "object",
        properties: {
          answer: { description: "Your answer to the question in markdown format", title: "Answer", type: "string" },
          confidence: { description: "Your confidence in the answer provided, ranging from 0.0 to 1.0", title: "Confidence", type: "number" },
        },
        required: %w[answer confidence],
      }

      [
        {
          tool_spec: {
            name: "answer_confidence",
            description: "Prints the answer of a given question with a confidence score.",
            input_schema: { json: schema },
          },
        },
      ]
    end

    # Wall-clock duration plus prompt/completion token counts reported by
    # Bedrock in the response's usage section.
    def build_metrics(started_at, response)
      {
        duration: Clock.monotonic_time - started_at,
        llm_prompt_tokens: response.dig("usage", "input_tokens"),
        llm_completion_tokens: response.dig("usage", "output_tokens"),
      }
    end
  end
end
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module AnswerComposition
class OpenAIAnswer
class PipelineRunner
def self.call(...) = new(...).call

def initialize(question:, pipeline: [])
Expand All @@ -26,10 +26,17 @@ def call
rescue OpenAIClient::RequestError => e
GovukError.notify(e)
context.abort_pipeline(
message: Answer::CannedResponses::OPENAI_CLIENT_ERROR_RESPONSE,
message: Answer::CannedResponses::ANSWER_SERVICE_ERROR_RESPONSE,
status: "error_answer_service_error",
error_message: error_message(e),
)
rescue Aws::Errors::ServiceError => e
GovukError.notify(e)
context.abort_pipeline(
message: Answer::CannedResponses::ANSWER_SERVICE_ERROR_RESPONSE,
status: "error_answer_service_error",
error_message: e.message,
)
end

private
Expand Down
10 changes: 5 additions & 5 deletions spec/lib/answer_composition/composer_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
describe ".call" do
it "assigns metrics to the answer" do
answer = create(:answer)
allow(AnswerComposition::OpenAIAnswer).to receive(:call).and_return(answer)
allow(AnswerComposition::PipelineRunner).to receive(:call).and_return(answer)
allow(Clock).to receive(:monotonic_time).and_return(100.0, 101.5)

described_class.call(answer.question)
Expand Down Expand Up @@ -36,7 +36,7 @@
context "and the answer strategy is 'openai_structured_answer'" do
let(:question) { create :question, answer_strategy: :openai_structured_answer }

it "calls OpenAIAnswer with the correct pipeline" do
it "calls PipelineRunner with the correct pipeline" do
expected_pipeline = [
AnswerComposition::Pipeline::JailbreakGuardrails,
AnswerComposition::Pipeline::QuestionRephraser,
Expand All @@ -49,7 +49,7 @@
expected_pipeline.each do |pipeline|
allow(pipeline).to receive(:call) { |context| context }
end
expect(AnswerComposition::OpenAIAnswer).to receive(:call).and_call_original
expect(AnswerComposition::PipelineRunner).to receive(:call).and_call_original
result = described_class.call(question)

expect(result)
Expand Down Expand Up @@ -88,7 +88,7 @@
let(:result) { described_class.call(question) }

before do
allow(AnswerComposition::OpenAIAnswer)
allow(AnswerComposition::PipelineRunner)
.to receive(:call)
.and_raise(StandardError, "error message")
end
Expand Down Expand Up @@ -140,7 +140,7 @@

before do
allow(Rails.configuration.govuk_chat_private).to receive(:forbidden_terms).and_return(Set.new(%w[badword]))
allow(AnswerComposition::OpenAIAnswer).to receive(:call).and_return(answer)
allow(AnswerComposition::PipelineRunner).to receive(:call).and_return(answer)
end

it "returns an answer with FORBIDDEN_TERMS_MESSAGE" do
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Specs for the Claude structured-answer pipeline step. All Bedrock traffic
# is stubbed via the StubBedrock helper (included in spec_helper), so no real
# AWS calls are made.
RSpec.describe AnswerComposition::Pipeline::Claude::StructuredAnswerComposer, :chunked_content_index do
  describe ".call" do
    let(:question) { build :question }
    let(:context) { build(:answer_pipeline_context, question:) }

    it "uses Bedrock converse endpoint to assign the correct values to the context's answer" do
      answer = "VAT (Value Added Tax) is a tax applied to most goods and services in the UK."

      # NOTE(review): bedrock_claude_structured_answer_response presumably builds a
      # canned Converse response embedding this answer text — confirm against the helper.
      stub_bedrock_converse(
        bedrock_claude_structured_answer_response(question.message, answer),
      )

      described_class.call(context)

      # The composer extracts the tool-use "answer" field into the message and
      # marks the answer as answered.
      expect(context.answer.message).to eq(answer)
      expect(context.answer.status).to eq("answered")
    end

    it "stores the LLM response" do
      response = bedrock_claude_tool_response(
        { "answer" => "answer", "confidence" => 0.9 },
        tool_name: "answer_confidence",
      )

      stub_bedrock_converse(response)

      described_class.call(context)
      # The full raw response is persisted under the "structured_answer" key.
      expect(context.answer.llm_responses["structured_answer"]).to match(response)
    end

    it "assigns metrics to the answer" do
      # Two successive monotonic readings 1.5s apart: start and end of the call.
      allow(Clock).to receive(:monotonic_time).and_return(100.0, 101.5)

      stub_bedrock_converse(
        bedrock_claude_tool_response(
          { "answer" => "answer", "confidence" => 0.9 },
          tool_name: "answer_confidence",
          input_tokens: 15,
          output_tokens: 25,
        ),
      )

      described_class.call(context)

      # duration = 101.5 - 100.0; token counts come from the stubbed usage data.
      expect(context.answer.metrics["structured_answer"]).to eq({
        duration: 1.5,
        llm_prompt_tokens: 15,
        llm_completion_tokens: 25,
      })
    end
  end
end
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
RSpec.describe AnswerComposition::OpenAIAnswer do # rubocop:disable RSpec/SpecFilePathFormat
RSpec.describe AnswerComposition::PipelineRunner do
describe "#call" do
let(:question) { build(:question) }

Expand Down Expand Up @@ -91,7 +91,7 @@
.and have_attributes(
question:,
status: "error_answer_service_error",
message: Answer::CannedResponses::OPENAI_CLIENT_ERROR_RESPONSE,
message: Answer::CannedResponses::ANSWER_SERVICE_ERROR_RESPONSE,
error_message: "class: OpenAIClient::RequestError message: nested error message",
)
end
Expand Down Expand Up @@ -119,5 +119,30 @@
end
end
end

context "when the step raises an Aws::Errors::ServiceError" do
let(:pipeline_step) do
client = stub_bedrock_converse("ServerError")
->(_context) { client.converse(model_id: "just-generating-an-error") }
end

it "notifies sentry" do
expect(GovukError).to receive(:notify).with(kind_of(Aws::Errors::ServiceError))
described_class.call(question:, pipeline: [pipeline_step])
end

it "returns the context's answer with the correct message, status and error_message" do
result = described_class.call(question:, pipeline: [pipeline_step])

expect(result)
.to be_a(Answer)
.and have_attributes(
question:,
status: "error_answer_service_error",
message: Answer::CannedResponses::ANSWER_SERVICE_ERROR_RESPONSE,
error_message: "stubbed-response-error-message",
)
end
end
end
end
4 changes: 2 additions & 2 deletions spec/models/form/create_question_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@
.to have_attributes(
message: user_question,
unsanitised_message: nil,
answer_strategy: "openai_structured_answer",
answer_strategy: Rails.configuration.answer_strategy,
)
end

Expand Down Expand Up @@ -192,7 +192,7 @@
expect(question)
.to have_attributes(
message: user_question,
answer_strategy: "openai_structured_answer",
answer_strategy: Rails.configuration.answer_strategy,
)
end

Expand Down
1 change: 1 addition & 0 deletions spec/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
config.include FactoryBot::Syntax::Methods
config.include MailerExamples
config.include StubOpenAIChat
config.include StubBedrock
config.include PasswordlessRequestHelpers, type: :request
config.include StubOpenAIEmbedding
config.include SidekiqHelpers
Expand Down
Loading

0 comments on commit 6fc80a5

Please sign in to comment.