From a91a9d139e0d8445b3863245be9ab04286809ab4 Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Tue, 17 Dec 2024 22:17:07 +0000 Subject: [PATCH 1/6] Add customized ruby-openai for Infomaniak --- Gemfile | 1 + Gemfile.lock | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/Gemfile b/Gemfile index 2fbd6ca..e6f74b9 100644 --- a/Gemfile +++ b/Gemfile @@ -87,6 +87,7 @@ gem "passwordless" gem "pundit" gem "reverse_markdown" gem "rss" +gem "ruby-openai", github: "nosia-ai/ruby-openai" gem "sequel", "~> 5.68.0" gem "solid_queue" gem "thruster" diff --git a/Gemfile.lock b/Gemfile.lock index e7a8c70..d7df628 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,3 +1,12 @@ +GIT + remote: https://github.com/nosia-ai/ruby-openai.git + revision: 6db7453d12b1d562866c2d78ceb1cf7cf3696dc8 + specs: + ruby-openai (7.3.1) + event_stream_parser (>= 0.3.0, < 2.0.0) + faraday (>= 1) + faraday-multipart (>= 1) + GIT remote: https://github.com/patterns-ai-core/langchainrb.git revision: 5403cc5ac66aa0d99114827cb4ce15233551e540 @@ -162,12 +171,15 @@ GEM erubi (1.13.0) et-orbi (1.2.11) tzinfo - faraday (2.12.0) - faraday-net_http (>= 2.0, < 3.4) + event_stream_parser (1.0.0) + faraday (2.12.2) + faraday-net_http (>= 2.0, < 3.5) json logger - faraday-net_http (3.3.0) - net-http + faraday-multipart (1.0.4) + multipart-post (~> 2) + faraday-net_http (3.4.0) + net-http (>= 0.5.0) feedjira (3.2.3) loofah (>= 2.3.1, < 3) sax-machine (>= 1.0, < 2) @@ -193,7 +205,7 @@ GEM jbuilder (2.13.0) actionview (>= 5.0.0) activesupport (>= 5.0.0) - json (2.7.5) + json (2.9.0) json-schema (4.3.1) addressable (>= 2.8) kamal (2.3.0) @@ -208,7 +220,7 @@ GEM thor (~> 1.3) zeitwerk (>= 2.6.18, < 3.0) language_server-protocol (3.17.0.3) - logger (1.6.1) + logger (1.6.3) loofah (2.23.1) crass (~> 1.0.2) nokogiri (>= 1.12.0) @@ -226,9 +238,10 @@ GEM mini_mime (1.1.5) minitest (5.25.1) msgpack (1.7.3) + multipart-post (2.4.1) neighbor 
(0.5.0) activerecord (>= 7) - net-http (0.4.1) + net-http (0.6.0) uri net-imap (0.5.1) date @@ -428,7 +441,7 @@ GEM tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (2.6.0) - uri (1.0.1) + uri (1.0.2) useragent (0.16.10) web-console (4.2.1) actionview (>= 6.0.0) @@ -487,6 +500,7 @@ DEPENDENCIES reverse_markdown rss rubocop-rails-omakase + ruby-openai! selenium-webdriver sequel (~> 5.68.0) solid_queue From c0c5840a69dae4bc161b1efb64ad1c5bfb9e3eb5 Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Tue, 17 Dec 2024 22:31:12 +0000 Subject: [PATCH 2/6] Prepare Infomaniak compatibility --- .../api/v1/application_controller.rb | 21 ++ .../api/v1/completions_controller.rb | 238 +++++------------- app/jobs/get_ai_response_job.rb | 79 +----- app/models/account.rb | 8 + app/models/account_user.rb | 4 + app/models/api_token.rb | 1 + app/models/chat.rb | 8 + app/models/chat/completionable.rb | 36 +++ app/models/chat/infomaniak.rb | 58 +++++ app/models/chat/ollama.rb | 90 +++++++ app/models/chunk.rb | 2 +- app/models/document.rb | 1 + app/models/document/chunkable.rb | 2 +- app/models/user.rb | 2 + config/initializers/langchainrb_rails.rb | 48 +++- .../20241114182820_add_done_to_messages.rb | 5 + db/migrate/20241216181749_create_accounts.rb | 18 ++ .../20241216181845_create_account_users.rb | 18 ++ .../20241216190106_add_chunkable_to_chunks.rb | 18 ++ ...04_add_account_references_to_api_tokens.rb | 16 ++ ...009_add_account_references_to_documents.rb | 5 + .../20241216213448_change_vector_limit.rb | 15 ++ db/schema.rb | 42 +++- 23 files changed, 459 insertions(+), 276 deletions(-) create mode 100644 app/controllers/api/v1/application_controller.rb create mode 100644 app/models/account.rb create mode 100644 app/models/account_user.rb create mode 100644 app/models/chat/completionable.rb create mode 100644 app/models/chat/infomaniak.rb create mode 100644 app/models/chat/ollama.rb create mode 100644 
db/migrate/20241114182820_add_done_to_messages.rb create mode 100644 db/migrate/20241216181749_create_accounts.rb create mode 100644 db/migrate/20241216181845_create_account_users.rb create mode 100644 db/migrate/20241216190106_add_chunkable_to_chunks.rb create mode 100644 db/migrate/20241216202004_add_account_references_to_api_tokens.rb create mode 100644 db/migrate/20241216203009_add_account_references_to_documents.rb create mode 100644 db/migrate/20241216213448_change_vector_limit.rb diff --git a/app/controllers/api/v1/application_controller.rb b/app/controllers/api/v1/application_controller.rb new file mode 100644 index 0000000..773896b --- /dev/null +++ b/app/controllers/api/v1/application_controller.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Api + module V1 + class ApplicationController < ApplicationController + allow_unauthenticated_access + skip_before_action :verify_authenticity_token + before_action :verify_api_key + + private + + def verify_api_key + authenticate_or_request_with_http_token do |token, _options| + api_token = ApiToken.find_by(token:) + @account = api_token&.account + @user = api_token&.user + end + end + end + end +end diff --git a/app/controllers/api/v1/completions_controller.rb b/app/controllers/api/v1/completions_controller.rb index 71978ae..e900cb9 100644 --- a/app/controllers/api/v1/completions_controller.rb +++ b/app/controllers/api/v1/completions_controller.rb @@ -5,145 +5,78 @@ module V1 class CompletionsController < ApplicationController include ActionController::Live - allow_unauthenticated_access only: [ :create ] - skip_before_action :verify_authenticity_token - before_action :verify_api_key - before_action :parse_params, only: [ :create ] - def create - @llm = LangchainrbRails.config.vectorsearch.llm - @uuid = SecureRandom.uuid - - if @stream - stream_response - else - non_stream_response - end - rescue StandardError => e - handle_error(e) - end - - private - - def build_check_llm - 
Langchain::LLM::Ollama.new( - url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), - api_key: ENV.fetch("OLLAMA_API_KEY", ""), - default_options: { - chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), - completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), - temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, - num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i - } - ) - end - - def build_context(checked_chunks) - checked_chunks.map(&:content).join("\n") - end - - def build_messages(question, context) - system_message = { - role: "system", - content: ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. You are a helpful assistant.") - } - - user_content = if context.present? - template = ENV.fetch( - "QUERY_PROMPT_TEMPLATE", - "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}" + @chat = @user.chats.create + + if completion_params[:messages].present? + completion_params[:messages].each do |message_params| + @chat.messages.create( + content: message_params[:content], + response_number: @chat.messages.count, + role: message_params[:role] + ) + end + elsif completion_params[:prompt].present? 
+ @chat.messages.create( + content: completion_params[:prompt], + response_number: @chat.messages.count, + role: "user" ) - template.gsub("{context}", context).gsub("{question}", question) - else - question end - user_message = { role: "user", content: user_content } - - [ system_message, user_message ] - end - - def check_context(question) - k = ENV.fetch("RETRIEVAL_FETCH_K", 4) - - check_llm = build_check_llm - checked_chunks = [] - - search_results = Chunk.similarity_search(question, k:) - search_results.each do |search_result| - context_to_check = search_result.content - check_message = [ { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } ] - - check_llm.chat(messages: check_message, top_k: @top_k, top_p: @top_p) do |stream| - check_response = stream.raw_response.dig("message", "content") - - if check_response && check_response.eql?("Yes") - checked_chunks << search_result + stream_response = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false + + if stream_response + chat_response = @chat.complete do |stream| + stream_content = stream.dig("delta", "content") + next unless stream_content + done = !!stream.dig("finish_reason") + if done + response.stream.write("data: [DONE]\n\n") + else + data = { + choices: [ + delta: { + content: stream_content, + role: "assistant" + }, + finish_reason: done ? "stop" : nil, + index: 0 + ], + created: Time.now.to_i, + id: "chatcmpl-#{@chat.id}", + model: "nosia:#{ENV["LLM_MODEL"]}", + object: "chat.completion.chunk", + system_fingerprint: "fp_nosia" + } + response.stream.write("data: #{data.to_json}\n\n") end end - end - end - - checked_chunks - rescue StandardError => e - handle_error(e) - [] - end - - def handle_error(e) - error_message = { - error: { - message: e.message, - backtrace: Rails.env.development? ? 
e.backtrace : nil - } - } - - if @stream - response.stream.write("data: #{error_message.to_json}\n\n") - response.stream.write("data: [DONE]\n\n") else - render json: error_message, status: :internal_server_error + chat_response = @chat.complete + render json: { + choices: [ + finish_reason: "stop", + index: 0, + message: { + content: chat_response.content, + role: "assistant" + } + ], + created: Time.now.to_i, + id: "chatcmpl-#{@uuid}", + model: "nosia:#{ENV["LLM_MODEL"]}", + object: "chat.completion", + system_fingerprint: "fp_nosia" + } end ensure - response.stream.close if @stream + response.stream.close if response.stream.respond_to?(:close) end - def non_stream_response - checked_chunks = check_context(@question) - context = build_context(checked_chunks) - messages = build_messages(@question, context) - - content = "" - data = {} - - @llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream| - stream_content = stream.raw_response.dig("message", "content") - content += stream_content if stream_content - - done = !!stream.raw_response["done"] - - if done - data = { - choices: [ - finish_reason: "stop", - index: 0, - message: { - content: content, - role: "assistant" - } - ], - created: Time.now.to_i, - id: "chatcmpl-#{@uuid}", - model: "nosia:#{ENV["LLM_MODEL"]}", - object: "chat.completion", - system_fingerprint: "fp_nosia" - } - end - end - - render json: data - end + private - def parse_params + def completion_params params.permit( :max_tokens, :model, @@ -160,57 +93,6 @@ def parse_params chat: {}, completion: {}, ) - - @question = params[:prompt] || params.dig(:messages, 0, :content) - @stream = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false - @top_p = params[:top_p].to_f || ENV.fetch("LLM_TOP_P", 0.9).to_f - @top_k = params[:top_k].to_i || ENV.fetch("LLM_TOP_K", 40) - end - - def stream_response - checked_chunks = check_context(@question) - context = build_context(checked_chunks) - messages = build_messages(@question, context) - - 
response.headers["Content-Type"] = "text/event-stream" - - @llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream| - stream_content = stream.raw_response.dig("message", "content") - next unless stream_content - - done = !!stream.raw_response["done"] - - if done - response.stream.write("data: [DONE]\n\n") - else - data = { - choices: [ - delta: { - content: stream_content, - role: "assistant" - }, - finish_reason: done ? "stop" : nil, - index: 0 - ], - created: Time.now.to_i, - id: "chatcmpl-#{@uuid}", - model: "nosia:#{ENV["LLM_MODEL"]}", - object: "chat.completion.chunk", - system_fingerprint: "fp_nosia" - } - - response.stream.write("data: #{data.to_json}\n\n") - end - end - ensure - response.stream.close if response.stream.respond_to?(:close) - end - - def verify_api_key - authenticate_or_request_with_http_token do |token, _options| - api_token = ApiToken.find_by(token:) - @user = api_token&.user - end end end end diff --git a/app/jobs/get_ai_response_job.rb b/app/jobs/get_ai_response_job.rb index c20b506..80510c5 100644 --- a/app/jobs/get_ai_response_job.rb +++ b/app/jobs/get_ai_response_job.rb @@ -3,83 +3,6 @@ class GetAiResponseJob < ApplicationJob def perform(chat_id) chat = Chat.find(chat_id) - call_ollama(chat:) - end - - private - - def create_message(chat:) - response_number = chat.messages.count - message = chat.messages.create(role: "assistant", content: "", response_number:) - message.broadcast_created - message - end - - def call_ollama(chat:) - top_k = ENV.fetch("LLM_TOP_K", 40).to_f - top_p = ENV.fetch("LLM_TOP_P", 0.9).to_f - - message = create_message(chat:) - - question = chat.last_question - - check_llm = Langchain::LLM::Ollama.new( - url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), - api_key: ENV.fetch("OLLAMA_API_KEY", ""), - default_options: { - chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), - completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), - temperature: 
ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, - num_ctx: ENV.fetch("LLM_NUM_CTX", 2_048).to_i - } - ) - - checked_chunks = [] - - search_results = Chunk.similarity_search(question, k: ENV.fetch("RETRIEVAL_FETCH_K", 4)) - search_results.each do |search_result| - context_to_check = search_result.content - - check_message = [ - { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } - ] - - check_llm.chat(messages: check_message, top_k:, top_p:) do |stream| - check_response = stream.raw_response.dig("message", "content") - - if check_response.eql?("Yes") - checked_chunks << search_result - end - end - end - - llm = LangchainrbRails.config.vectorsearch.llm - context = [] - - messages = [] - messages << { role: "system", content: ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. You are a helpful assistant.") } - messages << chat.messages_hash if chat.messages_hash.any? - - if checked_chunks.any? - message.update(similar_document_ids: checked_chunks.pluck(:document_id).uniq) - - context << checked_chunks.map(&:content).join("\n\n") - context = context.join("\n\n") - - prompt = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") - prompt = prompt.gsub("{context}", context) - prompt = prompt.gsub("{question}", question) - - messages << { role: "user", content: prompt } - else - messages << { role: "user", content: question } - end - - messages = messages.flatten - - llm.chat(messages:, top_k:, top_p:) do |stream| - new_content = stream.raw_response.dig("message", "content") - message.update(content: message.content + new_content) if new_content - end + chat.complete end end diff --git a/app/models/account.rb b/app/models/account.rb new file mode 100644 index 0000000..0eb87e2 --- /dev/null +++ b/app/models/account.rb @@ -0,0 +1,8 @@ +class Account < ApplicationRecord + belongs_to :owner, class_name: "User" + + has_many :account_users, dependent: :destroy + 
has_many :users, through: :account_users + has_many :chunks + has_many :documents, dependent: :destroy +end diff --git a/app/models/account_user.rb b/app/models/account_user.rb new file mode 100644 index 0000000..20b5d90 --- /dev/null +++ b/app/models/account_user.rb @@ -0,0 +1,4 @@ +class AccountUser < ApplicationRecord + belongs_to :account + belongs_to :user +end diff --git a/app/models/api_token.rb b/app/models/api_token.rb index 7be0c0a..d54a2a8 100644 --- a/app/models/api_token.rb +++ b/app/models/api_token.rb @@ -1,4 +1,5 @@ class ApiToken < ApplicationRecord + belongs_to :account belongs_to :user has_secure_token :token diff --git a/app/models/chat.rb b/app/models/chat.rb index b647716..1a3583e 100644 --- a/app/models/chat.rb +++ b/app/models/chat.rb @@ -1,4 +1,8 @@ class Chat < ApplicationRecord + include Completionable + include Infomaniak + include Ollama + has_many :messages, dependent: :destroy def first_question @@ -17,4 +21,8 @@ def messages_hash } end end + + def response_number + messages.count + end end diff --git a/app/models/chat/completionable.rb b/app/models/chat/completionable.rb new file mode 100644 index 0000000..2706bef --- /dev/null +++ b/app/models/chat/completionable.rb @@ -0,0 +1,36 @@ +module Chat::Completionable + extend ActiveSupport::Concern + + class_methods do + def ai_provider + ENV.fetch("AI_PROVIDER", "ollama") + end + end + + def complete(&block) + case Chat.ai_provider + when "ollama" + complete_with_ollama(&block) + when "infomaniak" + complete_with_infomaniak(&block) + end + end + + private + + def retrieval_fetch_k + ENV.fetch("RETRIEVAL_FETCH_K", 4) + end + + def system_prompt + ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. 
You are a helpful assistant.") + end + + def top_k + ENV.fetch("LLM_TOP_K", 40).to_f + end + + def top_p + ENV.fetch("LLM_TOP_P", 0.9).to_f + end +end diff --git a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb new file mode 100644 index 0000000..2ef13b2 --- /dev/null +++ b/app/models/chat/infomaniak.rb @@ -0,0 +1,58 @@ +module Chat::Infomaniak + extend ActiveSupport::Concern + + class_methods do + def new_infomaniak_llm + Langchain::LLM::OpenAI.new( + api_key: ENV.fetch("INFOMANIAK_API_KEY", ""), + llm_options: { + api_type: :infomaniak, + uri_base: "https://api.infomaniak.com/1/ai/#{ENV.fetch("INFOMANIAK_PRODUCT_ID", "")}/openai" + }, + default_options: { + chat_completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "bge_multilingual_gemma2"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i + } + ) + end + end + + def complete_with_infomaniak(&block) + question = last_question + + context = [] + + messages_for_assistant = [] + messages_for_assistant << { role: "system", content: system_prompt } + messages_for_assistant << messages_hash if messages_hash.any? + + assistant_response = messages.create(role: "assistant", done: false, content: "", response_number:) + assistant_response.broadcast_created + + retrieved_chunks = Chunk.similarity_search(question, k: retrieval_fetch_k) + assistant_response.update(similar_document_ids: retrieved_chunks.pluck(:chunkable_id).uniq) if retrieved_chunks.any? + + if retrieved_chunks.any? 
+ context << retrieved_chunks.map(&:content).join("\n\n") + context = context.join("\n\n") + + prompt = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") + prompt = prompt.gsub("{context}", context) + prompt = prompt.gsub("{question}", question) + + messages_for_assistant.pop + messages_for_assistant << { role: "user", content: prompt } + end + + messages_for_assistant = messages_for_assistant.flatten + + llm = Chat.new_infomaniak_llm + llm_response = llm.chat(messages: messages_for_assistant, &block) + + assistant_response.update(done: true, content: llm_response.completion) + assistant_response + end +end diff --git a/app/models/chat/ollama.rb b/app/models/chat/ollama.rb new file mode 100644 index 0000000..94ea196 --- /dev/null +++ b/app/models/chat/ollama.rb @@ -0,0 +1,90 @@ +module Chat::Ollama + extend ActiveSupport::Concern + + class_methods do + def new_ollama_llm + Langchain::LLM::OpenAI.new( + api_key: ENV.fetch("OLLAMA_API_KEY", ""), + llm_options: { + uri_base: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434") + }, + default_options: { + chat_completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), + completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), + embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "nomic-embed-text"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i + } + ) + end + + def new_ollama_check_llm + Langchain::LLM::Ollama.new( + url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), + api_key: ENV.fetch("OLLAMA_API_KEY", ""), + default_options: { + chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), + completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 2_048).to_i + } + ) + end + end + + def complete_with_ollama(&block) + question = 
last_question + + context = [] + + assistant_response = messages.create(role: "assistant", done: false, content: "", response_number:) + assistant_response.broadcast_created + + messages_for_assistant = [] + messages_for_assistant << { role: "system", content: system_prompt } + messages_for_assistant << messages_hash if messages_hash.any? + + checked_chunks = [] + + check_llm = Chat.new_ollama_check_llm + + search_results = Chunk.similarity_search(question, k: retrieval_fetch_k) + search_results.each do |search_result| + context_to_check = search_result.content + + check_message = [ + { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } + ] + + check_llm.chat(messages: check_message, top_k:, top_p:) do |stream| + check_response = stream.raw_response.dig("message", "content") + + if check_response.eql?("Yes") + checked_chunks << search_result + end + end + end + + if checked_chunks.any? + assistant_response.update(similar_document_ids: checked_chunks.pluck(:chunkable_id).uniq) + + context << checked_chunks.map(&:content).join("\n\n") + context = context.join("\n\n") + + prompt = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") + prompt = prompt.gsub("{context}", context) + prompt = prompt.gsub("{question}", question) + + messages_for_assistant.pop + messages_for_assistant << { role: "user", content: prompt } + end + + messages_for_assistant = messages_for_assistant.flatten + + llm = Chat.new_ollama_llm + llm_response = llm.chat(messages: messages_for_assistant, top_k:, top_p:, &block) + + assistant_response.update(done: true, content: llm_response.completion) + assistant_response + end +end diff --git a/app/models/chunk.rb b/app/models/chunk.rb index 00868fe..f39ea94 100644 --- a/app/models/chunk.rb +++ b/app/models/chunk.rb @@ -1,5 +1,5 @@ class Chunk < ApplicationRecord include Vectorizable - belongs_to :document + belongs_to :chunkable, polymorphic: true end diff --git 
a/app/models/document.rb b/app/models/document.rb index c1655aa..dc9aa4a 100644 --- a/app/models/document.rb +++ b/app/models/document.rb @@ -1,6 +1,7 @@ class Document < ApplicationRecord include Chunkable, Parsable, Vectorizable + belongs_to :account, optional: true belongs_to :author, optional: true has_one_attached :file diff --git a/app/models/document/chunkable.rb b/app/models/document/chunkable.rb index 55f09e7..639279e 100644 --- a/app/models/document/chunkable.rb +++ b/app/models/document/chunkable.rb @@ -2,7 +2,7 @@ module Document::Chunkable extend ActiveSupport::Concern included do - has_many :chunks, dependent: :destroy + has_many :chunks, as: :chunkable, dependent: :destroy end def chunkify! diff --git a/app/models/user.rb b/app/models/user.rb index 4952512..85af29e 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -1,6 +1,8 @@ class User < ApplicationRecord has_secure_password + has_many :account_users, dependent: :destroy + has_many :accounts, through: :account_users has_many :api_tokens, dependent: :destroy has_many :chats, dependent: :destroy has_many :credentials, dependent: :destroy diff --git a/config/initializers/langchainrb_rails.rb b/config/initializers/langchainrb_rails.rb index a6089df..559f08f 100644 --- a/config/initializers/langchainrb_rails.rb +++ b/config/initializers/langchainrb_rails.rb @@ -4,18 +4,40 @@ timeout: 240 }) -LangchainrbRails.configure do |config| - config.vectorsearch = Langchain::Vectorsearch::Pgvector.new( - llm: Langchain::LLM::Ollama.new( - url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), - api_key: ENV.fetch("OLLAMA_API_KEY", ""), - default_options: { - chat_completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), - completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), - embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "nomic-embed-text"), - temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, - num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i - } +if ENV.fetch("AI_PROVIDER", 
"ollama").eql?("infomaniak") + # Infomaniak configuration + LangchainrbRails.configure do |config| + config.vectorsearch = Langchain::Vectorsearch::Pgvector.new( + llm: Langchain::LLM::OpenAI.new( + api_key: ENV.fetch("INFOMANIAK_API_KEY", ""), + llm_options: { + uri_base: "https://api.infomaniak.com/1/ai/#{ENV.fetch("INFOMANIAK_PRODUCT_ID", "")}/openai" + }, + default_options: { + chat_completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "bge_multilingual_gemma2"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i + } + ) ) - ) + end +else + # Ollama default configuration + LangchainrbRails.configure do |config| + config.vectorsearch = Langchain::Vectorsearch::Pgvector.new( + llm: Langchain::LLM::Ollama.new( + url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), + api_key: ENV.fetch("OLLAMA_API_KEY", ""), + default_options: { + chat_completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), + completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), + embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "nomic-embed-text"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i + } + ) + ) + end end diff --git a/db/migrate/20241114182820_add_done_to_messages.rb b/db/migrate/20241114182820_add_done_to_messages.rb new file mode 100644 index 0000000..0d7a885 --- /dev/null +++ b/db/migrate/20241114182820_add_done_to_messages.rb @@ -0,0 +1,5 @@ +class AddDoneToMessages < ActiveRecord::Migration[8.0] + def change + add_column :messages, :done, :boolean, default: false + end +end diff --git a/db/migrate/20241216181749_create_accounts.rb b/db/migrate/20241216181749_create_accounts.rb new file mode 100644 index 0000000..2bef022 --- /dev/null +++ b/db/migrate/20241216181749_create_accounts.rb @@ -0,0 +1,18 @@ +class CreateAccounts < 
ActiveRecord::Migration[8.0] + def up + create_table :accounts do |t| + t.string :name + t.belongs_to :owner, null: false, foreign_key: { to_table: :users } + + t.timestamps + end + + User.all.each do |user| + Account.create(name: user.email, owner: user) + end + end + + def down + drop_table :accounts + end +end diff --git a/db/migrate/20241216181845_create_account_users.rb b/db/migrate/20241216181845_create_account_users.rb new file mode 100644 index 0000000..250456d --- /dev/null +++ b/db/migrate/20241216181845_create_account_users.rb @@ -0,0 +1,18 @@ +class CreateAccountUsers < ActiveRecord::Migration[8.0] + def up + create_table :account_users do |t| + t.references :account, null: false, foreign_key: true + t.references :user, null: false, foreign_key: true + + t.timestamps + end + + Account.all.each do |account| + account.account_users.create(user: account.owner) + end + end + + def down + drop_table :account_users + end +end diff --git a/db/migrate/20241216190106_add_chunkable_to_chunks.rb b/db/migrate/20241216190106_add_chunkable_to_chunks.rb new file mode 100644 index 0000000..6bac179 --- /dev/null +++ b/db/migrate/20241216190106_add_chunkable_to_chunks.rb @@ -0,0 +1,18 @@ +class AddChunkableToChunks < ActiveRecord::Migration[8.0] + def up + add_column :chunks, :chunkable_type, :string + Chunk.update_all(chunkable_type: "Document") + remove_index :chunks, :document_id + rename_column :chunks, :document_id, :chunkable_id + add_index :chunks, [ :chunkable_type, :chunkable_id ] + add_reference :chunks, :account, null: true, foreign_key: true + end + + def down + remove_reference :chunks, :account + remove_index :chunks, [ :chunkable_type, :chunkable_id ] + rename_column :chunks, :chunkable_id, :document_id + add_index :chunks, :document_id + remove_column :chunks, :chunkable_type + end +end diff --git a/db/migrate/20241216202004_add_account_references_to_api_tokens.rb b/db/migrate/20241216202004_add_account_references_to_api_tokens.rb new file mode 100644 
index 0000000..8cc2ce3 --- /dev/null +++ b/db/migrate/20241216202004_add_account_references_to_api_tokens.rb @@ -0,0 +1,16 @@ +class AddAccountReferencesToApiTokens < ActiveRecord::Migration[8.0] + def up + add_reference :api_tokens, :account, null: true, foreign_key: true + + ApiToken.all.each do |api_token| + account = Account.find_by(owner: api_token.user) + api_token.update(account:) + end + + change_column_null :api_tokens, :account_id, false + end + + def down + remove_reference :api_tokens, :account + end +end diff --git a/db/migrate/20241216203009_add_account_references_to_documents.rb b/db/migrate/20241216203009_add_account_references_to_documents.rb new file mode 100644 index 0000000..ae482f2 --- /dev/null +++ b/db/migrate/20241216203009_add_account_references_to_documents.rb @@ -0,0 +1,5 @@ +class AddAccountReferencesToDocuments < ActiveRecord::Migration[8.0] + def change + add_reference :documents, :account, null: true, foreign_key: true + end +end diff --git a/db/migrate/20241216213448_change_vector_limit.rb b/db/migrate/20241216213448_change_vector_limit.rb new file mode 100644 index 0000000..ad26a0a --- /dev/null +++ b/db/migrate/20241216213448_change_vector_limit.rb @@ -0,0 +1,15 @@ +class ChangeVectorLimit < ActiveRecord::Migration[8.0] + def up + unless ENV.fetch("EMBEDDING_DIMENSIONS", 768).to_i.eql?(768) + Chunk.update_all(embedding: nil) + change_column :chunks, :embedding, :vector, limit: ENV.fetch("EMBEDDING_DIMENSIONS", 768).to_i + end + end + + def down + unless ENV.fetch("EMBEDDING_DIMENSIONS", 768).to_i.eql?(768) + Chunk.update_all(embedding: nil) + change_column :chunks, :embedding, :vector, limit: 768 + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 0d35607..a852efc 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,11 +10,28 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[8.0].define(version: 2024_11_18_194153) do +ActiveRecord::Schema[8.0].define(version: 2024_12_16_213448) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" enable_extension "vector" + create_table "account_users", force: :cascade do |t| + t.bigint "account_id", null: false + t.bigint "user_id", null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["account_id"], name: "index_account_users_on_account_id" + t.index ["user_id"], name: "index_account_users_on_user_id" + end + + create_table "accounts", force: :cascade do |t| + t.string "name" + t.bigint "owner_id", null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["owner_id"], name: "index_accounts_on_owner_id" + end + create_table "active_storage_attachments", force: :cascade do |t| t.string "name", null: false t.string "record_type", null: false @@ -49,6 +66,8 @@ t.string "token" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.bigint "account_id", null: false + t.index ["account_id"], name: "index_api_tokens_on_account_id" t.index ["user_id"], name: "index_api_tokens_on_user_id" end @@ -69,12 +88,15 @@ end create_table "chunks", force: :cascade do |t| - t.bigint "document_id", null: false + t.bigint "chunkable_id", null: false t.text "content" - t.vector "embedding", limit: 768 + t.vector "embedding", limit: 3584 t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index ["document_id"], name: "index_chunks_on_document_id" + t.string "chunkable_type" + t.bigint "account_id" + t.index ["account_id"], name: "index_chunks_on_account_id" + t.index ["chunkable_type", "chunkable_id"], name: "index_chunks_on_chunkable_type_and_chunkable_id" end create_table "credentials", force: :cascade do |t| @@ -97,6 +119,9 @@ t.bigint "author_id" t.string "uid" t.string "content_hash" + t.string 
"purpose" + t.bigint "account_id" + t.index ["account_id"], name: "index_documents_on_account_id" t.index ["author_id"], name: "index_documents_on_author_id" end @@ -108,6 +133,7 @@ t.datetime "created_at", null: false t.datetime "updated_at", null: false t.string "similar_document_ids", default: [], array: true + t.boolean "done", default: false t.index ["chat_id"], name: "index_messages_on_chat_id" end @@ -264,12 +290,18 @@ t.index "lower((email)::text)", name: "index_users_on_lowercase_email", unique: true end + add_foreign_key "account_users", "accounts" + add_foreign_key "account_users", "users" + add_foreign_key "accounts", "users", column: "owner_id" add_foreign_key "active_storage_attachments", "active_storage_blobs", column: "blob_id" add_foreign_key "active_storage_variant_records", "active_storage_blobs", column: "blob_id" + add_foreign_key "api_tokens", "accounts" add_foreign_key "api_tokens", "users" add_foreign_key "chats", "users" - add_foreign_key "chunks", "documents" + add_foreign_key "chunks", "accounts" + add_foreign_key "chunks", "documents", column: "chunkable_id" add_foreign_key "credentials", "users" + add_foreign_key "documents", "accounts" add_foreign_key "documents", "authors" add_foreign_key "messages", "chats" add_foreign_key "sessions", "users" From cce3c6cf029bab928b452f3339e9f917db658b06 Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:26:33 +0000 Subject: [PATCH 3/6] Prepare Infomaniak compatibility and multi-accounts --- app/controllers/api/v1/completions_controller.rb | 6 +++++- app/controllers/api_tokens_controller.rb | 9 +++++++-- app/controllers/chats_controller.rb | 2 +- app/models/account.rb | 3 ++- app/models/chat.rb | 2 ++ app/models/chat/infomaniak.rb | 2 +- app/models/chat/ollama.rb | 2 +- app/models/chunk.rb | 1 + app/models/current.rb | 1 + app/models/document/chunkable.rb | 2 +- app/models/user.rb | 4 ++++ app/views/api_tokens/_form.html.erb | 15 
++++++++------- app/views/api_tokens/index.html.erb | 2 ++ db/migrate/20241216181749_create_accounts.rb | 6 +++--- db/migrate/20241216181845_create_account_users.rb | 5 +++-- .../20241216190106_add_chunkable_to_chunks.rb | 7 +++++++ ...202004_add_account_references_to_api_tokens.rb | 6 ++---- ...6203009_add_account_references_to_documents.rb | 11 ++++++++++- ...41218200949_add_account_references_to_chats.rb | 14 ++++++++++++++ db/schema.rb | 10 +++++++--- 20 files changed, 82 insertions(+), 28 deletions(-) create mode 100644 db/migrate/20241218200949_add_account_references_to_chats.rb diff --git a/app/controllers/api/v1/completions_controller.rb b/app/controllers/api/v1/completions_controller.rb index e900cb9..0a362ea 100644 --- a/app/controllers/api/v1/completions_controller.rb +++ b/app/controllers/api/v1/completions_controller.rb @@ -6,7 +6,10 @@ class CompletionsController < ApplicationController include ActionController::Live def create - @chat = @user.chats.create + account = @user.accounts.find_or_create_by(uid: completion_params[:user]) if completion_params[:user].present? + account ||= @account + + @chat = @user.chats.create(account:) if completion_params[:messages].present? 
completion_params[:messages].each do |message_params| @@ -85,6 +88,7 @@ def completion_params :top_p, :top_k, :temperature, + :user, messages: [ :content, :role diff --git a/app/controllers/api_tokens_controller.rb b/app/controllers/api_tokens_controller.rb index 7637077..1b42e68 100644 --- a/app/controllers/api_tokens_controller.rb +++ b/app/controllers/api_tokens_controller.rb @@ -6,7 +6,12 @@ def index end def create - @api_token = Current.user.api_tokens.new(api_token_params) + account = Current.user.accounts.find(api_token_params[:account_id]) + + @api_token = Current.user.api_tokens.new( + account:, + name: api_token_params[:name] + ) if @api_token.save redirect_to api_tokens_path @@ -24,6 +29,6 @@ def destroy private def api_token_params - params.require(:api_token).permit(:name) + params.require(:api_token).permit(:account_id, :name) end end diff --git a/app/controllers/chats_controller.rb b/app/controllers/chats_controller.rb index 5d9cf5d..aa1e6f3 100644 --- a/app/controllers/chats_controller.rb +++ b/app/controllers/chats_controller.rb @@ -5,7 +5,7 @@ def show end def create - @chat = Current.user.chats.create + @chat = Current.user.chats.create(account: Current.account) redirect_to @chat end diff --git a/app/models/account.rb b/app/models/account.rb index 0eb87e2..7b1e15a 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -3,6 +3,7 @@ class Account < ApplicationRecord has_many :account_users, dependent: :destroy has_many :users, through: :account_users - has_many :chunks + has_many :chats, dependent: :destroy + has_many :chunks, dependent: :destroy has_many :documents, dependent: :destroy end diff --git a/app/models/chat.rb b/app/models/chat.rb index 1a3583e..11d396d 100644 --- a/app/models/chat.rb +++ b/app/models/chat.rb @@ -3,6 +3,8 @@ class Chat < ApplicationRecord include Infomaniak include Ollama + belongs_to :account + belongs_to :user has_many :messages, dependent: :destroy def first_question diff --git 
a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb index 2ef13b2..6165b42 100644 --- a/app/models/chat/infomaniak.rb +++ b/app/models/chat/infomaniak.rb @@ -32,7 +32,7 @@ def complete_with_infomaniak(&block) assistant_response = messages.create(role: "assistant", done: false, content: "", response_number:) assistant_response.broadcast_created - retrieved_chunks = Chunk.similarity_search(question, k: retrieval_fetch_k) + retrieved_chunks = Chunk.where(account:).similarity_search(question, k: retrieval_fetch_k) assistant_response.update(similar_document_ids: retrieved_chunks.pluck(:chunkable_id).uniq) if retrieved_chunks.any? if retrieved_chunks.any? diff --git a/app/models/chat/ollama.rb b/app/models/chat/ollama.rb index 94ea196..f536b3b 100644 --- a/app/models/chat/ollama.rb +++ b/app/models/chat/ollama.rb @@ -48,7 +48,7 @@ def complete_with_ollama(&block) check_llm = Chat.new_ollama_check_llm - search_results = Chunk.similarity_search(question, k: retrieval_fetch_k) + search_results = Chunk.where(account:).similarity_search(question, k: retrieval_fetch_k) search_results.each do |search_result| context_to_check = search_result.content diff --git a/app/models/chunk.rb b/app/models/chunk.rb index f39ea94..ba3e43b 100644 --- a/app/models/chunk.rb +++ b/app/models/chunk.rb @@ -1,5 +1,6 @@ class Chunk < ApplicationRecord include Vectorizable + belongs_to :account belongs_to :chunkable, polymorphic: true end diff --git a/app/models/current.rb b/app/models/current.rb index 83baf84..dbc5270 100644 --- a/app/models/current.rb +++ b/app/models/current.rb @@ -2,6 +2,7 @@ class Current < ActiveSupport::CurrentAttributes attribute :session attribute :impersonated_user + def account = user.first_account def user = true_user def true_user = session&.user end diff --git a/app/models/document/chunkable.rb b/app/models/document/chunkable.rb index 639279e..92aebdf 100644 --- a/app/models/document/chunkable.rb +++ b/app/models/document/chunkable.rb @@ -19,7 +19,7 @@ def 
chunkify! self.chunks.destroy_all new_chunks.each do |new_chunk| - chunk = self.chunks.create!(content: new_chunk.dig(:text)) + chunk = self.chunks.create!(account:, content: new_chunk.dig(:text)) chunk.vectorize! end end diff --git a/app/models/user.rb b/app/models/user.rb index 85af29e..a797727 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -14,4 +14,8 @@ class User < ApplicationRecord format: { with: URI::MailTo::EMAIL_REGEXP } validates :password, presence: true, length: { minimum: 12 } + + def first_account + accounts.order(:created_at).first + end end diff --git a/app/views/api_tokens/_form.html.erb b/app/views/api_tokens/_form.html.erb index 267683a..6b6ca12 100644 --- a/app/views/api_tokens/_form.html.erb +++ b/app/views/api_tokens/_form.html.erb @@ -1,15 +1,16 @@ -<%= form_with(model: api_token, class: "flex items-center space-x-4 h-12") do |form| %> +<%= form_with(model: api_token, class: "flex items-center space-x-4") do |form| %> <%= form.label :name, "Name of your API token" %> <%= form.text_field :name, required: true, class: - " - resize-none rounded-xl border-none bg-slate-200 text-sm text-slate-900 - focus:outline-none focus:ring-2 focus:ring-blue-500 dark:bg-slate-800 - dark:text-slate-200 dark:placeholder-slate-400 dark:focus:ring-blue-500 - sm:text-base - " %> + "resize-none rounded-xl border-none bg-slate-200 text-sm text-slate-900 focus:outline-none focus:ring-2 focus:ring-blue-500 dark:bg-slate-800 dark:text-slate-200 dark:placeholder-slate-400 dark:focus:ring-blue-500 sm:text-base" %> + <%= form.collection_select :account_id, + Current.user.accounts, + :id, + :name, + prompt: true, + required: true %> <%= form.submit "Generate", class: "cursor-pointer rounded-lg bg-blue-700 px-4 py-2 text-sm font-medium text-slate-200 hover:bg-blue-800 focus:outline-none focus:ring-4 focus:ring-blue-300 dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800 sm:text-base disabled:bg-gray-700" %> diff --git 
a/app/views/api_tokens/index.html.erb b/app/views/api_tokens/index.html.erb index 819b8a7..0ffd134 100644 --- a/app/views/api_tokens/index.html.erb +++ b/app/views/api_tokens/index.html.erb @@ -7,6 +7,8 @@

<%= api_token.name %>

+

<%= api_token.account.name %>

+ <%= button_to api_token, method: :delete, data: { turbo_confirm: "Are you sure?" } do %> Date: Thu, 19 Dec 2024 20:08:47 +0000 Subject: [PATCH 4/6] Prepare Infomaniak compatibility and multi-accounts --- app/controllers/api/v1/application_controller.rb | 6 +++++- app/controllers/api/v1/completions_controller.rb | 15 +++++++++------ app/controllers/documents_controller.rb | 2 +- app/controllers/messages_controller.rb | 2 ++ app/models/chat/completionable.rb | 6 +++--- app/models/chat/infomaniak.rb | 12 ++++++------ app/models/chat/ollama.rb | 2 +- 7 files changed, 27 insertions(+), 18 deletions(-) diff --git a/app/controllers/api/v1/application_controller.rb b/app/controllers/api/v1/application_controller.rb index 773896b..3866c26 100644 --- a/app/controllers/api/v1/application_controller.rb +++ b/app/controllers/api/v1/application_controller.rb @@ -12,8 +12,12 @@ class ApplicationController < ApplicationController def verify_api_key authenticate_or_request_with_http_token do |token, _options| api_token = ApiToken.find_by(token:) - @account = api_token&.account @user = api_token&.user + if params[:user].present? + @account = @user.accounts.create_with(owner: @user).find_or_create_by(uid: params[:user]) + else + @account = api_token&.account + end end end end diff --git a/app/controllers/api/v1/completions_controller.rb b/app/controllers/api/v1/completions_controller.rb index 0a362ea..65ed379 100644 --- a/app/controllers/api/v1/completions_controller.rb +++ b/app/controllers/api/v1/completions_controller.rb @@ -6,10 +6,13 @@ class CompletionsController < ApplicationController include ActionController::Live def create - account = @user.accounts.find_or_create_by(uid: completion_params[:user]) if completion_params[:user].present? 
- account ||= @account + @chat = @user.chats.create(account: @account) - @chat = @user.chats.create(account:) + max_tokens = (completion_params[:max_tokens] || ENV["LLM_MAX_TOKENS"]).to_i + model = (completion_params[:model] || ENV["LLM_MODEL"]).to_i + temperature = (completion_params[:temperature] || ENV["LLM_TEMPERATURE"]).to_f + top_k = (completion_params[:top_k] || ENV["LLM_TOP_K"]).to_f + top_p = (completion_params[:top_p] || ENV["LLM_TOP_P"]).to_f if completion_params[:messages].present? completion_params[:messages].each do |message_params| @@ -30,7 +33,7 @@ def create stream_response = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false if stream_response - chat_response = @chat.complete do |stream| + chat_response = @chat.complete(model:, temperature:, top_k:, top_p:, max_tokens:) do |stream| stream_content = stream.dig("delta", "content") next unless stream_content done = !!stream.dig("finish_reason") @@ -56,7 +59,7 @@ def create end end else - chat_response = @chat.complete + chat_response = @chat.complete(model:, temperature:, top_k:, top_p:, max_tokens:) render json: { choices: [ finish_reason: "stop", @@ -85,8 +88,8 @@ def completion_params :model, :prompt, :stream, - :top_p, :top_k, + :top_p, :temperature, :user, messages: [ diff --git a/app/controllers/documents_controller.rb b/app/controllers/documents_controller.rb index 9ae0cee..06255d7 100644 --- a/app/controllers/documents_controller.rb +++ b/app/controllers/documents_controller.rb @@ -67,6 +67,6 @@ def set_document # Only allow a list of trusted parameters through. 
def document_params - params.require(:document).permit(:title, :file) + params.require(:document).permit(:title, :file).merge(account: Current.account) end end diff --git a/app/controllers/messages_controller.rb b/app/controllers/messages_controller.rb index 587f26a..4a57a6a 100644 --- a/app/controllers/messages_controller.rb +++ b/app/controllers/messages_controller.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class MessagesController < ApplicationController include ActionView::RecordIdentifier diff --git a/app/models/chat/completionable.rb b/app/models/chat/completionable.rb index 2706bef..0aa5063 100644 --- a/app/models/chat/completionable.rb +++ b/app/models/chat/completionable.rb @@ -7,12 +7,12 @@ def ai_provider end end - def complete(&block) + def complete(model:, temperature:, top_k:, top_p:, max_tokens:, &block) case Chat.ai_provider when "ollama" - complete_with_ollama(&block) + complete_with_ollama(top_k:, top_p:, &block) when "infomaniak" - complete_with_infomaniak(&block) + complete_with_infomaniak(model:, temperature:, top_p:, max_tokens:, &block) end end diff --git a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb index 6165b42..df30a2c 100644 --- a/app/models/chat/infomaniak.rb +++ b/app/models/chat/infomaniak.rb @@ -2,7 +2,7 @@ module Chat::Infomaniak extend ActiveSupport::Concern class_methods do - def new_infomaniak_llm + def new_infomaniak_llm(model: ENV.fetch("LLM_MODEL", "mixtral")) Langchain::LLM::OpenAI.new( api_key: ENV.fetch("INFOMANIAK_API_KEY", ""), llm_options: { @@ -10,8 +10,8 @@ def new_infomaniak_llm uri_base: "https://api.infomaniak.com/1/ai/#{ENV.fetch("INFOMANIAK_PRODUCT_ID", "")}/openai" }, default_options: { - chat_completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), - completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + chat_completion_model_name: model, + completion_model_name: model, embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "bge_multilingual_gemma2"), temperature: 
ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i @@ -20,7 +20,7 @@ def new_infomaniak_llm end end - def complete_with_infomaniak(&block) + def complete_with_infomaniak(model:, temperature:, top_p:, max_tokens:, &block) question = last_question context = [] @@ -49,8 +49,8 @@ def complete_with_infomaniak(&block) messages_for_assistant = messages_for_assistant.flatten - llm = Chat.new_infomaniak_llm - llm_response = llm.chat(messages: messages_for_assistant, &block) + llm = Chat.new_infomaniak_llm(model:) + llm_response = llm.chat(messages: messages_for_assistant, temperature:, top_p:, max_tokens:, &block) assistant_response.update(done: true, content: llm_response.completion) assistant_response diff --git a/app/models/chat/ollama.rb b/app/models/chat/ollama.rb index f536b3b..1d8c55c 100644 --- a/app/models/chat/ollama.rb +++ b/app/models/chat/ollama.rb @@ -32,7 +32,7 @@ def new_ollama_check_llm end end - def complete_with_ollama(&block) + def complete_with_ollama(top_k:, top_p:, &block) question = last_question context = [] From bd504556b0bf3dce626a2a6f748fc4a68e6103c1 Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Thu, 19 Dec 2024 20:43:14 +0000 Subject: [PATCH 5/6] Prepare Infomaniak compatibility and multi-accounts --- app/controllers/api/v1/completions_controller.rb | 10 +++++----- app/models/chat/completionable.rb | 16 +++++++--------- app/models/chat/infomaniak.rb | 2 +- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/app/controllers/api/v1/completions_controller.rb b/app/controllers/api/v1/completions_controller.rb index 65ed379..e389d54 100644 --- a/app/controllers/api/v1/completions_controller.rb +++ b/app/controllers/api/v1/completions_controller.rb @@ -8,11 +8,11 @@ class CompletionsController < ApplicationController def create @chat = @user.chats.create(account: @account) - max_tokens = (completion_params[:max_tokens] || ENV["LLM_MAX_TOKENS"]).to_i - model = 
(completion_params[:model] || ENV["LLM_MODEL"]).to_i - temperature = (completion_params[:temperature] || ENV["LLM_TEMPERATURE"]).to_f - top_k = (completion_params[:top_k] || ENV["LLM_TOP_K"]).to_f - top_p = (completion_params[:top_p] || ENV["LLM_TOP_P"]).to_f + max_tokens = completion_params[:max_tokens]&.to_i + model = completion_params[:model] + temperature = completion_params[:temperature]&.to_f + top_k = completion_params[:top_k]&.to_f + top_p = completion_params[:top_p]&.to_f if completion_params[:messages].present? completion_params[:messages].each do |message_params| diff --git a/app/models/chat/completionable.rb b/app/models/chat/completionable.rb index 0aa5063..b772d30 100644 --- a/app/models/chat/completionable.rb +++ b/app/models/chat/completionable.rb @@ -7,7 +7,13 @@ def ai_provider end end - def complete(model:, temperature:, top_k:, top_p:, max_tokens:, &block) + def complete(model: nil, temperature: nil, top_k: nil, top_p: nil, max_tokens: nil, &block) + max_tokens ||= ENV.fetch("LLM_MAX_TOKENS", 1_024).to_i + model ||= ENV.fetch("LLM_MODEL", "mixtral") + temperature ||= ENV.fetch("LLM_TEMPERATURE", 0.1).to_f + top_k ||= ENV.fetch("LLM_TOP_K", 40).to_f + top_p ||= ENV.fetch("LLM_TOP_P", 0.9).to_f + case Chat.ai_provider when "ollama" complete_with_ollama(top_k:, top_p:, &block) @@ -25,12 +31,4 @@ def retrieval_fetch_k def system_prompt ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. 
You are a helpful assistant.") end - - def top_k - ENV.fetch("LLM_TOP_K", 40).to_f - end - - def top_p - ENV.fetch("LLM_TOP_P", 0.9).to_f - end end diff --git a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb index df30a2c..904f62b 100644 --- a/app/models/chat/infomaniak.rb +++ b/app/models/chat/infomaniak.rb @@ -2,7 +2,7 @@ module Chat::Infomaniak extend ActiveSupport::Concern class_methods do - def new_infomaniak_llm(model: ENV.fetch("LLM_MODEL", "mixtral")) + def new_infomaniak_llm(model:) Langchain::LLM::OpenAI.new( api_key: ENV.fetch("INFOMANIAK_API_KEY", ""), llm_options: { From 139b94dc21511f0b386180fa77108e0b4879ccdc Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Thu, 19 Dec 2024 21:53:52 +0000 Subject: [PATCH 6/6] Prepare Infomaniak compatibility and multi-accounts --- app/models/chat/completionable.rb | 32 ++++++++++++++++------ app/models/chat/infomaniak.rb | 44 +++++++++++++++++-------------- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/app/models/chat/completionable.rb b/app/models/chat/completionable.rb index b772d30..53c0453 100644 --- a/app/models/chat/completionable.rb +++ b/app/models/chat/completionable.rb @@ -8,22 +8,38 @@ def ai_provider end def complete(model: nil, temperature: nil, top_k: nil, top_p: nil, max_tokens: nil, &block) - max_tokens ||= ENV.fetch("LLM_MAX_TOKENS", 1_024).to_i - model ||= ENV.fetch("LLM_MODEL", "mixtral") - temperature ||= ENV.fetch("LLM_TEMPERATURE", 0.1).to_f - top_k ||= ENV.fetch("LLM_TOP_K", 40).to_f - top_p ||= ENV.fetch("LLM_TOP_P", 0.9).to_f + options = default_options.merge( + { + model:, + temperature:, + top_k:, + top_p:, + max_tokens: + }.compact_blank + ) - case Chat.ai_provider + case self.class.ai_provider when "ollama" - complete_with_ollama(top_k:, top_p:, &block) + complete_with_ollama(top_k: options[:top_k], top_p: options[:top_p], &block) when "infomaniak" - complete_with_infomaniak(model:, temperature:, top_p:, 
max_tokens:, &block) + complete_with_infomaniak(model: options[:model], temperature: options[:temperature], top_p: options[:top_p], max_tokens: options[:max_tokens], &block) + else + raise "Unsupported AI provider: #{self.class.ai_provider}" end end private + def default_options + { + max_tokens: ENV.fetch("LLM_MAX_TOKENS", 1_024).to_i, + model: ENV.fetch("LLM_MODEL", "mixtral"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + top_k: ENV.fetch("LLM_TOP_K", 40).to_f, + top_p: ENV.fetch("LLM_TOP_P", 0.9).to_f + } + end + def retrieval_fetch_k ENV.fetch("RETRIEVAL_FETCH_K", 4) end diff --git a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb index 904f62b..5eb2946 100644 --- a/app/models/chat/infomaniak.rb +++ b/app/models/chat/infomaniak.rb @@ -23,31 +23,15 @@ def new_infomaniak_llm(model:) def complete_with_infomaniak(model:, temperature:, top_p:, max_tokens:, &block) question = last_question - context = [] - - messages_for_assistant = [] - messages_for_assistant << { role: "system", content: system_prompt } - messages_for_assistant << messages_hash if messages_hash.any? + messages_for_assistant = prepare_messages(question) assistant_response = messages.create(role: "assistant", done: false, content: "", response_number:) assistant_response.broadcast_created - retrieved_chunks = Chunk.where(account:).similarity_search(question, k: retrieval_fetch_k) - assistant_response.update(similar_document_ids: retrieved_chunks.pluck(:chunkable_id).uniq) if retrieved_chunks.any? - - if retrieved_chunks.any? - context << retrieved_chunks.map(&:content).join("\n\n") - context = context.join("\n\n") - - prompt = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") - prompt = prompt.gsub("{context}", context) - prompt = prompt.gsub("{question}", question) + context = fetch_context(question) + prompt = generate_prompt(context, question) if context.present? 
- messages_for_assistant.pop - messages_for_assistant << { role: "user", content: prompt } - end - - messages_for_assistant = messages_for_assistant.flatten + messages_for_assistant[-1][:content] = prompt if prompt llm = Chat.new_infomaniak_llm(model:) llm_response = llm.chat(messages: messages_for_assistant, temperature:, top_p:, max_tokens:, &block) @@ -55,4 +39,24 @@ def complete_with_infomaniak(model:, temperature:, top_p:, max_tokens:, &block) assistant_response.update(done: true, content: llm_response.completion) assistant_response end + + private + + def prepare_messages(question) + messages = [ { role: "system", content: system_prompt } ] + messages << messages_hash if messages_hash.any? + messages.flatten + end + + def fetch_context(question) + retrieved_chunks = Chunk.where(account:).similarity_search(question, k: retrieval_fetch_k) + return unless retrieved_chunks.any? + + retrieved_chunks.map(&:content).join("\n\n") + end + + def generate_prompt(context, question) + prompt_template = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") + prompt_template.gsub("{context}", context).gsub("{question}", question) + end end