From a91a9d139e0d8445b3863245be9ab04286809ab4 Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Tue, 17 Dec 2024 22:17:07 +0000 Subject: [PATCH 1/6] Add customized ruby-openai for Infomaniak --- Gemfile | 1 + Gemfile.lock | 30 ++++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/Gemfile b/Gemfile index 2fbd6ca..e6f74b9 100644 --- a/Gemfile +++ b/Gemfile @@ -87,6 +87,7 @@ gem "passwordless" gem "pundit" gem "reverse_markdown" gem "rss" +gem "ruby-openai", github: "nosia-ai/ruby-openai" gem "sequel", "~> 5.68.0" gem "solid_queue" gem "thruster" diff --git a/Gemfile.lock b/Gemfile.lock index e7a8c70..d7df628 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,3 +1,12 @@ +GIT + remote: https://github.com/nosia-ai/ruby-openai.git + revision: 6db7453d12b1d562866c2d78ceb1cf7cf3696dc8 + specs: + ruby-openai (7.3.1) + event_stream_parser (>= 0.3.0, < 2.0.0) + faraday (>= 1) + faraday-multipart (>= 1) + GIT remote: https://github.com/patterns-ai-core/langchainrb.git revision: 5403cc5ac66aa0d99114827cb4ce15233551e540 @@ -162,12 +171,15 @@ GEM erubi (1.13.0) et-orbi (1.2.11) tzinfo - faraday (2.12.0) - faraday-net_http (>= 2.0, < 3.4) + event_stream_parser (1.0.0) + faraday (2.12.2) + faraday-net_http (>= 2.0, < 3.5) json logger - faraday-net_http (3.3.0) - net-http + faraday-multipart (1.0.4) + multipart-post (~> 2) + faraday-net_http (3.4.0) + net-http (>= 0.5.0) feedjira (3.2.3) loofah (>= 2.3.1, < 3) sax-machine (>= 1.0, < 2) @@ -193,7 +205,7 @@ GEM jbuilder (2.13.0) actionview (>= 5.0.0) activesupport (>= 5.0.0) - json (2.7.5) + json (2.9.0) json-schema (4.3.1) addressable (>= 2.8) kamal (2.3.0) @@ -208,7 +220,7 @@ GEM thor (~> 1.3) zeitwerk (>= 2.6.18, < 3.0) language_server-protocol (3.17.0.3) - logger (1.6.1) + logger (1.6.3) loofah (2.23.1) crass (~> 1.0.2) nokogiri (>= 1.12.0) @@ -226,9 +238,10 @@ GEM mini_mime (1.1.5) minitest (5.25.1) msgpack (1.7.3) + multipart-post (2.4.1) neighbor 
(0.5.0) activerecord (>= 7) - net-http (0.4.1) + net-http (0.6.0) uri net-imap (0.5.1) date @@ -428,7 +441,7 @@ GEM tzinfo (2.0.6) concurrent-ruby (~> 1.0) unicode-display_width (2.6.0) - uri (1.0.1) + uri (1.0.2) useragent (0.16.10) web-console (4.2.1) actionview (>= 6.0.0) @@ -487,6 +500,7 @@ DEPENDENCIES reverse_markdown rss rubocop-rails-omakase + ruby-openai! selenium-webdriver sequel (~> 5.68.0) solid_queue From c0c5840a69dae4bc161b1efb64ad1c5bfb9e3eb5 Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Tue, 17 Dec 2024 22:31:12 +0000 Subject: [PATCH 2/6] Prepare Infomaniak compatibility --- .../api/v1/application_controller.rb | 21 ++ .../api/v1/completions_controller.rb | 238 +++++------------- app/jobs/get_ai_response_job.rb | 79 +----- app/models/account.rb | 8 + app/models/account_user.rb | 4 + app/models/api_token.rb | 1 + app/models/chat.rb | 8 + app/models/chat/completionable.rb | 36 +++ app/models/chat/infomaniak.rb | 58 +++++ app/models/chat/ollama.rb | 90 +++++++ app/models/chunk.rb | 2 +- app/models/document.rb | 1 + app/models/document/chunkable.rb | 2 +- app/models/user.rb | 2 + config/initializers/langchainrb_rails.rb | 48 +++- .../20241114182820_add_done_to_messages.rb | 5 + db/migrate/20241216181749_create_accounts.rb | 18 ++ .../20241216181845_create_account_users.rb | 18 ++ .../20241216190106_add_chunkable_to_chunks.rb | 18 ++ ...04_add_account_references_to_api_tokens.rb | 16 ++ ...009_add_account_references_to_documents.rb | 5 + .../20241216213448_change_vector_limit.rb | 15 ++ db/schema.rb | 42 +++- 23 files changed, 459 insertions(+), 276 deletions(-) create mode 100644 app/controllers/api/v1/application_controller.rb create mode 100644 app/models/account.rb create mode 100644 app/models/account_user.rb create mode 100644 app/models/chat/completionable.rb create mode 100644 app/models/chat/infomaniak.rb create mode 100644 app/models/chat/ollama.rb create mode 100644 
db/migrate/20241114182820_add_done_to_messages.rb create mode 100644 db/migrate/20241216181749_create_accounts.rb create mode 100644 db/migrate/20241216181845_create_account_users.rb create mode 100644 db/migrate/20241216190106_add_chunkable_to_chunks.rb create mode 100644 db/migrate/20241216202004_add_account_references_to_api_tokens.rb create mode 100644 db/migrate/20241216203009_add_account_references_to_documents.rb create mode 100644 db/migrate/20241216213448_change_vector_limit.rb diff --git a/app/controllers/api/v1/application_controller.rb b/app/controllers/api/v1/application_controller.rb new file mode 100644 index 0000000..773896b --- /dev/null +++ b/app/controllers/api/v1/application_controller.rb @@ -0,0 +1,21 @@ +# frozen_string_literal: true + +module Api + module V1 + class ApplicationController < ApplicationController + allow_unauthenticated_access + skip_before_action :verify_authenticity_token + before_action :verify_api_key + + private + + def verify_api_key + authenticate_or_request_with_http_token do |token, _options| + api_token = ApiToken.find_by(token:) + @account = api_token&.account + @user = api_token&.user + end + end + end + end +end diff --git a/app/controllers/api/v1/completions_controller.rb b/app/controllers/api/v1/completions_controller.rb index 71978ae..e900cb9 100644 --- a/app/controllers/api/v1/completions_controller.rb +++ b/app/controllers/api/v1/completions_controller.rb @@ -5,145 +5,78 @@ module V1 class CompletionsController < ApplicationController include ActionController::Live - allow_unauthenticated_access only: [ :create ] - skip_before_action :verify_authenticity_token - before_action :verify_api_key - before_action :parse_params, only: [ :create ] - def create - @llm = LangchainrbRails.config.vectorsearch.llm - @uuid = SecureRandom.uuid - - if @stream - stream_response - else - non_stream_response - end - rescue StandardError => e - handle_error(e) - end - - private - - def build_check_llm - 
Langchain::LLM::Ollama.new( - url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), - api_key: ENV.fetch("OLLAMA_API_KEY", ""), - default_options: { - chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), - completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), - temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, - num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i - } - ) - end - - def build_context(checked_chunks) - checked_chunks.map(&:content).join("\n") - end - - def build_messages(question, context) - system_message = { - role: "system", - content: ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. You are a helpful assistant.") - } - - user_content = if context.present? - template = ENV.fetch( - "QUERY_PROMPT_TEMPLATE", - "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}" + @chat = @user.chats.create + + if completion_params[:messages].present? + completion_params[:messages].each do |message_params| + @chat.messages.create( + content: message_params[:content], + response_number: @chat.messages.count, + role: message_params[:role] + ) + end + elsif completion_params[:prompt].present? 
+ @chat.messages.create( + content: completion_params[:prompt], + response_number: @chat.messages.count, + role: "user" ) - template.gsub("{context}", context).gsub("{question}", question) - else - question end - user_message = { role: "user", content: user_content } - - [ system_message, user_message ] - end - - def check_context(question) - k = ENV.fetch("RETRIEVAL_FETCH_K", 4) - - check_llm = build_check_llm - checked_chunks = [] - - search_results = Chunk.similarity_search(question, k:) - search_results.each do |search_result| - context_to_check = search_result.content - check_message = [ { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } ] - - check_llm.chat(messages: check_message, top_k: @top_k, top_p: @top_p) do |stream| - check_response = stream.raw_response.dig("message", "content") - - if check_response && check_response.eql?("Yes") - checked_chunks << search_result + stream_response = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false + + if stream_response + chat_response = @chat.complete do |stream| + stream_content = stream.dig("delta", "content") + next unless stream_content + done = !!stream.dig("finish_reason") + if done + response.stream.write("data: [DONE]\n\n") + else + data = { + choices: [ + delta: { + content: stream_content, + role: "assistant" + }, + finish_reason: done ? "stop" : nil, + index: 0 + ], + created: Time.now.to_i, + id: "chatcmpl-#{@chat.id}", + model: "nosia:#{ENV["LLM_MODEL"]}", + object: "chat.completion.chunk", + system_fingerprint: "fp_nosia" + } + response.stream.write("data: #{data.to_json}\n\n") end end - end - end - - checked_chunks - rescue StandardError => e - handle_error(e) - [] - end - - def handle_error(e) - error_message = { - error: { - message: e.message, - backtrace: Rails.env.development? ? 
e.backtrace : nil - } - } - - if @stream - response.stream.write("data: #{error_message.to_json}\n\n") - response.stream.write("data: [DONE]\n\n") else - render json: error_message, status: :internal_server_error + chat_response = @chat.complete + render json: { + choices: [ + finish_reason: "stop", + index: 0, + message: { + content: chat_response.content, + role: "assistant" + } + ], + created: Time.now.to_i, + id: "chatcmpl-#{@uuid}", + model: "nosia:#{ENV["LLM_MODEL"]}", + object: "chat.completion", + system_fingerprint: "fp_nosia" + } end ensure - response.stream.close if @stream + response.stream.close if response.stream.respond_to?(:close) end - def non_stream_response - checked_chunks = check_context(@question) - context = build_context(checked_chunks) - messages = build_messages(@question, context) - - content = "" - data = {} - - @llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream| - stream_content = stream.raw_response.dig("message", "content") - content += stream_content if stream_content - - done = !!stream.raw_response["done"] - - if done - data = { - choices: [ - finish_reason: "stop", - index: 0, - message: { - content: content, - role: "assistant" - } - ], - created: Time.now.to_i, - id: "chatcmpl-#{@uuid}", - model: "nosia:#{ENV["LLM_MODEL"]}", - object: "chat.completion", - system_fingerprint: "fp_nosia" - } - end - end - - render json: data - end + private - def parse_params + def completion_params params.permit( :max_tokens, :model, @@ -160,57 +93,6 @@ def parse_params chat: {}, completion: {}, ) - - @question = params[:prompt] || params.dig(:messages, 0, :content) - @stream = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false - @top_p = params[:top_p].to_f || ENV.fetch("LLM_TOP_P", 0.9).to_f - @top_k = params[:top_k].to_i || ENV.fetch("LLM_TOP_K", 40) - end - - def stream_response - checked_chunks = check_context(@question) - context = build_context(checked_chunks) - messages = build_messages(@question, context) - - 
response.headers["Content-Type"] = "text/event-stream" - - @llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream| - stream_content = stream.raw_response.dig("message", "content") - next unless stream_content - - done = !!stream.raw_response["done"] - - if done - response.stream.write("data: [DONE]\n\n") - else - data = { - choices: [ - delta: { - content: stream_content, - role: "assistant" - }, - finish_reason: done ? "stop" : nil, - index: 0 - ], - created: Time.now.to_i, - id: "chatcmpl-#{@uuid}", - model: "nosia:#{ENV["LLM_MODEL"]}", - object: "chat.completion.chunk", - system_fingerprint: "fp_nosia" - } - - response.stream.write("data: #{data.to_json}\n\n") - end - end - ensure - response.stream.close if response.stream.respond_to?(:close) - end - - def verify_api_key - authenticate_or_request_with_http_token do |token, _options| - api_token = ApiToken.find_by(token:) - @user = api_token&.user - end end end end diff --git a/app/jobs/get_ai_response_job.rb b/app/jobs/get_ai_response_job.rb index c20b506..80510c5 100644 --- a/app/jobs/get_ai_response_job.rb +++ b/app/jobs/get_ai_response_job.rb @@ -3,83 +3,6 @@ class GetAiResponseJob < ApplicationJob def perform(chat_id) chat = Chat.find(chat_id) - call_ollama(chat:) - end - - private - - def create_message(chat:) - response_number = chat.messages.count - message = chat.messages.create(role: "assistant", content: "", response_number:) - message.broadcast_created - message - end - - def call_ollama(chat:) - top_k = ENV.fetch("LLM_TOP_K", 40).to_f - top_p = ENV.fetch("LLM_TOP_P", 0.9).to_f - - message = create_message(chat:) - - question = chat.last_question - - check_llm = Langchain::LLM::Ollama.new( - url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), - api_key: ENV.fetch("OLLAMA_API_KEY", ""), - default_options: { - chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), - completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), - temperature: 
ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, - num_ctx: ENV.fetch("LLM_NUM_CTX", 2_048).to_i - } - ) - - checked_chunks = [] - - search_results = Chunk.similarity_search(question, k: ENV.fetch("RETRIEVAL_FETCH_K", 4)) - search_results.each do |search_result| - context_to_check = search_result.content - - check_message = [ - { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } - ] - - check_llm.chat(messages: check_message, top_k:, top_p:) do |stream| - check_response = stream.raw_response.dig("message", "content") - - if check_response.eql?("Yes") - checked_chunks << search_result - end - end - end - - llm = LangchainrbRails.config.vectorsearch.llm - context = [] - - messages = [] - messages << { role: "system", content: ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. You are a helpful assistant.") } - messages << chat.messages_hash if chat.messages_hash.any? - - if checked_chunks.any? - message.update(similar_document_ids: checked_chunks.pluck(:document_id).uniq) - - context << checked_chunks.map(&:content).join("\n\n") - context = context.join("\n\n") - - prompt = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") - prompt = prompt.gsub("{context}", context) - prompt = prompt.gsub("{question}", question) - - messages << { role: "user", content: prompt } - else - messages << { role: "user", content: question } - end - - messages = messages.flatten - - llm.chat(messages:, top_k:, top_p:) do |stream| - new_content = stream.raw_response.dig("message", "content") - message.update(content: message.content + new_content) if new_content - end + chat.complete end end diff --git a/app/models/account.rb b/app/models/account.rb new file mode 100644 index 0000000..0eb87e2 --- /dev/null +++ b/app/models/account.rb @@ -0,0 +1,8 @@ +class Account < ApplicationRecord + belongs_to :owner, class_name: "User" + + has_many :account_users, dependent: :destroy + 
has_many :users, through: :account_users + has_many :chunks + has_many :documents, dependent: :destroy +end diff --git a/app/models/account_user.rb b/app/models/account_user.rb new file mode 100644 index 0000000..20b5d90 --- /dev/null +++ b/app/models/account_user.rb @@ -0,0 +1,4 @@ +class AccountUser < ApplicationRecord + belongs_to :account + belongs_to :user +end diff --git a/app/models/api_token.rb b/app/models/api_token.rb index 7be0c0a..d54a2a8 100644 --- a/app/models/api_token.rb +++ b/app/models/api_token.rb @@ -1,4 +1,5 @@ class ApiToken < ApplicationRecord + belongs_to :account belongs_to :user has_secure_token :token diff --git a/app/models/chat.rb b/app/models/chat.rb index b647716..1a3583e 100644 --- a/app/models/chat.rb +++ b/app/models/chat.rb @@ -1,4 +1,8 @@ class Chat < ApplicationRecord + include Completionable + include Infomaniak + include Ollama + has_many :messages, dependent: :destroy def first_question @@ -17,4 +21,8 @@ def messages_hash } end end + + def response_number + messages.count + end end diff --git a/app/models/chat/completionable.rb b/app/models/chat/completionable.rb new file mode 100644 index 0000000..2706bef --- /dev/null +++ b/app/models/chat/completionable.rb @@ -0,0 +1,36 @@ +module Chat::Completionable + extend ActiveSupport::Concern + + class_methods do + def ai_provider + ENV.fetch("AI_PROVIDER", "ollama") + end + end + + def complete(&block) + case Chat.ai_provider + when "ollama" + complete_with_ollama(&block) + when "infomaniak" + complete_with_infomaniak(&block) + end + end + + private + + def retrieval_fetch_k + ENV.fetch("RETRIEVAL_FETCH_K", 4) + end + + def system_prompt + ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. 
You are a helpful assistant.") + end + + def top_k + ENV.fetch("LLM_TOP_K", 40).to_f + end + + def top_p + ENV.fetch("LLM_TOP_P", 0.9).to_f + end +end diff --git a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb new file mode 100644 index 0000000..2ef13b2 --- /dev/null +++ b/app/models/chat/infomaniak.rb @@ -0,0 +1,58 @@ +module Chat::Infomaniak + extend ActiveSupport::Concern + + class_methods do + def new_infomaniak_llm + Langchain::LLM::OpenAI.new( + api_key: ENV.fetch("INFOMANIAK_API_KEY", ""), + llm_options: { + api_type: :infomaniak, + uri_base: "https://api.infomaniak.com/1/ai/#{ENV.fetch("INFOMANIAK_PRODUCT_ID", "")}/openai" + }, + default_options: { + chat_completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "bge_multilingual_gemma2"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i + } + ) + end + end + + def complete_with_infomaniak(&block) + question = last_question + + context = [] + + messages_for_assistant = [] + messages_for_assistant << { role: "system", content: system_prompt } + messages_for_assistant << messages_hash if messages_hash.any? + + assistant_response = messages.create(role: "assistant", done: false, content: "", response_number:) + assistant_response.broadcast_created + + retrieved_chunks = Chunk.similarity_search(question, k: retrieval_fetch_k) + assistant_response.update(similar_document_ids: retrieved_chunks.pluck(:chunkable_id).uniq) if retrieved_chunks.any? + + if retrieved_chunks.any? 
+ context << retrieved_chunks.map(&:content).join("\n\n") + context = context.join("\n\n") + + prompt = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") + prompt = prompt.gsub("{context}", context) + prompt = prompt.gsub("{question}", question) + + messages_for_assistant.pop + messages_for_assistant << { role: "user", content: prompt } + end + + messages_for_assistant = messages_for_assistant.flatten + + llm = Chat.new_infomaniak_llm + llm_response = llm.chat(messages: messages_for_assistant, &block) + + assistant_response.update(done: true, content: llm_response.completion) + assistant_response + end +end diff --git a/app/models/chat/ollama.rb b/app/models/chat/ollama.rb new file mode 100644 index 0000000..94ea196 --- /dev/null +++ b/app/models/chat/ollama.rb @@ -0,0 +1,90 @@ +module Chat::Ollama + extend ActiveSupport::Concern + + class_methods do + def new_ollama_llm + Langchain::LLM::OpenAI.new( + api_key: ENV.fetch("OLLAMA_API_KEY", ""), + llm_options: { + uri_base: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434") + }, + default_options: { + chat_completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), + completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), + embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "nomic-embed-text"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i + } + ) + end + + def new_ollama_check_llm + Langchain::LLM::Ollama.new( + url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), + api_key: ENV.fetch("OLLAMA_API_KEY", ""), + default_options: { + chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), + completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 2_048).to_i + } + ) + end + end + + def complete_with_ollama(&block) + question = 
last_question + + context = [] + + assistant_response = messages.create(role: "assistant", done: false, content: "", response_number:) + assistant_response.broadcast_created + + messages_for_assistant = [] + messages_for_assistant << { role: "system", content: system_prompt } + messages_for_assistant << messages_hash if messages_hash.any? + + checked_chunks = [] + + check_llm = Chat.new_ollama_check_llm + + search_results = Chunk.similarity_search(question, k: retrieval_fetch_k) + search_results.each do |search_result| + context_to_check = search_result.content + + check_message = [ + { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } + ] + + check_llm.chat(messages: check_message, top_k:, top_p:) do |stream| + check_response = stream.raw_response.dig("message", "content") + + if check_response.eql?("Yes") + checked_chunks << search_result + end + end + end + + if checked_chunks.any? + assistant_response.update(similar_document_ids: checked_chunks.pluck(:chunkable_id).uniq) + + context << checked_chunks.map(&:content).join("\n\n") + context = context.join("\n\n") + + prompt = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") + prompt = prompt.gsub("{context}", context) + prompt = prompt.gsub("{question}", question) + + messages_for_assistant.pop + messages_for_assistant << { role: "user", content: prompt } + end + + messages_for_assistant = messages_for_assistant.flatten + + llm = Chat.new_ollama_llm + llm_response = llm.chat(messages: messages_for_assistant, top_k:, top_p:, &block) + + assistant_response.update(done: true, content: llm_response.completion) + assistant_response + end +end diff --git a/app/models/chunk.rb b/app/models/chunk.rb index 00868fe..f39ea94 100644 --- a/app/models/chunk.rb +++ b/app/models/chunk.rb @@ -1,5 +1,5 @@ class Chunk < ApplicationRecord include Vectorizable - belongs_to :document + belongs_to :chunkable, polymorphic: true end diff --git 
a/app/models/document.rb b/app/models/document.rb index c1655aa..dc9aa4a 100644 --- a/app/models/document.rb +++ b/app/models/document.rb @@ -1,6 +1,7 @@ class Document < ApplicationRecord include Chunkable, Parsable, Vectorizable + belongs_to :account, optional: true belongs_to :author, optional: true has_one_attached :file diff --git a/app/models/document/chunkable.rb b/app/models/document/chunkable.rb index 55f09e7..639279e 100644 --- a/app/models/document/chunkable.rb +++ b/app/models/document/chunkable.rb @@ -2,7 +2,7 @@ module Document::Chunkable extend ActiveSupport::Concern included do - has_many :chunks, dependent: :destroy + has_many :chunks, as: :chunkable, dependent: :destroy end def chunkify! diff --git a/app/models/user.rb b/app/models/user.rb index 4952512..85af29e 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -1,6 +1,8 @@ class User < ApplicationRecord has_secure_password + has_many :account_users, dependent: :destroy + has_many :accounts, through: :account_users has_many :api_tokens, dependent: :destroy has_many :chats, dependent: :destroy has_many :credentials, dependent: :destroy diff --git a/config/initializers/langchainrb_rails.rb b/config/initializers/langchainrb_rails.rb index a6089df..559f08f 100644 --- a/config/initializers/langchainrb_rails.rb +++ b/config/initializers/langchainrb_rails.rb @@ -4,18 +4,40 @@ timeout: 240 }) -LangchainrbRails.configure do |config| - config.vectorsearch = Langchain::Vectorsearch::Pgvector.new( - llm: Langchain::LLM::Ollama.new( - url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), - api_key: ENV.fetch("OLLAMA_API_KEY", ""), - default_options: { - chat_completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), - completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), - embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "nomic-embed-text"), - temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, - num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i - } +if ENV.fetch("AI_PROVIDER", 
"ollama").eql?("infomaniak") + # Infomaniak configuration + LangchainrbRails.configure do |config| + config.vectorsearch = Langchain::Vectorsearch::Pgvector.new( + llm: Langchain::LLM::OpenAI.new( + api_key: ENV.fetch("INFOMANIAK_API_KEY", ""), + llm_options: { + uri_base: "https://api.infomaniak.com/1/ai/#{ENV.fetch("INFOMANIAK_PRODUCT_ID", "")}/openai" + }, + default_options: { + chat_completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "bge_multilingual_gemma2"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i + } + ) ) - ) + end +else + # Ollama default configuration + LangchainrbRails.configure do |config| + config.vectorsearch = Langchain::Vectorsearch::Pgvector.new( + llm: Langchain::LLM::Ollama.new( + url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"), + api_key: ENV.fetch("OLLAMA_API_KEY", ""), + default_options: { + chat_completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), + completion_model_name: ENV.fetch("LLM_MODEL", "qwen2.5"), + embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "nomic-embed-text"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i + } + ) + ) + end end diff --git a/db/migrate/20241114182820_add_done_to_messages.rb b/db/migrate/20241114182820_add_done_to_messages.rb new file mode 100644 index 0000000..0d7a885 --- /dev/null +++ b/db/migrate/20241114182820_add_done_to_messages.rb @@ -0,0 +1,5 @@ +class AddDoneToMessages < ActiveRecord::Migration[8.0] + def change + add_column :messages, :done, :boolean, default: false + end +end diff --git a/db/migrate/20241216181749_create_accounts.rb b/db/migrate/20241216181749_create_accounts.rb new file mode 100644 index 0000000..2bef022 --- /dev/null +++ b/db/migrate/20241216181749_create_accounts.rb @@ -0,0 +1,18 @@ +class CreateAccounts < 
ActiveRecord::Migration[8.0] + def up + create_table :accounts do |t| + t.string :name + t.belongs_to :owner, null: false, foreign_key: { to_table: :users } + + t.timestamps + end + + User.all.each do |user| + Account.create(name: user.email, owner: user) + end + end + + def down + drop_table :accounts + end +end diff --git a/db/migrate/20241216181845_create_account_users.rb b/db/migrate/20241216181845_create_account_users.rb new file mode 100644 index 0000000..250456d --- /dev/null +++ b/db/migrate/20241216181845_create_account_users.rb @@ -0,0 +1,18 @@ +class CreateAccountUsers < ActiveRecord::Migration[8.0] + def up + create_table :account_users do |t| + t.references :account, null: false, foreign_key: true + t.references :user, null: false, foreign_key: true + + t.timestamps + end + + Account.all.each do |account| + account.account_users.create(user: account.owner) + end + end + + def down + drop_table :account_users + end +end diff --git a/db/migrate/20241216190106_add_chunkable_to_chunks.rb b/db/migrate/20241216190106_add_chunkable_to_chunks.rb new file mode 100644 index 0000000..6bac179 --- /dev/null +++ b/db/migrate/20241216190106_add_chunkable_to_chunks.rb @@ -0,0 +1,18 @@ +class AddChunkableToChunks < ActiveRecord::Migration[8.0] + def up + add_column :chunks, :chunkable_type, :string + Chunk.update_all(chunkable_type: "Document") + remove_index :chunks, :document_id + rename_column :chunks, :document_id, :chunkable_id + add_index :chunks, [ :chunkable_type, :chunkable_id ] + add_reference :chunks, :account, null: true, foreign_key: true + end + + def down + remove_reference :chunks, :account + remove_index :chunks, [ :chunkable_type, :chunkable_id ] + rename_column :chunks, :chunkable_id, :document_id + add_index :chunks, :document_id + remove_column :chunks, :chunkable_type + end +end diff --git a/db/migrate/20241216202004_add_account_references_to_api_tokens.rb b/db/migrate/20241216202004_add_account_references_to_api_tokens.rb new file mode 100644 
index 0000000..8cc2ce3 --- /dev/null +++ b/db/migrate/20241216202004_add_account_references_to_api_tokens.rb @@ -0,0 +1,16 @@ +class AddAccountReferencesToApiTokens < ActiveRecord::Migration[8.0] + def up + add_reference :api_tokens, :account, null: true, foreign_key: true + + ApiToken.all.each do |api_token| + account = Account.find_by(owner: api_token.user) + api_token.update(account:) + end + + change_column_null :api_tokens, :account_id, false + end + + def down + remove_reference :api_tokens, :account + end +end diff --git a/db/migrate/20241216203009_add_account_references_to_documents.rb b/db/migrate/20241216203009_add_account_references_to_documents.rb new file mode 100644 index 0000000..ae482f2 --- /dev/null +++ b/db/migrate/20241216203009_add_account_references_to_documents.rb @@ -0,0 +1,5 @@ +class AddAccountReferencesToDocuments < ActiveRecord::Migration[8.0] + def change + add_reference :documents, :account, null: true, foreign_key: true + end +end diff --git a/db/migrate/20241216213448_change_vector_limit.rb b/db/migrate/20241216213448_change_vector_limit.rb new file mode 100644 index 0000000..ad26a0a --- /dev/null +++ b/db/migrate/20241216213448_change_vector_limit.rb @@ -0,0 +1,15 @@ +class ChangeVectorLimit < ActiveRecord::Migration[8.0] + def up + unless ENV.fetch("EMBEDDING_DIMENSIONS", 768).to_i.eql?(768) + Chunk.update_all(embedding: nil) + change_column :chunks, :embedding, :vector, limit: ENV.fetch("EMBEDDING_DIMENSIONS", 768).to_i + end + end + + def down + unless ENV.fetch("EMBEDDING_DIMENSIONS", 768).to_i.eql?(768) + Chunk.update_all(embedding: nil) + change_column :chunks, :embedding, :vector, limit: 768 + end + end +end diff --git a/db/schema.rb b/db/schema.rb index 0d35607..a852efc 100644 --- a/db/schema.rb +++ b/db/schema.rb @@ -10,11 +10,28 @@ # # It's strongly recommended that you check this file into your version control system. 
-ActiveRecord::Schema[8.0].define(version: 2024_11_18_194153) do +ActiveRecord::Schema[8.0].define(version: 2024_12_16_213448) do # These are extensions that must be enabled in order to support this database enable_extension "pg_catalog.plpgsql" enable_extension "vector" + create_table "account_users", force: :cascade do |t| + t.bigint "account_id", null: false + t.bigint "user_id", null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["account_id"], name: "index_account_users_on_account_id" + t.index ["user_id"], name: "index_account_users_on_user_id" + end + + create_table "accounts", force: :cascade do |t| + t.string "name" + t.bigint "owner_id", null: false + t.datetime "created_at", null: false + t.datetime "updated_at", null: false + t.index ["owner_id"], name: "index_accounts_on_owner_id" + end + create_table "active_storage_attachments", force: :cascade do |t| t.string "name", null: false t.string "record_type", null: false @@ -49,6 +66,8 @@ t.string "token" t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.bigint "account_id", null: false + t.index ["account_id"], name: "index_api_tokens_on_account_id" t.index ["user_id"], name: "index_api_tokens_on_user_id" end @@ -69,12 +88,15 @@ end create_table "chunks", force: :cascade do |t| - t.bigint "document_id", null: false + t.bigint "chunkable_id", null: false t.text "content" - t.vector "embedding", limit: 768 + t.vector "embedding", limit: 3584 t.datetime "created_at", null: false t.datetime "updated_at", null: false - t.index ["document_id"], name: "index_chunks_on_document_id" + t.string "chunkable_type" + t.bigint "account_id" + t.index ["account_id"], name: "index_chunks_on_account_id" + t.index ["chunkable_type", "chunkable_id"], name: "index_chunks_on_chunkable_type_and_chunkable_id" end create_table "credentials", force: :cascade do |t| @@ -97,6 +119,9 @@ t.bigint "author_id" t.string "uid" t.string "content_hash" + t.string 
"purpose" + t.bigint "account_id" + t.index ["account_id"], name: "index_documents_on_account_id" t.index ["author_id"], name: "index_documents_on_author_id" end @@ -108,6 +133,7 @@ t.datetime "created_at", null: false t.datetime "updated_at", null: false t.string "similar_document_ids", default: [], array: true + t.boolean "done", default: false t.index ["chat_id"], name: "index_messages_on_chat_id" end @@ -264,12 +290,18 @@ t.index "lower((email)::text)", name: "index_users_on_lowercase_email", unique: true end + add_foreign_key "account_users", "accounts" + add_foreign_key "account_users", "users" + add_foreign_key "accounts", "users", column: "owner_id" add_foreign_key "active_storage_attachments", "active_storage_blobs", column: "blob_id" add_foreign_key "active_storage_variant_records", "active_storage_blobs", column: "blob_id" + add_foreign_key "api_tokens", "accounts" add_foreign_key "api_tokens", "users" add_foreign_key "chats", "users" - add_foreign_key "chunks", "documents" + add_foreign_key "chunks", "accounts" + add_foreign_key "chunks", "documents", column: "chunkable_id" add_foreign_key "credentials", "users" + add_foreign_key "documents", "accounts" add_foreign_key "documents", "authors" add_foreign_key "messages", "chats" add_foreign_key "sessions", "users" From cce3c6cf029bab928b452f3339e9f917db658b06 Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Wed, 18 Dec 2024 21:26:33 +0000 Subject: [PATCH 3/6] Prepare Infomaniak compatibility and multi-accounts --- app/controllers/api/v1/completions_controller.rb | 6 +++++- app/controllers/api_tokens_controller.rb | 9 +++++++-- app/controllers/chats_controller.rb | 2 +- app/models/account.rb | 3 ++- app/models/chat.rb | 2 ++ app/models/chat/infomaniak.rb | 2 +- app/models/chat/ollama.rb | 2 +- app/models/chunk.rb | 1 + app/models/current.rb | 1 + app/models/document/chunkable.rb | 2 +- app/models/user.rb | 4 ++++ app/views/api_tokens/_form.html.erb | 15 
++++++++------- app/views/api_tokens/index.html.erb | 2 ++ db/migrate/20241216181749_create_accounts.rb | 6 +++--- db/migrate/20241216181845_create_account_users.rb | 5 +++-- .../20241216190106_add_chunkable_to_chunks.rb | 7 +++++++ ...202004_add_account_references_to_api_tokens.rb | 6 ++---- ...6203009_add_account_references_to_documents.rb | 11 ++++++++++- ...41218200949_add_account_references_to_chats.rb | 14 ++++++++++++++ db/schema.rb | 10 +++++++--- 20 files changed, 82 insertions(+), 28 deletions(-) create mode 100644 db/migrate/20241218200949_add_account_references_to_chats.rb diff --git a/app/controllers/api/v1/completions_controller.rb b/app/controllers/api/v1/completions_controller.rb index e900cb9..0a362ea 100644 --- a/app/controllers/api/v1/completions_controller.rb +++ b/app/controllers/api/v1/completions_controller.rb @@ -6,7 +6,10 @@ class CompletionsController < ApplicationController include ActionController::Live def create - @chat = @user.chats.create + account = @user.accounts.find_or_create_by(uid: completion_params[:user]) if completion_params[:user].present? + account ||= @account + + @chat = @user.chats.create(account:) if completion_params[:messages].present? 
completion_params[:messages].each do |message_params| @@ -85,6 +88,7 @@ def completion_params :top_p, :top_k, :temperature, + :user, messages: [ :content, :role diff --git a/app/controllers/api_tokens_controller.rb b/app/controllers/api_tokens_controller.rb index 7637077..1b42e68 100644 --- a/app/controllers/api_tokens_controller.rb +++ b/app/controllers/api_tokens_controller.rb @@ -6,7 +6,12 @@ def index end def create - @api_token = Current.user.api_tokens.new(api_token_params) + account = Current.user.accounts.find(api_token_params[:account_id]) + + @api_token = Current.user.api_tokens.new( + account:, + name: api_token_params[:name] + ) if @api_token.save redirect_to api_tokens_path @@ -24,6 +29,6 @@ def destroy private def api_token_params - params.require(:api_token).permit(:name) + params.require(:api_token).permit(:account_id, :name) end end diff --git a/app/controllers/chats_controller.rb b/app/controllers/chats_controller.rb index 5d9cf5d..aa1e6f3 100644 --- a/app/controllers/chats_controller.rb +++ b/app/controllers/chats_controller.rb @@ -5,7 +5,7 @@ def show end def create - @chat = Current.user.chats.create + @chat = Current.user.chats.create(account: Current.account) redirect_to @chat end diff --git a/app/models/account.rb b/app/models/account.rb index 0eb87e2..7b1e15a 100644 --- a/app/models/account.rb +++ b/app/models/account.rb @@ -3,6 +3,7 @@ class Account < ApplicationRecord has_many :account_users, dependent: :destroy has_many :users, through: :account_users - has_many :chunks + has_many :chats, dependent: :destroy + has_many :chunks, dependent: :destroy has_many :documents, dependent: :destroy end diff --git a/app/models/chat.rb b/app/models/chat.rb index 1a3583e..11d396d 100644 --- a/app/models/chat.rb +++ b/app/models/chat.rb @@ -3,6 +3,8 @@ class Chat < ApplicationRecord include Infomaniak include Ollama + belongs_to :account + belongs_to :user has_many :messages, dependent: :destroy def first_question diff --git 
a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb index 2ef13b2..6165b42 100644 --- a/app/models/chat/infomaniak.rb +++ b/app/models/chat/infomaniak.rb @@ -32,7 +32,7 @@ def complete_with_infomaniak(&block) assistant_response = messages.create(role: "assistant", done: false, content: "", response_number:) assistant_response.broadcast_created - retrieved_chunks = Chunk.similarity_search(question, k: retrieval_fetch_k) + retrieved_chunks = Chunk.where(account:).similarity_search(question, k: retrieval_fetch_k) assistant_response.update(similar_document_ids: retrieved_chunks.pluck(:chunkable_id).uniq) if retrieved_chunks.any? if retrieved_chunks.any? diff --git a/app/models/chat/ollama.rb b/app/models/chat/ollama.rb index 94ea196..f536b3b 100644 --- a/app/models/chat/ollama.rb +++ b/app/models/chat/ollama.rb @@ -48,7 +48,7 @@ def complete_with_ollama(&block) check_llm = Chat.new_ollama_check_llm - search_results = Chunk.similarity_search(question, k: retrieval_fetch_k) + search_results = Chunk.where(account:).similarity_search(question, k: retrieval_fetch_k) search_results.each do |search_result| context_to_check = search_result.content diff --git a/app/models/chunk.rb b/app/models/chunk.rb index f39ea94..ba3e43b 100644 --- a/app/models/chunk.rb +++ b/app/models/chunk.rb @@ -1,5 +1,6 @@ class Chunk < ApplicationRecord include Vectorizable + belongs_to :account belongs_to :chunkable, polymorphic: true end diff --git a/app/models/current.rb b/app/models/current.rb index 83baf84..dbc5270 100644 --- a/app/models/current.rb +++ b/app/models/current.rb @@ -2,6 +2,7 @@ class Current < ActiveSupport::CurrentAttributes attribute :session attribute :impersonated_user + def account = user.first_account def user = true_user def true_user = session&.user end diff --git a/app/models/document/chunkable.rb b/app/models/document/chunkable.rb index 639279e..92aebdf 100644 --- a/app/models/document/chunkable.rb +++ b/app/models/document/chunkable.rb @@ -19,7 +19,7 @@ def 
chunkify! self.chunks.destroy_all new_chunks.each do |new_chunk| - chunk = self.chunks.create!(content: new_chunk.dig(:text)) + chunk = self.chunks.create!(account:, content: new_chunk.dig(:text)) chunk.vectorize! end end diff --git a/app/models/user.rb b/app/models/user.rb index 85af29e..a797727 100644 --- a/app/models/user.rb +++ b/app/models/user.rb @@ -14,4 +14,8 @@ class User < ApplicationRecord format: { with: URI::MailTo::EMAIL_REGEXP } validates :password, presence: true, length: { minimum: 12 } + + def first_account + accounts.order(:created_at).first + end end diff --git a/app/views/api_tokens/_form.html.erb b/app/views/api_tokens/_form.html.erb index 267683a..6b6ca12 100644 --- a/app/views/api_tokens/_form.html.erb +++ b/app/views/api_tokens/_form.html.erb @@ -1,15 +1,16 @@ -<%= form_with(model: api_token, class: "flex items-center space-x-4 h-12") do |form| %> +<%= form_with(model: api_token, class: "flex items-center space-x-4") do |form| %> <%= form.label :name, "Name of your API token" %> <%= form.text_field :name, required: true, class: - " - resize-none rounded-xl border-none bg-slate-200 text-sm text-slate-900 - focus:outline-none focus:ring-2 focus:ring-blue-500 dark:bg-slate-800 - dark:text-slate-200 dark:placeholder-slate-400 dark:focus:ring-blue-500 - sm:text-base - " %> + "resize-none rounded-xl border-none bg-slate-200 text-sm text-slate-900 focus:outline-none focus:ring-2 focus:ring-blue-500 dark:bg-slate-800 dark:text-slate-200 dark:placeholder-slate-400 dark:focus:ring-blue-500 sm:text-base" %> + <%= form.collection_select :account_id, + Current.user.accounts, + :id, + :name, + prompt: true, + required: true %> <%= form.submit "Generate", class: "cursor-pointer rounded-lg bg-blue-700 px-4 py-2 text-sm font-medium text-slate-200 hover:bg-blue-800 focus:outline-none focus:ring-4 focus:ring-blue-300 dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800 sm:text-base disabled:bg-gray-700" %> diff --git 
a/app/views/api_tokens/index.html.erb b/app/views/api_tokens/index.html.erb index 819b8a7..0ffd134 100644 --- a/app/views/api_tokens/index.html.erb +++ b/app/views/api_tokens/index.html.erb @@ -7,6 +7,8 @@

<%= api_token.name %>

+

<%= api_token.account.name %>

+ <%= button_to api_token, method: :delete, data: { turbo_confirm: "Are you sure?" } do %> Date: Thu, 19 Dec 2024 20:08:47 +0000 Subject: [PATCH 4/6] Prepare Infomaniak compatibility and multi-accounts --- app/controllers/api/v1/application_controller.rb | 6 +++++- app/controllers/api/v1/completions_controller.rb | 15 +++++++++------ app/controllers/documents_controller.rb | 2 +- app/controllers/messages_controller.rb | 2 ++ app/models/chat/completionable.rb | 6 +++--- app/models/chat/infomaniak.rb | 12 ++++++------ app/models/chat/ollama.rb | 2 +- 7 files changed, 27 insertions(+), 18 deletions(-) diff --git a/app/controllers/api/v1/application_controller.rb b/app/controllers/api/v1/application_controller.rb index 773896b..3866c26 100644 --- a/app/controllers/api/v1/application_controller.rb +++ b/app/controllers/api/v1/application_controller.rb @@ -12,8 +12,12 @@ class ApplicationController < ApplicationController def verify_api_key authenticate_or_request_with_http_token do |token, _options| api_token = ApiToken.find_by(token:) - @account = api_token&.account @user = api_token&.user + if params[:user].present? + @account = @user.accounts.create_with(owner: @user).find_or_create_by(uid: params[:user]) + else + @account = api_token&.account + end end end end diff --git a/app/controllers/api/v1/completions_controller.rb b/app/controllers/api/v1/completions_controller.rb index 0a362ea..65ed379 100644 --- a/app/controllers/api/v1/completions_controller.rb +++ b/app/controllers/api/v1/completions_controller.rb @@ -6,10 +6,13 @@ class CompletionsController < ApplicationController include ActionController::Live def create - account = @user.accounts.find_or_create_by(uid: completion_params[:user]) if completion_params[:user].present? 
- account ||= @account + @chat = @user.chats.create(account: @account) - @chat = @user.chats.create(account:) + max_tokens = (completion_params[:max_tokens] || ENV["LLM_MAX_TOKENS"]).to_i + model = (completion_params[:model] || ENV["LLM_MODEL"]).to_i + temperature = (completion_params[:temperature] || ENV["LLM_TEMPERATURE"]).to_f + top_k = (completion_params[:top_k] || ENV["LLM_TOP_K"]).to_f + top_p = (completion_params[:top_p] || ENV["LLM_TOP_P"]).to_f if completion_params[:messages].present? completion_params[:messages].each do |message_params| @@ -30,7 +33,7 @@ def create stream_response = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false if stream_response - chat_response = @chat.complete do |stream| + chat_response = @chat.complete(model:, temperature:, top_k:, top_p:, max_tokens:) do |stream| stream_content = stream.dig("delta", "content") next unless stream_content done = !!stream.dig("finish_reason") @@ -56,7 +59,7 @@ def create end end else - chat_response = @chat.complete + chat_response = @chat.complete(model:, temperature:, top_k:, top_p:, max_tokens:) render json: { choices: [ finish_reason: "stop", @@ -85,8 +88,8 @@ def completion_params :model, :prompt, :stream, - :top_p, :top_k, + :top_p, :temperature, :user, messages: [ diff --git a/app/controllers/documents_controller.rb b/app/controllers/documents_controller.rb index 9ae0cee..06255d7 100644 --- a/app/controllers/documents_controller.rb +++ b/app/controllers/documents_controller.rb @@ -67,6 +67,6 @@ def set_document # Only allow a list of trusted parameters through. 
def document_params - params.require(:document).permit(:title, :file) + params.require(:document).permit(:title, :file).merge(account: Current.account) end end diff --git a/app/controllers/messages_controller.rb b/app/controllers/messages_controller.rb index 587f26a..4a57a6a 100644 --- a/app/controllers/messages_controller.rb +++ b/app/controllers/messages_controller.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + class MessagesController < ApplicationController include ActionView::RecordIdentifier diff --git a/app/models/chat/completionable.rb b/app/models/chat/completionable.rb index 2706bef..0aa5063 100644 --- a/app/models/chat/completionable.rb +++ b/app/models/chat/completionable.rb @@ -7,12 +7,12 @@ def ai_provider end end - def complete(&block) + def complete(model:, temperature:, top_k:, top_p:, max_tokens:, &block) case Chat.ai_provider when "ollama" - complete_with_ollama(&block) + complete_with_ollama(top_k:, top_p:, &block) when "infomaniak" - complete_with_infomaniak(&block) + complete_with_infomaniak(model:, temperature:, top_p:, max_tokens:, &block) end end diff --git a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb index 6165b42..df30a2c 100644 --- a/app/models/chat/infomaniak.rb +++ b/app/models/chat/infomaniak.rb @@ -2,7 +2,7 @@ module Chat::Infomaniak extend ActiveSupport::Concern class_methods do - def new_infomaniak_llm + def new_infomaniak_llm(model: ENV.fetch("LLM_MODEL", "mixtral")) Langchain::LLM::OpenAI.new( api_key: ENV.fetch("INFOMANIAK_API_KEY", ""), llm_options: { @@ -10,8 +10,8 @@ def new_infomaniak_llm uri_base: "https://api.infomaniak.com/1/ai/#{ENV.fetch("INFOMANIAK_PRODUCT_ID", "")}/openai" }, default_options: { - chat_completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), - completion_model_name: ENV.fetch("LLM_MODEL", "mixtral"), + chat_completion_model_name: model, + completion_model_name: model, embeddings_model_name: ENV.fetch("EMBEDDING_MODEL", "bge_multilingual_gemma2"), temperature: 
ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i @@ -20,7 +20,7 @@ def new_infomaniak_llm end end - def complete_with_infomaniak(&block) + def complete_with_infomaniak(model:, temperature:, top_p:, max_tokens:, &block) question = last_question context = [] @@ -49,8 +49,8 @@ def complete_with_infomaniak(&block) messages_for_assistant = messages_for_assistant.flatten - llm = Chat.new_infomaniak_llm - llm_response = llm.chat(messages: messages_for_assistant, &block) + llm = Chat.new_infomaniak_llm(model:) + llm_response = llm.chat(messages: messages_for_assistant, temperature:, top_p:, max_tokens:, &block) assistant_response.update(done: true, content: llm_response.completion) assistant_response diff --git a/app/models/chat/ollama.rb b/app/models/chat/ollama.rb index f536b3b..1d8c55c 100644 --- a/app/models/chat/ollama.rb +++ b/app/models/chat/ollama.rb @@ -32,7 +32,7 @@ def new_ollama_check_llm end end - def complete_with_ollama(&block) + def complete_with_ollama(top_k:, top_p:, &block) question = last_question context = [] From bd504556b0bf3dce626a2a6f748fc4a68e6103c1 Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Thu, 19 Dec 2024 20:43:14 +0000 Subject: [PATCH 5/6] Prepare Infomaniak compatibility and multi-accounts --- app/controllers/api/v1/completions_controller.rb | 10 +++++----- app/models/chat/completionable.rb | 16 +++++++--------- app/models/chat/infomaniak.rb | 2 +- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/app/controllers/api/v1/completions_controller.rb b/app/controllers/api/v1/completions_controller.rb index 65ed379..e389d54 100644 --- a/app/controllers/api/v1/completions_controller.rb +++ b/app/controllers/api/v1/completions_controller.rb @@ -8,11 +8,11 @@ class CompletionsController < ApplicationController def create @chat = @user.chats.create(account: @account) - max_tokens = (completion_params[:max_tokens] || ENV["LLM_MAX_TOKENS"]).to_i - model = 
(completion_params[:model] || ENV["LLM_MODEL"]).to_i - temperature = (completion_params[:temperature] || ENV["LLM_TEMPERATURE"]).to_f - top_k = (completion_params[:top_k] || ENV["LLM_TOP_K"]).to_f - top_p = (completion_params[:top_p] || ENV["LLM_TOP_P"]).to_f + max_tokens = completion_params[:max_tokens]&.to_i + model = completion_params[:model] + temperature = completion_params[:temperature]&.to_f + top_k = completion_params[:top_k]&.to_f + top_p = completion_params[:top_p]&.to_f if completion_params[:messages].present? completion_params[:messages].each do |message_params| diff --git a/app/models/chat/completionable.rb b/app/models/chat/completionable.rb index 0aa5063..b772d30 100644 --- a/app/models/chat/completionable.rb +++ b/app/models/chat/completionable.rb @@ -7,7 +7,13 @@ def ai_provider end end - def complete(model:, temperature:, top_k:, top_p:, max_tokens:, &block) + def complete(model: nil, temperature: nil, top_k: nil, top_p: nil, max_tokens: nil, &block) + max_tokens ||= ENV.fetch("LLM_MAX_TOKENS", 1_024).to_i + model ||= ENV.fetch("LLM_MODEL", "mixtral") + temperature ||= ENV.fetch("LLM_TEMPERATURE", 0.1).to_f + top_k ||= ENV.fetch("LLM_TOP_K", 40).to_f + top_p ||= ENV.fetch("LLM_TOP_P", 0.9).to_f + case Chat.ai_provider when "ollama" complete_with_ollama(top_k:, top_p:, &block) @@ -25,12 +31,4 @@ def retrieval_fetch_k def system_prompt ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. 
You are a helpful assistant.") end - - def top_k - ENV.fetch("LLM_TOP_K", 40).to_f - end - - def top_p - ENV.fetch("LLM_TOP_P", 0.9).to_f - end end diff --git a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb index df30a2c..904f62b 100644 --- a/app/models/chat/infomaniak.rb +++ b/app/models/chat/infomaniak.rb @@ -2,7 +2,7 @@ module Chat::Infomaniak extend ActiveSupport::Concern class_methods do - def new_infomaniak_llm(model: ENV.fetch("LLM_MODEL", "mixtral")) + def new_infomaniak_llm(model:) Langchain::LLM::OpenAI.new( api_key: ENV.fetch("INFOMANIAK_API_KEY", ""), llm_options: { From 139b94dc21511f0b386180fa77108e0b4879ccdc Mon Sep 17 00:00:00 2001 From: Cyril Blaecke <1692273+cbldev@users.noreply.github.com> Date: Thu, 19 Dec 2024 21:53:52 +0000 Subject: [PATCH 6/6] Prepare Infomaniak compatibility and multi-accounts --- app/models/chat/completionable.rb | 32 ++++++++++++++++------ app/models/chat/infomaniak.rb | 44 +++++++++++++++++-------------- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/app/models/chat/completionable.rb b/app/models/chat/completionable.rb index b772d30..53c0453 100644 --- a/app/models/chat/completionable.rb +++ b/app/models/chat/completionable.rb @@ -8,22 +8,38 @@ def ai_provider end def complete(model: nil, temperature: nil, top_k: nil, top_p: nil, max_tokens: nil, &block) - max_tokens ||= ENV.fetch("LLM_MAX_TOKENS", 1_024).to_i - model ||= ENV.fetch("LLM_MODEL", "mixtral") - temperature ||= ENV.fetch("LLM_TEMPERATURE", 0.1).to_f - top_k ||= ENV.fetch("LLM_TOP_K", 40).to_f - top_p ||= ENV.fetch("LLM_TOP_P", 0.9).to_f + options = default_options.merge( + { + model:, + temperature:, + top_k:, + top_p:, + max_tokens: + }.compact_blank + ) - case Chat.ai_provider + case self.class.ai_provider when "ollama" - complete_with_ollama(top_k:, top_p:, &block) + complete_with_ollama(top_k: options[:top_k], top_p: options[:top_p], &block) when "infomaniak" - complete_with_infomaniak(model:, temperature:, top_p:, 
max_tokens:, &block) + complete_with_infomaniak(model: options[:model], temperature: options[:temperature], top_p: options[:top_p], max_tokens: options[:max_tokens], &block) + else + raise "Unsupported AI provider: #{self.class.ai_provider}" end end private + def default_options + { + max_tokens: ENV.fetch("LLM_MAX_TOKENS", 1_024).to_i, + model: ENV.fetch("LLM_MODEL", "mixtral"), + temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f, + top_k: ENV.fetch("LLM_TOP_K", 40).to_f, + top_p: ENV.fetch("LLM_TOP_P", 0.9).to_f + } + end + def retrieval_fetch_k ENV.fetch("RETRIEVAL_FETCH_K", 4) end diff --git a/app/models/chat/infomaniak.rb b/app/models/chat/infomaniak.rb index 904f62b..5eb2946 100644 --- a/app/models/chat/infomaniak.rb +++ b/app/models/chat/infomaniak.rb @@ -23,31 +23,15 @@ def new_infomaniak_llm(model:) def complete_with_infomaniak(model:, temperature:, top_p:, max_tokens:, &block) question = last_question - context = [] - - messages_for_assistant = [] - messages_for_assistant << { role: "system", content: system_prompt } - messages_for_assistant << messages_hash if messages_hash.any? + messages_for_assistant = prepare_messages(question) assistant_response = messages.create(role: "assistant", done: false, content: "", response_number:) assistant_response.broadcast_created - retrieved_chunks = Chunk.where(account:).similarity_search(question, k: retrieval_fetch_k) - assistant_response.update(similar_document_ids: retrieved_chunks.pluck(:chunkable_id).uniq) if retrieved_chunks.any? - - if retrieved_chunks.any? - context << retrieved_chunks.map(&:content).join("\n\n") - context = context.join("\n\n") - - prompt = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") - prompt = prompt.gsub("{context}", context) - prompt = prompt.gsub("{question}", question) + context = fetch_context(question) + prompt = generate_prompt(context, question) if context.present? 
- messages_for_assistant.pop - messages_for_assistant << { role: "user", content: prompt } - end - - messages_for_assistant = messages_for_assistant.flatten + messages_for_assistant[-1][:content] = prompt if prompt llm = Chat.new_infomaniak_llm(model:) llm_response = llm.chat(messages: messages_for_assistant, temperature:, top_p:, max_tokens:, &block) @@ -55,4 +39,24 @@ def complete_with_infomaniak(model:, temperature:, top_p:, max_tokens:, &block) assistant_response.update(done: true, content: llm_response.completion) assistant_response end + + private + + def prepare_messages(question) + messages = [ { role: "system", content: system_prompt } ] + messages << messages_hash if messages_hash.any? + messages.flatten + end + + def fetch_context(question) + retrieved_chunks = Chunk.where(account:).similarity_search(question, k: retrieval_fetch_k) + return unless retrieved_chunks.any? + + retrieved_chunks.map(&:content).join("\n\n") + end + + def generate_prompt(context, question) + prompt_template = ENV.fetch("QUERY_PROMPT_TEMPLATE", "Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}") + prompt_template.gsub("{context}", context).gsub("{question}", question) + end end