OpenAI chat compatible API ✨ (#56)
cbldev authored Nov 18, 2024
1 parent e1af1ad commit c775faa
Showing 15 changed files with 457 additions and 4 deletions.
58 changes: 56 additions & 2 deletions README.md
@@ -5,11 +5,11 @@ It is designed to be easy to install and use.

## Easy to install

https://github.com/nosia-ai/nosia/assets/1692273/671ccb6a-054c-4dc2-bcd9-2b874a888548
<https://github.com/nosia-ai/nosia/assets/1692273/671ccb6a-054c-4dc2-bcd9-2b874a888548>

## Easy to use

https://github.com/nosia-ai/nosia/assets/1692273/ce60094b-abb5-4ed4-93aa-f69485e058b0
<https://github.com/nosia-ai/nosia/assets/1692273/ce60094b-abb5-4ed4-93aa-f69485e058b0>

## macOS, Debian or Ubuntu one command installation

@@ -110,6 +110,60 @@ cd nosia
./script/stop
```

## OpenAI chat compatible API

1. As a logged-in user, go to <https://nosia.localhost/api_tokens>

2. Generate and copy your token

3. Use your favorite OpenAI chat API client, setting the API base to `https://nosia.localhost/v1` and the API key to your token.

### Try the API

1. Install the HTTPie CLI, or use any HTTP client of your choice.

2. Try the streaming API by creating a `test-stream.json` file with the following content:

```json
{"messages":[{"role":"user","content":"When Ruby 3.3.7 will be released?"}],"model":"qwen2.5","stream":true,"top_p":0.9,"top_k":40.0,"temperature":0.1}
```

3. Replace `<token>` with your token and run the following command:

```sh
http -A bearer -a <token> --stream POST https://nosia.localhost/v1/completions < test-stream.json
```
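
Alternatively, if you prefer `curl`, an equivalent streaming call looks like this (`-N` disables output buffering so tokens appear as they arrive; add `-k` only if your local certificate is self-signed):

```sh
curl -N https://nosia.localhost/v1/completions \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d @test-stream.json
```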

4. Try the API without streaming by creating a `test-non-stream.json` file with the following content:

```json
{"messages":[{"role":"user","content":"When Ruby 3.3.7 will be released?"}],"model":"qwen2.5","stream":false,"top_p":0.9,"top_k":40.0,"temperature":0.1}
```

5. Replace `<token>` with your token and run the following command:

```sh
http -A bearer -a <token> POST https://nosia.localhost/v1/completions < test-non-stream.json
```
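
If `jq` is installed, you can extract just the assistant's reply from the non-streaming response; the `.choices[0].message.content` path matches the response shape built by the completions controller below:

```sh
http -A bearer -a <token> POST https://nosia.localhost/v1/completions < test-non-stream.json \
  | jq -r '.choices[0].message.content'
```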

## Continue.dev integration

1. In your `~/.continue/config.json`, configure a `nosia` model, replacing `<token>` with the API token you generated above:

```json
"models": [
{
"model": "nosia",
"provider": "openai",
"apiBase": "https://nosia.localhost/v1",
"apiKey": "token",
"title": "Nosia"
}
]
```

2. Enjoy!

## Troubleshooting

If you encounter any issues:
217 changes: 217 additions & 0 deletions app/controllers/api/v1/completions_controller.rb
@@ -0,0 +1,217 @@
# frozen_string_literal: true

module Api
module V1
class CompletionsController < ApplicationController
include ActionController::Live

allow_unauthenticated_access only: [ :create ]
skip_before_action :verify_authenticity_token
before_action :verify_api_key
before_action :parse_params, only: [ :create ]

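      # POST /v1/completions entry point: answers with Server-Sent Events
      # when `stream` is true, or a single JSON body otherwise.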
def create
@llm = LangchainrbRails.config.vectorsearch.llm
@uuid = SecureRandom.uuid

if @stream
stream_response
else
non_stream_response
end
rescue StandardError => e
handle_error(e)
end

private

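      # Secondary Ollama client used only for grounding checks; the default
      # CHECK_MODEL (bespoke-minicheck) answers "Yes"/"No" to whether a
      # document supports a claim.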
def build_check_llm
Langchain::LLM::Ollama.new(
url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"),
api_key: ENV.fetch("OLLAMA_API_KEY", ""),
default_options: {
chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"),
completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"),
temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f,
num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i
}
)
end

def build_context(checked_chunks)
checked_chunks.map(&:content).join("\n")
end

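      # Assembles the RAG prompt: a system message from RAG_SYSTEM_TEMPLATE,
      # plus a user message interpolating the retrieved context and the
      # question into QUERY_PROMPT_TEMPLATE (or the bare question when no
      # context survived the grounding check).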
def build_messages(question, context)
system_message = {
role: "system",
content: ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. You are a helpful assistant.")
}

user_content = if context.present?
template = ENV.fetch(
"QUERY_PROMPT_TEMPLATE",
"Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}"
)
template.gsub("{context}", context).gsub("{question}", question)
else
question
end

user_message = { role: "user", content: user_content }

[ system_message, user_message ]
end

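      # Fetches the RETRIEVAL_FETCH_K most similar chunks, then keeps only
      # those the check LLM confirms support the question, filtering out
      # irrelevant context before prompt assembly.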
def check_context(question)
      k = ENV.fetch("RETRIEVAL_FETCH_K", 4).to_i

check_llm = build_check_llm
checked_chunks = []

search_results = Chunk.similarity_search(question, k:)
search_results.each do |search_result|
context_to_check = search_result.content
check_message = [ { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } ]

check_llm.chat(messages: check_message, top_k: @top_k, top_p: @top_p) do |stream|
check_response = stream.raw_response.dig("message", "content")

if check_response && check_response.eql?("Yes")
checked_chunks << search_result
end
end
end

checked_chunks
rescue StandardError => e
handle_error(e)
[]
end

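      # Reports errors in the transport-appropriate shape: an SSE data event
      # followed by [DONE] when streaming, otherwise a JSON 500 response.
      # Backtraces are only exposed in development.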
def handle_error(e)
error_message = {
error: {
message: e.message,
backtrace: Rails.env.development? ? e.backtrace : nil
}
}

if @stream
response.stream.write("data: #{error_message.to_json}\n\n")
response.stream.write("data: [DONE]\n\n")
else
render json: error_message, status: :internal_server_error
end
ensure
response.stream.close if @stream
end

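      # Consumes the whole LLM stream internally, accumulating the content,
      # and renders a single OpenAI-style `chat.completion` object once the
      # final "done" chunk arrives.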
def non_stream_response
checked_chunks = check_context(@question)
context = build_context(checked_chunks)
messages = build_messages(@question, context)

content = ""
data = {}

@llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream|
stream_content = stream.raw_response.dig("message", "content")
content += stream_content if stream_content

done = !!stream.raw_response["done"]

if done
data = {
choices: [
finish_reason: "stop",
index: 0,
message: {
content: content,
role: "assistant"
}
],
created: Time.now.to_i,
id: "chatcmpl-#{@uuid}",
model: "nosia:#{ENV["LLM_MODEL"]}",
object: "chat.completion",
system_fingerprint: "fp_nosia"
}
end
end

render json: data
end

def parse_params
params.permit(
:max_tokens,
:model,
:prompt,
:stream,
:top_p,
:top_k,
:temperature,
messages: [
:content,
:role
],
stop: [],
chat: {},
completion: {},
)

@question = params[:prompt] || params.dig(:messages, 0, :content)
@stream = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false
        # nil.to_f would silently yield 0.0, so apply the env fallback before casting
        @top_p = (params[:top_p] || ENV.fetch("LLM_TOP_P", 0.9)).to_f
        @top_k = (params[:top_k] || ENV.fetch("LLM_TOP_K", 40)).to_i
end

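      # Streams the LLM output as Server-Sent Events, emitting one
      # OpenAI-style `chat.completion.chunk` per raw chunk and terminating
      # with `data: [DONE]`.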
def stream_response
checked_chunks = check_context(@question)
context = build_context(checked_chunks)
messages = build_messages(@question, context)

response.headers["Content-Type"] = "text/event-stream"

@llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream|
stream_content = stream.raw_response.dig("message", "content")
next unless stream_content

done = !!stream.raw_response["done"]

if done
response.stream.write("data: [DONE]\n\n")
else
data = {
choices: [
delta: {
content: stream_content,
role: "assistant"
},
finish_reason: done ? "stop" : nil,
index: 0
],
created: Time.now.to_i,
id: "chatcmpl-#{@uuid}",
model: "nosia:#{ENV["LLM_MODEL"]}",
object: "chat.completion.chunk",
system_fingerprint: "fp_nosia"
}

response.stream.write("data: #{data.to_json}\n\n")
end
end
ensure
response.stream.close if response.stream.respond_to?(:close)
end

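      # Authenticates the request from its `Authorization: Bearer <token>`
      # header, looking the token up in ApiToken and exposing its owner
      # as @user.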
def verify_api_key
authenticate_or_request_with_http_token do |token, _options|
api_token = ApiToken.find_by(token:)
@user = api_token&.user
end
end
end
end
end
29 changes: 29 additions & 0 deletions app/controllers/api_tokens_controller.rb
@@ -0,0 +1,29 @@
# frozen_string_literal: true

class ApiTokensController < ApplicationController
def index
@api_tokens = Current.user.api_tokens
end

def create
@api_token = Current.user.api_tokens.new(api_token_params)

if @api_token.save
redirect_to api_tokens_path
else
      redirect_to api_tokens_path, alert: "An error occurred."
end
end

def destroy
@api_token = Current.user.api_tokens.find(params[:id])
@api_token.destroy
redirect_to api_tokens_path
end

private

def api_token_params
params.require(:api_token).permit(:name)
end
end
4 changes: 4 additions & 0 deletions app/javascript/controllers/application.js
@@ -1,6 +1,10 @@
import { Application } from "@hotwired/stimulus"
import Clipboard from "@stimulus-components/clipboard"
import RevealController from "@stimulus-components/reveal"

const application = Application.start()
application.register('clipboard', Clipboard)
application.register('reveal', RevealController)

// Configure Stimulus development experience
application.debug = false
7 changes: 7 additions & 0 deletions app/models/api_token.rb
@@ -0,0 +1,7 @@
class ApiToken < ApplicationRecord
belongs_to :user

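  # has_secure_token generates a unique, URL-safe token when the record is created.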
has_secure_token :token

validates :name, presence: true
end
1 change: 1 addition & 0 deletions app/models/user.rb
@@ -1,6 +1,7 @@
class User < ApplicationRecord
has_secure_password

has_many :api_tokens, dependent: :destroy
has_many :chats, dependent: :destroy
has_many :credentials, dependent: :destroy
has_many :sessions, dependent: :destroy
16 changes: 16 additions & 0 deletions app/views/api_tokens/_form.html.erb
@@ -0,0 +1,16 @@
<%= form_with(model: api_token, class: "flex items-center space-x-4 h-12") do |form| %>
<%= form.label :name, "Name of your API token" %>
<%= form.text_field :name,
required: true,
class:
"
resize-none rounded-xl border-none bg-slate-200 text-sm text-slate-900
focus:outline-none focus:ring-2 focus:ring-blue-500 dark:bg-slate-800
dark:text-slate-200 dark:placeholder-slate-400 dark:focus:ring-blue-500
sm:text-base
" %>

<%= form.submit "Generate",
class:
"cursor-pointer rounded-lg bg-blue-700 px-4 py-2 text-sm font-medium text-slate-200 hover:bg-blue-800 focus:outline-none focus:ring-4 focus:ring-blue-300 dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800 sm:text-base disabled:bg-gray-700" %>
<% end %>