OpenAI chat compatible API ✨ (#56)
cbldev authored Nov 18, 2024
1 parent e1af1ad commit c775faa
Showing 15 changed files with 457 additions and 4 deletions.
58 changes: 56 additions & 2 deletions README.md
@@ -5,11 +5,11 @@ It is designed to be easy to install and use.

## Easy to install

https://github.com/nosia-ai/nosia/assets/1692273/671ccb6a-054c-4dc2-bcd9-2b874a888548
<https://github.com/nosia-ai/nosia/assets/1692273/671ccb6a-054c-4dc2-bcd9-2b874a888548>

## Easy to use

https://github.com/nosia-ai/nosia/assets/1692273/ce60094b-abb5-4ed4-93aa-f69485e058b0
<https://github.com/nosia-ai/nosia/assets/1692273/ce60094b-abb5-4ed4-93aa-f69485e058b0>

## macOS, Debian or Ubuntu one command installation

@@ -110,6 +110,60 @@ cd nosia
./script/stop
```

## OpenAI chat compatible API

1. As a logged-in user, go to <https://nosia.localhost/api_tokens>

2. Generate and copy your token

3. Use your favorite OpenAI chat API client, setting the API base to `https://nosia.localhost/v1` and the API key to your token.

### Try the API

1. Install the HTTPie CLI, or use any HTTP client of your choice.

2. Try the streaming API by creating a `test-stream.json` file with the following content:

```json
{"messages":[{"role":"user","content":"When Ruby 3.3.7 will be released?"}],"model":"qwen2.5","stream":true,"top_p":0.9,"top_k":40.0,"temperature":0.1}
```

3. Replace `<token>` with your token and run the following command:

```sh
http -A bearer -a <token> --stream POST https://nosia.localhost/v1/completions < test-stream.json
```
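
Alternatively, if you prefer `curl`, an equivalent streaming call looks like this (`-N` disables output buffering so tokens appear as they arrive; add `-k` only if your local certificate is self-signed):

```sh
curl -N https://nosia.localhost/v1/completions \
  -H "Authorization: Bearer <token>" \
  -H "Content-Type: application/json" \
  -d @test-stream.json
```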

4. Try the API without streaming by creating a `test-non-stream.json` file with the following content:

```json
{"messages":[{"role":"user","content":"When Ruby 3.3.7 will be released?"}],"model":"qwen2.5","stream":false,"top_p":0.9,"top_k":40.0,"temperature":0.1}
```

5. Replace `<token>` with your token and run the following command:

```sh
http -A bearer -a <token> POST https://nosia.localhost/v1/completions < test-non-stream.json
```
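
If `jq` is installed, you can extract just the assistant's reply from the non-streaming response; the `.choices[0].message.content` path matches the response shape built by the completions controller below:

```sh
http -A bearer -a <token> POST https://nosia.localhost/v1/completions < test-non-stream.json \
  | jq -r '.choices[0].message.content'
```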

## Continue.dev integration

1. In your `~/.continue/config.json`, configure a `nosia` model, replacing `<token>` with the API token you generated above:

```json
"models": [
{
"model": "nosia",
"provider": "openai",
"apiBase": "https://nosia.localhost/v1",
"apiKey": "token",
"title": "Nosia"
}
]
```

2. Enjoy!

## Troubleshooting

If you encounter any issues:
217 changes: 217 additions & 0 deletions app/controllers/api/v1/completions_controller.rb
@@ -0,0 +1,217 @@
# frozen_string_literal: true

module Api
module V1
class CompletionsController < ApplicationController
include ActionController::Live

allow_unauthenticated_access only: [ :create ]
skip_before_action :verify_authenticity_token
before_action :verify_api_key
before_action :parse_params, only: [ :create ]

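      # POST /v1/completions entry point: answers with Server-Sent Events
      # when `stream` is true, or a single JSON body otherwise.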
def create
@llm = LangchainrbRails.config.vectorsearch.llm
@uuid = SecureRandom.uuid

if @stream
stream_response
else
non_stream_response
end
rescue StandardError => e
handle_error(e)
end

private

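      # Secondary Ollama client used only for grounding checks; the default
      # CHECK_MODEL (bespoke-minicheck) answers "Yes"/"No" to whether a
      # document supports a claim.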
def build_check_llm
Langchain::LLM::Ollama.new(
url: ENV.fetch("OLLAMA_BASE_URL", "http://localhost:11434"),
api_key: ENV.fetch("OLLAMA_API_KEY", ""),
default_options: {
chat_completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"),
completion_model_name: ENV.fetch("CHECK_MODEL", "bespoke-minicheck"),
temperature: ENV.fetch("LLM_TEMPERATURE", 0.1).to_f,
num_ctx: ENV.fetch("LLM_NUM_CTX", 4_096).to_i
}
)
end

def build_context(checked_chunks)
checked_chunks.map(&:content).join("\n")
end

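      # Assembles the RAG prompt: a system message from RAG_SYSTEM_TEMPLATE,
      # plus a user message interpolating the retrieved context and the
      # question into QUERY_PROMPT_TEMPLATE (or the bare question when no
      # context survived the grounding check).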
def build_messages(question, context)
system_message = {
role: "system",
content: ENV.fetch("RAG_SYSTEM_TEMPLATE", "You are Nosia. You are a helpful assistant.")
}

user_content = if context.present?
template = ENV.fetch(
"QUERY_PROMPT_TEMPLATE",
"Nosia helpful content: {context}\n\n---\n\nNow here is the question you need to answer.\n\nQuestion: {question}"
)
template.gsub("{context}", context).gsub("{question}", question)
else
question
end

user_message = { role: "user", content: user_content }

[ system_message, user_message ]
end

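      # Fetches the RETRIEVAL_FETCH_K most similar chunks, then keeps only
      # those the check LLM confirms support the question, filtering out
      # irrelevant context before prompt assembly.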
def check_context(question)
      k = ENV.fetch("RETRIEVAL_FETCH_K", 4).to_i

check_llm = build_check_llm
checked_chunks = []

search_results = Chunk.similarity_search(question, k:)
search_results.each do |search_result|
context_to_check = search_result.content
check_message = [ { role: "user", content: "Document: #{context_to_check}\nClaim: #{question}" } ]

check_llm.chat(messages: check_message, top_k: @top_k, top_p: @top_p) do |stream|
check_response = stream.raw_response.dig("message", "content")

if check_response && check_response.eql?("Yes")
checked_chunks << search_result
end
end
end

checked_chunks
rescue StandardError => e
handle_error(e)
[]
end

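      # Reports errors in the transport-appropriate shape: an SSE data event
      # followed by [DONE] when streaming, otherwise a JSON 500 response.
      # Backtraces are only exposed in development.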
def handle_error(e)
error_message = {
error: {
message: e.message,
backtrace: Rails.env.development? ? e.backtrace : nil
}
}

if @stream
response.stream.write("data: #{error_message.to_json}\n\n")
response.stream.write("data: [DONE]\n\n")
else
render json: error_message, status: :internal_server_error
end
ensure
response.stream.close if @stream
end

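      # Consumes the whole LLM stream internally, accumulating the content,
      # and renders a single OpenAI-style `chat.completion` object once the
      # final "done" chunk arrives.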
def non_stream_response
checked_chunks = check_context(@question)
context = build_context(checked_chunks)
messages = build_messages(@question, context)

content = ""
data = {}

@llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream|
stream_content = stream.raw_response.dig("message", "content")
content += stream_content if stream_content

done = !!stream.raw_response["done"]

if done
data = {
choices: [
finish_reason: "stop",
index: 0,
message: {
content: content,
role: "assistant"
}
],
created: Time.now.to_i,
id: "chatcmpl-#{@uuid}",
model: "nosia:#{ENV["LLM_MODEL"]}",
object: "chat.completion",
system_fingerprint: "fp_nosia"
}
end
end

render json: data
end

def parse_params
params.permit(
:max_tokens,
:model,
:prompt,
:stream,
:top_p,
:top_k,
:temperature,
messages: [
:content,
:role
],
stop: [],
chat: {},
completion: {},
)

@question = params[:prompt] || params.dig(:messages, 0, :content)
@stream = ActiveModel::Type::Boolean.new.cast(params[:stream]) || false
        # nil.to_f would silently yield 0.0, so apply the env fallback before casting
        @top_p = (params[:top_p] || ENV.fetch("LLM_TOP_P", 0.9)).to_f
        @top_k = (params[:top_k] || ENV.fetch("LLM_TOP_K", 40)).to_i
end

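      # Streams the LLM output as Server-Sent Events, emitting one
      # OpenAI-style `chat.completion.chunk` per raw chunk and terminating
      # with `data: [DONE]`.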
def stream_response
checked_chunks = check_context(@question)
context = build_context(checked_chunks)
messages = build_messages(@question, context)

response.headers["Content-Type"] = "text/event-stream"

@llm.chat(messages:, top_k: @top_k, top_p: @top_p) do |stream|
stream_content = stream.raw_response.dig("message", "content")
next unless stream_content

done = !!stream.raw_response["done"]

if done
response.stream.write("data: [DONE]\n\n")
else
data = {
choices: [
delta: {
content: stream_content,
role: "assistant"
},
finish_reason: done ? "stop" : nil,
index: 0
],
created: Time.now.to_i,
id: "chatcmpl-#{@uuid}",
model: "nosia:#{ENV["LLM_MODEL"]}",
object: "chat.completion.chunk",
system_fingerprint: "fp_nosia"
}

response.stream.write("data: #{data.to_json}\n\n")
end
end
ensure
response.stream.close if response.stream.respond_to?(:close)
end

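      # Authenticates the request from its `Authorization: Bearer <token>`
      # header, looking the token up in ApiToken and exposing its owner
      # as @user.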
def verify_api_key
authenticate_or_request_with_http_token do |token, _options|
api_token = ApiToken.find_by(token:)
@user = api_token&.user
end
end
end
end
end
29 changes: 29 additions & 0 deletions app/controllers/api_tokens_controller.rb
@@ -0,0 +1,29 @@
# frozen_string_literal: true

class ApiTokensController < ApplicationController
def index
@api_tokens = Current.user.api_tokens
end

def create
@api_token = Current.user.api_tokens.new(api_token_params)

if @api_token.save
redirect_to api_tokens_path
else
      redirect_to api_tokens_path, alert: "An error occurred."
end
end

def destroy
@api_token = Current.user.api_tokens.find(params[:id])
@api_token.destroy
redirect_to api_tokens_path
end

private

def api_token_params
params.require(:api_token).permit(:name)
end
end
4 changes: 4 additions & 0 deletions app/javascript/controllers/application.js
@@ -1,6 +1,10 @@
import { Application } from "@hotwired/stimulus"
import Clipboard from "@stimulus-components/clipboard"
import RevealController from "@stimulus-components/reveal"

const application = Application.start()
application.register('clipboard', Clipboard)
application.register('reveal', RevealController)

// Configure Stimulus development experience
application.debug = false
7 changes: 7 additions & 0 deletions app/models/api_token.rb
@@ -0,0 +1,7 @@
class ApiToken < ApplicationRecord
belongs_to :user

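  # has_secure_token generates a unique, URL-safe token when the record is created.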
has_secure_token :token

validates :name, presence: true
end
1 change: 1 addition & 0 deletions app/models/user.rb
@@ -1,6 +1,7 @@
class User < ApplicationRecord
has_secure_password

has_many :api_tokens, dependent: :destroy
has_many :chats, dependent: :destroy
has_many :credentials, dependent: :destroy
has_many :sessions, dependent: :destroy
16 changes: 16 additions & 0 deletions app/views/api_tokens/_form.html.erb
@@ -0,0 +1,16 @@
<%= form_with(model: api_token, class: "flex items-center space-x-4 h-12") do |form| %>
<%= form.label :name, "Name of your API token" %>
<%= form.text_field :name,
required: true,
class:
"
resize-none rounded-xl border-none bg-slate-200 text-sm text-slate-900
focus:outline-none focus:ring-2 focus:ring-blue-500 dark:bg-slate-800
dark:text-slate-200 dark:placeholder-slate-400 dark:focus:ring-blue-500
sm:text-base
" %>

<%= form.submit "Generate",
class:
"cursor-pointer rounded-lg bg-blue-700 px-4 py-2 text-sm font-medium text-slate-200 hover:bg-blue-800 focus:outline-none focus:ring-4 focus:ring-blue-300 dark:bg-blue-600 dark:hover:bg-blue-700 dark:focus:ring-blue-800 sm:text-base disabled:bg-gray-700" %>
<% end %>