Refactor lantern_extras SQL functions #351

Merged (1 commit) on Nov 4, 2024
lantern_cli/src/embeddings/core/openai_runtime.rs (8 additions & 10 deletions)

```diff
@@ -188,7 +188,7 @@ pub struct OpenAiRuntime<'a> {
     request_timeout: u64,
     base_url: String,
     headers: Vec<(String, String)>,
-    context: serde_json::Value,
+    system_prompt: serde_json::Value,
     dimensions: Option<usize>,
     deployment_type: OpenAiDeployment,
     #[allow(dead_code)]
@@ -199,9 +199,8 @@ pub struct OpenAiRuntime<'a> {
 pub struct OpenAiRuntimeParams {
     pub base_url: Option<String>,
     pub api_token: Option<String>,
-    pub azure_api_token: Option<String>,
     pub azure_entra_token: Option<String>,
-    pub context: Option<String>,
+    pub system_prompt: Option<String>,
     pub dimensions: Option<usize>,
 }
@@ -223,15 +222,14 @@ impl<'a> OpenAiRuntime<'a> {
             }
             OpenAiDeployment::Azure => {
                 // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
-                if runtime_params.azure_api_token.is_none()
-                    && runtime_params.azure_entra_token.is_none()
+                if runtime_params.api_token.is_none() && runtime_params.azure_entra_token.is_none()
                 {
                     anyhow::bail!(
-                        "'azure_api_key' or 'azure_entra_id' is required for Azure OpenAi runtime"
+                        "'api_token' or 'azure_entra_id' is required for Azure OpenAi runtime"
                     );
                 }

-                if let Some(key) = runtime_params.azure_api_token {
+                if let Some(key) = runtime_params.api_token {
                     ("api-key".to_owned(), format!("{}", key))
                 } else {
                     (
@@ -242,7 +240,7 @@ impl<'a> OpenAiRuntime<'a> {
             }
         };

-        let context = match &runtime_params.context {
+        let system_prompt = match &runtime_params.system_prompt {
             Some(system_prompt) => json!({ "role": "system", "content": system_prompt.clone()}),
             None => json!({ "role": "system", "content": "" }),
         };
@@ -257,7 +255,7 @@ impl<'a> OpenAiRuntime<'a> {
                 auth_header,
             ],
             dimensions: runtime_params.dimensions,
-            context,
+            system_prompt,
         })
     }
@@ -388,7 +386,7 @@ impl<'a> OpenAiRuntime<'a> {
             serde_json::to_string(&json!({
                 "model": model_name,
                 "messages": [
-                    self.context,
+                    self.system_prompt,
                     { "role": "user", "content": query }
                 ]
             }))?,
```
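The rename is caller-visible: wherever `runtime_params` JSON is passed, the old `context` key becomes `system_prompt`, and Azure deployments now authenticate through the shared `api_token` key instead of a separate `azure_api_token`. A minimal before/after sketch of such a payload (the values are illustrative, not taken from this PR):

```sql
-- Before this PR (hypothetical values):
--   '{"azure_api_token": "xxxx", "context": "You are a helpful assistant"}'
-- After this PR:
--   '{"api_token": "xxxx", "system_prompt": "You are a helpful assistant"}'
SELECT llm_completion(
    user_prompt => 'Hello',
    system_prompt => 'You are a helpful assistant', -- formerly "context"
    api_token => 'xxxx',                            -- formerly "azure_api_token" on Azure
    runtime => 'openai'
);
```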
lantern_cli/tests/daemon_completion_test_with_db.rs (1 addition & 1 deletion)

```diff
@@ -28,7 +28,7 @@ async fn test_daemon_completion_init_job() {
     ('Test5');

     INSERT INTO _lantern_extras_internal.embedding_generation_jobs ("id", "table", src_column, dst_column, embedding_model, runtime, runtime_params, job_type, column_type)
-    VALUES (1, '{CLIENT_TABLE_NAME}', 'title', 'num', 'openai/gpt-4o', 'openai', '{{"api_token": "{api_token}", "context": "Given text testN, return the N as number without any quotes, so for Test1 you should return 1, Test105 you should return 105" }}', 'completion', 'INT');
+    VALUES (1, '{CLIENT_TABLE_NAME}', 'title', 'num', 'openai/gpt-4o', 'openai', '{{"api_token": "{api_token}", "system_prompt": "Given text testN, return the N as number without any quotes, so for Test1 you should return 1, Test105 you should return 105" }}', 'completion', 'INT');
     "#
     ))
     .await
```
lantern_cli/tests/embedding_test_with_db.rs (3 additions & 3 deletions)

```diff
@@ -163,7 +163,7 @@ async fn test_openai_completion_from_db() {
         limit: Some(10),
         filter: None,
         runtime: Runtime::OpenAi,
-        runtime_params: format!(r#"{{"api_token": "{api_token}", "context": "you will be given text, return postgres array of TEXT[] by splitting the text by characters skipping spaces. Example 'te st' -> {{t,e,s,t}} . Do not put tailing commas, do not put double or single quotes around characters" }}"#),
+        runtime_params: format!(r#"{{"api_token": "{api_token}", "system_prompt": "you will be given text, return postgres array of TEXT[] by splitting the text by characters skipping spaces. Example 'te st' -> {{t,e,s,t}} . Do not put tailing commas, do not put double or single quotes around characters" }}"#),
         create_column: true,
         stream: true,
         job_type: Some(EmbeddingJobType::Completion),
@@ -241,7 +241,7 @@ async fn test_openai_completion_special_chars_from_db() {
         limit: Some(2),
         filter: None,
         runtime: Runtime::OpenAi,
-        runtime_params: format!(r#"{{"api_token": "{api_token}", "context": "for any input return multi line text which will contain escape characters which can potentially break postgres COPY" }}"#),
+        runtime_params: format!(r#"{{"api_token": "{api_token}", "system_prompt": "for any input return multi line text which will contain escape characters which can potentially break postgres COPY" }}"#),
         create_column: true,
         stream: true,
         job_type: Some(EmbeddingJobType::Completion),
@@ -319,7 +319,7 @@ async fn test_openai_completion_failed_rows_from_db() {
         limit: Some(10),
         filter: None,
         runtime: Runtime::OpenAi,
-        runtime_params: format!(r#"{{"api_token": "{api_token}", "context": "you will be given text, return array by splitting the text by characters skipping spaces. Example 'te st' -> [t,e,s,t]" }}"#),
+        runtime_params: format!(r#"{{"api_token": "{api_token}", "system_prompt": "you will be given text, return array by splitting the text by characters skipping spaces. Example 'te st' -> [t,e,s,t]" }}"#),
         create_column: true,
         stream: true,
         job_type: Some(EmbeddingJobType::Completion),
```
lantern_cli/tests/query_completion_test.rs (3 additions & 3 deletions)

```diff
@@ -1,7 +1,7 @@
 use lantern_cli::embeddings::core::{EmbeddingRuntime, Runtime};
 use std::env;

-static LLM_CONTEXT: &'static str = "You will be provided JSON with the following schema: {x: string}, answer to the message returning the x propery from the provided JSON object";
+static LLM_SYSTEM_PROMPT: &'static str = "You will be provided JSON with the following schema: {x: string}, answer to the message returning the x propery from the provided JSON object";

 macro_rules! query_completion_test {
     ($($name:ident: $value:expr,)*) => {
@@ -19,7 +19,7 @@ macro_rules! query_completion_test {
             return;
         }

-        let params = format!(r#"{{"api_token": "{api_token}", "context": "{LLM_CONTEXT}"}}"#);
+        let params = format!(r#"{{"api_token": "{api_token}", "system_prompt": "{LLM_SYSTEM_PROMPT}"}}"#);

         let runtime = EmbeddingRuntime::new(&runtime_name, None, &params).unwrap();
         let output = runtime.completion(
@@ -62,7 +62,7 @@ macro_rules! query_completion_test_multiple {
             expected_output.push(output);
         }

-        let params = format!(r#"{{"api_token": "{api_token}", "context": "{LLM_CONTEXT}"}}"#);
+        let params = format!(r#"{{"api_token": "{api_token}", "system_prompt": "{LLM_SYSTEM_PROMPT}"}}"#);

         let runtime = EmbeddingRuntime::new(&runtime_name, None, &params).unwrap();
         let output = runtime.batch_completion(
```
lantern_extras/README.md (59 additions & 25 deletions)

````diff
@@ -42,14 +42,34 @@ FROM papers;
 -- generate embeddings from other models which can be extended

 ```sql
+SELECT llm_embedding(
+    input => 'User input', -- User prompt for the LLM model
+    model => 'gpt-4o', -- Model for the runtime to use (default: 'gpt-4o')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide its server URL (default: OpenAI API URL)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- Azure deployments also support auth with an Entra token
+    dimensions => 1536, -- For new-generation OpenAI models you can choose the dimensionality of the returned embeddings (default: 1536)
+    input_type => 'search_query', -- Cohere runtime only: 'search_query' for search inputs, 'search_document' for stored documents (default: 'search_query')
+    runtime => 'openai' -- Runtime to use (default: 'openai'); use `SELECT get_available_runtimes()` for the list
+);
+
 -- generate text embedding
-SELECT text_embedding('BAAI/bge-base-en', 'My text input');
+SELECT llm_embedding(model => 'BAAI/bge-base-en', input => 'My text input', runtime => 'ort');
 -- generate image embedding with image url
-SELECT image_embedding('clip/ViT-B-32-visual', 'https://link-to-your-image');
+SELECT llm_embedding(model => 'clip/ViT-B-32-visual', input => 'https://link-to-your-image', runtime => 'ort');
 -- generate image embedding with image path (this path should be accessible from postgres server)
-SELECT image_embedding('clip/ViT-B-32-visual', '/path/to/image/in-postgres-server');
+SELECT llm_embedding(model => 'clip/ViT-B-32-visual', input => '/path/to/image/in-postgres-server', runtime => 'ort');
 -- get available list of models
 SELECT get_available_models();
+-- generate openai embeddings
+SELECT llm_embedding(model => 'text-embedding-3-small', api_token => '<openai_api_token>', input => 'My text input', runtime => 'openai');
+-- generate embeddings from custom openai-compatible servers
+SELECT llm_embedding(model => 'intfloat/e5-mistral-7b-instruct', api_token => '<api_token>', input => 'My text input', runtime => 'openai', base_url => 'https://my-llm-url');
+-- generate cohere embeddings
+SELECT llm_embedding(model => 'embed-multilingual-light-v3.0', api_token => '<cohere_api_token>', input => 'My text input', runtime => 'cohere');
+-- api_token can be set via GUC
+SET lantern_extras.llm_token = '<api_token>';
+SELECT llm_embedding(model => 'text-embedding-3-small', input => 'My text input', runtime => 'openai');
 ```

 ## Getting started
````
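A usage note on the new signature: since `llm_embedding` is an ordinary SQL function, it composes directly with vector search. A minimal sketch, assuming a hypothetical `papers` table whose `abstract_embedding` column holds embeddings from the same model, Lantern's `<->` distance operator, and the token GUC set as above:

```sql
SET lantern_extras.llm_token = '<openai_api_token>';

-- Hypothetical table and column names, shown only to illustrate composing
-- llm_embedding() with a nearest-neighbor ORDER BY
SELECT id, title
FROM papers
ORDER BY abstract_embedding <-> llm_embedding(
    model => 'text-embedding-3-small',
    input => 'transformer architectures',
    runtime => 'openai'
)
LIMIT 5;
```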
Expand Down Expand Up @@ -135,7 +155,7 @@ To add new textual or visual models for generating vector embeddings you can fol
After this your model should be callable from SQL like

```sql
SELECT text_embedding('your/model_name', 'Your text');
SELECT llm_embedding(model => 'your/model_name', input => 'Your text', runtime => 'ort');
```

## Lantern Daemon in SQL
````diff
@@ -158,14 +178,18 @@ To add a new embedding job, use the `add_embedding_job` function:

 ```sql
 SELECT add_embedding_job(
-    'table_name', -- Name of the table
-    'src_column', -- Source column for embeddings
-    'dst_column', -- Destination column for embeddings
-    'embedding_model', -- Embedding model to use
-    'runtime', -- Runtime environment (default: 'ort')
-    'runtime_params', -- Runtime parameters (default: '{}')
-    'pk', -- Primary key column (default: 'id')
-    'schema' -- Schema name (default: 'public')
+    table => 'articles', -- Name of the table
+    src_column => 'content', -- Source column for embeddings
+    dst_column => 'content_embedding', -- Destination column for embeddings (created automatically)
+    model => 'text-embedding-3-small', -- Model for the runtime to use (default: 'text-embedding-3-small')
+    pk => 'id', -- Primary key of the table; the table must have a primary key (default: 'id')
+    schema => 'public', -- Schema in which the table is located (default: 'public')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide its server URL (default: OpenAI API URL)
+    batch_size => 500, -- Batch size for requests to the LLM server, based on your API tier (default: determined by model and runtime)
+    dimensions => 1536, -- For new-generation OpenAI models you can choose the dimensionality of the returned embeddings (default: 1536)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- Azure deployments also support auth with an Entra token
+    runtime => 'openai' -- Runtime to use (default: 'openai'); use `SELECT get_available_runtimes()` for the list
 );
 ```
````
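Most of these parameters have defaults, so a minimal invocation can lean on the token GUC. A sketch, assuming a hypothetical `articles` table with an `id` primary key and a `content` column:

```sql
SET lantern_extras.llm_token = '<openai_api_token>';

-- Queue an embedding job with all other parameters left at their defaults
SELECT add_embedding_job(
    table => 'articles',
    src_column => 'content',
    dst_column => 'content_embedding'
);
```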

````diff
@@ -200,17 +224,19 @@ To add a new completion job, use the `add_completion_job` function:

 ```sql
 SELECT add_completion_job(
-    'table_name', -- Name of the table
-    'src_column', -- Source column for embeddings
-    'dst_column', -- Destination column for embeddings
-    'context', -- System prompt to be used for LLM (default: lantern_extras.completion_context GUC)
-    'column_type', -- Target column type to be used for destination (default: TEXT)
-    'model', -- LLM model to use (default: 'gpt-4o')
-    'batch_size', -- Batch size to use when sending batch requests (default: 2)
-    'runtime', -- Runtime environment (default: 'openai')
-    'runtime_params', -- Runtime parameters (default: '{}' inferred from GUC variables)
-    'pk', -- Primary key column (default: 'id')
-    'schema' -- Schema name (default: 'public')
+    table => 'articles', -- Name of the table
+    src_column => 'content', -- Source column for the LLM input
+    dst_column => 'content_summary', -- Destination column for the LLM response (created automatically)
+    system_prompt => 'Provide short summary for the given text', -- System prompt for the LLM (default: '')
+    column_type => 'TEXT', -- Destination column type
+    model => 'gpt-4o', -- Model for the runtime to use (default: 'gpt-4o')
+    pk => 'id', -- Primary key of the table; the table must have a primary key (default: 'id')
+    schema => 'public', -- Schema in which the table is located (default: 'public')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide its server URL (default: OpenAI API URL)
+    batch_size => 10, -- Batch size for requests to the LLM server, based on your API tier (default: determined by model and runtime)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- Azure deployments also support auth with an Entra token
+    runtime => 'openai' -- Runtime to use (default: 'openai'); use `SELECT get_available_runtimes()` for the list
 );
 ```
````
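As with embedding jobs, a minimal completion job can rely on the defaults; a sketch against the same hypothetical `articles` table, asking for a short summary per row:

```sql
SET lantern_extras.llm_token = '<openai_api_token>';

-- Queue a completion job; model, batch size, and runtime use their defaults
SELECT add_completion_job(
    table => 'articles',
    src_column => 'content',
    dst_column => 'content_summary',
    system_prompt => 'Provide short summary for the given text'
);
```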

````diff
@@ -258,6 +284,14 @@ This will return a table with the following columns:

 ***Calling LLM Completion API***
 ```sql
-SET lantern_extras.llm_token='xxxx';
-SELECT llm_completion(query, [model, context, base_url, runtime]);
+SET lantern_extras.llm_token = 'xxxx'; -- Used as the api_token when one is not passed via arguments
+SELECT llm_completion(
+    user_prompt => 'User input', -- User prompt to the LLM model
+    model => 'gpt-4o', -- Model for the runtime to use (default: 'gpt-4o')
+    system_prompt => 'Provide short summary for the given text', -- System prompt for the LLM (default: '')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide its server URL (default: OpenAI API URL)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- Azure deployments also support auth with an Entra token
+    runtime => 'openai' -- Runtime to use (default: 'openai'); use `SELECT get_available_runtimes()` for the list
+);
 ```
````
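And a usage sketch for ad-hoc calls: because `llm_completion` is a plain scalar function, it can be applied row by row, though each row triggers an API request, so keep result sets small (the `articles` table is again hypothetical):

```sql
SET lantern_extras.llm_token = '<llm_api_token>';

-- Summarize a few rows inline; one LLM request is made per returned row
SELECT id,
       llm_completion(
           user_prompt => content,
           system_prompt => 'Provide short summary for the given text'
       ) AS content_summary
FROM articles
LIMIT 3;
```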