Refactor lantern_extras SQL functions #351

Merged (1 commit) on Nov 4, 2024
lantern_cli/src/embeddings/core/openai_runtime.rs (8 additions & 10 deletions)

```diff
@@ -188,7 +188,7 @@ pub struct OpenAiRuntime<'a> {
     request_timeout: u64,
     base_url: String,
     headers: Vec<(String, String)>,
-    context: serde_json::Value,
+    system_prompt: serde_json::Value,
     dimensions: Option<usize>,
     deployment_type: OpenAiDeployment,
     #[allow(dead_code)]
@@ -199,9 +199,8 @@ pub struct OpenAiRuntime<'a> {
 pub struct OpenAiRuntimeParams {
     pub base_url: Option<String>,
     pub api_token: Option<String>,
-    pub azure_api_token: Option<String>,
     pub azure_entra_token: Option<String>,
-    pub context: Option<String>,
+    pub system_prompt: Option<String>,
     pub dimensions: Option<usize>,
 }
@@ -223,15 +222,14 @@ impl<'a> OpenAiRuntime<'a> {
             }
             OpenAiDeployment::Azure => {
                 // https://learn.microsoft.com/en-us/azure/ai-services/openai/reference
-                if runtime_params.azure_api_token.is_none()
-                    && runtime_params.azure_entra_token.is_none()
+                if runtime_params.api_token.is_none() && runtime_params.azure_entra_token.is_none()
                 {
                     anyhow::bail!(
-                        "'azure_api_key' or 'azure_entra_id' is required for Azure OpenAi runtime"
+                        "'api_token' or 'azure_entra_id' is required for Azure OpenAi runtime"
                     );
                 }

-                if let Some(key) = runtime_params.azure_api_token {
+                if let Some(key) = runtime_params.api_token {
                     ("api-key".to_owned(), format!("{}", key))
                 } else {
                     (
@@ -242,7 +240,7 @@ impl<'a> OpenAiRuntime<'a> {
             }
         };

-        let context = match &runtime_params.context {
+        let system_prompt = match &runtime_params.system_prompt {
             Some(system_prompt) => json!({ "role": "system", "content": system_prompt.clone()}),
             None => json!({ "role": "system", "content": "" }),
         };
@@ -257,7 +255,7 @@ impl<'a> OpenAiRuntime<'a> {
                 auth_header,
             ],
             dimensions: runtime_params.dimensions,
-            context,
+            system_prompt,
         })
     }
@@ -388,7 +386,7 @@ impl<'a> OpenAiRuntime<'a> {
             serde_json::to_string(&json!({
                 "model": model_name,
                 "messages": [
-                    self.context,
+                    self.system_prompt,
                     { "role": "user", "content": query }
                 ]
             }))?,
```
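The rename is caller-visible: wherever `runtime_params` JSON is passed, the old `context` key becomes `system_prompt`, and Azure deployments now authenticate through the shared `api_token` key instead of a separate `azure_api_token`. A minimal before/after sketch of such a payload (the values are illustrative, not taken from this PR):

```sql
-- Before this PR (hypothetical values):
--   '{"azure_api_token": "xxxx", "context": "You are a helpful assistant"}'
-- After this PR:
--   '{"api_token": "xxxx", "system_prompt": "You are a helpful assistant"}'
SELECT llm_completion(
    user_prompt => 'Hello',
    system_prompt => 'You are a helpful assistant', -- formerly "context"
    api_token => 'xxxx',                            -- formerly "azure_api_token" on Azure
    runtime => 'openai'
);
```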
lantern_cli/tests/daemon_completion_test_with_db.rs (1 addition & 1 deletion)

```diff
@@ -28,7 +28,7 @@ async fn test_daemon_completion_init_job() {
     ('Test5');

     INSERT INTO _lantern_extras_internal.embedding_generation_jobs ("id", "table", src_column, dst_column, embedding_model, runtime, runtime_params, job_type, column_type)
-    VALUES (1, '{CLIENT_TABLE_NAME}', 'title', 'num', 'openai/gpt-4o', 'openai', '{{"api_token": "{api_token}", "context": "Given text testN, return the N as number without any quotes, so for Test1 you should return 1, Test105 you should return 105" }}', 'completion', 'INT');
+    VALUES (1, '{CLIENT_TABLE_NAME}', 'title', 'num', 'openai/gpt-4o', 'openai', '{{"api_token": "{api_token}", "system_prompt": "Given text testN, return the N as number without any quotes, so for Test1 you should return 1, Test105 you should return 105" }}', 'completion', 'INT');
     "#
     ))
     .await
```
lantern_cli/tests/embedding_test_with_db.rs (3 additions & 3 deletions)

```diff
@@ -163,7 +163,7 @@ async fn test_openai_completion_from_db() {
         limit: Some(10),
         filter: None,
         runtime: Runtime::OpenAi,
-        runtime_params: format!(r#"{{"api_token": "{api_token}", "context": "you will be given text, return postgres array of TEXT[] by splitting the text by characters skipping spaces. Example 'te st' -> {{t,e,s,t}} . Do not put tailing commas, do not put double or single quotes around characters" }}"#),
+        runtime_params: format!(r#"{{"api_token": "{api_token}", "system_prompt": "you will be given text, return postgres array of TEXT[] by splitting the text by characters skipping spaces. Example 'te st' -> {{t,e,s,t}} . Do not put tailing commas, do not put double or single quotes around characters" }}"#),
         create_column: true,
         stream: true,
         job_type: Some(EmbeddingJobType::Completion),
@@ -241,7 +241,7 @@ async fn test_openai_completion_special_chars_from_db() {
         limit: Some(2),
         filter: None,
         runtime: Runtime::OpenAi,
-        runtime_params: format!(r#"{{"api_token": "{api_token}", "context": "for any input return multi line text which will contain escape characters which can potentially break postgres COPY" }}"#),
+        runtime_params: format!(r#"{{"api_token": "{api_token}", "system_prompt": "for any input return multi line text which will contain escape characters which can potentially break postgres COPY" }}"#),
         create_column: true,
         stream: true,
         job_type: Some(EmbeddingJobType::Completion),
@@ -319,7 +319,7 @@ async fn test_openai_completion_failed_rows_from_db() {
         limit: Some(10),
         filter: None,
         runtime: Runtime::OpenAi,
-        runtime_params: format!(r#"{{"api_token": "{api_token}", "context": "you will be given text, return array by splitting the text by characters skipping spaces. Example 'te st' -> [t,e,s,t]" }}"#),
+        runtime_params: format!(r#"{{"api_token": "{api_token}", "system_prompt": "you will be given text, return array by splitting the text by characters skipping spaces. Example 'te st' -> [t,e,s,t]" }}"#),
         create_column: true,
         stream: true,
         job_type: Some(EmbeddingJobType::Completion),
```
lantern_cli/tests/query_completion_test.rs (3 additions & 3 deletions)

```diff
@@ -1,7 +1,7 @@
 use lantern_cli::embeddings::core::{EmbeddingRuntime, Runtime};
 use std::env;

-static LLM_CONTEXT: &'static str = "You will be provided JSON with the following schema: {x: string}, answer to the message returning the x propery from the provided JSON object";
+static LLM_SYSTEM_PROMPT: &'static str = "You will be provided JSON with the following schema: {x: string}, answer to the message returning the x propery from the provided JSON object";

 macro_rules! query_completion_test {
     ($($name:ident: $value:expr,)*) => {
@@ -19,7 +19,7 @@ macro_rules! query_completion_test {
             return;
         }

-        let params = format!(r#"{{"api_token": "{api_token}", "context": "{LLM_CONTEXT}"}}"#);
+        let params = format!(r#"{{"api_token": "{api_token}", "system_prompt": "{LLM_SYSTEM_PROMPT}"}}"#);

         let runtime = EmbeddingRuntime::new(&runtime_name, None, &params).unwrap();
         let output = runtime.completion(
@@ -62,7 +62,7 @@ macro_rules! query_completion_test_multiple {
             expected_output.push(output);
         }

-        let params = format!(r#"{{"api_token": "{api_token}", "context": "{LLM_CONTEXT}"}}"#);
+        let params = format!(r#"{{"api_token": "{api_token}", "system_prompt": "{LLM_SYSTEM_PROMPT}"}}"#);

         let runtime = EmbeddingRuntime::new(&runtime_name, None, &params).unwrap();
         let output = runtime.batch_completion(
```
lantern_extras/README.md (59 additions & 25 deletions)

````diff
@@ -42,14 +42,34 @@ FROM papers;
 -- generate embeddings from other models which can be extended

 ```sql
+SELECT llm_embedding(
+    input => 'User input', -- User prompt for the LLM model
+    model => 'gpt-4o', -- Model for the runtime to use (default: 'gpt-4o')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide its server URL (default: OpenAI API URL)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- Azure deployments also support auth with an Entra token
+    dimensions => 1536, -- For new-generation OpenAI models you can choose the dimensionality of the returned embeddings (default: 1536)
+    input_type => 'search_query', -- Cohere runtime only: 'search_query' for search inputs, 'search_document' for stored documents (default: 'search_query')
+    runtime => 'openai' -- Runtime to use (default: 'openai'); use `SELECT get_available_runtimes()` for the list
+);
+
 -- generate text embedding
-SELECT text_embedding('BAAI/bge-base-en', 'My text input');
+SELECT llm_embedding(model => 'BAAI/bge-base-en', input => 'My text input', runtime => 'ort');
 -- generate image embedding with image url
-SELECT image_embedding('clip/ViT-B-32-visual', 'https://link-to-your-image');
+SELECT llm_embedding(model => 'clip/ViT-B-32-visual', input => 'https://link-to-your-image', runtime => 'ort');
 -- generate image embedding with image path (this path should be accessible from postgres server)
-SELECT image_embedding('clip/ViT-B-32-visual', '/path/to/image/in-postgres-server');
+SELECT llm_embedding(model => 'clip/ViT-B-32-visual', input => '/path/to/image/in-postgres-server', runtime => 'ort');
 -- get available list of models
 SELECT get_available_models();
+-- generate openai embeddings
+SELECT llm_embedding(model => 'text-embedding-3-small', api_token => '<openai_api_token>', input => 'My text input', runtime => 'openai');
+-- generate embeddings from custom openai-compatible servers
+SELECT llm_embedding(model => 'intfloat/e5-mistral-7b-instruct', api_token => '<api_token>', input => 'My text input', runtime => 'openai', base_url => 'https://my-llm-url');
+-- generate cohere embeddings
+SELECT llm_embedding(model => 'embed-multilingual-light-v3.0', api_token => '<cohere_api_token>', input => 'My text input', runtime => 'cohere');
+-- api_token can be set via GUC
+SET lantern_extras.llm_token = '<api_token>';
+SELECT llm_embedding(model => 'text-embedding-3-small', input => 'My text input', runtime => 'openai');
 ```

 ## Getting started
````
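A usage note on the new signature: since `llm_embedding` is an ordinary SQL function, it composes directly with vector search. A minimal sketch, assuming a hypothetical `papers` table whose `abstract_embedding` column holds embeddings from the same model, Lantern's `<->` distance operator, and the token GUC set as above:

```sql
SET lantern_extras.llm_token = '<openai_api_token>';

-- Hypothetical table and column names, shown only to illustrate composing
-- llm_embedding() with a nearest-neighbor ORDER BY
SELECT id, title
FROM papers
ORDER BY abstract_embedding <-> llm_embedding(
    model => 'text-embedding-3-small',
    input => 'transformer architectures',
    runtime => 'openai'
)
LIMIT 5;
```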
Expand Down Expand Up @@ -135,7 +155,7 @@ To add new textual or visual models for generating vector embeddings you can fol
After this your model should be callable from SQL like

```sql
SELECT text_embedding('your/model_name', 'Your text');
SELECT llm_embedding(model => 'your/model_name', input => 'Your text', runtime => 'ort');
```

## Lantern Daemon in SQL
````diff
@@ -158,14 +178,18 @@ To add a new embedding job, use the `add_embedding_job` function:

 ```sql
 SELECT add_embedding_job(
-    'table_name', -- Name of the table
-    'src_column', -- Source column for embeddings
-    'dst_column', -- Destination column for embeddings
-    'embedding_model', -- Embedding model to use
-    'runtime', -- Runtime environment (default: 'ort')
-    'runtime_params', -- Runtime parameters (default: '{}')
-    'pk', -- Primary key column (default: 'id')
-    'schema' -- Schema name (default: 'public')
+    table => 'articles', -- Name of the table
+    src_column => 'content', -- Source column for embeddings
+    dst_column => 'content_embedding', -- Destination column for embeddings (created automatically)
+    model => 'text-embedding-3-small', -- Model for the runtime to use (default: 'text-embedding-3-small')
+    pk => 'id', -- Primary key of the table; the table must have a primary key (default: 'id')
+    schema => 'public', -- Schema in which the table is located (default: 'public')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide its server URL (default: OpenAI API URL)
+    batch_size => 500, -- Batch size for requests to the LLM server, based on your API tier (default: determined by model and runtime)
+    dimensions => 1536, -- For new-generation OpenAI models you can choose the dimensionality of the returned embeddings (default: 1536)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- Azure deployments also support auth with an Entra token
+    runtime => 'openai' -- Runtime to use (default: 'openai'); use `SELECT get_available_runtimes()` for the list
 );
 ```
````
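Most of these parameters have defaults, so a minimal invocation can lean on the token GUC. A sketch, assuming a hypothetical `articles` table with an `id` primary key and a `content` column:

```sql
SET lantern_extras.llm_token = '<openai_api_token>';

-- Queue an embedding job with all other parameters left at their defaults
SELECT add_embedding_job(
    table => 'articles',
    src_column => 'content',
    dst_column => 'content_embedding'
);
```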

````diff
@@ -200,17 +224,19 @@ To add a new completion job, use the `add_completion_job` function:

 ```sql
 SELECT add_completion_job(
-    'table_name', -- Name of the table
-    'src_column', -- Source column for embeddings
-    'dst_column', -- Destination column for embeddings
-    'context', -- System prompt to be used for LLM (default: lantern_extras.completion_context GUC)
-    'column_type', -- Target column type to be used for destination (default: TEXT)
-    'model', -- LLM model to use (default: 'gpt-4o')
-    'batch_size', -- Batch size to use when sending batch requests (default: 2)
-    'runtime', -- Runtime environment (default: 'openai')
-    'runtime_params', -- Runtime parameters (default: '{}' inferred from GUC variables)
-    'pk', -- Primary key column (default: 'id')
-    'schema' -- Schema name (default: 'public')
+    table => 'articles', -- Name of the table
+    src_column => 'content', -- Source column for the LLM input
+    dst_column => 'content_summary', -- Destination column for the LLM response (created automatically)
+    system_prompt => 'Provide short summary for the given text', -- System prompt for the LLM (default: '')
+    column_type => 'TEXT', -- Destination column type
+    model => 'gpt-4o', -- Model for the runtime to use (default: 'gpt-4o')
+    pk => 'id', -- Primary key of the table; the table must have a primary key (default: 'id')
+    schema => 'public', -- Schema in which the table is located (default: 'public')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide its server URL (default: OpenAI API URL)
+    batch_size => 10, -- Batch size for requests to the LLM server, based on your API tier (default: determined by model and runtime)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- Azure deployments also support auth with an Entra token
+    runtime => 'openai' -- Runtime to use (default: 'openai'); use `SELECT get_available_runtimes()` for the list
 );
 ```
````
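As with embedding jobs, a minimal completion job can rely on the defaults; a sketch against the same hypothetical `articles` table, asking for a short summary per row:

```sql
SET lantern_extras.llm_token = '<openai_api_token>';

-- Queue a completion job; model, batch size, and runtime use their defaults
SELECT add_completion_job(
    table => 'articles',
    src_column => 'content',
    dst_column => 'content_summary',
    system_prompt => 'Provide short summary for the given text'
);
```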

````diff
@@ -258,6 +284,14 @@ This will return a table with the following columns:

 ***Calling LLM Completion API***
 ```sql
-SET lantern_extras.llm_token='xxxx';
-SELECT llm_completion(query, [model, context, base_url, runtime]);
+SET lantern_extras.llm_token = 'xxxx'; -- Used as the api_token when one is not passed via arguments
+SELECT llm_completion(
+    user_prompt => 'User input', -- User prompt to the LLM model
+    model => 'gpt-4o', -- Model for the runtime to use (default: 'gpt-4o')
+    system_prompt => 'Provide short summary for the given text', -- System prompt for the LLM (default: '')
+    base_url => 'https://api.openai.com', -- If you have a custom LLM deployment, provide its server URL (default: OpenAI API URL)
+    api_token => '<llm_api_token>', -- API token for the LLM server (default: inferred from the lantern_extras.llm_token GUC)
+    azure_entra_token => '', -- Azure deployments also support auth with an Entra token
+    runtime => 'openai' -- Runtime to use (default: 'openai'); use `SELECT get_available_runtimes()` for the list
+);
 ```
````
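And a usage sketch for ad-hoc calls: because `llm_completion` is a plain scalar function, it can be applied row by row, though each row triggers an API request, so keep result sets small (the `articles` table is again hypothetical):

```sql
SET lantern_extras.llm_token = '<llm_api_token>';

-- Summarize a few rows inline; one LLM request is made per returned row
SELECT id,
       llm_completion(
           user_prompt => content,
           system_prompt => 'Provide short summary for the given text'
       ) AS content_summary
FROM articles
LIMIT 3;
```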