Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(tabby-webserver): add readRepositoryRelatedQuestions query and related functionality #3768

1 change: 1 addition & 0 deletions ee/tabby-schema/graphql/schema.graphql
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,7 @@ type Query {
userGroups: [UserGroup!]!
sourceIdAccessPolicies(sourceId: String!): SourceIdAccessPolicy!
testModelConnection(backend: ModelHealthBackend!): ModelBackendHealthInfo!
readRepositoryRelatedQuestions(sourceId: String!): [String!]!
}

type RefreshTokenResponse {
Expand Down
18 changes: 18 additions & 0 deletions ee/tabby-schema/src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -836,6 +836,24 @@ impl Query {
}
}
}

// Resolver for `readRepositoryRelatedQuestions`: resolves the calling user, fetches the
// chat backend from the locator and delegates to the repository service.
// Plain `//` comments are used on purpose so nothing is added to the GraphQL description.
async fn read_repository_related_questions(
    ctx: &Context,
    source_id: String,
) -> Result<Vec<String>, CoreError> {
    let user = check_user(ctx).await?;

    // Same lookup order as before: repository service first, then the chat backend.
    let repository = ctx.locator.repository();
    let chat = ctx
        .locator
        .chat()
        .ok_or(CoreError::NotFound("The Chat didn't initialize yet"))?;

    repository
        .read_repository_related_questions(chat, &user.policy, source_id)
        .await
        .map_err(Into::into)
}
}

#[derive(GraphQLObject)]
Expand Down
7 changes: 7 additions & 0 deletions ee/tabby-schema/src/schema/repository/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
mod types;
use std::{path::PathBuf, sync::Arc};

use tabby_inference::ChatCompletionStream;
pub use types::*;

mod git;
Expand Down Expand Up @@ -293,4 +294,10 @@ pub trait RepositoryService: Send + Sync {
fn third_party(&self) -> Arc<dyn ThirdPartyRepositoryService>;

async fn list_all_code_repository(&self) -> Result<Vec<CodeRepository>>;
/// Returns suggested questions about the repository identified by `source_id`.
///
/// `chat` is the chat-completion backend made available to the implementation and
/// `policy` is the access policy of the requesting user.
/// NOTE(review): whether results are cached and how `policy` is enforced is
/// implementation-defined — confirm against the implementor.
async fn read_repository_related_questions(
&self,
chat: Arc<dyn ChatCompletionStream>,
policy: &AccessPolicy,
source_id: String,
) -> Result<Vec<String>>;
}
2 changes: 1 addition & 1 deletion ee/tabby-webserver/src/service/answer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ use tabby_schema::{
auth::AuthenticationService,
context::{ContextInfoHelper, ContextService},
policy::AccessPolicy,
repository::{Repository, RepositoryService},
repository::{Repository, RepositoryKind, RepositoryService},
thread::{
self, CodeQueryInput, CodeSearchParamsOverrideInput, DocQueryInput, MessageAttachment,
MessageAttachmentCodeInput, MessageAttachmentDoc, MessageAttachmentInput,
Expand Down
79 changes: 2 additions & 77 deletions ee/tabby-webserver/src/service/answer/prompt_tools.rs
Original file line number Diff line number Diff line change
@@ -1,54 +1,11 @@
use std::sync::Arc;

use anyhow::{anyhow, Result};
use async_openai_alt::types::{
ChatCompletionRequestMessage, ChatCompletionRequestUserMessageArgs,
CreateChatCompletionRequestArgs,
};
use anyhow::Result;
use tabby_inference::ChatCompletionStream;
use tabby_schema::thread::ThreadAssistantMessageReadingCode;
use tracing::debug;

/// Sends `prompt` as a single user message to `chat` and returns the text of the first choice.
///
/// # Errors
/// Fails when the request cannot be built, when the chat backend returns an error, or when
/// the response has no choices or the first choice carries no content.
async fn request_llm(chat: Arc<dyn ChatCompletionStream>, prompt: &str) -> Result<String> {
    let message = ChatCompletionRequestUserMessageArgs::default()
        .content(prompt)
        .build()?;
    let request = CreateChatCompletionRequestArgs::default()
        .messages(vec![ChatCompletionRequestMessage::User(message)])
        .build()?;

    let response = chat.chat(request).await?;
    // `first()` instead of `[0]`: an empty `choices` list must surface as an error, not a panic.
    let content = response
        .choices
        .first()
        .and_then(|choice| choice.message.content.as_deref())
        .ok_or_else(|| anyhow!("Failed to get content from chat completion"))?;

    Ok(content.to_owned())
}

/// Extracts items from the given content.
/// Assumptions:
/// 1. Each item is on a new line.
/// 2. Items may start with a bullet point, which needs to be trimmed.
fn transform_line_items(content: &str) -> Vec<String> {
content
.lines()
.map(trim_bullet)
.filter(|x| !x.is_empty())
.collect()
}

/// Strips whitespace and bullet-like characters (`-`, `*`, `.`, digits) from both ends of `s`.
fn trim_bullet(s: &str) -> String {
    fn is_bullet(c: char) -> bool {
        matches!(c, '-' | '*' | '.') || c.is_numeric()
    }

    // Outer whitespace first, then bullet characters on both ends, then any
    // whitespace that was sitting between a bullet and the text.
    let without_bullets = s.trim().trim_matches(is_bullet);
    without_bullets.trim().to_owned()
}
use crate::service::common_prompt_tools::{detect_content, request_llm, transform_line_items};

/// Given context and a question, generate related questions.
pub async fn pipeline_related_questions(
Expand All @@ -74,10 +31,6 @@ Remember, based on the original question and related contexts, suggest three suc
Ok(transform_line_items(&content))
}

/// Checks whether `check` occurs in `content`, ignoring case.
///
/// Both strings are lowercased before the comparison, so `check` may be given in any case.
/// An empty `check` always matches.
fn detect_content(content: &str, check: &str) -> bool {
    content.to_lowercase().contains(&check.to_lowercase())
}

/// Decide whether the question requires knowledge from codebase content.
pub async fn pipeline_decide_need_codebase_context(
chat: Arc<dyn ChatCompletionStream>,
Expand Down Expand Up @@ -109,31 +62,3 @@ Here's the original question:
debug!("decide_need_codebase_context: {:?} {:?}", content, context);
Ok(context)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Table of `(input, expected)` pairs for `trim_bullet`.
    #[test]
    fn test_trim_bullet() {
        let cases = [
            // Leading bullets.
            ("- Hello", "Hello"),
            ("* World", "World"),
            ("1. Test", "Test"),
            (".Dot", "Dot"),
            // Trailing bullets are removed as well.
            ("- Hello -", "Hello"),
            ("1. Test 1", "Test"),
            // Runs of mixed bullet characters.
            ("--** Mixed", "Mixed"),
            // Surrounding whitespace.
            (" - Hello ", "Hello"),
            // Inputs that reduce to nothing.
            ("-", ""),
            ("", ""),
            (" ", ""),
            // Nothing to trim.
            ("Hello World", "Hello World"),
            // Inner markers are untouched.
            ("1. *Bold* and -italic-", "*Bold* and -italic"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(trim_bullet(input), expected);
        }
    }
}
118 changes: 118 additions & 0 deletions ee/tabby-webserver/src/service/common_prompt_tools.rs
Sma1lboy marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use std::sync::Arc;

use anyhow::{anyhow, Result};
use async_openai_alt::types::{
ChatCompletionRequestMessage, ChatCompletionRequestUserMessageArgs,
CreateChatCompletionRequestArgs,
};
use tabby_inference::ChatCompletionStream;
use tabby_schema::repository::FileEntrySearchResult;

/// Sends `prompt` as a single user message to `chat` and returns the text of the first choice.
///
/// # Errors
/// Fails when the request cannot be built, when the chat backend returns an error, or when
/// the response has no choices or the first choice carries no content.
pub async fn request_llm(chat: Arc<dyn ChatCompletionStream>, prompt: &str) -> Result<String> {
    let message = ChatCompletionRequestUserMessageArgs::default()
        .content(prompt)
        .build()?;
    let request = CreateChatCompletionRequestArgs::default()
        .messages(vec![ChatCompletionRequestMessage::User(message)])
        .build()?;

    let response = chat.chat(request).await?;
    // `first()` instead of `[0]`: an empty `choices` list must surface as an error, not a panic.
    let content = response
        .choices
        .first()
        .and_then(|choice| choice.message.content.as_deref())
        .ok_or_else(|| anyhow!("Failed to get content from chat completion"))?;

    Ok(content.to_owned())
}

/// Extracts items from the given content.
/// Assumptions:
/// 1. Each item is on a new line.
/// 2. Items may start with a bullet point, which needs to be trimmed.
pub fn transform_line_items(content: &str) -> Vec<String> {
content
.lines()
.map(trim_bullet)
.filter(|x| !x.is_empty())
.collect()
}

/// Strips whitespace and bullet-like characters (`-`, `*`, `.`, digits) from both ends of `s`
/// and returns the remainder as an owned string.
pub fn trim_bullet(s: &str) -> String {
    fn is_bullet(c: char) -> bool {
        matches!(c, '-' | '*' | '.') || c.is_numeric()
    }

    // Outer whitespace first, then bullet characters on both ends, then any
    // whitespace that was sitting between a bullet and the text.
    let without_bullets = s.trim().trim_matches(is_bullet);
    without_bullets.trim().to_owned()
}

/// Checks whether `check` occurs in `content`, ignoring case.
///
/// Both strings are lowercased before the comparison, so `check` may be given in any case.
/// An empty `check` always matches.
pub fn detect_content(content: &str, check: &str) -> bool {
    content.to_lowercase().contains(&check.to_lowercase())
}

// this is use to input a files tree, and generate some related question relate to repo dirs
pub async fn pipeline_related_questions_with_repo_dirs(
Sma1lboy marked this conversation as resolved.
Show resolved Hide resolved
chat: Arc<dyn ChatCompletionStream>,
files: Vec<FileEntrySearchResult>,
) -> Result<Vec<String>> {
// Convert files into a formatted string for the prompt
let files_content = files
.iter()
.map(|f| format!("Type: {}, Path: {}", f.r#type, f.path))
.collect::<Vec<_>>()
.join("\n");

let prompt = format!(
r#"You are a helpful assistant that helps the user to ask related questions about a codebase structure.
Based on the following file structure, please generate 3 relevant questions that would help understand the codebase better.
Each question should be no longer than 20 words and be specific enough to stand alone.

File structure:
{}

Please generate 3 questions about this codebase structure that would help understand:
1. The organization and architecture
2. The main functionality
3. The potential implementation details

Return only the questions, one per line."#,
files_content
);

let content = request_llm(chat, &prompt).await?;
Ok(transform_line_items(&content))
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Table of `(input, expected)` pairs for `trim_bullet`.
    #[test]
    fn test_trim_bullet() {
        let cases = [
            // Leading bullets.
            ("- Hello", "Hello"),
            ("* World", "World"),
            ("1. Test", "Test"),
            (".Dot", "Dot"),
            // Trailing bullets are removed as well.
            ("- Hello -", "Hello"),
            ("1. Test 1", "Test"),
            // Runs of mixed bullet characters.
            ("--** Mixed", "Mixed"),
            // Surrounding whitespace.
            (" - Hello ", "Hello"),
            // Inputs that reduce to nothing.
            ("-", ""),
            ("", ""),
            (" ", ""),
            // Nothing to trim.
            ("Hello World", "Hello World"),
            // Inner markers are untouched.
            ("1. *Bold* and -italic-", "*Bold* and -italic"),
        ];

        for &(input, expected) in &cases {
            assert_eq!(trim_bullet(input), expected);
        }
    }
}
1 change: 1 addition & 0 deletions ee/tabby-webserver/src/service/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod analytic;
pub mod answer;
mod auth;
pub mod background_job;
pub mod common_prompt_tools;
pub mod context;
mod email;
pub mod event_logger;
Expand Down
53 changes: 53 additions & 0 deletions ee/tabby-webserver/src/service/repository/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@ mod third_party;
use std::sync::Arc;

use async_trait::async_trait;
use cached::{Cached, TimedCache};
use futures::StreamExt;
use juniper::ID;
use tabby_common::config::{
config_id_to_index, config_index_to_id, CodeRepository, Config, RepositoryConfig,
};
use tabby_db::DbConn;
use tabby_inference::ChatCompletionStream;
use tabby_schema::{
integration::IntegrationService,
job::JobService,
Expand All @@ -20,13 +22,18 @@ use tabby_schema::{
},
Result,
};
use tokio::sync::Mutex;

use super::common_prompt_tools::pipeline_related_questions_with_repo_dirs;

/// Repository service that combines the git and third-party repository services with
/// the repositories declared in the configuration file.
struct RepositoryServiceImpl {
// Service handling git repositories.
git: Arc<dyn GitRepositoryService>,
// Service handling third-party (integration-provided) repositories.
third_party: Arc<dyn ThirdPartyRepositoryService>,
// Repositories taken from the loaded `Config`; empty when loading the config fails.
config: Vec<RepositoryConfig>,
// Time-limited cache of generated related questions, keyed by repository `source_id`.
// NOTE(review): "relavent" is a misspelling of "relevant"; the name is left as-is here
// because other code in this file refers to the field by name.
relavent_dirs_questions_cache: Mutex<TimedCache<String, Vec<String>>>,
}

/// Lifespan, in seconds, of cached related-question results (30 minutes).
const RELEVANT_QUESTION_CACHE_LIFESPAN: u64 = 60 * 30;
pub fn create(
db: DbConn,
integration: Arc<dyn IntegrationService>,
Expand All @@ -38,11 +45,57 @@ pub fn create(
config: Config::load()
.map(|config| config.repositories)
.unwrap_or_default(),
relavent_dirs_questions_cache: Mutex::new(TimedCache::with_lifespan(
RELEVANT_QUESTION_CACHE_LIFESPAN,
)),
})
}

#[async_trait]
impl RepositoryService for RepositoryServiceImpl {
/// Returns LLM-generated questions about the repository identified by `source_id`.
///
/// Results are memoized per `source_id` in a timed cache. The policy-scoped repository
/// lookup runs *before* the cache is consulted, so a cached answer is only handed to
/// callers whose `policy` lets them see the repository.
///
/// # Errors
/// Fails when `source_id` is empty, when no repository visible under `policy` has that
/// `source_id`, when its files cannot be listed, or when the chat request fails.
async fn read_repository_related_questions(
    &self,
    chat: Arc<dyn ChatCompletionStream>,
    policy: &AccessPolicy,
    source_id: String,
) -> Result<Vec<String>> {
    if source_id.is_empty() {
        return Err(anyhow::anyhow!("Invalid source_id format").into());
    }

    // Authorize first: checking the cache before this lookup would let any caller read
    // questions that were cached on behalf of a user with access to the repository.
    let repositories = self.repository_list(Some(policy)).await?;
    let repo = repositories
        .iter()
        .find(|r| r.source_id == source_id)
        .ok_or_else(|| {
            anyhow::anyhow!(
                "Repository not found or 'sourceId' is invalid: {}",
                source_id
            )
        })?;

    // Keep the guard in its own scope so the cache is not locked for every repository
    // while the file listing and the LLM request below are in flight. The trade-off is
    // that concurrent misses for the same repository may each query the LLM once.
    {
        let mut cache = self.relavent_dirs_questions_cache.lock().await;
        if let Some(questions) = cache.cache_get(&source_id) {
            return Ok(questions.clone());
        }
    }

    let files = self
        .list_files(policy, &repo.kind, &repo.id, None, Some(300))
        .await
        .map_err(|_| {
            anyhow::anyhow!("Repository exists but not accessible: {}", source_id)
        })?;

    let questions = pipeline_related_questions_with_repo_dirs(chat, files).await?;
    self.relavent_dirs_questions_cache
        .lock()
        .await
        .cache_set(source_id, questions.clone());
    Ok(questions)
}

async fn list_all_code_repository(&self) -> Result<Vec<CodeRepository>> {
let mut repos: Vec<CodeRepository> = self
.git
Expand Down