TabbyML · Sma1lboy · Jan 27, 2025 · Jan 27, 2025 · Jan 28, 2025 · Jan 28, 2025
diff --git a/ee/tabby-schema/graphql/schema.graphql b/ee/tabby-schema/graphql/schema.graphql
@@ -798,6 +798,7 @@ type Query {
   userGroups: [UserGroup!]!
   sourceIdAccessPolicies(sourceId: String!): SourceIdAccessPolicy!
   testModelConnection(backend: ModelHealthBackend!): ModelBackendHealthInfo!
+  readRepositoryRelatedQuestions(sourceId: String!): [String!]!
 }
 
 type RefreshTokenResponse {

diff --git a/ee/tabby-schema/src/schema/mod.rs b/ee/tabby-schema/src/schema/mod.rs
@@ -836,6 +836,24 @@ impl Query {
             }
         }
     }
+
+    // Suggests questions about the repository identified by `source_id`.
+    async fn read_repository_related_questions(
+        ctx: &Context,
+        source_id: String,
+    ) -> Result<Vec<String>, CoreError> {
+        let user = check_user(ctx).await?;
+        let repository = ctx.locator.repository();
+        // Fail early when the locator has no chat backend to offer.
+        let chat = ctx
+            .locator
+            .chat()
+            .ok_or(CoreError::NotFound("The Chat didn't initialize yet"))?;
+        repository
+            .read_repository_related_questions(chat, &user.policy, source_id)
+            .await
+            .map_err(Into::into)
+    }
 }
 
 #[derive(GraphQLObject)]

diff --git a/ee/tabby-schema/src/schema/repository/mod.rs b/ee/tabby-schema/src/schema/repository/mod.rs
@@ -1,6 +1,7 @@
 mod types;
 use std::{path::PathBuf, sync::Arc};
 
+use tabby_inference::ChatCompletionStream;
 pub use types::*;
 
 mod git;
@@ -293,4 +294,10 @@ pub trait RepositoryService: Send + Sync {
     fn third_party(&self) -> Arc<dyn ThirdPartyRepositoryService>;
 
     async fn list_all_code_repository(&self) -> Result<Vec<CodeRepository>>;
+    async fn read_repository_related_questions(
+        &self,
+        chat: Arc<dyn ChatCompletionStream>,
+        policy: &AccessPolicy,
+        source_id: String,
+    ) -> Result<Vec<String>>;
 }
diff --git a/ee/tabby-webserver/src/service/answer.rs b/ee/tabby-webserver/src/service/answer.rs
@@ -36,7 +36,7 @@ use tabby_schema::{
     auth::AuthenticationService,
     context::{ContextInfoHelper, ContextService},
     policy::AccessPolicy,
-    repository::{Repository, RepositoryService},
+    repository::{Repository, RepositoryKind, RepositoryService},
     thread::{
         self, CodeQueryInput, CodeSearchParamsOverrideInput, DocQueryInput, MessageAttachment,
         MessageAttachmentCodeInput, MessageAttachmentDoc, MessageAttachmentInput,

diff --git a/ee/tabby-webserver/src/service/answer/prompt_tools.rs b/ee/tabby-webserver/src/service/answer/prompt_tools.rs
@@ -1,54 +1,11 @@
 use std::sync::Arc;
 
-use anyhow::{anyhow, Result};
-use async_openai_alt::types::{
-    ChatCompletionRequestMessage, ChatCompletionRequestUserMessageArgs,
-    CreateChatCompletionRequestArgs,
-};
+use anyhow::Result;
 use tabby_inference::ChatCompletionStream;
 use tabby_schema::thread::ThreadAssistantMessageReadingCode;
 use tracing::debug;
 
-async fn request_llm(chat: Arc<dyn ChatCompletionStream>, prompt: &str) -> Result<String> {
-    let request = CreateChatCompletionRequestArgs::default()
-        .messages(vec![ChatCompletionRequestMessage::User(
-            ChatCompletionRequestUserMessageArgs::default()
-                .content(prompt)
-                .build()
-                .expect("Failed to create ChatCompletionRequestUserMessage"),
-        )])
-        .build()?;
-
-    let s = chat.chat(request).await?;
-    let content = s.choices[0]
-        .message
-        .content
-        .as_deref()
-        .ok_or_else(|| anyhow!("Failed to get content from chat completion"))?;
-
-    Ok(content.into())
-}
-
-/// Extracts items from the given content.
-/// Assumptions:
-/// 1. Each item is on a new line.
-/// 2. Items may start with a bullet point, which needs to be trimmed.
-fn transform_line_items(content: &str) -> Vec<String> {
-    content
-        .lines()
-        .map(trim_bullet)
-        .filter(|x| !x.is_empty())
-        .collect()
-}
-
-fn trim_bullet(s: &str) -> String {
-    let is_bullet = |c: char| c == '-' || c == '*' || c == '.' || c.is_numeric();
-    s.trim()
-        .trim_start_matches(is_bullet)
-        .trim_end_matches(is_bullet)
-        .trim()
-        .to_owned()
-}
+use crate::service::common_prompt_tools::{detect_content, request_llm, transform_line_items};
 
 /// Given context and a question, generate related questions.
 pub async fn pipeline_related_questions(
@@ -74,10 +31,6 @@ Remember, based on the original question and related contexts, suggest three suc
     Ok(transform_line_items(&content))
 }
 
-fn detect_content(content: &str, check: &str) -> bool {
-    content.to_lowercase().contains(check)
-}
-
 /// Decide whether the question requires knowledge from codebase content.
 pub async fn pipeline_decide_need_codebase_context(
     chat: Arc<dyn ChatCompletionStream>,
@@ -109,31 +62,3 @@ Here's the original question:
     debug!("decide_need_codebase_context: {:?} {:?}", content, context);
     Ok(context)
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_trim_bullet() {
-        assert_eq!(trim_bullet("- Hello"), "Hello");
-        assert_eq!(trim_bullet("* World"), "World");
-        assert_eq!(trim_bullet("1. Test"), "Test");
-        assert_eq!(trim_bullet(".Dot"), "Dot");
-
-        assert_eq!(trim_bullet("- Hello -"), "Hello");
-        assert_eq!(trim_bullet("1. Test 1"), "Test");
-
-        assert_eq!(trim_bullet("--** Mixed"), "Mixed");
-
-        assert_eq!(trim_bullet("  - Hello  "), "Hello");
-
-        assert_eq!(trim_bullet("-"), "");
-        assert_eq!(trim_bullet(""), "");
-        assert_eq!(trim_bullet("   "), "");
-
-        assert_eq!(trim_bullet("Hello World"), "Hello World");
-
-        assert_eq!(trim_bullet("1. *Bold* and -italic-"), "*Bold* and -italic");
-    }
-}
diff --git a/ee/tabby-webserver/src/service/common_prompt_tools.rs b/ee/tabby-webserver/src/service/common_prompt_tools.rs
@@ -0,0 +1,118 @@
+use std::sync::Arc;
+
+use anyhow::{anyhow, Result};
+use async_openai_alt::types::{
+    ChatCompletionRequestMessage, ChatCompletionRequestUserMessageArgs,
+    CreateChatCompletionRequestArgs,
+};
+use tabby_inference::ChatCompletionStream;
+use tabby_schema::repository::FileEntrySearchResult;
+
+/// Sends `prompt` as a single user message to `chat` and returns the first choice's text.
+/// Errors if the request fails, or if the response has no choice or no message content.
+pub async fn request_llm(chat: Arc<dyn ChatCompletionStream>, prompt: &str) -> Result<String> {
+    let request = CreateChatCompletionRequestArgs::default()
+        .messages(vec![ChatCompletionRequestMessage::User(
+            ChatCompletionRequestUserMessageArgs::default()
+                .content(prompt)
+                .build()
+                .expect("Failed to create ChatCompletionRequestUserMessage"),
+        )])
+        .build()?;
+    let s = chat.chat(request).await?;
+    // `first()` instead of `choices[0]`: an empty `choices` list must not panic.
+    let content = s
+        .choices
+        .first()
+        .and_then(|choice| choice.message.content.as_deref())
+        .ok_or_else(|| anyhow!("Failed to get content from chat completion"))?;
+    Ok(content.into())
+}
+
+/// Splits `content` into lines and returns each item with its bullet
+/// decoration removed via `trim_bullet`; items that end up empty are dropped.
+pub fn transform_line_items(content: &str) -> Vec<String> {
+    let mut items = Vec::new();
+    for line in content.lines() {
+        let item = trim_bullet(line);
+        if !item.is_empty() {
+            items.push(item);
+        }
+    }
+    items
+}
+
+/// Strips surrounding whitespace, then any run of `-`, `*`, `.` or numeric characters at either
+/// end, then whitespace once more, and returns the remainder as an owned `String`.
+pub fn trim_bullet(s: &str) -> String {
+    fn is_bullet(c: char) -> bool {
+        matches!(c, '-' | '*' | '.') || c.is_numeric()
+    }
+    let stripped = s.trim().trim_matches(is_bullet);
+    stripped.trim().to_owned()
+}
+
+/// Returns whether `content` contains `check`, ignoring case (both sides are lowercased).
+pub fn detect_content(content: &str, check: &str) -> bool {
+    content.to_lowercase().contains(&check.to_lowercase())
+}
+
+/// Asks the chat model for questions about a repository, given its file entries.
+/// Each entry becomes one `Type: …, Path: …` prompt line; the reply is split into line items.
+pub async fn pipeline_related_questions_with_repo_dirs(
+    chat: Arc<dyn ChatCompletionStream>,
+    files: Vec<FileEntrySearchResult>,
+) -> Result<Vec<String>> {
+    let mut listing = Vec::with_capacity(files.len());
+    for entry in &files {
+        listing.push(format!("Type: {}, Path: {}", entry.r#type, entry.path));
+    }
+    let tree = listing.join("\n");
+
+    let prompt = format!(
+        r#"You are a helpful assistant that helps the user to ask related questions about a codebase structure.
+Based on the following file structure, please generate 3 relevant questions that would help understand the codebase better.
+Each question should be no longer than 20 words and be specific enough to stand alone.
+
+File structure:
+{}
+
+Please generate 3 questions about this codebase structure that would help understand:
+1. The organization and architecture
+2. The main functionality
+3. The potential implementation details
+
+Return only the questions, one per line."#,
+        tree
+    );
+
+    let reply = request_llm(chat, &prompt).await?;
+    Ok(transform_line_items(&reply))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_trim_bullet() {
+        // (input, expected) pairs: bullets, trailing markers, whitespace, empties, inner markup.
+        let cases = [
+            ("- Hello", "Hello"),
+            ("* World", "World"),
+            ("1. Test", "Test"),
+            (".Dot", "Dot"),
+            ("- Hello -", "Hello"),
+            ("1. Test 1", "Test"),
+            ("--** Mixed", "Mixed"),
+            ("  - Hello  ", "Hello"),
+            ("-", ""),
+            ("", ""),
+            ("   ", ""),
+            ("Hello World", "Hello World"),
+            ("1. *Bold* and -italic-", "*Bold* and -italic"),
+        ];
+        for (input, expected) in cases {
+            assert_eq!(trim_bullet(input), expected);
+        }
+    }
+}
diff --git a/ee/tabby-webserver/src/service/mod.rs b/ee/tabby-webserver/src/service/mod.rs
@@ -3,6 +3,7 @@ mod analytic;
 pub mod answer;
 mod auth;
 pub mod background_job;
+pub mod common_prompt_tools;
 pub mod context;
 mod email;
 pub mod event_logger;

diff --git a/ee/tabby-webserver/src/service/repository/mod.rs b/ee/tabby-webserver/src/service/repository/mod.rs
@@ -4,12 +4,14 @@ mod third_party;
 use std::sync::Arc;
 
 use async_trait::async_trait;
+use cached::{Cached, TimedCache};
 use futures::StreamExt;
 use juniper::ID;
 use tabby_common::config::{
     config_id_to_index, config_index_to_id, CodeRepository, Config, RepositoryConfig,
 };
 use tabby_db::DbConn;
+use tabby_inference::ChatCompletionStream;
 use tabby_schema::{
     integration::IntegrationService,
     job::JobService,
@@ -20,13 +22,18 @@ use tabby_schema::{
     },
     Result,
 };
+use tokio::sync::Mutex;
+
+use super::common_prompt_tools::pipeline_related_questions_with_repo_dirs;
 
 struct RepositoryServiceImpl {
     git: Arc<dyn GitRepositoryService>,
     third_party: Arc<dyn ThirdPartyRepositoryService>,
     config: Vec<RepositoryConfig>,
+    relavent_dirs_questions_cache: Mutex<TimedCache<String, Vec<String>>>,
 }
 
+const RELEVANT_QUESTION_CACHE_LIFESPAN: u64 = 60 * 30; // 30 minutes, expressed in seconds
 pub fn create(
     db: DbConn,
     integration: Arc<dyn IntegrationService>,
@@ -38,11 +45,57 @@ pub fn create(
         config: Config::load()
             .map(|config| config.repositories)
             .unwrap_or_default(),
+        relavent_dirs_questions_cache: Mutex::new(TimedCache::with_lifespan(
+            RELEVANT_QUESTION_CACHE_LIFESPAN,
+        )),
     })
 }
 
 #[async_trait]
 impl RepositoryService for RepositoryServiceImpl {
+    /// Returns LLM-generated questions about repository `source_id`, cached per `source_id`.
+    /// Errors on an empty `source_id`, a repository hidden by `policy`, or a failed listing/chat.
+    async fn read_repository_related_questions(
+        &self,
+        chat: Arc<dyn ChatCompletionStream>,
+        policy: &AccessPolicy,
+        source_id: String,
+    ) -> Result<Vec<String>> {
+        if source_id.is_empty() {
+            return Err(anyhow::anyhow!("Invalid source_id format"))?;
+        }
+        // Authorize before consulting the cache: entries are keyed by `source_id` only, so a
+        // cache hit must not bypass the `policy` check.
+        let repositories = self.repository_list(Some(policy)).await?;
+        let repo = repositories
+            .iter()
+            .find(|r| r.source_id == source_id)
+            .ok_or_else(|| {
+                anyhow::anyhow!(
+                    "Repository not found or 'sourceId' is invalid: {}",
+                    source_id
+                )
+            })?;
+
+        // Hold the lock only for the lookup, not across the listing and chat request below.
+        let cached = {
+            let mut cache = self.relavent_dirs_questions_cache.lock().await;
+            cache.cache_get(&source_id).cloned()
+        };
+        if let Some(questions) = cached {
+            return Ok(questions);
+        }
+
+        let files = self
+            .list_files(policy, &repo.kind, &repo.id, None, Some(300))
+            .await
+            .map_err(|_| anyhow::anyhow!("Repository exists but not accessible: {}", source_id))?;
+        let questions = pipeline_related_questions_with_repo_dirs(chat, files).await?;
+        let mut cache = self.relavent_dirs_questions_cache.lock().await;
+        cache.cache_set(source_id, questions.clone());
+        Ok(questions)
+    }
+
     async fn list_all_code_repository(&self) -> Result<Vec<CodeRepository>> {
         let mut repos: Vec<CodeRepository> = self
             .git