From f9375505a0ed50ea15dbee4bbf00d448b718f362 Mon Sep 17 00:00:00 2001
From: Meng Zhang
Date: Wed, 25 Dec 2024 15:15:18 +0800
Subject: [PATCH] set CHUNK_START_LINE to None if it contains the entire file

---
Review notes (commentary only, not part of the commit message):
- tabby-index: CHUNK_START_LINE is now written only when body.len() differs
  from text.len(), i.e. when the chunk is not the whole file. The posted
  revision compared with `==`, which is the opposite of what the subject line
  and the new comment in tabby-common describe.
- tabby: restored the `Option<usize>` return type of
  get_optional_json_number_field; its type parameter was missing.
- Line breaks and indentation of this patch were reconstructed. The commit id
  and the `index` blob ids below predate the corrections above.

 crates/tabby-common/src/index/code/mod.rs |  2 ++
 crates/tabby-index/src/code/mod.rs        |  6 ++++--
 crates/tabby/src/services/code.rs         | 15 +++++++++------
 3 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/crates/tabby-common/src/index/code/mod.rs b/crates/tabby-common/src/index/code/mod.rs
index 7380b1966e4a..bc9355a38bbe 100644
--- a/crates/tabby-common/src/index/code/mod.rs
+++ b/crates/tabby-common/src/index/code/mod.rs
@@ -14,6 +14,8 @@ pub mod fields {
     pub const CHUNK_FILEPATH: &str = "chunk_filepath";
     pub const CHUNK_LANGUAGE: &str = "chunk_language";
     pub const CHUNK_BODY: &str = "chunk_body";
+
+    // Optional field, when None, it means this chunk contains entire content of the file.
     pub const CHUNK_START_LINE: &str = "chunk_start_line";
 }
 
diff --git a/crates/tabby-index/src/code/mod.rs b/crates/tabby-index/src/code/mod.rs
index acf913e8a97b..73388b773976 100644
--- a/crates/tabby-index/src/code/mod.rs
+++ b/crates/tabby-index/src/code/mod.rs
@@ -99,14 +99,16 @@ impl IndexAttributeBuilder for CodeBuilder {
         let source_code = source_code.clone();
         let s = stream! {
             for await (start_line, body) in CodeIntelligence::chunks(&text, &source_code.language) {
-                let attributes = json!({
+                let mut attributes = json!({
                     code::fields::CHUNK_FILEPATH: source_code.filepath,
                     code::fields::CHUNK_GIT_URL: source_code.git_url,
                     code::fields::CHUNK_LANGUAGE: source_code.language,
                     code::fields::CHUNK_BODY: body,
-                    code::fields::CHUNK_START_LINE: start_line,
                 });
 
+                if text.len() != body.len() {
+                    attributes[code::fields::CHUNK_START_LINE] = start_line.into();
+                }
                 let embedding = embedding.clone();
                 let rewritten_body = format!("```{}\n{}\n```", source_code.filepath, body);
                 yield tokio::spawn(async move {
diff --git a/crates/tabby/src/services/code.rs b/crates/tabby/src/services/code.rs
index dd74bb3a7249..b02ba114acaa 100644
--- a/crates/tabby/src/services/code.rs
+++ b/crates/tabby/src/services/code.rs
@@ -191,11 +191,11 @@ fn create_hit(scores: CodeSearchScores, doc: TantivyDocument) -> CodeSearchHit {
             code::fields::CHUNK_LANGUAGE,
         )
         .to_owned(),
-        start_line: Some(get_json_number_field(
+        start_line: get_optional_json_number_field(
             &doc,
             schema.field_chunk_attributes,
             code::fields::CHUNK_START_LINE,
-        ) as usize),
+        ),
     };
     CodeSearchHit { scores, doc }
 }
@@ -204,16 +204,19 @@ fn get_text(doc: &TantivyDocument, field: schema::Field) -> &str {
     doc.get_first(field).unwrap().as_str().unwrap()
 }
 
-fn get_json_number_field(doc: &TantivyDocument, field: schema::Field, name: &str) -> i64 {
+fn get_optional_json_number_field(
+    doc: &TantivyDocument,
+    field: schema::Field,
+    name: &str,
+) -> Option<usize> {
     doc.get_first(field)
         .unwrap()
         .as_object()
         .unwrap()
-        .find(|(k, _)| *k == name)
-        .unwrap()
+        .find(|(k, _)| *k == name)?
         .1
         .as_i64()
-        .unwrap()
+        .map(|x| x as usize)
 }
 
 fn get_json_text_field<'a>(doc: &'a TantivyDocument, field: schema::Field, name: &str) -> &'a str {