diff --git a/crates/tabby-common/src/index/code/mod.rs b/crates/tabby-common/src/index/code/mod.rs
index d829959d462b..c0e63bacf8f3 100644
--- a/crates/tabby-common/src/index/code/mod.rs
+++ b/crates/tabby-common/src/index/code/mod.rs
@@ -16,6 +16,8 @@ pub mod fields {
     pub const CHUNK_FILEPATH: &str = "chunk_filepath";
     pub const CHUNK_LANGUAGE: &str = "chunk_language";
     pub const CHUNK_BODY: &str = "chunk_body";
+
+    // Optional field, when None, it means this chunk contains entire content of the file.
     pub const CHUNK_START_LINE: &str = "chunk_start_line";
 }
 
diff --git a/crates/tabby-index/src/code/mod.rs b/crates/tabby-index/src/code/mod.rs
index b91b5c14ebe1..6c64b1501c01 100644
--- a/crates/tabby-index/src/code/mod.rs
+++ b/crates/tabby-index/src/code/mod.rs
@@ -101,14 +101,18 @@ impl IndexAttributeBuilder for CodeBuilder {
         let source_code = source_code.clone();
         let s = stream! {
             for await (start_line, body) in CodeIntelligence::chunks(&text, &source_code.language) {
-                let attributes = json!({
+                let mut attributes = json!({
                     code::fields::CHUNK_FILEPATH: source_code.filepath,
                     code::fields::CHUNK_GIT_URL: source_code.git_url,
                     code::fields::CHUNK_LANGUAGE: source_code.language,
                     code::fields::CHUNK_BODY: body,
-                    code::fields::CHUNK_START_LINE: start_line,
                 });
 
+                // A chunk as long as the whole text is the entire file; leave its start line
+                // unset so that readers get `None` for whole-file chunks.
+                if text.len() != body.len() {
+                    attributes[code::fields::CHUNK_START_LINE] = start_line.into();
+                }
                 let embedding = embedding.clone();
                 let rewritten_body = format!("```{}\n{}\n```", source_code.filepath, body);
                 yield tokio::spawn(async move {
diff --git a/crates/tabby/src/services/code.rs b/crates/tabby/src/services/code.rs
index e0ebb0d8265c..d4eafeed5493 100644
--- a/crates/tabby/src/services/code.rs
+++ b/crates/tabby/src/services/code.rs
@@ -205,11 +205,11 @@ async fn create_hit(
             code::fields::CHUNK_LANGUAGE,
         )
         .to_owned(),
-        start_line: Some(get_json_number_field(
+        start_line: get_optional_json_number_field(
             &doc,
             schema.field_chunk_attributes,
             code::fields::CHUNK_START_LINE,
-        ) as usize),
+        ),
     };
     CodeSearchHit { scores, doc }
 }
@@ -238,16 +238,23 @@ fn get_text(doc: &TantivyDocument, field: schema::Field) -> &str {
     doc.get_first(field).unwrap().as_str().unwrap()
 }
 
-fn get_json_number_field(doc: &TantivyDocument, field: schema::Field, name: &str) -> i64 {
+/// Looks up `name` in the JSON object stored under `field` and returns it as a `usize`.
+///
+/// Returns `None` when the object has no entry called `name`, or when `as_i64` yields `None`
+/// for that entry. Panics if `doc` has no value for `field` or `as_object` returns `None`.
+fn get_optional_json_number_field(
+    doc: &TantivyDocument,
+    field: schema::Field,
+    name: &str,
+) -> Option<usize> {
     doc.get_first(field)
         .unwrap()
         .as_object()
         .unwrap()
-        .find(|(k, _)| *k == name)
-        .unwrap()
+        .find(|(k, _)| *k == name)?
         .1
         .as_i64()
-        .unwrap()
+        .map(|x| x as usize)
 }
 
 fn get_json_text_field<'a>(doc: &'a TantivyDocument, field: schema::Field, name: &str) -> &'a str {