Skip to content

Commit

Permalink
set CHUNK_START_LINE to None if it contains the entire file
Browse files Browse the repository at this point in the history
  • Loading branch information
wsxiaoys committed Dec 25, 2024
1 parent eb67ae9 commit f937550
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 8 deletions.
2 changes: 2 additions & 0 deletions crates/tabby-common/src/index/code/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ pub mod fields {
pub const CHUNK_FILEPATH: &str = "chunk_filepath";
pub const CHUNK_LANGUAGE: &str = "chunk_language";
pub const CHUNK_BODY: &str = "chunk_body";

// Optional field: when it is None, this chunk contains the entire content of the file.
pub const CHUNK_START_LINE: &str = "chunk_start_line";
}

Expand Down
6 changes: 4 additions & 2 deletions crates/tabby-index/src/code/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,16 @@ impl IndexAttributeBuilder<SourceCode> for CodeBuilder {
let source_code = source_code.clone();
let s = stream! {
for await (start_line, body) in CodeIntelligence::chunks(&text, &source_code.language) {
let attributes = json!({
let mut attributes = json!({
code::fields::CHUNK_FILEPATH: source_code.filepath,
code::fields::CHUNK_GIT_URL: source_code.git_url,
code::fields::CHUNK_LANGUAGE: source_code.language,
code::fields::CHUNK_BODY: body,
code::fields::CHUNK_START_LINE: start_line,
});

if text.len() == body.len() {
attributes[code::fields::CHUNK_START_LINE] = start_line.into();
}
let embedding = embedding.clone();
let rewritten_body = format!("```{}\n{}\n```", source_code.filepath, body);
yield tokio::spawn(async move {
Expand Down
15 changes: 9 additions & 6 deletions crates/tabby/src/services/code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -191,11 +191,11 @@ fn create_hit(scores: CodeSearchScores, doc: TantivyDocument) -> CodeSearchHit {
code::fields::CHUNK_LANGUAGE,
)
.to_owned(),
start_line: Some(get_json_number_field(
start_line: get_optional_json_number_field(
&doc,
schema.field_chunk_attributes,
code::fields::CHUNK_START_LINE,
) as usize),
),
};
CodeSearchHit { scores, doc }
}
Expand All @@ -204,16 +204,19 @@ fn get_text(doc: &TantivyDocument, field: schema::Field) -> &str {
doc.get_first(field).unwrap().as_str().unwrap()
}

fn get_json_number_field(doc: &TantivyDocument, field: schema::Field, name: &str) -> i64 {
fn get_optional_json_number_field(
doc: &TantivyDocument,
field: schema::Field,
name: &str,
) -> Option<usize> {
doc.get_first(field)
.unwrap()
.as_object()
.unwrap()
.find(|(k, _)| *k == name)
.unwrap()
.find(|(k, _)| *k == name)?
.1
.as_i64()
.unwrap()
.map(|x| x as usize)
}

fn get_json_text_field<'a>(doc: &'a TantivyDocument, field: schema::Field, name: &str) -> &'a str {
Expand Down

0 comments on commit f937550

Please sign in to comment.