Skip to content

Commit

Permalink
fix: flat KNN column stats order doesn't match schema (#3451)
Browse files Browse the repository at this point in the history
This causes an error when querying with a distance range while there are
unindexed rows.

---------

Signed-off-by: BubbleCal <[email protected]>
  • Loading branch information
BubbleCal authored Feb 15, 2025
1 parent a6101e5 commit 6b58bc1
Showing 1 changed file with 16 additions and 3 deletions.
19 changes: 16 additions & 3 deletions rust/lance/src/io/exec/knn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ use arrow_array::{
ArrayRef, RecordBatch, StringArray,
};
use arrow_schema::{DataType, Field, Schema, SchemaRef};
use datafusion::common::ColumnStatistics;
use datafusion::error::{DataFusionError, Result as DataFusionResult};
use datafusion::physical_plan::PlanProperties;
use datafusion::physical_plan::{
Expand Down Expand Up @@ -184,18 +185,30 @@ impl ExecutionPlan for KNNVectorDistanceExec {

/// Reports statistics for this plan node.
///
/// The result carries the input's row count and the input's per-column
/// statistics, minus any column already named `DIST_COL`, followed by a
/// single entry for the distance column in the last position.
///
/// # Errors
///
/// Propagates any error returned by the input plan's `statistics()`.
fn statistics(&self) -> DataFusionResult<Statistics> {
    let inner_stats = self.input.statistics()?;
    let schema = self.input.schema();

    // Locate the statistics of the column named `self.column` by field name
    // rather than by position, and carry over only its null count. Every
    // other field of the distance entry is left at its `Default` value.
    // If the column is not found, fall back to fully default statistics
    // instead of indexing (which would panic on empty column statistics).
    let dist_stats = inner_stats
        .column_statistics
        .iter()
        .zip(schema.fields())
        .find(|(_, field)| field.name() == &self.column)
        .map(|(stats, _)| ColumnStatistics {
            null_count: stats.null_count,
            ..Default::default()
        })
        .unwrap_or_default();

    // Keep the input columns in their original order, drop a pre-existing
    // `DIST_COL` entry so it is not reported twice, then append the distance
    // entry last.
    // NOTE(review): presumably this mirrors the output schema layout, with
    // the distance column at the end — confirm against `self.schema()`.
    let column_statistics = inner_stats
        .column_statistics
        .into_iter()
        .zip(schema.fields())
        .filter(|(_, field)| field.name() != DIST_COL)
        .map(|(stats, _)| stats)
        .chain(std::iter::once(dist_stats))
        .collect::<Vec<_>>();

    Ok(Statistics {
        num_rows: inner_stats.num_rows,
        column_statistics,
        ..Statistics::new_unknown(self.schema().as_ref())
    })
}

fn properties(&self) -> &PlanProperties {
Expand Down

0 comments on commit 6b58bc1

Please sign in to comment.