Skip to content

Commit b4e018b

Browse files
committed
enhance: Remove redundant statistics from FileScanConfig
Signed-off-by: Alan Tang <[email protected]>
1 parent e2b7919 commit b4e018b

File tree

2 files changed

+16
-20
lines changed

2 files changed

+16
-20
lines changed

datafusion/datasource/src/file_scan_config.rs

+15 -19
Original file line number | Diff line number | Diff line change
@@ -151,9 +151,6 @@ pub struct FileScanConfig {
151151
pub file_groups: Vec<FileGroup>,
152152
/// Table constraints
153153
pub constraints: Constraints,
154-
/// Estimated overall statistics of the files, taking `filters` into account.
155-
/// Defaults to [`Statistics::new_unknown`].
156-
pub statistics: Statistics,
157154
/// Columns on which to project the data. Indexes that are higher than the
158155
/// number of columns of `file_schema` refer to `table_partition_cols`.
159156
pub projection: Option<Vec<usize>>,
@@ -412,7 +409,6 @@ impl FileScanConfigBuilder {
412409
table_partition_cols,
413410
constraints,
414411
file_groups,
415-
statistics,
416412
output_ordering,
417413
file_compression_type,
418414
new_lines_in_values,
@@ -426,9 +422,9 @@ impl From<FileScanConfig> for FileScanConfigBuilder {
426422
Self {
427423
object_store_url: config.object_store_url,
428424
file_schema: config.file_schema,
429-
file_source: config.file_source,
425+
file_source: config.file_source.clone(),
430426
file_groups: config.file_groups,
431-
statistics: Some(config.statistics),
427+
statistics: config.file_source.statistics().ok(),
432428
output_ordering: config.output_ordering,
433429
file_compression_type: Some(config.file_compression_type),
434430
new_lines_in_values: Some(config.new_lines_in_values),
@@ -610,7 +606,6 @@ impl FileScanConfig {
610606
file_schema,
611607
file_groups: vec![],
612608
constraints: Constraints::empty(),
613-
statistics,
614609
projection: None,
615610
limit: None,
616611
table_partition_cols: vec![],
@@ -625,7 +620,8 @@ impl FileScanConfig {
625620
/// Set the file source
626621
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
627622
pub fn with_source(mut self, file_source: Arc<dyn FileSource>) -> Self {
628-
self.file_source = file_source.with_statistics(self.statistics.clone());
623+
self.file_source =
624+
file_source.with_statistics(Statistics::new_unknown(&self.file_schema));
629625
self
630626
}
631627

@@ -639,7 +635,6 @@ impl FileScanConfig {
639635
/// Set the statistics of the files
640636
#[deprecated(since = "47.0.0", note = "use FileScanConfigBuilder instead")]
641637
pub fn with_statistics(mut self, statistics: Statistics) -> Self {
642-
self.statistics = statistics.clone();
643638
self.file_source = self.file_source.with_statistics(statistics);
644639
self
645640
}
@@ -654,10 +649,7 @@ impl FileScanConfig {
654649
}
655650

656651
fn projected_stats(&self) -> Statistics {
657-
let statistics = self
658-
.file_source
659-
.statistics()
660-
.unwrap_or(self.statistics.clone());
652+
let statistics = self.file_source.statistics().unwrap();
661653

662654
let table_cols_stats = self
663655
.projection_indices()
@@ -804,7 +796,7 @@ impl FileScanConfig {
804796
return (
805797
Arc::clone(&self.file_schema),
806798
self.constraints.clone(),
807-
self.statistics.clone(),
799+
self.file_source.statistics().unwrap().clone(),
808800
self.output_ordering.clone(),
809801
);
810802
}
@@ -949,7 +941,11 @@ impl Debug for FileScanConfig {
949941
write!(f, "FileScanConfig {{")?;
950942
write!(f, "object_store_url={:?}, ", self.object_store_url)?;
951943

952-
write!(f, "statistics={:?}, ", self.statistics)?;
944+
write!(
945+
f,
946+
"statistics={:?}, ",
947+
self.file_source.statistics().unwrap()
948+
)?;
953949

954950
DisplayAs::fmt_as(self, DisplayFormatType::Verbose, f)?;
955951
write!(f, "}}")
@@ -2161,13 +2157,13 @@ mod tests {
21612157
assert!(config.constraints.is_empty());
21622158

21632159
// Verify statistics are set to unknown
2164-
assert_eq!(config.statistics.num_rows, Precision::Absent);
2165-
assert_eq!(config.statistics.total_byte_size, Precision::Absent);
2160+
assert_eq!(config.file_source.statistics().unwrap().num_rows, Precision::Absent);
2161+
assert_eq!(config.file_source.statistics().unwrap().total_byte_size, Precision::Absent);
21662162
assert_eq!(
2167-
config.statistics.column_statistics.len(),
2163+
config.file_source.statistics().unwrap().column_statistics.len(),
21682164
file_schema.fields().len()
21692165
);
2170-
for stat in config.statistics.column_statistics {
2166+
for stat in config.file_source.statistics().unwrap().column_statistics {
21712167
assert_eq!(stat.distinct_count, Precision::Absent);
21722168
assert_eq!(stat.min_value, Precision::Absent);
21732169
assert_eq!(stat.max_value, Precision::Absent);

datafusion/proto/src/physical_plan/to_proto.rs

+1 -1
Original file line number | Diff line number | Diff line change
@@ -507,7 +507,7 @@ pub fn serialize_file_scan_config(
507507

508508
Ok(protobuf::FileScanExecConf {
509509
file_groups,
510-
statistics: Some((&conf.statistics).into()),
510+
statistics: Some((&conf.file_source.statistics().unwrap()).into()),
511511
limit: conf.limit.map(|l| protobuf::ScanLimit { limit: l as u32 }),
512512
projection: conf
513513
.projection

0 commit comments

Comments
 (0)