Skip to content

Commit ffeda12

Browse files
authored
fix prefetch of page index (#6999)
* fix prefetch of page index * move to assertion * fmt * less invasive version * typo * fmt
1 parent 1664214 commit ffeda12

File tree

1 file changed

+39
-1
lines changed

1 file changed

+39
-1
lines changed

parquet/src/file/metadata/reader.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,9 @@ impl ParquetMetaDataReader {
412412
let bytes = match &remainder {
413413
Some((remainder_start, remainder)) if *remainder_start <= range.start => {
414414
let offset = range.start - *remainder_start;
415-
remainder.slice(offset..range.end - *remainder_start + offset)
415+
let end = offset + range.end - range.start;
416+
assert!(end <= remainder.len());
417+
remainder.slice(offset..end)
416418
}
417419
// Note: this will potentially fetch data already in remainder, this keeps things simple
418420
_ => fetch.fetch(range.start..range.end).await?,
@@ -1052,5 +1054,41 @@ mod async_tests {
10521054
.unwrap();
10531055
assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
10541056
assert!(metadata.offset_index().is_some() && metadata.column_index().is_some());
1057+
1058+
// Prefetch more than enough but less than the entire file
1059+
fetch_count.store(0, Ordering::SeqCst);
1060+
let f = MetadataFetchFn(&mut fetch);
1061+
let metadata = ParquetMetaDataReader::new()
1062+
.with_page_indexes(true)
1063+
.with_prefetch_hint(Some(len - 1000)) // prefetch less than the entire file
1064+
.load_and_finish(f, len)
1065+
.await
1066+
.unwrap();
1067+
assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
1068+
assert!(metadata.offset_index().is_some() && metadata.column_index().is_some());
1069+
1070+
// Prefetch the entire file
1071+
fetch_count.store(0, Ordering::SeqCst);
1072+
let f = MetadataFetchFn(&mut fetch);
1073+
let metadata = ParquetMetaDataReader::new()
1074+
.with_page_indexes(true)
1075+
.with_prefetch_hint(Some(len)) // prefetch entire file
1076+
.load_and_finish(f, len)
1077+
.await
1078+
.unwrap();
1079+
assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
1080+
assert!(metadata.offset_index().is_some() && metadata.column_index().is_some());
1081+
1082+
// Prefetch more than the entire file
1083+
fetch_count.store(0, Ordering::SeqCst);
1084+
let f = MetadataFetchFn(&mut fetch);
1085+
let metadata = ParquetMetaDataReader::new()
1086+
.with_page_indexes(true)
1087+
.with_prefetch_hint(Some(len + 1000)) // prefetch more than the entire file
1088+
.load_and_finish(f, len)
1089+
.await
1090+
.unwrap();
1091+
assert_eq!(fetch_count.load(Ordering::SeqCst), 1);
1092+
assert!(metadata.offset_index().is_some() && metadata.column_index().is_some());
10551093
}
10561094
}

0 commit comments

Comments
 (0)