@@ -89,7 +89,56 @@ impl<F: MetadataFetch> MetadataLoader<F> {
89
89
file_size : usize ,
90
90
prefetch : Option < usize > ,
91
91
) -> Result < Self > {
92
- todo ! ( )
92
+ if file_size < 8 {
93
+ return Err ( ParquetError :: EOF ( format ! (
94
+ "file size of {file_size} is less than footer"
95
+ ) ) ) ;
96
+ }
97
+
98
+ // If a size hint is provided, read more than the minimum size
99
+ // to try and avoid a second fetch.
100
+ let footer_start = if let Some ( size_hint) = prefetch {
101
+ file_size. saturating_sub ( size_hint)
102
+ } else {
103
+ file_size - 8
104
+ } ;
105
+
106
+ let suffix = fetch. fetch ( ( footer_start..file_size) . into ( ) ) . await ?;
107
+ let suffix_len = suffix. len ( ) ;
108
+
109
+ let mut footer = [ 0 ; 8 ] ;
110
+ footer. copy_from_slice ( & suffix[ suffix_len - 8 ..suffix_len] ) ;
111
+
112
+ let length = decode_footer ( & footer) ?;
113
+
114
+ if file_size < length + 8 {
115
+ return Err ( ParquetError :: EOF ( format ! (
116
+ "file size of {} is less than footer + metadata {}" ,
117
+ file_size,
118
+ length + 8
119
+ ) ) ) ;
120
+ }
121
+
122
+ // Did not fetch the entire file metadata in the initial read, need to make a second request
123
+ let ( metadata, remainder) = if length > suffix_len - 8 {
124
+ let metadata_start = file_size - length - 8 ;
125
+ let meta = fetch. fetch ( ( metadata_start..file_size - 8 ) . into ( ) ) . await ?;
126
+ ( decode_metadata ( & meta) ?, None )
127
+ } else {
128
+ let metadata_offset = length + 8 ;
129
+ let metadata_start = suffix_len - metadata_offset;
130
+
131
+ let slice = & suffix[ metadata_start..suffix_len - 8 ] ;
132
+ (
133
+ decode_metadata ( slice) ?,
134
+ Some ( ( 0 , suffix. slice ( ..metadata_start) ) ) ,
135
+ )
136
+ } ;
137
+ Ok ( Self {
138
+ fetch,
139
+ metadata,
140
+ remainder,
141
+ } )
93
142
}
94
143
95
144
/// Create a new [`MetadataLoader`] from an existing [`ParquetMetaData`]
@@ -245,7 +294,7 @@ mod tests {
245
294
GetRange :: Bounded ( range) => range,
246
295
GetRange :: Offset ( offset) => offset..file_size,
247
296
GetRange :: Suffix ( end_offset) => {
248
- ( file_size. saturating_sub ( end_offset. try_into ( ) . unwrap ( ) ) ..file_size)
297
+ file_size. saturating_sub ( end_offset. try_into ( ) . unwrap ( ) ) ..file_size
249
298
}
250
299
} ;
251
300
file. seek ( SeekFrom :: Start ( range. start as _ ) ) ?;
@@ -268,7 +317,14 @@ mod tests {
268
317
fetch_count. fetch_add ( 1 , Ordering :: SeqCst ) ;
269
318
futures:: future:: ready ( read_range ( & mut file, range) )
270
319
} ;
320
+ // Known file size, unknown metadata size
321
+ let actual = fetch_parquet_metadata ( & mut fetch, Some ( len) , None )
322
+ . await
323
+ . unwrap ( ) ;
324
+ assert_eq ! ( actual. file_metadata( ) . schema( ) , expected) ;
325
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 2 ) ;
271
326
327
+ fetch_count. store ( 0 , Ordering :: SeqCst ) ;
272
328
let actual = fetch_parquet_metadata ( & mut fetch, None , None )
273
329
. await
274
330
. unwrap ( ) ;
@@ -369,5 +425,63 @@ mod tests {
369
425
assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 1 ) ;
370
426
let metadata = loader. finish ( ) ;
371
427
assert ! ( metadata. offset_index( ) . is_some( ) && metadata. column_index( ) . is_some( ) ) ;
428
+
429
+ // Known-size file
430
+ fetch_count. store ( 0 , Ordering :: SeqCst ) ;
431
+ let f = MetadataFetchFn ( & mut fetch) ;
432
+ let mut loader = MetadataLoader :: load_absolute ( f, len, None ) . await . unwrap ( ) ;
433
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 2 ) ;
434
+ loader. load_page_index ( true , true ) . await . unwrap ( ) ;
435
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 3 ) ;
436
+ let metadata = loader. finish ( ) ;
437
+ assert ! ( metadata. offset_index( ) . is_some( ) && metadata. column_index( ) . is_some( ) ) ;
438
+
439
+ // Prefetch just footer exactly
440
+ fetch_count. store ( 0 , Ordering :: SeqCst ) ;
441
+ let f = MetadataFetchFn ( & mut fetch) ;
442
+ let mut loader = MetadataLoader :: load_absolute ( f, len, Some ( 1729 ) )
443
+ . await
444
+ . unwrap ( ) ;
445
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 1 ) ;
446
+ loader. load_page_index ( true , true ) . await . unwrap ( ) ;
447
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 2 ) ;
448
+ let metadata = loader. finish ( ) ;
449
+ assert ! ( metadata. offset_index( ) . is_some( ) && metadata. column_index( ) . is_some( ) ) ;
450
+
451
+ // Prefetch more than footer but not enough
452
+ fetch_count. store ( 0 , Ordering :: SeqCst ) ;
453
+ let f = MetadataFetchFn ( & mut fetch) ;
454
+ let mut loader = MetadataLoader :: load_absolute ( f, len, Some ( 130649 ) )
455
+ . await
456
+ . unwrap ( ) ;
457
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 1 ) ;
458
+ loader. load_page_index ( true , true ) . await . unwrap ( ) ;
459
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 2 ) ;
460
+ let metadata = loader. finish ( ) ;
461
+ assert ! ( metadata. offset_index( ) . is_some( ) && metadata. column_index( ) . is_some( ) ) ;
462
+
463
+ // Prefetch exactly enough
464
+ fetch_count. store ( 0 , Ordering :: SeqCst ) ;
465
+ let f = MetadataFetchFn ( & mut fetch) ;
466
+ let mut loader = MetadataLoader :: load_absolute ( f, len, Some ( 130650 ) )
467
+ . await
468
+ . unwrap ( ) ;
469
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 1 ) ;
470
+ loader. load_page_index ( true , true ) . await . unwrap ( ) ;
471
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 1 ) ;
472
+ let metadata = loader. finish ( ) ;
473
+ assert ! ( metadata. offset_index( ) . is_some( ) && metadata. column_index( ) . is_some( ) ) ;
474
+
475
+ // Prefetch more than enough
476
+ fetch_count. store ( 0 , Ordering :: SeqCst ) ;
477
+ let f = MetadataFetchFn ( & mut fetch) ;
478
+ let mut loader = MetadataLoader :: load_absolute ( f, len, Some ( 131651 ) )
479
+ . await
480
+ . unwrap ( ) ;
481
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 1 ) ;
482
+ loader. load_page_index ( true , true ) . await . unwrap ( ) ;
483
+ assert_eq ! ( fetch_count. load( Ordering :: SeqCst ) , 1 ) ;
484
+ let metadata = loader. finish ( ) ;
485
+ assert ! ( metadata. offset_index( ) . is_some( ) && metadata. column_index( ) . is_some( ) ) ;
372
486
}
373
487
}
0 commit comments