@@ -23,6 +23,7 @@ use bytes::Bytes;
23
23
use datafusion_physical_plan:: metrics:: ExecutionPlanMetricsSet ;
24
24
use futures:: future:: BoxFuture ;
25
25
use object_store:: ObjectStore ;
26
+ use parquet:: arrow:: arrow_reader:: { ArrowReaderMetadata , ArrowReaderOptions } ;
26
27
use parquet:: arrow:: async_reader:: { AsyncFileReader , ParquetObjectReader } ;
27
28
use parquet:: file:: metadata:: ParquetMetaData ;
28
29
use std:: fmt:: Debug ;
@@ -57,9 +58,49 @@ pub trait ParquetFileReaderFactory: Debug + Send + Sync + 'static {
57
58
file_meta : FileMeta ,
58
59
metadata_size_hint : Option < usize > ,
59
60
metrics : & ExecutionPlanMetricsSet ,
60
- ) -> datafusion_common:: Result < Box < dyn AsyncFileReader + Send > > ;
61
+ ) -> datafusion_common:: Result < Box < dyn ParquetFileReader > > ;
61
62
}
62
63
64
+ /// [`AsyncFileReader`] augmented with a method to customize how file metadata is loaded.
65
+ pub trait ParquetFileReader : AsyncFileReader + Send + ' static {
66
+ /// Returns a [`AsyncFileReader`] trait object
67
+ ///
68
+ /// This can usually be implemented as `Box::new(*self)`
69
+ fn upcast ( self : Box < Self > ) -> Box < dyn AsyncFileReader + ' static > ;
70
+
71
+ /// Parses the file's metadata
72
+ ///
73
+ /// The default implementation is:
74
+ ///
75
+ /// ```
76
+ /// Box::pin(ArrowReaderMetadata::load_async(self, options))
77
+ /// ```
78
+ fn load_metadata (
79
+ & mut self ,
80
+ options : ArrowReaderOptions ,
81
+ ) -> BoxFuture < ' _ , parquet:: errors:: Result < ArrowReaderMetadata > > ;
82
+ }
83
+
84
+ macro_rules! impl_ParquetFileReader {
85
+ ( $type: ty) => {
86
+ impl ParquetFileReader for $type {
87
+ fn upcast( self : Box <Self >) -> Box <dyn AsyncFileReader + ' static > {
88
+ Box :: new( * self )
89
+ }
90
+
91
+ fn load_metadata(
92
+ & mut self ,
93
+ options: ArrowReaderOptions ,
94
+ ) -> BoxFuture <' _, parquet:: errors:: Result <ArrowReaderMetadata >> {
95
+ Box :: pin( ArrowReaderMetadata :: load_async( self , options) )
96
+ }
97
+ }
98
+ } ;
99
+ }
100
+
101
+ impl_ParquetFileReader ! ( ParquetObjectReader ) ;
102
+ impl_ParquetFileReader ! ( DefaultParquetFileReader ) ;
103
+
63
104
/// Default implementation of [`ParquetFileReaderFactory`]
64
105
///
65
106
/// This implementation:
@@ -86,12 +127,12 @@ impl DefaultParquetFileReaderFactory {
86
127
/// This implementation does not coalesce I/O operations or cache bytes. Such
87
128
/// optimizations can be done either at the object store level or by providing a
88
129
/// custom implementation of [`ParquetFileReaderFactory`].
89
- pub ( crate ) struct ParquetFileReader {
130
+ pub ( crate ) struct DefaultParquetFileReader {
90
131
/// Per-file metrics; `DefaultParquetFileReaderFactory` builds this with
/// `ParquetFileMetrics::new` before constructing the reader.
/// NOTE(review): how the metrics are updated lives in the `AsyncFileReader`
/// impl, which is not fully visible here — confirm there.
pub file_metrics : ParquetFileMetrics ,
91
132
/// The wrapped object-store reader. The factory applies
/// `with_footer_size_hint` to it when a metadata size hint is supplied.
pub inner : ParquetObjectReader ,
92
133
}
93
134
94
- impl AsyncFileReader for ParquetFileReader {
135
+ impl AsyncFileReader for DefaultParquetFileReader {
95
136
fn get_bytes (
96
137
& mut self ,
97
138
range : Range < usize > ,
@@ -126,7 +167,7 @@ impl ParquetFileReaderFactory for DefaultParquetFileReaderFactory {
126
167
file_meta : FileMeta ,
127
168
metadata_size_hint : Option < usize > ,
128
169
metrics : & ExecutionPlanMetricsSet ,
129
- ) -> datafusion_common:: Result < Box < dyn AsyncFileReader + Send > > {
170
+ ) -> datafusion_common:: Result < Box < dyn ParquetFileReader > > {
130
171
let file_metrics = ParquetFileMetrics :: new (
131
172
partition_index,
132
173
file_meta. location ( ) . as_ref ( ) ,
@@ -139,7 +180,7 @@ impl ParquetFileReaderFactory for DefaultParquetFileReaderFactory {
139
180
inner = inner. with_footer_size_hint ( hint)
140
181
} ;
141
182
142
- Ok ( Box :: new ( ParquetFileReader {
183
+ Ok ( Box :: new ( DefaultParquetFileReader {
143
184
inner,
144
185
file_metrics,
145
186
} ) )
0 commit comments