 // specific language governing permissions and limitations
 // under the License.

-use std::path::Path;
 use arrow_array::RecordBatch;
-use arrow_cast::pretty::{pretty_format_batches};
+use arrow_cast::pretty::pretty_format_batches;
+use parquet::arrow::ParquetRecordBatchStreamBuilder;
 use parquet::file::metadata::ParquetMetaData;
+use std::path::Path;

 /// This example demonstrates advanced usage of Parquet metadata.
 ///
@@ -45,6 +46,10 @@ async fn main() -> parquet::errors::Result<()> {
     let metadata_path = "thift_metadata.dat"; // todo tempdir for now use local file to inspect it

     let metadata = get_metadata_from_parquet_file(&parquet_path).await;
+    println!(
+        "Read metadata from Parquet file into memory: {} bytes",
+        metadata.memory_size()
+    );
     let metadata = prepare_metadata(metadata);
     write_metadata_to_file(metadata, &metadata_path);

@@ -53,22 +58,25 @@ async fn main() -> parquet::errors::Result<()> {
     let batches = read_parquet_file_with_metadata(&parquet_path, metadata);

     // display the results
-    let batches_string = pretty_format_batches(&batches).unwrap()
-        .to_string();
-    let batches_lines: Vec<_> = batches_string
-        .split('\n')
-        .collect();
+    let batches_string = pretty_format_batches(&batches).unwrap().to_string();
+    let batches_lines: Vec<_> = batches_string.split('\n').collect();

-    assert_eq!(batches_lines,
-        vec!["todo"]
-    );
+    assert_eq!(batches_lines, vec!["todo"]);

     Ok(())
 }

 /// Reads the metadata from a parquet file
 async fn get_metadata_from_parquet_file(file: impl AsRef<Path>) -> ParquetMetaData {
-    todo!();
+    // pretend we are reading the metadata from a remote object store
+    let file = std::fs::File::open(file).unwrap();
+    let file = tokio::fs::File::from_std(file);
+
+    let builder = ParquetRecordBatchStreamBuilder::new(file).await.unwrap();
+
+    // The metadata is Arc'd -- since we are going to modify it we
+    // need to clone it
+    builder.metadata().as_ref().clone()
 }

 /// modifies the metadata to reduce its size
@@ -98,8 +106,9 @@ fn read_metadata_from_file(file: impl AsRef<Path>) -> ParquetMetaData {
 /// beginning to read it.
 ///
 /// In this example, we read the results as Arrow record batches
-fn read_parquet_file_with_metadata(file: impl AsRef<Path>, metadata: ParquetMetaData) -> Vec<RecordBatch> {
+fn read_parquet_file_with_metadata(
+    file: impl AsRef<Path>,
+    metadata: ParquetMetaData,
+) -> Vec<RecordBatch> {
     todo!();
 }
-
-
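
The commit leaves the body of `read_parquet_file_with_metadata` as `todo!()`. A minimal sketch of one way it could be filled in, assuming the synchronous `ParquetRecordBatchReaderBuilder::new_with_metadata` / `ArrowReaderMetadata` APIs are acceptable here (rather than the async stream builder used above); this is illustrative only, not the code in the commit:

```rust
use std::path::Path;
use std::sync::Arc;

use arrow_array::RecordBatch;
use parquet::arrow::arrow_reader::{
    ArrowReaderMetadata, ArrowReaderOptions, ParquetRecordBatchReaderBuilder,
};
use parquet::file::metadata::ParquetMetaData;

fn read_parquet_file_with_metadata(
    file: impl AsRef<Path>,
    metadata: ParquetMetaData,
) -> Vec<RecordBatch> {
    let file = std::fs::File::open(file).unwrap();

    // Wrap the externally supplied metadata so the Arrow reader can use it
    // without re-reading (and re-parsing) the footer from the file.
    let reader_metadata =
        ArrowReaderMetadata::try_new(Arc::new(metadata), ArrowReaderOptions::new()).unwrap();

    // Build a reader that reuses the provided metadata instead of loading it
    // from the file, then collect all record batches.
    let reader = ParquetRecordBatchReaderBuilder::new_with_metadata(file, reader_metadata)
        .build()
        .unwrap();
    reader.collect::<Result<Vec<_>, _>>().unwrap()
}
```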