Skip to content

Commit ea603d4

Browse files
committed
Implement reading metadata
1 parent faf0b9e commit ea603d4

File tree

1 file changed

+23
-14
lines changed

1 file changed

+23
-14
lines changed

parquet/examples/external_metadata.rs

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,11 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use std::path::Path;
1918
use arrow_array::RecordBatch;
20-
use arrow_cast::pretty::{pretty_format_batches};
19+
use arrow_cast::pretty::pretty_format_batches;
20+
use parquet::arrow::ParquetRecordBatchStreamBuilder;
2121
use parquet::file::metadata::ParquetMetaData;
22+
use std::path::Path;
2223

2324
/// This example demonstrates advanced usage of Parquet metadata.
2425
///
@@ -45,6 +46,10 @@ async fn main() -> parquet::errors::Result<()> {
4546
let metadata_path = "thift_metadata.dat"; // TODO: use a tempdir; for now, use a local file so it can be inspected
4647

4748
let metadata = get_metadata_from_parquet_file(&parquet_path).await;
49+
println!(
50+
"Read metadata from Parquet file into memory: {} bytes",
51+
metadata.memory_size()
52+
);
4853
let metadata = prepare_metadata(metadata);
4954
write_metadata_to_file(metadata, &metadata_path);
5055

@@ -53,22 +58,25 @@ async fn main() -> parquet::errors::Result<()> {
5358
let batches = read_parquet_file_with_metadata(&parquet_path, metadata);
5459

5560
// display the results
56-
let batches_string = pretty_format_batches(&batches).unwrap()
57-
.to_string();
58-
let batches_lines :Vec<_> = batches_string
59-
.split('\n')
60-
.collect();
61+
let batches_string = pretty_format_batches(&batches).unwrap().to_string();
62+
let batches_lines: Vec<_> = batches_string.split('\n').collect();
6163

62-
assert_eq!(batches_lines,
63-
vec!["todo"]
64-
);
64+
assert_eq!(batches_lines, vec!["todo"]);
6565

6666
Ok(())
6767
}
6868

6969
/// Reads the metadata from a parquet file
7070
async fn get_metadata_from_parquet_file(file: impl AsRef<Path>) -> ParquetMetaData {
71-
todo!();
71+
// pretend we are reading the metadata from a remote object store
72+
let file = std::fs::File::open(file).unwrap();
73+
let file = tokio::fs::File::from_std(file);
74+
75+
let builder = ParquetRecordBatchStreamBuilder::new(file).await.unwrap();
76+
77+
// The metadata is Arc'd -- since we are going to modify it we
78+
// need to clone it
79+
builder.metadata().as_ref().clone()
7280
}
7381

7482
/// modifies the metadata to reduce its size
@@ -98,8 +106,9 @@ fn read_metadata_from_file(file: impl AsRef<Path>) -> ParquetMetaData {
98106
/// beginning to read it.
99107
///
100108
/// In this example, we read the results as Arrow record batches
101-
fn read_parquet_file_with_metadata(file: impl AsRef<Path>, metadata: ParquetMetaData) -> Vec<RecordBatch>{
109+
fn read_parquet_file_with_metadata(
110+
file: impl AsRef<Path>,
111+
metadata: ParquetMetaData,
112+
) -> Vec<RecordBatch> {
102113
todo!();
103114
}
104-
105-

0 commit comments

Comments
 (0)