Skip to content

Commit 759767b

Browse files
committed
Add example script to write Parquet files with a Bloom filter
1 parent ada986c commit 759767b

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed

parquet/Cargo.toml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,9 @@ twox-hash = { version = "1.6", default-features = false }
6868
paste = { version = "1.0" }
6969
half = { version = "2.1", default-features = false, features = ["num-traits"] }
7070

71+
dsi-progress-logger = { version = "0.2.4", optional = true }
72+
simplelog = { version = "0.12.2", optional = true }
73+
7174
[dev-dependencies]
7275
base64 = { version = "0.22", default-features = false, features = ["std"] }
7376
criterion = { version = "0.5", default-features = false }
@@ -104,12 +107,19 @@ experimental = []
104107
async = ["futures", "tokio"]
105108
# Enable object_store integration
106109
object_store = ["dep:object_store", "async"]
110+
# Enable progress logging
111+
log = ["dep:simplelog", "dep:dsi-progress-logger"]
107112

108113
[[example]]
109114
name = "read_parquet"
110115
required-features = ["arrow"]
111116
path = "./examples/read_parquet.rs"
112117

118+
# Writes a Parquet file with a Bloom filter while logging progress.
# Needs `arrow` for `parquet::arrow::ArrowWriter` and `log` for the optional
# progress-logging dependencies.
[[example]]
name = "write_parquet"
required-features = ["arrow", "log"]
path = "./examples/write_parquet.rs"
122+
113123
[[example]]
114124
name = "async_read_parquet"
115125
required-features = ["arrow", "async"]

parquet/examples/write_parquet.rs

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use std::fs::File;
19+
use std::sync::Arc;
20+
21+
use dsi_progress_logger::prelude::*;
22+
23+
use arrow::array::{StructArray, UInt64Builder};
24+
use arrow::datatypes::DataType::UInt64;
25+
use arrow::datatypes::{Field, Schema};
26+
use parquet::arrow::ArrowWriter as ParquetWriter;
27+
use parquet::basic::Encoding;
28+
use parquet::errors::Result;
29+
use parquet::file::properties::WriterProperties;
30+
31+
fn main() -> Result<()> {
32+
let _ = simplelog::SimpleLogger::init(simplelog::LevelFilter::Info, Default::default());
33+
34+
let properties = WriterProperties::builder()
35+
.set_column_bloom_filter_enabled("id".into(), true)
36+
.set_column_encoding("id".into(), Encoding::DELTA_BINARY_PACKED)
37+
.build();
38+
let schema = Arc::new(Schema::new(vec![Field::new("id", UInt64, false)]));
39+
// Create parquet file that will be read.
40+
let path = "/tmp/test.parquet";
41+
let file = File::create(path).unwrap();
42+
let mut writer = ParquetWriter::try_new(file, schema.clone(), Some(properties))?;
43+
44+
let num_iterations = 3000;
45+
let mut pl = progress_logger!(
46+
item_name = "iterations",
47+
display_memory = true,
48+
expected_updates = Some(num_iterations as usize)
49+
);
50+
pl.start("Writing batches");
51+
let mut array_builder = UInt64Builder::new();
52+
for i in 0..num_iterations {
53+
pl.update();
54+
for j in 0..1_000_000 {
55+
array_builder.append_value(i + j);
56+
}
57+
writer.write(
58+
&StructArray::new(
59+
schema.fields().clone(),
60+
vec![Arc::new(array_builder.finish())],
61+
None,
62+
)
63+
.into(),
64+
)?;
65+
}
66+
writer.flush()?;
67+
writer.close()?;
68+
pl.done();
69+
70+
Ok(())
71+
}

0 commit comments

Comments
 (0)