Skip to content

Commit f3e7e78

Browse files
committed
Add a test for Bloom Filters written at the end of the file (BloomFilterPosition::End)
1 parent f23759a commit f3e7e78

File tree

4 files changed

+25

-4

lines changed

parquet/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ half = { version = "2.1", default-features = false, features = ["num-traits"] }
7070

7171
dsi-progress-logger = { version = "0.2.4", optional = true }
7272
simplelog = { version = "0.12.2", optional = true }
73+
backtrace = "0.3.72"
7374

7475
[dev-dependencies]
7576
base64 = { version = "0.22", default-features = false, features = ["std"] }

parquet/examples/write_parquet.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,23 @@ use arrow::datatypes::{Field, Schema};
2626
use parquet::arrow::ArrowWriter as ParquetWriter;
2727
use parquet::basic::Encoding;
2828
use parquet::errors::Result;
29-
use parquet::file::properties::WriterProperties;
29+
use parquet::file::properties::{BloomFilterPosition, WriterProperties};
3030

3131
fn main() -> Result<()> {
3232
let _ = simplelog::SimpleLogger::init(simplelog::LevelFilter::Info, Default::default());
3333

3434
let properties = WriterProperties::builder()
3535
.set_column_bloom_filter_enabled("id".into(), true)
3636
.set_column_encoding("id".into(), Encoding::DELTA_BINARY_PACKED)
37+
.set_bloom_filter_position(BloomFilterPosition::End)
3738
.build();
3839
let schema = Arc::new(Schema::new(vec![Field::new("id", UInt64, false)]));
3940
// Create parquet file that will be read.
4041
let path = "/tmp/test.parquet";
4142
let file = File::create(path).unwrap();
4243
let mut writer = ParquetWriter::try_new(file, schema.clone(), Some(properties))?;
4344

44-
let num_iterations = 3000;
45+
let num_iterations = 10;
4546
let mut pl = progress_logger!(
4647
item_name = "iterations",
4748
display_memory = true,

parquet/src/arrow/arrow_writer/mod.rs

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1689,6 +1689,7 @@ mod tests {
16891689
values: ArrayRef,
16901690
schema: SchemaRef,
16911691
bloom_filter: bool,
1692+
bloom_filter_position: BloomFilterPosition,
16921693
}
16931694

16941695
impl RoundTripOptions {
@@ -1699,6 +1700,7 @@ mod tests {
16991700
values,
17001701
schema: Arc::new(schema),
17011702
bloom_filter: false,
1703+
bloom_filter_position: BloomFilterPosition::AfterRowGroup,
17021704
}
17031705
}
17041706
}
@@ -1718,6 +1720,7 @@ mod tests {
17181720
values,
17191721
schema,
17201722
bloom_filter,
1723+
bloom_filter_position,
17211724
} = options;
17221725

17231726
let encodings = match values.data_type() {
@@ -1758,7 +1761,7 @@ mod tests {
17581761
.set_dictionary_page_size_limit(dictionary_size.max(1))
17591762
.set_encoding(*encoding)
17601763
.set_bloom_filter_enabled(bloom_filter)
1761-
.set_bloom_filter_position(BloomFilterPosition::AfterRowGroup)
1764+
.set_bloom_filter_position(bloom_filter_position)
17621765
.build();
17631766

17641767
files.push(roundtrip_opts(&expected_batch, props))
@@ -2106,6 +2109,22 @@ mod tests {
21062109
values_required::<BinaryArray, _>(many_vecs_iter);
21072110
}
21082111

2112+
#[test]
2113+
fn i32_column_bloom_filter_at_end() {
2114+
let array = Arc::new(Int32Array::from_iter(0..SMALL_SIZE as i32));
2115+
let mut options = RoundTripOptions::new(array, false);
2116+
options.bloom_filter = true;
2117+
options.bloom_filter_position = BloomFilterPosition::End;
2118+
2119+
let files = one_column_roundtrip_with_options(options);
2120+
check_bloom_filter(
2121+
files,
2122+
"col".to_string(),
2123+
(0..SMALL_SIZE as i32).collect(),
2124+
(SMALL_SIZE as i32 + 1..SMALL_SIZE as i32 + 10).collect(),
2125+
);
2126+
}
2127+
21092128
#[test]
21102129
fn i32_column_bloom_filter() {
21112130
let array = Arc::new(Int32Array::from_iter(0..SMALL_SIZE as i32));

0 commit comments

Comments (0)