Skip to content

Commit b010fc6

Browse files
committed
Add coerce_types flag to parquet WriterProperties (apache#1938)
1 parent 36a6e51 commit b010fc6

File tree

1 file changed

+24
-4
lines changed

1 file changed

+24
-4
lines changed

parquet/src/file/properties.rs

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,8 @@ pub const DEFAULT_BLOOM_FILTER_FPP: f64 = 0.05;
5353
pub const DEFAULT_BLOOM_FILTER_NDV: u64 = 1_000_000_u64;
5454
/// Default value for [`WriterProperties::statistics_truncate_length`]
5555
pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH: Option<usize> = None;
56+
/// Default value for [`WriterProperties::coerce_types`]
57+
pub const DEFAULT_COERCE_TYPES: bool = false;
5658

5759
/// Parquet writer version.
5860
///
@@ -139,6 +141,7 @@ pub struct WriterProperties {
139141
sorting_columns: Option<Vec<SortingColumn>>,
140142
column_index_truncate_length: Option<usize>,
141143
statistics_truncate_length: Option<usize>,
144+
coerce_types: bool,
142145
}
143146

144147
impl Default for WriterProperties {
@@ -251,6 +254,13 @@ impl WriterProperties {
251254
self.statistics_truncate_length
252255
}
253256

257+
/// Returns the `coerce_types` flag.
258+
///
259+
/// `true` if type coercion is enabled.
260+
pub fn coerce_types(&self) -> bool {
261+
self.coerce_types
262+
}
263+
254264
/// Returns encoding for a data page, when dictionary encoding is enabled.
255265
/// This is not configurable.
256266
#[inline]
@@ -345,6 +355,7 @@ pub struct WriterPropertiesBuilder {
345355
sorting_columns: Option<Vec<SortingColumn>>,
346356
column_index_truncate_length: Option<usize>,
347357
statistics_truncate_length: Option<usize>,
358+
coerce_types: bool,
348359
}
349360

350361
impl WriterPropertiesBuilder {
@@ -364,6 +375,7 @@ impl WriterPropertiesBuilder {
364375
sorting_columns: None,
365376
column_index_truncate_length: DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH,
366377
statistics_truncate_length: DEFAULT_STATISTICS_TRUNCATE_LENGTH,
378+
coerce_types: DEFAULT_COERCE_TYPES,
367379
}
368380
}
369381

@@ -383,6 +395,7 @@ impl WriterPropertiesBuilder {
383395
sorting_columns: self.sorting_columns,
384396
column_index_truncate_length: self.column_index_truncate_length,
385397
statistics_truncate_length: self.statistics_truncate_length,
398+
coerce_types: self.coerce_types,
386399
}
387400
}
388401

@@ -667,6 +680,13 @@ impl WriterPropertiesBuilder {
667680
self.statistics_truncate_length = max_length;
668681
self
669682
}
683+
684+
/// Sets the flag that enables or disables type coercion.
685+
/// Takes precedence over globally defined settings.
686+
pub fn set_coerce_types(mut self, coerce_types: bool) -> Self {
687+
self.coerce_types = coerce_types;
688+
self
689+
}
670690
}
671691

672692
/// Controls the level of statistics to be computed by the writer
@@ -1092,7 +1112,7 @@ mod tests {
10921112
assert_eq!(
10931113
props.key_value_metadata(),
10941114
Some(&vec![
1095-
KeyValue::new("key".to_string(), "value".to_string(),)
1115+
KeyValue::new("key".to_string(), "value".to_string())
10961116
])
10971117
);
10981118

@@ -1156,7 +1176,7 @@ mod tests {
11561176
props.bloom_filter_properties(&ColumnPath::from("col")),
11571177
Some(&BloomFilterProperties {
11581178
fpp: 0.05,
1159-
ndv: 1_000_000_u64
1179+
ndv: 1_000_000_u64,
11601180
})
11611181
);
11621182
}
@@ -1176,7 +1196,7 @@ mod tests {
11761196
.bloom_filter_properties(&ColumnPath::from("col")),
11771197
Some(&BloomFilterProperties {
11781198
fpp: 0.05,
1179-
ndv: 100
1199+
ndv: 100,
11801200
})
11811201
);
11821202
assert_eq!(
@@ -1186,7 +1206,7 @@ mod tests {
11861206
.bloom_filter_properties(&ColumnPath::from("col")),
11871207
Some(&BloomFilterProperties {
11881208
fpp: 0.1,
1189-
ndv: 1_000_000_u64
1209+
ndv: 1_000_000_u64,
11901210
})
11911211
);
11921212
}

0 commit comments

Comments
 (0)