@@ -53,6 +53,8 @@ pub const DEFAULT_BLOOM_FILTER_FPP: f64 = 0.05;
53
53
pub const DEFAULT_BLOOM_FILTER_NDV : u64 = 1_000_000_u64 ;
54
54
/// Default values for [`WriterProperties::statistics_truncate_length`]
55
55
pub const DEFAULT_STATISTICS_TRUNCATE_LENGTH : Option < usize > = None ;
56
+ /// Default value for [`WriterProperties::coerce_types`] (type coercion is off by default)
57
+ pub const DEFAULT_COERCE_TYPES : bool = false ;
56
58
57
59
/// Parquet writer version.
58
60
///
@@ -139,6 +141,7 @@ pub struct WriterProperties {
139
141
sorting_columns : Option < Vec < SortingColumn > > ,
140
142
column_index_truncate_length : Option < usize > ,
141
143
statistics_truncate_length : Option < usize > ,
144
+ coerce_types : bool ,
142
145
}
143
146
144
147
impl Default for WriterProperties {
@@ -251,6 +254,13 @@ impl WriterProperties {
251
254
self . statistics_truncate_length
252
255
}
253
256
257
+ /// Returns the `coerce_types` flag held by these writer properties.
258
+ ///
259
+ /// `true` if type coercion is enabled; see [`WriterPropertiesBuilder::set_coerce_types`].
260
+ pub fn coerce_types ( & self ) -> bool {
261
+ self . coerce_types
262
+ }
263
+
254
264
/// Returns encoding for a data page, when dictionary encoding is enabled.
255
265
/// This is not configurable.
256
266
#[ inline]
@@ -345,6 +355,7 @@ pub struct WriterPropertiesBuilder {
345
355
sorting_columns : Option < Vec < SortingColumn > > ,
346
356
column_index_truncate_length : Option < usize > ,
347
357
statistics_truncate_length : Option < usize > ,
358
+ coerce_types : bool ,
348
359
}
349
360
350
361
impl WriterPropertiesBuilder {
@@ -364,6 +375,7 @@ impl WriterPropertiesBuilder {
364
375
sorting_columns : None ,
365
376
column_index_truncate_length : DEFAULT_COLUMN_INDEX_TRUNCATE_LENGTH ,
366
377
statistics_truncate_length : DEFAULT_STATISTICS_TRUNCATE_LENGTH ,
378
+ coerce_types : DEFAULT_COERCE_TYPES ,
367
379
}
368
380
}
369
381
@@ -383,6 +395,7 @@ impl WriterPropertiesBuilder {
383
395
sorting_columns : self . sorting_columns ,
384
396
column_index_truncate_length : self . column_index_truncate_length ,
385
397
statistics_truncate_length : self . statistics_truncate_length ,
398
+ coerce_types : self . coerce_types ,
386
399
}
387
400
}
388
401
@@ -667,6 +680,13 @@ impl WriterPropertiesBuilder {
667
680
self . statistics_truncate_length = max_length;
668
681
self
669
682
}
683
+
684
+ /// Sets flag to enable/disable type coercion; stays at [`DEFAULT_COERCE_TYPES`] if never called.
685
+ /// NOTE(review): said to take precedence over globally defined settings — confirm which settings.
686
+ pub fn set_coerce_types ( mut self , coerce_types : bool ) -> Self {
687
+ self . coerce_types = coerce_types;
688
+ self
689
+ }
670
690
}
671
691
672
692
/// Controls the level of statistics to be computed by the writer
@@ -1092,7 +1112,7 @@ mod tests {
1092
1112
assert_eq ! (
1093
1113
props. key_value_metadata( ) ,
1094
1114
Some ( & vec![
1095
- KeyValue :: new( "key" . to_string( ) , "value" . to_string( ) , )
1115
+ KeyValue :: new( "key" . to_string( ) , "value" . to_string( ) )
1096
1116
] )
1097
1117
) ;
1098
1118
@@ -1156,7 +1176,7 @@ mod tests {
1156
1176
props. bloom_filter_properties( & ColumnPath :: from( "col" ) ) ,
1157
1177
Some ( & BloomFilterProperties {
1158
1178
fpp: 0.05 ,
1159
- ndv: 1_000_000_u64
1179
+ ndv: 1_000_000_u64 ,
1160
1180
} )
1161
1181
) ;
1162
1182
}
@@ -1176,7 +1196,7 @@ mod tests {
1176
1196
. bloom_filter_properties( & ColumnPath :: from( "col" ) ) ,
1177
1197
Some ( & BloomFilterProperties {
1178
1198
fpp: 0.05 ,
1179
- ndv: 100
1199
+ ndv: 100 ,
1180
1200
} )
1181
1201
) ;
1182
1202
assert_eq ! (
@@ -1186,7 +1206,7 @@ mod tests {
1186
1206
. bloom_filter_properties( & ColumnPath :: from( "col" ) ) ,
1187
1207
Some ( & BloomFilterProperties {
1188
1208
fpp: 0.1 ,
1189
- ndv: 1_000_000_u64
1209
+ ndv: 1_000_000_u64 ,
1190
1210
} )
1191
1211
) ;
1192
1212
}
0 commit comments