1
1
use arrow:: array:: { Array , BinaryViewArray , DictionaryArray , DictionaryKey , Utf8ViewArray } ;
2
2
use arrow:: bitmap:: { Bitmap , MutableBitmap } ;
3
3
use arrow:: datatypes:: { ArrowDataType , IntegerType } ;
4
+ use num_traits:: ToPrimitive ;
4
5
use polars_error:: { polars_bail, PolarsResult } ;
5
6
6
7
use super :: binary:: {
@@ -15,19 +16,23 @@ use super::primitive::{
15
16
use super :: { binview, nested, Nested , WriteOptions } ;
16
17
use crate :: arrow:: read:: schema:: is_nullable;
17
18
use crate :: arrow:: write:: { slice_nested_leaf, utils} ;
18
- use crate :: parquet:: encoding:: hybrid_rle:: encode ;
19
+ use crate :: parquet:: encoding:: hybrid_rle:: encode_u32 ;
19
20
use crate :: parquet:: encoding:: Encoding ;
20
21
use crate :: parquet:: page:: { DictPage , Page } ;
21
22
use crate :: parquet:: schema:: types:: PrimitiveType ;
22
23
use crate :: parquet:: statistics:: { serialize_statistics, ParquetStatistics } ;
23
- use crate :: write:: DynIter ;
24
+ use crate :: write:: { to_nested , DynIter , ParquetType } ;
24
25
25
26
pub ( crate ) fn encode_as_dictionary_optional (
26
27
array : & dyn Array ,
27
- nested : & [ Nested ] ,
28
28
type_ : PrimitiveType ,
29
29
options : WriteOptions ,
30
30
) -> Option < PolarsResult < DynIter < ' static , PolarsResult < Page > > > > {
31
+ let nested = to_nested ( array, & ParquetType :: PrimitiveType ( type_. clone ( ) ) )
32
+ . ok ( ) ?
33
+ . pop ( )
34
+ . unwrap ( ) ;
35
+
31
36
let dtype = Box :: new ( array. data_type ( ) . clone ( ) ) ;
32
37
33
38
let len_before = array. len ( ) ;
@@ -47,11 +52,35 @@ pub(crate) fn encode_as_dictionary_optional(
47
52
if ( array. values ( ) . len ( ) as f64 ) / ( len_before as f64 ) > 0.75 {
48
53
return None ;
49
54
}
55
+ if array. values ( ) . len ( ) . to_u16 ( ) . is_some ( ) {
56
+ let array = arrow:: compute:: cast:: cast (
57
+ array,
58
+ & ArrowDataType :: Dictionary (
59
+ IntegerType :: UInt16 ,
60
+ Box :: new ( array. values ( ) . data_type ( ) . clone ( ) ) ,
61
+ false ,
62
+ ) ,
63
+ Default :: default ( ) ,
64
+ )
65
+ . unwrap ( ) ;
66
+
67
+ let array = array
68
+ . as_any ( )
69
+ . downcast_ref :: < DictionaryArray < u16 > > ( )
70
+ . unwrap ( ) ;
71
+ return Some ( array_to_pages (
72
+ array,
73
+ type_,
74
+ & nested,
75
+ options,
76
+ Encoding :: RleDictionary ,
77
+ ) ) ;
78
+ }
50
79
51
80
Some ( array_to_pages (
52
81
array,
53
82
type_,
54
- nested,
83
+ & nested,
55
84
options,
56
85
Encoding :: RleDictionary ,
57
86
) )
@@ -87,15 +116,15 @@ fn serialize_keys_values<K: DictionaryKey>(
87
116
buffer. push ( num_bits as u8 ) ;
88
117
89
118
// followed by the encoded indices.
90
- Ok ( encode :: < u32 , _ , _ > ( buffer, keys, num_bits) ?)
119
+ Ok ( encode_u32 ( buffer, keys, num_bits) ?)
91
120
} else {
92
121
let num_bits = utils:: get_bit_width ( keys. clone ( ) . max ( ) . unwrap_or ( 0 ) as u64 ) ;
93
122
94
123
// num_bits as a single byte
95
124
buffer. push ( num_bits as u8 ) ;
96
125
97
126
// followed by the encoded indices.
98
- Ok ( encode :: < u32 , _ , _ > ( buffer, keys, num_bits) ?)
127
+ Ok ( encode_u32 ( buffer, keys, num_bits) ?)
99
128
}
100
129
}
101
130
0 commit comments