15
15
// specific language governing permissions and limitations
16
16
// under the License.
17
17
18
- use crate :: { RowConverter , Rows , SortField } ;
19
- use arrow_array:: builder:: BufferBuilder ;
18
+ use crate :: { null_sentinel, RowConverter , Rows , SortField } ;
20
19
use arrow_array:: { Array , GenericListArray , OffsetSizeTrait } ;
20
+ use arrow_buffer:: { Buffer , MutableBuffer } ;
21
21
use arrow_data:: ArrayDataBuilder ;
22
22
use arrow_schema:: { ArrowError , SortOptions } ;
23
23
use std:: ops:: Range ;
@@ -43,12 +43,10 @@ pub fn compute_lengths<O: OffsetSizeTrait>(
43
43
/// Computes a `usize` size for one list entry from its child-row `range`
/// (presumably the encoded byte length, going by the name; confirm against the caller).
///
/// NOTE: this span is a rendered diff. Lines starting with `-` are the removed
/// implementation, lines starting with `+` are its replacement, and the bare
/// numbers are old/new line numbers.
///
/// * `rows`  - the child rows; `rows.row(i)` is read for every `i` in `range`.
/// * `range` - `None` yields 1; `Some(range)` selects the child rows to measure.
fn encoded_len ( rows : & Rows , range : Option < Range < usize > > ) -> usize {
44
44
match range {
45
45
// A missing range always costs exactly 1.
None => 1 ,
46
// Removed: an empty range used to be special-cased to 1.
- Some ( range) if range. start == range. end => 1 ,
47
46
Some ( range) => {
48
// Removed: one padded length over a single total made of the summed row
// byte lengths plus (1 + element_count) u32-sized slots.
- let element_count = range. end - range. start ;
49
- let row_bytes = range. map ( |i| rows. row ( i) . as_ref ( ) . len ( ) ) . sum :: < usize > ( ) ;
50
- let total = ( 1 + element_count) * std:: mem:: size_of :: < u32 > ( ) + row_bytes;
51
- super :: variable:: padded_length ( Some ( total) )
47
// Added: 1 plus the padded length of each child row taken individually.
// An empty range sums to 0, so the result is still 1.
+ 1 + range
48
+ . map ( |i| super :: variable:: padded_length ( Some ( rows. row ( i) . as_ref ( ) . len ( ) ) ) )
49
+ . sum :: < usize > ( )
52
50
}
53
51
}
54
52
}
@@ -63,7 +61,6 @@ pub fn encode<O: OffsetSizeTrait>(
63
61
opts : SortOptions ,
64
62
array : & GenericListArray < O > ,
65
63
) {
66
- let mut temporary = vec ! [ ] ;
67
64
offsets
68
65
. iter_mut ( )
69
66
. skip ( 1 )
@@ -74,42 +71,28 @@ pub fn encode<O: OffsetSizeTrait>(
74
71
let end = offsets[ 1 ] . as_usize ( ) ;
75
72
let range = array. is_valid ( idx) . then_some ( start..end) ;
76
73
let out = & mut data[ * offset..] ;
77
- * offset += encode_one ( out, & mut temporary , rows, range, opts)
74
+ * offset += encode_one ( out, rows, range, opts)
78
75
} ) ;
79
76
}
80
77
81
78
/// Encodes a single list entry into `out` and returns a `usize` that the
/// caller adds to its write offset into the output buffer.
///
/// NOTE: this span is a rendered diff. Lines starting with `-` are the removed
/// implementation, lines starting with `+` are its replacement, and the bare
/// numbers are old/new line numbers.
///
/// * `out`   - destination slice, written from index 0.
/// * `rows`  - the child rows; `rows.row(i)` is read for every `i` in `range`.
/// * `range` - `None` takes the null path; `Some(range)` selects the child rows.
/// * `opts`  - sort options forwarded unchanged to the `super::variable` helpers.
#[ inline]
82
79
fn encode_one (
83
80
out : & mut [ u8 ] ,
84
// Removed parameter: scratch buffer used by the old implementation.
- temporary : & mut Vec < u8 > ,
85
81
rows : & Rows ,
86
82
range : Option < Range < usize > > ,
87
83
opts : SortOptions ,
88
84
) -> usize {
89
- temporary. clear ( ) ;
90
-
91
85
match range {
92
// Removed: null and empty entries both went through `variable::encode_one`.
- None => super :: variable:: encode_one ( out, None , opts) ,
93
- Some ( range) if range. start == range. end => {
94
- super :: variable:: encode_one ( out, Some ( & [ ] ) , opts)
95
- }
86
// Added: null and empty entries use dedicated helpers instead.
+ None => super :: variable:: encode_null ( out, opts) ,
87
+ Some ( range) if range. start == range. end => super :: variable:: encode_empty ( out, opts) ,
96
88
Some ( range) => {
97
// Removed: built one buffer in `temporary` (all row bytes, then each row's
// length as a big-endian u32, then the row count as a big-endian u32)
// and encoded that buffer with a single `variable::encode_one` call.
- for row in range. clone ( ) . map ( |i| rows. row ( i) ) {
98
- temporary. extend_from_slice ( row. as_ref ( ) ) ;
99
- }
100
- for row in range. clone ( ) . map ( |i| rows. row ( i) ) {
101
- let len: u32 = row
102
- . as_ref ( )
103
- . len ( )
104
- . try_into ( )
105
- . expect ( "ListArray or LargeListArray containing a list of more than u32::MAX items is not supported" ) ;
106
- temporary. extend_from_slice ( & len. to_be_bytes ( ) ) ;
// Added: each child row is encoded on its own, directly into `out` at the
// running `offset`, which advances by each call's return value.
89
+ let mut offset = 0 ;
90
+ for i in range {
91
+ let row = rows. row ( i) ;
92
+ offset += super :: variable:: encode_one ( & mut out[ offset..] , Some ( row. data ) , opts) ;
107
93
}
108
- let row_count: u32 = ( range. end - range. start )
109
- . try_into ( )
110
- . expect ( "lists containing more than u32::MAX elements not supported" ) ;
111
- temporary. extend_from_slice ( & row_count. to_be_bytes ( ) ) ;
112
- super :: variable:: encode_one ( out, Some ( temporary) , opts)
// Added: an `encode_empty` entry is written after the last child row
// (presumably as the list terminator; confirm against `decode`), and the
// accumulated offset is returned.
94
+ offset += super :: variable:: encode_empty ( & mut out[ offset..] , opts) ;
95
+ offset
113
96
}
114
97
}
115
98
}
@@ -125,50 +108,78 @@ pub unsafe fn decode<O: OffsetSizeTrait>(
125
108
field : & SortField ,
126
109
validate_utf8 : bool ,
127
110
) -> Result < GenericListArray < O > , ArrowError > {
128
- let canonical = super :: variable:: decode_binary :: < i64 > ( rows, field. options ) ;
129
-
130
- let mut offsets = BufferBuilder :: < O > :: new ( rows. len ( ) + 1 ) ;
131
- offsets. append ( O :: from_usize ( 0 ) . unwrap ( ) ) ;
132
- let mut current_offset = 0 ;
133
-
134
- let mut child_rows = Vec :: with_capacity ( rows. len ( ) ) ;
135
- canonical. value_offsets ( ) . windows ( 2 ) . for_each ( |w| {
136
- let start = w[ 0 ] as usize ;
137
- let end = w[ 1 ] as usize ;
138
- if start == end {
139
- // Null or empty list
140
- offsets. append ( O :: from_usize ( current_offset) . unwrap ( ) ) ;
141
- return ;
142
- }
111
+ let opts = field. options ;
112
+
113
+ let mut values_bytes = 0 ;
143
114
144
- let row = & canonical. value_data ( ) [ start..end] ;
145
- let element_count_start = row. len ( ) - 4 ;
146
- let element_count =
147
- u32:: from_be_bytes ( ( & row[ element_count_start..] ) . try_into ( ) . unwrap ( ) ) as usize ;
115
+ let mut offset = 0 ;
116
+ let mut offsets = Vec :: with_capacity ( rows. len ( ) + 1 ) ;
117
+ offsets. push ( O :: usize_as ( 0 ) ) ;
148
118
149
- let lengths_start = element_count_start - ( element_count * 4 ) ;
119
+ for row in rows . iter_mut ( ) {
150
120
let mut row_offset = 0 ;
151
- row[ lengths_start..element_count_start]
152
- . chunks_exact ( 4 )
153
- . for_each ( |chunk| {
154
- let len = u32:: from_be_bytes ( chunk. try_into ( ) . unwrap ( ) ) ;
155
- let next_row_offset = row_offset + len as usize ;
156
- child_rows. push ( & row[ row_offset..next_row_offset] ) ;
157
- row_offset = next_row_offset;
121
+ loop {
122
+ let decoded = super :: variable:: decode_blocks ( & row[ row_offset..] , opts, |x| {
123
+ values_bytes += x. len ( ) ;
158
124
} ) ;
125
+ if decoded <= 1 {
126
+ offsets. push ( O :: usize_as ( offset) ) ;
127
+ break ;
128
+ }
129
+ row_offset += decoded;
130
+ offset += 1 ;
131
+ }
132
+ }
133
+ O :: from_usize ( offset) . expect ( "overflow" ) ;
159
134
160
- current_offset += element_count;
161
- offsets. append ( O :: from_usize ( current_offset) . unwrap ( ) ) ;
135
+ let mut null_count = 0 ;
136
+ let nulls = MutableBuffer :: collect_bool ( rows. len ( ) , |x| {
137
+ let valid = rows[ x] [ 0 ] != null_sentinel ( opts) ;
138
+ null_count += !valid as usize ;
139
+ valid
162
140
} ) ;
163
141
142
+ let mut values_offsets = Vec :: with_capacity ( offset) ;
143
+ let mut values_bytes = Vec :: with_capacity ( values_bytes) ;
144
+ for row in rows. iter_mut ( ) {
145
+ let mut row_offset = 0 ;
146
+ loop {
147
+ let decoded = super :: variable:: decode_blocks ( & row[ row_offset..] , opts, |x| {
148
+ values_bytes. extend_from_slice ( x)
149
+ } ) ;
150
+ row_offset += decoded;
151
+ if decoded <= 1 {
152
+ break ;
153
+ }
154
+ values_offsets. push ( values_bytes. len ( ) ) ;
155
+ }
156
+ * row = & row[ row_offset..] ;
157
+ }
158
+
159
+ if opts. descending {
160
+ values_bytes. iter_mut ( ) . for_each ( |o| * o = !* o) ;
161
+ }
162
+
163
+ let mut last_value_offset = 0 ;
164
+ let mut child_rows: Vec < _ > = values_offsets
165
+ . into_iter ( )
166
+ . map ( |offset| {
167
+ let v = & values_bytes[ last_value_offset..offset] ;
168
+ last_value_offset = offset;
169
+ v
170
+ } )
171
+ . collect ( ) ;
172
+
164
173
let child = converter. convert_raw ( & mut child_rows, validate_utf8) ?;
165
174
assert_eq ! ( child. len( ) , 1 ) ;
175
+
166
176
let child_data = child[ 0 ] . to_data ( ) ;
167
177
168
178
let builder = ArrayDataBuilder :: new ( field. data_type . clone ( ) )
169
179
. len ( rows. len ( ) )
170
- . nulls ( canonical. nulls ( ) . cloned ( ) )
171
- . add_buffer ( offsets. finish ( ) )
180
+ . null_count ( null_count)
181
+ . null_bit_buffer ( Some ( nulls. into ( ) ) )
182
+ . add_buffer ( Buffer :: from_vec ( offsets) )
172
183
. add_child_data ( child_data) ;
173
184
174
185
Ok ( GenericListArray :: from ( unsafe { builder. build_unchecked ( ) } ) )
0 commit comments