@@ -46,6 +46,8 @@ use crate::cast::dictionary::*;
46
46
use crate :: cast:: list:: * ;
47
47
use crate :: cast:: string:: * ;
48
48
49
+ use arrow_buffer:: ScalarBuffer ;
50
+ use arrow_data:: ByteView ;
49
51
use chrono:: { NaiveTime , Offset , TimeZone , Utc } ;
50
52
use std:: cmp:: Ordering ;
51
53
use std:: sync:: Arc ;
@@ -119,6 +121,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
119
121
| Utf8
120
122
| LargeBinary
121
123
| LargeUtf8
124
+ | BinaryView
125
+ | Utf8View
122
126
| List ( _)
123
127
| LargeList ( _)
124
128
| FixedSizeList ( _, _)
@@ -192,8 +196,8 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
192
196
DataType :: is_integer ( to_type) || DataType :: is_floating ( to_type) || to_type == & Utf8 || to_type == & LargeUtf8
193
197
}
194
198
195
- ( Binary , LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary ( _) ) => true ,
196
- ( LargeBinary , Binary | Utf8 | LargeUtf8 | FixedSizeBinary ( _) ) => true ,
199
+ ( Binary , LargeBinary | Utf8 | LargeUtf8 | FixedSizeBinary ( _) | BinaryView ) => true ,
200
+ ( LargeBinary , Binary | Utf8 | LargeUtf8 | FixedSizeBinary ( _) | BinaryView ) => true ,
197
201
( FixedSizeBinary ( _) , Binary | LargeBinary ) => true ,
198
202
(
199
203
Utf8 | LargeUtf8 ,
@@ -213,6 +217,7 @@ pub fn can_cast_types(from_type: &DataType, to_type: &DataType) -> bool {
213
217
| Timestamp ( Nanosecond , _)
214
218
| Interval ( _) ,
215
219
) => true ,
220
+ ( Utf8 | LargeUtf8 , Utf8View ) => true ,
216
221
( Utf8 | LargeUtf8 , _) => to_type. is_numeric ( ) && to_type != & Float16 ,
217
222
( _, Utf8 | LargeUtf8 ) => from_type. is_primitive ( ) ,
218
223
@@ -611,6 +616,8 @@ pub fn cast_with_options(
611
616
| Utf8
612
617
| LargeBinary
613
618
| LargeUtf8
619
+ | BinaryView
620
+ | Utf8View
614
621
| List ( _)
615
622
| LargeList ( _)
616
623
| FixedSizeList ( _, _)
@@ -1120,6 +1127,7 @@ pub fn cast_with_options(
1120
1127
let binary = BinaryArray :: from ( array. as_string :: < i32 > ( ) . clone ( ) ) ;
1121
1128
cast_byte_container :: < BinaryType , LargeBinaryType > ( & binary)
1122
1129
}
1130
+ Utf8View => cast_byte_to_view :: < Utf8Type , StringViewType > ( array) ,
1123
1131
LargeUtf8 => cast_byte_container :: < Utf8Type , LargeUtf8Type > ( array) ,
1124
1132
Time32 ( TimeUnit :: Second ) => parse_string :: < Time32SecondType , i32 > ( array, cast_options) ,
1125
1133
Time32 ( TimeUnit :: Millisecond ) => {
@@ -1179,6 +1187,7 @@ pub fn cast_with_options(
1179
1187
LargeBinary => Ok ( Arc :: new ( LargeBinaryArray :: from (
1180
1188
array. as_string :: < i64 > ( ) . clone ( ) ,
1181
1189
) ) ) ,
1190
+ Utf8View => cast_byte_to_view :: < LargeUtf8Type , StringViewType > ( array) ,
1182
1191
Time32 ( TimeUnit :: Second ) => parse_string :: < Time32SecondType , i64 > ( array, cast_options) ,
1183
1192
Time32 ( TimeUnit :: Millisecond ) => {
1184
1193
parse_string :: < Time32MillisecondType , i64 > ( array, cast_options)
@@ -1226,6 +1235,7 @@ pub fn cast_with_options(
1226
1235
FixedSizeBinary ( size) => {
1227
1236
cast_binary_to_fixed_size_binary :: < i32 > ( array, * size, cast_options)
1228
1237
}
1238
+ BinaryView => cast_byte_to_view :: < BinaryType , BinaryViewType > ( array) ,
1229
1239
_ => Err ( ArrowError :: CastError ( format ! (
1230
1240
"Casting from {from_type:?} to {to_type:?} not supported" ,
1231
1241
) ) ) ,
@@ -1240,6 +1250,7 @@ pub fn cast_with_options(
1240
1250
FixedSizeBinary ( size) => {
1241
1251
cast_binary_to_fixed_size_binary :: < i64 > ( array, * size, cast_options)
1242
1252
}
1253
+ BinaryView => cast_byte_to_view :: < LargeBinaryType , BinaryViewType > ( array) ,
1243
1254
_ => Err ( ArrowError :: CastError ( format ! (
1244
1255
"Casting from {from_type:?} to {to_type:?} not supported" ,
1245
1256
) ) ) ,
@@ -2238,6 +2249,56 @@ where
2238
2249
Ok ( Arc :: new ( GenericByteArray :: < TO > :: from ( array_data) ) )
2239
2250
}
2240
2251
2252
+ /// Helper function to cast from one `ByteArrayType` array to `ByteViewType` array.
2253
+ fn cast_byte_to_view < FROM , V > ( array : & dyn Array ) -> Result < ArrayRef , ArrowError >
2254
+ where
2255
+ FROM : ByteArrayType ,
2256
+ FROM :: Offset : OffsetSizeTrait + ToPrimitive ,
2257
+ V : ByteViewType ,
2258
+ {
2259
+ let data = array. to_data ( ) ;
2260
+ assert_eq ! ( data. data_type( ) , & FROM :: DATA_TYPE ) ;
2261
+
2262
+ let len = array. len ( ) ;
2263
+ let str_values_buf = data. buffers ( ) [ 1 ] . clone ( ) ;
2264
+ let offsets = data. buffers ( ) [ 0 ] . typed_data :: < FROM :: Offset > ( ) ;
2265
+
2266
+ let mut views_builder = BufferBuilder :: < u128 > :: new ( len) ;
2267
+ for w in offsets. windows ( 2 ) {
2268
+ let offset = w[ 0 ] . to_u32 ( ) . unwrap ( ) ;
2269
+ let end = w[ 1 ] . to_u32 ( ) . unwrap ( ) ;
2270
+ let value_buf = & str_values_buf[ offset as usize ..end as usize ] ;
2271
+ let length = end - offset;
2272
+
2273
+ if length <= 12 {
2274
+ let mut view_buffer = [ 0 ; 16 ] ;
2275
+ view_buffer[ 0 ..4 ] . copy_from_slice ( & length. to_le_bytes ( ) ) ;
2276
+ view_buffer[ 4 ..4 + value_buf. len ( ) ] . copy_from_slice ( value_buf) ;
2277
+ views_builder. append ( u128:: from_le_bytes ( view_buffer) ) ;
2278
+ } else {
2279
+ let view = ByteView {
2280
+ length,
2281
+ prefix : u32:: from_le_bytes ( value_buf[ 0 ..4 ] . try_into ( ) . unwrap ( ) ) ,
2282
+ buffer_index : 0 ,
2283
+ offset,
2284
+ } ;
2285
+ views_builder. append ( view. into ( ) ) ;
2286
+ }
2287
+ }
2288
+
2289
+ assert_eq ! ( views_builder. len( ) , len) ;
2290
+
2291
+ // Safety: the input was a valid array so it valid UTF8 (if string). And
2292
+ // all offsets were valid and we created the views correctly
2293
+ Ok ( Arc :: new ( unsafe {
2294
+ GenericByteViewArray :: < V > :: new_unchecked (
2295
+ ScalarBuffer :: new ( views_builder. finish ( ) , 0 , len) ,
2296
+ vec ! [ str_values_buf] ,
2297
+ data. nulls ( ) . cloned ( ) ,
2298
+ )
2299
+ } ) )
2300
+ }
2301
+
2241
2302
#[ cfg( test) ]
2242
2303
mod tests {
2243
2304
use arrow_buffer:: { Buffer , NullBuffer } ;
@@ -5044,6 +5105,70 @@ mod tests {
5044
5105
}
5045
5106
}
5046
5107
5108
/// Checks casting string arrays to `Utf8View` for both supported offset widths.
#[test]
fn test_string_to_view() {
    // 32-bit offsets first, then 64-bit offsets.
    _test_string_to_view::<i32>();
    _test_string_to_view::<i64>();
}
5113
+
5114
+ fn _test_string_to_view < O > ( )
5115
+ where
5116
+ O : OffsetSizeTrait ,
5117
+ {
5118
+ let data = vec ! [
5119
+ Some ( "hello" ) ,
5120
+ Some ( "world" ) ,
5121
+ None ,
5122
+ Some ( "large payload over 12 bytes" ) ,
5123
+ Some ( "lulu" ) ,
5124
+ ] ;
5125
+
5126
+ let string_array = GenericStringArray :: < O > :: from ( data. clone ( ) ) ;
5127
+
5128
+ assert ! ( can_cast_types(
5129
+ string_array. data_type( ) ,
5130
+ & DataType :: Utf8View
5131
+ ) ) ;
5132
+
5133
+ let string_view_array = cast ( & string_array, & DataType :: Utf8View ) . unwrap ( ) ;
5134
+ assert_eq ! ( string_view_array. data_type( ) , & DataType :: Utf8View ) ;
5135
+
5136
+ let expect_string_view_array = StringViewArray :: from ( data) ;
5137
+ assert_eq ! ( string_view_array. as_ref( ) , & expect_string_view_array) ;
5138
+ }
5139
+
5140
/// Checks casting binary arrays to `BinaryView` for both supported offset widths.
// NOTE(review): the function name misspells "binary"; it is kept as-is here so
// existing test filters keep matching.
#[test]
fn test_bianry_to_view() {
    // 32-bit offsets first, then 64-bit offsets.
    _test_binary_to_view::<i32>();
    _test_binary_to_view::<i64>();
}
5145
+
5146
+ fn _test_binary_to_view < O > ( )
5147
+ where
5148
+ O : OffsetSizeTrait ,
5149
+ {
5150
+ let data: Vec < Option < & [ u8 ] > > = vec ! [
5151
+ Some ( b"hello" ) ,
5152
+ Some ( b"world" ) ,
5153
+ None ,
5154
+ Some ( b"large payload over 12 bytes" ) ,
5155
+ Some ( b"lulu" ) ,
5156
+ ] ;
5157
+
5158
+ let binary_array = GenericBinaryArray :: < O > :: from ( data. clone ( ) ) ;
5159
+
5160
+ assert ! ( can_cast_types(
5161
+ binary_array. data_type( ) ,
5162
+ & DataType :: BinaryView
5163
+ ) ) ;
5164
+
5165
+ let binary_view_array = cast ( & binary_array, & DataType :: BinaryView ) . unwrap ( ) ;
5166
+ assert_eq ! ( binary_view_array. data_type( ) , & DataType :: BinaryView ) ;
5167
+
5168
+ let expect_binary_view_array = BinaryViewArray :: from ( data) ;
5169
+ assert_eq ! ( binary_view_array. as_ref( ) , & expect_binary_view_array) ;
5170
+ }
5171
+
5047
5172
#[ test]
5048
5173
fn test_cast_from_f64 ( ) {
5049
5174
let f64_values: Vec < f64 > = vec ! [
0 commit comments