@@ -273,7 +273,9 @@ mod union_builder;
273
273
274
274
pub use union_builder:: * ;
275
275
276
+ use crate :: types:: { Int16Type , Int32Type , Int64Type , Int8Type } ;
276
277
use crate :: ArrayRef ;
278
+ use arrow_schema:: { DataType , IntervalUnit , TimeUnit } ;
277
279
use std:: any:: Any ;
278
280
279
281
/// Trait for dealing with different array builders at runtime
@@ -418,3 +420,186 @@ pub type StringBuilder = GenericStringBuilder<i32>;
418
420
///
419
421
/// See examples on [`GenericStringBuilder`]
420
422
pub type LargeStringBuilder = GenericStringBuilder < i64 > ;
423
+
424
+ /// Returns a builder with capacity for `capacity` elements of datatype
425
+ /// `DataType`.
426
+ ///
427
+ /// This function is useful to construct arrays from an arbitrary vectors with
428
+ /// known/expected schema.
429
+ ///
430
+ /// See comments on [StructBuilder] for retrieving collection builders built by
431
+ /// make_builder.
432
+ pub fn make_builder ( datatype : & DataType , capacity : usize ) -> Box < dyn ArrayBuilder > {
433
+ use crate :: builder:: * ;
434
+ match datatype {
435
+ DataType :: Null => Box :: new ( NullBuilder :: new ( ) ) ,
436
+ DataType :: Boolean => Box :: new ( BooleanBuilder :: with_capacity ( capacity) ) ,
437
+ DataType :: Int8 => Box :: new ( Int8Builder :: with_capacity ( capacity) ) ,
438
+ DataType :: Int16 => Box :: new ( Int16Builder :: with_capacity ( capacity) ) ,
439
+ DataType :: Int32 => Box :: new ( Int32Builder :: with_capacity ( capacity) ) ,
440
+ DataType :: Int64 => Box :: new ( Int64Builder :: with_capacity ( capacity) ) ,
441
+ DataType :: UInt8 => Box :: new ( UInt8Builder :: with_capacity ( capacity) ) ,
442
+ DataType :: UInt16 => Box :: new ( UInt16Builder :: with_capacity ( capacity) ) ,
443
+ DataType :: UInt32 => Box :: new ( UInt32Builder :: with_capacity ( capacity) ) ,
444
+ DataType :: UInt64 => Box :: new ( UInt64Builder :: with_capacity ( capacity) ) ,
445
+ DataType :: Float16 => Box :: new ( Float16Builder :: with_capacity ( capacity) ) ,
446
+ DataType :: Float32 => Box :: new ( Float32Builder :: with_capacity ( capacity) ) ,
447
+ DataType :: Float64 => Box :: new ( Float64Builder :: with_capacity ( capacity) ) ,
448
+ DataType :: Binary => Box :: new ( BinaryBuilder :: with_capacity ( capacity, 1024 ) ) ,
449
+ DataType :: LargeBinary => Box :: new ( LargeBinaryBuilder :: with_capacity ( capacity, 1024 ) ) ,
450
+ DataType :: FixedSizeBinary ( len) => {
451
+ Box :: new ( FixedSizeBinaryBuilder :: with_capacity ( capacity, * len) )
452
+ }
453
+ DataType :: Decimal128 ( p, s) => Box :: new (
454
+ Decimal128Builder :: with_capacity ( capacity) . with_data_type ( DataType :: Decimal128 ( * p, * s) ) ,
455
+ ) ,
456
+ DataType :: Decimal256 ( p, s) => Box :: new (
457
+ Decimal256Builder :: with_capacity ( capacity) . with_data_type ( DataType :: Decimal256 ( * p, * s) ) ,
458
+ ) ,
459
+ DataType :: Utf8 => Box :: new ( StringBuilder :: with_capacity ( capacity, 1024 ) ) ,
460
+ DataType :: LargeUtf8 => Box :: new ( LargeStringBuilder :: with_capacity ( capacity, 1024 ) ) ,
461
+ DataType :: Date32 => Box :: new ( Date32Builder :: with_capacity ( capacity) ) ,
462
+ DataType :: Date64 => Box :: new ( Date64Builder :: with_capacity ( capacity) ) ,
463
+ DataType :: Time32 ( TimeUnit :: Second ) => {
464
+ Box :: new ( Time32SecondBuilder :: with_capacity ( capacity) )
465
+ }
466
+ DataType :: Time32 ( TimeUnit :: Millisecond ) => {
467
+ Box :: new ( Time32MillisecondBuilder :: with_capacity ( capacity) )
468
+ }
469
+ DataType :: Time64 ( TimeUnit :: Microsecond ) => {
470
+ Box :: new ( Time64MicrosecondBuilder :: with_capacity ( capacity) )
471
+ }
472
+ DataType :: Time64 ( TimeUnit :: Nanosecond ) => {
473
+ Box :: new ( Time64NanosecondBuilder :: with_capacity ( capacity) )
474
+ }
475
+ DataType :: Timestamp ( TimeUnit :: Second , tz) => Box :: new (
476
+ TimestampSecondBuilder :: with_capacity ( capacity)
477
+ . with_data_type ( DataType :: Timestamp ( TimeUnit :: Second , tz. clone ( ) ) ) ,
478
+ ) ,
479
+ DataType :: Timestamp ( TimeUnit :: Millisecond , tz) => Box :: new (
480
+ TimestampMillisecondBuilder :: with_capacity ( capacity)
481
+ . with_data_type ( DataType :: Timestamp ( TimeUnit :: Millisecond , tz. clone ( ) ) ) ,
482
+ ) ,
483
+ DataType :: Timestamp ( TimeUnit :: Microsecond , tz) => Box :: new (
484
+ TimestampMicrosecondBuilder :: with_capacity ( capacity)
485
+ . with_data_type ( DataType :: Timestamp ( TimeUnit :: Microsecond , tz. clone ( ) ) ) ,
486
+ ) ,
487
+ DataType :: Timestamp ( TimeUnit :: Nanosecond , tz) => Box :: new (
488
+ TimestampNanosecondBuilder :: with_capacity ( capacity)
489
+ . with_data_type ( DataType :: Timestamp ( TimeUnit :: Nanosecond , tz. clone ( ) ) ) ,
490
+ ) ,
491
+ DataType :: Interval ( IntervalUnit :: YearMonth ) => {
492
+ Box :: new ( IntervalYearMonthBuilder :: with_capacity ( capacity) )
493
+ }
494
+ DataType :: Interval ( IntervalUnit :: DayTime ) => {
495
+ Box :: new ( IntervalDayTimeBuilder :: with_capacity ( capacity) )
496
+ }
497
+ DataType :: Interval ( IntervalUnit :: MonthDayNano ) => {
498
+ Box :: new ( IntervalMonthDayNanoBuilder :: with_capacity ( capacity) )
499
+ }
500
+ DataType :: Duration ( TimeUnit :: Second ) => {
501
+ Box :: new ( DurationSecondBuilder :: with_capacity ( capacity) )
502
+ }
503
+ DataType :: Duration ( TimeUnit :: Millisecond ) => {
504
+ Box :: new ( DurationMillisecondBuilder :: with_capacity ( capacity) )
505
+ }
506
+ DataType :: Duration ( TimeUnit :: Microsecond ) => {
507
+ Box :: new ( DurationMicrosecondBuilder :: with_capacity ( capacity) )
508
+ }
509
+ DataType :: Duration ( TimeUnit :: Nanosecond ) => {
510
+ Box :: new ( DurationNanosecondBuilder :: with_capacity ( capacity) )
511
+ }
512
+ DataType :: List ( field) => {
513
+ let builder = make_builder ( field. data_type ( ) , capacity) ;
514
+ Box :: new ( ListBuilder :: with_capacity ( builder, capacity) . with_field ( field. clone ( ) ) )
515
+ }
516
+ DataType :: LargeList ( field) => {
517
+ let builder = make_builder ( field. data_type ( ) , capacity) ;
518
+ Box :: new ( LargeListBuilder :: with_capacity ( builder, capacity) . with_field ( field. clone ( ) ) )
519
+ }
520
+ DataType :: FixedSizeList ( field, size) => {
521
+ let size = * size;
522
+ let values_builder_capacity = {
523
+ let size: usize = size. try_into ( ) . unwrap ( ) ;
524
+ capacity * size
525
+ } ;
526
+ let builder = make_builder ( field. data_type ( ) , values_builder_capacity) ;
527
+ Box :: new (
528
+ FixedSizeListBuilder :: with_capacity ( builder, size, capacity)
529
+ . with_field ( field. clone ( ) ) ,
530
+ )
531
+ }
532
+ DataType :: ListView ( field) => {
533
+ let builder = make_builder ( field. data_type ( ) , capacity) ;
534
+ Box :: new ( ListViewBuilder :: with_capacity ( builder, capacity) . with_field ( field. clone ( ) ) )
535
+ }
536
+ DataType :: LargeListView ( field) => {
537
+ let builder = make_builder ( field. data_type ( ) , capacity) ;
538
+ Box :: new (
539
+ LargeListViewBuilder :: with_capacity ( builder, capacity) . with_field ( field. clone ( ) ) ,
540
+ )
541
+ }
542
+ DataType :: Map ( field, _) => match field. data_type ( ) {
543
+ DataType :: Struct ( fields) => {
544
+ let map_field_names = MapFieldNames {
545
+ key : fields[ 0 ] . name ( ) . clone ( ) ,
546
+ value : fields[ 1 ] . name ( ) . clone ( ) ,
547
+ entry : field. name ( ) . clone ( ) ,
548
+ } ;
549
+ let key_builder = make_builder ( fields[ 0 ] . data_type ( ) , capacity) ;
550
+ let value_builder = make_builder ( fields[ 1 ] . data_type ( ) , capacity) ;
551
+ Box :: new (
552
+ MapBuilder :: with_capacity (
553
+ Some ( map_field_names) ,
554
+ key_builder,
555
+ value_builder,
556
+ capacity,
557
+ )
558
+ . with_keys_field ( fields[ 0 ] . clone ( ) )
559
+ . with_values_field ( fields[ 1 ] . clone ( ) ) ,
560
+ )
561
+ }
562
+ t => panic ! ( "The field of Map data type {t:?} should have a child Struct field" ) ,
563
+ } ,
564
+ DataType :: Struct ( fields) => Box :: new ( StructBuilder :: from_fields ( fields. clone ( ) , capacity) ) ,
565
+ t @ DataType :: Dictionary ( key_type, value_type) => {
566
+ macro_rules! dict_builder {
567
+ ( $key_type: ty) => {
568
+ match & * * value_type {
569
+ DataType :: Utf8 => {
570
+ let dict_builder: StringDictionaryBuilder <$key_type> =
571
+ StringDictionaryBuilder :: with_capacity( capacity, 256 , 1024 ) ;
572
+ Box :: new( dict_builder)
573
+ }
574
+ DataType :: LargeUtf8 => {
575
+ let dict_builder: LargeStringDictionaryBuilder <$key_type> =
576
+ LargeStringDictionaryBuilder :: with_capacity( capacity, 256 , 1024 ) ;
577
+ Box :: new( dict_builder)
578
+ }
579
+ DataType :: Binary => {
580
+ let dict_builder: BinaryDictionaryBuilder <$key_type> =
581
+ BinaryDictionaryBuilder :: with_capacity( capacity, 256 , 1024 ) ;
582
+ Box :: new( dict_builder)
583
+ }
584
+ DataType :: LargeBinary => {
585
+ let dict_builder: LargeBinaryDictionaryBuilder <$key_type> =
586
+ LargeBinaryDictionaryBuilder :: with_capacity( capacity, 256 , 1024 ) ;
587
+ Box :: new( dict_builder)
588
+ }
589
+ t => panic!( "Dictionary value type {t:?} is not currently supported" ) ,
590
+ }
591
+ } ;
592
+ }
593
+ match & * * key_type {
594
+ DataType :: Int8 => dict_builder ! ( Int8Type ) ,
595
+ DataType :: Int16 => dict_builder ! ( Int16Type ) ,
596
+ DataType :: Int32 => dict_builder ! ( Int32Type ) ,
597
+ DataType :: Int64 => dict_builder ! ( Int64Type ) ,
598
+ _ => {
599
+ panic ! ( "Data type {t:?} with key type {key_type:?} is not currently supported" )
600
+ }
601
+ }
602
+ }
603
+ t => panic ! ( "Data type {t:?} is not currently supported" ) ,
604
+ }
605
+ }
0 commit comments