Skip to content

Commit 936dc59

Browse files
authored
Move make_builder into mod.rs (#7218)
1 parent eca46e7 commit 936dc59

File tree

2 files changed

+187
-188
lines changed

2 files changed

+187
-188
lines changed

arrow-array/src/builder/mod.rs

Lines changed: 185 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -273,7 +273,9 @@ mod union_builder;
273273

274274
pub use union_builder::*;
275275

276+
use crate::types::{Int16Type, Int32Type, Int64Type, Int8Type};
276277
use crate::ArrayRef;
278+
use arrow_schema::{DataType, IntervalUnit, TimeUnit};
277279
use std::any::Any;
278280

279281
/// Trait for dealing with different array builders at runtime
@@ -418,3 +420,186 @@ pub type StringBuilder = GenericStringBuilder<i32>;
418420
///
419421
/// See examples on [`GenericStringBuilder`]
420422
pub type LargeStringBuilder = GenericStringBuilder<i64>;
423+
424+
/// Returns a builder with capacity for `capacity` elements of datatype
425+
/// `DataType`.
426+
///
427+
/// This function is useful to construct arrays from arbitrary vectors with
428+
/// known/expected schema.
429+
///
430+
/// See comments on [StructBuilder] for retrieving collection builders built by
431+
/// make_builder.
///
/// # Panics
///
/// Panics when no builder can be constructed for `datatype`:
/// - a `Map` whose entries field is not a `Struct`,
/// - a `Dictionary` whose key type is not `Int8`/`Int16`/`Int32`/`Int64`, or
///   whose value type is not `Utf8`/`LargeUtf8`/`Binary`/`LargeBinary`,
/// - a `FixedSizeList` whose size cannot be converted to `usize`,
/// - any data type not matched by one of the arms below (for example
///   `Time32` with a unit other than `Second`/`Millisecond`).
///
/// List-like and `Map` children are built by recursive calls to this
/// function, so the same conditions apply to their child types.
432+
pub fn make_builder(datatype: &DataType, capacity: usize) -> Box<dyn ArrayBuilder> {
433+
    use crate::builder::*;
434+
    match datatype {
435+
        DataType::Null => Box::new(NullBuilder::new()),
436+
        DataType::Boolean => Box::new(BooleanBuilder::with_capacity(capacity)),
437+
        DataType::Int8 => Box::new(Int8Builder::with_capacity(capacity)),
438+
        DataType::Int16 => Box::new(Int16Builder::with_capacity(capacity)),
439+
        DataType::Int32 => Box::new(Int32Builder::with_capacity(capacity)),
440+
        DataType::Int64 => Box::new(Int64Builder::with_capacity(capacity)),
441+
        DataType::UInt8 => Box::new(UInt8Builder::with_capacity(capacity)),
442+
        DataType::UInt16 => Box::new(UInt16Builder::with_capacity(capacity)),
443+
        DataType::UInt32 => Box::new(UInt32Builder::with_capacity(capacity)),
444+
        DataType::UInt64 => Box::new(UInt64Builder::with_capacity(capacity)),
445+
        DataType::Float16 => Box::new(Float16Builder::with_capacity(capacity)),
446+
        DataType::Float32 => Box::new(Float32Builder::with_capacity(capacity)),
447+
        DataType::Float64 => Box::new(Float64Builder::with_capacity(capacity)),
448+
        // Variable-length types take a second, fixed argument of 1024 next to
        // `capacity`; presumably the initial byte capacity of the value
        // buffer — TODO confirm against the builders' `with_capacity`.
        DataType::Binary => Box::new(BinaryBuilder::with_capacity(capacity, 1024)),
449+
        DataType::LargeBinary => Box::new(LargeBinaryBuilder::with_capacity(capacity, 1024)),
450+
        DataType::FixedSizeBinary(len) => {
451+
            Box::new(FixedSizeBinaryBuilder::with_capacity(capacity, *len))
452+
        }
453+
        // Decimal builders are re-tagged with the requested precision and
        // scale via `with_data_type`.
        DataType::Decimal128(p, s) => Box::new(
454+
            Decimal128Builder::with_capacity(capacity).with_data_type(DataType::Decimal128(*p, *s)),
455+
        ),
456+
        DataType::Decimal256(p, s) => Box::new(
457+
            Decimal256Builder::with_capacity(capacity).with_data_type(DataType::Decimal256(*p, *s)),
458+
        ),
459+
        DataType::Utf8 => Box::new(StringBuilder::with_capacity(capacity, 1024)),
460+
        DataType::LargeUtf8 => Box::new(LargeStringBuilder::with_capacity(capacity, 1024)),
461+
        DataType::Date32 => Box::new(Date32Builder::with_capacity(capacity)),
462+
        DataType::Date64 => Box::new(Date64Builder::with_capacity(capacity)),
463+
        // Only the unit combinations listed here are matched for `Time32` and
        // `Time64`; any other unit falls through to the final panicking arm.
        DataType::Time32(TimeUnit::Second) => {
464+
            Box::new(Time32SecondBuilder::with_capacity(capacity))
465+
        }
466+
        DataType::Time32(TimeUnit::Millisecond) => {
467+
            Box::new(Time32MillisecondBuilder::with_capacity(capacity))
468+
        }
469+
        DataType::Time64(TimeUnit::Microsecond) => {
470+
            Box::new(Time64MicrosecondBuilder::with_capacity(capacity))
471+
        }
472+
        DataType::Time64(TimeUnit::Nanosecond) => {
473+
            Box::new(Time64NanosecondBuilder::with_capacity(capacity))
474+
        }
475+
        // Timestamp builders are re-tagged with a data type that carries a
        // clone of the requested timezone `tz`.
        DataType::Timestamp(TimeUnit::Second, tz) => Box::new(
476+
            TimestampSecondBuilder::with_capacity(capacity)
477+
                .with_data_type(DataType::Timestamp(TimeUnit::Second, tz.clone())),
478+
        ),
479+
        DataType::Timestamp(TimeUnit::Millisecond, tz) => Box::new(
480+
            TimestampMillisecondBuilder::with_capacity(capacity)
481+
                .with_data_type(DataType::Timestamp(TimeUnit::Millisecond, tz.clone())),
482+
        ),
483+
        DataType::Timestamp(TimeUnit::Microsecond, tz) => Box::new(
484+
            TimestampMicrosecondBuilder::with_capacity(capacity)
485+
                .with_data_type(DataType::Timestamp(TimeUnit::Microsecond, tz.clone())),
486+
        ),
487+
        DataType::Timestamp(TimeUnit::Nanosecond, tz) => Box::new(
488+
            TimestampNanosecondBuilder::with_capacity(capacity)
489+
                .with_data_type(DataType::Timestamp(TimeUnit::Nanosecond, tz.clone())),
490+
        ),
491+
        DataType::Interval(IntervalUnit::YearMonth) => {
492+
            Box::new(IntervalYearMonthBuilder::with_capacity(capacity))
493+
        }
494+
        DataType::Interval(IntervalUnit::DayTime) => {
495+
            Box::new(IntervalDayTimeBuilder::with_capacity(capacity))
496+
        }
497+
        DataType::Interval(IntervalUnit::MonthDayNano) => {
498+
            Box::new(IntervalMonthDayNanoBuilder::with_capacity(capacity))
499+
        }
500+
        DataType::Duration(TimeUnit::Second) => {
501+
            Box::new(DurationSecondBuilder::with_capacity(capacity))
502+
        }
503+
        DataType::Duration(TimeUnit::Millisecond) => {
504+
            Box::new(DurationMillisecondBuilder::with_capacity(capacity))
505+
        }
506+
        DataType::Duration(TimeUnit::Microsecond) => {
507+
            Box::new(DurationMicrosecondBuilder::with_capacity(capacity))
508+
        }
509+
        DataType::Duration(TimeUnit::Nanosecond) => {
510+
            Box::new(DurationNanosecondBuilder::with_capacity(capacity))
511+
        }
512+
        // List-like types: the child builder is created recursively with the
        // same `capacity` as the list itself, and the original `field` is
        // attached to the resulting builder via `with_field`.
        DataType::List(field) => {
513+
            let builder = make_builder(field.data_type(), capacity);
514+
            Box::new(ListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
515+
        }
516+
        DataType::LargeList(field) => {
517+
            let builder = make_builder(field.data_type(), capacity);
518+
            Box::new(LargeListBuilder::with_capacity(builder, capacity).with_field(field.clone()))
519+
        }
520+
        DataType::FixedSizeList(field, size) => {
521+
            let size = *size;
522+
            // The child builder is sized for `capacity * size` values, i.e.
            // `size` values per list slot. The conversion to `usize` is
            // unwrapped, so a size that does not fit in `usize` panics here.
            let values_builder_capacity = {
523+
                let size: usize = size.try_into().unwrap();
524+
                capacity * size
525+
            };
526+
            let builder = make_builder(field.data_type(), values_builder_capacity);
527+
            Box::new(
528+
                FixedSizeListBuilder::with_capacity(builder, size, capacity)
529+
                    .with_field(field.clone()),
530+
            )
531+
        }
532+
        DataType::ListView(field) => {
533+
            let builder = make_builder(field.data_type(), capacity);
534+
            Box::new(ListViewBuilder::with_capacity(builder, capacity).with_field(field.clone()))
535+
        }
536+
        DataType::LargeListView(field) => {
537+
            let builder = make_builder(field.data_type(), capacity);
538+
            Box::new(
539+
                LargeListViewBuilder::with_capacity(builder, capacity).with_field(field.clone()),
540+
            )
541+
        }
542+
        // The second component of `Map` is ignored when choosing the builder.
        // NOTE(review): presumably the "keys sorted" flag — confirm against
        // `DataType::Map`.
        DataType::Map(field, _) => match field.data_type() {
543+
            DataType::Struct(fields) => {
544+
                // The entries struct is read positionally: `fields[0]` is
                // the key and `fields[1]` is the value.
                // NOTE(review): indexing looks like it panics if the struct
                // has fewer than two fields — confirm.
                let map_field_names = MapFieldNames {
545+
                    key: fields[0].name().clone(),
546+
                    value: fields[1].name().clone(),
547+
                    entry: field.name().clone(),
548+
                };
549+
                let key_builder = make_builder(fields[0].data_type(), capacity);
550+
                let value_builder = make_builder(fields[1].data_type(), capacity);
551+
                Box::new(
552+
                    MapBuilder::with_capacity(
553+
                        Some(map_field_names),
554+
                        key_builder,
555+
                        value_builder,
556+
                        capacity,
557+
                    )
558+
                    .with_keys_field(fields[0].clone())
559+
                    .with_values_field(fields[1].clone()),
560+
                )
561+
            }
562+
            t => panic!("The field of Map data type {t:?} should have a child Struct field"),
563+
        },
564+
        DataType::Struct(fields) => Box::new(StructBuilder::from_fields(fields.clone(), capacity)),
565+
        // `t @` keeps the whole dictionary type available for the panic
        // message in the key-type match below.
        t @ DataType::Dictionary(key_type, value_type) => {
566+
            // Expands to a match on the dictionary's value type that builds
            // the corresponding dictionary builder for the given key type.
            // The fixed 256 and 1024 arguments are passed alongside
            // `capacity`; presumably value and byte capacities — TODO confirm
            // against the dictionary builders' `with_capacity`.
            macro_rules! dict_builder {
567+
                ($key_type:ty) => {
568+
                    match &**value_type {
569+
                        DataType::Utf8 => {
570+
                            let dict_builder: StringDictionaryBuilder<$key_type> =
571+
                                StringDictionaryBuilder::with_capacity(capacity, 256, 1024);
572+
                            Box::new(dict_builder)
573+
                        }
574+
                        DataType::LargeUtf8 => {
575+
                            let dict_builder: LargeStringDictionaryBuilder<$key_type> =
576+
                                LargeStringDictionaryBuilder::with_capacity(capacity, 256, 1024);
577+
                            Box::new(dict_builder)
578+
                        }
579+
                        DataType::Binary => {
580+
                            let dict_builder: BinaryDictionaryBuilder<$key_type> =
581+
                                BinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
582+
                            Box::new(dict_builder)
583+
                        }
584+
                        DataType::LargeBinary => {
585+
                            let dict_builder: LargeBinaryDictionaryBuilder<$key_type> =
586+
                                LargeBinaryDictionaryBuilder::with_capacity(capacity, 256, 1024);
587+
                            Box::new(dict_builder)
588+
                        }
589+
                        // Here `t` is the unsupported value type.
                        t => panic!("Dictionary value type {t:?} is not currently supported"),
590+
                    }
591+
                };
592+
            }
593+
            // Only the signed integer key types listed here are dispatched;
            // every other key type panics.
            match &**key_type {
594+
                DataType::Int8 => dict_builder!(Int8Type),
595+
                DataType::Int16 => dict_builder!(Int16Type),
596+
                DataType::Int32 => dict_builder!(Int32Type),
597+
                DataType::Int64 => dict_builder!(Int64Type),
598+
                _ => {
599+
                    panic!("Data type {t:?} with key type {key_type:?} is not currently supported")
600+
                }
601+
            }
602+
        }
603+
        // Catch-all for every data type (or unit combination) not matched
        // above.
        t => panic!("Data type {t:?} is not currently supported"),
604+
    }
605+
}

0 commit comments

Comments
 (0)