Skip to content

feat: Add Date32/Date64 in aggregate fuzz testing #13041

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ fn baseline_config() -> DatasetGeneratorConfig {
ColumnDescr::new("u16", DataType::UInt16),
ColumnDescr::new("u32", DataType::UInt32),
ColumnDescr::new("u64", DataType::UInt64),
ColumnDescr::new("date32", DataType::Date32),
ColumnDescr::new("date64", DataType::Date64),
// TODO: time/timestamp columns (Date32 and Date64 are covered above)
// todo decimal columns
// begin string columns
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@

use std::sync::Arc;

use arrow::datatypes::{
Date32Type, Date64Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type,
Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
};
use arrow_array::{ArrayRef, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use datafusion_common::{arrow_datafusion_err, DataFusionError, Result};
Expand Down Expand Up @@ -222,7 +226,7 @@ macro_rules! generate_string_array {
}

macro_rules! generate_primitive_array {
($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $DATA_TYPE:ident) => {
($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => {
paste::paste! {{
let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len());
let null_pct = $SELF.candidate_null_pcts[null_pct_idx];
Expand All @@ -239,7 +243,7 @@ macro_rules! generate_primitive_array {
rng: $ARRAY_GEN_RNG,
};

generator.[< gen_data_ $DATA_TYPE >]()
generator.gen_data::<$ARROW_TYPE>()
}}}
}

Expand Down Expand Up @@ -297,7 +301,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
i8
Int8Type
)
}
DataType::Int16 => {
Expand All @@ -306,7 +310,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
i16
Int16Type
)
}
DataType::Int32 => {
Expand All @@ -315,7 +319,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
i32
Int32Type
)
}
DataType::Int64 => {
Expand All @@ -324,7 +328,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
i64
Int64Type
)
}
DataType::UInt8 => {
Expand All @@ -333,7 +337,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
u8
UInt8Type
)
}
DataType::UInt16 => {
Expand All @@ -342,7 +346,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
u16
UInt16Type
)
}
DataType::UInt32 => {
Expand All @@ -351,7 +355,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
u32
UInt32Type
)
}
DataType::UInt64 => {
Expand All @@ -360,7 +364,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
u64
UInt64Type
)
}
DataType::Float32 => {
Expand All @@ -369,7 +373,7 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
f32
Float32Type
)
}
DataType::Float64 => {
Expand All @@ -378,7 +382,25 @@ impl RecordBatchGenerator {
num_rows,
batch_gen_rng,
array_gen_rng,
f64
Float64Type
)
}
DataType::Date32 => {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️

generate_primitive_array!(
self,
num_rows,
batch_gen_rng,
array_gen_rng,
Date32Type
)
}
DataType::Date64 => {
generate_primitive_array!(
self,
num_rows,
batch_gen_rng,
array_gen_rng,
Date64Type
)
}
DataType::Utf8 => {
Expand Down
134 changes: 90 additions & 44 deletions test-utils/src/array_gen/primitive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,45 @@
// specific language governing permissions and limitations
// under the License.

use arrow::array::{ArrayRef, PrimitiveArray, UInt32Array};
use arrow::datatypes::{
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
UInt32Type, UInt64Type, UInt8Type,
};
use arrow::array::{ArrayRef, ArrowPrimitiveType, PrimitiveArray, UInt32Array};
use arrow::datatypes::DataType;
use rand::distributions::Standard;
use rand::prelude::Distribution;
use rand::rngs::StdRng;
use rand::Rng;

/// Conversion hook for building a native value type from raw integers.
///
/// Every conversion has a default body that yields `None`, so an implementor
/// only overrides the conversions that are meaningful for it.
pub trait FromNative: Copy + Default + Send + Sync + std::fmt::Debug {
    /// Builds `Self` from an `i64`; the default implementation returns `None`.
    fn from_i64(_value: i64) -> Option<Self> {
        None
    }
}

/// Implements [`FromNative`] for the native type `$native`.
///
/// Each identifier listed after the type is emitted as an override of the form
/// `fn <ident>(value: $native) -> Option<Self>` that returns `Some(value)`.
/// Conversions that are not listed keep the trait's default body.
macro_rules! native_type {
    ($native:ty $(, $conversion:ident)*) => {
        impl FromNative for $native {
            $(
                #[inline]
                fn $conversion(value: $native) -> Option<Self> {
                    Some(value)
                }
            )*
        }
    };
}

// Implement `FromNative` for the signed, unsigned and floating-point native
// types. Only `i64` lists `from_i64`, so it is the only type whose `from_i64`
// returns `Some`; every other type keeps the default body, which returns `None`.
native_type!(i8);
native_type!(i16);
native_type!(i32);
native_type!(i64, from_i64);
native_type!(u8);
native_type!(u16);
native_type!(u32);
native_type!(u64);
native_type!(f32);
native_type!(f64);

/// Randomly generate primitive array
pub struct PrimitiveArrayGenerator {
/// the total number of primitives in the output
Expand All @@ -35,46 +66,61 @@ pub struct PrimitiveArrayGenerator {
pub rng: StdRng,
}

macro_rules! impl_gen_data {
($NATIVE_TYPE:ty, $ARROW_TYPE:ident) => {
paste::paste! {
pub fn [< gen_data_ $NATIVE_TYPE >](&mut self) -> ArrayRef {
// table of strings from which to draw
let distinct_primitives: PrimitiveArray<$ARROW_TYPE> = (0..self.num_distinct_primitives)
.map(|_| Some(self.rng.gen::<$NATIVE_TYPE>()))
.collect();
// TODO: support generating more primitive arrays
impl PrimitiveArrayGenerator {
pub fn gen_data<A>(&mut self) -> ArrayRef
where
A: ArrowPrimitiveType,
A::Native: FromNative,
Standard: Distribution<<A as ArrowPrimitiveType>::Native>,
{
// table of primitives from which to draw
let distinct_primitives: PrimitiveArray<A> = (0..self.num_distinct_primitives)
.map(|_| {
Some(match A::DATA_TYPE {
DataType::Int8
| DataType::Int16
| DataType::Int32
| DataType::Int64
| DataType::UInt8
| DataType::UInt16
| DataType::UInt32
| DataType::UInt64
| DataType::Float32
| DataType::Float64
| DataType::Date32 => self.rng.gen::<A::Native>(),

// pick num_strings randomly from the distinct string table
let indicies: UInt32Array = (0..self.num_primitives)
.map(|_| {
if self.rng.gen::<f64>() < self.null_pct {
None
} else if self.num_distinct_primitives > 1 {
let range = 1..(self.num_distinct_primitives as u32);
Some(self.rng.gen_range(range))
} else {
Some(0)
}
})
.collect();
DataType::Date64 => {
// TODO: constrain this range to valid dates if necessary
let date_value = self.rng.gen_range(i64::MIN..=i64::MAX);
let millis_per_day = 86_400_000;
let adjusted_value = date_value - (date_value % millis_per_day);
A::Native::from_i64(adjusted_value).unwrap()
}

let options = None;
arrow::compute::take(&distinct_primitives, &indicies, options).unwrap()
}
}
};
}
_ => {
let arrow_type = A::DATA_TYPE;
panic!("Unsupported arrow data type: {arrow_type}")
}
})
})
.collect();

// TODO: support generating more primitive arrays
impl PrimitiveArrayGenerator {
impl_gen_data!(i8, Int8Type);
impl_gen_data!(i16, Int16Type);
impl_gen_data!(i32, Int32Type);
impl_gen_data!(i64, Int64Type);
impl_gen_data!(u8, UInt8Type);
impl_gen_data!(u16, UInt16Type);
impl_gen_data!(u32, UInt32Type);
impl_gen_data!(u64, UInt64Type);
impl_gen_data!(f32, Float32Type);
impl_gen_data!(f64, Float64Type);
// pick num_primitives values randomly from the table of distinct primitives
let indicies: UInt32Array = (0..self.num_primitives)
.map(|_| {
if self.rng.gen::<f64>() < self.null_pct {
None
} else if self.num_distinct_primitives > 1 {
let range = 1..(self.num_distinct_primitives as u32);
Some(self.rng.gen_range(range))
} else {
Some(0)
}
})
.collect();

let options = None;
arrow::compute::take(&distinct_primitives, &indicies, options).unwrap()
}
}