Skip to content

Commit 73cfa6c

Browse files
authored
feat: Add Date32/Date64 in aggregate fuzz testing (#13041)
* refactor PrimitiveArrayGenerator. * support Date32/Date64 type in data generator. * fix format. * remove unnecessary type para in PrimitiveArrayGenerator. * introduce FromNative trait and replace the unsafe.
1 parent 7b2284c commit 73cfa6c

File tree

3 files changed

+126
-56
lines changed

3 files changed

+126
-56
lines changed

datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,8 @@ fn baseline_config() -> DatasetGeneratorConfig {
164164
ColumnDescr::new("u16", DataType::UInt16),
165165
ColumnDescr::new("u32", DataType::UInt32),
166166
ColumnDescr::new("u64", DataType::UInt64),
167+
ColumnDescr::new("date32", DataType::Date32),
168+
ColumnDescr::new("date64", DataType::Date64),
167169
// TODO: time/timestamp columns (Date32/Date64 are now covered above)
168170
// todo decimal columns
169171
// begin string columns

datafusion/core/tests/fuzz_cases/aggregation_fuzzer/data_generator.rs

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717

1818
use std::sync::Arc;
1919

20+
use arrow::datatypes::{
21+
Date32Type, Date64Type, Float32Type, Float64Type, Int16Type, Int32Type, Int64Type,
22+
Int8Type, UInt16Type, UInt32Type, UInt64Type, UInt8Type,
23+
};
2024
use arrow_array::{ArrayRef, RecordBatch};
2125
use arrow_schema::{DataType, Field, Schema};
2226
use datafusion_common::{arrow_datafusion_err, DataFusionError, Result};
@@ -222,7 +226,7 @@ macro_rules! generate_string_array {
222226
}
223227

224228
macro_rules! generate_primitive_array {
225-
($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $DATA_TYPE:ident) => {
229+
($SELF:ident, $NUM_ROWS:ident, $BATCH_GEN_RNG:ident, $ARRAY_GEN_RNG:ident, $ARROW_TYPE:ident) => {
226230
paste::paste! {{
227231
let null_pct_idx = $BATCH_GEN_RNG.gen_range(0..$SELF.candidate_null_pcts.len());
228232
let null_pct = $SELF.candidate_null_pcts[null_pct_idx];
@@ -239,7 +243,7 @@ macro_rules! generate_primitive_array {
239243
rng: $ARRAY_GEN_RNG,
240244
};
241245

242-
generator.[< gen_data_ $DATA_TYPE >]()
246+
generator.gen_data::<$ARROW_TYPE>()
243247
}}}
244248
}
245249

@@ -297,7 +301,7 @@ impl RecordBatchGenerator {
297301
num_rows,
298302
batch_gen_rng,
299303
array_gen_rng,
300-
i8
304+
Int8Type
301305
)
302306
}
303307
DataType::Int16 => {
@@ -306,7 +310,7 @@ impl RecordBatchGenerator {
306310
num_rows,
307311
batch_gen_rng,
308312
array_gen_rng,
309-
i16
313+
Int16Type
310314
)
311315
}
312316
DataType::Int32 => {
@@ -315,7 +319,7 @@ impl RecordBatchGenerator {
315319
num_rows,
316320
batch_gen_rng,
317321
array_gen_rng,
318-
i32
322+
Int32Type
319323
)
320324
}
321325
DataType::Int64 => {
@@ -324,7 +328,7 @@ impl RecordBatchGenerator {
324328
num_rows,
325329
batch_gen_rng,
326330
array_gen_rng,
327-
i64
331+
Int64Type
328332
)
329333
}
330334
DataType::UInt8 => {
@@ -333,7 +337,7 @@ impl RecordBatchGenerator {
333337
num_rows,
334338
batch_gen_rng,
335339
array_gen_rng,
336-
u8
340+
UInt8Type
337341
)
338342
}
339343
DataType::UInt16 => {
@@ -342,7 +346,7 @@ impl RecordBatchGenerator {
342346
num_rows,
343347
batch_gen_rng,
344348
array_gen_rng,
345-
u16
349+
UInt16Type
346350
)
347351
}
348352
DataType::UInt32 => {
@@ -351,7 +355,7 @@ impl RecordBatchGenerator {
351355
num_rows,
352356
batch_gen_rng,
353357
array_gen_rng,
354-
u32
358+
UInt32Type
355359
)
356360
}
357361
DataType::UInt64 => {
@@ -360,7 +364,7 @@ impl RecordBatchGenerator {
360364
num_rows,
361365
batch_gen_rng,
362366
array_gen_rng,
363-
u64
367+
UInt64Type
364368
)
365369
}
366370
DataType::Float32 => {
@@ -369,7 +373,7 @@ impl RecordBatchGenerator {
369373
num_rows,
370374
batch_gen_rng,
371375
array_gen_rng,
372-
f32
376+
Float32Type
373377
)
374378
}
375379
DataType::Float64 => {
@@ -378,7 +382,25 @@ impl RecordBatchGenerator {
378382
num_rows,
379383
batch_gen_rng,
380384
array_gen_rng,
381-
f64
385+
Float64Type
386+
)
387+
}
388+
DataType::Date32 => {
389+
generate_primitive_array!(
390+
self,
391+
num_rows,
392+
batch_gen_rng,
393+
array_gen_rng,
394+
Date32Type
395+
)
396+
}
397+
DataType::Date64 => {
398+
generate_primitive_array!(
399+
self,
400+
num_rows,
401+
batch_gen_rng,
402+
array_gen_rng,
403+
Date64Type
382404
)
383405
}
384406
DataType::Utf8 => {

test-utils/src/array_gen/primitive.rs

Lines changed: 90 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,45 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::{ArrayRef, PrimitiveArray, UInt32Array};
19-
use arrow::datatypes::{
20-
Float32Type, Float64Type, Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type,
21-
UInt32Type, UInt64Type, UInt8Type,
22-
};
18+
use arrow::array::{ArrayRef, ArrowPrimitiveType, PrimitiveArray, UInt32Array};
19+
use arrow::datatypes::DataType;
20+
use rand::distributions::Standard;
21+
use rand::prelude::Distribution;
2322
use rand::rngs::StdRng;
2423
use rand::Rng;
2524

25+
/// Trait for safely converting a primitive value into the native type that implements it.
26+
pub trait FromNative: std::fmt::Debug + Send + Sync + Copy + Default {
27+
/// Converts an `i64` into this native type; the default implementation returns `None`.
28+
fn from_i64(_: i64) -> Option<Self> {
29+
None
30+
}
31+
}
32+
33+
macro_rules! native_type {
34+
($t: ty $(, $from:ident)*) => {
35+
impl FromNative for $t {
36+
$(
37+
#[inline]
38+
fn $from(v: $t) -> Option<Self> {
39+
Some(v)
40+
}
41+
)*
42+
}
43+
};
44+
}
45+
46+
native_type!(i8);
47+
native_type!(i16);
48+
native_type!(i32);
49+
native_type!(i64, from_i64);
50+
native_type!(u8);
51+
native_type!(u16);
52+
native_type!(u32);
53+
native_type!(u64);
54+
native_type!(f32);
55+
native_type!(f64);
56+
2657
/// Randomly generate primitive array
2758
pub struct PrimitiveArrayGenerator {
2859
/// the total number of primitives in the output
@@ -35,46 +66,61 @@ pub struct PrimitiveArrayGenerator {
3566
pub rng: StdRng,
3667
}
3768

38-
macro_rules! impl_gen_data {
39-
($NATIVE_TYPE:ty, $ARROW_TYPE:ident) => {
40-
paste::paste! {
41-
pub fn [< gen_data_ $NATIVE_TYPE >](&mut self) -> ArrayRef {
42-
// table of strings from which to draw
43-
let distinct_primitives: PrimitiveArray<$ARROW_TYPE> = (0..self.num_distinct_primitives)
44-
.map(|_| Some(self.rng.gen::<$NATIVE_TYPE>()))
45-
.collect();
69+
// TODO: support generating more primitive arrays
70+
impl PrimitiveArrayGenerator {
71+
pub fn gen_data<A>(&mut self) -> ArrayRef
72+
where
73+
A: ArrowPrimitiveType,
74+
A::Native: FromNative,
75+
Standard: Distribution<<A as ArrowPrimitiveType>::Native>,
76+
{
77+
// table of primitives from which to draw
78+
let distinct_primitives: PrimitiveArray<A> = (0..self.num_distinct_primitives)
79+
.map(|_| {
80+
Some(match A::DATA_TYPE {
81+
DataType::Int8
82+
| DataType::Int16
83+
| DataType::Int32
84+
| DataType::Int64
85+
| DataType::UInt8
86+
| DataType::UInt16
87+
| DataType::UInt32
88+
| DataType::UInt64
89+
| DataType::Float32
90+
| DataType::Float64
91+
| DataType::Date32 => self.rng.gen::<A::Native>(),
4692

47-
// pick num_strings randomly from the distinct string table
48-
let indicies: UInt32Array = (0..self.num_primitives)
49-
.map(|_| {
50-
if self.rng.gen::<f64>() < self.null_pct {
51-
None
52-
} else if self.num_distinct_primitives > 1 {
53-
let range = 1..(self.num_distinct_primitives as u32);
54-
Some(self.rng.gen_range(range))
55-
} else {
56-
Some(0)
57-
}
58-
})
59-
.collect();
93+
DataType::Date64 => {
94+
// TODO: constrain this range to valid dates if necessary
95+
let date_value = self.rng.gen_range(i64::MIN..=i64::MAX);
96+
let millis_per_day = 86_400_000;
97+
let adjusted_value = date_value - (date_value % millis_per_day);
98+
A::Native::from_i64(adjusted_value).unwrap()
99+
}
60100

61-
let options = None;
62-
arrow::compute::take(&distinct_primitives, &indicies, options).unwrap()
63-
}
64-
}
65-
};
66-
}
101+
_ => {
102+
let arrow_type = A::DATA_TYPE;
103+
panic!("Unsupported arrow data type: {arrow_type}")
104+
}
105+
})
106+
})
107+
.collect();
67108

68-
// TODO: support generating more primitive arrays
69-
impl PrimitiveArrayGenerator {
70-
impl_gen_data!(i8, Int8Type);
71-
impl_gen_data!(i16, Int16Type);
72-
impl_gen_data!(i32, Int32Type);
73-
impl_gen_data!(i64, Int64Type);
74-
impl_gen_data!(u8, UInt8Type);
75-
impl_gen_data!(u16, UInt16Type);
76-
impl_gen_data!(u32, UInt32Type);
77-
impl_gen_data!(u64, UInt64Type);
78-
impl_gen_data!(f32, Float32Type);
79-
impl_gen_data!(f64, Float64Type);
109+
// pick num_primitives values randomly from the distinct primitive table
110+
let indicies: UInt32Array = (0..self.num_primitives)
111+
.map(|_| {
112+
if self.rng.gen::<f64>() < self.null_pct {
113+
None
114+
} else if self.num_distinct_primitives > 1 {
115+
let range = 1..(self.num_distinct_primitives as u32);
116+
Some(self.rng.gen_range(range))
117+
} else {
118+
Some(0)
119+
}
120+
})
121+
.collect();
122+
123+
let options = None;
124+
arrow::compute::take(&distinct_primitives, &indicies, options).unwrap()
125+
}
80126
}

0 commit comments

Comments
 (0)