Skip to content

Commit 77789a4

Browse files
committed
Implement sort fuzzer
1 parent 19a1e58 commit 77789a4

File tree

7 files changed

+1302
-657
lines changed

7 files changed

+1302
-657
lines changed

datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs

+5 -80
Original file line number | Diff line number | Diff line change
@@ -18,15 +18,12 @@
1818
use std::sync::Arc;
1919

2020
use crate::fuzz_cases::aggregation_fuzzer::{
21-
AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder,
21+
AggregationFuzzerBuilder, DatasetGeneratorConfig, QueryBuilder,
2222
};
2323

2424
use arrow::array::{types::Int64Type, Array, ArrayRef, AsArray, Int64Array, RecordBatch};
2525
use arrow::compute::{concat_batches, SortOptions};
26-
use arrow::datatypes::{
27-
DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
28-
DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
29-
};
26+
use arrow::datatypes::DataType;
3027
use arrow::util::pretty::pretty_format_batches;
3128
use datafusion::common::Result;
3229
use datafusion::datasource::memory::MemorySourceConfig;
@@ -51,6 +48,8 @@ use test_utils::{add_empty_batches, StringBatchGenerator};
5148
use rand::rngs::StdRng;
5249
use rand::{thread_rng, Rng, SeedableRng};
5350

51+
use super::record_batch_generator::get_supported_types_columns;
52+
5453
// ========================================================================
5554
// The new aggregation fuzz tests based on [`AggregationFuzzer`]
5655
// ========================================================================
@@ -201,81 +200,7 @@ async fn test_median() {
201200
/// 1. structured types
202201
fn baseline_config() -> DatasetGeneratorConfig {
203202
let mut rng = thread_rng();
204-
let columns = vec![
205-
ColumnDescr::new("i8", DataType::Int8),
206-
ColumnDescr::new("i16", DataType::Int16),
207-
ColumnDescr::new("i32", DataType::Int32),
208-
ColumnDescr::new("i64", DataType::Int64),
209-
ColumnDescr::new("u8", DataType::UInt8),
210-
ColumnDescr::new("u16", DataType::UInt16),
211-
ColumnDescr::new("u32", DataType::UInt32),
212-
ColumnDescr::new("u64", DataType::UInt64),
213-
ColumnDescr::new("date32", DataType::Date32),
214-
ColumnDescr::new("date64", DataType::Date64),
215-
ColumnDescr::new("time32_s", DataType::Time32(TimeUnit::Second)),
216-
ColumnDescr::new("time32_ms", DataType::Time32(TimeUnit::Millisecond)),
217-
ColumnDescr::new("time64_us", DataType::Time64(TimeUnit::Microsecond)),
218-
ColumnDescr::new("time64_ns", DataType::Time64(TimeUnit::Nanosecond)),
219-
// `None` is passed in here however when generating the array, it will generate
220-
// random timezones.
221-
ColumnDescr::new("timestamp_s", DataType::Timestamp(TimeUnit::Second, None)),
222-
ColumnDescr::new(
223-
"timestamp_ms",
224-
DataType::Timestamp(TimeUnit::Millisecond, None),
225-
),
226-
ColumnDescr::new(
227-
"timestamp_us",
228-
DataType::Timestamp(TimeUnit::Microsecond, None),
229-
),
230-
ColumnDescr::new(
231-
"timestamp_ns",
232-
DataType::Timestamp(TimeUnit::Nanosecond, None),
233-
),
234-
ColumnDescr::new("float32", DataType::Float32),
235-
ColumnDescr::new("float64", DataType::Float64),
236-
ColumnDescr::new(
237-
"interval_year_month",
238-
DataType::Interval(IntervalUnit::YearMonth),
239-
),
240-
ColumnDescr::new(
241-
"interval_day_time",
242-
DataType::Interval(IntervalUnit::DayTime),
243-
),
244-
ColumnDescr::new(
245-
"interval_month_day_nano",
246-
DataType::Interval(IntervalUnit::MonthDayNano),
247-
),
248-
// begin decimal columns
249-
ColumnDescr::new("decimal128", {
250-
// Generate valid precision and scale for Decimal128 randomly.
251-
let precision: u8 = rng.gen_range(1..=DECIMAL128_MAX_PRECISION);
252-
// It's safe to cast `precision` to i8 type directly.
253-
let scale: i8 = rng.gen_range(
254-
i8::MIN..=std::cmp::min(precision as i8, DECIMAL128_MAX_SCALE),
255-
);
256-
DataType::Decimal128(precision, scale)
257-
}),
258-
ColumnDescr::new("decimal256", {
259-
// Generate valid precision and scale for Decimal256 randomly.
260-
let precision: u8 = rng.gen_range(1..=DECIMAL256_MAX_PRECISION);
261-
// It's safe to cast `precision` to i8 type directly.
262-
let scale: i8 = rng.gen_range(
263-
i8::MIN..=std::cmp::min(precision as i8, DECIMAL256_MAX_SCALE),
264-
);
265-
DataType::Decimal256(precision, scale)
266-
}),
267-
// begin string columns
268-
ColumnDescr::new("utf8", DataType::Utf8),
269-
ColumnDescr::new("largeutf8", DataType::LargeUtf8),
270-
ColumnDescr::new("utf8view", DataType::Utf8View),
271-
// low cardinality columns
272-
ColumnDescr::new("u8_low", DataType::UInt8).with_max_num_distinct(10),
273-
ColumnDescr::new("utf8_low", DataType::Utf8).with_max_num_distinct(10),
274-
ColumnDescr::new("bool", DataType::Boolean),
275-
ColumnDescr::new("binary", DataType::Binary),
276-
ColumnDescr::new("large_binary", DataType::LargeBinary),
277-
ColumnDescr::new("binaryview", DataType::BinaryView),
278-
];
203+
let columns = get_supported_types_columns(rng.gen());
279204

280205
let min_num_rows = 512;
281206
let max_num_rows = 1024;

0 commit comments

Comments
 (0)