Skip to content

Commit f6ecbcd

Browse files
2010YOUY01 and alamb authored
Test: configuration fuzzer for (external) sort queries (#15501)
* Implement sort fuzzer
* clippy
* Update datafusion/core/tests/fuzz_cases/sort_query_fuzz.rs

  Co-authored-by: Andrew Lamb <[email protected]>
* Reduce fuzzer running time to 5 seconds

---------

Co-authored-by: Andrew Lamb <[email protected]>
1 parent 910029d commit f6ecbcd

File tree

7 files changed: +1290 -657 lines changed

datafusion/core/tests/fuzz_cases/aggregate_fuzz.rs

+5 -80
Original file line number | Diff line number | Diff line change
@@ -18,18 +18,15 @@
1818
use std::sync::Arc;
1919

2020
use crate::fuzz_cases::aggregation_fuzzer::{
21-
AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder,
21+
AggregationFuzzerBuilder, DatasetGeneratorConfig, QueryBuilder,
2222
};
2323

2424
use arrow::array::{
2525
types::Int64Type, Array, ArrayRef, AsArray, Int32Array, Int64Array, RecordBatch,
2626
StringArray,
2727
};
2828
use arrow::compute::{concat_batches, SortOptions};
29-
use arrow::datatypes::{
30-
DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
31-
DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE,
32-
};
29+
use arrow::datatypes::DataType;
3330
use arrow::util::pretty::pretty_format_batches;
3431
use arrow_schema::{Field, Schema, SchemaRef};
3532
use datafusion::common::Result;
@@ -59,6 +56,8 @@ use datafusion_physical_plan::metrics::MetricValue;
5956
use rand::rngs::StdRng;
6057
use rand::{random, thread_rng, Rng, SeedableRng};
6158

59+
use super::record_batch_generator::get_supported_types_columns;
60+
6261
// ========================================================================
6362
// The new aggregation fuzz tests based on [`AggregationFuzzer`]
6463
// ========================================================================
@@ -209,81 +208,7 @@ async fn test_median() {
209208
/// 1. structured types
210209
fn baseline_config() -> DatasetGeneratorConfig {
211210
let mut rng = thread_rng();
212-
let columns = vec![
213-
ColumnDescr::new("i8", DataType::Int8),
214-
ColumnDescr::new("i16", DataType::Int16),
215-
ColumnDescr::new("i32", DataType::Int32),
216-
ColumnDescr::new("i64", DataType::Int64),
217-
ColumnDescr::new("u8", DataType::UInt8),
218-
ColumnDescr::new("u16", DataType::UInt16),
219-
ColumnDescr::new("u32", DataType::UInt32),
220-
ColumnDescr::new("u64", DataType::UInt64),
221-
ColumnDescr::new("date32", DataType::Date32),
222-
ColumnDescr::new("date64", DataType::Date64),
223-
ColumnDescr::new("time32_s", DataType::Time32(TimeUnit::Second)),
224-
ColumnDescr::new("time32_ms", DataType::Time32(TimeUnit::Millisecond)),
225-
ColumnDescr::new("time64_us", DataType::Time64(TimeUnit::Microsecond)),
226-
ColumnDescr::new("time64_ns", DataType::Time64(TimeUnit::Nanosecond)),
227-
// `None` is passed in here however when generating the array, it will generate
228-
// random timezones.
229-
ColumnDescr::new("timestamp_s", DataType::Timestamp(TimeUnit::Second, None)),
230-
ColumnDescr::new(
231-
"timestamp_ms",
232-
DataType::Timestamp(TimeUnit::Millisecond, None),
233-
),
234-
ColumnDescr::new(
235-
"timestamp_us",
236-
DataType::Timestamp(TimeUnit::Microsecond, None),
237-
),
238-
ColumnDescr::new(
239-
"timestamp_ns",
240-
DataType::Timestamp(TimeUnit::Nanosecond, None),
241-
),
242-
ColumnDescr::new("float32", DataType::Float32),
243-
ColumnDescr::new("float64", DataType::Float64),
244-
ColumnDescr::new(
245-
"interval_year_month",
246-
DataType::Interval(IntervalUnit::YearMonth),
247-
),
248-
ColumnDescr::new(
249-
"interval_day_time",
250-
DataType::Interval(IntervalUnit::DayTime),
251-
),
252-
ColumnDescr::new(
253-
"interval_month_day_nano",
254-
DataType::Interval(IntervalUnit::MonthDayNano),
255-
),
256-
// begin decimal columns
257-
ColumnDescr::new("decimal128", {
258-
// Generate valid precision and scale for Decimal128 randomly.
259-
let precision: u8 = rng.gen_range(1..=DECIMAL128_MAX_PRECISION);
260-
// It's safe to cast `precision` to i8 type directly.
261-
let scale: i8 = rng.gen_range(
262-
i8::MIN..=std::cmp::min(precision as i8, DECIMAL128_MAX_SCALE),
263-
);
264-
DataType::Decimal128(precision, scale)
265-
}),
266-
ColumnDescr::new("decimal256", {
267-
// Generate valid precision and scale for Decimal256 randomly.
268-
let precision: u8 = rng.gen_range(1..=DECIMAL256_MAX_PRECISION);
269-
// It's safe to cast `precision` to i8 type directly.
270-
let scale: i8 = rng.gen_range(
271-
i8::MIN..=std::cmp::min(precision as i8, DECIMAL256_MAX_SCALE),
272-
);
273-
DataType::Decimal256(precision, scale)
274-
}),
275-
// begin string columns
276-
ColumnDescr::new("utf8", DataType::Utf8),
277-
ColumnDescr::new("largeutf8", DataType::LargeUtf8),
278-
ColumnDescr::new("utf8view", DataType::Utf8View),
279-
// low cardinality columns
280-
ColumnDescr::new("u8_low", DataType::UInt8).with_max_num_distinct(10),
281-
ColumnDescr::new("utf8_low", DataType::Utf8).with_max_num_distinct(10),
282-
ColumnDescr::new("bool", DataType::Boolean),
283-
ColumnDescr::new("binary", DataType::Binary),
284-
ColumnDescr::new("large_binary", DataType::LargeBinary),
285-
ColumnDescr::new("binaryview", DataType::BinaryView),
286-
];
211+
let columns = get_supported_types_columns(rng.gen());
287212

288213
let min_num_rows = 512;
289214
let max_num_rows = 1024;

0 commit comments

Comments
 (0)