|
18 | 18 | use std::sync::Arc;
|
19 | 19 |
|
20 | 20 | use crate::fuzz_cases::aggregation_fuzzer::{
|
21 |
| - AggregationFuzzerBuilder, ColumnDescr, DatasetGeneratorConfig, QueryBuilder, |
| 21 | + AggregationFuzzerBuilder, DatasetGeneratorConfig, QueryBuilder, |
22 | 22 | };
|
23 | 23 |
|
24 | 24 | use arrow::array::{
|
25 | 25 | types::Int64Type, Array, ArrayRef, AsArray, Int32Array, Int64Array, RecordBatch,
|
26 | 26 | StringArray,
|
27 | 27 | };
|
28 | 28 | use arrow::compute::{concat_batches, SortOptions};
|
29 |
| -use arrow::datatypes::{ |
30 |
| - DataType, IntervalUnit, TimeUnit, DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE, |
31 |
| - DECIMAL256_MAX_PRECISION, DECIMAL256_MAX_SCALE, |
32 |
| -}; |
| 29 | +use arrow::datatypes::DataType; |
33 | 30 | use arrow::util::pretty::pretty_format_batches;
|
34 | 31 | use arrow_schema::{Field, Schema, SchemaRef};
|
35 | 32 | use datafusion::common::Result;
|
@@ -59,6 +56,8 @@ use datafusion_physical_plan::metrics::MetricValue;
|
59 | 56 | use rand::rngs::StdRng;
|
60 | 57 | use rand::{random, thread_rng, Rng, SeedableRng};
|
61 | 58 |
|
| 59 | +use super::record_batch_generator::get_supported_types_columns; |
| 60 | + |
62 | 61 | // ========================================================================
|
63 | 62 | // The new aggregation fuzz tests based on [`AggregationFuzzer`]
|
64 | 63 | // ========================================================================
|
@@ -209,81 +208,7 @@ async fn test_median() {
|
209 | 208 | /// 1. structured types
|
210 | 209 | fn baseline_config() -> DatasetGeneratorConfig {
|
211 | 210 | let mut rng = thread_rng();
|
212 |
| - let columns = vec![ |
213 |
| - ColumnDescr::new("i8", DataType::Int8), |
214 |
| - ColumnDescr::new("i16", DataType::Int16), |
215 |
| - ColumnDescr::new("i32", DataType::Int32), |
216 |
| - ColumnDescr::new("i64", DataType::Int64), |
217 |
| - ColumnDescr::new("u8", DataType::UInt8), |
218 |
| - ColumnDescr::new("u16", DataType::UInt16), |
219 |
| - ColumnDescr::new("u32", DataType::UInt32), |
220 |
| - ColumnDescr::new("u64", DataType::UInt64), |
221 |
| - ColumnDescr::new("date32", DataType::Date32), |
222 |
| - ColumnDescr::new("date64", DataType::Date64), |
223 |
| - ColumnDescr::new("time32_s", DataType::Time32(TimeUnit::Second)), |
224 |
| - ColumnDescr::new("time32_ms", DataType::Time32(TimeUnit::Millisecond)), |
225 |
| - ColumnDescr::new("time64_us", DataType::Time64(TimeUnit::Microsecond)), |
226 |
| - ColumnDescr::new("time64_ns", DataType::Time64(TimeUnit::Nanosecond)), |
227 |
| - // `None` is passed in here however when generating the array, it will generate |
228 |
| - // random timezones. |
229 |
| - ColumnDescr::new("timestamp_s", DataType::Timestamp(TimeUnit::Second, None)), |
230 |
| - ColumnDescr::new( |
231 |
| - "timestamp_ms", |
232 |
| - DataType::Timestamp(TimeUnit::Millisecond, None), |
233 |
| - ), |
234 |
| - ColumnDescr::new( |
235 |
| - "timestamp_us", |
236 |
| - DataType::Timestamp(TimeUnit::Microsecond, None), |
237 |
| - ), |
238 |
| - ColumnDescr::new( |
239 |
| - "timestamp_ns", |
240 |
| - DataType::Timestamp(TimeUnit::Nanosecond, None), |
241 |
| - ), |
242 |
| - ColumnDescr::new("float32", DataType::Float32), |
243 |
| - ColumnDescr::new("float64", DataType::Float64), |
244 |
| - ColumnDescr::new( |
245 |
| - "interval_year_month", |
246 |
| - DataType::Interval(IntervalUnit::YearMonth), |
247 |
| - ), |
248 |
| - ColumnDescr::new( |
249 |
| - "interval_day_time", |
250 |
| - DataType::Interval(IntervalUnit::DayTime), |
251 |
| - ), |
252 |
| - ColumnDescr::new( |
253 |
| - "interval_month_day_nano", |
254 |
| - DataType::Interval(IntervalUnit::MonthDayNano), |
255 |
| - ), |
256 |
| - // begin decimal columns |
257 |
| - ColumnDescr::new("decimal128", { |
258 |
| - // Generate valid precision and scale for Decimal128 randomly. |
259 |
| - let precision: u8 = rng.gen_range(1..=DECIMAL128_MAX_PRECISION); |
260 |
| - // It's safe to cast `precision` to i8 type directly. |
261 |
| - let scale: i8 = rng.gen_range( |
262 |
| - i8::MIN..=std::cmp::min(precision as i8, DECIMAL128_MAX_SCALE), |
263 |
| - ); |
264 |
| - DataType::Decimal128(precision, scale) |
265 |
| - }), |
266 |
| - ColumnDescr::new("decimal256", { |
267 |
| - // Generate valid precision and scale for Decimal256 randomly. |
268 |
| - let precision: u8 = rng.gen_range(1..=DECIMAL256_MAX_PRECISION); |
269 |
| - // It's safe to cast `precision` to i8 type directly. |
270 |
| - let scale: i8 = rng.gen_range( |
271 |
| - i8::MIN..=std::cmp::min(precision as i8, DECIMAL256_MAX_SCALE), |
272 |
| - ); |
273 |
| - DataType::Decimal256(precision, scale) |
274 |
| - }), |
275 |
| - // begin string columns |
276 |
| - ColumnDescr::new("utf8", DataType::Utf8), |
277 |
| - ColumnDescr::new("largeutf8", DataType::LargeUtf8), |
278 |
| - ColumnDescr::new("utf8view", DataType::Utf8View), |
279 |
| - // low cardinality columns |
280 |
| - ColumnDescr::new("u8_low", DataType::UInt8).with_max_num_distinct(10), |
281 |
| - ColumnDescr::new("utf8_low", DataType::Utf8).with_max_num_distinct(10), |
282 |
| - ColumnDescr::new("bool", DataType::Boolean), |
283 |
| - ColumnDescr::new("binary", DataType::Binary), |
284 |
| - ColumnDescr::new("large_binary", DataType::LargeBinary), |
285 |
| - ColumnDescr::new("binaryview", DataType::BinaryView), |
286 |
| - ]; |
| 211 | + let columns = get_supported_types_columns(rng.gen()); |
287 | 212 |
|
288 | 213 | let min_num_rows = 512;
|
289 | 214 | let max_num_rows = 1024;
|
|
0 commit comments