@@ -44,6 +44,307 @@ use rand::rngs::StdRng;
44
44
use rand:: { Rng , SeedableRng } ;
45
45
use tokio:: task:: JoinSet ;
46
46
47
+ use crate :: fuzz_cases:: aggregation_fuzzer:: {
48
+ AggregationFuzzerBuilder , ColumnDescr , DatasetGeneratorConfig ,
49
+ } ;
50
+
51
+ // ========================================================================
52
+ // The new aggregation fuzz tests based on [`AggregationFuzzer`]
53
+ // ========================================================================
54
+
55
+ // TODO: write more test cases to cover more `group by`s and `aggregation function`s
56
+ // TODO: maybe we can use a macro to simplify test case creation
57
+
58
+ /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `no group by`
59
+ #[ tokio:: test( flavor = "multi_thread" ) ]
60
+ async fn test_basic_prim_aggr_no_group ( ) {
61
+ let builder = AggregationFuzzerBuilder :: default ( ) ;
62
+
63
+ // Define data generator config
64
+ let columns = vec ! [ ColumnDescr :: new( "a" , DataType :: Int32 ) ] ;
65
+
66
+ let data_gen_config = DatasetGeneratorConfig {
67
+ columns,
68
+ rows_num_range : ( 512 , 1024 ) ,
69
+ sort_keys_set : Vec :: new ( ) ,
70
+ } ;
71
+
72
+ // Build fuzzer
73
+ let fuzzer = builder
74
+ . data_gen_config ( data_gen_config)
75
+ . data_gen_rounds ( 16 )
76
+ . add_sql ( "SELECT sum(a) FROM fuzz_table" )
77
+ . add_sql ( "SELECT sum(distinct a) FROM fuzz_table" )
78
+ . add_sql ( "SELECT max(a) FROM fuzz_table" )
79
+ . add_sql ( "SELECT min(a) FROM fuzz_table" )
80
+ . add_sql ( "SELECT count(a) FROM fuzz_table" )
81
+ . add_sql ( "SELECT count(distinct a) FROM fuzz_table" )
82
+ . add_sql ( "SELECT avg(a) FROM fuzz_table" )
83
+ . table_name ( "fuzz_table" )
84
+ . build ( ) ;
85
+
86
+ fuzzer. run ( ) . await ;
87
+ }
88
+
89
+ /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by single int64`
90
+ #[ tokio:: test( flavor = "multi_thread" ) ]
91
+ async fn test_basic_prim_aggr_group_by_single_int64 ( ) {
92
+ let builder = AggregationFuzzerBuilder :: default ( ) ;
93
+
94
+ // Define data generator config
95
+ let columns = vec ! [
96
+ ColumnDescr :: new( "a" , DataType :: Int32 ) ,
97
+ ColumnDescr :: new( "b" , DataType :: Int64 ) ,
98
+ ColumnDescr :: new( "c" , DataType :: Int64 ) ,
99
+ ] ;
100
+ let sort_keys_set = vec ! [
101
+ vec![ "b" . to_string( ) ] ,
102
+ vec![ "c" . to_string( ) , "b" . to_string( ) ] ,
103
+ ] ;
104
+ let data_gen_config = DatasetGeneratorConfig {
105
+ columns,
106
+ rows_num_range : ( 512 , 1024 ) ,
107
+ sort_keys_set,
108
+ } ;
109
+
110
+ // Build fuzzer
111
+ let fuzzer = builder
112
+ . data_gen_config ( data_gen_config)
113
+ . data_gen_rounds ( 16 )
114
+ . add_sql ( "SELECT b, sum(a) FROM fuzz_table GROUP BY b" )
115
+ . add_sql ( "SELECT b, sum(distinct a) FROM fuzz_table GROUP BY b" )
116
+ . add_sql ( "SELECT b, max(a) FROM fuzz_table GROUP BY b" )
117
+ . add_sql ( "SELECT b, min(a) FROM fuzz_table GROUP BY b" )
118
+ . add_sql ( "SELECT b, count(a) FROM fuzz_table GROUP BY b" )
119
+ . add_sql ( "SELECT b, count(distinct a) FROM fuzz_table GROUP BY b" )
120
+ . add_sql ( "SELECT b, avg(a) FROM fuzz_table GROUP BY b" )
121
+ . table_name ( "fuzz_table" )
122
+ . build ( ) ;
123
+
124
+ fuzzer. run ( ) . await ;
125
+ }
126
+
127
+ /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by single string`
128
+ #[ tokio:: test( flavor = "multi_thread" ) ]
129
+ async fn test_basic_prim_aggr_group_by_single_string ( ) {
130
+ let builder = AggregationFuzzerBuilder :: default ( ) ;
131
+
132
+ // Define data generator config
133
+ let columns = vec ! [
134
+ ColumnDescr :: new( "a" , DataType :: Int32 ) ,
135
+ ColumnDescr :: new( "b" , DataType :: Utf8 ) ,
136
+ ColumnDescr :: new( "c" , DataType :: Int64 ) ,
137
+ ] ;
138
+ let sort_keys_set = vec ! [
139
+ vec![ "b" . to_string( ) ] ,
140
+ vec![ "c" . to_string( ) , "b" . to_string( ) ] ,
141
+ ] ;
142
+ let data_gen_config = DatasetGeneratorConfig {
143
+ columns,
144
+ rows_num_range : ( 512 , 1024 ) ,
145
+ sort_keys_set,
146
+ } ;
147
+
148
+ // Build fuzzer
149
+ let fuzzer = builder
150
+ . data_gen_config ( data_gen_config)
151
+ . data_gen_rounds ( 16 )
152
+ . add_sql ( "SELECT b, sum(a) FROM fuzz_table GROUP BY b" )
153
+ . add_sql ( "SELECT b, sum(distinct a) FROM fuzz_table GROUP BY b" )
154
+ . add_sql ( "SELECT b, max(a) FROM fuzz_table GROUP BY b" )
155
+ . add_sql ( "SELECT b, min(a) FROM fuzz_table GROUP BY b" )
156
+ . add_sql ( "SELECT b, count(a) FROM fuzz_table GROUP BY b" )
157
+ . add_sql ( "SELECT b, count(distinct a) FROM fuzz_table GROUP BY b" )
158
+ . add_sql ( "SELECT b, avg(a) FROM fuzz_table GROUP BY b" )
159
+ . table_name ( "fuzz_table" )
160
+ . build ( ) ;
161
+
162
+ fuzzer. run ( ) . await ;
163
+ }
164
+
165
+ /// Fuzz test for `basic prim aggr(sum/sum distinct/max/min/count/avg)` + `group by string + int64`
166
+ #[ tokio:: test( flavor = "multi_thread" ) ]
167
+ async fn test_basic_prim_aggr_group_by_mixed_string_int64 ( ) {
168
+ let builder = AggregationFuzzerBuilder :: default ( ) ;
169
+
170
+ // Define data generator config
171
+ let columns = vec ! [
172
+ ColumnDescr :: new( "a" , DataType :: Int32 ) ,
173
+ ColumnDescr :: new( "b" , DataType :: Utf8 ) ,
174
+ ColumnDescr :: new( "c" , DataType :: Int64 ) ,
175
+ ColumnDescr :: new( "d" , DataType :: Int32 ) ,
176
+ ] ;
177
+ let sort_keys_set = vec ! [
178
+ vec![ "b" . to_string( ) , "c" . to_string( ) ] ,
179
+ vec![ "d" . to_string( ) , "b" . to_string( ) , "c" . to_string( ) ] ,
180
+ ] ;
181
+ let data_gen_config = DatasetGeneratorConfig {
182
+ columns,
183
+ rows_num_range : ( 512 , 1024 ) ,
184
+ sort_keys_set,
185
+ } ;
186
+
187
+ // Build fuzzer
188
+ let fuzzer = builder
189
+ . data_gen_config ( data_gen_config)
190
+ . data_gen_rounds ( 16 )
191
+ . add_sql ( "SELECT b, c, sum(a) FROM fuzz_table GROUP BY b, c" )
192
+ . add_sql ( "SELECT b, c, sum(distinct a) FROM fuzz_table GROUP BY b,c" )
193
+ . add_sql ( "SELECT b, c, max(a) FROM fuzz_table GROUP BY b, c" )
194
+ . add_sql ( "SELECT b, c, min(a) FROM fuzz_table GROUP BY b, c" )
195
+ . add_sql ( "SELECT b, c, count(a) FROM fuzz_table GROUP BY b, c" )
196
+ . add_sql ( "SELECT b, c, count(distinct a) FROM fuzz_table GROUP BY b, c" )
197
+ . add_sql ( "SELECT b, c, avg(a) FROM fuzz_table GROUP BY b, c" )
198
+ . table_name ( "fuzz_table" )
199
+ . build ( ) ;
200
+
201
+ fuzzer. run ( ) . await ;
202
+ }
203
+
204
+ /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `no group by`
205
+ #[ tokio:: test( flavor = "multi_thread" ) ]
206
+ async fn test_basic_string_aggr_no_group ( ) {
207
+ let builder = AggregationFuzzerBuilder :: default ( ) ;
208
+
209
+ // Define data generator config
210
+ let columns = vec ! [ ColumnDescr :: new( "a" , DataType :: Utf8 ) ] ;
211
+
212
+ let data_gen_config = DatasetGeneratorConfig {
213
+ columns,
214
+ rows_num_range : ( 512 , 1024 ) ,
215
+ sort_keys_set : Vec :: new ( ) ,
216
+ } ;
217
+
218
+ // Build fuzzer
219
+ let fuzzer = builder
220
+ . data_gen_config ( data_gen_config)
221
+ . data_gen_rounds ( 8 )
222
+ . add_sql ( "SELECT max(a) FROM fuzz_table" )
223
+ . add_sql ( "SELECT min(a) FROM fuzz_table" )
224
+ . add_sql ( "SELECT count(a) FROM fuzz_table" )
225
+ . add_sql ( "SELECT count(distinct a) FROM fuzz_table" )
226
+ . table_name ( "fuzz_table" )
227
+ . build ( ) ;
228
+
229
+ fuzzer. run ( ) . await ;
230
+ }
231
+
232
+ /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by single int64`
233
+ #[ tokio:: test( flavor = "multi_thread" ) ]
234
+ async fn test_basic_string_aggr_group_by_single_int64 ( ) {
235
+ let builder = AggregationFuzzerBuilder :: default ( ) ;
236
+
237
+ // Define data generator config
238
+ let columns = vec ! [
239
+ ColumnDescr :: new( "a" , DataType :: Utf8 ) ,
240
+ ColumnDescr :: new( "b" , DataType :: Int64 ) ,
241
+ ColumnDescr :: new( "c" , DataType :: Int64 ) ,
242
+ ] ;
243
+ let sort_keys_set = vec ! [
244
+ vec![ "b" . to_string( ) ] ,
245
+ vec![ "c" . to_string( ) , "b" . to_string( ) ] ,
246
+ ] ;
247
+ let data_gen_config = DatasetGeneratorConfig {
248
+ columns,
249
+ rows_num_range : ( 512 , 1024 ) ,
250
+ sort_keys_set,
251
+ } ;
252
+
253
+ // Build fuzzer
254
+ let fuzzer = builder
255
+ . data_gen_config ( data_gen_config)
256
+ . data_gen_rounds ( 8 )
257
+ // FIXME: Encounter error in min/max
258
+ // ArrowError(InvalidArgumentError("number of columns(1) must match number of fields(2) in schema"))
259
+ // .add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b")
260
+ // .add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b")
261
+ . add_sql ( "SELECT b, count(a) FROM fuzz_table GROUP BY b" )
262
+ . add_sql ( "SELECT b, count(distinct a) FROM fuzz_table GROUP BY b" )
263
+ . table_name ( "fuzz_table" )
264
+ . build ( ) ;
265
+
266
+ fuzzer. run ( ) . await ;
267
+ }
268
+
269
+ /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by single string`
270
+ #[ tokio:: test( flavor = "multi_thread" ) ]
271
+ async fn test_basic_string_aggr_group_by_single_string ( ) {
272
+ let builder = AggregationFuzzerBuilder :: default ( ) ;
273
+
274
+ // Define data generator config
275
+ let columns = vec ! [
276
+ ColumnDescr :: new( "a" , DataType :: Utf8 ) ,
277
+ ColumnDescr :: new( "b" , DataType :: Utf8 ) ,
278
+ ColumnDescr :: new( "c" , DataType :: Int64 ) ,
279
+ ] ;
280
+ let sort_keys_set = vec ! [
281
+ vec![ "b" . to_string( ) ] ,
282
+ vec![ "c" . to_string( ) , "b" . to_string( ) ] ,
283
+ ] ;
284
+ let data_gen_config = DatasetGeneratorConfig {
285
+ columns,
286
+ rows_num_range : ( 512 , 1024 ) ,
287
+ sort_keys_set,
288
+ } ;
289
+
290
+ // Build fuzzer
291
+ let fuzzer = builder
292
+ . data_gen_config ( data_gen_config)
293
+ . data_gen_rounds ( 16 )
294
+ // FIXME: Encounter error in min/max
295
+ // ArrowError(InvalidArgumentError("number of columns(1) must match number of fields(2) in schema"))
296
+ // .add_sql("SELECT b, max(a) FROM fuzz_table GROUP BY b")
297
+ // .add_sql("SELECT b, min(a) FROM fuzz_table GROUP BY b")
298
+ . add_sql ( "SELECT b, count(a) FROM fuzz_table GROUP BY b" )
299
+ . add_sql ( "SELECT b, count(distinct a) FROM fuzz_table GROUP BY b" )
300
+ . table_name ( "fuzz_table" )
301
+ . build ( ) ;
302
+
303
+ fuzzer. run ( ) . await ;
304
+ }
305
+
306
+ /// Fuzz test for `basic string aggr(count/count distinct/min/max)` + `group by string + int64`
307
+ #[ tokio:: test( flavor = "multi_thread" ) ]
308
+ async fn test_basic_string_aggr_group_by_mixed_string_int64 ( ) {
309
+ let builder = AggregationFuzzerBuilder :: default ( ) ;
310
+
311
+ // Define data generator config
312
+ let columns = vec ! [
313
+ ColumnDescr :: new( "a" , DataType :: Utf8 ) ,
314
+ ColumnDescr :: new( "b" , DataType :: Utf8 ) ,
315
+ ColumnDescr :: new( "c" , DataType :: Int64 ) ,
316
+ ColumnDescr :: new( "d" , DataType :: Int32 ) ,
317
+ ] ;
318
+ let sort_keys_set = vec ! [
319
+ vec![ "b" . to_string( ) , "c" . to_string( ) ] ,
320
+ vec![ "d" . to_string( ) , "b" . to_string( ) , "c" . to_string( ) ] ,
321
+ ] ;
322
+ let data_gen_config = DatasetGeneratorConfig {
323
+ columns,
324
+ rows_num_range : ( 512 , 1024 ) ,
325
+ sort_keys_set,
326
+ } ;
327
+
328
+ // Build fuzzer
329
+ let fuzzer = builder
330
+ . data_gen_config ( data_gen_config)
331
+ . data_gen_rounds ( 16 )
332
+ // FIXME: Encounter error in min/max
333
+ // ArrowError(InvalidArgumentError("number of columns(1) must match number of fields(2) in schema"))
334
+ // .add_sql("SELECT b, c, max(a) FROM fuzz_table GROUP BY b, c")
335
+ // .add_sql("SELECT b, c, min(a) FROM fuzz_table GROUP BY b, c")
336
+ . add_sql ( "SELECT b, c, count(a) FROM fuzz_table GROUP BY b, c" )
337
+ . add_sql ( "SELECT b, c, count(distinct a) FROM fuzz_table GROUP BY b, c" )
338
+ . table_name ( "fuzz_table" )
339
+ . build ( ) ;
340
+
341
+ fuzzer. run ( ) . await ;
342
+ }
343
+
344
+ // ========================================================================
345
+ // The old aggregation fuzz tests
346
+ // ========================================================================
347
+ /// Tracks if this stream is generating input or output
47
348
/// Tests that streaming aggregate and batch (non streaming) aggregate produce
48
349
/// same results
49
350
#[ tokio:: test( flavor = "multi_thread" ) ]
@@ -311,6 +612,7 @@ async fn group_by_string_test(
311
612
let actual = extract_result_counts ( results) ;
312
613
assert_eq ! ( expected, actual) ;
313
614
}
615
+
314
616
async fn verify_ordered_aggregate ( frame : & DataFrame , expected_sort : bool ) {
315
617
struct Visitor {
316
618
expected_sort : bool ,
0 commit comments