@@ -278,63 +278,50 @@ def test_distinct():
278
278
assert df_a .collect () == df_b .collect ()
279
279
280
280
281
- def test_window_functions (df ):
281
+ data_test_window_functions = [
282
+ ("row" , f .window ("row_number" , [], order_by = [f .order_by (column ("c" ))]), [2 , 1 , 3 ]),
283
+ ("rank" , f .window ("rank" , [], order_by = [f .order_by (column ("c" ))]), [2 , 1 , 2 ]),
284
+ ("dense_rank" , f .window ("dense_rank" , [], order_by = [f .order_by (column ("c" ))]), [2 , 1 , 2 ] ),
285
+ ("percent_rank" , f .window ("percent_rank" , [], order_by = [f .order_by (column ("c" ))]), [0.5 , 0 , 0.5 ]),
286
+ ("cume_dist" , f .window ("cume_dist" , [], order_by = [f .order_by (column ("b" ))]), [0.3333333333333333 , 0.6666666666666666 , 1.0 ]),
287
+ ("ntile" , f .window ("ntile" , [literal (2 )], order_by = [f .order_by (column ("c" ))]), [1 , 1 , 2 ]),
288
+ ("next" , f .window ("lead" , [column ("b" )], order_by = [f .order_by (column ("b" ))]), [5 , 6 , None ]),
289
+ ("previous" , f .window ("lag" , [column ("b" )], order_by = [f .order_by (column ("b" ))]), [None , 4 , 5 ]),
290
+ pytest .param (
291
+ "first_value" ,
292
+ f .window (
293
+ "first_value" ,
294
+ [column ("a" )],
295
+ order_by = [f .order_by (column ("b" ))]
296
+ ),
297
+ [1 , 1 , 1 ],
298
+ marks = pytest .mark .xfail ,
299
+ ),
300
+ pytest .param (
301
+ "last_value" ,
302
+ f .window ("last_value" , [column ("b" )], order_by = [f .order_by (column ("b" ))]),
303
+ [4 , 5 , 6 ],
304
+ marks = pytest .mark .xfail ,
305
+ ),
306
+ pytest .param (
307
+ "2nd_value" ,
308
+ f .window (
309
+ "nth_value" ,
310
+ [column ("b" ), literal (2 )],
311
+ order_by = [f .order_by (column ("b" ))],
312
+ ),
313
+ [None , 5 , 5 ],
314
+ ),
315
+ ]
316
+
317
+
318
+ @pytest .mark .parametrize ("name,expr,result" , data_test_window_functions )
319
+ def test_window_functions (df , name , expr , result ):
282
320
df = df .select (
283
321
column ("a" ),
284
322
column ("b" ),
285
323
column ("c" ),
286
- f .alias (
287
- f .window ("row_number" , [], order_by = [f .order_by (column ("c" ))]),
288
- "row" ,
289
- ),
290
- f .alias (
291
- f .window ("rank" , [], order_by = [f .order_by (column ("c" ))]),
292
- "rank" ,
293
- ),
294
- f .alias (
295
- f .window ("dense_rank" , [], order_by = [f .order_by (column ("c" ))]),
296
- "dense_rank" ,
297
- ),
298
- f .alias (
299
- f .window ("percent_rank" , [], order_by = [f .order_by (column ("c" ))]),
300
- "percent_rank" ,
301
- ),
302
- f .alias (
303
- f .window ("cume_dist" , [], order_by = [f .order_by (column ("b" ))]),
304
- "cume_dist" ,
305
- ),
306
- f .alias (
307
- f .window ("ntile" , [literal (2 )], order_by = [f .order_by (column ("c" ))]),
308
- "ntile" ,
309
- ),
310
- f .alias (
311
- f .window ("lag" , [column ("b" )], order_by = [f .order_by (column ("b" ))]),
312
- "previous" ,
313
- ),
314
- f .alias (
315
- f .window ("lead" , [column ("b" )], order_by = [f .order_by (column ("b" ))]),
316
- "next" ,
317
- ),
318
- f .alias (
319
- f .window (
320
- "first_value" ,
321
- [column ("a" )],
322
- order_by = [f .order_by (column ("b" ))],
323
- ),
324
- "first_value" ,
325
- ),
326
- f .alias (
327
- f .window ("last_value" , [column ("b" )], order_by = [f .order_by (column ("b" ))]),
328
- "last_value" ,
329
- ),
330
- f .alias (
331
- f .window (
332
- "nth_value" ,
333
- [column ("b" ), literal (2 )],
334
- order_by = [f .order_by (column ("b" ))],
335
- ),
336
- "2nd_value" ,
337
- ),
324
+ f .alias (expr , name )
338
325
)
339
326
340
327
table = pa .Table .from_batches (df .collect ())
@@ -343,18 +330,9 @@ def test_window_functions(df):
343
330
"a" : [1 , 2 , 3 ],
344
331
"b" : [4 , 5 , 6 ],
345
332
"c" : [8 , 5 , 8 ],
346
- "row" : [2 , 1 , 3 ],
347
- "rank" : [2 , 1 , 2 ],
348
- "dense_rank" : [2 , 1 , 2 ],
349
- "percent_rank" : [0.5 , 0 , 0.5 ],
350
- "cume_dist" : [0.3333333333333333 , 0.6666666666666666 , 1.0 ],
351
- "ntile" : [1 , 1 , 2 ],
352
- "next" : [5 , 6 , None ],
353
- "previous" : [None , 4 , 5 ],
354
- "first_value" : [1 , 1 , 1 ],
355
- "last_value" : [4 , 5 , 6 ],
356
- "2nd_value" : [None , 5 , 5 ],
333
+ name : result
357
334
}
335
+
358
336
assert table .sort_by ("a" ).to_pydict () == expected
359
337
360
338
0 commit comments