You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
feat(optimizer): Enable filter pushdown on window functions (#14026)
* feat(optimizer): Enable filter pushdown on window functions
Ensures selections can be pushed past window functions similarly
to what is already done with aggregations, when possible.
* fix: Add missing dependency
* minor(optimizer): Use 'datafusion-functions-window' as a dev dependency
* docs(optimizer): Add example to filter pushdown on LogicalPlan::Window
/// verifies that filters on partition expressions are not pushed, as the single expression
1610
+
/// column is not available to the user, unlike with aggregations
1611
+
#[test]
1612
+
fnfilter_expression_keep_window() -> Result<()>{
1613
+
let table_scan = test_table_scan()?;
1614
+
1615
+
let window = Expr::WindowFunction(WindowFunction::new(
1616
+
WindowFunctionDefinition::WindowUDF(
1617
+
datafusion_functions_window::rank::rank_udwf(),
1618
+
),
1619
+
vec![],
1620
+
))
1621
+
.partition_by(vec![add(col("a"), col("b"))])// PARTITION BY a + b
1622
+
.order_by(vec![col("c").sort(true,true)])
1623
+
.build()
1624
+
.unwrap();
1625
+
1626
+
let plan = LogicalPlanBuilder::from(table_scan)
1627
+
.window(vec![window])?
1628
+
// unlike with aggregations, single partition column "test.a + test.b" is not available
1629
+
// to the plan, so we use multiple columns when filtering
1630
+
.filter(add(col("a"),col("b")).gt(lit(10i64)))?
1631
+
.build()?;
1632
+
1633
+
let expected = "\
1634
+
Filter: test.a + test.b > Int64(10)\
1635
+
\n WindowAggr: windowExpr=[[rank() PARTITION BY [test.a + test.b] ORDER BY [test.c ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\
1636
+
\n TableScan: test";
1637
+
assert_optimized_plan_eq(plan, expected)
1638
+
}
1639
+
1640
+
/// verifies that filters are not pushed on order by columns (that are not used in partitioning)
1641
+
#[test]
1642
+
fnfilter_order_keep_window() -> Result<()>{
1643
+
let table_scan = test_table_scan()?;
1644
+
1645
+
let window = Expr::WindowFunction(WindowFunction::new(
1646
+
WindowFunctionDefinition::WindowUDF(
1647
+
datafusion_functions_window::rank::rank_udwf(),
1648
+
),
1649
+
vec![],
1650
+
))
1651
+
.partition_by(vec![col("a")])
1652
+
.order_by(vec![col("c").sort(true,true)])
1653
+
.build()
1654
+
.unwrap();
1655
+
1656
+
let plan = LogicalPlanBuilder::from(table_scan)
1657
+
.window(vec![window])?
1658
+
.filter(col("c").gt(lit(10i64)))?
1659
+
.build()?;
1660
+
1661
+
let expected = "\
1662
+
Filter: test.c > Int64(10)\
1663
+
\n WindowAggr: windowExpr=[[rank() PARTITION BY [test.a] ORDER BY [test.c ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\
1664
+
\n TableScan: test";
1665
+
assert_optimized_plan_eq(plan, expected)
1666
+
}
1667
+
1668
+
/// verifies that when we use multiple window functions with a common partition key, the filter
let window1 = Expr::WindowFunction(WindowFunction::new(
1675
+
WindowFunctionDefinition::WindowUDF(
1676
+
datafusion_functions_window::rank::rank_udwf(),
1677
+
),
1678
+
vec![],
1679
+
))
1680
+
.partition_by(vec![col("a")])
1681
+
.order_by(vec![col("c").sort(true,true)])
1682
+
.build()
1683
+
.unwrap();
1684
+
1685
+
let window2 = Expr::WindowFunction(WindowFunction::new(
1686
+
WindowFunctionDefinition::WindowUDF(
1687
+
datafusion_functions_window::rank::rank_udwf(),
1688
+
),
1689
+
vec![],
1690
+
))
1691
+
.partition_by(vec![col("b"), col("a")])
1692
+
.order_by(vec![col("c").sort(true,true)])
1693
+
.build()
1694
+
.unwrap();
1695
+
1696
+
let plan = LogicalPlanBuilder::from(table_scan)
1697
+
.window(vec![window1, window2])?
1698
+
.filter(col("a").gt(lit(10i64)))? // a appears in both window functions
1699
+
.build()?;
1700
+
1701
+
let expected = "\
1702
+
WindowAggr: windowExpr=[[rank() PARTITION BY [test.a] ORDER BY [test.c ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, rank() PARTITION BY [test.b, test.a] ORDER BY [test.c ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\
let window1 = Expr::WindowFunction(WindowFunction::new(
1714
+
WindowFunctionDefinition::WindowUDF(
1715
+
datafusion_functions_window::rank::rank_udwf(),
1716
+
),
1717
+
vec![],
1718
+
))
1719
+
.partition_by(vec![col("a")])
1720
+
.order_by(vec![col("c").sort(true,true)])
1721
+
.build()
1722
+
.unwrap();
1723
+
1724
+
let window2 = Expr::WindowFunction(WindowFunction::new(
1725
+
WindowFunctionDefinition::WindowUDF(
1726
+
datafusion_functions_window::rank::rank_udwf(),
1727
+
),
1728
+
vec![],
1729
+
))
1730
+
.partition_by(vec![col("b"), col("a")])
1731
+
.order_by(vec![col("c").sort(true,true)])
1732
+
.build()
1733
+
.unwrap();
1734
+
1735
+
let plan = LogicalPlanBuilder::from(table_scan)
1736
+
.window(vec![window1, window2])?
1737
+
.filter(col("b").gt(lit(10i64)))? // b only appears in one window function
1738
+
.build()?;
1739
+
1740
+
let expected = "\
1741
+
Filter: test.b > Int64(10)\
1742
+
\n WindowAggr: windowExpr=[[rank() PARTITION BY [test.a] ORDER BY [test.c ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, rank() PARTITION BY [test.b, test.a] ORDER BY [test.c ASC NULLS FIRST] ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]]\
1743
+
\n TableScan: test";
1744
+
assert_optimized_plan_eq(plan, expected)
1745
+
}
1746
+
1445
1747
/// verifies that a filter is pushed to before a projection, the filter expression is correctly re-written
0 commit comments