From 309fc486c47d86776aeec07d86cd04b5d70d97a1 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 24 Jul 2023 05:47:17 -0700
Subject: [PATCH] feat: expose offset in python API (#437)

---
 datafusion/tests/test_dataframe.py | 11 +++++++++++
 datafusion/tests/test_expr.py      |  8 ++++++++
 src/dataframe.rs                   |  5 +++--
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/datafusion/tests/test_dataframe.py b/datafusion/tests/test_dataframe.py
index 4df2061e..78cb50f1 100644
--- a/datafusion/tests/test_dataframe.py
+++ b/datafusion/tests/test_dataframe.py
@@ -115,6 +115,17 @@ def test_limit(df):
     assert len(result.column(1)) == 1
 
 
+def test_limit_with_offset(df):
+    # only 3 rows, but limit past the end to ensure that offset is working
+    df = df.limit(5, offset=2)
+
+    # execute and collect the first (and only) batch
+    result = df.collect()[0]
+
+    assert len(result.column(0)) == 1
+    assert len(result.column(1)) == 1
+
+
 def test_with_column(df):
     df = df.with_column("c", column("a") + column("b"))
 
diff --git a/datafusion/tests/test_expr.py b/datafusion/tests/test_expr.py
index 0c4869f2..73f7d087 100644
--- a/datafusion/tests/test_expr.py
+++ b/datafusion/tests/test_expr.py
@@ -81,6 +81,14 @@ def test_limit(test_ctx):
 
     plan = plan.to_variant()
     assert isinstance(plan, Limit)
+    assert plan.skip() == 0
+
+    df = test_ctx.sql("select c1 from test LIMIT 10 OFFSET 5")
+    plan = df.logical_plan()
+
+    plan = plan.to_variant()
+    assert isinstance(plan, Limit)
+    assert plan.skip() == 5
 
 
 def test_aggregate_query(test_ctx):
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 00c12e85..b8d8ddc3 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -136,8 +136,9 @@ impl PyDataFrame {
         Ok(Self::new(df))
     }
 
-    fn limit(&self, count: usize) -> PyResult<Self> {
-        let df = self.df.as_ref().clone().limit(0, Some(count))?;
+    #[pyo3(signature = (count, offset=0))]
+    fn limit(&self, count: usize, offset: usize) -> PyResult<Self> {
+        let df = self.df.as_ref().clone().limit(offset, Some(count))?;
         Ok(Self::new(df))
     }
 