From 16dade3e68b78c5a4140833e05343361a1c777a9 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sun, 29 Oct 2023 19:02:39 +0000 Subject: [PATCH 1/2] docs(python): use more ergonomic syntax in select/with_columns where possible --- docs/getting-started/expressions.md | 4 ++-- .../src/python/getting-started/expressions.py | 20 +++++++++---------- py-polars/polars/dataframe/group_by.py | 2 +- py-polars/polars/expr/expr.py | 8 +++----- .../polars/io/pyarrow_dataset/functions.py | 2 +- py-polars/polars/lazyframe/frame.py | 2 +- 6 files changed, 17 insertions(+), 21 deletions(-) diff --git a/docs/getting-started/expressions.md b/docs/getting-started/expressions.md index 692806d75de9..93c7b3ecc871 100644 --- a/docs/getting-started/expressions.md +++ b/docs/getting-started/expressions.md @@ -22,7 +22,7 @@ print( ) ``` -You can also specify the specific columns that you want to return. There are two ways to do this. The first option is to create a `list` of column names, as seen below. +You can also specify the specific columns that you want to return. There are two ways to do this. The first option is to pass the column names, as seen below. {{code_block('getting-started/expressions','select2',['select'])}} @@ -32,7 +32,7 @@ print( ) ``` -The second option is to specify each column within a `list` in the `select` statement. This option is shown below. +The second option is to specify each column using `pl.col`. This option is shown below. {{code_block('getting-started/expressions','select3',['select'])}} diff --git a/docs/src/python/getting-started/expressions.py b/docs/src/python/getting-started/expressions.py index ea73e0819a90..763bab5d9ad6 100644 --- a/docs/src/python/getting-started/expressions.py +++ b/docs/src/python/getting-started/expressions.py @@ -27,15 +27,15 @@ # --8<-- [end:select] # --8<-- [start:select2] -df.select(pl.col(["a", "b"])) +df.select(pl.col("a", "b")) # --8<-- [end:select2] # --8<-- [start:select3] -df.select([pl.col("a"), pl.col("b")]).limit(3) +df.select(pl.col("a"), pl.col("b")).limit(3) # --8<-- [end:select3] # --8<-- [start:exclude] -df.select([pl.exclude("a")]) +df.select(pl.exclude("a")) # --8<-- [end:exclude] # --8<-- [start:filter] @@ -49,7 +49,7 @@ # --8<-- [end:filter2] # --8<-- [start:with_columns] -df.with_columns([pl.col("b").sum().alias("e"), (pl.col("b") + 42).alias("b+42")]) +df.with_columns(pl.col("b").sum().alias("e"), (pl.col("b") + 42).alias("b+42")) # --8<-- [end:with_columns] # --8<-- [start:dataframe2] @@ -67,24 +67,22 @@ # --8<-- [start:group_by2] df2.group_by("y", maintain_order=True).agg( - [ - pl.col("*").count().alias("count"), - pl.col("*").sum().alias("sum"), - ] + pl.col("*").count().alias("count"), + pl.col("*").sum().alias("sum"), ) # --8<-- [end:group_by2] # --8<-- [start:combine] df_x = df.with_columns((pl.col("a") * pl.col("b")).alias("a * b")).select( - [pl.all().exclude(["c", "d"])] + pl.all().exclude(["c", "d"]) ) print(df_x) # --8<-- [end:combine] # --8<-- [start:combine2] -df_y = df.with_columns([(pl.col("a") * pl.col("b")).alias("a * b")]).select( - [pl.all().exclude("d")] +df_y = df.with_columns((pl.col("a") * pl.col("b")).alias("a * b")).select( + pl.all().exclude("d") ) print(df_y) diff --git a/py-polars/polars/dataframe/group_by.py b/py-polars/polars/dataframe/group_by.py index b423e767702d..eb20490fe99d 100644 --- a/py-polars/polars/dataframe/group_by.py +++ b/py-polars/polars/dataframe/group_by.py @@ -160,7 +160,7 @@ def agg( ... "c": [5, 4, 3, 2, 1], ... } ... ) - >>> df.group_by("a").agg([pl.col("b"), pl.col("c")]) # doctest: +IGNORE_RESULT + >>> df.group_by("a").agg(pl.col("b"), pl.col("c")) # doctest: +IGNORE_RESULT shape: (3, 3) ┌─────┬───────────┬───────────┐ │ a ┆ b ┆ c │ diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 37ccffaa8e14..98b095b866f4 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -3867,10 +3867,8 @@ def filter(self, predicate: Expr) -> Self: ... } ... ) >>> df.group_by("group_col").agg( - ... [ - ... pl.col("b").filter(pl.col("b") < 2).sum().alias("lt"), - ... pl.col("b").filter(pl.col("b") >= 2).sum().alias("gte"), - ... ] + ... lt=pl.col("b").filter(pl.col("b") < 2).sum().alias("lt"), + ... gte=pl.col("b").filter(pl.col("b") >= 2).sum().alias("gte"), ... ).sort("group_col") shape: (2, 3) ┌───────────┬─────┬─────┐ @@ -5151,7 +5149,7 @@ def is_in(self, other: Expr | Collection[Any] | Series) -> Self: >>> df = pl.DataFrame( ... {"sets": [[1, 2, 3], [1, 2], [9, 10]], "optional_members": [1, 2, 3]} ... ) - >>> df.select([pl.col("optional_members").is_in("sets").alias("contains")]) + >>> df.select(pl.col("optional_members").is_in("sets").alias("contains")) shape: (3, 1) ┌──────────┐ │ contains │ diff --git a/py-polars/polars/io/pyarrow_dataset/functions.py b/py-polars/polars/io/pyarrow_dataset/functions.py index 339cb270c0a2..5cfed9c393ea 100644 --- a/py-polars/polars/io/pyarrow_dataset/functions.py +++ b/py-polars/polars/io/pyarrow_dataset/functions.py @@ -56,7 +56,7 @@ def scan_pyarrow_dataset( >>> ( ... pl.scan_pyarrow_dataset(dset) ... .filter("bools") - ... .select(["bools", "floats", "date"]) + ... .select("bools", "floats", "date") ... .collect() ... ) # doctest: +SKIP shape: (1, 3) diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index e894a3f05c84..0b7e4b0f368e 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -684,7 +684,7 @@ def columns(self) -> list[str]: ... "bar": [6, 7, 8], ... "ham": ["a", "b", "c"], ... } - ... ).select(["foo", "bar"]) + ... ).select("foo", "bar") >>> lf.columns ['foo', 'bar'] From 56084f99439e9ce7c79705f93790a8d81f9b8ad0 Mon Sep 17 00:00:00 2001 From: Marco Edward Gorelli Date: Mon, 30 Oct 2023 07:58:49 +0000 Subject: [PATCH 2/2] Update py-polars/polars/expr/expr.py Co-authored-by: Stijn de Gooijer --- py-polars/polars/expr/expr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 98b095b866f4..e287f80c0ef2 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -3867,8 +3867,8 @@ def filter(self, predicate: Expr) -> Self: ... } ... ) >>> df.group_by("group_col").agg( - ... lt=pl.col("b").filter(pl.col("b") < 2).sum().alias("lt"), - ... gte=pl.col("b").filter(pl.col("b") >= 2).sum().alias("gte"), + ... lt=pl.col("b").filter(pl.col("b") < 2).sum(), + ... gte=pl.col("b").filter(pl.col("b") >= 2).sum(), ... ).sort("group_col") shape: (2, 3) ┌───────────┬─────┬─────┐