From 0cd7540b21bb4afd34037ed7dc1aff25d317afa9 Mon Sep 17 00:00:00 2001 From: ritchie Date: Sat, 7 Sep 2024 10:14:54 +0200 Subject: [PATCH] docstring and fix test --- py-polars/polars/dataframe/frame.py | 37 +++++++++++++++++++++++++ py-polars/polars/lazyframe/frame.py | 39 ++++++++++++++++++++++++++- py-polars/tests/unit/io/test_delta.py | 1 - 3 files changed, 75 insertions(+), 2 deletions(-) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 848c629a9e82..ce6c46299db6 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7110,6 +7110,43 @@ def join_where( For example: `pl.col("time") >= pl.col("duration")` suffix Suffix to append to columns with a duplicate name. + + Examples + -------- + >>> east = pl.DataFrame( + ... { + ... "id": [100, 101, 102], + ... "dur": [120, 140, 160], + ... "rev": [12, 14, 16], + ... "cores": [2, 8, 4], + ... } + ... ) + >>> west = pl.DataFrame( + ... { + ... "t_id": [404, 498, 676, 742], + ... "time": [90, 130, 150, 170], + ... "cost": [9, 13, 15, 16], + ... "cores": [4, 2, 1, 4], + ... } + ... 
) + >>> east.join_where( + ... west, + ... pl.col("dur") < pl.col("time"), + ... pl.col("rev") < pl.col("cost"), + ... ) + shape: (5, 8) + ┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐ + │ id ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡ + │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 498 ┆ 130 ┆ 13 ┆ 2 │ + │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 676 ┆ 150 ┆ 15 ┆ 1 │ + │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │ + │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 676 ┆ 150 ┆ 15 ┆ 1 │ + │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │ + └─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘ + """ if not isinstance(other, DataFrame): msg = f"expected `other` join table to be a DataFrame, got {type(other).__name__!r}" diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index d08bd7a0462c..bc2c7fd1a870 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -4569,7 +4569,7 @@ def join_where( suffix: str = "_right", ) -> LazyFrame: """ - Perform a join based on one or multiple equality predicates. + Perform a join based on one or multiple (in)equality predicates. A row from this table may be included in zero or multiple rows in the result, and the relative order of rows may differ between the input and output tables. @@ -4586,6 +4586,43 @@ def join_where( For example: `pl.col("time") >= pl.col("duration")` suffix Suffix to append to columns with a duplicate name. + + Examples + -------- + >>> east = pl.LazyFrame( + ... { + ... "id": [100, 101, 102], + ... "dur": [120, 140, 160], + ... "rev": [12, 14, 16], + ... "cores": [2, 8, 4], + ... } + ... ) + >>> west = pl.LazyFrame( + ... { + ... "t_id": [404, 498, 676, 742], + ... "time": [90, 130, 150, 170], + ... "cost": [9, 13, 15, 16], + ... "cores": [4, 2, 1, 4], + ... } + ... 
) + >>> east.join_where( + ... west, + ... pl.col("dur") < pl.col("time"), + ... pl.col("rev") < pl.col("cost"), + ... ).collect() + shape: (5, 8) + ┌─────┬─────┬─────┬───────┬──────┬──────┬──────┬─────────────┐ + │ id ┆ dur ┆ rev ┆ cores ┆ t_id ┆ time ┆ cost ┆ cores_right │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞═════╪═════╪═════╪═══════╪══════╪══════╪══════╪═════════════╡ + │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 498 ┆ 130 ┆ 13 ┆ 2 │ + │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 676 ┆ 150 ┆ 15 ┆ 1 │ + │ 100 ┆ 120 ┆ 12 ┆ 2 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │ + │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 676 ┆ 150 ┆ 15 ┆ 1 │ + │ 101 ┆ 140 ┆ 14 ┆ 8 ┆ 742 ┆ 170 ┆ 16 ┆ 4 │ + └─────┴─────┴─────┴───────┴──────┴──────┴──────┴─────────────┘ + """ if not isinstance(other, LazyFrame): msg = f"expected `other` join table to be a LazyFrame, not a {type(other).__name__!r}" diff --git a/py-polars/tests/unit/io/test_delta.py b/py-polars/tests/unit/io/test_delta.py index 6cb487f4abb0..c6cb461c5211 100644 --- a/py-polars/tests/unit/io/test_delta.py +++ b/py-polars/tests/unit/io/test_delta.py @@ -472,7 +472,6 @@ def test_write_delta_with_merge(tmp_path: Path) -> None: ) assert isinstance(merger, TableMerger) - assert merger.predicate == "s.a = t.a" assert merger.source_alias == "s" assert merger.target_alias == "t"