Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

depr(python): Rename SQLContext "eager_execution" param to "eager" #16595

Merged
merged 1 commit into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/src/python/user-guide/sql/create.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
data = {"name": ["Alice", "Bob", "Charlie", "David"], "age": [25, 30, 35, 40]}
df = pl.LazyFrame(data)

ctx = pl.SQLContext(my_table=df, eager_execution=True)
ctx = pl.SQLContext(my_table=df, eager=True)

result = ctx.execute(
"""
Expand Down
4 changes: 2 additions & 2 deletions docs/src/python/user-guide/sql/intro.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
pokemon = pl.read_csv(
"https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv"
)
with pl.SQLContext(register_globals=True, eager_execution=True) as ctx:
with pl.SQLContext(register_globals=True, eager=True) as ctx:
df_small = ctx.execute("SELECT * from pokemon LIMIT 5")
print(df_small)
# --8<-- [end:execute]
Expand Down Expand Up @@ -76,7 +76,7 @@
products_masterdata=pl.scan_csv("docs/data/products_masterdata.csv"),
products_categories=pl.scan_ndjson("docs/data/products_categories.json"),
sales_data=pl.from_pandas(sales_data),
eager_execution=True,
eager=True,
) as ctx:
query = """
SELECT
Expand Down
2 changes: 1 addition & 1 deletion docs/src/python/user-guide/sql/select.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
}
)

ctx = pl.SQLContext(population=df, eager_execution=True)
ctx = pl.SQLContext(population=df, eager=True)

print(ctx.execute("SELECT * FROM population"))
# --8<-- [end:df]
Expand Down
34 changes: 7 additions & 27 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4620,7 +4620,7 @@ def sort(
.collect(_eager=True)
)

def sql(self, query: str, *, table_name: str | None = None) -> Self:
def sql(self, query: str, *, table_name: str = "self") -> Self:
alexander-beedie marked this conversation as resolved.
Show resolved Hide resolved
"""
Execute a SQL query against the DataFrame.
Expand All @@ -4637,17 +4637,17 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
SQL query to execute.
table_name
Optionally provide an explicit name for the table that represents the
calling frame (the alias "self" will always be registered/available).
calling frame (defaults to "self").
Notes
-----
* The calling frame is automatically registered as a table in the SQL context
under the name "self". All DataFrames and LazyFrames found in the current
set of global variables are also registered, using their variable name.
under the name "self". If you want access to the DataFrames and LazyFrames
found in the current globals, use the top-level :func:`pl.sql <polars.sql>`.
* More control over registration and execution behaviour is available by
using the :class:`SQLContext` object.
* The SQL query executes entirely in lazy mode before being collected and
returned as a DataFrame.
* The SQL query executes in lazy mode before being collected and returned
as a DataFrame.
See Also
--------
Expand Down Expand Up @@ -4677,26 +4677,6 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
│ 2077-08-08 ┆ xx │
└────────────┴─────┘
Join two DataFrames using SQL.
>>> df2 = pl.DataFrame({"a": [3, 2, 1], "d": [125, -654, 888]})
>>> df1.sql(
... '''
... SELECT self.*, d
... FROM self
... INNER JOIN df2 USING (a)
... WHERE a > 1 AND EXTRACT(year FROM c) < 2050
... '''
... )
shape: (1, 4)
┌─────┬─────┬────────────┬──────┐
│ a ┆ b ┆ c ┆ d │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ str ┆ date ┆ i64 │
╞═════╪═════╪════════════╪══════╡
│ 2 ┆ yy ┆ 2010-10-10 ┆ -654 │
└─────┴─────┴────────────┴──────┘
Apply transformations to a DataFrame using SQL, aliasing "self" to "frame".
>>> df1.sql(
Expand Down Expand Up @@ -4729,7 +4709,7 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
)
with SQLContext(
register_globals=True,
eager_execution=True,
eager=True,
) as ctx:
frames = {table_name: self} if table_name else {}
frames["self"] = self
Expand Down
33 changes: 7 additions & 26 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,7 @@ def sort(
)
)

def sql(self, query: str, *, table_name: str | None = None) -> Self:
def sql(self, query: str, *, table_name: str = "self") -> Self:
"""
Execute a SQL query against the LazyFrame.
Expand All @@ -1265,13 +1265,13 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
SQL query to execute.
table_name
Optionally provide an explicit name for the table that represents the
calling frame (the alias "self" will always be registered/available).
calling frame (defaults to "self").
Notes
-----
* The calling frame is automatically registered as a table in the SQL context
under the name "self". All DataFrames and LazyFrames found in the current
set of global variables are also registered, using their variable name.
under the name "self". If you want access to the DataFrames and LazyFrames
found in the current globals, use the top-level :func:`pl.sql <polars.sql>`.
* More control over registration and execution behaviour is available by
using the :class:`SQLContext` object.
Expand All @@ -1297,27 +1297,8 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
│ x ┆ 8 │
└─────┴─────┘
Join two LazyFrames:
>>> lf1.sql(
... '''
... SELECT self.*, d
... FROM self
... INNER JOIN lf2 USING (a)
... WHERE a > 1 AND b < 8
... '''
... ).collect()
shape: (1, 4)
┌─────┬─────┬─────┬──────┐
│ a ┆ b ┆ c ┆ d │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ i64 ┆ str ┆ i64 │
╞═════╪═════╪═════╪══════╡
│ 2 ┆ 7 ┆ y ┆ -654 │
└─────┴─────┴─────┴──────┘
Apply SQL transforms (aliasing "self" to "frame") and subsequently
filter natively (you can freely mix SQL and native operations):
Apply SQL transforms (aliasing "self" to "frame") then filter
natively (you can freely mix SQL and native operations):
>>> lf1.sql(
... query='''
Expand Down Expand Up @@ -1348,7 +1329,7 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
)
with SQLContext(
register_globals=True,
eager_execution=False,
eager=False,
) as ctx:
frames = {table_name: self} if table_name else {}
frames["self"] = self
Expand Down
28 changes: 15 additions & 13 deletions py-polars/polars/sql/context.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import contextlib
from typing import TYPE_CHECKING, Collection, Generic, Mapping, overload

from polars._utils.deprecation import deprecate_renamed_parameter
from polars._utils.unstable import issue_unstable_warning
from polars._utils.various import _get_stack_locals
from polars._utils.wrap import wrap_ldf
Expand Down Expand Up @@ -40,17 +41,17 @@ class SQLContext(Generic[FrameType]):

# note: the type-overloaded methods are required to support accurate typing
# of the frame return from "execute" (which may be DataFrame or LazyFrame),
# as that is influenced by both the "eager_execution" flag at init-time AND
# the "eager" flag at query-time (if anyone can find a lighter-weight set
# of annotations that successfully resolves this, please go for it... ;)
# as that is influenced by both the "eager" flag at init-time AND the "eager"
# flag at query-time (if anyone can find a lighter-weight set of annotations
# that successfully resolves this, please go for it... ;)

@overload
def __init__(
self: SQLContext[LazyFrame],
frames: Mapping[str, DataFrame | LazyFrame | None] | None = ...,
*,
register_globals: bool | int = ...,
eager_execution: Literal[False] = False,
eager: Literal[False] = False,
**named_frames: DataFrame | LazyFrame | None,
) -> None: ...

Expand All @@ -60,7 +61,7 @@ def __init__(
frames: Mapping[str, DataFrame | LazyFrame | None] | None = ...,
*,
register_globals: bool | int = ...,
eager_execution: Literal[True],
eager: Literal[True],
**named_frames: DataFrame | LazyFrame | None,
) -> None: ...

Expand All @@ -70,16 +71,17 @@ def __init__(
frames: Mapping[str, DataFrame | LazyFrame | None] | None = ...,
*,
register_globals: bool | int = ...,
eager_execution: bool,
eager: bool,
**named_frames: DataFrame | LazyFrame | None,
) -> None: ...

@deprecate_renamed_parameter("eager_execution", "eager", version="0.20.31")
def __init__(
self,
frames: Mapping[str, DataFrame | LazyFrame | None] | None = None,
*,
register_globals: bool | int = False,
eager_execution: bool = False,
eager: bool = False,
**named_frames: DataFrame | LazyFrame | None,
) -> None:
"""
Expand All @@ -93,7 +95,7 @@ def __init__(
Register all eager/lazy frames found in the globals, automatically
mapping their variable name to a table name. If given an integer
then only the most recent "n" frames found will be registered.
eager_execution
eager
Return query execution results as `DataFrame` instead of `LazyFrame`.
(Note that the query itself is always executed in lazy-mode; this
parameter impacts whether :meth:`execute` returns an eager or lazy
Expand Down Expand Up @@ -123,7 +125,7 @@ def __init__(
)

self._ctxt = PySQLContext.new()
self._eager_execution = eager_execution
self._eager_execution = eager

frames = dict(frames or {})
if register_globals:
Expand Down Expand Up @@ -166,7 +168,7 @@ def __repr__(self) -> str:
return f"<SQLContext [tables:{n_tables}] at 0x{id(self):x}>"

# these overloads are necessary to cover the possible permutations
# of the init-time "eager_execution" param, and the "eager" param.
# of the init-time "eager" param, and the query-time "eager" param.

@overload
def execute(
Expand Down Expand Up @@ -208,9 +210,9 @@ def execute(self, query: str, eager: bool | None = None) -> LazyFrame | DataFram
A valid string SQL query.
eager
Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
If unset, the value of the init-time parameter "eager_execution" will be
used. (Note that the query itself is always executed in lazy-mode; this
parameter only impacts the type of the returned frame).
If unset, the value of the init-time "eager" parameter will be used.
Note that the query itself is always executed in lazy-mode; this
parameter only impacts the type of the returned frame.
Examples
--------
Expand Down
5 changes: 1 addition & 4 deletions py-polars/polars/sql/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,7 @@ def sql(query: str, *, eager: bool = False) -> DataFrame | LazyFrame:
"""
from polars.sql import SQLContext

with SQLContext(
eager_execution=eager,
register_globals=True,
) as ctx:
with SQLContext(eager=eager, register_globals=True) as ctx:
return ctx.execute(query)


Expand Down
10 changes: 5 additions & 5 deletions py-polars/tests/unit/sql/test_cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_cast() -> None:
)
# test various dtype casts, using standard ("CAST <col> AS <dtype>")
# and postgres-specific ("<col>::<dtype>") cast syntax
with pl.SQLContext(df=df, eager_execution=True) as ctx:
with pl.SQLContext(df=df, eager=True) as ctx:
res = ctx.execute(
"""
SELECT
Expand Down Expand Up @@ -142,7 +142,7 @@ def test_cast() -> None:
]

with pytest.raises(ComputeError, match="unsupported use of FORMAT in CAST"):
pl.SQLContext(df=df, eager_execution=True).execute(
pl.SQLContext(df=df, eager=True).execute(
"SELECT CAST(a AS STRING FORMAT 'HEX') FROM df"
)

Expand All @@ -163,18 +163,18 @@ def test_cast_errors(values: Any, cast_op: str, error: str) -> None:

# invalid CAST should raise an error...
with pytest.raises(ComputeError, match=error):
df.sql(f"SELECT {cast_op} FROM df")
df.sql(f"SELECT {cast_op} FROM self")

# ... or return `null` values if using TRY_CAST
target_type = cast_op.split("::")[1]
res = df.sql(f"SELECT TRY_CAST(values AS {target_type}) AS cast_values FROM df")
res = df.sql(f"SELECT TRY_CAST(values AS {target_type}) AS cast_values FROM self")
assert None in res.to_series()


def test_cast_json() -> None:
df = pl.DataFrame({"txt": ['{"a":[1,2,3],"b":["x","y","z"],"c":5.0}']})

with pl.SQLContext(df=df, eager_execution=True) as ctx:
with pl.SQLContext(df=df, eager=True) as ctx:
for json_cast in ("txt::json", "CAST(txt AS JSON)"):
res = ctx.execute(f"SELECT {json_cast} AS j FROM df")

Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/sql/test_conditional.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_case_when() -> None:
"v2": [101, 202, 303, 404],
}
)
with pl.SQLContext(test_data=lf, eager_execution=True) as ctx:
with pl.SQLContext(test_data=lf, eager=True) as ctx:
out = ctx.execute(
"""
SELECT *, CASE WHEN COALESCE(v1, v2) % 2 != 0 THEN 'odd' ELSE 'even' END as "v3"
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/sql/test_group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def foods_ipc_path() -> Path:
def test_group_by(foods_ipc_path: Path) -> None:
lf = pl.scan_ipc(foods_ipc_path)

ctx = pl.SQLContext(eager_execution=True)
ctx = pl.SQLContext(eager=True)
ctx.register("foods", lf)

out = ctx.execute(
Expand Down
Loading