pola-rs · stinodego · May 30, 2024 · May 30, 2024
@@ -7,7 +7,7 @@
 data = {"name": ["Alice", "Bob", "Charlie", "David"], "age": [25, 30, 35, 40]}
 df = pl.LazyFrame(data)
 
-ctx = pl.SQLContext(my_table=df, eager_execution=True)
+ctx = pl.SQLContext(my_table=df, eager=True)
 
 result = ctx.execute(
     """

@@ -33,7 +33,7 @@
 pokemon = pl.read_csv(
     "https://gist.githubusercontent.com/ritchie46/cac6b337ea52281aa23c049250a4ff03/raw/89a957ff3919d90e6ef2d34235e6bf22304f3366/pokemon.csv"
 )
-with pl.SQLContext(register_globals=True, eager_execution=True) as ctx:
+with pl.SQLContext(register_globals=True, eager=True) as ctx:
     df_small = ctx.execute("SELECT * from pokemon LIMIT 5")
     print(df_small)
 # --8<-- [end:execute]
@@ -76,7 +76,7 @@
     products_masterdata=pl.scan_csv("docs/data/products_masterdata.csv"),
     products_categories=pl.scan_ndjson("docs/data/products_categories.json"),
     sales_data=pl.from_pandas(sales_data),
-    eager_execution=True,
+    eager=True,
 ) as ctx:
     query = """
     SELECT

@@ -20,7 +20,7 @@
     }
 )
 
-ctx = pl.SQLContext(population=df, eager_execution=True)
+ctx = pl.SQLContext(population=df, eager=True)
 
 print(ctx.execute("SELECT * FROM population"))
 # --8<-- [end:df]

@@ -4620,7 +4620,7 @@ def sort(
             .collect(_eager=True)
         )
 
-    def sql(self, query: str, *, table_name: str | None = None) -> Self:
+    def sql(self, query: str, *, table_name: str = "self") -> Self:
         """
         Execute a SQL query against the DataFrame.
 
@@ -4637,17 +4637,17 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
             SQL query to execute.
         table_name
             Optionally provide an explicit name for the table that represents the
-            calling frame (the alias "self" will always be registered/available).
+            calling frame (defaults to "self").
 
         Notes
         -----
         * The calling frame is automatically registered as a table in the SQL context
-          under the name "self". All DataFrames and LazyFrames found in the current
-          set of global variables are also registered, using their variable name.
+          under the name "self". If you want access to the DataFrames and LazyFrames
+          found in the current globals, use the top-level :meth:`pl.sql <polars.sql>`.
         * More control over registration and execution behaviour is available by
           using the :class:`SQLContext` object.
-        * The SQL query executes entirely in lazy mode before being collected and
-          returned as a DataFrame.
+        * The SQL query executes in lazy mode before being collected and returned
+          as a DataFrame.
 
         See Also
         --------
@@ -4677,26 +4677,6 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
         │ 2077-08-08 ┆ xx  │
         └────────────┴─────┘
 
-        Join two DataFrames using SQL.
-
-        >>> df2 = pl.DataFrame({"a": [3, 2, 1], "d": [125, -654, 888]})
-        >>> df1.sql(
-        ...     '''
-        ...     SELECT self.*, d
-        ...     FROM self
-        ...     INNER JOIN df2 USING (a)
-        ...     WHERE a > 1 AND EXTRACT(year FROM c) < 2050
-        ...     '''
-        ... )
-        shape: (1, 4)
-        ┌─────┬─────┬────────────┬──────┐
-        │ a   ┆ b   ┆ c          ┆ d    │
-        │ --- ┆ --- ┆ ---        ┆ ---  │
-        │ i64 ┆ str ┆ date       ┆ i64  │
-        ╞═════╪═════╪════════════╪══════╡
-        │ 2   ┆ yy  ┆ 2010-10-10 ┆ -654 │
-        └─────┴─────┴────────────┴──────┘
-
         Apply transformations to a DataFrame using SQL, aliasing "self" to "frame".
 
         >>> df1.sql(
@@ -4729,7 +4709,7 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
         )
         with SQLContext(
             register_globals=True,
-            eager_execution=True,
+            eager=True,
         ) as ctx:
             frames = {table_name: self} if table_name else {}
             frames["self"] = self

@@ -1248,7 +1248,7 @@ def sort(
             )
         )
 
-    def sql(self, query: str, *, table_name: str | None = None) -> Self:
+    def sql(self, query: str, *, table_name: str = "self") -> Self:
         """
         Execute a SQL query against the LazyFrame.
 
@@ -1265,13 +1265,13 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
             SQL query to execute.
         table_name
             Optionally provide an explicit name for the table that represents the
-            calling frame (the alias "self" will always be registered/available).
+            calling frame (defaults to "self").
 
         Notes
         -----
         * The calling frame is automatically registered as a table in the SQL context
-          under the name "self". All DataFrames and LazyFrames found in the current
-          set of global variables are also registered, using their variable name.
+          under the name "self". If you want access to the DataFrames and LazyFrames
+          found in the current globals, use the top-level :meth:`pl.sql <polars.sql>`.
         * More control over registration and execution behaviour is available by
           using the :class:`SQLContext` object.
 
@@ -1297,27 +1297,8 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
         │ x   ┆ 8   │
         └─────┴─────┘
 
-        Join two LazyFrames:
-
-        >>> lf1.sql(
-        ...     '''
-        ...     SELECT self.*, d
-        ...     FROM self
-        ...     INNER JOIN lf2 USING (a)
-        ...     WHERE a > 1 AND b < 8
-        ...     '''
-        ... ).collect()
-        shape: (1, 4)
-        ┌─────┬─────┬─────┬──────┐
-        │ a   ┆ b   ┆ c   ┆ d    │
-        │ --- ┆ --- ┆ --- ┆ ---  │
-        │ i64 ┆ i64 ┆ str ┆ i64  │
-        ╞═════╪═════╪═════╪══════╡
-        │ 2   ┆ 7   ┆ y   ┆ -654 │
-        └─────┴─────┴─────┴──────┘
-
-        Apply SQL transforms (aliasing "self" to "frame") and subsequently
-        filter natively (you can freely mix SQL and native operations):
+        Apply SQL transforms (aliasing "self" to "frame") then filter
+        natively (you can freely mix SQL and native operations):
 
         >>> lf1.sql(
         ...     query='''
@@ -1348,7 +1329,7 @@ def sql(self, query: str, *, table_name: str | None = None) -> Self:
         )
         with SQLContext(
             register_globals=True,
-            eager_execution=False,
+            eager=False,
         ) as ctx:
             frames = {table_name: self} if table_name else {}
             frames["self"] = self

@@ -3,6 +3,7 @@
 import contextlib
 from typing import TYPE_CHECKING, Collection, Generic, Mapping, overload
 
+from polars._utils.deprecation import deprecate_renamed_parameter
 from polars._utils.unstable import issue_unstable_warning
 from polars._utils.various import _get_stack_locals
 from polars._utils.wrap import wrap_ldf
@@ -40,17 +41,17 @@ class SQLContext(Generic[FrameType]):
 
     # note: the type-overloaded methods are required to support accurate typing
     # of the frame return from "execute" (which may be DataFrame or LazyFrame),
-    # as that is influenced by both the "eager_execution" flag at init-time AND
-    # the "eager" flag at query-time (if anyone can find a lighter-weight set
-    # of annotations that successfully resolves this, please go for it... ;)
+    # as that is influenced by both the "eager" flag at init-time AND the "eager"
+    # flag at query-time (if anyone can find a lighter-weight set of annotations
+    # that successfully resolves this, please go for it... ;)
 
     @overload
     def __init__(
         self: SQLContext[LazyFrame],
         frames: Mapping[str, DataFrame | LazyFrame | None] | None = ...,
         *,
         register_globals: bool | int = ...,
-        eager_execution: Literal[False] = False,
+        eager: Literal[False] = False,
         **named_frames: DataFrame | LazyFrame | None,
     ) -> None: ...
 
@@ -60,7 +61,7 @@ def __init__(
         frames: Mapping[str, DataFrame | LazyFrame | None] | None = ...,
         *,
         register_globals: bool | int = ...,
-        eager_execution: Literal[True],
+        eager: Literal[True],
         **named_frames: DataFrame | LazyFrame | None,
     ) -> None: ...
 
@@ -70,16 +71,17 @@ def __init__(
         frames: Mapping[str, DataFrame | LazyFrame | None] | None = ...,
         *,
         register_globals: bool | int = ...,
-        eager_execution: bool,
+        eager: bool,
         **named_frames: DataFrame | LazyFrame | None,
     ) -> None: ...
 
+    @deprecate_renamed_parameter("eager_execution", "eager", version="0.20.31")
     def __init__(
         self,
         frames: Mapping[str, DataFrame | LazyFrame | None] | None = None,
         *,
         register_globals: bool | int = False,
-        eager_execution: bool = False,
+        eager: bool = False,
         **named_frames: DataFrame | LazyFrame | None,
     ) -> None:
         """
@@ -93,7 +95,7 @@ def __init__(
             Register all eager/lazy frames found in the globals, automatically
             mapping their variable name to a table name. If given an integer
             then only the most recent "n" frames found will be registered.
-        eager_execution
+        eager
             Return query execution results as `DataFrame` instead of `LazyFrame`.
             (Note that the query itself is always executed in lazy-mode; this
             parameter impacts whether :meth:`execute` returns an eager or lazy
@@ -123,7 +125,7 @@ def __init__(
         )
 
         self._ctxt = PySQLContext.new()
-        self._eager_execution = eager_execution
+        self._eager_execution = eager
 
         frames = dict(frames or {})
         if register_globals:
@@ -166,7 +168,7 @@ def __repr__(self) -> str:
         return f"<SQLContext [tables:{n_tables}] at 0x{id(self):x}>"
 
     # these overloads are necessary to cover the possible permutations
-    # of the init-time "eager_execution" param, and the "eager" param.
+    # of the init-time "eager" param, and the query-time "eager" param.
 
     @overload
     def execute(
@@ -208,9 +210,9 @@ def execute(self, query: str, eager: bool | None = None) -> LazyFrame | DataFram
             A valid string SQL query.
         eager
             Apply the query eagerly, returning `DataFrame` instead of `LazyFrame`.
-            If unset, the value of the init-time parameter "eager_execution" will be
-            used. (Note that the query itself is always executed in lazy-mode; this
-            parameter only impacts the type of the returned frame).
+            If unset, the value of the init-time "eager" parameter will be used.
+            Note that the query itself is always executed in lazy-mode; this
+            parameter only impacts the type of the returned frame.
 
         Examples
         --------

@@ -105,10 +105,7 @@ def sql(query: str, *, eager: bool = False) -> DataFrame | LazyFrame:
     """
     from polars.sql import SQLContext
 
-    with SQLContext(
-        eager_execution=eager,
-        register_globals=True,
-    ) as ctx:
+    with SQLContext(eager=eager, register_globals=True) as ctx:
         return ctx.execute(query)
 
 

@@ -22,7 +22,7 @@ def test_cast() -> None:
     )
     # test various dtype casts, using standard ("CAST <col> AS <dtype>")
     # and postgres-specific ("<col>::<dtype>") cast syntax
-    with pl.SQLContext(df=df, eager_execution=True) as ctx:
+    with pl.SQLContext(df=df, eager=True) as ctx:
         res = ctx.execute(
             """
             SELECT
@@ -142,7 +142,7 @@ def test_cast() -> None:
     ]
 
     with pytest.raises(ComputeError, match="unsupported use of FORMAT in CAST"):
-        pl.SQLContext(df=df, eager_execution=True).execute(
+        pl.SQLContext(df=df, eager=True).execute(
             "SELECT CAST(a AS STRING FORMAT 'HEX') FROM df"
         )
 
@@ -163,18 +163,18 @@ def test_cast_errors(values: Any, cast_op: str, error: str) -> None:
 
     # invalid CAST should raise an error...
     with pytest.raises(ComputeError, match=error):
-        df.sql(f"SELECT {cast_op} FROM df")
+        df.sql(f"SELECT {cast_op} FROM self")
 
     # ... or return `null` values if using TRY_CAST
     target_type = cast_op.split("::")[1]
-    res = df.sql(f"SELECT TRY_CAST(values AS {target_type}) AS cast_values FROM df")
+    res = df.sql(f"SELECT TRY_CAST(values AS {target_type}) AS cast_values FROM self")
     assert None in res.to_series()
 
 
 def test_cast_json() -> None:
     df = pl.DataFrame({"txt": ['{"a":[1,2,3],"b":["x","y","z"],"c":5.0}']})
 
-    with pl.SQLContext(df=df, eager_execution=True) as ctx:
+    with pl.SQLContext(df=df, eager=True) as ctx:
         for json_cast in ("txt::json", "CAST(txt AS JSON)"):
             res = ctx.execute(f"SELECT {json_cast} AS j FROM df")
 

@@ -22,7 +22,7 @@ def test_case_when() -> None:
             "v2": [101, 202, 303, 404],
         }
     )
-    with pl.SQLContext(test_data=lf, eager_execution=True) as ctx:
+    with pl.SQLContext(test_data=lf, eager=True) as ctx:
         out = ctx.execute(
             """
             SELECT *, CASE WHEN COALESCE(v1, v2) % 2 != 0 THEN 'odd' ELSE 'even' END as "v3"

@@ -18,7 +18,7 @@ def foods_ipc_path() -> Path:
 def test_group_by(foods_ipc_path: Path) -> None:
     lf = pl.scan_ipc(foods_ipc_path)
 
-    ctx = pl.SQLContext(eager_execution=True)
+    ctx = pl.SQLContext(eager=True)
     ctx.register("foods", lf)
 
     out = ctx.execute(