diff --git a/.github/scripts/test_bytecode_parser.py b/.github/scripts/test_bytecode_parser.py index 50073c1b0035..0fd65333cb7f 100644 --- a/.github/scripts/test_bytecode_parser.py +++ b/.github/scripts/test_bytecode_parser.py @@ -18,7 +18,7 @@ from typing import Any, Callable import pytest -from polars.utils.udfs import BytecodeParser +from polars._utils.udfs import BytecodeParser from tests.unit.operations.map.test_inefficient_map_warning import ( MY_DICT, NOOP_TEST_CASES, @@ -44,7 +44,7 @@ def test_bytecode_parser_expression_in_ipython( col: str, func: Callable[[Any], Any], expected: str ) -> None: script = ( - "from polars.utils.udfs import BytecodeParser; " + "from polars._utils.udfs import BytecodeParser; " "import datetime as dt; " "from datetime import datetime; " "import numpy as np; " @@ -73,7 +73,7 @@ def test_bytecode_parser_expression_noop(func: str) -> None: ) def test_bytecode_parser_expression_noop_in_ipython(func: str) -> None: script = ( - "from polars.utils.udfs import BytecodeParser; " + "from polars._utils.udfs import BytecodeParser; " f"MY_DICT = {MY_DICT};" f'parser = BytecodeParser({func}, map_target="expr");' f'print(not parser.can_attempt_rewrite() or not parser.to_expression("x"));' diff --git a/crates/polars-lazy/src/physical_plan/executors/python_scan.rs b/crates/polars-lazy/src/physical_plan/executors/python_scan.rs index 85ed618f79f0..1df5ad7861ef 100644 --- a/crates/polars-lazy/src/physical_plan/executors/python_scan.rs +++ b/crates/polars-lazy/src/physical_plan/executors/python_scan.rs @@ -21,7 +21,7 @@ impl Executor for PythonScanExec { let n_rows = self.options.n_rows.take(); Python::with_gil(|py| { let pl = PyModule::import(py, "polars").unwrap(); - let utils = pl.getattr("utils").unwrap(); + let utils = pl.getattr("_utils").unwrap(); let callable = utils.getattr("_execute_from_rust").unwrap(); let python_scan_function = self.options.scan_fn.take().unwrap().0; diff --git a/py-polars/polars/__init__.py b/py-polars/polars/__init__.py index d7f093484221..dc958f9975fe 100644 --- a/py-polars/polars/__init__.py +++ b/py-polars/polars/__init__.py @@ -18,6 +18,10 @@ __register_startup_deps() from polars import api +from polars._utils.polars_version import get_polars_version as _get_polars_version + +# TODO: remove need for importing wrap utils at top level +from polars._utils.wrap import wrap_df, wrap_s # noqa: F401 from polars.config import Config from polars.convert import ( from_arrow, @@ -214,10 +218,6 @@ using_string_cache, ) from polars.type_aliases import PolarsDataType -from polars.utils._polars_version import get_polars_version as _get_polars_version - -# TODO: remove need for importing wrap utils at top level -from polars.utils._wrap import wrap_df, wrap_s # noqa: F401 __version__: str = _get_polars_version() del _get_polars_version diff --git a/py-polars/polars/_utils/__init__.py b/py-polars/polars/_utils/__init__.py new file mode 100644 index 000000000000..fbe278e8c7be --- /dev/null +++ b/py-polars/polars/_utils/__init__.py @@ -0,0 +1,36 @@ +""" +Utility functions. + +Functions that are part of the public API are re-exported here. +""" +from polars._utils.convert import ( + date_to_int, + datetime_to_int, + time_to_int, + timedelta_to_int, + to_py_date, + to_py_datetime, + to_py_decimal, + to_py_time, + to_py_timedelta, +) +from polars._utils.scan import _execute_from_rust +from polars._utils.various import NoDefault, _polars_warn, is_column, no_default + +__all__ = [ + "NoDefault", + "is_column", + "no_default", + # Required for Rust bindings + "date_to_int", + "datetime_to_int", + "time_to_int", + "timedelta_to_int", + "_execute_from_rust", + "_polars_warn", + "to_py_date", + "to_py_datetime", + "to_py_decimal", + "to_py_time", + "to_py_timedelta", +] diff --git a/py-polars/polars/utils/_async.py b/py-polars/polars/_utils/async_.py similarity index 98% rename from py-polars/polars/utils/_async.py rename to py-polars/polars/_utils/async_.py index 60104271712e..e90ae5d614ca 100644 --- a/py-polars/polars/utils/_async.py +++ b/py-polars/polars/_utils/async_.py @@ -2,8 +2,8 @@ from typing import TYPE_CHECKING, Any, Awaitable, Generator, Generic, TypeVar +from polars._utils.wrap import wrap_df from polars.dependencies import _GEVENT_AVAILABLE -from polars.utils._wrap import wrap_df if TYPE_CHECKING: from asyncio.futures import Future diff --git a/py-polars/polars/utils/_construction.py b/py-polars/polars/_utils/construction.py similarity index 99% rename from py-polars/polars/utils/_construction.py rename to py-polars/polars/_utils/construction.py index daefe7b5b1e9..8c13cd2a2a97 100644 --- a/py-polars/polars/utils/_construction.py +++ b/py-polars/polars/_utils/construction.py @@ -23,6 +23,14 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.various import ( + _is_generator, + arrlen, + find_stacklevel, + parse_version, + range_to_series, +) +from polars._utils.wrap import wrap_df, wrap_s from polars.datatypes import ( INTEGER_DTYPES, N_INFER_DEFAULT, @@ -70,14 +78,6 @@ TimeZoneAwareConstructorWarning, ) from polars.meta import get_index_type, thread_pool_size -from polars.utils._wrap import wrap_df, wrap_s -from polars.utils.various import ( - _is_generator, - arrlen, - find_stacklevel, - parse_version, - range_to_series, -) with contextlib.suppress(ImportError): # Module not available when building docs from polars.polars import PyDataFrame, PySeries diff --git a/py-polars/polars/utils/convert.py b/py-polars/polars/_utils/convert.py similarity index 100% rename from py-polars/polars/utils/convert.py rename to py-polars/polars/_utils/convert.py diff --git a/py-polars/polars/utils/deprecation.py b/py-polars/polars/_utils/deprecation.py similarity index 99% rename from py-polars/polars/utils/deprecation.py rename to py-polars/polars/_utils/deprecation.py index a95d711ebc5f..515c57daaa56 100644 --- a/py-polars/polars/utils/deprecation.py +++ b/py-polars/polars/_utils/deprecation.py @@ -5,7 +5,7 @@ from functools import wraps from typing import TYPE_CHECKING, Callable, Sequence, TypeVar -from polars.utils.various import find_stacklevel +from polars._utils.various import find_stacklevel if TYPE_CHECKING: import sys diff --git a/py-polars/polars/utils/_parse_expr_input.py b/py-polars/polars/_utils/parse_expr_input.py similarity index 98% rename from py-polars/polars/utils/_parse_expr_input.py rename to py-polars/polars/_utils/parse_expr_input.py index 05970f11b367..1aa72d2b9226 100644 --- a/py-polars/polars/utils/_parse_expr_input.py +++ b/py-polars/polars/_utils/parse_expr_input.py @@ -5,8 +5,8 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.deprecation import issue_deprecation_warning from polars.exceptions import ComputeError -from polars.utils.deprecation import issue_deprecation_warning with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/utils/_polars_version.py b/py-polars/polars/_utils/polars_version.py similarity index 100% rename from py-polars/polars/utils/_polars_version.py rename to py-polars/polars/_utils/polars_version.py diff --git a/py-polars/polars/utils/_scan.py b/py-polars/polars/_utils/scan.py similarity index 100% rename from py-polars/polars/utils/_scan.py rename to py-polars/polars/_utils/scan.py diff --git a/py-polars/polars/_utils/udfs.py b/py-polars/polars/_utils/udfs.py new file mode 100644 index 000000000000..e0450b3b4d89 --- /dev/null +++ b/py-polars/polars/_utils/udfs.py @@ -0,0 +1,980 @@ +"""Utilities related to user defined functions (such as those passed to `apply`).""" +from __future__ import annotations + +import datetime +import dis +import inspect +import re +import sys +import warnings +from bisect import bisect_left +from collections import defaultdict +from dis import get_instructions +from inspect import signature +from itertools import count, zip_longest +from pathlib import Path +from typing import ( + TYPE_CHECKING, + AbstractSet, + Any, + Callable, + ClassVar, + Iterator, + Literal, + NamedTuple, + Union, +) + +from polars._utils.various import re_escape + +if TYPE_CHECKING: + from dis import Instruction + + if sys.version_info >= (3, 10): + from typing import TypeAlias + else: + from typing_extensions import TypeAlias + + +class StackValue(NamedTuple): + operator: str + operator_arity: int + left_operand: str + right_operand: str + + +MapTarget: TypeAlias = Literal["expr", "frame", "series"] +StackEntry: TypeAlias = Union[str, StackValue] + +_MIN_PY311 = sys.version_info >= (3, 11) +_MIN_PY312 = _MIN_PY311 and sys.version_info >= (3, 12) + + +class OpNames: + BINARY: ClassVar[dict[str, str]] = { + "BINARY_ADD": "+", + "BINARY_AND": "&", + "BINARY_FLOOR_DIVIDE": "//", + "BINARY_LSHIFT": "<<", + "BINARY_RSHIFT": ">>", + "BINARY_MODULO": "%", + "BINARY_MULTIPLY": "*", + "BINARY_OR": "|", + "BINARY_POWER": "**", + "BINARY_SUBTRACT": "-", + "BINARY_TRUE_DIVIDE": "/", + "BINARY_XOR": "^", + } + CALL = frozenset({"CALL"} if _MIN_PY311 else {"CALL_FUNCTION", "CALL_METHOD"}) + CONTROL_FLOW: ClassVar[dict[str, str]] = ( + { + "POP_JUMP_FORWARD_IF_FALSE": "&", + "POP_JUMP_FORWARD_IF_TRUE": "|", + "JUMP_IF_FALSE_OR_POP": "&", + "JUMP_IF_TRUE_OR_POP": "|", + } + # note: 3.12 dropped POP_JUMP_FORWARD_IF_* opcodes + if _MIN_PY311 and not _MIN_PY312 + else { + "POP_JUMP_IF_FALSE": "&", + "POP_JUMP_IF_TRUE": "|", + "JUMP_IF_FALSE_OR_POP": "&", + "JUMP_IF_TRUE_OR_POP": "|", + } + ) + LOAD_VALUES = frozenset(("LOAD_CONST", "LOAD_DEREF", "LOAD_FAST", "LOAD_GLOBAL")) + LOAD_ATTR = frozenset({"LOAD_METHOD", "LOAD_ATTR"}) + LOAD = LOAD_VALUES | LOAD_ATTR + SYNTHETIC: ClassVar[dict[str, int]] = { + "POLARS_EXPRESSION": 1, + } + UNARY: ClassVar[dict[str, str]] = { + "UNARY_NEGATIVE": "-", + "UNARY_POSITIVE": "+", + "UNARY_NOT": "~", + } + PARSEABLE_OPS = frozenset( + {"BINARY_OP", "BINARY_SUBSCR", "COMPARE_OP", "CONTAINS_OP", "IS_OP"} + | set(UNARY) + | set(CONTROL_FLOW) + | set(SYNTHETIC) + | LOAD_VALUES + ) + UNARY_VALUES = frozenset(UNARY.values()) + + +# numpy functions that we can map to native expressions +_NUMPY_MODULE_ALIASES = frozenset(("np", "numpy")) +_NUMPY_FUNCTIONS = frozenset( + ( + # "abs", # TODO: this one clashes with Python builtin abs + "arccos", + "arccosh", + "arcsin", + "arcsinh", + "arctan", + "arctanh", + "cbrt", + "ceil", + "cos", + "cosh", + "degrees", + "exp", + "floor", + "log", + "log10", + "log1p", + "radians", + "sign", + "sin", + "sinh", + "sqrt", + "tan", + "tanh", + ) +) + +# python attrs/funcs that map to native expressions +_PYTHON_ATTRS_MAP = { + "date": "dt.date()", + "day": "dt.day()", + "hour": "dt.hour()", + "microsecond": "dt.microsecond()", + "minute": "dt.minute()", + "month": "dt.month()", + "second": "dt.second()", + "year": "dt.year()", +} +_PYTHON_CASTS_MAP = {"float": "Float64", "int": "Int64", "str": "String"} +_PYTHON_BUILTINS = frozenset(_PYTHON_CASTS_MAP) | {"abs"} +_PYTHON_METHODS_MAP = { + # string + "endswith": "str.ends_with", + "lower": "str.to_lowercase", + "lstrip": "str.strip_chars_start", + "rstrip": "str.strip_chars_end", + "startswith": "str.starts_with", + "strip": "str.strip_chars", + "title": "str.to_titlecase", + "upper": "str.to_uppercase", + # temporal + "date": "dt.date", + "isoweekday": "dt.weekday", + "time": "dt.time", +} + +_MODULE_FUNCTIONS: list[dict[str, list[AbstractSet[str]]]] = [ + # lambda x: numpy.func(x) + # lambda x: numpy.func(CONSTANT) + { + "argument_1_opname": [{"LOAD_FAST", "LOAD_CONST"}], + "argument_2_opname": [], + "module_opname": [OpNames.LOAD_ATTR], + "attribute_opname": [], + "module_name": [_NUMPY_MODULE_ALIASES], + "attribute_name": [], + "function_name": [_NUMPY_FUNCTIONS], + }, + # lambda x: json.loads(x) + { + "argument_1_opname": [{"LOAD_FAST"}], + "argument_2_opname": [], + "module_opname": [OpNames.LOAD_ATTR], + "attribute_opname": [], + "module_name": [{"json"}], + "attribute_name": [], + "function_name": [{"loads"}], + }, + # lambda x: datetime.strptime(x, CONSTANT) + { + "argument_1_opname": [{"LOAD_FAST"}], + "argument_2_opname": [{"LOAD_CONST"}], + "module_opname": [OpNames.LOAD_ATTR], + "attribute_opname": [], + "module_name": [{"datetime"}], + "attribute_name": [], + "function_name": [{"strptime"}], + }, + # lambda x: module.attribute.func(x, CONSTANT) + { + "argument_1_opname": [{"LOAD_FAST"}], + "argument_2_opname": [{"LOAD_CONST"}], + "module_opname": [{"LOAD_ATTR"}], + "attribute_opname": [OpNames.LOAD_ATTR], + "module_name": [{"datetime", "dt"}], + "attribute_name": [{"datetime"}], + "function_name": [{"strptime"}], + }, +] +# In addition to `lambda x: func(x)`, also support cases when a unary operation +# has been applied to `x`, like `lambda x: func(-x)` or `lambda x: func(~x)`. +_MODULE_FUNCTIONS = [ + {**kind, "argument_1_unary_opname": unary} # type: ignore[dict-item] + for kind in _MODULE_FUNCTIONS + for unary in [[set(OpNames.UNARY)], []] +] +_RE_IMPLICIT_BOOL = re.compile(r'pl\.col\("([^"]*)"\) & pl\.col\("\1"\)\.(.+)') + + +def _get_all_caller_variables() -> dict[str, Any]: + """Get all local and global variables from caller's frame.""" + pkg_dir = Path(__file__).parent.parent + + # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow + frame = inspect.currentframe() + n = 0 + try: + while frame: + fname = inspect.getfile(frame) + if fname.startswith(str(pkg_dir)): + frame = frame.f_back + n += 1 + else: + break + variables: dict[str, Any] + if frame is None: + variables = {} + else: + variables = {**frame.f_locals, **frame.f_globals} + finally: + # https://docs.python.org/3/library/inspect.html + # > Though the cycle detector will catch these, destruction of the frames + # > (and local variables) can be made deterministic by removing the cycle + # > in a finally clause. + del frame + return variables + + +class BytecodeParser: + """Introspect UDF bytecode and determine if we can rewrite as native expression.""" + + _map_target_name: str | None = None + + def __init__(self, function: Callable[[Any], Any], map_target: MapTarget): + try: + original_instructions = get_instructions(function) + except TypeError: + # in case we hit something that can't be disassembled (eg: code object + # unavailable, like a bare numpy ufunc that isn't in a lambda/function) + original_instructions = iter([]) + + self._function = function + self._map_target = map_target + self._param_name = self._get_param_name(function) + self._rewritten_instructions = RewrittenInstructions( + instructions=original_instructions, + ) + + def _omit_implicit_bool(self, expr: str) -> str: + """Drop extraneous/implied bool (eg: `pl.col("d") & pl.col("d").dt.date()`).""" + while _RE_IMPLICIT_BOOL.search(expr): + expr = _RE_IMPLICIT_BOOL.sub(repl=r'pl.col("\1").\2', string=expr) + return expr + + @staticmethod + def _get_param_name(function: Callable[[Any], Any]) -> str | None: + """Return single function parameter name.""" + try: + # note: we do not parse/handle functions with > 1 params + sig = signature(function) + except ValueError: + return None + return ( + next(iter(parameters.keys())) + if len(parameters := sig.parameters) == 1 + else None + ) + + def _inject_nesting( + self, + expression_blocks: dict[int, str], + logical_instructions: list[Instruction], + ) -> list[tuple[int, str]]: + """Inject nesting boundaries into expression blocks (as parentheses).""" + if logical_instructions: + # reconstruct nesting boundaries for mixed and/or ops by associating control + # flow jump offsets with their target expression blocks and applying parens + if len({inst.opname for inst in logical_instructions}) > 1: + block_offsets: list[int] = list(expression_blocks.keys()) + prev_end = -1 + for inst in logical_instructions: + start = block_offsets[bisect_left(block_offsets, inst.offset) - 1] + end = block_offsets[bisect_left(block_offsets, inst.argval) - 1] + if not (start == 0 and end == block_offsets[-1]): + if prev_end not in (start, end): + expression_blocks[start] = "(" + expression_blocks[start] + expression_blocks[end] += ")" + prev_end = end + + for inst in logical_instructions: # inject connecting "&" and "|" ops + expression_blocks[inst.offset] = OpNames.CONTROL_FLOW[inst.opname] + + return sorted(expression_blocks.items()) + + def _get_target_name(self, col: str, expression: str) -> str: + """The name of the object against which the 'map' is being invoked.""" + if self._map_target_name is not None: + return self._map_target_name + else: + col_expr = f'pl.col("{col}")' + if self._map_target == "expr": + return col_expr + elif self._map_target == "series": + # note: handle overlapping name from global variables; fallback + # through "s", "srs", "series" and (finally) srs0 -> srsN... + search_expr = expression.replace(col_expr, "") + for name in ("s", "srs", "series"): + if not re.search(rf"\b{name}\b", search_expr): + self._map_target_name = name + return name + n = count() + while True: + name = f"srs{next(n)}" + if not re.search(rf"\b{name}\b", search_expr): + self._map_target_name = name + return name + + msg = f"TODO: map_target = {self._map_target!r}" + raise NotImplementedError(msg) + + @property + def map_target(self) -> MapTarget: + """The map target, eg: one of 'expr', 'frame', or 'series'.""" + return self._map_target + + def can_attempt_rewrite(self) -> bool: + """ + Determine if we may be able to offer a native polars expression instead. + + Note that `lambda x: x` is inefficient, but we ignore it because it is not + guaranteed that using the equivalent bare constant value will return the + same output. (Hopefully nobody is writing lambdas like that anyway...) + """ + return ( + self._param_name is not None + # check minimum number of ops, ensuring all are parseable + and len(self._rewritten_instructions) >= 2 + and all( + inst.opname in OpNames.PARSEABLE_OPS + for inst in self._rewritten_instructions + ) + # exclude constructs/functions with multiple RETURN_VALUE ops + and sum( + 1 + for inst in self.original_instructions + if inst.opname == "RETURN_VALUE" + ) + == 1 + ) + + def dis(self) -> None: + """Print disassembled function bytecode.""" + dis.dis(self._function) + + @property + def function(self) -> Callable[[Any], Any]: + """The function being parsed.""" + return self._function + + @property + def original_instructions(self) -> list[Instruction]: + """The original bytecode instructions from the function we are parsing.""" + return list(self._rewritten_instructions._original_instructions) + + @property + def param_name(self) -> str | None: + """The parameter name of the function being parsed.""" + return self._param_name + + @property + def rewritten_instructions(self) -> list[Instruction]: + """The rewritten bytecode instructions from the function we are parsing.""" + return list(self._rewritten_instructions) + + def to_expression(self, col: str) -> str | None: + """Translate postfix bytecode instructions to polars expression/string.""" + self._map_target_name = None + if self._param_name is None: + return None + + # decompose bytecode into logical 'and'/'or' expression blocks (if present) + control_flow_blocks = defaultdict(list) + logical_instructions = [] + jump_offset = 0 + for idx, inst in enumerate(self._rewritten_instructions): + if inst.opname in OpNames.CONTROL_FLOW: + jump_offset = self._rewritten_instructions[idx + 1].offset + logical_instructions.append(inst) + else: + control_flow_blocks[jump_offset].append(inst) + + # convert each block to a polars expression string + caller_variables: dict[str, Any] = {} + try: + expression_strings = self._inject_nesting( + { + offset: InstructionTranslator( + instructions=ops, + caller_variables=caller_variables, + map_target=self._map_target, + ).to_expression( + col=col, + param_name=self._param_name, + depth=int(bool(logical_instructions)), + ) + for offset, ops in control_flow_blocks.items() + }, + logical_instructions, + ) + except NotImplementedError: + return None + polars_expr = " ".join(expr for _offset, expr in expression_strings) + + # note: if no 'pl.col' in the expression, it likely represents a compound + # constant value (e.g. `lambda x: CONST + 123`), so we don't want to warn + if "pl.col(" not in polars_expr: + return None + else: + polars_expr = self._omit_implicit_bool(polars_expr) + if self._map_target == "series": + target_name = self._get_target_name(col, polars_expr) + return polars_expr.replace(f'pl.col("{col}")', target_name) + else: + return polars_expr + + def warn( + self, + col: str, + suggestion_override: str | None = None, + udf_override: str | None = None, + ) -> None: + """Generate warning that suggests an equivalent native polars expression.""" + # Import these here so that udfs can be imported without polars installed. + + from polars._utils.various import ( + find_stacklevel, + in_terminal_that_supports_colour, + ) + from polars.exceptions import PolarsInefficientMapWarning + + suggested_expression = suggestion_override or self.to_expression(col) + + if suggested_expression is not None: + target_name = self._get_target_name(col, suggested_expression) + func_name = udf_override or self._function.__name__ or "..." + if func_name == "": + func_name = f"lambda {self._param_name}: ..." + + addendum = ( + 'Note: in list.eval context, pl.col("") should be written as pl.element()' + if 'pl.col("")' in suggested_expression + else "" + ) + if self._map_target == "expr": + apitype = "expressions" + clsname = "Expr" + else: + apitype = "series" + clsname = "Series" + + before, after = ( + ( + f" \033[31m- {target_name}.map_elements({func_name})\033[0m\n", + f" \033[32m+ {suggested_expression}\033[0m\n{addendum}", + ) + if in_terminal_that_supports_colour() + else ( + f" - {target_name}.map_elements({func_name})\n", + f" + {suggested_expression}\n{addendum}", + ) + ) + warnings.warn( + f"\n{clsname}.map_elements is significantly slower than the native {apitype} API.\n" + "Only use if you absolutely CANNOT implement your logic otherwise.\n" + "Replace this expression...\n" + f"{before}" + "with this one instead:\n" + f"{after}", + PolarsInefficientMapWarning, + stacklevel=find_stacklevel(), + ) + + +class InstructionTranslator: + """Translates Instruction bytecode to a polars expression string.""" + + def __init__( + self, + instructions: list[Instruction], + caller_variables: dict[str, Any], + map_target: MapTarget, + ) -> None: + self._caller_variables: dict[str, Any] = caller_variables + self._stack = self._to_intermediate_stack(instructions, map_target) + + def to_expression(self, col: str, param_name: str, depth: int) -> str: + """Convert intermediate stack to polars expression string.""" + return self._expr(self._stack, col, param_name, depth) + + @staticmethod + def op(inst: Instruction) -> str: + """Convert bytecode instruction to suitable intermediate op string.""" + if inst.opname in OpNames.CONTROL_FLOW: + return OpNames.CONTROL_FLOW[inst.opname] + elif inst.argrepr: + return inst.argrepr + elif inst.opname == "IS_OP": + return "is not" if inst.argval else "is" + elif inst.opname == "CONTAINS_OP": + return "not in" if inst.argval else "in" + elif inst.opname in OpNames.UNARY: + return OpNames.UNARY[inst.opname] + elif inst.opname == "BINARY_SUBSCR": + return "replace" + else: + msg = ( + "unrecognized opname" + "\n\nPlease report a bug to https://github.com/pola-rs/polars/issues" + " with the content of function you were passing to `map` and the" + f" following instruction object:\n{inst!r}" + ) + raise AssertionError(msg) + + def _expr(self, value: StackEntry, col: str, param_name: str, depth: int) -> str: + """Take stack entry value and convert to polars expression string.""" + if isinstance(value, StackValue): + op = value.operator + e1 = self._expr(value.left_operand, col, param_name, depth + 1) + if value.operator_arity == 1: + if op not in OpNames.UNARY_VALUES: + if e1.startswith("pl.col("): + call = "" if op.endswith(")") else "()" + return f"{e1}.{op}{call}" + if e1[0] in OpNames.UNARY_VALUES and e1[1:].startswith("pl.col("): + call = "" if op.endswith(")") else "()" + return f"({e1}).{op}{call}" + + # support use of consts as numpy/builtin params, eg: + # "np.sin(3) + np.cos(x)", or "len('const_string') + len(x)" + pfx = "np." if op in _NUMPY_FUNCTIONS else "" + return f"{pfx}{op}({e1})" + return f"{op}{e1}" + else: + e2 = self._expr(value.right_operand, col, param_name, depth + 1) + if op in ("is", "is not") and value[2] == "None": + not_ = "" if op == "is" else "not_" + return f"{e1}.is_{not_}null()" + elif op in ("in", "not in"): + not_ = "" if op == "in" else "~" + return ( + f"{not_}({e1}.is_in({e2}))" + if " " in e1 + else f"{not_}{e1}.is_in({e2})" + ) + elif op == "replace": + if not self._caller_variables: + self._caller_variables.update(_get_all_caller_variables()) + if not isinstance(self._caller_variables.get(e1, None), dict): + msg = "require dict mapping" + raise NotImplementedError(msg) + return f"{e2}.{op}({e1})" + elif op == "<<": + # Result of 2**e2 might be float is e2 was negative. + # But, if e1 << e2 was valid, then e2 must have been positive. + # Hence, the output of 2**e2 can be safely cast to Int64, which + # may be necessary if chaining operations which assume Int64 output. + return f"({e1} * 2**{e2}).cast(pl.Int64)" + elif op == ">>": + # Motivation for the cast is the same as in the '<<' case above. + return f"({e1} / 2**{e2}).cast(pl.Int64)" + else: + expr = f"{e1} {op} {e2}" + return f"({expr})" if depth else expr + + elif value == param_name: + return f'pl.col("{col}")' + + return value + + def _to_intermediate_stack( + self, instructions: list[Instruction], map_target: MapTarget + ) -> StackEntry: + """Take postfix bytecode and convert to an intermediate natural-order stack.""" + if map_target in ("expr", "series"): + stack: list[StackEntry] = [] + for inst in instructions: + stack.append( + inst.argrepr + if inst.opname in OpNames.LOAD + else ( + StackValue( + operator=self.op(inst), + operator_arity=1, + left_operand=stack.pop(), # type: ignore[arg-type] + right_operand=None, # type: ignore[arg-type] + ) + if ( + inst.opname in OpNames.UNARY + or OpNames.SYNTHETIC.get(inst.opname) == 1 + ) + else StackValue( + operator=self.op(inst), + operator_arity=2, + left_operand=stack.pop(-2), # type: ignore[arg-type] + right_operand=stack.pop(-1), # type: ignore[arg-type] + ) + ) + ) + return stack[0] + + # TODO: dataframe.apply(...) + msg = f"TODO: {map_target!r} apply" + raise NotImplementedError(msg) + + +class RewrittenInstructions: + """ + Standalone class that applies Instruction rewrite/filtering rules. + + This significantly simplifies subsequent parsing by injecting + synthetic POLARS_EXPRESSION ops into the Instruction stream for + easy identification/translation and separates the parsing logic + from the identification of expression translation opportunities. + """ + + _ignored_ops = frozenset( + [ + "COPY", + "COPY_FREE_VARS", + "POP_TOP", + "PRECALL", + "PUSH_NULL", + "RESUME", + "RETURN_VALUE", + ] + ) + _caller_variables: ClassVar[dict[str, Any]] = {} + + def __init__(self, instructions: Iterator[Instruction]): + self._original_instructions = list(instructions) + self._rewritten_instructions = self._rewrite( + self._upgrade_instruction(inst) + for inst in self._original_instructions + if inst.opname not in self._ignored_ops + ) + + def __len__(self) -> int: + return len(self._rewritten_instructions) + + def __iter__(self) -> Iterator[Instruction]: + return iter(self._rewritten_instructions) + + def __getitem__(self, item: Any) -> Instruction: + return self._rewritten_instructions[item] + + def _matches( + self, + idx: int, + *, + opnames: list[AbstractSet[str]], + argvals: list[AbstractSet[Any] | dict[Any, Any] | None] | None, + is_attr: bool = False, + ) -> list[Instruction]: + """ + Check if a sequence of Instructions matches the specified ops/argvals. + + Parameters + ---------- + idx + The index of the first instruction to check. + opnames + The full opname sequence that defines a match. + argvals + Associated argvals that must also match (in same position as opnames). + is_attr + Indicate if the match represents pure attribute access (cannot be called). + """ + n_required_ops, argvals = len(opnames), argvals or [] + idx_offset = idx + n_required_ops + if ( + is_attr + and (trailing_inst := self._instructions[idx_offset : idx_offset + 1]) + and trailing_inst[0].opname in OpNames.CALL # not pure attr if called + ): + return [] + + instructions = self._instructions[idx:idx_offset] + if len(instructions) == n_required_ops and all( + inst.opname in match_opnames + and (match_argval is None or inst.argval in match_argval) + for inst, match_opnames, match_argval in zip_longest( + instructions, opnames, argvals + ) + ): + return instructions + return [] + + def _rewrite(self, instructions: Iterator[Instruction]) -> list[Instruction]: + """ + Apply rewrite rules, potentially injecting synthetic operations. + + Rules operate on the instruction stream and can examine/modify + it as needed, pushing updates into "updated_instructions" and + returning True/False to indicate if any changes were made. + """ + self._instructions = list(instructions) + updated_instructions: list[Instruction] = [] + idx = 0 + while idx < len(self._instructions): + inst, increment = self._instructions[idx], 1 + if inst.opname not in OpNames.LOAD or not any( + (increment := map_rewrite(idx, updated_instructions)) + for map_rewrite in ( + # add any other rewrite methods here + self._rewrite_functions, + self._rewrite_methods, + self._rewrite_builtins, + self._rewrite_attrs, + ) + ): + updated_instructions.append(inst) + idx += increment or 1 + return updated_instructions + + def _rewrite_attrs(self, idx: int, updated_instructions: list[Instruction]) -> int: + """Replace python attribute lookup with synthetic POLARS_EXPRESSION op.""" + if matching_instructions := self._matches( + idx, + opnames=[{"LOAD_FAST"}, {"LOAD_ATTR"}], + argvals=[None, _PYTHON_ATTRS_MAP], + is_attr=True, + ): + inst = matching_instructions[1] + expr_name = _PYTHON_ATTRS_MAP[inst.argval] + px = inst._replace( + opname="POLARS_EXPRESSION", argval=expr_name, argrepr=expr_name + ) + updated_instructions.extend([matching_instructions[0], px]) + + return len(matching_instructions) + + def _rewrite_builtins( + self, idx: int, updated_instructions: list[Instruction] + ) -> int: + """Replace builtin function calls with a synthetic POLARS_EXPRESSION op.""" + if matching_instructions := self._matches( + idx, + opnames=[{"LOAD_GLOBAL"}, {"LOAD_FAST", "LOAD_CONST"}, OpNames.CALL], + argvals=[_PYTHON_BUILTINS], + ): + inst1, inst2 = matching_instructions[:2] + if (argval := inst1.argval) in _PYTHON_CASTS_MAP: + dtype = _PYTHON_CASTS_MAP[argval] + argval = f"cast(pl.{dtype})" + + px = inst1._replace( + opname="POLARS_EXPRESSION", + argval=argval, + argrepr=argval, + offset=inst2.offset, + ) + # POLARS_EXPRESSION is mapped as a unary op, so switch instruction order + operand = inst2._replace(offset=inst1.offset) + updated_instructions.extend((operand, px)) + + return len(matching_instructions) + + def _rewrite_functions( + self, idx: int, updated_instructions: list[Instruction] + ) -> int: + """Replace function calls with a synthetic POLARS_EXPRESSION op.""" + for function_kind in _MODULE_FUNCTIONS: + opnames: list[AbstractSet[str]] = [ + {"LOAD_GLOBAL", "LOAD_DEREF"}, + *function_kind["module_opname"], + *function_kind["attribute_opname"], + *function_kind["argument_1_opname"], + *function_kind["argument_1_unary_opname"], + *function_kind["argument_2_opname"], + OpNames.CALL, + ] + if matching_instructions := self._matches( + idx, + opnames=opnames, + argvals=[ + *function_kind["module_name"], + *function_kind["attribute_name"], + *function_kind["function_name"], + ], + ): + attribute_count = len(function_kind["attribute_name"]) + inst1, inst2, inst3 = matching_instructions[ + attribute_count : 3 + attribute_count + ] + if inst1.argval == "json": + expr_name = "str.json_decode" + elif inst1.argval == "datetime": + fmt = matching_instructions[attribute_count + 3].argval + expr_name = f'str.to_datetime(format="{fmt}")' + if not self._is_stdlib_datetime( + inst1.argval, + matching_instructions[0].argval, + fmt, + attribute_count, + ): + return 0 + else: + expr_name = inst2.argval + + px = inst1._replace( + opname="POLARS_EXPRESSION", + argval=expr_name, + argrepr=expr_name, + offset=inst3.offset, + ) + + # POLARS_EXPRESSION is mapped as a unary op, so switch instruction order + operand = inst3._replace(offset=inst1.offset) + updated_instructions.extend( + ( + operand, + matching_instructions[3 + attribute_count], + px, + ) + if function_kind["argument_1_unary_opname"] + else (operand, px) + ) + return len(matching_instructions) + + return 0 + + def _rewrite_methods( + self, idx: int, updated_instructions: list[Instruction] + ) -> int: + """Replace python method calls with synthetic POLARS_EXPRESSION op.""" + LOAD_METHOD = OpNames.LOAD_ATTR if _MIN_PY312 else {"LOAD_METHOD"} + if matching_instructions := ( + # method call with one basic arg, eg: "s.endswith('!')" + self._matches( + idx, + opnames=[LOAD_METHOD, {"LOAD_CONST"}, OpNames.CALL], + argvals=[_PYTHON_METHODS_MAP], + ) + or + # method call with no arg, eg: "s.lower()" + self._matches( + idx, + opnames=[LOAD_METHOD, OpNames.CALL], + argvals=[_PYTHON_METHODS_MAP], + ) + ): + inst = matching_instructions[0] + expr = _PYTHON_METHODS_MAP[inst.argval] + + if matching_instructions[1].opname == "LOAD_CONST": + param_value = matching_instructions[1].argval + if isinstance(param_value, tuple) and expr in ( + "str.starts_with", + "str.ends_with", + ): + starts, ends = ("^", "") if "starts" in expr else ("", "$") + rx = "|".join(re_escape(v) for v in param_value) + q = '"' if "'" in param_value else "'" + expr = f"str.contains(r{q}{starts}({rx}){ends}{q})" + else: + expr += f"({param_value!r})" + + px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr) + updated_instructions.append(px) + + return len(matching_instructions) + + @staticmethod + def _upgrade_instruction(inst: Instruction) -> Instruction: + """Rewrite any older binary opcodes using py 3.11 'BINARY_OP' instead.""" + if not _MIN_PY311 and inst.opname in OpNames.BINARY: + inst = inst._replace( + argrepr=OpNames.BINARY[inst.opname], + opname="BINARY_OP", + ) + return inst + + def _is_stdlib_datetime( + self, function_name: str, module_name: str, fmt: str, attribute_count: int + ) -> bool: + if not self._caller_variables: + self._caller_variables.update(_get_all_caller_variables()) + vars = self._caller_variables + return ( + attribute_count == 0 and vars.get(function_name) is datetime.datetime + ) or (attribute_count == 1 and vars.get(module_name) is datetime) + + +def _is_raw_function(function: Callable[[Any], Any]) -> tuple[str, str]: + """Identify translatable calls that aren't wrapped inside a lambda/function.""" + try: + func_module = function.__class__.__module__ + func_name = function.__name__ + except AttributeError: + return "", "" + + # numpy function calls + if func_module == "numpy" and func_name in _NUMPY_FUNCTIONS: + return "np", f"{func_name}()" + + # python function calls + elif func_module == "builtins": + if func_name in _PYTHON_CASTS_MAP: + return "builtins", f"cast(pl.{_PYTHON_CASTS_MAP[func_name]})" + elif func_name == "loads": + import json # double-check since it is referenced via 'builtins' + + if function is json.loads: + return "json", "str.json_decode()" + + return "", "" + + +def warn_on_inefficient_map( + function: Callable[[Any], Any], columns: list[str], map_target: MapTarget +) -> None: + """ + Generate `PolarsInefficientMapWarning` on poor usage of a `map` function. + + Parameters + ---------- + function + The function passed to `map`. + columns + The column names of the original object; in the case of an `Expr` this + will be a list of length 1 containing the expression's root name. + map_target + The target of the `map` call. One of `"expr"`, `"frame"`, + or `"series"`. + """ + if map_target == "frame": + msg = "TODO: 'frame' map-function parsing" + raise NotImplementedError(msg) + + # note: we only consider simple functions with a single col/param + if not (col := columns and columns[0]): + return None + + # the parser introspects function bytecode to determine if we can + # rewrite as a much more optimal native polars expression instead + parser = BytecodeParser(function, map_target) + if parser.can_attempt_rewrite(): + parser.warn(col) + else: + # handle bare numpy/json functions + module, suggestion = _is_raw_function(function) + if module and suggestion: + fn = function.__name__ + parser.warn( + col, + suggestion_override=f'pl.col("{col}").{suggestion}', + udf_override=fn if module == "builtins" else f"{module}.{fn}", + ) + + +__all__ = ["BytecodeParser", "warn_on_inefficient_map"] diff --git a/py-polars/polars/utils/unstable.py b/py-polars/polars/_utils/unstable.py similarity index 97% rename from py-polars/polars/utils/unstable.py rename to py-polars/polars/_utils/unstable.py index e00c9177e06b..3ad2e4fde306 100644 --- a/py-polars/polars/utils/unstable.py +++ b/py-polars/polars/_utils/unstable.py @@ -6,8 +6,8 @@ from functools import wraps from typing import TYPE_CHECKING, Callable, TypeVar +from polars._utils.various import find_stacklevel from polars.exceptions import UnstableWarning -from polars.utils.various import find_stacklevel if TYPE_CHECKING: import sys diff --git a/py-polars/polars/utils/various.py b/py-polars/polars/_utils/various.py similarity index 99% rename from py-polars/polars/utils/various.py rename to py-polars/polars/_utils/various.py index d06e604ead34..e689cf2ea632 100644 --- a/py-polars/polars/utils/various.py +++ b/py-polars/polars/_utils/various.py @@ -385,7 +385,7 @@ def str_duration_(td: str | None) -> int | None: NS = TypeVar("NS") -class sphinx_accessor(property): # noqa: D101 +class sphinx_accessor(property): def __get__( # type: ignore[override] self, instance: Any, diff --git a/py-polars/polars/utils/_wrap.py b/py-polars/polars/_utils/wrap.py similarity index 100% rename from py-polars/polars/utils/_wrap.py rename to py-polars/polars/_utils/wrap.py diff --git a/py-polars/polars/api.py b/py-polars/polars/api.py index f1020b50bc09..00cbb1673344 100644 --- a/py-polars/polars/api.py +++ b/py-polars/polars/api.py @@ -6,7 +6,7 @@ from warnings import warn import polars._reexport as pl -from polars.utils.various import find_stacklevel +from polars._utils.various import find_stacklevel if TYPE_CHECKING: from polars import DataFrame, Expr, LazyFrame, Series diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index a481597f3ae0..0aabe4d59a6a 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -6,9 +6,9 @@ from pathlib import Path from typing import TYPE_CHECKING, Any, Literal, get_args +from polars._utils.deprecation import deprecate_nonkeyword_arguments +from polars._utils.various import normalize_filepath from polars.dependencies import json -from polars.utils.deprecation import deprecate_nonkeyword_arguments -from polars.utils.various import normalize_filepath if sys.version_info >= (3, 10): from typing import TypeAlias diff --git a/py-polars/polars/convert.py b/py-polars/polars/convert.py index ed5ea49f21d7..a5deab752fe2 100644 --- a/py-polars/polars/convert.py +++ b/py-polars/polars/convert.py @@ -7,12 +7,12 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.various import _cast_repr_strings_with_schema from polars.datatypes import N_INFER_DEFAULT, Categorical, List, Object, String, Struct from polars.dependencies import pandas as pd from polars.dependencies import pyarrow as pa from polars.exceptions import NoDataError from polars.io import read_csv -from polars.utils.various import _cast_repr_strings_with_schema if TYPE_CHECKING: from polars import DataFrame, Series diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index b4728041aae2..b78d05b109e0 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -31,6 +31,43 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.construction import ( + arrow_to_pydf, + dict_to_pydf, + frame_to_pydf, + iterable_to_pydf, + numpy_to_idxs, + numpy_to_pydf, + pandas_to_pydf, + sequence_to_pydf, + series_to_pydf, +) +from polars._utils.convert import parse_as_duration_string +from polars._utils.deprecation import ( + deprecate_function, + deprecate_nonkeyword_arguments, + deprecate_parameter_as_positional, + deprecate_renamed_function, + deprecate_renamed_parameter, + deprecate_saturating, + issue_deprecation_warning, +) +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.unstable import issue_unstable_warning, unstable +from polars._utils.various import ( + _prepare_row_index_args, + _process_null_values, + handle_projection_columns, + is_bool_sequence, + is_int_sequence, + is_str_sequence, + normalize_filepath, + parse_version, + range_to_slice, + scale_bytes, + warn_null_comparison, +) +from polars._utils.wrap import wrap_expr, wrap_ldf, wrap_s from polars.dataframe._html import NotebookFormatter from polars.dataframe.group_by import DynamicGroupBy, GroupBy, RollingGroupBy from polars.datatypes import ( @@ -76,43 +113,6 @@ from polars.selectors import _expand_selector_dicts, _expand_selectors from polars.slice import PolarsSlice from polars.type_aliases import DbWriteMode -from polars.utils._construction import ( - arrow_to_pydf, - dict_to_pydf, - frame_to_pydf, - iterable_to_pydf, - numpy_to_idxs, - numpy_to_pydf, - pandas_to_pydf, - sequence_to_pydf, - series_to_pydf, -) -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr, wrap_ldf, wrap_s -from polars.utils.convert import parse_as_duration_string -from polars.utils.deprecation import ( - deprecate_function, - deprecate_nonkeyword_arguments, - deprecate_parameter_as_positional, - deprecate_renamed_function, - deprecate_renamed_parameter, - deprecate_saturating, - issue_deprecation_warning, -) -from polars.utils.unstable import issue_unstable_warning, unstable -from polars.utils.various import ( - _prepare_row_index_args, - _process_null_values, - handle_projection_columns, - is_bool_sequence, - is_int_sequence, - is_str_sequence, - normalize_filepath, - parse_version, - range_to_slice, - scale_bytes, - warn_null_comparison, -) with contextlib.suppress(ImportError): # Module not available when building docs from polars.polars import PyDataFrame @@ -6498,7 +6498,7 @@ def map_rows( >>> df.select(pl.col("foo") * 2 + pl.col("bar")) # doctest: +IGNORE_RESULT """ # TODO: Enable warning for inefficient map - # from polars.utils.udfs import warn_on_inefficient_map + # from polars._utils.udfs import warn_on_inefficient_map # warn_on_inefficient_map(function, columns=self.columns, map_target="frame) out, is_df = self._df.map_rows(function, return_dtype, inference_size) diff --git a/py-polars/polars/dataframe/group_by.py b/py-polars/polars/dataframe/group_by.py index f6ad0144098b..789468987ca4 100644 --- a/py-polars/polars/dataframe/group_by.py +++ b/py-polars/polars/dataframe/group_by.py @@ -3,8 +3,8 @@ from typing import TYPE_CHECKING, Callable, Iterable, Iterator from polars import functions as F -from polars.utils.convert import parse_as_duration_string -from polars.utils.deprecation import ( +from polars._utils.convert import parse_as_duration_string +from polars._utils.deprecation import ( deprecate_renamed_function, issue_deprecation_warning, ) diff --git a/py-polars/polars/datatypes/classes.py b/py-polars/polars/datatypes/classes.py index 014a2e94437a..370c3f290b8c 100644 --- a/py-polars/polars/datatypes/classes.py +++ b/py-polars/polars/datatypes/classes.py @@ -168,7 +168,7 @@ def is_not(self, other: PolarsDataType) -> bool: >>> pl.List.is_not(pl.List(pl.Int32)) # doctest: +SKIP True """ - from polars.utils.deprecation import issue_deprecation_warning + from polars._utils.deprecation import issue_deprecation_warning issue_deprecation_warning( "`DataType.is_not` is deprecated and will be removed in the next breaking release." @@ -382,7 +382,7 @@ def __init__( ): # Issuing the warning on `__init__` does not trigger when the class is used # without being instantiated, but it's better than nothing - from polars.utils.unstable import issue_unstable_warning + from polars._utils.unstable import issue_unstable_warning issue_unstable_warning( "The Decimal data type is considered unstable." @@ -488,7 +488,7 @@ def __init__( self, time_unit: TimeUnit = "us", time_zone: str | timezone | None = None ): if time_unit is None: - from polars.utils.deprecation import issue_deprecation_warning + from polars._utils.deprecation import issue_deprecation_warning issue_deprecation_warning( "Passing `time_unit=None` to the Datetime constructor is deprecated." @@ -634,7 +634,7 @@ class Enum(DataType): def __init__(self, categories: Series | Iterable[str]): # Issuing the warning on `__init__` does not trigger when the class is used # without being instantiated, but it's better than nothing - from polars.utils.unstable import issue_unstable_warning + from polars._utils.unstable import issue_unstable_warning issue_unstable_warning( "The Enum data type is considered unstable." diff --git a/py-polars/polars/datatypes/convert.py b/py-polars/polars/datatypes/convert.py index c7da2453d064..8279ef4dce86 100644 --- a/py-polars/polars/datatypes/convert.py +++ b/py-polars/polars/datatypes/convert.py @@ -485,7 +485,7 @@ def numpy_char_code_to_dtype(dtype_char: str) -> PolarsDataType: def maybe_cast(el: Any, dtype: PolarsDataType) -> Any: """Try casting a value to a value that is valid for the given Polars dtype.""" # cast el if it doesn't match - from polars.utils.convert import ( + from polars._utils.convert import ( datetime_to_int, timedelta_to_int, ) diff --git a/py-polars/polars/dependencies.py b/py-polars/polars/dependencies.py index 1cc61eb4609c..61d37c3877a8 100644 --- a/py-polars/polars/dependencies.py +++ b/py-polars/polars/dependencies.py @@ -244,8 +244,8 @@ def import_optional( min_version : {str, tuple[int]}, optional If a minimum module version is required, specify it here. """ + from polars._utils.various import parse_version from polars.exceptions import ModuleUpgradeRequired - from polars.utils.various import parse_version try: module = import_module(module_name) diff --git a/py-polars/polars/expr/array.py b/py-polars/polars/expr/array.py index b228b7b562b7..6972d5e1f062 100644 --- a/py-polars/polars/expr/array.py +++ b/py-polars/polars/expr/array.py @@ -2,8 +2,8 @@ from typing import TYPE_CHECKING, Callable, Sequence -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.wrap import wrap_expr if TYPE_CHECKING: from datetime import date, datetime, time diff --git a/py-polars/polars/expr/binary.py b/py-polars/polars/expr/binary.py index ed1af57c8b2d..978df94bb063 100644 --- a/py-polars/polars/expr/binary.py +++ b/py-polars/polars/expr/binary.py @@ -2,8 +2,8 @@ from typing import TYPE_CHECKING -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.wrap import wrap_expr if TYPE_CHECKING: from polars import Expr diff --git a/py-polars/polars/expr/categorical.py b/py-polars/polars/expr/categorical.py index 89ecef5188ea..ca00114c4e36 100644 --- a/py-polars/polars/expr/categorical.py +++ b/py-polars/polars/expr/categorical.py @@ -2,8 +2,8 @@ from typing import TYPE_CHECKING -from polars.utils._wrap import wrap_expr -from polars.utils.deprecation import deprecate_function +from polars._utils.deprecation import deprecate_function +from polars._utils.wrap import wrap_expr if TYPE_CHECKING: from polars import Expr diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index 823f56604644..09e76b5327bf 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -5,18 +5,18 @@ import polars._reexport as pl from polars import functions as F -from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Int32 -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr -from polars.utils.convert import parse_as_duration_string -from polars.utils.deprecation import ( +from polars._utils.convert import parse_as_duration_string +from polars._utils.deprecation import ( deprecate_function, deprecate_renamed_function, deprecate_saturating, issue_deprecation_warning, rename_use_earliest_to_ambiguous, ) -from polars.utils.unstable import unstable +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.unstable import unstable +from polars._utils.wrap import wrap_expr +from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Int32 if TYPE_CHECKING: from datetime import timedelta diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 699de0973a4b..71e8521b9764 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -25,6 +25,29 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.convert import negate_duration_string, parse_as_duration_string +from polars._utils.deprecation import ( + deprecate_function, + deprecate_nonkeyword_arguments, + deprecate_renamed_function, + deprecate_renamed_parameter, + deprecate_saturating, + issue_deprecation_warning, +) +from polars._utils.parse_expr_input import ( + parse_as_expression, + parse_as_list_of_expressions, + parse_predicates_constraints_as_expression, +) +from polars._utils.unstable import issue_unstable_warning, unstable +from polars._utils.various import ( + BUILDING_SPHINX_DOCS, + find_stacklevel, + no_default, + normalize_filepath, + sphinx_accessor, + warn_null_comparison, +) from polars.datatypes import ( Int64, is_polars_dtype, @@ -43,29 +66,6 @@ from polars.expr.string import ExprStringNameSpace from polars.expr.struct import ExprStructNameSpace from polars.meta import thread_pool_size -from polars.utils._parse_expr_input import ( - parse_as_expression, - parse_as_list_of_expressions, - parse_predicates_constraints_as_expression, -) -from polars.utils.convert import negate_duration_string, parse_as_duration_string -from polars.utils.deprecation import ( - deprecate_function, - deprecate_nonkeyword_arguments, - deprecate_renamed_function, - deprecate_renamed_parameter, - deprecate_saturating, - issue_deprecation_warning, -) -from polars.utils.unstable import issue_unstable_warning, unstable -from polars.utils.various import ( - BUILDING_SPHINX_DOCS, - find_stacklevel, - no_default, - normalize_filepath, - sphinx_accessor, - warn_null_comparison, -) with contextlib.suppress(ImportError): # Module not available when building docs from polars.polars import arg_where as py_arg_where @@ -78,6 +78,9 @@ from io import IOBase from polars import DataFrame, LazyFrame, Series + from polars._utils.various import ( + NoDefault, + ) from polars.type_aliases import ( ClosedInterval, FillNullStrategy, @@ -94,9 +97,6 @@ TemporalLiteral, WindowMappingStrategy, ) - from polars.utils.various import ( - NoDefault, - ) if sys.version_info >= (3, 11): from typing import Concatenate, ParamSpec, Self @@ -4358,7 +4358,7 @@ def map_elements( ) # input x: Series of type list containing the group values - from polars.utils.udfs import warn_on_inefficient_map + from polars._utils.udfs import warn_on_inefficient_map root_names = self.meta.root_names() if len(root_names) > 0: diff --git a/py-polars/polars/expr/list.py b/py-polars/polars/expr/list.py index 71139e65cb66..b81caf6d6113 100644 --- a/py-polars/polars/expr/list.py +++ b/py-polars/polars/expr/list.py @@ -5,12 +5,12 @@ import polars._reexport as pl from polars import functions as F -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr -from polars.utils.deprecation import ( +from polars._utils.deprecation import ( deprecate_renamed_function, deprecate_renamed_parameter, ) +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.wrap import wrap_expr if TYPE_CHECKING: from datetime import date, datetime, time diff --git a/py-polars/polars/expr/meta.py b/py-polars/polars/expr/meta.py index 0c6c3fde46d7..e4c008fecb41 100644 --- a/py-polars/polars/expr/meta.py +++ b/py-polars/polars/expr/meta.py @@ -4,13 +4,13 @@ from pathlib import Path from typing import TYPE_CHECKING, Literal, overload -from polars.exceptions import ComputeError -from polars.utils._wrap import wrap_expr -from polars.utils.deprecation import ( +from polars._utils.deprecation import ( deprecate_nonkeyword_arguments, deprecate_renamed_function, ) -from polars.utils.various import normalize_filepath +from polars._utils.various import normalize_filepath +from polars._utils.wrap import wrap_expr +from polars.exceptions import ComputeError if TYPE_CHECKING: from io import IOBase diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index aa2fcf6d49e3..e00e555c0df7 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -5,18 +5,18 @@ import polars._reexport as pl from polars import functions as F -from polars.datatypes import Date, Datetime, Int32, Time, py_type_to_dtype -from polars.datatypes.constants import N_INFER_DEFAULT -from polars.exceptions import ChronoFormatWarning -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr -from polars.utils.deprecation import ( +from polars._utils.deprecation import ( deprecate_renamed_function, deprecate_renamed_parameter, issue_deprecation_warning, rename_use_earliest_to_ambiguous, ) -from polars.utils.various import find_stacklevel +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.various import find_stacklevel +from polars._utils.wrap import wrap_expr +from polars.datatypes import Date, Datetime, Int32, Time, py_type_to_dtype +from polars.datatypes.constants import N_INFER_DEFAULT +from polars.exceptions import ChronoFormatWarning if TYPE_CHECKING: from polars import Expr diff --git a/py-polars/polars/expr/struct.py b/py-polars/polars/expr/struct.py index a8669b2f3317..709e9b8d16b1 100644 --- a/py-polars/polars/expr/struct.py +++ b/py-polars/polars/expr/struct.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING, Sequence -from polars.utils._wrap import wrap_expr +from polars._utils.wrap import wrap_expr if TYPE_CHECKING: from polars import Expr diff --git a/py-polars/polars/expr/whenthen.py b/py-polars/polars/expr/whenthen.py index ced5df6d68fe..c6e45f4520cf 100644 --- a/py-polars/polars/expr/whenthen.py +++ b/py-polars/polars/expr/whenthen.py @@ -3,12 +3,12 @@ from typing import TYPE_CHECKING, Any, Iterable import polars.functions as F -from polars.expr.expr import Expr -from polars.utils._parse_expr_input import ( +from polars._utils.parse_expr_input import ( parse_as_expression, parse_when_inputs, ) -from polars.utils._wrap import wrap_expr +from polars._utils.wrap import wrap_expr +from polars.expr.expr import Expr if TYPE_CHECKING: from polars.polars import PyExpr diff --git a/py-polars/polars/functions/aggregation/horizontal.py b/py-polars/polars/functions/aggregation/horizontal.py index 6d06aab8162c..729a50d8de75 100644 --- a/py-polars/polars/functions/aggregation/horizontal.py +++ b/py-polars/polars/functions/aggregation/horizontal.py @@ -4,10 +4,10 @@ from typing import TYPE_CHECKING, Iterable import polars.functions as F +from polars._utils.deprecation import deprecate_renamed_function +from polars._utils.parse_expr_input import parse_as_list_of_expressions +from polars._utils.wrap import wrap_expr from polars.datatypes import UInt32 -from polars.utils._parse_expr_input import parse_as_list_of_expressions -from polars.utils._wrap import wrap_expr -from polars.utils.deprecation import deprecate_renamed_function with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/aggregation/vertical.py b/py-polars/polars/functions/aggregation/vertical.py index 16828027f3dd..ecae33eac192 100644 --- a/py-polars/polars/functions/aggregation/vertical.py +++ b/py-polars/polars/functions/aggregation/vertical.py @@ -3,7 +3,7 @@ from typing import TYPE_CHECKING import polars.functions as F -from polars.utils.deprecation import deprecate_renamed_function +from polars._utils.deprecation import deprecate_renamed_function if TYPE_CHECKING: from polars import Expr diff --git a/py-polars/polars/functions/as_datatype.py b/py-polars/polars/functions/as_datatype.py index 02d220868075..0e4af3ed2a08 100644 --- a/py-polars/polars/functions/as_datatype.py +++ b/py-polars/polars/functions/as_datatype.py @@ -4,13 +4,13 @@ from typing import TYPE_CHECKING, Iterable, overload from polars import functions as F -from polars.datatypes import Date, Struct, Time -from polars.utils._parse_expr_input import ( +from polars._utils.deprecation import rename_use_earliest_to_ambiguous +from polars._utils.parse_expr_input import ( parse_as_expression, parse_as_list_of_expressions, ) -from polars.utils._wrap import wrap_expr -from polars.utils.deprecation import rename_use_earliest_to_ambiguous +from polars._utils.wrap import wrap_expr +from polars.datatypes import Date, Struct, Time with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/col.py b/py-polars/polars/functions/col.py index da5debc32de4..ef2209c0ba41 100644 --- a/py-polars/polars/functions/col.py +++ b/py-polars/polars/functions/col.py @@ -3,8 +3,8 @@ import contextlib from typing import TYPE_CHECKING, Any, Iterable, Protocol, cast +from polars._utils.wrap import wrap_expr from polars.datatypes import is_polars_dtype -from polars.utils._wrap import wrap_expr plr: Any = None with contextlib.suppress(ImportError): # Module not available when building docs diff --git a/py-polars/polars/functions/eager.py b/py-polars/polars/functions/eager.py index 573fac1912c1..dc47f8323dbd 100644 --- a/py-polars/polars/functions/eager.py +++ b/py-polars/polars/functions/eager.py @@ -7,10 +7,10 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.various import ordered_unique +from polars._utils.wrap import wrap_df, wrap_expr, wrap_ldf, wrap_s from polars.exceptions import InvalidOperationError from polars.type_aliases import ConcatMethod, FrameType -from polars.utils._wrap import wrap_df, wrap_expr, wrap_ldf, wrap_s -from polars.utils.various import ordered_unique with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/lazy.py b/py-polars/polars/functions/lazy.py index bbd1e1d14b38..308acb78f68c 100644 --- a/py-polars/polars/functions/lazy.py +++ b/py-polars/polars/functions/lazy.py @@ -5,19 +5,19 @@ import polars._reexport as pl import polars.functions as F -from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64, UInt32 -from polars.utils._async import _AioDataFrameResult, _GeventDataFrameResult -from polars.utils._parse_expr_input import ( - parse_as_expression, - parse_as_list_of_expressions, -) -from polars.utils._wrap import wrap_df, wrap_expr -from polars.utils.deprecation import ( +from polars._utils.async_ import _AioDataFrameResult, _GeventDataFrameResult +from polars._utils.deprecation import ( deprecate_parameter_as_positional, deprecate_renamed_function, issue_deprecation_warning, ) -from polars.utils.unstable import issue_unstable_warning, unstable +from polars._utils.parse_expr_input import ( + parse_as_expression, + parse_as_list_of_expressions, +) +from polars._utils.unstable import issue_unstable_warning, unstable +from polars._utils.wrap import wrap_df, wrap_expr +from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Datetime, Int64, UInt32 with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/len.py b/py-polars/polars/functions/len.py index f34a3e84cbe2..d731e5c16d54 100644 --- a/py-polars/polars/functions/len.py +++ b/py-polars/polars/functions/len.py @@ -8,7 +8,7 @@ import contextlib from typing import TYPE_CHECKING -from polars.utils._wrap import wrap_expr +from polars._utils.wrap import wrap_expr with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/lit.py b/py-polars/polars/functions/lit.py index b02aa57e6ce1..b636d5f2544a 100644 --- a/py-polars/polars/functions/lit.py +++ b/py-polars/polars/functions/lit.py @@ -5,16 +5,16 @@ from typing import TYPE_CHECKING, Any import polars._reexport as pl -from polars.datatypes import Date, Datetime, Duration, Time -from polars.dependencies import _check_for_numpy -from polars.dependencies import numpy as np -from polars.utils._wrap import wrap_expr -from polars.utils.convert import ( +from polars._utils.convert import ( date_to_int, datetime_to_int, time_to_int, timedelta_to_int, ) +from polars._utils.wrap import wrap_expr +from polars.datatypes import Date, Datetime, Duration, Time +from polars.dependencies import _check_for_numpy +from polars.dependencies import numpy as np with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/range/_utils.py b/py-polars/polars/functions/range/_utils.py index 173125996ef7..86bdeedd15cd 100644 --- a/py-polars/polars/functions/range/_utils.py +++ b/py-polars/polars/functions/range/_utils.py @@ -2,7 +2,7 @@ from datetime import timedelta -from polars.utils.convert import parse_as_duration_string +from polars._utils.convert import parse_as_duration_string def parse_interval_argument(interval: str | timedelta) -> str: diff --git a/py-polars/polars/functions/range/date_range.py b/py-polars/polars/functions/range/date_range.py index ae6e0af5dea1..918bffa53a4e 100644 --- a/py-polars/polars/functions/range/date_range.py +++ b/py-polars/polars/functions/range/date_range.py @@ -5,13 +5,13 @@ from typing import TYPE_CHECKING, overload from polars import functions as F -from polars.functions.range._utils import parse_interval_argument -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr -from polars.utils.deprecation import ( +from polars._utils.deprecation import ( deprecate_saturating, issue_deprecation_warning, ) +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.wrap import wrap_expr +from polars.functions.range._utils import parse_interval_argument with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/range/datetime_range.py b/py-polars/polars/functions/range/datetime_range.py index 76321e59b0a5..73a78042f668 100644 --- a/py-polars/polars/functions/range/datetime_range.py +++ b/py-polars/polars/functions/range/datetime_range.py @@ -4,10 +4,10 @@ from typing import TYPE_CHECKING, overload from polars import functions as F +from polars._utils.deprecation import deprecate_saturating +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.wrap import wrap_expr from polars.functions.range._utils import parse_interval_argument -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr -from polars.utils.deprecation import deprecate_saturating with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/range/int_range.py b/py-polars/polars/functions/range/int_range.py index c23e0196a8e2..0be6beeed3a6 100644 --- a/py-polars/polars/functions/range/int_range.py +++ b/py-polars/polars/functions/range/int_range.py @@ -4,9 +4,9 @@ from typing import TYPE_CHECKING, overload from polars import functions as F +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.wrap import wrap_expr, wrap_s from polars.datatypes import Int64 -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr, wrap_s with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/range/time_range.py b/py-polars/polars/functions/range/time_range.py index 5563e072b357..bef7e8aef58c 100644 --- a/py-polars/polars/functions/range/time_range.py +++ b/py-polars/polars/functions/range/time_range.py @@ -5,10 +5,10 @@ from typing import TYPE_CHECKING, overload from polars import functions as F +from polars._utils.deprecation import deprecate_saturating +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.wrap import wrap_expr from polars.functions.range._utils import parse_interval_argument -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr -from polars.utils.deprecation import deprecate_saturating with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/repeat.py b/py-polars/polars/functions/repeat.py index c922a5a05503..ae1dc413c1c1 100644 --- a/py-polars/polars/functions/repeat.py +++ b/py-polars/polars/functions/repeat.py @@ -6,6 +6,8 @@ from typing import TYPE_CHECKING, Any, overload from polars import functions as F +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.wrap import wrap_expr from polars.datatypes import ( FLOAT_DTYPES, INTEGER_DTYPES, @@ -16,8 +18,6 @@ List, Utf8, ) -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/functions/whenthen.py b/py-polars/polars/functions/whenthen.py index 77ab37a09c02..2c6254a346d3 100644 --- a/py-polars/polars/functions/whenthen.py +++ b/py-polars/polars/functions/whenthen.py @@ -4,7 +4,7 @@ from typing import TYPE_CHECKING, Any, Iterable import polars._reexport as pl -from polars.utils._parse_expr_input import parse_when_inputs +from polars._utils.parse_expr_input import parse_when_inputs with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/io/_utils.py b/py-polars/polars/io/_utils.py index 2efbb3616d48..66afaf637016 100644 --- a/py-polars/polars/io/_utils.py +++ b/py-polars/polars/io/_utils.py @@ -8,9 +8,9 @@ from tempfile import NamedTemporaryFile from typing import IO, Any, ContextManager, Iterator, cast, overload +from polars._utils.various import normalize_filepath from polars.dependencies import _FSSPEC_AVAILABLE, fsspec from polars.exceptions import NoDataError -from polars.utils.various import normalize_filepath def _is_glob_pattern(file: str) -> bool: diff --git a/py-polars/polars/io/csv/batched_reader.py b/py-polars/polars/io/csv/batched_reader.py index a4f416e8a268..201b578be964 100644 --- a/py-polars/polars/io/csv/batched_reader.py +++ b/py-polars/polars/io/csv/batched_reader.py @@ -3,15 +3,15 @@ import contextlib from typing import TYPE_CHECKING, Sequence -from polars.datatypes import N_INFER_DEFAULT, py_type_to_dtype -from polars.io.csv._utils import _update_columns -from polars.utils._wrap import wrap_df -from polars.utils.various import ( +from polars._utils.various import ( _prepare_row_index_args, _process_null_values, handle_projection_columns, normalize_filepath, ) +from polars._utils.wrap import wrap_df +from polars.datatypes import N_INFER_DEFAULT, py_type_to_dtype +from polars.io.csv._utils import _update_columns with contextlib.suppress(ImportError): # Module not available when building docs from polars.polars import PyBatchedCsv diff --git a/py-polars/polars/io/csv/functions.py b/py-polars/polars/io/csv/functions.py index e3ad2cd5c80f..9291345a9a60 100644 --- a/py-polars/polars/io/csv/functions.py +++ b/py-polars/polars/io/csv/functions.py @@ -4,12 +4,12 @@ from typing import IO, TYPE_CHECKING, Any, Callable, Mapping, Sequence import polars._reexport as pl +from polars._utils.deprecation import deprecate_renamed_parameter +from polars._utils.various import handle_projection_columns, normalize_filepath from polars.datatypes import N_INFER_DEFAULT, String from polars.io._utils import _prepare_file_arg from polars.io.csv._utils import _check_arg_is_1byte, _update_columns from polars.io.csv.batched_reader import BatchedCsvReader -from polars.utils.deprecation import deprecate_renamed_parameter -from polars.utils.various import handle_projection_columns, normalize_filepath if TYPE_CHECKING: from polars import DataFrame, LazyFrame diff --git a/py-polars/polars/io/database.py b/py-polars/polars/io/database.py index 76db913aeb02..c7d6d9d3a08c 100644 --- a/py-polars/polars/io/database.py +++ b/py-polars/polars/io/database.py @@ -6,9 +6,9 @@ from inspect import Parameter, signature from typing import TYPE_CHECKING, Any, Iterable, Literal, Sequence, TypedDict, overload +from polars._utils.deprecation import issue_deprecation_warning from polars.convert import from_arrow from polars.exceptions import InvalidOperationError, UnsuitableSQLError -from polars.utils.deprecation import issue_deprecation_warning if TYPE_CHECKING: from types import TracebackType diff --git a/py-polars/polars/io/iceberg.py b/py-polars/polars/io/iceberg.py index fd5d3f6a4aa1..ec57b401c907 100644 --- a/py-polars/polars/io/iceberg.py +++ b/py-polars/polars/io/iceberg.py @@ -21,8 +21,8 @@ from typing import TYPE_CHECKING, Any, Callable import polars._reexport as pl +from polars._utils.convert import to_py_date, to_py_datetime from polars.dependencies import pyiceberg -from polars.utils.convert import to_py_date, to_py_datetime if TYPE_CHECKING: from datetime import date, datetime diff --git a/py-polars/polars/io/ipc/functions.py b/py-polars/polars/io/ipc/functions.py index 411ed43043d9..ddec69601732 100644 --- a/py-polars/polars/io/ipc/functions.py +++ b/py-polars/polars/io/ipc/functions.py @@ -5,10 +5,10 @@ from typing import IO, TYPE_CHECKING, Any import polars._reexport as pl +from polars._utils.deprecation import deprecate_renamed_parameter +from polars._utils.various import normalize_filepath from polars.dependencies import _PYARROW_AVAILABLE from polars.io._utils import _prepare_file_arg -from polars.utils.deprecation import deprecate_renamed_parameter -from polars.utils.various import normalize_filepath with contextlib.suppress(ImportError): from polars.polars import read_ipc_schema as _read_ipc_schema diff --git a/py-polars/polars/io/ndjson.py b/py-polars/polars/io/ndjson.py index d8e5aa9d403a..9d413e6de10d 100644 --- a/py-polars/polars/io/ndjson.py +++ b/py-polars/polars/io/ndjson.py @@ -3,8 +3,8 @@ from typing import TYPE_CHECKING import polars._reexport as pl +from polars._utils.deprecation import deprecate_renamed_parameter from polars.datatypes import N_INFER_DEFAULT -from polars.utils.deprecation import deprecate_renamed_parameter if TYPE_CHECKING: from io import IOBase diff --git a/py-polars/polars/io/parquet/functions.py b/py-polars/polars/io/parquet/functions.py index a80da70569a3..8c0acd413a9f 100644 --- a/py-polars/polars/io/parquet/functions.py +++ b/py-polars/polars/io/parquet/functions.py @@ -6,11 +6,11 @@ from typing import IO, TYPE_CHECKING, Any import polars._reexport as pl +from polars._utils.deprecation import deprecate_renamed_parameter +from polars._utils.various import is_int_sequence, normalize_filepath from polars.convert import from_arrow from polars.dependencies import _PYARROW_AVAILABLE from polars.io._utils import _prepare_file_arg -from polars.utils.deprecation import deprecate_renamed_parameter -from polars.utils.various import is_int_sequence, normalize_filepath with contextlib.suppress(ImportError): from polars.polars import read_parquet_schema as _read_parquet_schema diff --git a/py-polars/polars/io/pyarrow_dataset/anonymous_scan.py b/py-polars/polars/io/pyarrow_dataset/anonymous_scan.py index df639d478a96..5ecaeacff2ca 100644 --- a/py-polars/polars/io/pyarrow_dataset/anonymous_scan.py +++ b/py-polars/polars/io/pyarrow_dataset/anonymous_scan.py @@ -71,13 +71,13 @@ def _scan_pyarrow_dataset_impl( _filter = None if predicate: - from polars.datatypes import Date, Datetime, Duration - from polars.utils.convert import ( + from polars._utils.convert import ( to_py_date, to_py_datetime, to_py_time, to_py_timedelta, ) + from polars.datatypes import Date, Datetime, Duration _filter = eval( predicate, diff --git a/py-polars/polars/io/pyarrow_dataset/functions.py b/py-polars/polars/io/pyarrow_dataset/functions.py index f1d6edf8b1eb..2d23a95bddfd 100644 --- a/py-polars/polars/io/pyarrow_dataset/functions.py +++ b/py-polars/polars/io/pyarrow_dataset/functions.py @@ -2,8 +2,8 @@ from typing import TYPE_CHECKING +from polars._utils.unstable import unstable from polars.io.pyarrow_dataset.anonymous_scan import _scan_pyarrow_dataset -from polars.utils.unstable import unstable if TYPE_CHECKING: from polars import LazyFrame diff --git a/py-polars/polars/io/spreadsheet/functions.py b/py-polars/polars/io/spreadsheet/functions.py index 1053e4b6f2cf..3dccdb347b0a 100644 --- a/py-polars/polars/io/spreadsheet/functions.py +++ b/py-polars/polars/io/spreadsheet/functions.py @@ -9,6 +9,8 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.deprecation import deprecate_renamed_parameter +from polars._utils.various import normalize_filepath from polars.datatypes import ( FLOAT_DTYPES, NUMERIC_DTYPES, @@ -22,8 +24,6 @@ from polars.exceptions import NoDataError, ParameterCollisionError from polars.io._utils import PortableTemporaryFile, _looks_like_url, _process_file_url from polars.io.csv.functions import read_csv -from polars.utils.deprecation import deprecate_renamed_parameter -from polars.utils.various import normalize_filepath if TYPE_CHECKING: from typing import Literal diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 0d1d8ed8c3e4..01611c1a88db 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -24,6 +24,31 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.async_ import _AioDataFrameResult, _GeventDataFrameResult +from polars._utils.convert import negate_duration_string, parse_as_duration_string +from polars._utils.deprecation import ( + deprecate_function, + deprecate_parameter_as_positional, + deprecate_renamed_function, + deprecate_renamed_parameter, + deprecate_saturating, + issue_deprecation_warning, +) +from polars._utils.parse_expr_input import ( + parse_as_expression, + parse_as_list_of_expressions, +) +from polars._utils.unstable import issue_unstable_warning, unstable +from polars._utils.various import ( + _in_notebook, + _prepare_row_index_args, + _process_null_values, + is_bool_sequence, + is_sequence, + normalize_filepath, + parse_percentiles, +) +from polars._utils.wrap import wrap_df, wrap_expr from polars.convert import from_dict from polars.datatypes import ( DTYPE_TEMPORAL_UNITS, @@ -62,31 +87,6 @@ from polars.lazyframe.in_process import InProcessQuery from polars.selectors import _expand_selectors, by_dtype, expand_selector from polars.slice import LazyPolarsSlice -from polars.utils._async import _AioDataFrameResult, _GeventDataFrameResult -from polars.utils._parse_expr_input import ( - parse_as_expression, - parse_as_list_of_expressions, -) -from polars.utils._wrap import wrap_df, wrap_expr -from polars.utils.convert import negate_duration_string, parse_as_duration_string -from polars.utils.deprecation import ( - deprecate_function, - deprecate_parameter_as_positional, - deprecate_renamed_function, - deprecate_renamed_parameter, - deprecate_saturating, - issue_deprecation_warning, -) -from polars.utils.unstable import issue_unstable_warning, unstable -from polars.utils.various import ( - _in_notebook, - _prepare_row_index_args, - _process_null_values, - is_bool_sequence, - is_sequence, - normalize_filepath, - parse_percentiles, -) with contextlib.suppress(ImportError): # Module not available when building docs from polars.polars import PyLazyFrame diff --git a/py-polars/polars/lazyframe/group_by.py b/py-polars/polars/lazyframe/group_by.py index ca6b712bc481..72a9a8f22daa 100644 --- a/py-polars/polars/lazyframe/group_by.py +++ b/py-polars/polars/lazyframe/group_by.py @@ -3,9 +3,9 @@ from typing import TYPE_CHECKING, Callable, Iterable from polars import functions as F -from polars.utils._parse_expr_input import parse_as_list_of_expressions -from polars.utils._wrap import wrap_ldf -from polars.utils.deprecation import deprecate_renamed_function +from polars._utils.deprecation import deprecate_renamed_function +from polars._utils.parse_expr_input import parse_as_list_of_expressions +from polars._utils.wrap import wrap_ldf if TYPE_CHECKING: from polars import DataFrame, LazyFrame diff --git a/py-polars/polars/lazyframe/in_process.py b/py-polars/polars/lazyframe/in_process.py index 3c04020a2f68..9bcee8dccfa0 100644 --- a/py-polars/polars/lazyframe/in_process.py +++ b/py-polars/polars/lazyframe/in_process.py @@ -2,7 +2,7 @@ from typing import TYPE_CHECKING -from polars.utils._wrap import wrap_df +from polars._utils.wrap import wrap_df if TYPE_CHECKING: from polars import DataFrame diff --git a/py-polars/polars/meta/build.py b/py-polars/polars/meta/build.py index d38d92fc4414..4b6abcf5fcf9 100644 --- a/py-polars/polars/meta/build.py +++ b/py-polars/polars/meta/build.py @@ -2,7 +2,7 @@ from typing import Any -from polars.utils._polars_version import get_polars_version +from polars._utils.polars_version import get_polars_version try: from polars.polars import __build__ diff --git a/py-polars/polars/meta/thread_pool.py b/py-polars/polars/meta/thread_pool.py index 446eb486ceb2..c86f358e7e6e 100644 --- a/py-polars/polars/meta/thread_pool.py +++ b/py-polars/polars/meta/thread_pool.py @@ -2,7 +2,7 @@ import contextlib -from polars.utils.deprecation import deprecate_renamed_function +from polars._utils.deprecation import deprecate_renamed_function with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/meta/versions.py b/py-polars/polars/meta/versions.py index 305b2e2a17f8..e2cfaba48e60 100644 --- a/py-polars/polars/meta/versions.py +++ b/py-polars/polars/meta/versions.py @@ -2,8 +2,8 @@ import sys +from polars._utils.polars_version import get_polars_version from polars.meta.index_type import get_index_type -from polars.utils._polars_version import get_polars_version def show_versions() -> None: diff --git a/py-polars/polars/selectors.py b/py-polars/polars/selectors.py index 846dd4472bd6..ac552dc2e0b7 100644 --- a/py-polars/polars/selectors.py +++ b/py-polars/polars/selectors.py @@ -7,6 +7,9 @@ from typing import TYPE_CHECKING, Any, Collection, Literal, Mapping, overload from polars import functions as F +from polars._utils.deprecation import deprecate_nonkeyword_arguments +from polars._utils.parse_expr_input import _parse_inputs_as_iterable +from polars._utils.various import is_column from polars.datatypes import ( FLOAT_DTYPES, INTEGER_DTYPES, @@ -27,9 +30,6 @@ is_polars_dtype, ) from polars.expr import Expr -from polars.utils._parse_expr_input import _parse_inputs_as_iterable -from polars.utils.deprecation import deprecate_nonkeyword_arguments -from polars.utils.various import is_column if TYPE_CHECKING: import sys diff --git a/py-polars/polars/series/array.py b/py-polars/polars/series/array.py index 4a547485f962..04c88f701575 100644 --- a/py-polars/polars/series/array.py +++ b/py-polars/polars/series/array.py @@ -3,8 +3,8 @@ from typing import TYPE_CHECKING, Callable, Sequence from polars import functions as F +from polars._utils.wrap import wrap_s from polars.series.utils import expr_dispatch -from polars.utils._wrap import wrap_s if TYPE_CHECKING: from datetime import date, datetime, time diff --git a/py-polars/polars/series/categorical.py b/py-polars/polars/series/categorical.py index 03057ea81f98..204a125853a9 100644 --- a/py-polars/polars/series/categorical.py +++ b/py-polars/polars/series/categorical.py @@ -2,10 +2,10 @@ from typing import TYPE_CHECKING +from polars._utils.deprecation import deprecate_function +from polars._utils.unstable import unstable +from polars._utils.wrap import wrap_s from polars.series.utils import expr_dispatch -from polars.utils._wrap import wrap_s -from polars.utils.deprecation import deprecate_function -from polars.utils.unstable import unstable if TYPE_CHECKING: from polars import Series diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index f4b60d80dc1c..2a8628b2e94c 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -2,12 +2,12 @@ from typing import TYPE_CHECKING +from polars._utils.convert import to_py_date, to_py_datetime +from polars._utils.deprecation import deprecate_function, deprecate_renamed_function +from polars._utils.unstable import unstable +from polars._utils.wrap import wrap_s from polars.datatypes import Date, Datetime, Duration from polars.series.utils import expr_dispatch -from polars.utils._wrap import wrap_s -from polars.utils.convert import to_py_date, to_py_datetime -from polars.utils.deprecation import deprecate_function, deprecate_renamed_function -from polars.utils.unstable import unstable if TYPE_CHECKING: import datetime as dt diff --git a/py-polars/polars/series/list.py b/py-polars/polars/series/list.py index e7c5eb2f0828..ca89c0ceea2d 100644 --- a/py-polars/polars/series/list.py +++ b/py-polars/polars/series/list.py @@ -3,12 +3,12 @@ from typing import TYPE_CHECKING, Any, Callable, Sequence from polars import functions as F -from polars.series.utils import expr_dispatch -from polars.utils._wrap import wrap_s -from polars.utils.deprecation import ( +from polars._utils.deprecation import ( deprecate_renamed_function, deprecate_renamed_parameter, ) +from polars._utils.wrap import wrap_s +from polars.series.utils import expr_dispatch if TYPE_CHECKING: from datetime import date, datetime, time diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index e2500eb91c87..36af2fd1c70b 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -22,6 +22,41 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.construction import ( + arrow_to_pyseries, + dataframe_to_pyseries, + iterable_to_pyseries, + numpy_to_idxs, + numpy_to_pyseries, + pandas_to_pyseries, + sequence_to_pyseries, + series_to_pyseries, +) +from polars._utils.convert import ( + date_to_int, + datetime_to_int, + time_to_int, + timedelta_to_int, +) +from polars._utils.deprecation import ( + deprecate_function, + deprecate_nonkeyword_arguments, + deprecate_renamed_function, + deprecate_renamed_parameter, + issue_deprecation_warning, +) +from polars._utils.unstable import unstable +from polars._utils.various import ( + BUILDING_SPHINX_DOCS, + _is_generator, + no_default, + parse_version, + range_to_slice, + scale_bytes, + sphinx_accessor, + warn_null_comparison, +) +from polars._utils.wrap import wrap_df from polars.datatypes import ( Array, Boolean, @@ -74,41 +109,6 @@ from polars.series.struct import StructNameSpace from polars.series.utils import expr_dispatch, get_ffi_func from polars.slice import PolarsSlice -from polars.utils._construction import ( - arrow_to_pyseries, - dataframe_to_pyseries, - iterable_to_pyseries, - numpy_to_idxs, - numpy_to_pyseries, - pandas_to_pyseries, - sequence_to_pyseries, - series_to_pyseries, -) -from polars.utils._wrap import wrap_df -from polars.utils.convert import ( - date_to_int, - datetime_to_int, - time_to_int, - timedelta_to_int, -) -from polars.utils.deprecation import ( - deprecate_function, - deprecate_nonkeyword_arguments, - deprecate_renamed_function, - deprecate_renamed_parameter, - issue_deprecation_warning, -) -from polars.utils.unstable import unstable -from polars.utils.various import ( - BUILDING_SPHINX_DOCS, - _is_generator, - no_default, - parse_version, - range_to_slice, - scale_bytes, - sphinx_accessor, - warn_null_comparison, -) with contextlib.suppress(ImportError): # Module not available when building docs from polars.polars import PyDataFrame, PySeries @@ -119,6 +119,9 @@ from hvplot.plotting.core import hvPlotTabularPolars from polars import DataFrame, DataType, Expr + from polars._utils.various import ( + NoDefault, + ) from polars.series._numpy import SeriesView from polars.type_aliases import ( BufferInfo, @@ -140,9 +143,6 @@ SizeUnit, TemporalLiteral, ) - from polars.utils.various import ( - NoDefault, - ) if sys.version_info >= (3, 11): from typing import Self @@ -5289,7 +5289,7 @@ def map_elements( ------- Series """ - from polars.utils.udfs import warn_on_inefficient_map + from polars._utils.udfs import warn_on_inefficient_map if return_dtype is None: pl_return_dtype = None diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 05e385dc5fad..d8cdbef21c4d 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -2,12 +2,12 @@ from typing import TYPE_CHECKING -from polars.datatypes.constants import N_INFER_DEFAULT -from polars.series.utils import expr_dispatch -from polars.utils.deprecation import ( +from polars._utils.deprecation import ( deprecate_renamed_function, deprecate_renamed_parameter, ) +from polars.datatypes.constants import N_INFER_DEFAULT +from polars.series.utils import expr_dispatch if TYPE_CHECKING: from polars import Expr, Series diff --git a/py-polars/polars/series/struct.py b/py-polars/polars/series/struct.py index b0fe9f4e22b9..3d95f2109c4c 100644 --- a/py-polars/polars/series/struct.py +++ b/py-polars/polars/series/struct.py @@ -3,9 +3,9 @@ from collections import OrderedDict from typing import TYPE_CHECKING, Sequence +from polars._utils.various import BUILDING_SPHINX_DOCS, sphinx_accessor +from polars._utils.wrap import wrap_df from polars.series.utils import expr_dispatch -from polars.utils._wrap import wrap_df -from polars.utils.various import BUILDING_SPHINX_DOCS, sphinx_accessor if TYPE_CHECKING: from polars import DataFrame, DataType, Series diff --git a/py-polars/polars/series/utils.py b/py-polars/polars/series/utils.py index fb2f1440fb7a..237b55a396da 100644 --- a/py-polars/polars/series/utils.py +++ b/py-polars/polars/series/utils.py @@ -7,8 +7,8 @@ import polars._reexport as pl from polars import functions as F +from polars._utils.wrap import wrap_s from polars.datatypes import dtype_to_ffiname -from polars.utils._wrap import wrap_s if TYPE_CHECKING: from polars import Series diff --git a/py-polars/polars/sql/context.py b/py-polars/polars/sql/context.py index afbd1dcea4c1..a7129091cb63 100644 --- a/py-polars/polars/sql/context.py +++ b/py-polars/polars/sql/context.py @@ -3,12 +3,12 @@ import contextlib from typing import TYPE_CHECKING, Collection, Generic, Mapping, overload +from polars._utils.unstable import issue_unstable_warning +from polars._utils.various import _get_stack_locals +from polars._utils.wrap import wrap_ldf from polars.dataframe import DataFrame from polars.lazyframe import LazyFrame from polars.type_aliases import FrameType -from polars.utils._wrap import wrap_ldf -from polars.utils.unstable import issue_unstable_warning -from polars.utils.various import _get_stack_locals with contextlib.suppress(ImportError): # Module not available when building docs from polars.polars import PySQLContext diff --git a/py-polars/polars/string_cache.py b/py-polars/polars/string_cache.py index dbf15d6244e8..6955d7da5e8e 100644 --- a/py-polars/polars/string_cache.py +++ b/py-polars/polars/string_cache.py @@ -3,7 +3,7 @@ import contextlib from typing import TYPE_CHECKING -from polars.utils.deprecation import issue_deprecation_warning +from polars._utils.deprecation import issue_deprecation_warning with contextlib.suppress(ImportError): # Module not available when building docs import polars.polars as plr diff --git a/py-polars/polars/testing/parametric/profiles.py b/py-polars/polars/testing/parametric/profiles.py index c1afda1c2bfe..76682af6d7f2 100644 --- a/py-polars/polars/testing/parametric/profiles.py +++ b/py-polars/polars/testing/parametric/profiles.py @@ -5,8 +5,8 @@ from hypothesis import settings +from polars._utils.deprecation import deprecate_nonkeyword_arguments from polars.type_aliases import ParametricProfileNames -from polars.utils.deprecation import deprecate_nonkeyword_arguments @deprecate_nonkeyword_arguments(allowed_args=["profile"], version="0.19.3") diff --git a/py-polars/polars/utils/__init__.py b/py-polars/polars/utils/__init__.py index db6c1085bc14..6efa5aaa60c0 100644 --- a/py-polars/polars/utils/__init__.py +++ b/py-polars/polars/utils/__init__.py @@ -1,36 +1 @@ -""" -Utility functions. - -Functions that are part of the public API are re-exported here. -""" -from polars.utils._scan import _execute_from_rust -from polars.utils.convert import ( - date_to_int, - datetime_to_int, - time_to_int, - timedelta_to_int, - to_py_date, - to_py_datetime, - to_py_decimal, - to_py_time, - to_py_timedelta, -) -from polars.utils.various import NoDefault, _polars_warn, is_column, no_default - -__all__ = [ - "NoDefault", - "is_column", - "no_default", - # Required for Rust bindings - "date_to_int", - "datetime_to_int", - "time_to_int", - "timedelta_to_int", - "_execute_from_rust", - "_polars_warn", - "to_py_date", - "to_py_datetime", - "to_py_decimal", - "to_py_time", - "to_py_timedelta", -] +"""Deprecated module. Do not use.""" diff --git a/py-polars/polars/utils/udfs.py b/py-polars/polars/utils/udfs.py index 57c58094dbef..398197ecef1a 100644 --- a/py-polars/polars/utils/udfs.py +++ b/py-polars/polars/utils/udfs.py @@ -1,993 +1,17 @@ -"""Utilities related to user defined functions (such as those passed to `apply`).""" -from __future__ import annotations +"""Deprecated module. Do not use.""" -import datetime -import dis -import inspect -import re -import sys -import warnings -from bisect import bisect_left -from collections import defaultdict -from dis import get_instructions -from inspect import signature -from itertools import count, zip_longest -from pathlib import Path -from typing import ( - TYPE_CHECKING, - AbstractSet, - Any, - Callable, - ClassVar, - Iterator, - Literal, - NamedTuple, - Union, -) +import os +from typing import Any -from polars.utils.various import re_escape - -if TYPE_CHECKING: - from dis import Instruction - - if sys.version_info >= (3, 10): - from typing import TypeAlias - else: - from typing_extensions import TypeAlias - - -class StackValue(NamedTuple): - operator: str - operator_arity: int - left_operand: str - right_operand: str - - -MapTarget: TypeAlias = Literal["expr", "frame", "series"] -StackEntry: TypeAlias = Union[str, StackValue] - -_MIN_PY311 = sys.version_info >= (3, 11) -_MIN_PY312 = _MIN_PY311 and sys.version_info >= (3, 12) - - -class OpNames: - BINARY: ClassVar[dict[str, str]] = { - "BINARY_ADD": "+", - "BINARY_AND": "&", - "BINARY_FLOOR_DIVIDE": "//", - "BINARY_LSHIFT": "<<", - "BINARY_RSHIFT": ">>", - "BINARY_MODULO": "%", - "BINARY_MULTIPLY": "*", - "BINARY_OR": "|", - "BINARY_POWER": "**", - "BINARY_SUBTRACT": "-", - "BINARY_TRUE_DIVIDE": "/", - "BINARY_XOR": "^", - } - CALL = frozenset({"CALL"} if _MIN_PY311 else {"CALL_FUNCTION", "CALL_METHOD"}) - CONTROL_FLOW: ClassVar[dict[str, str]] = ( - { - "POP_JUMP_FORWARD_IF_FALSE": "&", - "POP_JUMP_FORWARD_IF_TRUE": "|", - "JUMP_IF_FALSE_OR_POP": "&", - "JUMP_IF_TRUE_OR_POP": "|", - } - # note: 3.12 dropped POP_JUMP_FORWARD_IF_* opcodes - if _MIN_PY311 and not _MIN_PY312 - else { - "POP_JUMP_IF_FALSE": "&", - "POP_JUMP_IF_TRUE": "|", - "JUMP_IF_FALSE_OR_POP": "&", - "JUMP_IF_TRUE_OR_POP": "|", - } - ) - LOAD_VALUES = frozenset(("LOAD_CONST", "LOAD_DEREF", "LOAD_FAST", "LOAD_GLOBAL")) - LOAD_ATTR = frozenset({"LOAD_METHOD", "LOAD_ATTR"}) - LOAD = LOAD_VALUES | LOAD_ATTR - SYNTHETIC: ClassVar[dict[str, int]] = { - "POLARS_EXPRESSION": 1, - } - UNARY: ClassVar[dict[str, str]] = { - "UNARY_NEGATIVE": "-", - "UNARY_POSITIVE": "+", - "UNARY_NOT": "~", - } - PARSEABLE_OPS = frozenset( - {"BINARY_OP", "BINARY_SUBSCR", "COMPARE_OP", "CONTAINS_OP", "IS_OP"} - | set(UNARY) - | set(CONTROL_FLOW) - | set(SYNTHETIC) - | LOAD_VALUES - ) - UNARY_VALUES = frozenset(UNARY.values()) - - -# numpy functions that we can map to native expressions -_NUMPY_MODULE_ALIASES = frozenset(("np", "numpy")) -_NUMPY_FUNCTIONS = frozenset( - ( - # "abs", # TODO: this one clashes with Python builtin abs - "arccos", - "arccosh", - "arcsin", - "arcsinh", - "arctan", - "arctanh", - "cbrt", - "ceil", - "cos", - "cosh", - "degrees", - "exp", - "floor", - "log", - "log10", - "log1p", - "radians", - "sign", - "sin", - "sinh", - "sqrt", - "tan", - "tanh", - ) -) - -# python attrs/funcs that map to native expressions -_PYTHON_ATTRS_MAP = { - "date": "dt.date()", - "day": "dt.day()", - "hour": "dt.hour()", - "microsecond": "dt.microsecond()", - "minute": "dt.minute()", - "month": "dt.month()", - "second": "dt.second()", - "year": "dt.year()", -} -_PYTHON_CASTS_MAP = {"float": "Float64", "int": "Int64", "str": "String"} -_PYTHON_BUILTINS = frozenset(_PYTHON_CASTS_MAP) | {"abs"} -_PYTHON_METHODS_MAP = { - # string - "endswith": "str.ends_with", - "lower": "str.to_lowercase", - "lstrip": "str.strip_chars_start", - "rstrip": "str.strip_chars_end", - "startswith": "str.starts_with", - "strip": "str.strip_chars", - "title": "str.to_titlecase", - "upper": "str.to_uppercase", - # temporal - "date": "dt.date", - "isoweekday": "dt.weekday", - "time": "dt.time", -} - -_MODULE_FUNCTIONS: list[dict[str, list[AbstractSet[str]]]] = [ - # lambda x: numpy.func(x) - # lambda x: numpy.func(CONSTANT) - { - "argument_1_opname": [{"LOAD_FAST", "LOAD_CONST"}], - "argument_2_opname": [], - "module_opname": [OpNames.LOAD_ATTR], - "attribute_opname": [], - "module_name": [_NUMPY_MODULE_ALIASES], - "attribute_name": [], - "function_name": [_NUMPY_FUNCTIONS], - }, - # lambda x: json.loads(x) - { - "argument_1_opname": [{"LOAD_FAST"}], - "argument_2_opname": [], - "module_opname": [OpNames.LOAD_ATTR], - "attribute_opname": [], - "module_name": [{"json"}], - "attribute_name": [], - "function_name": [{"loads"}], - }, - # lambda x: datetime.strptime(x, CONSTANT) - { - "argument_1_opname": [{"LOAD_FAST"}], - "argument_2_opname": [{"LOAD_CONST"}], - "module_opname": [OpNames.LOAD_ATTR], - "attribute_opname": [], - "module_name": [{"datetime"}], - "attribute_name": [], - "function_name": [{"strptime"}], - }, - # lambda x: module.attribute.func(x, CONSTANT) - { - "argument_1_opname": [{"LOAD_FAST"}], - "argument_2_opname": [{"LOAD_CONST"}], - "module_opname": [{"LOAD_ATTR"}], - "attribute_opname": [OpNames.LOAD_ATTR], - "module_name": [{"datetime", "dt"}], - "attribute_name": [{"datetime"}], - "function_name": [{"strptime"}], - }, -] -# In addition to `lambda x: func(x)`, also support cases when a unary operation -# has been applied to `x`, like `lambda x: func(-x)` or `lambda x: func(~x)`. -_MODULE_FUNCTIONS = [ - {**kind, "argument_1_unary_opname": unary} # type: ignore[dict-item] - for kind in _MODULE_FUNCTIONS - for unary in [[set(OpNames.UNARY)], []] -] -_RE_IMPLICIT_BOOL = re.compile(r'pl\.col\("([^"]*)"\) & pl\.col\("\1"\)\.(.+)') - - -def _get_all_caller_variables() -> dict[str, Any]: - """Get all local and global variables from caller's frame.""" - pkg_dir = Path(__file__).parent.parent - - # https://stackoverflow.com/questions/17407119/python-inspect-stack-is-slow - frame = inspect.currentframe() - n = 0 - try: - while frame: - fname = inspect.getfile(frame) - if fname.startswith(str(pkg_dir)): - frame = frame.f_back - n += 1 - else: - break - variables: dict[str, Any] - if frame is None: - variables = {} - else: - variables = {**frame.f_locals, **frame.f_globals} - finally: - # https://docs.python.org/3/library/inspect.html - # > Though the cycle detector will catch these, destruction of the frames - # > (and local variables) can be made deterministic by removing the cycle - # > in a finally clause. - del frame - return variables - - -class BytecodeParser: - """Introspect UDF bytecode and determine if we can rewrite as native expression.""" - - _map_target_name: str | None = None - - def __init__(self, function: Callable[[Any], Any], map_target: MapTarget): - try: - original_instructions = get_instructions(function) - except TypeError: - # in case we hit something that can't be disassembled (eg: code object - # unavailable, like a bare numpy ufunc that isn't in a lambda/function) - original_instructions = iter([]) - - self._function = function - self._map_target = map_target - self._param_name = self._get_param_name(function) - self._rewritten_instructions = RewrittenInstructions( - instructions=original_instructions, - ) - - def _omit_implicit_bool(self, expr: str) -> str: - """Drop extraneous/implied bool (eg: `pl.col("d") & pl.col("d").dt.date()`).""" - while _RE_IMPLICIT_BOOL.search(expr): - expr = _RE_IMPLICIT_BOOL.sub(repl=r'pl.col("\1").\2', string=expr) - return expr - - @staticmethod - def _get_param_name(function: Callable[[Any], Any]) -> str | None: - """Return single function parameter name.""" - try: - # note: we do not parse/handle functions with > 1 params - sig = signature(function) - except ValueError: - return None - return ( - next(iter(parameters.keys())) - if len(parameters := sig.parameters) == 1 - else None - ) - - def _inject_nesting( - self, - expression_blocks: dict[int, str], - logical_instructions: list[Instruction], - ) -> list[tuple[int, str]]: - """Inject nesting boundaries into expression blocks (as parentheses).""" - if logical_instructions: - # reconstruct nesting boundaries for mixed and/or ops by associating control - # flow jump offsets with their target expression blocks and applying parens - if len({inst.opname for inst in logical_instructions}) > 1: - block_offsets: list[int] = list(expression_blocks.keys()) - prev_end = -1 - for inst in logical_instructions: - start = block_offsets[bisect_left(block_offsets, inst.offset) - 1] - end = block_offsets[bisect_left(block_offsets, inst.argval) - 1] - if not (start == 0 and end == block_offsets[-1]): - if prev_end not in (start, end): - expression_blocks[start] = "(" + expression_blocks[start] - expression_blocks[end] += ")" - prev_end = end - - for inst in logical_instructions: # inject connecting "&" and "|" ops - expression_blocks[inst.offset] = OpNames.CONTROL_FLOW[inst.opname] - - return sorted(expression_blocks.items()) - - def _get_target_name(self, col: str, expression: str) -> str: - """The name of the object against which the 'map' is being invoked.""" - if self._map_target_name is not None: - return self._map_target_name - else: - col_expr = f'pl.col("{col}")' - if self._map_target == "expr": - return col_expr - elif self._map_target == "series": - # note: handle overlapping name from global variables; fallback - # through "s", "srs", "series" and (finally) srs0 -> srsN... - search_expr = expression.replace(col_expr, "") - for name in ("s", "srs", "series"): - if not re.search(rf"\b{name}\b", search_expr): - self._map_target_name = name - return name - n = count() - while True: - name = f"srs{next(n)}" - if not re.search(rf"\b{name}\b", search_expr): - self._map_target_name = name - return name - - msg = f"TODO: map_target = {self._map_target!r}" - raise NotImplementedError(msg) - - @property - def map_target(self) -> MapTarget: - """The map target, eg: one of 'expr', 'frame', or 'series'.""" - return self._map_target - - def can_attempt_rewrite(self) -> bool: - """ - Determine if we may be able to offer a native polars expression instead. - - Note that `lambda x: x` is inefficient, but we ignore it because it is not - guaranteed that using the equivalent bare constant value will return the - same output. (Hopefully nobody is writing lambdas like that anyway...) - """ - return ( - self._param_name is not None - # check minimum number of ops, ensuring all are parseable - and len(self._rewritten_instructions) >= 2 - and all( - inst.opname in OpNames.PARSEABLE_OPS - for inst in self._rewritten_instructions - ) - # exclude constructs/functions with multiple RETURN_VALUE ops - and sum( - 1 - for inst in self.original_instructions - if inst.opname == "RETURN_VALUE" - ) - == 1 - ) - - def dis(self) -> None: - """Print disassembled function bytecode.""" - dis.dis(self._function) - - @property - def function(self) -> Callable[[Any], Any]: - """The function being parsed.""" - return self._function - - @property - def original_instructions(self) -> list[Instruction]: - """The original bytecode instructions from the function we are parsing.""" - return list(self._rewritten_instructions._original_instructions) - - @property - def param_name(self) -> str | None: - """The parameter name of the function being parsed.""" - return self._param_name - - @property - def rewritten_instructions(self) -> list[Instruction]: - """The rewritten bytecode instructions from the function we are parsing.""" - return list(self._rewritten_instructions) - - def to_expression(self, col: str) -> str | None: - """Translate postfix bytecode instructions to polars expression/string.""" - self._map_target_name = None - if self._param_name is None: - return None - - # decompose bytecode into logical 'and'/'or' expression blocks (if present) - control_flow_blocks = defaultdict(list) - logical_instructions = [] - jump_offset = 0 - for idx, inst in enumerate(self._rewritten_instructions): - if inst.opname in OpNames.CONTROL_FLOW: - jump_offset = self._rewritten_instructions[idx + 1].offset - logical_instructions.append(inst) - else: - control_flow_blocks[jump_offset].append(inst) - - # convert each block to a polars expression string - caller_variables: dict[str, Any] = {} - try: - expression_strings = self._inject_nesting( - { - offset: InstructionTranslator( - instructions=ops, - caller_variables=caller_variables, - map_target=self._map_target, - ).to_expression( - col=col, - param_name=self._param_name, - depth=int(bool(logical_instructions)), - ) - for offset, ops in control_flow_blocks.items() - }, - logical_instructions, - ) - except NotImplementedError: - return None - polars_expr = " ".join(expr for _offset, expr in expression_strings) - - # note: if no 'pl.col' in the expression, it likely represents a compound - # constant value (e.g. `lambda x: CONST + 123`), so we don't want to warn - if "pl.col(" not in polars_expr: - return None - else: - polars_expr = self._omit_implicit_bool(polars_expr) - if self._map_target == "series": - target_name = self._get_target_name(col, polars_expr) - return polars_expr.replace(f'pl.col("{col}")', target_name) - else: - return polars_expr - - def warn( - self, - col: str, - suggestion_override: str | None = None, - udf_override: str | None = None, - ) -> None: - """Generate warning that suggests an equivalent native polars expression.""" - # Import these here so that udfs can be imported without polars installed. - - from polars.exceptions import PolarsInefficientMapWarning - from polars.utils.various import ( - find_stacklevel, - in_terminal_that_supports_colour, - ) - - suggested_expression = suggestion_override or self.to_expression(col) - - if suggested_expression is not None: - target_name = self._get_target_name(col, suggested_expression) - func_name = udf_override or self._function.__name__ or "..." - if func_name == "": - func_name = f"lambda {self._param_name}: ..." - - addendum = ( - 'Note: in list.eval context, pl.col("") should be written as pl.element()' - if 'pl.col("")' in suggested_expression - else "" - ) - if self._map_target == "expr": - apitype = "expressions" - clsname = "Expr" - else: - apitype = "series" - clsname = "Series" - - before, after = ( - ( - f" \033[31m- {target_name}.map_elements({func_name})\033[0m\n", - f" \033[32m+ {suggested_expression}\033[0m\n{addendum}", - ) - if in_terminal_that_supports_colour() - else ( - f" - {target_name}.map_elements({func_name})\n", - f" + {suggested_expression}\n{addendum}", - ) - ) - warnings.warn( - f"\n{clsname}.map_elements is significantly slower than the native {apitype} API.\n" - "Only use if you absolutely CANNOT implement your logic otherwise.\n" - "Replace this expression...\n" - f"{before}" - "with this one instead:\n" - f"{after}", - PolarsInefficientMapWarning, - stacklevel=find_stacklevel(), - ) - - -class InstructionTranslator: - """Translates Instruction bytecode to a polars expression string.""" - - def __init__( - self, - instructions: list[Instruction], - caller_variables: dict[str, Any], - map_target: MapTarget, - ) -> None: - self._caller_variables: dict[str, Any] = caller_variables - self._stack = self._to_intermediate_stack(instructions, map_target) - - def to_expression(self, col: str, param_name: str, depth: int) -> str: - """Convert intermediate stack to polars expression string.""" - return self._expr(self._stack, col, param_name, depth) - - @staticmethod - def op(inst: Instruction) -> str: - """Convert bytecode instruction to suitable intermediate op string.""" - if inst.opname in OpNames.CONTROL_FLOW: - return OpNames.CONTROL_FLOW[inst.opname] - elif inst.argrepr: - return inst.argrepr - elif inst.opname == "IS_OP": - return "is not" if inst.argval else "is" - elif inst.opname == "CONTAINS_OP": - return "not in" if inst.argval else "in" - elif inst.opname in OpNames.UNARY: - return OpNames.UNARY[inst.opname] - elif inst.opname == "BINARY_SUBSCR": - return "replace" - else: - msg = ( - "unrecognized opname" - "\n\nPlease report a bug to https://github.com/pola-rs/polars/issues" - " with the content of function you were passing to `map` and the" - f" following instruction object:\n{inst!r}" - ) - raise AssertionError(msg) - - def _expr(self, value: StackEntry, col: str, param_name: str, depth: int) -> str: - """Take stack entry value and convert to polars expression string.""" - if isinstance(value, StackValue): - op = value.operator - e1 = self._expr(value.left_operand, col, param_name, depth + 1) - if value.operator_arity == 1: - if op not in OpNames.UNARY_VALUES: - if e1.startswith("pl.col("): - call = "" if op.endswith(")") else "()" - return f"{e1}.{op}{call}" - if e1[0] in OpNames.UNARY_VALUES and e1[1:].startswith("pl.col("): - call = "" if op.endswith(")") else "()" - return f"({e1}).{op}{call}" - - # support use of consts as numpy/builtin params, eg: - # "np.sin(3) + np.cos(x)", or "len('const_string') + len(x)" - pfx = "np." if op in _NUMPY_FUNCTIONS else "" - return f"{pfx}{op}({e1})" - return f"{op}{e1}" - else: - e2 = self._expr(value.right_operand, col, param_name, depth + 1) - if op in ("is", "is not") and value[2] == "None": - not_ = "" if op == "is" else "not_" - return f"{e1}.is_{not_}null()" - elif op in ("in", "not in"): - not_ = "" if op == "in" else "~" - return ( - f"{not_}({e1}.is_in({e2}))" - if " " in e1 - else f"{not_}{e1}.is_in({e2})" - ) - elif op == "replace": - if not self._caller_variables: - self._caller_variables.update(_get_all_caller_variables()) - if not isinstance(self._caller_variables.get(e1, None), dict): - msg = "require dict mapping" - raise NotImplementedError(msg) - return f"{e2}.{op}({e1})" - elif op == "<<": - # Result of 2**e2 might be float is e2 was negative. - # But, if e1 << e2 was valid, then e2 must have been positive. - # Hence, the output of 2**e2 can be safely cast to Int64, which - # may be necessary if chaining operations which assume Int64 output. - return f"({e1} * 2**{e2}).cast(pl.Int64)" - elif op == ">>": - # Motivation for the cast is the same as in the '<<' case above. - return f"({e1} / 2**{e2}).cast(pl.Int64)" - else: - expr = f"{e1} {op} {e2}" - return f"({expr})" if depth else expr - - elif value == param_name: - return f'pl.col("{col}")' - - return value - - def _to_intermediate_stack( - self, instructions: list[Instruction], map_target: MapTarget - ) -> StackEntry: - """Take postfix bytecode and convert to an intermediate natural-order stack.""" - if map_target in ("expr", "series"): - stack: list[StackEntry] = [] - for inst in instructions: - stack.append( - inst.argrepr - if inst.opname in OpNames.LOAD - else ( - StackValue( - operator=self.op(inst), - operator_arity=1, - left_operand=stack.pop(), # type: ignore[arg-type] - right_operand=None, # type: ignore[arg-type] - ) - if ( - inst.opname in OpNames.UNARY - or OpNames.SYNTHETIC.get(inst.opname) == 1 - ) - else StackValue( - operator=self.op(inst), - operator_arity=2, - left_operand=stack.pop(-2), # type: ignore[arg-type] - right_operand=stack.pop(-1), # type: ignore[arg-type] - ) - ) - ) - return stack[0] - - # TODO: dataframe.apply(...) - msg = f"TODO: {map_target!r} apply" - raise NotImplementedError(msg) - - -class RewrittenInstructions: - """ - Standalone class that applies Instruction rewrite/filtering rules. - - This significantly simplifies subsequent parsing by injecting - synthetic POLARS_EXPRESSION ops into the Instruction stream for - easy identification/translation and separates the parsing logic - from the identification of expression translation opportunities. - """ - - _ignored_ops = frozenset( - [ - "COPY", - "COPY_FREE_VARS", - "POP_TOP", - "PRECALL", - "PUSH_NULL", - "RESUME", - "RETURN_VALUE", - ] - ) - _caller_variables: ClassVar[dict[str, Any]] = {} - - def __init__(self, instructions: Iterator[Instruction]): - self._original_instructions = list(instructions) - self._rewritten_instructions = self._rewrite( - self._upgrade_instruction(inst) - for inst in self._original_instructions - if inst.opname not in self._ignored_ops - ) - - def __len__(self) -> int: - return len(self._rewritten_instructions) - - def __iter__(self) -> Iterator[Instruction]: - return iter(self._rewritten_instructions) - - def __getitem__(self, item: Any) -> Instruction: - return self._rewritten_instructions[item] - - def _matches( - self, - idx: int, - *, - opnames: list[AbstractSet[str]], - argvals: list[AbstractSet[Any] | dict[Any, Any] | None] | None, - is_attr: bool = False, - ) -> list[Instruction]: - """ - Check if a sequence of Instructions matches the specified ops/argvals. - - Parameters - ---------- - idx - The index of the first instruction to check. - opnames - The full opname sequence that defines a match. - argvals - Associated argvals that must also match (in same position as opnames). - is_attr - Indicate if the match represents pure attribute access (cannot be called). - """ - n_required_ops, argvals = len(opnames), argvals or [] - idx_offset = idx + n_required_ops - if ( - is_attr - and (trailing_inst := self._instructions[idx_offset : idx_offset + 1]) - and trailing_inst[0].opname in OpNames.CALL # not pure attr if called - ): - return [] - - instructions = self._instructions[idx:idx_offset] - if len(instructions) == n_required_ops and all( - inst.opname in match_opnames - and (match_argval is None or inst.argval in match_argval) - for inst, match_opnames, match_argval in zip_longest( - instructions, opnames, argvals - ) - ): - return instructions - return [] - - def _rewrite(self, instructions: Iterator[Instruction]) -> list[Instruction]: - """ - Apply rewrite rules, potentially injecting synthetic operations. - - Rules operate on the instruction stream and can examine/modify - it as needed, pushing updates into "updated_instructions" and - returning True/False to indicate if any changes were made. - """ - self._instructions = list(instructions) - updated_instructions: list[Instruction] = [] - idx = 0 - while idx < len(self._instructions): - inst, increment = self._instructions[idx], 1 - if inst.opname not in OpNames.LOAD or not any( - (increment := map_rewrite(idx, updated_instructions)) - for map_rewrite in ( - # add any other rewrite methods here - self._rewrite_functions, - self._rewrite_methods, - self._rewrite_builtins, - self._rewrite_attrs, - ) - ): - updated_instructions.append(inst) - idx += increment or 1 - return updated_instructions - - def _rewrite_attrs(self, idx: int, updated_instructions: list[Instruction]) -> int: - """Replace python attribute lookup with synthetic POLARS_EXPRESSION op.""" - if matching_instructions := self._matches( - idx, - opnames=[{"LOAD_FAST"}, {"LOAD_ATTR"}], - argvals=[None, _PYTHON_ATTRS_MAP], - is_attr=True, - ): - inst = matching_instructions[1] - expr_name = _PYTHON_ATTRS_MAP[inst.argval] - px = inst._replace( - opname="POLARS_EXPRESSION", argval=expr_name, argrepr=expr_name - ) - updated_instructions.extend([matching_instructions[0], px]) - - return len(matching_instructions) - - def _rewrite_builtins( - self, idx: int, updated_instructions: list[Instruction] - ) -> int: - """Replace builtin function calls with a synthetic POLARS_EXPRESSION op.""" - if matching_instructions := self._matches( - idx, - opnames=[{"LOAD_GLOBAL"}, {"LOAD_FAST", "LOAD_CONST"}, OpNames.CALL], - argvals=[_PYTHON_BUILTINS], - ): - inst1, inst2 = matching_instructions[:2] - if (argval := inst1.argval) in _PYTHON_CASTS_MAP: - dtype = _PYTHON_CASTS_MAP[argval] - argval = f"cast(pl.{dtype})" - - px = inst1._replace( - opname="POLARS_EXPRESSION", - argval=argval, - argrepr=argval, - offset=inst2.offset, - ) - # POLARS_EXPRESSION is mapped as a unary op, so switch instruction order - operand = inst2._replace(offset=inst1.offset) - updated_instructions.extend((operand, px)) - - return len(matching_instructions) - - def _rewrite_functions( - self, idx: int, updated_instructions: list[Instruction] - ) -> int: - """Replace function calls with a synthetic POLARS_EXPRESSION op.""" - for function_kind in _MODULE_FUNCTIONS: - opnames: list[AbstractSet[str]] = [ - {"LOAD_GLOBAL", "LOAD_DEREF"}, - *function_kind["module_opname"], - *function_kind["attribute_opname"], - *function_kind["argument_1_opname"], - *function_kind["argument_1_unary_opname"], - *function_kind["argument_2_opname"], - OpNames.CALL, - ] - if matching_instructions := self._matches( - idx, - opnames=opnames, - argvals=[ - *function_kind["module_name"], - *function_kind["attribute_name"], - *function_kind["function_name"], - ], - ): - attribute_count = len(function_kind["attribute_name"]) - inst1, inst2, inst3 = matching_instructions[ - attribute_count : 3 + attribute_count - ] - if inst1.argval == "json": - expr_name = "str.json_decode" - elif inst1.argval == "datetime": - fmt = matching_instructions[attribute_count + 3].argval - expr_name = f'str.to_datetime(format="{fmt}")' - if not self._is_stdlib_datetime( - inst1.argval, - matching_instructions[0].argval, - fmt, - attribute_count, - ): - return 0 - else: - expr_name = inst2.argval - - px = inst1._replace( - opname="POLARS_EXPRESSION", - argval=expr_name, - argrepr=expr_name, - offset=inst3.offset, - ) - - # POLARS_EXPRESSION is mapped as a unary op, so switch instruction order - operand = inst3._replace(offset=inst1.offset) - updated_instructions.extend( - ( - operand, - matching_instructions[3 + attribute_count], - px, - ) - if function_kind["argument_1_unary_opname"] - else (operand, px) - ) - return len(matching_instructions) - - return 0 - - def _rewrite_methods( - self, idx: int, updated_instructions: list[Instruction] - ) -> int: - """Replace python method calls with synthetic POLARS_EXPRESSION op.""" - LOAD_METHOD = OpNames.LOAD_ATTR if _MIN_PY312 else {"LOAD_METHOD"} - if matching_instructions := ( - # method call with one basic arg, eg: "s.endswith('!')" - self._matches( - idx, - opnames=[LOAD_METHOD, {"LOAD_CONST"}, OpNames.CALL], - argvals=[_PYTHON_METHODS_MAP], - ) - or - # method call with no arg, eg: "s.lower()" - self._matches( - idx, - opnames=[LOAD_METHOD, OpNames.CALL], - argvals=[_PYTHON_METHODS_MAP], - ) - ): - inst = matching_instructions[0] - expr = _PYTHON_METHODS_MAP[inst.argval] - - if matching_instructions[1].opname == "LOAD_CONST": - param_value = matching_instructions[1].argval - if isinstance(param_value, tuple) and expr in ( - "str.starts_with", - "str.ends_with", - ): - starts, ends = ("^", "") if "starts" in expr else ("", "$") - rx = "|".join(re_escape(v) for v in param_value) - q = '"' if "'" in param_value else "'" - expr = f"str.contains(r{q}{starts}({rx}){ends}{q})" - else: - expr += f"({param_value!r})" - - px = inst._replace(opname="POLARS_EXPRESSION", argval=expr, argrepr=expr) - updated_instructions.append(px) - - return len(matching_instructions) - - @staticmethod - def _upgrade_instruction(inst: Instruction) -> Instruction: - """Rewrite any older binary opcodes using py 3.11 'BINARY_OP' instead.""" - if not _MIN_PY311 and inst.opname in OpNames.BINARY: - inst = inst._replace( - argrepr=OpNames.BINARY[inst.opname], - opname="BINARY_OP", - ) - return inst - - def _is_stdlib_datetime( - self, function_name: str, module_name: str, fmt: str, attribute_count: int - ) -> bool: - if not self._caller_variables: - self._caller_variables.update(_get_all_caller_variables()) - vars = self._caller_variables - return ( - attribute_count == 0 and vars.get(function_name) is datetime.datetime - ) or (attribute_count == 1 and vars.get(module_name) is datetime) - - -def _is_raw_function(function: Callable[[Any], Any]) -> tuple[str, str]: - """Identify translatable calls that aren't wrapped inside a lambda/function.""" - try: - func_module = function.__class__.__module__ - func_name = function.__name__ - except AttributeError: - return "", "" - - # numpy function calls - if func_module == "numpy" and func_name in _NUMPY_FUNCTIONS: - return "np", f"{func_name}()" - - # python function calls - elif func_module == "builtins": - if func_name in _PYTHON_CASTS_MAP: - return "builtins", f"cast(pl.{_PYTHON_CASTS_MAP[func_name]})" - elif func_name == "loads": - import json # double-check since it is referenced via 'builtins' - - if function is json.loads: - return "json", "str.json_decode()" - - return "", "" - - -def warn_on_inefficient_map( - function: Callable[[Any], Any], columns: list[str], map_target: MapTarget -) -> None: - """ - Generate `PolarsInefficientMapWarning` on poor usage of a `map` function. - - Parameters - ---------- - function - The function passed to `map`. - columns - The column names of the original object; in the case of an `Expr` this - will be a list of length 1 containing the expression's root name. - map_target - The target of the `map` call. One of `"expr"`, `"frame"`, - or `"series"`. - """ - if map_target == "frame": - msg = "TODO: 'frame' map-function parsing" - raise NotImplementedError(msg) - - # note: we only consider simple functions with a single col/param - if not (col := columns and columns[0]): - return None - - # the parser introspects function bytecode to determine if we can - # rewrite as a much more optimal native polars expression instead - parser = BytecodeParser(function, map_target) - if parser.can_attempt_rewrite(): - parser.warn(col) - else: - # handle bare numpy/json functions - module, suggestion = _is_raw_function(function) - if module and suggestion: - fn = function.__name__ - parser.warn( - col, - suggestion_override=f'pl.col("{col}").{suggestion}', - udf_override=fn if module == "builtins" else f"{module}.{fn}", - ) - - -def is_shared_lib(file: str) -> bool: - return file.endswith((".so", ".dll", ".pyd")) +__all__ = ["_get_shared_lib_location"] def _get_shared_lib_location(main_file: Any) -> str: - import os - directory = os.path.dirname(main_file) # noqa: PTH120 return os.path.join( # noqa: PTH118 - directory, next(filter(is_shared_lib, os.listdir(directory))) + directory, next(filter(_is_shared_lib, os.listdir(directory))) ) -__all__ = ["BytecodeParser", "warn_on_inefficient_map", "_get_shared_lib_location"] +def _is_shared_lib(file: str) -> bool: + return file.endswith((".so", ".dll", ".pyd")) diff --git a/py-polars/src/py_modules.rs b/py-polars/src/py_modules.rs index 6c7dbd2658a1..7b0d37061349 100644 --- a/py-polars/src/py_modules.rs +++ b/py-polars/src/py_modules.rs @@ -5,7 +5,7 @@ pub(crate) static POLARS: Lazy = Lazy::new(|| Python::with_gil(|py| PyModule::import(py, "polars").unwrap().to_object(py))); pub(crate) static UTILS: Lazy = - Lazy::new(|| Python::with_gil(|py| POLARS.getattr(py, "utils").unwrap())); + Lazy::new(|| Python::with_gil(|py| POLARS.getattr(py, "_utils").unwrap())); pub(crate) static SERIES: Lazy = Lazy::new(|| Python::with_gil(|py| POLARS.getattr(py, "Series").unwrap())); diff --git a/py-polars/tests/parametric/test_groupby_rolling.py b/py-polars/tests/parametric/test_groupby_rolling.py index ed7f0b5d513c..d6356167c7a8 100644 --- a/py-polars/tests/parametric/test_groupby_rolling.py +++ b/py-polars/tests/parametric/test_groupby_rolling.py @@ -8,10 +8,10 @@ from hypothesis import assume, given import polars as pl +from polars._utils.convert import parse_as_duration_string from polars.testing import assert_frame_equal from polars.testing.parametric.primitives import column, dataframes from polars.testing.parametric.strategies import strategy_closed, strategy_time_unit -from polars.utils.convert import parse_as_duration_string if TYPE_CHECKING: from polars.type_aliases import ClosedInterval, TimeUnit diff --git a/py-polars/tests/unit/constructors/test_any_value_fallbacks.py b/py-polars/tests/unit/constructors/test_any_value_fallbacks.py index bff14e5a461e..e98072fbf07d 100644 --- a/py-polars/tests/unit/constructors/test_any_value_fallbacks.py +++ b/py-polars/tests/unit/constructors/test_any_value_fallbacks.py @@ -8,8 +8,8 @@ import pytest import polars as pl +from polars._utils.wrap import wrap_s from polars.polars import PySeries -from polars.utils._wrap import wrap_s @pytest.mark.parametrize( diff --git a/py-polars/tests/unit/constructors/test_constructors.py b/py-polars/tests/unit/constructors/test_constructors.py index ca57a8b6fa13..7cdeb7ce90e2 100644 --- a/py-polars/tests/unit/constructors/test_constructors.py +++ b/py-polars/tests/unit/constructors/test_constructors.py @@ -13,11 +13,11 @@ from pydantic import BaseModel, Field, TypeAdapter import polars as pl +from polars._utils.construction import type_hints from polars.datatypes import PolarsDataType, numpy_char_code_to_dtype from polars.dependencies import dataclasses, pydantic from polars.exceptions import TimeZoneAwareConstructorWarning from polars.testing import assert_frame_equal, assert_series_equal -from polars.utils._construction import type_hints if TYPE_CHECKING: from collections.abc import Callable @@ -26,7 +26,7 @@ from polars.datatypes import PolarsDataType else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo # ----------------------------------------------------------------------------------- @@ -1461,7 +1461,7 @@ def test_nested_schema_construction2() -> None: def test_arrow_to_pyseries_with_one_chunk_does_not_copy_data() -> None: - from polars.utils._construction import arrow_to_pyseries + from polars._utils.construction import arrow_to_pyseries original_array = pa.chunked_array([[1, 2, 3]], type=pa.int64()) pyseries = arrow_to_pyseries("", original_array) diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 48f23f39152c..1063739e7797 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -16,6 +16,7 @@ import polars as pl import polars.selectors as cs +from polars._utils.construction import iterable_to_pydf from polars.datatypes import DTYPE_TEMPORAL_UNITS, INTEGER_DTYPES from polars.exceptions import ComputeError, TimeZoneAwareConstructorWarning from polars.testing import ( @@ -24,14 +25,13 @@ assert_series_equal, ) from polars.testing.parametric import columns -from polars.utils._construction import iterable_to_pydf if TYPE_CHECKING: from zoneinfo import ZoneInfo from polars.type_aliases import JoinStrategy, UniqueKeepStrategy else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo def test_version() -> None: diff --git a/py-polars/tests/unit/datatypes/test_temporal.py b/py-polars/tests/unit/datatypes/test_temporal.py index e3af046ad233..7f78dec6fb6e 100644 --- a/py-polars/tests/unit/datatypes/test_temporal.py +++ b/py-polars/tests/unit/datatypes/test_temporal.py @@ -28,7 +28,7 @@ from polars.type_aliases import Ambiguous, PolarsTemporalType, TimeUnit else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo def test_fill_null() -> None: diff --git a/py-polars/tests/unit/expr/test_exprs.py b/py-polars/tests/unit/expr/test_exprs.py index fc5453035128..08ba41843495 100644 --- a/py-polars/tests/unit/expr/test_exprs.py +++ b/py-polars/tests/unit/expr/test_exprs.py @@ -21,7 +21,7 @@ if TYPE_CHECKING: from zoneinfo import ZoneInfo else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo def test_arg_true() -> None: diff --git a/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py b/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py index d0cafb850a61..2074865f694c 100644 --- a/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py +++ b/py-polars/tests/unit/functions/as_datatype/test_as_datatype.py @@ -13,7 +13,7 @@ from polars.type_aliases import TimeUnit else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo def test_date_datetime() -> None: diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py index 02150992d910..b9873f833b4b 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_range.py +++ b/py-polars/tests/unit/functions/range/test_datetime_range.py @@ -16,7 +16,7 @@ from polars.datatypes import PolarsDataType from polars.type_aliases import ClosedInterval, TimeUnit else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo def test_datetime_range() -> None: diff --git a/py-polars/tests/unit/io/test_csv.py b/py-polars/tests/unit/io/test_csv.py index 70bce7dfe921..a940e26f3697 100644 --- a/py-polars/tests/unit/io/test_csv.py +++ b/py-polars/tests/unit/io/test_csv.py @@ -14,9 +14,9 @@ import zstandard import polars as pl +from polars._utils.various import normalize_filepath from polars.exceptions import ComputeError, NoDataError from polars.testing import assert_frame_equal, assert_series_equal -from polars.utils.various import normalize_filepath if TYPE_CHECKING: from pathlib import Path diff --git a/py-polars/tests/unit/namespaces/test_datetime.py b/py-polars/tests/unit/namespaces/test_datetime.py index 1f6fc9138581..70c75d3d1b84 100644 --- a/py-polars/tests/unit/namespaces/test_datetime.py +++ b/py-polars/tests/unit/namespaces/test_datetime.py @@ -15,7 +15,7 @@ from polars.type_aliases import TemporalLiteral, TimeUnit else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo @pytest.fixture() diff --git a/py-polars/tests/unit/namespaces/test_strptime.py b/py-polars/tests/unit/namespaces/test_strptime.py index 0d7a8fba8914..52d0fa4a61a3 100644 --- a/py-polars/tests/unit/namespaces/test_strptime.py +++ b/py-polars/tests/unit/namespaces/test_strptime.py @@ -20,7 +20,7 @@ from polars.type_aliases import PolarsTemporalType, TimeUnit else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo def test_str_strptime() -> None: diff --git a/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py b/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py index a120c4076691..8f9740a42032 100644 --- a/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py +++ b/py-polars/tests/unit/operations/map/test_inefficient_map_warning.py @@ -12,10 +12,10 @@ import pytest import polars as pl +from polars._utils.udfs import _NUMPY_FUNCTIONS, BytecodeParser +from polars._utils.various import in_terminal_that_supports_colour from polars.exceptions import PolarsInefficientMapWarning from polars.testing import assert_frame_equal, assert_series_equal -from polars.utils.udfs import _NUMPY_FUNCTIONS, BytecodeParser -from polars.utils.various import in_terminal_that_supports_colour MY_CONSTANT = 3 MY_DICT = {0: "a", 1: "b", 2: "c", 3: "d", 4: "e"} diff --git a/py-polars/tests/unit/operations/test_cast.py b/py-polars/tests/unit/operations/test_cast.py index 4040c112cb24..9c1cfdf1c47d 100644 --- a/py-polars/tests/unit/operations/test_cast.py +++ b/py-polars/tests/unit/operations/test_cast.py @@ -6,13 +6,13 @@ import pytest import polars as pl -from polars.testing import assert_frame_equal -from polars.testing.asserts.series import assert_series_equal -from polars.utils.convert import ( +from polars._utils.convert import ( MS_PER_SECOND, NS_PER_SECOND, US_PER_SECOND, ) +from polars.testing import assert_frame_equal +from polars.testing.asserts.series import assert_series_equal if TYPE_CHECKING: from polars import PolarsDataType diff --git a/py-polars/tests/unit/operations/test_group_by_dynamic.py b/py-polars/tests/unit/operations/test_group_by_dynamic.py index bb3b5fa1f514..9fdcebfad510 100644 --- a/py-polars/tests/unit/operations/test_group_by_dynamic.py +++ b/py-polars/tests/unit/operations/test_group_by_dynamic.py @@ -14,7 +14,7 @@ from polars.type_aliases import Label, StartBy else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo @pytest.mark.parametrize( diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index 8b48beb5edce..5f48a3347711 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -11,6 +11,7 @@ import polars import polars as pl +from polars._utils.construction import iterable_to_pyseries from polars.datatypes import ( Date, Datetime, @@ -29,14 +30,13 @@ ) from polars.exceptions import ComputeError, PolarsInefficientMapWarning, ShapeError from polars.testing import assert_frame_equal, assert_series_equal -from polars.utils._construction import iterable_to_pyseries if TYPE_CHECKING: from zoneinfo import ZoneInfo from polars.type_aliases import EpochTimeUnit, PolarsDataType, TimeUnit else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo def test_cum_agg() -> None: diff --git a/py-polars/tests/unit/test_config.py b/py-polars/tests/unit/test_config.py index f4ed561a3b4e..f8efda2222b7 100644 --- a/py-polars/tests/unit/test_config.py +++ b/py-polars/tests/unit/test_config.py @@ -8,8 +8,8 @@ import polars as pl import polars.polars as plr +from polars._utils.unstable import issue_unstable_warning from polars.config import _POLARS_CFG_ENV_VARS -from polars.utils.unstable import issue_unstable_warning @pytest.fixture(autouse=True) diff --git a/py-polars/tests/unit/utils/test_deprecation.py b/py-polars/tests/unit/utils/test_deprecation.py index fee0a18724b6..2f42a0cd52bf 100644 --- a/py-polars/tests/unit/utils/test_deprecation.py +++ b/py-polars/tests/unit/utils/test_deprecation.py @@ -5,7 +5,7 @@ import pytest -from polars.utils.deprecation import ( +from polars._utils.deprecation import ( deprecate_function, deprecate_nonkeyword_arguments, deprecate_renamed_function, diff --git a/py-polars/tests/unit/utils/test_parse_expr_input.py b/py-polars/tests/unit/utils/test_parse_expr_input.py index 8c58c1307688..a17debfc94cb 100644 --- a/py-polars/tests/unit/utils/test_parse_expr_input.py +++ b/py-polars/tests/unit/utils/test_parse_expr_input.py @@ -6,9 +6,9 @@ import pytest import polars as pl +from polars._utils.parse_expr_input import parse_as_expression +from polars._utils.wrap import wrap_expr from polars.testing import assert_frame_equal -from polars.utils._parse_expr_input import parse_as_expression -from polars.utils._wrap import wrap_expr def assert_expr_equal(result: pl.Expr, expected: pl.Expr) -> None: diff --git a/py-polars/tests/unit/utils/test_unstable.py b/py-polars/tests/unit/utils/test_unstable.py index ea9e5d594c9f..a760480a9a88 100644 --- a/py-polars/tests/unit/utils/test_unstable.py +++ b/py-polars/tests/unit/utils/test_unstable.py @@ -3,7 +3,7 @@ import pytest import polars as pl -from polars.utils.unstable import issue_unstable_warning, unstable +from polars._utils.unstable import issue_unstable_warning, unstable def test_issue_unstable_warning(monkeypatch: pytest.MonkeyPatch) -> None: diff --git a/py-polars/tests/unit/utils/test_utils.py b/py-polars/tests/unit/utils/test_utils.py index 352a1a3371e0..d81540efc10d 100644 --- a/py-polars/tests/unit/utils/test_utils.py +++ b/py-polars/tests/unit/utils/test_utils.py @@ -7,15 +7,14 @@ import pytest import polars as pl -from polars.io._utils import _looks_like_url -from polars.utils.convert import ( +from polars._utils.convert import ( date_to_int, datetime_to_int, parse_as_duration_string, time_to_int, timedelta_to_int, ) -from polars.utils.various import ( +from polars._utils.various import ( _in_notebook, is_bool_sequence, is_int_sequence, @@ -24,13 +23,14 @@ parse_percentiles, parse_version, ) +from polars.io._utils import _looks_like_url if TYPE_CHECKING: from zoneinfo import ZoneInfo from polars.type_aliases import TimeUnit else: - from polars.utils.convert import string_to_zoneinfo as ZoneInfo + from polars._utils.convert import string_to_zoneinfo as ZoneInfo @pytest.mark.parametrize(