diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index c72beb2e4c44..75bd1e7a2ec0 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -126,7 +126,7 @@ import deltalake from xlsxwriter import Workbook - from polars import Expr, LazyFrame, Series + from polars import DataType, Expr, LazyFrame, Series from polars.interchange.dataframe import PolarsDataFrame from polars.type_aliases import ( AsofJoinStrategy, @@ -1206,7 +1206,7 @@ def columns(self, names: Sequence[str]) -> None: self._df.set_column_names(names) @property - def dtypes(self) -> list[PolarsDataType]: + def dtypes(self) -> list[DataType]: """ Get the datatypes of the columns of this DataFrame. @@ -1255,7 +1255,7 @@ def flags(self) -> dict[str, dict[str, bool]]: return {name: self[name].flags for name in self.columns} @property - def schema(self) -> SchemaDict: + def schema(self) -> OrderedDict[str, DataType]: """ Get a dict[column name, DataType]. diff --git a/py-polars/polars/io/delta.py b/py-polars/polars/io/delta.py index 8521bd05fef4..2f0a21f8f191 100644 --- a/py-polars/polars/io/delta.py +++ b/py-polars/polars/io/delta.py @@ -12,8 +12,7 @@ from polars.io.pyarrow_dataset import scan_pyarrow_dataset if TYPE_CHECKING: - from polars import DataFrame, LazyFrame - from polars.type_aliases import PolarsDataType + from polars import DataFrame, DataType, LazyFrame def read_delta( @@ -320,7 +319,7 @@ def _check_if_delta_available() -> None: ) -def _check_for_unsupported_types(dtypes: list[PolarsDataType]) -> None: +def _check_for_unsupported_types(dtypes: list[DataType]) -> None: schema_dtypes = unpack_dtypes(*dtypes) unsupported_types = {Time, Categorical, Null} overlap = schema_dtypes & unsupported_types diff --git a/py-polars/polars/io/ipc/functions.py b/py-polars/polars/io/ipc/functions.py index f426bbeae2ae..3d520b5cc388 100644 --- a/py-polars/polars/io/ipc/functions.py +++ b/py-polars/polars/io/ipc/functions.py @@ -15,8 +15,7 @@ if TYPE_CHECKING: from io import BytesIO - from polars import DataFrame, LazyFrame - from polars.type_aliases import PolarsDataType + from polars import DataFrame, DataType, LazyFrame def read_ipc( @@ -185,7 +184,7 @@ def read_ipc_stream( ) -def read_ipc_schema(source: str | BinaryIO | Path | bytes) -> dict[str, PolarsDataType]: +def read_ipc_schema(source: str | BinaryIO | Path | bytes) -> dict[str, DataType]: """ Get the schema of an IPC file without reading data. diff --git a/py-polars/polars/io/parquet/functions.py b/py-polars/polars/io/parquet/functions.py index 59554a587b1a..cfce8e1d085c 100644 --- a/py-polars/polars/io/parquet/functions.py +++ b/py-polars/polars/io/parquet/functions.py @@ -16,8 +16,8 @@ if TYPE_CHECKING: from io import BytesIO - from polars import DataFrame, LazyFrame - from polars.type_aliases import ParallelStrategy, PolarsDataType + from polars import DataFrame, DataType, LazyFrame + from polars.type_aliases import ParallelStrategy def read_parquet( @@ -143,7 +143,7 @@ def read_parquet( def read_parquet_schema( source: str | BinaryIO | Path | bytes, -) -> dict[str, PolarsDataType]: +) -> dict[str, DataType]: """ Get the schema of a Parquet file without reading data. diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 5dc7e57ae3ae..9ec5813c164d 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -87,7 +87,7 @@ import pyarrow as pa - from polars import DataFrame, Expr + from polars import DataFrame, DataType, Expr from polars.dependencies import numpy as np from polars.type_aliases import ( AsofJoinStrategy, @@ -693,7 +693,7 @@ def columns(self) -> list[str]: return self._ldf.columns() @property - def dtypes(self) -> list[PolarsDataType]: + def dtypes(self) -> list[DataType]: """ Get dtypes of columns in LazyFrame. @@ -717,7 +717,7 @@ def dtypes(self) -> list[PolarsDataType]: return self._ldf.dtypes() @property - def schema(self) -> SchemaDict: + def schema(self) -> OrderedDict[str, DataType]: """ Get a dict[column name, DataType]. diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 38f63002c174..19d588298023 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -85,7 +85,7 @@ def median(self) -> dt.date | dt.datetime | dt.timedelta | None: if s.dtype == Date: return _to_python_date(int(out)) else: - return _to_python_datetime(int(out), s.dtype.time_unit) # type: ignore[union-attr] + return _to_python_datetime(int(out), s.dtype.time_unit) # type: ignore[attr-defined] return None def mean(self) -> dt.date | dt.datetime | None: @@ -108,7 +108,7 @@ def mean(self) -> dt.date | dt.datetime | None: if s.dtype == Date: return _to_python_date(int(out)) else: - return _to_python_datetime(int(out), s.dtype.time_unit) # type: ignore[union-attr] + return _to_python_datetime(int(out), s.dtype.time_unit) # type: ignore[attr-defined] return None def to_string(self, format: str) -> Series: diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index 52853672e147..14b46aa9c237 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -112,7 +112,7 @@ if TYPE_CHECKING: import sys - from polars import DataFrame, Expr + from polars import DataFrame, DataType, Expr from polars.series._numpy import SeriesView from polars.type_aliases import ( ClosedInterval, @@ -365,7 +365,7 @@ def _get_ptr(self) -> tuple[int, int, int]: return self._s.get_ptr() @property - def dtype(self) -> PolarsDataType: + def dtype(self) -> DataType: """ Get the data type of this Series. @@ -398,7 +398,7 @@ def flags(self) -> dict[str, bool]: return out @property - def inner_dtype(self) -> PolarsDataType | None: + def inner_dtype(self) -> DataType | None: """ Get the inner dtype in of a List typed Series. @@ -412,7 +412,7 @@ def inner_dtype(self) -> PolarsDataType | None: version="0.19.14", ) try: - return self.dtype.inner # type: ignore[union-attr] + return self.dtype.inner # type: ignore[attr-defined] except AttributeError: return None @@ -502,12 +502,12 @@ def _comp(self, other: Any, op: ComparisonOperator) -> Series: time_unit = "us" elif self.dtype == Datetime: # Use local time zone info - time_zone = self.dtype.time_zone # type: ignore[union-attr] + time_zone = self.dtype.time_zone # type: ignore[attr-defined] if str(other.tzinfo) != str(time_zone): raise TypeError( f"Datetime time zone {other.tzinfo!r} does not match Series timezone {time_zone!r}" ) - time_unit = self.dtype.time_unit # type: ignore[union-attr] + time_unit = self.dtype.time_unit # type: ignore[attr-defined] else: raise ValueError( f"cannot compare datetime.datetime to Series of type {self.dtype}" @@ -524,7 +524,7 @@ def _comp(self, other: Any, op: ComparisonOperator) -> Series: return self._from_pyseries(f(d)) elif isinstance(other, timedelta) and self.dtype == Duration: - time_unit = self.dtype.time_unit # type: ignore[union-attr] + time_unit = self.dtype.time_unit # type: ignore[attr-defined] td = _timedelta_to_pl_timedelta(other, time_unit) # type: ignore[arg-type] f = get_ffi_func(op + "_<>", Int64, self._s) assert f is not None @@ -4051,9 +4051,9 @@ def convert_to_date(arr: np.ndarray[Any, Any]) -> np.ndarray[Any, Any]: if self.dtype == Date: tp = "datetime64[D]" elif self.dtype == Duration: - tp = f"timedelta64[{self.dtype.time_unit}]" # type: ignore[union-attr] + tp = f"timedelta64[{self.dtype.time_unit}]" # type: ignore[attr-defined] else: - tp = f"datetime64[{self.dtype.time_unit}]" # type: ignore[union-attr] + tp = f"datetime64[{self.dtype.time_unit}]" # type: ignore[attr-defined] return arr.astype(tp) def raise_no_zero_copy() -> None: @@ -4066,7 +4066,7 @@ def raise_no_zero_copy() -> None: writable=writable, use_pyarrow=use_pyarrow, ) - np_array.shape = (self.len(), self.dtype.width) # type: ignore[union-attr] + np_array.shape = (self.len(), self.dtype.width) # type: ignore[attr-defined] return np_array if ( diff --git a/py-polars/polars/series/struct.py b/py-polars/polars/series/struct.py index 6af6baa7f8c0..a12613ed117f 100644 --- a/py-polars/polars/series/struct.py +++ b/py-polars/polars/series/struct.py @@ -9,9 +9,8 @@ from polars.utils.various import sphinx_accessor if TYPE_CHECKING: - from polars import DataFrame, Series + from polars import DataFrame, DataType, Series from polars.polars import PySeries - from polars.type_aliases import SchemaDict elif os.getenv("BUILDING_SPHINX_DOCS"): property = sphinx_accessor @@ -66,10 +65,10 @@ def rename_fields(self, names: Sequence[str]) -> Series: """ @property - def schema(self) -> SchemaDict: + def schema(self) -> OrderedDict[str, DataType]: """Get the struct definition as a name/dtype schema dict.""" if getattr(self, "_s", None) is None: - return {} + return OrderedDict() return OrderedDict(self._s.dtype().to_schema()) def unnest(self) -> DataFrame: diff --git a/py-polars/polars/testing/asserts/series.py b/py-polars/polars/testing/asserts/series.py index cec49db44b89..84fcccf14c39 100644 --- a/py-polars/polars/testing/asserts/series.py +++ b/py-polars/polars/testing/asserts/series.py @@ -18,7 +18,7 @@ from polars.testing.asserts.utils import raise_assertion_error if TYPE_CHECKING: - from polars.type_aliases import PolarsDataType + from polars import DataType def assert_series_equal( @@ -252,19 +252,19 @@ def _assert_series_nan_values_match(left: Series, right: Series) -> None: ) -def _comparing_floats(left: PolarsDataType, right: PolarsDataType) -> bool: +def _comparing_floats(left: DataType, right: DataType) -> bool: return left.is_float() and right.is_float() -def _comparing_lists(left: PolarsDataType, right: PolarsDataType) -> bool: +def _comparing_lists(left: DataType, right: DataType) -> bool: return left in (List, Array) and right in (List, Array) -def _comparing_structs(left: PolarsDataType, right: PolarsDataType) -> bool: +def _comparing_structs(left: DataType, right: DataType) -> bool: return left == Struct and right == Struct -def _comparing_nested_floats(left: PolarsDataType, right: PolarsDataType) -> bool: +def _comparing_nested_floats(left: DataType, right: DataType) -> bool: if not (_comparing_lists(left, right) or _comparing_structs(left, right)): return False diff --git a/py-polars/tests/parametric/test_series.py b/py-polars/tests/parametric/test_series.py index e70ab742c8d7..27d4062afe76 100644 --- a/py-polars/tests/parametric/test_series.py +++ b/py-polars/tests/parametric/test_series.py @@ -139,7 +139,7 @@ def test_series_duration_timeunits( "us": 1_000, "ms": 1_000_000, } - assert nanos == [v * scale[s.dtype.time_unit] for v in s.to_physical()] # type: ignore[union-attr] + assert nanos == [v * scale[s.dtype.time_unit] for v in s.to_physical()] # type: ignore[attr-defined] assert micros == [int(v / 1_000) for v in nanos] assert millis == [int(v / 1_000) for v in micros] diff --git a/py-polars/tests/unit/datatypes/test_duration.py b/py-polars/tests/unit/datatypes/test_duration.py index 27bd042bb16b..e9db9940c5b5 100644 --- a/py-polars/tests/unit/datatypes/test_duration.py +++ b/py-polars/tests/unit/datatypes/test_duration.py @@ -16,7 +16,7 @@ def test_duration_cum_sum() -> None: pl.Duration(time_unit="ms"), pl.Duration(time_unit="ns"), ): - assert df.schema["A"].is_(duration_dtype) is False # type: ignore[arg-type] + assert df.schema["A"].is_(duration_dtype) is False def test_duration_std_var() -> None: diff --git a/py-polars/tests/unit/datatypes/test_list.py b/py-polars/tests/unit/datatypes/test_list.py index 3ef49b524c4a..bda21e661f15 100644 --- a/py-polars/tests/unit/datatypes/test_list.py +++ b/py-polars/tests/unit/datatypes/test_list.py @@ -18,7 +18,7 @@ def test_dtype() -> None: # inferred a = pl.Series("a", [[1, 2, 3], [2, 5], [6, 7, 8, 9]]) assert a.dtype == pl.List - assert a.dtype.inner == pl.Int64 # type: ignore[union-attr] + assert a.dtype.inner == pl.Int64 # type: ignore[attr-defined] assert a.dtype.is_(pl.List(pl.Int64)) # explicit @@ -43,7 +43,7 @@ def test_dtype() -> None: "dtm": pl.List(pl.Datetime), } assert all(tp.is_nested() for tp in df.dtypes) - assert df.schema["i"].inner == pl.Int8 # type: ignore[union-attr] + assert df.schema["i"].inner == pl.Int8 # type: ignore[attr-defined] assert df.rows() == [ ( [1, 2, 3], @@ -75,8 +75,8 @@ def test_categorical() -> None: .to_series(3) ) - assert out.dtype.inner == pl.Categorical # type: ignore[union-attr] - assert out.dtype.inner.is_nested() is False # type: ignore[union-attr] + assert out.dtype.inner == pl.Categorical # type: ignore[attr-defined] + assert out.dtype.inner.is_nested() is False # type: ignore[attr-defined] def test_cast_inner() -> None: @@ -89,7 +89,7 @@ def test_cast_inner() -> None: # this creates an inner null type df = pl.from_pandas(pd.DataFrame(data=[[[]], [[]]], columns=["A"])) assert ( - df["A"].cast(pl.List(int)).dtype.inner == pl.Int64 # type: ignore[union-attr] + df["A"].cast(pl.List(int)).dtype.inner == pl.Int64 # type: ignore[attr-defined] ) @@ -192,7 +192,7 @@ def test_local_categorical_list() -> None: values = [["a", "b"], ["c"], ["a", "d", "d"]] s = pl.Series(values, dtype=pl.List(pl.Categorical)) assert s.dtype == pl.List - assert s.dtype.inner == pl.Categorical # type: ignore[union-attr] + assert s.dtype.inner == pl.Categorical # type: ignore[attr-defined] assert s.to_list() == values # Check that underlying physicals match diff --git a/py-polars/tests/unit/functions/range/test_datetime_range.py b/py-polars/tests/unit/functions/range/test_datetime_range.py index 3f50616f9a28..0068afd45396 100644 --- a/py-polars/tests/unit/functions/range/test_datetime_range.py +++ b/py-polars/tests/unit/functions/range/test_datetime_range.py @@ -37,7 +37,7 @@ def test_datetime_range() -> None: time_unit=time_unit, eager=True, ) - assert rng.dtype.time_unit == time_unit # type: ignore[union-attr] + assert rng.dtype.time_unit == time_unit # type: ignore[attr-defined] assert rng.shape == (13,) assert rng.dt[0] == datetime(2020, 1, 1) assert rng.dt[-1] == datetime(2020, 1, 2) @@ -67,7 +67,7 @@ def test_datetime_range() -> None: datetime(2022, 1, 1), datetime(2022, 1, 1, 0, 1), "987456321ns", eager=True ) assert len(result) == 61 - assert result.dtype.time_unit == "ns" # type: ignore[union-attr] + assert result.dtype.time_unit == "ns" # type: ignore[attr-defined] assert result.dt.second()[-1] == 59 assert result.cast(pl.Utf8)[-1] == "2022-01-01 00:00:59.247379260" diff --git a/py-polars/tests/unit/io/test_ipc.py b/py-polars/tests/unit/io/test_ipc.py index 6a7161fd9fe2..d71dea5d0374 100644 --- a/py-polars/tests/unit/io/test_ipc.py +++ b/py-polars/tests/unit/io/test_ipc.py @@ -130,7 +130,7 @@ def test_ipc_schema(compression: IpcCompression) -> None: df.write_ipc(f, compression=compression) f.seek(0) - expected = {"a": pl.Int64, "b": pl.Utf8, "c": pl.Boolean} + expected = {"a": pl.Int64(), "b": pl.Utf8(), "c": pl.Boolean()} assert pl.read_ipc_schema(f) == expected @@ -152,18 +152,18 @@ def test_ipc_schema_from_file( schema = pl.read_ipc_schema(file_path) expected = { - "bools": pl.Boolean, - "bools_nulls": pl.Boolean, - "int": pl.Int64, - "int_nulls": pl.Int64, - "floats": pl.Float64, - "floats_nulls": pl.Float64, - "strings": pl.Utf8, - "strings_nulls": pl.Utf8, - "date": pl.Date, - "datetime": pl.Datetime, - "time": pl.Time, - "cat": pl.Categorical, + "bools": pl.Boolean(), + "bools_nulls": pl.Boolean(), + "int": pl.Int64(), + "int_nulls": pl.Int64(), + "floats": pl.Float64(), + "floats_nulls": pl.Float64(), + "strings": pl.Utf8(), + "strings_nulls": pl.Utf8(), + "date": pl.Date(), + "datetime": pl.Datetime(), + "time": pl.Time(), + "cat": pl.Categorical(), } assert schema == expected diff --git a/py-polars/tests/unit/series/test_series.py b/py-polars/tests/unit/series/test_series.py index aa34495621f7..4b86094e58f4 100644 --- a/py-polars/tests/unit/series/test_series.py +++ b/py-polars/tests/unit/series/test_series.py @@ -142,8 +142,8 @@ def test_init_inputs(monkeypatch: Any) -> None: s = pl.Series([date(2023, 1, 1), date(2023, 1, 2)], dtype=pl.Datetime) assert s.to_list() == [datetime(2023, 1, 1), datetime(2023, 1, 2)] assert Datetime == s.dtype - assert s.dtype.time_unit == "us" # type: ignore[union-attr] - assert s.dtype.time_zone is None # type: ignore[union-attr] + assert s.dtype.time_unit == "us" # type: ignore[attr-defined] + assert s.dtype.time_zone is None # type: ignore[attr-defined] # conversion of Date to Datetime with specified timezone and units tu: TimeUnit = "ms" @@ -153,8 +153,8 @@ def test_init_inputs(monkeypatch: Any) -> None: d2 = datetime(2023, 1, 2, 0, 0, 0, 0, ZoneInfo(tz)) assert s.to_list() == [d1, d2] assert Datetime == s.dtype - assert s.dtype.time_unit == tu # type: ignore[union-attr] - assert s.dtype.time_zone == tz # type: ignore[union-attr] + assert s.dtype.time_unit == tu # type: ignore[attr-defined] + assert s.dtype.time_zone == tz # type: ignore[attr-defined] # datetime64: check timeunit (auto-detect, implicit/explicit) and NaT d64 = pd.date_range(date(2021, 8, 1), date(2021, 8, 3)).values @@ -165,10 +165,10 @@ def test_init_inputs(monkeypatch: Any) -> None: s = pl.Series("dates", d64, dtype) assert s.to_list() == expected assert Datetime == s.dtype - assert s.dtype.time_unit == "ns" # type: ignore[union-attr] + assert s.dtype.time_unit == "ns" # type: ignore[attr-defined] s = pl.Series(values=d64.astype("