pola-rs · stinodego · Oct 21, 2023 · Oct 20, 2023 · Oct 20, 2023 · Oct 20, 2023
@@ -97,7 +97,10 @@
         pl.Series("Array_1", [[1, 3], [2, 5]]),
         pl.Series("Array_2", [[1, 7, 3], [8, 1, 0]]),
     ],
-    schema={"Array_1": pl.Array(2, pl.Int64), "Array_2": pl.Array(3, pl.Int64)},
+    schema={
+        "Array_1": pl.Array(inner=pl.Int64, width=2),
+        "Array_2": pl.Array(inner=pl.Int64, width=3),
+    },
 )
 print(array_df)
 # --8<-- [end:array_df]

@@ -456,18 +456,18 @@ class Unknown(DataType):
 
 
 class List(NestedType):
-    """Nested list/array type with variable length of inner lists."""
+    """Variable length list type."""
 
     inner: PolarsDataType | None = None
 
     def __init__(self, inner: PolarsDataType | PythonDataType):
         """
-        Nested list/array type with variable length of inner lists.
+        Variable length list type.
 
         Parameters
         ----------
         inner
-            The `DataType` of values within the list
+            The ``DataType`` of the values within each list.
 
         Examples
         --------
@@ -518,26 +518,31 @@ def __repr__(self) -> str:
 
 
 class Array(NestedType):
-    """Nested list/array type with fixed length of inner arrays."""
+    """Fixed length list type."""
 
     inner: PolarsDataType | None = None
     width: int
 
-    def __init__(self, width: int, inner: PolarsDataType | PythonDataType = Null):
+    def __init__(  # noqa: D417
+        self,
+        *args: Any,
+        width: int | None = None,
+        inner: PolarsDataType | PythonDataType | None = None,
+    ):
         """
-        Nested list/array type with fixed length of inner arrays.
+        Fixed length list type.
 
         Parameters
         ----------
         width
-            The fixed size length of the inner arrays.
+            The length of the arrays.
         inner
-            The `DataType` of values within the inner arrays
+            The ``DataType`` of the values within each array.
 
         Examples
         --------
         >>> s = pl.Series(
-        ...     "a", [[1, 2], [4, 3]], dtype=pl.Array(width=2, inner=pl.Int64)
+        ...     "a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2)
         ... )
         >>> s
         shape: (2,)
@@ -548,6 +553,32 @@ def __init__(self, width: int, inner: PolarsDataType | PythonDataType = Null):
         ]
 
         """
+        from polars.utils.deprecation import issue_deprecation_warning
+
+        if args:
+            # TODO: When removing this deprecation, update the `to_object`
+            # implementation in py-polars/src/conversion.rs to use `call1` instead of
+            # `call`
+            issue_deprecation_warning(
+                "Parameters `inner` and `width` will change positions in the next breaking release."
+                " Use keyword arguments to keep current behavior and silence this warning.",
+                version="0.19.11",
+            )
+            if len(args) == 1:
+                width = args[0]
+            else:
+                width, inner = args[:2]
+        if width is None:
+            raise TypeError("`width` must be specified when initializing an `Array`")
+
+        if inner is None:
+            issue_deprecation_warning(
+                "The default value for the `inner` parameter of `Array` will be removed in the next breaking release."
+                " Pass `inner=pl.Null` to keep current behavior and silence this warning.",
+                version="0.19.11",
+            )
+            inner = Null
+
         self.width = width
         self.inner = polars.datatypes.py_type_to_dtype(inner)
 
@@ -570,11 +601,11 @@ def __eq__(self, other: PolarsDataType) -> bool:  # type: ignore[override]
             return False
 
     def __hash__(self) -> int:
-        return hash((self.__class__, self.inner))
+        return hash((self.__class__, self.inner, self.width))
 
     def __repr__(self) -> str:
         class_name = self.__class__.__name__
-        return f"{class_name}({self.inner!r})"
+        return f"{class_name}({self.inner!r}, {self.width})"
 
 
 class Field:

@@ -3,6 +3,7 @@
 from typing import TYPE_CHECKING
 
 from polars.datatypes import (
+    Array,
     DataTypeGroup,
     Date,
     Datetime,
@@ -75,7 +76,7 @@
     FLOAT_DTYPES | INTEGER_DTYPES | frozenset([Decimal])
 )
 
-NESTED_DTYPES: frozenset[PolarsDataType] = DataTypeGroup([List, Struct])
+NESTED_DTYPES: frozenset[PolarsDataType] = DataTypeGroup([List, Struct, Array])
 
 # number of rows to scan by default when inferring datatypes
 N_INFER_DEFAULT = 100
@@ -21,6 +21,7 @@
 )
 
 from polars.datatypes import (
+    Array,
     Binary,
     Boolean,
     Categorical,
@@ -203,7 +204,7 @@ def unpack_dtypes(
 
     unpacked: set[PolarsDataType] = set()
     for tp in dtypes:
-        if isinstance(tp, List):
+        if isinstance(tp, (List, Array)):
             if include_compound:
                 unpacked.add(tp)
             unpacked.update(unpack_dtypes(tp.inner, include_compound=include_compound))

@@ -24,7 +24,7 @@ def min(self) -> Expr:
         --------
         >>> df = pl.DataFrame(
         ...     data={"a": [[1, 2], [4, 3]]},
-        ...     schema={"a": pl.Array(width=2, inner=pl.Int64)},
+        ...     schema={"a": pl.Array(inner=pl.Int64, width=2)},
         ... )
         >>> df.select(pl.col("a").arr.min())
         shape: (2, 1)
@@ -48,7 +48,7 @@ def max(self) -> Expr:
         --------
         >>> df = pl.DataFrame(
         ...     data={"a": [[1, 2], [4, 3]]},
-        ...     schema={"a": pl.Array(width=2, inner=pl.Int64)},
+        ...     schema={"a": pl.Array(inner=pl.Int64, width=2)},
         ... )
         >>> df.select(pl.col("a").arr.max())
         shape: (2, 1)
@@ -72,7 +72,7 @@ def sum(self) -> Expr:
         --------
         >>> df = pl.DataFrame(
         ...     data={"a": [[1, 2], [4, 3]]},
-        ...     schema={"a": pl.Array(width=2, inner=pl.Int64)},
+        ...     schema={"a": pl.Array(inner=pl.Int64, width=2)},
         ... )
         >>> df.select(pl.col("a").arr.sum())
         shape: (2, 1)
@@ -103,7 +103,7 @@ def unique(self, *, maintain_order: bool = False) -> Expr:
         ...     {
         ...         "a": [[1, 1, 2]],
         ...     },
-        ...     schema_overrides={"a": pl.Array(width=3, inner=pl.Int64)},
+        ...     schema_overrides={"a": pl.Array(inner=pl.Int64, width=3)},
         ... )
         >>> df.select(pl.col("a").arr.unique())
         shape: (1, 1)

@@ -25,7 +25,7 @@ def min(self) -> Series:
         Examples
         --------
         >>> s = pl.Series(
-        ...     "a", [[1, 2], [4, 3]], dtype=pl.Array(width=2, inner=pl.Int64)
+        ...     "a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2)
         ... )
         >>> s.arr.min()
         shape: (2,)
@@ -44,7 +44,7 @@ def max(self) -> Series:
         Examples
         --------
         >>> s = pl.Series(
-        ...     "a", [[1, 2], [4, 3]], dtype=pl.Array(width=2, inner=pl.Int64)
+        ...     "a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2)
         ... )
         >>> s.arr.max()
         shape: (2,)
@@ -64,7 +64,7 @@ def sum(self) -> Series:
         --------
         >>> df = pl.DataFrame(
         ...     data={"a": [[1, 2], [4, 3]]},
-        ...     schema={"a": pl.Array(width=2, inner=pl.Int64)},
+        ...     schema={"a": pl.Array(inner=pl.Int64, width=2)},
         ... )
         >>> df.select(pl.col("a").arr.sum())
         shape: (2, 1)
@@ -94,7 +94,7 @@ def unique(self, *, maintain_order: bool = False) -> Series:
         ...     {
         ...         "a": [[1, 1, 2]],
         ...     },
-        ...     schema_overrides={"a": pl.Array(width=3, inner=pl.Int64)},
+        ...     schema_overrides={"a": pl.Array(inner=pl.Int64, width=3)},
         ... )
         >>> df.select(pl.col("a").arr.unique())
         shape: (1, 1)

@@ -312,7 +312,10 @@ impl ToPyObject for Wrap<DataType> {
             DataType::Array(inner, size) => {
                 let inner = Wrap(*inner.clone()).to_object(py);
                 let list_class = pl.getattr(intern!(py, "Array")).unwrap();
-                list_class.call1((*size, inner)).unwrap().into()
+                let kwargs = PyDict::new(py);
+                kwargs.set_item("inner", inner).unwrap();
+                kwargs.set_item("width", size).unwrap();
+                list_class.call((), Some(kwargs)).unwrap().into()
             },
             DataType::List(inner) => {
                 let inner = Wrap(*inner.clone()).to_object(py);

@@ -9,7 +9,7 @@ def test_cast_list_array() -> None:
     payload = [[1, 2, 3], [4, 2, 3]]
     s = pl.Series(payload)
 
-    dtype = pl.Array(width=3, inner=pl.Int64)
+    dtype = pl.Array(inner=pl.Int64, width=3)
     out = s.cast(dtype)
     assert out.dtype == dtype
     assert out.to_list() == payload
@@ -20,19 +20,19 @@ def test_cast_list_array() -> None:
         pl.ComputeError,
         match=r"incompatible offsets in source list",
     ):
-        s.cast(pl.Array(width=2, inner=pl.Int64))
+        s.cast(pl.Array(inner=pl.Int64, width=2))
 
 
 def test_array_construction() -> None:
     payload = [[1, 2, 3], [4, 2, 3]]
 
-    dtype = pl.Array(width=3, inner=pl.Int64)
+    dtype = pl.Array(inner=pl.Int64, width=3)
     s = pl.Series(payload, dtype=dtype)
     assert s.dtype == dtype
     assert s.to_list() == payload
 
     # inner type
-    dtype = pl.Array(2, pl.UInt8)
+    dtype = pl.Array(inner=pl.UInt8, width=2)
     payload = [[1, 2], [3, 4]]
     s = pl.Series(payload, dtype=dtype)
     assert s.dtype == dtype
@@ -41,13 +41,13 @@ def test_array_construction() -> None:
     # create using schema
     df = pl.DataFrame(
         schema={
-            "a": pl.Array(width=3, inner=pl.Float32),
-            "b": pl.Array(width=5, inner=pl.Datetime("ms")),
+            "a": pl.Array(inner=pl.Float32, width=3),
+            "b": pl.Array(inner=pl.Datetime("ms"), width=5),
         }
     )
     assert df.dtypes == [
-        pl.Array(width=3, inner=pl.Float32),
-        pl.Array(width=5, inner=pl.Datetime("ms")),
+        pl.Array(inner=pl.Float32, width=3),
+        pl.Array(inner=pl.Datetime("ms"), width=5),
     ]
     assert df.rows() == []
 
@@ -56,7 +56,9 @@ def test_array_in_group_by() -> None:
     df = pl.DataFrame(
         [
             pl.Series("id", [1, 2]),
-            pl.Series("list", [[1, 2], [5, 5]], dtype=pl.Array(2, pl.UInt8)),
+            pl.Series(
+                "list", [[1, 2], [5, 5]], dtype=pl.Array(inner=pl.UInt8, width=2)
+            ),
         ]
     )
 
@@ -83,7 +85,7 @@ def test_array_in_group_by() -> None:
 def test_array_invalid_operation() -> None:
     s = pl.Series(
         [[1, 2], [8, 9]],
-        dtype=pl.Array(width=2, inner=pl.Int32),
+        dtype=pl.Array(inner=pl.Int32, width=2),
     )
     with pytest.raises(
         InvalidOperationError,
@@ -94,11 +96,22 @@ def test_array_invalid_operation() -> None:
 
 def test_array_concat() -> None:
     a_df = pl.DataFrame({"a": [[0, 1], [1, 0]]}).select(
-        pl.col("a").cast(pl.Array(width=2, inner=pl.Int32))
+        pl.col("a").cast(pl.Array(inner=pl.Int32, width=2))
     )
     b_df = pl.DataFrame({"a": [[1, 1], [0, 0]]}).select(
-        pl.col("a").cast(pl.Array(width=2, inner=pl.Int32))
+        pl.col("a").cast(pl.Array(inner=pl.Int32, width=2))
     )
     assert pl.concat([a_df, b_df]).to_dict(False) == {
         "a": [[0, 1], [1, 0], [1, 1], [0, 0]]
     }
+
+
+def test_array_init_deprecation() -> None:
+    with pytest.deprecated_call():
+        pl.Array(2)
+    with pytest.deprecated_call():
+        pl.Array(2, pl.Utf8)
+    with pytest.deprecated_call():
+        pl.Array(2, inner=pl.Utf8)
+    with pytest.deprecated_call():
+        pl.Array(width=2)
@@ -645,8 +645,8 @@ def test_empty_struct() -> None:
         pl.List,
         pl.List(pl.Null),
         pl.List(pl.Utf8),
-        pl.Array(32),
-        pl.Array(16, inner=pl.UInt8),
+        pl.Array(inner=pl.Null, width=32),
+        pl.Array(inner=pl.UInt8, width=16),
         pl.Struct,
         pl.Struct([pl.Field("", pl.Null)]),
         pl.Struct([pl.Field("x", pl.UInt32), pl.Field("y", pl.Float64)]),

@@ -5,19 +5,19 @@
 
 
 def test_arr_min_max() -> None:
-    s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(width=2, inner=pl.Int64))
+    s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2))
     assert s.arr.max().to_list() == [2, 4]
     assert s.arr.min().to_list() == [1, 3]
 
 
 def test_arr_sum() -> None:
-    s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(width=2, inner=pl.Int64))
+    s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2))
     assert s.arr.sum().to_list() == [3, 7]
 
 
 def test_arr_unique() -> None:
     df = pl.DataFrame(
-        {"a": pl.Series("a", [[1, 1], [4, 3]], dtype=pl.Array(width=2, inner=pl.Int64))}
+        {"a": pl.Series("a", [[1, 1], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2))}
     )
 
     out = df.select(pl.col("a").arr.unique(maintain_order=True))
@@ -26,5 +26,5 @@ def test_arr_unique() -> None:
 
 
 def test_array_to_numpy() -> None:
-    s = pl.Series([[1, 2], [3, 4], [5, 6]], dtype=pl.Array(width=2, inner=pl.Int64))
+    s = pl.Series([[1, 2], [3, 4], [5, 6]], dtype=pl.Array(inner=pl.Int64, width=2))
     assert (s.to_numpy() == np.array([[1, 2], [3, 4], [5, 6]])).all()
@@ -309,7 +309,7 @@ def test_explode_inner_null() -> None:
 def test_explode_array() -> None:
     df = pl.LazyFrame(
         {"a": [[1, 2], [2, 3]], "b": [1, 2]},
-        schema_overrides={"a": pl.Array(2, inner=pl.Int64)},
+        schema_overrides={"a": pl.Array(inner=pl.Int64, width=2)},
     )
     expected = pl.DataFrame({"a": [1, 2, 2, 3], "b": [1, 1, 2, 2]})
     for ex in ("a", ~cs.integer()):