feat: make data parameter of Table and Column required (#978)

### Summary of Changes

The `data` parameter of `Table` and `Column` must now be specified. Being able to omit it was convenient for tests, but it also meant that users could forget to specify their data. Since tables and columns are immutable, creating an empty table is rarely useful in practice.

---------

Co-authored-by: megalinter-bot <[email protected]>
Safe-DS · Jan 8, 2025 · 29fdefa · 29fdefa
1 parent 7a7fa77
commit 29fdefa
Show file tree

Hide file tree

Showing 78 changed files with 197 additions and 150 deletions.
diff --git a/src/safeds/data/tabular/containers/_column.py b/src/safeds/data/tabular/containers/_column.py
@@ -68,12 +68,9 @@ def _from_polars_series(data: Series) -> Column:
     # Dunder methods
     # ------------------------------------------------------------------------------------------------------------------
 
-    def __init__(self, name: str, data: Sequence[T_co] | None = None) -> None:
+    def __init__(self, name: str, data: Sequence[T_co]) -> None:
         import polars as pl
 
-        if data is None:
-            data = []
-
         self._series: pl.Series = pl.Series(name, data, strict=False)
 
     def __contains__(self, item: Any) -> bool:

diff --git a/src/safeds/data/tabular/containers/_table.py b/src/safeds/data/tabular/containers/_table.py
@@ -243,7 +243,7 @@ def from_json_file(path: str | Path) -> Table:
             return Table._from_polars_data_frame(pl.read_json(path))
         except (pl.exceptions.PanicException, pl.exceptions.ComputeError):
             # Can happen if the JSON file is empty (https://github.com/pola-rs/polars/issues/10234)
-            return Table()
+            return Table({})
 
     @staticmethod
     def from_parquet_file(path: str | Path) -> Table:
@@ -304,12 +304,9 @@ def _from_polars_lazy_frame(data: pl.LazyFrame) -> Table:
     # Dunder methods
     # ------------------------------------------------------------------------------------------------------------------
 
-    def __init__(self, data: Mapping[str, Sequence[Any]] | None = None) -> None:
+    def __init__(self, data: Mapping[str, Sequence[Any]]) -> None:
         import polars as pl
 
-        if data is None:
-            data = {}
-
         # Validation
         expected_length: int | None = None
         for column_values in data.values():
@@ -487,7 +484,7 @@ def add_columns(
         except DuplicateError:
             # polars already validates this, so we don't need to do it again upfront (performance)
             _check_columns_dont_exist(self, [column.name for column in columns])
-            return Table()  # pragma: no cover
+            return Table({})  # pragma: no cover
 
     def add_computed_column(
         self,
@@ -1837,7 +1834,7 @@ def summarize_statistics(self) -> Table:
         +----------------------+---------+
         """
         if self.column_count == 0:
-            return Table()
+            return Table({})
 
         head = self.get_column(self.column_names[0]).summarize_statistics()
         tail = [self.get_column(name).summarize_statistics().get_column(name)._series for name in self.column_names[1:]]

diff --git a/tests/safeds/data/image/containers/test_image.py b/tests/safeds/data/image/containers/test_image.py
@@ -307,7 +307,7 @@ def test_should_not_be_equal(self, device: Device) -> None:
     def test_should_be_not_implemented(self, resource_path: str, device: Device) -> None:
         configure_test_with_device(device)
         image = Image.from_file(resolve_resource_path(resource_path))
-        other = Table()
+        other = Table({})
         assert (image.__eq__(other)) is NotImplemented
 
 

diff --git a/tests/safeds/data/image/containers/test_image_list.py b/tests/safeds/data/image/containers/test_image_list.py
@@ -121,7 +121,7 @@ def test_from_files(self, resource_path1: str, resource_path2: str, resource_pat
         assert image_list != image_list_unequal_1
         assert image_list != image_list_unequal_2
         assert image_list != image_list_unequal_3
-        assert image_list.__eq__(Table()) is NotImplemented
+        assert image_list.__eq__(Table({})) is NotImplemented
 
         # Test hash
         assert hash(image_list) == hash(image_list_clone)
@@ -746,7 +746,7 @@ def test_should_save_images_in_files(self, resource_path: list[str], device: Dev
 
         with tempfile.TemporaryDirectory() as tmp_parent_dir:
             tmp_files = [
-                tempfile.NamedTemporaryFile(suffix=".jpg", prefix=str(i), dir=tmp_parent_dir)
+                tempfile.NamedTemporaryFile(suffix=".jpg", prefix=str(i), dir=tmp_parent_dir)  # noqa: SIM115
                 for i in range(len(image_list))
             ]
             for tmp_file in tmp_files:
@@ -837,7 +837,7 @@ def test_should_save_images_in_files(self, resource_path: list[str], device: Dev
 
         with tempfile.TemporaryDirectory() as tmp_parent_dir:
             tmp_files = [
-                tempfile.NamedTemporaryFile(suffix=".png", prefix=str(i), dir=tmp_parent_dir)
+                tempfile.NamedTemporaryFile(suffix=".png", prefix=str(i), dir=tmp_parent_dir)  # noqa: SIM115
                 for i in range(len(image_list))
             ]
             for tmp_file in tmp_files:
@@ -1540,7 +1540,7 @@ def test_repr_png(self, device: Device) -> None:
     def test_eq(self, device: Device) -> None:
         configure_test_with_device(device)
         assert _EmptyImageList() == _EmptyImageList()
-        assert _EmptyImageList().__eq__(Table()) is NotImplemented
+        assert _EmptyImageList().__eq__(Table({})) is NotImplemented
 
     def test_hash(self, device: Device) -> None:
         configure_test_with_device(device)

diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_eq.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_eq.py
@@ -1,6 +1,7 @@
 from typing import Any
 
 import pytest
+
 from safeds.data.labeled.containers import TabularDataset
 from safeds.data.tabular.containers import Table
 
@@ -66,7 +67,7 @@ def test_should_return_whether_two_tabular_datasets_are_equal(
     ("table", "other"),
     [
         (TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), None),
-        (TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), Table()),
+        (TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), Table({})),
     ],
     ids=[
         "TabularDataset vs. None",

diff --git a/tests/safeds/data/labeled/containers/_tabular_dataset/test_extras.py b/tests/safeds/data/labeled/containers/_tabular_dataset/test_extras.py
@@ -1,4 +1,5 @@
 import pytest
+
 from safeds.data.labeled.containers import TabularDataset
 from safeds.data.tabular.containers import Table
 
@@ -16,7 +17,7 @@
                 },
                 target_name="T",
             ),
-            Table(),
+            Table({}),
         ),
         (
             TabularDataset(

diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_eq.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_eq.py
@@ -1,6 +1,7 @@
 from typing import Any
 
 import pytest
+
 from safeds.data.labeled.containers import TimeSeriesDataset
 from safeds.data.tabular.containers import Table
 
@@ -101,7 +102,7 @@ def test_should_return_whether_two_tabular_datasets_are_equal(
         ),
         (
             TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0, 0, 0]}, "b", window_size=1),
-            Table(),
+            Table({}),
         ),
     ],
     ids=[

diff --git a/tests/safeds/data/labeled/containers/_time_series_dataset/test_extras.py b/tests/safeds/data/labeled/containers/_time_series_dataset/test_extras.py
@@ -1,4 +1,5 @@
 import pytest
+
 from safeds.data.labeled.containers import TimeSeriesDataset
 from safeds.data.tabular.containers import Table
 
@@ -17,7 +18,7 @@
                 target_name="T",
                 window_size=1,
             ),
-            Table(),
+            Table({}),
         ),
         (
             TimeSeriesDataset(

diff --git a/tests/safeds/data/labeled/containers/test_image_dataset.py b/tests/safeds/data/labeled/containers/test_image_dataset.py
@@ -5,6 +5,9 @@
 
 import pytest
 import torch
+from torch import Tensor
+from torch.types import Device
+
 from safeds._config import _get_device
 from safeds.data.image.containers import ImageList
 from safeds.data.image.containers._empty_image_list import _EmptyImageList
@@ -20,9 +23,6 @@
     OutputLengthMismatchError,
     TransformerNotFittedError,
 )
-from torch import Tensor
-from torch.types import Device
-
 from tests.helpers import (
     configure_test_with_device,
     get_devices,
@@ -43,11 +43,11 @@ class TestImageDatasetInit:
         [
             (
                 _MultiSizeImageList(),
-                Table(),
+                Table({}),
                 ValueError,
                 r"The given input ImageList contains images of different sizes.",
             ),
-            (_EmptyImageList(), Table(), ValueError, r"The given input ImageList contains no images."),
+            (_EmptyImageList(), Table({}), ValueError, r"The given input ImageList contains no images."),
             (
                 ImageList.from_files(resolve_resource_path([plane_png_path, plane_png_path])),
                 ImageList.from_files(resolve_resource_path([plane_png_path, white_square_png_path])),
@@ -62,7 +62,7 @@ class TestImageDatasetInit:
             ),
             (
                 ImageList.from_files(resolve_resource_path(plane_png_path)),
-                Table(),
+                Table({}),
                 OutputLengthMismatchError,
                 r"The length of the output container differs",
             ),
@@ -210,7 +210,7 @@ def test_should_not_be_equal(
     def test_should_be_not_implemented(self, device: Device) -> None:
         configure_test_with_device(device)
         image_dataset = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1]))
-        other = Table()
+        other = Table({})
         assert image_dataset.__eq__(other) is NotImplemented
 
 
@@ -510,7 +510,7 @@ def test_should_raise_from_tensor(self, tensor: Tensor, error_msg: str, device:
 
     def test_eq_should_be_not_implemented(self, device: Device) -> None:
         configure_test_with_device(device)
-        assert _TableAsTensor(Table()).__eq__(Table()) is NotImplemented
+        assert _TableAsTensor(Table({})).__eq__(Table({})) is NotImplemented
 
 
 @pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids())
@@ -554,7 +554,7 @@ def test_should_raise_from_tensor(
 
     def test_eq_should_be_not_implemented(self, device: Device) -> None:
         configure_test_with_device(device)
-        assert _ColumnAsTensor(Column("column", [1])).__eq__(Table()) is NotImplemented
+        assert _ColumnAsTensor(Column("column", [1])).__eq__(Table({})) is NotImplemented
 
     def test_should_not_warn(self, device: Device) -> None:
         configure_test_with_device(device)

diff --git a/tests/safeds/data/tabular/containers/_cell/test_equals.py b/tests/safeds/data/tabular/containers/_cell/test_equals.py
@@ -2,6 +2,7 @@
 
 import polars as pl
 import pytest
+
 from safeds.data.tabular.containers import Cell, Table
 from safeds.data.tabular.containers._lazy_cell import _LazyCell
 
@@ -30,7 +31,7 @@ def test_should_return_true_if_objects_are_identical() -> None:
     ("cell", "other"),
     [
         (_LazyCell(pl.col("a")), None),
-        (_LazyCell(pl.col("a")), Table()),
+        (_LazyCell(pl.col("a")), Table({})),
     ],
     ids=[
         "Cell vs. None",

diff --git a/tests/safeds/data/tabular/containers/_column/test_eq.py b/tests/safeds/data/tabular/containers/_column/test_eq.py
@@ -1,15 +1,16 @@
 from typing import Any
 
 import pytest
+
 from safeds.data.tabular.containers import Column, Table
 
 
 @pytest.mark.parametrize(
     ("column1", "column2", "expected"),
     [
-        (Column("a"), Column("a"), True),
+        (Column("a", []), Column("a", []), True),
         (Column("a", [1, 2, 3]), Column("a", [1, 2, 3]), True),
-        (Column("a"), Column("b"), False),
+        (Column("a", []), Column("b", []), False),
         (Column("a", [1, 2, 3]), Column("a", [1, 2, 4]), False),
         (Column("a", [1, 2, 3]), Column("a", ["1", "2", "3"]), False),
     ],
@@ -28,7 +29,7 @@ def test_should_return_whether_two_columns_are_equal(column1: Column, column2: C
 @pytest.mark.parametrize(
     "column",
     [
-        Column("a"),
+        Column("a", []),
         Column("a", [1, 2, 3]),
     ],
     ids=[
@@ -43,8 +44,8 @@ def test_should_return_true_if_objects_are_identical(column: Column) -> None:
 @pytest.mark.parametrize(
     ("column", "other"),
     [
-        (Column("a"), None),
-        (Column("a", [1, 2, 3]), Table()),
+        (Column("a", []), None),
+        (Column("a", [1, 2, 3]), Table({})),
     ],
     ids=[
         "Column vs. None",

diff --git a/tests/safeds/data/tabular/containers/_column/test_hash.py b/tests/safeds/data/tabular/containers/_column/test_hash.py
@@ -1,11 +1,12 @@
 import pytest
+
 from safeds.data.tabular.containers import Column
 
 
 @pytest.mark.parametrize(
     ("column", "expected"),
     [
-        (Column("a"), 1581717131331298536),
+        (Column("a", []), 1581717131331298536),
         (Column("a", [1, 2, 3]), 239695622656180157),
     ],
     ids=[
@@ -20,9 +21,9 @@ def test_should_be_deterministic(column: Column, expected: int) -> None:
 @pytest.mark.parametrize(
     ("column1", "column2", "expected"),
     [
-        (Column("a"), Column("a"), True),
+        (Column("a", []), Column("a", []), True),
         (Column("a", [1, 2, 3]), Column("a", [1, 2, 3]), True),
-        (Column("a"), Column("b"), False),
+        (Column("a", []), Column("b", []), False),
         (Column("a", [1, 2, 3]), Column("a", [1, 2]), False),
         (Column("a", [1, 2, 3]), Column("a", ["1", "2", "3"]), False),
         # We don't use the column values in the hash calculation

diff --git a/tests/safeds/data/tabular/containers/_column/test_init.py b/tests/safeds/data/tabular/containers/_column/test_init.py
@@ -1,6 +1,7 @@
 from typing import Any
 
 import pytest
+
 from safeds.data.tabular.containers import Column
 
 
@@ -12,14 +13,10 @@ def test_should_store_the_name() -> None:
 @pytest.mark.parametrize(
     ("column", "expected"),
     [
-        (Column("a"), []),
-        (Column("a", None), []),
         (Column("a", []), []),
         (Column("a", [1, 2, 3]), [1, 2, 3]),
     ],
     ids=[
-        "none (implicit)",
-        "none (explicit)",
         "empty",
         "non-empty",
     ],

diff --git a/tests/safeds/data/tabular/containers/_row/test_column_count.py b/tests/safeds/data/tabular/containers/_row/test_column_count.py
@@ -7,7 +7,7 @@
 @pytest.mark.parametrize(
     ("table", "expected"),
     [
-        (Table(), 0),
+        (Table({}), 0),
         (Table({"A": [1, 2, 3]}), 1),
     ],
     ids=[

diff --git a/tests/safeds/data/tabular/containers/_row/test_get_cell.py b/tests/safeds/data/tabular/containers/_row/test_get_cell.py
@@ -30,7 +30,7 @@ def test_should_get_correct_item(table_data: dict, column_name: str, target: int
 @pytest.mark.parametrize(
     ("table", "column_name"),
     [
-        (Table(), "A"),
+        (Table({}), "A"),
         (Table({"A": ["a", "aa", "aaa"]}), "B"),
         (Table({"A": ["b", "aa", "aaa"], "C": ["b", "aa", "aaa"]}), "B"),
     ],

diff --git a/tests/safeds/data/tabular/containers/_row/test_getitem.py b/tests/safeds/data/tabular/containers/_row/test_getitem.py
@@ -30,7 +30,7 @@ def test_should_get_correct_item(table_data: dict, column_name: str, target: int
 @pytest.mark.parametrize(
     ("table", "column_name"),
     [
-        (Table(), "A"),
+        (Table({}), "A"),
         (Table({"A": ["a", "aa", "aaa"]}), "B"),
         (Table({"A": ["b", "aa", "aaa"], "C": ["b", "aa", "aaa"]}), "B"),
     ],

diff --git a/tests/safeds/data/tabular/containers/_row/test_has_column.py b/tests/safeds/data/tabular/containers/_row/test_has_column.py
@@ -7,7 +7,7 @@
 @pytest.mark.parametrize(
     ("table", "column_name", "expected"),
     [
-        (Table(), "A", False),
+        (Table({}), "A", False),
         (Table({"A": ["a", "aa", "aaa"]}), "A", True),
         (Table({"A": ["a", "aa", "aaa"]}), "B", False),
     ],

diff --git a/tests/safeds/data/tabular/containers/_row/test_hash.py b/tests/safeds/data/tabular/containers/_row/test_hash.py
@@ -7,8 +7,8 @@
 @pytest.mark.parametrize(
     ("table1", "table2", "expected"),
     [
-        (Table(), Table({"A": ["a", "aa", "aaa"]}), False),
-        (Table(), Table(), True),
+        (Table({}), Table({"A": ["a", "aa", "aaa"]}), False),
+        (Table({}), Table({}), True),
         (Table({"A": ["a", "aa", "aaa"]}), Table({"A": ["a", "aa", "aaa"]}), True),
         (Table({"A": ["a", "aa", "aaa"]}), Table({"B": ["a", "aa", "aaa"]}), False),
     ],

diff --git a/tests/safeds/data/tabular/containers/_row/test_iter.py b/tests/safeds/data/tabular/containers/_row/test_iter.py
@@ -7,7 +7,7 @@
 @pytest.mark.parametrize(
     ("table", "expected"),
     [
-        (Table(), []),
+        (Table({}), []),
         (Table({"A": ["a", "aa", "aaa"]}), ["A"]),
         (Table({"A": ["a", "aa", "aaa"], "B": ["b", "bb", "bbb"], "C": ["c", "cc", "ccc"]}), ["A", "B", "C"]),
     ],