Skip to content

Commit

Permalink
feat: make data parameter of Table and Column required (#978)
Browse files Browse the repository at this point in the history
### Summary of Changes

The `data` parameter of `Table` and `Column` must now be specified.
Being able to omit it was convenient for tests, but it also meant that users
could forget to specify their data. Since tables and columns are
immutable, creating an empty table is rarely useful in practice.

---------

Co-authored-by: megalinter-bot <[email protected]>
  • Loading branch information
lars-reimann and megalinter-bot authored Jan 8, 2025
1 parent 7a7fa77 commit 29fdefa
Show file tree
Hide file tree
Showing 78 changed files with 197 additions and 150 deletions.
5 changes: 1 addition & 4 deletions src/safeds/data/tabular/containers/_column.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,9 @@ def _from_polars_series(data: Series) -> Column:
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

def __init__(self, name: str, data: Sequence[T_co] | None = None) -> None:
def __init__(self, name: str, data: Sequence[T_co]) -> None:
import polars as pl

if data is None:
data = []

self._series: pl.Series = pl.Series(name, data, strict=False)

def __contains__(self, item: Any) -> bool:
Expand Down
11 changes: 4 additions & 7 deletions src/safeds/data/tabular/containers/_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,7 +243,7 @@ def from_json_file(path: str | Path) -> Table:
return Table._from_polars_data_frame(pl.read_json(path))
except (pl.exceptions.PanicException, pl.exceptions.ComputeError):
# Can happen if the JSON file is empty (https://github.com/pola-rs/polars/issues/10234)
return Table()
return Table({})

@staticmethod
def from_parquet_file(path: str | Path) -> Table:
Expand Down Expand Up @@ -304,12 +304,9 @@ def _from_polars_lazy_frame(data: pl.LazyFrame) -> Table:
# Dunder methods
# ------------------------------------------------------------------------------------------------------------------

def __init__(self, data: Mapping[str, Sequence[Any]] | None = None) -> None:
def __init__(self, data: Mapping[str, Sequence[Any]]) -> None:
import polars as pl

if data is None:
data = {}

# Validation
expected_length: int | None = None
for column_values in data.values():
Expand Down Expand Up @@ -487,7 +484,7 @@ def add_columns(
except DuplicateError:
# polars already validates this, so we don't need to do it again upfront (performance)
_check_columns_dont_exist(self, [column.name for column in columns])
return Table() # pragma: no cover
return Table({}) # pragma: no cover

def add_computed_column(
self,
Expand Down Expand Up @@ -1837,7 +1834,7 @@ def summarize_statistics(self) -> Table:
+----------------------+---------+
"""
if self.column_count == 0:
return Table()
return Table({})

head = self.get_column(self.column_names[0]).summarize_statistics()
tail = [self.get_column(name).summarize_statistics().get_column(name)._series for name in self.column_names[1:]]
Expand Down
2 changes: 1 addition & 1 deletion tests/safeds/data/image/containers/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def test_should_not_be_equal(self, device: Device) -> None:
def test_should_be_not_implemented(self, resource_path: str, device: Device) -> None:
configure_test_with_device(device)
image = Image.from_file(resolve_resource_path(resource_path))
other = Table()
other = Table({})
assert (image.__eq__(other)) is NotImplemented


Expand Down
8 changes: 4 additions & 4 deletions tests/safeds/data/image/containers/test_image_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test_from_files(self, resource_path1: str, resource_path2: str, resource_pat
assert image_list != image_list_unequal_1
assert image_list != image_list_unequal_2
assert image_list != image_list_unequal_3
assert image_list.__eq__(Table()) is NotImplemented
assert image_list.__eq__(Table({})) is NotImplemented

# Test hash
assert hash(image_list) == hash(image_list_clone)
Expand Down Expand Up @@ -746,7 +746,7 @@ def test_should_save_images_in_files(self, resource_path: list[str], device: Dev

with tempfile.TemporaryDirectory() as tmp_parent_dir:
tmp_files = [
tempfile.NamedTemporaryFile(suffix=".jpg", prefix=str(i), dir=tmp_parent_dir)
tempfile.NamedTemporaryFile(suffix=".jpg", prefix=str(i), dir=tmp_parent_dir) # noqa: SIM115
for i in range(len(image_list))
]
for tmp_file in tmp_files:
Expand Down Expand Up @@ -837,7 +837,7 @@ def test_should_save_images_in_files(self, resource_path: list[str], device: Dev

with tempfile.TemporaryDirectory() as tmp_parent_dir:
tmp_files = [
tempfile.NamedTemporaryFile(suffix=".png", prefix=str(i), dir=tmp_parent_dir)
tempfile.NamedTemporaryFile(suffix=".png", prefix=str(i), dir=tmp_parent_dir) # noqa: SIM115
for i in range(len(image_list))
]
for tmp_file in tmp_files:
Expand Down Expand Up @@ -1540,7 +1540,7 @@ def test_repr_png(self, device: Device) -> None:
def test_eq(self, device: Device) -> None:
configure_test_with_device(device)
assert _EmptyImageList() == _EmptyImageList()
assert _EmptyImageList().__eq__(Table()) is NotImplemented
assert _EmptyImageList().__eq__(Table({})) is NotImplemented

def test_hash(self, device: Device) -> None:
configure_test_with_device(device)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any

import pytest

from safeds.data.labeled.containers import TabularDataset
from safeds.data.tabular.containers import Table

Expand Down Expand Up @@ -66,7 +67,7 @@ def test_should_return_whether_two_tabular_datasets_are_equal(
("table", "other"),
[
(TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), None),
(TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), Table()),
(TabularDataset({"a": [1, 2, 3], "b": [4, 5, 6]}, "b"), Table({})),
],
ids=[
"TabularDataset vs. None",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

from safeds.data.labeled.containers import TabularDataset
from safeds.data.tabular.containers import Table

Expand All @@ -16,7 +17,7 @@
},
target_name="T",
),
Table(),
Table({}),
),
(
TabularDataset(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any

import pytest

from safeds.data.labeled.containers import TimeSeriesDataset
from safeds.data.tabular.containers import Table

Expand Down Expand Up @@ -101,7 +102,7 @@ def test_should_return_whether_two_tabular_datasets_are_equal(
),
(
TimeSeriesDataset({"a": [1, 2, 3], "b": [4, 5, 6], "c": [0, 0, 0]}, "b", window_size=1),
Table(),
Table({}),
),
],
ids=[
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

from safeds.data.labeled.containers import TimeSeriesDataset
from safeds.data.tabular.containers import Table

Expand All @@ -17,7 +18,7 @@
target_name="T",
window_size=1,
),
Table(),
Table({}),
),
(
TimeSeriesDataset(
Expand Down
18 changes: 9 additions & 9 deletions tests/safeds/data/labeled/containers/test_image_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@

import pytest
import torch
from torch import Tensor
from torch.types import Device

from safeds._config import _get_device
from safeds.data.image.containers import ImageList
from safeds.data.image.containers._empty_image_list import _EmptyImageList
Expand All @@ -20,9 +23,6 @@
OutputLengthMismatchError,
TransformerNotFittedError,
)
from torch import Tensor
from torch.types import Device

from tests.helpers import (
configure_test_with_device,
get_devices,
Expand All @@ -43,11 +43,11 @@ class TestImageDatasetInit:
[
(
_MultiSizeImageList(),
Table(),
Table({}),
ValueError,
r"The given input ImageList contains images of different sizes.",
),
(_EmptyImageList(), Table(), ValueError, r"The given input ImageList contains no images."),
(_EmptyImageList(), Table({}), ValueError, r"The given input ImageList contains no images."),
(
ImageList.from_files(resolve_resource_path([plane_png_path, plane_png_path])),
ImageList.from_files(resolve_resource_path([plane_png_path, white_square_png_path])),
Expand All @@ -62,7 +62,7 @@ class TestImageDatasetInit:
),
(
ImageList.from_files(resolve_resource_path(plane_png_path)),
Table(),
Table({}),
OutputLengthMismatchError,
r"The length of the output container differs",
),
Expand Down Expand Up @@ -210,7 +210,7 @@ def test_should_not_be_equal(
def test_should_be_not_implemented(self, device: Device) -> None:
configure_test_with_device(device)
image_dataset = ImageDataset(ImageList.from_files(resolve_resource_path(plane_png_path)), Column("images", [1]))
other = Table()
other = Table({})
assert image_dataset.__eq__(other) is NotImplemented


Expand Down Expand Up @@ -510,7 +510,7 @@ def test_should_raise_from_tensor(self, tensor: Tensor, error_msg: str, device:

def test_eq_should_be_not_implemented(self, device: Device) -> None:
configure_test_with_device(device)
assert _TableAsTensor(Table()).__eq__(Table()) is NotImplemented
assert _TableAsTensor(Table({})).__eq__(Table({})) is NotImplemented


@pytest.mark.parametrize("device", get_devices(), ids=get_devices_ids())
Expand Down Expand Up @@ -554,7 +554,7 @@ def test_should_raise_from_tensor(

def test_eq_should_be_not_implemented(self, device: Device) -> None:
configure_test_with_device(device)
assert _ColumnAsTensor(Column("column", [1])).__eq__(Table()) is NotImplemented
assert _ColumnAsTensor(Column("column", [1])).__eq__(Table({})) is NotImplemented

def test_should_not_warn(self, device: Device) -> None:
configure_test_with_device(device)
Expand Down
3 changes: 2 additions & 1 deletion tests/safeds/data/tabular/containers/_cell/test_equals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import polars as pl
import pytest

from safeds.data.tabular.containers import Cell, Table
from safeds.data.tabular.containers._lazy_cell import _LazyCell

Expand Down Expand Up @@ -30,7 +31,7 @@ def test_should_return_true_if_objects_are_identical() -> None:
("cell", "other"),
[
(_LazyCell(pl.col("a")), None),
(_LazyCell(pl.col("a")), Table()),
(_LazyCell(pl.col("a")), Table({})),
],
ids=[
"Cell vs. None",
Expand Down
11 changes: 6 additions & 5 deletions tests/safeds/data/tabular/containers/_column/test_eq.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
from typing import Any

import pytest

from safeds.data.tabular.containers import Column, Table


@pytest.mark.parametrize(
("column1", "column2", "expected"),
[
(Column("a"), Column("a"), True),
(Column("a", []), Column("a", []), True),
(Column("a", [1, 2, 3]), Column("a", [1, 2, 3]), True),
(Column("a"), Column("b"), False),
(Column("a", []), Column("b", []), False),
(Column("a", [1, 2, 3]), Column("a", [1, 2, 4]), False),
(Column("a", [1, 2, 3]), Column("a", ["1", "2", "3"]), False),
],
Expand All @@ -28,7 +29,7 @@ def test_should_return_whether_two_columns_are_equal(column1: Column, column2: C
@pytest.mark.parametrize(
"column",
[
Column("a"),
Column("a", []),
Column("a", [1, 2, 3]),
],
ids=[
Expand All @@ -43,8 +44,8 @@ def test_should_return_true_if_objects_are_identical(column: Column) -> None:
@pytest.mark.parametrize(
("column", "other"),
[
(Column("a"), None),
(Column("a", [1, 2, 3]), Table()),
(Column("a", []), None),
(Column("a", [1, 2, 3]), Table({})),
],
ids=[
"Column vs. None",
Expand Down
7 changes: 4 additions & 3 deletions tests/safeds/data/tabular/containers/_column/test_hash.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import pytest

from safeds.data.tabular.containers import Column


@pytest.mark.parametrize(
("column", "expected"),
[
(Column("a"), 1581717131331298536),
(Column("a", []), 1581717131331298536),
(Column("a", [1, 2, 3]), 239695622656180157),
],
ids=[
Expand All @@ -20,9 +21,9 @@ def test_should_be_deterministic(column: Column, expected: int) -> None:
@pytest.mark.parametrize(
("column1", "column2", "expected"),
[
(Column("a"), Column("a"), True),
(Column("a", []), Column("a", []), True),
(Column("a", [1, 2, 3]), Column("a", [1, 2, 3]), True),
(Column("a"), Column("b"), False),
(Column("a", []), Column("b", []), False),
(Column("a", [1, 2, 3]), Column("a", [1, 2]), False),
(Column("a", [1, 2, 3]), Column("a", ["1", "2", "3"]), False),
# We don't use the column values in the hash calculation
Expand Down
5 changes: 1 addition & 4 deletions tests/safeds/data/tabular/containers/_column/test_init.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Any

import pytest

from safeds.data.tabular.containers import Column


Expand All @@ -12,14 +13,10 @@ def test_should_store_the_name() -> None:
@pytest.mark.parametrize(
("column", "expected"),
[
(Column("a"), []),
(Column("a", None), []),
(Column("a", []), []),
(Column("a", [1, 2, 3]), [1, 2, 3]),
],
ids=[
"none (implicit)",
"none (explicit)",
"empty",
"non-empty",
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
@pytest.mark.parametrize(
("table", "expected"),
[
(Table(), 0),
(Table({}), 0),
(Table({"A": [1, 2, 3]}), 1),
],
ids=[
Expand Down
2 changes: 1 addition & 1 deletion tests/safeds/data/tabular/containers/_row/test_get_cell.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_should_get_correct_item(table_data: dict, column_name: str, target: int
@pytest.mark.parametrize(
("table", "column_name"),
[
(Table(), "A"),
(Table({}), "A"),
(Table({"A": ["a", "aa", "aaa"]}), "B"),
(Table({"A": ["b", "aa", "aaa"], "C": ["b", "aa", "aaa"]}), "B"),
],
Expand Down
2 changes: 1 addition & 1 deletion tests/safeds/data/tabular/containers/_row/test_getitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_should_get_correct_item(table_data: dict, column_name: str, target: int
@pytest.mark.parametrize(
("table", "column_name"),
[
(Table(), "A"),
(Table({}), "A"),
(Table({"A": ["a", "aa", "aaa"]}), "B"),
(Table({"A": ["b", "aa", "aaa"], "C": ["b", "aa", "aaa"]}), "B"),
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
@pytest.mark.parametrize(
("table", "column_name", "expected"),
[
(Table(), "A", False),
(Table({}), "A", False),
(Table({"A": ["a", "aa", "aaa"]}), "A", True),
(Table({"A": ["a", "aa", "aaa"]}), "B", False),
],
Expand Down
4 changes: 2 additions & 2 deletions tests/safeds/data/tabular/containers/_row/test_hash.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
@pytest.mark.parametrize(
("table1", "table2", "expected"),
[
(Table(), Table({"A": ["a", "aa", "aaa"]}), False),
(Table(), Table(), True),
(Table({}), Table({"A": ["a", "aa", "aaa"]}), False),
(Table({}), Table({}), True),
(Table({"A": ["a", "aa", "aaa"]}), Table({"A": ["a", "aa", "aaa"]}), True),
(Table({"A": ["a", "aa", "aaa"]}), Table({"B": ["a", "aa", "aaa"]}), False),
],
Expand Down
2 changes: 1 addition & 1 deletion tests/safeds/data/tabular/containers/_row/test_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
@pytest.mark.parametrize(
("table", "expected"),
[
(Table(), []),
(Table({}), []),
(Table({"A": ["a", "aa", "aaa"]}), ["A"]),
(Table({"A": ["a", "aa", "aaa"], "B": ["b", "bb", "bbb"], "C": ["c", "cc", "ccc"]}), ["A", "B", "C"]),
],
Expand Down
Loading

0 comments on commit 29fdefa

Please sign in to comment.