From a801436b2d810bfaa2debcfa1b4e97cf91f11358 Mon Sep 17 00:00:00 2001 From: Lars Reimann Date: Mon, 13 Jan 2025 11:12:14 +0100 Subject: [PATCH] feat: finalize `Row` class --- .../containers/_lazy_vectorized_row.py | 26 +--- src/safeds/data/tabular/containers/_row.py | 74 +-------- tests/helpers/_assertions.py | 65 +++++--- .../_row/__snapshots__/test_hash.ambr | 10 ++ .../containers/_row/test_column_count.py | 20 --- .../containers/_row/test_column_names.py | 25 --- .../tabular/containers/_row/test_contains.py | 24 --- .../data/tabular/containers/_row/test_eq.py | 145 ++++++++++++++---- .../tabular/containers/_row/test_get_cell.py | 47 ++++-- .../containers/_row/test_get_column_type.py | 21 --- .../tabular/containers/_row/test_getitem.py | 51 +++--- .../containers/_row/test_has_column.py | 22 --- .../data/tabular/containers/_row/test_hash.py | 91 +++++++++-- .../data/tabular/containers/_row/test_iter.py | 23 --- .../data/tabular/containers/_row/test_len.py | 22 --- .../tabular/containers/_row/test_schema.py | 22 --- .../tabular/containers/_row/test_sizeof.py | 22 ++- .../data/tabular/containers/_table/test_eq.py | 6 +- .../tabular/typing/_column_type/test_eq.py | 6 +- .../data/tabular/typing/_schema/test_eq.py | 6 +- 20 files changed, 340 insertions(+), 388 deletions(-) create mode 100644 tests/safeds/data/tabular/containers/_row/__snapshots__/test_hash.ambr delete mode 100644 tests/safeds/data/tabular/containers/_row/test_column_count.py delete mode 100644 tests/safeds/data/tabular/containers/_row/test_column_names.py delete mode 100644 tests/safeds/data/tabular/containers/_row/test_contains.py delete mode 100644 tests/safeds/data/tabular/containers/_row/test_get_column_type.py delete mode 100644 tests/safeds/data/tabular/containers/_row/test_has_column.py delete mode 100644 tests/safeds/data/tabular/containers/_row/test_iter.py delete mode 100644 tests/safeds/data/tabular/containers/_row/test_len.py delete mode 100644 tests/safeds/data/tabular/containers/_row/test_schema.py diff --git a/src/safeds/data/tabular/containers/_lazy_vectorized_row.py b/src/safeds/data/tabular/containers/_lazy_vectorized_row.py index 1a53da529..c233a1b2a 100644 --- a/src/safeds/data/tabular/containers/_lazy_vectorized_row.py +++ b/src/safeds/data/tabular/containers/_lazy_vectorized_row.py @@ -8,8 +8,6 @@ from ._row import Row if TYPE_CHECKING: - from safeds.data.tabular.typing import ColumnType, Schema - from ._table import Table @@ -21,7 +19,7 @@ class _LazyVectorizedRow(Row): up operations on the row. Moreover, accessing a column only builds an expression that will be evaluated when needed. This is useful when later - operations remove more rows or columns, so we don't do unnecessary work upfront. + operations remove rows or columns, so we don't do unnecessary work upfront. """ # ------------------------------------------------------------------------------------------------------------------ @@ -44,22 +42,6 @@ def __hash__(self) -> int: def __sizeof__(self) -> int: return self._table.__sizeof__() - # ------------------------------------------------------------------------------------------------------------------ - # Properties - # ------------------------------------------------------------------------------------------------------------------ - - @property - def column_names(self) -> list[str]: - return self._table.column_names - - @property - def column_count(self) -> int: - return self._table.column_count - - @property - def schema(self) -> Schema: - return self._table.schema - # ------------------------------------------------------------------------------------------------------------------ # Column operations # ------------------------------------------------------------------------------------------------------------------ @@ -70,9 +52,3 @@ def get_cell(self, name: str) -> _LazyCell: _check_columns_exist(self._table, name) return _LazyCell(pl.col(name)) - - def get_column_type(self, name: str) -> ColumnType: - return self._table.get_column_type(name) - - def has_column(self, name: str) -> bool: - return self._table.has_column(name) diff --git a/src/safeds/data/tabular/containers/_row.py b/src/safeds/data/tabular/containers/_row.py index 6d43fb570..5b6d13e1c 100644 --- a/src/safeds/data/tabular/containers/_row.py +++ b/src/safeds/data/tabular/containers/_row.py @@ -1,29 +1,23 @@ from __future__ import annotations from abc import ABC, abstractmethod -from collections.abc import Iterator, Mapping -from typing import TYPE_CHECKING, Any +from typing import TYPE_CHECKING if TYPE_CHECKING: - from safeds.data.tabular.typing import ColumnType, Schema - from ._cell import Cell -class Row(ABC, Mapping[str, Any]): +class Row(ABC): """ A one-dimensional collection of named, heterogeneous values. - This class cannot be instantiated directly. It is only used for arguments of callbacks. + You only need to interact with this class in callbacks passed to higher-order functions. """ # ------------------------------------------------------------------------------------------------------------------ # Dunder methods # ------------------------------------------------------------------------------------------------------------------ - def __contains__(self, name: Any) -> bool: - return self.has_column(name) - @abstractmethod def __eq__(self, other: object) -> bool: ... @@ -33,34 +27,9 @@ def __getitem__(self, name: str) -> Cell: @abstractmethod def __hash__(self) -> int: ... - def __iter__(self) -> Iterator[Any]: - return iter(self.column_names) - - def __len__(self) -> int: - return self.column_count - @abstractmethod def __sizeof__(self) -> int: ... - # ------------------------------------------------------------------------------------------------------------------ - # Properties - # ------------------------------------------------------------------------------------------------------------------ - - @property - @abstractmethod - def column_names(self) -> list[str]: - """The names of the columns in the row.""" - - @property - @abstractmethod - def column_count(self) -> int: - """The number of columns in the row.""" - - @property - @abstractmethod - def schema(self) -> Schema: - """The schema of the row.""" - # ------------------------------------------------------------------------------------------------------------------ # Column operations # ------------------------------------------------------------------------------------------------------------------ @@ -108,40 +77,3 @@ def get_cell(self, name: str) -> Cell: | 2 | 4 | +------+------+ """ - - @abstractmethod - def get_column_type(self, name: str) -> ColumnType: - """ - Get the type of the specified column. - - Parameters - ---------- - name: - The name of the column. - - Returns - ------- - type: - The type of the column. - - Raises - ------ - ColumnNotFoundError - If the column name does not exist. - """ - - @abstractmethod - def has_column(self, name: str) -> bool: - """ - Check if the row has a column with the specified name. - - Parameters - ---------- - name: - The name of the column. - - Returns - ------- - has_column: - Whether the row has a column with the specified name. - """ diff --git a/tests/helpers/_assertions.py b/tests/helpers/_assertions.py index 76358b17e..0a0434ed5 100644 --- a/tests/helpers/_assertions.py +++ b/tests/helpers/_assertions.py @@ -4,7 +4,7 @@ from polars.testing import assert_frame_equal from safeds.data.labeled.containers import TabularDataset -from safeds.data.tabular.containers import Cell, Column, Table +from safeds.data.tabular.containers import Cell, Column, Row, Table def assert_tables_are_equal( @@ -62,44 +62,71 @@ def assert_that_tabular_datasets_are_equal(table1: TabularDataset, table2: Tabul def assert_cell_operation_works( - input_value: Any, + value: Any, transformer: Callable[[Cell], Cell], - expected_value: Any, + expected: Any, ) -> None: """ Assert that a cell operation works as expected. Parameters ---------- - input_value: + value: The value in the input cell. transformer: The transformer to apply to the cells. - expected_value: + expected: The expected value of the transformed cell. """ - column = Column("A", [input_value]) + column = Column("A", [value]) transformed_column = column.transform(transformer) - assert transformed_column == Column("A", [expected_value]), f"Expected: {expected_value}\nGot: {transformed_column}" + actual = transformed_column[0] + assert actual == expected def assert_row_operation_works( - input_value: Any, - transformer: Callable[[Table], Table], - expected_value: Any, + table: Table, + computer: Callable[[Row], Cell], + expected: list[Any], ) -> None: """ Assert that a row operation works as expected. Parameters ---------- - input_value: - The value in the input row. - transformer: - The transformer to apply to the rows. - expected_value: - The expected value of the transformed row. + table: + The input table. + computer: + The function that computes the new column. + expected: + The expected values of the computed column. + """ + column_name = _find_free_column_name(table, "computed") + + new_table = table.add_computed_column(column_name, computer) + actual = list(new_table.get_column(column_name)) + assert actual == expected + + +def _find_free_column_name(table: Table, prefix: str) -> str: """ - table = Table(input_value) - transformed_table = transformer(table) - assert transformed_table == Table(expected_value), f"Expected: {expected_value}\nGot: {transformed_table}" + Find a free column name in the table. + + Parameters + ---------- + table: + The table to search for a free column name. + prefix: + The prefix to use for the column name. + + Returns + ------- + free_name: + A free column name. + """ + column_name = prefix + + while column_name in table.column_names: + column_name += "_" + + return column_name diff --git a/tests/safeds/data/tabular/containers/_row/__snapshots__/test_hash.ambr b/tests/safeds/data/tabular/containers/_row/__snapshots__/test_hash.ambr new file mode 100644 index 000000000..f079fdc47 --- /dev/null +++ b/tests/safeds/data/tabular/containers/_row/__snapshots__/test_hash.ambr @@ -0,0 +1,10 @@ +# serializer version: 1 +# name: TestContract.test_should_return_same_hash_in_different_processes[empty] + 1789859531466043636 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[no rows] + 585695607399955642 +# --- +# name: TestContract.test_should_return_same_hash_in_different_processes[with data] + 909875695937937648 +# --- diff --git a/tests/safeds/data/tabular/containers/_row/test_column_count.py b/tests/safeds/data/tabular/containers/_row/test_column_count.py deleted file mode 100644 index c7a65c9e0..000000000 --- a/tests/safeds/data/tabular/containers/_row/test_column_count.py +++ /dev/null @@ -1,20 +0,0 @@ -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow - - -@pytest.mark.parametrize( - ("table", "expected"), - [ - (Table({}), 0), - (Table({"A": [1, 2, 3]}), 1), - ], - ids=[ - "empty", - "non-empty", - ], -) -def test_should_return_the_number_of_columns(table: Table, expected: int) -> None: - row = _LazyVectorizedRow(table=table) - assert row.column_count == expected diff --git a/tests/safeds/data/tabular/containers/_row/test_column_names.py b/tests/safeds/data/tabular/containers/_row/test_column_names.py deleted file mode 100644 index 09b32bbf7..000000000 --- a/tests/safeds/data/tabular/containers/_row/test_column_names.py +++ /dev/null @@ -1,25 +0,0 @@ -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow - - -@pytest.mark.parametrize( - ("table", "expected"), - [ - (Table({}), []), - (Table({"A": [1, 2, 3]}), ["A"]), - ( - Table({"A": [1, 2, 3], "B": ["A", "A", "Bla"], "C": [True, True, False], "D": [1.0, 2.1, 4.5]}), - ["A", "B", "C", "D"], - ), - ], - ids=[ - "empty", - "one-column", - "four-column", - ], -) -def test_should_return_the_column_names(table: Table, expected: list[str]) -> None: - row = _LazyVectorizedRow(table=table) - assert row.column_names == expected diff --git a/tests/safeds/data/tabular/containers/_row/test_contains.py b/tests/safeds/data/tabular/containers/_row/test_contains.py deleted file mode 100644 index fc9cda260..000000000 --- a/tests/safeds/data/tabular/containers/_row/test_contains.py +++ /dev/null @@ -1,24 +0,0 @@ -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow - - -@pytest.mark.parametrize( - ("table", "column_name", "expected"), - [ - (Table({}), "A", False), - (Table({"A": [1, 2, 3]}), "A", True), - (Table({"A": [1, 2, 3], "B": ["A", "A", "Bla"]}), "C", False), - (Table({"col1": [1, 2, 3], "B": ["A", "A", "Bla"]}), 1, False), - ], - ids=[ - "empty row", - "column exists", - "column does not exist", - "not a string", - ], -) -def test_should_return_whether_the_row_has_the_column(table: Table, column_name: str, expected: bool) -> None: - row = _LazyVectorizedRow(table=table) - assert (column_name in row) == expected diff --git a/tests/safeds/data/tabular/containers/_row/test_eq.py b/tests/safeds/data/tabular/containers/_row/test_eq.py index 5e57cbc56..f27c2180f 100644 --- a/tests/safeds/data/tabular/containers/_row/test_eq.py +++ b/tests/safeds/data/tabular/containers/_row/test_eq.py @@ -1,59 +1,140 @@ +from typing import Any + import pytest -from safeds.data.tabular.containers import Table +from safeds.data.tabular.containers import Column, Table from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow @pytest.mark.parametrize( - ("table1", "table2", "expected"), + ("table_1", "table_2", "expected"), [ - (Table({"col1": []}), Table({"col1": []}), True), - (Table({"col1": [1, 2]}), Table({"col1": [1, 2]}), True), - (Table({"col1": [1, 2]}), Table({"col1": [2, 3]}), False), - (Table({"col1": [1, 2]}), Table({"col2": [1, 2]}), False), - (Table({"col1": ["1", "2"]}), Table({"col1": [1, 2]}), False), + # equal (empty) + ( + Table({}), + Table({}), + True, + ), + # equal (no rows) + ( + Table({"col1": []}), + Table({"col1": []}), + True, + ), + # equal (with data) + ( + Table({"col1": [1], "col2": [2]}), + Table({"col1": [1], "col2": [2]}), + True, + ), + # not equal (too few columns) + ( + Table({"col1": [1]}), + Table({}), + False, + ), + # not equal (too many columns) + ( + Table({}), + Table({"col1": [1]}), + False, + ), + # not equal (different column order) + ( + Table({"col1": [1], "col2": [2]}), + Table({"col2": [2], "col1": [1]}), + False, + ), + # not equal (different column names) + ( + Table({"col1": [1]}), + Table({"col2": [1]}), + False, + ), + # not equal (different types) + ( + Table({"col1": [1]}), + Table({"col1": ["1"]}), + False, + ), + # not equal (too few rows) + ( + Table({"col1": [1, 2]}), + Table({"col1": [1]}), # Needs at least one value, so the types match + False, + ), + # not equal (too many rows) + ( + Table({"col1": [1]}), # Needs at least one value, so the types match + Table({"col1": [1, 2]}), + False, + ), + # not equal (different row order) + ( + Table({"col1": [1, 2]}), + Table({"col1": [2, 1]}), + False, + ), + # not equal (different values) + ( + Table({"col1": [1, 2]}), + Table({"col1": [1, 3]}), + False, + ), ], ids=[ - "empty rows", - "equal rows", - "different values", - "different columns", - "different types", + # Equal + "equal (empty)", + "equal (no rows)", + "equal (with data)", + # Not equal because of columns + "not equal (too few columns)", + "not equal (too many columns)", + "not equal (different column order)", + "not equal (different column names)", + "not equal (different types)", + # Not equal because of rows + "not equal (too few rows)", + "not equal (too many rows)", + "not equal (different row order)", + "not equal (different values)", ], ) -def test_should_return_whether_two_rows_are_equal(table1: Table, table2: Table, expected: bool) -> None: - row1 = _LazyVectorizedRow(table=table1) - row2 = _LazyVectorizedRow(table=table2) - assert (row1.__eq__(row2)) == expected +def test_should_return_whether_objects_are_equal(table_1: Table, table_2: Table, expected: bool) -> None: + row_1 = _LazyVectorizedRow(table_1) + row_2 = _LazyVectorizedRow(table_2) + assert (row_1.__eq__(row_2)) == expected @pytest.mark.parametrize( - ("table", "expected"), + "table", [ - (Table({"col1": []}), True), - (Table({"col1": [1, 2]}), True), + Table({}), + Table({"col1": []}), + Table({"col1": [1]}), ], ids=[ - "empty table", - "filled table", + "empty", + "no rows", + "non-empty", ], ) -def test_should_return_true_if_rows_are_strict_equal(table: Table, expected: bool) -> None: - row1 = _LazyVectorizedRow(table=table) - assert (row1.__eq__(row1)) == expected +def test_should_return_true_if_objects_are_identical(table: Table) -> None: + row = _LazyVectorizedRow(table) + assert (row.__eq__(row)) is True @pytest.mark.parametrize( - ("table1", "table2"), + ("table", "other"), [ - (Table({"col1": []}), Table({"col1": []})), - (Table({"col1": [1, 2]}), Table({"col1": [1, 2]})), + (Table({}), None), + (Table({}), Column("col1", [])), ], ids=[ - "empty tables", - "filled tables", + "Row vs. None", + "Row vs. Column", ], ) -def test_should_return_false_if_object_is_other_type(table1: Table, table2: Table) -> None: - row1 = _LazyVectorizedRow(table=table1) - assert (row1.__eq__(table2)) == NotImplemented +def test_should_return_not_implemented_if_other_has_different_type(table: Table, other: Any) -> None: + row = _LazyVectorizedRow(table) + assert (row.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/containers/_row/test_get_cell.py b/tests/safeds/data/tabular/containers/_row/test_get_cell.py index aec1adf63..189f8d20e 100644 --- a/tests/safeds/data/tabular/containers/_row/test_get_cell.py +++ b/tests/safeds/data/tabular/containers/_row/test_get_cell.py @@ -1,4 +1,4 @@ -import re +from typing import Any import pytest @@ -9,38 +9,51 @@ @pytest.mark.parametrize( - ("table_data", "column_name", "target", "expected"), + ("table", "name", "target", "expected"), [ - ({"A": [1, 2]}, "A", 1, {"A": [2]}), - ({"A": [1, 2, 3], "B": [4, 5, 2]}, "B", 2, {"A": [1, 2], "B": [4, 5]}), + ( + Table({"A": [1, 2]}), + "A", + 1, + [True, False], + ), + ( + Table({"A": [1, 2], "B": [3, 4]}), + "A", + 1, + [True, False], + ), ], ids=[ "one column", "two columns", ], ) -def test_should_get_correct_item(table_data: dict, column_name: str, target: int, expected: dict) -> None: +def test_should_get_correct_item( + table: Table, + name: str, + target: int, + expected: list[Any], +) -> None: assert_row_operation_works( - table_data, - lambda table: table.remove_rows(lambda row: row.get_cell(column_name).eq(target)), + table, + lambda row: row.get_cell(name) == target, expected, ) @pytest.mark.parametrize( - ("table", "column_name"), + ("table", "name"), [ (Table({}), "A"), - (Table({"A": ["a", "aa", "aaa"]}), "B"), - (Table({"A": ["b", "aa", "aaa"], "C": ["b", "aa", "aaa"]}), "B"), + (Table({"A": []}), "B"), ], ids=[ - "empty table", - "table with one column", - "table with two columns", + "empty", + "non-empty", ], ) -def test_should_raise_column_not_found_error(table: Table, column_name: str) -> None: - row = _LazyVectorizedRow(table=table) - with pytest.raises(ColumnNotFoundError, match=re.escape(f"Could not find column(s):\n - '{column_name}'")): - row.get_cell(column_name) +def test_should_raise_if_column_does_not_exist(table: Table, name: str) -> None: + row = _LazyVectorizedRow(table) + with pytest.raises(ColumnNotFoundError): + row.get_cell(name) diff --git a/tests/safeds/data/tabular/containers/_row/test_get_column_type.py b/tests/safeds/data/tabular/containers/_row/test_get_column_type.py deleted file mode 100644 index 2cf29bfab..000000000 --- a/tests/safeds/data/tabular/containers/_row/test_get_column_type.py +++ /dev/null @@ -1,21 +0,0 @@ -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow -from safeds.data.tabular.typing import ColumnType - - -@pytest.mark.parametrize( - ("table", "column_name", "expected"), - [ - (Table({"col1": ["A"]}), "col1", ColumnType.string()), - (Table({"col1": ["a"], "col2": [1]}), "col2", ColumnType.int64()), - ], - ids=[ - "one column", - "two columns", - ], -) -def test_should_return_the_type_of_the_column(table: Table, column_name: str, expected: ColumnType) -> None: - row = _LazyVectorizedRow(table=table) - assert row.get_column_type(column_name) == expected diff --git a/tests/safeds/data/tabular/containers/_row/test_getitem.py b/tests/safeds/data/tabular/containers/_row/test_getitem.py index 2ca90d53c..0f5ee9fb1 100644 --- a/tests/safeds/data/tabular/containers/_row/test_getitem.py +++ b/tests/safeds/data/tabular/containers/_row/test_getitem.py @@ -1,4 +1,4 @@ -import re +from typing import Any import pytest @@ -9,38 +9,51 @@ @pytest.mark.parametrize( - ("table_data", "column_name", "target", "expected"), + ("table", "name", "target", "expected"), [ - ({"A": [1, 2]}, "A", 1, {"A": [2]}), - ({"A": [1, 2, 3], "B": [4, 5, 2]}, "B", 2, {"A": [1, 2], "B": [4, 5]}), + ( + Table({"A": [1, 2]}), + "A", + 1, + [True, False], + ), + ( + Table({"A": [1, 2], "B": [3, 4]}), + "A", + 1, + [True, False], + ), ], ids=[ - "table one column", - "table two columns", + "one column", + "two columns", ], ) -def test_should_get_correct_item(table_data: dict, column_name: str, target: int, expected: dict) -> None: +def test_should_get_correct_item( + table: Table, + name: str, + target: int, + expected: list[Any], +) -> None: assert_row_operation_works( - table_data, - lambda table: table.remove_rows(lambda row: row[column_name].eq(target)), + table, + lambda row: row[name] == target, expected, ) @pytest.mark.parametrize( - ("table", "column_name"), + ("table", "name"), [ (Table({}), "A"), - (Table({"A": ["a", "aa", "aaa"]}), "B"), - (Table({"A": ["b", "aa", "aaa"], "C": ["b", "aa", "aaa"]}), "B"), + (Table({"A": []}), "B"), ], ids=[ - "empty table", - "table with one column", - "table with two columns", + "empty", + "non-empty", ], ) -def test_should_raise_column_not_found_error(table: Table, column_name: str) -> None: - row = _LazyVectorizedRow(table=table) - with pytest.raises(ColumnNotFoundError, match=re.escape(f"Could not find column(s):\n - '{column_name}'")): - row[column_name] +def test_should_raise_if_column_does_not_exist(table: Table, name: str) -> None: + row = _LazyVectorizedRow(table) + with pytest.raises(ColumnNotFoundError): + _ignored = row[name] diff --git a/tests/safeds/data/tabular/containers/_row/test_has_column.py b/tests/safeds/data/tabular/containers/_row/test_has_column.py deleted file mode 100644 index 2fad69b81..000000000 --- a/tests/safeds/data/tabular/containers/_row/test_has_column.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow - - -@pytest.mark.parametrize( - ("table", "column_name", "expected"), - [ - (Table({}), "A", False), - (Table({"A": ["a", "aa", "aaa"]}), "A", True), - (Table({"A": ["a", "aa", "aaa"]}), "B", False), - ], - ids=[ - "empty table", - "table with existing column_name", - "table with non existing column_name", - ], -) -def test_should_have_column_name(table: Table, column_name: str, expected: bool) -> None: - row = _LazyVectorizedRow(table=table) - assert row.has_column(column_name) == expected diff --git a/tests/safeds/data/tabular/containers/_row/test_hash.py b/tests/safeds/data/tabular/containers/_row/test_hash.py index 7dacd8439..bbc5ac3e0 100644 --- a/tests/safeds/data/tabular/containers/_row/test_hash.py +++ b/tests/safeds/data/tabular/containers/_row/test_hash.py @@ -1,25 +1,90 @@ +from collections.abc import Callable + import pytest +from syrupy import SnapshotAssertion from safeds.data.tabular.containers import Table from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow @pytest.mark.parametrize( - ("table1", "table2", "expected"), + "table_factory", + [ + lambda: Table({}), + lambda: Table({"col1": []}), + lambda: Table({"col1": [1, 2]}), + ], + ids=[ + "empty", + "no rows", + "with data", + ], +) +class TestContract: + def test_should_return_same_hash_for_equal_objects(self, table_factory: Callable[[], Table]) -> None: + row_1 = _LazyVectorizedRow(table_factory()) + row_2 = _LazyVectorizedRow(table_factory()) + assert hash(row_1) == hash(row_2) + + def test_should_return_same_hash_in_different_processes( + self, + table_factory: Callable[[], Table], + snapshot: SnapshotAssertion, + ) -> None: + row = _LazyVectorizedRow(table_factory()) + assert hash(row) == snapshot + + +@pytest.mark.parametrize( + ("table_1", "table_2"), [ - (Table({}), Table({"A": ["a", "aa", "aaa"]}), False), - (Table({}), Table({}), True), - (Table({"A": ["a", "aa", "aaa"]}), Table({"A": ["a", "aa", "aaa"]}), True), - (Table({"A": ["a", "aa", "aaa"]}), Table({"B": ["a", "aa", "aaa"]}), False), + # too few columns + ( + Table({"col1": [1]}), + Table({}), + ), + # too many columns + ( + Table({}), + Table({"col1": [1]}), + ), + # different column order + ( + Table({"col1": [1], "col2": [2]}), + Table({"col2": [2], "col1": [1]}), + ), + # different column names + ( + Table({"col1": [1]}), + Table({"col2": [1]}), + ), + # different types + ( + Table({"col1": [1]}), + Table({"col1": ["1"]}), + ), + # too few rows + ( + Table({"col1": [1, 2]}), + Table({"col1": [1]}), # Needs at least one value, so the types match + ), + # too many rows + ( + Table({"col1": [1]}), # Needs at least one value, so the types match + Table({"col1": [1, 2]}), + ), ], ids=[ - "empty and different table", - "same empty tables", - "same tables", - "different tables", + "too few columns", + "too many columns", + "different column order", + "different column names", + "different types", + "too few rows", + "too many rows", ], ) -def test_should_return_consistent_hashes(table1: Table, table2: Table, expected: bool) -> None: - row1 = _LazyVectorizedRow(table=table1) - row2 = _LazyVectorizedRow(table=table2) - assert (hash(row1) == hash(row2)) == expected +def test_should_be_good_hash(table_1: Table, table_2: Table) -> None: + row_1 = _LazyVectorizedRow(table_1) + row_2 = _LazyVectorizedRow(table_2) + assert hash(row_1) != hash(row_2) diff --git a/tests/safeds/data/tabular/containers/_row/test_iter.py b/tests/safeds/data/tabular/containers/_row/test_iter.py deleted file mode 100644 index ea74f3703..000000000 --- a/tests/safeds/data/tabular/containers/_row/test_iter.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow - - -@pytest.mark.parametrize( - ("table", "expected"), - [ - (Table({}), []), - (Table({"A": ["a", "aa", "aaa"]}), ["A"]), - (Table({"A": ["a", "aa", "aaa"], "B": ["b", "bb", "bbb"], "C": ["c", "cc", "ccc"]}), ["A", "B", "C"]), - ], - ids=[ - "empty", - "one column", - "three columns", - ], -) -def test_should_return_same_list_of_column_name_with_iter(table: Table, expected: list) -> None: - row = _LazyVectorizedRow(table=table) - iterable = iter(row) - assert list(iterable) == expected diff --git a/tests/safeds/data/tabular/containers/_row/test_len.py b/tests/safeds/data/tabular/containers/_row/test_len.py deleted file mode 100644 index 577018476..000000000 --- a/tests/safeds/data/tabular/containers/_row/test_len.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow - - -@pytest.mark.parametrize( - ("table", "expected"), - [ - (Table({}), 0), - (Table({"A": ["a", "aa", "aaa"]}), 1), - (Table({"A": ["a", "aa", "aaa"], "B": ["b", "bb", "bbb"]}), 2), - ], - ids=[ - "empty", - "one column", - "two columns", - ], -) -def test_should_have_same_length_as_number_of_columns(table: Table, expected: int) -> None: - row = _LazyVectorizedRow(table=table) - assert len(row) == expected diff --git a/tests/safeds/data/tabular/containers/_row/test_schema.py b/tests/safeds/data/tabular/containers/_row/test_schema.py deleted file mode 100644 index 8e9412bce..000000000 --- a/tests/safeds/data/tabular/containers/_row/test_schema.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from safeds.data.tabular.containers import Table -from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow - - -@pytest.mark.parametrize( - ("table"), - [ - (Table({})), - (Table({"A": ["a", "aa", "aaa"]})), - (Table({"A": ["a", "aa", "aaa"], "B": ["b", "bb", "bbb"]})), - ], - ids=[ - "empty", - "one column", - "two columns", - ], -) -def test_should_return_same_schema(table: Table) -> None: - row = _LazyVectorizedRow(table=table) - assert table.schema == row.schema diff --git a/tests/safeds/data/tabular/containers/_row/test_sizeof.py b/tests/safeds/data/tabular/containers/_row/test_sizeof.py index 808e0882b..111ed5bc7 100644 --- a/tests/safeds/data/tabular/containers/_row/test_sizeof.py +++ b/tests/safeds/data/tabular/containers/_row/test_sizeof.py @@ -1,10 +1,24 @@ import sys -import polars as pl +import pytest +from safeds.data.tabular.containers import Table from safeds.data.tabular.containers._lazy_vectorized_row import _LazyVectorizedRow -def test_should_return_size_greater_than_normal_object() -> None: - cell = _LazyVectorizedRow(pl.col("a")) - assert sys.getsizeof(cell) > sys.getsizeof(object()) +@pytest.mark.parametrize( + "table", + [ + Table({}), + Table({"col1": []}), + Table({"col1": [0, 1], "col2": ["a", "b"]}), + ], + ids=[ + "empty", + "no rows", + "with data", + ], +) +def test_should_size_be_greater_than_normal_object(table: Table) -> None: + row = _LazyVectorizedRow(table) + assert sys.getsizeof(row) > sys.getsizeof(object()) diff --git a/tests/safeds/data/tabular/containers/_table/test_eq.py b/tests/safeds/data/tabular/containers/_table/test_eq.py index da61b8564..7ef114ece 100644 --- a/tests/safeds/data/tabular/containers/_table/test_eq.py +++ b/tests/safeds/data/tabular/containers/_table/test_eq.py @@ -99,7 +99,7 @@ "not equal (different values)", ], ) -def test_should_return_whether_tables_are_equal(table_1: Table, table_2: Table, expected: bool) -> None: +def test_should_return_whether_objects_are_equal(table_1: Table, table_2: Table, expected: bool) -> None: assert (table_1.__eq__(table_2)) == expected @@ -116,7 +116,7 @@ def test_should_return_whether_tables_are_equal(table_1: Table, table_2: Table, "non-empty", ], ) -def test_should_return_true_if_tables_are_identical(table: Table) -> None: +def test_should_return_true_if_objects_are_identical(table: Table) -> None: assert (table.__eq__(table)) is True @@ -131,5 +131,5 @@ def test_should_return_true_if_tables_are_identical(table: Table) -> None: "Table vs. Column", ], ) -def test_should_return_not_implemented_if_other_is_not_table(table: Table, other: Any) -> None: +def test_should_return_not_implemented_if_other_has_different_type(table: Table, other: Any) -> None: assert (table.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/typing/_column_type/test_eq.py b/tests/safeds/data/tabular/typing/_column_type/test_eq.py index 2edeba7a2..9a7250247 100644 --- a/tests/safeds/data/tabular/typing/_column_type/test_eq.py +++ b/tests/safeds/data/tabular/typing/_column_type/test_eq.py @@ -23,7 +23,7 @@ "not equal (numeric vs. non-numeric)", ], ) -def test_should_return_whether_column_types_are_equal(type_1: Table, type_2: Table, expected: bool) -> None: +def test_should_return_whether_objects_are_equal(type_1: Table, type_2: Table, expected: bool) -> None: assert (type_1.__eq__(type_2)) == expected @@ -70,7 +70,7 @@ def test_should_return_whether_column_types_are_equal(type_1: Table, type_2: Tab "null", ], ) -def test_should_return_true_if_column_types_are_identical(type_: ColumnType) -> None: +def test_should_return_true_if_objects_are_identical(type_: ColumnType) -> None: assert (type_.__eq__(type_)) is True @@ -85,5 +85,5 @@ def test_should_return_true_if_column_types_are_identical(type_: ColumnType) -> "ColumnType vs. Column", ], ) -def test_should_return_not_implemented_if_other_is_not_column_type(type_: ColumnType, other: Any) -> None: +def test_should_return_not_implemented_if_other_has_different_type(type_: ColumnType, other: Any) -> None: assert (type_.__eq__(other)) is NotImplemented diff --git a/tests/safeds/data/tabular/typing/_schema/test_eq.py b/tests/safeds/data/tabular/typing/_schema/test_eq.py index 668d029b4..a36565b10 100644 --- a/tests/safeds/data/tabular/typing/_schema/test_eq.py +++ b/tests/safeds/data/tabular/typing/_schema/test_eq.py @@ -71,7 +71,7 @@ "not equal (different types)", ], ) -def test_should_return_whether_schemas_are_equal(schema_1: Schema, schema_2: Schema, expected: bool) -> None: +def test_should_return_whether_objects_are_equal(schema_1: Schema, schema_2: Schema, expected: bool) -> None: assert (schema_1.__eq__(schema_2)) == expected @@ -88,7 +88,7 @@ def test_should_return_whether_schemas_are_equal(schema_1: Schema, schema_2: Sch "two columns", ], ) -def test_should_return_true_if_schemas_are_identical(schema: Schema) -> None: +def test_should_return_true_if_objects_are_identical(schema: Schema) -> None: assert (schema.__eq__(schema)) is True @@ -103,5 +103,5 @@ def test_should_return_true_if_schemas_are_identical(schema: Schema) -> None: "Schema vs. Column", ], ) -def test_should_return_not_implemented_if_other_is_not_schema(schema: Schema, other: Any) -> None: +def test_should_return_not_implemented_if_other_has_different_type(schema: Schema, other: Any) -> None: assert (schema.__eq__(other)) is NotImplemented