From 8af94c9de8cce7a34173ee15508ab489073a11ef Mon Sep 17 00:00:00 2001
From: Eva Lott <evalott100@gmail.com>
Date: Fri, 20 Sep 2024 09:25:58 +0100
Subject: [PATCH] Make table subclass enums be sequence enum rather than numpy
 string (#579)

---------

Co-authored-by: Eva Lott <twj43146@localhost.localdomain>
Co-authored-by: Tom Cobb <tom.cobb@diamond.ac.uk>
---
 src/ophyd_async/core/_table.py         | 119 +++++++++++--
 src/ophyd_async/fastcs/panda/_table.py |  40 +----
 tests/fastcs/panda/test_table.py       | 237 ++++++++++++++++++++-----
 tests/fastcs/panda/test_trigger.py     |  29 ++-
 4 files changed, 320 insertions(+), 105 deletions(-)

diff --git a/src/ophyd_async/core/_table.py b/src/ophyd_async/core/_table.py
index ff54a0102..f36b60dce 100644
--- a/src/ophyd_async/core/_table.py
+++ b/src/ophyd_async/core/_table.py
@@ -1,4 +1,5 @@
-from typing import TypeVar
+from enum import Enum
+from typing import TypeVar, get_args, get_origin
 
 import numpy as np
 from pydantic import BaseModel, ConfigDict, model_validator
@@ -6,6 +7,13 @@
 TableSubclass = TypeVar("TableSubclass", bound="Table")
 
 
+def _concat(value1, value2):
+    if isinstance(value1, np.ndarray):
+        return np.concatenate((value1, value2))
+    else:
+        return value1 + value2
+
+
 class Table(BaseModel):
     """An abstraction of a Table of str to numpy array."""
 
@@ -13,34 +21,105 @@ class Table(BaseModel):
 
     @staticmethod
     def row(cls: type[TableSubclass], **kwargs) -> TableSubclass:  # type: ignore
-        arrayified_kwargs = {
-            field_name: np.concatenate(
-                (
-                    (default_arr := field_value.default_factory()),  # type: ignore
-                    np.array([kwargs[field_name]], dtype=default_arr.dtype),
+        arrayified_kwargs = {}
+        for field_name, field_value in cls.model_fields.items():
+            value = kwargs.pop(field_name)
+            if field_value.default_factory is None:
+                raise ValueError(
+                    "`Table` models should have default factories for their "
+                    "mutable empty columns."
+                )
+            default_array = field_value.default_factory()
+            if isinstance(default_array, np.ndarray):
+                arrayified_kwargs[field_name] = np.array(
+                    [value], dtype=default_array.dtype
+                )
+            elif issubclass(type(value), Enum) and isinstance(value, str):
+                arrayified_kwargs[field_name] = [value]
+            else:
+                raise TypeError(
+                    "Row column should be numpy arrays or sequence of string `Enum`."
                 )
+        if kwargs:
+            raise TypeError(
+                f"Unexpected keyword arguments {kwargs.keys()} for {cls.__name__}."
             )
-            for field_name, field_value in cls.model_fields.items()
-        }
         return cls(**arrayified_kwargs)
 
     def __add__(self, right: TableSubclass) -> TableSubclass:
         """Concatenate the arrays in field values."""
 
-        assert type(right) is type(self), (
-            f"{right} is not a `Table`, or is not the same "
-            f"type of `Table` as {self}."
-        )
+        if type(right) is not type(self):
+            raise RuntimeError(
+                f"{right} is not a `Table`, or is not the same "
+                f"type of `Table` as {self}."
+            )
 
         return type(right)(
             **{
-                field_name: np.concatenate(
-                    (getattr(self, field_name), getattr(right, field_name))
+                field_name: _concat(
+                    getattr(self, field_name), getattr(right, field_name)
                 )
                 for field_name in self.model_fields
             }
         )
 
+    def numpy_dtype(self) -> np.dtype:
+        dtype = []
+        for field_name, field_value in self.model_fields.items():
+            if np.ndarray in (
+                get_origin(field_value.annotation),
+                field_value.annotation,
+            ):
+                dtype.append((field_name, getattr(self, field_name).dtype))
+            else:
+                enum_type = get_args(field_value.annotation)[0]
+                assert issubclass(enum_type, Enum)
+                enum_values = [element.value for element in enum_type]
+                max_length_in_enum = max(len(value) for value in enum_values)
+                dtype.append((field_name, np.dtype(f"<U{max_length_in_enum}")))
+
+        return np.dtype(dtype)
+
+    def numpy_table(self):
+        # It would be nice to be able to use np.transpose for this,
+        # but it defaults to the largest dtype for everything.
+        dtype = self.numpy_dtype()
+        transposed_list = [
+            np.array(tuple(row), dtype=dtype)
+            for row in zip(*self.numpy_columns(), strict=False)
+        ]
+        transposed = np.array(transposed_list, dtype=dtype)
+        return transposed
+
+    def numpy_columns(self) -> list[np.ndarray]:
+        """Columns in the table can be lists of string enums or numpy arrays.
+
+        This method returns the columns, converting the string enums to numpy arrays.
+        """
+
+        columns = []
+        for field_name, field_value in self.model_fields.items():
+            if np.ndarray in (
+                get_origin(field_value.annotation),
+                field_value.annotation,
+            ):
+                columns.append(getattr(self, field_name))
+            else:
+                enum_type = get_args(field_value.annotation)[0]
+                assert issubclass(enum_type, Enum)
+                enum_values = [element.value for element in enum_type]
+                max_length_in_enum = max(len(value) for value in enum_values)
+                dtype = np.dtype(f"<U{max_length_in_enum}")
+
+                columns.append(
+                    np.array(
+                        [enum.value for enum in getattr(self, field_name)], dtype=dtype
+                    )
+                )
+
+        return columns
+
     @model_validator(mode="after")
     def validate_arrays(self) -> "Table":
         first_length = len(next(iter(self))[1])
@@ -49,11 +128,15 @@ def validate_arrays(self) -> "Table":
         ), "Rows should all be of equal size."
 
         if not all(
-            np.issubdtype(
-                self.model_fields[field_name].default_factory().dtype,  # type: ignore
-                field_value.dtype,
+            # Checks if the values are numpy subtypes if the array is a numpy array,
+            # or if the value is a string enum.
+            np.issubdtype(getattr(self, field_name).dtype, default_array.dtype)
+            if isinstance(
+                default_array := self.model_fields[field_name].default_factory(),  # type: ignore
+                np.ndarray,
             )
-            for field_name, field_value in self
+            else issubclass(get_args(field_value.annotation)[0], Enum)
+            for field_name, field_value in self.model_fields.items()
         ):
             raise ValueError(
                 f"Cannot construct a `{type(self).__name__}`, "
diff --git a/src/ophyd_async/fastcs/panda/_table.py b/src/ophyd_async/fastcs/panda/_table.py
index 91fce9a29..a021d23fa 100644
--- a/src/ophyd_async/fastcs/panda/_table.py
+++ b/src/ophyd_async/fastcs/panda/_table.py
@@ -4,7 +4,7 @@
 
 import numpy as np
 import numpy.typing as npt
-from pydantic import Field, field_validator, model_validator
+from pydantic import Field, model_validator
 from pydantic_numpy.helper.annotation import NpArrayPydanticAnnotation
 from typing_extensions import TypedDict
 
@@ -51,13 +51,7 @@ class SeqTrigger(str, Enum):
     ),
     Field(default_factory=lambda: np.array([], dtype=np.bool_)),
 ]
-TriggerStr = Annotated[
-    np.ndarray[tuple[int], np.dtype[np.unicode_]],
-    NpArrayPydanticAnnotation.factory(
-        data_type=np.unicode_, dimensions=1, strict_data_typing=False
-    ),
-    Field(default_factory=lambda: np.array([], dtype=np.dtype("<U32"))),
-]
+TriggerStr = Annotated[Sequence[SeqTrigger], Field(default_factory=list)]
 
 
 class SeqTable(Table):
@@ -101,35 +95,7 @@ def row(  # type: ignore
         oute2: bool = False,
         outf2: bool = False,
     ) -> "SeqTable":
-        if isinstance(trigger, SeqTrigger):
-            trigger = trigger.value
-        return super().row(**locals())
-
-    @field_validator("trigger", mode="before")
-    @classmethod
-    def trigger_to_np_array(cls, trigger_column):
-        """
-        The user can provide a list of SeqTrigger enum elements instead of a numpy str.
-        """
-        if isinstance(trigger_column, Sequence) and all(
-            isinstance(trigger, SeqTrigger) for trigger in trigger_column
-        ):
-            trigger_column = np.array(
-                [trigger.value for trigger in trigger_column], dtype=np.dtype("<U32")
-            )
-        elif isinstance(trigger_column, Sequence) or isinstance(
-            trigger_column, np.ndarray
-        ):
-            for trigger in trigger_column:
-                SeqTrigger(
-                    trigger
-                )  # To check all the given strings are actually `SeqTrigger`s
-        else:
-            raise ValueError(
-                "Expected a numpy array or a sequence of `SeqTrigger`, got "
-                f"{type(trigger_column)}."
-            )
-        return trigger_column
+        return Table.row(**locals())
 
     @model_validator(mode="after")
     def validate_max_length(self) -> "SeqTable":
diff --git a/tests/fastcs/panda/test_table.py b/tests/fastcs/panda/test_table.py
index b34103250..ed963c91f 100644
--- a/tests/fastcs/panda/test_table.py
+++ b/tests/fastcs/panda/test_table.py
@@ -12,15 +12,18 @@ def test_seq_table_converts_lists():
     seq_table_dict_with_lists = {field_name: [] for field_name, _ in SeqTable()}
     # Validation passes
     seq_table = SeqTable(**seq_table_dict_with_lists)
-    assert isinstance(seq_table.trigger, np.ndarray)
-    assert seq_table.trigger.dtype == np.dtype("U32")
+    for field_name, field_value in seq_table:
+        if field_name == "trigger":
+            assert field_value == []
+        else:
+            assert np.array_equal(field_value, np.array([], dtype=field_value.dtype))
 
 
 def test_seq_table_validation_errors():
     with pytest.raises(ValidationError, match="81 validation errors for SeqTable"):
         SeqTable(
             repeats=0,
-            trigger="Immediate",
+            trigger=SeqTrigger.IMMEDIATE,
             position=0,
             time1=0,
             outa1=False,
@@ -40,7 +43,7 @@ def test_seq_table_validation_errors():
 
     large_seq_table = SeqTable(
         repeats=np.zeros(4095, dtype=np.int32),
-        trigger=np.array(["Immediate"] * 4095, dtype="U32"),
+        trigger=["Immediate"] * 4095,
         position=np.zeros(4095, dtype=np.int32),
         time1=np.zeros(4095, dtype=np.int32),
         outa1=np.zeros(4095, dtype=np.bool_),
@@ -73,16 +76,25 @@ def test_seq_table_validation_errors():
         wrong_types = {
             field_name: field_value.astype(np.unicode_)
             for field_name, field_value in row_one
+            if isinstance(field_value, np.ndarray)
         }
         SeqTable(**wrong_types)
+    with pytest.raises(
+        TypeError,
+        match="Row column should be numpy arrays or sequence of string `Enum`",
+    ):
+        SeqTable.row(trigger="A")
 
 
 def test_seq_table_pva_conversion():
     pva_dict = {
         "repeats": np.array([1, 2, 3, 4], dtype=np.int32),
-        "trigger": np.array(
-            ["Immediate", "Immediate", "BITC=0", "Immediate"], dtype=np.dtype("U32")
-        ),
+        "trigger": [
+            SeqTrigger.IMMEDIATE,
+            SeqTrigger.IMMEDIATE,
+            SeqTrigger.BITC_0,
+            SeqTrigger.IMMEDIATE,
+        ],
         "position": np.array([1, 2, 3, 4], dtype=np.int32),
         "time1": np.array([1, 0, 1, 0], dtype=np.int32),
         "outa1": np.array([1, 0, 1, 0], dtype=np.bool_),
@@ -102,7 +114,7 @@ def test_seq_table_pva_conversion():
     row_wise_dicts = [
         {
             "repeats": 1,
-            "trigger": "Immediate",
+            "trigger": SeqTrigger.IMMEDIATE,
             "position": 1,
             "time1": 1,
             "outa1": 1,
@@ -121,7 +133,7 @@ def test_seq_table_pva_conversion():
         },
         {
             "repeats": 2,
-            "trigger": "Immediate",
+            "trigger": SeqTrigger.IMMEDIATE,
             "position": 2,
             "time1": 0,
             "outa1": 0,
@@ -140,7 +152,7 @@ def test_seq_table_pva_conversion():
         },
         {
             "repeats": 3,
-            "trigger": "BITC=0",
+            "trigger": SeqTrigger.BITC_0,
             "position": 3,
             "time1": 1,
             "outa1": 1,
@@ -159,7 +171,7 @@ def test_seq_table_pva_conversion():
         },
         {
             "repeats": 4,
-            "trigger": "Immediate",
+            "trigger": SeqTrigger.IMMEDIATE,
             "position": 4,
             "time1": 0,
             "outa1": 0,
@@ -178,12 +190,20 @@ def test_seq_table_pva_conversion():
         },
     ]
 
+    def _assert_col_equal(column1, column2):
+        if isinstance(column1, np.ndarray):
+            assert np.array_equal(column1, column2)
+            assert column1.dtype == column2.dtype
+        else:
+            assert column1 == column2
+            assert all(isinstance(x, SeqTrigger) for x in column1)
+            assert all(isinstance(x, SeqTrigger) for x in column2)
+
     seq_table_from_pva_dict = SeqTable(**pva_dict)
     for (_, column1), column2 in zip(
         seq_table_from_pva_dict, pva_dict.values(), strict=False
     ):
-        assert np.array_equal(column1, column2)
-        assert column1.dtype == column2.dtype
+        _assert_col_equal(column1, column2)
 
     seq_table_from_rows = reduce(
         lambda x, y: x + y,
@@ -192,41 +212,174 @@ def test_seq_table_pva_conversion():
     for (_, column1), column2 in zip(
         seq_table_from_rows, pva_dict.values(), strict=False
     ):
-        assert np.array_equal(column1, column2)
-        assert column1.dtype == column2.dtype
+        _assert_col_equal(column1, column2)
 
     # Idempotency
     applied_twice_to_pva_dict = SeqTable(**pva_dict).model_dump(mode="python")
     for column1, column2 in zip(
         applied_twice_to_pva_dict.values(), pva_dict.values(), strict=False
     ):
-        assert np.array_equal(column1, column2)
-        assert column1.dtype == column2.dtype
+        _assert_col_equal(column1, column2)
+
+    assert np.array_equal(
+        seq_table_from_pva_dict.numpy_columns(),
+        [
+            np.array([1, 2, 3, 4], dtype=np.int32),
+            np.array(
+                [
+                    "Immediate",
+                    "Immediate",
+                    "BITC=0",
+                    "Immediate",
+                ],
+                dtype="<U14",
+            ),
+            np.array([1, 2, 3, 4], dtype=np.int32),
+            np.array([1, 0, 1, 0], dtype=np.int32),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([1, 2, 3, 4], dtype=np.int32),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+            np.array([True, False, True, False], dtype=np.bool_),
+        ],
+    )
+    dtype = seq_table_from_pva_dict.numpy_dtype()
+    assert dtype == np.dtype(
+        [
+            ("repeats", np.int32),
+            ("trigger", "<U14"),
+            ("position", np.int32),
+            ("time1", np.int32),
+            ("outa1", np.bool_),
+            ("outb1", np.bool_),
+            ("outc1", np.bool_),
+            ("outd1", np.bool_),
+            ("oute1", np.bool_),
+            ("outf1", np.bool_),
+            ("time2", np.int32),
+            ("outa2", np.bool_),
+            ("outb2", np.bool_),
+            ("outc2", np.bool_),
+            ("outd2", np.bool_),
+            ("oute2", np.bool_),
+            ("outf2", np.bool_),
+        ]
+    )
+
+    assert np.array_equal(
+        seq_table_from_pva_dict.numpy_table(),
+        np.array(
+            [
+                (
+                    1,
+                    "Immediate",
+                    1,
+                    1,
+                    True,
+                    True,
+                    True,
+                    True,
+                    True,
+                    True,
+                    1,
+                    True,
+                    True,
+                    True,
+                    True,
+                    True,
+                    True,
+                ),
+                (
+                    2,
+                    "Immediate",
+                    2,
+                    0,
+                    False,
+                    False,
+                    False,
+                    False,
+                    False,
+                    False,
+                    2,
+                    False,
+                    False,
+                    False,
+                    False,
+                    False,
+                    False,
+                ),
+                (
+                    3,
+                    "BITC=0",
+                    3,
+                    1,
+                    True,
+                    True,
+                    True,
+                    True,
+                    True,
+                    True,
+                    3,
+                    True,
+                    True,
+                    True,
+                    True,
+                    True,
+                    True,
+                ),
+                (
+                    4,
+                    "Immediate",
+                    4,
+                    0,
+                    False,
+                    False,
+                    False,
+                    False,
+                    False,
+                    False,
+                    4,
+                    False,
+                    False,
+                    False,
+                    False,
+                    False,
+                    False,
+                ),
+            ],
+            dtype=dtype,
+        ),
+    )
 
 
 def test_seq_table_takes_trigger_enum_row():
-    for trigger in (SeqTrigger.BITA_0, "BITA=0"):
-        table = SeqTable.row(trigger=trigger)
-        assert table.trigger[0] == "BITA=0"
-        assert np.issubdtype(table.trigger.dtype, np.dtype("<U32"))
-        table = SeqTable(
-            repeats=np.array([1], dtype=np.int32),
-            trigger=[trigger],
-            position=np.array([1], dtype=np.int32),
-            time1=np.array([1], dtype=np.int32),
-            outa1=np.array([1], dtype=np.bool_),
-            outb1=np.array([1], dtype=np.bool_),
-            outc1=np.array([1], dtype=np.bool_),
-            outd1=np.array([1], dtype=np.bool_),
-            oute1=np.array([1], dtype=np.bool_),
-            outf1=np.array([1], dtype=np.bool_),
-            time2=np.array([1], dtype=np.int32),
-            outa2=np.array([1], dtype=np.bool_),
-            outb2=np.array([1], dtype=np.bool_),
-            outc2=np.array([1], dtype=np.bool_),
-            outd2=np.array([1], dtype=np.bool_),
-            oute2=np.array([1], dtype=np.bool_),
-            outf2=np.array([1], dtype=np.bool_),
-        )
-        assert table.trigger[0] == "BITA=0"
-        assert np.issubdtype(table.trigger.dtype, np.dtype("<U32"))
+    table = SeqTable.row(trigger=SeqTrigger.BITA_0)
+    assert table.trigger[0] == SeqTrigger.BITA_0
+    table = SeqTable(
+        repeats=np.array([1], dtype=np.int32),
+        trigger=[SeqTrigger.BITA_0],
+        position=np.array([1], dtype=np.int32),
+        time1=np.array([1], dtype=np.int32),
+        outa1=np.array([1], dtype=np.bool_),
+        outb1=np.array([1], dtype=np.bool_),
+        outc1=np.array([1], dtype=np.bool_),
+        outd1=np.array([1], dtype=np.bool_),
+        oute1=np.array([1], dtype=np.bool_),
+        outf1=np.array([1], dtype=np.bool_),
+        time2=np.array([1], dtype=np.int32),
+        outa2=np.array([1], dtype=np.bool_),
+        outb2=np.array([1], dtype=np.bool_),
+        outc2=np.array([1], dtype=np.bool_),
+        outd2=np.array([1], dtype=np.bool_),
+        oute2=np.array([1], dtype=np.bool_),
+        outf2=np.array([1], dtype=np.bool_),
+    )
+    assert table.trigger[0] == SeqTrigger.BITA_0
diff --git a/tests/fastcs/panda/test_trigger.py b/tests/fastcs/panda/test_trigger.py
index 0973baad6..44735e758 100644
--- a/tests/fastcs/panda/test_trigger.py
+++ b/tests/fastcs/panda/test_trigger.py
@@ -14,6 +14,7 @@
     StaticPcompTriggerLogic,
     StaticSeqTableTriggerLogic,
 )
+from ophyd_async.fastcs.panda._table import SeqTrigger
 
 
 @pytest.fixture
@@ -117,7 +118,7 @@ def test_malformed_seq_table_info(kwargs, error_msg):
 
 def test_malformed_trigger_in_seq_table():
     def full_seq_table(trigger):
-        SeqTable(
+        return SeqTable(
             repeats=np.array([1], dtype=np.int32),
             trigger=trigger,
             position=np.array([1], dtype=np.int32),
@@ -137,12 +138,24 @@ def full_seq_table(trigger):
             outf2=np.array([1], dtype=np.bool_),
         )
 
-    with pytest.raises(ValidationError) as exc:
-        full_seq_table(np.array(["A"], dtype="U32"))
-    assert "Value error, 'A' is not a valid SeqTrigger" in str(exc)
+    for attempted_table in [
+        np.array(["A"], dtype="U32"),
+        np.array(["Immediate"], dtype="U32"),
+        {"Immediate"},
+    ]:
+        with pytest.raises(ValidationError) as exc:
+            full_seq_table(attempted_table)
+        assert "Input should be an instance of Sequence" in str(exc)
+
     with pytest.raises(ValidationError) as exc:
         full_seq_table(["A"])
-    assert "Value error, 'A' is not a valid SeqTrigger" in str(exc)
-    with pytest.raises(ValidationError) as exc:
-        full_seq_table({"A"})
-    assert "Expected a numpy array or a sequence of `SeqTrigger`, got" in str(exc)
+    assert (
+        "Input should be 'Immediate', 'BITA=0', 'BITA=1', 'BITB=0', 'BITB=1', "
+        "'BITC...' [type=enum, input_value='A', input_type=str]"
+    ) in str(exc)
+
+    # Pydantic is able to infer type from these
+    table = full_seq_table([SeqTrigger.IMMEDIATE])
+    assert table.trigger == ["Immediate"] == [SeqTrigger.IMMEDIATE]
+    table = full_seq_table(["Immediate"])
+    assert table.trigger == ["Immediate"] == [SeqTrigger.IMMEDIATE]