diff --git a/holonote/annotate/connector.py b/holonote/annotate/connector.py index a0ed041..9144082 100644 --- a/holonote/annotate/connector.py +++ b/holonote/annotate/connector.py @@ -41,7 +41,7 @@ class PrimaryKey(param.Parameterized): connector_class = param.String(default='SQLiteDB') - def __call__(self, connector, key_list=[]): + def __call__(self, connector, key_list=None): """ The key list is the current list of index values that are outstanding (i.e. have not been comitted). @@ -74,7 +74,7 @@ class AutoIncrementKey(PrimaryKey): schema = param.String(default='INTEGER PRIMARY KEY AUTOINCREMENT', constant=True, allow_None=False) - def __call__(self, connector, key_list=[]): + def __call__(self, connector, key_list=None): key_list_max = max(key_list) if key_list else 0 connector_max = connector.max_rowid() connector_max = 0 if connector_max is None else connector_max @@ -101,9 +101,7 @@ class UUIDHexStringKey(PrimaryKey): # Probably the better default length = param.Integer(default=32, bounds=(4,32)) - - - def __call__(self, connector, key_list=[]): + def __call__(self, connector, key_list=None): return uuid.uuid4().hex[:self.length] def cast(self, value): @@ -124,9 +122,11 @@ class UUIDBinaryKey(PrimaryKey): schema = param.String('BINARY PRIMARY KEY', constant=True, allow_None=False) - def __call__(self, connector, key_list=[]): + def __call__(self, connector, key_list=None): return uuid.uuid4().bytes + def cast(self, value): + return bytes(value) class WidgetKey(PrimaryKey): """ diff --git a/holonote/tests/annotations.db b/holonote/tests/annotations.db deleted file mode 100644 index 6fca006..0000000 Binary files a/holonote/tests/annotations.db and /dev/null differ diff --git a/holonote/tests/conftest.py b/holonote/tests/conftest.py new file mode 100644 index 0000000..ee85496 --- /dev/null +++ b/holonote/tests/conftest.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +from typing import Iterator + +import numpy as np +import pytest + +from 
holonote.annotate import Annotator, SQLiteDB, UUIDHexStringKey + + +@pytest.fixture() +def conn_sqlite_uuid(tmp_path) -> Iterator[SQLiteDB]: + conn = SQLiteDB(filename=str(tmp_path / "test.db"), primary_key=UUIDHexStringKey()) + yield conn + try: + conn.cursor.close() + except Exception: + pass + try: + conn.con.close() + except Exception: + pass + + +@pytest.fixture() +def annotator_range1d(conn_sqlite_uuid) -> Annotator: + anno = Annotator( + {"TIME": np.datetime64}, + fields=["description"], + region_types=["Range"], + connector=conn_sqlite_uuid, + ) + return anno + + +@pytest.fixture() +def annotator_point1d(conn_sqlite_uuid) -> Annotator: + anno = Annotator( + {"TIME": np.datetime64}, + fields=["description"], + region_types=["Point"], + connector=conn_sqlite_uuid, + ) + return anno + + +@pytest.fixture() +def annotator_range2d(conn_sqlite_uuid) -> Annotator: + anno = Annotator( + {"x": float, "y": float}, + fields=["description"], + region_types=["Range"], + connector=conn_sqlite_uuid, + ) + return anno + + +@pytest.fixture() +def annotator_point2d(conn_sqlite_uuid) -> Annotator: + anno = Annotator( + {"x": float, "y": float}, + fields=["description"], + region_types=["Point"], + connector=conn_sqlite_uuid, + ) + return anno + + +@pytest.fixture() +def multiple_region_annotator(annotator_range1d) -> Annotator: + annotator_range1d.region_types = ["Point", "Range"] + return annotator_range1d + + +@pytest.fixture() +def multiple_annotators( + conn_sqlite_uuid, annotator_range1d, annotator_range2d +) -> dict[str, Annotator | SQLiteDB]: + annotator_range1d.connector = conn_sqlite_uuid + annotator_range2d.connector = conn_sqlite_uuid + output = { + "annotation1d": annotator_range1d, + "annotation2d": annotator_range2d, + "conn": conn_sqlite_uuid, + } + return output + + +@pytest.fixture() +def multiple_fields_annotator(conn_sqlite_uuid) -> Annotator: + conn_sqlite_uuid.fields = ["field1", "field2"] + anno = Annotator( + {"TIME": np.datetime64}, + fields=["field1", 
"field2"], + connector=conn_sqlite_uuid, + ) + return anno diff --git a/holonote/tests/test_annotation_table.py b/holonote/tests/test_annotation_table.py index a06546e..a5ac429 100644 --- a/holonote/tests/test_annotation_table.py +++ b/holonote/tests/test_annotation_table.py @@ -1,36 +1,24 @@ -import unittest -import numpy as np import pandas as pd from holonote.annotate import AnnotationTable +def test_table_region_df(): + table = AnnotationTable() + table.load(primary_key_name='id', fields=['test_description']) + assert len(table._region_df) == 0, 'Should be initialized empty' + assert tuple(table._region_df.columns) == AnnotationTable.columns -class TestBasicTableLoad(unittest.TestCase): + start = pd.Timestamp('2022-06-17 18:32:48.623476') + end = pd.Timestamp('2022-06-19 04:44:09.306402') + regions = [ {'region_type': 'Range', + 'value': (start, end), + 'dim1': 'TIME', + 'dim2': None} ] + table.add_annotation(regions, id=100, test_description='A test') - def setUp(self): - self.table = AnnotationTable() - # Load some metadata and region data + expected = pd.DataFrame([{'region_type':'Range', 'dim1':'TIME', 'dim2': None, + 'value':(start, end), '_id':100}]).astype({'_id':object}) - def test_table_region_df(self): - self.table.load(primary_key_name='id', fields=['test_description']) - assert len(self.table._region_df) == 0, 'Should be initialized empty' - assert tuple(self.table._region_df.columns) == AnnotationTable.columns - - start = pd.Timestamp('2022-06-17 18:32:48.623476') - end = pd.Timestamp('2022-06-19 04:44:09.306402') - regions = [ {'region_type': 'Range', - 'value': (start, end), - 'dim1': 'TIME', - 'dim2': None} ] - self.table.add_annotation(regions, id=100, test_description='A test') - - - expected = pd.DataFrame([{'region_type':'Range', 'dim1':'TIME', 'dim2': None, - 'value':(start, end), '_id':100}]).astype({'_id':object}) - - pd.testing.assert_frame_equal(self.table._region_df, expected) - - -# Test other metadata field + 
pd.testing.assert_frame_equal(table._region_df, expected) diff --git a/holonote/tests/test_annotators.py b/holonote/tests/test_annotators.py deleted file mode 100644 index de66725..0000000 --- a/holonote/tests/test_annotators.py +++ /dev/null @@ -1,652 +0,0 @@ -from holonote.annotate import AnnotationTable -# TODO: - -# * (after refactor) annotators -> annotator, connectors -> connector [ ] - -# TESTS - -# Schema error (needs file or connect in memory??) -# .snapshot() and .revert_to_snapshot() - -import uuid -import unittest -import numpy as np -import pandas as pd - -import holoviews as hv -from holonote.annotate import Annotator -from holonote.annotate import SQLiteDB, UUIDHexStringKey - -class TestBasicRange1DAnnotator(unittest.TestCase): - - def setUp(self): - assert Annotator.connector_class is SQLiteDB, 'Expecting default SQLite connector' - Annotator.connector_class.filename = ':memory:' - Annotator.connector_class.primary_key = UUIDHexStringKey() - self.annotator = Annotator({'TIME': np.datetime64}, fields=['description'], region_types=['Range']) - - def tearDown(self): - self.annotator.connector.cursor.close() - self.annotator.connector.con.close() - del self.annotator - - def test_point_insertion_exception(self): - timestamp = np.datetime64('2022-06-06') - with self.assertRaises(ValueError) as cm: - self.annotator.set_point(timestamp) - - expected_msg = "Point region types not enabled as region_types=['Range']" - self.assertEqual(str(cm.exception), expected_msg) - - def test_insertion_edit_table_columns(self): - self.annotator.set_range(np.datetime64('2022-06-06'), np.datetime64('2022-06-08')) - self.annotator.add_annotation(description='A test annotation!') - commits = self.annotator.annotation_table.commits() - assert len(commits)==1, 'Only one insertion commit made ' - self.annotator.commit() - self.assertEqual(commits[0]['operation'],'insert') - self.assertEqual(set(commits[0]['kwargs'].keys()), - set(self.annotator.connector.columns)) - - def 
test_range_insertion_values(self): - start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') - self.annotator.set_range(start, end) - self.annotator.add_annotation(description='A test annotation!') - commits = self.annotator.annotation_table.commits() - assert len(commits)==1, 'Only one insertion commit made' - kwargs = commits[0]['kwargs'] - assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' - kwargs.pop('uuid') - self.assertEqual(kwargs, dict(description='A test annotation!', - start_TIME=start, end_TIME=end)) - - def test_range_commit_insertion(self): - start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') - description = 'A test annotation!' - self.annotator.set_range(start, end) - self.annotator.add_annotation(description=description) - self.annotator.commit() - - df = pd.DataFrame({'uuid': pd.Series(self.annotator.df.index[0], dtype=object), - 'start_TIME':[start], - 'end_TIME':[end], - 'description':[description]} - ).set_index('uuid') - - sql_df = self.annotator.connector.load_dataframe() - pd.testing.assert_frame_equal(sql_df, df) - - - def test_range_addition_deletion_by_uuid(self): - start1, end1 = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') - start2, end2 = np.datetime64('2023-06-06'), np.datetime64('2023-06-08') - start3, end3 = np.datetime64('2024-06-06'), np.datetime64('2024-06-08') - self.annotator.set_range(start1, end1) - self.annotator.add_annotation(description='Annotation 1') - self.annotator.set_range(start2, end2) - self.annotator.add_annotation(description='Annotation 2', uuid='08286429') - self.annotator.set_range(start3, end3) - self.annotator.add_annotation(description='Annotation 3') - self.annotator.commit() - sql_df = self.annotator.connector.load_dataframe() - self.assertEqual(set(sql_df['description']), set(['Annotation 1', 'Annotation 2', 'Annotation 3'])) - deletion_index = sql_df.index[1] - self.annotator.delete_annotation(deletion_index) - self.annotator.commit() - 
sql_df = self.annotator.connector.load_dataframe() - self.assertEqual(set(sql_df['description']), set(['Annotation 1', 'Annotation 3'])) - - - def test_range_define_preserved_index_mismatch(self): - starts = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] - ends = [np.datetime64('2022-06-%.2d' % (d+2)) for d in range(6,15, 4)] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] - - data = pd.DataFrame({'uuid':annotation_id, 'start':starts, 'end':ends, 'description':descriptions}).set_index('uuid') - self.annotator.define_fields(data[['description']], preserve_index=True) - self.annotator.define_ranges(data['start'].iloc[:2], data['end'].iloc[:2]) - with self.assertRaisesRegex(ValueError, - f"Following annotations have no associated region: {{{repr(annotation_id[2])}}}"): - self.annotator.commit() - - def test_range_define_auto_index_mismatch(self): - starts = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] - ends = [np.datetime64('2022-06-%.2d' % (d+2)) for d in range(6,15, 4)] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] - - data = pd.DataFrame({'uuid':annotation_id, 'start':starts, - 'end':ends, 'description':descriptions}).set_index('uuid') - self.annotator.define_fields(data[['description']], preserve_index=False) - self.annotator.define_ranges(data['start'].iloc[:2], data['end'].iloc[:2]) - with self.assertRaisesRegex(ValueError, - "Following annotations have no associated region:"): - self.annotator.commit() - - def test_range_define_unassigned_indices(self): - starts = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] - ends = [np.datetime64('2022-06-%.2d' % (d+2)) for d in range(6,15, 4)] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id1 = [uuid.uuid4().hex[:8] for d in [1,2,3]] - mismatched = [uuid.uuid4().hex[:8] for d in [1,2]] - annotation_id2 = mismatched + 
[annotation_id1[2]] - - data1 = pd.DataFrame({'uuid':annotation_id1, 'start':starts, - 'end':ends, 'description':descriptions}).set_index('uuid') - data2 = pd.DataFrame({'uuid':annotation_id2, 'start':starts, - 'end':ends, 'description':descriptions}).set_index('uuid') - - self.annotator.define_fields(data1[['description']]) - with self.assertRaises(KeyError) as cm: - self.annotator.define_ranges(data2['start'], data2['end']) - assert f'{mismatched}' in str(cm.exception) - - -class TestBasicRange2DAnnotator(unittest.TestCase): - - def setUp(self): - assert Annotator.connector_class is SQLiteDB, 'Expecting default SQLite connector' - Annotator.connector_class.filename = ':memory:' - Annotator.connector_class.primary_key = UUIDHexStringKey() - self.annotator = Annotator({'x': float, 'y':float}, - fields=['description'], region_types=['Range']) - - def tearDown(self): - self.annotator.connector.cursor.close() - self.annotator.connector.con.close() - del self.annotator - - def test_point_insertion_exception(self): - x,y = 0.5,0.5 - with self.assertRaises(ValueError) as cm: - self.annotator.set_point(x,y) - - expected_msg = "Point region types not enabled as region_types=['Range']" - self.assertEqual(str(cm.exception), expected_msg) - - def test_insertion_edit_table_columns(self): - self.annotator.set_range(-0.25, 0.25, -0.1, 0.1) - self.annotator.add_annotation(description='A test annotation!') - commits = self.annotator.annotation_table.commits() - assert len(commits)==1, 'Only one insertion commit made ' - self.annotator.commit() - self.assertEqual(commits[0]['operation'],'insert') - self.assertEqual(set(commits[0]['kwargs'].keys()), - set(self.annotator.connector.columns)) - - def test_range_insertion_values(self): - startx, endx, starty, endy = -0.25, 0.25, -0.1, 0.1 - self.annotator.set_range(startx, endx, starty, endy) - self.annotator.add_annotation(description='A test annotation!') - commits = self.annotator.annotation_table.commits() - assert len(commits)==1, 
'Only one insertion commit made' - kwargs = commits[0]['kwargs'] - assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' - kwargs.pop('uuid') - self.assertEqual(kwargs, dict(description='A test annotation!', - start_x=startx, end_x=endx, start_y=starty, end_y=endy)) - - def test_range_commit_insertion(self): - startx, endx, starty, endy = -0.25, 0.25, -0.1, 0.1 - description = 'A test annotation!' - self.annotator.set_range(startx, endx, starty, endy) - self.annotator.add_annotation(description=description) - self.annotator.commit() - - df = pd.DataFrame({'uuid': pd.Series(self.annotator.df.index[0], dtype=object), - 'start_x':[startx], - 'start_y':[starty], - 'end_x':[endx], - 'end_y':[endy], - 'description':[description]} - ).set_index('uuid') - - sql_df = self.annotator.connector.load_dataframe() - pd.testing.assert_frame_equal(sql_df, df) - - - def test_range_addition_deletion_by_uuid(self): - startx1, endx1, starty1, endy1 = -0.251, 0.251, -0.11, 0.11 - startx2, endx2, starty2, endy2 = -0.252, 0.252, -0.12, 0.12 - startx3, endx3, starty3, endy3 = -0.253, 0.253, -0.13, 0.13 - self.annotator.set_range(startx1, endx1, starty1, endy1) - self.annotator.add_annotation(description='Annotation 1') - self.annotator.set_range(startx2, endx2, starty2, endy2) - self.annotator.add_annotation(description='Annotation 2', uuid='08286429') - self.annotator.set_range(startx3, endx3, starty3, endy3) - self.annotator.add_annotation(description='Annotation 3') - self.annotator.commit() - sql_df = self.annotator.connector.load_dataframe() - self.assertEqual(set(sql_df['description']), set(['Annotation 1', 'Annotation 2', 'Annotation 3'])) - deletion_index = sql_df.index[1] - self.annotator.delete_annotation(deletion_index) - self.annotator.commit() - sql_df = self.annotator.connector.load_dataframe() - self.assertEqual(set(sql_df['description']), set(['Annotation 1', 'Annotation 3'])) - - - def test_range_define_preserved_index_mismatch(self): - xstarts, xends = 
[-0.3, -0.2, -0.1], [0.3, 0.2, 0.1] - ystarts, yends = [-0.35, -0.25, -0.15], [0.35, 0.25, 0.15] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] - - data = pd.DataFrame({'uuid':annotation_id, 'xstart':xstarts, 'xend':xends, - 'ystart':ystarts, 'yend':yends, - 'description':descriptions}).set_index('uuid') - self.annotator.define_fields(data[['description']], preserve_index=True) - self.annotator.define_ranges(data['xstart'].iloc[:2], data['xend'].iloc[:2], - data['ystart'].iloc[:2], data['yend'].iloc[:2]) - with self.assertRaisesRegex(ValueError, - f"Following annotations have no associated region: {{{repr(annotation_id[2])}}}"): - self.annotator.commit() - - def test_range_define_auto_index_mismatch(self): - xstarts, xends = [-0.3, -0.2, -0.1], [0.3, 0.2, 0.1] - ystarts, yends = [-0.35, -0.25, -0.15], [0.35, 0.25, 0.15] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] - data = pd.DataFrame({'uuid':annotation_id, 'xstart':xstarts, 'xend':xends, - 'ystart':ystarts, 'yend':yends, - 'description':descriptions}).set_index('uuid') - self.annotator.define_fields(data[['description']], preserve_index=False) - self.annotator.define_ranges(data['xstart'].iloc[:2], data['xend'].iloc[:2], - data['ystart'].iloc[:2], data['yend'].iloc[:2]) - with self.assertRaisesRegex(ValueError, - "Following annotations have no associated region:"): - self.annotator.commit() - - def test_range_define_unassigned_indices(self): - xstarts, xends = [-0.3, -0.2, -0.1], [0.3, 0.2, 0.1] - ystarts, yends = [-0.35, -0.25, -0.15], [0.35, 0.25, 0.15] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id1 = [uuid.uuid4().hex[:8] for d in [1,2,3]] - mismatched = [uuid.uuid4().hex[:8] for d in [1,2]] - annotation_id2 = mismatched + [annotation_id1[2]] - - data1 = pd.DataFrame({'uuid':annotation_id1, 'xstart':xstarts, 'xend':xends, - 'ystart':ystarts, 
'yend':yends, - 'description':descriptions}).set_index('uuid') - data2 = pd.DataFrame({'uuid':annotation_id2, 'xstart':xstarts, 'xend':xends, - 'ystart':ystarts, 'yend':yends, - 'description':descriptions}).set_index('uuid') - - self.annotator.define_fields(data1[['description']]) - with self.assertRaises(KeyError) as cm: - self.annotator.define_ranges(data2['xstart'], data2['xend'], - data2['ystart'], data2['yend']) - assert f'{mismatched}' in str(cm.exception) - - - -class TestBasicPoint1DAnnotator(unittest.TestCase): - - def setUp(self): - assert Annotator.connector_class is SQLiteDB, 'Expecting default SQLite connector' - Annotator.connector_class.filename = ':memory:' - Annotator.connector_class.primary_key = UUIDHexStringKey() - self.annotator = Annotator({'TIME': np.datetime64}, fields=['description'], region_types=['Point']) - - def tearDown(self): - self.annotator.connector.cursor.close() - self.annotator.connector.con.close() - del self.annotator - - def test_insertion_edit_table_columns(self): - self.annotator.set_point(np.datetime64('2022-06-06')) - self.annotator.add_annotation(description='A test annotation!') - commits = self.annotator.annotation_table.commits() - assert len(commits)==1, 'Only one insertion commit made ' - self.annotator.commit() - self.assertEqual(commits[0]['operation'],'insert') - self.assertEqual(set(commits[0]['kwargs'].keys()), - set(self.annotator.connector.columns)) - - def test_range_insertion_exception(self): - start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') - with self.assertRaises(ValueError) as cm: - self.annotator.set_range(start, end) - expected_msg = "Range region types not enabled as region_types=['Point']" - self.assertEqual(str(cm.exception), expected_msg) - - def test_point_insertion_values(self): - timestamp = np.datetime64('2022-06-06') - self.annotator.set_point(timestamp) - self.annotator.add_annotation(description='A test annotation!') - commits = 
self.annotator.annotation_table.commits() - assert len(commits)==1, 'Only one insertion commit made' - kwargs = commits[0]['kwargs'] - assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' - kwargs.pop('uuid') - self.assertEqual(kwargs, dict(description='A test annotation!', point_TIME=timestamp)) - - def test_point_commit_insertion(self): - timestamp = np.datetime64('2022-06-06') - description = 'A test annotation!' - self.annotator.set_point(timestamp) - self.annotator.add_annotation(description=description) - self.annotator.commit() - - df = pd.DataFrame({'uuid': pd.Series(self.annotator.df.index[0], dtype=object), - 'point_TIME':[timestamp], - 'description':[description]} - ).set_index('uuid') - - sql_df = self.annotator.connector.load_dataframe() - pd.testing.assert_frame_equal(sql_df, df) - - - def test_point_addition_deletion_by_uuid(self): - ts1 = np.datetime64('2022-06-06') - ts2 = np.datetime64('2023-06-06') - ts3 = np.datetime64('2024-06-06') - self.annotator.set_point(ts1) - self.annotator.add_annotation(description='Annotation 1') - self.annotator.set_point(ts2) - self.annotator.add_annotation(description='Annotation 2', uuid='08286429') - self.annotator.set_point(ts3) - self.annotator.add_annotation(description='Annotation 3') - self.annotator.commit() - sql_df = self.annotator.connector.load_dataframe() - self.assertEqual(set(sql_df['description']), set(['Annotation 1', 'Annotation 2', 'Annotation 3'])) - deletion_index = sql_df.index[1] - self.annotator.delete_annotation(deletion_index) - self.annotator.commit() - sql_df = self.annotator.connector.load_dataframe() - self.assertEqual(set(sql_df['description']), set(['Annotation 1', 'Annotation 3'])) - - def test_point_define_preserved_index_mismatch(self): - timestamps = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] - - data = 
pd.DataFrame({'uuid':annotation_id, 'timestamps':timestamps, - 'description':descriptions}).set_index('uuid') - self.annotator.define_fields(data[['description']], preserve_index=True) - self.annotator.define_points(data['timestamps'].iloc[:2]) - with self.assertRaisesRegex(ValueError, - f"Following annotations have no associated region: {{{repr(annotation_id[2])}}}"): - self.annotator.commit() - - def test_point_define_auto_index_mismatch(self): - timestamps = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] - - data = pd.DataFrame({'uuid':annotation_id, 'timestamps':timestamps, - 'description':descriptions}).set_index('uuid') - self.annotator.define_fields(data[['description']], preserve_index=False) - self.annotator.define_points(data['timestamps'].iloc[:2]) - with self.assertRaisesRegex(ValueError, - "Following annotations have no associated region:"): - self.annotator.commit() - - def test_point_define_unassigned_indices(self): - timestamps = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id1 = [uuid.uuid4().hex[:8] for d in [1,2,3]] - mismatched = [uuid.uuid4().hex[:8] for d in [1,2]] - annotation_id2 = mismatched + [annotation_id1[2]] - - data1 = pd.DataFrame({'uuid':annotation_id1, 'timestamps':timestamps, - 'description':descriptions}).set_index('uuid') - data2 = pd.DataFrame({'uuid':annotation_id2, 'timestamps':timestamps, - 'description':descriptions}).set_index('uuid') - - self.annotator.define_fields(data1[['description']]) - with self.assertRaises(KeyError) as cm: - self.annotator.define_points(data2['timestamps']) - assert f'{mismatched}' in str(cm.exception) - - - -class TestBasicPoint2DAnnotator(unittest.TestCase): - - def setUp(self): - assert Annotator.connector_class is SQLiteDB, 'Expecting default SQLite connector' - 
Annotator.connector_class.filename = ':memory:' - Annotator.connector_class.primary_key = UUIDHexStringKey() - self.annotator = Annotator({'x': float, 'y':float}, fields=['description'], region_types=['Point']) - - def tearDown(self): - self.annotator.connector.cursor.close() - self.annotator.connector.con.close() - del self.annotator - - def test_insertion_edit_table_columns(self): - self.annotator.set_point(-0.25, 0.1) - self.annotator.add_annotation(description='A test annotation!') - commits = self.annotator.annotation_table.commits() - assert len(commits)==1, 'Only one insertion commit made ' - self.annotator.commit() - self.assertEqual(commits[0]['operation'],'insert') - self.assertEqual(set(commits[0]['kwargs'].keys()), - set(self.annotator.connector.columns)) - - def test_range_insertion_exception(self): - x1,x2,y1,y2 = -0.25,0.25, -0.3, 0.3 - with self.assertRaises(ValueError) as cm: - self.annotator.set_range(x1,x2,y1,y2) - expected_msg = "Range region types not enabled as region_types=['Point']" - self.assertEqual(str(cm.exception), expected_msg) - - def test_point_insertion_values(self): - x,y = 0.5, 0.3 - self.annotator.set_point(x,y) - self.annotator.add_annotation(description='A test annotation!') - commits = self.annotator.annotation_table.commits() - assert len(commits)==1, 'Only one insertion commit made' - kwargs = commits[0]['kwargs'] - assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' - kwargs.pop('uuid') - self.assertEqual(kwargs, dict(description='A test annotation!', point_x=x, point_y=y)) - - def test_point_commit_insertion(self): - x, y = 0.5, 0.3 - description = 'A test annotation!' 
- self.annotator.set_point(x,y) - self.annotator.add_annotation(description=description) - self.annotator.commit() - - df = pd.DataFrame({'uuid': pd.Series(self.annotator.df.index[0], dtype=object), - 'point_x':[x], - 'point_y':[y], - 'description':[description]} - ).set_index('uuid') - - sql_df = self.annotator.connector.load_dataframe() - pd.testing.assert_frame_equal(sql_df, df) - - - def test_point_addition_deletion_by_uuid(self): - x1, y1 = 0.2,0.2 - x2, y2 = 0.3,0.3 - x3, y3 = 0.4,0.4 - self.annotator.set_point(x1, y1) - self.annotator.add_annotation(description='Annotation 1') - self.annotator.set_point(x2, y2) - self.annotator.add_annotation(description='Annotation 2', uuid='08286429') - self.annotator.set_point(x3, y3) - self.annotator.add_annotation(description='Annotation 3') - self.annotator.commit() - sql_df = self.annotator.connector.load_dataframe() - self.assertEqual(set(sql_df['description']), set(['Annotation 1', 'Annotation 2', 'Annotation 3'])) - deletion_index = sql_df.index[1] - self.annotator.delete_annotation(deletion_index) - self.annotator.commit() - sql_df = self.annotator.connector.load_dataframe() - self.assertEqual(set(sql_df['description']), set(['Annotation 1', 'Annotation 3'])) - - def test_point_define_preserved_index_mismatch(self): - xs, ys = [-0.1,-0.2,-0.3], [0.1,0.2,0.3] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] - - data = pd.DataFrame({'uuid':annotation_id, 'xs':xs, 'ys':ys, - 'description':descriptions}).set_index('uuid') - self.annotator.define_fields(data[['description']], preserve_index=True) - self.annotator.define_points(data['xs'].iloc[:2], data['ys'].iloc[:2]) - with self.assertRaisesRegex(ValueError, - f"Following annotations have no associated region: {{{repr(annotation_id[2])}}}"): - self.annotator.commit() - - def test_point_define_auto_index_mismatch(self): - xs, ys = [-0.1,-0.2,-0.3], [0.1,0.2,0.3] - descriptions = ['Annotation %d' % d for d 
in [1,2,3]] - annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] - - data = pd.DataFrame({'uuid':annotation_id, 'xs':xs, 'ys':ys, - 'description':descriptions}).set_index('uuid') - self.annotator.define_fields(data[['description']], preserve_index=False) - self.annotator.define_points(data['xs'].iloc[:2], data['ys'].iloc[:2]) - with self.assertRaisesRegex(ValueError, - "Following annotations have no associated region:"): - self.annotator.commit() - - def test_point_define_unassigned_indices(self): - xs, ys = [-0.1,-0.2,-0.3], [0.1,0.2,0.3] - descriptions = ['Annotation %d' % d for d in [1,2,3]] - annotation_id1 = [uuid.uuid4().hex[:8] for d in [1,2,3]] - mismatched = [uuid.uuid4().hex[:8] for d in [1,2]] - annotation_id2 = mismatched + [annotation_id1[2]] - - data1 = pd.DataFrame({'uuid':annotation_id1, 'xs':xs, 'ys':ys, - 'description':descriptions}).set_index('uuid') - data2 = pd.DataFrame({'uuid':annotation_id2, 'xs':xs, 'ys':ys, - 'description':descriptions}).set_index('uuid') - - self.annotator.define_fields(data1[['description']]) - with self.assertRaises(KeyError) as cm: - self.annotator.define_points(data2['xs'], data2['ys']) - assert f'{mismatched}' in str(cm.exception) - - - -class TestMultipleRegion1DAnnotator(unittest.TestCase): - - def setUp(self): - assert Annotator.connector_class is SQLiteDB, 'Expecting default SQLite connector' - Annotator.connector_class.filename = ':memory:' - Annotator.connector_class.primary_key = UUIDHexStringKey() - self.annotator = Annotator({'TIME': np.datetime64}, fields=['description'], - region_types=['Point', 'Range']) - - def tearDown(self): - self.annotator.connector.cursor.close() - self.annotator.connector.con.close() - del self.annotator - - def test_point_range_commit_insertion(self): - descriptions = ['A point insertion', 'A range insertion'] - timestamp = np.datetime64('2022-06-06') - self.annotator.set_point(timestamp) - self.annotator.add_annotation(description=descriptions[0]) - - start, end = 
np.datetime64('2022-06-06'), np.datetime64('2022-06-08') - self.annotator.set_range(start, end) - self.annotator.add_annotation(description=descriptions[1]) - - self.annotator.commit() - - # FIXME! Index order is inverted? - df = pd.DataFrame({'uuid': pd.Series(self.annotator.df.index[::-1], dtype=object), - 'point_TIME':[timestamp, pd.NaT], - 'start_TIME':[pd.NaT, start], - 'end_TIME': [pd.NaT, end], - 'description':descriptions} - ).set_index('uuid') - - sql_df = self.annotator.connector.load_dataframe() - pd.testing.assert_frame_equal(sql_df, df) - - -class TestMultiplePlotAnnotator(unittest.TestCase): - - def setUp(self): - assert Annotator.connector_class is SQLiteDB, 'Expecting default SQLite connector' - Annotator.connector_class.filename = ':memory:' - Annotator.connector_class.primary_key = UUIDHexStringKey() - - self.connector = SQLiteDB() - xvals, yvals = np.linspace(-4, 0, 202), np.linspace(4, 0, 202) - xs, ys = np.meshgrid(xvals, yvals) - image = hv.Image(np.sin(ys*xs), kdims=['A', 'B']) - self.image_annotator = Annotator(image, connector=self.connector, - fields=['description'], region_types=['Range']) - - curve = hv.Curve((np.arange('2005-02', '2005-03', dtype='datetime64[D]'), range(28)), kdims=['TIME']) - self.curve_annotator = Annotator(curve, connector=self.connector, - fields=['description'], region_types=['Range']) - - def test_element_kdim_dtypes(self): - self.assertEqual(self.image_annotator.kdim_dtypes, {'A':np.float64 , 'B':np.float64}) - self.assertEqual(self.curve_annotator.kdim_dtypes, {'TIME': np.datetime64}) - - def test_multiplot_add_annotation(self): - self.image_annotator.set_range(-0.25, 0.25, -0.1, 0.1) - self.curve_annotator.set_range(np.datetime64('2005-02-13'), np.datetime64('2005-02-16')) - self.connector.add_annotation(description='Multi-plot annotation') - - - - def tearDown(self): - self.connector.cursor.close() - self.connector.con.close() - del self.image_annotator - - -class 
TestAnnotatorMultipleStringFields(unittest.TestCase): - - def setUp(self): - assert Annotator.connector_class is SQLiteDB, 'Expecting default SQLite connector' - Annotator.connector_class.filename = ':memory:' - Annotator.connector_class.primary_key = UUIDHexStringKey() - self.annotator = Annotator({'TIME': np.datetime64}, fields=['field1', 'field2']) - - - def test_insertion_values(self): - start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') - self.annotator.set_range(start, end) - self.annotator.add_annotation(field1='A test field', field2='Another test field') - commits = self.annotator.annotation_table.commits() - kwargs = commits[0]['kwargs'] - assert len(commits)==1, 'Only one insertion commit made' - assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' - kwargs.pop('uuid') - self.assertEqual(kwargs, dict(field1='A test field', field2='Another test field', - start_TIME=start, end_TIME=end)) - - - def test_commit_insertion(self): - start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') - field1 = 'A test field' - field2 = 'Another test field' - self.annotator.set_range(start, end) - self.annotator.add_annotation(field1=field1, field2=field2) - self.annotator.commit() - - df = pd.DataFrame({'uuid': pd.Series(self.annotator.df.index[0], dtype=object), - 'start_TIME':[start], - 'end_TIME':[end], - 'field1':[field1], - 'field2':[field2]} - ).set_index('uuid') - - sql_df = self.annotator.connector.load_dataframe() - pd.testing.assert_frame_equal(sql_df, df) - - - def test_commit_update(self): - start1, end1 = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') - start2, end2 = np.datetime64('2023-06-06'), np.datetime64('2023-06-08') - start3, end3 = np.datetime64('2024-06-06'), np.datetime64('2024-06-08') - self.annotator.set_range(start1, end1) - self.annotator.add_annotation(field1='Field 1.1', field2='Field 1.2') - self.annotator.set_range(start2, end2) - self.annotator.add_annotation(field1='Field 2.1', 
field2='Field 2.2') - self.annotator.commit() - self.annotator.update_annotation_fields(self.annotator.df.index[0], field1='NEW Field 1.1') - self.annotator.commit() - sql_df = self.annotator.connector.load_dataframe() - self.assertEqual(set(sql_df['field1']), set(['NEW Field 1.1', 'Field 2.1'])) diff --git a/holonote/tests/test_annotators_advanced.py b/holonote/tests/test_annotators_advanced.py new file mode 100644 index 0000000..eee86e6 --- /dev/null +++ b/holonote/tests/test_annotators_advanced.py @@ -0,0 +1,94 @@ +import holoviews as hv +import numpy as np +import pandas as pd + +from holonote.annotate import Annotator + + +def test_multipoint_range_commit_insertion(multiple_region_annotator): + descriptions = ['A point insertion', 'A range insertion'] + timestamp = np.datetime64('2022-06-06') + multiple_region_annotator.set_point(timestamp) + multiple_region_annotator.add_annotation(description=descriptions[0]) + + start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') + multiple_region_annotator.set_range(start, end) + multiple_region_annotator.add_annotation(description=descriptions[1]) + + multiple_region_annotator.commit() + + # FIXME! Index order is inverted? 
+ df = pd.DataFrame({'uuid': pd.Series(multiple_region_annotator.df.index[::-1], dtype=object), + 'point_TIME':[timestamp, pd.NaT], + 'start_TIME':[pd.NaT, start], + 'end_TIME': [pd.NaT, end], + 'description':descriptions} + ).set_index('uuid') + + sql_df = multiple_region_annotator.connector.load_dataframe() + pd.testing.assert_frame_equal(sql_df, df) + + +def test_infer_kdim_dtype_image(): + xvals, yvals = np.linspace(-4, 0, 202), np.linspace(4, 0, 202) + xs, ys = np.meshgrid(xvals, yvals) + image = hv.Image(np.sin(ys*xs), kdims=['A', 'B']) + assert Annotator._infer_kdim_dtypes(image) == {'A': np.float64, 'B': np.float64} + + +def test_infer_kdim_dtype_curve(): + curve = hv.Curve((np.arange('2005-02', '2005-03', dtype='datetime64[D]'), range(28)), kdims=['TIME']) + assert Annotator._infer_kdim_dtypes(curve) == {'TIME': np.datetime64} + + +def test_multiplot_add_annotation(multiple_annotators): + multiple_annotators["annotation1d"].set_range(np.datetime64('2005-02-13'), np.datetime64('2005-02-16')) + multiple_annotators["annotation2d"].set_range(-0.25, 0.25, -0.1, 0.1) + multiple_annotators["conn"].add_annotation(description='Multi-plot annotation') + + +class TestAnnotatorMultipleStringFields: + + def test_insertion_values(self, multiple_fields_annotator): + start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') + multiple_fields_annotator.set_range(start, end) + multiple_fields_annotator.add_annotation(field1='A test field', field2='Another test field') + commits = multiple_fields_annotator.annotation_table.commits() + kwargs = commits[0]['kwargs'] + assert len(commits)==1, 'Only one insertion commit made' + assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' + kwargs.pop('uuid') + assert kwargs == dict(field1='A test field', field2='Another test field', start_TIME=start, end_TIME=end) + + + def test_commit_insertion(self, multiple_fields_annotator): + start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') + field1 
= 'A test field' + field2 = 'Another test field' + multiple_fields_annotator.set_range(start, end) + multiple_fields_annotator.add_annotation(field1=field1, field2=field2) + multiple_fields_annotator.commit() + + df = pd.DataFrame({'uuid': pd.Series(multiple_fields_annotator.df.index[0], dtype=object), + 'start_TIME':[start], + 'end_TIME':[end], + 'field1':[field1], + 'field2':[field2]} + ).set_index('uuid') + + sql_df = multiple_fields_annotator.connector.load_dataframe() + pd.testing.assert_frame_equal(sql_df, df) + + + def test_commit_update(self, multiple_fields_annotator): + start1, end1 = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') + start2, end2 = np.datetime64('2023-06-06'), np.datetime64('2023-06-08') + multiple_fields_annotator.set_range(start1, end1) + multiple_fields_annotator.add_annotation(field1='Field 1.1', field2='Field 1.2') + multiple_fields_annotator.set_range(start2, end2) + multiple_fields_annotator.add_annotation(field1='Field 2.1', field2='Field 2.2') + multiple_fields_annotator.commit() + multiple_fields_annotator.update_annotation_fields(multiple_fields_annotator.df.index[0], field1='NEW Field 1.1') + multiple_fields_annotator.commit() + sql_df = multiple_fields_annotator.connector.load_dataframe() + assert set(sql_df['field1']) == {'NEW Field 1.1', 'Field 2.1'} diff --git a/holonote/tests/test_annotators_basic.py b/holonote/tests/test_annotators_basic.py new file mode 100644 index 0000000..2c91c31 --- /dev/null +++ b/holonote/tests/test_annotators_basic.py @@ -0,0 +1,456 @@ +# TODO: + +# * (after refactor) annotators -> annotator, connectors -> connector [ ] + +# TESTS + +# Schema error (needs file or connect in memory??) 
+# .snapshot() and .revert_to_snapshot() + +import uuid + +import numpy as np +import pandas as pd +import pytest + + +class TestBasicRange1DAnnotator: + def test_point_insertion_exception(self, annotator_range1d): + timestamp = np.datetime64('2022-06-06') + expected_msg = r"Point region types not enabled as region_types=\['Range'\]" + with pytest.raises(ValueError, match=expected_msg): + annotator_range1d.set_point(timestamp) + + def test_insertion_edit_table_columns(self, annotator_range1d): + annotator_range1d.set_range(np.datetime64('2022-06-06'), np.datetime64('2022-06-08')) + annotator_range1d.add_annotation(description='A test annotation!') + commits = annotator_range1d.annotation_table.commits() + assert len(commits)==1, 'Only one insertion commit made ' + annotator_range1d.commit() + assert commits[0]['operation'] == 'insert' + assert set(commits[0]['kwargs'].keys()) == set(annotator_range1d.connector.columns) + + def test_range_insertion_values(self, annotator_range1d) -> None: + start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') + annotator_range1d.set_range(start, end) + annotator_range1d.add_annotation(description='A test annotation!') + commits = annotator_range1d.annotation_table.commits() + assert len(commits)==1, 'Only one insertion commit made' + kwargs = commits[0]['kwargs'] + assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' + kwargs.pop('uuid') + assert kwargs == dict(description='A test annotation!', start_TIME=start, end_TIME=end) + + def test_range_commit_insertion(self, annotator_range1d): + start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') + description = 'A test annotation!'
+ annotator_range1d.set_range(start, end) + annotator_range1d.add_annotation(description=description) + annotator_range1d.commit() + + df = pd.DataFrame({'uuid': pd.Series(annotator_range1d.df.index[0], dtype=object), + 'start_TIME':[start], + 'end_TIME':[end], + 'description':[description]} + ).set_index('uuid') + + sql_df = annotator_range1d.connector.load_dataframe() + pd.testing.assert_frame_equal(sql_df, df) + + + def test_range_addition_deletion_by_uuid(self, annotator_range1d): + start1, end1 = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') + start2, end2 = np.datetime64('2023-06-06'), np.datetime64('2023-06-08') + start3, end3 = np.datetime64('2024-06-06'), np.datetime64('2024-06-08') + annotator_range1d.set_range(start1, end1) + annotator_range1d.add_annotation(description='Annotation 1') + annotator_range1d.set_range(start2, end2) + annotator_range1d.add_annotation(description='Annotation 2', uuid='08286429') + annotator_range1d.set_range(start3, end3) + annotator_range1d.add_annotation(description='Annotation 3') + annotator_range1d.commit() + sql_df = annotator_range1d.connector.load_dataframe() + assert set(sql_df['description']) ==set(['Annotation 1', 'Annotation 2', 'Annotation 3']) + deletion_index = sql_df.index[1] + annotator_range1d.delete_annotation(deletion_index) + annotator_range1d.commit() + sql_df = annotator_range1d.connector.load_dataframe() + assert set(sql_df['description']) == set(['Annotation 1', 'Annotation 3']) + + + def test_range_define_preserved_index_mismatch(self, annotator_range1d): + starts = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] + ends = [np.datetime64('2022-06-%.2d' % (d+2)) for d in range(6,15, 4)] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] + + data = pd.DataFrame({'uuid':annotation_id, 'start':starts, 'end':ends, 'description':descriptions}).set_index('uuid') + annotator_range1d.define_fields(data[['description']], 
preserve_index=True) + annotator_range1d.define_ranges(data['start'].iloc[:2], data['end'].iloc[:2]) + msg = f"Following annotations have no associated region: {{{annotation_id[2]!r}}}" + with pytest.raises(ValueError, match=msg): + annotator_range1d.commit() + + def test_range_define_auto_index_mismatch(self, annotator_range1d): + starts = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] + ends = [np.datetime64('2022-06-%.2d' % (d+2)) for d in range(6,15, 4)] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] + + data = pd.DataFrame({'uuid':annotation_id, 'start':starts, + 'end':ends, 'description':descriptions}).set_index('uuid') + annotator_range1d.define_fields(data[['description']], preserve_index=False) + annotator_range1d.define_ranges(data['start'].iloc[:2], data['end'].iloc[:2]) + with pytest.raises(ValueError, + match="Following annotations have no associated region:"): + annotator_range1d.commit() + + def test_range_define_unassigned_indices(self, annotator_range1d): + starts = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] + ends = [np.datetime64('2022-06-%.2d' % (d+2)) for d in range(6,15, 4)] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id1 = [uuid.uuid4().hex[:8] for d in [1,2,3]] + mismatched = [uuid.uuid4().hex[:8] for d in [1,2]] + annotation_id2 = [*mismatched, annotation_id1[2]] + + data1 = pd.DataFrame({'uuid':annotation_id1, 'start':starts, + 'end':ends, 'description':descriptions}).set_index('uuid') + data2 = pd.DataFrame({'uuid':annotation_id2, 'start':starts, + 'end':ends, 'description':descriptions}).set_index('uuid') + + annotator_range1d.define_fields(data1[['description']]) + with pytest.raises(KeyError, match=str(mismatched)): + annotator_range1d.define_ranges(data2['start'], data2['end']) + + +class TestBasicRange2DAnnotator: + + def test_point_insertion_exception(self, annotator_range2d): + x,y = 0.5,0.5 + expected_msg = 
r"Point region types not enabled as region_types=\['Range'\]" + with pytest.raises(ValueError, match=expected_msg): + annotator_range2d.set_point(x,y) + + def test_insertion_edit_table_columns(self, annotator_range2d): + annotator_range2d.set_range(-0.25, 0.25, -0.1, 0.1) + annotator_range2d.add_annotation(description='A test annotation!') + commits = annotator_range2d.annotation_table.commits() + assert len(commits)==1, 'Only one insertion commit made ' + annotator_range2d.commit() + assert commits[0]['operation'] == 'insert' + assert set(commits[0]['kwargs'].keys()) == set(annotator_range2d.connector.columns) + + def test_range_insertion_values(self, annotator_range2d): + startx, endx, starty, endy = -0.25, 0.25, -0.1, 0.1 + annotator_range2d.set_range(startx, endx, starty, endy) + annotator_range2d.add_annotation(description='A test annotation!') + commits = annotator_range2d.annotation_table.commits() + assert len(commits)==1, 'Only one insertion commit made' + kwargs = commits[0]['kwargs'] + assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' + kwargs.pop('uuid') + assert kwargs == dict(description='A test annotation!', + start_x=startx, end_x=endx, start_y=starty, end_y=endy) + + def test_range_commit_insertion(self, annotator_range2d): + startx, endx, starty, endy = -0.25, 0.25, -0.1, 0.1 + description = 'A test annotation!' 
+ annotator_range2d.set_range(startx, endx, starty, endy) + annotator_range2d.add_annotation(description=description) + annotator_range2d.commit() + + df = pd.DataFrame({'uuid': pd.Series(annotator_range2d.df.index[0], dtype=object), + 'start_x':[startx], + 'start_y':[starty], + 'end_x':[endx], + 'end_y':[endy], + 'description':[description]} + ).set_index('uuid') + + sql_df = annotator_range2d.connector.load_dataframe() + pd.testing.assert_frame_equal(sql_df, df) + + + def test_range_addition_deletion_by_uuid(self, annotator_range2d): + startx1, endx1, starty1, endy1 = -0.251, 0.251, -0.11, 0.11 + startx2, endx2, starty2, endy2 = -0.252, 0.252, -0.12, 0.12 + startx3, endx3, starty3, endy3 = -0.253, 0.253, -0.13, 0.13 + annotator_range2d.set_range(startx1, endx1, starty1, endy1) + annotator_range2d.add_annotation(description='Annotation 1') + annotator_range2d.set_range(startx2, endx2, starty2, endy2) + annotator_range2d.add_annotation(description='Annotation 2', uuid='08286429') + annotator_range2d.set_range(startx3, endx3, starty3, endy3) + annotator_range2d.add_annotation(description='Annotation 3') + annotator_range2d.commit() + sql_df = annotator_range2d.connector.load_dataframe() + assert set(sql_df['description']) == set(['Annotation 1', 'Annotation 2', 'Annotation 3']) + deletion_index = sql_df.index[1] + annotator_range2d.delete_annotation(deletion_index) + annotator_range2d.commit() + sql_df = annotator_range2d.connector.load_dataframe() + assert set(sql_df['description']) == set(['Annotation 1', 'Annotation 3']) + + + def test_range_define_preserved_index_mismatch(self, annotator_range2d): + xstarts, xends = [-0.3, -0.2, -0.1], [0.3, 0.2, 0.1] + ystarts, yends = [-0.35, -0.25, -0.15], [0.35, 0.25, 0.15] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] + + data = pd.DataFrame({'uuid':annotation_id, 'xstart':xstarts, 'xend':xends, + 'ystart':ystarts, 'yend':yends, + 
'description':descriptions}).set_index('uuid') + annotator_range2d.define_fields(data[['description']], preserve_index=True) + annotator_range2d.define_ranges(data['xstart'].iloc[:2], data['xend'].iloc[:2], + data['ystart'].iloc[:2], data['yend'].iloc[:2]) + + msg = f"Following annotations have no associated region: {{{annotation_id[2]!r}}}" + with pytest.raises(ValueError, match=msg): + annotator_range2d.commit() + + def test_range_define_auto_index_mismatch(self, annotator_range2d): + xstarts, xends = [-0.3, -0.2, -0.1], [0.3, 0.2, 0.1] + ystarts, yends = [-0.35, -0.25, -0.15], [0.35, 0.25, 0.15] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] + data = pd.DataFrame({'uuid':annotation_id, 'xstart':xstarts, 'xend':xends, + 'ystart':ystarts, 'yend':yends, + 'description':descriptions}).set_index('uuid') + annotator_range2d.define_fields(data[['description']], preserve_index=False) + annotator_range2d.define_ranges(data['xstart'].iloc[:2], data['xend'].iloc[:2], + data['ystart'].iloc[:2], data['yend'].iloc[:2]) + msg = "Following annotations have no associated region:" + with pytest.raises(ValueError, match=msg): + annotator_range2d.commit() + + def test_range_define_unassigned_indices(self, annotator_range2d): + xstarts, xends = [-0.3, -0.2, -0.1], [0.3, 0.2, 0.1] + ystarts, yends = [-0.35, -0.25, -0.15], [0.35, 0.25, 0.15] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id1 = [uuid.uuid4().hex[:8] for d in [1,2,3]] + mismatched = [uuid.uuid4().hex[:8] for d in [1,2]] + annotation_id2 = [*mismatched, annotation_id1[2]] + + data1 = pd.DataFrame({'uuid':annotation_id1, 'xstart':xstarts, 'xend':xends, + 'ystart':ystarts, 'yend':yends, + 'description':descriptions}).set_index('uuid') + data2 = pd.DataFrame({'uuid':annotation_id2, 'xstart':xstarts, 'xend':xends, + 'ystart':ystarts, 'yend':yends, + 'description':descriptions}).set_index('uuid') + + 
annotator_range2d.define_fields(data1[['description']]) + with pytest.raises(KeyError, match=str(mismatched)): + annotator_range2d.define_ranges(data2['xstart'], data2['xend'], + data2['ystart'], data2['yend']) + + +class TestBasicPoint1DAnnotator: + + def test_insertion_edit_table_columns(self, annotator_point1d): + annotator_point1d.set_point(np.datetime64('2022-06-06')) + annotator_point1d.add_annotation(description='A test annotation!') + commits = annotator_point1d.annotation_table.commits() + assert len(commits)==1, 'Only one insertion commit made ' + annotator_point1d.commit() + assert commits[0]['operation'] == 'insert' + assert set(commits[0]['kwargs'].keys()) == set(annotator_point1d.connector.columns) + + def test_range_insertion_exception(self, annotator_point1d): + start, end = np.datetime64('2022-06-06'), np.datetime64('2022-06-08') + msg = r"Range region types not enabled as region_types=\['Point'\]" + with pytest.raises(ValueError, match=msg): + annotator_point1d.set_range(start, end) + + def test_point_insertion_values(self, annotator_point1d): + timestamp = np.datetime64('2022-06-06') + annotator_point1d.set_point(timestamp) + annotator_point1d.add_annotation(description='A test annotation!') + commits = annotator_point1d.annotation_table.commits() + assert len(commits)==1, 'Only one insertion commit made' + kwargs = commits[0]['kwargs'] + assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' + kwargs.pop('uuid') + assert kwargs == dict(description='A test annotation!', point_TIME=timestamp) + + def test_point_commit_insertion(self, annotator_point1d): + timestamp = np.datetime64('2022-06-06') + description = 'A test annotation!' 
+ annotator_point1d.set_point(timestamp) + annotator_point1d.add_annotation(description=description) + annotator_point1d.commit() + + df = pd.DataFrame({'uuid': pd.Series(annotator_point1d.df.index[0], dtype=object), + 'point_TIME':[timestamp], + 'description':[description]} + ).set_index('uuid') + + sql_df = annotator_point1d.connector.load_dataframe() + pd.testing.assert_frame_equal(sql_df, df) + + + def test_point_addition_deletion_by_uuid(self, annotator_point1d): + ts1 = np.datetime64('2022-06-06') + ts2 = np.datetime64('2023-06-06') + ts3 = np.datetime64('2024-06-06') + annotator_point1d.set_point(ts1) + annotator_point1d.add_annotation(description='Annotation 1') + annotator_point1d.set_point(ts2) + annotator_point1d.add_annotation(description='Annotation 2', uuid='08286429') + annotator_point1d.set_point(ts3) + annotator_point1d.add_annotation(description='Annotation 3') + annotator_point1d.commit() + sql_df = annotator_point1d.connector.load_dataframe() + assert set(sql_df['description']) == set(['Annotation 1', 'Annotation 2', 'Annotation 3']) + deletion_index = sql_df.index[1] + annotator_point1d.delete_annotation(deletion_index) + annotator_point1d.commit() + sql_df = annotator_point1d.connector.load_dataframe() + assert set(sql_df['description']) == set(['Annotation 1', 'Annotation 3']) + + def test_point_define_preserved_index_mismatch(self, annotator_point1d): + timestamps = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] + + data = pd.DataFrame({'uuid':annotation_id, 'timestamps':timestamps, + 'description':descriptions}).set_index('uuid') + annotator_point1d.define_fields(data[['description']], preserve_index=True) + annotator_point1d.define_points(data['timestamps'].iloc[:2]) + msg = f"Following annotations have no associated region: {{{annotation_id[2]!r}}}" + with pytest.raises(ValueError, match=msg): + 
annotator_point1d.commit() + + def test_point_define_auto_index_mismatch(self, annotator_point1d): + timestamps = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] + + data = pd.DataFrame({'uuid':annotation_id, 'timestamps':timestamps, + 'description':descriptions}).set_index('uuid') + annotator_point1d.define_fields(data[['description']], preserve_index=False) + annotator_point1d.define_points(data['timestamps'].iloc[:2]) + with pytest.raises(ValueError, match="Following annotations have no associated region:"): + annotator_point1d.commit() + + def test_point_define_unassigned_indices(self, annotator_point1d): + timestamps = [np.datetime64('2022-06-%.2d' % d) for d in range(6,15, 4)] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id1 = [uuid.uuid4().hex[:8] for d in [1,2,3]] + mismatched = [uuid.uuid4().hex[:8] for d in [1,2]] + annotation_id2 = [*mismatched, annotation_id1[2]] + + data1 = pd.DataFrame({'uuid':annotation_id1, 'timestamps':timestamps, + 'description':descriptions}).set_index('uuid') + data2 = pd.DataFrame({'uuid':annotation_id2, 'timestamps':timestamps, + 'description':descriptions}).set_index('uuid') + + annotator_point1d.define_fields(data1[['description']]) + with pytest.raises(KeyError, match=str(mismatched)): + annotator_point1d.define_points(data2['timestamps']) + + +class TestBasicPoint2DAnnotator: + + def test_insertion_edit_table_columns(self, annotator_point2d): + annotator_point2d.set_point(-0.25, 0.1) + annotator_point2d.add_annotation(description='A test annotation!') + commits = annotator_point2d.annotation_table.commits() + assert len(commits)==1, 'Only one insertion commit made ' + annotator_point2d.commit() + assert commits[0]['operation'] == 'insert' + assert set(commits[0]['kwargs'].keys()) == set(annotator_point2d.connector.columns) + + def test_range_insertion_exception(self, 
annotator_point2d): + x1,x2,y1,y2 = -0.25,0.25, -0.3, 0.3 + expected_msg = r"Range region types not enabled as region_types=\['Point'\]" + with pytest.raises(ValueError, match=expected_msg): + annotator_point2d.set_range(x1,x2,y1,y2) + + def test_point_insertion_values(self, annotator_point2d): + x,y = 0.5, 0.3 + annotator_point2d.set_point(x,y) + annotator_point2d.add_annotation(description='A test annotation!') + commits = annotator_point2d.annotation_table.commits() + assert len(commits)==1, 'Only one insertion commit made' + kwargs = commits[0]['kwargs'] + assert 'uuid' in kwargs.keys(), 'Expected uuid primary key in kwargs' + kwargs.pop('uuid') + assert kwargs == dict(description='A test annotation!', point_x=x, point_y=y) + + def test_point_commit_insertion(self, annotator_point2d): + x, y = 0.5, 0.3 + description = 'A test annotation!' + annotator_point2d.set_point(x,y) + annotator_point2d.add_annotation(description=description) + annotator_point2d.commit() + + df = pd.DataFrame({'uuid': pd.Series(annotator_point2d.df.index[0], dtype=object), + 'point_x':[x], + 'point_y':[y], + 'description':[description]} + ).set_index('uuid') + + sql_df = annotator_point2d.connector.load_dataframe() + pd.testing.assert_frame_equal(sql_df, df) + + + def test_point_addition_deletion_by_uuid(self, annotator_point2d): + x1, y1 = 0.2,0.2 + x2, y2 = 0.3,0.3 + x3, y3 = 0.4,0.4 + annotator_point2d.set_point(x1, y1) + annotator_point2d.add_annotation(description='Annotation 1') + annotator_point2d.set_point(x2, y2) + annotator_point2d.add_annotation(description='Annotation 2', uuid='08286429') + annotator_point2d.set_point(x3, y3) + annotator_point2d.add_annotation(description='Annotation 3') + annotator_point2d.commit() + sql_df = annotator_point2d.connector.load_dataframe() + assert set(sql_df['description']) == set(['Annotation 1', 'Annotation 2', 'Annotation 3']) + deletion_index = sql_df.index[1] + annotator_point2d.delete_annotation(deletion_index) + 
annotator_point2d.commit() + sql_df = annotator_point2d.connector.load_dataframe() + assert set(sql_df['description']) == set(['Annotation 1', 'Annotation 3']) + + def test_point_define_preserved_index_mismatch(self, annotator_point2d): + xs, ys = [-0.1,-0.2,-0.3], [0.1,0.2,0.3] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] + + data = pd.DataFrame({'uuid':annotation_id, 'xs':xs, 'ys':ys, + 'description':descriptions}).set_index('uuid') + annotator_point2d.define_fields(data[['description']], preserve_index=True) + annotator_point2d.define_points(data['xs'].iloc[:2], data['ys'].iloc[:2]) + msg = f"Following annotations have no associated region: {{{annotation_id[2]!r}}}" + with pytest.raises(ValueError, match=msg): + annotator_point2d.commit() + + def test_point_define_auto_index_mismatch(self, annotator_point2d): + xs, ys = [-0.1,-0.2,-0.3], [0.1,0.2,0.3] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id = [uuid.uuid4().hex[:8] for d in [1,2,3]] + + data = pd.DataFrame({'uuid':annotation_id, 'xs':xs, 'ys':ys, + 'description':descriptions}).set_index('uuid') + annotator_point2d.define_fields(data[['description']], preserve_index=False) + annotator_point2d.define_points(data['xs'].iloc[:2], data['ys'].iloc[:2]) + msg = "Following annotations have no associated region:" + with pytest.raises(ValueError, match=msg): + annotator_point2d.commit() + + def test_point_define_unassigned_indices(self, annotator_point2d): + xs, ys = [-0.1,-0.2,-0.3], [0.1,0.2,0.3] + descriptions = ['Annotation %d' % d for d in [1,2,3]] + annotation_id1 = [uuid.uuid4().hex[:8] for d in [1,2,3]] + mismatched = [uuid.uuid4().hex[:8] for d in [1,2]] + annotation_id2 = [*mismatched, annotation_id1[2]] + + data1 = pd.DataFrame({'uuid':annotation_id1, 'xs':xs, 'ys':ys, + 'description':descriptions}).set_index('uuid') + data2 = pd.DataFrame({'uuid':annotation_id2, 'xs':xs, 'ys':ys, + 
'description':descriptions}).set_index('uuid') + + annotator_point2d.define_fields(data1[['description']]) + with pytest.raises(KeyError, match=str(mismatched)): + annotator_point2d.define_points(data2['xs'], data2['ys']) diff --git a/holonote/tests/test_connectors.py b/holonote/tests/test_connectors.py index ba4a058..33d1e2d 100644 --- a/holonote/tests/test_connectors.py +++ b/holonote/tests/test_connectors.py @@ -1,103 +1,102 @@ -import sys -import unittest - import numpy as np import pandas as pd -from holonote.annotate import Connector, SQLiteDB, AutoIncrementKey, UUIDHexStringKey, UUIDBinaryKey - - -filename = ':memory:' - - -class TestConnector(unittest.TestCase): +import pytest + +from holonote.annotate import ( + AutoIncrementKey, + Connector, + UUIDBinaryKey, + UUIDHexStringKey, +) + + +@pytest.fixture(params=[UUIDHexStringKey, AutoIncrementKey, UUIDBinaryKey]) +def database(conn_sqlite_uuid, request): + # Change the primary key type + conn_sqlite_uuid.primary_key = request.param(field_name='uuid') + fields = { + 'uuid': conn_sqlite_uuid.primary_key.schema, + 'description': 'TEXT', + 'start':'TIMESTAMP', + 'end': 'TIMESTAMP' + } + conn_sqlite_uuid.initialize(fields) + return conn_sqlite_uuid + + +class TestConnector: "Tests for classmethods on the base class" def test_fields_from_metadata_literals(self): fields = Connector.schema_from_field_values({'A':3, 'B':'string', 'C':False}) - self.assertEqual(fields,{'A': 'INTEGER', 'B': 'TEXT', 'C': 'BOOLEAN'}) + assert fields == {'A': 'INTEGER', 'B': 'TEXT', 'C': 'BOOLEAN'} def test_schema_from_value_datetime(self): datetime_type = Connector.field_value_to_type(np.datetime64('NaT')) - self.assertEqual(Connector.type_mapping[datetime_type],'TIMESTAMP') + assert Connector.type_mapping[datetime_type] == 'TIMESTAMP' def test_expand_range_region_column_schema_datetime(self): result = Connector.expand_region_column_schema(['Range'], {'xdim':np.datetime64}) - self.assertEqual(result, {'start_xdim':'TIMESTAMP', 
'end_xdim':'TIMESTAMP'}) + assert result == {'start_xdim': 'TIMESTAMP', 'end_xdim': 'TIMESTAMP'} def test_expand_range_region_column_schema_datetimes(self): result = Connector.expand_region_column_schema(['Range'], {'xdim':np.datetime64, 'ydim':int}) - self.assertEqual(result,{'start_xdim':'TIMESTAMP', - 'end_xdim':'TIMESTAMP', - 'start_ydim':'INTEGER', - 'end_ydim':'INTEGER'}) + expected = { + 'start_xdim': 'TIMESTAMP', 'end_xdim': 'TIMESTAMP', + 'start_ydim': 'INTEGER', 'end_ydim': 'INTEGER' + } + assert result == expected def test_generate_schema(self): region_types = [['Range']] kdim_dtypes = [{'xdim':np.datetime64, 'ydim':int}] result = Connector.generate_schema(AutoIncrementKey(), region_types, kdim_dtypes, {'description':str}) - self.assertEqual(result, {'id': 'INTEGER PRIMARY KEY AUTOINCREMENT', - 'start_xdim': 'TIMESTAMP', - 'start_ydim': 'INTEGER', - 'end_xdim': 'TIMESTAMP', - 'end_ydim': 'INTEGER', - 'description': 'TEXT'}) + expected = { + 'id': 'INTEGER PRIMARY KEY AUTOINCREMENT', 'start_xdim': 'TIMESTAMP', + 'start_ydim': 'INTEGER', 'end_xdim': 'TIMESTAMP', + 'end_ydim': 'INTEGER', 'description': 'TEXT' + } + assert result == expected -class TestSQLiteUUIDHexKey(unittest.TestCase): - """ - Example using policy = 'insert' - """ +class TestSQLiteDB: - def setUp(self): - self.db = SQLiteDB(filename=filename, primary_key=UUIDHexStringKey()) + def test_setup(self, database): + assert database.con is not None - fields = {'uuid': self.db.primary_key.schema, - 'description': 'TEXT', - 'start':'TIMESTAMP', - 'end': 'TIMESTAMP'} - self.db.initialize(fields) + def test_initialized(self, database): + assert not database.uninitialized - - def tearDown(self): - self.db.con.close() - - def test_setup(self): - self.assertTrue(self.db.con is not None) - - def test_initialized(self): - self.assertFalse(self.db.uninitialized) - - def test_add_row(self): - id1 = self.db.primary_key(self.db) + def test_add_row(self, database, request): + id1 = 
database.primary_key(database) start = pd.Timestamp('2022-06-01') end = pd.Timestamp('2022-06-03') description = 'A description' - insertion = {'uuid': id1, 'description':description, 'start':start, 'end':end} - df = pd.DataFrame({'uuid':pd.Series([id1], dtype=object), - 'description':[description], 'start':[start], 'end':[end]}).set_index('uuid') - self.db.add_row(**insertion) - pd.testing.assert_frame_equal(self.db.load_dataframe(), df) - - - def test_add_three_rows_delete_one(self): - id1 = self.db.primary_key(self.db) + insertion = {"uuid": id1, 'description':description, 'start':start, 'end':end} + df = pd.DataFrame({"uuid":pd.Series([id1], dtype=object), + 'description':[description], 'start':[start], 'end':[end]}).set_index("uuid") + database.add_row(**insertion) + pd.testing.assert_frame_equal(database.load_dataframe(), df) + + def test_add_three_rows_delete_one(self, database): + id1 = database.primary_key(database) insertion1 = {'uuid': id1, - 'description':f'A description', + 'description':'A description', 'start':pd.Timestamp('2022-06-01'), 'end':pd.Timestamp('2022-06-03')} - id2 = self.db.primary_key(self.db) + id2 = database.primary_key(database, [id1]) insertion2 = {'uuid': id2, - 'description':f'A 2nd description', + 'description':'A 2nd description', 'start':pd.Timestamp('2024-06-01'), 'end':pd.Timestamp('2024-06-03')} - id3 = self.db.primary_key(self.db) + id3 = database.primary_key(database, [id2]) insertion3 = {'uuid': id3, - 'description':f'A 3rd description', + 'description':'A 3rd description', 'start':pd.Timestamp('2026-06-01'), 'end':pd.Timestamp('2026-06-03')} @@ -106,86 +105,8 @@ def test_add_three_rows_delete_one(self): 'start':[insertion1['start'], insertion3['start']], 'end':[insertion1['end'], insertion3['end']]} df = pd.DataFrame(df_data).set_index('uuid') - self.db.add_row(**insertion1) - self.db.add_row(**insertion2) - self.db.add_row(**insertion3) - self.db.delete_row(id2) - pd.testing.assert_frame_equal(self.db.load_dataframe(), 
df) - - -class TestSQLiteDBAutoIncrementKey(unittest.TestCase): - - def setUp(self): - self.db = SQLiteDB(filename=filename, primary_key=AutoIncrementKey()) - - fields = {'id': self.db.primary_key.schema, - 'description': 'TEXT', - 'start':'TIMESTAMP', - 'end': 'TIMESTAMP'} - self.db.initialize(fields) - - def tearDown(self): - self.db.con.close() - - def test_setup(self): - self.assertTrue(self.db.con is not None) - - def test_columns(self): - self.assertEqual(self.db.columns,('id', 'description', 'start', 'end')) - - def test_add_row(self): - id1 = self.db.primary_key(self.db) - insertion = {'id': id1, - 'description':f'A description', - 'start':pd.Timestamp('2022-06-01'), - 'end':pd.Timestamp('2022-06-03')} - - self.db.add_row(**insertion) - df = pd.DataFrame([insertion]).set_index('id') - pd.testing.assert_frame_equal(self.db.load_dataframe(), df) - - def test_add_row_mismatch(self): - insertion = {'id': 200, # Will not match autoincrement rowid - 'description':f'A description', - 'start':pd.Timestamp('2022-06-01'), - 'end':pd.Timestamp('2022-06-03')} - - insertion_mismatched_id = insertion.copy() - df = pd.DataFrame([insertion_mismatched_id]).set_index('id') - self.db.add_row(**insertion) - self.assertFalse(self.db.load_dataframe().equals(df)) - - -class TestSQLiteUUIDBinaryKey(unittest.TestCase): - """ - Example using policy = 'insert' - """ - - def setUp(self): - self.db = SQLiteDB(filename=filename, primary_key=UUIDBinaryKey()) - - fields = {'uuid': self.db.primary_key.schema, - 'description': 'TEXT', - 'start':'TIMESTAMP', - 'end': 'TIMESTAMP'} - self.db.initialize(fields) - - - def tearDown(self): - self.db.con.close() - - def test_setup(self): - self.assertTrue(self.db.con is not None) - - def test_initialized(self): - self.assertFalse(self.db.uninitialized) - - def test_add_row(self): - id1 = self.db.primary_key(self.db) - insertion = {'uuid': id1, - 'description':f'A description', - 'start':pd.Timestamp('2022-06-01'), - 
'end':pd.Timestamp('2022-06-03')} - df = pd.DataFrame([insertion]).set_index('uuid') - self.db.add_row(**insertion) - pd.testing.assert_frame_equal(self.db.load_dataframe(), df) + database.add_row(**insertion1) + database.add_row(**insertion2) + database.add_row(**insertion3) + database.delete_row(id2) + pd.testing.assert_frame_equal(database.load_dataframe(), df)