From ef4fd0cc8f7b40c1b896d1ff7e7ecec96329a31e Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Wed, 30 Aug 2023 15:13:06 +0200 Subject: [PATCH] DaskTable: fix loading zero length dask arrays --- Orange/data/dask.py | 4 ++-- Orange/tests/test_dasktable.py | 26 +++++++++++++++++++++++++- 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/Orange/data/dask.py b/Orange/data/dask.py index c9e4bb99b38..3f4575d61e0 100644 --- a/Orange/data/dask.py +++ b/Orange/data/dask.py @@ -86,10 +86,10 @@ def from_arrays(cls, domain, X=None, Y=None, metas=None): assert size is not None if X is None: - X = da.zeros((size, 0), chunks=(size, 0)) + X = da.zeros((size, len(domain.attributes)), chunks=(size, -1)) if Y is None: - Y = da.zeros((size, 0), chunks=(size, 0)) + Y = da.zeros((size, len(domain.class_vars)), chunks=(size, -1)) if metas is None: metas = np.zeros((size, 0)) diff --git a/Orange/tests/test_dasktable.py b/Orange/tests/test_dasktable.py index 6a97328535e..cca5ea6e28f 100644 --- a/Orange/tests/test_dasktable.py +++ b/Orange/tests/test_dasktable.py @@ -90,7 +90,7 @@ def same_tables(self, table, dasktable): numpy.testing.assert_equal(dasktable.Y, table.Y) numpy.testing.assert_equal(dasktable.metas, table.metas) - def test_zero_size_dask_arrays(self): + def test_zero_width_dask_arrays(self): empty = Table.from_numpy(Domain([], [ContinuousVariable("y")]), X=np.ones((10**5, 0)), Y=np.ones((10**5, 1))) @@ -111,6 +111,30 @@ def test_zero_size_dask_arrays(self): self.assertEqual(data.X.shape, (10**5, 1)) self.assertEqual(data._Y.shape, (10**5, 0)) + def test_zero_len_dask_arrays(self): + atts = [ContinuousVariable("x"), ContinuousVariable("y")] + empty = Table.from_numpy(Domain([], atts), + X=np.ones((0, 0)), + Y=np.ones((0, 2))) + + with open_as_dask(empty) as data: + self.assertIsInstance(data, DaskTable) + self.assertIsInstance(data.X, da.Array) + self.assertIsInstance(data._Y, da.Array) + self.assertEqual(data.X.shape, (0, 0)) + self.assertEqual(data._Y.shape, (0, 2)) + + empty = Table.from_numpy(Domain(atts, []), + X=np.ones((0, 2)), + Y=np.ones((0, 0))) + + with open_as_dask(empty) as data: + self.assertIsInstance(data, DaskTable) + self.assertIsInstance(data.X, da.Array) + self.assertIsInstance(data._Y, da.Array) + self.assertEqual(data.X.shape, (0, 2)) + self.assertEqual(data._Y.shape, (0, 0)) + def test_compute(self): zoo = Table('zoo') with named_file('', suffix='.hdf5') as fn: