Skip to content

Commit

Permalink
Merge pull request #6558 from markotoplak/dask-zero-len
Browse files (browse the repository at this point in the history)
DaskTable: fix loading zero length dask arrays
  • Loading branch information
markotoplak authored Aug 30, 2023
2 parents 51beee7 + ef4fd0c commit 1431a72
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 3 deletions.
4 changes: 2 additions & 2 deletions Orange/data/dask.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ def from_arrays(cls, domain, X=None, Y=None, metas=None):
assert size is not None

if X is None:
X = da.zeros((size, 0), chunks=(size, 0))
X = da.zeros((size, len(domain.attributes)), chunks=(size, -1))

if Y is None:
Y = da.zeros((size, 0), chunks=(size, 0))
Y = da.zeros((size, len(domain.class_vars)), chunks=(size, -1))

if metas is None:
metas = np.zeros((size, 0))
Expand Down
26 changes: 25 additions & 1 deletion Orange/tests/test_dasktable.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ def same_tables(self, table, dasktable):
numpy.testing.assert_equal(dasktable.Y, table.Y)
numpy.testing.assert_equal(dasktable.metas, table.metas)

def test_zero_size_dask_arrays(self):
def test_zero_width_dask_arrays(self):
empty = Table.from_numpy(Domain([], [ContinuousVariable("y")]),
X=np.ones((10**5, 0)),
Y=np.ones((10**5, 1)))
Expand All @@ -111,6 +111,30 @@ def test_zero_size_dask_arrays(self):
self.assertEqual(data.X.shape, (10**5, 1))
self.assertEqual(data._Y.shape, (10**5, 0))

def test_zero_len_dask_arrays(self):
    """Check that tables with zero rows open as dask-backed tables.

    Two layouts are exercised, one after the other: both variables used
    as class variables (empty X, two-column Y) and both used as
    attributes (two-column X, empty Y).  In each case the opened table
    must be a ``DaskTable`` whose ``X`` and ``_Y`` are dask arrays with
    the same shapes as the numpy arrays the source table was built from.
    """
    variables = [ContinuousVariable("x"), ContinuousVariable("y")]

    # (attributes, class_vars, expected X shape, expected Y shape)
    layouts = (
        ([], variables, (0, 0), (0, 2)),
        (variables, [], (0, 2), (0, 0)),
    )

    for attributes, class_vars, x_shape, y_shape in layouts:
        source = Table.from_numpy(
            Domain(attributes, class_vars),
            X=np.ones(x_shape),
            Y=np.ones(y_shape),
        )

        with open_as_dask(source) as data:
            # Types first, then shapes.
            self.assertIsInstance(data, DaskTable)
            self.assertIsInstance(data.X, da.Array)
            self.assertIsInstance(data._Y, da.Array)
            self.assertEqual(data.X.shape, x_shape)
            self.assertEqual(data._Y.shape, y_shape)

def test_compute(self):
zoo = Table('zoo')
with named_file('', suffix='.hdf5') as fn:
Expand Down

0 comments on commit 1431a72

Please sign in to comment.