From f8ec503d6ce5a6ad6b55a7c85ac8817c15f400f8 Mon Sep 17 00:00:00 2001
From: noahnovsak
Date: Wed, 16 Aug 2023 12:20:11 +0200
Subject: [PATCH] RemoveNaNColumns computes nans in advance

---
 Orange/preprocess/preprocess.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Orange/preprocess/preprocess.py b/Orange/preprocess/preprocess.py
index 44f7cc6f6f2..6843dc1072c 100644
--- a/Orange/preprocess/preprocess.py
+++ b/Orange/preprocess/preprocess.py
@@ -307,7 +307,8 @@ def __call__(self, data, threshold=None):
                         self.threshold
         if isinstance(threshold, float):
             threshold = threshold * data.X.shape[0]
-        nans = np.sum(np.isnan(data.X), axis=0)
+        # compute nans in advance, otherwise dask will do it for every attribute
+        nans = np.asarray(np.sum(np.isnan(data.X), axis=0))
         att = [a for a, n in zip(data.domain.attributes, nans) if n < threshold]
         domain = Orange.data.Domain(att, data.domain.class_vars,
                                     data.domain.metas)