Skip to content

Commit e412e87

Browse files
authored
Merge pull request #5 from mgbckr/issue/1
Catch zero variance features
2 parents 7a29cdd + 2e368bd commit e412e87

File tree

3 files changed

+71
-2
lines changed

3 files changed

+71
-2
lines changed

setup.cfg

-1
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@ package_dir =
3939
=src
4040

4141
# Require a min/specific Python version (comma-separated conditions)
42-
# numba currently only supports Python up to 3.10
4342
python_requires = >=3.9
4443

4544
# Add here dependencies of your project (line-separated), e.g. requests>=2.2,<3.0.

src/corals/correlation/topk/_deprecated/original.py

+35-1
Original file line number | Diff line number | Diff line change
@@ -212,7 +212,41 @@ def topk_balltree_combined_tree_parallel_optimized(
212212
n_jobs_transfer_mode="function",
213213
symmetrize=False,
214214
argtopk_method="argsort",
215-
require_sorted_topk=True):
215+
require_sorted_topk=True,
216+
handle_zero_variance="raise" # None, "raise", "return indices",
217+
):
218+
219+
if handle_zero_variance is not None:
220+
221+
X = np.array(X)
222+
223+
X_zero_var_msk = np.all(np.isclose(X, X[0,:]), axis=0)
224+
X_zero_var_idx = np.arange(X.shape[1])[X_zero_var_msk]
225+
226+
if handle_zero_variance == "raise":
227+
if len(X_zero_var_idx) > 0:
228+
raise ValueError(
229+
f"Zero variance in X. Please remove. Indices: {X_zero_var_idx}")
230+
231+
if Y is not None:
232+
233+
Y = np.array(Y)
234+
235+
Y_zero_var_msk = np.all(np.isclose(Y, Y[0,:]), axis=0)
236+
Y_zero_var_idx = np.arange(Y.shape[1])[Y_zero_var_msk]
237+
238+
if handle_zero_variance == "raise":
239+
if len(Y_zero_var_idx) > 0:
240+
raise ValueError(
241+
f"Zero variance in Y. Please remove. Indices: {Y_zero_var_idx}")
242+
243+
if handle_zero_variance == "return indices":
244+
245+
if Y is None:
246+
return None, X_zero_var_idx
247+
else:
248+
return None, (X_zero_var_idx, Y_zero_var_idx)
249+
216250

217251
if n_jobs is None:
218252
n_jobs = 1

tests/test_issue1.py

+36
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
import pytest
2+
3+
4+
def test_issue1():
    """Regression test for issue #1: zero-variance features are rejected.

    The embedded table has one row per feature and one column per sample.
    Its first feature row is all zeros, so after transposing (samples as
    rows, features as columns) feature index 0 is constant. The test expects
    ``cor_topk`` to raise ``ValueError`` with a message naming exactly that
    index. One other row contains a ``NaN`` entry and is not expected to be
    reported.
    """
    # Whitespace-separated table; the header names the feature column plus
    # ten samples. Kept flush-left so no row starts with indentation.
    data = """
Name ERR4165185 ERR4165186 ERR4165187 ERR4165188 ERR4165189 ERR4165190 ERR4165191 ERR4165192 ERR4165193 ERR4165194
Scp1_US851008_k31_TRINITY_DN18756_c0_g1_i4 0 0 0 0 0 0 0 0 0 0
Scp1_US851008_k25_TRINITY_DN10094_c0_g1_i2 0 5.94091 5.58655 5.81978 5.39608 8.5646 6.63334 6.29947 4.76114 2.87519
Scp1_US851008_k25_TRINITY_DN7610_c1_g1_i24 0 0.584322 0.361902 0.372344 0.372583 0 0.16313 0 0.874301 0.567489
Scp1_US851008_k25_TRINITY_DN3138_c0_g1_i2 0 3.66967 2.61626 4.58642 1.00118 7.74655 6.50818 3.50205 2.80706 3.18808
Scp1_US851008_k25_TRINITY_DN66949_c0_g1_i1 0 0.55958 0.508452 0.188733 0 2.05569 0.622747 0.18403 1.22889 0.525269
Scp1_US851008_k25_TRINITY_DN42729_c0_g1_i3 0 NaN 0 0 0 0 0 0 4.98475 0
Scp1_US851008_k25_TRINITY_DN5537_c0_g1_i1 0 0 0 0 0.068946 0.997404 0.394103 0.13994 0.375641 3.50364
Scp1_US851008_k31_TRINITY_DN9195_c0_g2_i2 0 1.38316 0.785248 2.0806 1.17822 0 0 0 1.1218 5.1715
Scp1_US851008_k31_TRINITY_DN9068_c0_g1_i22 0 0 0 0 0 0 0.164973 0.296276 0 2.88606
"""

    # NOTE(review): the thread limit is configured before pandas and the
    # correlation code are imported, as in the original; presumably the
    # setting must be in place before the numerical libraries load, so keep
    # this import order.
    from corals.threads import set_threads_for_external_libraries
    set_threads_for_external_libraries(n_threads=1)

    import pandas as pd
    from io import StringIO
    from corals.correlation.topk.default import cor_topk

    # Split on any run of whitespace rather than on literal tabs: a tab
    # separator silently breaks as soon as an editor, formatter or copy/paste
    # converts the embedded tabs to spaces (the frame collapses to a single
    # column and set_index('Name') raises KeyError before the code under test
    # is reached). No field in this table contains whitespace, so both the
    # tab- and the space-separated form parse identically.
    df = pd.read_csv(StringIO(data), sep=r"\s+")
    df.set_index('Name', inplace=True)
    df_transposed = df.T

    with pytest.raises(ValueError) as e:
        cor_topk(df_transposed, k=0.001, correlation_type="spearman", n_jobs=4)

    # Checked after the block has exited: statements placed after the raising
    # call inside the `with` body would never execute.
    assert e.match(r"^Zero variance in X\. Please remove\. Indices: \[0\]$")
33+
34+
35+
if __name__ == '__main__':
36+
test_issue1()

0 commit comments

Comments
 (0)