From b65a1bfb70f0a40d64c89f2ae6f7e776c3637393 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 2 May 2024 09:46:05 -0700 Subject: [PATCH] BUG: fixes #952, edge case causing numerical instability when summing floats for pvalues --- biom/_subsample.pyx | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 35892025..0a324efd 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -44,13 +44,18 @@ cdef _subsample_with_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data, cnp.int32_t start,end,length Py_ssize_t i cnp.ndarray[cnp.float64_t, ndim=1] pvals - + cnp.ndarray[cnp.float64_t, ndim=1] data_ceil + + data_ceil = np.ceil(data) for i in range(indptr.shape[0] - 1): start, end = indptr[i], indptr[i+1] length = end - start - counts_sum = data[start:end].sum() - - pvals = data[start:end] / counts_sum + + # base p-values on integer data to avoid small numerical issues with + # float on sum + counts_sum = data_ceil[start:end].sum() + pvals = data_ceil[start:end] / counts_sum + data[start:end] = rng.multinomial(n, pvals)