From 8e577cb83b4441b522ce3727daf6691fa17ec05f Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 9 Jun 2023 15:13:21 -0700 Subject: [PATCH 01/15] Split _subsample in w and wo replacement --- biom/_subsample.pyx | 109 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 84 insertions(+), 25 deletions(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 371d39e1..d59c7ba2 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -6,13 +6,49 @@ # The full license is in the file COPYING.txt, distributed with this software. # ----------------------------------------------------------------------------- - import numpy as np cimport numpy as cnp +def _subsample_with_replacement(arr, n, rng): + """Subsample non-zero values of a sparse array with replacement + + Parameters + ---------- + arr : {csr_matrix, csc_matrix} + A 1xM sparse vector + n : int + Number of items to subsample from `arr` + rng : Generator instance + A random generator. This will likely be an instance returned + by np.random.default_rng + + Returns + ------- + ndarray + Subsampled data + + Notes + ----- + This code was adapted from scikit-bio (`skbio.math._subsample`) + + """ + cdef: + cnp.int64_t counts_sum + cnp.ndarray[cnp.float64_t, ndim=1] data = arr.data + cnp.ndarray[cnp.int32_t, ndim=1] indptr = arr.indptr + Py_ssize_t i, length + + for i in range(indptr.shape[0] - 1): + start, end = indptr[i], indptr[i+1] + length = end - start + counts_sum = data[start:end].sum() + + pvals = data[start:end] / counts_sum + data[start:end] = rng.multinomial(n, pvals) -def _subsample(arr, n, with_replacement, rng): - """Subsample non-zero values of a sparse array + +def _subsample_without_replacement(arr, n, rng): + """Subsample non-zero values of a sparse array w/out replacement Parameters ---------- @@ -20,8 +56,6 @@ def _subsample(arr, n, with_replacement, rng): A 1xM sparse vector n : int Number of items to subsample from `arr` - with_replacement : bool - Whether to permute or use 
multinomial sampling rng : Generator instance A random generator. This will likely be an instance returned by np.random.default_rng @@ -41,31 +75,56 @@ def _subsample(arr, n, with_replacement, rng): cnp.ndarray[cnp.float64_t, ndim=1] data = arr.data cnp.ndarray[cnp.int64_t, ndim=1] data_i = arr.data.astype(np.int64) cnp.ndarray[cnp.float64_t, ndim=1] result - cnp.ndarray[cnp.int32_t, ndim=1] indices = arr.indices cnp.ndarray[cnp.int32_t, ndim=1] indptr = arr.indptr cnp.ndarray[cnp.int32_t, ndim=1] permuted, unpacked, r - cnp.float64_t cnt - Py_ssize_t i, j, length + Py_ssize_t i, length for i in range(indptr.shape[0] - 1): start, end = indptr[i], indptr[i+1] length = end - start counts_sum = data[start:end].sum() - if with_replacement: - pvals = data[start:end] / counts_sum - data[start:end] = rng.multinomial(n, pvals) - else: - if counts_sum < n: - data[start:end] = 0 - continue - - r = np.arange(length, dtype=np.int32) - unpacked = np.repeat(r, data_i[start:end]) - permuted = rng.permutation(unpacked)[:n] - - result = np.zeros(length, dtype=np.float64) - for idx in range(permuted.shape[0]): - result[permuted[idx]] += 1 - - data[start:end] = result + if counts_sum < n: + data[start:end] = 0 + continue + + r = np.arange(length, dtype=np.int32) + unpacked = np.repeat(r, data_i[start:end]) + permuted = rng.permutation(unpacked)[:n] + + result = np.zeros(length, dtype=np.float64) + for idx in range(permuted.shape[0]): + result[permuted[idx]] += 1 + + data[start:end] = result + + +def _subsample(arr, n, with_replacement, rng): + """Subsample non-zero values of a sparse array + + Parameters + ---------- + arr : {csr_matrix, csc_matrix} + A 1xM sparse vector + n : int + Number of items to subsample from `arr` + with_replacement : bool + Whether to permute or use multinomial sampling + rng : Generator instance + A random generator. 
This will likely be an instance returned + by np.random.default_rng + + Returns + ------- + ndarray + Subsampled data + + Notes + ----- + This code was adapted from scikit-bio (`skbio.math._subsample`) + + """ + if (with_replacement): + return _subsample_with_replacement(arr, n, rng) + else: + return _subsample_without_replacement(arr, n, rng) From e8577ba722679f8d146ba5d3c647e7e101960070 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 12:39:53 -0700 Subject: [PATCH 02/15] Use rng.choice and reverse lookup... slower than original --- biom/_subsample.pyx | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index d59c7ba2..ec27fb91 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -71,30 +71,43 @@ def _subsample_without_replacement(arr, n, rng): """ cdef: - cnp.int64_t counts_sum + cnp.int64_t counts_sum,idx,count_el, perm_count_ela + cnp.float64_t count_rem + cnp.int64_t cn = n cnp.ndarray[cnp.float64_t, ndim=1] data = arr.data - cnp.ndarray[cnp.int64_t, ndim=1] data_i = arr.data.astype(np.int64) cnp.ndarray[cnp.float64_t, ndim=1] result cnp.ndarray[cnp.int32_t, ndim=1] indptr = arr.indptr - cnp.ndarray[cnp.int32_t, ndim=1] permuted, unpacked, r - Py_ssize_t i, length + cnp.ndarray[cnp.int64_t, ndim=1] permuted + Py_ssize_t i + cnp.int32_t length,el for i in range(indptr.shape[0] - 1): start, end = indptr[i], indptr[i+1] length = end - start counts_sum = data[start:end].sum() - if counts_sum < n: + if counts_sum < cn: data[start:end] = 0 continue - r = np.arange(length, dtype=np.int32) - unpacked = np.repeat(r, data_i[start:end]) - permuted = rng.permutation(unpacked)[:n] + permuted = rng.choice(counts_sum,cn,replace=False,shuffle=False) + permuted.sort() + # now need to do reverse mapping result = np.zeros(length, dtype=np.float64) - for idx in range(permuted.shape[0]): - result[permuted[idx]] += 1 + el=0 # index in result/data + count_el =0 # 
index in permutted + count_rem=0 # since each data has multiple els, sub count there + for idx in range(cn): + perm_count_el = permuted[idx] + # the array is sorted, so just jump ahead + for _ in range(perm_count_el-count_el): + count_rem += 1 + if not (count_rem Date: Tue, 13 Jun 2023 14:14:17 -0700 Subject: [PATCH 03/15] Optimize reverse lookup --- biom/_subsample.pyx | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index ec27fb91..7adad50b 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -72,7 +72,7 @@ def _subsample_without_replacement(arr, n, rng): """ cdef: cnp.int64_t counts_sum,idx,count_el, perm_count_ela - cnp.float64_t count_rem + cnp.int64_t count_rem cnp.int64_t cn = n cnp.ndarray[cnp.float64_t, ndim=1] data = arr.data cnp.ndarray[cnp.float64_t, ndim=1] result @@ -97,16 +97,17 @@ def _subsample_without_replacement(arr, n, rng): result = np.zeros(length, dtype=np.float64) el=0 # index in result/data count_el =0 # index in permutted - count_rem=0 # since each data has multiple els, sub count there + count_rem=data[start] # since each data has multiple els, sub count there for idx in range(cn): perm_count_el = permuted[idx] # the array is sorted, so just jump ahead - for _ in range(perm_count_el-count_el): - count_rem += 1 - if not (count_rem= count_rem: + count_el += count_rem + el += 1 + count_rem = data[start+el] + count_rem -= (perm_count_el-count_el) count_el = perm_count_el + result[el] += 1 data[start:end] = result From 61e2a09b559b9318243aaec565ca98d5c1061a58 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 15:30:54 -0700 Subject: [PATCH 04/15] Improve spacing Co-authored-by: Daniel McDonald --- biom/_subsample.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 7adad50b..91d0cc9b 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -71,7 +71,7 @@ def 
_subsample_without_replacement(arr, n, rng): """ cdef: - cnp.int64_t counts_sum,idx,count_el, perm_count_ela + cnp.int64_t counts_sum, idx, count_el, perm_count_ela cnp.int64_t count_rem cnp.int64_t cn = n cnp.ndarray[cnp.float64_t, ndim=1] data = arr.data From 7e724a5d2bfe9af8c59a3c6b0e94486a3433159f Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 15:32:16 -0700 Subject: [PATCH 05/15] Add spacing Co-authored-by: Daniel McDonald --- biom/_subsample.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 91d0cc9b..2fa92287 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -90,7 +90,7 @@ def _subsample_without_replacement(arr, n, rng): data[start:end] = 0 continue - permuted = rng.choice(counts_sum,cn,replace=False,shuffle=False) + permuted = rng.choice(counts_sum, cn, replace=False, shuffle=False) permuted.sort() # now need to do reverse mapping From 254c9ebab096ffed7cc4bed5c0f0f91064a1f71b Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 15:33:25 -0700 Subject: [PATCH 06/15] Add spaces Co-authored-by: Daniel McDonald --- biom/_subsample.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 2fa92287..260470e6 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -95,9 +95,9 @@ def _subsample_without_replacement(arr, n, rng): # now need to do reverse mapping result = np.zeros(length, dtype=np.float64) - el=0 # index in result/data - count_el =0 # index in permutted - count_rem=data[start] # since each data has multiple els, sub count there + el = 0 # index in result/data + count_el = 0 # index in permutted + count_rem = data[start] # since each data has multiple els, sub count there for idx in range(cn): perm_count_el = permuted[idx] # the array is sorted, so just jump ahead From ec818597f70cf4b346cf499827ce3a05259bd58f Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 
Jun 2023 15:34:22 -0700 Subject: [PATCH 07/15] Add space Co-authored-by: Daniel McDonald --- biom/_subsample.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 260470e6..41327904 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -101,7 +101,7 @@ def _subsample_without_replacement(arr, n, rng): for idx in range(cn): perm_count_el = permuted[idx] # the array is sorted, so just jump ahead - while (perm_count_el-count_el) >= count_rem: + while (perm_count_el - count_el) >= count_rem: count_el += count_rem el += 1 count_rem = data[start+el] From e7f948ed3120a3bd7fd017d9cfae5890da4c6e98 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 15:34:42 -0700 Subject: [PATCH 08/15] Add space Co-authored-by: Daniel McDonald --- biom/_subsample.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 41327904..8bca603f 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -105,7 +105,7 @@ def _subsample_without_replacement(arr, n, rng): count_el += count_rem el += 1 count_rem = data[start+el] - count_rem -= (perm_count_el-count_el) + count_rem -= (perm_count_el - count_el) count_el = perm_count_el result[el] += 1 From 96bcaff6f7b65bc8428b4260f32452f5101058f6 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 15:42:39 -0700 Subject: [PATCH 09/15] Fix typo --- biom/_subsample.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 8bca603f..2c601275 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -71,7 +71,7 @@ def _subsample_without_replacement(arr, n, rng): """ cdef: - cnp.int64_t counts_sum, idx, count_el, perm_count_ela + cnp.int64_t counts_sum, idx, count_el, perm_count_el cnp.int64_t count_rem cnp.int64_t cn = n cnp.ndarray[cnp.float64_t, ndim=1] data = arr.data From 9ffe1aa3579f33f903c29c5325c0d8b43d410089 Mon Sep 17 
00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 15:49:32 -0700 Subject: [PATCH 10/15] Add comment --- biom/_subsample.pyx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 2c601275..6a3bc076 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -94,6 +94,12 @@ def _subsample_without_replacement(arr, n, rng): permuted.sort() # now need to do reverse mapping + # since I am not using np.repeat anymore + # reminder, old logic was + # r = np.arange(length) + # unpacked = np.repeat(r, data_i[start:end]) + # permuted_unpacked = rng.choice(unpacked, cn, replace=False, shuffle=False) + result = np.zeros(length, dtype=np.float64) el = 0 # index in result/data count_el = 0 # index in permutted From ae1317f2f0ea02041203b050c23c7b903933e8b4 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 16:29:39 -0700 Subject: [PATCH 11/15] Declare idx as Py_ssize_t since it is used in a range loop --- biom/_subsample.pyx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 6a3bc076..f9a4ccc8 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -71,14 +71,14 @@ def _subsample_without_replacement(arr, n, rng): """ cdef: - cnp.int64_t counts_sum, idx, count_el, perm_count_el + cnp.int64_t counts_sum, count_el, perm_count_el cnp.int64_t count_rem cnp.int64_t cn = n cnp.ndarray[cnp.float64_t, ndim=1] data = arr.data cnp.ndarray[cnp.float64_t, ndim=1] result cnp.ndarray[cnp.int32_t, ndim=1] indptr = arr.indptr cnp.ndarray[cnp.int64_t, ndim=1] permuted - Py_ssize_t i + Py_ssize_t i, idx cnp.int32_t length,el for i in range(indptr.shape[0] - 1): From 1353a7cdd4a9f56259c300418e26ee1815e030a0 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 17:10:56 -0700 Subject: [PATCH 12/15] Use cdef --- biom/_subsample.pyx | 49 ++++++++++++++++++++++++++------------------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git 
a/biom/_subsample.pyx b/biom/_subsample.pyx index f9a4ccc8..9789aeb1 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -9,13 +9,18 @@ import numpy as np cimport numpy as cnp -def _subsample_with_replacement(arr, n, rng): +cdef _subsample_with_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data, + cnp.ndarray[cnp.int32_t, ndim=1] indptr, + cnp.int64_t n, + object rng): """Subsample non-zero values of a sparse array with replacement Parameters ---------- - arr : {csr_matrix, csc_matrix} - A 1xM sparse vector + data : {csr_matrix, csc_matrix}.data + A 1xM sparse vector data + indptr : {csr_matrix, csc_matrix}.indptr + A 1xM sparse vector indptr n : int Number of items to subsample from `arr` rng : Generator instance @@ -34,9 +39,9 @@ def _subsample_with_replacement(arr, n, rng): """ cdef: cnp.int64_t counts_sum - cnp.ndarray[cnp.float64_t, ndim=1] data = arr.data - cnp.ndarray[cnp.int32_t, ndim=1] indptr = arr.indptr - Py_ssize_t i, length + cnp.int32_t start,end,length + Py_ssize_t i + cnp.ndarray[cnp.float64_t, ndim=1] pvals for i in range(indptr.shape[0] - 1): start, end = indptr[i], indptr[i+1] @@ -47,13 +52,18 @@ def _subsample_with_replacement(arr, n, rng): data[start:end] = rng.multinomial(n, pvals) -def _subsample_without_replacement(arr, n, rng): +cdef _subsample_without_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data, + cnp.ndarray[cnp.int32_t, ndim=1] indptr, + cnp.int64_t n, + object rng): """Subsample non-zero values of a sparse array w/out replacement Parameters ---------- - arr : {csr_matrix, csc_matrix} - A 1xM sparse vector + data : {csr_matrix, csc_matrix}.data + A 1xM sparse vector data + indptr : {csr_matrix, csc_matrix}.indptr + A 1xM sparse vector indptr n : int Number of items to subsample from `arr` rng : Generator instance @@ -73,24 +83,21 @@ def _subsample_without_replacement(arr, n, rng): cdef: cnp.int64_t counts_sum, count_el, perm_count_el cnp.int64_t count_rem - cnp.int64_t cn = n - cnp.ndarray[cnp.float64_t, ndim=1] data 
= arr.data cnp.ndarray[cnp.float64_t, ndim=1] result - cnp.ndarray[cnp.int32_t, ndim=1] indptr = arr.indptr cnp.ndarray[cnp.int64_t, ndim=1] permuted Py_ssize_t i, idx - cnp.int32_t length,el + cnp.int32_t length,el,start,end for i in range(indptr.shape[0] - 1): start, end = indptr[i], indptr[i+1] length = end - start counts_sum = data[start:end].sum() - if counts_sum < cn: + if counts_sum < n: data[start:end] = 0 continue - permuted = rng.choice(counts_sum, cn, replace=False, shuffle=False) + permuted = rng.choice(counts_sum, n, replace=False, shuffle=False) permuted.sort() # now need to do reverse mapping @@ -98,19 +105,19 @@ def _subsample_without_replacement(arr, n, rng): # reminder, old logic was # r = np.arange(length) # unpacked = np.repeat(r, data_i[start:end]) - # permuted_unpacked = rng.choice(unpacked, cn, replace=False, shuffle=False) + # permuted_unpacked = rng.choice(unpacked, n, replace=False, shuffle=False) result = np.zeros(length, dtype=np.float64) el = 0 # index in result/data count_el = 0 # index in permutted - count_rem = data[start] # since each data has multiple els, sub count there - for idx in range(cn): + count_rem = long(data[start]) # since each data has multiple els, sub count there + for idx in range(n): perm_count_el = permuted[idx] # the array is sorted, so just jump ahead while (perm_count_el - count_el) >= count_rem: count_el += count_rem el += 1 - count_rem = data[start+el] + count_rem = long(data[start+el]) count_rem -= (perm_count_el - count_el) count_el = perm_count_el @@ -145,6 +152,6 @@ def _subsample(arr, n, with_replacement, rng): """ if (with_replacement): - return _subsample_with_replacement(arr, n, rng) + return _subsample_with_replacement(arr.data, arr.indptr, n, rng) else: - return _subsample_without_replacement(arr, n, rng) + return _subsample_without_replacement(arr.data, arr.indptr, n, rng) From 2dd9bb01ce2c56e0d35bb20168ce0bb8e4ab2e53 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Tue, 13 Jun 2023 17:33:41 
-0700 Subject: [PATCH 13/15] Remove tmp buffer result --- biom/_subsample.pyx | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 9789aeb1..e6594b2a 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -83,7 +83,6 @@ cdef _subsample_without_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data, cdef: cnp.int64_t counts_sum, count_el, perm_count_el cnp.int64_t count_rem - cnp.ndarray[cnp.float64_t, ndim=1] result cnp.ndarray[cnp.int64_t, ndim=1] permuted Py_ssize_t i, idx cnp.int32_t length,el,start,end @@ -107,10 +106,10 @@ cdef _subsample_without_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data, # unpacked = np.repeat(r, data_i[start:end]) # permuted_unpacked = rng.choice(unpacked, n, replace=False, shuffle=False) - result = np.zeros(length, dtype=np.float64) el = 0 # index in result/data count_el = 0 # index in permutted count_rem = long(data[start]) # since each data has multiple els, sub count there + data[start] = 0.0 for idx in range(n): perm_count_el = permuted[idx] # the array is sorted, so just jump ahead @@ -118,12 +117,13 @@ cdef _subsample_without_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data, count_el += count_rem el += 1 count_rem = long(data[start+el]) + data[start+el] = 0.0 count_rem -= (perm_count_el - count_el) count_el = perm_count_el - result[el] += 1 - - data[start:end] = result + data[start+el] += 1 + # clean up tail elements + data[start+el+1:end] = 0.0 def _subsample(arr, n, with_replacement, rng): From 2614fec7633c6715f61e7fc819d0f4aa2f162d30 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Sat, 17 Jun 2023 07:37:50 -0700 Subject: [PATCH 14/15] Update comment Co-authored-by: Daniel McDonald --- biom/_subsample.pyx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index e6594b2a..7abca385 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -105,6 +105,13 @@ cdef 
_subsample_without_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data, # r = np.arange(length) # unpacked = np.repeat(r, data_i[start:end]) # permuted_unpacked = rng.choice(unpacked, n, replace=False, shuffle=False) + # + # specifically, what we're going to do here is randomly pick what elements within + # each sample to keep. this is analogous to issuing the prior np.repeat call, and obtaining + # a random set of index positions for that resulting array. however, we do not need to + # perform the np.repeat call as we know the length of that resulting vector already, + # and additionally, we can compute the sample associated with an index in that array + # without constructing it. el = 0 # index in result/data count_el = 0 # index in permutted From 1555a4cb30b1e0e43edebf694fa139a2cda579b1 Mon Sep 17 00:00:00 2001 From: Igor Sfiligoi Date: Fri, 28 Jul 2023 12:13:43 -0700 Subject: [PATCH 15/15] Document PR 935 --- ChangeLog.md | 1 + 1 file changed, 1 insertion(+) diff --git a/ChangeLog.md b/ChangeLog.md index c7469417..371464d6 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -7,6 +7,7 @@ biom 2.1.15-dev Performance improvements: * Revise `Table._fast_merge` to use COO directly. For very large tables, this reduces runtime by ~50x and memory by ~5x. See PR [#913](https://github.com/biocore/biom-format/pull/933). +* Drastically reduce the memory needs of subsampling when sums are large. Also adds 64-bit support. See PR [#935](https://github.com/biocore/biom-format/pull/935). biom 2.1.15 -----------