From 48c1410cb807a190e42ec01cce6e25cbeea7a858 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 2 May 2024 09:21:02 -0700 Subject: [PATCH 1/4] TST: regression test for issue #952 --- biom/tests/test_data/edgecase_issue_952.biom | Bin 0 -> 59295 bytes biom/tests/test_table.py | 17 +++++++++++++++++ 2 files changed, 17 insertions(+) create mode 100644 biom/tests/test_data/edgecase_issue_952.biom diff --git a/biom/tests/test_data/edgecase_issue_952.biom b/biom/tests/test_data/edgecase_issue_952.biom new file mode 100644 index 0000000000000000000000000000000000000000..1b24d478ca9d68e2c4a1d22330f8a250771f6a4c GIT binary patch literal 59295 zcmeFa1zc54yEjZpHz?gmNOyxYC?O@?AV`;Vcc-LMf^qcK~{q``_hRV80JQL100CVwiXJ$anGy!GBlS-_gm-3JZhWQUc@y@;f#V`#S*u zIe_{<`Tq z0K$J~j&kR(_x&0kwh`&}7!%}9o~^!>^)J&R-_rgjfBusb1oXDwKNDa$?&kpe|FfM4zuIY_ zZ>DdhZKJP8qvP|7-AR^lpXzJLF;iRC*`x5s*B{ zU;VZ=)c!Rf|K9%o4x}I;?|;g@P4FP}zjDyq$O?a}$jr>~SNVawU$r10?LQlFn?Qg4 z{WER?{IKy32L#lA6*t{+f6Ke;;_t$r&p#Hpvw*0ew8XEu`7>bMCcr%X6{l{?Qs#v6 z+BzosgyKTKaKOC&Rk;1QJB@?3k^M^JUs^|13M=ZD=jMr8wV5fZ+^%-IRHLZRu*=~+d8Ow z^-S#aj6Y*N!0!S2S(%twev-o6(F5vP*q9i8RsrTOKt2;a11r;QJ-{9SdS+&}U#AlI zJ2`-MF|aW(GqHhyAl%gh=$TmQfAs+I0X-8d>um>+?$I;Tv;LyL8>&=sPyPT0)=%*e z2jZu^kKgm}XAf~8eyRsJuzu=?I1oS09S-o{d))`2eSib8gV5c_fz{t#J^VBNFax_E z$giMvD+A!)Ecg8HXKehL{(=KF;Ad?6nf{3bH{dSp{uKw}{}oLC%74%QSbs_Xi+m>F z`F9&Cf2DuuX9Cs_xL*GY4y2#sK0h0<|8YLVf%v%|;K2I1AL2m#Joj-9VEw!gaUg!a z2RN|)=MQlpe*QZg;D0Q@>Olk^;6Uskg7Ulh{OXNm|yZ9e$V|tq#oeF{L=UF zd-{KW3;XB%XS+B5W$y9c3x6Q84{%`pa`*9j`sE+s!2AmL@O$nL^7;V|%&&MKzo%d6 z0S?Tse1`+B+jLB{&5ZwXbp!&Ua)$%_lkr~sQGI{|H$d$n4%7hk`}n;HsPPa7;@5nD z1MAm%hy(F!-^cGw0G)?85WnsN99X~JLmY@-|2}?i0=#*M1M%N}HTaeOF+Oa-`VAlA zK>Tm-9P^Zdui0UZC#AMgX~w|IyH>9@SY z0r8LVUi`CqfCI6ESl`Ei)q~hPz=8SS-Qj@oW4d?!VfzpV5@7cL2X27E!VQRlmH9ghnt#^bE;d@dy&bH#dcQpi^p1}Z zSj<04a#GSlptq}qfbZvVcVD;xeD~M60L>A*8!!N!#a%fZFcAOH@IT|=@74)d@4gsA z-+h7oy-o=7xA#~6XISe`FaOR0_u~LyUQ*oo4}cFa?*P}OfcpQ{{C6CH_{Ta=C*E&K 
z06n{RWn{wN_4Id&paCbgyYl^v;;zD<@b>{g-j};_|J}j}pr4I*^94Y6fBxKw`%jzw zT^zV?zdv!{KkX3yr)b#U#Q}Ok;N}75$$wNF_&fi-KVSdV{Vt`u1xvv70&uJUl>WP0 z;J*J7-gS8QF5UzB54e_nYo_;BSKk`sHn*_3J*NQtnjJv+@616#Ab<1T2>`u2qXF)V z1Mcera`0RHCn*Xc-`zFva}Z`Y5aye^8)T1eYk%Tk;J?Mffe`*WxVz1Md;Ajz1<})g zWAgJ(H^f~#q3`s9gMp>otb2{_H&Qfb{${_o9xpVz_vp;(OM-`0nG2m1pSg#Lyc zFt0le|AyedS@q9&_j8;9zlQ?km4F)`_xjrpxFK;@5Ae6YIKVxQKgB;5_+x=T7WiX< zKNk37f&c0ju-BkbvEv@O!6=6%A_)*;jVNYI?wEfZfy_)zhOy4@}Y@@S2oI5iUBetO$}`RHPf zg8SK-hFRbG30iSl1X(_=>We@DrM&DN(@bcSFoKuynMf@NNvNE3lhEi}Pu z=RxDqzsEg!s($$)*az&L9sVG{#LyLW#R#H6(!Sk%&rVCY-XJ<%|x`x}LWVI*CvBz*B8Ui-1#c-2OB^y8jbDvdhWrc=F&S(qh| zV2I&?7@PCP)0c=%I13(NzTXtCYZ6lw;?LPEr$L=%nKL(n58JEjh@n2ElZ2nx7rNJB zz#Mxpg=Zu3KV{=Ug(vkc;ypV3Tu;Emv!G}+`$(tOZ*3rK`?)jhgw1hBz6+r!OW7M~ z`j|J(Pr9qywq%|pwe|L7I zKxvRVC&l{V)ZCXH(qQ4lyl%AG@FvH*iR4N?AW?Ddg2?^DZc8R_O_E?YY5)0}&1)rK#YPg}3yGiMlt()OZy)HKe;K&#>}$^e*8Z zN497;b*m0{yMcF@QvVlUhd zRIS6D&+MLTC_h#U>hAoj`Lt^V7H^6q^r5deoxuPC_I6U7* z3l*qZt!r!H7Prvm!mdedL!cH}n74TbWjVIVS9TEV{3Z4aLnkyJ$UY^kaR5ZY=#G*- z<}x-7n!&NJj;U4s_1@4$)>&~a6k3Gr#x*r0QMu*3HHp)lk!zg+jGw%~a@`e}b{ugF ze|s&D{f|g>65Xr$RmZe710SYA&8?fS(OVF)I_;f2&W=jx>>?FIallu`A|6v{(x0=WBln__v zJx-X6mw_Xte|TcVaNb%aac4x*(Zm4D4WRIE`&`aRsNaVQ+2*7?_9qz8D_nscF z)S7we`E&{NLTs2Do@{>5njm?N9<{(e0&Upc`*r!aa8)kplV;qI&T%3p6{HYfs7pjqmKtQ;l1P|j*DyO zNJ;+jB-L5*4DpnPahK#>$>-bHj5_%6P30@N((6ou&k5RyZR3d`ZbWLo;U+z5aZz@2 zbvB3AXdp{Ka1ltkUan43mZC3}Wr~UC5Y$qlUM`@(^o;&c)NnKT1k6~)YH11y^unPH z?nPPCsg=4jDGM)dk^wdAn+c)0?%qTs0tuf*r5Vr0qD$yk&8MdlVBXH;YKOQB{T%Pe z&c>7N?KN>vF6Tr-KJ2c&GE(OwY!bgZ9`eURE#vL;@hVRaJ^RiKBVH!9SA`Q8sI~QO znn18I8FxvF-Fp#EhGDmn0(W9%^JR`p3yL^LT%d(fy?uE2?AKaFp$|e^(r>is+2fk{ z*3NGFCVh1yVx8%W=Lyrgcdt4v)GG5iC-*hkH@_kr?dD@DRVXN~=J`odxma z6cfKb5Zcy+7>zfMzTQL>ZIT=f&n+In)Yeq3eTCwMYEUp3l&-}5H9fDhKDen}(ACpA z`(_8KnZZ6|ceOoq+3?f`kB+Ml%d1bicWPxm#AnGwU%*YDx%TnqQhJ5Z;z)Gp2_mQr z^cb;XzW}<**DvAI*pIAN;LB*O+>A<22)=h>Np4MU3^n#zt>`dkvhXTK66~hq55IAp zDN%|94fxg_^;R53=SS;nM!Lr1-V)7od$=N3M;l4$bTWJqKX^rqc%8th0^J|-DdV=n 
zC*4vC{TUtIOzH~H?Y%t{;zNi_E0#>JPm^(y8tDGHH zqpG3}MyX=04-ctjUgPyh6Cz2g8lq7&r@6ZnWl{4<(P7+b`O%ZAo*>)I6#YI6JVWl; z0=vafNGsQsM=^&l8^h3}CgPj@o#0fUPh>&_#n|M(S3)=( zEu%Tk$R>)N3rU!hZ`%28{f&<<;CW8(TRWo~=;qe78MM!`$X}@)raCF1+O*=h>Sshk zyguOCx=t)T<1B=Y?&Om+TdG0P+SXF4APtM{d*h8%r`XZt(xrp8(0i-6cn8SMW|x}M-_2!eAPCM1kz%q`jW$y})NYgaA*8B;JtMIjI4CB-;8`&Hfi0NG-(NKi;xR5%V34f)HG zW6z7vA3Y5&kmdpkv#>c(igwMazIG#do4wfA4Xs}LGu6@*?y{&-<#;d}P7 zsm|C@{kdS(05!7BQACVH8W}qp&#K`x>vMB~R-F?>*YJUixd~ph&e`={9$|Ce-egqa z#kN}etZA7~#0C+?%_k|=L6YmaZF5cO34!lV_g0g35~}8KXHVIob>Xc^>$mO- z_?~Uf8LmUxgm=t#t-C1syu2a&_+IxNtb!ug4xF?$YCiAgO9xq`jTeHJd#DZak(i}* z5RXN~WifNUaaTCVf^AtVE<#4X@t-Xf4cH6R78vARnRlY&&HGT9Fc{YelUBiX6B*&7){ zff$_F?=OyoTyM{huAMw%2Jz(eb*8A_Ru+A0dJ=gdkKg%o;|wy#^gMzMQSKmj=M~qT zfFmE%vLuS~v5Z5Zihx|3HNf0EdaUlS&qxENS;Rc6S@TGnL-Xa}(pmqaDFeYp2+|C> znTzgu7zXH5>2ywNR9E*DHEapU|$`h~mNu(Pq`#gH?_-=Q?kv+W=|Ky@S zve-MsBA#*q!%n=aleuu}k3*V+Cd4Nn;@Yl*S`8k{4}S2U_+aU-?L2FNPJR7Rq?v^#&oqCh_AqT9m_Q8nx!bWlxU2+BAqH1$oKwXiPFmV@Fw4wTL>~?+wU& zS%^da`5tG7X`x5o%s#|&H-~TEFTIyrnZ8L zp#8B4(p|bzHkrnTh*OfhMEe~R@`x0N_ivxd<4(0alQg!2TdRb6q4tyTfKBVHr>gLM-BTq8*L(^bVys9gjS5S5r_Doz#|e|I zn#!_Qw%OBSSd@b+7g#aQ*jOG&V|}x8gSX}RCb5+GAeuF z+wM=5;vA+ge1aK7%}T&05g6p`Um6^SfkwL>ZM2B>9Vsk1@YD`J3A%Wk4f?c8s|OrC zK3_8O^*|$MfwapZsymW?e<@r0_Iv$06FhKd*{>2NwZFQmZ zI3k-@jA7~@C-%|wB&?pvsdVw=2dOvs#;zv>e{qfFA5upnVgJYzZPh=Gt>|hTWW`$w zans9NR5ssEVPTB(+LRUI5hA8r#KqYgMz6;VA}fhlco#A}-R9pu749)K9o6s#5Xc^B z8@5s`=jTe-ZKpH9es(zEN(2q+S(>*MjLCA!*_u%__5HMEcaAiUL_uE3<~64Sfn#O_!cRz$W4O}dy$7B^IjxWHXEy> zS1jKHPs1{WjY^d*hfW!jLl_ZUebn@>^@M}Ua|&X%RJc2*JC|#gu-S@F;AxU(I=u^g z`JgNGM4#vQ2xNhi>p8lnL!?`J9d};Eg*ab^f0{RxQf>hapgb=w1M{3S*;dV1GU(V7 zh3~Q)F^S8DHI6CJ6}IYkoL7E_pU0CQ_albFfB%blQq>Rc$NtQ^mwQtns7-u`PoP#m zeVbqDy;wT`k{>**$MUAxX9k=%M@@B?jVmv8n~7@X*$Q6ug*IBfBN+x;_(fD!dX0yD zhYGdf?DuHR5*T9&kAxq-BQ1U*mS$6xn3$-FNmY>IoA@W@NT$t5O-N>4Wk1ezYKOMw z;s`?tt}EKsh}qyF&yy@87Wese76Z7Zo5Kwg167Ung3V^=>x%Hf{pBE)_N(yDNgART z%TZu^{8N+F1ybIh4HK9d%_x|rY*Xg4X?vM#H_L$Q`omkPt=TX(UW^&nt-eIy=XjmV 
z?6R&lWr{ey@yKhSP^pm*Z9`6|Hwx85%kJ#4%d=$V%tu(;<}fwO!{OtVUk_|iurP80 zo>Waf@u#W!Bx-H~^5peO{f=a{m~z$fN&0h~ppN&`BEASkQy-e*a%!GKf0PIsvtbck z_c2UZB;V@K(u+IXE@60fMRjg^qDnG-aH053Pk5a-`z4apkK?2F(%7MAd!g`nwa99` zPVekk2~3n)4@`EUrCDZXJlNxZ3fjDE@;o$HPzBgmKen(MS4 zCg@cotv?q5ZQ-_jtNzG7-AE=n{n_d|nTx~oIDyL8(!FKB5&CRfa#xrbTJL8R?oT~t zGl^cpa5~aSzi^xNMWfcsuob}(*{e!`S|71gQy_9GQbu($f}8LvD<|~YHhX4J{aDAY z4Za(HCS=ZYN`|2(@q1Z5+0D@)YdHS}cb~pPtx9_xp{oP@2UCL6Hyuc^s@PYeH4ql6 zTp29aVJHb(K?Az_o?JTy_{VxyT2xNX>D|^uCNxjnx_+elXPY>|-@31ZbJN&QCf-QlRrfDcBxRd@_=o8n4~NldnHl zg)SoSGnz1MbuZSzt2TKVXI!!>Ot+en!>_e!TgMaK!Cfqf`z@^F1aBnEY2#aUWop?J zE(3|bz3t~AJx;khWt%`EWTU+!9p@ zV`eq_-y%)8_b*zXbp251cdpXG2tGpT%FMY`5l7`YIi}oBSZ>B(h zd=x*I*x%Dw&;S$5WsoVvFyC$=<&F7|0m311*fJnMp}r2)Hh2z-Obb&g$!OI_^f) z;76Za1;#~E@*dZpV>KD&TRI(a@uQ@V`|^!uPeqH= zlF^85vMD`{rG1HlG&&hlhpQXR4CGNKT#q%Ppb#CDR})q_zCgd+bxY_lM>EUZR~@rw zc<*kSPyMv45924x3=*=`SLCYQe4bZ_4lTlyNd7Pm2{584Y}58F6$zNITv*sVax*W# zCc}I*)T*km9FP^)wRLXWWxs_|ctkFQ0_CcvQguEUUft2f(wZ&GC0ROCOKgTMG9!L` zE+x@EtGE}2O-?;g$XXwTM_M^{K$fG1^R#P-ZB*Ys6b-k~$u(#Pm#h~?o+r+gSL&4> ziW4})v+ceB!ZiOey{+Ctp^g5l;XDj4Z*Dol9+a>Xp=!0!h4~N{?<$qli?jBpI#D@F zK1w_!tQw_zMmcL_U>lx;^?U@xoE+CRmIGFC;o`;OGF+z-$Qrs&H`!Cc6XrMYeG5zJ z_dHXNW~iu463EsjeG@(<1i9JQVIg_9uwW!j$3C|;b)2g9z+c$h%5l_<#$T8@`4T-d zXz6)lUzAjJ2u*};{UVGZBRMPnl7sI1kl-sD^*$>Hcwxn(k7t=6oNkjFQOdBVGU}3c z9WU`c!!#_7^PtlvNakfPp5RY8rb(4fqk#FDM~}T(G;d|;NbYFXaYfsPLz*6scSD{u zTYIsq0F%bQ?xG!g!uM&+JGtLvlSnqqW`@`7JXUGRO{7FfC3RzQHl6Yl$Jt_C9I8tC zNcRAbV;PLV0t&yMyc>#K_K-roU8}`#%c_WZ0ipd8-TM;KA_32n=zX|Q$Tg zk3S1k)}i%COxnI6tJNfXD~f5-r1rr$qK7~QKIl9yKp2icN&nK>ofh17PwkszC4*$; z_D-x?HH&a(_Xd>;-X(t}B}1ld)Xg=SV#8a<4)cZ0j*jhk+7_Yx(Kt7#!*>5nb>#{5 z_9*XHJ~d>zlZpb2CCa;m({>AGy`EjXeGWR@3~N$z87RbIN3rrMZK#&3ELUmAa1H*e z)>XlK@Yb!HS8sMywPSyII|cgpejHnVO+eB+Mhl@9{gD*j8oc2) zB%^*lqeH`CMMTMU`Q{jINTY*dit8je9NQk>UgscQc{05kp)~~Jay?AfhKJqBsv7=H zjfuZ9(iNLCtQ1X+W{F&=B%S{ao!sJaaa`U<>MN}rP>*%Fw%Is^S8Te`Q<-}aadP

^U%CsS<_0nZ8mk&k#`Mf&hgo?@hWk*63td>FLc`ow!TBjecfU!>KP9yqB z74D4MDO$Rqj9VIa#;h(UEmpIFpEv&XCR}NgNLR{^wwHcklhX~X2?~RKQpw;9{wA-}=woFL z9TPztv$$dsg@&Ck!Y}7+cTz~mlonhgH!;m0$u;D=_Kx~$m8+t3B@Xc?Z%YRii?FT! zXrStKS%eP!$eH`rw~%~a1v|K>sG4rULL{`Q*NPlbQB7{5JVUw{Qmsab``X0X-Ohi) z=EzbM8J+*YpUJjuno{xLedF2q^TlxZr~NXJM2|()CdUY@@@5cyqLyDGl#5VQ@y^>2 zM#G^yY*mMocdBo9*SjRGZY@yq=4(Nf4U_d@yt8Gm|8Zk3Gz*IB(9}FmQ(&<-g!_e& zEq(or*71uO*Zg_23~mTBoNi*Vl|0XBAn(UwDKcaI4ILs0s+7`3G#J*!oeaoCh6HxcaU5#pF^)k@51wp!lPTu0AZ1Q-;BS)%u5~CRP2Ta!}92k0P(I~tvbG(y=L4q#lH&zMik(itsMrESHiMM+Td04V1 zd+YvrlRoB0QQ1S*WGS7}4YQa;mAi=z+?k;3*IeII;@=94m}iFYztuI#q~=e;KING` zZ(O>r#HZ|so22xl4q_Z(BialGTh8bWEi9O0BV!trz*((RQIe5+*IZ~!_GGlE_*Beo zYhIi&IgziW7o9cLqhAmvA992fnw%B}h1xx3`b+!bZhlJtv|zNEMMfva3p4r*XruE{ zd7ixwFh)ExpEK0Hv~FdhAiXeCkvaL!^M<`Jh0{W)6O3PPf-gbWPjHZR28t24zqyU} zoz(D$E=?>oA{*6R51$a0I4R{B<8kuPa_#=*7%v9#q_oSWPGE1c*QnvU_TBp)H)u&*}cc+}i z*j|QzcDiLLs%=<7q7a+(rR_-|N@Eiwx_zNT3C@;{9;Cwnda5A(yn`O>ESGh_POf?r zKZ@26x8+$0w@ZByRpcd=WCcvMHqwPD=M{+c=0_^!%^F?EK=|Vykxb5gYz&zvPK$#H zi3b}ZAG7<$>g*JfO)usG3&CRBCD^AdS>4ncZYmy^n-@-UzAcBFGG0aqJhAs6X2aJy zXg>@nTG&Np;!ff+aA`%bC}&TxzN&>ASxH?BoW@yrm&8-n686o5GS&xgS9No8{20^; zTGm@Z(%2i$OfA4>sD-Y^ZvxLn03>KRY@R~DUuPnf!9i^m4?^lVunwcG0 zhJ>JKoZp{)CWW$rRCu1iZJu7Z9`{+;p#L(jd$&NJmYu@$*aY|8%?QDZ5e2zWxpy*? 
zX!4p8b26LV&~=0sOZ50_5fP>>Ftwl8GP%0mfh8}af|V7dr<~GYH9khmzFch)-^Fk; zak^&3=w0%}n1ROg>h4~vuO$%8w$Bi~zOkfI;Yc;hxkegOm#cUY|U zvLU7I4j;}MnIj^MUZbuUM@<(RfnBnO3sPHj@d!IFI*lI|ReaND+|TE}qxv!~Y#jLV zH$hVj-f~Zh*2teLHyGFR6tClK1aZN3mD>ba!4u~}I25S$fqzzy!9-u^OqQN5-GG%b z?|OkzV~>QMU6^9Mf9f;Z2X%WT@gaVUM+MGF-)Ths%h>{p+q zQ|;K`!uZWZw+?k^`F@0?k-JcI0bLfUQX;F!EN{4j_z3WnH7eAJ=3qyCPT&rZecCAkKlyJV5C9zc@+OvCYUR`Y+q=j zqg&}-6sWxw-5Z^2o`9p{rY4^fgg^m2h&I~OtTCJ$pGfJLCZsw!R7u~046Vo4)$ijn z9=X+PXG;2(i)~iq8lF|+xS*wRt}9Z%zOwXALg=J$bF81l)||Q_6E49{yHqH2g{yMA zRte3qL}AT59~l0aB7v{U-Hb~lAr9Nx&A9&SH($^v2 zQp3wmABR}e)=Prh>9@-5cbobU@#{0`PHH%G=JoO?2W?4&c9&_Kpw|UNILd7nKe6y0 zB~e~WPePXD;{|LE<``Z$H?^e*z)a|!PvjUP&F^qBE0m2QkJpWzC1-MScNSjAO9ZXU z$yegv)P|g^=VK$ zh+8IIpvfV9D+JS6X1$W>@G4J9wN;DZh+lEfaNPPqmUYIcX$H-w053^PDMYaZooR0K z9q+b|>9*d*-rRR$pX|WxHfQn~oyuflsTz|n#X@ox-=2FA&yqt{wS?$YI=yKK977Yd zHB6{3$(&1@eztvZS=NDjQ89rn!<9FF6*j8(9UJt$@PMOrt-ak&Gv)qA28Xqxs+?+k zB}(65w42!>H%8_Bl6WOucnYg%OWt9>QgGFmg=Gwgub}s&pekqHn-!@Q!r9vH2tTDk zTd%E|h0eq~^n1jSsC`8-(x z{M8rF5MDRbNjN-sjn3T7_18_V$KQ+7Ww^pK^ZE~M4W=qdzc?RbIwUDq7YcR_x44B$ zHML^FtWOQV2)SkrY}TZCqJ0fy<}dUOnEAw&Y)fAyQ1RofMjMM=4&A#jb_Me<k$0~ujQqx`%!y9KGG^8$`i%sMZ%_WpOBz-8 z`qMlUd@6ALKt2hltm_x2axN|AiP`n>Y2_*D z(#TH3n-bIRZYKO^WYZQhV z23l+m3tu9Qm`zyxe}8@m`Sz(05RjkyNdA8Ihw#@=SN{W^8;Vo<{Ug|)kB$HyE4r=u zKl{MV-#zz2|FhHoFR4=txDxQ(%U?PGOz@}l#{z#W@W%pwEbzwye=P7{-U9X-Tf@q7 z{-Bt?^L>$dnig+Zc za?^{~MG5Gm;aw`DMM(%IdT1ZSNfO~1#|56Kj*GBPjyFl!Mzok6e~DGw|K8!c&+!}- ziZb0_2hlQ)V)vTnEKl+6?+X`EiJ7$uvY9~cz7pwcJ}~}c^G6xdM0{{ zB6|96a2=A@kiNRl;X#ZZL(JK0s0WE7g3v>QaeVJFySXvl^JT&65u&$nq@Ig>=IS4? 
z(?@RQ8bOTDcjP-2mc|=?AcZeE46=4RDBb+9?$43In=D5ZZf>-}U9}+S1RkBgPR}tC zC&_>N(kWq0%%FN5tHToa*Z&mG7{hd7p?Zf+1AFM=?SG|P6^@Gn)|tHqLVxND@Z0Ss z8Z)JkQM~*yeLb})Fom@F(Vw(8dtyG()7Hj(((b8Ejj7i}2nVO5gxumuO%3n$tCk-g zQkmZWS3RaiB|#;!K%7B53+wojJ9>v6$7dm*XDnuwX>H)r@b>zh7$n#J5D&hfwB-yzCE^#sc*o*atuPLO@2lnljm@eu1!H;X zY7f35HAwV%e`$o>Fe&OFq740N^&$XP!Dz}tGzcmQbGIv;PUI3LfL%8lp*$dpP|_Fo zQ7`*SBqu^bsY7}PlezP5PaKSONv}VuuAfUJuBAW{u|93(LW1h{TL0%hzKB-w=-UAq zQ7~(Lg)B6HkpaU5NG&48c_!qRZ(bA^ekMP=EZr>6*<@^YzKOe`I@qZ+>2)8PmuE+i zaz~pdWyg~8Xqks*M-BB%o5y5-66&?c!|Gwy&_b|Fez{b%?9epx1!)uRqUqWK#DvFp zn%{yR7fkiU)~{I`f5RnqPT&5KhR@jCJA3dM?}|B7d#D+olR1}D5MJ-FIo(qTUZ1`> z>(l1Sh6raE&8D&45X;^gpPf0UQ&3m$kc;eztdCp3FGk2VMvPvv757l|rO1y3QUKg8YbqlsB-H!*3^SqS=`1HLF zUc1QL@L`QjU~Id+(>koCw#IRms}0tq3Q|$ajN}~!(RsgJN=~tzyyCv ze=P9F0)H&<#{z#W@W%rGjQGY{&j!%zPA5>`%sv7dz1fE3D_R)_E78p zh^_zb{?p&>3;0wAxLE)@{iWf+1b<3@Ebzwye=P9F0)H&<#{&Q5E%0~ySKog>#lPCW zy3_r)DgNx&`d{89{+Bv%fB$O0_2!p*?>G79n^%6T`TzSqmcR2~`nwm{ymJ4o=ePg< zd5_5d(%}9NV7l+WfH-Dx=RW`w0Di`Idqej>B;9|H2(ssIu>d{ncV#8--}Ur&S~YwB zZS@p)aQ_`2K;G$H8Po4K0id7Ick=~6cYpr;+uHu`qkk6%?%VHA9QeOKR{xn>@ON>* z{%+m?+76f}|50(^@BH`veEnC~VJZH?kSh z#3AXTszbEn?49yft1I+V?Gq9g7Z}{LuD5qNR8Q_G!o9>O z<}(Oxf)Y_QZ-4UMf#hO9Qyim&BCGc@heakbo;3K5(fYM_?-wfr~z}!1)I4c=x}z)srVRl z!X9TcEm8*(TnFja>T&GU?h~uzBmS5DMaiiv=er(C6+;Cri+)d4W?o(-g|H*OPBniC zLs>&4^Kox>P7B1o0dEu~2j0hUTBrKcThNIcWjD%PcK7cS^D4${rs>r;r*B+^K%14d zo17Vv>=llpGtP3K_Ci>Z^%c9h)9r+}ym2wM=$kTKONMUAkcc9HUlAxuTbvrxNGf69 zB%zPp#BrY-dP>c=`mUd5gd2T08+^x-950`)e$E_+h&H)GK+q6hx?_@-^l)6w_3$m7 z-bB>SEEeJX_|I20qzM*j8|g{fc|%Hd6?1xLQF=CI^HRD`q9cOE7V<7e3(RgK2|P2Q zMXuvounHc>E_66?U7)J;p4mg8Dcg8M(W8**@)-@bB4{SA3eVS1rb+NIMo81apt`?M zE?n2fsfIS_#U;CwuPuJ5Lby_LNX_%*bOSE8&))RRH}r!Q zRB>~K0lF&rvWPiLP||&N%Eaf@W?B;pa{e#c4MszJ1=5bHkK@&!y;7>}B9EsCjMnt+ zNE%pe3KXX9JHLKYy5)GJb>d}8p@;IemQ2^1;H*NKykU7EcVo7d;pN&0)~7*7()Q;w@M8uQhZ?6f2$omf!SRx_0RuLnqMzCF8$Z=G{pe0@ zsa$7Vt(F@BokklX3kAm?b{K+d)6dnssNghhBA9^J+1y9o3tbUeJ2Fz=g&f-Ei>5;A 
z6{MwN%gyk=Om9?&y3C(@npU&EsyVdr#3<~h)3p9bfkLHZiCEA7Rd?5pw`((1;!eO&XC2coo zXhk$YKg^@r?a^qzy-iB|lFO6r9;*Rom|~AipY~6Y^>i_TDmrOz1RJ9>_jmQnnMt|6 z-59#Yv`={?9b9o85-rDRs0^gKGxkJ@_m#S$&VysHcbS+Hoo;`rnU9y9Iw+Flpvw*p zajXmWe5|pWFBRZ&;0d8#?gjob(_w&o_(fC%X;6HLU8zXMk%i>vL*q6|2gB}dW*9Vc z^6$0B`89^xVOzB0VefruQ#qi%E(^S2e|q7Xk<;`Tf$mWEX|?$2IU~11kT{G6*EhaL zNph{~9S!f2&utj?Dy>Qia0w%@qw{xjyfCLRqj7Td!uWQ&>t;$dJ@Vww+S$}bl}>Hv zQnkw9I_VdaGO+_`t3H(F@S*RS=U-rz92L{|6VZ9YaUwe|rPy1FqfsS;X)f z38zm?P6(u|ZuEsnU&vc)#ae&e_Xs|ERcoWDAA@sB^Qsn^m<|bNscRokW3s6A=+NKg zskRT2S%N;-j)&5lct=+^vC1Ni-y@qafdP#?9xK_(ad8Eq~j30V^hFyx+%}V82WvxPSIy$w{ z`51J+yqcg&8V>_i3_S?aGqMhA3 z&Z-LAxIJQwxF$6CzQRg)$Owxp7pRh_LD*79^|&nElc9ZG@#5e;HI{0^ytjMGf`M&Y z5!jLSp%fB4_U$ve-Rt+2)#NATU^JF9vlSM}xwnD^V%b~;^}G8Cuuta|+a;eYz$0~? zynT872Bk?e?MGko`?4>E87VY|C$HHrc+|TkGg>Op9Kx{SB^l0(tY-Ar!8m7d5Wg^B zdv0XRYaO~lSx$RhYVj~$j~LwU|9}K7dDl$Ad+02u`GII~b^EDxx=LdfWvFJf zWdeKOiK{MX4ar~?Uw!DQD_Fbw8zkQI?(#*KG_9Ef#wKwYoIYI`44vrI)s0OhxyT8H&J@T(*&2j@JJreTr>t~Ht{+{|L9&fN z7p(ULALdXXZ0b7I`E*)?Y!fv$CBx>lTVUwoFr4R+;yHyP^FcjJc}2n5!lw~In9;pU zg}+1hN6+UyN9vDxmdX>_Ap7VqoWrL@+lB;x9IQ=Abz{P{&2KwJ+NmV=^QfzJvwRNW znatgvr8X7KiD`*q21l^i*0%gGV+IusZ;JH!AOw2ltEw$pzs6R>=_W-MyhIs24}ypz zQf}At8c=5Yia>>F@Z(E-l^j#X=dK~0kG74iI%j#FeJUt@lGKzV_qbzA@hrJyghq_2 z4c%O&R45Sr3`v&MQKI7g_0^N$Rbx<^NQ}d|;VW0c<`hKM%>m{hKv%)2GLh7MskuHQgLv{n4(>hKnolT^6R)j!oQ zM4K%?y!rL_M6PJ`Rigl+D7>p#(^Cum2m$!laMo!plS({20?v_(XCB{W+wCbG}I zqp&3~OA`c!5PV!q6!l`5(chwUj)gkJd9ud9Agy+jM%ldAk1J(2e8I?h)B+A2z0bmFwT zTkDV2ao+gXBn;444ng_{(@v%ur$JPIJwJV;f1%*2=DY{b{IxV!xamnQyQ<))<1$s~ zVa?6{DusN+daq=N4*kkkbAz1egLz?ZCIxI%G3P^6r?%I$9!;@7hwOEN9+%H?Asy}HpCR*)L6iD`q9BIKEx*}deN38Nd0Gbaeww*7CGf=#V$JbZ}JFybs~{jT%g!`WMnsU*f?PU0r8CA1eT8r>PLIWoF+#sdvim%?u z^0inx2(p7TFU3T0ZY&MNn=n~B^0q9mcS1YYoEomHIP`&zMjM`I!ZF*4sITM76j-;*W_NiiiOi!=7?|#<0rl)#MPtWr#YERGWd{pZtd)VJ~p0VOC1Wh77paa;1 zve&$|p@~_@uQAdbcBq55{_v&{0(L{&xc6PJeEW)rzaPb8 zybkl7ly0cav8}^2 zXna!Uhbgf>xSLj#{1u6DW6o~Qz$))cM6HImuavQp5bUpx5Rt%K>b)?y{a57%ft%VN 
z{VPzj5^wbbP>j^ybyz?Sp)kdc+%#!mA6V5t1|8xMQ!?u47DtZQojx>_Q+Rg?(uP?U z@OPHY#BIU)>)go{`KSFLiYj%OO<5!lvntuRKh={}~c6=NU_;JqV1Zn5F^G(z7QB(tCT;Vr1If z;z&b(RCvT=kpl%}cxkA)2unDT%Bs%yUZpzS+S8OlyzH?j_J+qn%sDoNU)iBFh{5@- zW~x^+qzGr#&Rdc|T4B3pjJ*C+AGdYjfiJ0MoK*y0Vq#Nlal@A1m}C{>JP`?8vvAEJ z!}6wHYO*w`9&6ilUG~*kG)MJ&9DXuzSRuwAUR8KY=|<;FD=yIrUQCu7pNZl*?omYN z#qe`}G&G|?#zDAiVnyVEB`(9_i^LDxh3?nDM@<_|xYKoFrkR<06d(D?19L-ZdF2*? z!xscdv`Vy4^k+$gTN64;De;tQkjE6pO}K=(m9qGAX=Mnz$rDX30xYf4T!j@xTGs_T zjNkFQ4}z(Ocs78umoAZ2y^XN&0k)m?u~NHQQ895-dUxo8^(L#NV(Ao;N9HNRN2a5-zVESC?GXlZw+Fnnr6y{rKE4M7{c9SmTfus6$sNh~RMAD(SpvG~n!BA9-jGt>&O)Xr zi86#vvt*ybVR74K7j zUS3Ji$Z_8aG6VuQnwzNEH7m|ciuZ`s`s}r3dJ)`~w`$REKkA*n{g}0$L}HOruIUNl zPTFg64qTlGI&VgbtFX-P5hyq9XkjPZ>-nQM`8Z6OZ;nVE)iH@4p2Y~O#$)Nv=H zzZR&q46}^@R*l7;_1>aT-`J(RT*FSF$glS!Of!kk#kmPqao$xyZ_f;*;4{#5D1iP^ zckeENU-H3*au(rZ?;#6Otn{W+UAjD${I?B|9xN>1Y*&_?M|sVaph>uY6sQ)na$45< z4O6YJbh)3l=%a*(01iK;POb7$!^}&E(A|j)!x%=JqIW0p{z%lFk<(R7N#5eB@;!Od zX#ETI;}PM>K1MtFDG&QgCR4PduriLq)eeO_Tc^|~-?0)&!E5!LRL@rY90KJlkB54*M_%`s zFIhiL)E$LFkuf{}%6CkI-S`(VYqc&GgAXMv%< z(ZMeW&Q}jeO&|{a3wu1G*VYT!KCxCNzAzj&RB+YZ;6~`euhV_=`}RB_GAyF54Fp=s zpkNlXzX_C$z;`F)fr^`Dsee`|dOLAgNO*fTu8>v_PpHa*IT3A)n8g_GzkS`MD-v6G zER#>g&nsv1SdsLjxS{rOzJ1yYavbAl-C6^brfy_-rm~=1G6=3HvqQc5GKgqUE~)4F zYm&HL_bOXe2LFREld}$>rA|f6I^@1-`LCmu0#=EUTke7RR5X3&l%W!G)^VNvX|-v6gfPU z!HZ2LJQ4db?8#Xv88`MOc=pS;byt9tp67H@eGlsU#L*EZ~D>qn$gsmX(M02iD_bh`{)wggq8@a zz9348xVBru3><*A>+c%q)+v!2?hheaMR{OSGaCBCZQej zL(m{R&Wc}P@_Jan4=WQO=0&m%ZBt*EIO|t`y&)MDpB_v#Ef1~m85}aFzhUG)R^rgh z8v!R-eAewm!!NYgdXdwzMuP}&k zD^OH57smFSId!IV1}v@oawwSAr8tAjFv$5VlaVw{qy@|vrbk56BJQT3jSU&58V)uQ z-Q4HIQ=UaS?mQd!p^2u(^Ec5^EF6*!U1%o?%)w+Q^LA z@JbMgC4Sn1py^DxOnsqsXv=(c{2STp9(&0L{1UhOEJ>qmvIHHi+bLDW8GR#iI5I|QpIGQr| z?nN&Aw+a_}>LYeAV^#h0-8OLC0rgdZ1Oh}g*wL76?t+rYVG1Lf+iTbTM&63oxE)NdzSljB2jy--qvTWF;s+@ zuk>BU$!`pAW1wV80Qxzk7Wa0OzexYsy(m!cw<^Btuq9<#Unn4a z?RMjjeZg}PQkh}ZHm`VC1<8PZdKVC#Y#26J%usN8MCk)@WeQ5T 
zps%q@@-)ag8=rG>jdkff_ax*Z4Uoe}1FKmIacrU3am6#g*0?ErBkHyOGv7E;?oP(? zEc`}d3IFoj^YF}#7c`NmfVWIK1W}`2{wlqy$KEh=i&;5ZyrW<9)UQ8Uy}>k{Q-NUN z8Y2V(EDE*-^d8{fDD2d78=RSoR#S_SOoA?n+@{JTlTgM8l!L*=ssl!%3hm5yz2&Y9 zvJYFmYvl_3OITH-+2it5J8bl%3k%# zvdwgXUb!(DP6Ei85-x}lBz$2Tol}u;al=LS#%}XGQD3>TkmEO1q*3nQ*N<%0TST#K zl%jNAQdUSuFRLPBFvvM*#kbaf9VZ-W6&$@9V^#EQop{PI|?H!?nqKp(4x$KQ_b>fJAtkG@R5V)d9<9v znAmsg+*$k(`q>N?OgC(V(FYIVAo7IP;0f>4K25+X59;o_S$`q3fi^!699L^OEDK(| zMwq!fNGv%kz4m6OkDkycwj_CG5l5%{LyX^-3TH~pgUhdSJN0fw;||O;ir*+}Um`)| zeJE-iBZLpzp5Ny`FVQ`^a8`KK;24`@3}>`sH!$_cD`*f}Hr4~Y3{>(&E4kZj7^5y% z+L7xL0p#E0uyHGDpKF1!jx1m*IR|E~FMfO|vFEln4(1=c)r9rzGdd%3#J!P^0T?TE z2J~$d*&ds{pR8??ldU(1P$Na)Q|beM+f8+f&v!x=huh2$n1_=dAn6!dRhIkuqIc;wbEg`DvU;Sb*M0{fj+=V8vUH1p zKSxzjU?$v;$I?J>Dep6lQzD#+QT9@6*BbFx1lo^P50z2j_lR4d_AjJK&aY?aSSi~_jeV&PkG%Xy5!~#? zaU4n0EL)WG>I8$%8m$u z37hF4K_N=1y5PU7@|~S00n)Wp3I!{e^U~Q|x0Ku?^+(F@o8yTOVeF*o3ZwM)gxTCP z`F^nIa+L-(eA_BGr3KhO^o7N4BA*7u%A7R@ZJY7sM=F=z;tXco6amYHi7{Pz!L-TY zd0WQ8BJAdMM;gTH-juB+Q6YUwC^i(M&pEN_ijm2DQ@YErjYp*Hg6YdLp4uC7{DCXm z#CSR3@q6a2$RG^1{SueG05LpkHQ$f2iQ*fq@z~bm1xZ|(Swhy5?(k8t9`teu7+k5| z@sZwCLhxf)UdF1*&Xe?Gd@18aoxFj>+J`($hmoP4&w5pSw+!%i+LJbOHO2aBk}O$A z8c-Ei--WZKd|o68(-I_u;<`9uw+FnS1dyc*2KVvtSbNAr0A|PD-cl}xX`1~f1;_~+ z4D#JfAX(PE^nR1XIBQ}p5?@m?QTV*9l_k47dqA>j=jMeHI{&aaLmZzLA%41N+yRGS zbx6l`r`#H$y7b$%AO!N73EubEi%@6fUY4%7QilpSFxCiE9A!&1FCL)Lx-%FqtS`i? 
z^B3tH53Gukl3RGCmU^Zzop)#p>9(zLH}?E;tGU{JzZ%`|>$>5x4UJQVh6QxqEzsm& zm|K@Qw4N)piw8l0;At-=x^HRHt0b_z>zvtL>j_eXu1}4R*%e-O5_oE(OAv&se*`9u zMBnKegzfHqfzVHl3ynh23lj^rVrcU)C0X-A5cqJq93tDG-Efq+m(9!rY6GFJb}+wX z(XgVieA6TF_1#!!SgzDnPM+({gYW+a)_sj{H}Dk>DpT)YmKM!2Y5a~ENhdgSMKm}o+Wz% zmoPm+fy|%O66v|i8!X)^7y_jD0vi=MYxICiWM$Q#!?96MnNh+~JCCa|)7~_St6YIN zG}bZ#?ST6A9(b)2xASP`=OS$)L_76>6Y5>Xd(bGdu`8*SlNMU%%E4&BJVPlo7yrkP z#0BtN2^Daj^YBSK!9{pR9No^eSsT-7fkBZ>sD{G{|LribI2#2;bcJ2XJ)y=xBT%WYo|R4FI&;QsZW zzu^>Ss4a!It10c&seoK4etsL~3kEp#AaBlZWMyMSyhBxOU9xRk9)L45@17(g1{70F zH)Ae2K%rkr+X%aAeQYB>p%7{VUQ{nCX=ZA;W6X<R05}-f?K`wG`X#P| zbuB=zRF?^;N*s(&5dyPaoEqJ;u*GxT24ylL(99{p-F@Je<5oTGS(*o?t|;RYF0ah<|XMy&YVgNiE6P&o?Mo9rCWLH(<&l$ zf@ef;BzPjJ1k%tUUA~Z3n#KwRr!YLgcd)PYn1`@HM~xj&-Wh=fTUcHnBU)+=wfWH) zoRqRiRZUp?%(_ln&vgvLqTe>CIOC5%`{!6_6f0`lQlz^cRWM3-Uw4j>nF-wo-=h|A zohYp4gnmGPLPb2F!+9u%ZX5WD6IQkY9YR~|+Tb*WwX>)O3mnz%N~H-=T;_Nk;rn(5 z)1=v-gz|&>NQNv5nj0Dp!LB_jvpycZcMcdbe%M>dh!w=(K_hR$dIHfmc&}Z)V*8fn z`#G-ghPjPT44rXTj-oba_ax&-PG-27L!c6WoBcqjgP#wHzpm5tMogsh^ukLW@2jF=_Z_MQ z?G!@9&7OTCd8Q(iE2wFNtIyp@Pf$<92BWS86}1G!frPd^Wd}n*ohl?gg))Z_e{#j^ z3h)l5R`$?O2VaD~chjw>!_6-TzbfCg$-bI!`{a(kVxL_v1zp`lyiJ2&NECTlW6-}z z=98vJm{+^rREO2zyKN|ckfEk75;b-71vuz?qsVoPYR!bniY}N0p>~g|YMCM>(rC!+ zSmsR>W7$2%j))C7d2`$IDbW*crnz@)`we&bRwz@MOKr0yXnlc!wzc=d!JgTbK*%-}ev$*V@KJ%?0?u-HK9!3e$+l1FvSk^+9@r4DO zLoSM-@+}xL*pt~~5voreII;~U)}*y#WAzq(R0XcOdswnT@4-)K>mtzgi#t%Ou2cAq zYh!@a3!2=T0t)hLba)H2yt9%83;hIVBHdIMcG-HrHc8rG6xf8fWeuhL^PZ)g~l(|nWMSc)Kh$b(_2A2Q|43VA`@=y3Wbr=8QQ zZ02pYx=C@EV>p??B9C;+l^bz}GYOd60q)M`(V%R_TuY`j9-Aqzc#uSQi}o5;53Gw| zHa1dK$fSF=JIT?ufSkQ^3j&rI<~=nTc~FVD<52k~d7Hu&K$6dTc1Jok zZQK(}T`l$PC-PNIlB(0l5?V!KAs_ocfaXGmoU!Tml`f>vZtEUafQ{sZcRA*d#lz__ z4SvhDr52WOFlLsyxROnY#AsrdDcHn9(LMeh53@TS?EE4l$W~(T^M=BC@~My1S-KO1 zLZ>Tg0^H@xXp2>gkij{Z7CH`{e6f#;3>ueUV#*8}5#nO}b_zXH9o9}#VQ_fYU@kc2 zPMkZs9%8w@lZEV4fj@8wn@_dz*z}yle7r;od&`Z?_`fgNTrNwi%@Uz=#n4EF2w_av z%FqYWOA<&hpeOIFcclElzv#Mkb_|iZdlkv8Mqe4k8qrLqe0@z2s?^dp&;XX$XY1*n 
zLWg1H2O1+y`%ZjLjdO&aqqX!RL=%cW5oqqK$Do?K)9wrCRBa-|n8fPA>1p;5JD7dn z@TuJZskdO^-yQXnU1bI+!OqRY+on_50J9I|2>f4qJyugJZ3cGE($ody?&R*s+#B3Zx-@2XsHtKh~UbwrsZt-)-!O!~z zG|yX?w3h?AFA5+L_dux*4ewuHw2zvpqNPT@lM6C<+ctI(H9Z6;J9#}F4HmM5+j%uKZL-0d;@k%#}>D-7pfd9 zmjS?J{cMZuo16;nE6cD6*M~tPhsSQu!K||kNH;Uh8}E zIMbFou9D->@q$Q@iIJ67$ou7^t9G%LP7V0xe1G{s1fzqr!r>x{e^5YmhgEWyE|IWJ z{xpf{X28eXo9#K0udzonM&gaH1_(X~;k=73`25EjTKWG!& zcO@TwR#snmkimUF8dQE>_;Fhhyd(O7qwGM1H-CefHx~&OuL_Xpea$kI&9$VvR)g@G z$_;@L4RmZ8R)A$Yn;lQXzSCY5#iU12j>xvX90iuF4K) zT5*Q%f5!SpsJ~4=O#W9P|GU2tBmT_5KV$wQsapSOhkkMT zTgRVn@Vxd+;F-WPfoB5G1fB`}pC=$8A}9S{Q-i-w_`jwJ@PCbriCu)5PM(SWldFvM zKbpY)YX6tcA;k1|{nelP6aTO4GXLK%^2Gl$1>k=@{-!_iL;j{SGd_(E^{4*C|0})R zul+MKK8+9ahyUsLf0bSNm!J7*{9k45{-!^P|EvAil9~Bwe1zZQGe3=w_`A;XG(OUA zI@53a|241s75}IHq<<*C`&pmvAN5cDi689`{ptA7|J0xOF@D$Cp2o-gQ-9*e`a^#j z|5sYZe~q6>=!yUJ?|!DIew;t`C;m6T>%SczxPR(T{CL0Xj8ESm{NHt^r+f&0*MB>{ z3IEWaj-TjH{mJL^FZspjuLJ#8!}Ic)z%zko0?!1V2|N>cCh$z)nZPrF|9Arb4-V5Y ARsaA1 literal 0 HcmV?d00001 diff --git a/biom/tests/test_table.py b/biom/tests/test_table.py index 5d6d9640..5998638f 100644 --- a/biom/tests/test_table.py +++ b/biom/tests/test_table.py @@ -3203,6 +3203,23 @@ def f(vals, id_, md): with errstate(empty='raise'), self.assertRaises(TableException): self.st_rich.filter(f, 'observation') + def test_subsample_edgecase_issue_952(self): + # this file triggers an exception on Linux on subsample + # with replacement where the pvals computed sum to > 1. It is a + # subset of the data reported in issue 952, specifically constrained + # to the first 10 features with any empty samples removed. 
+ path = 'test_data/edgecase_issue_952.biom' + + # ...existing logic for test_data, not ideal, but consistent + cwd = os.getcwd() + if '/' in __file__: + os.chdir(__file__.rsplit('/', 1)[0]) + table = Table.from_hdf5(h5py.File(path, 'r')) + os.chdir(cwd) + + obs = table.subsample(10, with_replacement=True) + self.assertEqual(set(obs.sum('sample')), {10.0, }) + def test_subsample_same_seed_without_replacement(self): table = Table(np.array([[3, 1, 2], [0, 3, 4]]), ['O1', 'O2'], ['S1', 'S2', 'S3']) From b65a1bfb70f0a40d64c89f2ae6f7e776c3637393 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 2 May 2024 09:46:05 -0700 Subject: [PATCH 2/4] BUG: fixes #952, edgecase creating numerical stability summing floats for pvalues --- biom/_subsample.pyx | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/biom/_subsample.pyx b/biom/_subsample.pyx index 35892025..0a324efd 100644 --- a/biom/_subsample.pyx +++ b/biom/_subsample.pyx @@ -44,13 +44,18 @@ cdef _subsample_with_replacement(cnp.ndarray[cnp.float64_t, ndim=1] data, cnp.int32_t start,end,length Py_ssize_t i cnp.ndarray[cnp.float64_t, ndim=1] pvals - + cnp.ndarray[cnp.float64_t, ndim=1] data_ceil + + data_ceil = np.ceil(data) for i in range(indptr.shape[0] - 1): start, end = indptr[i], indptr[i+1] length = end - start - counts_sum = data[start:end].sum() - - pvals = data[start:end] / counts_sum + + # base p-values on integer data to avoid small numerical issues with + # float on sum + counts_sum = data_ceil[start:end].sum() + pvals = data_ceil[start:end] / counts_sum + data[start:end] = rng.multinomial(n, pvals) From 0c12df000857ca65e4823927746e953f804c0ab2 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 2 May 2024 10:32:36 -0700 Subject: [PATCH 3/4] DOC: changelog mention --- ChangeLog.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 2c488763..24fa1f4e 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,6 +4,11 @@ 
BIOM-Format ChangeLog biom 2.1.15-dev --------------- +Bug Fixes: + +* Fixed an edge case in `align_tree` when a feature was empty, see issue [#948](https://github.com/biocore/biom-format/issues/948) +* In `subsample(..., with_replacement=True)`, it was possible to trigger a numerical instability on sum, see issue [#952](https://github.com/biocore/biom-format/issues/952) + Performance improvements: * Add Windows support. PR[#951](https://github.com/biocore/biom-format/pull/951) revises codebase to be Windows compatible and adds this support to the CI testing matrix. @@ -22,8 +27,7 @@ Bug fixes: * Allow `Table.to_json` to properly handle numpy types in metadata, see issue [#886](https://github.com/biocore/biom-format/issues/886) * Do not modify IDs in place in the presence of duplicate relabels, see issue [#892](https://github.com/biocore/biom-format/issues/892) * Catch an edge case where a failured ID update in place would actually change IDs, see issue [#892](https://github.com/biocore/biom-format/issues/892) -* Fixed an edge case on in `align_tree` when a feature was empty, see issue [#948](https://github.com/biocore/biom-format/issues/948) - + New features: * `biom.parse.save_table` makes saving less tedious, see issue [#897](https://github.com/biocore/biom-format/issues/897) From f3e4764f2e6f034a7751c1a2625decf3e8fc5920 Mon Sep 17 00:00:00 2001 From: Daniel McDonald Date: Thu, 2 May 2024 11:57:37 -0700 Subject: [PATCH 4/4] DOC: updates to subsample doc to reflect use of ceil and filtering of samples below a sum of n --- biom/table.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/biom/table.py b/biom/table.py index 6ae4acb5..7e395bc3 100644 --- a/biom/table.py +++ b/biom/table.py @@ -2914,7 +2914,8 @@ def subsample(self, n, axis='sample', by_id=False, with_replacement=False, with_replacement : boolean, optional If `False` (default), subsample without replacement. 
If `True`, resample with replacement via the multinomial distribution. - Should not be `True` if `by_id` is `True`. + Should not be `True` if `by_id` is `True`. Important: If `True`, + samples with a sum below `n` are retained. seed : int, optional If provided, set the numpy random seed with this value @@ -2931,14 +2932,16 @@ def subsample(self, n, axis='sample', by_id=False, with_replacement=False, Notes ----- - Subsampling is performed without replacement. If `n` is greater than - the sum of a given vector, that vector is omitted from the result. - - Adapted from `skbio.math.subsample`, see biom-format/licenses for more - information about scikit-bio. + If subsampling is performed without replacement, vectors with a sum + less than `n` are omitted from the result. This condition is not held + when operating with replacement. This code assumes absolute abundance if `by_id` is False. + If subsampling with replacement, `np.ceil` is applied prior to + calculating p-values to ensure that low-abundance features have a + chance to be sampled. + Examples -------- >>> import numpy as np