From ec544734e8bbb67e90c0aca6398867fe1f97d31e Mon Sep 17 00:00:00 2001 From: Stephen Fleming Date: Thu, 29 Sep 2022 19:39:38 -0400 Subject: [PATCH] Fix legacy 10x loader when more than one genome exists (#2248) * Fix bug with legacy 10x loader for multiple genomes * Add test and small test data h5 * black --- scanpy/readwrite.py | 2 +- .../_data/10x_data/1.2.0/multiple_genomes.h5 | Bin 0 -> 52082 bytes scanpy/tests/test_read_10x.py | 17 +++++++++++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100644 scanpy/tests/_data/10x_data/1.2.0/multiple_genomes.h5 diff --git a/scanpy/readwrite.py b/scanpy/readwrite.py index f2aef41297..5ff556ddc5 100644 --- a/scanpy/readwrite.py +++ b/scanpy/readwrite.py @@ -219,7 +219,7 @@ def _read_legacy_10x_h5(filename, *, genome=None, start=None): ) dsets = {} - _collect_datasets(dsets, f) + _collect_datasets(dsets, f[genome]) # AnnData works with csr matrices # 10x stores the transposed data, so we do the transposition right away diff --git a/scanpy/tests/_data/10x_data/1.2.0/multiple_genomes.h5 b/scanpy/tests/_data/10x_data/1.2.0/multiple_genomes.h5 new file mode 100644 index 0000000000000000000000000000000000000000..3d04d4e909c5ac4570351a5eb0d9767bc60ccddd GIT binary patch literal 52082 zcmeHQ3p`Zm`#^Crn#ibDV&L z0E8SwBE$m;GoARUA3NxGQ4Q|L!)-7J0h6JaOyNQu1ZXUTgiXfu!dU(!Zd9R-#WOM1 z(175Wc>cJ=l0kEbs0$Og8pt060c{Pn)ksw>u=>uyWC7NGFfT-NR9s|jN24!LL^V`5 zcQ2X)-P+N?&E3@jX<_K0fF~G1ieT-PwOtEB=oJv4F%S{{Gtn{8hZ`_70sbOo;N{@i zv&LVkuSTI@@p%Wu*D^LVMJoC!K9V~TGSe`o=olI>^GCW4p1-0TS`$GCl>|7hApF(T z(KmsUK*Df`fj{s;W}fkM1On<3gj6v7WH=rXT5V~fwnATnVy$Utyh_c)np3$8780s#_drn93txH@@y(S5D$9X##m zP99!P?rw053BnL0GAKR8$<5Klfwa=W#f9|CY?Ai^W-ds6WTuP2to>xj#mSaVg>!SV z_hjmk>)?UEc)juK!KIgjrxy}Y5JJ#Lu2&3pPlnv-PL57)R2R4bxmr8Y-8Xum`3=Jr z!xPVMxE#a8fOQ=7;Aa*G)uI9#X&kf}i$)}4@qzfU5WvU5G_2ZaYlWaRWblEHe?M;E z{g=c{hmK=B!^Saqcx8^T6azzb0_?=wuP=?6Y@lBs#r(O>s2&}2Ac%$xZt#KkhX~A{ z#)s;Ev@`yD!ivME`}TeRp5S~Bzu^R|#o*(vIo1$#MA19X4Uhha3K^e;;4d%c=i)|l z7z_VCA>jmOD?}P25}yEh(l)MN@8ZBr0|&zzRB!LF-i7Mrz)XYaiE#NydTuT*XdI$| zAf#xdDCQvw$3PAw&eO%EYgzBZ;NR)Nzhf)ajE&VSA($P&KTzwu5ZZ6} zAQGlXnU06Bm)e**L7ja#u0>0QAccvzhP}l5;cArOvyTL86|fM}9F9?#0VrF5pahIx zaf(gG0!T|Z#rQn^r~5JzANss;4f(xA+7tf&1}@#O-(}XTj4&9g!Q1gbgBltPd;kG} z06+jB01yBOi~VEsvv|BKE5G->m!a{(pTlT-wN;gXpUA6KLg%{BLO5p03ZMm z00;mCMlS**@h)uo4N}1-Vg}DEB2$f@JFkdMPs03e#CgRINfdUZXgnG0__BG$|26Nz z=T})yT}~oyKvrxKj7qspNAq*S>KQO zT>gbWxBM6U%R>gknS{UU1pB`?{$+vTi&bSnxY`XkQ3nYXI6f?)zzYxn2mk~C0ssMk zz$idqB>siH(+2&A{HtRDhBN6q=3mP=@UMJ(mPQ0u5;*|>LU90RfB--MAOH{m2#iJq zM&e)CvIKm29W;3U3t4Z56SL2q|3cILt91x@2KucpHj`Mn-!YXc6+++ z6O)XJl8h_NO0?sQd`mP_8M)zTklQJ(lO}Vq`E)!dCMqUUt@hm4+Z}Q_-qosf%CVA5^WOQYPdRgKnO67wMxTuV zfjw=JKiU2I(th@m{AJ^!ymq!s4oTW%Auu-c(WP3)Ez|q#^_nOT9mCh`6>-SqzVlmv z*YOLAem++1TG`n-XQyW6TdqChYP3(hU|E;f`MV1mMTFvZ?EGa^xM3OfMD;1 z>s97fN2Mp+*3&L}=F@uj!{H7xoBF|(CQU63JH63!ce2N2{Yf-!s+wC#n^pf*qdnB< zrs&$mP z;;0m*Lp`_QNV{soxw|U&de(1%?_rxla)6Pct{PvAqw-vb@xNsKrNH-BqON>3J$UPq z?Gt8boW4yBY3Y1JTcxfqw2$`JwvBGLZ$0-p|17lng>r0Ko$orij?M!a0(ZqsiHZ$Y zN4ghIYiv2tG$C%PW1?m5{L+Lhjn;z#kqy@3{1FhF&z7ZJnG>92c4w;`rJE@79uIk> zZ*Wrhr{wbddr^GbFZT46wF%J*f6nJqCa#rSr7Bp~@^pUXjv#ZRrF@6SwWY-vH5O;S zZFXtRq6hg-TPJntK&+##k{{#ftCXV~o4TGaxT+L(yV4J?=pKO=b-(xbu6dca$RTOV zwz!$4sT)pi2&r&dtkV5J=*|?+2U00BoR`w(oie38ptbwvD^rcljX>nUfF4w&gY*4^l|T~u^&WQfV@-2qRZFgDG}ru8P4 zoNA-Q2TeTYzCGQR?}k_zZB{wyxe<9wtI!(F&c>Qrvfj+tMzx0Q1C}(YXDei;xUJ9Z z4PuC&JJYt-XY*KwI`LqEl2K@qWkZqf2eYTzvlI_C)rI)n*S#^gT(@=G%NFm>j+4wM%~CTfEhsfwsBF=1oK0@pcP=O*IDeH!spZY z0*>9g%4pTii%>r+c377Br}Syj6YAl#=hr+s=&h4Byq?35&67XUJ;r#mB16DH_>T(( zd-9~?l!K!Les^NuCp+E}lXJ9- zT^7z=P{vRS^R7v)n_%*%|J*BeM2VuDTYndMpIBHR;>`2T^zdV|XP!^;+G712B{FUm zUJ^KEbEWh*-l~%{O;b~Evl%+USA)#Tlzh%sD)sRBO>7_$Ewfj@4gKJ(ZPlzWD}}x% zb)uO~f@|Wm?xeM068oe7(3cL*FRc@->hm{B6g+Zn`iXsZ!IS@z2-!gk_UNz~J9mZM zK385|fk|&42;97(L*Yu;$F4(0T(6kvA*)sSCSkg>o=8wk23bhBcjBG>yWf;0UM~D_ zlH_Wq;d^;X>8{MUINh1!QUsba^PFggvQmZGnFS)EFLIns)7I>pkQ#Y#W7_^Bi4|t1 zQwp@!W@zhZWEsC_O!!!_khn<6)`F67Co|tyCc^lD(TXXu*|7=RrIV)}NLsKu%*3ti z%q?{T5mgttV)H}iuQWTY4zY<9Jh(sqp-=UbaLdCaXDOLe!I@lhMJ_l@_YsqANujjK*d)Hvt)1-3~C?Q+#9DPi0!zjUmNWX^cM&9tN9ypMc<`Hkx_ zhO-({-)ALRd=M`1sBqhR{Ghg*j5^KckX$v1ddBSKh1`xFX`;^hCruBNMHeI+hwt*c z@VGu!Z<^fGGo=%4#f2YwU#i?u>^8mDZRrD789h-Ffz9XrF&@&>r-o+s5|6gdk@Y#d zHB|jYvtxc$Rd)K8%m(@J`dyj|Vw%0IDJ$Y_*3LwR|pY%lM{hU1%!&Blh&B69=o zU1~R8?9TnD%dxlI)!69x!~3%S5+#PoE*u)bt5peBE3aDm>$UbM&D0T3dM^LuP-BE=C%sd;l-RVb2u(=wM% zt9n`a=3pOx;mcPA^R|lZU#25<z2>E7_=}V_~X^d z-CMP&eF{9@``Km*@&CP^XJJo^@~)U?9Anh;EW7X3duwIg4i0^~e|Cp^=25?Bz_tL} z0&EMgEx@(_+X8G0ur0v0Sn2gwW?NP1uH1@(yv1whXlRm5JP7T}^>U1-JaoOjX^cxW zrmiyo5I-^h>7I6WBl?RBsW9UQ>3V{F@n7yXf)DmP@ZmJPYSEaIEgI2ZkVqw4p^W~vVJsaG7~$<(>fcvEL-M;w)(y`tY7hVI z_03qV;KA2d&s%yT=DcDyqs466Uc!(>JaU1NiZ35X%$-EH+BF1NlEOMtkcqimU4fL= z+nb%S@NTe!!q@Sb#bWcW1|vBXgdzBkf}qnAqaX2%kwkFiBS0iUD51x_ zKWmK03}Kn5%HHVT0!~Y(x`;Rn>%Wb4Czy&q!ngeC*&7H4ovvqw;2Sv5*}ZBW3ZAaqTLD_9mftA z_&AUc3WUZX1_zLLKyZTs_zN$Ken%f-J;I)4jbso*$`y^Mjl~Dz$3lRUL5vN_C=P-U ziRl>0zy-$*x8R-jhZ@{LH<-r_Ij|bO;8-4<57ORogBqX8Lk&I*7#e`%AvQ9QxX3Z0 zL7-vRr@&12$uQ6783Y>1fCK)*9ljCR|6-#xq(ZO(hg>&i&6s<<4eUQQ_(eO**Y39X zIO2pw#$TNKC%oAic0h~zWWWJm@G0Vghp?CUc-HC7J{;GQFvo^%xQ4w%?2~r}=bJcv%qLE7&Sogrn zb3dO?4gWub9q;|blfR6A;RZ6Wstkz5cLtoOgN%xSin0U)FF*hw01yBO00aO6-#-F4 z|HApfw_rdEK^5j*mjh9FIU3MH<`=(p-j#t*9J4ePM$S+LFrYOQ58wj`00aO600Dr& zXhh%}=3R=rP{2ne@9NGP+L{3G8j1(-0R#X700DpiKwvZ?fb%XgHZQ|E-!<^Ok%ZQM z@bgCWoij2s;Bz++#@|=_Cj(kY#_+sX`T=GM5JVbe*Wi15-pI*-mcvb~a=3sii5xgS zs1KY00ssMk06+jB01y~W2#mzP{!0T|2+lD7QapkIP8vi@zBm5G$$(b&Ev$05fGdd{ zfPbMlfHOb11Ccc`0HLaSRKB$;IzJylfkaO0+uTvqkiyyKMO9v3lIPZ00aO60D)12 z0K6R%*yQfhZf&*VKjbENFe?T7Vg0Lp=kWco;C=%Pp8&%r!0-t$d;$!g0K+H1@Ch({ z0t}xF*^T=vZ^d1?NGM1_jHh^NnE#?xj8M65d5WUk%D`Fn*(U=61p|7^L+c|1xa6fi zad~vZlZ?33UsY!4eNGI%p)WQW^z)+eQGGKOt$+s^;s{j^(u#=jHK4<8oj4kt+; zrfui<`k-LYbCZYR|T(|{E63-%Iyqu z_SEYP+5vmmwGB`E|Elr%9$&ilu- N$5JgM2JL}_{tq#-m;3+# literal 0 HcmV?d00001 diff --git a/scanpy/tests/test_read_10x.py b/scanpy/tests/test_read_10x.py index e9cf188a4b..0f4373334a 100644 --- a/scanpy/tests/test_read_10x.py +++ b/scanpy/tests/test_read_10x.py @@ -73,6 +73,23 @@ def test_read_10x_h5_v1(): assert_anndata_equal(spec_genome_v1, nospec_genome_v1) +def test_read_10x_h5_v2_multiple_genomes(): + genome1_v1 = sc.read_10x_h5( + ROOT / '1.2.0' / 'multiple_genomes.h5', + genome='hg19_chr21', + ) + genome2_v1 = sc.read_10x_h5( + ROOT / '1.2.0' / 'multiple_genomes.h5', + genome='another_genome', + ) + # the test data are such that X is the same shape for both "genomes", + # but the values are different + assert (genome1_v1.X != genome2_v1.X).sum() > 0, ( + 'loading data from two different genomes in 10x v2 format. ' + 'should be different, but is the same. ' + ) + + def test_read_10x_h5(): spec_genome_v3 = sc.read_10x_h5( ROOT / '3.0.0' / 'filtered_feature_bc_matrix.h5',