From 20d042c7436da7192308a55a355e0cd8353e369d Mon Sep 17 00:00:00 2001 From: Tom White Date: Tue, 14 Sep 2021 12:01:02 +0100 Subject: [PATCH] Use of int8 for variant_contig results in integer overflow with fragmented reference genomes #584 --- sgkit/io/vcf/vcf_reader.py | 9 ++++-- sgkit/io/vcfzarr_reader.py | 9 ++++-- .../Homo_sapiens_assembly38.headerOnly.vcf.gz | Bin 0 -> 29652 bytes sgkit/tests/io/vcf/test_vcf_reader.py | 12 ++++++++ sgkit/tests/test_utils.py | 29 ++++++++++++++++++ sgkit/utils.py | 19 ++++++++++++ 6 files changed, 74 insertions(+), 4 deletions(-) create mode 100644 sgkit/tests/io/vcf/data/Homo_sapiens_assembly38.headerOnly.vcf.gz diff --git a/sgkit/io/vcf/vcf_reader.py b/sgkit/io/vcf/vcf_reader.py index 725032376..fa7c5954d 100644 --- a/sgkit/io/vcf/vcf_reader.py +++ b/sgkit/io/vcf/vcf_reader.py @@ -36,7 +36,7 @@ create_genotype_call_dataset, ) from sgkit.typing import ArrayLike, DType, PathType -from sgkit.utils import max_str_len +from sgkit.utils import max_str_len, smallest_numpy_int_dtype DEFAULT_MAX_ALT_ALLELES = ( 3 # equivalent to DEFAULT_ALT_NUMBER in vcf_read.py in scikit_allel @@ -384,7 +384,12 @@ def vcf_to_zarr_sequential( else: variants = vcf(region) - variant_contig = np.empty(chunk_length, dtype="i1") + variant_contig_dtype = smallest_numpy_int_dtype(len(variant_contig_names)) + if variant_contig_dtype is None: + raise ValueError( + f"Number of contigs ({len(variant_contig_names)}) exceeds maxmimum NumPy signed int dtype" + ) # pragma: no cover + variant_contig = np.empty(chunk_length, dtype=variant_contig_dtype) variant_position = np.empty(chunk_length, dtype="i4") fields = fields or ["FORMAT/GT"] # default to GT as the only extra field diff --git a/sgkit/io/vcfzarr_reader.py b/sgkit/io/vcfzarr_reader.py index 436fa8991..bcd612543 100644 --- a/sgkit/io/vcfzarr_reader.py +++ b/sgkit/io/vcfzarr_reader.py @@ -14,7 +14,7 @@ from ..model import DIM_SAMPLE, DIM_VARIANT, create_genotype_call_dataset from ..typing import ArrayLike, PathType -from ..utils import encode_array, max_str_len +from ..utils import encode_array, max_str_len, smallest_numpy_int_dtype def _ensure_2d(arr: ArrayLike) -> ArrayLike: @@ -170,7 +170,12 @@ def _vcfzarr_to_dataset( # Get the contigs from variants/CHROM variants_chrom = da.from_zarr(vcfzarr["variants/CHROM"]).astype(str) variant_contig, variant_contig_names = encode_array(variants_chrom.compute()) - variant_contig = variant_contig.astype("i1") + variant_contig_dtype = smallest_numpy_int_dtype(len(variant_contig_names)) + if variant_contig_dtype is None: + raise ValueError( + f"Number of contigs ({len(variant_contig_names)}) exceeds maxmimum NumPy signed int dtype" + ) # pragma: no cover + variant_contig = variant_contig.astype(variant_contig_dtype) variant_contig_names = list(variant_contig_names) else: # Single contig: contig names were passed in diff --git a/sgkit/tests/io/vcf/data/Homo_sapiens_assembly38.headerOnly.vcf.gz b/sgkit/tests/io/vcf/data/Homo_sapiens_assembly38.headerOnly.vcf.gz new file mode 100644 index 0000000000000000000000000000000000000000..8c3c28dae2f4d46946bf3764ce1731167d7be1f3 GIT binary patch literal 29652 zcmcG!WmuKX_XbJ}2uLH{hzbZuh;&G!q=2M^fD$U*-6<*Mra_Q$BM1lx5(2`ey9GAg zU7mTizt{7>&iCU7pFJ~c-RoX!)~tyO#ziVuDjs z2gj*m6pFPBo8*)%RF3Yvvns1#7C~!up$+ z4h!Dfjklh_1`CTDwRDhP*q6wwDevUT^Uu!;dpbjP%Nb8eE>$jSC0*;r7oUu}yjvbO zN)po7&+tm1w7Jr@I{fB7F_xgt(izcHomzjKd2ynpS-7rdk9;+m*IlJF{Uk{(Wxsy& zvY~X@`N*~MR+|tap+{DK>{Pb4`j&7_p{`ZosFi5vIhTX&5;;7la@ag=RFv@C=AA>z zk=dWou9?X$%Il)#oA(TkJ34oNxHNojpaICI3VtBXUPReBPtFY4!9r0PQyf4AOAL$QO7u9?@-+gd5SqLWCvdhx5 zf99r9w*9$ z;Wu)hd=Ss?J#9jUv6&ru5_tj{1uhaGJ{AdNa>9Z54`U!h@}w1@9kCqdf}e70$6QB1 zJjRU$X#PbfL!m2P`nUnUYp2UTZ}aBt#^F&lI&~+fcZ^&=Ep<;uo=!ZyD_UM%D}H*l zwlJ-unb?+UXZxi$;*7NoCZ##KPGlWwQ3Ns#2%A`ccBNV_|*u`e%={(Hk~joQ5aEkxMU^ zzV~eA_B`P!Gj3AZd%KiX&lquFy03Q2SftMTxX3D4*ZHC6m~HXiwk?&K_qVFp4wdx6 zwyx!lZG)$$I_o`&XvAsdp7L7i+$aK-Sa$^_u@YD75 z(;DIZ_2d0WU4_!!ypIEByQb@sI=4N}m1^xCcfE9W(oI_Bzq`iqsybS{IQ^3zed)k5 zGWU~Mar&l__rd(|dXrI-V0E2MwQJ{a{b>!d&wUdM8wI`-0n}@rr zlakWWh^8h9i9c6QOvmQac}B+zDOZomC8e3|%g$sDd@u3vcdJW$*U>jkn!K4aZd4V- zhBis)hLyh=8GW!`69xAvuWak0%MB(Up_zTWa=#>C`XedE`LlS$aE`kfY@yY-wU zmea~2y9rI3r>EzqZR@A2!;H(bCJl{Wzlk?VNIGWPH-7y5xht*EXhobL>s|=@Vq0IN z$=B0Qo8$K$o0y2=9sjyavn~E)ZJcCZ-%(i8YYTgH@aSRb#&H{{C(JkA;tf3Pk^JT{ zeW^aV?C=Wq_~j&?&wxeOQQ%@(CncrwQj#$L%?+;yT zQKD^Qw@L6i+2W|>O6iGbRlWJPnuq$>W`F zwe6WsT)a3NiviyQWbMS7;@8$8?HXakUZN@wG8N-nRHQj6opKA9b z_2h1&^yAErj|U#=Pn>_M=x%HVk<1*Tt^c_Nf0Vwy)iqE(Zs{^Rwv?@SDaf*)9<*+! zxx19@Zs$vVEIZsWo4o$kxG?<}<)K*VtNO-n*mpX~mTZOQvX4Rnl3a;XM8W;Y~N;>SNt38mX53=J=Qt#oz-Wn}8jc^Z;r3_I|O{S zmb=qAxRY&$k7jd+XW3JG9{QUc_LP0s-?c7zq5E#JF>~GuZXcIUVcAbx78v^DVnlLG zySjQfH}lZ()t{`->wEpZ)xqhQRv)$Q4z$OVJ7t(Bai-X2$!y!o?kn}O$w*sgKE5-^x}R9hvxI{S`Nhe@AF0!XKp};W>m%-v*w1ICfBR z!Ff3u^~KFm^i`MfTkDh|jgHcL$+6X4gFPH~pTgV9SbO(ARlt*f!yafiOh0h=Wc33} zY%;Is$IdrowpFj!mma&%1_rM$vpqj6rNq2=Y{(qSI2og1WyvSYSnu^csnq*-ufNfb zFfH|Es^fu~Wn`Py0N+_}5~XG3Co9+LQ)@fEN$sU#?n@WA?K$2?ntN?-yVd2$`Z&MA zgOhjrh*3#(z%rP!SpCzspWx5q zlKx#@On&Za4mvF?PkyzUa=U_w}e6Hqx(;DlqbS-_v-0wQ%9$ z_N{dDV*A^Ldt=oxI9%Ovi=&tc9M1VrKKNw6OGxL%dQe@&;gWq9Kg?N00X|aiGF|9+ zAro|jby$>mEVc8punVwg^zgla< z5b2x0XIO;n%F?E*RZCWjD1M&6bzI0(iK_u41!R~?1laHD5oL`bFsycI~D5K4PKE1Tw z%V8g|t#ea+e9u92WG|TP%_qyuGS4;s(!z};>#HwTSI))-JrcBI)V@ZWEnS6w%I%B~ zh0MKH?Z)%mHLC)Y+-G%4*)6EHwwK0m?QZgGe`I$o8(cu9Z%*6~(%y2AETOWrOgJw# zyyaWZWs(T%V);0*fXtgIBTLB>T`zP`@G6Rpf8`0A%KxI~W3xK^YvjBgVSL2VS!Q3p zKg6n=;wr4WolrBPG<;*c-LskMU1y=aWLKeWrclh6$0?#5=MEo+mOPGSGd~R$E;4&p zb!IwOS_|KBZ@2d_zov3t15Z*LSm+d#L2BA@2ySG2?rust)=VG0e3k8M*SA%Eep>wP z37w$vt@5rMC3wrD3E}iA>pu_Rwa%KxcbgcxyvM#X?0+qA0X9Krb{mNQPzWLj=f>QPBiR8(LZVQ?8qF)?h>G@0*q^a+TOl3Vb+pN~rO^YCkQ}C2FmlreqtPH{uvKPZ1P) zLt8Rs)>Y^9pU-<-V_9e|w{OEp(4|@|(v# z`?lL5$59OY-klEpDmsVI;Cely8RPbu-L24j?j7}vX4_v+bLyt(%DTaUk7w4~|A%pM zNuD9kdfwYye_FKdPwSXA1DbO5)Jb9!#WN$*@N=R26Axcz(;KGUvD0PKU9B#IS1FE+ z7rrnUZ?&WQb$n@A*(0no;)eZW4;$v;5(=PO$Iwv@t2N|W- zNo^563!+EG!C zL7s$|)b5DZNyhOsa4he!`A`~Re}XHdgM-ltyEbZ5Z};8L(Bw_W;zgM)!$s0q?iNi% zD8-5hNfsI%_MvnB;kxpAVfo$C9;#1OJ|-5&sYXY%8h3mbcVOFubfPphv$*|ZhXtQN zaV83i6D~VkH+Q}9Yqw;7JC@^|CH5vA_uKTkb+fI3!^lOI+5Iy!hm_qv9oz2vduO`t zh_>!@lO^#QVXAk>VxyZfk}WW5jC%3y5Z;>Kmxz6Xneoa=OSm<{zp8BMX2jHp!WN@K zd|zPwbi*LKJ>Sxp(Xi9EZzYjEbT>0+`J5Av?=NlJB4U_2M`bmXUN5gMyE=0gBM?Y; zp+)4r5UzwxEP^fFU4t^IVcSRS&-}r#-JU6}L6UDuVUkM!hsQrjn5E)-a?&P?@07SN zQQn*n%Rsmx(7pyf+oHJT&|5w;eKmi1dePGsx?gg-y0mnOY19$4C%kyVa&KC@}u7^`$|I{8hbU*%DM0$+-pRE{O|Vu_!Yy}|e(r$xG7 z!Yjf4*-`OWwZ5`%1WA~v(lFizy#x7Wpo1sQD#v;K7@1nU^wu6;P)93vpbXHq+bw=ma+nF_?zo$;QJqNBTM7f1s&6oOfe5Ujz;jWW9h1Fb) z@PhO6o}Sf<%fov3w#l^dxB4_uht&^NSDFmTWpZ>ke- z*dU6XHf)gWnSAc8yVgrPCht`NPtB{{KP3iF zeU&9Vby_OAkSzCWPMr4kZtSNI)@Hk$bJ`8w&Er_tyoq#ibqeyehiI z4F24sAtduF z4z**g0YuFh4k307>L(2Lz$t{M9R2!(zL{x|6rb&`x! z$2@{A_0&@wOZ24E@ERVSUde-7+8ME5S8EKMnl|wWQ+96In>>B0FZ_-x%_!td2%IVH z<_5X1dPemdTqNtlr`SL4SLs!MlF_YD^3$G8W;AY)Ag14B@W}3mjgnPMnq}J0&_~%y z2%4m-s9xr_5IU6HpFQ@;2^v$rnlk;8Xdv6UsrG=7peIbe0 zT64kM==tgDV#j%O8Y6B@@JN7T3@8zF^FAktsKfqlHsWo{+zV zmnX_BJBq>LD_Q}2LUt6j#rqc_ch$2A;xxi;=l015W#{|j<>bo+hZNwn7m-FW=>%4m z`rq%Q9uDgp8@V!dj6gZ!Y*};1k70Z?(l8=B?l7Dl_5mr$0~RK>c{$p=U%o4rK{?uQ zq~vCfSeSxa)7-9V8nPldSs0%i=WjW!Wo2VEpWz+cxugC(k@r`P2O+&RnvQ0NwrUnj ze)3`kPMlOAnL2KU_aihJLS^O`fjBf-Z1cDyfh3eguXEI6w*=ja&{sI6JPApx{jkd7 zc=(E#zFsdNSh!<*wH!|@V>4#RgmquB`Q&jh@ui|Tg>qC&$jwTvY_(xXH0PDX43j^B&1tuf+Z$_IKCazN>egtfhh(FN8x#kqmR9wSg!< zyGb4GI|tXC_c-yWFkSf*t81z&ou+=dzxz#ZZQN7E6l#rrbiId{>PD+}3nkhk!HMH* z_n)WCtMYx~&(W1OOnB8Ln8WIs*(V}>x45DLYp5*$Hi&got7cExLq1P7C7ybr@*533vd`W`L>Wjsz#>KrpT%`5t)PD+tH=p&{732(8g>Wmb+o?Qubu5l+1*_ugAU;(OVGffy!w(K@UXHVYaBob`Oo6JqFoG~C zB@1Ipuf1o){gizP8boeGHx98dURJbPaF%=!NbdGD&mlu-vI%+P3EJr5unBeHiN9OQ zP{^EARqke|L_|}h`_1WfzF*06RgyNt?j!rg7WUpD70)=zRBlH7>3tjhY}Gu=@u6Dj zh%krBhM7lZ`~2n!?HH8n{84Jfk1T(gk2@8h2jqu(U*{FF(VY|E=qltUrm`H&PH&ah zYv(Y+VWQmT6|q6@V_VQyeLRu0HkSBvu@x`Xl&`9QIp{@Y=ZB9r_3g~>kMx-;{UX)s zx%#Zf#5XGwSAROS7UWXmQj!U3Ap-R1I)CL=O0DaetBDHtSudD#e5axCUL9&2=&w#A z!2E9Be|s{WfINBphS95sMCh!Q7%NJpQtk4>FF3Yd@+!Mjlox*C*XS4_(^KPGH}f$2 zFsO&tIYf?T%BMY?ccmRsw33GwV;CO(7=5l%FX5vSrh0)HT9Fc5lb-Nbp>zm82fr0= z$MBJZ&K}x%U%X?*=rzINYXzlG-?uoms^btvQH##oY~hg}_~RCneK5u8`l&}=c#`MA zMBb6?+Ia8b(Ywd=3UQd?R=2KeMX?v&oE|&IZM)S%u2_}(vB6Yxjk72B1dEJ_hE=&k zQ?>XOdu+Z+#FNk$xfGFK)6?qGpKEMM?S3@DjUH&04R0H|_jRPwxUiLBNREDIX{#q; zMwOy2Mljhd;6olqyTxrj^jNa61!j&hzK_-%{F8*?;QPV+M72cwcNet zUVaf_&80}++Axxxs1%Vk*{xDuH#a72k(V$2%*R>8TyJ}8wp?ucT1a79;J58eRWhk> zc5%num_g4yg+1I?J?|3-EQOUd%4o4JHM-K^5pesAiupL-9vr(Epi*$pq$j@?F7)T| zOn;H*9w`=OaOLUjvOy;siM{{ZJLNY`F}c>ea>X4m&Cnccq-XYDD2otRi5Z=r91hEw z1seSESsK;l>m>P#<9@@qOR()hn6dG-tsl4}o}TVY&Q!`+fn(?1zHy1>cbP8Fzl?fI zJI3saUoL#bn}DfDoh*1CA5Mhc4qupTGCAM8VOn83(lFbDkl8#>?;|zprk&YNx)vlf zw#K^My`7@)*56X~$NA-atDTSR4$Y#hS3lazxZ{h|w~s5ta%WRbFRmW3r|lB%HU8lY zEK>$wN!sj+X^NazPnH`@w1V4+9#6flnSEn2RmyBS{faYYyEp#&o4Ly<>fY(Hw9a#B z48c(Rq2$?pUnZ%_RZGsegGCCRF*o+JquoiZTJD1PUdKMOvp21|U`gJ-y8(<_3ooOd zZ&iP^UeMLUi`L0X&~10LCMJJyXIAIIYxOeTYnwCW;iFQ|NMas21b#zGD-%c2#O*h2 zs>n#cZJd?J3T`Kj-8$?#`5maSa>D!5^Y;y%tcuHb)ADp`UZIp-ru3$9$7wK9)(Sb7 zxZ_JLYXJuJ2H&$yOq_}@h4YsZ-_V$NJzlHh(uSD}>@sa*O0jov3`x(d^A2s!Kf2Iz z-IEq-31(BoB;5PZq@kwa%djZ!TM)AMUhHr&nv&%i!wocf+&0dRMzNv0O?esSn9BEV zGzPxy8w(N!RPY=P>PWn+ zX}^Q*<8%e;J8{eC0|qoVBff} z9${)^tG%P1E1!eJ#=%~5Ct=Sp)xeUPRhAZ>%V2u`@cV_sJ~u9=J@cDlDWj*6C#yws z2E5-Z#WBt4#jrmP`I9>D8HY=$e5X8Xww!o&yVNTy-)yY-q-aimPVTpyev0j@nxMLJ zDc-dl!M>ciW}HN8F6zK$KV9rDfh=u(2Whu?Mnz2gKs)uFj7j%)yzJW|ZkLaeZgya`S#UNre*YtEFpTpgm0F9ud;PrUe4=z>gF1_xP(T5@eBTf4 z*hKZmI$>6Lix6uRhggq)i8Y{YNnPxrj|;fu&KEtpo7+q-u7I5 zCobVvR0d16stU7iXBl8TiyjHuR<++3GcG7g{U)K7+GkBgKQX+XBUKfz z=X*0^QLu!WJd@NDb6`C?tExGfFbiRxH!C~M$Nvex?%umlT4#3(`{#wkpR|;OzqXTO zI1Fn)pLs~$8Q8qJ+VYkBCb`$mJpM7#^DJU62|U)m{E2M+hUYkWEyx6kAA2bYY@ANU zjEmwX6enA{yxxT0F!;FH9=;&JpIJnUP(Qdh&6=w%o(z!Rz2k)6xQ?f=my2(dDSkZz zz1k5kL};oaM|-CrJ}v3xBXs*|1#gDlP4vDQz9z z_BHq0?4-bNy#eKO0uA>cc^1}vQs)P1r$tI1D}sYGn)k(CFQN~Ox0AY0HPjE`isp%N zTHia>Ff^O2S8yLoxfZvA7gC9Dd_1acMyBM;dGDvxM_wG>#B12)$AQV@`$PQ!NCkUt zI&*|PHsZ@i3)1U@P##q+UR!qeJ}YRz>Q@YO_I>zs(Az!kz0Qa?`IT2D$CTl-F&J$iTuh1Fm z=D<`^R#wTmhv7cLy-yf}(?n;3_;pkU{Rb#)>pW~61#uwl`xq08L z!c_VwyIMHwZB24;Y2)$pmlf}=aVJvhUw55E27b?Lc0+)I#EkPShYRNbzftAuG}}+p zA^7_cx0_mXq>~K82~XNhcE}?5Z84Q$z3E3UQdUo_*<{BHrJEgD=eLNP3o<3%32c3u z)c#=?BnJ;RM*rsGtm0!mxSMy4?5A{v zgx9cVPlIG?WWE#2duU|m$eUW}P8ptSDsx;ueJQC(+x4f9>gTa|8=i7)x^eTqxH8tw zUwk;&h@`3a(T7N%IN_~d=N|%=F(NY*uxAem_$qB$vDUvN&VQOWZGNTwx`w@t{DMNB zmu6a%cc|IqbAFm8*CI8A*XUrEsm5f{95dm>bUx*s3ga0}nmls4U9p*N&j;6;`|bvX z_|HU+e`TX1oH%Qg6>rM2ry|nye{Pun!9=}gD)2TzW>2??pJH!C8^uIy>(DV9J9>V? zHFB>f1M%_{?~zK0#2I><+`NbcDH12gp1GUz6M5?WTR*&>ynS~|h_%ke+414-XI}M) z=fXq>WgQr!=5*KNadh_vuz8Z=^;krRN_@hnk~M9keK_NHA}q*%aQ?_vWN76pv4P(0=S$mKU`+cF1PFy-R@hj+9WP*Tm;$ z%RTHgmYx-&aKl={VY_eDRgrrib+gGmkwsZv*e*L4C%pkgVDNx}$^S}yDxdyXT+HJ0R zY9Q_Z+;c$zNO#3GI{^?%{)f1IXg2-~lQ+486lph-e`zqwJu}a4$@(sn*YEu#S0=0~ z+~e-`cguQoV$X2nLIP_dj6{&;%mY~Ix$*$r`i|HgEXBZ5mBqxzVgr$0xg7)8(3rp&DTwnb?Mi_Gfv%fT__zc8Z^9wz+s~_r znFuiq#%L&H@+e%^XS0|96EO!!nX`=-7{fG0I>-m3+4SkLLALLAnwPR+0J z$-}Fz7Q7RrNJdR-@F>j5%a9a(K--M&hdLWIQ>lo&L^MA7fQ(!&H8qo3f4T(}A1u~T z2rc>r8{}-%kqsCUY5y8ciIHdH#hZ*ikSND$PR&F}9Q{JuPoS^pRS}v^aYY+4d*F`T zvq!hcDOl{}s$z@LUed%q)ZuX8Ca z&8lavGL4$!4sn!Rjn0dFGT?V3)h~jV# z)EA40nR;x7jGX-he4=bCOfm9OBjjf}HlHu(WtUWedo96Q-c*MRzJ9J^ymb_Y3N)4s>GWN*1gPGs!ok{_nIx!Wj?{W7Qb z8u~y~s|@-u58jqR-@|8lXf_YrQu@n?&C_ea&QQA(A{ImzSM@H_4-4BZ_K)C2Rl zvC^!UNBoi@GGE72+rB)>uU%7Qm_ctsYmRHpn@cKf(E5gqHGcq)@{KPpDXW|5-^<_9 zP+%0*wou9{1cTc`zi|&{kx1R7M-{P*_f8AbC`9r6pKHoE|hW>a3e+^AkWz^0&8Gbk7=CQ5tM=^LBrKs z;9ZDYxe)UJKh1iGSZYrH?KUh$_}6`AG3wZdFub_s-8uC6&Y|XVXTw@_F}5zOa1zUh zY+amf)htIpSK~NyIVgSB!Dr=pA-EcRG38s(3zeg`s+`kHKzrG2?w?Oolb}SC?k*^k zm`zm?lSGvDf{oJLCWtfA1(%-G($KUcGn6G*j#`Qa%Oa#TST6%(X#pX` zHWUWGMwvA@a_&|GtK_X1D$_G|UvzDx5MKaR91e{^!N?Y==M!}C3t>InaQ>bgN|~G} z`9Y0nVoR1}qqR6r^J`5eZ}P(ia5dU<#Ch1d#+qXh8(dh5gDb`qKxk!?n-#1Y@NTnG zI!T*<^lqe*9fN+)f=--Wg#7km=SuVTE#um!EpDH0V&=(yARutbvEtGU69_-D3A!cw zq$CQ|I_hYQ4vai>Gv-txcm5he(E?%?HM8#Whl`zK&G+W2z6T4W%HNy1`3w|kl>_S) zs2=rSw}v(zifAl1KLOd;niv3cJKFNa!@N$v_Mc?*0~Wi{%oE-(Tkgh3*tHEXtI6Yq zf6-J-ctbKtm@At`AZPn0fXIfQHK&JWz+#IgZj6%{WEoA_D-y6^?uP4@$p(p~N=f#J zJ|JsJ$VM7M%gVI>wjz%D{e!%IQ!u60wmOeP&pV|UF78Yxz2cxYEzLE(q6NTy0`qs7p?Pb_j;=p_eztj_2z_iHO>7x;=9CbMRCYY435v495&bikl zPra7nh{E|P*w3r*<0!95-mis2u6lk8eW3%YsJbv)mqdazlNfb)31sTUU=D^ zIR-Y)iV6KI(zwxm7I&)AP0R)KZMA|yOdgz2G~d--eJx5YeiGfD305gb<*+Z%{hq_8 zub9xL>n13Zz!zpm)yMoqp1Dg2Ece0waN)epo6Ti;E1q%E*8B&7!nqhH_)%>IOo4qC zE0%9^LHKT0+;7g6K@+@R`Pfb-ElS?iGc}GQayyZqS*;-FN5~t-zL|2&oAmpHWP27T znI5wDgT?eSE`)HO6qxVU;JNc>X)MMOf_%$0r^hH-ZN8UlQhUQNNR0Y23V_R-6U0fo z@gF=*t0Y1n67ZNpfDy@)(#Glm0jad`G4&V3A5>evLfBjqwqVT1|F};X1QFZCs2?O0 zAyZBx$rdLjO*)IJVj_C@QVO^{m^JBkgqjC&=JGYR1<6ro3Fi>?kds=XRi3mmf!TYg zrI^^yiEYtBXi3b@MJSV4sn`2_;(Yn|^&H!bGWtM7$4y@{Qp+vltM?#?ag_qI0Ayv% zhaYtM&0z3-l~+uZPITuLlX_O6hi1`oX+eG|F$4<#>dHI;+19OGvFe)^#5r7blif^m z)KT z&V>HVG7h$`_c7c->7aE@vchZW%-A)X1+rcUWs28P4teKJJeBN+EN*GM1hS&E$@yOfdly4MwmwwUq5DB1n52 zNsdtj4m#IA^1n0_wN_P3(m3F6%GM_-j(QY&@mhiUQInhyIQbRyV>6R!S`M}HDDyGv zwD+Bhbg(chb{tJp2i=LB@iZeYj*=TpjJxxjZH|Qo*l^@Na2sgDgNzMtf zDzV(M${%riRZ7WwCLS+o1$hUXP2Eh!_wntW>vk*1tbp&_0=h6+e2nrw0tuut@q^%k zJx4Vn|0mX_FdckD8M0md{O_!ZSGLD~;1hrla-SnneuvP{@mDB9{FcLvRaf=tjkca6 z!C$nBq;d;l(ewh)xR2)G&iG<7X$T@xfgN0%Uq}xgLZ~SW)HS^u-TW04r6or_xOQ-r zYA-T0=SW)m5O=T{mz5oWvtzcRJ6!%c%wf-DQ zzz-{K&{=dc^}Pi%mKb)7%iw|a9r=18TOkh7`$TL9*S;0`eaTAAW)p*#GAcp37N20qjv2!rH zwcSV7i>fF|`>ZNDPm{L4x-8*1*=mSqFsGdN}>Xs#k$Tq!()t~RXm zWMcbZgm_LXBx&Bgtl$GH!Jakc{PW$FSt5clHo#US+vIx%#;27T!Kao7%U?Ii|1 zq(DrUL7IpM_?C70gE&hHw_xCj2YAQ8u}D3z&grc1fMkJ$)$K(t>T|lLYs65e&NXJM z&N1Wz8(a~Uc8QJ{nVqSh!g4f6~&}v4$a5ep#iChP` zdm_pdpz|FfhJ@=N48c<`1=wQ1((wS+JH*ixu(s&fEC%RaZoI?{%2-O_#AcB0LF!@5 z`GX|l(E#!T2+ix?F=J0i!Fnt@wROEAqVlp8DL)nm&y)A+Wd-nQ2yQwcsJSoFNC%oP z_eJgjq2fbin%E-Os+F!fp!f00ww3g4vUUkr$sidV@5U2@mhVV3P*_8hDh3dU%lPu# z3IeDw_7^CHPbn&`tP-^9!GIlC2L`j1(xFvu1tef3?GHX@j?{t>b1ptiXbgGN1Vpx2 zBHaln=)mZ^fse%qmlhzo2A4Ji{0_(a)PfnZ7p>C84sZ|0^;rR}Guv-efQWZYTL>iR zc$X96Rf&j}2bjaiA_o*^jouBYX(jp`1;fb&Y0{Yx(d^>x)%vqTUZ%YwR?ey0yQ@r8AyPqUM>RoBHi~iWW*L6Nv;D^ zHI7&Jm4Q3hr&Bjk0zAL6LG#o~As66YIKC1DmT&(te5(rh>>=1Y{!jDm?sy0;Se^FW zYyjzOJoo|4+vG&wYDqA$OR#7t<@OMz-kL%1McCw;6iIKquf2Oz$T`59dMq9BI{5-_ zggMW3YV{iz5>Yv^`HG1D3}PMxw}AaV=4{dbj0nNPwdy=T7SKR_2JLyEB~ioe$CAN* zKEZ5VE(6fXoAxquNg)_#i5x^{kM9j~)=%faj7xNJLzau4`Ecq)T_H2q_7b zNI&RC*yrR!L!vay&`7v6G-7`k1LQ7kC;VO~A2<$SJbIvVRx5}G@D3dBc973=ts33vgm@u#y&NI0lNTMI69J2RA~C^`(o`8Xq6Df#vsa9; z8LIh?9W_dyEZa-^3)!MU`8L^9BIWbvI5^h+cqJ&1&0ah^00>|LdDyg7rwj;cJt7_? z`+EdEgDDE-jR*=QvRnVBe!x8J2iTgLz3fXKfZmgh5G??@Gp+>(hUejhFMy0sIOzfj zjFnFhh@Wocp~f?7R9P5y$CbDtU1K~Dg8o<(Rr_0_D0ERa1hDQE2mb4Q4Fj7GGsEBNL%R&>;tJv}XxG}I14-#fgrNIZ#GXnb zsDHM$92Q2-7DBrdG64>1$b zj03$Zh*Gk>|4i0~r=hw~<}OLVoJjlfAeI2enWH-o*viylliJN-{)UildJJ1|ti#&H z_dufv5sU>1d&g)(!pgqYZrKW6{+i$v=AHAhcUkrw@8MCr_%AhO2NJBfaawB7@;SO71NG!^ zJe+2pYZ&>I4JEO=LVOE0_}^9Bu&t{ge<}aolwmPy0G10mWe&V$nRra zL{j>r(0)=YfK@b8+6Gmeb9gSPH^Zsm$XmJv$_8&g?)GcYexPB z9b+>|uci;rJQCEpla1SwDAV=(0JRIjE$j#Z8Q1}>+mb0Es8)0fC@t4fNwJen49>;- zC~MJrpd{^#|BgYibstC5xT}k^W#$30#s;5DEuO z!w{RT#3%~a9f7>hkrNz}@sU;m-cUhnu69h3DI@>H(3TBFJcjxol*(>xGz;L=Bn1WP zof`Nl>4|RyNl%Fg0x_6)Fb|{@V@vukOjN;mP#7kyZbKfY2t)!BI|w^+60jRe)@aB9 zv%qVxYsb7tbt(Kp?bFwbQ0%H-0$4*|BGl$|tsCm|hl*KrG^8RV1h(U=ao%fSe@Bkd z0nrP`dTs2_hN*5z>*>eg!F2>kMKYCn>;9Mc|nr5&&-Ue!*w zVvPT9;}fETdd6Ra?fewkJB;!!`Z-9HznMx93E)u$3sr?T4FH$Z@pKcE`zm4o!d;34 zrY|-^m;S{dR~ux{9-@8ezZ|tE(SuU5{1CVXRQ(Frz0>7TqsSwo_+LC31imBjy?j7d#_$k&Kyp^j(Es{a7^1tAqwD0wL2At6^) zX<+PcRuAKVQCPz$;0+FK>^#8A-#kVY`V4ib+tA|{%IjW~3D=rrIjD5$+a^4U-$qGj9666 z{trr~GKi?kkvsb@RtY3@D3Pp@eFhy8r$(580|)R`dgy>x;0g_;&wvxqrL9Nk-}(z~ z{T`tk;M}LS{jU%{2H6X1Ak_XB2MQ$!#&JCe%sN2u4msAxxU^+KVW-lduw_tYiEq7w z&mX`~$)G9$uLORA(`cQA8>n_i2JnAwL*W}gAOfRS_XVK|RIB@x!=)zbWUZ1F=MB~wZvenKnZm&WF$hBNX#-y+&+%|j z0VW6U1TnTKan^M5&j?b9j!MAqZ|G`L032BQrrH1bGo|wgXwN%-d-vZivkz5~r%?H~ zD+VFKgNbex{U=SgUI2weBW`}Bvie*8U z2h`m0ETZVuf0RM_rlkF^!2BP8bMmr(V7ZL{z_@OMY~K-Kj|TtOn&ZNPtfYS^vzV+(T{fy&Nd%K$Trj-woq_Ztd? zAUg4gOBG2XOJ+LzOZr)Lo4(0Z0K$vxEO8 z6_Np&T&W4>q7!*khhj7}`c3ey&;@FP1X%^?Y_SkVXAs_kGU?g4|x`Wz}Gl4rOotiCJdW()x<>03O_cUf;K=>C>@Fz4f_4%JT z>bUcF^Pj@g3IqZEe|P`gB^+=;1rJS7w!e-$-Gw#u3iJh?b0wW_Y=EndrmxU5qHXx%f}^K`+D~v8Qb-2xst3p<@FKki zUo`@63?C7jJba*Y$ZDE%kt_9LN}Q1cd{3r^6Tx@fYP(GcoK@Q;TwjiWYv%bzGz;ja zzO;?I`4+tD+;UR|7~V$~VR*;-{3lbwvZ~ zhiYm88PU zmhBL&L-5cAy>M-lrGZy5*d=Z{9`uNK-Ts7-p-XZShI;9u*Wv{)XW&3v3kVG$rf*zl z0}qbKvH~pdpnBkVT!#*nx+H#7qJi&OtwE?CWN~VVNeXaH^m$QBtq7j=&Qh27m;lbI zRoP86pqS{xv4ai1qP|?=?B8X9qKh&Na5s{H5S{HmD2qE=4){~(4KDh zN(vD2v9Fa3iJ%M|(~07nm3T;KFc=2&)#M=^X;7yiQ(& z&w*1EQsNa6N*{VfN`&Eg`2)}k0&Fa-Q=Hff+jVBpvVCv|T!W{O-e3$UY+uJ32X{)^ zWk6=hs__vzstFIk065^*0dCayBbk9enm!}X!I^8}e!8_kh>X`({U<4l|EIPukB9pE z{?ASrYxa>+wyasQgrpErNlIEMH7QBS^3K?aB1^U?jV+}WQ#2vQQnU?4Q3$D_#u&1% zzjI&S_4|At-|xTQf4tm#&pG$J?!9yGxzFb@21NRF87eSfoFkd>GBA=6Q*T6#v2urs z0Br6?Awo;Fj-pFazhz)%M9n5O%wdZ8`gBj}ZxV;NBpYyk(J4Y%H3zJW9+EwP2Y{e} zpK_PjVCs7LvEP4Vzxy6;=ZIjC=Q>}GqB&yGyzR~-bvH#q(sgb=7pR#ZpP zY(g;#AAO$+E<&61vKFl?#9Zwi6R1ezBpCiKGk%l4dGl(iAs&EEeVG zfF<5}t8^JV9GDH?K}p75^ZaJJj#N(5S$T9{E*t>uW zzLK46sKMY3e8#6bN2Oq+?M1sIR(S*iUHwRvaM(eu5-X`-#mZ|w-Xt}>jHVObm9ZK zg5)xuVsZxq*+H&}Q1%O;|JSso;(y#qW24cA!>mV)0I4elu?a+=PX73d^$jD42d3iR zTW+)=h%N;b37NOi_+D2-JjV0^aVzMUT;T-J{Z(3n74*xu2)mX1hzo?PbDdB4{`%Od zwhnp2909^&no=PV#vCTkg|hK$z7qT2@)GJ5|F7~APRcukY0sC>q3`)C{~t*TzOyr~ z>`}{(jf(uRt56{w>Mt8eZKbESE-Kr~>=GrBJvOhMq4b@NP@&ROdltpk%BY1;-&i}b zcuCYcwV$rex8+i(1K$X*8)^6EjSZMijbQ|E5H zd{whHN{?i@ms@j{;o^F5iL9w4yw+)J%vtA<*>Kq`Jjl)9@xP2Dy zi!q=y;ZaXFMg;w?d5ZC1v*MkZ&YC>P0vPqh-GfJ;uY4{%#>naDqiW@3JjNSFo4$!N zWj8Yk{n@M-J99%8v|{JffkaQD8Bq8(MudbE>xEnI%s1cAb`99yT!2@_0wL{SoEIK# zKYb6@amrha(_OV#ymuQ2*uz{lF$+XzyaG0WW`kLzm?Dt&^U^n@5QXo}dlb0!Ci4%% z@xbBtB43jFbsxO6!Pt9%&v_>@bt00R6*-A7UQ1&ve(W*OcxbLGqXV4aJ(|7|#@M%M zL+Ldw{<{FM`E$Mxcyg_MCMX{xn-vX-2!?0T)<#2<8)9}qp`IYSdW`pl*O&e(ua;<9!!HI}o zLL%^>7Xc!r|0ISQ7qY#p$cJ>&;5 zU@~?e&^f7CNUcr6yF+APc0G8BhI$Gd1C>~I>fZzaHmhKzWDhv}e)#g4W6Y2ru>DcA z()R7#@@zA|1w}4IIg@;$4(OEUH=+^Y0K9q!#`jx_XHBW30$Bac7@z%KoL2G$Ku|i$Rc}GGdg7 zPI-o{M$79TVNWe0eNzO^+}Sg4Fpe|NNgD?&58wc z0%*7_LcG5t4ych9ZG;sQG<#Peb|3umR!3xwBN2D|FOQ#&*qJ7CdQce5gmWFDc;-4^ z=>X%CM7qAr5Oy6(^A*gT{W8p)W=m&YeM?l4Ff484*HovY zD98wtTnCX#ADL{0A}kotKJsq3bVN)sisjCwE3^_cqUBeanWRHEzaIQ$WW}_1ni;((SP3$=u%S?y-S~O%liN7qi(%5;LF%g2o6xwQX+tYS^0&RF#YxP zkNQi_*-B!H+4?X^^>qImqc9L76n*^!ug7x?Yy^5ps1XZHdr0}uVQknpkbY6%^SaP6 z&4(cLQW+p?=Ey04RfhPN+yn-EQEtAmE*J%m=6`_2j%|SSPt((18X}0?L0@zPW>5)u zt%Z)q5NJ))RTLfyLpIeDg!${D2!`Q*5ZU#Wy#OjpkwiAcBmnbQVw?|92^O2B@sOh?{(y@E4) z&UZ`^+&gae38;C<8Aw2(oOfj(hIM}hMh4Jn)++EPSaV7kD#x%5>c8s%)L4#XFdPv7 z6C!vk=0lV=fzEgjNDbQZlqT9vXIBHJgaH^TYNCYMLTnV?1bZohx3{|gWgLc%0*}zN zgv#w>$5eQz;6Gse{2|8&p)AsoK9#!v<4PsX|h&; z*#UYD&>~jmt{WlX!%&EbEy&surtqm8Guh~fOm?yedjVU6 zf^4o!P&r7)?2P$hUd>S2xIuT!H>e9lLJ%7}M$VN*(hn~3qgl5K`fK~6K4!*B9mT^M zURX2L7O9inaiBd>AC4wPzW49wCTs!*2oQQ_MR>;$gPhG1V7LVNwiLvjek_UF@4*#s zsoRhV!%KeySvJ5e-_x?I5Q~J33{(8sPWY5z$gmFfG6Z18fF=w9HVPnc))TD%-xB|# zXAvO}n_#hoU0Y6T)VLnAsQPKgjt(R zz#9N_+4r}G{|B%g@8MbiV1BP}Tn+Z1RtnHA)}hRhktI&ZOagcVH#6Nf8#I zib0^~ZU3lSqhF&SPvI}WklhY14N$Y?S^(!*6jLX}WMM<;3|obHkY(bMzlhcRejHpy zpzr!W3l7~cphKjJeUc@oF#Bokkx=~WBUK&ta~4WGT?O2SE1g$l`n!XqZIoUqDHa~ zX4m1O2w5@?0^oQ*nZo^t*TmFa1(Knwj&(wqlV~Nl`eQY&j%$!BN(UP|d`2m|Ly!k_ z2QNFgxGvFYn95a}#zmWcFT?51XVB-r$mjLLnCg!x%<>Re#>9|^Z(ILjgxKW{mJbXv zL(mjhh9-LYL2)Fz_9~SA;}W#RaTS7w&us<S$0IFv?cLobj{J3V3K9!TYp1gD`onKpFH zsn^-g)2w{RGWzkYR?-*idr<$ArsJwspmA%VGplI1r(KbGxx{U#zDpa5bzF5?)jTNp zIaI4c9^9^ISB@^Czx9ri=^LGFTwg7DtvX0}84F!S(MtTl)>>j2{o>0y_%w9! zmhZQnVsNf}pAHLhF%B%{nwJaYKnDY~>|cAJgH}39oHh%p&V$C0>5`2F8V!U;J-flx zTr=5ZuoFGS*e>-xz3;cKyN&Dek&aC5&XcNB1hM9%GI56Q#tNj@@j((g=9=$3BmyJR zN1i9t4=qWn+IRqyZ_ugJi z=kLYs`dD;fmN~fT9nqIgA{~m%6NuPnF zvi><=y*Qkw-8*w@@d2NEYZn(Itjn;Jaj%od9CY*EWZ#jF3>O{GMRdKu*fnx zcPPeHW13KIgKUrvKKg`wd(Hoo6==AqzAMM_^VLkqkF{}KKTdJDQ}-Z!d+pHUW(LXY2H5pJZx&DJ1h)93#kiSg$QIqu^7aQrsWPlgkl!W*b6}E-yI?b zSY^{v<%kF-!$4W8twe(y$f!PPX0i)<*YEfBhD0-`P^|K~_M8LoV<%r>wg>kMDQ-X(?jlgN{apS!??G4kF-^cD=?7{uH- zPCz0Y>J&0?-7&Kx$o)aWqd`6JTYAU%Ig2+C7I9=R5yF zc2h%`=X3sbBqF4&0#?X8V+CW(2jkxoxYZtZoJ%c z;<1*UwR&w-v!wi&EKZ(V#Ez1Q)dR-)>e;F3SvH;>Tap^?HY~))H$;VvOWDV11?SH> zT8u|WXh%pF@vS&KwqA|<_O0H9+1zJN31LZYt|MEv3mi#)k?)8*vL;7R`HbwV7qwka z^RH!H#-CR=kak}^^rCg20q@6Ap8OkE&hVcJl5eOluvmTMLegdN(-A71`EzcL#Vz5U z{l`+47uOw)*cmC#_mRkR{p$8$Np|^Qak=Y}czLb zBEOjjAOC)5!Ag2*L@jxQmvK{_)h&E^;bK7BdFN@|WblBHl)Q(s&TFb8t~TYJL_Z-Y z+D-9AZ%9W`|A0$8mn)Z*#l)?&&qA`RS7c4(*Et3;{dNjWaL?K3I>cmH!k3zxbLd?B zB`h}}wLd!Hv}DOP_r8xltYhcyZwlPe?&i3|>M@I>B>P%>Ky75MiXXLS3(HN#_B+Ym zRddZY%N4Huw1-Tv)>(v>Cq1+%#=zFdsv4G^Ykg5 zT!>FBk>S5Kb5Kl&UKHeKymPy<&Ezp5+x8S-zUrKia{0!Ch+SFmV} zCnk6PE_P1BaY*TlP~FR7w^7#!;?9tVvSbT$qHZ`Vf7jPD*~d3S@F3#(9X|OzI(m4< zIHNnbsK@XZ{_$&$ia;TcF5>!1oyvPp?_Ii4$;=UART)RERPC;^dlq2NB_wFzxF8*$ zW*U{*TJg!g+eWai$}Kf}W{~ZdiHq?+!&LWOxYBPyLe7VWKhN2Htw_OtUB`<{mg_bd z+|RQ<&cQ!WRdD$9VDbCegnxXv&z@-&vykXc)~OV6Q(47-<~6>-im_^{`J;;drI)p2 z(?}`&&sVh-Jv^%Vs-eZbk5bmB?3ee#eZSDNWqD{u%4ds-myHSfMta7B-rkw1;kzzP zcI@&ep0P=+cvd}kUG&MiVTDGOiTBw#*LdHvv>8(`+~uxp3zurBjZ~<6-QMcr@>IuN zxL>TuO}9-&|BZQkUczVRJ@FQAsN5|T&Nq3Fxc9xTF8Z}aED@)*XXsVoqCopHsWi3K z2Cd;)Jz=z|vTek-5miEw?Q5E^*p!rnS{GPdU?{M9dOFV-cqs%?a|^ZNQB`Xhod5n+ytAituS;tCXFg`R?UtQa zaKha$-|wj%AM9Ghd(>%oKW9i?ww%(@I~>TVR4EdW+V!Y4+>E{UdIhfRHfwb+OJ=BL z?9hJMqK!!{s}o!iyNp7?!nSUTk6s>LU#*?azA`0<|6Sh zDq*~%31_+6$$7_N8BaT7oJnKmBdUQPvx9%Vf4jz=bskz$r_TQ{`b-raEvfCDc$*t` za(fY&o?NU zT%2>tH~Iim7Bm-19g8d@KlL>~yR1j}Xh)^oe$8!9MfeP`9iQr=o+;%GA1uZ{D1DBgk$Q@9N*Y zV~$<%QK*qVrtGnimnbJ4t<2G$o5%F9rUvr4G3L)&7=OI9b^CBof4B#=&DufkifyV9 zaeBkCAGC>D39A9N#4^qyFd>*?J!~K4jzq*p(yK{mcCtHqkp0ROQ z5U5|nT0OPtMH6XcmztyOldpz6M~*lwcx@MO35?CX$H}P?8Dt(n9gyUEuNADA9XRGb znHOrXZP_OIq;niWZj+np1WLz_6H5$t4p{{H=yC7r;QMf)nffK;y+zVVzo{!9I!4`8 zUi@=C&;I)FP0IbDR!RpKmp%G+Xu}LHlH+;ZqJWYjXPsmR3?9XG;rfLMYF^9W&9u5b z_@le;3XL0?9W0F1lU@018?OAKlq8=};+fij8V)hl0MB!a#u7JUeqA`OeY}LjKOo|0 zPfkliJ*?8_aoTSCts*of9FFmjj_@2CXI4e_P=8EpV@28*DQld6N*zCc!tg}v)Ghvm z<9>qP3-MRd1Zf7$f+*S1>VCi4Z9{yHm9l;j`Gc18Hc^(W-VJxP^h}jHTXS6Zf}7F0 z&t|Ku7}WNEQt{ohT&3UIXQVr{0*ic$wbbC$kh)c?8;kGk48@x?+sx5kM5X@h3A`}* z%qQNE^(?!mz#Z2ixL(V@S#NvRFzeTg&a{#`N8FdnZM%vQL$w6NW${4~G2CqW8T|AmU@;Kgh@Cp1zJwfBcImU}r2DgfrN9rWMf6!8- zuwZPOW^DN7gycg9PM5&d4V3wN6M^+N-2xRt)eJ{uBDe?VWp5{l1uM!PdBCAE+-*2! zD6y`mXWA*hVspT-40mLEg;J(x|H1Po60bFQ;)PuMBgOTpnRjqe`1|s1JU%&9jC$*q zs<1~3g;8({eDw423N1FHl5*|lD+28KxU3>hrCt2gM72vY8R1qn-4Yj%e;6iTlV~gW zj{v{9-v^iTl%|5{BC8Ds)ic+hiTv}aHd3Ac5q!h4YPOz08~3lu50eg63#_0?o@+L2 z5v(3^sik}Vif=(rHdDe5J}8`)(;rwq=iswxSP{c-C14R z7e)fNN581;lp@a7_-ACddweAnT*9~M^1e9vW&O%3g`-qTH&ym!R7zG)@uGr$I`AX-xPBgmX)4!Y_~ir{WX!6_e5sD znVP=$+Z^t<$zSd+!xP9Xo3k;Bi@H+bTbP^W8(IxFTD>Y%GF^Vm1Nrp;tzzAQ8NArrCd%f2=xzejn#&H~NP%v>eseJtO)VpkkN@G3S z`Tf);pVxUGt(8)uKY8!4|Lsn6E7`!znJ#@c?D0}fL@)X4lR*c6_8)}T+Tr2{E3=+Y z#{Dwjm2Y1(sXV{_?p8as+YKAl8>?GMg;buSDzZ=3eUn@vTqwTone^k}j>enCtj}cX zB_*?MLV_AUX#@MTMLXXbR-SsZ>d2<;iD{*l)Pxz`LS+THp9>CNo6J9QwXYfDJ|BMj zwC?feygQF8`N$i{0A_q9;b&XShDLmY?_=ZTW=-e{8 z)xK7llhvDdb~ts^y`=Y8YtH%Zb>+W@`CtDyy~X%IWp+<=s z7al7gX=yy0JCSqEx_#xt@Xj-pb!kZp-PM+7ycFL}EG9H;+|)7t<;0}%55ln+34;1+ z6Wi#?albB`;WUxA2i`q5iLNd>Y}M4lbaA}ujZH61@>8vKi^K2 z>RBuFP}uK*PGy(utl8v5_EZ65q})VkMsfdfWwU>RDz!hno2Ntw9klPU-ZaA@MMHcl)TP9Bm8U9G7n^KST+cC64^ z5mEe4?h%Wf7Xtzk%mz1g&3`>~peJZ=z%ai4WjTLphD`m-o17{~jd=YZ$=7pb-3aPS zIg(SCq--XkREZ;gR?-ezz12f|w2&=)8r9bRK5F&9@K6Jd9SsVtYZ zb-V3q!8*FQ@-NPS#d68LQ>$2A_jpE1dPpL}3it-0r*3FO`iDf$(vGgEao4VD-^xZ+ zUb$DGT*v*hf{)w4kjLBS>NDM^l=)}BbLAQC?>Q8>-)-4tA^Qi#>zK^N2`;wd%^!{s zW=Ab8IlhVe)i~cAF6ZUFY`y6LOZd)}=!~$$&$FXP-PeRH{IhqeE2MPSHNNxw9~|1Z zUp<^Ob-dcSw5!eJfudNyf^pB@Bx$1AiLZ<`%I@p5iJATPC4U4p)a-CzMrFq^_`U_0 zj@KT2zf8FMa(BY<2Wx&Gj?=XyRB3lfnlaj#Di_-&kDvH@^6Q($?Sb}G+=cAdj02gs zM>nxNi-M;W%=_5cNm&!$V|1j#ayHB}vW^8V3Oj^4hdh$zvkXWP)SA>`U)6HW{cy;B zmsG{Bs;{ydY`mWM9L@5!+MAv2otjxfo@uwX?p|EG(0T97qpwx-an+##za^^;XX6f& zsu$u&hq@OUs%IC*Mzt1Z`bH<_X0$@ij?Z)*?px%#K&h17Kl(qvTts#m2lj6~(U1QJ D?Glyo literal 0 HcmV?d00001 diff --git a/sgkit/tests/io/vcf/test_vcf_reader.py b/sgkit/tests/io/vcf/test_vcf_reader.py index 2ed0ea55b..497483fee 100644 --- a/sgkit/tests/io/vcf/test_vcf_reader.py +++ b/sgkit/tests/io/vcf/test_vcf_reader.py @@ -651,6 +651,18 @@ def test_vcf_to_zarr__contig_not_defined_in_header(shared_datadir, tmp_path): vcf_to_zarr(path, output) +def test_vcf_to_zarr__large_number_of_contigs(shared_datadir, tmp_path): + path = path_for_test(shared_datadir, "Homo_sapiens_assembly38.headerOnly.vcf.gz") + output = tmp_path.joinpath("vcf.zarr").as_posix() + + vcf_to_zarr(path, output) + + ds = xr.open_zarr(output) + + assert len(ds.attrs["contigs"]) == 3366 + assert ds["variant_contig"].dtype == np.int16 # needs larger dtype than np.int8 + + def test_vcf_to_zarr__fields(shared_datadir, tmp_path): path = path_for_test(shared_datadir, "sample.vcf.gz") output = tmp_path.joinpath("vcf.zarr").as_posix() diff --git a/sgkit/tests/test_utils.py b/sgkit/tests/test_utils.py index ee90832a6..fba366283 100644 --- a/sgkit/tests/test_utils.py +++ b/sgkit/tests/test_utils.py @@ -16,6 +16,7 @@ hash_array, max_str_len, merge_datasets, + smallest_numpy_int_dtype, split_array_chunks, ) @@ -231,3 +232,31 @@ def test_hash_array(n_rows, n_cols): # counts[inverse] gives the count for each column in x # these should be the same for both ways of counting np.testing.assert_equal(counts[inverse], expected_counts[expected_inverse]) + + +@pytest.mark.parametrize( + "value,expected_dtype", + [ + (0, np.int8), + (1, np.int8), + (-1, np.int8), + (np.iinfo(np.int8).min, np.int8), + (np.iinfo(np.int8).max, np.int8), + (np.iinfo(np.int8).min - 1, np.int16), + (np.iinfo(np.int8).max + 1, np.int16), + (np.iinfo(np.int16).min, np.int16), + (np.iinfo(np.int16).max, np.int16), + (np.iinfo(np.int16).min - 1, np.int32), + (np.iinfo(np.int16).max + 1, np.int32), + (np.iinfo(np.int32).min, np.int32), + (np.iinfo(np.int32).max, np.int32), + (np.iinfo(np.int32).min - 1, np.int64), + (np.iinfo(np.int32).max + 1, np.int64), + (np.iinfo(np.int64).min, np.int64), + (np.iinfo(np.int64).max, np.int64), + (np.iinfo(np.int64).min - 1, None), + (np.iinfo(np.int64).max + 1, None), + ], +) +def test_smallest_numpy_int_dtype(value, expected_dtype): + assert smallest_numpy_int_dtype(value) == expected_dtype diff --git a/sgkit/utils.py b/sgkit/utils.py index 1a40affab..5e2c7f488 100644 --- a/sgkit/utils.py +++ b/sgkit/utils.py @@ -336,3 +336,22 @@ def hash_array(x: ArrayLike, out: ArrayLike) -> None: # pragma: no cover out[0] = 5381 for i in range(x.shape[0]): out[0] = out[0] * 33 + x[i] + + +def smallest_numpy_int_dtype(value: int) -> Optional[DType]: + """Return the smallest NumPy signed integer dtype that can be used to store the given value. + + Parameters + ---------- + value + An integer value to be stored. + + Returns + ------- + A NumPy signed integer dtype suitable for storing the given value, or None if the value exceeds + the bounds of the largest dtype (``np.int64``). + """ + for dtype in (np.int8, np.int16, np.int32, np.int64): + if np.iinfo(dtype).min <= value <= np.iinfo(dtype).max: + return dtype + return None