From 90bf38d2618febc50948f68e2dfe37a2d4b0bd27 Mon Sep 17 00:00:00 2001 From: Nasty Date: Fri, 22 Dec 2023 17:21:06 +0300 Subject: [PATCH] Fix bugs --- dedoc/readers/docx_reader/README.md | 3 +- .../docx_reader/properties_extractor.py | 20 +++++++++---- dedoc/utils/annotation_merger.py | 25 +++++++++++++++- tests/data/docx/size2.docx | Bin 29669 -> 9952 bytes tests/unit_tests/test_format_docx_reader.py | 9 ++++-- tests/unit_tests/test_misc_annotations.py | 28 ++++++++++++++++++ 6 files changed, 73 insertions(+), 12 deletions(-) diff --git a/dedoc/readers/docx_reader/README.md b/dedoc/readers/docx_reader/README.md index 35e2343c..ad28b7cc 100644 --- a/dedoc/readers/docx_reader/README.md +++ b/dedoc/readers/docx_reader/README.md @@ -1,6 +1,5 @@ # Docx reader documentation - -[стандарт Office Open XML File Formats с. 28-62; 167-1301](http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-376,%20Fifth%20Edition,%20Part%201%20-%20Fundamentals%20And%20Markup%20Language%20Reference.zip) +[Стандарт Office Open XML File Formats с. 28-62; 167-1301](https://ecma-international.org/wp-content/uploads/ECMA-376-1_5th_edition_december_2016.zip) ## Структура docx diff --git a/dedoc/readers/docx_reader/properties_extractor.py b/dedoc/readers/docx_reader/properties_extractor.py index 0e92b80a..b5349a27 100644 --- a/dedoc/readers/docx_reader/properties_extractor.py +++ b/dedoc/readers/docx_reader/properties_extractor.py @@ -1,8 +1,16 @@ +from typing import Union + from bs4 import Tag from dedoc.readers.docx_reader.data_structures.base_props import BaseProperties +def spacing_to_float(spacing: Union[str, int, float]) -> float: + if str(spacing).endswith("pt"): + return float(spacing[:-2]) + return float(spacing) + + def check_if_true(value: str) -> bool: if value == "1" or value == "True" or value == "true": return True @@ -79,7 +87,7 @@ def change_indent(old_properties: BaseProperties, tree: Tag) -> None: ["firstLine", "firstLineChars", "hanging", "hangingChars", "start", "startChars", "left"] } for attribute in attributes: - attributes[attribute] = float(tree.ind.get(f"w:{attribute}", 0)) + attributes[attribute] = spacing_to_float(tree.ind.get(f"w:{attribute}", 0)) indentation = 0 if attributes["left"] != 0: @@ -109,7 +117,7 @@ def change_size(old_properties: BaseProperties, tree: Tag) -> None: :param tree: BeautifulSoup tree with properties """ if tree.sz: - new_size = float(tree.sz.get("w:val", old_properties.size)) + new_size = spacing_to_float(tree.sz.get("w:val", old_properties.size)) old_properties.size = int(new_size) @@ -180,19 +188,19 @@ def change_spacing(old_properties: BaseProperties, tree: Tag) -> None: if not before_autospacing: before_lines = tree.spacing.get("w:beforeLines", False) - before_lines = int(float(before_lines)) if before_lines else before_lines + before_lines = int(spacing_to_float(before_lines)) if before_lines else before_lines if not before_lines: before_tag = tree.spacing.get("w:before", False) - before = int(float(before_tag)) if before_tag else before + before = int(spacing_to_float(before_tag)) if before_tag else before else: before = before_lines if not after_autospacing: after_lines = tree.spacing.get("w:afterLines", False) - after_lines = int(float(after_lines)) if after_lines else after_lines + after_lines = int(spacing_to_float(after_lines)) if after_lines else after_lines if not after_lines: after_tag = tree.spacing.get("w:after", False) - after = int(float(after_tag)) if after_tag else after + after = int(spacing_to_float(after_tag)) if after_tag else after else: after = after_lines diff --git a/dedoc/utils/annotation_merger.py b/dedoc/utils/annotation_merger.py index 8bf0c299..a95012cc 100644 --- a/dedoc/utils/annotation_merger.py +++ b/dedoc/utils/annotation_merger.py @@ -71,6 +71,7 @@ def merge_annotations(self, annotations: List[Annotation], text: str) -> List[An """ if not annotations: return [] + annotations_group_by_name_value = self._group_annotations(annotations).values() spaces = [Space(m.start(), m.end()) for m in self.spaces.finditer(text)] @@ -78,7 +79,9 @@ def merge_annotations(self, annotations: List[Annotation], text: str) -> List[An for annotation_group in annotations_group_by_name_value: group = self._merge_one_group(annotations=annotation_group, spaces=spaces) merged.extend(group) - return merged + + filtered = self.__filter_contradicting_annotations(merged, text) + return filtered def _merge_one_group(self, annotations: List[Annotation], spaces: List[Space]) -> List[Annotation]: """ @@ -118,6 +121,26 @@ def _group_annotations(annotations: List[Annotation]) -> Dict[str, List[Annotati annotations_group_by_value[(annotation.name, annotation.value)].append(annotation) return annotations_group_by_value + def __filter_contradicting_annotations(self, annotations: List[Annotation], text: str) -> List[Annotation]: + annotations_by_type = defaultdict(list) + for annotation in annotations: + annotations_by_type[annotation.name].append(annotation) + + filtered = [] + for annotation_list in annotations_by_type.values(): + sorted_annotations = sorted(annotation_list, key=lambda x: x.start) + prev_end = 0 + for annotation in sorted_annotations: + if annotation.start >= prev_end: + filtered.append(annotation) + prev_end = annotation.end + elif self.spaces.match(text[filtered[-1].start:filtered[-1].end]): + del filtered[-1] + filtered.append(annotation) + prev_end = annotation.end + + return filtered + @staticmethod def delete_previous_merged(merged: List[Annotation], new_annotations: Annotation) -> List[Annotation]: """ diff --git a/tests/data/docx/size2.docx b/tests/data/docx/size2.docx index fea67457de76247c394c9c178b93f89e3ef36b5f..ec5c69484e9a6a678d72801179dc5585080527a0 100644 GIT binary patch literal 9952 zcmeHtWmsIzvhHBP-QC^Y-6goYySux)yC%3h0fIXOcXziyus{fxPxihcXMg9pfA0Bp z);zPOr{{TVO?7wmTU}N1(jcHH00002uwWdcCat1+v-J9n7Z?D52A}}+98Ij9=;{8r z0|5|Te|URUCBByi5rhf7d`1<%ShuDQmvHk!EMQM~1nHG=bu&K(RuB9tVk`WT%)f@f~5-Ez)4D);V&xp=u1&iGJO|IRMsWpJ=tDmHnFY}+K& zllG)lnPX&lB|!SFU~vEAv@+9{N5So7$P9z~nA->+c&z6{l-%faU3;Ynryf(=qufW2 zU9AJw2;qB7zvZnn=*IzW`8R_CO1Oa9_Nq8G5CDMqIw;0=M)Hn!_D=Lhc8(@=?l#u{ zX=rR>-&+L>Ac$Q4gvw}dwj}`7xJ^|;1D4dU5ni1($bm2B#`N-XA37GH|$Xn&_W0UElUzJ~ceIxv;yn(&_ zKk1$`F%H$sfB?GXl}EINGTMbqRX)1s;iXn`Fq;MEVZ%R?Ev;$ZJ3HP@;cuE04Mr? zO{JTiqcQ#Ke7e|}*gF4{I_ElCPP1YtzK3e&EVC_GBaulQcsQZbtzWufg!6f7ikdpA z3g;^qTziq_rMQKm;m?)uM;kA5Bq=EgDYxk*UP{f>*jG6sPC2S0*g+ER949V(56|50 zI1F0t9cG=H=u8OqKxVMqWe8rWkTj}xzzGS$V(P#n@~tkmUbuz(W>uD&C~hq#ys}4{<=JE$ zz*gUwudn8|>560#`=!X|!vgOqN+3NJZGv zVvYa+#lH-@pVV7gD68xkFT|q7KQK1H=X#i&qfAT4NNbU&-L+v~DHMyY`cz-a2UojG zFo*UEX0L@?hL7zbHL6LsDr&9G7t`}7v=ko17{qeQKNF{qE&z7EqO~8~N0pR*?K~Vv zK5SMb`XSc5=e_PCwH0;h(IN@WUQ1zN#*N{~(izsR2Sq4^jnK&>?_4n+5|QhYo~7P( zUw~<}kS`?(MU8av4+gDJl@ZJ8;2;+BT$rj* zV$mgXj8QF6u6JzmPwT#CN@)ZKU@+hqE(|<+-cb3XoQpk83*NXjB6oz{hh;~w%9p}dIcVoSr~K1wjh`bnh9-CI zn#|FE4qw~eVEycj=nA-deh4dkrZDBqP7t%puwXd&5zq;)-x1yg;;l#S4~#Fc?|Or_ zv<<0DJ~hNYD7RbkwGBK4*wUbIXh#RboZq)>VDfrm8hk4icBQ#u5mo z`DVmMU{$wbzOk43abw`@)v#=oeA6;m-7dPz8j^e;hn6qT3K>zas9$k*J@xYL?cOx3 zJoK#D0)=5}H&MR6cts{IetqsN`OCWy;-~DKC}tRszW4ZL3Ro8)@xhhOCN4HAlW~d@ zWL!MOl66_v0)Rl11}-5uPwh}|SR{9i4}!&Fd$x3`Q6+R>{#n=3%E9obZF4DH5&c}& z#9?-*95xVRxq+Is_$3>Z6sCpqd@+#7_SoQ?9IWGdvN{cnX9NbTBhOz3!f#^; zThe)K+NMs@;ifb$j9RA9&cCibXBs1O4**)@^vwDZYiQkd3v5q5jZstX!YXUclUgae z3{;|IGKXaEuO3H@L!ukPP;6o9)MJ0z)T?BTFy!9H^_7knCd(aJ8o95$(bw05t7-GX zZcv>l5E#a(W0A1;`Z8`cTJr^nYIfywQU$N+H`(fkqjBysT_cFoy7dwWp0&;xb%PQE zFE#KhzDmxa^IEv&NAVVHpKnLHj*?^e8BiwGoU#;`i2i3#BC=v(a#;BLwPvisr%vE` zpNu&ANR|iWuJd2uX6g?cxMwEEAI0;e`14*w9LYNC(#X>NBH;5yA~|-9ZBYUaAHFtN z(^jEOyATU-DyUQ4lumsVniQHIo7^!HzG|JNpRPEH1B>QEFz4gWR*d24;b681g>@zI z1*fimX&mm%6O4Jm_!bqdxvT}|hPsAM=7#YgsNd)`vqty?A$t|RgE&A8-A5eu^Q2oL z+L0`FBJu!boH8@(+KzJ~upw1?e;zr{Re7P%TwuDpL>93IIq8_PMdS|900{IrNUD?)3M2)Rx;0X1Ig2P`k|EiE%r`e4$}}43BFac` z6nGjIc$zX6(FA{@fB~`qm6T!%Q^i+N$Hr=n<9q08esR7ry5o0HT^f}Y%53v;F({jq zol3{)QbT`;l|NomI8IO47q((J|1`xHw< z`cVipNdW;&v+%Dn5W|8=IHY^2B*Ics2?L}GG8CoevUKcpHJK)yp&|v|t%<^pEtXE^W8VhR}IggW!Bbn{dvxT-`p=>wriikonHzMphwtAd?6zv3ckD7FRL3 zG7Z&j8f?bdiSfGwM<(WkxA4KYw6tUoEh1D@?~6D&@lJ6LG{pUi)|Vg7F4s{n`#XJy z-o-bui2DH?Y7O2OtWa5sLY=COyV1T=0q_ zt(S9!4;Z|ZiKp*f14T0l-wThHZfNKfWqR|RA4yWC3Pq^r^>zyMGY<4?f^rWLb4z7h z7P-4N>OQq+tQq2wFpq4r((-g*btM%f|_S!94Fat3$aBkp|V(|N{=TEWz*znX9_0# zsM+hmi64hkPnOsc%v4Yss%+==*aS+3S|6t!VI$=aEqexp{dAHVqF_Q-^2I}}P?w>t zyc3g1RHio4B=iC5U4V3DevYk%m#Vv1J@@qt+=l==K_~7#Ow9yo$*IRg0fQzv%2W!{ z`$7AD2co^IZmR|efeT;c`*xq;sU)OIIo--Ar@RF9tnbw*o=fSYMD!3^-tD!U=}+?W z)cS!tN9H8gq;p9qm0Sac5q2}ZCZo-jCLERPBCs^2Hu70CN!`3;1a4U@ps`kA`%LwT zn#MUw%)ZXu^5(QNa5TqM$B1NHrc9Bw4VGP6)%?&Wcj3(oj_P%t0)(sQL z?HsP^6Vk@R%h=-dbQ{_3ZHfhvg&`fpq)vJCSxd$2>kW$07<{sD+pw4f>>*_;A1f6O z=$GE25jMY*WXE>?J2{hD?SeUJ;-lJT8}(Y1OQr|;P!dhaWYR(ZQUqqn1J0f-^NY=l z;y!EeEqSDUGhFRL?W&^_j&!8COyL;0VqR~i^)%lIMBnQlOJ_i+C^Ha`V2R{$&|Q+K z3pdpb96)xA*=zm{bjlk(m>82Rjj`Xr`?-2JXxs`jQVinzM0!VRxSP+^PU@=Fm)Dpu zIO&8|SbhW?@npDM3J9^FaVH#Ar;Zc|tER&QVQUePcA<%h(hqBNr6up{epb6SuWSs~ zIcvDRA#i+&2MG6_s`e<>3W5or67Ebt$eB9_AtkAi4lWA$;w~^!^o%6d_L*LL+iSpH zs?J~Zc?{Tm3=qL7#Jo(UTJK{9+Yn}H3L(mX)0!e^^HD%VK|FpAo;g!1l`)8{R?_8V z!qtL<@`=kD>F(IV#v+~j?AW!hx|$MAvj$zz6-~BJ`ytt*$6A6$=V zG1w-e7^7#`Sa>u{=1{cWZ^W7Xb4!@aE+JFBpRinlou@mDHcBRZoa@glt-aG_{vR-y za%NcNcX7Q>z*{>|&P&n9T5SG^6sA%PSo6h>sC1aYFoRY3V6<2HWoNe=>y!2l{AB~+ z?leDZl+el6*0X8c-cR!TRvN=ktxlSm4b84Tbj++GF=9!1g4WEF_#v$DlQ&Aj7rYOB zNt&tM+cfz!XeDhci=LY+z{pkB7dFgh|;x z2AHr*uSg2F)Orx1`4tNxA?>a1HgJ0-x29x%mF%Xo^O3f5P$7w+b)*-*C#9YJlRhj) zef{g|`1@}%P|hptd#)jCs*`SUjw%zWMjM{!8YC+Yd}#Gib(1NE>jhxBPk1_`n)o#R z&am99k5e~2R>rb@l?qWbUh-(eBiB4~(XAj+A(g{KQS3Ma?Jf4e&fo!8!!Y_#877MF zee_3g=;j%=m8O*D){FQA2ufk`h+PtW5^Uj`?+&yUcWQJ_gVQ(pqr?q6#YmfDXV4Tr z+g87Pd8f6M^^kM}HtwImQb>AnMK(&#pbXv?Q4vVT`Vx2Iz2$fZ`Icz3rCr=^zyJVw zRR93>UjyiDZen9X|L2|YAL&MW)p3;#!RJ!V{vlUBSwi|+WMWB`GX7YzPTVi+sjAM**M9Bg zS(#Buq?Vs_9}K7c7b!{v3qJFz!6Q)-<|)00Jo#E6jug6@-b6j8zZ9|c;J zp-G;Sc>oZ59e~5wFC+(>x&fNSvyDl0n%A|{AJXfXUB-_hSS-hq2eyHs-VZiblToh< z)W@&(R)H~ENEe>OsoPAjk$`wqC)B2jE?QZblz~PraX+z5kQ|h&TNE(^SQNU740L%c zxgyH3leET)^Aj$%>Uza?Ymg5Py2YiOo_#HTcFbglvKkbqi`ZdbY%rkPjQBHPS+*bI zCeL06D(%5xIW-azo>SUJPvXjM-<~bpbV)=UGG6B!8JDzYV%Qy4PBMF)!27I1Nw75# z+atxxFdfNVH`yRIhaLtB?1Ol_({5m1HRh>Ybg!D+riRN%u#!6_wJ@pdrw&^^hjo^m zf8_zO&n+J?-|-cv&2Cqc{uutwQd}}8r0sLk#>yc$w@+r(LKObev6q}?V-vxpG)~yz zJ!kCk&8f0`^f$|J?7KMkqo+6*B}@fuX+OZR5n`>U6~COE!Clp;xTd9PSLU9i0&2Cu zp`?+0TZLUykjy*!u1mIP-W7who-LE;^ZTWhuFW}Fw+OD7FwUgY>`ycE=;sdl} z(#G#vdFoKsRXV?M%m?mIEa1kjPTUceDBwhOcGsPdYlwyn>=bVG=kMQU>B_=eprHAn z2Wb^;-zu8hT3%={L8CEBa1kofMi69{JZ@F-^@{f^Vkm1CT3aPWW_3Eogf%eFtt^-o zfYZOMAqm<k6eC?idvBw<1^O(bC;~_xFw)vZ%`7#Z(T2N6RFs>Zu|?(Kx`Is=kz}iYU78 zd?Yg(o+yA!uP#*nSta5v=(3(6DLC@}g5O|plni~9M0Q5bL%MpGhNiRN(n{-CF84%q z+W7kg=}KgDqyBkH-Tc5n{3V`XoM1#sxYtB(RpvrLg9>p?(od?AM&k@^v@f5HZSQ)W zofBgDL2y@5D8m)(jl*#SV{aUc)t!uk|7s~a^$W)mh(B`d%;Q`o5DzEt#Xhi#-^Ksg zI-AGABo>b+_QAni#K|D|vz0N2lR@-X%haR|aU;bf@Jb0w!@*qzgQF|plU6G9DM8N{ zl?+CK0y7s2^>thZ-$SGVCg2U&H7|Amwey@GyZju_v#o%O?K}5!yLTTheVP5M56GQa zF{;EP^&9ax1E)RrAjD1+h+jKm-*gNkMlTSH`|g?hpcbmB$we-(%f|?`-*>$rcJ}6N zeN5}l+uAJ-5%k%Ecz4TK8i&)9&*uml^f-(Nb3!c6@V0~BO};C9pL@OQj`HQ_;a1uU z;4O8e&T7cMe~q>JzjE&%A;&+@DCQ;x#wL#c$QngSBd^Sh5PHcYx(Cw2Mkxf5QQV;n zW-Ac_ZjVb|SIfJwBzd&DdnP17L4{7=;LZHg#_nEEQ?yBdyrF zXr6fa(zY3vmN{Ce(g8(D(bLWXhg%nFd=0CTp(hQ*@}Lhb3gcSa(E*@qVc_kZMgq?W zHQ-&kD`kO;X&$2!I2O-ZKw9Og2$HS=N|z12Abgn}F73|`Nvy_fLH31AQZghIE;%I? zr$_KY4Tt3%UP5ak+u^0+CbPkPg3>J zH5;yd*sil9ddcBfn#?JQ=sipU)3md9lw82>0VKA46HG})@-vw`5&nJE*nMFu>)yXb zfd8B6VEQK#|KI8O|27>J_8nS#uh!uL{)=^(+Sxh(H`XzpGGe>>hjn;B1-NNBo`6;% za6%-KQA`Q@e}WfKB94G%|1zbzRzlyjNQ;DVOxS6kv7a?9C)L*GJ599$l-NjS6y?!N z#L#;%lZ^RP*O}*Ix>(Ur!9~j>Mz|m!iCOX4%>pN>RKO`y3!Ll$Dk}4-c7Ul_ln`u9 zqi$Z`+dG#ejWKYCZ>~U!%aFIqb!f|R&&6D_B34=J3$7-RNRQ_tr}z$Iw{UOwymlsH zgTyN~=`J}V6_|FpcQSb1;@;n)FM}!dB^fX2fUr!hh!ziuF447Uz7VVfoPl>pX#iKI z@%qIa-n_*&-@+lfGIRIa0xNK<{Ty~#;bzMWT>83J<^>t$8RClD?9!H3jfuS~%?#1A zDpr*DM*^d5ie;)b&qcVnC0RM&8iMj!Ts2JU%3FULTb^9I(mZhX(fVtevus$iM^X*l z*aG2j`ECcO@6Ha>!Trb=B^-q=8=YCUM~n9h?od71!O56f-$pGr@++`~lM2rLePQRm z?d!J|b=e%v>q|ZM`qT;Sx1bK0YggUVQ8WkCd$?(yx>4u83_ne|77@l-xnGU5N{%=n zX1YLtegbKZ#IwWe6np;z!yWj2*9XRc-C^GB>@Ou$80kNP8o89oY`aJtp3rr2p5MLQ zLv1^I!T!r0`qwJ?A2ix=-q-cP5lKne(ZUjoOCMrt3#*#GH3r1`%^*q?#)Gbv15ne* zz}G3A7={sI$T#Ou)EEoXI7ruHGMqV;sLERg^o0f(eFE&Xz)iNFv^y_2p(29?*)*xJ zP)HbjR7yH_i{PFbF7pn&kk)MC(~F9ylxA3=U`n++#V5(O6x9Z_3#@MMSf!E5bc=x( z^GS#lT#HQl1Qq5VmDbepuxdW3QjgN)=iy~9GmW`L{$MNjvvXTblS~}aNEWhs&>+JT zQiUYiq{|^{cJ14+?Z(j zYzv>;b^^Vf4zynOBahdd>g;Ri?T-}VUtu>BXJ-prvwsY{#VC*2A~T|_-%}IS=ugWg z@|Av&txzpG2?SPz!tU?Qa4@ZOw;sDA5h+bX6yga+fEK7Cz(%uHCuR~tMaoP3?*AQ( zXV*bELfM72^Kk9d%jZI}*)^A>kWoa)P|Rma(<1QQ2Owz181JuJ5BeU+XV&yKFL*2jQy@k6NabM^J8^1*g+sj3^;z#$#t+_= z))??ffSu2&(U5Z>$kcaINpn?h4gr<(R~x#k3@{MV**WKY7n2Frt_4V1Y^26pvN5w3 z>#u(H2%j!B;ER9ohn7M8D#g(w#NYrb8eHb=J?qHp^ijbnZ{4IkEAw-5Xcz?68CC>% zbXiD0g0$?GJN&z#($SN__>YlD=HX)lyp2wpP2*?*kkL4hAEXuvGMw=fxN4C~Er+V1 zPx_Kejh0TS2G=24GP6=c%M8Pe@Q5w%hD&#~QbmPD+$`MbEA!eYps;ZmkT&R9S#(}L zQnm+Ct{_9e?b&Wp3FvdK*-eIzIc}or_k# z_8d>%Ys*C*#~D60q?-=hs4?==7k5RSX(aH{*QbfPsv74-Su1Ay>6~?iMROcQ0!^5D zIx;Wfs~Bl^9x5$i&0bEx&|^lzyf|b?+%aH`Qe%&As*6pN{%jbz5KL zRz{AvUXO<>IAOD@J9iD#qflAyLX8|Uh^;aewZghs{i%7s9PlonE_$RI-l_7u*6&&s zx0!2b0jHU5XaTpGZ-@=2S;6osomh|o7ejPy`Y{=SO=x>NThHH;cWra_Zbt9z+GPTT z{PE*86q5G3c5(i-cK<32_%(C-k3|Aa$?yMg%|h;ZLs-3bNCe8{b8Hb5TPSe4Ig;=b zmQr~MJiRK#8q1V5Xx8?zS1-Mou{_$JGQ{p;Cg8=ODe7@BEKM>6-GDbv7u#Xa%IEK3 z#U-~RFTqbW+&^~s=MM8ZU8uw*-DK32_DNc;kx)u3I#AD-oW-YkbRO6q1I>q!ud)NL zbyE7zgGaqwvDt7_cr%_@3B6ZCSR+I0WM<{|;|OQCTG>cT6K!7O0SwEWJv!`WAc z0_~9J4CjpDz!~x}b{k)D$154OjoEiWH2THe%NKLEX7<9rfYfhc`e&Z)WZYes1BWXsWtx)?)-CYm>0P^cdY6-svlwK3hddeR5CQds4dymx{juecTCRlel0&;LIVfr3kp=wQ2z!&9Cf#== zGS=D4WmUI6vv}s}nkBISYZ!rJzi4`mPjv4H|LjS!OZtv|#M@&Y$33a){O~D16_NH+`*qbFq1dild z-tr)^NIu<94N4G7$OLfCFBs%YXnZPoGs9Uy_csg{Gpmt1ikIhoAVdy7fB0A@-Z#s~ zJCE>Na`?fo9z8pCyTGQU3uU#d=}lJvUp)T!N%tV|aHWvt&aOQ+UXa+<@blbC5vC-Cco4o$}IPua!Lj0Nm@#?KO0uMEu9M{{ujN88QF> literal 29669 zcmeFYQEBSGL|W|ynFEo#fb_@xsDBuPTsxhdaY%dHx@6E|4UcNyRRD&R>DxJIqfLh@am_0O}SKap>>xdiT zMReR(2R>M*`5vxk2Y?!u6>o!|G7+rKhWd3qT8KWxs7YG?K#3mFZ$^P&;3ZFyP7@3} zIGyS&y?&ZdO95z)3%7K*3zpT1&&)hmIa*q{AHP=t&HGl~?%-fn!KhQm7`%v1k|w93 zYF^+TNg55Nvnv=( z)}+tQ-IsCkQ;^0grqFAeY!`?6iOin>3*eJ`kO=E09s&h^d;shNnE>+SkJE_V6jySw z6CTbeuW}LU4M;$lhyH4%OCePsIfN*_JRL;lUhoP*y3dGfz|VbhW3X3(Wib~4f(D9d z?x?H~b>l2jqeXCr2JRc&zRgEwr3V5WN1&!i60iH`Z1W_P;wE>troj-}EZoEPOWq;x z?-2j`0tS%#e+7Sv(r?huZ$$5W$2RnL@axzcSvt_s{`LI7LI1xv(*N6EFOQX$1ZF@8 zy7K!X9Pi*-qavE!&=xwDHv9k-WPI~YgO4p~{Mw`lrJZjG#$$3j8etM#OzXg1pdMm& zRq3gv;IEn6gsCjsZgEpx_mdEqoIb`?vE2qqd%e~F=qDDpmiR!39Ysl^Iu`E5)t}|j zkO+i7zm-1ABOMu9)+6{3z}+kTVMG6E{F6tow4#K-dIGih5aAi6(PoYRu49R7OeT`s zw;kO%Xo9GJ9p7kpt~f}4NWu5!l_3dP0YX>=7>`Xo!}|WI-c)Tk8o?ajZS#njfxMpk zD6}vHKaC9^Hnr+Ded2+`>Yg^F>8t#R>8>ImE=sKn_@BaMra9nQ|GQ2Os{jBXe%r{! z#@>+5(8j>Y>bqe6RVmLkB%}5Q5j(+$y$L|}57jy;6|Tz(Pxa_@!3URRCTVI}^b_fr zDa{7S*X;0okgo_i4x)wa;j359`}a1^pXs8%C>A`$v0@BD^W4r;{ncX1FJD{RN;^;W zEgmQ->clJ(oEK_>Ve*`My@# z#xhSZ9l+ZNff1~yHC_kxOwfXUM;QSd1mU7^+y3x(^<{&_$H?q+pDQuabv{9iLuJpD zy!70~WQWjg2ee10I5xB$6z5@%%O<5z{29Vx{3qbqFtFzehNup!%o-RH)<-uGs0)_# z5~iTqQCZ`s0x?1I0$S`kOo^eL1QUrKcF7uy__~Nf8lhESTpjLqW|Wp`x_duAA$P37 zBLDu)Iv;>8e&Aw%ZN)VTJqKwpf8twnPx#NWpSD5{CIPf(u4IkEOrke%Tn@XBj3532 zSu!0-rzS}Pz-tddfji7-Spm)nhpqXM7YYx0iK<9tH6UxcqEoQOi8_YhMU+^~llIXA zN?@w&&YLzTcPu5!39CUR*vdXmg;_6?HFGIC5yK8F?c#(`5uA$4I80d~z=-Ww%`s++ z;bTiuyc|B&pxGGBhsH4m#h73g<;&7`<*D_fb_!K;I2J~#oyKtO+$k(R-7pg?G86*V zDv77L2orL0Ae_SeZPF67+cFs}*e^p%#Jz9*X>ZibXYQs^%9M8EVR5+Ci~<#L+|8SY zyeWkEj0Fbwc(`qN{K(kGAosciy(#^`_sBqj+OZm*vBM=*n5hPw@HwTau7{KzuhV1? zoNKObY_(4tZaGiDai0GaQ1ep63nVSSxV?Xz4 z@D=4%4)SvnVa~m!j!Ca$0Ly%3OtJKbS3~@)2j^Oa8wUx0oBiR@-Vv~-k!3rOOi)JMR+6LyhDPh|7+RjbaPDZ)L zoLwb?O2G2;RKNMNxDhhg7(bn#h|+vv6YEq1KA@4U^g8eEKdjT2NJnr^icx&P6Wj^E zqgL+_eH!qe9)teT>Dhadx<|79Lg(>MM4vf(PJ4Eda)j#|xpTY6`Rw@P4Si9y%U5t= z1exQPl6LPQ7@?Zr*iE!G=nX*3D_a3J0v<4xdqsfi1-d)md6TtQwO6(CND5KHQ$pUE z|1=L>wM#Db{+8=-4@^=Eee4KkWd`NZ`_p@MkHsfzPRIq#3mgv61porq3_0Go_joI1 zY6|&^w<->lL;8YBP;1!%4Q7U9xU%KO%sOwh6Fd`no_pbl=SlV`1e{LVr8(ya44&NC zdBl>b#v3;Enu^%@zz}z*MAm;CqKru)_mPnE;ZQG!zCotC#r(c~s0EMw0eQPb*zbDS zKj*n$o5Dk#1&-U}a+;~Wod=ynOY;gCw}n}1wn(Qy*7LHhfwV*RN8Bb}A1eJU=xuH6 zeFq8JF9zxc)&V3Hp#dm*k^=tb&K@yAHN2Dh>>pt}C?shVF8~@tOy+&_4q6wZTG`V~ zvs3Ss4g}@0njF*IBe_O5h-5?T-~sE1M6oZd%#Ouf4B{(0FK*=OVQb0ypjm=6sq6VI z#gIrKN8%+gH|Ua__)czj=|+Cv^Tb1JwF^pyI3B&h)AXHeD%D<* zRJQwx%iWPt9wWt>{(kNk6}B`q%QKqcRqeMhFdK)a2}^F%o6#|2k)iJB5|+*L?&y2Z zO=TNVjxUZ;XexJ8N(@4(5|ap2QH;LbKuEPQXR4c}P%&y>andMFF zpjfT#gQmBx7q8|qqHN!qyyKjyW*FSFX+(PsEst$T zU1s0WS8;3JKRL01ZBjkc;%v|35r_i=7R=zpy@;O1WJ`b2T*cNoSU?NoMyrC)^7f*T z!elF{KH*jRC`wlu#}A8D#88@&kVp`o$;u-mk*f$te!(dLy^JV_IeInTLG4jF3G&o; zx>&)q?9F4q(VD;_wX{*nCPlmF-k8}&2jUWS0#zL6G}pd^uhY`(6Kzg&V~Q();$fhZ zAd(7*29(k}*cATl>IzXv2iugmgNM~}PE;wO8y_O=E3V87zu93`qLwP$wcsPNvcL+| zYM`;X%O-uHdFKzl3aXsc*veSUr{bQa3tvJjgDlFRQM?t;E>cN*m2QisNyH$6Yt0rc!X1PIHPv6gQDgI^CcxZz98Dp-M0}{Rh88 zT(q{*x&*k~Q!4!@G7n|9qpxF+2CdH`-(E>ULhCx`k;F~k!K2^8#K6vxiC43vhO$du zlb>^FsJIQe94OWS*jXloy&_p;maraz$uVwz=q7c%$YU@=py2_Y0j(FCl#_(|eWtRZ z=|VE>eqKi!fvEDW!5|l@E0?js*?@btM06}vtt8IUTth>-KVyfk{-Yo?5F>>(Oc(mp znV+WA&zeA8q>NF&n*J97tltF3p-3ePBM`&OzN0H9?&%jh6@5cw7yHN;0y60%k9IJp zpm`*H?6QV5GoTp)Av+VFWmfHzw!964o}W?WdKRpo0ya^IsRHM`Db&n5Wm=xV!J<5a zq-4$=E8ezs@>rlN=4o0skM4DJTbB1>X#;%3if(V7Exqh8O(oc$GxjSo`0k zL$TKZGtEcSkaUZn`pU|8^$r{r&DnFaABRx2l>teikR`n48>Tg!Ix^kFzP`u_XW)AM11doIx|BkIi2%$NN)ziu>?*Xb_@_sLvC`c8FbotFM*LkIY+ z;(UMozx!(cRx$klQZW)<;fLS!hs)qWYfGq%))Y!Kz=MKAtqf&X53ejpp`ux_jP#5+ z&|fBRG_h4He;ZIMoEK+Ro%LTUsaQ$D$vxJ6a> zwT`Jsn*^H_UI|$Evw?iXTE!2AeF_)vplG*hei>$bS?YgrmUpvV+X84QQ^i?**<%v zwBV%iPMnC<0a$oj2Cy_;4Dk&ovRt4&o1>#h6x0BHm-7z?dm?SS>54!=BON@UTg(2K ztEJ;CP4|%8WG${_OyRrZxI=7Kn=f8l;ux5rGfjyKdgL@`M+x^6u^!n^f?1t^L@G(L zfKSu6%1{Ii0D$!U#lMhhY-8hSZR2R<@K^6No;YFuol1wUGETVYTe*PF*;$df=H}yM ztTXbx0E8j(AYU=WG2SkyZ#K5w>09Q(fnz`UE-IbTFYzOP+`$Q%Q9Xl!u<>3)3=tK2 zb^&&I^Gqmr&w<@w82QD)vLnLcHR^P&qstQo?DQgBsDUlX6W)O34-P{JL@4$~cj}=~ z=LqXtSL@1=Kn{SQ2@KHR19!E@N$BP_Y+Q5gg6H!`^bOh7tLF5E;@Se*g~k&OMIvg# zwgg%sJaNW433nJX%eNJ4O%q9l7cVo*=jW`^ z^Me*|&X5YwA6^!12!<(n(wzLEN+h^tDl##`_+jMFAKxWgJzS~|ls2S-d`W=JFmh-o zh=?6cjkoZsR_0PDn$X!<4;34+wrJfHv~S92g;jxt+$Pb9%x)5p#%L}VGd?J9o1w7c z49=QhOg;x@-;Ck|JeKMd2qTiX+NHi$$r%L4KKTB6uBxo|I|bvJSZ0n;eE#jo`B)Er3z^w^_ym(+>xQw+QquOP{MMJb#<8 zYTVTrkb{(yiTL}hQLTERZF2}kONt`9CAf!2a5{ssUetRd77N^)>;!8Ly>Qb|ai z^r|aP@)gC@mlE?0a_+mwU)W>=qwrTiTX}xN0U;ZsUms_0raK;k`Z3#5P_4JJ^Uv^S zRXUgO45%T|iCFLmz5U?A%D>GkLJ=8B3UJc6CjTUgSYP1A;EY@(B8I{#kW)bQW;ZLU zBbmdTKf#9au>KAt9{OD`vQezsXs9fsIxHpB&4lZr~$o|2=v2cW>W|_XuDLx09W5--iB~J= zH5x>o)$wKZvbX-qcpH)t4%Eg!ShLLj>=Jqkg{ha_!Lz!I6@+gVfXjE~+AeZ>Qwnb? z7HjcC(?D;@IvKcA5@S`!ec3~1oI-iW2EAJzi;1?w+s=FZIULZ|%XKgFKE=Svh^u!- z@=R!`z%h-jxWNxU%2E|GL-(p#tM#Vvo@bJoe8eqdH-$n@n~91B(2ACsLoWZ^v*s1T zUiMGv=R0Rn2jHt8mKM}Z&19}6y4vECvsIV+anjk}{RQ&!Y(;w{zN>)wXByzJY;Nvh z+bVXzC-{(E+5s9Kw7tu6o+X2Kr#GboTzwCPpY*@OtFm-NsP{1?g`^lMRRb!=vx_c<;M2IJ^e=py==Ca1 z1CT6~{?z0^?)8VeNi;Cu>JI&~{{bHy&zLTWyQjM=8_HEcYBbpWYf30Rgr780yLCoi z+TPZ=8?z8{CQ%D3XbYQh$fDic-w%*LRd2gVer~fE)VWkWdxZd@*gWR2PwY!|T_1ze zG%m4EY>w58C463jd{f4@B-tkzyo96XS_Pkh{PiMV1VFjJu*xiPm71PB3`$VphV>_+ z>jDUGga>kB@>sIbTyyR)L`FS?Y|l;vUSADfbpPmXRGA7^(B|M zY&xWd5UfQaKTZL?cVA;Get#-5#$y?a+L|mAvq-uyR=o0$UdT-QvRCM(Le( zEX#RbC^(O&Me@#JQV<{r#)b?JHKAqu5jjx=1VYp8R~v~R7Ft7vnaui~#m$ClOWB*W z?9%HY%eQL4KWPr(boH^xJ1F>3Rn9#_NFf=Va=D1k$j!C)D&J{EPM=i?Jr{2CvD9BN zthVa=*E1yDt4IA>zX6_{oD@KD9G@QamKUAcZk!D<+UF1R^j#rU#8B8!n$8uLH?%z} zhq7sKicyx0bBI#U2{F;fR$e)hy+s01JB1h3k`Ca@32|0;@e%z%r9WPZ)T~x(fH{mF zFU9%4D#LaLw3()nGa()CZNmGdh}$=|Z(-TcRMf4^qrmm3z&bg zjQ<17@uaoCI7ZMF*PM%PvpF4-udH42a!Nmkl=Z4q9JkL8LqR{dgnh-VPU@e_w9;U4 zDTSQ?^|QZU)y&Xe&OIj>ZNw8~K=EI8?6y7Pkr9hLd2h4uY$Ag?i(n5KCP9jby#*V* zc|YGzEKw)IZ@bgZOi4$EWy50-1yV`k^}NOvas$wq9Hc-5^;sn$g{gD&=OKFuykjL0 zN?D{>dOSlKbasbZF=>=tK_}6c1xb2Frymj#@nBB!6q(EYIca`g2W3=8p5ldL5tNmJ zoi+gZmSUA`McyYRVb>Eoo#w|m=UZnmTZ)!ikt=U)3n;*4Fy&ydPXBV~uXa}(CI*nS zoHa`=Fa{pi-KJ+hprb+YRT0+Rc-HNO)F&!bV{=AfxJ3^YE%=f@uqhA?^mZ%bHeIw- zPrgK%X&9-u>ns&NhkBysM+>))6NRNr<;MYC5~nIV?OvVDX0nSu&;V4e9gol$sn#p) zP+qMJHChHr8>U-|Gzz?J3elU^WueKvH6_yXmMIG8f!mZ>r()*nfyG9{2S1(%02aXh z{R789l@VdZxalAtc5c)zRH-K25qr5>5B6Pg9PZ~3nG9U6qo!ue!vGk$stWc7(urQ~ zE2?#$vp&hum1KYhDVn{c+^^{$Hcz9x%QrJff;+IX!Fto!aJ`*Tma;#F zc|5Z^WwTt=o7l&SSJ|^XxQjNMK+}46Cg1ES!x6`knWk2|F7+~9Umf^+{2oV4MWMN+ zK-lD@tR($jwMlAvU7}gPp}Wg@to71@+L=12E<1{^O3bc3$S1-Z(24y>=bms9I(bdR z)L8U7|Qf7@fpc&dFQ0wJiB^OnM1CUF!(8D4s=z($}b$hSxmDE;^cxu-gVp=H~B z>ZjwvDCBEWpR@ZUV@)wGHWj%*!uam3EO~I93!3b9z^;;P6gH%XwZxHJ)>sTa8STMp zvFI`wYZnm>1E86Ly!1lstHIUh?&E6RUYRfckLLtabMtT25FbyD3C{A24;K00hE;%- zPV=A`N@S+~&qpt!RBKX_|IR}jl;}#_+!iektqFC+HN5jX_CHmNAkOU~NwDt?11tal z)c>p&j;2OdMs)vr{;Oo1t8Ij0v!Qg--Q)JJbA6`WOd;7+4>`{nvq7$W>*)uWZX#+E z!(zAAQV;BJ^WCYB6se90T-M*z$k)2`H<{-D7e`|0Y%_kC$lB2eGsC z#SodjxPLpTnj@bhwl!k!$E#&!`r;w-*6)b}i>l7(=Nb*~^l)b9N#)Jp&BWtrBOjAJ zcwjO*yGn?I&azFSuZ9;Nixqsk*G1nzLnDZWUYV(tI5j|Jn?gSl5h|~lSb115F=k`B zexe$U1odypvEp*B;CZS75Gvnwf6%T_NqbWSICir?q)g&oJYnY2G6|aSoz3lWY1*tv zWbvHzb^_gUj;#t($*wo8+J5tLLF~h!py3Qko~N#?*g>VBSyaA$P-wGQVrd5o1R3(6 zK8{r^YhAHCiY6pQkGL*0csfhK^z@}VRMs%Z4^z?dKx`LQ)4?h>6!bM9AR}fD$gFoD z>;VqyL4y-$K@g=c)MQPfD)?POjn}DkB^W4>FoLSd9y$j(_PZK82{!Saa0FI^z_%>F=em@LwHlPPj*apIGyD%3V zd$^vy!Oth5;v70z0$(sq@Ds%Lo_xz>AN+6|vj7(XRH3Wk=eTrXTo7f=$iq(7#5?*NgY z;o;$WLbt;-6avU28IX7NJ`RLTxC8?HQ=;it$_#hrq&t>eJqONDP>BI>QOn`=u2wW;W0vrNW? zG`_{1_~eEVG4|S;vv&|frp0O_;2g#YUJq!5U~bNsRbe zUKf|WcAlss7y3S(ahEQ$A6-e8PH!*wpdL9hr-i(2PAb;9DxFdI`U1}V0ogQ_^O<8O zuU;#~RHGyB4EiXeyOyr7==8;-KJqsVXge|xqV~(RScKb5OMu|SqX%v|sS%xVaEy_S zC0UJ$V=HlG%s%=vv{Py2TA4CLRuv2AQKk-0rD(;XXW>8f-#AI^{ZB|r!H^!xZ$OaD zjf;A7&2(qP)~k$iXzR*{KOl5(`TiIbh3~)mgS?1mD!lz@E6O}^>X|RnVvwpzvE|8- zvLLY^lO~VR2qL_nvA?4{YHblK5ut+|po(zcv)boGOdz!)sL=)nx1UnNJsI@>&I$1@ zp|uzh`Fxqj6eRrXwcHHQ_ule}hZQn4?{LDl|*T9k}!M zQw!?QIh;nwF=2y;kuB8Glva{B86G>Vg}ke=k5=12bSHY%UI)up4^t-zgjZ-6Nt}ZXSI@Jlj3sB zXgc_i6cwuRZEg}~88l3LMJQ2ijHxe;*IRE;-W-ApM6T-7N2vEM|FoJZRpc7L_FK2T zD`>e4XTO-!bz2|<7jn)jF{iF#Jl;;!e9>}y<0SK=E11tyZGI6>5LUv zsBwnU%F?uX;i6SEbH3Hk-3hdIQEIW|9wAV1-U`~LEOE@18rZ5;Qh(vzHdD9eVp5aH zG*zkQidr@C&nqqvRHg=UAOHYiF#oM1cQA5vG_yAOm+n-rvSz)sYW=X$XUEp$7n=0BhEWIVo(Tz^t9pUyiK+ZlHba zWSnjKlIt$o zKH{-fPec_Q3FdNoOo1O zL|M0<-?n(mt-F40iW(R@C1Bjt6<9u2Ir@zgL-XiM(au++9}11k!G)`2&X{}m~~6I2Wz zupRyrsv3f!d2^!+)M0XsZoRrfL4GSb#`BwXkEuKYC?G|9FsWQRRg05TCox2y?eC)I z2C^?7hWm&eV(z#iY_srvQJZ7(y~n5%=qwD?w*hol%1sok33{xFSTe;j9#IDlOp?2@ zCFZy^)5LzOcUCId3)e=vQZ=>HN7p^OFxNtgQ<@8aD~&KIo(;L32TWgD@ko|3&bv{1 zOO#aNK{Pp`D%+@(qU2L^8K@>>$^1oJYl|H11c9nI$L`vWxMIG9@-mS&rDAi66X{$M zGoBK9_;`<&%DRLnr0J!oUk%+^Y@-V$)Kjc1wgZ=Dd<3GXCS|ABV=5jkAZAQ+DAK@f zV-#Ta^(Y4BMkoA<#idw@MrWp2^sThgh83{q^qyZW(v6WCTIY^-c@Eg|YsmgiZO4W+ zNw_Pp9hS=@NOY2%E5lTd-Y$(lg6oLTDS$bFWXMj_sTO82j&=KkB^w{P0Fx`ufL?^zl11O}=L? z7T*LI8Gr)7z{$bU#!A)7lFrQ0$m(Czmmrq}fFhgVn;HLqU*`#9GC&L{K{^E~EK(*2 zbkQ;)zS;fRBHdm^ILU+B)fWguMNR&GJpui&)+Ac@_wPJSRbjE4X|blTi;#JB6cKR{ zzK6!N_l#l2b9#4Fr}DFklguqMc=cJz)DW~yewWC$2u}1+YV~JeI%ueyRjn9Y+CKWU z%6=Mqk4{VH7LIIs2KE*}#30s3go-+1S&%DQt<`5h7#>hk-2W z?M(Qbg==+`xIKKr3QZDoB;pEwY^*8W7MO?rZwd!F`W1oAUgRliA(So7aKC7&IX9)R zt5qT3ZW{X-<}4u_7Jg3YK+N5-m#6?gNS;VSrv%MvkYOL!j2CRX$VpE?AD=S@%1jbU z_ib7`vG=rv=F_D(bW!3&^`@y#7*<3aHc3s9f~aRf4*R&FiC{uuT7v?6*L?z?R`ku! zAwL+%+u#>&a?10eJ%shmPkmB4wLb|@|FfX08fQ~``t29~Zy#d(&3|(CHntA`mJG+6;2Qmr$)hdIa#RSb^nD9h)HzTjzB(9r3V3Mh`Xe+Mm}n= zSzfAKN6BDUG3(;7p1nAkFH%xFG+Tm49OP@$zNXu4_i-JdHFI{>9n&jPMM{&OX0r=SvPAi3RzAz5Ax!U!N=X$ zP&m+Fv(e=poImV86g`6D=qK+NK(bx641W>g-C4(^hJxO#OEl6=Vl2HwD@+t{+UOQK zXGLrh949SNdV96~Tu0B6_2uX=sSdQ{2qz^+#dBEgq3Tx*!KZ)3hj7cERy z`xWPX(3RG8zx8JG4u6O4R{GHqvYy$%b-jiMH=oh5(NkSi+U6qiv14koj9r{(-Nl4s z`Lw|0wmDRz()NivfP3H~+fP24jSF7qi&O}CBUzYrrWe;rVG z8Xdi9$~hbC+LmXA#jl0x;xT8P1S7&zL9_LKABW9U7$FEQ8X1fpax^tLsczDN<`fl} z#WkAr1lMaiB%fMyT0dzF7%{M1&J`1!KeyCW4h)FQQD7nkz{)|E5&|L)B%SFdH#Op* z93)ahj(0_fjSCK4W=J?U9%dK98w1S6g3Py)=c)tDhWY3f&V$Za&{6IMI7Yl=+5PZy zpJ#7?i;6PPe~QDy&n4!1pW~wZREppHqbsmL4SM&Xqyxt&&|g^Gi=#P}zHhO_|2ZU@ z^A{dL(pvvUVBx+(9O}GwLu|ymTT<&H-b{>%Knu@ZwAs%RGiq}ahr2iu3o_3JE{RfB zMRHTiWvmqbtsrB)WU|y$k_8K9e-1LvD4$c4pTS`-E)v_$Q>$xaI~i<>nXhaP7gE zt5SULVmyfXFG9Jlc()q$U%!E8j{y71x&zVgRFK+TdB5<|pJjxAHhm?5W6>oU1-CLW z06v{11{h6-9uuD106=q9h-T(K(|Y1C1QqyQ@2_E8@W`G^+T9D7_?r)sT#Fjy2L2VM;~Z*$A73o(7h_>Ir|K z<*S8yeVW61{5saF{DAyX_qsJ?cWi0$EFq{)*MEZT?2DyHcGC%uF~5 z#0}=^3Pn-tm})n&c)P8R{1x%guMsrC?nfd`<_qe~m1zO?1524GFJ;!uDIS!jpM-@Y zAJZ=(AQs5D60nE88!E&6?CA0^QFu|SI68%6r!8kYy z5j|32oK!XzSdC)*YvCYjU1r5hUljVfqWU+9HfTnTYPZ0fJc^%7HC_bNWrB@-kR0#| z{19!&moste0==21J~4lcc9R>=F>-bof`YnB(yrdnQU_!+V)OV7e^KxYb@&F<0P=)~ zm6w$8pV!YP3_?8uNvYR=`misZeEIOc$@@8bqB0-3I#RIsW3R)s+PcpTpndq!{0=Pq zL|b)t1eK>vbDOJzIGlJ-6UZim`zicc=THSQ z(xOFFyeuBKT$lfN#=%A=yXD~IH53plU0TLV!+}RVp=EHQuTWqnvRfNS@wDw5m#23E zwCt`A<3~cx5^J!%FsZEuu>(68#TbZV%7+ow=VFabZeCgqQn2=q+k#hTWwF5_4~LS( zCuNz)@(V`@`KxT!TqozN60xDA?z`!aP>rGr1s{bYTkvs;loLi~0AJYgt5Lo~tK}5k zDaV5qr`K@G`bH$U-I1wG<-^{Q8&>I@6Y5x=3+5uLoGV0WAS3JJy5A%y4M{8okpY}q%JK0WTsDGcg9X$A#5r7I07Z`cI6;ogMklf zr71i#>9+ag`TV>;SwBK|&CjKQkZDW;Sf*KVQAn_)vSlU{>odDp>x2fSkISL<`@>!{ zI8Qwn=9oL3Fqs7J_?broeV;!)wDyNey39a;VhvSM{rHO?ZGh^LUy?)3C7V9NtRuCUieB+-Y=xv_JyQmcq|7^J z?5bxSzYF#2Z)uys6c5I=w-GG?4&-nZC-vjoTCaSnzeFxPe@y2E(Pw55xobT@>_iV* zH%Yo|4gMA)GGRkM;v4ej8(1%!=CGZSD#NrwdkGJ1M`EtJg#@Ln9qLd*s2+Y^0qq6ogjI7AJ zSkz+S4?XXWhImcNV`vA~cko|JER}Dnr-IaX z!*{nkC%w+i^odYlTT}y|hm*pDs0~Aj=rO1Z%08K5dJR8dM`JZ>6+wGxOR~XDi+-#e zkgiFSrX6J&Q-*d!huZSlhU#(K`nlq>-|#--X8Ur5dqT8x3#~XWZk0Dh#SK{QaP?8Z z_;-iBMdFj~ho0}UwZ3Nw&QjE}Hk#S9DRlzWfkBsmdJnt`T`Eh~QlGSOs|mJ8Yii;V z9$_iwP42xgci0(1%NFV;AC9ta*;327RA68A5_C04w9>_U{UakD=l$3s`F7ah|J+&p zTVBhLk+tZfhZ(r?_X^%_J0o#IDq^Pjs1vX;u&geN8?GR1^t*`cS$4@1bU7K6;R+$o z+42w=duYRqWwX`Jvl3&K6bEYT(_T{))Xs$e72OYR=E0F27^F&aw(zl0`xK@Pb^^@F z4n{9-d8``s7UcQo#kw!dO(x7LMP9oo-THAYeDOh0OC?x~boPZ@JhveNV=LMLV$Y9Z zE2Bq@uMg1@hsseTh+-ooN1@f9#~B@%wVGR6a#1U{!;J-4P1>z;O*Z?zB0X0t{qsQI z@n|{^st#_VsRz}PVEBZSwH0Fk6gIQk`)7j^`>1W^F{(#At*R|oeb3G{TZ>=d%FDie1Ni;+?;*qd?}Uuy zWDlGlJ&eze3SnCxvvxS9t-Q=2GH*AIaALKS8ujQJS2&(j^P|P5@&X;3F|ttYv1APm zD-p0wb={|SsKwg8hrCLub3hCK)OJe-aK60%Ov83>F(!p1s%!y9av#40BQU#kU>zBe z-c%qzx!E)#kouJU>WEE*qV!{laIS)=WC@#!C^LN0wWW>xkQ+zAKKw#SU3=#(qLc3t zCtddb*51ND4(DDz3iNNkZ6NzUqwwD{gkd!QUnmT`^8X0l_BbW6ZmcJvq#>O?Th>+C zqlowhVL^OE%@=QQMr%g@oZyv5F|TJcWeYz+SSR8O88W1+f5*d;NPCB~87Y$fg$GYg zu!t*4@Z(L+rP-FnVn4=q?{;i+wVn24#IyS*o|00ckvsQ+TI+> zU)=}$nI5rRZY>m;JTshn%z;+vKLb+JWR87%FWgtpbAH^iT8vwAv{G!Z8qoyKyipSO z-yoto)TGD-m$NA|;LC6!%su(-^<1mvJQ^mEP?`x-2V(w68$|7_{VK+aALen6&>>ZD z{gqUT7=|B}ajmERfjF-Y6>+m`NYUQIqzDN2L+EJ>{`;)$_r}}bLu@%kp=0OU97_K) zy#6hlq(}cHpTGoN`F99TZ#XHhlTq7Atde}gD_dnr9!by=d<_%Z;OZ}U?aA!MMR($( z4#D$~ULCjB2KCL61%gD>XD|W>H!UUZM?Vutk&{_O``6h1z{d}r#OFi|fM47p&6d#@ z9B?n;@T}3Qvq?J@N-g9?eq1*u;k#f|Fftp%SW|vU@e)K@Aj`bL8ALb)F3^rF4%}MC zl*V~nG(#k6^;k^T4h;x91mr)*j8QJ@T7 zsjRyE`EgL&vuyXNIv2@!;mlGtW7;t11z;YBwcrEFZuWEnUTyb1qr@qyTU}Y7e*pZA z(+Rw9@w&5zJRyqxgJkD?)*pEbTQBiCzB*X)>Z)xmERZc0ss`mCeMj;NQ}SwWp{mPs zd9XbHN%c6ActV$P6K$j*iBhQ_V3?#2WdxEmWIMKMfn*2e`XQAB!?Hz;e_L?tQyC&t zmy+cOs|Ge!J3=z^bwCaQo?C^o>Y49_#$?AMbVukZAw+ms4mmNwD~=A&ks|P?e?vD z_g2zwS*oPb2vo27vY@T&{QP9fGAso4B~E)XCI#juU|fm0h!lTIILPXpma`@r-Ko6C z#RX@@1U>NcxZhe_sVXV>WCC5HEU>wlGvtAf>9okDF4xw({IlmF&>q^na0re7bs$gg z)wbcn696NZj@WP~Zsj;drJ*~~8P(NCxX+9!JU&uNskp+B>6P8#!cazHtk@d}H+JH} zu$L*9X1}=8oe_ScxBqZvw#T}-yOdbRa!Tk z;l(5F=o4)OIxO0xIMnkVkY%+t=V5dScPX$!ZB3HM7V@GwDSmiVyOOq=Vx~b;^e_CJ~ z)7D7Q{#az&P^x|#VMg$}b6DHbJXw8OFtI~p^1f(`V6Hnz`PP zN7kBdXR_9m#Z|pdTIVMD0AxRB%!=_YtD@z=x#Hm7O@iKhPmVlN4-Z6@>;ht$LAp<94W2+ z)$uxXU?;R9w|Zr1_c1=WPY2X$q|WHo!&#||LauCeohCNb2D|4vs3H@$o6PHUXmr7X z-Kz$dx7TUKQu@r#AA#)UN;>#!v!^~3d&jtjkp3_4K8%fOT}FR+5bm~6kKe-Zv!!*m z)^yTFTp#gnrGV3hG;Wg*6S?W3e*18HRou$oN)0yN7Ez&0ob!5063(qFP^hlemdb=taOcOFS)ZWLtOw7V zar#D&N!HiR!AE}V)P?^$VxuW29nK~yNl)a4T;@Sy?F}VLaOzPmuil%4SS;ifUM~1 z_)S3fQqk3>$j7w@cY6UQZSU35;q48-3C;a59Jw)lH#gaz={Mm|UdNBRw4Q3xj%xow zF8-4!ae8kCc|t_T^K)&>^YNN>EQ=`B4JZ2h(9D->{qWtv+Qr4V&UCV2)p9JE=TUkA z!^q3j{ckR^lOg?Y;!SE+Q3b>DWBg@RL-3)n}SPEL+%^2^287NyH z;B%nv99ew3@TJh22~sB+jBWN{7IGNoNG${ApJ@9%K*B2(_pLOB;xcF?8plGR7W4cO z9-)De`d=3ZUcCy8VeBCq8+$=_B&zRYZ*-q=-m{`DT8?`698rNjFD8yMp%hd7Q}<*_ zd<%3hTO5+N;~uV(jC?RQZon^UyU|(KyCfsMh+BfeI_;G5ejXiD(-5m-mS{QKq#{Y^Z6*=^orfCyVz&5Y}ap z311c)T9se1ZU6-19|7gF7B4@1ZaFX7m#; zk$W?drubW~E^cu_KMvFmT<;GfR@n>eToJ(lrR#y?gfXUV+?XyIvC130y=a;ISGVhk zcD%1}aV3aF)vhmRiX>x9o;L#^5_F(_#c;F&%Hkw%_>`(^6dcpu{g7fTJ8n1ySn&gw zQIS#>*PM-O7G2y>PK7+^oN_;sIA?k!GmftdWE}4m$lX5_$YYarZiV!auG?v&f{Os> zrEvtAb2)-^1sz~&0uC_*KqnZ4fKm-?9hrx=hfE`Tq!H88U&!d@)4is)FE-XE3<7Xn z?xJ;vHn@S7?bygYxh@ait%o+!Coh`Q>Y8v}9BKz0#!3=hM*4n?fQzoA*Kq4`JlMao zr;8v>#-bKuXJXu&e(!a{(Hj{n0^jJS%I;rH;1T)H|K^?&COoQXVS~Sd*7k(4W|!uj z`9o78>h}|c>C%UCB5Xbz5j#ufB)yRNrbKec89bslz+D`13g`JScAd*$hQzkKGxV5* zavn}P)RWMU#BqC`;YN|Q-7K7a3|>W#RZ|6MHv>&x2)CwkqdD0SwhETS*wu`Hp^q9k z1XCNsl+~etNZ28t^82R$zME$Gv`(aFaQ@F7baufyPMos)pf=R})zBv1|9zBh+gYmb zE#Mlm`+7%2i|N5yOyW|pR?~Gi*MUpZBpMv%8B&aWovfjYJ>SY0KI1S9^>E;0Q35SJ^(4*F_zA(N414PLq;e8oj)aev3>TD*|+TuTP z<3>@WQ+mOqQ%>Hvt>OC|%_RR%dsiJ*#rE&%x_}@esfdJhHxifbK5$S#^3WwM2qKCg zAkxwxogyj1p+UM2-67I~()C8JSKwUU@2>UM`{%tStl5h*GoR0F&ic%ry}vbg+`MG! zYoghy+aqXzjRe?msN1s;eJ7JsYJDfue>6Gjb>xbOTyGdf90U#p@6Gq<=N~H6>Fh;6 zE8pbAbeK1FH@z9(wMRWPj+a8IyBFu{$lFcL9~8wzvlvD-SK&87fKDVMKcd+Bd8Xw` zWH_(ApeQh|h19n=`#YG-*)Fzg_o1|zRFLc7)2KGQNwKudL6nx`S=Sw)xlmU4~+KHHh57=sFU8+Z~66c=8= zW*Mvv*Uj;++rw0L*&3(yybKn5aHfws>T;qb#5a>NsP8UJLt0==<&Q@uKIVsDM zWZ<;qyYf*je$tfYTiQ(g*u=>*+RnbvKGzPhGBVV?MT4seTbY$H!|_Oar+TDQ8|$aGeyT3R47qTcWL;E zqBF!jnMNXPodvd06o%9xU`s+Kyxr}Mh5tqLIfi4B7>a-j^TM{2eU9k!Ykz&D1Q;Jy z@!e8MzRRMLT&b8O_7d2z1XO+;04{aPEAfLy#T%<34~bCij4m>mbOfCt{1Ylecp0XK z%zI$_6P@AoMA6@yho0HHaxP8E?n=l@iuq&B5}`NR0lv1+B2PJ06bvHBl^C zrj&Cr^iS4zB2Wf3wkve>qEI9!Jfkv}L-hyBCnFv^$+BUuxzOm6M8Op4J@}C7bj_s* zZjv=r6qeB9g2p0Jxc|(Y&Tr4f6hn2HH&XmsL(t-;yPuSifV+GkkC@zcgS##yx_dsd zTN5PU>)kVG1`&I(9F6(7!;Cg*#k^q($5JYp*-C0Sn$(wMpnu!jf>vJ<2U9a0jo4jX zJiH`}vbD3HjolE~2UZ`wW6D|E+K*slYvEVO!z}@Ta81DK;z$Giub*&l)gTW0z31hY z(2ga2yP|+|$K}@{ZfbBess4&0j_N?VFP)&e4+Dv{g-lHh>D7XCUlc}lZ2At-YTP*yRPp`v5X|1)H%~9GcFh{Zr`Sb7f20>`Qmn1}Y z4+cVzS`(zOCki0fM(QBp@mcmTwb-mxWS9bQ1ubS0#h{!>fnQB~dJ{Yb7S{U0o{I?gr7=*Cn{6F+wJ0ZogQg-%I7ayhFhbSH-+n!^6WOqk?>VjKSGe z3m{?b)?mtSap{MSFE*p8H`ad4vb=Q&uZsv@n=!ITZH``&->PLD<{BdLs&${uDKjDw zNSSzeUnHGo(!OGXyFSOxhXK^Zlt~TiGjXkwzB}YY@ zN1jr98n`G@HO@us^IbL~^pm^q)>PO8pv}(AOOa)5*f2+BAO)Gz;JvHQ+!Af*mUD@x zMY0C#bjG1vD*kv$?6Js@ZTp6otD~#Nczdca=~$w_X+YX=*I1o?mH) z)<|0}#=ZBAu`Px>JEM7k+k0qU$3D|te9UsWTqf6hCLrjM#r5Nm-g$13q+(?unneGd z$(QE4RdE3z&hH~;awD&`-iZkVzAsAk(B}JgS`Uz`ApBvm&=!j7@96~0d!^0(yYVZu*?!FIM<&<)z)}~|VM7#o zQb(R)Gj7e7Jm%Z4B@MxQcjSe#&sZkVcTx;aS z8jo>J${AmRHoARA76M$lg+8PD_b(TqNo4Ag=eoEZ5xkDwx1tl!>_hm*W~&SJu1x0` zX2>fs<@f{&q_nkkYWVn&jMcf`%o&O|lBBI=QfQ9Q^CEac?j%A(_zgW?A||zWO!2!> zwFr?z1m(nm9XT&>qMBsyBZB?~5c{iCn8WTDE<&o3GzX3_E$6)xnElL44}N>SLWSH& z@+=tcDzTexp7319gty0QWA{d~xhG%~vXwkxu>jRlP37YU!|~#(AmV&R=sYsEH5!6e z#rJ*W6)vLmYDb%Km3Je!{9BAO12C=inysvqrntZzJ*_B7bRb46=HPHJ@#OGueIYU7 z{E{Sf~~{Iof;BbuvZfxxdHiM_nMVpUvyD5fpfIl~bM@m|Yo&;^$M0)$?`twsYqtypC}{S=u7*e0HOJE= z5!E+Tzb7C<4opAZH0=w{t}g64xzu!dIA58|0$6?J0lUDpzgPY)CWh+gLa*a`I&A9L zuo0#XQla7zf~iT~Dw5M2iX(2L$j=(m@MU!x4W5@Jjw0qXJYNg-@+yqq+Md(h#tC8a zRclW#Z@y*5#j_&6v9-se?H?G5CB=`e@{;rQkWE73fOSvZ8nW6sumxWbrzVm3!@2JFkhr2s{sHW`6BV#F+i!cA&h4{cLB| za>!ugfv~=Y;HRr6kD34x*zrC*8t-SF?oV`~!6@GYzQuoS?`n*z>FHT_Cr* zVy0UEG`eS_zU*nUNwseVRUBimnryl5%jU;XP>|}~Qe}({=q)YourwpH4(&B-%4>`< zQT;}ZQBp5ak1K-(DTZYUHX6nR-o<($-Ct(BfkeE8+;F4j&BlUf;5T3VndZl@WcKZw z1YgN)^V!NnuILj)%|Yqxm$`GBp8AkmRVmLMg_5q?-Ph(1wR#vuZ>_I=3)3`yssaB> z{ukd8*K;vQw8?7&%tJ!!+?|W%y|us~$J| zLoiydMe*LySBH>XGc<3&CvdP$+Sg#58y{>cFhUBh za&_INq`m2u&hPe7@S`FFp{I{4rBAS?_u?dO7Z5O?{eei{=CGQ`4aEeCPdA3_T$<(8 z*dE}6W%gYY^;54~x6*+`^UA2pCZ8amIC_5lBKLI4sHS-t8)MZ_WM!;PojRCsEqK$B zg}i*)QswoZMJ1j^yvY8sxTF*(7Bn5hqHB_o6)ATloLCMs?nv|%L{$TK|3CFRO$!mR z0+d||Xc!-16cI4Jokr8L8reQPmG7;$gTL$50AJnz!=Kd9TID)6z-dK$d_{geWn8oo z66oU_MoS%NRteiigymU|)J;{FE$hE(h%JLwtsiYx*|Nq`86Jr7Qgq{< zOi6sQ(|f-Ry24lVwXOGRvEysrx_P>X*(ziBPG|3i(+@Y5*Pe_h;h%vOm_?QsK>tKe-Ohw5a?CJJQe=hUB}z#DZlT1{ap9k0#vB$A&+cZ5%9>xF$w8R?K$MW zz8mn)T1iAs=2X$_jW6DD)N^URUTL|#loA0gFpp8?#I@W#A~-zWFT~O`${NY#OS(%C zH*&NeCV2<_#HY0@7i6Z(&+aB6|>%>;$nGLJNTmV9r#8`907 zK_G(h89K8hbfl%hKfD)Eu{qVk_TuV&TM4n_p<#CQd9^>RM3Wt5->8)}6Uo=Tz)g@0 z3h8>Xg}S1v5|W7Dtz|dvcgy%K%ASWkrd5A^#Ez7F2;KeTBon(H5WV}p9OZH+GMO!+ z93)_q`HSNeYJvS9J0sgt`%eQ=RnezoZC%9C@6%)J_3c((rM+E1k)-Yx#9HOZt4ln2 zXluvSqn80$ex4ezuuu&lV=qWUQ3)ANnYe=B67Iz|o)#XEr+Q=Okmh5t^O5vi(R^{z z7GW2D${k`)j1#~^{?7+Y?ixaqC?Ivt(||aPGpYZTg7cT!QGTl(P9}HK*tBs z!7BBCMBA<8g_(U>YNw>*?Oj&C@wkk2wzz#pJwGSzSc$@Ek?> zAJqXz=m7EE1d8&VrF{KWneSKP*YC1?=SJGVuJn^v z;eI5QmQ^rn16!C+R((!qbo@f+e{!H&0evp1%U+aWmXu|oLVN>9SP9MZ;K%~`Wn)t< z%e9`G8ziOU3)1cljdGY9D+D=jC3x5ZQf>2wJBXRI!r3C*Wcb1KMhT9EYgTvqA2>k= z_No!b!?JLFygi6XRgQ$F1$lx}Ts}~_U=q(uK6fU%j+IexXv`3lUFIgnK80F8nVt0# z^P}ZfpV#=Mo^PGVJGcK$1*dV?zg2KS1+_2h*znM%4xX;#eYQ~-@?%njGCk!DVR6M- zEXEl%7K2=S?Cz66c-J+K1DK#Br;GC=XsCn`mEVh*(hsS64+{GRRJGo zEW}`tr7DJ?Q&Z0GP9m6)rqElK`mrTocF-?>`|Q0AT5GKw$+yffCog9SX#R3g=LwG4 z0|loqICJ9FNU(7NPs2Vo*B2slEQl$US>0=OV$)2B+b$zh&?rWPnQae{eKFL}CWa;FrI2>)ii(`sXFD;8Kyl zNO1ARJ?9CIfZ52o*Z9B#;a_2L24w<5z$WnPg(mRuiv_38zz7IXpxo&x{J$wb z4bKE$5crJC6mS=uGF>PS43CGew0eeDBKnCxUw0KA4qw9Z49*3VDLcLVxq_DPX!tsV zXXp;{bDjTLbr2p6Ul8pKZFuV^8ooRlJOzAJn=^_j+Mg8XYuv!YFIJ^E0|U}wM7XR* z1w6sUvIyr1D45|1&Wa?!vs^3+aGu4K1)k+}t_)9bG1u`tK@T@P!D%-xWx1F-cb=u0 z7oO#GvIgIci>X}a3EcSM2~N9lDa*xlr1LBX0`M%SlLh#0T+HP;PjCXPRL_>7KNg^; z-MEzHVg}237G)uLmeYq7d^axUe4HoH118CrbmLMM_|%6po_LX;3crvD0pAPw$m27- zp2XjG$genLcrd*4@(c{t_z8w{Wx_MSyXVdrK7f8QTyWOG<1e~L&d>-5u3!X&UuN6C zoh9(_i+0}g@U{Er;6JUt@aT)Cv-9X-y>sX@LmE86MH9$*0yTs461dDD0?&1EA$^_; zY;-x-*|Hj*>*8|kJeQuycrGkC6xbNcgKCZ?Bjoz3v!xh_r&&vRLsU(R(lO;nLb V1!@Z+Alw3eb%6IkI9UGp^k3s9oD2W} diff --git a/tests/unit_tests/test_format_docx_reader.py b/tests/unit_tests/test_format_docx_reader.py index c2cef74e..65601a8a 100644 --- a/tests/unit_tests/test_format_docx_reader.py +++ b/tests/unit_tests/test_format_docx_reader.py @@ -284,12 +284,15 @@ def test_docx_metadata_broken_file(self) -> None: path = os.path.abspath(path) self.assertDictEqual({"broken_docx": True}, extractor._get_docx_fields(path)) - @unittest.skip("For issues") def test_annotations(self) -> None: docx_reader = DocxReader(config=get_config()) path = self._get_path("size1.docx") - # test 'pt' ending in size - document = docx_reader.read(path) # TODO fix this and check font size value + # test 'pt' ending in size and check font size value + document = docx_reader.read(path) + for i in range(len(document.lines)): + for annotation in document.lines[i].annotations: + if annotation.name == SizeAnnotation.name: + self.assertEqual(12.0, float(annotation.value)) # test that different annotations of one type don't overlap path = self._get_path("size2.docx") diff --git a/tests/unit_tests/test_misc_annotations.py b/tests/unit_tests/test_misc_annotations.py index f9b76470..088b98dc 100644 --- a/tests/unit_tests/test_misc_annotations.py +++ b/tests/unit_tests/test_misc_annotations.py @@ -173,6 +173,34 @@ def test_merge_1000_no_intersection(self) -> None: result = self.merge(annotations, text) self.assertSetEqual({(a.start, a.end, a.name, a.value) for a in annotations}, result) + def test_merge_space(self) -> None: + annotations = [ + Annotation(start=0, end=6, name="size", value="12.0"), + Annotation(start=7, end=11, name="size", value="12.0"), + Annotation(start=6, end=7, name="size", value="1"), + Annotation(start=6, end=7, name="bold", value="True") + ] + text = "normal text" + result = self.merge(annotations, text) + self.assertEqual(2, len(result)) + self.assertIn((0, 11, "size", "12.0"), result) + self.assertIn((6, 7, "bold", "True"), result) + + def test_merge_only_spaces(self) -> None: + annotations = [ + Annotation(start=0, end=1, name="size", value="12.0"), + Annotation(start=0, end=1, name="bold", value="True"), + Annotation(start=1, end=2, name="italic", value="True"), + Annotation(start=2, end=3, name="bold", value="False"), + Annotation(start=3, end=4, name="size", value="1"), + Annotation(start=4, end=5, name="size", value="5") + ] + text = " \t \t\n" + result = self.merge(annotations, text) + self.assertEqual(6, len(result)) + actual_result = {(ann.start, ann.end, ann.name, ann.value) for ann in annotations} + self.assertSetEqual(actual_result, result) + class TestAbstractStructureExtractor(unittest.TestCase): def test_annotation_extractor_left(self) -> None: