From 173792c0d102356ecc4af27e744112da92137b95 Mon Sep 17 00:00:00 2001 From: Hanif Yuli Abdillah P Date: Thu, 21 Sep 2023 13:18:32 +0700 Subject: [PATCH] bug fix: fix overlaping entity --- dist/lexifuzz_ner-0.0.3-py3-none-any.whl | Bin 5557 -> 0 bytes dist/lexifuzz_ner-0.0.3.tar.gz | Bin 5185 -> 0 bytes dist/lexifuzz_ner-0.0.4-py3-none-any.whl | Bin 0 -> 5553 bytes dist/lexifuzz_ner-0.0.4.tar.gz | Bin 0 -> 5184 bytes pyproject.toml | 2 +- src/lexifuzz_ner/ner.py | 6 +----- 6 files changed, 2 insertions(+), 6 deletions(-) delete mode 100644 dist/lexifuzz_ner-0.0.3-py3-none-any.whl delete mode 100644 dist/lexifuzz_ner-0.0.3.tar.gz create mode 100644 dist/lexifuzz_ner-0.0.4-py3-none-any.whl create mode 100644 dist/lexifuzz_ner-0.0.4.tar.gz diff --git a/dist/lexifuzz_ner-0.0.3-py3-none-any.whl b/dist/lexifuzz_ner-0.0.3-py3-none-any.whl deleted file mode 100644 index a9c74a1e6bf26cad96c2a4da946cf3db22ee6d00..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5557 zcmai&bySq?*2ag9Aq9p|VhB-*K^g-c%3-JhWB}=AKzaZHX$FvP6zPx>1f-<9yM&>U zZb>QmIOnYMo$vKqXT8s}_H+NWe`~LM?fvX)T?hp{d|ChiKztM6n*acC|M>*~7;etj zW^YjD&hG98wq}kH0|S&T3TQ^y9J5)kaf9Gw0sv_K%)80GNvYS;j+_>y z@}i8oZmOY7D5<15l`0Je((>UhXAnivx(Sf^Cs=;edv^opmMi+$ zBBw2EMeu8xnr_;q)ctX!la|xo#_^#AnOAIg0LvV64vu>K<8~$vvRA-f+?*1rUEzJ> zN@I_F$12yjpe%ue_!;vT`4oLo54g?C;6UUKqlGf}UAiZrTfs&@6X?SQfH4>PY>@V{ zg3sSvARKS)>7IJC8*M9NISQG=k=8qq=&Sd`K2aQ1hvZsEC~VYF3ho@-`O z*!im8EuI?2Vs#tesD|i@uHe3OH~Pj~Adv`3K5`Rq=8uaW8>eknoosj`1WR z2Vsnbql_QxTE$ugizk&AjAH~n3Yx78#FhAeT54n~LX{gkMZVzRK|~f*7l1vpJ-W*b zFtnLRDs=44{m(<8p(Dnd{R?%m$8y>ZROcwDWP_7y9V9e6^eX5q$Cdo6#+85C^0cgNPP zwmT)D&@4V{Kbd#$4g02{3|J6Io7Zt`E!>%TZ;Q(;?iEIw5PcVcx!r4!CnG5`tdYT)>{|he@%zj zG#@?zdn6m96oV;h8M+Z7p7_ttja;NHYYK=ydE;?TI*B(cQO+L0E&JmOc<+@~*yC}f zytTS$2KtZ>lKb(}!VOy#`2?vxJRIFl-|{KQsdGtTfj5y7xf)8Sd*D}73g0PKtx1Ki z%b$fybBA7@DlIOB%BnU!Q-A?vu&T1}RGk%Lq~i8j%(8?K=jv?Vm(~*rA6Z;Hc$eJol?M?W@D*hr$bKJFt)_DE|=QTA1nf;=OT*gp)c2Y31lCh@OV7$E zO*|VV`a%@+{CJSW;~@$3FNrL!nJceFU99t1rCSrNM6spc`NV|7)z@0`TjIw)!7my! zy_ekET8}yMw#q{A&}JuE#uR6P2VTKiEHZVr-Fq`bmM zSs2u5F6Kfpz-`3fN{VUl3#w$LZWjYRu1>Z0r=pF%oE&Jf@GRJhtnIg=__g|K*pdlS z5T!ZRZWiUS9wy$=bUm{oWNxj;`0lf`&Lz(bZ~&X>?r-7QY#P3wK$sZN%&sg}tN5d1 zY)9qeTiSzi0{ z%)5(!EZbO`hZh8_v(HtG*XC^M>0OQy={zmgd+%Rw3#$C_yTCI-aM98CfRS3M-)JxO zohOc7l5lz3Dg!Mpr5PyccL!qc`%kUjHMLlQ5{u8OY$B3aezs=Jr^Stqj9jUn)t%$3 zXQI@=6Q-|pLPU2nR+=22mt;1P&MgLzmaXGKk+K2DJB$zQ&4w0g)?1z$ z3_stkt@277ra{bMCyY9uwpQ-Sp46kZOe}8&mV15QAM$RWmsyRq4jS0ykVx-*9T0qu z%eI|6MV!+(80Dfxw{_+yY5iE1v7V4RO{V zRcUg%?p`r@**XY0T{B>k4e6$O zr%jdXgMwD6QyV3{{j*t=h=SY%nj(rA18ai(%z;l+!ks!I4DTGdk93NOD;=*p-1^O%Eg#@u!P8XK_p3xvWSPR<1%kwz#d%0WPon zFz9|%!QDJShHduW6Zzcx*!ZeCue^rEFP_miY42$@)BN;QYj#SSqBJM?eJSH=f^JUs5CbN50fy3NxWc4mzA! zM>=E1^^q>7|J>4hi)(OaChOSpbk%Rb{_M$a{mQvJ!$RJkjvQL5tQcZr3=vv5C<4TN^~jG*fBAttOt7fmTt29h;*FEmg3fPx`j&#P*afwcVph zA(_u|oO{+KkFm2I^r~HwD+hAq1=G_oh3WUt_R1gc>~vHW+zUuK+CVP}W#!X$!^pVE z!*X7_e|VD<#95)KFaH7_n<$oQyl4s!rjL8j#t_cViH5%&3P^tNlK=BTY=;r1rBa0z zgTy;8?4e`s$nSBA9^N=YaRM_AB+c${ER|PXO-JpO)2Is%EDyr9H!^G5C9m8|X1%7x zdoWH`>6}(+c`{bq`SZD7<|8&rc#9abpl`3=zLoT-i#e!>L*m=d-yP`99LOu<6v}$)=Km7okFM=OQN3B~d#**cUiro%3;Ew<<)aVY@ zLG-+>t@GX@+j$#{KHRNJuqmfoyDO)Oh?vFI9;t12no$n3OLVXH_4>BhI{A`?h)+zp zr}^nM_VSUG=&Flij=FR>%nnv6KAzrz>-ZV=WCLNbJL)S!pn-MM;c6&L?y6iNAO1um z5qJ{xL>218z{A(_!MtR7HSgqC%V>L?Kpb4K$O}1AVM!3pNLHfP<+f;MN=UtWhh$5h zQy#<5T_^cE{AU92sU?$2=Op7N$2g`*gDfww+?{-c42L1_0le)SKIL6X1vu&wviUpL zbfFA`ZQ^t6u}CU1vww5ZMM0d=Wfey$R?aj&iEqO;KUSr@Rg1cFpmw$=n)09Pdd7>;$FJc$LfiZz z3B92p|5q!;7@i38`M^?rX)aS=J;(iIV@)nbk{4QD6j9a0j^WHw%T>NIZ~K)5YXc0$ zeYzin#oMG~UMD_kuEKZGsA-uzAkpB;#ZSq!DS6KLi@ga(d%e3KWme=;%Z4*{`aUmc zU|XoxiKZ*FuOGiH+a!bL43@~w%5=GGiqR?U#*2quW`yF$+ABt+#nmnKC~^&jmQQGb zQXPxq+#*_hGuZl*Km&5BlD8&1bs1fAXU6iFwP`d5tT$$)#|IyOll!9UqV{pKLsNVn zZofjMof7|U`F?G!JX;Ie0pUghHhbbWldQ|8A`ca8m%Ob7+r!y z94Ej2G)3mFCRy#(0wjKo&KncbH%%UpK=Y@O#%+>;`D&`?`KlIHv=bq0dga<*4Ib~- zw%R-VB=!HU*!=JWL^aMG5ug67MbsR^ydtMCnJcX9X2MP0G9StMtc5#4lncD@&AKk! z$BpEk{n>K-hL|k2Ezr@zP%MzcZ z9v-NS8=jlnb)nsB+z2ssy|7_FNh<1`5+N%F;HHxLh;V@nm~X9nu@l>IBH1&iH=g{Y zMR(tRQg1EgGvPF&9~!wj;E{51F>xs1a&l5@r(Sxy8(npKXS>azoVEALvP!J!3cA;? zye3Z!3_JF@rd*}Ly`t>9sJDRb9)qGzcQpk;^9dFGJ*2aB&kOpA&NRD(FB{?{^)_&G z`1ew?Qaoz%BJ1ir1{ii(ohP)|$oYQ}{dJk3@-<>KZkCAFO|bvnGSP&|$|{ogKuLYr zs0fDYbN6v+WJ8;PGPS{Z745nBAKt0b&q;pYOMy^r_Z9SH-}kBkDwPRvf_pQk|afEalngjn91ZfB{TTr4`Ra~fGijXKxC8qSl~ zWc8i+6DSd8rP=8}5$&A!R5+~5!UzV;6Jdzg94Nm4FWoqd4t97V?yH#)InH*9K}DKA zn>+d-Z#>vxYrTLMFZeKOE6zQ}o9|s@tnH%M#M4-7dbVtNZ(f`bL|nVH{xz;!amUh5V0}es~2bUJ_zk7N&*Zt3D$M+w{ ze{=Z$r2Ogd{fhwr?D&fRN%@!G_b1>_asD4b!;NwHf6Mr9QvFZjpEBv+#BAR{;NKAc zEt>vB{y8lFL5@=Y4f3C(^9S$$_4)tua4c{A4ewuFk5C}E5rO~!;mzBBGb|(ja908U E15)(cCjbBd diff --git a/dist/lexifuzz_ner-0.0.3.tar.gz b/dist/lexifuzz_ner-0.0.3.tar.gz deleted file mode 100644 index 1f91da0631c96acf27eb2f337ffd90b664977631..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5185 zcmV-H6u#>piwFn+00002|7>M=X=ZhLdS7m3axE|}FfKDLbYXG;?L2F5+qkm*Y#{%E z#|2L8)>bS(@-Q}w<0ejHY+?say1hjZcxj2YxsfFuJ(9RZ{`;LFsh49n$!?q5yA@G1 zGC3U142Po`azt%Q+xpKLy*p;qWZtXqQfjiS`&+9u8p$)(>$SSBzan?9UZ8}2Ks^}# z)z4Y#J7ga4IosE__nOT;eW$Uf>CJj`w^4YNmw$e-*d3pQpFfXm=2dH2O>1nO^bWfH z^X|7@|2wVLy7dohU4NxFn>#x@+jSjnfnKlITCYegU;p2|oc1or2{)MSvyH+=;lObh z9-qzvQZmYltqX3}64Dk5QB)Fv>189>_-**g7 z0U+oDKqM8y zZNmypq#|mz_?)XDu)7e2zfl0Rp$|+TsTDGJOg_QCOmOCgV~hK<3Nbn29)|%``dBH% zT0u^>9FO?S0>S_V2hs(*34H_6L z-o^3Y!vz6|VW)rbi42ZNr~irk*6SZuNcVrvhTZdXG8k?YdZ%Y6y)IPt`UfW;4txFg zmXU|@(O zl7qq7r(y5?@dY^^oE&zc@?95b>%2SZ%CUg2gOg70v_cL$r=9m*(Q5!`fIWnh1d@-( zU97_qJMiz|qBrOxCkKQ6#Sng0fYISa-1lSeyjvliVecGeax@&A0*@$D=rRykpku!) z(V)CZW?BFQe}6dd#u&+Aw{rr>&at!LGJ@9fSLy%DqWuv5fAgXLU(fyj7xDjhQ~rOe zw%2UbwVmDFon5`22LL~(|3B<@4o|z<+o{1NdJIJ9dJakeQYJ1nu7NfP6ns9lr3cMT2ra;= z7Ywnxz@y+t+X4?Z1nA#RJaEKA8+`oG2tp4#0wghMKs9p0ZkR`>QA9Hzb6bWH;QgAh zysBd%LmXrxq&8Pl160P$=Kl?uUm+QeoXG@Y0RmbYu&Ohuf-7C6UO=c}gh1s2y>L(R z_!$u`5e=<3)ZZHgRjN&HDvz{xyLYs^Ksg$-#4czu{j9Q&0*qhTqtN95Ez;`BKYt4XP7utV#_{&G?%)?W{DzfLr`C z^W`MD9-Bdk#vu|)<_KpjE5r&sAvCC^oZnke7XpFqR7AM3to_iKLC6}#xQGP$I^Gl% z{I?fxrkj2K`M@0>2=13%DJ^{8%V&#agwg@bh< z=rCX|WbC(aovBCN88<+6nCCej>L3J{E(FAYSjb3dsT2(M2Ba!*y+O=StE40O_KD|= z!E)Qo_m>wyObN#Hh;|{@*If|8_#Ojk`O!Xt)XxUW<^a$+b2U#7^oC+6?c*kZC%9+S z=P)6L>co`YaAo|=qFG4IC3NybSaQI%!6)2M%Mn?pu4{3r6+mCIOccu#w1Z>N+W;ry z8W%OJFq680TD6fzQqQA8eSu6V|7iJr@L_`S={3y0jdJLL9RsH1pf{Y7_rgGnHFaP) zHiKp4IzA5^Z?RDzQ2zMi5BUJ5oEQS+sm5IcfD{tY#q{t?=na#k`;veEo4DMCSr$Er zmI6n9GK_o>3-pY;f{&Gh_%r#K#+e@P0eUdMu>6_8sN|?1TqPOtVn{nu8q&?@xC25a z+%`v29dI%bKBX1=SF^ z_;<{_Kfx4T)6J_&3fCCylPS6v#|AA{NO4T$Us)%2PS5?e6J2CT&%j+0v`c=6GD)8?(J?t3HAHNqxInqmOVZXZ%rSSE%jRINz&3 zjq!RtVSM*K+a>igqxGn7f~03?@a8gt5p;FX-!fODx&&NTU!I~#_vBKJdG+Nf({cZR zw0MEWi{l@;|C68p{t)NC+dHk@y_VM8Y3Q|`eEa7o-T!eHV14i)Ndw>3`u_VrdL5p{ z{!go+Z*T9w`fuvBM!x^^66JCna?7mx3qN3U@CQ}=fqvX&(REk=VKu(Df8EzXIcn%K z${u5g8sX)P?xF2z}uPb$Jmzxl7zs&?E%(#b6P;q&;qcyMeF@{dZ^)ItW!X z3}z513OZIEL6cviSfk$f&lF-Q3j>f}kwTOChR5ANp;Q|UQJjlA93~4u!873^o*1M5 zJC3%wXxdRvwrqSp=N1I3!J>%bqNe01-Zd>ILae%SWN7|%zoG3wlVRa5!h|!)ntZVr z&pZr~=OX4ki8fc-?P!Yxo*G5vr4-d^Jys6Rd!*ySYP|u9;dkmVKL#JT%%g(Sb1}!V zAH+2#b7B5~eFVK)o+~YCHb6Ge`j8U{b5S62&g!TeFuhiAjTH@TSJR6taNbmx@a7gc zKmW_m|9+73zvfk&r5 zj`&?}tJt`x4zJ`fYiNsJczWuKLtK@BKrBA?)DbS8ZHYEKcxQf)oQ|t)fdvuA9C!*# ziJ(cONdpc5hDuDf5a+Tu?nj2WY>|q9iQS0!As&&-ogBr>b8RQ|kSjh9ab`T`&m2o8 zmUt`(JseU(bBRFy_>}|-REy61&H>8kFUwWj2YKrGlt?8hKtAlVsi$@@g)%E0m0OB zUmVoh29vW~A%5VME2MaVzkFg)8}rhkvQ)-&1CW|+EsE=Kq)#L988M|&nj{4Tq!fZg z0Wa>aaFy?lqD<`y!K~3a3&1|E9W7Qp{MZkVN}bgByqnbB_Zf?IWj>r7|3EH-{J;{w1in; zv2j66Ym7@E5;!tji`W&@hPds9fuLVWOJsWZsdH{J26phX3OR@KWUmr_aZi~H)B@%k zOeAFyFsq<4l9XN`?TyOa5$QUaY?-uW6UapFlOK9&MDg(nu^r;vFwdf{(o~KqVahK)91+mNTrg^hRHdb~q-(}pg3Iz6t-$S!<0=DMKm|d# zH@L9HI|@X~L>7i>9H}ZKj0L55r~{9oC8xw@xAA-L5j~UnQ!=M5Q7^;?NYWy>6gews za#V^H2yaPH@j@1f1kgu9UaAy>t4oz(a23JHV~ht-n0j!Pn4sv5rCz8iOTv#u6)g$D zpzE<4E|c~mDJi%J9iX9>RgyIRiZsOX6wI0A7s)_ejZ9~1Dw$r~UY?k8#-ybMsln2K zzB2n}k+HMlEg|v@1)FQA7aU|q)2L~dBtL?%a;y+3beYo@Ts2t>fbh%O4O5dQJwkeh z=|d0A2{tOzEq_XD9c6Tk@QA0B(x=29Tw}1g$oezo_lpuqlEWvyck@KX22Q*1^yDGZ zf`B+g3Zrn3!0rPKh1aNjJ*dPF(w_A#FBY}(by;q z9Q`pPm)Oy7ZRk`2Sv0_<%*PQ6%?LdY>PONV?~~fYb)0ldW?!jrEa=oiRVz>}V=3p6 zTp%EAJcha~X4-JI$d*a*>dTie@rs9aet&WEo3L4NRT|(ev6!6+Q1My^TUTM8UWqDDo?4>U2JGEX0S-G_s&*$!AFn{J>8Y{&Ct_m6-zR40mcd zkzPqJPUS-KIE+zA{Zd;+7PZPX$Kg7r{1zMD{t!^eZ@tmzqR>6`u_N~%86RUN|J=C| zuY;6Nn-GIv$;m^w!<8#ntyIfvvVu{X)=D<5KCOjGxWAt~ms5^7ET0ua zOXo22o{9Ye`O=g@OY9s2K_DR`z%O-t>HDQ5mf%_EBrNf>%2AMHeAQh*fHh9T=C`fxW;lIeuVp~{eu}KcrV?}~} zQThb{m{qeCkWp?wB3)V)msS00RTt5?3Vt=XkQsolU`C=tbrL+q3R+nM3K9aqcp@G` zt^abM`~-Whwp_jjwY>du_i(47yylg%w28P3v4%hyR4LQ39whY{0|IA5;59=DmvXr+ zKPCOC&VSE-dtdGKj|Sf~|6{w^T$lf`qt}y3i2`;O*7-kEzxw!@Tb{S(OQ17)DvxlEgL< znahQPI0L&)o|a3U>GF6!bq4$incdHYU*h2@J$9enWAE8L^xE^g=*c7*JsR5+4 zzb_KV%ge1%3IYJgl=E`oFiG{V9yk#E*+KOJBGk6fw=LX1HJw|#Q1}iPT@=3GH5Y}< zB^UV}7eDkJ7llus#3+`ov&f)+>@tfOc>Prt%Qsmh7g>Du9*ZT+>K2Rm5{u*xi)US7 zQCM+-#W%da;+fZ1$dBU-fBo)?XJ1{h>gI}A(#f?I$*mRg(u&o0R%G{e{@#rhk6u{u zr28tqeqF_1xvk=9JU;ENilRQ#}aR1^|U#VW`* vRq~RTyyPV>dC5y&@{*UlSwvc?Cu!Gg=+mLNld2A9DGcMGnAy9bv90)*i1VUQt^V1dCMf(8f_Al84LsafsG82tLZ^vl;mj`Nbp{=|J+q@1Z< zg35I6cXcIwWSW^=iC=~4=ft?M8fP1-%aiLd4g{%!&!a<<9gk}zJuP?UCDxw`LiZK* z1~ppbYBO2c$_DU~Wf}s<-{S2d_X(Y@g4q+@>7VM%D=x|m9HZnMEes1t-*0pRz*`PJ zDa~hrcK#xGKMZEi-B9HgA_E^ox5#HW%ap^YOH=Fr0 z^xs0Rjj#K=%_L*Qa<8yMi<>8yETw$D_4FyCirIHdUa+Bl!gs5K%s=7RLJvetEHlH%~ zH_q-2zUW%h?WIr0s|O75nQ2O{=VJsuTCUV{LxZ8==rxvol8kW|L}M=?Z}L<;A6F`= zTY$hgPxgV}#ddF8*7CLjFCnSe!G=?|?FqHXyZn?fa=c3ClmeNZcMG1UtZpU>l?lY8 zkXW2w!Ll)pKTO8dukdVAh>t|m{1vwSBMbPjwJcpzxjIS>2SG<+!}#{@x6%c_#gD#A zE7H6&`6}|^0>WJA zd9p@*g)*t>yG?O*7EqN&k(*T55SJ#24#OU<2q>`zOdr*(K`HiIivfuh5scqvR-Pth zk4hy~yh2W*G3pyy%fNn3-E}43aZD<2l=92VfkY4aZh?o>2QM--er>vNWB)qOW_bHx z@l&6;-vwR9k@4BpA9uE_i8y<4nY(TolZY%9zi`oi~YbEf2l9x+x)TRe=O|lpG-r2q=iN zcWh-O6N9pzEQ?}}5^dJ^Gl7~MVuu22x-0zZqq5Tnwgo>w(MuGndAg=KgNV{eFvays z0@Zwn+LWvl!evj(gsQDql2Kq(@pM&Om}Mn&SZws>W-CdYa3mJYWquROtQ6{2@&-OqUky^66K#6^(X<(f zvvj=3v?v3Duze9M0&sC=I1`S)l)BTJzeyluHAh6W8q7|yb>ai~X{)uH-#^(=y^iRA z9CMq9IQ`z8qklX`%mzq0(NVJ00(nZk#$8T!I-`V!;2nxU?huS)3vEhJm+dAgZ=Jfn zj+;C;|85D>beL}yp6kH)#*Qf{8Wm3$8oaFiJEAentsxixqK49bTX>-ri7W|^eir=l z<-s{T-?Bx0KH3(k&D0cr=wgHy5O6XfPYIH#w!OuGf@r|ovI0VkJV@m3# z*qf}NJ8CbM=SYW~D6s|{P$~DW#v5j5U~g~wnoXeH0-&-Xn^f~^wz9HhQY?7D{nv+a3kYwDb`~Rhmkp{2)rH0y1PT5w$5BG@NMsobHJ=Wmeome_;t>OA`-{EpY`+*WRdoUTH`Rb!~gB`rci)A6XP!9l9uM zSK^NOds`y^sd4r^)EY#y(cayI-(U$7zlP=9yP6RA?dSvN+qX0B0tx1bdh|Yv%h8iqb(@-&3Cg!(*XJkP zlDhyFICdAza^mtY&s2eNS(0|fw-Iku4e4Q?gxz(~)G=8*;Kcr5YWSz1^#1JWknmFy zjRw;*$O^)0UEwJ3BMyI;tyr;K)>p=4?>zP5+x|lnN&t};xVbNf@ z)lu+?&X556J*nO?3YNxOxqRGM5k@Ebu~%W>h)#X_cmg9r`CK9Dv<*(1Ua(lB^<3(` z%v0Hq;8~HCYTxG{>4TPT$g_H=oMr`(K(iB}=>A!G}R$r z2}r-!=ku~E=pfq#ep}%&l)-u+j!})bEcA8W_CjNBTeI3JmjIY^NN_%ui_*V@=@vKfd`a!%X zp#uOce`pYHULIZ^ejaO>rx!QO#nu(9q9iM)rY&cpW#*dAOB9mW<14&bo~j|_?HcIW z@puNnESt5N6pE9Fols*q<=QU#u3@1>G3n^=@+d(?=WUjJq8e7EIoaIsg0)&}Ow1N> zUb{p{Lx;ZA?7Ee4k-m@>koc1hWvR_D8^?b*hMn(vR-w^TsLs>FG zr!l-=sA*@Av4ifVvpip3K{Td71X;ojoqx{3ZG9fUTBC-j9gQ|KV_ie$Sio65d*>){ zzBA%TplVpu@~#P9*Gw0lN)?uMB;G{5vM|?OlAEt0%fgsZOSP5u0@;Y=Dd>w64z6_T z#D&?#Fb+6i+bv`z{62rT|BN1M{f5X$cCmlXYC_Z9&{0vTZ5AhoZk$q!vaYS??yM1* z^b3!Bdga>&+~eTjdW8!UF5RJTVgT7dJUmB0dA0Xclqfzx}s2k^k-lI$Q%>SsL8k6N?A(Zb+N zA}e1tAoWi`K3@Ke4t~wO{sekFD9A)M%?Q*p*4*rAPQ|4p zRncXdFIADQ)sEtNPW>uLh~iqW5IE*%DqhGn6WYFpHWXR<7>2>w&<%*?l3|I9cm!6$ zc&%cU(2K9&>Xdw%O}-VY#rBcNGOd=Pt$=0HTr8`FD)MDlxqK`zM3dE8O^GQ}A2**7 zQ+FE62Mnb{)h;T@bxQrh%X~n}=wTskKI+O}=X$lxB`kAfGAK#OxWy&+@m+`V(B*EK zOmPCBRvWW|3|suPguR~o}o zuWzcP08LI8%Cr1#4ShwC@}!3{o{7Z^8Z;i6^F)WI>NL5}BC4xFojytr?WS^w4AfUX zr?d5-B#E}hl@r_t#XI7*77enzn9Gb5gfQ` z!o&dvPnGVTJ2%Y-!*0=lV}rAS?tQ=G_?mOm%e*LL1Ilual1J}e%@Fd!TYY@jzia0! zrQu+C&5L8Z7oVs#_f)(~B?VC(bcT3IJT5eySs>>ZfsWL>@S9F-ZQ-Ebh2aJHhs>~( z`$LZmi%3M0UsE~sdhc9jDy|@)XR>d1?7k-h;etXm@n$>;c&p7X#*ko>5JyLNkX|oW zJ0g>f2uq~ z`MRO*Aw$oS&;9)Kg!B5AOo%38uvFUB*OYiP+y8>fw`_3=4l*RsA9dVal${DxXa(&;VmXDH1?3>WlA6986@*Z}jKm;|`P~bQbQI6+ zR)rpVv$3BQu3twWINDY`(XvM(x?D^1rd-p`fpjL8!KBXclbPsYOShZ*32wwUmCmOZ zhQXGdgws6P&e+x153q*C)K zx!eac?Q*D!V2^en-4|ZHn|nH-e+AU#zWjyr5p2f9xwzVmP(u`g*(-N)QNd9tl3RUnk6np{W zDWpZ}lQ!xsvCT1q`Sw~@`6B;0qmO2)9Li5%*!+^wK+dl{bE;ht3MX!9=Lv~rA=FJhSi8i- z|BP=WK{UB~XnUSx(ybmc!BvT4{j;U=DV~TnM8MZq?n5&2b+Pag@x*9D%Ea>Wp%3X% z`);hY?=NSji!4O{oB*&IfSQLJD!@u^270s;%!KXAqQ_LUuq$>#>NQ9s*6Jt|I^(%$ z8ke{=8kqa**UV2|pNoqYSM6GwL9a%dgZ&=&I{M)|`$nOTJL#hlwQVJAyf^2e_k>#{ zsCR^4ezn?3AD)xHy*$)^M!u3(KQe^7gp{fn!MfHT;Jmx0W5FlE}q% z;6Ykm!Rvd=&@*3aQY=e+JIVzU?S{T**xw&Nx5bowN$!0?VvX>ME1H;cBN?n?F^Xe~ z9-gbhu)S0t_GTf+^snCfYf*}0TYg3CC;-6JLookcQCf1c>ROPfFD*Xdya2%Xk}D1+ zDK-JfPZB+aD7I-MR0f4ivjo>{g(uI7LUi$*?x%w8*}7u(tw?E|Lj_H#lW1tQie30s zC$lQyywS%LUK@^D^{33j4$D~2C34VL9CQ5~M~7>Km;mhk-wwVh_uw12ec~XqVXLCn zn)=r35rM`XSKG5`;^F9<6e=RGDaycfb!o|fXPCF7zZ74dwI=56ta~YYm#Ofqv~|bb zuSX;iy1<~bSQuH?W1O8h)y(x%#-vP<#=oNzBOpIeBuOB1VC?R;UhsG(Xdkw6Jpyv& z6FtvGVLL38ZwTNf>y+~6v8%<}D!Lk@Gj2YV_;FWtRjp6GC%Ci?PunXEwGLIYTISBj5@!#CNKPi8@d;ekp z00&{he^UPC_5BI>Q=0z=fP63w|8E)pO{o7#{8J?Tn^+PSiT5|ee@mu6k$;ZLe~=Tz te}nwz;QYb+e_j5+JQUkUf5ZD%&ub`SJjg%*0Q2D;c^H+^e>kgv{{iWP*v0?= literal 0 HcmV?d00001 diff --git a/dist/lexifuzz_ner-0.0.4.tar.gz b/dist/lexifuzz_ner-0.0.4.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..58f3699d036dc71e0137ec068289d4f2b246409e GIT binary patch literal 5184 zcmV-G6u;{qiwFn+00002|7>M=X=ZhLdS7m3axE|}FfKGMbYXG;?L2E!+sLx{>?;1l z9H;7FC$_Ns0PI?}jsqsR7BFSV=B6mhJC?@wEV87d2jHam@3(tq^fGpU-GrOFkt!AB zQBO~IPj{<(rZw7@w)LMgdUwRA$-GzJ<*LbR-QQZR(MayGU9Z)3{S~=;^#WJu2h@Y% zU;Ug{eTU2gK4*LScDvbZ>pP9MrZ?-&-A3V6e*N<=i{0@_`1$k5W?r?X)wJf;aeu#e zdfxlC_P^6=t+Riyb^VpzZ0_vrY}a-41$w<++kQoAx&43l>!g1{j=8~XpKTO23j2<` z@c48Vkdjd*^;*3_j;PHiRRTCr}^~m|)@Zw{4*dzUOayA_N-aqIakYe{7eitj` zWB=l4@Zo|0#ISpM@reu$N%!;<`K^C?P$9klIUDxQ&&go8QRttX9rt_C*gxGr{&3Jg zeNWy&$I}65b|3T`uwD!Zj;L_;d*_Jlq&M6@f}h=Y{p0?{r^-g*uzzuiSPut7(j{lz z;YEM{!*O>=&OQv!2IoB>-~jNR_D>IoFih{HcY2}0u+T<&zrzo5e$+ia#<4aE-4DRP z5J@EagR@V={`;c~ax^$T=t1MV9?;f(cifX>0bl#a-Tq019CS~*?|Y)x0MGz?2qy_7 zACG$2h9h?2-~L5^aEhGl4^A(J@UsGp4lm-qAN%LM3h55}=O~lI;ot;#M43XDfxrSC zPkRy#%9~`S1wiokhx1;HksS28$AIh{I}0u&Xf1!0{=dAmA0qy5K8*kCdHnw(@&9fr z{%_UV%|>0@+1=gQ)$4fz@N>rh!(R8`q^Hfz7mEK|+uIobw=flGwsv%k|C<1w$Nw*I zy(Vwo9D@!Vf)0?=-mpVX=^SFa9t3g0f()7AOl=W6l6TaH2XJ#B<7etEM7)Sy7vzKn z#teeZH*YY$``2ZEPN(duGz$XP?`&;NA;1pDn&Hg1W|&aWvAGrLUKMn2Yiv2=tvU4r zxQDQx1%C1znhht=TzgtZ4f?OdXCO4PhR(pB`WQhJPVVK`F!-eq#%A$dCl)OD)qiXil`TDm64U<8R({veFC# zZt>5|my_grYz8SBhfFA$Bb>3U5G!zp(4dxbes4it2qd~wk>SR&_CsR^DQlGDA`^{6}J2B;46JjX*FgyhnNgcuMD83`?wg2CQ^R3)xAh#6XybR^$C z@tiSuZkzf3vH`@DU`~%{7t+4&f)K{{7)Z;H))AzBHc&SEfXF0^Jf;#LTWCdlOKY~0oMkfa6_3RvQAys;!-PszQjxv%MC~-q%@?P&v6BW zOt@{1q&nbaAbd(I_Q)66hSw&5@*8f#{ES@BG2u0{Njl_mGoUjHsC`J9sDY*-a z95O$&60Mn=@-{85sP85)nKUm!CBiiUZnM>*^_sCgu8z$ntu`BsRd;t=jq13)ty81b z*fHqtW@SZp$q=FdS*(Nr3ufRUC5({%Dkz9#JO&_ot+whh$d28?<1d>Smj(#9Tfxzh z`g-mDzVcK~YrAcI$E2+)Gh2GK*&L6n?J;XtTlERFPwLyv8hwOQIpdEqyFzWh#(BH` zG{)=ogz??`Y?su}jMk&R36h?n!JEqnM$py4c*|Uk>Jo5WeR+x|-IGf>=GB*{Ovn8L z(qaRRjpHA={*&+j{s{ZOt@_S(yH?kBckA1&?R@#?Ctd$>7vOzxBT0kU*82YIKYAVR zas8*h)7oh@cfkIex}LB9yvTJq4!LDk{e>T}ImCl1|G+rzvgkT2fV3Ll+rRGVpd2*} z8D)<#WR380#&A)BX$YpxDWtyegSxzkk=!LNDrgb{`eL$(UD6RZz}-Mvh4DK)5jqG> zGz?~tDGEAPZb6e@qFkfi_|Ft_DGL*jUy(wS`G&{cK%rC%4N;zpD;y>ZK*2NVBJLPt z{5y`8xoA32PnK+aKIaxBtHGj(;-aSHC|)%!CPJ*ba%6b?>s~|Kfk%dgs|XX$Bx~}; zUOe+KMV^bC_c&Ty>2#tc61Zy=Rg_XxyY<*OIPa6L3)XrA6vOY-W_}DlaG6I1r{`jh zWk1MkOy`b@i}w7^0h7Tx@m}tnnLM0}z1)Cu%c)YnB}T7CtX88X zlPb5Xv?}+&W#ginsTV92{>eY{;`DS^w|*JOXjjBBwNLvabi#qEEwI0NJO5(w*n zVCuOqHfn8y$yu%tKk&*GQoO)lJ~60`b?MMpDr30;NX@nu#dSE+yODU0m{KK8k^%x! z3PGZP7uQ#~s&_|GrdEYu*65rC;2+nH7ONh9ZXm|$+X0(0@9}}u{8mNx>{x2!8H9Ju zfDNft*e@YY;{ludrAXPO(B%d>B3JfH95*Uc5*GCl|ErK&CZy^!>VevTf;r)I7FRZS z86hAQ_+%X0Lo%XNyS->MO;r;O?070A=@qH&9&R541)5;F9Pue>Kau*3wL3xRTJoEY zlyVD9B&xv3+L4wZ0)vR>g#5B6J74Noo$L;yGNY2j{g#x3pv&dR1as6x)*JXxuPU))m;19gD; z1`{P&1e_`;j3lKWNN=MmcSNyHrdB5H*90hn8!!$lJVb}|2nyESpSyv%j1d*4C zR$Nuk1?*XLJ~k=2>yT3Q?o#cw!!x*-JL^}MYGWN5tLawUVZSPuBU<}dG)65^)3j8- zbj_Gc(0;GcUEIz%nj?@6+6wb`IOk~Hhw`q zqLQ-EN*2E*nuz!SNm^Q$qPPWpld8o6IWNgHws;{J=p!K?N-Tnt`yCIUgai{`6}{Tj zn^%=n__3%W>k$mP9=qYPx-XKFf{V}r8fsZ3N#n0bLo83hoJoF>48+aIbf%?}>Ba5k zi797%Us{kFSP}G&C}JmfPWIo3S88A3qQ=0J^_F{0TquJla4xI$$G(PY?K&| z{+PQ_jLt~InCdI2SU%tc!4Q70Qf1ip`52J8jQmcaTga{=H`RMiEJWgj+alh17QBAFC&T+V|DZj~vmpddD@|$mTx+ruHqwOdWK*q;d(LZ-?#Oojx z-6q7~*Yfhc6lHio$&>?nDW_%qf9&NTD|erqXuo8d{gO$xC(4jVs$N7?KzzAh^O_`<)+>MzH4z2{$gHyHwsbcz3=%a&Rn!Ip&^mTgF}vWax_a@sEw>Qect z9KBh^5|qe*N_r(V^jkd&{N(SE+EV69}!>(g48r2TuzeK{3pgZZo&S_X%i z_e>lY$d{(fN#f`i2m%Qi0e-RL%h)d^u>{Y$AYqB0Re^#eAU|cypzIZ*moN3JGNR9{Zhg(0L^;@_h5tov7CXW_i^X#A9xDp%i!v?% zz^t0BfQ)MV5$TduTvqj~Rb9lxRq(69h0Fka1v3&Is*~U;R?x~CP>>J+#uM=nYW)`k z-_tpOC;ozIre{46K>*_yt^!oDmAN6`YKmY4Rt`ineTp+9dE=#aGq^{`&{DQud z`7E*!KNm_;I1lG@2&&=|ohp>GF6zbq4$imEF&UU*h2@J$9enqy6k2dhPjL^kk9_Am!+Y(g0H0+Y<%k z<>jYQ2?7AflJj!mASv~)?mLk5*+KOJGSiOGw=G;ZHJw|#Q1}i9T@=3GF&Bl*As6`> z7eDkF7ll8c#3+`Iv&f)+>@bTMc>Pfp%O_bR2U&de9E&B)>J*Fk5R2pti)S5SQCM++ z#Wy^^;+e-+$dBU-fBo!=XCGa$>g0-8(#f$E$*C3c(2CV(R%F+6{@#fdj~-a@r1L7i zeq6<0Ij!PpJU;ELil-b^@%)o2zIsr_^UtZsE;6M~srb8xRQ#}KR1^|Ug%#wBD*2UP u`ITS!m0$UlU-^|^`ITS!m0$UlU-^|^`ITS!m0$nI*Z%?;$R}(7cmM$UBoA}| literal 0 HcmV?d00001 diff --git a/pyproject.toml b/pyproject.toml index f315d2d..7210d95 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "lexifuzz-ner" -version = "0.0.3" +version = "0.0.4" authors = ["Hanif Yuli Abdillah P "] description = "Python package for detecting entities in text based on a dictionary and fuzzy similarity" readme = "README.md" diff --git a/src/lexifuzz_ner/ner.py b/src/lexifuzz_ner/ner.py index 60296c2..0bb5405 100644 --- a/src/lexifuzz_ner/ner.py +++ b/src/lexifuzz_ner/ner.py @@ -29,7 +29,6 @@ def getFuzzySimilarity(token=None, dictionary=None, min_ratio=None): return (match + (key, )) def handle_slicing(data=None): - """ This function takes a dictionary data as input and processes its 'entities' by sorting them based on their score in descending order. It then identifies entities with the highest scores, ensuring there is no overlap in their index ranges. @@ -39,7 +38,7 @@ def handle_slicing(data=None): assert isinstance(data, dict), "Dictionary format should be provided in the dictionary parameter." # Sort entities by their score in descending order - sorted_entities = sorted(data['entities'], key=lambda x: -x['score']) + sorted_entities = sorted(data['entities'], key=lambda x: (-x['score'], x['index']['start'], -x['index']['end'])) # Initialize a dictionary to keep track of which indices have been covered indices_covered = set() @@ -53,14 +52,11 @@ def handle_slicing(data=None): # Check if the entity's indices overlap with previously covered indices if all(start > end_covered or end < start_covered for start_covered, end_covered in indices_covered): new_entities.append(entity) - # Update the covered indices indices_covered.add((start, end)) # Update the entities in the data dictionary data['entities'] = new_entities - - # Print the modified data return data def annotate_text(entities = None):