From 53836320695eb6393a439581f0420c88a13fa225 Mon Sep 17 00:00:00 2001 From: Eric Jolibois Date: Mon, 7 Oct 2024 15:04:17 +0200 Subject: [PATCH] feat: mimic excel behavior to convert float to string (#292) --- python/tests/fixtures/decimal-numbers.xlsx | Bin 0 -> 9013 bytes python/tests/test_fastexcel.py | 16 ++++++++++ src/data.rs | 4 +++ src/types/dtype.rs | 33 +++++++++++++++++++++ 4 files changed, 53 insertions(+) create mode 100644 python/tests/fixtures/decimal-numbers.xlsx diff --git a/python/tests/fixtures/decimal-numbers.xlsx b/python/tests/fixtures/decimal-numbers.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..dc9fabd2cb9130236dac4a66bce92e776d03dfe0 GIT binary patch literal 9013 zcmeHN1y>x|)@|G!LXhAP+zFE45*&i33GUEn;}&S#f@=dIL4v!x1*dTd4nYHf5F7%0 zoq6-#%w*>K1@GNjU3c}W+GpLWd-mD)o>S^dNXWzhQ~){v0H6h!9%fk^A^-q!$N&Hl z03A_Z+R?$)!ok%@%gf0EY{2Ph52AUGjL4h=K!o4_zxH4J21=8LReQMcWiRA^NN;i| zE>%mQ3w(w26SBS#>+DVJD>l>5wz0X-j<~^>dPia_SVcItDusD-c5Ixv z>O(qv_O<-}Y#B9SK~{+Whk?OHp-#20W|Y1T1runS$g-FB#ODNi%_p5w_=BCa41Qr> z*9P{U`W%dJ=?5Jir|iAvy+Z{6?(dNR>VKhSoi;c9DcskT;jF`g)6&Sr0tDvb{CWN# z9si3t_@_rNO;Azm;l>UG|t6{~tq(%g@m(V`Xwlk#jCx?BaJXX@x^_fxk@wHF;0h4B51jPE7KTck8}mBKajNX?KKWY-!MZiu-F z9}&dMaLIneh1~gsRc|r4z3`k808MU2yw$>~(_= zHG05jXG)&+-d@3OqI%xfGV5yZ;HX1&PD*SA>)09~vG~z4f0|XQkd1YA{7|M;4=W`< z4oAB(39nFJL2kI*R9Qfvn&fGcndG z&e*S{x6C@KF(*m^8`*QOMDcP8CWt4lq}dqSDrgUIj-z9OEAYf2CYAk(ovuz=Jy6neCI@g_E1uWZPE>9H-d;aM9BN1c2 zie;zzLqoTBseRii4f%S~=v3BSo-16BC{p&jJcpIE0}T;Zmyd3HE~QbTBdB*(#ycz@ zK)KHOReHv|bVQB`*}l0V56NIQ0&3MoDThDjW%Pnub|3Tm!5f+-WqSmqc?_ zWz(YvOIY98)t(tp-=c=!#rimnd-1me`G+GF>*8uXgGnbdIfxblbLtTgRvXNT921Pu zn49@{YhDqW4&)yJ4WEEp1I%p>M-2zRd`qQn4PmnO%y=;)9rk6LayFjIs%80h+oh$d zWU6H)YC9cvbGWxEY$$yy@O0A49h_%&edXfpm3JiD-q@1$X#ZK8a`oQv-PD!uwSD~S zN3jwe%FN`L<^7Tc)=7h(elqay91RppN)Htjz{`vr1e@j-g&h zqy6p|0=OGUL}y)Wg^`5F-bA3Irzno2C!WZMi6soM zXA=u;Uotb2h2<8|$(%M#hVt9OxZQYd!Q+pH;BkGpb~E5QSo5F^Qrpl#lfh!Sb>Aet zc#irO;zd1iLWZI6495#Fu{%x6cG)F+j{d#%hoy=a%&sf<8heYfM2V_cK-4$Xl_qT9 zIrVFn1n}1T^cL--byumIA(oEC@9e{;N0ys|{WGiZnf|-WWVWdJ#KGO=9=;`c2*2a^ zunxAiuy6%){nqjPjPIF=gAR+__@PHA*SN?>ymd1%$isnOP)5sk6EazB%?X%flk)_L z{Vt--=P}lz2qX~Yde$$-;yNs2pO+0#l_nOjS@UDC1gfS1-Jy`Xt2SIJX6I}V0o{Ti zNOtE$|J9CKO;DBo9HJ*A7)DZi);>lCevdM`+;&KzH8h)sxl1=A8Q!izfyltM84qQxq!o@@&-0bza0VZS~Bt z4^KKhy8m$UlH)iutSCIBAjcvp6~p4>)9za<473YQrfl5~iNJ3G)|TWninxxDu$CdJu{8iPV!a8;jkL~c}QO80mS z-SdYX(Fm6H9}qvMZcUu~z$g@0@X?ZqXQ=h(WZwTa*VQaAc@++41$Kqn8kQk0UFK?S zUM&q~PWcfw)aufJ15D|}Uh|S9#nkDI&FNM`eMTfvdd8GH zTT?|_f^e(SyttINMu?wx7eDQ$?vPrLTICg_%>Rs7fA`{Z<66l^Gys5r?&rq(H!pUz zwy?M0`tAIi10MiKV<{R)I?x90B}NQpM|OQogvUP!o*#OzCmU?aDDTG)sY zFn6Yv0ugN{lmb`0nIloH15lebV|k3}Rqn%|xyWU#M@F{YNhFwhaPdtaiQa9u_4fEQ!VI5vl(G5nA*;F5Rt1*36T$I*WH-AH>CO@Z7wCCq?^f~r$1BoO) zjV4ZQ6XDQ1USm(Pj@61gdh1bk0|IJB6Rka7n!Mf8dZ{}k@%QkwYH`T4-bAk8%J+SL1G$5u{6NjPCpsOXwslE)nmPU zw|})gRb$AQ$p5~hLbW^B@ zIv%X~_|G3rv$rBxTyEpGi+!o#c37^>XC0z#9Esj#Qg|G`fb?tsSTChsK{S6Ui zqB4e4^dsc4tx@U6OEOY3fHh4|B$IrZW_2fD_=aI|ql`4GX6PK1qH{MR_KgX?#O}Unn-wR7)v{E?I^`yT7jghd{& zcFGd1YKlb@_1#_n{q32@ZpZ!IBA(IRU`(pTkCNAScQXyEw@Kc5wjnHrcUL94Mz7Dl zbG^R2qc#$$y+F=RStlYViclS3^d2$ulrY1Dil;|H6_n|eIWQ;#9m2lEN3yRClJk+4 ztAU8bNa8GBb>5U%J(#PJFC}&x`N18@vBtAgIu1& z+3z3@ON%9W)^Dc?+;8N-bool#@c}y=!nd@cz1yxgJo^1lG?={)C%uA&x#+`w_~Ne% zxw#Z5X!ZHCj?v2Sh-?j^h;N@`PF$>^WgfU)XezRMbchi=3q;l}&_WKsvtXobZ&hn- zqnZN+Mz9T^%7859vU6a&yA>%q*3<;u-x#xrAlbfOjK_D1EKVS&LFm;(w9Y8ANTB}W zcD3+eUWGY<7MAbfifra%0|`g#;MVpO&sm{}s`(FB$gK_`hgzMcuIiq>mOB<}7$M*B zHa^-UJtuTcg80;`?b`&GwJEJCd9;>?Y;_2}G^^x%yX zrM;)uZAKv&z6}Xq(T$lOVri%(wO>bNjxP0nTrjvF`Id+sx#NeI?OQyZvqq8*rM4Qb z3QEHgm-BW1kh)R)iUdu>#X*OSW)6D?C5of__S#PkDUMhfi;(U59OpKOUmT>M&?Kjo z1t?l@Omy`1hMn2W%64=hp1+J5qe9?>2RkD>$wt}XCcp+c_1eoA5vzfCet@QH#<4IG zMwcn|nwi^RB!5Jki&3#DxIUij$*q=)N`sA9*@l_e*g{r8$EFoik{-!ted0-z7&KrI#xWGIaJs15(;BA|}nBL}UcX*zsMG6E=g-{q= z)foEpEC`BYq13-*Z?E1w{FV&6*{*v}Iato(4redqnV7LOF4WQ04x)rk3O^kgb>Z4fXG_*ya0=0Z zxVNod#~AwIKS3qwSbnJerLNh?M$o%Xv0|bSC0A|@?`C~RLQU3re4yR;VUG>I{?4j+ z+Hsa6&BkcpTozeu$&&A9b%7PZRSD$vJ4c$%cnLO2l(R}wT-G3y@zUuYlbT6~8SiAt zdCE}H`C91bl^v>P?zJ3o$~XNnm6Mc!^1}=AWW$aph5U=T{zABNXYcUVd~cc{4G*)x zVExYgb+@DE|0wjNO1 z7zxw`RN`6fXY$-94RO+Xi^b!dH70HD{pKVH-@DtVwgvdvzx6p3ZJ(R4pch)uFyLb} zJ23Km_#(XoGN>K>`dJ=~ICla6C4M}iP>X23 z8oMfP!mA(djLA%7XvHz&Ov1K5B5>7nFI0j1SmKZ^;7l<;X~2GQwYjjLg2y)ORRVQ9 z;OZHJxNF-Q!@1ns2!3~gWnMOF)%O|-nz^s+^CFJX!xxB0sPnxOiVP4#ZVVK(#B({; zjWH^rGEEzb^hlGE+rD=3GUE>Y+7?9H<|2wn zRx3WRaty0<%Zh8^z>&`WE1p)_CO1tax4tV{XrKz^!vys5tarcyZpqtA6J5y#1}%9~ zua={%Wi}Oee|%ir#d=ZrkxaR~;>$w^dfwux`kQ%*<(Kv(lvcW#WqCPSED>wlZkn|^8KONp z`qaXun%30g3^>DR51=6hKYVgh&cX__)oj(?X(N3X$G?)<2+|wSaf?0z0fkP(#MYS? zxgFAcWeq#oGd_pd?9a`kE*j01E9A5!-l zeQi5`T+d`});>Efz83o=%cH?m^yCKCD|{Hf+1;J++rqQs!y-(S^v3e2&yhc_8Vu)N#|= z-WbfuRc?t)dPJ;hVk7#_zBzfB^q)2W{SEKB<>7k+XZW^*0G|0UcQjLXadZN6nK`;x z{9af4U%~@E&(R4+N6lxb61nnDu zpQwD5VND&GV1@dnVb)7jGubkiBpDt|iK+}N?d@H+8cb5=$~Ua4B|ynHxW*IJjqO8e zOr=l0foyU5Fe(EdvnZv(CSbxU-1Q)|goS~pNu&*R@zYniEL8;ui75e7LfK{*Z(P1G zI|RO?3f~Ri1WA11(sTpBxV2~`g`*OgyE+Wf1WC(0lfW{dd4ZT~jg(#M8@f~kp>pVR)0TlqV8i*- zRm?SyFswUp?2d=q_45EUF`B*o%%q0F+aAl1NPg81CcYE}f=~vgK}hdO?33=dS5KT? z)5c2Psbs0NKznF@F z6TJP&N#?+VfCo)q!5=V+kYF|o!{hw3Yku60kGxZKExg!{J9q6$xjql7E59kpzMreb zwOq?hUFNYgE^F`IEk%G;Osmc2NxWO%z=%GhEaYgX!WLoN;Y}89uwF^n%H>SX{9tR> zxh@hrjw2++D31Asz(j8q6+~C&ZFpualm|JeXzgTs@dLRVp_EXR0V|ttjRu;);~p=x zM@`$N5jBu!{y_O7Po~{YRoleyi?|^0x-oa)W}ol|tXled1w?Z!9-a!`h&S2Da;isK zqu-@beAtDaRh&|KtW!N)o%1r(T6{n_dxfUDFGezLwqouQG}w&N z(SL1l;$bEfl)5*2_TBx3Vd|BUl>no7Iq?2Jbx;IE4tV_k=MNPAJ!}8&|KW25b)~-= z_-iTk-@u=JDx4aBDy;qr{I$IF2ecJF)?bQEzk>go_x=M402pKb2L6At;J@1WH5vMc zr9|xi4B~I8(O<3nnvD9x%1e0K9{wo5rlx*1@arP?4+Bzg@q-)qW6Ap~^jBH`0~$sC zFX*o#|Eq<+D(N41002k<0Q^l?e}(@w68;(f>CvCyzsE&&B^3DV0sxrsBLFVlZ)tx1 F`#+40!E^us literal 0 HcmV?d00001 diff --git a/python/tests/test_fastexcel.py b/python/tests/test_fastexcel.py index 555ae1c..2ab280a 100644 --- a/python/tests/test_fastexcel.py +++ b/python/tests/test_fastexcel.py @@ -536,3 +536,19 @@ def test_null_values_in_cells() -> None: def test_null_column_is_nullable() -> None: sheet = fastexcel.read_excel(path_for_fixture("null-column.xlsx")).load_sheet(0) assert sheet.to_arrow().schema.field("nullonly").nullable is True + + +def test_sheet_with_decimal_numbers() -> None: + sheet = fastexcel.read_excel(path_for_fixture("decimal-numbers.xlsx")).load_sheet(0) + pl_assert_frame_equal( + sheet.to_polars(), + pl.DataFrame({"Decimals": [28.14, 29.02]}), + ) + + sheet2 = fastexcel.read_excel(path_for_fixture("decimal-numbers.xlsx")).load_sheet( + 0, dtypes={0: "string"} + ) + pl_assert_frame_equal( + sheet2.to_polars(), + pl.DataFrame({"Decimals": ["28.14", "29.02"]}), + ) diff --git a/src/data.rs b/src/data.rs index 553d10d..8da9ada 100644 --- a/src/data.rs +++ b/src/data.rs @@ -81,6 +81,8 @@ mod array_impls { use calamine::{CellType, DataType, Range}; use chrono::NaiveDate; + use crate::types::dtype::excel_float_to_string; + pub(crate) fn create_boolean_array( data: &Range
, col: usize, @@ -142,6 +144,8 @@ mod array_impls { cell.get_datetime_iso().map(str::to_string) } else if cell.is_bool() { cell.get_bool().map(|v| v.to_string()) + } else if cell.is_float() { + cell.get_float().map(excel_float_to_string) } else { cell.as_string() } diff --git a/src/types/dtype.rs b/src/types/dtype.rs index 9af844a..32b3dd9 100644 --- a/src/types/dtype.rs +++ b/src/types/dtype.rs @@ -280,6 +280,31 @@ pub(crate) fn get_dtype_for_column( } } +/// Convert a float to a nice string to mimic Excel behaviour. +/// +/// Excel can store a float like 29.02 set by the user as "29.020000000000003" in the XML. +/// But in fact, the user will see "29.02" in the cell. +/// Excel indeed displays decimal numbers with 8 digits in a standard cell width +/// and 10 digits in a wide cell. Like this: +/// +/// Format = 0.000000000 | Unformatted, wide cell | Unformatted, standard width +/// ---------------------|--------------------------|---------------------------- +/// 1.123456789 | 1.123456789 | 1.123457 +/// 12.123456789 | 12.12345679 | 12.12346 +/// ... | ... | ... +/// 123456.123456789 | 123456.1235 | 123456.1 +/// +/// Excel also trims trailing zeros and the decimal point if there is no fractional part. +/// +/// We do not distinguish between wide cells and standard cells here, so we retain at most +/// nine digits after the decimal point and trim any trailing zeros. +pub(crate) fn excel_float_to_string(x: f64) -> String { + format!("{x:.9}") + .trim_end_matches('0') + .trim_end_matches('.') + .to_string() +} + #[cfg(test)] mod tests { use calamine::{Cell, Data as CalData}; @@ -394,4 +419,12 @@ mod tests { FastExcelErrorKind::UnsupportedColumnTypeCombination(_) )); } + + #[rstest] + #[case(29.020000000000003, "29.02")] + #[case(10000_f64, "10000")] + #[case(23.0, "23")] + fn test_excel_float_to_string(#[case] x: f64, #[case] expected: &str) { + assert_eq!(excel_float_to_string(x), expected.to_string()); + } }