From 112641787eae20252d35b8066923255a96c080f5 Mon Sep 17 00:00:00 2001 From: Sally Date: Thu, 9 Jan 2025 14:03:09 +0000 Subject: [PATCH] adding tests for corrupted files --- tests/core/data/garbage.csv | 2 + tests/core/data/garbage.xlsx | Bin 0 -> 10895 bytes tests/core/test_readers.py | 71 +++++++++++++++++++++++++++++- virtual_ecosystem/core/readers.py | 19 +++++--- 4 files changed, 86 insertions(+), 6 deletions(-) create mode 100644 tests/core/data/garbage.csv create mode 100644 tests/core/data/garbage.xlsx diff --git a/tests/core/data/garbage.csv b/tests/core/data/garbage.csv new file mode 100644 index 000000000..c55856dbd --- /dev/null +++ b/tests/core/data/garbage.csv @@ -0,0 +1,2 @@ +a,b,c +1,2,"3 \ No newline at end of file diff --git a/tests/core/data/garbage.xlsx b/tests/core/data/garbage.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..bd39652b1bcc79fce379dfadb6b99037085f9b9d GIT binary patch literal 10895 zcmeHN2UncenI4*efdfMmT{I1%4-AMRMDH^+(S04!;3GE9)sd^ zie0#&7BXtG1Uy_Ut7efC5Hr)FgI|2RD_9#q=^ctMF=+-PwCGr{`N5;$k@3|9vBSgA zPJ)=zr$9hgQ9XE#1uSkNYA-jYp)U;T#ozEPgP<~P0!n)ySXk+YFF_b&A_8L4nYlV7 z!`v2MD5l0&AxbG&1T9o(~d)XxbjpO7^uC#c4)S1}}bp zDjVwB@wF8QK1Bw%R+bYw(==<#DB zg^#tzq)z8Rzw~r{M_EZ0v<;cL^6OG49iY*KH9j8j2l+B5fJhL1E2vfeOWD7!*+eO) z^8`GEhD~O~9#3n6;-l-luXDlFo1X@bT5LsE3PS4|o_DwwOnKygZCT&q9`(S{kt9GH zGc{i|6Sl0~xchxWt#zq}=9C`hcMB*~Td_a>?(YsH7+PN> zV=w1(=MSYW50AN^0o)D1jjZMJb^y9ki^3sT$xR@(8DNu}qmQmWwG3Vo`2Gb5;L2IB zB+=zd3G{EMd|A=&nPi*Cmzhum{fx`BG&?_KpPilJ*A`6lPBZGIbka`5GQNBSV?Z4r zS~`da)Prmw0o@Ru)N<#wL~d9^>9pR(ckf&6o!Wj{%;UUqPPJF5n`_11#qhRGGt#u3 z1_5V4`J(3O4?xKUIr*?gw`;Ph(6M<)X2CysLsM%H59)6460)(STMS(e2=1xBr4j8G!QwwVj;t%1+6H!Q}=MWQkaZh~}ZgHYUXao=>OK$O06cT!3WV z0DRIg=*~kG;C>l*e#`6uj+-ZPAfiF~wA_dlv1)v%xI2Em*KoKC7GqvbKP5o70Q8`S z#B8W5eKKyQBk6)#bI%bTGhXcA+-T^n=fPWecEELX7JL(wm}~++0_q=B38gw++&YXM zolLI=5R-BtqlIlk;EaJxVFdt}e>wra+RhCuu^t2l?w7lqW#R>cc$^_nc~s0tOm^-% zlr`us91Lh*;gp6Y)g@rybN}bbM`dcMMC7Lz?pqFH_*B|~8{dU;nC>?y*DONMwt)dE z@G=+?;mN90=*|$ZID0oZ%ZBIBm4gB8AQfn;ZDtlWyW!<08A*YEEhM)J*!>u9*Xg=K zh#;|*r|CoyK4oroS<|5>FW+|%xU zkFG;wKiUvW4aE2|E1v|=@0)kd)rGR0rY^lIR`8$}u%(Cj`;H_JWxu3|@ zYH(9C7%(QHNWV^Z=$T5*#n&JC_aFTtkjVwaXElO<@)+1YvG7iy3)vkQ84VX-p(&0J z0hkj@9ioE=)qnYFnl?&HgleMWn?OVV`MtMgiVpxBplg38x@}k_lbd_u2a?hw^?ppE z%0!Sk6& zsuuE>pf5&+fxAsCzFtshz?IiCA=6U|RpQ2HVQ6DqttOvYh@I<$$#XCS%tv*_Z0JC^ z?@ELVvfPjB1wH|)@R@{7ei7l?43r=~WPuM+Va|aXSqcVZ77bpQ7+&uW8YXqjvW0@fa|N5nFV7>EsQIjmpaX%H#uIBdQFvgBkN zPd_CC@L*5^$jVb4tYB|;^Ibqw(k30b@W}*fQlIPGLtOhH+ z#nyrv*!0usVz5$uDO;FuINS>+k0?;z&am({OVJa1NS-PY^p{5;CfD3h+x!QP*xHIu z2K1?WW`XwwGo=~?SVZ)Bdu|^LRGSgA@ZgXUDEWujk}^`gCT18bpiIPQgo?7qm+4~s zM$YcT_U{eS1wODOgD&vXjQq~PyRe{7b?il}ZCsm8H#)-`RF;ZSMIwzMWDlxyf(^+W z`x>9j#tsaA{F`iE=uLAnzWZ(vlp~3$S^PFz71|ljHUrFv5YF_?Y>y3X-wVZ8Y`rST znJNLeVIYlnfK_yjn>Tjats%W#vqLd~^dJ=@ZHmF6>q9~5B?Ftsi*JAL&;b>p$qL(j z58;W5$be-=8Hf>aI#%RS5h_RHG1BK0mC;I4CCrxrMiJn@f0%w}&Hxpl##XQ|o(5B) zffm&r#?N2AzfTwu&UfNG5;5uC-63aZ`!)37txF!#V3$Ostn!03O!nJDlxAi(?R zAkUi5o~2Xg3uLcu$p?$!o5Daz$zZ@|kC!lw&+Jsk1cr6&v^O3DYXSvoF3=QMGn-pG zAlh=6cR+R@SW6qj*lPwW?C+vqTCYC@&h!of2`qZL�nf2eWNZkgn+m^!;L%-Vkd z(T?c*mhpa5AH5I@F#TU%pR~8A7Q{~m$TA=9-|wVNH5v>8L0$Xo16i^_w7iPnpFJ?S(;;J{M|Bg)?{B3uDY|7THwZAV zf%>{EW#4Bxg|RUh?1W+GNOKs^a=W1cc`Z}?6x&CzvyzYWZR-VJ)eVA&gMgAR|5-IF z8G|p27Htw4lnBYQoB3fNg_jo%NiyEemv#?Ad9o{ygDKZ-%Iv3aX{%NS7~@QJI;j1_ zYLQ9Fq~Sxd+E@EfL|g=P?!w5D7(G)xs)wus30hiHTXO^H{~6F%=eFn&Ag+)R>TpF= zvbr3{#R@*Ycl$LuDDt$JSX;RuR>k$d0n;(%j~jzJRLxXiH~?Y{xQK42Z8ua+SZrDT zVX#kt%%BX={=g%AognZOON@PVaFNeo#ErvT7Td7i3c;$o@lurfL<&ZLX)3ygZ>4Mh zL!}EE7kK?y z|8(vlh+KY9y&M{Ix_~8K+?By708E@Rllrc-X27WQ^gMV=57-7Dol;+*qUooi#lQ(v zt7indEjdi2$}l>{w;!xWMc-j$RDli=`ygDRW{aqIIllfSB@BQ5rziR*lIVWG2x8^{ zYoHNPm4B2oQcn57?BlxhGK6}GnH~Kgh|S89&Axs42VlU02ypthR7Rh@0LcwA`v4x_ z?(XD@lOVp3eG_0-0Zvd;I6Vrig7{(@1?+HEIz&UsQMS`O8M|$e!J-H6hb$4@w{Qwd z>Il;ZNTVP9lq;L0Z9Nl_qe8Ge30tVD1J;phz$T~(EM$fp z*bo8EOaQu7EpmPwA0P= zKGqMtGg>)|lWma1ELK_zAZGLmvuLjqj z!+_fpaO-TIJb(4-QoAuBNo67o%)_EfJMxa+E{xVA2a|qs3wC^9B2qs3@W<)vyIXa9tPlfxG@bp%)){?40JlV>HqtAb1xQ* zf$+<@e*9g9UVh$Oa4)+Y3)=seGHjmT$5levWS$5ZfgJW2tBgA#K72M2+Bj%z+(z}zuLOc3k~r6v5Ifny%%4`;Kr7N zWtO9+HW_Y!djgnU>?fUc91ju8(ec6-aE3%M7~hhZb8Slo`GZF{oAs=3pc%xR3gd%- zBm+MQ1X^W;NjXX#~;@ z8na9+*LsURr6Euqios?5gcZ21zOz=^!E*r(A~O8#-bcPX`}u1avSS(z$V$uSEvv{ z6O1;}v((}SO1-)x&Q0QWw^wU?w z3V!8XFiQhxz7b3c6;ECTfJ&3;Fz$PIfy`w_aUR0b6g-aMAtFSPD6 sw!-WY24@`vgeP@?2Zw(B=O;{M7NGO6O1BK DataArray: Raises: FileNotFoundError: with bad file path names. - ValueError: if the file data is not readable. + ParserError: if the csv data is not readable. + Exception: if the excel data is not readable. + + Note: the general exception is used because of the variety of exceptions that are + possible with read_excel. """ to_raise: Exception @@ -161,13 +166,17 @@ def load_from_dataframe(file: Path, var_name: str) -> DataArray: if file_type == ".csv": dataset = read_csv(file) else: - dataset = read_excel(file) + dataset = read_excel(file, engine="openpyxl") except FileNotFoundError: to_raise = FileNotFoundError(f"Data file not found: {file}") LOGGER.critical(to_raise) raise to_raise - except ValueError as err: - to_raise = ValueError(f"Could not load data from {file}: {err}") + except ParserError as err: + to_raise = ParserError(f"Could not load data from {file}: {err}.") + LOGGER.critical(to_raise) + raise to_raise + except Exception as err: + to_raise = Exception(f"Unidentified exception opening {file}: {err}") LOGGER.critical(to_raise) raise to_raise @@ -177,7 +186,7 @@ def load_from_dataframe(file: Path, var_name: str) -> DataArray: LOGGER.critical(to_raise) raise to_raise - return dataset[var_name] + return dataset[var_name].to_xarray() def load_to_dataarray(