From 6d689d8760bf32c53c30c22e2a48a1740c0b4e04 Mon Sep 17 00:00:00 2001 From: roblabla Date: Fri, 6 Jan 2023 11:26:15 +0100 Subject: [PATCH 1/2] Add COLNAMES option to give names to the columns The COLNAMES option should contain the row number giving the name to the columns. This name will be used in place of the lettered column name used by excel in the SQL Table. --- src/options.rs | 14 +++++++++++++- src/spreadsheet/manager.rs | 24 +++++++++++++++++++++++- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/options.rs b/src/options.rs index bfccb19..12fcb01 100644 --- a/src/options.rs +++ b/src/options.rs @@ -1,6 +1,6 @@ use nom::branch::alt; use nom::bytes::complete::{escaped, tag, tag_no_case}; -use nom::character::complete::{alpha1, digit0, multispace0, multispace1, none_of}; +use nom::character::complete::{alpha1, digit0, digit1, multispace0, multispace1, none_of}; use nom::combinator::{map, recognize}; use nom::{IResult, Parser}; use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; @@ -9,6 +9,7 @@ pub enum UsingOption { File(String), Worksheet(String), Range(String), + ColNames(String), } pub fn parse_option(input: &str) -> IResult<&str, UsingOption> { @@ -16,6 +17,7 @@ pub fn parse_option(input: &str) -> IResult<&str, UsingOption> { parse_filename_option, parse_worksheet_option, parse_range_option, + parse_colnames_option, ))).parse(input) } @@ -49,6 +51,16 @@ fn parse_range_option(input: &str) -> IResult<&str, UsingOption> { |t: (&str, &str)| UsingOption::Range(t.1.to_string()))(input) } +fn parse_colnames_option(input: &str) -> IResult<&str, UsingOption> { + let option = tag_no_case("COLNAMES"); + + let value = preceded( + tag("'"), terminated(digit1, tag("'"))); + + map(separated_pair(option, multispace1, value), + |t: (&str, &str)| UsingOption::ColNames(t.1.to_string()))(input) +} + fn parse_with_spaces<'a, T>(parser: impl Parser<&'a str, T, nom::error::Error<&'a str>>) -> impl Parser<&'a str, T, 
nom::error::Error<&'a str>> { preceded(multispace0, terminated(parser, multispace0)) diff --git a/src/spreadsheet/manager.rs b/src/spreadsheet/manager.rs index 15a6c76..ce19387 100644 --- a/src/spreadsheet/manager.rs +++ b/src/spreadsheet/manager.rs @@ -7,11 +7,13 @@ use calamine::{open_workbook_auto, DataType, Range, Reader, Sheets}; use std::fs::File; use std::io::BufReader; use std::path::Path; +use std::str::FromStr; pub struct DataManager { sheets: Sheets>, worksheet: String, range: Option, + colnames_row: Option, } pub enum DataManagerError { @@ -49,9 +51,17 @@ impl DataManager { pub fn get_columns(&mut self) -> Vec { let range = self.get_effective_range(); if range.get_size().1 > 0 { + let row_workspace_sheet = self.colnames_row + .and_then(|v| Some((v, self.sheets.worksheet_range(self.worksheet.as_str())))) + .and_then(|(row, sheet)| Some((row, sheet?.ok()?))); (range.start().unwrap().1..=range.end().unwrap().1) .into_iter() - .map(|n| CellIndex::new(n + 1, 1).get_x_as_string()) + .map(|n| { + row_workspace_sheet + .as_ref() + .and_then(|(row, sheet)| sheet.get_value((*row, n)).map(|v| v.to_string())) + .unwrap_or_else(|| CellIndex::new(n + 1, 1).get_x_as_string()) + }) .collect() } else { Vec::new() @@ -70,6 +80,7 @@ pub struct DataManagerBuilder { file: Option, worksheet: Option, range: Option, + colnames_row: Option, } impl DataManagerBuilder { @@ -91,6 +102,11 @@ impl DataManagerBuilder { UsingOption::Range(range) => { builder = builder.range(CellRange::try_parse(range.as_str()).unwrap()); } + UsingOption::ColNames(colnames) => { + // We subtract 1 to go from Excel indexing (which starts at 1) to 0-based + // indexing of the row. 
+ builder = builder.colnames_row(u32::from_str(colnames.as_str()).unwrap().saturating_sub(1)); + }, } } @@ -112,6 +128,11 @@ impl DataManagerBuilder { self } + pub fn colnames_row(mut self, row: u32) -> Self { + self.colnames_row = Some(row); + self + } + pub fn open(self) -> Result { if let Some(file) = self.file { if let Some(worksheet) = self.worksheet { @@ -120,6 +141,7 @@ impl DataManagerBuilder { sheets, worksheet, range: self.range, + colnames_row: self.colnames_row, }), Err(err) => Err(DataManagerError::Calamine(err)), } From f73381f43400ef90c73b60a0f63aecbdeee6c025 Mon Sep 17 00:00:00 2001 From: roblabla Date: Sun, 8 Jan 2023 15:37:38 +0100 Subject: [PATCH 2/2] Add tests for colnames feature --- src/options.rs | 13 +++++++++++++ tests/abcdef_colnames.xlsx | Bin 0 -> 5372 bytes tests/lib.rs | 37 +++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 tests/abcdef_colnames.xlsx diff --git a/src/options.rs b/src/options.rs index 12fcb01..ae34a6d 100644 --- a/src/options.rs +++ b/src/options.rs @@ -132,4 +132,17 @@ mod tests { _ => panic!("Expected range option") } } + + #[test] + fn parse_colnames_option_produces_colname() { + let (output, option) = parse_colnames_option("COLNAMES '152'").unwrap(); + + assert_eq!(output, ""); + match option { + UsingOption::ColNames(colname) => { + assert_eq!(colname, "152"); + }, + _ => panic!("Expected colnames option") + } + } } diff --git a/tests/abcdef_colnames.xlsx b/tests/abcdef_colnames.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..477c08c2693d8dec308972c79a00f673430f288c GIT binary patch literal 5372 zcmaJ_1z1$;)}9fN9CB!o?i$jeQAE1C1c3n&h9O2eq`N~H5s(y>kWfjL5|k9B89-7b zBxI!T;5q-1>-~>wKYQxg>zi+{wf6hIYw2m?;DP}J1O$Kf>G{RJhAgx=oj_fjIg}(DpMNw`?=U+kKn-7f$$a7Qtbstx zIG$jWd5dLUowS1mi=2OzYcGF?mHZ6gqORv>FXlX zVF~YuH|bs!zHIR>`=RuWClvbN{a5Tw-g;zmwJkeGoJJ3IMT@Ad(Nnq`2b@GtZ-ovI zxlSFrU0D-I7#GY{?N!d3ei0rw&RO780->>{h;A!ScPj9&Pyy{DWIScR>gqOxHr~6b 
znc3eI#xMJTH8L-fI)}5%k&V}OBd(im5AxAXXNQ&`YMUy^=o*`=G(E54moV0m2=ByK z!ppZ2O6)vo?W_cCG_?Ich6-h?4AM{=AwBAGy-GZ+Wn4?li815JnBiNf-qImfi|C>& zbZ$K1WJOyJDQ?3V-1l75{0`EQi+Z%XnldLepde0+-iC19q*vLqJ1^yzSx1v{bbF>> zajan8nH^s^D;+cNR{aSO0Px27Q|W;IDjhGRpDWDktZ>%NtP%Gi;GmhR>Z8`HA0(`K z3RY!A*I|!Gj8GEmZ}4zTjze9`K*!&kx+0Vmp9>Gy8EM>bW!aF%xWlQGk&Tp0(+2SD zG|@b!2s-_o^4uP=F4Oqkb=iajE-@Y}b%yyUhCozvYyuqG@|3rp09+jF0fQea5NMg` zU+E2fTpl41`60}VZdD9YU&?toiPIMXYWHI$NQcHQ7!+ECIo-WA-mM|z$`UJ>#s*Oo3#Q2hXxu86P6{)kL= zxR;pr8U*b-Xedw!nyNE(@j4{YUG$5c75>0;`dAGGz=L)RmP&E5hqTOl4bhHtSaRDnujp;6s zSg4R7fs2VrW&LB)+?aX|C8s`*@)pmT&|Uh)qYv<0MCa0mGPyN%GHs(Pel^PjOy$?i zMqs%My7*nJSxYhVw}&A5D^9tY27~yav=-*5aqpQI?v$x_SRZFNVp!L?tJn5&0<_Gs zq`PNCu4h%QnZjaCfx14ZkHmmOcCnlnv2&<$iS*0FnKdV38RZ*r*2)@2RF$@gApR<= zm>D%0KegMc#Q1Za2LsWO3~lWi75CnJ6&0Vp@7L8k65xVx{J(=yytdu?pe$G0`(2-F z*_4(~PhU@n6ctJG<_Sbm?kyr9!G=2d0on0U$@fw3QkG2ko)au!KjPH^rl7zwIQ-tb zV7~q>fXYUse3#8sG?fDH_r)xD$%M-q1T{(dOo;^11C)c9Z>qYeWVIb0b{g{xn#a(UJ(Z zHkdv*y3?xmeN`fHHKs$E@*do*;)M|&E>*Y!o-Z>taI0{f^9D=cy9+ywNRyxLBy?MFeRHbnvX zkZurRpXa4i@gaj`yIBDdG5lTl{-XS%B0KXK$GfWVd;=*T!cR7imp1*05rmT!dckW) zoIBeua#Sj+(9?lG5kU9bzW)d8eRKVhJm`Lrhu19_4Cy8E^G);&3|dU#$)BajPtZ0L zi-)PStbziwNc<90=c?RyH*{Aor$aj|-s-?{nhw9pdfyk11u+|Cx zqLM$reu7XNqO*{vlrMjYCoqgbnzL^qA1MI5@wCgJ$6W!VsDgPCn*;%dPz0v6xaiUx z)kPhUf_rHC=|ZSD16WDIjboMVkJ9tAa)W`ww%UhDuS8Qu7;wjF_2zK;jq%F|-QxLl zsQV9=rSMhQdiF}DJSKn+P*wcLNWzq)_>tgVOr2*P8LLu3BA=eU!U-1J zZZYkD50B$b0Y~24JR$R%dVX7mWsq2r&Vr#TxW+qI(K)>f^gTt37KJAIQdV8wh9Ngg zep$}FF*B+vzppEw@;?SC>r~ylqQ^`Vwx`{4~4~TzYp7 zfHuJE5$lyvWj;<1(~^Y9$MKhAimVTe%7)W3xdoWAY~_X9%-th2hPgl2GE!Jy7o3jy zA!fIDounxSAhS7<95NEzcX~P*vSuVzbFise-53&ISVAoJoWPwU?JF(`Li_b#xvL!o z1W8zZ`88fcE%x`)Q9`~8OxK(iCLhk2bt(yu35f4}QHwD)D2H-TrJ?BunN)tl_fMk0=r zIu|AtAAN9FJ7-cmMZse3ldqKpRMpW$NW;j^tmZIZ)P+9zM8^oY01`E+Gw~xYYK$OV zex7H~%=S6Z%{CMlc)R!B+o`ITdKW149Ax7@l<5i{>XUzRhLN?3`!`D91wCIVIf+QM zu&9aGf69O1)Tgn|`=XQPJAtsiG$kKN0T<=F_fqAerA6zxujU!vS<=or_Loxx$I}ng zSQxfje6O6Vx&E|o`i>&~l=njU}USzJk3L?2O$ 
zA6{d2y$s?lRsiJ*GdH*9Zu%6wgfdvEMKA}lx4%KtFBPuxd*B=L?c8wL;7Lfj?0o%k zy6EDHhR}}6PVNv$jxCE0IVnLVBcoezP^b5$0DeVip^ErY2;(HPSvwk)&k>MkZG$i+ zw!cB~lz3+$L3m^ZvC5K`^&`qW0&Q^V+H4aK{SV$kqO_QHV*-U93eVfc8tNY(UmndPe=%mZ=Wuw)pmwWp zNeZ#WI55_^_6TK5Hqv8qk2BVe^6pYaN~3U(za*JBCDg&9y7ZG?8WmLR$t5B;Pqc+^ zZ3Qr%(rPxm?tKGa56#d*hDOGmu2$yy{~>j&1iXS~`?y zYAy#J3rAnSRM6d7FoRyf&=9A_NeB?TWt5=dNc%NLeX$>!?zoV!B zl$Jjc+P|FD_vYfIBGB&h`Ac-2rY}VEd=ghfOW1-4S9&LO2sheQ#-3P^K@`lNSJl;#DP35>}AKyJ)8DWy3QsaB%}hO?_T2n$@@ zsugty0znj0yx4plA>7@bcW@Rm7kK2Q`~M45#R#| zfk=6T5XU7@^*24UwwEFb7B31md5>Cg;KlR+;{{n=q|3m!OUC+VLHDHXg*j9Z26-L^ z5=s4SIcuMR=`u#2^v5Hq_Spla_qSFhk2OghW;TDI5o`hQqF|><+OtBL8v)$Chu!rS z_WbjGSt=vkkT5r-jgg-_%***ne9b=HIV2qi`65zb0V)XEjdab|F! zb@W1T{97T~Ma~BJ4*PA5m-Y2vIos?`-mf2}9|a}57wxcoDzVAbH8~0YBW=ks+Ew?mM$GS=9l@xAD%n1bXA}e%!f{%`3!35 zs{tD*Tp}q~D{%4-iC7<@qy&tuRyhJ$yd{4;{F3;E!c%ss{p8eRLAxYR^1Z|C2Il1L zbsG=-eJ}SPt@y7(l{Z^u^j^7l`5)0U<=*JP92Lpb-@+B{K6x@jHFU66$=3RvY1?qz zZarMzY2Nbw%s2fml#Tx$;aQsAR^I>Sh%LR3cxPz>UH}7r&6&=RR?g;3|83`{PQN>z zZ>-NIFTacj`&YlsVSaZ$->sbW-oMNaO9EKuU!C~xuIJ_aSv&K~G%o&h{g>wE_jS&5 z^qKhoWe)iN?EimSf!|j-&s}GH{>w(O(LB46-`V~56>`sk?HR}YGCXW#{)h2?cRvp; zXNdgEvasUgANRlU`FHR05AYfM{IX?iPXFlxN@m1J(A~8vp>(); + + assert_eq!(data, vec![ + Abcd { alpha: "A".to_string(), number: 10.0, word: "ten".to_string(), kind: "even".to_string() }, + Abcd { alpha: "B".to_string(), number: 11.0, word: "eleven".to_string(), kind: "odd".to_string() }, + Abcd { alpha: "C".to_string(), number: 12.0, word: "twelve".to_string(), kind: "even".to_string() }, + Abcd { alpha: "D".to_string(), number: 13.0, word: "thirteen".to_string(), kind: "odd".to_string() }, + Abcd { alpha: "E".to_string(), number: 14.0, word: "fourteen".to_string(), kind: "even".to_string() }, + Abcd { alpha: "F".to_string(), number: 15.0, word: "fifteen".to_string(), kind: "odd".to_string() }, + ]); +}