diff --git a/Cargo.toml b/Cargo.toml index 5c5a647..e36fafd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,8 @@ regex = { version = "1.10", default-features = false, features = ["std", "unicod serde_json = "1.0.113" [features] -default = ["regex"] +default = ["regex", "fast-lane"] +fast-lane = [] [dev-dependencies] assert_cmd = "2.0.13" diff --git a/README.md b/README.md index a03d329..567fca6 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,8 @@ FLAGS: OPTIONS: -f, --fields Fields to keep, 1-indexed, comma separated. - Use colon to include everything in a range. + Use colon (:) to match a range (inclusive). + Use equal (=) to apply out of bound fallback. Fields can be negative (-1 is the last field). [default 1:] @@ -64,6 +65,7 @@ OPTIONS: -f 3,2 => cb -f 3,1:2 => ca-b -f -3:-2 => b-c + -f 1,8=fallback => afallback To re-apply the delimiter add -j, to replace it add -r (followed by the new delimiter). @@ -84,6 +86,10 @@ OPTIONS: -r, --replace-delimiter Replace the delimiter with the provided text -t, --trim Trim the delimiter (greedy). Valid values are (l|L)eft, (r|R)ight, (b|B)oth + --fallback-oob Generic fallback output for any field that + cannot be found (oob stands for out of bound). + It's overridden by any fallback assigned to a + specific field (see -f for help) Options precedence: --trim and --compress-delimiter are applied before --fields or similar diff --git a/doc/tuc.1 b/doc/tuc.1 index e5a33b1..40efcf3 100644 --- a/doc/tuc.1 +++ b/doc/tuc.1 @@ -67,7 +67,11 @@ Print fields as a JSON array of strings .PD 0 .P .PD -\ \ \ \ \ \ \ Use colon to include everything in a range. +\ \ \ \ \ \ \ Use colon (:) to match a range (inclusive). +.PD 0 +.P +.PD +\ \ \ \ \ \ \ Use equal (=) to apply out of bound fallback. 
.PD 0 .P .PD @@ -100,6 +104,10 @@ Print fields as a JSON array of strings .P .PD \ \ \ \ \ \ \ \ \ \f[V]-f -3:-2 => b-c\f[R] +.PD 0 +.P +.PD +\ \ \ \ \ \ \ \ \ \f[V]-f 1,8=fallback => afallback\f[R] .PP \ \ \ \ \ \ \ To re-apply the delimiter add -j, to replace .PD 0 @@ -173,6 +181,24 @@ To merge lines, use --no-join .P .PD \ \ \ \ \ \ \ Valid values are (l|L)eft, (r|R)ight, (b|B)oth +.PP +\ \ \ \ \f[B]--fallback-oob\f[R] [fallback] +.PD 0 +.P +.PD +\ \ \ \ \ \ \ Generic fallback output for any field that +.PD 0 +.P +.PD +\ \ \ \ \ \ \ cannot be found (oob stands for out of bound). +.PD 0 +.P +.PD +\ \ \ \ \ \ \ It\[cq]s overridden by any fallback assigned to a +.PD 0 +.P +.PD +\ \ \ \ \ \ \ specific field (see -f for help) .SH OPTIONS PRECEDENCE .PP --trim and --compress-delimiter are applied before --fields or similar @@ -181,15 +207,25 @@ To merge lines, use --no-join --characters and --fields read and allocate memory one line at a time .PP --lines allocate memory one line at a time as long as the requested -fields are ordered and non-negative (e.g.\ -l 1,3:4,4,7), otherwise it -allocates the whole input in memory (it also happens when -p or -m are -being used) +fields are +.PD 0 +.P +.PD +ordered and non-negative (e.g.\ -l 1,3:4,4,7), otherwise it allocates +.PD 0 +.P +.PD +the whole input in memory (it also happens when -p or -m are being used) .PP --bytes allocate the whole input in memory .SH COLORS .PP Help is displayed using colors. -Colors will be suppressed in the following circumstances: +Colors will be suppressed in the +.PD 0 +.P +.PD +following circumstances: .IP \[bu] 2 when the TERM environment variable is not set or set to \[lq]dumb\[rq] .IP \[bu] 2 diff --git a/doc/tuc.1.md b/doc/tuc.1.md index f264785..f47ebbe 100644 --- a/doc/tuc.1.md +++ b/doc/tuc.1.md @@ -54,7 +54,8 @@ OPTIONS | **-f**, **\--fields** [bounds] | Fields to keep, 1-indexed, comma separated. -| Use colon to include everything in a range. 
+| Use colon (:) to match a range (inclusive). +| Use equal (=) to apply out of bound fallback. | Fields can be negative (-1 is the last field). | [default 1:] @@ -66,6 +67,7 @@ OPTIONS | `-f 3,2 => cb` | `-f 3,1:2 => ca-b` | `-f -3:-2 => b-c` +| `-f 1,8=fallback => afallback` | To re-apply the delimiter add -j, to replace | it add -r (followed by the new delimiter) @@ -100,6 +102,12 @@ OPTIONS | Trim the delimiter (greedy). | Valid values are (l|L)eft, (r|R)ight, (b|B)oth +| **\--fallback-oob** [fallback] +| Generic fallback output for any field that +| cannot be found (oob stands for out of bound). +| It's overridden by any fallback assigned to a +| specific field (see -f for help) + OPTIONS PRECEDENCE ================== @@ -110,17 +118,17 @@ MEMORY CONSUMPTION \--characters and \--fields read and allocate memory one line at a time -\--lines allocate memory one line at a time as long as the requested fields are - ordered and non-negative (e.g. -l 1,3:4,4,7), otherwise it allocates - the whole input in memory (it also happens when -p or -m are being used) +| \--lines allocate memory one line at a time as long as the requested fields are +| ordered and non-negative (e.g. -l 1,3:4,4,7), otherwise it allocates +| the whole input in memory (it also happens when -p or -m are being used) \--bytes allocate the whole input in memory COLORS ====== -Help is displayed using colors. Colors will be suppressed in the -following circumstances: +| Help is displayed using colors. 
Colors will be suppressed in the +| following circumstances: - when the TERM environment variable is not set or set to "dumb" - when the NO_COLOR environment variable is set (regardless of value) diff --git a/src/bin/tuc.rs b/src/bin/tuc.rs index 21d2119..f309c37 100644 --- a/src/bin/tuc.rs +++ b/src/bin/tuc.rs @@ -7,16 +7,41 @@ use tuc::bounds::{BoundOrFiller, BoundsType, UserBoundsList}; use tuc::cut_bytes::read_and_cut_bytes; use tuc::cut_lines::read_and_cut_lines; use tuc::cut_str::read_and_cut_str; -use tuc::fast_lane::{read_and_cut_text_as_bytes, FastOpt}; use tuc::help::{get_help, get_short_help}; use tuc::options::{Opt, EOL}; +#[cfg(feature = "fast-lane")] +use tuc::fast_lane::{read_and_cut_text_as_bytes, FastOpt}; + #[cfg(feature = "regex")] use tuc::options::RegexBag; #[cfg(feature = "regex")] use regex::bytes::Regex; +#[cfg(not(feature = "fast-lane"))] +struct FastOpt {} + +#[cfg(not(feature = "fast-lane"))] +impl<'a> TryFrom<&'a Opt> for FastOpt { + type Error = &'static str; + + fn try_from(_value: &'a Opt) -> Result { + Err("This binary was not compiled with the feature fast-lane") + } +} + +#[cfg(not(feature = "fast-lane"))] +fn read_and_cut_text_as_bytes( + _stdin: &mut R, + _stdout: &mut W, + _fast_opt: &FastOpt, +) -> Result<()> { + Err(anyhow::Error::msg( + "This binary was not compiled with the feature fast-lane", + )) +} + fn parse_args() -> Result { let mut pargs = pico_args::Arguments::from_env(); @@ -185,6 +210,18 @@ fn parse_args() -> Result { bounds, replace_delimiter, trim: pargs.opt_value_from_str(["-t", "--trim"])?, + fallback_oob: pargs + .opt_value_from_str("--fallback-oob") + .or_else(|e| match e { + pico_args::Error::OptionWithoutAValue(_) => { + // We must consume the arg ourselves (it's not done on error) + pargs.contains("--fallback-oob="); + + Ok(Some("".into())) + } + _ => Err(e), + })? 
+            .map(|x: String| x.into()),
         regex_bag,
     };
 
diff --git a/src/bounds.rs b/src/bounds.rs
index d0aec39..67040fc 100644
--- a/src/bounds.rs
+++ b/src/bounds.rs
@@ -231,24 +231,77 @@ impl UserBoundsList {
         self.is_sortable() && self.is_sorted() && !self.has_negative_indices()
     }
 
-    /**
-     * Create a new UserBoundsList with only the bounds (no fillers)
-     * and with every ranged bound converted into single slot bounds.
-     */
+    /// Create a new UserBoundsList with every ranged bound converted
+    /// into single-field bounds.
+    ///
+    /// ```rust
+    /// # use tuc::bounds::{UserBoundsList, UserBoundsTrait};
+    /// # use std::ops::Range;
+    /// # use tuc::bounds::Side;
+    /// # use std::str::FromStr;
+    ///
+    /// assert_eq!(
+    ///     UserBoundsList::from_str("1:3,4,-2:").unwrap().unpack(6).list,
+    ///     UserBoundsList::from_str("1,2,3,4,5,6").unwrap().list,
+    /// );
+    /// ```
     pub fn unpack(&self, num_fields: usize) -> UserBoundsList {
         let list: Vec<BoundOrFiller> = self
             .list
             .iter()
-            .filter_map(|x| match x {
-                BoundOrFiller::Filler(_) => None,
-                BoundOrFiller::Bound(b) => Some(b.unpack(num_fields)),
+            .flat_map(|bof| match bof {
+                // XXX how to do it using only iterators, no collect?
+                BoundOrFiller::Bound(b) => b
+                    .unpack(num_fields)
+                    .into_iter()
+                    .map(BoundOrFiller::Bound)
+                    .collect(),
+                BoundOrFiller::Filler(f) => vec![BoundOrFiller::Filler(f.clone())],
             })
-            .flatten()
-            .map(BoundOrFiller::Bound)
             .collect();
 
         list.into()
     }
+
+    /// Create a new UserBoundsList with every range complemented (inverted).
+    ///
+    /// Fillers are kept as they are. Fails if any bound cannot be resolved
+    /// against `num_fields`, or if the resulting list is empty.
+    pub fn complement(&self, num_fields: usize) -> Result<UserBoundsList> {
+        let mut list: Vec<BoundOrFiller> = Vec::with_capacity(self.list.len());
+
+        for bof in &self.list {
+            match bof {
+                // `?` must run at function level: a `Result` returned from a
+                // `flat_map` closure is iterated, which silently drops `Err`.
+                BoundOrFiller::Bound(b) => {
+                    list.extend(b.complement(num_fields)?.into_iter().map(BoundOrFiller::Bound))
+                }
+                BoundOrFiller::Filler(f) => list.push(BoundOrFiller::Filler(f.clone())),
+            }
+        }
+
+        if list.is_empty() {
+            bail!("the complement is empty");
+        }
+
+        Ok(list.into())
+    }
+}
+
+fn complement_std_range(parts_length: usize, r: &Range<usize>) -> Vec<Range<usize>> {
+    match (r.start, r.end) {
+        // full match => no match
+        (0, end) if end == parts_length => Vec::new(),
+        // match left side => match right side
+        #[allow(clippy::single_range_in_vec_init)]
+        (0, right) => vec![right..parts_length],
+        // match right side => match left side
+        #[allow(clippy::single_range_in_vec_init)]
+        (left, end) if end == parts_length => vec![0..left],
+        // match middle of string => match before and after
+        (left, right) => vec![0..left, right..parts_length],
+    }
 }
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@@ -302,6 +355,7 @@ pub struct UserBounds {
     pub l: Side,
     pub r: Side,
     pub is_last: bool,
+    pub fallback_oob: Option<Vec<u8>>,
 }
 
 impl fmt::Display for UserBounds {
@@ -324,6 +378,13 @@ impl FromStr for UserBounds {
             bail!("Field format error, no numbers next to `:`");
         }
 
+        let mut fallback_oob: Option<Vec<u8>> = None;
+        let mut s = s;
+        if let Some((range_part, fallback)) = s.split_once('=') {
+            fallback_oob = Some(fallback.into());
+            s = range_part;
+        }
+
         let (l, r) = match s.find(':') {
             None => {
                 let side = Side::from_str(s)?;
@@ -356,15 +417,35 @@ impl FromStr for UserBounds {
             _ => (),
         }
 
-        Ok(UserBounds::new(l, r))
+        let mut b = UserBounds::new(l, r);
+        b.fallback_oob = fallback_oob;
+        Ok(b)
+    }
+}
+
+impl From<Range<usize>> for UserBounds {
+    fn from(value: Range<usize>) -> Self {
+        let start: i32 = value
+            .start
+            .try_into()
+            .expect("range was bigger than expected");
+
+        let end: i32 = value
+            .end
+            .try_into()
+            .expect("range was bigger than expected");
+
+        UserBounds::new(Side::Some(start + 1), Side::Some(end))
     }
 }
 
 pub trait UserBoundsTrait {
     fn new(l: Side, r: Side) -> Self;
+    fn with_fallback(l: Side, r: Side, fallback_oob: Option<Vec<u8>>) -> Self;
fn try_into_range(&self, parts_length: usize) -> Result>; fn matches(&self, idx: T) -> Result; fn unpack(&self, num_fields: usize) -> Vec; + fn complement(&self, num_fields: usize) -> Result>; } impl UserBoundsTrait for UserBounds { @@ -373,8 +454,19 @@ impl UserBoundsTrait for UserBounds { l, r, is_last: false, + fallback_oob: None, + } + } + + fn with_fallback(l: Side, r: Side, fallback_oob: Option>) -> Self { + UserBounds { + l, + r, + is_last: false, + fallback_oob, } } + /** * Check if a field is between the bounds. * @@ -498,6 +590,13 @@ impl UserBoundsTrait for UserBounds { bounds } + + /// Transform a bound in its complement (invert the bound). + fn complement(&self, num_fields: usize) -> Result> { + let r = self.try_into_range(num_fields)?; + let r_complement = complement_std_range(num_fields, &r); + Ok(r_complement.into_iter().map(|x| x.into()).collect()) + } } impl PartialOrd for UserBounds { @@ -526,6 +625,29 @@ impl Default for UserBounds { mod tests { use super::*; + #[test] + fn test_complement_std_range() { + // remember, it assumes that ranges are "legit" (not out of bounds) + + let empty_vec: Vec> = vec![]; + + // test 1-long string + assert_eq!(complement_std_range(1, &(0..1)), empty_vec); + + // test ranges that reach left or right bounds + assert_eq!(complement_std_range(5, &(0..5)), empty_vec); + assert_eq!(complement_std_range(5, &(0..3)), vec![3..5]); + assert_eq!(complement_std_range(5, &(3..5)), vec![0..3]); + + // test internal range + assert_eq!(complement_std_range(5, &(1..3)), vec![0..1, 3..5]); + + // test 2-long string + assert_eq!(complement_std_range(2, &(0..2)), empty_vec); + assert_eq!(complement_std_range(2, &(0..1)), vec![1..2]); + assert_eq!(complement_std_range(2, &(1..2)), vec![0..1]); + } + #[test] fn test_user_bounds_formatting() { assert_eq!( @@ -585,6 +707,60 @@ mod tests { Some(UserBounds::new(Side::Continue, Side::Some(-1))), ); + assert_eq!( + UserBounds::from_str("1").ok(), + Some(UserBounds::with_fallback( + 
Side::Some(1), + Side::Some(1), + None + )), + ); + + assert_eq!( + UserBounds::from_str("1=foo").ok(), + Some(UserBounds::with_fallback( + Side::Some(1), + Side::Some(1), + Some("foo".as_bytes().to_owned()) + )), + ); + + assert_eq!( + UserBounds::from_str("1:2=foo").ok(), + Some(UserBounds::with_fallback( + Side::Some(1), + Side::Some(2), + Some("foo".as_bytes().to_owned()) + )), + ); + + assert_eq!( + UserBounds::from_str("-1=foo").ok(), + Some(UserBounds::with_fallback( + Side::Some(-1), + Side::Some(-1), + Some("foo".as_bytes().to_owned()) + )), + ); + + assert_eq!( + UserBounds::from_str("1=allow:colon:in:fallback").ok(), + Some(UserBounds::with_fallback( + Side::Some(1), + Side::Some(1), + Some("allow:colon:in:fallback".as_bytes().to_owned()) + )), + ); + + assert_eq!( + UserBounds::from_str("1:2=allow:colon:in:fallback").ok(), + Some(UserBounds::with_fallback( + Side::Some(1), + Side::Some(2), + Some("allow:colon:in:fallback".as_bytes().to_owned()) + )), + ); + { #![allow(clippy::bind_instead_of_map)] assert_eq!( @@ -769,6 +945,33 @@ mod tests { ); } + #[test] + fn test_complement_bound() { + assert_eq!( + UserBounds::new(Side::Some(1), Side::Some(1)) + .complement(2) + .unwrap(), + vec![UserBounds::new(Side::Some(2), Side::Some(2))], + ); + + assert_eq!( + UserBounds::new(Side::Some(1), Side::Continue) + .complement(2) + .unwrap(), + Vec::new(), + ); + + assert_eq!( + UserBounds::new(Side::Some(-3), Side::Some(3)) + .complement(4) + .unwrap(), + vec![ + UserBounds::new(Side::Some(1), Side::Some(1)), + UserBounds::new(Side::Some(4), Side::Some(4)), + ], + ); + } + #[test] fn test_user_bounds_cannot_be_empty() { assert!(UserBoundsList::from_str("").is_err()); @@ -834,14 +1037,42 @@ mod tests { ); assert_eq!( - UserBoundsList::from_str("a{1}b{2}c") + UserBoundsList::from_str("a{1:2}b").unwrap().unpack(4).list, + vec![ + BoundOrFiller::Filler(String::from("a")), + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), + 
BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))), + BoundOrFiller::Filler(String::from("b")), + ] + ); + } + + #[test] + fn test_vec_of_bounds_can_complement() { + assert_eq!( + UserBoundsList::from_str("1:2,2:3,5,-2") + .unwrap() + .complement(6) .unwrap() - .unpack(4) .list, vec![ + BoundOrFiller::Bound(UserBounds::new(Side::Some(3), Side::Some(6))), BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(1))), - BoundOrFiller::Bound(UserBounds::new(Side::Some(2), Side::Some(2))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(4), Side::Some(6))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(4))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(6), Side::Some(6))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(1), Side::Some(4))), + BoundOrFiller::Bound(UserBounds::new(Side::Some(6), Side::Some(6))), ] ); + + assert_eq!( + UserBoundsList::from_str("1:") + .unwrap() + .complement(6) + .err() + .map(|x| x.to_string()), + Some("the complement is empty".to_owned()) + ); } } diff --git a/src/cut_str.rs b/src/cut_str.rs index 96ed79c..7d29f9c 100644 --- a/src/cut_str.rs +++ b/src/cut_str.rs @@ -10,21 +10,6 @@ use crate::options::{Opt, Trim, EOL}; #[cfg(feature = "regex")] use regex::bytes::Regex; -fn complement_std_range(parts_length: usize, r: &Range) -> Vec> { - match (r.start, r.end) { - // full match => no match - (0, end) if end == parts_length => Vec::new(), - // match left side => match right side - #[allow(clippy::single_range_in_vec_init)] - (0, right) => vec![right..parts_length], - // match right side => match left side - #[allow(clippy::single_range_in_vec_init)] - (left, end) if end == parts_length => vec![0..left], - // match middle of string => match before and after - (left, right) => vec![0..left, right..parts_length], - } -} - /// Split a string into parts and fill a buffer with ranges /// that match those parts. 
///
@@ -361,18 +346,22 @@ pub fn cut_str(
         stdout.write_all(b"[")?;
     }
 
-    let _bounds: UserBoundsList;
+    let mut _bounds: UserBoundsList;
     let mut bounds = &opt.bounds;
 
-    if opt.bounds_type == BoundsType::Characters && opt.replace_delimiter.is_some() {
-        // Unpack bounds such as 1:3 or 2: into single character bounds
+    if opt.complement {
+        // `complement` returns an error when nothing is left to output,
+        // so an empty list never reaches this point.
+        _bounds = bounds.complement(num_fields)?;
+        bounds = &_bounds;
+    }
+
+    if opt.json || (opt.bounds_type == BoundsType::Characters && opt.replace_delimiter.is_some()) {
+        // Unpack bounds such as 1:3 or 2: into single-field bounds
         // such as 1:1,2:2,3:3 etc...
-        // We need it to be able to insert a replace character between every field.
-        // It can cost quite a bit and is risky because it may end up creating a
-        // char vector of the whole input (then again -c with -r is quite the
-        // rare usage).
 
-        // Start by checking if we actually need to rewrite the bounds
+        // Start by checking if we actually need to rewrite the bounds, since
+        // it's an expensive operation.
         if bounds.iter().any(|b| {
             matches!(
                 b,
@@ -380,10 +369,10 @@ pub fn cut_str(
                     l: x,
                     r: y,
                     is_last: _,
+                    fallback_oob: _,
                 }) if x != y || x == &Side::Continue
             )
         }) {
-            // Yep, there at least a range bound. Let's do it
             _bounds = bounds.unpack(num_fields);
             bounds = &_bounds;
         }
@@ -406,42 +395,30 @@ pub fn cut_str(
             BoundOrFiller::Bound(b) => b,
         };
 
-        let mut r_array = vec![b.try_into_range(num_fields)?];
-
-        if opt.complement {
-            r_array = complement_std_range(num_fields, &r_array[0]);
-        }
-
-        if opt.json {
-            r_array = r_array
-                .iter()
-                .flat_map(|r| r.start..r.end)
-                .map(|i| Range {
-                    start: i,
-                    end: i + 1,
-                })
-                .collect();
-        }
-
-        let r_iter = r_array.iter();
-        let n_ranges = r_array.len();
+        let output = match b.try_into_range(num_fields) {
+            Ok(r) => {
+                let idx_start = fields[r.start].start;
+                let idx_end = fields[r.end - 1].end;
+                &line[idx_start..idx_end]
+            }
+            // Out of bounds: the field's own fallback wins over the generic one
+            Err(e) => b
+                .fallback_oob
+                .as_deref()
+                .or(opt.fallback_oob.as_deref())
+                .ok_or(e)?,
+        };
 
-        for (idx_r, r) in r_iter.enumerate() {
-            let idx_start = fields[r.start].start;
-            let idx_end = fields[r.end - 1].end;
-            let output = &line[idx_start..idx_end];
-
-            let field_to_print = maybe_replace_delimiter(output, opt);
-            write_maybe_as_json!(stdout, field_to_print, opt.json);
-
-            if opt.join && !(i == bounds.len() - 1 && idx_r == n_ranges - 1) {
-                stdout.write_all(
-                    opt.replace_delimiter
-                        .as_ref()
-                        .unwrap_or(&opt.delimiter)
-                        .as_bytes(),
-                )?;
-            }
+        let field_to_print = maybe_replace_delimiter(output, opt);
+        write_maybe_as_json!(stdout, field_to_print, opt.json);
+
+        if opt.join && i != bounds.len() - 1 {
+            stdout.write_all(
+                opt.replace_delimiter
+                    .as_ref()
+                    .unwrap_or(&opt.delimiter)
+                    .as_bytes(),
+            )?;
         }
 
         Ok(())
@@ -540,29 +517,6 @@ mod tests {
         }
     }
 
-    #[test]
-    fn test_complement_std_range() {
-        // remember, it assumes that ranges are "legit" (not out of bounds)
-
-        let empty_vec: Vec<Range<usize>> = vec![];
-
-        // test 1-long string
-        assert_eq!(complement_std_range(1, &(0..1)), empty_vec);
-
-        // test ranges that reach left or right bounds
-        assert_eq!(complement_std_range(5, &(0..5)), empty_vec);
-        assert_eq!(complement_std_range(5, &(0..3)), vec![3..5]);
-        assert_eq!(complement_std_range(5, &(3..5)), vec![0..3]);
-
-        // test internal range
-        assert_eq!(complement_std_range(5, &(1..3)), vec![0..1, 3..5]);
-
-        // test 2-long string
-        assert_eq!(complement_std_range(2, &(0..2)), empty_vec);
-        assert_eq!(complement_std_range(2, &(0..1)), vec![1..2]);
-        assert_eq!(complement_std_range(2, &(1..2)), vec![0..1]);
-    }
-
     #[test]
     fn test_fill_with_fields_locations() {
         let mut v_range: Vec<Range<usize>> = Vec::new();
@@ -1023,7 +984,6 @@ mod tests {
         opt.regex_bag = Some(make_regex_bag());
 
         cut_str(line, &opt, &mut output, &mut buffer1, &mut buffer2, eol).unwrap();
-        dbg!(std::str::from_utf8(&output).unwrap());
         assert_eq!(output, b"b..,,c\n".as_slice());
     }
 
diff --git a/src/fast_lane.rs b/src/fast_lane.rs
index 2ebd09f..709a71c 100644
--- a/src/fast_lane.rs
+++ b/src/fast_lane.rs
@@ -103,16 +103,19 @@ fn output_parts(
     stdout: &mut W,
     opt: &FastOpt,
 ) -> Result<()> {
-    let r = b.try_into_range(fields.len())?;
-
-    let idx_start = if r.start == 0 {
-        0
-    } else {
-        fields[r.start - 1] + 1
-    };
-    let idx_end = fields[r.end - 1];
-
-    let output = &line[idx_start..idx_end];
+    let output = match b.try_into_range(fields.len()) {
+        Ok(r) => {
+            let idx_start = if r.start == 0 {
+                0
+            } else {
+                fields[r.start - 1] + 1
+            };
+            let idx_end = fields[r.end - 1];
+            &line[idx_start..idx_end]
+        }
+        // Out of bounds: the field's own fallback wins over the generic one
+        Err(e) => b.fallback_oob.as_deref().or(opt.fallback_oob).ok_or(e)?,
+    };
 
     let field_to_print = output;
     stdout.write_all(field_to_print)?;
@@ -132,6 +135,7 @@ pub struct FastOpt<'a> {
     bounds: &'a UserBoundsList,
     only_delimited: bool,
     trim: Option<Trim>,
+    fallback_oob: Option<&'a [u8]>,
 }
 
 impl<'a> TryFrom<&'a Opt> for FastOpt<'a> {
@@ -163,6 +167,7 @@ impl<'a> TryFrom<&'a Opt> for FastOpt<'a> {
             bounds: &value.bounds,
             only_delimited: value.only_delimited,
             trim: value.trim,
+            fallback_oob: value.fallback_oob.as_deref(),
         })
     }
 }
@@ -213,6 +218,7 @@ mod
tests { bounds, only_delimited: false, trim: None, + fallback_oob: None, } } diff --git a/src/help.rs b/src/help.rs index 0135213..e7a174d 100644 --- a/src/help.rs +++ b/src/help.rs @@ -29,7 +29,8 @@ FLAGS: OPTIONS: -f, --fields Fields to keep, 1-indexed, comma separated. - Use colon to include everything in a range. + Use colon (:) to match a range (inclusive). + Use equal (=) to apply out of bound fallback. Fields can be negative (-1 is the last field). [default: 1:] @@ -40,6 +41,7 @@ OPTIONS: -f 3,2 => cb -f 3,1:2 => ca-b -f -3:-2 => b-c + -f 1,8=fallback => afallback To re-apply the delimiter add -j, to replace it add -r (followed by the new delimiter). @@ -61,6 +63,10 @@ OPTIONS: Implies --join -t, --trim Trim the delimiter (greedy). Valid values are (l|L)eft, (r|R)ight, (b|B)oth + --fallback-oob Generic fallback output for any field that + cannot be found (oob stands for out of bound). + It's overridden by any fallback assigned to a + specific field (see -f for help) Options precedence: --trim and --compress-delimiter are applied before --fields or similar @@ -126,7 +132,7 @@ fn get_colored_help(text: &str) -> String { .replace_all(text, "\x1b[33m$0\x1b[0m"); // any example using "-f something" - let text = Regex::new(r#"-(f|l) ('.+'|[0-9,:-]+)"#) + let text = Regex::new(r#"-(f|l) ('.+'|[0-9,:-]+(=[^\s]+)?)"#) .unwrap() .replace_all(&text, "-$1 \x1b[33m$2\x1b[0m"); diff --git a/src/lib.rs b/src/lib.rs index cf1c9e0..0e91263 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ pub mod bounds; pub mod cut_bytes; pub mod cut_lines; pub mod cut_str; +#[cfg(feature = "fast-lane")] pub mod fast_lane; pub mod help; pub mod options; diff --git a/src/options.rs b/src/options.rs index bace495..f116df7 100644 --- a/src/options.rs +++ b/src/options.rs @@ -43,6 +43,7 @@ pub struct Opt { pub complement: bool, pub join: bool, pub json: bool, + pub fallback_oob: Option>, #[cfg(feature = "regex")] pub regex_bag: Option, #[cfg(not(feature = "regex"))] @@ -65,6 +66,7 @@ 
impl Default for Opt { complement: false, join: false, json: false, + fallback_oob: None, regex_bag: None, } } diff --git a/tests/cli.rs b/tests/cli.rs index 1d9739c..15b9e63 100644 --- a/tests/cli.rs +++ b/tests/cli.rs @@ -159,7 +159,7 @@ fn it_can_complement_the_fields() { .write_stdin("a b c") .assert(); - assert.success().stdout("\n"); + assert.failure().stderr("Error: the complement is empty\n"); } #[test] @@ -260,13 +260,18 @@ fn it_format_fields() { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let assert = cmd - .args(["-f", "Say {1} to our {2}.\nJust {{saying}}"]) + .args([ + "--fallback-oob", + "generic fallback", + "-f", + "Say {1} to our {2}.\nJust {{saying}} {3=dedicated fallback} {4}", + ]) .write_stdin("hello\tworld") .assert(); assert .success() - .stdout("Say hello to our world.\nJust {saying}\n"); + .stdout("Say hello to our world.\nJust {saying} dedicated fallback generic fallback\n"); } #[test]