diff --git a/crates/core/flags/defs.rs b/crates/core/flags/defs.rs
index 9a196c491..d2f28ea8a 100644
--- a/crates/core/flags/defs.rs
+++ b/crates/core/flags/defs.rs
@@ -59,6 +59,7 @@ pub(super) const FLAGS: &[&dyn Flag] = &[
     &ContextSeparator,
     &Count,
     &CountMatches,
+    &Histogram,
     &Crlf,
     &Debug,
     &DfaSizeLimit,
@@ -1322,6 +1323,49 @@ given.
     }
 }
 
+/// --histogram
+#[derive(Debug)]
+struct Histogram;
+
+impl Flag for Histogram {
+    fn is_switch(&self) -> bool {
+        false
+    }
+    fn name_short(&self) -> Option<u8> {
+        None
+    }
+    fn name_long(&self) -> &'static str {
+        "histogram"
+    }
+    fn doc_variable(&self) -> Option<&'static str> {
+        Some("NUM")
+    }
+    fn doc_category(&self) -> Category {
+        Category::OutputModes
+    }
+    fn doc_short(&self) -> &'static str {
+        r"Print a histogram of the matches"
+    }
+    fn doc_long(&self) -> &'static str {
+        r"
+The offset of the match and the specified bin size
+(NUM) of this argument are used to determine which bin gets
+incremented for every match."
+    }
+
+    fn update(&self, v: FlagValue, args: &mut LowArgs) -> anyhow::Result<()> {
+        let binsize = convert::u64(&v.unwrap_value())?;
+        // The bin size is used as a divisor when assigning matches to bins,
+        // so zero must be rejected here rather than panicking later.
+        if binsize == 0 {
+            anyhow::bail!("histogram bin size must be greater than zero");
+        }
+        args.histogram_bin_size = Some(binsize);
+        args.mode.update(Mode::Search(SearchMode::Histogram));
+        Ok(())
+    }
+}
+
 #[cfg(test)]
 #[test]
 fn test_count_matches() {
diff --git a/crates/core/flags/hiargs.rs b/crates/core/flags/hiargs.rs
index df09dceda..a9feca761 100644
--- a/crates/core/flags/hiargs.rs
+++ b/crates/core/flags/hiargs.rs
@@ -55,6 +55,7 @@ pub(crate) struct HiArgs {
     follow: bool,
     globs: ignore::overrides::Override,
     heading: bool,
     hidden: bool,
+    histogram_bin_size: Option<u64>,
     hyperlink_config: grep::printer::HyperlinkConfig,
     ignore_file_case_insensitive: bool,
@@ -203,6 +204,7 @@ impl HiArgs {
                 SearchMode::FilesWithMatches
                 | SearchMode::FilesWithoutMatch
                 | SearchMode::Count
+                | SearchMode::Histogram
                 | SearchMode::CountMatches => return false,
                 SearchMode::JSON => return true,
                 SearchMode::Standard => {
@@ -272,6 +274,7 @@ impl HiArgs {
             follow: low.follow,
             heading,
             hidden: low.hidden,
+            histogram_bin_size: low.histogram_bin_size,
             hyperlink_config,
             ignore_file: low.ignore_file,
             ignore_file_case_insensitive: low.ignore_file_case_insensitive,
@@ -569,6 +572,10 @@ impl HiArgs {
                 SearchMode::FilesWithoutMatch => SummaryKind::PathWithoutMatch,
                 SearchMode::Count => SummaryKind::Count,
                 SearchMode::CountMatches => SummaryKind::CountMatches,
+                SearchMode::Histogram => SummaryKind::Histogram(
+                    self.histogram_bin_size
+                        .expect("Histogram bin size must be specified"),
+                ),
                 SearchMode::JSON => {
                     return Printer::JSON(self.printer_json(wtr))
                 }
diff --git a/crates/core/flags/lowargs.rs b/crates/core/flags/lowargs.rs
index 184c96ae8..25da1af4d 100644
--- a/crates/core/flags/lowargs.rs
+++ b/crates/core/flags/lowargs.rs
@@ -59,6 +59,7 @@ pub(crate) struct LowArgs {
     pub(crate) globs: Vec<String>,
     pub(crate) heading: Option<bool>,
     pub(crate) hidden: bool,
+    pub(crate) histogram_bin_size: Option<u64>,
     pub(crate) hostname_bin: Option<PathBuf>,
     pub(crate) hyperlink_format: HyperlinkFormat,
     pub(crate) iglobs: Vec<String>,
@@ -209,6 +210,8 @@ pub(crate) enum SearchMode {
     /// Show files containing at least one match and the total number of
     /// matches.
     CountMatches,
+    /// Show a histogram of the matches.
+    Histogram,
     /// Print matches in a JSON lines format.
     JSON,
 }
diff --git a/crates/printer/src/stats.rs b/crates/printer/src/stats.rs
index 555401b3d..7fcfe2f34 100644
--- a/crates/printer/src/stats.rs
+++ b/crates/printer/src/stats.rs
@@ -1,4 +1,5 @@
 use std::{
+    collections::HashMap,
     ops::{Add, AddAssign},
     time::Duration,
 };
@@ -17,6 +18,7 @@ pub struct Stats {
     bytes_searched: u64,
     bytes_printed: u64,
     matched_lines: u64,
+    histogram: HashMap<u64, u64>,
     matches: u64,
 }
 
@@ -33,6 +35,14 @@ impl Stats {
         self.elapsed.0
     }
 
+    /// Return the histogram of match counts.
+    ///
+    /// Each key is a bin index and each value is the number of matches
+    /// recorded in that bin. Bins without matches have no entry.
+    pub fn histogram(&self) -> &HashMap<u64, u64> {
+        &self.histogram
+    }
+
     /// Return the total number of searches executed.
     pub fn searches(&self) -> u64 {
         self.searches
@@ -102,6 +112,13 @@ impl Stats {
     pub fn add_matches(&mut self, n: u64) {
         self.matches += n;
     }
+
+    /// Increment the count of the histogram bin with the given index.
+    ///
+    /// A bin that has not been seen before starts at zero.
+    pub fn increment_histogram(&mut self, entry: u64) {
+        *self.histogram.entry(entry).or_insert(0) += 1;
+    }
 }
 
 impl Add for Stats {
@@ -125,6 +142,15 @@ impl<'a> Add<&'a Stats> for Stats {
             bytes_printed: self.bytes_printed + rhs.bytes_printed,
             matched_lines: self.matched_lines + rhs.matched_lines,
             matches: self.matches + rhs.matches,
+            histogram: {
+                // Reuse the left-hand map and fold the right-hand counts
+                // into it, so neither map is cloned or rebuilt.
+                let mut merged = self.histogram;
+                for (&bin, &count) in &rhs.histogram {
+                    *merged.entry(bin).or_insert(0) += count;
+                }
+                merged
+            },
         }
     }
 }
diff --git a/crates/printer/src/summary.rs b/crates/printer/src/summary.rs
index 275419d4c..2e5cd8bbf 100644
--- a/crates/printer/src/summary.rs
+++ b/crates/printer/src/summary.rs
@@ -71,6 +71,12 @@ pub enum SummaryKind {
     /// If the `path` setting is enabled, then the count is prefixed by the
     /// corresponding file path.
     CountMatches,
+    /// Show a histogram of match offsets.
+    ///
+    /// The payload is the bin size in bytes: each match is counted in the
+    /// bin given by its absolute byte offset divided by the bin size. A bin
+    /// size of zero is treated as one.
+    Histogram(u64),
     /// Show only the file path if and only if a match was found.
     ///
     /// This ignores the `path` setting and always shows the file path. If no
@@ -101,7 +107,7 @@ impl SummaryKind {
 
         match *self {
             PathWithMatch | PathWithoutMatch => true,
-            Count | CountMatches | Quiet => false,
+            Count | CountMatches | Histogram(_) | Quiet => false,
         }
     }
 
@@ -111,7 +117,7 @@ impl SummaryKind {
         use self::SummaryKind::*;
 
         match *self {
-            CountMatches => true,
+            CountMatches | Histogram(_) => true,
             Count | PathWithMatch | PathWithoutMatch | Quiet => false,
         }
     }
@@ -123,7 +129,7 @@ impl SummaryKind {
 
         match *self {
             PathWithMatch | Quiet => true,
-            Count | CountMatches | PathWithoutMatch => false,
+            Count | CountMatches | Histogram(_) | PathWithoutMatch => false,
         }
     }
 }
@@ -682,6 +688,14 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
         if let Some(ref mut stats) = self.stats {
             stats.add_matches(sink_match_count);
             stats.add_matched_lines(mat.lines().count() as u64);
+
+            if let SummaryKind::Histogram(bin_size) = self.summary.config.kind
+            {
+                // Clamp so a zero bin size cannot trigger a division by
+                // zero panic.
+                let bin = mat.absolute_byte_offset() / bin_size.max(1);
+                stats.increment_histogram(bin);
+            }
         } else if self.summary.config.kind.quit_early() {
             return Ok(false);
         }
@@ -788,6 +802,29 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
                     self.write_line_term(searcher)?;
                 }
             }
+            SummaryKind::Histogram(bin_size) => {
+                if self.match_count > 0 {
+                    if self.path.is_some() {
+                        self.write_path_field()?;
+                        self.write_line_term(searcher)?;
+                    }
+                    let stats = self
+                        .stats
+                        .as_ref()
+                        .expect("Histogram should enable stats tracking");
+                    // Clamp so a zero bin size cannot trigger a division
+                    // by zero panic.
+                    let last_bin = stats.bytes_searched() / bin_size.max(1);
+                    // Write one count per bin, including empty bins, without
+                    // materializing the whole report in memory first.
+                    for bin in 0..=last_bin {
+                        let count =
+                            stats.histogram().get(&bin).copied().unwrap_or(0);
+                        self.write(count.to_string().as_bytes())?;
+                        self.write_line_term(searcher)?;
+                    }
+                }
+            }
             SummaryKind::PathWithMatch => {
                 if self.match_count > 0 {
                     self.write_path_line(searcher)?;