From ffd8af9b993882ce73e26704f01b3ea9fcca58f8 Mon Sep 17 00:00:00 2001 From: marius david Date: Wed, 8 May 2024 19:59:25 +0200 Subject: [PATCH 1/4] bgrep: Add a recursive option --- src/bgrepapp.rs | 127 +++++++++++++++++++++++++++++++++++++----------- 1 file changed, 98 insertions(+), 29 deletions(-) diff --git a/src/bgrepapp.rs b/src/bgrepapp.rs index bd4c9cf..7c83dcd 100644 --- a/src/bgrepapp.rs +++ b/src/bgrepapp.rs @@ -2,7 +2,7 @@ use crate::applet::Applet; use anyhow::{bail, Context, Result}; use clap::{arg, Command}; use memmap2::Mmap; -use std::fs::{self, File}; +use std::{collections::BTreeSet, fs::{self, read_dir, File}, path::PathBuf}; use regex::bytes::{Regex, RegexBuilder}; @@ -23,6 +23,7 @@ pub struct BgrepApplet { files: Option>, pattern: Option, verbose: bool, + recursive: bool } impl Applet for BgrepApplet { @@ -43,6 +44,7 @@ impl Applet for BgrepApplet { .about(self.description()) .arg(arg!(-v --verbose "verbose")) .arg(arg!(-x --hex "pattern is hex")) + .arg(arg!(-r --recursive "search in subfolders")) .arg(arg!( "pattern to search")) .arg(arg!( "file to search").num_args(1..)) } @@ -56,6 +58,7 @@ impl Applet for BgrepApplet { files: None, pattern: None, verbose: false, + recursive: false }) } @@ -88,40 +91,81 @@ impl Applet for BgrepApplet { files: Some(filenames), pattern: Some(pattern), verbose: args.get_flag("verbose"), + recursive: args.get_flag("recursive") })) } fn process(&self, _val: Vec) -> Result> { - let filenames = self.files.as_ref().unwrap(); - let many = filenames.len() > 1; - for filename in filenames.iter() { - if !fs::metadata(filename).is_ok_and(|f| f.is_file()) { - if self.verbose { - eprintln!("Skipping non-file {}", filename); - } - continue; - }; - - let f = File::open(filename); - match f { - Ok(f) => { - /* Mmap is necessarily unsafe as data can change unexpectedly */ - let data = - unsafe { Mmap::map(&f).with_context(|| "Could not mmap input file")? }; - - let regex = self.pattern.as_ref().unwrap(); - let matches = regex.find_iter(&data); - - /* Print offsets on stdout directly, to avoid buffering */ - for m in matches { - if many { - println!("{}: 0x{:x}", filename, m.start()); - } else { - println!("0x{:x}", m.start()); + let input_paths = self.files.as_ref().unwrap(); + let many = input_paths.len() > 1 || self.recursive; + // Make sure we keep the search order based on what is given first as the input + for input_path in input_paths.iter() { + // A BTreeSet ensure we get a consistant order + let mut paths_to_explore = BTreeSet::new(); + paths_to_explore.insert(PathBuf::from(input_path)); + + while let Some(path) = paths_to_explore.pop_first() { + let path_metadata = match fs::metadata(&path) { + Ok(x) => x, + Err(err) => { + eprintln!("Skiping {} with non-obtainable metadata ({})", path.to_string_lossy(), err); + continue; + } + }; + + if path_metadata.is_file() { + let f = File::open(&path); + match f { + Ok(f) => { + /* Mmap is necessarily unsafe as data can change unexpectedly */ + let data = + unsafe { Mmap::map(&f).with_context(|| "Could not mmap input file")? }; + + let regex = self.pattern.as_ref().unwrap(); + let matches = regex.find_iter(&data); + + /* Print offsets on stdout directly, to avoid buffering */ + for m in matches { + if many { + println!("{}: 0x{:x}", path.to_string_lossy(), m.start()); + } else { + println!("0x{:x}", m.start()); + } + } + } + Err(e) => eprintln!("Could not open {}: {}", path.to_string_lossy(), e), + } + } else if path_metadata.is_dir() { + if !self.recursive { + if self.verbose { + eprintln!("Skipping directory {}", path.to_string_lossy()) + } + continue; + } + + let dir_read = match read_dir(&path) { + Ok(x) => x, + Err(err) => { + eprintln!("Skipping directory {}, failed to list childs ({})", path.to_string_lossy(), err); + continue; } + }; + for sub_path_unchecked in dir_read { + let sub_path = match sub_path_unchecked { + Ok(x) => x, + Err(err) => { + eprintln!("Skipping a sub-path of directory {}, failed to list a child ({})", path.to_string_lossy(), err); + continue; + } + }; + paths_to_explore.insert(sub_path.path()); + } + } else { + if self.verbose { + eprintln!("Skipping non-file {}", path.to_string_lossy()); } + continue; } - Err(e) => eprintln!("Could not open {}: {}", filename, e), } } @@ -132,7 +176,7 @@ impl Applet for BgrepApplet { #[cfg(test)] mod tests { - use std::io::Write; + use std::{fs::File, io::Write}; #[test] fn test_cli() { @@ -173,4 +217,29 @@ mod tests { .stdout(predicates::str::contains(": 0x1\n")) .success(); } + + #[test] + fn test_recursive() { + let tmp_dir = tempfile::TempDir::new().unwrap(); + + { + let mut tmp_file = File::create(&tmp_dir.path().join("test_file.bin")).unwrap(); + tmp_file.write(b"2tmpfile").unwrap(); + } + + assert_cmd::Command::cargo_bin("rsbkb") + .expect("Could not run binary") + .args(&[ + "bgrep", + "--recursive", + "tmpfile", + tmp_dir.path().to_str().expect("Could not convert temp path to unicode") + ]) + .assert() + .stdout(predicates::str::contains(": 0x1\n")) + .success(); + + + + } } From decd5def096564b65a6d69152265f48de0cac30b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Rigo?= Date: Thu, 9 May 2024 20:43:29 +0200 Subject: [PATCH 2/4] cargo fmt --- src/bgrepapp.rs | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/bgrepapp.rs b/src/bgrepapp.rs index 7c83dcd..48f1bd1 100644 --- a/src/bgrepapp.rs +++ b/src/bgrepapp.rs @@ -2,7 +2,11 @@ use crate::applet::Applet; use anyhow::{bail, Context, Result}; use clap::{arg, Command}; use memmap2::Mmap; -use std::{collections::BTreeSet, fs::{self, read_dir, File}, path::PathBuf}; +use std::{ + collections::BTreeSet, + fs::{self, read_dir, File}, + path::PathBuf, +}; use regex::bytes::{Regex, RegexBuilder}; @@ -23,7 +27,7 @@ pub struct BgrepApplet { files: Option>, pattern: Option, verbose: bool, - recursive: bool + recursive: bool, } impl Applet for BgrepApplet { @@ -58,7 +62,7 @@ impl Applet for BgrepApplet { files: None, pattern: None, verbose: false, - recursive: false + recursive: false, }) } @@ -91,7 +95,7 @@ impl Applet for BgrepApplet { files: Some(filenames), pattern: Some(pattern), verbose: args.get_flag("verbose"), - recursive: args.get_flag("recursive") + recursive: args.get_flag("recursive"), })) } @@ -108,7 +112,11 @@ impl Applet for BgrepApplet { let path_metadata = match fs::metadata(&path) { Ok(x) => x, Err(err) => { - eprintln!("Skiping {} with non-obtainable metadata ({})", path.to_string_lossy(), err); + eprintln!( + "Skiping {} with non-obtainable metadata ({})", + path.to_string_lossy(), + err + ); continue; } }; @@ -118,8 +126,9 @@ impl Applet for BgrepApplet { match f { Ok(f) => { /* Mmap is necessarily unsafe as data can change unexpectedly */ - let data = - unsafe { Mmap::map(&f).with_context(|| "Could not mmap input file")? }; + let data = unsafe { + Mmap::map(&f).with_context(|| "Could not mmap input file")? + }; let regex = self.pattern.as_ref().unwrap(); let matches = regex.find_iter(&data); @@ -146,7 +155,11 @@ impl Applet for BgrepApplet { let dir_read = match read_dir(&path) { Ok(x) => x, Err(err) => { - eprintln!("Skipping directory {}, failed to list childs ({})", path.to_string_lossy(), err); + eprintln!( + "Skipping directory {}, failed to list childs ({})", + path.to_string_lossy(), + err + ); continue; } }; @@ -221,7 +234,7 @@ mod tests { #[test] fn test_recursive() { let tmp_dir = tempfile::TempDir::new().unwrap(); - + { let mut tmp_file = File::create(&tmp_dir.path().join("test_file.bin")).unwrap(); tmp_file.write(b"2tmpfile").unwrap(); @@ -233,13 +246,13 @@ mod tests { "bgrep", "--recursive", "tmpfile", - tmp_dir.path().to_str().expect("Could not convert temp path to unicode") + tmp_dir + .path() + .to_str() + .expect("Could not convert temp path to unicode"), ]) .assert() .stdout(predicates::str::contains(": 0x1\n")) .success(); - - - } } From b7d397989159fed85153b244f3b66a74f5594ab3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Rigo?= Date: Thu, 9 May 2024 20:46:50 +0200 Subject: [PATCH 3/4] bgrep: rename 'file' arg to 'path' --- src/bgrepapp.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/bgrepapp.rs b/src/bgrepapp.rs index 48f1bd1..32ddebb 100644 --- a/src/bgrepapp.rs +++ b/src/bgrepapp.rs @@ -24,7 +24,7 @@ fn build_pattern>(pattern: &P) -> Result { } pub struct BgrepApplet { - files: Option>, + paths: Option>, pattern: Option, verbose: bool, recursive: bool, @@ -50,7 +50,7 @@ impl Applet for BgrepApplet { .arg(arg!(-x --hex "pattern is hex")) .arg(arg!(-r --recursive "search in subfolders")) .arg(arg!( "pattern to search")) - .arg(arg!( "file to search").num_args(1..)) + .arg(arg!( "file(s) or directory(ies) to search in").num_args(1..)) } fn arg_or_stdin(&self) -> Option<&'static str> { @@ -59,7 +59,7 @@ impl Applet for BgrepApplet { fn new() -> Box { Box::new(Self { - files: None, + paths: None, pattern: None, verbose: false, recursive: false, @@ -68,7 +68,7 @@ impl Applet for BgrepApplet { fn parse_args(&self, args: &clap::ArgMatches) -> Result> { let filenames = args - .get_many::("file") + .get_many::("path") .unwrap() .map(|s| s.to_string()) .collect(); @@ -92,7 +92,7 @@ impl Applet for BgrepApplet { let pattern = build_pattern(&final_pat)?; Ok(Box::new(Self { - files: Some(filenames), + paths: Some(filenames), pattern: Some(pattern), verbose: args.get_flag("verbose"), recursive: args.get_flag("recursive"), @@ -100,7 +100,7 @@ impl Applet for BgrepApplet { } fn process(&self, _val: Vec) -> Result> { - let input_paths = self.files.as_ref().unwrap(); + let input_paths = self.paths.as_ref().unwrap(); let many = input_paths.len() > 1 || self.recursive; // Make sure we keep the search order based on what is given first as the input for input_path in input_paths.iter() { From cb7d5328c4bebe97fbd30e00296d3389fe519e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rapha=C3=ABl=20Rigo?= Date: Thu, 9 May 2024 20:49:24 +0200 Subject: [PATCH 4/4] update Changelog --- Changelog.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Changelog.md b/Changelog.md index 4acd360..70026c4 100644 --- a/Changelog.md +++ b/Changelog.md @@ -1,4 +1,4 @@ -* 2024-XX-XX: v1.4 : `crc` can now compute all known types, alg list updated. +* 2024-XX-XX: v1.4 : `crc` can now compute all known types, alg list updated. Add `--recursive` option to `bgrep`, thanks @marius851000! * 2024-01-24: v1.3 : `slice` now supports non-seekable files. `tsdec` verbose mode. `bgrep` multiple args. Tests now cover real CLI invocations. * 2023-09-26: v1.2.1: fix CLI flags parsing, add skipping of invalid files in findso * 2023-08-13: v1.2: inflate/deflate applet ; base64 update: support custom alphabet ; global: check if given value is potentially a file and warn user