From 5a0bc0af0d36381503221bc169e2d796fec40d41 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 11 Aug 2024 15:46:58 -0400 Subject: [PATCH 01/31] add new zip-cli crate in workspace - implement absurd arg parsing - add help text - add long help text - first iteration of compression - add --stdout - add large file support - make the compress command mostly work - make compress flags work better - verbose output works! - reduce size of zip-cli from 2.9M->1.3M - make a new subcrate clite for different features/opt flags - remove clap - set --large-file automatically - clarify info and extract are a TODO for now - move OutputHandle to lib.rs - factor out main method into lib.rs - clarify the behavior of -f and -r around symlinks - add --append option - rename CompressError -> CommandError - make much more subcommand logic generic through traits - wrap compress help to 80 chars - begin extract help text squashed commits: - implement matching logic except for --match - FINALLY fix input zips iteration - implement a couple basic transforms - add terrible ContentTransformer - add entry spec transformer - give up and use an unsafecell - impl transform_name - initial extract impl - add name matchers - impl --match - modularize extract - begin impl transforms - refactor args modules - init entry data - stub out utterly absurd cli spec - parse our absurd cli spec - impl merged input - do absurd stuff without checking for compilation success - ok it compiles now - ok it might even run correctly now? - make output files work - default to regexp for replacement and disallow globs from replacement - add --{min,max}-size match exprs - impl pattern transformers - support --transform!! - anchoring, prefixes, the whole shebang. 
i think we're done here - make glob and rx optional dependencies - add stub for info command - parameterize the match help text to reuse for info - stub out info format specs - write out some more help text - remove unnecessary unsafe - parse archive overview format strings - make a trait for format parsing - parse entry format - finish help text for info - add info command stub - implement basic entry info - write directly to the output stream, don't allocate a string - add escaped tab component - rename some traits and methods - pass around a reference to EntryData instead - mess with mode bits to make them look more like ls - add some verbose logs - refactor info modules - refactor parseable directive - make compiled format strings much more generic (!) - refactor modules of compiled formatting - archive format works!!!!! omg - move entry and archive iteration into helper methods - make archive formatting work fully with ArchiveData - archive data works for stdin! - add more logging to extraction - remove allocations in perms todo sorting - several preliminary notes added to extract command - make process_entry() helper method - remove UnsafeCell!!! 
- don't reallocate the symlink target - move symlink processing to a helper - refactor a lot of extraction - ok extract makes a lot more sense now - support --archive-comment for compression - add a TODO - make symlink creation more readable - refactor output parsing into modules - make parsing outputs much MUCH more readable with a builder - all info directives are now supported --- Cargo.toml | 7 +- cli/Cargo.toml | 70 + cli/clite/Cargo.toml | 35 + cli/clite/src/main.rs | 3 + cli/src/args.rs | 208 +++ cli/src/args/compress.rs | 479 +++++++ cli/src/args/extract.rs | 1619 ++++++++++++++++++++++++ cli/src/args/info.rs | 760 +++++++++++ cli/src/compress.rs | 502 ++++++++ cli/src/extract.rs | 195 +++ cli/src/extract/entries.rs | 132 ++ cli/src/extract/matcher.rs | 528 ++++++++ cli/src/extract/named_outputs.rs | 347 +++++ cli/src/extract/receiver.rs | 386 ++++++ cli/src/extract/transform.rs | 707 +++++++++++ cli/src/info.rs | 167 +++ cli/src/info/directives.rs | 703 ++++++++++ cli/src/info/formats.rs | 425 +++++++ cli/src/lib.rs | 175 +++ cli/src/main.rs | 3 + src/compression.rs | 2 +- src/extra_fields/extended_timestamp.rs | 2 +- src/write.rs | 8 +- 23 files changed, 7456 insertions(+), 7 deletions(-) create mode 100644 cli/Cargo.toml create mode 100644 cli/clite/Cargo.toml create mode 100644 cli/clite/src/main.rs create mode 100644 cli/src/args.rs create mode 100644 cli/src/args/compress.rs create mode 100644 cli/src/args/extract.rs create mode 100644 cli/src/args/info.rs create mode 100644 cli/src/compress.rs create mode 100644 cli/src/extract.rs create mode 100644 cli/src/extract/entries.rs create mode 100644 cli/src/extract/matcher.rs create mode 100644 cli/src/extract/named_outputs.rs create mode 100644 cli/src/extract/receiver.rs create mode 100644 cli/src/extract/transform.rs create mode 100644 cli/src/info.rs create mode 100644 cli/src/info/directives.rs create mode 100644 cli/src/info/formats.rs create mode 100644 cli/src/lib.rs create mode 100644 
cli/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 11c9f18be..575be8b30 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,6 +10,7 @@ authors = [ license = "MIT" repository = "https://github.com/zip-rs/zip2.git" keywords = ["zip", "archive", "compression"] +categories = ["compression", "filesystem", "parser-implementations"] rust-version = "1.73.0" description = """ Library to support the reading and writing of zip files. @@ -23,7 +24,9 @@ all-features = true rustdoc-args = ["--cfg", "docsrs"] [workspace.dependencies] -time = { version = "0.3.1", default-features = false } +arbitrary = { version = "1.3.2", features = ["derive"] } +time = { version = "0.3.36", default-features = false } +zip = { path = ".", default-features = false } [dependencies] aes = { version = "0.8", optional = true } @@ -53,7 +56,7 @@ lzma-rs = { version = "0.3", default-features = false, optional = true } crossbeam-utils = "0.8.20" [target.'cfg(fuzzing)'.dependencies] -arbitrary = { version = "1.3.2", features = ["derive"] } +arbitrary.workspace = true [dev-dependencies] bencher = "0.1.5" diff --git a/cli/Cargo.toml b/cli/Cargo.toml new file mode 100644 index 000000000..135270248 --- /dev/null +++ b/cli/Cargo.toml @@ -0,0 +1,70 @@ +[package] +name = "zip-cli" +version = "0.0.1" +authors = [ + "Danny McClanahan ", +] +license = "MIT" +repository = "https://github.com/zip-rs/zip2.git" +keywords = ["zip", "archive", "compression", "cli"] +categories = ["command-line-utilities", "compression", "filesystem", "development-tools::build-utils"] +rust-version = "1.74.0" +description = """ +Binary for creation and manipulation of zip files. +""" +edition = "2021" + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[lib] + +[[bin]] +name = "zip-cli" + +[dependencies] +glob = { version = "0.3", optional = true } +regex = { version = "1", optional = true } + +[dependencies.zip] +path = ".." 
+default-features = false + +[features] +aes-crypto = ["zip/aes-crypto"] +bzip2 = ["zip/bzip2"] +chrono = ["zip/chrono"] +deflate64 = ["zip/deflate64"] +deflate = ["zip/deflate"] +deflate-flate2 = ["zip/deflate-flate2"] +deflate-zlib = ["zip/deflate-zlib"] +deflate-zlib-ng = ["zip/deflate-zlib-ng"] +deflate-zopfli = ["zip/deflate-zopfli"] +lzma = ["zip/lzma"] +time = ["zip/time"] +xz = ["zip/xz"] +zstd = ["zip/zstd"] + +glob = ["dep:glob"] +rx = ["dep:regex"] + +default = [ + "aes-crypto", + "bzip2", + "deflate64", + "deflate", + "lzma", + "time", + "xz", + "zstd", + "glob", + "rx", +] + + +[profile.release] +strip = true +lto = true +opt-level = 3 +codegen-units = 1 diff --git a/cli/clite/Cargo.toml b/cli/clite/Cargo.toml new file mode 100644 index 000000000..607bf3314 --- /dev/null +++ b/cli/clite/Cargo.toml @@ -0,0 +1,35 @@ +[package] +name = "zip-clite" +version = "0.0.1" +authors = [ + "Danny McClanahan ", +] +license = "MIT" +repository = "https://github.com/zip-rs/zip2.git" +keywords = ["zip", "archive", "compression", "cli"] +categories = ["command-line-utilities", "compression", "filesystem", "development-tools::build-utils"] +rust-version = "1.74.0" +description = """ +Binary for creation and manipulation of zip files. +""" +edition = "2021" + +# Prevent this from interfering with workspaces +[workspace] +members = ["."] + +[[bin]] +name = "zip-clite" + +[dependencies] + +[dependencies.zip-cli] +path = ".." 
+default-features = false +features = ["deflate-flate2", "deflate-zlib"] + +[profile.release] +strip = true +lto = true +opt-level = "s" +codegen-units = 1 diff --git a/cli/clite/src/main.rs b/cli/clite/src/main.rs new file mode 100644 index 000000000..95fae2ac9 --- /dev/null +++ b/cli/clite/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + zip_cli::driver::main(); +} diff --git a/cli/src/args.rs b/cli/src/args.rs new file mode 100644 index 000000000..b59fd0cbd --- /dev/null +++ b/cli/src/args.rs @@ -0,0 +1,208 @@ +use std::{collections::VecDeque, ffi::OsString, fmt, sync::OnceLock}; + +#[derive(Debug)] +pub enum ArgParseError { + StdoutMessage(String), + StderrMessage(String), +} + +#[derive(Debug)] +pub struct ZipCli { + pub verbose: bool, + pub command: ZipCommand, +} + +#[derive(Debug)] +enum SubcommandName { + Compress, + Info, + Extract, +} + +static PARSED_EXE_NAME: OnceLock = OnceLock::new(); + +impl ZipCli { + const VERSION: &'static str = env!("CARGO_PKG_VERSION"); + const DESCRIPTION: &'static str = env!("CARGO_PKG_DESCRIPTION"); + + pub const INTERNAL_ERROR_EXIT_CODE: i32 = 3; + pub const ARGV_PARSE_FAILED_EXIT_CODE: i32 = 2; + pub const NON_FAILURE_EXIT_CODE: i32 = 0; + + pub fn binary_name() -> &'static str { + PARSED_EXE_NAME.get().expect("binary name was not set yet") + } + + fn generate_version_text() -> String { + format!("{} {}\n", Self::binary_name(), Self::VERSION) + } + + fn generate_usage_line() -> String { + format!("Usage: {} [OPTIONS] ", Self::binary_name()) + } + + fn generate_full_help_text() -> String { + format!( + "\ +{} + +{} + +Commands: + {}{}{} + {}{}{} + {}{}{} + +Options: + -v, --verbose Write information logs to stderr + -h, --help Print help + -V, --version Print version +", + Self::DESCRIPTION, + Self::generate_usage_line(), + compress::Compress::COMMAND_NAME, + compress::Compress::COMMAND_TABS, + compress::Compress::COMMAND_DESCRIPTION, + info::Info::COMMAND_NAME, + info::Info::COMMAND_TABS, + info::Info::COMMAND_DESCRIPTION, + 
extract::Extract::COMMAND_NAME, + extract::Extract::COMMAND_TABS, + extract::Extract::COMMAND_DESCRIPTION, + ) + } + + fn generate_brief_help_text(context: &str) -> String { + format!( + "\ +error: {context} + +{} + +For more information, try '--help'. +", + Self::generate_usage_line() + ) + } + + fn parse_up_to_subcommand_name( + argv: &mut VecDeque, + ) -> Result<(bool, SubcommandName), ArgParseError> { + let mut verbose: bool = false; + let mut subcommand_name: Option = None; + while subcommand_name.is_none() { + match argv.pop_front() { + None => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StderrMessage(help_text)); + } + Some(arg) => match arg.as_encoded_bytes() { + b"-v" | b"--verbose" => verbose = true, + b"-V" | b"--version" => { + let version_text = Self::generate_version_text(); + return Err(ArgParseError::StdoutMessage(version_text)); + } + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + b"compress" => subcommand_name = Some(SubcommandName::Compress), + b"info" => subcommand_name = Some(SubcommandName::Info), + b"extract" => subcommand_name = Some(SubcommandName::Extract), + arg_bytes => { + let context = if arg_bytes.starts_with(b"-") { + format!("unrecognized global flag {arg:?}") + } else { + format!("unrecognized subcommand name {arg:?}") + }; + let help_text = Self::generate_brief_help_text(&context); + return Err(ArgParseError::StderrMessage(help_text)); + } + }, + } + } + Ok((verbose, subcommand_name.unwrap())) + } + + pub fn parse_argv(argv: impl IntoIterator) -> Result { + let mut argv: VecDeque = argv.into_iter().collect(); + let exe_name: String = argv + .pop_front() + .expect("exe name not on command line") + .into_string() + .expect("exe name not valid unicode"); + PARSED_EXE_NAME + .set(exe_name) + .expect("exe name already written"); + let (verbose, subcommand_name) = Self::parse_up_to_subcommand_name(&mut argv)?; + 
let command = match subcommand_name { + SubcommandName::Info => ZipCommand::Info(info::Info::parse_argv(argv)?), + SubcommandName::Extract => ZipCommand::Extract(extract::Extract::parse_argv(argv)?), + SubcommandName::Compress => ZipCommand::Compress(compress::Compress::parse_argv(argv)?), + }; + Ok(Self { verbose, command }) + } +} + +#[derive(Debug)] +pub enum ZipCommand { + Compress(compress::Compress), + Info(info::Info), + Extract(extract::Extract), +} + +pub trait CommandFormat: fmt::Debug { + const COMMAND_NAME: &'static str; + const COMMAND_TABS: &'static str; + const COMMAND_DESCRIPTION: &'static str; + + const USAGE_LINE: &'static str; + + fn generate_usage_line() -> String { + format!( + "Usage: {} {} {}", + ZipCli::binary_name(), + Self::COMMAND_NAME, + Self::USAGE_LINE, + ) + } + + fn generate_help() -> String; + + fn generate_full_help_text() -> String { + format!( + "\ +{} + +{} +{}", + Self::COMMAND_DESCRIPTION, + Self::generate_usage_line(), + Self::generate_help(), + ) + } + + fn generate_brief_help_text(context: &str) -> String { + format!( + "\ +error: {context} + +{} +", + Self::generate_usage_line() + ) + } + + fn exit_arg_invalid(context: &str) -> ArgParseError { + let message = Self::generate_brief_help_text(context); + ArgParseError::StderrMessage(message) + } + + fn parse_argv(argv: VecDeque) -> Result + where + Self: Sized; +} + +pub mod compress; +pub mod extract; +pub mod info; diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs new file mode 100644 index 000000000..796b47990 --- /dev/null +++ b/cli/src/args/compress.rs @@ -0,0 +1,479 @@ +use super::{ArgParseError, CommandFormat}; + +use std::{collections::VecDeque, ffi::OsString, num::ParseIntError, path::PathBuf}; + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum CompressionMethodArg { + Stored, + Deflate, /* requires having zip/_deflate-any set to compile */ + #[cfg(feature = "deflate64")] + Deflate64, + #[cfg(feature = "bzip2")] + Bzip2, + 
#[cfg(feature = "zstd")] + Zstd, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct CompressionLevel(pub i64); + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct UnixPermissions(pub u32); + +impl UnixPermissions { + pub fn parse(s: &str) -> Result { + Ok(Self(u32::from_str_radix(s, 8)?)) + } +} + +#[derive(Debug)] +pub enum CompressionArg { + CompressionMethod(CompressionMethodArg), + Level(CompressionLevel), + UnixPermissions(UnixPermissions), + LargeFile(bool), + Name(String), + Dir, + Symlink, + Immediate(OsString), + FilePath(PathBuf), + RecursiveDirPath(PathBuf), +} + +#[derive(Debug)] +pub enum OutputType { + Stdout { allow_tty: bool }, + File { path: PathBuf, append: bool }, +} + +#[derive(Debug)] +pub struct Compress { + pub output: OutputType, + pub archive_comment: Option, + pub args: Vec, + pub positional_paths: Vec, +} + +impl Compress { + #[cfg(feature = "deflate64")] + const DEFLATE64_HELP_LINE: &'static str = " - deflate64:\twith deflate64\n"; + #[cfg(not(feature = "deflate64"))] + const DEFLATE64_HELP_LINE: &'static str = ""; + + #[cfg(feature = "bzip2")] + const BZIP2_HELP_LINE: &'static str = " - bzip2:\twith bzip2\n"; + #[cfg(not(feature = "bzip2"))] + const BZIP2_HELP_LINE: &'static str = ""; + + #[cfg(feature = "zstd")] + const ZSTD_HELP_LINE: &'static str = " - zstd:\twith zstd\n"; + #[cfg(not(feature = "zstd"))] + const ZSTD_HELP_LINE: &'static str = ""; +} + +/* TODO: add support for entry and file comments! */ +impl CommandFormat for Compress { + const COMMAND_NAME: &'static str = "compress"; + const COMMAND_TABS: &'static str = "\t"; + const COMMAND_DESCRIPTION: &'static str = + "Generate an archive from data in argument strings or read from the filesystem."; + + const USAGE_LINE: &'static str = + "[-h|--help] [OUTPUT-FLAGS] [--archive-comment ] [ENTRY]... 
[--] [PATH]..."; + + fn generate_help() -> String { + format!( + r#" + -h, --help Print help + +Output flags: +Where and how to write the generated zip archive. + + -o, --output-file + Output zip file path to write. + The output file is truncated if it already exists, unless --append is + provided. If not provided, output is written to stdout. + + --append + If an output path is provided with -o, open it as an existing zip + archive and append to it. If the output path does not already exist, + no error is produced, and a new zip file is created at the given path. + + --stdout + Allow writing output to stdout even if stdout is a tty. + +Global flags: +These flags describe information set for the entire produced archive. + + --archive-comment + If provided, this will set the archive's comment field to the + specified bytes. This does not need to be valid unicode. + +Entries: +After output flags are provided, the rest of the command line is +attributes and entry data. Attributes modify later entries. + +Sticky attributes: +These flags apply to everything that comes after them until reset by another +instance of the same attribute. Sticky attributes continue to apply to +positional arguments received after processing all flags. + + -c, --compression-method + Which compression technique to use. + Defaults to deflate if not specified. + + Possible values: + - stored: uncompressed + - deflate: with deflate (default) +{}{}{} + -l, --compression-level + How much compression to perform, from 0..=24. + The accepted range of values differs for each technique. + + -m, --mode + Unix permissions to apply to the file, in octal (like chmod). + + --large-file [true|false] + Whether to enable large file support. + This may take up more space for records, but allows files over 32 bits + in length to be written, up to 64 bit sizes. 
+ File arguments over 32 bits in length (either provided explicitly or + encountered when traversing a recursive directory) will have this flag + set automatically, without affecting the sticky value for + later options. + Therefore, this option likely never has to be set explicitly by + the user. + +Non-sticky attributes: +These flags only apply to the next entry after them, and may not be repeated. + + -n, --name + The name to apply to the entry. This must be UTF-8 encoded. + + -s, --symlink + Make the next entry into a symlink entry. + A symlink entry may be immediate with -i, or it may copy the target + from an existing symlink with -f. + +Entry data: +Each of these flags creates an entry in the output zip archive. + + -d, --dir + Create a directory entry. + A name must be provided beforehand with -n. + + -i, --immediate + Write an entry containing the data in the argument, which need not be + UTF-8 encoded but will exit early upon encountering any null bytes. + A name must be provided beforehand with -n. + + -f, --file + Write an entry with the contents of this file path. + A name may be provided beforehand with -n, otherwise the name will be + inferred from relativizing the given path to the working directory. + Note that sockets are currently not supported and will produce an + error. Providing a path to a directory will produce an error. + + If -s was specified beforehand, the path will be read as a symlink, + which will produce an error if the path does not point to a symbolic + link. If -s was not specified beforehand and a symlink path was + provided, then the symbolic link will be interpreted as if it was + a file with the contents of the symlink target, but with its name + corresponding to the symlink path (unless overridden with -n). + + -r, --recursive-dir + Write all the recursive contents of this directory path. + A name may be provided beforehand with -n, which will be used as the + prefix for all recursive contents of this directory. 
Otherwise, the + name will be inferred from relativizing the given path to the + working directory. + + -s is not allowed before this argument. If a path to a symbolic link + is provided, it will be treated as if it pointed to a directory with + the recursive contents of the target directory, but with its name + corresponding to the symlink path (unless overridden with -n). + Providing a symlink path which points to a file will produce an error. + +Positional entries: + [PATH]... + Write the file or recursive directory contents, relativizing the path. + If the given path points to a file, then a single file entry will + be written. + If the given path is a symlink, then a single symlink entry will + be written. + If the given path refers to a directory, then the recursive contents + will be written, reproducing files and symlinks. + Socket paths will produce an error. +"#, + Self::DEFLATE64_HELP_LINE, + Self::BZIP2_HELP_LINE, + Self::ZSTD_HELP_LINE, + ) + } + + fn parse_argv(mut argv: VecDeque) -> Result { + let mut allow_stdout: bool = false; + let mut append_to_output_path: bool = false; + let mut output_path: Option = None; + let mut archive_comment: Option = None; + let mut args: Vec = Vec::new(); + let mut positional_paths: Vec = Vec::new(); + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + + /* Output flags */ + b"--stdout" => { + if let Some(output_path) = output_path.take() { + return Err(Self::exit_arg_invalid(&format!( + "--stdout provided along with output file {output_path:?}" + ))); + } else if append_to_output_path { + return Err(Self::exit_arg_invalid( + "--stdout provided along with --append", + )); + } else if !args.is_empty() || !positional_paths.is_empty() { + return Err(Self::exit_arg_invalid("--stdout provided after entries")); + } else if allow_stdout { + return 
Err(Self::exit_arg_invalid("--stdout provided twice")); + } else { + allow_stdout = true; + } + } + b"--append" => { + if append_to_output_path { + return Err(Self::exit_arg_invalid("--append provided twice")); + } else if !args.is_empty() || !positional_paths.is_empty() { + return Err(Self::exit_arg_invalid("--append provided after entries")); + } else if allow_stdout { + return Err(Self::exit_arg_invalid( + "--stdout provided along with --append", + )); + } else { + append_to_output_path = true; + } + } + b"-o" | b"--output-file" => { + let new_path = argv.pop_front().map(PathBuf::from).ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for -o/--output-file") + })?; + if let Some(prev_path) = output_path.take() { + return Err(Self::exit_arg_invalid(&format!( + "--output-file provided twice: {prev_path:?} and {new_path:?}" + ))); + } else if allow_stdout { + return Err(Self::exit_arg_invalid( + "--stdout provided along with output file", + )); + } else if !args.is_empty() || !positional_paths.is_empty() { + return Err(Self::exit_arg_invalid( + "-o/--output-file provided after entries", + )); + } else { + output_path = Some(new_path); + } + } + + /* Global flags */ + b"--archive-comment" => { + let new_comment = argv.pop_front().ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for --archive-comment") + })?; + if let Some(prev_comment) = archive_comment.take() { + return Err(Self::exit_arg_invalid(&format!( + "--archive-comment provided twice: {prev_comment:?} and {new_comment:?}" + ))); + } else if !args.is_empty() || !positional_paths.is_empty() { + return Err(Self::exit_arg_invalid( + "--archive-comment provided after entries", + )); + } else { + archive_comment = Some(new_comment); + } + } + + /* Attributes */ + b"-c" | b"--compression-method" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for -c/--compression-method", + )) + } + Some(name) => match name.as_encoded_bytes() { + 
b"stored" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Stored, + )), + b"deflate" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Deflate, + )), + #[cfg(feature = "deflate64")] + b"deflate64" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Deflate64, + )), + #[cfg(feature = "bzip2")] + b"bzip2" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Bzip2, + )), + #[cfg(feature = "zstd")] + b"zstd" => args.push(CompressionArg::CompressionMethod( + CompressionMethodArg::Zstd, + )), + _ => { + return Err(Self::exit_arg_invalid( + "unrecognized compression method {name:?}", + )); + } + }, + }, + b"-l" | b"--compression-level" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for -l/--compression-level", + )); + } + Some(level) => match level.into_string() { + Err(level) => { + return Err(Self::exit_arg_invalid(&format!( + "invalid unicode provided for compression level: {level:?}" + ))); + } + Ok(level) => match level.parse::() { + Err(e) => { + return Err(Self::exit_arg_invalid(&format!( + "failed to parse integer for compression level: {e}" + ))); + } + Ok(level) => { + if (0..=24).contains(&level) { + args.push(CompressionArg::Level(CompressionLevel(level))) + } else { + return Err(Self::exit_arg_invalid(&format!( + "compression level {level} was not between 0 and 24" + ))); + } + } + }, + }, + }, + b"-m" | b"--mode" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid("no argument provided for -m/--mode")); + } + Some(mode) => match mode.into_string() { + Err(mode) => { + return Err(Self::exit_arg_invalid(&format!( + "invalid unicode provided for mode: {mode:?}" + ))); + } + Ok(mode) => match UnixPermissions::parse(&mode) { + Err(e) => { + return Err(Self::exit_arg_invalid(&format!( + "failed to parse integer for mode: {e}" + ))); + } + Ok(mode) => args.push(CompressionArg::UnixPermissions(mode)), + }, 
+ }, + }, + b"--large-file" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for --large-file", + )); + } + Some(large_file) => match large_file.as_encoded_bytes() { + b"true" => args.push(CompressionArg::LargeFile(true)), + b"false" => args.push(CompressionArg::LargeFile(false)), + _ => { + return Err(Self::exit_arg_invalid(&format!( + "unrecognized value for --large-file: {large_file:?}" + ))); + } + }, + }, + + /* Data */ + b"-n" | b"--name" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid("no argument provided for -n/--name")) + } + Some(name) => match name.into_string() { + Err(name) => { + return Err(Self::exit_arg_invalid(&format!( + "invalid unicode provided for name: {name:?}" + ))); + } + Ok(name) => args.push(CompressionArg::Name(name)), + }, + }, + b"-s" | b"--symlink" => args.push(CompressionArg::Symlink), + b"-d" | b"--dir" => args.push(CompressionArg::Dir), + b"-i" | b"--immediate" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for -i/--immediate", + )); + } + Some(data) => args.push(CompressionArg::Immediate(data)), + }, + b"-f" | b"--file" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid("no argument provided for -f/--file")); + } + Some(file) => args.push(CompressionArg::FilePath(file.into())), + }, + b"-r" | b"--recursive-dir" => match argv.pop_front() { + None => { + return Err(Self::exit_arg_invalid( + "no argument provided for -r/--recursive-dir", + )); + } + Some(dir) => args.push(CompressionArg::RecursiveDirPath(dir.into())), + }, + + /* Transition to positional args */ + b"--" => break, + arg_bytes => { + if arg_bytes.starts_with(b"-") { + return Err(Self::exit_arg_invalid(&format!( + "unrecognized flag {arg:?}" + ))); + } else { + argv.push_front(arg); + break; + } + } + } + } + + positional_paths.extend(argv.into_iter().map(|arg| arg.into())); + + let output = if let Some(path) = 
output_path { + OutputType::File { + path, + append: append_to_output_path, + } + } else { + OutputType::Stdout { + allow_tty: allow_stdout, + } + }; + + Ok(Self { + output, + archive_comment, + args, + positional_paths, + }) + } +} + +impl crate::driver::ExecuteCommand for Compress { + fn execute(self, err: impl std::io::Write) -> Result<(), crate::CommandError> { + crate::compress::execute_compress(err, self) + } +} diff --git a/cli/src/args/extract.rs b/cli/src/args/extract.rs new file mode 100644 index 000000000..1a580ad54 --- /dev/null +++ b/cli/src/args/extract.rs @@ -0,0 +1,1619 @@ +use super::{ArgParseError, CommandFormat}; + +use zip::CompressionMethod; + +use std::{collections::VecDeque, ffi::OsString, mem, path::PathBuf}; + +#[derive(Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum ContentTransform { + Extract { name: Option }, +} + +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)] +pub enum ComponentSelector { + #[default] + Path, + Basename, + Dirname, + FileExtension, +} + +impl ComponentSelector { + pub fn parse(s: &[u8]) -> Option { + match s { + b"path" => Some(Self::Path), + b"basename" => Some(Self::Basename), + b"dirname" => Some(Self::Dirname), + b"ext" => Some(Self::FileExtension), + _ => None, + } + } +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Hash, Copy, Clone)] +pub enum PatternSelectorType { + Glob, + Literal, + Regexp, +} + +impl PatternSelectorType { + pub fn parse(s: &[u8]) -> Option { + match s { + b"glob" => Some(Self::Glob), + b"lit" => Some(Self::Literal), + b"rx" => Some(Self::Regexp), + _ => None, + } + } + + pub const fn default_for_match() -> Self { + if cfg!(feature = "glob") { + Self::Glob + } else { + Self::Literal + } + } + + pub const fn default_for_replacement() -> Self { + if cfg!(feature = "rx") { + Self::Regexp + } else { + Self::Literal + } + } +} + +#[derive(Debug)] +pub enum PatternSelectorModifier { + CaseInsensitive, + MultipleMatches, + PrefixAnchored, + 
SuffixAnchored, +} + +impl PatternSelectorModifier { + pub fn parse(s: &[u8]) -> Option { + match s { + b"i" => Some(Self::CaseInsensitive), + b"g" => Some(Self::MultipleMatches), + b"p" => Some(Self::PrefixAnchored), + b"s" => Some(Self::SuffixAnchored), + _ => None, + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct PatternModifierFlags { + pub case_insensitive: bool, + pub multiple_matches: bool, + pub prefix_anchored: bool, + pub suffix_anchored: bool, +} + +#[derive(Debug)] +pub struct PatternSelector { + pub pat_sel: PatternSelectorType, + pub modifiers: PatternModifierFlags, +} + +impl PatternSelector { + pub fn parse(s: &[u8]) -> Option { + match s.iter().position(|c| *c == b':') { + Some(modifiers_ind) => { + let pat_sel_str = &s[..modifiers_ind]; + let modifiers_str = &s[(modifiers_ind + 1)..]; + + let pat_sel = PatternSelectorType::parse(pat_sel_str)?; + + let mut modifiers = PatternModifierFlags::default(); + let mod_els = modifiers_str + .split(|c| *c == b':') + .map(PatternSelectorModifier::parse) + .collect::>>()?; + for m in mod_els.into_iter() { + match m { + PatternSelectorModifier::CaseInsensitive => { + modifiers.case_insensitive = true; + } + PatternSelectorModifier::MultipleMatches => { + modifiers.multiple_matches = true; + } + PatternSelectorModifier::PrefixAnchored => { + modifiers.prefix_anchored = true; + } + PatternSelectorModifier::SuffixAnchored => { + modifiers.suffix_anchored = true; + } + } + } + Some(Self { pat_sel, modifiers }) + } + None => { + let pat_sel = PatternSelectorType::parse(s)?; + Some(Self { + pat_sel, + modifiers: Default::default(), + }) + } + } + } + + pub fn default_for_context(ctx: PatternContext) -> Self { + match ctx { + PatternContext::Match => Self::default_for_match(), + PatternContext::Replacement => Self::default_for_replacement(), + } + } + + pub fn default_for_match() -> Self { + Self { + pat_sel: PatternSelectorType::default_for_match(), + modifiers: 
PatternModifierFlags::default(), + } + } + + pub fn default_for_replacement() -> Self { + Self { + pat_sel: PatternSelectorType::default_for_replacement(), + modifiers: PatternModifierFlags::default(), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum PatternContext { + Match, + Replacement, +} + +pub fn parse_only_pat_sel(s: &[u8], ctx: PatternContext) -> Option { + match s.iter().position(|c| *c == b':') { + Some(pat_sel_ind) => { + let pat_sel_str = &s[(pat_sel_ind + 1)..]; + + let pat_sel = PatternSelector::parse(pat_sel_str)?; + Some(pat_sel) + } + None => Some(PatternSelector::default_for_context(ctx)), + } +} + +pub fn parse_comp_and_pat_sel( + s: &[u8], + ctx: PatternContext, +) -> Option<(ComponentSelector, PatternSelector)> { + match ( + s.iter().position(|c| *c == b'='), + s.iter().position(|c| *c == b':'), + ) { + (Some(comp_sel_ind), Some(pat_sel_ind)) => { + if comp_sel_ind >= pat_sel_ind { + return None; + } + let comp_sel_str = &s[(comp_sel_ind + 1)..pat_sel_ind]; + let pat_sel_str = &s[(pat_sel_ind + 1)..]; + + let comp_sel = ComponentSelector::parse(comp_sel_str)?; + let pat_sel = PatternSelector::parse(pat_sel_str)?; + Some((comp_sel, pat_sel)) + } + (Some(comp_sel_ind), None) => { + let comp_sel_str = &s[(comp_sel_ind + 1)..]; + + let comp_sel = ComponentSelector::parse(comp_sel_str)?; + let pat_sel = PatternSelector::default_for_context(ctx); + Some((comp_sel, pat_sel)) + } + (None, Some(pat_sel_ind)) => { + let pat_sel_str = &s[(pat_sel_ind + 1)..]; + + let pat_sel = PatternSelector::parse(pat_sel_str)?; + let comp_sel = ComponentSelector::default(); + Some((comp_sel, pat_sel)) + } + (None, None) => { + let comp_sel = ComponentSelector::default(); + let pat_sel = PatternSelector::default_for_context(ctx); + Some((comp_sel, pat_sel)) + } + } +} + +#[derive(Debug)] +pub enum EntryType { + File, + Dir, + Symlink, +} + +impl EntryType { + pub fn parse(s: &[u8]) -> Option { + match s { + b"file" => 
Some(Self::File), + b"dir" => Some(Self::Dir), + b"symlink" => Some(Self::Symlink), + _ => None, + } + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum NonSpecificCompressionMethodArg { + Any, + Known, +} + +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +pub enum SpecificCompressionMethodArg { + Stored, + Deflated, + #[cfg(feature = "deflate64")] + Deflate64, + #[cfg(feature = "bzip2")] + Bzip2, + #[cfg(feature = "zstd")] + Zstd, + #[cfg(feature = "lzma")] + Lzma, + #[cfg(feature = "xz")] + Xz, +} + +impl SpecificCompressionMethodArg { + pub const KNOWN_COMPRESSION_METHODS: &[CompressionMethod] = &[ + CompressionMethod::Stored, + CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + CompressionMethod::Zstd, + #[cfg(feature = "lzma")] + CompressionMethod::Lzma, + #[cfg(feature = "xz")] + CompressionMethod::Xz, + ]; + + pub fn translate_to_zip(self) -> CompressionMethod { + match self { + Self::Stored => CompressionMethod::Stored, + Self::Deflated => CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + Self::Deflate64 => CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + Self::Bzip2 => CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + Self::Zstd => CompressionMethod::Zstd, + #[cfg(feature = "lzma")] + Self::Lzma => CompressionMethod::Lzma, + #[cfg(feature = "xz")] + Self::Xz => CompressionMethod::Xz, + } + } +} + +#[derive(Debug, PartialEq, Eq)] +pub enum CompressionMethodArg { + NonSpecific(NonSpecificCompressionMethodArg), + Specific(SpecificCompressionMethodArg), +} + +impl CompressionMethodArg { + pub fn parse(s: &[u8]) -> Option { + match s { + b"any" => Some(Self::NonSpecific(NonSpecificCompressionMethodArg::Any)), + b"known" => Some(Self::NonSpecific(NonSpecificCompressionMethodArg::Known)), + b"stored" => Some(Self::Specific(SpecificCompressionMethodArg::Stored)), + b"deflated" => 
Some(Self::Specific(SpecificCompressionMethodArg::Deflated)), + #[cfg(feature = "deflate64")] + b"deflate64" => Some(Self::Specific(SpecificCompressionMethodArg::Deflate64)), + #[cfg(feature = "bzip2")] + b"bzip2" => Some(Self::Specific(SpecificCompressionMethodArg::Bzip2)), + #[cfg(feature = "zstd")] + b"zstd" => Some(Self::Specific(SpecificCompressionMethodArg::Zstd)), + #[cfg(feature = "lzma")] + b"lzma" => Some(Self::Specific(SpecificCompressionMethodArg::Lzma)), + #[cfg(feature = "xz")] + b"xz" => Some(Self::Specific(SpecificCompressionMethodArg::Xz)), + _ => None, + } + } +} + +#[derive(Debug)] +pub enum DepthLimitArg { + Max(u8), + Min(u8), +} + +#[derive(Debug)] +pub enum SizeArg { + Max(u64), + Min(u64), +} + +#[derive(Debug)] +pub struct MatchArg { + pub comp_sel: ComponentSelector, + pub pat_sel: PatternSelector, + pub pattern: String, +} + +#[derive(Debug)] +pub enum TrivialPredicate { + True, + False, +} + +#[derive(Debug)] +pub enum Predicate { + Trivial(TrivialPredicate), + EntryType(EntryType), + CompressionMethod(CompressionMethodArg), + DepthLimit(DepthLimitArg), + Size(SizeArg), + Match(MatchArg), +} + +#[derive(Debug)] +enum ExprOp { + Negation, + And, + Or, +} + +#[derive(Debug)] +enum ExprArg { + PrimitivePredicate(Predicate), + Op(ExprOp), + Subgroup(MatchExpression), +} + +#[derive(Debug, Default)] +struct SingleExprLevel { + expr_args: Vec, +} + +impl SingleExprLevel { + pub fn push_arg(&mut self, arg: ExprArg) { + self.expr_args.push(arg); + } + + fn get_negation(expr_args: &mut VecDeque) -> Result { + let negated_expr: MatchExpression = match expr_args.pop_front().ok_or_else(|| { + Extract::exit_arg_invalid(&format!( + "negation was only expression in list inside match expr (rest: {expr_args:?})" + )) + })? { + ExprArg::Subgroup(match_expr) => { + /* We have a valid match expression, so just negate it without + * wrapping. 
*/ + MatchExpression::Negated(Box::new(match_expr)) + } + ExprArg::PrimitivePredicate(predicate) => { + /* We got a primitive predicate, so just negate it! */ + MatchExpression::Negated(Box::new(MatchExpression::PrimitivePredicate(predicate))) + } + ExprArg::Op(op) => { + /* Negation before any other operator is invalid. */ + return Err(Extract::exit_arg_invalid(&format!( + "negation before operator {op:?} inside match expr is invalid (rest: {expr_args:?})" + ))); + } + }; + Ok(negated_expr) + } + + fn get_non_operator( + expr_args: &mut VecDeque, + ) -> Result { + let next_expr: MatchExpression = match expr_args.pop_front().ok_or_else(|| { + /* We can't fold an empty list. */ + Extract::exit_arg_invalid(&format!( + "empty expression list inside match expr (rest: {expr_args:?})" + )) + })? { + /* This is already an evaluated match expression, so just start with that. */ + ExprArg::Subgroup(match_expr) => match_expr, + ExprArg::PrimitivePredicate(predicate) => { + /* Success! We start with a simple predicate. */ + MatchExpression::PrimitivePredicate(predicate) + } + ExprArg::Op(op) => match op { + /* We started with negation, which means we need to get the next arg to resolve + * it. */ + ExprOp::Negation => Self::get_negation(expr_args)?, + /* Starting with a binary operator is invalid. */ + op @ (ExprOp::And | ExprOp::Or) => { + return Err(Extract::exit_arg_invalid(&format!( + "expression list cannot begin with binary operator {op:?} (rest: {expr_args:?})" + ))); + } + }, + }; + Ok(next_expr) + } + + pub fn fold(self) -> Result { + let Self { expr_args } = self; + let mut expr_args: VecDeque<_> = expr_args.into(); + + /* Get a valid match expression to start our fold with. */ + let mut cur_expr: MatchExpression = Self::get_non_operator(&mut expr_args)?; + + /* Now fold the expression rightwards! */ + while let Some(next_arg) = expr_args.pop_front() { + match next_arg { + /* Implicit AND, wrapping the primitive result into a match. 
*/ + ExprArg::PrimitivePredicate(predicate) => { + let next_expr = MatchExpression::PrimitivePredicate(predicate); + cur_expr = MatchExpression::And { + explicit: false, + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + /* Implicit AND, without needing to wrap the result. */ + ExprArg::Subgroup(match_expr) => { + cur_expr = MatchExpression::And { + explicit: false, + left: Box::new(cur_expr), + right: Box::new(match_expr), + }; + } + /* Evaluate the operator according to association. */ + ExprArg::Op(op) => match op { + /* Negation applies to the next element, so retrieve it! */ + ExprOp::Negation => { + let next_expr = Self::get_negation(&mut expr_args)?; + cur_expr = MatchExpression::And { + explicit: false, + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + /* Explicit AND requires the next element. */ + ExprOp::And => { + let next_expr = Self::get_non_operator(&mut expr_args)?; + cur_expr = MatchExpression::And { + explicit: true, + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + /* OR requires the next element. */ + ExprOp::Or => { + let next_expr = Self::get_non_operator(&mut expr_args)?; + cur_expr = MatchExpression::Or { + left: Box::new(cur_expr), + right: Box::new(next_expr), + }; + } + }, + } + } + + assert!(expr_args.is_empty()); + Ok(cur_expr) + } +} + +#[derive(Debug)] +pub enum MatchExpression { + PrimitivePredicate(Predicate), + Negated(Box), + And { + explicit: bool, + left: Box, + right: Box, + }, + Or { + left: Box, + right: Box, + }, + Grouped(Box), +} + +impl MatchExpression { + pub fn parse_argv( + argv: &mut VecDeque, + ) -> Result { + let mut expr_stack: Vec = Vec::new(); + let mut top_exprs = SingleExprLevel::default(); + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + /* Parse primitive predicates. 
*/ + b"-true" => { + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Trivial( + TrivialPredicate::True, + ))); + } + b"-false" => { + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Trivial( + TrivialPredicate::False, + ))); + } + b"-t" | b"--type" => { + let type_arg = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for -t/--type"))?; + let entry_type = + EntryType::parse(type_arg.as_encoded_bytes()).ok_or_else(|| { + C::exit_arg_invalid(&format!("invalid --type argument: {type_arg:?}")) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::EntryType( + entry_type, + ))); + } + b"--compression-method" => { + let method_arg = argv.pop_front().ok_or_else(|| { + C::exit_arg_invalid("no argument provided for --compression-method") + })?; + let method = CompressionMethodArg::parse(method_arg.as_encoded_bytes()) + .ok_or_else(|| { + C::exit_arg_invalid(&format!( + "invalid --compression-method argument: {method_arg:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::CompressionMethod( + method, + ))); + } + b"--max-depth" => { + let max_depth: u8 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --max-depth"))? + .into_string() + .map_err(|depth_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --max-depth: {depth_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --max-depth arg as u8: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::DepthLimit( + DepthLimitArg::Max(max_depth), + ))); + } + b"--min-depth" => { + let min_depth: u8 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --min-depth"))? + .into_string() + .map_err(|depth_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --min-depth: {depth_arg:?}" + )) + })? 
+ .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --min-depth arg as u8: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::DepthLimit( + DepthLimitArg::Min(min_depth), + ))); + } + b"--max-size" => { + let max_size: u64 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --max-size"))? + .into_string() + .map_err(|size_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --max-size: {size_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --max-size arg as u64: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Size(SizeArg::Max( + max_size, + )))); + } + b"--min-size" => { + let min_size: u64 = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --min-size"))? + .into_string() + .map_err(|size_arg| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --min-size: {size_arg:?}" + )) + })? + .parse::() + .map_err(|e| { + C::exit_arg_invalid(&format!( + "failed to parse --min-size arg as u64: {e:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Size(SizeArg::Min( + min_size, + )))); + } + b"-m" => { + let pattern: String = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for -m"))? 
+ .into_string() + .map_err(|pattern| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for -m: {pattern:?}" + )) + })?; + let comp_sel = ComponentSelector::default(); + let pat_sel = PatternSelector::default_for_context(PatternContext::Match); + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Match(MatchArg { + comp_sel, + pat_sel, + pattern, + }))); + } + arg_bytes if arg_bytes.starts_with(b"--match") => { + let (comp_sel, pat_sel) = parse_comp_and_pat_sel( + arg_bytes, + PatternContext::Match, + ) + .ok_or_else(|| { + C::exit_arg_invalid(&format!("invalid --match argument modifiers: {arg:?}")) + })?; + let pattern: String = argv + .pop_front() + .ok_or_else(|| C::exit_arg_invalid("no argument provided for --match"))? + .into_string() + .map_err(|pattern| { + C::exit_arg_invalid(&format!( + "invalid unicode provided for --match: {pattern:?}" + )) + })?; + top_exprs.push_arg(ExprArg::PrimitivePredicate(Predicate::Match(MatchArg { + comp_sel, + pat_sel, + pattern, + }))); + } + + /* Parse operators. */ + b"!" | b"-not" => { + top_exprs.push_arg(ExprArg::Op(ExprOp::Negation)); + } + b"&" | b"-and" => { + top_exprs.push_arg(ExprArg::Op(ExprOp::And)); + } + b"|" | b"-or" => { + top_exprs.push_arg(ExprArg::Op(ExprOp::Or)); + } + + /* Process groups with stack logic! */ + b"(" | b"-open" => { + expr_stack.push(mem::take(&mut top_exprs)); + } + b")" | b"-close" => { + /* Get the unevaluated exprs from the previous nesting level. */ + let prev_level = expr_stack.pop().ok_or_else(|| { + C::exit_arg_invalid("too many close parens inside match expr") + })?; + /* Move the previous nesting level into current, and evaluate the current + * nesting level. */ + let group_expr = mem::replace(&mut top_exprs, prev_level).fold()?; + /* Wrap the completed group in a Grouped. */ + let group_expr = MatchExpression::Grouped(Box::new(group_expr)); + /* Push the completed and evaluated group into the current nesting level. 
*/ + top_exprs.push_arg(ExprArg::Subgroup(group_expr)); + } + + /* Conclude the match expr processing. */ + b"--expr" => { + break; + } + _ => { + return Err(C::exit_arg_invalid(&format!( + "unrecognized match expression component {arg:?}: all match expressions must start and end with a --expr flag" + ))); + } + } + } + + if !expr_stack.is_empty() { + return Err(C::exit_arg_invalid( + "not enough close parens inside match expr", + )); + } + top_exprs.fold() + } +} + +#[derive(Debug)] +pub enum TrivialTransform { + Identity, +} + +#[derive(Debug)] +pub enum BasicTransform { + StripComponents(u8), + AddPrefix(String), +} + +#[derive(Debug)] +pub struct TransformArg { + pub comp_sel: ComponentSelector, + pub pat_sel: PatternSelector, + pub pattern: String, + pub replacement_spec: String, +} + +#[derive(Debug)] +pub enum ComplexTransform { + Transform(TransformArg), +} + +#[derive(Debug)] +pub enum NameTransform { + Trivial(TrivialTransform), + Basic(BasicTransform), + Complex(ComplexTransform), +} + +#[derive(Debug)] +enum ExtractArg { + Match(MatchExpression), + NameTransform(NameTransform), + ContentTransform(ContentTransform), +} + +#[derive(Debug)] +pub struct EntrySpec { + pub match_expr: Option, + pub name_transforms: Vec, + pub content_transform: ContentTransform, +} + +impl EntrySpec { + fn parse_extract_args( + args: impl IntoIterator, + ) -> Result, ArgParseError> { + let mut match_expr: Option = None; + let mut name_transforms: Vec = Vec::new(); + + let mut ret: Vec = Vec::new(); + + for arg in args.into_iter() { + match arg { + ExtractArg::Match(new_expr) => { + if let Some(prev_expr) = match_expr.take() { + return Err(Extract::exit_arg_invalid(&format!( + "more than one match expr was provided for the same entry: {prev_expr:?} and {new_expr:?}" + ))); + } + match_expr = Some(new_expr); + } + ExtractArg::NameTransform(n_trans) => { + name_transforms.push(n_trans); + } + ExtractArg::ContentTransform(c_trans) => { + let spec = Self { + match_expr: 
match_expr.take(), + name_transforms: mem::take(&mut name_transforms), + content_transform: c_trans, + }; + ret.push(spec); + } + } + } + if let Some(match_expr) = match_expr { + return Err(Extract::exit_arg_invalid(&format!( + "match expr {match_expr:?} was provided with no corresponding content \ +transform. add -x/--extract to construct a complete entry spec" + ))); + } + if !name_transforms.is_empty() { + return Err(Extract::exit_arg_invalid(&format!( + "name transforms {name_transforms:?} were provided with no corresponding \ +content transform. add -x/--extract to construct a complete entry spec" + ))); + } + + Ok(ret) + } +} + +#[derive(Debug)] +pub enum OutputCollation { + ConcatenateStdout, + ConcatenateFile { path: PathBuf, append: bool }, + Filesystem { output_dir: PathBuf, mkdir: bool }, +} + +#[derive(Debug)] +pub struct NamedOutput { + pub name: String, + pub output: OutputCollation, +} + +#[derive(Debug)] +pub struct OutputSpecs { + pub default: Option, + pub named: Vec, +} + +impl Default for OutputSpecs { + fn default() -> Self { + Self { + default: Some(OutputCollation::Filesystem { + output_dir: PathBuf::from("."), + mkdir: false, + }), + named: Vec::new(), + } + } +} + +impl OutputSpecs { + pub fn parse_argv(argv: &mut VecDeque) -> Result { + let mut default: Option = None; + let mut named: Vec = Vec::new(); + let mut cur_name: Option = None; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Extract::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + b"--name" => { + let name = argv + .pop_front() + .ok_or_else(|| { + Extract::exit_arg_invalid("no argument provided for --name") + })? 
+ .into_string() + .map_err(|name| { + Extract::exit_arg_invalid(&format!( + "invalid unicode provided for --name: {name:?}" + )) + })?; + if let Some(prev_name) = cur_name.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple names provided for output: {prev_name:?} and {name:?}" + ))); + } + cur_name = Some(name); + } + b"-d" => { + let dir_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| Extract::exit_arg_invalid("no argument provided for -d"))?; + let output = OutputCollation::Filesystem { + output_dir: dir_path, + mkdir: false, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + arg_bytes if arg_bytes.starts_with(b"--output-directory") => { + let mkdir = match arg_bytes { + b"--output-directory" => false, + b"--output-directory:mkdir" => true, + _ => { + return Err(Extract::exit_arg_invalid(&format!( + "invalid suffix provided to --output-directory: {arg:?}" + ))); + } + }; + let dir_path = argv.pop_front().map(PathBuf::from).ok_or_else(|| { + Extract::exit_arg_invalid("no argument provided for --output-directory") + })?; + let output = OutputCollation::Filesystem { + output_dir: dir_path, + mkdir, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + b"--stdout" => { + let output = OutputCollation::ConcatenateStdout; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs 
provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + b"-f" => { + let file_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| Extract::exit_arg_invalid("no argument provided for -f"))?; + let output = OutputCollation::ConcatenateFile { + path: file_path, + append: false, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + arg_bytes if arg_bytes.starts_with(b"--output-file") => { + let append = match arg_bytes { + b"--output-file" => false, + b"--output-file:append" => true, + _ => { + return Err(Extract::exit_arg_invalid(&format!( + "invalid suffix provided to --output-file: {arg:?}" + ))); + } + }; + let file_path = argv.pop_front().map(PathBuf::from).ok_or_else(|| { + Extract::exit_arg_invalid("no argument provided for --output-file") + })?; + let output = OutputCollation::ConcatenateFile { + path: file_path, + append, + }; + if let Some(name) = cur_name.take() { + named.push(NamedOutput { name, output }); + } else if let Some(default) = default.take() { + return Err(Extract::exit_arg_invalid(&format!( + "multiple unnamed outputs provided: {default:?} and {output:?}" + ))); + } else { + default = Some(output); + } + } + _ => { + argv.push_front(arg); + break; + } + } + } + if let Some(name) = cur_name { + return Err(Extract::exit_arg_invalid(&format!( + "trailing --name argument provided without output spec: {name:?}" + ))); + } + + Ok(if default.is_none() && named.is_empty() { + Self::default() + } else { + Self { default, named } + }) + } +} + +#[derive(Debug)] +pub struct InputSpec { + pub stdin_stream: bool, + pub zip_paths: Vec, +} + +#[derive(Debug)] +pub struct Extract { + pub output_specs: OutputSpecs, + pub entry_specs: Vec, + pub input_spec: 
InputSpec, +} + +impl Extract { + #[cfg(feature = "deflate64")] + const DEFLATE64_HELP_LINE: &'static str = " - deflate64:\twith deflate64\n"; + #[cfg(not(feature = "deflate64"))] + const DEFLATE64_HELP_LINE: &'static str = ""; + + #[cfg(feature = "bzip2")] + const BZIP2_HELP_LINE: &'static str = " - bzip2:\twith bzip2\n"; + #[cfg(not(feature = "bzip2"))] + const BZIP2_HELP_LINE: &'static str = ""; + + #[cfg(feature = "zstd")] + const ZSTD_HELP_LINE: &'static str = " - zstd:\twith zstd\n"; + #[cfg(not(feature = "zstd"))] + const ZSTD_HELP_LINE: &'static str = ""; + + #[cfg(feature = "lzma")] + const LZMA_HELP_LINE: &'static str = " - lzma:\twith lzma\n"; + #[cfg(not(feature = "lzma"))] + const LZMA_HELP_LINE: &'static str = ""; + + #[cfg(feature = "xz")] + const XZ_HELP_LINE: &'static str = " - xz:\t\twith xz\n"; + #[cfg(not(feature = "xz"))] + const XZ_HELP_LINE: &'static str = ""; + + pub fn generate_match_expr_help_text() -> String { + format!( + r#" +## Match expressions (match-expr): + +Entry matching logic composes boolean arithmetic expressions ("expr") in terms +of basic "predicates" which test some component of the zip entry. Expressions +can be composed as follows, in order of precedence: + +expr = ( ) (grouping to force precedence) + = ! (negation) + = & (short-circuiting conjunction "and") + = (implicit &) + = | (disjunction "or") + = (evaluate on entry) + +### Operators: +The operators to compose match expressions must be quoted in shell commands +(e.g. as \( or '('), so alternatives are provided which do not require +special quoting: + +Grouping operators: + (, -open + ), -close + +Unary operators: + !, -not + +Binary operators: + |, -or + &, -and + +### Predicates (predicate): +These arguments are interpreted as basic predicates, returning true or false in +response to a specific zip entry. + +Trivial: +These results do not depend on the entry data at all: + + -true Always return true. + -false Always return false. 
+ +If a match expression is not provided, it defaults to the behavior of -true. + +Basic: +These results are dependent on the entry data: + + -t, --type [file|dir|symlink] + Match entries of the given type. + Note that directory entries may have specific mode bits set, or they may just be + zero-length entries whose name ends in '/'. + + --compression-method + Match entries compressed with the given compression technique. + + Possible values: + - any: any compression method at all + - known: any compression method this binary is able to decompress + - stored: uncompressed + - deflated: with deflate +{}{}{}{}{} + Using e.g. '--compression-method known' as a match expression filters + entries to only those which can be successfully decompressed. + + --max-depth + Match entries with at *most* components of their + containing directory. + --min-depth + Match entries with at *least* components of their + containing directory. + + --max-size + Match entries of at *most* in *uncompressed* size. + --min-size + Match entries of at *least* in *uncompressed* size. + + Directory entries are 0 bytes in size, and symlink entries are the + size required to store their target. + + TODO: Abbreviations such as 1k, 1M are not currently supported; the + precise byte number must be provided, parseable as a u64. + + -m, --match[=][:] + Return true for entries whose name matches . + + See section on "Selector syntax" for and for how + the string argument is interpreted into a string matching + predicate against the entry name. +"#, + Self::DEFLATE64_HELP_LINE, + Self::BZIP2_HELP_LINE, + Self::ZSTD_HELP_LINE, + Self::LZMA_HELP_LINE, + Self::XZ_HELP_LINE, + ) + } + + pub fn generate_pattern_selector_help_text(match_only: bool) -> String { + format!( + r#" +## Selector syntax: + +The string matching operations of {} expose an interface to +configure various pattern matching techniques on various components of the entry +name string. 
+ +These flags default to interpreting a argument as a glob string to +match against the entire entry name, which can be explicitly requested as +follows: + + --match=path:glob + +The entire range of search options is described below: + +### Component selector (comp-sel): +comp-sel = path [DEFAULT] (match full entry) + = basename (match only the final component of entry) + = dirname (match all except final component of entry) + = ext (match only the file extension, if available) + +### Pattern selector (pat-sel): +pat-sel = glob [DEFAULT{}] (interpret as a shell glob) + = lit (interpret as literal string) + = rx {}(interpret as a regular expression) + = (apply search modifiers from ) + +{} + +Also note that glob and regex patterns require building this binary with the +"glob" and "rx" cargo features respectively. Specifying ':glob' or ':rx' without +the requisite feature support will produce an error. If the requisite feature is +not provided, the default is to use literal matching, which is supported in +all cases. + +#### Pattern modifiers (pat-mod): +pat-mod = :i (use case-insensitive matching for the given pattern) +{} = :p (perform left-anchored "prefix" searches) + = :s (perform right-anchored "suffix" searches) + +Pattern modifiers from (pat-mod) can be sequenced, e.g. ':i:p'. If ':p' and ':s' +are provided together, the result is to perform a doubly-anchored match, against +the entire string. For regexp matching with ':rx', ':p' and ':s' are converted +to '^' or '$' anchors in the regexp pattern string. If the pattern string also +contains '^' or '$' as well, no error is produced. + +*Note:* not all pattern modifiers apply everywhere. In particular, {}':p' and ':s' are +incompatible with glob search and will produce an error. 
+"#, + if match_only { + "--match" + } else { + "--match and --transform" + }, + if match_only { "" } else { " for matching" }, + if match_only { + "" + } else { + "[DEFAULT for replacement] " + }, + if match_only { + "" + } else { + "*Note:* glob patterns are not supported for replacement, and attempting to use +them with e.g '--transform:glob' will produce an error." + }, + if match_only { + "" + } else { + " = :g (use multi-match behavior for string replacements)\n" + }, + if match_only { + "" + } else { + "':g' only +applies to string replacement, and using it for a match expression like +'--match:rx:g' will produce an error. Additionally, " + } + ) + } + + pub const INPUT_HELP_TEXT: &'static str = r#" +# Input arguments: +Zip file inputs to extract from can be specified by streaming from stdin, or as +at least one path pointing to an existing zip file. Input arguments are always +specified after all output flags and entry specs on the command line. If no +positional argument is provided and --stdin is not present, an error will +be produced. + + --stdin + If this argument is provided, the streaming API will be used to read + entries as they are encountered, instead of filtering them beforehand + as is done with file inputs. This disables some optimizations, but + also avoids waiting for the entire input to buffer to start writing + output, so can be used in a streaming context. + +Positional paths: + ZIP-PATH... + Apply the entry specs to filter and rename entries to extract from all + of the provided zip files. At least one zip path must be provided, and + all provided paths must exist and point to an existing zip file. Pipes + are not supported and will produce an error. + + If --stdin is provided, it will be read in a streaming manner before + reading entries from any positional zip paths. 
+"#; +} + +impl CommandFormat for Extract { + const COMMAND_NAME: &'static str = "extract"; + const COMMAND_TABS: &'static str = "\t"; + const COMMAND_DESCRIPTION: &'static str = + "Decompress and transform matching entries into a stream or directory."; + + const USAGE_LINE: &'static str = + "[-h|--help] [OUTPUT-SPEC]... [ENTRY-SPEC]... [--stdin] [--] [ZIP-PATH]..."; + + fn generate_help() -> String { + format!( + r#" + -h, --help Print help + +# Output flags: +Where and how to collate the extracted entries. + +## Directory extraction: +Extract entries into relative paths of a named directory according to the +entry's name. + + -d, --output-directory[:mkdir] + Output directory path to write extracted entries into. + Paths for extracted entries will be constructed by interpreting entry + names as relative paths to the provided directory. + + If the provided path is not a directory, an error is produced. If the + provided path does not exist, an error is produced, unless :mkdir is + specified, which attempts to create the specified directory along with + any missing parent directories. + + If not provided, entries will be extracted into the current directory + (as if '-d .' had been provided). + +## Pipe decompression: +Concatenate decompressed entry data into a pipe or file. Entry names are +effectively ignored. This disables some optimizations that are possible when +extracting to the filesystem. + + --stdout + Concatenate all extracted entries and write them in order to stdout + instead of writing anything to the filesystem. + This will write output to stdout even if stdout is a tty. + + -f, --output-file[:append] + Write all entries into the specified file path . + + The output file will be truncated if it already exists, unless :append + is provided. If the specified file path could not be created + (e.g. because the containing directory does not exist, or because the + path exists but does not point to a regular file), an error + is produced. 
+ +## Output teeing: +Entries may be *received* by one or more named outputs. Without any output names specified, the +above flags will produce a single receiver named "default". This is the default receiver used for +the -x/--extract argument unless otherwise specified. However, multiple named receivers may be +specified in sequence, separated by the --name flag: + + --name + Assign the output receiver created from the following output flags to the name . + +Note that the first output in a list need not have a name, as it will be assigned to the name +"default" if not provided. + +'--stdout' Creates a single default receiver decompressing contents to stdout. +'-d ./a' Creates a single default receiver extracting entries into './a'. + +'--name one -d ./a' + Creates a single named receiver "one" extracting into './a'. -x/--extract + must specify the name "one", or an error will be produced. +'--output-directory:mkdir ./a --name two --stdout' + Creates a default receiver extracting into './a', which will be created if + it does not exist, and a named receiver "two" concatenating into stdout. +'--name one -d ./a --name two -f ./b' + Creates a named receiver "one" extracting into './a', and a second named receiver "two" + concatenating into the file './b'. + +# Entry specs: + +After output flags are provided, entry specs are processed in order until an +input argument is reached. Entry specs are modelled after the arguments to +find(1), although "actions" are separated from "matching" expressions with +test clauses instead of being fully recursive like find(1). + +The full specification of an entry spec is provided below +(we will use lowercase names to describe this grammar): + + entry-spec = [--expr match-expr --expr] [name-transform]... content-transform + +1. (match-expr) matches against entries, +2. (name-transform) may transform the entry name string, +3. (content-transform) processes the entry content and writes it + to the output. 
+ +Note that only the "content transform" is required: each entry spec must +conclude with exactly one content transform, but the other arguments may +be omitted and will be set to their default values. + +If no entry specs are provided, by default all entries are decompressed and written to the +output collator without modification. This behavior can be requested explicitly +with the command line: + + --expr -true --expr --identity --extract + +*Note:* if a match-expr is provided, it *must* be surrounded with --expr arguments on both sides! +This is a necessary constraint of the current command line parsing. + +{} + +## Name transforms (name-transform): + +Name transforms modify the entry name before writing the entry to the +output. Unlike match expressions, name transforms do not involve any boolean +logic, and instead are composed linearly, each processing the string produced by +the prior name transform in the series. + +*Note:* name transforms do *not* perform any filtering, so if a string +replacement operation "fails", the entry name is simply returned unchanged. + +Trivial: + --identity Return the entry name string unchanged. + +If no name transforms are provided, it defaults to the behavior of --identity. + +Basic: +These transformers do not perform any complex pattern matching, and instead add +or remove a fixed string from the entry name: + + --strip-components + Remove at most directory components from the entry name. + If is greater than or equal the number of components in the + entry dirname, then the basename of the entry is returned. + --add-prefix + Prefix the entry name with a directory path . + A single separator '/' will be added after before the rest of + the entry name, and any trailing '/' in will be trimmed + before joining. 
+ +Complex: +These transformers perform complex pattern matching and replacement upon the +entry name string: + + --transform[=][:] + Extract the portion of the entry name corresponding to , + search it against corresponding to , and then + replace the result with . + + If == 'rx', then may contain references + to numbered capture groups specified by . Otherwise, + is interpreted as a literal string. + + +## Content transforms (content-transform): + +Content transforms determine how to interpret the content of the zip +entry itself. + +*Note:* when multiple entry specs are provided on the command line, a single +entry may be matched more than once. In this case, the entry's content will be +teed to all the specified outputs. + + -x, --extract[=] + Decompress the entry's contents (if necessary) before writing it to + the named output , or the default output if the receiver name is + not specified. + +Attempting to extract an entry using an unsupported compression method with +-x/--extract will produce an error. In this case, --compression-method can be +used to filter out such entries. + +{} +{}"#, + Self::generate_match_expr_help_text(), + Self::generate_pattern_selector_help_text(false), + Self::INPUT_HELP_TEXT, + ) + } + + fn parse_argv(mut argv: VecDeque) -> Result { + let mut args: Vec = Vec::new(); + let mut stdin_flag: bool = false; + let mut positional_zips: Vec = Vec::new(); + + let output_specs = OutputSpecs::parse_argv(&mut argv)?; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + + /* Transition to entry specs */ + /* Try content transforms first, as they are unambiguous sentinel values. 
*/ + b"-x" | b"--extract" => { + args.push(ExtractArg::ContentTransform(ContentTransform::Extract { + name: None, + })); + } + arg_bytes if arg_bytes.starts_with(b"--extract=") => { + let name = arg + .into_string() + .map_err(|arg| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided to --extract=: {arg:?}" + )) + })? + .strip_prefix("--extract=") + .unwrap() + .to_string(); + args.push(ExtractArg::ContentTransform(ContentTransform::Extract { + name: Some(name), + })); + } + + /* Try name transforms next, as they only stack linearly and do not require CFG + * parsing of paired delimiters. */ + /* FIXME: none of these name transforms have any effect if --stdout is + * provided. Should we error or warn about this? */ + b"--identity" => { + args.push(ExtractArg::NameTransform(NameTransform::Trivial( + TrivialTransform::Identity, + ))); + } + b"--strip-components" => { + let num: u8 = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for --strip-component") + })? + .into_string() + .map_err(|num| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided for --strip-component: {num:?}" + )) + })? + .parse::() + .map_err(|e| { + Self::exit_arg_invalid(&format!( + "failed to parse --strip-component arg {e:?} as u8" + )) + })?; + args.push(ExtractArg::NameTransform(NameTransform::Basic( + BasicTransform::StripComponents(num), + ))); + } + b"--add-prefix" => { + let prefix = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for --add-prefix") + })? 
+ .into_string() + .map_err(|prefix| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided for --add-prefix: {prefix:?}" + )) + })?; + args.push(ExtractArg::NameTransform(NameTransform::Basic( + BasicTransform::AddPrefix(prefix), + ))); + } + arg_bytes if arg_bytes.starts_with(b"--transform") => { + let (comp_sel, pat_sel) = + parse_comp_and_pat_sel(arg_bytes, PatternContext::Replacement).ok_or_else( + || { + Self::exit_arg_invalid(&format!( + "invalid --transform argument modifiers: {arg:?}" + )) + }, + )?; + let pattern = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no argument provided for --transform") + })? + .into_string() + .map_err(|pattern| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided for --transform : {pattern:?}" + )) + })?; + let replacement_spec = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid( + "no argument provided for --transform", + ) + })? + .into_string() + .map_err(|replacement_spec| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided for --transform : {replacement_spec:?}" + )) + })?; + args.push(ExtractArg::NameTransform(NameTransform::Complex( + ComplexTransform::Transform(TransformArg { + comp_sel, + pat_sel, + pattern, + replacement_spec, + }), + ))); + } + + /* Try parsing match specs! 
*/ + b"--expr" => { + let match_expr = MatchExpression::parse_argv::(&mut argv)?; + args.push(ExtractArg::Match(match_expr)); + } + + /* Transition to input args */ + b"--stdin" => { + stdin_flag = true; + } + b"--" => break, + arg_bytes => { + if arg_bytes.starts_with(b"-") { + return Err(Self::exit_arg_invalid(&format!( + "unrecognized flag {arg:?}" + ))); + } else { + argv.push_front(arg); + break; + } + } + } + } + + positional_zips.extend(argv.into_iter().map(|arg| arg.into())); + if !stdin_flag && positional_zips.is_empty() { + return Err(Self::exit_arg_invalid( + "no zip input files were provided, and --stdin was not provided", + )); + }; + let input_spec = InputSpec { + stdin_stream: stdin_flag, + zip_paths: positional_zips, + }; + + let entry_specs = EntrySpec::parse_extract_args(args)?; + + Ok(Self { + output_specs, + entry_specs, + input_spec, + }) + } +} + +impl crate::driver::ExecuteCommand for Extract { + fn execute(self, err: impl std::io::Write) -> Result<(), crate::CommandError> { + crate::extract::execute_extract(err, self) + } +} diff --git a/cli/src/args/info.rs b/cli/src/args/info.rs new file mode 100644 index 000000000..f571829c5 --- /dev/null +++ b/cli/src/args/info.rs @@ -0,0 +1,760 @@ +use super::{ + extract::{Extract, InputSpec, MatchExpression}, + ArgParseError, CommandFormat, +}; + +use std::{collections::VecDeque, ffi::OsString, fmt, path::PathBuf}; + +#[derive(Debug)] +pub struct ModifierParseError(pub String); + +impl fmt::Display for ModifierParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", &self.0) + } +} + +#[derive(Debug)] +pub enum DirectiveParseError { + Modifier(String, ModifierParseError), + Unrecognized(String), +} + +impl fmt::Display for DirectiveParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Modifier(d, e) => { + write!(f, "unrecognized modifier in directive {d:?}: {e}") + } + Self::Unrecognized(d) => { + write!(f, "unrecognized directive: 
{d:?}") + } + } + } +} + +#[derive(Debug)] +pub enum FormatParseError { + Directive(DirectiveParseError), + Search(String), +} + +impl fmt::Display for FormatParseError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Directive(e) => { + write!(f, "{e}") + } + Self::Search(e) => { + write!(f, "error in parsing logic: {e}") + } + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum ByteSizeFormat { + #[default] + FullDecimal, + HumanAbbreviated, +} + +impl ByteSizeFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":decimal" => Ok(Self::FullDecimal), + ":human" => Ok(Self::HumanAbbreviated), + _ => Err(ModifierParseError(format!( + "unrecognized byte size format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum OffsetFormat { + Decimal, + #[default] + Hexadecimal, +} + +impl OffsetFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":decimal" => Ok(Self::Decimal), + ":hex" => Ok(Self::Hexadecimal), + _ => Err(ModifierParseError(format!( + "unrecognized offset format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BinaryStringFormat { + #[default] + PrintAsString, + EscapeAscii, + WriteBinaryContents, +} + +impl BinaryStringFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":print" => Ok(Self::PrintAsString), + ":escape" => Ok(Self::EscapeAscii), + ":write" => Ok(Self::WriteBinaryContents), + _ => Err(ModifierParseError(format!( + "unrecognized string format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum ArchiveOverviewFormatDirective { + ArchiveName, + TotalSize(ByteSizeFormat), + NumEntries, + ArchiveComment(BinaryStringFormat), + FirstEntryStart(OffsetFormat), + 
CentralDirectoryStart(OffsetFormat), +} + +impl ParseableDirective for ArchiveOverviewFormatDirective { + fn parse_directive(s: &str) -> Result { + match s { + "name" => Ok(Self::ArchiveName), + s if s.starts_with("size") => { + let size_fmt = ByteSizeFormat::parse(&s["size".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::TotalSize(size_fmt)) + } + "num" => Ok(Self::NumEntries), + s if s.starts_with("comment") => { + let str_fmt = BinaryStringFormat::parse(&s["comment".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::ArchiveComment(str_fmt)) + } + s if s.starts_with("offset") => { + let offset_fmt = OffsetFormat::parse(&s["offset".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::FirstEntryStart(offset_fmt)) + } + s if s.starts_with("cde-offset") => { + let offset_fmt = OffsetFormat::parse(&s["cde-offset".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CentralDirectoryStart(offset_fmt)) + } + _ => Err(DirectiveParseError::Unrecognized(s.to_string())), + } + } +} + +#[derive(Debug)] +pub enum ParseableFormatComponent { + Directive(D), + Escaped(&'static str), + Literal(String), +} + +#[derive(Debug)] +pub struct ParseableFormatSpec { + pub components: Vec>, +} + +pub trait ParseableDirective: Sized { + fn parse_directive(s: &str) -> Result; +} + +impl ParseableFormatSpec +where + D: ParseableDirective, +{ + pub fn parse_format(s: &str) -> Result { + let mut components: Vec> = Vec::new(); + let mut last_source_position: usize = 0; + while let Some(pcnt_pos) = s[last_source_position..] + .find('%') + .map(|p| p + last_source_position) + { + /* Anything in between directives is a literal string. */ + if pcnt_pos > last_source_position { + components.push(ParseableFormatComponent::Literal( + s[last_source_position..pcnt_pos].to_string(), + )); + last_source_position = pcnt_pos; + } + let next_pcnt = s[(pcnt_pos + 1)..] 
+ .find('%') + .map(|p| p + pcnt_pos + 1) + .ok_or_else(|| { + FormatParseError::Search("% directive opened but not closed".to_string()) + })?; + let directive_contents = &s[pcnt_pos..=next_pcnt]; + match directive_contents { + /* An empty directive is a literal percent. */ + "%%" => { + components.push(ParseableFormatComponent::Escaped("%")); + } + /* A single '!' directive is a literal newline. */ + "%!%" => { + components.push(ParseableFormatComponent::Escaped("\n")); + } + "%,%" => { + components.push(ParseableFormatComponent::Escaped("\t")); + } + /* Otherwise, parse the space between percents. */ + d => { + let directive = D::parse_directive(&d[1..(d.len() - 1)]) + .map_err(FormatParseError::Directive)?; + components.push(ParseableFormatComponent::Directive(directive)); + } + } + last_source_position += directive_contents.len(); + } + if s.len() > last_source_position { + components.push(ParseableFormatComponent::Literal( + s[last_source_position..].to_string(), + )); + } + Ok(Self { components }) + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum UnixModeFormat { + #[default] + Octal, + Pretty, +} + +impl UnixModeFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":octal" => Ok(Self::Octal), + ":pretty" => Ok(Self::Pretty), + _ => Err(ModifierParseError(format!( + "unrecognized unix mode format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum TimestampFormat { + DateOnly, + TimeOnly, + #[default] + DateAndTime, +} + +impl TimestampFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":date" => Ok(Self::DateOnly), + ":time" => Ok(Self::TimeOnly), + ":date-time" => Ok(Self::DateAndTime), + _ => Err(ModifierParseError(format!( + "unrecognized timestamp format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum 
CompressionMethodFormat { + Abbreviated, + #[default] + Full, +} + +impl CompressionMethodFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":abbrev" => Ok(Self::Abbreviated), + ":full" => Ok(Self::Full), + _ => Err(ModifierParseError(format!( + "unrecognized compression method format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BinaryNumericValueFormat { + Decimal, + #[default] + Hexadecimal, +} + +impl BinaryNumericValueFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":decimal" => Ok(Self::Decimal), + ":hex" => Ok(Self::Hexadecimal), + _ => Err(ModifierParseError(format!( + "unrecognized binary numeric value format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum FileTypeFormat { + Abbreviated, + #[default] + Full, +} + +impl FileTypeFormat { + pub fn parse(s: &str) -> Result { + match s { + "" => Ok(Self::default()), + ":abbrev" => Ok(Self::Abbreviated), + ":full" => Ok(Self::Full), + _ => Err(ModifierParseError(format!( + "unrecognized file type format: {s:?}" + ))), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum EntryFormatDirective { + Name, + FileType(FileTypeFormat), + Comment(BinaryStringFormat), + LocalHeaderStart(OffsetFormat), + ContentStart(OffsetFormat), + ContentEnd(OffsetFormat), + CentralHeaderStart(OffsetFormat), + CompressedSize(ByteSizeFormat), + UncompressedSize(ByteSizeFormat), + UnixMode(UnixModeFormat), + CompressionMethod(CompressionMethodFormat), + CrcValue(BinaryNumericValueFormat), + Timestamp(TimestampFormat), +} + +impl ParseableDirective for EntryFormatDirective { + fn parse_directive(s: &str) -> Result { + match s { + "name" => Ok(Self::Name), + s if s.starts_with("type") => { + let type_fmt = FileTypeFormat::parse(&s["type".len()..]) + .map_err(|e| 
DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::FileType(type_fmt)) + } + s if s.starts_with("comment") => { + let str_fmt = BinaryStringFormat::parse(&s["comment".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::Comment(str_fmt)) + } + s if s.starts_with("header-start") => { + let offset_fmt = OffsetFormat::parse(&s["header-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::LocalHeaderStart(offset_fmt)) + } + s if s.starts_with("content-start") => { + let offset_fmt = OffsetFormat::parse(&s["content-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::ContentStart(offset_fmt)) + } + s if s.starts_with("content-end") => { + let offset_fmt = OffsetFormat::parse(&s["content-end".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::ContentEnd(offset_fmt)) + } + s if s.starts_with("central-header-start") => { + let offset_fmt = OffsetFormat::parse(&s["central-header-start".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CentralHeaderStart(offset_fmt)) + } + s if s.starts_with("compressed-size") => { + let size_fmt = ByteSizeFormat::parse(&s["compressed-size".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CompressedSize(size_fmt)) + } + s if s.starts_with("uncompressed-size") => { + let size_fmt = ByteSizeFormat::parse(&s["uncompressed-size".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::UncompressedSize(size_fmt)) + } + s if s.starts_with("unix-mode") => { + let mode_fmt = UnixModeFormat::parse(&s["unix-mode".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::UnixMode(mode_fmt)) + } + s if s.starts_with("compression-method") => { + let method_fmt = CompressionMethodFormat::parse(&s["compression-method".len()..]) + .map_err(|e| 
DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CompressionMethod(method_fmt)) + } + s if s.starts_with("crc") => { + let num_fmt = BinaryNumericValueFormat::parse(&s["crc".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::CrcValue(num_fmt)) + } + s if s.starts_with("timestamp") => { + let ts_fmt = TimestampFormat::parse(&s["timestamp".len()..]) + .map_err(|e| DirectiveParseError::Modifier(s.to_string(), e))?; + Ok(Self::Timestamp(ts_fmt)) + } + _ => Err(DirectiveParseError::Unrecognized(s.to_string())), + } + } +} + +#[derive(Debug, Default)] +pub enum FormatSpec { + #[default] + Compact, + Extended, + Custom { + overview: ParseableFormatSpec, + entry: ParseableFormatSpec, + }, +} + +impl FormatSpec { + pub fn parse_format_strings( + archive_format: String, + entry_format: String, + ) -> Result { + let overview = + ParseableFormatSpec::::parse_format(&archive_format) + .map_err(|e| { + Info::exit_arg_invalid(&format!( + "failed to parse archive format string {archive_format:?}: {e}" + )) + })?; + let entry = ParseableFormatSpec::::parse_format(&entry_format) + .map_err(|e| { + Info::exit_arg_invalid(&format!( + "failed to parse entry format string {entry_format:?}: {e}" + )) + })?; + Ok(Self::Custom { overview, entry }) + } +} + +#[derive(Debug)] +pub struct Info { + pub format_spec: FormatSpec, + pub match_expr: Option, + pub input_spec: InputSpec, +} + +impl CommandFormat for Info { + const COMMAND_NAME: &'static str = "info"; + const COMMAND_TABS: &'static str = "\t\t"; + const COMMAND_DESCRIPTION: &'static str = + "Print info about archive contents and individual entries."; + + const USAGE_LINE: &'static str = + "[-h|--help] [--extended|--format ] [--expr MATCH-EXPR --expr] [--stdin] [--] [ZIP-PATH]..."; + + fn generate_help() -> String { + format!( + r#" + -h, --help Print help + +By default, a compact representation of the metadata within the top-level +archive and individual entries is printed to stdout. 
This format, along with the +"extended" format from --extended, is not stable for processing by external +tools. For stable output, a custom format string should be provided with +--format. + +*Note:* the archive metadata is printed *after* the metadata for each entry, +because zip files store metadata at the end of the file! + +Note that the contents of individual entries are not accessible with this +command, and should instead be extracted with the '{}' subcommand, which can +write entries to stdout or a given file path as well as extracted into an +output directory. + + --extended + Print a verbose description of all top-level archive and individual + entry fields. + + --format + Print a custom description of the top-level archive and individual + entry metadata. + + Both format specs must be provided, but empty strings are + accepted. Explicit trailing newlines must be specified and will not be + inserted automatically. + + Note again that archive metadata is printed after all entries + are formatted. + +# Format specs: +Format specs are literal strings interspersed with directives, which are +surrounded by *paired* '%' characters. This is different from typical %-encoded +format strings which only use a single '%'. A doubled '%%' produces a literal +'%', while '%name%' encodes a directive "name". The directives for archive and +entry format strings are different, but certain directives are parsed with +modifier strings which are shared across both format types. These modifiers are +discussed in the section on . + +## Escape characters: +%% + Prints a literal percent '%'. + +%!% + Prints a single literal newline '\n'. + +%,% + Prints a single literal tab character '\t'. + +## Archive format directives: +This is printed at the bottom of the output, after all entries are formatted. + +%name% + The name of the file provided as input, or '' for stdin. + +%size% + The size of the entire archive. + +%num% + The number of entries in the archive. 
+ +%comment% + The archive comment, if provided (otherwise an empty string). + +%offset% + The offset of the first entry's local header from the start of the + file. This is where the zip file content starts, and arbitrary data may be + present in the space before this point. + +%cde-offset% + The offset of the central directory record from the start of the file. This + is where entry contents end, and after this point is only zip metadata until + the end of the file. + +## Entry format directives: +This is printed for each entry. Note again that no newlines are inserted +automatically, so an explicit trailing newline must be provided to avoid writing +all the output to a single line. + +%name% + The name of the entry in the archive. This is the relative path that the + entry would be extracted to. + +%type% + The type of the entry (file, directory, or symlink). + +%comment% + The entry comment, if provided (otherwise an empty string). + +%header-start% + The offset of the entry's local header, which comes before any + entry contents. + +%content-start% + The offset of the entry's possibly-compressed content, which comes after the + local header. + +%content-end% + The offset of the end of the entry's possibly-compressed content. The next + entry's local header begins immediately after. + +%central-header-start% + The offset of the entry's central directory header, at the end of the + zip file. + +%compressed-size% + The size of the entry's possibly-compressed content as stored in + the archive. + +%uncompressed-size% + The size of the entry's content after decompression, as it would be + after extraction. + +%unix-mode% + The mode bits for the entry, if set. If unset, this is interpreted as + a value of 0. + +%compression-method% + The method used to compress the entry. + +%crc% + The CRC32 value for the entry. + +%timestamp% + The timestamp for the entry. + + Note that zip timestamps only have precision down to 2 seconds. 
+ +## Entry format directives: + +## Modifiers : +byte-size = '' [DEFAULT => decimal] + = ':decimal' (decimal numeric representation) + = ':human' (human-abbreviated size e.g. 1K, 1M) + +offset = '' [DEFAULT => hex] + = ':decimal' (decimal numeric representation) + = ':hex' (hexadecimal numeric representation) + +bin-str = '' [DEFAULT => print] + = ':print' (non-unicode chunks are replaced with + the unicode replacement character '�') + = ':escape' (surround with "" and escape each byte as ascii) + = ':write' (write string to output without checking for unicode) + +unix-mode = '' [DEFAULT => octal] + = ':octal' (octal numeric representation) + = ':pretty' (`ls`-like permissions string) + +timestamp = '' [DEFAULT => date-time] + = ':date' (ISO 8601 string representation of date) + = ':time' (HH:MM:SS string representation of time) + = ':date-time' + (ISO 8601 date then HH:MM time joined by a space) + +compression-method + = '' [DEFAULT => full] + = ':abbrev' (abbreviated name of method) + = ':full' (full name of method) + +bin-num = '' [DEFAULT => hex] + = ':decimal' (decimal numeric representation) + = ':hex' (hexadecimal numeric representation) + +file-type = '' [DEFAULT => full] + = ':abbrev' (abbreviated name of file type) + = ':full' (full name of file type) + + +{} + +{} +{} +"#, + Extract::COMMAND_NAME, + Extract::generate_match_expr_help_text(), + Extract::generate_pattern_selector_help_text(true), + Extract::INPUT_HELP_TEXT, + ) + } + + fn parse_argv(mut argv: VecDeque) -> Result { + let mut format_spec: Option = None; + let mut match_expr: Option = None; + let mut stdin_flag = false; + let mut positional_zips: Vec = Vec::new(); + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + + /* Try parsing format specs. 
*/ + b"--extended" => { + if let Some(prev_spec) = format_spec.take() { + return Err(Self::exit_arg_invalid(&format!( + "format spec already provided before --extended: {prev_spec:?}" + ))); + } + format_spec = Some(FormatSpec::Extended); + } + b"--format" => { + if let Some(prev_spec) = format_spec.take() { + return Err(Self::exit_arg_invalid(&format!( + "format spec already provided before --format: {prev_spec:?}" + ))); + } + let archive_format = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no arg provided to --format") + })? + .into_string() + .map_err(|fmt_arg| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided to --format: {fmt_arg:?}" + )) + })?; + let entry_format = argv + .pop_front() + .ok_or_else(|| { + Self::exit_arg_invalid("no arg provided to --format") + })? + .into_string() + .map_err(|fmt_arg| { + Self::exit_arg_invalid(&format!( + "invalid unicode provided to --format: {fmt_arg:?}" + )) + })?; + format_spec = Some(FormatSpec::parse_format_strings( + archive_format, + entry_format, + )?); + } + + /* Try parsing match specs! 
*/ + b"--expr" => { + let new_expr = MatchExpression::parse_argv::(&mut argv)?; + if let Some(prev_expr) = match_expr.take() { + return Err(Self::exit_arg_invalid(&format!( + "multiple match expressions provided: {prev_expr:?} and {new_expr:?}" + ))); + } + match_expr = Some(new_expr); + } + + /* Transition to input args */ + b"--stdin" => { + stdin_flag = true; + } + b"--" => break, + arg_bytes => { + if arg_bytes.starts_with(b"-") { + return Err(Self::exit_arg_invalid(&format!( + "unrecognized flag {arg:?}" + ))); + } else { + argv.push_front(arg); + break; + } + } + } + } + + positional_zips.extend(argv.into_iter().map(|arg| arg.into())); + if !stdin_flag && positional_zips.is_empty() { + return Err(Self::exit_arg_invalid( + "no zip input files were provided, and --stdin was not provided", + )); + }; + let input_spec = InputSpec { + stdin_stream: stdin_flag, + zip_paths: positional_zips, + }; + + let format_spec = format_spec.unwrap_or_default(); + + Ok(Self { + format_spec, + match_expr, + input_spec, + }) + } +} + +impl crate::driver::ExecuteCommand for Info { + fn execute(self, err: impl std::io::Write) -> Result<(), crate::CommandError> { + crate::info::execute_info(err, self) + } +} diff --git a/cli/src/compress.rs b/cli/src/compress.rs new file mode 100644 index 000000000..e35058273 --- /dev/null +++ b/cli/src/compress.rs @@ -0,0 +1,502 @@ +use std::{ + fs, + io::{self, Cursor, IsTerminal, Seek, Write}, + mem, + path::Path, +}; + +use zip::{ + unstable::path_to_string, + write::{SimpleFileOptions, ZipWriter}, + CompressionMethod, ZIP64_BYTES_THR, +}; + +use crate::{args::compress::*, CommandError, OutputHandle, WrapCommandErr}; + +fn enter_recursive_dir_entries( + err: &mut impl Write, + base_rename: Option, + root: &Path, + writer: &mut ZipWriter, + options: SimpleFileOptions, +) -> Result<(), CommandError> { + let base_dirname: String = base_rename + .unwrap_or_else(|| path_to_string(root).into()) + .trim_end_matches('/') + .to_string(); + writeln!( + 
err, + "writing top-level directory entry for {base_dirname:?}" + ) + .unwrap(); + writer + .add_directory(&base_dirname, options) + .wrap_err_with(|| format!("error adding top-level directory entry {base_dirname}"))?; + + let mut readdir_stack: Vec<(fs::ReadDir, String)> = vec![( + fs::read_dir(root) + .wrap_err_with(|| format!("error reading directory contents for {}", root.display()))?, + base_dirname, + )]; + while let Some((mut readdir, top_component)) = readdir_stack.pop() { + if let Some(dir_entry) = readdir + .next() + .transpose() + .wrap_err("reading next dir entry")? + { + let mut components: Vec<&str> = readdir_stack.iter().map(|(_, s)| s.as_ref()).collect(); + components.push(&top_component); + + let entry_basename: String = dir_entry.file_name().into_string().map_err(|name| { + CommandError::InvalidArg(format!("failed to decode basename {name:?}")) + })?; + components.push(&entry_basename); + let full_path: String = components.join("/"); + readdir_stack.push((readdir, top_component)); + + let file_type = dir_entry.file_type().wrap_err_with(|| { + format!("failed to read file type for dir entry {dir_entry:?}") + })?; + if file_type.is_symlink() { + let target: String = path_to_string( + fs::read_link(dir_entry.path()) + .wrap_err_with(|| format!("failed to read symlink from {dir_entry:?}"))?, + ) + .into(); + if target.len() > ZIP64_BYTES_THR.try_into().unwrap() { + return Err(CommandError::InvalidArg(format!( + "symlink target for {full_path} is over {ZIP64_BYTES_THR} bytes (was: {})", + target.len() + ))); + } + writeln!( + err, + "writing recursive symlink entry with name {full_path:?} and target {target:?}" + ) + .unwrap(); + writer + .add_symlink(&full_path, &target, options) + .wrap_err_with(|| format!("error adding symlink from {full_path}->{target}"))?; + } else if file_type.is_file() { + writeln!(err, "writing recursive file entry with name {full_path:?}").unwrap(); + let mut f = fs::File::open(dir_entry.path()).wrap_err_with(|| { + 
format!("error opening file for {full_path} from dir entry {dir_entry:?}") + })?; + /* Get the length of the file before reading it and set large_file if needed. */ + let input_len: u64 = f + .metadata() + .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? + .len(); + let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { + writeln!( + err, + "temporarily ensuring .large_file(true) for current entry" + ) + .unwrap(); + options.large_file(true) + } else { + options + }; + writer + .start_file(&full_path, maybe_large_file_options) + .wrap_err_with(|| format!("error creating file entry for {full_path}"))?; + io::copy(&mut f, writer).wrap_err_with(|| { + format!("error copying content for {full_path} from file {f:?}") + })?; + } else { + assert!(file_type.is_dir()); + writeln!( + err, + "writing recursive directory entry with name {full_path:?}" + ) + .unwrap(); + writer + .add_directory(&full_path, options) + .wrap_err_with(|| format!("failed to create directory entry {full_path}"))?; + writeln!( + err, + "adding subdirectories depth-first for recursive directory entry {entry_basename:?}" + ).unwrap(); + let new_readdir = fs::read_dir(dir_entry.path()).wrap_err_with(|| { + format!("failed to read recursive directory contents from {dir_entry:?}") + })?; + readdir_stack.push((new_readdir, entry_basename)); + } + } + } + Ok(()) +} + +pub fn execute_compress(mut err: impl Write, args: Compress) -> Result<(), CommandError> { + let Compress { + output, + archive_comment, + args, + positional_paths, + } = args; + + let (out, do_append) = match output { + OutputType::File { path, append } => { + if append { + writeln!( + err, + "reading compressed zip from output file path {path:?} for append" + ) + .unwrap(); + match fs::OpenOptions::new() + .read(true) + .write(true) + .create(false) + .open(&path) + { + Ok(f) => { + writeln!(err, "output zip file existed, appending").unwrap(); + (OutputHandle::File(f), true) + } + Err(e) if e.kind() == 
io::ErrorKind::NotFound => { + writeln!( + err, + "output zip file did not exist, creating new file instead of appending" + ) + .unwrap(); + let out = + OutputHandle::File(fs::File::create(&path).wrap_err_with(|| { + format!("failed to create new zip output file at {path:?}") + })?); + (out, false) + } + Err(e) => { + return Err(e).wrap_err_with(|| { + format!( + "unexpected error reading zip output file for append at {path:?}" + ) + }); + } + } + } else { + writeln!(err, "writing compressed zip to output file path {path:?}").unwrap(); + let out = OutputHandle::File(fs::File::create(&path).wrap_err_with(|| { + format!("failed to create output file at {}", path.display()) + })?); + (out, false) + } + } + OutputType::Stdout { allow_tty } => { + writeln!( + err, + "writing to stdout and buffering compressed zip in memory" + ) + .unwrap(); + if io::stdout().is_terminal() && !allow_tty { + /* TODO: maybe figure out some way to ensure --stdout is still the correct flag */ + return Err(CommandError::InvalidArg( + "stdout is a tty, but --stdout was not set".to_string(), + )); + } + let out = OutputHandle::InMem(Cursor::new(Vec::new())); + (out, false) + } + }; + let mut writer = if do_append { + ZipWriter::new_append(out) + .wrap_err("failed to initialize zip writer from existing zip file for append")? 
+    } else {
+        ZipWriter::new(out)
+    };
+
+    if let Some(comment) = archive_comment {
+        writeln!(err, "comment was provided: {comment:?}").unwrap();
+        let comment = comment.into_encoded_bytes();
+        writer.set_raw_comment(comment.into());
+    }
+
+    let mut options = SimpleFileOptions::default()
+        .compression_method(CompressionMethod::Deflated)
+        .large_file(false);
+    writeln!(err, "default zip entry options: {options:?}").unwrap();
+    let mut last_name: Option<String> = None;
+    let mut symlink_flag: bool = false;
+
+    for arg in args.into_iter() {
+        match arg {
+            CompressionArg::CompressionMethod(method) => {
+                let method = match method {
+                    CompressionMethodArg::Stored => CompressionMethod::Stored,
+                    CompressionMethodArg::Deflate => CompressionMethod::Deflated,
+                    #[cfg(feature = "deflate64")]
+                    CompressionMethodArg::Deflate64 => CompressionMethod::Deflate64,
+                    #[cfg(feature = "bzip2")]
+                    CompressionMethodArg::Bzip2 => CompressionMethod::Bzip2,
+                    #[cfg(feature = "zstd")]
+                    CompressionMethodArg::Zstd => CompressionMethod::Zstd,
+                };
+                writeln!(err, "setting compression method {method:?}").unwrap();
+                options = options.compression_method(method);
+            }
+            CompressionArg::Level(CompressionLevel(level)) => {
+                writeln!(err, "setting compression level {level:?}").unwrap();
+                options = options.compression_level(Some(level));
+            }
+            CompressionArg::UnixPermissions(UnixPermissions(mode)) => {
+                writeln!(err, "setting file mode {mode:#o}").unwrap();
+                options = options.unix_permissions(mode);
+            }
+            CompressionArg::LargeFile(large_file) => {
+                writeln!(err, "setting large file flag to {large_file:?}").unwrap();
+                options = options.large_file(large_file);
+            }
+            CompressionArg::Name(name) => {
+                writeln!(err, "setting name of next entry to {name:?}").unwrap();
+                if let Some(last_name) = last_name {
+                    return Err(CommandError::InvalidArg(format!(
+                        "got two names before an entry: {last_name} and {name}"
+                    )));
+                }
+                last_name = Some(name);
+            }
+            CompressionArg::Dir => {
+                writeln!(err, "writing dir
entry").unwrap();
+                if symlink_flag {
+                    return Err(CommandError::InvalidArg(
+                        "symlink flag provided before dir entry".to_string(),
+                    ));
+                }
+                let dirname = last_name.take().ok_or_else(|| {
+                    CommandError::InvalidArg("no name provided before dir entry".to_string())
+                })?;
+                writer
+                    .add_directory(&dirname, options)
+                    .wrap_err_with(|| format!("failed to create dir entry {dirname}"))?;
+            }
+            CompressionArg::Symlink => {
+                writeln!(err, "setting symlink flag for next entry").unwrap();
+                if symlink_flag {
+                    /* TODO: make this a warning? */
+                    return Err(CommandError::InvalidArg(
+                        "symlink flag provided twice before entry".to_string(),
+                    ));
+                }
+                symlink_flag = true;
+            }
+            CompressionArg::Immediate(data) => {
+                let name = last_name.take().ok_or_else(|| {
+                    CommandError::InvalidArg(format!(
+                        "no name provided for immediate data {data:?}"
+                    ))
+                })?;
+                /* It's highly unlikely any OS allows process args of this length, so even though
+                 * we're using rust's env::args_os() and it would be very impressive for an attacker
+                 * to get CLI args to overflow, it seems likely to be inefficient in any case, and
+                 * very unlikely to be useful, so exit with a clear error. */
+                if data.len() > ZIP64_BYTES_THR.try_into().unwrap() {
+                    return Err(CommandError::InvalidArg(format!(
+                        "length of immediate data argument is {}; use a file for inputs over {} bytes",
+                        data.len(),
+                        ZIP64_BYTES_THR
+                    )));
+                };
+                if symlink_flag {
+                    /* This is a symlink entry. */
+                    let target = data.into_string().map_err(|target| {
+                        CommandError::InvalidArg(format!(
+                            "failed to decode immediate symlink target {target:?}"
+                        ))
+                    })?;
+                    writeln!(
+                        err,
+                        "writing immediate symlink entry with name {name:?} and target {target:?}"
+                    )
+                    .unwrap();
+                    /* TODO: .add_symlink() should support OsString targets! */
+                    writer
+                        .add_symlink(&name, &target, options)
+                        .wrap_err_with(|| {
+                            format!("failed to create symlink entry {name}->{target}")
+                        })?;
+                    symlink_flag = false;
+                } else {
+                    /* This is a file entry.
*/ + writeln!( + err, + "writing immediate file entry with name {name:?} and data {data:?}" + ) + .unwrap(); + let data = data.into_encoded_bytes(); + writer + .start_file(&name, options) + .wrap_err_with(|| format!("failed to create file entry {name}"))?; + writer.write_all(data.as_ref()).wrap_err_with(|| { + format!( + "failed writing immediate data of length {} to file entry {name}", + data.len() + ) + })?; + } + } + CompressionArg::FilePath(path) => { + let name = last_name + .take() + .unwrap_or_else(|| path_to_string(&path).into()); + if symlink_flag { + /* This is a symlink entry. */ + let target: String = + path_to_string(fs::read_link(&path).wrap_err_with(|| { + format!("failed to read symlink from path {}", path.display()) + })?) + .into(); + /* Similarly to immediate data arguments, we're simply not going to support + * symlinks over this length, which should be impossible anyway. */ + if target.len() > ZIP64_BYTES_THR.try_into().unwrap() { + return Err(CommandError::InvalidArg(format!( + "symlink target for {name} is over {ZIP64_BYTES_THR} bytes (was: {})", + target.len() + ))); + } + writeln!(err, "writing symlink entry from path {path:?} with name {name:?} and target {target:?}").unwrap(); + writer + .add_symlink(&name, &target, options) + .wrap_err_with(|| { + format!("failed to create symlink entry for {name}->{target}") + })?; + symlink_flag = false; + } else { + /* This is a file entry. */ + writeln!( + err, + "writing file entry from path {path:?} with name {name:?}" + ) + .unwrap(); + let mut f = fs::File::open(&path).wrap_err_with(|| { + format!("error opening file for {name} at {}", path.display()) + })?; + /* Get the length of the file before reading it and set large_file if needed. */ + let input_len: u64 = f + .metadata() + .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? 
+ .len(); + writeln!(err, "entry is {input_len} bytes long").unwrap(); + let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { + writeln!( + err, + "temporarily ensuring .large_file(true) for current entry" + ) + .unwrap(); + options.large_file(true) + } else { + options + }; + writer + .start_file(&name, maybe_large_file_options) + .wrap_err_with(|| format!("error creating file entry for {name}"))?; + io::copy(&mut f, &mut writer).wrap_err_with(|| { + format!("error copying content for {name} from file {f:?}") + })?; + } + } + CompressionArg::RecursiveDirPath(r) => { + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag provided before recursive dir entry".to_string(), + )); + } + writeln!( + err, + "writing recursive dir entries for path {r:?} with name {last_name:?}" + ) + .unwrap(); + enter_recursive_dir_entries(&mut err, last_name.take(), &r, &mut writer, options)?; + } + } + } + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag remaining after all entry flags processed".to_string(), + )); + } + if let Some(last_name) = last_name { + return Err(CommandError::InvalidArg(format!( + "name {last_name} remaining after all entry flags processed" + ))); + } + + for pos_arg in positional_paths.into_iter() { + let file_type = fs::symlink_metadata(&pos_arg) + .wrap_err_with(|| format!("failed to read metadata from path {}", pos_arg.display()))? 
+ .file_type(); + if file_type.is_symlink() { + let target = fs::read_link(&pos_arg).wrap_err_with(|| { + format!("failed to read symlink content from {}", pos_arg.display()) + })?; + writeln!( + err, + "writing positional symlink entry with path {pos_arg:?} and target {target:?}" + ) + .unwrap(); + writer + .add_symlink_from_path(&pos_arg, &target, options) + .wrap_err_with(|| { + format!( + "failed to create symlink entry for {}->{}", + pos_arg.display(), + target.display() + ) + })?; + } else if file_type.is_file() { + writeln!(err, "writing positional file entry with path {pos_arg:?}").unwrap(); + let mut f = fs::File::open(&pos_arg) + .wrap_err_with(|| format!("failed to open file at {}", pos_arg.display()))?; + /* Get the length of the file before reading it and set large_file if needed. */ + let input_len: u64 = f + .metadata() + .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? + .len(); + let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { + writeln!( + err, + "temporarily ensuring .large_file(true) for current entry" + ) + .unwrap(); + options.large_file(true) + } else { + options + }; + writer + .start_file_from_path(&pos_arg, maybe_large_file_options) + .wrap_err_with(|| format!("failed to create file entry {}", pos_arg.display()))?; + io::copy(&mut f, &mut writer) + .wrap_err_with(|| format!("failed to copy file contents from {f:?}"))?; + } else { + assert!(file_type.is_dir()); + writeln!( + err, + "writing positional recursive dir entry for {pos_arg:?}" + ) + .unwrap(); + enter_recursive_dir_entries(&mut err, None, &pos_arg, &mut writer, options)?; + } + } + + let handle = writer + .finish() + .wrap_err("failed to write zip to output handle")?; + match handle { + OutputHandle::File(f) => { + let archive_len: u64 = f + .metadata() + .wrap_err_with(|| format!("failed reading metadata from file {f:?}"))? 
+            .len();
+            writeln!(err, "file archive {f:?} was {archive_len} bytes").unwrap();
+            mem::drop(f); /* Superfluous explicit drop. */
+        }
+        OutputHandle::InMem(mut cursor) => {
+            let archive_len: u64 = cursor.position();
+            writeln!(err, "in-memory archive was {archive_len} bytes").unwrap();
+            cursor.rewind().wrap_err("failed to rewind cursor")?;
+            let mut stdout = io::stdout().lock();
+            io::copy(&mut cursor, &mut stdout)
+                .wrap_err_with(|| format!("failed to copy {archive_len} byte archive to stdout"))?;
+        }
+    }
+
+    Ok(())
+}
diff --git a/cli/src/extract.rs b/cli/src/extract.rs
new file mode 100644
index 000000000..f5aaa28c7
--- /dev/null
+++ b/cli/src/extract.rs
@@ -0,0 +1,195 @@
+use std::{
+    borrow::Cow,
+    cell::RefCell,
+    fs,
+    io::{self, Read, Write},
+    rc::Rc,
+};
+
+use zip::read::{ZipArchive, ZipFile};
+
+use crate::{args::extract::*, CommandError, WrapCommandErr};
+
+pub mod entries;
+pub mod matcher;
+pub mod named_outputs;
+pub mod receiver;
+pub mod transform;
+use entries::{IterateEntries, StreamInput, ZipFileInput};
+use receiver::{CompiledEntrySpec, EntryData, EntryKind, EntryReceiver, ExtractEntry};
+
+fn maybe_process_symlink<'a, 't>(
+    entry: &mut ZipFile<'a>,
+    err: &Rc<RefCell<impl Write>>,
+    symlink_target: &'t mut Vec<u8>,
+) -> Result<Option<&'t [u8]>, CommandError> {
+    let (kind, size) = {
+        /* FIXME: the ZipFile<'a> struct contains a *mutable* reference to the parent archive,
+         * and this actually imposes a mutable reference upon any references to the
+         * immutable ZipFileData contents. This means we cannot have any immutable
+         * references to the ZipFileData contents at the same time as a mutable
+         * reference. What this means here is that we have to create a temporary EntryData
+         * struct and then immediately throw it away in order to be able to read the entry
+         * contents with io::Read. ZipEntry<'a, R> from
+         * https://github.com/zip-rs/zip2/pull/233 avoids this issue!!!
*/ + let data = EntryData::from_entry(&entry); + (data.kind, data.uncompressed_size) + }; + if !matches!(kind, EntryKind::Symlink) { + return Ok(None); + } + + /* We can't read the entry name from EntryData because we can't have any immutable + * references to ZipFileData like the name at the same time we use the entry as + * a reader! That means our log message here is very unclear! */ + writeln!(&mut err.borrow_mut(), "reading symlink target").unwrap(); + /* Re-use the vector allocation, but make sure to avoid re-using the symlink data from + * a previous iteration. */ + symlink_target.clear(); + entry + .read_to_end(symlink_target) + .wrap_err("failed to read symlink target from zip archive entry")?; + debug_assert_eq!(symlink_target.len(), size.try_into().unwrap()); + Ok(Some(symlink_target)) +} + +fn process_entry<'a, 'w, 'c, 'it>( + mut entry: ZipFile<'a>, + err: &Rc>, + compiled_specs: impl Iterator>, + copy_buf: &mut [u8], + symlink_target: &mut Vec, + deduped_concat_writers: &mut Vec<&'c Rc>>, + matching_handles: &mut Vec>, +) -> Result<(), CommandError> +where + 'w: 'it, + 'it: 'c, +{ + deduped_concat_writers.clear(); + matching_handles.clear(); + + let symlink_target = maybe_process_symlink(&mut entry, err, symlink_target)?; + /* We dropped any mutable handles to the entry, so now we can access its metadata again. */ + let data = EntryData::from_entry(&entry); + + let mut deduped_matching_extracts: Vec<(&'c Rc, Vec>)> = + Vec::new(); + for matching_spec in compiled_specs.filter_map(|spec| spec.try_match_and_transform(&data)) { + if matching_spec.is_nested_duplicate(deduped_concat_writers, &mut deduped_matching_extracts) + { + writeln!(&mut err.borrow_mut(), "skipping repeated output").unwrap(); + } + } + + matching_handles.extend( + deduped_matching_extracts + .into_iter() + .flat_map(|(recv, names)| names.into_iter().map(move |n| (recv, n))) + .map(|(recv, name)| recv.generate_entry_handle(&data, symlink_target.as_deref(), name)) + .collect::, _>>()? 
+ .into_iter() + .flatten(), + ); + + let mut read_len: usize; + loop { + read_len = entry.read(copy_buf).wrap_err("read of entry failed")?; + if read_len == 0 { + break; + } + let cur_data: &[u8] = ©_buf[..read_len]; + for concat_writer in deduped_concat_writers.iter() { + concat_writer + .borrow_mut() + .write_all(cur_data) + .wrap_err("failed to write data to concat output")?; + } + for extract_writer in matching_handles.iter_mut() { + extract_writer + .write_all(cur_data) + .wrap_err("failed to write data to extract output")?; + } + } + + Ok(()) +} + +pub fn execute_extract(err: impl Write, extract: Extract) -> Result<(), CommandError> { + let Extract { + output_specs, + entry_specs, + input_spec: InputSpec { + stdin_stream, + zip_paths, + }, + } = extract; + let err = Rc::new(RefCell::new(err)); + + writeln!(&mut err.borrow_mut(), "entry specs: {entry_specs:?}").unwrap(); + let compiled_specs = + named_outputs::process_entry_and_output_specs(err.clone(), entry_specs, output_specs)?; + writeln!(&mut err.borrow_mut(), "compiled specs: {compiled_specs:?}").unwrap(); + + let mut copy_buf: Vec = vec![0u8; 1024 * 16]; + let mut symlink_target: Vec = Vec::new(); + + let mut deduped_concat_writers: Vec<&Rc>> = Vec::new(); + let mut matching_handles: Vec> = Vec::new(); + + if stdin_stream { + writeln!(&mut err.borrow_mut(), "extracting from stdin").unwrap(); + let mut stdin = StreamInput::new(io::stdin().lock()); + + while let Some(entry) = stdin.next_entry()? 
{ + process_entry( + entry, + &err, + compiled_specs.iter(), + &mut copy_buf, + &mut symlink_target, + &mut deduped_concat_writers, + &mut matching_handles, + )?; + } + } + + for p in zip_paths.into_iter() { + writeln!( + &mut err.borrow_mut(), + "extracting from zip input file {p:?}", + ) + .unwrap(); + let zip = fs::File::open(&p) + .wrap_err_with(|| format!("failed to open zip input file path {p:?}")) + .and_then(|f| { + ZipArchive::new(f) + .wrap_err_with(|| format!("failed to create zip archive for file {p:?}")) + })?; + let mut zip_entries = ZipFileInput::new(Box::new(zip)); + + while let Some(entry) = zip_entries.next_entry()? { + process_entry( + entry, + &err, + compiled_specs.iter(), + &mut copy_buf, + &mut symlink_target, + &mut deduped_concat_writers, + &mut matching_handles, + )?; + } + } + + /* Finalize all extract entries. */ + for spec in compiled_specs.into_iter() { + match spec { + CompiledEntrySpec::Concat(_) => (), + CompiledEntrySpec::Extract(ExtractEntry { recv, .. 
}) => {
+                recv.finalize_entries()?;
+            }
+        }
+    }
+
+    Ok(())
+}
diff --git a/cli/src/extract/entries.rs b/cli/src/extract/entries.rs
new file mode 100644
index 000000000..bb46fb79b
--- /dev/null
+++ b/cli/src/extract/entries.rs
@@ -0,0 +1,132 @@
+use std::{fs, io, ops};
+
+use zip::{
+    read::{read_zipfile_from_stream, ZipFile},
+    ZipArchive,
+};
+
+use crate::{CommandError, WrapCommandErr};
+
+pub trait IterateEntries {
+    fn next_entry(&mut self) -> Result<Option<ZipFile<'_>>, CommandError>;
+}
+
+pub struct ReadChecker<R> {
+    inner: R,
+    bytes_read: u64,
+}
+
+impl<R> ReadChecker<R> {
+    pub const fn current_bytes_read(&self) -> u64 {
+        self.bytes_read
+    }
+}
+
+impl<R> ReadChecker<R>
+where
+    R: io::Read,
+{
+    pub fn exhaust(mut self) -> io::Result<(R, u64)> {
+        io::copy(&mut self, &mut io::sink())?;
+        let Self { inner, bytes_read } = self;
+        Ok((inner, bytes_read))
+    }
+}
+
+impl<R> io::Read for ReadChecker<R>
+where
+    R: io::Read,
+{
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        let n = self.inner.read(buf)?;
+        let num_read: u64 = n.try_into().unwrap();
+        self.bytes_read += num_read;
+        Ok(n)
+    }
+}
+
+pub struct StreamInput<R> {
+    inner: ReadChecker<R>,
+    entries_read: usize,
+}
+
+impl<R> StreamInput<R> {
+    pub fn new(inner: R) -> Self {
+        Self {
+            inner: ReadChecker {
+                inner,
+                bytes_read: 0,
+            },
+            entries_read: 0,
+        }
+    }
+
+    pub fn into_inner(self) -> (ReadChecker<R>, usize) {
+        let Self {
+            inner,
+            entries_read,
+        } = self;
+        (inner, entries_read)
+    }
+}
+
+impl<R> IterateEntries for StreamInput<R>
+where
+    R: io::Read,
+{
+    fn next_entry(&mut self) -> Result<Option<ZipFile<'_>>, CommandError> {
+        if let Some(entry) = read_zipfile_from_stream(&mut self.inner)
+            .wrap_err("failed to read zip entries from stdin")?
+        {
+            self.entries_read += 1;
+            Ok(Some(entry))
+        } else {
+            Ok(None)
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct ZipFileInput<A> {
+    inner: A,
+    file_counter: usize,
+}
+
+impl<A> ZipFileInput<A> {
+    pub fn new(inner: A) -> Self {
+        Self {
+            inner,
+            file_counter: 0,
+        }
+    }
+}
+
+impl<A> ZipFileInput<A>
+where
+    A: ops::Deref<Target = ZipArchive<fs::File>>,
+{
+    pub fn remaining(&self) -> usize {
+        self.inner.len() - self.file_counter
+    }
+
+    pub fn none_left(&self) -> bool {
+        self.remaining() == 0
+    }
+}
+
+impl<A> IterateEntries for ZipFileInput<A>
+where
+    A: ops::DerefMut<Target = ZipArchive<fs::File>>,
+{
+    fn next_entry(&mut self) -> Result<Option<ZipFile<'_>>, CommandError> {
+        if self.none_left() {
+            return Ok(None);
+        }
+        let prev_counter = self.file_counter;
+        self.file_counter += 1;
+        self.inner
+            .by_index(prev_counter)
+            .map(Some)
+            .wrap_err_with(|| format!("failed to read entry #{prev_counter} from zip",))
+    }
+}
diff --git a/cli/src/extract/matcher.rs b/cli/src/extract/matcher.rs
new file mode 100644
index 000000000..9e3eb463f
--- /dev/null
+++ b/cli/src/extract/matcher.rs
@@ -0,0 +1,528 @@
+use std::{borrow::Cow, fmt};
+
+#[cfg(feature = "glob")]
+use glob;
+#[cfg(feature = "rx")]
+use regex;
+
+use zip::CompressionMethod;
+
+use super::receiver::{EntryData, EntryKind};
+use super::transform::ComponentSplit;
+use crate::{args::extract::*, CommandError};
+
+#[inline(always)]
+fn process_component_selector<'s>(sel: ComponentSelector, name: &'s str) -> Option<&'s str> {
+    ComponentSplit::split_by_component_selector(sel, name).map(|split| match split {
+        ComponentSplit::LeftAnchored { selected_left, .. } => selected_left,
+        ComponentSplit::RightAnchored { selected_right, ..
} => selected_right, + ComponentSplit::Whole(s) => s, + }) +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum SearchAnchoring { + #[default] + Unanchored, + LeftAnchored, + RightAnchored, + DoublyAnchored, +} + +impl SearchAnchoring { + pub const fn from_prefix_suffix_flags(prefix_anchored: bool, suffix_anchored: bool) -> Self { + match (prefix_anchored, suffix_anchored) { + (true, true) => Self::DoublyAnchored, + (true, false) => Self::LeftAnchored, + (false, true) => Self::RightAnchored, + (false, false) => Self::Unanchored, + } + } + + pub fn wrap_regex_pattern<'s>(self, pattern: &'s str) -> Cow<'s, str> { + match self { + Self::Unanchored => Cow::Borrowed(pattern), + Self::LeftAnchored => Cow::Owned(format!("^(?:{pattern})")), + Self::RightAnchored => Cow::Owned(format!("(?:{pattern})$")), + Self::DoublyAnchored => Cow::Owned(format!("^(?:{pattern})$")), + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum CaseSensitivity { + #[default] + Sensitive, + Insensitive, +} + +impl CaseSensitivity { + pub const fn from_case_insensitive_flag(case_insensitive: bool) -> Self { + match case_insensitive { + true => Self::Insensitive, + false => Self::Sensitive, + } + } + + pub fn string_equal(self, a: &str, b: &str) -> bool { + match self { + Self::Insensitive => a.eq_ignore_ascii_case(b), + Self::Sensitive => a == b, + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MatchModifiers { + pub anchoring: SearchAnchoring, + pub case: CaseSensitivity, +} + +impl MatchModifiers { + pub fn from_flags(flags: PatternModifierFlags) -> Result { + let PatternModifierFlags { + case_insensitive, + multiple_matches, + prefix_anchored, + suffix_anchored, + } = flags; + if multiple_matches { + return Err(CommandError::InvalidArg(format!( + "multimatch modifier :g is unused in match expressions: {flags:?}" + ))); + } + let case = 
CaseSensitivity::from_case_insensitive_flag(case_insensitive); + let anchoring = SearchAnchoring::from_prefix_suffix_flags(prefix_anchored, suffix_anchored); + Ok(Self { anchoring, case }) + } +} + +trait NameMatcher: fmt::Debug { + fn create(pattern: String, opts: MatchModifiers) -> Result + where + Self: Sized; + fn matches(&self, input: &str) -> bool; +} + +#[derive(Debug)] +struct LiteralMatcher { + lit: String, + case: CaseSensitivity, + anchoring: SearchAnchoring, +} + +impl NameMatcher for LiteralMatcher { + fn create(pattern: String, opts: MatchModifiers) -> Result + where + Self: Sized, + { + let MatchModifiers { case, anchoring } = opts; + Ok(Self { + lit: match case { + CaseSensitivity::Sensitive => pattern, + CaseSensitivity::Insensitive => pattern.to_ascii_uppercase(), + }, + case, + anchoring, + }) + } + + fn matches(&self, input: &str) -> bool { + if input.len() < self.lit.len() { + return false; + } + match self.anchoring { + SearchAnchoring::Unanchored => match self.case { + CaseSensitivity::Insensitive => input.to_ascii_uppercase().contains(&self.lit), + CaseSensitivity::Sensitive => input.contains(&self.lit), + }, + SearchAnchoring::DoublyAnchored => self.case.string_equal(&self.lit, input), + SearchAnchoring::LeftAnchored => { + let prefix = &input[..self.lit.len()]; + self.case.string_equal(&self.lit, prefix) + } + SearchAnchoring::RightAnchored => { + let suffix = &input[(input.len() - self.lit.len())..]; + self.case.string_equal(&self.lit, suffix) + } + } + } +} + +#[derive(Debug)] +#[cfg(feature = "glob")] +struct GlobMatcher { + pat: glob::Pattern, + glob_opts: glob::MatchOptions, +} + +#[cfg(feature = "glob")] +impl NameMatcher for GlobMatcher { + fn create(pattern: String, opts: MatchModifiers) -> Result + where + Self: Sized, + { + let MatchModifiers { anchoring, case } = opts; + if !matches!(anchoring, SearchAnchoring::Unanchored) { + return Err(CommandError::InvalidArg(format!( + "anchored search with :p or :s is incompatible with glob 
patterns: {opts:?}" + ))); + } + let glob_opts = glob::MatchOptions { + case_sensitive: match case { + CaseSensitivity::Sensitive => true, + CaseSensitivity::Insensitive => false, + }, + ..Default::default() + }; + let pat = glob::Pattern::new(&pattern).map_err(|e| { + CommandError::InvalidArg(format!( + "failed to construct glob matcher from pattern {pattern:?}: {e}" + )) + })?; + Ok(Self { pat, glob_opts }) + } + + fn matches(&self, input: &str) -> bool { + self.pat.matches_with(input, self.glob_opts) + } +} + +#[derive(Debug)] +#[cfg(feature = "rx")] +struct RegexMatcher { + pat: regex::Regex, +} + +#[cfg(feature = "rx")] +impl NameMatcher for RegexMatcher { + fn create(pattern: String, opts: MatchModifiers) -> Result + where + Self: Sized, + { + let MatchModifiers { case, anchoring } = opts; + + let pattern = anchoring.wrap_regex_pattern(&pattern); + + let pat = regex::RegexBuilder::new(&pattern) + .case_insensitive(match case { + CaseSensitivity::Sensitive => false, + CaseSensitivity::Insensitive => true, + }) + .build() + .map_err(|e| { + CommandError::InvalidArg(format!( + "failed to construct regex matcher from pattern {pattern:?}: {e}" + )) + })?; + Ok(Self { pat }) + } + + fn matches(&self, input: &str) -> bool { + self.pat.is_match(input) + } +} + +pub trait EntryMatcher: fmt::Debug { + type Arg + where + Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized; + fn matches(&self, entry: &EntryData) -> bool; +} + +#[derive(Debug, Copy, Clone)] +enum TrivialMatcher { + True, + False, +} + +impl EntryMatcher for TrivialMatcher { + type Arg = TrivialPredicate where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + TrivialPredicate::True => Self::True, + TrivialPredicate::False => Self::False, + }) + } + + fn matches(&self, _entry: &EntryData) -> bool { + match self { + Self::True => true, + Self::False => false, + } + } +} + +#[derive(Debug, Copy, Clone)] +enum EntryTypeMatcher { + File, 
+ Dir, + Symlink, +} + +impl EntryMatcher for EntryTypeMatcher { + type Arg = EntryType where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + EntryType::File => Self::File, + EntryType::Dir => Self::Dir, + EntryType::Symlink => Self::Symlink, + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match (self, entry.kind) { + (Self::File, EntryKind::File) => true, + (Self::Dir, EntryKind::Dir) => true, + (Self::Symlink, EntryKind::Symlink) => true, + _ => false, + } + } +} + +#[derive(Debug, Copy, Clone)] +enum NonSpecificMethods { + Any, + Known, +} + +impl EntryMatcher for NonSpecificMethods { + type Arg = NonSpecificCompressionMethodArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + NonSpecificCompressionMethodArg::Any => Self::Any, + NonSpecificCompressionMethodArg::Known => Self::Known, + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match self { + Self::Any => true, + Self::Known => { + SpecificCompressionMethodArg::KNOWN_COMPRESSION_METHODS.contains(&entry.compression) + } + } + } +} + +#[derive(Debug)] +struct SpecificMethods { + specific_method: CompressionMethod, +} + +impl EntryMatcher for SpecificMethods { + type Arg = SpecificCompressionMethodArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(Self { + specific_method: arg.translate_to_zip(), + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + self.specific_method == entry.compression + } +} + +#[derive(Debug, Copy, Clone)] +enum DepthLimit { + Max(usize), + Min(usize), +} + +impl EntryMatcher for DepthLimit { + type Arg = DepthLimitArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + DepthLimitArg::Max(max) => Self::Max(max.into()), + DepthLimitArg::Min(min) => Self::Min(min.into()), + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + let 
num_components = entry.name.split('/').count(); + match self { + Self::Max(max) => num_components <= *max, + Self::Min(min) => num_components >= *min, + } + } +} + +#[derive(Debug, Copy, Clone)] +enum Size { + Max(u64), + Min(u64), +} + +impl EntryMatcher for Size { + type Arg = SizeArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + SizeArg::Max(max) => Self::Max(max), + SizeArg::Min(min) => Self::Min(min), + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match self { + Self::Max(max) => entry.uncompressed_size <= *max, + Self::Min(min) => entry.uncompressed_size >= *min, + } + } +} + +#[derive(Debug)] +struct PatternMatcher { + matcher: Box, + comp_sel: ComponentSelector, +} + +impl EntryMatcher for PatternMatcher { + type Arg = MatchArg where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + let MatchArg { + comp_sel, + pat_sel: PatternSelector { pat_sel, modifiers }, + pattern, + } = arg; + + let opts = MatchModifiers::from_flags(modifiers)?; + let matcher: Box = match pat_sel { + PatternSelectorType::Glob => { + #[cfg(feature = "glob")] + { + Box::new(GlobMatcher::create(pattern, opts)?) + } + #[cfg(not(feature = "glob"))] + { + return Err(CommandError::InvalidArg(format!( + "glob patterns were requested, but this binary was built without the \"glob\" feature: {pattern:?}" + ))); + } + } + + PatternSelectorType::Literal => Box::new(LiteralMatcher::create(pattern, opts)?), + PatternSelectorType::Regexp => { + #[cfg(feature = "rx")] + { + Box::new(RegexMatcher::create(pattern, opts)?) 
+ } + #[cfg(not(feature = "rx"))] + { + return Err(CommandError::InvalidArg(format!( + "regexp patterns were requested, but this binary was built without the \"rx\" feature: {pattern:?}" + ))); + } + } + }; + + Ok(Self { matcher, comp_sel }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match process_component_selector(self.comp_sel, entry.name) { + None => false, + Some(s) => self.matcher.matches(s), + } + } +} + +#[derive(Debug)] +pub enum CompiledMatcher { + Primitive(Box), + Negated(Box), + And { + left: Box, + right: Box, + }, + Or { + left: Box, + right: Box, + }, +} + +impl CompiledMatcher { + fn create_primitive(arg: Predicate) -> Result { + Ok(Self::Primitive(match arg { + Predicate::Trivial(arg) => Box::new(TrivialMatcher::from_arg(arg)?), + Predicate::EntryType(arg) => Box::new(EntryTypeMatcher::from_arg(arg)?), + Predicate::CompressionMethod(method_arg) => match method_arg { + CompressionMethodArg::NonSpecific(arg) => { + Box::new(NonSpecificMethods::from_arg(arg)?) 
+ } + CompressionMethodArg::Specific(arg) => Box::new(SpecificMethods::from_arg(arg)?), + }, + Predicate::DepthLimit(arg) => Box::new(DepthLimit::from_arg(arg)?), + Predicate::Size(arg) => Box::new(Size::from_arg(arg)?), + Predicate::Match(arg) => Box::new(PatternMatcher::from_arg(arg)?), + })) + } +} + +impl EntryMatcher for CompiledMatcher { + type Arg = MatchExpression where Self: Sized; + + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + MatchExpression::PrimitivePredicate(pred) => Self::create_primitive(pred)?, + MatchExpression::Negated(arg) => Self::Negated(Box::new(Self::from_arg(*arg)?)), + MatchExpression::And { + explicit: _, + left, + right, + } => { + let left = Box::new(Self::from_arg(*left)?); + let right = Box::new(Self::from_arg(*right)?); + Self::And { left, right } + } + MatchExpression::Or { left, right } => { + let left = Box::new(Self::from_arg(*left)?); + let right = Box::new(Self::from_arg(*right)?); + Self::Or { left, right } + } + MatchExpression::Grouped(inner) => Self::from_arg(*inner)?, + }) + } + + fn matches(&self, entry: &EntryData) -> bool { + match self { + Self::Primitive(m) => m.matches(entry), + Self::Negated(m) => !m.matches(entry), + Self::And { left, right } => left.matches(entry) && right.matches(entry), + Self::Or { left, right } => left.matches(entry) || right.matches(entry), + } + } +} diff --git a/cli/src/extract/named_outputs.rs b/cli/src/extract/named_outputs.rs new file mode 100644 index 000000000..535cde155 --- /dev/null +++ b/cli/src/extract/named_outputs.rs @@ -0,0 +1,347 @@ +use std::{ + cell::RefCell, + collections::{HashMap, HashSet}, + fs, + io::{self, Seek, Write}, + path::PathBuf, + rc::Rc, +}; + +use super::matcher::{CompiledMatcher, EntryMatcher}; +use super::receiver::{ + CompiledEntrySpec, ConcatEntry, EntryReceiver, ExtractEntry, FilesystemReceiver, +}; +use super::transform::{CompiledTransformer, NameTransformer}; +use crate::{args::extract::*, CommandError, 
WrapCommandErr}; + +pub fn process_entry_and_output_specs<'w>( + err: Rc>, + entry_specs: impl IntoIterator, + output_specs: OutputSpecs, +) -> Result>, CommandError> { + let mut entry_specs: Vec = entry_specs + .into_iter() + .map(ParsedEntrySpecArg::from_entry_spec) + .collect::>()?; + if entry_specs.is_empty() { + entry_specs.push(ParsedEntrySpecArg { + matcher: None, + transforms: None, + output_name: OutputName::default_name(), + }); + } + let parsed_outputs = ParsedNamedOutputs::from_output_specs(err, output_specs)?; + parsed_outputs.process_entry_specs_for_outputs(entry_specs) +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +struct OutputName(pub String); + +impl OutputName { + pub fn default_name() -> Self { + Self("default".to_string()) + } +} + +struct ParsedEntrySpecArg { + pub matcher: Option, + pub transforms: Option, + pub output_name: OutputName, +} + +impl ParsedEntrySpecArg { + pub fn from_entry_spec(spec: EntrySpec) -> Result { + let EntrySpec { + match_expr, + name_transforms, + content_transform, + } = spec; + let matcher = match match_expr { + None => None, + Some(expr) => Some(CompiledMatcher::from_arg(expr)?), + }; + let transforms = if name_transforms.is_empty() { + None + } else { + Some(CompiledTransformer::from_arg(name_transforms)?) 
+ }; + let output_name = match content_transform { + ContentTransform::Extract { name } => name + .map(OutputName) + .unwrap_or_else(OutputName::default_name), + }; + Ok(Self { + matcher, + transforms, + output_name, + }) + } +} + +struct NamedOutputsBuilder<'w, W> { + err: Rc>, + concats: HashMap>>, + extracts: HashMap>, + seen_stdout: bool, + seen_files: HashSet, + seen_dirs: HashSet, + seen_names: HashSet, +} + +impl<'w, W> NamedOutputsBuilder<'w, W> { + pub fn new(err: Rc>) -> Self { + Self { + err, + concats: HashMap::new(), + extracts: HashMap::new(), + seen_stdout: false, + seen_files: HashSet::new(), + seen_dirs: HashSet::new(), + seen_names: HashSet::new(), + } + } + + pub fn into_tables( + self, + ) -> ( + HashMap>>, + HashMap>, + ) { + let Self { + concats, extracts, .. + } = self; + (concats, extracts) + } + + fn add_name( + &mut self, + name: OutputName, + f: impl FnOnce() -> Result, + ) -> Result { + if self.seen_names.contains(&name) { + return Err(CommandError::InvalidArg(format!( + "output name {name:?} provided more than once" + ))); + } + + let ret = f()?; + + assert!(self.seen_names.insert(name)); + + Ok(ret) + } + + fn add_concat( + &mut self, + name: OutputName, + handle: impl Write + 'w, + ) -> Result<(), CommandError> { + /* This should be assured by the check against self.seen_names. 
*/ + assert!(!self.concats.contains_key(&name)); + + let handle = Rc::new(RefCell::new(handle)); + + assert!(self.concats.insert(name, handle).is_none()); + + Ok(()) + } + + pub fn add_stdout(&mut self, name: OutputName) -> Result<(), CommandError> { + if self.seen_stdout { + return Err(CommandError::InvalidArg( + "--stdout output provided for more than one receiver".to_string(), + )); + } + + let handle = self.add_name(name.clone(), || Ok(io::stdout()))?; + self.add_concat(name, handle)?; + + self.seen_stdout = true; + Ok(()) + } + + fn add_seen_file(&mut self, path: PathBuf) -> Result<(), CommandError> { + let canon_path = path + .canonicalize() + .wrap_err_with(|| format!("canonicalizing path {path:?} failed"))?; + + if self.seen_files.contains(&canon_path) { + return Err(CommandError::InvalidArg(format!( + "canonical output file path {canon_path:?} provided more than once" + ))); + } + + assert!(self.seen_files.insert(canon_path)); + + Ok(()) + } + + pub fn add_file( + &mut self, + path: PathBuf, + append: bool, + name: OutputName, + ) -> Result<(), CommandError> { + let handle = self.add_name(name.clone(), || { + let mut f: fs::File = if append { + fs::OpenOptions::new() + .write(true) + .create(true) + .open(&path) + .wrap_err_with(|| format!("failed to open file for append at {path:?}"))? + } else { + fs::File::create(&path) + .wrap_err_with(|| format!("failed to open file with truncation at {path:?}"))? 
+ }; + f.seek(io::SeekFrom::End(0)) + .wrap_err_with(|| format!("failed to seek to end of opened file {f:?}"))?; + Ok(f) + })?; + self.add_seen_file(path)?; + self.add_concat(name, handle)?; + Ok(()) + } + + fn add_seen_dir(&mut self, path: PathBuf) -> Result<(), CommandError> { + let canon_path = path + .canonicalize() + .wrap_err_with(|| format!("canonicalizing dir path {path:?} failed"))?; + if self.seen_dirs.contains(&canon_path) { + return Err(CommandError::InvalidArg(format!( + "canonical output dir path {canon_path:?} provided more than once" + ))); + } + + assert!(self.seen_dirs.insert(canon_path)); + + Ok(()) + } + + fn add_extract( + &mut self, + name: OutputName, + handle: impl EntryReceiver + 'w, + ) -> Result<(), CommandError> { + assert!(!self.extracts.contains_key(&name)); + + let handle = Rc::new(handle); + + assert!(self.extracts.insert(name, handle).is_none()); + + Ok(()) + } +} + +impl<'w, W> NamedOutputsBuilder<'w, W> +where + W: Write + 'w, +{ + pub fn add_dir( + &mut self, + output_dir: PathBuf, + mkdir: bool, + name: OutputName, + ) -> Result<(), CommandError> { + let err = self.err.clone(); + let handle = self.add_name(name.clone(), || { + if mkdir { + fs::create_dir_all(&output_dir).wrap_err_with(|| { + format!("failed to create output directory {output_dir:?}") + })?; + }; + Ok(FilesystemReceiver::new(err, output_dir.clone())) + })?; + self.add_seen_dir(output_dir.clone())?; + self.add_extract(name, handle)?; + Ok(()) + } +} + +struct ParsedNamedOutputs<'w> { + concats: HashMap>>, + extracts: HashMap>, +} + +impl<'w> ParsedNamedOutputs<'w> { + pub fn process_entry_specs_for_outputs( + self, + args: impl IntoIterator, + ) -> Result>, CommandError> { + args.into_iter() + .map(|arg| self.lookup_entry_spec_arg(arg)) + .collect() + } + + fn lookup_entry_spec_arg( + &self, + arg: ParsedEntrySpecArg, + ) -> Result, CommandError> { + let ParsedEntrySpecArg { + matcher, + transforms, + output_name, + } = arg; + if let Some(stream) = 
self.concats.get(&output_name) { + if transforms.is_some() { + return Err(CommandError::InvalidArg(format!( + "entry name transforms do not apply to concat output {output_name:?}" + ))); + } + return Ok(CompiledEntrySpec::Concat(ConcatEntry { + matcher, + stream: stream.clone(), + })); + } + let Some(recv) = self.extracts.get(&output_name) else { + return Err(CommandError::InvalidArg(format!( + "output name {output_name:?} was not found" + ))); + }; + Ok(CompiledEntrySpec::Extract(ExtractEntry { + matcher, + transforms, + recv: recv.clone(), + })) + } + + pub fn from_output_specs( + err: Rc>, + spec: OutputSpecs, + ) -> Result { + let OutputSpecs { default, named } = spec; + + let mut builder = NamedOutputsBuilder::new(err); + + if let Some(default) = default { + let name = OutputName::default_name(); + match default { + OutputCollation::ConcatenateStdout => { + builder.add_stdout(name)?; + } + OutputCollation::ConcatenateFile { path, append } => { + builder.add_file(path, append, name)?; + } + OutputCollation::Filesystem { output_dir, mkdir } => { + builder.add_dir(output_dir, mkdir, name)?; + } + } + } + for NamedOutput { name, output } in named.into_iter() { + let name = OutputName(name); + match output { + OutputCollation::ConcatenateStdout => { + builder.add_stdout(name)?; + } + OutputCollation::ConcatenateFile { path, append } => { + builder.add_file(path, append, name)?; + } + OutputCollation::Filesystem { output_dir, mkdir } => { + builder.add_dir(output_dir, mkdir, name)?; + } + } + } + + let (concats, extracts) = builder.into_tables(); + Ok(Self { concats, extracts }) + } +} diff --git a/cli/src/extract/receiver.rs b/cli/src/extract/receiver.rs new file mode 100644 index 000000000..6495ccd60 --- /dev/null +++ b/cli/src/extract/receiver.rs @@ -0,0 +1,386 @@ +use std::{ + borrow::Cow, + cell::RefCell, + fmt, fs, + io::{self, Write}, + mem, + path::{Path, PathBuf}, + rc::Rc, +}; + +use zip::{ + extra_fields::{ExtendedTimestamp, ExtraField}, + read::ZipFile, 
+ CompressionMethod, DateTime, +}; + +use super::matcher::{CompiledMatcher, EntryMatcher}; +use super::transform::{CompiledTransformer, NameTransformer}; +use crate::{CommandError, WrapCommandErr}; + +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum EntryKind { + File, + Dir, + Symlink, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct EntryData<'a> { + pub name: &'a str, + pub kind: EntryKind, + pub compression: CompressionMethod, + pub unix_mode: Option, + pub comment: &'a str, + pub uncompressed_size: u64, + pub compressed_size: u64, + pub local_header_start: u64, + pub content_start: u64, + pub central_header_start: u64, + pub crc32: u32, + pub last_modified_time: Option, + pub extended_timestamp: Option, +} + +impl<'a> EntryData<'a> { + #[inline(always)] + pub fn from_entry<'b>(entry: &'a ZipFile<'b>) -> Self { + Self { + name: entry.name(), + kind: if entry.is_dir() { + EntryKind::Dir + } else if entry.is_symlink() { + EntryKind::Symlink + } else { + EntryKind::File + }, + compression: entry.compression(), + unix_mode: entry.unix_mode(), + comment: entry.comment(), + uncompressed_size: entry.size(), + compressed_size: entry.compressed_size(), + local_header_start: entry.header_start(), + content_start: entry.data_start(), + central_header_start: entry.central_header_start(), + crc32: entry.crc32(), + last_modified_time: entry.last_modified(), + extended_timestamp: entry + .extra_data_fields() + .find_map(|f| match f { + ExtraField::ExtendedTimestamp(ts) => Some(ts), + }) + .cloned(), + } + } + + #[inline(always)] + pub const fn content_end(&self) -> u64 { + self.content_start + self.compressed_size + } +} + +pub struct ConcatEntry<'w> { + pub matcher: Option, + pub stream: Rc>, +} + +impl<'w> fmt::Debug for ConcatEntry<'w> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "ConcatEntry {{ matcher: {:?}, stream: {:p} }}", + &self.matcher, &self.stream + ) + } +} + +impl<'w> ConcatEntry<'w> { + 
pub fn do_match<'a>(&self, data: &EntryData<'a>) -> Option<&Rc>> { + if self + .matcher + .as_ref() + .map(|m| m.matches(data)) + .unwrap_or(true) + { + Some(&self.stream) + } else { + None + } + } +} + +#[derive(Debug)] +pub struct ExtractEntry<'w> { + pub matcher: Option, + pub transforms: Option, + pub recv: Rc, +} + +impl<'w> ExtractEntry<'w> { + pub fn do_match_and_transform<'a>( + &self, + data: &EntryData<'a>, + ) -> Option<(Cow<'a, str>, &Rc)> { + if self + .matcher + .as_ref() + .map(|m| m.matches(data)) + .unwrap_or(true) + { + let new_name = self + .transforms + .as_ref() + .map(|t| t.transform_name(data.name)) + .unwrap_or_else(|| Cow::Borrowed(data.name)); + Some((new_name, &self.recv)) + } else { + None + } + } +} + +#[derive(Debug)] +pub enum CompiledEntrySpec<'w> { + Concat(ConcatEntry<'w>), + Extract(ExtractEntry<'w>), +} + +impl<'w> CompiledEntrySpec<'w> { + pub fn try_match_and_transform<'a>( + &self, + data: &EntryData<'a>, + ) -> Option> { + match self { + Self::Concat(c) => c.do_match(data).map(MatchingEntrySpec::Concat), + Self::Extract(e) => e + .do_match_and_transform(data) + .map(|(n, p)| MatchingEntrySpec::Extract(n, p)), + } + } +} + +pub enum MatchingEntrySpec<'a, 'c, 'w> { + Concat(&'c Rc>), + Extract(Cow<'a, str>, &'c Rc), +} + +impl<'a, 'c, 'w> MatchingEntrySpec<'a, 'c, 'w> { + /* Split output handles for concat, and split generated handles by extract source and + * name. use Rc::ptr_eq() to split, and Cow::<'s, str>::eq() with str AsRef. 
*/ + pub fn is_nested_duplicate( + self, + deduped_concat_writers: &mut Vec<&'c Rc>>, + deduped_matching_extracts: &mut Vec<(&'c Rc, Vec>)>, + ) -> bool { + match self { + MatchingEntrySpec::Concat(concat_writer) => { + if deduped_concat_writers + .iter() + .any(|p| Rc::ptr_eq(p, &concat_writer)) + { + true + } else { + deduped_concat_writers.push(concat_writer); + false + } + } + MatchingEntrySpec::Extract(name, extract_receiver) => { + if let Some((_, names)) = deduped_matching_extracts + .iter_mut() + .find(|(p, _)| Rc::ptr_eq(p, &extract_receiver)) + { + if names.iter().any(|n| n.as_ref() == name.as_ref()) { + true + } else { + names.push(name); + false + } + } else { + deduped_matching_extracts.push((extract_receiver, vec![name])); + false + } + } + } + } +} + +pub trait EntryReceiver: fmt::Debug { + fn generate_entry_handle<'s>( + &self, + data: &EntryData<'s>, + symlink_target: Option<&[u8]>, + name: Cow<'s, str>, + ) -> Result>, CommandError>; + + fn finalize_entries(&self) -> Result<(), CommandError>; +} + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +#[cfg(unix)] +struct PermsEntry { + path: PathBuf, + mode: u32, +} + +pub struct FilesystemReceiver { + err: Rc>, + output_dir: PathBuf, + #[cfg(unix)] + perms_to_set: RefCell>, +} + +impl FilesystemReceiver { + pub fn new(err: Rc>, output_dir: PathBuf) -> Self { + Self { + err, + output_dir, + #[cfg(unix)] + perms_to_set: RefCell::new(Vec::new()), + } + } +} + +impl fmt::Debug for FilesystemReceiver { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "FilesystemReceiver {{ output_dir: {:?} }}", + &self.output_dir + ) + } +} + +impl FilesystemReceiver +where + W: Write, +{ + #[cfg(unix)] + fn create_or_overwrite_symlink( + err: &mut impl Write, + target: &[u8], + full_output_path: &Path, + ) -> Result<(), CommandError> { + use std::{ + ffi::OsStr, + os::unix::{ffi::OsStrExt, fs::symlink}, + }; + let target = OsStr::from_bytes(target); + writeln!(err, "entry is symlink 
to {target:?}, creating").unwrap(); + /* The stdlib symlink function has no functionality like OpenOptions to + * truncate a symlink if it already exists, so we have to do that ourselves + * here. */ + if let Err(e) = symlink(target, full_output_path) { + let e = match e.kind() { + io::ErrorKind::AlreadyExists => { + writeln!(err, "a file already existed at the symlink target {full_output_path:?}, removing") + .unwrap(); + fs::remove_file(full_output_path).wrap_err_with(|| { + format!("failed to remove file at symlink target {full_output_path:?}") + })?; + writeln!( + err, + "successfully removed file entry, creating symlink again" + ) + .unwrap(); + symlink(target, full_output_path).err() + } + _ => Some(e), + }; + if let Some(e) = e { + return Err(e).wrap_err_with(|| { + format!( + "failed to create symlink at {full_output_path:?} with target {target:?}" + ) + }); + } + } + Ok(()) + } +} + +impl EntryReceiver for FilesystemReceiver +where + W: Write, +{ + fn generate_entry_handle<'s>( + &self, + data: &EntryData<'s>, + symlink_target: Option<&[u8]>, + name: Cow<'s, str>, + ) -> Result>, CommandError> { + let mut err = self.err.borrow_mut(); + let full_output_path = self.output_dir.join(name.as_ref()); + writeln!( + err, + "receiving entry {} with name {name} and writing to path {full_output_path:?}", + data.name + ) + .unwrap(); + + match data.kind { + EntryKind::Dir => { + writeln!(err, "entry is directory, creating").unwrap(); + fs::create_dir_all(&full_output_path).wrap_err_with(|| { + format!("failed to create directory entry at {full_output_path:?}") + })?; + } + EntryKind::Symlink => { + let target = symlink_target.expect("we should have generated this"); + + #[cfg(unix)] + Self::create_or_overwrite_symlink(&mut *err, target, &full_output_path)?; + #[cfg(not(unix))] + todo!("TODO: cannot create symlink for entry {name} on non-unix yet!"); + } + EntryKind::File => { + writeln!(err, "entry is file, creating").unwrap(); + if let Some(containing_dir) = 
full_output_path.parent() { + fs::create_dir_all(containing_dir).wrap_err_with(|| { + format!("failed to create parent dirs for file at {full_output_path:?}") + })?; + } else { + writeln!(err, "entry had no parent dir (in root dir?)").unwrap(); + } + let outfile = fs::File::create(&full_output_path) + .wrap_err_with(|| format!("failed to create file at {full_output_path:?}"))?; + return Ok(Some(Box::new(outfile))); + } + } + + #[cfg(unix)] + if let Some(mode) = data.unix_mode { + writeln!( + err, + "storing unix mode {mode} for path {full_output_path:?}" + ) + .unwrap(); + self.perms_to_set.borrow_mut().push(PermsEntry { + path: full_output_path, + mode, + }); + } + + Ok(None) + } + + fn finalize_entries(&self) -> Result<(), CommandError> { + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt; + + let mut perms_to_set = mem::take(&mut *self.perms_to_set.borrow_mut()); + perms_to_set.sort_unstable(); + writeln!( + &mut self.err.borrow_mut(), + "perms to set (these are done in reverse order): {perms_to_set:?}" + ) + .unwrap(); + for PermsEntry { path, mode } in perms_to_set.into_iter().rev() { + let perms = fs::Permissions::from_mode(mode); + fs::set_permissions(&path, perms.clone()) + .wrap_err_with(|| format!("error setting perms {perms:?} for path {path:?}"))?; + } + } + Ok(()) + } +} diff --git a/cli/src/extract/transform.rs b/cli/src/extract/transform.rs new file mode 100644 index 000000000..9494da36d --- /dev/null +++ b/cli/src/extract/transform.rs @@ -0,0 +1,707 @@ +use std::{borrow::Cow, collections::VecDeque, fmt, ops, path::Path, str}; + +#[cfg(feature = "rx")] +use regex; + +use super::matcher::{CaseSensitivity, SearchAnchoring}; +use crate::{args::extract::*, CommandError}; + +pub trait NameTransformer: fmt::Debug { + type Arg + where + Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized; + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str>; +} + +#[derive(Debug, Copy, Clone)] +enum Trivial { + Identity, +} + +impl 
NameTransformer for Trivial { + type Arg = TrivialTransform where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(match arg { + TrivialTransform::Identity => Self::Identity, + }) + } + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + match self { + Self::Identity => Cow::Borrowed(name), + } + } +} + +#[derive(Debug)] +struct StripComponents { + num_components_to_strip: usize, +} + +impl NameTransformer for StripComponents { + type Arg = u8 where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(Self { + num_components_to_strip: arg.into(), + }) + } + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + /* If no directory components, then nothing to strip. */ + if !name.contains('/') { + return Cow::Borrowed(name); + } + /* We allow stripping 0 components, which does nothing. */ + if self.num_components_to_strip == 0 { + return Cow::Borrowed(name); + } + /* Pop off prefix components until only one is left or we have stripped all the + * requested prefix components. */ + let mut remaining_to_strip = self.num_components_to_strip; + let mut separator_indices: VecDeque = + name.match_indices('/').map(|(i, _)| i).collect(); + debug_assert!(separator_indices.len() > 0); + /* Always keep the final separator, as regardless of how many we strip, we want + * to keep the basename in all cases. 
*/ + while separator_indices.len() > 1 && remaining_to_strip > 0 { + let _ = separator_indices.pop_front().unwrap(); + remaining_to_strip -= 1; + } + debug_assert!(separator_indices.len() > 0); + let leftmost_remaining_separator_index: usize = separator_indices.pop_front().unwrap(); + Cow::Borrowed(&name[(leftmost_remaining_separator_index + 1)..]) + } +} + +#[derive(Debug)] +struct AddPrefix { + prefix_to_add: String, +} + +impl NameTransformer for AddPrefix { + type Arg = String where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + Ok(Self { prefix_to_add: arg }) + } + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + /* We allow an empty prefix, which means to do nothing. */ + if self.prefix_to_add.is_empty() { + return Cow::Borrowed(name); + } + Cow::Owned(format!("{}/{}", self.prefix_to_add, name)) + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Multiplicity { + #[default] + Single, + All, +} + +impl Multiplicity { + pub const fn from_multiple_matches_flag(multiple_matches: bool) -> Self { + match multiple_matches { + true => Self::All, + false => Self::Single, + } + } +} + +#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct ReplaceModifiers { + pub anchoring: SearchAnchoring, + pub case: CaseSensitivity, + pub multi: Multiplicity, +} + +impl ReplaceModifiers { + pub const fn from_flags(flags: PatternModifierFlags) -> Self { + let PatternModifierFlags { + case_insensitive, + multiple_matches, + prefix_anchored, + suffix_anchored, + } = flags; + let multi = Multiplicity::from_multiple_matches_flag(multiple_matches); + let case = CaseSensitivity::from_case_insensitive_flag(case_insensitive); + let anchoring = SearchAnchoring::from_prefix_suffix_flags(prefix_anchored, suffix_anchored); + Self { + anchoring, + case, + multi, + } + } +} + +trait PatternTransformer: fmt::Debug { + type Replacement + where + Self: Sized; + fn create( 
+ pattern: String, + opts: ReplaceModifiers, + rep: Self::Replacement, + ) -> Result + where + Self: Sized; + + fn replace<'s>(&self, input: &'s str) -> Cow<'s, str>; +} + +#[derive(Debug)] +struct LiteralTransformer { + lit: String, + case: CaseSensitivity, + anchoring: SearchAnchoring, + multi: Multiplicity, + rep: String, +} + +impl LiteralTransformer { + fn format_single_replacement<'s>( + input: &'s str, + lit_len: usize, + rep: &str, + match_index: usize, + ) -> Cow<'s, str> { + /* If the replacement is empty, we have the opportunity to return a borrowed Cow. */ + if rep.is_empty() { + /* Remove the prefix alone! */ + if match_index == 0 { + return Cow::Borrowed(&input[lit_len..]); + } + /* Remove the suffix alone! */ + if match_index == input.len() - lit_len { + return Cow::Borrowed(&input[..match_index]); + } + } + /* Otherwise, we allocate a new string. */ + Cow::Owned(format!( + "{}{}{}", + &input[..match_index], + rep, + &input[(match_index + lit_len)..] + )) + } + + fn replace_single_anchored<'s>( + input: &'s str, + lit: &str, + rep: &str, + range: ops::Range, + case: CaseSensitivity, + ) -> Cow<'s, str> { + let sub = &input[range.clone()]; + if case.string_equal(lit, sub) { + Self::format_single_replacement(input, lit.len(), rep, range.start) + } else { + Cow::Borrowed(input) + } + } + + fn replace_single_exact<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> { + match input.find(lit) { + None => Cow::Borrowed(input), + Some(i) => Self::format_single_replacement(input, lit.len(), rep, i), + } + } + + fn replace_single_icase<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> { + /* NB: literal was already changed to uppercase upon construction in Self::create()! 
*/ + match input.to_ascii_uppercase().find(&lit) { + None => Cow::Borrowed(input), + Some(i) => Self::format_single_replacement(input, lit.len(), rep, i), + } + } + + fn format_multiple_replacements<'s>( + input: &'s str, + lit_len: usize, + rep: &str, + match_indices: Vec, + ) -> Cow<'s, str> { + if match_indices.is_empty() { + return Cow::Borrowed(input); + } + if match_indices.len() == 1 { + return Self::format_single_replacement(input, lit_len, rep, match_indices[0]); + } + let expected_len: usize = + input.len() - (lit_len * match_indices.len()) + (rep.len() * match_indices.len()); + let mut ret = String::with_capacity(expected_len); + let mut last_source_position: usize = 0; + for i in match_indices.into_iter() { + ret.push_str(&input[last_source_position..i]); + ret.push_str(rep); + last_source_position = i + lit_len; + } + assert_eq!(ret.len(), expected_len); + Cow::Owned(ret) + } + + fn replace_multiple_exact<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> { + let match_indices: Vec = input.match_indices(lit).map(|(i, _)| i).collect(); + Self::format_multiple_replacements(input, lit.len(), rep, match_indices) + } + + fn replace_multiple_icase<'s>(input: &'s str, lit: &str, rep: &str) -> Cow<'s, str> { + let match_indices: Vec = input + .to_ascii_uppercase() + /* NB: literal was already changed to uppercase upon construction in Self::create()! 
*/ + .match_indices(&lit) + .map(|(i, _)| i) + .collect(); + Self::format_multiple_replacements(input, lit.len(), rep, match_indices) + } +} + +impl PatternTransformer for LiteralTransformer { + type Replacement = String where Self: Sized; + fn create( + pattern: String, + opts: ReplaceModifiers, + rep: Self::Replacement, + ) -> Result + where + Self: Sized, + { + let ReplaceModifiers { + case, + anchoring, + multi, + } = opts; + + if matches!(multi, Multiplicity::All) && !matches!(anchoring, SearchAnchoring::Unanchored) { + return Err(CommandError::InvalidArg(format!( + "multimatch replacement with :g is not supported with anchoring flags :p or :s for literal transforms: {opts:?} {pattern:?}" + ))); + } + + Ok(Self { + lit: match case { + CaseSensitivity::Sensitive => pattern, + CaseSensitivity::Insensitive => pattern.to_ascii_uppercase(), + }, + case, + anchoring, + multi, + rep, + }) + } + + fn replace<'s>(&self, input: &'s str) -> Cow<'s, str> { + /* Empty replacement or literal is allowed, it just does nothing. */ + if self.lit.is_empty() || input.is_empty() { + return Cow::Borrowed(input); + } + /* Can't match input longer than the literal. 
*/ + if self.lit.len() > input.len() { + return Cow::Borrowed(input); + } + + match self.multi { + Multiplicity::Single => match self.anchoring { + SearchAnchoring::DoublyAnchored => Self::replace_single_anchored( + input, + &self.lit, + &self.rep, + 0..input.len(), + self.case, + ), + SearchAnchoring::LeftAnchored => Self::replace_single_anchored( + input, + &self.lit, + &self.rep, + 0..self.lit.len(), + self.case, + ), + SearchAnchoring::RightAnchored => Self::replace_single_anchored( + input, + &self.lit, + &self.rep, + (input.len() - self.lit.len())..input.len(), + self.case, + ), + SearchAnchoring::Unanchored => match self.case { + CaseSensitivity::Sensitive => { + Self::replace_single_exact(input, &self.lit, &self.rep) + } + CaseSensitivity::Insensitive => { + Self::replace_single_icase(input, &self.lit, &self.rep) + } + }, + }, + Multiplicity::All => match self.anchoring { + SearchAnchoring::Unanchored => match self.case { + CaseSensitivity::Sensitive => { + Self::replace_multiple_exact(input, &self.lit, &self.rep) + } + CaseSensitivity::Insensitive => { + Self::replace_multiple_icase(input, &self.lit, &self.rep) + } + }, + _ => unreachable!("checked during construction"), + }, + } + } +} + +#[derive(Debug)] +#[cfg(feature = "rx")] +struct RegexpTransformer { + pat: regex::Regex, + multi: Multiplicity, + rep: String, +} + +#[cfg(feature = "rx")] +impl PatternTransformer for RegexpTransformer { + type Replacement = String where Self: Sized; + fn create( + pattern: String, + opts: ReplaceModifiers, + rep: Self::Replacement, + ) -> Result + where + Self: Sized, + { + let ReplaceModifiers { + case, + anchoring, + multi, + } = opts; + let pattern = anchoring.wrap_regex_pattern(&pattern); + + let pat = regex::RegexBuilder::new(&pattern) + .case_insensitive(match case { + CaseSensitivity::Insensitive => true, + CaseSensitivity::Sensitive => false, + }) + .build() + .map_err(|e| { + CommandError::InvalidArg(format!( + "failed to construct regex replacer from search 
pattern {pattern:?}: {e}" + )) + })?; + Ok(Self { pat, multi, rep }) + } + + fn replace<'s>(&self, input: &'s str) -> Cow<'s, str> { + match self.multi { + Multiplicity::Single => self.pat.replace(input, &self.rep), + Multiplicity::All => self.pat.replace_all(input, &self.rep), + } + } +} + +pub enum ComponentSplit<'s> { + LeftAnchored { + selected_left: &'s str, + right: &'s str, + }, + RightAnchored { + left: &'s str, + selected_right: &'s str, + }, + Whole(&'s str), +} + +impl<'s> ComponentSplit<'s> { + #[inline(always)] + pub fn split_by_component_selector(sel: ComponentSelector, name: &'s str) -> Option { + let path = Path::new(name); + match sel { + ComponentSelector::Path => Some(ComponentSplit::Whole(name)), + ComponentSelector::Basename => path + .file_name() + .map(|bname| bname.to_str().unwrap()) + .map(|bname| name.split_at(name.len() - bname.len())) + .map(|(pfx, bname)| ComponentSplit::RightAnchored { + left: pfx, + selected_right: bname, + }), + ComponentSelector::Dirname => path + .parent() + .map(|p| p.to_str().unwrap()) + /* "a".parent() becomes Some(""), which we want to treat as no parent */ + .filter(|s| !s.is_empty()) + .map(|dirname| name.split_at(dirname.len())) + .map(|(dirname, sfx)| ComponentSplit::LeftAnchored { + selected_left: dirname, + right: sfx, + }), + ComponentSelector::FileExtension => path + .extension() + .map(|ext| ext.to_str().unwrap()) + .map(|ext| name.split_at(name.len() - ext.len())) + .map(|(pfx, ext)| ComponentSplit::RightAnchored { + left: pfx, + selected_right: ext, + }), + } + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +enum SubstringAnchoring { + RetainsLeftAnchor, + RetainsRightAnchor, + RetainsBothAnchors, + LosesBothAnchors, +} + +impl SubstringAnchoring { + #[inline(always)] + pub fn analyze<'s, 't>(parent: &'s str, sub: &'t str) -> Self + where + 't: 's, + { + let p = parent.as_bytes().as_ptr_range(); + let s = sub.as_bytes().as_ptr_range(); + assert!(s.start >= p.start); + 
assert!(s.end <= p.end); + if p.start == s.start { + if p.end == s.end { + debug_assert_eq!(parent, sub); + Self::RetainsBothAnchors + } else { + Self::RetainsLeftAnchor + } + } else { + if p.end == s.end { + Self::RetainsRightAnchor + } else { + Self::LosesBothAnchors + } + } + } + + #[inline(always)] + pub fn split_then_transform_then_reformulate<'s>( + input: &'s str, + split: impl FnOnce(&'s str) -> Option>, + transform: impl FnOnce(&'s str) -> Cow<'s, str>, + ) -> Cow<'s, str> { + let components = match split(input) { + /* If the given name doesn't have the specified component, return it unchanged. */ + None => return Cow::Borrowed(input), + Some(s) => s, + }; + match components { + /* If there was no splitting (the whole path was selected), then we don't need to do + * any work to hook things back up! */ + ComponentSplit::Whole(s) => transform(s), + /* If there was splitting, we need to do more work. */ + ComponentSplit::LeftAnchored { + selected_left, + right, + } => match transform(selected_left) { + /* If we reallocated, then we have to reallocate the whole thing, so reuse the + * returned String. */ + Cow::Owned(mut new_left) => { + new_left.push_str(right); + Cow::Owned(new_left) + } + /* If no reallocation, we now have to figure out whether the result is still + * contiguous. 
*/ + Cow::Borrowed(left_sub) => match Self::analyze(selected_left, left_sub) { + Self::RetainsBothAnchors => Cow::Borrowed(input), + Self::RetainsRightAnchor => { + Cow::Borrowed(Self::join_adjacent_strings(input, left_sub, right)) + } + _ => Cow::Owned(format!("{}{}", left_sub, right)), + }, + }, + ComponentSplit::RightAnchored { + left, + selected_right, + } => match transform(selected_right) { + Cow::Owned(mut new_right) => { + new_right.insert_str(0, left); + Cow::Owned(new_right) + } + Cow::Borrowed(right_sub) => match Self::analyze(selected_right, right_sub) { + Self::RetainsBothAnchors => Cow::Borrowed(input), + Self::RetainsLeftAnchor => { + Cow::Borrowed(Self::join_adjacent_strings(input, left, right_sub)) + } + _ => Cow::Owned(format!("{}{}", left, right_sub)), + }, + }, + } + } + + #[inline(always)] + fn join_adjacent_strings<'s, 't>(parent: &'s str, left: &'t str, right: &'t str) -> &'s str + where + 't: 's, + { + let parent_range = parent.as_bytes().as_ptr_range(); + let left = left.as_bytes().as_ptr_range(); + debug_assert!(left.start >= parent_range.start && left.end <= parent_range.end); + let right = right.as_bytes().as_ptr_range(); + debug_assert!(right.start >= parent_range.start && right.end <= parent_range.end); + debug_assert_eq!(left.end, right.start); + let start_offset = (left.start as usize) - (parent_range.start as usize); + let end_offset = (parent_range.end as usize) - (right.end as usize); + &parent[start_offset..(parent.len() - end_offset)] + } +} + +#[derive(Debug)] +struct ComponentTransformer { + pattern_trans: Box, + comp_sel: ComponentSelector, +} + +impl NameTransformer for ComponentTransformer { + type Arg = TransformArg where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + let TransformArg { + comp_sel, + pat_sel: PatternSelector { pat_sel, modifiers }, + pattern, + replacement_spec, + } = arg; + + let opts = ReplaceModifiers::from_flags(modifiers); + let pattern_trans: Box = match pat_sel { + 
PatternSelectorType::Glob => { + return Err(CommandError::InvalidArg(format!( + "glob patterns are not supported for name transformations: {pattern:?}" + ))); + } + PatternSelectorType::Literal => { + Box::new(LiteralTransformer::create(pattern, opts, replacement_spec)?) + } + PatternSelectorType::Regexp => { + #[cfg(feature = "rx")] + { + Box::new(RegexpTransformer::create(pattern, opts, replacement_spec)?) + } + #[cfg(not(feature = "rx"))] + { + return Err(CommandError::InvalidArg(format!( + "regexp patterns were requested, but this binary was built without the \"rx\" feature: {pattern:?}" + ))); + } + } + }; + + Ok(Self { + pattern_trans, + comp_sel, + }) + } + + fn transform_name<'s>(&self, name: &'s str) -> Cow<'s, str> { + SubstringAnchoring::split_then_transform_then_reformulate( + name, + move |name| ComponentSplit::split_by_component_selector(self.comp_sel, name), + |name| self.pattern_trans.replace(name), + ) + } +} + +#[derive(Debug)] +pub struct CompiledTransformer { + transformers: Vec>, +} + +impl CompiledTransformer { + fn make_single(trans: NameTransform) -> Result, CommandError> { + Ok(match trans { + NameTransform::Trivial(arg) => Box::new(Trivial::from_arg(arg)?), + NameTransform::Basic(basic_trans) => match basic_trans { + BasicTransform::StripComponents(arg) => Box::new(StripComponents::from_arg(arg)?), + BasicTransform::AddPrefix(arg) => Box::new(AddPrefix::from_arg(arg)?), + }, + NameTransform::Complex(complex_trans) => match complex_trans { + ComplexTransform::Transform(arg) => Box::new(ComponentTransformer::from_arg(arg)?), + }, + }) + } +} + +impl NameTransformer for CompiledTransformer { + type Arg = Vec where Self: Sized; + fn from_arg(arg: Self::Arg) -> Result + where + Self: Sized, + { + assert!(!arg.is_empty()); + Ok(Self { + transformers: arg + .into_iter() + .map(Self::make_single) + .collect::>()?, + }) + } + + /// Transform the name from the zip entry, maintaining a few invariants: + /// 1. 
If the transformations all return substrings (no prefixing, non-empty replacements, or + /// empty replacements that lead to non-contiguous input chunks), return a slice of the + /// original input, pointing back to the ZipFile's memory location with associated lifetime. + /// 2. If some intermediate transformation requires an allocation (e.g. adding a prefix), do + /// not perform intermediate reallocations for subsequent substring-only transformations. + /// - TODO: The returned string may be reallocated from the initial allocation exactly once + /// at the end, if substring-only transformations reduced its length. This is because Cow + /// can only describe a substring of the original input or an entirely new allocated + /// string, as opposed to a more general sort of string view wrapper. + fn transform_name<'s>(&self, mut original_name: &'s str) -> Cow<'s, str> { + let mut newly_allocated_name: Option = None; + let mut newly_allocated_str: Option<&str> = None; + for transformer in self.transformers.iter() { + match newly_allocated_str { + Some(s) => match transformer.transform_name(s) { + Cow::Borrowed(t) => { + let _ = newly_allocated_str.replace(t); + } + Cow::Owned(t) => { + assert!(newly_allocated_name.replace(t).is_some()); + newly_allocated_str = Some(newly_allocated_name.as_ref().unwrap().as_str()); + } + }, + None => match transformer.transform_name(original_name) { + Cow::Borrowed(t) => { + original_name = t; + } + Cow::Owned(t) => { + assert!(newly_allocated_name.replace(t).is_none()); + newly_allocated_str = Some(newly_allocated_name.as_ref().unwrap().as_str()); + } + }, + } + } + + if newly_allocated_name.is_none() { + /* If we have never allocated anything new, just return the substring of the original + * name! 
*/ + Cow::Borrowed(original_name) + } else { + let subref = newly_allocated_str.unwrap(); + /* If the active substring is the same length as the backing string, assume it's + * unchanged, so we can return the backing string without reallocating. */ + if subref.len() == newly_allocated_name.as_ref().unwrap().len() { + Cow::Owned(newly_allocated_name.unwrap()) + } else { + let reallocated_string = subref.to_string(); + Cow::Owned(reallocated_string) + } + } + } +} diff --git a/cli/src/info.rs b/cli/src/info.rs new file mode 100644 index 000000000..4a206bdce --- /dev/null +++ b/cli/src/info.rs @@ -0,0 +1,167 @@ +use std::{ + fs, + io::{self, Write}, + path::PathBuf, +}; + +use zip::read::ZipArchive; + +use crate::{ + args::{extract::InputSpec, info::*}, + extract::{ + entries::{IterateEntries, StreamInput, ZipFileInput}, + matcher::{CompiledMatcher, EntryMatcher}, + receiver::EntryData, + }, + CommandError, WrapCommandErr, +}; + +mod directives; +mod formats; +use directives::{ + archive::{ + compiled::{CompiledArchiveDirective, CompiledArchiveFormat}, + ArchiveData, + }, + compiled::CompiledFormatSpec, + entry::compiled::{CompiledEntryDirective, CompiledEntryFormat}, +}; + +pub struct ArchiveWithPath { + pub path: PathBuf, + pub len: u64, + pub archive: ZipArchive, +} + +impl ArchiveWithPath { + pub fn open(path: PathBuf) -> Result { + let f = fs::File::open(&path) + .wrap_err_with(|| format!("failed to open zip input file path {:?}", &path))?; + let len = f + .metadata() + .wrap_err("failed to extract file metadata")? 
+ .len(); + let archive = ZipArchive::new(f) + .wrap_err_with(|| format!("failed to create zip archive from file {:?}", &path))?; + Ok(Self { path, len, archive }) + } +} + +fn format_entry_info( + mut err: impl Write, + entry_formatter: &CompiledFormatSpec, + matcher: Option<&CompiledMatcher>, + mut output_stream: impl Write, + source: &mut impl IterateEntries, +) -> Result<(), CommandError> { + if entry_formatter.is_empty() { + writeln!( + &mut err, + "empty entry format, skipping reading from any entries" + ) + .unwrap(); + return Ok(()); + } + + while let Some(entry) = source.next_entry()? { + let data = EntryData::from_entry(&entry); + if matcher.as_ref().is_some_and(|m| !m.matches(&data)) { + writeln!(&mut err, "matcher ignored entry: {:?}", data.name).unwrap(); + continue; + } + entry_formatter.execute_format(data, &mut output_stream)?; + } + Ok(()) +} + +fn format_archive_info( + mut err: impl Write, + archive_formatter: &CompiledFormatSpec, + mut output_stream: impl Write, + zip: ArchiveData, +) -> Result<(), CommandError> { + if archive_formatter.is_empty() { + writeln!(&mut err, "empty archive format, skipping archive overview").unwrap(); + return Ok(()); + } + + archive_formatter.execute_format(zip, &mut output_stream)?; + Ok(()) +} + +pub fn execute_info(mut err: impl Write, args: Info) -> Result<(), CommandError> { + let Info { + format_spec, + match_expr, + input_spec: InputSpec { + stdin_stream, + zip_paths, + }, + } = args; + + let matcher = match match_expr { + None => None, + Some(expr) => Some(CompiledMatcher::from_arg(expr)?), + }; + let (archive_formatter, entry_formatter) = match format_spec { + FormatSpec::Compact => todo!(), + FormatSpec::Extended => todo!(), + FormatSpec::Custom { overview, entry } => ( + CompiledFormatSpec::from_spec::(overview)?, + CompiledFormatSpec::from_spec::(entry)?, + ), + }; + let mut output_stream = io::stdout().lock(); + + if stdin_stream { + let mut stdin = StreamInput::new(io::stdin().lock()); + + 
format_entry_info( + &mut err, + &entry_formatter, + matcher.as_ref(), + &mut output_stream, + &mut stdin, + )?; + + let (stdin, num_entries) = stdin.into_inner(); + /* NB: The read_zipfile_from_stream() method overruns the size of a single local header into + * the CDE after reading the last input. There are unstable APIs to address this, but for + * now just rely on that internal knowledge. See e.g. zip::read::stream on master or + * zip::unstable::read in https://github.com/zip-rs/zip2/pull/233. */ + let cde_start = stdin.current_bytes_read() - 30; + let (_stdin, stream_length) = stdin + .exhaust() + .wrap_err("failed to exhaust all of stdin after reading all zip entries")?; + + let data = ArchiveData { + path: None, + stream_length, + num_entries, + comment: None, + first_entry_start: Some(0), + central_directory_start: Some(cde_start), + }; + format_archive_info(&mut err, &archive_formatter, &mut output_stream, data)?; + } + + for p in zip_paths.into_iter() { + let mut zip = ArchiveWithPath::open(p)?; + + { + let mut zip_entry_counter = ZipFileInput::new(&mut zip.archive); + format_entry_info( + &mut err, + &entry_formatter, + matcher.as_ref(), + &mut output_stream, + &mut zip_entry_counter, + )?; + } + + let data = ArchiveData::from_archive_with_path(&zip); + format_archive_info(&mut err, &archive_formatter, &mut output_stream, data)?; + } + + Ok(()) +} diff --git a/cli/src/info/directives.rs b/cli/src/info/directives.rs new file mode 100644 index 000000000..e4e3e5bfd --- /dev/null +++ b/cli/src/info/directives.rs @@ -0,0 +1,703 @@ +use std::{ + fmt, + io::{self, Write}, +}; + +use super::formats::FormatValue; +use crate::{ + args::info::{ParseableDirective, ParseableFormatComponent, ParseableFormatSpec}, + CommandError, WrapCommandErr, +}; + +pub trait Writeable { + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error>; +} + +impl Writeable for S +where + S: fmt::Display, +{ + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error> { + 
write!(out, "{}", self) + } +} + +pub trait FormatDirective { + type Data<'a>; + type FieldType: FormatValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a>; + fn value_formatter(&self) -> Self::FieldType; + + fn format_field<'a>( + &self, + data: Self::Data<'a>, + ) -> Result<::Output<'a>, ::E> + { + self.value_formatter() + .format_value(self.extract_field(data)) + } +} + +/// Wrap a [`FormatDirective`] and write it to a stream. This isn't directly type-eraseable, but it +/// removes one layer of polymorphism to enable us to do that in a subsequent wrapper trait. +pub trait DirectiveFormatter { + type Data<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError>; +} + +impl DirectiveFormatter for FD +where + FD: FormatDirective, + for<'a> <::FieldType as FormatValue>::Output<'a>: Writeable + fmt::Debug, + <::FieldType as FormatValue>::E: fmt::Display, +{ + type Data<'a> = ::Data<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + let output = self + .format_field(data) + .map_err(|e| CommandError::InvalidData(format!("error formatting field: {e}")))?; + output + .write_to(out) + .wrap_err_with(|| format!("failed to write output to stream: {output:?}")) + } +} + +pub mod compiled { + use super::*; + + enum CompiledFormatComponent { + Directive(F), + ContiguousLiteral(String), + } + + impl CompiledFormatComponent + where + F: DirectiveFormatter, + { + pub fn write_component<'a>( + &self, + data: ::Data<'a>, + mut out: impl Write, + ) -> Result<(), CommandError> { + match self { + Self::Directive(d) => d.write_directive(data, &mut out), + Self::ContiguousLiteral(lit) => out + .write_all(lit.as_bytes()) + .wrap_err_with(|| format!("failed to write literal {lit:?} to output")), + } + } + } + + pub trait CompiledFormat { + type Spec: ParseableDirective; + type Fmt: DirectiveFormatter; + + fn 
from_directive_spec(spec: Self::Spec) -> Result; + } + + pub struct CompiledFormatSpec { + components: Vec>, + } + + impl CompiledFormatSpec { + pub fn is_empty(&self) -> bool { + self.components.is_empty() + } + } + + impl CompiledFormatSpec + where + F: DirectiveFormatter, + { + pub fn from_spec( + spec: ParseableFormatSpec<::Spec>, + ) -> Result + where + CF: CompiledFormat, + { + let ParseableFormatSpec { + components: spec_components, + } = spec; + + let mut components: Vec> = Vec::new(); + for c in spec_components.into_iter() { + match c { + ParseableFormatComponent::Directive(d) => { + let d = CF::from_directive_spec(d)?; + components.push(CompiledFormatComponent::Directive(d)); + } + ParseableFormatComponent::Escaped(s) => match components.last_mut() { + Some(CompiledFormatComponent::ContiguousLiteral(ref mut last_lit)) => { + last_lit.push_str(s); + } + _ => { + components + .push(CompiledFormatComponent::ContiguousLiteral(s.to_string())); + } + }, + ParseableFormatComponent::Literal(new_lit) => match components.last_mut() { + Some(CompiledFormatComponent::ContiguousLiteral(ref mut last_lit)) => { + last_lit.push_str(new_lit.as_str()); + } + _ => { + components.push(CompiledFormatComponent::ContiguousLiteral(new_lit)); + } + }, + } + } + + Ok(Self { components }) + } + + pub fn execute_format<'a>( + &self, + data: ::Data<'a>, + mut out: impl Write, + ) -> Result<(), CommandError> + where + ::Data<'a>: Clone, + { + for c in self.components.iter() { + c.write_component(data.clone(), &mut out)? 
+ } + Ok(()) + } + } +} + +pub mod entry { + use super::{ + super::formats::{ + BinaryNumericValue, BinaryStringValue, ByteSizeValue, CompressionMethodValue, + FileTypeValue, FormatValue, NameString, OffsetValue, TimestampValue, UnixModeValue, + }, + FormatDirective, + }; + use crate::extract::receiver::EntryData; + + pub struct EntryNameField(pub NameString); + + impl FormatDirective for EntryNameField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = NameString; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.name + } + fn value_formatter(&self) -> NameString { + self.0 + } + } + + pub struct FileTypeField(pub FileTypeValue); + + impl FormatDirective for FileTypeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = FileTypeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.kind + } + fn value_formatter(&self) -> FileTypeValue { + self.0 + } + } + + pub struct EntryCommentField(pub BinaryStringValue); + + impl FormatDirective for EntryCommentField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = BinaryStringValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.comment.as_bytes()) + } + fn value_formatter(&self) -> BinaryStringValue { + self.0 + } + } + + pub struct LocalHeaderStartField(pub OffsetValue); + + impl FormatDirective for LocalHeaderStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.local_header_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct ContentStartField(pub OffsetValue); + + impl FormatDirective for ContentStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.content_start) + } + fn value_formatter(&self) -> OffsetValue { + 
self.0 + } + } + + pub struct UncompressedSizeField(pub ByteSizeValue); + + impl FormatDirective for UncompressedSizeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.uncompressed_size + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct CompressedSizeField(pub ByteSizeValue); + + impl FormatDirective for CompressedSizeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.compressed_size + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct ContentEndField(pub OffsetValue); + + impl FormatDirective for ContentEndField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.content_end()) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CentralHeaderStartField(pub OffsetValue); + + impl FormatDirective for CentralHeaderStartField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + Some(data.central_header_start) + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CompressionMethodField(pub CompressionMethodValue); + + impl FormatDirective for CompressionMethodField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = CompressionMethodValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.compression + } + fn value_formatter(&self) -> CompressionMethodValue { + self.0 + } + } + + pub struct UnixModeField(pub UnixModeValue); + + impl FormatDirective for UnixModeField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = UnixModeValue; + fn extract_field<'a>( + &self, + data: 
Self::Data<'a>, + ) -> ::Input<'a> { + data.unix_mode + } + fn value_formatter(&self) -> UnixModeValue { + self.0 + } + } + + pub struct Crc32Field(pub BinaryNumericValue); + + impl FormatDirective for Crc32Field { + type Data<'a> = &'a EntryData<'a>; + type FieldType = BinaryNumericValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.crc32 + } + fn value_formatter(&self) -> BinaryNumericValue { + self.0 + } + } + + pub struct TimestampField(pub TimestampValue); + + impl FormatDirective for TimestampField { + type Data<'a> = &'a EntryData<'a>; + type FieldType = TimestampValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.last_modified_time + } + fn value_formatter(&self) -> TimestampValue { + self.0 + } + } + + pub mod compiled { + use super::{ + super::{compiled::CompiledFormat, DirectiveFormatter}, + *, + }; + use crate::{args::info::EntryFormatDirective, CommandError}; + + use std::io::Write; + + /// Used for type erasure by removing the lifetime-bounded associated type. + trait EntryDirectiveFormatter { + fn write_entry_directive<'a>( + &self, + data: &EntryData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError>; + } + + impl EntryDirectiveFormatter for CF + where + CF: for<'a> DirectiveFormatter = &'a EntryData<'a>>, + { + fn write_entry_directive<'a>( + &self, + data: &EntryData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.write_directive(data, out) + } + } + + /// This re-implements the generic trait using the type-erased boxed vtable. 
+ pub struct CompiledEntryDirective(Box); + + impl DirectiveFormatter for CompiledEntryDirective { + type Data<'a> = EntryData<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.0.write_entry_directive(&data, out) + } + } + + pub struct CompiledEntryFormat; + + impl CompiledFormat for CompiledEntryFormat { + type Spec = EntryFormatDirective; + type Fmt = CompiledEntryDirective; + + fn from_directive_spec( + spec: EntryFormatDirective, + ) -> Result { + Ok(CompiledEntryDirective(match spec { + EntryFormatDirective::Name => Box::new(EntryNameField(NameString)), + EntryFormatDirective::FileType(f) => Box::new(FileTypeField(FileTypeValue(f))), + EntryFormatDirective::CompressedSize(f) => { + Box::new(CompressedSizeField(ByteSizeValue(f))) + } + EntryFormatDirective::UncompressedSize(f) => { + Box::new(UncompressedSizeField(ByteSizeValue(f))) + } + EntryFormatDirective::UnixMode(f) => Box::new(UnixModeField(UnixModeValue(f))), + EntryFormatDirective::CompressionMethod(f) => { + Box::new(CompressionMethodField(CompressionMethodValue(f))) + } + EntryFormatDirective::Comment(f) => { + Box::new(EntryCommentField(BinaryStringValue(f))) + } + EntryFormatDirective::LocalHeaderStart(f) => { + Box::new(LocalHeaderStartField(OffsetValue(f))) + } + EntryFormatDirective::ContentStart(f) => { + Box::new(ContentStartField(OffsetValue(f))) + } + EntryFormatDirective::ContentEnd(f) => { + Box::new(ContentEndField(OffsetValue(f))) + } + EntryFormatDirective::CentralHeaderStart(f) => { + Box::new(CentralHeaderStartField(OffsetValue(f))) + } + EntryFormatDirective::CrcValue(f) => { + Box::new(Crc32Field(BinaryNumericValue(f))) + } + EntryFormatDirective::Timestamp(f) => { + Box::new(TimestampField(TimestampValue(f))) + } + })) + } + } + } +} + +pub mod archive { + use super::{ + super::{ + formats::{ + BinaryStringValue, ByteSizeValue, DecimalNumberValue, FormatValue, OffsetValue, + PathString, + }, + 
ArchiveWithPath, + }, + FormatDirective, + }; + + use std::path::Path; + + #[derive(Debug, Clone, PartialEq, Eq, Hash)] + pub struct ArchiveData<'a> { + pub path: Option<&'a Path>, + pub stream_length: u64, + pub num_entries: usize, + pub comment: Option<&'a [u8]>, + pub first_entry_start: Option, + pub central_directory_start: Option, + } + + impl<'a> ArchiveData<'a> { + pub fn from_archive_with_path(zip: &'a ArchiveWithPath) -> Self { + Self { + path: Some(zip.path.as_path()), + stream_length: zip.len, + num_entries: zip.archive.len(), + comment: Some(zip.archive.comment()), + first_entry_start: Some(zip.archive.offset()), + central_directory_start: Some(zip.archive.central_directory_start()), + } + } + } + + pub struct ArchiveNameField(pub PathString); + + impl FormatDirective for ArchiveNameField { + type Data<'a> = ArchiveData<'a>; + type FieldType = PathString; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.path + } + fn value_formatter(&self) -> PathString { + self.0 + } + } + + pub struct ArchiveSizeField(pub ByteSizeValue); + + impl FormatDirective for ArchiveSizeField { + type Data<'a> = ArchiveData<'a>; + type FieldType = ByteSizeValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.stream_length + } + fn value_formatter(&self) -> ByteSizeValue { + self.0 + } + } + + pub struct NumEntriesField(pub DecimalNumberValue); + + impl FormatDirective for NumEntriesField { + type Data<'a> = ArchiveData<'a>; + type FieldType = DecimalNumberValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.num_entries.try_into().unwrap() + } + fn value_formatter(&self) -> DecimalNumberValue { + self.0 + } + } + + pub struct ArchiveCommentField(pub BinaryStringValue); + + impl FormatDirective for ArchiveCommentField { + type Data<'a> = ArchiveData<'a>; + type FieldType = BinaryStringValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { 
+ data.comment + } + fn value_formatter(&self) -> BinaryStringValue { + self.0 + } + } + + pub struct FirstEntryStartField(pub OffsetValue); + + impl FormatDirective for FirstEntryStartField { + type Data<'a> = ArchiveData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.first_entry_start + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub struct CentralDirectoryStartField(pub OffsetValue); + + impl FormatDirective for CentralDirectoryStartField { + type Data<'a> = ArchiveData<'a>; + type FieldType = OffsetValue; + fn extract_field<'a>( + &self, + data: Self::Data<'a>, + ) -> ::Input<'a> { + data.central_directory_start + } + fn value_formatter(&self) -> OffsetValue { + self.0 + } + } + + pub mod compiled { + use super::{ + super::{compiled::CompiledFormat, DirectiveFormatter}, + *, + }; + use crate::{args::info::ArchiveOverviewFormatDirective, CommandError}; + + use std::io::Write; + + trait ArchiveDirectiveFormatter { + fn write_archive_directive<'a>( + &self, + data: ArchiveData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError>; + } + + impl ArchiveDirectiveFormatter for CF + where + CF: for<'a> DirectiveFormatter = ArchiveData<'a>>, + { + fn write_archive_directive<'a>( + &self, + data: ArchiveData<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.write_directive(data, out) + } + } + + pub struct CompiledArchiveDirective(Box); + + impl DirectiveFormatter for CompiledArchiveDirective { + type Data<'a> = ArchiveData<'a>; + + fn write_directive<'a>( + &self, + data: Self::Data<'a>, + out: &mut dyn Write, + ) -> Result<(), CommandError> { + self.0.write_archive_directive(data, out) + } + } + + pub struct CompiledArchiveFormat; + + impl CompiledFormat for CompiledArchiveFormat { + type Spec = ArchiveOverviewFormatDirective; + type Fmt = CompiledArchiveDirective; + + fn from_directive_spec( + spec: ArchiveOverviewFormatDirective, + ) -> Result { 
+ Ok(CompiledArchiveDirective(match spec { + ArchiveOverviewFormatDirective::ArchiveName => { + Box::new(ArchiveNameField(PathString)) + } + ArchiveOverviewFormatDirective::TotalSize(f) => { + Box::new(ArchiveSizeField(ByteSizeValue(f))) + } + ArchiveOverviewFormatDirective::NumEntries => { + Box::new(NumEntriesField(DecimalNumberValue)) + } + ArchiveOverviewFormatDirective::ArchiveComment(f) => { + Box::new(ArchiveCommentField(BinaryStringValue(f))) + } + ArchiveOverviewFormatDirective::FirstEntryStart(f) => { + Box::new(FirstEntryStartField(OffsetValue(f))) + } + ArchiveOverviewFormatDirective::CentralDirectoryStart(f) => { + Box::new(CentralDirectoryStartField(OffsetValue(f))) + } + })) + } + } + } +} diff --git a/cli/src/info/formats.rs b/cli/src/info/formats.rs new file mode 100644 index 000000000..a320fb122 --- /dev/null +++ b/cli/src/info/formats.rs @@ -0,0 +1,425 @@ +use std::{ + convert::Infallible, + fmt, + io::{self, Write}, + path, +}; + +use zip::{CompressionMethod, DateTime}; + +use super::directives::Writeable; +use crate::{args::info::*, extract::receiver::EntryKind}; + +pub trait FormatValue { + type Input<'a>; + type Output<'a>; + type E; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E>; +} + +#[derive(Copy, Clone)] +pub struct NameString; + +impl FormatValue for NameString { + type Input<'a> = &'a str; + type Output<'a> = &'a str; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(input) + } +} + +#[derive(Copy, Clone)] +pub struct PathString; + +#[derive(Debug)] +pub enum PathWriter<'a> { + Path(path::Display<'a>), + None, +} + +impl<'a> fmt::Display for PathWriter<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Path(p) => path::Display::fmt(p, f), + Self::None => write!(f, ""), + } + } +} + +impl FormatValue for PathString { + type Input<'a> = Option<&'a path::Path>; + type Output<'a> = PathWriter<'a>; + type E = Infallible; + fn 
format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match input { + Some(p) => PathWriter::Path(p.display()), + None => PathWriter::None, + }) + } +} + +#[derive(Copy, Clone)] +pub struct FileTypeValue(pub FileTypeFormat); + +impl FormatValue for FileTypeValue { + type Input<'a> = EntryKind; + type Output<'a> = &'static str; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + FileTypeFormat::Full => match input { + EntryKind::File => "file", + EntryKind::Dir => "directory", + EntryKind::Symlink => "symlink", + }, + FileTypeFormat::Abbreviated => match input { + EntryKind::File => "-", + EntryKind::Dir => "d", + EntryKind::Symlink => "l", + }, + }) + } +} + +#[derive(Copy, Clone)] +pub struct CompressionMethodValue(pub CompressionMethodFormat); + +impl FormatValue for CompressionMethodValue { + type Input<'a> = CompressionMethod; + type Output<'a> = &'static str; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + CompressionMethodFormat::Full => match input { + CompressionMethod::Stored => "stored", + CompressionMethod::Deflated => "deflate", + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64 => "deflate64", + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => "bzip2", + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => "zstd", + #[cfg(feature = "lzma")] + CompressionMethod::Lzma => "lzma", + #[cfg(feature = "xz")] + CompressionMethod::Xz => "xz", + _ => "unknown", + }, + CompressionMethodFormat::Abbreviated => match input { + CompressionMethod::Stored => "stor", + CompressionMethod::Deflated => "defl", + #[cfg(feature = "deflate64")] + CompressionMethod::Deflate64 => "df64", + #[cfg(feature = "bzip2")] + CompressionMethod::Bzip2 => "bz2", + #[cfg(feature = "zstd")] + CompressionMethod::Zstd => "zst", + #[cfg(feature = "lzma")] + CompressionMethod::Lzma => "lz", + #[cfg(feature = "xz")] + 
CompressionMethod::Xz => "xz", + _ => "?", + }, + }) + } +} + +#[derive(Copy, Clone)] +pub struct UnixModeValue(pub UnixModeFormat); + +impl UnixModeValue { + const S_IRUSR: u32 = 256; + const S_IWUSR: u32 = 128; + const S_IXUSR: u32 = 64; + + const S_IRGRP: u32 = 32; + const S_IWGRP: u32 = 16; + const S_IXGRP: u32 = 8; + + const S_IROTH: u32 = 4; + const S_IWOTH: u32 = 2; + const S_IXOTH: u32 = 1; + + const UNKNOWN_MODE_BITS: [u8; 9] = [b'?'; 9]; + + fn pretty_format_mode_bits(mode: u32) -> [u8; 9] { + let mut ret = [b'-'; 9]; + + if mode & Self::S_IRUSR == Self::S_IRUSR { + ret[0] = b'r'; + } + if mode & Self::S_IWUSR == Self::S_IWUSR { + ret[1] = b'w'; + } + if mode & Self::S_IXUSR == Self::S_IXUSR { + ret[2] = b'x'; + } + + if mode & Self::S_IRGRP == Self::S_IRGRP { + ret[3] = b'r'; + } + if mode & Self::S_IWGRP == Self::S_IWGRP { + ret[4] = b'w'; + } + if mode & Self::S_IXGRP == Self::S_IXGRP { + ret[5] = b'x'; + } + + if mode & Self::S_IROTH == Self::S_IROTH { + ret[6] = b'r'; + } + if mode & Self::S_IWOTH == Self::S_IWOTH { + ret[7] = b'w'; + } + if mode & Self::S_IXOTH == Self::S_IXOTH { + ret[8] = b'x'; + } + + ret + } +} + +#[derive(Debug)] +pub enum ModeValueWriter { + Octal(Option), + Pretty([u8; 9]), +} + +impl Writeable for ModeValueWriter { + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error> { + match self { + Self::Octal(mode) => match mode { + Some(bits) => write!(out, "{:o}", bits), + None => write!(out, "?"), + }, + Self::Pretty(bits) => out.write_all(bits.as_ref()), + } + } +} + +impl FormatValue for UnixModeValue { + type Input<'a> = Option; + type Output<'a> = ModeValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + UnixModeFormat::Octal => ModeValueWriter::Octal(input), + UnixModeFormat::Pretty => ModeValueWriter::Pretty(match input { + Some(bits) => Self::pretty_format_mode_bits(bits), + None => Self::UNKNOWN_MODE_BITS, + }), + }) + } +} + 
+#[derive(Copy, Clone)] +pub struct ByteSizeValue(pub ByteSizeFormat); + +#[derive(Debug)] +pub enum ByteSizeWriter { + FullDecimal(u64), +} + +impl fmt::Display for ByteSizeWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::FullDecimal(n) => write!(f, "{}", n), + } + } +} + +impl FormatValue for ByteSizeValue { + type Input<'a> = u64; + type Output<'a> = ByteSizeWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + ByteSizeFormat::FullDecimal => ByteSizeWriter::FullDecimal(input), + ByteSizeFormat::HumanAbbreviated => todo!("human abbreviated byte sizes"), + }) + } +} + +#[derive(Copy, Clone)] +pub struct DecimalNumberValue; + +impl FormatValue for DecimalNumberValue { + type Input<'a> = u64; + type Output<'a> = u64; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(input) + } +} + +#[derive(Copy, Clone)] +pub struct OffsetValue(pub OffsetFormat); + +#[derive(Debug)] +pub enum OffsetWriter { + Unknown, + Decimal(u64), + Hexadecimal(u64), +} + +impl fmt::Display for OffsetWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Unknown => write!(f, "?"), + Self::Decimal(x) => write!(f, "{}", x), + Self::Hexadecimal(x) => write!(f, "{:x}", x), + } + } +} + +impl FormatValue for OffsetValue { + type Input<'a> = Option; + type Output<'a> = OffsetWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = match input { + None => return Ok(OffsetWriter::Unknown), + Some(input) => input, + }; + Ok(match self.0 { + OffsetFormat::Decimal => OffsetWriter::Decimal(input), + OffsetFormat::Hexadecimal => OffsetWriter::Hexadecimal(input), + }) + } +} + +#[derive(Copy, Clone)] +pub struct BinaryNumericValue(pub BinaryNumericValueFormat); + +#[derive(Debug)] +pub enum BinaryNumericValueWriter { + Decimal(u32), + 
Hexadecimal(u32), +} + +impl fmt::Display for BinaryNumericValueWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Decimal(x) => write!(f, "{}", x), + Self::Hexadecimal(x) => write!(f, "{:x}", x), + } + } +} + +impl FormatValue for BinaryNumericValue { + type Input<'a> = u32; + type Output<'a> = BinaryNumericValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + Ok(match self.0 { + BinaryNumericValueFormat::Decimal => BinaryNumericValueWriter::Decimal(input), + BinaryNumericValueFormat::Hexadecimal => BinaryNumericValueWriter::Hexadecimal(input), + }) + } +} + +#[derive(Copy, Clone)] +pub struct BinaryStringValue(pub BinaryStringFormat); + +#[derive(Debug)] +pub enum BinaryStringWriter<'a> { + ReplaceNonUnicode(&'a [u8]), + EscapeAscii(&'a [u8]), + WriteExactly(&'a [u8]), +} + +impl<'a> BinaryStringWriter<'a> { + const INVALID_CHUNK_BUFS: [&'static str; 4] = ["", "�", "��", "���"]; +} + +impl<'a> Writeable for BinaryStringWriter<'a> { + fn write_to(&self, out: &mut dyn Write) -> Result<(), io::Error> { + match self { + Self::ReplaceNonUnicode(s) => { + for chunk in s.utf8_chunks() { + write!(out, "{}", chunk.valid())?; + /* The length of invalid bytes is never longer than 3. 
*/ + write!(out, "{}", Self::INVALID_CHUNK_BUFS[chunk.invalid().len()])?; + } + Ok(()) + } + Self::EscapeAscii(s) => { + if s.is_empty() { + return write!(out, "\"\""); + } + write!(out, "\" ")?; + for b in s.iter().copied() { + write!(out, "{} ", b.escape_ascii())?; + } + write!(out, "\"")?; + Ok(()) + } + Self::WriteExactly(s) => out.write_all(s), + } + } +} + +impl FormatValue for BinaryStringValue { + type Input<'a> = Option<&'a [u8]>; + type Output<'a> = BinaryStringWriter<'a>; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = input.unwrap_or(&[]); + Ok(match self.0 { + BinaryStringFormat::PrintAsString => BinaryStringWriter::ReplaceNonUnicode(input), + BinaryStringFormat::EscapeAscii => BinaryStringWriter::EscapeAscii(input), + BinaryStringFormat::WriteBinaryContents => BinaryStringWriter::WriteExactly(input), + }) + } +} + +#[derive(Copy, Clone)] +pub struct TimestampValue(pub TimestampFormat); + +#[derive(Debug)] +pub enum TimestampValueWriter { + None, + DateOnly(DateTime), + TimeOnly(DateTime), + DateAndTime(DateTime), +} + +impl fmt::Display for TimestampValueWriter { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::None => write!(f, "?"), + Self::DateOnly(d) => write!(f, "{}-{}-{}", d.year(), d.month(), d.day()), + Self::TimeOnly(t) => write!(f, "{}:{}:{}", t.hour(), t.minute(), t.second()), + Self::DateAndTime(dt) => { + write!( + f, + "{}-{}-{} {}:{}:{}", + dt.year(), + dt.month(), + dt.day(), + dt.hour(), + dt.minute(), + dt.second() + ) + } + } + } +} + +impl FormatValue for TimestampValue { + type Input<'a> = Option; + type Output<'a> = TimestampValueWriter; + type E = Infallible; + fn format_value<'a>(&self, input: Self::Input<'a>) -> Result, Self::E> { + let input = match input { + None => return Ok(TimestampValueWriter::None), + Some(input) => input, + }; + Ok(match self.0 { + TimestampFormat::DateOnly => TimestampValueWriter::DateOnly(input), + 
TimestampFormat::TimeOnly => TimestampValueWriter::TimeOnly(input), + TimestampFormat::DateAndTime => TimestampValueWriter::DateAndTime(input), + }) + } +} diff --git a/cli/src/lib.rs b/cli/src/lib.rs new file mode 100644 index 000000000..24db1aaae --- /dev/null +++ b/cli/src/lib.rs @@ -0,0 +1,175 @@ +//! ??? + +#![cfg_attr(docsrs, feature(doc_auto_cfg))] + +use std::{fs, io}; + +pub mod args; +pub mod compress; +pub mod extract; +pub mod info; + +pub enum ErrHandle { + Output(W), + NoOutput, +} + +impl io::Write for ErrHandle +where + W: io::Write, +{ + fn write(&mut self, buf: &[u8]) -> io::Result { + match self { + Self::Output(w) => w.write(buf), + Self::NoOutput => Ok(buf.len()), + } + } + + fn flush(&mut self) -> io::Result<()> { + match self { + Self::Output(w) => w.flush(), + Self::NoOutput => Ok(()), + } + } +} + +pub enum OutputHandle { + File(fs::File), + InMem(io::Cursor>), +} + +impl io::Read for OutputHandle { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + match self { + Self::File(f) => f.read(buf), + Self::InMem(c) => c.read(buf), + } + } +} + +impl io::Write for OutputHandle { + fn write(&mut self, buf: &[u8]) -> io::Result { + match self { + Self::File(f) => f.write(buf), + Self::InMem(c) => c.write(buf), + } + } + + fn flush(&mut self) -> io::Result<()> { + match self { + Self::File(f) => f.flush(), + Self::InMem(c) => c.flush(), + } + } +} + +impl io::Seek for OutputHandle { + fn seek(&mut self, pos: io::SeekFrom) -> io::Result { + match self { + Self::File(f) => f.seek(pos), + Self::InMem(c) => c.seek(pos), + } + } +} + +#[derive(Debug)] +pub enum CommandError { + InvalidArg(String), + InvalidData(String), + Io(String, io::Error), + Zip(String, zip::result::ZipError), +} + +pub trait WrapCommandErr: Sized { + fn wrap_err(self, context: &str) -> Result { + self.wrap_err_with(|| context.to_string()) + } + fn wrap_err_with(self, f: impl FnOnce() -> String) -> Result; +} + +impl WrapCommandErr for Result { + fn wrap_err_with(self, f: impl 
FnOnce() -> String) -> Result { + self.map_err(|e| CommandError::Io(f(), e)) + } +} + +impl WrapCommandErr for Result { + fn wrap_err_with(self, f: impl FnOnce() -> String) -> Result { + self.map_err(|e| CommandError::Zip(f(), e)) + } +} + +pub mod driver { + use std::env; + use std::io::{self, Write}; + use std::process; + + use super::args::{ArgParseError, CommandFormat, ZipCli, ZipCommand}; + use super::{CommandError, ErrHandle}; + + pub trait ExecuteCommand: CommandFormat { + fn execute(self, err: impl Write) -> Result<(), CommandError>; + + fn do_main(self, mut err: impl Write) -> ! + where + Self: Sized, + { + writeln!(&mut err, "{} args: {:?}", Self::COMMAND_NAME, &self).unwrap(); + match self.execute(err) { + Ok(()) => process::exit(ZipCli::NON_FAILURE_EXIT_CODE), + Err(e) => match e { + CommandError::InvalidArg(msg) => { + let msg = Self::generate_brief_help_text(&msg); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::ARGV_PARSE_FAILED_EXIT_CODE); + } + CommandError::InvalidData(msg) => { + let msg = format!("error processing zip data: {msg}\n"); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::ARGV_PARSE_FAILED_EXIT_CODE); + } + CommandError::Io(context, e) => { + let msg = format!("i/o error: {context}: {e}\n"); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::INTERNAL_ERROR_EXIT_CODE); + } + CommandError::Zip(context, e) => { + let msg = format!("zip error: {context}: {e}\n"); + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::INTERNAL_ERROR_EXIT_CODE); + } + }, + } + } + } + + pub fn main() { + let ZipCli { verbose, command } = match ZipCli::parse_argv(env::args_os()) { + Ok(cli) => cli, + Err(e) => match e { + ArgParseError::StdoutMessage(msg) => { + io::stdout() + .write_all(msg.as_bytes()) + .expect("couldn't write message to stdout"); + process::exit(ZipCli::NON_FAILURE_EXIT_CODE); + } + ArgParseError::StderrMessage(msg) => { + /* If we can't write 
anything to stderr, no use aborting, so just exit. */ + let _ = io::stderr().write_all(msg.as_bytes()); + process::exit(ZipCli::ARGV_PARSE_FAILED_EXIT_CODE); + } + }, + }; + let err = if verbose { + ErrHandle::Output(io::stderr()) + } else { + ErrHandle::NoOutput + }; + + match command { + ZipCommand::Info(info) => info.do_main(err), + ZipCommand::Extract(extract) => extract.do_main(err), + ZipCommand::Compress(compress) => compress.do_main(err), + } + } +} diff --git a/cli/src/main.rs b/cli/src/main.rs new file mode 100644 index 000000000..95fae2ac9 --- /dev/null +++ b/cli/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + zip_cli::driver::main(); +} diff --git a/src/compression.rs b/src/compression.rs index 83a7669bd..02c264641 100644 --- a/src/compression.rs +++ b/src/compression.rs @@ -10,7 +10,7 @@ use std::{fmt, io}; /// /// When creating ZIP files, you may choose the method to use with /// [`crate::write::FileOptions::compression_method`] -#[derive(Copy, Clone, PartialEq, Eq, Debug)] +#[derive(Copy, Clone, PartialEq, Eq, Debug, Hash, PartialOrd, Ord)] #[cfg_attr(fuzzing, derive(arbitrary::Arbitrary))] #[non_exhaustive] pub enum CompressionMethod { diff --git a/src/extra_fields/extended_timestamp.rs b/src/extra_fields/extended_timestamp.rs index 1cc0f1de4..0cf794c3c 100644 --- a/src/extra_fields/extended_timestamp.rs +++ b/src/extra_fields/extended_timestamp.rs @@ -4,7 +4,7 @@ use std::io::Read; /// extended timestamp, as described in -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ExtendedTimestamp { mod_time: Option, ac_time: Option, diff --git a/src/write.rs b/src/write.rs index 8d077b595..c4d851a13 100644 --- a/src/write.rs +++ b/src/write.rs @@ -15,8 +15,6 @@ use crate::types::{ ZipRawValues, MIN_VERSION, }; use crate::write::ffi::S_IFLNK; -#[cfg(any(feature = "_deflate-any", feature = "bzip2", feature = "zstd",))] -use core::num::NonZeroU64; use crc32fast::Hasher; use indexmap::IndexMap; use std::borrow::ToOwned; @@ -253,6 
+251,7 @@ impl<'a> arbitrary::Arbitrary<'a> for EncryptWith<'a> { } /// Metadata for a file to be written +/* TODO: add accessors for this data as well so options can be introspected! */ #[derive(Clone, Debug, Copy, Eq, PartialEq)] pub struct FileOptions<'k, T: FileOptionExtension> { pub(crate) compression_method: CompressionMethod, @@ -780,6 +779,8 @@ impl ZipWriter { } } +/* TODO: consider a ZipWriter which works with just a Write bound to support streaming output? This + * would require some work, but is possible in the protocol. */ impl ZipWriter { /// Initializes the archive. /// @@ -1441,6 +1442,7 @@ impl ZipWriter { /// implementations may materialize a symlink as a regular file, possibly with the /// content incorrectly set to the symlink target. For maximum portability, consider /// storing a regular file instead. + /* TODO: support OsStr instead of just str, for non-unicode paths. */ pub fn add_symlink( &mut self, name: N, @@ -1654,7 +1656,7 @@ impl GenericZipWriter { let best_non_zopfli = Compression::best().level(); if level > best_non_zopfli { let options = Options { - iteration_count: NonZeroU64::try_from( + iteration_count: core::num::NonZeroU64::try_from( (level - best_non_zopfli) as u64, ) .unwrap(), From 38c2ecf04b0e8e81fc69a8ed0076942975d903a2 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 27 Nov 2024 18:18:39 -0500 Subject: [PATCH 02/31] demonstrate more-parameterized help text --- cli/src/args/compress.rs | 2 +- cli/src/args/extract.rs | 212 ++++++++++++++++++++++++++++++--------- cli/src/args/info.rs | 4 +- 3 files changed, 167 insertions(+), 51 deletions(-) diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 796b47990..15a22f531 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -79,7 +79,7 @@ impl CommandFormat for Compress { "Generate an archive from data in argument strings or read from the filesystem."; const USAGE_LINE: &'static str = - 
"[-h|--help] [OUTPUT-FLAGS] [--archive-comment ] [ENTRY]... [--] [PATH]..."; + "[-h|--help] [OUTPUT-FLAGS] [GLOBAL-FLAGS] [ENTRY]... [--] [PATH]..."; fn generate_help() -> String { format!( diff --git a/cli/src/args/extract.rs b/cli/src/args/extract.rs index 1a580ad54..8eb46e7fd 100644 --- a/cli/src/args/extract.rs +++ b/cli/src/args/extract.rs @@ -47,20 +47,155 @@ impl PatternSelectorType { } } + const fn help_description(self) -> &'static str { + match self { + Self::Glob => "glob", + Self::Literal => "literal", + Self::Regexp => "regexp", + } + } + + const fn arg_abbreviation(self) -> &'static str { + match self { + Self::Glob => "glob", + Self::Literal => "lit", + Self::Regexp => "rx", + } + } + + fn generate_match_help_text(self) -> String { + format!( + r#"These flags default to interpreting a argument as a {} string to +match against the entire entry name, which can be explicitly requested as +follows: + + --match=path:{} "#, + self.help_description(), + self.arg_abbreviation(), + ) + } + + pub fn generate_match_default_help_text() -> String { + Self::default_for_match().generate_match_help_text() + } +} + +#[derive(Copy, Clone)] +pub enum PatSelContext { + MatchOnly, + MatchAndTransform, +} + +impl PatSelContext { + #[allow(dead_code)] + const fn first_default(self) -> &'static str { + match self { + Self::MatchOnly => "[DEFAULT] ", + Self::MatchAndTransform => "[DEFAULT for matching] ", + } + } + + #[allow(dead_code)] + const fn second_default(self) -> &'static str { + match self { + Self::MatchOnly => "", + Self::MatchAndTransform => "[DEFAULT for replacement] ", + } + } +} + +#[cfg(all(feature = "glob", feature = "rx"))] +impl PatternSelectorType { + pub fn generate_pat_sel_help_section(ctx: PatSelContext) -> String { + format!( + r#"pat-sel = glob {}(interpret as a shell glob) + = lit (interpret as literal string) + = rx {}(interpret as a regular expression) + = + (apply search modifiers from )"#, + ctx.first_default(), + ctx.second_default(), + ) + } 
+} + +#[cfg(all(feature = "glob", not(feature = "rx")))] +impl PatternSelectorType { + pub fn generate_pat_sel_help_section(ctx: PatSelContext) -> String { + format!( + r#"pat-sel = glob {}(interpret as a shell glob) + = lit {}(interpret as literal string) + = + (apply search modifiers from )"#, + ctx.first_default(), + ctx.second_default(), + ) + } +} + +#[cfg(all(not(feature = "glob"), feature = "rx"))] +impl PatternSelectorType { + pub fn generate_pat_sel_help_section(ctx: PatSelContext) -> String { + format!( + r#"pat-sel = lit {}(interpret as literal string) + = rx {}(interpret as a regular expression) + = + (apply search modifiers from )"#, + ctx.first_default(), + ctx.second_default(), + ) + } +} + +#[cfg(not(any(feature = "glob", feature = "rx")))] +impl PatternSelectorType { + pub fn generate_pat_sel_help_section(_ctx: PatSelContext) -> String { + r#"pat-sel = lit [DEFAULT] (interpret as literal string) + = + (apply search modifiers from )"# + .to_string() + } +} + +#[cfg(feature = "glob")] +impl PatternSelectorType { pub const fn default_for_match() -> Self { - if cfg!(feature = "glob") { - Self::Glob - } else { - Self::Literal + Self::Glob + } + + pub const fn generate_glob_replacement_note(ctx: PatSelContext) -> &'static str { + match ctx { + PatSelContext::MatchOnly => "", + PatSelContext::MatchAndTransform => { + "\n*Note:* glob patterns are not supported for replacement, and attempting to use +them with e.g '--transform:glob' will produce an error.\n" + } } } +} + +#[cfg(not(feature = "glob"))] +impl PatternSelectorType { + pub const fn default_for_match() -> Self { + Self::Literal + } + + pub const fn generate_glob_replacement_note(_ctx: PatSelContext) -> &'static str { + "" + } +} +#[cfg(feature = "rx")] +impl PatternSelectorType { pub const fn default_for_replacement() -> Self { - if cfg!(feature = "rx") { - Self::Regexp - } else { - Self::Literal - } + Self::Regexp + } +} + +#[cfg(not(feature = "rx"))] +impl PatternSelectorType { + pub const fn 
default_for_replacement() -> Self { + Self::Literal } } @@ -1151,7 +1286,7 @@ These results are dependent on the entry data: ) } - pub fn generate_pattern_selector_help_text(match_only: bool) -> String { + pub fn generate_pattern_selector_help_text(ctx: PatSelContext) -> String { format!( r#" ## Selector syntax: @@ -1160,11 +1295,7 @@ The string matching operations of {} expose an interface to configure various pattern matching techniques on various components of the entry name string. -These flags default to interpreting a argument as a glob string to -match against the entire entry name, which can be explicitly requested as -follows: - - --match=path:glob +{} The entire range of search options is described below: @@ -1175,13 +1306,8 @@ comp-sel = path [DEFAULT] (match full entry) = ext (match only the file extension, if available) ### Pattern selector (pat-sel): -pat-sel = glob [DEFAULT{}] (interpret as a shell glob) - = lit (interpret as literal string) - = rx {}(interpret as a regular expression) - = (apply search modifiers from ) - {} - +{} Also note that glob and regex patterns require building this binary with the "glob" and "rx" cargo features respectively. Specifying ':glob' or ':rx' without the requisite feature support will produce an error. If the requisite feature is @@ -1202,35 +1328,25 @@ contains '^' or '$' as well, no error is produced. *Note:* not all pattern modifiers apply everywhere. In particular, {}':p' and ':s' are incompatible with glob search and will produce an error. "#, - if match_only { - "--match" - } else { - "--match and --transform" - }, - if match_only { "" } else { " for matching" }, - if match_only { - "" - } else { - "[DEFAULT for replacement] " - }, - if match_only { - "" - } else { - "*Note:* glob patterns are not supported for replacement, and attempting to use -them with e.g '--transform:glob' will produce an error." 
+ match ctx { + PatSelContext::MatchOnly => "--match", + PatSelContext::MatchAndTransform => "--match and --transform", }, - if match_only { - "" - } else { - " = :g (use multi-match behavior for string replacements)\n" + PatternSelectorType::generate_match_default_help_text(), + PatternSelectorType::generate_pat_sel_help_section(ctx), + PatternSelectorType::generate_glob_replacement_note(ctx), + match ctx { + PatSelContext::MatchOnly => "", + PatSelContext::MatchAndTransform => + " = :g (use multi-match behavior for string replacements)\n", }, - if match_only { - "" - } else { - "':g' only + match ctx { + PatSelContext::MatchOnly => "", + PatSelContext::MatchAndTransform => + "':g' only applies to string replacement, and using it for a match expression like -'--match:rx:g' will produce an error. Additionally, " - } +'--match:rx:g' will produce an error. Additionally, ", + }, ) } @@ -1435,7 +1551,7 @@ used to filter out such entries. {} {}"#, Self::generate_match_expr_help_text(), - Self::generate_pattern_selector_help_text(false), + Self::generate_pattern_selector_help_text(PatSelContext::MatchAndTransform), Self::INPUT_HELP_TEXT, ) } diff --git a/cli/src/args/info.rs b/cli/src/args/info.rs index f571829c5..4bfd903c3 100644 --- a/cli/src/args/info.rs +++ b/cli/src/args/info.rs @@ -1,5 +1,5 @@ use super::{ - extract::{Extract, InputSpec, MatchExpression}, + extract::{Extract, InputSpec, MatchExpression, PatSelContext}, ArgParseError, CommandFormat, }; @@ -642,7 +642,7 @@ file-type = '' [DEFAULT => full] "#, Extract::COMMAND_NAME, Extract::generate_match_expr_help_text(), - Extract::generate_pattern_selector_help_text(true), + Extract::generate_pattern_selector_help_text(PatSelContext::MatchOnly), Extract::INPUT_HELP_TEXT, ) } From 0d6e4348c099c2d3a0c71bcf484d2646d32b8f4c Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 27 Nov 2024 20:20:53 -0500 Subject: [PATCH 03/31] restructure help output --- 
cli/src/args/compress.rs | 81 +++++++++++++++++++++++++++++----------- 1 file changed, 59 insertions(+), 22 deletions(-) diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 15a22f531..3a81ba23b 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -79,44 +79,62 @@ impl CommandFormat for Compress { "Generate an archive from data in argument strings or read from the filesystem."; const USAGE_LINE: &'static str = - "[-h|--help] [OUTPUT-FLAGS] [GLOBAL-FLAGS] [ENTRY]... [--] [PATH]..."; + "[-h|--help] [OUTPUT-FLAGS] [GLOBAL-FLAGS] [ATTR|ENTRY-DATA]... [--] [ENTRY-PATH]..."; fn generate_help() -> String { format!( r#" -h, --help Print help -Output flags: -Where and how to write the generated zip archive. +Output flags (OUTPUT-FLAGS): Where and how to write the generated zip archive. + +If not specified, output is written to stdout. + +OUTPUT-FLAGS = [--append] --output-file + = --stdout -o, --output-file Output zip file path to write. + The output file is truncated if it already exists, unless --append is - provided. If not provided, output is written to stdout. + provided. --append If an output path is provided with -o, open it as an existing zip - archive and append to it. If the output path does not already exist, - no error is produced, and a new zip file is created at the given path. + archive and append to it. + + If the output path does not already exist, no error is produced, and + a new zip file is created at the given path. --stdout Allow writing output to stdout even if stdout is a tty. -Global flags: -These flags describe information set for the entire produced archive. +Global flags (GLOBAL-FLAGS): These flags describe information set for the entire produced archive. + +GLOBAL-FLAGS = --archive-comment --archive-comment If provided, this will set the archive's comment field to the specified bytes. This does not need to be valid unicode. 
-Entries: -After output flags are provided, the rest of the command line is -attributes and entry data. Attributes modify later entries. +Attributes (ATTR): Settings for entry metadata. + +Attributes may be "sticky" or "non-sticky". Sticky attributes apply to +everything that comes after them, while non-sticky attributes only apply to the +next entry after them. + +ATTR = STICKY + = NON-STICKY + +Sticky attributes (STICKY): Generic metadata. -Sticky attributes: These flags apply to everything that comes after them until reset by another -instance of the same attribute. Sticky attributes continue to apply to -positional arguments received after processing all flags. +instance of the same attribute. + +STICKY = --compression-method + = --compression-level + = --mode + = --large-file [true|false] -c, --compression-method Which compression technique to use. @@ -144,28 +162,39 @@ positional arguments received after processing all flags. Therefore, this option likely never has to be set explicitly by the user. -Non-sticky attributes: +Non-sticky attributes (NON-STICKY): Metadata for a single entry. + These flags only apply to the next entry after them, and may not be repeated. +NON-STICKY = --name + = --symlink + -n, --name The name to apply to the entry. This must be UTF-8 encoded. -s, --symlink Make the next entry into a symlink entry. + A symlink entry may be immediate with -i, or it may copy the target from an existing symlink with -f. -Entry data: -Each of these flags creates an entry in the output zip archive. +Entry data (ENTRY-DATA): Create an entry in the output zip archive. + +ENTRY-DATA = --dir + = --immediate + = --file + = --recursive-dir -d, --dir Create a directory entry. A name must be provided beforehand with -n. -i, --immediate - Write an entry containing the data in the argument, which need not be - UTF-8 encoded but will exit early upon encountering any null bytes. - A name must be provided beforehand with -n. 
+ Write an entry containing the data in the argument + + This data need not be UTF-8 encoded, but will exit early upon + encountering any null bytes. A name must be provided beforehand with + -n. -f, --file Write an entry with the contents of this file path. @@ -194,9 +223,17 @@ Each of these flags creates an entry in the output zip archive. corresponding to the symlink path (unless overridden with -n). Providing a symlink path which points to a file will produce an error. -Positional entries: - [PATH]... +Positional entries (ENTRY-PATH): Paths which are converted into entries. + +Any sticky attributes will continue to apply to entries specified via path, +while any non-sticky attributes not matched to an explicit ENTRY-DATA will produce +an error. + +ENTRY-PATH = + + Write the file or recursive directory contents, relativizing the path. + If the given path points to a file, then a single file entry will be written. If the given path is a symlink, then a single symlink entry will From 6454f7a7f5a9c95c9e858828c1377415bdc7eb42 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 27 Nov 2024 21:04:02 -0500 Subject: [PATCH 04/31] add print module --- cli/src/args/compress.rs | 3 ++- cli/src/lib.rs | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 3a81ba23b..a8734dac5 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -71,7 +71,8 @@ impl Compress { const ZSTD_HELP_LINE: &'static str = ""; } -/* TODO: add support for entry and file comments! */ +/* TODO: add support for entry comments! */ +/* TODO: add support for merging/transforming other zips!! 
*/ impl CommandFormat for Compress { const COMMAND_NAME: &'static str = "compress"; const COMMAND_TABS: &'static str = "\t"; diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 24db1aaae..90c4053d5 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -8,6 +8,7 @@ pub mod args; pub mod compress; pub mod extract; pub mod info; +pub mod print; pub enum ErrHandle { Output(W), From 85e12e099ab4701e2e6040b5c1d5f621837cfe34 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 27 Nov 2024 21:27:20 -0500 Subject: [PATCH 05/31] save progress --- cli/src/args/compress.rs | 4 +-- cli/src/lib.rs | 1 + cli/src/print.rs | 59 ++++++++++++++++++++++++++++++++++++++++ cli/src/schema.rs | 4 +++ 4 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 cli/src/print.rs create mode 100644 cli/src/schema.rs diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index a8734dac5..25bc974a0 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -135,7 +135,7 @@ instance of the same attribute. STICKY = --compression-method = --compression-level = --mode - = --large-file [true|false] + = --large-file # [true|false] -c, --compression-method Which compression technique to use. @@ -152,7 +152,7 @@ STICKY = --compression-method -m, --mode Unix permissions to apply to the file, in octal (like chmod). - --large-file [true|false] + --large-file # [true|false] Whether to enable large file support. This may take up more space for records, but allows files over 32 bits in length to be written, up to 64 bit sizes. 
diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 90c4053d5..22b16c75e 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -9,6 +9,7 @@ pub mod compress; pub mod extract; pub mod info; pub mod print; +pub mod schema; pub enum ErrHandle { Output(W), diff --git a/cli/src/print.rs b/cli/src/print.rs new file mode 100644 index 000000000..83e0c5227 --- /dev/null +++ b/cli/src/print.rs @@ -0,0 +1,59 @@ +pub mod printer { + #[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] + pub struct PrintOptions { + pub wrap_width: usize, + } + + impl Default for PrintOptions { + fn default() -> Self { + Self { wrap_width: 80 } + } + } + + #[derive(Debug, Clone)] + struct PrintContext { + prefix: String, + } + + impl PrintContext { + pub const fn new() -> Self { + Self { + prefix: String::new(), + } + } + } + + pub struct Printer { + opts: PrintOptions, + ctx: PrintContext, + } + + impl Printer { + pub const fn create(opts: PrintOptions) -> Self { + Self { + opts, + ctx: PrintContext::new(), + } + } + } +} + +pub trait HelpSection {} + +pub enum HelpVerbosity { + NameOnly, + NameAndDescription, + CompleteWithCaveats, +} + +pub struct FlagsSection {} + +pub enum FlagKind { + Boolean, + Choice(Vec), +} + +pub struct Flag { + pub short: Option, + pub long: &'static str, +} diff --git a/cli/src/schema.rs b/cli/src/schema.rs new file mode 100644 index 000000000..e55530d36 --- /dev/null +++ b/cli/src/schema.rs @@ -0,0 +1,4 @@ +pub trait Schema { + /* parse_argv()? */ + /* json()? 
*/ +} From 248f14dc255016138b97f5602d116f2a507b0d6b Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Wed, 27 Nov 2024 23:16:29 -0500 Subject: [PATCH 06/31] lots of scheming --- cli/src/args/compress.rs | 2 ++ cli/src/print.rs | 14 ++------ cli/src/schema.rs | 76 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 12 deletions(-) diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 25bc974a0..4dd3dba95 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -147,7 +147,9 @@ STICKY = --compression-method {}{}{} -l, --compression-level How much compression to perform, from 0..=24. + The accepted range of values differs for each technique. + TODO: how much??? -m, --mode Unix permissions to apply to the file, in octal (like chmod). diff --git a/cli/src/print.rs b/cli/src/print.rs index 83e0c5227..404dfe574 100644 --- a/cli/src/print.rs +++ b/cli/src/print.rs @@ -13,12 +13,14 @@ pub mod printer { #[derive(Debug, Clone)] struct PrintContext { prefix: String, + value_column: Option, } impl PrintContext { pub const fn new() -> Self { Self { prefix: String::new(), + value_column: None, } } } @@ -45,15 +47,3 @@ pub enum HelpVerbosity { NameAndDescription, CompleteWithCaveats, } - -pub struct FlagsSection {} - -pub enum FlagKind { - Boolean, - Choice(Vec), -} - -pub struct Flag { - pub short: Option, - pub long: &'static str, -} diff --git a/cli/src/schema.rs b/cli/src/schema.rs index e55530d36..c1a6ba998 100644 --- a/cli/src/schema.rs +++ b/cli/src/schema.rs @@ -2,3 +2,79 @@ pub trait Schema { /* parse_argv()? */ /* json()? 
*/ } + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SectionName(String); + +impl SectionName { + pub fn create(name: impl Into) -> Self { + let name: String = name.into(); + assert!(!name.is_empty()); + assert!(name.chars().all(|c| c.is_ascii_uppercase())); + Self(name) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MetaVarName(String); + +impl MetaVarName { + pub fn create(name: impl Into) -> Self { + let name: String = name.into(); + assert!(!name.is_empty()); + assert!(name.chars().all(|c| c.is_ascii_lowercase() || c == '-')); + Self(name) + } +} + +pub trait MetaVar { + fn choices(&self) -> Option>; +} + +pub enum FormatCaseElement { + FormatRef(MetaVarName), + Literal(String), +} + +pub struct FormatCase { + pub elements: Vec, + pub description: Option, +} + +pub enum MetaVarKind { + /* e.g. */ + NameOnly(String), + Format { cases: Vec }, +} + +pub struct MetaVarDecl { + pub id: MetaVarName, + pub spec: MetaVarKind, +} + +pub struct FlagSuffixCase { + pub prefix_marker: &'static str, + pub format: MetaVarName, +} + +pub struct Flag { + pub short: Option, + pub long: String, + pub suffix_cases: Vec, + pub value: Option, +} + +pub enum FlagCaseElement { + SectionRef(SectionName), + Literal(Flag), + Optional(Box), +} + +pub struct FlagCase { + pub elements: Vec, +} + +pub struct FlagsSectionDecl { + pub id: SectionName, + pub cases: Vec, +} From 8e612b577aadd8433f380c0415df1b4af678718c Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 28 Nov 2024 00:59:33 -0500 Subject: [PATCH 07/31] create EntryData --- cli/src/compress.rs | 173 +++++++++++++++++++++++++++++++++++++++++++- cli/src/schema.rs | 10 ++- 2 files changed, 179 insertions(+), 4 deletions(-) diff --git a/cli/src/compress.rs b/cli/src/compress.rs index e35058273..007e1e613 100644 --- a/cli/src/compress.rs +++ b/cli/src/compress.rs @@ -1,8 +1,9 @@ use std::{ + ffi::OsString, fs, 
io::{self, Cursor, IsTerminal, Seek, Write}, mem, - path::Path, + path::{Path, PathBuf}, }; use zip::{ @@ -13,6 +14,176 @@ use zip::{ use crate::{args::compress::*, CommandError, OutputHandle, WrapCommandErr}; +pub enum EntryData { + Dir { + name: String, + }, + Immediate { + name: String, + data: OsString, + symlink_flag: bool, + }, + File { + name: Option, + path: PathBuf, + symlink_flag: bool, + }, + RecDir { + name: Option, + path: PathBuf, + }, +} + +impl EntryData { + pub fn interpret_entry_path(path: PathBuf) -> Result { + let file_type = fs::symlink_metadata(&path) + .wrap_err_with(|| format!("failed to read metadata from path {}", path.display()))? + .file_type(); + Ok(if file_type.is_dir() { + Self::RecDir { name: None, path } + } else { + Self::File { + name: None, + path, + symlink_flag: file_type.is_symlink(), + } + }) + } + + pub fn create_entry( + self, + writer: &mut ZipWriter, + options: SimpleFileOptions, + mut err: impl Write, + ) -> Result<(), CommandError> { + match self { + Self::Dir { name } => writer + .add_directory(&name, options) + .wrap_err_with(|| format!("failed to create dir entry {name}")), + Self::Immediate { + name, + data, + symlink_flag, + } => { + if data.len() > ZIP64_BYTES_THR.try_into().unwrap() { + return Err(CommandError::InvalidArg(format!( + "length of immediate data argument is {}; use a file for inputs over {} bytes", + data.len(), + ZIP64_BYTES_THR + ))); + }; + if symlink_flag { + /* This is a symlink entry. */ + let target = data.into_string().map_err(|target| { + CommandError::InvalidArg(format!( + "failed to decode immediate symlink target {target:?}" + )) + })?; + writeln!( + err, + "writing immediate symlink entry with name {name:?} and target {target:?}" + ) + .unwrap(); + /* TODO: .add_symlink() should support OsString targets! */ + writer + .add_symlink(&name, &target, options) + .wrap_err_with(|| { + format!("failed to created symlink entry {name}->{target}") + }) + } else { + /* This is a file entry. 
*/ + writeln!( + err, + "writing immediate file entry with name {name:?} and data {data:?}" + ) + .unwrap(); + let data = data.into_encoded_bytes(); + writer + .start_file(&name, options) + .wrap_err_with(|| format!("failed to create file entry {name}"))?; + writer.write_all(data.as_ref()).wrap_err_with(|| { + format!( + "failed writing immediate data of length {} to file entry {name}", + data.len() + ) + }) + } + } + Self::File { + name, + path, + symlink_flag, + } => { + let name = name.unwrap_or_else(|| path_to_string(&path).into()); + if symlink_flag { + /* This is a symlink entry. */ + let target: String = + path_to_string(fs::read_link(&path).wrap_err_with(|| { + format!("failed to read symlink from path {}", path.display()) + })?) + .into(); + /* Similarly to immediate data arguments, we're simply not going to support + * symlinks over this length, which should be impossible anyway. */ + if target.len() > ZIP64_BYTES_THR.try_into().unwrap() { + return Err(CommandError::InvalidArg(format!( + "symlink target for {name} is over {ZIP64_BYTES_THR} bytes (was: {})", + target.len() + ))); + } + writeln!(err, "writing symlink entry from path {path:?} with name {name:?} and target {target:?}").unwrap(); + writer + .add_symlink(&name, &target, options) + .wrap_err_with(|| { + format!("failed to create symlink entry for {name}->{target}") + }) + } else { + /* This is a file entry. */ + writeln!( + err, + "writing file entry from path {path:?} with name {name:?}" + ) + .unwrap(); + let mut f = fs::File::open(&path).wrap_err_with(|| { + format!("error opening file for {name} at {}", path.display()) + })?; + /* Get the length of the file before reading it and set large_file if needed. */ + let input_len: u64 = f + .metadata() + .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? 
+ .len(); + writeln!(err, "entry is {input_len} bytes long").unwrap(); + let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { + writeln!( + err, + "temporarily ensuring .large_file(true) for current entry" + ) + .unwrap(); + options.large_file(true) + } else { + options + }; + writer + .start_file(&name, maybe_large_file_options) + .wrap_err_with(|| format!("error creating file entry for {name}"))?; + io::copy(&mut f, writer) + .wrap_err_with(|| { + format!("error copying content for {name} from file {f:?}") + }) + .map(|_| ()) + } + } + Self::RecDir { name, path } => { + writeln!( + err, + "writing recursive dir entries for path {path:?} with name {name:?}" + ) + .unwrap(); + enter_recursive_dir_entries(&mut err, name, &path, writer, options) + } + } + } +} + fn enter_recursive_dir_entries( err: &mut impl Write, base_rename: Option, diff --git a/cli/src/schema.rs b/cli/src/schema.rs index c1a6ba998..6344efde3 100644 --- a/cli/src/schema.rs +++ b/cli/src/schema.rs @@ -1,6 +1,10 @@ -pub trait Schema { - /* parse_argv()? */ - /* json()? 
*/ +use std::{collections::VecDeque, ffi::OsString, fmt}; + +pub trait Schema: Sized + fmt::Debug { + type E; + fn parse_argv(argv: &mut VecDeque) -> Result; + type J: From; + fn from_json() -> Result; } #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] From 0b102a3abc0777955fe7fe0843266b3f5c7cccb1 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 28 Nov 2024 02:24:43 -0500 Subject: [PATCH 08/31] use EntryData to impl compress command --- cli/src/compress.rs | 465 +++++++++++++++++++------------------------- 1 file changed, 200 insertions(+), 265 deletions(-) diff --git a/cli/src/compress.rs b/cli/src/compress.rs index 007e1e613..f0474513a 100644 --- a/cli/src/compress.rs +++ b/cli/src/compress.rs @@ -14,6 +14,7 @@ use zip::{ use crate::{args::compress::*, CommandError, OutputHandle, WrapCommandErr}; +#[derive(Debug, Clone)] pub enum EntryData { Dir { name: String, @@ -184,6 +185,203 @@ impl EntryData { } } +#[derive(Debug, Clone)] +pub enum ModificationOperation { + CreateEntry { + options: SimpleFileOptions, + spec: EntryData, + }, +} + +impl ModificationOperation { + pub fn invoke( + self, + writer: &mut ZipWriter, + err: impl Write, + ) -> Result<(), CommandError> { + match self { + Self::CreateEntry { options, spec } => spec.create_entry(writer, options, err), + } + } +} + +#[derive(Debug, Default, Clone)] +pub struct ModificationSequence { + pub operations: Vec, +} + +impl ModificationSequence { + fn initial_options() -> SimpleFileOptions { + SimpleFileOptions::default() + .compression_method(CompressionMethod::Deflated) + .large_file(false) + } + + pub fn from_args( + args: Vec, + positional_paths: Vec, + mut err: impl Write, + ) -> Result { + let mut operations: Vec = Vec::new(); + + let mut options = Self::initial_options(); + + let mut last_name: Option = None; + let mut symlink_flag: bool = false; + + for arg in args.into_iter() { + match arg { + /* attributes: */ + 
CompressionArg::CompressionMethod(method) => { + let method = match method { + CompressionMethodArg::Stored => CompressionMethod::Stored, + CompressionMethodArg::Deflate => CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + CompressionMethodArg::Deflate64 => CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + CompressionMethodArg::Bzip2 => CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + CompressionMethodArg::Zstd => CompressionMethod::Zstd, + }; + writeln!(err, "setting compression method {method:?}").unwrap(); + options = options.compression_method(method); + } + CompressionArg::Level(CompressionLevel(level)) => { + writeln!(err, "setting compression level {level:?}").unwrap(); + options = options.compression_level(Some(level)); + } + CompressionArg::UnixPermissions(UnixPermissions(mode)) => { + writeln!(err, "setting file mode {mode:#o}").unwrap(); + options = options.unix_permissions(mode); + } + CompressionArg::LargeFile(large_file) => { + writeln!(err, "setting large file flag to {large_file:?}").unwrap(); + options = options.large_file(large_file); + } + CompressionArg::Name(name) => { + writeln!(err, "setting name of next entry to {name:?}").unwrap(); + if let Some(last_name) = last_name { + return Err(CommandError::InvalidArg(format!( + "got two names before an entry: {last_name} and {name}" + ))); + } + last_name = Some(name); + } + CompressionArg::Symlink => { + writeln!(err, "setting symlink flag for next entry").unwrap(); + if symlink_flag { + /* TODO: make this a warning? 
*/ + return Err(CommandError::InvalidArg( + "symlink flag provided twice before entry".to_string(), + )); + } + symlink_flag = true; + } + + /* new operations: */ + CompressionArg::Dir => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + writeln!(err, "writing dir entry").unwrap(); + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag provided before dir entry".to_string(), + )); + } + let name = last_name.ok_or_else(|| { + CommandError::InvalidArg("no name provided before dir entry".to_string()) + })?; + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntryData::Dir { name }, + }); + } + CompressionArg::Immediate(data) => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + let name = last_name.ok_or_else(|| { + CommandError::InvalidArg(format!( + "no name provided for immediate data {data:?}" + )) + })?; + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntryData::Immediate { + name, + data, + symlink_flag, + }, + }); + } + CompressionArg::FilePath(path) => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + let name = last_name.unwrap_or_else(|| path_to_string(&path).into()); + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntryData::File { + name: Some(name), + path, + symlink_flag, + }, + }); + } + CompressionArg::RecursiveDirPath(path) => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag provided before recursive dir entry".to_string(), + )); + } + + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntryData::RecDir { + name: last_name, + path, + }, + }); + } + } + } + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag remaining after all entry 
flags processed".to_string(), + )); + } + if let Some(last_name) = last_name { + return Err(CommandError::InvalidArg(format!( + "name {last_name} remaining after all entry flags processed" + ))); + } + + for p in positional_paths.into_iter() { + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntryData::interpret_entry_path(p)?, + }); + } + Ok(Self { operations }) + } + + pub fn invoke( + self, + writer: &mut ZipWriter, + mut err: impl Write, + ) -> Result<(), CommandError> { + let Self { operations } = self; + for op in operations.into_iter() { + op.invoke(writer, &mut err)?; + } + Ok(()) + } +} + fn enter_recursive_dir_entries( err: &mut impl Write, base_rename: Option, @@ -381,271 +579,8 @@ pub fn execute_compress(mut err: impl Write, args: Compress) -> Result<(), Comma writer.set_raw_comment(comment.into()); } - let mut options = SimpleFileOptions::default() - .compression_method(CompressionMethod::Deflated) - .large_file(false); - writeln!(err, "default zip entry options: {options:?}").unwrap(); - let mut last_name: Option = None; - let mut symlink_flag: bool = false; - - for arg in args.into_iter() { - match arg { - CompressionArg::CompressionMethod(method) => { - let method = match method { - CompressionMethodArg::Stored => CompressionMethod::Stored, - CompressionMethodArg::Deflate => CompressionMethod::Deflated, - #[cfg(feature = "deflate64")] - CompressionMethodArg::Deflate64 => CompressionMethod::Deflate64, - #[cfg(feature = "bzip2")] - CompressionMethodArg::Bzip2 => CompressionMethod::Bzip2, - #[cfg(feature = "zstd")] - CompressionMethodArg::Zstd => CompressionMethod::Zstd, - }; - writeln!(err, "setting compression method {method:?}").unwrap(); - options = options.compression_method(method); - } - CompressionArg::Level(CompressionLevel(level)) => { - writeln!(err, "setting compression level {level:?}").unwrap(); - options = options.compression_level(Some(level)); - } - CompressionArg::UnixPermissions(UnixPermissions(mode)) => { - 
writeln!(err, "setting file mode {mode:#o}").unwrap(); - options = options.unix_permissions(mode); - } - CompressionArg::LargeFile(large_file) => { - writeln!(err, "setting large file flag to {large_file:?}").unwrap(); - options = options.large_file(large_file); - } - CompressionArg::Name(name) => { - writeln!(err, "setting name of next entry to {name:?}").unwrap(); - if let Some(last_name) = last_name { - return Err(CommandError::InvalidArg(format!( - "got two names before an entry: {last_name} and {name}" - ))); - } - last_name = Some(name); - } - CompressionArg::Dir => { - writeln!(err, "writing dir entry").unwrap(); - if symlink_flag { - return Err(CommandError::InvalidArg( - "symlink flag provided before dir entry".to_string(), - )); - } - let dirname = last_name.take().ok_or_else(|| { - CommandError::InvalidArg("no name provided before dir entry".to_string()) - })?; - writer - .add_directory(&dirname, options) - .wrap_err_with(|| format!("failed to create dir entry {dirname}"))?; - } - CompressionArg::Symlink => { - writeln!(err, "setting symlink flag for next entry").unwrap(); - if symlink_flag { - /* TODO: make this a warning? */ - return Err(CommandError::InvalidArg( - "symlink flag provided twice before entry".to_string(), - )); - } - symlink_flag = true; - } - CompressionArg::Immediate(data) => { - let name = last_name.take().ok_or_else(|| { - CommandError::InvalidArg(format!( - "no name provided for immediate data {data:?}" - )) - })?; - /* It's highly unlikely any OS allows process args of this length, so even though - * we're using rust's env::args_os() and it would be very impressive for an attacker - * to get CLI args to overflow, it seems likely to be inefficient in any case, and - * very unlikely to be useful, so exit with a clear error. 
*/ - if data.len() > ZIP64_BYTES_THR.try_into().unwrap() { - return Err(CommandError::InvalidArg(format!( - "length of immediate data argument is {}; use a file for inputs over {} bytes", - data.len(), - ZIP64_BYTES_THR - ))); - }; - if symlink_flag { - /* This is a symlink entry. */ - let target = data.into_string().map_err(|target| { - CommandError::InvalidArg(format!( - "failed to decode immediate symlink target {target:?}" - )) - })?; - writeln!( - err, - "writing immediate symlink entry with name {name:?} and target {target:?}" - ) - .unwrap(); - /* TODO: .add_symlink() should support OsString targets! */ - writer - .add_symlink(&name, &target, options) - .wrap_err_with(|| { - format!("failed to created symlink entry {name}->{target}") - })?; - symlink_flag = false; - } else { - /* This is a file entry. */ - writeln!( - err, - "writing immediate file entry with name {name:?} and data {data:?}" - ) - .unwrap(); - let data = data.into_encoded_bytes(); - writer - .start_file(&name, options) - .wrap_err_with(|| format!("failed to create file entry {name}"))?; - writer.write_all(data.as_ref()).wrap_err_with(|| { - format!( - "failed writing immediate data of length {} to file entry {name}", - data.len() - ) - })?; - } - } - CompressionArg::FilePath(path) => { - let name = last_name - .take() - .unwrap_or_else(|| path_to_string(&path).into()); - if symlink_flag { - /* This is a symlink entry. */ - let target: String = - path_to_string(fs::read_link(&path).wrap_err_with(|| { - format!("failed to read symlink from path {}", path.display()) - })?) - .into(); - /* Similarly to immediate data arguments, we're simply not going to support - * symlinks over this length, which should be impossible anyway. 
*/ - if target.len() > ZIP64_BYTES_THR.try_into().unwrap() { - return Err(CommandError::InvalidArg(format!( - "symlink target for {name} is over {ZIP64_BYTES_THR} bytes (was: {})", - target.len() - ))); - } - writeln!(err, "writing symlink entry from path {path:?} with name {name:?} and target {target:?}").unwrap(); - writer - .add_symlink(&name, &target, options) - .wrap_err_with(|| { - format!("failed to create symlink entry for {name}->{target}") - })?; - symlink_flag = false; - } else { - /* This is a file entry. */ - writeln!( - err, - "writing file entry from path {path:?} with name {name:?}" - ) - .unwrap(); - let mut f = fs::File::open(&path).wrap_err_with(|| { - format!("error opening file for {name} at {}", path.display()) - })?; - /* Get the length of the file before reading it and set large_file if needed. */ - let input_len: u64 = f - .metadata() - .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? - .len(); - writeln!(err, "entry is {input_len} bytes long").unwrap(); - let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { - writeln!( - err, - "temporarily ensuring .large_file(true) for current entry" - ) - .unwrap(); - options.large_file(true) - } else { - options - }; - writer - .start_file(&name, maybe_large_file_options) - .wrap_err_with(|| format!("error creating file entry for {name}"))?; - io::copy(&mut f, &mut writer).wrap_err_with(|| { - format!("error copying content for {name} from file {f:?}") - })?; - } - } - CompressionArg::RecursiveDirPath(r) => { - if symlink_flag { - return Err(CommandError::InvalidArg( - "symlink flag provided before recursive dir entry".to_string(), - )); - } - writeln!( - err, - "writing recursive dir entries for path {r:?} with name {last_name:?}" - ) - .unwrap(); - enter_recursive_dir_entries(&mut err, last_name.take(), &r, &mut writer, options)?; - } - } - } - if symlink_flag { - return Err(CommandError::InvalidArg( - "symlink flag remaining after all entry flags 
processed".to_string(), - )); - } - if let Some(last_name) = last_name { - return Err(CommandError::InvalidArg(format!( - "name {last_name} remaining after all entry flags processed" - ))); - } - - for pos_arg in positional_paths.into_iter() { - let file_type = fs::symlink_metadata(&pos_arg) - .wrap_err_with(|| format!("failed to read metadata from path {}", pos_arg.display()))? - .file_type(); - if file_type.is_symlink() { - let target = fs::read_link(&pos_arg).wrap_err_with(|| { - format!("failed to read symlink content from {}", pos_arg.display()) - })?; - writeln!( - err, - "writing positional symlink entry with path {pos_arg:?} and target {target:?}" - ) - .unwrap(); - writer - .add_symlink_from_path(&pos_arg, &target, options) - .wrap_err_with(|| { - format!( - "failed to create symlink entry for {}->{}", - pos_arg.display(), - target.display() - ) - })?; - } else if file_type.is_file() { - writeln!(err, "writing positional file entry with path {pos_arg:?}").unwrap(); - let mut f = fs::File::open(&pos_arg) - .wrap_err_with(|| format!("failed to open file at {}", pos_arg.display()))?; - /* Get the length of the file before reading it and set large_file if needed. */ - let input_len: u64 = f - .metadata() - .wrap_err_with(|| format!("error reading file metadata for {f:?}"))? 
- .len(); - let maybe_large_file_options = if input_len > ZIP64_BYTES_THR { - writeln!( - err, - "temporarily ensuring .large_file(true) for current entry" - ) - .unwrap(); - options.large_file(true) - } else { - options - }; - writer - .start_file_from_path(&pos_arg, maybe_large_file_options) - .wrap_err_with(|| format!("failed to create file entry {}", pos_arg.display()))?; - io::copy(&mut f, &mut writer) - .wrap_err_with(|| format!("failed to copy file contents from {f:?}"))?; - } else { - assert!(file_type.is_dir()); - writeln!( - err, - "writing positional recursive dir entry for {pos_arg:?}" - ) - .unwrap(); - enter_recursive_dir_entries(&mut err, None, &pos_arg, &mut writer, options)?; - } - } + let mod_seq = ModificationSequence::from_args(args, positional_paths, &mut err)?; + mod_seq.invoke(&mut writer, &mut err)?; let handle = writer .finish() From 122224b798ceda983ade2e8d2836690d88b30d28 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 28 Nov 2024 02:52:01 -0500 Subject: [PATCH 09/31] remove unused schema trait --- cli/src/schema.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/cli/src/schema.rs b/cli/src/schema.rs index 6344efde3..d6241a404 100644 --- a/cli/src/schema.rs +++ b/cli/src/schema.rs @@ -1,12 +1,3 @@ -use std::{collections::VecDeque, ffi::OsString, fmt}; - -pub trait Schema: Sized + fmt::Debug { - type E; - fn parse_argv(argv: &mut VecDeque) -> Result; - type J: From; - fn from_json() -> Result; -} - #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] pub struct SectionName(String); From 2eda9b2874495ff83d4f00570104d9d62ea422e1 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 28 Nov 2024 03:24:34 -0500 Subject: [PATCH 10/31] add json conditional dependency --- cli/Cargo.toml | 3 +++ cli/clite/Cargo.toml | 13 ++++++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cli/Cargo.toml 
b/cli/Cargo.toml index 135270248..e6cd1398b 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -26,6 +26,7 @@ name = "zip-cli" [dependencies] glob = { version = "0.3", optional = true } regex = { version = "1", optional = true } +json = { version = "0.12", optional = true } [dependencies.zip] path = ".." @@ -48,6 +49,7 @@ zstd = ["zip/zstd"] glob = ["dep:glob"] rx = ["dep:regex"] +json = ["dep:json"] default = [ "aes-crypto", @@ -60,6 +62,7 @@ default = [ "zstd", "glob", "rx", + "json", ] diff --git a/cli/clite/Cargo.toml b/cli/clite/Cargo.toml index 607bf3314..475b3b483 100644 --- a/cli/clite/Cargo.toml +++ b/cli/clite/Cargo.toml @@ -21,12 +21,19 @@ members = ["."] [[bin]] name = "zip-clite" -[dependencies] - [dependencies.zip-cli] path = ".." default-features = false -features = ["deflate-flate2", "deflate-zlib"] +features = [ + "deflate-flate2", + "deflate-zlib", +] + +[features] +# These are all pure rust crates with no significant dependency tree. +rust-deps = ["zip-cli/glob", "zip-cli/rx", "zip-cli/json"] + +default = [] [profile.release] strip = true From 44a8444a6ced1fcb15a42be21bf480853a0b580b Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 28 Nov 2024 07:22:03 -0500 Subject: [PATCH 11/31] save work on backends and such --- cli/src/args/compress.rs | 3 + cli/src/compress.rs | 16 +-- cli/src/extract.rs | 2 +- cli/src/schema.rs | 203 ++++++++++++++++++++++++++++----------- 4 files changed, 161 insertions(+), 63 deletions(-) diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 4dd3dba95..6b823c79d 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -46,6 +46,9 @@ pub enum OutputType { File { path: PathBuf, append: bool }, } +/* #[cfg(feature = "json")] */ +/* impl OutputType {} */ + #[derive(Debug)] pub struct Compress { pub output: OutputType, diff --git a/cli/src/compress.rs b/cli/src/compress.rs index f0474513a..0bbef6c8f 100644 --- a/cli/src/compress.rs +++ 
b/cli/src/compress.rs @@ -15,7 +15,7 @@ use zip::{ use crate::{args::compress::*, CommandError, OutputHandle, WrapCommandErr}; #[derive(Debug, Clone)] -pub enum EntryData { +enum EntrySpec { Dir { name: String, }, @@ -35,7 +35,7 @@ pub enum EntryData { }, } -impl EntryData { +impl EntrySpec { pub fn interpret_entry_path(path: PathBuf) -> Result { let file_type = fs::symlink_metadata(&path) .wrap_err_with(|| format!("failed to read metadata from path {}", path.display()))? @@ -189,7 +189,7 @@ impl EntryData { pub enum ModificationOperation { CreateEntry { options: SimpleFileOptions, - spec: EntryData, + spec: EntrySpec, }, } @@ -294,7 +294,7 @@ impl ModificationSequence { })?; operations.push(ModificationOperation::CreateEntry { options, - spec: EntryData::Dir { name }, + spec: EntrySpec::Dir { name }, }); } CompressionArg::Immediate(data) => { @@ -308,7 +308,7 @@ impl ModificationSequence { })?; operations.push(ModificationOperation::CreateEntry { options, - spec: EntryData::Immediate { + spec: EntrySpec::Immediate { name, data, symlink_flag, @@ -322,7 +322,7 @@ impl ModificationSequence { let name = last_name.unwrap_or_else(|| path_to_string(&path).into()); operations.push(ModificationOperation::CreateEntry { options, - spec: EntryData::File { + spec: EntrySpec::File { name: Some(name), path, symlink_flag, @@ -341,7 +341,7 @@ impl ModificationSequence { operations.push(ModificationOperation::CreateEntry { options, - spec: EntryData::RecDir { + spec: EntrySpec::RecDir { name: last_name, path, }, @@ -363,7 +363,7 @@ impl ModificationSequence { for p in positional_paths.into_iter() { operations.push(ModificationOperation::CreateEntry { options, - spec: EntryData::interpret_entry_path(p)?, + spec: EntrySpec::interpret_entry_path(p)?, }); } Ok(Self { operations }) diff --git a/cli/src/extract.rs b/cli/src/extract.rs index f5aaa28c7..9ae58fb58 100644 --- a/cli/src/extract.rs +++ b/cli/src/extract.rs @@ -49,7 +49,7 @@ fn maybe_process_symlink<'a, 't>( entry 
.read_to_end(symlink_target) .wrap_err("failed to read symlink target from zip archive entry")?; - debug_assert_eq!(symlink_target.len(), size.try_into().unwrap()); + debug_assert_eq!(u64::try_from(symlink_target.len()).unwrap(), size); Ok(Some(symlink_target)) } diff --git a/cli/src/schema.rs b/cli/src/schema.rs index d6241a404..a839097ce 100644 --- a/cli/src/schema.rs +++ b/cli/src/schema.rs @@ -1,75 +1,170 @@ -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct SectionName(String); - -impl SectionName { - pub fn create(name: impl Into) -> Self { - let name: String = name.into(); - assert!(!name.is_empty()); - assert!(name.chars().all(|c| c.is_ascii_uppercase())); - Self(name) - } +use std::{error, ffi, fmt, marker::PhantomData, str}; + +pub trait Schema {} + +/* pub enum SchemaValue { */ +/* Bool(bool), */ +/* Path(PathBuf), */ +/* } */ + +/* pub trait SchemaValue {} */ + +/* impl SchemaValue for bool {} */ + +pub trait Backend { + type Input<'a>; + type Value; + type Error; + fn parse<'a>(s: Self::Input<'a>) -> Result; } -#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub struct MetaVarName(String); +pub struct StrDecoderTransformer(PhantomData); -impl MetaVarName { - pub fn create(name: impl Into) -> Self { - let name: String = name.into(); - assert!(!name.is_empty()); - assert!(name.chars().all(|c| c.is_ascii_lowercase() || c == '-')); - Self(name) +impl StrDecoderTransformer { + pub const fn new() -> Self { + Self(PhantomData) } } -pub trait MetaVar { - fn choices(&self) -> Option>; +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum StrWrapperError { + Utf8(str::Utf8Error), + Unwrap(E), } -pub enum FormatCaseElement { - FormatRef(MetaVarName), - Literal(String), +impl fmt::Display for StrWrapperError +where + E: fmt::Display, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Utf8(e) => ::fmt(e, f), + Self::Unwrap(e) => ::fmt(e, f), + } + } } -pub struct FormatCase { - pub elements: Vec, - 
pub description: Option, +impl error::Error for StrWrapperError +where + E: error::Error, +{ + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match self { + Self::Utf8(e) => e.source(), + Self::Unwrap(e) => e.source(), + } + } } -pub enum MetaVarKind { - /* e.g. */ - NameOnly(String), - Format { cases: Vec }, +impl Backend for StrDecoderTransformer +where + for<'a> B: Backend = &'a str>, +{ + type Input<'a> = &'a ffi::OsStr; + type Value = ::Value; + type Error = StrWrapperError<::Error>; + fn parse<'a>(s: &'a ffi::OsStr) -> Result { + let s: &'a str = s + .try_into() + .map_err(|e: str::Utf8Error| StrWrapperError::Utf8(e))?; + ::parse(s).map_err(|e| StrWrapperError::Unwrap(e)) + } } -pub struct MetaVarDecl { - pub id: MetaVarName, - pub spec: MetaVarKind, -} +pub struct JsonBackend; -pub struct FlagSuffixCase { - pub prefix_marker: &'static str, - pub format: MetaVarName, +#[cfg(feature = "json")] +impl Backend for JsonBackend { + type Input<'a> = &'a str; + type Value = json::JsonValue; + type Error = json::Error; + fn parse<'a>(s: &'a str) -> Result { + json::parse(s) + } } -pub struct Flag { - pub short: Option, - pub long: String, - pub suffix_cases: Vec, - pub value: Option, -} +#[cfg(test)] +mod test { + use super::*; -pub enum FlagCaseElement { - SectionRef(SectionName), - Literal(Flag), - Optional(Box), -} + struct BoolBackend; + impl Backend for BoolBackend { + type Input<'a> = &'a str; + type Value = bool; + type Error = String; + fn parse<'a>(s: &'a str) -> Result { + match s { + "true" => Ok(true), + "false" => Ok(false), + e => Err(e.to_string()), + } + } + } -pub struct FlagCase { - pub elements: Vec, -} + #[test] + fn parse_bool() { + assert!(BoolBackend::parse("true").unwrap()); + assert!(!BoolBackend::parse("false").unwrap()); + assert_eq!(BoolBackend::parse("").err().unwrap(), ""); + assert_eq!(BoolBackend::parse("aaaaasdf").err().unwrap(), "aaaaasdf"); + } + + #[cfg(unix)] + mod unix { + use std::{ffi, os::unix::ffi::OsStrExt}; + 
+ pub fn broken_utf8() -> &'static ffi::OsStr { + // Here, the values 0x66 and 0x6f correspond to 'f' and 'o' + // respectively. The value 0x80 is a lone continuation byte, invalid + // in a UTF-8 sequence. + ffi::OsStr::from_bytes(&[0x66, 0x6f, 0x80, 0x6f]) + } + } + #[cfg(windows)] + mod windows { + use std::{ffi, os::windows::ffi::OsStringExt}; + + pub fn broken_utf8() -> ffi::OsString { + // Here the values 0x0066 and 0x006f correspond to 'f' and 'o' + // respectively. The value 0xD800 is a lone surrogate half, invalid + // in a UTF-16 sequence. + ffi::OsString::from_wide(&[0x0066, 0x006f, 0xD800, 0x006f]) + } + } + fn broken_utf8() -> ffi::OsString { + #[cfg(unix)] + let broken = unix::broken_utf8().to_os_string(); + #[cfg(windows)] + let broken = windows::broken_utf8(); -pub struct FlagsSectionDecl { - pub id: SectionName, - pub cases: Vec, + broken + } + + #[test] + fn utf8_parse_failure() { + let broken = broken_utf8(); + assert!(broken.to_str().is_none()); + } + + #[test] + fn str_wrapper() { + type Wrapper = StrDecoderTransformer; + + assert!(Wrapper::parse(ffi::OsStr::new("true")).unwrap()); + assert!(!Wrapper::parse(ffi::OsStr::new("false")).unwrap()); + assert_eq!( + Wrapper::parse(ffi::OsStr::new("")).err().unwrap(), + StrWrapperError::Unwrap(String::from("")) + ); + assert_eq!( + Wrapper::parse(ffi::OsStr::new("aaaaasdf")).err().unwrap(), + StrWrapperError::Unwrap(String::from("aaaaasdf")) + ); + + let broken = broken_utf8(); + assert_eq!( + Wrapper::parse(broken.as_ref()).err().unwrap(), + StrWrapperError::Utf8(str::from_utf8(broken.as_encoded_bytes()).err().unwrap()), + ); + } } From 38d1b9fb8f53ee9e8611786e4caa7db07e84aeb1 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 28 Nov 2024 07:37:22 -0500 Subject: [PATCH 12/31] refactor schema.rs --- cli/src/schema.rs | 90 ++++++++++++++++++++++++++--------------------- 1 file changed, 49 insertions(+), 41 deletions(-) diff --git 
a/cli/src/schema.rs b/cli/src/schema.rs index a839097ce..c660c1c8d 100644 --- a/cli/src/schema.rs +++ b/cli/src/schema.rs @@ -1,4 +1,4 @@ -use std::{error, ffi, fmt, marker::PhantomData, str}; +use std::str; pub trait Schema {} @@ -18,56 +18,62 @@ pub trait Backend { fn parse<'a>(s: Self::Input<'a>) -> Result; } -pub struct StrDecoderTransformer(PhantomData); +pub mod transformers { + use super::Backend; -impl StrDecoderTransformer { - pub const fn new() -> Self { - Self(PhantomData) + use std::{error, ffi, fmt, marker::PhantomData, str}; + + pub struct StrDecoderTransformer(PhantomData); + + impl StrDecoderTransformer { + pub const fn new() -> Self { + Self(PhantomData) + } } -} -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum StrWrapperError { - Utf8(str::Utf8Error), - Unwrap(E), -} + #[derive(Debug, Clone, PartialEq, Eq)] + pub enum StrWrapperError { + Utf8(str::Utf8Error), + Unwrap(E), + } -impl fmt::Display for StrWrapperError -where - E: fmt::Display, -{ - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Self::Utf8(e) => ::fmt(e, f), - Self::Unwrap(e) => ::fmt(e, f), + impl fmt::Display for StrWrapperError + where + E: fmt::Display, + { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::Utf8(e) => ::fmt(e, f), + Self::Unwrap(e) => ::fmt(e, f), + } } } -} -impl error::Error for StrWrapperError -where - E: error::Error, -{ - fn source(&self) -> Option<&(dyn error::Error + 'static)> { - match self { - Self::Utf8(e) => e.source(), - Self::Unwrap(e) => e.source(), + impl error::Error for StrWrapperError + where + E: error::Error, + { + fn source(&self) -> Option<&(dyn error::Error + 'static)> { + match self { + Self::Utf8(e) => e.source(), + Self::Unwrap(e) => e.source(), + } } } -} -impl Backend for StrDecoderTransformer -where - for<'a> B: Backend = &'a str>, -{ - type Input<'a> = &'a ffi::OsStr; - type Value = ::Value; - type Error = StrWrapperError<::Error>; - fn parse<'a>(s: &'a ffi::OsStr) -> Result { - 
let s: &'a str = s - .try_into() - .map_err(|e: str::Utf8Error| StrWrapperError::Utf8(e))?; - ::parse(s).map_err(|e| StrWrapperError::Unwrap(e)) + impl Backend for StrDecoderTransformer + where + for<'a> B: Backend = &'a str>, + { + type Input<'a> = &'a ffi::OsStr; + type Value = ::Value; + type Error = StrWrapperError<::Error>; + fn parse<'a>(s: &'a ffi::OsStr) -> Result { + let s: &'a str = s + .try_into() + .map_err(|e: str::Utf8Error| StrWrapperError::Utf8(e))?; + ::parse(s).map_err(|e| StrWrapperError::Unwrap(e)) + } } } @@ -86,6 +92,7 @@ impl Backend for JsonBackend { #[cfg(test)] mod test { use super::*; + use std::ffi; struct BoolBackend; impl Backend for BoolBackend { @@ -148,6 +155,7 @@ mod test { #[test] fn str_wrapper() { + use transformers::{StrDecoderTransformer, StrWrapperError}; type Wrapper = StrDecoderTransformer; assert!(Wrapper::parse(ffi::OsStr::new("true")).unwrap()); From 7713e0700f51bb16c037a1e4e4ba0e76ac64526a Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 28 Nov 2024 08:11:36 -0500 Subject: [PATCH 13/31] refactor transforms --- cli/src/schema.rs | 76 ++++++++++++++++++++++++++++++----------------- 1 file changed, 48 insertions(+), 28 deletions(-) diff --git a/cli/src/schema.rs b/cli/src/schema.rs index c660c1c8d..29a5d5b20 100644 --- a/cli/src/schema.rs +++ b/cli/src/schema.rs @@ -23,56 +23,76 @@ pub mod transformers { use std::{error, ffi, fmt, marker::PhantomData, str}; - pub struct StrDecoderTransformer(PhantomData); + pub trait Transformer { + type A<'a>; + type B<'a>; + type Error; + fn convert_input<'a>(s: Self::A<'a>) -> Result, Self::Error>; + } + + pub struct StrTransformer; + + impl Transformer for StrTransformer { + type A<'a> = &'a ffi::OsStr; + type B<'a> = &'a str; + type Error = str::Utf8Error; + fn convert_input<'a>(s: Self::A<'a>) -> Result, Self::Error> { + s.try_into() + } + } + + pub struct DecoderTransformer(PhantomData<(T, B)>); - impl StrDecoderTransformer 
{ + impl DecoderTransformer { pub const fn new() -> Self { Self(PhantomData) } } #[derive(Debug, Clone, PartialEq, Eq)] - pub enum StrWrapperError { - Utf8(str::Utf8Error), - Unwrap(E), + pub enum WrapperError { + In(In), + Out(Out), } - impl fmt::Display for StrWrapperError + impl fmt::Display for WrapperError where - E: fmt::Display, + In: fmt::Display, + Out: fmt::Display, { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - Self::Utf8(e) => ::fmt(e, f), - Self::Unwrap(e) => ::fmt(e, f), + Self::In(e) => e.fmt(f), + Self::Out(e) => e.fmt(f), } } } - impl error::Error for StrWrapperError + impl error::Error for WrapperError where - E: error::Error, + In: error::Error, + Out: error::Error, { fn source(&self) -> Option<&(dyn error::Error + 'static)> { match self { - Self::Utf8(e) => e.source(), - Self::Unwrap(e) => e.source(), + Self::In(e) => e.source(), + Self::Out(e) => e.source(), } } } - impl Backend for StrDecoderTransformer + impl Backend for DecoderTransformer where - for<'a> B: Backend = &'a str>, + T: Transformer, + for<'a> B: Backend = ::B<'a>>, { - type Input<'a> = &'a ffi::OsStr; + type Input<'a> = ::A<'a>; type Value = ::Value; - type Error = StrWrapperError<::Error>; - fn parse<'a>(s: &'a ffi::OsStr) -> Result { - let s: &'a str = s - .try_into() - .map_err(|e: str::Utf8Error| StrWrapperError::Utf8(e))?; - ::parse(s).map_err(|e| StrWrapperError::Unwrap(e)) + type Error = WrapperError<::Error, ::Error>; + fn parse<'a>(s: Self::Input<'a>) -> Result { + let s: ::B<'a> = + ::convert_input(s).map_err(|e| WrapperError::In(e))?; + ::parse(s).map_err(|e| WrapperError::Out(e)) } } } @@ -138,7 +158,7 @@ mod test { ffi::OsString::from_wide(&[0x0066, 0x006f, 0xD800, 0x006f]) } } - fn broken_utf8() -> ffi::OsString { + fn broken_utf8() -> std::ffi::OsString { #[cfg(unix)] let broken = unix::broken_utf8().to_os_string(); #[cfg(windows)] @@ -155,24 +175,24 @@ mod test { #[test] fn str_wrapper() { - use transformers::{StrDecoderTransformer, 
StrWrapperError}; - type Wrapper = StrDecoderTransformer; + use transformers::{DecoderTransformer, StrTransformer, WrapperError}; + type Wrapper = DecoderTransformer; assert!(Wrapper::parse(ffi::OsStr::new("true")).unwrap()); assert!(!Wrapper::parse(ffi::OsStr::new("false")).unwrap()); assert_eq!( Wrapper::parse(ffi::OsStr::new("")).err().unwrap(), - StrWrapperError::Unwrap(String::from("")) + WrapperError::Out(String::from("")) ); assert_eq!( Wrapper::parse(ffi::OsStr::new("aaaaasdf")).err().unwrap(), - StrWrapperError::Unwrap(String::from("aaaaasdf")) + WrapperError::Out(String::from("aaaaasdf")) ); let broken = broken_utf8(); assert_eq!( Wrapper::parse(broken.as_ref()).err().unwrap(), - StrWrapperError::Utf8(str::from_utf8(broken.as_encoded_bytes()).err().unwrap()), + WrapperError::In(str::from_utf8(broken.as_encoded_bytes()).err().unwrap()), ); } } From 0aa7a19a961271ac23aea11c7542d1460cba65eb Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Thu, 28 Nov 2024 10:07:59 -0500 Subject: [PATCH 14/31] save progress --- cli/src/schema.rs | 128 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 100 insertions(+), 28 deletions(-) diff --git a/cli/src/schema.rs b/cli/src/schema.rs index 29a5d5b20..c7c2fc5c5 100644 --- a/cli/src/schema.rs +++ b/cli/src/schema.rs @@ -1,27 +1,99 @@ -use std::str; +pub mod backends { + use std::io; -pub trait Schema {} + pub trait Backend { + type Input<'a>; + type Value; + type Error; + fn parse<'a>(s: Self::Input<'a>) -> Result; + + fn write(v: &Self::Value, w: impl io::Write) -> io::Result<()>; + } + + #[cfg(feature = "json")] + pub mod json_backend { + use std::io; + + pub struct JsonBackend; + + impl super::Backend for JsonBackend { + type Input<'a> = &'a str; + + type Value = json::JsonValue; + + type Error = json::Error; + + fn parse<'a>(s: &'a str) -> Result { + json::parse(s) + } + + fn write(v: &json::JsonValue, mut w: impl io::Write) -> io::Result<()> { + 
v.write(&mut w) + } + } + } +} + +pub mod values { + use super::backends::Backend; + + pub trait SchemaValue: Sized { + type SerErr; + type DeserErr; + fn serialize(self) -> Result<::Value, Self::SerErr>; + fn deserialize(s: ::Value) -> Result; + } -/* pub enum SchemaValue { */ -/* Bool(bool), */ -/* Path(PathBuf), */ -/* } */ + #[cfg(feature = "json")] + pub mod json_value { + use super::*; + use crate::schema::backends::json_backend::JsonBackend; -/* pub trait SchemaValue {} */ + impl SchemaValue for bool { + type SerErr = (); + type DeserErr = String; -/* impl SchemaValue for bool {} */ + fn serialize(self) -> Result { + Ok(json::JsonValue::Boolean(self)) + } + fn deserialize(s: json::JsonValue) -> Result { + match s { + json::JsonValue::Boolean(value) => Ok(value), + s => Err(format!("non-boolean value {s}")), + } + } + } + + impl SchemaValue for String { + type SerErr = (); + type DeserErr = String; + + fn serialize(self) -> Result { + Ok(json::JsonValue::String(self)) + } + fn deserialize(s: json::JsonValue) -> Result { + match s { + json::JsonValue::String(value) => Ok(value), + s => Err(format!("non-string value {s}")), + } + } + } + } -pub trait Backend { - type Input<'a>; - type Value; - type Error; - fn parse<'a>(s: Self::Input<'a>) -> Result; + /* pub enum SchemaValue { */ + /* Bool(bool), */ + /* Path(PathBuf), */ + /* } */ + + /* pub trait SchemaValue {} */ + + /* impl SchemaValue for bool {} */ } pub mod transformers { - use super::Backend; + use super::backends::Backend; - use std::{error, ffi, fmt, marker::PhantomData, str}; + use std::{error, ffi, fmt, io, marker::PhantomData, str}; pub trait Transformer { type A<'a>; @@ -88,31 +160,24 @@ pub mod transformers { { type Input<'a> = ::A<'a>; type Value = ::Value; + type Error = WrapperError<::Error, ::Error>; fn parse<'a>(s: Self::Input<'a>) -> Result { let s: ::B<'a> = ::convert_input(s).map_err(|e| WrapperError::In(e))?; ::parse(s).map_err(|e| WrapperError::Out(e)) } - } -} -pub struct JsonBackend; 
- -#[cfg(feature = "json")] -impl Backend for JsonBackend { - type Input<'a> = &'a str; - type Value = json::JsonValue; - type Error = json::Error; - fn parse<'a>(s: &'a str) -> Result { - json::parse(s) + fn write(v: &::Value, w: impl io::Write) -> io::Result<()> { + ::write(v, w) + } } } #[cfg(test)] mod test { - use super::*; - use std::ffi; + use super::{backends::Backend, *}; + use std::{ffi, io}; struct BoolBackend; impl Backend for BoolBackend { @@ -126,6 +191,12 @@ mod test { e => Err(e.to_string()), } } + fn write(v: &bool, mut w: impl io::Write) -> io::Result<()> { + match v { + true => w.write_all(b"true"), + false => w.write_all(b"false"), + } + } } #[test] @@ -175,6 +246,7 @@ mod test { #[test] fn str_wrapper() { + use std::str; use transformers::{DecoderTransformer, StrTransformer, WrapperError}; type Wrapper = DecoderTransformer; From f58ba146ea9440f746e7af2cb4091985f594f62d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 15:02:22 -0500 Subject: [PATCH 15/31] refactor "backend" --- cli/src/schema.rs | 138 +++++++++++++--------------------------------- 1 file changed, 38 insertions(+), 100 deletions(-) diff --git a/cli/src/schema.rs b/cli/src/schema.rs index c7c2fc5c5..04df1ec2c 100644 --- a/cli/src/schema.rs +++ b/cli/src/schema.rs @@ -1,94 +1,32 @@ pub mod backends { - use std::io; - pub trait Backend { - type Input<'a>; - type Value; - type Error; - fn parse<'a>(s: Self::Input<'a>) -> Result; - - fn write(v: &Self::Value, w: impl io::Write) -> io::Result<()>; + type Str<'a>; + type Val<'a>; + type Err<'a>; + fn parse<'a>(s: Self::Str<'a>) -> Result, Self::Err<'a>>; + /* fn print(v: Self::Val) -> Self::Str; */ } #[cfg(feature = "json")] pub mod json_backend { - use std::io; - pub struct JsonBackend; impl super::Backend for JsonBackend { - type Input<'a> = &'a str; - - type Value = json::JsonValue; + type Str<'a> = &'a str; + type Val<'a> = json::JsonValue; + type Err<'a> = 
json::Error; - type Error = json::Error; - - fn parse<'a>(s: &'a str) -> Result { + fn parse<'a>(s: Self::Str<'a>) -> Result, Self::Err<'a>> { json::parse(s) } - - fn write(v: &json::JsonValue, mut w: impl io::Write) -> io::Result<()> { - v.write(&mut w) - } + /* fn print(v: json::JsonValue) -> String { */ + /* v.pretty(2) */ + /* } */ } } } -pub mod values { - use super::backends::Backend; - - pub trait SchemaValue: Sized { - type SerErr; - type DeserErr; - fn serialize(self) -> Result<::Value, Self::SerErr>; - fn deserialize(s: ::Value) -> Result; - } - - #[cfg(feature = "json")] - pub mod json_value { - use super::*; - use crate::schema::backends::json_backend::JsonBackend; - - impl SchemaValue for bool { - type SerErr = (); - type DeserErr = String; - - fn serialize(self) -> Result { - Ok(json::JsonValue::Boolean(self)) - } - fn deserialize(s: json::JsonValue) -> Result { - match s { - json::JsonValue::Boolean(value) => Ok(value), - s => Err(format!("non-boolean value {s}")), - } - } - } - - impl SchemaValue for String { - type SerErr = (); - type DeserErr = String; - - fn serialize(self) -> Result { - Ok(json::JsonValue::String(self)) - } - fn deserialize(s: json::JsonValue) -> Result { - match s { - json::JsonValue::String(value) => Ok(value), - s => Err(format!("non-string value {s}")), - } - } - } - } - - /* pub enum SchemaValue { */ - /* Bool(bool), */ - /* Path(PathBuf), */ - /* } */ - - /* pub trait SchemaValue {} */ - - /* impl SchemaValue for bool {} */ -} +pub mod values; pub mod transformers { use super::backends::Backend; @@ -98,8 +36,8 @@ pub mod transformers { pub trait Transformer { type A<'a>; type B<'a>; - type Error; - fn convert_input<'a>(s: Self::A<'a>) -> Result, Self::Error>; + type Error<'a>; + fn convert_input<'a>(s: Self::A<'a>) -> Result, Self::Error<'a>>; } pub struct StrTransformer; @@ -107,8 +45,8 @@ pub mod transformers { impl Transformer for StrTransformer { type A<'a> = &'a ffi::OsStr; type B<'a> = &'a str; - type Error = 
str::Utf8Error; - fn convert_input<'a>(s: Self::A<'a>) -> Result, Self::Error> { + type Error<'a> = str::Utf8Error; + fn convert_input<'a>(s: Self::A<'a>) -> Result, Self::Error<'a>> { s.try_into() } } @@ -156,21 +94,21 @@ pub mod transformers { impl Backend for DecoderTransformer where T: Transformer, - for<'a> B: Backend = ::B<'a>>, + for<'a> B: Backend = ::B<'a>>, { - type Input<'a> = ::A<'a>; - type Value = ::Value; + type Str<'a> = ::A<'a>; + type Val<'a> = ::Val<'a>; - type Error = WrapperError<::Error, ::Error>; - fn parse<'a>(s: Self::Input<'a>) -> Result { + type Err<'a> = WrapperError<::Error<'a>, ::Err<'a>>; + fn parse<'a>(s: Self::Str<'a>) -> Result, Self::Err<'a>> { let s: ::B<'a> = ::convert_input(s).map_err(|e| WrapperError::In(e))?; ::parse(s).map_err(|e| WrapperError::Out(e)) } - fn write(v: &::Value, w: impl io::Write) -> io::Result<()> { - ::write(v, w) - } + /* fn write(v: &::Value, w: impl io::Write) -> io::Result<()> { */ + /* ::write(v, w) */ + /* } */ } } @@ -181,22 +119,22 @@ mod test { struct BoolBackend; impl Backend for BoolBackend { - type Input<'a> = &'a str; - type Value = bool; - type Error = String; - fn parse<'a>(s: &'a str) -> Result { + type Str<'a> = &'a str; + type Val<'a> = bool; + type Err<'a> = &'a str; + fn parse<'a>(s: Self::Str<'a>) -> Result, Self::Err<'a>> { match s { "true" => Ok(true), "false" => Ok(false), - e => Err(e.to_string()), - } - } - fn write(v: &bool, mut w: impl io::Write) -> io::Result<()> { - match v { - true => w.write_all(b"true"), - false => w.write_all(b"false"), + e => Err(e), } } + /* fn write(v: &bool, mut w: impl io::Write) -> io::Result<()> { */ + /* match v { */ + /* true => w.write_all(b"true"), */ + /* false => w.write_all(b"false"), */ + /* } */ + /* } */ } #[test] @@ -254,11 +192,11 @@ mod test { assert!(!Wrapper::parse(ffi::OsStr::new("false")).unwrap()); assert_eq!( Wrapper::parse(ffi::OsStr::new("")).err().unwrap(), - WrapperError::Out(String::from("")) + WrapperError::Out("") ); 
assert_eq!( Wrapper::parse(ffi::OsStr::new("aaaaasdf")).err().unwrap(), - WrapperError::Out(String::from("aaaaasdf")) + WrapperError::Out("aaaaasdf") ); let broken = broken_utf8(); From 9e2536a71f3b44e1fcfdaf37d2d02b0a7d6f067f Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 16:23:48 -0500 Subject: [PATCH 16/31] add a Resource trait but not really sure what to do with it yet --- cli/src/args.rs | 8 ++++ cli/src/args/compress.rs | 90 ++++++++++++++++++++++++++++++++++++++-- cli/src/schema/cli.rs | 75 +++++++++++++++++++++++++++++++++ cli/src/schema/values.rs | 80 +++++++++++++++++++++++++++++++++++ 4 files changed, 250 insertions(+), 3 deletions(-) create mode 100644 cli/src/schema/cli.rs create mode 100644 cli/src/schema/values.rs diff --git a/cli/src/args.rs b/cli/src/args.rs index b59fd0cbd..0fbf1829e 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -151,6 +151,14 @@ pub enum ZipCommand { Extract(extract::Extract), } +pub trait Resource: Sized { + const ID: &'static str; + type ParseError; + fn parse_argv(argv: &mut VecDeque) -> Result; +} + +pub trait CommandInputs {} + pub trait CommandFormat: fmt::Debug { const COMMAND_NAME: &'static str; const COMMAND_TABS: &'static str; diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 6b823c79d..3cc2cb641 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -1,4 +1,4 @@ -use super::{ArgParseError, CommandFormat}; +use super::{ArgParseError, CommandFormat, CommandInputs, Resource}; use std::{collections::VecDeque, ffi::OsString, num::ParseIntError, path::PathBuf}; @@ -46,8 +46,90 @@ pub enum OutputType { File { path: PathBuf, append: bool }, } -/* #[cfg(feature = "json")] */ -/* impl OutputType {} */ +pub enum OutputTypeError { + ArgWith(&'static str, String), + ArgTwice(&'static str), + NoValFor(&'static str), + ValArgTwice { + arg: &'static str, + prev: String, + new: String, + }, +} + +impl Resource for 
OutputType { + const ID: &'static str = "OUTPUT-FLAGS"; + type ParseError = OutputTypeError; + fn parse_argv(argv: &mut VecDeque) -> Result { + let mut allow_stdout: bool = false; + let mut append_to_output_path: bool = false; + let mut output_path: Option = None; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"--stdout" => { + if let Some(output_path) = output_path.take() { + return Err(OutputTypeError::ArgWith( + "--stdout", + format!("output file {output_path:?}"), + )); + } + if append_to_output_path { + return Err(OutputTypeError::ArgWith("--stdout", "--append".to_string())); + } + if allow_stdout { + return Err(OutputTypeError::ArgTwice("--stdout")); + } + allow_stdout = true; + } + b"--append" => { + if append_to_output_path { + return Err(OutputTypeError::ArgTwice("--append")); + } + if allow_stdout { + return Err(OutputTypeError::ArgWith("--append", "--stdout".to_string())); + } + append_to_output_path = true; + } + b"-o" | b"--output-file" => { + let new_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| OutputTypeError::NoValFor("-o/--output-file"))?; + if let Some(prev_path) = output_path.take() { + return Err(OutputTypeError::ValArgTwice { + arg: "-o/--output-file", + prev: format!("{prev_path:?}"), + new: format!("{new_path:?}"), + }); + } + if allow_stdout { + return Err(OutputTypeError::ArgWith( + "--stdout", + "-o/--output-file".to_string(), + )); + } + output_path = Some(new_path); + } + _ => { + argv.push_front(arg); + break; + } + } + } + + Ok(if let Some(output_path) = output_path { + Self::File { + path: output_path, + append: append_to_output_path, + } + } else { + Self::Stdout { + allow_tty: allow_stdout, + } + }) + } +} #[derive(Debug)] pub struct Compress { @@ -57,6 +139,8 @@ pub struct Compress { pub positional_paths: Vec, } +impl CommandInputs for Compress {} + impl Compress { #[cfg(feature = "deflate64")] const DEFLATE64_HELP_LINE: &'static str = " - deflate64:\twith deflate64\n"; diff 
--git a/cli/src/schema/cli.rs b/cli/src/schema/cli.rs new file mode 100644 index 000000000..d6241a404 --- /dev/null +++ b/cli/src/schema/cli.rs @@ -0,0 +1,75 @@ +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct SectionName(String); + +impl SectionName { + pub fn create(name: impl Into) -> Self { + let name: String = name.into(); + assert!(!name.is_empty()); + assert!(name.chars().all(|c| c.is_ascii_uppercase())); + Self(name) + } +} + +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct MetaVarName(String); + +impl MetaVarName { + pub fn create(name: impl Into) -> Self { + let name: String = name.into(); + assert!(!name.is_empty()); + assert!(name.chars().all(|c| c.is_ascii_lowercase() || c == '-')); + Self(name) + } +} + +pub trait MetaVar { + fn choices(&self) -> Option>; +} + +pub enum FormatCaseElement { + FormatRef(MetaVarName), + Literal(String), +} + +pub struct FormatCase { + pub elements: Vec, + pub description: Option, +} + +pub enum MetaVarKind { + /* e.g. 
*/ + NameOnly(String), + Format { cases: Vec }, +} + +pub struct MetaVarDecl { + pub id: MetaVarName, + pub spec: MetaVarKind, +} + +pub struct FlagSuffixCase { + pub prefix_marker: &'static str, + pub format: MetaVarName, +} + +pub struct Flag { + pub short: Option, + pub long: String, + pub suffix_cases: Vec, + pub value: Option, +} + +pub enum FlagCaseElement { + SectionRef(SectionName), + Literal(Flag), + Optional(Box), +} + +pub struct FlagCase { + pub elements: Vec, +} + +pub struct FlagsSectionDecl { + pub id: SectionName, + pub cases: Vec, +} diff --git a/cli/src/schema/values.rs b/cli/src/schema/values.rs new file mode 100644 index 000000000..bc0acd5a7 --- /dev/null +++ b/cli/src/schema/values.rs @@ -0,0 +1,80 @@ +use super::backends::Backend; + +/* pub trait SchemaValue: Sized { */ +/* type DeserErr; */ +/* fn serialize(self) -> ::Value; */ +/* fn deserialize(s: ::Value) -> Result; */ +/* } */ + +pub trait NamedList { + fn f(self); +} + +/* pub enum Schema { */ +/* Bool, */ +/* Str, */ +/* Arr, */ +/* Obj, */ +/* Arr(Vec>) */ +/* Str(String), */ +/* Arr(Vec>), */ +/* Obj(Vec<(String, Box)>), */ +/* } */ + +pub enum HydratedValue<'a> { + Bool(bool), + Str(&'a str), + Arr(Vec>>), + Obj(Vec<(&'a str, Box>)>), +} + +pub trait Hydrate { + fn hydrate(v: HydratedValue) -> Value; +} + +pub trait Schema: Backend { + fn print<'a>(v: HydratedValue<'a>) -> ::Val<'a>; +} + +#[cfg(feature = "json")] +pub mod json_value { + use super::*; + use crate::schema::backends::json_backend::JsonBackend; + + /* impl SchemaValue for bool { */ + /* type DeserErr = String; */ + + /* fn serialize(self) -> json::JsonValue { */ + /* json::JsonValue::Boolean(self) */ + /* } */ + /* fn deserialize(s: json::JsonValue) -> Result { */ + /* match s { */ + /* json::JsonValue::Boolean(value) => Ok(value), */ + /* s => Err(format!("non-boolean value {s}")), */ + /* } */ + /* } */ + /* } */ + + /* impl SchemaValue for String { */ + /* type DeserErr = String; */ + + /* fn serialize(self) -> 
json::JsonValue { */ + /* json::JsonValue::String(self) */ + /* } */ + /* fn deserialize(s: json::JsonValue) -> Result { */ + /* match s { */ + /* json::JsonValue::String(value) => Ok(value), */ + /* s => Err(format!("non-string value {s}")), */ + /* } */ + /* } */ + /* } */ +} + +/* pub enum SchemaValue { */ +/* Bool(bool), */ +/* Path(PathBuf), */ +/* } */ + +/* pub trait SchemaValue {} */ + +/* impl SchemaValue for bool {} */ From a23f35d81eb59557a19796c4d1466c3a6f10f2ed Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 18:33:15 -0500 Subject: [PATCH 17/31] do more with Resource --- cli/src/args.rs | 37 ++++- cli/src/args/compress.rs | 89 +---------- cli/src/args/compress/resource.rs | 258 ++++++++++++++++++++++++++++++ 3 files changed, 292 insertions(+), 92 deletions(-) create mode 100644 cli/src/args/compress/resource.rs diff --git a/cli/src/args.rs b/cli/src/args.rs index 0fbf1829e..a2017d659 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -151,13 +151,38 @@ pub enum ZipCommand { Extract(extract::Extract), } -pub trait Resource: Sized { - const ID: &'static str; - type ParseError; - fn parse_argv(argv: &mut VecDeque) -> Result; -} +pub mod resource { + use super::*; + + use crate::schema::{backends::Backend, transformers::WrapperError}; + + pub trait Resource { + const ID: &'static str; + } + + pub trait ArgvResource: Resource + Sized { + type ArgvParseError; + fn parse_argv(argv: &mut VecDeque) -> Result; + } -pub trait CommandInputs {} + pub trait SchemaResource: Resource + Sized { + type B: Backend; + type SchemaParseError; + fn parse_schema<'a>( + v: ::Val<'a>, + ) -> Result; + + fn parse_schema_str<'a>( + s: ::Str<'a>, + ) -> Result::Err<'a>, Self::SchemaParseError>> + { + let v = ::parse(s).map_err(|e| WrapperError::In(e))?; + Ok(Self::parse_schema(v).map_err(|e| WrapperError::Out(e))?) 
+ } + } + + pub trait CommandInputs {} +} pub trait CommandFormat: fmt::Debug { const COMMAND_NAME: &'static str; diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 3cc2cb641..ffba6866a 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -1,4 +1,4 @@ -use super::{ArgParseError, CommandFormat, CommandInputs, Resource}; +use super::{ArgParseError, CommandFormat}; use std::{collections::VecDeque, ffi::OsString, num::ParseIntError, path::PathBuf}; @@ -46,90 +46,7 @@ pub enum OutputType { File { path: PathBuf, append: bool }, } -pub enum OutputTypeError { - ArgWith(&'static str, String), - ArgTwice(&'static str), - NoValFor(&'static str), - ValArgTwice { - arg: &'static str, - prev: String, - new: String, - }, -} - -impl Resource for OutputType { - const ID: &'static str = "OUTPUT-FLAGS"; - type ParseError = OutputTypeError; - fn parse_argv(argv: &mut VecDeque) -> Result { - let mut allow_stdout: bool = false; - let mut append_to_output_path: bool = false; - let mut output_path: Option = None; - - while let Some(arg) = argv.pop_front() { - match arg.as_encoded_bytes() { - b"--stdout" => { - if let Some(output_path) = output_path.take() { - return Err(OutputTypeError::ArgWith( - "--stdout", - format!("output file {output_path:?}"), - )); - } - if append_to_output_path { - return Err(OutputTypeError::ArgWith("--stdout", "--append".to_string())); - } - if allow_stdout { - return Err(OutputTypeError::ArgTwice("--stdout")); - } - allow_stdout = true; - } - b"--append" => { - if append_to_output_path { - return Err(OutputTypeError::ArgTwice("--append")); - } - if allow_stdout { - return Err(OutputTypeError::ArgWith("--append", "--stdout".to_string())); - } - append_to_output_path = true; - } - b"-o" | b"--output-file" => { - let new_path = argv - .pop_front() - .map(PathBuf::from) - .ok_or_else(|| OutputTypeError::NoValFor("-o/--output-file"))?; - if let Some(prev_path) = output_path.take() { - return Err(OutputTypeError::ValArgTwice { 
- arg: "-o/--output-file", - prev: format!("{prev_path:?}"), - new: format!("{new_path:?}"), - }); - } - if allow_stdout { - return Err(OutputTypeError::ArgWith( - "--stdout", - "-o/--output-file".to_string(), - )); - } - output_path = Some(new_path); - } - _ => { - argv.push_front(arg); - break; - } - } - } - - Ok(if let Some(output_path) = output_path { - Self::File { - path: output_path, - append: append_to_output_path, - } - } else { - Self::Stdout { - allow_tty: allow_stdout, - } - }) - } -} +pub mod resource; #[derive(Debug)] pub struct Compress { @@ -139,7 +56,7 @@ pub struct Compress { pub positional_paths: Vec, } -impl CommandInputs for Compress {} +/* impl CommandInputs for Compress {} */ impl Compress { #[cfg(feature = "deflate64")] diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs new file mode 100644 index 000000000..6fbedd314 --- /dev/null +++ b/cli/src/args/compress/resource.rs @@ -0,0 +1,258 @@ +use super::OutputType; +use crate::args::resource::*; + +use std::{collections::VecDeque, ffi::OsString, path::PathBuf}; + +pub enum OutputTypeError { + ArgWith(&'static str, String), + ArgTwice(&'static str), + NoValFor(&'static str), + ValArgTwice { + arg: &'static str, + prev: String, + new: String, + }, +} + +impl Resource for OutputType { + const ID: &'static str = "OUTPUT-FLAGS"; +} + +impl ArgvResource for OutputType { + type ArgvParseError = OutputTypeError; + fn parse_argv(argv: &mut VecDeque) -> Result { + let mut allow_stdout: bool = false; + let mut append_to_output_path: bool = false; + let mut output_path: Option = None; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"--stdout" => { + if let Some(output_path) = output_path.take() { + return Err(OutputTypeError::ArgWith( + "--stdout", + format!("output file {output_path:?}"), + )); + } + if append_to_output_path { + return Err(OutputTypeError::ArgWith("--stdout", "--append".to_string())); + } + if allow_stdout { + return 
Err(OutputTypeError::ArgTwice("--stdout")); + } + allow_stdout = true; + } + b"--append" => { + if append_to_output_path { + return Err(OutputTypeError::ArgTwice("--append")); + } + if allow_stdout { + return Err(OutputTypeError::ArgWith("--append", "--stdout".to_string())); + } + append_to_output_path = true; + } + b"-o" | b"--output-file" => { + let new_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| OutputTypeError::NoValFor("-o/--output-file"))?; + if let Some(prev_path) = output_path.take() { + return Err(OutputTypeError::ValArgTwice { + arg: "-o/--output-file", + prev: format!("{prev_path:?}"), + new: format!("{new_path:?}"), + }); + } + if allow_stdout { + return Err(OutputTypeError::ArgWith( + "--stdout", + "-o/--output-file".to_string(), + )); + } + output_path = Some(new_path); + } + _ => { + argv.push_front(arg); + break; + } + } + } + + Ok(if let Some(output_path) = output_path { + Self::File { + path: output_path, + append: append_to_output_path, + } + } else { + Self::Stdout { + allow_tty: allow_stdout, + } + }) + } +} + +#[cfg(feature = "json")] +pub mod json_resource { + use super::*; + use crate::schema::backends::{json_backend::JsonBackend, Backend}; + + use std::{error, fmt}; + + use json::{object::Object as JsonObject, JsonValue}; + + #[derive(Debug)] + pub enum JsonOutputTypeError { + InvalidType { + val: JsonValue, + valid_types: &'static [&'static str], + context: &'static str, + }, + InvalidObjectKeys { + obj: JsonObject, + expected_keys: &'static [&'static str], + context: &'static str, + }, + } + + impl fmt::Display for JsonOutputTypeError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::InvalidType { + valid_types, + context, + val, + } => { + assert!(!valid_types.is_empty()); + let types_str: String = valid_types.join(", "); + write!( + f, + "{context} expected types [{types_str}], but received: {val}" + ) + } + Self::InvalidObjectKeys { + obj, + expected_keys, + context, + } => { + 
assert!(!expected_keys.is_empty()); + let keys_str: String = expected_keys.join(", "); + let obj = JsonValue::Object(obj.clone()); + write!( + f, + "{context} expected object keys [{keys_str}], but object was {obj}" + ) + } + } + } + } + + impl error::Error for JsonOutputTypeError {} + + impl SchemaResource for OutputType { + type B = JsonBackend; + type SchemaParseError = JsonOutputTypeError; + + fn parse_schema<'a>( + v: ::Val<'a>, + ) -> Result { + match v { + /* => {"file": {"path": , "append": false}}} */ + JsonValue::String(path) => Ok(OutputType::File { + path: path.into(), + append: false, + }), + /* => {"stdout": {"allow_tty": }} */ + JsonValue::Boolean(allow_tty) => Ok(OutputType::Stdout { allow_tty }), + /* An object--destructure by enum case. */ + JsonValue::Object(o) => { + if let Some(o) = o.get("stdout") { + match o { + /* {"stdout": } => {"stdout": {"allow_tty": }} */ + JsonValue::Boolean(allow_tty) => Ok(OutputType::Stdout { + allow_tty: *allow_tty, + }), + /* {"stdout": {"allow_tty": }} => {"stdout": {"allow_tty": }} */ + JsonValue::Object(o) => { + let allow_tty: bool = if let Some(allow_tty) = o.get("allow_tty") { + match allow_tty { + JsonValue::Boolean(allow_tty) => Ok(*allow_tty), + _ => Err(JsonOutputTypeError::InvalidType { + val: allow_tty.clone(), + valid_types: &["boolean"], + context: "the 'allow_tty' field in the 'stdout' case", + }), + } + } else { + Ok(false) + }?; + Ok(OutputType::Stdout { allow_tty }) + } + _ => Err(JsonOutputTypeError::InvalidType { + val: o.clone(), + valid_types: &["boolean", "object"], + context: "the 'stdout' enum case", + }), + } + } else if let Some(o) = o.get("file") { + match o { + /* {"file": } => {"file": {"path": , append: false}} */ + JsonValue::String(path) => Ok(OutputType::File { + path: path.into(), + append: false, + }), + /* {"file": {"path": , "append": }} => {"file": {"path": , append: }} */ + JsonValue::Object(o) => { + let path: PathBuf = if let Some(path) = o.get("path") { + match path { + 
JsonValue::String(path) => Ok(path.into()), + _ => Err(JsonOutputTypeError::InvalidType { + val: path.clone(), + valid_types: &["string"], + context: "the 'path' field in the 'file' case", + }), + } + } else { + /* This *must* be provided, whereas "append" has a default. */ + Err(JsonOutputTypeError::InvalidObjectKeys { + obj: o.clone(), + expected_keys: &["path"], + context: "the 'file' enum case", + }) + }?; + let append: bool = if let Some(append) = o.get("append") { + match append { + JsonValue::Boolean(append) => Ok(*append), + _ => Err(JsonOutputTypeError::InvalidType { + val: append.clone(), + valid_types: &["boolean"], + context: "the 'append' field in 'file' case", + }), + } + } else { + Ok(false) + }?; + Ok(OutputType::File { path, append }) + } + _ => Err(JsonOutputTypeError::InvalidType { + val: o.clone(), + valid_types: &["string", "object"], + context: "the 'file' enum case", + }), + } + } else { + Err(JsonOutputTypeError::InvalidObjectKeys { + obj: o, + expected_keys: &["stdout", "file"], + context: "destructuring into 'file' and 'stdout' enum cases", + }) + } + } + _ => Err(JsonOutputTypeError::InvalidType { + val: v, + valid_types: &["string", "boolean", "object"], + context: "top-level value", + }), + } + } + } +} From 39e5cb929ca7e3e61fd91cc66d4bfe98fac5212e Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 19:46:30 -0500 Subject: [PATCH 18/31] more schema development --- cli/src/args/compress.rs | 19 ++++ cli/src/args/compress/resource.rs | 148 +++++++++++++++++++++++------- cli/src/lib.rs | 1 + cli/src/schema/values.rs | 76 +++++++++++++++ 4 files changed, 213 insertions(+), 31 deletions(-) diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index ffba6866a..2a693eb05 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -46,8 +46,27 @@ pub enum OutputType { File { path: PathBuf, append: bool }, } +impl Default for OutputType { + fn default() 
-> Self { + Self::Stdout { allow_tty: false } + } +} + pub mod resource; +#[derive(Debug)] +pub struct GlobalFlags { + pub archive_comment: Option, +} + +impl Default for GlobalFlags { + fn default() -> Self { + Self { + archive_comment: None, + } + } +} + #[derive(Debug)] pub struct Compress { pub output: OutputType, diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index 6fbedd314..2ba949858 100644 --- a/cli/src/args/compress/resource.rs +++ b/cli/src/args/compress/resource.rs @@ -1,4 +1,4 @@ -use super::OutputType; +use super::{GlobalFlags, OutputType}; use crate::args::resource::*; use std::{collections::VecDeque, ffi::OsString, path::PathBuf}; @@ -91,6 +91,50 @@ impl ArgvResource for OutputType { } } +pub enum GlobalFlagsError { + NoValFor(&'static str), + ValArgTwice { + arg: &'static str, + prev: String, + new: String, + }, +} + +impl Resource for GlobalFlags { + const ID: &'static str = "GLOBAL-FLAGS"; +} + +impl ArgvResource for GlobalFlags { + type ArgvParseError = GlobalFlagsError; + fn parse_argv(argv: &mut VecDeque) -> Result { + let mut archive_comment: Option = None; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"--archive-comment" => { + let new_comment = argv + .pop_front() + .ok_or_else(|| GlobalFlagsError::NoValFor("--archive-comment"))?; + if let Some(prev_comment) = archive_comment.take() { + return Err(GlobalFlagsError::ValArgTwice { + arg: "--archive-comment", + prev: format!("{prev_comment:?}"), + new: format!("{new_comment:?}"), + }); + } + archive_comment = Some(new_comment); + } + _ => { + argv.push_front(arg); + break; + } + } + } + + Ok(Self { archive_comment }) + } +} + #[cfg(feature = "json")] pub mod json_resource { use super::*; @@ -101,7 +145,7 @@ pub mod json_resource { use json::{object::Object as JsonObject, JsonValue}; #[derive(Debug)] - pub enum JsonOutputTypeError { + pub enum JsonSchemaError { InvalidType { val: JsonValue, valid_types: &'static [&'static 
str], @@ -114,7 +158,7 @@ pub mod json_resource { }, } - impl fmt::Display for JsonOutputTypeError { + impl fmt::Display for JsonSchemaError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Self::InvalidType { @@ -146,29 +190,31 @@ pub mod json_resource { } } - impl error::Error for JsonOutputTypeError {} + impl error::Error for JsonSchemaError {} impl SchemaResource for OutputType { type B = JsonBackend; - type SchemaParseError = JsonOutputTypeError; + type SchemaParseError = JsonSchemaError; fn parse_schema<'a>( v: ::Val<'a>, ) -> Result { match v { + JsonValue::Null => Ok(Self::default()), /* => {"file": {"path": , "append": false}}} */ - JsonValue::String(path) => Ok(OutputType::File { + JsonValue::String(path) => Ok(Self::File { path: path.into(), append: false, }), /* => {"stdout": {"allow_tty": }} */ - JsonValue::Boolean(allow_tty) => Ok(OutputType::Stdout { allow_tty }), + JsonValue::Boolean(allow_tty) => Ok(Self::Stdout { allow_tty }), /* An object--destructure by enum case. 
*/ JsonValue::Object(o) => { if let Some(o) = o.get("stdout") { match o { + JsonValue::Null => Ok(Self::Stdout { allow_tty: false }), /* {"stdout": } => {"stdout": {"allow_tty": }} */ - JsonValue::Boolean(allow_tty) => Ok(OutputType::Stdout { + JsonValue::Boolean(allow_tty) => Ok(Self::Stdout { allow_tty: *allow_tty, }), /* {"stdout": {"allow_tty": }} => {"stdout": {"allow_tty": }} */ @@ -176,27 +222,28 @@ pub mod json_resource { let allow_tty: bool = if let Some(allow_tty) = o.get("allow_tty") { match allow_tty { JsonValue::Boolean(allow_tty) => Ok(*allow_tty), - _ => Err(JsonOutputTypeError::InvalidType { + JsonValue::Null => Ok(false), + _ => Err(JsonSchemaError::InvalidType { val: allow_tty.clone(), - valid_types: &["boolean"], - context: "the 'allow_tty' field in the 'stdout' case", + valid_types: &["boolean", "null"], + context: "the 'allow_tty' field in the 'stdout' case of output flags", }), } } else { Ok(false) }?; - Ok(OutputType::Stdout { allow_tty }) + Ok(Self::Stdout { allow_tty }) } - _ => Err(JsonOutputTypeError::InvalidType { + _ => Err(JsonSchemaError::InvalidType { val: o.clone(), - valid_types: &["boolean", "object"], - context: "the 'stdout' enum case", + valid_types: &["boolean", "object", "null"], + context: "the 'stdout' enum case of output flags", }), } } else if let Some(o) = o.get("file") { match o { /* {"file": } => {"file": {"path": , append: false}} */ - JsonValue::String(path) => Ok(OutputType::File { + JsonValue::String(path) => Ok(Self::File { path: path.into(), append: false, }), @@ -205,52 +252,91 @@ pub mod json_resource { let path: PathBuf = if let Some(path) = o.get("path") { match path { JsonValue::String(path) => Ok(path.into()), - _ => Err(JsonOutputTypeError::InvalidType { + _ => Err(JsonSchemaError::InvalidType { val: path.clone(), valid_types: &["string"], - context: "the 'path' field in the 'file' case", + context: "the 'path' field in the 'file' case of output flags", }), } } else { /* This *must* be provided, whereas 
"append" has a default. */ - Err(JsonOutputTypeError::InvalidObjectKeys { + Err(JsonSchemaError::InvalidObjectKeys { obj: o.clone(), expected_keys: &["path"], - context: "the 'file' enum case", + context: "the 'file' enum case of output flags", }) }?; let append: bool = if let Some(append) = o.get("append") { match append { JsonValue::Boolean(append) => Ok(*append), - _ => Err(JsonOutputTypeError::InvalidType { + JsonValue::Null => Ok(false), + _ => Err(JsonSchemaError::InvalidType { val: append.clone(), - valid_types: &["boolean"], - context: "the 'append' field in 'file' case", + valid_types: &["boolean", "null"], + context: + "the 'append' field in 'file' case of output flags", }), } } else { Ok(false) }?; - Ok(OutputType::File { path, append }) + Ok(Self::File { path, append }) } - _ => Err(JsonOutputTypeError::InvalidType { + _ => Err(JsonSchemaError::InvalidType { val: o.clone(), valid_types: &["string", "object"], - context: "the 'file' enum case", + context: "the 'file' enum case of output flags", }), } } else { - Err(JsonOutputTypeError::InvalidObjectKeys { + Err(JsonSchemaError::InvalidObjectKeys { obj: o, expected_keys: &["stdout", "file"], - context: "destructuring into 'file' and 'stdout' enum cases", + context: + "destructuring into 'file' and 'stdout' enum cases of output flags", }) } } - _ => Err(JsonOutputTypeError::InvalidType { + _ => Err(JsonSchemaError::InvalidType { val: v, - valid_types: &["string", "boolean", "object"], - context: "top-level value", + valid_types: &["string", "boolean", "object", "null"], + context: "top-level value for output flags", + }), + } + } + } + + impl SchemaResource for GlobalFlags { + type B = JsonBackend; + type SchemaParseError = JsonSchemaError; + + fn parse_schema<'a>( + v: ::Val<'a>, + ) -> Result { + match v { + JsonValue::Object(o) => { + let archive_comment: Option = if let Some(archive_comment) = + o.get("archive-comment") + { + match archive_comment { + JsonValue::String(archive_comment) => 
Ok(Some(archive_comment.into())), + JsonValue::Null => Ok(None), + _ => Err(JsonSchemaError::InvalidType { + val: archive_comment.clone(), + valid_types: &["string", "null"], + context: "the 'archive-comment' field in global flags", + }), + } + } else { + Ok(None) + }?; + Ok(Self { archive_comment }) + } + JsonValue::Null => Ok(Self::default()), + _ => Err(JsonSchemaError::InvalidType { + val: v.clone(), + valid_types: &["object", "null"], + context: "the top-level global flags object", }), } } diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 22b16c75e..10cb0c0b4 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -172,6 +172,7 @@ pub mod driver { ZipCommand::Info(info) => info.do_main(err), ZipCommand::Extract(extract) => extract.do_main(err), ZipCommand::Compress(compress) => compress.do_main(err), + /* TODO: ZipCommand::Crawl! */ } } } diff --git a/cli/src/schema/values.rs b/cli/src/schema/values.rs index bc0acd5a7..511500a6b 100644 --- a/cli/src/schema/values.rs +++ b/cli/src/schema/values.rs @@ -21,6 +21,82 @@ pub trait NamedList { /* Obj(Vec<(String, Box)>), */ /* } */ +/* pub trait Schema {} */ + +/* pub enum Command { */ +/* /// Write a JSON object to stdout which contains all the file paths under */ +/* /// the top-level `paths`. */ +/* Crawl { */ +/* #[command(flatten)] */ +/* crawl: MedusaCrawl, */ +/* }, */ +/* /// Consume a JSON object from [`Self::Crawl`] over stdin and write those */ +/* /// files into a zip file at `output`. */ +/* Zip { */ +/* #[command(flatten)] */ +/* output: Output, */ +/* #[command(flatten)] */ +/* zip_options: ZipOutputOptions, */ +/* #[command(flatten)] */ +/* modifications: EntryModifications, */ +/* #[arg(long, value_enum, default_value_t)] */ +/* parallelism: Parallelism, */ +/* }, */ +/* /// Merge the content of several zip files into one. */ +/* Merge { */ +/* #[command(flatten)] */ +/* output: Output, */ +/* /// ??? 
*/ +/* #[command(flatten)] */ +/* mtime_behavior: ModifiedTimeBehavior, */ +/* #[command(flatten)] */ +/* merge: MedusaMerge, */ +/* }, */ +/* /// Perform a `crawl` and then a `zip` on its output in memory. */ +/* CrawlZip { */ +/* #[command(flatten)] */ +/* crawl: MedusaCrawl, */ +/* #[command(flatten)] */ +/* output: Output, */ +/* #[command(flatten)] */ +/* zip_options: ZipOutputOptions, */ +/* #[command(flatten)] */ +/* modifications: EntryModifications, */ +/* #[arg(long, value_enum, default_value_t)] */ +/* parallelism: Parallelism, */ +/* }, */ +/* /// Perform a `zip` and then a `merge` without releasing the output file */ +/* /// handle. */ +/* ZipMerge { */ +/* #[command(flatten)] */ +/* output: Output, */ +/* #[command(flatten)] */ +/* zip_options: ZipOutputOptions, */ +/* #[command(flatten)] */ +/* modifications: EntryModifications, */ +/* #[arg(long, value_enum, default_value_t)] */ +/* parallelism: Parallelism, */ +/* #[command(flatten)] */ +/* merge: MedusaMerge, */ +/* }, */ +/* /// Perform `crawl`, then a `zip` on its output in memory, then a `merge` */ +/* /// into the same output file. 
*/ +/* CrawlZipMerge { */ +/* #[command(flatten)] */ +/* crawl: MedusaCrawl, */ +/* #[command(flatten)] */ +/* output: Output, */ +/* #[command(flatten)] */ +/* zip_options: ZipOutputOptions, */ +/* #[command(flatten)] */ +/* modifications: EntryModifications, */ +/* #[arg(long, value_enum, default_value_t)] */ +/* parallelism: Parallelism, */ +/* #[command(flatten)] */ +/* merge: MedusaMerge, */ +/* }, */ +/* } */ + pub enum HydratedValue<'a> { Bool(bool), Str(&'a str), From ba2f42b2318508f0bf15f1ce468b58a0f4559df8 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 20:08:25 -0500 Subject: [PATCH 19/31] add json output type tests --- cli/src/args/compress.rs | 4 +- cli/src/args/compress/resource.rs | 97 +++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+), 2 deletions(-) diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 2a693eb05..920dd920b 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -40,7 +40,7 @@ pub enum CompressionArg { RecursiveDirPath(PathBuf), } -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum OutputType { Stdout { allow_tty: bool }, File { path: PathBuf, append: bool }, @@ -54,7 +54,7 @@ impl Default for OutputType { pub mod resource; -#[derive(Debug)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct GlobalFlags { pub archive_comment: Option, } diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index 2ba949858..0757a0252 100644 --- a/cli/src/args/compress/resource.rs +++ b/cli/src/args/compress/resource.rs @@ -202,6 +202,10 @@ pub mod json_resource { match v { JsonValue::Null => Ok(Self::default()), /* => {"file": {"path": , "append": false}}} */ + JsonValue::Short(path) => Ok(Self::File { + path: path.as_str().into(), + append: false, + }), JsonValue::String(path) => Ok(Self::File { path: path.into(), append: false, @@ -243,6 +247,10 @@ pub mod json_resource { } 
else if let Some(o) = o.get("file") { match o { /* {"file": } => {"file": {"path": , append: false}} */ + JsonValue::Short(path) => Ok(Self::File { + path: path.as_str().into(), + append: false, + }), JsonValue::String(path) => Ok(Self::File { path: path.into(), append: false, @@ -251,6 +259,7 @@ pub mod json_resource { JsonValue::Object(o) => { let path: PathBuf = if let Some(path) = o.get("path") { match path { + JsonValue::Short(path) => Ok(path.as_str().into()), JsonValue::String(path) => Ok(path.into()), _ => Err(JsonSchemaError::InvalidType { val: path.clone(), @@ -319,6 +328,9 @@ pub mod json_resource { o.get("archive-comment") { match archive_comment { + JsonValue::Short(archive_comment) => { + Ok(Some(archive_comment.as_str().into())) + } JsonValue::String(archive_comment) => Ok(Some(archive_comment.into())), JsonValue::Null => Ok(None), _ => Err(JsonSchemaError::InvalidType { @@ -341,4 +353,89 @@ pub mod json_resource { } } } + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn parse_output_type() { + assert_eq!( + OutputType::Stdout { allow_tty: false }, + OutputType::default() + ); + + assert_eq!( + OutputType::Stdout { allow_tty: true }, + OutputType::parse_schema_str("true").unwrap(), + ); + assert_eq!( + OutputType::Stdout { allow_tty: false }, + OutputType::parse_schema_str("false").unwrap(), + ); + assert_eq!( + OutputType::default(), + OutputType::parse_schema_str("null").unwrap(), + ); + + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: false + }, + OutputType::parse_schema_str("\"asdf\"").unwrap(), + ); + + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: false + }, + OutputType::parse_schema_str("{\"file\": \"asdf\"}").unwrap(), + ); + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: true + }, + OutputType::parse_schema_str("{\"file\": {\"path\": \"asdf\", \"append\": true}}") + .unwrap(), + ); + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: false + }, + 
OutputType::parse_schema_str("{\"file\": {\"path\": \"asdf\", \"append\": false}}") + .unwrap(), + ); + } + + #[test] + fn parse_global_flags() { + assert_eq!( + GlobalFlags { + archive_comment: None + }, + GlobalFlags::default(), + ); + assert_eq!( + GlobalFlags::default(), + GlobalFlags::parse_schema_str("null").unwrap(), + ); + + assert_eq!( + GlobalFlags { + archive_comment: Some("aaaaasdf".into()), + }, + GlobalFlags::parse_schema_str("{\"archive-comment\": \"aaaaasdf\"}").unwrap(), + ); + assert_eq!( + GlobalFlags { + archive_comment: None, + }, + GlobalFlags::parse_schema_str("{\"archive-comment\": null}").unwrap(), + ); + } + } } From 243708a0a156fa10b00470b4845c9453936a00f4 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 20:38:56 -0500 Subject: [PATCH 20/31] test argv and json serde separately --- cli/src/args.rs | 7 + cli/src/args/compress/resource.rs | 290 ++++++++++++++++++------------ 2 files changed, 184 insertions(+), 113 deletions(-) diff --git a/cli/src/args.rs b/cli/src/args.rs index a2017d659..e8d4cc807 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -163,6 +163,13 @@ pub mod resource { pub trait ArgvResource: Resource + Sized { type ArgvParseError; fn parse_argv(argv: &mut VecDeque) -> Result; + + fn parse_argv_from( + argv: impl IntoIterator, + ) -> Result { + let mut argv: VecDeque = argv.into_iter().map(|s| s.into()).collect(); + Self::parse_argv(&mut argv) + } } pub trait SchemaResource: Resource + Sized { diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index 0757a0252..3913a4eb7 100644 --- a/cli/src/args/compress/resource.rs +++ b/cli/src/args/compress/resource.rs @@ -1,146 +1,210 @@ use super::{GlobalFlags, OutputType}; use crate::args::resource::*; -use std::{collections::VecDeque, ffi::OsString, path::PathBuf}; - -pub enum OutputTypeError { - ArgWith(&'static str, String), - ArgTwice(&'static str), - NoValFor(&'static str), - 
ValArgTwice { - arg: &'static str, - prev: String, - new: String, - }, -} - impl Resource for OutputType { const ID: &'static str = "OUTPUT-FLAGS"; } -impl ArgvResource for OutputType { - type ArgvParseError = OutputTypeError; - fn parse_argv(argv: &mut VecDeque) -> Result { - let mut allow_stdout: bool = false; - let mut append_to_output_path: bool = false; - let mut output_path: Option = None; - - while let Some(arg) = argv.pop_front() { - match arg.as_encoded_bytes() { - b"--stdout" => { - if let Some(output_path) = output_path.take() { - return Err(OutputTypeError::ArgWith( - "--stdout", - format!("output file {output_path:?}"), - )); - } - if append_to_output_path { - return Err(OutputTypeError::ArgWith("--stdout", "--append".to_string())); +impl Resource for GlobalFlags { + const ID: &'static str = "GLOBAL-FLAGS"; +} + +pub mod argv { + use super::{GlobalFlags, OutputType}; + use crate::args::resource::ArgvResource; + use std::{collections::VecDeque, ffi::OsString, path::PathBuf}; + + #[derive(Debug)] + pub enum OutputTypeError { + ArgWith(&'static str, String), + ArgTwice(&'static str), + NoValFor(&'static str), + ValArgTwice { + arg: &'static str, + prev: String, + new: String, + }, + } + + impl ArgvResource for OutputType { + type ArgvParseError = OutputTypeError; + fn parse_argv(argv: &mut VecDeque) -> Result { + let mut allow_stdout: bool = false; + let mut append_to_output_path: bool = false; + let mut output_path: Option = None; + + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"--stdout" => { + if let Some(output_path) = output_path.take() { + return Err(OutputTypeError::ArgWith( + "--stdout", + format!("output file {output_path:?}"), + )); + } + if append_to_output_path { + return Err(OutputTypeError::ArgWith( + "--stdout", + "--append".to_string(), + )); + } + if allow_stdout { + return Err(OutputTypeError::ArgTwice("--stdout")); + } + allow_stdout = true; } - if allow_stdout { - return 
Err(OutputTypeError::ArgTwice("--stdout")); + b"--append" => { + if append_to_output_path { + return Err(OutputTypeError::ArgTwice("--append")); + } + if allow_stdout { + return Err(OutputTypeError::ArgWith( + "--append", + "--stdout".to_string(), + )); + } + append_to_output_path = true; } - allow_stdout = true; - } - b"--append" => { - if append_to_output_path { - return Err(OutputTypeError::ArgTwice("--append")); + b"-o" | b"--output-file" => { + let new_path = argv + .pop_front() + .map(PathBuf::from) + .ok_or_else(|| OutputTypeError::NoValFor("-o/--output-file"))?; + if let Some(prev_path) = output_path.take() { + return Err(OutputTypeError::ValArgTwice { + arg: "-o/--output-file", + prev: format!("{prev_path:?}"), + new: format!("{new_path:?}"), + }); + } + if allow_stdout { + return Err(OutputTypeError::ArgWith( + "--stdout", + "-o/--output-file".to_string(), + )); + } + output_path = Some(new_path); } - if allow_stdout { - return Err(OutputTypeError::ArgWith("--append", "--stdout".to_string())); + _ => { + argv.push_front(arg); + break; } - append_to_output_path = true; } - b"-o" | b"--output-file" => { - let new_path = argv - .pop_front() - .map(PathBuf::from) - .ok_or_else(|| OutputTypeError::NoValFor("-o/--output-file"))?; - if let Some(prev_path) = output_path.take() { - return Err(OutputTypeError::ValArgTwice { - arg: "-o/--output-file", - prev: format!("{prev_path:?}"), - new: format!("{new_path:?}"), - }); - } - if allow_stdout { - return Err(OutputTypeError::ArgWith( - "--stdout", - "-o/--output-file".to_string(), - )); - } - output_path = Some(new_path); + } + + Ok(if let Some(output_path) = output_path { + Self::File { + path: output_path, + append: append_to_output_path, } - _ => { - argv.push_front(arg); - break; + } else { + Self::Stdout { + allow_tty: allow_stdout, } - } + }) } - - Ok(if let Some(output_path) = output_path { - Self::File { - path: output_path, - append: append_to_output_path, - } - } else { - Self::Stdout { - allow_tty: 
allow_stdout, - } - }) } -} -pub enum GlobalFlagsError { - NoValFor(&'static str), - ValArgTwice { - arg: &'static str, - prev: String, - new: String, - }, -} + #[derive(Debug)] + pub enum GlobalFlagsError { + NoValFor(&'static str), + ValArgTwice { + arg: &'static str, + prev: String, + new: String, + }, + } -impl Resource for GlobalFlags { - const ID: &'static str = "GLOBAL-FLAGS"; -} + impl ArgvResource for GlobalFlags { + type ArgvParseError = GlobalFlagsError; + fn parse_argv(argv: &mut VecDeque) -> Result { + let mut archive_comment: Option = None; -impl ArgvResource for GlobalFlags { - type ArgvParseError = GlobalFlagsError; - fn parse_argv(argv: &mut VecDeque) -> Result { - let mut archive_comment: Option = None; - - while let Some(arg) = argv.pop_front() { - match arg.as_encoded_bytes() { - b"--archive-comment" => { - let new_comment = argv - .pop_front() - .ok_or_else(|| GlobalFlagsError::NoValFor("--archive-comment"))?; - if let Some(prev_comment) = archive_comment.take() { - return Err(GlobalFlagsError::ValArgTwice { - arg: "--archive-comment", - prev: format!("{prev_comment:?}"), - new: format!("{new_comment:?}"), - }); + while let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"--archive-comment" => { + let new_comment = argv + .pop_front() + .ok_or_else(|| GlobalFlagsError::NoValFor("--archive-comment"))?; + if let Some(prev_comment) = archive_comment.take() { + return Err(GlobalFlagsError::ValArgTwice { + arg: "--archive-comment", + prev: format!("{prev_comment:?}"), + new: format!("{new_comment:?}"), + }); + } + archive_comment = Some(new_comment); + } + _ => { + argv.push_front(arg); + break; } - archive_comment = Some(new_comment); - } - _ => { - argv.push_front(arg); - break; } } + + Ok(Self { archive_comment }) } + } + + #[cfg(test)] + mod test { + use super::*; + + #[test] + fn parse_output_type() { + assert_eq!( + OutputType::default(), + OutputType::parse_argv_from([]).unwrap() + ); + + assert_eq!( + OutputType::Stdout { 
allow_tty: true }, + OutputType::parse_argv_from(["--stdout".into()]).unwrap() + ); + + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: false + }, + OutputType::parse_argv_from(["-o".into(), "asdf".into()]).unwrap() + ); + assert_eq!( + OutputType::File { + path: "asdf".into(), + append: true + }, + OutputType::parse_argv_from(["--append".into(), "-o".into(), "asdf".into()]) + .unwrap() + ); + } + + #[test] + fn parse_global_flags() { + assert_eq!( + GlobalFlags::default(), + GlobalFlags::parse_argv_from([]).unwrap(), + ); - Ok(Self { archive_comment }) + assert_eq!( + GlobalFlags { + archive_comment: Some("asdf".into()) + }, + GlobalFlags::parse_argv_from(["--archive-comment".into(), "asdf".into()]).unwrap() + ); + } } } #[cfg(feature = "json")] pub mod json_resource { - use super::*; - use crate::schema::backends::{json_backend::JsonBackend, Backend}; + use super::{GlobalFlags, OutputType}; + use crate::{ + args::resource::SchemaResource, + schema::backends::{json_backend::JsonBackend, Backend}, + }; - use std::{error, fmt}; + use std::{error, ffi::OsString, fmt, path::PathBuf}; use json::{object::Object as JsonObject, JsonValue}; From ab1a65b5400caf7b98b02dbe8d6e06e612480cfc Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 23:18:22 -0500 Subject: [PATCH 21/31] save progress --- cli/src/args.rs | 11 +- cli/src/args/compress.rs | 40 ++- cli/src/args/compress/resource.rs | 397 +++++++++++++++++++++++++++++- cli/src/compress.rs | 207 ---------------- 4 files changed, 439 insertions(+), 216 deletions(-) diff --git a/cli/src/args.rs b/cli/src/args.rs index e8d4cc807..d0ac4861b 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -3,6 +3,7 @@ use std::{collections::VecDeque, ffi::OsString, fmt, sync::OnceLock}; #[derive(Debug)] pub enum ArgParseError { StdoutMessage(String), + /* FIXME: give these errors much more structure!! 
*/ StderrMessage(String), } @@ -172,6 +173,8 @@ pub mod resource { } } + pub trait PositionalArgvResource: ArgvResource {} + pub trait SchemaResource: Resource + Sized { type B: Backend; type SchemaParseError; @@ -183,12 +186,14 @@ pub mod resource { s: ::Str<'a>, ) -> Result::Err<'a>, Self::SchemaParseError>> { - let v = ::parse(s).map_err(|e| WrapperError::In(e))?; - Ok(Self::parse_schema(v).map_err(|e| WrapperError::Out(e))?) + let v = ::parse(s).map_err(WrapperError::In)?; + Ok(Self::parse_schema(v).map_err(WrapperError::Out)?) } } - pub trait CommandInputs {} + pub trait CommandSpec { + /* fn resources() -> Vec<>; */ + } } pub trait CommandFormat: fmt::Debug { diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 920dd920b..7e6e3cf15 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -1,7 +1,11 @@ use super::{ArgParseError, CommandFormat}; +use zip::{write::SimpleFileOptions, CompressionMethod}; + use std::{collections::VecDeque, ffi::OsString, num::ParseIntError, path::PathBuf}; +pub mod resource; + #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub enum CompressionMethodArg { Stored, @@ -52,8 +56,6 @@ impl Default for OutputType { } } -pub mod resource; - #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct GlobalFlags { pub archive_comment: Option, @@ -67,6 +69,40 @@ impl Default for GlobalFlags { } } +#[derive(Debug, Clone)] +pub enum EntrySpec { + Dir { + name: String, + }, + Immediate { + name: String, + data: OsString, + symlink_flag: bool, + }, + File { + name: Option, + path: PathBuf, + symlink_flag: bool, + }, + RecDir { + name: Option, + path: PathBuf, + }, +} + +#[derive(Debug, Clone)] +pub enum ModificationOperation { + CreateEntry { + options: SimpleFileOptions, + spec: EntrySpec, + }, +} + +#[derive(Debug, Default, Clone)] +pub struct ModificationSequence { + pub operations: Vec, +} + #[derive(Debug)] pub struct Compress { pub output: OutputType, diff --git 
a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index 3913a4eb7..704f2ef55 100644 --- a/cli/src/args/compress/resource.rs +++ b/cli/src/args/compress/resource.rs @@ -1,4 +1,4 @@ -use super::{GlobalFlags, OutputType}; +use super::*; use crate::args::resource::*; impl Resource for OutputType { @@ -9,10 +9,14 @@ impl Resource for GlobalFlags { const ID: &'static str = "GLOBAL-FLAGS"; } +impl Resource for ModificationSequence { + const ID: &'static str = "MOD-SEQ"; +} + pub mod argv { - use super::{GlobalFlags, OutputType}; - use crate::args::resource::ArgvResource; - use std::{collections::VecDeque, ffi::OsString, path::PathBuf}; + use super::*; + + use std::{collections::VecDeque, ffi::OsString, fmt, path::PathBuf}; #[derive(Debug)] pub enum OutputTypeError { @@ -146,6 +150,391 @@ pub mod argv { } } + pub mod compression_args { + use super::*; + use crate::{schema::transformers::WrapperError, CommandError, WrapCommandErr}; + + use zip::{unstable::path_to_string, write::SimpleFileOptions, CompressionMethod}; + + use std::mem; + + #[derive(Debug)] + pub enum ModificationSequenceError { + NoValFor(&'static str), + Unrecognized { + context: &'static str, + value: String, + }, + ValidationFailed { + codec: &'static str, + context: &'static str, + value: String, + }, + } + + struct CompressionArgs { + pub args: Vec, + pub positional_paths: Vec, + } + + impl CompressionArgs { + fn initial_options() -> SimpleFileOptions { + SimpleFileOptions::default() + .compression_method(CompressionMethod::Deflated) + .large_file(false) + } + + fn parse_compression_method( + name: OsString, + ) -> Result { + Ok(match name.as_encoded_bytes() { + b"stored" => CompressionArg::CompressionMethod(CompressionMethodArg::Stored), + b"deflate" => CompressionArg::CompressionMethod(CompressionMethodArg::Deflate), + #[cfg(feature = "deflate64")] + b"deflate64" => { + CompressionArg::CompressionMethod(CompressionMethodArg::Deflate64) + } + #[cfg(feature = "bzip2")] + b"bzip2" => 
CompressionArg::CompressionMethod(CompressionMethodArg::Bzip2), + #[cfg(feature = "zstd")] + b"zstd" => CompressionArg::CompressionMethod(CompressionMethodArg::Zstd), + _ => { + return Err(ModificationSequenceError::Unrecognized { + context: "compression method", + value: format!("{name:?}"), + }) + } + }) + } + + fn parse_unicode( + context: &'static str, + arg: OsString, + ) -> Result { + arg.into_string() + .map_err(|arg| ModificationSequenceError::ValidationFailed { + codec: "invalid unicode", + context, + value: format!("{arg:?}"), + }) + } + + fn parse_i64( + context: &'static str, + arg: String, + ) -> Result { + arg.parse::() + .map_err(|e| ModificationSequenceError::ValidationFailed { + codec: "failed to parse integer", + context, + value: format!("{e}"), + }) + } + + fn parse_compression_level( + level: OsString, + ) -> Result { + let level = Self::parse_unicode("compression level", level)?; + let level = Self::parse_i64("compression level", level)?; + if (0..=24).contains(&level) { + Ok(CompressionArg::Level(CompressionLevel(level))) + } else { + Err(ModificationSequenceError::ValidationFailed { + codec: "integer was not between 0 and 24", + context: "compression level", + value: format!("{level}"), + }) + } + } + + fn parse_mode(mode: OsString) -> Result { + let mode = Self::parse_unicode("mode", mode)?; + let mode = UnixPermissions::parse(&mode).map_err(|e| { + ModificationSequenceError::ValidationFailed { + codec: "failed to parse octal integer", + context: "compression mode", + value: format!("{e}"), + } + })?; + Ok(CompressionArg::UnixPermissions(mode)) + } + + fn parse_large_file( + large_file: OsString, + ) -> Result { + Ok(match large_file.as_encoded_bytes() { + b"true" => CompressionArg::LargeFile(true), + b"false" => CompressionArg::LargeFile(false), + _ => { + return Err(ModificationSequenceError::Unrecognized { + context: "value for --large-file", + value: format!("{large_file:?}"), + }) + } + }) + } + + pub fn parse_argv( + argv: &mut 
VecDeque, + ) -> Result { + let mut args: Vec = Vec::new(); + let mut positional_paths: Vec = Vec::new(); + + while let Some(arg) = argv.pop_front() { + let arg = match arg.as_encoded_bytes() { + /* Attributes */ + b"-c" | b"--compression-method" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor( + "-c/--compression-method", + )), + Some(name) => Self::parse_compression_method(name), + }, + b"-l" | b"--compression-level" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor( + "-l/--compression-level", + )), + Some(level) => Self::parse_compression_level(level), + }, + b"-m" | b"--mode" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-m/--mode")), + Some(mode) => Self::parse_mode(mode), + }, + b"--large-file" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("--large-file")), + Some(large_file) => Self::parse_large_file(large_file), + }, + + /* Data */ + b"-n" | b"--name" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-n/--name")), + Some(name) => { + Self::parse_unicode("name", name).map(CompressionArg::Name) + } + }, + b"-s" | b"--symlink" => Ok(CompressionArg::Symlink), + b"-d" | b"--dir" => Ok(CompressionArg::Dir), + b"-i" | b"--immediate" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-i/--immediate")), + Some(data) => Ok(CompressionArg::Immediate(data)), + }, + b"-f" | b"--file" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-f/--file")), + Some(file) => Ok(CompressionArg::FilePath(file.into())), + }, + b"-r" | b"--recursive-dir" => match argv.pop_front() { + None => Err(ModificationSequenceError::NoValFor("-r/--recursive-dir")), + Some(dir) => Ok(CompressionArg::RecursiveDirPath(dir.into())), + }, + + /* Transition to positional args */ + b"--" => break, + arg_bytes => { + if arg_bytes.starts_with(b"-") { + Err(ModificationSequenceError::Unrecognized { + 
context: "flag", + value: format!("{arg:?}"), + }) + } else { + argv.push_front(arg); + break; + } + } + }?; + args.push(arg); + } + + positional_paths.extend(mem::take(argv).into_iter().map(PathBuf::from)); + + Ok(Self { + args, + positional_paths, + }) + } + + fn interpret_entry_path(path: PathBuf) -> Result { + let file_type = std::fs::symlink_metadata(&path) + .wrap_err_with(|| format!("failed to read metadata from path {path:?}"))? + .file_type(); + Ok(if file_type.is_dir() { + EntrySpec::RecDir { name: None, path } + } else { + EntrySpec::File { + name: None, + path, + symlink_flag: file_type.is_symlink(), + } + }) + } + + pub fn build_mod_seq( + self, + /* mut err: impl Write, */ + ) -> Result { + let Self { + args, + positional_paths, + } = self; + + let mut operations: Vec = Vec::new(); + + let mut options = Self::initial_options(); + + let mut last_name: Option = None; + let mut symlink_flag: bool = false; + + for arg in args.into_iter() { + match arg { + /* attributes: */ + CompressionArg::CompressionMethod(method) => { + let method = match method { + CompressionMethodArg::Stored => CompressionMethod::Stored, + CompressionMethodArg::Deflate => CompressionMethod::Deflated, + #[cfg(feature = "deflate64")] + CompressionMethodArg::Deflate64 => CompressionMethod::Deflate64, + #[cfg(feature = "bzip2")] + CompressionMethodArg::Bzip2 => CompressionMethod::Bzip2, + #[cfg(feature = "zstd")] + CompressionMethodArg::Zstd => CompressionMethod::Zstd, + }; + /* writeln!(err, "setting compression method {method:?}").unwrap(); */ + options = options.compression_method(method); + } + CompressionArg::Level(CompressionLevel(level)) => { + /* writeln!(err, "setting compression level {level:?}").unwrap(); */ + options = options.compression_level(Some(level)); + } + CompressionArg::UnixPermissions(UnixPermissions(mode)) => { + /* writeln!(err, "setting file mode {mode:#o}").unwrap(); */ + options = options.unix_permissions(mode); + } + CompressionArg::LargeFile(large_file) => 
{ + /* writeln!(err, "setting large file flag to {large_file:?}").unwrap(); */ + options = options.large_file(large_file); + } + CompressionArg::Name(name) => { + /* writeln!(err, "setting name of next entry to {name:?}").unwrap(); */ + if let Some(last_name) = last_name { + return Err(CommandError::InvalidArg(format!( + "got two names before an entry: {last_name} and {name}" + ))); + } + last_name = Some(name); + } + CompressionArg::Symlink => { + /* writeln!(err, "setting symlink flag for next entry").unwrap(); */ + if symlink_flag { + /* TODO: make this a warning? */ + return Err(CommandError::InvalidArg( + "symlink flag provided twice before entry".to_string(), + )); + } + symlink_flag = true; + } + + /* new operations: */ + CompressionArg::Dir => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + /* writeln!(err, "writing dir entry").unwrap(); */ + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag provided before dir entry".to_string(), + )); + } + let name = last_name.ok_or_else(|| { + CommandError::InvalidArg( + "no name provided before dir entry".to_string(), + ) + })?; + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntrySpec::Dir { name }, + }); + } + CompressionArg::Immediate(data) => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + let name = last_name.ok_or_else(|| { + CommandError::InvalidArg(format!( + "no name provided for immediate data {data:?}" + )) + })?; + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntrySpec::Immediate { + name, + data, + symlink_flag, + }, + }); + } + CompressionArg::FilePath(path) => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + let name = last_name.unwrap_or_else(|| path_to_string(&path).into()); + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntrySpec::File { + 
name: Some(name), + path, + symlink_flag, + }, + }); + } + CompressionArg::RecursiveDirPath(path) => { + let last_name = last_name.take(); + let symlink_flag = mem::replace(&mut symlink_flag, false); + + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag provided before recursive dir entry".to_string(), + )); + } + + operations.push(ModificationOperation::CreateEntry { + options, + spec: EntrySpec::RecDir { + name: last_name, + path, + }, + }); + } + } + } + if symlink_flag { + return Err(CommandError::InvalidArg( + "symlink flag remaining after all entry flags processed".to_string(), + )); + } + if let Some(last_name) = last_name { + return Err(CommandError::InvalidArg(format!( + "name {last_name} remaining after all entry flags processed" + ))); + } + + for p in positional_paths.into_iter() { + operations.push(ModificationOperation::CreateEntry { + options, + spec: Self::interpret_entry_path(p)?, + }); + } + Ok(ModificationSequence { operations }) + } + } + + impl ArgvResource for ModificationSequence { + type ArgvParseError = WrapperError; + fn parse_argv(argv: &mut VecDeque) -> Result { + let compression_args = + CompressionArgs::parse_argv(argv).map_err(WrapperError::In)?; + compression_args.build_mod_seq().map_err(WrapperError::Out) + } + } + } + #[cfg(test)] mod test { use super::*; diff --git a/cli/src/compress.rs b/cli/src/compress.rs index 0bbef6c8f..2becb632e 100644 --- a/cli/src/compress.rs +++ b/cli/src/compress.rs @@ -14,43 +14,7 @@ use zip::{ use crate::{args::compress::*, CommandError, OutputHandle, WrapCommandErr}; -#[derive(Debug, Clone)] -enum EntrySpec { - Dir { - name: String, - }, - Immediate { - name: String, - data: OsString, - symlink_flag: bool, - }, - File { - name: Option, - path: PathBuf, - symlink_flag: bool, - }, - RecDir { - name: Option, - path: PathBuf, - }, -} - impl EntrySpec { - pub fn interpret_entry_path(path: PathBuf) -> Result { - let file_type = fs::symlink_metadata(&path) - .wrap_err_with(|| 
format!("failed to read metadata from path {}", path.display()))? - .file_type(); - Ok(if file_type.is_dir() { - Self::RecDir { name: None, path } - } else { - Self::File { - name: None, - path, - symlink_flag: file_type.is_symlink(), - } - }) - } - pub fn create_entry( self, writer: &mut ZipWriter, @@ -185,14 +149,6 @@ impl EntrySpec { } } -#[derive(Debug, Clone)] -pub enum ModificationOperation { - CreateEntry { - options: SimpleFileOptions, - spec: EntrySpec, - }, -} - impl ModificationOperation { pub fn invoke( self, @@ -205,170 +161,7 @@ impl ModificationOperation { } } -#[derive(Debug, Default, Clone)] -pub struct ModificationSequence { - pub operations: Vec, -} - impl ModificationSequence { - fn initial_options() -> SimpleFileOptions { - SimpleFileOptions::default() - .compression_method(CompressionMethod::Deflated) - .large_file(false) - } - - pub fn from_args( - args: Vec, - positional_paths: Vec, - mut err: impl Write, - ) -> Result { - let mut operations: Vec = Vec::new(); - - let mut options = Self::initial_options(); - - let mut last_name: Option = None; - let mut symlink_flag: bool = false; - - for arg in args.into_iter() { - match arg { - /* attributes: */ - CompressionArg::CompressionMethod(method) => { - let method = match method { - CompressionMethodArg::Stored => CompressionMethod::Stored, - CompressionMethodArg::Deflate => CompressionMethod::Deflated, - #[cfg(feature = "deflate64")] - CompressionMethodArg::Deflate64 => CompressionMethod::Deflate64, - #[cfg(feature = "bzip2")] - CompressionMethodArg::Bzip2 => CompressionMethod::Bzip2, - #[cfg(feature = "zstd")] - CompressionMethodArg::Zstd => CompressionMethod::Zstd, - }; - writeln!(err, "setting compression method {method:?}").unwrap(); - options = options.compression_method(method); - } - CompressionArg::Level(CompressionLevel(level)) => { - writeln!(err, "setting compression level {level:?}").unwrap(); - options = options.compression_level(Some(level)); - } - 
CompressionArg::UnixPermissions(UnixPermissions(mode)) => { - writeln!(err, "setting file mode {mode:#o}").unwrap(); - options = options.unix_permissions(mode); - } - CompressionArg::LargeFile(large_file) => { - writeln!(err, "setting large file flag to {large_file:?}").unwrap(); - options = options.large_file(large_file); - } - CompressionArg::Name(name) => { - writeln!(err, "setting name of next entry to {name:?}").unwrap(); - if let Some(last_name) = last_name { - return Err(CommandError::InvalidArg(format!( - "got two names before an entry: {last_name} and {name}" - ))); - } - last_name = Some(name); - } - CompressionArg::Symlink => { - writeln!(err, "setting symlink flag for next entry").unwrap(); - if symlink_flag { - /* TODO: make this a warning? */ - return Err(CommandError::InvalidArg( - "symlink flag provided twice before entry".to_string(), - )); - } - symlink_flag = true; - } - - /* new operations: */ - CompressionArg::Dir => { - let last_name = last_name.take(); - let symlink_flag = mem::replace(&mut symlink_flag, false); - - writeln!(err, "writing dir entry").unwrap(); - if symlink_flag { - return Err(CommandError::InvalidArg( - "symlink flag provided before dir entry".to_string(), - )); - } - let name = last_name.ok_or_else(|| { - CommandError::InvalidArg("no name provided before dir entry".to_string()) - })?; - operations.push(ModificationOperation::CreateEntry { - options, - spec: EntrySpec::Dir { name }, - }); - } - CompressionArg::Immediate(data) => { - let last_name = last_name.take(); - let symlink_flag = mem::replace(&mut symlink_flag, false); - - let name = last_name.ok_or_else(|| { - CommandError::InvalidArg(format!( - "no name provided for immediate data {data:?}" - )) - })?; - operations.push(ModificationOperation::CreateEntry { - options, - spec: EntrySpec::Immediate { - name, - data, - symlink_flag, - }, - }); - } - CompressionArg::FilePath(path) => { - let last_name = last_name.take(); - let symlink_flag = mem::replace(&mut 
symlink_flag, false); - - let name = last_name.unwrap_or_else(|| path_to_string(&path).into()); - operations.push(ModificationOperation::CreateEntry { - options, - spec: EntrySpec::File { - name: Some(name), - path, - symlink_flag, - }, - }); - } - CompressionArg::RecursiveDirPath(path) => { - let last_name = last_name.take(); - let symlink_flag = mem::replace(&mut symlink_flag, false); - - if symlink_flag { - return Err(CommandError::InvalidArg( - "symlink flag provided before recursive dir entry".to_string(), - )); - } - - operations.push(ModificationOperation::CreateEntry { - options, - spec: EntrySpec::RecDir { - name: last_name, - path, - }, - }); - } - } - } - if symlink_flag { - return Err(CommandError::InvalidArg( - "symlink flag remaining after all entry flags processed".to_string(), - )); - } - if let Some(last_name) = last_name { - return Err(CommandError::InvalidArg(format!( - "name {last_name} remaining after all entry flags processed" - ))); - } - - for p in positional_paths.into_iter() { - operations.push(ModificationOperation::CreateEntry { - options, - spec: EntrySpec::interpret_entry_path(p)?, - }); - } - Ok(Self { operations }) - } - pub fn invoke( self, writer: &mut ZipWriter, From b08bf0291da7b57f204e730abe821dae27fde62e Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 23:27:00 -0500 Subject: [PATCH 22/31] add Error impls --- cli/src/args/compress/resource.rs | 66 ++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index 704f2ef55..49f4f701b 100644 --- a/cli/src/args/compress/resource.rs +++ b/cli/src/args/compress/resource.rs @@ -16,7 +16,7 @@ impl Resource for ModificationSequence { pub mod argv { use super::*; - use std::{collections::VecDeque, ffi::OsString, fmt, path::PathBuf}; + use std::{collections::VecDeque, error, ffi::OsString, fmt, path::PathBuf}; 
#[derive(Debug)] pub enum OutputTypeError { @@ -30,6 +30,30 @@ pub mod argv { }, } + impl fmt::Display for OutputTypeError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::ArgWith(arg_name, other_entity) => { + write!(f, "{arg_name} is mutually exclusive with {other_entity}") + } + Self::ArgTwice(arg_name) => { + write!(f, "{arg_name} provided twice") + } + Self::NoValFor(arg_name) => { + write!(f, "no value provided for {arg_name}") + } + Self::ValArgTwice { arg, prev, new } => { + write!( + f, + "value provided twice for argument {arg}. prev was: {prev}, new was {new}" + ) + } + } + } + } + + impl error::Error for OutputTypeError {} + impl ArgvResource for OutputType { type ArgvParseError = OutputTypeError; fn parse_argv(argv: &mut VecDeque) -> Result { @@ -119,6 +143,24 @@ pub mod argv { }, } + impl fmt::Display for GlobalFlagsError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::NoValFor(arg_name) => { + write!(f, "no value provided for {arg_name}") + } + Self::ValArgTwice { arg, prev, new } => { + write!( + f, + "value provided twice for argument {arg}. 
prev was: {prev}, new was {new}" + ) + } + } + } + } + + impl error::Error for GlobalFlagsError {} + impl ArgvResource for GlobalFlags { type ArgvParseError = GlobalFlagsError; fn parse_argv(argv: &mut VecDeque) -> Result { @@ -172,6 +214,28 @@ pub mod argv { }, } + impl fmt::Display for ModificationSequenceError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Self::NoValFor(arg_name) => { + write!(f, "no value provided for {arg_name}") + } + Self::Unrecognized { context, value } => { + write!(f, "unrecognized {context}: {value}") + } + Self::ValidationFailed { + codec, + context, + value, + } => { + write!(f, "{codec} for {context}: {value}") + } + } + } + } + + impl error::Error for ModificationSequenceError {} + struct CompressionArgs { pub args: Vec, pub positional_paths: Vec, From 2eb7bb735d6c30276e86c1155e71fc6f428b3f9d Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 23:57:15 -0500 Subject: [PATCH 23/31] [UNNECESSARY] make FileOptions hashable! --- src/types.rs | 2 +- src/write.rs | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/types.rs b/src/types.rs index de22f6055..3b43c3903 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1051,7 +1051,7 @@ pub enum AesVendorVersion { } /// AES variant used. 
-#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] #[cfg_attr(fuzzing, derive(arbitrary::Arbitrary))] #[repr(u8)] pub enum AesMode { diff --git a/src/write.rs b/src/write.rs index c4d851a13..8cd8d5f1b 100644 --- a/src/write.rs +++ b/src/write.rs @@ -20,6 +20,7 @@ use indexmap::IndexMap; use std::borrow::ToOwned; use std::default::Default; use std::fmt::{Debug, Formatter}; +use std::hash; use std::io; use std::io::prelude::*; use std::io::Cursor; @@ -232,6 +233,20 @@ pub(crate) enum EncryptWith<'k> { ZipCrypto(ZipCryptoKeys, PhantomData<&'k ()>), } +impl hash::Hash for EncryptWith<'_> { + fn hash(&self, state: &mut H) { + match self { + Self::Aes {mode, password} => { + mode.hash(state); + password.hash(state); + } + Self::ZipCrypto(keys, _ph) => { + keys.hash(state); + } + } + } +} + #[cfg(fuzzing)] impl<'a> arbitrary::Arbitrary<'a> for EncryptWith<'a> { fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result { @@ -252,7 +267,7 @@ impl<'a> arbitrary::Arbitrary<'a> for EncryptWith<'a> { /// Metadata for a file to be written /* TODO: add accessors for this data as well so options can be introspected! 
*/ -#[derive(Clone, Debug, Copy, Eq, PartialEq)] +#[derive(Clone, Debug, Copy, Eq, PartialEq, Hash)] pub struct FileOptions<'k, T: FileOptionExtension> { pub(crate) compression_method: CompressionMethod, pub(crate) compression_level: Option, From 2bf12b15a6451f5f89e4cf8e983a55a9e2f3b0a3 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 30 Nov 2024 23:57:58 -0500 Subject: [PATCH 24/31] make other things hash and test them --- cli/src/args.rs | 8 +++++- cli/src/args/compress.rs | 14 ++++++++--- cli/src/args/compress/resource.rs | 41 +++++++++++++++++++++++-------- cli/src/compress.rs | 5 ++-- 4 files changed, 52 insertions(+), 16 deletions(-) diff --git a/cli/src/args.rs b/cli/src/args.rs index d0ac4861b..c9bd9e26c 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -165,12 +165,18 @@ pub mod resource { type ArgvParseError; fn parse_argv(argv: &mut VecDeque) -> Result; + #[cfg(test)] fn parse_argv_from( - argv: impl IntoIterator, + argv: impl IntoIterator>, ) -> Result { let mut argv: VecDeque = argv.into_iter().map(|s| s.into()).collect(); Self::parse_argv(&mut argv) } + + #[cfg(test)] + fn parse_argv_from_empty() -> Result { + Self::parse_argv_from(Vec::::new()) + } } pub trait PositionalArgvResource: ArgvResource {} diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 7e6e3cf15..4603853f0 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -69,7 +69,7 @@ impl Default for GlobalFlags { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum EntrySpec { Dir { name: String, @@ -90,7 +90,7 @@ pub enum EntrySpec { }, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ModificationOperation { CreateEntry { options: SimpleFileOptions, @@ -98,11 +98,19 @@ pub enum ModificationOperation { }, } -#[derive(Debug, Default, Clone)] +#[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct ModificationSequence { pub 
operations: Vec, } +impl Default for ModificationSequence { + fn default() -> Self { + Self { + operations: Vec::new(), + } + } +} + #[derive(Debug)] pub struct Compress { pub output: OutputType, diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index 49f4f701b..8c9ab97ac 100644 --- a/cli/src/args/compress/resource.rs +++ b/cli/src/args/compress/resource.rs @@ -236,7 +236,7 @@ pub mod argv { impl error::Error for ModificationSequenceError {} - struct CompressionArgs { + pub struct CompressionArgs { pub args: Vec, pub positional_paths: Vec, } @@ -538,11 +538,10 @@ pub mod argv { let last_name = last_name.take(); let symlink_flag = mem::replace(&mut symlink_flag, false); - let name = last_name.unwrap_or_else(|| path_to_string(&path).into()); operations.push(ModificationOperation::CreateEntry { options, spec: EntrySpec::File { - name: Some(name), + name: last_name, path, symlink_flag, }, @@ -598,6 +597,7 @@ pub mod argv { } } } + use compression_args::{CompressionArgs, ModificationSequenceError}; #[cfg(test)] mod test { @@ -607,12 +607,12 @@ pub mod argv { fn parse_output_type() { assert_eq!( OutputType::default(), - OutputType::parse_argv_from([]).unwrap() + OutputType::parse_argv_from_empty().unwrap() ); assert_eq!( OutputType::Stdout { allow_tty: true }, - OutputType::parse_argv_from(["--stdout".into()]).unwrap() + OutputType::parse_argv_from(["--stdout"]).unwrap() ); assert_eq!( @@ -620,15 +620,14 @@ pub mod argv { path: "asdf".into(), append: false }, - OutputType::parse_argv_from(["-o".into(), "asdf".into()]).unwrap() + OutputType::parse_argv_from(["-o", "asdf"]).unwrap() ); assert_eq!( OutputType::File { path: "asdf".into(), append: true }, - OutputType::parse_argv_from(["--append".into(), "-o".into(), "asdf".into()]) - .unwrap() + OutputType::parse_argv_from(["--append", "-o", "asdf"]).unwrap() ); } @@ -636,14 +635,36 @@ pub mod argv { fn parse_global_flags() { assert_eq!( GlobalFlags::default(), - 
GlobalFlags::parse_argv_from([]).unwrap(), + GlobalFlags::parse_argv_from_empty().unwrap(), ); assert_eq!( GlobalFlags { archive_comment: Some("asdf".into()) }, - GlobalFlags::parse_argv_from(["--archive-comment".into(), "asdf".into()]).unwrap() + GlobalFlags::parse_argv_from(["--archive-comment", "asdf"]).unwrap() + ); + } + + #[test] + fn parse_mod_seq() { + assert_eq!( + ModificationSequence::default(), + ModificationSequence::parse_argv_from_empty().unwrap(), + ); + + assert_eq!( + ModificationSequence { + operations: vec![ModificationOperation::CreateEntry { + options: SimpleFileOptions::default(), + spec: EntrySpec::File { + name: None, + path: "file.txt".into(), + symlink_flag: false + }, + }], + }, + ModificationSequence::parse_argv_from(["-f", "file.txt"]).unwrap(), ); } } diff --git a/cli/src/compress.rs b/cli/src/compress.rs index 2becb632e..2ea47ce7c 100644 --- a/cli/src/compress.rs +++ b/cli/src/compress.rs @@ -372,8 +372,9 @@ pub fn execute_compress(mut err: impl Write, args: Compress) -> Result<(), Comma writer.set_raw_comment(comment.into()); } - let mod_seq = ModificationSequence::from_args(args, positional_paths, &mut err)?; - mod_seq.invoke(&mut writer, &mut err)?; + todo!(); + /* let mod_seq = ModificationSequence::from_args(args, positional_paths, &mut err)?; */ + /* mod_seq.invoke(&mut writer, &mut err)?; */ let handle = writer .finish() From b70a03d239f442eeed73f0a2649524588ae5c676 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 1 Dec 2024 00:25:37 -0500 Subject: [PATCH 25/31] the Compress command is now a conjunction of Resources! 
--- cli/src/args/compress.rs | 260 +++------------------------------------ cli/src/compress.rs | 10 +- 2 files changed, 18 insertions(+), 252 deletions(-) diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 4603853f0..2f508d5be 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -114,9 +114,8 @@ impl Default for ModificationSequence { #[derive(Debug)] pub struct Compress { pub output: OutputType, - pub archive_comment: Option, - pub args: Vec, - pub positional_paths: Vec, + pub global_flags: GlobalFlags, + pub mod_seq: ModificationSequence, } /* impl CommandInputs for Compress {} */ @@ -319,262 +318,31 @@ ENTRY-PATH = } fn parse_argv(mut argv: VecDeque) -> Result { - let mut allow_stdout: bool = false; - let mut append_to_output_path: bool = false; - let mut output_path: Option = None; - let mut archive_comment: Option = None; - let mut args: Vec = Vec::new(); - let mut positional_paths: Vec = Vec::new(); - - while let Some(arg) = argv.pop_front() { + if let Some(arg) = argv.pop_front() { match arg.as_encoded_bytes() { b"-h" | b"--help" => { let help_text = Self::generate_full_help_text(); return Err(ArgParseError::StdoutMessage(help_text)); } - - /* Output flags */ - b"--stdout" => { - if let Some(output_path) = output_path.take() { - return Err(Self::exit_arg_invalid(&format!( - "--stdout provided along with output file {output_path:?}" - ))); - } else if append_to_output_path { - return Err(Self::exit_arg_invalid( - "--stdout provided along with --append", - )); - } else if !args.is_empty() || !positional_paths.is_empty() { - return Err(Self::exit_arg_invalid("--stdout provided after entries")); - } else if allow_stdout { - return Err(Self::exit_arg_invalid("--stdout provided twice")); - } else { - allow_stdout = true; - } - } - b"--append" => { - if append_to_output_path { - return Err(Self::exit_arg_invalid("--append provided twice")); - } else if !args.is_empty() || !positional_paths.is_empty() { - return 
Err(Self::exit_arg_invalid("--append provided after entries")); - } else if allow_stdout { - return Err(Self::exit_arg_invalid( - "--stdout provided along with --append", - )); - } else { - append_to_output_path = true; - } - } - b"-o" | b"--output-file" => { - let new_path = argv.pop_front().map(PathBuf::from).ok_or_else(|| { - Self::exit_arg_invalid("no argument provided for -o/--output-file") - })?; - if let Some(prev_path) = output_path.take() { - return Err(Self::exit_arg_invalid(&format!( - "--output-file provided twice: {prev_path:?} and {new_path:?}" - ))); - } else if allow_stdout { - return Err(Self::exit_arg_invalid( - "--stdout provided along with output file", - )); - } else if !args.is_empty() || !positional_paths.is_empty() { - return Err(Self::exit_arg_invalid( - "-o/--output-file provided after entries", - )); - } else { - output_path = Some(new_path); - } - } - - /* Global flags */ - b"--archive-comment" => { - let new_comment = argv.pop_front().ok_or_else(|| { - Self::exit_arg_invalid("no argument provided for --archive-comment") - })?; - if let Some(prev_comment) = archive_comment.take() { - return Err(Self::exit_arg_invalid(&format!( - "--archive-comment provided twice: {prev_comment:?} and {new_comment:?}" - ))); - } else if !args.is_empty() || !positional_paths.is_empty() { - return Err(Self::exit_arg_invalid( - "--archive-comment provided after entries", - )); - } else { - archive_comment = Some(new_comment); - } - } - - /* Attributes */ - b"-c" | b"--compression-method" => match argv.pop_front() { - None => { - return Err(Self::exit_arg_invalid( - "no argument provided for -c/--compression-method", - )) - } - Some(name) => match name.as_encoded_bytes() { - b"stored" => args.push(CompressionArg::CompressionMethod( - CompressionMethodArg::Stored, - )), - b"deflate" => args.push(CompressionArg::CompressionMethod( - CompressionMethodArg::Deflate, - )), - #[cfg(feature = "deflate64")] - b"deflate64" => 
args.push(CompressionArg::CompressionMethod( - CompressionMethodArg::Deflate64, - )), - #[cfg(feature = "bzip2")] - b"bzip2" => args.push(CompressionArg::CompressionMethod( - CompressionMethodArg::Bzip2, - )), - #[cfg(feature = "zstd")] - b"zstd" => args.push(CompressionArg::CompressionMethod( - CompressionMethodArg::Zstd, - )), - _ => { - return Err(Self::exit_arg_invalid( - "unrecognized compression method {name:?}", - )); - } - }, - }, - b"-l" | b"--compression-level" => match argv.pop_front() { - None => { - return Err(Self::exit_arg_invalid( - "no argument provided for -l/--compression-level", - )); - } - Some(level) => match level.into_string() { - Err(level) => { - return Err(Self::exit_arg_invalid(&format!( - "invalid unicode provided for compression level: {level:?}" - ))); - } - Ok(level) => match level.parse::() { - Err(e) => { - return Err(Self::exit_arg_invalid(&format!( - "failed to parse integer for compression level: {e}" - ))); - } - Ok(level) => { - if (0..=24).contains(&level) { - args.push(CompressionArg::Level(CompressionLevel(level))) - } else { - return Err(Self::exit_arg_invalid(&format!( - "compression level {level} was not between 0 and 24" - ))); - } - } - }, - }, - }, - b"-m" | b"--mode" => match argv.pop_front() { - None => { - return Err(Self::exit_arg_invalid("no argument provided for -m/--mode")); - } - Some(mode) => match mode.into_string() { - Err(mode) => { - return Err(Self::exit_arg_invalid(&format!( - "invalid unicode provided for mode: {mode:?}" - ))); - } - Ok(mode) => match UnixPermissions::parse(&mode) { - Err(e) => { - return Err(Self::exit_arg_invalid(&format!( - "failed to parse integer for mode: {e}" - ))); - } - Ok(mode) => args.push(CompressionArg::UnixPermissions(mode)), - }, - }, - }, - b"--large-file" => match argv.pop_front() { - None => { - return Err(Self::exit_arg_invalid( - "no argument provided for --large-file", - )); - } - Some(large_file) => match large_file.as_encoded_bytes() { - b"true" => 
args.push(CompressionArg::LargeFile(true)), - b"false" => args.push(CompressionArg::LargeFile(false)), - _ => { - return Err(Self::exit_arg_invalid(&format!( - "unrecognized value for --large-file: {large_file:?}" - ))); - } - }, - }, - - /* Data */ - b"-n" | b"--name" => match argv.pop_front() { - None => { - return Err(Self::exit_arg_invalid("no argument provided for -n/--name")) - } - Some(name) => match name.into_string() { - Err(name) => { - return Err(Self::exit_arg_invalid(&format!( - "invalid unicode provided for name: {name:?}" - ))); - } - Ok(name) => args.push(CompressionArg::Name(name)), - }, - }, - b"-s" | b"--symlink" => args.push(CompressionArg::Symlink), - b"-d" | b"--dir" => args.push(CompressionArg::Dir), - b"-i" | b"--immediate" => match argv.pop_front() { - None => { - return Err(Self::exit_arg_invalid( - "no argument provided for -i/--immediate", - )); - } - Some(data) => args.push(CompressionArg::Immediate(data)), - }, - b"-f" | b"--file" => match argv.pop_front() { - None => { - return Err(Self::exit_arg_invalid("no argument provided for -f/--file")); - } - Some(file) => args.push(CompressionArg::FilePath(file.into())), - }, - b"-r" | b"--recursive-dir" => match argv.pop_front() { - None => { - return Err(Self::exit_arg_invalid( - "no argument provided for -r/--recursive-dir", - )); - } - Some(dir) => args.push(CompressionArg::RecursiveDirPath(dir.into())), - }, - - /* Transition to positional args */ - b"--" => break, - arg_bytes => { - if arg_bytes.starts_with(b"-") { - return Err(Self::exit_arg_invalid(&format!( - "unrecognized flag {arg:?}" - ))); - } else { - argv.push_front(arg); - break; - } + _ => { + argv.push_front(arg); } } } - positional_paths.extend(argv.into_iter().map(|arg| arg.into())); + use crate::args::resource::ArgvResource; - let output = if let Some(path) = output_path { - OutputType::File { - path, - append: append_to_output_path, - } - } else { - OutputType::Stdout { - allow_tty: allow_stdout, - } - }; + let output = 
OutputType::parse_argv(&mut argv) + .map_err(|e| Self::exit_arg_invalid(&format!("{e}")))?; + let global_flags = GlobalFlags::parse_argv(&mut argv) + .map_err(|e| Self::exit_arg_invalid(&format!("{e}")))?; + let mod_seq = ModificationSequence::parse_argv(&mut argv) + .map_err(|e| Self::exit_arg_invalid(&format!("{e:?}")))?; Ok(Self { output, - archive_comment, - args, - positional_paths, + global_flags, + mod_seq, }) } } diff --git a/cli/src/compress.rs b/cli/src/compress.rs index 2ea47ce7c..27705dd22 100644 --- a/cli/src/compress.rs +++ b/cli/src/compress.rs @@ -292,9 +292,8 @@ fn enter_recursive_dir_entries( pub fn execute_compress(mut err: impl Write, args: Compress) -> Result<(), CommandError> { let Compress { output, - archive_comment, - args, - positional_paths, + global_flags, + mod_seq, } = args; let (out, do_append) = match output { @@ -366,15 +365,14 @@ pub fn execute_compress(mut err: impl Write, args: Compress) -> Result<(), Comma ZipWriter::new(out) }; + let GlobalFlags { archive_comment } = global_flags; if let Some(comment) = archive_comment { writeln!(err, "comment was provided: {comment:?}").unwrap(); let comment = comment.into_encoded_bytes(); writer.set_raw_comment(comment.into()); } - todo!(); - /* let mod_seq = ModificationSequence::from_args(args, positional_paths, &mut err)?; */ - /* mod_seq.invoke(&mut writer, &mut err)?; */ + mod_seq.invoke(&mut writer, &mut err)?; let handle = writer .finish() From 3b8be6b5ab02e47d5d30a56cba7496c1b98e3760 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 1 Dec 2024 03:27:34 -0500 Subject: [PATCH 26/31] refactor argv parsing to be dyn friendly --- cli/src/args.rs | 33 ++++--- cli/src/args/compress.rs | 20 ++-- cli/src/args/compress/resource.rs | 150 ++++++++++++++++++++---------- 3 files changed, 139 insertions(+), 64 deletions(-) diff --git a/cli/src/args.rs b/cli/src/args.rs index c9bd9e26c..f6cd2534a 100644 --- a/cli/src/args.rs +++ 
b/cli/src/args.rs @@ -159,23 +159,30 @@ pub mod resource { pub trait Resource { const ID: &'static str; + type Value; + type Args; + fn declare(args: Self::Args) -> Self; } - pub trait ArgvResource: Resource + Sized { + pub trait ArgvResource: Resource { type ArgvParseError; - fn parse_argv(argv: &mut VecDeque) -> Result; + fn parse_argv( + &self, + argv: &mut VecDeque, + ) -> Result<::Value, Self::ArgvParseError>; #[cfg(test)] fn parse_argv_from( + &self, argv: impl IntoIterator>, - ) -> Result { + ) -> Result<::Value, Self::ArgvParseError> { let mut argv: VecDeque = argv.into_iter().map(|s| s.into()).collect(); - Self::parse_argv(&mut argv) + self.parse_argv(&mut argv) } #[cfg(test)] - fn parse_argv_from_empty() -> Result { - Self::parse_argv_from(Vec::::new()) + fn parse_argv_from_empty(&self) -> Result<::Value, Self::ArgvParseError> { + self.parse_argv_from(Vec::::new()) } } @@ -185,20 +192,24 @@ pub mod resource { type B: Backend; type SchemaParseError; fn parse_schema<'a>( + &self, v: ::Val<'a>, - ) -> Result; + ) -> Result<::Value, Self::SchemaParseError>; fn parse_schema_str<'a>( + &self, s: ::Str<'a>, - ) -> Result::Err<'a>, Self::SchemaParseError>> - { + ) -> Result< + ::Value, + WrapperError<::Err<'a>, Self::SchemaParseError>, + > { let v = ::parse(s).map_err(WrapperError::In)?; - Ok(Self::parse_schema(v).map_err(WrapperError::Out)?) + Ok(self.parse_schema(v).map_err(WrapperError::Out)?) 
} } pub trait CommandSpec { - /* fn resources() -> Vec<>; */ + /* fn resources() -> Vec; */ } } diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index 2f508d5be..b52571de5 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -118,8 +118,6 @@ pub struct Compress { pub mod_seq: ModificationSequence, } -/* impl CommandInputs for Compress {} */ - impl Compress { #[cfg(feature = "deflate64")] const DEFLATE64_HELP_LINE: &'static str = " - deflate64:\twith deflate64\n"; @@ -330,13 +328,23 @@ ENTRY-PATH = } } - use crate::args::resource::ArgvResource; + use crate::args::{ + compress::resource::{GlobalFlagsResource, ModSeqResource, OutputFlagsResource}, + resource::{ArgvResource, Resource}, + }; + + let output = OutputFlagsResource::declare(()); + let global_flags = GlobalFlagsResource::declare(()); + let mod_seq = ModSeqResource::declare(()); - let output = OutputType::parse_argv(&mut argv) + let output = output + .parse_argv(&mut argv) .map_err(|e| Self::exit_arg_invalid(&format!("{e}")))?; - let global_flags = GlobalFlags::parse_argv(&mut argv) + let global_flags = global_flags + .parse_argv(&mut argv) .map_err(|e| Self::exit_arg_invalid(&format!("{e}")))?; - let mod_seq = ModificationSequence::parse_argv(&mut argv) + let mod_seq = mod_seq + .parse_argv(&mut argv) .map_err(|e| Self::exit_arg_invalid(&format!("{e:?}")))?; Ok(Self { diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index 8c9ab97ac..2e017d7cb 100644 --- a/cli/src/args/compress/resource.rs +++ b/cli/src/args/compress/resource.rs @@ -1,16 +1,37 @@ use super::*; use crate::args::resource::*; -impl Resource for OutputType { +pub struct OutputFlagsResource; + +impl Resource for OutputFlagsResource { const ID: &'static str = "OUTPUT-FLAGS"; + type Value = OutputType; + type Args = (); + fn declare(args: Self::Args) -> Self { + Self + } } -impl Resource for GlobalFlags { +pub struct GlobalFlagsResource; + +impl Resource for GlobalFlagsResource { 
const ID: &'static str = "GLOBAL-FLAGS"; + type Value = GlobalFlags; + type Args = (); + fn declare(args: Self::Args) -> Self { + Self + } } -impl Resource for ModificationSequence { +pub struct ModSeqResource; + +impl Resource for ModSeqResource { const ID: &'static str = "MOD-SEQ"; + type Value = ModificationSequence; + type Args = (); + fn declare(args: Self::Args) -> Self { + Self + } } pub mod argv { @@ -54,9 +75,12 @@ pub mod argv { impl error::Error for OutputTypeError {} - impl ArgvResource for OutputType { + impl ArgvResource for OutputFlagsResource { type ArgvParseError = OutputTypeError; - fn parse_argv(argv: &mut VecDeque) -> Result { + fn parse_argv( + &self, + argv: &mut VecDeque, + ) -> Result { let mut allow_stdout: bool = false; let mut append_to_output_path: bool = false; let mut output_path: Option = None; @@ -121,12 +145,12 @@ pub mod argv { } Ok(if let Some(output_path) = output_path { - Self::File { + OutputType::File { path: output_path, append: append_to_output_path, } } else { - Self::Stdout { + OutputType::Stdout { allow_tty: allow_stdout, } }) @@ -161,9 +185,12 @@ pub mod argv { impl error::Error for GlobalFlagsError {} - impl ArgvResource for GlobalFlags { + impl ArgvResource for GlobalFlagsResource { type ArgvParseError = GlobalFlagsError; - fn parse_argv(argv: &mut VecDeque) -> Result { + fn parse_argv( + &self, + argv: &mut VecDeque, + ) -> Result { let mut archive_comment: Option = None; while let Some(arg) = argv.pop_front() { @@ -188,7 +215,7 @@ pub mod argv { } } - Ok(Self { archive_comment }) + Ok(GlobalFlags { archive_comment }) } } @@ -588,14 +615,19 @@ pub mod argv { } } - impl ArgvResource for ModificationSequence { + impl ArgvResource for ModSeqResource { type ArgvParseError = WrapperError; - fn parse_argv(argv: &mut VecDeque) -> Result { + fn parse_argv( + &self, + argv: &mut VecDeque, + ) -> Result { let compression_args = CompressionArgs::parse_argv(argv).map_err(WrapperError::In)?; 
compression_args.build_mod_seq().map_err(WrapperError::Out) } } + + impl PositionalArgvResource for ModSeqResource {} } use compression_args::{CompressionArgs, ModificationSequenceError}; @@ -605,14 +637,16 @@ pub mod argv { #[test] fn parse_output_type() { + let output = OutputFlagsResource::declare(()); + assert_eq!( OutputType::default(), - OutputType::parse_argv_from_empty().unwrap() + output.parse_argv_from_empty().unwrap() ); assert_eq!( OutputType::Stdout { allow_tty: true }, - OutputType::parse_argv_from(["--stdout"]).unwrap() + output.parse_argv_from(["--stdout"]).unwrap() ); assert_eq!( @@ -620,37 +654,43 @@ pub mod argv { path: "asdf".into(), append: false }, - OutputType::parse_argv_from(["-o", "asdf"]).unwrap() + output.parse_argv_from(["-o", "asdf"]).unwrap() ); assert_eq!( OutputType::File { path: "asdf".into(), append: true }, - OutputType::parse_argv_from(["--append", "-o", "asdf"]).unwrap() + output.parse_argv_from(["--append", "-o", "asdf"]).unwrap() ); } #[test] fn parse_global_flags() { + let global_flags = GlobalFlagsResource::declare(()); + assert_eq!( GlobalFlags::default(), - GlobalFlags::parse_argv_from_empty().unwrap(), + global_flags.parse_argv_from_empty().unwrap(), ); assert_eq!( GlobalFlags { archive_comment: Some("asdf".into()) }, - GlobalFlags::parse_argv_from(["--archive-comment", "asdf"]).unwrap() + global_flags + .parse_argv_from(["--archive-comment", "asdf"]) + .unwrap() ); } #[test] fn parse_mod_seq() { + let mod_seq = ModSeqResource::declare(()); + assert_eq!( ModificationSequence::default(), - ModificationSequence::parse_argv_from_empty().unwrap(), + mod_seq.parse_argv_from_empty().unwrap(), ); assert_eq!( @@ -664,7 +704,7 @@ pub mod argv { }, }], }, - ModificationSequence::parse_argv_from(["-f", "file.txt"]).unwrap(), + mod_seq.parse_argv_from(["-f", "file.txt"]).unwrap(), ); } } @@ -672,7 +712,10 @@ pub mod argv { #[cfg(feature = "json")] pub mod json_resource { - use super::{GlobalFlags, OutputType}; + use super::{ + 
GlobalFlags, GlobalFlagsResource, ModSeqResource, ModificationSequence, + OutputFlagsResource, OutputType, Resource, + }; use crate::{ args::resource::SchemaResource, schema::backends::{json_backend::JsonBackend, Backend}, @@ -730,33 +773,34 @@ pub mod json_resource { impl error::Error for JsonSchemaError {} - impl SchemaResource for OutputType { + impl SchemaResource for OutputFlagsResource { type B = JsonBackend; type SchemaParseError = JsonSchemaError; fn parse_schema<'a>( + &self, v: ::Val<'a>, - ) -> Result { + ) -> Result { match v { - JsonValue::Null => Ok(Self::default()), + JsonValue::Null => Ok(OutputType::default()), /* => {"file": {"path": , "append": false}}} */ - JsonValue::Short(path) => Ok(Self::File { + JsonValue::Short(path) => Ok(OutputType::File { path: path.as_str().into(), append: false, }), - JsonValue::String(path) => Ok(Self::File { + JsonValue::String(path) => Ok(OutputType::File { path: path.into(), append: false, }), /* => {"stdout": {"allow_tty": }} */ - JsonValue::Boolean(allow_tty) => Ok(Self::Stdout { allow_tty }), + JsonValue::Boolean(allow_tty) => Ok(OutputType::Stdout { allow_tty }), /* An object--destructure by enum case. 
*/ JsonValue::Object(o) => { if let Some(o) = o.get("stdout") { match o { - JsonValue::Null => Ok(Self::Stdout { allow_tty: false }), + JsonValue::Null => Ok(OutputType::Stdout { allow_tty: false }), /* {"stdout": } => {"stdout": {"allow_tty": }} */ - JsonValue::Boolean(allow_tty) => Ok(Self::Stdout { + JsonValue::Boolean(allow_tty) => Ok(OutputType::Stdout { allow_tty: *allow_tty, }), /* {"stdout": {"allow_tty": }} => {"stdout": {"allow_tty": }} */ @@ -774,7 +818,7 @@ pub mod json_resource { } else { Ok(false) }?; - Ok(Self::Stdout { allow_tty }) + Ok(OutputType::Stdout { allow_tty }) } _ => Err(JsonSchemaError::InvalidType { val: o.clone(), @@ -785,11 +829,11 @@ pub mod json_resource { } else if let Some(o) = o.get("file") { match o { /* {"file": } => {"file": {"path": , append: false}} */ - JsonValue::Short(path) => Ok(Self::File { + JsonValue::Short(path) => Ok(OutputType::File { path: path.as_str().into(), append: false, }), - JsonValue::String(path) => Ok(Self::File { + JsonValue::String(path) => Ok(OutputType::File { path: path.into(), append: false, }), @@ -827,7 +871,7 @@ pub mod json_resource { } else { Ok(false) }?; - Ok(Self::File { path, append }) + Ok(OutputType::File { path, append }) } _ => Err(JsonSchemaError::InvalidType { val: o.clone(), @@ -853,13 +897,14 @@ pub mod json_resource { } } - impl SchemaResource for GlobalFlags { + impl SchemaResource for GlobalFlagsResource { type B = JsonBackend; type SchemaParseError = JsonSchemaError; fn parse_schema<'a>( + &self, v: ::Val<'a>, - ) -> Result { + ) -> Result { match v { JsonValue::Object(o) => { let archive_comment: Option = if let Some(archive_comment) = @@ -880,9 +925,9 @@ pub mod json_resource { } else { Ok(None) }?; - Ok(Self { archive_comment }) + Ok(GlobalFlags { archive_comment }) } - JsonValue::Null => Ok(Self::default()), + JsonValue::Null => Ok(GlobalFlags::default()), _ => Err(JsonSchemaError::InvalidType { val: v.clone(), valid_types: &["object", "null"], @@ -903,17 +948,19 @@ pub mod 
json_resource { OutputType::default() ); + let output = OutputFlagsResource::declare(()); + assert_eq!( OutputType::Stdout { allow_tty: true }, - OutputType::parse_schema_str("true").unwrap(), + output.parse_schema_str("true").unwrap(), ); assert_eq!( OutputType::Stdout { allow_tty: false }, - OutputType::parse_schema_str("false").unwrap(), + output.parse_schema_str("false").unwrap(), ); assert_eq!( OutputType::default(), - OutputType::parse_schema_str("null").unwrap(), + output.parse_schema_str("null").unwrap(), ); assert_eq!( @@ -921,7 +968,7 @@ pub mod json_resource { path: "asdf".into(), append: false }, - OutputType::parse_schema_str("\"asdf\"").unwrap(), + output.parse_schema_str("\"asdf\"").unwrap(), ); assert_eq!( @@ -929,14 +976,15 @@ pub mod json_resource { path: "asdf".into(), append: false }, - OutputType::parse_schema_str("{\"file\": \"asdf\"}").unwrap(), + output.parse_schema_str("{\"file\": \"asdf\"}").unwrap(), ); assert_eq!( OutputType::File { path: "asdf".into(), append: true }, - OutputType::parse_schema_str("{\"file\": {\"path\": \"asdf\", \"append\": true}}") + output + .parse_schema_str("{\"file\": {\"path\": \"asdf\", \"append\": true}}") .unwrap(), ); assert_eq!( @@ -944,7 +992,8 @@ pub mod json_resource { path: "asdf".into(), append: false }, - OutputType::parse_schema_str("{\"file\": {\"path\": \"asdf\", \"append\": false}}") + output + .parse_schema_str("{\"file\": {\"path\": \"asdf\", \"append\": false}}") .unwrap(), ); } @@ -957,22 +1006,29 @@ pub mod json_resource { }, GlobalFlags::default(), ); + + let global_flags = GlobalFlagsResource::declare(()); + assert_eq!( GlobalFlags::default(), - GlobalFlags::parse_schema_str("null").unwrap(), + global_flags.parse_schema_str("null").unwrap(), ); assert_eq!( GlobalFlags { archive_comment: Some("aaaaasdf".into()), }, - GlobalFlags::parse_schema_str("{\"archive-comment\": \"aaaaasdf\"}").unwrap(), + global_flags + .parse_schema_str("{\"archive-comment\": \"aaaaasdf\"}") + .unwrap(), ); 
assert_eq!( GlobalFlags { archive_comment: None, }, - GlobalFlags::parse_schema_str("{\"archive-comment\": null}").unwrap(), + global_flags + .parse_schema_str("{\"archive-comment\": null}") + .unwrap(), ); } } From 0cab7e0cb97fd0a6636392db8b21f1137afcaa08 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 1 Dec 2024 04:33:46 -0500 Subject: [PATCH 27/31] rearrange some stuff to prepare for dyn magic --- cli/src/args.rs | 97 +++++++++++++++++++++++++++---- cli/src/args/compress/resource.rs | 6 ++ 2 files changed, 92 insertions(+), 11 deletions(-) diff --git a/cli/src/args.rs b/cli/src/args.rs index f6cd2534a..c4199f2b1 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -157,11 +157,21 @@ pub mod resource { use crate::schema::{backends::Backend, transformers::WrapperError}; + use std::error; + + pub trait ResourceValue {} + pub trait Resource { const ID: &'static str; - type Value; - type Args; - fn declare(args: Self::Args) -> Self; + type Value: ResourceValue + where + Self: Sized; + type Args + where + Self: Sized; + fn declare(args: Self::Args) -> Self + where + Self: Sized; } pub trait ArgvResource: Resource { @@ -169,32 +179,93 @@ pub mod resource { fn parse_argv( &self, argv: &mut VecDeque, - ) -> Result<::Value, Self::ArgvParseError>; + ) -> Result<::Value, Self::ArgvParseError> + where + ::Value: Sized, + Self: Sized; + + fn parse_argv_dyn( + &self, + argv: &mut VecDeque, + ) -> Result, Box> + where + Self::ArgvParseError: error::Error, + Self: Sized, + { + self.parse_argv(argv) + .map(|val| { + let val: Box = Box::new(val); + val + }) + .map_err(|e| { + let e: Box = Box::new(e); + e + }) + } #[cfg(test)] fn parse_argv_from( &self, argv: impl IntoIterator>, - ) -> Result<::Value, Self::ArgvParseError> { + ) -> Result<::Value, Self::ArgvParseError> + where + ::Value: Sized, + Self: Sized, + { let mut argv: VecDeque = argv.into_iter().map(|s| s.into()).collect(); self.parse_argv(&mut argv) } 
#[cfg(test)] - fn parse_argv_from_empty(&self) -> Result<::Value, Self::ArgvParseError> { + fn parse_argv_from_empty(&self) -> Result<::Value, Self::ArgvParseError> + where + ::Value: Sized, + Self: Sized, + { self.parse_argv_from(Vec::::new()) } } - pub trait PositionalArgvResource: ArgvResource {} + pub trait PositionalArgvResource: ArgvResource { + /* fn parse_argv_ensure_complete( */ + /* &self, */ + /* mut argv: VecDeque, */ + /* ) -> Result<::Value, Self::ArgvParseError> */ + /* where */ + /* ::Value: Sized, */ + /* Self: Sized, */ + /* { */ + /* let ret = self.parse_argv(&mut argv)?; */ + /* assert!(argv.is_empty(), "argv should have been drained: {argv:?}"); */ + /* Ok(ret) */ + /* } */ + + /* fn parse_argv_ensure_complete_dyn( */ + /* &self, */ + /* mut argv: VecDeque, */ + /* ) -> Result, Box> { */ + /* self.parse_argv_ensure_complete(argv) */ + /* .map(|val| { */ + /* let val: Box = Box::new(val); */ + /* val */ + /* }) */ + /* .map_err(|e| { */ + /* let e: Box = Box::new(e); */ + /* e */ + /* }) */ + /* } */ + } - pub trait SchemaResource: Resource + Sized { + pub trait SchemaResource: Resource { type B: Backend; type SchemaParseError; fn parse_schema<'a>( &self, v: ::Val<'a>, - ) -> Result<::Value, Self::SchemaParseError>; + ) -> Result<::Value, Self::SchemaParseError> + where + ::Value: Sized, + Self: Sized; fn parse_schema_str<'a>( &self, @@ -202,14 +273,18 @@ pub mod resource { ) -> Result< ::Value, WrapperError<::Err<'a>, Self::SchemaParseError>, - > { + > + where + ::Value: Sized, + Self: Sized, + { let v = ::parse(s).map_err(WrapperError::In)?; Ok(self.parse_schema(v).map_err(WrapperError::Out)?) 
} } pub trait CommandSpec { - /* fn resources() -> Vec; */ + /* fn resources(&self) -> Vec>; */ } } diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index 2e017d7cb..daf5ad21e 100644 --- a/cli/src/args/compress/resource.rs +++ b/cli/src/args/compress/resource.rs @@ -3,6 +3,8 @@ use crate::args::resource::*; pub struct OutputFlagsResource; +impl ResourceValue for OutputType {} + impl Resource for OutputFlagsResource { const ID: &'static str = "OUTPUT-FLAGS"; type Value = OutputType; @@ -14,6 +16,8 @@ impl Resource for OutputFlagsResource { pub struct GlobalFlagsResource; +impl ResourceValue for GlobalFlags {} + impl Resource for GlobalFlagsResource { const ID: &'static str = "GLOBAL-FLAGS"; type Value = GlobalFlags; @@ -25,6 +29,8 @@ impl Resource for GlobalFlagsResource { pub struct ModSeqResource; +impl ResourceValue for ModificationSequence {} + impl Resource for ModSeqResource { const ID: &'static str = "MOD-SEQ"; type Value = ModificationSequence; From f20f0c3ae30ad507d21a43f55e5bb1f06060cb2c Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 1 Dec 2024 06:17:16 -0500 Subject: [PATCH 28/31] sketch out the command builders --- cli/src/args.rs | 101 +++++++++++++++++++++++------- cli/src/args/compress/resource.rs | 6 +- 2 files changed, 81 insertions(+), 26 deletions(-) diff --git a/cli/src/args.rs b/cli/src/args.rs index c4199f2b1..e4190db53 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -162,7 +162,7 @@ pub mod resource { pub trait ResourceValue {} pub trait Resource { - const ID: &'static str; + /* const ID: &'static str; */ type Value: ResourceValue where Self: Sized; @@ -175,7 +175,9 @@ pub mod resource { } pub trait ArgvResource: Resource { - type ArgvParseError; + type ArgvParseError + where + Self: Sized; fn parse_argv( &self, argv: &mut VecDeque, @@ -184,25 +186,6 @@ pub mod resource { ::Value: Sized, Self: Sized; - fn parse_argv_dyn( - &self, - argv: &mut 
VecDeque, - ) -> Result, Box> - where - Self::ArgvParseError: error::Error, - Self: Sized, - { - self.parse_argv(argv) - .map(|val| { - let val: Box = Box::new(val); - val - }) - .map_err(|e| { - let e: Box = Box::new(e); - e - }) - } - #[cfg(test)] fn parse_argv_from( &self, @@ -226,6 +209,40 @@ pub mod resource { } } + pub struct DynResourceWrapper { + resource: R, + } + + pub trait ArgvDynResource { + fn parse_argv_dyn( + &self, + argv: &mut VecDeque, + ) -> Result, Box>; + } + + impl ArgvDynResource for DynResourceWrapper + where + R: ArgvResource, + ::ArgvParseError: error::Error, + { + fn parse_argv_dyn( + &self, + argv: &mut VecDeque, + ) -> Result, Box> { + let Self { resource } = self; + resource + .parse_argv(argv) + .map(|val| { + let val: Box = Box::new(val); + val + }) + .map_err(|e| { + let e: Box = Box::new(e); + e + }) + } + } + pub trait PositionalArgvResource: ArgvResource { /* fn parse_argv_ensure_complete( */ /* &self, */ @@ -283,8 +300,46 @@ pub mod resource { } } - pub trait CommandSpec { - /* fn resources(&self) -> Vec>; */ + pub trait SchemaDynResource { + type B: Backend; + fn parse_schema_dyn_str<'a>( + &'a self, + s: ::Str<'a>, + ) -> Result, Box>; + } + + impl SchemaDynResource for DynResourceWrapper + where + R: SchemaResource, + ::SchemaParseError: error::Error, + for<'a> <::B as Backend>::Err<'a>: error::Error, + { + type B = ::B; + fn parse_schema_dyn_str<'a>( + &'a self, + s: ::Str<'a>, + ) -> Result, Box> { + let Self { resource } = self; + resource + .parse_schema_str(s) + .map(|val| { + let val: Box = Box::new(val); + val + }) + .map_err(|e| { + let e: Box = Box::new(e); + e + }) + } + } + + pub trait CliCommandSpec { + fn resources(&self) -> Vec>; + } + + pub trait SchemaCommandSpec { + type B: Backend; + fn resources(&self) -> Vec>>; } } diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index daf5ad21e..a7a260c12 100644 --- a/cli/src/args/compress/resource.rs +++ 
b/cli/src/args/compress/resource.rs @@ -6,7 +6,7 @@ pub struct OutputFlagsResource; impl ResourceValue for OutputType {} impl Resource for OutputFlagsResource { - const ID: &'static str = "OUTPUT-FLAGS"; + /* const ID: &'static str = "OUTPUT-FLAGS"; */ type Value = OutputType; type Args = (); fn declare(args: Self::Args) -> Self { @@ -19,7 +19,7 @@ pub struct GlobalFlagsResource; impl ResourceValue for GlobalFlags {} impl Resource for GlobalFlagsResource { - const ID: &'static str = "GLOBAL-FLAGS"; + /* const ID: &'static str = "GLOBAL-FLAGS"; */ type Value = GlobalFlags; type Args = (); fn declare(args: Self::Args) -> Self { @@ -32,7 +32,7 @@ pub struct ModSeqResource; impl ResourceValue for ModificationSequence {} impl Resource for ModSeqResource { - const ID: &'static str = "MOD-SEQ"; + /* const ID: &'static str = "MOD-SEQ"; */ type Value = ModificationSequence; type Args = (); fn declare(args: Self::Args) -> Self { From 8ca7aa07a3d04bc15f1cb36f9d7bcbf60203577f Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 1 Dec 2024 07:18:04 -0500 Subject: [PATCH 29/31] example composed command for compress --- cli/src/args.rs | 74 +++++++++++++++++++++------------------- cli/src/args/compress.rs | 46 +++++++++++++------------ 2 files changed, 62 insertions(+), 58 deletions(-) diff --git a/cli/src/args.rs b/cli/src/args.rs index e4190db53..412266146 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -157,9 +157,9 @@ pub mod resource { use crate::schema::{backends::Backend, transformers::WrapperError}; - use std::error; + use std::{any, error}; - pub trait ResourceValue {} + pub trait ResourceValue: any::Any {} pub trait Resource { /* const ID: &'static str; */ @@ -243,35 +243,7 @@ pub mod resource { } } - pub trait PositionalArgvResource: ArgvResource { - /* fn parse_argv_ensure_complete( */ - /* &self, */ - /* mut argv: VecDeque, */ - /* ) -> Result<::Value, Self::ArgvParseError> */ - /* where */ - /* ::Value: 
Sized, */ - /* Self: Sized, */ - /* { */ - /* let ret = self.parse_argv(&mut argv)?; */ - /* assert!(argv.is_empty(), "argv should have been drained: {argv:?}"); */ - /* Ok(ret) */ - /* } */ - - /* fn parse_argv_ensure_complete_dyn( */ - /* &self, */ - /* mut argv: VecDeque, */ - /* ) -> Result, Box> { */ - /* self.parse_argv_ensure_complete(argv) */ - /* .map(|val| { */ - /* let val: Box = Box::new(val); */ - /* val */ - /* }) */ - /* .map_err(|e| { */ - /* let e: Box = Box::new(e); */ - /* e */ - /* }) */ - /* } */ - } + pub trait PositionalArgvResource: ArgvResource {} pub trait SchemaResource: Resource { type B: Backend; @@ -333,13 +305,12 @@ pub mod resource { } } - pub trait CliCommandSpec { - fn resources(&self) -> Vec>; + pub struct CliCommandSpec { + resources: Vec>, } - pub trait SchemaCommandSpec { - type B: Backend; - fn resources(&self) -> Vec>>; + pub struct SchemaCommandSpec { + resources: Vec>>, } } @@ -395,6 +366,37 @@ error: {context} Self: Sized; } +pub trait ComposedCommand: CommandFormat { + type ResourceArgs; + fn get_resource_args() -> Self::ResourceArgs; + fn from_resource_args( + args: Self::ResourceArgs, + argv: VecDeque, + ) -> Result + where + Self: Sized; + + fn parse_composed_argv(mut argv: VecDeque) -> Result + where + Self: Sized, + { + if let Some(arg) = argv.pop_front() { + match arg.as_encoded_bytes() { + b"-h" | b"--help" => { + let help_text = Self::generate_full_help_text(); + return Err(ArgParseError::StdoutMessage(help_text)); + } + _ => { + argv.push_front(arg); + } + } + } + + let spec = Self::get_resource_args(); + Self::from_resource_args(spec, argv) + } +} + pub mod compress; pub mod extract; pub mod info; diff --git a/cli/src/args/compress.rs b/cli/src/args/compress.rs index b52571de5..2961ebb9b 100644 --- a/cli/src/args/compress.rs +++ b/cli/src/args/compress.rs @@ -1,10 +1,12 @@ -use super::{ArgParseError, CommandFormat}; +use super::{ArgParseError, CommandFormat, ComposedCommand}; use zip::{write::SimpleFileOptions, 
CompressionMethod}; use std::{collections::VecDeque, ffi::OsString, num::ParseIntError, path::PathBuf}; pub mod resource; +use super::resource::{ArgvResource, Resource}; +use resource::{GlobalFlagsResource, ModSeqResource, OutputFlagsResource}; #[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] pub enum CompressionMethodArg { @@ -315,28 +317,28 @@ ENTRY-PATH = ) } - fn parse_argv(mut argv: VecDeque) -> Result { - if let Some(arg) = argv.pop_front() { - match arg.as_encoded_bytes() { - b"-h" | b"--help" => { - let help_text = Self::generate_full_help_text(); - return Err(ArgParseError::StdoutMessage(help_text)); - } - _ => { - argv.push_front(arg); - } - } - } - - use crate::args::{ - compress::resource::{GlobalFlagsResource, ModSeqResource, OutputFlagsResource}, - resource::{ArgvResource, Resource}, - }; - - let output = OutputFlagsResource::declare(()); - let global_flags = GlobalFlagsResource::declare(()); - let mod_seq = ModSeqResource::declare(()); + fn parse_argv(mut argv: VecDeque) -> Result + where + Self: Sized, + { + ComposedCommand::parse_composed_argv(argv) + } +} +impl ComposedCommand for Compress { + type ResourceArgs = (OutputFlagsResource, GlobalFlagsResource, ModSeqResource); + fn get_resource_args() -> Self::ResourceArgs { + ( + OutputFlagsResource::declare(()), + GlobalFlagsResource::declare(()), + ModSeqResource::declare(()), + ) + } + fn from_resource_args( + args: Self::ResourceArgs, + mut argv: VecDeque, + ) -> Result { + let (output, global_flags, mod_seq) = args; let output = output .parse_argv(&mut argv) .map_err(|e| Self::exit_arg_invalid(&format!("{e}")))?; From 3d9e660852218fbf18f1fef08ef9fa5813992a0e Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 1 Dec 2024 07:29:29 -0500 Subject: [PATCH 30/31] remove unused dyn traits --- cli/src/args.rs | 75 ------------------------------------------------- 1 file changed, 75 deletions(-) diff --git a/cli/src/args.rs 
b/cli/src/args.rs index 412266146..ecf876d00 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -209,40 +209,6 @@ pub mod resource { } } - pub struct DynResourceWrapper { - resource: R, - } - - pub trait ArgvDynResource { - fn parse_argv_dyn( - &self, - argv: &mut VecDeque, - ) -> Result, Box>; - } - - impl ArgvDynResource for DynResourceWrapper - where - R: ArgvResource, - ::ArgvParseError: error::Error, - { - fn parse_argv_dyn( - &self, - argv: &mut VecDeque, - ) -> Result, Box> { - let Self { resource } = self; - resource - .parse_argv(argv) - .map(|val| { - let val: Box = Box::new(val); - val - }) - .map_err(|e| { - let e: Box = Box::new(e); - e - }) - } - } - pub trait PositionalArgvResource: ArgvResource {} pub trait SchemaResource: Resource { @@ -271,47 +237,6 @@ pub mod resource { Ok(self.parse_schema(v).map_err(WrapperError::Out)?) } } - - pub trait SchemaDynResource { - type B: Backend; - fn parse_schema_dyn_str<'a>( - &'a self, - s: ::Str<'a>, - ) -> Result, Box>; - } - - impl SchemaDynResource for DynResourceWrapper - where - R: SchemaResource, - ::SchemaParseError: error::Error, - for<'a> <::B as Backend>::Err<'a>: error::Error, - { - type B = ::B; - fn parse_schema_dyn_str<'a>( - &'a self, - s: ::Str<'a>, - ) -> Result, Box> { - let Self { resource } = self; - resource - .parse_schema_str(s) - .map(|val| { - let val: Box = Box::new(val); - val - }) - .map_err(|e| { - let e: Box = Box::new(e); - e - }) - } - } - - pub struct CliCommandSpec { - resources: Vec>, - } - - pub struct SchemaCommandSpec { - resources: Vec>>, - } } pub trait CommandFormat: fmt::Debug { From b0cfa9dba1391e012f958b8e589a03d88c204432 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sun, 1 Dec 2024 10:11:50 -0500 Subject: [PATCH 31/31] save progress --- cli/src/args.rs | 2 ++ cli/src/args/compress/resource.rs | 27 +++++++++++++++++++++++++++ cli/src/print.rs | 19 ++++++++++++++++++- 3 files changed, 47 insertions(+), 1 
deletion(-) diff --git a/cli/src/args.rs b/cli/src/args.rs index ecf876d00..1fd5282d0 100644 --- a/cli/src/args.rs +++ b/cli/src/args.rs @@ -186,6 +186,8 @@ pub mod resource { ::Value: Sized, Self: Sized; + /* fn print_help(&self) -> String; */ + #[cfg(test)] fn parse_argv_from( &self, diff --git a/cli/src/args/compress/resource.rs b/cli/src/args/compress/resource.rs index a7a260c12..696f78196 100644 --- a/cli/src/args/compress/resource.rs +++ b/cli/src/args/compress/resource.rs @@ -82,6 +82,33 @@ pub mod argv { impl error::Error for OutputTypeError {} impl ArgvResource for OutputFlagsResource { + /* fn print_help(&self) -> String { */ +/* r#" */ +/* Output flags (OUTPUT-FLAGS): Where and how to write the generated zip archive. */ + +/* If not specified, output is written to stdout. */ + +/* OUTPUT-FLAGS = [--append] --output-file */ +/* = --stdout */ + +/* -o, --output-file */ +/* Output zip file path to write. */ + +/* The output file is truncated if it already exists, unless --append is */ +/* provided. */ + +/* --append */ +/* If an output path is provided with -o, open it as an existing zip */ +/* archive and append to it. */ + +/* If the output path does not already exist, no error is produced, and */ +/* a new zip file is created at the given path. */ + +/* --stdout */ +/* Allow writing output to stdout even if stdout is a tty. 
*/ +/* "# */ +/* } */ + type ArgvParseError = OutputTypeError; fn parse_argv( &self, diff --git a/cli/src/print.rs b/cli/src/print.rs index 404dfe574..09b5263d3 100644 --- a/cli/src/print.rs +++ b/cli/src/print.rs @@ -40,7 +40,24 @@ pub mod printer { } } -pub trait HelpSection {} +pub trait HelpCase { + fn value(&self) -> &str; + fn description(&self) -> String; + fn extended_description_with_caveats_and_defaults(&self) -> Option; +} + +pub trait HelpSection { + fn name(&self) -> &str; + fn id(&self) -> &str; + fn description(&self) -> Option; + fn extended_description_with_caveats_and_defaults(&self) -> Option; + fn cases(&self) -> Vec>; + fn post(&self) -> Option; +} + +pub struct FlagsSection {} + +pub struct StringPattern {} pub enum HelpVerbosity { NameOnly,