From d2bd7f8b109deb8e2082179cc78976b136b440fc Mon Sep 17 00:00:00 2001 From: Tomasz Kramkowski Date: Tue, 1 Oct 2024 23:58:39 +0100 Subject: [PATCH] &OsStr support Implement `&OsStr as Argument` on top of `&[u8] as Argument`. The unsafe code to convert back from the sliced `&[u8]` keeps to the safety invariants defined for the `OsStr::from_encoded_bytes_unchecked` function. --- README.md | 6 ++-- bench/examples/vs.rs | 36 +++++++++++++++++++ bench/src/evolution.rs | 79 ++++++++++++++++++++++++++++++++++++++++++ examples/no_alloc.rs | 9 ++--- examples/os_str.rs | 22 ++++-------- src/lib.rs | 4 +-- src/tests.rs | 47 +++++++++++++++++++++++++ src/traits.rs | 63 +++++++++++++++++++++++++++++++++ 8 files changed, 240 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 776a485..47b7c9f 100644 --- a/README.md +++ b/README.md @@ -28,12 +28,12 @@ you do with it. * Zero cost * Zero copy -* Zero unsafe code +* Zero unsafe code except for `&OsStr` * Zero dependencies * Zero allocation * Simple to use yet versatile -* `#![no_std]`-compatible -* Compatible with `&str` and `&[u8]` +* `#![no_std]`-compatible except for `&OsStr` +* Compatible with `&str`, `&[u8]` and `&OsStr` ## Performance diff --git a/bench/examples/vs.rs b/bench/examples/vs.rs index cb189fd..9caaeac 100644 --- a/bench/examples/vs.rs +++ b/bench/examples/vs.rs @@ -1,5 +1,6 @@ use bench::{ARGS, ARGS_BYTES}; use getargs::Argument; +use std::ffi::OsStr; use std::hint::black_box; use std::time::Instant; @@ -104,6 +105,34 @@ fn getargs5b<'arg, I: Iterator>(iter: I) -> Settings<&'arg [u settings } +#[inline(never)] +fn getargs5o<'arg, I: Iterator>(iter: I) -> Settings<&'arg OsStr> { + use getargs::{Opt, Options}; + + let mut settings = Settings::default(); + let mut opts = Options::new(iter); + + while let Some(opt) = opts.next_opt().unwrap() { + match opt { + Opt::Short('1') => settings.short_present1 = true, + Opt::Short('2') => settings.short_present2 = true, + Opt::Short('3') => settings.short_present3 = true, + Opt::Long("present1") => settings.long_present1 = true, + Opt::Long("present2") => settings.long_present2 = true, + Opt::Long("present3") => settings.long_present3 = true, + Opt::Short('4') => settings.short_value1 = Some(opts.value().unwrap()), + Opt::Short('5') => settings.short_value2 = Some(opts.value().unwrap()), + Opt::Short('6') => settings.short_value3 = Some(opts.value().unwrap()), + Opt::Long("val1") => settings.long_value1 = Some(opts.value().unwrap()), + Opt::Long("val2") => settings.long_value2 = Some(opts.value().unwrap()), + Opt::Long("val3") => settings.long_value3 = Some(opts.value().unwrap()), + _ => {} + } + } + + settings +} + fn main() { const ITERATIONS: usize = 10_000_000; @@ -127,7 +156,14 @@ fn main() { let d = Instant::now(); + for _ in 0..ITERATIONS { + black_box(getargs5o(ARGS.iter().copied().map(AsRef::as_ref))); + } + + let e = Instant::now(); + eprintln!("getargs4: {}ns", (b - a).as_nanos() / ITERATIONS as u128); eprintln!("getargs5: {}ns", (c - b).as_nanos() / ITERATIONS as u128); eprintln!("getargs5b: {}ns", (d - c).as_nanos() / ITERATIONS as u128); + eprintln!("getargs5o: {}ns", (e - d).as_nanos() / ITERATIONS as u128); } diff --git a/bench/src/evolution.rs b/bench/src/evolution.rs index 0425768..37e03c6 100644 --- a/bench/src/evolution.rs +++ b/bench/src/evolution.rs @@ -1,5 +1,7 @@ #![allow(non_snake_case)] +use std::ffi::OsStr; + use crate::{ARGS, ARGS_BYTES}; use getargs::Argument; use test::Bencher; @@ -161,6 +163,34 @@ fn getargsLb<'arg, I: Iterator>(iter: I) -> Settings<&'arg [u settings } +#[inline(always)] +fn getargsLo<'arg, I: Iterator>(iter: I) -> Settings<&'arg OsStr> { + use getargs::{Opt, Options}; + + let mut settings = Settings::default(); + let mut opts = Options::new(iter); + + while let Some(opt) = opts.next_opt().unwrap() { + match opt { + Opt::Short('1') => settings.short_present1 = true, + Opt::Short('2') => settings.short_present2 = true, + Opt::Short('3') => settings.short_present3 = true, + Opt::Long("present1") => settings.long_present1 = true, + Opt::Long("present2") => settings.long_present2 = true, + Opt::Long("present3") => settings.long_present3 = true, + Opt::Short('4') => settings.short_value1 = Some(opts.value().unwrap()), + Opt::Short('5') => settings.short_value2 = Some(opts.value().unwrap()), + Opt::Short('6') => settings.short_value3 = Some(opts.value().unwrap()), + Opt::Long("val1") => settings.long_value1 = Some(opts.value().unwrap()), + Opt::Long("val2") => settings.long_value2 = Some(opts.value().unwrap()), + Opt::Long("val3") => settings.long_value3 = Some(opts.value().unwrap()), + _ => {} + } + } + + settings +} + #[bench] #[inline(never)] fn getargs4_varied_small(bencher: &mut Bencher) { @@ -191,6 +221,13 @@ fn getargsLb_varied_small(bencher: &mut Bencher) { bencher.iter(|| getargsLb(ARGS_BYTES.iter().copied())); } +#[bench] +#[inline(never)] +fn getargsLo_varied_small(bencher: &mut Bencher) { + let args_os: Box<[&OsStr]> = ARGS.iter().copied().map(AsRef::as_ref).collect(); + bencher.iter(|| getargsLo(args_os.iter().copied())); +} + pub const ARGS_LONG: [&str; 1000] = ["--dsfigadsjfdgsfjkasbfjksdfabsdbfdaf"; 1000]; pub const ARGS_LONG_BYTES: [&[u8]; 1000] = [b"--dsfigadsjfdgsfjkasbfjksdfabsdbfdaf"; 1000]; @@ -224,6 +261,13 @@ fn getargsLb_long(bencher: &mut Bencher) { bencher.iter(|| getargsLb(ARGS_LONG_BYTES.iter().copied())); } +#[bench] +#[inline(never)] +fn getargsLo_long(bencher: &mut Bencher) { + let args_os: Vec<&OsStr> = ARGS_LONG.iter().copied().map(AsRef::as_ref).collect(); + bencher.iter(|| getargsLo(args_os.iter().copied())); +} + pub const ARGS_SHORT_CLUSTER: [&str; 1000] = ["-rjryets8kzrlxu7lzvnmsooiac8u9lxluphwrfudxaitfdomtce78grull9cpcvk7lyi07mdoclybtolssg7w7kwei79k"; 1000]; @@ -260,6 +304,13 @@ fn getargsLb_short_cluster(bencher: &mut Bencher) { bencher.iter(|| getargsLb(ARGS_SHORT_CLUSTER_BYTES.iter().copied())); } +#[bench] +#[inline(never)] +fn getargsLo_short_cluster(bencher: &mut Bencher) { + let args_os: Vec<&OsStr> = ARGS_SHORT_CLUSTER.iter().copied().map(AsRef::as_ref).collect(); + bencher.iter(|| getargsLo(args_os.iter().copied())); +} + pub const ARGS_SHORT_EVALUE: [&str; 1000] = ["-rjryets8kzrlxu7lzvnmso4oiac8u9lxluphwrfudxaitfdomtce78grull9cpcvk7lyi07mdoclybtolssg7w7kwei79k"; 1000]; @@ -296,6 +347,13 @@ fn getargsLb_short_evalue(bencher: &mut Bencher) { bencher.iter(|| getargsLb(ARGS_SHORT_EVALUE_BYTES.iter().copied())); } +#[bench] +#[inline(never)] +fn getargsLo_short_evalue(bencher: &mut Bencher) { + let args_os: Vec<&OsStr> = ARGS_SHORT_EVALUE.iter().copied().map(AsRef::as_ref).collect(); + bencher.iter(|| getargsLo(args_os.iter().copied())); +} + pub const ARGS_SHORT_IVALUE: [&str; 1000] = ["-rjryets8kzrlxu7lzvnmsooiac8u9lxluphwrfudxaitfdomtce78grull9cpcvk7lyi07mdoclybtolssg7w7kwei79k4"; 1000]; @@ -332,6 +390,13 @@ fn getargsLb_short_ivalue(bencher: &mut Bencher) { bencher.iter(|| getargsLb(ARGS_SHORT_IVALUE_BYTES.iter().copied())); } +#[bench] +#[inline(never)] +fn getargsLo_short_ivalue(bencher: &mut Bencher) { + let args_os: Vec<&OsStr> = ARGS_SHORT_IVALUE.iter().copied().map(AsRef::as_ref).collect(); + bencher.iter(|| getargsLo(args_os.iter().copied())); +} + pub const ARGS_LONG_EVALUE: [&str; 1000] = ["--val1=rjryets8kzrlxu7lzvnms4ooiac8u9lxluphwrfudxaitfdomtce78grull9cpcvk7lyi07mdoclybtolssg7w7kwei79k"; 1000]; @@ -368,6 +433,13 @@ fn getargsLb_long_evalue(bencher: &mut Bencher) { bencher.iter(|| getargsLb(ARGS_LONG_EVALUE_BYTES.iter().copied())); } +#[bench] +#[inline(never)] +fn getargsLo_long_evalue(bencher: &mut Bencher) { + let args_os: Vec<&OsStr> = ARGS_LONG_EVALUE.iter().copied().map(AsRef::as_ref).collect(); + bencher.iter(|| getargsLo(args_os.iter().copied())); +} + pub const ARGS_LONG_IVALUE: [&str; 1000] = ["--val1"; 1000]; pub const ARGS_LONG_IVALUE_BYTES: [&[u8]; 1000] = [b"--val1"; 1000]; @@ -400,3 +472,10 @@ fn getargsL_long_ivalue(bencher: &mut Bencher) { fn getargsLb_long_ivalue(bencher: &mut Bencher) { bencher.iter(|| getargsLb(ARGS_LONG_IVALUE_BYTES.iter().copied())); } + +#[bench] +#[inline(never)] +fn getargsLo_long_ivalue(bencher: &mut Bencher) { + let args_os: Vec<&OsStr> = ARGS_LONG_IVALUE.iter().copied().map(AsRef::as_ref).collect(); + bencher.iter(|| getargsLo(args_os.iter().copied())); +} diff --git a/examples/no_alloc.rs b/examples/no_alloc.rs index 834fe41..a682829 100644 --- a/examples/no_alloc.rs +++ b/examples/no_alloc.rs @@ -5,13 +5,10 @@ //! Additionally, all strings and errors are annotated with the correct lifetimes, so that the //! lifetime of the iterator itself does not matter so much anymore. -use getargs::{Opt, Options}; +use getargs::{Argument, Opt, Options}; fn main() { - let args = argv::iter().skip(1).map(|os| { - os.to_str() - .expect("argument couldn't be converted to UTF-8") - }); + let args = argv::iter().skip(1); let mut opts = Options::new(args); @@ -24,6 +21,6 @@ fn main() { } for positional in opts.positionals() { - eprintln!("positional argument: {}", positional); + eprintln!("positional argument: {}", Argument::display(positional)); } } diff --git a/examples/os_str.rs b/examples/os_str.rs index 77f307d..a7fa0ff 100644 --- a/examples/os_str.rs +++ b/examples/os_str.rs @@ -1,30 +1,22 @@ -use getargs::{Arg, Options}; -use std::ffi::OsStr; +use getargs::{Arg, Argument, Options}; +use std::ffi::OsString; +use std::path::PathBuf; -#[cfg(unix)] fn main() { - use std::os::unix::ffi::OsStrExt; - let args: Vec<_> = std::env::args_os().skip(1).collect(); - let mut opts = Options::new(args.iter().map(|s| s.as_bytes())); + let mut opts = Options::new(args.iter().map(OsString::as_os_str)); while let Some(arg) = opts.next_arg().expect("usage error") { match arg { Arg::Short('f') | Arg::Long("file") => { - let f = OsStr::from_bytes(opts.value().expect("usage error")); + let f = PathBuf::from(opts.value().expect("usage error")); println!("file option: {f:?}"); } Arg::Positional(pos) => { - let pos = OsStr::from_bytes(pos); - println!("positional: {pos:?}"); + println!("positional: {}", Argument::display(pos)); } - _ => println!("other: {arg:?}"), + _ => println!("other: {}", arg), } } } - -#[cfg(not(unix))] -fn main() { - eprintln!("Only supported on Unix because UTF-16 is hard, sorry :("); -} diff --git a/src/lib.rs b/src/lib.rs index 247b7a2..8eeb11d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -28,12 +28,12 @@ //! //! * Zero cost //! * Zero copy -//! * Zero unsafe code +//! * Zero unsafe code except for `&OsStr` //! * Zero dependencies //! * Zero allocation //! * Simple to use yet versatile //! * `#![no_std]`-compatible -//! * Compatible with `&str` and `&[u8]` +//! * Compatible with `&str`, `&[u8]` and `&OsStr` //! //! ## Performance //! diff --git a/src/tests.rs b/src/tests.rs index 261cac1..274ce4a 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1,3 +1,5 @@ +use std::ffi::OsStr; + use super::*; #[test] @@ -574,3 +576,48 @@ fn repeating_iterator() { assert_eq!(opts.next_positional(), None); assert!(opts.is_empty()); } + +#[test] +fn os_str() { + let args = [ + "-ohi", + "--opt=HI", + "-o", + "hi", + "--opt", + "hi", + "--optional", + "--optional=value", + "-O", + "-Ovalue", + "--", + "one", + "two", + ]; + + let mut opts = Options::<'_, &OsStr, _>::new(args.into_iter().map(AsRef::as_ref)); + + assert_eq!(opts.next_opt(), Ok(Some(Opt::Short('o')))); + assert_eq!(opts.value(), Ok("hi".as_ref())); + assert_eq!(opts.next_opt(), Ok(Some(Opt::Long("opt")))); + assert_eq!(opts.value(), Ok("HI".as_ref())); + assert_eq!(opts.next_opt(), Ok(Some(Opt::Short('o')))); + assert_eq!(opts.value(), Ok("hi".as_ref())); + assert_eq!(opts.next_opt(), Ok(Some(Opt::Long("opt")))); + assert_eq!(opts.value(), Ok("hi".as_ref())); + assert_eq!(opts.next_opt(), Ok(Some(Opt::Long("optional")))); + assert_eq!(opts.value_opt(), None); + assert_eq!(opts.next_opt(), Ok(Some(Opt::Long("optional")))); + assert_eq!(opts.value_opt(), Some("value".as_ref())); + assert_eq!(opts.next_opt(), Ok(Some(Opt::Short('O')))); + assert_eq!(opts.value_opt(), None); + assert_eq!(opts.next_opt(), Ok(Some(Opt::Short('O')))); + assert_eq!(opts.value_opt(), Some("value".as_ref())); + assert_eq!(opts.next_opt(), Ok(None)); + assert!(opts.opts_ended()); + assert_eq!(opts.next_positional(), Some("one".as_ref())); + assert_eq!(opts.next_positional(), Some("two".as_ref())); + assert_eq!(opts.next_positional(), None); + assert!(opts.opts_ended()); + assert!(opts.is_empty()); +} diff --git a/src/traits.rs b/src/traits.rs index f0a7cff..2199fb0 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1,4 +1,6 @@ use core::fmt::{Debug, Display, Formatter, Write}; +#[cfg(feature = "std")] +use std::ffi::OsStr; /// The argument trait for types that can be parsed by /// [`Options`][crate::Options]. @@ -317,6 +319,67 @@ impl Argument for &[u8] { } } +#[cfg(feature = "std")] +impl Argument for &OsStr { + #[inline] + fn ends_opts(self) -> bool { + self.as_encoded_bytes().ends_opts() + } + + #[inline] + fn parse_long_opt<'opt>(self) -> Result)>> + where + Self: 'opt, + { + self.as_encoded_bytes() + .parse_long_opt() + .map(|o| { + o.map(|t| { + ( + t.0, + t.1.map(|s| unsafe { OsStr::from_encoded_bytes_unchecked(s) }), + ) + }) + }) + .map_err(|e| { + ConversionError::new(unsafe { OsStr::from_encoded_bytes_unchecked(e.option) }) + }) + } + + #[inline] + fn parse_short_cluster(self, allow_number: bool) -> Option { + self.as_encoded_bytes() + .parse_short_cluster(allow_number) + .map(|s| unsafe { OsStr::from_encoded_bytes_unchecked(s) }) + } + + #[inline] + fn consume_short_opt(self) -> Result)> { + self.as_encoded_bytes() + .consume_short_opt() + .map(|t| { + ( + t.0, + t.1.map(|s| unsafe { OsStr::from_encoded_bytes_unchecked(s) }), + ) + }) + .map_err(|e| { + ConversionError::new(unsafe { OsStr::from_encoded_bytes_unchecked(e.option) }) + }) + } + + #[inline] + fn consume_short_val(self) -> Self { + self + } + + fn display(self) -> impl Display { + DisplaySliceU8 { + slice: self.as_encoded_bytes(), + } + } +} + #[cfg(test)] mod tests { use super::is_number;