From 3c8fefae71a543c13e058f63452d51bd86e10275 Mon Sep 17 00:00:00 2001 From: Bradford Hovinen Date: Sun, 26 Nov 2023 18:22:05 +0100 Subject: [PATCH 1/4] Introduce a matcher for byte sequences which represent UTF-8 encoded strings. This allows the use of all matchers for string types on such types. These types come up often when working with various types of I/O, so matching on them can be useful. This currently only supports UTF-8 encoding, but could be extended for other encodings with the addition of a suitable (presumably optional) dependency. --- .../src/matchers/is_encoded_string_matcher.rs | 124 ++++++++++++++++++ googletest/src/matchers/mod.rs | 2 + 2 files changed, 126 insertions(+) create mode 100644 googletest/src/matchers/is_encoded_string_matcher.rs diff --git a/googletest/src/matchers/is_encoded_string_matcher.rs b/googletest/src/matchers/is_encoded_string_matcher.rs new file mode 100644 index 00000000..ff901b3b --- /dev/null +++ b/googletest/src/matchers/is_encoded_string_matcher.rs @@ -0,0 +1,124 @@ +// Copyright 2023 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::matcher::{Matcher, MatcherResult}; +use std::{fmt::Debug, marker::PhantomData}; + +/// Matches a byte sequence which is a UTF-8 encoded string matched by `inner`. +/// +/// The matcher reports no match if either the string is not UTF-8 encoded or if `inner` does not +/// match on the decoded string. +/// +/// The input may be a slice `&[u8]` or a `Vec` of bytes. +/// +/// ``` +/// # use googletest::prelude::*; +/// # fn should_pass() -> Result<()> { +/// let bytes: &[u8] = "A string".as_bytes(); +/// verify_that!(bytes, is_utf8_string(eq("A string")))?; // Passes +/// let bytes: Vec = "A string".as_bytes().to_vec(); +/// verify_that!(bytes, is_utf8_string(eq("A string")))?; // Passes +/// # Ok(()) +/// # } +/// # fn should_fail_1() -> Result<()> { +/// # let bytes: &[u8] = "A string".as_bytes(); +/// verify_that!(bytes, is_utf8_string(eq("Another string")))?; // Fails (inner matcher does not match) +/// # Ok(()) +/// # } +/// # fn should_fail_2() -> Result<()> { +/// let bytes: Vec = vec![192, 64, 128, 32]; +/// verify_that!(bytes, is_utf8_string(anything()))?; // Fails (not UTF-8 encoded) +/// # Ok(()) +/// # } +/// # should_pass().unwrap(); +/// # should_fail_1().unwrap_err(); +/// # should_fail_2().unwrap_err(); +/// ``` +pub fn is_utf8_string<'a, ActualT: AsRef<[u8]> + Debug + 'a, InnerMatcherT>( + inner: InnerMatcherT, +) -> impl Matcher +where + InnerMatcherT: Matcher, +{ + IsEncodedStringMatcher { inner, phantom: Default::default() } +} + +struct IsEncodedStringMatcher { + inner: InnerMatcherT, + phantom: PhantomData, +} + +impl<'a, ActualT: AsRef<[u8]> + Debug + 'a, InnerMatcherT> Matcher + for IsEncodedStringMatcher +where + InnerMatcherT: Matcher, +{ + type ActualT = ActualT; + + fn matches(&self, actual: &Self::ActualT) -> MatcherResult { + std::str::from_utf8(actual.as_ref()) + .map(|s| self.inner.matches(&s)) + .unwrap_or(MatcherResult::NoMatch) + } + + fn describe(&self, matcher_result: MatcherResult) -> String { + match matcher_result { + MatcherResult::Match => format!( + "is a UTF-8 encoded string which {}", + self.inner.describe(MatcherResult::Match) + ), + MatcherResult::NoMatch => format!( + "is not a UTF-8 encoded string which {}", + self.inner.describe(MatcherResult::Match) + ), + } + } + + fn explain_match(&self, actual: &Self::ActualT) -> String { + match std::str::from_utf8(actual.as_ref()) { + Ok(s) => format!("which is a UTF-8 encoded string {}", self.inner.explain_match(&s)), + Err(_) => "which is not a UTF-8 encoded string".into(), + } + } +} + +#[cfg(test)] +mod tests { + use crate::prelude::*; + + #[test] + fn matches_string_as_byte_slice() -> Result<()> { + verify_that!("A string".as_bytes(), is_utf8_string(eq("A string"))) + } + + #[test] + fn matches_string_as_byte_vec() -> Result<()> { + verify_that!("A string".as_bytes().to_vec(), is_utf8_string(eq("A string"))) + } + + #[test] + fn matches_string_with_utf_8_encoded_sequences() -> Result<()> { + verify_that!("äöüÄÖÜ".as_bytes().to_vec(), is_utf8_string(eq("äöüÄÖÜ"))) + } + + #[test] + fn does_not_match_non_equal_string() -> Result<()> { + verify_that!("äöüÄÖÜ".as_bytes().to_vec(), not(is_utf8_string(eq("A string")))) + } + + #[test] + fn does_not_match_non_utf_8_encoded_byte_sequence() -> Result<()> { + verify_that!(&[192, 64, 255, 32], not(is_utf8_string(eq("A string")))) + } +} diff --git a/googletest/src/matchers/mod.rs b/googletest/src/matchers/mod.rs index 97276e83..1e028b97 100644 --- a/googletest/src/matchers/mod.rs +++ b/googletest/src/matchers/mod.rs @@ -34,6 +34,7 @@ mod field_matcher; mod ge_matcher; mod gt_matcher; mod has_entry_matcher; +mod is_encoded_string_matcher; mod is_matcher; mod is_nan_matcher; mod le_matcher; @@ -70,6 +71,7 @@ pub use err_matcher::err; pub use ge_matcher::ge; pub use gt_matcher::gt; pub use has_entry_matcher::has_entry; +pub use is_encoded_string_matcher::is_utf8_string; pub use is_nan_matcher::is_nan; pub use le_matcher::le; pub use len_matcher::len; From e6a8c7ffda0bef47c4735013e7774e024c629f3f Mon Sep 17 00:00:00 2001 From: Bradford Hovinen Date: Sun, 26 Nov 2023 18:29:23 +0100 Subject: [PATCH 2/4] Fix lifetime problems by using `String`. This fixes the lifetime problems in `IsEncodedStringMatcher` at the cost of an extra allocation: the input must be copied to a `Vec`, then converted to a `String`. --- googletest/src/matchers/is_encoded_string_matcher.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/googletest/src/matchers/is_encoded_string_matcher.rs b/googletest/src/matchers/is_encoded_string_matcher.rs index ff901b3b..f0a0bb76 100644 --- a/googletest/src/matchers/is_encoded_string_matcher.rs +++ b/googletest/src/matchers/is_encoded_string_matcher.rs @@ -49,7 +49,7 @@ pub fn is_utf8_string<'a, ActualT: AsRef<[u8]> + Debug + 'a, InnerMatcherT>( inner: InnerMatcherT, ) -> impl Matcher where - InnerMatcherT: Matcher, + InnerMatcherT: Matcher, { IsEncodedStringMatcher { inner, phantom: Default::default() } } @@ -62,12 +62,12 @@ struct IsEncodedStringMatcher { impl<'a, ActualT: AsRef<[u8]> + Debug + 'a, InnerMatcherT> Matcher for IsEncodedStringMatcher where - InnerMatcherT: Matcher, + InnerMatcherT: Matcher, { type ActualT = ActualT; fn matches(&self, actual: &Self::ActualT) -> MatcherResult { - std::str::from_utf8(actual.as_ref()) + String::from_utf8(actual.as_ref().to_vec()) .map(|s| self.inner.matches(&s)) .unwrap_or(MatcherResult::NoMatch) } @@ -86,7 +86,7 @@ where } fn explain_match(&self, actual: &Self::ActualT) -> String { - match std::str::from_utf8(actual.as_ref()) { + match String::from_utf8(actual.as_ref().to_vec()) { Ok(s) => format!("which is a UTF-8 encoded string {}", self.inner.explain_match(&s)), Err(_) => "which is not a UTF-8 encoded string".into(), } From 505ea531218200b070c322f3150a81060615907d Mon Sep 17 00:00:00 2001 From: Bradford Hovinen Date: Thu, 30 Nov 2023 21:47:13 +0100 Subject: [PATCH 3/4] Add tests of describe and explain_match --- .../src/matchers/is_encoded_string_matcher.rs | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/googletest/src/matchers/is_encoded_string_matcher.rs b/googletest/src/matchers/is_encoded_string_matcher.rs index f0a0bb76..582ffaf2 100644 --- a/googletest/src/matchers/is_encoded_string_matcher.rs +++ b/googletest/src/matchers/is_encoded_string_matcher.rs @@ -95,6 +95,7 @@ where #[cfg(test)] mod tests { + use crate::matcher::MatcherResult; use crate::prelude::*; #[test] @@ -121,4 +122,52 @@ mod tests { fn does_not_match_non_utf_8_encoded_byte_sequence() -> Result<()> { verify_that!(&[192, 64, 255, 32], not(is_utf8_string(eq("A string")))) } + + #[test] + fn has_correct_description_in_matched_case() -> Result<()> { + let matcher = is_utf8_string::<&[u8], _>(eq("A string")); + + verify_that!( + matcher.describe(MatcherResult::Match), + eq("is a UTF-8 encoded string which is equal to \"A string\"") + ) + } + + #[test] + fn has_correct_description_in_not_matched_case() -> Result<()> { + let matcher = is_utf8_string::<&[u8], _>(eq("A string")); + + verify_that!( + matcher.describe(MatcherResult::NoMatch), + eq("is not a UTF-8 encoded string which is equal to \"A string\"") + ) + } + + #[test] + fn has_correct_explanation_in_matched_case() -> Result<()> { + let explanation = is_utf8_string(eq("A string")).explain_match(&"A string".as_bytes()); + + verify_that!( + explanation, + eq("which is a UTF-8 encoded string which is equal to \"A string\"") + ) + } + + #[test] + fn has_correct_explanation_when_byte_array_is_not_utf8_encoded() -> Result<()> { + let explanation = is_utf8_string(eq("A string")).explain_match(&&[192, 128, 0, 64]); + + verify_that!(explanation, eq("which is not a UTF-8 encoded string")) + } + + #[test] + fn has_correct_explanation_when_inner_matcher_does_not_match() -> Result<()> { + let explanation = + is_utf8_string(eq("A string")).explain_match(&"Another string".as_bytes()); + + verify_that!( + explanation, + eq("which is a UTF-8 encoded string which isn't equal to \"A string\"") + ) + } } From d27f1660d4736c2c4746b70fe8aa0a82e63c83a6 Mon Sep 17 00:00:00 2001 From: Bradford Hovinen Date: Thu, 30 Nov 2023 21:51:59 +0100 Subject: [PATCH 4/4] Add error message for UTF-8 conversion --- googletest/src/matchers/is_encoded_string_matcher.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/googletest/src/matchers/is_encoded_string_matcher.rs b/googletest/src/matchers/is_encoded_string_matcher.rs index 582ffaf2..a3210084 100644 --- a/googletest/src/matchers/is_encoded_string_matcher.rs +++ b/googletest/src/matchers/is_encoded_string_matcher.rs @@ -88,7 +88,7 @@ where fn explain_match(&self, actual: &Self::ActualT) -> String { match String::from_utf8(actual.as_ref().to_vec()) { Ok(s) => format!("which is a UTF-8 encoded string {}", self.inner.explain_match(&s)), - Err(_) => "which is not a UTF-8 encoded string".into(), + Err(e) => format!("which is not a UTF-8 encoded string: {e}"), } } } @@ -157,7 +157,7 @@ mod tests { fn has_correct_explanation_when_byte_array_is_not_utf8_encoded() -> Result<()> { let explanation = is_utf8_string(eq("A string")).explain_match(&&[192, 128, 0, 64]); - verify_that!(explanation, eq("which is not a UTF-8 encoded string")) + verify_that!(explanation, starts_with("which is not a UTF-8 encoded string: ")) } #[test]