Skip to content

Commit 8a271e6

Browse files
committed
Introduce a matcher for byte sequences which represent UTF-8 encoded strings.
This allows every matcher that operates on strings to be applied to such byte sequences. Byte sequences come up often when working with various kinds of I/O, so being able to match on them is useful. Only UTF-8 encoding is currently supported, but this could be extended to other encodings with the addition of a suitable (presumably optional) dependency.
1 parent 15fc0be commit 8a271e6

File tree

2 files changed

+126
-0
lines changed

2 files changed

+126
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,124 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
use crate::matcher::{Matcher, MatcherResult};
16+
use std::{fmt::Debug, marker::PhantomData};
17+
18+
/// Matches a byte sequence which is a UTF-8 encoded string matched by `inner`.
19+
///
20+
/// The matcher reports no match if either the string is not UTF-8 encoded or if `inner` does not
21+
/// match on the decoded string.
22+
///
23+
/// The input may be a slice `&[u8]` or a `Vec` of bytes.
24+
///
25+
/// ```
26+
/// # use googletest::prelude::*;
27+
/// # fn should_pass() -> Result<()> {
28+
/// let bytes: &[u8] = "A string".as_bytes();
29+
/// verify_that!(bytes, is_encoded_string(eq("A string")))?; // Passes
30+
/// let bytes: Vec<u8> = "A string".as_bytes().to_vec();
31+
/// verify_that!(bytes, is_encoded_string(eq("A string")))?; // Passes
32+
/// # Ok(())
33+
/// # }
34+
/// # fn should_fail_1() -> Result<()> {
35+
/// # let bytes: &[u8] = "A string".as_bytes();
36+
/// verify_that!(bytes, is_encoded_string(eq("Another string")))?; // Fails (inner matcher does not match)
37+
/// # Ok(())
38+
/// # }
39+
/// # fn should_fail_2() -> Result<()> {
40+
/// let bytes: Vec<u8> = vec![192, 64, 128, 32];
41+
/// verify_that!(bytes, is_encoded_string(anything()))?; // Fails (not UTF-8 encoded)
42+
/// # Ok(())
43+
/// # }
44+
/// # should_pass().unwrap();
45+
/// # should_fail_1().unwrap_err();
46+
/// # should_fail_2().unwrap_err();
47+
/// ```
48+
pub fn is_utf8_string<'a, ActualT: AsRef<[u8]> + Debug + 'a, InnerMatcherT>(
49+
inner: InnerMatcherT,
50+
) -> impl Matcher<ActualT = ActualT>
51+
where
52+
InnerMatcherT: Matcher<ActualT = &'a str>,
53+
{
54+
IsEncodedStringMatcher { inner, phantom: Default::default() }
55+
}
56+
57+
/// Matcher produced by `is_utf8_string`.
///
/// Wraps an inner matcher which is applied to the actual value's bytes after they have been
/// decoded as UTF-8.
struct IsEncodedStringMatcher<ActualT, InnerMatcherT> {
    /// Matcher applied to the decoded string.
    inner: InnerMatcherT,
    /// Zero-sized marker for the otherwise unused `ActualT` type parameter.
    phantom: PhantomData<ActualT>,
}
61+
62+
impl<'a, ActualT: AsRef<[u8]> + Debug + 'a, InnerMatcherT> Matcher
63+
for IsEncodedStringMatcher<ActualT, InnerMatcherT>
64+
where
65+
InnerMatcherT: Matcher<ActualT = &'a str>,
66+
{
67+
type ActualT = ActualT;
68+
69+
fn matches(&self, actual: &Self::ActualT) -> MatcherResult {
70+
std::str::from_utf8(actual.as_ref())
71+
.map(|s| self.inner.matches(&s))
72+
.unwrap_or(MatcherResult::NoMatch)
73+
}
74+
75+
fn describe(&self, matcher_result: MatcherResult) -> String {
76+
match matcher_result {
77+
MatcherResult::Match => format!(
78+
"is a UTF-8 encoded string which {}",
79+
self.inner.describe(MatcherResult::Match)
80+
),
81+
MatcherResult::NoMatch => format!(
82+
"is not a UTF-8 encoded string which {}",
83+
self.inner.describe(MatcherResult::Match)
84+
),
85+
}
86+
}
87+
88+
fn explain_match(&self, actual: &Self::ActualT) -> String {
89+
match std::str::from_utf8(actual.as_ref()) {
90+
Ok(s) => format!("which is a UTF-8 encoded string {}", self.inner.explain_match(&s)),
91+
Err(_) => "which is not a UTF-8 encoded string".into(),
92+
}
93+
}
94+
}
95+
96+
#[cfg(test)]
mod tests {
    use crate::prelude::*;

    // A borrowed byte slice is accepted as the actual value.
    #[test]
    fn matches_string_as_byte_slice() -> Result<()> {
        verify_that!(
            "A string".as_bytes(),
            is_utf8_string(eq("A string"))
        )
    }

    // An owned byte vector is accepted as the actual value.
    #[test]
    fn matches_string_as_byte_vec() -> Result<()> {
        verify_that!(
            "A string".as_bytes().to_vec(),
            is_utf8_string(eq("A string"))
        )
    }

    // Multi-byte UTF-8 sequences are decoded before the inner matcher runs.
    #[test]
    fn matches_string_with_utf_8_encoded_sequences() -> Result<()> {
        verify_that!(
            "äöüÄÖÜ".as_bytes().to_vec(),
            is_utf8_string(eq("äöüÄÖÜ"))
        )
    }

    // Valid UTF-8 whose decoded text differs from the expectation does not match.
    #[test]
    fn does_not_match_non_equal_string() -> Result<()> {
        verify_that!(
            "äöüÄÖÜ".as_bytes().to_vec(),
            not(is_utf8_string(eq("A string")))
        )
    }

    // Bytes which are not valid UTF-8 do not match.
    #[test]
    fn does_not_match_non_utf_8_encoded_byte_sequence() -> Result<()> {
        verify_that!(
            &[192, 64, 255, 32],
            not(is_utf8_string(eq("A string")))
        )
    }
}

googletest/src/matchers/mod.rs

+2
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ mod field_matcher;
3434
mod ge_matcher;
3535
mod gt_matcher;
3636
mod has_entry_matcher;
37+
mod is_encoded_string_matcher;
3738
mod is_matcher;
3839
mod is_nan_matcher;
3940
mod le_matcher;
@@ -70,6 +71,7 @@ pub use err_matcher::err;
7071
pub use ge_matcher::ge;
7172
pub use gt_matcher::gt;
7273
pub use has_entry_matcher::has_entry;
74+
pub use is_encoded_string_matcher::is_utf8_string;
7375
pub use is_nan_matcher::is_nan;
7476
pub use le_matcher::le;
7577
pub use len_matcher::len;

0 commit comments

Comments
 (0)