Skip to content

Commit d31ca4f

Browse files
committed
Move Utf8Error to new mod
1 parent c6622d1 commit d31ca4f

File tree

2 files changed

+134
-126
lines changed

2 files changed

+134
-126
lines changed

library/core/src/str/error.rs

+129
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
//! Defines the error types of the `str` module: [`Utf8Error`] and [`ParseBoolError`].
2+
3+
use crate::fmt;
4+
5+
/// Errors which can occur when attempting to interpret a sequence of [`u8`]
6+
/// as a string.
7+
///
8+
/// As such, the `from_utf8` family of functions and methods for both [`String`]s
9+
/// and [`&str`]s make use of this error, for example.
10+
///
11+
/// [`String`]: ../../std/string/struct.String.html#method.from_utf8
12+
/// [`&str`]: super::from_utf8
13+
///
14+
/// # Examples
15+
///
16+
/// This error type’s methods can be used to create functionality
17+
/// similar to `String::from_utf8_lossy` without allocating heap memory:
18+
///
19+
/// ```
20+
/// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) {
21+
/// loop {
22+
/// match std::str::from_utf8(input) {
23+
/// Ok(valid) => {
24+
/// push(valid);
25+
/// break
26+
/// }
27+
/// Err(error) => {
28+
/// let (valid, after_valid) = input.split_at(error.valid_up_to());
29+
/// unsafe {
30+
/// push(std::str::from_utf8_unchecked(valid))
31+
/// }
32+
/// push("\u{FFFD}");
33+
///
34+
/// if let Some(invalid_sequence_length) = error.error_len() {
35+
/// input = &after_valid[invalid_sequence_length..]
36+
/// } else {
37+
/// break
38+
/// }
39+
/// }
40+
/// }
41+
/// }
42+
/// }
43+
/// ```
44+
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
45+
#[stable(feature = "rust1", since = "1.0.0")]
46+
pub struct Utf8Error {
47+
pub(super) valid_up_to: usize,
48+
pub(super) error_len: Option<u8>,
49+
}
50+
51+
impl Utf8Error {
52+
/// Returns the index in the given string up to which valid UTF-8 was
53+
/// verified.
54+
///
55+
/// It is the maximum index such that `from_utf8(&input[..index])`
56+
/// would return `Ok(_)`.
57+
///
58+
/// # Examples
59+
///
60+
/// Basic usage:
61+
///
62+
/// ```
63+
/// use std::str;
64+
///
65+
/// // some invalid bytes, in a vector
66+
/// let sparkle_heart = vec![0, 159, 146, 150];
67+
///
68+
/// // std::str::from_utf8 returns a Utf8Error
69+
/// let error = str::from_utf8(&sparkle_heart).unwrap_err();
70+
///
71+
/// // the second byte is invalid here
72+
/// assert_eq!(1, error.valid_up_to());
73+
/// ```
74+
#[stable(feature = "utf8_error", since = "1.5.0")]
75+
pub fn valid_up_to(&self) -> usize {
76+
self.valid_up_to
77+
}
78+
79+
/// Provides more information about the failure:
80+
///
81+
/// * `None`: the end of the input was reached unexpectedly.
82+
/// `self.valid_up_to()` is 1 to 3 bytes from the end of the input.
83+
/// If a byte stream (such as a file or a network socket) is being decoded incrementally,
84+
/// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
85+
///
86+
/// * `Some(len)`: an unexpected byte was encountered.
87+
/// The length provided is that of the invalid byte sequence
88+
/// that starts at the index given by `valid_up_to()`.
89+
/// Decoding should resume after that sequence
90+
/// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of
91+
/// lossy decoding.
92+
///
93+
/// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
94+
#[stable(feature = "utf8_error_error_len", since = "1.20.0")]
95+
pub fn error_len(&self) -> Option<usize> {
96+
self.error_len.map(|len| len as usize)
97+
}
98+
}
99+
100+
#[stable(feature = "rust1", since = "1.0.0")]
101+
impl fmt::Display for Utf8Error {
102+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
103+
if let Some(error_len) = self.error_len {
104+
write!(
105+
f,
106+
"invalid utf-8 sequence of {} bytes from index {}",
107+
error_len, self.valid_up_to
108+
)
109+
} else {
110+
write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
111+
}
112+
}
113+
}
114+
115+
/// An error returned when parsing a `bool` using [`from_str`] fails.
116+
///
117+
/// [`from_str`]: super::FromStr::from_str
118+
#[derive(Debug, Clone, PartialEq, Eq)]
119+
#[stable(feature = "rust1", since = "1.0.0")]
120+
pub struct ParseBoolError {
121+
pub(super) _priv: (),
122+
}
123+
124+
#[stable(feature = "rust1", since = "1.0.0")]
125+
impl fmt::Display for ParseBoolError {
126+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
127+
"provided string was not `true` or `false`".fmt(f)
128+
}
129+
}

library/core/src/str/mod.rs

+5-126
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88
99
#![stable(feature = "rust1", since = "1.0.0")]
1010

11+
mod error;
12+
1113
use self::pattern::Pattern;
1214
use self::pattern::{DoubleEndedSearcher, ReverseSearcher, Searcher};
1315

@@ -27,6 +29,9 @@ pub mod pattern;
2729
#[allow(missing_docs)]
2830
pub mod lossy;
2931

32+
#[stable(feature = "rust1", since = "1.0.0")]
33+
pub use error::{ParseBoolError, Utf8Error};
34+
3035
/// Parse a value from a string
3136
///
3237
/// `FromStr`'s [`from_str`] method is often used implicitly, through
@@ -138,121 +143,10 @@ impl FromStr for bool {
138143
}
139144
}
140145

141-
/// An error returned when parsing a `bool` using [`from_str`] fails
142-
///
143-
/// [`from_str`]: FromStr::from_str
144-
#[derive(Debug, Clone, PartialEq, Eq)]
145-
#[stable(feature = "rust1", since = "1.0.0")]
146-
pub struct ParseBoolError {
147-
_priv: (),
148-
}
149-
150-
#[stable(feature = "rust1", since = "1.0.0")]
151-
impl fmt::Display for ParseBoolError {
152-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
153-
"provided string was not `true` or `false`".fmt(f)
154-
}
155-
}
156-
157146
/*
158147
Section: Creating a string
159148
*/
160149

161-
/// Errors which can occur when attempting to interpret a sequence of [`u8`]
162-
/// as a string.
163-
///
164-
/// As such, the `from_utf8` family of functions and methods for both [`String`]s
165-
/// and [`&str`]s make use of this error, for example.
166-
///
167-
/// [`String`]: ../../std/string/struct.String.html#method.from_utf8
168-
/// [`&str`]: from_utf8
169-
///
170-
/// # Examples
171-
///
172-
/// This error type’s methods can be used to create functionality
173-
/// similar to `String::from_utf8_lossy` without allocating heap memory:
174-
///
175-
/// ```
176-
/// fn from_utf8_lossy<F>(mut input: &[u8], mut push: F) where F: FnMut(&str) {
177-
/// loop {
178-
/// match std::str::from_utf8(input) {
179-
/// Ok(valid) => {
180-
/// push(valid);
181-
/// break
182-
/// }
183-
/// Err(error) => {
184-
/// let (valid, after_valid) = input.split_at(error.valid_up_to());
185-
/// unsafe {
186-
/// push(std::str::from_utf8_unchecked(valid))
187-
/// }
188-
/// push("\u{FFFD}");
189-
///
190-
/// if let Some(invalid_sequence_length) = error.error_len() {
191-
/// input = &after_valid[invalid_sequence_length..]
192-
/// } else {
193-
/// break
194-
/// }
195-
/// }
196-
/// }
197-
/// }
198-
/// }
199-
/// ```
200-
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
201-
#[stable(feature = "rust1", since = "1.0.0")]
202-
pub struct Utf8Error {
203-
valid_up_to: usize,
204-
error_len: Option<u8>,
205-
}
206-
207-
impl Utf8Error {
208-
/// Returns the index in the given string up to which valid UTF-8 was
209-
/// verified.
210-
///
211-
/// It is the maximum index such that `from_utf8(&input[..index])`
212-
/// would return `Ok(_)`.
213-
///
214-
/// # Examples
215-
///
216-
/// Basic usage:
217-
///
218-
/// ```
219-
/// use std::str;
220-
///
221-
/// // some invalid bytes, in a vector
222-
/// let sparkle_heart = vec![0, 159, 146, 150];
223-
///
224-
/// // std::str::from_utf8 returns a Utf8Error
225-
/// let error = str::from_utf8(&sparkle_heart).unwrap_err();
226-
///
227-
/// // the second byte is invalid here
228-
/// assert_eq!(1, error.valid_up_to());
229-
/// ```
230-
#[stable(feature = "utf8_error", since = "1.5.0")]
231-
pub fn valid_up_to(&self) -> usize {
232-
self.valid_up_to
233-
}
234-
235-
/// Provides more information about the failure:
236-
///
237-
/// * `None`: the end of the input was reached unexpectedly.
238-
/// `self.valid_up_to()` is 1 to 3 bytes from the end of the input.
239-
/// If a byte stream (such as a file or a network socket) is being decoded incrementally,
240-
/// this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
241-
///
242-
/// * `Some(len)`: an unexpected byte was encountered.
243-
/// The length provided is that of the invalid byte sequence
244-
/// that starts at the index given by `valid_up_to()`.
245-
/// Decoding should resume after that sequence
246-
/// (after inserting a [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD]) in case of
247-
/// lossy decoding.
248-
///
249-
/// [U+FFFD]: ../../std/char/constant.REPLACEMENT_CHARACTER.html
250-
#[stable(feature = "utf8_error_error_len", since = "1.20.0")]
251-
pub fn error_len(&self) -> Option<usize> {
252-
self.error_len.map(|len| len as usize)
253-
}
254-
}
255-
256150
/// Converts a slice of bytes to a string slice.
257151
///
258152
/// A string slice ([`&str`]) is made of bytes ([`u8`]), and a byte slice
@@ -440,21 +334,6 @@ pub unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
440334
unsafe { &mut *(v as *mut [u8] as *mut str) }
441335
}
442336

443-
#[stable(feature = "rust1", since = "1.0.0")]
444-
impl fmt::Display for Utf8Error {
445-
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
446-
if let Some(error_len) = self.error_len {
447-
write!(
448-
f,
449-
"invalid utf-8 sequence of {} bytes from index {}",
450-
error_len, self.valid_up_to
451-
)
452-
} else {
453-
write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
454-
}
455-
}
456-
}
457-
458337
/*
459338
Section: Iterators
460339
*/

0 commit comments

Comments
 (0)