Skip to content

Commit fe10f1a

Browse files
committed
Auto merge of rust-lang#72918 - Dylan-DPC:rollup-pnvj62b, r=Dylan-DPC
Rollup of 4 pull requests. Successful merges: - rust-lang#72884 (RawString error reporting cleanup) - rust-lang#72888 (Add a warning about infinite reading in read_(until|line)) - rust-lang#72914 (Minor: off-by-one error in RELEASES.md) - rust-lang#72916 (Update README.md) Failed merges: r? @ghost
2 parents eeaf497 + bed597a commit fe10f1a

File tree

6 files changed

+100
-242
lines changed

6 files changed

+100
-242
lines changed

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ or reading the [rustc dev guide][rustcguidebuild].
2323

2424
[rustcguidebuild]: https://rustc-dev-guide.rust-lang.org/building/how-to-build-and-run.html
2525

26-
### Building on Unix-like system
26+
### Building on a Unix-like system
2727
1. Make sure you have installed the dependencies:
2828

2929
* `g++` 5.1 or later or `clang++` 3.5 or later

RELEASES.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ Libraries
4343
- [Unicode 13 is now supported.][69929]
4444
- [`String` now implements `From<&mut str>`.][69661]
4545
- [`IoSlice` now implements `Copy`.][69403]
46-
- [`Vec<T>` now implements `From<[T; N]>`.][68692] Where `N` is less than 32.
46+
- [`Vec<T>` now implements `From<[T; N]>`.][68692] Where `N` is at most 32.
4747
- [`proc_macro::LexError` now implements `fmt::Display` and `Error`.][68899]
4848
- [`from_le_bytes`, `to_le_bytes`, `from_be_bytes`, `to_be_bytes`,
4949
`from_ne_bytes`, and `to_ne_bytes` methods are now `const` for all

src/librustc_lexer/src/lib.rs

+45-105
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ mod tests;
2929
use self::LiteralKind::*;
3030
use self::TokenKind::*;
3131
use crate::cursor::{Cursor, EOF_CHAR};
32-
use std::convert::TryInto;
32+
use std::convert::TryFrom;
3333

3434
/// Parsed token.
3535
/// It doesn't contain information about data that has been parsed,
@@ -142,84 +142,24 @@ pub enum LiteralKind {
142142
/// "b"abc"", "b"abc"
143143
ByteStr { terminated: bool },
144144
/// "r"abc"", "r#"abc"#", "r####"ab"###"c"####", "r#"a"
145-
RawStr(UnvalidatedRawStr),
145+
RawStr { n_hashes: u16, err: Option<RawStrError> },
146146
/// "br"abc"", "br#"abc"#", "br####"ab"###"c"####", "br#"a"
147-
RawByteStr(UnvalidatedRawStr),
148-
}
149-
150-
/// Represents something that looks like a raw string, but may have some
151-
/// problems. Use `.validate()` to convert it into something
152-
/// usable.
153-
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
154-
pub struct UnvalidatedRawStr {
155-
/// The prefix (`r###"`) is valid
156-
valid_start: bool,
157-
158-
/// The postfix (`"###`) is valid
159-
valid_end: bool,
160-
161-
/// The number of leading `#`
162-
n_start_hashes: usize,
163-
/// The number of trailing `#`. `n_end_hashes` <= `n_start_hashes`
164-
n_end_hashes: usize,
165-
/// The offset starting at `r` or `br` where the user may have intended to end the string.
166-
/// Currently, it is the longest sequence of pattern `"#+"`.
167-
possible_terminator_offset: Option<usize>,
147+
RawByteStr { n_hashes: u16, err: Option<RawStrError> },
168148
}
169149

170150
/// Error produced validating a raw string. Represents cases like:
171-
/// - `r##~"abcde"##`: `LexRawStrError::InvalidStarter`
172-
/// - `r###"abcde"##`: `LexRawStrError::NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11)`
173-
/// - Too many `#`s (>65536): `TooManyDelimiters`
151+
/// - `r##~"abcde"##`: `InvalidStarter`
152+
/// - `r###"abcde"##`: `NoTerminator { expected: 3, found: 2, possible_terminator_offset: Some(11) }`
153+
/// - Too many `#`s (>65535): `TooManyDelimiters`
174154
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
175-
pub enum LexRawStrError {
155+
pub enum RawStrError {
176156
/// Non `#` characters exist between `r` and `"` eg. `r#~"..`
177-
InvalidStarter,
157+
InvalidStarter { bad_char: char },
178158
/// The string was never terminated. `possible_terminator_offset` is the number of characters after `r` or `br` where they
179159
/// may have intended to terminate it.
180160
NoTerminator { expected: usize, found: usize, possible_terminator_offset: Option<usize> },
181-
/// More than 65536 `#`s exist.
182-
TooManyDelimiters,
183-
}
184-
185-
/// Raw String that contains a valid prefix (`#+"`) and postfix (`"#+`) where
186-
/// there are a matching number of `#` characters in both. Note that this will
187-
/// not consume extra trailing `#` characters: `r###"abcde"####` is lexed as a
188-
/// `ValidatedRawString { n_hashes: 3 }` followed by a `#` token.
189-
#[derive(Debug, Eq, PartialEq, Copy, Clone)]
190-
pub struct ValidatedRawStr {
191-
n_hashes: u16,
192-
}
193-
194-
impl ValidatedRawStr {
195-
pub fn num_hashes(&self) -> u16 {
196-
self.n_hashes
197-
}
198-
}
199-
200-
impl UnvalidatedRawStr {
201-
pub fn validate(self) -> Result<ValidatedRawStr, LexRawStrError> {
202-
if !self.valid_start {
203-
return Err(LexRawStrError::InvalidStarter);
204-
}
205-
206-
// Only up to 65535 `#`s are allowed in raw strings
207-
let n_start_safe: u16 =
208-
self.n_start_hashes.try_into().map_err(|_| LexRawStrError::TooManyDelimiters)?;
209-
210-
if self.n_start_hashes > self.n_end_hashes || !self.valid_end {
211-
Err(LexRawStrError::NoTerminator {
212-
expected: self.n_start_hashes,
213-
found: self.n_end_hashes,
214-
possible_terminator_offset: self.possible_terminator_offset,
215-
})
216-
} else {
217-
// Since the lexer should never produce a literal with n_end > n_start, if n_start <= n_end,
218-
// they must be equal.
219-
debug_assert_eq!(self.n_start_hashes, self.n_end_hashes);
220-
Ok(ValidatedRawStr { n_hashes: n_start_safe })
221-
}
222-
}
161+
/// More than 65535 `#`s exist.
162+
TooManyDelimiters { found: usize },
223163
}
224164

225165
/// Base of numeric literal encoding according to its prefix.
@@ -354,12 +294,12 @@ impl Cursor<'_> {
354294
'r' => match (self.first(), self.second()) {
355295
('#', c1) if is_id_start(c1) => self.raw_ident(),
356296
('#', _) | ('"', _) => {
357-
let raw_str_i = self.raw_double_quoted_string(1);
297+
let (n_hashes, err) = self.raw_double_quoted_string(1);
358298
let suffix_start = self.len_consumed();
359-
if raw_str_i.n_end_hashes == raw_str_i.n_start_hashes {
299+
if err.is_none() {
360300
self.eat_literal_suffix();
361301
}
362-
let kind = RawStr(raw_str_i);
302+
let kind = RawStr { n_hashes, err };
363303
Literal { kind, suffix_start }
364304
}
365305
_ => self.ident(),
@@ -389,14 +329,12 @@ impl Cursor<'_> {
389329
}
390330
('r', '"') | ('r', '#') => {
391331
self.bump();
392-
let raw_str_i = self.raw_double_quoted_string(2);
332+
let (n_hashes, err) = self.raw_double_quoted_string(2);
393333
let suffix_start = self.len_consumed();
394-
let terminated = raw_str_i.n_start_hashes == raw_str_i.n_end_hashes;
395-
if terminated {
334+
if err.is_none() {
396335
self.eat_literal_suffix();
397336
}
398-
399-
let kind = RawByteStr(raw_str_i);
337+
let kind = RawByteStr { n_hashes, err };
400338
Literal { kind, suffix_start }
401339
}
402340
_ => self.ident(),
@@ -692,27 +630,34 @@ impl Cursor<'_> {
692630
false
693631
}
694632

695-
/// Eats the double-quoted string and returns an `UnvalidatedRawStr`.
696-
fn raw_double_quoted_string(&mut self, prefix_len: usize) -> UnvalidatedRawStr {
633+
/// Eats the double-quoted string and returns `n_hashes` and an error if encountered.
634+
fn raw_double_quoted_string(&mut self, prefix_len: usize) -> (u16, Option<RawStrError>) {
635+
// Wrap the actual function to handle the error with too many hashes.
636+
// This way, it eats the whole raw string.
637+
let (n_hashes, err) = self.raw_string_unvalidated(prefix_len);
638+
// Only up to 65535 `#`s are allowed in raw strings
639+
match u16::try_from(n_hashes) {
640+
Ok(num) => (num, err),
641+
// We lie about the number of hashes here :P
642+
Err(_) => (0, Some(RawStrError::TooManyDelimiters { found: n_hashes })),
643+
}
644+
}
645+
646+
fn raw_string_unvalidated(&mut self, prefix_len: usize) -> (usize, Option<RawStrError>) {
697647
debug_assert!(self.prev() == 'r');
698-
let mut valid_start: bool = false;
699648
let start_pos = self.len_consumed();
700-
let (mut possible_terminator_offset, mut max_hashes) = (None, 0);
649+
let mut possible_terminator_offset = None;
650+
let mut max_hashes = 0;
701651

702652
// Count opening '#' symbols.
703653
let n_start_hashes = self.eat_while(|c| c == '#');
704654

705655
// Check that string is started.
706656
match self.bump() {
707-
Some('"') => valid_start = true,
708-
_ => {
709-
return UnvalidatedRawStr {
710-
valid_start,
711-
valid_end: false,
712-
n_start_hashes,
713-
n_end_hashes: 0,
714-
possible_terminator_offset,
715-
};
657+
Some('"') => (),
658+
c => {
659+
let c = c.unwrap_or(EOF_CHAR);
660+
return (n_start_hashes, Some(RawStrError::InvalidStarter { bad_char: c }));
716661
}
717662
}
718663

@@ -722,13 +667,14 @@ impl Cursor<'_> {
722667
self.eat_while(|c| c != '"');
723668

724669
if self.is_eof() {
725-
return UnvalidatedRawStr {
726-
valid_start,
727-
valid_end: false,
670+
return (
728671
n_start_hashes,
729-
n_end_hashes: max_hashes,
730-
possible_terminator_offset,
731-
};
672+
Some(RawStrError::NoTerminator {
673+
expected: n_start_hashes,
674+
found: max_hashes,
675+
possible_terminator_offset,
676+
}),
677+
);
732678
}
733679

734680
// Eat closing double quote.
@@ -737,7 +683,7 @@ impl Cursor<'_> {
737683
// Check that amount of closing '#' symbols
738684
// is equal to the amount of opening ones.
739685
// Note that this will not consume extra trailing `#` characters:
740-
// `r###"abcde"####` is lexed as a `LexedRawString { n_hashes: 3 }`
686+
// `r###"abcde"####` is lexed as a `RawStr { n_hashes: 3 }`
741687
// followed by a `#` token.
742688
let mut hashes_left = n_start_hashes;
743689
let is_closing_hash = |c| {
@@ -751,13 +697,7 @@ impl Cursor<'_> {
751697
let n_end_hashes = self.eat_while(is_closing_hash);
752698

753699
if n_end_hashes == n_start_hashes {
754-
return UnvalidatedRawStr {
755-
valid_start,
756-
valid_end: true,
757-
n_start_hashes,
758-
n_end_hashes,
759-
possible_terminator_offset: None,
760-
};
700+
return (n_start_hashes, None);
761701
} else if n_end_hashes > max_hashes {
762702
// Keep track of possible terminators to give a hint about
763703
// where there might be a missing terminator

0 commit comments

Comments
 (0)