Skip to content

Commit e9fec38

Browse files
authored
Rollup merge of rust-lang#81307 - estebank:invalid-byte-str-span, r=petrochenkov
Handle `Span`s for byte and raw strings and add more detail. CC rust-lang#81208.
2 parents e210a80 + 3b5d018 commit e9fec38

35 files changed

+278
-183
lines changed

compiler/rustc_parse/src/lexer/mod.rs

+20-10
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ mod tokentrees;
1414
mod unescape_error_reporting;
1515
mod unicode_chars;
1616

17-
use unescape_error_reporting::{emit_unescape_error, push_escaped_char};
17+
use unescape_error_reporting::{emit_unescape_error, escaped_char};
1818

1919
#[derive(Clone, Debug)]
2020
pub struct UnmatchedBrace {
@@ -122,11 +122,9 @@ impl<'a> StringReader<'a> {
122122
m: &str,
123123
c: char,
124124
) -> DiagnosticBuilder<'a> {
125-
let mut m = m.to_string();
126-
m.push_str(": ");
127-
push_escaped_char(&mut m, c);
128-
129-
self.sess.span_diagnostic.struct_span_fatal(self.mk_sp(from_pos, to_pos), &m[..])
125+
self.sess
126+
.span_diagnostic
127+
.struct_span_fatal(self.mk_sp(from_pos, to_pos), &format!("{}: {}", m, escaped_char(c)))
130128
}
131129

132130
/// Turns simple `rustc_lexer::TokenKind` enum into a rich
@@ -421,7 +419,7 @@ impl<'a> StringReader<'a> {
421419
let content_start = start + BytePos(prefix_len);
422420
let content_end = suffix_start - BytePos(postfix_len);
423421
let id = self.symbol_from_to(content_start, content_end);
424-
self.validate_literal_escape(mode, content_start, content_end);
422+
self.validate_literal_escape(mode, content_start, content_end, prefix_len, postfix_len);
425423
(lit_kind, id)
426424
}
427425

@@ -525,17 +523,29 @@ impl<'a> StringReader<'a> {
525523
.raise();
526524
}
527525

528-
fn validate_literal_escape(&self, mode: Mode, content_start: BytePos, content_end: BytePos) {
526+
fn validate_literal_escape(
527+
&self,
528+
mode: Mode,
529+
content_start: BytePos,
530+
content_end: BytePos,
531+
prefix_len: u32,
532+
postfix_len: u32,
533+
) {
529534
let lit_content = self.str_from_to(content_start, content_end);
530535
unescape::unescape_literal(lit_content, mode, &mut |range, result| {
531536
// Here we only check for errors. The actual unescaping is done later.
532537
if let Err(err) = result {
533-
let span_with_quotes =
534-
self.mk_sp(content_start - BytePos(1), content_end + BytePos(1));
538+
let span_with_quotes = self
539+
.mk_sp(content_start - BytePos(prefix_len), content_end + BytePos(postfix_len));
540+
let (start, end) = (range.start as u32, range.end as u32);
541+
let lo = content_start + BytePos(start);
542+
let hi = lo + BytePos(end - start);
543+
let span = self.mk_sp(lo, hi);
535544
emit_unescape_error(
536545
&self.sess.span_diagnostic,
537546
lit_content,
538547
span_with_quotes,
548+
span,
539549
mode,
540550
range,
541551
err,

compiler/rustc_parse/src/lexer/unescape_error_reporting.rs

+116-70
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ pub(crate) fn emit_unescape_error(
1313
lit: &str,
1414
// full span of the literal, including quotes
1515
span_with_quotes: Span,
16+
// interior span of the literal, without quotes
17+
span: Span,
1618
mode: Mode,
1719
// range of the error inside `lit`
1820
range: Range<usize>,
@@ -26,13 +28,6 @@ pub(crate) fn emit_unescape_error(
2628
range,
2729
error
2830
);
29-
let span = {
30-
let Range { start, end } = range;
31-
let (start, end) = (start as u32, end as u32);
32-
let lo = span_with_quotes.lo() + BytePos(start + 1);
33-
let hi = lo + BytePos(end - start);
34-
span_with_quotes.with_lo(lo).with_hi(hi)
35-
};
3631
let last_char = || {
3732
let c = lit[range.clone()].chars().rev().next().unwrap();
3833
let span = span.with_lo(span.hi() - BytePos(c.len_utf8() as u32));
@@ -42,20 +37,22 @@ pub(crate) fn emit_unescape_error(
4237
EscapeError::LoneSurrogateUnicodeEscape => {
4338
handler
4439
.struct_span_err(span, "invalid unicode character escape")
40+
.span_label(span, "invalid escape")
4541
.help("unicode escape must not be a surrogate")
4642
.emit();
4743
}
4844
EscapeError::OutOfRangeUnicodeEscape => {
4945
handler
5046
.struct_span_err(span, "invalid unicode character escape")
47+
.span_label(span, "invalid escape")
5148
.help("unicode escape must be at most 10FFFF")
5249
.emit();
5350
}
5451
EscapeError::MoreThanOneChar => {
55-
let msg = if mode.is_bytes() {
56-
"if you meant to write a byte string literal, use double quotes"
52+
let (prefix, msg) = if mode.is_bytes() {
53+
("b", "if you meant to write a byte string literal, use double quotes")
5754
} else {
58-
"if you meant to write a `str` literal, use double quotes"
55+
("", "if you meant to write a `str` literal, use double quotes")
5956
};
6057

6158
handler
@@ -66,31 +63,44 @@ pub(crate) fn emit_unescape_error(
6663
.span_suggestion(
6764
span_with_quotes,
6865
msg,
69-
format!("\"{}\"", lit),
66+
format!("{}\"{}\"", prefix, lit),
7067
Applicability::MachineApplicable,
7168
)
7269
.emit();
7370
}
7471
EscapeError::EscapeOnlyChar => {
75-
let (c, _span) = last_char();
72+
let (c, char_span) = last_char();
7673

77-
let mut msg = if mode.is_bytes() {
78-
"byte constant must be escaped: "
74+
let msg = if mode.is_bytes() {
75+
"byte constant must be escaped"
7976
} else {
80-
"character constant must be escaped: "
81-
}
82-
.to_string();
83-
push_escaped_char(&mut msg, c);
84-
85-
handler.span_err(span, msg.as_str())
77+
"character constant must be escaped"
78+
};
79+
handler
80+
.struct_span_err(span, &format!("{}: `{}`", msg, escaped_char(c)))
81+
.span_suggestion(
82+
char_span,
83+
"escape the character",
84+
c.escape_default().to_string(),
85+
Applicability::MachineApplicable,
86+
)
87+
.emit()
8688
}
8789
EscapeError::BareCarriageReturn => {
8890
let msg = if mode.in_double_quotes() {
89-
"bare CR not allowed in string, use \\r instead"
91+
"bare CR not allowed in string, use `\\r` instead"
9092
} else {
91-
"character constant must be escaped: \\r"
93+
"character constant must be escaped: `\\r`"
9294
};
93-
handler.span_err(span, msg);
95+
handler
96+
.struct_span_err(span, msg)
97+
.span_suggestion(
98+
span,
99+
"escape the character",
100+
"\\r".to_string(),
101+
Applicability::MachineApplicable,
102+
)
103+
.emit();
94104
}
95105
EscapeError::BareCarriageReturnInRawString => {
96106
assert!(mode.in_double_quotes());
@@ -102,21 +112,22 @@ pub(crate) fn emit_unescape_error(
102112

103113
let label =
104114
if mode.is_bytes() { "unknown byte escape" } else { "unknown character escape" };
105-
let mut msg = label.to_string();
106-
msg.push_str(": ");
107-
push_escaped_char(&mut msg, c);
108-
109-
let mut diag = handler.struct_span_err(span, msg.as_str());
115+
let ec = escaped_char(c);
116+
let mut diag = handler.struct_span_err(span, &format!("{}: `{}`", label, ec));
110117
diag.span_label(span, label);
111118
if c == '{' || c == '}' && !mode.is_bytes() {
112119
diag.help(
113-
"if used in a formatting string, \
114-
curly braces are escaped with `{{` and `}}`",
120+
"if used in a formatting string, curly braces are escaped with `{{` and `}}`",
115121
);
116122
} else if c == '\r' {
117123
diag.help(
118-
"this is an isolated carriage return; \
119-
consider checking your editor and version control settings",
124+
"this is an isolated carriage return; consider checking your editor and \
125+
version control settings",
126+
);
127+
} else {
128+
diag.help(
129+
"for more information, visit \
130+
<https://static.rust-lang.org/doc/master/reference.html#literals>",
120131
);
121132
}
122133
diag.emit();
@@ -127,45 +138,70 @@ pub(crate) fn emit_unescape_error(
127138
EscapeError::InvalidCharInHexEscape | EscapeError::InvalidCharInUnicodeEscape => {
128139
let (c, span) = last_char();
129140

130-
let mut msg = if error == EscapeError::InvalidCharInHexEscape {
131-
"invalid character in numeric character escape: "
141+
let msg = if error == EscapeError::InvalidCharInHexEscape {
142+
"invalid character in numeric character escape"
132143
} else {
133-
"invalid character in unicode escape: "
134-
}
135-
.to_string();
136-
push_escaped_char(&mut msg, c);
144+
"invalid character in unicode escape"
145+
};
146+
let c = escaped_char(c);
137147

138-
handler.span_err(span, msg.as_str())
148+
handler
149+
.struct_span_err(span, &format!("{}: `{}`", msg, c))
150+
.span_label(span, msg)
151+
.emit();
139152
}
140153
EscapeError::NonAsciiCharInByte => {
141154
assert!(mode.is_bytes());
142-
let (_c, span) = last_char();
143-
handler.span_err(
144-
span,
145-
"byte constant must be ASCII. \
146-
Use a \\xHH escape for a non-ASCII byte",
147-
)
155+
let (c, span) = last_char();
156+
handler
157+
.struct_span_err(span, "non-ASCII character in byte constant")
158+
.span_label(span, "byte constant must be ASCII")
159+
.span_suggestion(
160+
span,
161+
"use a \\xHH escape for a non-ASCII byte",
162+
format!("\\x{:X}", c as u32),
163+
Applicability::MachineApplicable,
164+
)
165+
.emit();
148166
}
149167
EscapeError::NonAsciiCharInByteString => {
150168
assert!(mode.is_bytes());
151169
let (_c, span) = last_char();
152-
handler.span_err(span, "raw byte string must be ASCII")
170+
handler
171+
.struct_span_err(span, "raw byte string must be ASCII")
172+
.span_label(span, "must be ASCII")
173+
.emit();
174+
}
175+
EscapeError::OutOfRangeHexEscape => {
176+
handler
177+
.struct_span_err(span, "out of range hex escape")
178+
.span_label(span, "must be a character in the range [\\x00-\\x7f]")
179+
.emit();
153180
}
154-
EscapeError::OutOfRangeHexEscape => handler.span_err(
155-
span,
156-
"this form of character escape may only be used \
157-
with characters in the range [\\x00-\\x7f]",
158-
),
159181
EscapeError::LeadingUnderscoreUnicodeEscape => {
160-
let (_c, span) = last_char();
161-
handler.span_err(span, "invalid start of unicode escape")
182+
let (c, span) = last_char();
183+
let msg = "invalid start of unicode escape";
184+
handler
185+
.struct_span_err(span, &format!("{}: `{}`", msg, c))
186+
.span_label(span, msg)
187+
.emit();
162188
}
163189
EscapeError::OverlongUnicodeEscape => {
164-
handler.span_err(span, "overlong unicode escape (must have at most 6 hex digits)")
165-
}
166-
EscapeError::UnclosedUnicodeEscape => {
167-
handler.span_err(span, "unterminated unicode escape (needed a `}`)")
190+
handler
191+
.struct_span_err(span, "overlong unicode escape")
192+
.span_label(span, "must have at most 6 hex digits")
193+
.emit();
168194
}
195+
EscapeError::UnclosedUnicodeEscape => handler
196+
.struct_span_err(span, "unterminated unicode escape")
197+
.span_label(span, "missing a closing `}`")
198+
.span_suggestion_verbose(
199+
span.shrink_to_hi(),
200+
"terminate the unicode escape",
201+
"}".to_string(),
202+
Applicability::MaybeIncorrect,
203+
)
204+
.emit(),
169205
EscapeError::NoBraceInUnicodeEscape => {
170206
let msg = "incorrect unicode escape sequence";
171207
let mut diag = handler.struct_span_err(span, msg);
@@ -195,28 +231,38 @@ pub(crate) fn emit_unescape_error(
195231

196232
diag.emit();
197233
}
198-
EscapeError::UnicodeEscapeInByte => handler.span_err(
199-
span,
200-
"unicode escape sequences cannot be used \
201-
as a byte or in a byte string",
202-
),
234+
EscapeError::UnicodeEscapeInByte => {
235+
let msg = "unicode escape in byte string";
236+
handler
237+
.struct_span_err(span, msg)
238+
.span_label(span, msg)
239+
.help("unicode escape sequences cannot be used as a byte or in a byte string")
240+
.emit();
241+
}
203242
EscapeError::EmptyUnicodeEscape => {
204-
handler.span_err(span, "empty unicode escape (must have at least 1 hex digit)")
243+
handler
244+
.struct_span_err(span, "empty unicode escape")
245+
.span_label(span, "this escape must have at least 1 hex digit")
246+
.emit();
247+
}
248+
EscapeError::ZeroChars => {
249+
let msg = "empty character literal";
250+
handler.struct_span_err(span, msg).span_label(span, msg).emit()
251+
}
252+
EscapeError::LoneSlash => {
253+
let msg = "invalid trailing slash in literal";
254+
handler.struct_span_err(span, msg).span_label(span, msg).emit();
205255
}
206-
EscapeError::ZeroChars => handler.span_err(span, "empty character literal"),
207-
EscapeError::LoneSlash => handler.span_err(span, "invalid trailing slash in literal"),
208256
}
209257
}
210258

211259
/// Returns a character as a string for error reporting, escaping it unless it is printable ASCII
212-
pub(crate) fn push_escaped_char(msg: &mut String, c: char) {
260+
pub(crate) fn escaped_char(c: char) -> String {
213261
match c {
214262
'\u{20}'..='\u{7e}' => {
215263
// Don't escape \, ' or " for user-facing messages
216-
msg.push(c);
217-
}
218-
_ => {
219-
msg.extend(c.escape_default());
264+
c.to_string()
220265
}
266+
_ => c.escape_default().to_string(),
221267
}
222268
}
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
#![feature(rustc_attrs)]
22

3-
#[rustc_dummy = b"ﬃ.rs"] //~ ERROR byte constant must be ASCII
3+
#[rustc_dummy = b"ﬃ.rs"] //~ ERROR non-ASCII character in byte constant
44
fn main() {}
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
1-
error: byte constant must be ASCII. Use a \xHH escape for a non-ASCII byte
1+
error: non-ASCII character in byte constant
22
--> $DIR/key-value-non-ascii.rs:3:19
33
|
44
LL | #[rustc_dummy = b"ﬃ.rs"]
55
| ^
6+
| |
7+
| byte constant must be ASCII
8+
| help: use a \xHH escape for a non-ASCII byte: `\xFB03`
69

710
error: aborting due to previous error
811

Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
fn main() {
2-
let x = "\x80"; //~ ERROR may only be used
3-
let y = "\xff"; //~ ERROR may only be used
4-
let z = "\xe2"; //~ ERROR may only be used
2+
let x = "\x80"; //~ ERROR out of range hex escape
3+
let y = "\xff"; //~ ERROR out of range hex escape
4+
let z = "\xe2"; //~ ERROR out of range hex escape
55
let a = b"\x00e2"; // ok because byte literal
66
}

0 commit comments

Comments
 (0)