Skip to content

Commit de1f766

Browse files
committed
Fix highlighting of byte escape sequences
Currently, non-UTF-8 escape sequences in byte strings, and all escape sequences in byte literals, are not highlighted.
1 parent 37f84c1 commit de1f766

File tree

5 files changed

+48
-10
lines changed

5 files changed

+48
-10
lines changed

crates/ide/src/syntax_highlighting.rs

+9-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ use syntax::{
2424

2525
use crate::{
2626
syntax_highlighting::{
27-
escape::{highlight_escape_char, highlight_escape_string},
27+
escape::{highlight_escape_byte, highlight_escape_char, highlight_escape_string},
2828
format::highlight_format_string,
2929
highlights::Highlights,
3030
macro_::MacroHighlighter,
@@ -471,6 +471,14 @@ fn traverse(
471471
};
472472

473473
highlight_escape_char(hl, &char, range.start())
474+
} else if ast::Byte::can_cast(token.kind())
475+
&& ast::Byte::can_cast(descended_token.kind())
476+
{
477+
let Some(byte) = ast::Byte::cast(token) else {
478+
continue;
479+
};
480+
481+
highlight_escape_byte(hl, &byte, range.start())
474482
}
475483
}
476484

crates/ide/src/syntax_highlighting/escape.rs

+21-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
//! Syntax highlighting for escape sequences
22
use crate::syntax_highlighting::highlights::Highlights;
33
use crate::{HlRange, HlTag};
4-
use syntax::ast::{Char, IsString};
4+
use syntax::ast::{Byte, Char, IsString};
55
use syntax::{AstToken, TextRange, TextSize};
66

77
pub(super) fn highlight_escape_string<T: IsString>(
@@ -43,3 +43,23 @@ pub(super) fn highlight_escape_char(stack: &mut Highlights, char: &Char, start:
4343
TextRange::new(start + TextSize::from(1), start + TextSize::from(text.len() as u32 + 1));
4444
stack.add(HlRange { range, highlight: HlTag::EscapeSequence.into(), binding_hash: None })
4545
}
46+
47+
pub(super) fn highlight_escape_byte(stack: &mut Highlights, byte: &Byte, start: TextSize) {
48+
if byte.value().is_none() {
49+
return;
50+
}
51+
52+
let text = byte.text();
53+
if !text.starts_with("b'") || !text.ends_with('\'') {
54+
return;
55+
}
56+
57+
let text = &text[2..text.len() - 1];
58+
if !text.starts_with('\\') {
59+
return;
60+
}
61+
62+
let range =
63+
TextRange::new(start + TextSize::from(2), start + TextSize::from(text.len() as u32 + 2));
64+
stack.add(HlRange { range, highlight: HlTag::EscapeSequence.into(), binding_hash: None })
65+
}

crates/ide/src/syntax_highlighting/test_data/highlight_strings.html

+4-2
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,8 @@
105105
<span class="keyword">let</span> <span class="variable declaration">a</span> <span class="operator">=</span> <span class="char_literal">'</span><span class="escape_sequence">\x65</span><span class="char_literal">'</span><span class="semicolon">;</span>
106106
<span class="keyword">let</span> <span class="variable declaration">a</span> <span class="operator">=</span> <span class="char_literal">'</span><span class="escape_sequence">\x00</span><span class="char_literal">'</span><span class="semicolon">;</span>
107107

108+
<span class="keyword">let</span> <span class="variable declaration">a</span> <span class="operator">=</span> <span class="byte_literal">b'</span><span class="escape_sequence">\xFF</span><span class="byte_literal">'</span><span class="semicolon">;</span>
109+
108110
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello </span><span class="escape_sequence">{{</span><span class="string_literal macro">Hello</span><span class="escape_sequence">}}</span><span class="string_literal macro">"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
109111
<span class="comment">// from https://doc.rust-lang.org/std/fmt/index.html</span>
110112
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span> <span class="comment">// =&gt; "Hello"</span>
@@ -159,8 +161,8 @@
159161
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"Hello</span><span class="escape_sequence">\n</span><span class="string_literal macro">World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
160162
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="escape_sequence">\u{48}</span><span class="escape_sequence">\x65</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6C</span><span class="escape_sequence">\x6F</span><span class="string_literal macro"> World"</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>
161163

162-
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span>
163-
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">b"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span>
164+
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="string_literal">\xFF</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// invalid non-UTF8 escape sequences</span>
165+
<span class="keyword">let</span> <span class="punctuation">_</span> <span class="operator">=</span> <span class="string_literal">b"</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x28</span><span class="escape_sequence">\x00</span><span class="escape_sequence">\x63</span><span class="escape_sequence">\xFF</span><span class="escape_sequence">\n</span><span class="string_literal">"</span><span class="semicolon">;</span> <span class="comment">// valid bytes</span>
164166
<span class="keyword">let</span> <span class="variable declaration reference">backslash</span> <span class="operator">=</span> <span class="string_literal">r"\\"</span><span class="semicolon">;</span>
165167

166168
<span class="macro">println</span><span class="macro_bang">!</span><span class="parenthesis macro">(</span><span class="string_literal macro">"</span><span class="format_specifier">{</span><span class="escape_sequence">\x41</span><span class="format_specifier">}</span><span class="string_literal macro">"</span><span class="comma macro">,</span> <span class="none macro">A</span> <span class="operator macro">=</span> <span class="numeric_literal macro">92</span><span class="parenthesis macro">)</span><span class="semicolon">;</span>

crates/ide/src/syntax_highlighting/tests.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -451,6 +451,8 @@ fn main() {
451451
let a = '\x65';
452452
let a = '\x00';
453453
454+
let a = b'\xFF';
455+
454456
println!("Hello {{Hello}}");
455457
// from https://doc.rust-lang.org/std/fmt/index.html
456458
println!("Hello"); // => "Hello"
@@ -505,8 +507,8 @@ fn main() {
505507
println!("Hello\nWorld");
506508
println!("\u{48}\x65\x6C\x6C\x6F World");
507509
508-
let _ = "\x28\x28\x00\x63\n";
509-
let _ = b"\x28\x28\x00\x63\n";
510+
let _ = "\x28\x28\x00\x63\xFF\n"; // invalid non-UTF8 escape sequences
511+
let _ = b"\x28\x28\x00\x63\xFF\n"; // valid bytes
510512
let backslash = r"\\";
511513
512514
println!("{\x41}", A = 92);

crates/syntax/src/ast/token_ext.rs

+10-4
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,7 @@ impl QuoteOffsets {
146146

147147
pub trait IsString: AstToken {
148148
const RAW_PREFIX: &'static str;
149+
const MODE: Mode;
149150
fn is_raw(&self) -> bool {
150151
self.text().starts_with(Self::RAW_PREFIX)
151152
}
@@ -181,7 +182,7 @@ pub trait IsString: AstToken {
181182
let text = &self.text()[text_range_no_quotes - start];
182183
let offset = text_range_no_quotes.start() - start;
183184

184-
unescape_literal(text, Mode::Str, &mut |range, unescaped_char| {
185+
unescape_literal(text, Self::MODE, &mut |range, unescaped_char| {
185186
let text_range =
186187
TextRange::new(range.start.try_into().unwrap(), range.end.try_into().unwrap());
187188
cb(text_range + offset, unescaped_char);
@@ -196,6 +197,7 @@ pub trait IsString: AstToken {
196197

197198
impl IsString for ast::String {
198199
const RAW_PREFIX: &'static str = "r";
200+
const MODE: Mode = Mode::Str;
199201
}
200202

201203
impl ast::String {
@@ -213,7 +215,7 @@ impl ast::String {
213215
let mut buf = String::new();
214216
let mut prev_end = 0;
215217
let mut has_error = false;
216-
unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
218+
unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match (
217219
unescaped_char,
218220
buf.capacity() == 0,
219221
) {
@@ -239,6 +241,7 @@ impl ast::String {
239241

240242
impl IsString for ast::ByteString {
241243
const RAW_PREFIX: &'static str = "br";
244+
const MODE: Mode = Mode::ByteStr;
242245
}
243246

244247
impl ast::ByteString {
@@ -256,7 +259,7 @@ impl ast::ByteString {
256259
let mut buf: Vec<u8> = Vec::new();
257260
let mut prev_end = 0;
258261
let mut has_error = false;
259-
unescape_literal(text, Mode::ByteStr, &mut |char_range, unescaped_char| match (
262+
unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match (
260263
unescaped_char,
261264
buf.capacity() == 0,
262265
) {
@@ -282,6 +285,9 @@ impl ast::ByteString {
282285

283286
impl IsString for ast::CString {
284287
const RAW_PREFIX: &'static str = "cr";
288+
// XXX: `Mode::CStr` is not supported by `unescape_literal` of ra-ap-rustc_lexer yet.
289+
// Until it is supported, we treat C strings as byte strings here.
290+
const MODE: Mode = Mode::ByteStr;
285291
}
286292

287293
impl ast::CString {
@@ -299,7 +305,7 @@ impl ast::CString {
299305
let mut buf = String::new();
300306
let mut prev_end = 0;
301307
let mut has_error = false;
302-
unescape_literal(text, Mode::Str, &mut |char_range, unescaped_char| match (
308+
unescape_literal(text, Self::MODE, &mut |char_range, unescaped_char| match (
303309
unescaped_char,
304310
buf.capacity() == 0,
305311
) {

0 commit comments

Comments
 (0)