Skip to content

Commit bde59d0

Browse files
committed
rustc_errors: use perfect hashing for character replacements
1 parent 51b5bb1 commit bde59d0

File tree

4 files changed

+67
-50
lines changed

4 files changed

+67
-50
lines changed

Cargo.lock

+15
Original file line numberDiff line numberDiff line change
@@ -2857,6 +2857,7 @@ version = "0.11.2"
28572857
source = "registry+https://github.com/rust-lang/crates.io-index"
28582858
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
28592859
dependencies = [
2860+
"phf_macros",
28602861
"phf_shared 0.11.2",
28612862
]
28622863

@@ -2890,6 +2891,19 @@ dependencies = [
28902891
"rand",
28912892
]
28922893

2894+
[[package]]
2895+
name = "phf_macros"
2896+
version = "0.11.2"
2897+
source = "registry+https://github.com/rust-lang/crates.io-index"
2898+
checksum = "3444646e286606587e49f3bcf1679b8cef1dc2c5ecc29ddacaffc305180d464b"
2899+
dependencies = [
2900+
"phf_generator 0.11.2",
2901+
"phf_shared 0.11.2",
2902+
"proc-macro2",
2903+
"quote",
2904+
"syn 2.0.67",
2905+
]
2906+
28932907
[[package]]
28942908
name = "phf_shared"
28952909
version = "0.10.0"
@@ -3867,6 +3881,7 @@ version = "0.0.0"
38673881
dependencies = [
38683882
"annotate-snippets 0.10.2",
38693883
"derive_setters",
3884+
"phf",
38703885
"rustc_ast",
38713886
"rustc_ast_pretty",
38723887
"rustc_data_structures",

compiler/rustc_errors/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ edition = "2021"
77
# tidy-alphabetical-start
88
annotate-snippets = "0.10"
99
derive_setters = "0.1.6"
10+
phf = { version = "0.11.2", features = ["macros"] }
1011
rustc_ast = { path = "../rustc_ast" }
1112
rustc_ast_pretty = { path = "../rustc_ast_pretty" }
1213
rustc_data_structures = { path = "../rustc_data_structures" }

compiler/rustc_errors/src/emitter.rs

+46-50
Original file line numberDiff line numberDiff line change
@@ -2558,66 +2558,62 @@ fn num_decimal_digits(num: usize) -> usize {
25582558
}
25592559

25602560
// We replace some characters so the CLI output is always consistent and underlines aligned.
2561-
// Keep the following list in sync with `rustc_span::char_width`.
2562-
// ATTENTION: keep lexicografically sorted so that the binary search will work
2563-
const OUTPUT_REPLACEMENTS: &[(char, &str)] = &[
2564-
// tidy-alphabetical-start
2561+
const OUTPUT_REPLACEMENTS: phf::Map<char, &'static str> = phf::phf_map![
25652562
// In terminals without Unicode support the following will be garbled, but in *all* terminals
25662563
// the underlying codepoint will be as well. We could gate this replacement behind a "unicode
25672564
// support" gate.
2568-
('\0', "␀"),
2569-
('\u{0001}', "␁"),
2570-
('\u{0002}', "␂"),
2571-
('\u{0003}', "␃"),
2572-
('\u{0004}', "␄"),
2573-
('\u{0005}', "␅"),
2574-
('\u{0006}', "␆"),
2575-
('\u{0007}', "␇"),
2576-
('\u{0008}', "␈"),
2577-
('\u{0009}', " "), // We do our own tab replacement
2578-
('\u{000b}', "␋"),
2579-
('\u{000c}', "␌"),
2580-
('\u{000d}', "␍"),
2581-
('\u{000e}', "␎"),
2582-
('\u{000f}', "␏"),
2583-
('\u{0010}', "␐"),
2584-
('\u{0011}', "␑"),
2585-
('\u{0012}', "␒"),
2586-
('\u{0013}', "␓"),
2587-
('\u{0014}', "␔"),
2588-
('\u{0015}', "␕"),
2589-
('\u{0016}', "␖"),
2590-
('\u{0017}', "␗"),
2591-
('\u{0018}', "␘"),
2592-
('\u{0019}', "␙"),
2593-
('\u{001a}', "␚"),
2594-
('\u{001b}', "␛"),
2595-
('\u{001c}', "␜"),
2596-
('\u{001d}', "␝"),
2597-
('\u{001e}', "␞"),
2598-
('\u{001f}', "␟"),
2599-
('\u{007f}', "␡"),
2600-
('\u{200d}', ""), // Replace ZWJ for consistent terminal output of grapheme clusters.
2601-
('\u{202a}', "�"), // The following unicode text flow control characters are inconsistently
2602-
('\u{202b}', "�"), // supported across CLIs and can cause confusion due to the bytes on disk
2603-
('\u{202c}', "�"), // not corresponding to the visible source code, so we replace them always.
2604-
('\u{202d}', "�"),
2605-
('\u{202e}', "�"),
2606-
('\u{2066}', "�"),
2607-
('\u{2067}', "�"),
2608-
('\u{2068}', "�"),
2609-
('\u{2069}', "�"),
2610-
// tidy-alphabetical-end
2565+
'\0' => "␀",
2566+
'\t' => " ", // We do our own tab replacement
2567+
'\r' => "␍",
2568+
'\u{0001}' => "␁",
2569+
'\u{0002}' => "␂",
2570+
'\u{0003}' => "␃",
2571+
'\u{0004}' => "␄",
2572+
'\u{0005}' => "␅",
2573+
'\u{0006}' => "␆",
2574+
'\u{0007}' => "␇",
2575+
'\u{0008}' => "␈",
2576+
'\u{000b}' => "␋",
2577+
'\u{000c}' => "␌",
2578+
'\u{000e}' => "␎",
2579+
'\u{000f}' => "␏",
2580+
'\u{0010}' => "␐",
2581+
'\u{0011}' => "␑",
2582+
'\u{0012}' => "␒",
2583+
'\u{0013}' => "␓",
2584+
'\u{0014}' => "␔",
2585+
'\u{0015}' => "␕",
2586+
'\u{0016}' => "␖",
2587+
'\u{0017}' => "␗",
2588+
'\u{0018}' => "␘",
2589+
'\u{0019}' => "␙",
2590+
'\u{001a}' => "␚",
2591+
'\u{001b}' => "␛",
2592+
'\u{001c}' => "␜",
2593+
'\u{001d}' => "␝",
2594+
'\u{001e}' => "␞",
2595+
'\u{001f}' => "␟",
2596+
'\u{007f}' => "␡",
2597+
'\u{200d}' => "", // Replace ZWJ for consistent terminal output of grapheme clusters.
2598+
'\u{202a}' => "�", // The following unicode text flow control characters are inconsistently
2599+
'\u{202b}' => "�", // supported across CLIs and can cause confusion due to the bytes on disk
2600+
'\u{202c}' => "�", // not corresponding to the visible source code, so we replace them always.
2601+
'\u{202d}' => "�",
2602+
'\u{202e}' => "�",
2603+
'\u{2066}' => "�",
2604+
'\u{2067}' => "�",
2605+
'\u{2068}' => "�",
2606+
'\u{2069}' => "�",
26112607
];
26122608

26132609
fn normalize_whitespace(s: &str) -> String {
26142610
// Scan the input string for a character in the replacement table above. If it's present, replace
26152611
// it with its alternative string (it can be more than 1 char!). Otherwise, retain the input
26162612
// char. At the end, allocate all chars into a string in one operation.
26172613
s.chars().fold(String::with_capacity(s.len()), |mut s, c| {
2618-
match OUTPUT_REPLACEMENTS.binary_search_by_key(&c, |(k, _)| *k) {
2619-
Ok(i) => s.push_str(OUTPUT_REPLACEMENTS[i].1),
2620-
_ => s.push(c),
2614+
match OUTPUT_REPLACEMENTS.get(&c) {
2615+
Some(r) => s.push_str(r),
2616+
None => s.push(c),
26212617
}
26222618
s
26232619
})

src/tools/tidy/src/deps.rs

+5
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,10 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
342342
"parking_lot_core",
343343
"pathdiff",
344344
"perf-event-open-sys",
345+
"phf",
346+
"phf_generator",
347+
"phf_macros",
348+
"phf_shared",
345349
"pin-project-lite",
346350
"polonius-engine",
347351
"portable-atomic", // dependency for platforms doesn't support `AtomicU64` in std
@@ -386,6 +390,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
386390
"sha2",
387391
"sharded-slab",
388392
"shlex",
393+
"siphasher",
389394
"smallvec",
390395
"snap",
391396
"stable_deref_trait",

0 commit comments

Comments
 (0)