Skip to content

Commit 4fc3e48

Browse files
committed
Improve, document and test symbol normalization
1 parent 91a13d9 commit 4fc3e48

File tree

1 file changed

+89
-7
lines changed

1 file changed

+89
-7
lines changed

collector/src/artifact_stats.rs

+89-7
Original file line numberDiff line numberDiff line change
@@ -102,16 +102,41 @@ impl ArtifactStats {
102102
}
103103
}
104104

105-
/// Tries to match hashes produced by rustc in mangled symbol names.
106-
static RUSTC_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
107-
108105
/// Demangle the symbol and remove rustc mangling hashes.
106+
///
107+
/// Normalizes the following things, in the following order:
108+
/// - Demangles the symbol.
109+
/// - Removes `.cold` and `.warm` from the end of the symbol, to merge cold and hot parts of a function
110+
/// into the same symbol.
111+
/// - Removes rustc hashes from the symbol, e.g. `foo[abcdef]::bar` -> `foo::bar` or
112+
/// `foo::abcd` -> `foo`.
113+
/// - Removes suffixes after a dot from the symbol, e.g. `anon.abcdef.123` -> `anon` or
114+
/// `foo.llvm.123` -> `foo`.
109115
fn normalize_symbol_name(symbol: &str) -> String {
110-
let regex =
111-
RUSTC_HASH_REGEX.get_or_init(|| Regex::new(r#"(::)?\b[a-z0-9]{15,17}\b(\.\d+)?"#).unwrap());
116+
/// Tries to match hashes in brackets produced by rustc in mangled symbol names.
117+
static RUSTC_BRACKET_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
118+
/// Tries to match hashes after :: produced by rustc in mangled symbol names.
119+
static RUSTC_HASH_REGEX: OnceLock<Regex> = OnceLock::new();
120+
/// Tries to match suffixes after a dot.
121+
static DOT_SUFFIX_REGEX: OnceLock<Regex> = OnceLock::new();
122+
123+
let bracket_hash_regex =
124+
RUSTC_BRACKET_HASH_REGEX.get_or_init(|| Regex::new(r#"\[[a-z0-9]{13,17}\]"#).unwrap());
125+
let hash_regex = RUSTC_HASH_REGEX.get_or_init(|| Regex::new(r#"::[a-z0-9]{15,17}$"#).unwrap());
126+
let dot_suffix_regex = DOT_SUFFIX_REGEX.get_or_init(|| Regex::new(r#"\.[a-z0-9]+\b"#).unwrap());
127+
128+
let mut symbol = rustc_demangle::demangle(symbol).to_string();
112129

113-
let symbol = rustc_demangle::demangle(symbol).to_string();
114-
regex.replace_all(&symbol, "").to_string()
130+
if let Some(stripped) = symbol.strip_suffix(".cold") {
131+
symbol = stripped.to_string();
132+
}
133+
if let Some(stripped) = symbol.strip_suffix(".warm") {
134+
symbol = stripped.to_string();
135+
}
136+
let symbol = bracket_hash_regex.replace_all(&symbol, "");
137+
let symbol = hash_regex.replace_all(&symbol, "");
138+
let symbol = dot_suffix_regex.replace_all(&symbol, "");
139+
symbol.to_string()
115140
}
116141

117142
/// Simple heuristic that tries to normalize section names.
@@ -229,3 +254,60 @@ pub fn compile_and_get_stats(
229254

230255
Ok(archives)
231256
}
257+
258+
#[cfg(test)]
259+
mod tests {
260+
use super::*;
261+
262+
#[test]
263+
fn normalize_remove_cold_annotation() {
264+
assert_eq!(
265+
normalize_symbol_name("_RNvNtNtNtCs1WKcaCLTok2_16rustc_query_impl10query_impl23specialization_graph_of14get_query_incr26___rust_end_short_backtrace.cold"),
266+
"rustc_query_impl::query_impl::specialization_graph_of::get_query_incr::__rust_end_short_backtrace".to_string()
267+
);
268+
}
269+
270+
#[test]
271+
fn normalize_remove_numeric_suffix() {
272+
assert_eq!(
273+
normalize_symbol_name("_RNvMs3_NtNtCs6gyBaxODSsO_12regex_syntax3ast5parseINtB5_7ParserIQNtB5_6ParserE19parse_with_commentsB9_.llvm.5849848722809994645"),
274+
"<regex_syntax::ast::parse::ParserI<&mut regex_syntax::ast::parse::Parser>>::parse_with_comments".to_string()
275+
);
276+
}
277+
278+
#[test]
279+
fn normalize_remove_numeric_suffix_with_cold() {
280+
assert_eq!(
281+
normalize_symbol_name("_RNvMs_NtNtCs60zRYs2wPJS_11rustc_parse6parser2tyNtB6_6Parser15parse_ty_common.llvm.13047176952295404880.cold"),
282+
"<rustc_parse::parser::Parser>::parse_ty_common".to_string()
283+
);
284+
}
285+
286+
#[test]
287+
fn normalize_hash_at_end() {
288+
assert_eq!(
289+
normalize_symbol_name("anon.58936091071a36b1b82cf536b463328b.3488"),
290+
"anon".to_string()
291+
);
292+
}
293+
294+
#[test]
295+
fn normalize_short_hash() {
296+
assert_eq!(
297+
normalize_symbol_name(
298+
"_RNvNtCsifRNxopDi_20rustc_builtin_macros6format16make_format_args"
299+
),
300+
"rustc_builtin_macros::format::make_format_args".to_string()
301+
);
302+
}
303+
304+
#[test]
305+
fn normalize_hash_without_brackets() {
306+
assert_eq!(
307+
normalize_symbol_name(
308+
"_ZN10proc_macro5quote5quote28_$u7b$$u7b$closure$u7d$$u7d$17h90045007b0e69fc9E"
309+
),
310+
"proc_macro::quote::quote::{{closure}}".to_string()
311+
);
312+
}
313+
}

0 commit comments

Comments
 (0)