Skip to content

Commit 11f70c8

Browse files
committed
proc_macro: stop using a remote object handle for Ident
Validating and NFC-normalizing identifiers requires a dependency on `unicode-normalization` and `rustc_lexer`, which is currently not possible for `proc_macro`. Instead, a second `extern "C" fn` is provided by the compiler server to perform these steps from any thread. String values are interned in both the server and client, meaning that identifiers can be stringified without any RPC roundtrips and without substantially inflating their size. RPC messages passing symbols include the full un-interned value, and are re-interned on the receiving side. This could potentially be optimized in the future. The symbol infrastructure will also be used for literals in a following part.
1 parent c8ea717 commit 11f70c8

File tree

9 files changed

+342
-133
lines changed

9 files changed

+342
-133
lines changed

Cargo.lock

+1
Original file line numberDiff line numberDiff line change
@@ -3857,6 +3857,7 @@ dependencies = [
38573857
"rustc_span",
38583858
"smallvec",
38593859
"tracing",
3860+
"unicode-normalization",
38603861
]
38613862

38623863
[[package]]

compiler/rustc_expand/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,4 @@ rustc_session = { path = "../rustc_session" }
2626
smallvec = { version = "1.6.1", features = ["union", "may_dangle"] }
2727
rustc_ast = { path = "../rustc_ast" }
2828
crossbeam-channel = "0.5.0"
29+
unicode-normalization = "0.1.11"

compiler/rustc_expand/src/proc_macro_server.rs

+37-53
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@ use rustc_data_structures::sync::Lrc;
1010
use rustc_errors::Diagnostic;
1111
use rustc_lint_defs::builtin::PROC_MACRO_BACK_COMPAT;
1212
use rustc_lint_defs::BuiltinLintDiagnostics;
13-
use rustc_parse::lexer::nfc_normalize;
1413
use rustc_parse::{nt_to_tokenstream, parse_stream_from_source_str};
1514
use rustc_session::parse::ParseSess;
1615
use rustc_span::def_id::CrateNum;
@@ -19,10 +18,10 @@ use rustc_span::hygiene::ExpnKind;
1918
use rustc_span::symbol::{self, kw, sym, Symbol};
2019
use rustc_span::{BytePos, FileName, MultiSpan, Pos, RealFileName, SourceFile, Span};
2120

22-
use pm::bridge::{server, DelimSpan, Group, Punct, TokenTree};
21+
use pm::bridge::{server, DelimSpan, Group, Ident, Punct, TokenTree};
2322
use pm::{Delimiter, Level, LineColumn};
23+
use std::ascii;
2424
use std::ops::Bound;
25-
use std::{ascii, panic};
2625

2726
trait FromInternal<T> {
2827
fn from_internal(x: T) -> Self;
@@ -55,7 +54,7 @@ impl ToInternal<token::DelimToken> for Delimiter {
5554
}
5655

5756
impl FromInternal<(TokenStream, &mut Rustc<'_>)>
58-
for Vec<TokenTree<TokenStream, Span, Ident, Literal>>
57+
for Vec<TokenTree<TokenStream, Span, Symbol, Literal>>
5958
{
6059
fn from_internal((stream, rustc): (TokenStream, &mut Rustc<'_>)) -> Self {
6160
use rustc_ast::token::*;
@@ -157,12 +156,11 @@ impl FromInternal<(TokenStream, &mut Rustc<'_>)>
157156
Question => op!('?'),
158157
SingleQuote => op!('\''),
159158

160-
Ident(name, false) if name == kw::DollarCrate => tt!(Ident::dollar_crate()),
161-
Ident(name, is_raw) => tt!(Ident::new(rustc.sess, name, is_raw)),
159+
Ident(sym, is_raw) => tt!(Ident { sym, is_raw }),
162160
Lifetime(name) => {
163161
let ident = symbol::Ident::new(name, span).without_first_quote();
164162
tt!(Punct { ch: '\'', joint: true });
165-
tt!(Ident::new(rustc.sess, ident.name, false));
163+
tt!(Ident { sym: ident.name, is_raw: false });
166164
}
167165
Literal(lit) => tt!(Literal { lit }),
168166
DocComment(_, attr_style, data) => {
@@ -191,9 +189,11 @@ impl FromInternal<(TokenStream, &mut Rustc<'_>)>
191189

192190
Interpolated(nt) => {
193191
if let Some((name, is_raw)) = ident_name_compatibility_hack(&nt, span, rustc) {
194-
trees.push(TokenTree::Ident(Ident::new(
195-
rustc.sess, name.name, is_raw, name.span,
196-
)));
192+
trees.push(TokenTree::Ident(Ident {
193+
sym: name.name,
194+
is_raw,
195+
span: name.span,
196+
}));
197197
} else {
198198
let stream =
199199
nt_to_tokenstream(&nt, rustc.sess, CanSynthesizeMissingTokens::No);
@@ -217,7 +217,7 @@ impl FromInternal<(TokenStream, &mut Rustc<'_>)>
217217
}
218218
}
219219

220-
impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Ident, Literal> {
220+
impl ToInternal<TokenStream> for TokenTree<TokenStream, Span, Symbol, Literal> {
221221
fn to_internal(self) -> TokenStream {
222222
use rustc_ast::token::*;
223223

@@ -306,32 +306,6 @@ impl ToInternal<rustc_errors::Level> for Level {
306306

307307
pub struct FreeFunctions;
308308

309-
#[derive(Copy, Clone, PartialEq, Eq, Hash)]
310-
pub struct Ident {
311-
sym: Symbol,
312-
is_raw: bool,
313-
span: Span,
314-
}
315-
316-
impl Ident {
317-
fn new(sess: &ParseSess, sym: Symbol, is_raw: bool, span: Span) -> Ident {
318-
let sym = nfc_normalize(&sym.as_str());
319-
let string = sym.as_str();
320-
if !rustc_lexer::is_ident(&string) {
321-
panic!("`{:?}` is not a valid identifier", string)
322-
}
323-
if is_raw && !sym.can_be_raw() {
324-
panic!("`{}` cannot be a raw identifier", string);
325-
}
326-
sess.symbol_gallery.insert(sym, span);
327-
Ident { sym, is_raw, span }
328-
}
329-
fn dollar_crate(span: Span) -> Ident {
330-
// `$crate` is accepted as an ident only if it comes from the compiler.
331-
Ident { sym: kw::DollarCrate, is_raw: false, span }
332-
}
333-
}
334-
335309
// FIXME(eddyb) `Literal` should not expose internal `Debug` impls.
336310
#[derive(Clone, Debug)]
337311
pub struct Literal {
@@ -382,12 +356,12 @@ impl<'a> Rustc<'a> {
382356
impl server::Types for Rustc<'_> {
383357
type FreeFunctions = FreeFunctions;
384358
type TokenStream = TokenStream;
385-
type Ident = Ident;
386359
type Literal = Literal;
387360
type SourceFile = Lrc<SourceFile>;
388361
type MultiSpan = Vec<Span>;
389362
type Diagnostic = Diagnostic;
390363
type Span = Span;
364+
type Symbol = Symbol;
391365
}
392366

393367
impl server::FreeFunctions for Rustc<'_> {
@@ -413,14 +387,14 @@ impl server::TokenStream for Rustc<'_> {
413387
}
414388
fn from_token_tree(
415389
&mut self,
416-
tree: TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>,
390+
tree: TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>,
417391
) -> Self::TokenStream {
418392
tree.to_internal()
419393
}
420394
fn concat_trees(
421395
&mut self,
422396
base: Option<Self::TokenStream>,
423-
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>>,
397+
trees: Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>>,
424398
) -> Self::TokenStream {
425399
let mut builder = tokenstream::TokenStreamBuilder::new();
426400
if let Some(base) = base {
@@ -448,23 +422,11 @@ impl server::TokenStream for Rustc<'_> {
448422
fn into_iter(
449423
&mut self,
450424
stream: Self::TokenStream,
451-
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Ident, Self::Literal>> {
425+
) -> Vec<TokenTree<Self::TokenStream, Self::Span, Self::Symbol, Self::Literal>> {
452426
FromInternal::from_internal((stream, self))
453427
}
454428
}
455429

456-
impl server::Ident for Rustc<'_> {
457-
fn new(&mut self, string: &str, span: Self::Span, is_raw: bool) -> Self::Ident {
458-
Ident::new(self.sess, Symbol::intern(string), is_raw, span)
459-
}
460-
fn span(&mut self, ident: Self::Ident) -> Self::Span {
461-
ident.span
462-
}
463-
fn with_span(&mut self, ident: Self::Ident, span: Self::Span) -> Self::Ident {
464-
Ident { span, ..ident }
465-
}
466-
}
467-
468430
impl server::Literal for Rustc<'_> {
469431
fn from_str(&mut self, s: &str) -> Result<Self::Literal, ()> {
470432
let override_span = None;
@@ -729,6 +691,28 @@ impl server::Context for Rustc<'_> {
729691
fn mixed_site(&mut self) -> Self::Span {
730692
self.mixed_site
731693
}
694+
695+
// NOTE: May be run on any thread, so cannot use `nfc_normalize`
696+
fn validate_ident(s: &str) -> Result<Option<String>, ()> {
697+
use unicode_normalization::{is_nfc_quick, IsNormalized, UnicodeNormalization};
698+
let normalized: Option<String> = match is_nfc_quick(s.chars()) {
699+
IsNormalized::Yes => None,
700+
_ => Some(s.chars().nfc().collect()),
701+
};
702+
if rustc_lexer::is_ident(normalized.as_ref().map(|s| &s[..]).unwrap_or(s)) {
703+
Ok(normalized)
704+
} else {
705+
Err(())
706+
}
707+
}
708+
709+
fn intern_symbol(string: &str) -> Self::Symbol {
710+
Symbol::intern(string)
711+
}
712+
713+
fn with_symbol_string(symbol: &Self::Symbol, f: impl FnOnce(&str)) {
714+
f(&symbol.as_str())
715+
}
732716
}
733717

734718
// See issue #74616 for details

library/proc_macro/src/bridge/buffer.rs

+29
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,35 @@ use std::mem;
55
use std::ops::{Deref, DerefMut};
66
use std::slice;
77

8+
#[repr(C)]
9+
pub struct Slice<'a, T> {
10+
data: &'a [T; 0],
11+
len: usize,
12+
}
13+
14+
unsafe impl<'a, T: Sync> Sync for Slice<'a, T> {}
15+
unsafe impl<'a, T: Sync> Send for Slice<'a, T> {}
16+
17+
impl<T> Copy for Slice<'a, T> {}
18+
impl<T> Clone for Slice<'a, T> {
19+
fn clone(&self) -> Self {
20+
*self
21+
}
22+
}
23+
24+
impl<T> From<&'a [T]> for Slice<'a, T> {
25+
fn from(xs: &'a [T]) -> Self {
26+
Slice { data: unsafe { &*(xs.as_ptr() as *const [T; 0]) }, len: xs.len() }
27+
}
28+
}
29+
30+
impl<T> Deref for Slice<'a, T> {
31+
type Target = [T];
32+
fn deref(&self) -> &[T] {
33+
unsafe { slice::from_raw_parts(self.data.as_ptr(), self.len) }
34+
}
35+
}
36+
837
#[repr(C)]
938
pub struct Buffer<T: Copy> {
1039
data: *mut T,

0 commit comments

Comments
 (0)