Commit 4bb4dc4

Auto merge of rust-lang#97251 - petrochenkov:eqtokens, r=nnethercote

rustc_parse: Move AST -> TokenStream conversion logic to rustc_ast

In the past, falling back to reparsing pretty-printed strings was common, so some of this logic had to live in `rustc_parse`. Now the reparsing fallback is only used in two corner cases, so the logic can move to `rustc_ast`, which simplifies many things. It also helps fix `MacArgs::inner_tokens` for `MacArgs::Eq` with non-literal expressions, which is done in the second commit.

r? `@nnethercote`

2 parents 6534637 + 09b4c7c commit 4bb4dc4
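
For illustration only (this example is not part of the diff): the case the second commit fixes is a key-value attribute whose right-hand side is an expression rather than a plain literal. With this change, `MacArgs::inner_tokens` extracts the value through `TokenStream::from_ast` from the expression's collected tokens instead of hitting the `unreachable!` in the old literal-only path.

// Hypothetical example of a `MacArgs::Eq` attribute whose value is a macro
// call rather than a literal; its inner tokens are now recoverable.
#[doc = concat!("Generated ", "documentation.")]
pub struct Example;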

14 files changed: +127, -300 lines

compiler/rustc_ast/src/ast.rs

+4-18
@@ -23,8 +23,8 @@ pub use GenericArgs::*;
 pub use UnsafeSource::*;
 
 use crate::ptr::P;
-use crate::token::{self, CommentKind, Delimiter, Token, TokenKind};
-use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream, TokenTree};
+use crate::token::{self, CommentKind, Delimiter};
+use crate::tokenstream::{DelimSpan, LazyTokenStream, TokenStream};
 
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
 use rustc_data_structures::stack::ensure_sufficient_stack;
@@ -444,8 +444,7 @@ impl Default for Generics {
 pub struct WhereClause {
     /// `true` if we ate a `where` token: this can happen
     /// if we parsed no predicates (e.g. `struct Foo where {}`).
-    /// This allows us to accurately pretty-print
-    /// in `nt_to_tokenstream`
+    /// This allows us to pretty-print accurately.
     pub has_where_token: bool,
     pub predicates: Vec<WherePredicate>,
     pub span: Span,
@@ -1571,20 +1570,7 @@ impl MacArgs {
         match self {
             MacArgs::Empty => TokenStream::default(),
             MacArgs::Delimited(.., tokens) => tokens.clone(),
-            MacArgs::Eq(_, MacArgsEq::Ast(expr)) => {
-                // Currently only literals are allowed here. If more complex expression kinds are
-                // allowed in the future, then `nt_to_tokenstream` should be used to extract the
-                // token stream. This will require some cleverness, perhaps with a function
-                // pointer, because `nt_to_tokenstream` is not directly usable from this crate.
-                // It will also require changing the `parse_expr` call in `parse_mac_args_common`
-                // to `parse_expr_force_collect`.
-                if let ExprKind::Lit(lit) = &expr.kind {
-                    let token = Token::new(TokenKind::Literal(lit.token), lit.span);
-                    TokenTree::Token(token).into()
-                } else {
-                    unreachable!("couldn't extract literal when getting inner tokens: {:?}", expr)
-                }
-            }
+            MacArgs::Eq(_, MacArgsEq::Ast(expr)) => TokenStream::from_ast(expr),
             MacArgs::Eq(_, MacArgsEq::Hir(lit)) => {
                 unreachable!("in literal form when getting inner tokens: {:?}", lit)
             }
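
A caller-side sketch (assuming the rustc-internal types above): the way `inner_tokens` is invoked does not change; the `Eq` arm simply no longer requires the expression to be a literal, only that the parser collected tokens for it.

// Sketch only: getting the argument tokens of an attribute or macro invocation.
// For `MacArgs::Eq(_, MacArgsEq::Ast(expr))` this now works for any expression
// with collected tokens, not just literals.
fn args_tokens(args: &MacArgs) -> TokenStream {
    args.inner_tokens()
}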

compiler/rustc_ast/src/ast_traits.rs

+7-1
@@ -108,14 +108,20 @@ macro_rules! impl_has_span {
     };
 }
 
-impl_has_span!(AssocItem, Expr, ForeignItem, Item, Stmt);
+impl_has_span!(AssocItem, Block, Expr, ForeignItem, Item, Pat, Path, Stmt, Ty, Visibility);
 
 impl<T: AstDeref<Target: HasSpan>> HasSpan for T {
     fn span(&self) -> Span {
         self.ast_deref().span()
     }
 }
 
+impl HasSpan for AttrItem {
+    fn span(&self) -> Span {
+        self.span()
+    }
+}
+
 /// A trait for AST nodes having (or not having) collected tokens.
 pub trait HasTokens {
     fn tokens(&self) -> Option<&LazyTokenStream>;

compiler/rustc_ast/src/tokenstream.rs

+86-7
@@ -13,7 +13,9 @@
 //! and a borrowed `TokenStream` is sufficient to build an owned `TokenStream` without taking
 //! ownership of the original.
 
-use crate::token::{self, Delimiter, Token, TokenKind};
+use crate::ast::StmtKind;
+use crate::ast_traits::{HasAttrs, HasSpan, HasTokens};
+use crate::token::{self, Delimiter, Nonterminal, Token, TokenKind};
 use crate::AttrVec;
 
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
@@ -45,12 +47,6 @@ pub enum TokenTree {
     Delimited(DelimSpan, Delimiter, TokenStream),
 }
 
-#[derive(Copy, Clone)]
-pub enum CanSynthesizeMissingTokens {
-    Yes,
-    No,
-}
-
 // Ensure all fields of `TokenTree` is `Send` and `Sync`.
 #[cfg(parallel_compiler)]
 fn _dummy()
@@ -471,6 +467,89 @@ impl TokenStream {
             .collect(),
         ))
     }
+
+    fn opt_from_ast(node: &(impl HasAttrs + HasTokens)) -> Option<TokenStream> {
+        let tokens = node.tokens()?;
+        let attrs = node.attrs();
+        let attr_annotated = if attrs.is_empty() {
+            tokens.create_token_stream()
+        } else {
+            let attr_data = AttributesData { attrs: attrs.to_vec().into(), tokens: tokens.clone() };
+            AttrAnnotatedTokenStream::new(vec![(
+                AttrAnnotatedTokenTree::Attributes(attr_data),
+                Spacing::Alone,
+            )])
+        };
+        Some(attr_annotated.to_tokenstream())
+    }
+
+    pub fn from_ast(node: &(impl HasAttrs + HasSpan + HasTokens + fmt::Debug)) -> TokenStream {
+        TokenStream::opt_from_ast(node)
+            .unwrap_or_else(|| panic!("missing tokens for node at {:?}: {:?}", node.span(), node))
+    }
+
+    pub fn from_nonterminal_ast(nt: &Nonterminal) -> TokenStream {
+        match nt {
+            Nonterminal::NtIdent(ident, is_raw) => {
+                TokenTree::token(token::Ident(ident.name, *is_raw), ident.span).into()
+            }
+            Nonterminal::NtLifetime(ident) => {
+                TokenTree::token(token::Lifetime(ident.name), ident.span).into()
+            }
+            Nonterminal::NtItem(item) => TokenStream::from_ast(item),
+            Nonterminal::NtBlock(block) => TokenStream::from_ast(block),
+            Nonterminal::NtStmt(stmt) if let StmtKind::Empty = stmt.kind => {
+                // FIXME: Properly collect tokens for empty statements.
+                TokenTree::token(token::Semi, stmt.span).into()
+            }
+            Nonterminal::NtStmt(stmt) => TokenStream::from_ast(stmt),
+            Nonterminal::NtPat(pat) => TokenStream::from_ast(pat),
+            Nonterminal::NtTy(ty) => TokenStream::from_ast(ty),
+            Nonterminal::NtMeta(attr) => TokenStream::from_ast(attr),
+            Nonterminal::NtPath(path) => TokenStream::from_ast(path),
+            Nonterminal::NtVis(vis) => TokenStream::from_ast(vis),
+            Nonterminal::NtExpr(expr) | Nonterminal::NtLiteral(expr) => TokenStream::from_ast(expr),
+        }
+    }
+
+    fn flatten_token(token: &Token) -> TokenTree {
+        match &token.kind {
+            token::Interpolated(nt) if let token::NtIdent(ident, is_raw) = **nt => {
+                TokenTree::token(token::Ident(ident.name, is_raw), ident.span)
+            }
+            token::Interpolated(nt) => TokenTree::Delimited(
+                DelimSpan::from_single(token.span),
+                Delimiter::Invisible,
+                TokenStream::from_nonterminal_ast(&nt).flattened(),
+            ),
+            _ => TokenTree::Token(token.clone()),
+        }
+    }
+
+    fn flatten_token_tree(tree: &TokenTree) -> TokenTree {
+        match tree {
+            TokenTree::Token(token) => TokenStream::flatten_token(token),
+            TokenTree::Delimited(span, delim, tts) => {
+                TokenTree::Delimited(*span, *delim, tts.flattened())
+            }
+        }
+    }
+
+    #[must_use]
+    pub fn flattened(&self) -> TokenStream {
+        fn can_skip(stream: &TokenStream) -> bool {
+            stream.trees().all(|tree| match tree {
+                TokenTree::Token(token) => !matches!(token.kind, token::Interpolated(_)),
+                TokenTree::Delimited(_, _, inner) => can_skip(inner),
+            })
+        }
+
+        if can_skip(self) {
+            return self.clone();
+        }
+
+        self.trees().map(|tree| TokenStream::flatten_token_tree(tree)).collect()
+    }
 }
 
 // 99.5%+ of the time we have 1 or 2 elements in this vector.
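
A minimal usage sketch (assuming the rustc-internal API added above and an expression whose tokens were collected during parsing): `from_ast` recovers the node's token stream, and `flattened` replaces any `Interpolated` tokens with invisible-delimited groups so later consumers never see nonterminals.

// Sketch only, not part of the diff. `from_ast` panics if the node has no
// collected tokens (see `opt_from_ast`, which returns `None` in that case).
fn expr_to_flat_tokens(expr: &ast::Expr) -> TokenStream {
    let tokens = TokenStream::from_ast(expr);
    tokens.flattened()
}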

compiler/rustc_ast_lowering/src/item.rs

-3
@@ -13,7 +13,6 @@ use rustc_hir::def::{DefKind, Res};
 use rustc_hir::def_id::{LocalDefId, CRATE_DEF_ID};
 use rustc_hir::PredicateOrigin;
 use rustc_index::vec::{Idx, IndexVec};
-use rustc_session::utils::NtToTokenstream;
 use rustc_session::Session;
 use rustc_span::source_map::DesugaringKind;
 use rustc_span::symbol::{kw, sym, Ident};
@@ -27,7 +26,6 @@ use std::iter;
 pub(super) struct ItemLowerer<'a, 'hir> {
     pub(super) sess: &'a Session,
     pub(super) resolver: &'a mut dyn ResolverAstLowering,
-    pub(super) nt_to_tokenstream: NtToTokenstream,
     pub(super) arena: &'hir Arena<'hir>,
     pub(super) ast_index: &'a IndexVec<LocalDefId, AstOwner<'a>>,
     pub(super) owners: &'a mut IndexVec<LocalDefId, hir::MaybeOwner<&'hir hir::OwnerInfo<'hir>>>,
@@ -63,7 +61,6 @@ impl<'a, 'hir> ItemLowerer<'a, 'hir> {
             // Pseudo-globals.
             sess: &self.sess,
             resolver: self.resolver,
-            nt_to_tokenstream: self.nt_to_tokenstream,
             arena: self.arena,
 
             // HirId handling.

compiler/rustc_ast_lowering/src/lib.rs

+3-35
@@ -37,7 +37,6 @@
 #![recursion_limit = "256"]
 #![allow(rustc::potential_query_instability)]
 
-use rustc_ast::tokenstream::{CanSynthesizeMissingTokens, TokenStream};
 use rustc_ast::visit;
 use rustc_ast::{self as ast, *};
 use rustc_ast_pretty::pprust;
@@ -56,7 +55,6 @@ use rustc_hir::{ConstArg, GenericArg, ItemLocalId, ParamName, TraitCandidate};
 use rustc_index::vec::{Idx, IndexVec};
 use rustc_query_system::ich::StableHashingContext;
 use rustc_session::parse::feature_err;
-use rustc_session::utils::{FlattenNonterminals, NtToTokenstream};
 use rustc_session::Session;
 use rustc_span::hygiene::{ExpnId, MacroKind};
 use rustc_span::source_map::DesugaringKind;
@@ -89,11 +87,6 @@ struct LoweringContext<'a, 'hir: 'a> {
 
     resolver: &'a mut dyn ResolverAstLowering,
 
-    /// HACK(Centril): there is a cyclic dependency between the parser and lowering
-    /// if we don't have this function pointer. To avoid that dependency so that
-    /// `rustc_middle` is independent of the parser, we use dynamic dispatch here.
-    nt_to_tokenstream: NtToTokenstream,
-
     /// Used to allocate HIR nodes.
     arena: &'hir Arena<'hir>,
 
@@ -436,7 +429,6 @@ pub fn lower_crate<'a, 'hir>(
     sess: &'a Session,
     krate: &'a Crate,
     resolver: &'a mut dyn ResolverAstLowering,
-    nt_to_tokenstream: NtToTokenstream,
     arena: &'hir Arena<'hir>,
 ) -> &'hir hir::Crate<'hir> {
     let _prof_timer = sess.prof.verbose_generic_activity("hir_lowering");
@@ -447,15 +439,8 @@ pub fn lower_crate<'a, 'hir>(
         IndexVec::from_fn_n(|_| hir::MaybeOwner::Phantom, resolver.definitions().def_index_count());
 
     for def_id in ast_index.indices() {
-        item::ItemLowerer {
-            sess,
-            resolver,
-            nt_to_tokenstream,
-            arena,
-            ast_index: &ast_index,
-            owners: &mut owners,
-        }
-        .lower_node(def_id);
+        item::ItemLowerer { sess, resolver, arena, ast_index: &ast_index, owners: &mut owners }
+            .lower_node(def_id);
     }
 
     let hir_hash = compute_hir_hash(resolver, &owners);
@@ -875,11 +860,7 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
                 // ```
                 //
                 // In both cases, we don't want to synthesize any tokens
-                MacArgs::Delimited(
-                    dspan,
-                    delim,
-                    self.lower_token_stream(tokens.clone(), CanSynthesizeMissingTokens::No),
-                )
+                MacArgs::Delimited(dspan, delim, tokens.flattened())
             }
             // This is an inert key-value attribute - it will never be visible to macros
             // after it gets lowered to HIR. Therefore, we can extract literals to handle
@@ -904,19 +885,6 @@ impl<'a, 'hir> LoweringContext<'a, 'hir> {
         }
     }
 
-    fn lower_token_stream(
-        &self,
-        tokens: TokenStream,
-        synthesize_tokens: CanSynthesizeMissingTokens,
-    ) -> TokenStream {
-        FlattenNonterminals {
-            parse_sess: &self.sess.parse_sess,
-            synthesize_tokens,
-            nt_to_tokenstream: self.nt_to_tokenstream,
-        }
-        .process_token_stream(tokens)
-    }
-
     /// Given an associated type constraint like one of these:
     ///
     /// ```ignore (illustrative)

compiler/rustc_ast_pretty/src/pprust/mod.rs

+1-31
@@ -4,42 +4,12 @@ mod tests;
 
 pub mod state;
 pub use state::{print_crate, AnnNode, Comments, PpAnn, PrintState, State};
 
+use rustc_ast as ast;
 use rustc_ast::token::{Nonterminal, Token, TokenKind};
 use rustc_ast::tokenstream::{TokenStream, TokenTree};
-use rustc_ast::{self as ast, AstDeref};
 
 use std::borrow::Cow;
 
-pub trait AstPrettyPrint {
-    fn pretty_print(&self) -> String;
-}
-
-impl<T: AstDeref<Target: AstPrettyPrint>> AstPrettyPrint for T {
-    fn pretty_print(&self) -> String {
-        self.ast_deref().pretty_print()
-    }
-}
-
-macro_rules! impl_ast_pretty_print {
-    ($($T:ty => $method:ident),+ $(,)?) => {
-        $(
-            impl AstPrettyPrint for $T {
-                fn pretty_print(&self) -> String {
-                    State::new().$method(self)
-                }
-            }
-        )+
-    };
-}
-
-impl_ast_pretty_print! {
-    ast::Item => item_to_string,
-    ast::AssocItem => assoc_item_to_string,
-    ast::ForeignItem => foreign_item_to_string,
-    ast::Expr => expr_to_string,
-    ast::Stmt => stmt_to_string,
-}
-
 pub fn nonterminal_to_string(nt: &Nonterminal) -> String {
     State::new().nonterminal_to_string(nt)
 }

compiler/rustc_builtin_macros/src/cfg_eval.rs

+1-10
@@ -3,7 +3,6 @@ use crate::util::{check_builtin_macro_attribute, warn_on_duplicate_attribute};
 use rustc_ast as ast;
 use rustc_ast::mut_visit::MutVisitor;
 use rustc_ast::ptr::P;
-use rustc_ast::tokenstream::CanSynthesizeMissingTokens;
 use rustc_ast::visit::Visitor;
 use rustc_ast::NodeId;
 use rustc_ast::{mut_visit, visit};
@@ -13,7 +12,6 @@ use rustc_expand::config::StripUnconfigured;
 use rustc_expand::configure;
 use rustc_feature::Features;
 use rustc_parse::parser::{ForceCollect, Parser};
-use rustc_session::utils::FlattenNonterminals;
 use rustc_session::Session;
 use rustc_span::symbol::sym;
 use rustc_span::Span;
@@ -174,8 +172,6 @@ impl CfgEval<'_, '_> {
            _ => unreachable!(),
        };
 
-        let mut orig_tokens = annotatable.to_tokens(&self.cfg.sess.parse_sess);
-
        // 'Flatten' all nonterminals (i.e. `TokenKind::Interpolated`)
        // to `None`-delimited groups containing the corresponding tokens. This
        // is normally delayed until the proc-macro server actually needs to
@@ -189,12 +185,7 @@ impl CfgEval<'_, '_> {
        // where `$item` is `#[cfg_attr] struct Foo {}`. We want to make
        // sure to evaluate *all* `#[cfg]` and `#[cfg_attr]` attributes - the simplest
        // way to do this is to do a single parse of a stream without any nonterminals.
-        let mut flatten = FlattenNonterminals {
-            nt_to_tokenstream: rustc_parse::nt_to_tokenstream,
-            parse_sess: &self.cfg.sess.parse_sess,
-            synthesize_tokens: CanSynthesizeMissingTokens::No,
-        };
-        orig_tokens = flatten.process_token_stream(orig_tokens);
+        let orig_tokens = annotatable.to_tokens().flattened();
 
        // Re-parse the tokens, setting the `capture_cfg` flag to save extra information
        // to the captured `AttrAnnotatedTokenStream` (specifically, we capture
// to the captured `AttrAnnotatedTokenStream` (specifically, we capture
