From 14ef167676d15d7865136311df02fae8b383ccd0 Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Tue, 7 Jun 2016 16:49:09 +0200 Subject: [PATCH 01/10] [WIP] Check future proofing of macros with multiple arms using FIRST sets. --- src/libsyntax/ext/tt/macro_rules.rs | 372 +++++++++++++++++++++++++++- 1 file changed, 371 insertions(+), 1 deletion(-) diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 23f0b1fff0ae7..53d7cfbda3b9d 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -309,11 +309,25 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, (**tt).clone() } _ => cx.span_bug(def.span, "wrong-structured lhs") - }).collect() + }).collect::>() } _ => cx.span_bug(def.span, "wrong-structured lhs") }; + 'a: for (i, lhs) in lhses.iter().enumerate() { + for lhs_ in lhses[i + 1 ..].iter() { + if !check_lhs_firsts(cx, lhs, lhs_) { + cx.struct_span_err(def.span, "macro is not future-proof") + .span_help(lhs.get_span(), "parsing of this arm is ambiguous...") + .span_help(lhs_.get_span(), "with the parsing of this arm.") + .help("the behaviour of this macro might change in the future") + .emit(); + valid = false; + break 'a; + } + } + } + let rhses = match **argument_map.get(&rhs_nm.name).unwrap() { MatchedSeq(ref s, _) => { s.iter().map(|m| match **m { @@ -339,6 +353,362 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, NormalTT(exp, Some(def.span), def.allow_internal_unstable) } +fn check_lhs_firsts(cx: &ExtCtxt, lhs: &TokenTree, lhs_: &TokenTree) -> bool { + match (lhs, lhs_) { + (&TokenTree::Delimited(_, ref tta), + &TokenTree::Delimited(_, ref ttb)) => + check_matcher_firsts(cx, &tta.tts, &ttb.tts), + _ => cx.span_bug(lhs.get_span(), "malformed macro lhs") + } +} + +fn match_same_input(ma: &TokenTree, mb: &TokenTree) -> bool { + match (ma, mb) { + (&TokenTree::Token(_, MatchNt(_, nta)), + &TokenTree::Token(_, MatchNt(_, ntb))) => + nta == ntb, + // FIXME: must we descend into Interpolated TTs here? 
+ (&TokenTree::Token(_, ref toka), + &TokenTree::Token(_, ref tokb)) => + toka == tokb, + (&TokenTree::Delimited(_, ref delima), + &TokenTree::Delimited(_, ref delimb)) => { + delima.delim == delimb.delim && + delima.tts.iter().zip(delimb.tts.iter()) + .all(|(ref t1, ref t2)| match_same_input(t1, t2)) + } + (&TokenTree::Sequence(_, ref seqa), + &TokenTree::Sequence(_, ref seqb)) => { + seqa.separator == seqb.separator && + seqa.op == seqb.op && + seqa.tts.iter().zip(seqb.tts.iter()) + .all(|(ref t1, ref t2)| match_same_input(t1, t2)) + } + _ => false + } +} + +// assumes that tok != MatchNt +fn nt_first_set_contains(nt: ast::Ident, tok: &Token) -> bool { + use parse::token::BinOpToken::*; + use parse::token::DelimToken::*; + match &nt.name.as_str() as &str { + "tt" => true, + "ident" => match *tok { + Ident(_) => true, + _ => false + }, + "meta" => match *tok { + Ident(_) => true, + _ => false + }, + "path" => match *tok { + ModSep | + Ident(_) => true, + _ => false + }, + "ty" => match *tok { + AndAnd | + BinOp(And) | + OpenDelim(Paren) | + OpenDelim(Bracket) | + BinOp(Star) | + ModSep | + BinOp(Shl) | + Lt | + Underscore | + Ident(_) => true, + _ => false + }, + "expr" => match *tok { + BinOp(And) | + AndAnd | + Not | + BinOp(Star) | + BinOp(Minus) | + OpenDelim(_) | + DotDot | + ModSep | + BinOp(Shl) | + Lt | + Lifetime(_) | + BinOp(Or) | + OrOr | + Ident(_) | + Literal(..) => true, + _ => false + }, + "pat" => match *tok { + AndAnd | + BinOp(And) | + OpenDelim(Paren) | + OpenDelim(Bracket) | + BinOp(Minus) | + ModSep | + BinOp(Shl) | + Lt| + Underscore | + Ident(_) | + Literal(..) => true, + _ => false + }, + "stmt" => match *tok { + BinOp(And) | + AndAnd | + Not | + BinOp(Star) | + BinOp(Minus) | + Pound | + OpenDelim(_) | + DotDot | + ModSep | + Semi | + BinOp(Shl) | + Lt | + Lifetime(_) | + BinOp(Or) | + OrOr | + Ident(_) | + Literal(..) 
=> true, + _ => false + }, + "block" => match *tok { + OpenDelim(Brace) => true, + _ => false + }, + "item" => match *tok { + ModSep | + Ident(_) => true, + _ => false + }, + _ => panic!("unknown NT") + } +} + +fn nt_first_disjoints(nt1: ast::Ident, nt2: ast::Ident) -> bool { + use parse::token::DelimToken::*; + match (&nt1.name.as_str() as &str, &nt2.name.as_str() as &str) { + ("block", _) => !nt_first_set_contains(nt2, &OpenDelim(Brace)), + (_, "block") => !nt_first_set_contains(nt1, &OpenDelim(Brace)), + // all the others can contain Ident + _ => false + } +} + +fn first_set_contains(set: &TokenSet, tok: &Token) -> bool { + for &(_, ref t) in set.tokens.iter() { + match (t, tok) { + (&MatchNt(_, nt1), &MatchNt(_, nt2)) => + if !nt_first_disjoints(nt1, nt2) { return true }, + (&MatchNt(_, nt), tok) | (tok, &MatchNt(_, nt)) => + if nt_first_set_contains(nt, tok) { return true }, + (t1, t2) => if t1 == t2 { return true } + } + } + return false +} + +fn token_of(tt: &TokenTree) -> Token { + use tokenstream::TokenTree::*; + match tt { + &Delimited(_, ref delim) => OpenDelim(delim.delim.clone()), + &Token(_, ref tok) => tok.clone(), + &Sequence(..) 
=> panic!("unexpected seq") + } +} + +#[allow(unused_variables)] +fn first_sets_disjoints(ma: &TokenTree, mb: &TokenTree, + first_a: &FirstSets, first_b: &FirstSets) -> bool { + use tokenstream::TokenTree::*; + match (ma, mb) { + (&Token(_, MatchNt(_, nta)), + &Token(_, MatchNt(_, ntb))) => nt_first_disjoints(nta, ntb), + + (&Token(_, MatchNt(_, nt)), &Token(_, ref tok)) | + (&Token(_, ref tok), &Token(_, MatchNt(_, nt))) => + !nt_first_set_contains(nt, tok), + + (&Token(_, MatchNt(_, nt)), &Delimited(_, ref delim)) | + (&Delimited(_, ref delim), &Token(_, MatchNt(_, nt))) => + !nt_first_set_contains(nt, &OpenDelim(delim.delim.clone())), + + (&Sequence(ref spa, _), &Sequence(ref spb, _)) => { + match (first_a.first.get(spa), first_b.first.get(spb)) { + (Some(&Some(ref seta)), Some(&Some(ref setb))) => { + for &(_, ref tok) in setb.tokens.iter() { + if first_set_contains(seta, tok) { + return false + } + } + true + } + _ => panic!("no FIRST set for sequence") + } + } + + (&Sequence(ref sp, _), ref tok) => { + match first_a.first.get(sp) { + Some(&Some(ref set)) => !first_set_contains(set, &token_of(tok)), + _ => panic!("no FIRST set for sequence") + } + } + + (ref tok, &Sequence(ref sp, _)) => { + match first_b.first.get(sp) { + Some(&Some(ref set)) => !first_set_contains(set, &token_of(tok)), + _ => panic!("no FIRST set for sequence") + } + } + + (&Token(_, ref t1), &Token(_, ref t2)) => + t1 != t2, + + (&Token(_, ref t), &Delimited(_, ref delim)) | + (&Delimited(_, ref delim), &Token(_, ref t)) => + t != &OpenDelim(delim.delim.clone()), + + (&Delimited(_, ref d1), &Delimited(_, ref d2)) => + d1.delim != d2.delim + } +} + +fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> bool { + let mut need_disambiguation = false; + + // first compute the FIRST sets. FIRST sets for tokens, delimited TTs and NT + // matchers are fixed, this will compute the FIRST sets for all sequence TTs + // that appear in the matcher. 
Note that if a sequence starts with a matcher, + // for ex. $e:expr, its FIRST set will be the singleton { MatchNt(expr) }. + // This is okay because none of our matchable NTs can be empty. + let firsts_a = FirstSets::new(ma); + let firsts_b = FirstSets::new(mb); + + // analyse until one of the cases happen: + // * we find an obvious disambiguation, that is a proof that all inputs that + // matches A will never match B or vice-versa + // * we find a case that is too complex to handle and reject it + // * we reach the end of the macro + for (ta, tb) in ma.iter().zip(mb.iter()) { + if match_same_input(ta, tb) { + continue; + } + + if first_sets_disjoints(&ta, &tb, &firsts_a, &firsts_b) { + // accept the macro + return true + } + + // i.e. A or B is either a repeated sequence or a NT matcher that is + // not tt, ident, or block (that is, either A or B could match several + // token trees), we cannot know where we should continue the analysis. + match (ta, tb) { + (&TokenTree::Sequence(_, _), _) | + (_, &TokenTree::Sequence(_, _)) => return false, + + (&TokenTree::Token(_, MatchNt(_, nta)), + &TokenTree::Token(_, MatchNt(_, ntb))) => + if !(nt_is_single_tt(nta) && nt_is_single_tt(ntb)) { + return false + }, + + (&TokenTree::Token(_, MatchNt(_, nt)), _) | + (_ ,&TokenTree::Token(_, MatchNt(_, nt))) => + if !nt_is_single_tt(nt) { return false }, + + _ => () + } + + // A and B always both match a single TT + match (ta, tb) { + (&TokenTree::Sequence(_, _), _) | + (_, &TokenTree::Sequence(_, _)) => + // cannot happen since sequences are not always a single-TT + cx.bug("unexpeceted seq"), + + (&TokenTree::Token(_, MatchNt(_, nt)), _) | + (_ ,&TokenTree::Token(_, MatchNt(_, nt))) + if nt.name.as_str() == "tt" => + // this is okay for now, either A will always have priority, + // either B will always be unreachable. but search for errors + // further + continue, + + (&TokenTree::Token(_, MatchNt(_, _)), + &TokenTree::Token(_, MatchNt(_, _))) => + // this case cannot happen. 
the only NTs that are a single-TT + // and that are not tt are ident and block, that do not share any + // FIRST token. + cx.bug("unexpected NT vs. NT"), + + (&TokenTree::Token(_, MatchNt(_, nt)), &TokenTree::Token(_, Ident(_))) | + (&TokenTree::Token(_, Ident(_)), &TokenTree::Token(_, MatchNt(_, nt))) => + if nt.name.as_str() == "ident" { + // NT ident vs token ident. it's the same as with tt: + // either A is included, either B is unreachable + continue + } else { + // the only possible NT here is ident because the only token in + // the FIRST set of block is {, and { is not seen as a token but + // as the beginning of a Delim + // the only possible token is thus an hardcoded identifier or + // keyword. so the only possible case of NT vs. Token is the + // the case above. + cx.bug("unexpeceted NT vs. Token") + }, + + (&TokenTree::Token(_, MatchNt(_, nt)), &TokenTree::Delimited(_, ref delim)) | + (&TokenTree::Delimited(_, ref delim), &TokenTree::Token(_, MatchNt(_, nt))) => + if nt.name.as_str() == "block" + && delim.delim == token::DelimToken::Brace { + // we cannot say much here. we cannot look inside. we + // can just hope we will find an obvious disambiguation later + need_disambiguation = true; + continue + } else { + // again, the other possibilites do not share any FIRST token + cx.bug("unexpeceted NT vs. Delim") + }, + + (&TokenTree::Delimited(..), &TokenTree::Delimited(..)) => { + // they have the same delim. as above. + // FIXME: we could search for disambiguation *inside* the + // delimited TTs + need_disambiguation = true; + continue + } + + // cannot happen. either they're the same token or their FIRST sets + // are disjoint. + (&TokenTree::Token(..), &TokenTree::Token(..)) | + (&TokenTree::Token(..), &TokenTree::Delimited(..)) | + (&TokenTree::Delimited(..), &TokenTree::Token(..)) => + cx.bug("unexpected Token vs. Token") + } + } + + // now we are at the end of one arm: + if need_disambiguation { + // we couldn't find any. 
we cannot say anything about those arms. + // reject conservatively. + // FIXME: if we are not at the end of the other arm, and that the other + // arm cannot derive empty, I think we could accept...? + false + } else { + // either A is strictly included in B and the other inputs that match B + // will never match A, or B is included in or equal to A, which means + // it's unreachable. this is not our problem. accept. + true + } +} + +fn nt_is_single_tt(nt: ast::Ident) -> bool { + match &nt.name.as_str() as &str { + "block" | "ident" | "tt" => true, + _ => false + } +} + fn check_lhs_nt_follows(cx: &mut ExtCtxt, lhs: &TokenTree) -> bool { // lhs is going to be like TokenTree::Delimited(...), where the // entire lhs is those tts. Or, it can be a "bare sequence", not wrapped in parens. From ab3836d24477e5f5fe1f77f3947c0723e883983f Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Wed, 8 Jun 2016 16:19:42 +0200 Subject: [PATCH 02/10] Accept invalid macros for now. --- src/libsyntax/ext/tt/macro_rules.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 53d7cfbda3b9d..52922a8164ccf 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -317,12 +317,12 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, 'a: for (i, lhs) in lhses.iter().enumerate() { for lhs_ in lhses[i + 1 ..].iter() { if !check_lhs_firsts(cx, lhs, lhs_) { - cx.struct_span_err(def.span, "macro is not future-proof") + cx.struct_span_warn(def.span, "macro is not future-proof") .span_help(lhs.get_span(), "parsing of this arm is ambiguous...") .span_help(lhs_.get_span(), "with the parsing of this arm.") .help("the behaviour of this macro might change in the future") .emit(); - valid = false; + //valid = false; break 'a; } } From 035b6989125b1b592fee8829e07a60f238ae5f0d Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Thu, 9 Jun 2016 14:28:55 +0200 Subject: [PATCH 03/10] Do not 
future-proof check imported macros. --- src/libsyntax/ext/base.rs | 4 ++-- src/libsyntax/ext/expand.rs | 4 ++-- src/libsyntax/ext/tt/macro_rules.rs | 29 +++++++++++++++++------------ 3 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/libsyntax/ext/base.rs b/src/libsyntax/ext/base.rs index 92670cd9def90..beca29dfc253f 100644 --- a/src/libsyntax/ext/base.rs +++ b/src/libsyntax/ext/base.rs @@ -715,12 +715,12 @@ impl<'a> ExtCtxt<'a> { } } - pub fn insert_macro(&mut self, def: ast::MacroDef) { + pub fn insert_macro(&mut self, def: ast::MacroDef, imported: bool) { if def.export { self.exported_macros.push(def.clone()); } if def.use_locally { - let ext = macro_rules::compile(self, &def); + let ext = macro_rules::compile(self, &def, imported); self.syntax_env.insert(def.ident.name, ext); } } diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs index c670283e559d9..1085cafa58c8a 100644 --- a/src/libsyntax/ext/expand.rs +++ b/src/libsyntax/ext/expand.rs @@ -290,7 +290,7 @@ fn expand_mac_invoc(mac: ast::Mac, ident: Option, attrs: Vec MacroExpander<'a, 'b> { // We need to error on `#[macro_use] extern crate` when it isn't at the // crate root, because `$crate` won't work properly. for def in self.cx.loader.load_crate(item, self.at_crate_root) { - self.cx.insert_macro(def); + self.cx.insert_macro(def, true); } } else { let at_crate_root = ::std::mem::replace(&mut self.at_crate_root, false); diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 52922a8164ccf..24b64a08ae977 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -249,7 +249,8 @@ fn generic_extension<'cx>(cx: &'cx ExtCtxt, /// Converts a `macro_rules!` invocation into a syntax extension. 
pub fn compile<'cx>(cx: &'cx mut ExtCtxt, - def: &ast::MacroDef) -> SyntaxExtension { + def: &ast::MacroDef, + imported: bool) -> SyntaxExtension { let lhs_nm = gensym_ident("lhs"); let rhs_nm = gensym_ident("rhs"); @@ -305,7 +306,9 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, MatchedSeq(ref s, _) => { s.iter().map(|m| match **m { MatchedNonterminal(NtTT(ref tt)) => { - valid &= check_lhs_nt_follows(cx, tt); + if !imported { + valid &= check_lhs_nt_follows(cx, tt); + } (**tt).clone() } _ => cx.span_bug(def.span, "wrong-structured lhs") @@ -314,16 +317,18 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, _ => cx.span_bug(def.span, "wrong-structured lhs") }; - 'a: for (i, lhs) in lhses.iter().enumerate() { - for lhs_ in lhses[i + 1 ..].iter() { - if !check_lhs_firsts(cx, lhs, lhs_) { - cx.struct_span_warn(def.span, "macro is not future-proof") - .span_help(lhs.get_span(), "parsing of this arm is ambiguous...") - .span_help(lhs_.get_span(), "with the parsing of this arm.") - .help("the behaviour of this macro might change in the future") - .emit(); - //valid = false; - break 'a; + if !imported { + 'a: for (i, lhs) in lhses.iter().enumerate() { + for lhs_ in lhses[i + 1 ..].iter() { + if !check_lhs_firsts(cx, lhs, lhs_) { + cx.struct_span_warn(def.span, "macro is not future-proof") + .span_help(lhs.get_span(), "parsing of this arm is ambiguous...") + .span_help(lhs_.get_span(), "with the parsing of this arm.") + .help("the behaviour of this macro might change in the future") + .emit(); + //valid = false; + break 'a; + } } } } From a27606c10ecabae5099c21fbf37093a725823225 Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Thu, 9 Jun 2016 15:01:17 +0200 Subject: [PATCH 04/10] Accept T or delimited versus NT or seq cases where the rest of the matcher only contain simple tokens. Those were rejected before when the NT or seq can match several token trees because we did not know where to continue the analysis. 
But if the rest of the first matcher does not contain any NT matcher, we do not need to continue the analysis. --- src/libsyntax/ext/tt/macro_rules.rs | 63 ++++++++++++++++++++--------- 1 file changed, 44 insertions(+), 19 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 24b64a08ae977..76bd4012ecee9 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -594,7 +594,7 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> boo // matches A will never match B or vice-versa // * we find a case that is too complex to handle and reject it // * we reach the end of the macro - for (ta, tb) in ma.iter().zip(mb.iter()) { + for ((idx_a, ta), tb) in ma.iter().enumerate().zip(mb.iter()) { if match_same_input(ta, tb) { continue; } @@ -608,18 +608,25 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> boo // not tt, ident, or block (that is, either A or B could match several // token trees), we cannot know where we should continue the analysis. match (ta, tb) { - (&TokenTree::Sequence(_, _), _) | - (_, &TokenTree::Sequence(_, _)) => return false, - (&TokenTree::Token(_, MatchNt(_, nta)), &TokenTree::Token(_, MatchNt(_, ntb))) => if !(nt_is_single_tt(nta) && nt_is_single_tt(ntb)) { return false }, - (&TokenTree::Token(_, MatchNt(_, nt)), _) | - (_ ,&TokenTree::Token(_, MatchNt(_, nt))) => - if !nt_is_single_tt(nt) { return false }, + (&TokenTree::Token(_, MatchNt(_, nt)), _) if !nt_is_single_tt(nt) => + return false, + + // super specific corner case: if one arm is always one token, + // followed by the end of the macro invocation, then we can accept + // it. 
+ + (&TokenTree::Sequence(_, _), _) | + (_, &TokenTree::Sequence(_, _)) => + return only_simple_tokens(&ma[idx_a..]) && !need_disambiguation, + + (_ ,&TokenTree::Token(_, MatchNt(_, nt))) if !nt_is_single_tt(nt) => + return only_simple_tokens(&ma[idx_a..]) && !need_disambiguation, _ => () } @@ -662,18 +669,26 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> boo cx.bug("unexpeceted NT vs. Token") }, - (&TokenTree::Token(_, MatchNt(_, nt)), &TokenTree::Delimited(_, ref delim)) | - (&TokenTree::Delimited(_, ref delim), &TokenTree::Token(_, MatchNt(_, nt))) => - if nt.name.as_str() == "block" - && delim.delim == token::DelimToken::Brace { - // we cannot say much here. we cannot look inside. we - // can just hope we will find an obvious disambiguation later - need_disambiguation = true; - continue - } else { - // again, the other possibilites do not share any FIRST token - cx.bug("unexpeceted NT vs. Delim") - }, + (&TokenTree::Token(_, MatchNt(_, nt)), + &TokenTree::Delimited(_, ref delim)) => { + // should be the only possibility. + assert!(nt.name.as_str() == "block" && + delim.delim == token::DelimToken::Brace); + // we cannot say much here. we cannot look inside. we + // can just hope we will find an obvious disambiguation later + need_disambiguation = true; + continue + } + + (&TokenTree::Delimited(_, ref delim), + &TokenTree::Token(_, MatchNt(_, nt))) => { + assert!(nt.name.as_str() == "block" && + delim.delim == token::DelimToken::Brace); + // as with several-TTs NTs, if the above is only + // made of simple tokens this is ok... + need_disambiguation |= !only_simple_tokens(&delim.tts); + continue + } (&TokenTree::Delimited(..), &TokenTree::Delimited(..)) => { // they have the same delim. as above. 
@@ -707,6 +722,16 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> boo } } +// checks that a matcher does not contain any NT except ident +fn only_simple_tokens(m: &[TokenTree]) -> bool { + m.iter().all(|tt| match *tt { + TokenTree::Token(_, MatchNt(_, nt)) => nt.name.as_str() == "ident", + TokenTree::Token(..) => true, + TokenTree::Delimited(_, ref delim) => only_simple_tokens(&delim.tts), + TokenTree::Sequence(_, ref seq) => only_simple_tokens(&seq.tts) + }) +} + fn nt_is_single_tt(nt: ast::Ident) -> bool { match &nt.name.as_str() as &str { "block" | "ident" | "tt" => true, From 72066423d3087f1cefbaf42fbef9792eeb95ca38 Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Thu, 9 Jun 2016 16:12:33 +0200 Subject: [PATCH 05/10] Make the algorithm descend into delimited TTs. --- src/libcollections/lib.rs | 1 + src/libcollections/macros.rs | 2 + src/libsyntax/ext/expand.rs | 5 +- src/libsyntax/ext/tt/macro_rules.rs | 78 +++++++++++++++++++---------- src/libsyntax/feature_gate.rs | 5 ++ 5 files changed, 63 insertions(+), 28 deletions(-) diff --git a/src/libcollections/lib.rs b/src/libcollections/lib.rs index f027d074cb6f0..88404f7ad084b 100644 --- a/src/libcollections/lib.rs +++ b/src/libcollections/lib.rs @@ -43,6 +43,7 @@ #![feature(pattern)] #![feature(placement_in)] #![feature(placement_new_protocol)] +#![feature(rustc_attrs)] #![feature(shared)] #![feature(slice_patterns)] #![feature(specialization)] diff --git a/src/libcollections/macros.rs b/src/libcollections/macros.rs index d6a8362d58182..62094cff51e6b 100644 --- a/src/libcollections/macros.rs +++ b/src/libcollections/macros.rs @@ -42,6 +42,7 @@ #[macro_export] #[stable(feature = "rust1", since = "1.0.0")] #[allow_internal_unstable] +#[rustc_unsafe_macro] macro_rules! vec { ($elem:expr; $n:expr) => ( $crate::vec::from_elem($elem, $n) @@ -57,6 +58,7 @@ macro_rules! 
vec { // `slice::into_vec` function which is only available with cfg(test) // NB see the slice::hack module in slice.rs for more information #[cfg(test)] +#[rustc_unsafe_macro] macro_rules! vec { ($elem:expr; $n:expr) => ( $crate::vec::from_elem($elem, $n) diff --git a/src/libsyntax/ext/expand.rs b/src/libsyntax/ext/expand.rs index 1085cafa58c8a..5e53c3643f32d 100644 --- a/src/libsyntax/ext/expand.rs +++ b/src/libsyntax/ext/expand.rs @@ -279,6 +279,7 @@ fn expand_mac_invoc(mac: ast::Mac, ident: Option, attrs: Vec(mac: ast::Mac, ident: Option, attrs: Vec MacroExpander<'a, 'b> { // We need to error on `#[macro_use] extern crate` when it isn't at the // crate root, because `$crate` won't work properly. for def in self.cx.loader.load_crate(item, self.at_crate_root) { - self.cx.insert_macro(def, true); + self.cx.insert_macro(def, false); } } else { let at_crate_root = ::std::mem::replace(&mut self.at_crate_root, false); diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 76bd4012ecee9..b69b2a22f9cbe 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -116,7 +116,7 @@ impl<'a> MacResult for ParserAnyMacro<'a> { fn make_stmts(self: Box>) - -> Option> { + -> Option> { let mut ret = SmallVector::zero(); loop { let mut parser = self.parser.borrow_mut(); @@ -250,7 +250,7 @@ fn generic_extension<'cx>(cx: &'cx ExtCtxt, /// Converts a `macro_rules!` invocation into a syntax extension. 
pub fn compile<'cx>(cx: &'cx mut ExtCtxt, def: &ast::MacroDef, - imported: bool) -> SyntaxExtension { + check_macro: bool) -> SyntaxExtension { let lhs_nm = gensym_ident("lhs"); let rhs_nm = gensym_ident("rhs"); @@ -306,7 +306,7 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, MatchedSeq(ref s, _) => { s.iter().map(|m| match **m { MatchedNonterminal(NtTT(ref tt)) => { - if !imported { + if check_macro { valid &= check_lhs_nt_follows(cx, tt); } (**tt).clone() @@ -317,17 +317,20 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, _ => cx.span_bug(def.span, "wrong-structured lhs") }; - if !imported { + if check_macro { 'a: for (i, lhs) in lhses.iter().enumerate() { for lhs_ in lhses[i + 1 ..].iter() { - if !check_lhs_firsts(cx, lhs, lhs_) { - cx.struct_span_warn(def.span, "macro is not future-proof") - .span_help(lhs.get_span(), "parsing of this arm is ambiguous...") - .span_help(lhs_.get_span(), "with the parsing of this arm.") - .help("the behaviour of this macro might change in the future") - .emit(); - //valid = false; - break 'a; + match check_lhs_firsts(cx, lhs, lhs_) { + AnalysisResult::Error => { + cx.struct_span_err(def.span, "macro is not future-proof") + .span_help(lhs.get_span(), "parsing of this arm is ambiguous...") + .span_help(lhs_.get_span(), "with the parsing of this arm.") + .help("the behaviour of this macro might change in the future") + .emit(); + //valid = false; + break 'a; + } + _ => () } } } @@ -358,7 +361,8 @@ pub fn compile<'cx>(cx: &'cx mut ExtCtxt, NormalTT(exp, Some(def.span), def.allow_internal_unstable) } -fn check_lhs_firsts(cx: &ExtCtxt, lhs: &TokenTree, lhs_: &TokenTree) -> bool { +fn check_lhs_firsts(cx: &ExtCtxt, lhs: &TokenTree, lhs_: &TokenTree) + -> AnalysisResult { match (lhs, lhs_) { (&TokenTree::Delimited(_, ref tta), &TokenTree::Delimited(_, ref ttb)) => @@ -578,7 +582,20 @@ fn first_sets_disjoints(ma: &TokenTree, mb: &TokenTree, } } -fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> bool { +// the result of 
the FIRST set analysis. +// * Ok -> an obvious disambiguation has been found +// * Unsure -> no problem between those matchers but analysis should continue +// * Error -> maybe a problem. should be accepted only if an obvious +// disambiguation is found later +enum AnalysisResult { + Ok, + Unsure, + Error +} + +fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) + -> AnalysisResult { + use self::AnalysisResult::*; let mut need_disambiguation = false; // first compute the FIRST sets. FIRST sets for tokens, delimited TTs and NT @@ -601,7 +618,7 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> boo if first_sets_disjoints(&ta, &tb, &firsts_a, &firsts_b) { // accept the macro - return true + return Ok } // i.e. A or B is either a repeated sequence or a NT matcher that is @@ -611,11 +628,11 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> boo (&TokenTree::Token(_, MatchNt(_, nta)), &TokenTree::Token(_, MatchNt(_, ntb))) => if !(nt_is_single_tt(nta) && nt_is_single_tt(ntb)) { - return false + return Error }, (&TokenTree::Token(_, MatchNt(_, nt)), _) if !nt_is_single_tt(nt) => - return false, + return Error, // super specific corner case: if one arm is always one token, // followed by the end of the macro invocation, then we can accept @@ -623,10 +640,14 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> boo (&TokenTree::Sequence(_, _), _) | (_, &TokenTree::Sequence(_, _)) => - return only_simple_tokens(&ma[idx_a..]) && !need_disambiguation, + return if only_simple_tokens(&ma[idx_a..]) && !need_disambiguation { + Unsure + } else { Error }, (_ ,&TokenTree::Token(_, MatchNt(_, nt))) if !nt_is_single_tt(nt) => - return only_simple_tokens(&ma[idx_a..]) && !need_disambiguation, + return if only_simple_tokens(&ma[idx_a..]) && !need_disambiguation { + Unsure + } else { Error }, _ => () } @@ -690,12 +711,17 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: 
&[TokenTree]) -> boo continue } - (&TokenTree::Delimited(..), &TokenTree::Delimited(..)) => { + (&TokenTree::Delimited(_, ref d1), + &TokenTree::Delimited(_, ref d2)) => { // they have the same delim. as above. - // FIXME: we could search for disambiguation *inside* the - // delimited TTs - need_disambiguation = true; - continue + match check_matcher_firsts(cx, &d1.tts, &d2.tts) { + Ok => return Ok, + Unsure => continue, + Error => { + need_disambiguation = true; + continue + } + } } // cannot happen. either they're the same token or their FIRST sets @@ -713,12 +739,12 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) -> boo // reject conservatively. // FIXME: if we are not at the end of the other arm, and that the other // arm cannot derive empty, I think we could accept...? - false + Error } else { // either A is strictly included in B and the other inputs that match B // will never match A, or B is included in or equal to A, which means // it's unreachable. this is not our problem. accept. - true + Unsure } } diff --git a/src/libsyntax/feature_gate.rs b/src/libsyntax/feature_gate.rs index 27485ee65fcc0..13373d961359d 100644 --- a/src/libsyntax/feature_gate.rs +++ b/src/libsyntax/feature_gate.rs @@ -510,6 +510,11 @@ pub const KNOWN_ATTRIBUTES: &'static [(&'static str, AttributeType, AttributeGat across crates and will never be stable", cfg_fn!(rustc_attrs))), + ("unsafe_macro", Normal, Gated("rustc_attrs", + "unsafe_macro allows to write \ + non-future proof macros", + cfg_fn!(rustc_attrs))), + ("allow_internal_unstable", Normal, Gated("allow_internal_unstable", EXPLAIN_ALLOW_INTERNAL_UNSTABLE, cfg_fn!(allow_internal_unstable))), From 3eec2929765883fbac63497e8aa44c71a7cadf55 Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Tue, 14 Jun 2016 15:11:38 +0200 Subject: [PATCH 06/10] Flag as unsafe several macros in the codebase for now. 
--- src/librustc/hir/mod.rs | 1 + src/librustc/lib.rs | 1 + src/librustc/macros.rs | 1 + src/librustc_metadata/lib.rs | 1 + src/librustc_metadata/macros.rs | 1 + src/librustc_mir/build/mod.rs | 1 + src/librustc_mir/lib.rs | 1 + src/libsyntax/feature_gate.rs | 1 + src/libsyntax/lib.rs | 1 + src/libsyntax/parse/parser.rs | 1 + 10 files changed, 10 insertions(+) diff --git a/src/librustc/hir/mod.rs b/src/librustc/hir/mod.rs index a139dd152f006..52fcb72e3bf35 100644 --- a/src/librustc/hir/mod.rs +++ b/src/librustc/hir/mod.rs @@ -55,6 +55,7 @@ use std::fmt; /// of `Vec` to avoid keeping extra capacity. pub type HirVec = P<[T]>; +#[rustc_unsafe_macro] macro_rules! hir_vec { ($elem:expr; $n:expr) => ( $crate::hir::HirVec::from(vec![$elem; $n]) diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs index 48ea953cc1e8b..471df53e8f5c6 100644 --- a/src/librustc/lib.rs +++ b/src/librustc/lib.rs @@ -33,6 +33,7 @@ #![feature(libc)] #![feature(nonzero)] #![feature(quote)] +#![feature(rustc_attrs)] #![feature(rustc_diagnostic_macros)] #![feature(rustc_private)] #![feature(slice_patterns)] diff --git a/src/librustc/macros.rs b/src/librustc/macros.rs index 76dca1bb5b649..31e51e34ef5d4 100644 --- a/src/librustc/macros.rs +++ b/src/librustc/macros.rs @@ -8,6 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +#[rustc_unsafe_macro] macro_rules! 
enum_from_u32 { ($(#[$attr:meta])* pub enum $name:ident { $($variant:ident = $e:expr,)* diff --git a/src/librustc_metadata/lib.rs b/src/librustc_metadata/lib.rs index cd92493e3db70..232728ff0f9c1 100644 --- a/src/librustc_metadata/lib.rs +++ b/src/librustc_metadata/lib.rs @@ -20,6 +20,7 @@ #![feature(box_patterns)] #![feature(enumset)] #![feature(quote)] +#![feature(rustc_attrs)] #![feature(rustc_diagnostic_macros)] #![feature(rustc_private)] #![feature(staged_api)] diff --git a/src/librustc_metadata/macros.rs b/src/librustc_metadata/macros.rs index ed764ebd9f95d..0d9a93531ac0f 100644 --- a/src/librustc_metadata/macros.rs +++ b/src/librustc_metadata/macros.rs @@ -8,6 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +#[rustc_unsafe_macro] macro_rules! enum_from_u32 { ($(#[$attr:meta])* pub enum $name:ident { $($variant:ident = $e:expr,)* diff --git a/src/librustc_mir/build/mod.rs b/src/librustc_mir/build/mod.rs index 362e1e26fdf1e..238298fa42176 100644 --- a/src/librustc_mir/build/mod.rs +++ b/src/librustc_mir/build/mod.rs @@ -139,6 +139,7 @@ impl BlockAndExtension for BasicBlock { /// Update a block pointer and return the value. /// Use it like `let x = unpack!(block = self.foo(block, foo))`. +#[rustc_unsafe_macro] macro_rules! unpack { ($x:ident = $c:expr) => { { diff --git a/src/librustc_mir/lib.rs b/src/librustc_mir/lib.rs index 3d01d49c53472..150fabb714f92 100644 --- a/src/librustc_mir/lib.rs +++ b/src/librustc_mir/lib.rs @@ -22,6 +22,7 @@ Rust MIR: a lowered representation of Rust. Also: an experiment! #![feature(associated_consts)] #![feature(box_patterns)] +#![feature(rustc_attrs)] #![feature(rustc_diagnostic_macros)] #![feature(rustc_private)] #![feature(staged_api)] diff --git a/src/libsyntax/feature_gate.rs b/src/libsyntax/feature_gate.rs index 13373d961359d..cae845ea165fb 100644 --- a/src/libsyntax/feature_gate.rs +++ b/src/libsyntax/feature_gate.rs @@ -48,6 +48,7 @@ macro_rules! 
setter { }} } +#[rustc_unsafe_macro] macro_rules! declare_features { ($((active, $feature: ident, $ver: expr, $issue: expr)),+) => { /// Represents active features that are currently being implemented or diff --git a/src/libsyntax/lib.rs b/src/libsyntax/lib.rs index 8febf1c49ec2b..9adca025238e9 100644 --- a/src/libsyntax/lib.rs +++ b/src/libsyntax/lib.rs @@ -28,6 +28,7 @@ #![feature(const_fn)] #![feature(filling_drop)] #![feature(libc)] +#![feature(rustc_attrs)] #![feature(rustc_private)] #![feature(staged_api)] #![feature(str_escape)] diff --git a/src/libsyntax/parse/parser.rs b/src/libsyntax/parse/parser.rs index a06270bb7727a..5ad4d37da9022 100644 --- a/src/libsyntax/parse/parser.rs +++ b/src/libsyntax/parse/parser.rs @@ -148,6 +148,7 @@ macro_rules! maybe_whole_expr { } /// As maybe_whole_expr, but for things other than expressions +#[rustc_unsafe_macro] macro_rules! maybe_whole { ($p:expr, $constructor:ident) => ( { From 20e1fd9195b9c79db98eb6901b393d22ee9bbc31 Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Tue, 28 Jun 2016 18:12:06 +0200 Subject: [PATCH 07/10] [WIP] Fix sequence repetitions. --- src/libcollections/macros.rs | 2 - src/librustc/hir/mod.rs | 1 - src/librustc/macros.rs | 1 - src/librustc_metadata/macros.rs | 1 - src/libsyntax/ext/tt/macro_rules.rs | 113 +++++++++++++++++++++++++--- src/libsyntax/feature_gate.rs | 1 - 6 files changed, 102 insertions(+), 17 deletions(-) diff --git a/src/libcollections/macros.rs b/src/libcollections/macros.rs index 62094cff51e6b..d6a8362d58182 100644 --- a/src/libcollections/macros.rs +++ b/src/libcollections/macros.rs @@ -42,7 +42,6 @@ #[macro_export] #[stable(feature = "rust1", since = "1.0.0")] #[allow_internal_unstable] -#[rustc_unsafe_macro] macro_rules! vec { ($elem:expr; $n:expr) => ( $crate::vec::from_elem($elem, $n) @@ -58,7 +57,6 @@ macro_rules! 
vec { // `slice::into_vec` function which is only available with cfg(test) // NB see the slice::hack module in slice.rs for more information #[cfg(test)] -#[rustc_unsafe_macro] macro_rules! vec { ($elem:expr; $n:expr) => ( $crate::vec::from_elem($elem, $n) diff --git a/src/librustc/hir/mod.rs b/src/librustc/hir/mod.rs index 52fcb72e3bf35..a139dd152f006 100644 --- a/src/librustc/hir/mod.rs +++ b/src/librustc/hir/mod.rs @@ -55,7 +55,6 @@ use std::fmt; /// of `Vec` to avoid keeping extra capacity. pub type HirVec = P<[T]>; -#[rustc_unsafe_macro] macro_rules! hir_vec { ($elem:expr; $n:expr) => ( $crate::hir::HirVec::from(vec![$elem; $n]) diff --git a/src/librustc/macros.rs b/src/librustc/macros.rs index 31e51e34ef5d4..76dca1bb5b649 100644 --- a/src/librustc/macros.rs +++ b/src/librustc/macros.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[rustc_unsafe_macro] macro_rules! enum_from_u32 { ($(#[$attr:meta])* pub enum $name:ident { $($variant:ident = $e:expr,)* diff --git a/src/librustc_metadata/macros.rs b/src/librustc_metadata/macros.rs index 0d9a93531ac0f..ed764ebd9f95d 100644 --- a/src/librustc_metadata/macros.rs +++ b/src/librustc_metadata/macros.rs @@ -8,7 +8,6 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -#[rustc_unsafe_macro] macro_rules! 
enum_from_u32 { ($(#[$attr:meta])* pub enum $name:ident { $($variant:ident = $e:expr,)* diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index b69b2a22f9cbe..df738a79d0d2e 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -26,7 +26,7 @@ use tokenstream::{self, TokenTree}; use util::small_vector::SmallVector; use std::cell::RefCell; -use std::collections::{HashMap}; +use std::collections::{HashMap, HashSet}; use std::collections::hash_map::{Entry}; struct ParserAnyMacro<'a> { @@ -366,7 +366,7 @@ fn check_lhs_firsts(cx: &ExtCtxt, lhs: &TokenTree, lhs_: &TokenTree) match (lhs, lhs_) { (&TokenTree::Delimited(_, ref tta), &TokenTree::Delimited(_, ref ttb)) => - check_matcher_firsts(cx, &tta.tts, &ttb.tts), + check_matcher_firsts(cx, &tta.tts, &ttb.tts, &mut HashSet::new()), _ => cx.span_bug(lhs.get_span(), "malformed macro lhs") } } @@ -593,7 +593,8 @@ enum AnalysisResult { Error } -fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) +fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree], + visited_spans: &mut HashSet<(Span, Span)>) -> AnalysisResult { use self::AnalysisResult::*; let mut need_disambiguation = false; @@ -611,7 +612,13 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) // matches A will never match B or vice-versa // * we find a case that is too complex to handle and reject it // * we reach the end of the macro - for ((idx_a, ta), tb) in ma.iter().enumerate().zip(mb.iter()) { + for ((idx_a, ta), (idx_b, tb)) in ma.iter().enumerate().zip(mb.iter().enumerate()) { + if visited_spans.contains(&(ta.get_span(), tb.get_span())) { + break + } + + visited_spans.insert((ta.get_span(), tb.get_span())); + if match_same_input(ta, tb) { continue; } @@ -625,6 +632,96 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) // not tt, ident, or block (that is, either A or B could match several // token 
trees), we cannot know where we should continue the analysis. match (ta, tb) { + (&TokenTree::Sequence(sp_a, ref seq_a), &TokenTree::Sequence(sp_b, ref seq_b)) => { + let new_tt_a = TokenTree::Sequence(sp_a, tokenstream::SequenceRepetition { op: tokenstream::KleeneOp::ZeroOrMore, .. seq_a.clone() }); + let mut new_a = seq_a.tts.iter().map(|x| x.clone()).collect::>(); + if let Some(ref sep) = seq_a.separator { new_a.push(TokenTree::Token(sp_a, sep.clone())) }; + new_a.push(new_tt_a); + new_a.extend(ma[idx_a + 1 ..].iter().map(|x| x.clone())); + + let new_tt_b = TokenTree::Sequence(sp_b, tokenstream::SequenceRepetition { op: tokenstream::KleeneOp::ZeroOrMore, .. seq_b.clone() }); + let mut new_b = seq_b.tts.iter().map(|x| x.clone()).collect::>(); + if let Some(ref sep) = seq_b.separator { new_b.push(TokenTree::Token(sp_b, sep.clone())) }; + new_b.push(new_tt_b); + new_b.extend(mb[idx_b + 1 ..].iter().map(|x| x.clone())); + + let mut ret = match check_matcher_firsts(cx, &new_a, &mb[idx_b ..], visited_spans) { + Error => return Error, + ret => ret + }; + + match check_matcher_firsts(cx, &ma[idx_a ..], &new_b, visited_spans) { + Error => return Error, + Unsure => ret = Unsure, + Ok => () + }; + + if seq_a.op == tokenstream::KleeneOp::ZeroOrMore { + match check_matcher_firsts(cx, &ma[idx_a + 1 ..], &mb[idx_b ..], visited_spans) { + Error => return Error, + Unsure => ret = Unsure, + Ok => () + }; + } + + if seq_b.op == tokenstream::KleeneOp::ZeroOrMore { + match check_matcher_firsts(cx, &ma[idx_a ..], &mb[idx_b + 1 ..], visited_spans) { + Error => return Error, + Unsure => ret = Unsure, + Ok => () + }; + } + + return ret; + } + + (&TokenTree::Sequence(sp, ref seq), _) => { + // unroll 1 step. + let new_tt = TokenTree::Sequence(sp, tokenstream::SequenceRepetition { op: tokenstream::KleeneOp::ZeroOrMore, .. 
seq.clone() }); + let mut new_a = seq.tts.iter().map(|x| x.clone()).collect::>(); + if let Some(ref sep) = seq.separator { new_a.push(TokenTree::Token(sp, sep.clone())) }; + new_a.push(new_tt); + new_a.extend(ma[idx_a + 1 ..].iter().map(|x| x.clone())); + let mut ret = match check_matcher_firsts(cx, &new_a, &mb[idx_b ..], visited_spans) { + Error => return Error, + ret => ret + }; + + if seq.op == tokenstream::KleeneOp::ZeroOrMore { + match check_matcher_firsts(cx, &ma[idx_a + 1 ..], &mb[idx_b ..], visited_spans) { + Error => return Error, + Unsure => ret = Unsure, + Ok => () + }; + } + + return ret; + } + + (_, &TokenTree::Sequence(sp, ref seq)) => { + // unroll 1 step. + let new_tt = TokenTree::Sequence(sp, tokenstream::SequenceRepetition { op: tokenstream::KleeneOp::ZeroOrMore, .. seq.clone() }); + let mut new_b = seq.tts.iter().map(|x| x.clone()).collect::>(); + if let Some(ref sep) = seq.separator { new_b.push(TokenTree::Token(sp, sep.clone())) }; + new_b.push(new_tt); + new_b.extend(mb[idx_b + 1 ..].iter().map(|x| x.clone())); + let mut ret = match check_matcher_firsts(cx, &ma[idx_a ..], &new_b, visited_spans) { + Error => return Error, + ret => ret + }; + + if seq.op == tokenstream::KleeneOp::ZeroOrMore { + match check_matcher_firsts(cx, &ma[idx_a ..], &mb[idx_b + 1 ..], visited_spans) { + Error => return Error, + Unsure => ret = Unsure, + Ok => () + }; + + } + + return ret; + } + (&TokenTree::Token(_, MatchNt(_, nta)), &TokenTree::Token(_, MatchNt(_, ntb))) => if !(nt_is_single_tt(nta) && nt_is_single_tt(ntb)) { @@ -638,12 +735,6 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) // followed by the end of the macro invocation, then we can accept // it. 
- (&TokenTree::Sequence(_, _), _) | - (_, &TokenTree::Sequence(_, _)) => - return if only_simple_tokens(&ma[idx_a..]) && !need_disambiguation { - Unsure - } else { Error }, - (_ ,&TokenTree::Token(_, MatchNt(_, nt))) if !nt_is_single_tt(nt) => return if only_simple_tokens(&ma[idx_a..]) && !need_disambiguation { Unsure @@ -714,7 +805,7 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree]) (&TokenTree::Delimited(_, ref d1), &TokenTree::Delimited(_, ref d2)) => { // they have the same delim. as above. - match check_matcher_firsts(cx, &d1.tts, &d2.tts) { + match check_matcher_firsts(cx, &d1.tts, &d2.tts, visited_spans) { Ok => return Ok, Unsure => continue, Error => { diff --git a/src/libsyntax/feature_gate.rs b/src/libsyntax/feature_gate.rs index cae845ea165fb..13373d961359d 100644 --- a/src/libsyntax/feature_gate.rs +++ b/src/libsyntax/feature_gate.rs @@ -48,7 +48,6 @@ macro_rules! setter { }} } -#[rustc_unsafe_macro] macro_rules! declare_features { ($((active, $feature: ident, $ver: expr, $issue: expr)),+) => { /// Represents active features that are currently being implemented or From c127c327fc60f09f2203b4e3c42385d6a7744ca3 Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Mon, 4 Jul 2016 16:18:55 +0200 Subject: [PATCH 08/10] Make sure `need_disambiguation` is taken into account when checking sequence repetitions. 
--- src/libsyntax/ext/tt/macro_rules.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index df738a79d0d2e..7ef39327bafd4 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -672,7 +672,11 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree], }; } - return ret; + return match ret { + Ok => Ok, + Unsure => if need_disambiguation { Error } else { Unsure }, + Error => Error + }; } (&TokenTree::Sequence(sp, ref seq), _) => { @@ -695,7 +699,11 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree], }; } - return ret; + return match ret { + Ok => Ok, + Unsure => if need_disambiguation { Error } else { Unsure }, + Error => Error + }; } (_, &TokenTree::Sequence(sp, ref seq)) => { @@ -719,7 +727,11 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree], } - return ret; + return match ret { + Ok => Ok, + Unsure => if need_disambiguation { Error } else { Unsure }, + Error => Error + }; } (&TokenTree::Token(_, MatchNt(_, nta)), From 02f1e17ab01fc20ccdb74a1949e5953d9dbdb6b7 Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Tue, 12 Jul 2016 15:26:11 +0200 Subject: [PATCH 09/10] A `tt` matcher in the second arm should not always be accepted. A `tt` matcher in the second arm versus a `block` matcher in the first arm is not future-proof. Also clean-up a bit the code for the single-TT special cases. 
--- src/libsyntax/ext/tt/macro_rules.rs | 322 +++++++++++++--------------- 1 file changed, 154 insertions(+), 168 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 7ef39327bafd4..88165ad68dc6f 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -386,13 +386,15 @@ fn match_same_input(ma: &TokenTree, mb: &TokenTree) -> bool { delima.tts.iter().zip(delimb.tts.iter()) .all(|(ref t1, ref t2)| match_same_input(t1, t2)) } - (&TokenTree::Sequence(_, ref seqa), - &TokenTree::Sequence(_, ref seqb)) => { - seqa.separator == seqb.separator && - seqa.op == seqb.op && - seqa.tts.iter().zip(seqb.tts.iter()) - .all(|(ref t1, ref t2)| match_same_input(t1, t2)) - } + // we cannot consider that sequences match the same input + // they need to be checked specially. + // (&TokenTree::Sequence(_, ref seqa), + // &TokenTree::Sequence(_, ref seqb)) => { + // seqa.separator == seqb.separator && + // seqa.op == seqb.op && + // seqa.tts.iter().zip(seqb.tts.iter()) + // .all(|(ref t1, ref t2)| match_same_input(t1, t2)) + // } _ => false } } @@ -593,12 +595,51 @@ enum AnalysisResult { Error } +impl AnalysisResult { + fn chain AnalysisResult>(self, mut next: F) -> AnalysisResult { + if let AnalysisResult::Error = self { return self }; + match next() { + AnalysisResult::Ok => self, + ret => ret + } + } +} + +fn unroll_sequence<'a>(sp: Span, seq: &tokenstream::SequenceRepetition, + next: &[TokenTree]) -> Vec { + let mut ret = seq.tts.to_vec(); + seq.separator.clone().map(|sep| ret.push(TokenTree::Token(sp, sep))); + ret.push(TokenTree::Sequence( + // clone a sequence. change $(...)+ into $(...)* + sp, tokenstream::SequenceRepetition { + op: tokenstream::KleeneOp::ZeroOrMore, + .. 
seq.clone() + } + )); + ret.extend_from_slice(next); + ret +} + +fn check_sequence(sp: Span, seq: &tokenstream::SequenceRepetition, + next: &[TokenTree], against: &[TokenTree], mut callback: F) + -> AnalysisResult + where F: FnMut(&[TokenTree], &[TokenTree]) -> AnalysisResult { + let unrolled = unroll_sequence(sp, seq, next); + let ret = callback(&unrolled, against); + + if seq.op == tokenstream::KleeneOp::ZeroOrMore { + ret.chain(|| callback(next, against)) + } else { ret } +} + fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree], visited_spans: &mut HashSet<(Span, Span)>) -> AnalysisResult { use self::AnalysisResult::*; let mut need_disambiguation = false; + //println!("running on {:?} <-> {:?}", ma, mb); + // first compute the FIRST sets. FIRST sets for tokens, delimited TTs and NT // matchers are fixed, this will compute the FIRST sets for all sequence TTs // that appear in the matcher. Note that if a sequence starts with a matcher, @@ -612,211 +653,142 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree], // matches A will never match B or vice-versa // * we find a case that is too complex to handle and reject it // * we reach the end of the macro - for ((idx_a, ta), (idx_b, tb)) in ma.iter().enumerate().zip(mb.iter().enumerate()) { + let iter_a = ma.iter().enumerate(); + let iter_b = mb.iter().enumerate(); + let mut iter = iter_a.clone().zip(iter_b.clone()); + while let Some(((idx_a, ta), (idx_b, tb))) = iter.next() { if visited_spans.contains(&(ta.get_span(), tb.get_span())) { - break + return if need_disambiguation { Error } else { Unsure }; } visited_spans.insert((ta.get_span(), tb.get_span())); - if match_same_input(ta, tb) { - continue; - } + // sequence analysis - if first_sets_disjoints(&ta, &tb, &firsts_a, &firsts_b) { - // accept the macro - return Ok - } - - // i.e. 
A or B is either a repeated sequence or a NT matcher that is - // not tt, ident, or block (that is, either A or B could match several - // token trees), we cannot know where we should continue the analysis. match (ta, tb) { - (&TokenTree::Sequence(sp_a, ref seq_a), &TokenTree::Sequence(sp_b, ref seq_b)) => { - let new_tt_a = TokenTree::Sequence(sp_a, tokenstream::SequenceRepetition { op: tokenstream::KleeneOp::ZeroOrMore, .. seq_a.clone() }); - let mut new_a = seq_a.tts.iter().map(|x| x.clone()).collect::>(); - if let Some(ref sep) = seq_a.separator { new_a.push(TokenTree::Token(sp_a, sep.clone())) }; - new_a.push(new_tt_a); - new_a.extend(ma[idx_a + 1 ..].iter().map(|x| x.clone())); - - let new_tt_b = TokenTree::Sequence(sp_b, tokenstream::SequenceRepetition { op: tokenstream::KleeneOp::ZeroOrMore, .. seq_b.clone() }); - let mut new_b = seq_b.tts.iter().map(|x| x.clone()).collect::>(); - if let Some(ref sep) = seq_b.separator { new_b.push(TokenTree::Token(sp_b, sep.clone())) }; - new_b.push(new_tt_b); - new_b.extend(mb[idx_b + 1 ..].iter().map(|x| x.clone())); - - let mut ret = match check_matcher_firsts(cx, &new_a, &mb[idx_b ..], visited_spans) { - Error => return Error, - ret => ret - }; - - match check_matcher_firsts(cx, &ma[idx_a ..], &new_b, visited_spans) { - Error => return Error, - Unsure => ret = Unsure, - Ok => () - }; - - if seq_a.op == tokenstream::KleeneOp::ZeroOrMore { - match check_matcher_firsts(cx, &ma[idx_a + 1 ..], &mb[idx_b ..], visited_spans) { - Error => return Error, - Unsure => ret = Unsure, - Ok => () - }; - } - - if seq_b.op == tokenstream::KleeneOp::ZeroOrMore { - match check_matcher_firsts(cx, &ma[idx_a ..], &mb[idx_b + 1 ..], visited_spans) { - Error => return Error, - Unsure => ret = Unsure, - Ok => () - }; - } + (&TokenTree::Sequence(sp_a, ref seq_a), + &TokenTree::Sequence(sp_b, ref seq_b)) => { + let mut ret = check_sequence(sp_a, seq_a, &ma[idx_a + 1 ..], &mb[idx_b ..], |u, a| { + check_matcher_firsts(cx, u, a, visited_spans) + 
}); + + ret = ret.chain(|| { + check_sequence(sp_b, seq_b, &mb[idx_b + 1 ..], &ma[idx_a ..], |u, a| { + check_matcher_firsts(cx, a, u, visited_spans) + }) + }); return match ret { - Ok => Ok, Unsure => if need_disambiguation { Error } else { Unsure }, - Error => Error + _ => ret }; } (&TokenTree::Sequence(sp, ref seq), _) => { - // unroll 1 step. - let new_tt = TokenTree::Sequence(sp, tokenstream::SequenceRepetition { op: tokenstream::KleeneOp::ZeroOrMore, .. seq.clone() }); - let mut new_a = seq.tts.iter().map(|x| x.clone()).collect::>(); - if let Some(ref sep) = seq.separator { new_a.push(TokenTree::Token(sp, sep.clone())) }; - new_a.push(new_tt); - new_a.extend(ma[idx_a + 1 ..].iter().map(|x| x.clone())); - let mut ret = match check_matcher_firsts(cx, &new_a, &mb[idx_b ..], visited_spans) { - Error => return Error, - ret => ret - }; - - if seq.op == tokenstream::KleeneOp::ZeroOrMore { - match check_matcher_firsts(cx, &ma[idx_a + 1 ..], &mb[idx_b ..], visited_spans) { - Error => return Error, - Unsure => ret = Unsure, - Ok => () - }; - } + let ret = check_sequence(sp, seq, &ma[idx_a + 1 ..], &mb[idx_b ..], |u, a| { + check_matcher_firsts(cx, u, a, visited_spans) + }); return match ret { - Ok => Ok, Unsure => if need_disambiguation { Error } else { Unsure }, - Error => Error + _ => ret }; } (_, &TokenTree::Sequence(sp, ref seq)) => { - // unroll 1 step. - let new_tt = TokenTree::Sequence(sp, tokenstream::SequenceRepetition { op: tokenstream::KleeneOp::ZeroOrMore, .. 
seq.clone() }); - let mut new_b = seq.tts.iter().map(|x| x.clone()).collect::>(); - if let Some(ref sep) = seq.separator { new_b.push(TokenTree::Token(sp, sep.clone())) }; - new_b.push(new_tt); - new_b.extend(mb[idx_b + 1 ..].iter().map(|x| x.clone())); - let mut ret = match check_matcher_firsts(cx, &ma[idx_a ..], &new_b, visited_spans) { - Error => return Error, - ret => ret - }; - - if seq.op == tokenstream::KleeneOp::ZeroOrMore { - match check_matcher_firsts(cx, &ma[idx_a ..], &mb[idx_b + 1 ..], visited_spans) { - Error => return Error, - Unsure => ret = Unsure, - Ok => () - }; - - } + let ret = check_sequence(sp, seq, &mb[idx_b + 1 ..], &ma[idx_a ..], |u, a| { + check_matcher_firsts(cx, a, u, visited_spans) + }); return match ret { - Ok => Ok, Unsure => if need_disambiguation { Error } else { Unsure }, - Error => Error + _ => ret }; } - (&TokenTree::Token(_, MatchNt(_, nta)), - &TokenTree::Token(_, MatchNt(_, ntb))) => - if !(nt_is_single_tt(nta) && nt_is_single_tt(ntb)) { - return Error - }, + _ => () + } - (&TokenTree::Token(_, MatchNt(_, nt)), _) if !nt_is_single_tt(nt) => - return Error, + if match_same_input(ta, tb) { + continue; + } - // super specific corner case: if one arm is always one token, - // followed by the end of the macro invocation, then we can accept - // it. + if first_sets_disjoints(&ta, &tb, &firsts_a, &firsts_b) { + // accept the macro + return Ok + } + + // now we cannot say anything in the general case but we can + // still look if we are in a particular case we know how to handle... + match (ta, tb) { + (&TokenTree::Sequence(_, _), _) | + (_, &TokenTree::Sequence(_, _)) => + // cannot happen since we treated sequences earlier + cx.bug("unexpeceted seq"), (_ ,&TokenTree::Token(_, MatchNt(_, nt))) if !nt_is_single_tt(nt) => return if only_simple_tokens(&ma[idx_a..]) && !need_disambiguation { Unsure } else { Error }, - _ => () - } + // first case: NT vs _. 
+ // invariant: B is always a single-TT + + (&TokenTree::Token(_, MatchNt(_, nt)), _) + // ident or tt will never start matching more input + if nt.name.as_str() == "ident" || + nt.name.as_str() == "tt" => continue, + + (&TokenTree::Token(_, MatchNt(_, nt)), _) + if nt.name.as_str() == "block" => { + match tb { + &TokenTree::Delimited(_, ref delim) + if delim.delim == token::DelimToken::Brace => { + // we cannot say much here. we cannot look inside. we + // can just hope we will find an obvious disambiguation later + need_disambiguation = true; + continue; + } + &TokenTree::Token(_, MatchNt(_, nt)) + if nt.name.as_str() == "tt" => { + // same + need_disambiguation = true; + continue; + } + // should be the only possibility. + _ => cx.bug("unexpected matcher against block") + } + } - // A and B always both match a single TT - match (ta, tb) { - (&TokenTree::Sequence(_, _), _) | - (_, &TokenTree::Sequence(_, _)) => - // cannot happen since sequences are not always a single-TT - cx.bug("unexpeceted seq"), + (&TokenTree::Token(_, MatchNt(_, _)), _) => + // A is a NT matcher that is not tt, ident, or block (that is, A + // could match several token trees), we cannot know where we + // should continue the analysis. + return Error, - (&TokenTree::Token(_, MatchNt(_, nt)), _) | - (_ ,&TokenTree::Token(_, MatchNt(_, nt))) - if nt.name.as_str() == "tt" => - // this is okay for now, either A will always have priority, - // either B will always be unreachable. but search for errors - // further - continue, - - (&TokenTree::Token(_, MatchNt(_, _)), - &TokenTree::Token(_, MatchNt(_, _))) => - // this case cannot happen. the only NTs that are a single-TT - // and that are not tt are ident and block, that do not share any - // FIRST token. - cx.bug("unexpected NT vs. 
NT"), - - (&TokenTree::Token(_, MatchNt(_, nt)), &TokenTree::Token(_, Ident(_))) | - (&TokenTree::Token(_, Ident(_)), &TokenTree::Token(_, MatchNt(_, nt))) => - if nt.name.as_str() == "ident" { - // NT ident vs token ident. it's the same as with tt: - // either A is included, either B is unreachable - continue - } else { - // the only possible NT here is ident because the only token in - // the FIRST set of block is {, and { is not seen as a token but - // as the beginning of a Delim - // the only possible token is thus an hardcoded identifier or - // keyword. so the only possible case of NT vs. Token is the - // the case above. - cx.bug("unexpeceted NT vs. Token") - }, - - (&TokenTree::Token(_, MatchNt(_, nt)), - &TokenTree::Delimited(_, ref delim)) => { - // should be the only possibility. - assert!(nt.name.as_str() == "block" && - delim.delim == token::DelimToken::Brace); - // we cannot say much here. we cannot look inside. we - // can just hope we will find an obvious disambiguation later - need_disambiguation = true; - continue + // second case: T vs _. + // both A and B are always a single-TT + + (&TokenTree::Token(..), &TokenTree::Token(_, MatchNt(_, nt))) => { + assert!(nt.name.as_str() == "ident" || nt.name.as_str() == "tt"); + // the token will never match new input + continue; } (&TokenTree::Delimited(_, ref delim), - &TokenTree::Token(_, MatchNt(_, nt))) => { - assert!(nt.name.as_str() == "block" && - delim.delim == token::DelimToken::Brace); + &TokenTree::Token(_, MatchNt(_, _))) => { + // either block vs { } or tt vs any delim. // as with several-TTs NTs, if the above is only // made of simple tokens this is ok... need_disambiguation |= !only_simple_tokens(&delim.tts); - continue + continue; } (&TokenTree::Delimited(_, ref d1), &TokenTree::Delimited(_, ref d2)) => { // they have the same delim. as above. + assert!(d1.delim == d2.delim); + // descend into delimiters. 
match check_matcher_firsts(cx, &d1.tts, &d2.tts, visited_spans) { Ok => return Ok, Unsure => continue, @@ -827,8 +799,8 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree], } } - // cannot happen. either they're the same token or their FIRST sets - // are disjoint. + // cannot happen. either they're the same + // token or their FIRST sets are disjoint. (&TokenTree::Token(..), &TokenTree::Token(..)) | (&TokenTree::Token(..), &TokenTree::Delimited(..)) | (&TokenTree::Delimited(..), &TokenTree::Token(..)) => @@ -837,11 +809,25 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree], } // now we are at the end of one arm: + // we know that the last element on this arm was not a sequence (we would + // have returned earlier), so it cannot accept new input at this point. + // if the other arm always accept new input, that is, if it cannot accept + // the end of stream, then this is a disambiguation. + let (ma, mb): (Vec<_>, Vec<_>) = iter.unzip(); + for &(_, tt) in if ma.len() == 0 { mb.iter() } else { ma.iter() } { + match tt { + &TokenTree::Sequence(_, ref seq) + if seq.op == tokenstream::KleeneOp::ZeroOrMore => continue, + _ => + // this arm still expects input, while the other can't. + // use this as a disambiguation + return Ok + } + } + if need_disambiguation { // we couldn't find any. we cannot say anything about those arms. // reject conservatively. - // FIXME: if we are not at the end of the other arm, and that the other - // arm cannot derive empty, I think we could accept...? Error } else { // either A is strictly included in B and the other inputs that match B From 3a1828f31abffeb2293ee89e28c4b6e2f2e65a74 Mon Sep 17 00:00:00 2001 From: Leo Testard Date: Mon, 25 Jul 2016 14:34:05 +0200 Subject: [PATCH 10/10] Consider `tt` as a "simple token". This means a matcher will be accepted if it only contains tokens, `ident` or `tt` NT matchers, or delimited/sequences of them. 
This is valid because `tt` will never start matching more input than matches the second matcher. --- src/libsyntax/ext/tt/macro_rules.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/libsyntax/ext/tt/macro_rules.rs b/src/libsyntax/ext/tt/macro_rules.rs index 88165ad68dc6f..0c79e215555dc 100644 --- a/src/libsyntax/ext/tt/macro_rules.rs +++ b/src/libsyntax/ext/tt/macro_rules.rs @@ -837,10 +837,13 @@ fn check_matcher_firsts(cx: &ExtCtxt, ma: &[TokenTree], mb: &[TokenTree], } } -// checks that a matcher does not contain any NT except ident +// checks that a matcher does not contain any NT except ident or TT. +// that is, that it will never start matching new input fn only_simple_tokens(m: &[TokenTree]) -> bool { m.iter().all(|tt| match *tt { - TokenTree::Token(_, MatchNt(_, nt)) => nt.name.as_str() == "ident", + TokenTree::Token(_, MatchNt(_, nt)) => + nt.name.as_str() == "ident" || + nt.name.as_str() == "tt", TokenTree::Token(..) => true, TokenTree::Delimited(_, ref delim) => only_simple_tokens(&delim.tts), TokenTree::Sequence(_, ref seq) => only_simple_tokens(&seq.tts)