Return earlier in some cases in collect_token.

nnethercote · nnethercote · commit 1ae521e9d57b · 2024-08-23T14:40:08.000+10:00
This example triggers an assertion failure: ``` fn f() -> u32 { #[cfg_eval] #[cfg(not(FALSE))] 0 } ``` The sequence of events: - `configure_annotatable` calls `parse_expr_force_collect`, which calls `collect_tokens`. - Within that, we end up in `parse_expr_dot_or_call`, which again calls `collect_tokens`. - The return value of the `f` call is the expression `0`. - This inner call collects tokens for `0` (parser range 10..11) and creates a replacement covering `#[cfg(not(FALSE))] 0` (parser range 0..11). - We return to the outer `collect_tokens` call. The return value of the `f` call is *again* the expression `0`, again with the range 10..11, but the replacement from earlier covers the range 0..11. The code mistakenly assumes that any attributes from an inner `collect_tokens` call fit entirely within the body of the result of an outer `collect_tokens` call. So it adjusts the replacement parser range 0..11 to a node range by subtracting 10, resulting in -10..1. This is an invalid range and triggers an assertion failure. It's tricky to follow, but basically things get complicated when an AST node is returned from an inner `collect_tokens` call and then returned again from an outer `collect_token` node without being wrapped in any kind of additional layer. This commit changes `collect_tokens` to return early in some extra cases, avoiding the construction of lazy tokens. In the example above, the outer `collect_tokens` returns earlier because the `0` token already has tokens and `self.capture_state.capturing` is `Capturing::No`. This early return avoids the creation of the invalid range and the assertion failure. Fixes rust-lang#129166. Note: these invalid ranges have been happening for a long time. rust-lang#128725 looks like it's at fault only because it introduced the assertion that catches the invalid ranges.
diff --git a/compiler/rustc_parse/src/parser/attr_wrapper.rs b/compiler/rustc_parse/src/parser/attr_wrapper.rs
@@ -234,6 +234,8 @@ impl<'a> Parser<'a> {
         force_collect: ForceCollect,
         f: impl FnOnce(&mut Self, AttrVec) -> PResult<'a, (R, Trailing, UsePreAttrPos)>,
     ) -> PResult<'a, R> {
+        let possible_capture_mode = self.capture_cfg;
+
         // We must collect if anything could observe the collected tokens, i.e.
         // if any of the following conditions hold.
         // - We are force collecting tokens (because force collection requires
@@ -244,9 +246,9 @@ impl<'a> Parser<'a> {
             // - Our target supports custom inner attributes (custom
             //   inner attribute invocation might require token capturing).
             || R::SUPPORTS_CUSTOM_INNER_ATTRS
-            // - We are in `capture_cfg` mode (which requires tokens if
+            // - We are in "possible capture mode" (which requires tokens if
             //   the parsed node has `#[cfg]` or `#[cfg_attr]` attributes).
-            || self.capture_cfg;
+            || possible_capture_mode;
         if !needs_collection {
             return Ok(f(self, attrs.attrs)?.0);
         }
@@ -267,7 +269,7 @@ impl<'a> Parser<'a> {
             res?
         };
 
-        // When we're not in `capture_cfg` mode, then skip collecting and
+        // When we're not in "definite capture mode", then skip collecting and
         // return early if either of the following conditions hold.
         // - `None`: Our target doesn't support tokens at all (e.g. `NtIdent`).
         // - `Some(Some(_))`: Our target already has tokens set (e.g. we've
@@ -278,7 +280,10 @@ impl<'a> Parser<'a> {
         // Note that this check is independent of `force_collect`. There's no
         // need to collect tokens when we don't support tokens or already have
         // tokens.
-        if !self.capture_cfg && matches!(ret.tokens_mut(), None | Some(Some(_))) {
+        let definite_capture_mode = self.capture_cfg
+            && matches!(self.capture_state.capturing, Capturing::Yes)
+            && has_cfg_or_cfg_attr(ret.attrs());
+        if !definite_capture_mode && matches!(ret.tokens_mut(), None | Some(Some(_))) {
             return Ok(ret);
         }
 
@@ -298,11 +303,11 @@ impl<'a> Parser<'a> {
             //   the earlier `needs_tokens` check, and we don't need to
             //   check `R::SUPPORTS_CUSTOM_INNER_ATTRS`.)
             || needs_tokens(ret.attrs())
-            // - We are in `capture_cfg` mode and there are `#[cfg]` or
-            //   `#[cfg_attr]` attributes. (During normal non-`capture_cfg`
-            //   parsing, we don't need any special capturing for those
-            //   attributes, because they're builtin.)
-            || (self.capture_cfg && has_cfg_or_cfg_attr(ret.attrs()));
+            // - We are in "definite capture mode", which requires that there
+            //   are `#[cfg]` or `#[cfg_attr]` attributes. (During normal
+            //   non-`capture_cfg` parsing, we don't need any special capturing
+            //   for those attributes, because they're builtin.)
+            || definite_capture_mode;
         if !needs_collection {
             return Ok(ret);
         }
@@ -399,20 +404,18 @@ impl<'a> Parser<'a> {
             break_last_token: self.break_last_token,
             node_replacements,
         });
+        let mut tokens_used = false;
 
         // If we support tokens and don't already have them, store the newly captured tokens.
         if let Some(target_tokens @ None) = ret.tokens_mut() {
+            tokens_used = true;
             *target_tokens = Some(tokens.clone());
         }
 
-        // If `capture_cfg` is set and we're inside a recursive call to
-        // `collect_tokens`, then we need to register a replace range if we
-        // have `#[cfg]` or `#[cfg_attr]`. This allows us to run eager
-        // cfg-expansion on the captured token stream.
-        if self.capture_cfg
-            && matches!(self.capture_state.capturing, Capturing::Yes)
-            && has_cfg_or_cfg_attr(ret.attrs())
-        {
+        // If in "definite capture mode" we need to register a replace range
+        // for the `#[cfg]` and/or `#[cfg_attr]` attrs. This allows us to run
+        // eager cfg-expansion on the captured token stream.
+        if definite_capture_mode {
             assert!(!self.break_last_token, "Should not have unglued last token with cfg attr");
 
             // What is the status here when parsing the example code at the top of this method?
@@ -430,6 +433,7 @@ impl<'a> Parser<'a> {
             let start_pos =
                 if has_outer_attrs { attrs.start_pos.unwrap() } else { collect_pos.start_pos };
             let target = AttrsTarget { attrs: ret.attrs().iter().cloned().collect(), tokens };
+            tokens_used = true;
             self.capture_state
                 .parser_replacements
                 .push((ParserRange(start_pos..end_pos), Some(target)));
@@ -439,6 +443,7 @@ impl<'a> Parser<'a> {
             self.capture_state.parser_replacements.clear();
             self.capture_state.inner_attr_parser_ranges.clear();
         }
+        assert!(tokens_used); // check we didn't create `tokens` unnecessarily
         Ok(ret)
     }
 }
diff --git a/tests/crashes/129166.rs b/tests/crashes/129166.rs
diff --git a/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.rs
@@ -0,0 +1,11 @@
+// This was triggering an assertion failure in `NodeRange::new`.
+
+#![feature(cfg_eval)]
+#![feature(stmt_expr_attributes)]
+
+fn f() -> u32 {
+    #[cfg_eval] #[cfg(not(FALSE))] 0
+    //~^ ERROR removing an expression is not supported in this position
+}
+
+fn main() {}
diff --git a/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr b/tests/ui/conditional-compilation/invalid-node-range-issue-129166.stderr
@@ -0,0 +1,8 @@
+error: removing an expression is not supported in this position
+  --> $DIR/invalid-node-range-issue-129166.rs:7:17
+   |
+LL |     #[cfg_eval] #[cfg(not(FALSE))] 0
+   |                 ^^^^^^^^^^^^^^^^^^
+
+error: aborting due to 1 previous error
+