Only use the new node hashmap for anonymous nodes. #112469

Open · wants to merge 5 commits into base: master
11 changes: 6 additions & 5 deletions compiler/rustc_codegen_cranelift/src/driver/aot.rs
@@ -522,11 +522,12 @@ fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) -> CguR
// know that later). If we are not doing LTO, there is only one optimized
// version of each module, so we re-use that.
let dep_node = cgu.codegen_dep_node(tcx);
assert!(
!tcx.dep_graph.dep_node_exists(&dep_node),
"CompileCodegenUnit dep-node for CGU `{}` already exists before marking.",
cgu.name()
);
tcx.dep_graph.assert_dep_node_not_yet_allocated_in_current_session(&dep_node, || {
format!(
"CompileCodegenUnit dep-node for CGU `{}` already exists before marking.",
cgu.name()
)
});

if tcx.try_mark_green(&dep_node) { CguReuse::PostLto } else { CguReuse::No }
}
11 changes: 6 additions & 5 deletions compiler/rustc_codegen_ssa/src/base.rs
@@ -1031,11 +1031,12 @@ fn determine_cgu_reuse<'tcx>(tcx: TyCtxt<'tcx>, cgu: &CodegenUnit<'tcx>) -> CguR
// know that later). If we are not doing LTO, there is only one optimized
// version of each module, so we re-use that.
let dep_node = cgu.codegen_dep_node(tcx);
assert!(
!tcx.dep_graph.dep_node_exists(&dep_node),
"CompileCodegenUnit dep-node for CGU `{}` already exists before marking.",
cgu.name()
);
tcx.dep_graph.assert_dep_node_not_yet_allocated_in_current_session(&dep_node, || {
format!(
"CompileCodegenUnit dep-node for CGU `{}` already exists before marking.",
cgu.name()
)
});

if tcx.try_mark_green(&dep_node) {
// We can re-use either the pre- or the post-thinlto state. If no LTO is
2 changes: 1 addition & 1 deletion compiler/rustc_incremental/src/persist/save.rs
@@ -170,7 +170,7 @@ pub fn build_dep_graph(
sess.opts.dep_tracking_hash(false).encode(&mut encoder);

Some(DepGraph::new(
&sess.prof,
sess,
prev_graph,
prev_work_products,
encoder,
165 changes: 105 additions & 60 deletions compiler/rustc_query_system/src/dep_graph/graph.rs
@@ -9,6 +9,7 @@ use rustc_data_structures::sync::{AtomicU32, AtomicU64, Lock, Lrc, Ordering};
use rustc_data_structures::unord::UnordMap;
use rustc_index::IndexVec;
use rustc_serialize::opaque::{FileEncodeResult, FileEncoder};
use rustc_session::Session;
use smallvec::{smallvec, SmallVec};
use std::assert_matches::assert_matches;
use std::collections::hash_map::Entry;
@@ -115,7 +116,7 @@ where

impl<K: DepKind> DepGraph<K> {
pub fn new(
profiler: &SelfProfilerRef,
session: &Session,
prev_graph: SerializedDepGraph<K>,
prev_work_products: FxIndexMap<WorkProductId, WorkProduct>,
encoder: FileEncoder,
@@ -125,7 +126,7 @@ impl<K: DepKind> DepGraph<K> {
let prev_graph_node_count = prev_graph.node_count();

let current = CurrentDepGraph::new(
profiler,
session,
prev_graph_node_count,
encoder,
record_graph,
@@ -136,7 +137,7 @@

// Instantiate a dependency-less node only once for anonymous queries.
let _green_node_index = current.intern_new_node(
profiler,
&session.prof,
DepNode { kind: DepKind::NULL, hash: current.anon_id_seed.into() },
smallvec![],
Fingerprint::ZERO,
@@ -145,7 +146,7 @@

// Instantiate a dependency-less red node only once for anonymous queries.
let (red_node_index, red_node_prev_index_and_color) = current.intern_node(
profiler,
&session.prof,
&prev_graph,
DepNode { kind: DepKind::RED, hash: Fingerprint::ZERO.into() },
smallvec![],
@@ -348,12 +349,13 @@ impl<K: DepKind> DepGraphData<K> {
// in `DepGraph::try_mark_green()`.
// 2. Two distinct query keys get mapped to the same `DepNode`
// (see for example #48923).
assert!(
!self.dep_node_exists(&key),
"forcing query with already existing `DepNode`\n\
- query-key: {arg:?}\n\
- dep-node: {key:?}"
);
self.assert_dep_node_not_yet_allocated_in_current_session(&key, || {
format!(
"forcing query with already existing `DepNode`\n\
- query-key: {arg:?}\n\
- dep-node: {key:?}"
)
});

let with_deps = |task_deps| K::with_deps(task_deps, || task(cx, arg));
let (result, edges) = if cx.dep_context().is_eval_always(key.kind) {
@@ -449,12 +451,32 @@ impl<K: DepKind> DepGraphData<K> {
hash: self.current.anon_id_seed.combine(hasher.finish()).into(),
};

self.current.intern_new_node(
cx.profiler(),
target_dep_node,
task_deps,
Fingerprint::ZERO,
)
// The `DepNode`s generated by the process above are not unique: two queries
// could have exactly the same dependencies. However, deserialization does not
// handle duplicated nodes, so we do the deduplication here directly.
//
// As anonymous nodes are only a small fraction of the full dep-graph, the
// memory impact of this `anon_node_to_index` map remains tolerable, and it
// helps us avoid useless growth of the graph with almost-equivalent nodes.
match self
.current
.anon_node_to_index
.get_shard_by_value(&target_dep_node)
.lock()
.entry(target_dep_node)
{
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
let dep_node_index = self.current.intern_new_node(
cx.profiler(),
target_dep_node,
task_deps,
Fingerprint::ZERO,
);
entry.insert(dep_node_index);
dep_node_index
}
}
}
};
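The deduplication above hinges on an entry-or-insert lookup keyed by the freshly computed `DepNode`: an occupied entry reuses the existing index, and only a vacant entry allocates a new one. A minimal standalone sketch of that pattern, using plain `std` types in place of rustc's `Sharded`/`FxHashMap` (the `AnonNode`/`NodeIndex` stand-ins below are illustrative, not rustc APIs):

```rust
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Mutex;

// Illustrative stand-ins for rustc's `DepNode` and `DepNodeIndex`.
type AnonNode = (u64, u64); // (dep-kind, hash of the edge list)
type NodeIndex = u32;

struct AnonDedup {
    // rustc shards this map (`Sharded<FxHashMap<..>>`) to reduce lock
    // contention; one mutex-protected map is enough to show the logic.
    anon_node_to_index: Mutex<HashMap<AnonNode, NodeIndex>>,
    next_index: AtomicU32,
}

impl AnonDedup {
    /// Returns the existing index if an identical anonymous node was already
    /// interned this session; otherwise allocates and records a new one.
    fn intern_anon(&self, node: AnonNode) -> NodeIndex {
        match self.anon_node_to_index.lock().unwrap().entry(node) {
            Entry::Occupied(entry) => *entry.get(),
            Entry::Vacant(entry) => {
                let index = self.next_index.fetch_add(1, Ordering::Relaxed);
                entry.insert(index);
                index
            }
        }
    }
}

fn main() {
    let dedup = AnonDedup {
        anon_node_to_index: Mutex::new(HashMap::new()),
        next_index: AtomicU32::new(0),
    };
    // Two anonymous nodes with identical kind and edges collapse to one index.
    assert_eq!(dedup.intern_anon((1, 42)), dedup.intern_anon((1, 42)));
    assert_ne!(dedup.intern_anon((1, 42)), dedup.intern_anon((2, 7)));
}
```

In the PR itself the vacant arm calls `CurrentDepGraph::intern_new_node`, so each deduplicated anonymous node still reaches the on-disk encoder exactly once.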

@@ -625,25 +647,22 @@ impl<K: DepKind> DepGraph<K> {
}

impl<K: DepKind> DepGraphData<K> {
#[inline]
pub fn dep_node_index_of_opt(&self, dep_node: &DepNode<K>) -> Option<DepNodeIndex> {
fn assert_dep_node_not_yet_allocated_in_current_session<S: std::fmt::Display>(
&self,
dep_node: &DepNode<K>,
msg: impl FnOnce() -> S,
) {
if let Some(prev_index) = self.previous.node_to_index_opt(dep_node) {
self.current.prev_index_to_index.lock()[prev_index]
} else {
self.current
.new_node_to_index
.get_shard_by_value(dep_node)
.lock()
.get(dep_node)
.copied()
let current = self.current.prev_index_to_index.lock()[prev_index];
assert!(current.is_none(), "{}", msg())
} else if let Some(nodes_newly_allocated_in_current_session) =
&self.current.nodes_newly_allocated_in_current_session
{
let seen = nodes_newly_allocated_in_current_session.lock().contains(dep_node);
assert!(!seen, "{}", msg());
}
}

#[inline]
pub fn dep_node_exists(&self, dep_node: &DepNode<K>) -> bool {
self.dep_node_index_of_opt(dep_node).is_some()
}

fn node_color(&self, dep_node: &DepNode<K>) -> Option<DepNodeColor> {
if let Some(prev_index) = self.previous.node_to_index_opt(dep_node) {
self.colors.get(prev_index)
@@ -676,11 +695,6 @@ impl<K: DepKind> DepGraphData<K> {
}

impl<K: DepKind> DepGraph<K> {
#[inline]
pub fn dep_node_exists(&self, dep_node: &DepNode<K>) -> bool {
self.data.as_ref().is_some_and(|data| data.dep_node_exists(dep_node))
}

/// Checks whether a previous work product exists for `v` and, if
/// so, return the path that leads to it. Used to skip doing work.
pub fn previous_work_product(&self, v: &WorkProductId) -> Option<WorkProduct> {
@@ -762,7 +776,7 @@ impl<K: DepKind> DepGraphData<K> {
}
}

#[instrument(skip(self, qcx, parent_dep_node_index, frame), level = "debug")]
#[instrument(skip(self, qcx, frame), level = "debug")]
fn try_mark_parent_green<Qcx: QueryContext<DepKind = K>>(
&self,
qcx: Qcx,
@@ -861,10 +875,11 @@ impl<K: DepKind> DepGraphData<K> {
let frame = MarkFrame { index: prev_dep_node_index, parent: frame };

#[cfg(not(parallel_compiler))]
{
debug_assert!(!self.dep_node_exists(dep_node));
debug_assert!(self.colors.get(prev_dep_node_index).is_none());
}
self.assert_dep_node_not_yet_allocated_in_current_session(dep_node, || {
format!("trying to mark existing {dep_node:?} as green")
});
#[cfg(not(parallel_compiler))]
debug_assert!(self.colors.get(prev_dep_node_index).is_none());

// We never try to mark eval_always nodes as green
debug_assert!(!qcx.dep_context().is_eval_always(dep_node.kind));
@@ -959,6 +974,16 @@ impl<K: DepKind> DepGraph<K> {
self.node_color(dep_node).is_some_and(|c| c.is_green())
}

pub fn assert_dep_node_not_yet_allocated_in_current_session<S: std::fmt::Display>(
&self,
dep_node: &DepNode<K>,
msg: impl FnOnce() -> S,
) {
if let Some(data) = &self.data {
data.assert_dep_node_not_yet_allocated_in_current_session(dep_node, msg)
}
}

/// This method loads all on-disk cacheable query results into memory, so
/// they can be written out to the new cache file again. Most query results
/// will already be in memory but in the case where we marked something as
@@ -1066,24 +1091,24 @@ rustc_index::newtype_index! {
/// largest in the compiler.
///
/// For this reason, we avoid storing `DepNode`s more than once as map
/// keys. The `new_node_to_index` map only contains nodes not in the previous
/// keys. The `anon_node_to_index` map only contains nodes of anonymous queries not in the previous
/// graph, and we map nodes in the previous graph to indices via a two-step
/// mapping. `SerializedDepGraph` maps from `DepNode` to `SerializedDepNodeIndex`,
/// and the `prev_index_to_index` vector (which is more compact and faster than
/// using a map) maps from `SerializedDepNodeIndex` to `DepNodeIndex`.
///
/// This struct uses three locks internally. The `data`, `new_node_to_index`,
/// This struct uses three locks internally. The `data`, `anon_node_to_index`,
/// and `prev_index_to_index` fields are locked separately. Operations that take
/// a `DepNodeIndex` typically just access the `data` field.
///
/// We only need to manipulate at most two locks simultaneously:
/// `new_node_to_index` and `data`, or `prev_index_to_index` and `data`. When
/// manipulating both, we acquire `new_node_to_index` or `prev_index_to_index`
/// `anon_node_to_index` and `data`, or `prev_index_to_index` and `data`. When
/// manipulating both, we acquire `anon_node_to_index` or `prev_index_to_index`
/// first, and `data` second.
pub(super) struct CurrentDepGraph<K: DepKind> {
encoder: Steal<GraphEncoder<K>>,
new_node_to_index: Sharded<FxHashMap<DepNode<K>, DepNodeIndex>>,
prev_index_to_index: Lock<IndexVec<SerializedDepNodeIndex, Option<DepNodeIndex>>>,
anon_node_to_index: Sharded<FxHashMap<DepNode<K>, DepNodeIndex>>,

/// This is used to verify that fingerprints do not change between the creation of a node
/// and its recomputation.
@@ -1095,6 +1120,13 @@ pub(super) struct CurrentDepGraph<K: DepKind> {
#[cfg(debug_assertions)]
forbidden_edge: Option<EdgeFilter<K>>,

/// Used to verify the absence of hash collisions among DepNodes.
/// This field is only `Some` if the `-Z incremental_verify_ich` option is present.
///
/// The map contains all DepNodes that have been allocated in the current session so far and
/// for which there is no equivalent in the previous session.
nodes_newly_allocated_in_current_session: Option<Lock<FxHashSet<DepNode<K>>>>,

/// Anonymous `DepNode`s are nodes whose IDs we compute from the list of
/// their edges. This has the beneficial side-effect that multiple anonymous
/// nodes can be coalesced into one without changing the semantics of the
@@ -1122,7 +1154,7 @@ pub(super) struct CurrentDepGraph<K: DepKind> {

impl<K: DepKind> CurrentDepGraph<K> {
fn new(
profiler: &SelfProfilerRef,
session: &Session,
prev_graph_node_count: usize,
encoder: FileEncoder,
record_graph: bool,
@@ -1152,7 +1184,8 @@ impl<K: DepKind> CurrentDepGraph<K> {

let new_node_count_estimate = 102 * prev_graph_node_count / 100 + 200;

let node_intern_event_id = profiler
let node_intern_event_id = session
.prof
.get_or_alloc_cached_string("incr_comp_intern_dep_graph_node")
.map(EventId::from_label);

@@ -1163,7 +1196,7 @@
record_graph,
record_stats,
)),
new_node_to_index: Sharded::new(|| {
anon_node_to_index: Sharded::new(|| {
FxHashMap::with_capacity_and_hasher(
new_node_count_estimate / sharded::SHARDS,
Review comment (Contributor): This size estimate is probably a bit off now, but I'm not sure what we'd replace it with.

Default::default(),
Expand All @@ -1175,6 +1208,16 @@ impl<K: DepKind> CurrentDepGraph<K> {
forbidden_edge,
#[cfg(debug_assertions)]
fingerprints: Lock::new(IndexVec::from_elem_n(None, new_node_count_estimate)),
nodes_newly_allocated_in_current_session: session
.opts
.unstable_opts
.incremental_verify_ich
.then(|| {
Lock::new(FxHashSet::with_capacity_and_hasher(
new_node_count_estimate,
Default::default(),
))
}),
total_read_count: AtomicU64::new(0),
total_duplicate_read_count: AtomicU64::new(0),
node_intern_event_id,
@@ -1200,20 +1243,19 @@
edges: EdgesVec,
current_fingerprint: Fingerprint,
) -> DepNodeIndex {
Review comment (Contributor): This function should probably be renamed `alloc_new_node` as it no longer does interning.

let dep_node_index = match self.new_node_to_index.get_shard_by_value(&key).lock().entry(key)
{
Entry::Occupied(entry) => *entry.get(),
Entry::Vacant(entry) => {
let dep_node_index =
self.encoder.borrow().send(profiler, key, current_fingerprint, edges);
entry.insert(dep_node_index);
dep_node_index
}
};
let dep_node_index = self.encoder.borrow().send(profiler, key, current_fingerprint, edges);

#[cfg(debug_assertions)]
self.record_edge(dep_node_index, key, current_fingerprint);

if let Some(ref nodes_newly_allocated_in_current_session) =
self.nodes_newly_allocated_in_current_session
{
if !nodes_newly_allocated_in_current_session.lock().insert(key) {
Review comment (Contributor): I'd put `cold_path` around this. (See the sketch after this hunk.)

panic!("Found duplicate dep-node {key:?}");
}
}

dep_node_index
}
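On the `cold_path` suggestion in the review comment above: the aim is to keep the duplicate-node panic out of the hot allocation path so the common case stays compact. A hedged sketch of that shape, with the cold-path helper written out locally rather than assuming a particular `rustc_data_structures` API:

```rust
use std::collections::HashSet;
use std::sync::Mutex;

/// Local illustration of a cold-path helper: `#[cold]` plus `#[inline(never)]`
/// pushes the closure out of line, keeping the caller's hot path small.
#[cold]
#[inline(never)]
fn cold_path<R>(f: impl FnOnce() -> R) -> R {
    f()
}

/// Records a key for the current session and panics, off the hot path,
/// if an identical key was already recorded.
fn record_new_node(seen: &Mutex<HashSet<u64>>, key: u64) {
    if !seen.lock().unwrap().insert(key) {
        cold_path(|| panic!("Found duplicate dep-node {key:?}"));
    }
}

fn main() {
    let seen = Mutex::new(HashSet::new());
    record_new_node(&seen, 1);
    record_new_node(&seen, 2); // distinct keys: no panic
}
```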

@@ -1328,7 +1370,10 @@ impl<K: DepKind> CurrentDepGraph<K> {
) {
let node = &prev_graph.index_to_node(prev_index);
debug_assert!(
!self.new_node_to_index.get_shard_by_value(node).lock().contains_key(node),
!self
.nodes_newly_allocated_in_current_session
.as_ref()
.map_or(false, |set| set.lock().contains(node)),
"node from previous graph present in new node collection"
);
}
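The debug assertion above separates nodes carried over from the previous session (reached through `prev_index_to_index`) from nodes newly allocated in the current one. For the former, the `CurrentDepGraph` doc comment earlier describes a two-step mapping: `DepNode` to `SerializedDepNodeIndex` via the serialized graph, then `SerializedDepNodeIndex` to `DepNodeIndex` via a dense vector of options. A minimal sketch with `std` types only (all names below are illustrative, not rustc APIs):

```rust
use std::collections::HashMap;

// Illustrative stand-ins for rustc's node and index types.
type DepNode = &'static str;
type SerializedDepNodeIndex = usize; // index in the previous session's graph
type DepNodeIndex = u32; // index in the current session's graph

struct TwoStepMapping {
    // Step 1: `DepNode` -> index in the serialized (previous) graph.
    prev_node_to_index: HashMap<DepNode, SerializedDepNodeIndex>,
    // Step 2: a dense vector (more compact than a second map) recording which
    // previous-session nodes have been promoted to a current-session index.
    prev_index_to_index: Vec<Option<DepNodeIndex>>,
}

impl TwoStepMapping {
    /// Resolves a previous-session node to its current-session index,
    /// if it has already been promoted during this session.
    fn current_index_of(&self, node: DepNode) -> Option<DepNodeIndex> {
        let prev = *self.prev_node_to_index.get(node)?;
        self.prev_index_to_index[prev]
    }
}

fn main() {
    let mapping = TwoStepMapping {
        prev_node_to_index: HashMap::from([("typeck(foo)", 0), ("mir_built(foo)", 1)]),
        prev_index_to_index: vec![Some(7), None],
    };
    assert_eq!(mapping.current_index_of("typeck(foo)"), Some(7));
    assert_eq!(mapping.current_index_of("mir_built(foo)"), None);
}
```

In rustc the second step lives behind a `Lock<IndexVec<..>>`, and per the lock-ordering note in the `CurrentDepGraph` doc comment it is acquired before `data` when both are needed.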
4 changes: 3 additions & 1 deletion compiler/rustc_session/src/options.rs
@@ -1531,7 +1531,9 @@ options! {
incremental_relative_spans: bool = (false, parse_bool, [TRACKED],
"hash spans relative to their parent item for incr. comp. (default: no)"),
incremental_verify_ich: bool = (false, parse_bool, [UNTRACKED],
"verify incr. comp. hashes of green query instances (default: no)"),
"verify extended properties for incr. comp. (default: no):
- hashes of green query instances;
- hash collisions when creating dep-nodes."),
inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
"control whether `#[inline]` functions are in all CGUs"),
inline_llvm: bool = (true, parse_bool, [TRACKED],