
Enforce the compiler-builtins partitioning scheme #135395

Open — wants to merge 3 commits into master (showing changes from 2 commits)
9 changes: 9 additions & 0 deletions compiler/rustc_mir_transform/src/cross_crate_inline.rs
@@ -34,6 +34,15 @@ fn cross_crate_inlinable(tcx: TyCtxt<'_>, def_id: LocalDefId) -> bool {
return true;
}

// compiler-builtins only defines intrinsics (which are handled above by checking
// contains_extern_indicator) and helper functions used by those intrinsics. The helper
// functions should always be inlined into intrinsics that use them. This check does not
// guarantee that we get the optimizations we want, but it makes them *much* easier.
// See https://github.com/rust-lang/rust/issues/73135
if tcx.is_compiler_builtins(rustc_span::def_id::LOCAL_CRATE) {
return true;
}
A reviewer (Member) commented:
Is there a way to make inlining inside the crate more likely without causing MIR for all functions in compiler-builtins to get encoded in the crate metadata?

The author (Member Author) replied:

I think what you're pointing out here is that these functions are not reachable as MIR, so we don't need to encode MIR for them. The problem as I see it is that our notion of reachable uses this worklist/visited algorithm that tracks items in a path-independent way:

```rust
while let Some(search_item) = self.worklist.pop() {
    if !scanned.insert(search_item) {
        continue;
    }
    self.propagate_node(&self.tcx.hir_node_by_def_id(search_item), search_item);
```

Also, we already have an issue for the inverse inefficiency (emitting object code when we only need MIR): #119214

I put a hack in this place specifically because, past the first few checks, the compiler is designed around this function returning either true or false. I'm not aware of anywhere else we could make a small, localized change to get the behavior we want.

The only other place I could think of putting a hack is `MonoItem::instantiation_mode`, but that doesn't work: we get linker errors because the instantiation mode needs to agree with `exported_symbols`, and those disagree because `exported_symbols` is based on `reachable_set`. I really think the inaccuracy of the `reachable_set` analysis is the root problem here, and it's net better to implement this in a non-invasive way that will be fixed automatically if `reachable_set` gets improved.
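The path-independent worklist described above can be modeled in isolation. This is a hedged, standalone sketch — plain integers stand in for `DefId`s and an explicit `edges` map stands in for `propagate_node`'s traversal; it is not the real rustc pass:

```rust
use std::collections::{HashMap, HashSet};

// Sketch of the reachability worklist: once an item is scanned it is never
// revisited, so the result records only *whether* an item is reachable, not
// *how* it was reached (e.g. "only as an inlining candidate" is
// indistinguishable from "as an exported symbol").
fn reachable_set(roots: &[u32], edges: &HashMap<u32, Vec<u32>>) -> HashSet<u32> {
    let mut scanned = HashSet::new();
    let mut worklist: Vec<u32> = roots.to_vec();
    while let Some(search_item) = worklist.pop() {
        if !scanned.insert(search_item) {
            continue; // already visited via some other path
        }
        if let Some(succs) = edges.get(&search_item) {
            worklist.extend(succs.iter().copied());
        }
    }
    scanned
}

fn main() {
    let mut edges = HashMap::new();
    edges.insert(1, vec![2, 3]);
    edges.insert(2, vec![3]);
    let set = reachable_set(&[1], &edges);
    assert_eq!(set.len(), 3); // 1, 2, 3 are reachable; path information is lost
}
```

Because the visited set is path-independent, there is nowhere in an analysis of this shape to record "reachable only as MIR" versus "reachable as object code".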

@saethlin (Member Author) commented on Jan 14, 2025:
Also, if I back up to my merge-base, `x build library`, then `ar x` the stage1-std `libcompiler_builtins.rlib` and run `du -sch *`, I get:

```
808K	lib.rmeta
5.7M	total
```

Then with my changes:

```
968K	lib.rmeta
4.1M	total
```

So even though it's not perfect, this PR is still a net win.
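For reference, the decision chain that the new check slots into can be sketched as a standalone function. The flags below are illustrative stand-ins for the real queries (`contains_extern_indicator`, `is_compiler_builtins`, and the later attribute/MIR-size heuristics) — this is not rustc API:

```rust
// Hypothetical model of the early checks in `cross_crate_inlinable`.
struct Item {
    has_extern_indicator: bool, // stand-in for contains_extern_indicator
    in_compiler_builtins: bool, // stand-in for is_compiler_builtins(LOCAL_CRATE)
    marked_inline: bool,        // stand-in for the remaining heuristics
}

fn cross_crate_inlinable(item: &Item) -> bool {
    // Intrinsics and other extern-indicated items were already handled
    // before this PR.
    if item.has_extern_indicator {
        return true;
    }
    // New in this PR: every function in compiler-builtins is either an
    // intrinsic or a helper for one, so always allow cross-crate inlining.
    if item.in_compiler_builtins {
        return true;
    }
    // ...the rest of the original heuristics follow here...
    item.marked_inline
}

fn main() {
    let helper = Item {
        has_extern_indicator: false,
        in_compiler_builtins: true,
        marked_inline: false,
    };
    assert!(cross_crate_inlinable(&helper));
}
```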


if tcx.has_attr(def_id, sym::rustc_intrinsic) {
// Intrinsic fallback bodies are always cross-crate inlineable.
// To ensure that the MIR inliner doesn't cluelessly try to inline fallback
61 changes: 40 additions & 21 deletions compiler/rustc_monomorphize/src/partitioning.rs
@@ -165,7 +165,8 @@
// estimates.
{
let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus");
merge_codegen_units(cx, &mut codegen_units);
let cgu_contents = merge_codegen_units(cx, &mut codegen_units);
rename_codegen_units(cx, &mut codegen_units, cgu_contents);
debug_dump(tcx, "MERGE", &codegen_units);
}

@@ -200,7 +201,6 @@
I: Iterator<Item = MonoItem<'tcx>>,
{
let mut codegen_units = UnordMap::default();
let is_incremental_build = cx.tcx.sess.opts.incremental.is_some();
let mut internalization_candidates = UnordSet::default();

// Determine if monomorphizations instantiated in this crate will be made
@@ -227,20 +227,8 @@
}
}

let characteristic_def_id = characteristic_def_id_of_mono_item(cx.tcx, mono_item);
let is_volatile = is_incremental_build && mono_item.is_generic_fn();

let cgu_name = match characteristic_def_id {
Some(def_id) => compute_codegen_unit_name(
cx.tcx,
cgu_name_builder,
def_id,
is_volatile,
cgu_name_cache,
),
None => fallback_cgu_name(cgu_name_builder),
};

let cgu_name =
compute_codegen_unit_name(cx.tcx, cgu_name_builder, mono_item, cgu_name_cache);
let cgu = codegen_units.entry(cgu_name).or_insert_with(|| CodegenUnit::new(cgu_name));

let mut can_be_internalized = true;
@@ -321,7 +309,7 @@
fn merge_codegen_units<'tcx>(
cx: &PartitioningCx<'_, 'tcx>,
codegen_units: &mut Vec<CodegenUnit<'tcx>>,
) {
) -> UnordMap<Symbol, Vec<Symbol>> {
assert!(cx.tcx.sess.codegen_units().as_usize() >= 1);

// A sorted order here ensures merging is deterministic.
@@ -331,6 +319,13 @@ fn merge_codegen_units<'tcx>(
let mut cgu_contents: UnordMap<Symbol, Vec<Symbol>> =
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();

// When compiling compiler_builtins, we do not want to put multiple intrinsics in a CGU.
// There may be mergeable CGUs under this constraint, but just skipping over merging is much
// simpler.
if cx.tcx.is_compiler_builtins(LOCAL_CRATE) {
return cgu_contents;
}

// If N is the maximum number of CGUs, and the CGUs are sorted from largest
// to smallest, we repeatedly find which CGU in codegen_units[N..] has the
// greatest overlap of inlined items with codegen_units[N-1], merge that
@@ -421,6 +416,14 @@ fn merge_codegen_units<'tcx>(
// Don't update `cgu_contents`, that's only for incremental builds.
}

cgu_contents
}
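Under simplified assumptions — merging by size rather than by inlined-item overlap, and a plain struct instead of `CodegenUnit` — the merge phase with its new compiler-builtins early-out might look like:

```rust
struct Cgu {
    name: String,
    size: usize,
}

// Sketch: fold the smallest CGUs together until we are at the requested
// count, except for compiler-builtins, where CGUs are never merged so no
// two intrinsics end up in the same object file.
fn merge_cgus(mut cgus: Vec<Cgu>, max: usize, is_compiler_builtins: bool) -> Vec<Cgu> {
    if is_compiler_builtins {
        return cgus; // one intrinsic (plus its helpers) per CGU
    }
    // Largest first; repeatedly fold the smallest CGU into the next-smallest.
    // (The real partitioner merges by inlined-item overlap instead.)
    cgus.sort_by(|a, b| b.size.cmp(&a.size));
    while cgus.len() > max {
        let small = cgus.pop().unwrap();
        let last = cgus.last_mut().unwrap();
        last.size += small.size;
        last.name = format!("{}-{}", last.name, small.name);
    }
    cgus
}

fn main() {
    let cgus = vec![
        Cgu { name: "a".into(), size: 30 },
        Cgu { name: "b".into(), size: 20 },
        Cgu { name: "c".into(), size: 10 },
    ];
    assert_eq!(merge_cgus(cgus, 2, false).len(), 2);
}
```

Skipping the merge for compiler-builtins can leave mergeable CGUs on the table, but as the comment in the diff notes, bailing out entirely is far simpler than merging under the one-intrinsic-per-CGU constraint.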

fn rename_codegen_units<'tcx>(
cx: &PartitioningCx<'_, 'tcx>,
codegen_units: &mut Vec<CodegenUnit<'tcx>>,
cgu_contents: UnordMap<Symbol, Vec<Symbol>>,
) {
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);

// Rename the newly merged CGUs.
@@ -678,13 +681,26 @@ fn characteristic_def_id_of_mono_item<'tcx>(
}
}

fn compute_codegen_unit_name(
tcx: TyCtxt<'_>,
fn compute_codegen_unit_name<'tcx>(
tcx: TyCtxt<'tcx>,
name_builder: &mut CodegenUnitNameBuilder<'_>,
def_id: DefId,
volatile: bool,
mono_item: MonoItem<'tcx>,
cache: &mut CguNameCache,
) -> Symbol {
// When compiling compiler_builtins, we do not want to put multiple intrinsics in a CGU.
// Using the symbol name as the CGU name puts every GloballyShared item in its own CGU, but in
// an optimized build we actually want every item in the crate that isn't an intrinsic to get
// LocalCopy so that it is easy to inline away. In an unoptimized build, this CGU naming
// strategy probably generates more CGUs than we strictly need. But it is simple.
if tcx.is_compiler_builtins(LOCAL_CRATE) {
let name = mono_item.symbol_name(tcx);
return Symbol::intern(name.name);
}

let Some(def_id) = characteristic_def_id_of_mono_item(tcx, mono_item) else {
return fallback_cgu_name(name_builder);
};

// Find the innermost module that is not nested within a function.
let mut current_def_id = def_id;
let mut cgu_def_id = None;
@@ -712,6 +728,9 @@ fn compute_codegen_unit_name(

let cgu_def_id = cgu_def_id.unwrap();

let is_incremental_build = tcx.sess.opts.incremental.is_some();
let volatile = is_incremental_build && mono_item.is_generic_fn();

*cache.entry((cgu_def_id, volatile)).or_insert_with(|| {
let def_path = tcx.def_path(cgu_def_id);

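A minimal sketch of the naming strategy now used by `compute_codegen_unit_name`, with simplified types (`&str` in place of `SymbolName`/`Symbol`, an `Option<&str>` in place of the characteristic `DefId`); the `cgu-` prefix is illustrative, not the real naming scheme:

```rust
// Sketch: inside compiler-builtins, each mono item's symbol name *is* its
// CGU name, so every GloballyShared item lands alone in its own CGU.
// Elsewhere, the characteristic module (or a fallback) names the CGU.
fn compute_cgu_name(
    is_compiler_builtins: bool,
    symbol_name: &str,
    characteristic_module: Option<&str>,
) -> String {
    if is_compiler_builtins {
        // One CGU per symbol: no two intrinsics can share a CGU.
        return symbol_name.to_string();
    }
    match characteristic_module {
        Some(module) => format!("cgu-{module}"),
        None => "cgu-fallback".to_string(),
    }
}

fn main() {
    assert_eq!(compute_cgu_name(true, "__addsf3", Some("float::add")), "__addsf3");
    assert_eq!(compute_cgu_name(false, "__addsf3", Some("float::add")), "cgu-float::add");
}
```

As the diff's comment says, per-symbol naming likely produces more CGUs than strictly necessary in unoptimized builds, but the simplicity is the point.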
13 changes: 0 additions & 13 deletions library/Cargo.toml
@@ -11,19 +11,6 @@ exclude = [
"windows_targets"
]

[profile.release.package.compiler_builtins]
# For compiler-builtins we always use a high number of codegen units.
# The goal here is to place every single intrinsic into its own object
# file to avoid symbol clashes with the system libgcc if possible. Note
# that this number doesn't actually produce this many object files, we
# just don't create more than this number of object files.
#
# It's a bit of a bummer that we have to pass this here, unfortunately.
# Ideally this would be specified through an env var to Cargo so Cargo
# knows how many CGUs are for this specific crate, but for now
# per-crate configuration isn't specifiable in the environment.
codegen-units = 10000

# These dependencies of the standard library implement symbolication for
# backtraces on most platforms. Their debuginfo causes both linking to be slower
# (more data to chew through) and binaries to be larger without really all that