Skip to content

Commit 1fa9449

Browse files
committed
Auto merge of #53356 - michaelwoerister:itlto, r=alexcrichton
Preliminary work for incremental ThinLTO (CGU name edition). Bring back the first half of #52266 but hopefully without the performance regression.
2 parents c8c587f + d662083 commit 1fa9449

File tree

21 files changed

+331
-172
lines changed

21 files changed

+331
-172
lines changed

src/librustc/mir/mono.rs

+95-2
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,16 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use hir::def_id::DefId;
11+
use hir::def_id::{DefId, CrateNum};
1212
use syntax::ast::NodeId;
13-
use syntax::symbol::InternedString;
13+
use syntax::symbol::{Symbol, InternedString};
1414
use ty::{Instance, TyCtxt};
1515
use util::nodemap::FxHashMap;
1616
use rustc_data_structures::base_n;
1717
use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
1818
StableHasher};
1919
use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode};
20+
use std::fmt;
2021
use std::hash::Hash;
2122

2223
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)]
@@ -239,3 +240,95 @@ impl Stats {
239240
self.fn_stats.extend(stats.fn_stats);
240241
}
241242
}
243+
244+
pub struct CodegenUnitNameBuilder<'a, 'gcx: 'tcx, 'tcx: 'a> {
245+
tcx: TyCtxt<'a, 'gcx, 'tcx>,
246+
cache: FxHashMap<CrateNum, String>,
247+
}
248+
249+
impl<'a, 'gcx: 'tcx, 'tcx: 'a> CodegenUnitNameBuilder<'a, 'gcx, 'tcx> {
250+
251+
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>) -> Self {
252+
CodegenUnitNameBuilder {
253+
tcx,
254+
cache: FxHashMap(),
255+
}
256+
}
257+
258+
/// CGU names should fulfill the following requirements:
259+
/// - They should be able to act as a file name on any kind of file system
260+
/// - They should not collide with other CGU names, even for different versions
261+
/// of the same crate.
262+
///
263+
/// Consequently, we don't use special characters except for '.' and '-' and we
264+
/// prefix each name with the crate-name and crate-disambiguator.
265+
///
266+
/// This function will build CGU names of the form:
267+
///
268+
/// ```
269+
/// <crate-name>.<crate-disambiguator>(-<component>)*[.<special-suffix>]
270+
/// ```
271+
///
272+
/// The '.' before `<special-suffix>` makes sure that names with a special
273+
/// suffix can never collide with a name built out of regular Rust
274+
/// identifiers (e.g. module paths).
275+
pub fn build_cgu_name<I, C, S>(&mut self,
276+
cnum: CrateNum,
277+
components: I,
278+
special_suffix: Option<S>)
279+
-> InternedString
280+
where I: IntoIterator<Item=C>,
281+
C: fmt::Display,
282+
S: fmt::Display,
283+
{
284+
let cgu_name = self.build_cgu_name_no_mangle(cnum,
285+
components,
286+
special_suffix);
287+
288+
if self.tcx.sess.opts.debugging_opts.human_readable_cgu_names {
289+
cgu_name
290+
} else {
291+
let cgu_name = &cgu_name.as_str()[..];
292+
Symbol::intern(&CodegenUnit::mangle_name(cgu_name)).as_interned_str()
293+
}
294+
}
295+
296+
/// Same as `CodegenUnitNameBuilder::build_cgu_name()` but will never mangle the
297+
/// resulting name.
298+
pub fn build_cgu_name_no_mangle<I, C, S>(&mut self,
299+
cnum: CrateNum,
300+
components: I,
301+
special_suffix: Option<S>)
302+
-> InternedString
303+
where I: IntoIterator<Item=C>,
304+
C: fmt::Display,
305+
S: fmt::Display,
306+
{
307+
use std::fmt::Write;
308+
309+
let mut cgu_name = String::with_capacity(64);
310+
311+
// Start out with the crate name and disambiguator
312+
let tcx = self.tcx;
313+
let crate_prefix = self.cache.entry(cnum).or_insert_with(|| {
314+
let crate_disambiguator = format!("{}", tcx.crate_disambiguator(cnum));
315+
// Use only the first 8 characters of the encoded disambiguator (about 40 bits)
316+
format!("{}.{}", tcx.crate_name(cnum), &crate_disambiguator[0 .. 8])
317+
});
318+
319+
write!(cgu_name, "{}", crate_prefix).unwrap();
320+
321+
// Add the components
322+
for component in components {
323+
write!(cgu_name, "-{}", component).unwrap();
324+
}
325+
326+
if let Some(special_suffix) = special_suffix {
327+
// We add a dot in here so it cannot clash with anything in a regular
328+
// Rust identifier
329+
write!(cgu_name, ".{}", special_suffix).unwrap();
330+
}
331+
332+
Symbol::intern(&cgu_name[..]).as_interned_str()
333+
}
334+
}

src/librustc/session/mod.rs

+10
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ use util::nodemap::{FxHashMap, FxHashSet};
2424
use util::common::{duration_to_secs_str, ErrorReported};
2525
use util::common::ProfileQueriesMsg;
2626

27+
use rustc_data_structures::base_n;
2728
use rustc_data_structures::sync::{self, Lrc, Lock, LockCell, OneThread, Once, RwLock};
2829

2930
use syntax::ast::NodeId;
@@ -48,6 +49,7 @@ use std;
4849
use std::cell::{self, Cell, RefCell};
4950
use std::collections::HashMap;
5051
use std::env;
52+
use std::fmt;
5153
use std::io::Write;
5254
use std::path::{Path, PathBuf};
5355
use std::time::Duration;
@@ -1221,6 +1223,14 @@ impl CrateDisambiguator {
12211223
}
12221224
}
12231225

1226+
impl fmt::Display for CrateDisambiguator {
1227+
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
1228+
let (a, b) = self.0.as_value();
1229+
let as_u128 = a as u128 | ((b as u128) << 64);
1230+
f.write_str(&base_n::encode(as_u128, base_n::CASE_INSENSITIVE))
1231+
}
1232+
}
1233+
12241234
impl From<Fingerprint> for CrateDisambiguator {
12251235
fn from(fingerprint: Fingerprint) -> CrateDisambiguator {
12261236
CrateDisambiguator(fingerprint)

src/librustc_codegen_llvm/back/link.rs

-7
Original file line numberDiff line numberDiff line change
@@ -46,13 +46,6 @@ use std::process::{Output, Stdio};
4646
use std::str;
4747
use syntax::attr;
4848

49-
/// The LLVM module name containing crate-metadata. This includes a `.` on
50-
/// purpose, so it cannot clash with the name of a user-defined module.
51-
pub const METADATA_MODULE_NAME: &'static str = "crate.metadata";
52-
53-
// same as for metadata above, but for allocator shim
54-
pub const ALLOCATOR_MODULE_NAME: &'static str = "crate.allocator";
55-
5649
pub use rustc_codegen_utils::link::{find_crate_name, filename_for_input, default_output_for_target,
5750
invalid_output_for_target, build_link_meta, out_filename,
5851
check_file_is_writeable};

src/librustc_codegen_llvm/back/lto.rs

+5-6
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ fn fat_lto(cgcx: &CodegenContext,
242242
let llvm = module.llvm().expect("can't lto pre-codegened modules");
243243
(&llvm.llcx, llvm.llmod())
244244
};
245-
info!("using {:?} as a base module", module.llmod_id);
245+
info!("using {:?} as a base module", module.name);
246246

247247
// The linking steps below may produce errors and diagnostics within LLVM
248248
// which we'd like to handle and print, so set up our diagnostic handlers
@@ -257,7 +257,7 @@ fn fat_lto(cgcx: &CodegenContext,
257257
for module in modules {
258258
let llvm = module.llvm().expect("can't lto pre-codegened modules");
259259
let buffer = ModuleBuffer::new(llvm.llmod());
260-
let llmod_id = CString::new(&module.llmod_id[..]).unwrap();
260+
let llmod_id = CString::new(&module.name[..]).unwrap();
261261
serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
262262
}
263263

@@ -384,9 +384,9 @@ fn thin_lto(diag_handler: &Handler,
384384
// the most expensive portion of this small bit of global
385385
// analysis!
386386
for (i, module) in modules.iter().enumerate() {
387-
info!("local module: {} - {}", i, module.llmod_id);
387+
info!("local module: {} - {}", i, module.name);
388388
let llvm = module.llvm().expect("can't lto precodegened module");
389-
let name = CString::new(module.llmod_id.clone()).unwrap();
389+
let name = CString::new(module.name.clone()).unwrap();
390390
let buffer = ThinBuffer::new(llvm.llmod());
391391
thin_modules.push(llvm::ThinLTOModule {
392392
identifier: name.as_ptr(),
@@ -395,7 +395,7 @@ fn thin_lto(diag_handler: &Handler,
395395
});
396396
thin_buffers.push(buffer);
397397
module_names.push(name);
398-
timeline.record(&module.llmod_id);
398+
timeline.record(&module.name);
399399
}
400400

401401
// FIXME: All upstream crates are deserialized internally in the
@@ -668,7 +668,6 @@ impl ThinModule {
668668
llcx,
669669
tm,
670670
}),
671-
llmod_id: self.name().to_string(),
672671
name: self.name().to_string(),
673672
kind: ModuleKind::Regular,
674673
};

src/librustc_codegen_llvm/back/write.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -728,7 +728,7 @@ unsafe fn codegen(cgcx: &CodegenContext,
728728

729729
if config.emit_bc_compressed {
730730
let dst = bc_out.with_extension(RLIB_BYTECODE_EXTENSION);
731-
let data = bytecode::encode(&module.llmod_id, data);
731+
let data = bytecode::encode(&module.name, data);
732732
if let Err(e) = fs::write(&dst, data) {
733733
diag_handler.err(&format!("failed to write bytecode: {}", e));
734734
}
@@ -1338,7 +1338,6 @@ fn execute_work_item(cgcx: &CodegenContext,
13381338
assert_eq!(bytecode_compressed.is_some(), config.emit_bc_compressed);
13391339

13401340
Ok(WorkItemResult::Compiled(CompiledModule {
1341-
llmod_id: module.llmod_id.clone(),
13421341
name: module_name,
13431342
kind: ModuleKind::Regular,
13441343
pre_existing: true,

src/librustc_codegen_llvm/base.rs

+24-43
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ use metadata;
3636
use rustc::hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
3737
use rustc::middle::lang_items::StartFnLangItem;
3838
use rustc::middle::weak_lang_items;
39-
use rustc::mir::mono::{Linkage, Visibility, Stats};
39+
use rustc::mir::mono::{Linkage, Visibility, Stats, CodegenUnitNameBuilder};
4040
use rustc::middle::cstore::{EncodedMetadata};
4141
use rustc::ty::{self, Ty, TyCtxt};
4242
use rustc::ty::layout::{self, Align, TyLayout, LayoutOf};
@@ -742,19 +742,23 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
742742

743743
let crate_hash = tcx.crate_hash(LOCAL_CRATE);
744744
let link_meta = link::build_link_meta(crate_hash);
745+
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx);
745746

746747
// Codegen the metadata.
747748
tcx.sess.profiler(|p| p.start_activity(ProfileCategory::Codegen));
748-
let llmod_id = "metadata";
749-
let metadata_llvm_module = ModuleLlvm::new(tcx.sess, llmod_id);
749+
750+
let metadata_cgu_name = cgu_name_builder.build_cgu_name(LOCAL_CRATE,
751+
&["crate"],
752+
Some("metadata")).as_str()
753+
.to_string();
754+
let metadata_llvm_module = ModuleLlvm::new(tcx.sess, &metadata_cgu_name);
750755
let metadata = time(tcx.sess, "write metadata", || {
751756
write_metadata(tcx, &metadata_llvm_module, &link_meta)
752757
});
753758
tcx.sess.profiler(|p| p.end_activity(ProfileCategory::Codegen));
754759

755760
let metadata_module = ModuleCodegen {
756-
name: link::METADATA_MODULE_NAME.to_string(),
757-
llmod_id: llmod_id.to_string(),
761+
name: metadata_cgu_name,
758762
source: ModuleSource::Codegened(metadata_llvm_module),
759763
kind: ModuleKind::Metadata,
760764
};
@@ -833,20 +837,22 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
833837
let allocator_module = if any_dynamic_crate {
834838
None
835839
} else if let Some(kind) = *tcx.sess.allocator_kind.get() {
836-
unsafe {
837-
let llmod_id = "allocator";
838-
let modules = ModuleLlvm::new(tcx.sess, llmod_id);
839-
time(tcx.sess, "write allocator module", || {
840+
let llmod_id = cgu_name_builder.build_cgu_name(LOCAL_CRATE,
841+
&["crate"],
842+
Some("allocator")).as_str()
843+
.to_string();
844+
let modules = ModuleLlvm::new(tcx.sess, &llmod_id);
845+
time(tcx.sess, "write allocator module", || {
846+
unsafe {
840847
allocator::codegen(tcx, &modules, kind)
841-
});
848+
}
849+
});
842850

843-
Some(ModuleCodegen {
844-
name: link::ALLOCATOR_MODULE_NAME.to_string(),
845-
llmod_id: llmod_id.to_string(),
846-
source: ModuleSource::Codegened(modules),
847-
kind: ModuleKind::Allocator,
848-
})
849-
}
851+
Some(ModuleCodegen {
852+
name: llmod_id,
853+
source: ModuleSource::Codegened(modules),
854+
kind: ModuleKind::Allocator,
855+
})
850856
} else {
851857
None
852858
};
@@ -889,21 +895,10 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
889895
// succeed it means that none of the dependencies has changed
890896
// and we can safely re-use.
891897
if let Some(dep_node_index) = tcx.dep_graph.try_mark_green(tcx, dep_node) {
892-
// Append ".rs" to LLVM module identifier.
893-
//
894-
// LLVM code generator emits a ".file filename" directive
895-
// for ELF backends. Value of the "filename" is set as the
896-
// LLVM module identifier. Due to a LLVM MC bug[1], LLVM
897-
// crashes if the module identifier is same as other symbols
898-
// such as a function name in the module.
899-
// 1. http://llvm.org/bugs/show_bug.cgi?id=11479
900-
let llmod_id = format!("{}.rs", cgu.name());
901-
902898
let module = ModuleCodegen {
903899
name: cgu.name().to_string(),
904900
source: ModuleSource::Preexisting(buf),
905901
kind: ModuleKind::Regular,
906-
llmod_id,
907902
};
908903
tcx.dep_graph.mark_loaded_from_cache(dep_node_index, true);
909904
write::submit_codegened_module_to_llvm(tcx, module, 0);
@@ -1212,21 +1207,8 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
12121207
{
12131208
let cgu_name = cgu.name().to_string();
12141209

1215-
// Append ".rs" to LLVM module identifier.
1216-
//
1217-
// LLVM code generator emits a ".file filename" directive
1218-
// for ELF backends. Value of the "filename" is set as the
1219-
// LLVM module identifier. Due to a LLVM MC bug[1], LLVM
1220-
// crashes if the module identifier is same as other symbols
1221-
// such as a function name in the module.
1222-
// 1. http://llvm.org/bugs/show_bug.cgi?id=11479
1223-
let llmod_id = format!("{}-{}.rs",
1224-
cgu.name(),
1225-
tcx.crate_disambiguator(LOCAL_CRATE)
1226-
.to_fingerprint().to_hex());
1227-
12281210
// Instantiate monomorphizations without filling out definitions yet...
1229-
let llvm_module = ModuleLlvm::new(tcx.sess, &llmod_id);
1211+
let llvm_module = ModuleLlvm::new(tcx.sess, &cgu_name);
12301212
let stats = {
12311213
let cx = CodegenCx::new(tcx, cgu, &llvm_module);
12321214
let mono_items = cx.codegen_unit
@@ -1282,7 +1264,6 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
12821264
name: cgu_name,
12831265
source: ModuleSource::Codegened(llvm_module),
12841266
kind: ModuleKind::Regular,
1285-
llmod_id,
12861267
})
12871268
}
12881269
}

src/librustc_codegen_llvm/lib.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -269,8 +269,8 @@ struct ModuleCodegen {
269269
/// unique amongst **all** crates. Therefore, it should contain
270270
/// something unique to this crate (e.g., a module path) as well
271271
/// as the crate name and disambiguator.
272+
/// We currently generate these names via CodegenUnitNameBuilder::build_cgu_name().
272273
name: String,
273-
llmod_id: String,
274274
source: ModuleSource,
275275
kind: ModuleKind,
276276
}
@@ -317,7 +317,6 @@ impl ModuleCodegen {
317317
};
318318

319319
CompiledModule {
320-
llmod_id: self.llmod_id,
321320
name: self.name.clone(),
322321
kind: self.kind,
323322
pre_existing,
@@ -331,7 +330,6 @@ impl ModuleCodegen {
331330
#[derive(Debug)]
332331
struct CompiledModule {
333332
name: String,
334-
llmod_id: String,
335333
kind: ModuleKind,
336334
pre_existing: bool,
337335
object: Option<PathBuf>,

0 commit comments

Comments
 (0)