Skip to content

Commit a14a361

Browse files
committed
Auto merge of #52266 - michaelwoerister:incr-thinlto-preliminaries, r=alexcrichton
Preliminary work for incremental ThinLTO. Since implementing incremental ThinLTO is a bit more involved than I initially thought, I'm splitting out some of the things that already work. This PR (1) adds a way of accessing some ThinLTO information in `rustc` and (2) does some cleanup around CGU/object file naming (which makes things quite a bit nicer). This is probably best reviewed one commit at a time.
2 parents 254f879 + e045a6c commit a14a361

File tree

25 files changed

+457
-171
lines changed

25 files changed

+457
-171
lines changed

src/librustc/mir/mono.rs

+77-2
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,16 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
use hir::def_id::DefId;
11+
use hir::def_id::{DefId, CrateNum};
1212
use syntax::ast::NodeId;
13-
use syntax::symbol::InternedString;
13+
use syntax::symbol::{Symbol, InternedString};
1414
use ty::{Instance, TyCtxt};
1515
use util::nodemap::FxHashMap;
1616
use rustc_data_structures::base_n;
1717
use rustc_data_structures::stable_hasher::{HashStable, StableHasherResult,
1818
StableHasher};
1919
use ich::{Fingerprint, StableHashingContext, NodeIdHashingMode};
20+
use std::fmt;
2021
use std::hash::Hash;
2122

2223
#[derive(PartialEq, Eq, Clone, Copy, Debug, Hash)]
@@ -173,6 +174,80 @@ impl<'tcx> CodegenUnit<'tcx> {
173174
self.size_estimate = Some(size_estimate + delta);
174175
}
175176
}
177+
178+
/// CGU names should fulfill the following requirements:
179+
/// - They should be able to act as a file name on any kind of file system
180+
/// - They should not collide with other CGU names, even for different versions
181+
/// of the same crate.
182+
///
183+
/// Consequently, we don't use special characters except for '.' and '-' and we
184+
/// prefix each name with the crate-name and crate-disambiguator.
185+
///
186+
/// This function will build CGU names of the form:
187+
///
188+
/// ```
189+
/// <crate-name>.<crate-disambiguator>(-<component>)*[.<special-suffix>]
190+
/// ```
191+
///
192+
/// The '.' before `<special-suffix>` makes sure that names with a special
193+
/// suffix can never collide with a name built out of regular Rust
194+
/// identifiers (e.g. module paths).
195+
pub fn build_cgu_name<I, C, S>(tcx: TyCtxt,
196+
cnum: CrateNum,
197+
components: I,
198+
special_suffix: Option<S>)
199+
-> InternedString
200+
where I: IntoIterator<Item=C>,
201+
C: fmt::Display,
202+
S: fmt::Display,
203+
{
204+
let cgu_name = CodegenUnit::build_cgu_name_no_mangle(tcx,
205+
cnum,
206+
components,
207+
special_suffix);
208+
209+
if tcx.sess.opts.debugging_opts.human_readable_cgu_names {
210+
cgu_name
211+
} else {
212+
let cgu_name = &cgu_name.as_str()[..];
213+
Symbol::intern(&CodegenUnit::mangle_name(cgu_name)).as_interned_str()
214+
}
215+
}
216+
217+
/// Same as `CodegenUnit::build_cgu_name()` but will never mangle the
218+
/// resulting name.
219+
pub fn build_cgu_name_no_mangle<I, C, S>(tcx: TyCtxt,
220+
cnum: CrateNum,
221+
components: I,
222+
special_suffix: Option<S>)
223+
-> InternedString
224+
where I: IntoIterator<Item=C>,
225+
C: fmt::Display,
226+
S: fmt::Display,
227+
{
228+
use std::fmt::Write;
229+
230+
let mut cgu_name = String::with_capacity(64);
231+
232+
// Start out with the crate name and disambiguator
233+
write!(cgu_name,
234+
"{}.{}",
235+
tcx.crate_name(cnum),
236+
tcx.crate_disambiguator(cnum)).unwrap();
237+
238+
// Add the components
239+
for component in components {
240+
write!(cgu_name, "-{}", component).unwrap();
241+
}
242+
243+
if let Some(special_suffix) = special_suffix {
244+
// We add a dot in here so it cannot clash with anything in a regular
245+
// Rust identifier
246+
write!(cgu_name, ".{}", special_suffix).unwrap();
247+
}
248+
249+
Symbol::intern(&cgu_name[..]).as_interned_str()
250+
}
176251
}
177252

178253
impl<'a, 'tcx> HashStable<StableHashingContext<'a>> for CodegenUnit<'tcx> {

src/librustc/session/mod.rs

+9
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ use util::nodemap::{FxHashMap, FxHashSet};
2626
use util::common::{duration_to_secs_str, ErrorReported};
2727
use util::common::ProfileQueriesMsg;
2828

29+
use rustc_data_structures::base_n;
2930
use rustc_data_structures::sync::{self, Lrc, Lock, LockCell, OneThread, Once, RwLock};
3031

3132
use syntax::ast::NodeId;
@@ -1185,6 +1186,14 @@ impl CrateDisambiguator {
11851186
}
11861187
}
11871188

1189+
impl fmt::Display for CrateDisambiguator {
1190+
fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
1191+
let (a, b) = self.0.as_value();
1192+
let as_u128 = a as u128 | ((b as u128) << 64);
1193+
f.write_str(&base_n::encode(as_u128, base_n::CASE_INSENSITIVE))
1194+
}
1195+
}
1196+
11881197
impl From<Fingerprint> for CrateDisambiguator {
11891198
fn from(fingerprint: Fingerprint) -> CrateDisambiguator {
11901199
CrateDisambiguator(fingerprint)

src/librustc_codegen_llvm/back/link.rs

-7
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,6 @@ use std::process::{Output, Stdio};
4545
use std::str;
4646
use syntax::attr;
4747

48-
/// The LLVM module name containing crate-metadata. This includes a `.` on
49-
/// purpose, so it cannot clash with the name of a user-defined module.
50-
pub const METADATA_MODULE_NAME: &'static str = "crate.metadata";
51-
52-
// same as for metadata above, but for allocator shim
53-
pub const ALLOCATOR_MODULE_NAME: &'static str = "crate.allocator";
54-
5548
pub use rustc_codegen_utils::link::{find_crate_name, filename_for_input, default_output_for_target,
5649
invalid_output_for_target, build_link_meta, out_filename,
5750
check_file_is_writeable};

src/librustc_codegen_llvm/back/lto.rs

+142-9
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,23 @@ use rustc::hir::def_id::LOCAL_CRATE;
2020
use rustc::middle::exported_symbols::SymbolExportLevel;
2121
use rustc::session::config::{self, Lto};
2222
use rustc::util::common::time_ext;
23+
use rustc_data_structures::fx::FxHashMap;
2324
use time_graph::Timeline;
2425
use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource};
2526

2627
use libc;
2728

28-
use std::ffi::CString;
29+
use std::ffi::{CString, CStr};
30+
use std::fs::File;
31+
use std::io;
32+
use std::mem;
33+
use std::path::Path;
2934
use std::ptr;
3035
use std::slice;
3136
use std::sync::Arc;
3237

38+
pub const THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME: &str = "thin-lto-imports.bin";
39+
3340
pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
3441
match crate_type {
3542
config::CrateTypeExecutable |
@@ -193,7 +200,7 @@ pub(crate) fn run(cgcx: &CodegenContext,
193200
}
194201
Lto::Thin |
195202
Lto::ThinLocal => {
196-
thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline)
203+
thin_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline)
197204
}
198205
Lto::No => unreachable!(),
199206
}
@@ -231,7 +238,7 @@ fn fat_lto(cgcx: &CodegenContext,
231238
.expect("must be codegen'ing at least one module");
232239
let module = modules.remove(costliest_module);
233240
let llmod = module.llvm().expect("can't lto pre-codegened modules").llmod;
234-
info!("using {:?} as a base module", module.llmod_id);
241+
info!("using {:?} as a base module", module.name);
235242

236243
// For all other modules we codegened we'll need to link them into our own
237244
// bitcode. All modules were codegened in their own LLVM context, however,
@@ -241,7 +248,7 @@ fn fat_lto(cgcx: &CodegenContext,
241248
for module in modules {
242249
let llvm = module.llvm().expect("can't lto pre-codegened modules");
243250
let buffer = ModuleBuffer::new(llvm.llmod);
244-
let llmod_id = CString::new(&module.llmod_id[..]).unwrap();
251+
let llmod_id = CString::new(&module.name[..]).unwrap();
245252
serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
246253
}
247254

@@ -346,7 +353,8 @@ impl Drop for Linker {
346353
/// calculating the *index* for ThinLTO. This index will then be shared amongst
347354
/// all of the `LtoModuleCodegen` units returned below and destroyed once
348355
/// they all go out of scope.
349-
fn thin_lto(diag_handler: &Handler,
356+
fn thin_lto(cgcx: &CodegenContext,
357+
diag_handler: &Handler,
350358
modules: Vec<ModuleCodegen>,
351359
serialized_modules: Vec<(SerializedModule, CString)>,
352360
symbol_white_list: &[*const libc::c_char],
@@ -368,9 +376,9 @@ fn thin_lto(diag_handler: &Handler,
368376
// the most expensive portion of this small bit of global
369377
// analysis!
370378
for (i, module) in modules.iter().enumerate() {
371-
info!("local module: {} - {}", i, module.llmod_id);
379+
info!("local module: {} - {}", i, module.name);
372380
let llvm = module.llvm().expect("can't lto precodegened module");
373-
let name = CString::new(module.llmod_id.clone()).unwrap();
381+
let name = CString::new(module.name.clone()).unwrap();
374382
let buffer = ThinBuffer::new(llvm.llmod);
375383
thin_modules.push(llvm::ThinLTOModule {
376384
identifier: name.as_ptr(),
@@ -379,7 +387,7 @@ fn thin_lto(diag_handler: &Handler,
379387
});
380388
thin_buffers.push(buffer);
381389
module_names.push(name);
382-
timeline.record(&module.llmod_id);
390+
timeline.record(&module.name);
383391
}
384392

385393
// FIXME: All upstream crates are deserialized internally in the
@@ -424,6 +432,18 @@ fn thin_lto(diag_handler: &Handler,
424432
let msg = format!("failed to prepare thin LTO context");
425433
return Err(write::llvm_err(&diag_handler, msg))
426434
}
435+
436+
// Save the ThinLTO import information for incremental compilation.
437+
if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
438+
let path = incr_comp_session_dir.join(THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME);
439+
let imports = ThinLTOImports::from_thin_lto_data(data);
440+
if let Err(err) = imports.save_to_file(&path) {
441+
let msg = format!("Error while writing ThinLTO import data: {}",
442+
err);
443+
return Err(write::llvm_err(&diag_handler, msg));
444+
}
445+
}
446+
427447
let data = ThinData(data);
428448
info!("thin LTO data created");
429449
timeline.record("data");
@@ -656,7 +676,6 @@ impl ThinModule {
656676
llcx,
657677
tm,
658678
}),
659-
llmod_id: self.name().to_string(),
660679
name: self.name().to_string(),
661680
kind: ModuleKind::Regular,
662681
};
@@ -776,3 +795,117 @@ impl ThinModule {
776795
Ok(module)
777796
}
778797
}
798+
799+
800+
#[derive(Debug)]
801+
pub struct ThinLTOImports {
802+
// key = llvm name of importing module, value = list of modules it imports from
803+
imports: FxHashMap<String, Vec<String>>,
804+
}
805+
806+
impl ThinLTOImports {
807+
808+
pub fn new() -> ThinLTOImports {
809+
ThinLTOImports {
810+
imports: FxHashMap(),
811+
}
812+
}
813+
814+
/// Load the ThinLTO import map from ThinLTOData.
815+
unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImports {
816+
817+
fn module_name_to_str(c_str: &CStr) -> &str {
818+
match c_str.to_str() {
819+
Ok(s) => s,
820+
Err(e) => {
821+
bug!("Encountered non-utf8 LLVM module name `{}`: {}",
822+
c_str.to_string_lossy(),
823+
e)
824+
}
825+
}
826+
}
827+
828+
unsafe extern "C" fn imported_module_callback(payload: *mut libc::c_void,
829+
importing_module_name: *const libc::c_char,
830+
imported_module_name: *const libc::c_char) {
831+
let map = &mut* (payload as *mut ThinLTOImports);
832+
833+
let importing_module_name = CStr::from_ptr(importing_module_name);
834+
let importing_module_name = module_name_to_str(&importing_module_name);
835+
let imported_module_name = CStr::from_ptr(imported_module_name);
836+
let imported_module_name = module_name_to_str(&imported_module_name);
837+
838+
if !map.imports.contains_key(importing_module_name) {
839+
map.imports.insert(importing_module_name.to_owned(), vec![]);
840+
}
841+
842+
map.imports
843+
.get_mut(importing_module_name)
844+
.unwrap()
845+
.push(imported_module_name.to_owned());
846+
}
847+
848+
let mut map = ThinLTOImports {
849+
imports: FxHashMap(),
850+
};
851+
852+
llvm::LLVMRustGetThinLTOModuleImports(data,
853+
imported_module_callback,
854+
&mut map as *mut _ as *mut libc::c_void);
855+
map
856+
}
857+
858+
pub fn save_to_file(&self, path: &Path) -> io::Result<()> {
859+
use std::io::Write;
860+
861+
let file = File::create(path)?;
862+
let mut writer = io::BufWriter::new(file);
863+
864+
for (importing_module_name, imported_modules) in &self.imports {
865+
writeln!(writer, "{}", importing_module_name)?;
866+
867+
for imported_module in imported_modules {
868+
writeln!(writer, " {}", imported_module)?;
869+
}
870+
871+
writeln!(writer)?;
872+
}
873+
874+
Ok(())
875+
}
876+
877+
pub fn load_from_file(path: &Path) -> io::Result<ThinLTOImports> {
878+
use std::io::BufRead;
879+
880+
let mut imports = FxHashMap();
881+
let mut current_module = None;
882+
let mut current_imports = vec![];
883+
884+
let file = File::open(path)?;
885+
886+
for line in io::BufReader::new(file).lines() {
887+
let line = line?;
888+
889+
if line.is_empty() {
890+
let importing_module = current_module
891+
.take()
892+
.expect("Importing module not set");
893+
894+
imports.insert(importing_module,
895+
mem::replace(&mut current_imports, vec![]));
896+
} else if line.starts_with(" ") {
897+
// This is an imported module
898+
assert_ne!(current_module, None);
899+
current_imports.push(line.trim().to_string());
900+
} else {
901+
// This is the beginning of a new module
902+
assert_eq!(current_module, None);
903+
current_module = Some(line.trim().to_string());
904+
}
905+
}
906+
907+
Ok(ThinLTOImports {
908+
imports
909+
})
910+
}
911+
}

src/librustc_codegen_llvm/back/write.rs

+1-2
Original file line numberDiff line numberDiff line change
@@ -696,7 +696,7 @@ unsafe fn codegen(cgcx: &CodegenContext,
696696

697697
if config.emit_bc_compressed {
698698
let dst = bc_out.with_extension(RLIB_BYTECODE_EXTENSION);
699-
let data = bytecode::encode(&module.llmod_id, data);
699+
let data = bytecode::encode(&module.name, data);
700700
if let Err(e) = fs::write(&dst, data) {
701701
diag_handler.err(&format!("failed to write bytecode: {}", e));
702702
}
@@ -1308,7 +1308,6 @@ fn execute_work_item(cgcx: &CodegenContext,
13081308
assert_eq!(bytecode_compressed.is_some(), config.emit_bc_compressed);
13091309

13101310
Ok(WorkItemResult::Compiled(CompiledModule {
1311-
llmod_id: module.llmod_id.clone(),
13121311
name: module_name,
13131312
kind: ModuleKind::Regular,
13141313
pre_existing: true,

0 commit comments

Comments
 (0)