Skip to content

Commit 0c34122

Browse files
committed
Auto merge of rust-lang#83084 - nagisa:nagisa/features-native, r=petrochenkov
Adjust `-Ctarget-cpu=native` handling in cg_llvm When cg_llvm encounters the `-Ctarget-cpu=native` it computes an explciit set of features that applies to the target in order to correctly compile code for the host CPU (because e.g. `skylake` alone is not sufficient to tell if some of the instructions are available or not). However there were a couple of issues with how we did this. Firstly, the order in which features were overriden wasn't quite right – conceptually you'd expect `-Ctarget-cpu=native` option to override the features that are implicitly set by the target definition. However due to how other `-Ctarget-cpu` values are handled we must adopt the following order of priority: * Features from -Ctarget-cpu=*; are overriden by * Features implied by --target; are overriden by * Features from -Ctarget-feature; are overriden by * function specific features. Another problem was in that the function level `target-features` attribute would overwrite the entire set of the globally enabled features, rather than just the features the `#[target_feature(enable/disable)]` specified. With something like `-Ctarget-cpu=native` we'd end up in a situation wherein a function without `#[target_feature(enable)]` annotation would have a broader set of features compared to a function with one such attribute. This turned out to be a cause of heavy run-time regressions in some code using these function-level attributes in conjunction with `-Ctarget-cpu=native`, for example. With this PR rustc is more careful about specifying the entire set of features for functions that use `#[target_feature(enable/disable)]` or `#[instruction_set]` attributes. Sadly testing the original reproducer for this behaviour is quite impossible – we cannot rely on `-Ctarget-cpu=native` to be anything in particular on developer or CI machines. cc rust-lang#83027 `@BurntSushi`
2 parents e655fb6 + 72fb437 commit 0c34122

File tree

7 files changed

+155
-52
lines changed

7 files changed

+155
-52
lines changed

compiler/rustc_codegen_llvm/src/attributes.rs

+10-20
Original file line numberDiff line numberDiff line change
@@ -152,18 +152,6 @@ fn set_probestack(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
152152
}
153153
}
154154

155-
pub fn llvm_target_features(sess: &Session) -> impl Iterator<Item = &str> {
156-
const RUSTC_SPECIFIC_FEATURES: &[&str] = &["crt-static"];
157-
158-
let cmdline = sess
159-
.opts
160-
.cg
161-
.target_feature
162-
.split(',')
163-
.filter(|f| !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)));
164-
sess.target.features.split(',').chain(cmdline).filter(|l| !l.is_empty())
165-
}
166-
167155
pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
168156
let target_cpu = SmallCStr::new(llvm_util::target_cpu(cx.tcx.sess));
169157
llvm::AddFunctionAttrStringValue(
@@ -301,20 +289,22 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
301289
// The target doesn't care; the subtarget reads our attribute.
302290
apply_tune_cpu_attr(cx, llfn);
303291

304-
let features = llvm_target_features(cx.tcx.sess)
305-
.map(|s| s.to_string())
306-
.chain(codegen_fn_attrs.target_features.iter().map(|f| {
292+
let function_features = codegen_fn_attrs
293+
.target_features
294+
.iter()
295+
.map(|f| {
307296
let feature = &f.as_str();
308297
format!("+{}", llvm_util::to_llvm_feature(cx.tcx.sess, feature))
309-
}))
298+
})
310299
.chain(codegen_fn_attrs.instruction_set.iter().map(|x| match x {
311300
InstructionSetAttr::ArmA32 => "-thumb-mode".to_string(),
312301
InstructionSetAttr::ArmT32 => "+thumb-mode".to_string(),
313302
}))
314-
.collect::<Vec<String>>()
315-
.join(",");
316-
317-
if !features.is_empty() {
303+
.collect::<Vec<String>>();
304+
if !function_features.is_empty() {
305+
let mut global_features = llvm_util::llvm_global_features(cx.tcx.sess);
306+
global_features.extend(function_features.into_iter());
307+
let features = global_features.join(",");
318308
let val = CString::new(features).unwrap();
319309
llvm::AddFunctionAttrStringValue(
320310
llfn,

compiler/rustc_codegen_llvm/src/back/write.rs

+1-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
use crate::attributes;
21
use crate::back::lto::ThinBuffer;
32
use crate::back::profiling::{
43
selfprofile_after_pass_callback, selfprofile_before_pass_callback, LlvmSelfProfiler,
@@ -166,8 +165,6 @@ pub fn target_machine_factory(
166165

167166
let code_model = to_llvm_code_model(sess.code_model());
168167

169-
let mut features = llvm_util::handle_native_features(sess);
170-
features.extend(attributes::llvm_target_features(sess).map(|s| s.to_owned()));
171168
let mut singlethread = sess.target.singlethread;
172169

173170
// On the wasm target once the `atomics` feature is enabled that means that
@@ -182,7 +179,7 @@ pub fn target_machine_factory(
182179

183180
let triple = SmallCStr::new(&sess.target.llvm_target);
184181
let cpu = SmallCStr::new(llvm_util::target_cpu(sess));
185-
let features = features.join(",");
182+
let features = llvm_util::llvm_global_features(sess).join(",");
186183
let features = CString::new(features).unwrap();
187184
let abi = SmallCStr::new(&sess.target.llvm_abiname);
188185
let trap_unreachable =

compiler/rustc_codegen_llvm/src/llvm_util.rs

+56-10
Original file line numberDiff line numberDiff line change
@@ -218,13 +218,39 @@ pub fn target_cpu(sess: &Session) -> &str {
218218
handle_native(name)
219219
}
220220

221-
pub fn handle_native_features(sess: &Session) -> Vec<String> {
221+
/// The list of LLVM features computed from CLI flags (`-Ctarget-cpu`, `-Ctarget-feature`,
222+
/// `--target` and similar).
223+
// FIXME(nagisa): Cache the output of this somehow? Maybe make this a query? We're calling this
224+
// for every function that has `#[target_feature]` on it. The global features won't change between
225+
// the functions; only crates, maybe…
226+
pub fn llvm_global_features(sess: &Session) -> Vec<String> {
227+
// FIXME(nagisa): this should definitely be available more centrally and to other codegen backends.
228+
/// These features control behaviour of rustc rather than llvm.
229+
const RUSTC_SPECIFIC_FEATURES: &[&str] = &["crt-static"];
230+
231+
// Features that come earlier are overriden by conflicting features later in the string.
232+
// Typically we'll want more explicit settings to override the implicit ones, so:
233+
//
234+
// * Features from -Ctarget-cpu=*; are overriden by [^1]
235+
// * Features implied by --target; are overriden by
236+
// * Features from -Ctarget-feature; are overriden by
237+
// * function specific features.
238+
//
239+
// [^1]: target-cpu=native is handled here, other target-cpu values are handled implicitly
240+
// through LLVM TargetMachine implementation.
241+
//
242+
// FIXME(nagisa): it isn't clear what's the best interaction between features implied by
243+
// `-Ctarget-cpu` and `--target` are. On one hand, you'd expect CLI arguments to always
244+
// override anything that's implicit, so e.g. when there's no `--target` flag, features implied
245+
// the host target are overriden by `-Ctarget-cpu=*`. On the other hand, what about when both
246+
// `--target` and `-Ctarget-cpu=*` are specified? Both then imply some target features and both
247+
// flags are specified by the user on the CLI. It isn't as clear-cut which order of precedence
248+
// should be taken in cases like these.
249+
let mut features = vec![];
250+
251+
// -Ctarget-cpu=native
222252
match sess.opts.cg.target_cpu {
223-
Some(ref s) => {
224-
if s != "native" {
225-
return vec![];
226-
}
227-
253+
Some(ref s) if s == "native" => {
228254
let features_string = unsafe {
229255
let ptr = llvm::LLVMGetHostCPUFeatures();
230256
let features_string = if !ptr.is_null() {
@@ -242,11 +268,31 @@ pub fn handle_native_features(sess: &Session) -> Vec<String> {
242268

243269
features_string
244270
};
245-
246-
features_string.split(",").map(|s| s.to_owned()).collect()
271+
features.extend(features_string.split(",").map(String::from));
247272
}
248-
None => vec![],
249-
}
273+
Some(_) | None => {}
274+
};
275+
276+
// Features implied by an implicit or explicit `--target`.
277+
features.extend(
278+
sess.target
279+
.features
280+
.split(',')
281+
.filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
282+
.map(String::from),
283+
);
284+
285+
// -Ctarget-features
286+
features.extend(
287+
sess.opts
288+
.cg
289+
.target_feature
290+
.split(',')
291+
.filter(|f| !f.is_empty() && !RUSTC_SPECIFIC_FEATURES.iter().any(|s| f.contains(s)))
292+
.map(String::from),
293+
);
294+
295+
features
250296
}
251297

252298
pub fn tune_cpu(sess: &Session) -> Option<&str> {
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
// assembly-output: emit-asm
2+
// needs-llvm-components: x86
3+
// revisions: TWOFLAGS SINGLEFLAG
4+
// compile-flags: --target=x86_64-unknown-linux-gnu
5+
// [TWOFLAGS] compile-flags: -C target-feature=+rdrnd -C target-feature=+rdseed
6+
// [SINGLEFLAG] compile-flags: -C target-feature=+rdrnd,+rdseed
7+
8+
// Target features set via flags aren't necessarily reflected in the IR, so the only way to test
9+
// them is to build code that requires the features to be enabled to work.
10+
//
11+
// In this particular test if `rdrnd,rdseed` somehow didn't make it to LLVM, the instruction
12+
// selection should crash.
13+
//
14+
// > LLVM ERROR: Cannot select: 0x7f00f400c010: i32,i32,ch = X86ISD::RDSEED 0x7f00f400bfa8:2
15+
// > In function: foo
16+
//
17+
// See also src/test/codegen/target-feature-overrides.rs
18+
#![feature(no_core, lang_items, link_llvm_intrinsics, abi_unadjusted)]
19+
#![crate_type = "lib"]
20+
#![no_core]
21+
22+
#[lang = "sized"]
23+
trait Sized {}
24+
#[lang = "copy"]
25+
trait Copy {}
26+
27+
// Use of these requires target features to be enabled
28+
extern "unadjusted" {
29+
#[link_name = "llvm.x86.rdrand.32"]
30+
fn x86_rdrand32_step() -> (u32, i32);
31+
#[link_name = "llvm.x86.rdseed.32"]
32+
fn x86_rdseed32_step() -> (u32, i32);
33+
}
34+
35+
#[no_mangle]
36+
pub unsafe fn foo() -> (u32, u32) {
37+
// CHECK-LABEL: foo:
38+
// CHECK: rdrand
39+
// CHECK: rdseed
40+
(x86_rdrand32_step().0, x86_rdseed32_step().0)
41+
}

src/test/codegen/target-feature-multiple.rs

-9
This file was deleted.

src/test/codegen/target-feature-on-functions.rs

-9
This file was deleted.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// revisions: COMPAT INCOMPAT
2+
// needs-llvm-components: x86
3+
// compile-flags: --target=x86_64-unknown-linux-gnu -Copt-level=3
4+
// [COMPAT] compile-flags: -Ctarget-feature=+avx2,+avx
5+
// [INCOMPAT] compile-flags: -Ctarget-feature=-avx2,-avx
6+
7+
// See also src/test/assembly/target-feature-multiple.rs
8+
#![feature(no_core, lang_items)]
9+
#![crate_type = "lib"]
10+
#![no_core]
11+
12+
13+
#[lang = "sized"]
14+
trait Sized {}
15+
#[lang = "copy"]
16+
trait Copy {}
17+
18+
extern "C" {
19+
fn peach() -> u32;
20+
}
21+
22+
#[inline]
23+
#[target_feature(enable = "avx")]
24+
#[no_mangle]
25+
pub unsafe fn apple() -> u32 {
26+
// CHECK-LABEL: @apple()
27+
// CHECK-SAME: [[APPLEATTRS:#[0-9]+]] {
28+
// CHECK: {{.*}}call{{.*}}@peach
29+
peach()
30+
}
31+
32+
// target features same as global (not reflected or overriden in IR)
33+
#[no_mangle]
34+
pub unsafe fn banana() -> u32 {
35+
// CHECK-LABEL: @banana()
36+
// CHECK-SAME: [[BANANAATTRS:#[0-9]+]] {
37+
// COMPAT: {{.*}}call{{.*}}@peach
38+
// INCOMPAT: {{.*}}call{{.*}}@apple
39+
apple() // Compatible for inline in COMPAT revision and can't be inlined in INCOMPAT
40+
}
41+
42+
// CHECK: attributes [[APPLEATTRS]]
43+
// COMPAT-SAME: "target-features"="+avx2,+avx,+avx"
44+
// INCOMPAT-SAME: "target-features"="-avx2,-avx,+avx"
45+
// CHECK: attributes [[BANANAATTRS]]
46+
// CHECK-NOT: target-features
47+
// CHECK-SAME: }

0 commit comments

Comments
 (0)