Skip to content

Commit 52dd1cd

Browse files
committed
Auto merge of #107294 - JamieCunliffe:neon-fp, r=Amanieu
Fix some issues with folded AArch64 features. In #91608 the `fp` feature was removed for AArch64 and folded into the `neon` feature; however, disabling the `neon` feature doesn't actually disable the `fp` feature. If my understanding of that thread is correct, it should. While doing this, I also noticed that disabling some features would disable features that it shouldn't. For instance, enabling `sve` will enable `neon`; however, disabling `sve` would then also disable `neon`, and I wouldn't expect disabling `sve` to also disable `neon`. cc `@workingjubilee`
2 parents b08148f + a059e68 commit 52dd1cd

File tree

3 files changed

+158
-38
lines changed

3 files changed

+158
-38
lines changed

compiler/rustc_codegen_llvm/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#![feature(iter_intersperse)]
1111
#![feature(let_chains)]
1212
#![feature(never_type)]
13+
#![feature(impl_trait_in_assoc_type)]
1314
#![recursion_limit = "256"]
1415
#![allow(rustc::potential_query_instability)]
1516
#![deny(rustc::untranslatable_diagnostic)]

compiler/rustc_codegen_llvm/src/llvm_util.rs

+128-38
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ use rustc_session::config::PrintRequest;
1616
use rustc_session::Session;
1717
use rustc_span::symbol::Symbol;
1818
use rustc_target::spec::{MergeFunctions, PanicStrategy};
19-
use smallvec::{smallvec, SmallVec};
2019
use std::ffi::{CStr, CString};
2120

2221
use std::path::Path;
@@ -132,6 +131,60 @@ pub fn time_trace_profiler_finish(file_name: &Path) {
132131
}
133132
}
134133

134+
pub enum TargetFeatureFoldStrength<'a> {
135+
// The feature is only tied when enabling the feature; disabling
136+
// this feature shouldn't disable the tied feature.
137+
EnableOnly(&'a str),
138+
// The feature is tied for both enabling and disabling this feature.
139+
Both(&'a str),
140+
}
141+
142+
impl<'a> TargetFeatureFoldStrength<'a> {
143+
fn as_str(&self) -> &'a str {
144+
match self {
145+
TargetFeatureFoldStrength::EnableOnly(feat) => feat,
146+
TargetFeatureFoldStrength::Both(feat) => feat,
147+
}
148+
}
149+
}
150+
151+
pub struct LLVMFeature<'a> {
152+
pub llvm_feature_name: &'a str,
153+
pub dependency: Option<TargetFeatureFoldStrength<'a>>,
154+
}
155+
156+
impl<'a> LLVMFeature<'a> {
157+
pub fn new(llvm_feature_name: &'a str) -> Self {
158+
Self { llvm_feature_name, dependency: None }
159+
}
160+
161+
pub fn with_dependency(
162+
llvm_feature_name: &'a str,
163+
dependency: TargetFeatureFoldStrength<'a>,
164+
) -> Self {
165+
Self { llvm_feature_name, dependency: Some(dependency) }
166+
}
167+
168+
pub fn contains(&self, feat: &str) -> bool {
169+
self.iter().any(|dep| dep == feat)
170+
}
171+
172+
pub fn iter(&'a self) -> impl Iterator<Item = &'a str> {
173+
let dependencies = self.dependency.iter().map(|feat| feat.as_str());
174+
std::iter::once(self.llvm_feature_name).chain(dependencies)
175+
}
176+
}
177+
178+
impl<'a> IntoIterator for LLVMFeature<'a> {
179+
type Item = &'a str;
180+
type IntoIter = impl Iterator<Item = &'a str>;
181+
182+
fn into_iter(self) -> Self::IntoIter {
183+
let dependencies = self.dependency.into_iter().map(|feat| feat.as_str());
184+
std::iter::once(self.llvm_feature_name).chain(dependencies)
185+
}
186+
}
187+
135188
// WARNING: the features after applying `to_llvm_features` must be known
136189
// to LLVM or the feature detection code will walk past the end of the feature
137190
// array, leading to crashes.
@@ -147,36 +200,65 @@ pub fn time_trace_profiler_finish(file_name: &Path) {
147200
// Though note that Rust can also be built with an external precompiled version of LLVM
148201
// which might lead to failures if the oldest tested / supported LLVM version
149202
// doesn't yet support the relevant intrinsics
150-
pub fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> SmallVec<[&'a str; 2]> {
203+
pub fn to_llvm_features<'a>(sess: &Session, s: &'a str) -> LLVMFeature<'a> {
151204
let arch = if sess.target.arch == "x86_64" { "x86" } else { &*sess.target.arch };
152205
match (arch, s) {
153-
("x86", "sse4.2") => smallvec!["sse4.2", "crc32"],
154-
("x86", "pclmulqdq") => smallvec!["pclmul"],
155-
("x86", "rdrand") => smallvec!["rdrnd"],
156-
("x86", "bmi1") => smallvec!["bmi"],
157-
("x86", "cmpxchg16b") => smallvec!["cx16"],
158-
("aarch64", "rcpc2") => smallvec!["rcpc-immo"],
159-
("aarch64", "dpb") => smallvec!["ccpp"],
160-
("aarch64", "dpb2") => smallvec!["ccdp"],
161-
("aarch64", "frintts") => smallvec!["fptoint"],
162-
("aarch64", "fcma") => smallvec!["complxnum"],
163-
("aarch64", "pmuv3") => smallvec!["perfmon"],
164-
("aarch64", "paca") => smallvec!["pauth"],
165-
("aarch64", "pacg") => smallvec!["pauth"],
166-
// Rust ties fp and neon together. In LLVM neon implicitly enables fp,
167-
// but we manually enable neon when a feature only implicitly enables fp
168-
("aarch64", "f32mm") => smallvec!["f32mm", "neon"],
169-
("aarch64", "f64mm") => smallvec!["f64mm", "neon"],
170-
("aarch64", "fhm") => smallvec!["fp16fml", "neon"],
171-
("aarch64", "fp16") => smallvec!["fullfp16", "neon"],
172-
("aarch64", "jsconv") => smallvec!["jsconv", "neon"],
173-
("aarch64", "sve") => smallvec!["sve", "neon"],
174-
("aarch64", "sve2") => smallvec!["sve2", "neon"],
175-
("aarch64", "sve2-aes") => smallvec!["sve2-aes", "neon"],
176-
("aarch64", "sve2-sm4") => smallvec!["sve2-sm4", "neon"],
177-
("aarch64", "sve2-sha3") => smallvec!["sve2-sha3", "neon"],
178-
("aarch64", "sve2-bitperm") => smallvec!["sve2-bitperm", "neon"],
179-
(_, s) => smallvec![s],
206+
("x86", "sse4.2") => {
207+
LLVMFeature::with_dependency("sse4.2", TargetFeatureFoldStrength::EnableOnly("crc32"))
208+
}
209+
("x86", "pclmulqdq") => LLVMFeature::new("pclmul"),
210+
("x86", "rdrand") => LLVMFeature::new("rdrnd"),
211+
("x86", "bmi1") => LLVMFeature::new("bmi"),
212+
("x86", "cmpxchg16b") => LLVMFeature::new("cx16"),
213+
("aarch64", "rcpc2") => LLVMFeature::new("rcpc-immo"),
214+
("aarch64", "dpb") => LLVMFeature::new("ccpp"),
215+
("aarch64", "dpb2") => LLVMFeature::new("ccdp"),
216+
("aarch64", "frintts") => LLVMFeature::new("fptoint"),
217+
("aarch64", "fcma") => LLVMFeature::new("complxnum"),
218+
("aarch64", "pmuv3") => LLVMFeature::new("perfmon"),
219+
("aarch64", "paca") => LLVMFeature::new("pauth"),
220+
("aarch64", "pacg") => LLVMFeature::new("pauth"),
221+
// Rust ties fp and neon together.
222+
("aarch64", "neon") => {
223+
LLVMFeature::with_dependency("neon", TargetFeatureFoldStrength::Both("fp-armv8"))
224+
}
225+
// In LLVM neon implicitly enables fp, but we manually enable
226+
// neon when a feature only implicitly enables fp
227+
("aarch64", "f32mm") => {
228+
LLVMFeature::with_dependency("f32mm", TargetFeatureFoldStrength::EnableOnly("neon"))
229+
}
230+
("aarch64", "f64mm") => {
231+
LLVMFeature::with_dependency("f64mm", TargetFeatureFoldStrength::EnableOnly("neon"))
232+
}
233+
("aarch64", "fhm") => {
234+
LLVMFeature::with_dependency("fp16fml", TargetFeatureFoldStrength::EnableOnly("neon"))
235+
}
236+
("aarch64", "fp16") => {
237+
LLVMFeature::with_dependency("fullfp16", TargetFeatureFoldStrength::EnableOnly("neon"))
238+
}
239+
("aarch64", "jsconv") => {
240+
LLVMFeature::with_dependency("jsconv", TargetFeatureFoldStrength::EnableOnly("neon"))
241+
}
242+
("aarch64", "sve") => {
243+
LLVMFeature::with_dependency("sve", TargetFeatureFoldStrength::EnableOnly("neon"))
244+
}
245+
("aarch64", "sve2") => {
246+
LLVMFeature::with_dependency("sve2", TargetFeatureFoldStrength::EnableOnly("neon"))
247+
}
248+
("aarch64", "sve2-aes") => {
249+
LLVMFeature::with_dependency("sve2-aes", TargetFeatureFoldStrength::EnableOnly("neon"))
250+
}
251+
("aarch64", "sve2-sm4") => {
252+
LLVMFeature::with_dependency("sve2-sm4", TargetFeatureFoldStrength::EnableOnly("neon"))
253+
}
254+
("aarch64", "sve2-sha3") => {
255+
LLVMFeature::with_dependency("sve2-sha3", TargetFeatureFoldStrength::EnableOnly("neon"))
256+
}
257+
("aarch64", "sve2-bitperm") => LLVMFeature::with_dependency(
258+
"sve2-bitperm",
259+
TargetFeatureFoldStrength::EnableOnly("neon"),
260+
),
261+
(_, s) => LLVMFeature::new(s),
180262
}
181263
}
182264

@@ -274,18 +356,17 @@ fn print_target_features(sess: &Session, tm: &llvm::TargetMachine) {
274356
let mut rustc_target_features = supported_target_features(sess)
275357
.iter()
276358
.map(|(feature, _gate)| {
277-
let desc = if let Some(llvm_feature) = to_llvm_features(sess, *feature).first() {
278-
// LLVM asserts that these are sorted. LLVM and Rust both use byte comparison for these strings.
359+
// LLVM asserts that these are sorted. LLVM and Rust both use byte comparison for these strings.
360+
let llvm_feature = to_llvm_features(sess, *feature).llvm_feature_name;
361+
let desc =
279362
match llvm_target_features.binary_search_by_key(&llvm_feature, |(f, _d)| f).ok() {
280363
Some(index) => {
281364
known_llvm_target_features.insert(llvm_feature);
282365
llvm_target_features[index].1
283366
}
284367
None => "",
285-
}
286-
} else {
287-
""
288-
};
368+
};
369+
289370
(*feature, desc)
290371
})
291372
.collect::<Vec<_>>();
@@ -469,10 +550,19 @@ pub(crate) fn global_llvm_features(sess: &Session, diagnostics: bool) -> Vec<Str
469550
// passing requests down to LLVM. This means that all in-language
470551
// features also work on the command line instead of having two
471552
// different names when the LLVM name and the Rust name differ.
553+
let llvm_feature = to_llvm_features(sess, feature);
554+
472555
Some(
473-
to_llvm_features(sess, feature)
474-
.into_iter()
475-
.map(move |f| format!("{}{}", enable_disable, f)),
556+
std::iter::once(format!("{}{}", enable_disable, llvm_feature.llvm_feature_name))
557+
.chain(llvm_feature.dependency.into_iter().filter_map(move |feat| {
558+
match (enable_disable, feat) {
559+
('-' | '+', TargetFeatureFoldStrength::Both(f))
560+
| ('+', TargetFeatureFoldStrength::EnableOnly(f)) => {
561+
Some(format!("{}{}", enable_disable, f))
562+
}
563+
_ => None,
564+
}
565+
})),
476566
)
477567
})
478568
.flatten();
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
// ignore-tidy-linelength
2+
// revisions: ENABLE_SVE DISABLE_SVE DISABLE_NEON ENABLE_NEON
3+
// compile-flags: --crate-type=rlib --target=aarch64-unknown-linux-gnu
4+
// needs-llvm-components: aarch64
5+
6+
// The "+v8a" feature is matched as optional as it isn't added when we
7+
// are targeting older LLVM versions. Once the min supported version
8+
// is LLVM-14 we can remove the optional regex matching for this feature.
9+
10+
// [ENABLE_SVE] compile-flags: -C target-feature=+sve
11+
// ENABLE_SVE: attributes #0 = { {{.*}} "target-features"="{{((\+outline-atomics,?)|(\+v8a,?)?|(\+sve,?)|(\+neon,?))*}}" }
12+
13+
// [DISABLE_SVE] compile-flags: -C target-feature=-sve
14+
// DISABLE_SVE: attributes #0 = { {{.*}} "target-features"="{{((\+outline-atomics,?)|(\+v8a,?)?|(-sve,?)|(\+neon,?))*}}" }
15+
16+
// [DISABLE_NEON] compile-flags: -C target-feature=-neon
17+
// DISABLE_NEON: attributes #0 = { {{.*}} "target-features"="{{((\+outline-atomics,?)|(\+v8a,?)?|(-fp-armv8,?)|(-neon,?))*}}" }
18+
19+
// [ENABLE_NEON] compile-flags: -C target-feature=+neon
20+
// ENABLE_NEON: attributes #0 = { {{.*}} "target-features"="{{((\+outline-atomics,?)|(\+v8a,?)?|(\+fp-armv8,?)|(\+neon,?))*}}" }
21+
22+
23+
#![feature(no_core, lang_items)]
24+
#![no_core]
25+
26+
#[lang = "sized"]
27+
trait Sized {}
28+
29+
pub fn test() {}

0 commit comments

Comments
 (0)