Skip to content

Commit 481ba16

Browse files
committed
Auto merge of rust-lang#84339 - alexcrichton:llvm-fptoint-sat, r=nagisa
rustc: Use LLVM's new saturating float-to-int intrinsics. This commit updates rustc, with an applicable LLVM version, to use LLVM's new `llvm.fpto{u,s}i.sat.*.*` intrinsics to implement saturating floating-point-to-int conversions. This results in a little bit tighter codegen for x86/x86_64, but the main purpose of this is to prepare for upcoming changes to the WebAssembly backend in LLVM where wasm's saturating float-to-int instructions will now be implemented with these intrinsics. This change allows simplifying a good deal of surrounding code, namely removing a lot of wasm-specific behavior. WebAssembly no longer has any special-casing of saturating arithmetic instructions and the need for `fptosui_may_trap` is gone and all handling code for that is now removed. This means that the only wasm-specific logic is in the `fpto{s,u}i` instructions which only get used for "out of bounds is undefined behavior". This does mean that for the WebAssembly target specifically the Rust compiler will no longer be 100% compatible with pre-LLVM 12 versions, but it seems like that's unlikely to be relied on by too many folks. Note that this change does immediately regress the codegen of saturating float-to-int casts on WebAssembly due to the specialization of the LLVM intrinsic not being present in our LLVM fork just yet. I'll be following up with an LLVM update to pull in those patches, but that update affects a few other SIMD things in flight for WebAssembly so I wanted to separate this change. Eventually the entire `cast_float_to_int` function can be removed when LLVM 12 is the minimum version, but that will require sinking the complexity of it into other backends such as Cranelift.
2 parents 484c619 + ed6dd40 commit 481ba16

File tree

7 files changed

+104
-378
lines changed

7 files changed

+104
-378
lines changed

compiler/rustc_codegen_llvm/src/builder.rs

+34-63
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use crate::common::Funclet;
22
use crate::context::CodegenCx;
33
use crate::llvm::{self, BasicBlock, False};
44
use crate::llvm::{AtomicOrdering, AtomicRmwBinOp, SynchronizationScope};
5+
use crate::llvm_util;
56
use crate::type_::Type;
67
use crate::type_of::LayoutLlvmExt;
78
use crate::value::Value;
@@ -16,7 +17,7 @@ use rustc_data_structures::small_c_str::SmallCStr;
1617
use rustc_hir::def_id::DefId;
1718
use rustc_middle::ty::layout::TyAndLayout;
1819
use rustc_middle::ty::{self, Ty, TyCtxt};
19-
use rustc_span::{sym, Span};
20+
use rustc_span::Span;
2021
use rustc_target::abi::{self, Align, Size};
2122
use rustc_target::spec::{HasTargetSpec, Target};
2223
use std::borrow::Cow;
@@ -669,81 +670,47 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
669670
}
670671

671672
fn fptoui_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
672-
// WebAssembly has saturating floating point to integer casts if the
673-
// `nontrapping-fptoint` target feature is activated. We'll use those if
674-
// they are available.
675-
if self.sess().target.arch == "wasm32"
676-
&& self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
677-
{
673+
if llvm_util::get_version() >= (12, 0, 0) && !self.fptoint_sat_broken_in_llvm() {
678674
let src_ty = self.cx.val_ty(val);
679675
let float_width = self.cx.float_width(src_ty);
680676
let int_width = self.cx.int_width(dest_ty);
681-
let name = match (int_width, float_width) {
682-
(32, 32) => Some("llvm.wasm.trunc.saturate.unsigned.i32.f32"),
683-
(32, 64) => Some("llvm.wasm.trunc.saturate.unsigned.i32.f64"),
684-
(64, 32) => Some("llvm.wasm.trunc.saturate.unsigned.i64.f32"),
685-
(64, 64) => Some("llvm.wasm.trunc.saturate.unsigned.i64.f64"),
686-
_ => None,
687-
};
688-
if let Some(name) = name {
689-
let intrinsic = self.get_intrinsic(name);
690-
return Some(self.call(intrinsic, &[val], None));
691-
}
677+
let name = format!("llvm.fptoui.sat.i{}.f{}", int_width, float_width);
678+
let intrinsic = self.get_intrinsic(&name);
679+
return Some(self.call(intrinsic, &[val], None));
692680
}
681+
693682
None
694683
}
695684

696685
fn fptosi_sat(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> Option<&'ll Value> {
697-
// WebAssembly has saturating floating point to integer casts if the
698-
// `nontrapping-fptoint` target feature is activated. We'll use those if
699-
// they are available.
700-
if self.sess().target.arch == "wasm32"
701-
&& self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
702-
{
686+
if llvm_util::get_version() >= (12, 0, 0) && !self.fptoint_sat_broken_in_llvm() {
703687
let src_ty = self.cx.val_ty(val);
704688
let float_width = self.cx.float_width(src_ty);
705689
let int_width = self.cx.int_width(dest_ty);
706-
let name = match (int_width, float_width) {
707-
(32, 32) => Some("llvm.wasm.trunc.saturate.signed.i32.f32"),
708-
(32, 64) => Some("llvm.wasm.trunc.saturate.signed.i32.f64"),
709-
(64, 32) => Some("llvm.wasm.trunc.saturate.signed.i64.f32"),
710-
(64, 64) => Some("llvm.wasm.trunc.saturate.signed.i64.f64"),
711-
_ => None,
712-
};
713-
if let Some(name) = name {
714-
let intrinsic = self.get_intrinsic(name);
715-
return Some(self.call(intrinsic, &[val], None));
716-
}
690+
let name = format!("llvm.fptosi.sat.i{}.f{}", int_width, float_width);
691+
let intrinsic = self.get_intrinsic(&name);
692+
return Some(self.call(intrinsic, &[val], None));
717693
}
718-
None
719-
}
720694

721-
fn fptosui_may_trap(&self, val: &'ll Value, dest_ty: &'ll Type) -> bool {
722-
// Most of the time we'll be generating the `fptosi` or `fptoui`
723-
// instruction for floating-point-to-integer conversions. These
724-
// instructions by definition in LLVM do not trap. For the WebAssembly
725-
// target, however, we'll lower in some cases to intrinsic calls instead
726-
// which may trap. If we detect that this is a situation where we'll be
727-
// using the intrinsics then we report that the call map trap, which
728-
// callers might need to handle.
729-
if !self.wasm_and_missing_nontrapping_fptoint() {
730-
return false;
731-
}
732-
let src_ty = self.cx.val_ty(val);
733-
let float_width = self.cx.float_width(src_ty);
734-
let int_width = self.cx.int_width(dest_ty);
735-
matches!((int_width, float_width), (32, 32) | (32, 64) | (64, 32) | (64, 64))
695+
None
736696
}
737697

738698
fn fptoui(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
739-
// When we can, use the native wasm intrinsics which have tighter
740-
// codegen. Note that this has a semantic difference in that the
741-
// intrinsic can trap whereas `fptoui` never traps. That difference,
742-
// however, is handled by `fptosui_may_trap` above.
699+
// On WebAssembly the `fptoui` and `fptosi` instructions currently have
700+
// poor codegen. The reason for this is that the corresponding wasm
701+
// instructions, `i32.trunc_f32_s` for example, will trap when the float
702+
// is out-of-bounds, infinity, or nan. This means that LLVM
703+
// automatically inserts control flow around `fptoui` and `fptosi`
704+
// because the LLVM instruction `fptoui` is defined as producing a
705+
// poison value, not having UB on out-of-bounds values.
743706
//
744-
// Note that we skip the wasm intrinsics for vector types where `fptoui`
745-
// must be used instead.
746-
if self.wasm_and_missing_nontrapping_fptoint() {
707+
// This method, however, is only used with non-saturating casts that
708+
// have UB on out-of-bounds values. This means that it's ok if we use
709+
// the raw wasm instruction since out-of-bounds values can do whatever
710+
// we like. To ensure that LLVM picks the right instruction we choose
711+
// the raw wasm intrinsic functions which avoid LLVM inserting all the
712+
// other control flow automatically.
713+
if self.sess().target.arch == "wasm32" {
747714
let src_ty = self.cx.val_ty(val);
748715
if self.cx.type_kind(src_ty) != TypeKind::Vector {
749716
let float_width = self.cx.float_width(src_ty);
@@ -765,7 +732,8 @@ impl BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
765732
}
766733

767734
fn fptosi(&mut self, val: &'ll Value, dest_ty: &'ll Type) -> &'ll Value {
768-
if self.wasm_and_missing_nontrapping_fptoint() {
735+
// see `fptoui` above for why wasm is different here
736+
if self.sess().target.arch == "wasm32" {
769737
let src_ty = self.cx.val_ty(val);
770738
if self.cx.type_kind(src_ty) != TypeKind::Vector {
771739
let float_width = self.cx.float_width(src_ty);
@@ -1420,8 +1388,11 @@ impl Builder<'a, 'll, 'tcx> {
14201388
}
14211389
}
14221390

1423-
fn wasm_and_missing_nontrapping_fptoint(&self) -> bool {
1424-
self.sess().target.arch == "wasm32"
1425-
&& !self.sess().target_features.contains(&sym::nontrapping_dash_fptoint)
1391+
fn fptoint_sat_broken_in_llvm(&self) -> bool {
1392+
match self.tcx.sess.target.arch.as_str() {
1393+
// FIXME - https://bugs.llvm.org/show_bug.cgi?id=50083
1394+
"riscv64" => llvm_util::get_version() < (13, 0, 0),
1395+
_ => false,
1396+
}
14261397
}
14271398
}

compiler/rustc_codegen_llvm/src/context.rs

+22-8
Original file line numberDiff line numberDiff line change
@@ -503,14 +503,6 @@ impl CodegenCx<'b, 'tcx> {
503503
let t_f32 = self.type_f32();
504504
let t_f64 = self.type_f64();
505505

506-
ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f32", fn(t_f32) -> t_i32);
507-
ifn!("llvm.wasm.trunc.saturate.unsigned.i32.f64", fn(t_f64) -> t_i32);
508-
ifn!("llvm.wasm.trunc.saturate.unsigned.i64.f32", fn(t_f32) -> t_i64);
509-
ifn!("llvm.wasm.trunc.saturate.unsigned.i64.f64", fn(t_f64) -> t_i64);
510-
ifn!("llvm.wasm.trunc.saturate.signed.i32.f32", fn(t_f32) -> t_i32);
511-
ifn!("llvm.wasm.trunc.saturate.signed.i32.f64", fn(t_f64) -> t_i32);
512-
ifn!("llvm.wasm.trunc.saturate.signed.i64.f32", fn(t_f32) -> t_i64);
513-
ifn!("llvm.wasm.trunc.saturate.signed.i64.f64", fn(t_f64) -> t_i64);
514506
ifn!("llvm.wasm.trunc.unsigned.i32.f32", fn(t_f32) -> t_i32);
515507
ifn!("llvm.wasm.trunc.unsigned.i32.f64", fn(t_f64) -> t_i32);
516508
ifn!("llvm.wasm.trunc.unsigned.i64.f32", fn(t_f32) -> t_i64);
@@ -520,6 +512,28 @@ impl CodegenCx<'b, 'tcx> {
520512
ifn!("llvm.wasm.trunc.signed.i64.f32", fn(t_f32) -> t_i64);
521513
ifn!("llvm.wasm.trunc.signed.i64.f64", fn(t_f64) -> t_i64);
522514

515+
ifn!("llvm.fptosi.sat.i8.f32", fn(t_f32) -> t_i8);
516+
ifn!("llvm.fptosi.sat.i16.f32", fn(t_f32) -> t_i16);
517+
ifn!("llvm.fptosi.sat.i32.f32", fn(t_f32) -> t_i32);
518+
ifn!("llvm.fptosi.sat.i64.f32", fn(t_f32) -> t_i64);
519+
ifn!("llvm.fptosi.sat.i128.f32", fn(t_f32) -> t_i128);
520+
ifn!("llvm.fptosi.sat.i8.f64", fn(t_f64) -> t_i8);
521+
ifn!("llvm.fptosi.sat.i16.f64", fn(t_f64) -> t_i16);
522+
ifn!("llvm.fptosi.sat.i32.f64", fn(t_f64) -> t_i32);
523+
ifn!("llvm.fptosi.sat.i64.f64", fn(t_f64) -> t_i64);
524+
ifn!("llvm.fptosi.sat.i128.f64", fn(t_f64) -> t_i128);
525+
526+
ifn!("llvm.fptoui.sat.i8.f32", fn(t_f32) -> t_i8);
527+
ifn!("llvm.fptoui.sat.i16.f32", fn(t_f32) -> t_i16);
528+
ifn!("llvm.fptoui.sat.i32.f32", fn(t_f32) -> t_i32);
529+
ifn!("llvm.fptoui.sat.i64.f32", fn(t_f32) -> t_i64);
530+
ifn!("llvm.fptoui.sat.i128.f32", fn(t_f32) -> t_i128);
531+
ifn!("llvm.fptoui.sat.i8.f64", fn(t_f64) -> t_i8);
532+
ifn!("llvm.fptoui.sat.i16.f64", fn(t_f64) -> t_i16);
533+
ifn!("llvm.fptoui.sat.i32.f64", fn(t_f64) -> t_i32);
534+
ifn!("llvm.fptoui.sat.i64.f64", fn(t_f64) -> t_i64);
535+
ifn!("llvm.fptoui.sat.i128.f64", fn(t_f64) -> t_i128);
536+
523537
ifn!("llvm.trap", fn() -> void);
524538
ifn!("llvm.debugtrap", fn() -> void);
525539
ifn!("llvm.frameaddress", fn(t_i32) -> i8p);

0 commit comments

Comments
 (0)