Skip to content

Commit 4ff8145

Browse files
committed
Merge pull request #9898 from eschnett/choose-correct-fma
Automatically choose a correct `fma` implementation at run time
2 parents f36625b + 975a7dc commit 4ff8145

File tree

1 file changed

+25
-2
lines changed

1 file changed

+25
-2
lines changed

base/float.jl

Lines changed: 25 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -198,8 +198,6 @@ widen(::Type{Float32}) = Float64
198198
# Float32 division: unbox both operands, apply the `div_float` intrinsic,
# and box the result back as a Float32.
function /(x::Float32, y::Float32)
    return box(Float32,
               div_float(unbox(Float32, x),
                         unbox(Float32, y)))
end
199199
# Float64 division: unbox both operands, apply the `div_float` intrinsic,
# and box the result back as a Float64.
function /(x::Float64, y::Float64)
    return box(Float64,
               div_float(unbox(Float64, x),
                         unbox(Float64, y)))
end
200200

201-
# Float32 fused multiply-add implemented directly with the `fma_float`
# intrinsic on the unboxed operands; the result is boxed as a Float32.
function fma(x::Float32, y::Float32, z::Float32)
    return box(Float32,
               fma_float(unbox(Float32, x),
                         unbox(Float32, y),
                         unbox(Float32, z)))
end
202-
# Float64 fused multiply-add implemented directly with the `fma_float`
# intrinsic on the unboxed operands; the result is boxed as a Float64.
function fma(x::Float64, y::Float64, z::Float64)
    return box(Float64,
               fma_float(unbox(Float64, x),
                         unbox(Float64, y),
                         unbox(Float64, z)))
end
203201
# Float32 multiply-add: passes the unboxed operands to the `muladd_float`
# intrinsic and boxes the result as a Float32.
function muladd(x::Float32, y::Float32, z::Float32)
    return box(Float32,
               muladd_float(unbox(Float32, x),
                            unbox(Float32, y),
                            unbox(Float32, z)))
end
204202
# Float64 multiply-add: passes the unboxed operands to the `muladd_float`
# intrinsic and boxes the result as a Float64.
function muladd(x::Float64, y::Float64, z::Float64)
    return box(Float64,
               muladd_float(unbox(Float64, x),
                            unbox(Float64, y),
                            unbox(Float64, z)))
end
205203

@@ -387,6 +385,31 @@ end
387385
eps() = eps(Float64)
388386
end
389387

388+
# fused multiply-add
389+
# Float32 fused multiply-add computed by calling the C function `fmaf`
# from the library named by `libm_name`.
function fma_libm(x::Float32, y::Float32, z::Float32)
    return ccall(("fmaf", libm_name), Float32,
                 (Float32, Float32, Float32),
                 x, y, z)
end
391+
# Float64 fused multiply-add computed by calling the C function `fma`
# from the library named by `libm_name`.
function fma_libm(x::Float64, y::Float64, z::Float64)
    return ccall(("fma", libm_name), Float64,
                 (Float64, Float64, Float64),
                 x, y, z)
end
393+
# Float32 fused multiply-add computed with the `fma_float` intrinsic on the
# unboxed operands; the result is boxed as a Float32.
function fma_llvm(x::Float32, y::Float32, z::Float32)
    return box(Float32,
               fma_float(unbox(Float32, x),
                         unbox(Float32, y),
                         unbox(Float32, z)))
end
395+
# Float64 fused multiply-add computed with the `fma_float` intrinsic on the
# unboxed operands; the result is boxed as a Float64.
function fma_llvm(x::Float64, y::Float64, z::Float64)
    return box(Float64,
               fma_float(unbox(Float64, x),
                         unbox(Float64, y),
                         unbox(Float64, z)))
end
397+
# Pick the Float32 `fma` method when this file is evaluated.
# LLVM's fma may be incorrect, e.g. because LLVM falls back onto a broken
# system libm; when the probe below does not give the expected result,
# openlibm's fma (through `fma_libm`) is used instead.
# Probe operand: 1.0000305f0 = 1 + 1/2^15
if fma_llvm(1.0000305f0, 1.0000305f0, -1.0f0) != 6.103609f-5
    # The LLVM-based result differs from the expected value: use libm.
    fma(x::Float32, y::Float32, z::Float32) = fma_libm(x, y, z)
else
    # The LLVM-based result matches the expected value: keep it.
    fma(x::Float32, y::Float32, z::Float32) = fma_llvm(x, y, z)
end
405+
# Pick the Float64 `fma` method when this file is evaluated: probe
# `fma_llvm` with a known input and use `fma_libm` if the result differs
# from the expected value.
# Probe operand: 1.0000000009313226 = 1 + 1/2^30
if fma_llvm(1.0000000009313226, 1.0000000009313226, -1.0) != 1.8626451500983188e-9
    # The LLVM-based result differs from the expected value: use libm.
    fma(x::Float64, y::Float64, z::Float64) = fma_libm(x, y, z)
else
    # The LLVM-based result matches the expected value: keep it.
    fma(x::Float64, y::Float64, z::Float64) = fma_llvm(x, y, z)
end
412+
390413
## byte order swaps for arbitrary-endianness serialization/deserialization ##
391414
# Byte-swap a Float32: apply the `bswap_int` intrinsic to the unboxed value
# and box the result as a Float32.
function bswap(x::Float32)
    return box(Float32,
               bswap_int(unbox(Float32, x)))
end
392415
# Byte-swap a Float64: apply the `bswap_int` intrinsic to the unboxed value
# and box the result as a Float64.
function bswap(x::Float64)
    return box(Float64,
               bswap_int(unbox(Float64, x)))
end

0 commit comments

Comments
 (0)