diff --git a/src/FixedPointNumbers.jl b/src/FixedPointNumbers.jl index 60ec762d..c8ee8948 100644 --- a/src/FixedPointNumbers.jl +++ b/src/FixedPointNumbers.jl @@ -10,8 +10,14 @@ import Base: ==, <, <=, -, +, *, /, ~, isapprox, using Base: @pure -# T => BaseType -# f => Number of bits reserved for fractional part +""" + FixedPoint{T <: Integer, f} <: Real + +Supertype of the two fixed-point number types: `Fixed{T, f}` and `Normed{T, f}`. + +The parameter `T` is the underlying machine representation and `f` is the number +of fraction bits. +""" abstract type FixedPoint{T <: Integer, f} <: Real end @@ -25,16 +31,20 @@ export # Functions scaledual +include("utilities.jl") + +# reinterpretation reinterpret(x::FixedPoint) = x.i reinterpret(::Type{T}, x::FixedPoint{T,f}) where {T,f} = x.i +reinterpret(::Type{X}, x::T) where {T <: Integer, X <: FixedPoint{T}} = X(x, 0) + +# static parameters +nbitsfrac(::Type{X}) where {T, f, X <: FixedPoint{T,f}} = f +rawtype(::Type{X}) where {T, X <: FixedPoint{T}} = T # construction using the (approximate) intended value, i.e., N0f8 *(x::Real, ::Type{X}) where {X<:FixedPoint} = X(x) -# comparison -==(x::T, y::T) where {T <: FixedPoint} = x.i == y.i - <(x::T, y::T) where {T <: FixedPoint} = x.i < y.i -<=(x::T, y::T) where {T <: FixedPoint} = x.i <= y.i """ isapprox(x::FixedPoint, y::FixedPoint; rtol=0, atol=max(eps(x), eps(y))) @@ -52,26 +62,22 @@ end # predicates isinteger(x::FixedPoint{T,f}) where {T,f} = (x.i&(1<true), typemin(T)); Tmin = String(take!(io)) - show(IOContext(io, :compact=>true), typemax(T)); Tmax = String(take!(io)) - throw(ArgumentError("$T is $bitstring type representing $n values from $Tmin to $Tmax; cannot represent $x")) + show(IOContext(io, :compact=>true), typemin(X)); Xmin = String(take!(io)) + show(IOContext(io, :compact=>true), typemax(X)); Xmax = String(take!(io)) + throw(ArgumentError("$X is $bitstring type representing $n values from $Xmin to $Xmax; cannot represent $x")) end rand(::Type{T}) where {T <: 
FixedPoint} = reinterpret(T, rand(rawtype(T))) diff --git a/src/fixed.jl b/src/fixed.jl index 3576a610..d4164b67 100644 --- a/src/fixed.jl +++ b/src/fixed.jl @@ -1,5 +1,16 @@ -# 32-bit fixed point; parameter `f` is the number of fraction bits -struct Fixed{T <: Signed,f} <: FixedPoint{T, f} +""" + Fixed{T <: Signed, f} <: FixedPoint{T, f} + +`Fixed{T,f}` maps `Signed` integers from `-2^f` to `2^f` to the range +[-1.0, 1.0]. For example, `Fixed{Int8,7}` maps `-128` to `-1.0` and `127` to +`127/128 ≈ 0.992`. + +There are type aliases for `Fixed` in the `QXfY` notation, where `Y` is +the number of fractional bits (i.e. `f`), and `X+Y+1` equals the number of +underlying bits used (`+1` means the sign bit). For example, `Q0f7` is aliased +to `Fixed{Int8,7}` and `Q3f12` is aliased to `Fixed{Int16,12}`. +""" +struct Fixed{T <: Signed, f} <: FixedPoint{T, f} i::T # constructor for manipulating the representation; @@ -14,14 +25,13 @@ Fixed{T,f}(x::Integer) where {T,f} = Fixed{T,f}(round(T, convert(widen1(T),x)<= bitwidth(T)-1 && throw_converterror(Fixed{T,f}, 1) + oneunit(T) << f +end -+(x::Fixed{T,f}, y::Fixed{T,f}) where {T,f} = Fixed{T,f}(x.i+y.i,0) --(x::Fixed{T,f}, y::Fixed{T,f}) where {T,f} = Fixed{T,f}(x.i-y.i,0) +# unchecked arithmetic # with truncation: -#*{f}(x::Fixed32{f}, y::Fixed32{f}) = Fixed32{f}(Base.widemul(x.i,y.i)>>f,0) +#*(x::Fixed{T,f}, y::Fixed{T,f}) = Fixed{T,f}(Base.widemul(x.i,y.i)>>f,0) # with rounding up: *(x::Fixed{T,f}, y::Fixed{T,f}) where {T,f} = Fixed{T,f}((Base.widemul(x.i,y.i) + (one(widen(T)) << (f-1)))>>f,0) @@ -56,7 +66,6 @@ end rem(x::Integer, ::Type{Fixed{T,f}}) where {T,f} = Fixed{T,f}(rem(x,T)<>f) + BigFloat(x.i&(one(widen1(T))<> (8*sizeof(T)-nbitsfrac(T)), 0) +function rawone(::Type{Normed{T,f}}) where {T <: Unsigned, f} + typemax(T) >> (bitwidth(T) - f) end -one(::Type{T}) where {T <: Normed} = oneunit(T) -zero(x::Normed) = zero(typeof(x)) -oneunit(x::Normed) = one(typeof(x)) -one(x::Normed) = oneunit(x) -rawone(v) = 
reinterpret(one(v)) # Conversions function Normed{T,f}(x::Normed{T2}) where {T <: Unsigned,T2 <: Unsigned,f} @@ -66,36 +68,34 @@ function _convert(::Type{U}, x::Float16) where {T, f, U <: Normed{T,f}} end return _convert(U, Float32(x)) end -function _convert(::Type{U}, x::Tf) where {T, f, U <: Normed{T,f}, Tf <: Union{Float32, Float64}} - if T == UInt128 && f == 53 - 0 <= x <= Tf(3.777893186295717e22) || throw_converterror(U, x) +function _convert(::Type{N}, x::Tf) where {T, f, N <: Normed{T,f}, Tf <: Union{Float32, Float64}} + if T === UInt128 && f == 53 + 0 <= x <= Tf(3.777893186295717e22) || throw_converterror(N, x) else - 0 <= x <= Tf((typemax(T)-rawone(U))/rawone(U)+1) || throw_converterror(U, x) + 0 <= x <= Tf((typemax(T)-rawone(N))/rawone(N)+1) || throw_converterror(N, x) end - significand_bits = Tf == Float64 ? 52 : 23 - if f <= (significand_bits + 1) && sizeof(T) * 8 < significand_bits - return reinterpret(U, unsafe_trunc(T, round(rawone(U) * x))) + if f <= (significand_bits(Tf) + 1) && bitwidth(T) < significand_bits(Tf) + return reinterpret(N, unsafe_trunc(T, round(rawone(N) * x))) end # cf. the implementation of `frexp` - Tw = f < sizeof(T) * 8 ? T : widen1(T) - bits = sizeof(Tw) * 8 - 1 - xu = reinterpret(Tf == Float64 ? UInt64 : UInt32, x) - k = Int(xu >> significand_bits) - k == 0 && return zero(U) # neglect subnormal numbers - significand = xu | (one(xu) << significand_bits) - yh = unsafe_trunc(Tw, significand) << (bits - significand_bits) - exponent_bias = Tf == Float64 ? 1023 : 127 - ex = exponent_bias - k + bits - f + Tw = f < bitwidth(T) ? T : widen1(T) + bits = bitwidth(Tw) - 1 + xu = reinterpret(Unsigned, x) + k = Int(xu >> significand_bits(Tf)) + k == 0 && return zero(N) # neglect subnormal numbers + significand = xu | (oneunit(xu) << significand_bits(Tf)) + yh = unsafe_trunc(Tw, significand) << (bits - significand_bits(Tf)) + ex = exponent_bias(Tf) - k + bits - f yi = bits >= f ? 
yh - (yh >> f) : yh if ex <= 0 - ex == 0 && return reinterpret(U, unsafe_trunc(T, yi)) - ex != -1 || signbit(signed(yi)) && return typemax(U) - return reinterpret(U, unsafe_trunc(T, yi + yi)) + ex == 0 && return reinterpret(N, unsafe_trunc(T, yi)) + ex != -1 || signbit(signed(yi)) && return typemax(N) + return reinterpret(N, unsafe_trunc(T, yi + yi)) end - ex > bits && return reinterpret(U, ex == bits + 1 ? one(T) : zero(T)) - yi += one(Tw)<<((ex - 1) & bits) # RoundNearestTiesUp - return reinterpret(U, unsafe_trunc(T, yi >> (ex & bits))) + ex > bits && return reinterpret(N, ex == bits + 1 ? oneunit(T) : zero(T)) + yi += oneunit(Tw)<<((ex - 1) & bits) # RoundNearestTiesUp + return reinterpret(N, unsafe_trunc(T, yi >> (ex & bits))) end rem(x::T, ::Type{T}) where {T <: Normed} = x @@ -103,18 +103,6 @@ rem(x::Normed, ::Type{T}) where {T <: Normed} = reinterpret(T, _unsafe_trunc(raw rem(x::Real, ::Type{T}) where {T <: Normed} = reinterpret(T, _unsafe_trunc(rawtype(T), round(rawone(T)*x))) rem(x::Float16, ::Type{T}) where {T <: Normed} = rem(Float32(x), T) # avoid overflow -float(x::Normed) = convert(floattype(x), x) - -macro f32(x::Float64) # just for hexadecimal floating-point literals - :(Float32($x)) -end -macro exp2(n) - :(_exp2(Val($(esc(n))))) -end -_exp2(::Val{N}) where {N} = exp2(N) - -# for Julia v1.0, which does not fold `div_float` before inlining -inv_rawone(x) = (@generated) ? (y = 1.0 / rawone(x); :($y)) : 1.0 / rawone(x) function (::Type{T})(x::Normed) where {T <: AbstractFloat} # The following optimization for constant division may cause rounding errors. 
@@ -240,21 +228,12 @@ Base.Integer(x::Normed) = convert(Integer, x*1.0) Base.Rational{Ti}(x::Normed) where {Ti <: Integer} = convert(Ti, reinterpret(x))//convert(Ti, rawone(x)) Base.Rational(x::Normed) = reinterpret(x)//rawone(x) -# Traits abs(x::Normed) = x -(-)(x::T) where {T <: Normed} = T(-reinterpret(x), 0) -(~)(x::T) where {T <: Normed} = T(~reinterpret(x), 0) - -+(x::Normed{T,f}, y::Normed{T,f}) where {T,f} = Normed{T,f}(convert(T, x.i+y.i),0) --(x::Normed{T,f}, y::Normed{T,f}) where {T,f} = Normed{T,f}(convert(T, x.i-y.i),0) +# unchecked arithmetic *(x::T, y::T) where {T <: Normed} = convert(T,convert(floattype(T), x)*convert(floattype(T), y)) /(x::T, y::T) where {T <: Normed} = convert(T,convert(floattype(T), x)/convert(floattype(T), y)) -# Comparisons - <(x::T, y::T) where {T <: Normed} = reinterpret(x) < reinterpret(y) -<=(x::T, y::T) where {T <: Normed} = reinterpret(x) <= reinterpret(y) - # Functions trunc(x::T) where {T <: Normed} = T(div(reinterpret(x), rawone(T))*rawone(T),0) floor(x::T) where {T <: Normed} = trunc(x) @@ -265,7 +244,7 @@ function round(x::Normed{T,f}) where {T,f} Normed{T,f}(y+oneunit(Normed{T,f})) : y end function ceil(x::Normed{T,f}) where {T,f} - k = 8*sizeof(T)-f + k = bitwidth(T)-f mask = (typemax(T)<>k y = trunc(x) return convert(T, reinterpret(x)-reinterpret(y)) & (mask)>0 ? 
@@ -281,14 +260,6 @@ isfinite(x::Normed) = true isnan(x::Normed) = false isinf(x::Normed) = false -bswap(x::Normed{UInt8,f}) where {f} = x -bswap(x::Normed) = typeof(x)(bswap(reinterpret(x)),0) - -function minmax(x::T, y::T) where {T <: Normed} - a, b = minmax(reinterpret(x), reinterpret(y)) - T(a,0), T(b,0) -end - # Iteration # The main subtlety here is that iterating over N0f8(0):N0f8(1) will wrap around # unless we iterate using a wider type @@ -314,13 +285,13 @@ end f = max(f1, f2) # ensure we have enough precision T = promote_type(T1, T2) # make sure we have enough integer bits - i1, i2 = 8*sizeof(T1)-f1, 8*sizeof(T2)-f2 # number of integer bits for each - i = 8*sizeof(T)-f + i1, i2 = bitwidth(T1)-f1, bitwidth(T2)-f2 # number of integer bits for each + i = bitwidth(T)-f while i < max(i1, i2) Tw = widen1(T) T == Tw && break T = Tw - i = 8*sizeof(T)-f + i = bitwidth(T)-f end :(Normed{$T,$f}) end diff --git a/src/utilities.jl b/src/utilities.jl new file mode 100644 index 00000000..5bc17388 --- /dev/null +++ b/src/utilities.jl @@ -0,0 +1,31 @@ +# utility functions and macros, which are independent of `FixedPoint` +bitwidth(T::Type) = 8sizeof(T) + +widen1(::Type{Int8}) = Int16 +widen1(::Type{UInt8}) = UInt16 +widen1(::Type{Int16}) = Int32 +widen1(::Type{UInt16}) = UInt32 +widen1(::Type{Int32}) = Int64 +widen1(::Type{UInt32}) = UInt64 +widen1(::Type{Int64}) = Int128 +widen1(::Type{UInt64}) = UInt128 +widen1(::Type{Int128}) = Int128 +widen1(::Type{UInt128}) = UInt128 +widen1(x::Integer) = x % widen1(typeof(x)) + +const ShortInts = Union{Int8, UInt8, Int16, UInt16} +const LongInts = Union{Int64, UInt64, Int128, UInt128, BigInt} + +macro f32(x::Float64) # just for hexadecimal floating-point literals + :(Float32($x)) +end +macro exp2(n) + :(_exp2(Val($(esc(n))))) +end +_exp2(::Val{N}) where {N} = exp2(N) + +# these are defined in julia/float.jl or julia/math.jl, but not exported +significand_bits(::Type{Float32}) = 23 +significand_bits(::Type{Float64}) = 52 
+exponent_bias(::Type{Float32}) = 127 +exponent_bias(::Type{Float64}) = 1023 diff --git a/test/fixed.jl b/test/fixed.jl index cc8ea449..96f7cc6a 100644 --- a/test/fixed.jl +++ b/test/fixed.jl @@ -1,4 +1,5 @@ using FixedPointNumbers, Test +using FixedPointNumbers: bitwidth function test_op(fun::F, ::Type{T}, fx, fy, fxf, fyf, tol) where {F,T} # Make sure that the result is representable @@ -49,6 +50,24 @@ function test_fixed(::Type{T}, f) where {T} end end +@testset "reinterpret" begin + @test reinterpret(Q0f7, signed(0xa2)) === -0.734375Q0f7 + @test reinterpret(Q5f10, signed(0x00a2)) === 0.158203125Q5f10 + + @test reinterpret(reinterpret(Q0f7, signed(0xa2))) === signed(0xa2) + @test reinterpret(reinterpret(Q5f10, signed(0x00a2))) === signed(0x00a2) + + @test reinterpret(Int8, 0.5Q0f7) === signed(0x40) +end + +@testset "inexactness" begin + @test_throws InexactError Q0f7(-2) + # TODO: change back to InexactError when it allows message strings + @test_throws ArgumentError one(Q0f15) + @test_throws ArgumentError oneunit(Q0f31) + @test_throws ArgumentError one(Fixed{Int8,8}) +end + @testset "conversion" begin @test isapprox(convert(Fixed{Int8,7}, 0.8), 0.797, atol=0.001) @test isapprox(convert(Fixed{Int8,7}, 0.9), 0.898, atol=0.001) @@ -164,13 +183,18 @@ end (Int64, 63)) tmax = typemax(Fixed{T, f}) @test tmax == BigInt(typemax(T)) / BigInt(2)^f - tol = (tmax + BigFloat(1.0)) / (sizeof(T) * 8) + tol = (tmax + BigFloat(1.0)) / bitwidth(T) for x in range(-1, stop=BigFloat(tmax)-tol, length=50) @test abs(Fixed{T, f}(x) - x) <= tol end end end +@testset "low-level arithmetic" begin + @test bswap(Q0f7(0.5)) === Q0f7(0.5) + @test bswap(Q0f15(0.5)) === reinterpret(Q0f15, signed(0x0040)) +end + @testset "Promotion within Fixed" begin @test @inferred(promote(Q0f7(0.25), Q0f7(0.75))) === (Q0f7(0.25), Q0f7(0.75)) diff --git a/test/normed.jl b/test/normed.jl index 6fb4d125..4c0430ea 100644 --- a/test/normed.jl +++ b/test/normed.jl @@ -1,4 +1,5 @@ using FixedPointNumbers, Test 
+using FixedPointNumbers: bitwidth @testset "reinterpret" begin @test reinterpret(N0f8, 0xa2).i === 0xa2 @@ -13,6 +14,8 @@ using FixedPointNumbers, Test @test reinterpret(reinterpret(N2f14, 0x00a2)) === 0x00a2 @test reinterpret(reinterpret(N0f16, 0x00a2)) === 0x00a2 + @test reinterpret(UInt8, 1N0f8) === 0xff + @test 0.635N0f8 == N0f8(0.635) @test 0.635N6f10 == N6f10(0.635) @test 0.635N4f12 == N4f12(0.635) @@ -106,7 +109,7 @@ end # issue 102 for T in (UInt8, UInt16, UInt32, UInt64, UInt128) for Tf in (Float16, Float32, Float64) - @testset "Normed{$T,$f}(::$Tf)" for f = 1:sizeof(T)*8 + @testset "Normed{$T,$f}(::$Tf)" for f = 1:bitwidth(T) U = Normed{T,f} r = FixedPointNumbers.rawone(U) @@ -123,7 +126,7 @@ end isinf(input_upper) && continue # for Julia v0.7 @test reinterpret(U(input_upper)) == T(min(round(BigFloat(input_upper) * r), typemax(T))) - input_exp2 = Tf(exp2(sizeof(T) * 8 - f)) + input_exp2 = Tf(exp2(bitwidth(T) - f)) isinf(input_exp2) && continue @test reinterpret(U(input_exp2)) == T(input_exp2) * r end @@ -143,7 +146,7 @@ end for Tf in (Float16, Float32, Float64) @testset "$Tf(::Normed{$Ti})" for Ti in (UInt8, UInt16) - @testset "$Tf(::Normed{$Ti,$f})" for f = 1:(sizeof(Ti)*8) + @testset "$Tf(::Normed{$Ti,$f})" for f = 1:bitwidth(Ti) T = Normed{Ti,f} float_err = 0.0 for i = typemin(Ti):typemax(Ti) @@ -156,7 +159,7 @@ end end end @testset "$Tf(::Normed{$Ti})" for Ti in (UInt32, UInt64, UInt128) - @testset "$Tf(::Normed{$Ti,$f})" for f = 1:(sizeof(Ti)*8) + @testset "$Tf(::Normed{$Ti,$f})" for f = 1:bitwidth(Ti) T = Normed{Ti,f} error_count = 0 for i in vcat(Ti(0x00):Ti(0xFF), (typemax(Ti)-0xFF):typemax(Ti))