Skip to content

Commit d90fa45

Browse files
authored
Fix minimum/maximum over dimensions with missing values (#35323)
`v0 != v0` returns `missing` for missing values. Use the largest/smallest non-missing value to initialize the array. This is an inefficient approach. Faster alternatives would be to avoid using an initial value at all, and instead keep track of whether a value has been set in a separate mask; or to use `typemax`/`typemin` for types that support them. Fixes #35308.
1 parent e2a26aa commit d90fa45

File tree

2 files changed

+26
-11
lines changed

2 files changed

+26
-11
lines changed

base/reducedim.jl

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -146,16 +146,18 @@ for (f1, f2, initval, typeextreme) in ((:min, :max, :Inf, :typemax), (:max, :min
146146
T = _realtype(f, promote_union(eltype(A)))
147147
Tr = v0 isa T ? T : typeof(v0)
148148

149-
# but NaNs and missing need to be avoided as initial values
149+
# but NaNs, missing and unordered values need to be avoided as initial values
150150
if v0 isa Number && isnan(v0)
151151
# v0 is NaN
152152
v0 = oftype(v0, $initval)
153153
elseif isunordered(v0)
154154
# v0 is missing or a third-party unordered value
155155
Tnm = nonmissingtype(Tr)
156-
# TODO: Some types, like BigInt, don't support typemin/typemax.
157-
# So a Matrix{Union{BigInt, Missing}} can still error here.
158-
v0 = $typeextreme(Tnm)
156+
if Tnm <: Union{BitInteger, IEEEFloat, BigFloat}
157+
v0 = $typeextreme(Tnm)
158+
elseif !all(isunordered, A1)
159+
v0 = mapreduce(f, $f2, Iterators.filter(!isunordered, A1))
160+
end
159161
end
160162
# v0 may have changed type.
161163
Tr = v0 isa T ? T : typeof(v0)
@@ -186,12 +188,18 @@ function reducedim_init(f::ExtremaMap, op::typeof(_extrema_rf), A::AbstractArray
186188

187189
# but NaNs, missing and unordered values need to be avoided as initial values
188190
if v0[1] isa Number && isnan(v0[1])
191+
# v0 is NaN
189192
v0 = oftype(v0[1], Inf), oftype(v0[2], -Inf)
190193
elseif isunordered(v0[1])
191194
# v0 is missing or a third-party unordered value
192-
# TODO: Some types, like BigInt, don't support typemin/typemax.
193-
# So a Matrix{Union{BigInt, Missing}} can still error here.
194-
v0 = typemax(nonmissingtype(Tmin)), typemin(nonmissingtype(Tmax))
195+
Tminnm = nonmissingtype(Tmin)
196+
Tmaxnm = nonmissingtype(Tmax)
197+
if Tminnm <: Union{BitInteger, IEEEFloat, BigFloat} &&
198+
Tmaxnm <: Union{BitInteger, IEEEFloat, BigFloat}
199+
v0 = (typemax(Tminnm), typemin(Tmaxnm))
200+
elseif !all(isunordered, A1)
201+
v0 = reverse(mapreduce(f, op, Iterators.filter(!isunordered, A1)))
202+
end
195203
end
196204
# v0 may have changed type.
197205
Tmin = v0[1] isa T ? T : typeof(v0[1])

test/reducedim.jl

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -608,7 +608,7 @@ end
608608
end
609609
@testset "NaN/missing test for extrema with dims #43599" begin
610610
for sz = (3, 10, 100)
611-
for T in (Int, Float64, BigFloat)
611+
for T in (Int, Float64, BigFloat, BigInt)
612612
Aₘ = Matrix{Union{T, Missing}}(rand(-sz:sz, sz, sz))
613613
Aₘ[rand(1:sz*sz, sz)] .= missing
614614
unordered_test_for_extrema(Aₘ)
@@ -622,9 +622,16 @@ end
622622
end
623623
end
624624
end
625-
@test_broken minimum([missing;BigInt(1)], dims = 1)
626-
@test_broken maximum([missing;BigInt(1)], dims = 1)
627-
@test_broken extrema([missing;BigInt(1)], dims = 1)
625+
626+
@testset "minimum/maximum over dims with missing (#35308)" begin
627+
for T in (Int, Float64, BigInt, BigFloat)
628+
x = Union{T, Missing}[1 missing; 2 missing]
629+
@test isequal(minimum(x, dims=1), reshape([1, missing], 1, :))
630+
@test isequal(maximum(x, dims=1), reshape([2, missing], 1, :))
631+
@test isequal(minimum(x, dims=2), reshape([missing, missing], :, 1))
632+
@test isequal(maximum(x, dims=2), reshape([missing, missing], :, 1))
633+
end
634+
end
628635

629636
# issue #26709
630637
@testset "dimensional reduce with custom non-bitstype types" begin

0 commit comments

Comments
 (0)