Remove explicit dependence of sparse broadcast on type inference

martinholters · martinholters · commit 2afab2d42dde · 2016-12-16T08:51:39.000+01:00
Instead of determining the output element type beforehand by querying
inference, the element type is deduced from the actually computed output
values (similar to broadcast over Array, but taking into account the
output for the all-inputs-zero case). For the type-unstable case,
performance is sub-optimal, but at least it gives the correct result.

Closes #19595.
diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl
@@ -1413,7 +1413,7 @@ function map{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N})
     fofzeros = f(_zeros_eltypes(A, Bs...)...)
     fpreszeros = fofzeros == zero(fofzeros)
     maxnnzC = fpreszeros ? min(length(A), _sumnnzs(A, Bs...)) : length(A)
-    entrytypeC = _broadcast_type(f, A, Bs...)
+    entrytypeC = typeof(fofzeros)
     indextypeC = _promote_indtype(A, Bs...)
     Ccolptr = Vector{indextypeC}(A.n + 1)
     Crowval = Vector{indextypeC}(maxnnzC)
@@ -1438,7 +1438,7 @@ function broadcast{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N
     fofzeros = f(_zeros_eltypes(A, Bs...)...)
     fpreszeros = fofzeros == zero(fofzeros)
     indextypeC = _promote_indtype(A, Bs...)
-    entrytypeC = _broadcast_type(f, A, Bs...)
+    entrytypeC = typeof(fofzeros)
     Cm, Cn = Base.to_shape(Base.Broadcast.broadcast_indices(A, Bs...))
     maxnnzC = fpreszeros ? _checked_maxnnzbcres(Cm, Cn, A, Bs...) : (Cm * Cn)
     Ccolptr = Vector{indextypeC}(Cn + 1)
@@ -1464,28 +1464,34 @@ _maxnnzfrom(Cm, Cn, A) = nnz(A) * div(Cm, A.m) * div(Cn, A.n)
 @inline _maxnnzfrom_each(Cm, Cn, As) = (_maxnnzfrom(Cm, Cn, first(As)), _maxnnzfrom_each(Cm, Cn, tail(As))...)
 @inline _unchecked_maxnnzbcres(Cm, Cn, As) = min(Cm * Cn, sum(_maxnnzfrom_each(Cm, Cn, As)))
 @inline _checked_maxnnzbcres(Cm, Cn, As...) = Cm != 0 && Cn != 0 ? _unchecked_maxnnzbcres(Cm, Cn, As) : 0
-_broadcast_type(f, As...) = Base._promote_op(f, Base.Broadcast.typestuple(As...))
+@inline _update_nzval!{T}(nzval::Vector{T}, k, x::T) = (nzval[k] = x; nzval)
+@inline function _update_nzval!{T,Tx}(nzval::Vector{T}, k, x::Tx)
+    nzval = convert(Vector{typejoin(Tx, T)}, nzval)
+    nzval[k] = x
+    return nzval
+end
 
 # _map_zeropres!/_map_notzeropres! specialized for a single sparse matrix
 "Stores only the nonzero entries of `map(f, Matrix(A))` in `C`."
 function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC)
     spaceC = min(length(C.rowval), length(C.nzval))
     Ck = 1
+    nzval = C.nzval
     @inbounds for j in 1:C.n
         C.colptr[j] = Ck
         for Ak in nzrange(A, j)
             Cx = f(A.nzval[Ak])
             if Cx != zero(eltype(C))
                 Ck > spaceC && (spaceC = _expandstorage!(C, Ck + nnz(A) - (Ak - 1)))
                 C.rowval[Ck] = A.rowval[Ak]
-                C.nzval[Ck] = Cx
+                nzval = _update_nzval!(nzval, Ck, Cx)
                 Ck += 1
             end
         end
     end
     @inbounds C.colptr[C.n + 1] = Ck
     _trimstorage!(C, Ck - 1)
-    return C
+    return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval)
 end
 """
 Densifies `C`, storing `fillvalue` in place of each unstored entry in `A` and
@@ -1496,13 +1502,14 @@ function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMa
     _densestructure!(C)
     # Populate values
     fill!(C.nzval, fillvalue)
+    nzval = C.nzval
     @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)), Ak in nzrange(A, j)
         Cx = f(A.nzval[Ak])
-        Cx != fillvalue && (C.nzval[jo + A.rowval[Ak]] = Cx)
+        Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + A.rowval[Ak], Cx))
     end
     # NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over /
     # nonsequential access of C.nzval does not appear to improve performance.
-    return C
+    return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval)
 end
 # helper functions for these methods and some of those below
 function _expandstorage!(X::SparseMatrixCSC, maxstored)
@@ -1533,6 +1540,7 @@ function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::Sp
     spaceC = min(length(C.rowval), length(C.nzval))
     rowsentinelA = convert(eltype(A.rowval), C.m + 1)
     rowsentinelB = convert(eltype(B.rowval), C.m + 1)
+    nzval = C.nzval
     Ck = 1
     @inbounds for j in 1:C.n
         C.colptr[j] = Ck
@@ -1562,12 +1570,13 @@ function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::Sp
             if Cx != zero(eltype(C))
                 Ck > spaceC && (spaceC = _expandstorage!(C, Ck + (nnz(A) - (Ak - 1)) + (nnz(B) - (Bk - 1))))
                 C.rowval[Ck] = Ci
-                C.nzval[Ck] = Cx
+                nzval = _update_nzval!(nzval, Ck, Cx)
                 Ck += 1
             end
         end
     end
     @inbounds C.colptr[C.n + 1] = Ck
+    nzval === C.nzval || (C = SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval))
     _trimstorage!(C, Ck - 1)
     return C
 end
@@ -1578,6 +1587,7 @@ function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMa
     fill!(C.nzval, fillvalue)
     # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over /
     # nonsequential access of C.nzval does not appear to improve performance.
+    nzval = C.nzval
     rowsentinelA = convert(eltype(A.rowval), C.m + 1)
     rowsentinelB = convert(eltype(B.rowval), C.m + 1)
     @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1))
@@ -1598,10 +1608,10 @@ function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMa
                 Cx, Ci = f(zero(eltype(A)), B.nzval[Bk]), Bi
                 Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB
             end
-            Cx != fillvalue && (C.nzval[jo + Ci] = Cx)
+            Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + Ci, Cx))
         end
     end
-    return C
+    return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval)
 end
 # _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse matrices
 function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::SparseMatrixCSC)
diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl
@@ -1828,3 +1828,16 @@ let
     @test_throws DimensionMismatch broadcast(+, A, B, speye(N))
     @test_throws DimensionMismatch broadcast!(+, X, A, B, speye(N))
 end
+
+# Issue #19595 - broadcasting over sparse matrices with abstract eltype
+let x = sparse(eye(Real,3,3))
+    @test eltype(x) === Real
+    @test eltype(x + x) <: Real
+    @test eltype(x .+ x) <: Real
+    @test eltype(map(+, x, x)) <: Real
+    @test eltype(broadcast(+, x, x)) <: Real
+    @test eltype(x + x + x) <: Real
+    @test eltype(x .+ x .+ x) <: Real
+    @test eltype(map(+, map(+, x, x), x)) <: Real
+    @test eltype(broadcast(+, broadcast(+, x, x), x)) <: Real
+end