From 0f341d619c5bf58e1293e7e0a3fb1c0849fa3804 Mon Sep 17 00:00:00 2001 From: Martin Holters Date: Fri, 16 Dec 2016 08:42:28 +0100 Subject: [PATCH] Remove explicit dependence of sparse broadcast on type inference Instead of determining the output element type beforehand by querying inference, the element type is deduced from the actually computed output values (similar to broadcast over Array, but taking into account the output for the all-inputs-zero case). For the type-unstable case, performance is sub-optimal, but at least it gives the correct result. Closes #19595. --- base/sparse/sparsematrix.jl | 74 +++++++++++++++++++++++-------------- test/sparse/sparse.jl | 13 +++++++ 2 files changed, 60 insertions(+), 27 deletions(-) diff --git a/base/sparse/sparsematrix.jl b/base/sparse/sparsematrix.jl index 31dda9535f4366..a41eced14a9c6f 100644 --- a/base/sparse/sparsematrix.jl +++ b/base/sparse/sparsematrix.jl @@ -1413,7 +1413,7 @@ function map{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N}) fofzeros = f(_zeros_eltypes(A, Bs...)...) fpreszeros = fofzeros == zero(fofzeros) maxnnzC = fpreszeros ? min(length(A), _sumnnzs(A, Bs...)) : length(A) - entrytypeC = _broadcast_type(f, A, Bs...) + entrytypeC = typeof(fofzeros) indextypeC = _promote_indtype(A, Bs...) Ccolptr = Vector{indextypeC}(A.n + 1) Crowval = Vector{indextypeC}(maxnnzC) @@ -1438,7 +1438,7 @@ function broadcast{Tf,N}(f::Tf, A::SparseMatrixCSC, Bs::Vararg{SparseMatrixCSC,N fofzeros = f(_zeros_eltypes(A, Bs...)...) fpreszeros = fofzeros == zero(fofzeros) indextypeC = _promote_indtype(A, Bs...) - entrytypeC = _broadcast_type(f, A, Bs...) + entrytypeC = typeof(fofzeros) Cm, Cn = Base.to_shape(Base.Broadcast.broadcast_indices(A, Bs...)) maxnnzC = fpreszeros ? _checked_maxnnzbcres(Cm, Cn, A, Bs...) : (Cm * Cn) Ccolptr = Vector{indextypeC}(Cn + 1) @@ -1464,13 +1464,19 @@ _maxnnzfrom(Cm, Cn, A) = nnz(A) * div(Cm, A.m) * div(Cn, A.n) @inline _maxnnzfrom_each(Cm, Cn, As) = (_maxnnzfrom(Cm, Cn, first(As)), _maxnnzfrom_each(Cm, Cn, tail(As))...) @inline _unchecked_maxnnzbcres(Cm, Cn, As) = min(Cm * Cn, sum(_maxnnzfrom_each(Cm, Cn, As))) @inline _checked_maxnnzbcres(Cm, Cn, As...) = Cm != 0 && Cn != 0 ? _unchecked_maxnnzbcres(Cm, Cn, As) : 0 -_broadcast_type(f, As...) = Base._promote_op(f, Base.Broadcast.typestuple(As...)) +@inline _update_nzval!{T}(nzval::Vector{T}, k, x::T) = (nzval[k] = x; nzval) +@inline function _update_nzval!{T,Tx}(nzval::Vector{T}, k, x::Tx) + nzval = convert(Vector{typejoin(Tx, T)}, nzval) + nzval[k] = x + return nzval +end # _map_zeropres!/_map_notzeropres! specialized for a single sparse matrix "Stores only the nonzero entries of `map(f, Matrix(A))` in `C`." function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC) spaceC = min(length(C.rowval), length(C.nzval)) Ck = 1 + nzval = C.nzval @inbounds for j in 1:C.n C.colptr[j] = Ck for Ak in nzrange(A, j) @@ -1478,12 +1484,13 @@ function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC) if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, Ck + nnz(A) - (Ak - 1))) C.rowval[Ck] = A.rowval[Ak] - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end end end @inbounds C.colptr[C.n + 1] = Ck + nzval === C.nzval || (C = SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval)) _trimstorage!(C, Ck - 1) return C end @@ -1496,13 +1503,14 @@ function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMa _densestructure!(C) # Populate values fill!(C.nzval, fillvalue) + nzval = C.nzval @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)), Ak in nzrange(A, j) Cx = f(A.nzval[Ak]) - Cx != fillvalue && (C.nzval[jo + A.rowval[Ak]] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + A.rowval[Ak], Cx)) end # NOTE: Combining the fill! above into the loop above to avoid multiple sweeps over / # nonsequential access of C.nzval does not appear to improve performance. - return C + return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval) end # helper functions for these methods and some of those below function _expandstorage!(X::SparseMatrixCSC, maxstored) @@ -1533,6 +1541,7 @@ function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::Sp spaceC = min(length(C.rowval), length(C.nzval)) rowsentinelA = convert(eltype(A.rowval), C.m + 1) rowsentinelB = convert(eltype(B.rowval), C.m + 1) + nzval = C.nzval Ck = 1 @inbounds for j in 1:C.n C.colptr[j] = Ck @@ -1562,12 +1571,13 @@ function _map_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::Sp if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, Ck + (nnz(A) - (Ak - 1)) + (nnz(B) - (Bk - 1)))) C.rowval[Ck] = Ci - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end end end @inbounds C.colptr[C.n + 1] = Ck + nzval === C.nzval || (C = SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval)) _trimstorage!(C, Ck - 1) return C end @@ -1578,6 +1588,7 @@ function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMa fill!(C.nzval, fillvalue) # NOTE: Combining this fill! into the loop below to avoid multiple sweeps over / # nonsequential access of C.nzval does not appear to improve performance. + nzval = C.nzval rowsentinelA = convert(eltype(A.rowval), C.m + 1) rowsentinelB = convert(eltype(B.rowval), C.m + 1) @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)) @@ -1598,10 +1609,10 @@ function _map_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::SparseMa Cx, Ci = f(zero(eltype(A)), B.nzval[Bk]), Bi Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB end - Cx != fillvalue && (C.nzval[jo + Ci] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + Ci, Cx)) end end - return C + return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval) end # _broadcast_zeropres!/_broadcast_notzeropres! specialized for a pair of (input) sparse matrices function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, B::SparseMatrixCSC) @@ -1609,6 +1620,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, spaceC = min(length(C.rowval), length(C.nzval)) rowsentinelA = convert(eltype(A.rowval), C.m + 1) rowsentinelB = convert(eltype(B.rowval), C.m + 1) + nzval = C.nzval # A and B cannot have the same shape, as we directed that case to map in broadcast's # entry point; here we need efficiently handle only heterogeneous combinations of matrices # with no singleton dimensions ("matrices" hereafter), one singleton dimension ("columns" @@ -1663,7 +1675,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) C.rowval[Ck] = Ci - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end end @@ -1685,7 +1697,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) C.rowval[Ck] = B.rowval[Bk] - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end Bk += one(Bk) @@ -1704,7 +1716,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) C.rowval[Ck] = Ci - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end end @@ -1726,7 +1738,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) C.rowval[Ck] = A.rowval[Ak] - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end Ak += one(Ak) @@ -1745,7 +1757,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, A, B))) C.rowval[Ck] = Ci - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end end @@ -1753,6 +1765,7 @@ function _broadcast_zeropres!{Tf}(f::Tf, C::SparseMatrixCSC, A::SparseMatrixCSC, end end @inbounds C.colptr[C.n + 1] = Ck + nzval === C.nzval || (C = SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval)) _trimstorage!(C, Ck - 1) return C end @@ -1764,6 +1777,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::Sp fill!(C.nzval, fillvalue) rowsentinelA = convert(eltype(A.rowval), C.m + 1) rowsentinelB = convert(eltype(B.rowval), C.m + 1) + nzval = C.nzval # Cases without vertical expansion if A.m == B.m @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)) @@ -1785,7 +1799,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::Sp Ak += one(Ak); Ai = Ak < stopAk ? A.rowval[Ak] : rowsentinelA Bk += one(Bk); Bi = Bk < stopBk ? B.rowval[Bk] : rowsentinelB end - Cx != fillvalue && (C.nzval[jo + Ci] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + Ci, Cx)) end end # Cases with vertical expansion @@ -1798,7 +1812,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::Sp if fvAzB == zero(eltype(C)) while Bk < stopBk Cx = f(Ax, B.nzval[Bk]) - Cx != fillvalue && (C.nzval[jo + B.rowval[Bk]] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + B.rowval[Bk], Cx)) Bk += one(Bk) end else @@ -1810,7 +1824,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::Sp else Cx = fvAzB end - Cx != fillvalue && (C.nzval[jo + Ci] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + Ci, Cx)) end end end @@ -1823,7 +1837,7 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::Sp if fzAvB == zero(eltype(C)) while Ak < stopAk Cx = f(A.nzval[Ak], Bx) - Cx != fillvalue && (C.nzval[jo + A.rowval[Ak]] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + A.rowval[Ak], Cx)) Ak += one(Ak) end else @@ -1835,12 +1849,12 @@ function _broadcast_notzeropres!{Tf}(f::Tf, fillvalue, C::SparseMatrixCSC, A::Sp else Cx = fzAvB end - Cx != fillvalue && (C.nzval[jo + Ci] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + Ci, Cx)) end end end end - return C + return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval) end # _map_zeropres!/_map_notzeropres! for more than two sparse matrices @@ -1849,6 +1863,7 @@ function _map_zeropres!{Tf,N}(f::Tf, C::SparseMatrixCSC, As::Vararg{SparseMatrix rowsentinel = C.m + 1 Ck = 1 stopks = _indforcol_all(1, As) + nzval = C.nzval @inbounds for j in 1:C.n C.colptr[j] = Ck ks = stopks @@ -1865,13 +1880,14 @@ function _map_zeropres!{Tf,N}(f::Tf, C::SparseMatrixCSC, As::Vararg{SparseMatrix if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, Ck + min(length(C), _sumnnzs(As...)) - (sum(ks) - N))) C.rowval[Ck] = activerow - C.nzval[Ck] = Cx + _update_nzval!(nzval, Ck, Cx) Ck += 1 end activerow = min(rows...) end end @inbounds C.colptr[C.n + 1] = Ck + nzval === C.nzval || (C = SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval)) _trimstorage!(C, Ck - 1) return C end @@ -1884,6 +1900,7 @@ function _map_notzeropres!{Tf,N}(f::Tf, fillvalue, C::SparseMatrixCSC, As::Varar # nonsequential access of C.nzval does not appear to improve performance. rowsentinel = C.m + 1 stopks = _indforcol_all(1, As) + nzval = C.nzval @inbounds for (j, jo) in zip(1:C.n, 0:C.m:(C.m*C.n - 1)) ks = stopks stopks = _indforcol_all(j + 1, As) @@ -1896,7 +1913,7 @@ function _map_notzeropres!{Tf,N}(f::Tf, fillvalue, C::SparseMatrixCSC, As::Varar # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) vals, ks, rows = _fusedupdate_all(rowsentinel, activerow, rows, ks, stopks, As) Cx = f(vals...) - Cx != fillvalue && (C.nzval[jo + activerow] = Cx) + Cx != fillvalue && (_update_nzval!(nzval, jo + activerow, Cx)) activerow = min(rows...) end end @@ -1962,6 +1979,7 @@ function _broadcast_zeropres!{Tf,N}(f::Tf, C::SparseMatrixCSC, As::Vararg{Sparse expandsverts = _expandsvert_all(C, As) expandshorzs = _expandshorz_all(C, As) rowsentinel = C.m + 1 + nzval = C.nzval Ck = 1 @inbounds for j in 1:C.n C.colptr[j] = Ck @@ -1985,7 +2003,7 @@ function _broadcast_zeropres!{Tf,N}(f::Tf, C::SparseMatrixCSC, As::Vararg{Sparse if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, As))) C.rowval[Ck] = activerow - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end activerow = min(rows...) @@ -2006,13 +2024,14 @@ function _broadcast_zeropres!{Tf,N}(f::Tf, C::SparseMatrixCSC, As::Vararg{Sparse if Cx != zero(eltype(C)) Ck > spaceC && (spaceC = _expandstorage!(C, _unchecked_maxnnzbcres(C.m, C.n, As))) C.rowval[Ck] = Ci - C.nzval[Ck] = Cx + nzval = _update_nzval!(nzval, Ck, Cx) Ck += 1 end end end end @inbounds C.colptr[C.n + 1] = Ck + nzval === C.nzval || (C = SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval)) _trimstorage!(C, Ck - 1) return C end @@ -2022,6 +2041,7 @@ function _broadcast_notzeropres!{Tf,N}(f::Tf, fillvalue, C::SparseMatrixCSC, As: _densestructure!(C) # Populate values fill!(C.nzval, fillvalue) + nzval = C.nzval expandsverts = _expandsvert_all(C, As) expandshorzs = _expandshorz_all(C, As) rowsentinel = C.m + 1 @@ -2043,7 +2063,7 @@ function _broadcast_notzeropres!{Tf,N}(f::Tf, fillvalue, C::SparseMatrixCSC, As: # rows = _updaterow_all(rowsentinel, activerows, rows, ks, stopks, As) args, ks, rows = _fusedupdatebc_all(rowsentinel, activerow, rows, defargs, ks, stopks, As) Cx = f(args...) - Cx != fillvalue && (C.nzval[jo + activerow] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + activerow, Cx)) activerow = min(rows...) end else # fillvalue-non-preserving column scan @@ -2059,11 +2079,11 @@ function _broadcast_notzeropres!{Tf,N}(f::Tf, fillvalue, C::SparseMatrixCSC, As: else Cx = defaultCx end - Cx != fillvalue && (C.nzval[jo + Ci] = Cx) + Cx != fillvalue && (nzval = _update_nzval!(nzval, jo + Ci, Cx)) end end end - return C + return nzval === C.nzval ? C : SparseMatrixCSC(C.m, C.n, C.colptr, C.rowval, nzval) end # helper method for broadcast/broadcast! methods just above @inline _expandsvert(C, A) = A.m != C.m diff --git a/test/sparse/sparse.jl b/test/sparse/sparse.jl index e9b1e3fbcaca2c..d1a33fbf8f2176 100644 --- a/test/sparse/sparse.jl +++ b/test/sparse/sparse.jl @@ -1828,3 +1828,16 @@ let @test_throws DimensionMismatch broadcast(+, A, B, speye(N)) @test_throws DimensionMismatch broadcast!(+, X, A, B, speye(N)) end + +# Issue #19595 - broadcasting over sparse matrices with abstract eltype +let x = sparse(eye(Real,3,3)) + @test eltype(x) === Real + @test eltype(x + x) <: Real + @test eltype(x .+ x) <: Real + @test eltype(map(+, x, x)) <: Real + @test eltype(broadcast(+, x, x)) <: Real + @test eltype(x + x + x) <: Real + @test eltype(x .+ x .+ x) <: Real + @test eltype(map(+, map(+, x, x), x)) <: Real + @test eltype(broadcast(+, broadcast(+, x, x), x)) <: Real +end