JuliaSparse · ViralBShah · Aug 7, 2024 · Jul 25, 2024 · Jul 27, 2024 · Jul 27, 2024
diff --git a/src/solvers/cholmod.jl b/src/solvers/cholmod.jl
@@ -18,7 +18,7 @@
 using LinearAlgebra: RealHermSymComplexHerm, AdjOrTrans
 import LinearAlgebra: (\), AdjointFactorization,
                  cholesky, cholesky!, det, diag, ishermitian, isposdef,
-                 issuccess, issymmetric, ldlt, ldlt!, logdet,
+                 issuccess, issymmetric, ldiv!, ldlt, ldlt!, logdet,
                  lowrankdowndate, lowrankdowndate!, lowrankupdate, lowrankupdate!
 
 using SparseArrays
@@ -795,7 +795,7 @@
     A, B = convert.(Sparse{promote_type(Tv1, Tv2), promote_type(Ti1, Ti2)}, (A, B))
     return ssmult(A, B, stype, values, sorted)
 end
-function horzcat(A::Sparse{Tv1, Ti1}, B::Sparse{Tv2, Ti2}, values::Bool) where 
+function horzcat(A::Sparse{Tv1, Ti1}, B::Sparse{Tv2, Ti2}, values::Bool) where
         {Tv1<:VRealTypes, Tv2<:VRealTypes, Ti1, Ti2}
     A, B = convert.(Sparse{promote_type(Tv1, Tv2), promote_type(Ti1, Ti2)}, (A, B))
     return horzcat(A, B, values)
@@ -809,7 +809,7 @@
     A, X = convert(Sparse{Tv3, Ti}, A), convert(Dense{Tv3}, X)
     return sdmult!(A, transpose, α, β, X, Y)
 end
-function vertcat(A::Sparse{Tv1, Ti1}, B::Sparse{Tv2, Ti2}, values::Bool) where 
+function vertcat(A::Sparse{Tv1, Ti1}, B::Sparse{Tv2, Ti2}, values::Bool) where
         {Tv1<:VRealTypes, Ti1, Tv2<:VRealTypes, Ti2}
     A, B = convert.(Sparse{promote_type(Tv1, Tv2), promote_type(Ti1, Ti2)}, (A, B))
     return vertcat(A, B, values)
@@ -895,7 +895,7 @@
     return Dense{T}(A)
 end
 # Don't always promote to Float64 now that we have Float32 support.
-Dense(A::StridedVecOrMatInclAdjAndTrans{T}) where 
+Dense(A::StridedVecOrMatInclAdjAndTrans{T}) where
     {T<:Union{Float16, ComplexF16, Float32, ComplexF32}} = Dense{promote_type(T, Float32)}(A)
 
 
@@ -913,6 +913,32 @@
 end
 Base.convert(::Type{Dense{T}}, A::Dense{T}) where T = A
 
+# Describe `x` to CHOLMOD without copying: `Dense(x)` would allocate a new dense
+# matrix inside CHOLMOD, so instead a `cholmod_dense_struct` is filled by hand with
+# `pointer(x)` as its data pointer. Returns the struct together with a raw pointer
+# to it; callers must keep `x` and the struct rooted (e.g. with `GC.@preserve`)
+# while that pointer is in use. The struct has a single leading dimension `d`, so
+# only arrays with unit stride along the first dimension are accepted.
+function wrap_dense_and_ptr(x::StridedVecOrMat{T}) where {T <: VTypes}
+    stride(x, 1) == 1 || throw(ArgumentError("first dimension of x must have unit stride"))
+    dense_x = cholmod_dense_struct()
+    dense_x.nrow = size(x, 1)
+    dense_x.ncol = size(x, 2)
+    dense_x.nzmax = length(x)
+    dense_x.d = stride(x, 2)
+    dense_x.x = pointer(x)
+    dense_x.z = C_NULL
+    dense_x.xtype = xtyp(eltype(x))
+    dense_x.dtype = dtyp(eltype(x))
+    return dense_x, pointer_from_objref(dense_x)
+end
+# `Dense` arrays need special handling: `x.ptr` already points at a
+# `cholmod_dense_struct`, not at the array values, so that struct is loaded as is.
+function wrap_dense_and_ptr(x::Dense{T}) where {T <: VTypes}
+    dense_x = unsafe_load(x.ptr)
+    return dense_x, pointer_from_objref(dense_x)
+end
+
 # This constructor assumes zero based colptr and rowval
 function Sparse(m::Integer, n::Integer,
         colptr0::Vector{Ti}, rowval0::Vector{Ti},
@@ -1055,8 +1081,8 @@
     Sparse{promote_type(Tv, Float64), Ti <: ITypes ? Ti : promote_type(Ti, Int)}(
         A.data, A.uplo == 'L' ? -1 : 1
     )
-Sparse(A::Hermitian{Tv, SparseMatrixCSC{Tv,Ti}}) where 
-    {Tv<:Union{Float16, Float32, ComplexF32, ComplexF16}, Ti} = 
+Sparse(A::Hermitian{Tv, SparseMatrixCSC{Tv,Ti}}) where
+    {Tv<:Union{Float16, Float32, ComplexF32, ComplexF16}, Ti} =
     Sparse{promote_type(Float32, Tv), Ti <: ITypes ? Ti : promote_type(Ti, Int)}(
         A.data, A.uplo == 'L' ? -1 : 1
     )
@@ -1076,7 +1102,7 @@
         a = unsafe_load(typedpointer(A))
         S = allocate_sparse(a.nrow, a.ncol, a.nzmax, Bool(a.sorted), Bool(a.packed), a.stype, Tnew, Inew)
         s = unsafe_load(typedpointer(S))
-        
+
         ap = unsafe_wrap(Array, a.p, (a.ncol + 1,), own = false)
         sp = unsafe_wrap(Array, s.p, (s.ncol + 1,), own = false)
         copyto!(sp, ap)
@@ -1376,7 +1402,7 @@
 
 ## Multiplication
 (*)(A::Sparse, B::Sparse) = ssmult(A, B, 0, true, true)
-(*)(A::Sparse, B::Dense) = sdmult!(A, false, 1., 0., B, 
+(*)(A::Sparse, B::Dense) = sdmult!(A, false, 1., 0., B,
     zeros(size(A, 1), size(B, 2), promote_type(eltype(A), eltype(B)))
 )
 (*)(A::Sparse, B::VecOrMat) = (*)(A, Dense(B))
@@ -1413,7 +1439,7 @@
 end
 
 *(adjA::Adjoint{<:Any,<:Sparse}, B::Dense) = (
-    A = parent(adjA); sdmult!(A, true, 1., 0., B, 
+    A = parent(adjA); sdmult!(A, true, 1., 0., B,
     zeros(size(A, 2), size(B, 2), promote_type(eltype(A), eltype(B))))
 )
 *(adjA::Adjoint{<:Any,<:Sparse}, B::VecOrMat) = adjA * Dense(B)
@@ -1467,7 +1493,7 @@
 
 !!! note
     This method uses the CHOLMOD library from SuiteSparse, which only supports
-    real or complex types in single or double precision. 
+    real or complex types in single or double precision.
     Input matrices not of those element types will
     be converted to these types as appropriate.
 """
@@ -1587,8 +1613,8 @@
 
 !!! note
     This method uses the CHOLMOD[^ACM887][^DavisHager2009] library from [SuiteSparse](https://github.com/DrTimothyAldenDavis/SuiteSparse).
-    CHOLMOD only supports real or complex types in single or double precision. 
-    Input matrices not of those element types will be 
+    CHOLMOD only supports real or complex types in single or double precision.
+    Input matrices not of those element types will be
     converted to these types as appropriate.
 
     Many other functions from CHOLMOD are wrapped but not exported from the
@@ -1633,8 +1659,8 @@
 See also [`ldlt`](@ref).
 
 !!! note
-    This method uses the CHOLMOD library from [SuiteSparse](https://github.com/DrTimothyAldenDavis/SuiteSparse), 
-    which only supports real or complex types in single or double precision. 
+    This method uses the CHOLMOD library from [SuiteSparse](https://github.com/DrTimothyAldenDavis/SuiteSparse),
+    which only supports real or complex types in single or double precision.
     Input matrices not of those element types will
     be converted to these types as appropriate.
 """
@@ -1695,7 +1721,7 @@
 
 !!! note
     This method uses the CHOLMOD[^ACM887][^DavisHager2009] library from [SuiteSparse](https://github.com/DrTimothyAldenDavis/SuiteSparse).
-    CHOLMOD only supports real or complex types in single or double precision. 
+    CHOLMOD only supports real or complex types in single or double precision.
     Input matrices not of those element types will
     be converted to these types as appropriate.
 
@@ -1767,7 +1793,7 @@
 """
 lowrankupdate(F::Factor{Tv}, V::AbstractArray{Tv2}) where {Tv, Tv2} =
     lowrankupdate!(
-        change_xdtype(F, promote_type(Tv, Tv2)), 
+        change_xdtype(F, promote_type(Tv, Tv2)),
         convert(AbstractArray{promote_type(Tv, Tv2)}, V)
     )
 
@@ -1782,7 +1808,7 @@
 """
 lowrankdowndate(F::Factor{Tv}, V::AbstractArray{Tv2}) where {Tv, Tv2} =
 lowrankdowndate!(
-    change_xdtype(F, promote_type(Tv, Tv2)), 
+    change_xdtype(F, promote_type(Tv, Tv2)),
     convert(AbstractArray{promote_type(Tv, Tv2)}, V)
 )
 
@@ -1905,6 +1931,66 @@
     throw(ArgumentError("self-adjoint sparse system solve not implemented for sparse rhs B," *
         " consider to convert B to a dense array"))
 
+# In-place solve: overwrite `x` with the solution of the system factorized in `L`
+# for the right-hand side `b`, and return `x`. `x` and `b` are handed to CHOLMOD's
+# `solve2` through `cholmod_dense_struct`s built by `wrap_dense_and_ptr`.
+# Throws `ArgumentError` if `x === b`, `DimensionMismatch` if the sizes of `x`, `L`
+# and `b` disagree, and `PosDefException`/`ZeroPivotException` if `!issuccess(L)`.
+# One method is generated per index type in `IndexTypes`.
+for TI in IndexTypes
+    @eval function ldiv!(x::StridedVecOrMat{T},
+                         L::Factor{T, $TI},
+                         b::StridedVecOrMat{T}) where {T<:VTypes}
+        if x === b
+            throw(ArgumentError("output array must not be aliased with input array"))
+        end
+        if size(L, 1) != size(b, 1)
+            throw(DimensionMismatch("Factorization and RHS should have the same number of rows. " *
+                "Factorization has $(size(L, 1)) rows, but RHS has $(size(b, 1)) rows."))
+        end
+        if size(L, 2) != size(x, 1)
+            throw(DimensionMismatch("Factorization and solution should match sizes. " *
+                "Factorization has $(size(L, 2)) columns, but solution has $(size(x, 1)) rows."))
+        end
+        if size(x, 2) != size(b, 2)
+            throw(DimensionMismatch("Solution and RHS should have the same number of columns. " *
+                "Solution has $(size(x, 2)) columns, but RHS has $(size(b, 2)) columns."))
+        end
+        if !issuccess(L)
+            s = unsafe_load(pointer(L))
+            if s.is_ll == 1
+                throw(LinearAlgebra.PosDefException(s.minor))
+            else
+                throw(LinearAlgebra.ZeroPivotException(s.minor))
+            end
+        end
+
+        dense_x, dense_x_ptr = wrap_dense_and_ptr(x)
+        dense_b, dense_b_ptr = wrap_dense_and_ptr(b)
+
+        # `solve2` is given references to the Y and E workspace handles. Hold on to the
+        # `Ref`s so that whatever pointers it stores there can be read back and freed;
+        # a throwaway `Ref(ptr)` would leave the local pointer at `C_NULL`.
+        X_Handle = Ptr{cholmod_dense_struct}(dense_x_ptr)
+        Y_Handle = Ref(Ptr{cholmod_dense_struct}(C_NULL))
+        E_Handle = Ref(Ptr{cholmod_dense_struct}(C_NULL))
+        status = GC.@preserve x dense_x b dense_b begin
+            $(cholname(:solve2, TI))(
+                CHOLMOD_A, L,
+                Ref(dense_b), C_NULL,
+                Ref(X_Handle), C_NULL,
+                Y_Handle,
+                E_Handle,
+                getcommon($TI))
+        end
+        Y_Handle[] != C_NULL && free!(Y_Handle[])
+        E_Handle[] != C_NULL && free!(E_Handle[])
+        @assert !iszero(status)
+
+        return x
+    end
+end
+
 ## Other convenience methods
 function diag(F::Factor{Tv, Ti}) where {Tv, Ti}
     f = unsafe_load(typedpointer(F))

diff --git a/test/cholmod.jl b/test/cholmod.jl
@@ -13,7 +13,7 @@ using Random
 using Serialization
 using LinearAlgebra:
     I, cholesky, cholesky!, det, diag, eigmax, ishermitian, isposdef, issuccess,
-    issymmetric, ldlt, ldlt!, logdet, norm, opnorm, Diagonal, Hermitian, Symmetric,
+    issymmetric, ldiv!, ldlt, ldlt!, logdet, norm, opnorm, Diagonal, Hermitian, Symmetric,
     PosDefException, ZeroPivotException, RowMaximum
 using SparseArrays
 using SparseArrays: getcolptr
@@ -138,6 +138,9 @@ Random.seed!(123)
     @test CHOLMOD.isvalid(chma)
     @test unsafe_load(pointer(chma)).is_ll == 1    # check that it is in fact an LLt
     @test chma\b ≈ x
+    x2 = zero(x)
+    @inferred ldiv!(x2, chma, b)
+    @test x2 ≈ x
     @test nnz(chma) == 489
     @test nnz(cholesky(A, perm=1:size(A,1))) > nnz(chma)
     @test size(chma) == size(A)
@@ -281,6 +284,37 @@ end
     end
 end
 
+@testset "ldiv! $Tv $Ti" begin
+    local A, x, x2, b, X, X2, B, long_vec, tall_mat
+    A = sprand(10, 10, 0.1)
+    A = I + A * A'
+    A = convert(SparseMatrixCSC{Tv,Ti}, A)
+    factor = cholesky(A)
+
+    x = fill(Tv(1), 10)
+    b = A * x
+    x2 = zero(x)
+    @inferred ldiv!(x2, factor, b)
+    @test x2 ≈ x
+
+    X = fill(Tv(1), 10, 5)
+    B = A * X
+    X2 = zero(X)
+    @inferred ldiv!(X2, factor, B)
+    @test X2 ≈ X
+
+    # Aliased output and arrays with one extra row must both be rejected.
+    @test_throws ArgumentError ldiv!(x2, factor, x2)
+    long_vec = fill(Tv(1), size(x, 1) + 1)
+    tall_mat = fill(Tv(1), size(X, 1) + 1, size(X, 2))
+    @test_throws DimensionMismatch ldiv!(long_vec, factor, b)
+    @test_throws DimensionMismatch ldiv!(tall_mat, factor, B)
+    @test_throws DimensionMismatch ldiv!(x2, factor, long_vec)
+    @test_throws DimensionMismatch ldiv!(X2, factor, tall_mat)
+    @test_throws DimensionMismatch ldiv!(X2, factor, b)
+    @test_throws DimensionMismatch ldiv!(x2, factor, B)
+end
+
 end #end for Ti ∈ itypes
 
 for Tv ∈ (Float32, Float64)
@@ -365,9 +399,9 @@ end
     @test isa(CHOLMOD.eye(3), CHOLMOD.Dense{Float64})
 end
 
-@testset "Core functionality ($elty, $elty2)" for 
-    elty in (Tv, Complex{Tv}), 
-    Tv2 in (Float32, Float64), 
+@testset "Core functionality ($elty, $elty2)" for
+    elty in (Tv, Complex{Tv}),
+    Tv2 in (Float32, Float64),
     elty2 in (Tv2, Complex{Tv2}),
     Ti ∈ itypes
     A1 = sparse(Ti[1:5; 1], Ti[1:5; 2], elty <: Real ? randn(Tv, 6) : complex.(randn(Tv, 6), randn(Tv, 6)))
@@ -972,7 +1006,7 @@ end
     f = ones(size(K, 1))
     u = K \ f
     residual = norm(f - K * u) / norm(f)
-    @test residual < 1e-6 
+    @test residual < 1e-6
 end
 
 @testset "wrapped sparse matrices" begin