diff --git a/Project.toml b/Project.toml
index 88ff0db..c792c5a 100644
--- a/Project.toml
+++ b/Project.toml
@@ -11,6 +11,7 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
 MatrixMarket = "4d4711f2-db25-561a-b6b3-d35e7d4047d3"
 OpenBLAS32_jll = "656ef2d0-ae68-5445-9ca0-591084a874a2"
+Preferences = "21216c6a-2e73-6563-6e65-726566657250"
 SparseBase = "8047a082-0763-4c4f-a23b-a4a948721f1d"
 SuperLUBase = "79fc8b2a-9fec-41f5-a0c4-660a4203283a"
 SuperLU_DIST_jll = "9a1356b0-3c82-5da3-b77c-7c198e8bd7ab"
@@ -22,12 +23,13 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
 SparseArraysExt = "SparseArrays"
 
 [compat]
-CIndices = "0.3"
-DocStringExtensions = "0.8"
-MPI = "0.20"
+CIndices = "0.3 - 0"
+DocStringExtensions = "0.8 - 0"
+MPI = "0.20 - 0"
 MatrixMarket = "0.5"
-SparseBase = "0.3"
-SuperLUBase = "0.1.1"
+Preferences = "1"
+SparseBase = "0.4 - 0"
+SuperLUBase = "0.1.1 - 0"
 SuperLU_DIST_jll = "8.0.0"
 julia = "1.7"
 
diff --git a/examples/highlevel.jl b/examples/highlevel.jl
new file mode 100644
index 0000000..0f0d62e
--- /dev/null
+++ b/examples/highlevel.jl
@@ -0,0 +1,57 @@
+# ENV["OMP_NUM_THREADS"] = 1
+using MPI
+using SuperLUDIST: Grid, DistributedSuperMatrix,
+pgssvx!
+using SuperLUDIST
+using SparseBase.Communication
+using SparseBase.Communication: distribute_evenly, localsize
+using MatrixMarket
+using SparseBase
+using LinearAlgebra
+MPI.Init()
+nprow, npcol, nrhs = 1, 1, 4
+root = 0
+comm = MPI.COMM_WORLD
+grid = Grid{Int32}(nprow, npcol, comm)
+iam = grid.iam
+isroot = iam == root
+
+# Utility function for reading a .mtx file and generating suitable
+# rhs and x for testing.
+# coo is held only on root, b and xtrue are replicated on each rank.
+coo, b, xtrue = SuperLUDIST.mmread_and_generatesolution(
+    Float64, Int32, nrhs, joinpath(@__DIR__, "add32.mtx"), grid; root
+)
+csr = isroot ? convert(SparseBase.CSRStore, coo) : nothing
+chunksizes = isroot ? distribute_evenly(size(csr, 1), nprow * npcol) : nothing
+
+# on single nodes this will help prevent oversubscription of threads.
+SuperLUDIST.superlu_set_num_threads(Int64, 2)
+
+# If constructing from existing per-node data the following constructors will help:
+# A = DistributedSuperMatrix(store::CSRStore, firstrow, globalsize::NTuple{2, Int})
+# store = CSRStore(ptrs, indices, values, localsize::NTuple{2, Int})
+# @show iam csr
+A = Communication.scatterstore!(
+    DistributedSuperMatrix{Float64, Int32}(grid), csr, chunksizes; root);
+
+b_local = b[A.first_row : A.first_row + localsize(A, 1) - 1, :] # shrink b
+xtrue_local = xtrue[A.first_row : A.first_row + localsize(A, 1) - 1, :] # shrink xtrue
+
+options = SuperLUDIST.Options()
+stat = SuperLUDIST.LUStat{Int32}()
+# First call: the right-hand side has zero columns; the returned F is reused below.
+b1 = Matrix{Float64}(undef, localsize(A, 2), 0)
+_, F = pgssvx!(A, b1; options, stat);
+
+# Second call: solve for b_local with the existing factorization object F.
+b_local, F = pgssvx!(F, b_local);
+GC.gc()
+if !(iam == root) || (nprow * npcol == 1)
+    SuperLUDIST.inf_norm_error_dist(b_local, xtrue_local, grid)
+end
+SuperLUDIST.PStatPrint(options, stat, grid)
+
+# @show iam b_local xtrue_local
+MPI.Finalize()
+
diff --git a/examples/pdrive.jl b/examples/pdrive.jl
index b4fe2e9..f21bf79 100644
--- a/examples/pdrive.jl
+++ b/examples/pdrive.jl
@@ -1,7 +1,7 @@
 # ENV["OMP_NUM_THREADS"] = 1
 using MPI
 using SuperLUDIST: Grid, DistributedSuperMatrix,
-pgssvx!
+pgssvx!, pgssvx_ABdist!, pgstrs_prep!, pgstrs_init!
 using SuperLUDIST
 using SparseBase.Communication
 using SparseBase.Communication: distribute_evenly, localsize
@@ -9,7 +9,7 @@ using MatrixMarket
 using SparseBase
 using LinearAlgebra
 MPI.Init()
-nprow, npcol, nrhs = 2, 2, 1
+nprow, npcol, nrhs = 1, 1, 2
 root = 0
 comm = MPI.COMM_WORLD
 grid = Grid{Int32}(nprow, npcol, comm)
@@ -46,9 +46,11 @@ xtrue_local = xtrue[A.first_row : A.first_row + localsize(A, 1) - 1, :] # shrink
 # creating options and stat is optional, they will be created if not provided.
options = SuperLUDIST.Options() stat = SuperLUDIST.LUStat{Int32}() - -b_local, F = pgssvx!(A, b_local; options, stat); - +# b1 = Matrix{Float64}(undef, localsize(A, 2), 2) +b1 = rand(localsize(A, 1)) +_, F = pgssvx!(A, b1; options, stat); +@show F.options +b_local, F = pgssvx!(F, b_local); if !(iam == root) || (nprow * npcol == 1) SuperLUDIST.inf_norm_error_dist(b_local, xtrue_local, grid) end diff --git a/examples/pdrive_ABglobal.jl b/examples/pdrive_ABglobal.jl index 78cc16a..37d032e 100644 --- a/examples/pdrive_ABglobal.jl +++ b/examples/pdrive_ABglobal.jl @@ -4,16 +4,16 @@ using SuperLUDIST: Grid, Options, LUStat, ScalePermStruct, ReplicatedSuperMatrix, pgssvx! using SuperLUDIST.Common using MatrixMarket -nprow, npcol, nrhs = Int64.((2, 2, 1)) +nprow, npcol, nrhs = (2, 2, 1) root = 0 MPI.Init() comm = MPI.COMM_WORLD -grid = Grid{Int64}(nprow, npcol, comm) +grid = Grid{Int}(nprow, npcol, comm) iam = grid.iam # This function handles broadcasting internally! A = MatrixMarket.mmread( - ReplicatedSuperMatrix{Float64, Int64}, + ReplicatedSuperMatrix{Float64, Int}, joinpath(@__DIR__, "add32.mtx"), grid ) @@ -32,16 +32,11 @@ end MPI.Bcast!(b, root, comm) MPI.Bcast!(xtrue, root, comm) -options = Options() - -LU = SuperLUDIST.LUStruct{Float64, Int64}(n, grid) -stat = LUStat{Int64}() - -b, F = pgssvx!(A, b; options, stat) +b, F = pgssvx!(A, b) if !(iam == root) || (nprow * npcol == 1) SuperLUDIST.inf_norm_error_dist(b, xtrue, grid) end -SuperLUDIST.PStatPrint(options, F.stat, grid) +SuperLUDIST.PStatPrint(F) MPI.Finalize() diff --git a/src/SuperLUDIST.jl b/src/SuperLUDIST.jl index ebe5de3..c61d217 100644 --- a/src/SuperLUDIST.jl +++ b/src/SuperLUDIST.jl @@ -79,8 +79,6 @@ include("distributedmatrix.jl") include("replicatedmatrix.jl") include("drivers.jl") include("matrixmarket.jl") - - - +include("highlevel.jl") end diff --git a/src/comproutines.jl b/src/comproutines.jl deleted file mode 100644 index 23e17ad..0000000 --- a/src/comproutines.jl +++ /dev/null @@ -1,2 +0,0 @@ 
-# TODO: Comp routines currently untested.
-# TODO: add comp routines back.
\ No newline at end of file
diff --git a/src/distributedmatrix.jl b/src/distributedmatrix.jl
index c38e712..599b893 100644
--- a/src/distributedmatrix.jl
+++ b/src/distributedmatrix.jl
@@ -83,6 +83,54 @@ function DistributedSuperMatrix(store::SparseBase.AbstractSparseStore{Tv, <:Any,
     return DistributedSuperMatrix(convert(SparseBase.CSRStore{Tv, CIndex{Ti}}, store), firstrow, globalsize, grid)
 end
 
+"""
+    $(TYPEDSIGNATURES)
+
+Build a DistributedSuperMatrix from the three vectors of a CSR matrix plus the required metadata.
+
+Valid index types are `{Int32, Int64}`, or `CIndex{Int32}, CIndex{Int64}`, if the indices are already 0-based.
+Valid element types are `{Float32, Float64, ComplexF64}`.
+
+# Arguments
+ - `rowptr, colidx, v` : sparse storage vectors, must be valid CSR matrix internals with types noted above.
+ - `firstrow` : the 1-based starting row of the matrix on this rank.
+ - `localsize` : the local size of the matrix on this rank.
+ - `globalsize` : the size of the matrix across all ranks.
+ - `grid` : the grid on which the matrix is distributed.
+"""
+function DistributedSuperMatrix(rowptr, colidx, v, firstrow, localsize, globalsize, grid::Grid{Ti}) where {Ti}
+    # Wrap the raw vectors in a CSR store, then defer to the store-based constructor.
+    localstore = SparseBase.CSRStore(rowptr, colidx, v, localsize)
+    return DistributedSuperMatrix(
+        localstore,
+        firstrow, globalsize, grid
+    )
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Construct a DistributedSuperMatrix from COO format and the necessary metadata.
+
+Valid index types are `{Int32, Int64}`, or `CIndex{Int32}, CIndex{Int64}`, if the indices are already 0-based.
+Valid element types are `{Float32, Float64, ComplexF64}`.
+
+# Arguments
+ - `(rows, cols), v` : sparse storage vectors, must be valid COO matrix internals with types noted above.
+ - `firstrow` : the 1-based starting row of the matrix on this rank.
+ - `localsize` : the local size of the matrix on this rank.
+ - `globalsize` : the size of the matrix across all ranks.
+ - `grid` : the grid on which the matrix is distributed.
+"""
+function DistributedSuperMatrix((rows, cols)::NTuple{2, <:AbstractVector}, v, firstrow, localsize, globalsize, grid::Grid{Ti}) where {Ti}
+    # `ptr` and `idx` were never defined in this method. Wrap the COO vectors in a store instead;
+    # the AbstractSparseStore method of this constructor converts the store to CSR.
+    # NOTE(review): confirm the `COOStore(indices, values, size)` constructor form against SparseBase.
+    return DistributedSuperMatrix(
+        SparseBase.COOStore((rows, cols), v, localsize), firstrow, globalsize, grid
+    )
+end
+
 """
     $(TYPEDSIGNATURES)
 """
diff --git a/src/drivers.jl b/src/drivers.jl
index f94ca07..a012467 100644
--- a/src/drivers.jl
+++ b/src/drivers.jl
@@ -17,9 +17,8 @@ function pgssvx!(
     stat = LUStat{Ti}(),
     berr = Vector{Tv}(undef, size(b, 2))
 ) where {Tv, Ti}
-    grid = A.grid
-    b, _ = pgssvx_ABglobal!(options, A, perm, b, LU, berr, stat)
-    return b, SuperLUFactorization(A, options, nothing, perm, LU, stat, berr)
+    # Bundle the inputs into a factorization object; the method taking `F` runs the driver.
+    return pgssvx!(SuperLUFactorization(A, options, nothing, perm, LU, stat, berr, b), b)
 end
 
 """
@@ -42,30 +41,33 @@ function pgssvx!(
     stat = LUStat{Ti}(),
     berr = Vector{Tv}(undef, size(b, 2))
 ) where {Tv, Ti}
-    grid = A.grid
-    b, _ = pgssvx_ABdist!(options, A, perm, b, LU, Solve, berr, stat)
-    return b, SuperLUFactorization(A, options, Solve, perm, LU, stat, berr)
+    return pgssvx!(SuperLUFactorization(A, options, Solve, perm, LU, stat, berr, b), b)
 end
 
-"""
-    $(TYPEDSIGNATURES)
-
-Solve the sparse linear system `Ax = b` using an existing factorization of `A` held in `F`.
+function pgssvx!(F::SuperLUFactorization{T, I, <:ReplicatedSuperMatrix{T, I}}, b::VecOrMat{T}) where {T, I}
+    resize!(F.berr, size(b, 2)) # berr needs one entry per column of b, whatever F was built with
+    b, _ = pgssvx_ABglobal!(F.options, F.mat, F.perm, b, F.lu, F.berr, F.stat)
+    F.options.Fact = Common.FACTORED
+    F.b = b # NOTE(review): may throw when typeof(b) differs from the type F was built with — confirm
+    return b, F
+end
+function pgssvx!(F::SuperLUFactorization{T, I, <:DistributedSuperMatrix{T, I}}, b::VecOrMat{T}) where {T, I}
+    (; mat, options, solve, perm, lu, stat, berr) = F
+    resize!(berr, size(b, 2)) # berr needs one entry per column of b, whatever F was built with
+    if size(F.b, 2) != size(b, 2)
+        # nrhs changed since the last call: re-initialise the solve struct (pgstrs_prep! stays disabled).
+        pgstrs_init!(
+            F.solve,
+            reverse(Communication.localsize(F.mat))...,
+            size(b, 2), F.mat.first_row - 1, F.perm,
+            F.lu, F.mat.grid
+        )
+    end
 
-Returns `b` and a new factorization object which may alias some of `F`.
-"""
-function pgssvx!(
-    F::SuperLUFactorization{T, I},
-    b::VecOrMat{T}
-) where {T, I}
-    options = copy(F.options)
-    options.Fact = Common.FACTORED
-    stat = LUStat{I}()
-    pgstrs_prep!(F)
-    solve = pgstrs_init(
-        F.solve, size(F.A, 2), localsize(F.A, 1), size(b, 2),
-        F.A.first_row, F.perm, F.lu,
-    )
+    b, _ = pgssvx_ABdist!(options, mat, perm, b, lu, solve, berr, stat)
+    F.options.Fact = Common.FACTORED
+    F.b = b
+    return b, F
 end
 
 for T ∈ (Float32, Float64, ComplexF64)
@@ -122,7 +124,7 @@ function pgstrs_init!(
     lu::LUStruct{$T, $I},
     grid::Grid{$I}
 )
-    $L(Symbol(:p, prefixsymbol(T), :gstrs_init))(
+    $L.$(Symbol(:p, prefixsymbol(T), :gstrs_init))(
         n, m_local, nrhs, first_row, scaleperm.perm_r,
         scaleperm.perm_c, grid, lu.Glu_persist, solve
     )
@@ -132,14 +134,14 @@ end
 function pgstrs_prep!(
     F::SuperLUFactorization{$T, $I}
 )
-    gstrs = F.solve.gstrs_comm[]
-    $L.superlu_free_dist(gstrs.B_to_X_SendCnt)
-    gstrs.B_to_X_SendCnt[] = C_NULL
-    $L.superlu_free_dist(gstrs.X_to_B_SendCnt)
-    gstrs.X_to_B_SendCnt[] = C_NULL
-    $L.superlu_free_dist(gstrs.ptr_to_ibuf)
-    gstrs.ptr_to_ibuf[] = C_NULL
-    F.solve.gstrs_comm[] = gstrs # is this necessary? Think yes.
+    if size(F.b, 2) != 0
+        # Free the three buffers referenced by the solve communication struct.
+        gstrs = unsafe_load(F.solve.gstrs_comm)
+        $L.superlu_free_dist(gstrs.B_to_X_SendCnt)
+        $L.superlu_free_dist(gstrs.X_to_B_SendCnt)
+        $L.superlu_free_dist(gstrs.ptr_to_ibuf)
+        # NOTE(review): the freed pointers are no longer reset to C_NULL — confirm nothing reuses them.
+    end
     return F
 end
 
diff --git a/src/highlevel.jl b/src/highlevel.jl
new file mode 100644
index 0000000..604f67e
--- /dev/null
+++ b/src/highlevel.jl
@@ -0,0 +1,23 @@
+"""
+    $(TYPEDSIGNATURES)
+
+Factorize `A` by calling `pgssvx!` with a right-hand side that has zero columns and return
+the factorization object. Keyword arguments are forwarded to `pgssvx!`.
+"""
+function LinearAlgebra.lu!(
+    A::AbstractSuperMatrix{Tv, Ti};
+    kwargs...
+) where {Tv, Ti}
+    b_local = Matrix{Tv}(undef, Communication.localsize(A, 2), 0)
+    return pgssvx!(A, b_local; kwargs...)[2] # F, drop b_local.
+end
+
+"""
+    $(TYPEDSIGNATURES)
+
+Solve with the existing factorization `A` by calling `pgssvx!(A, B)` and return the first
+element of its result.
+"""
+function LinearAlgebra.ldiv!(A::SuperLUFactorization{Tv, Ti}, B::StridedVecOrMat{Tv}) where {Tv, Ti}
+    return pgssvx!(A, B)[1]
+end
diff --git a/src/structs.jl b/src/structs.jl
index e4ce926..cc3cf4b 100644
--- a/src/structs.jl
+++ b/src/structs.jl
@@ -8,7 +8,6 @@ function Base.getproperty(g::Grid, s::Symbol)
     return getproperty(g.grid[], s)
 end
 
-
 # Option functions:
 ###################
 const Options = Common.superlu_dist_options_t
@@ -27,6 +26,10 @@ ScalePermStruct{T}(m, n) where T = ScalePermStruct{T, Int}(m, n)
 ScalePermStruct{T, I}(m, n) where {T, Ti, I<:CIndex{Ti}} =
     ScalePermStruct{T, Ti}(m, n)
 
+function Base.getproperty(g::ScalePermStruct, s::Symbol)
+    s === :scaleperm && return Base.getfield(g, s)
+    return getproperty(g.scaleperm[], s)
+end
 
 struct LUStruct{T, I, S, G}
     LU::Base.RefValue{S}
@@ -36,6 +39,13 @@ end
 LUStruct{T}(n, grid) where T = LUStruct{T, Int}(n, grid)
 LUStruct{T, I}(n, grid) where {T, Ti, I<:CIndex{Ti}} = LUStruct{T, Ti}(n, grid)
 
+function Base.getproperty(g::LUStruct, s::Symbol)
+    s === :LU && return Base.getfield(g, s)
+    s === :grid && return Base.getfield(g, s)
+    s === :n && return Base.getfield(g, s)
+    return getproperty(g.LU[], s)
+end
+
 struct LUStat{I, S}
     stat::Base.RefValue{S}
 end
@@ -52,8 +62,13 @@ SOLVE{T}(options) where T = SOLVE{T, Int}(options)
 SOLVE{T, I}(options) where {T, Ti, I<:CIndex{Ti}} =
     SOLVE{T, Ti}(options)
 
+function Base.getproperty(g::SOLVE, s::Symbol)
+    s === :options && return Base.getfield(g, s) # own field, returned directly
+    s === :SOLVEstruct && return Base.getfield(g, s) # own field, returned directly
+    return getproperty(g.SOLVEstruct[], s) # any other name is looked up on the wrapped struct
+end
 
-mutable struct SuperLUFactorization{T, I, A, Solve, Perm, LU, Stat}
+mutable struct SuperLUFactorization{T, I, A, Solve, Perm, LU, Stat, B}
     mat::A
     options::Options
     solve::Solve
@@ -61,9 +76,10 @@ mutable struct SuperLUFactorization{T, I, A, Solve, Perm, LU, Stat}
     lu::LU
     stat::Stat
     berr::Vector{T}
-    function SuperLUFactorization{T, I, A, Solve, Perm, LU, Stat}(
+    b::B # right-hand side supplied at construction; its type fixes the parameter B
+    function SuperLUFactorization{T, I, A, Solve, Perm, LU, Stat, B}(
         mat::A, options::Options, solve::Solve, perm::Perm,
-        lustruct::LU, stat::Stat, berr::Vector{T}
+        lustruct::LU, stat::Stat, berr::Vector{T}, b::B
     ) where {
         T<:Union{Float32, Float64, ComplexF64},
         I <: Union{Int32, Int64},
@@ -71,17 +87,21 @@ mutable struct SuperLUFactorization{T, I, A, Solve, Perm, LU, Stat}
         Solve <: Union{SOLVE{T, I}, Nothing},
         Perm <: ScalePermStruct{T, I},
         LU <: LUStruct{T, I},
-        Stat <: LUStat{I}
+        Stat <: LUStat{I},
+        B <: StridedVecOrMat{T}
     }
-        return new(mat, options, solve, perm, lustruct, stat, berr)
+        return new(mat, options, solve, perm, lustruct, stat, berr, b)
     end
 end
+# `true` when the options stored in `F` have `Fact` equal to `Common.FACTORED`.
+isfactored(F::SuperLUFactorization) = F.options.Fact == Common.FACTORED
+
 function SuperLUFactorization(
     A::AbstractSuperMatrix{Tv, Ti}, options,
-    solve::Solve, perm::Perm, lustruct::LU, stat::Stat, berr::Vector{Tv}
-) where {Tv, Ti, Solve, Perm, LU, Stat}
-    return SuperLUFactorization{Tv, Ti, typeof(A), Solve, Perm, LU, Stat}(
-        A, options, solve, perm, lustruct, stat, berr
+    solve::Solve, perm::Perm, lustruct::LU, stat::Stat, berr::Vector{Tv}, b::B
+) where {Tv, Ti, Solve, Perm, LU, Stat, B}
+    return SuperLUFactorization{Tv, Ti, typeof(A), Solve, Perm, LU, Stat, B}(
+        A, options, solve, perm, lustruct, stat, berr, b
     )
 end
 
@@ -120,7 +140,6 @@ libname = Symbol(:libsuperlu_dist_, I)
     else
         return Grid{$I, gridinfo_t{$I}}(
             finalizer(r) do ref
-
!MPI.Finalized() && $L.superlu_gridexit(ref) end ) @@ -192,3 +211,7 @@ function gridmap!(r, comm, nprow, npcol) usermap = LinearIndices((nprow, npcol))' .- 1 return gridmap!(r, comm, nprow, npcol, usermap) end + +function PStatPrint(F::SuperLUFactorization) + PStatPrint(F.options, F.stat, F.mat.grid) +end