Skip to content

Commit

Permalink
Update the documentation for the generic API
Browse files Browse the repository at this point in the history
  • Loading branch information
amontoison committed Mar 25, 2024
1 parent 2811cb5 commit 1db4589
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 59 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
Manifest.toml
/LocalPreferences.toml
docs/build
2 changes: 2 additions & 0 deletions docs/Project.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
CUDSS = "45b445bb-4962-46a0-9369-b4df9d0f772e"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

[compat]
Documenter = "1.0"
2 changes: 2 additions & 0 deletions docs/make.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
using Documenter, CUDSS
using LinearAlgebra
using CUDA, CUDA.CUSPARSE

makedocs(
modules = [CUDSS],
Expand Down
125 changes: 71 additions & 54 deletions docs/src/generic.md
Original file line number Diff line number Diff line change
@@ -1,41 +1,64 @@
# Examples
## LLᵀ and LLᴴ

## Example 1: Sparse unsymmetric linear system with one right-hand side
```@docs
LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: LinearAlgebra.BlasFloat
```

## Example 1: Sparse hermitian positive definite linear system

```julia
using CUDA, CUDA.CUSPARSE
using CUDSS
using LinearAlgebra
using SparseArrays

T = Float64
T = ComplexF64
R = real(T)
n = 100
A_cpu = sprand(T, n, n, 0.05) + I
b_cpu = rand(T, n)
p = 5
A_cpu = sprand(T, n, n, 0.01)
A_cpu = A_cpu * A_cpu' + I
B_cpu = rand(T, n, p)

A_gpu = CuSparseMatrixCSR(A_cpu)
b_gpu = CuVector(b_cpu)
A_gpu = CuSparseMatrixCSR(A_cpu |> triu)
B_gpu = CuMatrix(B_cpu)
X_gpu = similar(B_gpu)

F = lu(A_gpu)
x_gpu = F \ b_gpu
F = cholesky(A_gpu, view='U')
X_gpu = F \ B_gpu

r_gpu = b_gpu - A_gpu * x_gpu
norm(r_gpu)
R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
norm(R_gpu)

# In-place LU
d_gpu = rand(T, n) |> CuVector
B_gpu = A_gpu + Diagonal(d_gpu)
lu!(F, B_gpu)
# In-place LLᴴ
d_gpu = rand(R, n) |> CuVector
A_gpu = A_gpu + Diagonal(d_gpu)
cholesky!(F, A_gpu)

c_cpu = rand(T, n)
c_gpu = CuVector(c_cpu)
ldiv!(x_gpu, F, c_gpu)
C_cpu = rand(T, n, p)
C_gpu = CuMatrix(C_cpu)
ldiv!(X_gpu, F, C_gpu)

r_gpu = c_gpu - B_gpu * x_gpu
norm(r_gpu)
R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu
norm(R_gpu)
```

## Example 2: Sparse symmetric linear system with multiple right-hand sides
!!! note
If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian` instead of using the keyword argument `view` in `cholesky`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.

```julia
H_gpu = Hermitian(A_gpu, :U)
F = cholesky(H_gpu)
```


## LDLᵀ and LDLᴴ

```@docs
LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: LinearAlgebra.BlasFloat
```

## Example 2: Sparse symmetric linear system

```julia
using CUDA, CUDA.CUSPARSE
Expand Down Expand Up @@ -63,8 +86,8 @@ norm(R_gpu)

# In-place LDLᵀ
d_gpu = rand(R, n) |> CuVector
B_gpu = A_gpu + Diagonal(d_gpu)
ldlt!(F, B_gpu)
A_gpu = A_gpu + Diagonal(d_gpu)
ldlt!(F, A_gpu)

C_cpu = rand(T, n, p)
C_gpu = CuMatrix(C_cpu)
Expand All @@ -82,49 +105,43 @@ S_gpu = Symmetric(A_gpu, :L)
F = ldlt(S_gpu)
```

## Example 3: Sparse hermitian positive definite linear system with multiple right-hand sides
## LU

```@docs
LinearAlgebra.lu(A::CuSparseMatrixCSR{T,Cint}) where T <: LinearAlgebra.BlasFloat
```

## Example 3: Sparse unsymmetric linear system

```julia
using CUDA, CUDA.CUSPARSE
using CUDSS
using LinearAlgebra
using SparseArrays

T = ComplexF64
R = real(T)
T = Float64
n = 100
p = 5
A_cpu = sprand(T, n, n, 0.01)
A_cpu = A_cpu * A_cpu' + I
B_cpu = rand(T, n, p)

A_gpu = CuSparseMatrixCSR(A_cpu |> triu)
B_gpu = CuMatrix(B_cpu)
X_gpu = similar(B_gpu)

F = cholesky(A_gpu, view='U')
X_gpu = F \ B_gpu
A_cpu = sprand(T, n, n, 0.05) + I
b_cpu = rand(T, n)

R_gpu = B_gpu - CuSparseMatrixCSR(A_cpu) * X_gpu
norm(R_gpu)
A_gpu = CuSparseMatrixCSR(A_cpu)
b_gpu = CuVector(b_cpu)

# In-place LLᴴ
d_gpu = rand(R, n) |> CuVector
B_gpu = A_gpu + Diagonal(d_gpu)
cholesky!(F, B_gpu)
F = lu(A_gpu)
x_gpu = F \ b_gpu

C_cpu = rand(T, n, p)
C_gpu = CuMatrix(C_cpu)
ldiv!(X_gpu, F, C_gpu)
r_gpu = b_gpu - A_gpu * x_gpu
norm(r_gpu)

R_gpu = C_gpu - ( CuSparseMatrixCSR(A_cpu) + Diagonal(d_gpu) ) * X_gpu
norm(R_gpu)
```
# In-place LU
d_gpu = rand(T, n) |> CuVector
A_gpu = A_gpu + Diagonal(d_gpu)
lu!(F, A_gpu)

!!! note
If we only store one triangle of `A_gpu`, we can also use the wrappers `Symmetric` and `Hermitian` instead of using the keyword argument `view` in `cholesky`. For real matrices, both wrappers are allowed but only `Hermitian` can be used for complex matrices.
c_cpu = rand(T, n)
c_gpu = CuVector(c_cpu)
ldiv!(x_gpu, F, c_gpu)

```julia
H_gpu = Hermitian(A_gpu, :U)
F = cholesky(H_gpu)
r_gpu = c_gpu - A_gpu * x_gpu
norm(r_gpu)
```
2 changes: 1 addition & 1 deletion src/CUDSS.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ else
end

import CUDA: @checked, libraryPropertyType, cudaDataType, initialize_context, retry_reclaim, CUstream
import LinearAlgebra: lu, lu!, ldlt, ldlt!, cholesky, cholesky!, ldiv!, BlasFloat, BlasReal
import LinearAlgebra: lu, lu!, ldlt, ldlt!, cholesky, cholesky!, ldiv!, BlasFloat, BlasReal, checksquare
import Base: \

include("libcudss.jl")
Expand Down
58 changes: 54 additions & 4 deletions src/generic.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,19 @@
"""
solver = lu(A::CuSparseMatrixCSR{T,Cint})
Compute the LU factorization of a sparse matrix A on an NVIDIA GPU.
The type `T` can be `Float32`, `Float64`, `ComplexF32` or `ComplexF64`.
#### Input argument
* `A`: a sparse square matrix stored in the `CuSparseMatrixCSR` format.
#### Output argument
* `solver`: an opaque structure [`CudssSolver`](@ref) that stores the factors of the LU decomposition.
"""
function LinearAlgebra.lu(A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat
n = LinearAlgebra.checksquare(A)
n = checksquare(A)
solver = CudssSolver(A, "G", 'F')
x = CudssMatrix(T, n)
b = CudssMatrix(T, n)
Expand All @@ -8,8 +22,26 @@ function LinearAlgebra.lu(A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat
return solver
end

"""
solver = ldlt(A::CuSparseMatrixCSR{T,Cint}; view::Char='F')
Compute the LDLᴴ factorization of a sparse matrix A on an NVIDIA GPU.
The type `T` can be `Float32`, `Float64`, `ComplexF32` or `ComplexF64`.
#### Input argument
* `A`: a sparse Hermitian matrix stored in the `CuSparseMatrixCSR` format.
#### Keyword argument
*`view`: A character that specifies which triangle of the sparse matrix is provided. Possible options are `L` for the lower triangle, `U` for the upper triangle, and `F` for the full matrix.
#### Output argument
* `solver`: Opaque structure [`CudssSolver`](@ref) that stores the factors of the LDLᴴ decomposition.
"""
function LinearAlgebra.ldlt(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: BlasFloat
n = LinearAlgebra.checksquare(A)
n = checksquare(A)
structure = T <: Real ? "S" : "H"
solver = CudssSolver(A, structure, view)
(T <: Complex) && cudss_set(solver, "pivot_type", 'N')
Expand All @@ -23,8 +55,26 @@ end
LinearAlgebra.ldlt(A::Symmetric{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasReal = LinearAlgebra.ldlt(A.data, view=A.uplo)
LinearAlgebra.ldlt(A::Hermitian{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: BlasFloat = LinearAlgebra.ldlt(A.data, view=A.uplo)

"""
solver = cholesky(A::CuSparseMatrixCSR{T,Cint}; view::Char='F')
Compute the LLᴴ factorization of a sparse matrix A on an NVIDIA GPU.
The type `T` can be `Float32`, `Float64`, `ComplexF32` or `ComplexF64`.
#### Input argument
* `A`: a sparse Hermitian positive definite matrix stored in the `CuSparseMatrixCSR` format.
#### Keyword argument
*`view`: A character that specifies which triangle of the sparse matrix is provided. Possible options are `L` for the lower triangle, `U` for the upper triangle, and `F` for the full matrix.
#### Output argument
* `solver`: Opaque structure [`CudssSolver`](@ref) that stores the factors of the LLᴴ decomposition.
"""
function LinearAlgebra.cholesky(A::CuSparseMatrixCSR{T,Cint}; view::Char='F') where T <: BlasFloat
n = LinearAlgebra.checksquare(A)
n = checksquare(A)
structure = T <: Real ? "SPD" : "HPD"
solver = CudssSolver(A, structure, view)
x = CudssMatrix(T, n)
Expand All @@ -40,7 +90,7 @@ LinearAlgebra.cholesky(A::Hermitian{T,<:CuSparseMatrixCSR{T,Cint}}) where T <: B
for fun in (:lu!, :ldlt!, :cholesky!)
@eval begin
function LinearAlgebra.$fun(solver::CudssSolver{T}, A::CuSparseMatrixCSR{T,Cint}) where T <: BlasFloat
n = LinearAlgebra.checksquare(A)
n = checksquare(A)
cudss_set(solver.matrix, A)
x = CudssMatrix(T, n)
b = CudssMatrix(T, n)
Expand Down

0 comments on commit 1db4589

Please sign in to comment.