Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into fp/madnlp
Browse files Browse the repository at this point in the history
  • Loading branch information
frapac committed Jan 25, 2024
2 parents 75ca054 + 4bb2155 commit 02989e3
Show file tree
Hide file tree
Showing 20 changed files with 354 additions and 81 deletions.
26 changes: 26 additions & 0 deletions .ci/Project.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
[compat]
CUDA = "4.1, 5"
FiniteDiff = "2.7"
Ipopt = "1"
MadNLP = "0.7"

[deps]
Argos = "ef244971-cf80-42b0-9762-2c2c832df5d5"
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
ExaPF = "0cf0e50c-a82e-488f-ac7e-41ffdff1b8aa"
FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6"
MadNLPGPU = "d72a61cc-809d-412f-99be-fd81f4b8a598"
MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[extras]
CUDA_Runtime_jll = "76a88914-d11a-5bdc-97e0-2f5a05c973a2"
10 changes: 10 additions & 0 deletions .ci/setup.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

# CI bootstrap script: prepares the active project environment before the
# test suite is launched.
using Pkg

# Install every dependency declared for the active project. This has to happen
# before `using CUDA`, otherwise the package may not be available yet.
Pkg.instantiate()

using CUDA

# Track the local checkout of Argos (the parent directory of this script)
# rather than a released version.
Pkg.develop(path=joinpath(@__DIR__, ".."))

# Select version 11.8 of the CUDA runtime.
# NOTE(review): presumably stored as a preference that only takes effect in
# subsequent Julia sessions — confirm against the CUDA.jl documentation.
CUDA.set_runtime_version!(v"11.8")
8 changes: 8 additions & 0 deletions .github/codecov.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
coverage:
  status:
    project:
      default:
        informational: true
    patch:
      default:
        informational: true
11 changes: 4 additions & 7 deletions .github/workflows/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-22.04]
julia-version: ['1.8']
julia-version: ['1.9']
julia-arch: [x64]

steps:
Expand All @@ -43,7 +43,7 @@ jobs:
strategy:
matrix:
os: [ubuntu-22.04]
julia-version: ['1.8']
julia-version: ['1.9']
julia-arch: [x64]

steps:
Expand All @@ -52,9 +52,6 @@ jobs:
with:
version: ${{ matrix.julia-version }}
arch: ${{ matrix.julia-arch }}
- run: julia --project -e 'using Pkg; Pkg.Registry.update()'
- run: julia --project -e 'using Pkg; Pkg.add("CUSOLVERRF")'
- run: julia --project -e 'using Pkg; Pkg.develop(path="lib/ArgosCUDA.jl")'
- uses: julia-actions/julia-buildpkg@latest
- uses: julia-actions/julia-runtest@latest
- run: julia --project=.ci .ci/setup.jl
- run: julia --project=.ci test/runtests.jl

28 changes: 9 additions & 19 deletions Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "Argos"
uuid = "ef244971-cf80-42b0-9762-2c2c832df5d5"
authors = ["fpacaud <[email protected]>"]
version = "0.3.3"
version = "0.3.4"

[deps]
ExaPF = "0cf0e50c-a82e-488f-ac7e-41ffdff1b8aa"
Expand All @@ -13,27 +13,17 @@ NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"

[weakdeps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CUSOLVERRF = "a8cc9031-bad2-4722-94f5-40deabb4245c"

[extensions]
ArgosCUDAExt = ["CUDA", "CUSOLVERRF"]

[compat]
CUDA = "4.1, 5"
ExaPF = "~0.9.3"
FiniteDiff = "2.7"
Ipopt = "1"
KernelAbstractions = "0.9"
MadNLP = "0.7"
MathOptInterface = "1"
NLPModels = "0.19, 0.20"
julia = "1.6"

[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
DelimitedFiles = "8bb1440f-4735-579b-a4ab-409b98df4dab"
FiniteDiff = "6a86dc24-6348-571c-b903-95158fe2bd41"
Ipopt = "b6b21f68-93f8-5de0-b562-5493be1d77c9"
LazyArtifacts = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
MadNLPGPU = "d72a61cc-809d-412f-99be-fd81f4b8a598"
MadNLPTests = "b52a2a03-04ab-4a5f-9698-6a2deff93217"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "CUDA", "DelimitedFiles", "FiniteDiff", "Ipopt", "LazyArtifacts", "MadNLPGPU", "MadNLPTests", "Random"]
julia = "1.9"
3 changes: 2 additions & 1 deletion lib/ArgosCUDA.jl/src/ArgosCUDA.jl → ext/ArgosCUDAExt.jl
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module ArgosCUDA
module ArgosCUDAExt

using LinearAlgebra
using SparseArrays
Expand All @@ -22,3 +22,4 @@ include("reduction.jl")
include("api.jl")

end # module

16 changes: 8 additions & 8 deletions lib/ArgosCUDA.jl/src/api.jl → ext/api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,8 @@ function MadNLP._madnlp_unsafe_wrap(vec::CuVector, n, shift=1)
return vec
end


function run_opf_gpu(datafile::String, ::Argos.FullSpace; options...)
flp = Argos.FullSpaceEvaluator(datafile; device=CUDADevice())
function Argos.run_opf_gpu(datafile::String, ::Argos.FullSpace; options...)
flp = Argos.FullSpaceEvaluator(datafile; device=CUDABackend())
model = Argos.OPFModel(Argos.bridge(flp))
ips = MadNLP.MadNLPSolver(
model;
Expand All @@ -16,8 +15,8 @@ function run_opf_gpu(datafile::String, ::Argos.FullSpace; options...)
return ips
end

function run_opf_gpu(datafile::String, ::Argos.BieglerReduction; options...)
flp = Argos.FullSpaceEvaluator(datafile; device=CUDADevice())
function Argos.run_opf_gpu(datafile::String, ::Argos.BieglerReduction; options...)
flp = Argos.FullSpaceEvaluator(datafile; device=CUDABackend())
model = Argos.OPFModel(Argos.bridge(flp))

madnlp_options = Dict{Symbol, Any}(options...)
Expand All @@ -30,8 +29,8 @@ function run_opf_gpu(datafile::String, ::Argos.BieglerReduction; options...)
return ips
end

function run_opf_gpu(datafile::String, ::Argos.DommelTinney; options...)
flp = Argos.ReducedSpaceEvaluator(datafile; device=CUDADevice(), nbatch_hessian=256)
function Argos.run_opf_gpu(datafile::String, ::Argos.DommelTinney; options...)
flp = Argos.ReducedSpaceEvaluator(datafile; device=CUDABackend(), nbatch_hessian=256)
model = Argos.OPFModel(Argos.bridge(flp))

madnlp_options = Dict{Symbol, Any}(options...)
Expand All @@ -42,7 +41,8 @@ function run_opf_gpu(datafile::String, ::Argos.DommelTinney; options...)

opt_ipm, opt_linear, logger = MadNLP.load_options(; madnlp_options...)

KKT = MadNLP.DenseCondensedKKTSystem{Float64, CuVector{Float64}, CuMatrix{Float64}}
QN = MadNLP.ExactHessian{Float64, CuVector{Float64}}
KKT = MadNLP.DenseCondensedKKTSystem{Float64, CuVector{Float64}, CuMatrix{Float64}, QN}
ips = MadNLP.MadNLPSolver{Float64, KKT}(model, opt_ipm, opt_linear; logger=logger)
MadNLP.solve!(ips)

Expand Down
17 changes: 0 additions & 17 deletions lib/ArgosCUDA.jl/src/kernels.jl → ext/kernels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ function Argos.transfer2tril!(hessvals::AbstractVector, H::CuSparseMatrixCSR, cs
KA.synchronize(CUDABackend())
end


@kernel function _fixed_kernel!(dest, fixed, val)
i = @index(Global, Linear)
dest[fixed[i]] = val
Expand All @@ -21,7 +20,6 @@ function Argos.fixed!(dest::CuVector, ind_fixed, val::Number)
KA.synchronize(CUDABackend())
end


@kernel function _copy_index_kernel!(dest, src, idx)
i = @index(Global, Linear)
@inbounds dest[i] = src[idx[i]]
Expand All @@ -34,7 +32,6 @@ function Argos.copy_index!(dest::CuVector{T}, src::CuVector{T}, idx) where T
KA.synchronize(CUDABackend())
end


@kernel function _fixed_diag_kernel!(dest, fixed, val)
i = @index(Global, Linear)
k = fixed[i]
Expand Down Expand Up @@ -75,7 +72,6 @@ function Argos.transfer_auglag_hessian!(
return
end


@kernel function _batch_tangents_kernel!(seeds, offset, n_batches)
i = @index(Global, Linear)
@inbounds seeds[i + offset, i] = 1.0
Expand All @@ -92,7 +88,6 @@ function Argos.set_batch_tangents!(seeds::CuMatrix, offset, n, n_batches)
return
end


@kernel function _tgtmul_1_kernel!(y, A_rowPtr, A_colVal, A_nzVal, z, w, alpha, nx, nu)
i, k = @index(Global, NTuple)
@inbounds for c in A_rowPtr[i]:A_rowPtr[i+1]-1
Expand Down Expand Up @@ -122,7 +117,6 @@ function Argos.tgtmul!(
KA.synchronize(CUDABackend())
end


@kernel function _tgtmul_2_kernel!(yx, yu, A_rowPtr, A_colVal, A_nzVal, z, w, alpha, nx, nu)
i, k = @index(Global, NTuple)
@inbounds for c in A_rowPtr[i]:A_rowPtr[i+1]-1
Expand Down Expand Up @@ -157,7 +151,6 @@ function Argos.tgtmul!(
KA.synchronize(CUDABackend())
end


@kernel function _scale_transpose_kernel!(
Jtz, Jp, Jj, Jz, D, tperm,
)
Expand All @@ -169,13 +162,3 @@ end
end
end

"""
    Argos.update!(K::Argos.HJDJ, A, D, Σ)

Refresh the buffers stored in `K` from the sparse matrix `A`, the scaling `D`
and the values `Σ`.

The update runs in three steps:
1. launch `_scale_transpose_kernel!` over an `(m, 1)` range, with
   `m = size(A, 1)`, writing into `K.Jt.nzVal` from the CSR buffers of `A`
   (`rowPtr`, `colVal`, `nzVal`), `D` and the permutation `K.transperm`;
2. call `spgemm!` on `K.Jt` and `A`, storing the product in `K.JtJ`;
3. copy `Σ` into `K.Σ`.

NOTE(review): presumably `K.Jt` holds the scaled transpose of `A`, so that
`K.JtJ` ends up as `Aᵀ D A` — confirm against the kernel body.
"""
function Argos.update!(K::Argos.HJDJ, A, D, Σ)
    m = size(A, 1)
    backend = CUDABackend()
    _scale_transpose_kernel!(backend)(
        K.Jt.nzVal, A.rowPtr, A.colVal, A.nzVal, D, K.transperm,
        ndrange=(m, 1),
    )
    # Synchronize on the backend, as every other wrapper in this file does,
    # instead of on the value returned by the kernel launch.
    KA.synchronize(backend)
    spgemm!('N', 'N', 1.0, K.Jt, A, 0.0, K.JtJ, 'O')
    K.Σ .= Σ
end
File renamed without changes.
File renamed without changes.
15 changes: 0 additions & 15 deletions lib/ArgosCUDA.jl/Project.toml

This file was deleted.

4 changes: 0 additions & 4 deletions lib/ArgosCUDA.jl/README.md

This file was deleted.

125 changes: 125 additions & 0 deletions scripts/kkt/benchmark_kkt.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
include("common.jl")

#=
CONFIG
=#

# How many timed repetitions are averaged for each case.
ntrials = 3
# Write the result tables to disk when `true`.
save_results = true
# Evaluate the derivatives on the GPU when `true`.
use_gpu = true
# Verbosity switch: selects the MadNLP log level used by the solver.
verbose = true
print_level = verbose ? MadNLP.DEBUG : MadNLP.ERROR

# OPF instances to benchmark, listed in the order they are run.
cases = ["case118.m", "case1354pegase.m", "case2869pegase.m", "case9241pegase.m"]


# Time `f()` in seconds. The GPU path keeps using CUDA events; the host path
# uses Base's timer so that CPU-only runs do not need a functional CUDA device.
function _elapsed_seconds(f, use_gpu::Bool)
    return use_gpu ? (CUDA.@elapsed f()) : (@elapsed f())
end

"""
    benchmark_kkt(model, kkt; use_gpu=false, ntrials=3, options...)

Time the linear-algebra kernels of MadNLP on the OPF problem built from `model`.

A solver is built with `build_madnlp(blk, kkt; max_iter=1, options...)` and
solved once as a warm-up. Then, `ntrials` times, the function measures
`MadNLP.build_kkt!`, `MadNLP.factorize!` and `MadNLP.solve_refine_wrapper!`, and
evaluates the infinity norm of `K * d - p`, where `d` and `p` are the
primal-dual parts of `solver.d` and `solver.p`.

# Keywords
- `use_gpu`: forwarded to `build_opf_model`; when `true`, `refresh_memory()` is
  called first and timings are taken with `CUDA.@elapsed`, otherwise with
  `Base.@elapsed`.
- `ntrials`: number of timed repetitions (must be positive).
- `options...`: forwarded to `build_madnlp`.

# Returns
A named tuple `(build, factorization, backsolve, accuracy)` holding the averages
over the `ntrials` repetitions (times in seconds).

# Throws
- `ArgumentError` if `ntrials` is not positive.
"""
function benchmark_kkt(model, kkt; use_gpu=false, ntrials=3, options...)
    ntrials > 0 || throw(ArgumentError("ntrials must be positive, got $ntrials"))

    use_gpu && refresh_memory()
    blk = build_opf_model(model; use_gpu=use_gpu)

    ## Warm-up
    solver = build_madnlp(blk, kkt; max_iter=1, options...)
    MadNLP.solve!(solver)

    # Residual buffer, allocated once with the same array type as the solver's
    # own vectors instead of a fresh host vector at every trial.
    psol = similar(MadNLP.primal_dual(solver.d))

    ## Benchmark
    t_build, t_factorization, t_backsolve = (0.0, 0.0, 0.0)
    delta_err = 0.0
    for _ in 1:ntrials
        t_build += _elapsed_seconds(use_gpu) do
            MadNLP.build_kkt!(solver.kkt)
        end
        t_factorization += _elapsed_seconds(use_gpu) do
            MadNLP.factorize!(solver.linear_solver)
        end
        t_backsolve += _elapsed_seconds(use_gpu) do
            MadNLP.solve_refine_wrapper!(solver, solver.d, solver.p)
        end

        dsol = MadNLP.primal_dual(solver.d)
        # Start every trial from a zeroed buffer, as the original code did.
        fill!(psol, zero(eltype(psol)))
        mul!(psol, solver.kkt, dsol)

        delta_err += norm(psol .- MadNLP.primal_dual(solver.p), Inf)
    end

    return (
        build=t_build / ntrials,
        factorization=t_factorization / ntrials,
        backsolve=t_backsolve / ntrials,
        accuracy=delta_err / ntrials,
    )
end

"""
    benchmark_kkt(cases, kkt, ntrials, save_results; use_gpu=false, options...)

Run the single-model `benchmark_kkt` on every instance in `cases` and gather
the results in a matrix.

Each entry of `cases` is a file name joined to `DATA` and loaded with
`ExaPF.PolarForm`. Row `i` of the returned matrix stores, for the `i`-th case,
the number of buses followed by the averaged `build`, `factorization`,
`backsolve` and `accuracy` values.

When `save_results` is `true`, the matrix is also written with `writedlm` to
`benchmark_kkt_<form>_<dev>.txt` inside `RESULTS_DIR` (relative to this file's
directory). `<form>` is `biegler` if `kkt` is an `Argos.BieglerReduction` and
`full` otherwise; `<dev>` is `cuda` if `use_gpu` is `true` and `cpu` otherwise.

NOTE(review): `DATA` and `RESULTS_DIR` are presumably defined in `common.jl` —
confirm.

# Keywords
- `use_gpu`: forwarded to the single-model method.
- `options...`: forwarded to the single-model method.
"""
function benchmark_kkt(cases, kkt, ntrials, save_results; use_gpu=false, options...)
    # Labels used to name the output file.
    dev = use_gpu ? :cuda : :cpu
    form = kkt isa Argos.BieglerReduction ? :biegler : :full

    results = zeros(length(cases), 5)
    for (i, case) in enumerate(cases)
        model = ExaPF.PolarForm(joinpath(DATA, case))
        nbus = PS.get(model, PS.NumberOfBuses())

        r = benchmark_kkt(model, kkt; ntrials=ntrials, use_gpu=use_gpu, options...)
        results[i, :] .= (nbus, r.build, r.factorization, r.backsolve, r.accuracy)
    end

    if save_results
        output_dir = joinpath(@__DIR__, RESULTS_DIR)
        # `mkpath` creates missing parent directories and is a no-op when the
        # directory already exists.
        mkpath(output_dir)
        writedlm(joinpath(output_dir, "benchmark_kkt_$(form)_$(dev).txt"), results)
    end
    return results
end

# Reference run: full-space formulation with the ma27 linear solver.
benchmark_kkt(
    cases, Argos.FullSpace(), ntrials, save_results;
    print_level, linear_solver=Ma27Solver, use_gpu,
)

# Biegler's reduction with the LapackGPUSolver linear solver.
benchmark_kkt(
    cases, Argos.BieglerReduction(), ntrials, save_results;
    print_level, linear_solver=LapackGPUSolver, use_gpu,
)

Loading

0 comments on commit 02989e3

Please sign in to comment.