[DEV] Adding julia solvers: GD, SGD, SAGA #1

Open · wants to merge 6 commits into base: main

Changes from all commits
4 changes: 3 additions & 1 deletion .github/workflows/main.yml
@@ -25,12 +25,14 @@ jobs:
           python-version: 3.8
 
       - name: Install dependencies
+        env:
+          BENCH_BRANCH: master
         run: |
           eval "$(conda shell.bash hook)"
           conda activate base
           # make sure we have the latest version of pip to correctly install benchopt in the sub conda env
           pip install --upgrade pip
-          pip install -U https://api.github.com/repos/benchopt/benchOpt/zipball/master
+          pip install -U git+https://github.com/benchopt/benchopt@$BENCH_BRANCH
 
       - name: Test
         env:
47 changes: 47 additions & 0 deletions solvers/julia_gd.jl
@@ -0,0 +1,47 @@
using Core
using LinearAlgebra

# TODO : import shared functions from a common file
# include("./benchmarks/logreg_l2/solvers/logistic.jl")

# Loss evaluation
function logistic_loss(X, y, w::Array{Float64})
    return sum(log.(1.0 .+ exp.(-(y .* (X * w)))))
end

# Gradient evaluation
function sigmoid(z::Array{Float64})
    # This function computes the sigmoid function
    #     \sigma(z) = 1 / (1 + e^{-z})
    # in a numerically stable way (no overflow for large |z|).
    # Let the i-th loss be
    #     \phi_i(z) = \log(1 + e^{-y_i z}) .
    # Then its derivative is
    #     \phi_i'(z) = -y_i \sigma(-y_i z) .
    idx = z .> 0
    out = zeros(size(z))
    out[idx] = (1 .+ exp.(-z[idx])) .^ (-1)
    exp_t = exp.(z[.~idx])
    out[.~idx] = exp_t ./ (1.0 .+ exp_t)
    return out
end

function logistic_grad(X, y, w::Array{Float64})
    # Gradient of the logistic loss: X' * (y .* (\sigma(y .* Xw) .- 1))
    z = sigmoid(y .* (X * w))
    return X' * (y .* (z .- 1))
end


function solve_logreg_l2(X, y, lambda, n_iter)
    # Gradient descent with fixed step size 1 / L, where
    # L = ||X||_2^2 / 4 + lambda is the smoothness constant
    # of the l2-regularized logistic objective.
    L = (opnorm(X)^2 / 4) + lambda

    n_features = size(X, 2)
    w = zeros(n_features, 1)
    for i ∈ 1:n_iter
        grad = logistic_grad(X, y, w) .+ (lambda .* w)  # gradient of the regularized objective
        w -= grad ./ L
    end

    return w
end
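
As a quick standalone sanity check (not part of the diff above), the gradient-descent solver can be exercised on synthetic data. The sketch below is a minimal example under assumed settings: random Gaussian features, ±1 labels, and a script run from the repository root so that solvers/julia_gd.jl resolves.

# Minimal sanity check for solve_logreg_l2 in julia_gd.jl (synthetic data, assumed paths).
using LinearAlgebra
using Random

include("solvers/julia_gd.jl")  # assumes the script runs from the repository root

Random.seed!(42)
n, d, lambda = 200, 10, 0.1
X = randn(n, d)
y = sign.(randn(n))  # labels in {-1, +1}

w = solve_logreg_l2(X, y, lambda, 1000)

# At (approximate) convergence, the full gradient of the regularized objective is close to zero.
g = logistic_grad(X, y, w) .+ (lambda .* w)
println("objective = ", logistic_loss(X, y, w) + 0.5 * lambda * norm(w)^2)
println("gradient norm = ", norm(g))
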
32 changes: 32 additions & 0 deletions solvers/julia_gd.py
@@ -0,0 +1,32 @@
from pathlib import Path
from benchopt.util import safe_import_context

from benchopt.utils.julia_helpers import JuliaSolver
from benchopt.utils.julia_helpers import get_jl_interpreter
from benchopt.utils.julia_helpers import assert_julia_installed

with safe_import_context() as import_ctx:
    assert_julia_installed()


# File containing the function to be called from julia
JULIA_SOLVER_FILE = str(Path(__file__).with_suffix('.jl'))


class Solver(JuliaSolver):

    # Config of the solver
    name = 'Julia-GD'
    stop_strategy = 'iteration'

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        jl = get_jl_interpreter()
        self.solve_logreg_l2 = jl.include(JULIA_SOLVER_FILE)

    def run(self, n_iter):
        self.beta = self.solve_logreg_l2(self.X, self.y, self.lmbd, n_iter)

    def get_result(self):
        return self.beta.ravel()
74 changes: 74 additions & 0 deletions solvers/julia_saga.jl
@@ -0,0 +1,74 @@
using Core
using LinearAlgebra
using StatsBase  # for the sample function

# TODO : import shared functions from a common file
# include("./benchmarks/logreg_l2/solvers/logistic.jl")


# Gradient evaluation
function sigmoid(z::Array{Float64})
    # This function computes the sigmoid function
    #     \sigma(z) = 1 / (1 + e^{-z})
    # in a numerically stable way (no overflow for large |z|).
    # Let the i-th loss be
    #     \phi_i(z) = \log(1 + e^{-y_i z}) .
    # Then its derivative is
    #     \phi_i'(z) = -y_i \sigma(-y_i z) .
    idx = z .> 0
    out = zeros(size(z))
    out[idx] = (1 .+ exp.(-z[idx])) .^ (-1)
    exp_t = exp.(z[.~idx])
    out[.~idx] = exp_t ./ (1.0 .+ exp_t)
    return out
end

function logreg_l2_Jac!(X, y, w::Array{Float64}, lambda::Float64, B::Array{Int64}, Jac::Array{Float64})
    # Update the Jacobian columns of the mini-batch B in place:
    #     J_{:i}^{t+1} <- \nabla f_i(w^t)  for i in B
    n_samples = size(X, 1)
    z = sigmoid(y[B] .* (X[B, :] * w))
    Jac[:, B] = n_samples .* X[B, :]' .* (y[B] .* (z .- 1))' .+ (lambda .* w)
end


function solve_logreg_l2(X, y, lambda::Float64, n_iter::Int64; batch_size::Int64=1, unbiased::Bool=false)
    """
    Implementation based on Algorithm 2 of
    N. Gazagnadou, R. M. Gower, J. Salmon, `Optimal Mini-Batch and Step Sizes for SAGA`, ICML 2019.
    """
    # TODO : use the expected smoothness constant instead -> larger step sizes with mini-batching

    n_samples = size(X, 1)
    Lmax = (n_samples / 4) * maximum(sum(X .^ 2, dims=2)) + lambda
    step_size = 1.0 / Lmax
    println("Step size SAGA = ", step_size, "\n")

    n_features = size(X, 2)
    w = zeros(n_features, 1)

    Jac = zeros(n_features, n_samples)  # Jacobian estimate
    # logreg_l2_Jac!(X, y, w, lambda, collect(1:n_samples), Jac)  # full-gradient Jacobian init # FIXME
    aux = zeros(n_features, 1)  # auxiliary vector
    grad_estim = zeros(n_features, 1)  # stochastic gradient estimate: SAGA if unbiased = true, SAG otherwise
    u = sum(Jac, dims=2)  # SAG (biased) estimate
    for i ∈ 1:n_iter
        B = sample(1:n_samples, batch_size, replace=false)  # sample a mini-batch without replacement

        # Each per-sample gradient is stored in a different column of Jac
        aux[:] = -sum(Jac[:, B], dims=2)  # old Jacobian columns of the mini-batch
        logreg_l2_Jac!(X, y, w, lambda, B, Jac)  # update the Jacobian estimate
        aux[:] += sum(Jac[:, B], dims=2)  # aux = \sum_{i \in B} (\nabla f_i(w^t) - J_{:i}^t)

        # Update the gradient estimate g^k
        if unbiased
            grad_estim[:] = u .+ ((1.0 / length(B)) .* aux)  # SAGA unbiased descent direction
        else
            grad_estim[:] = u  # SAG biased descent direction
        end

        # Update the SAG biased estimate: 1/n J^{k+1} 1 = 1/n J^k 1 + 1/n (DF^k - J^k) Proj 1
        u[:] = u .+ ((1.0 / n_samples) .* aux)

        # Update the vector of weights with a stochastic gradient step
        w -= step_size .* grad_estim
    end
    return w
end
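
A usage sketch (not part of the diff): it runs the SAGA solver above on synthetic data with both the unbiased (SAGA) and biased (SAG) gradient estimators and compares the final regularized objectives. The data, seed, and include path are assumptions, and StatsBase must be installed.

# Compare the SAGA (unbiased=true) and SAG (unbiased=false) estimators on synthetic data.
using LinearAlgebra
using Random

include("solvers/julia_saga.jl")  # assumes the script runs from the repository root

Random.seed!(0)
n, d, lambda = 500, 20, 0.1
X = randn(n, d)
y = sign.(randn(n))  # labels in {-1, +1}

objective(w) = sum(log.(1.0 .+ exp.(-(y .* (X * w))))) + 0.5 * lambda * norm(w)^2

w_saga = solve_logreg_l2(X, y, lambda, 10_000; batch_size=1, unbiased=true)
w_sag = solve_logreg_l2(X, y, lambda, 10_000; batch_size=1, unbiased=false)

println("SAGA objective = ", objective(w_saga))
println("SAG objective = ", objective(w_sag))
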
35 changes: 35 additions & 0 deletions solvers/julia_saga.py
@@ -0,0 +1,35 @@
from pathlib import Path
from benchopt.util import safe_import_context

from benchopt.utils.julia_helpers import JuliaSolver
from benchopt.utils.julia_helpers import get_jl_interpreter
from benchopt.utils.julia_helpers import assert_julia_installed

with safe_import_context() as import_ctx:
    assert_julia_installed()


# File containing the function to be called from julia
JULIA_SOLVER_FILE = str(Path(__file__).with_suffix('.jl'))


class Solver(JuliaSolver):

    # Config of the solver
    name = 'Julia-SAGA'
    stop_strategy = 'iteration'

    # Julia package dependencies
    julia_requirements = ['StatsBase']

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        jl = get_jl_interpreter()
        self.solve_logreg_l2 = jl.include(JULIA_SOLVER_FILE)

    def run(self, n_iter):
        self.beta = self.solve_logreg_l2(self.X, self.y, self.lmbd, n_iter)

    def get_result(self):
        return self.beta.ravel()
60 changes: 60 additions & 0 deletions solvers/julia_sgd.jl
@@ -0,0 +1,60 @@
using Core
using LinearAlgebra
using StatsBase  # for the sample function

# TODO : import shared functions from a common file
# include("./benchmarks/logreg_l2/solvers/logistic.jl")

# Loss evaluation
function logistic_loss(X, y, w::Array{Float64})
    return sum(log.(1.0 .+ exp.(-(y .* (X * w)))))
end

# Gradient evaluation
function sigmoid(z::Array{Float64})
    # This function computes the sigmoid function
    #     \sigma(z) = 1 / (1 + e^{-z})
    # in a numerically stable way (no overflow for large |z|).
    # Let the i-th loss be
    #     \phi_i(z) = \log(1 + e^{-y_i z}) .
    # Then its derivative is
    #     \phi_i'(z) = -y_i \sigma(-y_i z) .
    idx = z .> 0
    out = zeros(size(z))
    out[idx] = (1 .+ exp.(-z[idx])) .^ (-1)
    exp_t = exp.(z[.~idx])
    out[.~idx] = exp_t ./ (1.0 .+ exp_t)
    return out
end

function logistic_grad(X, y, w::Array{Float64})
    # Gradient of the logistic loss: X' * (y .* (\sigma(y .* Xw) .- 1))
    z = sigmoid(y .* (X * w))
    return X' * (y .* (z .- 1))
end


function solve_logreg_l2(X, y, lambda::Float64, n_iter::Int64; batch_size::Int64=1, step_size::Float64=1.0)
    # Stochastic gradient solver for l2-regularized logistic regression.

    # The objective, written as a finite sum compatible with mini-batching, is
    #     f(w) = \frac{1}{n} \sum_{i=1}^n f_i(w)
    #          = \frac{\lambda}{2} \|w\|_2^2 + \sum_{i=1}^n \phi_i(x_i^{\top} w) ,
    # where \phi_i(z) = \log(1 + \exp(-y_i z)).

    # Let B be a mini-batch of indices; the loss of the subsampled function is
    # logreg_l2_subloss(w, B) = (n_samples / length(B)) * logistic_loss(X[B, :], y[B], w) + (0.5 * lambda * norm(w)^2)

    # TODO: choose a step-size rule

    n_samples = size(X, 1)
    sgd_grad(w, B) = (n_samples / length(B)) * logistic_grad(X[B, :], y[B], w) .+ (lambda .* w)

    n_features = size(X, 2)
    w = zeros(n_features, 1)
    for i ∈ 1:n_iter
        B = sample(1:n_samples, batch_size, replace=false)  # sample a mini-batch without replacement
        w -= (step_size / sqrt(i)) .* sgd_grad(w, B)  # decaying step: step_size / sqrt(i)
    end
    return w
end
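
And a matching sketch (again not part of the diff) showing how the SGD solver above could be called directly with its batch_size and step_size keyword arguments. The data, seed, step size, and include path are illustrative assumptions, and StatsBase must be installed.

# Call the SGD solver above with a few mini-batch sizes and a fixed base step size.
using LinearAlgebra
using Random

include("solvers/julia_sgd.jl")  # assumes the script runs from the repository root

Random.seed!(1)
n, d, lambda = 300, 15, 0.1
X = randn(n, d)
y = sign.(randn(n))  # labels in {-1, +1}

objective(w) = logistic_loss(X, y, w) + 0.5 * lambda * norm(w)^2

# The effective step at iteration i is step_size / sqrt(i).
for batch_size in (1, 10, 50)
    w = solve_logreg_l2(X, y, lambda, 5_000; batch_size=batch_size, step_size=1e-3)
    println("batch_size = ", batch_size, "  objective = ", objective(w))
end
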
35 changes: 35 additions & 0 deletions solvers/julia_sgd.py
@@ -0,0 +1,35 @@
from pathlib import Path
from benchopt.util import safe_import_context

from benchopt.utils.julia_helpers import JuliaSolver
from benchopt.utils.julia_helpers import get_jl_interpreter
from benchopt.utils.julia_helpers import assert_julia_installed

with safe_import_context() as import_ctx:
    assert_julia_installed()


# File containing the function to be called from julia
JULIA_SOLVER_FILE = str(Path(__file__).with_suffix('.jl'))


class Solver(JuliaSolver):

    # Config of the solver
    name = 'Julia-SGD'
    stop_strategy = 'iteration'

    # Julia package dependencies
    julia_requirements = ['StatsBase']

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        jl = get_jl_interpreter()
        self.solve_logreg_l2 = jl.include(JULIA_SOLVER_FILE)

    def run(self, n_iter):
        self.beta = self.solve_logreg_l2(self.X, self.y, self.lmbd, n_iter)

    def get_result(self):
        return self.beta.ravel()