[DEV] Adding julia solvers: GD, SGD, SAGA #1

Open · wants to merge 6 commits into base: main

Changes from all commits
4 changes: 3 additions & 1 deletion .github/workflows/main.yml
@@ -25,12 +25,14 @@ jobs:
           python-version: 3.8
 
       - name: Install dependencies
+        env:
+          BENCH_BRANCH: master
         run: |
           eval "$(conda shell.bash hook)"
           conda activate base
           # make sure we have the latest version of pip to correctly install benchopt in the sub conda env
           pip install --upgrade pip
-          pip install -U https://api.github.com/repos/benchopt/benchOpt/zipball/master
+          pip install -U git+https://github.com/benchopt/benchopt@$BENCH_BRANCH
 
       - name: Test
         env:
47 changes: 47 additions & 0 deletions solvers/julia_gd.jl
@@ -0,0 +1,47 @@
using Core
using LinearAlgebra

# TODO : import shared functions from a common file
# include("./benchmarks/logreg_l2/solvers/logistic.jl")

# Loss evaluation
function logistic_loss(X, y, w::Array{Float64})
    return sum(log.(1.0 .+ exp.(-(y .* (X * w)))))
end

# Gradient evaluation
function sigmoid(z::Array{Float64})
    # This function computes the sigmoid function
    #     \sigma(z) = 1 / (1 + e^{-z})
    # in a numerically stable way (no overflow for large |z|).
    # Let the i-th loss be
    #     \phi_i(z) = \log(1 + e^{-y_i z}) .
    # Then its derivative is
    #     \phi_i'(z) = -y_i \sigma(-y_i z) .
    idx = z .> 0
    out = zeros(size(z))
    out[idx] = (1 .+ exp.(-z[idx])) .^ (-1)
    exp_t = exp.(z[.~idx])
    out[.~idx] = exp_t ./ (1.0 .+ exp_t)
    return out
end

function logistic_grad(X, y, w::Array{Float64})
    # Gradient of the logistic loss: X' * (y .* (\sigma(y .* Xw) .- 1))
    z = sigmoid(y .* (X * w))
    return X' * (y .* (z .- 1))
end


function solve_logreg_l2(X, y, lambda, n_iter)
    # Gradient descent with fixed step size 1 / L, where
    # L = ||X||_2^2 / 4 + lambda is the smoothness constant
    # of the l2-regularized logistic objective.
    L = (opnorm(X)^2 / 4) + lambda

    n_features = size(X, 2)
    w = zeros(n_features, 1)
    for i ∈ 1:n_iter
        grad = logistic_grad(X, y, w) .+ (lambda .* w)  # gradient of the regularized objective
        w -= grad ./ L
    end

    return w
end
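
As a quick standalone sanity check (not part of the diff above), the gradient-descent solver can be exercised on synthetic data. The sketch below is a minimal example under assumed settings: random Gaussian features, ±1 labels, and a script run from the repository root so that solvers/julia_gd.jl resolves.

# Minimal sanity check for solve_logreg_l2 in julia_gd.jl (synthetic data, assumed paths).
using LinearAlgebra
using Random

include("solvers/julia_gd.jl")  # assumes the script runs from the repository root

Random.seed!(42)
n, d, lambda = 200, 10, 0.1
X = randn(n, d)
y = sign.(randn(n))  # labels in {-1, +1}

w = solve_logreg_l2(X, y, lambda, 1000)

# At (approximate) convergence, the full gradient of the regularized objective is close to zero.
g = logistic_grad(X, y, w) .+ (lambda .* w)
println("objective = ", logistic_loss(X, y, w) + 0.5 * lambda * norm(w)^2)
println("gradient norm = ", norm(g))
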
32 changes: 32 additions & 0 deletions solvers/julia_gd.py
@@ -0,0 +1,32 @@
from pathlib import Path
from benchopt.util import safe_import_context

from benchopt.utils.julia_helpers import JuliaSolver
from benchopt.utils.julia_helpers import get_jl_interpreter
from benchopt.utils.julia_helpers import assert_julia_installed

with safe_import_context() as import_ctx:
    assert_julia_installed()


# File containing the function to be called from julia
JULIA_SOLVER_FILE = str(Path(__file__).with_suffix('.jl'))


class Solver(JuliaSolver):

    # Config of the solver
    name = 'Julia-GD'
    stop_strategy = 'iteration'

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        jl = get_jl_interpreter()
        self.solve_logreg_l2 = jl.include(JULIA_SOLVER_FILE)

    def run(self, n_iter):
        self.beta = self.solve_logreg_l2(self.X, self.y, self.lmbd, n_iter)

    def get_result(self):
        return self.beta.ravel()
74 changes: 74 additions & 0 deletions solvers/julia_saga.jl
@@ -0,0 +1,74 @@
using Core
using LinearAlgebra
using StatsBase  # for the sample function

# TODO : import shared functions from a common file
# include("./benchmarks/logreg_l2/solvers/logistic.jl")


# Gradient evaluation
function sigmoid(z::Array{Float64})
    # This function computes the sigmoid function
    #     \sigma(z) = 1 / (1 + e^{-z})
    # in a numerically stable way (no overflow for large |z|).
    # Let the i-th loss be
    #     \phi_i(z) = \log(1 + e^{-y_i z}) .
    # Then its derivative is
    #     \phi_i'(z) = -y_i \sigma(-y_i z) .
    idx = z .> 0
    out = zeros(size(z))
    out[idx] = (1 .+ exp.(-z[idx])) .^ (-1)
    exp_t = exp.(z[.~idx])
    out[.~idx] = exp_t ./ (1.0 .+ exp_t)
    return out
end

function logreg_l2_Jac!(X, y, w::Array{Float64}, lambda::Float64, B::Array{Int64}, Jac::Array{Float64})
    # Update the Jacobian columns of the mini-batch B in place:
    #     J_{:i}^{t+1} <- \nabla f_i(w^t)  for i in B
    n_samples = size(X, 1)
    z = sigmoid(y[B] .* (X[B, :] * w))
    Jac[:, B] = n_samples .* X[B, :]' .* (y[B] .* (z .- 1))' .+ (lambda .* w)
end


function solve_logreg_l2(X, y, lambda::Float64, n_iter::Int64; batch_size::Int64=1, unbiased::Bool=false)
    """
    Implementation based on Algorithm 2 of
    N. Gazagnadou, R. M. Gower, J. Salmon, `Optimal Mini-Batch and Step Sizes for SAGA`, ICML 2019.
    """
    # TODO : use the expected smoothness constant instead -> larger step sizes with mini-batching

    n_samples = size(X, 1)
    Lmax = (n_samples / 4) * maximum(sum(X .^ 2, dims=2)) + lambda
    step_size = 1.0 / Lmax
    println("Step size SAGA = ", step_size, "\n")

    n_features = size(X, 2)
    w = zeros(n_features, 1)

    Jac = zeros(n_features, n_samples)  # Jacobian estimate
    # logreg_l2_Jac!(X, y, w, lambda, collect(1:n_samples), Jac)  # full-gradient Jacobian init # FIXME
    aux = zeros(n_features, 1)  # auxiliary vector
    grad_estim = zeros(n_features, 1)  # stochastic gradient estimate: SAGA if unbiased = true, SAG otherwise
    u = sum(Jac, dims=2)  # SAG (biased) estimate
    for i ∈ 1:n_iter
        B = sample(1:n_samples, batch_size, replace=false)  # sample a mini-batch without replacement

        # Each per-sample gradient is stored in a different column of Jac
        aux[:] = -sum(Jac[:, B], dims=2)  # old Jacobian columns of the mini-batch
        logreg_l2_Jac!(X, y, w, lambda, B, Jac)  # update the Jacobian estimate
        aux[:] += sum(Jac[:, B], dims=2)  # aux = \sum_{i \in B} (\nabla f_i(w^t) - J_{:i}^t)

        # Update the gradient estimate g^k
        if unbiased
            grad_estim[:] = u .+ ((1.0 / length(B)) .* aux)  # SAGA unbiased descent direction
        else
            grad_estim[:] = u  # SAG biased descent direction
        end

        # Update the SAG biased estimate: 1/n J^{k+1} 1 = 1/n J^k 1 + 1/n (DF^k - J^k) Proj 1
        u[:] = u .+ ((1.0 / n_samples) .* aux)

        # Update the vector of weights with a stochastic gradient step
        w -= step_size .* grad_estim
    end
    return w
end
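
A usage sketch (not part of the diff): it runs the SAGA solver above on synthetic data with both the unbiased (SAGA) and biased (SAG) gradient estimators and compares the final regularized objectives. The data, seed, and include path are assumptions, and StatsBase must be installed.

# Compare the SAGA (unbiased=true) and SAG (unbiased=false) estimators on synthetic data.
using LinearAlgebra
using Random

include("solvers/julia_saga.jl")  # assumes the script runs from the repository root

Random.seed!(0)
n, d, lambda = 500, 20, 0.1
X = randn(n, d)
y = sign.(randn(n))  # labels in {-1, +1}

objective(w) = sum(log.(1.0 .+ exp.(-(y .* (X * w))))) + 0.5 * lambda * norm(w)^2

w_saga = solve_logreg_l2(X, y, lambda, 10_000; batch_size=1, unbiased=true)
w_sag = solve_logreg_l2(X, y, lambda, 10_000; batch_size=1, unbiased=false)

println("SAGA objective = ", objective(w_saga))
println("SAG objective = ", objective(w_sag))
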
35 changes: 35 additions & 0 deletions solvers/julia_saga.py
@@ -0,0 +1,35 @@
from pathlib import Path
from benchopt.util import safe_import_context

from benchopt.utils.julia_helpers import JuliaSolver
from benchopt.utils.julia_helpers import get_jl_interpreter
from benchopt.utils.julia_helpers import assert_julia_installed

with safe_import_context() as import_ctx:
    assert_julia_installed()


# File containing the function to be called from julia
JULIA_SOLVER_FILE = str(Path(__file__).with_suffix('.jl'))


class Solver(JuliaSolver):

    # Config of the solver
    name = 'Julia-SAGA'
    stop_strategy = 'iteration'

    # Julia package dependencies
    julia_requirements = ['StatsBase']

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        jl = get_jl_interpreter()
        self.solve_logreg_l2 = jl.include(JULIA_SOLVER_FILE)

    def run(self, n_iter):
        self.beta = self.solve_logreg_l2(self.X, self.y, self.lmbd, n_iter)

    def get_result(self):
        return self.beta.ravel()
60 changes: 60 additions & 0 deletions solvers/julia_sgd.jl
@@ -0,0 +1,60 @@
using Core
using LinearAlgebra
using StatsBase  # for the sample function

# TODO : import shared functions from a common file
# include("./benchmarks/logreg_l2/solvers/logistic.jl")

# Loss evaluation
function logistic_loss(X, y, w::Array{Float64})
    return sum(log.(1.0 .+ exp.(-(y .* (X * w)))))
end

# Gradient evaluation
function sigmoid(z::Array{Float64})
    # This function computes the sigmoid function
    #     \sigma(z) = 1 / (1 + e^{-z})
    # in a numerically stable way (no overflow for large |z|).
    # Let the i-th loss be
    #     \phi_i(z) = \log(1 + e^{-y_i z}) .
    # Then its derivative is
    #     \phi_i'(z) = -y_i \sigma(-y_i z) .
    idx = z .> 0
    out = zeros(size(z))
    out[idx] = (1 .+ exp.(-z[idx])) .^ (-1)
    exp_t = exp.(z[.~idx])
    out[.~idx] = exp_t ./ (1.0 .+ exp_t)
    return out
end

function logistic_grad(X, y, w::Array{Float64})
    # Gradient of the logistic loss: X' * (y .* (\sigma(y .* Xw) .- 1))
    z = sigmoid(y .* (X * w))
    return X' * (y .* (z .- 1))
end


function solve_logreg_l2(X, y, lambda::Float64, n_iter::Int64; batch_size::Int64=1, step_size::Float64=1.0)
    # Stochastic gradient solver for l2-regularized logistic regression.

    # The objective, written as a finite sum compatible with mini-batching, is
    #     f(w) = \frac{1}{n} \sum_{i=1}^n f_i(w)
    #          = \frac{\lambda}{2} \|w\|_2^2 + \sum_{i=1}^n \phi_i(x_i^{\top} w) ,
    # where \phi_i(z) = \log(1 + \exp(-y_i z)).

    # Let B be a mini-batch of indices; the loss of the subsampled function is
    # logreg_l2_subloss(w, B) = (n_samples / length(B)) * logistic_loss(X[B, :], y[B], w) + (0.5 * lambda * norm(w)^2)

    # TODO: choose a step-size rule

    n_samples = size(X, 1)
    sgd_grad(w, B) = (n_samples / length(B)) * logistic_grad(X[B, :], y[B], w) .+ (lambda .* w)

    n_features = size(X, 2)
    w = zeros(n_features, 1)
    for i ∈ 1:n_iter
        B = sample(1:n_samples, batch_size, replace=false)  # sample a mini-batch without replacement
        w -= (step_size / sqrt(i)) .* sgd_grad(w, B)  # decaying step: step_size / sqrt(i)
    end
    return w
end
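
And a matching sketch (again not part of the diff) showing how the SGD solver above could be called directly with its batch_size and step_size keyword arguments. The data, seed, step size, and include path are illustrative assumptions, and StatsBase must be installed.

# Call the SGD solver above with a few mini-batch sizes and a fixed base step size.
using LinearAlgebra
using Random

include("solvers/julia_sgd.jl")  # assumes the script runs from the repository root

Random.seed!(1)
n, d, lambda = 300, 15, 0.1
X = randn(n, d)
y = sign.(randn(n))  # labels in {-1, +1}

objective(w) = logistic_loss(X, y, w) + 0.5 * lambda * norm(w)^2

# The effective step at iteration i is step_size / sqrt(i).
for batch_size in (1, 10, 50)
    w = solve_logreg_l2(X, y, lambda, 5_000; batch_size=batch_size, step_size=1e-3)
    println("batch_size = ", batch_size, "  objective = ", objective(w))
end
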
35 changes: 35 additions & 0 deletions solvers/julia_sgd.py
@@ -0,0 +1,35 @@
from pathlib import Path
from benchopt.util import safe_import_context

from benchopt.utils.julia_helpers import JuliaSolver
from benchopt.utils.julia_helpers import get_jl_interpreter
from benchopt.utils.julia_helpers import assert_julia_installed

with safe_import_context() as import_ctx:
    assert_julia_installed()


# File containing the function to be called from julia
JULIA_SOLVER_FILE = str(Path(__file__).with_suffix('.jl'))


class Solver(JuliaSolver):

    # Config of the solver
    name = 'Julia-SGD'
    stop_strategy = 'iteration'

    # Julia package dependencies
    julia_requirements = ['StatsBase']

    def set_objective(self, X, y, lmbd):
        self.X, self.y, self.lmbd = X, y, lmbd

        jl = get_jl_interpreter()
        self.solve_logreg_l2 = jl.include(JULIA_SOLVER_FILE)

    def run(self, n_iter):
        self.beta = self.solve_logreg_l2(self.X, self.y, self.lmbd, n_iter)

    def get_result(self):
        return self.beta.ravel()