Skip to content

Commit

Permalink
"JuliaSmoothOptimizers#15 first fix, also added unit test for JuliaSm…
Browse files Browse the repository at this point in the history
  • Loading branch information
farhadrclass committed Oct 31, 2023
1 parent 390de83 commit c244bfa
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 72 deletions.
5 changes: 1 addition & 4 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,13 @@ NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f"

[compat]
Flux = "0.13.16"
MLDatasets = "0.7.9"
NLPModels = "0.20.0"
Zygote = "0.6.49"
julia = "^1.6.0"
julia = "^1.9.0"

[extras]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["LinearAlgebra","CUDA" ,"Test"]
6 changes: 3 additions & 3 deletions src/FluxNLPModels.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ using Flux: onehotbatch, onecold, @epochs
export AbstractFluxNLPModel, FluxNLPModel
export reset_minibatch_train!, reset_minibatch_test!
export minibatch_next_train!, minibatch_next_test!
export accuracy, set_vars!, local_loss
export accuracy, set_vars!, local_loss, update_type!

abstract type AbstractFluxNLPModel{T, S} <: AbstractNLPModel{T, S} end

Expand Down Expand Up @@ -58,7 +58,7 @@ function FluxNLPModel(
current_training_minibatch = [],
current_test_minibatch = [],
size_minibatch::Int = 100,
loss_f::F = Flux.mse, #Flux.crossentropy,
loss_f::F = Flux.crossentropy,#Flux.mse, #
) where {T <: Chain, F <: Function}
x0, rebuild = Flux.destructure(chain_ANN)
n = length(x0)
Expand Down Expand Up @@ -90,4 +90,4 @@ end

include("utils.jl")
include("FluxNLPModels_methods.jl")
end
end
48 changes: 37 additions & 11 deletions src/FluxNLPModels_methods.jl
Original file line number Diff line number Diff line change
@@ -1,20 +1,28 @@
"""
f = obj(nlp, w)
Evaluate `f(w)`, the objective function of `nlp` at `w`. If the precision of `w` differs from that of `nlp`, the model is converted to match the element type of `w`.
# Arguments
- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
- `w::AbstractVector{V}`: the vector of weights/variables. The separate type parameter `V` allows the weights to use a different precision than the model.
# Output
- `f_w`: the new objective function.
"""
function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}
  x_batch, y = nlp.current_training_minibatch

  # Precision mismatch between the model (`T`) and the weights (`V`):
  # convert the chain to `V`, and convert the minibatch inputs too unless
  # they already have element type `V`.
  if T != V
    update_type!(nlp, w)
  end
  x = (T != V && eltype(x_batch) != V) ? V.(x_batch) : x_batch

  # Load the weights into the chain before evaluating, then count the evaluation.
  set_vars!(nlp, w)
  increment!(nlp, :neval_obj)

  # Loss of the chain's prediction on the current training minibatch.
  return nlp.loss_f(nlp.chain(x), y)
end

Expand All @@ -34,12 +42,21 @@ Evaluate `∇f(w)`, the gradient of the objective function at `w` in place.
"""
function NLPModels.grad!(
  nlp::AbstractFluxNLPModel{T, S},
  w::AbstractVector{V},
  g::AbstractVector,
) where {T, V, S}
  @lencheck nlp.meta.nvar w g
  x_batch, y = nlp.current_training_minibatch

  # Precision mismatch between the model (`T`) and the weights (`V`):
  # convert the chain to `V`, and convert the minibatch inputs too unless
  # they already have element type `V`.
  if T != V
    update_type!(nlp, w)
  end
  x = (T != V && eltype(x_batch) != V) ? V.(x_batch) : x_batch

  increment!(nlp, :neval_grad)

  # Write into the caller's buffer. `g` is deliberately NOT rebound to a
  # converted copy: that would leave the caller's vector untouched and break
  # the in-place contract of `grad!`. The broadcast assignment converts the
  # gradient to `eltype(g)` when the two differ.
  g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1]
  return g
end
Expand All @@ -60,10 +77,19 @@ Evaluate both `f(w)`, the objective function of `nlp` at `w`, and `∇f(w)`, the
"""
function NLPModels.objgrad!(
nlp::AbstractFluxNLPModel{T, S},
w::AbstractVector{T},
w::AbstractVector{V},
g::AbstractVector{T},
) where {T, S}
) where {T,V, S}
@lencheck nlp.meta.nvar w g

if (T != V) # we check if the types are the same,
update_type!(nlp, w)
g = V.(g)
if eltype(x) != V #TODO check if the user have changed the typed ?
x = V.(x)
end
end

increment!(nlp, :neval_obj)
increment!(nlp, :neval_grad)
set_vars!(nlp, w)
Expand All @@ -73,4 +99,4 @@ function NLPModels.objgrad!(
g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1]

return f_w, g
end
end
24 changes: 23 additions & 1 deletion src/utils.jl
Original file line number Diff line number Diff line change
@@ -1,3 +1,25 @@
"""
update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T,V, S}
Convert the chain to the floating-point type given by the element type of the weights `w`, and rebuild the `destructure` closure from the converted chain.
"""
function update_type!(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{V}) where {T, V, S}
  # Only the element type `V` of `w` is used here; its values are not read.
  # Pick the Flux precision converter matching `V`.
  convert_chain = if V === Float16
    f16
  elseif V === Float32
    f32
  elseif V === Float64
    f64
  else
    error("The package only support Float16, Float32 and Float64")
  end

  # Same steps for every supported precision: swap in the converted chain,
  # then refresh the rebuild closure so it matches the new chain.
  nlp.chain = convert_chain(nlp.chain)
  # `Flux.destructure` returns (flat parameter vector, rebuild closure);
  # the flat vector is discarded with `_`.
  _, nlp.rebuild = Flux.destructure(nlp.chain)
  return nlp
end

"""
set_vars!(model::AbstractFluxNLPModel{T,S}, new_w::AbstractVector{T}) where {T<:Number, S}
Expand Down Expand Up @@ -108,4 +130,4 @@ function minibatch_next_test!(nlp::AbstractFluxNLPModel; device = cpu)
(item, nlp.current_test_minibatch_status) = next
nlp.current_test_minibatch = device(item)
return true
end
end
162 changes: 109 additions & 53 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ using MLDatasets
using LinearAlgebra

# Helper functions
function getdata(args)
function getdata(args; T = Float32)
ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" # download datasets without having to manually confirm the download

# Loading Dataset

xtrain, ytrain = MLDatasets.MNIST(Tx = Float32, split = :train)[:]
xtest, ytest = MLDatasets.MNIST(Tx = Float32, split = :test)[:]
xtrain, ytrain = MLDatasets.MNIST(Tx = T, split = :train)[:]
xtest, ytest = MLDatasets.MNIST(Tx = T, split = :test)[:]

# Reshape Data in order to flatten each image into a linear array
xtrain = Flux.flatten(xtrain)
Expand All @@ -32,7 +32,7 @@ function getdata(args)
end

# Build the test network: a dense hidden layer of width 32 with `relu`, a dense
# output layer with `nclasses` outputs, and a final `softmax`.
# The input width is the number of entries of one image, `prod(imgsize)`.
# NOTE(review): the trailing `softmax` presumably pairs with a probability-based
# loss such as `Flux.crossentropy` — confirm against the model's `loss_f`.
function build_model(; imgsize = (28, 28, 1), nclasses = 10)
  return Flux.Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses), softmax)
end

@kwdef mutable struct Args
Expand All @@ -46,70 +46,126 @@ args = Args() # collect options in a struct for convenience

device = cpu

@testset "FluxNLPModels tests" begin
# @testset "FluxNLPModels tests" begin

# Create test and train dataloaders
train_data, test_data = getdata(args)
# # Create test and train dataloaders
# train_data, test_data = getdata(args)

# Construct model
DN = build_model() |> device
DNNLPModel = FluxNLPModel(DN, train_data, test_data)
# # Construct model
# DN = build_model() |> device
# DNNLPModel = FluxNLPModel(DN, train_data, test_data)

old_w, rebuild = Flux.destructure(DN)
# old_w, rebuild = Flux.destructure(DN)

x1 = copy(DNNLPModel.w)
# x1 = copy(DNNLPModel.w)

obj_x1 = obj(DNNLPModel, x1)
grad_x1 = NLPModels.grad(DNNLPModel, x1)
# obj_x1 = obj(DNNLPModel, x1)
# grad_x1 = NLPModels.grad(DNNLPModel, x1)

grad_x1_2 = similar(x1)
obj_x1_2, grad_x1_2 = NLPModels.objgrad!(DNNLPModel, x1, grad_x1_2)
# grad_x1_2 = similar(x1)
# obj_x1_2, grad_x1_2 = NLPModels.objgrad!(DNNLPModel, x1, grad_x1_2)

@test DNNLPModel.w == old_w
@test obj_x1 == obj_x1_2
println(norm(grad_x1 - grad_x1_2))
@test norm(grad_x1 - grad_x1_2) 0.0
# @test DNNLPModel.w == old_w
# @test obj_x1 == obj_x1_2
# # println(norm(grad_x1 - grad_x1_2))
# @test norm(grad_x1 - grad_x1_2) ≈ 0.0

@test x1 == DNNLPModel.w
@test Flux.params(DNNLPModel.chain)[1][1] == x1[1]
@test Flux.params(DNNLPModel.chain)[1][2] == x1[2]
# @test x1 == DNNLPModel.w
# @test Flux.params(DNNLPModel.chain)[1][1] == x1[1]
# @test Flux.params(DNNLPModel.chain)[1][2] == x1[2]

@test_throws Exception FluxNLPModel(DN, [], test_data) # if the train data is empty
@test_throws Exception FluxNLPModel(DN, train_data, []) # if the test data is empty
@test_throws Exception FluxNLPModel(DN, [], []) # if the both data is empty
# @test_throws Exception FluxNLPModel(DN, [], test_data) # if the train data is empty
# @test_throws Exception FluxNLPModel(DN, train_data, []) # if the test data is empty
# @test_throws Exception FluxNLPModel(DN, [], []) # if both datasets are empty

# Testing if the value of the first batch was passed it
DNNLPModel_2 = FluxNLPModel(
DN,
train_data,
test_data,
current_training_minibatch = first(train_data),
current_test_minibatch = first(test_data),
)
# # Testing that the first batch is passed in correctly
# DNNLPModel_2 = FluxNLPModel(
# DN,
# train_data,
# test_data,
# current_training_minibatch = first(train_data),
# current_test_minibatch = first(test_data),
# )

#checking if we can call accuracy
train_acc = FluxNLPModels.accuracy(DNNLPModel_2; data_loader = train_data) # accuracy on train data
test_acc = FluxNLPModels.accuracy(DNNLPModel_2) # on the test data
# #checking if we can call accuracy
# train_acc = FluxNLPModels.accuracy(DNNLPModel_2; data_loader = train_data) # accuracy on train data
# test_acc = FluxNLPModels.accuracy(DNNLPModel_2) # on the test data

@test train_acc >= 0.0
@test train_acc <= 1.0
end
# @test train_acc >= 0.0
# @test train_acc <= 1.0
# end

@testset "minibatch tests" begin
# @testset "minibatch tests" begin
# # Create test and train dataloaders
# train_data, test_data = getdata(args)

# # Construct model
# DN = build_model() |> device
# nlp = FluxNLPModel(DN, train_data, test_data)
# reset_minibatch_train!(nlp)
# @test nlp.current_training_minibatch_status === nothing
# buffer_minibatch = deepcopy(nlp.current_training_minibatch)
# @test minibatch_next_train!(nlp) # should return true
# @test minibatch_next_train!(nlp) # should return true
# @test !isequal(nlp.current_training_minibatch, buffer_minibatch)

# reset_minibatch_test!(nlp)
# @test minibatch_next_test!(nlp) # should return true
# @test minibatch_next_test!(nlp) # should return true
# end

@testset "Multiple precision test" begin
  # Create test and train dataloaders
  train_data, test_data = getdata(args)

  # Construct model in Float32
  DN = build_model() |> device
  nlp = FluxNLPModel(DN, train_data, test_data)

  # Baseline: evaluate at the model's own weights; results must be Float32.
  x1 = copy(nlp.w)
  obj_x1 = obj(nlp, x1)
  grad_x1 = NLPModels.grad(nlp, x1)
  @test obj_x1 isa Float32
  @test eltype(grad_x1) == Float32

  # change to Float16: the model must follow the precision of the weights
  x2 = Float16.(x1)
  obj_x2 = obj(nlp, x2)
  grad_x2 = NLPModels.grad(nlp, x2)
  @test obj_x2 isa Float16
  @test eltype(grad_x2) == Float16

  # NOTE(review): the cases below are disabled in this commit (reason not
  # stated here); they are kept verbatim for when they are re-enabled.

  # # change to Float64
  # x3 = Float64.(x1)
  # obj_x3 = obj(nlp, x3)
  # grad_x3 = NLPModels.grad(nlp, x3)
  # @test typeof(obj_x3) == Float64
  # @test eltype(grad_x3) == Float64

  # # Construct model in Float16
  # train_data_f16, test_data_f16 = getdata(args, T = Float16)
  # DN_f16 = build_model() |> f16
  # nlp_f16 = FluxNLPModel(DN_f16, train_data_f16, test_data_f16)

  # x4 = copy(nlp_f16.w)
  # obj_x4 = obj(nlp_f16, x4)
  # grad_x4 = NLPModels.grad(nlp_f16, x4)

  # @test typeof(obj_x4) == Float16
  # @test eltype(grad_x4) == Float16

  # # change to Float32 from Float16
  # x5 = Float32.(x4)
  # obj_x5 = obj(nlp_f16, x5)
  # grad_x5 = NLPModels.grad(nlp_f16, x5)
  # @test typeof(obj_x5) == Float32
  # @test eltype(grad_x5) == Float32

  # # change to Float64 from Float16
  # x6 = Float64.(x4)
  # obj_x6 = obj(nlp_f16, x6)
  # grad_x6 = NLPModels.grad(nlp_f16, x6)
  # @test typeof(obj_x6) == Float64
  # @test eltype(grad_x6) == Float64
end

0 comments on commit c244bfa

Please sign in to comment.