From a377065f5e8afa8943d44f803f400929742a3b26 Mon Sep 17 00:00:00 2001
From: farhadrclass <31899325+farhadrclass@users.noreply.github.com>
Date: Tue, 4 Jul 2023 19:22:29 -0400
Subject: [PATCH 01/20] Update tutorial.md

---
 docs/src/tutorial.md | 48 +++++++++++---------------------------------
 1 file changed, 12 insertions(+), 36 deletions(-)

diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md
index ab482f6..d8302da 100644
--- a/docs/src/tutorial.md
+++ b/docs/src/tutorial.md
@@ -1,4 +1,5 @@
 # FluxNLPModels.jl Tutorial
+
 ## Setting up
 This step-by-step example assumes prior knowledge of [Julia](https://julialang.org/) and [Flux.jl](https://github.com/FluxML/Flux.jl).
 See the [Julia tutorial](https://julialang.org/learning/) and the [Flux.jl tutorial](https://fluxml.ai/Flux.jl/stable/models/quickstart/#man-quickstart) for more details.
@@ -25,6 +26,7 @@ We will cover the following:
 ### Packages needed
 
 ```@example FluxNLPModel
+
 using FluxNLPModels
 using Flux, NLPModels
 using Flux.Data: DataLoader
@@ -32,6 +34,7 @@ using Flux: onehotbatch, onecold, @epochs
 using Flux.Losses: logitcrossentropy
 using MLDatasets
 using JSOSolvers
+
 ```
 
 ### Setting Neural Network (NN) Model
@@ -39,23 +42,13 @@ using JSOSolvers
 
 First, a NN model needs to be defined in Flux.jl. Our model is very simple: It consists of one "hidden layer" with 32 "neurons", each connected to every input pixel. Each neuron has a sigmoid nonlinearity and is connected to every "neuron" in the output layer. Finally, softmax produces probabilities, i.e., positive numbers that add up to 1.
 
-We have two ways of defining the models:
-
-1. **Direct Definition**: You can directly define the model in your code, specifying the layers and their connections using Flux's syntax. This approach allows for more flexibility and customization.
-  ```@example FluxNLPModel
-  model = Flux.Chain(Dense(28^2=> 32, relu), Dense(32=>10))
-  ```
-
-2. **Method-Based Definition**: Alternatively, you can create a method that returns the model. This method can encapsulate the specific architecture and parameters of the model, making it easier to reuse and manage. It provides a convenient way to define and initialize the model when needed.
-  ```@example FluxNLPModel
-  function build_model(; imgsize = (28, 28, 1), nclasses = 10)
-    return Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses))
-  end
-  ```
-
-
+One can create a method that returns the model. This method can encapsulate the specific architecture and parameters of the model, making it easier to reuse and manage. It provides a convenient way to define and initialize the model when needed.
 
-Both approaches have their advantages, and you can choose the one that suits your needs and coding style.
+```@example FluxNLPModel
+function build_model(; imgsize = (28, 28, 1), nclasses = 10)
+  return Chain(Dense(prod(imgsize), 32, relu), Dense(32, nclasses))
+end
+```
 
 ### Loss function
 
 We can define any loss function that we need; here we use Flux's built-in logitcrossentropy
 
 ```@example FluxNLPModel
 const loss = Flux.logitcrossentropy
 ```
 
-We also define a loss function `loss_and_accuracy`.
-```@example FluxNLPModel - function loss_and_accuracy(data_loader, model, device) - acc = 0 - ls = 0.0f0 - num = 0 - for (x, y) in data_loader - x, y = device(x), device(y) - ŷ = model(x) - ls += loss(ŷ, y, agg = sum) - acc += sum(onecold(ŷ) .== onecold(y)) ## Decode the output of the model - num += size(x)[end] - end - return ls / num, acc / num - end -``` - ### Load datasets and define minibatch In this section, we will cover the process of loading datasets and defining minibatches for training your model using Flux. Loading and preprocessing data is an essential step in machine learning, as it allows you to train your model on real-world examples. @@ -93,7 +69,7 @@ Additionally, we will define minibatches, which are subsets of the dataset that ```@example FluxNLPModel -function getdata(batchsize) +function getdata(bs) ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" # Loading Dataset @@ -108,8 +84,8 @@ function getdata(batchsize) ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9) # Create DataLoaders (mini-batch iterators) - train_loader = DataLoader((xtrain, ytrain), batchsize = batchsize, shuffle = true) - test_loader = DataLoader((xtest, ytest), batchsize = batchsize) + train_loader = DataLoader((xtrain, ytrain), batchsize = bs, shuffle = true) + test_loader = DataLoader((xtest, ytest), batchsize = bs) return train_loader, test_loader end From 1e5a7948071a1c5943d65bf2857cdc9991ec57c1 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Tue, 4 Jul 2023 19:32:35 -0400 Subject: [PATCH 02/20] updated the index --- README.md | 3 ++- docs/src/index.md | 5 ++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 15fd506..4707b0b 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,8 @@ This package serves as an NLPModels interface to the [Flux.jl](https://github.co To use FluxNLPModels, add the package in the Julia package manager: ```julia -pkg> add FluxNLPModels +# pkg> add FluxNLPModels +pkg> add https://github.com/JuliaSmoothOptimizers/FluxNLPModels.jl.git ``` ## How to Use diff --git a/docs/src/index.md b/docs/src/index.md index 6699d8f..75ddd7e 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -26,12 +26,11 @@ In addition, it provides tools to: - Measure the neural network's loss at the current `w`. ## How to use -Check the tutorials - +Check the [tutorial](https://jso.dev/FluxNLPModels.jl/dev/tutorial/). # Bug reports and discussions -If you think you found a bug, feel free to open an [issue] TODO: add repo link +If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/FluxNLPModels.jl/issues). TODO: add repo link Focused suggestions and requests can also be opened as issues. Before opening a pull request, please start an issue or a discussion on the topic. If you have a question that is not suited for a bug report, feel free to start a discussion [here](#TODO). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers). Questions about any of our packages are welcome. 
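The two patches above already walk through the full FluxNLPModels workflow: install the package, build a Flux chain, wrap it as an NLPModel, and query it through the NLPModels API. As a compact reference, here is a minimal, self-contained sketch of that workflow. The toy random dataset, layer sizes, and batch size are illustrative stand-ins (the tutorial uses MNIST); the API calls themselves, `FluxNLPModel`, `obj`, and `grad!`, are the ones documented in these patches.

```julia
# A minimal sketch of the FluxNLPModels workflow from the tutorial patch above;
# toy random data replaces MNIST, and the layer sizes are arbitrary.
using FluxNLPModels
using Flux, NLPModels
using Flux.Data: DataLoader
using Flux: onehotbatch

model = Chain(Dense(4, 8, relu), Dense(8, 2))   # tiny 2-class classifier

x = rand(Float32, 4, 32)                        # 32 samples, 4 features each
y = onehotbatch(rand(0:1, 32), 0:1)             # one-hot labels, as in getdata
train_loader = DataLoader((x, y), batchsize = 8, shuffle = true)
test_loader = DataLoader((x, y), batchsize = 8)

# Wrap the chain as an NLPModel; loss_f is the keyword used in the patches
nlp = FluxNLPModel(model, train_loader, test_loader; loss_f = Flux.logitcrossentropy)

w = copy(nlp.w)      # flattened weights, length nlp.meta.nvar
f = obj(nlp, w)      # loss on the current training minibatch
g = similar(w)
grad!(nlp, w, g)     # gradient with respect to w, in place
```

This `w`/`g` pair is exactly what an NLPModels-based solver manipulates when it optimizes the network, which is what the later patches in this series set up with JSOSolvers.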
From 323c8e4f59fef3a1f958dfd21ceaf914fe4db8f8 Mon Sep 17 00:00:00 2001 From: tmigot Date: Wed, 5 Jul 2023 09:41:43 -0400 Subject: [PATCH 03/20] Version 0.1.0 First version should 0.1.0 --- Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Project.toml b/Project.toml index 31065a4..83ea529 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FluxNLPModels" uuid = "31fab0eb-bb78-4d15-8993-a8083bba6d27" authors = ["Farhad Rahbarnia "] -version = "0.0.1" +version = "0.1.0" [deps] Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c" From e90ef4d21da0307c3c4da0714e3715f5c1c3c767 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:19:48 -0400 Subject: [PATCH 04/20] fixing #11 --- src/FluxNLPModels.jl | 8 ++++++-- test/runtests.jl | 22 ++++++++++++++++++++-- 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/FluxNLPModels.jl b/src/FluxNLPModels.jl index 174df80..3544c6e 100644 --- a/src/FluxNLPModels.jl +++ b/src/FluxNLPModels.jl @@ -55,8 +55,8 @@ function FluxNLPModel( chain_ANN::T, data_train, data_test; - current_training_minibatch = first(data_train), - current_test_minibatch = first(data_test), + current_training_minibatch = [], + current_test_minibatch = [], size_minibatch::Int = 100, loss_f::F = Flux.mse, #Flux.crossentropy, ) where {T <: Chain, F <: Function} @@ -66,6 +66,10 @@ function FluxNLPModel( if (isempty(data_train) || isempty(data_test)) error("train data or test is empty") end + if (isempty(current_training_minibatch) || isempty(current_test_minibatch)) + current_training_minibatch = first(data_train) + current_test_minibatch = first(data_test) + end return FluxNLPModel( meta, diff --git a/test/runtests.jl b/test/runtests.jl index ddfa7fc..50a32e0 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -70,11 +70,23 @@ device = cpu #TODO should we test on GPU? 
println(norm(grad_x1 - grad_x1_2)) @test norm(grad_x1 - grad_x1_2) ≈ 0.0 - # @test grad_x1 ≈ grad_x1_2 - # @test all(grad_x1 .≈ grad_x1_2) @test x1 == DNNLPModel.w @test Flux.params(DNNLPModel.chain)[1][1] == x1[1] @test Flux.params(DNNLPModel.chain)[1][2] == x1[2] + + @test_throws Exception FluxNLPModel(DN, [], test_data) # if the train data is empty + @test_throws Exception FluxNLPModel(DN, train_data , []) # if the test data is empty + @test_throws Exception FluxNLPModel(DN, [] , []) # if the both data is empty + + # Testing if the value of the first batch was passed it + DNNLPModel_2 = FluxNLPModel(DN, train_data , test_data, first(train_data), first(test_data)) + + #checking if we can call accuracy + train_acc = FluxNLPModels.accuracy(DNNLPModel_2; data_loader = train_data) # accuracy on train data + test_acc = FluxNLPModels.accuracy(DNNLPModel_2) # on the test data + + @test train_acc >= 0.0 + @test train_acc <= 1.0 end @testset "minibatch tests" begin @@ -89,4 +101,10 @@ end buffer_minibatch = deepcopy(nlp.current_training_minibatch) @test minibatch_next_train!(nlp) # should return true @test !isequal(nlp.current_training_minibatch, buffer_minibatch) + + reset_minibatch_test!(nlp) + @test minibatch_next_test!(nlp) # should return true + @test minibatch_next_test!(nlp) # should return true + + end From 84b32aec36f50e847ae0f79ba7f00043e5d89477 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:21:02 -0400 Subject: [PATCH 05/20] Update runtests.jl --- test/runtests.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/test/runtests.jl b/test/runtests.jl index 50a32e0..49f7cbf 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -100,6 +100,7 @@ end @test nlp.current_training_minibatch_status === nothing buffer_minibatch = deepcopy(nlp.current_training_minibatch) @test minibatch_next_train!(nlp) # should return true + @test minibatch_next_train!(nlp) # should return true @test !isequal(nlp.current_training_minibatch, buffer_minibatch) reset_minibatch_test!(nlp) From 67983f53ec35dbe66d1ecde2252e6a80a1c17c9c Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:26:39 -0400 Subject: [PATCH 06/20] Update runtests.jl --- test/runtests.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/runtests.jl b/test/runtests.jl index 49f7cbf..7c9120a 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -79,7 +79,7 @@ device = cpu #TODO should we test on GPU? @test_throws Exception FluxNLPModel(DN, [] , []) # if the both data is empty # Testing if the value of the first batch was passed it - DNNLPModel_2 = FluxNLPModel(DN, train_data , test_data, first(train_data), first(test_data)) + DNNLPModel_2 = FluxNLPModel(DN, train_data , test_data, current_training_minibatch = first(train_data), current_test_minibatch =first(test_data)) #checking if we can call accuracy train_acc = FluxNLPModels.accuracy(DNNLPModel_2; data_loader = train_data) # accuracy on train data From cd5defec6f022c4ddd7a9c5adbfd70478751d4d5 Mon Sep 17 00:00:00 2001 From: farhadrclass <31899325+farhadrclass@users.noreply.github.com> Date: Wed, 5 Jul 2023 11:32:59 -0400 Subject: [PATCH 07/20] format the file --- test/runtests.jl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/test/runtests.jl b/test/runtests.jl index 7c9120a..90c2ba5 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -75,11 +75,17 @@ device = cpu #TODO should we test on GPU? 
@test Flux.params(DNNLPModel.chain)[1][2] == x1[2] @test_throws Exception FluxNLPModel(DN, [], test_data) # if the train data is empty - @test_throws Exception FluxNLPModel(DN, train_data , []) # if the test data is empty - @test_throws Exception FluxNLPModel(DN, [] , []) # if the both data is empty + @test_throws Exception FluxNLPModel(DN, train_data, []) # if the test data is empty + @test_throws Exception FluxNLPModel(DN, [], []) # if the both data is empty # Testing if the value of the first batch was passed it - DNNLPModel_2 = FluxNLPModel(DN, train_data , test_data, current_training_minibatch = first(train_data), current_test_minibatch =first(test_data)) + DNNLPModel_2 = FluxNLPModel( + DN, + train_data, + test_data, + current_training_minibatch = first(train_data), + current_test_minibatch = first(test_data), + ) #checking if we can call accuracy train_acc = FluxNLPModels.accuracy(DNNLPModel_2; data_loader = train_data) # accuracy on train data @@ -102,10 +108,8 @@ end @test minibatch_next_train!(nlp) # should return true @test minibatch_next_train!(nlp) # should return true @test !isequal(nlp.current_training_minibatch, buffer_minibatch) - + reset_minibatch_test!(nlp) @test minibatch_next_test!(nlp) # should return true @test minibatch_next_test!(nlp) # should return true - - end From d2cf7b98da9b979c87661aad96038a806df22f2f Mon Sep 17 00:00:00 2001 From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com> Date: Wed, 12 Jul 2023 12:50:15 -0400 Subject: [PATCH 08/20] Cleaning the Todos (#13) * Cleaning the Todos --------- Co-authored-by: tmigot --- README.md | 4 +--- docs/make.jl | 1 - docs/src/index.md | 16 +++++----------- src/FluxNLPModels.jl | 6 +++--- src/utils.jl | 2 +- test/runtests.jl | 6 +++--- 6 files changed, 13 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 4707b0b..532dc0d 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![Dev](https://img.shields.io/badge/docs-dev-blue.svg)](https://JuliaSmoothOptimizers.github.io/FluxNLPModels.jl/dev) [![Build Status](https://github.com/JuliaSmoothOptimizers/FluxNLPModels.jl/workflows/CI/badge.svg)](https://github.com/JuliaSmoothOptimizers/FluxNLPModels.jl/actions) [![Codecov](https://codecov.io/gh/JuliaSmoothOptimizers/FluxNLPModels.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/JuliaSmoothOptimizers/FluxNLPModels.jl) - This package serves as an NLPModels interface to the [Flux.jl](https://github.com/FluxML/Flux.jl) deep learning framework. It enables seamless integration between Flux's neural network architectures and NLPModels' optimization tools for natural language processing tasks. @@ -13,8 +12,7 @@ This package serves as an NLPModels interface to the [Flux.jl](https://github.co To use FluxNLPModels, add the package in the Julia package manager: ```julia -# pkg> add FluxNLPModels -pkg> add https://github.com/JuliaSmoothOptimizers/FluxNLPModels.jl.git +pkg> add FluxNLPModels ``` ## How to Use diff --git a/docs/make.jl b/docs/make.jl index ba7a8c5..5570350 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -1,4 +1,3 @@ -#TODO redo this section using Documenter, FluxNLPModels makedocs( diff --git a/docs/src/index.md b/docs/src/index.md index 75ddd7e..343aad7 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -1,19 +1,18 @@ -#TODO redo this section # FluxNLPModels.jl ## Compatibility Julia ≥ 1.6. 
## How to install -TODO: this section needs work since our package is not yet register + This module can be installed with the following command: ```julia -# pkg> add FluxNLPModels -# pkg> test FluxNLPModels +pkg> add FluxNLPModels ``` ## Synopsis -FluxNLPModels exposes neural network models as optimization problems conforming to the NLPModels.jl API. FluxNLPModels is an interface between [Flux.jl](https://github.com/FluxML/Flux.jl)'s classification neural networks and [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl.git). + +FluxNLPModels exposes neural network models as optimization problems conforming to the [NLPModels API](https://github.com/JuliaSmoothOptimizers/NLPModels.jl). FluxNLPModels is an interface between [Flux.jl](https://github.com/FluxML/Flux.jl)'s classification neural networks and [NLPModels.jl](https://github.com/JuliaSmoothOptimizers/NLPModels.jl). A `FluxNLPModel` gives the user access to: - The values of the neural network variables/weights `w`; @@ -25,12 +24,7 @@ In addition, it provides tools to: - Retrieve the current minibatch ; - Measure the neural network's loss at the current `w`. -## How to use -Check the [tutorial](https://jso.dev/FluxNLPModels.jl/dev/tutorial/). - # Bug reports and discussions -If you think you found a bug, feel free to open an [issue](https://github.com/JuliaSmoothOptimizers/FluxNLPModels.jl/issues). TODO: add repo link -Focused suggestions and requests can also be opened as issues. Before opening a pull request, please start an issue or a discussion on the topic. +If you encounter any bugs or have suggestions for improvement, please open an [issue](https://github.com/JuliaSmoothOptimizers/FluxNLPModels.jl/issues). For general questions or discussions related to this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers) organization, feel free to start a discussion [here](https://github.com/JuliaSmoothOptimizers/Organization/discussions). -If you have a question that is not suited for a bug report, feel free to start a discussion [here](#TODO). This forum is for general discussion about this repository and the [JuliaSmoothOptimizers](https://github.com/JuliaSmoothOptimizers). Questions about any of our packages are welcome. 
diff --git a/src/FluxNLPModels.jl b/src/FluxNLPModels.jl
index 3544c6e..2787e19 100644
--- a/src/FluxNLPModels.jl
+++ b/src/FluxNLPModels.jl
@@ -32,9 +32,9 @@ mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPM
   chain::C
   counters::Counters
   loss_f::F
-  size_minibatch::Int #TODO remove this
-  training_minibatch_iterator #TODO remove this, right now we pass the data
-  test_minibatch_iterator #TODO remove this
+  size_minibatch::Int
+  training_minibatch_iterator
+  test_minibatch_iterator
   current_training_minibatch
   current_test_minibatch
   rebuild # this is used to create the rebuild of flat function
diff --git a/src/utils.jl b/src/utils.jl
index 0705240..c5f8880 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -3,7 +3,7 @@
 
 Sets the variables and rebuilds the chain
 """
-function set_vars!(nlp::AbstractFluxNLPModel{T, S}, new_w::AbstractVector{T}) where {T <: Number, S} #TODO test T
+function set_vars!(nlp::AbstractFluxNLPModel{T, S}, new_w::AbstractVector{T}) where {T <: Number, S}
   nlp.w .= new_w
   nlp.chain = nlp.rebuild(nlp.w)
 end
diff --git a/test/runtests.jl b/test/runtests.jl
index 90c2ba5..bd0793b 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,7 +1,7 @@
 using Test
 using FluxNLPModels
 using CUDA, Flux, NLPModels
-using Flux.Data: DataLoader #TODO update this
+using Flux.Data: DataLoader
 using Flux: onehotbatch, onecold, @epochs
 using Flux.Losses: logitcrossentropy
 using Base: @kwdef
@@ -24,7 +24,7 @@ function getdata(args)
   # One-hot-encode the labels
   ytrain, ytest = onehotbatch(ytrain, 0:9), onehotbatch(ytest, 0:9)
 
-  # Create DataLoaders (mini-batch iterators) #TODO it is passed down
+  # Create DataLoaders (mini-batch iterators)
   train_loader = DataLoader((xtrain, ytrain), batchsize = args.batchsize, shuffle = true)
   test_loader = DataLoader((xtest, ytest), batchsize = args.batchsize)
 
@@ -44,7 +44,7 @@ end
 
 args = Args() # collect options in a struct for convenience
 
-device = cpu #TODO should we test on GPU?
+device = cpu
 
 @testset "FluxNLPModels tests" begin

From faa28bbd7b47c38debff1d80bb838c6fc6695830 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 12 Jul 2023 12:54:21 -0400
Subject: [PATCH 09/20] :robot: Format .jl files (#17)

Co-authored-by: tmigot
---
 src/FluxNLPModels.jl | 4 ++--
 src/utils.jl         | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/FluxNLPModels.jl b/src/FluxNLPModels.jl
index 2787e19..9ffe11c 100644
--- a/src/FluxNLPModels.jl
+++ b/src/FluxNLPModels.jl
@@ -32,8 +32,8 @@ mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPM
   chain::C
   counters::Counters
   loss_f::F
-  size_minibatch::Int 
-  training_minibatch_iterator 
+  size_minibatch::Int
+  training_minibatch_iterator
   test_minibatch_iterator
   current_training_minibatch
   current_test_minibatch
diff --git a/src/utils.jl b/src/utils.jl
index c5f8880..71ee317 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -3,7 +3,7 @@
 
 Sets the variables and rebuilds the chain
 """
-function set_vars!(nlp::AbstractFluxNLPModel{T, S}, new_w::AbstractVector{T}) where {T <: Number, S} 
+function set_vars!(nlp::AbstractFluxNLPModel{T, S}, new_w::AbstractVector{T}) where {T <: Number, S}
   nlp.w .= new_w
   nlp.chain = nlp.rebuild(nlp.w)
 end

From b386159d49fe5ff5c67f1f9f42a140cdee4ab2c0 Mon Sep 17 00:00:00 2001
From: tmigot
Date: Wed, 12 Jul 2023 13:43:02 -0400
Subject: [PATCH 10/20] Update docstring in NLPModel API (#19)

---
 src/FluxNLPModels_methods.jl | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/FluxNLPModels_methods.jl b/src/FluxNLPModels_methods.jl
index 9ad89d0..9d50597 100644
--- a/src/FluxNLPModels_methods.jl
+++ b/src/FluxNLPModels_methods.jl
@@ -1,14 +1,15 @@
 """
-    f = obj(nlp, x)
+    f = obj(nlp, w)
 
-Evaluate `f(x)`, the objective function of `nlp` at `x`.
+Evaluate `f(w)`, the objective function of `nlp` at `w`.
 
 # Arguments
-- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct
-- `w::AbstractVector{T}`: is the vector of weights/variables;
+- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
+- `w::AbstractVector{T}`: is the vector of weights/variables.
 
 # Output
-- `f_w`: the new objective function
+- `f_w`: the new objective function.
+
 """
@@ -18,16 +19,18 @@ function NLPModels.obj(nlp::AbstractFluxNLPModel{T, S}, w::AbstractVector{T}) wh
 end
 
 """
-    g = grad!(nlp, x, g)
+    g = grad!(nlp, w, g)
+
+Evaluate `∇f(w)`, the gradient of the objective function at `w` in place.
 
-Evaluate `∇f(x)`, the gradient of the objective function at `x` in place.
 # Arguments
-- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct
+- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct;
 - `w::AbstractVector{T}`: is the vector of weights/variables;
--`g::AbstractVector{T}`: the gradient vector
+- `g::AbstractVector{T}`: the gradient vector.
 
 # Output
-- `g`: the gradient at point x
+- `g`: the gradient at point `w`.
+
 """
 function NLPModels.grad!(
   nlp::AbstractFluxNLPModel{T, S},
@@ -42,17 +45,17 @@ end
 
 """
-    objgrad!(nlp, x, g)
+    objgrad!(nlp, w, g)
 
- Evaluate both `f(x)`, the objective function of `nlp` at `x` and `∇f(x)`, the gradient of the objective function at `x` in place.
+Evaluate both `f(w)`, the objective function of `nlp` at `w`, and `∇f(w)`, the gradient of the objective function at `w` in place.
# Arguments -- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct +- `nlp::AbstractFluxNLPModel{T, S}`: the FluxNLPModel data struct; - `w::AbstractVector{T}`: is the vector of weights/variables; --`g::AbstractVector{T}`: the gradient vector +- `g::AbstractVector{T}`: the gradient vector. # Output -- `f_w`, `g`: the new objective function, and the gradient at point x +- `f_w`, `g`: the new objective function, and the gradient at point w. """ function NLPModels.objgrad!( @@ -61,7 +64,6 @@ function NLPModels.objgrad!( g::AbstractVector{T}, ) where {T, S} @lencheck nlp.meta.nvar w g - #both updates increment!(nlp, :neval_obj) increment!(nlp, :neval_grad) set_vars!(nlp, w) From f92d0db06658e40f74c39ecc932773506bf17bff Mon Sep 17 00:00:00 2001 From: Paul Raynaud Date: Wed, 12 Jul 2023 15:00:28 -0300 Subject: [PATCH 11/20] Add Flux's neural network example in tutorial --- docs/src/tutorial.md | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md index d8302da..0118aef 100644 --- a/docs/src/tutorial.md +++ b/docs/src/tutorial.md @@ -4,7 +4,6 @@ This step-by-step example assumes prior knowledge of [Julia](https://julialang.org/) and [Flux.jl](https://github.com/FluxML/Flux.jl). See the [Julia tutorial](https://julialang.org/learning/) and the [Flux.jl tutorial](https://fluxml.ai/Flux.jl/stable/models/quickstart/#man-quickstart) for more details. - We have aligned this tutorial to [MLP_MNIST](https://github.com/FluxML/model-zoo/blob/master/vision/mlp_mnist/mlp_mnist.jl) example and reused some of their functions. ### What we cover in this tutorial @@ -26,7 +25,6 @@ We will cover the following: ### Packages needed ```@example FluxNLPModel - using FluxNLPModels using Flux, NLPModels using Flux.Data: DataLoader @@ -34,7 +32,6 @@ using Flux: onehotbatch, onecold, @epochs using Flux.Losses: logitcrossentropy using MLDatasets using JSOSolvers - ``` ### Setting Neural Network (NN) Model @@ -58,7 +55,6 @@ We can define any loss function that we need, here we use Flux build-in logitcro const loss = Flux.logitcrossentropy ``` - ### Load datasets and define minibatch In this section, we will cover the process of loading datasets and defining minibatches for training your model using Flux. Loading and preprocessing data is an essential step in machine learning, as it allows you to train your model on real-world examples. @@ -66,8 +62,6 @@ We will specifically focus on loading the MNIST dataset. We will divide the data Additionally, we will define minibatches, which are subsets of the dataset that are used during the training process. Minibatches enable efficient training by processing a small batch of examples at a time, instead of the entire dataset. This technique helps in managing memory resources and improving convergence speed. - - ```@example FluxNLPModel function getdata(bs) ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" @@ -91,22 +85,30 @@ function getdata(bs) end ``` - ### Transfering to FluxNLPModels +Definition of a LeNet NLPModel. +More details about defining neural networks with Flux.jl can be found [here](http://fluxml.ai/Flux.jl/stable/). 
```@example FluxNLPModel device = cpu train_loader, test_loader = getdata(128) ## Construct model - model = build_model() |> device + LeNet = + Chain( + Conv((5, 5), 1 => 6, relu), + MaxPool((2, 2)), + Conv((5, 5), 6 => 16, relu), + MaxPool((2, 2)), + Flux.flatten, + Dense(256 => 120, relu), + Dense(120 => 84, relu), + Dense(84 => 10), + ) |> device # now we set the model to FluxNLPModel - nlp = FluxNLPModel(model, train_loader, test_loader; loss_f = loss) + nlp = FluxNLPModel(LeNet, train_loader, test_loader; loss_f = loss) ``` - - - ## Tools associated with a FluxNLPModel The problem dimension `n`, where `w` ∈ ℝⁿ: @@ -130,4 +132,4 @@ The length of `w` must be `nlp.meta.nvar`. ```@example FluxNLPModel g = similar(w) NLPModels.grad!(nlp, w, g) -``` \ No newline at end of file +``` From dc2daef6fc528a5342df3a1fba02a1b419bf9cd0 Mon Sep 17 00:00:00 2001 From: paraynaud Date: Wed, 12 Jul 2023 17:26:58 -0300 Subject: [PATCH 12/20] Revert "Add Flux's neural network example in tutorial" This reverts commit f92d0db06658e40f74c39ecc932773506bf17bff. --- docs/src/tutorial.md | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md index 0118aef..d8302da 100644 --- a/docs/src/tutorial.md +++ b/docs/src/tutorial.md @@ -4,6 +4,7 @@ This step-by-step example assumes prior knowledge of [Julia](https://julialang.org/) and [Flux.jl](https://github.com/FluxML/Flux.jl). See the [Julia tutorial](https://julialang.org/learning/) and the [Flux.jl tutorial](https://fluxml.ai/Flux.jl/stable/models/quickstart/#man-quickstart) for more details. + We have aligned this tutorial to [MLP_MNIST](https://github.com/FluxML/model-zoo/blob/master/vision/mlp_mnist/mlp_mnist.jl) example and reused some of their functions. ### What we cover in this tutorial @@ -25,6 +26,7 @@ We will cover the following: ### Packages needed ```@example FluxNLPModel + using FluxNLPModels using Flux, NLPModels using Flux.Data: DataLoader @@ -32,6 +34,7 @@ using Flux: onehotbatch, onecold, @epochs using Flux.Losses: logitcrossentropy using MLDatasets using JSOSolvers + ``` ### Setting Neural Network (NN) Model @@ -55,6 +58,7 @@ We can define any loss function that we need, here we use Flux build-in logitcro const loss = Flux.logitcrossentropy ``` + ### Load datasets and define minibatch In this section, we will cover the process of loading datasets and defining minibatches for training your model using Flux. Loading and preprocessing data is an essential step in machine learning, as it allows you to train your model on real-world examples. @@ -62,6 +66,8 @@ We will specifically focus on loading the MNIST dataset. We will divide the data Additionally, we will define minibatches, which are subsets of the dataset that are used during the training process. Minibatches enable efficient training by processing a small batch of examples at a time, instead of the entire dataset. This technique helps in managing memory resources and improving convergence speed. + + ```@example FluxNLPModel function getdata(bs) ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" @@ -85,30 +91,22 @@ function getdata(bs) end ``` + ### Transfering to FluxNLPModels -Definition of a LeNet NLPModel. -More details about defining neural networks with Flux.jl can be found [here](http://fluxml.ai/Flux.jl/stable/). 
```@example FluxNLPModel device = cpu train_loader, test_loader = getdata(128) ## Construct model - LeNet = - Chain( - Conv((5, 5), 1 => 6, relu), - MaxPool((2, 2)), - Conv((5, 5), 6 => 16, relu), - MaxPool((2, 2)), - Flux.flatten, - Dense(256 => 120, relu), - Dense(120 => 84, relu), - Dense(84 => 10), - ) |> device + model = build_model() |> device # now we set the model to FluxNLPModel - nlp = FluxNLPModel(LeNet, train_loader, test_loader; loss_f = loss) + nlp = FluxNLPModel(model, train_loader, test_loader; loss_f = loss) ``` + + + ## Tools associated with a FluxNLPModel The problem dimension `n`, where `w` ∈ ℝⁿ: @@ -132,4 +130,4 @@ The length of `w` must be `nlp.meta.nvar`. ```@example FluxNLPModel g = similar(w) NLPModels.grad!(nlp, w, g) -``` +``` \ No newline at end of file From 9e63d78c6b383f9934a545c638b6cc06feca3df4 Mon Sep 17 00:00:00 2001 From: paraynaud Date: Wed, 12 Jul 2023 17:33:03 -0300 Subject: [PATCH 13/20] add JSOSolvers.R2 mention --- docs/src/tutorial.md | 21 +- tat | 562 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 574 insertions(+), 9 deletions(-) create mode 100644 tat diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md index d8302da..a59b8be 100644 --- a/docs/src/tutorial.md +++ b/docs/src/tutorial.md @@ -26,7 +26,6 @@ We will cover the following: ### Packages needed ```@example FluxNLPModel - using FluxNLPModels using Flux, NLPModels using Flux.Data: DataLoader @@ -34,7 +33,6 @@ using Flux: onehotbatch, onecold, @epochs using Flux.Losses: logitcrossentropy using MLDatasets using JSOSolvers - ``` ### Setting Neural Network (NN) Model @@ -58,7 +56,6 @@ We can define any loss function that we need, here we use Flux build-in logitcro const loss = Flux.logitcrossentropy ``` - ### Load datasets and define minibatch In this section, we will cover the process of loading datasets and defining minibatches for training your model using Flux. Loading and preprocessing data is an essential step in machine learning, as it allows you to train your model on real-world examples. @@ -66,8 +63,6 @@ We will specifically focus on loading the MNIST dataset. We will divide the data Additionally, we will define minibatches, which are subsets of the dataset that are used during the training process. Minibatches enable efficient training by processing a small batch of examples at a time, instead of the entire dataset. This technique helps in managing memory resources and improving convergence speed. - - ```@example FluxNLPModel function getdata(bs) ENV["DATADEPS_ALWAYS_ACCEPT"] = "true" @@ -91,7 +86,6 @@ function getdata(bs) end ``` - ### Transfering to FluxNLPModels ```@example FluxNLPModel @@ -105,9 +99,6 @@ end nlp = FluxNLPModel(model, train_loader, test_loader; loss_f = loss) ``` - - - ## Tools associated with a FluxNLPModel The problem dimension `n`, where `w` ∈ ℝⁿ: ```@example FluxNLPModel @@ -130,4 +121,16 @@ The length of `w` must be `nlp.meta.nvar`. ```@example FluxNLPModel g = similar(w) NLPModels.grad!(nlp, w, g) +``` + +## Train a neural network with JSOSOlvers.R2 + +```@example FluxNLPModel +max_time = 60. 
# run at most 1min
+callback = (nlp,
+            solver,
+            stats) -> FluxNLPModels.minibatch_next_train!(nlp)
+
+solver_stats = R2(nlp; callback, max_time)
+test_accuracy = FluxNLPModels.accuracy(nlp) #check the accuracy
+```
\ No newline at end of file
diff --git a/tat b/tat
new file mode 100644
index 0000000..4fdce5b
--- /dev/null
+++ b/tat
@@ -0,0 +1,562 @@
+[562 lines of `git log` output for this repository, committed here by accident and deleted again in PATCH 14/20 below; omitted]

From 8b07426ae911b04bf8d1c914023bc95c9aa40523 Mon Sep 17 00:00:00 2001
From: paraynaud
Date: Thu, 13 Jul 2023 09:53:35 -0300
Subject: [PATCH 14/20] apply review @tmigot

---
 docs/src/tutorial.md | 2 +-
 tat                  | 562 -------------------------------------------
 2 files changed, 1 insertion(+), 563 deletions(-)
 delete mode 100644 tat

diff --git a/docs/src/tutorial.md b/docs/src/tutorial.md
index a59b8be..24a23e4 100644
--- a/docs/src/tutorial.md
+++ b/docs/src/tutorial.md
@@ -123,7 +123,7 @@ g = similar(w)
 NLPModels.grad!(nlp, w, g)
 ```
 
-## Train a neural network with JSOSOlvers.R2
+## Train a neural network with JSOSolvers.R2
 
 ```@example FluxNLPModel
 max_time = 60. # run at most 1min
diff --git a/tat b/tat
deleted file mode 100644
index 4fdce5b..0000000
--- a/tat
+++ /dev/null
@@ -1,562 +0,0 @@
-[the same 562 lines of accidentally committed `git log` output, removed along with the file; omitted]
0dedff36fca16f84b700af451caa29e731414237
-Author: farhadrclass <31899325+farhadrclass@users.noreply.github.com>
-Date: Thu May 4 12:51:46 2023 -0400
-
-    deleting the doc for now
-
-commit 82bb4c8479defb95ca0023e2463ba37d23f5a8b0
-Author: farhadrclass <31899325+farhadrclass@users.noreply.github.com>
-Date: Thu May 4 12:47:05 2023 -0400
-
-    first add
-
-commit 4fbf11d4dc2cdc085623944d2074615a2ede179f
-Author: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com>
-Date: Thu May 4 12:15:16 2023 -0400
-
-    Initial commit

From 2942872323dfccd552ded996ae5de9cd3914c70a Mon Sep 17 00:00:00 2001
From: Farhad Rahbarnia <31899325+farhadrclass@users.noreply.github.com>
Date: Wed, 26 Jul 2023 15:53:14 -0400
Subject: [PATCH 15/20] Update README.md

fixing a typo in the readme
---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 532dc0d..894f3e9 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 [![Build Status](https://github.com/JuliaSmoothOptimizers/FluxNLPModels.jl/workflows/CI/badge.svg)](https://github.com/JuliaSmoothOptimizers/FluxNLPModels.jl/actions)
 [![Codecov](https://codecov.io/gh/JuliaSmoothOptimizers/FluxNLPModels.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/JuliaSmoothOptimizers/FluxNLPModels.jl)
 
-This package serves as an NLPModels interface to the [Flux.jl](https://github.com/FluxML/Flux.jl) deep learning framework. It enables seamless integration between Flux's neural network architectures and NLPModels' optimization tools for natural language processing tasks.
+This package serves as an NLPModels interface to the [Flux.jl](https://github.com/FluxML/Flux.jl) deep learning framework. It enables seamless integration between Flux's neural network architectures and NLPModels' optimization tools for non-linear programming (NLP) problems.

From 51655948e53c9c044d0c7fe1d20a7762a0e72e3b Mon Sep 17 00:00:00 2001
From: d-monnet
Date: Tue, 24 Oct 2023 15:07:33 -0400
Subject: [PATCH 16/20] modify functions to handle multiple precision support
---
 src/FluxNLPModels.jl         | 28 ++++++++++---
 src/FluxNLPModels_methods.jl | 30 +++++++++-----
 src/utils.jl                 | 66 +++++++++++++++++++++++++++---
 test/multi_prec_test.jl      | 78 ++++++++++++++++++++++++++++++++++++
 test/runtests.jl             |  4 ++
 5 files changed, 185 insertions(+), 21 deletions(-)
 create mode 100644 test/multi_prec_test.jl

diff --git a/src/FluxNLPModels.jl b/src/FluxNLPModels.jl
index 9ffe11c..f1085e9 100644
--- a/src/FluxNLPModels.jl
+++ b/src/FluxNLPModels.jl
@@ -29,7 +29,7 @@ A FluxNLPModel has fields
 """
 mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPModel{T, S}
   meta::NLPModelMeta{T, S}
-  chain::C
+  chain::Vector{C}
   counters::Counters
   loss_f::F
   size_minibatch::Int
@@ -37,22 +37,33 @@ mutable struct FluxNLPModel{T, S, C <: Chain, F <: Function} <: AbstractFluxNLPModel{T, S}
   train_minibatch_iterator
   test_minibatch_iterator
   current_training_minibatch
   current_test_minibatch
-  rebuild # this is used to create the rebuild of flat function
+  rebuild # used to rebuild the chain from a flat weight vector
   current_training_minibatch_status
   current_test_minibatch_status
   w::S
+  Types::Vector{DataType}
 end
 
 """
-    FluxNLPModel(chain_ANN data_train=MLDatasets.MNIST.traindata(Float32), data_test=MLDatasets.MNIST.testdata(Float32); size_minibatch=100)
+    FluxNLPModel(chain_ANN, data_train=MLDatasets.MNIST.traindata(Float32), data_test=MLDatasets.MNIST.testdata(Float32); size_minibatch=100)
 
 Build a `FluxNLPModel` from the neural network represented by `chain_ANN`.
 `chain_ANN` is built using [Flux.jl](https://fluxml.ai/); see the Flux.jl documentation for more details.
 The other data required are: an iterator over the training dataset `data_train`, an iterator over the test dataset `data_test` and the size of the minibatch `size_minibatch`.
 Suppose `(xtrn,ytrn) = Fluxnlp.data_train`
 """
+
+function FluxNLPModel(
+  chain_ANN::C,
+  data_train,
+  data_test;
+  kwargs...
+) where {C <: Chain}
+  FluxNLPModel([chain_ANN], data_train, data_test; kwargs...)
+end
+
 function FluxNLPModel(
-  chain_ANN::T,
+  chain_ANN::Vector{T},
   data_train,
   data_test;
   current_training_minibatch = [],
@@ -60,7 +71,10 @@ function FluxNLPModel(
   size_minibatch::Int = 100,
   loss_f::F = Flux.mse, #Flux.crossentropy,
 ) where {T <: Chain, F <: Function}
-  x0, rebuild = Flux.destructure(chain_ANN)
+  d = Flux.destructure.(chain_ANN)
+  rebuild = [del[2] for del in d]
+  x0 = d[end][1]
+  Types = eltype.([del[1] for del in d])
   n = length(x0)
   meta = NLPModelMeta(n, x0 = x0)
   if (isempty(data_train) || isempty(data_test))
@@ -70,7 +84,8 @@ function FluxNLPModel(
     current_training_minibatch = first(data_train)
     current_test_minibatch = first(data_test)
   end
-
+  test_types_consistency(Types, data_train, data_test)
+  test_devices_consistency(chain_ANN, data_train, data_test)
   return FluxNLPModel(
     meta,
     chain_ANN,
@@ -85,6 +100,7 @@ function FluxNLPModel(
     nothing,
     nothing,
     x0,
+    Types,
   )
 end
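The bookkeeping in the constructor above is dense to read inside a diff. Here is a minimal standalone sketch of what broadcasting `Flux.destructure` over a vector of chains computes; the two-layer chains and sizes are illustrative only and are not part of the patch:

```julia
using Flux

# Two copies of the same architecture stored in different FP formats,
# as the new `chain::Vector{C}` field expects (lowest precision first).
chains = [f16(Chain(Dense(2 => 3, tanh), Dense(3 => 2))),
          f32(Chain(Dense(2 => 3, tanh), Dense(3 => 2)))]

d = Flux.destructure.(chains)           # one (flat_weights, rebuild) pair per chain
rebuild = [del[2] for del in d]         # per-precision rebuild functions
x0 = d[end][1]                          # starting point in the highest precision
Types = eltype.([del[1] for del in d])  # [Float16, Float32]
```

Storing one rebuild function and one element type per chain is what later lets `obj` and `grad!` pick the network whose precision matches the incoming weight vector.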
""" function NLPModels.grad!( nlp::AbstractFluxNLPModel{T, S}, - w::AbstractVector{T}, - g::AbstractVector{T}, + w::AbstractVector, + g::AbstractVector, ) where {T, S} @lencheck nlp.meta.nvar w g - increment!(nlp, :neval_grad) x, y = nlp.current_training_minibatch - g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1] + #check_weights_data_type(w,x) + increment!(nlp, :neval_grad) + type_ind = find_type_index(nlp,w) + nlp.chain[type_ind] = nlp.rebuild[type_ind](w) + + g .= gradient(w_g -> local_loss(nlp,nlp.rebuild[type_ind],x,y,w_g),w)[1] return g end +function NLPModels.grad( + nlp::AbstractFluxNLPModel{T, S}, + w::AbstractVector, +) where {T, S} + g = similar(w) + grad!(nlp,w,g) +end + """ objgrad!(nlp, w, g) @@ -68,9 +81,8 @@ function NLPModels.objgrad!( increment!(nlp, :neval_grad) set_vars!(nlp, w) - x, y = nlp.current_training_minibatch - f_w = nlp.loss_f(nlp.chain(x), y) - g .= gradient(w_g -> local_loss(nlp, x, y, w_g), w)[1] + f_w = obj(nlp,w) + grad!(nlp,w,g) return f_w, g end diff --git a/src/utils.jl b/src/utils.jl index 71ee317..5a94e07 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -1,17 +1,25 @@ +using CUDA """ set_vars!(model::AbstractFluxNLPModel{T,S}, new_w::AbstractVector{T}) where {T<:Number, S} Sets the vaiables and rebuild the chain """ -function set_vars!(nlp::AbstractFluxNLPModel{T, S}, new_w::AbstractVector{T}) where {T <: Number, S} +function set_vars!(nlp::AbstractFluxNLPModel{T, S}, new_w::AbstractVector) where {T <: Number, S} nlp.w .= new_w - nlp.chain = nlp.rebuild(nlp.w) + type_ind = findfirst(x->x == eltype(new_w),nlp.Types) + nlp.chain[type_ind] = nlp.rebuild[type_ind](nlp.w) end function local_loss(nlp::AbstractFluxNLPModel{T, S}, x, y, w::AbstractVector{T}) where {T, S} # increment!(nlp, :neval_obj) #TODO not sure - nlp.chain = nlp.rebuild(w) - return nlp.loss_f(nlp.chain(x), y) + type_ind = findfirst(x->x == eltype(w),nlp.Types) + nlp.chain[type_ind] = nlp.rebuild[type_ind](w) + return nlp.loss_f(nlp.chain[type_ind](x), y) +end + +function local_loss(nlp::AbstractFluxNLPModel{T, S}, rebuild, x, y, w::AbstractVector) where {T, S} + model = rebuild(w) + return nlp.loss_f(model(x), y) end """ @@ -22,10 +30,10 @@ device is set to cpu """ function accuracy( nlp::AbstractFluxNLPModel{T, S}; - model = nlp.chain, + model = nlp.chain[findfirst(x->x==nlp.Types[1],nlp.Types)], data_loader = nlp.test_minibatch_iterator, device = cpu, - myT = Float32, + myT = nlp.Types[1], ) where {T, S} acc = myT(0) num = myT(0) @@ -109,3 +117,49 @@ function minibatch_next_test!(nlp::AbstractFluxNLPModel; device = cpu) nlp.current_test_minibatch = device(item) return true end + +""" +find_type_index(nlp::AbstractFluxNLPModel,w::AbstractVector) + +find index of nlp.Types corresponding to element types of w. Returns error if not found. +""" +function find_type_index(nlp::AbstractFluxNLPModel,w::AbstractVector) + type_ind = findfirst(x->x == eltype(w),nlp.Types) + type_ind === nothing && error("$(eltype(w)) is not a format supported for weights, supported formats are $(nlp.Types)") + return type_ind +end + +""" + test_types_consistency(Types,data_train,data_test) + + Tests FluxNLPModel input FP formats consistency. 
+""" +function test_types_consistency(Types::Vector{DataType},data_train,data_test) + issorted(Types,by = x-> -eps(x)) || error("models should be provided by increasing precision FP formats") + train_type = eltype(first(data_train)[1]) + test_type = eltype(first(data_test)[1]) + Types[1] == train_type || @warn "train data FP format ($train_type) doesn't match lowest precision FP format of model weight ($(Types[1]))" + Types[1] == test_type || @warn "test data FP format ($test_type) doesn't match lowest precision FP format of model weight ($(Types[1]))" +end + +""" + test_types_consistency(chain_ANN,data_train,data_test) + + Tests FluxNLPModel loader and NN device consistency. +""" +function test_devices_consistency(chain_ANN::Vector{T},data_train,data_test) where {T <: Chain} + is_chain_gpu = [typeof(c) <: CuArray for c in chain_ANN] + if !in(sum(is_chain_gpu),[0,length(chain_ANN)]) + @error "Chain models should all be on the same device." + end + is_all_chain_gpu = is_chain_gpu[1] + is_train_gpu = typeof(first(data_train)[1]) <: CuArray + is_test_gpu = typeof(first(data_test)[1]) <: CuArray + @show is_all_chain_gpu is_train_gpu is_test_gpu + if is_all_chain_gpu != is_train_gpu + @error "train loader and models are not on the same device." + end + if is_all_chain_gpu != is_test_gpu + @error "test loader and models are not on the same device." + end +end diff --git a/test/multi_prec_test.jl b/test/multi_prec_test.jl new file mode 100644 index 0000000..cab07c8 --- /dev/null +++ b/test/multi_prec_test.jl @@ -0,0 +1,78 @@ +# test example taken from Flux quickstart guide ([https://fluxml.ai/Flux.jl/stable/models/quickstart/](https://fluxml.ai/Flux.jl/stable/models/quickstart/)) + +noisy = rand(Float32, 2, 1000) +truth = [xor(col[1]>0.5, col[2]>0.5) for col in eachcol(noisy)] +target = Flux.onehotbatch(truth, [true, false]) + +model16_cpu = Chain( + Dense(2 => 3, tanh), # activation function inside layer + BatchNorm(3), + Dense(3 => 2), + softmax) |> f16 +model32_cpu = Chain( + Dense(2 => 3, tanh), # activation function inside layer + BatchNorm(3), + Dense(3 => 2), + softmax) |> f32 +model32_gpu = Chain( + Dense(2 => 3, tanh), # activation function inside layer + BatchNorm(3), + Dense(3 => 2), + softmax) |> gpu +loader_cpu = Flux.DataLoader((noisy, target) |> f16, batchsize=64); +loader_gpu = Flux.DataLoader((noisy, target) |> gpu, batchsize=64); + +@testset "FluxNLPModel ill-instanciation checks" begin + try + FluxNLPModel([model16_cpu,model32_gpu],loader_cpu,loader_cpu) + @test false + catch + @test true + end + try + FluxNLPModel([model16_cpu,model32_cpu],loader_gpu,loader_cpu) + @test false + catch + @test true + end + try + FluxNLPModel([model16_cpu,model32_cpu],loader_cpu,loader_gpu) + @test false + catch + @test true + end + try + FluxNLPModel([model32_cpu,model16_cpu],loader_cpu,loader_cpu) # wrong model order + @test false + catch + @test true + end +end + +@testset "obj/grad FP formats consistency" begin + nlp = FluxNLPModel([model16_cpu,model32_cpu],loader_cpu,loader_cpu) + x16,_ = Flux.destructure(model16_cpu) + @test typeof(obj(nlp,x16)) == eltype(x16) + @test eltype(grad(nlp,x16)) == eltype(x16) + g16 = similar(x16) + o16, g16 = objgrad!(nlp,x16,g16) + @test typeof(o16) == eltype(x16) + @test eltype(g16) == eltype(x16) + x32 = Float32.(x16) + @test typeof(obj(nlp,x32)) == eltype(x32) + @test eltype(grad(nlp,x32)) == eltype(x32) + g32 = similar(x32) + o32, g32 = objgrad!(nlp,x32,g32) + @test typeof(o32) == eltype(x32) + @test eltype(g32) == eltype(x32) + + # gpu + nlp = 
diff --git a/test/multi_prec_test.jl b/test/multi_prec_test.jl
new file mode 100644
index 0000000..cab07c8
--- /dev/null
+++ b/test/multi_prec_test.jl
@@ -0,0 +1,78 @@
+# Test example taken from the Flux quickstart guide: https://fluxml.ai/Flux.jl/stable/models/quickstart/
+
+noisy = rand(Float32, 2, 1000)
+truth = [xor(col[1]>0.5, col[2]>0.5) for col in eachcol(noisy)]
+target = Flux.onehotbatch(truth, [true, false])
+
+model16_cpu = Chain(
+  Dense(2 => 3, tanh), # activation function inside layer
+  BatchNorm(3),
+  Dense(3 => 2),
+  softmax) |> f16
+model32_cpu = Chain(
+  Dense(2 => 3, tanh), # activation function inside layer
+  BatchNorm(3),
+  Dense(3 => 2),
+  softmax) |> f32
+model32_gpu = Chain(
+  Dense(2 => 3, tanh), # activation function inside layer
+  BatchNorm(3),
+  Dense(3 => 2),
+  softmax) |> gpu
+loader_cpu = Flux.DataLoader((noisy, target) |> f16, batchsize=64);
+loader_gpu = Flux.DataLoader((noisy, target) |> gpu, batchsize=64);
+
+@testset "FluxNLPModel ill-instantiation checks" begin
+  try
+    FluxNLPModel([model16_cpu,model32_gpu],loader_cpu,loader_cpu)
+    @test false
+  catch
+    @test true
+  end
+  try
+    FluxNLPModel([model16_cpu,model32_cpu],loader_gpu,loader_cpu)
+    @test false
+  catch
+    @test true
+  end
+  try
+    FluxNLPModel([model16_cpu,model32_cpu],loader_cpu,loader_gpu)
+    @test false
+  catch
+    @test true
+  end
+  try
+    FluxNLPModel([model32_cpu,model16_cpu],loader_cpu,loader_cpu) # wrong model order
+    @test false
+  catch
+    @test true
+  end
+end
+
+@testset "obj/grad FP formats consistency" begin
+  nlp = FluxNLPModel([model16_cpu,model32_cpu],loader_cpu,loader_cpu)
+  x16,_ = Flux.destructure(model16_cpu)
+  @test typeof(obj(nlp,x16)) == eltype(x16)
+  @test eltype(grad(nlp,x16)) == eltype(x16)
+  g16 = similar(x16)
+  o16, g16 = objgrad!(nlp,x16,g16)
+  @test typeof(o16) == eltype(x16)
+  @test eltype(g16) == eltype(x16)
+  x32 = Float32.(x16)
+  @test typeof(obj(nlp,x32)) == eltype(x32)
+  @test eltype(grad(nlp,x32)) == eltype(x32)
+  g32 = similar(x32)
+  o32, g32 = objgrad!(nlp,x32,g32)
+  @test typeof(o32) == eltype(x32)
+  @test eltype(g32) == eltype(x32)
+
+  # gpu
+  nlp = FluxNLPModel([model32_gpu],loader_gpu,loader_gpu)
+  x32,_ = Flux.destructure(model32_gpu)
+  @test typeof(obj(nlp,x32)) == eltype(x32)
+  @test eltype(grad(nlp,x32)) == eltype(x32)
+  g32 = similar(x32)
+  o32, g32 = objgrad!(nlp,x32,g32)
+  @test typeof(o32) == eltype(x32)
+  @test eltype(g32) == eltype(x32)
+end
\ No newline at end of file
diff --git a/test/runtests.jl b/test/runtests.jl
index bd0793b..78b8d21 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -113,3 +113,7 @@ end
   @test minibatch_next_test!(nlp) # should return true
   @test minibatch_next_test!(nlp) # should return true
 end
+
+@testset "Multi-precision support" begin
+  include("multi_prec_test.jl")
+end
\ No newline at end of file

From 354bb2eaeed5e22173b6f50f6baf03afe2517b74 Mon Sep 17 00:00:00 2001
From: d-monnet
Date: Tue, 24 Oct 2023 15:33:35 -0400
Subject: [PATCH 17/20] fix device check function
---
 src/utils.jl | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/utils.jl b/src/utils.jl
index 5a94e07..f049a1f 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -148,14 +148,16 @@ end
 Tests FluxNLPModel loader and NN device consistency.
 """
 function test_devices_consistency(chain_ANN::Vector{T}, data_train, data_test) where {T <: Chain}
-  is_chain_gpu = [typeof(c) <: CuArray for c in chain_ANN]
+  d = Flux.destructure.(chain_ANN)
+  weights = [del[1] for del in d]
+  is_chain_gpu = [typeof(w) <: CuArray for w in weights]
   if !in(sum(is_chain_gpu), [0, length(chain_ANN)])
     @error "Chain models should all be on the same device."
   end
   is_all_chain_gpu = is_chain_gpu[1]
   is_train_gpu = typeof(first(data_train)[1]) <: CuArray
   is_test_gpu = typeof(first(data_test)[1]) <: CuArray
-  @show is_all_chain_gpu is_train_gpu is_test_gpu
+  @show is_chain_gpu is_all_chain_gpu is_train_gpu is_test_gpu
   if is_all_chain_gpu != is_train_gpu
     @error "train loader and models are not on the same device."
   end
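The repaired check keys off the type of the destructured weight vector rather than of the `Chain` wrapper, whose `typeof` is never a `CuArray`, which is why the original comprehension was ineffective. A sketch of the idea; it assumes a working CUDA.jl installation, and without one `gpu` falls back to a no-op:

```julia
using CUDA, Flux

# The destructured weight vector reveals the hosting device: it is a CuArray
# exactly when the model's parameters live on the GPU.
model = Chain(Dense(2 => 2))
w_cpu, _ = Flux.destructure(model)
w_gpu, _ = Flux.destructure(gpu(model))

typeof(w_cpu) <: CuArray  # false
typeof(w_gpu) <: CuArray  # true on a CUDA device, false otherwise
```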
From d84273786e4423bebc371d67518edce4cad3527c Mon Sep 17 00:00:00 2001
From: d-monnet
Date: Tue, 31 Oct 2023 11:54:45 -0400
Subject: [PATCH 18/20] modify interface
---
 src/FluxNLPModels.jl    | 18 +++++-------------
 test/multi_prec_test.jl | 25 +++++--------------------
 2 files changed, 10 insertions(+), 33 deletions(-)

diff --git a/src/FluxNLPModels.jl b/src/FluxNLPModels.jl
index f1085e9..8c39169 100644
--- a/src/FluxNLPModels.jl
+++ b/src/FluxNLPModels.jl
@@ -9,7 +9,7 @@ export accuracy, set_vars!, local_loss
 
 abstract type AbstractFluxNLPModel{T, S} <: AbstractNLPModel{T, S} end
 
-"""
+"""
     FluxNLPModel{T, S, C <: Flux.Chain} <: AbstractNLPModel{T, S}
 
 Data structure that makes the interfaces between neural networks defined with [Flux.jl](https://fluxml.ai/) and [NLPModels](https://github.com/JuliaSmoothOptimizers/NLPModels.jl).
@@ -52,26 +52,18 @@ Build a `FluxNLPModel` from the neural network represented by `chain_ANN`.
 The other data required are: an iterator over the training dataset `data_train`, an iterator over the test dataset `data_test` and the size of the minibatch `size_minibatch`.
 Suppose `(xtrn,ytrn) = Fluxnlp.data_train`
 """
-
-function FluxNLPModel(
-  chain_ANN::C,
-  data_train,
-  data_test;
-  kwargs...
-) where {C <: Chain}
-  FluxNLPModel([chain_ANN], data_train, data_test; kwargs...)
-end
-
 function FluxNLPModel(
-  chain_ANN::Vector{T},
+  chain_ANN::T,
   data_train,
   data_test;
+  Formats = [f16,f32,f64],
   current_training_minibatch = [],
   current_test_minibatch = [],
   size_minibatch::Int = 100,
   loss_f::F = Flux.mse, #Flux.crossentropy,
 ) where {T <: Chain, F <: Function}
-  d = Flux.destructure.(chain_ANN)
+  chain = [f(chain_ANN) for f in Formats]
+  d = Flux.destructure.(chain)
   rebuild = [del[2] for del in d]
   x0 = d[end][1]
   Types = eltype.([del[1] for del in d])

diff --git a/test/multi_prec_test.jl b/test/multi_prec_test.jl
index cab07c8..3f1481d 100644
--- a/test/multi_prec_test.jl
+++ b/test/multi_prec_test.jl
@@ -9,40 +9,25 @@ model16_cpu = Chain(
   BatchNorm(3),
   Dense(3 => 2),
   softmax) |> f16
-model32_cpu = Chain(
-  Dense(2 => 3, tanh), # activation function inside layer
-  BatchNorm(3),
-  Dense(3 => 2),
-  softmax) |> f32
-model32_gpu = Chain(
-  Dense(2 => 3, tanh), # activation function inside layer
-  BatchNorm(3),
-  Dense(3 => 2),
-  softmax) |> gpu
+
 loader_cpu = Flux.DataLoader((noisy, target) |> f16, batchsize=64);
 loader_gpu = Flux.DataLoader((noisy, target) |> gpu, batchsize=64);
 
 @testset "FluxNLPModel ill-instantiation checks" begin
   try
-    FluxNLPModel([model16_cpu,model32_gpu],loader_cpu,loader_cpu)
+    FluxNLPModel(model16_cpu,loader_cpu,loader_cpu;Formats=[f16,gpu])
     @test false
   catch
     @test true
   end
   try
-    FluxNLPModel([model16_cpu,model32_cpu],loader_gpu,loader_cpu)
+    FluxNLPModel(model16_cpu,loader_gpu,loader_cpu)
     @test false
   catch
     @test true
   end
   try
-    FluxNLPModel([model16_cpu,model32_cpu],loader_cpu,loader_gpu)
+    FluxNLPModel(model16_cpu,loader_cpu,loader_gpu)
     @test false
   catch
     @test true
   end
-  try
-    FluxNLPModel([model32_cpu,model16_cpu],loader_cpu,loader_cpu) # wrong model order
-    @test false
-  catch
-    @test true
-  end
 end
 
 @testset "obj/grad FP formats consistency" begin
-  nlp = FluxNLPModel([model16_cpu,model32_cpu],loader_cpu,loader_cpu)
+  nlp = FluxNLPModel(model16_cpu,loader_cpu,loader_cpu)
   x16,_ = Flux.destructure(model16_cpu)

From ba522115570741934a568c55b620b07bf164569c Mon Sep 17 00:00:00 2001
From: d-monnet
Date: Tue, 31 Oct 2023 16:03:30 -0400
Subject: [PATCH 19/20] modify multi-precision interface
---
 src/FluxNLPModels.jl    | 11 +++++++----
 src/utils.jl            |  7 +++----
 test/multi_prec_test.jl |  9 +++++++--
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/src/FluxNLPModels.jl b/src/FluxNLPModels.jl
index 8c39169..f966c19 100644
--- a/src/FluxNLPModels.jl
+++ b/src/FluxNLPModels.jl
@@ -56,13 +56,16 @@ function FluxNLPModel(
   chain_ANN::T,
   data_train,
   data_test;
-  Formats = [f16,f32,f64],
+  Formats = [],
   current_training_minibatch = [],
   current_test_minibatch = [],
   size_minibatch::Int = 100,
   loss_f::F = Flux.mse, #Flux.crossentropy,
 ) where {T <: Chain, F <: Function}
-  chain = [f(chain_ANN) for f in Formats]
+  chain = [chain_ANN]
+  if !isempty(Formats)
+    chain = [f(chain_ANN) for f in Formats]
+  end
   d = Flux.destructure.(chain)
   rebuild = [del[2] for del in d]
   x0 = d[end][1]
@@ -77,10 +80,10 @@ function FluxNLPModel(
     current_training_minibatch = first(data_train)
     current_test_minibatch = first(data_test)
   end
   test_types_consistency(Types, data_train, data_test)
-  test_devices_consistency(chain_ANN, data_train, data_test)
+  test_devices_consistency(chain, data_train, data_test)
   return FluxNLPModel(
     meta,
-    chain_ANN,
+    chain,
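After this change the public interface takes a single chain plus an optional `Formats` list of precision converters. A hedged usage sketch, with a toy model and random data; it assumes the patched constructor exactly as defined above:

```julia
using Flux, FluxNLPModels

model = Chain(Dense(2 => 3, tanh), Dense(3 => 2), softmax) |> f16
data = Flux.DataLoader((rand(Float16, 2, 100), rand(Float16, 2, 100)), batchsize = 10)

# Default: `Formats` is empty, so the chain is wrapped as-is and existing
# single-precision code keeps working.
nlp_single = FluxNLPModel(model, data, data)

# Multi-precision: one internal chain per requested format, lowest first.
nlp_multi = FluxNLPModel(model, data, data; Formats = [f16, f32])
```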
diff --git a/src/utils.jl b/src/utils.jl
index f049a1f..83f3708 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -152,16 +152,15 @@ function test_devices_consistency(chain_ANN::Vector{T}, data_train, data_test) where {T <: Chain}
   weights = [del[1] for del in d]
   is_chain_gpu = [typeof(w) <: CuArray for w in weights]
   if !in(sum(is_chain_gpu), [0, length(chain_ANN)])
-    @error "Chain models should all be on the same device."
+    error("Chain models should all be on the same device.")
   end
   is_all_chain_gpu = is_chain_gpu[1]
   is_train_gpu = typeof(first(data_train)[1]) <: CuArray
   is_test_gpu = typeof(first(data_test)[1]) <: CuArray
-  @show is_chain_gpu is_all_chain_gpu is_train_gpu is_test_gpu
   if is_all_chain_gpu != is_train_gpu
-    @error "train loader and models are not on the same device."
+    error("train loader and models are not on the same device.")
   end
   if is_all_chain_gpu != is_test_gpu
-    @error "test loader and models are not on the same device."
+    error("test loader and models are not on the same device.")
   end
 end

diff --git a/test/multi_prec_test.jl b/test/multi_prec_test.jl
index 3f1481d..e0e3947 100644
--- a/test/multi_prec_test.jl
+++ b/test/multi_prec_test.jl
@@ -9,6 +9,11 @@ model16_cpu = Chain(
   BatchNorm(3),
   Dense(3 => 2),
   softmax) |> f16
+model32_gpu = Chain(
+  Dense(2 => 3, tanh), # activation function inside layer
+  BatchNorm(3),
+  Dense(3 => 2),
+  softmax) |> gpu
 
 loader_cpu = Flux.DataLoader((noisy, target) |> f16, batchsize=64);
 loader_gpu = Flux.DataLoader((noisy, target) |> gpu, batchsize=64);
@@ -35,7 +40,7 @@ loader_gpu = Flux.DataLoader((noisy, target) |> gpu, batchsize=64);
 end
 
 @testset "obj/grad FP formats consistency" begin
-  nlp = FluxNLPModel(model16_cpu,loader_cpu,loader_cpu)
+  nlp = FluxNLPModel(model16_cpu,loader_cpu,loader_cpu; Formats = [f16,f32])
   x16,_ = Flux.destructure(model16_cpu)
   @test typeof(obj(nlp,x16)) == eltype(x16)
   @test eltype(grad(nlp,x16)) == eltype(x16)
@@ -52,7 +57,7 @@ end
   @test eltype(g32) == eltype(x32)
 
   # gpu
-  nlp = FluxNLPModel([model32_gpu],loader_gpu,loader_gpu)
+  nlp = FluxNLPModel(model32_gpu,loader_gpu,loader_gpu)
   x32,_ = Flux.destructure(model32_gpu)
   @test typeof(obj(nlp,x32)) == eltype(x32)

From e319445b4da7e022a2d2580a665c1a4d5d5c6a52 Mon Sep 17 00:00:00 2001
From: d-monnet
Date: Tue, 31 Oct 2023 16:25:31 -0400
Subject: [PATCH 20/20] add CUDA dep
---
 Project.toml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Project.toml b/Project.toml
index 83ea529..95cb994 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["Farhad Rahbarnia "]
 version = "0.1.0"
 
 [deps]
+CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
 Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
 MLDatasets = "eb30cadb-4394-5ae3-aed4-317e484a6458"
 NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6"
@@ -22,4 +23,4 @@ LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
 
 [targets]
-test = ["LinearAlgebra","CUDA" ,"Test"]
+test = ["LinearAlgebra", "CUDA", "Test"]
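Putting the series together, here is a minimal end-to-end sketch of the intended workflow, with an illustrative chain and random data; it assumes the final state of the patches above, where `x0` holds the flat weights in the highest requested precision:

```julia
using Flux, FluxNLPModels, NLPModels

model = Chain(Dense(2 => 3, tanh), Dense(3 => 2)) |> f16
data = Flux.DataLoader((rand(Float16, 2, 64), rand(Float16, 2, 64)), batchsize = 16)
nlp = FluxNLPModel(model, data, data; Formats = [f16, f32])

w32 = nlp.meta.x0    # flat weights in the highest precision (Float32 here)
w16 = Float16.(w32)  # the same weights in half precision

f16_val = obj(nlp, w16)  # evaluated through the Float16 chain
g32 = grad(nlp, w32)     # gradient computed through the Float32 chain
typeof(f16_val) == Float16 && eltype(g32) == Float32  # true
```

From here, `nlp` can be passed to a JSOSolvers method such as `R2`, just as a single-precision `FluxNLPModel` would be.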