From 6170b48f43d488024ce9768939bf47239b5672b4 Mon Sep 17 00:00:00 2001
From: davidge807
Date: Fri, 23 Dec 2022 11:19:17 +0100
Subject: [PATCH 1/5] Cross entropy NAN parameters fixed

---
 opennn/cross_entropy_error.cpp | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/opennn/cross_entropy_error.cpp b/opennn/cross_entropy_error.cpp
index 5314db1a3..a23c93898 100644
--- a/opennn/cross_entropy_error.cpp
+++ b/opennn/cross_entropy_error.cpp
@@ -87,9 +87,13 @@ void CrossEntropyError::calculate_binary_error(const DataSetBatch& batch, const
 
     TensorMap<Tensor<type, 2>> targets(batch.targets_data, batch.targets_dimensions(0), batch.targets_dimensions(1));
 
+    Tensor<type, 2> binary_cross_entropy = - ((type(1)-targets)*((type(1)-outputs).log()));
+
+    std::replace_if(binary_cross_entropy.data(), binary_cross_entropy.data()+binary_cross_entropy.size(), [](type x){return isnan(x);}, 0);
+
     Tensor<type, 0> cross_entropy_error;
 
-    cross_entropy_error.device(*thread_pool_device) = -(targets*(outputs.log())).sum() - ((type(1)-targets)*((type(1)-outputs).log())).sum();
+    cross_entropy_error.device(*thread_pool_device) = -(targets*(outputs.log())).sum() + binary_cross_entropy.sum();
 
     back_propagation.error = cross_entropy_error()/static_cast<type>(batch_samples_number);
 }
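The hunk above works around the indeterminate form 0 * log(0): whenever a target is 1 and the corresponding output saturates at 1, the term (1 - t) * log(1 - y) evaluates to NaN and a single NaN poisons the whole sum. The patch materialises that term in its own tensor, zeroes the NaN entries with std::replace_if, and only then reduces. Below is a minimal standalone sketch of the same idea; it calls Eigen's unsupported Tensor module directly, the names (binary_cross_entropy, second_term) are illustrative rather than OpenNN's API, and, as in the hunk, the symmetric first term -t * log(y) is left untouched.

```cpp
#include <unsupported/Eigen/CXX11/Tensor>
#include <algorithm>
#include <cmath>
#include <iostream>

using type = float;

// NaN-safe binary cross-entropy: the (1 - t) * log(1 - y) term is evaluated
// into its own tensor, NaN entries are zeroed, and only then is it reduced.
type binary_cross_entropy(const Eigen::Tensor<type, 2>& targets,
                          const Eigen::Tensor<type, 2>& outputs)
{
    const Eigen::Tensor<type, 2> ones = targets.constant(type(1));

    // This term is NaN wherever targets == 1 and outputs == 1 (0 * log(0)).
    Eigen::Tensor<type, 2> second_term = -((ones - targets) * (ones - outputs).log());

    std::replace_if(second_term.data(),
                    second_term.data() + second_term.size(),
                    [](type x) { return std::isnan(x); },
                    type(0));

    // As in the hunk, the first term is reduced as-is.
    Eigen::Tensor<type, 0> error = -(targets * outputs.log()).sum() + second_term.sum();

    return error() / static_cast<type>(targets.dimension(0));
}

int main()
{
    Eigen::Tensor<type, 2> targets(2, 1);
    Eigen::Tensor<type, 2> outputs(2, 1);

    targets.setValues({{type(1)}, {type(0)}});
    outputs.setValues({{type(1)}, {type(0.2)}}); // saturated output for a positive target

    std::cout << binary_cross_entropy(targets, outputs) << std::endl; // finite: the NaN term was zeroed
}
```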
From a7235e08c98899b6e8bf9b65fa49e7e79fe20818 Mon Sep 17 00:00:00 2001
From: robertolopez
Date: Fri, 23 Dec 2022 12:00:09 +0100
Subject: [PATCH 2/5] Cleaning training algorithms

---
 opennn/adaptive_moment_estimation.cpp  | 10 ++--------
 opennn/adaptive_moment_estimation.h    |  4 ----
 opennn/conjugate_gradient.cpp          |  3 +--
 opennn/gradient_descent.cpp            | 14 +++++---------
 opennn/gradient_descent.h              | 10 ----------
 opennn/stochastic_gradient_descent.cpp | 16 ++++++++++++++--
 6 files changed, 22 insertions(+), 35 deletions(-)

diff --git a/opennn/adaptive_moment_estimation.cpp b/opennn/adaptive_moment_estimation.cpp
index 4ba6a7f6b..67367a2ad 100644
--- a/opennn/adaptive_moment_estimation.cpp
+++ b/opennn/adaptive_moment_estimation.cpp
@@ -321,8 +321,6 @@ TrainingResults AdaptiveMomentEstimation::perform_training()
     LossIndexBackPropagation training_back_propagation(batch_size_training, loss_index_pointer);
     LossIndexBackPropagation selection_back_propagation(batch_size_selection, loss_index_pointer);
 
-    Index parameters_size = training_back_propagation.parameters.size();
-
     type training_error = type(0);
     type training_loss = type(0);
 
@@ -380,7 +378,6 @@ TrainingResults AdaptiveMomentEstimation::perform_training()
             training_loss += training_back_propagation.loss;
 
             update_parameters(training_back_propagation, optimization_data);
-
         }
 
         // Loss
@@ -587,6 +584,7 @@ void AdaptiveMomentEstimation::update_parameters(LossIndexBackPropagation& back_
     saxpby(&parameters_number, &a, back_propagation.gradient.data(), &incx, &b, optimization_data.gradient_exponential_decay.data(), &incy);
 
 #else
+
     optimization_data.gradient_exponential_decay.device(*thread_pool_device)
             = back_propagation.gradient * (type(1) - beta_1)
             + optimization_data.gradient_exponential_decay * beta_1;
 
@@ -597,11 +595,8 @@ void AdaptiveMomentEstimation::update_parameters(LossIndexBackPropagation& back_
             = back_propagation.gradient * back_propagation.gradient * (type(1) - beta_2)
             + optimization_data.square_gradient_exponential_decay * beta_2;
 
-    optimization_data.square_gradient_exponential_decay_square_root.device(*thread_pool_device)
-        = optimization_data.square_gradient_exponential_decay.sqrt() + epsilon;
 
     back_propagation.parameters.device(*thread_pool_device)
-        -= learning_rate * optimization_data.gradient_exponential_decay /
-           optimization_data.square_gradient_exponential_decay_square_root;
+        -= learning_rate * optimization_data.gradient_exponential_decay / (optimization_data.square_gradient_exponential_decay.sqrt() + epsilon);
 
     optimization_data.iteration++;
 
@@ -841,7 +836,6 @@ void AdaptiveMomentEstimationData::set(AdaptiveMomentEstimation* new_adaptive_mo
     square_gradient_exponential_decay.resize(parameters_number);
     square_gradient_exponential_decay.setZero();
 
-    square_gradient_exponential_decay_square_root.resize(parameters_number);
     square_gradient_exponential_decay.setZero();
 }
 
diff --git a/opennn/adaptive_moment_estimation.h b/opennn/adaptive_moment_estimation.h
index 7950394a6..03c61f0f5 100644
--- a/opennn/adaptive_moment_estimation.h
+++ b/opennn/adaptive_moment_estimation.h
@@ -192,10 +192,6 @@ struct AdaptiveMomentEstimationData : public OptimizationAlgorithmData
     Tensor<type, 1> gradient_exponential_decay;
 
     Tensor<type, 1> square_gradient_exponential_decay;
-
-    Tensor<type, 1> square_gradient_exponential_decay_square_root;
-
-
     Index iteration = 0;
 
     Index learning_rate_iteration = 0;
 
diff --git a/opennn/conjugate_gradient.cpp b/opennn/conjugate_gradient.cpp
index c72795423..a214319e9 100644
--- a/opennn/conjugate_gradient.cpp
+++ b/opennn/conjugate_gradient.cpp
@@ -354,7 +354,6 @@ type ConjugateGradient::calculate_PR_parameter(const Tensor<type, 1>& old_gradie
     }
 
     return PR_parameter;
-
 }
 
 
@@ -987,7 +986,7 @@ void ConjugateGradient::update_parameters(
     }
 
     optimization_data.training_slope.device(*thread_pool_device)
-        = (back_propagation.gradient).contract(optimization_data.training_direction, AT_B);
+        = back_propagation.gradient.contract(optimization_data.training_direction, AT_B);
 
     if(optimization_data.training_slope(0) >= type(0))
     {
 
diff --git a/opennn/gradient_descent.cpp b/opennn/gradient_descent.cpp
index 0ce17e890..bf14c7590 100644
--- a/opennn/gradient_descent.cpp
+++ b/opennn/gradient_descent.cpp
@@ -284,10 +284,8 @@ void GradientDescent::update_parameters(
 
     if(abs(optimization_data.learning_rate) > type(0))
     {
-        optimization_data.parameters_increment.device(*thread_pool_device)
-            = optimization_data.training_direction*optimization_data.learning_rate;
-
-        back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
+        back_propagation.parameters.device(*thread_pool_device)
+            -= back_propagation.gradient*optimization_data.learning_rate;
     }
     else
     {
@@ -297,23 +295,21 @@ void GradientDescent::update_parameters(
         {
             if(abs(back_propagation.gradient(i)) < type(NUMERIC_LIMITS_MIN))
             {
-                optimization_data.parameters_increment(i) = type(0);
+                //optimization_data.parameters_increment(i) = type(0);
             }
             else if(back_propagation.gradient(i) > type(0))
             {
                 back_propagation.parameters(i) -= numeric_limits<type>::epsilon();
 
-                optimization_data.parameters_increment(i) = -numeric_limits<type>::epsilon();
+                //optimization_data.parameters_increment(i) = -numeric_limits<type>::epsilon();
             }
             else if(back_propagation.gradient(i) < type(0))
             {
                 back_propagation.parameters(i) += numeric_limits<type>::epsilon();
 
-                optimization_data.parameters_increment(i) = numeric_limits<type>::epsilon();
+                //optimization_data.parameters_increment(i) = numeric_limits<type>::epsilon();
             }
         }
-
-        optimization_data.learning_rate = optimization_data.old_learning_rate;
     }
 
     // Update parameters
 
diff --git a/opennn/gradient_descent.h b/opennn/gradient_descent.h
index 8e7a8fe4b..689ea1657 100644
--- a/opennn/gradient_descent.h
+++ b/opennn/gradient_descent.h
@@ -179,8 +179,6 @@ struct GradientDescentData : public OptimizationAlgorithmData
 
         potential_parameters.resize(parameters_number);
 
-        parameters_increment.resize(parameters_number);
-
         // Optimization algorithm data
 
         training_direction.resize(parameters_number);
 
@@ -198,14 +196,6 @@ struct GradientDescentData : public OptimizationAlgorithmData
 
     GradientDescent* gradient_descent_pointer = nullptr;
 
-    // Neural network data
-
-//    Tensor<type, 1> potential_parameters;
-//    Tensor<type, 1> training_direction;
-//    type initial_learning_rate = type(0);
-
-    Tensor<type, 1> parameters_increment;
-
     // Optimization algorithm data
 
     Index epoch = 0;
 
diff --git a/opennn/stochastic_gradient_descent.cpp b/opennn/stochastic_gradient_descent.cpp
index fc1871841..3cef8b5b3 100644
--- a/opennn/stochastic_gradient_descent.cpp
+++ b/opennn/stochastic_gradient_descent.cpp
@@ -271,8 +271,6 @@ void StochasticGradientDescent::set_maximum_time(const type& new_maximum_time)
 }
 
 
-/// Set hardware to use. Default: Multi-core.
-
 void StochasticGradientDescent::update_parameters(LossIndexBackPropagation& back_propagation,
                                                   StochasticGradientDescentData& optimization_data) const
 {
@@ -303,6 +301,20 @@ void StochasticGradientDescent::update_parameters(LossIndexBackPropagation& back
 
     optimization_data.last_parameters_increment = optimization_data.parameters_increment;
 
+    /// @todo check if the following is equivalent
+/*
+    if(momentum > type(0))
+    {
+        back_propagation.parameters.device(*thread_pool_device) += momentum*optimization_data.last_parameters_increment;
+
+        if(nesterov)
+        {
+            back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment*momentum;
+        }
+
+        optimization_data.last_parameters_increment = optimization_data.parameters_increment;
+    }
+*/
     optimization_data.iteration++;
 
     // Update parameters
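The Adam hunks in this patch drop the intermediate square_gradient_exponential_decay_square_root buffer and fold sqrt(v) + epsilon directly into the parameter update. For reference, here is a compact standalone sketch of that update rule on plain std::vector; the names, defaults, and AdamState struct are illustrative rather than OpenNN's API, and bias correction is not shown because it is outside the hunks above.

```cpp
#include <cmath>
#include <cstddef>
#include <vector>

struct AdamState
{
    std::vector<float> m; // exponential moving average of the gradient (first moment)
    std::vector<float> v; // exponential moving average of the squared gradient (second moment)

    explicit AdamState(std::size_t n) : m(n, 0.0f), v(n, 0.0f) {}
};

void adam_update(std::vector<float>& parameters,
                 const std::vector<float>& gradient,
                 AdamState& state,
                 float learning_rate = 0.001f,
                 float beta_1 = 0.9f,
                 float beta_2 = 0.999f,
                 float epsilon = 1e-7f)
{
    for(std::size_t i = 0; i < parameters.size(); ++i)
    {
        state.m[i] = beta_1 * state.m[i] + (1.0f - beta_1) * gradient[i];
        state.v[i] = beta_2 * state.v[i] + (1.0f - beta_2) * gradient[i] * gradient[i];

        // Same shape as the patched hunk: the sqrt(v) + epsilon denominator is
        // computed inline instead of being stored in a separate buffer.
        parameters[i] -= learning_rate * state.m[i] / (std::sqrt(state.v[i]) + epsilon);
    }
}
```

Dropping the extra buffer saves one parameters-sized tensor and one full pass over it per iteration; the square root and division are still evaluated element-wise either way.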
From 007bd6229cd35c97dd9b43576a90b8d30079d247 Mon Sep 17 00:00:00 2001
From: davidge807
Date: Fri, 23 Dec 2022 12:37:19 +0100
Subject: [PATCH 3/5] gradient descent cleaning

---
 opennn/gradient_descent.cpp | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/opennn/gradient_descent.cpp b/opennn/gradient_descent.cpp
index bf14c7590..6205fffc0 100644
--- a/opennn/gradient_descent.cpp
+++ b/opennn/gradient_descent.cpp
@@ -293,23 +293,21 @@ void GradientDescent::update_parameters(
 
         for(Index i = 0; i < parameters_number; i++)
         {
-            if(abs(back_propagation.gradient(i)) < type(NUMERIC_LIMITS_MIN))
+            if(abs(back_propagation.gradient(i)) >= type(NUMERIC_LIMITS_MIN))
             {
-                //optimization_data.parameters_increment(i) = type(0);
+                if(back_propagation.gradient(i) > type(0))
+                {
+                    back_propagation.parameters(i) -= numeric_limits<type>::epsilon();
+                }
+                else if(back_propagation.gradient(i) < type(0))
+                {
+                    back_propagation.parameters(i) += numeric_limits<type>::epsilon();
+                }
             }
-            else if(back_propagation.gradient(i) > type(0))
-            {
-                back_propagation.parameters(i) -= numeric_limits<type>::epsilon();
+        }
 
-                //optimization_data.parameters_increment(i) = -numeric_limits<type>::epsilon();
-            }
-            else if(back_propagation.gradient(i) < type(0))
-            {
-                back_propagation.parameters(i) += numeric_limits<type>::epsilon();
+        optimization_data.learning_rate = optimization_data.old_learning_rate;
 
-                //optimization_data.parameters_increment(i) = numeric_limits<type>::epsilon();
-            }
-        }
     }
 
     // Update parameters
@@ -317,7 +315,6 @@ void GradientDescent::update_parameters(
     optimization_data.old_learning_rate = optimization_data.learning_rate;
 
     forward_propagation.neural_network_pointer->set_parameters(back_propagation.parameters);
-
 }
 
 
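PATCH 3/5 collapses the three sign branches of the zero-learning-rate fallback into a single gradient-magnitude test and restores the previous learning rate afterwards. The sketch below shows the resulting step logic in isolation, assuming the caller's line search supplies learning_rate; the function and variable names are illustrative, not OpenNN's API.

```cpp
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

// One gradient-descent step with the degenerate-learning-rate fallback that
// the patch restructures: near-zero gradient components are skipped, the rest
// are nudged by machine epsilon against the sign of the gradient.
void gradient_descent_step(std::vector<float>& parameters,
                           const std::vector<float>& gradient,
                           float learning_rate)
{
    const float minimum = std::numeric_limits<float>::min();
    const float epsilon = std::numeric_limits<float>::epsilon();

    if(std::abs(learning_rate) > 0.0f)
    {
        // Regular step: move every parameter against its gradient component.
        for(std::size_t i = 0; i < parameters.size(); ++i)
            parameters[i] -= learning_rate * gradient[i];
    }
    else
    {
        // Fallback: epsilon nudge, mirroring the single ">= NUMERIC_LIMITS_MIN"
        // test introduced above.
        for(std::size_t i = 0; i < parameters.size(); ++i)
        {
            if(std::abs(gradient[i]) < minimum) continue;

            parameters[i] -= (gradient[i] > 0.0f ? epsilon : -epsilon);
        }
    }
}
```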
From e5f7faef4efb7f228a2709cdedd5e4ab26a603cf Mon Sep 17 00:00:00 2001
From: davidge807
Date: Fri, 23 Dec 2022 15:11:53 +0100
Subject: [PATCH 4/5] sgd cleaning

---
 opennn/stochastic_gradient_descent.cpp | 19 +------------------
 opennn/stochastic_gradient_descent.h   |  3 ---
 2 files changed, 1 insertion(+), 21 deletions(-)

diff --git a/opennn/stochastic_gradient_descent.cpp b/opennn/stochastic_gradient_descent.cpp
index 3cef8b5b3..4346acc9d 100644
--- a/opennn/stochastic_gradient_descent.cpp
+++ b/opennn/stochastic_gradient_descent.cpp
@@ -288,10 +288,7 @@ void StochasticGradientDescent::update_parameters(LossIndexBackPropagation& back
         }
         else
         {
-            optimization_data.nesterov_increment.device(*thread_pool_device)
-                = optimization_data.parameters_increment*momentum - back_propagation.gradient*learning_rate;
-
-            back_propagation.parameters.device(*thread_pool_device) += optimization_data.nesterov_increment;
+            back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment*momentum - back_propagation.gradient*learning_rate;
         }
     }
     else
@@ -301,20 +298,6 @@ void StochasticGradientDescent::update_parameters(LossIndexBackPropagation& back
 
     optimization_data.last_parameters_increment = optimization_data.parameters_increment;
 
-    /// @todo check if the following is equivalent
-/*
-    if(momentum > type(0))
-    {
-        back_propagation.parameters.device(*thread_pool_device) += momentum*optimization_data.last_parameters_increment;
-
-        if(nesterov)
-        {
-            back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment*momentum;
-        }
-
-        optimization_data.last_parameters_increment = optimization_data.parameters_increment;
-    }
-*/
     optimization_data.iteration++;
 
     // Update parameters
 
diff --git a/opennn/stochastic_gradient_descent.h b/opennn/stochastic_gradient_descent.h
index a84f24c43..328768b6e 100644
--- a/opennn/stochastic_gradient_descent.h
+++ b/opennn/stochastic_gradient_descent.h
@@ -183,11 +183,9 @@ struct StochasticGradientDescentData : public OptimizationAlgorithmData
         const Index parameters_number = neural_network_pointer->get_parameters_number();
 
         parameters_increment.resize(parameters_number);
-        nesterov_increment.resize(parameters_number);
        last_parameters_increment.resize(parameters_number);
 
         parameters_increment.setZero();
-        nesterov_increment.setZero();
         last_parameters_increment.setZero();
     }
 
@@ -196,7 +194,6 @@ struct StochasticGradientDescentData : public OptimizationAlgorithmData
     Index iteration = 0;
 
     Tensor<type, 1> parameters_increment;
-    Tensor<type, 1> nesterov_increment;
     Tensor<type, 1> last_parameters_increment;
 };
 

From d88bb66d9291bf29df1a525e7d091d09e56ae65d Mon Sep 17 00:00:00 2001
From: davidge807
Date: Tue, 3 Jan 2023 09:33:33 +0100
Subject: [PATCH 5/5] Descriptives issue

---
 opennn/statistics.cpp | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/opennn/statistics.cpp b/opennn/statistics.cpp
index 3288e2032..270cd9eac 100644
--- a/opennn/statistics.cpp
+++ b/opennn/statistics.cpp
@@ -315,7 +315,6 @@ Histogram::Histogram(const Tensor<type, 1>& data,
 
         corresponding_bin = int((value - data_minimum) / step);
 
-
         if(corresponding_bin >= number_of_bins)
             corresponding_bin = number_of_bins - 1;
 
@@ -1932,6 +1931,13 @@ Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>& matrix,
             standard_deviation(i) = sqrt(variance);
         }
     }
+    else
+    {
+        for(Index i = 0; i < columns_indices_size; i++)
+        {
+            standard_deviation(i) = 0;
+        }
+    }
 
     for(Index i = 0; i < columns_indices_size; i++)
     {
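PATCH 5/5 adds an else branch so that standard_deviation(i) is explicitly set to 0 whenever the variance-accumulating branch is not taken; the exact condition being guarded sits outside the visible hunk. The sketch below shows per-column descriptives with that kind of guard, using a "fewer than two samples" test as one plausible reading of the degenerate case; the names and the ColumnDescriptives struct are illustrative, not OpenNN's API.

```cpp
#include <algorithm>
#include <cmath>
#include <vector>

struct ColumnDescriptives
{
    double minimum = 0, maximum = 0, mean = 0, standard_deviation = 0;
};

// Per-column descriptives with an explicit zero standard deviation in the
// degenerate case, analogous to the else branch the patch adds.
ColumnDescriptives column_descriptives(const std::vector<double>& column)
{
    ColumnDescriptives d;
    if(column.empty()) return d;

    d.minimum = d.maximum = column[0];
    double sum = 0.0, squared_sum = 0.0;

    for(double x : column)
    {
        d.minimum = std::min(d.minimum, x);
        d.maximum = std::max(d.maximum, x);
        sum += x;
        squared_sum += x * x;
    }

    const double n = static_cast<double>(column.size());
    d.mean = sum / n;

    if(column.size() > 1)
    {
        // Sample variance; clamped at 0 to absorb floating-point cancellation.
        const double variance = std::max(0.0, (squared_sum - sum * sum / n) / (n - 1.0));
        d.standard_deviation = std::sqrt(variance);
    }
    else
    {
        d.standard_deviation = 0.0; // single sample: no spread to report
    }

    return d;
}
```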