diff --git a/opennn/adaptive_moment_estimation.cpp b/opennn/adaptive_moment_estimation.cpp
index eb16d8427..c8dece041 100644
--- a/opennn/adaptive_moment_estimation.cpp
+++ b/opennn/adaptive_moment_estimation.cpp
@@ -318,8 +318,6 @@ TrainingResults AdaptiveMomentEstimation::perform_training()
     LossIndexBackPropagation training_back_propagation(batch_size_training, loss_index_pointer);
     LossIndexBackPropagation selection_back_propagation(batch_size_selection, loss_index_pointer);
 
-    Index parameters_size = training_back_propagation.parameters.size();
-
     type training_error = type(0);
     type training_loss = type(0);
 
@@ -377,7 +375,6 @@ TrainingResults AdaptiveMomentEstimation::perform_training()
             training_loss += training_back_propagation.loss;
 
             update_parameters(training_back_propagation, optimization_data);
-
         }
 
         // Loss
@@ -580,6 +577,7 @@ void AdaptiveMomentEstimation::update_parameters(LossIndexBackPropagation& back_
     saxpby(&parameters_number, &a, back_propagation.gradient.data(), &incx, &b, optimization_data.gradient_exponential_decay.data(), &incy);
 
 #else
+
     optimization_data.gradient_exponential_decay.device(*thread_pool_device)
         = back_propagation.gradient * (type(1) - beta_1)
           + optimization_data.gradient_exponential_decay * beta_1;
@@ -590,11 +588,8 @@ void AdaptiveMomentEstimation::update_parameters(LossIndexBackPropagation& back_
         = back_propagation.gradient * back_propagation.gradient * (type(1) - beta_2)
           + optimization_data.square_gradient_exponential_decay * beta_2;
 
-    optimization_data.square_gradient_exponential_decay_square_root.device(*thread_pool_device)
-        = optimization_data.square_gradient_exponential_decay.sqrt() + epsilon;
-
     back_propagation.parameters.device(*thread_pool_device)
-        -= learning_rate * optimization_data.gradient_exponential_decay / optimization_data.square_gradient_exponential_decay_square_root;
+        -= learning_rate * optimization_data.gradient_exponential_decay / (optimization_data.square_gradient_exponential_decay.sqrt() + epsilon);
 
     optimization_data.iteration++;
 
@@ -834,7 +829,6 @@ void AdaptiveMomentEstimationData::set(AdaptiveMomentEstimation* new_adaptive_mo
     square_gradient_exponential_decay.resize(parameters_number);
     square_gradient_exponential_decay.setZero();
 
-    square_gradient_exponential_decay_square_root.resize(parameters_number);
     square_gradient_exponential_decay.setZero();
 }
 
diff --git a/opennn/adaptive_moment_estimation.h b/opennn/adaptive_moment_estimation.h
index 7950394a6..03c61f0f5 100644
--- a/opennn/adaptive_moment_estimation.h
+++ b/opennn/adaptive_moment_estimation.h
@@ -192,10 +192,6 @@ struct AdaptiveMomentEstimationData : public OptimizationAlgorithmData
     Tensor<type, 1> gradient_exponential_decay;
 
     Tensor<type, 1> square_gradient_exponential_decay;
-
-    Tensor<type, 1> square_gradient_exponential_decay_square_root;
-
-
     Index iteration = 0;
 
     Index learning_rate_iteration = 0;
diff --git a/opennn/conjugate_gradient.cpp b/opennn/conjugate_gradient.cpp
index 34ec4cc90..dd6764296 100644
--- a/opennn/conjugate_gradient.cpp
+++ b/opennn/conjugate_gradient.cpp
@@ -354,7 +354,6 @@ type ConjugateGradient::calculate_PR_parameter(const Tensor<type, 1>& old_gradie
     }
 
     return PR_parameter;
-
 }
 
 
@@ -987,7 +986,7 @@ void ConjugateGradient::update_parameters(
     }
 
     optimization_data.training_slope.device(*thread_pool_device)
-        = (back_propagation.gradient).contract(optimization_data.training_direction, AT_B);
+        = back_propagation.gradient.contract(optimization_data.training_direction, AT_B);
 
     if(optimization_data.training_slope(0) >= type(0))
     {
diff --git a/opennn/cross_entropy_error.cpp b/opennn/cross_entropy_error.cpp
index 887e098fd..0963018c8 100644
--- a/opennn/cross_entropy_error.cpp
+++ b/opennn/cross_entropy_error.cpp
@@ -87,9 +87,13 @@ void CrossEntropyError::calculate_binary_error(const DataSetBatch& batch, const
     TensorMap<Tensor<type, 2>> targets(batch.targets_data, batch.targets_dimensions(0), batch.targets_dimensions(1));
 
+    Tensor<type, 2> binary_cross_entropy = - ((type(1)-targets)*((type(1)-outputs).log()));
+
+    std::replace_if(binary_cross_entropy.data(), binary_cross_entropy.data()+binary_cross_entropy.size(), [](type x){return isnan(x);}, 0);
+
     Tensor<type, 0> cross_entropy_error;
 
-    cross_entropy_error.device(*thread_pool_device) = -(targets*(outputs.log())).sum() - ((type(1)-targets)*((type(1)-outputs).log())).sum();
+    cross_entropy_error.device(*thread_pool_device) = -(targets*(outputs.log())).sum() + binary_cross_entropy.sum();
 
     back_propagation.error = cross_entropy_error()/static_cast<type>(batch_samples_number);
 }
 
diff --git a/opennn/gradient_descent.cpp b/opennn/gradient_descent.cpp
index b831cf6f6..cd87aa5b0 100644
--- a/opennn/gradient_descent.cpp
+++ b/opennn/gradient_descent.cpp
@@ -284,10 +284,8 @@ void GradientDescent::update_parameters(
 
     if(abs(optimization_data.learning_rate) > type(0))
     {
-        optimization_data.parameters_increment.device(*thread_pool_device)
-            = optimization_data.training_direction*optimization_data.learning_rate;
-
-        back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment;
+        back_propagation.parameters.device(*thread_pool_device)
+            -= back_propagation.gradient*optimization_data.learning_rate;
     }
     else
     {
@@ -295,25 +293,21 @@ void GradientDescent::update_parameters(
 
         for(Index i = 0; i < parameters_number; i++)
        {
-            if(abs(back_propagation.gradient(i)) < type(NUMERIC_LIMITS_MIN))
+            if(abs(back_propagation.gradient(i)) >= type(NUMERIC_LIMITS_MIN))
             {
-                optimization_data.parameters_increment(i) = type(0);
-            }
-            else if(back_propagation.gradient(i) > type(0))
-            {
-                back_propagation.parameters(i) -= numeric_limits<type>::epsilon();
-
-                optimization_data.parameters_increment(i) = -numeric_limits<type>::epsilon();
-            }
-            else if(back_propagation.gradient(i) < type(0))
-            {
-                back_propagation.parameters(i) += numeric_limits<type>::epsilon();
-
-                optimization_data.parameters_increment(i) = numeric_limits<type>::epsilon();
+                if(back_propagation.gradient(i) > type(0))
+                {
+                    back_propagation.parameters(i) -= numeric_limits<type>::epsilon();
+                }
+                else if(back_propagation.gradient(i) < type(0))
+                {
+                    back_propagation.parameters(i) += numeric_limits<type>::epsilon();
+                }
             }
         }
 
         optimization_data.learning_rate = optimization_data.old_learning_rate;
+
     }
 
     // Update parameters
@@ -321,7 +315,6 @@ void GradientDescent::update_parameters(
     optimization_data.old_learning_rate = optimization_data.learning_rate;
 
     forward_propagation.neural_network_pointer->set_parameters(back_propagation.parameters);
-
 }
 
 
diff --git a/opennn/gradient_descent.h b/opennn/gradient_descent.h
index 8e7a8fe4b..689ea1657 100644
--- a/opennn/gradient_descent.h
+++ b/opennn/gradient_descent.h
@@ -179,8 +179,6 @@ struct GradientDescentData : public OptimizationAlgorithmData
         potential_parameters.resize(parameters_number);
 
-        parameters_increment.resize(parameters_number);
-
         // Optimization algorithm data
 
         training_direction.resize(parameters_number);
 
@@ -198,14 +196,6 @@ struct GradientDescentData : public OptimizationAlgorithmData
     GradientDescent* gradient_descent_pointer = nullptr;
 
-    // Neural network data
-
-//    Tensor<type, 1> potential_parameters;
-//    Tensor<type, 1> training_direction;
-//    type initial_learning_rate = type(0);
-
-    Tensor<type, 1> parameters_increment;
-
     // Optimization algorithm data
 
     Index epoch = 0;
 
diff --git a/opennn/statistics.cpp b/opennn/statistics.cpp
index a60c1ebc0..a457fb9f2 100644
--- a/opennn/statistics.cpp
+++ b/opennn/statistics.cpp
@@ -315,7 +315,6 @@ Histogram::Histogram(const Tensor<type, 1>& data,
 
         corresponding_bin = int((value - data_minimum) / step);
 
-
         if(corresponding_bin >= number_of_bins) corresponding_bin = number_of_bins - 1;
 
 
@@ -1932,6 +1931,13 @@ Tensor<Descriptives, 1> descriptives(const Tensor<type, 2>& matrix,
             standard_deviation(i) = sqrt(variance);
         }
     }
+    else
+    {
+        for(Index i = 0; i < columns_indices_size; i++)
+        {
+            standard_deviation(i) = 0;
+        }
+    }
 
     for(Index i = 0; i < columns_indices_size; i++)
     {
diff --git a/opennn/stochastic_gradient_descent.cpp b/opennn/stochastic_gradient_descent.cpp
index 49240679d..0ce4083a4 100644
--- a/opennn/stochastic_gradient_descent.cpp
+++ b/opennn/stochastic_gradient_descent.cpp
@@ -271,8 +271,6 @@ void StochasticGradientDescent::set_maximum_time(const type& new_maximum_time)
 }
 
 
-/// Set hardware to use. Default: Multi-core.
-
 void StochasticGradientDescent::update_parameters(LossIndexBackPropagation& back_propagation,
                                                   StochasticGradientDescentData& optimization_data) const
 {
@@ -290,10 +288,7 @@ void StochasticGradientDescent::update_parameters(LossIndexBackPropagation& back
         }
         else
         {
-            optimization_data.nesterov_increment.device(*thread_pool_device)
-                = optimization_data.parameters_increment*momentum - back_propagation.gradient*learning_rate;
-
-            back_propagation.parameters.device(*thread_pool_device) += optimization_data.nesterov_increment;
+            back_propagation.parameters.device(*thread_pool_device) += optimization_data.parameters_increment*momentum - back_propagation.gradient*learning_rate;
         }
     }
     else
diff --git a/opennn/stochastic_gradient_descent.h b/opennn/stochastic_gradient_descent.h
index a84f24c43..328768b6e 100644
--- a/opennn/stochastic_gradient_descent.h
+++ b/opennn/stochastic_gradient_descent.h
@@ -183,11 +183,9 @@ struct StochasticGradientDescentData : public OptimizationAlgorithmData
         const Index parameters_number = neural_network_pointer->get_parameters_number();
 
         parameters_increment.resize(parameters_number);
-        nesterov_increment.resize(parameters_number);
         last_parameters_increment.resize(parameters_number);
 
         parameters_increment.setZero();
-        nesterov_increment.setZero();
         last_parameters_increment.setZero();
     }
 
@@ -196,7 +194,6 @@ struct StochasticGradientDescentData : public OptimizationAlgorithmData
     Index iteration = 0;
 
     Tensor<type, 1> parameters_increment;
-    Tensor<type, 1> nesterov_increment;
     Tensor<type, 1> last_parameters_increment;
 };