From 2f012fa01af7265031da390387aa9688dfd660c2 Mon Sep 17 00:00:00 2001 From: nychiang Date: Tue, 12 Nov 2024 15:20:55 -0800 Subject: [PATCH 1/3] test 1 --- src/Drivers/Sparse/NlpSparseEx1.cpp | 301 +++--- src/Drivers/Sparse/NlpSparseEx1.hpp | 109 +- src/Optimization/HessianDiagPlusRowRank.cpp | 1021 ++++++++++--------- src/Optimization/HessianDiagPlusRowRank.hpp | 389 ++++--- 4 files changed, 938 insertions(+), 882 deletions(-) diff --git a/src/Drivers/Sparse/NlpSparseEx1.cpp b/src/Drivers/Sparse/NlpSparseEx1.cpp index b3a84a6c9..f3d95e1b0 100644 --- a/src/Drivers/Sparse/NlpSparseEx1.cpp +++ b/src/Drivers/Sparse/NlpSparseEx1.cpp @@ -1,7 +1,7 @@ #include "NlpSparseEx1.hpp" #include -#include //for memcpy +#include //for memcpy #include /* Test with bounds and constraints of all types. For some reason this @@ -17,196 +17,259 @@ * x_i >=0.5, i=4,...,n */ SparseEx1::SparseEx1(int n, double scal_input) - : n_vars(n), n_cons{2}, scal{scal_input} + : n_vars(n), + n_cons{2}, + scal{scal_input} { - assert(n>=3); - if(n>3) - n_cons += n-3; + assert(n >= 3); + if (n > 3) { + n_cons += n - 3; + } } -SparseEx1::~SparseEx1() -{} +SparseEx1::~SparseEx1() {} bool SparseEx1::get_prob_sizes(size_type& n, size_type& m) - { n=n_vars; m=n_cons; return true; } +{ + n = n_vars; + m = n_cons; + return true; +} -bool SparseEx1::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool SparseEx1::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { - assert(n==n_vars); - for(index_type i=0; i=3), which are bounded only from below - xlow[i]= 0.5; xupp[i]=1e20; type[i]=hiopNonlinear; + assert(n == n_vars); + for (index_type i = 0; i < n; i++) { + if (i == 0) { + xlow[i] = -1e20; + xupp[i] = 1e20; + type[i] = hiopNonlinear; + continue; + } + if (i == 1) { + xlow[i] = 0.0; + xupp[i] = 1e20; + type[i] = hiopNonlinear; + continue; + } + if (i == 2) { + xlow[i] = 1.5; + xupp[i] = 10.0; + type[i] = hiopNonlinear; + continue; + } + // this is for x_4, x_5, ... , x_n (i>=3), which are bounded only from below + xlow[i] = 0.5; + xupp[i] = 1e20; + type[i] = hiopNonlinear; } return true; } bool SparseEx1::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m==n_cons); + assert(m == n_cons); index_type conidx{0}; - clow[conidx]= scal*10.0; cupp[conidx]= scal*10.0; type[conidx++]=hiopInterfaceBase::hiopLinear; - clow[conidx]= scal*5.0; cupp[conidx]= 1e20; type[conidx++]=hiopInterfaceBase::hiopLinear; - for(index_type i=3; i 4*x_1 + 2*x_2 == 10 - cons[conidx++] += scal*( 4*x[0] + 2*x[1]); + // compute the constraint one by one. + // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 + cons[conidx++] += scal * (4 * x[0] + 2 * x[1]); // --- constraint 2 body ---> 2*x_1 + x_3 - cons[conidx++] += scal*( 2*x[0] + 1*x[2]); + cons[conidx++] += scal * (2 * x[0] + 1 * x[2]); // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - for(auto i=3; i=3); + assert(n == n_vars); + assert(m == n_cons); + assert(n >= 3); - assert(nnzJacS == 4 + 2*(n-3)); + assert(nnzJacS == 4 + 2 * (n - 3)); + int nnzit{0}; + index_type conidx{0}; - int nnzit{0}; - index_type conidx{0}; - - if(iJacS!=NULL && jJacS!=NULL){ - // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; jJacS[nnzit++] = 1; - conidx++; - - // --- constraint 2 body ---> 2*x_1 + x_3 - iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; jJacS[nnzit++] = 2; - conidx++; - - // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - for(auto i=3; i 4*x_1 + 2*x_2 == 10 + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 1; + conidx++; + + // --- constraint 2 body ---> 2*x_1 + x_3 + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 2; + conidx++; + + // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 + for (auto i = 3; i < n; i++) { + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = i; + conidx++; } + assert(nnzit == nnzJacS); + } - //values for sparse Jacobian if requested by the solver - nnzit = 0; - if(MJacS!=NULL) { - // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - MJacS[nnzit++] = scal*4; - MJacS[nnzit++] = scal*2; - - // --- constraint 2 body ---> 2*x_1 + x_3 - MJacS[nnzit++] = scal*2; - MJacS[nnzit++] = scal*1; - - // --- constraint 3 body ---> 2*x_1 + 0.5*x_4 - for(auto i=3; i 4*x_1 + 2*x_2 == 10 + MJacS[nnzit++] = scal * 4; + MJacS[nnzit++] = scal * 2; + + // --- constraint 2 body ---> 2*x_1 + x_3 + MJacS[nnzit++] = scal * 2; + MJacS[nnzit++] = scal * 1; + + // --- constraint 3 body ---> 2*x_1 + 0.5*x_4 + for (auto i = 3; i < n; i++) { + MJacS[nnzit++] = scal * 2; + MJacS[nnzit++] = scal * 0.5; } - return true; + assert(nnzit == nnzJacS); + } + return true; } -bool SparseEx1::eval_Hess_Lagr(const size_type& n, const size_type& m, - const double* x, bool new_x, const double& obj_factor, - const double* lambda, bool new_lambda, - const size_type& nnzHSS, index_type* iHSS, index_type* jHSS, double* MHSS) +bool SparseEx1::eval_Hess_Lagr(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const double& obj_factor, + const double* lambda, + bool new_lambda, + const size_type& nnzHSS, + index_type* iHSS, + index_type* jHSS, + double* MHSS) { - //Note: lambda is not used since all the constraints are linear and, therefore, do - //not contribute to the Hessian of the Lagrangian - assert(nnzHSS == n); + // Note: lambda is not used since all the constraints are linear and, therefore, do + // not contribute to the Hessian of the Lagrangian + assert(nnzHSS == n); - if(iHSS!=NULL && jHSS!=NULL) { - for(int i=0; i -using size_type = hiop::size_type; +using size_type = hiop::size_type; using index_type = hiop::index_type; /* Test with bounds and constraints of all types. For some reason this @@ -27,87 +27,70 @@ class SparseEx1 : public hiop::hiopInterfaceSparse virtual ~SparseEx1(); virtual bool get_prob_sizes(size_type& n, size_type& m); - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); - + virtual bool get_sparse_blocks_info(size_type& nx, size_type& nnz_sparse_Jaceq, size_type& nnz_sparse_Jacineq, size_type& nnz_sparse_Hess_Lagr); virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); - virtual bool eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, + virtual bool eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, const index_type* idx_cons, - const double* x, - bool new_x, - double* cons); - virtual bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons); - virtual bool eval_grad_f(const size_type& n, - const double* x, - bool new_x, - double* gradf); - virtual bool eval_Jac_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, + const double* x, + bool new_x, + double* cons); + virtual bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons); + virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, const index_type* idx_cons, - const double* x, - bool new_x, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS); + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS); virtual bool eval_Jac_cons(const size_type& n, const size_type& m, - const double* x, - bool new_x, + const double* x, + bool new_x, const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS); - virtual bool get_starting_point(const size_type&n, double* x0); + index_type* iJacS, + index_type* jJacS, + double* MJacS); + virtual bool get_starting_point(const size_type& n, double* x0); virtual bool eval_Hess_Lagr(const size_type& n, const size_type& m, - const double* x, - bool new_x, - const double& obj_factor, - const double* lambda, - bool new_lambda, + const double* x, + bool new_x, + const double& obj_factor, + const double* lambda, + bool new_lambda, const size_type& nnzHSS, - index_type* iHSS, - index_type* jHSS, - double* MHSS); + index_type* iHSS, + index_type* jHSS, + double* MHSS); // not implemented - virtual bool get_starting_point(const size_type&, - const size_type&, - double*, - bool&, - double*, - double*, - double*, - bool&, - double*) - { return false; } + virtual bool + get_starting_point(const size_type&, const size_type&, double*, bool&, double*, double*, double*, bool&, double*) + { + return false; + } - virtual bool get_warmstart_point(const size_type&, - const size_type&, - double*, - double*, - double*, - double*, - double*, - double*, - double*) - { return false; } + virtual bool + get_warmstart_point(const size_type&, const size_type&, double*, double*, double*, double*, double*, double*, double*) + { + return false; + } private: size_type n_vars, n_cons; - double scal; + double scal; }; #endif diff --git a/src/Optimization/HessianDiagPlusRowRank.cpp b/src/Optimization/HessianDiagPlusRowRank.cpp index f6d6f5b93..0d79aa426 100644 --- a/src/Optimization/HessianDiagPlusRowRank.cpp +++ b/src/Optimization/HessianDiagPlusRowRank.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -80,74 +80,79 @@ namespace hiop { HessianDiagPlusRowRank::HessianDiagPlusRowRank(hiopNlpDenseConstraints* nlp_in, int max_mem_len) - : l_max_(max_mem_len), - l_curr_(-1), - sigma_(1.), - sigma0_(1.), - nlp_(nlp_in), - matrix_changed_(false) + : l_max_(max_mem_len), + l_curr_(-1), + sigma_(1.), + sigma0_(1.), + nlp_(nlp_in), + matrix_changed_(false) { - DhInv_ = nlp_->alloc_primal_vec(); - St_ = nlp_->alloc_multivector_primal(0, l_max_); - Yt_ = St_->alloc_clone(); //faster than nlp_->alloc_multivector_primal(...); - //these are local - L_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 0, 0); - D_ = LinearAlgebraFactory::create_vector("DEFAULT", 0); - V_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 0, 0); - - //the previous iteration - it_prev_ = new hiopIterate(nlp_); + DhInv_ = nlp_->alloc_primal_vec(); + St_ = nlp_->alloc_multivector_primal(0, l_max_); + Yt_ = St_->alloc_clone(); // faster than nlp_->alloc_multivector_primal(...); + // these are local + L_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 0, 0); + D_ = LinearAlgebraFactory::create_vector("DEFAULT", 0); + V_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 0, 0); + + // the previous iteration + it_prev_ = new hiopIterate(nlp_); grad_f_prev_ = nlp_->alloc_primal_vec(); - Jac_c_prev_ = nlp_->alloc_Jac_c(); - Jac_d_prev_ = nlp_->alloc_Jac_d(); + Jac_c_prev_ = nlp_->alloc_Jac_c(); + Jac_d_prev_ = nlp_->alloc_Jac_d(); - //internal buffers for memory pool (none of them should be in n) + // internal buffers for memory pool (none of them should be in n) #ifdef HIOP_USE_MPI buff_kxk_ = new double[nlp_->m() * nlp_->m()]; - buff_2lxk_ = new double[nlp_->m() * 2*l_max_]; - buff1_lxlx3_ = new double[3*l_max_*l_max_]; - buff2_lxlx3_ = new double[3*l_max_*l_max_]; + buff_2lxk_ = new double[nlp_->m() * 2 * l_max_]; + buff1_lxlx3_ = new double[3 * l_max_ * l_max_]; + buff2_lxlx3_ = new double[3 * l_max_ * l_max_]; #else - //not needed in non-MPI mode - buff_kxk_ = nullptr; - buff_2lxk_ = nullptr; + // not needed in non-MPI mode + buff_kxk_ = nullptr; + buff_2lxk_ = nullptr; buff1_lxlx3_ = nullptr; buff2_lxlx3_ = nullptr; #endif - //auxiliary objects/buffers - S1_ = nullptr; - Y1_ = nullptr; - lxl_mat1_ = nullptr; - kxl_mat1_ = nullptr; - kx2l_mat1_ = nullptr; - l_vec1_ = nullptr; - l_vec2_ = nullptr; - twol_vec1_ = nullptr; - n_vec1_ = DhInv_->alloc_clone(); - n_vec2_ = DhInv_->alloc_clone(); - - V_work_vec_ = LinearAlgebraFactory::create_vector("DEFAULT", 0); - V_ipiv_vec_ = nullptr; - V_ipiv_size_ = -1; - - sigma0_ = nlp_->options->GetNumeric("sigma0"); - sigma_ = sigma0_; + // auxiliary objects/buffers + S1_ = nullptr; + Y1_ = nullptr; + lxl_mat1_ = nullptr; + kxl_mat1_ = nullptr; + kx2l_mat1_ = nullptr; + l_vec1_ = nullptr; + l_vec2_ = nullptr; + twol_vec1_ = nullptr; + n_vec1_ = DhInv_->alloc_clone(); + n_vec2_ = DhInv_->alloc_clone(); + + V_work_vec_ = LinearAlgebraFactory::create_vector("DEFAULT", 0); + V_ipiv_vec_ = nullptr; + V_ipiv_size_ = -1; + + sigma0_ = nlp_->options->GetNumeric("sigma0"); + sigma_ = sigma0_; string sigma_strategy = nlp_->options->GetString("sigma_update_strategy"); transform(sigma_strategy.begin(), sigma_strategy.end(), sigma_strategy.begin(), ::tolower); sigma_update_strategy_ = SIGMA_STRATEGY3; - if(sigma_strategy=="sty") { - sigma_update_strategy_=SIGMA_STRATEGY1; - } else if(sigma_strategy=="sty_inv") { - sigma_update_strategy_=SIGMA_STRATEGY2; - } else if(sigma_strategy=="snrm_ynrm") { - sigma_update_strategy_=SIGMA_STRATEGY3; - } else if(sigma_strategy=="sty_srnm_ynrm") { - sigma_update_strategy_=SIGMA_STRATEGY4; - } else if(sigma_strategy=="sigma0") { - sigma_update_strategy_=SIGMA_CONSTANT; - } else { + if (sigma_strategy == "sty") { + sigma_update_strategy_ = SIGMA_STRATEGY1; + } + else if (sigma_strategy == "sty_inv") { + sigma_update_strategy_ = SIGMA_STRATEGY2; + } + else if (sigma_strategy == "snrm_ynrm") { + sigma_update_strategy_ = SIGMA_STRATEGY3; + } + else if (sigma_strategy == "sty_srnm_ynrm") { + sigma_update_strategy_ = SIGMA_STRATEGY4; + } + else if (sigma_strategy == "sigma0") { + sigma_update_strategy_ = SIGMA_CONSTANT; + } + else { assert(false && "sigma_update_strategy option not recognized"); } @@ -159,15 +164,14 @@ HessianDiagPlusRowRank::HessianDiagPlusRowRank(hiopNlpDenseConstraints* nlp_in, sigma_update_strategy_, sigma_strategy.c_str()); - Dx_ = DhInv_->alloc_clone(); + Dx_ = DhInv_->alloc_clone(); #ifdef HIOP_DEEPCHECKS Vmat_ = V_->alloc_clone(); #endif yk = nlp_->alloc_primal_vec(); sk = nlp_->alloc_primal_vec(); - -} +} HessianDiagPlusRowRank::~HessianDiagPlusRowRank() { @@ -185,7 +189,6 @@ HessianDiagPlusRowRank::~HessianDiagPlusRowRank() delete Vmat_; #endif - delete it_prev_; delete grad_f_prev_; delete Jac_c_prev_; @@ -199,7 +202,7 @@ HessianDiagPlusRowRank::~HessianDiagPlusRowRank() delete S1_; delete Y1_; delete lxl_mat1_; - delete kxl_mat1_; + delete kxl_mat1_; delete kx2l_mat1_; delete l_vec1_; @@ -210,19 +213,19 @@ HessianDiagPlusRowRank::~HessianDiagPlusRowRank() delete[] V_ipiv_vec_; delete V_work_vec_; - for(auto* it: a) { + for (auto* it : a) { delete it; } - for(auto* it: b) { + for (auto* it : b) { delete it; } } void HessianDiagPlusRowRank::alloc_for_limited_mem(const size_type& mem_length) { - //note: St_ and Yt_ always have l_curr_ rows - if(l_curr_ == mem_length) { + // note: St_ and Yt_ always have l_curr_ rows + if (l_curr_ == mem_length) { assert(D_->get_size() == l_curr_); return; } @@ -233,7 +236,7 @@ void HessianDiagPlusRowRank::alloc_for_limited_mem(const size_type& mem_length) St_ = nlp_->alloc_multivector_primal(mem_length, l_max_); Yt_ = St_->alloc_clone(); - //these are local + // these are local L_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", mem_length, mem_length); D_ = LinearAlgebraFactory::create_vector("DEFAULT", mem_length); } @@ -241,7 +244,7 @@ void HessianDiagPlusRowRank::alloc_for_limited_mem(const size_type& mem_length) bool HessianDiagPlusRowRank::update_logbar_diag(const hiopVector& Dx) { DhInv_->setToConstant(sigma_); - DhInv_->axpy(1.0,Dx); + DhInv_->axpy(1.0, Dx); Dx_->copyFrom(Dx); #ifdef HIOP_DEEPCHECKS assert(DhInv_->allPositive()); @@ -282,9 +285,9 @@ void HessianDiagPlusRowRank::print(FILE* f, hiopOutVerbosity v, const char* msg) #include bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, - const hiopVector& grad_f_curr, - const hiopMatrix& Jac_c_curr_in, - const hiopMatrix& Jac_d_curr_in) + const hiopVector& grad_f_curr, + const hiopMatrix& Jac_c_curr_in, + const hiopMatrix& Jac_d_curr_in) { nlp_->runStats.tmSolverInternal.start(); @@ -297,29 +300,30 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, assert(it_curr.sxl->matchesPattern(nlp_->get_ixl())); assert(it_curr.sxu->matchesPattern(nlp_->get_ixu())); #endif - //on first call l_curr_=-1 - if(l_curr_>=0) { - size_type n = grad_f_curr.get_size(); - //compute s_new = x_curr-x_prev + // on first call l_curr_=-1 + if (l_curr_ >= 0) { + size_type n = grad_f_curr.get_size(); + // compute s_new = x_curr-x_prev hiopVector& s_new = new_n_vec1(n); s_new.copyFrom(*it_curr.x); - s_new.axpy(-1.,*it_prev_->x); - double s_infnorm=s_new.infnorm(); - if(s_infnorm>=100*std::numeric_limits::epsilon()) { //norm of s not too small + s_new.axpy(-1., *it_prev_->x); + double s_infnorm = s_new.infnorm(); + if (s_infnorm >= 100 * std::numeric_limits::epsilon()) { // norm of s not too small - //compute y_new = \grad J(x_curr,\lambda_curr) - \grad J(x_prev, \lambda_curr) (yes, J(x_prev, \lambda_curr)) - // = graf_f_curr-grad_f_prev + (Jac_c_curr-Jac_c_prev)yc_curr+ (Jac_d_curr-Jac_c_prev)yd_curr - zl_curr*s_new + zu_curr*s_new + // compute y_new = \grad J(x_curr,\lambda_curr) - \grad J(x_prev, \lambda_curr) (yes, J(x_prev, \lambda_curr)) + // = graf_f_curr-grad_f_prev + (Jac_c_curr-Jac_c_prev)yc_curr+ (Jac_d_curr-Jac_c_prev)yd_curr - + // zl_curr*s_new + zu_curr*s_new hiopVector& y_new = new_n_vec2(n); - y_new.copyFrom(grad_f_curr); + y_new.copyFrom(grad_f_curr); y_new.axpy(-1., *grad_f_prev_); - Jac_c_curr.transTimesVec (1.0, y_new, 1.0, *it_curr.yc); - //!opt if nlp_->Jac_c_isLinear no need for the multiplications - Jac_c_prev_->transTimesVec(1.0, y_new,-1.0, *it_curr.yc); - //!opt same here - Jac_d_curr.transTimesVec (1.0, y_new, 1.0, *it_curr.yd); - Jac_d_prev_->transTimesVec(1.0, y_new,-1.0, *it_curr.yd); - - double sTy = s_new.dotProductWith(y_new), s_nrm2=s_new.twonorm(), y_nrm2=y_new.twonorm(); + Jac_c_curr.transTimesVec(1.0, y_new, 1.0, *it_curr.yc); + //! opt if nlp_->Jac_c_isLinear no need for the multiplications + Jac_c_prev_->transTimesVec(1.0, y_new, -1.0, *it_curr.yc); + //! opt same here + Jac_d_curr.transTimesVec(1.0, y_new, 1.0, *it_curr.yd); + Jac_d_prev_->transTimesVec(1.0, y_new, -1.0, *it_curr.yd); + + double sTy = s_new.dotProductWith(y_new), s_nrm2 = s_new.twonorm(), y_nrm2 = y_new.twonorm(); #ifdef HIOP_DEEPCHECKS nlp_->log->printf(hovLinAlgScalarsVerb, @@ -327,82 +331,86 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, sTy, s_nrm2, y_nrm2); - nlp_->log->write("HessianDiagPlusRowRank s_new",s_new, hovIteration); - nlp_->log->write("HessianDiagPlusRowRank y_new",y_new, hovIteration); + nlp_->log->write("HessianDiagPlusRowRank s_new", s_new, hovIteration); + nlp_->log->write("HessianDiagPlusRowRank y_new", y_new, hovIteration); #endif - if(sTy>s_nrm2*y_nrm2*sqrt(std::numeric_limits::epsilon())) { //sTy far away from zero + if (sTy > s_nrm2 * y_nrm2 * sqrt(std::numeric_limits::epsilon())) { // sTy far away from zero - if(l_max_>0) { - //compute the new row in L, update S and Y (either augment them or shift cols and add s_new and y_new) + if (l_max_ > 0) { + // compute the new row in L, update S and Y (either augment them or shift cols and add s_new and y_new) hiopVector& YTs = new_l_vec1(l_curr_); Yt_->timesVec(0.0, YTs, 1.0, s_new); - //update representation - if(l_curr_appendRow(s_new); Yt_->appendRow(y_new); growL(l_curr_, l_max_, YTs); growD(l_curr_, l_max_, sTy); l_curr_++; - } else { - //shift + } + else { + // shift St_->shiftRows(-1); Yt_->shiftRows(-1); - St_->replaceRow(l_max_-1, s_new); - Yt_->replaceRow(l_max_-1, y_new); - updateL(YTs,sTy); + St_->replaceRow(l_max_ - 1, s_new); + Yt_->replaceRow(l_max_ - 1, y_new); + updateL(YTs, sTy); updateD(sTy); l_curr_ = l_max_; } - } //end of l_max_>0 + } // end of l_max_>0 #ifdef HIOP_DEEPCHECKS nlp_->log->printf(hovMatrices, "\nHessianDiagPlusRowRank: these are L and D from the BFGS compact representation\n"); nlp_->log->write("L", *L_, hovMatrices); nlp_->log->write("D", *D_, hovMatrices); nlp_->log->printf(hovMatrices, "\n"); #endif - //update B0 (i.e., sigma) - switch (sigma_update_strategy_ ) { - case SIGMA_STRATEGY1: - sigma_ = sTy/(s_nrm2*s_nrm2); - break; - case SIGMA_STRATEGY2: - sigma_ = y_nrm2*y_nrm2/sTy; - break; - case SIGMA_STRATEGY3: - sigma_ = sqrt(s_nrm2*s_nrm2 / y_nrm2 / y_nrm2); - break; - case SIGMA_STRATEGY4: - sigma_ = 0.5*(sTy/(s_nrm2*s_nrm2)+y_nrm2*y_nrm2/sTy); - break; - case SIGMA_CONSTANT: - sigma_ = sigma0_; - break; - default: - assert(false && "Option value for sigma_update_strategy was not recognized."); - break; - } // else of the switch - //safe guard it + // update B0 (i.e., sigma) + switch (sigma_update_strategy_) { + case SIGMA_STRATEGY1: + sigma_ = sTy / (s_nrm2 * s_nrm2); + break; + case SIGMA_STRATEGY2: + sigma_ = y_nrm2 * y_nrm2 / sTy; + break; + case SIGMA_STRATEGY3: + sigma_ = sqrt(s_nrm2 * s_nrm2 / y_nrm2 / y_nrm2); + break; + case SIGMA_STRATEGY4: + sigma_ = 0.5 * (sTy / (s_nrm2 * s_nrm2) + y_nrm2 * y_nrm2 / sTy); + break; + case SIGMA_CONSTANT: + sigma_ = sigma0_; + break; + default: + assert(false && "Option value for sigma_update_strategy was not recognized."); + break; + } // else of the switch + // safe guard it sigma_ = fmax(fmin(sigma_safe_max_, sigma_), sigma_safe_min_); nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: sigma was updated to %22.16e\n", sigma_); - } else { //sTy is too small or negative -> skip + } + else { // sTy is too small or negative -> skip nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: s^T*y=%12.6e not positive enough... skipping the Hessian update\n", sTy); } - } else {// norm of s_new is too small -> skip + } + else { // norm of s_new is too small -> skip nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: ||s_new||=%12.6e too small... skipping the Hessian update\n", s_infnorm); } - //save this stuff for next update + // save this stuff for next update it_prev_->copyFrom(it_curr); grad_f_prev_->copyFrom(grad_f_curr); Jac_c_prev_->copyFrom(Jac_c_curr); Jac_d_prev_->copyFrom(Jac_d_curr); nlp_->log->printf(hovLinAlgScalarsVerb, "HessianDiagPlusRowRank: storing the iteration info as 'previous'\n", s_infnorm); - } else { - //this is the first optimization iterate, just save the iterate and exit + } + else { + // this is the first optimization iterate, just save the iterate and exit it_prev_->copyFrom(it_curr); grad_f_prev_->copyFrom(grad_f_curr); Jac_c_prev_->copyFrom(Jac_c_curr); @@ -416,7 +424,7 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, return true; } -/* +/* * The dirty work to bring this^{-1} to the form * M = DhInv - DhInv*[B0*S Y] * V^{-1} * [ S^T*B0 ] *DhInv * [ Y^T ] @@ -428,66 +436,65 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, */ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation() { - size_type n=St_->n(); - size_type l=St_->m(); + size_type n = St_->n(); + size_type l = St_->m(); - //grow L,D, andV if needed - if(L_->m()!=l) { + // grow L,D, andV if needed + if (L_->m() != l) { delete L_; L_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", l, l); } - if(D_->get_size()!=l) { + if (D_->get_size() != l) { delete D_; D_ = LinearAlgebraFactory::create_vector("DEFAULT", l); } - if(V_->m()!=2*l) { + if (V_->m() != 2 * l) { delete V_; - V_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 2*l, 2*l); + V_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 2 * l, 2 * l); } //-- block (2,2) hiopMatrixDense& DpYtDhInvY = new_lxl_mat1(l); - sym_mat_times_diag_times_mattrans_local(0.0, DpYtDhInvY, 1.0,*Yt_,*DhInv_); + sym_mat_times_diag_times_mattrans_local(0.0, DpYtDhInvY, 1.0, *Yt_, *DhInv_); #ifdef HIOP_USE_MPI - const size_t buffsize=l*l*sizeof(double); + const size_t buffsize = l * l * sizeof(double); memcpy(buff1_lxlx3_, DpYtDhInvY.local_data(), buffsize); #else DpYtDhInvY.addDiagonal(1., *D_); - V_->copyBlockFromMatrix(l,l,DpYtDhInvY); + V_->copyBlockFromMatrix(l, l, DpYtDhInvY); #endif //-- block (1,2) - hiopMatrixDense& StB0DhInvYmL = DpYtDhInvY; //just a rename - hiopVector& B0DhInv = new_n_vec1(n); + hiopMatrixDense& StB0DhInvYmL = DpYtDhInvY; // just a rename + hiopVector& B0DhInv = new_n_vec1(n); B0DhInv.copyFrom(*DhInv_); B0DhInv.scale(sigma_); mat_times_diag_times_mattrans_local(StB0DhInvYmL, *St_, B0DhInv, *Yt_); #ifdef HIOP_USE_MPI - memcpy(buff1_lxlx3_+l*l, StB0DhInvYmL.local_data(), buffsize); + memcpy(buff1_lxlx3_ + l * l, StB0DhInvYmL.local_data(), buffsize); #else - //substract L + // substract L StB0DhInvYmL.addMatrix(-1.0, *L_); // (1,2) block in V - V_->copyBlockFromMatrix(0,l,StB0DhInvYmL); + V_->copyBlockFromMatrix(0, l, StB0DhInvYmL); #endif //-- block (2,2) - hiopVector& theDiag = B0DhInv; //just a rename, also reuses values - theDiag.addConstant(-1.0); //at this point theDiag=DhInv*B0-I + hiopVector& theDiag = B0DhInv; // just a rename, also reuses values + theDiag.addConstant(-1.0); // at this point theDiag=DhInv*B0-I theDiag.scale(sigma_); - hiopMatrixDense& StDS = DpYtDhInvY; //a rename + hiopMatrixDense& StDS = DpYtDhInvY; // a rename sym_mat_times_diag_times_mattrans_local(0.0, StDS, 1.0, *St_, theDiag); #ifdef HIOP_USE_MPI - memcpy(buff1_lxlx3_+2*l*l, DpYtDhInvY.local_data(), buffsize); + memcpy(buff1_lxlx3_ + 2 * l * l, DpYtDhInvY.local_data(), buffsize); #else - V_->copyBlockFromMatrix(0,0,StDS); + V_->copyBlockFromMatrix(0, 0, StDS); #endif - #ifdef HIOP_USE_MPI int ierr; - ierr = MPI_Allreduce(buff1_lxlx3_, buff2_lxlx3_, 3*l*l, MPI_DOUBLE, MPI_SUM, nlp_->get_comm()); - assert(ierr==MPI_SUCCESS); + ierr = MPI_Allreduce(buff1_lxlx3_, buff2_lxlx3_, 3 * l * l, MPI_DOUBLE, MPI_SUM, nlp_->get_comm()); + assert(ierr == MPI_SUCCESS); // - block (2,2) DpYtDhInvY.copyFrom(buff2_lxlx3_); @@ -495,12 +502,12 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation() V_->copyBlockFromMatrix(l, l, DpYtDhInvY); // - block (1,2) - StB0DhInvYmL.copyFrom(buff2_lxlx3_+l*l); + StB0DhInvYmL.copyFrom(buff2_lxlx3_ + l * l); StB0DhInvYmL.addMatrix(-1.0, *L_); V_->copyBlockFromMatrix(0, l, StB0DhInvYmL); // - block (1,1) - StDS.copyFrom(buff2_lxlx3_ + 2*l*l); + StDS.copyFrom(buff2_lxlx3_ + 2 * l * l); V_->copyBlockFromMatrix(0, 0, StDS); #endif #ifdef HIOP_DEEPCHECKS @@ -509,7 +516,7 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation() Vmat_->overwriteLowerTriangleWithUpper(); #endif - //finally, factorize V + // finally, factorize V factorizeV(); matrix_changed_ = false; @@ -522,27 +529,27 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation() * * M is is nxn, S,Y are nxl, V is upper triangular 2lx2l, and x is nx1 * Remember we store Yt=Y^T and St=S^T - */ + */ void HessianDiagPlusRowRank::solve(const hiopVector& rhsx, hiopVector& x) { - if(matrix_changed_) { + if (matrix_changed_) { updateInternalBFGSRepresentation(); } - size_type n=St_->n(), l=St_->m(); + size_type n = St_->n(), l = St_->m(); #ifdef HIOP_DEEPCHECKS - assert(rhsx.get_size()==n); - assert(x.get_size()==n); - assert(DhInv_->get_size()==n); + assert(rhsx.get_size() == n); + assert(x.get_size() == n); + assert(DhInv_->get_size() == n); assert(DhInv_->isfinite_local() && "inf or nan entry detected"); assert(rhsx.isfinite_local() && "inf or nan entry detected in rhs"); #endif - //1. x = DhInv*res + // 1. x = DhInv*res x.copyFrom(rhsx); x.componentMult(*DhInv_); - //2. stx= S^T*B0*DhInv*res and ytx=Y^T*DhInv*res + // 2. stx= S^T*B0*DhInv*res and ytx=Y^T*DhInv*res hiopVector& stx = new_l_vec1(l); hiopVector& ytx = new_l_vec2(l); stx.setToZero(); @@ -550,28 +557,28 @@ void HessianDiagPlusRowRank::solve(const hiopVector& rhsx, hiopVector& x) Yt_->timesVec(0.0, ytx, 1.0, x); hiopVector& B0DhInvx = new_n_vec1(n); - B0DhInvx.copyFrom(x); //it contains DhInv*res - B0DhInvx.scale(sigma_); //B0*(DhInv*res) + B0DhInvx.copyFrom(x); // it contains DhInv*res + B0DhInvx.scale(sigma_); // B0*(DhInv*res) St_->timesVec(0.0, stx, 1.0, B0DhInvx); - //3. solve with V - hiopVector& spart=stx; hiopVector& ypart=ytx; - solve_with_V(spart,ypart); + // 3. solve with V + hiopVector& spart = stx; + hiopVector& ypart = ytx; + solve_with_V(spart, ypart); - //4. multiply with DhInv*[B0*S Y], namely - // result = DhInv*(B0*S*spart + Y*ypart) - hiopVector& result = new_n_vec1(n); + // 4. multiply with DhInv*[B0*S Y], namely + // result = DhInv*(B0*S*spart + Y*ypart) + hiopVector& result = new_n_vec1(n); St_->transTimesVec(0.0, result, 1.0, spart); result.scale(sigma_); Yt_->transTimesVec(1.0, result, 1.0, ypart); result.componentMult(*DhInv_); - //5. x = first term - second term = x_computed_in_1 - result - x.axpy(-1.0,result); + // 5. x = first term - second term = x_computed_in_1 - result + x.axpy(-1.0, result); #ifdef HIOP_DEEPCHECKS assert(x.isfinite_local() && "inf or nan entry detected in computed solution"); #endif - } /* W = beta*W + alpha*X*inverse(this)*X^T (a more efficient version of solve) @@ -580,151 +587,155 @@ void HessianDiagPlusRowRank::solve(const hiopVector& rhsx, hiopVector& x) * [ Y^T ] * W is kxk, S,Y are nxl, DhInv,B0 are n, V is 2lx2l * X is kxn - */ -void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double beta, - hiopMatrixDense& W, - double alpha, + */ +void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double beta, + hiopMatrixDense& W, + double alpha, const hiopMatrixDense& X) { - if(matrix_changed_) { + if (matrix_changed_) { updateInternalBFGSRepresentation(); } - size_type n=St_->n(), l=St_->m(); - size_type k=W.m(); - assert(X.m()==k); - assert(X.n()==n); + size_type n = St_->n(), l = St_->m(); + size_type k = W.m(); + assert(X.m() == k); + assert(X.n() == n); #ifdef HIOP_DEEPCHECKS - nlp_->log->write("sym_mat_times_inverse_times_mattrans: X is: ", X, hovMatrices); -#endif + nlp_->log->write("sym_mat_times_inverse_times_mattrans: X is: ", X, hovMatrices); +#endif - //1. compute W=beta*W + alpha*X*DhInv*X' + // 1. compute W=beta*W + alpha*X*DhInv*X' #ifdef HIOP_USE_MPI - if(0==nlp_->get_rank()) { - sym_mat_times_diag_times_mattrans_local(beta,W,alpha,X,*DhInv_); - } else { - sym_mat_times_diag_times_mattrans_local(0.0, W,alpha,X,*DhInv_); + if (0 == nlp_->get_rank()) { + sym_mat_times_diag_times_mattrans_local(beta, W, alpha, X, *DhInv_); + } + else { + sym_mat_times_diag_times_mattrans_local(0.0, W, alpha, X, *DhInv_); } - //W will be MPI_All_reduced later + // W will be MPI_All_reduced later #else - sym_mat_times_diag_times_mattrans_local(beta,W,alpha,X,*DhInv_); + sym_mat_times_diag_times_mattrans_local(beta, W, alpha, X, *DhInv_); #endif - //2. compute S1=X*DhInv*B0*S and Y1=X*DhInv*Y - auto& S1 = new_S1(X, *St_); - auto& Y1 = new_Y1(X, *Yt_); //both are kxl + // 2. compute S1=X*DhInv*B0*S and Y1=X*DhInv*Y + auto& S1 = new_S1(X, *St_); + auto& Y1 = new_Y1(X, *Yt_); // both are kxl hiopVector& B0DhInv = new_n_vec1(n); B0DhInv.copyFrom(*DhInv_); B0DhInv.scale(sigma_); mat_times_diag_times_mattrans_local(S1, X, B0DhInv, *St_); - mat_times_diag_times_mattrans_local(Y1, X, *DhInv_, *Yt_); + mat_times_diag_times_mattrans_local(Y1, X, *DhInv_, *Yt_); - //3. reduce W, S1, and Y1 (dimensions: kxk, kxl, kxl) - hiopMatrixDense& S2Y2 = new_kx2l_mat1(k,l); //Initialy S2Y2 = [Y1 S1] - S2Y2.copyBlockFromMatrix(0,0,S1); - S2Y2.copyBlockFromMatrix(0,l,Y1); + // 3. reduce W, S1, and Y1 (dimensions: kxk, kxl, kxl) + hiopMatrixDense& S2Y2 = new_kx2l_mat1(k, l); // Initialy S2Y2 = [Y1 S1] + S2Y2.copyBlockFromMatrix(0, 0, S1); + S2Y2.copyBlockFromMatrix(0, l, Y1); #ifdef HIOP_USE_MPI int ierr; - ierr = MPI_Allreduce(S2Y2.local_data(), buff_2lxk_, 2*l*k, MPI_DOUBLE, MPI_SUM, nlp_->get_comm()); - assert(ierr==MPI_SUCCESS); - ierr = MPI_Allreduce(W.local_data(), buff_kxk_, k*k, MPI_DOUBLE, MPI_SUM, nlp_->get_comm()); - assert(ierr==MPI_SUCCESS); + ierr = MPI_Allreduce(S2Y2.local_data(), buff_2lxk_, 2 * l * k, MPI_DOUBLE, MPI_SUM, nlp_->get_comm()); + assert(ierr == MPI_SUCCESS); + ierr = MPI_Allreduce(W.local_data(), buff_kxk_, k * k, MPI_DOUBLE, MPI_SUM, nlp_->get_comm()); + assert(ierr == MPI_SUCCESS); S2Y2.copyFrom(buff_2lxk_); W.copyFrom(buff_kxk_); - //also copy S1 and Y1 - S1.copyFromMatrixBlock(S2Y2, 0,0); - Y1.copyFromMatrixBlock(S2Y2, 0,l); + // also copy S1 and Y1 + S1.copyFromMatrixBlock(S2Y2, 0, 0); + Y1.copyFromMatrixBlock(S2Y2, 0, l); #endif #ifdef HIOP_DEEPCHECKS nlp_->log->write("sym_mat_times_inverse_times_mattrans: W first term is: ", W, hovMatrices); -#endif - //4. [S2] = V \ [S1^T] - // [Y2] [Y1^T] - //S2Y2 is exactly [S1^T] when Fortran Lapack looks at it - // [Y1^T] - hiopMatrixDense& RHS_fortran = S2Y2; +#endif + // 4. [S2] = V \ [S1^T] + // [Y2] [Y1^T] + // S2Y2 is exactly [S1^T] when Fortran Lapack looks at it + // [Y1^T] + hiopMatrixDense& RHS_fortran = S2Y2; solve_with_V(RHS_fortran); - //5. W = W-alpha*[S1 Y1]*[S2^T] - // [Y2^T] - S2Y2 = RHS_fortran; - alpha = 0-alpha; - hiopMatrixDense& S2=new_kxl_mat1(k,l); + // 5. W = W-alpha*[S1 Y1]*[S2^T] + // [Y2^T] + S2Y2 = RHS_fortran; + alpha = 0 - alpha; + hiopMatrixDense& S2 = new_kxl_mat1(k, l); S2.copyFromMatrixBlock(S2Y2, 0, 0); S1.timesMatTrans_local(1.0, W, alpha, S2); - hiopMatrixDense& Y2=S2; + hiopMatrixDense& Y2 = S2; Y2.copyFromMatrixBlock(S2Y2, 0, l); Y1.timesMatTrans_local(1.0, W, alpha, Y2); - //nlp_->log->write("sym_mat_times_inverse_times_mattrans: Y1 is : ", Y1, hovMatrices); - //nlp_->log->write("sym_mat_times_inverse_times_mattrans: Y2 is : ", Y2, hovMatrices); - //nlp_->log->write("sym_mat_times_inverse_times_mattrans: W is : ", W, hovMatrices); + // nlp_->log->write("sym_mat_times_inverse_times_mattrans: Y1 is : ", Y1, hovMatrices); + // nlp_->log->write("sym_mat_times_inverse_times_mattrans: Y2 is : ", Y2, hovMatrices); + // nlp_->log->write("sym_mat_times_inverse_times_mattrans: W is : ", W, hovMatrices); #ifdef HIOP_DEEPCHECKS nlp_->log->write("sym_mat_times_inverse_times_mattrans: final matrix is : ", W, hovMatrices); -#endif +#endif } void HessianDiagPlusRowRank::factorizeV() { - int N = V_->n(); + int N = V_->n(); int lda = N; int info; - if(N==0) { + if (N == 0) { return; } #ifdef HIOP_DEEPCHECKS - nlp_->log->write("factorizeV: V is ", *V_, hovMatrices); + nlp_->log->write("factorizeV: V is ", *V_, hovMatrices); #endif - char uplo='L'; //V is upper in C++ so it's lower in fortran + char uplo = 'L'; // V is upper in C++ so it's lower in fortran - if(V_ipiv_vec_==nullptr) { + if (V_ipiv_vec_ == nullptr) { V_ipiv_vec_ = new int[N]; } else { - if(V_ipiv_size_!=N) { + if (V_ipiv_size_ != N) { delete[] V_ipiv_vec_; - V_ipiv_vec_ = new int[N]; + V_ipiv_vec_ = new int[N]; V_ipiv_size_ = N; } } - int lwork=-1;//inquire sizes + int lwork = -1; // inquire sizes double Vwork_tmp; DSYTRF(&uplo, &N, V_->local_data(), &lda, V_ipiv_vec_, &Vwork_tmp, &lwork, &info); - assert(info==0); + assert(info == 0); - lwork=(int)Vwork_tmp; - if(lwork != V_work_vec_->get_size()) { - if(V_work_vec_!=nullptr) { + lwork = (int)Vwork_tmp; + if (lwork != V_work_vec_->get_size()) { + if (V_work_vec_ != nullptr) { delete V_work_vec_; } V_work_vec_ = LinearAlgebraFactory::create_vector("DEFAULT", lwork); - } else assert(V_work_vec_); + } + else { + assert(V_work_vec_); + } DSYTRF(&uplo, &N, V_->local_data(), &lda, V_ipiv_vec_, V_work_vec_->local_data(), &lwork, &info); - - if(info<0) { + + if (info < 0) { nlp_->log->printf(hovError, "HessianDiagPlusRowRank::factorizeV error: %d arg to dsytrf has an illegal value\n", -info); - } else if(info>0) { + } + else if (info > 0) { nlp_->log->printf(hovError, - "HessianDiagPlusRowRank::factorizeV error: %d entry in the factorization's diagonal is exactly zero. " - "Division by zero will occur if a solve is attempted.\n", - info); + "HessianDiagPlusRowRank::factorizeV error: %d entry in the factorization's diagonal is exactly zero. " + "Division by zero will occur if a solve is attempted.\n", + info); } - assert(info==0); + assert(info == 0); #ifdef HIOP_DEEPCHECKS nlp_->log->write("factorizeV: factors of V: ", *V_, hovMatrices); #endif - } void HessianDiagPlusRowRank::solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y) { int N = V_->n(); - if(N==0) { + if (N == 0) { return; } @@ -733,53 +744,55 @@ void HessianDiagPlusRowRank::solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y) #ifdef HIOP_DEEPCHECKS nlp_->log->write("HessianDiagPlusRowRank::solve_with_V: RHS IN 's' part: ", rhs_s, hovMatrices); nlp_->log->write("HessianDiagPlusRowRank::solve_with_V: RHS IN 'y' part: ", rhs_y, hovMatrices); - hiopVector* rhs_saved= LinearAlgebraFactory::create_vector("DEFAULT", rhs_s.get_size()+rhs_y.get_size()); + hiopVector* rhs_saved = LinearAlgebraFactory::create_vector("DEFAULT", rhs_s.get_size() + rhs_y.get_size()); rhs_saved->copyFromStarting(0, rhs_s); rhs_saved->copyFromStarting(l, rhs_y); #endif - int lda=N, one=1, info; - char uplo='L'; + int lda = N, one = 1, info; + char uplo = 'L'; #ifdef HIOP_DEEPCHECKS - assert(N==rhs_s.get_size()+rhs_y.get_size()); + assert(N == rhs_s.get_size() + rhs_y.get_size()); #endif - hiopVector& rhs=new_2l_vec1(l); + hiopVector& rhs = new_2l_vec1(l); rhs.copyFromStarting(0, rhs_s); rhs.copyFromStarting(l, rhs_y); DSYTRS(&uplo, &N, &one, V_->local_data(), &lda, V_ipiv_vec_, rhs.local_data(), &N, &info); - if(info<0) { - nlp_->log->printf(hovError, "HessianDiagPlusRowRank::solve_with_V error: %d arg to dsytrf has an illegal value\n", -info); + if (info < 0) { + nlp_->log->printf(hovError, + "HessianDiagPlusRowRank::solve_with_V error: %d arg to dsytrf has an illegal value\n", + -info); } - assert(info==0); + assert(info == 0); - //copy back the solution - rhs.copyToStarting(0,rhs_s); - rhs.copyToStarting(l,rhs_y); + // copy back the solution + rhs.copyToStarting(0, rhs_s); + rhs.copyToStarting(l, rhs_y); #ifdef HIOP_DEEPCHECKS nlp_->log->write("solve_with_V: SOL OUT 's' part: ", rhs_s, hovMatrices); nlp_->log->write("solve_with_V: SOL OUT 'y' part: ", rhs_y, hovMatrices); - //residual calculation - double nrmrhs=rhs_saved->infnorm(); + // residual calculation + double nrmrhs = rhs_saved->infnorm(); Vmat_->timesVec(1.0, *rhs_saved, -1.0, rhs); - double nrmres=rhs_saved->infnorm(); - //nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank::solve_with_V 1rhs: rel resid norm=%g\n", nrmres/(1+nrmrhs)); - nlp_->log->printf(hovScalars, "HessianDiagPlusRowRank::solve_with_V 1rhs: rel resid norm=%g\n", nrmres/(1+nrmrhs)); - if(nrmres>1e-8) { + double nrmres = rhs_saved->infnorm(); + // nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank::solve_with_V 1rhs: rel resid norm=%g\n", + // nrmres/(1+nrmrhs)); + nlp_->log->printf(hovScalars, "HessianDiagPlusRowRank::solve_with_V 1rhs: rel resid norm=%g\n", nrmres / (1 + nrmrhs)); + if (nrmres > 1e-8) { nlp_->log->printf(hovWarning, "HessianDiagPlusRowRank::solve_with_V large residual=%g\n", nrmres); } delete rhs_saved; #endif - } void HessianDiagPlusRowRank::solve_with_V(hiopMatrixDense& rhs) { int N = V_->n(); - if(0==N) { + if (0 == N) { return; } @@ -788,42 +801,44 @@ void HessianDiagPlusRowRank::solve_with_V(hiopMatrixDense& rhs) hiopMatrixDense* rhs_saved = rhs.new_copy(); #endif - //rhs is transpose in C++ + // rhs is transpose in C++ - char uplo='L'; - int lda=N, ldb=N, nrhs=rhs.m(), info; + char uplo = 'L'; + int lda = N, ldb = N, nrhs = rhs.m(), info; #ifdef HIOP_DEEPCHECKS - assert(N==rhs.n()); + assert(N == rhs.n()); #endif DSYTRS(&uplo, &N, &nrhs, V_->local_data(), &lda, V_ipiv_vec_, rhs.local_data(), &ldb, &info); - if(info<0) { - nlp_->log->printf(hovError, "HessianDiagPlusRowRank::solve_with_V error: %d arg to dsytrf has an illegal value\n", -info); + if (info < 0) { + nlp_->log->printf(hovError, + "HessianDiagPlusRowRank::solve_with_V error: %d arg to dsytrf has an illegal value\n", + -info); } - assert(info==0); + assert(info == 0); #ifdef HIOP_DEEPCHECKS nlp_->log->write("solve_with_V: SOL OUT: ", rhs, hovMatrices); - - hiopMatrixDense& sol = rhs; //matrix of solutions + + hiopMatrixDense& sol = rhs; // matrix of solutions /// TODO: get rid of these uses of specific hiopVector implementation - hiopVector* x = LinearAlgebraFactory::create_vector("DEFAULT", rhs.n()); //again, keep in mind rhs is transposed - hiopVector* r = LinearAlgebraFactory::create_vector("DEFAULT", rhs.n()); + hiopVector* x = LinearAlgebraFactory::create_vector("DEFAULT", rhs.n()); // again, keep in mind rhs is transposed + hiopVector* r = LinearAlgebraFactory::create_vector("DEFAULT", rhs.n()); - double resnorm=0.0; - for(int k=0; kgetRow(k, *r); - sol.getRow(k,*x); - double nrmrhs = r->infnorm();//nrmrhs=.0; + sol.getRow(k, *x); + double nrmrhs = r->infnorm(); // nrmrhs=.0; Vmat_->timesVec(1.0, *r, -1.0, *x); double nrmres = r->infnorm(); - if(nrmres>1e-8) { + if (nrmres > 1e-8) { nlp_->log->printf(hovWarning, "HessianDiagPlusRowRank::solve_with_V mult-rhs: rhs number %d has large resid norm=%g\n", k, nrmres); } - if(nrmres/(nrmrhs+1)>resnorm) { - resnorm=nrmres/(nrmrhs+1); + if (nrmres / (nrmrhs + 1) > resnorm) { + resnorm = nrmres / (nrmrhs + 1); } } nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank::solve_with_V mult-rhs: rel resid norm=%g\n", resnorm); @@ -831,38 +846,37 @@ void HessianDiagPlusRowRank::solve_with_V(hiopMatrixDense& rhs) delete r; delete rhs_saved; #endif - } void HessianDiagPlusRowRank::growL(const int& lmem_curr, const int& lmem_max, const hiopVector& YTs) { int l = L_->m(); #ifdef HIOP_DEEPCHECKS - assert(l==L_->n()); - assert(lmem_curr==l); - assert(lmem_max>=l); + assert(l == L_->n()); + assert(lmem_curr == l); + assert(lmem_max >= l); #endif - //newL = [ L 0] - // [ Y^T*s 0] - hiopMatrixDense* newL = LinearAlgebraFactory::create_matrix_dense("DEFAULT", l+1, l+1); + // newL = [ L 0] + // [ Y^T*s 0] + hiopMatrixDense* newL = LinearAlgebraFactory::create_matrix_dense("DEFAULT", l + 1, l + 1); assert(newL); - //copy from L to newL - newL->copyBlockFromMatrix(0,0, *L_); - - double* newL_mat = newL->local_data(); //doing the rest here - const double* YTs_vec = YTs.local_data_const(); - //for(int j=0; jcopyBlockFromMatrix(0, 0, *L_); + + double* newL_mat = newL->local_data(); // doing the rest here + const double* YTs_vec = YTs.local_data_const(); + // for(int j=0; jget_size(); - assert(l==lmem_curr); - assert(lmem_max>=l); + assert(l == lmem_curr); + assert(lmem_max >= l); - hiopVector* Dnew = LinearAlgebraFactory::create_vector("DEFAULT", l+1); - double* Dnew_vec = Dnew->local_data(); - memcpy(Dnew_vec, D_->local_data_const(), l*sizeof(double)); + hiopVector* Dnew = LinearAlgebraFactory::create_vector("DEFAULT", l + 1); + double* Dnew_vec = Dnew->local_data(); + memcpy(Dnew_vec, D_->local_data_const(), l * sizeof(double)); Dnew_vec[l] = sTy; delete D_; @@ -887,80 +901,79 @@ void HessianDiagPlusRowRank::growD(const int& lmem_curr, const int& lmem_max, co */ void HessianDiagPlusRowRank::updateL(const hiopVector& YTs, const double& sTy) { - int l=YTs.get_size(); - assert(l==L_->m()); - assert(l==L_->n()); + int l = YTs.get_size(); + assert(l == L_->m()); + assert(l == L_->n()); #ifdef HIOP_DEEPCHECKS - assert(l_curr_==l); - assert(l_curr_==l_max_); + assert(l_curr_ == l); + assert(l_curr_ == l_max_); #endif - const int lm1=l-1; - double* L_mat=L_->local_data(); - const double* yts_vec=YTs.local_data_const(); - for(int i=1; ilocal_data(); + const double* yts_vec = YTs.local_data_const(); + for (int i = 1; i < lm1; i++) { + for (int j = 0; j < i; j++) { + // L_mat[i][j] = L_mat[i+1][j+1]; + L_mat[i * l + j] = L_mat[(i + 1) * l + j + 1]; } } - - //is this really needed? - //for(int i=0; iget_size(); + int l = D_->get_size(); double* D_vec = D_->local_data(); - for(int i=0; iget_size()==l) { + if (l_vec1_ != nullptr && l_vec1_->get_size() == l) { return *l_vec1_; } - if(l_vec1_!=nullptr) { + if (l_vec1_ != nullptr) { delete l_vec1_; } - l_vec1_= LinearAlgebraFactory::create_vector("DEFAULT", l); + l_vec1_ = LinearAlgebraFactory::create_vector("DEFAULT", l); return *l_vec1_; } -hiopVector& HessianDiagPlusRowRank::new_l_vec2(int l) +hiopVector& HessianDiagPlusRowRank::new_l_vec2(int l) { - if(l_vec2_!=nullptr && l_vec2_->get_size()==l) { + if (l_vec2_ != nullptr && l_vec2_->get_size() == l) { return *l_vec2_; } - if(l_vec2_!=nullptr) { + if (l_vec2_ != nullptr) { delete l_vec2_; } - l_vec2_= LinearAlgebraFactory::create_vector("DEFAULT", l); + l_vec2_ = LinearAlgebraFactory::create_vector("DEFAULT", l); return *l_vec2_; } hiopMatrixDense& HessianDiagPlusRowRank::new_lxl_mat1(int l) { - if(lxl_mat1_!=nullptr) { - if(l==lxl_mat1_->m()) { + if (lxl_mat1_ != nullptr) { + if (l == lxl_mat1_->m()) { return *lxl_mat1_; - } else { - delete lxl_mat1_; - lxl_mat1_=nullptr; + } + else { + delete lxl_mat1_; + lxl_mat1_ = nullptr; } } lxl_mat1_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", l, l); @@ -969,14 +982,15 @@ hiopMatrixDense& HessianDiagPlusRowRank::new_lxl_mat1(int l) hiopMatrixDense& HessianDiagPlusRowRank::new_kx2l_mat1(int k, int l) { - const int twol=2*l; - if(nullptr!=kx2l_mat1_) { - assert(kx2l_mat1_->m()==k); - if(twol==kx2l_mat1_->n()) { + const int twol = 2 * l; + if (nullptr != kx2l_mat1_) { + assert(kx2l_mat1_->m() == k); + if (twol == kx2l_mat1_->n()) { return *kx2l_mat1_; - } else { - delete kx2l_mat1_; - kx2l_mat1_=nullptr; + } + else { + delete kx2l_mat1_; + kx2l_mat1_ = nullptr; } } kx2l_mat1_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", k, twol); @@ -985,13 +999,14 @@ hiopMatrixDense& HessianDiagPlusRowRank::new_kx2l_mat1(int k, int l) hiopMatrixDense& HessianDiagPlusRowRank::new_kxl_mat1(int k, int l) { - if(kxl_mat1_!=nullptr) { - assert(kxl_mat1_->m()==k); - if( l==kxl_mat1_->n() ) { + if (kxl_mat1_ != nullptr) { + assert(kxl_mat1_->m() == k); + if (l == kxl_mat1_->n()) { return *kxl_mat1_; - } else { - delete kxl_mat1_; - kxl_mat1_=nullptr; + } + else { + delete kxl_mat1_; + kxl_mat1_ = nullptr; } } kxl_mat1_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", k, l); @@ -1000,74 +1015,76 @@ hiopMatrixDense& HessianDiagPlusRowRank::new_kxl_mat1(int k, int l) hiopMatrixDense& HessianDiagPlusRowRank::new_S1(const hiopMatrixDense& X, const hiopMatrixDense& St) { - //S1 is X*some_diag*S (kxl). Here St=S^T is lxn and X is kxn (l BFGS memory size, k number of constraints) + // S1 is X*some_diag*S (kxl). Here St=S^T is lxn and X is kxn (l BFGS memory size, k number of constraints) size_type k = X.m(); size_type l = St.m(); #ifdef HIOP_DEEPCHECKS - assert(St.n()==X.n()); - if(S1_!=nullptr) { - assert(S1_->m()==k); + assert(St.n() == X.n()); + if (S1_ != nullptr) { + assert(S1_->m() == k); } #endif - if(nullptr!=S1_ && S1_->n()!=l) { + if (nullptr != S1_ && S1_->n() != l) { delete S1_; - S1_=nullptr; + S1_ = nullptr; } - if(nullptr==S1_) { - S1_=LinearAlgebraFactory::create_matrix_dense("DEFAULT", k, l); + if (nullptr == S1_) { + S1_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", k, l); } return *S1_; } hiopMatrixDense& HessianDiagPlusRowRank::new_Y1(const hiopMatrixDense& X, const hiopMatrixDense& Yt) { - //Y1 is X*somediag*Y (kxl). Here Yt=Y^T is lxn, X is kxn + // Y1 is X*somediag*Y (kxl). Here Yt=Y^T is lxn, X is kxn size_type k = X.m(); size_type l = Yt.m(); #ifdef HIOP_DEEPCHECKS - assert(X.n()==Yt.n()); - if(Y1_!=nullptr) { - assert(Y1_->m()==k); + assert(X.n() == Yt.n()); + if (Y1_ != nullptr) { + assert(Y1_->m() == k); } #endif - if(nullptr!=Y1_ && Y1_->n()!=l) { + if (nullptr != Y1_ && Y1_->n() != l) { delete Y1_; Y1_ = nullptr; } - if(nullptr==Y1_) { + if (nullptr == Y1_) { Y1_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", k, l); } return *Y1_; } #ifdef HIOP_DEEPCHECKS -void HessianDiagPlusRowRank::times_vec_no_logbar_term(double beta, hiopVector& y, double alpha, const hiopVector&x) +void HessianDiagPlusRowRank::times_vec_no_logbar_term(double beta, hiopVector& y, double alpha, const hiopVector& x) { this->times_vec_common(beta, y, alpha, x, false); } -#endif //HIOP_DEEPCHECKS +#endif // HIOP_DEEPCHECKS - -void HessianDiagPlusRowRank:: -times_vec_common(double beta, hiopVector& y, double alpha, const hiopVector& x, bool addLogTerm) const +void HessianDiagPlusRowRank::times_vec_common(double beta, + hiopVector& y, + double alpha, + const hiopVector& x, + bool addLogTerm) const { - size_type n=St_->n(); - assert(l_curr_==St_->m()); - assert(y.get_size()==n); + size_type n = St_->n(); + assert(l_curr_ == St_->m()); + assert(y.get_size() == n); assert(St_->get_local_size_n() == Yt_->get_local_size_n()); - //we have B+=B-B*s*B*s'/(s'*B*s)+yy'/(y'*s) - //B0 is sigma*I. There is an additional diagonal log-barrier term Dx_ + // we have B+=B-B*s*B*s'/(s'*B*s)+yy'/(y'*s) + // B0 is sigma*I. There is an additional diagonal log-barrier term Dx_ - bool print=false; - if(print) { + bool print = false; + if (print) { nlp_->log->printf(hovMatrices, "---HessianDiagPlusRowRank::times_vec \n"); nlp_->log->write("S=", *St_, hovMatrices); nlp_->log->write("Y=", *Yt_, hovMatrices); nlp_->log->write("DhInv=", *DhInv_, hovMatrices); nlp_->log->printf(hovMatrices, "sigma=%22.16e; addLogTerm=%d;\n", sigma_, addLogTerm); - if(addLogTerm) { + if (addLogTerm) { nlp_->log->write("Dx=", *Dx_, hovMatrices); } nlp_->log->printf(hovMatrices, "y=beta*y + alpha*this*x : beta=%g alpha=%g\n", beta, alpha); @@ -1075,18 +1092,18 @@ times_vec_common(double beta, hiopVector& y, double alpha, const hiopVector& x, nlp_->log->write("y_in=", y, hovMatrices); } - //allocate and compute a_k and b_k + // allocate and compute a_k and b_k //! make sure the pointers within these std::vectors are deallocated a.resize(l_curr_, nullptr); b.resize(l_curr_, nullptr); int n_local = Yt_->get_local_size_n(); - for(int k=0; kcopyFrom(Yt_->local_data() + k*n_local); - sk->copyFrom(St_->local_data() + k*n_local); - double skTyk=yk->dotProductWith(*sk); - - if(skTyk < std::numeric_limits::epsilon()) { + for (int k = 0; k < l_curr_; k++) { + // bk=yk/sqrt(yk'*sk) + yk->copyFrom(Yt_->local_data() + k * n_local); + sk->copyFrom(St_->local_data() + k * n_local); + double skTyk = yk->dotProductWith(*sk); + + if (skTyk < std::numeric_limits::epsilon()) { nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: ||s_k^T*y_k||=%12.6e too small and was set it to mach eps = %12.6e \n", skTyk, @@ -1094,57 +1111,57 @@ times_vec_common(double beta, hiopVector& y, double alpha, const hiopVector& x, skTyk = std::numeric_limits::epsilon(); } - if(a[k] == nullptr && b[k] == nullptr) { + if (a[k] == nullptr && b[k] == nullptr) { b[k] = nlp_->alloc_primal_vec(); a[k] = nlp_->alloc_primal_vec(); } - + b[k]->copyFrom(*yk); - b[k]->scale(1/sqrt(skTyk)); + b[k]->scale(1 / sqrt(skTyk)); - //compute ak by an inner loop + // compute ak by an inner loop a[k]->copyFrom(*sk); a[k]->scale(sigma_); - for(int i=0; idotProductWith(*sk); a[k]->axpy(+biTsk, *b[i]); double aiTsk = a[i]->dotProductWith(*sk); a[k]->axpy(-aiTsk, *a[i]); } double skTak = a[k]->dotProductWith(*sk); - a[k]->scale(1/sqrt(skTak)); + a[k]->scale(1 / sqrt(skTak)); } - //now we have B= B_0 + sum{ bk bk' - ak ak' : k=0,1,...,l_curr-1} - //compute the product with x - //y = beta*y+alpha*(B0+Dx)*x + alpha* sum { bk'x bk - ak'x ak : k=0,1,...,l_curr-1} + // now we have B= B_0 + sum{ bk bk' - ak ak' : k=0,1,...,l_curr-1} + // compute the product with x + // y = beta*y+alpha*(B0+Dx)*x + alpha* sum { bk'x bk - ak'x ak : k=0,1,...,l_curr-1} y.scale(beta); - if(addLogTerm) + if (addLogTerm) { y.axzpy(alpha, x, *Dx_); + } - y.axpy(alpha*sigma_, x); + y.axpy(alpha * sigma_, x); - for(int k=0; kdotProductWith(x); double akTx = a[k]->dotProductWith(x); - - y.axpy( alpha*bkTx, *b[k]); - y.axpy(-alpha*akTx, *a[k]); + + y.axpy(alpha * bkTx, *b[k]); + y.axpy(-alpha * akTx, *a[k]); } - if(print) { + if (print) { nlp_->log->write("y_out=", y, hovMatrices); } - } -void HessianDiagPlusRowRank::times_vec(double beta, hiopVector& y, double alpha, const hiopVector&x) +void HessianDiagPlusRowRank::times_vec(double beta, hiopVector& y, double alpha, const hiopVector& x) { this->times_vec_common(beta, y, alpha, x); } -void HessianDiagPlusRowRank::timesVec(double beta, hiopVector& y, double alpha, const hiopVector&x) const +void HessianDiagPlusRowRank::timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { this->times_vec_common(beta, y, alpha, x); } @@ -1153,87 +1170,89 @@ void HessianDiagPlusRowRank::timesVec(double beta, hiopVector& y, double alpha, * Internal helpers *************************************************************************/ -/* symmetric multiplication W = beta*W + alpha*X*Diag*X^T +/* symmetric multiplication W = beta*W + alpha*X*Diag*X^T * W is kxk local, X is kxn distributed and Diag is n, distributed * The ops are perform locally. The reduce is done separately/externally to decrease comm */ -void HessianDiagPlusRowRank::sym_mat_times_diag_times_mattrans_local(double beta, - hiopMatrixDense& W, - double alpha, +void HessianDiagPlusRowRank::sym_mat_times_diag_times_mattrans_local(double beta, + hiopMatrixDense& W, + double alpha, const hiopMatrixDense& X, - const hiopVector& d) + const hiopVector& d) { - size_type k=W.m(); - size_type n_local=X.get_local_size_n(); + size_type k = W.m(); + size_type n_local = X.get_local_size_n(); + + assert(X.m() == k); - assert(X.m()==k); - #ifdef HIOP_DEEPCHECKS - assert(W.n()==k); - assert(d.get_size()==X.n()); - assert(d.get_local_size()==n_local); + assert(W.n() == k); + assert(d.get_size() == X.n()); + assert(d.get_local_size() == n_local); #endif - - //#define chunk 512; //!opt + + // #define chunk 512; //!opt const double *xi, *xj; - double acc; - double *Wdata=W.local_data(); - const double *Xdata=X.local_data_const(); - const double* dd=d.local_data_const(); - for(int i=0; i>k and l=O(10). - * - * This class provides functionality to KKT linear system class for updating the secant approximation + * + * This class provides functionality to KKT linear system class for updating the secant approximation * and solving with Hk=Dk+Bk. - * - * Solving with Hk is performed by + * + * Solving with Hk is performed by * 1. computing the inverse as * Hk{-1} = (Dk+B0)^{-1} - (Dk+B0)^{-1}*[B0*Sk Yk]*( -N + [Sk'*B0]*(Dk+B0)^{-1}*[B0*Sk Yk] )^{-1} *[Sk'*B0]*(Dk+B0)^{-1} * ( [Yk' ] ) [Yk' ] - * 2. multiplying with the above expression. The inner 2lx2l inverse matrix is not explicitly computed; instead + * 2. multiplying with the above expression. The inner 2lx2l inverse matrix is not explicitly computed; instead * V=(-N + [Sk'*B0]*(Dk+B0)^{-1}*[B0*Sk Yk] ) is stored, factorized, and solved with. - * ( [Yk' ] [Yk' ] ) + * ( [Yk' ] [Yk' ] ) * * Notation used in the implementation provided by this class * - DhInv:=(Dk+B0)^{-1}, thus Hk{-1}=DhInv-DhInv*U*V^{-1}*U'*DhInv with * - U:=[B0*St' Yt'] and V is defined above - * - - * - * Parallel computations: Dk, B0 are distributed vectors, M is distributed + * - + * + * Parallel computations: Dk, B0 are distributed vectors, M is distributed * column-wise, and N is local (stored on all processors). */ class HessianDiagPlusRowRank : public hiopMatrix @@ -104,195 +104,201 @@ class HessianDiagPlusRowRank : public hiopMatrix /// Updates Hessian if hereditary positive definitness is maintained and returns true, otherwise false. virtual bool update(const hiopIterate& x_curr, - const hiopVector& grad_f_curr, - const hiopMatrix& Jac_c_curr, - const hiopMatrix& Jac_d_curr); + const hiopVector& grad_f_curr, + const hiopMatrix& Jac_c_curr, + const hiopMatrix& Jac_d_curr); /* updates the logBar diagonal term from the representation */ virtual bool update_logbar_diag(const hiopVector& Dx); /* solves this*x=res */ virtual void solve(const hiopVector& rhs, hiopVector& x); - + /* W = beta*W + alpha*X*inverse(this)*X^T (a more efficient version of solve) * This is performed as W = beta*W + alpha*X*(this\X^T) - */ + */ virtual void sym_mat_times_inverse_times_mattrans(double beta, hiopMatrixDense& W, double alpha, const hiopMatrixDense& X); #ifdef HIOP_DEEPCHECKS /* same as above but without the Dx term in H */ - virtual void times_vec_no_logbar_term(double beta, hiopVector& y, double alpha, const hiopVector&x); + virtual void times_vec_no_logbar_term(double beta, hiopVector& y, double alpha, const hiopVector& x); virtual void print(FILE* f, hiopOutVerbosity v, const char* msg) const; #endif /* Computes the product of the Hessian with a vector: y=beta*y+alpha*H*x. - * The function is supposed to use the underlying ***recursive*** definition of the + * The function is supposed to use the underlying ***recursive*** definition of the * quasi-Newton Hessian and is used for checking/testing/error calculation. */ - virtual void times_vec(double beta, hiopVector& y, double alpha, const hiopVector&x); + virtual void times_vec(double beta, hiopVector& y, double alpha, const hiopVector& x); /* code shared by the above two methods*/ - virtual void times_vec_common(double beta, hiopVector& y, double alpha, const hiopVector&x, bool add_logbar = false) const; + virtual void times_vec_common(double beta, + hiopVector& y, + double alpha, + const hiopVector& x, + bool add_logbar = false) const; protected: - friend class hiopAlgFilterIPMQuasiNewton; - int l_max_; //max memory size - int l_curr_; //number of pairs currently stored - double sigma_; //initial scaling factor of identity - double sigma0_; //default scaling factor of identity - - //Integer for the sigma update strategy - int sigma_update_strategy_; - //Min safety thresholds for sigma - double sigma_safe_min_; - //Max safety thresholds for sigma - double sigma_safe_max_; - //Pointer to the NLP formulation - hiopNlpDenseConstraints* nlp_; - + friend class hiopAlgFilterIPMQuasiNewton; + int l_max_; // max memory size + int l_curr_; // number of pairs currently stored + double sigma_; // initial scaling factor of identity + double sigma0_; // default scaling factor of identity + + // Integer for the sigma update strategy + int sigma_update_strategy_; + // Min safety thresholds for sigma + double sigma_safe_min_; + // Max safety thresholds for sigma + double sigma_safe_max_; + // Pointer to the NLP formulation + hiopNlpDenseConstraints* nlp_; + mutable std::vector a; mutable std::vector b; - hiopVector* yk; - hiopVector* sk; + hiopVector* yk; + hiopVector* sk; + private: // Vector for (B0+Dk)^{-1} - hiopVector* DhInv_; + hiopVector* DhInv_; // Dx_ is needed in times_vec (for residual checking in solveCompressed). Can be recomputed from DhInv, but I decided to - //store it instead to avoid round-off errors - hiopVector* Dx_; - - bool matrix_changed_; - - //These are matrices from the compact representation; they are updated at each iteration. - //More exactly Bk=B0-[B0*St' Yt']*[St*B0*St' L]*[St*B0] - // [ L' -D] [Yt ] - //Transpose of S and T are store to easily access columns + // store it instead to avoid round-off errors + hiopVector* Dx_; + + bool matrix_changed_; + + // These are matrices from the compact representation; they are updated at each iteration. + // More exactly Bk=B0-[B0*St' Yt']*[St*B0*St' L]*[St*B0] + // [ L' -D] [Yt ] + // Transpose of S and T are store to easily access columns hiopMatrixDense* St_; hiopMatrixDense* Yt_; /// Lower triangular matrix from the compact representation hiopMatrixDense* L_; /// Diagonal matrix from the compact representation - hiopVector* D_; + hiopVector* D_; // Matrix V from the representation of the inverse - hiopMatrixDense* V_; + hiopMatrixDense* V_; #ifdef HIOP_DEEPCHECKS - //copy of the V matrix - needed to check the residual - hiopMatrixDense* Vmat_; + // copy of the V matrix - needed to check the residual + hiopMatrixDense* Vmat_; #endif - void growL(const int& lmem_curr, const int& lmem_max, const hiopVector& YTs); - void growD(const int& l_curr, const int& l_max, const double& sTy); - void updateL(const hiopVector& STy, const double& sTy); - void updateD(const double& sTy); - //also stored are the iterate, gradient obj, and Jacobians at the previous optimization iteration - hiopIterate *it_prev_; - hiopVector *grad_f_prev_; - hiopMatrixDense *Jac_c_prev_; - hiopMatrixDense *Jac_d_prev_; - - //internal helpers - void updateInternalBFGSRepresentation(); - - //internals buffers, mostly for MPIAll_reduce - double* buff_kxk_; // size = num_constraints^2 - double* buff_2lxk_; // size = 2 x q-Newton mem size x num_constraints - double* buff1_lxlx3_; - double* buff2_lxlx3_; - + void growL(const int& lmem_curr, const int& lmem_max, const hiopVector& YTs); + void growD(const int& l_curr, const int& l_max, const double& sTy); + void updateL(const hiopVector& STy, const double& sTy); + void updateD(const double& sTy); + // also stored are the iterate, gradient obj, and Jacobians at the previous optimization iteration + hiopIterate* it_prev_; + hiopVector* grad_f_prev_; + hiopMatrixDense* Jac_c_prev_; + hiopMatrixDense* Jac_d_prev_; + + // internal helpers + void updateInternalBFGSRepresentation(); + + // internals buffers, mostly for MPIAll_reduce + double* buff_kxk_; // size = num_constraints^2 + double* buff_2lxk_; // size = 2 x q-Newton mem size x num_constraints + double* buff1_lxlx3_; + double* buff2_lxlx3_; + // auxiliary objects preallocated and used in internally in various computation blocks /// See new_S1 - hiopMatrixDense* S1_; + hiopMatrixDense* S1_; /// See new_Y1 - hiopMatrixDense* Y1_; - - hiopMatrixDense* lxl_mat1_; - hiopMatrixDense* kx2l_mat1_; - hiopMatrixDense* kxl_mat1_; - + hiopMatrixDense* Y1_; + + hiopMatrixDense* lxl_mat1_; + hiopMatrixDense* kx2l_mat1_; + hiopMatrixDense* kxl_mat1_; + /** - * (Re)Allocates S1_ of size kxl to store is X*D*S, where D is a diagonal matrix. S comes in - * as St=S^T (lxn) and X comes in as kxn, where l is the BFGS memory size and k number of - * constraints. S1_ is allocated only if not already allocated or realocated only if it does + * (Re)Allocates S1_ of size kxl to store is X*D*S, where D is a diagonal matrix. S comes in + * as St=S^T (lxn) and X comes in as kxn, where l is the BFGS memory size and k number of + * constraints. S1_ is allocated only if not already allocated or realocated only if it does * not have the right dimesions to store X*D*S. */ - hiopMatrixDense& new_S1(const hiopMatrixDense& X, const hiopMatrixDense& St); + hiopMatrixDense& new_S1(const hiopMatrixDense& X, const hiopMatrixDense& St); /** - * (Re)Allocates Y1_ of size kxl to store is X*D*Y, where D is a diagonal matrix. Y comes in - * as Yt=Y^T (lxn) and X comes in as kxn, where l is the BFGS memory size and k number of - * constraints. Y1_ is allocated only if not already allocated or reallocated only if it does + * (Re)Allocates Y1_ of size kxl to store is X*D*Y, where D is a diagonal matrix. Y comes in + * as Yt=Y^T (lxn) and X comes in as kxn, where l is the BFGS memory size and k number of + * constraints. Y1_ is allocated only if not already allocated or reallocated only if it does * not have the right dimesions to store X*D*Y. */ - hiopMatrixDense& new_Y1(const hiopMatrixDense& X, const hiopMatrixDense& Yt); - - hiopMatrixDense& new_lxl_mat1 (int l); - hiopMatrixDense& new_kxl_mat1 (int k, int l); - hiopMatrixDense& new_kx2l_mat1(int k, int l); - - hiopVector* l_vec1_; - hiopVector* l_vec2_; - hiopVector* n_vec1_; - hiopVector* n_vec2_; - hiopVector* twol_vec1_; - hiopVector& new_l_vec1(int l); - hiopVector& new_l_vec2(int l); + hiopMatrixDense& new_Y1(const hiopMatrixDense& X, const hiopMatrixDense& Yt); + + hiopMatrixDense& new_lxl_mat1(int l); + hiopMatrixDense& new_kxl_mat1(int k, int l); + hiopMatrixDense& new_kx2l_mat1(int k, int l); + + hiopVector* l_vec1_; + hiopVector* l_vec2_; + hiopVector* n_vec1_; + hiopVector* n_vec2_; + hiopVector* twol_vec1_; + hiopVector& new_l_vec1(int l); + hiopVector& new_l_vec2(int l); inline hiopVector& new_n_vec1(size_type n) { #ifdef HIOP_DEEPCHECKS - assert(n_vec1_!=nullptr); - assert(n_vec1_->get_size()==n); + assert(n_vec1_ != nullptr); + assert(n_vec1_->get_size() == n); #endif return *n_vec1_; } inline hiopVector& new_n_vec2(size_type n) { #ifdef HIOP_DEEPCHECKS - assert(n_vec2_!=nullptr); - assert(n_vec2_->get_size()==n); + assert(n_vec2_ != nullptr); + assert(n_vec2_->get_size() == n); #endif return *n_vec2_; } inline hiopVector& new_2l_vec1(int l) { - if(twol_vec1_!=nullptr && twol_vec1_->get_size()==2*l) { + if (twol_vec1_ != nullptr && twol_vec1_->get_size() == 2 * l) { return *twol_vec1_; } - if(twol_vec1_!=nullptr) - { + if (twol_vec1_ != nullptr) { delete twol_vec1_; } - twol_vec1_=LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), 2*l); + twol_vec1_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), 2 * l); return *twol_vec1_; } + private: - //utilities - - /// @brief Ensures the internal containers are ready to work with "limited memory" mem_length - void alloc_for_limited_mem(const size_type& mem_length); + // utilities + + /// @brief Ensures the internal containers are ready to work with "limited memory" mem_length + void alloc_for_limited_mem(const size_type& mem_length); /* symmetric multiplication W = beta*W + alpha*X*Diag*X^T */ - static void sym_mat_times_diag_times_mattrans_local(double beta, - hiopMatrixDense& W_, - double alpha, + static void sym_mat_times_diag_times_mattrans_local(double beta, + hiopMatrixDense& W_, + double alpha, const hiopMatrixDense& X_, - const hiopVector& d); + const hiopVector& d); /* W=S*Diag*X^T */ - static void mat_times_diag_times_mattrans_local(hiopMatrixDense& W, - const hiopMatrixDense& S, - const hiopVector& d, + static void mat_times_diag_times_mattrans_local(hiopMatrixDense& W, + const hiopMatrixDense& S, + const hiopVector& d, const hiopMatrixDense& X); /* members and utilities related to V matrix: factorization and solve */ hiopVector* V_work_vec_; - int V_ipiv_size_; - int* V_ipiv_vec_; - - void factorizeV(); - void solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y); - void solve_with_V(hiopMatrixDense& rhs); + int V_ipiv_size_; + int* V_ipiv_vec_; + + void factorizeV(); + void solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y); + void solve_with_V(hiopMatrixDense& rhs); + private: HessianDiagPlusRowRank() {}; HessianDiagPlusRowRank(const HessianDiagPlusRowRank&) {}; - HessianDiagPlusRowRank& operator=(const HessianDiagPlusRowRank&) {return *this;}; + HessianDiagPlusRowRank& operator=(const HessianDiagPlusRowRank&) { return *this; }; /* methods that need to be implemented as the class inherits from hiopMatrix*/ public: @@ -307,24 +313,18 @@ class HessianDiagPlusRowRank : public hiopMatrix return nullptr; } - virtual void setToZero() - { - assert(false && "not provided because it is not needed"); - } - virtual void setToConstant(double c) - { - assert(false && "not provided because it is not needed"); - } + virtual void setToZero() { assert(false && "not provided because it is not needed"); } + virtual void setToConstant(double c) { assert(false && "not provided because it is not needed"); } - void timesVec(double beta, hiopVector& y, double alpha, const hiopVector&x) const; + void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; /** y = beta * y + alpha * this^T * x */ - virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x ) const + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { assert(false && "not provided because it is not needed"); } - /* W = beta*W + alpha*this*X */ + /* W = beta*W + alpha*this*X */ virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const { assert(false && "not provided because it is not needed"); @@ -343,37 +343,31 @@ class HessianDiagPlusRowRank : public hiopMatrix { assert(false && "not provided because it is not needed"); } - virtual void addDiagonal(const double& value) - { - assert(false && "not provided because it is not needed"); - } + virtual void addDiagonal(const double& value) { assert(false && "not provided because it is not needed"); } virtual void addSubDiagonal(const double& alpha, index_type start, const hiopVector& d_) { assert(false && "not provided because it is not needed"); } /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, - const double& alpha, - const hiopVector& d_, - int start_on_src_vec, - int num_elems=-1) + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, + const hiopVector& d_, + int start_on_src_vec, + int num_elems = -1) { assert(false && "not needed / implemented"); } - virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) + virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) { assert(false && "not needed / implemented"); } - + /* this += alpha*X */ - virtual void addMatrix(double alpah, const hiopMatrix& X) - { - assert(false && "not provided because it is not needed"); - } + virtual void addMatrix(double alpah, const hiopMatrix& X) { assert(false && "not provided because it is not needed"); } - void addToSymDenseMatrixUpperTriangle(int row_start, int col_start, double alpha, hiopMatrixDense& W) const + void addToSymDenseMatrixUpperTriangle(int row_start, int col_start, double alpha, hiopMatrixDense& W) const { assert(false && "not needed; should not be used"); } @@ -391,12 +385,9 @@ class HessianDiagPlusRowRank : public hiopMatrix return 0.; } - virtual void row_max_abs_value(hiopVector &ret_vec) - { - assert(false && "not provided because it is not needed"); - } - - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale) + virtual void row_max_abs_value(hiopVector& ret_vec) { assert(false && "not provided because it is not needed"); } + + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale) { assert(false && "not provided because it is not needed"); } @@ -405,22 +396,22 @@ class HessianDiagPlusRowRank : public hiopMatrix { assert(false && "not needed / should not be used"); } - + /* return false is any of the entry is a nan, inf, or denormalized */ virtual bool isfinite() const { assert(false && "not provided because it is not needed"); return false; } - + /* call with -1 to print all rows, all columns, or on all ranks; otherwise will - * will print the first rows and/or columns on the specified rank. - * - * If the underlying matrix is sparse, maxCols is ignored and a max number elements - * given by the value of 'maxRows' will be printed. If this value is negative, all - * elements will be printed. - */ - virtual void print(FILE* f=nullptr, const char* msg=nullptr, int maxRows=-1, int maxCols=-1, int rank=-1) const + * will print the first rows and/or columns on the specified rank. + * + * If the underlying matrix is sparse, maxCols is ignored and a max number elements + * given by the value of 'maxRows' will be printed. If this value is negative, all + * elements will be printed. + */ + virtual void print(FILE* f = nullptr, const char* msg = nullptr, int maxRows = -1, int maxCols = -1, int rank = -1) const { assert(false && "not provided because it is not needed"); } @@ -439,9 +430,9 @@ class HessianDiagPlusRowRank : public hiopMatrix } #ifdef HIOP_DEEPCHECKS /* check symmetry */ - virtual bool assertSymmetry(double tol=1e-16) const { return true; } + virtual bool assertSymmetry(double tol = 1e-16) const { return true; } #endif }; -} //~namespace +} // namespace hiop #endif From 28893d61172cd0397ae1af4a17a499ba419f9ddb Mon Sep 17 00:00:00 2001 From: nychiang Date: Thu, 21 Nov 2024 17:41:03 -0800 Subject: [PATCH 2/3] test 2 --- src/Drivers/Sparse/NlpSparseEx1.cpp | 124 +++---- src/Drivers/Sparse/NlpSparseEx1.hpp | 60 ++-- src/Optimization/HessianDiagPlusRowRank.cpp | 342 ++++++++++---------- src/Optimization/HessianDiagPlusRowRank.hpp | 146 ++++----- 4 files changed, 328 insertions(+), 344 deletions(-) diff --git a/src/Drivers/Sparse/NlpSparseEx1.cpp b/src/Drivers/Sparse/NlpSparseEx1.cpp index f3d95e1b0..a1e438df4 100644 --- a/src/Drivers/Sparse/NlpSparseEx1.cpp +++ b/src/Drivers/Sparse/NlpSparseEx1.cpp @@ -22,7 +22,7 @@ SparseEx1::SparseEx1(int n, double scal_input) scal{scal_input} { assert(n >= 3); - if (n > 3) { + if(n > 3) { n_cons += n - 3; } } @@ -39,20 +39,20 @@ bool SparseEx1::get_prob_sizes(size_type& n, size_type& m) bool SparseEx1::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { assert(n == n_vars); - for (index_type i = 0; i < n; i++) { - if (i == 0) { + for(index_type i = 0; i < n; i++) { + if(i == 0) { xlow[i] = -1e20; xupp[i] = 1e20; type[i] = hiopNonlinear; continue; } - if (i == 1) { + if(i == 1) { xlow[i] = 0.0; xupp[i] = 1e20; type[i] = hiopNonlinear; continue; } - if (i == 2) { + if(i == 2) { xlow[i] = 1.5; xupp[i] = 10.0; type[i] = hiopNonlinear; @@ -70,15 +70,15 @@ bool SparseEx1::get_cons_info(const size_type& m, double* clow, double* cupp, No { assert(m == n_cons); index_type conidx{0}; - clow[conidx] = scal * 10.0; - cupp[conidx] = scal * 10.0; + clow[conidx] = scal * 10.0; + cupp[conidx] = scal * 10.0; type[conidx++] = hiopInterfaceBase::hiopLinear; - clow[conidx] = scal * 5.0; - cupp[conidx] = 1e20; + clow[conidx] = scal * 5.0; + cupp[conidx] = 1e20; type[conidx++] = hiopInterfaceBase::hiopLinear; - for (index_type i = 3; i < n_vars; i++) { - clow[conidx] = scal * 1.0; - cupp[conidx] = scal * 2 * n_vars; + for(index_type i = 3; i < n_vars; i++) { + clow[conidx] = scal * 1.0; + cupp[conidx] = scal * 2 * n_vars; type[conidx++] = hiopInterfaceBase::hiopLinear; } return true; @@ -91,8 +91,8 @@ bool SparseEx1::get_sparse_blocks_info(size_type& nx, { nx = n_vars; ; - nnz_sparse_Jaceq = 2; - nnz_sparse_Jacineq = 2 + 2 * (n_vars - 3); + nnz_sparse_Jaceq = 2; + nnz_sparse_Jacineq = 2 + 2 * (n_vars - 3); nnz_sparse_Hess_Lagr = n_vars; return true; } @@ -101,7 +101,7 @@ bool SparseEx1::eval_f(const size_type& n, const double* x, bool new_x, double& { assert(n == n_vars); obj_value = 0.; - for (auto i = 0; i < n; i++) { + for(auto i = 0; i < n; i++) { obj_value += scal * 0.25 * pow(x[i] - 1., 4); } @@ -111,19 +111,19 @@ bool SparseEx1::eval_f(const size_type& n, const double* x, bool new_x, double& bool SparseEx1::eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) { assert(n == n_vars); - for (auto i = 0; i < n; i++) { + for(auto i = 0; i < n; i++) { gradf[i] = scal * pow(x[i] - 1., 3); } return true; } -bool SparseEx1::eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, +bool SparseEx1::eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, const index_type* idx_cons, - const double* x, - bool new_x, - double* cons) + const double* x, + bool new_x, + double* cons) { return false; } @@ -136,7 +136,7 @@ bool SparseEx1::eval_cons(const size_type& n, const size_type& m, const double* assert(n_cons == 2 + n - 3); // local contributions to the constraints in cons are reset - for (auto j = 0; j < m; j++) { + for(auto j = 0; j < m; j++) { cons[j] = 0.; } @@ -149,35 +149,35 @@ bool SparseEx1::eval_cons(const size_type& n, const size_type& m, const double* cons[conidx++] += scal * (2 * x[0] + 1 * x[2]); // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - for (auto i = 3; i < n; i++) { + for(auto i = 3; i < n; i++) { cons[conidx++] += scal * (2 * x[0] + 0.5 * x[i]); } return true; } -bool SparseEx1::eval_Jac_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, +bool SparseEx1::eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, const index_type* idx_cons, - const double* x, - bool new_x, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS) + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS) { return false; } bool SparseEx1::eval_Jac_cons(const size_type& n, const size_type& m, - const double* x, - bool new_x, + const double* x, + bool new_x, const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS) + index_type* iJacS, + index_type* jJacS, + double* MJacS) { assert(n == n_vars); assert(m == n_cons); @@ -185,29 +185,29 @@ bool SparseEx1::eval_Jac_cons(const size_type& n, assert(nnzJacS == 4 + 2 * (n - 3)); - int nnzit{0}; + int nnzit{0}; index_type conidx{0}; - if (iJacS != NULL && jJacS != NULL) { + if(iJacS != NULL && jJacS != NULL) { // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - iJacS[nnzit] = conidx; + iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; + iJacS[nnzit] = conidx; jJacS[nnzit++] = 1; conidx++; // --- constraint 2 body ---> 2*x_1 + x_3 - iJacS[nnzit] = conidx; + iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; + iJacS[nnzit] = conidx; jJacS[nnzit++] = 2; conidx++; // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - for (auto i = 3; i < n; i++) { - iJacS[nnzit] = conidx; + for(auto i = 3; i < n; i++) { + iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; + iJacS[nnzit] = conidx; jJacS[nnzit++] = i; conidx++; } @@ -216,7 +216,7 @@ bool SparseEx1::eval_Jac_cons(const size_type& n, // values for sparse Jacobian if requested by the solver nnzit = 0; - if (MJacS != NULL) { + if(MJacS != NULL) { // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 MJacS[nnzit++] = scal * 4; MJacS[nnzit++] = scal * 2; @@ -226,7 +226,7 @@ bool SparseEx1::eval_Jac_cons(const size_type& n, MJacS[nnzit++] = scal * 1; // --- constraint 3 body ---> 2*x_1 + 0.5*x_4 - for (auto i = 3; i < n; i++) { + for(auto i = 3; i < n; i++) { MJacS[nnzit++] = scal * 2; MJacS[nnzit++] = scal * 0.5; } @@ -237,28 +237,28 @@ bool SparseEx1::eval_Jac_cons(const size_type& n, bool SparseEx1::eval_Hess_Lagr(const size_type& n, const size_type& m, - const double* x, - bool new_x, - const double& obj_factor, - const double* lambda, - bool new_lambda, + const double* x, + bool new_x, + const double& obj_factor, + const double* lambda, + bool new_lambda, const size_type& nnzHSS, - index_type* iHSS, - index_type* jHSS, - double* MHSS) + index_type* iHSS, + index_type* jHSS, + double* MHSS) { // Note: lambda is not used since all the constraints are linear and, therefore, do // not contribute to the Hessian of the Lagrangian assert(nnzHSS == n); - if (iHSS != NULL && jHSS != NULL) { - for (int i = 0; i < n; i++) { + if(iHSS != NULL && jHSS != NULL) { + for(int i = 0; i < n; i++) { iHSS[i] = jHSS[i] = i; } } - if (MHSS != NULL) { - for (int i = 0; i < n; i++) { + if(MHSS != NULL) { + for(int i = 0; i < n; i++) { MHSS[i] = scal * obj_factor * 3 * pow(x[i] - 1., 2); } } @@ -268,7 +268,7 @@ bool SparseEx1::eval_Hess_Lagr(const size_type& n, bool SparseEx1::get_starting_point(const size_type& n, double* x0) { assert(n == n_vars); - for (auto i = 0; i < n; i++) { + for(auto i = 0; i < n; i++) { x0[i] = 0.0; } return true; diff --git a/src/Drivers/Sparse/NlpSparseEx1.hpp b/src/Drivers/Sparse/NlpSparseEx1.hpp index 2877b12f7..ccf3ec9fb 100644 --- a/src/Drivers/Sparse/NlpSparseEx1.hpp +++ b/src/Drivers/Sparse/NlpSparseEx1.hpp @@ -5,7 +5,7 @@ #include -using size_type = hiop::size_type; +using size_type = hiop::size_type; using index_type = hiop::index_type; /* Test with bounds and constraints of all types. For some reason this @@ -36,45 +36,45 @@ class SparseEx1 : public hiop::hiopInterfaceSparse size_type& nnz_sparse_Hess_Lagr); virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); - virtual bool eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, + virtual bool eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, const index_type* idx_cons, - const double* x, - bool new_x, - double* cons); + const double* x, + bool new_x, + double* cons); virtual bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons); virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); - virtual bool eval_Jac_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, const index_type* idx_cons, - const double* x, - bool new_x, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS); + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS); virtual bool eval_Jac_cons(const size_type& n, const size_type& m, - const double* x, - bool new_x, + const double* x, + bool new_x, const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS); + index_type* iJacS, + index_type* jJacS, + double* MJacS); virtual bool get_starting_point(const size_type& n, double* x0); virtual bool eval_Hess_Lagr(const size_type& n, const size_type& m, - const double* x, - bool new_x, - const double& obj_factor, - const double* lambda, - bool new_lambda, + const double* x, + bool new_x, + const double& obj_factor, + const double* lambda, + bool new_lambda, const size_type& nnzHSS, - index_type* iHSS, - index_type* jHSS, - double* MHSS); + index_type* iHSS, + index_type* jHSS, + double* MHSS); // not implemented virtual bool @@ -91,6 +91,6 @@ class SparseEx1 : public hiop::hiopInterfaceSparse private: size_type n_vars, n_cons; - double scal; + double scal; }; #endif diff --git a/src/Optimization/HessianDiagPlusRowRank.cpp b/src/Optimization/HessianDiagPlusRowRank.cpp index 0d79aa426..9446f6dea 100644 --- a/src/Optimization/HessianDiagPlusRowRank.cpp +++ b/src/Optimization/HessianDiagPlusRowRank.cpp @@ -74,7 +74,7 @@ using namespace std; #define SIGMA_STRATEGY2 2 #define SIGMA_STRATEGY3 3 #define SIGMA_STRATEGY4 4 -#define SIGMA_CONSTANT 5 +#define SIGMA_CONSTANT 5 namespace hiop { @@ -87,72 +87,67 @@ HessianDiagPlusRowRank::HessianDiagPlusRowRank(hiopNlpDenseConstraints* nlp_in, nlp_(nlp_in), matrix_changed_(false) { - DhInv_ = nlp_->alloc_primal_vec(); - St_ = nlp_->alloc_multivector_primal(0, l_max_); - Yt_ = St_->alloc_clone(); // faster than nlp_->alloc_multivector_primal(...); + DhInv_ = nlp_->alloc_primal_vec(); + St_ = nlp_->alloc_multivector_primal(0, l_max_); + Yt_ = St_->alloc_clone(); // faster than nlp_->alloc_multivector_primal(...); // these are local - L_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 0, 0); - D_ = LinearAlgebraFactory::create_vector("DEFAULT", 0); - V_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 0, 0); + L_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 0, 0); + D_ = LinearAlgebraFactory::create_vector("DEFAULT", 0); + V_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 0, 0); // the previous iteration - it_prev_ = new hiopIterate(nlp_); + it_prev_ = new hiopIterate(nlp_); grad_f_prev_ = nlp_->alloc_primal_vec(); - Jac_c_prev_ = nlp_->alloc_Jac_c(); - Jac_d_prev_ = nlp_->alloc_Jac_d(); + Jac_c_prev_ = nlp_->alloc_Jac_c(); + Jac_d_prev_ = nlp_->alloc_Jac_d(); // internal buffers for memory pool (none of them should be in n) #ifdef HIOP_USE_MPI - buff_kxk_ = new double[nlp_->m() * nlp_->m()]; - buff_2lxk_ = new double[nlp_->m() * 2 * l_max_]; + buff_kxk_ = new double[nlp_->m() * nlp_->m()]; + buff_2lxk_ = new double[nlp_->m() * 2 * l_max_]; buff1_lxlx3_ = new double[3 * l_max_ * l_max_]; buff2_lxlx3_ = new double[3 * l_max_ * l_max_]; #else // not needed in non-MPI mode - buff_kxk_ = nullptr; - buff_2lxk_ = nullptr; + buff_kxk_ = nullptr; + buff_2lxk_ = nullptr; buff1_lxlx3_ = nullptr; buff2_lxlx3_ = nullptr; #endif // auxiliary objects/buffers - S1_ = nullptr; - Y1_ = nullptr; - lxl_mat1_ = nullptr; - kxl_mat1_ = nullptr; - kx2l_mat1_ = nullptr; - l_vec1_ = nullptr; - l_vec2_ = nullptr; - twol_vec1_ = nullptr; - n_vec1_ = DhInv_->alloc_clone(); - n_vec2_ = DhInv_->alloc_clone(); - - V_work_vec_ = LinearAlgebraFactory::create_vector("DEFAULT", 0); - V_ipiv_vec_ = nullptr; - V_ipiv_size_ = -1; - - sigma0_ = nlp_->options->GetNumeric("sigma0"); - sigma_ = sigma0_; + S1_ = nullptr; + Y1_ = nullptr; + lxl_mat1_ = nullptr; + kxl_mat1_ = nullptr; + kx2l_mat1_ = nullptr; + l_vec1_ = nullptr; + l_vec2_ = nullptr; + twol_vec1_ = nullptr; + n_vec1_ = DhInv_->alloc_clone(); + n_vec2_ = DhInv_->alloc_clone(); + + V_work_vec_ = LinearAlgebraFactory::create_vector("DEFAULT", 0); + V_ipiv_vec_ = nullptr; + V_ipiv_size_ = -1; + + sigma0_ = nlp_->options->GetNumeric("sigma0"); + sigma_ = sigma0_; string sigma_strategy = nlp_->options->GetString("sigma_update_strategy"); transform(sigma_strategy.begin(), sigma_strategy.end(), sigma_strategy.begin(), ::tolower); sigma_update_strategy_ = SIGMA_STRATEGY3; - if (sigma_strategy == "sty") { + if(sigma_strategy == "sty") { sigma_update_strategy_ = SIGMA_STRATEGY1; - } - else if (sigma_strategy == "sty_inv") { + } else if(sigma_strategy == "sty_inv") { sigma_update_strategy_ = SIGMA_STRATEGY2; - } - else if (sigma_strategy == "snrm_ynrm") { + } else if(sigma_strategy == "snrm_ynrm") { sigma_update_strategy_ = SIGMA_STRATEGY3; - } - else if (sigma_strategy == "sty_srnm_ynrm") { + } else if(sigma_strategy == "sty_srnm_ynrm") { sigma_update_strategy_ = SIGMA_STRATEGY4; - } - else if (sigma_strategy == "sigma0") { + } else if(sigma_strategy == "sigma0") { sigma_update_strategy_ = SIGMA_CONSTANT; - } - else { + } else { assert(false && "sigma_update_strategy option not recognized"); } @@ -213,11 +208,11 @@ HessianDiagPlusRowRank::~HessianDiagPlusRowRank() delete[] V_ipiv_vec_; delete V_work_vec_; - for (auto* it : a) { + for(auto* it: a) { delete it; } - for (auto* it : b) { + for(auto* it: b) { delete it; } } @@ -225,7 +220,7 @@ HessianDiagPlusRowRank::~HessianDiagPlusRowRank() void HessianDiagPlusRowRank::alloc_for_limited_mem(const size_type& mem_length) { // note: St_ and Yt_ always have l_curr_ rows - if (l_curr_ == mem_length) { + if(l_curr_ == mem_length) { assert(D_->get_size() == l_curr_); return; } @@ -237,8 +232,8 @@ void HessianDiagPlusRowRank::alloc_for_limited_mem(const size_type& mem_length) Yt_ = St_->alloc_clone(); // these are local - L_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", mem_length, mem_length); - D_ = LinearAlgebraFactory::create_vector("DEFAULT", mem_length); + L_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", mem_length, mem_length); + D_ = LinearAlgebraFactory::create_vector("DEFAULT", mem_length); } bool HessianDiagPlusRowRank::update_logbar_diag(const hiopVector& Dx) @@ -285,9 +280,9 @@ void HessianDiagPlusRowRank::print(FILE* f, hiopOutVerbosity v, const char* msg) #include bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, - const hiopVector& grad_f_curr, - const hiopMatrix& Jac_c_curr_in, - const hiopMatrix& Jac_d_curr_in) + const hiopVector& grad_f_curr, + const hiopMatrix& Jac_c_curr_in, + const hiopMatrix& Jac_d_curr_in) { nlp_->runStats.tmSolverInternal.start(); @@ -301,14 +296,14 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, assert(it_curr.sxu->matchesPattern(nlp_->get_ixu())); #endif // on first call l_curr_=-1 - if (l_curr_ >= 0) { - size_type n = grad_f_curr.get_size(); + if(l_curr_ >= 0) { + size_type n = grad_f_curr.get_size(); // compute s_new = x_curr-x_prev hiopVector& s_new = new_n_vec1(n); s_new.copyFrom(*it_curr.x); s_new.axpy(-1., *it_prev_->x); double s_infnorm = s_new.infnorm(); - if (s_infnorm >= 100 * std::numeric_limits::epsilon()) { // norm of s not too small + if(s_infnorm >= 100 * std::numeric_limits::epsilon()) { // norm of s not too small // compute y_new = \grad J(x_curr,\lambda_curr) - \grad J(x_prev, \lambda_curr) (yes, J(x_prev, \lambda_curr)) // = graf_f_curr-grad_f_prev + (Jac_c_curr-Jac_c_prev)yc_curr+ (Jac_d_curr-Jac_c_prev)yd_curr - @@ -334,22 +329,21 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, nlp_->log->write("HessianDiagPlusRowRank s_new", s_new, hovIteration); nlp_->log->write("HessianDiagPlusRowRank y_new", y_new, hovIteration); #endif - if (sTy > s_nrm2 * y_nrm2 * sqrt(std::numeric_limits::epsilon())) { // sTy far away from zero + if(sTy > s_nrm2 * y_nrm2 * sqrt(std::numeric_limits::epsilon())) { // sTy far away from zero - if (l_max_ > 0) { + if(l_max_ > 0) { // compute the new row in L, update S and Y (either augment them or shift cols and add s_new and y_new) hiopVector& YTs = new_l_vec1(l_curr_); Yt_->timesVec(0.0, YTs, 1.0, s_new); // update representation - if (l_curr_ < l_max_) { + if(l_curr_ < l_max_) { // just grow/augment the matrices St_->appendRow(s_new); Yt_->appendRow(y_new); growL(l_curr_, l_max_, YTs); growD(l_curr_, l_max_, sTy); l_curr_++; - } - else { + } else { // shift St_->shiftRows(-1); Yt_->shiftRows(-1); @@ -367,7 +361,7 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, nlp_->log->printf(hovMatrices, "\n"); #endif // update B0 (i.e., sigma) - switch (sigma_update_strategy_) { + switch(sigma_update_strategy_) { case SIGMA_STRATEGY1: sigma_ = sTy / (s_nrm2 * s_nrm2); break; @@ -390,14 +384,12 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, // safe guard it sigma_ = fmax(fmin(sigma_safe_max_, sigma_), sigma_safe_min_); nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: sigma was updated to %22.16e\n", sigma_); - } - else { // sTy is too small or negative -> skip + } else { // sTy is too small or negative -> skip nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: s^T*y=%12.6e not positive enough... skipping the Hessian update\n", sTy); } - } - else { // norm of s_new is too small -> skip + } else { // norm of s_new is too small -> skip nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: ||s_new||=%12.6e too small... skipping the Hessian update\n", s_infnorm); @@ -408,8 +400,7 @@ bool HessianDiagPlusRowRank::update(const hiopIterate& it_curr, Jac_c_prev_->copyFrom(Jac_c_curr); Jac_d_prev_->copyFrom(Jac_d_curr); nlp_->log->printf(hovLinAlgScalarsVerb, "HessianDiagPlusRowRank: storing the iteration info as 'previous'\n", s_infnorm); - } - else { + } else { // this is the first optimization iterate, just save the iterate and exit it_prev_->copyFrom(it_curr); grad_f_prev_->copyFrom(grad_f_curr); @@ -440,15 +431,15 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation() size_type l = St_->m(); // grow L,D, andV if needed - if (L_->m() != l) { + if(L_->m() != l) { delete L_; L_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", l, l); } - if (D_->get_size() != l) { + if(D_->get_size() != l) { delete D_; D_ = LinearAlgebraFactory::create_vector("DEFAULT", l); } - if (V_->m() != 2 * l) { + if(V_->m() != 2 * l) { delete V_; V_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", 2 * l, 2 * l); } @@ -466,7 +457,7 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation() //-- block (1,2) hiopMatrixDense& StB0DhInvYmL = DpYtDhInvY; // just a rename - hiopVector& B0DhInv = new_n_vec1(n); + hiopVector& B0DhInv = new_n_vec1(n); B0DhInv.copyFrom(*DhInv_); B0DhInv.scale(sigma_); mat_times_diag_times_mattrans_local(StB0DhInvYmL, *St_, B0DhInv, *Yt_); @@ -532,7 +523,7 @@ void HessianDiagPlusRowRank::updateInternalBFGSRepresentation() */ void HessianDiagPlusRowRank::solve(const hiopVector& rhsx, hiopVector& x) { - if (matrix_changed_) { + if(matrix_changed_) { updateInternalBFGSRepresentation(); } @@ -588,12 +579,12 @@ void HessianDiagPlusRowRank::solve(const hiopVector& rhsx, hiopVector& x) * W is kxk, S,Y are nxl, DhInv,B0 are n, V is 2lx2l * X is kxn */ -void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double beta, - hiopMatrixDense& W, - double alpha, +void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double beta, + hiopMatrixDense& W, + double alpha, const hiopMatrixDense& X) { - if (matrix_changed_) { + if(matrix_changed_) { updateInternalBFGSRepresentation(); } @@ -608,10 +599,9 @@ void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double // 1. compute W=beta*W + alpha*X*DhInv*X' #ifdef HIOP_USE_MPI - if (0 == nlp_->get_rank()) { + if(0 == nlp_->get_rank()) { sym_mat_times_diag_times_mattrans_local(beta, W, alpha, X, *DhInv_); - } - else { + } else { sym_mat_times_diag_times_mattrans_local(0.0, W, alpha, X, *DhInv_); } // W will be MPI_All_reduced later @@ -619,8 +609,8 @@ void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double sym_mat_times_diag_times_mattrans_local(beta, W, alpha, X, *DhInv_); #endif // 2. compute S1=X*DhInv*B0*S and Y1=X*DhInv*Y - auto& S1 = new_S1(X, *St_); - auto& Y1 = new_Y1(X, *Yt_); // both are kxl + auto& S1 = new_S1(X, *St_); + auto& Y1 = new_Y1(X, *Yt_); // both are kxl hiopVector& B0DhInv = new_n_vec1(n); B0DhInv.copyFrom(*DhInv_); B0DhInv.scale(sigma_); @@ -655,8 +645,8 @@ void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double // 5. W = W-alpha*[S1 Y1]*[S2^T] // [Y2^T] - S2Y2 = RHS_fortran; - alpha = 0 - alpha; + S2Y2 = RHS_fortran; + alpha = 0 - alpha; hiopMatrixDense& S2 = new_kxl_mat1(k, l); S2.copyFromMatrixBlock(S2Y2, 0, 0); S1.timesMatTrans_local(1.0, W, alpha, S2); @@ -675,10 +665,10 @@ void HessianDiagPlusRowRank::sym_mat_times_inverse_times_mattrans(double void HessianDiagPlusRowRank::factorizeV() { - int N = V_->n(); + int N = V_->n(); int lda = N; int info; - if (N == 0) { + if(N == 0) { return; } @@ -688,39 +678,36 @@ void HessianDiagPlusRowRank::factorizeV() char uplo = 'L'; // V is upper in C++ so it's lower in fortran - if (V_ipiv_vec_ == nullptr) { + if(V_ipiv_vec_ == nullptr) { V_ipiv_vec_ = new int[N]; - } - else { - if (V_ipiv_size_ != N) { + } else { + if(V_ipiv_size_ != N) { delete[] V_ipiv_vec_; - V_ipiv_vec_ = new int[N]; + V_ipiv_vec_ = new int[N]; V_ipiv_size_ = N; } } - int lwork = -1; // inquire sizes + int lwork = -1; // inquire sizes double Vwork_tmp; DSYTRF(&uplo, &N, V_->local_data(), &lda, V_ipiv_vec_, &Vwork_tmp, &lwork, &info); assert(info == 0); lwork = (int)Vwork_tmp; - if (lwork != V_work_vec_->get_size()) { - if (V_work_vec_ != nullptr) { + if(lwork != V_work_vec_->get_size()) { + if(V_work_vec_ != nullptr) { delete V_work_vec_; } V_work_vec_ = LinearAlgebraFactory::create_vector("DEFAULT", lwork); - } - else { + } else { assert(V_work_vec_); } DSYTRF(&uplo, &N, V_->local_data(), &lda, V_ipiv_vec_, V_work_vec_->local_data(), &lwork, &info); - if (info < 0) { + if(info < 0) { nlp_->log->printf(hovError, "HessianDiagPlusRowRank::factorizeV error: %d arg to dsytrf has an illegal value\n", -info); - } - else if (info > 0) { + } else if(info > 0) { nlp_->log->printf(hovError, "HessianDiagPlusRowRank::factorizeV error: %d entry in the factorization's diagonal is exactly zero. " "Division by zero will occur if a solve is attempted.\n", @@ -735,7 +722,7 @@ void HessianDiagPlusRowRank::factorizeV() void HessianDiagPlusRowRank::solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y) { int N = V_->n(); - if (N == 0) { + if(N == 0) { return; } @@ -749,7 +736,7 @@ void HessianDiagPlusRowRank::solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y) rhs_saved->copyFromStarting(l, rhs_y); #endif - int lda = N, one = 1, info; + int lda = N, one = 1, info; char uplo = 'L'; #ifdef HIOP_DEEPCHECKS assert(N == rhs_s.get_size() + rhs_y.get_size()); @@ -760,7 +747,7 @@ void HessianDiagPlusRowRank::solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y) DSYTRS(&uplo, &N, &one, V_->local_data(), &lda, V_ipiv_vec_, rhs.local_data(), &N, &info); - if (info < 0) { + if(info < 0) { nlp_->log->printf(hovError, "HessianDiagPlusRowRank::solve_with_V error: %d arg to dsytrf has an illegal value\n", -info); @@ -782,7 +769,7 @@ void HessianDiagPlusRowRank::solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y) // nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank::solve_with_V 1rhs: rel resid norm=%g\n", // nrmres/(1+nrmrhs)); nlp_->log->printf(hovScalars, "HessianDiagPlusRowRank::solve_with_V 1rhs: rel resid norm=%g\n", nrmres / (1 + nrmrhs)); - if (nrmres > 1e-8) { + if(nrmres > 1e-8) { nlp_->log->printf(hovWarning, "HessianDiagPlusRowRank::solve_with_V large residual=%g\n", nrmres); } delete rhs_saved; @@ -792,7 +779,7 @@ void HessianDiagPlusRowRank::solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y) void HessianDiagPlusRowRank::solve_with_V(hiopMatrixDense& rhs) { int N = V_->n(); - if (0 == N) { + if(0 == N) { return; } @@ -804,13 +791,13 @@ void HessianDiagPlusRowRank::solve_with_V(hiopMatrixDense& rhs) // rhs is transpose in C++ char uplo = 'L'; - int lda = N, ldb = N, nrhs = rhs.m(), info; + int lda = N, ldb = N, nrhs = rhs.m(), info; #ifdef HIOP_DEEPCHECKS assert(N == rhs.n()); #endif DSYTRS(&uplo, &N, &nrhs, V_->local_data(), &lda, V_ipiv_vec_, rhs.local_data(), &ldb, &info); - if (info < 0) { + if(info < 0) { nlp_->log->printf(hovError, "HessianDiagPlusRowRank::solve_with_V error: %d arg to dsytrf has an illegal value\n", -info); @@ -821,23 +808,23 @@ void HessianDiagPlusRowRank::solve_with_V(hiopMatrixDense& rhs) hiopMatrixDense& sol = rhs; // matrix of solutions /// TODO: get rid of these uses of specific hiopVector implementation - hiopVector* x = LinearAlgebraFactory::create_vector("DEFAULT", rhs.n()); // again, keep in mind rhs is transposed - hiopVector* r = LinearAlgebraFactory::create_vector("DEFAULT", rhs.n()); + hiopVector* x = LinearAlgebraFactory::create_vector("DEFAULT", rhs.n()); // again, keep in mind rhs is transposed + hiopVector* r = LinearAlgebraFactory::create_vector("DEFAULT", rhs.n()); - double resnorm = 0.0; - for (int k = 0; k < rhs.m(); k++) { + double resnorm = 0.0; + for(int k = 0; k < rhs.m(); k++) { rhs_saved->getRow(k, *r); sol.getRow(k, *x); double nrmrhs = r->infnorm(); // nrmrhs=.0; Vmat_->timesVec(1.0, *r, -1.0, *x); double nrmres = r->infnorm(); - if (nrmres > 1e-8) { + if(nrmres > 1e-8) { nlp_->log->printf(hovWarning, "HessianDiagPlusRowRank::solve_with_V mult-rhs: rhs number %d has large resid norm=%g\n", k, nrmres); } - if (nrmres / (nrmrhs + 1) > resnorm) { + if(nrmres / (nrmrhs + 1) > resnorm) { resnorm = nrmres / (nrmrhs + 1); } } @@ -863,16 +850,16 @@ void HessianDiagPlusRowRank::growL(const int& lmem_curr, const int& lmem_max, co // copy from L to newL newL->copyBlockFromMatrix(0, 0, *L_); - double* newL_mat = newL->local_data(); // doing the rest here - const double* YTs_vec = YTs.local_data_const(); + double* newL_mat = newL->local_data(); // doing the rest here + const double* YTs_vec = YTs.local_data_const(); // for(int j=0; j= l); - hiopVector* Dnew = LinearAlgebraFactory::create_vector("DEFAULT", l + 1); - double* Dnew_vec = Dnew->local_data(); + hiopVector* Dnew = LinearAlgebraFactory::create_vector("DEFAULT", l + 1); + double* Dnew_vec = Dnew->local_data(); memcpy(Dnew_vec, D_->local_data_const(), l * sizeof(double)); Dnew_vec[l] = sTy; @@ -908,11 +895,11 @@ void HessianDiagPlusRowRank::updateL(const hiopVector& YTs, const double& sTy) assert(l_curr_ == l); assert(l_curr_ == l_max_); #endif - const int lm1 = l - 1; - double* L_mat = L_->local_data(); + const int lm1 = l - 1; + double* L_mat = L_->local_data(); const double* yts_vec = YTs.local_data_const(); - for (int i = 1; i < lm1; i++) { - for (int j = 0; j < i; j++) { + for(int i = 1; i < lm1; i++) { + for(int j = 0; j < i; j++) { // L_mat[i][j] = L_mat[i+1][j+1]; L_mat[i * l + j] = L_mat[(i + 1) * l + j + 1]; } @@ -923,7 +910,7 @@ void HessianDiagPlusRowRank::updateL(const hiopVector& YTs, const double& sTy) // L_mat[i][lm1]=0.0; // first entry in YTs corresponds to y_to_be_discarded_since_it_is_the_oldest'* s_new and is discarded - for (int j = 0; j < lm1; j++) { + for(int j = 0; j < lm1; j++) { // L_mat[lm1][j]=yts_vec[j+1]; L_mat[lm1 * l + j] = yts_vec[j + 1]; } @@ -933,9 +920,9 @@ void HessianDiagPlusRowRank::updateL(const hiopVector& YTs, const double& sTy) } void HessianDiagPlusRowRank::updateD(const double& sTy) { - int l = D_->get_size(); + int l = D_->get_size(); double* D_vec = D_->local_data(); - for (int i = 0; i < l - 1; i++) { + for(int i = 0; i < l - 1; i++) { D_vec[i] = D_vec[i + 1]; } D_vec[l - 1] = sTy; @@ -943,10 +930,10 @@ void HessianDiagPlusRowRank::updateD(const double& sTy) hiopVector& HessianDiagPlusRowRank::new_l_vec1(int l) { - if (l_vec1_ != nullptr && l_vec1_->get_size() == l) { + if(l_vec1_ != nullptr && l_vec1_->get_size() == l) { return *l_vec1_; } - if (l_vec1_ != nullptr) { + if(l_vec1_ != nullptr) { delete l_vec1_; } l_vec1_ = LinearAlgebraFactory::create_vector("DEFAULT", l); @@ -955,10 +942,10 @@ hiopVector& HessianDiagPlusRowRank::new_l_vec1(int l) hiopVector& HessianDiagPlusRowRank::new_l_vec2(int l) { - if (l_vec2_ != nullptr && l_vec2_->get_size() == l) { + if(l_vec2_ != nullptr && l_vec2_->get_size() == l) { return *l_vec2_; } - if (l_vec2_ != nullptr) { + if(l_vec2_ != nullptr) { delete l_vec2_; } l_vec2_ = LinearAlgebraFactory::create_vector("DEFAULT", l); @@ -967,11 +954,10 @@ hiopVector& HessianDiagPlusRowRank::new_l_vec2(int l) hiopMatrixDense& HessianDiagPlusRowRank::new_lxl_mat1(int l) { - if (lxl_mat1_ != nullptr) { - if (l == lxl_mat1_->m()) { + if(lxl_mat1_ != nullptr) { + if(l == lxl_mat1_->m()) { return *lxl_mat1_; - } - else { + } else { delete lxl_mat1_; lxl_mat1_ = nullptr; } @@ -983,12 +969,11 @@ hiopMatrixDense& HessianDiagPlusRowRank::new_lxl_mat1(int l) hiopMatrixDense& HessianDiagPlusRowRank::new_kx2l_mat1(int k, int l) { const int twol = 2 * l; - if (nullptr != kx2l_mat1_) { + if(nullptr != kx2l_mat1_) { assert(kx2l_mat1_->m() == k); - if (twol == kx2l_mat1_->n()) { + if(twol == kx2l_mat1_->n()) { return *kx2l_mat1_; - } - else { + } else { delete kx2l_mat1_; kx2l_mat1_ = nullptr; } @@ -999,12 +984,11 @@ hiopMatrixDense& HessianDiagPlusRowRank::new_kx2l_mat1(int k, int l) hiopMatrixDense& HessianDiagPlusRowRank::new_kxl_mat1(int k, int l) { - if (kxl_mat1_ != nullptr) { + if(kxl_mat1_ != nullptr) { assert(kxl_mat1_->m() == k); - if (l == kxl_mat1_->n()) { + if(l == kxl_mat1_->n()) { return *kxl_mat1_; - } - else { + } else { delete kxl_mat1_; kxl_mat1_ = nullptr; } @@ -1020,15 +1004,15 @@ hiopMatrixDense& HessianDiagPlusRowRank::new_S1(const hiopMatrixDense& X, const size_type l = St.m(); #ifdef HIOP_DEEPCHECKS assert(St.n() == X.n()); - if (S1_ != nullptr) { + if(S1_ != nullptr) { assert(S1_->m() == k); } #endif - if (nullptr != S1_ && S1_->n() != l) { + if(nullptr != S1_ && S1_->n() != l) { delete S1_; S1_ = nullptr; } - if (nullptr == S1_) { + if(nullptr == S1_) { S1_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", k, l); } return *S1_; @@ -1041,15 +1025,15 @@ hiopMatrixDense& HessianDiagPlusRowRank::new_Y1(const hiopMatrixDense& X, const size_type l = Yt.m(); #ifdef HIOP_DEEPCHECKS assert(X.n() == Yt.n()); - if (Y1_ != nullptr) { + if(Y1_ != nullptr) { assert(Y1_->m() == k); } #endif - if (nullptr != Y1_ && Y1_->n() != l) { + if(nullptr != Y1_ && Y1_->n() != l) { delete Y1_; Y1_ = nullptr; } - if (nullptr == Y1_) { + if(nullptr == Y1_) { Y1_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", k, l); } return *Y1_; @@ -1063,11 +1047,11 @@ void HessianDiagPlusRowRank::times_vec_no_logbar_term(double beta, hiopVector& y #endif // HIOP_DEEPCHECKS -void HessianDiagPlusRowRank::times_vec_common(double beta, - hiopVector& y, - double alpha, +void HessianDiagPlusRowRank::times_vec_common(double beta, + hiopVector& y, + double alpha, const hiopVector& x, - bool addLogTerm) const + bool addLogTerm) const { size_type n = St_->n(); assert(l_curr_ == St_->m()); @@ -1078,13 +1062,13 @@ void HessianDiagPlusRowRank::times_vec_common(double beta, // B0 is sigma*I. There is an additional diagonal log-barrier term Dx_ bool print = false; - if (print) { + if(print) { nlp_->log->printf(hovMatrices, "---HessianDiagPlusRowRank::times_vec \n"); nlp_->log->write("S=", *St_, hovMatrices); nlp_->log->write("Y=", *Yt_, hovMatrices); nlp_->log->write("DhInv=", *DhInv_, hovMatrices); nlp_->log->printf(hovMatrices, "sigma=%22.16e; addLogTerm=%d;\n", sigma_, addLogTerm); - if (addLogTerm) { + if(addLogTerm) { nlp_->log->write("Dx=", *Dx_, hovMatrices); } nlp_->log->printf(hovMatrices, "y=beta*y + alpha*this*x : beta=%g alpha=%g\n", beta, alpha); @@ -1097,13 +1081,13 @@ void HessianDiagPlusRowRank::times_vec_common(double beta, a.resize(l_curr_, nullptr); b.resize(l_curr_, nullptr); int n_local = Yt_->get_local_size_n(); - for (int k = 0; k < l_curr_; k++) { + for(int k = 0; k < l_curr_; k++) { // bk=yk/sqrt(yk'*sk) yk->copyFrom(Yt_->local_data() + k * n_local); sk->copyFrom(St_->local_data() + k * n_local); double skTyk = yk->dotProductWith(*sk); - if (skTyk < std::numeric_limits::epsilon()) { + if(skTyk < std::numeric_limits::epsilon()) { nlp_->log->printf(hovLinAlgScalars, "HessianDiagPlusRowRank: ||s_k^T*y_k||=%12.6e too small and was set it to mach eps = %12.6e \n", skTyk, @@ -1111,7 +1095,7 @@ void HessianDiagPlusRowRank::times_vec_common(double beta, skTyk = std::numeric_limits::epsilon(); } - if (a[k] == nullptr && b[k] == nullptr) { + if(a[k] == nullptr && b[k] == nullptr) { b[k] = nlp_->alloc_primal_vec(); a[k] = nlp_->alloc_primal_vec(); } @@ -1123,7 +1107,7 @@ void HessianDiagPlusRowRank::times_vec_common(double beta, a[k]->copyFrom(*sk); a[k]->scale(sigma_); - for (int i = 0; i < k; i++) { + for(int i = 0; i < k; i++) { double biTsk = b[i]->dotProductWith(*sk); a[k]->axpy(+biTsk, *b[i]); double aiTsk = a[i]->dotProductWith(*sk); @@ -1137,13 +1121,13 @@ void HessianDiagPlusRowRank::times_vec_common(double beta, // compute the product with x // y = beta*y+alpha*(B0+Dx)*x + alpha* sum { bk'x bk - ak'x ak : k=0,1,...,l_curr-1} y.scale(beta); - if (addLogTerm) { + if(addLogTerm) { y.axzpy(alpha, x, *Dx_); } y.axpy(alpha * sigma_, x); - for (int k = 0; k < l_curr_; k++) { + for(int k = 0; k < l_curr_; k++) { double bkTx = b[k]->dotProductWith(x); double akTx = a[k]->dotProductWith(x); @@ -1151,7 +1135,7 @@ void HessianDiagPlusRowRank::times_vec_common(double beta, y.axpy(-alpha * akTx, *a[k]); } - if (print) { + if(print) { nlp_->log->write("y_out=", y, hovMatrices); } } @@ -1174,13 +1158,13 @@ void HessianDiagPlusRowRank::timesVec(double beta, hiopVector& y, double alpha, * W is kxk local, X is kxn distributed and Diag is n, distributed * The ops are perform locally. The reduce is done separately/externally to decrease comm */ -void HessianDiagPlusRowRank::sym_mat_times_diag_times_mattrans_local(double beta, - hiopMatrixDense& W, - double alpha, +void HessianDiagPlusRowRank::sym_mat_times_diag_times_mattrans_local(double beta, + hiopMatrixDense& W, + double alpha, const hiopMatrixDense& X, - const hiopVector& d) + const hiopVector& d) { - size_type k = W.m(); + size_type k = W.m(); size_type n_local = X.get_local_size_n(); assert(X.m() == k); @@ -1193,19 +1177,19 @@ void HessianDiagPlusRowRank::sym_mat_times_diag_times_mattrans_local(double // #define chunk 512; //!opt const double *xi, *xj; - double acc; - double* Wdata = W.local_data(); + double acc; + double* Wdata = W.local_data(); const double* Xdata = X.local_data_const(); - const double* dd = d.local_data_const(); - for (int i = 0; i < k; i++) { + const double* dd = d.local_data_const(); + for(int i = 0; i < k; i++) { // xi=Xdata[i]; xi = Xdata + i * n_local; - for (int j = i; j < k; j++) { + for(int j = i; j < k; j++) { // xj=Xdata[j]; - xj = Xdata + j * n_local; + xj = Xdata + j * n_local; // compute W[i,j] = sum {X[i,p]*d[p]*X[j,p] : p=1,...,n_local} acc = 0.0; - for (size_type p = 0; p < n_local; p++) { + for(size_type p = 0; p < n_local; p++) { acc += xi[p] * dd[p] * xj[p]; } @@ -1216,9 +1200,9 @@ void HessianDiagPlusRowRank::sym_mat_times_diag_times_mattrans_local(double } /* W=S*D*X^T, where S is lxn, D is diag nxn, and X is kxn */ -void HessianDiagPlusRowRank::mat_times_diag_times_mattrans_local(hiopMatrixDense& W, +void HessianDiagPlusRowRank::mat_times_diag_times_mattrans_local(hiopMatrixDense& W, const hiopMatrixDense& S, - const hiopVector& d, + const hiopVector& d, const hiopMatrixDense& X) { #ifdef HIOP_DEEPCHECKS @@ -1229,24 +1213,24 @@ void HessianDiagPlusRowRank::mat_times_diag_times_mattrans_local(hiopMatrixDense assert(X.get_local_size_n() == d.get_local_size()); const double* Sdi; - double* Wdi; + double* Wdi; const double* Xdj; - double acc; - double* Wd = W.local_data(); - const double* Sd = S.local_data_const(); - const double* Xd = X.local_data_const(); + double acc; + double* Wd = W.local_data(); + const double* Sd = S.local_data_const(); + const double* Xd = X.local_data_const(); const double* diag = d.local_data_const(); //! opt - for (int i = 0; i < l; i++) { + for(int i = 0; i < l; i++) { // Sdi=Sd[i]; Wdi=Wd[i]; Sdi = Sd + i * n; Wdi = Wd + i * W.get_local_size_n(); - for (int j = 0; j < k; j++) { + for(int j = 0; j < k; j++) { // Xdj=Xd[j]; Xdj = Xd + j * n; acc = 0.; - for (int p = 0; p < n; p++) { + for(int p = 0; p < n; p++) { // acc += Sdi[p]*diag[p]*Xdj[p]; acc += Sdi[p] * diag[p] * Xdj[p]; } diff --git a/src/Optimization/HessianDiagPlusRowRank.hpp b/src/Optimization/HessianDiagPlusRowRank.hpp index 72a6c8b43..92d4ed6a6 100644 --- a/src/Optimization/HessianDiagPlusRowRank.hpp +++ b/src/Optimization/HessianDiagPlusRowRank.hpp @@ -104,9 +104,9 @@ class HessianDiagPlusRowRank : public hiopMatrix /// Updates Hessian if hereditary positive definitness is maintained and returns true, otherwise false. virtual bool update(const hiopIterate& x_curr, - const hiopVector& grad_f_curr, - const hiopMatrix& Jac_c_curr, - const hiopMatrix& Jac_d_curr); + const hiopVector& grad_f_curr, + const hiopMatrix& Jac_c_curr, + const hiopMatrix& Jac_d_curr); /* updates the logBar diagonal term from the representation */ virtual bool update_logbar_diag(const hiopVector& Dx); @@ -131,41 +131,41 @@ class HessianDiagPlusRowRank : public hiopMatrix virtual void times_vec(double beta, hiopVector& y, double alpha, const hiopVector& x); /* code shared by the above two methods*/ - virtual void times_vec_common(double beta, - hiopVector& y, - double alpha, + virtual void times_vec_common(double beta, + hiopVector& y, + double alpha, const hiopVector& x, - bool add_logbar = false) const; + bool add_logbar = false) const; protected: friend class hiopAlgFilterIPMQuasiNewton; - int l_max_; // max memory size - int l_curr_; // number of pairs currently stored - double sigma_; // initial scaling factor of identity - double sigma0_; // default scaling factor of identity + int l_max_; // max memory size + int l_curr_; // number of pairs currently stored + double sigma_; // initial scaling factor of identity + double sigma0_; // default scaling factor of identity // Integer for the sigma update strategy - int sigma_update_strategy_; + int sigma_update_strategy_; // Min safety thresholds for sigma - double sigma_safe_min_; + double sigma_safe_min_; // Max safety thresholds for sigma - double sigma_safe_max_; + double sigma_safe_max_; // Pointer to the NLP formulation - hiopNlpDenseConstraints* nlp_; + hiopNlpDenseConstraints* nlp_; mutable std::vector a; mutable std::vector b; - hiopVector* yk; - hiopVector* sk; + hiopVector* yk; + hiopVector* sk; private: // Vector for (B0+Dk)^{-1} - hiopVector* DhInv_; + hiopVector* DhInv_; // Dx_ is needed in times_vec (for residual checking in solveCompressed). Can be recomputed from DhInv, but I decided to // store it instead to avoid round-off errors - hiopVector* Dx_; + hiopVector* Dx_; - bool matrix_changed_; + bool matrix_changed_; // These are matrices from the compact representation; they are updated at each iteration. // More exactly Bk=B0-[B0*St' Yt']*[St*B0*St' L]*[St*B0] @@ -177,42 +177,42 @@ class HessianDiagPlusRowRank : public hiopMatrix /// Lower triangular matrix from the compact representation hiopMatrixDense* L_; /// Diagonal matrix from the compact representation - hiopVector* D_; + hiopVector* D_; // Matrix V from the representation of the inverse hiopMatrixDense* V_; #ifdef HIOP_DEEPCHECKS // copy of the V matrix - needed to check the residual hiopMatrixDense* Vmat_; #endif - void growL(const int& lmem_curr, const int& lmem_max, const hiopVector& YTs); - void growD(const int& l_curr, const int& l_max, const double& sTy); - void updateL(const hiopVector& STy, const double& sTy); - void updateD(const double& sTy); + void growL(const int& lmem_curr, const int& lmem_max, const hiopVector& YTs); + void growD(const int& l_curr, const int& l_max, const double& sTy); + void updateL(const hiopVector& STy, const double& sTy); + void updateD(const double& sTy); // also stored are the iterate, gradient obj, and Jacobians at the previous optimization iteration - hiopIterate* it_prev_; - hiopVector* grad_f_prev_; - hiopMatrixDense* Jac_c_prev_; - hiopMatrixDense* Jac_d_prev_; + hiopIterate* it_prev_; + hiopVector* grad_f_prev_; + hiopMatrixDense* Jac_c_prev_; + hiopMatrixDense* Jac_d_prev_; // internal helpers - void updateInternalBFGSRepresentation(); + void updateInternalBFGSRepresentation(); // internals buffers, mostly for MPIAll_reduce - double* buff_kxk_; // size = num_constraints^2 - double* buff_2lxk_; // size = 2 x q-Newton mem size x num_constraints - double* buff1_lxlx3_; - double* buff2_lxlx3_; + double* buff_kxk_; // size = num_constraints^2 + double* buff_2lxk_; // size = 2 x q-Newton mem size x num_constraints + double* buff1_lxlx3_; + double* buff2_lxlx3_; // auxiliary objects preallocated and used in internally in various computation blocks /// See new_S1 - hiopMatrixDense* S1_; + hiopMatrixDense* S1_; /// See new_Y1 - hiopMatrixDense* Y1_; + hiopMatrixDense* Y1_; - hiopMatrixDense* lxl_mat1_; - hiopMatrixDense* kx2l_mat1_; - hiopMatrixDense* kxl_mat1_; + hiopMatrixDense* lxl_mat1_; + hiopMatrixDense* kx2l_mat1_; + hiopMatrixDense* kxl_mat1_; /** * (Re)Allocates S1_ of size kxl to store is X*D*S, where D is a diagonal matrix. S comes in @@ -220,7 +220,7 @@ class HessianDiagPlusRowRank : public hiopMatrix * constraints. S1_ is allocated only if not already allocated or realocated only if it does * not have the right dimesions to store X*D*S. */ - hiopMatrixDense& new_S1(const hiopMatrixDense& X, const hiopMatrixDense& St); + hiopMatrixDense& new_S1(const hiopMatrixDense& X, const hiopMatrixDense& St); /** * (Re)Allocates Y1_ of size kxl to store is X*D*Y, where D is a diagonal matrix. Y comes in @@ -228,19 +228,19 @@ class HessianDiagPlusRowRank : public hiopMatrix * constraints. Y1_ is allocated only if not already allocated or reallocated only if it does * not have the right dimesions to store X*D*Y. */ - hiopMatrixDense& new_Y1(const hiopMatrixDense& X, const hiopMatrixDense& Yt); - - hiopMatrixDense& new_lxl_mat1(int l); - hiopMatrixDense& new_kxl_mat1(int k, int l); - hiopMatrixDense& new_kx2l_mat1(int k, int l); - - hiopVector* l_vec1_; - hiopVector* l_vec2_; - hiopVector* n_vec1_; - hiopVector* n_vec2_; - hiopVector* twol_vec1_; - hiopVector& new_l_vec1(int l); - hiopVector& new_l_vec2(int l); + hiopMatrixDense& new_Y1(const hiopMatrixDense& X, const hiopMatrixDense& Yt); + + hiopMatrixDense& new_lxl_mat1(int l); + hiopMatrixDense& new_kxl_mat1(int k, int l); + hiopMatrixDense& new_kx2l_mat1(int k, int l); + + hiopVector* l_vec1_; + hiopVector* l_vec2_; + hiopVector* n_vec1_; + hiopVector* n_vec2_; + hiopVector* twol_vec1_; + hiopVector& new_l_vec1(int l); + hiopVector& new_l_vec2(int l); inline hiopVector& new_n_vec1(size_type n) { #ifdef HIOP_DEEPCHECKS @@ -259,10 +259,10 @@ class HessianDiagPlusRowRank : public hiopMatrix } inline hiopVector& new_2l_vec1(int l) { - if (twol_vec1_ != nullptr && twol_vec1_->get_size() == 2 * l) { + if(twol_vec1_ != nullptr && twol_vec1_->get_size() == 2 * l) { return *twol_vec1_; } - if (twol_vec1_ != nullptr) { + if(twol_vec1_ != nullptr) { delete twol_vec1_; } twol_vec1_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), 2 * l); @@ -273,27 +273,27 @@ class HessianDiagPlusRowRank : public hiopMatrix // utilities /// @brief Ensures the internal containers are ready to work with "limited memory" mem_length - void alloc_for_limited_mem(const size_type& mem_length); + void alloc_for_limited_mem(const size_type& mem_length); /* symmetric multiplication W = beta*W + alpha*X*Diag*X^T */ - static void sym_mat_times_diag_times_mattrans_local(double beta, - hiopMatrixDense& W_, - double alpha, + static void sym_mat_times_diag_times_mattrans_local(double beta, + hiopMatrixDense& W_, + double alpha, const hiopMatrixDense& X_, - const hiopVector& d); + const hiopVector& d); /* W=S*Diag*X^T */ - static void mat_times_diag_times_mattrans_local(hiopMatrixDense& W, + static void mat_times_diag_times_mattrans_local(hiopMatrixDense& W, const hiopMatrixDense& S, - const hiopVector& d, + const hiopVector& d, const hiopMatrixDense& X); /* members and utilities related to V matrix: factorization and solve */ hiopVector* V_work_vec_; - int V_ipiv_size_; - int* V_ipiv_vec_; + int V_ipiv_size_; + int* V_ipiv_vec_; - void factorizeV(); - void solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y); - void solve_with_V(hiopMatrixDense& rhs); + void factorizeV(); + void solve_with_V(hiopVector& rhs_s, hiopVector& rhs_y); + void solve_with_V(hiopMatrixDense& rhs); private: HessianDiagPlusRowRank() {}; @@ -316,7 +316,7 @@ class HessianDiagPlusRowRank : public hiopMatrix virtual void setToZero() { assert(false && "not provided because it is not needed"); } virtual void setToConstant(double c) { assert(false && "not provided because it is not needed"); } - void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; /** y = beta * y + alpha * this^T * x */ virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const @@ -351,11 +351,11 @@ class HessianDiagPlusRowRank : public hiopMatrix /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, - const double& alpha, + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, const hiopVector& d_, - int start_on_src_vec, - int num_elems = -1) + int start_on_src_vec, + int num_elems = -1) { assert(false && "not needed / implemented"); } @@ -367,7 +367,7 @@ class HessianDiagPlusRowRank : public hiopMatrix /* this += alpha*X */ virtual void addMatrix(double alpah, const hiopMatrix& X) { assert(false && "not provided because it is not needed"); } - void addToSymDenseMatrixUpperTriangle(int row_start, int col_start, double alpha, hiopMatrixDense& W) const + void addToSymDenseMatrixUpperTriangle(int row_start, int col_start, double alpha, hiopMatrixDense& W) const { assert(false && "not needed; should not be used"); } From 4744bbc27a1b00d87136a0279d8e3d6ade99dad1 Mon Sep 17 00:00:00 2001 From: nychiang Date: Fri, 22 Nov 2024 12:28:33 -0800 Subject: [PATCH 3/3] apply changes to all the files --- .clang-format | 72 + src/Drivers/Dense/NlpDenseConsEx1.cpp | 214 +- src/Drivers/Dense/NlpDenseConsEx1.hpp | 169 +- src/Drivers/Dense/NlpDenseConsEx1Driver.cpp | 140 +- src/Drivers/Dense/NlpDenseConsEx2.cpp | 286 +- src/Drivers/Dense/NlpDenseConsEx2.hpp | 89 +- src/Drivers/Dense/NlpDenseConsEx2Driver.cpp | 117 +- src/Drivers/Dense/NlpDenseConsEx3.hpp | 263 +- src/Drivers/Dense/NlpDenseConsEx3Driver.cpp | 106 +- src/Drivers/Dense/NlpDenseConsEx4.cpp | 258 +- src/Drivers/Dense/NlpDenseConsEx4.hpp | 85 +- src/Drivers/Dense/NlpDenseConsEx4Driver.cpp | 60 +- src/Drivers/IpoptAdapter/IpoptAdapter.hpp | 946 +++--- .../IpoptAdapterDriverPriDecEx1.cpp | 57 +- .../IpoptAdapterDriverPriDecEx2.cpp | 56 +- .../IpoptAdapter/IpoptAdapter_driver.cpp | 74 +- src/Drivers/MDS/NlpMdsEx1.hpp | 720 ++--- src/Drivers/MDS/NlpMdsEx1Driver.cpp | 113 +- src/Drivers/MDS/NlpMdsEx1RajaDriver.cpp | 113 +- src/Drivers/MDS/NlpMdsEx2.hpp | 602 ++-- src/Drivers/MDS/NlpMdsEx2Driver.cpp | 223 +- src/Drivers/MDS/NlpMdsRajaEx1.cpp | 1012 +++---- src/Drivers/MDS/NlpMdsRajaEx1.hpp | 277 +- src/Drivers/MDS/hpc_benchmark.cpp | 90 +- src/Drivers/MDS/hpc_multisolves.cpp | 60 +- src/Drivers/PriDec/NlpPriDecEx1.cpp | 213 +- src/Drivers/PriDec/NlpPriDecEx1.hpp | 134 +- src/Drivers/PriDec/NlpPriDecEx1Driver.cpp | 142 +- src/Drivers/PriDec/NlpPriDecEx2.cpp | 162 +- src/Drivers/PriDec/NlpPriDecEx2.hpp | 55 +- src/Drivers/PriDec/NlpPriDecEx2Driver.cpp | 129 +- src/Drivers/PriDec/NlpPriDecEx2Sparse.cpp | 156 +- src/Drivers/PriDec/NlpPriDecEx2Sparse.hpp | 53 +- .../PriDec/NlpPriDecEx2SparseDriver.cpp | 134 +- src/Drivers/PriDec/NlpPriDecEx2SparseRaja.cpp | 167 +- src/Drivers/PriDec/NlpPriDecEx2SparseRaja.hpp | 53 +- .../PriDec/NlpPriDecEx2SparseRajaDriver.cpp | 137 +- .../PriDec/NlpPriDecEx2UserBasecase.hpp | 51 +- .../PriDec/NlpPriDecEx2UserRecourseMds.hpp | 449 ++- .../PriDec/NlpPriDecEx2UserRecourseSparse.hpp | 351 ++- .../NlpPriDecEx2UserRecourseSparseRaja.hpp | 618 ++-- src/Drivers/PriDec/NlpPriDecEx3Sparse.hpp | 437 +-- .../PriDec/NlpPriDecEx3SparseDriver.cpp | 60 +- src/Drivers/Sparse/NlpSparseEx1Driver.cpp | 149 +- src/Drivers/Sparse/NlpSparseEx2.cpp | 308 +- src/Drivers/Sparse/NlpSparseEx2.hpp | 25 +- src/Drivers/Sparse/NlpSparseEx2Driver.cpp | 175 +- src/Drivers/Sparse/NlpSparseEx3.cpp | 176 +- src/Drivers/Sparse/NlpSparseEx3.hpp | 25 +- src/Drivers/Sparse/NlpSparseEx3Driver.cpp | 102 +- src/Drivers/Sparse/NlpSparseEx4.cpp | 253 +- src/Drivers/Sparse/NlpSparseEx4.hpp | 51 +- src/Drivers/Sparse/NlpSparseEx4Driver.cpp | 149 +- src/Drivers/Sparse/NlpSparseRajaEx2.cpp | 486 ++- src/Drivers/Sparse/NlpSparseRajaEx2.hpp | 96 +- src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp | 158 +- src/ExecBackends/ExecPoliciesRajaCudaImpl.hpp | 82 +- src/ExecBackends/ExecPoliciesRajaHipImpl.hpp | 83 +- src/ExecBackends/ExecPoliciesRajaOmpImpl.hpp | 83 +- src/ExecBackends/ExecSpace.hpp | 230 +- src/ExecBackends/MemBackendCppImpl.hpp | 78 +- src/ExecBackends/MemBackendCudaImpl.hpp | 80 +- src/ExecBackends/MemBackendHipImpl.hpp | 83 +- src/ExecBackends/MemBackendUmpireImpl.hpp | 127 +- src/Interface/chiopInterface.cpp | 108 +- src/Interface/chiopInterface.hpp | 1012 ++++--- src/Interface/hiopInterface.hpp | 543 ++-- src/Interface/hiopInterfacePrimalDecomp.cpp | 195 +- src/Interface/hiopInterfacePrimalDecomp.hpp | 108 +- src/Interface/hiopVersion.hpp | 21 +- src/LinAlg/LinAlgFactory.cpp | 409 +-- src/LinAlg/LinAlgFactory.hpp | 84 +- src/LinAlg/MatrixSparseCsrCudaKernels.hpp | 40 +- src/LinAlg/ReSolve/IterativeRefinement.cpp | 1242 ++++---- src/LinAlg/ReSolve/IterativeRefinement.hpp | 132 +- src/LinAlg/ReSolve/MatrixCsr.cpp | 154 +- src/LinAlg/ReSolve/MatrixCsr.hpp | 65 +- src/LinAlg/ReSolve/RefactorizationSolver.cpp | 1318 ++++----- src/LinAlg/ReSolve/RefactorizationSolver.hpp | 159 +- src/LinAlg/ReSolve/resolve_cusolver_defs.hpp | 151 +- src/LinAlg/VectorCudaKernels.hpp | 203 +- src/LinAlg/VectorHipKernels.cpp | 832 +++--- src/LinAlg/VectorHipKernels.hpp | 203 +- src/LinAlg/hiopKrylovSolver.cpp | 337 ++- src/LinAlg/hiopKrylovSolver.hpp | 60 +- src/LinAlg/hiopLinSolver.cpp | 121 +- src/LinAlg/hiopLinSolver.hpp | 74 +- src/LinAlg/hiopLinSolverCholCuSparse.cpp | 299 +- src/LinAlg/hiopLinSolverCholCuSparse.hpp | 34 +- src/LinAlg/hiopLinSolverMA86Z.cpp | 314 +- src/LinAlg/hiopLinSolverMA86Z.hpp | 136 +- src/LinAlg/hiopLinSolverSparseGinkgo.cpp | 500 ++-- src/LinAlg/hiopLinSolverSparseGinkgo.hpp | 20 +- src/LinAlg/hiopLinSolverSparsePARDISO.cpp | 981 +++--- src/LinAlg/hiopLinSolverSparsePARDISO.hpp | 160 +- src/LinAlg/hiopLinSolverSparseReSolve.cpp | 841 +++--- src/LinAlg/hiopLinSolverSparseReSolve.hpp | 44 +- src/LinAlg/hiopLinSolverSparseSTRUMPACK.cpp | 600 ++-- src/LinAlg/hiopLinSolverSparseSTRUMPACK.hpp | 75 +- src/LinAlg/hiopLinSolverSymDenseLapack.hpp | 199 +- src/LinAlg/hiopLinSolverSymDenseMagma.cpp | 664 ++--- src/LinAlg/hiopLinSolverSymDenseMagma.hpp | 89 +- src/LinAlg/hiopLinSolverSymSparseMA57.cpp | 474 ++- src/LinAlg/hiopLinSolverSymSparseMA57.hpp | 297 +- src/LinAlg/hiopLinSolverUMFPACKZ.cpp | 501 ++-- src/LinAlg/hiopLinSolverUMFPACKZ.hpp | 140 +- src/LinAlg/hiopLinearOperator.cpp | 62 +- src/LinAlg/hiopLinearOperator.hpp | 23 +- src/LinAlg/hiopMatrix.hpp | 165 +- src/LinAlg/hiopMatrixComplexDense.cpp | 570 ++-- src/LinAlg/hiopMatrixComplexDense.hpp | 478 ++- src/LinAlg/hiopMatrixComplexSparseTriplet.cpp | 886 +++--- src/LinAlg/hiopMatrixComplexSparseTriplet.hpp | 436 ++- src/LinAlg/hiopMatrixDense.hpp | 316 +- src/LinAlg/hiopMatrixDenseRaja.hpp | 204 +- src/LinAlg/hiopMatrixDenseRajaCuda.cpp | 62 +- src/LinAlg/hiopMatrixDenseRajaHip.cpp | 62 +- src/LinAlg/hiopMatrixDenseRajaImpl.hpp | 1028 ++++--- src/LinAlg/hiopMatrixDenseRajaOmp.cpp | 63 +- src/LinAlg/hiopMatrixDenseRowMajor.cpp | 867 +++--- src/LinAlg/hiopMatrixDenseRowMajor.hpp | 196 +- src/LinAlg/hiopMatrixMDS.cpp | 9 +- src/LinAlg/hiopMatrixMDS.hpp | 263 +- src/LinAlg/hiopMatrixRajaSparseTriplet.hpp | 291 +- .../hiopMatrixRajaSparseTripletCuda.cpp | 62 +- src/LinAlg/hiopMatrixRajaSparseTripletHip.cpp | 62 +- .../hiopMatrixRajaSparseTripletImpl.hpp | 1933 ++++++------ src/LinAlg/hiopMatrixRajaSparseTripletOmp.cpp | 62 +- src/LinAlg/hiopMatrixSparse.hpp | 251 +- src/LinAlg/hiopMatrixSparseCSR.hpp | 134 +- src/LinAlg/hiopMatrixSparseCSRSeq.cpp | 1000 +++---- src/LinAlg/hiopMatrixSparseCSRSeq.hpp | 301 +- src/LinAlg/hiopMatrixSparseCsrCuda.cpp | 508 ++-- src/LinAlg/hiopMatrixSparseCsrCuda.hpp | 331 +-- src/LinAlg/hiopMatrixSparseTriplet.cpp | 971 +++--- src/LinAlg/hiopMatrixSparseTriplet.hpp | 284 +- src/LinAlg/hiopMatrixSparseTripletStorage.cpp | 1 - src/LinAlg/hiopMatrixSparseTripletStorage.hpp | 237 +- src/LinAlg/hiopVector.hpp | 202 +- src/LinAlg/hiopVectorCompoundPD.cpp | 374 ++- src/LinAlg/hiopVectorCompoundPD.hpp | 198 +- src/LinAlg/hiopVectorCuda.cpp | 496 ++-- src/LinAlg/hiopVectorCuda.hpp | 171 +- src/LinAlg/hiopVectorHip.cpp | 539 ++-- src/LinAlg/hiopVectorHip.hpp | 169 +- src/LinAlg/hiopVectorInt.hpp | 87 +- src/LinAlg/hiopVectorIntCompoundPD.cpp | 88 +- src/LinAlg/hiopVectorIntCompoundPD.hpp | 105 +- src/LinAlg/hiopVectorIntCuda.cpp | 71 +- src/LinAlg/hiopVectorIntCuda.hpp | 84 +- src/LinAlg/hiopVectorIntHip.cpp | 71 +- src/LinAlg/hiopVectorIntHip.hpp | 82 +- src/LinAlg/hiopVectorIntRaja.hpp | 93 +- src/LinAlg/hiopVectorIntRajaCuda.cpp | 65 +- src/LinAlg/hiopVectorIntRajaHip.cpp | 65 +- src/LinAlg/hiopVectorIntRajaImpl.hpp | 87 +- src/LinAlg/hiopVectorIntRajaOmp.cpp | 67 +- src/LinAlg/hiopVectorIntSeq.cpp | 81 +- src/LinAlg/hiopVectorIntSeq.hpp | 81 +- src/LinAlg/hiopVectorPar.cpp | 1007 +++---- src/LinAlg/hiopVectorPar.hpp | 182 +- src/LinAlg/hiopVectorRaja.hpp | 200 +- src/LinAlg/hiopVectorRajaCuda.cpp | 13 +- src/LinAlg/hiopVectorRajaHip.cpp | 13 +- src/LinAlg/hiopVectorRajaImpl.hpp | 1544 +++++----- src/LinAlg/hiopVectorRajaOmp.cpp | 17 +- src/LinAlg/hiop_blasdefs.hpp | 210 +- src/LinAlg/test_hiopLinalgComplex.cpp | 148 +- src/Optimization/KktLinSysLowRank.cpp | 236 +- src/Optimization/KktLinSysLowRank.hpp | 49 +- src/Optimization/hiopAlgFilterIPM.cpp | 1922 ++++++------ src/Optimization/hiopAlgFilterIPM.hpp | 233 +- src/Optimization/hiopAlgPrimalDecomp.cpp | 2625 ++++++++--------- src/Optimization/hiopAlgPrimalDecomp.hpp | 259 +- src/Optimization/hiopDualsUpdater.cpp | 442 ++- src/Optimization/hiopDualsUpdater.hpp | 210 +- src/Optimization/hiopFRProb.cpp | 622 ++-- src/Optimization/hiopFRProb.hpp | 116 +- src/Optimization/hiopFactAcceptor.cpp | 108 +- src/Optimization/hiopFactAcceptor.hpp | 83 +- src/Optimization/hiopFilter.cpp | 71 +- src/Optimization/hiopFilter.hpp | 96 +- src/Optimization/hiopIterate.cpp | 374 +-- src/Optimization/hiopIterate.hpp | 110 +- src/Optimization/hiopKKTLinSys.cpp | 815 ++--- src/Optimization/hiopKKTLinSys.hpp | 309 +- src/Optimization/hiopKKTLinSysDense.hpp | 361 +-- src/Optimization/hiopKKTLinSysMDS.cpp | 824 +++--- src/Optimization/hiopKKTLinSysMDS.hpp | 112 +- src/Optimization/hiopKKTLinSysSparse.cpp | 2019 +++++++------ src/Optimization/hiopKKTLinSysSparse.hpp | 90 +- .../hiopKKTLinSysSparseCondensed.cpp | 245 +- .../hiopKKTLinSysSparseCondensed.hpp | 59 +- .../hiopKKTLinSysSparseNormalEqn.cpp | 214 +- .../hiopKKTLinSysSparseNormalEqn.hpp | 35 +- src/Optimization/hiopLogBarProblem.hpp | 148 +- src/Optimization/hiopNlpFormulation.cpp | 1191 ++++---- src/Optimization/hiopNlpFormulation.hpp | 539 ++-- src/Optimization/hiopNlpTransforms.cpp | 382 +-- src/Optimization/hiopNlpTransforms.hpp | 498 ++-- src/Optimization/hiopPDPerturbation.cpp | 1110 ++++--- src/Optimization/hiopPDPerturbation.hpp | 215 +- src/Optimization/hiopResidual.cpp | 525 ++-- src/Optimization/hiopResidual.hpp | 121 +- src/Utils/MathKernelsCuda.hpp | 24 +- src/Utils/MathKernelsHip.cpp | 80 +- src/Utils/MathKernelsHip.hpp | 24 +- src/Utils/MathKernelsHost.cpp | 11 +- src/Utils/MathKernelsHost.hpp | 12 +- src/Utils/RajaUmpireUtils.cpp | 67 +- src/Utils/RajaUmpireUtils.hpp | 83 +- src/Utils/SidreHelper.hpp | 165 +- src/Utils/hiopCSR_IO.hpp | 404 ++- src/Utils/hiopCppStdUtils.hpp | 140 +- src/Utils/hiopKronReduction.cpp | 217 +- src/Utils/hiopKronReduction.hpp | 135 +- src/Utils/hiopLogger.cpp | 152 +- src/Utils/hiopLogger.hpp | 122 +- src/Utils/hiopMPI.hpp | 82 +- src/Utils/hiopOptions.cpp | 765 +++-- src/Utils/hiopOptions.hpp | 180 +- src/Utils/hiopRunStats.hpp | 249 +- src/Utils/hiopTimer.hpp | 94 +- src/_Internals/hsl_mc69z.hpp | 1 - tests/LinAlg/matrixTestsDense.hpp | 647 ++-- tests/LinAlg/matrixTestsDenseRowMajor.cpp | 127 +- tests/LinAlg/matrixTestsDenseRowMajor.hpp | 48 +- tests/LinAlg/matrixTestsRajaDense.cpp | 154 +- tests/LinAlg/matrixTestsRajaDense.hpp | 48 +- tests/LinAlg/matrixTestsRajaSparseTriplet.cpp | 191 +- tests/LinAlg/matrixTestsRajaSparseTriplet.hpp | 53 +- .../matrixTestsRajaSymSparseTriplet.cpp | 110 +- .../matrixTestsRajaSymSparseTriplet.hpp | 32 +- tests/LinAlg/matrixTestsSparse.hpp | 1282 ++++---- tests/LinAlg/matrixTestsSparseTriplet.cpp | 152 +- tests/LinAlg/matrixTestsSparseTriplet.hpp | 45 +- tests/LinAlg/matrixTestsSymSparse.hpp | 205 +- tests/LinAlg/matrixTestsSymSparseTriplet.cpp | 82 +- tests/LinAlg/matrixTestsSymSparseTriplet.hpp | 32 +- tests/LinAlg/testBase.hpp | 66 +- tests/LinAlg/vectorTests.hpp | 912 +++--- tests/LinAlg/vectorTestsCuda.cpp | 62 +- tests/LinAlg/vectorTestsCuda.hpp | 14 +- tests/LinAlg/vectorTestsHip.cpp | 62 +- tests/LinAlg/vectorTestsHip.hpp | 14 +- tests/LinAlg/vectorTestsInt.hpp | 39 +- tests/LinAlg/vectorTestsIntRaja.cpp | 16 +- tests/LinAlg/vectorTestsIntRaja.hpp | 13 +- tests/LinAlg/vectorTestsIntSeq.cpp | 8 +- tests/LinAlg/vectorTestsIntSeq.hpp | 12 +- tests/LinAlg/vectorTestsPar.cpp | 45 +- tests/LinAlg/vectorTestsPar.hpp | 12 +- tests/LinAlg/vectorTestsRajaPar.cpp | 66 +- tests/LinAlg/vectorTestsRajaPar.hpp | 7 +- tests/testMatrixDense.cpp | 98 +- tests/testMatrixSparse.cpp | 251 +- tests/testMatrixSymSparse.cpp | 67 +- tests/testVector.cpp | 93 +- tests/test_bicgstab.cpp | 115 +- tests/test_pcg.cpp | 128 +- 260 files changed, 34540 insertions(+), 36512 deletions(-) create mode 100644 .clang-format diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..2b06a23eb --- /dev/null +++ b/.clang-format @@ -0,0 +1,72 @@ +# Use defaults from Google style +BasedOnStyle: Google + +# General formatting settings +IndentWidth: 2 # Use 2 spaces per indentation level (default in Google style) +ColumnLimit: 125 # Maximum number of characters per line +AccessModifierOffset: -2 # Indent access modifiers (e.g., `public`, `private`) by -2 spaces + +# Alignment settings +AlignAfterOpenBracket: Align # Align after open bracket (for function parameters, etc.) +AlignConsecutiveAssignments: None # Do not align consecutive assignments +AlignConsecutiveDeclarations: None # Do not align consecutive declarations +AlignConsecutiveMacros: None # Do not align consecutive macros + +# Space formatting settings +SpaceBeforeParens: Never # No space before parentheses (e.g., function calls) +SpaceBeforeAssignmentOperators: true # Add a space before assignment operators (e.g., `a = b`) +SpaceBeforeCaseColon: false # No space before case colons in switch statements +SpaceBeforeCpp11BracedList: false # No space before opening brace in C++11 list initialization +SpaceBeforeRangeBasedForLoopColon: false # No space before colon in range-based for loops +SpaceInEmptyBlock: false # Do not add space inside empty blocks +SpaceAfterLogicalNot: false # No space after logical negation (`!`) +SpaceAfterTemplateKeyword: false # No space after the `template` keyword in template definitions +SpacesInContainerLiterals: false # No space inside container literals (e.g., `std::vector{1,2,3}`) + +# Pointer alignment settings +PointerAlignment: Left # Align pointers to the left (e.g., `int* ptr` instead of `int *ptr`) +DerivePointerAlignment: true # Derive pointer alignment from surrounding code + +# Bracing and block formatting settings +BreakBeforeBraces: Custom # Use custom brace wrapping settings (defined below) +BraceWrapping: + AfterCaseLabel: false # No new line after case labels + AfterClass: true # Place braces on a new line after class declarations + AfterControlStatement: false # No new line after control statements (e.g., if, while) + AfterEnum: true # Place braces on a new line after enum declarations + AfterFunction: true # Place braces on a new line after function declarations + AfterNamespace: true # Place braces on a new line after namespace declarations + AfterStruct: true # Place braces on a new line after struct declarations + AfterUnion: true # Place braces on a new line after union declarations + AfterExternBlock: false # No new line after extern blocks + BeforeCatch: false # No new line before `catch` statements + BeforeElse: false # No new line before `else` statements + BeforeLambdaBody: false # No new line before lambda body + BeforeWhile: false # No new line before `while` statements + IndentBraces: false # Keep braces aligned with surrounding code (do not indent them) + SplitEmptyFunction: false # Do not split empty functions onto separate lines + SplitEmptyRecord: false # Do not split empty structs/classes onto separate lines + SplitEmptyNamespace: false # Do not split empty namespaces onto separate lines +BreakTemplateDeclarations: Yes # Break template declarations onto separate lines if needed + +# Constructor and initializer formatting +BreakConstructorInitializers: BeforeColon # Place a break before the colon in constructor initializers +PackConstructorInitializers: Never # Do not pack constructor initializers onto a single line + +# Function and parameter formatting +AllowAllArgumentsOnNextLine: false # Do not allow function arguments to go to the next line +AllowAllParametersOfDeclarationOnNextLine: false # Do not allow parameters of function declarations to go to the next line +BinPackArguments: false # Do not pack function arguments onto a single line +BinPackParameters: false # Do not pack function parameters onto a single line + +# Sorting and other settings +SortIncludes: Never # Do not sort `#include` statements + +# Allowing short forms (this section is currently commented out) +#AllowShortBlocksOnASingleLine: Never # Never allow short blocks to be on a single line +#AllowShortCaseExpressionOnASingleLine: false # Do not allow case expressions on a single line +#AllowShortCaseLabelsOnASingleLine: false # Do not allow case labels on a single line +#AllowShortEnumsOnASingleLine: false # Do not allow enums on a single line +#AllowShortFunctionsOnASingleLine: Empty # Allow short functions to be on a single line, if empty +#AllowShortIfStatementsOnASingleLine: Never # Never allow if statements to be on a single line + diff --git a/src/Drivers/Dense/NlpDenseConsEx1.cpp b/src/Drivers/Dense/NlpDenseConsEx1.cpp index cd0a31676..ea5fcdfda 100644 --- a/src/Drivers/Dense/NlpDenseConsEx1.cpp +++ b/src/Drivers/Dense/NlpDenseConsEx1.cpp @@ -16,41 +16,54 @@ using namespace hiop; Ex1Meshing1D::Ex1Meshing1D(double a, double b, size_type glob_n, double r, MPI_Comm comm_) { - _a=a; _b=b; _r=r; - comm=comm_; - comm_size=1; my_rank=0; + _a = a; + _b = b; + _r = r; + comm = comm_; + comm_size = 1; + my_rank = 0; #ifdef HIOP_USE_MPI - int ierr = MPI_Comm_size(comm, &comm_size); assert(MPI_SUCCESS==ierr); - ierr = MPI_Comm_rank(comm, &my_rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(comm, &comm_size); + assert(MPI_SUCCESS == ierr); + ierr = MPI_Comm_rank(comm, &my_rank); + assert(MPI_SUCCESS == ierr); #endif // set up vector distribution for primal variables - easier to store it as a member in this simple example - col_partition = new index_type[comm_size+1]; - size_type quotient=glob_n/comm_size, remainder=glob_n-comm_size*quotient; - - int i=0; col_partition[i]=0; i++; - while(i<=remainder) { col_partition[i] = col_partition[i-1]+quotient+1; i++; } - while(i<=comm_size) { col_partition[i] = col_partition[i-1]+quotient; i++; } + col_partition = new index_type[comm_size + 1]; + size_type quotient = glob_n / comm_size, remainder = glob_n - comm_size * quotient; + + int i = 0; + col_partition[i] = 0; + i++; + while(i <= remainder) { + col_partition[i] = col_partition[i - 1] + quotient + 1; + i++; + } + while(i <= comm_size) { + col_partition[i] = col_partition[i - 1] + quotient; + i++; + } _mass = LinearAlgebraFactory::create_vector("DEFAULT", glob_n, col_partition, comm); - //if(my_rank==0) printf("reminder=%d quotient=%d\n", remainder, quotient); - //printf("left=%d right=%d\n", col_partition[my_rank], col_partition[my_rank+1]); + // if(my_rank==0) printf("reminder=%d quotient=%d\n", remainder, quotient); + // printf("left=%d right=%d\n", col_partition[my_rank], col_partition[my_rank+1]); - //compute the mass - double m1=2*_r / ((1+_r)*glob_n); - double h =2*(1-_r) / (1+_r) / (glob_n-1) / glob_n; + // compute the mass + double m1 = 2 * _r / ((1 + _r) * glob_n); + double h = 2 * (1 - _r) / (1 + _r) / (glob_n - 1) / glob_n; - size_type glob_n_start=col_partition[my_rank], glob_n_end=col_partition[my_rank+1]-1; + size_type glob_n_start = col_partition[my_rank], glob_n_end = col_partition[my_rank + 1] - 1; - double* mass = _mass->local_data(); //local slice - double rescale = _b-_a; - for(size_type k=glob_n_start; k<=glob_n_end; k++) { - mass[k-glob_n_start] = (m1 + (k-glob_n_start)*h) * rescale; - //printf(" proc %d k=%d mass[k]=%g\n", my_rank, k, mass[k-glob_n_start]); + double* mass = _mass->local_data(); // local slice + double rescale = _b - _a; + for(size_type k = glob_n_start; k <= glob_n_end; k++) { + mass[k - glob_n_start] = (m1 + (k - glob_n_start) * h) * rescale; + // printf(" proc %d k=%d mass[k]=%g\n", my_rank, k, mass[k-glob_n_start]); } //_mass->print(stdout, NULL); - //fflush(stdout); + // fflush(stdout); } Ex1Meshing1D::~Ex1Meshing1D() { @@ -60,81 +73,75 @@ Ex1Meshing1D::~Ex1Meshing1D() bool Ex1Meshing1D::get_vecdistrib_info(size_type global_n, index_type* cols) { - for(int i=0; i<=comm_size; i++) cols[i] = col_partition[i]; + for(int i = 0; i <= comm_size; i++) cols[i] = col_partition[i]; return true; } -void Ex1Meshing1D::applyM(DiscretizedFunction& f) -{ - f.componentMult(*this->_mass); -} +void Ex1Meshing1D::applyM(DiscretizedFunction& f) { f.componentMult(*this->_mass); } -//converts the local indexes to global indexes +// converts the local indexes to global indexes index_type Ex1Meshing1D::getGlobalIndex(index_type i_local) const { - assert(0<=i_local); - assert(i_local < col_partition[my_rank+1]-col_partition[my_rank]); + assert(0 <= i_local); + assert(i_local < col_partition[my_rank + 1] - col_partition[my_rank]); - return i_local+col_partition[my_rank]; + return i_local + col_partition[my_rank]; } index_type Ex1Meshing1D::getLocalIndex(index_type i_global) const { assert(i_global >= col_partition[my_rank]); - assert(i_global < col_partition[my_rank+1]); - return i_global-col_partition[my_rank]; + assert(i_global < col_partition[my_rank + 1]); + return i_global - col_partition[my_rank]; } -//for a function c(t), for given global index in the discretization -// returns the corresponding continuous argument 't', which is in this -// case the middle of the discretization interval. +// for a function c(t), for given global index in the discretization +// returns the corresponding continuous argument 't', which is in this +// case the middle of the discretization interval. double Ex1Meshing1D::getFunctionArgument(index_type i_global) const { assert(i_global >= col_partition[my_rank]); - assert(i_global < col_partition[my_rank+1]); + assert(i_global < col_partition[my_rank + 1]); - const index_type & k = i_global; + const index_type& k = i_global; size_type glob_n = size(); - double m1=2*_r / ((1+_r)*glob_n); - double h =2*(1-_r) / (1+_r) / (glob_n-1) / glob_n; + double m1 = 2 * _r / ((1 + _r) * glob_n); + double h = 2 * (1 - _r) / (1 + _r) / (glob_n - 1) / glob_n; - //t is the middle of [k*m1 + k(k-1)/2*h, (k+1)m1+ (k+1)k/2*h] - double t = 0.5*( (2*k+1)*m1 + k*k*h); + // t is the middle of [k*m1 + k(k-1)/2*h, (k+1)m1+ (k+1)k/2*h] + double t = 0.5 * ((2 * k + 1) * m1 + k * k * h); return t; } - - /* DiscretizedFunction implementation */ DiscretizedFunction::DiscretizedFunction(Ex1Meshing1D* meshing) - : hiopVectorPar(meshing->size(), meshing->get_col_partition(), meshing->get_comm()) + : hiopVectorPar(meshing->size(), meshing->get_col_partition(), meshing->get_comm()) { _mesh = meshing; } // u'*v = u'*M*v, where u is 'this' -double DiscretizedFunction::dotProductWith( const hiopVector& v_ ) const +double DiscretizedFunction::dotProductWith(const hiopVector& v_) const { auto discretizedFunction(dynamic_cast(&v_)); - if (discretizedFunction) { + if(discretizedFunction) { assert(discretizedFunction->_mesh->matches(this->_mesh)); - double* M=_mesh->_mass->local_data(); - double* u= this->data_; - double* v= discretizedFunction->data_; - - double dot=0.; - for(int i=0; i_mass->local_data(); + double* u = this->data_; + double* v = discretizedFunction->data_; + + double dot = 0.; + for(int i = 0; i < get_local_size(); i++) dot += u[i] * M[i] * v[i]; + +#ifdef HIOP_USE_MPI double dotprodG; - int ierr = MPI_Allreduce(&dot, &dotprodG, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); - dot=dotprodG; + int ierr = MPI_Allreduce(&dot, &dotprodG, 1, MPI_DOUBLE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); + dot = dotprodG; #endif return dot; - } - else { + } else { return hiopVectorPar::dotProductWith(v_); } } @@ -142,48 +149,41 @@ double DiscretizedFunction::dotProductWith( const hiopVector& v_ ) const // computes integral of 'this', that is sum (this[elem]*m[elem]) double DiscretizedFunction::integral() const { - //the base dotProductWith method would do it + // the base dotProductWith method would do it return hiopVectorPar::dotProductWith(*_mesh->_mass); } // norm(u) as sum(M[elem]*u[elem]^2) -double DiscretizedFunction::twonorm() const +double DiscretizedFunction::twonorm() const { - double* M=_mesh->_mass->local_data(); - double* u= this->data_; + double* M = _mesh->_mass->local_data(); + double* u = this->data_; - double nrm_square=0.; - for(int i=0; igetGlobalIndex(i_local); } -//converts the local indexes to global indexes -index_type DiscretizedFunction::getGlobalIndex(index_type i_local) const -{ - return _mesh->getGlobalIndex(i_local); -} +// for a function c(t), for given global index in the discretization +// returns the corresponding continuous argument 't', which is in this +// case the middle of the discretization interval. +double DiscretizedFunction::getFunctionArgument(index_type i_global) const { return _mesh->getFunctionArgument(i_global); } -//for a function c(t), for given global index in the discretization -// returns the corresponding continuous argument 't', which is in this -// case the middle of the discretization interval. -double DiscretizedFunction::getFunctionArgument(index_type i_global) const -{ - return _mesh->getFunctionArgument(i_global); -} - -//set the function value for a given global index +// set the function value for a given global index void DiscretizedFunction::setFunctionValue(index_type i_global, const double& value) { - index_type i_local=_mesh->getLocalIndex(i_global); - this->data_[i_local]=value; + index_type i_local = _mesh->getLocalIndex(i_global); + this->data_[i_local] = value; } /* DenseConsEx1 class implementation */ @@ -209,27 +209,27 @@ bool DenseConsEx1::iterate_callback(int iter, int ls_trials) { #ifdef HIOP_USE_AXOM - //save state to sidre::Group every 5 iterations if a solver/algorithm object was provided - if(iter > 0 && (iter % 5 == 0) && nullptr!=solver_) { - // - //Example of how to save HiOp state to axom::sidre::Group + // save state to sidre::Group every 5 iterations if a solver/algorithm object was provided + if(iter > 0 && (iter % 5 == 0) && nullptr != solver_) { + // + // Example of how to save HiOp state to axom::sidre::Group // - //We first manufacture a Group. User code supposedly already has one. + // We first manufacture a Group. User code supposedly already has one. sidre::DataStore ds; sidre::Group* group = ds.getRoot()->createGroup("HiOp quasi-Newton alg state"); - //the actual saving of state to group + // the actual saving of state to group try { solver_->save_state_to_sidre_group(*group); } catch(std::runtime_error& e) { - //user chooses action when an error occured in saving the state... - //we choose to stop HiOp + // user chooses action when an error occured in saving the state... + // we choose to stop HiOp return false; } - //User code can further inspect the Group or add addtl info to DataStore, with the end goal - //of saving it to file before HiOp starts next iteration. Here we just save it. + // User code can further inspect the Group or add addtl info to DataStore, with the end goal + // of saving it to file before HiOp starts next iteration. Here we just save it. sidre::IOManager writer(comm); int n_files; MPI_Comm_size(comm, &n_files); @@ -239,22 +239,24 @@ bool DenseConsEx1::iterate_callback(int iter, return true; } -/*set c to +/*set c to * c(t) = 1-10*t, for 0<=t<=1/10, * 0, for 1/10<=t<=1. */ void DenseConsEx1::set_c() { - for(int i_local=0; i_localgetGlobalIndex(i_local); + for(int i_local = 0; i_local < n_local; i_local++) { + // this will be based on 'my_rank', thus, different ranks get the appropriate global indexes + size_type n_global = c->getGlobalIndex(i_local); double t = c->getFunctionArgument(n_global); - //if(t<=0.1) c->setFunctionValue(n_global, 1-10.*t); + // if(t<=0.1) c->setFunctionValue(n_global, 1-10.*t); double cval; - if(t<=0.1) cval = -1.+10.*t; - else cval = 0.; + if(t <= 0.1) + cval = -1. + 10. * t; + else + cval = 0.; c->setFunctionValue(n_global, cval); - //printf("index %d t=%g value %g\n", n_global, t, cval); - } + // printf("index %d t=%g value %g\n", n_global, t, cval); + } } diff --git a/src/Drivers/Dense/NlpDenseConsEx1.hpp b/src/Drivers/Dense/NlpDenseConsEx1.hpp index 967255f04..33a3178d7 100644 --- a/src/Drivers/Dense/NlpDenseConsEx1.hpp +++ b/src/Drivers/Dense/NlpDenseConsEx1.hpp @@ -1,5 +1,5 @@ #ifndef HIOP_EXAMPLE_DENSE_EX1 -#define HIOP_EXAMPLE_DENSE_EX1 +#define HIOP_EXAMPLE_DENSE_EX1 #include "hiopVector.hpp" #include "LinAlgFactory.hpp" @@ -20,15 +20,15 @@ /* Example 1: a simple infinite-dimensional QP in the optimiz. function variable x:[0,1]->R * min sum +1/2* - * s.t. + * s.t. * integral(x:[0,1]) = 0.5 * 0.1 <= x(t) <= 1.0, for all t in [0,1]. * * Here c(t) = 1-t*10, for 0<=t<=1/10, * 0, for 1/10<=t<=1. * The inner products are L2. - * - * We generate "distorted" meshes for [0,1] having the ratio of the smalest element and + * + * We generate "distorted" meshes for [0,1] having the ratio of the smalest element and * the largest element given by r. The mesh is such that the consecutive elements * increase by h * [t_0,t_1], [t_1, t_2], [t_2, t_3], ..., [t_{n-1}, t_n] (t_0=0, t_n=1) @@ -45,23 +45,23 @@ using size_type = hiop::size_type; using index_type = hiop::index_type; /* our (admitedly weird) 1D distorted meshing */ -class Ex1Meshing1D +class Ex1Meshing1D { public: - Ex1Meshing1D(double a, double b, size_type glob_n, double r=1.0, MPI_Comm comm=MPI_COMM_WORLD); + Ex1Meshing1D(double a, double b, size_type glob_n, double r = 1.0, MPI_Comm comm = MPI_COMM_WORLD); virtual ~Ex1Meshing1D(); - virtual bool matches(Ex1Meshing1D* other) { return this==other; } + virtual bool matches(Ex1Meshing1D* other) { return this == other; } virtual size_type size() const { return _mass->get_size(); } - virtual size_type local_size() const { return col_partition[my_rank+1]-col_partition[my_rank]; } + virtual size_type local_size() const { return col_partition[my_rank + 1] - col_partition[my_rank]; } /* the following methods are mostly for educational purposes and may not be optimized */ - //converts the local indexes to global indexes + // converts the local indexes to global indexes index_type getGlobalIndex(index_type i_local) const; - //given a global index, returns the local index + // given a global index, returns the local index index_type getLocalIndex(index_type i_global) const; - //for a function c(t), for given global index in the discretization - // returns the corresponding continuous argument 't', which is in this - // case the middle of the discretization interval. + // for a function c(t), for given global index in the discretization + // returns the corresponding continuous argument 't', which is in this + // case the middle of the discretization interval. double getFunctionArgument(index_type i_global) const; virtual bool get_vecdistrib_info(size_type global_n, index_type* cols); @@ -69,19 +69,19 @@ class Ex1Meshing1D MPI_Comm get_comm() const { return comm; } virtual void applyM(DiscretizedFunction& f); -protected: - hiop::hiopVector* _mass; //the length or the mass of the elements - double _a,_b; //end points - double _r; //distortion ratio +protected: + hiop::hiopVector* _mass; // the length or the mass of the elements + double _a, _b; // end points + double _r; // distortion ratio MPI_Comm comm; int my_rank, comm_size; index_type* col_partition; - + friend class DiscretizedFunction; -private: +private: Ex1Meshing1D(const Ex1Meshing1D& other) { assert(false); } Ex1Meshing1D() { assert(false); } }; @@ -90,113 +90,128 @@ class DiscretizedFunction : public hiop::hiopVectorPar { public: DiscretizedFunction(Ex1Meshing1D* meshing); - - double dotProductWith( const hiopVector& v ) const override; + + double dotProductWith(const hiopVector& v) const override; virtual double integral() const; double twonorm() const override; /* the following methods are mostly for educational purposes and may not be optimized */ - //converts the local indexes to global indexes + // converts the local indexes to global indexes index_type getGlobalIndex(index_type i_local) const; - //for a function c(t), for given global index in the discretization - // returns the corresponding continuous argument 't', which is in this - // case the middle of the discretization interval. + // for a function c(t), for given global index in the discretization + // returns the corresponding continuous argument 't', which is in this + // case the middle of the discretization interval. double getFunctionArgument(index_type i_global) const; - //set the function value for a given global index + // set the function value for a given global index void setFunctionValue(index_type i_global, const double& value); + protected: Ex1Meshing1D* _mesh; }; class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints { -public: - DenseConsEx1(int n_mesh_elem=100, double mesh_ratio=1.0) - : n_vars(n_mesh_elem), - comm(MPI_COMM_WORLD), - solver_(nullptr) +public: + DenseConsEx1(int n_mesh_elem = 100, double mesh_ratio = 1.0) + : n_vars(n_mesh_elem), + comm(MPI_COMM_WORLD), + solver_(nullptr) { - //create the members - _mesh = new Ex1Meshing1D(0.0,1.0, n_vars, mesh_ratio, comm); - c = new DiscretizedFunction(_mesh); - x = new DiscretizedFunction(_mesh); + // create the members + _mesh = new Ex1Meshing1D(0.0, 1.0, n_vars, mesh_ratio, comm); + c = new DiscretizedFunction(_mesh); + x = new DiscretizedFunction(_mesh); //_aux=new DiscretizedFunction(_mesh); // used as a auxiliary variable n_local = _mesh->local_size(); - set_c(); - + set_c(); } virtual ~DenseConsEx1() { delete c; delete x; - //delete _aux; + // delete _aux; delete _mesh; } bool get_prob_sizes(size_type& n, size_type& m) - { n=n_vars; m=1; return true; } + { + n = n_vars; + m = 1; + return true; + } - bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) + bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { - for(int i_local=0; i_localcopyFrom(x_in); - obj_value = c->dotProductWith(*x); + obj_value = c->dotProductWith(*x); double xnrm = x->twonorm(); - //printf("c'x=%g xnrm_sq=%g\n", obj_value, xnrm*xnrm); - obj_value += 0.5 * xnrm*xnrm; + // printf("c'x=%g xnrm_sq=%g\n", obj_value, xnrm*xnrm); + obj_value += 0.5 * xnrm * xnrm; return true; } bool eval_grad_f(const size_type& n, const double* x_in, bool new_x, double* gradf) { - //gradf = m.*(x + c) - //use x as auxiliary variable + // gradf = m.*(x + c) + // use x as auxiliary variable x->copyFrom(x_in); x->axpy(1.0, *c); _mesh->applyM(*x); x->copyTo(gradf); - //x->copyFrom(x_in); - //x->print(stdout); + // x->copyFrom(x_in); + // x->print(stdout); return true; } /** Sum(x[i])<=10 and sum(x[i])>= 1 (we pretend are different) */ - bool eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, const index_type* idx_cons, - const double* x_in, bool new_x, double* cons) + bool eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x_in, + bool new_x, + double* cons) { - assert(n==n_vars); - if(0==num_cons) return true; //this may happen when Hiop asks for inequalities, which we don't have in this example + assert(n == n_vars); + if(0 == num_cons) return true; // this may happen when Hiop asks for inequalities, which we don't have in this example - assert(num_cons==1); + assert(num_cons == 1); x->copyFrom(x_in); cons[0] = x->integral(); return true; } - bool eval_Jac_cons(const size_type& n, const size_type& m, - const size_type& num_cons, const index_type* idx_cons, - const double* x_in, bool new_x, double* Jac) + bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x_in, + bool new_x, + double* Jac) { - assert(n==n_vars); - if(0==num_cons) return true; //this may happen when Hiop asks for inequalities, which we don't have in this example - assert(1==num_cons); - //use x as auxiliary + assert(n == n_vars); + if(0 == num_cons) return true; // this may happen when Hiop asks for inequalities, which we don't have in this example + assert(1 == num_cons); + // use x as auxiliary x->setToConstant(1.); _mesh->applyM(*x); x->copyTo(Jac); @@ -205,26 +220,23 @@ class DenseConsEx1 : public hiop::hiopInterfaceDenseConstraints bool get_vecdistrib_info(size_type global_n, index_type* cols) { - if(global_n==n_vars) + if(global_n == n_vars) return _mesh->get_vecdistrib_info(global_n, cols); - else + else assert(false && "You shouldn't need distrib info for this size."); return true; } - bool get_starting_point(const size_type &global_n, double* x0) + bool get_starting_point(const size_type& global_n, double* x0) { - assert(global_n==n_vars); - for(int i_local=0; i_local1.) return false; + if(n <= 0) return false; + if(distortion_ratio <= 1e-8 || distortion_ratio > 1.) return false; return true; }; static void usage(const char* exeName) { - printf("hiOp driver '%s' that solves a synthetic infinite dimensional problem of variable size. A 1D mesh is created by the example, and the size and the distortion of the mesh can be specified as options to this executable. The distortion of the mesh is the ratio of the smallest element and the largest element in the mesh.\n", exeName); + printf( + "hiOp driver '%s' that solves a synthetic infinite dimensional problem of variable size. A 1D mesh is created by the " + "example, and the size and the distortion of the mesh can be specified as options to this executable. The distortion " + "of the mesh is the ratio of the smallest element and the largest element in the mesh.\n", + exeName); printf("Usage: \n"); printf(" '$ %s problem_size mesh_distortion_ratio -selfcheck'\n", exeName); printf("Arguments (specify in the order above): \n"); printf(" 'problem_size': number of decision variables [optional, default is 20k]\n"); printf(" 'dist_ratio': mesh distortion ratio, see above; a number in (0,1) [optional, default 1.0]\n"); - printf(" '-selfcheck': compares the optimal objective with a previously saved value for the problem specified by 'problem_size'. [optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with a previously saved value for the problem specified by " + "'problem_size'. [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { int rank = 0; #ifdef HIOP_USE_MPI int numRanks = 1; int err; - err = MPI_Init(&argc, &argv); assert(MPI_SUCCESS==err); - err = MPI_Comm_rank(MPI_COMM_WORLD,&rank); assert(MPI_SUCCESS==err); - err = MPI_Comm_size(MPI_COMM_WORLD,&numRanks); assert(MPI_SUCCESS==err); - if(0==rank) { + err = MPI_Init(&argc, &argv); + assert(MPI_SUCCESS == err); + err = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(MPI_SUCCESS == err); + err = MPI_Comm_size(MPI_COMM_WORLD, &numRanks); + assert(MPI_SUCCESS == err); + if(0 == rank) { printf("Support for MPI is enabled\n"); } #endif @@ -95,68 +101,75 @@ int main(int argc, char **argv) DenseConsEx1 problem(mesh_size, ratio); hiop::hiopNlpDenseConstraints nlp(problem); - + hiop::hiopAlgFilterIPM solver(&nlp); problem.set_solver(&solver); - + hiop::hiopSolveStatus status = solver.run(); objective = solver.getObjective(); - //this is used for testing when the driver is called with -selfcheck + // this is used for testing when the driver is called with -selfcheck if(selfCheck) { - if(!self_check(mesh_size, objective)) - return -1; + if(!self_check(mesh_size, objective)) return -1; } else { - if(rank==0) { - printf("Optimal objective: %22.14e. Solver status: %d. Number of iterations: %d\n", - objective, status, solver.getNumIterations()); + if(rank == 0) { + printf("Optimal objective: %22.14e. Solver status: %d. Number of iterations: %d\n", + objective, + status, + solver.getNumIterations()); } } - if(0==rank) { + if(0 == rank) { printf("Objective: %18.12e\n", objective); } #ifdef HIOP_USE_AXOM // example/test for HiOp's load checkpoint API. if(!do_load_checkpoint_test(mesh_size, ratio, objective)) { - if(rank==0) { + if(rank == 0) { printf("Load checkpoint and restart test failed."); } return -1; } -#endif +#endif #ifdef HIOP_USE_MPI MPI_Finalize(); #endif - + return 0; } static bool self_check(size_type n, double objval) { -#define num_n_saved 3 //keep this is sync with n_saved and objval_saved - const size_type n_saved[] = {500, 5000, 50000}; +#define num_n_saved 3 // keep this is sync with n_saved and objval_saved + const size_type n_saved[] = {500, 5000, 50000}; const double objval_saved[] = {8.6156700e-2, 8.6156106e-02, 8.6161001e-02}; #define relerr 1e-6 - bool found=false; - for(int it=0; it relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", - objval, -(int)log10(relerr), objval_saved[it], n); + bool found = false; + for(int it = 0; it < num_n_saved; it++) { + if(n_saved[it] == n) { + found = true; + if(fabs((objval_saved[it] - objval) / (1 + objval_saved[it])) > relerr) { + printf( + "selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, + -(int)log10(relerr), + objval_saved[it], + n); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } break; } } if(!found) { - printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", + n, + objval); return false; } @@ -164,27 +177,25 @@ static bool self_check(size_type n, double objval) } #ifdef HIOP_USE_AXOM -/** +/** * An illustration on how to use load_state_from_sidre_group API method of HiOp's algorithm class. - * - * + * + * */ -static bool do_load_checkpoint_test(const size_type& mesh_size, - const double& ratio, - const double& obj_val_expected) +static bool do_load_checkpoint_test(const size_type& mesh_size, const double& ratio, const double& obj_val_expected) { - //Pretend this is new job and recreate the HiOp objects. + // Pretend this is new job and recreate the HiOp objects. DenseConsEx1 problem(mesh_size, ratio); hiop::hiopNlpDenseConstraints nlp(problem); - + hiop::hiopAlgFilterIPM solver(&nlp); // // example of how to use load_state_sidre_group to warm-start // - //Supposedly, the user code should have the group in hand before asking HiOp to load from it. - //We will manufacture it by loading a sidre checkpoint file. Here the checkpoint file + // Supposedly, the user code should have the group in hand before asking HiOp to load from it. + // We will manufacture it by loading a sidre checkpoint file. Here the checkpoint file //"hiop_state_ex1.root" was created from the interface class' iterate_callback method //(saved every 5 iterations) sidre::DataStore ds; @@ -196,9 +207,8 @@ static bool do_load_checkpoint_test(const size_type& mesh_size, printf("Failed to read checkpoint file. Error: [%s]", e.what()); return false; } - - //the actual API call + // the actual API call try { const sidre::Group* group = ds.getRoot()->getGroup("HiOp quasi-Newton alg state"); solver.load_state_from_sidre_group(*group); @@ -206,7 +216,7 @@ static bool do_load_checkpoint_test(const size_type& mesh_size, printf("Failed to load from sidre::group. Error: [%s]", e.what()); return false; } - + hiop::hiopSolveStatus status = solver.run(); double obj_val = solver.getObjective(); if(obj_val != obj_val_expected) { @@ -214,4 +224,4 @@ static bool do_load_checkpoint_test(const size_type& mesh_size, } return true; } -#endif // HIOP_USE_AXOM +#endif // HIOP_USE_AXOM diff --git a/src/Drivers/Dense/NlpDenseConsEx2.cpp b/src/Drivers/Dense/NlpDenseConsEx2.cpp index cffbd97c2..b79b461b0 100644 --- a/src/Drivers/Dense/NlpDenseConsEx2.cpp +++ b/src/Drivers/Dense/NlpDenseConsEx2.cpp @@ -1,20 +1,22 @@ #include "NlpDenseConsEx2.hpp" #include -#include //for memcpy +#include //for memcpy #include DenseConsEx2::DenseConsEx2(int n, bool unconstrained) - : n_vars_(n), - n_cons_(4), - unconstrained_(unconstrained) + : n_vars_(n), + n_cons_(4), + unconstrained_(unconstrained) { comm_size = 1; - my_rank = 0; + my_rank = 0; #ifdef HIOP_USE_MPI comm = MPI_COMM_WORLD; - int ierr = MPI_Comm_size(comm, &comm_size); assert(MPI_SUCCESS==ierr); - ierr = MPI_Comm_rank(comm, &my_rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(comm, &comm_size); + assert(MPI_SUCCESS == ierr); + ierr = MPI_Comm_rank(comm, &my_rank); + assert(MPI_SUCCESS == ierr); #endif if(unconstrained_) { @@ -22,58 +24,54 @@ DenseConsEx2::DenseConsEx2(int n, bool unconstrained) } // set up vector distribution for primal variables - easier to store it as a member in this simple example - col_partition_ = new index_type[comm_size+1]; + col_partition_ = new index_type[comm_size + 1]; index_type quotient = n_vars_ / comm_size; index_type remainder = n_vars_ - comm_size * quotient; - //if(my_rank==0) printf("reminder=%llu quotient=%llu\n", remainder, quotient); + // if(my_rank==0) printf("reminder=%llu quotient=%llu\n", remainder, quotient); int i = 0; - col_partition_[i++]=0; - while(i<=remainder) { - col_partition_[i] = col_partition_[i-1] + quotient + 1; + col_partition_[i++] = 0; + while(i <= remainder) { + col_partition_[i] = col_partition_[i - 1] + quotient + 1; i++; } - while(i<=comm_size) { - col_partition_[i] = col_partition_[i-1] + quotient; + while(i <= comm_size) { + col_partition_[i] = col_partition_[i - 1] + quotient; i++; } } -DenseConsEx2::~DenseConsEx2() -{ - delete[] col_partition_; -} - +DenseConsEx2::~DenseConsEx2() { delete[] col_partition_; } bool DenseConsEx2::get_prob_sizes(size_type& n, size_type& m) -{ +{ n = n_vars_; m = n_cons_; return true; } -bool DenseConsEx2::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool DenseConsEx2::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { index_type i_local; - for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank+1]; i++) { - i_local = idx_global2local(n,i); - if(i==0) { + for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank + 1]; i++) { + i_local = idx_global2local(n, i); + if(i == 0) { xlow[i_local] = -1e20; xupp[i_local] = 1e20; type[i_local] = hiopNonlinear; continue; } - if(i==1) { + if(i == 1) { xlow[i_local] = 0.0; xupp[i_local] = 1e20; type[i_local] = hiopNonlinear; continue; } - if(i==2) { + if(i == 2) { xlow[i_local] = 1.5; xupp[i_local] = 10.0; type[i_local] = hiopNonlinear; continue; } - //this is for x_4, x_5, ... , x_n (i>=3), which are bounded only from below + // this is for x_4, x_5, ... , x_n (i>=3), which are bounded only from below xlow[i_local] = 0.5; xupp[i_local] = 1e20; type[i_local] = hiopNonlinear; @@ -82,45 +80,46 @@ bool DenseConsEx2::get_vars_info(const size_type& n, double *xlow, double* xupp, } bool DenseConsEx2::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m==n_cons_); + assert(m == n_cons_); if(!unconstrained_) { - clow[0] = n_vars_+1; - cupp[0] = n_vars_+1; + clow[0] = n_vars_ + 1; + cupp[0] = n_vars_ + 1; type[0] = hiopInterfaceBase::hiopLinear; - + clow[1] = 5.0; cupp[1] = 1e20; type[1] = hiopInterfaceBase::hiopLinear; clow[2] = 1.0; - cupp[2] = 2*n_vars_; + cupp[2] = 2 * n_vars_; type[2] = hiopInterfaceBase::hiopLinear; clow[3] = -1e20; - cupp[3] = 4*n_vars_; - type[3] = hiopInterfaceBase::hiopLinear; + cupp[3] = 4 * n_vars_; + type[3] = hiopInterfaceBase::hiopLinear; } return true; } bool DenseConsEx2::eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { - size_type n_local = col_partition_[my_rank+1] - col_partition_[my_rank]; - obj_value = 0.; - for(int i=0; i=0); - //local contributions to the constraints in cons are reset - for(int j=0;j= 0); + // local contributions to the constraints in cons are reset + for(int j = 0; j < num_cons; j++) { + cons[j] = 0.; } - //compute the constraint one by one. - for(int itcon=0; itcon sum x_i = n+1 - if(idx_cons[itcon]==0) { - size_type n_local = col_partition_[my_rank+1] - col_partition_[my_rank]; - //loop over local x in local indexes and add its entries to the result - for(int i=0; i 2*x_1 + sum {x_i : i=2,...,n} - if(idx_cons[itcon]==1) { + + // --- constraint 2 body ---> 2*x_1 + sum {x_i : i=2,...,n} + if(idx_cons[itcon] == 1) { int i_local; - //loop over local x in global indexes - for(size_type i_global=col_partition_[my_rank]; i_global 2*x_1 + 0.5*x_2 + sum{x_i : i=3,...,n} - if(idx_cons[itcon]==2) { + if(idx_cons[itcon] == 2) { int i_local; - //loop over x in global indexes - for(size_type i_global=col_partition_[my_rank]; i_global 4*x_1 + 2*x_2 + 2*x_3 + sum{x_i : i=4,...,n} - if(idx_cons[itcon]==3) { + if(idx_cons[itcon] == 3) { int i_local; - //loop over x in global indexes - for(size_type i_global=col_partition_[my_rank]; i_global0) { + if(num_cons > 0) { double* cons_global = new double[num_cons]; - int ierr = MPI_Allreduce(cons, cons_global, num_cons, MPI_DOUBLE, MPI_SUM, comm); assert(ierr==MPI_SUCCESS); - memcpy(cons, cons_global, num_cons*sizeof(double)); + int ierr = MPI_Allreduce(cons, cons_global, num_cons, MPI_DOUBLE, MPI_SUM, comm); + assert(ierr == MPI_SUCCESS); + memcpy(cons, cons_global, num_cons * sizeof(double)); delete[] cons_global; } #endif @@ -221,101 +223,97 @@ bool DenseConsEx2::eval_cons(const size_type& n, return true; } - - bool DenseConsEx2::eval_Jac_cons(const size_type& n, const size_type& m, const size_type& num_cons, - const index_type* idx_cons, + const index_type* idx_cons, const double* x, bool new_x, - double* Jac) + double* Jac) { if(unconstrained_) { assert(m == 0); return true; } - assert(n==n_vars_); assert(m==n_cons_); - size_type n_local = col_partition_[my_rank+1] - col_partition_[my_rank]; + assert(n == n_vars_); + assert(m == n_cons_); + size_type n_local = col_partition_[my_rank + 1] - col_partition_[my_rank]; int i; - //here we will iterate over the local indexes, however we still need to work with the - //global indexes to correctly determine the entries in the Jacobian corresponding - //to the 'rebels' variables x_1, x_2, x_3 - - for(int itcon=0; itcon=0.5, i=4,...,n */ class DenseConsEx2 : public hiop::hiopInterfaceDenseConstraints { -public: +public: DenseConsEx2(int n, bool unconstrained = false); virtual ~DenseConsEx2(); virtual bool get_prob_sizes(size_type& n, size_type& m); - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); @@ -50,44 +50,44 @@ class DenseConsEx2 : public hiop::hiopInterfaceDenseConstraints virtual bool eval_Jac_cons(const size_type& n, const size_type& m, const size_type& num_cons, - const index_type* idx_cons, + const index_type* idx_cons, const double* x, bool new_x, double* Jac); virtual bool get_vecdistrib_info(size_type global_n, index_type* cols); - virtual bool get_starting_point(const size_type&n, double* x0); + virtual bool get_starting_point(const size_type& n, double* x0); -/* - void solution_callback(hiop::hiopSolveStatus status, - int n, const double* x, - const double* z_L, - const double* z_U, - int m, const double* g, - const double* lambda, - double obj_value) - { - printf("solution_callback with optimal value: %g. Also x[1]=%22.14f\n", obj_value, x[1]); - }; - + /* + void solution_callback(hiop::hiopSolveStatus status, + int n, const double* x, + const double* z_L, + const double* z_U, + int m, const double* g, + const double* lambda, + double obj_value) + { + printf("solution_callback with optimal value: %g. Also x[1]=%22.14f\n", obj_value, x[1]); + }; - virtual bool iterate_callback(int iter, double obj_value, double logbar_obj_value, - int n, const double* x, - const double* z_L, - const double* z_U, - int m, const double* g, - const double* lambda, - double inf_pr, double inf_du, double onenorm_pr, - double mu, - double alpha_du, double alpha_pr, - int ls_trials) - { - if(iter==3) return false; - printf("%g %g\n", x[0], x[1]); - return true; - } -*/ + + virtual bool iterate_callback(int iter, double obj_value, double logbar_obj_value, + int n, const double* x, + const double* z_L, + const double* z_U, + int m, const double* g, + const double* lambda, + double inf_pr, double inf_du, double onenorm_pr, + double mu, + double alpha_du, double alpha_pr, + int ls_trials) + { + if(iter==3) return false; + printf("%g %g\n", x[0], x[1]); + return true; + } + */ private: size_type n_vars_, n_cons_; #ifdef HIOP_USE_MPI @@ -97,11 +97,12 @@ class DenseConsEx2 : public hiop::hiopInterfaceDenseConstraints int comm_size; index_type* col_partition_; bool unconstrained_; + public: - inline index_type idx_local2global(size_type global_n, index_type idx_local) - { - assert(idx_local + col_partition_[my_rank]=col_partition_[my_rank] && "global index does not belong to this rank"); - assert(idx_global< col_partition_[my_rank+1] && "global index does not belong to this rank"); - assert(global_n==n_vars_ && "your global_n does not match the number of variables?"); - return (idx_global-col_partition_[my_rank]); + assert(idx_global >= col_partition_[my_rank] && "global index does not belong to this rank"); + assert(idx_global < col_partition_[my_rank + 1] && "global index does not belong to this rank"); + assert(global_n == n_vars_ && "your global_n does not match the number of variables?"); + return (idx_global - col_partition_[my_rank]); } }; #endif diff --git a/src/Drivers/Dense/NlpDenseConsEx2Driver.cpp b/src/Drivers/Dense/NlpDenseConsEx2Driver.cpp index 7aad10fbf..b14b875fc 100644 --- a/src/Drivers/Dense/NlpDenseConsEx2Driver.cpp +++ b/src/Drivers/Dense/NlpDenseConsEx2Driver.cpp @@ -10,23 +10,23 @@ using namespace hiop; static bool self_check(size_type n, double obj_value); static bool self_check_uncon(size_type n, double obj_value); -static bool parse_arguments(int argc, char **argv, size_type& n, bool& self_check, bool& no_con) +static bool parse_arguments(int argc, char** argv, size_type& n, bool& self_check, bool& no_con) { self_check = false; no_con = false; n = 50000; switch(argc) { - case 1: - //no arguments - return true; - break; - case 4: //3 arguments + case 1: + // no arguments + return true; + break; + case 4: // 3 arguments { if(std::string(argv[3]) == "-selfcheck") { self_check = true; } } - case 3: //2 arguments + case 3: // 2 arguments { if(std::string(argv[2]) == "-unconstrained") { no_con = true; @@ -34,16 +34,15 @@ static bool parse_arguments(int argc, char **argv, size_type& n, bool& self_chec self_check = true; } } - case 2: //1 argument + case 2: // 1 argument { n = std::atoi(argv[1]); if(n <= 0) { return false; } - } - break; - default: - return false; //4 or more arguments + } break; + default: + return false; // 4 or more arguments } return true; @@ -57,43 +56,44 @@ static void usage(const char* exeName) printf("Arguments:\n"); printf(" 'problem_size': number of decision variables [optional, default is 50k]\n"); printf(" '-unconstrained': unconstrainted optimization problem [optional]\n"); - printf(" '-selfcheck': compares the optimal objective with a previously saved value for the problem specified by 'problem_size'. [optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with a previously saved value for the problem specified by " + "'problem_size'. [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); - int ierr = MPI_Comm_rank(MPI_COMM_WORLD,&rank); - assert(MPI_SUCCESS==ierr); - //if(0==rank) printf("Support for MPI is enabled\n"); + int ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(MPI_SUCCESS == ierr); + // if(0==rank) printf("Support for MPI is enabled\n"); #endif bool selfCheck; bool unconstrained; size_type n; - if(!parse_arguments(argc, argv, n, selfCheck, unconstrained)) { - usage(argv[0]); + if(!parse_arguments(argc, argv, n, selfCheck, unconstrained)) { + usage(argv[0]); return 1; } - DenseConsEx2 nlp_interface(n,unconstrained); - //if(rank==0) printf("interface created\n"); + DenseConsEx2 nlp_interface(n, unconstrained); + // if(rank==0) printf("interface created\n"); hiopNlpDenseConstraints nlp(nlp_interface); - //if(rank==0) printf("nlp formulation created\n"); + // if(rank==0) printf("nlp formulation created\n"); hiopAlgFilterIPM solver(&nlp); hiopSolveStatus status = solver.run(); double obj_value = solver.getObjective(); - - if(status<0) { - if(rank==0) printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); + + if(status < 0) { + if(rank == 0) printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); return -1; } - //this is used for "regression" testing when the driver is called with -selfcheck + // this is used for "regression" testing when the driver is called with -selfcheck if(selfCheck) { if(!unconstrained) { if(!self_check(n, obj_value)) { @@ -102,10 +102,10 @@ int main(int argc, char **argv) } else { if(!self_check_uncon(n, obj_value)) { return -1; - } + } } } else { - if(rank==0) { + if(rank == 0) { printf("Optimal objective: %22.14e. Solver status: %d\n", obj_value, status); } } @@ -117,31 +117,36 @@ int main(int argc, char **argv) return 0; } - static bool self_check(size_type n, double objval) { -#define num_n_saved 3 //keep this is sync with n_saved and objval_saved - const size_type n_saved[] = {500, 5000, 50000}; +#define num_n_saved 3 // keep this is sync with n_saved and objval_saved + const size_type n_saved[] = {500, 5000, 50000}; const double objval_saved[] = {1.56251020819349e-02, 1.56251019995139e-02, 1.56251028980352e-02}; #define relerr 1e-6 - bool found=false; - for(int it=0; it relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", - objval, -(int)log10(relerr), objval_saved[it], n); + bool found = false; + for(int it = 0; it < num_n_saved; it++) { + if(n_saved[it] == n) { + found = true; + if(fabs((objval_saved[it] - objval) / (1 + objval_saved[it])) > relerr) { + printf( + "selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, + -(int)log10(relerr), + objval_saved[it], + n); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } break; } } if(!found) { - printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", + n, + objval); return false; } @@ -150,28 +155,34 @@ static bool self_check(size_type n, double objval) static bool self_check_uncon(size_type n, double objval) { -#define num_n_saved 3 //keep this is sync with n_saved and objval_saved - const size_type n_saved[] = {500, 5000, 50000}; +#define num_n_saved 3 // keep this is sync with n_saved and objval_saved + const size_type n_saved[] = {500, 5000, 50000}; const double objval_saved[] = {1.56250004019985e-02, 1.56250035348275e-02, 1.56250304912460e-02}; #define relerr 1e-6 - bool found=false; - for(int it=0; it relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", - objval, -(int)log10(relerr), objval_saved[it], n); + bool found = false; + for(int it = 0; it < num_n_saved; it++) { + if(n_saved[it] == n) { + found = true; + if(fabs((objval_saved[it] - objval) / (1 + objval_saved[it])) > relerr) { + printf( + "selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, + -(int)log10(relerr), + objval_saved[it], + n); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } break; } } if(!found) { - printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", + n, + objval); return false; } diff --git a/src/Drivers/Dense/NlpDenseConsEx3.hpp b/src/Drivers/Dense/NlpDenseConsEx3.hpp index d539c5c77..51fde78cf 100644 --- a/src/Drivers/Dense/NlpDenseConsEx3.hpp +++ b/src/Drivers/Dense/NlpDenseConsEx3.hpp @@ -1,5 +1,5 @@ #ifndef HIOP_EXAMPLE_DENSE_EX3 -#define HIOP_EXAMPLE_DENSE_EX3 +#define HIOP_EXAMPLE_DENSE_EX3 #include "hiopInterface.hpp" @@ -11,7 +11,7 @@ #endif #include -#include //for memcpy +#include //for memcpy #include #include @@ -20,49 +20,50 @@ using index_type = hiop::index_type; /* Problem test with fixed variables and related corner cases. * min sum 1/4* { (x_{i}-1)^4 : i=1,...,n} - * s.t. + * s.t. * sum x_i = n+1 - * 5<= 2*x_1 + sum {x_i : i=2,...,n} - * x_1=0 fixed - * 0.0 <= x_2 + * 5<= 2*x_1 + sum {x_i : i=2,...,n} + * x_1=0 fixed + * 0.0 <= x_2 * 1.5 <= x_3 <= 10 * x_i >=0.5, i=4,...,n * x_i <=0.5, i=3n/4+1,...,n (additional fixed variables) */ class DenseConsEx3 : public hiop::hiopInterfaceDenseConstraints { -public: +public: DenseConsEx3(int n) - : n_vars(n), n_cons(2) + : n_vars(n), + n_cons(2) { - comm_size=1; my_rank=0; + comm_size = 1; + my_rank = 0; #ifdef HIOP_USE_MPI comm = MPI_COMM_WORLD; - int ierr = MPI_Comm_size(comm, &comm_size); assert(MPI_SUCCESS==ierr); - ierr = MPI_Comm_rank(comm, &my_rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(comm, &comm_size); + assert(MPI_SUCCESS == ierr); + ierr = MPI_Comm_rank(comm, &my_rank); + assert(MPI_SUCCESS == ierr); #endif - + // set up vector distribution for primal variables - easier to store it as a member in this simple example - col_partition = new index_type[comm_size+1]; - size_type quotient=n_vars/comm_size; - size_type remainder=n_vars-comm_size*quotient; - - int i=0; - col_partition[i++]=0; - while(i<=remainder) { - col_partition[i] = col_partition[i-1]+quotient+1; + col_partition = new index_type[comm_size + 1]; + size_type quotient = n_vars / comm_size; + size_type remainder = n_vars - comm_size * quotient; + + int i = 0; + col_partition[i++] = 0; + while(i <= remainder) { + col_partition[i] = col_partition[i - 1] + quotient + 1; i++; } - while(i<=comm_size) { - col_partition[i] = col_partition[i-1]+quotient; + while(i <= comm_size) { + col_partition[i] = col_partition[i - 1] + quotient; i++; } }; - virtual ~DenseConsEx3() - { - delete[] col_partition; - }; + virtual ~DenseConsEx3() { delete[] col_partition; }; virtual bool get_prob_sizes(size_type& n, size_type& m) { @@ -71,101 +72,129 @@ class DenseConsEx3 : public hiop::hiopInterfaceDenseConstraints return true; } - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { - assert(n>=4 && "number of variables should be greater than 4 for this example"); + assert(n >= 4 && "number of variables should be greater than 4 for this example"); index_type i_local; - for(index_type i=col_partition[my_rank]; i=4), which are bounded till i=3/n4 and fixed after that - xlow[i_local]= 0.5; type[i_local]=hiopNonlinear; - if(i+1<=3*(n/4.0)) xupp[i_local]=1e20; - else xupp[i_local]=0.50; + for(index_type i = col_partition[my_rank]; i < col_partition[my_rank + 1]; i++) { + i_local = idx_global2local(n, i); + if(i == 0) { + xlow[i_local] = 1.5; + xupp[i_local] = 1.50; + type[i_local] = hiopNonlinear; + continue; + } + if(i == 1) { + xlow[i_local] = 0.0; + xupp[i_local] = 1e20; + type[i_local] = hiopNonlinear; + continue; + } + if(i == 2) { + xlow[i_local] = 1.5; + xupp[i_local] = 10.0; + type[i_local] = hiopNonlinear; + continue; + } + // this is for x_4, x_5, ... , x_n (i>=4), which are bounded till i=3/n4 and fixed after that + xlow[i_local] = 0.5; + type[i_local] = hiopNonlinear; + if(i + 1 <= 3 * (n / 4.0)) + xupp[i_local] = 1e20; + else + xupp[i_local] = 0.50; + } + return true; } - return true; -} virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m==n_cons); - clow[0]= n_vars+1; cupp[0]= n_vars+1; type[0]=hiopInterfaceBase::hiopLinear; - clow[1]= 5.0; cupp[1]= 1e20; type[1]=hiopInterfaceBase::hiopLinear; + assert(m == n_cons); + clow[0] = n_vars + 1; + cupp[0] = n_vars + 1; + type[0] = hiopInterfaceBase::hiopLinear; + clow[1] = 5.0; + cupp[1] = 1e20; + type[1] = hiopInterfaceBase::hiopLinear; return true; } virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { - size_type n_local=col_partition[my_rank+1]-col_partition[my_rank]; - obj_value=0.; - for(int i=0;i=0); - //local contributions to the constraints in cons are reset - for(int j=0;j= 0); + // local contributions to the constraints in cons are reset + for(int j = 0; j < num_cons; j++) cons[j] = 0.; + + // compute the constraint one by one. + for(int itcon = 0; itcon < num_cons; itcon++) { // --- constraint 1 body ---> sum x_i = n+1 - if(idx_cons[itcon]==0) { - size_type n_local=col_partition[my_rank+1]-col_partition[my_rank]; - //loop over local x in local indexes and add its entries to the result - for(int i=0;i 2*x_1 + sum {x_i : i=2,...,n} - if(idx_cons[itcon]==1) { + + // --- constraint 2 body ---> 2*x_1 + sum {x_i : i=2,...,n} + if(idx_cons[itcon] == 1) { int i_local; - //loop over local x in global indexes - for(size_type i_global=col_partition[my_rank]; i_global=col_partition[my_rank] && "global index does not belong to this rank"); - assert(idx_global< col_partition[my_rank+1] && "global index does not belong to this rank"); - assert(global_n==n_vars && "your global_n does not match the number of variables?"); - return (idx_global-col_partition[my_rank]); + assert(idx_global >= col_partition[my_rank] && "global index does not belong to this rank"); + assert(idx_global < col_partition[my_rank + 1] && "global index does not belong to this rank"); + assert(global_n == n_vars && "your global_n does not match the number of variables?"); + return (idx_global - col_partition[my_rank]); } }; #endif diff --git a/src/Drivers/Dense/NlpDenseConsEx3Driver.cpp b/src/Drivers/Dense/NlpDenseConsEx3Driver.cpp index 1158b9636..24ebf47ef 100644 --- a/src/Drivers/Dense/NlpDenseConsEx3Driver.cpp +++ b/src/Drivers/Dense/NlpDenseConsEx3Driver.cpp @@ -9,39 +9,39 @@ using namespace hiop; static bool self_check(size_type n, double obj_value); -static bool parse_arguments(int argc, char **argv, size_type& n, bool& self_check) +static bool parse_arguments(int argc, char** argv, size_type& n, bool& self_check) { - self_check=false; n = 50000; + self_check = false; + n = 50000; switch(argc) { - case 1: - //no arguments - return true; - break; - case 3: //2 arguments + case 1: + // no arguments + return true; + break; + case 3: // 2 arguments { if(std::string(argv[2]) == "-selfcheck") { - self_check=true; + self_check = true; } else { n = std::atoi(argv[2]); - if(n<=0) { + if(n <= 0) { return false; } } } - case 2: //1 argument + case 2: // 1 argument { if(std::string(argv[1]) == "-selfcheck") { - self_check=true; + self_check = true; } else { n = std::atoi(argv[1]); - if(n<=0) { + if(n <= 0) { return false; } } - } - break; - default: - return false; //3 or more arguments + } break; + default: + return false; // 3 or more arguments } return true; @@ -54,25 +54,30 @@ static void usage(const char* exeName) printf(" '$ %s problem_size -selfcheck'\n", exeName); printf("Arguments:\n"); printf(" 'problem_size': number of decision variables [optional, default is 50k]\n"); - printf(" '-selfcheck': compares the optimal objective with a previously saved value for the problem specified by 'problem_size'. [optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with a previously saved value for the problem specified by " + "'problem_size'. [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); - int ierr = MPI_Comm_rank(MPI_COMM_WORLD,&rank); - assert(MPI_SUCCESS==ierr); - //if(0==rank) printf("Support for MPI is enabled\n"); + int ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(MPI_SUCCESS == ierr); + // if(0==rank) printf("Support for MPI is enabled\n"); #endif - bool selfCheck; size_type n; - if(!parse_arguments(argc, argv, n, selfCheck)) { usage(argv[0]); return 1;} + bool selfCheck; + size_type n; + if(!parse_arguments(argc, argv, n, selfCheck)) { + usage(argv[0]); + return 1; + } double obj_value; bool do_second_round = true; - + hiopSolveStatus status; DenseConsEx3 nlp_interface(n); @@ -82,12 +87,12 @@ int main(int argc, char **argv) // relax var/con bounds before solving the problem nlp.options->SetNumericValue("bound_relax_perturb", 1e-10); - //keep multipliers small + // keep multipliers small nlp.options->SetStringValue("elastic_mode", "correct_it_adjust_bound"); nlp.options->SetNumericValue("elastic_mode_bound_relax_final", 1e-12); nlp.options->SetNumericValue("elastic_mode_bound_relax_initial", 1e-2); - //quasi-Newton tolerance is smaller than the default + // quasi-Newton tolerance is smaller than the default nlp.options->SetNumericValue("tolerance", 1e-6); { @@ -96,13 +101,12 @@ int main(int argc, char **argv) status = solver.run(); obj_value = solver.getObjective(); - //change options and resolve + // change options and resolve nlp.options->SetStringValue("fixed_var", "relax"); status = solver.run(); obj_value = solver.getObjective(); - } - //do the same as above but force deallocation of the solver + // do the same as above but force deallocation of the solver if(do_second_round) { { hiopAlgFilterIPM solver(&nlp); @@ -118,17 +122,16 @@ int main(int argc, char **argv) } } - if(status<0) { - if(rank==0) printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); + if(status < 0) { + if(rank == 0) printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); return -1; } - //this is used for "regression" testing when the driver is called with -selfcheck + // this is used for "regression" testing when the driver is called with -selfcheck if(selfCheck) { - if(!self_check(n, obj_value)) - return -1; + if(!self_check(n, obj_value)) return -1; } else { - if(rank==0) { + if(rank == 0) { printf("Optimal objective: %22.14e. Solver status: %d\n", obj_value, status); } } @@ -140,31 +143,36 @@ int main(int argc, char **argv) return 0; } - static bool self_check(size_type n, double objval) { -#define num_n_saved 3 //keep this is sync with n_saved and objval_saved - const size_type n_saved[] = {500, 5000, 50000}; +#define num_n_saved 3 // keep this is sync with n_saved and objval_saved + const size_type n_saved[] = {500, 5000, 50000}; const double objval_saved[] = {2.057860427672e+00, 2.02870382737020e+01, 2.02578703828247e+02}; #define relerr 1e-6 - bool found=false; - for(int it=0; it relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", - objval, -(int)log10(relerr), objval_saved[it], n); + bool found = false; + for(int it = 0; it < num_n_saved; it++) { + if(n_saved[it] == n) { + found = true; + if(fabs((objval_saved[it] - objval) / (1 + objval_saved[it])) > relerr) { + printf( + "selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, + -(int)log10(relerr), + objval_saved[it], + n); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } break; } } if(!found) { - printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", + n, + objval); return false; } diff --git a/src/Drivers/Dense/NlpDenseConsEx4.cpp b/src/Drivers/Dense/NlpDenseConsEx4.cpp index bcca2e40a..864d1cf85 100644 --- a/src/Drivers/Dense/NlpDenseConsEx4.cpp +++ b/src/Drivers/Dense/NlpDenseConsEx4.cpp @@ -1,20 +1,22 @@ #include "NlpDenseConsEx4.hpp" #include -#include //for memcpy +#include //for memcpy #include DenseConsEx4::DenseConsEx4() - : n_vars_(2), - n_cons_(4), - unconstrained_(false) + : n_vars_(2), + n_cons_(4), + unconstrained_(false) { comm_size = 1; - my_rank = 0; + my_rank = 0; #ifdef HIOP_USE_MPI comm = MPI_COMM_WORLD; - int ierr = MPI_Comm_size(comm, &comm_size); assert(MPI_SUCCESS==ierr); - ierr = MPI_Comm_rank(comm, &my_rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(comm, &comm_size); + assert(MPI_SUCCESS == ierr); + ierr = MPI_Comm_rank(comm, &my_rank); + assert(MPI_SUCCESS == ierr); #endif if(unconstrained_) { @@ -22,46 +24,42 @@ DenseConsEx4::DenseConsEx4() } // set up vector distribution for primal variables - easier to store it as a member in this simple example - col_partition_ = new index_type[comm_size+1]; + col_partition_ = new index_type[comm_size + 1]; index_type quotient = n_vars_ / comm_size; index_type remainder = n_vars_ - comm_size * quotient; - //if(my_rank==0) printf("reminder=%llu quotient=%llu\n", remainder, quotient); + // if(my_rank==0) printf("reminder=%llu quotient=%llu\n", remainder, quotient); int i = 0; - col_partition_[i++]=0; - while(i<=remainder) { - col_partition_[i] = col_partition_[i-1] + quotient + 1; + col_partition_[i++] = 0; + while(i <= remainder) { + col_partition_[i] = col_partition_[i - 1] + quotient + 1; i++; } - while(i<=comm_size) { - col_partition_[i] = col_partition_[i-1] + quotient; + while(i <= comm_size) { + col_partition_[i] = col_partition_[i - 1] + quotient; i++; } } -DenseConsEx4::~DenseConsEx4() -{ - delete[] col_partition_; -} - +DenseConsEx4::~DenseConsEx4() { delete[] col_partition_; } bool DenseConsEx4::get_prob_sizes(size_type& n, size_type& m) -{ +{ n = n_vars_; m = n_cons_; return true; } -bool DenseConsEx4::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool DenseConsEx4::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { index_type i_local; - for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank+1]; i++) { - i_local = idx_global2local(n,i); - if(i==0) { + for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank + 1]; i++) { + i_local = idx_global2local(n, i); + if(i == 0) { xlow[i_local] = 0.; xupp[i_local] = 11.; type[i_local] = hiopNonlinear; continue; } - if(i==1) { + if(i == 1) { xlow[i_local] = 0.; xupp[i_local] = 11.; type[i_local] = hiopNonlinear; @@ -73,7 +71,7 @@ bool DenseConsEx4::get_vars_info(const size_type& n, double *xlow, double* xupp, bool DenseConsEx4::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m==n_cons_); + assert(m == n_cons_); if(!unconstrained_) { clow[0] = 0.0; cupp[0] = 1e20; @@ -99,22 +97,23 @@ bool DenseConsEx4::eval_f(const size_type& n, const double* x, bool new_x, doubl obj_value = 0.; index_type i_local; - for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank+1]; i++) { - i_local = idx_global2local(n,i); - - if(i==0) { - obj_value += -3.*x[i_local]*x[i_local]; + for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank + 1]; i++) { + i_local = idx_global2local(n, i); + + if(i == 0) { + obj_value += -3. * x[i_local] * x[i_local]; continue; } - if(i==1) { - obj_value += - 2.*x[i_local]*x[i_local]; + if(i == 1) { + obj_value += -2. * x[i_local] * x[i_local]; continue; } } #ifdef HIOP_USE_MPI double obj_global; - int ierr = MPI_Allreduce(&obj_value, &obj_global, 1, MPI_DOUBLE, MPI_SUM, comm); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Allreduce(&obj_value, &obj_global, 1, MPI_DOUBLE, MPI_SUM, comm); + assert(ierr == MPI_SUCCESS); obj_value = obj_global; #endif return true; @@ -123,15 +122,15 @@ bool DenseConsEx4::eval_f(const size_type& n, const double* x, bool new_x, doubl bool DenseConsEx4::eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) { index_type i_local; - for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank+1]; i++) { - i_local = idx_global2local(n,i); - - if(i==0) { - gradf[i_local] = -6.*x[i_local]; + for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank + 1]; i++) { + i_local = idx_global2local(n, i); + + if(i == 0) { + gradf[i_local] = -6. * x[i_local]; continue; } - if(i==1) { - gradf[i_local] = -4.*x[i_local]; + if(i == 1) { + gradf[i_local] = -4. * x[i_local]; continue; } } @@ -153,75 +152,78 @@ bool DenseConsEx4::eval_cons(const size_type& n, return true; } - assert(n==n_vars_); assert(m==n_cons_); assert(n_cons_==4); - assert(num_cons<=m); assert(num_cons>=0); - //local contributions to the constraints in cons are reset - for(int j=0;j= 0); + // local contributions to the constraints in cons are reset + for(int j = 0; j < num_cons; j++) { + cons[j] = 0.; } - //compute the constraint one by one. - for(int itcon=0; itcon sum x_i = n+1 - if(idx_cons[itcon]==0) { + if(idx_cons[itcon] == 0) { index_type i_local; - for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank+1]; i++) { - i_local = idx_global2local(n,i); - if(i==0) { - cons[itcon] += -0.06 * x[i_local] * x[i_local]; + for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank + 1]; i++) { + i_local = idx_global2local(n, i); + if(i == 0) { + cons[itcon] += -0.06 * x[i_local] * x[i_local]; } - if(i==1) { - cons[itcon] += x[i_local]; + if(i == 1) { + cons[itcon] += x[i_local]; } } - continue; //done with this constraint + continue; // done with this constraint } - - // --- constraint 2 body ---> 2*x_1 + sum {x_i : i=2,...,n} - if(idx_cons[itcon]==1) { + + // --- constraint 2 body ---> 2*x_1 + sum {x_i : i=2,...,n} + if(idx_cons[itcon] == 1) { index_type i_local; - for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank+1]; i++) { - i_local = idx_global2local(n,i); - if(i==0) { - cons[itcon] += 0.05 * x[i_local] * x[i_local]; + for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank + 1]; i++) { + i_local = idx_global2local(n, i); + if(i == 0) { + cons[itcon] += 0.05 * x[i_local] * x[i_local]; } - if(i==1) { - cons[itcon] += x[i_local]; + if(i == 1) { + cons[itcon] += x[i_local]; } } continue; } // --- constraint 3 body ---> 2*x_1 + 0.5*x_2 + sum{x_i : i=3,...,n} - if(idx_cons[itcon]==2) { + if(idx_cons[itcon] == 2) { index_type i_local; - for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank+1]; i++) { - i_local = idx_global2local(n,i); - if(i==1) { - cons[itcon] += x[i_local]*x[i_local]; + for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank + 1]; i++) { + i_local = idx_global2local(n, i); + if(i == 1) { + cons[itcon] += x[i_local] * x[i_local]; } } continue; } // --- constraint 4 body ---> 4*x_1 + 2*x_2 + 2*x_3 + sum{x_i : i=4,...,n} - if(idx_cons[itcon]==3) { + if(idx_cons[itcon] == 3) { index_type i_local; - for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank+1]; i++) { - i_local = idx_global2local(n,i); - if(i==0) { - cons[itcon] += x[i_local]*x[i_local]; + for(index_type i = col_partition_[my_rank]; i < col_partition_[my_rank + 1]; i++) { + i_local = idx_global2local(n, i); + if(i == 0) { + cons[itcon] += x[i_local] * x[i_local]; } } continue; } - } //end for loop over constraints - + } // end for loop over constraints + #ifdef HIOP_USE_MPI - if(num_cons>0) { + if(num_cons > 0) { double* cons_global = new double[num_cons]; - int ierr = MPI_Allreduce(cons, cons_global, num_cons, MPI_DOUBLE, MPI_SUM, comm); assert(ierr==MPI_SUCCESS); - memcpy(cons, cons_global, num_cons*sizeof(double)); + int ierr = MPI_Allreduce(cons, cons_global, num_cons, MPI_DOUBLE, MPI_SUM, comm); + assert(ierr == MPI_SUCCESS); + memcpy(cons, cons_global, num_cons * sizeof(double)); delete[] cons_global; } #endif @@ -229,101 +231,97 @@ bool DenseConsEx4::eval_cons(const size_type& n, return true; } - - bool DenseConsEx4::eval_Jac_cons(const size_type& n, const size_type& m, const size_type& num_cons, - const index_type* idx_cons, + const index_type* idx_cons, const double* x, bool new_x, - double* Jac) + double* Jac) { if(unconstrained_) { assert(m == 0); return true; } - assert(n==n_vars_); assert(m==n_cons_); - size_type n_local = col_partition_[my_rank+1] - col_partition_[my_rank]; - //here we will iterate over the local indexes, however we still need to work with the - //global indexes to correctly determine the entries in the Jacobian corresponding - //to the 'rebels' variables x_1, x_2, x_3 + assert(n == n_vars_); + assert(m == n_cons_); + size_type n_local = col_partition_[my_rank + 1] - col_partition_[my_rank]; + // here we will iterate over the local indexes, however we still need to work with the + // global indexes to correctly determine the entries in the Jacobian corresponding + // to the 'rebels' variables x_1, x_2, x_3 - for(int itcon=0; itcon= 0 - * y + 0.05*x*x <= 10 + * y + 0.05*x*x <= 10 * y*y <= 64 * x*x <= 100 * 0 <= x <= 11 @@ -27,12 +27,12 @@ using index_type = hiop::index_type; */ class DenseConsEx4 : public hiop::hiopInterfaceDenseConstraints { -public: +public: DenseConsEx4(); virtual ~DenseConsEx4(); virtual bool get_prob_sizes(size_type& n, size_type& m); - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); @@ -47,44 +47,44 @@ class DenseConsEx4 : public hiop::hiopInterfaceDenseConstraints virtual bool eval_Jac_cons(const size_type& n, const size_type& m, const size_type& num_cons, - const index_type* idx_cons, + const index_type* idx_cons, const double* x, bool new_x, double* Jac); virtual bool get_vecdistrib_info(size_type global_n, index_type* cols); - virtual bool get_starting_point(const size_type&n, double* x0); + virtual bool get_starting_point(const size_type& n, double* x0); -/* - void solution_callback(hiop::hiopSolveStatus status, - int n, const double* x, - const double* z_L, - const double* z_U, - int m, const double* g, - const double* lambda, - double obj_value) - { - printf("solution_callback with optimal value: %g. Also x[1]=%22.14f\n", obj_value, x[1]); - }; - + /* + void solution_callback(hiop::hiopSolveStatus status, + int n, const double* x, + const double* z_L, + const double* z_U, + int m, const double* g, + const double* lambda, + double obj_value) + { + printf("solution_callback with optimal value: %g. Also x[1]=%22.14f\n", obj_value, x[1]); + }; - virtual bool iterate_callback(int iter, double obj_value, double logbar_obj_value, - int n, const double* x, - const double* z_L, - const double* z_U, - int m, const double* g, - const double* lambda, - double inf_pr, double inf_du, double onenorm_pr, - double mu, - double alpha_du, double alpha_pr, - int ls_trials) - { - if(iter==3) return false; - printf("%g %g\n", x[0], x[1]); - return true; - } -*/ + + virtual bool iterate_callback(int iter, double obj_value, double logbar_obj_value, + int n, const double* x, + const double* z_L, + const double* z_U, + int m, const double* g, + const double* lambda, + double inf_pr, double inf_du, double onenorm_pr, + double mu, + double alpha_du, double alpha_pr, + int ls_trials) + { + if(iter==3) return false; + printf("%g %g\n", x[0], x[1]); + return true; + } + */ private: size_type n_vars_, n_cons_; #ifdef HIOP_USE_MPI @@ -94,11 +94,12 @@ class DenseConsEx4 : public hiop::hiopInterfaceDenseConstraints int comm_size; index_type* col_partition_; bool unconstrained_; + public: - inline index_type idx_local2global(size_type global_n, index_type idx_local) - { - assert(idx_local + col_partition_[my_rank]=col_partition_[my_rank] && "global index does not belong to this rank"); - assert(idx_global< col_partition_[my_rank+1] && "global index does not belong to this rank"); - assert(global_n==n_vars_ && "your global_n does not match the number of variables?"); - return (idx_global-col_partition_[my_rank]); + assert(idx_global >= col_partition_[my_rank] && "global index does not belong to this rank"); + assert(idx_global < col_partition_[my_rank + 1] && "global index does not belong to this rank"); + assert(global_n == n_vars_ && "your global_n does not match the number of variables?"); + return (idx_global - col_partition_[my_rank]); } }; #endif diff --git a/src/Drivers/Dense/NlpDenseConsEx4Driver.cpp b/src/Drivers/Dense/NlpDenseConsEx4Driver.cpp index 7c2cccc08..873666b25 100644 --- a/src/Drivers/Dense/NlpDenseConsEx4Driver.cpp +++ b/src/Drivers/Dense/NlpDenseConsEx4Driver.cpp @@ -9,23 +9,22 @@ using namespace hiop; static bool self_check(double obj_value); -static bool parse_arguments(int argc, char **argv, bool& self_check) +static bool parse_arguments(int argc, char** argv, bool& self_check) { self_check = false; switch(argc) { - case 1: - //no arguments - return true; - break; - case 2: //1 arguments + case 1: + // no arguments + return true; + break; + case 2: // 1 arguments { if(std::string(argv[1]) == "-selfcheck") { self_check = true; } - } - break; - default: - return false; //2 or more arguments + } break; + default: + return false; // 2 or more arguments } return true; @@ -40,26 +39,26 @@ static void usage(const char* exeName) printf(" '-selfcheck': compares the optimal objective with a previously saved value. [optional]\n"); } -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); - int ierr = MPI_Comm_rank(MPI_COMM_WORLD,&rank); - assert(MPI_SUCCESS==ierr); - //if(0==rank) printf("Support for MPI is enabled\n"); + int ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(MPI_SUCCESS == ierr); + // if(0==rank) printf("Support for MPI is enabled\n"); #endif bool selfCheck; - if(!parse_arguments(argc, argv, selfCheck)) { - usage(argv[0]); + if(!parse_arguments(argc, argv, selfCheck)) { + usage(argv[0]); return 1; } DenseConsEx4 nlp_interface; - //if(rank==0) printf("interface created\n"); + // if(rank==0) printf("interface created\n"); hiopNlpDenseConstraints nlp(nlp_interface); - //if(rank==0) printf("nlp formulation created\n"); + // if(rank==0) printf("nlp formulation created\n"); nlp.options->SetStringValue("duals_update_type", "linear"); nlp.options->SetStringValue("compute_mode", "cpu"); @@ -73,19 +72,19 @@ int main(int argc, char **argv) #ifdef HIOP_USE_MPI MPI_Finalize(); #endif - - if(status<0) { - if(rank==0) printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); + + if(status < 0) { + if(rank == 0) printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); return -1; } - //this is used for "regression" testing when the driver is called with -selfcheck + // this is used for "regression" testing when the driver is called with -selfcheck if(selfCheck) { if(!self_check(obj_value)) { - return -1; + return -1; } } else { - if(rank==0) { + if(rank == 0) { printf("Optimal objective: %22.14e. Solver status: %d\n", obj_value, status); } } @@ -93,18 +92,19 @@ int main(int argc, char **argv) return 0; } - static bool self_check(double objval) { const double objval_saved = -3.32231409044575e+02; #define relerr 1e-6 - if(fabs( (objval_saved-objval)/(1+objval_saved)) > relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e).\n", - objval, -(int)log10(relerr), objval_saved); + if(fabs((objval_saved - objval) / (1 + objval_saved)) > relerr) { + printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e).\n", + objval, + -(int)log10(relerr), + objval_saved); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } return true; } diff --git a/src/Drivers/IpoptAdapter/IpoptAdapter.hpp b/src/Drivers/IpoptAdapter/IpoptAdapter.hpp index 25503d9c5..d1d27eb38 100644 --- a/src/Drivers/IpoptAdapter/IpoptAdapter.hpp +++ b/src/Drivers/IpoptAdapter/IpoptAdapter.hpp @@ -4,7 +4,7 @@ * // create a new instance of your nlp by using the adapter offered by hiOP. * SmartPtr mynlp = new hiop2IpoptTNLP(&hiop_interface); * // from now on everything is compatible with Ipopt - * + * * An example IPOPT driver that solve HiOP NLPs using Ipopt is available upon request. * */ @@ -25,58 +25,49 @@ #include using namespace Ipopt; -namespace hiop { +namespace hiop +{ /* Addapts HiOp DenseConstraints interface to Ipopt TNLP interface */ -//TO DO: call eval_cons (and Jacob) separately for Eq and Ineq as per documentation of these methods +// TO DO: call eval_cons (and Jacob) separately for Eq and Ineq as per documentation of these methods class hiopDenseCons2IpoptTNLP : public TNLP { public: - hiopDenseCons2IpoptTNLP(hiopInterfaceDenseConstraints* hiopNLP_) - : hiopNLP(hiopNLP_) {}; + hiopDenseCons2IpoptTNLP(hiopInterfaceDenseConstraints* hiopNLP_) + : hiopNLP(hiopNLP_) {}; virtual ~hiopDenseCons2IpoptTNLP() {}; /* Overloads from TNLP */ /** Method to return some info about the nlp */ - virtual bool get_nlp_info(Index& n, - Index& m, - Index& nnz_jac_g, - Index& nnz_h_lag, - IndexStyleEnum& index_style) + virtual bool get_nlp_info(Index& n, Index& m, Index& nnz_jac_g, Index& nnz_h_lag, IndexStyleEnum& index_style) { size_type nvars, ncons; - if(false==hiopNLP->get_prob_sizes(nvars, ncons)) - return false; - n = (int)nvars; m=(int)ncons; - nnz_jac_g = n*m; - nnz_h_lag=0; + if(false == hiopNLP->get_prob_sizes(nvars, ncons)) return false; + n = (int)nvars; + m = (int)ncons; + nnz_jac_g = n * m; + nnz_h_lag = 0; index_style = TNLP::C_STYLE; return true; } /** Method to return the bounds for my problem */ - virtual bool get_bounds_info(Index n, - Number* x_l, - Number* x_u, - Index m, - Number* g_l, - Number* g_u) + virtual bool get_bounds_info(Index n, Number* x_l, Number* x_u, Index m, Number* g_l, Number* g_u) { - bool bSuccess=true; - size_type nll=n, mll=m; - hiopInterfaceBase::NonlinearityType* types=new hiopInterfaceBase::NonlinearityType[n]; + bool bSuccess = true; + size_type nll = n, mll = m; + hiopInterfaceBase::NonlinearityType* types = new hiopInterfaceBase::NonlinearityType[n]; bSuccess = hiopNLP->get_vars_info(nll, x_l, x_u, types); delete[] types; - + if(bSuccess) { - types=new hiopInterfaceBase::NonlinearityType[m]; + types = new hiopInterfaceBase::NonlinearityType[m]; bSuccess = hiopNLP->get_cons_info(mll, g_l, g_u, types); delete[] types; } return bSuccess; } - /** Method to return the starting point for the algorithm */ virtual bool get_starting_point(Index n, bool init_x, @@ -86,82 +77,70 @@ class hiopDenseCons2IpoptTNLP : public TNLP Number* z_U, Index m, bool init_lambda, - Number* lambda) + Number* lambda) { - assert(false==init_z); - assert(false==init_lambda); - size_type nll=n; - return hiopNLP->get_starting_point(nll,x); + assert(false == init_z); + assert(false == init_lambda); + size_type nll = n; + return hiopNLP->get_starting_point(nll, x); } - /** Method to return the objective value */ - virtual bool eval_f(Index n, const Number* x, bool new_x, Number& obj_value) + virtual bool eval_f(Index n, const Number* x, bool new_x, Number& obj_value) { - size_type nll=n; - return hiopNLP->eval_f(nll,x,new_x,obj_value); + size_type nll = n; + return hiopNLP->eval_f(nll, x, new_x, obj_value); } - /** Method to return the gradient of the objective */ - virtual bool eval_grad_f(Index n, const Number* x, bool new_x, Number* grad_f) + virtual bool eval_grad_f(Index n, const Number* x, bool new_x, Number* grad_f) { - size_type nll=n; - return hiopNLP->eval_grad_f(nll,x,new_x,grad_f); + size_type nll = n; + return hiopNLP->eval_grad_f(nll, x, new_x, grad_f); } - /** Method to return the constraint residuals */ - virtual bool eval_g(Index n, const Number* x, bool new_x, Index m, Number* g) + virtual bool eval_g(Index n, const Number* x, bool new_x, Index m, Number* g) { - size_type nll=n, mll=m; - index_type* idx_cons=new index_type[m]; - for(int i=0; ieval_cons(nll,mll,mll,idx_cons,x,new_x,g); + size_type nll = n, mll = m; + index_type* idx_cons = new index_type[m]; + for(int i = 0; i < m; i++) idx_cons[i] = i; + bool bret = hiopNLP->eval_cons(nll, mll, mll, idx_cons, x, new_x, g); delete[] idx_cons; return bret; } - /** Method to return: * 1) The structure of the jacobian (if "values" is NULL) * 2) The values of the jacobian (if "values" is not NULL) */ - virtual bool eval_jac_g(Index n, - const Number* x, - bool new_x, - Index m, - Index nele_jac, - Index* iRow, - Index *jCol, - Number* values) + virtual bool + eval_jac_g(Index n, const Number* x, bool new_x, Index m, Index nele_jac, Index* iRow, Index* jCol, Number* values) { - bool bret=true; size_type nll=n, mll=m, onell=1; - double* constraint=new double[n]; - size_type nz=0; - for(size_type i=0; ieval_Jac_cons(nll, mll, onell, &i, x, new_x, constraint); + bret = hiopNLP->eval_Jac_cons(nll, mll, onell, &i, x, new_x, constraint); if(!bret) break; - memcpy(values+i*n, constraint, ((size_t)n)*sizeof(double)); + memcpy(values + i * n, constraint, ((size_t)n) * sizeof(double)); - } else { //this is only for iRow and jCol + } else { // this is only for iRow and jCol - for(size_type j=0; jget_prob_sizes(nvars, ncons)) - return false; - - if(false==hiopNLP->get_sparse_dense_blocks_info(nx_sparse, nx_dense, - nnz_sparse_Jaceq, - nnz_sparse_Jacineq, - nnz_sparse_Hess_Lagr_SS, - nnz_sparse_Hess_Lagr_SD)) { + if(false == hiopNLP->get_prob_sizes(nvars, ncons)) return false; + + if(false == hiopNLP->get_sparse_dense_blocks_info(nx_sparse, + nx_dense, + nnz_sparse_Jaceq, + nnz_sparse_Jacineq, + nnz_sparse_Hess_Lagr_SS, + nnz_sparse_Hess_Lagr_SD)) { return false; } - + nnz_jac_g = nnz_sparse_Jaceq + nnz_sparse_Jacineq; - //also put the dense part - nnz_jac_g += (int) ncons*nx_dense; + // also put the dense part + nnz_jac_g += (int)ncons * nx_dense; - n = (int)nvars; m=(int)ncons; - nnz_h_lag = nnz_sparse_Hess_Lagr_SS; assert(nnz_sparse_Hess_Lagr_SD==0); - //plus the dense part - nnz_h_lag += nx_dense*(nx_dense+1)/2; + n = (int)nvars; + m = (int)ncons; + nnz_h_lag = nnz_sparse_Hess_Lagr_SS; + assert(nnz_sparse_Hess_Lagr_SD == 0); + // plus the dense part + nnz_h_lag += nx_dense * (nx_dense + 1) / 2; index_style = TNLP::C_STYLE; return true; } /** Method to return the bounds for my problem */ - bool get_bounds_info(Index n, Number* x_l, Number* x_u, Index m, Number* g_l, Number* g_u) + bool get_bounds_info(Index n, Number* x_l, Number* x_u, Index m, Number* g_l, Number* g_u) { - bool bSuccess=true; - size_type nll=n, mll=m; - hiopInterfaceBase::NonlinearityType* types=new hiopInterfaceBase::NonlinearityType[n]; + bool bSuccess = true; + size_type nll = n, mll = m; + hiopInterfaceBase::NonlinearityType* types = new hiopInterfaceBase::NonlinearityType[n]; bSuccess = hiopNLP->get_vars_info(nll, x_l, x_u, types); delete[] types; - + if(bSuccess) { - types=new hiopInterfaceBase::NonlinearityType[m]; + types = new hiopInterfaceBase::NonlinearityType[m]; bSuccess = hiopNLP->get_cons_info(mll, g_l, g_u, types); delete[] types; } n_eq = n_ineq = 0; - for(int it=0; itget_starting_point(nll,x); + assert(false == init_z && "primal-dual restart not supported by the addapter"); + assert(false == init_lambda && "primal-dual restart not supported by the addapter"); + size_type nll = n; + return hiopNLP->get_starting_point(nll, x); } - /** Method to return the objective value */ - bool eval_f(Index n, const Number* x, bool new_x, Number& obj_value) + bool eval_f(Index n, const Number* x, bool new_x, Number& obj_value) { - size_type nll=n; - return hiopNLP->eval_f(nll,x,new_x,obj_value); + size_type nll = n; + return hiopNLP->eval_f(nll, x, new_x, obj_value); } - /** Method to return the gradient of the objective */ - bool eval_grad_f(Index n, const Number* x, bool new_x, Number* grad_f) + bool eval_grad_f(Index n, const Number* x, bool new_x, Number* grad_f) { - size_type nll=n; - return hiopNLP->eval_grad_f(nll,x,new_x,grad_f); + size_type nll = n; + return hiopNLP->eval_grad_f(nll, x, new_x, grad_f); } - /** Method to return the constraint residuals */ - // HiOp calls Eq and Ineq separately -> the interface expects that so we have to + // HiOp calls Eq and Ineq separately -> the interface expects that so we have to // mimic it - bool eval_g(Index n, const Number* x, bool new_x, Index m, Number* g) + bool eval_g(Index n, const Number* x, bool new_x, Index m, Number* g) { - size_type nll=n, mll=m; - bool bret=false; + size_type nll = n, mll = m; + bool bret = false; bool eq_call_failed = false; bool try_onecall_Jac = false; { double g_eq[n_eq]; size_type num_cons = n_eq; - bret = hiopNLP->eval_cons(nll, mll, num_cons, cons_eq_idxs, x, new_x,g_eq); + bret = hiopNLP->eval_cons(nll, mll, num_cons, cons_eq_idxs, x, new_x, g_eq); if(bret) { - for(int i=0; ieval_cons(nll, mll, num_cons, cons_ineq_idxs, x, new_x,g_ineq); + bret = hiopNLP->eval_cons(nll, mll, num_cons, cons_ineq_idxs, x, new_x, g_ineq); if(bret) { - for(int i=0; ieval_cons(nll, mll, x, new_x, g); - //for(int i=0; ieval_Jac_cons(nll, mll, num_cons, cons_eq_idxs, - x, new_x, nx_sparse, nx_dense, - nnz_sparse_Jaceq, iRow, jCol, NULL, + bret = hiopNLP->eval_Jac_cons(nll, + mll, + num_cons, + cons_eq_idxs, + x, + new_x, + nx_sparse, + nx_dense, + nnz_sparse_Jaceq, + iRow, + jCol, + NULL, NULL); if(bret) { nnzit += nnz_sparse_Jaceq; - for(int i=0; ieval_Jac_cons(nll, mll, num_cons, cons_ineq_idxs, - x, new_x, nx_sparse, nx_dense, - nnz_sparse_Jacineq, iRow+nnzit, jCol+nnzit, NULL, + bret = hiopNLP->eval_Jac_cons(nll, + mll, + num_cons, + cons_ineq_idxs, + x, + new_x, + nx_sparse, + nx_dense, + nnz_sparse_Jacineq, + iRow + nnzit, + jCol + nnzit, + NULL, NULL); if(bret) { - //in-place shift of iRow and jCol for Jacineq - for(int it=nnzit; iteval_Jac_cons(nll, mll, x, new_x, nx_sparse, nx_dense, - nnz_sparse_Jaceq+nnz_sparse_Jacineq, iRow, jCol, values, + bret = hiopNLP->eval_Jac_cons(nll, + mll, + x, + new_x, + nx_sparse, + nx_dense, + nnz_sparse_Jaceq + nnz_sparse_Jacineq, + iRow, + jCol, + values, NULL); - if(!bret) - return false; + if(!bret) return false; - nnzit = nnz_sparse_Jaceq+nnz_sparse_Jacineq; - //put the dense part of the MDS in the Ipopt sparse Jac matrix - for(int i=0; im() != n_eq || JacDeq->n() != nx_dense) { delete JacDeq; JacDeq = new hiopMatrixDenseRowMajor(n_eq, nx_dense); @@ -484,7 +493,7 @@ class hiopMDS2IpoptTNLP : public TNLP if(JacDineq == NULL) { JacDineq = new hiopMatrixDenseRowMajor(n_ineq, nx_dense); } else { - //this for the case when the problem (constraints) sizes changed + // this for the case when the problem (constraints) sizes changed if(JacDineq->m() != n_ineq || JacDineq->n() != nx_dense) { delete JacDineq; JacDineq = new hiopMatrixDenseRowMajor(n_ineq, nx_dense); @@ -492,43 +501,64 @@ class hiopMDS2IpoptTNLP : public TNLP } int nnzit = 0; - //sparse Jac Eq + // sparse Jac Eq { size_type num_cons = n_eq; - bret = hiopNLP->eval_Jac_cons(nll, mll, num_cons, cons_eq_idxs, - x, new_x, nx_sparse, nx_dense, - nnz_sparse_Jaceq, NULL, NULL, values, + bret = hiopNLP->eval_Jac_cons(nll, + mll, + num_cons, + cons_eq_idxs, + x, + new_x, + nx_sparse, + nx_dense, + nnz_sparse_Jaceq, + NULL, + NULL, + values, JacDeq->local_data()); if(bret) { - nnzit += nnz_sparse_Jaceq; assert(nnzit<=nele_jac); - - //the dense part - const size_t len = (size_t)(n_eq*nx_dense); - memcpy(values+nnzit, JacDeq->local_data(), len*sizeof(double)); - - nnzit += n_eq*nx_dense; assert(nnzit<=nele_jac); + nnzit += nnz_sparse_Jaceq; + assert(nnzit <= nele_jac); + + // the dense part + const size_t len = (size_t)(n_eq * nx_dense); + memcpy(values + nnzit, JacDeq->local_data(), len * sizeof(double)); + + nnzit += n_eq * nx_dense; + assert(nnzit <= nele_jac); } else { eq_call_failed = true; delete JacDeq; JacDeq = NULL; } } - - //sparse Jac Ineq + + // sparse Jac Ineq { size_type num_cons = n_ineq; - bret = hiopNLP->eval_Jac_cons(nll, mll, num_cons, cons_ineq_idxs, - x, new_x, nx_sparse, nx_dense, - nnz_sparse_Jacineq, NULL, NULL, values+nnzit, + bret = hiopNLP->eval_Jac_cons(nll, + mll, + num_cons, + cons_ineq_idxs, + x, + new_x, + nx_sparse, + nx_dense, + nnz_sparse_Jacineq, + NULL, + NULL, + values + nnzit, JacDineq->local_data()); if(bret) { - nnzit += nnz_sparse_Jacineq; assert(nnzit<=nele_jac); - - const size_t len = (size_t)(n_ineq*nx_dense); - //the dense part - memcpy(values+nnzit, JacDineq->local_data(), len*sizeof(double)); - nnzit += n_ineq*nx_dense; - assert(nnzit==nele_jac); + nnzit += nnz_sparse_Jacineq; + assert(nnzit <= nele_jac); + + const size_t len = (size_t)(n_ineq * nx_dense); + // the dense part + memcpy(values + nnzit, JacDineq->local_data(), len * sizeof(double)); + nnzit += n_ineq * nx_dense; + assert(nnzit == nele_jac); } else { delete JacDineq; JacDineq = NULL; @@ -543,73 +573,101 @@ class hiopMDS2IpoptTNLP : public TNLP } else { // if(true == onecall_Jac_detected_) { try_onecall_Jac = true; } - - //try one call Jacobian + + // try one call Jacobian if(try_onecall_Jac) { if(JacDeqineq == NULL) { JacDeqineq = new hiopMatrixDenseRowMajor(m, nx_dense); assert(JacDeq == NULL); assert(JacDineq == NULL); } - - bret = hiopNLP->eval_Jac_cons(nll, mll, x, new_x, nx_sparse, nx_dense, - nnz_sparse_Jaceq+nnz_sparse_Jacineq, NULL, NULL, values, + + bret = hiopNLP->eval_Jac_cons(nll, + mll, + x, + new_x, + nx_sparse, + nx_dense, + nnz_sparse_Jaceq + nnz_sparse_Jacineq, + NULL, + NULL, + values, JacDeqineq->local_data()); - if(!bret) - return false; - - int nnzit = nnz_sparse_Jaceq+nnz_sparse_Jacineq; - //put the dense part of the MDS in the Ipopt sparse Jac matrix - memcpy(values+nnzit, JacDeqineq->local_data(), ((size_t)m*nx_dense)*sizeof(double)); - nnzit += m*nx_dense; - + if(!bret) return false; + + int nnzit = nnz_sparse_Jaceq + nnz_sparse_Jacineq; + // put the dense part of the MDS in the Ipopt sparse Jac matrix + memcpy(values + nnzit, JacDeqineq->local_data(), ((size_t)m * nx_dense) * sizeof(double)); + nnzit += m * nx_dense; + assert(nnzit == nele_jac); } } return true; } - bool eval_h(Index n, const Number* x, bool new_x, - Number obj_factor, Index m, const Number* lambda, - bool new_lambda, Index nele_hess, Index* iRow, - Index* jCol, Number* values) - { - bool bret = true; size_type nll=n, mll=m; + bool eval_h(Index n, + const Number* x, + bool new_x, + Number obj_factor, + Index m, + const Number* lambda, + bool new_lambda, + Index nele_hess, + Index* iRow, + Index* jCol, + Number* values) + { + bool bret = true; + size_type nll = n, mll = m; assert(nnz_sparse_Hess_Lagr_SD == 0 && "not yet supported"); - if(values==NULL) { + if(values == NULL) { int nnzit = 0; - bret = hiopNLP->eval_Hess_Lagr(nll, mll, x, new_x, obj_factor, lambda, new_lambda, - nx_sparse, nx_dense, - nnz_sparse_Hess_Lagr_SS, iRow, jCol, NULL, + bret = hiopNLP->eval_Hess_Lagr(nll, + mll, + x, + new_x, + obj_factor, + lambda, + new_lambda, + nx_sparse, + nx_dense, + nnz_sparse_Hess_Lagr_SS, + iRow, + jCol, + NULL, + NULL, + nnz_sparse_Hess_Lagr_SD, + NULL, NULL, - nnz_sparse_Hess_Lagr_SD, NULL, NULL, NULL); + NULL); if(!bret) return false; nnzit += nnz_sparse_Hess_Lagr_SS; - - //dense part - for(int i=0; im() != nx_dense) { delete HessDL; HessDL = new hiopMatrixDenseRowMajor(nx_dense, nx_dense); @@ -617,35 +675,52 @@ class hiopMDS2IpoptTNLP : public TNLP } double* HessMat = HessDL->local_data(); - bret = hiopNLP->eval_Hess_Lagr(nll, mll, x, new_x, obj_factor, lambda, new_lambda, - nx_sparse, nx_dense, - nnz_sparse_Hess_Lagr_SS, NULL, NULL, values, + bret = hiopNLP->eval_Hess_Lagr(nll, + mll, + x, + new_x, + obj_factor, + lambda, + new_lambda, + nx_sparse, + nx_dense, + nnz_sparse_Hess_Lagr_SS, + NULL, + NULL, + values, HessMat, - nnz_sparse_Hess_Lagr_SD, NULL, NULL, NULL); + nnz_sparse_Hess_Lagr_SD, + NULL, + NULL, + NULL); if(!bret) return false; nnzit += nnz_sparse_Hess_Lagr_SS; - - //dense part - for(int i=0; i //! TODO: convert between IPOPT and HiOp err codes hiopNLP->solution_callback(hiop::Solve_Success, n, x, z_L, z_U, m, g, lambda, obj_value); - //free auxiliary buffers that may have been used by this adapter - delete JacDeq; delete JacDineq; delete HessDL; delete JacDeqineq; + // free auxiliary buffers that may have been used by this adapter + delete JacDeq; + delete JacDineq; + delete HessDL; + delete JacDeqineq; JacDeq = JacDineq = HessDL = JacDeqineq = NULL; - delete [] cons_eq_idxs; - delete [] cons_ineq_idxs; + delete[] cons_eq_idxs; + delete[] cons_ineq_idxs; cons_eq_idxs = cons_ineq_idxs = NULL; - + onecall_Jac_detected_ = false; }; - + private: hiopInterfaceMDS* hiopNLP; - int nx_sparse, nx_dense; // by convention, sparse variables comes first + int nx_sparse, nx_dense; // by convention, sparse variables comes first int nnz_sparse_Jaceq, nnz_sparse_Jacineq; int nnz_sparse_Hess_Lagr_SS, nnz_sparse_Hess_Lagr_SD; int n_eq, n_ineq; - index_type *cons_eq_idxs, *cons_ineq_idxs; + index_type *cons_eq_idxs, *cons_ineq_idxs; hiopMatrixDenseRowMajor *JacDeq, *JacDineq, *HessDL; - hiopMatrixDenseRowMajor *JacDeqineq; //this holds the full Jacobian when one-call Jacobian is activated + hiopMatrixDenseRowMajor* JacDeqineq; // this holds the full Jacobian when one-call Jacobian is activated bool onecall_Jac_detected_; - + /* Methods to block default compiler methods. * The compiler automatically generates the following three methods. * Since the default compiler implementation is generally not what - * you want (for all but the most simple classes), we usually + * you want (for all but the most simple classes), we usually * put the declarations of these methods in the private section * and never implement them. This prevents the compiler from * implementing an incorrect "default" behavior without us * knowing. (See Scott Meyers book, "Effective C++") - * + * */ hiopMDS2IpoptTNLP() {}; hiopMDS2IpoptTNLP(const hiopMDS2IpoptTNLP&) {}; @@ -692,49 +770,45 @@ class hiopMDS2IpoptTNLP : public TNLP //@} }; - - /* Adapter from Sparse NLP formulation to Ipopt's general TNLP */ class hiopSparse2IpoptTNLP : public TNLP { public: hiopSparse2IpoptTNLP(hiopInterfaceSparse* hiopNLP_) - : hiopNLP(hiopNLP_), - m_nx{0}, m_nnz_sparse_Jaceq{0}, m_nnz_sparse_Jacineq{0}, - m_nnz_sparse_Hess_Lagr{0}, - m_cons_eq_idxs{nullptr}, m_cons_ineq_idxs{nullptr}, -// m_Jac_eq{nullptr}, m_Jac_ineq{nullptr}, m_Hess{nullptr}, m_Jac{nullptr}, - m_onecall_Jac_detected_{false} - {}; + : hiopNLP(hiopNLP_), + m_nx{0}, + m_nnz_sparse_Jaceq{0}, + m_nnz_sparse_Jacineq{0}, + m_nnz_sparse_Hess_Lagr{0}, + m_cons_eq_idxs{nullptr}, + m_cons_ineq_idxs{nullptr}, + // m_Jac_eq{nullptr}, m_Jac_ineq{nullptr}, m_Hess{nullptr}, m_Jac{nullptr}, + m_onecall_Jac_detected_{false} {}; virtual ~hiopSparse2IpoptTNLP() { - delete [] m_cons_eq_idxs; - delete [] m_cons_ineq_idxs; -// delete [] m_Jac_eq; -// delete [] m_Jac_ineq; -// delete [] m_Hess; -// delete [] m_Jac; + delete[] m_cons_eq_idxs; + delete[] m_cons_ineq_idxs; + // delete [] m_Jac_eq; + // delete [] m_Jac_ineq; + // delete [] m_Hess; + // delete [] m_Jac; }; /* Overloads from TNLP */ /** Method to return some info about the nlp */ - bool get_nlp_info(Index& n, Index& m, Index& nnz_jac_g, - Index& nnz_h_lag, IndexStyleEnum& index_style) + bool get_nlp_info(Index& n, Index& m, Index& nnz_jac_g, Index& nnz_h_lag, IndexStyleEnum& index_style) { size_type nvars, ncons; - if(false==hiopNLP->get_prob_sizes(nvars, ncons)) - return false; + if(false == hiopNLP->get_prob_sizes(nvars, ncons)) return false; - if(false==hiopNLP->get_sparse_blocks_info(m_nx, - m_nnz_sparse_Jaceq, m_nnz_sparse_Jacineq, - m_nnz_sparse_Hess_Lagr)) - { + if(false == hiopNLP->get_sparse_blocks_info(m_nx, m_nnz_sparse_Jaceq, m_nnz_sparse_Jacineq, m_nnz_sparse_Hess_Lagr)) { return false; } nnz_jac_g = m_nnz_sparse_Jaceq + m_nnz_sparse_Jacineq; - n = (int)nvars; m=(int)ncons; + n = (int)nvars; + m = (int)ncons; nnz_h_lag = m_nnz_sparse_Hess_Lagr; index_style = TNLP::C_STYLE; @@ -744,105 +818,109 @@ class hiopSparse2IpoptTNLP : public TNLP /** Method to return the bounds for my problem */ bool get_bounds_info(Index n, Number* x_l, Number* x_u, Index m, Number* g_l, Number* g_u) { - bool bSuccess=true; - size_type nll=n, mll=m; - hiopInterfaceBase::NonlinearityType* types=new hiopInterfaceBase::NonlinearityType[n]; + bool bSuccess = true; + size_type nll = n, mll = m; + hiopInterfaceBase::NonlinearityType* types = new hiopInterfaceBase::NonlinearityType[n]; bSuccess = hiopNLP->get_vars_info(nll, x_l, x_u, types); delete[] types; if(bSuccess) { - types=new hiopInterfaceBase::NonlinearityType[m]; + types = new hiopInterfaceBase::NonlinearityType[m]; bSuccess = hiopNLP->get_cons_info(mll, g_l, g_u, types); delete[] types; } m_n_eq = m_n_ineq = 0; - for(int it=0; itget_starting_point(nll,x); + assert(false == init_z && "primal-dual restart not supported by the addapter"); + assert(false == init_lambda && "primal-dual restart not supported by the addapter"); + size_type nll = n; + return hiopNLP->get_starting_point(nll, x); } - /** Method to return the objective value */ bool eval_f(Index n, const Number* x, bool new_x, Number& obj_value) { - size_type nll=n; - return hiopNLP->eval_f(nll,x,new_x,obj_value); + size_type nll = n; + return hiopNLP->eval_f(nll, x, new_x, obj_value); } - /** Method to return the gradient of the objective */ bool eval_grad_f(Index n, const Number* x, bool new_x, Number* grad_f) { - size_type nll=n; - return hiopNLP->eval_grad_f(nll,x,new_x,grad_f); + size_type nll = n; + return hiopNLP->eval_grad_f(nll, x, new_x, grad_f); } - /** Method to return the constraint residuals */ // HiOp calls Eq and Ineq separately -> the interface expects that so we have to // mimic it bool eval_g(Index n, const Number* x, bool new_x, Index m, Number* g) { - size_type nll=n, mll=m; - bool bret=false; + size_type nll = n, mll = m; + bool bret = false; bool eq_call_failed = false; bool try_onecall_Jac = false; { - double g_eq[m_n_eq]; - size_type num_cons = m_n_eq; - bret = hiopNLP->eval_cons(nll, mll, num_cons, m_cons_eq_idxs, x, new_x,g_eq); - if(bret) { - for(int i=0; ieval_cons(nll, mll, num_cons, m_cons_eq_idxs, x, new_x, g_eq); + if(bret) { + for(int i = 0; i < m_n_eq; i++) g[m_cons_eq_idxs[i]] = g_eq[i]; + } else { + eq_call_failed = true; + } } { - double g_ineq[m_n_ineq]; - size_type num_cons = m_n_ineq; - bret = hiopNLP->eval_cons(nll, mll, num_cons, m_cons_ineq_idxs, x, new_x,g_ineq); - if(bret) { - for(int i=0; ieval_cons(nll, mll, num_cons, m_cons_ineq_idxs, x, new_x, g_ineq); + if(bret) { + for(int i = 0; i < m_n_ineq; i++) g[m_cons_ineq_idxs[i]] = g_ineq[i]; + } else { + if(!eq_call_failed) + return false; + else + try_onecall_Jac = true; + } } if(try_onecall_Jac) { @@ -852,32 +930,22 @@ class hiopSparse2IpoptTNLP : public TNLP return bret; } - /** Method to return: * 1) The structure of the jacobian (if "values" is NULL) * 2) The values of the jacobian (if "values" is not NULL) */ - bool eval_jac_g(Index n, - const Number* x, - bool new_x, - Index m, - Index nele_jac, - Index* iRow, - Index *jCol, - Number* values) + bool eval_jac_g(Index n, const Number* x, bool new_x, Index m, Index nele_jac, Index* iRow, Index* jCol, Number* values) { bool bret = true; size_type nll = n, mll = m; bool eq_call_failed = false; bool try_onecall_Jac = false; - if(values==NULL) { + if(values == NULL) { int nnzit = 0; - //Sparse Jac for Eq + // Sparse Jac for Eq { size_type num_cons = m_n_eq; - bret = hiopNLP->eval_Jac_cons(nll, mll, num_cons, m_cons_eq_idxs, - x, new_x, - m_nnz_sparse_Jaceq, iRow, jCol, NULL); + bret = hiopNLP->eval_Jac_cons(nll, mll, num_cons, m_cons_eq_idxs, x, new_x, m_nnz_sparse_Jaceq, iRow, jCol, NULL); if(bret) { nnzit += m_nnz_sparse_Jaceq; @@ -886,19 +954,25 @@ class hiopSparse2IpoptTNLP : public TNLP } } - //Sparse Jac for Ineq + // Sparse Jac for Ineq { size_type num_cons = m_n_ineq; - bret = hiopNLP->eval_Jac_cons(nll, mll, num_cons, m_cons_ineq_idxs, - x, new_x, - m_nnz_sparse_Jacineq, iRow+nnzit, jCol+nnzit, NULL); + bret = hiopNLP->eval_Jac_cons(nll, + mll, + num_cons, + m_cons_ineq_idxs, + x, + new_x, + m_nnz_sparse_Jacineq, + iRow + nnzit, + jCol + nnzit, + NULL); if(bret) { - //in-place shift of iRow and jCol for Jacineq - for(int it=nnzit; iteval_Jac_cons(nll, mll, x, new_x, m_nnz_sparse_Jaceq+m_nnz_sparse_Jacineq, iRow, jCol, values); + bret = hiopNLP->eval_Jac_cons(nll, mll, x, new_x, m_nnz_sparse_Jaceq + m_nnz_sparse_Jacineq, iRow, jCol, values); if(!bret) { return false; } - nnzit = m_nnz_sparse_Jaceq+m_nnz_sparse_Jacineq; + nnzit = m_nnz_sparse_Jaceq + m_nnz_sparse_Jacineq; assert(nnzit == nele_jac); } - } - else { - assert(values!=NULL); + } else { + assert(values != NULL); - //avoid unnecessary reallocations when one-call constraints/Jacobian is active + // avoid unnecessary reallocations when one-call constraints/Jacobian is active if(false == m_onecall_Jac_detected_) { int nnzit = 0; - //sparse Jac Eq + // sparse Jac Eq { size_type num_cons = m_n_eq; - bret = hiopNLP->eval_Jac_cons(nll, mll, num_cons, m_cons_eq_idxs, - x, new_x, - m_nnz_sparse_Jaceq, NULL, NULL, values); + bret = + hiopNLP->eval_Jac_cons(nll, mll, num_cons, m_cons_eq_idxs, x, new_x, m_nnz_sparse_Jaceq, NULL, NULL, values); if(bret) { - nnzit += m_nnz_sparse_Jaceq; assert(nnzit<=nele_jac); + nnzit += m_nnz_sparse_Jaceq; + assert(nnzit <= nele_jac); } else { eq_call_failed = true; } } - //sparse Jac Ineq + // sparse Jac Ineq { size_type num_cons = m_n_ineq; - bret = hiopNLP->eval_Jac_cons(nll, mll, num_cons, m_cons_ineq_idxs, - x, new_x, - m_nnz_sparse_Jacineq, NULL, NULL, values+nnzit); + bret = hiopNLP->eval_Jac_cons(nll, + mll, + num_cons, + m_cons_ineq_idxs, + x, + new_x, + m_nnz_sparse_Jacineq, + NULL, + NULL, + values + nnzit); if(bret) { - nnzit += m_nnz_sparse_Jacineq; assert(nnzit<=nele_jac); - assert(nnzit==nele_jac); + nnzit += m_nnz_sparse_Jacineq; + assert(nnzit <= nele_jac); + assert(nnzit == nele_jac); } else { if(!eq_call_failed) { return false; @@ -959,53 +1040,69 @@ class hiopSparse2IpoptTNLP : public TNLP try_onecall_Jac = true; } - //try one call Jacobian + // try one call Jacobian if(try_onecall_Jac) { - bret = hiopNLP->eval_Jac_cons(nll, mll, x, new_x, - m_nnz_sparse_Jaceq+m_nnz_sparse_Jacineq, NULL, NULL, values); - if(!bret) - return false; + bret = hiopNLP->eval_Jac_cons(nll, mll, x, new_x, m_nnz_sparse_Jaceq + m_nnz_sparse_Jacineq, NULL, NULL, values); + if(!bret) return false; - int nnzit = m_nnz_sparse_Jaceq+m_nnz_sparse_Jacineq; + int nnzit = m_nnz_sparse_Jaceq + m_nnz_sparse_Jacineq; assert(nnzit == nele_jac); } } return true; } - bool eval_h(Index n, const Number* x, bool new_x, - Number obj_factor, Index m, const Number* lambda, - bool new_lambda, Index nele_hess, Index* iRow, - Index* jCol, Number* values) + bool eval_h(Index n, + const Number* x, + bool new_x, + Number obj_factor, + Index m, + const Number* lambda, + bool new_lambda, + Index nele_hess, + Index* iRow, + Index* jCol, + Number* values) { - bool bret = true; size_type nll=n, mll=m; + bool bret = true; + size_type nll = n, mll = m; - if(values==NULL) { + if(values == NULL) { int nnzit = 0; - bret = hiopNLP->eval_Hess_Lagr(nll, mll, x, new_x, obj_factor, lambda, new_lambda, - m_nnz_sparse_Hess_Lagr, iRow, jCol, NULL); + bret = + hiopNLP + ->eval_Hess_Lagr(nll, mll, x, new_x, obj_factor, lambda, new_lambda, m_nnz_sparse_Hess_Lagr, iRow, jCol, NULL); if(!bret) return false; nnzit += m_nnz_sparse_Hess_Lagr; #ifndef NDEBUG - //nnzit += nx_dense*nx_dense; - assert(nnzit==nele_hess); + // nnzit += nx_dense*nx_dense; + assert(nnzit == nele_hess); #endif } else { - assert(values!=NULL); + assert(values != NULL); int nnzit = 0; - bret = hiopNLP->eval_Hess_Lagr(nll, mll, x, new_x, obj_factor, lambda, new_lambda, - m_nnz_sparse_Hess_Lagr, iRow, jCol, values); + bret = hiopNLP->eval_Hess_Lagr(nll, + mll, + x, + new_x, + obj_factor, + lambda, + new_lambda, + m_nnz_sparse_Hess_Lagr, + iRow, + jCol, + values); if(!bret) return false; nnzit += m_nnz_sparse_Hess_Lagr; #ifndef NDEBUG - //nnzit += nx_dense*nx_dense; - assert(nnzit==nele_hess); + // nnzit += nx_dense*nx_dense; + assert(nnzit == nele_hess); #endif } return true; @@ -1027,12 +1124,12 @@ class hiopSparse2IpoptTNLP : public TNLP //! we use hiop::Solve_Success -> //! TODO: convert between IPOPT and HiOp err codes hiopNLP->solution_callback(hiop::Solve_Success, n, x, z_L, z_U, m, g, lambda, obj_value); - //free auxiliary buffers that may have been used by this adapter -// delete m_Jac_eq; delete m_Jac_ineq; delete m_Hess; delete m_Jac; -// JacDeq = JacDineq = HessDL = JacDeqineq = NULL; + // free auxiliary buffers that may have been used by this adapter + // delete m_Jac_eq; delete m_Jac_ineq; delete m_Hess; delete m_Jac; + // JacDeq = JacDineq = HessDL = JacDeqineq = NULL; - delete [] m_cons_eq_idxs; - delete [] m_cons_ineq_idxs; + delete[] m_cons_eq_idxs; + delete[] m_cons_ineq_idxs; m_cons_eq_idxs = m_cons_ineq_idxs = NULL; m_onecall_Jac_detected_ = false; @@ -1040,12 +1137,12 @@ class hiopSparse2IpoptTNLP : public TNLP private: hiopInterfaceSparse* hiopNLP; - int m_nx; // by convention, sparse variables comes first - int m_nnz_sparse_Jaceq, m_nnz_sparse_Jacineq,m_nnz_sparse_Hess_Lagr; + int m_nx; // by convention, sparse variables comes first + int m_nnz_sparse_Jaceq, m_nnz_sparse_Jacineq, m_nnz_sparse_Hess_Lagr; int m_n_eq, m_n_ineq; index_type *m_cons_eq_idxs, *m_cons_ineq_idxs; -// hiopMatrixSparseTriplet *m_Jac_eq, *m_Jac_ineq, *m_Hess; -// hiopMatrixSparseTriplet *m_Jac; //this holds the full Jacobian when one-call Jacobian is activated + // hiopMatrixSparseTriplet *m_Jac_eq, *m_Jac_ineq, *m_Hess; + // hiopMatrixSparseTriplet *m_Jac; //this holds the full Jacobian when one-call Jacobian is activated bool m_onecall_Jac_detected_; @@ -1065,6 +1162,5 @@ class hiopSparse2IpoptTNLP : public TNLP //@} }; -} //end of namespace hiop +} // end of namespace hiop #endif - diff --git a/src/Drivers/IpoptAdapter/IpoptAdapterDriverPriDecEx1.cpp b/src/Drivers/IpoptAdapter/IpoptAdapterDriverPriDecEx1.cpp index 982e87f88..eabd479bf 100644 --- a/src/Drivers/IpoptAdapter/IpoptAdapterDriverPriDecEx1.cpp +++ b/src/Drivers/IpoptAdapter/IpoptAdapterDriverPriDecEx1.cpp @@ -2,7 +2,7 @@ #include "IpoptAdapter.hpp" -//use HiOp's SparseEx1 - sparse NLP +// use HiOp's SparseEx1 - sparse NLP #include "NlpSparseEx1.hpp" #include @@ -13,14 +13,14 @@ using namespace hiop; int main(int argv, char** argc) { - //instantiate a HiOp problem + // instantiate a HiOp problem // - SparseEx1 hiopNlp(3,1.0); + SparseEx1 hiopNlp(3, 1.0); // - //create + // create - //int n_sp = 12, n_de = 10; - //MdsEx2 hiopNlp(n_sp, n_de); + // int n_sp = 12, n_de = 10; + // MdsEx2 hiopNlp(n_sp, n_de); // Create a new instance of the Ipopt nlp // (use a SmartPtr, not raw) @@ -41,43 +41,42 @@ int main(int argv, char** argc) // app->Options()->SetNumericValue("bound_relax_factor", 0.); // app->Options()->SetNumericValue("constr_mult_init_max", 0.001); - - //app->Options()->SetNumericValue("tol", 1e-7); + // app->Options()->SetNumericValue("tol", 1e-7); app->Options()->SetStringValue("recalc_y", "no"); - //app->Options()->SetIntegerValue("print_level", 11); + // app->Options()->SetIntegerValue("print_level", 11); app->Options()->SetStringValue("mu_strategy", "monotone"); app->Options()->SetNumericValue("bound_frac", 1e-8); app->Options()->SetNumericValue("bound_push", 1e-8); - //app->Options()->SetNumericValue("slack_bound_push", 1e-24); + // app->Options()->SetNumericValue("slack_bound_push", 1e-24); app->Options()->SetNumericValue("bound_relax_factor", 0.); - //app->Options()->SetNumericValue("constr_mult_init_max", 0.001); + // app->Options()->SetNumericValue("constr_mult_init_max", 0.001); app->Options()->SetNumericValue("kappa1", 1e-8); app->Options()->SetNumericValue("kappa2", 1e-8); - + app->Options()->SetStringValue("duals_init", "zero"); - //app->Options()->SetStringValue("output_file", "ipopt.out"); - //app->Options()->SetStringValue("derivative_test", "second-order"); //"only-second-order" - // Initialize the IpoptApplication and process the options + // app->Options()->SetStringValue("output_file", "ipopt.out"); + // app->Options()->SetStringValue("derivative_test", "second-order"); //"only-second-order" + // Initialize the IpoptApplication and process the options ApplicationReturnStatus status; status = app->Initialize(); - if( status != Solve_Succeeded ) { - std::cout << std::endl << std::endl << "*** Error during initialization!" << std::endl; - return (int) status; - } + if(status != Solve_Succeeded) { + std::cout << std::endl << std::endl << "*** Error during initialization!" << std::endl; + return (int)status; + } // Ask Ipopt to solve the problem - status = app->OptimizeTNLP(mynlp); + status = app->OptimizeTNLP(mynlp); - if( status == Solve_Succeeded ) { - std::cout << std::endl << std::endl << "*** The problem solved!" << std::endl; - } else { - std::cout << std::endl << std::endl << "*** The problem FAILED!" << std::endl; - } + if(status == Solve_Succeeded) { + std::cout << std::endl << std::endl << "*** The problem solved!" << std::endl; + } else { + std::cout << std::endl << std::endl << "*** The problem FAILED!" << std::endl; + } - // As the SmartPtrs go out of scope, the reference count - // will be decremented and the objects will automatically - // be deleted. + // As the SmartPtrs go out of scope, the reference count + // will be decremented and the objects will automatically + // be deleted. - return (int) status; + return (int)status; } diff --git a/src/Drivers/IpoptAdapter/IpoptAdapterDriverPriDecEx2.cpp b/src/Drivers/IpoptAdapter/IpoptAdapterDriverPriDecEx2.cpp index f366f04dd..dc33b0a17 100644 --- a/src/Drivers/IpoptAdapter/IpoptAdapterDriverPriDecEx2.cpp +++ b/src/Drivers/IpoptAdapter/IpoptAdapterDriverPriDecEx2.cpp @@ -1,7 +1,7 @@ #include "IpIpoptApplication.hpp" #include "IpoptAdapter.hpp" -//use HiOp's SparseEx2 - sparse NLP +// use HiOp's SparseEx2 - sparse NLP #include "NlpSparseEx2.hpp" #include "NlpSparseEx1.hpp" @@ -16,18 +16,18 @@ using namespace hiop; int main(int argv, char** argc) { - //instantiate a HiOp problem + // instantiate a HiOp problem int nx = 1000; int S = 1920; - int nS = 5; - double x0[nx]; - for(int i=0;iSetStringValue("Hessian", "analytical_exact"); - nlp.options->SetStringValue("duals_update_type", "linear"); -// nlp.options->SetStringValue("duals_init", "zero"); // "lsq" or "zero" + nlp.options->SetStringValue("duals_update_type", "linear"); + // nlp.options->SetStringValue("duals_init", "zero"); // "lsq" or "zero" nlp.options->SetStringValue("compute_mode", "cpu"); nlp.options->SetStringValue("KKTLinsys", "xdycyd"); // nlp.options->SetStringValue("KKTLinsys", "full"); @@ -37,11 +37,10 @@ int main(int argv, char** argc) hiopAlgFilterIPMNewton solver(&nlp); hiopSolveStatus status0 = solver.run(); solver.getSolution(x0); - - - double x[nx+S*nx]; - PriDecEx2 hiopNlp(nx,S,nS); - hiopNlp.set_starting_point(x0); + + double x[nx + S * nx]; + PriDecEx2 hiopNlp(nx, S, nS); + hiopNlp.set_starting_point(x0); // Create a new instance of the Ipopt nlp // (use a SmartPtr, not raw) @@ -63,7 +62,6 @@ int main(int argv, char** argc) // app->Options()->SetNumericValue("bound_relax_factor", 0.); // app->Options()->SetNumericValue("constr_mult_init_max", 0.001); - // app->Options()->SetNumericValue("tol", 1e-7); // app->Options()->SetStringValue("recalc_y", "no"); // app->Options()->SetIntegerValue("print_level", 11); @@ -81,23 +79,23 @@ int main(int argv, char** argc) // Initialize the IpoptApplication and process the options ApplicationReturnStatus status; status = app->Initialize(); - if( status != Solve_Succeeded ) { - std::cout << std::endl << std::endl << "*** Error during initialization!" << std::endl; - return (int) status; - } + if(status != Solve_Succeeded) { + std::cout << std::endl << std::endl << "*** Error during initialization!" << std::endl; + return (int)status; + } - // Ask Ipopt to solve the problem - status = app->OptimizeTNLP(mynlp); + // Ask Ipopt to solve the problem + status = app->OptimizeTNLP(mynlp); - if( status == Solve_Succeeded ) { - std::cout << std::endl << std::endl << "*** The problem solved!" << std::endl; - } else { - std::cout << std::endl << std::endl << "*** The problem FAILED!" << std::endl; - } + if(status == Solve_Succeeded) { + std::cout << std::endl << std::endl << "*** The problem solved!" << std::endl; + } else { + std::cout << std::endl << std::endl << "*** The problem FAILED!" << std::endl; + } - // As the SmartPtrs go out of scope, the reference count - // will be decremented and the objects will automatically - // be deleted. + // As the SmartPtrs go out of scope, the reference count + // will be decremented and the objects will automatically + // be deleted. - return (int) status; + return (int)status; } diff --git a/src/Drivers/IpoptAdapter/IpoptAdapter_driver.cpp b/src/Drivers/IpoptAdapter/IpoptAdapter_driver.cpp index c0cbf79c2..12e2d1052 100644 --- a/src/Drivers/IpoptAdapter/IpoptAdapter_driver.cpp +++ b/src/Drivers/IpoptAdapter/IpoptAdapter_driver.cpp @@ -2,8 +2,8 @@ #include "IpoptAdapter.hpp" -//use HiOp's MdsEx1 - mixed dense-sparse QP -//#include "NlpMdsEx1.hpp" +// use HiOp's MdsEx1 - mixed dense-sparse QP +// #include "NlpMdsEx1.hpp" #include "NlpMdsEx2.hpp" #include @@ -14,20 +14,20 @@ using namespace hiop; int main(int argv, char** argc) { - //instantiate a HiOp problem + // instantiate a HiOp problem // -// MdsEx2 hiopNlp(300,100,true,true); - MdsEx2 hiopNlp(0,3,false,false,false); + // MdsEx2 hiopNlp(300,100,true,true); + MdsEx2 hiopNlp(0, 3, false, false, false); // - //create + // create + + // int n_sp = 12, n_de = 10; + // MdsEx2 hiopNlp(n_sp, n_de); - //int n_sp = 12, n_de = 10; - //MdsEx2 hiopNlp(n_sp, n_de); - // Create a new instance of the Ipopt nlp // (use a SmartPtr, not raw) SmartPtr mynlp = new hiopMDS2IpoptTNLP(&hiopNlp); - + // Create a new instance of IpoptApplication // (use a SmartPtr, not raw) // We are using the factory, since this allows us to compile this @@ -35,48 +35,46 @@ int main(int argv, char** argc) SmartPtr app = IpoptApplicationFactory(); // - // HiOp-compatible Ipopt Options (Ipopt behaves or should behave like HiOp) + // HiOp-compatible Ipopt Options (Ipopt behaves or should behave like HiOp) // // app->Options()->SetStringValue("recalc_y", "no"); // app->Options()->SetStringValue("mu_strategy", "monotone"); // app->Options()->SetNumericValue("bound_push", 1e-2); // app->Options()->SetNumericValue("bound_relax_factor", 0.); // app->Options()->SetNumericValue("constr_mult_init_max", 0.001); - - //app->Options()->SetNumericValue("tol", 1e-7); + // app->Options()->SetNumericValue("tol", 1e-7); app->Options()->SetStringValue("recalc_y", "no"); - //app->Options()->SetIntegerValue("print_level", 11); + // app->Options()->SetIntegerValue("print_level", 11); app->Options()->SetStringValue("mu_strategy", "monotone"); app->Options()->SetNumericValue("bound_frac", 1e-8); app->Options()->SetNumericValue("bound_push", 1e-8); - //app->Options()->SetNumericValue("slack_bound_push", 1e-24); + // app->Options()->SetNumericValue("slack_bound_push", 1e-24); app->Options()->SetNumericValue("bound_relax_factor", 0.); app->Options()->SetNumericValue("constr_mult_init_max", 0.001); - - - //app->Options()->SetStringValue("output_file", "ipopt.out"); - //app->Options()->SetStringValue("derivative_test", "second-order"); //"only-second-order" - // Initialize the IpoptApplication and process the options + + // app->Options()->SetStringValue("output_file", "ipopt.out"); + // app->Options()->SetStringValue("derivative_test", "second-order"); //"only-second-order" + // Initialize the IpoptApplication and process the options ApplicationReturnStatus status; status = app->Initialize(); - if( status != Solve_Succeeded ) { - std::cout << std::endl << std::endl << "*** Error during initialization!" << std::endl; - return (int) status; - } - + if(status != Solve_Succeeded) { + std::cout << std::endl << std::endl << "*** Error during initialization!" << std::endl; + return (int)status; + } + // Ask Ipopt to solve the problem - status = app->OptimizeTNLP(mynlp); - - if( status == Solve_Succeeded ) { - std::cout << std::endl << std::endl << "*** The problem solved!" << std::endl; - } else { - std::cout << std::endl << std::endl << "*** The problem FAILED!" << std::endl; - } - - // As the SmartPtrs go out of scope, the reference count - // will be decremented and the objects will automatically - // be deleted. - - return (int) status; + status = app->OptimizeTNLP(mynlp); + + if(status == Solve_Succeeded) { + std::cout << std::endl << std::endl << "*** The problem solved!" << std::endl; + } else { + std::cout << std::endl << std::endl << "*** The problem FAILED!" << std::endl; + } + + // As the SmartPtrs go out of scope, the reference count + // will be decremented and the objects will automatically + // be deleted. + + return (int)status; } diff --git a/src/Drivers/MDS/NlpMdsEx1.hpp b/src/Drivers/MDS/NlpMdsEx1.hpp index 4b9f27847..1a1fa0614 100644 --- a/src/Drivers/MDS/NlpMdsEx1.hpp +++ b/src/Drivers/MDS/NlpMdsEx1.hpp @@ -3,9 +3,9 @@ #include "hiopInterface.hpp" -//this include is not needed in general -//we use hiopMatrixDense in this particular example for convienience -#include "hiopMatrixDenseRowMajor.hpp" +// this include is not needed in general +// we use hiopMatrixDense in this particular example for convienience +#include "hiopMatrixDenseRowMajor.hpp" #include "LinAlgFactory.hpp" #ifdef HIOP_USE_MPI @@ -16,7 +16,7 @@ #endif #include -#include //for memcpy +#include //for memcpy #include #include @@ -42,14 +42,14 @@ using index_type = hiop::index_type; * x <= 3 * s>=0 * -4 <=y_1 <=4, the rest of y are free - * + * * The vector 'y' is of dimension nd = ns (can be changed in the constructor) * Dense matrices Qd and Md are such that * Qd = two on the diagonal, one on the first offdiagonals, zero elsewhere * Md = minus one everywhere * e = vector of all ones * - * Coding of the problem in MDS HiOp input: order of variables need to be [x,s,y] + * Coding of the problem in MDS HiOp input: order of variables need to be [x,s,y] * since [x,s] are the so-called sparse variables and y are the dense variables */ @@ -57,34 +57,40 @@ class MdsEx1 : public hiop::hiopInterfaceMDS { public: MdsEx1(int ns_, bool empty_sp_row = false) - : MdsEx1(ns_, ns_, empty_sp_row) - { - } - + : MdsEx1(ns_, ns_, empty_sp_row) + {} + MdsEx1(int ns_, int nd_, bool empty_sp_row = false) - : ns(ns_), sol_x_(NULL), sol_zl_(NULL), sol_zu_(NULL), sol_lambda_(NULL), empty_sp_row_(empty_sp_row) + : ns(ns_), + sol_x_(NULL), + sol_zl_(NULL), + sol_zu_(NULL), + sol_lambda_(NULL), + empty_sp_row_(empty_sp_row) { - if(ns<0) { + if(ns < 0) { ns = 0; } else { - if(4*(ns/4) != ns) { - ns = 4*((4+ns)/4); - printf("[warning] number (%d) of sparse vars is not a multiple ->was altered to %d\n", ns_, ns); + if(4 * (ns / 4) != ns) { + ns = 4 * ((4 + ns) / 4); + printf("[warning] number (%d) of sparse vars is not a multiple ->was altered to %d\n", ns_, ns); } } - if(nd_<0) nd=0; - else nd = nd_; + if(nd_ < 0) + nd = 0; + else + nd = nd_; - Q = hiop::LinearAlgebraFactory::create_matrix_dense("DEFAULT", nd, nd); + Q = hiop::LinearAlgebraFactory::create_matrix_dense("DEFAULT", nd, nd); Q->setToConstant(1e-8); Q->addDiagonal(2.); double* Qa = Q->local_data(); - for(int i=1; i=4 && "number of variables should be greater than 4 for this example"); - assert(n==2*ns+nd); - - //x - for(int i=0; i=4 && "number of variables should be greater than 4 for this example"); + assert(n == 2 * ns + nd); + + // x + for(int i = 0; i < ns; ++i) xlow[i] = -1e+20; + // s + for(int i = ns; i < 2 * ns; ++i) xlow[i] = 0.; + // y + xlow[2 * ns] = -4.; + for(int i = 2 * ns + 1; i < n; ++i) xlow[i] = -1e+20; + + // x + for(int i = 0; i < ns; ++i) xupp[i] = 3.; + // s + for(int i = ns; i < 2 * ns; ++i) xupp[i] = +1e+20; + // y + xupp[2 * ns] = 4.; + for(int i = 2 * ns + 1; i < n; ++i) xupp[i] = +1e+20; + + for(int i = 0; i < n; ++i) type[i] = hiopNonlinear; return true; } bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m==ns+3*haveIneq); + assert(m == ns + 3 * haveIneq); int i; - //x+s - Md y = 0, i=1,...,ns - for(i=0; i=4); - assert(Q->n()==nd); assert(Q->m()==nd); - obj_value=0.;//x[0]*(x[0]-1.); - //sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns} + 0.5 y'*Qd*y + 0.5 s^T s - for(int i=0; i=4); + assert(Q->n() == nd); + assert(Q->m() == nd); + obj_value = 0.; // x[0]*(x[0]-1.); + // sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns} + 0.5 y'*Qd*y + 0.5 s^T s + for(int i = 0; i < ns; i++) obj_value += x[i] * (x[i] - 1.); obj_value *= 0.5; - double term2=0.; - const double* y = x+2*ns; + double term2 = 0.; + const double* y = x + 2 * ns; Q->timesVec(0.0, _buf_y, 1., y); - for(int i=0; itimesVec(1.0, cons, 1.0, y); } return true; } - - //sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns} + 0.5 y'*Qd*y + 0.5 s^T s + + // sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns} + 0.5 y'*Qd*y + 0.5 s^T s bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) { //! assert(ns>=4); assert(Q->n()==ns/4); assert(Q->m()==ns/4); - //x_i - 0.5 - for(int i=0; itimesVec(0.0, gradf_y, 1., y); - //s - const double* s=x+ns; - double* gradf_s = gradf+ns; - for(int i=0; i0) { - //sparse Jacobian eq w.r.t. x and s - //x + assert(num_cons == ns || num_cons == 3 * haveIneq); + + if(iJacS != NULL && jJacS != NULL) { + int nnzit = 0; + for(int itrow = 0; itrow < num_cons; itrow++) { + const int con_idx = (int)idx_cons[itrow]; + if(con_idx < ns && ns > 0) { + // sparse Jacobian eq w.r.t. x and s + // x iJacS[nnzit] = con_idx; jJacS[nnzit] = con_idx; nnzit++; - //s + // s iJacS[nnzit] = con_idx; - jJacS[nnzit] = con_idx+ns; + jJacS[nnzit] = con_idx + ns; nnzit++; } else if(haveIneq) { - //sparse Jacobian ineq w.r.t x and s - if(con_idx-ns==0 && ns>0) { - //w.r.t x_1 + // sparse Jacobian ineq w.r.t x and s + if(con_idx - ns == 0 && ns > 0) { + // w.r.t x_1 iJacS[nnzit] = 0; jJacS[nnzit] = 0; nnzit++; - //w.r.t s - for(int i=0; i0) { - //w.r.t x_2 or x_3 - iJacS[nnzit] = con_idx-ns; - jJacS[nnzit] = con_idx-ns; + if(((con_idx - ns == 1 && !empty_sp_row_) || con_idx - ns == 2) && ns > 0) { + // w.r.t x_2 or x_3 + iJacS[nnzit] = con_idx - ns; + jJacS[nnzit] = con_idx - ns; nnzit++; } } } } - assert(nnzit==nnzJacS); - } - //values for sparse Jacobian if requested by the solver - if(MJacS!=NULL) { - int nnzit=0; - for(int itrow=0; itrow0) { - //sparse Jacobian EQ w.r.t. x and s - //x - MJacS[nnzit] = 1.; - nnzit++; - - //s - MJacS[nnzit] = 1.; - nnzit++; - - } else if(haveIneq) { - //sparse Jacobian INEQ w.r.t x and s - if(con_idx-ns==0 && ns>0) { - //w.r.t x_1 - MJacS[nnzit] = 1.; - nnzit++; - //w.r.t s - for(int i=0; i0) { - //w.r.t x_2 or x_3 - MJacS[nnzit] = 1.; - nnzit++; - } - } - } - } - assert(nnzit==nnzJacS); + assert(nnzit == nnzJacS); } - - //dense Jacobian w.r.t y - if(JacD!=NULL) { - bool isEq=false; - for(int itrow=0; itrow 0) { + // sparse Jacobian EQ w.r.t. x and s + // x + MJacS[nnzit] = 1.; + nnzit++; + + // s + MJacS[nnzit] = 1.; + nnzit++; + + } else if(haveIneq) { + // sparse Jacobian INEQ w.r.t x and s + if(con_idx - ns == 0 && ns > 0) { + // w.r.t x_1 + MJacS[nnzit] = 1.; + nnzit++; + // w.r.t s + for(int i = 0; i < ns; i++) { + MJacS[nnzit] = 1.; + nnzit++; + } + } else { + if(((con_idx - ns == 1 && !empty_sp_row_) || con_idx - ns == 2) && ns > 0) { + // w.r.t x_2 or x_3 + MJacS[nnzit] = 1.; + nnzit++; + } + } + } + } + assert(nnzit == nnzJacS); + } + + // dense Jacobian w.r.t y + if(JacD != NULL) { + bool isEq = false; + for(int itrow = 0; itrow < num_cons; itrow++) { + const int con_idx = (int)idx_cons[itrow]; + if(con_idx < ns) { + isEq = true; + assert(num_cons == ns); continue; } else if(haveIneq) { - //do an in place fill-in for the ineq Jacobian corresponding to e^T - assert(con_idx-ns==0 || con_idx-ns==1 || con_idx-ns==2); - assert(num_cons==3); - for(int i=0; ilocal_data(), ns*nd*sizeof(double)); + memcpy(JacD, Md->local_data(), ns * nd * sizeof(double)); } } return true; } - + bool eval_Hess_Lagr(const size_type& n, - const size_type& m, + const size_type& m, const double* x, bool new_x, const double& obj_factor, const double* lambda, bool new_lambda, const size_type& nsparse, - const size_type& ndense, + const size_type& ndense, const size_type& nnzHSS, index_type* iHSS, index_type* jHSS, - double* MHSS, + double* MHSS, double* HDD, size_type& nnzHSD, index_type* iHSD, index_type* jHSD, double* MHSD) { - //Note: lambda is not used since all the constraints are linear and, therefore, do - //not contribute to the Hessian of the Lagrangian + // Note: lambda is not used since all the constraints are linear and, therefore, do + // not contribute to the Hessian of the Lagrangian - assert(nnzHSS==2*ns); - assert(nnzHSD==0); - assert(iHSD==NULL); assert(jHSD==NULL); assert(MHSD==NULL); + assert(nnzHSS == 2 * ns); + assert(nnzHSD == 0); + assert(iHSD == NULL); + assert(jHSD == NULL); + assert(MHSD == NULL); - if(iHSS!=NULL && jHSS!=NULL) { - for(int i=0; i<2*ns; i++) iHSS[i] = jHSS[i] = i; + if(iHSS != NULL && jHSS != NULL) { + for(int i = 0; i < 2 * ns; i++) iHSS[i] = jHSS[i] = i; } - if(MHSS!=NULL) { - for(int i=0; i<2*ns; i++) MHSS[i] = obj_factor; + if(MHSS != NULL) { + for(int i = 0; i < 2 * ns; i++) MHSS[i] = obj_factor; } - if(HDD!=NULL) { - const int nx_dense_squared = nd*nd; - //memcpy(HDD[0], Q->local_buffer(), nx_dense_squared*sizeof(double)); + if(HDD != NULL) { + const int nx_dense_squared = nd * nd; + // memcpy(HDD[0], Q->local_buffer(), nx_dense_squared*sizeof(double)); const double* Qv = Q->local_data(); - for(int i=0; itimesVec(1.0, cons, 1.0, y); return true; } - virtual bool - eval_Jac_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, - const size_type& nsparse, - const size_type& ndense, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS, - double* JacD) + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + const size_type& nsparse, + const size_type& ndense, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS, + double* JacD) { - return false; // so that HiOp will call the one-call full-Jacob function below + return false; // so that HiOp will call the one-call full-Jacob function below } - virtual bool - eval_Jac_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - const size_type& nsparse, - const size_type& ndense, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS, - double* JacD) + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const size_type& nsparse, + const size_type& ndense, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS, + double* JacD) { - assert(m==ns+3*haveIneq); + assert(m == ns + 3 * haveIneq); - if(iJacS!=NULL && jJacS!=NULL) { - int nnzit=0; - for(int con_idx=0; con_idx0) { - for(int con_idx=ns; con_idx 0) { + for(int con_idx = ns; con_idx < m; ++con_idx) { + // sparse Jacobian ineq w.r.t x and s + if(con_idx == ns) { + // w.r.t x_1 iJacS[nnzit] = con_idx; jJacS[nnzit] = 0; nnzit++; - //w.r.t s - for(int i=0; i0) { - for(int con_idx=ns; con_idx 0) { + for(int con_idx = ns; con_idx < m; ++con_idx) { + // sparse Jacobian INEQ w.r.t x and s + if(con_idx - ns == 0) { + // w.r.t x_1 MJacS[nnzit] = 1.; nnzit++; - //w.r.t s - for(int i=0; ilocal_data(), ns*nd*sizeof(double)); - + + // dense Jacobian w.r.t y + if(JacD != NULL) { + // just copy the dense Jacobian corresponding to equalities + memcpy(JacD, Md->local_data(), ns * nd * sizeof(double)); + if(haveIneq) { - assert(ns+3 == m); - //do an in place fill-in for the ineq Jacobian corresponding to e^T - for(int i=0; i<3*nd; ++i) - JacD[ns*nd+i] = 1.; + assert(ns + 3 == m); + // do an in place fill-in for the ineq Jacobian corresponding to e^T + for(int i = 0; i < 3 * nd; ++i) JacD[ns * nd + i] = 1.; } } return true; diff --git a/src/Drivers/MDS/NlpMdsEx1Driver.cpp b/src/Drivers/MDS/NlpMdsEx1Driver.cpp index 610dd1928..990e86310 100644 --- a/src/Drivers/MDS/NlpMdsEx1Driver.cpp +++ b/src/Drivers/MDS/NlpMdsEx1Driver.cpp @@ -13,90 +13,93 @@ using namespace hiop; // static bool self_check(size_type n, double obj_value); -static bool parse_arguments(int argc, char **argv, +static bool parse_arguments(int argc, + char** argv, bool& self_check, size_type& n_sp, size_type& n_de, bool& one_call_cons, bool& empty_sp_row) { - self_check=false; + self_check = false; empty_sp_row = false; n_sp = 1000; n_de = 1000; one_call_cons = false; switch(argc) { - case 1: - //no arguments - return true; - break; - case 6: // 5 arguments + case 1: + // no arguments + return true; + break; + case 6: // 5 arguments { - if(std::string(argv[5]) == "-selfcheck") - self_check=true; + if(std::string(argv[5]) == "-selfcheck") self_check = true; } - case 5: // 4 arguments + case 5: // 4 arguments { if(std::string(argv[4]) == "-selfcheck") { - self_check=true; + self_check = true; } if(std::string(argv[4]) == "-empty_sp_row") { - empty_sp_row=true; - } + empty_sp_row = true; + } } - case 4: // 3 arguments + case 4: // 3 arguments { - one_call_cons = (bool) atoi(argv[3]); + one_call_cons = (bool)atoi(argv[3]); } - case 3: //2 arguments + case 3: // 2 arguments { n_de = atoi(argv[2]); - if(n_de<0) n_de = 0; + if(n_de < 0) n_de = 0; } - case 2: //1 argument + case 2: // 1 argument { n_sp = atoi(argv[1]); - if(n_sp<0) n_sp = 0; - } - break; - default: - return false; //5 or more arguments + if(n_sp < 0) n_sp = 0; + } break; + default: + return false; // 5 or more arguments } - if(self_check && (n_sp!=400 || n_de!=100) ) - return false; - + if(self_check && (n_sp != 400 || n_de != 100)) return false; + return true; }; static void usage(const char* exeName) { - printf("HiOp driver %s that solves a synthetic problem of variable size in the " - "mixed dense-sparse formulation.\n", exeName); + printf( + "HiOp driver %s that solves a synthetic problem of variable size in the " + "mixed dense-sparse formulation.\n", + exeName); printf("Usage: \n"); printf(" '$ %s sp_vars_size de_vars_size eq_ineq_combined_nlp -empty_sp_row -selfcheck'\n", exeName); printf("Arguments, all integers, excepting string '-selfcheck'\n"); printf(" 'sp_vars_size': # of sparse variables [default 400, optional]\n"); printf(" 'de_vars_size': # of dense variables [default 100, optional]\n"); printf(" '-empty_sp_row': set an empty row in sparser inequality Jacobian. [optional]\n"); - printf(" '-selfcheck': compares the optimal objective with sp_vars_size being 400 and " - "de_vars_size being 100 (these two exact values must be passed as arguments). [optional]\n"); - printf(" 'eq_ineq_combined_nlp': 0 or 1, specifying whether the NLP formulation with split " - "constraints should be used (0) or not (1) [default 0, optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with sp_vars_size being 400 and " + "de_vars_size being 100 (these two exact values must be passed as arguments). [optional]\n"); + printf( + " 'eq_ineq_combined_nlp': 0 or 1, specifying whether the NLP formulation with split " + "constraints should be used (0) or not (1) [default 0, optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); - //int ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); + // int ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(MPI_SUCCESS==ierr); if(comm_size != 1) { - printf("[error] driver detected more than one rank but the driver should be run " - "in serial only; will exit\n"); + printf( + "[error] driver detected more than one rank but the driver should be run " + "in serial only; will exit\n"); MPI_Finalize(); return 1; } @@ -114,10 +117,10 @@ int main(int argc, char **argv) return 1; } - double obj_value=-1e+20; + double obj_value = -1e+20; hiopSolveStatus status; - //user's NLP -> implementation of hiop::hiopInterfaceMDS + // user's NLP -> implementation of hiop::hiopInterfaceMDS MdsEx1* my_nlp; if(one_call_cons) { my_nlp = new MdsEx1OneCallCons(n_sp, n_de, has_empty_sp_row); @@ -142,13 +145,14 @@ int main(int argc, char **argv) status = solver.run(); obj_value = solver.getObjective(); - int ret_code = 0; //0 success, -1 failure - + int ret_code = 0; // 0 success, -1 failure - if(selfCheck) { // && has_empty_sp_row) { - if(fabs(obj_value-(-4.9994906229741609e+01))>1e-6) { - printf("selfcheck: objective mismatch for MDS Ex1 problem with 400 sparse variables and 100 " - "dense variables did. BTW, obj=%18.12e was returned by HiOp.\n", obj_value); + if(selfCheck) { // && has_empty_sp_row) { + if(fabs(obj_value - (-4.9994906229741609e+01)) > 1e-6) { + printf( + "selfcheck: objective mismatch for MDS Ex1 problem with 400 sparse variables and 100 " + "dense variables did. BTW, obj=%18.12e was returned by HiOp.\n", + obj_value); ret_code = -1; } else { printf("selfcheck passed\n"); @@ -156,18 +160,17 @@ int main(int argc, char **argv) } } else { - if(status<0) { - if(rank==0) { + if(status < 0) { + if(rank == 0) { printf("solver returned negative solve status: %d (objective is %18.12e)\n", status, obj_value); - } + } ret_code = -1; } else { - if(rank==0) { + if(rank == 0) { printf("solver returned successfully: objective is %18.12e)\n", obj_value); - } + } ret_code = 0; - - } + } } #if 0 @@ -233,7 +236,7 @@ int main(int argc, char **argv) #endif delete my_nlp; - + #ifdef HIOP_USE_MAGMA magma_finalize(); #endif diff --git a/src/Drivers/MDS/NlpMdsEx1RajaDriver.cpp b/src/Drivers/MDS/NlpMdsEx1RajaDriver.cpp index 05e90d9af..4af209e93 100644 --- a/src/Drivers/MDS/NlpMdsEx1RajaDriver.cpp +++ b/src/Drivers/MDS/NlpMdsEx1RajaDriver.cpp @@ -13,93 +13,95 @@ #include #include - using namespace hiop; -static bool parse_arguments(int argc, char **argv, +static bool parse_arguments(int argc, + char** argv, bool& self_check, size_type& n_sp, size_type& n_de, bool& one_call_cons, bool& empty_sp_row) { - self_check=false; + self_check = false; empty_sp_row = false; n_sp = 1000; n_de = 1000; one_call_cons = false; switch(argc) { - case 1: - //no arguments - return true; - break; - case 6: // 5 arguments + case 1: + // no arguments + return true; + break; + case 6: // 5 arguments { - if(std::string(argv[5]) == "-selfcheck") - self_check=true; + if(std::string(argv[5]) == "-selfcheck") self_check = true; } - case 5: // 4 arguments + case 5: // 4 arguments { if(std::string(argv[4]) == "-selfcheck") { - self_check=true; + self_check = true; } if(std::string(argv[4]) == "-empty_sp_row") { - empty_sp_row=true; - } + empty_sp_row = true; + } } - case 4: // 3 arguments + case 4: // 3 arguments { - one_call_cons = (bool) atoi(argv[3]); + one_call_cons = (bool)atoi(argv[3]); } - case 3: //2 arguments + case 3: // 2 arguments { n_de = atoi(argv[2]); - if(n_de<0) n_de = 0; + if(n_de < 0) n_de = 0; } - case 2: //1 argument + case 2: // 1 argument { n_sp = atoi(argv[1]); - if(n_sp<0) n_sp = 0; - } - break; - default: - return false; //5 or more arguments + if(n_sp < 0) n_sp = 0; + } break; + default: + return false; // 5 or more arguments } - if(self_check && (n_sp!=400 || n_de!=100) ) - return false; - + if(self_check && (n_sp != 400 || n_de != 100)) return false; + return true; }; static void usage(const char* exeName) { - printf("HiOp driver %s that solves a synthetic problem of variable size in the " - "mixed dense-sparse formulation.\n", exeName); + printf( + "HiOp driver %s that solves a synthetic problem of variable size in the " + "mixed dense-sparse formulation.\n", + exeName); printf("Usage: \n"); printf(" '$ %s sp_vars_size de_vars_size eq_ineq_combined_nlp -empty_sp_row -selfcheck'\n", exeName); printf("Arguments, all integers, excepting string '-selfcheck'\n"); printf(" 'sp_vars_size': # of sparse variables [default 400, optional]\n"); printf(" 'de_vars_size': # of dense variables [default 100, optional]\n"); printf(" '-empty_sp_row': set an empty row in sparser inequality Jacobian. [optional]\n"); - printf(" '-selfcheck': compares the optimal objective with sp_vars_size being 400 and " - "de_vars_size being 100 (these two exact values must be passed as arguments). [optional]\n"); - printf(" 'eq_ineq_combined_nlp': 0 or 1, specifying whether the NLP formulation with split " - "constraints should be used (0) or not (1) [default 0, optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with sp_vars_size being 400 and " + "de_vars_size being 100 (these two exact values must be passed as arguments). [optional]\n"); + printf( + " 'eq_ineq_combined_nlp': 0 or 1, specifying whether the NLP formulation with split " + "constraints should be used (0) or not (1) [default 0, optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); - //int ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); + // int ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(MPI_SUCCESS==ierr); if(comm_size != 1) { - printf("[error] driver detected more than one rank but the driver should be run " - "in serial only; will exit\n"); + printf( + "[error] driver detected more than one rank but the driver should be run " + "in serial only; will exit\n"); MPI_Finalize(); return 1; } @@ -124,21 +126,17 @@ int main(int argc, char **argv) return 1; } - double obj_value=-1e+20; + double obj_value = -1e+20; hiopSolveStatus status; - //user's NLP -> implementation of hiop::hiopInterfaceMDS + // user's NLP -> implementation of hiop::hiopInterfaceMDS MdsEx1* my_nlp; - if(one_call_cons) - { + if(one_call_cons) { my_nlp = new MdsEx1OneCallCons(n_sp, n_de, mem_space); - } - else - { + } else { my_nlp = new MdsEx1(n_sp, n_de, mem_space); } - hiopNlpMDS nlp(*my_nlp); nlp.options->SetStringValue("duals_update_type", "linear"); @@ -158,16 +156,17 @@ int main(int argc, char **argv) status = solver.run(); obj_value = solver.getObjective(); - + if(selfCheck && has_empty_sp_row) { - if(fabs(obj_value-(-4.9994906229741609e+01))>1e-6) { - printf("selfcheck: objective mismatch for MDS Ex1 problem with 400 sparse variables and 100 " - "dense variables did. BTW, obj=%18.12e was returned by HiOp.\n", obj_value); + if(fabs(obj_value - (-4.9994906229741609e+01)) > 1e-6) { + printf( + "selfcheck: objective mismatch for MDS Ex1 problem with 400 sparse variables and 100 " + "dense variables did. BTW, obj=%18.12e was returned by HiOp.\n", + obj_value); return -1; } - } else if(status<0) { - if(rank==0) - printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); + } else if(status < 0) { + if(rank == 0) printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); return -1; } @@ -231,9 +230,9 @@ int main(int argc, char **argv) } } #endif - + delete my_nlp; - + #ifdef HIOP_USE_MAGMA magma_finalize(); #endif diff --git a/src/Drivers/MDS/NlpMdsEx2.hpp b/src/Drivers/MDS/NlpMdsEx2.hpp index b92d0bfda..056ba42d1 100644 --- a/src/Drivers/MDS/NlpMdsEx2.hpp +++ b/src/Drivers/MDS/NlpMdsEx2.hpp @@ -3,9 +3,9 @@ #include "hiopInterface.hpp" -//this include is not needed in general -//we use hiopMatrixDense in this particular example for convienience -#include "hiopMatrixDense.hpp" +// this include is not needed in general +// we use hiopMatrixDense in this particular example for convienience +#include "hiopMatrixDense.hpp" #include "LinAlgFactory.hpp" #ifdef HIOP_USE_MPI @@ -16,14 +16,14 @@ #endif #include -#include //for memcpy +#include //for memcpy #include #include using size_type = hiop::size_type; using index_type = hiop::index_type; -/** Nonlinear *highly nonconvex* and *rank deficient* problem test for the Filter IPM +/** Nonlinear *highly nonconvex* and *rank deficient* problem test for the Filter IPM * Newton of HiOp. It uses a mixed Dense-Sparse NLP formulation. The problem is based * on MDS Ex1. * @@ -41,52 +41,52 @@ using index_type = hiop::index_type; * s.t. [-inf] <= [ x_1 + e^T s + x_2 + 2e^T y] <= [ 4 ] (rnkdef-con1.1) * [ -4 ] <= [ x_1 + e^T s + x_3 + 2e^T y] <= [inf] (rnkdef-con1.2) * x+s + Md y = 0 (rnkdef-con2) - * + * * The vector 'y' is of dimension nd = ns (can be changed on construction) * Dense matrices Qd and Md are such that * Qd = two on the diagonal, one on the first offdiagonals, zero elsewhere * Md = minus one everywhere, matrix ns x nd * e = vector of all ones * - * Coding of the problem in MDS HiOp input: order of variables need to be [x,s,y] + * Coding of the problem in MDS HiOp input: order of variables need to be [x,s,y] * since [x,s] are the so-called sparse variables and y are the dense variables */ class MdsEx2 : public hiop::hiopInterfaceMDS { public: MdsEx2(int ns) - : MdsEx2(ns, ns, true, true, true) - { - } - + : MdsEx2(ns, ns, true, true, true) + {} + MdsEx2(int ns, int nd, bool convex_obj, bool rankdefic_Jac_eq, bool rankdefic_Jac_ineq) - : ns_(ns), - rankdefic_eq_(rankdefic_Jac_eq), - rankdefic_ineq_(rankdefic_Jac_ineq), - convex_obj_(convex_obj) + : ns_(ns), + rankdefic_eq_(rankdefic_Jac_eq), + rankdefic_ineq_(rankdefic_Jac_ineq), + convex_obj_(convex_obj) { - if(ns_<0) { + if(ns_ < 0) { ns_ = 0; } else { - if(4*(ns_/4) != ns_) { - ns_ = 4*((4+ns_)/4); - printf("[warning] number (%d) of sparse vars is not a multiple of n; was altered to %d\n", - ns, ns_); + if(4 * (ns_ / 4) != ns_) { + ns_ = 4 * ((4 + ns_) / 4); + printf("[warning] number (%d) of sparse vars is not a multiple of n; was altered to %d\n", ns, ns_); } } - if(nd<0) nd_=0; - else nd_ = nd; + if(nd < 0) + nd_ = 0; + else + nd_ = nd; - Q_ = hiop::LinearAlgebraFactory::create_matrix_dense("DEFAULT", nd_,nd_); + Q_ = hiop::LinearAlgebraFactory::create_matrix_dense("DEFAULT", nd_, nd_); Q_->setToConstant(0.); - Q_->addDiagonal(2. * (2*convex_obj_-1)); //-2 or 2 + Q_->addDiagonal(2. * (2 * convex_obj_ - 1)); //-2 or 2 double* Qa = Q_->local_data(); - for(int i=1; i=4 && "number of variables should be greater than 4 for this example"); - assert(n == 2*ns_ + nd_); - - //x - for(int i=0; i=4 && "number of variables should be greater than 4 for this example"); + assert(n == 2 * ns_ + nd_); + + // x + for(int i = 0; i < ns_; ++i) xlow[i] = -10.; + // s + for(int i = ns_; i < 2 * ns_; ++i) xlow[i] = 0.; + // y + for(int i = 2 * ns_; i < n; ++i) xlow[i] = -4.; + + // x + for(int i = 0; i < ns_; ++i) xupp[i] = 3.; + // s + for(int i = ns_; i < 2 * ns_; ++i) xupp[i] = +1e+20; + // y + xupp[2 * ns_] = 4.; + for(int i = 2 * ns_ + 1; i < n; ++i) xupp[i] = 4.; + + for(int i = 0; i < n; ++i) type[i] = hiopNonlinear; return true; } bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m == ns_ + 3 + 2*rankdefic_ineq_ + ns_*rankdefic_eq_); + assert(m == ns_ + 3 + 2 * rankdefic_ineq_ + ns_ * rankdefic_eq_); int i; - //x+s - Md y = 0, i=1,...,ns - for(i=0; i=ns_ + 3 + 2*rankdefic_ineq_); + for(; i < m;) { + assert(i >= ns_ + 3 + 2 * rankdefic_ineq_); clow[i] = 0.; cupp[i++] = 0.; } } - assert(i==m); + assert(i == m); - for(i=0; i=4); - assert(Q_->n()==nd_); assert(Q_->m()==nd_); - obj_value=0.;//x[0]*(x[0]-1.); - //sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns} + 0.5 y'*Qd*y + 0.5 s^T s - for(int i=0; i=4); + assert(Q_->n() == nd_); + assert(Q_->m() == nd_); + obj_value = 0.; // x[0]*(x[0]-1.); + // sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns} + 0.5 y'*Qd*y + 0.5 s^T s + for(int i = 0; i < ns_; i++) obj_value += x[i] * (x[i] - 1.); obj_value *= 0.5; - obj_value *= (2*convex_obj_-1); //switch sign if non-convex problem is desired + obj_value *= (2 * convex_obj_ - 1); // switch sign if non-convex problem is desired - double term2=0.; - const double* y = x+2*ns_; + double term2 = 0.; + const double* y = x + 2 * ns_; Q_->timesVec(0.0, _buf_y_, 1., y); - for(int i=0; itimesVec(1.0, cons, 1.0, y); //[ -2 ] <= [ x_1 + e^T s] [e^T] y <= [ 2 ] cons[con_idx] = x[0]; - for(int i=0; itimesVec(1.0, cons+(m-ns_), 1.0, y); + Md_->timesVec(1.0, cons + (m - ns_), 1.0, y); } assert(m == con_idx); - + return true; } - - //sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns} + 0.5 y'*Qd*y + 0.5 s^T s + + // sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns} + 0.5 y'*Qd*y + 0.5 s^T s bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) { //! assert(ns>=4); assert(Q->n()==ns/4); assert(Q->m()==ns/4); - //x_i - 0.5 - for(int i=0; itimesVec(0.0, gradf_y, 1., y); - //s - const double* s=x+ns_; - double* gradf_s = gradf+ns_; - for(int i=0; i0) { - //w.r.t x_1 + assert(nnzit == 2 * ns_); + + // sparse Jacobian ineq w.r.t x and s + if(ns_ > 0) { + // w.r.t x_1 iJacS[nnzit] = con_idx; jJacS[nnzit++] = 0; - //w.r.t s - for(int i=0; i0) - assert(nnzit == 2*ns_ + 3*(ns_>0) + ns_); + } // end of if(ns>0) + assert(nnzit == 2 * ns_ + 3 * (ns_ > 0) + ns_); - if(rankdefic_ineq_ && ns_>0) { + if(rankdefic_ineq_ && ns_ > 0) { // [-inf] <= [ x_1 + e^T s + x_2 + 2e^T y] <= [ 4 ] iJacS[nnzit] = con_idx; - jJacS[nnzit++] = 0; //x1 - + jJacS[nnzit++] = 0; // x1 + iJacS[nnzit] = con_idx; - jJacS[nnzit++] = 1; //x2 - - for(int i=0; i0) + ns_ + rankdefic_ineq_*2*(2+ns_)*(ns_>0)); + assert(nnzit == 2 * ns_ + 3 * (ns_ > 0) + ns_ + rankdefic_ineq_ * 2 * (2 + ns_) * (ns_ > 0)); if(rankdefic_eq_) { // x+s - Md y = 0, i=1,...,ns - for(int i=0; i0) { - //sparse Jacobian INEQ w.r.t x and s - - //w.r.t x_1 - MJacS[nnzit++] = 1.; - //w.r.t s - for(int i=0; i0) + ns_); - assert(con_idx == ns_ + 3*(ns_>0)); - - if(rankdefic_ineq_ && ns_>0) { - // [-inf] <= [ x_1 + e^T s + x_2 + 2e^T y] <= [ 4 ] - MJacS[nnzit++] = 1.; //x1 - MJacS[nnzit++] = 1.; //x2 - for(int i=0; i0) + ns_ + rankdefic_ineq_*2*(2+ns_)*(ns_>0)); - - // x+s - Md y = 0, i=1,...,ns - if(rankdefic_eq_) { - for(int i=0; i 0) { + // sparse Jacobian INEQ w.r.t x and s + + // w.r.t x_1 + MJacS[nnzit++] = 1.; + // w.r.t s + for(int i = 0; i < ns_; i++) { + MJacS[nnzit++] = 1.; + } + con_idx++; + + // w.r.t x_2 + MJacS[nnzit++] = 1.; + con_idx++; + + // w.r.t. x_3 + MJacS[nnzit++] = 1.; + con_idx++; + } + assert(nnzit == 2 * ns_ + 3 * (ns_ > 0) + ns_); + assert(con_idx == ns_ + 3 * (ns_ > 0)); + + if(rankdefic_ineq_ && ns_ > 0) { + // [-inf] <= [ x_1 + e^T s + x_2 + 2e^T y] <= [ 4 ] + MJacS[nnzit++] = 1.; // x1 + MJacS[nnzit++] = 1.; // x2 + for(int i = 0; i < ns_; i++) { + MJacS[nnzit++] = 1.; // s + } + con_idx++; + + // [ -4 ] <= [ x_1 + e^T s + x_3 + 2e^T y] <= [inf] + MJacS[nnzit++] = 1.; // x1 + MJacS[nnzit++] = 1.; // x3 + for(int i = 0; i < ns_; i++) { + MJacS[nnzit++] = 1.; // s + } + con_idx++; + } + assert(nnzit == 2 * ns_ + 3 * (ns_ > 0) + ns_ + rankdefic_ineq_ * 2 * (2 + ns_) * (ns_ > 0)); + + // x+s - Md y = 0, i=1,...,ns + if(rankdefic_eq_) { + for(int i = 0; i < ns_; i++) { + MJacS[nnzit++] = 1.; // x + MJacS[nnzit++] = 1.; // s + con_idx++; + } + } + assert(nnzit == nnzJacS); } - // - //dense Jacobian w.r.t y + // dense Jacobian w.r.t y // - if(JacD!=NULL) { - //eq - memcpy(JacD, Md_->local_data(), ns_*nd_*sizeof(double)); - - //ineq - for(int i=0; i<3*nd_; i++) { - //!JacD[ns_][i] = 1.; - JacD[ns_*nd_+i] = 1.; + if(JacD != NULL) { + // eq + memcpy(JacD, Md_->local_data(), ns_ * nd_ * sizeof(double)); + + // ineq + for(int i = 0; i < 3 * nd_; i++) { + //! JacD[ns_][i] = 1.; + JacD[ns_ * nd_ + i] = 1.; } - int con_idx=ns_+3; + int con_idx = ns_ + 3; if(rankdefic_ineq_) { - for(int i=0; i<2*nd_; i++) { - //!JacD[con_idx][i] = 2.; - JacD[con_idx*nd_+i] = 2.; + for(int i = 0; i < 2 * nd_; i++) { + //! JacD[con_idx][i] = 2.; + JacD[con_idx * nd_ + i] = 2.; } con_idx += 2; } - + if(rankdefic_eq_) { - memcpy(JacD+con_idx*nd_, Md_->local_data(), ns_*nd_*sizeof(double)); + memcpy(JacD + con_idx * nd_, Md_->local_data(), ns_ * nd_ * sizeof(double)); con_idx += ns_; } assert(con_idx == m); } - + return true; } - + bool eval_Hess_Lagr(const size_type& n, - const size_type& m, + const size_type& m, const double* x, bool new_x, const double& obj_factor, const double* lambda, bool new_lambda, const size_type& nsparse, - const size_type& ndense, + const size_type& ndense, const size_type& nnzHSS, index_type* iHSS, index_type* jHSS, - double* MHSS, + double* MHSS, double* HDD, size_type& nnzHSD, index_type* iHSD, index_type* jHSD, double* MHSD) { - //Note: lambda is not used since all the constraints are linear and, therefore, do - //not contribute to the Hessian of the Lagrangian + // Note: lambda is not used since all the constraints are linear and, therefore, do + // not contribute to the Hessian of the Lagrangian - assert(nnzHSS == 2*ns_); - assert(nnzHSD==0); - assert(iHSD==NULL); assert(jHSD==NULL); assert(MHSD==NULL); + assert(nnzHSS == 2 * ns_); + assert(nnzHSD == 0); + assert(iHSD == NULL); + assert(jHSD == NULL); + assert(MHSD == NULL); - if(iHSS!=NULL && jHSS!=NULL) { - for(int i=0; i<2*ns_; i++) iHSS[i] = jHSS[i] = i; + if(iHSS != NULL && jHSS != NULL) { + for(int i = 0; i < 2 * ns_; i++) iHSS[i] = jHSS[i] = i; } - if(MHSS!=NULL) { - for(int i=0; ilocal_buffer(), nx_dense_squared*sizeof(double)); + if(HDD != NULL) { + const int nx_dense_squared = nd_ * nd_; + // memcpy(HDD[0], Q->local_buffer(), nx_dense_squared*sizeof(double)); const double* Qv = Q_->local_data(); - for(int i=0; iSetStringValue("duals_update_type", "linear"); -// nlp.options->SetStringValue("duals_init", "zero"); - + // nlp.options->SetStringValue("duals_init", "zero"); + nlp.options->SetStringValue("Hessian", "analytical_exact"); - //nlp.options->SetStringValue("KKTLinsys", "xdycyd"); + // nlp.options->SetStringValue("KKTLinsys", "xdycyd"); nlp.options->SetStringValue("compute_mode", "hybrid"); - + nlp.options->SetIntegerValue("verbosity_level", 3); nlp.options->SetNumericValue("mu0", 1e-1); hiopAlgFilterIPMNewton solver(&nlp); @@ -151,115 +149,107 @@ int main(int argc, char **argv) obj_value1 = solver.getObjective(); delete nlp_interface; - - if(status1<0) { - if(rank==0) - printf("solve1 trouble: returned %d (with objective is %18.12e)\n", - status1, obj_value1); + + if(status1 < 0) { + if(rank == 0) printf("solve1 trouble: returned %d (with objective is %18.12e)\n", status1, obj_value1); return -1; } - } //end of test 1 + } // end of test 1 - //test 2 + // test 2 if(rdJac) { bool convex_obj = true; bool rankdefic_Jac_eq = false; bool rankdefic_Jac_ineq = true; - + hiopInterfaceMDS* nlp_interface = new MdsEx2(n_sp, n_de, convex_obj, rankdefic_Jac_eq, rankdefic_Jac_ineq); - + hiopNlpMDS nlp(*nlp_interface); - + nlp.options->SetStringValue("duals_update_type", "linear"); -// nlp.options->SetStringValue("duals_init", "zero"); - + // nlp.options->SetStringValue("duals_init", "zero"); + nlp.options->SetStringValue("Hessian", "analytical_exact"); - //nlp.options->SetStringValue("KKTLinsys", "xdycyd"); + // nlp.options->SetStringValue("KKTLinsys", "xdycyd"); nlp.options->SetStringValue("compute_mode", "hybrid"); - + nlp.options->SetIntegerValue("verbosity_level", 3); nlp.options->SetNumericValue("mu0", 1e-1); hiopAlgFilterIPMNewton solver(&nlp); status2 = solver.run(); obj_value2 = solver.getObjective(); - + delete nlp_interface; - - if(status2<0) { - if(rank==0) - printf("solve2 trouble: returned %d (with objective is %18.12e)\n", - status2, obj_value2); + + if(status2 < 0) { + if(rank == 0) printf("solve2 trouble: returned %d (with objective is %18.12e)\n", status2, obj_value2); return -1; } - } //end of test 2 + } // end of test 2 - //test 3 + // test 3 { bool convex_obj = false; bool rankdefic_Jac_eq = false; bool rankdefic_Jac_ineq = false; - + hiopInterfaceMDS* nlp_interface = new MdsEx2(n_sp, n_de, convex_obj, rankdefic_Jac_eq, rankdefic_Jac_ineq); - + hiopNlpMDS nlp(*nlp_interface); - + nlp.options->SetStringValue("duals_update_type", "linear"); -// nlp.options->SetStringValue("duals_init", "zero"); - + // nlp.options->SetStringValue("duals_init", "zero"); + nlp.options->SetStringValue("Hessian", "analytical_exact"); - //nlp.options->SetStringValue("KKTLinsys", "xdycyd"); + // nlp.options->SetStringValue("KKTLinsys", "xdycyd"); nlp.options->SetStringValue("compute_mode", "hybrid"); - + nlp.options->SetIntegerValue("verbosity_level", 3); nlp.options->SetNumericValue("mu0", 1e-1); hiopAlgFilterIPMNewton solver(&nlp); status3 = solver.run(); obj_value3 = solver.getObjective(); - + delete nlp_interface; - - if(status3<0) { - if(rank==0) - printf("solve3 trouble: returned %d (with objective is %18.12e)\n", - status3, obj_value3); + + if(status3 < 0) { + if(rank == 0) printf("solve3 trouble: returned %d (with objective is %18.12e)\n", status3, obj_value3); return -1; } - } //end of test 3 + } // end of test 3 - //test 4 + // test 4 if(rdJac) { bool convex_obj = false; bool rankdefic_Jac_eq = true; bool rankdefic_Jac_ineq = true; - + hiopInterfaceMDS* nlp_interface = new MdsEx2(n_sp, n_de, convex_obj, rankdefic_Jac_eq, rankdefic_Jac_ineq); - + hiopNlpMDS nlp(*nlp_interface); - + nlp.options->SetStringValue("duals_update_type", "linear"); -// nlp.options->SetStringValue("duals_init", "zero"); - + // nlp.options->SetStringValue("duals_init", "zero"); + nlp.options->SetStringValue("Hessian", "analytical_exact"); - //nlp.options->SetStringValue("KKTLinsys", "xdycyd"); + // nlp.options->SetStringValue("KKTLinsys", "xdycyd"); nlp.options->SetStringValue("compute_mode", "hybrid"); - + nlp.options->SetIntegerValue("verbosity_level", 3); nlp.options->SetNumericValue("mu0", 1e-1); hiopAlgFilterIPMNewton solver(&nlp); status4 = solver.run(); obj_value4 = solver.getObjective(); - + delete nlp_interface; - - if(status4<0) { - if(rank==0) - printf("solve4 trouble: returned %d (with objective is %18.12e)\n", - status4, obj_value4); + + if(status4 < 0) { + if(rank == 0) printf("solve4 trouble: returned %d (with objective is %18.12e)\n", status4, obj_value4); return -1; } - } //end of test 4 + } // end of test 4 - bool selfcheck_ok=true; + bool selfcheck_ok = true; // this is used for testing when the driver is in '-selfcheck' mode if(selfCheck) { // if(rdJac && fabs(obj_value1-(-3.160999998751e+03))>1e-6) { @@ -272,9 +262,11 @@ int main(int argc, char **argv) // "dense variables did. BTW, obj=%18.12e was returned by HiOp.\n", obj_value2); // selfcheck_ok = false; // } - if((fabs(obj_value3-(-3.160999998751e+03))/3.160999998751e+03)>1e-6) { - printf("selfcheck3: objective mismatch for MDS Ex2 problem with 400 sparse variables and 100 " - "dense variables did. BTW, obj=%18.12e was returned by HiOp.\n", obj_value3); + if((fabs(obj_value3 - (-3.160999998751e+03)) / 3.160999998751e+03) > 1e-6) { + printf( + "selfcheck3: objective mismatch for MDS Ex2 problem with 400 sparse variables and 100 " + "dense variables did. BTW, obj=%18.12e was returned by HiOp.\n", + obj_value3); selfcheck_ok = false; } // if(rdJac && fabs(obj_value4-(-1.35649999989221e+03))>1e-6) { @@ -283,13 +275,12 @@ int main(int argc, char **argv) // selfcheck_ok = false; // } - if(false == selfcheck_ok) - { + if(false == selfcheck_ok) { std::cout << "Selfcheck failed!\n"; return -1; } } else { - if(rank==0) { + if(rank == 0) { if(rdJac) printf("Optimal objective 1: %22.14e. Solver status: %d\n", obj_value1, status1); if(rdJac) printf("Optimal objective 2: %22.14e. Solver status: %d\n", obj_value2, status2); printf("Optimal objective 3: %22.14e. Solver status: %d\n", obj_value3, status3); diff --git a/src/Drivers/MDS/NlpMdsRajaEx1.cpp b/src/Drivers/MDS/NlpMdsRajaEx1.cpp index e56401d91..a34535d41 100644 --- a/src/Drivers/MDS/NlpMdsRajaEx1.cpp +++ b/src/Drivers/MDS/NlpMdsRajaEx1.cpp @@ -65,13 +65,12 @@ #include #include -//TODO: A good idea to not use the internal HiOp Raja policies here and, instead, give self-containing -// definitions of the policies here so that the user gets a better grasp of the concept and does not -// rely on the internals of HiOp. For example: -// #define RAJA_LAMBDA [=] __device__ -// using ex1_raja_exec = RAJA::cuda_exec<128>; -// more defs here - +// TODO: A good idea to not use the internal HiOp Raja policies here and, instead, give self-containing +// definitions of the policies here so that the user gets a better grasp of the concept and does not +// rely on the internals of HiOp. For example: +// #define RAJA_LAMBDA [=] __device__ +// using ex1_raja_exec = RAJA::cuda_exec<128>; +// more defs here #if defined(HIOP_USE_CUDA) #include "ExecPoliciesRajaCudaImpl.hpp" @@ -84,7 +83,7 @@ using ex1_raja_exec = hiop::ExecRajaPoliciesBackend::hi using ex1_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using ex1_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex1_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; @@ -94,51 +93,41 @@ using hiopMatrixRajaDense = hiop::hiopMatrixDenseRajawas altered to " << ns_ - << "\n"; + } else { + if(4 * (ns_ / 4) != ns_) { + ns_ = 4 * ((4 + ns_) / 4); + std::cout << "[warning] number " << ns_in << " of sparse vars is not a multiple ->was altered to " << ns_ << "\n"; } } - if(nd_in<0) - nd_=0; + if(nd_in < 0) + nd_ = 0; else nd_ = nd_in; // Allocate data buffer and matrices Q_ and Md_ - if(mem_space_ == "DEFAULT") - { - Q_ = new hiop::hiopMatrixDenseRowMajor(nd_, nd_); + if(mem_space_ == "DEFAULT") { + Q_ = new hiop::hiopMatrixDenseRowMajor(nd_, nd_); Md_ = new hiop::hiopMatrixDenseRowMajor(ns_, nd_); buf_y_ = new double[nd_]; - } - else - { - Q_ = new hiopMatrixRajaDense(nd_, nd_, mem_space_); + } else { + Q_ = new hiopMatrixRajaDense(nd_, nd_, mem_space_); Md_ = new hiopMatrixRajaDense(ns_, nd_, mem_space_); buf_y_ = static_cast(allocator.allocate(nd_ * sizeof(double))); } @@ -163,7 +152,7 @@ MdsEx1::~MdsEx1() allocator.deallocate(sol_x_); if(mem_space_ == "DEFAULT") - delete [] buf_y_; + delete[] buf_y_; else allocator.deallocate(buf_y_); @@ -178,21 +167,21 @@ void MdsEx1::initialize() double* data = Q_->local_data(); RAJA::View> Qview(data, nd_, nd_); - RAJA::forall(RAJA::RangeSegment(1, nd_-1), - RAJA_LAMBDA(RAJA::Index_type i) - { - Qview(i, i+1) = 1.0; - Qview(i+1, i) = 1.0; - }); + RAJA::forall( + RAJA::RangeSegment(1, nd_ - 1), + RAJA_LAMBDA(RAJA::Index_type i) { + Qview(i, i + 1) = 1.0; + Qview(i + 1, i) = 1.0; + }); Md_->setToConstant(-1.0); } bool MdsEx1::get_prob_sizes(size_type& n, size_type& m) -{ - n = 2*ns_ + nd_; - m = ns_ + 3*( haveIneq_ ? 1 : 0 ); - return true; +{ + n = 2 * ns_ + nd_; + m = ns_ + 3 * (haveIneq_ ? 1 : 0); + return true; } /** @@ -200,90 +189,64 @@ bool MdsEx1::get_prob_sizes(size_type& n, size_type& m) * @todo register pointers with umpire in case they need to be copied * from device to host. */ -bool MdsEx1::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool MdsEx1::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { - //assert(n>=4 && "number of variables should be greater than 4 for this example"); - assert(n == 2*ns_+nd_); + // assert(n>=4 && "number of variables should be greater than 4 for this example"); + assert(n == 2 * ns_ + nd_); int ns = ns_; - //x - RAJA::forall(RAJA::RangeSegment(0, ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - xlow[i] = -1e+20; - }); - - //s - RAJA::forall(RAJA::RangeSegment(ns_, 2*ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - xlow[i] = 0.; - }); - - //y - // xlow[2*ns_] = -4.; - // for(int i=2*ns_+1; i(RAJA::RangeSegment(2*ns_, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - /// @todo move this assignment outside the kernel somehow - /// limiting factor is that this will eventually have to run on - /// a GPU device, and cannot be assigned to directly. This is a - /// workaround for now. - if (i == 2*ns) - { - xlow[i] = -4.; - } - else - { - xlow[i] = -1e+20; - } - }); - - //x - RAJA::forall(RAJA::RangeSegment(0, ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - xupp[i] = 3.; - }); - - //s - RAJA::forall(RAJA::RangeSegment(ns_, 2*ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - xupp[i] = +1e20; - }); - - //y - // xupp[2*ns_] = 4.; - // for(int i=2*ns_+1; i(RAJA::RangeSegment(2*ns_, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - /// @todo Same situation as above case. Figure out how to - /// remove conditional. - if (i == 2*ns) - { - xupp[i] = 4.; - } - else - { - xupp[i] = 1e+20; - } - }); + // x + RAJA::forall(RAJA::RangeSegment(0, ns_), RAJA_LAMBDA(RAJA::Index_type i) { xlow[i] = -1e+20; }); + + // s + RAJA::forall(RAJA::RangeSegment(ns_, 2 * ns_), RAJA_LAMBDA(RAJA::Index_type i) { xlow[i] = 0.; }); + + // y + // xlow[2*ns_] = -4.; + // for(int i=2*ns_+1; i( + RAJA::RangeSegment(2 * ns_, n), + RAJA_LAMBDA(RAJA::Index_type i) { + /// @todo move this assignment outside the kernel somehow + /// limiting factor is that this will eventually have to run on + /// a GPU device, and cannot be assigned to directly. This is a + /// workaround for now. + if(i == 2 * ns) { + xlow[i] = -4.; + } else { + xlow[i] = -1e+20; + } + }); + + // x + RAJA::forall(RAJA::RangeSegment(0, ns_), RAJA_LAMBDA(RAJA::Index_type i) { xupp[i] = 3.; }); + + // s + RAJA::forall(RAJA::RangeSegment(ns_, 2 * ns_), RAJA_LAMBDA(RAJA::Index_type i) { xupp[i] = +1e20; }); + + // y + // xupp[2*ns_] = 4.; + // for(int i=2*ns_+1; i( + RAJA::RangeSegment(2 * ns_, n), + RAJA_LAMBDA(RAJA::Index_type i) { + /// @todo Same situation as above case. Figure out how to + /// remove conditional. + if(i == 2 * ns) { + xupp[i] = 4.; + } else { + xupp[i] = 1e+20; + } + }); // Use a sequential policy for host computations for now - RAJA::forall(RAJA::RangeSegment(0, n), - [=] (RAJA::Index_type i) - { - type[i] = hiopNonlinear; - }); + RAJA::forall(RAJA::RangeSegment(0, n), [=](RAJA::Index_type i) { type[i] = hiopNonlinear; }); return true; } /** * @todo fill out param descriptions below to determine whether or not - * they will reside on device and will have to be accessed/assigned to + * they will reside on device and will have to be accessed/assigned to * in a RAJA kernel * * @param[out] m - number of constraints @@ -293,30 +256,28 @@ bool MdsEx1::get_vars_info(const size_type& n, double *xlow, double* xupp, Nonli */ bool MdsEx1::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m == ns_ + 3*haveIneq_); - bool haveIneq = haveIneq_; ///< Cannot capture member variable in RAJA lambda - //x+s - Md_ y = 0, i=1,...,ns_ - RAJA::forall(RAJA::RangeSegment(0, ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - clow[i] = cupp[i] = 0.; - if(i == 0 && haveIneq) - { - // [-2 ] [ x_1 + e^T s] [e^T] [ 2 ] - clow[m-3] = -2; cupp[m-3] = 2.; - // [-inf] <= [ x_2 ] + [e^T] y <= [ 2 ] - clow[m-2] = -1e+20; cupp[m-2] = 2.; - // [-2 ] [ x_3 ] [e^T] [inf] - clow[m-1] = -2; cupp[m-1] = 1e+20; - } - }); + assert(m == ns_ + 3 * haveIneq_); + bool haveIneq = haveIneq_; ///< Cannot capture member variable in RAJA lambda + // x+s - Md_ y = 0, i=1,...,ns_ + RAJA::forall( + RAJA::RangeSegment(0, ns_), + RAJA_LAMBDA(RAJA::Index_type i) { + clow[i] = cupp[i] = 0.; + if(i == 0 && haveIneq) { + // [-2 ] [ x_1 + e^T s] [e^T] [ 2 ] + clow[m - 3] = -2; + cupp[m - 3] = 2.; + // [-inf] <= [ x_2 ] + [e^T] y <= [ 2 ] + clow[m - 2] = -1e+20; + cupp[m - 2] = 2.; + // [-2 ] [ x_3 ] [e^T] [inf] + clow[m - 1] = -2; + cupp[m - 1] = 1e+20; + } + }); // Must be a sequential host policy for now - RAJA::forall(RAJA::RangeSegment(0, m), - [=] (RAJA::Index_type i) - { - type[i] = hiopNonlinear; - }); + RAJA::forall(RAJA::RangeSegment(0, m), [=](RAJA::Index_type i) { type[i] = hiopNonlinear; }); return true; } @@ -327,93 +288,82 @@ bool MdsEx1::get_sparse_dense_blocks_info(int& nx_sparse, int& nnz_sparse_Hess_Lagr_SS, int& nnz_sparse_Hess_Lagr_SD) { - nx_sparse = 2*ns_; + nx_sparse = 2 * ns_; nx_dense = nd_; - nnz_sparse_Jace = 2*ns_; + nnz_sparse_Jace = 2 * ns_; if(empty_sp_row_) { - nnz_sparse_Jaci = (ns_==0 || !haveIneq_) ? 0 : 2+ns_; + nnz_sparse_Jaci = (ns_ == 0 || !haveIneq_) ? 0 : 2 + ns_; } else { - nnz_sparse_Jaci = (ns_==0 || !haveIneq_) ? 0 : 3+ns_; + nnz_sparse_Jaci = (ns_ == 0 || !haveIneq_) ? 0 : 3 + ns_; } - nnz_sparse_Hess_Lagr_SS = 2*ns_; + nnz_sparse_Hess_Lagr_SS = 2 * ns_; nnz_sparse_Hess_Lagr_SD = 0.; return true; } bool MdsEx1::eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { - //assert(ns_>=4); - assert(Q_->n()==nd_); assert(Q_->m()==nd_); - obj_value=0.;//x[0]*(x[0]-1.); + // assert(ns_>=4); + assert(Q_->n() == nd_); + assert(Q_->m() == nd_); + obj_value = 0.; // x[0]*(x[0]-1.); { RAJA::ReduceSum aux(0); - RAJA::forall(RAJA::RangeSegment(0, ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - aux += x[i] * (x[i]-1.); - }); + RAJA::forall(RAJA::RangeSegment(0, ns_), RAJA_LAMBDA(RAJA::Index_type i) { aux += x[i] * (x[i] - 1.); }); obj_value += aux.get(); obj_value *= 0.5; } { - const double* y = x+2*ns_; + const double* y = x + 2 * ns_; Q_->timesVec(0.0, buf_y_, 1., y); double* _buf_y_vec = this->buf_y_; RAJA::ReduceSum aux(0); - RAJA::forall(RAJA::RangeSegment(0, nd_), - RAJA_LAMBDA(RAJA::Index_type i) - { - aux += _buf_y_vec[i] * y[i]; - }); + RAJA::forall( + RAJA::RangeSegment(0, nd_), + RAJA_LAMBDA(RAJA::Index_type i) { aux += _buf_y_vec[i] * y[i]; }); obj_value += 0.5 * aux.get(); } { - const double* s=x+ns_; + const double* s = x + ns_; RAJA::ReduceSum aux(0); - RAJA::forall(RAJA::RangeSegment(0, ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - aux += s[i]*s[i]; - }); + RAJA::forall(RAJA::RangeSegment(0, ns_), RAJA_LAMBDA(RAJA::Index_type i) { aux += s[i] * s[i]; }); obj_value += 0.5 * aux.get(); } return true; } -bool MdsEx1::eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, +bool MdsEx1::eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, const index_type* idx_cons_in, - const double* x, - bool new_x, + const double* x, + bool new_x, double* cons) { - const double* s = x+ns_; - const double* y = x+2*ns_; + const double* s = x + ns_; + const double* y = x + 2 * ns_; - assert(num_cons==ns_ || num_cons==3*haveIneq_); + assert(num_cons == ns_ || num_cons == 3 * haveIneq_); - int ns = ns_; ///< Cannot capture member inside RAJA lambda - int nd = nd_; ///< Cannot capture member inside RAJA lambda + int ns = ns_; ///< Cannot capture member inside RAJA lambda + int nd = nd_; ///< Cannot capture member inside RAJA lambda bool empty_sp_row = empty_sp_row_; - + // equality constraints if(num_cons == ns_ && ns_ > 0) { RAJA::forall( - RAJA::RangeSegment(0, num_cons), - RAJA_LAMBDA(RAJA::Index_type irow) - { - const int con_idx = (int) idx_cons_in[irow]; - if(con_idx < ns) - { - //equalities: x+s - Md_ y = 0 - cons[con_idx] = x[con_idx] + s[con_idx]; - } - }); + RAJA::RangeSegment(0, num_cons), + RAJA_LAMBDA(RAJA::Index_type irow) { + const int con_idx = (int)idx_cons_in[irow]; + if(con_idx < ns) { + // equalities: x+s - Md_ y = 0 + cons[con_idx] = x[con_idx] + s[con_idx]; + } + }); Md_->timesVec(1.0, cons, 1.0, y); } @@ -422,288 +372,241 @@ bool MdsEx1::eval_cons(const size_type& n, // inequality constraints if(num_cons == 3 && haveIneq_) { RAJA::forall( - RAJA::RangeSegment(0, num_cons), - RAJA_LAMBDA(RAJA::Index_type irow) - { - assert(ns>=0); - const int con_idx = (int) idx_cons_in[irow]; - assert(con_idx>=0); - assert(con_idx < ns+3); - const int conineq_idx = con_idx - ns; - - if(conineq_idx==0) - { - cons[conineq_idx] = x[0]; - for(int i=0; i= 0); + const int con_idx = (int)idx_cons_in[irow]; + assert(con_idx >= 0); + assert(con_idx < ns + 3); + const int conineq_idx = con_idx - ns; + + if(conineq_idx == 0) { + cons[conineq_idx] = x[0]; + for(int i = 0; i < ns; i++) cons[conineq_idx] += s[i]; + for(int i = 0; i < nd; i++) cons[conineq_idx] += y[i]; + } else if(conineq_idx == 1) { + if(empty_sp_row) { + cons[conineq_idx] = 0.0; + } else { + cons[conineq_idx] = x[1]; + } + for(int i = 0; i < nd; i++) cons[conineq_idx] += y[i]; + } else if(conineq_idx == 2) { + cons[conineq_idx] = x[2]; + for(int i = 0; i < nd; i++) { + cons[conineq_idx] += y[i]; + } } else { - cons[conineq_idx] = x[1]; - } - for(int i=0; i= 0); + assert(false); } - } - else - { - assert(conineq_idx>=0); - assert(false); - } - }); + }); } return true; } -//sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns_} + 0.5 y'*Qd*y + 0.5 s^T s +// sum 0.5 {x_i*(x_{i}-1) : i=1,...,ns_} + 0.5 y'*Qd*y + 0.5 s^T s bool MdsEx1::eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) { //! assert(ns_>=4); assert(Q_->n()==ns_/4); assert(Q_->m()==ns_/4); - //x_i - 0.5 - RAJA::forall( - RAJA::RangeSegment(0, ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - gradf[i] = x[i] - 0.5; - }); - - //Qd*y - const double* y = x+2*ns_; - double* gradf_y = gradf+2*ns_; + // x_i - 0.5 + RAJA::forall(RAJA::RangeSegment(0, ns_), RAJA_LAMBDA(RAJA::Index_type i) { gradf[i] = x[i] - 0.5; }); + + // Qd*y + const double* y = x + 2 * ns_; + double* gradf_y = gradf + 2 * ns_; Q_->timesVec(0.0, gradf_y, 1., y); - //s - const double* s=x+ns_; - double* gradf_s = gradf+ns_; - RAJA::forall( - RAJA::RangeSegment(0, ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - gradf_s[i] = s[i]; - }); + // s + const double* s = x + ns_; + double* gradf_s = gradf + ns_; + RAJA::forall(RAJA::RangeSegment(0, ns_), RAJA_LAMBDA(RAJA::Index_type i) { gradf_s[i] = s[i]; }); return true; } /** * @brief Evaluate Jacobian for equality and inequality constraints - * + * * @param[in] n - number of variables * @param[in] m - number of equality and inequality constraints * @param[in] num_cons - number of constraints to update * @param[in] idx_cons - indices of constraints to update * @param[in] x - solution vector (optimization variables) - * @param[in] new_x - if variable is updated (?) - * @param[in] nsparse - number of sparse variables - * @param[in] ndense - number of dense variables + * @param[in] new_x - if variable is updated (?) + * @param[in] nsparse - number of sparse variables + * @param[in] ndense - number of dense variables * @param[in] nnzJacS - number of nonzeros in sparse Jacobian block * @param[out] - sparse matrix row indices * @param[out] - sparse matrix column indices * @param[out] - sparse matrix values * @param[out] - array to dense matrix row pointers - * + * * This method runs on GPU. - * + * */ bool MdsEx1::eval_Jac_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, + const size_type& m, + const size_type& num_cons, const index_type* idx_cons, - const double* x, + const double* x, bool new_x, - const size_type& nsparse, - const size_type& ndense, - const int& nnzJacS, - int* iJacS, - int* jJacS, - double* MJacS, + const size_type& nsparse, + const size_type& ndense, + const int& nnzJacS, + int* iJacS, + int* jJacS, + double* MJacS, double* JacD) { - assert(num_cons==ns_ || num_cons==3*haveIneq_); + assert(num_cons == ns_ || num_cons == 3 * haveIneq_); - int ns = ns_; ///< Cannot capture member inside RAJA lambda - - if(iJacS!=NULL && jJacS!=NULL) - { + int ns = ns_; ///< Cannot capture member inside RAJA lambda + + if(iJacS != NULL && jJacS != NULL) { // Compute equality constraints Jacobian - if(num_cons==ns_ && ns_>0) - { - assert(2*ns_==nnzJacS); + if(num_cons == ns_ && ns_ > 0) { + assert(2 * ns_ == nnzJacS); RAJA::forall( - RAJA::RangeSegment(0, num_cons), - RAJA_LAMBDA(RAJA::Index_type itrow) - { - const int con_idx = (int) idx_cons[itrow]; - //sparse Jacobian eq w.r.t. x and s - //x - iJacS[2*itrow] = con_idx; - jJacS[2*itrow] = con_idx; - if(MJacS != nullptr) - MJacS[2*itrow] = 1.0; - - //s - iJacS[2*itrow+1] = con_idx; - jJacS[2*itrow+1] = con_idx + ns; - if(MJacS != nullptr) { - MJacS[2*itrow+1] = 1.0; - } - }); + RAJA::RangeSegment(0, num_cons), + RAJA_LAMBDA(RAJA::Index_type itrow) { + const int con_idx = (int)idx_cons[itrow]; + // sparse Jacobian eq w.r.t. x and s + // x + iJacS[2 * itrow] = con_idx; + jJacS[2 * itrow] = con_idx; + if(MJacS != nullptr) MJacS[2 * itrow] = 1.0; + + // s + iJacS[2 * itrow + 1] = con_idx; + jJacS[2 * itrow + 1] = con_idx + ns; + if(MJacS != nullptr) { + MJacS[2 * itrow + 1] = 1.0; + } + }); } // Compute inequality constraints Jacobian - if(num_cons==3 && haveIneq_ && ns_>0) { + if(num_cons == 3 && haveIneq_ && ns_ > 0) { if(!empty_sp_row_) { - assert(ns_+3==nnzJacS); + assert(ns_ + 3 == nnzJacS); // Loop over all matrix nonzeros RAJA::forall( - RAJA::RangeSegment(0, ns_+3), - RAJA_LAMBDA(RAJA::Index_type tid) - { - if(tid==0) { - iJacS[tid] = 0; - jJacS[tid] = 0; - if(MJacS != nullptr) - MJacS[tid] = 1.0; - assert(idx_cons[0] == ns); - } else if(tid > ns) { - iJacS[tid] = tid - ns; - jJacS[tid] = tid - ns; - if(MJacS != nullptr) - MJacS[tid] = 1.0; - assert(idx_cons[1] == ns + 1 && idx_cons[2] == ns + 2); - } else { - iJacS[tid] = 0; - jJacS[tid] = ns + tid - 1; - if(MJacS != nullptr) - MJacS[tid] = 1.0; - } - } - ); - } else { //empty_sp_row_ == true - assert(ns_+2==nnzJacS); + RAJA::RangeSegment(0, ns_ + 3), + RAJA_LAMBDA(RAJA::Index_type tid) { + if(tid == 0) { + iJacS[tid] = 0; + jJacS[tid] = 0; + if(MJacS != nullptr) MJacS[tid] = 1.0; + assert(idx_cons[0] == ns); + } else if(tid > ns) { + iJacS[tid] = tid - ns; + jJacS[tid] = tid - ns; + if(MJacS != nullptr) MJacS[tid] = 1.0; + assert(idx_cons[1] == ns + 1 && idx_cons[2] == ns + 2); + } else { + iJacS[tid] = 0; + jJacS[tid] = ns + tid - 1; + if(MJacS != nullptr) MJacS[tid] = 1.0; + } + }); + } else { // empty_sp_row_ == true + assert(ns_ + 2 == nnzJacS); // Loop over all matrix nonzeros RAJA::forall( - RAJA::RangeSegment(0, ns_+2), - RAJA_LAMBDA(RAJA::Index_type tid) - { - if(tid==0) { - // x_1 - iJacS[tid] = 0; - jJacS[tid] = 0; - if(MJacS != nullptr) { - MJacS[tid] = 1.0; + RAJA::RangeSegment(0, ns_ + 2), + RAJA_LAMBDA(RAJA::Index_type tid) { + if(tid == 0) { + // x_1 + iJacS[tid] = 0; + jJacS[tid] = 0; + if(MJacS != nullptr) { + MJacS[tid] = 1.0; + } + assert(idx_cons[0] == ns); + } else if(tid > ns) { + // x_3 + iJacS[tid] = 2; + jJacS[tid] = 2; + if(MJacS != nullptr) MJacS[tid] = 1.0; + assert(idx_cons[1] == ns + 1 && idx_cons[2] == ns + 2); + } else { + // s + iJacS[tid] = 0; + jJacS[tid] = ns + tid - 1; + if(MJacS != nullptr) MJacS[tid] = 1.0; } - assert(idx_cons[0] == ns); - } else if(tid > ns) { - // x_3 - iJacS[tid] = 2; - jJacS[tid] = 2; - if(MJacS != nullptr) - MJacS[tid] = 1.0; - assert(idx_cons[1] == ns + 1 && idx_cons[2] == ns + 2); - } else { - // s - iJacS[tid] = 0; - jJacS[tid] = ns + tid - 1; - if(MJacS != nullptr) - MJacS[tid] = 1.0; - } - } - ); - } // end of if empty_sp_row_ - } // if(num_cons==3 && haveIneq_) - } // if(iJacS!=NULL && jJacS!=NULL) - - //dense Jacobian w.r.t y - if(JacD!=nullptr) - { - if(num_cons == ns_) {// && ns_ > static_cast(idx_cons[0])) - umpire::ResourceManager::getInstance().copy(JacD, Md_->local_data_const(), ns_*nd_*sizeof(double)); + }); + } // end of if empty_sp_row_ + } // if(num_cons==3 && haveIneq_) + } // if(iJacS!=NULL && jJacS!=NULL) + + // dense Jacobian w.r.t y + if(JacD != nullptr) { + if(num_cons == ns_) { // && ns_ > static_cast(idx_cons[0])) + umpire::ResourceManager::getInstance().copy(JacD, Md_->local_data_const(), ns_ * nd_ * sizeof(double)); } - if(num_cons==3 && haveIneq_ && ns_>0) { - int nd = nd_; ///< Cannot capture member inside RAJA lambda - + if(num_cons == 3 && haveIneq_ && ns_ > 0) { + int nd = nd_; ///< Cannot capture member inside RAJA lambda + RAJA::forall( - RAJA::RangeSegment(0, num_cons*nd_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(0 == idx_cons[0]-ns); - assert(1 == idx_cons[1]-ns); - assert(2 == idx_cons[2]-ns); - JacD[nd*(idx_cons[0]-ns)+i] = 1.0; - }); + RAJA::RangeSegment(0, num_cons * nd_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(0 == idx_cons[0] - ns); + assert(1 == idx_cons[1] - ns); + assert(2 == idx_cons[2] - ns); + JacD[nd * (idx_cons[0] - ns) + i] = 1.0; + }); } - } // end of if(JacD != nullptr) + } // end of if(JacD != nullptr) return true; } /// Hessian evaluation bool MdsEx1::eval_Hess_Lagr(const size_type& n, - const size_type& m, + const size_type& m, const double* x, bool new_x, const double& obj_factor, const double* lambda, bool new_lambda, const size_type& nsparse, - const size_type& ndense, + const size_type& ndense, const int& nnzHSS, int* iHSS, int* jHSS, - double* MHSS, + double* MHSS, double* HDD, int& nnzHSD, int* iHSD, int* jHSD, double* MHSD) { - //Note: lambda is not used since all the constraints are linear and, therefore, do - //not contribute to the Hessian of the Lagrangian + // Note: lambda is not used since all the constraints are linear and, therefore, do + // not contribute to the Hessian of the Lagrangian - assert(nnzHSS==2*ns_); - assert(nnzHSD==0); - assert(iHSD==NULL); assert(jHSD==NULL); assert(MHSD==NULL); + assert(nnzHSS == 2 * ns_); + assert(nnzHSD == 0); + assert(iHSD == NULL); + assert(jHSD == NULL); + assert(MHSD == NULL); - if(iHSS!=NULL && jHSS!=NULL) - { - RAJA::forall( - RAJA::RangeSegment(0, 2*ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - iHSS[i] = jHSS[i] = i; - }); + if(iHSS != NULL && jHSS != NULL) { + RAJA::forall(RAJA::RangeSegment(0, 2 * ns_), RAJA_LAMBDA(RAJA::Index_type i) { iHSS[i] = jHSS[i] = i; }); } - if(MHSS!=NULL) - { - RAJA::forall( - RAJA::RangeSegment(0, 2*ns_), - RAJA_LAMBDA(RAJA::Index_type i) - { - MHSS[i] = obj_factor; - }); + if(MHSS != NULL) { + RAJA::forall(RAJA::RangeSegment(0, 2 * ns_), RAJA_LAMBDA(RAJA::Index_type i) { MHSS[i] = obj_factor; }); } - if(HDD!=NULL) - { - const int nx_dense_squared = nd_*nd_; + if(HDD != NULL) { + const int nx_dense_squared = nd_ * nd_; const double* Qv = Q_->local_data(); RAJA::forall( - RAJA::RangeSegment(0, nx_dense_squared), - RAJA_LAMBDA(RAJA::Index_type i) - { - HDD[i] = obj_factor*Qv[i]; - }); + RAJA::RangeSegment(0, nx_dense_squared), + RAJA_LAMBDA(RAJA::Index_type i) { HDD[i] = obj_factor * Qv[i]; }); } return true; } @@ -711,17 +614,12 @@ bool MdsEx1::eval_Hess_Lagr(const size_type& n, /* Implementation of the primal starting point specification */ bool MdsEx1::get_starting_point(const size_type& global_n, double* x0) { - assert(global_n==2*ns_+nd_); - RAJA::forall( - RAJA::RangeSegment(0, global_n), - RAJA_LAMBDA(RAJA::Index_type i) - { - x0[i] = 1.; - }); + assert(global_n == 2 * ns_ + nd_); + RAJA::forall(RAJA::RangeSegment(0, global_n), RAJA_LAMBDA(RAJA::Index_type i) { x0[i] = 1.; }); return true; } -bool MdsEx1::get_starting_point(const size_type& n, +bool MdsEx1::get_starting_point(const size_type& n, const size_type& m, double* x0, bool& duals_avail, @@ -732,7 +630,7 @@ bool MdsEx1::get_starting_point(const size_type& n, double* ineq_slack) { slacks_avail = false; - + if(sol_x_ && sol_zl_ && sol_zu_ && sol_lambda_) { duals_avail = true; @@ -740,7 +638,7 @@ bool MdsEx1::get_starting_point(const size_type& n, resmgr.copy(x0, sol_x_, n); resmgr.copy(z_bndL0, sol_zl_, n); resmgr.copy(z_bndU0, sol_zu_, n); - + resmgr.copy(lambda0, sol_lambda_, m); } else { @@ -751,22 +649,21 @@ bool MdsEx1::get_starting_point(const size_type& n, } /* The public methods below are not part of hiopInterface. They are a proxy - * for user's (front end) code to set solutions from a previous solve. + * for user's (front end) code to set solutions from a previous solve. * - * Same behaviour can be achieved internally (in this class ) if desired by + * Same behaviour can be achieved internally (in this class ) if desired by * overriding @solution_callback and @get_starting_point */ void MdsEx1::set_solution_primal(const double* x_vec) { - int n=2*ns_+nd_; + int n = 2 * ns_ + nd_; /// @note: The constnesss is cast away only for memcpys - still don't touch! auto* x = const_cast(x_vec); auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator allocator = resmgr.getAllocator(mem_space_); - if(NULL == sol_x_) - { + if(NULL == sol_x_) { sol_x_ = static_cast(allocator.allocate(n * sizeof(double))); } resmgr.copy(sol_x_, x); @@ -774,8 +671,8 @@ void MdsEx1::set_solution_primal(const double* x_vec) void MdsEx1::set_solution_duals(const double* zl_vec, const double* zu_vec, const double* lambda_vec) { - int m=ns_+3*haveIneq_; - int n=2*ns_+nd_; + int m = ns_ + 3 * haveIneq_; + int n = 2 * ns_ + nd_; /// @note: The constnesss is cast away only for memcpys - still don't touch! auto* zl = const_cast(zl_vec); @@ -785,226 +682,195 @@ void MdsEx1::set_solution_duals(const double* zl_vec, const double* zu_vec, cons auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator allocator = resmgr.getAllocator(mem_space_); - if(NULL == sol_zl_) - { + if(NULL == sol_zl_) { sol_zl_ = static_cast(allocator.allocate(n * sizeof(double))); } resmgr.copy(sol_zl_, zl); - if(NULL == sol_zu_) - { + if(NULL == sol_zu_) { sol_zu_ = static_cast(allocator.allocate(n * sizeof(double))); } resmgr.copy(sol_zu_, zu); - if(NULL == sol_lambda_) - { + if(NULL == sol_lambda_) { sol_lambda_ = static_cast(allocator.allocate(m * sizeof(double))); } resmgr.copy(sol_lambda_, lambda); } /** all constraints evaluated in here */ -bool MdsEx1OneCallCons::eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) +bool MdsEx1OneCallCons::eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { - assert(3*haveIneq_+ns_ == m); - assert(2*ns_ + nd_ == n); + assert(3 * haveIneq_ + ns_ == m); + assert(2 * ns_ + nd_ == n); const double* s = x + ns_; - const double* y = x + 2*ns_; + const double* y = x + 2 * ns_; - int ns = ns_; ///< Cannot capture member inside RAJA lambda - int nd = nd_; ///< Cannot capture member inside RAJA lambda + int ns = ns_; ///< Cannot capture member inside RAJA lambda + int nd = nd_; ///< Cannot capture member inside RAJA lambda bool haveIneq = haveIneq_; bool empty_sp_row = empty_sp_row_; - + /// @todo determine whether this outter loop should be raja lambda, or /// if the inner loops should each be kernels, or if a more complex /// kernel execution policy should be used. RAJA::forall( - RAJA::RangeSegment(0, m), - RAJA_LAMBDA(RAJA::Index_type con_idx) - { - if(con_idxtimesVec(1.0, cons, 1.0, y); return true; } /** * @brief Evaluate Jacobian for equality and inequality constraints - * + * * @param[in] n - number of variables * @param[in] m - number of equality and inequality constraints * @param[in] x - solution vector (optimization variables) - * @param[in] new_x - if variable is updated (?) - * @param[in] nsparse - number of sparse variables - * @param[in] ndense - number of dense variables + * @param[in] new_x - if variable is updated (?) + * @param[in] nsparse - number of sparse variables + * @param[in] ndense - number of dense variables * @param[in] nnzJacS - number of nonzeros in sparse Jacobian block * @param[out] - sparse matrix row indices * @param[out] - sparse matrix column indices * @param[out] - sparse matrix values * @param[out] - array to dense matrix row pointers - * + * * This method runs on GPU. - * + * */ -bool MdsEx1OneCallCons::eval_Jac_cons(const size_type& n, const size_type& m, - const double* x, bool new_x, - const size_type& nsparse, const size_type& ndense, - const int& nnzJacS, int* iJacS, int* jJacS, double* MJacS, - double* JacD) +bool MdsEx1OneCallCons::eval_Jac_cons(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const size_type& nsparse, + const size_type& ndense, + const int& nnzJacS, + int* iJacS, + int* jJacS, + double* MJacS, + double* JacD) { - assert(m==ns_+3*haveIneq_); + assert(m == ns_ + 3 * haveIneq_); - int ns = ns_; ///< Cannot capture member inside RAJA lambda - - if(iJacS!=NULL && jJacS!=NULL) - { + int ns = ns_; ///< Cannot capture member inside RAJA lambda + + if(iJacS != NULL && jJacS != NULL) { // Compute equality constraints Jacobian RAJA::forall( - RAJA::RangeSegment(0, ns_), - RAJA_LAMBDA(RAJA::Index_type itrow) - { - //sparse Jacobian eq w.r.t. x and s - //x - iJacS[2*itrow] = itrow; - jJacS[2*itrow] = itrow; - if(MJacS != nullptr) - MJacS[2*itrow] = 1.0; - - //s - iJacS[2*itrow+1] = itrow; - jJacS[2*itrow+1] = itrow+ns; - if(MJacS != nullptr) - MJacS[2*itrow+1] = 1.0; - }); + RAJA::RangeSegment(0, ns_), + RAJA_LAMBDA(RAJA::Index_type itrow) { + // sparse Jacobian eq w.r.t. x and s + // x + iJacS[2 * itrow] = itrow; + jJacS[2 * itrow] = itrow; + if(MJacS != nullptr) MJacS[2 * itrow] = 1.0; + + // s + iJacS[2 * itrow + 1] = itrow; + jJacS[2 * itrow + 1] = itrow + ns; + if(MJacS != nullptr) MJacS[2 * itrow + 1] = 1.0; + }); // Compute inequality constraints Jacobian - if(haveIneq_ && ns_>0) { + if(haveIneq_ && ns_ > 0) { if(!empty_sp_row_) { // Loop over all matrix nonzeros RAJA::forall( - RAJA::RangeSegment(0, ns_+3), - RAJA_LAMBDA(RAJA::Index_type tid) - { - const int offset = 2*ns; - if(tid==0) { - iJacS[tid+offset] = ns; - jJacS[tid+offset] = 0; - if(MJacS != nullptr) { - MJacS[tid+offset] = 1.0; - } - } else if(tid>ns) { - iJacS[tid+offset] = tid; - jJacS[tid+offset] = tid-ns; - if(MJacS != nullptr) { - MJacS[tid+offset] = 1.0; - } - } else { - iJacS[tid+offset] = ns; - jJacS[tid+offset] = ns+tid-1; - if(MJacS != nullptr) { - MJacS[tid+offset] = 1.0; + RAJA::RangeSegment(0, ns_ + 3), + RAJA_LAMBDA(RAJA::Index_type tid) { + const int offset = 2 * ns; + if(tid == 0) { + iJacS[tid + offset] = ns; + jJacS[tid + offset] = 0; + if(MJacS != nullptr) { + MJacS[tid + offset] = 1.0; + } + } else if(tid > ns) { + iJacS[tid + offset] = tid; + jJacS[tid + offset] = tid - ns; + if(MJacS != nullptr) { + MJacS[tid + offset] = 1.0; + } + } else { + iJacS[tid + offset] = ns; + jJacS[tid + offset] = ns + tid - 1; + if(MJacS != nullptr) { + MJacS[tid + offset] = 1.0; + } } - } - } - ); - } else { // empty_sp_row_ == true + }); + } else { // empty_sp_row_ == true // Loop over all matrix nonzeros RAJA::forall( - RAJA::RangeSegment(0, ns_+2), - RAJA_LAMBDA(RAJA::Index_type tid) - { - const int offset = 2*ns; - if(tid==0) { - // x_1 - iJacS[tid+offset] = ns; - jJacS[tid+offset] = 0; - if(MJacS != nullptr) { - MJacS[tid+offset] = 1.0; + RAJA::RangeSegment(0, ns_ + 2), + RAJA_LAMBDA(RAJA::Index_type tid) { + const int offset = 2 * ns; + if(tid == 0) { + // x_1 + iJacS[tid + offset] = ns; + jJacS[tid + offset] = 0; + if(MJacS != nullptr) { + MJacS[tid + offset] = 1.0; + } + } else if(tid > ns) { + // x_3 + iJacS[tid + offset] = tid + 1; + jJacS[tid + offset] = 2; + if(MJacS != nullptr) { + MJacS[tid + offset] = 1.0; + } + } else { + iJacS[tid + offset] = ns; + jJacS[tid + offset] = ns + tid - 1; + if(MJacS != nullptr) { + MJacS[tid + offset] = 1.0; + } } - } else if(tid>ns) { - // x_3 - iJacS[tid+offset] = tid+1; - jJacS[tid+offset] = 2; - if(MJacS != nullptr) { - MJacS[tid+offset] = 1.0; - } - } else { - iJacS[tid+offset] = ns; - jJacS[tid+offset] = ns + tid - 1; - if(MJacS != nullptr) { - MJacS[tid+offset] = 1.0; - } - } - } - ); - } // end of if empty_sp_row_ - } // if(haveIneq_) - } // if(iJacS!=NULL && jJacS!=NULL) - - //dense Jacobian w.r.t y - if(JacD!=NULL) - { - //just copy the dense Jacobian corresponding to equalities + }); + } // end of if empty_sp_row_ + } // if(haveIneq_) + } // if(iJacS!=NULL && jJacS!=NULL) + + // dense Jacobian w.r.t y + if(JacD != NULL) { + // just copy the dense Jacobian corresponding to equalities auto& rm = umpire::ResourceManager::getInstance(); - rm.copy(JacD, Md_->local_data_const(), ns_*nd_*sizeof(double)); - - if(haveIneq_) - { - assert(ns_+3 == m); - //do an in place fill-in for the ineq Jacobian corresponding to e^T - //double* J = JacD[ns_]; - double* J = JacD + ns_*nd_; - RAJA::forall( - RAJA::RangeSegment(0, 3*nd_), - RAJA_LAMBDA(RAJA::Index_type i) - { - J[i] = 1.0; - }); + rm.copy(JacD, Md_->local_data_const(), ns_ * nd_ * sizeof(double)); + + if(haveIneq_) { + assert(ns_ + 3 == m); + // do an in place fill-in for the ineq Jacobian corresponding to e^T + // double* J = JacD[ns_]; + double* J = JacD + ns_ * nd_; + RAJA::forall(RAJA::RangeSegment(0, 3 * nd_), RAJA_LAMBDA(RAJA::Index_type i) { J[i] = 1.0; }); } } return true; diff --git a/src/Drivers/MDS/NlpMdsRajaEx1.hpp b/src/Drivers/MDS/NlpMdsRajaEx1.hpp index 5de2226e9..2d8789d7f 100644 --- a/src/Drivers/MDS/NlpMdsRajaEx1.hpp +++ b/src/Drivers/MDS/NlpMdsRajaEx1.hpp @@ -57,14 +57,13 @@ * */ - #ifndef HIOP_EXAMPLE_MDS_RAJA_EX1 #define HIOP_EXAMPLE_MDS_RAJA_EX1 #include "hiopInterface.hpp" -//this include is not needed in general -//we use hiopMatrixDense in this particular example for convienience +// this include is not needed in general +// we use hiopMatrixDense in this particular example for convienience #include #include @@ -76,7 +75,7 @@ #endif #include -#include //for memcpy +#include //for memcpy #include #include @@ -107,58 +106,57 @@ using index_type = hiop::index_type; * x <= 3 * s>=0 * -4 <=y_1 <=4, the rest of y are free - * + * * The vector 'y' is of dimension nd = ns (can be changed in the constructor) * Dense matrices Qd and Md are such that * Qd = two on the diagonal, one on the first offdiagonals, zero elsewhere * Md = minus one everywhere * e = vector of all ones * - * Coding of the problem in MDS HiOp input: order of variables need to be [x,s,y] + * Coding of the problem in MDS HiOp input: order of variables need to be [x,s,y] * since [x,s] are the so-called sparse variables and y are the dense variables - * + * * @note All pointers marked as "managed by Umpire" are allocated by HiOp using the - * Umpire's API. They all are addresses in the same memory space; however, the memory - * space can be host (typically CPU), device (typically GPU), or unified memory (um) - * spaces as per Umpire specification. The selection of the memory space is done via - * the option "mem_space" of HiOp. It is the responsibility of the implementers of - * the HiOp's interfaces (such as the hiop::hiopInterfaceMDS used in this example) to - * work with the "managed by Umpire" pointers in the same memory space as the one + * Umpire's API. They all are addresses in the same memory space; however, the memory + * space can be host (typically CPU), device (typically GPU), or unified memory (um) + * spaces as per Umpire specification. The selection of the memory space is done via + * the option "mem_space" of HiOp. It is the responsibility of the implementers of + * the HiOp's interfaces (such as the hiop::hiopInterfaceMDS used in this example) to + * work with the "managed by Umpire" pointers in the same memory space as the one * specified by the "mem_space" option. - * + * */ class MdsEx1 : public hiop::hiopInterfaceMDS { public: MdsEx1(int ns_in, std::string mem_space, bool empty_sp_row = false) - : MdsEx1(ns_in, ns_in, mem_space, empty_sp_row) - { - } - + : MdsEx1(ns_in, ns_in, mem_space, empty_sp_row) + {} + MdsEx1(int ns_in, int nd_in, std::string mem_space, bool empty_sp_row = false); virtual ~MdsEx1(); - + /** * @brief Number of variables and constraints. - */ + */ bool get_prob_sizes(size_type& n, size_type& m); /** - * @brief Get types and bounds on the variables. - * + * @brief Get types and bounds on the variables. + * * @param[in] n number of variables * @param[out] ixlow array with lower bounds (managed by Umpire) * @param[out] ixupp array with upper bounds (managed by Umpire) * @param[out] type array with the variable types (on host) */ - bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); /** * Get types and bounds corresponding to constraints. An equality constraint is specified * by setting the lower and upper bounds equal. - * + * * @param[in] m Number of constraints * @param[out] iclow array with lower bounds (managed by Umpire) * @param[out] icupp array with upper bounds (managed by Umpire) @@ -171,28 +169,28 @@ class MdsEx1 : public hiop::hiopInterfaceMDS * * @param[out] nx_sparse number of sparse variables * @param[out] nx_dense number of dense variables - * @param[out] nnz_sparse_Jace number of nonzeros in the Jacobian of the equalities w.r.t. - * sparse variables - * @param[out] nnz_sparse_Jaci number of nonzeros in the Jacobian of the inequalities w.r.t. - * sparse variables - * @param[out] nnz_sparse_Hess_Lagr_SS number of nonzeros in the (sparse) Hessian w.r.t. + * @param[out] nnz_sparse_Jace number of nonzeros in the Jacobian of the equalities w.r.t. + * sparse variables + * @param[out] nnz_sparse_Jaci number of nonzeros in the Jacobian of the inequalities w.r.t. + * sparse variables + * @param[out] nnz_sparse_Hess_Lagr_SS number of nonzeros in the (sparse) Hessian w.r.t. * sparse variables * @param[out] nnz_sparse_Hess_Lagr_SD reserved, always set to 0 */ - bool get_sparse_dense_blocks_info(int& nx_sparse, + bool get_sparse_dense_blocks_info(int& nx_sparse, int& nx_dense, - int& nnz_sparse_Jace, + int& nnz_sparse_Jace, int& nnz_sparse_Jaci, - int& nnz_sparse_Hess_Lagr_SS, + int& nnz_sparse_Hess_Lagr_SS, int& nnz_sparse_Hess_Lagr_SD); - + /** - * Evaluate objective. - * + * Evaluate objective. + * * @param[in] n number of variables - * @param[in] x array with the optimization variables or point at which to evaluate + * @param[in] x array with the optimization variables or point at which to evaluate * (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been + * @param[in] new_x indicates whether any of the other eval functions have been * evaluated previously (false) or not (true) at x * @param[out] obj_value the objective function value. */ @@ -203,52 +201,48 @@ class MdsEx1 : public hiop::hiopInterfaceMDS * * @param[in] num_cons number of constraints to evaluate (size of idx_cons array) * @param[in] idx_cons indexes of the constraints to evaluate (managed by Umpire) - * @param[in] x the point at which to evaluate (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] x the point at which to evaluate (managed by Umpire) + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x - * @param[out] cons array with values of the constraints (managed by Umpire, size num_cons) + * @param[out] cons array with values of the constraints (managed by Umpire, size num_cons) */ bool eval_cons(const size_type& n, - const size_type& m, + const size_type& m, const size_type& num_cons, - const index_type* idx_cons, + const index_type* idx_cons, const double* x, bool new_x, double* cons); - bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) + bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { - //return false so that HiOp will rely on the constraint evaluator defined above + // return false so that HiOp will rely on the constraint evaluator defined above return false; } /** - * Evaluation of the gradient of the objective. + * Evaluation of the gradient of the objective. * * @param[in] n number of variables * @param[in] x array with the optimization variables or point at which to evaluate * (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x - * @param[out] gradf array with the values of the gradient (managed by Umpire) + * @param[out] gradf array with the values of the gradient (managed by Umpire) */ bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); /** - * Evaluates the Jacobian of the constraints. Please check the user manual and the - * documentation of hiop::hiopInterfaceMDS for a detailed discussion of how the last - * four arguments are expected to behave. + * Evaluates the Jacobian of the constraints. Please check the user manual and the + * documentation of hiop::hiopInterfaceMDS for a detailed discussion of how the last + * four arguments are expected to behave. * * @param[in] n number of variables * @param[in] m Number of constraints * @param[in] num_cons number of constraints to evaluate (size of idx_cons array) * @param[in] idx_cons indexes of the constraints to evaluate (managed by Umpire) * @param[in] x the point at which to evaluate (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x * @param[in] nsparse number of sparse variables * @param[in] ndense number of dense variables @@ -259,82 +253,82 @@ class MdsEx1 : public hiop::hiopInterfaceMDS * @param[out] JacD array with the values of the dense Jacobian (managed by Umpire) */ virtual bool eval_Jac_cons(const size_type& n, - const size_type& m, + const size_type& m, const size_type& num_cons, const index_type* idx_cons, const double* x, bool new_x, const size_type& nsparse, - const size_type& ndense, + const size_type& ndense, const size_type& nnzJacS, index_type* iJacS, index_type* jJacS, - double* MJacS, + double* MJacS, double* JacD); /// Similar to the above, but not used in this example. virtual bool eval_Jac_cons(const size_type& n, - const size_type& m, + const size_type& m, const double* x, bool new_x, const size_type& nsparse, - const size_type& ndense, + const size_type& ndense, const size_type& nnzJacS, index_type* iJacS, index_type* jJacS, - double* MJacS, + double* MJacS, double* JacD) { - //return false so that HiOp will rely on the Jacobian evaluator defined above + // return false so that HiOp will rely on the Jacobian evaluator defined above return false; } /** - * Evaluate the Hessian of the Lagrangian function. Please consult the user manual for a + * Evaluate the Hessian of the Lagrangian function. Please consult the user manual for a * detailed discussion of the form the Lagrangian function takes. - * + * * @param[in] n number of variables * @param[in] m Number of constraints * @param[in] x the point at which to evaluate (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x * @param[in] obj_factor scalar that multiplies the objective term in the Lagrangian function * @param[in] lambda array with values of the multipliers used by the Lagrangian function * @param[in] new_lambda indicates whether lambda values changed since last call * @param[in] nsparse number of sparse variables * @param[in] ndense number of dense variables - * @param[in] nnzHSS number of nonzeros in the (sparse) Hessian w.r.t. sparse variables + * @param[in] nnzHSS number of nonzeros in the (sparse) Hessian w.r.t. sparse variables * @param[out] iHSS array of row indexes in the Hessian w.r.t. sparse variables * (managed by Umpire) - * @param[out] jHSS array of column indexes in the Hessian w.r.t. sparse variables + * @param[out] jHSS array of column indexes in the Hessian w.r.t. sparse variables * (managed by Umpire) * @param[out] MHSS array of nonzero values in the Hessian w.r.t. sparse variables * (managed by Umpire) - * @param[out] HDDD array with the values of the Hessian w.r.t. to dense variables + * @param[out] HDDD array with the values of the Hessian w.r.t. to dense variables * (managed by Umpire) - * @param[out] iHSD is reserved and should not be accessed - * @param[out] jHSD is reserved and should not be accessed + * @param[out] iHSD is reserved and should not be accessed + * @param[out] jHSD is reserved and should not be accessed * @param[out] MHSD is reserved and should not be accessed * @param[out] HHSD is reserved and should not be accessed */ - bool eval_Hess_Lagr(const size_type& n, - const size_type& m, - const double* x, - bool new_x, + bool eval_Hess_Lagr(const size_type& n, + const size_type& m, + const double* x, + bool new_x, const double& obj_factor, - const double* lambda, + const double* lambda, bool new_lambda, - const size_type& nsparse, - const size_type& ndense, - const size_type& nnzHSS, - index_type* iHSS, - index_type* jHSS, - double* MHSS, + const size_type& nsparse, + const size_type& ndense, + const size_type& nnzHSS, + index_type* iHSS, + index_type* jHSS, + double* MHSS, double* HDD, - size_type& nnzHSD, - index_type* iHSD, - index_type* jHSD, + size_type& nnzHSD, + index_type* iHSD, + index_type* jHSD, double* MHSD); /* Implementation of the primal starting point specification */ @@ -353,7 +347,7 @@ class MdsEx1 : public hiop::hiopInterfaceMDS bool get_warmstart_point(const size_type& n, const size_type& m, double* x0, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, double* ineq_slack, @@ -364,9 +358,9 @@ class MdsEx1 : public hiop::hiopInterfaceMDS } /* The public methods below are not part of hiopInterface. They are a proxy - * for user's (front end) code to set solutions from a previous solve. + * for user's (front end) code to set solutions from a previous solve. * - * Same behaviour can be achieved internally (in this class) if desired by + * Same behaviour can be achieved internally (in this class) if desired by * overriding @solution_callback and @get_starting_point */ void set_solution_primal(const double* x_vec); @@ -374,7 +368,7 @@ class MdsEx1 : public hiop::hiopInterfaceMDS void set_solution_duals(const double* zl_vec, const double* zu_vec, const double* lambda_vec); void initialize(); - + protected: int ns_, nd_; hiop::hiopMatrixDense* Q_; @@ -395,65 +389,60 @@ class MdsEx1 : public hiop::hiopInterfaceMDS class MdsEx1OneCallCons : public MdsEx1 { - public: - MdsEx1OneCallCons(int ns_in, std::string mem_space, bool empty_sp_row = false) +public: + MdsEx1OneCallCons(int ns_in, std::string mem_space, bool empty_sp_row = false) : MdsEx1(ns_in, mem_space, empty_sp_row) - { - } + {} - MdsEx1OneCallCons(int ns_in, int nd_in, std::string mem_space) + MdsEx1OneCallCons(int ns_in, int nd_in, std::string mem_space) : MdsEx1(ns_in, nd_in, mem_space) - { - } - - virtual ~MdsEx1OneCallCons() - { - } - - bool eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, - double* cons) - { - //return false so that HiOp will rely on the one-call constraint evaluator defined below - return false; - } - - /** all constraints evaluated in here */ - bool eval_cons(const size_type& n, const size_type& m, - const double* x, bool new_x, double* cons); - - virtual bool eval_Jac_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, - const size_type& nsparse, - const size_type& ndense, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS, - double* JacD) - { - return false; // so that HiOp will call the one-call full-Jacob function below - } - - virtual bool eval_Jac_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - const size_type& nsparse, - const size_type& ndense, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS, - double* JacD); + {} + + virtual ~MdsEx1OneCallCons() {} + + bool eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + double* cons) + { + // return false so that HiOp will rely on the one-call constraint evaluator defined below + return false; + } + + /** all constraints evaluated in here */ + bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons); + + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + const size_type& nsparse, + const size_type& ndense, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS, + double* JacD) + { + return false; // so that HiOp will call the one-call full-Jacob function below + } + + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const size_type& nsparse, + const size_type& ndense, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS, + double* JacD); }; #endif diff --git a/src/Drivers/MDS/hpc_benchmark.cpp b/src/Drivers/MDS/hpc_benchmark.cpp index 8610c297a..289294e18 100644 --- a/src/Drivers/MDS/hpc_benchmark.cpp +++ b/src/Drivers/MDS/hpc_benchmark.cpp @@ -15,78 +15,88 @@ void net_benchmark(const size_type baseDim); static const size_type default_num_doubles_per_rank = 32768; int main(int argc, char **argv) { - int nranks=1; + int nranks = 1; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); - int err = MPI_Comm_size(MPI_COMM_WORLD, &nranks); assert(MPI_SUCCESS==err); + int err = MPI_Comm_size(MPI_COMM_WORLD, &nranks); + assert(MPI_SUCCESS == err); #endif - size_type base_dim = nranks*default_num_doubles_per_rank; - if(argc>1) base_dim = nranks*atol(argv[1]); + size_type base_dim = nranks * default_num_doubles_per_rank; + if(argc > 1) base_dim = nranks * atol(argv[1]); net_benchmark(base_dim); #ifdef HIOP_USE_MPI MPI_Finalize(); #endif - return 0; + return 0; } -const static int NUM_REPETES=100; -const static int NUM_REDUCES=8; -const static int NUM_TESTS =5; -const static int TEST_X_SIZE=2; +const static int NUM_REPETES = 100; +const static int NUM_REDUCES = 8; +const static int NUM_TESTS = 5; +const static int TEST_X_SIZE = 2; void net_benchmark(const size_type baseDim) { #ifndef HIOP_USE_MPI printf("non-MPI build, skipping network benchmark\n"); #else - int nranks, my_rank; - int err = MPI_Comm_size(MPI_COMM_WORLD, &nranks); assert(MPI_SUCCESS==err); - err = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); assert(MPI_SUCCESS==err); + int err = MPI_Comm_size(MPI_COMM_WORLD, &nranks); + assert(MPI_SUCCESS == err); + err = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + assert(MPI_SUCCESS == err); - if(0==my_rank ) printf("Network benchmark: base dimension is %lu\n", baseDim); + if(0 == my_rank) printf("Network benchmark: base dimension is %lu\n", baseDim); - vector< vector > results( NUM_TESTS, vector(NUM_REPETES,0.) ); + vector > results(NUM_TESTS, vector(NUM_REPETES, 0.)); - for(int r=0; r // std::this_thread::sleep_for -#include // std::chrono::seconds +#include // std::this_thread::sleep_for +#include // std::chrono::seconds #include "hiopTimer.hpp" -using namespace hiop; +using namespace hiop; -/** The driver performs multiple solves per MPI process using MDS Ex1 +/** The driver performs multiple solves per MPI process using MDS Ex1 * * Intended to be used to test intra-node CPU cores affinity or GPU streams multiprocessing * * * Usage with bsub, for example, on Summit: see end of file for a submission script */ -int main(int argc, char *argv[]) +int main(int argc, char* argv[]) { int ret; - ret = MPI_Init(&argc, &argv); assert(ret==MPI_SUCCESS); + ret = MPI_Init(&argc, &argv); + assert(ret == MPI_SUCCESS); if(MPI_SUCCESS != ret) { printf("MPI_Init failed\n"); return -1; @@ -33,27 +34,29 @@ int main(int argc, char *argv[]) hiopTimer glob_timer, t; glob_timer.start(); - + int my_rank = 0, comm_size; - ret = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); assert(ret==MPI_SUCCESS); + ret = MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); + assert(ret == MPI_SUCCESS); - ret = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(ret==MPI_SUCCESS); + ret = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(ret == MPI_SUCCESS); const int num_probs_per_rank = 5; const int n_de = 2000; - const int n_sp = 2*n_de; + const int n_sp = 2 * n_de; - for(int i=0; i implementation of hiop::hiopInterfaceMDS + + // user's NLP -> implementation of hiop::hiopInterfaceMDS MdsEx1* my_nlp = new MdsEx1(n_sp, n_de); - + hiopNlpMDS nlp(*my_nlp); hiopAlgFilterIPMNewton solver(&nlp); status = solver.run(); @@ -62,36 +65,37 @@ int main(int argc, char *argv[]) delete my_nlp; t.stop(); - printf("[driver] Rank %d solved problem %d (obj=%12.5e) in %g sec\n", - my_rank, (i+1), obj_value, t.getElapsedTime()); + printf("[driver] Rank %d solved problem %d (obj=%12.5e) in %g sec\n", my_rank, (i + 1), obj_value, t.getElapsedTime()); fflush(stdout); } - glob_timer.stop(); double tmElapsed = glob_timer.getElapsedTime(); MPI_Barrier(MPI_COMM_WORLD); - std::this_thread::sleep_for (std::chrono::milliseconds((1+my_rank)*100)); + std::this_thread::sleep_for(std::chrono::milliseconds((1 + my_rank) * 100)); - printf("[driver] Rank %d finished solves in %g seconds\n", my_rank, tmElapsed); fflush(stdout); + printf("[driver] Rank %d finished solves in %g seconds\n", my_rank, tmElapsed); + fflush(stdout); double tmAvg, stdDevTm, aux; - ret = MPI_Allreduce(&tmElapsed, &tmAvg, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); assert(ret==MPI_SUCCESS); + ret = MPI_Allreduce(&tmElapsed, &tmAvg, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + assert(ret == MPI_SUCCESS); tmAvg /= comm_size; - if(comm_size>1) { - aux = (tmElapsed-tmAvg)*(tmElapsed-tmAvg)/(comm_size-1); + if(comm_size > 1) { + aux = (tmElapsed - tmAvg) * (tmElapsed - tmAvg) / (comm_size - 1); - ret = MPI_Allreduce(&aux, &stdDevTm, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); assert(ret==MPI_SUCCESS); + ret = MPI_Allreduce(&aux, &stdDevTm, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + assert(ret == MPI_SUCCESS); stdDevTm = sqrt(stdDevTm); } else { aux = 0.; } - if(0==my_rank) { - printf("\n\nSummary: average time %g sec, std dev %.2f percent \n\n", tmAvg, 100*stdDevTm/tmAvg); + if(0 == my_rank) { + printf("\n\nSummary: average time %g sec, std dev %.2f percent \n\n", tmAvg, 100 * stdDevTm / tmAvg); } - + MPI_Finalize(); return 0; } diff --git a/src/Drivers/PriDec/NlpPriDecEx1.cpp b/src/Drivers/PriDec/NlpPriDecEx1.cpp index 1f847886b..46fe30591 100644 --- a/src/Drivers/PriDec/NlpPriDecEx1.cpp +++ b/src/Drivers/PriDec/NlpPriDecEx1.cpp @@ -1,50 +1,50 @@ #include "NlpPriDecEx1.hpp" #include -#include //for memcpy +#include //for memcpy #include #include - PriDecEx1::PriDecEx1(int ns_) : PriDecEx1(ns_, ns_) -{ -} // ns = nx, nd=S +{} // ns = nx, nd=S PriDecEx1::PriDecEx1(int ns_, int S_) - : ns(ns_), evaluator_(nullptr) + : ns(ns_), + evaluator_(nullptr) { - if(ns<0) { + if(ns < 0) { ns = 0; } else { - if(4*(ns/4) != ns) { - ns = 4*((4+ns)/4); - printf("[warning] number (%d) of sparse vars is not a multiple ->was altered to %d\n", - ns_, ns); + if(4 * (ns / 4) != ns) { + ns = 4 * ((4 + ns) / 4); + printf("[warning] number (%d) of sparse vars is not a multiple ->was altered to %d\n", ns_, ns); } } - if(S_<0) { - S=0; + if(S_ < 0) { + S = 0; } else { S = S_; } - if(S=4 && "number of variables should be greater than 4 for this example"); - assert(n==ns); - //define x bounds - for(int i=0; i=4 && "number of variables should be greater than 4 for this example"); + assert(n == ns); + // define x bounds + for(int i = 0; i < ns; ++i) xlow[i] = 0.; + for(int i = 0; i < ns; ++i) xupp[i] = +1e+20; + for(int i = 0; i < ns; ++i) type[i] = hiopNonlinear; + // uncoupled x fixed + // for testing + if(nc < ns) { + for(int i = nc + 1; i < ns; ++i) xlow[i] = 1.; + for(int i = nc + 1; i < ns; ++i) xupp[i] = 1.; + xupp[0] = 1.; + xupp[0] = 1.; } return true; }; - bool PriDecEx1::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m==0); + assert(m == 0); return true; }; bool PriDecEx1::eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { - obj_value=0.;// x[0]*(x[0]-1.); - //sum 0.5 {(x_i-1)*(x_{i}-1) : i=1,...,ns} - for(int i=0; iget_rgrad()!=NULL); + assert(evaluator_->get_rgrad() != NULL); evaluator_->eval_f(n, x, new_x, obj_value); } return true; }; - -bool PriDecEx1::eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, +bool PriDecEx1::eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, double* cons) { - assert(num_cons==0); + assert(num_cons == 0); return true; }; - + bool PriDecEx1::eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) { //! assert(ns>=4); assert(Q->n()==ns/4); assert(Q->m()==ns/4); - for(int i=0; iget_rgrad()!=NULL); + assert(evaluator_->get_rgrad() != NULL); evaluator_->eval_grad(n, x, new_x, gradf); } return true; @@ -138,16 +134,16 @@ bool PriDecEx1::eval_grad_f(const size_type& n, const double* x, bool new_x, dou bool PriDecEx1::get_starting_point(const size_type& global_n, double* x0_) { - assert(global_n==ns); - for(int i=0; iSetStringValue("duals_update_type", "linear"); - nlp.options->SetStringValue("duals_init", "zero"); // "lsq" or "zero" + + nlp.options->SetStringValue("duals_update_type", "linear"); + nlp.options->SetStringValue("duals_init", "zero"); // "lsq" or "zero" nlp.options->SetStringValue("compute_mode", "cpu"); nlp.options->SetStringValue("KKTLinsys", "xdycyd"); nlp.options->SetStringValue("fixed_var", "relax"); @@ -243,7 +242,7 @@ solve_master(hiopVector& x, double* x_vec = x.local_data(); solver.getSolution(x_vec); - if(status<0) { + if(status < 0) { printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_); return status; } @@ -251,24 +250,23 @@ solve_master(hiopVector& x, sol_ = new double[n_]; } - memcpy(sol_, x_vec, n_*sizeof(double)); - //assert("for debugging" && false); //for debugging purpose + memcpy(sol_, x_vec, n_ * sizeof(double)); + // assert("for debugging" && false); //for debugging purpose return Solve_Success; - }; -bool PriDecMasterProblemEx1::eval_f_rterm(size_type idx, const int& n,const double* x, double& rval) +bool PriDecMasterProblemEx1::eval_f_rterm(size_type idx, const int& n, const double* x, double& rval) { rval = 0.; - for(int i=0; i(idx)) { - rval += (x[i]+S_)*(x[i]+S_); + for(int i = 0; i < n; i++) { + if(i == static_cast(idx)) { + rval += (x[i] + S_) * (x[i] + S_); } else { - rval += x[i]*x[i]; + rval += x[i] * x[i]; } } rval *= 0.5; - //rval /= S_; + // rval /= S_; return true; }; @@ -277,20 +275,19 @@ bool PriDecMasterProblemEx1::eval_grad_rterm(size_type idx, const int& n, double { assert(static_cast(nc_) == n); double* grad_vec = grad.local_data(); - for(int i=0; i(idx)) { - grad_vec[i] = (x[i]+S_); + for(int i = 0; i < n; i++) { + if(i == static_cast(idx)) { + grad_vec[i] = (x[i] + S_); } else { grad_vec[i] = x[i]; } } return true; -}; +}; -bool PriDecMasterProblemEx1:: -set_recourse_approx_evaluator(const int n, hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) -{ - my_nlp->set_quadratic_terms( n, evaluator); - return true; +bool PriDecMasterProblemEx1::set_recourse_approx_evaluator(const int n, + hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) +{ + my_nlp->set_quadratic_terms(n, evaluator); + return true; } - diff --git a/src/Drivers/PriDec/NlpPriDecEx1.hpp b/src/Drivers/PriDec/NlpPriDecEx1.hpp index a0d3663f8..f4eb8c0f0 100644 --- a/src/Drivers/PriDec/NlpPriDecEx1.hpp +++ b/src/Drivers/PriDec/NlpPriDecEx1.hpp @@ -8,17 +8,17 @@ #include "hiopAlgFilterIPM.hpp" #include -#include //for memcpy +#include //for memcpy #include #include #include -/** This file provides an example of what a user of hiop::hiopInterfacePriDecProblem - * should implement in order to provide both the base and recourse problem to +/** This file provides an example of what a user of hiop::hiopInterfacePriDecProblem + * should implement in order to provide both the base and recourse problem to * hiop::hiopAlgPrimalDecomposition solver - * + * * Base case problem f - * sum 0.5 {(x_i-1)*(x_i-1) : i=1,...,ns} + * sum 0.5 {(x_i-1)*(x_i-1) : i=1,...,ns} * x_i >=0 * Contingency/recourse problems r * r = 1/S * \sum{i=1^S} 0.5*|x+Se_i|^2 @@ -30,8 +30,8 @@ using namespace hiop; -/** PriDecEx1 is the class for the base case problem. It is also - * a building block for the master problem. +/** PriDecEx1 is the class for the base case problem. It is also + * a building block for the master problem. * @param include_r is a boolean that determines whether a recourse objective is present * @param evaluator_ contains the information for the recourse objective approximation * If include_r is true, the objective of this class will contain the extra recourse term. @@ -41,73 +41,72 @@ class PriDecEx1 : public hiop::hiopInterfaceDenseConstraints { public: PriDecEx1(int ns_); - + PriDecEx1(int ns_, int S_); - + PriDecEx1(int ns_, int S_, int nc_); - PriDecEx1(int ns_, int S_, int nc_,bool include_); - - PriDecEx1(int ns_, - int S_, - bool include, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); + PriDecEx1(int ns_, int S_, int nc_, bool include_); + + PriDecEx1(int ns_, int S_, bool include, hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); virtual ~PriDecEx1(); - + bool get_prob_sizes(size_type& n, size_type& m); - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); - + virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); - + virtual bool eval_cons(const size_type& n, - const size_type& m, + const size_type& m, const size_type& num_cons, - const index_type* idx_cons, + const index_type* idx_cons, const double* x, bool new_x, double* cons); - - // sum 0.5 {(x_i-1)*(x_{i}-1) : i=1,...,ns} + + // sum 0.5 {(x_i-1)*(x_{i}-1) : i=1,...,ns} virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); - + // Implementation of the primal starting point specification // virtual bool get_starting_point(const size_type& global_n, double* x0_); - + virtual bool get_starting_point(const size_type& n, const size_type& m, double* x0_, bool& duals_avail, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, bool& slacks_avail, double* ineq_slack); - + // pass the COMM_SELF communicator since this example is only intended to run inside 1 MPI process // virtual bool get_MPI_comm(MPI_Comm& comm_out); - virtual bool eval_Jac_cons(const size_type& n, const size_type& m, - const size_type& num_cons, const index_type* idx_cons, - const double* x, bool new_x, double* Jac); - - // Test to see if the quadratic approxmation is defined. + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + double* Jac); + + // Test to see if the quadratic approxmation is defined. virtual bool quad_is_defined(); - + /** Set up the recourse approximation: evaluator_. */ - virtual bool set_quadratic_terms(const int& n, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); - // Set the include_r boolean. + virtual bool set_quadratic_terms(const int& n, hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); + // Set the include_r boolean. virtual bool set_include(bool include); - + protected: - int ns,S; + int ns, S; int nc; bool include_r = false; hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator_; - }; /** @@ -118,71 +117,70 @@ class PriDecMasterProblemEx1 : public hiopInterfacePriDecProblem { public: PriDecMasterProblemEx1(int n, int S) - : n_(n), - S_(S), - obj_(-1e20), - sol_(nullptr) + : n_(n), + S_(S), + obj_(-1e20), + sol_(nullptr) { nc_ = n; - my_nlp = new PriDecEx1(n_,S_); + my_nlp = new PriDecEx1(n_, S_); } PriDecMasterProblemEx1(int n, int S, int nc) - : n_(n), - S_(S), - nc_(nc), - obj_(-1e20), - sol_(nullptr) + : n_(n), + S_(S), + nc_(nc), + obj_(-1e20), + sol_(nullptr) { - my_nlp = new PriDecEx1(n,S,nc); + my_nlp = new PriDecEx1(n, S, nc); } virtual ~PriDecMasterProblemEx1() { delete[] sol_; delete my_nlp; } - + virtual hiopSolveStatus solve_master(hiopVector& x, const bool& include_r, const double& rval = 0, - const double* grad= 0, + const double* grad = 0, const double* hess = 0, - const char* master_options_file=nullptr); - + const char* master_options_file = nullptr); + /** * This function returns the recourse objective, which is 0.5*(x+Se_i)(x+Se_i). */ - virtual bool eval_f_rterm(size_type idx, const int& n,const double* x, double& rval); - + virtual bool eval_f_rterm(size_type idx, const int& n, const double* x, double& rval); + /** * This function returns the recourse gradient. */ virtual bool eval_grad_rterm(size_type idx, const int& n, double* x, hiopVector& grad); - + /** - * This function sets up the approximation of the recourse objective based on the function value and gradient + * This function sets up the approximation of the recourse objective based on the function value and gradient * returned by eval_f_rterm and eval_grad_rterm. - * Implemented with alpha = 1 for now only. + * Implemented with alpha = 1 for now only. * This function is called only if quadratic regularization is included. */ - virtual bool set_recourse_approx_evaluator(const int n, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); + virtual bool set_recourse_approx_evaluator(const int n, hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); // Returns the number S of recourse terms. - size_type get_num_rterms() const {return S_;} - size_type get_num_vars() const {return n_;} + size_type get_num_rterms() const { return S_; } + size_type get_num_vars() const { return n_; } // Returns the solution. - void get_solution(double* x) const + void get_solution(double* x) const { - for(int i=0; i(n_); i++) - x[i] = sol_[i]; + for(int i = 0; i < static_cast(n_); i++) x[i] = sol_[i]; } - double get_objective() {return obj_;} + double get_objective() { return obj_; } + private: size_type n_; size_type S_; size_type nc_; PriDecEx1* my_nlp; double obj_; - double* sol_; + double* sol_; }; #endif diff --git a/src/Drivers/PriDec/NlpPriDecEx1Driver.cpp b/src/Drivers/PriDec/NlpPriDecEx1Driver.cpp index 86a8cb2a5..560278953 100644 --- a/src/Drivers/PriDec/NlpPriDecEx1Driver.cpp +++ b/src/Drivers/PriDec/NlpPriDecEx1Driver.cpp @@ -1,6 +1,6 @@ -//the problem to be solved +// the problem to be solved #include "NlpPriDecEx1.hpp" -//the solver +// the solver #include "hiopAlgPrimalDecomp.hpp" #ifdef HIOP_USE_MAGMA @@ -18,112 +18,109 @@ #define MPI_COMM_WORLD 0 #endif - /**t - * Driver for PriDec Example 1 that illustrates the use of hiop::hiopAlgPrimalDecomposition - * + * Driver for PriDec Example 1 that illustrates the use of hiop::hiopAlgPrimalDecomposition + * * @note This example is built only when HIOP_USE_MPI is enabled during cmake build * and require at least two MPI ranks in MPI_COMM_WORLD. * */ static bool self_check(int nx, int S, double obj_value); -static bool parse_arguments(int argc, char **argv, - bool& self_check, - int& nx, - int& S) +static bool parse_arguments(int argc, char** argv, bool& self_check, int& nx, int& S) { self_check = false; nx = 20; S = 100; switch(argc) { - case 1: - //no arguments - return true; - break; - case 4: // 3 arguments + case 1: + // no arguments + return true; + break; + case 4: // 3 arguments { - if(std::string(argv[3]) == "-selfcheck") - { - self_check=true; + if(std::string(argv[3]) == "-selfcheck") { + self_check = true; nx = std::atoi(argv[1]); S = std::atof(argv[2]); - if(S<3) S = 4; - if(nx<=0) return false; + if(S < 3) S = 4; + if(nx <= 0) return false; } else { return false; } } - case 3: //2 arguments + case 3: // 2 arguments { nx = atoi(argv[1]); - if(nx<=0) return false; + if(nx <= 0) return false; S = atoi(argv[2]); - if(S<3) S = 4; + if(S < 3) S = 4; } - case 2: //1 argument + case 2: // 1 argument { - if(std::string(argv[1]) == "-selfcheck") - { - self_check=true; + if(std::string(argv[1]) == "-selfcheck") { + self_check = true; } else { nx = atoi(argv[1]); - if(nx<=0) return false; + if(nx <= 0) return false; } - } - break; - default: - return false; //4 or more arguments + } break; + default: + return false; // 4 or more arguments } - if(self_check && nx!=20 && S!=100) { - printf("Error: incorrect input parameters: '-selfcheck' must be used with predefined " - "values for input parameters, nx=20 S=100.\n"); - return false; + if(self_check && nx != 20 && S != 100) { + printf( + "Error: incorrect input parameters: '-selfcheck' must be used with predefined " + "values for input parameters, nx=20 S=100.\n"); + return false; } - + return true; }; static void usage(const char* exeName) { - printf("HiOp driver %s that solves a nonconvex synthetic problem of variable size in the " - "primal decomposition formulation. )\n", - exeName); + printf( + "HiOp driver %s that solves a nonconvex synthetic problem of variable size in the " + "primal decomposition formulation. )\n", + exeName); printf("Usage: \n"); printf(" '$ %s nx S -selfcheck '\n", exeName); printf("Arguments, all integers, excepting strings '-selfcheck' \n"); printf(" 'nx': # of base case variables [default 20, optional, nonnegative integer].\n"); printf(" 'S': # of recourse/contingency problems [default 100, optional, nonnegative integer].\n"); - printf(" '-selfcheck': compares the optimal objective with nx being 20 and " - "S being 100 (these two exact values must be passed as arguments). [optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with nx being 20 and " + "S being 100 (these two exact values must be passed as arguments). [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); - ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); + ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(MPI_SUCCESS == ierr); #endif #ifdef HIOP_USE_MAGMA magma_init(); #endif - int nx=20; - int S=100; - int nc=20; - //index of coupled x in the entire x - //for testing purpos + int nx = 20; + int S = 100; + int nc = 20; + // index of coupled x in the entire x + // for testing purpos int* list = new int[nc]; - for(int i=0;iset_include(include_r); @@ -45,25 +46,25 @@ PriDecMasterProbleEx2::solve_master(hiopVector& x, if(include_r) { assert(basecase_->quad_is_defined()); } - + hiopNlpSparse nlp(*basecase_, master_options_file); // any of the options below can be overwritten by specifying them in the 'hiop_pridec_master.options' file - - //nlp.options->SetStringValue("compute_mode", "hybrid"); - //nlp.options->SetStringValue("dualsUpdateType", "linear"); - //nlp.options->SetStringValue("fixed_var", "relax"); + // nlp.options->SetStringValue("compute_mode", "hybrid"); + // nlp.options->SetStringValue("dualsUpdateType", "linear"); + + // nlp.options->SetStringValue("fixed_var", "relax"); nlp.options->SetStringValue("Hessian", "analytical_exact"); nlp.options->SetStringValue("KKTLinsys", "xdycyd"); nlp.options->SetStringValue("compute_mode", "cpu"); - //nlp.options->SetStringValue("compute_mode", "hybrid"); - //nlp.options->SetStringValue("mem_space", mem_space.c_str()); + // nlp.options->SetStringValue("compute_mode", "hybrid"); + // nlp.options->SetStringValue("mem_space", mem_space.c_str()); nlp.options->SetIntegerValue("verbosity_level", 1); nlp.options->SetNumericValue("mu0", 1e-1); - //nlp.options->SetNumericValue("tolerance", 1e-5); - + // nlp.options->SetNumericValue("tolerance", 1e-5); + hiopAlgFilterIPMNewton solver(&nlp); status = solver.run(); @@ -72,100 +73,99 @@ PriDecMasterProbleEx2::solve_master(hiopVector& x, double* x_vec = x.local_data(); solver.getSolution(x_vec); - if(status<0) { + if(status < 0) { printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, solver.getObjective()); return status; } - + // for(int i=0;iget_rec_obj(nx_, x_vec, rec_appx); } - + return Solve_Success; // return hiop::SolverInternal_Error; } -bool PriDecMasterProbleEx2:: -set_recourse_approx_evaluator(const int n, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) +bool PriDecMasterProbleEx2::set_recourse_approx_evaluator(const int n, + hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) { - assert(n==nc_); + assert(n == nc_); basecase_->set_quadratic_terms(n, evaluator); - return true; + return true; } bool PriDecMasterProbleEx2::eval_f_rterm(size_type idx, const int& n, const double* x, double& rval) { - assert(nx_==n); - rval=-1e+20; + assert(nx_ == n); + rval = -1e+20; double* xi; hiopSolveStatus status; - + #ifdef HIOP_USE_MPI // uncomment if want to monitor contingency computing time - //double t3 = MPI_Wtime(); - //double t4 = 0.; -#endif - - // xi can be set below - xi = new double[nS_]; - for(int i=0;iset_sparse(0.3); } */ - + hiopNlpMDS nlp(*ex9_recourse); nlp.options->SetStringValue("duals_update_type", "linear"); - //nlp.options->SetStringValue("dualsInitialization", "zero"); + // nlp.options->SetStringValue("dualsInitialization", "zero"); nlp.options->SetStringValue("Hessian", "analytical_exact"); #ifdef HIOP_USE_GPU nlp.options->SetStringValue("compute_mode", "hybrid"); -#else +#else nlp.options->SetStringValue("compute_mode", "cpu"); #endif - //nlp.options->SetStringValue("time_kkt", "on"); + // nlp.options->SetStringValue("time_kkt", "on"); nlp.options->SetIntegerValue("verbosity_level", 1); nlp.options->SetNumericValue("mu0", 1e-1); - //nlp.options->SetNumericValue("tolerance", 1e-5); + // nlp.options->SetNumericValue("tolerance", 1e-5); hiopAlgFilterIPMNewton solver(&nlp); - //assert("for debugging" && false); //for debugging purpose + // assert("for debugging" && false); //for debugging purpose status = solver.run(); - assert(status<=hiopSolveStatus::User_Stopped); //check solver status if necessary - rval = solver.getObjective(); - if(y_==NULL) { + assert(status <= hiopSolveStatus::User_Stopped); // check solver status if necessary + rval = solver.getObjective(); + if(y_ == NULL) { y_ = new double[ny_]; } solver.getSolution(y_); - - #ifdef HIOP_USE_MPI - // uncomment if want to monitor contingency computing time - /* t4 = MPI_Wtime(); - if(idx==0||idx==1) { - printf( "Elapsed time for contingency %d is %f\n",idx, t4 - t3 ); - printf(" Objective for idx %d value %18.12e, xi %18.12e\n",idx,rval,xi[0]); - } - */ - #endif + +#ifdef HIOP_USE_MPI +// uncomment if want to monitor contingency computing time +/* t4 = MPI_Wtime(); + if(idx==0||idx==1) { + printf( "Elapsed time for contingency %d is %f\n",idx, t4 - t3 ); + printf(" Objective for idx %d value %18.12e, xi %18.12e\n",idx,rval,xi[0]); + } +*/ +#endif delete[] xi; delete ex9_recourse; @@ -175,32 +175,22 @@ bool PriDecMasterProbleEx2::eval_f_rterm(size_type idx, const int& n, const doub // returns the gradient computed in eval_f_rterm bool PriDecMasterProbleEx2::eval_grad_rterm(size_type idx, const int& n, double* x, hiopVector& grad) { - assert(nx_==n); + assert(nx_ == n); double* grad_vec = grad.local_data(); - for(int i=0;i -//base interface (NLP specification for primal decomposable problems) +// base interface (NLP specification for primal decomposable problems) #include "hiopInterfacePrimalDecomp.hpp" -//basecase sparse NLP +// basecase sparse NLP #include "NlpPriDecEx2UserBasecase.hpp" -//recourse MDS NLP +// recourse MDS NLP #include "NlpPriDecEx2UserRecourseMds.hpp" /** @@ -29,21 +29,21 @@ * * Mathematically, the recourse function r_i(x) is defined based on the recourse problem * - * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that - * - * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 - * - * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 - * + * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that + * + * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 + * + * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 + * * y_k - y_{k-1} >=0, k=2, ..., n_y * * y_1 >=0 - * + * * Eventually each of @f$ \xi^i_1, \xi^i_2, ..., \xi^i_{n_S} @f$ can be withdrawn from U[-0.25, 0.25] * and the size n_S of the sample satisfying 1<=n_S<=n_y. They are set to 1.0 for now. * * When $S$ samples (\xi^i_1, \xi^i_2, ..., \xi^i_{n_S}), i=1,\ldots,S, are used the - * primal decomposable problem looks like + * primal decomposable problem looks like * * min_x basecase(x) + 1/S \sum_{i=1}^S r_i(x;\xi_i) * @@ -51,16 +51,16 @@ * stochastic programming problem * * min_x basecase(x) + E_\xi[ r(x,\xi) | \xi ~ U[-0.25,0.25]] - * + * * where the random function r(x;\xi) is defined similarily to r_i(x;\xi) (excepting the * scaling by 1/S). * * centered, multiline: - * @f[ + * @f[ * \min_x \sum_{i=1}^n f(x_i) - * @f] - * - * The recourse problems are implemented in hiop MDS class. + * @f] + * + * The recourse problems are implemented in hiop MDS class. */ using namespace hiop; @@ -68,35 +68,34 @@ class PriDecMasterProbleEx2 : public hiop::hiopInterfacePriDecProblem { public: PriDecMasterProbleEx2(size_type nx, size_type ny, size_type nS, size_type S); - + virtual ~PriDecMasterProbleEx2(); hiop::hiopSolveStatus solve_master(hiopVector& x, const bool& include_r, - const double& rval = 0, + const double& rval = 0, const double* grad = 0, - const double*hess = 0, + const double* hess = 0, const char* master_options_file = nullptr); - virtual bool set_recourse_approx_evaluator(const int n, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); - + virtual bool set_recourse_approx_evaluator(const int n, hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); + /** * This function solves the idxth recourse optimization subproblem and returns the objective. * n is the number of coupled x, not the entire dimension of x, and might be denoted as nc_ elsewhere. * rval is the return value of the recourse solution function evaluation. */ bool eval_f_rterm(size_type idx, const int& n, const double* x, double& rval); - + /** * This function computes the gradient of the recourse solution function w.r.t x. - * n is the number of coupled x, not the entire dimension of x, and + * n is the number of coupled x, not the entire dimension of x, and * grad is the output. */ bool eval_grad_rterm(size_type idx, const int& n, double* x, hiopVector& grad); - + inline size_type get_num_rterms() const; - + inline size_type get_num_vars() const; void get_solution(double* x) const; @@ -108,11 +107,11 @@ class PriDecMasterProbleEx2 : public hiop::hiopInterfacePriDecProblem size_type nx_; /// dimension of the coupled variable, nc_<=nx_ size_type nc_; - ///dimension of recourse problem primal variable `y` for each contingency + /// dimension of recourse problem primal variable `y` for each contingency size_type ny_; /// dimension of uncertainty dimension entering the recourse problem size_type nS_; - ///number of sample to use, effectively the number of recourse terms + /// number of sample to use, effectively the number of recourse terms size_type S_; double* y_; diff --git a/src/Drivers/PriDec/NlpPriDecEx2Driver.cpp b/src/Drivers/PriDec/NlpPriDecEx2Driver.cpp index 60171630a..f68ab1297 100644 --- a/src/Drivers/PriDec/NlpPriDecEx2Driver.cpp +++ b/src/Drivers/PriDec/NlpPriDecEx2Driver.cpp @@ -18,13 +18,12 @@ #ifndef MPI_COMM_WORLD #define MPI_COMM_WORLD 0 -#endif #endif - +#endif /** - * Driver for example 9 that illustrates the use of hiop::hiopAlgPrimalDecomposition - * + * Driver for example 9 that illustrates the use of hiop::hiopAlgPrimalDecomposition + * * @note This example is built only when HIOP_USE_MPI and HIOP_SPARSE is enabled during cmake build * and require at least two MPI ranks in MPI_COMM_WORLD. * @@ -35,103 +34,103 @@ using namespace hiop; // checking the solution of a given nx and S static bool self_check(int nx, int S, double obj_value); -static bool parse_arguments(int argc, char **argv, - bool& self_check, - int& nx, - int& S) +static bool parse_arguments(int argc, char** argv, bool& self_check, int& nx, int& S) { self_check = false; nx = 20; S = 5; switch(argc) { - case 1: - //no arguments - return true; - break; - case 4: // 3 arguments + case 1: + // no arguments + return true; + break; + case 4: // 3 arguments { if(std::string(argv[3]) == "-selfcheck") { - self_check=true; + self_check = true; nx = std::atoi(argv[1]); S = std::atof(argv[2]); - if(S<3) { + if(S < 3) { S = 4; } - if(nx<=0) { + if(nx <= 0) { return false; } } else { return false; } } - case 3: //2 arguments + case 3: // 2 arguments { nx = atoi(argv[1]); - if(nx<=0) return false; + if(nx <= 0) return false; S = atoi(argv[2]); - if(S<3) S = 4; + if(S < 3) S = 4; } - case 2: //1 argument + case 2: // 1 argument { if(std::string(argv[1]) == "-selfcheck") { - self_check=true; + self_check = true; } else { nx = atoi(argv[1]); - if(nx<=0) { + if(nx <= 0) { return false; } } - } - break; - default: - return false; // 4 or more arguments + } break; + default: + return false; // 4 or more arguments } - if(self_check && nx!=20 && S!=5) { - printf("Error: incorrect input parameters: '-selfcheck' must be used with predefined " - "values for input parameters, nx=20 S=5\n"); - return false; + if(self_check && nx != 20 && S != 5) { + printf( + "Error: incorrect input parameters: '-selfcheck' must be used with predefined " + "values for input parameters, nx=20 S=5\n"); + return false; } - + return true; }; static void usage(const char* exeName) { - printf("HiOp driver %s that solves a nonconvex synthetic problem of variable size in the " - "primal decomposition formulation. )\n", - exeName); + printf( + "HiOp driver %s that solves a nonconvex synthetic problem of variable size in the " + "primal decomposition formulation. )\n", + exeName); printf("Usage: \n"); printf(" '$ %s nx S -selfcheck '\n", exeName); printf("Arguments, all integers, except strings '-selfcheck' \n"); printf(" 'nx': # of base case variables [default 20, optional, nonnegative integer].\n"); printf(" 'S': # of recourse/contingency problems [default 5, optional, nonnegative integer].\n"); - printf(" '-selfcheck': compares the optimal objective with nx being 20 and " - "S being 5 (these two exact values must be passed as arguments). [optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with nx being 20 and " + "S being 5 (these two exact values must be passed as arguments). [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); - ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); + ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(MPI_SUCCESS == ierr); #endif #ifdef HIOP_USE_MAGMA magma_init(); #endif - int nx = 20; // nx = ny for this problem - int nS = 5; // dimension of \xi + int nx = 20; // nx = ny for this problem + int nS = 5; // dimension of \xi int S = 5; - + bool selfCheck; - + if(!parse_arguments(argc, argv, selfCheck, nx, S)) { usage(argv[0]); return 1; @@ -141,31 +140,31 @@ int main(int argc, char **argv) hiop::hiopAlgPrimalDecomposition pridec_solver(&pridec_problem, MPI_COMM_WORLD); pridec_solver.set_initial_alpha_ratio(0.5); pridec_solver.set_alpha_min(0.3); - //pridec_solver.set_local_accum("true"); - //pridec_solver.set_tolerance(1e-6); - //pridec_solver.set_max_iteration(5); + // pridec_solver.set_local_accum("true"); + // pridec_solver.set_tolerance(1e-6); + // pridec_solver.set_max_iteration(5); auto status = pridec_solver.run(); - + int it = pridec_solver.getNumIterations(); printf("number of iterations %d\n", it); - if(status!=Solve_Success) { - if(rank==0) { + if(status != Solve_Success) { + if(rank == 0) { printf("Solve was NOT successfull."); } } else { - if(rank==0) { - printf("Solve was successfull. Optimal value: %12.5e\n",pridec_solver.getObjective()); + if(rank == 0) { + printf("Solve was successfull. Optimal value: %12.5e\n", pridec_solver.getObjective()); } } - + if(selfCheck) { - if(rank==0) { - if(!self_check(nx,S, pridec_solver.getObjective())) { + if(rank == 0) { + if(!self_check(nx, S, pridec_solver.getObjective())) { return -1; } } - } - + } + #ifdef HIOP_USE_MAGMA magma_finalize(); #endif @@ -173,21 +172,23 @@ int main(int argc, char **argv) MPI_Finalize(); #endif - //printf("Returned successfully from driver! Rank=%d\n", rank); + // printf("Returned successfully from driver! Rank=%d\n", rank); return 0; } - static bool self_check(int nx, int S, double obj_value) { double obj_true = 0.2633379371706; double err = 1e-5; - if(fabs((obj_value)-obj_true)<1e-5) { - printf("selfcheck success (error less than %18.12e), objective value is %18.12e \n", err,obj_value); + if(fabs((obj_value)-obj_true) < 1e-5) { + printf("selfcheck success (error less than %18.12e), objective value is %18.12e \n", err, obj_value); return true; } else { - printf("selfcheck failure. Objective (%18.12e) does not agree with the saved value (%18.12e) for nx=%d,S=%d.\n", - obj_value, obj_true, nx,S); + printf("selfcheck failure. Objective (%18.12e) does not agree with the saved value (%18.12e) for nx=%d,S=%d.\n", + obj_value, + obj_true, + nx, + S); return false; } return true; diff --git a/src/Drivers/PriDec/NlpPriDecEx2Sparse.cpp b/src/Drivers/PriDec/NlpPriDecEx2Sparse.cpp index e2dcc2565..f910461cc 100644 --- a/src/Drivers/PriDec/NlpPriDecEx2Sparse.cpp +++ b/src/Drivers/PriDec/NlpPriDecEx2Sparse.cpp @@ -3,11 +3,14 @@ #include "hiopAlgFilterIPM.hpp" using namespace hiop; - -PriDecMasterProbleEx2Sparse:: -PriDecMasterProbleEx2Sparse(size_type nx, size_type ny, size_type nS, size_type S) : nx_(nx), ny_(ny),nS_(nS),S_(S) + +PriDecMasterProbleEx2Sparse::PriDecMasterProbleEx2Sparse(size_type nx, size_type ny, size_type nS, size_type S) + : nx_(nx), + ny_(ny), + nS_(nS), + S_(S) { - assert(nx==ny); + assert(nx == ny); y_ = new double[ny_]; sol_ = new double[nx_]; obj_ = 1e20; @@ -18,21 +21,20 @@ PriDecMasterProbleEx2Sparse(size_type nx, size_type ny, size_type nS, size_type PriDecMasterProbleEx2Sparse::~PriDecMasterProbleEx2Sparse() { delete[] y_; - delete[] sol_; + delete[] sol_; delete basecase_; }; - -hiop::hiopSolveStatus -PriDecMasterProbleEx2Sparse::solve_master(hiopVector& x, - const bool& include_r, - const double& rval/*=0*/, - const double* grad/*=0*/, - const double*hess /*=0*/, - const char* master_options_file/*=nullptr*/) + +hiop::hiopSolveStatus PriDecMasterProbleEx2Sparse::solve_master(hiopVector& x, + const bool& include_r, + const double& rval /*=0*/, + const double* grad /*=0*/, + const double* hess /*=0*/, + const char* master_options_file /*=nullptr*/) { - obj_=-1e+20; + obj_ = -1e+20; hiopSolveStatus status; - if(basecase_==nullptr) { + if(basecase_ == nullptr) { basecase_ = new PriDecBasecaseProbleEx2(nx_); } basecase_->set_include(include_r); @@ -40,22 +42,22 @@ PriDecMasterProbleEx2Sparse::solve_master(hiopVector& x, if(include_r) { assert(basecase_->quad_is_defined()); } - + hiopNlpSparse nlp(*basecase_, master_options_file); // any of the options below can be overwritten by specifying them in the 'hiop_pridec_master.options' file - //nlp.options->SetStringValue("fixed_var", "relax"); + // nlp.options->SetStringValue("fixed_var", "relax"); nlp.options->SetStringValue("Hessian", "analytical_exact"); nlp.options->SetStringValue("KKTLinsys", "xdycyd"); nlp.options->SetStringValue("compute_mode", "cpu"); - //nlp.options->SetStringValue("compute_mode", "hybrid"); - //nlp.options->SetStringValue("mem_space", mem_space.c_str()); + // nlp.options->SetStringValue("compute_mode", "hybrid"); + // nlp.options->SetStringValue("mem_space", mem_space.c_str()); nlp.options->SetIntegerValue("verbosity_level", 1); nlp.options->SetNumericValue("mu0", 1e-1); - //nlp.options->SetNumericValue("tolerance", 1e-5); - + // nlp.options->SetNumericValue("tolerance", 1e-5); + hiopAlgFilterIPMNewton solver(&nlp); status = solver.run(); @@ -64,101 +66,101 @@ PriDecMasterProbleEx2Sparse::solve_master(hiopVector& x, double* x_vec = x.local_data(); solver.getSolution(x_vec); - if(status<0) { + if(status < 0) { printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, solver.getObjective()); return status; } - + // for(int i=0;iget_rec_obj(nx_, x_vec, rec_appx); } - + return Solve_Success; // return hiop::SolverInternal_Error; } -bool PriDecMasterProbleEx2Sparse:: -set_recourse_approx_evaluator(const int n, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) +bool PriDecMasterProbleEx2Sparse::set_recourse_approx_evaluator( + const int n, + hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) { - assert(n==nc_); + assert(n == nc_); basecase_->set_quadratic_terms(n, evaluator); - return true; + return true; } bool PriDecMasterProbleEx2Sparse::eval_f_rterm(size_type idx, const int& n, const double* x, double& rval) { - assert(nx_==n); - rval=-1e+20; + assert(nx_ == n); + rval = -1e+20; hiopSolveStatus status; double* xi; - + #ifdef HIOP_USE_MPI // uncomment if want to monitor contingency computing time - //double t3 = MPI_Wtime(); - //double t4 = 0.; -#endif - - xi = new double[nS_]; - for(int i=0; iset_sparse(0.3); } */ - + hiopNlpSparse nlp(*ex9_recourse); nlp.options->SetStringValue("duals_update_type", "linear"); - //nlp.options->SetStringValue("dualsInitialization", "zero"); + // nlp.options->SetStringValue("dualsInitialization", "zero"); nlp.options->SetStringValue("Hessian", "analytical_exact"); #ifdef HIOP_USE_GPU nlp.options->SetStringValue("compute_mode", "hybrid"); nlp.options->SetStringValue("compute_mode", "cpu"); -#else +#else nlp.options->SetStringValue("compute_mode", "cpu"); #endif - //nlp.options->SetStringValue("time_kkt", "on"); + // nlp.options->SetStringValue("time_kkt", "on"); nlp.options->SetIntegerValue("verbosity_level", 1); nlp.options->SetNumericValue("mu0", 1e-1); - //nlp.options->SetNumericValue("tolerance", 1e-5); + // nlp.options->SetNumericValue("tolerance", 1e-5); hiopAlgFilterIPMNewton solver(&nlp); - //assert("for debugging" && false); //for debugging purpose + // assert("for debugging" && false); //for debugging purpose status = solver.run(); - assert(status<=hiopSolveStatus::User_Stopped); //check solver status if necessary - rval = solver.getObjective(); - if(y_==nullptr) { + assert(status <= hiopSolveStatus::User_Stopped); // check solver status if necessary + rval = solver.getObjective(); + if(y_ == nullptr) { y_ = new double[ny_]; } solver.getSolution(y_); - - #ifdef HIOP_USE_MPI - // uncomment if want to monitor contingency computing time - /* t4 = MPI_Wtime(); - if(idx==0||idx==1) { - printf( "Elapsed time for contingency %d is %f\n",idx, t4 - t3 ); - printf(" Objective for idx %d value %18.12e, xi %18.12e\n",idx,rval,xi[0]); - } - */ - #endif + +#ifdef HIOP_USE_MPI +// uncomment if want to monitor contingency computing time +/* t4 = MPI_Wtime(); + if(idx==0||idx==1) { + printf( "Elapsed time for contingency %d is %f\n",idx, t4 - t3 ); + printf(" Objective for idx %d value %18.12e, xi %18.12e\n",idx,rval,xi[0]); + } +*/ +#endif delete[] xi; delete ex9_recourse; @@ -168,32 +170,22 @@ bool PriDecMasterProbleEx2Sparse::eval_f_rterm(size_type idx, const int& n, cons // returns the gradient computed in eval_f_rterm bool PriDecMasterProbleEx2Sparse::eval_grad_rterm(size_type idx, const int& n, double* x, hiopVector& grad) { - assert(nx_==n); + assert(nx_ == n); double* grad_vec = grad.local_data(); - for(int i=0; i -//base interface (NLP specification for primal decomposable problems) +// base interface (NLP specification for primal decomposable problems) #include "hiopInterfacePrimalDecomp.hpp" -//basecase sparse NLP +// basecase sparse NLP #include "NlpPriDecEx2UserBasecase.hpp" -//recourse sparse NLP +// recourse sparse NLP #include "NlpPriDecEx2UserRecourseSparse.hpp" /** * @@ -28,21 +28,21 @@ * * Mathematically, the recourse function r_i(x) is defined based on the recourse problem * - * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that - * - * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 - * - * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 - * + * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that + * + * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 + * + * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 + * * y_k - y_{k-1} >=0, k=2, ..., n_y * * y_1 >=0 - * + * * Eventually each of @f$ \xi^i_1, \xi^i_2, ..., \xi^i_{n_S} @f$ can be withdrawn from U[-0.25, 0.25] * and the size n_S of the sample satisfying 1<=n_S<=n_y. They are set to 1.0 for now. * * When $S$ samples (\xi^i_1, \xi^i_2, ..., \xi^i_{n_S}), i=1,\ldots,S, are used the - * primal decomposable problem looks like + * primal decomposable problem looks like * * min_x basecase(x) + 1/S \sum_{i=1}^S r_i(x;\xi_i) * @@ -50,16 +50,16 @@ * stochastic programming problem * * min_x basecase(x) + E_\xi[ r(x,\xi) | \xi ~ U[-0.25,0.25]] - * + * * where the random function r(x;\xi) is defined similarily to r_i(x;\xi) (excepting the * scaling by 1/S). * * centered, multiline: - * @f[ + * @f[ * \min_x \sum_{i=1}^n f(x_i) - * @f] + * @f] * - * The recourse problems are implemented in hiop sparse class. + * The recourse problems are implemented in hiop sparse class. */ using namespace hiop; @@ -67,35 +67,34 @@ class PriDecMasterProbleEx2Sparse : public hiop::hiopInterfacePriDecProblem { public: PriDecMasterProbleEx2Sparse(size_type nx, size_type ny, size_type nS, size_type S); - + virtual ~PriDecMasterProbleEx2Sparse(); hiop::hiopSolveStatus solve_master(hiopVector& x, const bool& include_r, - const double& rval = 0, + const double& rval = 0, const double* grad = 0, - const double*hess = 0, + const double* hess = 0, const char* master_options_file = nullptr); - virtual bool set_recourse_approx_evaluator(const int n, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); - + virtual bool set_recourse_approx_evaluator(const int n, hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); + /** * This function solves the idxth recourse optimization subproblem and returns the objective. * n is the number of coupled x, not the entire dimension of x, and might be denoted as nc_ elsewhere. * rval is the return value of the recourse solution function evaluation. */ bool eval_f_rterm(size_type idx, const int& n, const double* x, double& rval); - + /** * This function computes the gradient of the recourse solution function w.r.t x. - * n is the number of coupled x, not the entire dimension of x, and + * n is the number of coupled x, not the entire dimension of x, and * grad is the output. */ bool eval_grad_rterm(size_type idx, const int& n, double* x, hiopVector& grad); - + inline size_type get_num_rterms() const; - + inline size_type get_num_vars() const; void get_solution(double* x) const; @@ -107,11 +106,11 @@ class PriDecMasterProbleEx2Sparse : public hiop::hiopInterfacePriDecProblem size_type nx_; /// dimension of the coupled variable, nc_<=nx_ size_type nc_; - ///dimension of recourse problem primal variable `y` for each contingency + /// dimension of recourse problem primal variable `y` for each contingency size_type ny_; /// dimension of uncertainty dimension entering the recourse problem size_type nS_; - ///number of sample to use, effectively the number of recourse terms + /// number of sample to use, effectively the number of recourse terms size_type S_; double* y_; diff --git a/src/Drivers/PriDec/NlpPriDecEx2SparseDriver.cpp b/src/Drivers/PriDec/NlpPriDecEx2SparseDriver.cpp index 482d8ec32..caaee456a 100644 --- a/src/Drivers/PriDec/NlpPriDecEx2SparseDriver.cpp +++ b/src/Drivers/PriDec/NlpPriDecEx2SparseDriver.cpp @@ -14,170 +14,170 @@ #ifndef MPI_COMM_WORLD #define MPI_COMM_WORLD 0 -#endif #endif - +#endif /** - * Driver for example 9 that illustrates the use of hiop::hiopAlgPrimalDecomposition - * + * Driver for example 9 that illustrates the use of hiop::hiopAlgPrimalDecomposition + * * @note This example is built only when HIOP_USE_MPI and HIOP_SPARSE is enabled during cmake build * and require at least two MPI ranks in MPI_COMM_WORLD. * * The recourse problems are implemented with HIOP Sparse class. */ - using namespace hiop; // checking the solution of a given nx and S static bool self_check(int nx, int S, double obj_value); -static bool parse_arguments(int argc, char **argv, - bool& self_check, - int& nx, - int& S) +static bool parse_arguments(int argc, char** argv, bool& self_check, int& nx, int& S) { self_check = false; nx = 20; S = 5; switch(argc) { - case 1: - // no arguments - return true; - break; - case 4: // 3 arguments + case 1: + // no arguments + return true; + break; + case 4: // 3 arguments { if(std::string(argv[3]) == "-selfcheck") { - self_check=true; + self_check = true; nx = std::atoi(argv[1]); S = std::atof(argv[2]); - if(S<3) { + if(S < 3) { S = 4; } - if(nx<=0) { + if(nx <= 0) { return false; } } else { return false; } } - case 3: // 2 arguments + case 3: // 2 arguments { nx = atoi(argv[1]); - if(nx<=0) return false; + if(nx <= 0) return false; S = atoi(argv[2]); - if(S<3) S = 4; + if(S < 3) S = 4; } - case 2: // 1 argument + case 2: // 1 argument { if(std::string(argv[1]) == "-selfcheck") { - self_check=true; + self_check = true; } else { nx = atoi(argv[1]); - if(nx<=0) { + if(nx <= 0) { return false; } } - } - break; - default: - return false; // 4 or more arguments not supported + } break; + default: + return false; // 4 or more arguments not supported } - if(self_check && nx!=20 && S!=5) { - printf("Error: incorrect input parameters: '-selfcheck' must be used with predefined " - "values for input parameters, nx=20 S=5\n"); - return false; + if(self_check && nx != 20 && S != 5) { + printf( + "Error: incorrect input parameters: '-selfcheck' must be used with predefined " + "values for input parameters, nx=20 S=5\n"); + return false; } - + return true; }; static void usage(const char* exeName) { - printf("HiOp driver %s that solves a nonconvex synthetic problem of variable size in the " - "primal decomposition formulation. )\n", - exeName); + printf( + "HiOp driver %s that solves a nonconvex synthetic problem of variable size in the " + "primal decomposition formulation. )\n", + exeName); printf("Usage: \n"); printf(" '$ %s nx S -selfcheck '\n", exeName); printf("Arguments, all integers, except strings '-selfcheck' \n"); printf(" 'nx': # of base case variables [default 20, optional, nonnegative integer].\n"); printf(" 'S': # of recourse/contingency problems [default 5, optional, nonnegative integer].\n"); - printf(" '-selfcheck': compares the optimal objective with nx being 20 and " - "S being 5 (these two exact values must be passed as arguments). [optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with nx being 20 and " + "S being 5 (these two exact values must be passed as arguments). [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); - ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); + ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(MPI_SUCCESS == ierr); #endif - int nx = 20; //nx == ny for this problem - int nS = 5; // number of \xi + int nx = 20; // nx == ny for this problem + int nS = 5; // number of \xi int S = 5; - + bool selfCheck; - + if(!parse_arguments(argc, argv, selfCheck, nx, S)) { usage(argv[0]); return 1; - } - + } + PriDecMasterProbleEx2Sparse pridec_problem(nx, nx, nS, S); hiop::hiopAlgPrimalDecomposition pridec_solver(&pridec_problem, MPI_COMM_WORLD); pridec_solver.set_initial_alpha_ratio(0.5); pridec_solver.set_alpha_min(0.3); - //pridec_solver.set_local_accum("true"); - //pridec_solver.set_tolerance(1e-6); - //pridec_solver.set_max_iteration(5); + // pridec_solver.set_local_accum("true"); + // pridec_solver.set_tolerance(1e-6); + // pridec_solver.set_max_iteration(5); auto status = pridec_solver.run(); - + int it = pridec_solver.getNumIterations(); printf("number of iterations %d\n", it); - if(status!=Solve_Success) { - if(rank==0) { + if(status != Solve_Success) { + if(rank == 0) { printf("Solve was NOT successfull."); } } else { - if(rank==0) { - printf("Solve was successfull. Optimal value: %12.5e\n",pridec_solver.getObjective()); + if(rank == 0) { + printf("Solve was successfull. Optimal value: %12.5e\n", pridec_solver.getObjective()); } } - + if(selfCheck) { - if(rank==0) { - if(!self_check(nx,S, pridec_solver.getObjective())) { + if(rank == 0) { + if(!self_check(nx, S, pridec_solver.getObjective())) { return -1; } } - } - + } + #ifdef HIOP_USE_MPI MPI_Finalize(); #endif - //printf("Returned successfully from driver! Rank=%d\n", rank); + // printf("Returned successfully from driver! Rank=%d\n", rank); return 0; } - static bool self_check(int nx, int S, double obj_value) { double obj_true = 0.2633379371706; double err = 1e-5; - if(fabs((obj_value)-obj_true)<1e-5) { - printf("selfcheck success (error less than %18.12e), objective value is %18.12e \n", err,obj_value); + if(fabs((obj_value)-obj_true) < 1e-5) { + printf("selfcheck success (error less than %18.12e), objective value is %18.12e \n", err, obj_value); return true; } else { - printf("selfcheck failure. Objective (%18.12e) does not agree with the saved value (%18.12e) for nx=%d,S=%d.\n", - obj_value, obj_true, nx,S); + printf("selfcheck failure. Objective (%18.12e) does not agree with the saved value (%18.12e) for nx=%d,S=%d.\n", + obj_value, + obj_true, + nx, + S); return false; } return true; diff --git a/src/Drivers/PriDec/NlpPriDecEx2SparseRaja.cpp b/src/Drivers/PriDec/NlpPriDecEx2SparseRaja.cpp index eaa43fe91..69ac42d15 100644 --- a/src/Drivers/PriDec/NlpPriDecEx2SparseRaja.cpp +++ b/src/Drivers/PriDec/NlpPriDecEx2SparseRaja.cpp @@ -13,7 +13,7 @@ using ex9_raja_reduce = hiop::ExecRajaPoliciesBackend: using ex9_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex9_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using ex9_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex9_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; @@ -21,15 +21,18 @@ using ex9_raja_reduce = hiop::ExecRajaPoliciesBackend:: using namespace hiop; -PriDecMasterProbleEx2Sparse:: -PriDecMasterProbleEx2Sparse(size_type nx, size_type ny, size_type nS, size_type S, std::string mem_space) - : nx_(nx), - ny_(ny), - nS_(nS), - S_(S), - mem_space_(mem_space) +PriDecMasterProbleEx2Sparse::PriDecMasterProbleEx2Sparse(size_type nx, + size_type ny, + size_type nS, + size_type S, + std::string mem_space) + : nx_(nx), + ny_(ny), + nS_(nS), + S_(S), + mem_space_(mem_space) { - assert(nx==ny); + assert(nx == ny); y_ = new double[ny_]; sol_ = new double[nx_]; obj_ = 1e20; @@ -40,21 +43,20 @@ PriDecMasterProbleEx2Sparse(size_type nx, size_type ny, size_type nS, size_type PriDecMasterProbleEx2Sparse::~PriDecMasterProbleEx2Sparse() { delete[] y_; - delete[] sol_; + delete[] sol_; delete basecase_; }; -hiop::hiopSolveStatus -PriDecMasterProbleEx2Sparse::solve_master(hiopVector& x, - const bool& include_r, - const double& rval/*=0*/, - const double* grad/*=0*/, - const double*hess /*=0*/, - const char* master_options_file/*=nullptr*/) +hiop::hiopSolveStatus PriDecMasterProbleEx2Sparse::solve_master(hiopVector& x, + const bool& include_r, + const double& rval /*=0*/, + const double* grad /*=0*/, + const double* hess /*=0*/, + const char* master_options_file /*=nullptr*/) { - obj_=-1e+20; + obj_ = -1e+20; hiopSolveStatus status; - if(basecase_==nullptr) { + if(basecase_ == nullptr) { basecase_ = new PriDecBasecaseProbleEx2(nx_); } basecase_->set_include(include_r); @@ -62,22 +64,22 @@ PriDecMasterProbleEx2Sparse::solve_master(hiopVector& x, if(include_r) { assert(basecase_->quad_is_defined()); } - + hiopNlpSparse nlp(*basecase_, master_options_file); // any of the options below can be overwritten by specifying them in the 'hiop_pridec_master.options' file - //nlp.options->SetStringValue("fixed_var", "relax"); + // nlp.options->SetStringValue("fixed_var", "relax"); nlp.options->SetStringValue("Hessian", "analytical_exact"); nlp.options->SetStringValue("KKTLinsys", "xdycyd"); nlp.options->SetStringValue("compute_mode", "cpu"); - //nlp.options->SetStringValue("compute_mode", "hybrid"); - //nlp.options->SetStringValue("mem_space", mem_space.c_str()); + // nlp.options->SetStringValue("compute_mode", "hybrid"); + // nlp.options->SetStringValue("mem_space", mem_space.c_str()); nlp.options->SetIntegerValue("verbosity_level", 1); nlp.options->SetNumericValue("mu0", 1e-1); - //nlp.options->SetNumericValue("tolerance", 1e-5); - + // nlp.options->SetNumericValue("tolerance", 1e-5); + hiopAlgFilterIPMNewton solver(&nlp); status = solver.run(); @@ -86,95 +88,94 @@ PriDecMasterProbleEx2Sparse::solve_master(hiopVector& x, double* x_vec = x.local_data(); solver.getSolution(x_vec); - if(status<0) { - nlp.log->printf(hovError, - "solver returned negative solve status: %d (with objective is %18.12e)\n", - status, solver.getObjective()); + if(status < 0) { + nlp.log->printf(hovError, + "solver returned negative solve status: %d (with objective is %18.12e)\n", + status, + solver.getObjective()); return status; } - - if(sol_==nullptr) { + + if(sol_ == nullptr) { sol_ = new double[nx_]; } - memcpy(sol_, x_vec, nx_*sizeof(double)); - - //compute the recourse estimate + memcpy(sol_, x_vec, nx_ * sizeof(double)); + + // compute the recourse estimate if(include_r) { double rec_appx = 0.; basecase_->get_rec_obj(nx_, x_vec, rec_appx); } - + return Solve_Success; - //return hiop::SolverInternal_Error; + // return hiop::SolverInternal_Error; } -bool PriDecMasterProbleEx2Sparse:: -set_recourse_approx_evaluator(const int n, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) +bool PriDecMasterProbleEx2Sparse::set_recourse_approx_evaluator( + const int n, + hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) { - assert(n==nc_); + assert(n == nc_); basecase_->set_quadratic_terms(n, evaluator); - return true; + return true; } // all the memory management is done through umpire in the PriDecRecourseProbleEx2Sparse class bool PriDecMasterProbleEx2Sparse::eval_f_rterm(size_type idx, const int& n, const double* x, double& rval) { - assert(nx_==n); - rval=-1e+20; + assert(nx_ == n); + rval = -1e+20; double* xi; - + #ifdef HIOP_USE_MPI - //to monitor contingency compute time - //double t3 = MPI_Wtime(); - //double t4 = 0.; -#endif - - // xi can be set below - xi = new double[nS_]; - for(int i=0; iSetStringValue("duals_update_type", "linear"); - //nlp.options->SetStringValue("dualsInitialization", "zero"); + // nlp.options->SetStringValue("dualsInitialization", "zero"); nlp.options->SetStringValue("Hessian", "analytical_exact"); #ifdef HIOP_USE_GPU - //nlp.options->SetStringValue("compute_mode", "hybrid"); + // nlp.options->SetStringValue("compute_mode", "hybrid"); nlp.options->SetStringValue("compute_mode", "cpu"); -#else +#else nlp.options->SetStringValue("compute_mode", "cpu"); #endif - //nlp.options->SetStringValue("time_kkt", "on"); + // nlp.options->SetStringValue("time_kkt", "on"); nlp.options->SetIntegerValue("verbosity_level", 1); nlp.options->SetNumericValue("mu0", 1e-1); - //nlp.options->SetNumericValue("tolerance", 1e-5); + // nlp.options->SetNumericValue("tolerance", 1e-5); hiopAlgFilterIPMNewton solver(&nlp); hiopSolveStatus status = solver.run(); - assert(status==Solve_Success || - status==Solve_Success_RelTol || - status==Solve_Acceptable_Level); - - rval = solver.getObjective(); - if(y_==nullptr) { + assert(status == Solve_Success || status == Solve_Success_RelTol || status == Solve_Acceptable_Level); + + rval = solver.getObjective(); + if(y_ == nullptr) { y_ = new double[ny_]; } solver.getSolution(y_); - + #ifdef HIOP_USE_MPI // uncomment if want to monitor contingency computing time - /* - t4 = MPI_Wtime(); + /* + t4 = MPI_Wtime(); if(idx==0||idx==1) { - printf( "Elapsed time for contingency %d is %f\n",idx, t4 - t3 ); + printf( "Elapsed time for contingency %d is %f\n",idx, t4 - t3 ); printf(" Objective for idx %d value %18.12e, xi %18.12e\n",idx,rval,xi[0]); } */ @@ -188,32 +189,22 @@ bool PriDecMasterProbleEx2Sparse::eval_f_rterm(size_type idx, const int& n, cons // returns the gradient computed in eval_f_rterm bool PriDecMasterProbleEx2Sparse::eval_grad_rterm(size_type idx, const int& n, double* x, hiopVector& grad) { - assert(nx_==n); + assert(nx_ == n); double* grad_vec = grad.local_data(); - for(int i=0; i -//base interface (NLP specification for primal decomposable problems) +// base interface (NLP specification for primal decomposable problems) #include "hiopInterfacePrimalDecomp.hpp" -//basecase sparse NLP +// basecase sparse NLP #include "NlpPriDecEx2UserBasecase.hpp" -//recourse Sparse NLP +// recourse Sparse NLP #include "NlpPriDecEx2UserRecourseSparseRaja.hpp" /** @@ -29,21 +29,21 @@ * * Mathematically, the recourse function r_i(x) is defined based on the recourse problem * - * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that - * - * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 - * - * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 - * + * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that + * + * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 + * + * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 + * * y_k - y_{k-1} >=0, k=2, ..., n_y * * y_1 >=0 - * + * * Eventually each of @f$ \xi^i_1, \xi^i_2, ..., \xi^i_{n_S} @f$ can be withdrawn from U[-0.25, 0.25] * and the size n_S of the sample satisfying 1<=n_S<=n_y. They are set to 1.0 for now. * * When $S$ samples (\xi^i_1, \xi^i_2, ..., \xi^i_{n_S}), i=1,\ldots,S, are used the - * primal decomposable problem looks like + * primal decomposable problem looks like * * min_x basecase(x) + 1/S \sum_{i=1}^S r_i(x;\xi_i) * @@ -51,16 +51,16 @@ * stochastic programming problem * * min_x basecase(x) + E_\xi[ r(x,\xi) | \xi ~ U[-0.25,0.25]] - * + * * where the random function r(x;\xi) is defined similarily to r_i(x;\xi) (excepting the * scaling by 1/S). * * centered, multiline: - * @f[ + * @f[ * \min_x \sum_{i=1}^n f(x_i) - * @f] + * @f] * - * This implementation uses HiOp Sparse Raja recourse problems. + * This implementation uses HiOp Sparse Raja recourse problems. */ using namespace hiop; @@ -68,35 +68,34 @@ class PriDecMasterProbleEx2Sparse : public hiop::hiopInterfacePriDecProblem { public: PriDecMasterProbleEx2Sparse(size_type nx, size_type ny, size_type nS, size_type S, std::string mem_space); - + virtual ~PriDecMasterProbleEx2Sparse(); hiop::hiopSolveStatus solve_master(hiopVector& x, const bool& include_r, - const double& rval = 0, + const double& rval = 0, const double* grad = 0, - const double*hess = 0, + const double* hess = 0, const char* master_options_file = nullptr); - virtual bool set_recourse_approx_evaluator(const int n, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); - + virtual bool set_recourse_approx_evaluator(const int n, hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator); + /** * This function solves the idxth recourse optimization subproblem and returns the objective. * n is the number of coupled x, not the entire dimension of x, and might be denoted as nc_ elsewhere. * rval is the return value of the recourse solution function evaluation. */ bool eval_f_rterm(size_type idx, const int& n, const double* x, double& rval); - + /** * This function computes the gradient of the recourse solution function w.r.t x. - * n is the number of coupled x, not the entire dimension of x, and + * n is the number of coupled x, not the entire dimension of x, and * grad is the output. */ bool eval_grad_rterm(size_type idx, const int& n, double* x, hiopVector& grad); - + inline size_type get_num_rterms() const; - + inline size_type get_num_vars() const; void get_solution(double* x) const; @@ -108,11 +107,11 @@ class PriDecMasterProbleEx2Sparse : public hiop::hiopInterfacePriDecProblem size_type nx_; /// dimension of the coupled variable, nc_<=nx_ size_type nc_; - ///dimension of recourse problem primal variable `y` for each contingency + /// dimension of recourse problem primal variable `y` for each contingency size_type ny_; /// dimension of uncertainty dimension entering the recourse problem size_type nS_; - ///number of sample to use, effectively the number of recourse terms + /// number of sample to use, effectively the number of recourse terms size_type S_; double* y_; diff --git a/src/Drivers/PriDec/NlpPriDecEx2SparseRajaDriver.cpp b/src/Drivers/PriDec/NlpPriDecEx2SparseRajaDriver.cpp index b92800e30..f828fbb98 100644 --- a/src/Drivers/PriDec/NlpPriDecEx2SparseRajaDriver.cpp +++ b/src/Drivers/PriDec/NlpPriDecEx2SparseRajaDriver.cpp @@ -22,109 +22,107 @@ #ifndef MPI_COMM_WORLD #define MPI_COMM_WORLD 0 -#endif #endif - +#endif /** - * Driver for example 9 that illustrates the use of hiop::hiopAlgPrimalDecomposition - * + * Driver for example 9 that illustrates the use of hiop::hiopAlgPrimalDecomposition + * * @note This example is built only when HIOP_USE_MPI, HIOP_SPARSE and HIOP_USE_RAJA are enabled during cmake build * and require at least two MPI ranks in MPI_COMM_WORLD. * * The recourse problems are implemented with HIOP Sparse class and RAJA. */ - using namespace hiop; // checking the solution of a given nx and S static bool self_check(int nx, int S, double obj_value); -static bool parse_arguments(int argc, char **argv, - bool& self_check, - int& nx, - int& S) +static bool parse_arguments(int argc, char** argv, bool& self_check, int& nx, int& S) { self_check = false; nx = 20; S = 5; switch(argc) { - case 1: - //no arguments - return true; - break; - case 4: // 3 arguments + case 1: + // no arguments + return true; + break; + case 4: // 3 arguments { if(std::string(argv[3]) == "-selfcheck") { - self_check=true; + self_check = true; nx = std::atoi(argv[1]); S = std::atof(argv[2]); - if(S<3) { + if(S < 3) { S = 4; } - if(nx<=0) { + if(nx <= 0) { return false; } } else { return false; } } - case 3: //2 arguments + case 3: // 2 arguments { nx = atoi(argv[1]); - if(nx<=0) return false; + if(nx <= 0) return false; S = atoi(argv[2]); - if(S<3) S = 4; + if(S < 3) S = 4; } - case 2: //1 argument + case 2: // 1 argument { if(std::string(argv[1]) == "-selfcheck") { - self_check=true; + self_check = true; } else { nx = atoi(argv[1]); - if(nx<=0) { + if(nx <= 0) { return false; } } - } - break; - default: - return false; //4 or more arguments + } break; + default: + return false; // 4 or more arguments } - if(self_check && nx!=20 && S!=5) { - printf("Error: incorrect input parameters: '-selfcheck' must be used with predefined " - "values for input parameters, nx=20 S=5\n"); - return false; + if(self_check && nx != 20 && S != 5) { + printf( + "Error: incorrect input parameters: '-selfcheck' must be used with predefined " + "values for input parameters, nx=20 S=5\n"); + return false; } - + return true; }; static void usage(const char* exeName) { - printf("HiOp driver %s that solves a nonconvex synthetic problem of variable size in the " - "primal decomposition formulation. )\n", - exeName); + printf( + "HiOp driver %s that solves a nonconvex synthetic problem of variable size in the " + "primal decomposition formulation. )\n", + exeName); printf("Usage: \n"); printf(" '$ %s nx S -selfcheck '\n", exeName); printf("Arguments, all integers, except strings '-selfcheck' \n"); printf(" 'nx': # of base case variables [default 20, optional, nonnegative integer].\n"); printf(" 'S': # of recourse/contingency problems [default 5, optional, nonnegative integer].\n"); - printf(" '-selfcheck': compares the optimal objective with nx being 20 and " - "S being 5 (these two exact values must be passed as arguments). [optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with nx being 20 and " + "S being 5 (these two exact values must be passed as arguments). [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); - ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); + ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(MPI_SUCCESS == ierr); #endif #ifdef HIOP_USE_MAGMA @@ -133,49 +131,48 @@ int main(int argc, char **argv) // Set memory space where to create models and perform NLP solve std::string mem_space = "UM"; - //std::string mem_space = "DEFAULT"; - //std::string mem_space = "HOST"; - - int nx = 20; //nx == ny for this problem - int nS = 5; // number of \xi + // std::string mem_space = "DEFAULT"; + // std::string mem_space = "HOST"; + + int nx = 20; // nx == ny for this problem + int nS = 5; // number of \xi int S = 5; - + bool selfCheck; - + if(!parse_arguments(argc, argv, selfCheck, nx, S)) { usage(argv[0]); return 1; } - - + PriDecMasterProbleEx2Sparse pridec_problem(nx, nx, nS, S, mem_space); hiop::hiopAlgPrimalDecomposition pridec_solver(&pridec_problem, MPI_COMM_WORLD); pridec_solver.set_initial_alpha_ratio(0.5); pridec_solver.set_alpha_min(0.3); - //pridec_solver.set_tolerance(1e-6); - //pridec_solver.set_max_iteration(5); + // pridec_solver.set_tolerance(1e-6); + // pridec_solver.set_max_iteration(5); auto status = pridec_solver.run(); - + int it = pridec_solver.getNumIterations(); printf("number of iterations %d\n", it); - if(status!=Solve_Success) { - if(rank==0) { + if(status != Solve_Success) { + if(rank == 0) { printf("Solve was NOT successfull."); } } else { - if(rank==0) { - printf("Solve was successfull. Optimal value: %12.5e\n",pridec_solver.getObjective()); + if(rank == 0) { + printf("Solve was successfull. Optimal value: %12.5e\n", pridec_solver.getObjective()); } } - + if(selfCheck) { - if(rank==0) { - if(!self_check(nx,S, pridec_solver.getObjective())) { + if(rank == 0) { + if(!self_check(nx, S, pridec_solver.getObjective())) { return -1; } } - } - + } + #ifdef HIOP_USE_MAGMA magma_finalize(); #endif @@ -183,21 +180,23 @@ int main(int argc, char **argv) MPI_Finalize(); #endif - //printf("Returned successfully from driver! Rank=%d\n", rank); + // printf("Returned successfully from driver! Rank=%d\n", rank); return 0; } - static bool self_check(int nx, int S, double obj_value) { double obj_true = 0.2633379371706; double err = 1e-5; - if(fabs((obj_value)-obj_true)<1e-5) { - printf("selfcheck success (error less than %18.12e), objective value is %18.12e \n", err,obj_value); + if(fabs((obj_value)-obj_true) < 1e-5) { + printf("selfcheck success (error less than %18.12e), objective value is %18.12e \n", err, obj_value); return true; } else { - printf("selfcheck failure. Objective (%18.12e) does not agree with the saved value (%18.12e) for nx=%d,S=%d.\n", - obj_value, obj_true, nx,S); + printf("selfcheck failure. Objective (%18.12e) does not agree with the saved value (%18.12e) for nx=%d,S=%d.\n", + obj_value, + obj_true, + nx, + S); return false; } return true; diff --git a/src/Drivers/PriDec/NlpPriDecEx2UserBasecase.hpp b/src/Drivers/PriDec/NlpPriDecEx2UserBasecase.hpp index 35ac2c258..393fa9d55 100644 --- a/src/Drivers/PriDec/NlpPriDecEx2UserBasecase.hpp +++ b/src/Drivers/PriDec/NlpPriDecEx2UserBasecase.hpp @@ -1,33 +1,31 @@ #include "NlpSparseEx1.hpp" #include "hiopInterfacePrimalDecomp.hpp" -/** This class is the basecase problem for Ex9. +/** This class is the basecase problem for Ex9. * To work for the master problem, it has a boolean include_rec_ to determine * whether a recourse approximation is in the objective. - * There is no corresponding .cpp file. + * There is no corresponding .cpp file. */ using namespace hiop; class PriDecBasecaseProbleEx2 : public SparseEx1 { public: PriDecBasecaseProbleEx2(int n) - : SparseEx1(n, 1.0), rec_evaluator_(nullptr) - { - } + : SparseEx1(n, 1.0), + rec_evaluator_(nullptr) + {} - virtual ~PriDecBasecaseProbleEx2() - { - } + virtual ~PriDecBasecaseProbleEx2() {} bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { if(!SparseEx1::eval_f(n, x, new_x, obj_value)) { return false; } - if(include_rec_) {//same as include_r - assert(rec_evaluator_->get_rgrad()!=NULL); + if(include_rec_) { // same as include_r + assert(rec_evaluator_->get_rgrad() != NULL); rec_evaluator_->eval_f(n, x, new_x, obj_value); - } + } // add regularization to the objective based on rec_evaluator_ return true; } @@ -37,9 +35,9 @@ class PriDecBasecaseProbleEx2 : public SparseEx1 if(!SparseEx1::eval_grad_f(n, x, new_x, gradf)) { return false; } - //add regularization gradient + // add regularization gradient if(include_rec_) { - assert(rec_evaluator_->get_rgrad()!=NULL); + assert(rec_evaluator_->get_rgrad() != NULL); rec_evaluator_->eval_grad(n, x, new_x, gradf); } return true; @@ -61,42 +59,37 @@ class PriDecBasecaseProbleEx2 : public SparseEx1 return false; } // Add diagonal to the Hessian - // The indices are already added through the parent + // The indices are already added through the parent - if(MHSS!=nullptr) { + if(MHSS != nullptr) { // use rec_evaluator_ to add diagonal entries in the Hessian assert(nnzHSS == n); if(include_rec_) { - for(int i=0; iget_rhess()->local_data_const()[i]) ; + for(int i = 0; i < n; i++) { + MHSS[i] += obj_factor * (rec_evaluator_->get_rhess()->local_data_const()[i]); } } } return true; } - - bool set_quadratic_terms(const int& n, - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) + + bool set_quadratic_terms(const int& n, hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator) { rec_evaluator_ = evaluator; return true; } - void set_include(const bool include) - { - include_rec_ = include; - }; + void set_include(const bool include) { include_rec_ = include; }; - bool quad_is_defined() // check if quadratic approximation is defined + bool quad_is_defined() // check if quadratic approximation is defined { - if(rec_evaluator_!=NULL) { + if(rec_evaluator_ != NULL) { return true; } else { return false; } } - void get_rec_obj(const size_type& n, const double* x, double& obj_value) { bool temp = rec_evaluator_->eval_f(n, x, false, obj_value); @@ -104,6 +97,6 @@ class PriDecBasecaseProbleEx2 : public SparseEx1 } protected: - bool include_rec_=false; - hiopInterfacePriDecProblem::RecourseApproxEvaluator* rec_evaluator_; //this should be const + bool include_rec_ = false; + hiopInterfacePriDecProblem::RecourseApproxEvaluator* rec_evaluator_; // this should be const }; diff --git a/src/Drivers/PriDec/NlpPriDecEx2UserRecourseMds.hpp b/src/Drivers/PriDec/NlpPriDecEx2UserRecourseMds.hpp index 58c0d0e11..1b61ab9e5 100644 --- a/src/Drivers/PriDec/NlpPriDecEx2UserRecourseMds.hpp +++ b/src/Drivers/PriDec/NlpPriDecEx2UserRecourseMds.hpp @@ -1,26 +1,26 @@ #ifndef HIOP_EXAMPLE_PRIDEC_EX2_RECOURSE #define HIOP_EXAMPLE_PRIDEC_EX2_RECOURSE -#include +#include #include -/** This class provide an example of what a user of hiop::hiopInterfacePriDecProblem - * should implement in order to provide the recourse problem to +/** This class provide an example of what a user of hiop::hiopInterfacePriDecProblem + * should implement in order to provide the recourse problem to * hiop::hiopAlgPrimalDecomposition solver. - * + * * For a given vector x\in R^n and \xi \in R^{n_S}, this example class implements * - * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that - * - * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 - * - * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 //last one in the constraint implementation - * + * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that + * + * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 + * + * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 //last one in the constraint implementation + * * y_k - y_{k-1} >=0, k=2, ..., n_y * * y_1 >=0 * - * Coding of the problem in MDS HiOp input: order of variables need to be [ysparse,ydense]. + * Coding of the problem in MDS HiOp input: order of variables need to be [ysparse,ydense]. * This is distinguished from the Sparse HiOp class. */ @@ -28,58 +28,55 @@ class PriDecRecourseProbleEx2 : public hiop::hiopInterfaceMDS { public: PriDecRecourseProbleEx2(int n, int nS, int S) - : x_(nullptr), - xi_(nullptr), - nx_(n), - nS_(nS), - S_(S) + : x_(nullptr), + xi_(nullptr), + nx_(n), + nS_(nS), + S_(S) { - assert(nS_>=1); - assert(nx_>=nS_); // ny = nx = n - assert(S_>=1); + assert(nS_ >= 1); + assert(nx_ >= nS_); // ny = nx = n + assert(S_ >= 1); ny_ = nx_; - nsparse_ = nx_*sparse_ratio; + nsparse_ = nx_ * sparse_ratio; } - PriDecRecourseProbleEx2(int n, - int nS, - int S, - const double* x, - const double* xi): nx_(n), nS_(nS), S_(S) + PriDecRecourseProbleEx2(int n, int nS, int S, const double* x, const double* xi) + : nx_(n), + nS_(nS), + S_(S) { - assert(nS_>=1); - assert(nx_>=nS_); // ny = nx = n - assert(S_>=1); + assert(nS_ >= 1); + assert(nx_ >= nS_); // ny = nx = n + assert(S_ >= 1); ny_ = nx_; xi_ = new double[nS_]; - memcpy(xi_,xi, nS_*sizeof(double)); + memcpy(xi_, xi, nS_ * sizeof(double)); x_ = new double[nx_]; - //assert("for debugging" && false); //for debugging purpose - memcpy(x_,x, nx_*sizeof(double)); - nsparse_ = int(nx_*sparse_ratio); + // assert("for debugging" && false); //for debugging purpose + memcpy(x_, x, nx_ * sizeof(double)); + nsparse_ = int(nx_ * sparse_ratio); } - PriDecRecourseProbleEx2(int n, - int nS, - int S, - int idx, - const double* x, - const double* xi): nx_(n), nS_(nS), S_(S) + PriDecRecourseProbleEx2(int n, int nS, int S, int idx, const double* x, const double* xi) + : nx_(n), + nS_(nS), + S_(S) { - assert(nS_>=1); - assert(nx_>=nS_); // ny=nx=n - assert(S_>=1); + assert(nS_ >= 1); + assert(nx_ >= nS_); // ny=nx=n + assert(S_ >= 1); ny_ = nx_; xi_ = new double[nS_]; - memcpy(xi_,xi, nS_*sizeof(double)); + memcpy(xi_, xi, nS_ * sizeof(double)); x_ = new double[nx_]; - //assert("for debugging" && false); //for debugging purpose - memcpy(x_,x, nx_*sizeof(double)); - nsparse_ = int(nx_*sparse_ratio); + // assert("for debugging" && false); //for debugging purpose + memcpy(x_, x, nx_ * sizeof(double)); + nsparse_ = int(nx_ * sparse_ratio); idx_ = idx; } @@ -93,77 +90,77 @@ class PriDecRecourseProbleEx2 : public hiop::hiopInterfaceMDS void set_sparse(const double ratio) { sparse_ratio = ratio; - nsparse_ = int(ratio*nx_); - assert(nsparse_>=1 && ratio<1 && ratio>0); + nsparse_ = int(ratio * nx_); + assert(nsparse_ >= 1 && ratio < 1 && ratio > 0); } /// Set the basecase solution `x` void set_x(const double* x) { - if(x_==NULL) { - x_ = new double[nx_]; + if(x_ == NULL) { + x_ = new double[nx_]; } - memcpy(x_,x, nx_*sizeof(double)); + memcpy(x_, x, nx_ * sizeof(double)); } /// Set the "sample" vector \xi - void set_center(const double *xi) + void set_center(const double* xi) { - if(xi_==NULL) { - xi_ = new double[nS_]; + if(xi_ == NULL) { + xi_ = new double[nS_]; } - memcpy(xi_,xi, nS_*sizeof(double)); + memcpy(xi_, xi, nS_ * sizeof(double)); } bool get_prob_sizes(size_type& n, size_type& m) { n = ny_; - m = ny_; - return true; + m = ny_; + return true; } - bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) + bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { // y_1 bounded xlow[0] = 0.; xupp[0] = 1e20; - for(int i=1; i0); + nx_sparse = nsparse_; + nx_dense = ny_ - nsparse_; + assert(nx_sparse > 0); nnz_sparse_Jace = 0; - if(nx_sparsensparse_-1 && con_idx!=m-1) { - JacD[(ny_-nsparse_)*con_idx+(con_idx-nsparse_)] = -1.0; - JacD[(ny_-nsparse_)*con_idx+(con_idx-nsparse_)+1] = 1.0; - } else if(con_idx==m-1) { - if(nsparse_<=nS_) { - //cons[m-1] += (x[i] + xi_[i])*(x[i] + xi_[i]); - for(int i=nsparse_; i nsparse_ - 1 && con_idx != m - 1) { + JacD[(ny_ - nsparse_) * con_idx + (con_idx - nsparse_)] = -1.0; + JacD[(ny_ - nsparse_) * con_idx + (con_idx - nsparse_) + 1] = 1.0; + } else if(con_idx == m - 1) { + if(nsparse_ <= nS_) { + // cons[m-1] += (x[i] + xi_[i])*(x[i] + xi_[i]); + for(int i = nsparse_; i < nS_; i++) { + JacD[(ny_ - nsparse_) * con_idx + i - nsparse_] = 2 * (x[i] + xi_[i]); } - for(int i=nS_; i //for memcpy +#include //for memcpy #include using size_type = hiop::size_type; using index_type = hiop::index_type; -/** This class provide an example of what a user of hiop::hiopInterfacePriDecProblem - * should implement in order to provide the recourse problem to +/** This class provide an example of what a user of hiop::hiopInterfacePriDecProblem + * should implement in order to provide the recourse problem to * hiop::hiopAlgPrimalDecomposition solver - * + * * For a given vector x\in R^n and \xi \in R^{n_S}, this example class implements * - * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that - * - * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 - * - * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 //last one in the constraint implementation - * + * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that + * + * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 + * + * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 //last one in the constraint implementation + * * y_k - y_{k-1} >=0, k=2, ..., n_y * * y_1 >=0 * - * The recourse problem is of Sparse HiOp input. + * The recourse problem is of Sparse HiOp input. */ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse { public: PriDecRecourseProbleEx2Sparse(int n, int nS, int S) - : x_(nullptr), - xi_(nullptr), - nx_(n), - nS_(nS), - S_(S) + : x_(nullptr), + xi_(nullptr), + nx_(n), + nS_(nS), + S_(S) { - assert(nS_>=1); - assert(nx_>=nS_); // ny = nx = n - assert(S_>=1); + assert(nS_ >= 1); + assert(nx_ >= nS_); // ny = nx = n + assert(S_ >= 1); ny_ = nx_; } - PriDecRecourseProbleEx2Sparse(int n, - int nS, - int S, - const double* x, - const double* xi) - : nx_(n), - nS_(nS), - S_(S) + PriDecRecourseProbleEx2Sparse(int n, int nS, int S, const double* x, const double* xi) + : nx_(n), + nS_(nS), + S_(S) { - assert(nS_>=1); - assert(nx_>=nS_); // ny = nx = n - assert(S_>=1); + assert(nS_ >= 1); + assert(nx_ >= nS_); // ny = nx = n + assert(S_ >= 1); ny_ = nx_; xi_ = new double[nS_]; - memcpy(xi_, xi, nS_*sizeof(double)); + memcpy(xi_, xi, nS_ * sizeof(double)); x_ = new double[nx_]; - memcpy(x_, x, nx_*sizeof(double)); + memcpy(x_, x, nx_ * sizeof(double)); } - PriDecRecourseProbleEx2Sparse(int n, - int nS, - int S, - int idx, - const double* x, - const double* xi) - : nx_(n), - nS_(nS), - S_(S) + PriDecRecourseProbleEx2Sparse(int n, int nS, int S, int idx, const double* x, const double* xi) + : nx_(n), + nS_(nS), + S_(S) { - assert(nS_>=1); - assert(nx_>=nS_); // ny=nx=n - assert(S_>=1); + assert(nS_ >= 1); + assert(nx_ >= nS_); // ny=nx=n + assert(S_ >= 1); ny_ = nx_; xi_ = new double[nS_]; - memcpy(xi_, xi, nS_*sizeof(double)); + memcpy(xi_, xi, nS_ * sizeof(double)); x_ = new double[nx_]; - memcpy(x_,x, nx_*sizeof(double)); + memcpy(x_, x, nx_ * sizeof(double)); idx_ = idx; } @@ -94,41 +85,41 @@ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse /// Set the basecase solution `x` void set_x(const double* x) { - if(x_==NULL) { - x_ = new double[nx_]; + if(x_ == NULL) { + x_ = new double[nx_]; } - memcpy(x_, x, nx_*sizeof(double)); + memcpy(x_, x, nx_ * sizeof(double)); } /// Set the "sample" vector \xi - void set_center(const double *xi) + void set_center(const double* xi) { - if(xi_==NULL) { - xi_ = new double[nS_]; + if(xi_ == NULL) { + xi_ = new double[nS_]; } - memcpy(xi_, xi, nS_*sizeof(double)); + memcpy(xi_, xi, nS_ * sizeof(double)); } bool get_prob_sizes(size_type& n, size_type& m) { n = ny_; - m = ny_; - return true; + m = ny_; + return true; } - bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) + bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { // y_1 bounded xlow[0] = 0.; xupp[0] = 1e20; - for(int i=1; i0); + assert(nx > 0); nnz_sparse_Jaceq = 0; - nnz_sparse_Jacineq = ny_+(ny_-1)*2; - + nnz_sparse_Jacineq = ny_ + (ny_ - 1) * 2; + nnz_sparse_Hess_Lagr = ny_; // Lagrangian return true; } bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { - assert(ny_==n); + assert(ny_ == n); obj_value = 0.; - for(int i=0;i=nS_); - for(int i=1;i= nS_); + for(int i = 1; i < nS_; i++) { + MJacS[nnzit] = 2 * (x[i] + xi_[i]); nnzit++; - //cons[m-1] += (x[i] + xi_[i])*(x[i] + xi_[i]); + // cons[m-1] += (x[i] + xi_[i])*(x[i] + xi_[i]); } - for(int i=nS_; i //for memcpy +#include //for memcpy #include #include @@ -19,7 +19,7 @@ using ex9_raja_reduce = hiop::ExecRajaPoliciesBackend: using ex9_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex9_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using ex9_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex9_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; @@ -27,101 +27,91 @@ using ex9_raja_reduce = hiop::ExecRajaPoliciesBackend:: using namespace hiop; -/** This class provide an example of what a user of hiop::hiopInterfacePriDecProblem - * should implement in order to provide the recourse problem to +/** This class provide an example of what a user of hiop::hiopInterfacePriDecProblem + * should implement in order to provide the recourse problem to * hiop::hiopAlgPrimalDecomposition solver - * + * * For a given vector x\in R^n and \xi \in R^{n_S}, this example class implements * - * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that - * - * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 - * - * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 //last one in the constraint implementation - * + * r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that + * + * (1-y_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y_k+\xi^i_k)^2 + * + * + \sum_{k=n_S+1}^{n_y} y_k^2 >= 1 //last one in the constraint implementation + * * y_k - y_{k-1} >=0, k=2, ..., n_y * * y_1 >=0 * - * The recourse problems are implemented in hiop sparse class with Raja to work in memory space. + * The recourse problems are implemented in hiop sparse class with Raja to work in memory space. */ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse { public: - PriDecRecourseProbleEx2Sparse(int n, int nS, int S, std::string mem_space) - : x_(nullptr), - xi_(nullptr), - nx_(n), - nS_(nS),S_(S), - mem_space_(mem_space) + PriDecRecourseProbleEx2Sparse(int n, int nS, int S, std::string mem_space) + : x_(nullptr), + xi_(nullptr), + nx_(n), + nS_(nS), + S_(S), + mem_space_(mem_space) { // Make sure mem_space_ is uppercase transform(mem_space_.begin(), mem_space_.end(), mem_space_.begin(), ::toupper); - //auto& resmgr = umpire::ResourceManager::getInstance(); - //umpire::Allocator allocator = resmgr.getAllocator(mem_space_); - // umpire::Allocator allocator - // = resmgr.getAllocator(mem_space_ == "DEFAULT" ? "HOST" : mem_space_); + // auto& resmgr = umpire::ResourceManager::getInstance(); + // umpire::Allocator allocator = resmgr.getAllocator(mem_space_); + // umpire::Allocator allocator + // = resmgr.getAllocator(mem_space_ == "DEFAULT" ? "HOST" : mem_space_); - assert(nS_>=1); - assert(nx_>=nS_); // ny = nx = n - assert(S_>=1); + assert(nS_ >= 1); + assert(nx_ >= nS_); // ny = nx = n + assert(S_ >= 1); ny_ = nx_; } - PriDecRecourseProbleEx2Sparse(int n, - int nS, - int S, - const double* x, - const double* xi, - std::string mem_space) - : nx_(n), - nS_(nS), - S_(S), - mem_space_(mem_space) + PriDecRecourseProbleEx2Sparse(int n, int nS, int S, const double* x, const double* xi, std::string mem_space) + : nx_(n), + nS_(nS), + S_(S), + mem_space_(mem_space) { - assert(nS_>=1); - assert(nx_>=nS_); - assert(S_>=1); + assert(nS_ >= 1); + assert(nx_ >= nS_); + assert(S_ >= 1); ny_ = nx_; - //for(int i=0;i(x); auto* xi_vec = const_cast(xi); auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator allocator = resmgr.getAllocator(mem_space_); - + xi_ = static_cast(allocator.allocate(nS_ * sizeof(double))); x_ = static_cast(allocator.allocate(nx_ * sizeof(double))); - + auto* str = allocator.getAllocationStrategy(); - resmgr.registerAllocation(x_vec, {x_vec,nx_*sizeof(double),str}); - resmgr.registerAllocation(xi_vec, {xi_vec,nS_*sizeof(double),str}); + resmgr.registerAllocation(x_vec, {x_vec, nx_ * sizeof(double), str}); + resmgr.registerAllocation(xi_vec, {xi_vec, nS_ * sizeof(double), str}); resmgr.copy(x_, x_vec); resmgr.copy(xi_, xi_vec); - - resmgr.deregisterAllocation(x_vec); - resmgr.deregisterAllocation(xi_vec); + + resmgr.deregisterAllocation(x_vec); + resmgr.deregisterAllocation(xi_vec); } - PriDecRecourseProbleEx2Sparse(int n, - int nS, - int S, - int idx, - const double* x, - const double* xi, - std::string mem_space) - : nx_(n), - nS_(nS), - S_(S), - mem_space_(mem_space) + PriDecRecourseProbleEx2Sparse(int n, int nS, int S, int idx, const double* x, const double* xi, std::string mem_space) + : nx_(n), + nS_(nS), + S_(S), + mem_space_(mem_space) { - assert(nS_>=1); - assert(nx_>=nS_); // ny=nx=n - assert(S_>=1); + assert(nS_ >= 1); + assert(nx_ >= nS_); // ny=nx=n + assert(S_ >= 1); ny_ = nx_; @@ -132,16 +122,16 @@ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse xi_ = static_cast(allocator.allocate(nS_ * sizeof(double))); x_ = static_cast(allocator.allocate(nx_ * sizeof(double))); - + auto* str = allocator.getAllocationStrategy(); - resmgr.registerAllocation(x_vec, {x_vec,nx_*sizeof(double),str}); - resmgr.registerAllocation(xi_vec, {xi_vec,nS_*sizeof(double),str}); - + resmgr.registerAllocation(x_vec, {x_vec, nx_ * sizeof(double), str}); + resmgr.registerAllocation(xi_vec, {xi_vec, nS_ * sizeof(double), str}); + resmgr.copy(x_, x_vec); resmgr.copy(xi_, xi_vec); - - resmgr.deregisterAllocation(x_vec); - resmgr.deregisterAllocation(xi_vec); + + resmgr.deregisterAllocation(x_vec); + resmgr.deregisterAllocation(xi_vec); idx_ = idx; } @@ -160,75 +150,68 @@ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse } // Set the basecase solution `x` - // Assuming 'x' is not assigned by umpire + // Assuming 'x' is not assigned by umpire void set_x(const double* x) { auto* x_vec = const_cast(x); auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator allocator = resmgr.getAllocator(mem_space_); - if(x_==NULL) { + if(x_ == NULL) { x_ = static_cast(allocator.allocate(nx_ * sizeof(double))); } auto* str = allocator.getAllocationStrategy(); - resmgr.registerAllocation(x_vec, {x_vec,nx_*sizeof(double),str}); - resmgr.copy(x_, x_vec); - resmgr.deregisterAllocation(x_vec); + resmgr.registerAllocation(x_vec, {x_vec, nx_ * sizeof(double), str}); + resmgr.copy(x_, x_vec); + resmgr.deregisterAllocation(x_vec); } /// Set the "sample" vector \xi - void set_center(const double *xi) + void set_center(const double* xi) { auto* xi_vec = const_cast(xi); auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator allocator = resmgr.getAllocator(mem_space_); - + if(xi_ == NULL) { xi_ = static_cast(allocator.allocate(nS_ * sizeof(double))); } auto* str = allocator.getAllocationStrategy(); - resmgr.registerAllocation(xi_vec, {xi_vec,nS_*sizeof(double),str}); + resmgr.registerAllocation(xi_vec, {xi_vec, nS_ * sizeof(double), str}); resmgr.copy(xi_, xi_vec); - resmgr.deregisterAllocation(xi_vec); + resmgr.deregisterAllocation(xi_vec); } bool get_prob_sizes(size_type& n, size_type& m) { n = ny_; - m = ny_; - return true; + m = ny_; + return true; } - bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) - { - - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(i == 0) { - xlow[i] = 0.; //y_1 bounded - } - else { - xlow[i] = -1e+20; - } - }); - - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(i == 0) { - xupp[i] = 1e20; - } - else { - xupp[i] = 1e+20; - } - }); - - RAJA::forall(RAJA::RangeSegment(0, n), - [=] (RAJA::Index_type i) - { - type[i] = hiopNonlinear; - }); + bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) + { + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { + if(i == 0) { + xlow[i] = 0.; // y_1 bounded + } else { + xlow[i] = -1e+20; + } + }); + + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { + if(i == 0) { + xupp[i] = 1e20; + } else { + xupp[i] = 1e+20; + } + }); + + RAJA::forall(RAJA::RangeSegment(0, n), [=](RAJA::Index_type i) { type[i] = hiopNonlinear; }); return true; } @@ -236,155 +219,142 @@ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse { assert(m == ny_); const auto d_ny = ny_; - RAJA::forall(RAJA::RangeSegment(0, ny_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(i == d_ny-1) { - clow[d_ny-1] = 1.; - cupp[d_ny-1] = 1e20; - } else { - clow[i] = 0.; - cupp[i] = 1e20; - } - }); - - //clow[ny_-1] = 0.; + RAJA::forall( + RAJA::RangeSegment(0, ny_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(i == d_ny - 1) { + clow[d_ny - 1] = 1.; + cupp[d_ny - 1] = 1e20; + } else { + clow[i] = 0.; + cupp[i] = 1e20; + } + }); + + // clow[ny_-1] = 0.; return true; } - bool get_sparse_blocks_info(size_type& nx, + bool get_sparse_blocks_info(size_type& nx, size_type& nnz_sparse_Jaceq, size_type& nnz_sparse_Jacineq, - size_type& nnz_sparse_Hess_Lagr) + size_type& nnz_sparse_Hess_Lagr) { nx = ny_; - assert(nx>0); + assert(nx > 0); nnz_sparse_Jaceq = 0; - nnz_sparse_Jacineq = ny_+(ny_-1)*2; - - nnz_sparse_Hess_Lagr = ny_; //Lagrangian + nnz_sparse_Jacineq = ny_ + (ny_ - 1) * 2; + + nnz_sparse_Hess_Lagr = ny_; // Lagrangian return true; } bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { - assert(ny_==n); + assert(ny_ == n); obj_value = 0.; - RAJA::ReduceSum aux(0); //why do we need reducesum? + RAJA::ReduceSum aux(0); // why do we need reducesum? const auto d_x = x_; - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - aux += (x[i]-d_x[i])*(x[i]-d_x[i]); - }); + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { aux += (x[i] - d_x[i]) * (x[i] - d_x[i]); }); obj_value += aux.get(); obj_value *= 0.5; return true; } - - bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) + + bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { // return false so that HiOp will rely on the constraint evaluator defined above return false; } - bool eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, + bool eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, double* cons) { - assert(n==ny_); assert(m==ny_); - assert(num_cons==ny_||num_cons==0); - if(num_cons==0) { + assert(n == ny_); + assert(m == ny_); + assert(num_cons == ny_ || num_cons == 0); + if(num_cons == 0) { return true; } - RAJA::forall(RAJA::RangeSegment(0, m), - RAJA_LAMBDA(RAJA::Index_type i) - { - cons[i]=0.; - }); + RAJA::forall(RAJA::RangeSegment(0, m), RAJA_LAMBDA(RAJA::Index_type i) { cons[i] = 0.; }); - const auto *d_xi = xi_; + const auto* d_xi = xi_; const auto d_nx = nx_; const auto d_nS = nS_; - RAJA::forall(RAJA::RangeSegment(0, num_cons), - RAJA_LAMBDA(RAJA::Index_type irow) - { - const int con_idx = (int) idx_cons[irow]; - if(con_idx(RAJA::RangeSegment(1, nS_), - // RAJA_LAMBDA(RAJA::Index_type i) - // { - // Using a for loop inside a RAJA loop: - for (int i=1; i< d_nS; i++) { - cons[m-1] += (x[i] + d_xi[i])*(x[i] + d_xi[i]); - } - //}); - //RAJA::forall(RAJA::RangeSegment(nS_, nx_), - // RAJA_LAMBDA(RAJA::Index_type i) - //{ - for (int i=d_nS; i< d_nx; i++) { - cons[m-1] += x[i]*x[i]; - } - //}); - } - }); - return true; + RAJA::forall( + RAJA::RangeSegment(0, num_cons), + RAJA_LAMBDA(RAJA::Index_type irow) { + const int con_idx = (int)idx_cons[irow]; + if(con_idx < m - 1) { + cons[con_idx] = x[con_idx + 1] - x[con_idx]; + } else { + assert(con_idx == m - 1); + cons[m - 1] = (1 - x[0] + d_xi[0]) * (1 - x[0] + d_xi[0]); + // RAJA::forall(RAJA::RangeSegment(1, nS_), + // RAJA_LAMBDA(RAJA::Index_type i) + // { + // Using a for loop inside a RAJA loop: + for(int i = 1; i < d_nS; i++) { + cons[m - 1] += (x[i] + d_xi[i]) * (x[i] + d_xi[i]); + } + //}); + // RAJA::forall(RAJA::RangeSegment(nS_, nx_), + // RAJA_LAMBDA(RAJA::Index_type i) + //{ + for(int i = d_nS; i < d_nx; i++) { + cons[m - 1] += x[i] * x[i]; + } + //}); + } + }); + return true; } - - // r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that + + // r_i(x;\xi^i) = 1/S * min_y 0.5 || y - x ||^2 such that bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) { - assert(ny_==n); + assert(ny_ == n); const auto d_x = x_; - RAJA::forall(RAJA::RangeSegment(0, nx_), - RAJA_LAMBDA(RAJA::Index_type i) - { - gradf[i] = (x[i]-d_x[i]); - }); + RAJA::forall(RAJA::RangeSegment(0, nx_), RAJA_LAMBDA(RAJA::Index_type i) { gradf[i] = (x[i] - d_x[i]); }); return true; } - + bool eval_Jac_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS) + const size_type& m, + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS) { return false; } - bool eval_Jac_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, + bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, const index_type* idx_cons, - const double* x, + const double* x, bool new_x, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS) + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS) { - assert(num_cons==nx_||num_cons==0); + assert(num_cons == nx_ || num_cons == 0); // indexes for sparse part - if(num_cons==0) { + if(num_cons == 0) { return true; } @@ -392,143 +362,127 @@ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse // umpire::Allocator allocator = resmgr.getAllocator(mem_space_); // index_type* nnzit = static_cast(allocator.allocate(1 * sizeof(index_type))); // resmgr.memset(nnzit, 0); - - if(iJacS!=NULL && jJacS!=NULL) { + + if(iJacS != NULL && jJacS != NULL) { const auto d_ny = ny_; - RAJA::forall(RAJA::RangeSegment(0, num_cons), - RAJA_LAMBDA (RAJA::Index_type itrow) - { - const int con_idx = (int) idx_cons[itrow]; - if(con_idx(RAJA::RangeSegment(1, m), - // [=] __device__ (RAJA::Index_type i) - //{ - for (int i=1; i( + RAJA::RangeSegment(0, num_cons), + RAJA_LAMBDA(RAJA::Index_type itrow) { + const int con_idx = (int)idx_cons[itrow]; + if(con_idx < d_ny - 1) { + // sparse Jacobian eq w.r.t. x and s + // y_k + iJacS[2 * itrow] = con_idx; + jJacS[2 * itrow] = con_idx; //-1 + // nnzit[0] = nnzit[0] + 1; + + // y_{k+1} + iJacS[2 * itrow + 1] = con_idx; + jJacS[2 * itrow + 1] = con_idx + 1; // 1 + // nnzit[0] += 1; // nnzit does nto work inside RAJA loop + } else if(con_idx == m - 1) { + assert(itrow == m - 1); + iJacS[2 * (m - 1)] = m - 1; + jJacS[2 * (m - 1)] = 0; + // nnzit[0] += 1; + // cons[m-1] = (1-x[0]+xi_[0]); + + // RAJA::forall(RAJA::RangeSegment(1, m), + // [=] __device__ (RAJA::Index_type i) + //{ + for(int i = 1; i < m; i++) { + iJacS[2 * (m - 1) + i] = m - 1; + jJacS[2 * (m - 1) + i] = i; + // nnzit[0] += 1; + } + // cons[m-1] += x[i]*x[i]; + // sparse Jacobian ineq w.r.t x and s + } + }); + assert(2 * (m - 1) + m == nnzJacS); // utilizing the structure of the nonzeros directly // assert(nnzit[0]==nnzJacS); } - //values for sparse Jacobian if requested by the solver - if(MJacS!=NULL) { - //nnzit[0] = 0; - const auto *d_xi = xi_; + // values for sparse Jacobian if requested by the solver + if(MJacS != NULL) { + // nnzit[0] = 0; + const auto* d_xi = xi_; const auto d_nS = nS_; - RAJA::forall(RAJA::RangeSegment(0, num_cons), - RAJA_LAMBDA (RAJA::Index_type itrow) - { - const int con_idx = (int) idx_cons[itrow]; - if(con_idx=d_nS); - for(int i=1; i( + RAJA::RangeSegment(0, num_cons), + RAJA_LAMBDA(RAJA::Index_type itrow) { + const int con_idx = (int)idx_cons[itrow]; + if(con_idx < m - 1) { + // sparse Jacobian eq w.r.t. x and s + // yk+1 + MJacS[2 * itrow] = -1.; + // nnzit[0] += 1; + // yk + MJacS[2 * itrow + 1] = 1.; + // nnzit[0] += 1; + } else if(con_idx == m - 1) { + assert(itrow == m - 1); + MJacS[2 * (m - 1)] = -2 * (1 - x[0] + d_xi[0]); + // nnzit[0] += 1; + // cons[m-1] = (1-x[0]+xi_[0])^2; + assert(m >= d_nS); + for(int i = 1; i < d_nS; i++) { + MJacS[2 * (m - 1) + i] = 2 * (x[i] + d_xi[i]); + // nnzit[0] += 1; + } + // cons[m-1] += (x[i] + xi_[i])*(x[i] + xi_[i]); + for(int i = d_nS; i < m; i++) { + MJacS[2 * (m - 1) + i] = 2 * x[i]; + // nnzit[0] += 1; + // cons[m-1] += x[i]*x[i]; + } + // sparse Jacobian ineq w.r.t x and s + } + }); + assert(2 * (m - 1) + m == nnzJacS); } // allocator.deallocate(nnzit); // assert("for debugging" && false); //for debugging purpose return true; } - - bool eval_Hess_Lagr(const size_type& n, - const size_type& m, - const double* x, - bool new_x, + + bool eval_Hess_Lagr(const size_type& n, + const size_type& m, + const double* x, + bool new_x, const double& obj_factor, - const double* lambda, + const double* lambda, bool new_lambda, - const int& nnzHSS, - int* iHSS, - int* jHSS, - double* MHSS) + const int& nnzHSS, + int* iHSS, + int* jHSS, + double* MHSS) { - assert(nnzHSS==m); - if(iHSS!=NULL && jHSS!=NULL) { - RAJA::forall(RAJA::RangeSegment(0, m), - RAJA_LAMBDA(RAJA::Index_type i) - { - iHSS[i] = jHSS[i] = i; - }); + assert(nnzHSS == m); + if(iHSS != NULL && jHSS != NULL) { + RAJA::forall(RAJA::RangeSegment(0, m), RAJA_LAMBDA(RAJA::Index_type i) { iHSS[i] = jHSS[i] = i; }); } - if(MHSS!=NULL) { - RAJA::forall(RAJA::RangeSegment(0, m), - RAJA_LAMBDA(RAJA::Index_type i) - { - MHSS[i] = obj_factor; - }); - MHSS[0] += 2*lambda[m-1]; - RAJA::forall(RAJA::RangeSegment(1, m), - RAJA_LAMBDA(RAJA::Index_type i) - { - MHSS[i] += lambda[m-1]* 2.; - }); + if(MHSS != NULL) { + RAJA::forall(RAJA::RangeSegment(0, m), RAJA_LAMBDA(RAJA::Index_type i) { MHSS[i] = obj_factor; }); + MHSS[0] += 2 * lambda[m - 1]; + RAJA::forall( + RAJA::RangeSegment(1, m), + RAJA_LAMBDA(RAJA::Index_type i) { MHSS[i] += lambda[m - 1] * 2.; }); } return true; } /* Implementation of the primal starting point specification */ bool get_starting_point(const size_type& global_n, double* x0) - { - assert(global_n==nx_); - RAJA::forall(RAJA::RangeSegment(0, global_n), - RAJA_LAMBDA(RAJA::Index_type i) - { - x0[i]=1.; - }); + { + assert(global_n == nx_); + RAJA::forall(RAJA::RangeSegment(0, global_n), RAJA_LAMBDA(RAJA::Index_type i) { x0[i] = 1.; }); return true; } bool get_starting_point(const size_type& n, const size_type& m, double* x0, bool& duals_avail, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, bool& slacks_avail, @@ -542,7 +496,7 @@ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse bool get_warmstart_point(const size_type& n, const size_type& m, double* x0, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, double* ineq_slack, @@ -557,23 +511,19 @@ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse * which is the x_ in the protected variable, while x in the function implementation * represents y in the problem description. */ - bool compute_gradx(const int n, const double* y, double* gradx) + bool compute_gradx(const int n, const double* y, double* gradx) { - assert(nx_==n); - const auto *d_x = x_; - RAJA::forall(RAJA::RangeSegment(0, nx_), - RAJA_LAMBDA(RAJA::Index_type i) - { - gradx[i] = (d_x[i]-y[i]); - }); + assert(nx_ == n); + const auto* d_x = x_; + RAJA::forall(RAJA::RangeSegment(0, nx_), RAJA_LAMBDA(RAJA::Index_type i) { gradx[i] = (d_x[i] - y[i]); }); return true; }; /** - * Returns COMM_SELF communicator since this example is only intended to run - * on one MPI process + * Returns COMM_SELF communicator since this example is only intended to run + * on one MPI process */ - bool get_MPI_comm(MPI_Comm& comm_out) + bool get_MPI_comm(MPI_Comm& comm_out) { comm_out = MPI_COMM_SELF; return true; @@ -582,7 +532,7 @@ class PriDecRecourseProbleEx2Sparse : public hiop::hiopInterfaceSparse protected: double* x_; double* xi_; - int nx_; // n_ = nx = ny + int nx_; // n_ = nx = ny int ny_; int nS_; int S_; diff --git a/src/Drivers/PriDec/NlpPriDecEx3Sparse.hpp b/src/Drivers/PriDec/NlpPriDecEx3Sparse.hpp index a73ceb728..395a4ab89 100644 --- a/src/Drivers/PriDec/NlpPriDecEx3Sparse.hpp +++ b/src/Drivers/PriDec/NlpPriDecEx3Sparse.hpp @@ -1,15 +1,15 @@ #include -#include //for memcpy +#include //for memcpy #include #include "hiopInterface.hpp" #include #include #ifndef HIOP_EXAMPLE_PRIDEC_EX3 -#define HIOP_EXAMPLE_PRIDEC_EX3 +#define HIOP_EXAMPLE_PRIDEC_EX3 /** This is the full problem defined directly in hiopSparse to test the result of hiopAlgPriDecomp - * Base case from SparseEx1. + * Base case from SparseEx1. * min sum 1/4* { (x_{i}-1)^4 : i=1,...,n} + sum_{i=1^S} 1/S * min_y 0.5 || y - x ||^2 * s.t. * 4*x_1 + 2*x_2 == 10 @@ -21,127 +21,158 @@ * x_i >=0.5, i=4,...,n * For each i=1...S, an independent y^i, n_y=n_x - * (1-y^i_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y^i_k+\xi^i_k)^2 - * + \sum_{k=n_S+1}^{n_y} y_k^{i,2} >= 1 //last one in the constraint implementation - * + * (1-y^i_1 + \xi^i_1)^2 + \sum_{k=2}^{n_S} (y^i_k+\xi^i_k)^2 + * + \sum_{k=n_S+1}^{n_y} y_k^{i,2} >= 1 //last one in the constraint implementation + * * y^i_k - y^i_{k-1} >=0, k=2, ..., n_y * * y^i_1 >=0 */ - using namespace hiop; class SparseEx3 : public hiop::hiopInterfaceSparse { public: - SparseEx3(int nx,int S) - : n_vars_(nx+S*nx), - n_cons_{2+S*nx}, - nx_(nx), - S_(S) //total number of variables should be nx+S*nx + SparseEx3(int nx, int S) + : n_vars_(nx + S * nx), + n_cons_{2 + S * nx}, + nx_(nx), + S_(S) // total number of variables should be nx+S*nx { - assert(nx>=3); - if(nx>3) { - n_cons_ += nx-3; + assert(nx >= 3); + if(nx > 3) { + n_cons_ += nx - 3; } - nS_ = int(nx_/2); - xi_ = new double[S_*nS_]; + nS_ = int(nx_ / 2); + xi_ = new double[S_ * nS_]; x0_ = new double[nx_]; - for(int i=0;i=3); - if(nx>3) { - n_cons_ += nx-3; + assert(nx >= 3); + if(nx > 3) { + n_cons_ += nx - 3; } - xi_ = new double[nS_*S_]; + xi_ = new double[nS_ * S_]; x0_ = new double[nx_]; - for(int i=0;i=3), which are bounded only from below - xlow[i]= 0.5; xupp[i]=1e20; type[i]=hiopNonlinear; + assert(n == n_vars_); + for(size_type i = 0; i < nx_; i++) { + if(i == 0) { + xlow[i] = -1e20; + xupp[i] = 1e20; + type[i] = hiopNonlinear; + continue; + } + if(i == 1) { + xlow[i] = 0.0; + xupp[i] = 1e20; + type[i] = hiopNonlinear; + continue; + } + if(i == 2) { + xlow[i] = 1.5; + xupp[i] = 10.0; + type[i] = hiopNonlinear; + continue; + } + // this is for x_4, x_5, ... , x_n (i>=3), which are bounded only from below + xlow[i] = 0.5; + xupp[i] = 1e20; + type[i] = hiopNonlinear; } - for(size_type i=0; i3){assert(conidx==2+nx_-3);}//nx_-1 - for(size_type i=0;i 3) { + assert(conidx == 2 + nx_ - 3); + } // nx_-1 + for(size_type i = 0; i < S_; i++) { + for(size_type j = 0; j < nx_ - 1; j++) { + clow[conidx + nx_ * i + j] = 0.; + cupp[conidx + nx_ * i + j] = 1e20; } - clow[conidx+nx_*i+nx_-1] = 1.; - cupp[conidx+nx_*i+nx_-1] = 1e20; + clow[conidx + nx_ * i + nx_ - 1] = 1.; + cupp[conidx + nx_ * i + nx_ - 1] = 1e20; } return true; } - - bool get_sparse_blocks_info(int& n, - int& nnz_sparse_Jaceq, - int& nnz_sparse_Jacineq, - int& nnz_sparse_Hess_Lagr) + + bool get_sparse_blocks_info(int& n, int& nnz_sparse_Jaceq, int& nnz_sparse_Jacineq, int& nnz_sparse_Hess_Lagr) { n = n_vars_; nnz_sparse_Jaceq = 2; - nnz_sparse_Jacineq = 2+2*(nx_-3)+S_*(nx_+ 2*(nx_-1)) ; - nnz_sparse_Hess_Lagr = nx_+S_*nx_+S_*nx_; //this variable should always be <= n_vars_ + nnz_sparse_Jacineq = 2 + 2 * (nx_ - 3) + S_ * (nx_ + 2 * (nx_ - 1)); + nnz_sparse_Hess_Lagr = nx_ + S_ * nx_ + S_ * nx_; // this variable should always be <= n_vars_ return true; } bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { - assert(n==n_vars_); - obj_value=0.; - for(int i=0;i 4*x_1 + 2*x_2 == 10 - cons[conidx++] += 4*x[0] + 2*x[1]; + // compute the constraint one by one. + // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 + cons[conidx++] += 4 * x[0] + 2 * x[1]; // --- constraint 2 body ---> 2*x_1 + x_3 - cons[conidx++] += 2*x[0] + 1*x[2]; + cons[conidx++] += 2 * x[0] + 1 * x[2]; // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - for(auto i=3; i=3); - //2*(n-1) for basecase + assert(n == n_vars_); + assert(m == n_cons_); + assert(n >= 3); + // 2*(n-1) for basecase - assert(nnzJacS == 2*(nx_-1)+S_*(2*(nx_-1)+nx_)); + assert(nnzJacS == 2 * (nx_ - 1) + S_ * (2 * (nx_ - 1) + nx_)); int nnzit{0}; size_type conidx{0}; - if(iJacS!=NULL && jJacS!=NULL) { + if(iJacS != NULL && jJacS != NULL) { // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; jJacS[nnzit++] = 1; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 1; conidx++; // --- constraint 2 body ---> 2*x_1 + x_3 - iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; jJacS[nnzit++] = 2; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 2; conidx++; // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - for(auto i=3; i 4*x_1 + 2*x_2 == 10 MJacS[nnzit++] = 4; MJacS[nnzit++] = 2; @@ -297,25 +335,25 @@ class SparseEx3 : public hiop::hiopInterfaceSparse MJacS[nnzit++] = 1; // --- constraint 3 body ---> 2*x_1 + 0.5*x_4 - for(auto i=3; i /** - * Driver for Sparse Example 3 that illustrates the use of hiop::hiopAlgPrimalDecomposition - * + * Driver for Sparse Example 3 that illustrates the use of hiop::hiopAlgPrimalDecomposition + * * @note This example is built only when HIOP_USE_MPI and HIOP_SPARSE are enabled during cmake build * and require at least two MPI ranks in MPI_COMM_WORLD. * */ // -//TODO: add -selfcheck option (see other drivers) and add the driver to cmake tests +// TODO: add -selfcheck option (see other drivers) and add the driver to cmake tests // using namespace hiop; - int main(int argc, char **argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); - ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); assert(MPI_SUCCESS==ierr); - double t3 = MPI_Wtime(); - double t4 = 0.; + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); + ierr = MPI_Comm_rank(MPI_COMM_WORLD, &rank); + assert(MPI_SUCCESS == ierr); + double t3 = MPI_Wtime(); + double t4 = 0.; #endif #ifdef HIOP_USE_MAGMA @@ -44,40 +44,35 @@ int main(int argc, char **argv) #endif int nx = 1000; int S = 1920; - int nS = 5; - double x[nx+S*nx]; + int nS = 5; + double x[nx + S * nx]; - SparseEx3 nlp_interface(nx,S,nS); + SparseEx3 nlp_interface(nx, S, nS); hiopNlpSparse nlp(nlp_interface); - nlp.options->SetStringValue("compute_mode", "cpu");// using CPU only in computations + nlp.options->SetStringValue("compute_mode", "cpu"); // using CPU only in computations nlp.options->SetStringValue("KKTLinsys", "xdycyd"); - //nlp.options->SetStringValue("KKTLinsys", "full"); + // nlp.options->SetStringValue("KKTLinsys", "full"); hiopAlgFilterIPMNewton solver(&nlp); hiopSolveStatus status = solver.run(); double obj_value = solver.getObjective(); - + solver.getSolution(x); - for(int i=0;iSetStringValue("Hessian", "analytical_exact"); - - nlp.options->SetStringValue("duals_update_type", "linear"); - + + nlp.options->SetStringValue("duals_update_type", "linear"); + nlp.options->SetStringValue("compute_mode", "cpu"); nlp.options->SetStringValue("KKTLinsys", "xdycyd"); - //nlp.options->SetStringValue("write_kkt", "yes"); + // nlp.options->SetStringValue("write_kkt", "yes"); nlp.options->SetNumericValue("mu0", 0.1); nlp.options->SetStringValue("options_file_fr_prob", "hiop_fr_ci.options"); @@ -238,12 +251,12 @@ int main(int argc, char **argv) nlp.options->SetStringValue("linear_solver_sparse", "ginkgo"); nlp.options->SetStringValue("fact_acceptor", "inertia_free"); nlp.options->SetIntegerValue("ir_outer_maxit", 0); - if (use_ginkgo_cuda) { - nlp.options->SetStringValue("ginkgo_exec", "cuda"); - } else if (use_ginkgo_hip) { - nlp.options->SetStringValue("ginkgo_exec", "hip"); + if(use_ginkgo_cuda) { + nlp.options->SetStringValue("ginkgo_exec", "cuda"); + } else if(use_ginkgo_hip) { + nlp.options->SetStringValue("ginkgo_exec", "hip"); } else { - nlp.options->SetStringValue("ginkgo_exec", "reference"); + nlp.options->SetStringValue("ginkgo_exec", "reference"); } } if(force_fr) { @@ -254,11 +267,9 @@ int main(int argc, char **argv) hiopSolveStatus status = solver.run(); double obj_value = solver.getObjective(); - if(status<0) { - if(rank==0) { - printf("solver returned negative solve status: %d (obj. is %18.12e)\n", - status, - obj_value); + if(status < 0) { + if(rank == 0) { + printf("solver returned negative solve status: %d (obj. is %18.12e)\n", status, obj_value); } #ifdef HIOP_USE_MPI MPI_Finalize(); @@ -266,7 +277,7 @@ int main(int argc, char **argv) return -1; } - //this is used for "regression" testing when the driver is called with -selfcheck + // this is used for "regression" testing when the driver is called with -selfcheck if(selfCheck) { if(!self_check(n, obj_value)) { #ifdef HIOP_USE_MPI @@ -275,7 +286,7 @@ int main(int argc, char **argv) return -1; } } else { - if(rank==0) { + if(rank == 0) { printf("Optimal objective: %22.14e. Solver status: %d\n", obj_value, status); } } @@ -284,35 +295,39 @@ int main(int argc, char **argv) MPI_Finalize(); #endif - return 0; } - static bool self_check(size_type n, double objval) { -#define num_n_saved 3 //keep this is sync with n_saved and objval_saved +#define num_n_saved 3 // keep this is sync with n_saved and objval_saved const size_type n_saved[] = {50, 500, 5000}; const double objval_saved[] = {1.10351564683176e-01, 1.10351566513480e-01, 1.10351578644469e-01}; #define relerr 1e-6 - bool found=false; - for(int it=0; it relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", - objval, -(int)log10(relerr), objval_saved[it], n); + bool found = false; + for(int it = 0; it < num_n_saved; it++) { + if(n_saved[it] == n) { + found = true; + if(fabs((objval_saved[it] - objval) / (1 + objval_saved[it])) > relerr) { + printf( + "selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, + -(int)log10(relerr), + objval_saved[it], + n); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } break; } } if(!found) { - printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", + n, + objval); return false; } diff --git a/src/Drivers/Sparse/NlpSparseEx2.cpp b/src/Drivers/Sparse/NlpSparseEx2.cpp index 512f0f2a5..e7c0547f2 100644 --- a/src/Drivers/Sparse/NlpSparseEx2.cpp +++ b/src/Drivers/Sparse/NlpSparseEx2.cpp @@ -1,7 +1,7 @@ #include "NlpSparseEx2.hpp" #include -#include //for memcpy +#include //for memcpy #include /** Nonlinear *highly nonconvex* and *rank deficient* problem test for the Filter IPM @@ -28,53 +28,50 @@ * */ SparseEx2::SparseEx2(int n, bool convex_obj, bool rankdefic_Jac_eq, bool rankdefic_Jac_ineq, double scal_neg_obj) - : n_vars_{n}, - n_cons_{2}, - convex_obj_{convex_obj}, - rankdefic_eq_(rankdefic_Jac_eq), - rankdefic_ineq_(rankdefic_Jac_ineq), - scal_neg_obj_{scal_neg_obj} + : n_vars_{n}, + n_cons_{2}, + convex_obj_{convex_obj}, + rankdefic_eq_(rankdefic_Jac_eq), + rankdefic_ineq_(rankdefic_Jac_ineq), + scal_neg_obj_{scal_neg_obj} { - assert(n>=3); - if(n>3) - n_cons_ += n-3; + assert(n >= 3); + if(n > 3) n_cons_ += n - 3; n_cons_ += rankdefic_eq_ + rankdefic_ineq_; } -SparseEx2::~SparseEx2() -{ -} +SparseEx2::~SparseEx2() {} bool SparseEx2::get_prob_sizes(size_type& n, size_type& m) { - n=n_vars_; - m=n_cons_; + n = n_vars_; + m = n_cons_; return true; } -bool SparseEx2::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool SparseEx2::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { - assert(n==n_vars_); - for(index_type i=0; i=3), which are bounded only from below + // this is for x_4, x_5, ... , x_n (i>=3), which are bounded only from below xlow[i] = 0.5; xupp[i] = 1e20; type[i] = hiopNonlinear; @@ -84,60 +81,62 @@ bool SparseEx2::get_vars_info(const size_type& n, double *xlow, double* xupp, No bool SparseEx2::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m==n_cons_); + assert(m == n_cons_); index_type conidx{0}; - clow[conidx]= 10.0; cupp[conidx]= 10.0; type[conidx++]=hiopInterfaceBase::hiopLinear; - clow[conidx]= 5.0; cupp[conidx]= 1e20; type[conidx++]=hiopInterfaceBase::hiopLinear; - for(index_type i=3; i 4*x_1 + 2*x_2 == 10 - cons[conidx++] += 4*x[0] + 2*x[1]; + // compute the constraint one by one. + // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 + cons[conidx++] += 4 * x[0] + 2 * x[1]; // --- constraint 2 body ---> 2*x_1 + x_3 - cons[conidx++] += 2*x[0] + 1*x[2]; + cons[conidx++] += 2 * x[0] + 1 * x[2]; // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - for(auto i=3; i=3); - - assert(nnzJacS == 4 + 2*(n-3) + 2*rankdefic_eq_ + 2*rankdefic_ineq_); - - - int nnzit{0}; - index_type conidx{0}; - - if(iJacS!=NULL && jJacS!=NULL){ - // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; jJacS[nnzit++] = 1; - conidx++; - - // --- constraint 2 body ---> 2*x_1 + x_3 - iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; jJacS[nnzit++] = 2; - conidx++; - - // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - for(auto i=3; i= 3); + + assert(nnzJacS == 4 + 2 * (n - 3) + 2 * rankdefic_eq_ + 2 * rankdefic_ineq_); + + int nnzit{0}; + index_type conidx{0}; + + if(iJacS != NULL && jJacS != NULL) { + // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 1; + conidx++; + + // --- constraint 2 body ---> 2*x_1 + x_3 + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 2; + conidx++; + + // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 + for(auto i = 3; i < n; i++) { + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = i; + conidx++; } - //values for sparse Jacobian if requested by the solver - nnzit = 0; - if(MJacS!=NULL) { - // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - MJacS[nnzit++] = 4; - MJacS[nnzit++] = 2; - - // --- constraint 2 body ---> 2*x_1 + x_3 - MJacS[nnzit++] = 2; - MJacS[nnzit++] = 1; - - // --- constraint 3 body ---> 2*x_1 + 0.5*x_4 - for(auto i=3; i 4*x_1 + 2*x_2 == 10 + MJacS[nnzit++] = 4; + MJacS[nnzit++] = 2; + + // --- constraint 2 body ---> 2*x_1 + x_3 + MJacS[nnzit++] = 2; + MJacS[nnzit++] = 1; + + // --- constraint 3 body ---> 2*x_1 + 0.5*x_4 + for(auto i = 3; i < n; i++) { + MJacS[nnzit++] = 2; + MJacS[nnzit++] = 0.5; + } + + if(rankdefic_ineq_) { + // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] + MJacS[nnzit++] = 4; + MJacS[nnzit++] = 2; + } + + if(rankdefic_eq_) { + // 4*x_1 + 2*x_2 == 10 + MJacS[nnzit++] = 4; + MJacS[nnzit++] = 2; + } + assert(nnzit == nnzJacS); + } + return true; } bool SparseEx2::eval_Hess_Lagr(const size_type& n, @@ -300,27 +310,27 @@ bool SparseEx2::eval_Hess_Lagr(const size_type& n, index_type* jHSS, double* MHSS) { - //Note: lambda is not used since all the constraints are linear and, therefore, do - //not contribute to the Hessian of the Lagrangian - assert(nnzHSS == n); + // Note: lambda is not used since all the constraints are linear and, therefore, do + // not contribute to the Hessian of the Lagrangian + assert(nnzHSS == n); - if(iHSS!=NULL && jHSS!=NULL) { - for(int i=0; i -#include //for memcpy +#include //for memcpy #include #include @@ -49,11 +49,11 @@ using index_type = hiop::index_type; class SparseEx2 : public hiop::hiopInterfaceSparse { public: - SparseEx2(int n, bool convex_obj, bool rankdefic_Jac_eq, bool rankdefic_Jac_ineq, double scal_neg_obj = 1.0); + SparseEx2(int n, bool convex_obj, bool rankdefic_Jac_eq, bool rankdefic_Jac_ineq, double scal_neg_obj = 1.0); virtual ~SparseEx2(); virtual bool get_prob_sizes(size_type& n, size_type& m); - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); virtual bool get_sparse_blocks_info(index_type& nx, index_type& nnz_sparse_Jaceq, @@ -68,18 +68,11 @@ class SparseEx2 : public hiop::hiopInterfaceSparse const double* x, bool new_x, double* cons); - virtual bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons); - virtual bool eval_grad_f(const size_type& n, - const double* x, - bool new_x, - double* gradf); + virtual bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons); + virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); virtual bool eval_Jac_cons(const size_type& n, const size_type& m, - const size_type& num_cons, + const size_type& num_cons, const index_type* idx_cons, const double* x, bool new_x, @@ -95,7 +88,7 @@ class SparseEx2 : public hiop::hiopInterfaceSparse index_type* iJacS, index_type* jJacS, double* MJacS); - virtual bool get_starting_point(const size_type&n, double* x0); + virtual bool get_starting_point(const size_type& n, double* x0); virtual bool eval_Hess_Lagr(const size_type& n, const size_type& m, const double* x, diff --git a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp index e61b866a0..e66463534 100644 --- a/src/Drivers/Sparse/NlpSparseEx2Driver.cpp +++ b/src/Drivers/Sparse/NlpSparseEx2Driver.cpp @@ -10,7 +10,7 @@ using namespace hiop; static bool self_check(size_type n, double obj_value, const bool inertia_free); static bool parse_arguments(int argc, - char **argv, + char** argv, size_type& n, bool& self_check, bool& inertia_free, @@ -29,105 +29,104 @@ static bool parse_arguments(int argc, use_ginkgo_cuda = false; use_ginkgo_cuda = false; switch(argc) { - case 1: - //no arguments - return true; - break; - case 5: //4 arguments + case 1: + // no arguments + return true; + break; + case 5: // 4 arguments { if(std::string(argv[4]) == "-selfcheck") { - self_check = true; + self_check = true; } else if(std::string(argv[4]) == "-inertiafree") { inertia_free = true; } else if(std::string(argv[4]) == "-cusolver") { use_cusolver = true; - } else if(std::string(argv[4]) == "-ginkgo"){ + } else if(std::string(argv[4]) == "-ginkgo") { use_ginkgo = true; - } else if(std::string(argv[4]) == "-ginkgo_cuda"){ + } else if(std::string(argv[4]) == "-ginkgo_cuda") { use_ginkgo = true; use_ginkgo_cuda = true; - } else if(std::string(argv[4]) == "-ginkgo_hip"){ + } else if(std::string(argv[4]) == "-ginkgo_hip") { use_ginkgo = true; use_ginkgo_hip = true; } else { n = std::atoi(argv[4]); - if(n<=0) { + if(n <= 0) { return false; } } } - case 4: //3 arguments + case 4: // 3 arguments { if(std::string(argv[3]) == "-selfcheck") { - self_check = true; + self_check = true; } else if(std::string(argv[3]) == "-inertiafree") { inertia_free = true; } else if(std::string(argv[3]) == "-cusolver") { use_cusolver = true; - } else if(std::string(argv[3]) == "-ginkgo"){ + } else if(std::string(argv[3]) == "-ginkgo") { use_ginkgo = true; - } else if(std::string(argv[3]) == "-ginkgo_cuda"){ + } else if(std::string(argv[3]) == "-ginkgo_cuda") { use_ginkgo = true; use_ginkgo_cuda = true; - } else if(std::string(argv[3]) == "-ginkgo_hip"){ + } else if(std::string(argv[3]) == "-ginkgo_hip") { use_ginkgo = true; use_ginkgo_hip = true; } else { n = std::atoi(argv[3]); - if(n<=0) { + if(n <= 0) { return false; } } } - case 3: //2 arguments + case 3: // 2 arguments { if(std::string(argv[2]) == "-selfcheck") { - self_check = true; + self_check = true; } else if(std::string(argv[2]) == "-inertiafree") { inertia_free = true; } else if(std::string(argv[2]) == "-cusolver") { use_cusolver = true; - } else if(std::string(argv[2]) == "-ginkgo"){ + } else if(std::string(argv[2]) == "-ginkgo") { use_ginkgo = true; - } else if(std::string(argv[2]) == "-ginkgo_cuda"){ + } else if(std::string(argv[2]) == "-ginkgo_cuda") { use_ginkgo = true; use_ginkgo_cuda = true; - } else if(std::string(argv[2]) == "-ginkgo_hip"){ + } else if(std::string(argv[2]) == "-ginkgo_hip") { use_ginkgo = true; use_ginkgo_hip = true; } else { n = std::atoi(argv[2]); - if(n<=0) { + if(n <= 0) { return false; } } } - case 2: //1 argument + case 2: // 1 argument { if(std::string(argv[1]) == "-selfcheck") { - self_check = true; + self_check = true; } else if(std::string(argv[1]) == "-inertiafree") { inertia_free = true; } else if(std::string(argv[1]) == "-cusolver") { use_cusolver = true; - } else if(std::string(argv[1]) == "-ginkgo"){ + } else if(std::string(argv[1]) == "-ginkgo") { use_ginkgo = true; - } else if(std::string(argv[1]) == "-ginkgo_cuda"){ + } else if(std::string(argv[1]) == "-ginkgo_cuda") { use_ginkgo = true; use_ginkgo_cuda = true; - } else if(std::string(argv[1]) == "-ginkgo_hip"){ + } else if(std::string(argv[1]) == "-ginkgo_hip") { use_ginkgo = true; use_ginkgo_hip = true; } else { n = std::atoi(argv[1]); - if(n<=0) { + if(n <= 0) { return false; } } - } - break; - default: - return false; // 4 or more arguments + } break; + default: + return false; // 4 or more arguments } // If CUDA is not available, de-select cuSOLVER @@ -179,23 +178,25 @@ static void usage(const char* exeName) printf("Arguments:\n"); printf(" 'problem_size': number of decision variables [optional, default is 50]\n"); printf(" '-inertiafree': indicate if inertia free approach should be used [optional]\n"); - printf(" '-selfcheck': compares the optimal objective with a previously saved value for the " - "problem specified by 'problem_size'. [optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with a previously saved value for the " + "problem specified by 'problem_size'. [optional]\n"); printf(" '-cusolver': use cuSOLVER linear solver [optional]\n"); printf(" '-ginkgo': use GINKGO linear solver [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); if(comm_size != 1) { - printf("[error] driver detected more than one rank but the driver should be run " - "in serial only; will exit\n"); + printf( + "[error] driver detected more than one rank but the driver should be run " + "in serial only; will exit\n"); MPI_Finalize(); return 1; } @@ -208,7 +209,16 @@ int main(int argc, char **argv) bool use_ginkgo = false; bool use_ginkgo_cuda = false; bool use_ginkgo_hip = false; - if(!parse_arguments(argc, argv, n, selfCheck, inertia_free, use_cusolver, use_resolve, use_ginkgo, use_ginkgo_cuda, use_ginkgo_hip)) { + if(!parse_arguments(argc, + argv, + n, + selfCheck, + inertia_free, + use_cusolver, + use_resolve, + use_ginkgo, + use_ginkgo_cuda, + use_ginkgo_hip)) { usage(argv[0]); #ifdef HIOP_USE_MPI MPI_Finalize(); @@ -221,14 +231,14 @@ int main(int argc, char **argv) bool rankdefic_Jac_ineq = true; double scal_neg_obj = 0.1; - //first test + // first test { SparseEx2 nlp_interface(n, convex_obj, rankdefic_Jac_eq, rankdefic_Jac_ineq, scal_neg_obj); hiopNlpSparse nlp(nlp_interface); nlp.options->SetStringValue("compute_mode", "cpu"); nlp.options->SetStringValue("KKTLinsys", "xdycyd"); - //lsq initialization of the duals fails for this example since the Jacobian is rank deficient - //use zero initialization + // lsq initialization of the duals fails for this example since the Jacobian is rank deficient + // use zero initialization nlp.options->SetStringValue("duals_init", "zero"); if(inertia_free) { nlp.options->SetStringValue("fact_acceptor", "inertia_free"); @@ -247,21 +257,21 @@ int main(int argc, char **argv) if(use_ginkgo) { nlp.options->SetStringValue("linsol_mode", "speculative"); nlp.options->SetStringValue("linear_solver_sparse", "ginkgo"); - if (use_ginkgo_cuda) { - nlp.options->SetStringValue("ginkgo_exec", "cuda"); - } else if (use_ginkgo_hip) { - nlp.options->SetStringValue("ginkgo_exec", "hip"); + if(use_ginkgo_cuda) { + nlp.options->SetStringValue("ginkgo_exec", "cuda"); + } else if(use_ginkgo_hip) { + nlp.options->SetStringValue("ginkgo_exec", "hip"); } else { - nlp.options->SetStringValue("ginkgo_exec", "reference"); + nlp.options->SetStringValue("ginkgo_exec", "reference"); } } hiopAlgFilterIPMNewton solver(&nlp); hiopSolveStatus status = solver.run(); - + double obj_value = solver.getObjective(); - - if(status<0) { - if(rank==0) { + + if(status < 0) { + if(rank == 0) { printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); } #ifdef HIOP_USE_MPI @@ -270,7 +280,7 @@ int main(int argc, char **argv) return -1; } - //this is used for "regression" testing when the driver is called with -selfcheck + // this is used for "regression" testing when the driver is called with -selfcheck if(selfCheck) { if(!self_check(n, obj_value, inertia_free)) { #ifdef HIOP_USE_MPI @@ -279,20 +289,20 @@ int main(int argc, char **argv) return -1; } } else { - if(rank==0) { + if(rank == 0) { printf("Optimal objective: %22.14e. Solver status: %d\n", obj_value, status); } } } - + // - //same as above but with equalities relaxed as two-sided inequalities and using condensed linear system + // same as above but with equalities relaxed as two-sided inequalities and using condensed linear system // #ifdef HIOP_USE_COINHSL { - SparseEx2 nlp_interface(n,convex_obj, rankdefic_Jac_eq, rankdefic_Jac_ineq, scal_neg_obj); + SparseEx2 nlp_interface(n, convex_obj, rankdefic_Jac_eq, rankdefic_Jac_ineq, scal_neg_obj); hiopNlpSparseIneq nlp(nlp_interface); - //compute mode cpu will use MA57 by default + // compute mode cpu will use MA57 by default nlp.options->SetStringValue("KKTLinsys", "condensed"); nlp.options->SetStringValue("compute_mode", "cpu"); nlp.options->SetStringValue("linsol_mode", "speculative"); @@ -307,11 +317,9 @@ int main(int argc, char **argv) double obj_value = solver.getObjective(); - if(status<0) { - if(rank==0) { - printf("solver returned negative solve status with hiopNlpSparseIneq: %d (obj. is %18.12e)\n", - status, - obj_value); + if(status < 0) { + if(rank == 0) { + printf("solver returned negative solve status with hiopNlpSparseIneq: %d (obj. is %18.12e)\n", status, obj_value); } #ifdef HIOP_USE_MPI MPI_Finalize(); @@ -319,7 +327,7 @@ int main(int argc, char **argv) return -1; } - //this is used for "regression" testing when the driver is called with -selfcheck + // this is used for "regression" testing when the driver is called with -selfcheck if(selfCheck) { if(!self_check(n, obj_value, inertia_free)) { #ifdef HIOP_USE_MPI @@ -328,13 +336,13 @@ int main(int argc, char **argv) return -1; } } else { - if(rank==0) { + if(rank == 0) { printf("Optimal objective: %22.14e. Solver status: %d\n", obj_value, status); } } } -#endif //HIOP_USE_COINHSL - +#endif // HIOP_USE_COINHSL + #ifdef HIOP_USE_MPI MPI_Finalize(); #endif @@ -342,31 +350,36 @@ int main(int argc, char **argv) return 0; } - static bool self_check(size_type n, double objval, const bool inertia_free) { -#define num_n_saved 3 //keep this is sync with n_saved and objval_saved +#define num_n_saved 3 // keep this is sync with n_saved and objval_saved const size_type n_saved[] = {50, 500, 10000}; - const double objval_saved[] = { 8.7754974e+00, 6.4322371e+01, 1.2369786e+03}; + const double objval_saved[] = {8.7754974e+00, 6.4322371e+01, 1.2369786e+03}; #define relerr 1e-6 - bool found=false; - for(int it=0; it relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", - objval, -(int)log10(relerr), objval_saved[it], n); + bool found = false; + for(int it = 0; it < num_n_saved; it++) { + if(n_saved[it] == n) { + found = true; + if(fabs((objval_saved[it] - objval) / (1 + objval_saved[it])) > relerr) { + printf( + "selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, + -(int)log10(relerr), + objval_saved[it], + n); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } break; } } if(!found) { - printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", + n, + objval); return false; } diff --git a/src/Drivers/Sparse/NlpSparseEx3.cpp b/src/Drivers/Sparse/NlpSparseEx3.cpp index e3758c24a..4fab95553 100644 --- a/src/Drivers/Sparse/NlpSparseEx3.cpp +++ b/src/Drivers/Sparse/NlpSparseEx3.cpp @@ -1,7 +1,7 @@ #include "NlpSparseEx3.hpp" #include -#include //for memcpy +#include //for memcpy #include /* Test with bounds and constraints of all types. For some reason this @@ -11,47 +11,46 @@ * x_1 + x_n == 10, , if eq_feas == true or eq_infeas == true * x_1 + x_n == 10, i=3,...,n, if eq_feas == true * x_1 + x_n == 15, i=3,...,n, if eq_infeas == true - * 10-a <= x_1 + x_n <= 10+a, , if ineq_feas == true or ineq_infeas == true + * 10-a <= x_1 + x_n <= 10+a, , if ineq_feas == true or ineq_infeas == true * 10-a <= x_1 + x_n <= 15+a, i=3,...,n , if ineq_feas == true * 3-a <= x_1 + x_n <= 5-a, i=3,...,n, if ineq_infeas == true * x_i >= 0, i=1,...,n - * + * * a >= 0 , by default a = 1e-6 * n >= 3; */ SparseEx3::SparseEx3(int n, double scala_a, bool eq_feas, bool eq_infeas, bool ineq_feas, bool ineq_infeas) - : n_vars_{n}, - n_cons_{0}, - scala_a_{scala_a}, - eq_feas_{eq_feas}, - eq_infeas_{eq_infeas}, - ineq_feas_{ineq_feas}, - ineq_infeas_{ineq_infeas} + : n_vars_{n}, + n_cons_{0}, + scala_a_{scala_a}, + eq_feas_{eq_feas}, + eq_infeas_{eq_infeas}, + ineq_feas_{ineq_feas}, + ineq_infeas_{ineq_infeas} { - assert(n>=3); - assert(scala_a>=0); + assert(n >= 3); + assert(scala_a >= 0); if(eq_feas_ || eq_infeas_) { n_cons_++; } if(eq_feas_) { - n_cons_ += n-2; - } + n_cons_ += n - 2; + } if(eq_infeas_) { - n_cons_ += n-2; - } + n_cons_ += n - 2; + } if(ineq_feas_ || ineq_infeas_) { n_cons_++; } if(ineq_feas_) { - n_cons_ += n-2; - } + n_cons_ += n - 2; + } if(ineq_infeas_) { - n_cons_ += n-2; + n_cons_ += n - 2; } } -SparseEx3::~SparseEx3() -{} +SparseEx3::~SparseEx3() {} bool SparseEx3::get_prob_info(NonlinearityType& type) { @@ -60,26 +59,26 @@ bool SparseEx3::get_prob_info(NonlinearityType& type) } bool SparseEx3::get_prob_sizes(size_type& n, size_type& m) -{ +{ n = n_vars_; - m = n_cons_; + m = n_cons_; return true; } -bool SparseEx3::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool SparseEx3::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { - assert(n==n_vars_); - for(index_type i=0; i=3); + assert(n == n_vars_); + assert(m == n_cons_); + assert(n >= 3); assert(nnzJacS == nnzJac_); - + int nnzit{0}; index_type conidx{0}; - if(iJacS!=NULL && jJacS!=NULL){ + if(iJacS != NULL && jJacS != NULL) { if(eq_feas_ || eq_infeas_) { iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; @@ -269,23 +265,23 @@ bool SparseEx3::eval_Jac_cons(const size_type& n, conidx++; } if(eq_feas_) { - for(index_type i=0; i= 0, i=1,...,n - * + * * a >= 0 , by default a = 1e-6 * n >= 3 */ class SparseEx3 : public hiop::hiopInterfaceSparse { public: - SparseEx3(int n, double scala_a = 1e-6, bool eq_feas = false, bool eq_infeas = false, bool ineq_feas = false, bool ineq_infeas = false); + SparseEx3(int n, + double scala_a = 1e-6, + bool eq_feas = false, + bool eq_infeas = false, + bool ineq_feas = false, + bool ineq_infeas = false); virtual ~SparseEx3(); virtual bool get_prob_sizes(size_type& n, size_type& m); virtual bool get_prob_info(NonlinearityType& type); - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); - - virtual bool get_sparse_blocks_info(size_type& nx, + + virtual bool get_sparse_blocks_info(size_type& nx, size_type& nnz_sparse_Jaceq, size_type& nnz_sparse_Jacineq, size_type& nnz_sparse_Hess_Lagr); @@ -47,9 +52,7 @@ class SparseEx3 : public hiop::hiopInterfaceSparse const double* x, bool new_x, double* cons); - virtual bool eval_cons(const size_type& n, const size_type& m, - const double* x, bool new_x, - double* cons); + virtual bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons); virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); virtual bool eval_Jac_cons(const size_type& n, const size_type& m, @@ -69,7 +72,7 @@ class SparseEx3 : public hiop::hiopInterfaceSparse index_type* iJacS, index_type* jJacS, double* MJacS); - virtual bool get_starting_point(const size_type&n, double* x0); + virtual bool get_starting_point(const size_type& n, double* x0); virtual bool eval_Hess_Lagr(const size_type& n, const size_type& m, const double* x, diff --git a/src/Drivers/Sparse/NlpSparseEx3Driver.cpp b/src/Drivers/Sparse/NlpSparseEx3Driver.cpp index 447dde583..8d741b9e6 100644 --- a/src/Drivers/Sparse/NlpSparseEx3Driver.cpp +++ b/src/Drivers/Sparse/NlpSparseEx3Driver.cpp @@ -10,9 +10,9 @@ using namespace hiop; static bool self_check(size_type n, double obj_value, hiopSolveStatus status); static bool parse_arguments(int argc, - char **argv, + char** argv, size_type& n, - double &scala_a, + double& scala_a, bool& eq_feas, bool& eq_infeas, bool& ineq_feas, @@ -27,11 +27,11 @@ static bool parse_arguments(int argc, n = 50; scala_a = 1e-6; switch(argc) { - case 1: - //no arguments - return true; - break; - case 7: //6 arguments + case 1: + // no arguments + return true; + break; + case 7: // 6 arguments { if(std::string(argv[6]) == "-eq_feas") { eq_feas = true; @@ -45,7 +45,7 @@ static bool parse_arguments(int argc, self_check = true; } } - case 6: //5 arguments + case 6: // 5 arguments { if(std::string(argv[5]) == "-eq_feas") { eq_feas = true; @@ -59,7 +59,7 @@ static bool parse_arguments(int argc, self_check = true; } } - case 5: //4 arguments + case 5: // 4 arguments { if(std::string(argv[4]) == "-eq_feas") { eq_feas = true; @@ -73,7 +73,7 @@ static bool parse_arguments(int argc, self_check = true; } } - case 4: //3 arguments + case 4: // 3 arguments { if(std::string(argv[3]) == "-eq_feas") { eq_feas = true; @@ -87,7 +87,7 @@ static bool parse_arguments(int argc, self_check = true; } } - case 3: //2 arguments + case 3: // 2 arguments { if(std::string(argv[2]) == "-eq_feas") { eq_feas = true; @@ -102,18 +102,17 @@ static bool parse_arguments(int argc, } else { scala_a = std::atof(argv[2]); assert(scala_a > 0.0); - } + } } - case 2: //1 argument + case 2: // 1 argument { n = std::atoi(argv[1]); - if(n<=0) { + if(n <= 0) { return false; } - } - break; - default: - return false; //6 or more arguments + } break; + default: + return false; // 6 or more arguments } if(self_check) { @@ -135,24 +134,28 @@ static void usage(const char* exeName) printf(" 'problem_size': number of decision variables [optional, default is 50]\n"); printf(" 'scala_a': small pertubation added to the inequality bounds [optional, default is 1e-6]\n"); printf(" '-eq_feas': include feasible equality constraints, with rank deficient Jacobian [optional, default is no]\n"); - printf(" '-eq_infeas': include infeasible equality constraints, with rank deficient Jacobian [optional, default is no]\n"); + printf( + " '-eq_infeas': include infeasible equality constraints, with rank deficient Jacobian [optional, default is no]\n"); printf(" '-eq_feas': include feasible inequality constraints, with rank deficient Jacobian [optional, default is no]\n"); - printf(" '-eq_infeas': include infeasible inequality constraints, with rank deficient Jacobian [optional, default is no]\n"); - printf(" '-selfcheck': compares the optimal objective with a previously saved value for the " - "problem specified by 'problem_size'. `-ineq_feas` is set to `yes` internally. [optional]\n"); + printf( + " '-eq_infeas': include infeasible inequality constraints, with rank deficient Jacobian [optional, default is no]\n"); + printf( + " '-selfcheck': compares the optimal objective with a previously saved value for the " + "problem specified by 'problem_size'. `-ineq_feas` is set to `yes` internally. [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); if(comm_size != 1) { - printf("[error] driver detected more than one rank but the driver should be run " - "in serial only; will exit\n"); + printf( + "[error] driver detected more than one rank but the driver should be run " + "in serial only; will exit\n"); MPI_Finalize(); return 1; } @@ -172,10 +175,10 @@ int main(int argc, char **argv) } SparseEx3 nlp_interface(n, scala_a, eq_feas, eq_infeas, ineq_feas, ineq_infeas); - hiopNlpSparse nlp(nlp_interface); + hiopNlpSparse nlp(nlp_interface); nlp.options->SetStringValue("Hessian", "analytical_exact"); - nlp.options->SetStringValue("duals_update_type", "lsq"); // "lsq" or "linear" + nlp.options->SetStringValue("duals_update_type", "lsq"); // "lsq" or "linear" nlp.options->SetStringValue("compute_mode", "cpu"); nlp.options->SetStringValue("KKTLinsys", "xdycyd"); nlp.options->SetNumericValue("mu0", 0.1); @@ -184,22 +187,20 @@ int main(int argc, char **argv) hiopSolveStatus status = solver.run(); double obj_value = solver.getObjective(); - if(status<0) { - if(rank==0) { - printf("solver returned negative solve status: %d (obj. is %18.12e)\n", - status, - obj_value); + if(status < 0) { + if(rank == 0) { + printf("solver returned negative solve status: %d (obj. is %18.12e)\n", status, obj_value); } return -1; } - //this is used for "regression" testing when the driver is called with -selfcheck + // this is used for "regression" testing when the driver is called with -selfcheck if(selfCheck) { if(!self_check(n, obj_value, status)) { return -1; } } else { - if(rank==0) { + if(rank == 0) { printf("Optimal objective: %22.14e. Solver status: %d\n", obj_value, status); } } @@ -211,10 +212,9 @@ int main(int argc, char **argv) return 0; } - static bool self_check(size_type n, double objval, hiopSolveStatus status) { -#define num_n_saved 3 //keep this is sync with n_saved and objval_saved +#define num_n_saved 3 // keep this is sync with n_saved and objval_saved const size_type n_saved[] = {50, 500, 5000}; const double objval_saved[] = {7.565566821330e+00, 8.284201575839e+01, 8.066106777964e+02}; @@ -224,23 +224,29 @@ static bool self_check(size_type n, double objval, hiopSolveStatus status) return false; } - bool found=false; - for(int it=0; it relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", - objval, -(int)log10(relerr), objval_saved[it], n); + bool found = false; + for(int it = 0; it < num_n_saved; it++) { + if(n_saved[it] == n) { + found = true; + if(fabs((objval_saved[it] - objval) / (1 + objval_saved[it])) > relerr) { + printf( + "selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, + -(int)log10(relerr), + objval_saved[it], + n); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } break; } } if(!found) { - printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", + n, + objval); return false; } diff --git a/src/Drivers/Sparse/NlpSparseEx4.cpp b/src/Drivers/Sparse/NlpSparseEx4.cpp index a7ddc8d79..a250e1d1b 100644 --- a/src/Drivers/Sparse/NlpSparseEx4.cpp +++ b/src/Drivers/Sparse/NlpSparseEx4.cpp @@ -1,7 +1,7 @@ #include "NlpSparseEx4.hpp" #include -#include //for memcpy +#include //for memcpy #include /* Test problem from a tiny concave example @@ -15,14 +15,15 @@ * 0 <= y <= 11 */ SparseEx4::SparseEx4(double scal_input) - : n_vars(2), n_cons{4}, scal{scal_input} + : n_vars(2), + n_cons{4}, + scal{scal_input} { assert(n_vars == 2); assert(n_cons == 4); } -SparseEx4::~SparseEx4() -{} +SparseEx4::~SparseEx4() {} bool SparseEx4::get_prob_sizes(size_type& n, size_type& m) { @@ -31,9 +32,9 @@ bool SparseEx4::get_prob_sizes(size_type& n, size_type& m) return true; } -bool SparseEx4::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool SparseEx4::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { - assert(n==n_vars); + assert(n == n_vars); xlow[0] = 0.; xupp[0] = 11.; type[0] = hiopNonlinear; @@ -45,7 +46,7 @@ bool SparseEx4::get_vars_info(const size_type& n, double *xlow, double* xupp, No bool SparseEx4::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m==n_cons); + assert(m == n_cons); clow[0] = 0.0; cupp[0] = 1e20; type[0] = hiopInterfaceBase::hiopLinear; @@ -66,166 +67,188 @@ bool SparseEx4::get_cons_info(const size_type& m, double* clow, double* cupp, No } bool SparseEx4::get_sparse_blocks_info(size_type& nx, - size_type& nnz_sparse_Jaceq, - size_type& nnz_sparse_Jacineq, - size_type& nnz_sparse_Hess_Lagr) + size_type& nnz_sparse_Jaceq, + size_type& nnz_sparse_Jacineq, + size_type& nnz_sparse_Hess_Lagr) { - nx = n_vars;; - nnz_sparse_Jaceq = 0; - nnz_sparse_Jacineq = 6; - nnz_sparse_Hess_Lagr = 2; - return true; + nx = n_vars; + ; + nnz_sparse_Jaceq = 0; + nnz_sparse_Jacineq = 6; + nnz_sparse_Hess_Lagr = 2; + return true; } bool SparseEx4::eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { - assert(n==n_vars); + assert(n == n_vars); obj_value = 0.; - obj_value += -3.*x[0]*x[0] - 2.*x[1]*x[1]; + obj_value += -3. * x[0] * x[0] - 2. * x[1] * x[1]; return true; } bool SparseEx4::eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) { - assert(n==n_vars); - gradf[0] = -6.*x[0]; - gradf[1] = -4.*x[1]; + assert(n == n_vars); + gradf[0] = -6. * x[0]; + gradf[1] = -4. * x[1]; return true; } bool SparseEx4::eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, double* cons) + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + double* cons) { return false; } /* Four constraints no matter how large n is */ -bool SparseEx4::eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) +bool SparseEx4::eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { - assert(n==n_vars); - assert(m==n_cons); + assert(n == n_vars); + assert(m == n_cons); - //local contributions to the constraints in cons are reset - for(auto j=0;j y - 0.06*x*x >= 0.0 - cons[conidx++] += scal*( x[1] - 0.06 * x[0] * x[0]); + // compute the constraint one by one. + // --- constraint 1 body ---> y - 0.06*x*x >= 0.0 + cons[conidx++] += scal * (x[1] - 0.06 * x[0] * x[0]); // --- constraint 2 body ---> y + 0.05*x*x <= 10.0 - cons[conidx++] += scal*( x[1] + 0.05 * x[0] * x[0]); + cons[conidx++] += scal * (x[1] + 0.05 * x[0] * x[0]); // --- constraint 3 body ---> y*y <= -64. - cons[conidx++] += scal*( x[1] * x[1] ); + cons[conidx++] += scal * (x[1] * x[1]); // --- constraint 4 body ---> x*x <= -100. - cons[conidx++] += scal*( x[0] * x[0]); + cons[conidx++] += scal * (x[0] * x[0]); return true; } -bool SparseEx4::eval_Jac_cons(const size_type& n, const size_type& m, - const size_type& num_cons, const index_type* idx_cons, - const double* x, bool new_x, - const size_type& nnzJacS, index_type* iJacS, index_type* jJacS, double* MJacS) +bool SparseEx4::eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS) { return false; } -bool SparseEx4::eval_Jac_cons(const size_type& n, const size_type& m, - const double* x, bool new_x, - const size_type& nnzJacS, index_type* iJacS, index_type* jJacS, double* MJacS) +bool SparseEx4::eval_Jac_cons(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS) { - assert(n==n_vars); assert(m==n_cons); - - int nnzit{0}; - index_type conidx{0}; - - if(iJacS!=NULL && jJacS!=NULL){ - // --- constraint 1 body ---> y - 0.06*x*x >= 0.0 - iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; jJacS[nnzit++] = 1; - conidx++; - - // --- constraint 2 body ---> y + 0.05*x*x <= 10.0 - iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - iJacS[nnzit] = conidx; jJacS[nnzit++] = 1; - conidx++; + assert(n == n_vars); + assert(m == n_cons); - // --- constraint 3 body ---> y*y <= -64. - iJacS[nnzit] = conidx; jJacS[nnzit++] = 1; - conidx++; - - // --- constraint 4 body ---> x*x <= -100. - iJacS[nnzit] = conidx; jJacS[nnzit++] = 0; - conidx++; + int nnzit{0}; + index_type conidx{0}; - assert(nnzit == nnzJacS); - } + if(iJacS != NULL && jJacS != NULL) { + // --- constraint 1 body ---> y - 0.06*x*x >= 0.0 + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 1; + conidx++; + + // --- constraint 2 body ---> y + 0.05*x*x <= 10.0 + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 1; + conidx++; + + // --- constraint 3 body ---> y*y <= -64. + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 1; + conidx++; + + // --- constraint 4 body ---> x*x <= -100. + iJacS[nnzit] = conidx; + jJacS[nnzit++] = 0; + conidx++; + + assert(nnzit == nnzJacS); + } - //values for sparse Jacobian if requested by the solver - nnzit = 0; - if(MJacS!=NULL) { - // --- constraint 1 body ---> y - 0.06*x*x >= 0.0 - MJacS[nnzit++] = -0.12*x[0]; - MJacS[nnzit++] = 1.; - conidx++; - - // --- constraint 2 body ---> y + 0.05*x*x <= 10.0 - MJacS[nnzit++] = 0.1*x[0]; - MJacS[nnzit++] = 1.; - conidx++; - - // --- constraint 3 body ---> y*y <= -64. - MJacS[nnzit++] = 2.*x[1]; - conidx++; - - // --- constraint 4 body ---> x*x <= -100. - MJacS[nnzit++] = 2.*x[0]; - conidx++; - assert(nnzit == nnzJacS); - } - return true; + // values for sparse Jacobian if requested by the solver + nnzit = 0; + if(MJacS != NULL) { + // --- constraint 1 body ---> y - 0.06*x*x >= 0.0 + MJacS[nnzit++] = -0.12 * x[0]; + MJacS[nnzit++] = 1.; + conidx++; + + // --- constraint 2 body ---> y + 0.05*x*x <= 10.0 + MJacS[nnzit++] = 0.1 * x[0]; + MJacS[nnzit++] = 1.; + conidx++; + + // --- constraint 3 body ---> y*y <= -64. + MJacS[nnzit++] = 2. * x[1]; + conidx++; + + // --- constraint 4 body ---> x*x <= -100. + MJacS[nnzit++] = 2. * x[0]; + conidx++; + assert(nnzit == nnzJacS); + } + return true; } -bool SparseEx4::eval_Hess_Lagr(const size_type& n, const size_type& m, - const double* x, bool new_x, const double& obj_factor, - const double* lambda, bool new_lambda, - const size_type& nnzHSS, index_type* iHSS, index_type* jHSS, double* MHSS) +bool SparseEx4::eval_Hess_Lagr(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const double& obj_factor, + const double* lambda, + bool new_lambda, + const size_type& nnzHSS, + index_type* iHSS, + index_type* jHSS, + double* MHSS) { - //Note: lambda is not used since all the constraints are linear and, therefore, do - //not contribute to the Hessian of the Lagrangian - assert(nnzHSS == n); - - if(iHSS!=NULL && jHSS!=NULL) { - for(int i=0; iSetStringValue("Hessian", "analytical_exact"); - - nlp.options->SetStringValue("duals_update_type", "linear"); - + + nlp.options->SetStringValue("duals_update_type", "linear"); + nlp.options->SetStringValue("compute_mode", "cpu"); nlp.options->SetStringValue("KKTLinsys", "xdycyd"); - //nlp.options->SetStringValue("write_kkt", "yes"); + // nlp.options->SetStringValue("write_kkt", "yes"); nlp.options->SetNumericValue("mu0", 0.1); nlp.options->SetStringValue("options_file_fr_prob", "hiop_fr_ci.options"); @@ -237,12 +250,12 @@ int main(int argc, char **argv) nlp.options->SetStringValue("linear_solver_sparse", "ginkgo"); nlp.options->SetStringValue("fact_acceptor", "inertia_free"); nlp.options->SetIntegerValue("ir_outer_maxit", 0); - if (use_ginkgo_cuda) { - nlp.options->SetStringValue("ginkgo_exec", "cuda"); - } else if (use_ginkgo_hip) { - nlp.options->SetStringValue("ginkgo_exec", "hip"); + if(use_ginkgo_cuda) { + nlp.options->SetStringValue("ginkgo_exec", "cuda"); + } else if(use_ginkgo_hip) { + nlp.options->SetStringValue("ginkgo_exec", "hip"); } else { - nlp.options->SetStringValue("ginkgo_exec", "reference"); + nlp.options->SetStringValue("ginkgo_exec", "reference"); } } if(force_fr) { @@ -253,11 +266,9 @@ int main(int argc, char **argv) hiopSolveStatus status = solver.run(); double obj_value = solver.getObjective(); - if(status<0) { - if(rank==0) { - printf("solver returned negative solve status: %d (obj. is %18.12e)\n", - status, - obj_value); + if(status < 0) { + if(rank == 0) { + printf("solver returned negative solve status: %d (obj. is %18.12e)\n", status, obj_value); } #ifdef HIOP_USE_MPI MPI_Finalize(); @@ -265,7 +276,7 @@ int main(int argc, char **argv) return -1; } - //this is used for "regression" testing when the driver is called with -selfcheck + // this is used for "regression" testing when the driver is called with -selfcheck if(selfCheck) { if(!self_check(n, obj_value)) { #ifdef HIOP_USE_MPI @@ -274,7 +285,7 @@ int main(int argc, char **argv) return -1; } } else { - if(rank==0) { + if(rank == 0) { printf("Optimal objective: %22.14e. Solver status: %d\n", obj_value, status); } } @@ -283,35 +294,39 @@ int main(int argc, char **argv) MPI_Finalize(); #endif - return 0; } - static bool self_check(size_type n, double objval) { -#define num_n_saved 3 //keep this is sync with n_saved and objval_saved +#define num_n_saved 3 // keep this is sync with n_saved and objval_saved const size_type n_saved[] = {50, 500, 5000}; const double objval_saved[] = {1.10351564683176e-01, 1.10351566513480e-01, 1.10351578644469e-01}; #define relerr 1e-6 - bool found=false; - for(int it=0; it relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", - objval, -(int)log10(relerr), objval_saved[it], n); + bool found = false; + for(int it = 0; it < num_n_saved; it++) { + if(n_saved[it] == n) { + found = true; + if(fabs((objval_saved[it] - objval) / (1 + objval_saved[it])) > relerr) { + printf( + "selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, + -(int)log10(relerr), + objval_saved[it], + n); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } break; } } if(!found) { - printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", + n, + objval); return false; } diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp index f5e8c799f..30dc9ad24 100644 --- a/src/Drivers/Sparse/NlpSparseRajaEx2.cpp +++ b/src/Drivers/Sparse/NlpSparseRajaEx2.cpp @@ -47,7 +47,7 @@ /** * @file NlpSparseRajaEx2.cpp - * + * * @author Nai-Yuan Chiang , LLNL * */ @@ -58,40 +58,37 @@ #include #include -//#include +// #include #include -#include //for memcpy +#include //for memcpy #include - -//TODO: A good idea to not use the internal HiOp Raja policies here and, instead, give self-containing -// definitions of the policies here so that the user gets a better grasp of the concept and does not -// rely on the internals of HiOp. For example: -// #define RAJA_LAMBDA [=] __device__ -// using ex2_raja_exec = RAJA::cuda_exec<128>; -// more defs here - +// TODO: A good idea to not use the internal HiOp Raja policies here and, instead, give self-containing +// definitions of the policies here so that the user gets a better grasp of the concept and does not +// rely on the internals of HiOp. For example: +// #define RAJA_LAMBDA [=] __device__ +// using ex2_raja_exec = RAJA::cuda_exec<128>; +// more defs here #if defined(HIOP_USE_CUDA) #include "ExecPoliciesRajaCudaImpl.hpp" using ex2_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex2_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; -//using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; +// using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; #elif defined(HIOP_USE_HIP) #include using ex2_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex2_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; -//using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; +// using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using ex2_raja_exec = hiop::ExecRajaPoliciesBackend::hiop_raja_exec; using ex2_raja_reduce = hiop::ExecRajaPoliciesBackend::hiop_raja_reduce; -//using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; +// using hiopMatrixRajaSparse = hiop::hiopMatrixRajaSparseTriplet; #endif - /** Nonlinear *highly nonconvex* and *rank deficient* problem test for the Filter IPM * Newton of HiOp. It uses a Sparse NLP formulation. The problem is based on SparseEx1. * @@ -121,27 +118,25 @@ SparseRajaEx2::SparseRajaEx2(std::string mem_space, bool rankdefic_Jac_eq, bool rankdefic_Jac_ineq, double scal_neg_obj) - : mem_space_{mem_space}, - convex_obj_{convex_obj}, - rankdefic_eq_{rankdefic_Jac_eq}, - rankdefic_ineq_{rankdefic_Jac_ineq}, - n_vars_{n}, - scal_neg_obj_{scal_neg_obj}, - n_cons_{2} + : mem_space_{mem_space}, + convex_obj_{convex_obj}, + rankdefic_eq_{rankdefic_Jac_eq}, + rankdefic_ineq_{rankdefic_Jac_ineq}, + n_vars_{n}, + scal_neg_obj_{scal_neg_obj}, + n_cons_{2} { // Make sure mem_space_ is uppercase transform(mem_space_.begin(), mem_space_.end(), mem_space_.begin(), ::toupper); - assert(n>=3 && "number of variables should be greater than 3 for this example"); - if(n>3) { - n_cons_ += n-3; + assert(n >= 3 && "number of variables should be greater than 3 for this example"); + if(n > 3) { + n_cons_ += n - 3; } n_cons_ += rankdefic_eq_ + rankdefic_ineq_; } -SparseRajaEx2::~SparseRajaEx2() -{ -} +SparseRajaEx2::~SparseRajaEx2() {} bool SparseRajaEx2::get_prob_sizes(size_type& n, size_type& m) { @@ -150,96 +145,88 @@ bool SparseRajaEx2::get_prob_sizes(size_type& n, size_type& m) return true; } -bool SparseRajaEx2::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool SparseRajaEx2::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { - assert(n==n_vars_); - - RAJA::forall(RAJA::RangeSegment(0, 1), - RAJA_LAMBDA(RAJA::Index_type i) - { - xlow[0] = -1e20; - xupp[0] = 1e20; -// type[0] = hiopNonlinear; - xlow[1] = 0.0; - xupp[1] = 1e20; -// type[1] = hiopNonlinear; - xlow[2] = 1.0; - xupp[2] = 10.0; -// type[2] = hiopNonlinear; - }); - - if(n>3) { - RAJA::forall(RAJA::RangeSegment(3, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - xlow[i] = 0.5; - xupp[i] = 1e20; -// type[i] = hiopNonlinear; + assert(n == n_vars_); + + RAJA::forall( + RAJA::RangeSegment(0, 1), + RAJA_LAMBDA(RAJA::Index_type i) { + xlow[0] = -1e20; + xupp[0] = 1e20; + // type[0] = hiopNonlinear; + xlow[1] = 0.0; + xupp[1] = 1e20; + // type[1] = hiopNonlinear; + xlow[2] = 1.0; + xupp[2] = 10.0; + // type[2] = hiopNonlinear; }); + + if(n > 3) { + RAJA::forall( + RAJA::RangeSegment(3, n), + RAJA_LAMBDA(RAJA::Index_type i) { + xlow[i] = 0.5; + xupp[i] = 1e20; + // type[i] = hiopNonlinear; + }); } // Use a sequential policy for host computations for now - RAJA::forall(RAJA::RangeSegment(0, n), - [=] (RAJA::Index_type i) - { - type[i] = hiopNonlinear; - }); + RAJA::forall(RAJA::RangeSegment(0, n), [=](RAJA::Index_type i) { type[i] = hiopNonlinear; }); return true; } bool SparseRajaEx2::get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) { - assert(m==n_cons_); + assert(m == n_cons_); size_type n = n_vars_; - assert(m-1 == n-1+rankdefic_ineq_); + assert(m - 1 == n - 1 + rankdefic_ineq_); // RAJA doesn't like member objects bool rankdefic_eq = rankdefic_eq_; bool rankdefic_ineq = rankdefic_ineq_; - + // serial part - RAJA::forall(RAJA::RangeSegment(0, 1), - RAJA_LAMBDA(RAJA::Index_type i) - { - clow[0] = 10.0; - cupp[0] = 10.0; -// type[0] = hiopInterfaceBase::hiopNonlinear; - clow[1] = 5.0; - cupp[1] = 1e20; -// type[1] = hiopInterfaceBase::hiopNonlinear; - - if(rankdefic_ineq) { - // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] - clow[n-1] = -1e+20; - cupp[n-1] = 19.; -// type[n-1] = hiopInterfaceBase::hiopNonlinear; - } - - if(rankdefic_eq) { + RAJA::forall( + RAJA::RangeSegment(0, 1), + RAJA_LAMBDA(RAJA::Index_type i) { + clow[0] = 10.0; + cupp[0] = 10.0; + // type[0] = hiopInterfaceBase::hiopNonlinear; + clow[1] = 5.0; + cupp[1] = 1e20; + // type[1] = hiopInterfaceBase::hiopNonlinear; + + if(rankdefic_ineq) { + // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] + clow[n - 1] = -1e+20; + cupp[n - 1] = 19.; + // type[n-1] = hiopInterfaceBase::hiopNonlinear; + } + + if(rankdefic_eq) { // 4*x_1 + 2*x_2 == 10 - clow[m-1] = 10; - cupp[m-1] = 10; -// type[m-1] = hiopInterfaceBase::hiopNonlinear; - } - }); - - if(n>3) { - RAJA::forall(RAJA::RangeSegment(2, n-1), - RAJA_LAMBDA(RAJA::Index_type conidx) - { - clow[conidx] = 1.0; - cupp[conidx] = 2*n; -// type[conidx] = hiopInterfaceBase::hiopNonlinear; + clow[m - 1] = 10; + cupp[m - 1] = 10; + // type[m-1] = hiopInterfaceBase::hiopNonlinear; + } }); + + if(n > 3) { + RAJA::forall( + RAJA::RangeSegment(2, n - 1), + RAJA_LAMBDA(RAJA::Index_type conidx) { + clow[conidx] = 1.0; + cupp[conidx] = 2 * n; + // type[conidx] = hiopInterfaceBase::hiopNonlinear; + }); } // Must be a sequential host policy for now - RAJA::forall(RAJA::RangeSegment(0, m), - [=] (RAJA::Index_type i) - { - type[i] = hiopNonlinear; - }); + RAJA::forall(RAJA::RangeSegment(0, m), [=](RAJA::Index_type i) { type[i] = hiopNonlinear; }); return true; } @@ -249,27 +236,28 @@ bool SparseRajaEx2::get_sparse_blocks_info(int& nx, int& nnz_sparse_Jacineq, int& nnz_sparse_Hess_Lagr) { - nx = n_vars_;; - nnz_sparse_Jaceq = 2 + 2*rankdefic_eq_; - nnz_sparse_Jacineq = 2 + 2*(n_vars_-3) + 2*rankdefic_ineq_; + nx = n_vars_; + ; + nnz_sparse_Jaceq = 2 + 2 * rankdefic_eq_; + nnz_sparse_Jacineq = 2 + 2 * (n_vars_ - 3) + 2 * rankdefic_ineq_; nnz_sparse_Hess_Lagr = n_vars_; return true; } bool SparseRajaEx2::eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) { - assert(n==n_vars_); - obj_value=0.; + assert(n == n_vars_); + obj_value = 0.; { - int convex_obj = (int) convex_obj_; + int convex_obj = (int)convex_obj_; double scal_neg_obj = scal_neg_obj_; RAJA::ReduceSum aux(0); - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - aux += (2*convex_obj-1) * scal_neg_obj * 0.25 * std::pow(x[i]-1., 4) + 0.5 * std::pow(x[i], 2); - }); + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { + aux += (2 * convex_obj - 1) * scal_neg_obj * 0.25 * std::pow(x[i] - 1., 4) + 0.5 * std::pow(x[i], 2); + }); obj_value += aux.get(); } return true; @@ -277,56 +265,54 @@ bool SparseRajaEx2::eval_f(const size_type& n, const double* x, bool new_x, doub bool SparseRajaEx2::eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) { - assert(n==n_vars_); + assert(n == n_vars_); { - int convex_obj = (int) convex_obj_; + int convex_obj = (int)convex_obj_; double scal_neg_obj = scal_neg_obj_; - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - gradf[i] = (2*convex_obj-1) * scal_neg_obj * std::pow(x[i]-1.,3) + x[i]; - }); + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { gradf[i] = (2 * convex_obj - 1) * scal_neg_obj * std::pow(x[i] - 1., 3) + x[i]; }); } return true; } bool SparseRajaEx2::eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { - assert(n==n_vars_); - assert(m==n_cons_); - assert(n_cons_==2+n-3+rankdefic_eq_+rankdefic_ineq_); + assert(n == n_vars_); + assert(m == n_cons_); + assert(n_cons_ == 2 + n - 3 + rankdefic_eq_ + rankdefic_ineq_); // RAJA doesn't like member objects bool rankdefic_eq = rankdefic_eq_; bool rankdefic_ineq = rankdefic_ineq_; // serial part - RAJA::forall(RAJA::RangeSegment(0, 1), - RAJA_LAMBDA(RAJA::Index_type i) - { - // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - cons[0] = 4*x[0] + 2*x[1]; - // --- constraint 2 body ---> 2*x_1 + x_3 - cons[1] = 2*x[0] + 1*x[2]; - - if(rankdefic_ineq) { - // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] - cons[n-1] = 4*x[0] + 2*x[2]; - } - - if(rankdefic_eq) { - // 4*x_1 + 2*x_2 == 10 - cons[m-1] = 4*x[0] + 2*x[1]; - } - }); - - RAJA::forall(RAJA::RangeSegment(2, n-1), - RAJA_LAMBDA(RAJA::Index_type i) - { - // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - cons[i] = 2*x[0] + 0.5*x[i+1]; - }); + RAJA::forall( + RAJA::RangeSegment(0, 1), + RAJA_LAMBDA(RAJA::Index_type i) { + // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 + cons[0] = 4 * x[0] + 2 * x[1]; + // --- constraint 2 body ---> 2*x_1 + x_3 + cons[1] = 2 * x[0] + 1 * x[2]; + + if(rankdefic_ineq) { + // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] + cons[n - 1] = 4 * x[0] + 2 * x[2]; + } + + if(rankdefic_eq) { + // 4*x_1 + 2*x_2 == 10 + cons[m - 1] = 4 * x[0] + 2 * x[1]; + } + }); + + RAJA::forall( + RAJA::RangeSegment(2, n - 1), + RAJA_LAMBDA(RAJA::Index_type i) { + // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 + cons[i] = 2 * x[0] + 0.5 * x[i + 1]; + }); return true; } @@ -340,145 +326,137 @@ bool SparseRajaEx2::eval_Jac_cons(const size_type& n, index_type* jJacS, double* MJacS) { - assert(n==n_vars_); assert(m==n_cons_); - assert(n>=3); + assert(n == n_vars_); + assert(m == n_cons_); + assert(n >= 3); - assert(nnzJacS == 4 + 2*(n-3) + 2*rankdefic_eq_ + 2*rankdefic_ineq_); + assert(nnzJacS == 4 + 2 * (n - 3) + 2 * rankdefic_eq_ + 2 * rankdefic_ineq_); // RAJA doesn't like member objects bool rankdefic_eq = rankdefic_eq_; bool rankdefic_ineq = rankdefic_ineq_; - if(iJacS !=nullptr && jJacS != nullptr) { + if(iJacS != nullptr && jJacS != nullptr) { // serial part - RAJA::forall(RAJA::RangeSegment(0, 1), - RAJA_LAMBDA(RAJA::Index_type itrow) - { - // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - iJacS[0] = 0; - jJacS[0] = 0; - iJacS[1] = 0; - jJacS[1] = 1; - // --- constraint 2 body ---> 2*x_1 + x_3 - iJacS[2] = 1; - jJacS[2] = 0; - iJacS[3] = 1; - jJacS[3] = 2; - - if(rankdefic_ineq) { - // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] - iJacS[2*n-2] = n-1; - jJacS[2*n-2] = 0; - iJacS[2*n-1] = n-1; - jJacS[2*n-1] = 2; - } - - if(rankdefic_eq) { - // 4*x_1 + 2*x_2 == 10 - iJacS[2*m-2] = m-1; - jJacS[2*m-2] = 0; - iJacS[2*m-1] = m-1; - jJacS[2*m-1] = 1; - } - }); - - RAJA::forall(RAJA::RangeSegment(2, n-1), - RAJA_LAMBDA(RAJA::Index_type itrow) - { - // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - iJacS[2*itrow] = itrow; - jJacS[2*itrow] = 0; - iJacS[2*itrow+1] = itrow; - jJacS[2*itrow+1] = itrow+1; - }); - + RAJA::forall( + RAJA::RangeSegment(0, 1), + RAJA_LAMBDA(RAJA::Index_type itrow) { + // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 + iJacS[0] = 0; + jJacS[0] = 0; + iJacS[1] = 0; + jJacS[1] = 1; + // --- constraint 2 body ---> 2*x_1 + x_3 + iJacS[2] = 1; + jJacS[2] = 0; + iJacS[3] = 1; + jJacS[3] = 2; + + if(rankdefic_ineq) { + // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] + iJacS[2 * n - 2] = n - 1; + jJacS[2 * n - 2] = 0; + iJacS[2 * n - 1] = n - 1; + jJacS[2 * n - 1] = 2; + } + + if(rankdefic_eq) { + // 4*x_1 + 2*x_2 == 10 + iJacS[2 * m - 2] = m - 1; + jJacS[2 * m - 2] = 0; + iJacS[2 * m - 1] = m - 1; + jJacS[2 * m - 1] = 1; + } + }); + + RAJA::forall( + RAJA::RangeSegment(2, n - 1), + RAJA_LAMBDA(RAJA::Index_type itrow) { + // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 + iJacS[2 * itrow] = itrow; + jJacS[2 * itrow] = 0; + iJacS[2 * itrow + 1] = itrow; + jJacS[2 * itrow + 1] = itrow + 1; + }); } if(MJacS != nullptr) { // serial part - RAJA::forall(RAJA::RangeSegment(0, 1), - RAJA_LAMBDA(RAJA::Index_type itrow) - { - // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 - MJacS[0] = 4.0; - MJacS[1] = 2.0; - // --- constraint 2 body ---> 2*x_1 + x_3 - MJacS[2] = 2.0; - MJacS[3] = 1.0; - - if(rankdefic_ineq) { - // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] - MJacS[2*n-2] = 4.0; - MJacS[2*n-1] = 2.0; - } - - if(rankdefic_eq) { - // 4*x_1 + 2*x_2 == 10 - MJacS[2*m-2] = 4.0; - MJacS[2*m-1] = 2.0; - } - }); - - RAJA::forall(RAJA::RangeSegment(2, n-1), - RAJA_LAMBDA(RAJA::Index_type itrow) - { - // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 - MJacS[2*itrow] = 2.0; - MJacS[2*itrow+1] = 0.5; - }); - + RAJA::forall( + RAJA::RangeSegment(0, 1), + RAJA_LAMBDA(RAJA::Index_type itrow) { + // --- constraint 1 body ---> 4*x_1 + 2*x_2 == 10 + MJacS[0] = 4.0; + MJacS[1] = 2.0; + // --- constraint 2 body ---> 2*x_1 + x_3 + MJacS[2] = 2.0; + MJacS[3] = 1.0; + + if(rankdefic_ineq) { + // [-inf] <= 4*x_1 + 2*x_3 <= [ 19 ] + MJacS[2 * n - 2] = 4.0; + MJacS[2 * n - 1] = 2.0; + } + + if(rankdefic_eq) { + // 4*x_1 + 2*x_2 == 10 + MJacS[2 * m - 2] = 4.0; + MJacS[2 * m - 1] = 2.0; + } + }); + + RAJA::forall( + RAJA::RangeSegment(2, n - 1), + RAJA_LAMBDA(RAJA::Index_type itrow) { + // --- constraint 3 body ---> 2*x_1 + 0.5*x_i, for i>=4 + MJacS[2 * itrow] = 2.0; + MJacS[2 * itrow + 1] = 0.5; + }); } return true; } bool SparseRajaEx2::eval_Hess_Lagr(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - const double& obj_factor, - const double* lambda, - bool new_lambda, - const size_type& nnzHSS, - index_type* iHSS, - index_type* jHSS, - double* MHSS) + const size_type& m, + const double* x, + bool new_x, + const double& obj_factor, + const double* lambda, + bool new_lambda, + const size_type& nnzHSS, + index_type* iHSS, + index_type* jHSS, + double* MHSS) { - //Note: lambda is not used since all the constraints are linear and, therefore, do - //not contribute to the Hessian of the Lagrangian + // Note: lambda is not used since all the constraints are linear and, therefore, do + // not contribute to the Hessian of the Lagrangian assert(nnzHSS == n); - if(iHSS!=nullptr && jHSS!=nullptr) { - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - iHSS[i] = i; - jHSS[i] = i; - }); + if(iHSS != nullptr && jHSS != nullptr) { + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { + iHSS[i] = i; + jHSS[i] = i; + }); } - int convex_obj = (int) convex_obj_; + int convex_obj = (int)convex_obj_; double scal_neg_obj = scal_neg_obj_; - if(MHSS!=nullptr) { - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - MHSS[i] = obj_factor * ( (2*convex_obj-1) * scal_neg_obj * 3 * std::pow(x[i]-1., 2) + 1); - }); + if(MHSS != nullptr) { + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { + MHSS[i] = obj_factor * ((2 * convex_obj - 1) * scal_neg_obj * 3 * std::pow(x[i] - 1., 2) + 1); + }); } return true; } - - - bool SparseRajaEx2::get_starting_point(const size_type& n, double* x0) { - assert(n==n_vars_); - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - x0[i] = 0.0; - }); + assert(n == n_vars_); + RAJA::forall(RAJA::RangeSegment(0, n), RAJA_LAMBDA(RAJA::Index_type i) { x0[i] = 0.0; }); return true; } diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2.hpp b/src/Drivers/Sparse/NlpSparseRajaEx2.hpp index 44382c5ab..6489f4c61 100644 --- a/src/Drivers/Sparse/NlpSparseRajaEx2.hpp +++ b/src/Drivers/Sparse/NlpSparseRajaEx2.hpp @@ -47,7 +47,7 @@ /** * @file NlpSparseRajaEx2.hpp - * + * * @author Nai-Yuan Chiang , LLNL * */ @@ -66,7 +66,7 @@ #endif #include -#include //for memcpy +#include //for memcpy #include #include @@ -96,14 +96,14 @@ using index_type = hiop::index_type; * scale_quartic_obj_term: scaling factor for the quartic term in the objective (1.0 by default). * * @note All pointers marked as "managed by Umpire" are allocated by HiOp using the - * Umpire's API. They all are addresses in the same memory space; however, the memory - * space can be host (typically CPU), device (typically GPU), or unified memory (um) - * spaces as per Umpire specification. The selection of the memory space is done via - * the option "mem_space" of HiOp. It is the responsibility of the implementers of - * the HiOp's interfaces (such as the hiop::hiopInterfaceMDS used in this example) to - * work with the "managed by Umpire" pointers in the same memory space as the one + * Umpire's API. They all are addresses in the same memory space; however, the memory + * space can be host (typically CPU), device (typically GPU), or unified memory (um) + * spaces as per Umpire specification. The selection of the memory space is done via + * the option "mem_space" of HiOp. It is the responsibility of the implementers of + * the HiOp's interfaces (such as the hiop::hiopInterfaceMDS used in this example) to + * work with the "managed by Umpire" pointers in the same memory space as the one * specified by the "mem_space" option. - * + * */ class SparseRajaEx2 : public hiop::hiopInterfaceSparse { @@ -122,19 +122,19 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse virtual bool get_prob_sizes(size_type& n, size_type& m); /** - * @brief Get types and bounds on the variables. - * + * @brief Get types and bounds on the variables. + * * @param[in] n number of variables * @param[out] ixlow array with lower bounds (managed by Umpire) * @param[out] ixupp array with upper bounds (managed by Umpire) * @param[out] type array with the variable types (on host) */ - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); /** * Get types and bounds corresponding to constraints. An equality constraint is specified * by setting the lower and upper bounds equal. - * + * * @param[in] m Number of constraints * @param[out] iclow array with lower bounds (managed by Umpire) * @param[out] icupp array with upper bounds (managed by Umpire) @@ -146,10 +146,10 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse * Returns the sizes and number of nonzeros of the sparse blocks * * @param[out] nx number of variables - * @param[out] nnz_sparse_Jace number of nonzeros in the Jacobian of the equalities w.r.t. - * sparse variables - * @param[out] nnz_sparse_Jaci number of nonzeros in the Jacobian of the inequalities w.r.t. - * sparse variables + * @param[out] nnz_sparse_Jace number of nonzeros in the Jacobian of the equalities w.r.t. + * sparse variables + * @param[out] nnz_sparse_Jaci number of nonzeros in the Jacobian of the inequalities w.r.t. + * sparse variables * @param[out] nnz_sparse_Hess_Lagr number of nonzeros in the (sparse) Hessian */ virtual bool get_sparse_blocks_info(index_type& nx, @@ -158,12 +158,12 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse index_type& nnz_sparse_Hess_Lagr); /** - * Evaluate objective. - * + * Evaluate objective. + * * @param[in] n number of variables - * @param[in] x array with the optimization variables or point at which to evaluate + * @param[in] x array with the optimization variables or point at which to evaluate * (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been + * @param[in] new_x indicates whether any of the other eval functions have been * evaluated previously (false) or not (true) at x * @param[out] obj_value the objective function value. */ @@ -174,10 +174,10 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse * * @param[in] num_cons number of constraints to evaluate (size of idx_cons array) * @param[in] idx_cons indexes of the constraints to evaluate (managed by Umpire) - * @param[in] x the point at which to evaluate (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] x the point at which to evaluate (managed by Umpire) + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x - * @param[out] cons array with values of the constraints (managed by Umpire, size num_cons) + * @param[out] cons array with values of the constraints (managed by Umpire, size num_cons) */ virtual bool eval_cons(const size_type& n, const size_type& m, @@ -187,42 +187,35 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse bool new_x, double* cons) { - //return false so that HiOp will rely on the constraint evaluator defined below + // return false so that HiOp will rely on the constraint evaluator defined below return false; } - virtual bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons); + virtual bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons); /** - * Evaluation of the gradient of the objective. + * Evaluation of the gradient of the objective. * * @param[in] n number of variables * @param[in] x array with the optimization variables or point at which to evaluate * (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x - * @param[out] gradf array with the values of the gradient (managed by Umpire) + * @param[out] gradf array with the values of the gradient (managed by Umpire) */ - virtual bool eval_grad_f(const size_type& n, - const double* x, - bool new_x, - double* gradf); + virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); /** - * Evaluates the Jacobian of the constraints. Please check the user manual and the - * documentation of hiop::hiopInterfaceMDS for a detailed discussion of how the last - * four arguments are expected to behave. + * Evaluates the Jacobian of the constraints. Please check the user manual and the + * documentation of hiop::hiopInterfaceMDS for a detailed discussion of how the last + * four arguments are expected to behave. * * @param[in] n number of variables * @param[in] m Number of constraints * @param[in] num_cons number of constraints to evaluate (size of idx_cons array) * @param[in] idx_cons indexes of the constraints to evaluate (managed by Umpire) * @param[in] x the point at which to evaluate (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x * @param[in] nnzJacS number of nonzeros in the sparse Jacobian * @param[out] iJacS array of row indexes in the sparse Jacobian (managed by Umpire) @@ -231,7 +224,7 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse */ virtual bool eval_Jac_cons(const size_type& n, const size_type& m, - const size_type& num_cons, + const size_type& num_cons, const index_type* idx_cons, const double* x, bool new_x, @@ -240,7 +233,7 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse index_type* jJacS, double* MJacS) { - //return false so that HiOp will rely on the Jacobian evaluator defined below + // return false so that HiOp will rely on the Jacobian evaluator defined below return false; } @@ -254,23 +247,22 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse index_type* jJacS, double* MJacS); - /** - * Evaluate the Hessian of the Lagrangian function. Please consult the user manual for a + * Evaluate the Hessian of the Lagrangian function. Please consult the user manual for a * detailed discussion of the form the Lagrangian function takes. - * + * * @param[in] n number of variables * @param[in] m Number of constraints * @param[in] x the point at which to evaluate (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x * @param[in] obj_factor scalar that multiplies the objective term in the Lagrangian function * @param[in] lambda array with values of the multipliers used by the Lagrangian function * @param[in] new_lambda indicates whether lambda values changed since last call - * @param[in] nnzHSS number of nonzeros in the (sparse) Hessian w.r.t. sparse variables + * @param[in] nnzHSS number of nonzeros in the (sparse) Hessian w.r.t. sparse variables * @param[out] iHSS array of row indexes in the Hessian w.r.t. sparse variables * (managed by Umpire) - * @param[out] jHSS array of column indexes in the Hessian w.r.t. sparse variables + * @param[out] jHSS array of column indexes in the Hessian w.r.t. sparse variables * (managed by Umpire) * @param[out] MHSS array of nonzero values in the Hessian w.r.t. sparse variables * (managed by Umpire) @@ -289,11 +281,11 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse /** * Implementation of the primal starting point specification - * + * * @param[in] n number of variables * @param[in] x0 the primal starting point(managed by Umpire) */ - virtual bool get_starting_point(const size_type&n, double* x0); + virtual bool get_starting_point(const size_type& n, double* x0); private: int n_vars_; @@ -302,7 +294,7 @@ class SparseRajaEx2 : public hiop::hiopInterfaceSparse bool rankdefic_eq_; bool rankdefic_ineq_; double scal_neg_obj_; - + std::string mem_space_; }; diff --git a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp index a2455ebe1..d82aa6622 100644 --- a/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp +++ b/src/Drivers/Sparse/NlpSparseRajaEx2Driver.cpp @@ -14,7 +14,7 @@ using namespace hiop; static bool self_check(size_type n, double obj_value, const bool inertia_free); static bool parse_arguments(int argc, - char **argv, + char** argv, size_type& n, bool& self_check, bool& inertia_free, @@ -33,113 +33,112 @@ static bool parse_arguments(int argc, use_ginkgo_cuda = false; use_ginkgo_hip = false; switch(argc) { - case 1: - //no arguments - return true; - break; - case 5: //4 arguments + case 1: + // no arguments + return true; + break; + case 5: // 4 arguments { if(std::string(argv[4]) == "-selfcheck") { - self_check = true; + self_check = true; } else if(std::string(argv[4]) == "-inertiafree") { inertia_free = true; } else if(std::string(argv[4]) == "-resolve_cuda_glu") { use_resolve_cuda_glu = true; } else if(std::string(argv[4]) == "-resolve_cuda_rf") { use_resolve_cuda_rf = true; - } else if(std::string(argv[4]) == "-ginkgo"){ + } else if(std::string(argv[4]) == "-ginkgo") { use_ginkgo = true; - } else if(std::string(argv[4]) == "-ginkgo_cuda"){ + } else if(std::string(argv[4]) == "-ginkgo_cuda") { use_ginkgo = true; use_ginkgo_cuda = true; - } else if(std::string(argv[4]) == "-ginkgo_hip"){ + } else if(std::string(argv[4]) == "-ginkgo_hip") { use_ginkgo = true; use_ginkgo_hip = true; } else { n = std::atoi(argv[4]); - if(n<=0) { + if(n <= 0) { return false; } } } - case 4: //3 arguments + case 4: // 3 arguments { if(std::string(argv[3]) == "-selfcheck") { - self_check = true; + self_check = true; } else if(std::string(argv[3]) == "-inertiafree") { inertia_free = true; } else if(std::string(argv[3]) == "-resolve_cuda_glu") { use_resolve_cuda_glu = true; } else if(std::string(argv[3]) == "-resolve_cuda_rf") { use_resolve_cuda_rf = true; - } else if(std::string(argv[3]) == "-ginkgo"){ + } else if(std::string(argv[3]) == "-ginkgo") { use_ginkgo = true; - } else if(std::string(argv[3]) == "-ginkgo_cuda"){ + } else if(std::string(argv[3]) == "-ginkgo_cuda") { use_ginkgo = true; use_ginkgo_cuda = true; - } else if(std::string(argv[3]) == "-ginkgo_hip"){ + } else if(std::string(argv[3]) == "-ginkgo_hip") { use_ginkgo = true; use_ginkgo_hip = true; } else { n = std::atoi(argv[3]); - if(n<=0) { + if(n <= 0) { return false; } } } - case 3: //2 arguments + case 3: // 2 arguments { if(std::string(argv[2]) == "-selfcheck") { - self_check = true; + self_check = true; } else if(std::string(argv[2]) == "-inertiafree") { inertia_free = true; } else if(std::string(argv[2]) == "-resolve_cuda_glu") { use_resolve_cuda_glu = true; } else if(std::string(argv[2]) == "-resolve_cuda_rf") { use_resolve_cuda_rf = true; - } else if(std::string(argv[2]) == "-ginkgo"){ + } else if(std::string(argv[2]) == "-ginkgo") { use_ginkgo = true; - } else if(std::string(argv[2]) == "-ginkgo_cuda"){ + } else if(std::string(argv[2]) == "-ginkgo_cuda") { use_ginkgo = true; use_ginkgo_cuda = true; - } else if(std::string(argv[2]) == "-ginkgo_hip"){ + } else if(std::string(argv[2]) == "-ginkgo_hip") { use_ginkgo = true; use_ginkgo_hip = true; } else { n = std::atoi(argv[2]); - if(n<=0) { + if(n <= 0) { return false; } } } - case 2: //1 argument + case 2: // 1 argument { if(std::string(argv[1]) == "-selfcheck") { - self_check = true; + self_check = true; } else if(std::string(argv[1]) == "-inertiafree") { inertia_free = true; } else if(std::string(argv[1]) == "-resolve_cuda_glu") { use_resolve_cuda_glu = true; } else if(std::string(argv[1]) == "-resolve_cuda_rf") { use_resolve_cuda_rf = true; - } else if(std::string(argv[1]) == "-ginkgo"){ + } else if(std::string(argv[1]) == "-ginkgo") { use_ginkgo = true; - } else if(std::string(argv[1]) == "-ginkgo_cuda"){ + } else if(std::string(argv[1]) == "-ginkgo_cuda") { use_ginkgo = true; use_ginkgo_cuda = true; - } else if(std::string(argv[1]) == "-ginkgo_hip"){ + } else if(std::string(argv[1]) == "-ginkgo_hip") { use_ginkgo = true; use_ginkgo_hip = true; } else { n = std::atoi(argv[1]); - if(n<=0) { + if(n <= 0) { return false; } } - } - break; - default: - return false; // 4 or more arguments + } break; + default: + return false; // 4 or more arguments } // Currently only CUDA backend for ReSolve is available. Unselect ReSolve if CUDA is not enabled @@ -195,24 +194,30 @@ static void usage(const char* exeName) printf("Arguments:\n"); printf(" 'problem_size': number of decision variables [optional, default is 50]\n"); printf(" '-inertiafree': indicate if inertia free approach should be used [optional]\n"); - printf(" '-selfcheck': compares the optimal objective with a previously saved value for the " - "problem specified by 'problem_size'. [optional]\n"); - printf(" '-use_resolve_cuda_glu': use ReSolve linear solver with KLU factorization and cusolverGLU refactorization [optional]\n"); - printf(" '-use_resolve_cuda_rf' : use ReSolve linear solver with KLU factorization and cusolverRf refactorization [optional]\n"); + printf( + " '-selfcheck': compares the optimal objective with a previously saved value for the " + "problem specified by 'problem_size'. [optional]\n"); + printf( + " '-use_resolve_cuda_glu': use ReSolve linear solver with KLU factorization and cusolverGLU refactorization " + "[optional]\n"); + printf( + " '-use_resolve_cuda_rf' : use ReSolve linear solver with KLU factorization and cusolverRf refactorization " + "[optional]\n"); printf(" '-ginkgo': use GINKGO linear solver [optional]\n"); } - -int main(int argc, char **argv) +int main(int argc, char** argv) { - int rank=0; + int rank = 0; #ifdef HIOP_USE_MPI MPI_Init(&argc, &argv); int comm_size; - int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_size(MPI_COMM_WORLD, &comm_size); + assert(MPI_SUCCESS == ierr); if(comm_size != 1) { - printf("[error] driver detected more than one rank but the driver should be run " - "in serial only; will exit\n"); + printf( + "[error] driver detected more than one rank but the driver should be run " + "in serial only; will exit\n"); MPI_Finalize(); return 1; } @@ -229,11 +234,20 @@ int main(int argc, char **argv) size_type n = 50; bool inertia_free = false; bool use_resolve_cuda_glu = false; - bool use_resolve_cuda_rf = false; - bool use_ginkgo = false; + bool use_resolve_cuda_rf = false; + bool use_ginkgo = false; bool use_ginkgo_cuda = false; - bool use_ginkgo_hip = false; - if(!parse_arguments(argc, argv, n, selfCheck, inertia_free, use_resolve_cuda_glu, use_resolve_cuda_rf, use_ginkgo, use_ginkgo_cuda, use_ginkgo_hip)) { + bool use_ginkgo_hip = false; + if(!parse_arguments(argc, + argv, + n, + selfCheck, + inertia_free, + use_resolve_cuda_glu, + use_resolve_cuda_rf, + use_ginkgo, + use_ginkgo_cuda, + use_ginkgo_hip)) { usage(argv[0]); #ifdef HIOP_USE_MPI MPI_Finalize(); @@ -246,7 +260,7 @@ int main(int argc, char **argv) bool rankdefic_Jac_ineq = true; double scal_neg_obj = 0.1; - //first test + // first test { SparseRajaEx2 nlp_interface(mem_space, n, convex_obj, rankdefic_Jac_eq, rankdefic_Jac_ineq, scal_neg_obj); hiopNlpSparse nlp(nlp_interface); @@ -254,8 +268,8 @@ int main(int argc, char **argv) nlp.options->SetStringValue("KKTLinsys", "xdycyd"); // only support cusolverLU right now, 2023.02.28 - //lsq initialization of the duals fails for this example since the Jacobian is rank deficient - //use zero initialization + // lsq initialization of the duals fails for this example since the Jacobian is rank deficient + // use zero initialization nlp.options->SetStringValue("linear_solver_sparse", "resolve"); if(use_resolve_cuda_rf) { nlp.options->SetStringValue("resolve_refactorization", "rf"); @@ -269,11 +283,11 @@ int main(int argc, char **argv) hiopAlgFilterIPMNewton solver(&nlp); hiopSolveStatus status = solver.run(); - + double obj_value = solver.getObjective(); - - if(status<0) { - if(rank==0) { + + if(status < 0) { + if(rank == 0) { printf("solver returned negative solve status: %d (with objective is %18.12e)\n", status, obj_value); } #ifdef HIOP_USE_MPI @@ -282,7 +296,7 @@ int main(int argc, char **argv) return -1; } - //this is used for "regression" testing when the driver is called with -selfcheck + // this is used for "regression" testing when the driver is called with -selfcheck if(selfCheck) { if(!self_check(n, obj_value, inertia_free)) { #ifdef HIOP_USE_MPI @@ -291,13 +305,12 @@ int main(int argc, char **argv) return -1; } } else { - if(rank==0) { + if(rank == 0) { printf("Optimal objective: %22.14e. Solver status: %d\n", obj_value, status); } } } - - + #ifdef HIOP_USE_MPI MPI_Finalize(); #endif @@ -305,31 +318,36 @@ int main(int argc, char **argv) return 0; } - static bool self_check(size_type n, double objval, const bool inertia_free) { -#define num_n_saved 3 //keep this is sync with n_saved and objval_saved +#define num_n_saved 3 // keep this is sync with n_saved and objval_saved const size_type n_saved[] = {50, 500, 10000}; - const double objval_saved[] = { 8.7754974e+00, 6.4322371e+01, 1.2369786e+03}; + const double objval_saved[] = {8.7754974e+00, 6.4322371e+01, 1.2369786e+03}; #define relerr 1e-6 - bool found=false; - for(int it=0; it relerr) { - printf("selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", - objval, -(int)log10(relerr), objval_saved[it], n); + bool found = false; + for(int it = 0; it < num_n_saved; it++) { + if(n_saved[it] == n) { + found = true; + if(fabs((objval_saved[it] - objval) / (1 + objval_saved[it])) > relerr) { + printf( + "selfcheck failure. Objective (%18.12e) does not agree (%d digits) with the saved value (%18.12e) for n=%d.\n", + objval, + -(int)log10(relerr), + objval_saved[it], + n); return false; } else { - printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); + printf("selfcheck success (%d digits)\n", -(int)log10(relerr)); } break; } } if(!found) { - printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", n, objval); + printf("selfcheck: driver does not have the objective for n=%d saved. BTW, obj=%18.12e was obtained for this n.\n", + n, + objval); return false; } diff --git a/src/ExecBackends/ExecPoliciesRajaCudaImpl.hpp b/src/ExecBackends/ExecPoliciesRajaCudaImpl.hpp index 984b07a4e..27e27f8ea 100644 --- a/src/ExecBackends/ExecPoliciesRajaCudaImpl.hpp +++ b/src/ExecBackends/ExecPoliciesRajaCudaImpl.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -54,7 +54,7 @@ */ /** - * This file contains CUDA RAJA policies. Should be generally included only in CUDA + * This file contains CUDA RAJA policies. Should be generally included only in CUDA * compilation units. */ @@ -71,13 +71,13 @@ namespace hiop { #define RAJA_LAMBDA [=] __device__ - + template<> struct ExecRajaPoliciesBackend { static constexpr unsigned short int HIOP_RAJA_GPU_BLOCK_SIZE = 128; - - using hiop_raja_exec = RAJA::cuda_exec; + + using hiop_raja_exec = RAJA::cuda_exec; using hiop_raja_reduce = RAJA::cuda_reduce; using hiop_raja_atomic = RAJA::cuda_atomic; @@ -87,18 +87,10 @@ struct ExecRajaPoliciesBackend template using hiop_kernel = RAJA::statement::CudaKernel; - using matrix_exec = - RAJA::KernelPolicy< - hiop_kernel< - RAJA::statement::For<1, hiop_block_x_loop, - RAJA::statement::For<0, hiop_thread_x_loop, - RAJA::statement::Lambda<0> - > - > - > - >; - + using matrix_exec = RAJA::KernelPolicy > > > >; }; -} //end of namespace -#endif //defined(HIOP_USE_RAJA) && defined(HIOP_USE_CUDA) +} // namespace hiop +#endif // defined(HIOP_USE_RAJA) && defined(HIOP_USE_CUDA) #endif diff --git a/src/ExecBackends/ExecPoliciesRajaHipImpl.hpp b/src/ExecBackends/ExecPoliciesRajaHipImpl.hpp index d5b1acdec..36ce85a8a 100644 --- a/src/ExecBackends/ExecPoliciesRajaHipImpl.hpp +++ b/src/ExecBackends/ExecPoliciesRajaHipImpl.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -54,7 +54,7 @@ */ /** - * This file contains HIP RAJA policies. Should be generally included only in HIP + * This file contains HIP RAJA policies. Should be generally included only in HIP * compilation units. */ @@ -72,13 +72,13 @@ namespace hiop { #define RAJA_LAMBDA [=] __device__ - + template<> struct ExecRajaPoliciesBackend { static constexpr unsigned short int HIOP_RAJA_GPU_BLOCK_SIZE = 128; - - using hiop_raja_exec = RAJA::hip_exec; + + using hiop_raja_exec = RAJA::hip_exec; using hiop_raja_reduce = RAJA::hip_reduce; using hiop_raja_atomic = RAJA::hip_atomic; @@ -88,18 +88,11 @@ struct ExecRajaPoliciesBackend template using hiop_kernel = RAJA::statement::HipKernel; - using matrix_exec = - RAJA::KernelPolicy< - hiop_kernel< - RAJA::statement::For<1, hiop_block_x_loop, - RAJA::statement::For<0, hiop_thread_x_loop, - RAJA::statement::Lambda<0> - > - > - > - >; + using matrix_exec = RAJA::KernelPolicy > > > >; }; -} //end of namespace hiop -#endif // defined(HIOP_USE_RAJA) && defined(HIOP_USE_HIP) - +} // end of namespace hiop +#endif // defined(HIOP_USE_RAJA) && defined(HIOP_USE_HIP) + #endif diff --git a/src/ExecBackends/ExecPoliciesRajaOmpImpl.hpp b/src/ExecBackends/ExecPoliciesRajaOmpImpl.hpp index 33fe24063..33cf53c40 100644 --- a/src/ExecBackends/ExecPoliciesRajaOmpImpl.hpp +++ b/src/ExecBackends/ExecPoliciesRajaOmpImpl.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -54,7 +54,7 @@ */ /** - * This file contains Omp RAJA policies. + * This file contains Omp RAJA policies. */ #ifndef HIOP_EXEC_POL_RAJA_OMP @@ -62,7 +62,7 @@ #if defined(HIOP_USE_RAJA) -//todo: need HIOP_USE_OMP or similar +// todo: need HIOP_USE_OMP or similar #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include "ExecSpace.hpp" @@ -73,26 +73,23 @@ namespace hiop { #define RAJA_LAMBDA [=] - + template<> struct ExecRajaPoliciesBackend { static constexpr unsigned short int HIOP_RAJA_GPU_BLOCK_SIZE = 128; - using hiop_raja_exec = RAJA::omp_parallel_for_exec; + using hiop_raja_exec = RAJA::omp_parallel_for_exec; using hiop_raja_reduce = RAJA::omp_reduce; using hiop_raja_atomic = RAJA::omp_atomic; - using matrix_exec = - RAJA::KernelPolicy< - RAJA::statement::For<1, hiop_raja_exec, // row - RAJA::statement::For<0, hiop_raja_exec, // col - RAJA::statement::Lambda<0> - > - > - >; + using matrix_exec = RAJA::KernelPolicy > > >; }; -} -#endif // !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) -#endif // defined(HIOP_USE_RAJA) +} // namespace hiop +#endif // !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +#endif // defined(HIOP_USE_RAJA) #endif diff --git a/src/ExecBackends/ExecSpace.hpp b/src/ExecBackends/ExecSpace.hpp index 7f8e81398..fe700248f 100644 --- a/src/ExecBackends/ExecSpace.hpp +++ b/src/ExecBackends/ExecSpace.hpp @@ -2,54 +2,54 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** * @file ExecSpace.hpp * * @author Cosmin G. Petra , LLNL - * + * */ #ifndef HIOP_EXEC_SPACE @@ -68,8 +68,8 @@ namespace hiop { -/** - * Runtime information about the execution space, namely memory space, memory backend, and execution policies. +/** + * Runtime information about the execution space, namely memory space, memory backend, and execution policies. * Closely related to HiOp's option 'mem_space', 'compute_mode', 'mem_backend', and 'exec_policies'. */ struct ExecSpaceInfo @@ -97,10 +97,9 @@ struct ExecSpaceInfo } } ExecSpaceInfo(const char* mem_space_in) - : ExecSpaceInfo(std::string(mem_space_in)) - { - } - + : ExecSpaceInfo(std::string(mem_space_in)) + {} + std::string mem_space_; std::string mem_backend_; std::string mem_backend_host_; @@ -111,7 +110,7 @@ struct ExecSpaceInfo // Memory backends /////////////////////////////////////////////////////////////////////////////////////// -/// Standard C++ memory backend on host +/// Standard C++ memory backend on host struct MemBackendCpp { /** Constructor that makes this class compatible to use as a memory backend with RAJA @@ -119,27 +118,21 @@ struct MemBackendCpp * * @pre: input string should be always be "HOST" */ - MemBackendCpp(std::string mem_space = "HOST") - { - assert(mem_space == "HOST"); - } - + MemBackendCpp(std::string mem_space = "HOST") { assert(mem_space == "HOST"); } + /// Always on host memory space static bool is_host() { return true; } /// No host memory space is supported. static bool is_device() { return false; } - //for when the class is used as memory backend with RAJA + // for when the class is used as memory backend with RAJA using MemBackendHost = MemBackendCpp; /// Returns a backend set up for host memory space - static MemBackendCpp new_backend_host() - { - return MemBackendCpp(); - }; + static MemBackendCpp new_backend_host() { return MemBackendCpp(); }; }; -#ifdef HIOP_USE_RAJA //HIOP_USE_UMPIRE would be better since Hiop RAJA code can now work without Umpire +#ifdef HIOP_USE_RAJA // HIOP_USE_UMPIRE would be better since Hiop RAJA code can now work without Umpire /** * Umpire-based memory backend that supports "HOST", "UM" (unified memory), and "DEVICE" * memory spaces. @@ -147,39 +140,26 @@ struct MemBackendCpp struct MemBackendUmpire { MemBackendUmpire(const std::string& l) - : mem_space_(l) - { - } - MemBackendUmpire() //todo = delete; + : mem_space_(l) + {} + MemBackendUmpire() // todo = delete; { mem_space_ = "HOST"; } - std::string mem_space() const - { - return mem_space_; - } + std::string mem_space() const { return mem_space_; } - inline bool is_host() const - { - return mem_space_ == "HOST"; - } - inline bool is_device() const - { - return mem_space_ == "DEVICE"; - } + inline bool is_host() const { return mem_space_ == "HOST"; } + inline bool is_device() const { return mem_space_ == "DEVICE"; } using MemBackendHost = MemBackendUmpire; /// Returns a backend set up for host memory space - inline static MemBackendHost new_backend_host() - { - return MemBackendHost("HOST"); - }; + inline static MemBackendHost new_backend_host() { return MemBackendHost("HOST"); }; private: std::string mem_space_; }; -#endif //HIOP_USE_RAJA //HIOP_USE_UMPIRE +#endif // HIOP_USE_RAJA //HIOP_USE_UMPIRE #ifdef HIOP_USE_CUDA /// Cuda memory backend for device memory space that is implemented using Cuda API @@ -189,11 +169,8 @@ struct MemBackendCuda * Constructor taking a memory space as input; provided for exchangeability with * other memory backends. */ - MemBackendCuda(std::string mem_space = "DEVICE") - { - assert(mem_space == "DEVICE"); - } - + MemBackendCuda(std::string mem_space = "DEVICE") { assert(mem_space == "DEVICE"); } + /// For now does not support host memory space (but can/will be implemented). inline static bool is_host() { return false; } @@ -201,25 +178,19 @@ struct MemBackendCuda using MemBackendHost = MemBackendCpp; /// Returns a backend set up for host memory space - inline static MemBackendHost new_backend_host() - { - return MemBackendHost(); - }; + inline static MemBackendHost new_backend_host() { return MemBackendHost(); }; }; -#endif //HIOP_USE_CUDA +#endif // HIOP_USE_CUDA #ifdef HIOP_USE_HIP /// Cuda memory backend for device memory space that is implemented using Hip API struct MemBackendHip { /** - * Constructor taking a memory space as input; provided for exchangeability with + * Constructor taking a memory space as input; provided for exchangeability with * other memory backends. */ - MemBackendHip(std::string mem_space = "DEVICE") - { - assert(mem_space == "DEVICE"); - } + MemBackendHip(std::string mem_space = "DEVICE") { assert(mem_space == "DEVICE"); } /// For now does not support host memory space (but can/will be implemented). inline static bool is_host() { return false; } @@ -228,12 +199,9 @@ struct MemBackendHip using MemBackendHost = MemBackendCpp; /// Returns a backend set up for host memory space - inline static MemBackendHost new_backend_host() - { - return MemBackendHost(); - }; + inline static MemBackendHost new_backend_host() { return MemBackendHost(); }; }; -#endif //HIOP_USE_HIP +#endif // HIOP_USE_HIP /////////////////////////////////////////////////////////////////////////////////////// // Execution policies @@ -241,18 +209,16 @@ struct MemBackendHip /// Standard C++ sequential execution struct ExecPolicySeq -{ -}; +{}; #ifdef HIOP_USE_CUDA struct ExecPolicyCuda { ExecPolicyCuda() - : bl_sz_binary_search(16), - bl_sz_vector_loop(256) - { - } - /** Block size for kernels performing binary search (e.g., updating or getting diagonal + : bl_sz_binary_search(16), + bl_sz_vector_loop(256) + {} + /** Block size for kernels performing binary search (e.g., updating or getting diagonal * in CSR CUDA matrices. Default value 16. */ unsigned short int bl_sz_binary_search; @@ -277,45 +243,46 @@ struct ExecPolicyHip #ifdef HIOP_USE_CUDA struct ExecPolicyRajaCuda { - //empty since no runtime info is stored + // empty since no runtime info is stored }; #endif #ifdef HIOP_USE_HIP struct ExecPolicyRajaHip { - //empty since no runtime info is stored + // empty since no runtime info is stored }; #endif -//RAJA OMP execution policies backend +// RAJA OMP execution policies backend #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) struct ExecPolicyRajaOmp { - //empty since no runtime info is stored + // empty since no runtime info is stored }; #endif /** * The backend RAJA policies that needs to be provided for each one of the ExecPolicyRajaCuda, * ExecPolicyRajaHip, and/or ExecPolicyRajaOmp. The class is specialized in HiOp's vendor-specific - * Raja execution policies source files. Namely, the class' inner types are specialized to + * Raja execution policies source files. Namely, the class' inner types are specialized to * vendor-specific RAJA policies types. The inner type below are just for reference and this * generic templated struct is/should not be used. */ template struct ExecRajaPoliciesBackend { - using hiop_raja_exec = void; - using hiop_raja_reduce = void; + using hiop_raja_exec = void; + using hiop_raja_reduce = void; using hiop_raja_atomic = void; // The following are primarily for _matrix_exec_ using hiop_block_x_loop = void; using hiop_thread_x_loop = void; - template using hiop_kernel = void; + template + using hiop_kernel = void; }; -#endif //HIOP_USE_RAJA +#endif // HIOP_USE_RAJA /////////////////////////////////////////////////////////////////////////////////////// // The generic/template execution backend class @@ -327,19 +294,13 @@ struct ExecRajaPoliciesBackend template struct AllocImpl; -template +template struct DeAllocImpl; - -template +template struct TransferImpl; -/** +/** * Hardware backend wrapping a concrete memory backend and a concrete set of execution policies. */ template @@ -350,21 +311,14 @@ class ExecSpace ExecSpace(const ExecSpace&) = default; ExecSpace(const MEMBACKEND& mb) - : mb_(mb), - ep_() - { - } + : mb_(mb), + ep_() + {} - const MEMBACKEND& mem_backend() const - { - return mb_; - } + const MEMBACKEND& mem_backend() const { return mb_; } + + const EXECPOLICIES& exec_policies() const { return ep_; } - const EXECPOLICIES& exec_policies() const - { - return ep_; - } - template inline T* alloc_array(const I& n) { @@ -379,25 +333,25 @@ class ExecSpace /** * Copy `n` elements of the array `p_src` to the `p_dest` array. - * - * @pre `p_src` and `p_dest` should be allocated so that they can hold at least + * + * @pre `p_src` and `p_dest` should be allocated so that they can hold at least * `n` elements. * @pre `p_dest` should be managed by the memory backend of `this`. * @pre `p_src` should be managed by the memory backend of `ms`. */ template - inline bool copy(T* p_dest, const T* p_src, const I& n, const ExecSpace& ms) + inline bool copy(T* p_dest, const T* p_src, const I& n, const ExecSpace& ms) { return TransferImpl::do_it(p_dest, *this, p_src, ms, n); } /** * Copy `n` elements of the array `p_src` to the `p_dest` array. - * - * @pre `p_src` and `p_dest` should be allocated so that they can hold at least + * + * @pre `p_src` and `p_dest` should be allocated so that they can hold at least * `n` elements. * @pre Both `p_dest` and `p_src` should be managed by the memory backend of `this`. - */ + */ template inline bool copy(T* p_dest, const T* p_src, const I& n) { @@ -412,9 +366,9 @@ class ExecSpace // // Internals start here // - + /** - * Memory allocations should be provided via `AllocImpl` for concrete memory backends. + * Memory allocations should be provided via `AllocImpl` for concrete memory backends. */ template struct AllocImpl @@ -428,14 +382,14 @@ struct AllocImpl /** * Memory deallocations should be provided via `DeAllocImpl` for concrete memory backends. - * The size type `I` is not needed by current implementation and defaulted to `void`. + * The size type `I` is not needed by current implementation and defaulted to `void`. */ -template +template struct DeAllocImpl { inline static void dealloc(MEMBACKEND& mb, T* p) { - assert(false && "Specialization for template parameters needs to be provided."); + assert(false && "Specialization for template parameters needs to be provided."); } }; @@ -456,6 +410,6 @@ struct TransferImpl } }; -} // end namespace +} // namespace hiop #endif diff --git a/src/ExecBackends/MemBackendCppImpl.hpp b/src/ExecBackends/MemBackendCppImpl.hpp index cdd96a722..cc96e5f5a 100644 --- a/src/ExecBackends/MemBackendCppImpl.hpp +++ b/src/ExecBackends/MemBackendCppImpl.hpp @@ -2,58 +2,58 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** * @file MemBackendCppImpl.hpp * * @author Cosmin G. Petra , LLNL - * + * */ /** - * This file contains C++ memory backend implementation using new and delete operators. + * This file contains C++ memory backend implementation using new and delete operators. * std::memcpy is used to copy. */ @@ -74,19 +74,13 @@ namespace hiop template struct AllocImpl { - inline static T* alloc(MemBackendCpp& mb, const I& n) - { - return new T[n]; - } + inline static T* alloc(MemBackendCpp& mb, const I& n) { return new T[n]; } }; template struct DeAllocImpl { - inline static void dealloc(MemBackendCpp& mb, T* p) - { - delete[] p; - } + inline static void dealloc(MemBackendCpp& mb, T* p) { delete[] p; } }; // @@ -101,10 +95,10 @@ struct TransferImpl const ExecSpace& hwb_src, const I& n) { - std::memcpy(p_dest, p_src, n*sizeof(T)); + std::memcpy(p_dest, p_src, n * sizeof(T)); return true; } }; - -} // end namespace hiop -#endif //HIOP_MEM_BCK_CPP + +} // end namespace hiop +#endif // HIOP_MEM_BCK_CPP diff --git a/src/ExecBackends/MemBackendCudaImpl.hpp b/src/ExecBackends/MemBackendCudaImpl.hpp index fecc5978a..46a9abd00 100644 --- a/src/ExecBackends/MemBackendCudaImpl.hpp +++ b/src/ExecBackends/MemBackendCudaImpl.hpp @@ -2,58 +2,58 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** * @file MemBackendCudaImpl.hpp * * @author Cosmin G. Petra , LLNL - * + * */ /** - * This file contains CUDA implementation of memory backend. Should be generally included + * This file contains CUDA implementation of memory backend. Should be generally included * only in CUDA compilation units. */ @@ -76,8 +76,8 @@ struct AllocImpl inline static T* alloc(MemBackendCuda& mb, const I& n) { T* p; - auto err = cudaMalloc((void**)&p, n*sizeof(T)); - assert(cudaSuccess==err); + auto err = cudaMalloc((void**)&p, n * sizeof(T)); + assert(cudaSuccess == err); return p; } }; @@ -88,8 +88,8 @@ struct DeAllocImpl inline static void dealloc(MemBackendCuda& mb, T* p) { auto err = cudaFree((void*)p); - assert(cudaSuccess==err); - } + assert(cudaSuccess == err); + } }; // @@ -104,7 +104,7 @@ struct TransferImpl& hwb_src, const I& n) { - cudaError_t err = cudaMemcpy(p_dest, p_src, n*sizeof(T), cudaMemcpyDeviceToDevice); + cudaError_t err = cudaMemcpy(p_dest, p_src, n * sizeof(T), cudaMemcpyDeviceToDevice); assert(err == cudaSuccess); return cudaSuccess == err; } @@ -119,7 +119,7 @@ struct TransferImpl& hwb_src, const I& n) { - auto err = cudaMemcpy(p_dest, p_src, n*sizeof(T), cudaMemcpyHostToDevice); + auto err = cudaMemcpy(p_dest, p_src, n * sizeof(T), cudaMemcpyHostToDevice); assert(cudaSuccess == err); return cudaSuccess == err; } @@ -134,13 +134,11 @@ struct TransferImpl& hwb_src, const I& n) { - auto err = cudaMemcpy(p_dest, p_src, n*sizeof(T), cudaMemcpyDeviceToHost); + auto err = cudaMemcpy(p_dest, p_src, n * sizeof(T), cudaMemcpyDeviceToHost); assert(cudaSuccess == err); return cudaSuccess == err; } }; -} // end namespace hiop -#endif //HIOP_MEM_BCK_CUDA - - +} // end namespace hiop +#endif // HIOP_MEM_BCK_CUDA diff --git a/src/ExecBackends/MemBackendHipImpl.hpp b/src/ExecBackends/MemBackendHipImpl.hpp index 846886b1e..fedf3b584 100644 --- a/src/ExecBackends/MemBackendHipImpl.hpp +++ b/src/ExecBackends/MemBackendHipImpl.hpp @@ -2,58 +2,58 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** * @file MemBackendHipImpl.hpp * * @author Cosmin G. Petra , LLNL - * + * */ /** - * This file contains HIP implementation of memory backend. + * This file contains HIP implementation of memory backend. */ #ifndef HIOP_MEM_SPACE_HIP @@ -75,8 +75,8 @@ struct AllocImpl inline static T* alloc(MemBackendHip& mb, const I& n) { T* p = nullptr; - auto err = hipMalloc((void**)&p, n*sizeof(T)); - assert(hipSuccess==err); + auto err = hipMalloc((void**)&p, n * sizeof(T)); + assert(hipSuccess == err); return p; } }; @@ -87,8 +87,8 @@ struct DeAllocImpl inline static void dealloc(MemBackendHip& mb, T* p) { auto err = hipFree((void*)p); - assert(hipSuccess==err); - } + assert(hipSuccess == err); + } }; // @@ -103,12 +103,12 @@ struct TransferImpl const ExecSpace& hwb_src, const I& n) { - return hipSuccess == hipMemcpy(p_dest, p_src, n*sizeof(T), hipMemcpyDeviceToDevice); + return hipSuccess == hipMemcpy(p_dest, p_src, n * sizeof(T), hipMemcpyDeviceToDevice); } }; - template - struct TransferImpl +template +struct TransferImpl { inline static bool do_it(T* p_dest, ExecSpace& hwb_dest, @@ -116,7 +116,7 @@ struct TransferImpl const ExecSpace& hwb_src, const I& n) { - return hipSuccess == hipMemcpy(p_dest, p_src, n*sizeof(T), hipMemcpyHostToDevice); + return hipSuccess == hipMemcpy(p_dest, p_src, n * sizeof(T), hipMemcpyHostToDevice); } }; @@ -129,10 +129,9 @@ struct TransferImpl const ExecSpace& hwb_src, const I& n) { - return hipSuccess == hipMemcpy(p_dest, p_src, n*sizeof(T), hipMemcpyDeviceToHost); + return hipSuccess == hipMemcpy(p_dest, p_src, n * sizeof(T), hipMemcpyDeviceToHost); } }; -} // end namespace hiop -#endif //HIOP_MEM_SPACE_HIP - +} // end namespace hiop +#endif // HIOP_MEM_SPACE_HIP diff --git a/src/ExecBackends/MemBackendUmpireImpl.hpp b/src/ExecBackends/MemBackendUmpireImpl.hpp index 477cae244..e64e01ba5 100644 --- a/src/ExecBackends/MemBackendUmpireImpl.hpp +++ b/src/ExecBackends/MemBackendUmpireImpl.hpp @@ -2,54 +2,54 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** * @file MemBackendUmpireImpl.hpp * * @author Cosmin G. Petra , LLNL - * + * */ /** @@ -64,7 +64,8 @@ #include #include -namespace hiop { +namespace hiop +{ // // Memory allocator and deallocator @@ -75,20 +76,20 @@ struct AllocImpl inline static T* alloc(MemBackendUmpire& mb, const I& n) { auto& resmgr = umpire::ResourceManager::getInstance(); - umpire::Allocator devalloc = resmgr.getAllocator(mb.mem_space()); - return static_cast(devalloc.allocate(n*sizeof(T))); + umpire::Allocator devalloc = resmgr.getAllocator(mb.mem_space()); + return static_cast(devalloc.allocate(n * sizeof(T))); } }; - + template struct DeAllocImpl { inline static void dealloc(MemBackendUmpire& mb, T* p) { auto& resmgr = umpire::ResourceManager::getInstance(); - umpire::Allocator devalloc = resmgr.getAllocator(mb.mem_space()); + umpire::Allocator devalloc = resmgr.getAllocator(mb.mem_space()); devalloc.deallocate(p); - } + } }; ////////////////////////////////////////////////////////////////////////////////////////// @@ -104,10 +105,10 @@ struct TransferImpl& hwb_src, const I& n) { - if(n>0) { + if(n > 0) { auto& rm = umpire::ResourceManager::getInstance(); T* src = const_cast(p_src); - rm.copy(p_dest, src, n*sizeof(T)); + rm.copy(p_dest, src, n * sizeof(T)); } return true; } @@ -126,9 +127,9 @@ struct TransferImpl0) { + if(n > 0) { auto& rm = umpire::ResourceManager::getInstance(); T* src = const_cast(p_src); assert(src); @@ -141,14 +142,13 @@ struct TransferImpl(host_alloc.allocate(n*sizeof(T))); - rm.copy(src_host, src, n*sizeof(T)); + umpire::Allocator host_alloc = rm.getAllocator("HOST"); + T* src_host = static_cast(host_alloc.allocate(n * sizeof(T))); + rm.copy(src_host, src, n * sizeof(T)); + + std::memcpy(p_dest, src_host, n * sizeof(T)); - std::memcpy(p_dest, src_host, n*sizeof(T)); - host_alloc.deallocate(src_host); - } } return true; @@ -165,17 +165,17 @@ struct TransferImpl0) { + if(n > 0) { // TODO: Note: see note above in the sister TransferImpl auto& rm = umpire::ResourceManager::getInstance(); - umpire::Allocator host_alloc = rm.getAllocator("HOST"); - T* dest_host = static_cast(host_alloc.allocate(n*sizeof(T))); - - std::memcpy(dest_host, p_src, n*sizeof(T)); - - rm.copy(p_dest, dest_host, n*sizeof(T)); + umpire::Allocator host_alloc = rm.getAllocator("HOST"); + T* dest_host = static_cast(host_alloc.allocate(n * sizeof(T))); + + std::memcpy(dest_host, p_src, n * sizeof(T)); + + rm.copy(p_dest, dest_host, n * sizeof(T)); host_alloc.deallocate(dest_host); } } @@ -197,10 +197,10 @@ struct TransferImpl, ANL * @author Cosmin G. Petra , LLNL * @author Nai-Yuan Chiang , LLNL * */ - #include "chiopInterface.hpp" extern "C" { using namespace hiop; // These are default options for the C interface for now. Setting options from C will be added in the future. -int hiop_mds_create_problem(cHiopMDSProblem *prob) { - cppUserProblemMDS * cppproblem = new cppUserProblemMDS(prob); +int hiop_mds_create_problem(cHiopMDSProblem *prob) +{ + cppUserProblemMDS *cppproblem = new cppUserProblemMDS(prob); hiopNlpMDS *nlp = new hiopNlpMDS(*cppproblem); nlp->options->SetStringValue("duals_update_type", "linear"); nlp->options->SetStringValue("duals_init", "zero"); @@ -76,27 +76,30 @@ int hiop_mds_create_problem(cHiopMDSProblem *prob) { prob->refcppHiop = nlp; prob->hiopinterface = cppproblem; return 0; -} +} -int hiop_mds_solve_problem(cHiopMDSProblem *prob) { +int hiop_mds_solve_problem(cHiopMDSProblem *prob) +{ hiopSolveStatus status; hiopAlgFilterIPMNewton solver(prob->refcppHiop); status = solver.run(); - assert(status<=hiopSolveStatus::User_Stopped); //check solver status if necessary + assert(status <= hiopSolveStatus::User_Stopped); // check solver status if necessary prob->obj_value = solver.getObjective(); solver.getSolution(prob->solution); return 0; } -int hiop_mds_destroy_problem(cHiopMDSProblem *prob) { +int hiop_mds_destroy_problem(cHiopMDSProblem *prob) +{ delete prob->refcppHiop; delete prob->hiopinterface; return 0; } #ifdef HIOP_SPARSE -int hiop_sparse_create_problem(cHiopSparseProblem *prob) { - cppUserProblemSparse * cppproblem = new cppUserProblemSparse(prob); +int hiop_sparse_create_problem(cHiopSparseProblem *prob) +{ + cppUserProblemSparse *cppproblem = new cppUserProblemSparse(prob); hiopNlpSparse *nlp = new hiopNlpSparse(*cppproblem); nlp->options->SetStringValue("Hessian", "analytical_exact"); @@ -108,9 +111,10 @@ int hiop_sparse_create_problem(cHiopSparseProblem *prob) { prob->hiopinterface_ = cppproblem; return 0; -} +} -int hiop_sparse_solve_problem(cHiopSparseProblem *prob) { +int hiop_sparse_solve_problem(cHiopSparseProblem *prob) +{ hiopAlgFilterIPMNewton solver(prob->refcppHiop_); prob->status_ = solver.run(); prob->obj_value_ = solver.getObjective(); @@ -119,20 +123,22 @@ int hiop_sparse_solve_problem(cHiopSparseProblem *prob) { return 0; } -int hiop_sparse_destroy_problem(cHiopSparseProblem *prob) { +int hiop_sparse_destroy_problem(cHiopSparseProblem *prob) +{ delete prob->refcppHiop_; delete prob->hiopinterface_; return 0; } -#endif //#ifdef HIOP_SPARSE +#endif // #ifdef HIOP_SPARSE -int hiop_dense_create_problem(cHiopDenseProblem *prob) { - cppUserProblemDense * cppproblem = new cppUserProblemDense(prob); +int hiop_dense_create_problem(cHiopDenseProblem *prob) +{ + cppUserProblemDense *cppproblem = new cppUserProblemDense(prob); hiopNlpDenseConstraints *nlp = new hiopNlpDenseConstraints(*cppproblem); nlp->options->SetStringValue("Hessian", "quasinewton_approx"); - nlp->options->SetStringValue("duals_update_type", "linear"); - nlp->options->SetStringValue("duals_init", "zero"); // "lsq" or "zero" + nlp->options->SetStringValue("duals_update_type", "linear"); + nlp->options->SetStringValue("duals_init", "zero"); // "lsq" or "zero" nlp->options->SetStringValue("compute_mode", "cpu"); nlp->options->SetStringValue("KKTLinsys", "xdycyd"); nlp->options->SetStringValue("fixed_var", "relax"); @@ -141,9 +147,10 @@ int hiop_dense_create_problem(cHiopDenseProblem *prob) { prob->hiopinterface = cppproblem; return 0; -} +} -int hiop_dense_solve_problem(cHiopDenseProblem *prob) { +int hiop_dense_solve_problem(cHiopDenseProblem *prob) +{ hiopAlgFilterIPMQuasiNewton solver(prob->refcppHiop); prob->status = solver.run(); prob->obj_value = solver.getObjective(); @@ -152,10 +159,11 @@ int hiop_dense_solve_problem(cHiopDenseProblem *prob) { return 0; } -int hiop_dense_destroy_problem(cHiopDenseProblem *prob) { +int hiop_dense_destroy_problem(cHiopDenseProblem *prob) +{ delete prob->refcppHiop; delete prob->hiopinterface; return 0; } -} // extern C +} // extern C diff --git a/src/Interface/chiopInterface.hpp b/src/Interface/chiopInterface.hpp index d75dd56e9..a4cee65c5 100644 --- a/src/Interface/chiopInterface.hpp +++ b/src/Interface/chiopInterface.hpp @@ -2,52 +2,52 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** * @file chiopInterface.hpp - * + * * @author Michel Schanen , ANL * @author Cosmin G. Petra , LLNL * @author Nai-Yuan Chiang , LLNL @@ -68,519 +68,551 @@ using namespace hiop; class cppUserProblemMDS; extern "C" { - // C struct with HiOp function callbacks - typedef struct cHiopMDSProblem { - hiopNlpMDS *refcppHiop; - cppUserProblemMDS *hiopinterface; - // user_data similar to the Ipopt interface. In case of Julia pointer to the Julia problem object. - void *user_data; - // Used by hiop_mds_solve_problem() to store the final state. The duals should be added here. - double *solution; - double obj_value; - // HiOp callback function wrappers - int (*get_starting_point)(size_type n_, double* x0, void* user_data); - int (*get_prob_sizes)(size_type* n_, size_type* m_, void* user_data); - int (*get_vars_info)(size_type n, double *xlow_, double* xupp_, void* user_data); - int (*get_cons_info)(size_type m, double *clow_, double* cupp_, void* user_data); - int (*eval_f)(size_type n, double* x, int new_x, double* obj, void* user_data); - int (*eval_grad_f)(size_type n, double* x, int new_x, double* gradf, void* user_data); - int (*eval_cons)(size_type n, size_type m, - double* x, int new_x, - double* cons, void* user_data); - int (*get_sparse_dense_blocks_info)(hiop_size_type* nx_sparse, hiop_size_type* nx_dense, - hiop_size_type* nnz_sparse_Jaceq, hiop_size_type* nnz_sparse_Jacineq, - hiop_size_type* nnz_sparse_Hess_Lagr_SS, - hiop_size_type* nnz_sparse_Hess_Lagr_SD, void* user_data); - int (*eval_Jac_cons)(size_type n, size_type m, - double* x, int new_x, - size_type nsparse, size_type ndense, - int nnzJacS, hiop_index_type* iJacS, hiop_index_type* jJacS, double* MJacS, - double* JacD, void *user_data); - int (*eval_Hess_Lagr)(size_type n, size_type m, - double* x, int new_x, double obj_factor, - double* lambda, int new_lambda, - size_type nsparse, size_type ndense, - hiop_size_type nnzHSS, hiop_index_type* iHSS, hiop_index_type* jHSS, double* MHSS, - double* HDD, - hiop_size_type nnzHSD, hiop_index_type* iHSD, hiop_index_type* jHSD, double* MHSD, void* user_data); - } cHiopMDSProblem; +// C struct with HiOp function callbacks +typedef struct cHiopMDSProblem +{ + hiopNlpMDS* refcppHiop; + cppUserProblemMDS* hiopinterface; + // user_data similar to the Ipopt interface. In case of Julia pointer to the Julia problem object. + void* user_data; + // Used by hiop_mds_solve_problem() to store the final state. The duals should be added here. + double* solution; + double obj_value; + // HiOp callback function wrappers + int (*get_starting_point)(size_type n_, double* x0, void* user_data); + int (*get_prob_sizes)(size_type* n_, size_type* m_, void* user_data); + int (*get_vars_info)(size_type n, double* xlow_, double* xupp_, void* user_data); + int (*get_cons_info)(size_type m, double* clow_, double* cupp_, void* user_data); + int (*eval_f)(size_type n, double* x, int new_x, double* obj, void* user_data); + int (*eval_grad_f)(size_type n, double* x, int new_x, double* gradf, void* user_data); + int (*eval_cons)(size_type n, size_type m, double* x, int new_x, double* cons, void* user_data); + int (*get_sparse_dense_blocks_info)(hiop_size_type* nx_sparse, + hiop_size_type* nx_dense, + hiop_size_type* nnz_sparse_Jaceq, + hiop_size_type* nnz_sparse_Jacineq, + hiop_size_type* nnz_sparse_Hess_Lagr_SS, + hiop_size_type* nnz_sparse_Hess_Lagr_SD, + void* user_data); + int (*eval_Jac_cons)(size_type n, + size_type m, + double* x, + int new_x, + size_type nsparse, + size_type ndense, + int nnzJacS, + hiop_index_type* iJacS, + hiop_index_type* jJacS, + double* MJacS, + double* JacD, + void* user_data); + int (*eval_Hess_Lagr)(size_type n, + size_type m, + double* x, + int new_x, + double obj_factor, + double* lambda, + int new_lambda, + size_type nsparse, + size_type ndense, + hiop_size_type nnzHSS, + hiop_index_type* iHSS, + hiop_index_type* jHSS, + double* MHSS, + double* HDD, + hiop_size_type nnzHSD, + hiop_index_type* iHSD, + hiop_index_type* jHSD, + double* MHSD, + void* user_data); +} cHiopMDSProblem; } - // The cpp object used in the C interface class cppUserProblemMDS : public hiopInterfaceMDS { public: - cppUserProblemMDS(cHiopMDSProblem *cprob_) - : cprob(cprob_) - { - } + cppUserProblemMDS(cHiopMDSProblem* cprob_) + : cprob(cprob_) + {} + + virtual ~cppUserProblemMDS() {} + // HiOp callbacks calling the C wrappers + bool get_prob_sizes(size_type& n_, size_type& m_) + { + cprob->get_prob_sizes(&n_, &m_, cprob->user_data); + return true; + }; + bool get_starting_point(const size_type& n, double* x0) + { + cprob->get_starting_point(n, x0, cprob->user_data); + return true; + }; + bool get_vars_info(const size_type& n, double* xlow_, double* xupp_, NonlinearityType* type) + { + for(size_type i = 0; i < n; ++i) type[i] = hiopNonlinear; + cprob->get_vars_info(n, xlow_, xupp_, cprob->user_data); + return true; + }; + bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) + { + for(size_type i = 0; i < m; ++i) type[i] = hiopNonlinear; + cprob->get_cons_info(m, clow, cupp, cprob->user_data); + return true; + }; + bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) + { + cprob->eval_f(n, (double*)x, 0, &obj_value, cprob->user_data); + return true; + }; + + bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) + { + cprob->eval_grad_f(n, (double*)x, 0, gradf, cprob->user_data); + + return true; + }; + bool eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const hiop_index_type* idx_cons, + const double* x, + bool new_x, + double* cons) + { + return false; + }; + bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) + { + cprob->eval_cons(n, m, (double*)x, new_x, cons, cprob->user_data); + return true; + }; + bool get_sparse_dense_blocks_info(hiop_size_type& nx_sparse, + hiop_size_type& nx_dense, + hiop_size_type& nnz_sparse_Jaceq, + hiop_size_type& nnz_sparse_Jacineq, + hiop_size_type& nnz_sparse_Hess_Lagr_SS, + hiop_size_type& nnz_sparse_Hess_Lagr_SD) + { + cprob->get_sparse_dense_blocks_info(&nx_sparse, + &nx_dense, + &nnz_sparse_Jaceq, + &nnz_sparse_Jacineq, + &nnz_sparse_Hess_Lagr_SS, + &nnz_sparse_Hess_Lagr_SD, + cprob->user_data); + return true; + }; + bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const hiop_index_type* idx_cons, + const double* x, + bool new_x, + const size_type& nsparse, + const size_type& ndense, + const hiop_size_type& nnzJacS, + hiop_index_type* iJacS, + hiop_index_type* jJacS, + double* MJacS, + double* JacD) + { + return false; + }; + bool eval_Jac_cons(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const size_type& nsparse, + const size_type& ndense, + const hiop_size_type& nnzJacS, + hiop_index_type* iJacS, + hiop_index_type* jJacS, + double* MJacS, + double* JacD) + { + cprob->eval_Jac_cons(n, m, (double*)x, new_x, nsparse, ndense, nnzJacS, iJacS, jJacS, MJacS, JacD, cprob->user_data); + return true; + }; + bool eval_Hess_Lagr(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const double& obj_factor, + const double* lambda, + bool new_lambda, + const size_type& nsparse, + const size_type& ndense, + const hiop_size_type& nnzHSS, + hiop_index_type* iHSS, + hiop_index_type* jHSS, + double* MHSS, + double* HDD, + hiop_size_type& nnzHSD, + hiop_index_type* iHSD, + hiop_index_type* jHSD, + double* MHSD) + { + // Note: lambda is not used since all the constraints are linear and, therefore, do + // not contribute to the Hessian of the Lagrangian + cprob->eval_Hess_Lagr(n, + m, + (double*)x, + new_x, + obj_factor, + (double*)lambda, + new_lambda, + nsparse, + ndense, + nnzHSS, + iHSS, + jHSS, + MHSS, + HDD, + nnzHSD, + iHSD, + jHSD, + MHSD, + cprob->user_data); + return true; + }; - virtual ~cppUserProblemMDS() - { - } - // HiOp callbacks calling the C wrappers - bool get_prob_sizes(size_type& n_, size_type& m_) - { - cprob->get_prob_sizes(&n_, &m_, cprob->user_data); - return true; - }; - bool get_starting_point(const size_type& n, double *x0) - { - cprob->get_starting_point(n, x0, cprob->user_data); - return true; - }; - bool get_vars_info(const size_type& n, double *xlow_, double* xupp_, NonlinearityType* type) - { - for(size_type i=0; iget_vars_info(n, xlow_, xupp_, cprob->user_data); - return true; - }; - bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) - { - for(size_type i=0; iget_cons_info(m, clow, cupp, cprob->user_data); - return true; - }; - bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) - { - cprob->eval_f(n, (double *) x, 0, &obj_value, cprob->user_data); - return true; - }; - - bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) - { - cprob->eval_grad_f(n, (double *) x, 0, gradf, cprob->user_data); - - return true; - }; - bool eval_cons(const size_type& n, const size_type& m, - const size_type& num_cons, const hiop_index_type* idx_cons, - const double* x, bool new_x, - double* cons) - { - return false; - }; - bool eval_cons(const size_type& n, const size_type& m, - const double* x, bool new_x, double* cons) - { - cprob->eval_cons(n, m, (double *) x, new_x, cons, cprob->user_data); - return true; - }; - bool get_sparse_dense_blocks_info(hiop_size_type& nx_sparse, hiop_size_type& nx_dense, - hiop_size_type& nnz_sparse_Jaceq, hiop_size_type& nnz_sparse_Jacineq, - hiop_size_type& nnz_sparse_Hess_Lagr_SS, - hiop_size_type& nnz_sparse_Hess_Lagr_SD) - { - cprob->get_sparse_dense_blocks_info(&nx_sparse, &nx_dense, &nnz_sparse_Jaceq, &nnz_sparse_Jacineq, - &nnz_sparse_Hess_Lagr_SS, &nnz_sparse_Hess_Lagr_SD, cprob->user_data); - return true; - }; - bool eval_Jac_cons(const size_type& n, const size_type& m, - const size_type& num_cons, const hiop_index_type* idx_cons, - const double* x, bool new_x, - const size_type& nsparse, const size_type& ndense, - const hiop_size_type& nnzJacS, hiop_index_type* iJacS, hiop_index_type* jJacS, double* MJacS, - double* JacD) - { - return false; - }; - bool eval_Jac_cons(const size_type& n, const size_type& m, - const double* x, bool new_x, - const size_type& nsparse, const size_type& ndense, - const hiop_size_type& nnzJacS, hiop_index_type* iJacS, hiop_index_type* jJacS, double* MJacS, - double* JacD) - { - cprob->eval_Jac_cons(n, m, (double *) x, new_x, nsparse, ndense, - nnzJacS, iJacS, jJacS, MJacS, - JacD, cprob->user_data); - return true; - }; - bool eval_Hess_Lagr(const size_type& n, const size_type& m, - const double* x, bool new_x, const double& obj_factor, - const double* lambda, bool new_lambda, - const size_type& nsparse, const size_type& ndense, - const hiop_size_type& nnzHSS, hiop_index_type* iHSS, hiop_index_type* jHSS, double* MHSS, - double* HDD, - hiop_size_type& nnzHSD, hiop_index_type* iHSD, hiop_index_type* jHSD, double* MHSD) - { - //Note: lambda is not used since all the constraints are linear and, therefore, do - //not contribute to the Hessian of the Lagrangian - cprob->eval_Hess_Lagr(n, m, (double *) x, new_x, obj_factor, - (double *) lambda, new_lambda, nsparse, ndense, - nnzHSS, iHSS, jHSS, MHSS, - HDD, - nnzHSD, iHSD, jHSD, MHSD, - cprob->user_data); - return true; - }; private: // Storing the C struct in the CPP object - cHiopMDSProblem *cprob; + cHiopMDSProblem* cprob; }; /** The 3 essential function calls to create and destroy a problem object in addition to solve a problem. * Some option setters will be added in the future. */ -extern "C" int hiop_mds_create_problem(cHiopMDSProblem *problem); -extern "C" int hiop_mds_solve_problem(cHiopMDSProblem *problem); -extern "C" int hiop_mds_destroy_problem(cHiopMDSProblem *problem); - +extern "C" int hiop_mds_create_problem(cHiopMDSProblem* problem); +extern "C" int hiop_mds_solve_problem(cHiopMDSProblem* problem); +extern "C" int hiop_mds_destroy_problem(cHiopMDSProblem* problem); #ifdef HIOP_SPARSE class cppUserProblemSparse; extern "C" { - // C struct with HiOp function callbacks - typedef struct cHiopSparseProblem { - hiopNlpSparse *refcppHiop_; - cppUserProblemSparse *hiopinterface_; - // user_data similar to the Ipopt interface. In case of Julia pointer to the Julia problem object. - void* user_data_; - // Used by hiop_sparse_createProblemsolveProblem() to store the final state. The duals should be added here. - double* solution_; - double obj_value_; - int niters_; - int status_; - // HiOp callback function wrappers - int (*get_starting_point_)(hiop_size_type n, double* x0, void* user_data); - int (*get_prob_sizes_)(hiop_size_type* n, hiop_size_type* m, void* user_data); - int (*get_vars_info_)(hiop_size_type n, double *xlow, double* xupp, void* user_data); - int (*get_cons_info_)(hiop_size_type m, double *clow, double* cupp, void* user_data); - int (*eval_f_)(hiop_size_type n, double* x, int new_x, double* obj, void* user_data); - int (*eval_grad_f_)(hiop_size_type n, double* x, int new_x, double* gradf, void* user_data); - int (*eval_cons_)(hiop_size_type n, - hiop_size_type m, - double* x, - int new_x, - double* cons, - void* user_data); - int (*get_sparse_blocks_info_)(hiop_size_type* nx, - hiop_size_type* nnz_sparse_Jaceq, - hiop_size_type* nnz_sparse_Jacineq, - hiop_size_type* nnz_sparse_Hess_Lagr, - void* user_data); - int (*eval_Jac_cons_)(size_type n, - size_type m, - double* x, - int new_x, - int nnzJacS, - hiop_index_type* iJacS, - hiop_index_type* jJacS, - double* MJacS, - void *user_data); - int (*eval_Hess_Lagr_)(size_type n, - size_type m, - double* x, - int new_x, - double obj_factor, - double* lambda, - int new_lambda, - hiop_size_type nnzHSS, - hiop_index_type* iHSS, - hiop_index_type* jHSS, - double* MHSS, - void* user_data); - } cHiopSparseProblem; +// C struct with HiOp function callbacks +typedef struct cHiopSparseProblem +{ + hiopNlpSparse* refcppHiop_; + cppUserProblemSparse* hiopinterface_; + // user_data similar to the Ipopt interface. In case of Julia pointer to the Julia problem object. + void* user_data_; + // Used by hiop_sparse_createProblemsolveProblem() to store the final state. The duals should be added here. + double* solution_; + double obj_value_; + int niters_; + int status_; + // HiOp callback function wrappers + int (*get_starting_point_)(hiop_size_type n, double* x0, void* user_data); + int (*get_prob_sizes_)(hiop_size_type* n, hiop_size_type* m, void* user_data); + int (*get_vars_info_)(hiop_size_type n, double* xlow, double* xupp, void* user_data); + int (*get_cons_info_)(hiop_size_type m, double* clow, double* cupp, void* user_data); + int (*eval_f_)(hiop_size_type n, double* x, int new_x, double* obj, void* user_data); + int (*eval_grad_f_)(hiop_size_type n, double* x, int new_x, double* gradf, void* user_data); + int (*eval_cons_)(hiop_size_type n, hiop_size_type m, double* x, int new_x, double* cons, void* user_data); + int (*get_sparse_blocks_info_)(hiop_size_type* nx, + hiop_size_type* nnz_sparse_Jaceq, + hiop_size_type* nnz_sparse_Jacineq, + hiop_size_type* nnz_sparse_Hess_Lagr, + void* user_data); + int (*eval_Jac_cons_)(size_type n, + size_type m, + double* x, + int new_x, + int nnzJacS, + hiop_index_type* iJacS, + hiop_index_type* jJacS, + double* MJacS, + void* user_data); + int (*eval_Hess_Lagr_)(size_type n, + size_type m, + double* x, + int new_x, + double obj_factor, + double* lambda, + int new_lambda, + hiop_size_type nnzHSS, + hiop_index_type* iHSS, + hiop_index_type* jHSS, + double* MHSS, + void* user_data); +} cHiopSparseProblem; } - // The cpp object used in the C interface class cppUserProblemSparse : public hiopInterfaceSparse { - public: - cppUserProblemSparse(cHiopSparseProblem *cprob) - : cprob_(cprob) - { +public: + cppUserProblemSparse(cHiopSparseProblem* cprob) + : cprob_(cprob) + {} + + virtual ~cppUserProblemSparse() {} + + // HiOp callbacks calling the C wrappers + bool get_prob_sizes(size_type& n, size_type& m) + { + cprob_->get_prob_sizes_(&n, &m, cprob_->user_data_); + return true; + }; + + bool get_starting_point(const size_type& n, double* x0) + { + cprob_->get_starting_point_(n, x0, cprob_->user_data_); + return true; + }; + + bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) + { + for(size_type i = 0; i < n; ++i) { + type[i] = hiopNonlinear; } - - virtual ~cppUserProblemSparse() - { + cprob_->get_vars_info_(n, xlow, xupp, cprob_->user_data_); + return true; + }; + + bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) + { + for(size_type i = 0; i < m; ++i) { + type[i] = hiopNonlinear; } + cprob_->get_cons_info_(m, clow, cupp, cprob_->user_data_); + return true; + }; + + bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) + { + cprob_->eval_f_(n, (double*)x, 0, &obj_value, cprob_->user_data_); + return true; + }; + + bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) + { + cprob_->eval_grad_f_(n, (double*)x, 0, gradf, cprob_->user_data_); + + return true; + }; + + bool eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + double* cons) + { + return false; + }; + + bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) + { + cprob_->eval_cons_(n, m, (double*)x, new_x, cons, cprob_->user_data_); + return true; + }; + + bool get_sparse_blocks_info(size_type& nx, + size_type& nnz_sparse_Jaceq, + size_type& nnz_sparse_Jacineq, + size_type& nnz_sparse_Hess_Lagr) + { + cprob_->get_sparse_blocks_info_(&nx, &nnz_sparse_Jaceq, &nnz_sparse_Jacineq, &nnz_sparse_Hess_Lagr, cprob_->user_data_); + return true; + }; + + bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS) + { + return false; + }; + + bool eval_Jac_cons(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const size_type& nnzJacS, + index_type* iJacS, + index_type* jJacS, + double* MJacS) + { + cprob_->eval_Jac_cons_(n, m, (double*)x, new_x, nnzJacS, iJacS, jJacS, MJacS, cprob_->user_data_); + return true; + }; + bool eval_Hess_Lagr(const size_type& n, + const size_type& m, + const double* x, + bool new_x, + const double& obj_factor, + const double* lambda, + bool new_lambda, + const size_type& nnzHSS, + index_type* iHSS, + index_type* jHSS, + double* MHSS) + { + // Note: lambda is not used since all the constraints are linear and, therefore, do + // not contribute to the Hessian of the Lagrangian + cprob_->eval_Hess_Lagr_(n, + m, + (double*)x, + new_x, + obj_factor, + (double*)lambda, + new_lambda, + nnzHSS, + iHSS, + jHSS, + MHSS, + cprob_->user_data_); + return true; + }; - // HiOp callbacks calling the C wrappers - bool get_prob_sizes(size_type& n, size_type& m) - { - cprob_->get_prob_sizes_(&n, &m, cprob_->user_data_); - return true; - - }; - - bool get_starting_point(const size_type& n, double *x0) - { - cprob_->get_starting_point_(n, x0, cprob_->user_data_); - return true; - }; - - bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) - { - for(size_type i=0; iget_vars_info_(n, xlow, xupp, cprob_->user_data_); - return true; - }; - - bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) - { - for(size_type i=0; iget_cons_info_(m, clow, cupp, cprob_->user_data_); - return true; - }; - - bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) - { - cprob_->eval_f_(n, (double *) x, 0, &obj_value, cprob_->user_data_); - return true; - }; - - bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) - { - cprob_->eval_grad_f_(n, (double *) x, 0, gradf, cprob_->user_data_); - - return true; - }; - - bool eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, - double* cons) - { - return false; - }; - - bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) - { - cprob_->eval_cons_(n, m, (double *) x, new_x, cons, cprob_->user_data_); - return true; - }; - - bool get_sparse_blocks_info(size_type& nx, - size_type& nnz_sparse_Jaceq, - size_type& nnz_sparse_Jacineq, - size_type& nnz_sparse_Hess_Lagr) - { - cprob_->get_sparse_blocks_info_(&nx, &nnz_sparse_Jaceq, &nnz_sparse_Jacineq, &nnz_sparse_Hess_Lagr, cprob_->user_data_); - return true; - }; - - bool eval_Jac_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS) - { - return false; - }; - - bool eval_Jac_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - const size_type& nnzJacS, - index_type* iJacS, - index_type* jJacS, - double* MJacS) - { - cprob_->eval_Jac_cons_(n, m, (double *) x, new_x, - nnzJacS, iJacS, jJacS, MJacS, - cprob_->user_data_); - return true; - }; - bool eval_Hess_Lagr(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - const double& obj_factor, - const double* lambda, - bool new_lambda, - const size_type& nnzHSS, - index_type* iHSS, - index_type* jHSS, - double* MHSS) - { - //Note: lambda is not used since all the constraints are linear and, therefore, do - //not contribute to the Hessian of the Lagrangian - cprob_->eval_Hess_Lagr_(n, m, (double *) x, new_x, obj_factor, - (double *) lambda, new_lambda, - nnzHSS, iHSS, jHSS, MHSS, - cprob_->user_data_); - return true; - }; private: // Storing the C struct in the CPP object - cHiopSparseProblem *cprob_; + cHiopSparseProblem* cprob_; }; /** The 3 essential function calls to create and destroy a problem object in addition to solve a problem. * Some option setters will be added in the future. */ -extern "C" int hiop_sparse_create_problem(cHiopSparseProblem *problem); -extern "C" int hiop_sparse_solve_problem(cHiopSparseProblem *problem); -extern "C" int hiop_sparse_destroy_problem(cHiopSparseProblem *problem); +extern "C" int hiop_sparse_create_problem(cHiopSparseProblem* problem); +extern "C" int hiop_sparse_solve_problem(cHiopSparseProblem* problem); +extern "C" int hiop_sparse_destroy_problem(cHiopSparseProblem* problem); -#endif //#ifdef HIOP_SPARSE +#endif // #ifdef HIOP_SPARSE class cppUserProblemDense; extern "C" { - // C struct with HiOp function callbacks - typedef struct cHiopDenseProblem { - hiopNlpDenseConstraints *refcppHiop; - cppUserProblemDense *hiopinterface; - // user_data similar to the Ipopt interface. In case of Julia pointer to the Julia problem object. - void* user_data; - // Used by hiop_sparse_createProblemsolveProblem() to store the final state. The duals should be added here. - double* solution; - double obj_value; - int niters; - int status; - // HiOp callback function wrappers - int (*get_starting_point)(hiop_size_type n_, double* x0, void* user_data); - int (*get_prob_sizes)(hiop_size_type* n_, hiop_size_type* m_, void* user_data); - int (*get_vars_info)(hiop_size_type n, double *xlow_, double* xupp_, void* user_data); - int (*get_cons_info)(hiop_size_type m, double *clow_, double* cupp_, void* user_data); - int (*eval_f)(hiop_size_type n, double* x, int new_x, double* obj, void* user_data); - int (*eval_grad_f)(hiop_size_type n, double* x, int new_x, double* gradf, void* user_data); - int (*eval_cons)(hiop_size_type n, - hiop_size_type m, - double* x, - int new_x, - double* cons, - void* user_data); - int (*eval_Jac_cons)(size_type n, - size_type m, - double* x, - int new_x, - double* Jac, - void *user_data); - } cHiopDenseProblem; +// C struct with HiOp function callbacks +typedef struct cHiopDenseProblem +{ + hiopNlpDenseConstraints* refcppHiop; + cppUserProblemDense* hiopinterface; + // user_data similar to the Ipopt interface. In case of Julia pointer to the Julia problem object. + void* user_data; + // Used by hiop_sparse_createProblemsolveProblem() to store the final state. The duals should be added here. + double* solution; + double obj_value; + int niters; + int status; + // HiOp callback function wrappers + int (*get_starting_point)(hiop_size_type n_, double* x0, void* user_data); + int (*get_prob_sizes)(hiop_size_type* n_, hiop_size_type* m_, void* user_data); + int (*get_vars_info)(hiop_size_type n, double* xlow_, double* xupp_, void* user_data); + int (*get_cons_info)(hiop_size_type m, double* clow_, double* cupp_, void* user_data); + int (*eval_f)(hiop_size_type n, double* x, int new_x, double* obj, void* user_data); + int (*eval_grad_f)(hiop_size_type n, double* x, int new_x, double* gradf, void* user_data); + int (*eval_cons)(hiop_size_type n, hiop_size_type m, double* x, int new_x, double* cons, void* user_data); + int (*eval_Jac_cons)(size_type n, size_type m, double* x, int new_x, double* Jac, void* user_data); +} cHiopDenseProblem; } - // The cpp object used in the C interface class cppUserProblemDense : public hiopInterfaceDenseConstraints { public: - cppUserProblemDense(cHiopDenseProblem *cprob_) - : cprob(cprob_) - { + cppUserProblemDense(cHiopDenseProblem* cprob_) + : cprob(cprob_) + {} + + virtual ~cppUserProblemDense() {} + + // HiOp callbacks calling the C wrappers + bool get_prob_sizes(size_type& n_, size_type& m_) + { + cprob->get_prob_sizes(&n_, &m_, cprob->user_data); + return true; + }; + + bool get_starting_point(const size_type& n, double* x0) + { + cprob->get_starting_point(n, x0, cprob->user_data); + return true; + }; + + bool get_vars_info(const size_type& n, double* xlow_, double* xupp_, NonlinearityType* type) + { + for(size_type i = 0; i < n; ++i) { + type[i] = hiopNonlinear; } - - virtual ~cppUserProblemDense() - { + cprob->get_vars_info(n, xlow_, xupp_, cprob->user_data); + return true; + }; + + bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) + { + for(size_type i = 0; i < m; ++i) { + type[i] = hiopNonlinear; } - - // HiOp callbacks calling the C wrappers - bool get_prob_sizes(size_type& n_, size_type& m_) - { - cprob->get_prob_sizes(&n_, &m_, cprob->user_data); - return true; - }; - - bool get_starting_point(const size_type& n, double *x0) - { - cprob->get_starting_point(n, x0, cprob->user_data); - return true; - }; - - bool get_vars_info(const size_type& n, double *xlow_, double* xupp_, NonlinearityType* type) - { - for(size_type i=0; iget_vars_info(n, xlow_, xupp_, cprob->user_data); - return true; - }; - - bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) - { - for(size_type i=0; iget_cons_info(m, clow, cupp, cprob->user_data); - return true; - }; - - bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) - { - cprob->eval_f(n, (double *) x, 0, &obj_value, cprob->user_data); - return true; - }; - - bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) - { - cprob->eval_grad_f(n, (double *) x, 0, gradf, cprob->user_data); - - return true; - }; - - bool eval_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, - double* cons) - { - return false; - }; - - bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) - { - cprob->eval_cons(n, m, (double *) x, new_x, cons, cprob->user_data); - return true; - }; - - bool eval_Jac_cons(const size_type& n, - const size_type& m, - const size_type& num_cons, - const index_type* idx_cons, - const double* x, - bool new_x, - double* Jac) - { - return false; - }; - - bool eval_Jac_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* Jac) - { - cprob->eval_Jac_cons(n, m, (double *) x, new_x, - Jac, - cprob->user_data); - return true; - }; + cprob->get_cons_info(m, clow, cupp, cprob->user_data); + return true; + }; + + bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) + { + cprob->eval_f(n, (double*)x, 0, &obj_value, cprob->user_data); + return true; + }; + + bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) + { + cprob->eval_grad_f(n, (double*)x, 0, gradf, cprob->user_data); + + return true; + }; + + bool eval_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + double* cons) + { + return false; + }; + + bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) + { + cprob->eval_cons(n, m, (double*)x, new_x, cons, cprob->user_data); + return true; + }; + + bool eval_Jac_cons(const size_type& n, + const size_type& m, + const size_type& num_cons, + const index_type* idx_cons, + const double* x, + bool new_x, + double* Jac) + { + return false; + }; + + bool eval_Jac_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* Jac) + { + cprob->eval_Jac_cons(n, m, (double*)x, new_x, Jac, cprob->user_data); + return true; + }; private: // Storing the C struct in the CPP object - cHiopDenseProblem *cprob; + cHiopDenseProblem* cprob; }; /** The 3 essential function calls to create and destroy a problem object in addition to solve a problem. * Some option setters will be added in the future. */ -extern "C" int hiop_dense_create_problem(cHiopDenseProblem *problem); -extern "C" int hiop_dense_solve_problem(cHiopDenseProblem *problem); -extern "C" int hiop_dense_destroy_problem(cHiopDenseProblem *problem); +extern "C" int hiop_dense_create_problem(cHiopDenseProblem* problem); +extern "C" int hiop_dense_solve_problem(cHiopDenseProblem* problem); +extern "C" int hiop_dense_destroy_problem(cHiopDenseProblem* problem); #endif diff --git a/src/Interface/hiopInterface.hpp b/src/Interface/hiopInterface.hpp index 47c54d3ca..904efb9d3 100644 --- a/src/Interface/hiopInterface.hpp +++ b/src/Interface/hiopInterface.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -61,71 +61,72 @@ namespace hiop { - /** Solver status codes. */ -enum hiopSolveStatus { - //(partial) success - Solve_Success=0, - Solve_Success_RelTol=1, - Solve_Acceptable_Level=2, - Infeasible_Problem=5, - Iterates_Diverging=6, +/** Solver status codes. */ +enum hiopSolveStatus +{ + //(partial) success + Solve_Success = 0, + Solve_Success_RelTol = 1, + Solve_Acceptable_Level = 2, + Infeasible_Problem = 5, + Iterates_Diverging = 6, Feasible_Not_Optimal = 7, - //solver stopped based on user-defined criteria that are not related to optimality - Max_Iter_Exceeded=10, - Max_CpuTime_Exceeded=11, - User_Stopped=12, - - //NLP algorithm/solver reports issues in solving the problem and stops without being certain - //that is solved the problem to optimality or that the problem is infeasible. - //Feasible_Point_Found, - NlpAlgorithm_failure=-1, - Diverging_Iterates=-2, - Search_Dir_Too_Small=-3, - Steplength_Too_Small=-4, - Err_Step_Computation=-5, - //errors related to user-provided data (e.g., inconsistent problem specification, 'nans' in the - //function/sensitivity evaluations, invalid options) - Invalid_Problem_Definition=-11, - Invalid_Parallelization=-12, - Invalid_UserOption=-13, - Invalid_Number=-14, - Error_In_User_Function=-15, - Error_In_FR =-16, - - //ungraceful errors and returns - Exception_Unrecoverable=-100, - Memory_Alloc_Problem=-101, - SolverInternal_Error=-199, - - //unknown NLP solver errors or return codes - UnknownNLPSolveStatus=-1000, - SolveInitializationError=-1001, - - //intermediary statuses for the solver - NlpSolve_IncompleteInit=-10001, - NlpSolve_SolveNotCalled=-10002, - NlpSolve_Pending=-10003 + // solver stopped based on user-defined criteria that are not related to optimality + Max_Iter_Exceeded = 10, + Max_CpuTime_Exceeded = 11, + User_Stopped = 12, + + // NLP algorithm/solver reports issues in solving the problem and stops without being certain + // that is solved the problem to optimality or that the problem is infeasible. + // Feasible_Point_Found, + NlpAlgorithm_failure = -1, + Diverging_Iterates = -2, + Search_Dir_Too_Small = -3, + Steplength_Too_Small = -4, + Err_Step_Computation = -5, + // errors related to user-provided data (e.g., inconsistent problem specification, 'nans' in the + // function/sensitivity evaluations, invalid options) + Invalid_Problem_Definition = -11, + Invalid_Parallelization = -12, + Invalid_UserOption = -13, + Invalid_Number = -14, + Error_In_User_Function = -15, + Error_In_FR = -16, + + // ungraceful errors and returns + Exception_Unrecoverable = -100, + Memory_Alloc_Problem = -101, + SolverInternal_Error = -199, + + // unknown NLP solver errors or return codes + UnknownNLPSolveStatus = -1000, + SolveInitializationError = -1001, + + // intermediary statuses for the solver + NlpSolve_IncompleteInit = -10001, + NlpSolve_SolveNotCalled = -10002, + NlpSolve_Pending = -10003 }; -/** Base class for the solver's interface that has no assumptions how the - * matrices are stored. The vectors are dense and distributed row-wise. - * The data distribution is decided by the calling code (that implements +/** Base class for the solver's interface that has no assumptions how the + * matrices are stored. The vectors are dense and distributed row-wise. + * The data distribution is decided by the calling code (that implements * this interface) and specified to the optimization via 'get_vecdistrib_info' * - * Three possible implementations are for sparse NLPs (hiopInterfaceSparse), - * mixed dense-sparse NLPs (hiopInterfaceMDS), and NLPs with small + * Three possible implementations are for sparse NLPs (hiopInterfaceSparse), + * mixed dense-sparse NLPs (hiopInterfaceMDS), and NLPs with small * number of global constraints (hiopInterfaceDenseConstraints). * - * @note Please take notice of the following notes regarding the implementation of - * hiop::hiopInterfaceMDS on the device. All pointers marked as "managed by Umpire" - * are allocated by HiOp using the Umpire's API. They all are addressed in the - * same memory space; however, the memory space can be host (typically CPU), - * device (typically GPU), or unified memory (um) spaces as per Umpire - * specification. The selection of the memory space is done via the option - * "mem_space" of HiOp. It is the responsibility of the implementers of the - * HiOp's interfaces to work with the "managed by Umpire" pointers in the same - * memory space as the one specified by the "mem_space" option. - * + * @note Please take notice of the following notes regarding the implementation of + * hiop::hiopInterfaceMDS on the device. All pointers marked as "managed by Umpire" + * are allocated by HiOp using the Umpire's API. They all are addressed in the + * same memory space; however, the memory space can be host (typically CPU), + * device (typically GPU), or unified memory (um) spaces as per Umpire + * specification. The selection of the memory space is done via the option + * "mem_space" of HiOp. It is the responsibility of the implementers of the + * HiOp's interfaces to work with the "managed by Umpire" pointers in the same + * memory space as the one specified by the "mem_space" option. + * * @note The above note does not currently apply to the NLP interfaces * hiop::hiopInterfaceDenseConstraints and hiop::hiopInterfaceSparse) and the pointers * marked as "managed by Umpire" are in the host/CPU memory space (subject to change @@ -134,77 +135,87 @@ enum hiopSolveStatus { class hiopInterfaceBase { public: - //Types indicating linearity or nonlinearity. - enum NonlinearityType{ hiopLinear=0, hiopQuadratic, hiopNonlinear}; + // Types indicating linearity or nonlinearity. + enum NonlinearityType + { + hiopLinear = 0, + hiopQuadratic, + hiopNonlinear + }; + public: hiopInterfaceBase() {}; virtual ~hiopInterfaceBase() {}; /** Specifies the problem dimensions. - * + * * @param n global number of variables * @param m number of constraints */ - virtual bool get_prob_sizes(size_type& n, size_type& m)=0; + virtual bool get_prob_sizes(size_type& n, size_type& m) = 0; /** Specifies the type of optimization problem * @param[out] type indicating whether the optimization problem is * linearily, quadratically, or general nonlinearily. - * TODO: need to `deepcheck` is this return value matches the returned type array from + * TODO: need to `deepcheck` is this return value matches the returned type array from * `get_vars_info` and `get_cons_info` */ - virtual bool get_prob_info(NonlinearityType& type) { type = hiopInterfaceBase::hiopNonlinear; return true;} + virtual bool get_prob_info(NonlinearityType& type) + { + type = hiopInterfaceBase::hiopNonlinear; + return true; + } /** Specifies bounds on the variables. - * + * * @param[in] n global number of constraints - * @param[out] xlow array of lower bounds. A value of -1e20 or less means no lower + * @param[out] xlow array of lower bounds. A value of -1e20 or less means no lower * bound is present (managed by Umpire) - * @param[out] xupp array of upper bounds. A value of 1e20 or more means no upper + * @param[out] xupp array of upper bounds. A value of 1e20 or more means no upper * bound is present (managed by Umpire) - * @param[out] type array of indicating whether the variables enters the objective - * linearily, quadratically, or general nonlinearily. Momentarily + * @param[out] type array of indicating whether the variables enters the objective + * linearily, quadratically, or general nonlinearily. Momentarily * all bounds should be marked as nonlinear (allocated on host). */ - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type)=0; - + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) = 0; + /** Specififes the bounds on the constraints. * * @param[in] m number of constraints - * @param[out] clow array of lower bounds for constraints. A value of -1e20 or less means no lower + * @param[out] clow array of lower bounds for constraints. A value of -1e20 or less means no lower * bound is present (managed by Umpire) - * @param[out] cupp array of upper bounds for constraints. A value of 1e20 or more means no upper + * @param[out] cupp array of upper bounds for constraints. A value of 1e20 or more means no upper * bound is present (managed by Umpire) * @param[out] type array of indicating whether the constraint is linear, quadratic, or general * nonlinear. Momentarily all bounds should be marked as nonlinear (allocated on host). */ - virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type)=0; + virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type) = 0; /** Method the evaluation of the objective function. - * + * * @param[in] n global size of the problem * @param[in] x array with the local entries of the primal variable (managed by Umpire) * @param[in] new_x whether x has been changed from the previous calls to other evaluation methods * (gradient, constraints, Jacobian, and Hessian). * @param[out] obj_value the value of the objective function at @p x * - * @note When MPI is enabled, each rank returns the objective value in @p obj_value. @p x points to + * @note When MPI is enabled, each rank returns the objective value in @p obj_value. @p x points to * the local entries and the function is responsible for knowing the local buffer size. */ - virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value)=0; - + virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) = 0; + /** Method for the evaluation of the gradient of objective. - * + * * @param[in] n global size of the problem * @param[in] x array with the local entries of the primal variable (managed by Umpire) * @param[in] new_x whether x has been changed from the previous calls to other evaluation methods * ( function, constraints, Jacobian, and Hessian) - * @param[out] gradf the entries of the gradient of the objective function at @p x, local to the + * @param[out] gradf the entries of the gradient of the objective function at @p x, local to the * MPI rank (managed by Umpire) * * @note When MPI is enabled, each rank should access only the local buffers @p x and @p gradf. */ - virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf)=0; + virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf) = 0; /** Evaluates a subset of the constraints @p cons(@p x). The subset is of size * @p num_cons and is described by indexes in the @p idx_cons array. The method will be called at each @@ -217,9 +228,9 @@ class hiopInterfaceBase * @param[in] idx_cons: indexes in {1,2,...,m} of the constraints to be evaluated (managed by Umpire) * @param[in] x the point where the constraints need to be evaluated (managed by Umpire) * @param[in] new_x whether x has been changed from the previous call to f, grad_f, or Jac - * @param[out] cons array of size num_cons containing the value of the constraints indicated by + * @param[out] cons array of size num_cons containing the value of the constraints indicated by * @p idx_cons (managed by Umpire) - * + * * @note When MPI is enabled, every rank populates @p cons since the constraints are not distributed. */ virtual bool eval_cons(const size_type& n, @@ -228,48 +239,44 @@ class hiopInterfaceBase const index_type* idx_cons, const double* x, bool new_x, - double* cons)=0; - - /** Evaluates the constraints body @p cons(@p x), both equalities and inequalities, in one call. + double* cons) = 0; + + /** Evaluates the constraints body @p cons(@p x), both equalities and inequalities, in one call. * * @param[in] n the global number of variables * @param[in] m the number of constraints * @param[in] x the point where the constraints need to be evaluated (managed by Umpire) * @param[in] new_x whether x has been changed from the previous call to f, grad_f, or Jac - * @param[out] cons array of size num_cons containing the value of the constraints indicated by + * @param[out] cons array of size num_cons containing the value of the constraints indicated by * @p idx_cons (managed by Umpire) * - * HiOp will first call the other hiopInterfaceBase::eval_cons() twice. If the implementer/user wants the - * functionality of this "one-call" overload, he should return false from the other + * HiOp will first call the other hiopInterfaceBase::eval_cons() twice. If the implementer/user wants the + * functionality of this "one-call" overload, he should return false from the other * hiopInterfaceBase::eval_cons() (during both calls). - * + * * @note When MPI is enabled, every rank populates @p cons since the constraints are not distributed. */ - virtual bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) + virtual bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { return false; } + + /** Passes the communicator, defaults to MPI_COMM_WORLD (dummy for non-MPI builds) */ + virtual bool get_MPI_comm(MPI_Comm& comm_out) { - return false; + comm_out = MPI_COMM_WORLD; + return true; } - - /** Passes the communicator, defaults to MPI_COMM_WORLD (dummy for non-MPI builds) */ - virtual bool get_MPI_comm(MPI_Comm& comm_out) { comm_out=MPI_COMM_WORLD; return true;} - - - /** - * Method for column partitioning specification for distributed memory vectors. Process P owns + /** + * Method for column partitioning specification for distributed memory vectors. Process P owns * cols[P], cols[P]+1, ..., cols[P+1]-1, P={0,1,...,NumRanks}. * - * Example: for a vector x of @p global_n=6 elements on 3 ranks, the column partitioning is + * Example: for a vector x of @p global_n=6 elements on 3 ranks, the column partitioning is * @p cols=[0,2,4,6]. - * - * The caller manages memory associated with @p cols, which is an array of size NumRanks+1 + * + * The caller manages memory associated with @p cols, which is an array of size NumRanks+1 */ - virtual bool get_vecdistrib_info(size_type global_n, index_type* cols) { - return false; //defaults to serial + virtual bool get_vecdistrib_info(size_type global_n, index_type* cols) + { + return false; // defaults to serial } /** @@ -278,43 +285,40 @@ class hiopInterfaceBase * @note Avoid using this method since it will be removed in a future release and replaced with * the same-name method below. * - * The method returns true (and populates @p x0) or returns false, in which case HiOp will + * The method returns true (and populates @p x0) or returns false, in which case HiOp will * internally set @p x0 to all zero (still subject to internal adjustements). * * By default, HiOp first calls the overloaded primal-dual starting point specification - * (overloaded) method get_starting_point() (see below). If the above returns false, HiOp will then call + * (overloaded) method get_starting_point() (see below). If the above returns false, HiOp will then call * this method. * * @param[in] n the global number of variables * @param[out] x0 the user-defined initial values for the primal variablers (managed by Umpire) - * + * */ - virtual bool get_starting_point(const size_type&n, double* x0) - { - return false; - } - + virtual bool get_starting_point(const size_type& n, double* x0) { return false; } + /** - * Method provides a primal or a primal-dual starting point. This point is subject + * Method provides a primal or a primal-dual starting point. This point is subject * to internal adjustments in HiOp. * * If the user (implementer of this method) has good estimates only of the primal variables, * the method should populate @p x0 with these values and return true. The @p duals_avail - * should be set to false; internally, HiOp will not access @p z_bndL0, @p z_bndU0, and + * should be set to false; internally, HiOp will not access @p z_bndL0, @p z_bndU0, and * @p lambda0 in this case. * - * If the user (implementer of this method) has good estimates of the duals of bound constraints - * and of inequality and equality constraints, @p duals_avail boolean argument should - * be set to true and the respective duals should be provided (in @p z_bndL0 and @p z_bndU0 and - * @p lambda0, respectively). In this case, the user should also set @p x0 to his/her estimate + * If the user (implementer of this method) has good estimates of the duals of bound constraints + * and of inequality and equality constraints, @p duals_avail boolean argument should + * be set to true and the respective duals should be provided (in @p z_bndL0 and @p z_bndU0 and + * @p lambda0, respectively). In this case, the user should also set @p x0 to his/her estimate * of primal variables and return true. * - * If user does not have high-quality (primal or primal-dual) starting points, the method should + * If user does not have high-quality (primal or primal-dual) starting points, the method should * return false (see note below). * - * @note When this method returns false, HiOp will call the overload - * get_starting_point() for only primal variables (see the above function). This behaviour is for backward compatibility and - * will be removed in a future release. + * @note When this method returns false, HiOp will call the overload + * get_starting_point() for only primal variables (see the above function). This behaviour is for backward compatibility + * and will be removed in a future release. * * @param[in] n the global number of variables * @param[in] m the number of constraints @@ -323,17 +327,17 @@ class hiopInterfaceBase * @param[out] z_bndL0 the user-defined initial values for the duals of the variable lower bounds (managed by Umpire) * @param[out] z_bndU0 the user-defined initial values for the duals of the variable upper bounds (managed by Umpire) * @param[out] lambda0 the user-defined initial values for the duals of the constraints (managed by Umpire) - * @param[out] slacks_avail a boolean argument which indicates whether the initial values for the inequality slacks + * @param[out] slacks_avail a boolean argument which indicates whether the initial values for the inequality slacks * (added by HiOp internally) are given by the user - * @param[out] ineq_slack the user-defined initial values for the slacks added to transfer inequalities to equalities + * @param[out] ineq_slack the user-defined initial values for the slacks added to transfer inequalities to equalities * (managed by Umpire) - * + * */ virtual bool get_starting_point(const size_type& n, const size_type& m, double* x0, bool& duals_avail, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, bool& slacks_avail, @@ -345,7 +349,7 @@ class hiopInterfaceBase } /** - * Method provides a primal-dual starting point for warm start. This point is subject + * Method provides a primal-dual starting point for warm start. This point is subject * to internal adjustments in HiOp. * * User provides starting point for all the iterate variable used in HiOp. @@ -357,7 +361,7 @@ class hiopInterfaceBase * @param[out] z_bndL0 the user-defined initial values for the duals of the variable lower bounds (managed by Umpire) * @param[out] z_bndU0 the user-defined initial values for the duals of the variable upper bounds (managed by Umpire) * @param[out] lambda0 the user-defined initial values for the duals of the constraints (managed by Umpire) - * @param[out] ineq_slack the user-defined initial values for the slacks added to transfer inequalities to equalities + * @param[out] ineq_slack the user-defined initial values for the slacks added to transfer inequalities to equalities * (managed by Umpire) * @param[out] vl0 the user-defined initial values for the duals of the constraint lower bounds (managed by Umpire) * @param[out] vu0 the user-defined initial values for the duals of the constraint upper bounds (managed by Umpire) @@ -366,7 +370,7 @@ class hiopInterfaceBase virtual bool get_warmstart_point(const size_type& n, const size_type& m, double* x0, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, double* ineq_slack, @@ -376,9 +380,9 @@ class hiopInterfaceBase return false; } - /** + /** * Callback method called by HiOp when the optimal solution is reached. User should use it - * to retrieve primal-dual optimal solution. + * to retrieve primal-dual optimal solution. * * @param[in] status status of the solution process * @param[in] n global number of variables @@ -390,13 +394,13 @@ class hiopInterfaceBase * @param[in] g array of the values of the constraints body at solution (managed by Umpire, see note below) * @param[in] lambda array of (local) entries of the dual variables for constraints at solution (managed by Umpire, * see note below) - * @param[in] obj_value objective value at solution + * @param[in] obj_value objective value at solution + * + * @note HiOp's option `callback_mem_space` can be used to change the memory location of array parameters managaged by + * Umpire. More specifically, when `callback_mem_space` is set to `host` (and `mem_space` is `device`), HiOp transfers the + * arrays from device to host first, and then passes/returns pointers on host for the arrays managed by Umpire. These + * pointers can be then used in host memory space (without the need to rely on or use Umpire). * - * @note HiOp's option `callback_mem_space` can be used to change the memory location of array parameters managaged by Umpire. - * More specifically, when `callback_mem_space` is set to `host` (and `mem_space` is `device`), HiOp transfers the - * arrays from device to host first, and then passes/returns pointers on host for the arrays managed by Umpire. These pointers - * can be then used in host memory space (without the need to rely on or use Umpire). - * */ virtual void solution_callback(hiopSolveStatus status, size_type n, @@ -407,16 +411,15 @@ class hiopInterfaceBase const double* g, const double* lambda, double obj_value) - { - } + {} - /** + /** * Callback for the (end of) iteration. This method is not called during the line-searche * procedure. @see solution_callback() for an explanation of the parameters. * - * @note If the user (implementer) of this methods returns false, HiOp will stop the + * @note If the user (implementer) of this methods returns false, HiOp will stop the * the optimization with hiop::hiopSolveStatus ::User_Stopped return code. - * + * * @param[in] iter the current iteration number * @param[in] obj_value objective value * @param[in] logbar_obj_value log barrier objective value @@ -436,12 +439,12 @@ class hiopInterfaceBase * @param[in] alpha_du dual step size * @param[in] alpha_pr primal step size * @param[in] ls_trials the number of line search iterations - * - * @note HiOp's option `callback_mem_space` can be used to change the memory location of array parameters managaged by Umpire. - * More specifically, when `callback_mem_space` is set to `host` (and `mem_space` is `device`), HiOp transfers the - * arrays from device to host first, and then passes/returns pointers on host for the arrays managed by Umpire. These pointers - * can be then used in host memory space (without the need to rely on or use Umpire). - * + * + * @note HiOp's option `callback_mem_space` can be used to change the memory location of array parameters managaged by + * Umpire. More specifically, when `callback_mem_space` is set to `host` (and `mem_space` is `device`), HiOp transfers the + * arrays from device to host first, and then passes/returns pointers on host for the arrays managed by Umpire. These + * pointers can be then used in host memory space (without the need to rely on or use Umpire). + * */ virtual bool iterate_callback(int iter, double obj_value, @@ -466,24 +469,28 @@ class hiopInterfaceBase return true; } - /** - * This method is used to provide user all the internal hiop iterates. @see solution_callback() + /** + * This method is used to provide user all the internal hiop iterates. @see solution_callback() * for an explanation of the parameters. - * + * * @param[in] x array of (local) entries of the primal variables (managed by Umpire, see note below) * @param[in] z_L array of (local) entries of the dual variables for lower bounds (managed by Umpire, see note below) * @param[in] z_U array of (local) entries of the dual variables for upper bounds (managed by Umpire, see note below) - * @param[in] yc array of (local) entries of the dual variables for equality constraints (managed by Umpire, see note below) - * @param[in] yd array of (local) entries of the dual variables for inequality constraints (managed by Umpire, see note below) + * @param[in] yc array of (local) entries of the dual variables for equality constraints (managed by Umpire, see note + * below) + * @param[in] yd array of (local) entries of the dual variables for inequality constraints (managed by Umpire, see note + * below) * @param[in] s array of the slacks added to transfer inequalities to equalities (managed by Umpire, see note below) - * @param[in] v_L array of (local) entries of the dual variables for constraint lower bounds (managed by Umpire, see note below) - * @param[in] v_U array of (local) entries of the dual variables for constraint upper bounds (managed by Umpire, see note below) - * - * @note HiOp's option `callback_mem_space` can be used to change the memory location of array parameters managaged by Umpire. - * More specifically, when `callback_mem_space` is set to `host` (and `mem_space` is `device`), HiOp transfers the - * arrays from device to host first, and then passes/returns pointers on host for the arrays managed by Umpire. These pointers - * can be then used in host memory space (without the need to rely on or use Umpire). - * + * @param[in] v_L array of (local) entries of the dual variables for constraint lower bounds (managed by Umpire, see note + * below) + * @param[in] v_U array of (local) entries of the dual variables for constraint upper bounds (managed by Umpire, see note + * below) + * + * @note HiOp's option `callback_mem_space` can be used to change the memory location of array parameters managaged by + * Umpire. More specifically, when `callback_mem_space` is set to `host` (and `mem_space` is `device`), HiOp transfers the + * arrays from device to host first, and then passes/returns pointers on host for the arrays managed by Umpire. These + * pointers can be then used in host memory space (without the need to rely on or use Umpire). + * */ virtual bool iterate_full_callback(const double* x, const double* z_L, @@ -496,31 +503,28 @@ class hiopInterfaceBase { return true; } - + /** * A wildcard function used to change the primal variables. * * @note If the user (implementer) of this methods returns false, HiOp will stop the * the optimization with hiop::hiopSolveStatus::User_Stopped return code. */ - virtual bool force_update_x(const int n, double* x) - { - return true; - } + virtual bool force_update_x(const int n, double* x) { return true; } private: - hiopInterfaceBase(const hiopInterfaceBase& ) {}; + hiopInterfaceBase(const hiopInterfaceBase&) {}; void operator=(const hiopInterfaceBase&) {}; }; -/** Specialized interface for NLPs with 'global' but few constraints. +/** Specialized interface for NLPs with 'global' but few constraints. */ -class hiopInterfaceDenseConstraints : public hiopInterfaceBase +class hiopInterfaceDenseConstraints : public hiopInterfaceBase { public: hiopInterfaceDenseConstraints() {}; virtual ~hiopInterfaceDenseConstraints() {}; - /** + /** * Evaluates the Jacobian of the subset of constraints indicated by idx_cons and of size num_cons. * Example: Assuming idx_cons[k]=i, which means that the gradient of the (i+1)th constraint is * to be evaluated, one needs to do Jac[k][0]=d/dx_0 con_i(x), Jac[k][1]=d/dx_1 con_i(x), ... @@ -532,72 +536,69 @@ class hiopInterfaceDenseConstraints : public hiopInterfaceBase * Parameters: see eval_cons */ virtual bool eval_Jac_cons(const size_type& n, - const size_type& m, + const size_type& m, const size_type& num_cons, const index_type* idx_cons, const double* x, bool new_x, double* Jac) = 0; - + /** - * Evaluates the Jacobian of equality and inequality constraints in one call. + * Evaluates the Jacobian of equality and inequality constraints in one call. * - * The main difference from the above 'eval_Jac_cons' is that the implementer/user of this + * The main difference from the above 'eval_Jac_cons' is that the implementer/user of this * method does not have to split the constraints into equalities and inequalities; instead, * HiOp does this internally. * * The parameter 'Jac' is passed as as a contiguous array storing the dense Jacobian matrix by rows. * - * TODO: build an example (new one-call Nlp formulation derived from ex2) to illustrate this + * TODO: build an example (new one-call Nlp formulation derived from ex2) to illustrate this * feature and to test HiOp's internal implementation of eq.-ineq. spliting. */ - virtual bool eval_Jac_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* Jac) + virtual bool eval_Jac_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* Jac) { return false; } }; -/** - * Specialized interface for NLPs having mixed DENSE and sparse (MDS) blocks in the - * Jacobian and Hessian. - * +/** + * Specialized interface for NLPs having mixed DENSE and sparse (MDS) blocks in the + * Jacobian and Hessian. + * * More specifically, this interface is for specifying optimization problem in x * split as (xs,xd), the rule of thumb being that xs have sparse derivatives and * xd have dense derivatives * - * min f(x) s.t. g(x) <= or = 0, lb<=x<=ub - * such that - * - Jacobian w.r.t. xs and LagrHessian w.r.t. (xs,xs) are sparse - * - Jacobian w.r.t. xd and LagrHessian w.r.t. (xd,xd) are dense + * min f(x) s.t. g(x) <= or = 0, lb<=x<=ub + * such that + * - Jacobian w.r.t. xs and LagrHessian w.r.t. (xs,xs) are sparse + * - Jacobian w.r.t. xd and LagrHessian w.r.t. (xd,xd) are dense * - LagrHessian w.r.t (xs,xd) is zero (later this assumption will be relaxed) * * @note HiOp expects the sparse variables first and then the dense variables. In many cases, - * the implementer has to (inconviniently) keep a map between his internal variables + * the implementer has to (inconviniently) keep a map between his internal variables * indexes and the indexes HiOp. - * - * @note This interface is 'local' in the sense that data is not assumed to be + * + * @note This interface is 'local' in the sense that data is not assumed to be * distributed across MPI ranks ('get_vecdistrib_info' should return 'false') * */ -class hiopInterfaceMDS : public hiopInterfaceBase { +class hiopInterfaceMDS : public hiopInterfaceBase +{ public: hiopInterfaceMDS() {}; virtual ~hiopInterfaceMDS() {}; - + /** * Returns the sizes and number of nonzeros of the sparse and dense blocks within MDS * * @param[out] nx_sparse number of sparse variables * @param[out] nx_ense number of dense variables - * @param[out] nnz_sparse_Jace number of nonzeros in the Jacobian of the equalities w.r.t. - * sparse variables - * @param[out] nnz_sparse_Jaci number of nonzeros in the Jacobian of the inequalities w.r.t. - * sparse variables - * @param[out] nnz_sparse_Hess_Lagr_SS number of nonzeros in the (sparse) Hessian w.r.t. + * @param[out] nnz_sparse_Jace number of nonzeros in the Jacobian of the equalities w.r.t. + * sparse variables + * @param[out] nnz_sparse_Jaci number of nonzeros in the Jacobian of the inequalities w.r.t. + * sparse variables + * @param[out] nnz_sparse_Hess_Lagr_SS number of nonzeros in the (sparse) Hessian w.r.t. * sparse variables * @param[out] nnz_sparse_Hess_Lagr_SD reserved, should be set to 0 */ @@ -606,10 +607,10 @@ class hiopInterfaceMDS : public hiopInterfaceBase { int& nnz_sparse_Jaceq, int& nnz_sparse_Jacineq, int& nnz_sparse_Hess_Lagr_SS, - int& nnz_sparse_Hess_Lagr_SD) = 0; + int& nnz_sparse_Hess_Lagr_SD) = 0; - /** - * Evaluates the Jacobian of constraints split in the sparse (triplet format) and + /** + * Evaluates the Jacobian of constraints split in the sparse (triplet format) and * dense matrices (rows storage) * * This method is called twice per Jacobian evaluation, once for equalities and once for @@ -622,7 +623,7 @@ class hiopInterfaceMDS : public hiopInterfaceBase { * @param[in] num_cons number of constraints to evaluate (size of idx_cons array) * @param[in] idx_cons indexes of the constraints to evaluate (managed by Umpire) * @param[in] x the point at which to evaluate (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x * @param[in] nsparse number of sparse variables * @param[in] ndense number of dense variables @@ -631,15 +632,15 @@ class hiopInterfaceMDS : public hiopInterfaceBase { * @param[out] jJacS array of column indexes in the sparse Jacobian (managed by Umpire) * @param[out] MJacS array of nonzero values in the sparse Jacobian (managed by Umpire) * @param[out] JacD array with the values of the dense Jacobian (managed by Umpire) - * + * * The implementer of this method should be aware of the following observations. * 1) 'JacD' parameter will be always non-null - * 2) When 'iJacS' and 'jJacS' are non-null, the implementer should provide the (i,j) - * indexes. - * 3) When 'MJacS' is non-null, the implementer should provide the values corresponding to + * 2) When 'iJacS' and 'jJacS' are non-null, the implementer should provide the (i,j) + * indexes. + * 3) When 'MJacS' is non-null, the implementer should provide the values corresponding to * entries specified by 'iJacS' and 'jJacS' * 4) 'iJacS' and 'jJacS' are both either non-null or null during a call. - * 5) Both 'iJacS'/'jJacS' and 'MJacS' can be non-null during the same call or only one of + * 5) Both 'iJacS'/'jJacS' and 'MJacS' can be non-null during the same call or only one of * them non-null; but they will not be both null. */ virtual bool eval_Jac_cons(const size_type& n, @@ -655,21 +656,21 @@ class hiopInterfaceMDS : public hiopInterfaceBase { index_type* jJacS, double* MJacS, double* JacD) = 0; - - /** + + /** * Evaluates the Jacobian of equality and inequality constraints in one call. This Jacobian is - * mixed dense-sparse (MDS), which means is structurally split in the sparse (triplet format) and + * mixed dense-sparse (MDS), which means is structurally split in the sparse (triplet format) and * dense matrices (rows storage) * - * The main difference from the above 'eval_Jac_cons' is that the implementer/user of this + * The main difference from the above 'eval_Jac_cons' is that the implementer/user of this * method does not have to split the constraints into equalities and inequalities; instead, - * HiOp does this internally. HiOp will call this method whenever the implementer/user returns + * HiOp does this internally. HiOp will call this method whenever the implementer/user returns * false from the 'eval_Jac_cons' above (which is called for equalities and inequalities separately). - * + * * @param[in] n number of variables * @param[in] m Number of constraints * @param[in] x the point at which to evaluate (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated previously + * @param[in] new_x indicates whether any of the other eval functions have been evaluated previously * (false) or not (true) at x * @param[in] nsparse number of sparse variables * @param[in] ndense number of dense variables @@ -678,15 +679,15 @@ class hiopInterfaceMDS : public hiopInterfaceBase { * @param[out] jJacS array of column indexes in the sparse Jacobian (managed by Umpire) * @param[out] MJacS array of nonzero values in the sparse Jacobian (managed by Umpire) * @param[out] JacD array with the values of the dense Jacobian (managed by Umpire) - * - * Notes for implementer of this method: + * + * Notes for implementer of this method: * 1) 'JacD' parameter will be always non-null. * 2) When 'iJacS' and 'jJacS' are non-null, the implementer should provide the (i,j) indexes. - * 3) When 'MJacS' is non-null, the implementer should provide the values corresponding to + * 3) When 'MJacS' is non-null, the implementer should provide the values corresponding to * entries specified by 'iJacS' and 'jJacS' (managed by Umpire). * 4) 'iJacS' and 'jJacS' are both either non-null or null during a call. - * 5) Both 'iJacS'/'jJacS' and 'MJacS' can be non-null during the same call or only one of them - * non-null; but they will not be both null. + * 5) Both 'iJacS'/'jJacS' and 'MJacS' can be non-null during the same call or only one of them + * non-null; but they will not be both null. */ virtual bool eval_Jac_cons(const size_type& n, const size_type& m, @@ -703,41 +704,40 @@ class hiopInterfaceMDS : public hiopInterfaceBase { return false; } - - /** - * Evaluates the Hessian of the Lagrangian function in 3 structural blocks: HSS is the Hessian + /** + * Evaluates the Hessian of the Lagrangian function in 3 structural blocks: HSS is the Hessian * w.r.t. (xs,xs), HDD is the Hessian w.r.t. (xd,xd), and HSD is the Hessian w.r.t (xs,xd). * Please consult the user manual for a details on the form the Lagrangian function takes. * * @note HSD is for now assumed to be zero. The implementer should return nnzHSD=0 - * during the first call to 'eval_Hess_Lagr'. On subsequent calls, HiOp will pass the + * during the first call to 'eval_Hess_Lagr'. On subsequent calls, HiOp will pass the * triplet arrays for HSD set to NULL and the implementer (obviously) should not use them. - * + * * @param[in] n number of variables * @param[in] m Number of constraints * @param[in] x the point at which to evaluate (managed by Umpire) - * @param[in] new_x indicates whether any of the other eval functions have been evaluated + * @param[in] new_x indicates whether any of the other eval functions have been evaluated * previously (false) or not (true) at x * @param[in] obj_factor scalar that multiplies the objective term in the Lagrangian function * @param[in] lambda array with values of the multipliers used by the Lagrangian function * @param[in] new_lambda indicates whether lambda values changed since last call * @param[in] nsparse number of sparse variables * @param[in] ndense number of dense variables - * @param[in] nnzHSS number of nonzeros in the (sparse) Hessian w.r.t. sparse variables + * @param[in] nnzHSS number of nonzeros in the (sparse) Hessian w.r.t. sparse variables * @param[out] iHSS array of row indexes in the Hessian w.r.t. sparse variables (managed by * Umpire) - * @param[out] jHSS array of column indexes in the Hessian w.r.t. sparse variables + * @param[out] jHSS array of column indexes in the Hessian w.r.t. sparse variables * (managed by Umpire) * @param[out] MHSS array of nonzero values in the Hessian w.r.t. sparse variables * (managed by Umpire) - * @param[out] HDDD array with the values of the Hessian w.r.t. to dense variables + * @param[out] HDDD array with the values of the Hessian w.r.t. to dense variables * (managed by Umpire) - * @param[out] iHSD is reserved and should not be accessed - * @param[out] jHSD is reserved and should not be accessed + * @param[out] iHSD is reserved and should not be accessed + * @param[out] jHSD is reserved and should not be accessed * @param[out] MHSD is reserved and should not be accessed * @param[out] HHSD is reserved and should not be accessed - * - * Notes + * + * Notes * 1)-5) from 'eval_Jac_cons' apply to xxxHSS and HDD arrays * 6) The order is multipliers is: lambda=[lambda_eq, lambda_ineq] */ @@ -761,7 +761,6 @@ class hiopInterfaceMDS : public hiopInterfaceBase { double* MHSD) = 0; }; - /** Specialized interface for NLPs with sparse Jacobian and Hessian matrices. * * More specifically, this interface is for specifying optimization problem: @@ -772,7 +771,7 @@ class hiopInterfaceMDS : public hiopInterfaceBase { * * @note this interface is 'local' in the sense that data is not assumed to be * distributed across MPI ranks ('get_vecdistrib_info' should return 'false'). - * Acceleration can be however obtained using OpenMP and CUDA via Raja + * Acceleration can be however obtained using OpenMP and CUDA via Raja * abstraction layer that HiOp uses and via linear solver. * */ @@ -784,7 +783,7 @@ class hiopInterfaceSparse : public hiopInterfaceBase /** Get the number of variables and constraints, nonzeros * and get the number of nonzeros in Jacobian and Heesian - */ + */ virtual bool get_sparse_blocks_info(size_type& nx, size_type& nnz_sparse_Jaceq, size_type& nnz_sparse_Jacineq, @@ -868,12 +867,14 @@ class hiopInterfaceSparse : public hiopInterfaceBase index_type* jHSS, double* MHSS) = 0; - /** Specifying the get_MPI_comm code defined in the base class */ - virtual bool get_MPI_comm(MPI_Comm& comm_out) { comm_out=MPI_COMM_SELF; return true;} - + virtual bool get_MPI_comm(MPI_Comm& comm_out) + { + comm_out = MPI_COMM_SELF; + return true; + } }; -} //end of namespace +} // namespace hiop #endif diff --git a/src/Interface/hiopInterfacePrimalDecomp.cpp b/src/Interface/hiopInterfacePrimalDecomp.cpp index 014a4c022..1d6b9ea42 100644 --- a/src/Interface/hiopInterfacePrimalDecomp.cpp +++ b/src/Interface/hiopInterfacePrimalDecomp.cpp @@ -1,15 +1,11 @@ #include "hiopInterfacePrimalDecomp.hpp" - using namespace hiop; -hiopInterfacePriDecProblem::RecourseApproxEvaluator:: -RecourseApproxEvaluator(int nc, const std::string& mem_space) - : RecourseApproxEvaluator(nc, nc, mem_space) //nc_ <= nx, nd=S -{ -} +hiopInterfacePriDecProblem::RecourseApproxEvaluator::RecourseApproxEvaluator(int nc, const std::string& mem_space) + : RecourseApproxEvaluator(nc, nc, mem_space) // nc_ <= nx, nd=S +{} -hiopInterfacePriDecProblem::RecourseApproxEvaluator:: -~RecourseApproxEvaluator() +hiopInterfacePriDecProblem::RecourseApproxEvaluator::~RecourseApproxEvaluator() { delete xc_idx_; delete rgrad_; @@ -19,15 +15,16 @@ hiopInterfacePriDecProblem::RecourseApproxEvaluator:: delete vec_work_basecase_; } -hiopInterfacePriDecProblem::RecourseApproxEvaluator:: -RecourseApproxEvaluator(int nc, int S, const std::string& mem_space) - : nc_(nc), S_(S), rval_(0.), //nc = nx, nd=S - mem_space_(mem_space) +hiopInterfacePriDecProblem::RecourseApproxEvaluator::RecourseApproxEvaluator(int nc, int S, const std::string& mem_space) + : nc_(nc), + S_(S), + rval_(0.), // nc = nx, nd=S + mem_space_(mem_space) { - assert(S>=nc); + assert(S >= nc); xc_idx_ = LinearAlgebraFactory::create_vector_int(mem_space_, nc); - xc_idx_->linspace(0,1); - + xc_idx_->linspace(0, 1); + rgrad_ = LinearAlgebraFactory::create_vector(mem_space_, nc); rhess_ = rgrad_->alloc_clone(); x0_ = rgrad_->alloc_clone(); @@ -35,36 +32,40 @@ RecourseApproxEvaluator(int nc, int S, const std::string& mem_space) vec_work_basecase_ = nullptr; } -hiopInterfacePriDecProblem::RecourseApproxEvaluator:: -RecourseApproxEvaluator(const int nc, - const int S, - const int* list, - const std::string& mem_space) - : nc_(nc), S_(S), rval_(0.), rgrad_(NULL), rhess_(NULL), x0_(NULL), - mem_space_(mem_space) +hiopInterfacePriDecProblem::RecourseApproxEvaluator::RecourseApproxEvaluator(const int nc, + const int S, + const int* list, + const std::string& mem_space) + : nc_(nc), + S_(S), + rval_(0.), + rgrad_(NULL), + rhess_(NULL), + x0_(NULL), + mem_space_(mem_space) { rgrad_ = LinearAlgebraFactory::create_vector(mem_space_, nc); rhess_ = rgrad_->alloc_clone(); x0_ = rgrad_->alloc_clone(); vec_work_coupling_ = rgrad_->alloc_clone(); vec_work_basecase_ = nullptr; - + xc_idx_ = LinearAlgebraFactory::create_vector_int(mem_space_, nc); xc_idx_->copy_from(list); } -hiopInterfacePriDecProblem::RecourseApproxEvaluator:: -RecourseApproxEvaluator(const int nc, - const int S, - const double& rval, - const hiopVector& rgrad, - const hiopVector& rhess, - const hiopVector& x0, - const std::string& mem_space) - : nc_(nc), S_(S), - mem_space_(mem_space) +hiopInterfacePriDecProblem::RecourseApproxEvaluator::RecourseApproxEvaluator(const int nc, + const int S, + const double& rval, + const hiopVector& rgrad, + const hiopVector& rhess, + const hiopVector& x0, + const std::string& mem_space) + : nc_(nc), + S_(S), + mem_space_(mem_space) { - //assert(S>=nc); + // assert(S>=nc); rval_ = rval; rgrad_ = LinearAlgebraFactory::create_vector(mem_space_, nc); rhess_ = rgrad_->alloc_clone(); @@ -72,36 +73,35 @@ RecourseApproxEvaluator(const int nc, x0_ = rgrad_->alloc_clone(); vec_work_coupling_ = rgrad_->alloc_clone(); vec_work_basecase_ = nullptr; - + xc_idx_ = LinearAlgebraFactory::create_vector_int(mem_space_, nc); - xc_idx_->linspace(0,1); + xc_idx_->linspace(0, 1); rgrad_->copyFromStarting(0, rgrad.local_data_const(), nc); rhess_->copyFromStarting(0, rhess.local_data_const(), nc); x0_->copyFromStarting(0, x0.local_data_const(), nc); } -hiopInterfacePriDecProblem::RecourseApproxEvaluator:: -RecourseApproxEvaluator(const int nc, - const int S, - const int* list, - const double& rval, - const hiopVector& rgrad, - const hiopVector& rhess, - const hiopVector& x0, - const std::string& mem_space) - : nc_(nc), - S_(S), - mem_space_(mem_space) +hiopInterfacePriDecProblem::RecourseApproxEvaluator::RecourseApproxEvaluator(const int nc, + const int S, + const int* list, + const double& rval, + const hiopVector& rgrad, + const hiopVector& rhess, + const hiopVector& x0, + const std::string& mem_space) + : nc_(nc), + S_(S), + mem_space_(mem_space) { - //assert(S>=nc); + // assert(S>=nc); rval_ = rval; rgrad_ = LinearAlgebraFactory::create_vector(mem_space_, nc); rhess_ = rgrad_->alloc_clone(); x0_ = rgrad_->alloc_clone(); vec_work_coupling_ = rgrad_->alloc_clone(); vec_work_basecase_ = nullptr; - + xc_idx_ = LinearAlgebraFactory::create_vector_int(mem_space_, nc); xc_idx_->copy_from(list); @@ -116,30 +116,34 @@ RecourseApproxEvaluator(const int nc, * Therefore need to pick out the coupled ones * n is the total dimension of x and not really used in the function */ -bool hiopInterfacePriDecProblem::RecourseApproxEvaluator:: -eval_f(const size_type& n, const double* x, bool new_x, double& obj_value) +bool hiopInterfacePriDecProblem::RecourseApproxEvaluator::eval_f(const size_type& n, + const double* x, + bool new_x, + double& obj_value) { - assert(rgrad_!=NULL); + assert(rgrad_ != NULL); obj_value += rval_; hiopVector& v = *vec_work_coupling_; - v.copy_from_indexes(x, *xc_idx_); + v.copy_from_indexes(x, *xc_idx_); v.axpy(-1.0, *x0_); obj_value += v.dotProductWith(*rgrad_); v.componentMult(v); - obj_value += 0.5*v.dotProductWith(*rhess_); + obj_value += 0.5 * v.dotProductWith(*rhess_); return true; } - + // grad is assumed to be of the length n, of the entire x -bool hiopInterfacePriDecProblem::RecourseApproxEvaluator:: -eval_grad(const size_type& n, const double* x, bool new_x, double* grad) +bool hiopInterfacePriDecProblem::RecourseApproxEvaluator::eval_grad(const size_type& n, + const double* x, + bool new_x, + double* grad) { - assert(rgrad_!=NULL); - + assert(rgrad_ != NULL); + hiopVector& v = *vec_work_coupling_; // v = x-x0 v.copy_from_indexes(x, *xc_idx_); @@ -157,47 +161,45 @@ eval_grad(const size_type& n, const double* x, bool new_x, double* grad) hiopVector& grad_vec = *vec_work_basecase_; grad_vec.copyFrom(grad); - //add the recourse gradient to the basecase gradient + // add the recourse gradient to the basecase gradient grad_vec.axpy(1.0, v, *xc_idx_); - + grad_vec.copyTo(grad); - + return true; } /** - * Hessian evaluation is different since it's hard to decipher the + * Hessian evaluation is different since it's hard to decipher the * specific Lagrangian arrangement at this level - * So hess currently is a vector of nc_ length - * Careful when implementing in the full problem + * So hess currently is a vector of nc_ length + * Careful when implementing in the full problem */ -bool hiopInterfacePriDecProblem::RecourseApproxEvaluator:: -eval_hess(const size_type& n, const hiopVector& x, bool new_x, hiopVector& hess) -{ - assert(rgrad_!=NULL); - assert(rhess_->get_local_size()==hess.get_local_size()); - hess.axpy(1.0,*rhess_); +bool hiopInterfacePriDecProblem::RecourseApproxEvaluator::eval_hess(const size_type& n, + const hiopVector& x, + bool new_x, + hiopVector& hess) +{ + assert(rgrad_ != NULL); + assert(rhess_->get_local_size() == hess.get_local_size()); + hess.axpy(1.0, *rhess_); return true; } // pass the COMM_SELF communicator since this example is only intended to run inside 1 MPI process // bool hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_MPI_comm(MPI_Comm& comm_out) { - comm_out=MPI_COMM_SELF; + comm_out = MPI_COMM_SELF; return true; } - -void hiopInterfacePriDecProblem::RecourseApproxEvaluator::set_rval(const double rval) -{ - rval_ = rval; -} - +void hiopInterfacePriDecProblem::RecourseApproxEvaluator::set_rval(const double rval) { rval_ = rval; } + void hiopInterfacePriDecProblem::RecourseApproxEvaluator::set_rgrad(const int n, const hiopVector& rgrad) { assert(n == nc_); - assert(rgrad.get_size()>=nc_); - if(rgrad_==NULL) { + assert(rgrad.get_size() >= nc_); + if(rgrad_ == NULL) { rgrad_ = LinearAlgebraFactory::create_vector(mem_space_, nc_); } rgrad_->copyFromStarting(0, rgrad.local_data_const(), nc_); @@ -208,7 +210,7 @@ void hiopInterfacePriDecProblem::RecourseApproxEvaluator::set_rgrad(const int n, void hiopInterfacePriDecProblem::RecourseApproxEvaluator::set_rhess(const int n, const hiopVector& rhess) { assert(n == nc_); - if(rhess_==NULL) { + if(rhess_ == NULL) { rhess_ = LinearAlgebraFactory::create_vector(mem_space_, nc_); } rhess_->copyFromStarting(0, rhess.local_data_const(), nc_); @@ -217,35 +219,18 @@ void hiopInterfacePriDecProblem::RecourseApproxEvaluator::set_rhess(const int n, void hiopInterfacePriDecProblem::RecourseApproxEvaluator::set_x0(const int n, const hiopVector& x0) { assert(n == nc_); - if(rgrad_==NULL) { + if(rgrad_ == NULL) { x0_ = LinearAlgebraFactory::create_vector(mem_space_, nc_); } x0_->copyFromStarting(0, x0.local_data_const(), nc_); } -int hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_S() const -{ - return S_; -} - -double hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_rval() const -{ - return rval_; -} - -hiopVector* hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_rgrad() const -{ - return rgrad_; -} +int hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_S() const { return S_; } -hiopVector* hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_rhess() const -{ - return rhess_; -} +double hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_rval() const { return rval_; } -hiopVector* hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_x0() const -{ - return x0_; -} +hiopVector* hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_rgrad() const { return rgrad_; } +hiopVector* hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_rhess() const { return rhess_; } +hiopVector* hiopInterfacePriDecProblem::RecourseApproxEvaluator::get_x0() const { return x0_; } diff --git a/src/Interface/hiopInterfacePrimalDecomp.hpp b/src/Interface/hiopInterfacePrimalDecomp.hpp index e8d4eb102..d7a083d3b 100644 --- a/src/Interface/hiopInterfacePrimalDecomp.hpp +++ b/src/Interface/hiopInterfacePrimalDecomp.hpp @@ -5,69 +5,64 @@ #include "hiopVector.hpp" #include "LinAlgFactory.hpp" #include -#include //for memcpy +#include //for memcpy #include namespace hiop { -/** - * Base class (interface) for specifying optimization NLPs that have separable terms in the - * objective, which we coin as "primal decomposable" problems. More specifically, these problems +/** + * Base class (interface) for specifying optimization NLPs that have separable terms in the + * objective, which we coin as "primal decomposable" problems. More specifically, these problems * have the following structure (please also take a note of the terminology): * * min_x basecase(x) + 1/S sum { r_i(x) : i=1,...,S} (primal decomposable NLP) * - * The subproblem 'basecase' refers to a general nonlinear nonconvex NLP in `x`. We point out - * that the basecase can have general twice continously differentiable objective and + * The subproblem 'basecase' refers to a general nonlinear nonconvex NLP in `x`. We point out + * that the basecase can have general twice continously differentiable objective and * constraints; the latter can be equalities, inequalities, and bounds on `x`. - * Furthermore, borrowing from stochastic programming terminology, the terms `r_i` are + * Furthermore, borrowing from stochastic programming terminology, the terms `r_i` are * called recourse terms , or, in short, r-terms . - * + * * In order to solve the above problem, HiOp solver will perform a series of approximations and will * require the user to solve a so-called 'master' problem - * + * * min basecase(x) + q(x) (master NLP) * x * where the function q(x) is a convex differentiable approximation of sum { r_i(x) : i=1,...,S} * that we refer to as quadratic regularization . - * + * * The user is required to maintain and solve the master problem, more specifically: - * - to add the quadratic regularization to the basecase NLP; the quadratic regularization is - * provided by HiOp hiopInterfacePriDecProblem::RecourseApproxEvaluator classs. the user is - * expected to implement hiopInterfacePriDecProblem::set_recourse_approx_evaluator in the master + * - to add the quadratic regularization to the basecase NLP; the quadratic regularization is + * provided by HiOp hiopInterfacePriDecProblem::RecourseApproxEvaluator classs. the user is + * expected to implement hiopInterfacePriDecProblem::set_recourse_approx_evaluator in the master * problem class. - * - to (re)solve master NLP and return the primal optimal solution `x` to HiOp; for doing this, + * - to (re)solve master NLP and return the primal optimal solution `x` to HiOp; for doing this, * the user is required to implement hiopInterfacePriDecProblem::solve_master method. * - * In addition, the user is required to implement - * - hiopInterfacePriDecProblem::eval_f_rterm + * In addition, the user is required to implement + * - hiopInterfacePriDecProblem::eval_f_rterm * - hiopInterfacePriDecProblem::eval_grad_rterm * which solves the individual recourse subproblems. * - * These methods will be used by the HiOp's primal decomposition solver to compute function value - * and gradient vector individually for each recourse term r_i, which are needed to build the - * convex regularizations q(x). The above methods will be called at arbitrary vectors `x` that - * are decided internally by HiOp. + * These methods will be used by the HiOp's primal decomposition solver to compute function value + * and gradient vector individually for each recourse term r_i, which are needed to build the + * convex regularizations q(x). The above methods will be called at arbitrary vectors `x` that + * are decided internally by HiOp. * */ class hiopInterfacePriDecProblem { public: - /** + /** * Constructor */ - hiopInterfacePriDecProblem() - { - } + hiopInterfacePriDecProblem() {} - virtual ~hiopInterfacePriDecProblem() - { - } + virtual ~hiopInterfacePriDecProblem() {} - - /** + /** * Solves the master problem consisting of the basecase problem plus the recourse terms. * The recourse terms have been added by the outer optimization loop (hiopAlgPrimalDecomposition) * via the 'add_' methods below. (this does not appear to be case anymore Frank TODO) @@ -79,21 +74,20 @@ class hiopInterfacePriDecProblem * * @param master_options_file : input string specifying the name of the options file the NLP solver * should use when solving the master problem. A null value indicates that the NLP solver should use - * its default options file. - * + * its default options file. + * */ virtual hiopSolveStatus solve_master(hiopVector& x, const bool& include_r, - const double& rval = 0, + const double& rval = 0, const double* grad = 0, const double* hess = 0, - const char* master_options_file=nullptr) = 0; + const char* master_options_file = nullptr) = 0; virtual bool eval_f_rterm(size_type idx, const int& n, const double* x, double& rval) = 0; virtual bool eval_grad_rterm(size_type idx, const int& n, double* x, hiopVector& grad) = 0; - - /** + /** * Returns the number S of recourse terms */ virtual size_type get_num_rterms() const = 0; @@ -105,9 +99,8 @@ class hiopInterfacePriDecProblem virtual void get_solution(double* x) const = 0; virtual double get_objective() = 0; - - /** - * Define the evaluator class called by the base case problem class to add the quadratic + /** + * Define the evaluator class called by the base case problem class to add the quadratic * recourse approximation. * This class is intened for internal use of hiopInterfacePriDecProblem class only * In the cases where only RecourseApproxEvaluator is needed, a shell hiopInterfacePriDecProblem @@ -121,23 +114,24 @@ class hiopInterfacePriDecProblem { public: RecourseApproxEvaluator(int nc, const std::string& mem_space); - + RecourseApproxEvaluator(int nc, int S, const std::string& mem_space); - + RecourseApproxEvaluator(const int nc, const int S, const int* list, const std::string& mem_space); - + RecourseApproxEvaluator(const int nc, const int S, const double& rval, - const hiopVector& rgrad, + const hiopVector& rgrad, const hiopVector& rhess, const hiopVector& x0, const std::string& mem_space); - - RecourseApproxEvaluator(int nc,int S, + + RecourseApproxEvaluator(int nc, + int S, const int* list, const double& rval, - const hiopVector& rgrad, + const hiopVector& rgrad, const hiopVector& rhess, const hiopVector& x0, const std::string& mem_space); @@ -145,45 +139,45 @@ class hiopInterfacePriDecProblem virtual ~RecourseApproxEvaluator(); bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); - + bool eval_grad(const size_type& n, const double* x, bool new_x, double* grad); bool eval_hess(const size_type& n, const hiopVector& x, bool new_x, hiopVector& hess); - + virtual bool get_MPI_comm(MPI_Comm& comm_out); - + void set_rval(const double rval); void set_rgrad(const int n, const hiopVector& rgrad); void set_rhess(const int n, const hiopVector& rhess); void set_x0(const int n, const hiopVector& x0); - //void set_xc_idx(const int* idx); + // void set_xc_idx(const int* idx); - int get_S() const; + int get_S() const; double get_rval() const; hiopVector* get_rgrad() const; hiopVector* get_rhess() const; hiopVector* get_x0() const; + protected: int nc_, S_; hiopVectorInt* xc_idx_; double rval_; hiopVector* rgrad_; - hiopVector* rhess_; //diagonal Hessian vector - hiopVector* x0_; //current solution + hiopVector* rhess_; // diagonal Hessian vector + hiopVector* x0_; // current solution /// working buffer in the size of coupling variables (same size as x0_, rgrad_, and rhess_) hiopVector* vec_work_coupling_; /// working buffer in the size of the basecase (primal variables) hiopVector* vec_work_basecase_; - + /// memory space of the PriDec solver (must match the memory space of the NLP solver) std::string mem_space_; }; - - virtual bool set_recourse_approx_evaluator(const int n, RecourseApproxEvaluator* evaluator)=0; + virtual bool set_recourse_approx_evaluator(const int n, RecourseApproxEvaluator* evaluator) = 0; }; - -} //end of namespace + +} // namespace hiop #endif diff --git a/src/Interface/hiopVersion.hpp b/src/Interface/hiopVersion.hpp index 0535f4fe2..7f1937eec 100644 --- a/src/Interface/hiopVersion.hpp +++ b/src/Interface/hiopVersion.hpp @@ -94,22 +94,13 @@ struct hiopVersion static inline std::string fullVersionInfo() { - auto fmt = [] (bool use) { return use ? "YES" : "NO"; }; + auto fmt = [](bool use) { return use ? "YES" : "NO"; }; std::stringstream ss; - ss << "HiOp " - << version() << " compiled on " - << releaseDate() << "\n" - << "Built with:" - << "\nGPU: " << fmt(useGPU) - << "\nMPI: " << fmt(useMPI) - << "\nMAGMA: " << fmt(useMagma) - << "\nRAJA: " << fmt(useRAJA) - << "\nSparse: " << fmt(useSparse) - << "\nCOINHSL: " << fmt(useCOINHSL) - << "\nSTRUMPACK: " << fmt(useSTRUMPACK) - << "\nPARDISO: " << fmt(usePARDISO) - << "\nReSolve: " << fmt(useReSolve) - << "\n"; + ss << "HiOp " << version() << " compiled on " << releaseDate() << "\n" + << "Built with:" + << "\nGPU: " << fmt(useGPU) << "\nMPI: " << fmt(useMPI) << "\nMAGMA: " << fmt(useMagma) << "\nRAJA: " << fmt(useRAJA) + << "\nSparse: " << fmt(useSparse) << "\nCOINHSL: " << fmt(useCOINHSL) << "\nSTRUMPACK: " << fmt(useSTRUMPACK) + << "\nPARDISO: " << fmt(usePARDISO) << "\nReSolve: " << fmt(useReSolve) << "\n"; return ss.str(); } }; diff --git a/src/LinAlg/LinAlgFactory.cpp b/src/LinAlg/LinAlgFactory.cpp index 9cd5b0f94..4a49ddf81 100644 --- a/src/LinAlg/LinAlgFactory.cpp +++ b/src/LinAlg/LinAlgFactory.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -52,7 +52,7 @@ * @author Asher Mancinelli , PNNL * @author Slaven Peles , PNNL * @author Cosmin G. Petra , LLNL - * + * */ #include #include @@ -68,7 +68,7 @@ #include #include #include -#endif // HIOP_USE_RAJA +#endif // HIOP_USE_RAJA #ifdef HIOP_USE_CUDA #include #include @@ -93,11 +93,11 @@ using namespace hiop; /** * @brief Method to create vector. - * + * * Creates legacy HiOp vector by default, RAJA vector when memory space * is specified. */ -hiopVector* LinearAlgebraFactory::create_vector(const ExecSpaceInfo& hi, //const std::string& mem_space, +hiopVector* LinearAlgebraFactory::create_vector(const ExecSpaceInfo& hi, // const std::string& mem_space, const size_type& glob_n, index_type* col_part, MPI_Comm comm) @@ -106,63 +106,83 @@ hiopVector* LinearAlgebraFactory::create_vector(const ExecSpaceInfo& hi, //const if(mem_space_upper == "DEFAULT") { return new hiopVectorPar(glob_n, col_part, comm); } else { - if(hi.exec_backend_ == "RAJA") { #ifdef HIOP_USE_RAJA if(hi.mem_backend_ == "UMPIRE") { #ifdef HIOP_USE_CUDA - return new hiop::hiopVectorRaja(glob_n, mem_space_upper, col_part, comm); -#endif + return new hiop::hiopVectorRaja(glob_n, + mem_space_upper, + col_part, + comm); +#endif #ifdef HIOP_USE_HIP - return new hiop::hiopVectorRaja(glob_n, mem_space_upper, col_part, comm); -#endif + return new hiop::hiopVectorRaja(glob_n, + mem_space_upper, + col_part, + comm); +#endif #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) - return new hiop::hiopVectorRaja(glob_n, mem_space_upper, col_part, comm); + return new hiop::hiopVectorRaja(glob_n, + mem_space_upper, + col_part, + comm); #endif - + } else { // RAJA exec policy with non-Umpire memory backend - //work in progress + // work in progress assert(false && "work in progress"); if(hi.mem_backend_ == "cuda") { #ifdef HIOP_USE_CUDA assert(mem_space_upper == "DEVICE"); - return new hiop::hiopVectorRaja(glob_n, mem_space_upper, col_part, comm); -#endif + return new hiop::hiopVectorRaja(glob_n, + mem_space_upper, + col_part, + comm); +#endif } if(hi.mem_backend_ == "hip") { #ifdef HIOP_USE_HIP assert(mem_space_upper == "DEVICE"); - return new hiop::hiopVectorRaja(glob_n, mem_space_upper, col_part, comm); + return new hiop::hiopVectorRaja(glob_n, + mem_space_upper, + col_part, + comm); #endif - } else { + } else { #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) assert(hi.mem_backend_ == "stdcpp" || hi.mem_backend_ == "auto"); - return new hiop::hiopVectorRaja(glob_n, mem_space_upper, col_part, comm); + return new hiop::hiopVectorRaja(glob_n, + mem_space_upper, + col_part, + comm); #endif } return nullptr; } -#else // non RAJA - assert(false && "requested execution space not available because Hiop was not" - "built with RAJA support"); -#endif // #ifdef HIOP_USE_RAJA - } else { //else for if(hi.exec_backend_ == "RAJA") +#else // non RAJA + assert(false && + "requested execution space not available because Hiop was not" + "built with RAJA support"); +#endif // #ifdef HIOP_USE_RAJA + } else { // else for if(hi.exec_backend_ == "RAJA") if(mem_space_upper == "CUDA") { #ifdef HIOP_USE_CUDA return new hiop::hiopVectorCuda(glob_n, col_part, comm); -#else //ifdef HIOP_USE_CUDA - assert(false && "requested memory space not available because Hiop was not" +#else // ifdef HIOP_USE_CUDA + assert(false && + "requested memory space not available because Hiop was not" "built with CUDA support"); -#endif //ifdef HIOP_USE_CUDA +#endif // ifdef HIOP_USE_CUDA } else { if(mem_space_upper == "HIP") { #ifdef HIOP_USE_HIP return new hiop::hiopVectorHip(glob_n, col_part, comm); -#else //ifdef HIOP_USE_HIP - assert(false && "requested memory space not available because HiOp was not" - "built with HIP support"); -#endif //ifdef HIOP_USE_HIP +#else // ifdef HIOP_USE_HIP + assert(false && + "requested memory space not available because HiOp was not" + "built with HIP support"); +#endif // ifdef HIOP_USE_HIP } else { assert(false && "to be implemented"); } @@ -175,47 +195,45 @@ hiopVector* LinearAlgebraFactory::create_vector(const ExecSpaceInfo& hi, //const /** * @brief Method to create local int vector. - * + * * Creates int vector with operator new by default, RAJA vector when memory space * is specified. */ -hiopVectorInt* LinearAlgebraFactory::create_vector_int(const ExecSpaceInfo& hi, - size_type n) +hiopVectorInt* LinearAlgebraFactory::create_vector_int(const ExecSpaceInfo& hi, size_type n) { const std::string ms = toupper(hi.mem_space_); if(ms == "DEFAULT") { return new hiopVectorIntSeq(n); } else { - if(hi.exec_backend_ == "RAJA") { #ifdef HIOP_USE_RAJA if(hi.mem_backend_ == "UMPIRE") { #ifdef HIOP_USE_CUDA return new hiop::hiopVectorIntRaja(n, ms); -#endif +#endif #ifdef HIOP_USE_HIP return new hiop::hiopVectorIntRaja(n, ms); -#endif +#endif #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) return new hiop::hiopVectorIntRaja(n, ms); #endif - + } else { // RAJA exec policy with non-Umpire memory backend - //work in progress + // work in progress assert(false && "work in progress"); if(hi.mem_backend_ == "cuda") { #ifdef HIOP_USE_CUDA assert(ms == "DEVICE"); return new hiop::hiopVectorIntRaja(n, ms); -#endif +#endif } if(hi.mem_backend_ == "hip") { #ifdef HIOP_USE_HIP assert(ms == "DEVICE"); return new hiop::hiopVectorIntRaja(n, ms); #endif - } else { + } else { #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) assert(hi.mem_backend_ == "stdcpp" || hi.mem_backend_ == "auto"); return new hiop::hiopVectorIntRaja(n, ms); @@ -224,25 +242,28 @@ hiopVectorInt* LinearAlgebraFactory::create_vector_int(const ExecSpaceInfo& hi, return nullptr; } #else // non RAJA - assert(false && "requested execution space not available because Hiop was not" - "built with RAJA support"); + assert(false && + "requested execution space not available because Hiop was not" + "built with RAJA support"); #endif - } else { //else for if(hi.exec_backend_ == "RAJA") + } else { // else for if(hi.exec_backend_ == "RAJA") if(ms == "CUDA") { #ifdef HIOP_USE_CUDA return new hiop::hiopVectorIntCuda(n, ms); -#else //ifdef HIOP_USE_CUDA - assert(false && "requested memory space not available because Hiop was not" +#else // ifdef HIOP_USE_CUDA + assert(false && + "requested memory space not available because Hiop was not" "built with CUDA support"); -#endif //ifdef HIOP_USE_CUDA +#endif // ifdef HIOP_USE_CUDA } else { if(ms == "HIP") { #ifdef HIOP_USE_HIP return new hiop::hiopVectorIntHip(n, ms); -#else //ifdef HIOP_USE_HIP - assert(false && "requested memory space not available because HiOp was not" - "built with HIP support"); -#endif //ifdef HIOP_USE_CUDA +#else // ifdef HIOP_USE_HIP + assert(false && + "requested memory space not available because HiOp was not" + "built with HIP support"); +#endif // ifdef HIOP_USE_CUDA } else { assert(false && "to be implemented"); } @@ -255,11 +276,11 @@ hiopVectorInt* LinearAlgebraFactory::create_vector_int(const ExecSpaceInfo& hi, /** * @brief Method to create matrix. - * + * * Creates legacy HiOp dense matrix by default, RAJA vector when memory space * is specified. */ -hiopMatrixDense* LinearAlgebraFactory::create_matrix_dense(const ExecSpaceInfo& hi, //const std::string& mem_space, +hiopMatrixDense* LinearAlgebraFactory::create_matrix_dense(const ExecSpaceInfo& hi, // const std::string& mem_space, const size_type& m, const size_type& glob_n, index_type* col_part, @@ -287,18 +308,16 @@ hiopMatrixDense* LinearAlgebraFactory::create_matrix_dense(const ExecSpaceInfo& col_part, comm, m_max_alloc); -#else // this is for #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +#else // this is for #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) return new hiop::hiopMatrixDenseRaja(m, glob_n, mem_space_upper, col_part, comm, m_max_alloc); -#endif //HIOP_USE_CUDA - +#endif // HIOP_USE_CUDA - - } else { // if(hi.mem_backend_ == "UMPIRE") + } else { // if(hi.mem_backend_ == "UMPIRE") // RAJA exec policy but memory backend not based on Umpire assert(false && "work in progress"); if(hi.mem_backend_ == "CUDA") { @@ -312,8 +331,8 @@ hiopMatrixDense* LinearAlgebraFactory::create_matrix_dense(const ExecSpaceInfo& m_max_alloc); #else assert(false && "cuda memory backend not available because HiOp was not built with CUDA"); -#endif //HIOP_USE_CUDA - +#endif // HIOP_USE_CUDA + } else if(hi.mem_backend_ == "HIP") { #if defined(HIOP_USE_HIP) assert(mem_space_upper == "DEVICE"); @@ -326,11 +345,11 @@ hiopMatrixDense* LinearAlgebraFactory::create_matrix_dense(const ExecSpaceInfo& #else assert(false && "hip memory backend not available because HiOp was not built with HIP"); return nullptr; -#endif //HIOP_USE_HIP - +#endif // HIOP_USE_HIP + } else { - //RAJA-OMP exec policy with non-Umpire memory backend - + // RAJA-OMP exec policy with non-Umpire memory backend + assert(false && "work in progress"); assert(mem_space_upper == "host"); @@ -343,36 +362,36 @@ hiopMatrixDense* LinearAlgebraFactory::create_matrix_dense(const ExecSpaceInfo& m_max_alloc); #else assert(false && "cuda memory backend not available because HiOp was not built with RAJA-OMP"); -#endif //!defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) - +#endif //! defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) } - } // end of else if(hi.mem_backend_ == "UMPIRE") -#else // else of ifdef HIOP_USE_RAJA - assert(false && "requested memory space not available because Hiop was not" + } // end of else if(hi.mem_backend_ == "UMPIRE") +#else // else of ifdef HIOP_USE_RAJA + assert(false && + "requested memory space not available because Hiop was not" "built with RAJA support"); return nullptr; -#endif //HIOP_USE_RAJA - } else {// for if(hi.exec_backend_ == "RAJA") +#endif // HIOP_USE_RAJA + } else { // for if(hi.exec_backend_ == "RAJA") assert(false && "device memory backend not available because HiOp was not built with RAJA"); if(mem_space_upper == "CUDA") { #ifdef HIOP_USE_CUDA - assert(mem_space_upper == "DEVICE"); - return new hiop::hiopMatrixDenseRaja(m, - glob_n, - mem_space_upper, - col_part, - comm, - m_max_alloc); + assert(mem_space_upper == "DEVICE"); + return new hiop::hiopMatrixDenseRaja(m, + glob_n, + mem_space_upper, + col_part, + comm, + m_max_alloc); #else - assert(false && "requested memory space not available because Hiop was not built with CUDA"); -#endif //HIOP_USE_CUDA + assert(false && "requested memory space not available because Hiop was not built with CUDA"); +#endif // HIOP_USE_CUDA } else if(mem_space_upper == "HIP") { assert(false && "to be implemented"); } else { assert(false && "to be implemented"); } - } // end of else for if(hi.exec_backend_ == "RAJA") - } // end of else if(mem_space_upper == "DEFAULT") + } // end of else for if(hi.exec_backend_ == "RAJA") + } // end of else if(mem_space_upper == "DEFAULT") assert(false && "should not reach here"); return nullptr; } @@ -381,7 +400,7 @@ hiopMatrixDense* LinearAlgebraFactory::create_matrix_dense(const ExecSpaceInfo& * @brief Creates an instance of a sparse matrix of the appropriate implementation * depending on the build. */ -hiopMatrixSparse* LinearAlgebraFactory::create_matrix_sparse(const ExecSpaceInfo& hi, +hiopMatrixSparse* LinearAlgebraFactory::create_matrix_sparse(const ExecSpaceInfo& hi, size_type rows, size_type cols, size_type nnz) @@ -394,60 +413,78 @@ hiopMatrixSparse* LinearAlgebraFactory::create_matrix_sparse(const ExecSpaceInfo #ifdef HIOP_USE_RAJA if(hi.mem_backend_ == "UMPIRE") { #if defined(HIOP_USE_CUDA) - return new hiopMatrixRajaSparseTriplet(rows, cols, nnz, mem_space_upper); + return new hiopMatrixRajaSparseTriplet(rows, + cols, + nnz, + mem_space_upper); #elif defined(HIOP_USE_HIP) - return new hiopMatrixRajaSparseTriplet(rows, cols, nnz, mem_space_upper); -#else // this is for #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) - return new hiopMatrixRajaSparseTriplet(rows, cols, nnz, mem_space_upper); -#endif //HIOP_USE_CUDA - } else { // if(hi.mem_backend_ == "UMPIRE") + return new hiopMatrixRajaSparseTriplet(rows, + cols, + nnz, + mem_space_upper); +#else // this is for #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) + return new hiopMatrixRajaSparseTriplet(rows, + cols, + nnz, + mem_space_upper); +#endif // HIOP_USE_CUDA + } else { // if(hi.mem_backend_ == "UMPIRE") // RAJA exec policy but memory backend not based on Umpire assert(false && "work in progress"); if(hi.mem_backend_ == "CUDA") { #ifdef HIOP_USE_CUDA assert(mem_space_upper == "DEVICE"); - return new hiopMatrixRajaSparseTriplet(rows, cols, nnz, mem_space_upper); + return new hiopMatrixRajaSparseTriplet(rows, + cols, + nnz, + mem_space_upper); #else assert(false && "cuda memory backend not available because HiOp was not built with CUDA"); return nullptr; -#endif //HIOP_USE_CUDA +#endif // HIOP_USE_CUDA } else if(hi.mem_backend_ == "HIP") { #if defined(HIOP_USE_HIP) assert(mem_space_upper == "DEVICE"); - return new hiopMatrixRajaSparseTriplet(rows, cols, nnz, mem_space_upper); + return new hiopMatrixRajaSparseTriplet(rows, + cols, + nnz, + mem_space_upper); #else assert(false && "hip memory backend not available because HiOp was not built with HIP"); return nullptr; -#endif //HIOP_USE_HIP +#endif // HIOP_USE_HIP } else { + // RAJA-OMP exec policy with non-Umpire memory backend - //RAJA-OMP exec policy with non-Umpire memory backend - assert(false && "work in progress"); assert(mem_space_upper == "host"); #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) - return new hiopMatrixRajaSparseTriplet(rows, cols, nnz, mem_space_upper); + return new hiopMatrixRajaSparseTriplet(rows, + cols, + nnz, + mem_space_upper); #else return new hiopMatrixSparseTriplet(rows, cols, nnz); -#endif //!defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +#endif //! defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) } - } // end of else if(hi.mem_backend_ == "UMPIRE") -#else - assert(false && "requested memory space not available because Hiop was not" - "built with RAJA support"); - return nullptr; -#endif //HIOP_USE_RAJA - } else { // for if(hi.exec_backend_ == "RAJA") + } // end of else if(hi.mem_backend_ == "UMPIRE") +#else + assert(false && + "requested memory space not available because Hiop was not" + "built with RAJA support"); + return nullptr; +#endif // HIOP_USE_RAJA + } else { // for if(hi.exec_backend_ == "RAJA") if(mem_space_upper == "CUDA") { #ifdef HIOP_USE_CUDA - assert(mem_space_upper == "DEVICE"); - assert(false && "not supported yet"); - return nullptr; + assert(mem_space_upper == "DEVICE"); + assert(false && "not supported yet"); + return nullptr; #else - assert(false && "requested memory space not available because Hiop was not built with CUDA"); - return nullptr; -#endif //HIOP_USE_CUDA + assert(false && "requested memory space not available because Hiop was not built with CUDA"); + return nullptr; +#endif // HIOP_USE_CUDA } else if(mem_space_upper == "HIP") { assert(false && "to be implemented"); return nullptr; @@ -455,8 +492,8 @@ hiopMatrixSparse* LinearAlgebraFactory::create_matrix_sparse(const ExecSpaceInfo assert(false && "to be implemented"); return nullptr; } - } // end of else for if(hi.exec_backend_ == "RAJA") - } // end of else if(mem_space_upper == "DEFAULT") + } // end of else for if(hi.exec_backend_ == "RAJA") + } // end of else if(mem_space_upper == "DEFAULT") assert(false && "should not reach here"); return nullptr; } @@ -476,11 +513,11 @@ hiopMatrixSparseCSR* LinearAlgebraFactory::create_matrix_sparse_csr(const std::s assert(false && "should not reach here"); return nullptr; } - -hiopMatrixSparseCSR* LinearAlgebraFactory::create_matrix_sparse_csr(const std::string& mem_space, - size_type rows, - size_type cols, - size_type nnz) + +hiopMatrixSparseCSR* LinearAlgebraFactory::create_matrix_sparse_csr(const std::string& mem_space, + size_type rows, + size_type cols, + size_type nnz) { const std::string mem_space_upper = toupper(mem_space); if(mem_space_upper == "DEFAULT") { @@ -500,9 +537,7 @@ hiopMatrixSparseCSR* LinearAlgebraFactory::create_matrix_sparse_csr(const std:: * @brief Creates an instance of a symmetric sparse matrix of the appropriate * implementation depending on the build. */ -hiopMatrixSparse* LinearAlgebraFactory::create_matrix_sym_sparse(const ExecSpaceInfo& hi, - size_type size, - size_type nnz) +hiopMatrixSparse* LinearAlgebraFactory::create_matrix_sym_sparse(const ExecSpaceInfo& hi, size_type size, size_type nnz) { const std::string mem_space_upper = toupper(hi.mem_space_); if(mem_space_upper == "DEFAULT") { @@ -512,60 +547,72 @@ hiopMatrixSparse* LinearAlgebraFactory::create_matrix_sym_sparse(const ExecSpace #ifdef HIOP_USE_RAJA if(hi.mem_backend_ == "UMPIRE") { #if defined(HIOP_USE_CUDA) - return new hiopMatrixRajaSymSparseTriplet(size, nnz, mem_space_upper); + return new hiopMatrixRajaSymSparseTriplet(size, + nnz, + mem_space_upper); #elif defined(HIOP_USE_HIP) - return new hiopMatrixRajaSymSparseTriplet(size, nnz, mem_space_upper); -#else // this is for #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) - return new hiopMatrixRajaSymSparseTriplet(size, nnz, mem_space_upper); -#endif //HIOP_USE_CUDA - } else { // if(hi.mem_backend_ == "UMPIRE") + return new hiopMatrixRajaSymSparseTriplet(size, + nnz, + mem_space_upper); +#else // this is for #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) + return new hiopMatrixRajaSymSparseTriplet(size, + nnz, + mem_space_upper); +#endif // HIOP_USE_CUDA + } else { // if(hi.mem_backend_ == "UMPIRE") // RAJA exec policy but memory backend not based on Umpire assert(false && "work in progress"); if(hi.mem_backend_ == "CUDA") { #ifdef HIOP_USE_CUDA assert(mem_space_upper == "DEVICE"); - return new hiopMatrixRajaSymSparseTriplet(size, nnz, mem_space_upper); + return new hiopMatrixRajaSymSparseTriplet(size, + nnz, + mem_space_upper); #else assert(false && "cuda memory backend not available because HiOp was not built with CUDA"); return nullptr; -#endif //HIOP_USE_CUDA +#endif // HIOP_USE_CUDA } else if(hi.mem_backend_ == "HIP") { #if defined(HIOP_USE_HIP) assert(mem_space_upper == "DEVICE"); - return new hiopMatrixRajaSymSparseTriplet(size, nnz, mem_space_upper); + return new hiopMatrixRajaSymSparseTriplet(size, + nnz, + mem_space_upper); #else assert(false && "hip memory backend not available because HiOp was not built with HIP"); return nullptr; -#endif //HIOP_USE_HIP +#endif // HIOP_USE_HIP } else { + // RAJA-OMP exec policy with non-Umpire memory backend - //RAJA-OMP exec policy with non-Umpire memory backend - assert(false && "work in progress"); assert(mem_space_upper == "host"); #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) - return new hiopMatrixRajaSymSparseTriplet(size, nnz, mem_space_upper); + return new hiopMatrixRajaSymSparseTriplet(size, + nnz, + mem_space_upper); #else return new hiopMatrixSymSparseTriplet(size, nnz); -#endif //!defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +#endif //! defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) } - } // end of else if(hi.mem_backend_ == "UMPIRE") -#else - assert(false && "requested memory space not available because Hiop was not" - "built with RAJA support"); - return nullptr; -#endif //HIOP_USE_RAJA - } else { // for if(hi.exec_backend_ == "RAJA") + } // end of else if(hi.mem_backend_ == "UMPIRE") +#else + assert(false && + "requested memory space not available because Hiop was not" + "built with RAJA support"); + return nullptr; +#endif // HIOP_USE_RAJA + } else { // for if(hi.exec_backend_ == "RAJA") if(mem_space_upper == "CUDA") { #ifdef HIOP_USE_CUDA - assert(mem_space_upper == "DEVICE"); - assert(false && "not supported yet"); - return nullptr; + assert(mem_space_upper == "DEVICE"); + assert(false && "not supported yet"); + return nullptr; #else - assert(false && "requested memory space not available because Hiop was not built with CUDA"); - return nullptr; -#endif //HIOP_USE_CUDA + assert(false && "requested memory space not available because Hiop was not built with CUDA"); + return nullptr; +#endif // HIOP_USE_CUDA } else if(mem_space_upper == "HIP") { assert(false && "to be implemented"); return nullptr; @@ -573,8 +620,8 @@ hiopMatrixSparse* LinearAlgebraFactory::create_matrix_sym_sparse(const ExecSpace assert(false && "to be implemented"); return nullptr; } - } // end of else for if(hi.exec_backend_ == "RAJA") - } // end of else if(mem_space_upper == "DEFAULT") + } // end of else for if(hi.exec_backend_ == "RAJA") + } // end of else if(mem_space_upper == "DEFAULT") assert(false && "should not reach here"); return nullptr; } @@ -588,13 +635,13 @@ double* LinearAlgebraFactory::create_raw_array(const std::string& mem_space, siz if(mem_space_upper == "DEFAULT") { return new double[n]; } else { - #ifdef HIOP_USE_RAJA auto& resmgr = umpire::ResourceManager::getInstance(); - umpire::Allocator al = resmgr.getAllocator(mem_space_upper); - return static_cast(al.allocate(n*sizeof(double))); + umpire::Allocator al = resmgr.getAllocator(mem_space_upper); + return static_cast(al.allocate(n * sizeof(double))); #else - assert(false && "requested memory space not available because Hiop was not" + assert(false && + "requested memory space not available because Hiop was not" "built with RAJA support"); #endif } @@ -609,11 +656,11 @@ void LinearAlgebraFactory::delete_raw_array(const std::string& mem_space, double { const std::string mem_space_upper = toupper(mem_space); if(mem_space_upper == "DEFAULT") { - delete [] a; + delete[] a; } else { #ifdef HIOP_USE_RAJA auto& resmgr = umpire::ResourceManager::getInstance(); - umpire::Allocator al = resmgr.getAllocator(mem_space_upper); + umpire::Allocator al = resmgr.getAllocator(mem_space_upper); al.deallocate(a); #endif } diff --git a/src/LinAlg/LinAlgFactory.hpp b/src/LinAlg/LinAlgFactory.hpp index e721b7e51..64137f7fe 100644 --- a/src/LinAlg/LinAlgFactory.hpp +++ b/src/LinAlg/LinAlgFactory.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #pragma once @@ -57,17 +57,17 @@ #include #include -namespace hiop { +namespace hiop +{ - /** * @brief Factory for HiOp's linear algebra objects - * + * */ class LinearAlgebraFactory { public: - LinearAlgebraFactory() = delete; + LinearAlgebraFactory() = delete; ~LinearAlgebraFactory() = delete; /** @@ -80,12 +80,11 @@ class LinearAlgebraFactory /** * @brief Static method to create local int vector. */ - static hiopVectorInt* create_vector_int(const ExecSpaceInfo& hi, - size_type size); + static hiopVectorInt* create_vector_int(const ExecSpaceInfo& hi, size_type size); /** * @brief Static method to create a dense matrix. - * + * */ static hiopMatrixDense* create_matrix_dense(const ExecSpaceInfo& hi, const size_type& m, @@ -96,17 +95,14 @@ class LinearAlgebraFactory /** * @brief Static method to create the default, triplet sparse matrix */ - static hiopMatrixSparse* create_matrix_sparse(const ExecSpaceInfo& hi, - size_type rows, - size_type cols, - size_type nnz); + static hiopMatrixSparse* create_matrix_sparse(const ExecSpaceInfo& hi, size_type rows, size_type cols, size_type nnz); /** * @brief Static method to create an empty CSR sparse matrix of the type that supports the * memory space passed as argument. */ static hiopMatrixSparseCSR* create_matrix_sparse_csr(const std::string& mem_space); - + /** * @brief Static method to create a CSR sparse matrix of the type that supports the * memory space passed as argument. @@ -119,10 +115,8 @@ class LinearAlgebraFactory /** * @brief Static method to create a symmetric sparse matrix */ - static hiopMatrixSparse* create_matrix_sym_sparse(const ExecSpaceInfo& hi, - size_type size, - size_type nnz); - + static hiopMatrixSparse* create_matrix_sym_sparse(const ExecSpaceInfo& hi, size_type size, size_type nnz); + /** * @brief Static method to create a raw C array */ @@ -134,4 +128,4 @@ class LinearAlgebraFactory static void delete_raw_array(const std::string& mem_space, double* a); }; -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/MatrixSparseCsrCudaKernels.hpp b/src/LinAlg/MatrixSparseCsrCudaKernels.hpp index e6da3d880..a39d708ea 100644 --- a/src/LinAlg/MatrixSparseCsrCudaKernels.hpp +++ b/src/LinAlg/MatrixSparseCsrCudaKernels.hpp @@ -64,44 +64,32 @@ namespace cuda /** * Set diagonal of the CSR matrix to `val` by performing a binary search on the column indexes * for each row. Assumes pointers are on the device and parallelizes over rows. - * + * * @pre CSR matrix must be square. * @pre Diagonal entries must appear explicitly among the nonzeros. * @pre Column indexes must be sorted for any given row. - */ -void csr_set_diag_kernel(int n, - int nnz, - int* irowptr, - int* jcoldind, - double* values, - double val, - int block_size); + */ +void csr_set_diag_kernel(int n, int nnz, int* irowptr, int* jcoldind, double* values, double val, int block_size); /** * Add the constant `val` to the diagonal of the CSR matrix. Performs a binary search on the column indexes * for each row. Assumes pointers are on the device and parallelizes over rows. - * + * * @pre CSR matrix must be square. * @pre Diagonal entries must appear explicitly among the nonzeros. * @pre Column indexes must be sorted for any given row. - */ -void csr_add_diag_kernel(int n, - int nnz, - int* irowptr, - int* jcoldind, - double* values, - double Dval, - int block_size); + */ +void csr_add_diag_kernel(int n, int nnz, int* irowptr, int* jcoldind, double* values, double Dval, int block_size); /** * Add entries of the array `values` to the diagonal of the CSR matrix. Performs a binary search on the column indexes * for each row. Assumes pointers are on the device and parallelizes over rows. - * + * * @pre CSR matrix must be square. * @pre Diagonal entries must appear explicitly among the nonzeros. * @pre Column indexes must be sorted for any given row. - * @pre - */ + * @pre + */ void csr_add_diag_kernel(int n, int nnz, int* irowptr, @@ -114,7 +102,7 @@ void csr_add_diag_kernel(int n, /** * Copies the diagonal of a CSR matrix into the array `diag_out`. All pointers are on the device. The * output array should be allocated to hold `n` doubles. - * + * * @pre CSR matrix must be square. * @pre Column indexes must be sorted for any given row. */ @@ -133,8 +121,8 @@ void csr_form_diag_symbolic_kernel(int n, int* irowptr, int* jcolind, int block_ /** * Scales rows of the sparse CSR matrix with the diagonal matrix given by array `D` - * - * @pre All pointers should be on the device. + * + * @pre All pointers should be on the device. * @pre Column indexes must be sorted for any given row. */ void csr_scalerows_kernel(int nrows, @@ -145,7 +133,7 @@ void csr_scalerows_kernel(int nrows, double* values, const double* D, int block_size); -} //end of namespace cuda -} //end of namespace hiop +} // end of namespace cuda +} // end of namespace hiop #endif diff --git a/src/LinAlg/ReSolve/IterativeRefinement.cpp b/src/LinAlg/ReSolve/IterativeRefinement.cpp index 719e1045e..f7e531cd3 100644 --- a/src/LinAlg/ReSolve/IterativeRefinement.cpp +++ b/src/LinAlg/ReSolve/IterativeRefinement.cpp @@ -67,729 +67,631 @@ #define checkCudaErrors(val) resolveCheckCudaError((val), __FILE__, __LINE__) -namespace ReSolve { +namespace ReSolve +{ - // Default constructor - IterativeRefinement::IterativeRefinement() - {} +// Default constructor +IterativeRefinement::IterativeRefinement() {} - // Parametrized constructor - IterativeRefinement::IterativeRefinement(int restart, - double tol, - int maxit) - : restart_{restart}, +// Parametrized constructor +IterativeRefinement::IterativeRefinement(int restart, double tol, int maxit) + : restart_{restart}, maxit_{maxit}, tol_{tol} - {} - - IterativeRefinement::~IterativeRefinement() - { - cusparseDestroySpMat(mat_A_); - // free GPU variables that belong to this class and are not shared with CUSOLVER class - cudaFree(mv_buffer_); - cudaFree(d_V_); - cudaFree(d_Z_); - cudaFree(d_rvGPU_); - cudaFree(d_Hcolumn_); - - if(orth_option_ == "cgs2") { - cudaFree(d_H_col_); - } - // delete all CPU GMRES variables - delete[] h_H_; +{} + +IterativeRefinement::~IterativeRefinement() +{ + cusparseDestroySpMat(mat_A_); + // free GPU variables that belong to this class and are not shared with CUSOLVER class + cudaFree(mv_buffer_); + cudaFree(d_V_); + cudaFree(d_Z_); + cudaFree(d_rvGPU_); + cudaFree(d_Hcolumn_); + + if(orth_option_ == "cgs2") { + cudaFree(d_H_col_); + } + // delete all CPU GMRES variables + delete[] h_H_; - if(orth_option_ == "mgs_two_synch" || orth_option_ == "mgs_pm") { - delete[] h_L_; - delete[] h_rv_; - } - delete[] h_c_; - delete[] h_s_; - delete[] h_rs_; + if(orth_option_ == "mgs_two_synch" || orth_option_ == "mgs_pm") { + delete[] h_L_; + delete[] h_rv_; + } + delete[] h_c_; + delete[] h_s_; + delete[] h_rs_; - if(orth_option_ == "mgs_pm" || orth_option_ == "cgs2") { - delete[] h_aux_; - } + if(orth_option_ == "mgs_pm" || orth_option_ == "cgs2") { + delete[] h_aux_; + } +} + +int IterativeRefinement::setup_system_matrix(int n, int nnz, int* dia, int* dja, double* da) +{ + dia_ = dia; + dja_ = dja; + da_ = da; + n_ = n; + nnz_ = nnz; + checkCudaErrors(cusparseCreateCsr(&mat_A_, + n, + n, + nnz, + dia_, + dja_, + da_, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_32I, + CUSPARSE_INDEX_BASE_ZERO, + CUDA_R_64F)); + return 0; +} + +int IterativeRefinement::setup(cusparseHandle_t cusparse_handle, + cublasHandle_t cublas_handle, + cusolverRfHandle_t cusolverrf_handle, + int n, + double* d_T, + int* d_P, + int* d_Q, + double* devx, + double* devr) +{ + cusparse_handle_ = cusparse_handle; + cublas_handle_ = cublas_handle; + cusolverrf_handle_ = cusolverrf_handle; + assert(n_ == n && "Size of the linear system incorrectly set in the iterative refinement class!\n"); + + // only set pointers + d_T_ = d_T; + d_P_ = d_P; + d_Q_ = d_Q; + + // setup matvec + + cusparseCreateDnVec(&vec_x_, n_, devx, CUDA_R_64F); + cusparseCreateDnVec(&vec_Ax_, n_, devr, CUDA_R_64F); + size_t buffer_size; + checkCudaErrors(cusparseSpMV_bufferSize(cusparse_handle_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &(minusone_), + mat_A_, + vec_x_, + &(one_), + vec_Ax_, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + &buffer_size)); + + cudaDeviceSynchronize(); + checkCudaErrors(cudaMalloc(&mv_buffer_, buffer_size)); + + // allocate space for the GPU + + checkCudaErrors(cudaMalloc(&(d_V_), n_ * (restart_ + 1) * sizeof(double))); + checkCudaErrors(cudaMalloc(&(d_Z_), n_ * (restart_ + 1) * sizeof(double))); + checkCudaErrors(cudaMalloc(&(d_rvGPU_), 2 * (restart_ + 1) * sizeof(double))); + checkCudaErrors(cudaMalloc(&(d_Hcolumn_), 2 * (restart_ + 1) * (restart_ + 1) * sizeof(double))); + + // and for the CPU + + h_H_ = new double[restart_ * (restart_ + 1)]; + h_c_ = new double[restart_]; // needed for givens + h_s_ = new double[restart_]; // same + h_rs_ = new double[restart_ + 1]; // for residual norm history + + // for specific orthogonalization options, need a little more memory + if(orth_option_ == "mgs_two_synch" || orth_option_ == "mgs_pm") { + h_L_ = new double[restart_ * (restart_ + 1)]; + h_rv_ = new double[restart_ + 1]; } - int IterativeRefinement::setup_system_matrix(int n, int nnz, int* dia, int* dja, double* da) - { - dia_ = dia; - dja_ = dja; - da_ = da; - n_ = n; - nnz_ = nnz; - checkCudaErrors(cusparseCreateCsr(&mat_A_, - n, - n, - nnz, - dia_, - dja_, - da_, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F)); - return 0; + if(orth_option_ == "cgs2") { + h_aux_ = new double[restart_ + 1]; + checkCudaErrors(cudaMalloc(&(d_H_col_), (restart_ + 1) * sizeof(double))); } - int IterativeRefinement::setup(cusparseHandle_t cusparse_handle, - cublasHandle_t cublas_handle, - cusolverRfHandle_t cusolverrf_handle, - int n, - double* d_T, - int* d_P, - int* d_Q, - double* devx, - double* devr) - { - cusparse_handle_ = cusparse_handle; - cublas_handle_ = cublas_handle; - cusolverrf_handle_ = cusolverrf_handle; - assert(n_ == n && "Size of the linear system incorrectly set in the iterative refinement class!\n"); - - // only set pointers - d_T_ = d_T; - d_P_ = d_P; - d_Q_ = d_Q; - - // setup matvec - - cusparseCreateDnVec(&vec_x_, n_, devx, CUDA_R_64F); - cusparseCreateDnVec(&vec_Ax_, n_, devr, CUDA_R_64F); - size_t buffer_size; - checkCudaErrors(cusparseSpMV_bufferSize(cusparse_handle_, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &(minusone_), - mat_A_, - vec_x_, - &(one_), - vec_Ax_, - CUDA_R_64F, - CUSPARSE_SPMV_CSR_ALG2, - &buffer_size)); - - cudaDeviceSynchronize(); - checkCudaErrors(cudaMalloc(&mv_buffer_, buffer_size)); - - // allocate space for the GPU - - checkCudaErrors(cudaMalloc(&(d_V_), n_ * (restart_ + 1) * sizeof(double))); - checkCudaErrors(cudaMalloc(&(d_Z_), n_ * (restart_ + 1) * sizeof(double))); - checkCudaErrors(cudaMalloc(&(d_rvGPU_), 2 * (restart_ + 1) * sizeof(double))); - checkCudaErrors(cudaMalloc(&(d_Hcolumn_), 2 * (restart_ + 1) * (restart_ + 1) * sizeof(double))); - - // and for the CPU - - h_H_ = new double[restart_ * (restart_ + 1)]; - h_c_ = new double[restart_]; // needed for givens - h_s_ = new double[restart_]; // same - h_rs_ = new double[restart_ + 1]; // for residual norm history - - // for specific orthogonalization options, need a little more memory - if(orth_option_ == "mgs_two_synch" || orth_option_ == "mgs_pm") { - h_L_ = new double[restart_ * (restart_ + 1)]; - h_rv_ = new double[restart_ + 1]; + if(orth_option_ == "mgs_pm") { + h_aux_ = new double[restart_ + 1]; + } + return 0; +} + +double IterativeRefinement::getFinalResidalNorm() { return final_residual_norm_; } + +double IterativeRefinement::getInitialResidalNorm() { return initial_residual_norm_; } + +double IterativeRefinement::getBNorm() { return bnorm_; } + +int IterativeRefinement::getFinalNumberOfIterations() { return fgmres_iters_; } + +double IterativeRefinement::matrixAInfNrm() +{ + double nrm; + matrix_row_sums(n_, nnz_, dia_, da_, d_Z_); + cusolverSpDnrminf(cusolver_handle_, n_, d_Z_, &nrm, mv_buffer_ /* at least 8192 bytes */); + return nrm; +} + +double IterativeRefinement::vectorInfNrm(int n, double* d_v) +{ + double nrm; + + cusolverSpDnrminf(cusolver_handle_, n, d_v, &nrm, mv_buffer_ /* at least 8192 bytes */); + return nrm; +} + +void IterativeRefinement::fgmres(double* d_x, double* d_b) +{ + int outer_flag = 1; + int notconv = 1; + int i = 0; + int it = 0; + int j; + int k; + int k1; + + double t; + double rnorm; + double bnorm; + // double rnorm_aux; + double tolrel; + // V[0] = b-A*x_0 + cudaMemcpy(&(d_V_[0]), d_b, sizeof(double) * n_, cudaMemcpyDeviceToDevice); + + cudaMatvec(d_x, d_V_, "residual"); + + rnorm = 0.0; + cublasDdot(cublas_handle_, n_, d_b, 1, d_b, 1, &bnorm); + cublasDdot(cublas_handle_, n_, d_V_, 1, d_V_, 1, &rnorm); + // rnorm = ||V_1|| + rnorm = sqrt(rnorm); + bnorm = sqrt(bnorm); + bnorm_ = bnorm; + while(outer_flag) { + // check if maybe residual is already small enough? + if(it == 0) { + tolrel = tol_ * rnorm; + if(fabs(tolrel) < 1e-16) { + tolrel = 1e-16; + } } - - if(orth_option_ == "cgs2") { - h_aux_ = new double[restart_ + 1]; - checkCudaErrors(cudaMalloc(&(d_H_col_), (restart_ + 1) * sizeof(double))); + int exit_cond = 0; + if(conv_cond() == 0) { + exit_cond = ((fabs(rnorm - ZERO) <= EPSILON)); + } else { + if(conv_cond() == 1) { + exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < tol_)); + } else { + if(conv_cond() == 2) { + exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < (tol_ * bnorm))); + } + } } - - if(orth_option_ == "mgs_pm") { - h_aux_ = new double[restart_ + 1]; + if(exit_cond) { + outer_flag = 0; + final_residual_norm_ = rnorm; + initial_residual_norm_ = rnorm; + fgmres_iters_ = 0; + break; } - return 0; - } - double IterativeRefinement::getFinalResidalNorm() - { - return final_residual_norm_; - } + // normalize first vector + t = 1.0 / rnorm; + cublasDscal(cublas_handle_, n_, &t, d_V_, 1); + + // initialize norm history + + h_rs_[0] = rnorm; + initial_residual_norm_ = rnorm; + i = -1; + notconv = 1; + + while((notconv) && (it < maxit_)) { + i++; + it++; + // Z_i = (LU)^{-1}*V_i + cudaMemcpy(&d_Z_[i * n_], &d_V_[i * n_], sizeof(double) * n_, cudaMemcpyDeviceToDevice); + checkCudaErrors(cusolverRfSolve(cusolverrf_handle_, d_P_, d_Q_, 1, d_T_, n_, &d_Z_[i * n_], n_)); + cudaDeviceSynchronize(); + // V_{i+1}=A*Z_i + cudaMatvec(&d_Z_[i * n_], &d_V_[(i + 1) * n_], "matvec"); + // orthogonalize V[i+1], form a column of h_L + GramSchmidt(i); + + if(i != 0) { + for(int k = 1; k <= i; k++) { + k1 = k - 1; + t = h_H_[i * (restart_ + 1) + k1]; + h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k]; + h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; + } + } // if i!=0 - double IterativeRefinement::getInitialResidalNorm() - { - return initial_residual_norm_; - } + double Hii = h_H_[i * (restart_ + 1) + i]; + double Hii1 = h_H_[(i) * (restart_ + 1) + i + 1]; + double gam = sqrt(Hii * Hii + Hii1 * Hii1); - double IterativeRefinement::getBNorm() - { - return bnorm_; - } + if(fabs(gam - ZERO) <= EPSILON) { + gam = EPSMAC; + } - int IterativeRefinement::getFinalNumberOfIterations() - { - return fgmres_iters_; - } + /* next Given's rotation */ + h_c_[i] = Hii / gam; + h_s_[i] = Hii1 / gam; + h_rs_[i + 1] = -h_s_[i] * h_rs_[i]; + h_rs_[i] = h_c_[i] * h_rs_[i]; + h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; + h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii; - double IterativeRefinement::matrixAInfNrm() - { - double nrm; - matrix_row_sums(n_, nnz_, dia_, da_, d_Z_); - cusolverSpDnrminf(cusolver_handle_, - n_, - d_Z_, - &nrm, - mv_buffer_ /* at least 8192 bytes */); - return nrm; - } + // residual norm estimate + rnorm = fabs(h_rs_[i + 1]); + // check convergence + if(i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) { + notconv = 0; + } + } // inner while + + // solve tri system + h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i]; + for(int ii = 2; ii <= i + 1; ii++) { + k = i - ii + 1; + k1 = k + 1; + t = h_rs_[k]; + for(j = k1; j <= i; j++) { + t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j]; + } + h_rs_[k] = t / h_H_[k * (restart_ + 1) + k]; + } - double IterativeRefinement::vectorInfNrm(int n, double* d_v) - { - double nrm; + // get solution + for(j = 0; j <= i; j++) { + cublasDaxpy(cublas_handle_, n_, &h_rs_[j], &d_Z_[j * n_], 1, d_x, 1); + } - cusolverSpDnrminf(cusolver_handle_, - n, - d_v, - &nrm, - mv_buffer_ /* at least 8192 bytes */); - return nrm; - } + /* test solution */ - void IterativeRefinement::fgmres(double *d_x, double *d_b) - { - int outer_flag = 1; - int notconv = 1; - int i = 0; - int it = 0; - int j; - int k; - int k1; - - double t; - double rnorm; - double bnorm; - // double rnorm_aux; - double tolrel; - //V[0] = b-A*x_0 - cudaMemcpy(&(d_V_[0]), d_b, sizeof(double) * n_, cudaMemcpyDeviceToDevice); + if(rnorm <= tolrel || it >= maxit_) { + // rnorm_aux = rnorm; + outer_flag = 0; + } + cudaMemcpy(&d_V_[0], d_b, sizeof(double) * n_, cudaMemcpyDeviceToDevice); cudaMatvec(d_x, d_V_, "residual"); rnorm = 0.0; - cublasDdot (cublas_handle_, n_, d_b, 1, d_b, 1, &bnorm); - cublasDdot (cublas_handle_, n_, d_V_, 1, d_V_, 1, &rnorm); - //rnorm = ||V_1|| + cublasDdot(cublas_handle_, n_, d_V_, 1, d_V_, 1, &rnorm); + // rnorm = ||V_1|| rnorm = sqrt(rnorm); - bnorm = sqrt(bnorm); - bnorm_ = bnorm; - while(outer_flag) { - // check if maybe residual is already small enough? - if(it == 0) { - tolrel = tol_ * rnorm; - if(fabs(tolrel) < 1e-16) { - tolrel = 1e-16; - } - } - int exit_cond = 0; - if (conv_cond() == 0){ - exit_cond = ((fabs(rnorm - ZERO) <= EPSILON)); + + if(!outer_flag) { + final_residual_norm_ = rnorm; + fgmres_iters_ = it; + } + } // outer while +} + +// b-Ax +void IterativeRefinement::cudaMatvec(double* d_x, double* d_b, std::string option) +{ + cusparseCreateDnVec(&vec_x_, n_, d_x, CUDA_R_64F); + cusparseCreateDnVec(&vec_Ax_, n_, d_b, CUDA_R_64F); + if(option == "residual") { + // b = b-Ax + cusparseSpMV(cusparse_handle_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &minusone_, + mat_A_, + vec_x_, + &one_, + vec_Ax_, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + mv_buffer_); + } else { + // just b = A*x + cusparseSpMV(cusparse_handle_, + CUSPARSE_OPERATION_NON_TRANSPOSE, + &one_, + mat_A_, + vec_x_, + &zero_, + vec_Ax_, + CUDA_R_64F, + CUSPARSE_SPMV_CSR_ALG2, + mv_buffer_); + } + cusparseDestroyDnVec(vec_x_); + cusparseDestroyDnVec(vec_Ax_); +} + +void IterativeRefinement::GramSchmidt(int i) +{ + double t; + const double one = 1.0; + const double minusone = -1.0; + const double zero = 0.0; + double s; + int sw = 0; + if(orth_option_ == "mgs") { + sw = 0; + } else { + if(orth_option_ == "cgs2") { + sw = 1; + } else { + if(orth_option_ == "mgs_two_synch") { + sw = 2; } else { - if (conv_cond() == 1){ - exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < tol_)); + if(orth_option_ == "mgs_pm") { + sw = 3; } else { - if (conv_cond() == 2){ - exit_cond = ((fabs(rnorm - ZERO) <= EPSILON) || (rnorm < (tol_*bnorm))); - } + // display error message and set sw = 0; + /* + nlp_->log->printf(hovWarning, + "Wrong Gram-Schmidt option. Setting default (modified Gram-Schmidt, mgs) ...\n"); + */ + sw = 0; } } - if (exit_cond) { - outer_flag = 0; - final_residual_norm_ = rnorm; - initial_residual_norm_ = rnorm; - fgmres_iters_ = 0; - break; - } - - // normalize first vector - t = 1.0 / rnorm; - cublasDscal(cublas_handle_, n_, &t, d_V_, 1); - - // initialize norm history + } + } - h_rs_[0] = rnorm; - initial_residual_norm_ = rnorm; - i = -1; - notconv = 1; - - while((notconv) && (it < maxit_)) { - i++; - it++; - // Z_i = (LU)^{-1}*V_i - cudaMemcpy(&d_Z_[i * n_], &d_V_[i * n_], sizeof(double) * n_, cudaMemcpyDeviceToDevice); - checkCudaErrors(cusolverRfSolve(cusolverrf_handle_, d_P_, d_Q_, 1, d_T_, n_, &d_Z_[i * n_], n_)); - cudaDeviceSynchronize(); - // V_{i+1}=A*Z_i - cudaMatvec(&d_Z_[i * n_], &d_V_[(i + 1) * n_], "matvec"); - // orthogonalize V[i+1], form a column of h_L - GramSchmidt(i); - - if(i != 0) { - for(int k = 1; k <= i; k++) { - k1 = k - 1; - t = h_H_[i * (restart_ + 1) + k1]; - h_H_[i * (restart_ + 1) + k1] = h_c_[k1] * t + h_s_[k1] * h_H_[i * (restart_ + 1) + k]; - h_H_[i * (restart_ + 1) + k] = -h_s_[k1] * t + h_c_[k1] * h_H_[i * (restart_ + 1) + k]; - } - } // if i!=0 - - double Hii = h_H_[i * (restart_ + 1) + i]; - double Hii1 = h_H_[(i) * (restart_ + 1) + i + 1]; - double gam = sqrt(Hii * Hii + Hii1 * Hii1); - - if(fabs(gam - ZERO) <= EPSILON) { - gam = EPSMAC; - } + switch(sw) { + case 0: // mgs - /* next Given's rotation */ - h_c_[i] = Hii / gam; - h_s_[i] = Hii1 / gam; - h_rs_[i + 1] = -h_s_[i] * h_rs_[i]; - h_rs_[i] = h_c_[i] * h_rs_[i]; + for(int j = 0; j <= i; ++j) { + t = 0.0; + cublasDdot(cublas_handle_, n_, &d_V_[j * n_], 1, &d_V_[(i + 1) * n_], 1, &t); - h_H_[(i) * (restart_ + 1) + (i)] = h_c_[i] * Hii + h_s_[i] * Hii1; - h_H_[(i) * (restart_ + 1) + (i + 1)] = h_c_[i] * Hii1 - h_s_[i] * Hii; + h_H_[i * (restart_ + 1) + j] = t; + t *= -1.0; - // residual norm estimate - rnorm = fabs(h_rs_[i + 1]); - // check convergence - if(i + 1 >= restart_ || rnorm <= tolrel || it >= maxit_) { - notconv = 0; - } - } // inner while - - // solve tri system - h_rs_[i] = h_rs_[i] / h_H_[i * (restart_ + 1) + i]; - for(int ii = 2; ii <= i + 1; ii++) { - k = i - ii + 1; - k1 = k + 1; - t = h_rs_[k]; - for(j = k1; j <= i; j++) { - t -= h_H_[j * (restart_ + 1) + k] * h_rs_[j]; - } - h_rs_[k] = t / h_H_[k * (restart_ + 1) + k]; + cublasDaxpy(cublas_handle_, n_, &t, &d_V_[j * n_], 1, &d_V_[(i + 1) * n_], 1); } - - // get solution - for(j = 0; j <= i; j++) { - cublasDaxpy(cublas_handle_, n_, &h_rs_[j], &d_Z_[j * n_], 1, d_x, 1); + t = 0.0; + cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t); + + // set the last entry in Hessenberg matrix + t = sqrt(t); + h_H_[(i) * (restart_ + 1) + i + 1] = t; + if(t != 0.0) { + t = 1.0 / t; + cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1); + } else { + assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n"); } + break; - /* test solution */ + case 1: // cgs2 + // Hcol = V(:,1:i)^T *V(:,i+1); + cublasDgemv(cublas_handle_, CUBLAS_OP_T, n_, i + 1, &one_, d_V_, n_, &d_V_[(i + 1) * n_], 1, &zero_, d_H_col_, 1); + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol + cublasDgemv(cublas_handle_, CUBLAS_OP_N, n_, i + 1, &minusone_, d_V_, n_, d_H_col_, 1, &one_, &d_V_[n_ * (i + 1)], 1); + // copy H_col to aux, we will need it later - if(rnorm <= tolrel || it >= maxit_) { - // rnorm_aux = rnorm; - outer_flag = 0; - } + cudaMemcpy(h_aux_, d_H_col_, sizeof(double) * (i + 1), cudaMemcpyDeviceToHost); - cudaMemcpy(&d_V_[0], d_b, sizeof(double)*n_, cudaMemcpyDeviceToDevice); - cudaMatvec(d_x, d_V_, "residual"); + // Hcol = V(:,1:i)*V(:,i+1); + cublasDgemv(cublas_handle_, CUBLAS_OP_T, n_, i + 1, &one_, d_V_, n_, &d_V_[(i + 1) * n_], 1, &zero_, d_H_col_, 1); + // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - rnorm = 0.0; - cublasDdot(cublas_handle_, n_, d_V_, 1, d_V_, 1, &rnorm); - // rnorm = ||V_1|| - rnorm = sqrt(rnorm); + cublasDgemv(cublas_handle_, CUBLAS_OP_N, n_, i + 1, &minusone_, d_V_, n_, d_H_col_, 1, &one_, &d_V_[n_ * (i + 1)], 1); + // copy H_col to H - if(!outer_flag) { - final_residual_norm_ = rnorm; - fgmres_iters_ = it; + cudaMemcpy(&h_H_[i * (restart_ + 1)], d_H_col_, sizeof(double) * (i + 1), cudaMemcpyDeviceToHost); + // add both pieces together (unstable otherwise, careful here!!) + for(int j = 0; j <= i; ++j) { + h_H_[i * (restart_ + 1) + j] += h_aux_[j]; } - } // outer while - } - - //b-Ax - void IterativeRefinement::cudaMatvec(double *d_x, double * d_b, std::string option) - { - cusparseCreateDnVec(&vec_x_, n_, d_x, CUDA_R_64F); - cusparseCreateDnVec(&vec_Ax_, n_, d_b, CUDA_R_64F); - if (option == "residual"){ - //b = b-Ax - cusparseSpMV(cusparse_handle_, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &minusone_, - mat_A_, - vec_x_, - &one_, - vec_Ax_, - CUDA_R_64F, - CUSPARSE_SPMV_CSR_ALG2, - mv_buffer_); - } else { - // just b = A*x - cusparseSpMV(cusparse_handle_, - CUSPARSE_OPERATION_NON_TRANSPOSE, - &one_, - mat_A_, - vec_x_, - &zero_, - vec_Ax_, - CUDA_R_64F, - CUSPARSE_SPMV_CSR_ALG2, - mv_buffer_); - } - cusparseDestroyDnVec(vec_x_); - cusparseDestroyDnVec(vec_Ax_); - } - - void IterativeRefinement::GramSchmidt(int i) - { - double t; - const double one = 1.0; - const double minusone = -1.0; - const double zero = 0.0; - double s; - int sw = 0; - if(orth_option_ == "mgs") { - sw = 0; - } else { - if(orth_option_ == "cgs2") { - sw = 1; + t = 0.0; + cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t); + + // set the last entry in Hessenberg matrix + t = sqrt(t); + h_H_[(i) * (restart_ + 1) + i + 1] = t; + if(t != 0.0) { + t = 1.0 / t; + cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1); } else { - if(orth_option_ == "mgs_two_synch") { - sw = 2; - } else { - if(orth_option_ == "mgs_pm") { - sw = 3; - } else { - // display error message and set sw = 0; - /* - nlp_->log->printf(hovWarning, - "Wrong Gram-Schmidt option. Setting default (modified Gram-Schmidt, mgs) ...\n"); - */ - sw = 0; - } - } + assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n"); } - } - - switch (sw){ - case 0: //mgs - - for(int j=0; j<=i; ++j) { - t=0.0; - cublasDdot (cublas_handle_, n_, &d_V_[j*n_], 1, &d_V_[(i+1)*n_], 1, &t); + break; + // the two low synch schemes + case 2: + // KS: the kernels are limited by the size of the shared memory on the GPU. If too many vectors in Krylov space, use + // standard cublas routines. V[1:i]^T[V[i] w] + if(i < 200) { + mass_inner_product_two_vectors(n_, i, &d_V_[i * n_], &d_V_[(i + 1) * n_], d_V_, d_rvGPU_); + } else { + cublasDgemm(cublas_handle_, + CUBLAS_OP_T, + CUBLAS_OP_N, + i + 1, // m + 2, // n + n_, // k + &one, // alpha + d_V_, // A + n_, // lda + &d_V_[i * n_], // B + n_, // ldb + &zero, + d_rvGPU_, // c + i + 1); // ldc + } + // copy rvGPU to L + cudaMemcpy(&h_L_[(i) * (restart_ + 1)], d_rvGPU_, (i + 1) * sizeof(double), cudaMemcpyDeviceToHost); - h_H_[i*(restart_+1)+j]=t; - t *= -1.0; + cudaMemcpy(h_rv_, &d_rvGPU_[i + 1], (i + 1) * sizeof(double), cudaMemcpyDeviceToHost); - cublasDaxpy(cublas_handle_, - n_, - &t, - &d_V_[j*n_], - 1, - &d_V_[(i+1)*n_], - 1); - } - t = 0.0; - cublasDdot(cublas_handle_, n_, &d_V_[(i+1)*n_], 1, &d_V_[(i+1)*n_], 1, &t); - - //set the last entry in Hessenberg matrix - t=sqrt(t); - h_H_[(i)*(restart_+1)+i+1] = t; - if(t != 0.0) { - t = 1.0/t; - cublasDscal(cublas_handle_,n_,&t,&d_V_[(i+1)*n_], 1); - } else { - assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n"); - } - break; - - case 1://cgs2 - // Hcol = V(:,1:i)^T *V(:,i+1); - cublasDgemv(cublas_handle_, - CUBLAS_OP_T, - n_, - i+1, - &one_, - d_V_, - n_, - &d_V_[(i+1)*n_], - 1, - &zero_,d_H_col_, - 1); - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - cublasDgemv(cublas_handle_, + for(int j = 0; j <= i; ++j) { + h_H_[(i) * (restart_ + 1) + j] = 0.0; + } + // triangular solve + for(int j = 0; j <= i; ++j) { + h_H_[(i) * (restart_ + 1) + j] = h_rv_[j]; + s = 0.0; + for(int k = 0; k < j; ++k) { + s += h_L_[j * (restart_ + 1) + k] * h_H_[(i) * (restart_ + 1) + k]; + } // for k + h_H_[(i) * (restart_ + 1) + j] -= s; + } // for j + + cudaMemcpy(d_Hcolumn_, &h_H_[(i) * (restart_ + 1)], (i + 1) * sizeof(double), cudaMemcpyHostToDevice); + // again, use std cublas functions if Krylov space is too large + if(i < 200) { + mass_axpy(n_, i, d_V_, &d_V_[(i + 1) * n_], d_Hcolumn_); + } else { + cublasDgemm(cublas_handle_, CUBLAS_OP_N, - n_, - i+1, - &minusone_, - d_V_, - n_, - d_H_col_, - 1, - &one_, - &d_V_[n_*(i+1)], - 1); - // copy H_col to aux, we will need it later - - cudaMemcpy(h_aux_, d_H_col_, sizeof(double) * (i+1), cudaMemcpyDeviceToHost); - - //Hcol = V(:,1:i)*V(:,i+1); - cublasDgemv(cublas_handle_, + CUBLAS_OP_N, + n_, // m + 1, // n + i + 1, // k + &minusone, // alpha + d_V_, // A + n_, // lda + d_Hcolumn_, // B + i + 1, // ldb + &one, + &d_V_[(i + 1) * n_], // c + n_); // ldc + } + // normalize (second synch) + t = 0.0; + cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t); + + // set the last entry in Hessenberg matrix + t = sqrt(t); + h_H_[(i) * (restart_ + 1) + i + 1] = t; + if(t != 0.0) { + t = 1.0 / t; + cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1); + } else { + assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n"); + } + break; + + case 3: // two synch Gauss-Seidel mgs, SUPER STABLE + // according to unpublisjed work by ST + // L is where we keep the triangular matrix(L is ON THE CPU) + // if Krylov space is too large, use std cublas (because out of shared mmory) + if(i < 200) { + mass_inner_product_two_vectors(n_, i, &d_V_[i * n_], &d_V_[(i + 1) * n_], d_V_, d_rvGPU_); + } else { + cublasDgemm(cublas_handle_, CUBLAS_OP_T, - n_, - i+1, - &one_, - d_V_, - n_, - &d_V_[(i+1)*n_], - 1, - &zero_, - d_H_col_, - 1); - // V(:,i+1) = V(:, i+1) - V(:,1:i)*Hcol - - cublasDgemv(cublas_handle_, CUBLAS_OP_N, - n_, - i+1, - &minusone_, - d_V_, - n_, - d_H_col_, - 1, - &one_, - &d_V_[n_*(i+1)], - 1); - // copy H_col to H - - cudaMemcpy(&h_H_[i*(restart_+1)], d_H_col_, sizeof(double) * (i+1), cudaMemcpyDeviceToHost); - // add both pieces together (unstable otherwise, careful here!!) - for(int j=0; j<=i; ++j) { - h_H_[i*(restart_+1)+j] += h_aux_[j]; - } - t = 0.0; - cublasDdot (cublas_handle_, n_, &d_V_[(i+1)*n_], 1, &d_V_[(i+1)*n_], 1, &t); - - //set the last entry in Hessenberg matrix - t=sqrt(t); - h_H_[(i)*(restart_+1)+i+1] = t; - if(t != 0.0) { - t = 1.0/t; - cublasDscal(cublas_handle_,n_,&t,&d_V_[(i+1)*n_], 1); - } else { - assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n"); - } - break; - // the two low synch schemes - case 2: - // KS: the kernels are limited by the size of the shared memory on the GPU. If too many vectors in Krylov space, use standard cublas routines. - // V[1:i]^T[V[i] w] - if(i < 200) { - mass_inner_product_two_vectors(n_, i, &d_V_[i * n_],&d_V_[(i+1) * n_], d_V_, d_rvGPU_); - } else { - cublasDgemm(cublas_handle_, - CUBLAS_OP_T, - CUBLAS_OP_N, - i + 1,//m - 2,//n - n_,//k - &one,//alpha - d_V_,//A - n_,//lda - &d_V_[i * n_],//B - n_,//ldb - &zero, - d_rvGPU_,//c - i+1);//ldc - } - // copy rvGPU to L - cudaMemcpy(&h_L_[(i) * (restart_ + 1)], - d_rvGPU_, - (i + 1) * sizeof(double), - cudaMemcpyDeviceToHost); - - cudaMemcpy(h_rv_, - &d_rvGPU_[i + 1], - (i + 1) * sizeof(double), - cudaMemcpyDeviceToHost); - - for(int j=0; j<=i; ++j) { - h_H_[(i)*(restart_+1)+j] = 0.0; - } - // triangular solve - for(int j = 0; j <= i; ++j) { - h_H_[(i) * (restart_ + 1) + j] = h_rv_[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[j * (restart_ + 1) + k] * h_H_[(i) * (restart_ + 1) + k]; - } // for k - h_H_[(i) * (restart_ + 1) + j] -= s; - } // for j - - cudaMemcpy(d_Hcolumn_, - &h_H_[(i) * (restart_ + 1)], - (i + 1) * sizeof(double), - cudaMemcpyHostToDevice); - //again, use std cublas functions if Krylov space is too large - if(i < 200) { - mass_axpy(n_, i, d_V_, &d_V_[(i+1) * n_],d_Hcolumn_); - } else { - cublasDgemm(cublas_handle_, - CUBLAS_OP_N, - CUBLAS_OP_N, - n_,//m - 1,//n - i + 1,//k - &minusone,//alpha - d_V_,//A - n_,//lda - d_Hcolumn_,//B - i + 1,//ldb - &one, - &d_V_[(i + 1) * n_],//c - n_);//ldc + i + 1, // m + 2, // n + n_, // k + &one, // alpha + d_V_, // A + n_, // lda + &d_V_[i * n_], // B + n_, // ldb + &zero, + d_rvGPU_, // c + i + 1); // ldc + } + // copy rvGPU to L + cudaMemcpy(&h_L_[(i) * (restart_ + 1)], d_rvGPU_, (i + 1) * sizeof(double), cudaMemcpyDeviceToHost); - } - // normalize (second synch) - t=0.0; - cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t); - - // set the last entry in Hessenberg matrix - t=sqrt(t); - h_H_[(i) * (restart_ + 1) + i + 1] = t; - if(t != 0.0) { - t = 1.0/t; - cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1); - } else { - assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n"); - } - break; - - case 3: //two synch Gauss-Seidel mgs, SUPER STABLE - // according to unpublisjed work by ST - // L is where we keep the triangular matrix(L is ON THE CPU) - // if Krylov space is too large, use std cublas (because out of shared mmory) - if(i < 200) { - mass_inner_product_two_vectors(n_, i, &d_V_[i * n_],&d_V_[(i+1) * n_], d_V_, d_rvGPU_); - } else { - cublasDgemm(cublas_handle_, - CUBLAS_OP_T, - CUBLAS_OP_N, - i + 1,//m - 2,//n - n_,//k - &one,//alpha - d_V_,//A - n_,//lda - &d_V_[i * n_],//B - n_,//ldb - &zero, - d_rvGPU_,//c - i+1);//ldc - } - // copy rvGPU to L - cudaMemcpy(&h_L_[(i) * (restart_ + 1)], - d_rvGPU_, - (i + 1) * sizeof(double), - cudaMemcpyDeviceToHost); - - cudaMemcpy(h_rv_, - &d_rvGPU_[i + 1], - (i + 1) * sizeof(double), - cudaMemcpyDeviceToHost); - - for(int j = 0; j <= i; ++j) { - h_H_[(i) * (restart_ + 1) + j] = 0.0; - } - //triangular solve - for(int j = 0; j <= i; ++j) { - h_H_[(i) * (restart_ + 1) + j] = h_rv_[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[j * (restart_ + 1) + k] * h_H_[(i) * (restart_ + 1) + k]; - } // for k - h_H_[(i) * (restart_ + 1) + j] -= s; - } // for j - - // now compute h_rv = L^T h_H - double h; - for(int j = 0; j <= i; ++j) { - // go through COLUMN OF L - h_rv_[j] = 0.0; - for(int k = j + 1; k <= i; ++k) { - h = h_L_[k * (restart_ + 1) + j]; - h_rv_[j] += h_H_[(i) * (restart_ + 1) + k] * h; - } - } + cudaMemcpy(h_rv_, &d_rvGPU_[i + 1], (i + 1) * sizeof(double), cudaMemcpyDeviceToHost); - // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv - for(int j = 0; j <= i; ++j) { - h_aux_[j] = h_rv_[j]; - s = 0.0; - for(int k = 0; k < j; ++k) { - s += h_L_[j * (restart_ + 1) + k] * h_aux_[k]; - } // for k - h_aux_[j] -= s; - } // for j - - // and now subtract that from h_H - for(int j=0; j<=i; ++j) { - h_H_[(i)*(restart_+1)+j] -= h_aux_[j]; - } - cudaMemcpy(d_Hcolumn_, - &h_H_[(i) * (restart_ + 1)], - (i + 1) * sizeof(double), - cudaMemcpyHostToDevice); - // if Krylov space too large, use std cublas routines - if(i < 200) { - mass_axpy(n_, i, d_V_, &d_V_[(i+1) * n_],d_Hcolumn_); - } else { - cublasDgemm(cublas_handle_, - CUBLAS_OP_N, - CUBLAS_OP_N, - n_,//m - 1,//n - i + 1,//k - &minusone,//alpha - d_V_,//A - n_,//lda - d_Hcolumn_,//B - i + 1,//ldb - &one, - &d_V_[(i + 1) * n_],//c - n_);//ldc - } - // normalize (second synch) - t=0.0; - cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t); - - // set the last entry in Hessenberg matrix - t = sqrt(t); - h_H_[(i) * (restart_ + 1) + i + 1] = t; - if (t != 0.0){ - t = 1.0/t; - cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1); - } else { - assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n"); + for(int j = 0; j <= i; ++j) { + h_H_[(i) * (restart_ + 1) + j] = 0.0; + } + // triangular solve + for(int j = 0; j <= i; ++j) { + h_H_[(i) * (restart_ + 1) + j] = h_rv_[j]; + s = 0.0; + for(int k = 0; k < j; ++k) { + s += h_L_[j * (restart_ + 1) + k] * h_H_[(i) * (restart_ + 1) + k]; + } // for k + h_H_[(i) * (restart_ + 1) + j] -= s; + } // for j + + // now compute h_rv = L^T h_H + double h; + for(int j = 0; j <= i; ++j) { + // go through COLUMN OF L + h_rv_[j] = 0.0; + for(int k = j + 1; k <= i; ++k) { + h = h_L_[k * (restart_ + 1) + j]; + h_rv_[j] += h_H_[(i) * (restart_ + 1) + k] * h; } - break; - - default: - assert(0 && "Iterative refinement failed, wrong orthogonalization.\n"); - break; - } // switch - } // GramSchmidt - - // Error checking utility for CUDA - // KS: might later become part of src/Utils, putting it here for now - template - void IterativeRefinement::resolveCheckCudaError(T result, - const char* const file, - int const line) - { + } + + // and do one more tri solve with L^T: h_aux = (I-L)^{-1}h_rv + for(int j = 0; j <= i; ++j) { + h_aux_[j] = h_rv_[j]; + s = 0.0; + for(int k = 0; k < j; ++k) { + s += h_L_[j * (restart_ + 1) + k] * h_aux_[k]; + } // for k + h_aux_[j] -= s; + } // for j + + // and now subtract that from h_H + for(int j = 0; j <= i; ++j) { + h_H_[(i) * (restart_ + 1) + j] -= h_aux_[j]; + } + cudaMemcpy(d_Hcolumn_, &h_H_[(i) * (restart_ + 1)], (i + 1) * sizeof(double), cudaMemcpyHostToDevice); + // if Krylov space too large, use std cublas routines + if(i < 200) { + mass_axpy(n_, i, d_V_, &d_V_[(i + 1) * n_], d_Hcolumn_); + } else { + cublasDgemm(cublas_handle_, + CUBLAS_OP_N, + CUBLAS_OP_N, + n_, // m + 1, // n + i + 1, // k + &minusone, // alpha + d_V_, // A + n_, // lda + d_Hcolumn_, // B + i + 1, // ldb + &one, + &d_V_[(i + 1) * n_], // c + n_); // ldc + } + // normalize (second synch) + t = 0.0; + cublasDdot(cublas_handle_, n_, &d_V_[(i + 1) * n_], 1, &d_V_[(i + 1) * n_], 1, &t); + + // set the last entry in Hessenberg matrix + t = sqrt(t); + h_H_[(i) * (restart_ + 1) + i + 1] = t; + if(t != 0.0) { + t = 1.0 / t; + cublasDscal(cublas_handle_, n_, &t, &d_V_[(i + 1) * n_], 1); + } else { + assert(0 && "Iterative refinement failed, Krylov vector with zero norm\n"); + } + break; + + default: + assert(0 && "Iterative refinement failed, wrong orthogonalization.\n"); + break; + } // switch +} // GramSchmidt + +// Error checking utility for CUDA +// KS: might later become part of src/Utils, putting it here for now +template +void IterativeRefinement::resolveCheckCudaError(T result, const char* const file, int const line) +{ #ifdef DEBUG - if(result) { - fprintf(stdout, - "CUDA error at %s:%d, error# %d\n", - file, - line, - result); - assert(false); - } -#endif + if(result) { + fprintf(stdout, "CUDA error at %s:%d, error# %d\n", file, line, result); + assert(false); } +#endif +} - -} // namespace ReSolve +} // namespace ReSolve diff --git a/src/LinAlg/ReSolve/IterativeRefinement.hpp b/src/LinAlg/ReSolve/IterativeRefinement.hpp index c194ac3a0..3650b38ae 100644 --- a/src/LinAlg/ReSolve/IterativeRefinement.hpp +++ b/src/LinAlg/ReSolve/IterativeRefinement.hpp @@ -12,19 +12,19 @@ #include "resolve_cusolver_defs.hpp" #include -namespace ReSolve { +namespace ReSolve +{ constexpr double ZERO = 0.0; constexpr double EPSILON = 1.0e-18; -constexpr double EPSMAC = 1.0e-16; +constexpr double EPSMAC = 1.0e-16; /** * @brief Iterative refinement class - * + * */ class IterativeRefinement { - public: IterativeRefinement(); IterativeRefinement(int restart, double tol, int maxit); @@ -45,53 +45,38 @@ class IterativeRefinement double getBNorm(); // this is public on purpose, can be used internally or outside, to compute the residual. void fgmres(double* d_x, double* d_b); - void set_tol(double tol) {tol_ = tol;} ///< Set tolerance for the Krylov solver + void set_tol(double tol) { tol_ = tol; } ///< Set tolerance for the Krylov solver /** * @brief Set the up system matrix object mat_A_ of type cusparseSpMatDescr_t - * + * * @param n - size of the matrix * @param nnz - number of nonzeros in the matrix * @param irow - array of row pointers * @param jcol - array of column indices * @param val - array of sparse matrix values - * + * * @return int - * + * * @pre Arrays `irow`, `jcol` and `val` are on the device. */ int setup_system_matrix(int n, int nnz, int* irow, int* jcol, double* val); // Simple accessors - int& maxit() - { - return maxit_; - } - - double& tol() - { - return tol_; - } - - std::string& orth_option() - { - return orth_option_; - } - - int& restart() - { - return restart_; - } - - int& conv_cond() - { - return conv_cond_; - } + int& maxit() { return maxit_; } + + double& tol() { return tol_; } + + std::string& orth_option() { return orth_option_; } + + int& restart() { return restart_; } + + int& conv_cond() { return conv_cond_; } private: // Krylov vectors - double* d_V_{ nullptr }; - double* d_Z_{ nullptr }; + double* d_V_{nullptr}; + double* d_Z_{nullptr}; double final_residual_norm_; double initial_residual_norm_; @@ -102,48 +87,48 @@ class IterativeRefinement int restart_; int maxit_; double tol_; - int conv_cond_; ///< convergence condition, can be 0, 1, 2 for IR + int conv_cond_; ///< convergence condition, can be 0, 1, 2 for IR std::string orth_option_; // System matrix data int n_; int nnz_; - int* dia_{ nullptr }; - int* dja_{ nullptr }; - double* da_{ nullptr }; - cusparseSpMatDescr_t mat_A_{ nullptr }; + int* dia_{nullptr}; + int* dja_{nullptr}; + double* da_{nullptr}; + cusparseSpMatDescr_t mat_A_{nullptr}; // Matrix-vector product data - cusparseDnVecDescr_t vec_x_{ nullptr }; - cusparseDnVecDescr_t vec_Ax_{ nullptr }; + cusparseDnVecDescr_t vec_x_{nullptr}; + cusparseDnVecDescr_t vec_Ax_{nullptr}; // CUDA libraries handles - MUST BE SET AT INIT - cusparseHandle_t cusparse_handle_{ nullptr }; - cublasHandle_t cublas_handle_{ nullptr }; - cusolverRfHandle_t cusolverrf_handle_{ nullptr }; - cusolverSpHandle_t cusolver_handle_{ nullptr }; + cusparseHandle_t cusparse_handle_{nullptr}; + cublasHandle_t cublas_handle_{nullptr}; + cusolverRfHandle_t cusolverrf_handle_{nullptr}; + cusolverSpHandle_t cusolver_handle_{nullptr}; // GPU data (?) - double* d_T_{ nullptr }; - int* d_P_{ nullptr }; - int* d_Q_{ nullptr }; + double* d_T_{nullptr}; + int* d_P_{nullptr}; + int* d_Q_{nullptr}; - double* d_rvGPU_{ nullptr }; - double* d_Hcolumn_{ nullptr }; - double* d_H_col_{ nullptr }; - void* mv_buffer_{ nullptr }; ///< SpMV buffer + double* d_rvGPU_{nullptr}; + double* d_Hcolumn_{nullptr}; + double* d_H_col_{nullptr}; + void* mv_buffer_{nullptr}; ///< SpMV buffer // CPU: - double* h_L_{ nullptr }; - double* h_H_{ nullptr }; - double* h_rv_{ nullptr }; + double* h_L_{nullptr}; + double* h_H_{nullptr}; + double* h_rv_{nullptr}; // for givens rotations - double* h_c_{ nullptr }; - double* h_s_{ nullptr }; + double* h_c_{nullptr}; + double* h_s_{nullptr}; // for Hessenberg system - double* h_rs_{ nullptr }; + double* h_rs_{nullptr}; // neded in some of the orthogonalization methods - double* h_aux_{ nullptr }; + double* h_aux_{nullptr}; // TODO: Something needs to be done with this :) const double minusone_ = -1.0; @@ -152,41 +137,40 @@ class IterativeRefinement /** * @brief orthogonalize i+1 vector against i vectors already orthogonal - * + * * Private function needed for FGMRES. - * + * * @param[in] i - number of orthogonal vectors */ void GramSchmidt(int i); /** - * @brief matvec black-box: b = b - A*d_x if option is "residual" and b=A*x + * @brief matvec black-box: b = b - A*d_x if option is "residual" and b=A*x * if option is "matvec" - * - * @param d_x - * @param d_b + * + * @param d_x + * @param d_b * @param option - * + * * @todo Document d_x and d_b; are both of them modified in this function? */ void cudaMatvec(double* d_x, double* d_b, std::string option); - - //KS: needed for testing -- condider delating later + + // KS: needed for testing -- condider delating later double matrixAInfNrm(); double vectorInfNrm(int n, double* d_v); - //end of testing - + // end of testing + /** * @brief Check for CUDA errors. - * + * * @tparam T - type of the result * @param result - result value * @param file - file name where the error occured * @param line - line at which the error occured */ - template + template void resolveCheckCudaError(T result, const char* const file, int const line); - }; -} // namespace ReSolve +} // namespace ReSolve diff --git a/src/LinAlg/ReSolve/MatrixCsr.cpp b/src/LinAlg/ReSolve/MatrixCsr.cpp index 1a5d9a1ac..309546d80 100644 --- a/src/LinAlg/ReSolve/MatrixCsr.cpp +++ b/src/LinAlg/ReSolve/MatrixCsr.cpp @@ -65,85 +65,79 @@ #define checkCudaErrors(val) resolveCheckCudaError((val), __FILE__, __LINE__) -namespace ReSolve { - - - - MatrixCsr::MatrixCsr() - { - } - - MatrixCsr::~MatrixCsr() - { - if(n_ == 0) - return; - - clear_data(); - } - - void MatrixCsr::allocate_size(int n) - { - n_ = n; - checkCudaErrors(cudaMalloc(&irows_, (n_+1) * sizeof(int))); - irows_host_ = new int[n_+1]{0}; - } - - void MatrixCsr::allocate_nnz(int nnz) - { - nnz_ = nnz; - checkCudaErrors(cudaMalloc(&jcols_, nnz_ * sizeof(int))); - checkCudaErrors(cudaMalloc(&vals_, nnz_ * sizeof(double))); - jcols_host_ = new int[nnz_]{0}; - vals_host_ = new double[nnz_]{0}; - } - - void MatrixCsr::clear_data() - { - checkCudaErrors(cudaFree(irows_)); - checkCudaErrors(cudaFree(jcols_)); - checkCudaErrors(cudaFree(vals_)); - - irows_ = nullptr; - jcols_ = nullptr; - vals_ = nullptr; - - delete [] irows_host_; - delete [] jcols_host_; - delete [] vals_host_ ; - - irows_host_ = nullptr; - jcols_host_ = nullptr; - vals_host_ = nullptr; - - n_ = 0; - nnz_ = 0; - } - - void MatrixCsr::update_from_host_mirror() - { - checkCudaErrors(cudaMemcpy(irows_, irows_host_, sizeof(int) * (n_+1), cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(jcols_, jcols_host_, sizeof(int) * nnz_, cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(vals_, vals_host_, sizeof(double) * nnz_, cudaMemcpyHostToDevice)); - } - - void MatrixCsr::copy_to_host_mirror() - { - checkCudaErrors(cudaMemcpy(irows_host_, irows_, sizeof(int) * (n_+1), cudaMemcpyDeviceToHost)); - checkCudaErrors(cudaMemcpy(jcols_host_, jcols_, sizeof(int) * nnz_, cudaMemcpyDeviceToHost)); - checkCudaErrors(cudaMemcpy(vals_host_, vals_, sizeof(double) * nnz_, cudaMemcpyDeviceToHost)); - } - - // Error checking utility for CUDA - // KS: might later become part of src/Utils, putting it here for now - template - void MatrixCsr::resolveCheckCudaError(T result, - const char* const file, - int const line) - { - if(result) { - std::cout << "CUDA error at " << file << ":" << line << " error# " << result << "\n"; - assert(false); - } +namespace ReSolve +{ + +MatrixCsr::MatrixCsr() {} + +MatrixCsr::~MatrixCsr() +{ + if(n_ == 0) return; + + clear_data(); +} + +void MatrixCsr::allocate_size(int n) +{ + n_ = n; + checkCudaErrors(cudaMalloc(&irows_, (n_ + 1) * sizeof(int))); + irows_host_ = new int[n_ + 1]{0}; +} + +void MatrixCsr::allocate_nnz(int nnz) +{ + nnz_ = nnz; + checkCudaErrors(cudaMalloc(&jcols_, nnz_ * sizeof(int))); + checkCudaErrors(cudaMalloc(&vals_, nnz_ * sizeof(double))); + jcols_host_ = new int[nnz_]{0}; + vals_host_ = new double[nnz_]{0}; +} + +void MatrixCsr::clear_data() +{ + checkCudaErrors(cudaFree(irows_)); + checkCudaErrors(cudaFree(jcols_)); + checkCudaErrors(cudaFree(vals_)); + + irows_ = nullptr; + jcols_ = nullptr; + vals_ = nullptr; + + delete[] irows_host_; + delete[] jcols_host_; + delete[] vals_host_; + + irows_host_ = nullptr; + jcols_host_ = nullptr; + vals_host_ = nullptr; + + n_ = 0; + nnz_ = 0; +} + +void MatrixCsr::update_from_host_mirror() +{ + checkCudaErrors(cudaMemcpy(irows_, irows_host_, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(jcols_, jcols_host_, sizeof(int) * nnz_, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(vals_, vals_host_, sizeof(double) * nnz_, cudaMemcpyHostToDevice)); +} + +void MatrixCsr::copy_to_host_mirror() +{ + checkCudaErrors(cudaMemcpy(irows_host_, irows_, sizeof(int) * (n_ + 1), cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(jcols_host_, jcols_, sizeof(int) * nnz_, cudaMemcpyDeviceToHost)); + checkCudaErrors(cudaMemcpy(vals_host_, vals_, sizeof(double) * nnz_, cudaMemcpyDeviceToHost)); +} + +// Error checking utility for CUDA +// KS: might later become part of src/Utils, putting it here for now +template +void MatrixCsr::resolveCheckCudaError(T result, const char* const file, int const line) +{ + if(result) { + std::cout << "CUDA error at " << file << ":" << line << " error# " << result << "\n"; + assert(false); } +} -} // namespace ReSolve +} // namespace ReSolve diff --git a/src/LinAlg/ReSolve/MatrixCsr.hpp b/src/LinAlg/ReSolve/MatrixCsr.hpp index 019e834c3..685fb5658 100644 --- a/src/LinAlg/ReSolve/MatrixCsr.hpp +++ b/src/LinAlg/ReSolve/MatrixCsr.hpp @@ -1,6 +1,7 @@ #pragma once -namespace ReSolve { +namespace ReSolve +{ class MatrixCsr { @@ -11,69 +12,45 @@ class MatrixCsr void allocate_nnz(int nnz); void clear_data(); - int* get_irows() - { - return irows_; - } + int* get_irows() { return irows_; } - const int* get_irows() const - { - return irows_; - } + const int* get_irows() const { return irows_; } - int* get_jcols() - { - return jcols_; - } + int* get_jcols() { return jcols_; } - double* get_vals() - { - return vals_; - } + double* get_vals() { return vals_; } - int* get_irows_host() - { - return irows_host_; - } + int* get_irows_host() { return irows_host_; } - int* get_jcols_host() - { - return jcols_host_; - } + int* get_jcols_host() { return jcols_host_; } - double* get_vals_host() - { - return vals_host_; - } + double* get_vals_host() { return vals_host_; } void update_from_host_mirror(); void copy_to_host_mirror(); private: - int n_{ 0 }; - int nnz_{ 0 }; - - int* irows_{ nullptr }; - int* jcols_{ nullptr }; - double* vals_{ nullptr}; + int n_{0}; + int nnz_{0}; - int* irows_host_{ nullptr }; - int* jcols_host_{ nullptr }; - double* vals_host_{ nullptr}; + int* irows_{nullptr}; + int* jcols_{nullptr}; + double* vals_{nullptr}; + int* irows_host_{nullptr}; + int* jcols_host_{nullptr}; + double* vals_host_{nullptr}; /** * @brief Check for CUDA errors. - * + * * @tparam T - type of the result * @param result - result value * @param file - file name where the error occured * @param line - line at which the error occured */ - template + template void resolveCheckCudaError(T result, const char* const file, int const line); +}; -}; - - -} // namespace ReSolve +} // namespace ReSolve diff --git a/src/LinAlg/ReSolve/RefactorizationSolver.cpp b/src/LinAlg/ReSolve/RefactorizationSolver.cpp index 8d70e8936..e387b8e40 100644 --- a/src/LinAlg/ReSolve/RefactorizationSolver.cpp +++ b/src/LinAlg/ReSolve/RefactorizationSolver.cpp @@ -66,715 +66,699 @@ #define checkCudaErrors(val) resolveCheckCudaError((val), __FILE__, __LINE__) -namespace ReSolve { +namespace ReSolve +{ - RefactorizationSolver::RefactorizationSolver(int n) +RefactorizationSolver::RefactorizationSolver(int n) : n_(n) - { - mat_A_csr_ = new MatrixCsr(); - - // handles - cusparseCreate(&handle_); - cusolverSpCreate(&handle_cusolver_); - cublasCreate(&handle_cublas_); - - // descriptors - cusparseCreateMatDescr(&descr_A_); - cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO); - - // Allocate host mirror for the solution vector - hostx_ = new double[n_]; - - // Allocate solution and rhs vectors - checkCudaErrors(cudaMalloc(&devx_, n_ * sizeof(double))); - checkCudaErrors(cudaMalloc(&devr_, n_ * sizeof(double))); +{ + mat_A_csr_ = new MatrixCsr(); + + // handles + cusparseCreate(&handle_); + cusolverSpCreate(&handle_cusolver_); + cublasCreate(&handle_cublas_); + + // descriptors + cusparseCreateMatDescr(&descr_A_); + cusparseSetMatType(descr_A_, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(descr_A_, CUSPARSE_INDEX_BASE_ZERO); + + // Allocate host mirror for the solution vector + hostx_ = new double[n_]; + + // Allocate solution and rhs vectors + checkCudaErrors(cudaMalloc(&devx_, n_ * sizeof(double))); + checkCudaErrors(cudaMalloc(&devr_, n_ * sizeof(double))); +} + +RefactorizationSolver::~RefactorizationSolver() +{ + if(iterative_refinement_enabled_) delete ir_; + delete mat_A_csr_; + + // Delete workspaces and handles + cudaFree(d_work_); + cusparseDestroy(handle_); + cusolverSpDestroy(handle_cusolver_); + cublasDestroy(handle_cublas_); + cusparseDestroyMatDescr(descr_A_); + + // Delete host mirror for the solution vector + delete[] hostx_; + + // Delete residual and solution vectors + cudaFree(devr_); + cudaFree(devx_); + + // Delete matrix descriptor used in cuSolverGLU setup + if(cusolver_glu_enabled_) { + cusparseDestroyMatDescr(descr_M_); + cusolverSpDestroyGluInfo(info_M_); } - RefactorizationSolver::~RefactorizationSolver() - { - if(iterative_refinement_enabled_) - delete ir_; - delete mat_A_csr_; - - // Delete workspaces and handles - cudaFree(d_work_); - cusparseDestroy(handle_); - cusolverSpDestroy(handle_cusolver_); - cublasDestroy(handle_cublas_); - cusparseDestroyMatDescr(descr_A_); - - // Delete host mirror for the solution vector - delete [] hostx_; - - // Delete residual and solution vectors - cudaFree(devr_); - cudaFree(devx_); - - // Delete matrix descriptor used in cuSolverGLU setup - if(cusolver_glu_enabled_) { - cusparseDestroyMatDescr(descr_M_); - cusolverSpDestroyGluInfo(info_M_); - } - - if(cusolver_rf_enabled_) { - cudaFree(d_P_); - cudaFree(d_Q_); - cudaFree(d_T_); - } + if(cusolver_rf_enabled_) { + cudaFree(d_P_); + cudaFree(d_Q_); + cudaFree(d_T_); + } + klu_free_symbolic(&Symbolic_, &Common_); + klu_free_numeric(&Numeric_, &Common_); + delete[] mia_; + delete[] mja_; +} + +void RefactorizationSolver::enable_iterative_refinement() +{ + ir_ = new IterativeRefinement(); + if(ir_ != nullptr) iterative_refinement_enabled_ = true; +} + +// TODO: Refactor to only pass mat_A_csr_ to setup_system_matrix; n and nnz can be read from mat_A_csr_ +void RefactorizationSolver::setup_iterative_refinement_matrix(int n, int nnz) +{ + ir_->setup_system_matrix(n, nnz, mat_A_csr_->get_irows(), mat_A_csr_->get_jcols(), mat_A_csr_->get_vals()); +} + +// TODO: Can this function be merged with setup_iterative_refinement_matrix ? +void RefactorizationSolver::configure_iterative_refinement(cusparseHandle_t cusparse_handle, + cublasHandle_t cublas_handle, + cusolverRfHandle_t cusolverrf_handle, + int n, + double* d_T, + int* d_P, + int* d_Q, + double* devx, + double* devr) +{ + ir_->setup(cusparse_handle, cublas_handle, cusolverrf_handle, n, d_T, d_P, d_Q, devx, devr); +} + +int RefactorizationSolver::setup_factorization() +{ + int* row_ptr = mat_A_csr_->get_irows_host(); + int* col_idx = mat_A_csr_->get_jcols_host(); + + if(fact_ == "klu") { + /* initialize KLU setup parameters, dont factorize yet */ + initializeKLU(); + + /*perform KLU but only the symbolic analysis (important) */ klu_free_symbolic(&Symbolic_, &Common_); klu_free_numeric(&Numeric_, &Common_); - delete [] mia_; - delete [] mja_; - } - - void RefactorizationSolver::enable_iterative_refinement() - { - ir_ = new IterativeRefinement(); - if(ir_ != nullptr) - iterative_refinement_enabled_ = true; - } - - // TODO: Refactor to only pass mat_A_csr_ to setup_system_matrix; n and nnz can be read from mat_A_csr_ - void RefactorizationSolver::setup_iterative_refinement_matrix(int n, int nnz) - { - ir_->setup_system_matrix(n, nnz, mat_A_csr_->get_irows(), mat_A_csr_->get_jcols(), mat_A_csr_->get_vals()); - } - - // TODO: Can this function be merged with setup_iterative_refinement_matrix ? - void RefactorizationSolver::configure_iterative_refinement(cusparseHandle_t cusparse_handle, - cublasHandle_t cublas_handle, - cusolverRfHandle_t cusolverrf_handle, - int n, - double* d_T, - int* d_P, - int* d_Q, - double* devx, - double* devr) - { - ir_->setup(cusparse_handle, cublas_handle, cusolverrf_handle, n, d_T, d_P, d_Q, devx, devr); - } - + Symbolic_ = klu_analyze(n_, row_ptr, col_idx, &Common_); - int RefactorizationSolver::setup_factorization() - { - int* row_ptr = mat_A_csr_->get_irows_host(); - int* col_idx = mat_A_csr_->get_jcols_host(); - - if(fact_ == "klu") { - /* initialize KLU setup parameters, dont factorize yet */ - initializeKLU(); - - /*perform KLU but only the symbolic analysis (important) */ - klu_free_symbolic(&Symbolic_, &Common_); - klu_free_numeric(&Numeric_, &Common_); - Symbolic_ = klu_analyze(n_, row_ptr, col_idx, &Common_); - - if(Symbolic_ == nullptr) { - return -1; - } - } else { // for future - assert(0 && "Only KLU is available for the first factorization.\n"); + if(Symbolic_ == nullptr) { + return -1; } - return 0; + } else { // for future + assert(0 && "Only KLU is available for the first factorization.\n"); } - - int RefactorizationSolver::factorize() - { - Numeric_ = klu_factor(mat_A_csr_->get_irows_host(), mat_A_csr_->get_jcols_host(), mat_A_csr_->get_vals_host(), Symbolic_, &Common_); - return (Numeric_ == nullptr) ? -1 : 0; + return 0; +} + +int RefactorizationSolver::factorize() +{ + Numeric_ = klu_factor(mat_A_csr_->get_irows_host(), + mat_A_csr_->get_jcols_host(), + mat_A_csr_->get_vals_host(), + Symbolic_, + &Common_); + return (Numeric_ == nullptr) ? -1 : 0; +} + +void RefactorizationSolver::setup_refactorization() +{ + if(refact_ == "glu") { + initializeCusolverGLU(); + refactorizationSetupCusolverGLU(); + } else if(refact_ == "rf") { + initializeCusolverRf(); + refactorizationSetupCusolverRf(); + if(use_ir_ == "yes") { + configure_iterative_refinement(handle_, handle_cublas_, handle_rf_, n_, d_T_, d_P_, d_Q_, devx_, devr_); + } + } else { // for future - + assert(0 && "Only glu and rf refactorizations available.\n"); } +} - void RefactorizationSolver::setup_refactorization() - { - if(refact_ == "glu") { - initializeCusolverGLU(); - refactorizationSetupCusolverGLU(); - } else if(refact_ == "rf") { - initializeCusolverRf(); - refactorizationSetupCusolverRf(); - if(use_ir_ == "yes") { - configure_iterative_refinement(handle_, handle_cublas_, handle_rf_, n_, d_T_, d_P_, d_Q_, devx_, devr_); - } - } else { // for future - - assert(0 && "Only glu and rf refactorizations available.\n"); +int RefactorizationSolver::refactorize() +{ + if(refact_ == "glu") { + sp_status_ = cusolverSpDgluReset(handle_cusolver_, + n_, + /* A is original matrix */ + nnz_, + descr_A_, + mat_A_csr_->get_vals(), + mat_A_csr_->get_irows(), + mat_A_csr_->get_jcols(), + info_M_); + sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_); + } else { + if(refact_ == "rf") { + sp_status_ = cusolverRfResetValues(n_, + nnz_, + mat_A_csr_->get_irows(), + mat_A_csr_->get_jcols(), + mat_A_csr_->get_vals(), + d_P_, + d_Q_, + handle_rf_); + cudaDeviceSynchronize(); + sp_status_ = cusolverRfRefactor(handle_rf_); } } - - int RefactorizationSolver::refactorize() - { - if(refact_ == "glu") { - sp_status_ = cusolverSpDgluReset(handle_cusolver_, - n_, - /* A is original matrix */ - nnz_, - descr_A_, - mat_A_csr_->get_vals(), - mat_A_csr_->get_irows(), - mat_A_csr_->get_jcols(), - info_M_); - sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_); + return 0; +} + +bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string memspace) +{ + if(refact_ == "glu") { + double* devx = nullptr; + if(memspace == "device") { + checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyDeviceToDevice)); + devx = dx; } else { - if(refact_ == "rf") { - sp_status_ = cusolverRfResetValues(n_, - nnz_, - mat_A_csr_->get_irows(), - mat_A_csr_->get_jcols(), - mat_A_csr_->get_vals(), - d_P_, - d_Q_, - handle_rf_); - cudaDeviceSynchronize(); - sp_status_ = cusolverRfRefactor(handle_rf_); - } + checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyHostToDevice)); + devx = devx_; } - return 0; + sp_status_ = cusolverSpDgluSolve(handle_cusolver_, + n_, + /* A is original matrix */ + nnz_, + descr_A_, + mat_A_csr_->get_vals(), + mat_A_csr_->get_irows(), + mat_A_csr_->get_jcols(), + devr_, /* right hand side */ + devx, /* left hand side, local pointer */ + &ite_refine_succ_, + &r_nrminf_, + info_M_, + d_work_); + if(sp_status_ != 0 && !silent_output_) { + std::cout << "GLU solve failed with status: " << sp_status_ << "\n"; + return false; + } + if(memspace == "device") { + // do nothing + } else { + checkCudaErrors(cudaMemcpy(dx, devx_, sizeof(double) * n_, cudaMemcpyDeviceToHost)); + } + return true; } - bool RefactorizationSolver::triangular_solve(double* dx, double tol, std::string memspace) - { - if(refact_ == "glu") - { - double* devx = nullptr; + if(refact_ == "rf") { + // First solve is performed on CPU + if(is_first_solve_) { + double* hostx = nullptr; if(memspace == "device") { - checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyDeviceToDevice)); - devx = dx; + checkCudaErrors(cudaMemcpy(hostx_, dx, sizeof(double) * n_, cudaMemcpyDeviceToHost)); + hostx = hostx_; } else { - checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyHostToDevice)); - devx = devx_; - } - sp_status_ = cusolverSpDgluSolve(handle_cusolver_, - n_, - /* A is original matrix */ - nnz_, - descr_A_, - mat_A_csr_->get_vals(), - mat_A_csr_->get_irows(), - mat_A_csr_->get_jcols(), - devr_,/* right hand side */ - devx,/* left hand side, local pointer */ - &ite_refine_succ_, - &r_nrminf_, - info_M_, - d_work_); - if(sp_status_ != 0 && !silent_output_) { - std::cout << "GLU solve failed with status: " << sp_status_ << "\n"; - return false; + hostx = dx; } + int ok = klu_solve(Symbolic_, Numeric_, n_, 1, hostx, &Common_); // replace dx with hostx + klu_free_numeric(&Numeric_, &Common_); + klu_free_symbolic(&Symbolic_, &Common_); + is_first_solve_ = false; if(memspace == "device") { - // do nothing + checkCudaErrors(cudaMemcpy(dx, hostx, sizeof(double) * n_, cudaMemcpyHostToDevice)); } else { - checkCudaErrors(cudaMemcpy(dx, devx_, sizeof(double) * n_, cudaMemcpyDeviceToHost)); + // do nothing } return true; - } - - if(refact_ == "rf") - { - // First solve is performed on CPU - if(is_first_solve_) - { - double* hostx = nullptr; - if(memspace == "device") { - checkCudaErrors(cudaMemcpy(hostx_, dx, sizeof(double) * n_, cudaMemcpyDeviceToHost)); - hostx = hostx_; - } else { - hostx = dx; - } - int ok = klu_solve(Symbolic_, Numeric_, n_, 1, hostx, &Common_); // replace dx with hostx - klu_free_numeric(&Numeric_, &Common_); - klu_free_symbolic(&Symbolic_, &Common_); - is_first_solve_ = false; - if(memspace == "device") { - checkCudaErrors(cudaMemcpy(dx, hostx, sizeof(double) * n_, cudaMemcpyHostToDevice)); - } else { - // do nothing - } - return true; - } + } - double* devx = nullptr; - if(memspace == "device") { - devx = dx; - checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyDeviceToDevice)); - } else { - checkCudaErrors(cudaMemcpy(devx_, dx, sizeof(double) * n_, cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(devr_, devx_, sizeof(double) * n_, cudaMemcpyDeviceToDevice)); - devx = devx_; - } + double* devx = nullptr; + if(memspace == "device") { + devx = dx; + checkCudaErrors(cudaMemcpy(devr_, dx, sizeof(double) * n_, cudaMemcpyDeviceToDevice)); + } else { + checkCudaErrors(cudaMemcpy(devx_, dx, sizeof(double) * n_, cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(devr_, devx_, sizeof(double) * n_, cudaMemcpyDeviceToDevice)); + devx = devx_; + } - // Each next solve is performed on GPU - sp_status_ = cusolverRfSolve(handle_rf_, - d_P_, - d_Q_, - 1, - d_T_, - n_, - devx, // replace devx_ with local pointer devx - n_); - if(sp_status_ != 0) { - if(!silent_output_) - std::cout << "Rf solve failed with status: " << sp_status_ << "\n"; - return false; - } + // Each next solve is performed on GPU + sp_status_ = cusolverRfSolve(handle_rf_, + d_P_, + d_Q_, + 1, + d_T_, + n_, + devx, // replace devx_ with local pointer devx + n_); + if(sp_status_ != 0) { + if(!silent_output_) std::cout << "Rf solve failed with status: " << sp_status_ << "\n"; + return false; + } - if(use_ir_ == "yes") { - // Set tolerance based on barrier parameter mu - ir_->set_tol(tol); - - ir_->fgmres(devx, devr_); // replace devx_ with local pointer devx - if(!silent_output_ && (ir_->getFinalResidalNorm() > tol*ir_->getBNorm())) { - std::cout << "[Warning] Iterative refinement did not converge!\n"; - std::cout << "\t Iterative refinement tolerance " << tol << "\n"; - std::cout << "\t Relative solution error " << ir_->getFinalResidalNorm()/ir_->getBNorm() << "\n"; - std::cout << "\t fgmres: init residual norm: " << ir_->getInitialResidalNorm() << "\n" - << "\t final residual norm: " << ir_->getFinalResidalNorm() << "\n" - << "\t number of iterations: " << ir_->getFinalNumberOfIterations() << "\n"; - } - + if(use_ir_ == "yes") { + // Set tolerance based on barrier parameter mu + ir_->set_tol(tol); + + ir_->fgmres(devx, devr_); // replace devx_ with local pointer devx + if(!silent_output_ && (ir_->getFinalResidalNorm() > tol * ir_->getBNorm())) { + std::cout << "[Warning] Iterative refinement did not converge!\n"; + std::cout << "\t Iterative refinement tolerance " << tol << "\n"; + std::cout << "\t Relative solution error " << ir_->getFinalResidalNorm() / ir_->getBNorm() << "\n"; + std::cout << "\t fgmres: init residual norm: " << ir_->getInitialResidalNorm() << "\n" + << "\t final residual norm: " << ir_->getFinalResidalNorm() << "\n" + << "\t number of iterations: " << ir_->getFinalNumberOfIterations() << "\n"; } - if(memspace == "device") { - // do nothing - } else { - checkCudaErrors(cudaMemcpy(dx, devx_, sizeof(double) * n_, cudaMemcpyDeviceToHost)); - } - return true; } - - if(!silent_output_) { - std::cout << "Unknown refactorization " << refact_ << ", exiting\n"; + if(memspace == "device") { + // do nothing + } else { + checkCudaErrors(cudaMemcpy(dx, devx_, sizeof(double) * n_, cudaMemcpyDeviceToHost)); } - return false; + return true; } - // helper private function needed for format conversion - int RefactorizationSolver::createM(const int n, - const int /* nnzL */, - const int* Lp, - const int* Li, - const int /* nnzU */, - const int* Up, - const int* Ui) - { - int row; - for(int i = 0; i < n; ++i) { - // go through EACH COLUMN OF L first - for(int j = Lp[i]; j < Lp[i + 1]; ++j) { - row = Li[j]; - // BUT dont count diagonal twice, important - if(row != i) { - mia_[row + 1]++; - } - } - // then each column of U - for(int j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; + if(!silent_output_) { + std::cout << "Unknown refactorization " << refact_ << ", exiting\n"; + } + return false; +} + +// helper private function needed for format conversion +int RefactorizationSolver::createM(const int n, + const int /* nnzL */, + const int* Lp, + const int* Li, + const int /* nnzU */, + const int* Up, + const int* Ui) +{ + int row; + for(int i = 0; i < n; ++i) { + // go through EACH COLUMN OF L first + for(int j = Lp[i]; j < Lp[i + 1]; ++j) { + row = Li[j]; + // BUT dont count diagonal twice, important + if(row != i) { mia_[row + 1]++; } } - // then organize mia_; - mia_[0] = 0; - for(int i = 1; i < n + 1; i++) { - mia_[i] += mia_[i - 1]; + // then each column of U + for(int j = Up[i]; j < Up[i + 1]; ++j) { + row = Ui[j]; + mia_[row + 1]++; } + } + // then organize mia_; + mia_[0] = 0; + for(int i = 1; i < n + 1; i++) { + mia_[i] += mia_[i - 1]; + } - std::vector Mshifts(n, 0); - for(int i = 0; i < n; ++i) { - // go through EACH COLUMN OF L first - for(int j = Lp[i]; j < Lp[i + 1]; ++j) { - row = Li[j]; - if(row != i) { - // place (row, i) where it belongs! - mja_[mia_[row] + Mshifts[row]] = i; - Mshifts[row]++; - } - } - // each column of U next - for(int j = Up[i]; j < Up[i + 1]; ++j) { - row = Ui[j]; + std::vector Mshifts(n, 0); + for(int i = 0; i < n; ++i) { + // go through EACH COLUMN OF L first + for(int j = Lp[i]; j < Lp[i + 1]; ++j) { + row = Li[j]; + if(row != i) { + // place (row, i) where it belongs! mja_[mia_[row] + Mshifts[row]] = i; Mshifts[row]++; } } - return 0; - } - - int RefactorizationSolver::initializeKLU() - { - klu_defaults(&Common_); - - // TODO: consider making this a part of setup options so that user can - // set up these values. For now, we keep them hard-wired. - Common_.btf = 0; - Common_.ordering = ordering_; // COLAMD=1; AMD=0 - Common_.tol = 0.1; - Common_.scale = -1; - Common_.halt_if_singular = 1; - - return 0; - } - - int RefactorizationSolver::initializeCusolverGLU() - { - cusparseCreateMatDescr(&descr_M_); - cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL); - cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO); - - // info (data structure where factorization is stored) - // this is done in the constructor - however, this function might be called more than once - cusolverSpDestroyGluInfo(info_M_); - cusolverSpCreateGluInfo(&info_M_); - - cusolver_glu_enabled_ = true; - return 0; - } - - int RefactorizationSolver::initializeCusolverRf() - { - cusolverRfCreate(&handle_rf_); - - checkCudaErrors(cusolverRfSetAlgs(handle_rf_, - CUSOLVERRF_FACTORIZATION_ALG2, - CUSOLVERRF_TRIANGULAR_SOLVE_ALG2)); - - checkCudaErrors(cusolverRfSetMatrixFormat(handle_rf_, - CUSOLVERRF_MATRIX_FORMAT_CSR, - CUSOLVERRF_UNIT_DIAGONAL_STORED_L)); - - cusolverRfSetResetValuesFastMode(handle_rf_, - CUSOLVERRF_RESET_VALUES_FAST_MODE_ON); - - const double boost = 1e-12; - const double zero = 1e-14; - - cusolverRfSetNumericProperties(handle_rf_, zero, boost); - - cusolver_rf_enabled_ = true; - return 0; - } - - // call if both the matrix and the nnz structure changed or if convergence is - // poor while using refactorization. - int RefactorizationSolver::refactorizationSetupCusolverGLU() - { - // for now this ONLY WORKS if proceeded by KLU. Might be worth decoupling - // later - - // get sizes - const int nnzL = Numeric_->lnz; - const int nnzU = Numeric_->unz; - - const int nnzM = (nnzL + nnzU - n_); - - /* parse the factorization */ - - mia_ = new int[n_ + 1]{0}; - mja_ = new int[nnzM]{0}; - int* Lp = new int[n_ + 1]; - int* Li = new int[nnzL]; - // we can't use nullptr instead od Lx and Ux because it causes SEG FAULT. It - // seems like a waste of memory though. - - double* Lx = new double[nnzL]; - int* Up = new int[n_ + 1]; - int* Ui = new int[nnzU]; - - double* Ux = new double[nnzU]; - - int ok = klu_extract(Numeric_, - Symbolic_, - Lp, - Li, - Lx, - Up, - Ui, - Ux, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - &Common_); - createM(n_, nnzL, Lp, Li, nnzU, Up, Ui); - - delete[] Lp; - delete[] Li; - delete[] Lx; - delete[] Up; - delete[] Ui; - delete[] Ux; - - /* setup GLU */ - sp_status_ = cusolverSpDgluSetup(handle_cusolver_, - n_, - nnz_, - descr_A_, - mat_A_csr_->get_irows_host(), //kRowPtr_, - mat_A_csr_->get_jcols_host(), //jCol_, - Numeric_->Pnum, /* base-0 */ - Symbolic_->Q, /* base-0 */ - nnzM, /* nnzM */ - descr_M_, - mia_, - mja_, - info_M_); - - sp_status_ = cusolverSpDgluBufferSize(handle_cusolver_, info_M_, &size_M_); - assert(CUSOLVER_STATUS_SUCCESS == sp_status_); - - buffer_size_ = size_M_; - checkCudaErrors(cudaMalloc((void**)&d_work_, buffer_size_)); - - sp_status_ = cusolverSpDgluAnalysis(handle_cusolver_, info_M_, d_work_); - assert(CUSOLVER_STATUS_SUCCESS == sp_status_); - - // reset and refactor so factors are ON THE GPU - - sp_status_ = cusolverSpDgluReset(handle_cusolver_, - n_, - /* A is original matrix */ - nnz_, - descr_A_, - mat_A_csr_->get_vals(), - mat_A_csr_->get_irows(), - mat_A_csr_->get_jcols(), - info_M_); - - assert(CUSOLVER_STATUS_SUCCESS == sp_status_); - sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_); - return 0; - } - - int RefactorizationSolver::refactorizationSetupCusolverRf() - { - // for now this ONLY WORKS if preceeded by KLU. Might be worth decoupling - // later - const int nnzL = Numeric_->lnz; - const int nnzU = Numeric_->unz; - - checkCudaErrors(cudaMalloc(&d_P_, (n_) * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_Q_, (n_) * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_T_, (n_) * sizeof(double))); - - checkCudaErrors(cudaMemcpy(d_P_, Numeric_->Pnum, sizeof(int) * (n_), cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(d_Q_, Symbolic_->Q, sizeof(int) * (n_), cudaMemcpyHostToDevice)); - - int* Lp = new int[n_ + 1]; - int* Li = new int[nnzL]; - double* Lx = new double[nnzL]; - int* Up = new int[n_ + 1]; - int* Ui = new int[nnzU]; - double* Ux = new double[nnzU]; - - int ok = klu_extract(Numeric_, - Symbolic_, - Lp, - Li, - Lx, - Up, - Ui, - Ux, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - &Common_); - - /* CSC */ - int* d_Lp; - int* d_Li; - int* d_Up; - int* d_Ui; - double* d_Lx; - double* d_Ux; - /* CSR */ - int* d_Lp_csr; - int* d_Li_csr; - int* d_Up_csr; - int* d_Ui_csr; - double* d_Lx_csr; - double* d_Ux_csr; - - /* allocate CSC */ - checkCudaErrors(cudaMalloc(&d_Lp, (n_ + 1) * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_Li, nnzL * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_Lx, nnzL * sizeof(double))); - checkCudaErrors(cudaMalloc(&d_Up, (n_ + 1) * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_Ui, nnzU * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_Ux, nnzU * sizeof(double))); - - /* allocate CSR */ - checkCudaErrors(cudaMalloc(&d_Lp_csr, (n_ + 1) * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_Li_csr, nnzL * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_Lx_csr, nnzL * sizeof(double))); - checkCudaErrors(cudaMalloc(&d_Up_csr, (n_ + 1) * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_Ui_csr, nnzU * sizeof(int))); - checkCudaErrors(cudaMalloc(&d_Ux_csr, nnzU * sizeof(double))); - - /* copy CSC to the GPU */ - checkCudaErrors(cudaMemcpy(d_Lp, Lp, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(d_Li, Li, sizeof(int) * (nnzL), cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(d_Lx, Lx, sizeof(double) * (nnzL), cudaMemcpyHostToDevice)); - - checkCudaErrors(cudaMemcpy(d_Up, Up, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(d_Ui, Ui, sizeof(int) * (nnzU), cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(d_Ux, Ux, sizeof(double) * (nnzU), cudaMemcpyHostToDevice)); - - /* we dont need these any more */ - delete[] Lp; - delete[] Li; - delete[] Lx; - delete[] Up; - delete[] Ui; - delete[] Ux; - - /* now CSC to CSR using the new cuda 11 awkward way */ - size_t bufferSizeL; - size_t bufferSizeU; - - cusparseStatus_t csp = cusparseCsr2cscEx2_bufferSize(handle_, - n_, - n_, - nnzL, - d_Lx, - d_Lp, - d_Li, - d_Lx_csr, - d_Lp_csr, - d_Li_csr, - CUDA_R_64F, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - &bufferSizeL); - - csp = cusparseCsr2cscEx2_bufferSize(handle_, - n_, - n_, - nnzU, - d_Ux, - d_Up, - d_Ui, - d_Ux_csr, - d_Up_csr, - d_Ui_csr, - CUDA_R_64F, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - &bufferSizeU); - /* allocate buffers */ - - double* d_workL; - double* d_workU; - checkCudaErrors(cudaMalloc((void**)&d_workL, bufferSizeL)); - checkCudaErrors(cudaMalloc((void**)&d_workU, bufferSizeU)); - - /* actual CSC to CSR */ - - csp = cusparseCsr2cscEx2(handle_, - n_, - n_, - nnzL, - d_Lx, - d_Lp, - d_Li, - d_Lx_csr, - d_Lp_csr, - d_Li_csr, - CUDA_R_64F, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - d_workL); - - csp = cusparseCsr2cscEx2(handle_, - n_, - n_, - nnzU, - d_Ux, - d_Up, - d_Ui, - d_Ux_csr, - d_Up_csr, - d_Ui_csr, - CUDA_R_64F, - CUSPARSE_ACTION_NUMERIC, - CUSPARSE_INDEX_BASE_ZERO, - CUSPARSE_CSR2CSC_ALG1, - d_workU); - - (void)csp; // mute unused variable warnings - - /* CSC no longer needed, nor the work arrays! */ - - cudaFree(d_Lp); - cudaFree(d_Li); - cudaFree(d_Lx); - - cudaFree(d_Up); - cudaFree(d_Ui); - cudaFree(d_Ux); - - cudaFree(d_workU); - cudaFree(d_workL); - - /* actual setup */ - - sp_status_ = cusolverRfSetupDevice(n_, - nnz_, - mat_A_csr_->get_irows(), //dia_, - mat_A_csr_->get_jcols(), //dja_, - mat_A_csr_->get_vals(), //da_, - nnzL, - d_Lp_csr, - d_Li_csr, - d_Lx_csr, - nnzU, - d_Up_csr, - d_Ui_csr, - d_Ux_csr, - d_P_, - d_Q_, - handle_rf_); - cudaDeviceSynchronize(); - sp_status_ = cusolverRfAnalyze(handle_rf_); - - //clean up - cudaFree(d_Lp_csr); - cudaFree(d_Li_csr); - cudaFree(d_Lx_csr); - - cudaFree(d_Up_csr); - cudaFree(d_Ui_csr); - cudaFree(d_Ux_csr); - - return 0; - } - - - - // Error checking utility for CUDA - // KS: might later become part of src/Utils, putting it here for now - template - void RefactorizationSolver::resolveCheckCudaError(T result, - const char* const file, - int const line) - { - if(result) { - fprintf(stdout, - "CUDA error at %s:%d, error# %d\n", - file, - line, - result); - assert(false); + // each column of U next + for(int j = Up[i]; j < Up[i + 1]; ++j) { + row = Ui[j]; + mja_[mia_[row] + Mshifts[row]] = i; + Mshifts[row]++; } } + return 0; +} + +int RefactorizationSolver::initializeKLU() +{ + klu_defaults(&Common_); + + // TODO: consider making this a part of setup options so that user can + // set up these values. For now, we keep them hard-wired. + Common_.btf = 0; + Common_.ordering = ordering_; // COLAMD=1; AMD=0 + Common_.tol = 0.1; + Common_.scale = -1; + Common_.halt_if_singular = 1; + + return 0; +} + +int RefactorizationSolver::initializeCusolverGLU() +{ + cusparseCreateMatDescr(&descr_M_); + cusparseSetMatType(descr_M_, CUSPARSE_MATRIX_TYPE_GENERAL); + cusparseSetMatIndexBase(descr_M_, CUSPARSE_INDEX_BASE_ZERO); + + // info (data structure where factorization is stored) + // this is done in the constructor - however, this function might be called more than once + cusolverSpDestroyGluInfo(info_M_); + cusolverSpCreateGluInfo(&info_M_); + + cusolver_glu_enabled_ = true; + return 0; +} + +int RefactorizationSolver::initializeCusolverRf() +{ + cusolverRfCreate(&handle_rf_); + + checkCudaErrors(cusolverRfSetAlgs(handle_rf_, CUSOLVERRF_FACTORIZATION_ALG2, CUSOLVERRF_TRIANGULAR_SOLVE_ALG2)); + + checkCudaErrors(cusolverRfSetMatrixFormat(handle_rf_, CUSOLVERRF_MATRIX_FORMAT_CSR, CUSOLVERRF_UNIT_DIAGONAL_STORED_L)); + + cusolverRfSetResetValuesFastMode(handle_rf_, CUSOLVERRF_RESET_VALUES_FAST_MODE_ON); + + const double boost = 1e-12; + const double zero = 1e-14; + + cusolverRfSetNumericProperties(handle_rf_, zero, boost); + + cusolver_rf_enabled_ = true; + return 0; +} + +// call if both the matrix and the nnz structure changed or if convergence is +// poor while using refactorization. +int RefactorizationSolver::refactorizationSetupCusolverGLU() +{ + // for now this ONLY WORKS if proceeded by KLU. Might be worth decoupling + // later + + // get sizes + const int nnzL = Numeric_->lnz; + const int nnzU = Numeric_->unz; + + const int nnzM = (nnzL + nnzU - n_); + + /* parse the factorization */ + + mia_ = new int[n_ + 1]{0}; + mja_ = new int[nnzM]{0}; + int* Lp = new int[n_ + 1]; + int* Li = new int[nnzL]; + // we can't use nullptr instead od Lx and Ux because it causes SEG FAULT. It + // seems like a waste of memory though. + + double* Lx = new double[nnzL]; + int* Up = new int[n_ + 1]; + int* Ui = new int[nnzU]; + + double* Ux = new double[nnzU]; + + int ok = klu_extract(Numeric_, + Symbolic_, + Lp, + Li, + Lx, + Up, + Ui, + Ux, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + &Common_); + createM(n_, nnzL, Lp, Li, nnzU, Up, Ui); + + delete[] Lp; + delete[] Li; + delete[] Lx; + delete[] Up; + delete[] Ui; + delete[] Ux; + + /* setup GLU */ + sp_status_ = cusolverSpDgluSetup(handle_cusolver_, + n_, + nnz_, + descr_A_, + mat_A_csr_->get_irows_host(), // kRowPtr_, + mat_A_csr_->get_jcols_host(), // jCol_, + Numeric_->Pnum, /* base-0 */ + Symbolic_->Q, /* base-0 */ + nnzM, /* nnzM */ + descr_M_, + mia_, + mja_, + info_M_); + + sp_status_ = cusolverSpDgluBufferSize(handle_cusolver_, info_M_, &size_M_); + assert(CUSOLVER_STATUS_SUCCESS == sp_status_); + + buffer_size_ = size_M_; + checkCudaErrors(cudaMalloc((void**)&d_work_, buffer_size_)); + + sp_status_ = cusolverSpDgluAnalysis(handle_cusolver_, info_M_, d_work_); + assert(CUSOLVER_STATUS_SUCCESS == sp_status_); + + // reset and refactor so factors are ON THE GPU + + sp_status_ = cusolverSpDgluReset(handle_cusolver_, + n_, + /* A is original matrix */ + nnz_, + descr_A_, + mat_A_csr_->get_vals(), + mat_A_csr_->get_irows(), + mat_A_csr_->get_jcols(), + info_M_); + + assert(CUSOLVER_STATUS_SUCCESS == sp_status_); + sp_status_ = cusolverSpDgluFactor(handle_cusolver_, info_M_, d_work_); + return 0; +} + +int RefactorizationSolver::refactorizationSetupCusolverRf() +{ + // for now this ONLY WORKS if preceeded by KLU. Might be worth decoupling + // later + const int nnzL = Numeric_->lnz; + const int nnzU = Numeric_->unz; + + checkCudaErrors(cudaMalloc(&d_P_, (n_) * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_Q_, (n_) * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_T_, (n_) * sizeof(double))); + + checkCudaErrors(cudaMemcpy(d_P_, Numeric_->Pnum, sizeof(int) * (n_), cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(d_Q_, Symbolic_->Q, sizeof(int) * (n_), cudaMemcpyHostToDevice)); + + int* Lp = new int[n_ + 1]; + int* Li = new int[nnzL]; + double* Lx = new double[nnzL]; + int* Up = new int[n_ + 1]; + int* Ui = new int[nnzU]; + double* Ux = new double[nnzU]; + + int ok = klu_extract(Numeric_, + Symbolic_, + Lp, + Li, + Lx, + Up, + Ui, + Ux, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + nullptr, + &Common_); + + /* CSC */ + int* d_Lp; + int* d_Li; + int* d_Up; + int* d_Ui; + double* d_Lx; + double* d_Ux; + /* CSR */ + int* d_Lp_csr; + int* d_Li_csr; + int* d_Up_csr; + int* d_Ui_csr; + double* d_Lx_csr; + double* d_Ux_csr; + + /* allocate CSC */ + checkCudaErrors(cudaMalloc(&d_Lp, (n_ + 1) * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_Li, nnzL * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_Lx, nnzL * sizeof(double))); + checkCudaErrors(cudaMalloc(&d_Up, (n_ + 1) * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_Ui, nnzU * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_Ux, nnzU * sizeof(double))); + + /* allocate CSR */ + checkCudaErrors(cudaMalloc(&d_Lp_csr, (n_ + 1) * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_Li_csr, nnzL * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_Lx_csr, nnzL * sizeof(double))); + checkCudaErrors(cudaMalloc(&d_Up_csr, (n_ + 1) * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_Ui_csr, nnzU * sizeof(int))); + checkCudaErrors(cudaMalloc(&d_Ux_csr, nnzU * sizeof(double))); + + /* copy CSC to the GPU */ + checkCudaErrors(cudaMemcpy(d_Lp, Lp, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(d_Li, Li, sizeof(int) * (nnzL), cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(d_Lx, Lx, sizeof(double) * (nnzL), cudaMemcpyHostToDevice)); + + checkCudaErrors(cudaMemcpy(d_Up, Up, sizeof(int) * (n_ + 1), cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(d_Ui, Ui, sizeof(int) * (nnzU), cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(d_Ux, Ux, sizeof(double) * (nnzU), cudaMemcpyHostToDevice)); + + /* we dont need these any more */ + delete[] Lp; + delete[] Li; + delete[] Lx; + delete[] Up; + delete[] Ui; + delete[] Ux; + + /* now CSC to CSR using the new cuda 11 awkward way */ + size_t bufferSizeL; + size_t bufferSizeU; + + cusparseStatus_t csp = cusparseCsr2cscEx2_bufferSize(handle_, + n_, + n_, + nnzL, + d_Lx, + d_Lp, + d_Li, + d_Lx_csr, + d_Lp_csr, + d_Li_csr, + CUDA_R_64F, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + &bufferSizeL); + + csp = cusparseCsr2cscEx2_bufferSize(handle_, + n_, + n_, + nnzU, + d_Ux, + d_Up, + d_Ui, + d_Ux_csr, + d_Up_csr, + d_Ui_csr, + CUDA_R_64F, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + &bufferSizeU); + /* allocate buffers */ + + double* d_workL; + double* d_workU; + checkCudaErrors(cudaMalloc((void**)&d_workL, bufferSizeL)); + checkCudaErrors(cudaMalloc((void**)&d_workU, bufferSizeU)); + + /* actual CSC to CSR */ + + csp = cusparseCsr2cscEx2(handle_, + n_, + n_, + nnzL, + d_Lx, + d_Lp, + d_Li, + d_Lx_csr, + d_Lp_csr, + d_Li_csr, + CUDA_R_64F, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + d_workL); + + csp = cusparseCsr2cscEx2(handle_, + n_, + n_, + nnzU, + d_Ux, + d_Up, + d_Ui, + d_Ux_csr, + d_Up_csr, + d_Ui_csr, + CUDA_R_64F, + CUSPARSE_ACTION_NUMERIC, + CUSPARSE_INDEX_BASE_ZERO, + CUSPARSE_CSR2CSC_ALG1, + d_workU); + + (void)csp; // mute unused variable warnings + + /* CSC no longer needed, nor the work arrays! */ + + cudaFree(d_Lp); + cudaFree(d_Li); + cudaFree(d_Lx); + + cudaFree(d_Up); + cudaFree(d_Ui); + cudaFree(d_Ux); + + cudaFree(d_workU); + cudaFree(d_workL); + + /* actual setup */ + + sp_status_ = cusolverRfSetupDevice(n_, + nnz_, + mat_A_csr_->get_irows(), // dia_, + mat_A_csr_->get_jcols(), // dja_, + mat_A_csr_->get_vals(), // da_, + nnzL, + d_Lp_csr, + d_Li_csr, + d_Lx_csr, + nnzU, + d_Up_csr, + d_Ui_csr, + d_Ux_csr, + d_P_, + d_Q_, + handle_rf_); + cudaDeviceSynchronize(); + sp_status_ = cusolverRfAnalyze(handle_rf_); + + // clean up + cudaFree(d_Lp_csr); + cudaFree(d_Li_csr); + cudaFree(d_Lx_csr); + + cudaFree(d_Up_csr); + cudaFree(d_Ui_csr); + cudaFree(d_Ux_csr); + + return 0; +} + +// Error checking utility for CUDA +// KS: might later become part of src/Utils, putting it here for now +template +void RefactorizationSolver::resolveCheckCudaError(T result, const char* const file, int const line) +{ + if(result) { + fprintf(stdout, "CUDA error at %s:%d, error# %d\n", file, line, result); + assert(false); + } +} -} // namespace ReSolve +} // namespace ReSolve diff --git a/src/LinAlg/ReSolve/RefactorizationSolver.hpp b/src/LinAlg/ReSolve/RefactorizationSolver.hpp index f88736a21..7cd2b7e14 100644 --- a/src/LinAlg/ReSolve/RefactorizationSolver.hpp +++ b/src/LinAlg/ReSolve/RefactorizationSolver.hpp @@ -60,16 +60,15 @@ #include "resolve_cusolver_defs.hpp" #include +namespace ReSolve +{ -namespace ReSolve { - - class MatrixCsr; - class IterativeRefinement; - +class MatrixCsr; +class IterativeRefinement; /** * @brief Implements refactorization solvers using KLU and cuSOLVER libraries - * + * */ class RefactorizationSolver { @@ -81,8 +80,8 @@ class RefactorizationSolver void enable_iterative_refinement(); void setup_iterative_refinement_matrix(int n, int nnz); - void configure_iterative_refinement(cusparseHandle_t cusparse_handle, - cublasHandle_t cublas_handle, + void configure_iterative_refinement(cusparseHandle_t cusparse_handle, + cublasHandle_t cublas_handle, cusolverRfHandle_t cusolverrf_handle, int n, double* d_T, @@ -93,109 +92,81 @@ class RefactorizationSolver /** * @brief Set the number of nonzeros in system matrix. - * - * @param nnz + * + * @param nnz */ - void set_nnz(int nnz) - { - nnz_ = nnz; - } - - IterativeRefinement* ir() - { - return ir_; - } - - MatrixCsr* mat_A_csr() - { - return mat_A_csr_; - } - - double* devr() - { - return devr_; - } - - int& ordering() - { - return ordering_; - } - - std::string& fact() - { - return fact_; - } - - std::string& refact() - { - return refact_; - } - - std::string& use_ir() - { - return use_ir_; - } - - void set_silent_output(bool silent_output) - { - silent_output_ = silent_output; - } - + void set_nnz(int nnz) { nnz_ = nnz; } + + IterativeRefinement* ir() { return ir_; } + + MatrixCsr* mat_A_csr() { return mat_A_csr_; } + + double* devr() { return devr_; } + + int& ordering() { return ordering_; } + + std::string& fact() { return fact_; } + + std::string& refact() { return refact_; } + + std::string& use_ir() { return use_ir_; } + + void set_silent_output(bool silent_output) { silent_output_ = silent_output; } + /** * @brief Set up factorization of the first linear system. - * - * @return int + * + * @return int */ int setup_factorization(); /** * @brief Factorize system matrix - * + * * @return int - factorization status: success=0, failure=-1 */ int factorize(); /** * @brief Set the up the refactorization - * + * */ void setup_refactorization(); /** * @brief Refactorize system matrix - * - * @return int + * + * @return int */ int refactorize(); /** * @brief Invokes triangular solver given matrix factors - * - * @param dx - * @param tol - * @return bool + * + * @param dx + * @param tol + * @return bool */ bool triangular_solve(double* dx, double tol, std::string memspace); - private: - int n_{ 0 }; ///< Size of the linear system - int nnz_{ 0 }; ///< Number of nonzeros in the system's matrix + int n_{0}; ///< Size of the linear system + int nnz_{0}; ///< Number of nonzeros in the system's matrix - MatrixCsr* mat_A_csr_{ nullptr }; ///< System matrix in nonsymmetric CSR format - IterativeRefinement* ir_{ nullptr }; ///< Iterative refinement class + MatrixCsr* mat_A_csr_{nullptr}; ///< System matrix in nonsymmetric CSR format + IterativeRefinement* ir_{nullptr}; ///< Iterative refinement class - bool cusolver_glu_enabled_{ false }; ///< cusolverGLU on/off flag - bool cusolver_rf_enabled_{ false }; ///< cusolverRf on/off flag - bool iterative_refinement_enabled_{ false }; ///< Iterative refinement on/off flag - bool is_first_solve_{ true }; ///< If it is first call to triangular solver + bool cusolver_glu_enabled_{false}; ///< cusolverGLU on/off flag + bool cusolver_rf_enabled_{false}; ///< cusolverRf on/off flag + bool iterative_refinement_enabled_{false}; ///< Iterative refinement on/off flag + bool is_first_solve_{true}; ///< If it is first call to triangular solver // Options - int ordering_{ -1 }; + int ordering_{-1}; std::string fact_; std::string refact_; std::string use_ir_; - bool silent_output_{ true }; + bool silent_output_{true}; /** needed for cuSolver **/ @@ -234,28 +205,22 @@ class RefactorizationSolver /* needed for cuSolverRf */ int* d_P_ = nullptr; - int* d_Q_ = nullptr; // permutation matrices + int* d_Q_ = nullptr; // permutation matrices double* d_T_ = nullptr; /** * @brief Function that computes M = (L-I) + U - * - * @param n - * @param nnzL - * @param Lp - * @param Li - * @param nnzU - * @param Up - * @param Ui - * @return int + * + * @param n + * @param nnzL + * @param Lp + * @param Li + * @param nnzU + * @param Up + * @param Ui + * @return int */ - int createM(const int n, - const int nnzL, - const int* Lp, - const int* Li, - const int nnzU, - const int* Up, - const int* Ui); + int createM(const int n, const int nnzL, const int* Lp, const int* Li, const int nnzU, const int* Up, const int* Ui); int initializeKLU(); int initializeCusolverGLU(); @@ -264,18 +229,16 @@ class RefactorizationSolver int refactorizationSetupCusolverGLU(); int refactorizationSetupCusolverRf(); - /** * @brief Check for CUDA errors. - * + * * @tparam T - type of the result * @param result - result value * @param file - file name where the error occured * @param line - line at which the error occured */ - template + template void resolveCheckCudaError(T result, const char* const file, int const line); - }; -} // namespace ReSolve +} // namespace ReSolve diff --git a/src/LinAlg/ReSolve/resolve_cusolver_defs.hpp b/src/LinAlg/ReSolve/resolve_cusolver_defs.hpp index a9fa63fb1..ce3b1fa46 100644 --- a/src/LinAlg/ReSolve/resolve_cusolver_defs.hpp +++ b/src/LinAlg/ReSolve/resolve_cusolver_defs.hpp @@ -46,12 +46,11 @@ * @file hiop_cusolver_defs.hpp * * @author Kasia Swirydowicz , PNNL - * + * * Contains prototypes of cuSOLVER functions not in public API. * */ - #ifndef CUSOLVERDEFS_H #define CUSOLVERDEFS_H @@ -65,85 +64,69 @@ #include "cusolverRf.h" extern "C" { - /* - * prototype not in public header file - */ - struct csrgluInfo; - typedef struct csrgluInfo *csrgluInfo_t; - - cusolverStatus_t CUSOLVERAPI - cusolverSpCreateGluInfo(csrgluInfo_t *info); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDestroyGluInfo(csrgluInfo_t info); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluSetup(cusolverSpHandle_t handle, - int m, - /* A can be base-0 or base-1 */ - int nnzA, - const cusparseMatDescr_t descrA, - const int* h_csrRowPtrA, - const int* h_csrColIndA, - const int* h_P, /* base-0 */ - const int* h_Q, /* base-0 */ - /* M can be base-0 or base-1 */ - int nnzM, - const cusparseMatDescr_t descrM, - const int* h_csrRowPtrM, - const int* h_csrColIndM, - csrgluInfo_t info); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluBufferSize(cusolverSpHandle_t handle, - csrgluInfo_t info, - size_t* pBufferSize); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluAnalysis(cusolverSpHandle_t handle, - csrgluInfo_t info, - void* workspace); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluReset(cusolverSpHandle_t handle, - int m, - /* A is original matrix */ - int nnzA, - const cusparseMatDescr_t descr_A, - const double* d_csrValA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - csrgluInfo_t info); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluFactor(cusolverSpHandle_t handle, - csrgluInfo_t info, - void *workspace); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDgluSolve(cusolverSpHandle_t handle, - int m, - /* A is original matrix */ - int nnzA, - const cusparseMatDescr_t descr_A, - const double *d_csrValA, - const int* d_csrRowPtrA, - const int* d_csrColIndA, - const double* d_b0, /* right hand side */ - double* d_x, /* left hand side */ - int* ite_refine_succ, - double* r_nrminf_ptr, - csrgluInfo_t info, - void* workspace); - - cusolverStatus_t CUSOLVERAPI - cusolverSpDnrminf(cusolverSpHandle_t handle, - int n, - const double *x, - double* result, /* |x|_inf, host */ - void* d_work); /* at least 8192 bytes */ - -} // extern "C" - - -#endif // CUSOLVERDEFS_H +/* + * prototype not in public header file + */ +struct csrgluInfo; +typedef struct csrgluInfo* csrgluInfo_t; + +cusolverStatus_t CUSOLVERAPI cusolverSpCreateGluInfo(csrgluInfo_t* info); + +cusolverStatus_t CUSOLVERAPI cusolverSpDestroyGluInfo(csrgluInfo_t info); + +cusolverStatus_t CUSOLVERAPI cusolverSpDgluSetup(cusolverSpHandle_t handle, + int m, + /* A can be base-0 or base-1 */ + int nnzA, + const cusparseMatDescr_t descrA, + const int* h_csrRowPtrA, + const int* h_csrColIndA, + const int* h_P, /* base-0 */ + const int* h_Q, /* base-0 */ + /* M can be base-0 or base-1 */ + int nnzM, + const cusparseMatDescr_t descrM, + const int* h_csrRowPtrM, + const int* h_csrColIndM, + csrgluInfo_t info); + +cusolverStatus_t CUSOLVERAPI cusolverSpDgluBufferSize(cusolverSpHandle_t handle, csrgluInfo_t info, size_t* pBufferSize); + +cusolverStatus_t CUSOLVERAPI cusolverSpDgluAnalysis(cusolverSpHandle_t handle, csrgluInfo_t info, void* workspace); + +cusolverStatus_t CUSOLVERAPI cusolverSpDgluReset(cusolverSpHandle_t handle, + int m, + /* A is original matrix */ + int nnzA, + const cusparseMatDescr_t descr_A, + const double* d_csrValA, + const int* d_csrRowPtrA, + const int* d_csrColIndA, + csrgluInfo_t info); + +cusolverStatus_t CUSOLVERAPI cusolverSpDgluFactor(cusolverSpHandle_t handle, csrgluInfo_t info, void* workspace); + +cusolverStatus_t CUSOLVERAPI cusolverSpDgluSolve(cusolverSpHandle_t handle, + int m, + /* A is original matrix */ + int nnzA, + const cusparseMatDescr_t descr_A, + const double* d_csrValA, + const int* d_csrRowPtrA, + const int* d_csrColIndA, + const double* d_b0, /* right hand side */ + double* d_x, /* left hand side */ + int* ite_refine_succ, + double* r_nrminf_ptr, + csrgluInfo_t info, + void* workspace); + +cusolverStatus_t CUSOLVERAPI cusolverSpDnrminf(cusolverSpHandle_t handle, + int n, + const double* x, + double* result, /* |x|_inf, host */ + void* d_work); /* at least 8192 bytes */ + +} // extern "C" + +#endif // CUSOLVERDEFS_H diff --git a/src/LinAlg/VectorCudaKernels.hpp b/src/LinAlg/VectorCudaKernels.hpp index 90fe1f355..1a7e478b1 100644 --- a/src/LinAlg/VectorCudaKernels.hpp +++ b/src/LinAlg/VectorCudaKernels.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -57,66 +57,37 @@ #include #include "hiopInterface.hpp" - namespace hiop { namespace cuda { -/// @brief Copy from src the elements specified by the indices in id. -void copy_from_index_kernel(int n_local, - double* yd, - const double* src, - const int* id); +/// @brief Copy from src the elements specified by the indices in id. +void copy_from_index_kernel(int n_local, double* yd, const double* src, const int* id); /** @brief Set y[i] = min(y[i],c), for i=[0,n_local-1] */ -void component_min_kernel(int n_local, - double* yd, - double c); +void component_min_kernel(int n_local, double* yd, double c); /** @brief Set y[i] = min(y[i],x[i]), for i=[0,n_local-1] */ -void component_min_kernel(int n_local, - double* yd, - const double* xd); +void component_min_kernel(int n_local, double* yd, const double* xd); /** @brief Set y[i] = max(y[i],c), for i=[0,n_local-1] */ -void component_max_kernel(int n_local, - double* yd, - double c); +void component_max_kernel(int n_local, double* yd, double c); /** @brief Set y[i] = max(y[i],x[i]), for i=[0,n_local-1] */ -void component_max_kernel(int n_local, - double* yd, - const double* xd); +void component_max_kernel(int n_local, double* yd, const double* xd); /// @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. -void axpy_w_map_kernel(int n_local, - double* yd, - const double* xd, - const int* id, - double alpha); +void axpy_w_map_kernel(int n_local, double* yd, const double* xd, const int* id, double alpha); /** @brief this[i] += alpha*x[i]*z[i] forall i */ -void axzpy_kernel(int n_local, - double* yd, - const double* xd, - const double* zd, - double alpha); +void axzpy_kernel(int n_local, double* yd, const double* xd, const double* zd, double alpha); /** @brief this[i] += alpha*x[i]/z[i] forall i */ -void axdzpy_kernel(int n_local, - double* yd, - const double* xd, - const double* zd, - double alpha); +void axdzpy_kernel(int n_local, double* yd, const double* xd, const double* zd, double alpha); /** @brief this[i] += alpha*x[i]/z[i] forall i with pattern selection */ -void axdzpy_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* zd, - const double* id, - double alpha); +void axdzpy_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* zd, const double* id, double alpha); /** @brief this[i] += c forall i */ void add_constant_kernel(int n_local, double* yd, double c); @@ -128,48 +99,25 @@ void add_constant_w_pattern_kernel(int n_local, double* yd, const double* id, do void invert_kernel(int n_local, double* yd); /** @brief y[i] += alpha*1/x[i] + y[i] forall i with pattern selection */ -void adxpy_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* ld, - double alpha); +void adxpy_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* ld, double alpha); /** @brief y[i] = y[i]/x[i] c forall i with pattern selection */ -void component_div_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id); +void component_div_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id); /** @brief Linear damping term */ -void set_linear_damping_term_kernel(int n_local, - double* yd, - const double* vd, - const double* ld, - const double* rd); - -/** -* @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of -* ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. -*/ -void add_linear_damping_term_kernel(int n_local, - double* yd, - const double* ixl, - const double* ixr, - double alpha, - double ct); +void set_linear_damping_term_kernel(int n_local, double* yd, const double* vd, const double* ld, const double* rd); + +/** + * @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + */ +void add_linear_damping_term_kernel(int n_local, double* yd, const double* ixl, const double* ixr, double alpha, double ct); /** @brief y[i] = 1.0 if x[i] is positive and id[i] = 1.0, otherwise y[i] = 0 */ -void is_posive_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id); +void is_posive_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id); /** @brief y[i] = x[i] if id[i] = 1.0, otherwise y[i] = val_else */ -void set_val_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id, - double val_else); +void set_val_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id, double val_else); /** @brief Project solution into bounds */ void project_into_bounds_kernel(int n_local, @@ -183,11 +131,7 @@ void project_into_bounds_kernel(int n_local, double small_real); /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} */ -void fraction_to_the_boundry_kernel(int n_local, - double* yd, - const double* xd, - const double* dd, - double tau); +void fraction_to_the_boundry_kernel(int n_local, double* yd, const double* xd, const double* dd, double tau); /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern select */ void fraction_to_the_boundry_w_pattern_kernel(int n_local, @@ -204,25 +148,20 @@ void select_pattern_kernel(int n_local, double* yd, const double* id); void component_match_pattern_kernel(int n_local, int* yd, const double* xd, const double* id); /** @brief Adjusts duals. */ -void adjustDuals_plh_kernel(int n_local, - double* yd, - const double* xd, - const double* id, - double mu, - double kappa); +void adjustDuals_plh_kernel(int n_local, double* yd, const double* xd, const double* id, double mu, double kappa); /// @brief set int array 'arr', starting at `start` and ending at `end`, to the values in `arr_src` from 'start_src` -void set_array_from_to_kernel(int n_local, - hiop::hiopInterfaceBase::NonlinearityType* arr, - int start, - int length, +void set_array_from_to_kernel(int n_local, + hiop::hiopInterfaceBase::NonlinearityType* arr, + int start, + int length, const hiop::hiopInterfaceBase::NonlinearityType* arr_src, int start_src); /// @brief set int array 'arr', starting at `start` and ending at `end`, to the values in `arr_src` from 'start_src` -void set_array_from_to_kernel(int n_local, - hiop::hiopInterfaceBase::NonlinearityType* arr, - int start, +void set_array_from_to_kernel(int n_local, + hiop::hiopInterfaceBase::NonlinearityType* arr, + int start, int length, hiop::hiopInterfaceBase::NonlinearityType arr_src); @@ -251,12 +190,7 @@ double log_barr_obj_kernel(int n, double* d1, const double* id); /** @brief compute sum(d1[i]) */ double thrust_sum_kernel(int n, double* d1); /** @brief Linear damping term */ -double linear_damping_term_kernel(int n, - const double* vd, - const double* ld, - const double* rd, - double mu, - double kappa_d); +double linear_damping_term_kernel(int n, const double* vd, const double* ld, const double* rd, double mu, double kappa_d); /** @brief compute min(d1) */ double min_local_kernel(int n, double* d1); /** @brief Checks if selected elements of `d1` are positive */ @@ -268,11 +202,7 @@ bool check_bounds_kernel(int n, const double* xld, const double* xud); /** @brief compute max{a\in(0,1]| x+ad >=(1-tau)x} */ double min_frac_to_bds_kernel(int n, const double* xd, const double* dd, double tau); /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern id */ -double min_frac_to_bds_w_pattern_kernel(int n, - const double* xd, - const double* dd, - const double* id, - double tau); +double min_frac_to_bds_w_pattern_kernel(int n, const double* xd, const double* dd, const double* id, double tau); /** @brief Checks if `xd` matches nonzero pattern of `id`. */ bool match_pattern_kernel(int n, const double* xd, const double* id); /** @brief Checks if all x[i] = 0 */ @@ -289,16 +219,16 @@ int num_of_elem_less_than_kernel(int n, double* xd, double val); int num_of_elem_absless_than_kernel(int n, double* xd, double val); /// @brief Copy the entries in 'dd' where corresponding 'ix' is nonzero, to vd starting at start_index_in_dest. -void copyToStartingAt_w_pattern_kernel(int n_src, +void copyToStartingAt_w_pattern_kernel(int n_src, int n_dest, int start_index_in_dest, - int* nnz_cumsum, - double *vd, + int* nnz_cumsum, + double* vd, const double* dd); /// for hiopVectorIntCuda /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * */ @@ -306,7 +236,6 @@ void set_to_linspace_kernel(int sz, int* buf, int i0, int di); /** @brief compute cusum from the given pattern*/ void compute_cusum_kernel(int sz, int* buf, const double* id); -} -} +} // namespace cuda +} // namespace hiop #endif - diff --git a/src/LinAlg/VectorHipKernels.cpp b/src/LinAlg/VectorHipKernels.cpp index 413ee183f..908a2c822 100644 --- a/src/LinAlg/VectorHipKernels.cpp +++ b/src/LinAlg/VectorHipKernels.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -68,155 +68,113 @@ #include #include - -//#include -//#include +// #include +// #include /// @brief compute abs(b-a) -template -struct thrust_abs_diff: public thrust::binary_function +template +struct thrust_abs_diff : public thrust::binary_function { - __host__ __device__ - T operator()(const T& a, const T& b) - { - return fabs(b - a); - } + __host__ __device__ T operator()(const T& a, const T& b) { return fabs(b - a); } }; /// @brief compute abs(a) -template -struct thrust_abs: public thrust::unary_function +template +struct thrust_abs : public thrust::unary_function { - __host__ __device__ - T operator()(const T& a) - { - return fabs(a); - } + __host__ __device__ T operator()(const T& a) { return fabs(a); } }; /// @brief return true if abs(a) < tol_ struct thrust_abs_less { const double tol_; - thrust_abs_less(double tol) : tol_(tol) {} + thrust_abs_less(double tol) + : tol_(tol) + {} - __host__ __device__ - int operator()(const double& a) - { - return (fabs(a) < tol_); - } + __host__ __device__ int operator()(const double& a) { return (fabs(a) < tol_); } }; /// @brief return true if a < tol_ struct thrust_less { const double tol_; - thrust_less(double tol) : tol_(tol) {} + thrust_less(double tol) + : tol_(tol) + {} - __host__ __device__ - int operator()(const double& a) - { - return (a < tol_); - } + __host__ __device__ int operator()(const double& a) { return (a < tol_); } }; /// @brief return true if (0.0 < a) - (a < 0.0) -template -struct thrust_sig: public thrust::unary_function +template +struct thrust_sig : public thrust::unary_function { - __host__ __device__ - T operator()(const T& a) - { - return static_cast( (0.0 < a) - (a < 0.0) ); - } + __host__ __device__ T operator()(const T& a) { return static_cast((0.0 < a) - (a < 0.0)); } }; /// @brief compute sqrt(a) -template -struct thrust_sqrt: public thrust::unary_function +template +struct thrust_sqrt : public thrust::unary_function { - __host__ __device__ - T operator()(const T& a) - { - return sqrt(a); - } + __host__ __device__ T operator()(const T& a) { return sqrt(a); } }; /// @brief compute log(a) if a > 0, otherwise returns 0 -template -struct thrust_log_select: public thrust::unary_function +template +struct thrust_log_select : public thrust::unary_function { - __host__ __device__ - double operator()(const T& a) + __host__ __device__ double operator()(const T& a) { - if(a>0) { + if(a > 0) { return log(a); } - return 0.; + return 0.; } }; /// @brief compute isinf(a) -template -struct thrust_isinf: public thrust::unary_function +template +struct thrust_isinf : public thrust::unary_function { - __host__ __device__ - bool operator()(const T& a) - { - return isinf(a); - } + __host__ __device__ bool operator()(const T& a) { return isinf(a); } }; /// @brief compute isfinite(a) -template -struct thrust_isfinite: public thrust::unary_function +template +struct thrust_isfinite : public thrust::unary_function { - __host__ __device__ - bool operator()(const T& a) - { - return isfinite(a); - } + __host__ __device__ bool operator()(const T& a) { return isfinite(a); } }; /// @brief compute a==0.0 -template -struct thrust_iszero: public thrust::unary_function +template +struct thrust_iszero : public thrust::unary_function { - __host__ __device__ - bool operator()(const T& a) - { - return a== (T) (0.0); - } + __host__ __device__ bool operator()(const T& a) { return a == (T)(0.0); } }; /// @brief compute isnan(a) -template -struct thrust_isnan: public thrust::unary_function +template +struct thrust_isnan : public thrust::unary_function { - __host__ __device__ - bool operator()(const T& a) - { - return isnan(a); - } + __host__ __device__ bool operator()(const T& a) { return isnan(a); } }; /// @brief compute (bool) (a) struct thrust_istrue : public thrust::unary_function { - __host__ __device__ - bool operator()(const int& a) - { - return a!=0; - } + __host__ __device__ bool operator()(const int& a) { return a != 0; } }; /** @brief Set y[i] = min(y[i],c), for i=[0,n_local-1] */ __global__ void component_min_hip(int n, double* y, const double c) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - y[i] = (y[i]c) ? y[i] : c; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + y[i] = (y[i] > c) ? y[i] : c; } } @@ -244,19 +202,19 @@ __global__ void component_max_hip(int n, double* y, const double c) __global__ void component_max_hip(int n, double* y, const double* x) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - y[i] = (y[i]>x[i]) ? y[i] : x[i]; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + y[i] = (y[i] > x[i]) ? y[i] : x[i]; } } -/// @brief Copy from src the elements specified by the indices in id. +/// @brief Copy from src the elements specified by the indices in id. __global__ void copy_from_index_hip(int n, double* vec, const double* val, const int* id) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - vec[i] = val[id[i]]; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + vec[i] = val[id[i]]; } } @@ -264,9 +222,9 @@ __global__ void copy_from_index_hip(int n, double* vec, const double* val, const __global__ void axpy_w_map_hip(int n, double* yd, const double* xd, const int* id, double alpha) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - assert(id[i] 0.0) ? 1 : 0; } } @@ -401,8 +364,8 @@ __global__ void is_posive_w_pattern_hip(int n, double* data, const double* vd, c __global__ void set_val_w_pattern_hip(int n, double* data, const double* vd, const double* id, double val_else) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { data[i] = (id[i] == 1.0) ? vd[i] : val_else; } } @@ -411,22 +374,22 @@ __global__ void set_val_w_pattern_hip(int n, double* data, const double* vd, con __global__ void select_pattern_hip(int n, double* data, const double* id) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { if(id[i] == 0.0) { data[i] = 0.0; - } + } } } __global__ void match_pattern_hip(int n, double* data, const double* id) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { if(id[i] == 0.0) { data[i] = 0.0; - } + } } } @@ -444,11 +407,11 @@ __global__ void project_into_bounds_hip(int n, const int num_threads = blockDim.x * gridDim.x; const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - double aux = 0.0; + for(int i = tid; i < n; i += num_threads) { + double aux = 0.0; double aux2 = 0.0; if(ild[i] != 0.0 && iud[i] != 0.0) { - aux = kappa2*(xud[i] - xld[i]) - small_real; + aux = kappa2 * (xud[i] - xld[i]) - small_real; aux2 = xld[i] + fmin(kappa1 * fmax(1.0, fabs(xld[i])), aux); if(xd[i] < aux2) { xd[i] = aux2; @@ -463,11 +426,11 @@ __global__ void project_into_bounds_hip(int n, #endif } else { if(ild[i] != 0.0) { - xd[i] = fmax(xd[i], xld[i] + kappa1*fmax(1.0, fabs(xld[i])) - small_real); + xd[i] = fmax(xd[i], xld[i] + kappa1 * fmax(1.0, fabs(xld[i])) - small_real); } if(iud[i] != 0.0) { - xd[i] = fmin(xd[i], xud[i] - kappa1*fmax(1.0, fabs(xud[i])) - small_real); - } else { + xd[i] = fmin(xd[i], xud[i] - kappa1 * fmax(1.0, fabs(xud[i])) - small_real); + } else { /*nothing for free vars */ } } @@ -478,31 +441,31 @@ __global__ void project_into_bounds_hip(int n, __global__ void fraction_to_the_boundry_hip(int n, double* yd, const double* xd, const double* dd, double tau) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - if(dd[i]>=0) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + if(dd[i] >= 0) { yd[i] = 1.0; } else { - yd[i] = -tau*xd[i]/dd[i]; + yd[i] = -tau * xd[i] / dd[i]; } } } /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern select */ __global__ void fraction_to_the_boundry_w_pattern_hip(int n, - double* yd, - const double* xd, - const double* dd, - const double* id, - double tau) + double* yd, + const double* xd, + const double* dd, + const double* id, + double tau) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - if(dd[i]>=0 || id[i]==0) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + if(dd[i] >= 0 || id[i] == 0) { yd[i] = 1.0; } else { - yd[i] = -tau*xd[i]/dd[i]; + yd[i] = -tau * xd[i] / dd[i]; } } } @@ -511,9 +474,9 @@ __global__ void fraction_to_the_boundry_w_pattern_hip(int n, __global__ void set_match_pattern_hip(int n, int* yd, const double* xd, const double* id) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - if(id[i]==0.0 && xd[i]!=0.0) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + if(id[i] == 0.0 && xd[i] != 0.0) { yd[i] = 0; } else { yd[i] = 1; @@ -525,31 +488,31 @@ __global__ void set_match_pattern_hip(int n, int* yd, const double* xd, const do __global__ void adjust_duals_hip(int n, double* zd, const double* xd, const double* id, double mu, double kappa) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - double a,b; - for (int i = tid; i < n; i += num_threads) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + double a, b; + for(int i = tid; i < n; i += num_threads) { // preemptive loop to reduce number of iterations? if(id[i] == 1.) { // precompute a and b in another loop? - a = mu/xd[i]; - b = a/kappa; - a = a*kappa; + a = mu / xd[i]; + b = a / kappa; + a = a * kappa; // Necessary conditionals - if(zd[i]=b - if(a<=b) { + // zd[i]>=b + if(a <= b) { zd[i] = b; } else { - //a>b - if(ab + if(a < zd[i]) { zd[i] = a; } } } - // - - - - - //else a>=z[i] then *z=*z (z[i] does not need adjustment) + // - - - - + // else a>=z[i] then *z=*z (z[i] does not need adjustment) } } } @@ -563,9 +526,9 @@ __global__ void set_nonlinear_type_hip(const int n, const int start_src) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n && i < length; i += num_threads) { - arr[start+i] = arr_src[start_src+i]; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n && i < length; i += num_threads) { + arr[start + i] = arr_src[start_src + i]; } } @@ -577,24 +540,23 @@ __global__ void set_nonlinear_type_hip(const int n, const hiop::hiopInterfaceBase::NonlinearityType arr_src) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n && i < length; i += num_threads) { - arr[start+i] = arr_src; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n && i < length; i += num_threads) { + arr[start + i] = arr_src; } } /// for hiopVectorIntHip /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector */ -__global__ void set_to_linspace_hip(int n, int *vec, int i0, int di) +__global__ void set_to_linspace_hip(int n, int* vec, int i0, int di) { - const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - vec[i] = i0 + i*di; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + vec[i] = i0 + i * di; } } @@ -602,35 +564,35 @@ __global__ void set_to_linspace_hip(int n, int *vec, int i0, int di) __global__ void compute_cusum_hip(int n, int* vec, const double* id) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - if(i==0) { + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + if(i == 0) { vec[i] = 0; } else { // from i=1..n - if(id[i-1]!=0.0){ + if(id[i - 1] != 0.0) { vec[i] = 1; } else { - vec[i] = 0; + vec[i] = 0; } } } } /// @brief Copy the entries in 'dd' where corresponding 'ix' is nonzero, to vd starting at start_index_in_dest. -__global__ void copyToStartingAt_w_pattern_hip(int n_src, - int n_dest, - int start_index_in_dest, - int* nnz_cumsum, - double *vd, - const double* dd) +__global__ void copyToStartingAt_w_pattern_hip(int n_src, + int n_dest, + int start_index_in_dest, + int* nnz_cumsum, + double* vd, + const double* dd) { const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid+1; i < n_src+1; i += num_threads) { - if(nnz_cumsum[i] != nnz_cumsum[i-1]){ - int idx_dest = nnz_cumsum[i-1] + start_index_in_dest; - vd[idx_dest] = dd[i-1]; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid + 1; i < n_src + 1; i += num_threads) { + if(nnz_cumsum[i] != nnz_cumsum[i - 1]) { + int idx_dest = nnz_cumsum[i - 1] + start_index_in_dest; + vd[idx_dest] = dd[i - 1]; } } } @@ -640,188 +602,137 @@ namespace hiop namespace hip { -constexpr int block_size=256; +constexpr int block_size = 256; -/// @brief Copy from src the elements specified by the indices in id. -void copy_from_index_kernel(int n_local, - double* yd, - const double* src, - const int* id) +/// @brief Copy from src the elements specified by the indices in id. +void copy_from_index_kernel(int n_local, double* yd, const double* src, const int* id) { - int num_blocks = (n_local+block_size-1)/block_size; - copy_from_index_hip<<>>(n_local, yd, src, id); + int num_blocks = (n_local + block_size - 1) / block_size; + copy_from_index_hip<<>>(n_local, yd, src, id); } /** @brief Set y[i] = min(y[i],c), for i=[0,n_local-1] */ -void component_min_kernel(int n_local, - double* yd, - double c) +void component_min_kernel(int n_local, double* yd, double c) { - int num_blocks = (n_local+block_size-1)/block_size; - component_min_hip<<>>(n_local, yd, c); + int num_blocks = (n_local + block_size - 1) / block_size; + component_min_hip<<>>(n_local, yd, c); } /** @brief Set y[i] = min(y[i],x[i], for i=[0,n_local-1] */ -void component_min_kernel(int n_local, - double* yd, - const double* xd) +void component_min_kernel(int n_local, double* yd, const double* xd) { - int num_blocks = (n_local+block_size-1)/block_size; - component_min_hip<<>>(n_local, yd, xd); + int num_blocks = (n_local + block_size - 1) / block_size; + component_min_hip<<>>(n_local, yd, xd); } /** @brief Set y[i] = max(y[i],c), for i=[0,n_local-1] */ -void component_max_kernel(int n_local, - double* yd, - double c) +void component_max_kernel(int n_local, double* yd, double c) { - int num_blocks = (n_local+block_size-1)/block_size; - component_max_hip<<>>(n_local, yd, c); + int num_blocks = (n_local + block_size - 1) / block_size; + component_max_hip<<>>(n_local, yd, c); } /** @brief Set y[i] = max(y[i],x[i]), for i=[0,n_local-1] */ -void component_max_kernel(int n_local, - double* yd, - const double* xd) +void component_max_kernel(int n_local, double* yd, const double* xd) { - int num_blocks = (n_local+block_size-1)/block_size; - component_max_hip<<>>(n_local, yd, xd); + int num_blocks = (n_local + block_size - 1) / block_size; + component_max_hip<<>>(n_local, yd, xd); } /// @brief Performs axpy, y += alpha*x, on the indexes in this specified by id. -void axpy_w_map_kernel(int n_local, - double* yd, - const double* xd, - const int* id, - double alpha) +void axpy_w_map_kernel(int n_local, double* yd, const double* xd, const int* id, double alpha) { - int num_blocks = (n_local+block_size-1)/block_size; - axpy_w_map_hip<<>>(n_local, yd, xd, id, alpha); + int num_blocks = (n_local + block_size - 1) / block_size; + axpy_w_map_hip<<>>(n_local, yd, xd, id, alpha); } /** @brief y[i] += alpha*x[i]*z[i] forall i */ -void axzpy_kernel(int n_local, - double* yd, - const double* xd, - const double* zd, - double alpha) +void axzpy_kernel(int n_local, double* yd, const double* xd, const double* zd, double alpha) { - int num_blocks = (n_local+block_size-1)/block_size; - axzpy_hip<<>>(n_local, yd, xd, zd, alpha); + int num_blocks = (n_local + block_size - 1) / block_size; + axzpy_hip<<>>(n_local, yd, xd, zd, alpha); } /** @brief y[i] += alpha*x[i]/z[i] forall i */ -void axdzpy_kernel(int n_local, - double* yd, - const double* xd, - const double* zd, - double alpha) +void axdzpy_kernel(int n_local, double* yd, const double* xd, const double* zd, double alpha) { - int num_blocks = (n_local+block_size-1)/block_size; - axdzpy_hip<<>>(n_local, yd, xd, zd, alpha); + int num_blocks = (n_local + block_size - 1) / block_size; + axdzpy_hip<<>>(n_local, yd, xd, zd, alpha); } /** @brief y[i] += alpha*x[i]/z[i] forall i with pattern selection */ -void axdzpy_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* zd, - const double* id, - double alpha) +void axdzpy_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* zd, const double* id, double alpha) { - int num_blocks = (n_local+block_size-1)/block_size; - axdzpy_w_pattern_hip<<>>(n_local, yd, xd, zd, id, alpha); + int num_blocks = (n_local + block_size - 1) / block_size; + axdzpy_w_pattern_hip<<>>(n_local, yd, xd, zd, id, alpha); } /** @brief y[i] += c forall i */ void add_constant_kernel(int n_local, double* yd, double c) { - int num_blocks = (n_local+block_size-1)/block_size; - add_constant_hip<<>>(n_local, yd, c); + int num_blocks = (n_local + block_size - 1) / block_size; + add_constant_hip<<>>(n_local, yd, c); } /** @brief y[i] += c forall i with pattern selection */ -void add_constant_w_pattern_kernel(int n_local, double* yd, const double* id, double c) +void add_constant_w_pattern_kernel(int n_local, double* yd, const double* id, double c) { - int num_blocks = (n_local+block_size-1)/block_size; - add_constant_w_pattern_hip<<>>(n_local, yd, c, id); + int num_blocks = (n_local + block_size - 1) / block_size; + add_constant_w_pattern_hip<<>>(n_local, yd, c, id); } /// @brief Invert (1/x) the elements of this void invert_kernel(int n_local, double* yd) { - int num_blocks = (n_local+block_size-1)/block_size; - invert_hip<<>>(n_local, yd); + int num_blocks = (n_local + block_size - 1) / block_size; + invert_hip<<>>(n_local, yd); } /** @brief y[i] += alpha*1/x[i] + y[i] forall i with pattern selection */ -void adxpy_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id, - double alpha) +void adxpy_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id, double alpha) { - int num_blocks = (n_local+block_size-1)/block_size; - adxpy_w_pattern_hip<<>>(n_local, yd, xd, id, alpha); + int num_blocks = (n_local + block_size - 1) / block_size; + adxpy_w_pattern_hip<<>>(n_local, yd, xd, id, alpha); } /** @brief elements of this that corespond to nonzeros in ix are divided by elements of v. The rest of elements of this are set to zero.*/ -void component_div_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id) +void component_div_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id) { - int num_blocks = (n_local+block_size-1)/block_size; - component_div_w_pattern_hip<<>>(n_local, yd, xd, id); + int num_blocks = (n_local + block_size - 1) / block_size; + component_div_w_pattern_hip<<>>(n_local, yd, xd, id); } /** @brief Linear damping term */ -void set_linear_damping_term_kernel(int n_local, - double* yd, - const double* vd, - const double* ld, - const double* rd) +void set_linear_damping_term_kernel(int n_local, double* yd, const double* vd, const double* ld, const double* rd) { // compute linear damping term - int num_blocks = (n_local+block_size-1)/block_size; - set_linear_damping_term_hip<<>>(n_local, yd, vd, ld, rd); + int num_blocks = (n_local + block_size - 1) / block_size; + set_linear_damping_term_hip<<>>(n_local, yd, vd, ld, rd); } -/** -* @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of -* ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. -*/ -void add_linear_damping_term_kernel(int n_local, - double* yd, - const double* ixl, - const double* ixr, - double alpha, - double ct) +/** + * @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + */ +void add_linear_damping_term_kernel(int n_local, double* yd, const double* ixl, const double* ixr, double alpha, double ct) { - int num_blocks = (n_local+block_size-1)/block_size; - add_linear_damping_term_hip<<>>(n_local, yd, ixl, ixr, alpha, ct); + int num_blocks = (n_local + block_size - 1) / block_size; + add_linear_damping_term_hip<<>>(n_local, yd, ixl, ixr, alpha, ct); } /** @brief Checks if selected elements of `this` are positive */ -void is_posive_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id) +void is_posive_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id) { - int num_blocks = (n_local+block_size-1)/block_size; - is_posive_w_pattern_hip<<>>(n_local, yd, xd, id); + int num_blocks = (n_local + block_size - 1) / block_size; + is_posive_w_pattern_hip<<>>(n_local, yd, xd, id); } /// set value with pattern -void set_val_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id, - double max_val) +void set_val_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id, double max_val) { - int num_blocks = (n_local+block_size-1)/block_size; - set_val_w_pattern_hip<<>>(n_local, yd, xd, id, max_val); + int num_blocks = (n_local + block_size - 1) / block_size; + set_val_w_pattern_hip<<>>(n_local, yd, xd, id, max_val); } /** @brief Project solution into bounds */ @@ -835,19 +746,15 @@ void project_into_bounds_kernel(int n_local, double kappa2, double small_real) { - int num_blocks = (n_local+block_size-1)/block_size; - project_into_bounds_hip<<>>(n_local, xd, xld, ild, xud, iud, kappa1, kappa2, small_real); + int num_blocks = (n_local + block_size - 1) / block_size; + project_into_bounds_hip<<>>(n_local, xd, xld, ild, xud, iud, kappa1, kappa2, small_real); } /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} */ -void fraction_to_the_boundry_kernel(int n_local, - double* yd, - const double* xd, - const double* dd, - double tau) +void fraction_to_the_boundry_kernel(int n_local, double* yd, const double* xd, const double* dd, double tau) { - int num_blocks = (n_local+block_size-1)/block_size; - fraction_to_the_boundry_hip<<>>(n_local, yd, xd, dd, tau); + int num_blocks = (n_local + block_size - 1) / block_size; + fraction_to_the_boundry_hip<<>>(n_local, yd, xd, dd, tau); } /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern select */ @@ -858,64 +765,59 @@ void fraction_to_the_boundry_w_pattern_kernel(int n_local, const double* id, double tau) { - int num_blocks = (n_local+block_size-1)/block_size; - fraction_to_the_boundry_w_pattern_hip<<>>(n_local, yd, xd, dd, id, tau); + int num_blocks = (n_local + block_size - 1) / block_size; + fraction_to_the_boundry_w_pattern_hip<<>>(n_local, yd, xd, dd, id, tau); } /** @brief Set elements of `this` to zero based on `select`.*/ void select_pattern_kernel(int n_local, double* yd, const double* id) { - int num_blocks = (n_local+block_size-1)/block_size; - select_pattern_hip<<>>(n_local, yd, id); + int num_blocks = (n_local + block_size - 1) / block_size; + select_pattern_hip<<>>(n_local, yd, id); } /** @brief y[i] = 0 if id[i]==0.0 && xd[i]!=0.0, otherwise y[i] = 1*/ void component_match_pattern_kernel(int n_local, int* yd, const double* xd, const double* id) { - int num_blocks = (n_local+block_size-1)/block_size; - set_match_pattern_hip<<>>(n_local, yd, xd, id); + int num_blocks = (n_local + block_size - 1) / block_size; + set_match_pattern_hip<<>>(n_local, yd, xd, id); } /** @brief Adjusts duals. */ -void adjustDuals_plh_kernel(int n_local, - double* yd, - const double* xd, - const double* id, - double mu, - double kappa) +void adjustDuals_plh_kernel(int n_local, double* yd, const double* xd, const double* id, double mu, double kappa) { - int num_blocks = (n_local+block_size-1)/block_size; - adjust_duals_hip<<>>(n_local, yd, xd, id, mu, kappa); + int num_blocks = (n_local + block_size - 1) / block_size; + adjust_duals_hip<<>>(n_local, yd, xd, id, mu, kappa); } /// @brief set int array 'arr', starting at `start` and ending at `end`, to the values in `arr_src` from 'start_src` void set_array_from_to_kernel(int n_local, - hiop::hiopInterfaceBase::NonlinearityType* arr, - int start, - int length, + hiop::hiopInterfaceBase::NonlinearityType* arr, + int start, + int length, const hiop::hiopInterfaceBase::NonlinearityType* arr_src, - int start_src) + int start_src) { - int num_blocks = (n_local+block_size-1)/block_size; - set_nonlinear_type_hip<<>> (n_local, length, arr, start, arr_src, start_src); + int num_blocks = (n_local + block_size - 1) / block_size; + set_nonlinear_type_hip<<>>(n_local, length, arr, start, arr_src, start_src); } /// @brief set int array 'arr', starting at `start` and ending at `end`, to the values in `arr_src` from 'start_src` void set_array_from_to_kernel(int n_local, - hiop::hiopInterfaceBase::NonlinearityType* arr, - int start, + hiop::hiopInterfaceBase::NonlinearityType* arr, + int start, int length, hiop::hiopInterfaceBase::NonlinearityType arr_src) { - int num_blocks = (n_local+block_size-1)/block_size; - set_nonlinear_type_hip<<>> (n_local, length, arr, start, arr_src); + int num_blocks = (n_local + block_size - 1) / block_size; + set_nonlinear_type_hip<<>>(n_local, length, arr, start, arr_src); } /// @brief Set all elements to c. void thrust_fill_kernel(int n, double* ptr, double c) { - thrust::device_ptr dev_ptr = thrust::device_pointer_cast(ptr); - thrust::fill(thrust::device, dev_ptr, dev_ptr+n, c); + thrust::device_ptr dev_ptr = thrust::device_pointer_cast(ptr); + thrust::fill(thrust::device, dev_ptr, dev_ptr + n, c); } /** @brief inf norm on single rank */ @@ -926,7 +828,7 @@ double infnorm_local_kernel(int n, double* data_dev) thrust::device_ptr dev_ptr = thrust::device_pointer_cast(data_dev); // compute one norm - double norm = thrust::transform_reduce(thrust::device, data_dev, data_dev+n, abs_op, 0.0, max_op); + double norm = thrust::transform_reduce(thrust::device, data_dev, data_dev + n, abs_op, 0.0, max_op); return norm; } @@ -937,10 +839,10 @@ double onenorm_local_kernel(int n, double* data_dev) thrust_abs abs_op; thrust::plus plus_op; thrust::device_ptr dev_ptr = thrust::device_pointer_cast(data_dev); - //thrust::device_ptr dev_ptr(data_dev); + // thrust::device_ptr dev_ptr(data_dev); // compute one norm - double norm = thrust::transform_reduce(thrust::device, data_dev, data_dev+n, abs_op, 0.0, plus_op); + double norm = thrust::transform_reduce(thrust::device, data_dev, data_dev + n, abs_op, 0.0, plus_op); return norm; } @@ -948,69 +850,63 @@ double onenorm_local_kernel(int n, double* data_dev) /** @brief d1[i] = d1[i] * d2[i] forall i */ void thrust_component_mult_kernel(int n, double* d1, const double* d2) { - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust::multiplies mult_op; thrust::device_ptr dev_v1 = thrust::device_pointer_cast(d1); thrust::device_ptr dev_v2 = thrust::device_pointer_cast(d2); - - thrust::transform(thrust::device, - dev_v1, dev_v1+n, - dev_v2, dev_v1, - mult_op); + + thrust::transform(thrust::device, dev_v1, dev_v1 + n, dev_v2, dev_v1, mult_op); } /** @brief d1[i] = d1[i] / d2[i] forall i */ void thrust_component_div_kernel(int n, double* d1, const double* d2) { - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust::divides div_op; thrust::device_ptr dev_v1 = thrust::device_pointer_cast(d1); thrust::device_ptr dev_v2 = thrust::device_pointer_cast(d2); - - thrust::transform(thrust::device, - dev_v1, dev_v1+n, - dev_v2, dev_v1, - div_op); + + thrust::transform(thrust::device, dev_v1, dev_v1 + n, dev_v2, dev_v1, div_op); } /** @brief d1[i] = abs(d1[i]) forall i */ void thrust_component_abs_kernel(int n, double* d1) { - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust_abs abs_op; thrust::device_ptr dev_v1 = thrust::device_pointer_cast(d1); - + // compute abs - thrust::transform(thrust::device, dev_v1, dev_v1+n, dev_v1, abs_op); + thrust::transform(thrust::device, dev_v1, dev_v1 + n, dev_v1, abs_op); } /** @brief d1[i] = sign(d1[i]) forall i */ void thrust_component_sgn_kernel(int n, double* d1) { - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust_sig sig_op; thrust::device_ptr dev_v1 = thrust::device_pointer_cast(d1); - + // compute sign - thrust::transform(thrust::device, dev_v1, dev_v1+n, dev_v1, sig_op); + thrust::transform(thrust::device, dev_v1, dev_v1 + n, dev_v1, sig_op); } /** @brief d1[i] = sqrt(d1[i]) forall i */ void thrust_component_sqrt_kernel(int n, double* d1) { - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust_sqrt sqrt_op; thrust::device_ptr dev_v1 = thrust::device_pointer_cast(d1); - + // compute sqrt - thrust::transform(thrust::device, dev_v1, dev_v1+n, dev_v1, sqrt_op); + thrust::transform(thrust::device, dev_v1, dev_v1 + n, dev_v1, sqrt_op); } /** @brief d1[i] = -(d1[i]) forall i */ void thrust_negate_kernel(int n, double* d1) { thrust::device_ptr dev_v1 = thrust::device_pointer_cast(d1); - thrust::transform(thrust::device, dev_v1, dev_v1+n, dev_v1, thrust::negate()); + thrust::transform(thrust::device, dev_v1, dev_v1 + n, dev_v1, thrust::negate()); } /** @brief compute sum(log(d1[i])) forall i where id[i]=1*/ @@ -1019,18 +915,18 @@ double log_barr_obj_kernel(int n, double* d1, const double* id) thrust::device_ptr dev_v = thrust::device_pointer_cast(d1); thrust::device_ptr id_v = thrust::device_pointer_cast(id); - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust_log_select log_select_op; thrust::plus plus_op; thrust::multiplies mult_op; - + // TODO: how to avoid this temp vec? - thrust::device_ptr v_temp = thrust::device_malloc(n*sizeof(double)); + thrust::device_ptr v_temp = thrust::device_malloc(n * sizeof(double)); // compute x*id - thrust::transform(thrust::device, dev_v, dev_v+n, id_v, v_temp, mult_op); + thrust::transform(thrust::device, dev_v, dev_v + n, id_v, v_temp, mult_op); // compute log(y) for y > 0 - double sum = thrust::transform_reduce(thrust::device, v_temp, v_temp+n, log_select_op, 0.0, plus_op); + double sum = thrust::transform_reduce(thrust::device, v_temp, v_temp + n, log_select_op, 0.0, plus_op); thrust::device_free(v_temp); @@ -1042,16 +938,11 @@ double thrust_sum_kernel(int n, double* d1) { thrust::device_ptr dev_v1 = thrust::device_pointer_cast(d1); // compute sum - return thrust::reduce(thrust::device, dev_v1, dev_v1+n, 0.0, thrust::plus()); + return thrust::reduce(thrust::device, dev_v1, dev_v1 + n, 0.0, thrust::plus()); } /** @brief Linear damping term */ -double linear_damping_term_kernel(int n, - const double* vd, - const double* ld, - const double* rd, - double mu, - double kappa_d) +double linear_damping_term_kernel(int n, const double* vd, const double* ld, const double* rd, double mu, double kappa_d) { // TODO: how to avoid this temp vec? thrust::device_vector v_temp(n); @@ -1071,15 +962,15 @@ double linear_damping_term_kernel(int n, double min_local_kernel(int n, double* d1) { thrust::device_ptr dev_v1 = thrust::device_pointer_cast(d1); - thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dev_v1, dev_v1+n); - - double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); - double *ret_host = new double[1]; - hipError_t cuerr = hipMemcpy(ret_host, ret_ptr, (1)*sizeof(double), hipMemcpyDeviceToHost); - + thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dev_v1, dev_v1 + n); + + double* ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); + double* ret_host = new double[1]; + hipError_t cuerr = hipMemcpy(ret_host, ret_ptr, (1) * sizeof(double), hipMemcpyDeviceToHost); + double rv = ret_host[0]; - delete [] ret_host; - + delete[] ret_host; + return rv; } @@ -1091,7 +982,7 @@ int all_positive_w_pattern_kernel(int n, const double* d1, const double* id) double* dv_ptr = thrust::raw_pointer_cast(v_temp.data()); hiop::hip::is_posive_w_pattern_kernel(n, dv_ptr, d1, id); - + return thrust::reduce(thrust::device, v_temp.begin(), v_temp.end(), (int)0, thrust::plus()); } @@ -1099,24 +990,24 @@ int all_positive_w_pattern_kernel(int n, const double* d1, const double* id) double min_w_pattern_kernel(int n, const double* d1, const double* id, double max_val) { // TODO: how to avoid this temp vec? - thrust::device_ptr dv_ptr = thrust::device_malloc(n*sizeof(double)); + thrust::device_ptr dv_ptr = thrust::device_malloc(n * sizeof(double)); double* d_ptr = thrust::raw_pointer_cast(dv_ptr); // set value with pattern hiop::hip::set_val_w_pattern_kernel(n, d_ptr, d1, id, max_val); - thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n); + thrust::device_ptr ret_dev_ptr = thrust::min_element(thrust::device, dv_ptr, dv_ptr + n); // TODO: how to return double from device to host? - double *ret_host = new double[1]; - double *ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); - hipError_t cuerr = hipMemcpy(ret_host, ret_ptr, (1)*sizeof(double), hipMemcpyDeviceToHost); + double* ret_host = new double[1]; + double* ret_ptr = thrust::raw_pointer_cast(ret_dev_ptr); + hipError_t cuerr = hipMemcpy(ret_host, ret_ptr, (1) * sizeof(double), hipMemcpyDeviceToHost); double ret_v = ret_host[0]; - delete [] ret_host; + delete[] ret_host; thrust::device_free(dv_ptr); - + return ret_v; } @@ -1129,22 +1020,18 @@ bool check_bounds_kernel(int n, const double* xld, const double* xud) thrust::device_ptr dev_xld = thrust::device_pointer_cast(const_cast(xld)); // TODO: how to avoid this temp vec? - thrust::device_ptr dv_ptr = thrust::device_malloc(n*sizeof(double)); + thrust::device_ptr dv_ptr = thrust::device_malloc(n * sizeof(double)); - thrust::transform(thrust::device, - dev_xud, dev_xud+n, - dev_xld, dv_ptr, - minus_op); + thrust::transform(thrust::device, dev_xud, dev_xud + n, dev_xld, dv_ptr, minus_op); int res_offset = thrust::min_element(thrust::device, dv_ptr, dv_ptr + n) - dv_ptr; double ret_v = *(dv_ptr + res_offset); - + bool bval = (ret_v > 0.0) ? 1 : 0; thrust::device_free(dv_ptr); - - if(false == bval) - return false; + + if(false == bval) return false; return true; } @@ -1152,25 +1039,21 @@ bool check_bounds_kernel(int n, const double* xld, const double* xud) /** @brief compute max{a\in(0,1]| x+ad >=(1-tau)x} */ double min_frac_to_bds_kernel(int n, const double* xd, const double* dd, double tau) { - thrust::device_ptr dv_ptr = thrust::device_malloc(n*sizeof(double)); + thrust::device_ptr dv_ptr = thrust::device_malloc(n * sizeof(double)); double* d_ptr = thrust::raw_pointer_cast(dv_ptr); // set values hiop::hip::fraction_to_the_boundry_kernel(n, d_ptr, xd, dd, tau); - int res_offset = thrust::min_element(thrust::device, dv_ptr, dv_ptr+n) - dv_ptr; + int res_offset = thrust::min_element(thrust::device, dv_ptr, dv_ptr + n) - dv_ptr; double alpha = *(dv_ptr + res_offset); thrust::device_free(dv_ptr); - + return alpha; } /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern id */ -double min_frac_to_bds_w_pattern_kernel(int n, - const double* xd, - const double* dd, - const double* id, - double tau) +double min_frac_to_bds_w_pattern_kernel(int n, const double* xd, const double* dd, const double* id, double tau) { // TODO: how to avoid this temp vec? thrust::device_vector v_temp(n); @@ -1201,48 +1084,48 @@ bool match_pattern_kernel(int n, const double* xd, const double* id) /** @brief Checks if all x[i] = 0 */ bool is_zero_kernel(int n, double* xd) { - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust_iszero iszero_op; thrust::device_ptr dev_v = thrust::device_pointer_cast(xd); - return thrust::all_of(thrust::device, dev_v, dev_v+n, iszero_op); + return thrust::all_of(thrust::device, dev_v, dev_v + n, iszero_op); } /** @brief Checks if any x[i] = nan */ bool isnan_kernel(int n, double* xd) { - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust_isnan isnan_op; thrust::device_ptr dev_v = thrust::device_pointer_cast(xd); - return thrust::any_of(thrust::device, dev_v, dev_v+n, isnan_op); + return thrust::any_of(thrust::device, dev_v, dev_v + n, isnan_op); } /** @brief Checks if any x[i] = inf */ bool isinf_kernel(int n, double* xd) { - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust_isinf isinf_op; thrust::device_ptr dev_v = thrust::device_pointer_cast(xd); - return thrust::any_of(thrust::device, dev_v, dev_v+n, isinf_op); + return thrust::any_of(thrust::device, dev_v, dev_v + n, isinf_op); } /** @brief Checks if all x[i] != inf */ bool isfinite_kernel(int n, double* xd) { - // wrap raw pointer with a device_ptr + // wrap raw pointer with a device_ptr thrust_isfinite isfinite_op; thrust::device_ptr dev_v = thrust::device_pointer_cast(xd); - return thrust::all_of(thrust::device, dev_v, dev_v+n, isfinite_op); + return thrust::all_of(thrust::device, dev_v, dev_v + n, isfinite_op); } /// @brief get number of values that are less than the given value 'val'. int num_of_elem_less_than_kernel(int n, double* xd, double val) { thrust::device_ptr dev_v = thrust::device_pointer_cast(xd); - int rval = thrust::transform_reduce(thrust::device, dev_v, dev_v+n, thrust_less(val), (int) 0, thrust::plus()); + int rval = thrust::transform_reduce(thrust::device, dev_v, dev_v + n, thrust_less(val), (int)0, thrust::plus()); return rval; } @@ -1250,50 +1133,43 @@ int num_of_elem_less_than_kernel(int n, double* xd, double val) int num_of_elem_absless_than_kernel(int n, double* xd, double val) { thrust::device_ptr dev_v = thrust::device_pointer_cast(xd); - int rval = thrust::transform_reduce(thrust::device, dev_v, dev_v+n, thrust_abs_less(val), (int) 0, thrust::plus()); + int rval = thrust::transform_reduce(thrust::device, dev_v, dev_v + n, thrust_abs_less(val), (int)0, thrust::plus()); return rval; } /// @brief Copy the entries in 'dd' where corresponding 'ix' is nonzero, to vd starting at start_index_in_dest. -void copyToStartingAt_w_pattern_kernel(int n_src, +void copyToStartingAt_w_pattern_kernel(int n_src, int n_dest, int start_index_in_dest, - int* nnz_cumsum, - double *vd, + int* nnz_cumsum, + double* vd, const double* dd) { - int num_blocks = (n_src+block_size-1)/block_size; - copyToStartingAt_w_pattern_hip<<>>(n_src, - n_dest, - start_index_in_dest, - nnz_cumsum, - vd, - dd); + int num_blocks = (n_src + block_size - 1) / block_size; + copyToStartingAt_w_pattern_hip<<>>(n_src, n_dest, start_index_in_dest, nnz_cumsum, vd, dd); } - - /// for hiopVectorIntHip /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector */ void set_to_linspace_kernel(int sz, int* buf, int i0, int di) { - int num_blocks = (sz+block_size-1)/block_size; - set_to_linspace_hip<<>>(sz, buf, i0, di); + int num_blocks = (sz + block_size - 1) / block_size; + set_to_linspace_hip<<>>(sz, buf, i0, di); } /** @brief compute cusum from the given pattern*/ void compute_cusum_kernel(int sz, int* buf, const double* id) { - int num_blocks = (sz+block_size-1)/block_size; - compute_cusum_hip<<>>(sz, buf, id); + int num_blocks = (sz + block_size - 1) / block_size; + compute_cusum_hip<<>>(sz, buf, id); thrust::device_ptr dev_v = thrust::device_pointer_cast(buf); - thrust::inclusive_scan(dev_v, dev_v + sz, dev_v); // in-place scan + thrust::inclusive_scan(dev_v, dev_v + sz, dev_v); // in-place scan } -} +} // namespace hip -} +} // namespace hiop diff --git a/src/LinAlg/VectorHipKernels.hpp b/src/LinAlg/VectorHipKernels.hpp index b1c0b8022..2766d76be 100644 --- a/src/LinAlg/VectorHipKernels.hpp +++ b/src/LinAlg/VectorHipKernels.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -57,66 +57,37 @@ #include #include "hiopInterface.hpp" - namespace hiop { namespace hip { -/// @brief Copy from src the elements specified by the indices in id. -void copy_from_index_kernel(int n_local, - double* yd, - const double* src, - const int* id); +/// @brief Copy from src the elements specified by the indices in id. +void copy_from_index_kernel(int n_local, double* yd, const double* src, const int* id); /** @brief Set y[i] = min(y[i],c), for i=[0,n_local-1] */ -void component_min_kernel(int n_local, - double* yd, - double c); +void component_min_kernel(int n_local, double* yd, double c); /** @brief Set y[i] = min(y[i],x[i]), for i=[0,n_local-1] */ -void component_min_kernel(int n_local, - double* yd, - const double* xd); +void component_min_kernel(int n_local, double* yd, const double* xd); /** @brief Set y[i] = max(y[i],c), for i=[0,n_local-1] */ -void component_max_kernel(int n_local, - double* yd, - double c); +void component_max_kernel(int n_local, double* yd, double c); /** @brief Set y[i] = max(y[i],x[i]), for i=[0,n_local-1] */ -void component_max_kernel(int n_local, - double* yd, - const double* xd); +void component_max_kernel(int n_local, double* yd, const double* xd); /// @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. -void axpy_w_map_kernel(int n_local, - double* yd, - const double* xd, - const int* id, - double alpha); +void axpy_w_map_kernel(int n_local, double* yd, const double* xd, const int* id, double alpha); /** @brief this[i] += alpha*x[i]*z[i] forall i */ -void axzpy_kernel(int n_local, - double* yd, - const double* xd, - const double* zd, - double alpha); +void axzpy_kernel(int n_local, double* yd, const double* xd, const double* zd, double alpha); /** @brief this[i] += alpha*x[i]/z[i] forall i */ -void axdzpy_kernel(int n_local, - double* yd, - const double* xd, - const double* zd, - double alpha); +void axdzpy_kernel(int n_local, double* yd, const double* xd, const double* zd, double alpha); /** @brief this[i] += alpha*x[i]/z[i] forall i with pattern selection */ -void axdzpy_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* zd, - const double* id, - double alpha); +void axdzpy_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* zd, const double* id, double alpha); /** @brief this[i] += c forall i */ void add_constant_kernel(int n_local, double* yd, double c); @@ -128,48 +99,25 @@ void add_constant_w_pattern_kernel(int n_local, double* yd, const double* id, do void invert_kernel(int n_local, double* yd); /** @brief y[i] += alpha*1/x[i] + y[i] forall i with pattern selection */ -void adxpy_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* ld, - double alpha); +void adxpy_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* ld, double alpha); /** @brief y[i] = y[i]/x[i] c forall i with pattern selection */ -void component_div_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id); +void component_div_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id); /** @brief Linear damping term */ -void set_linear_damping_term_kernel(int n_local, - double* yd, - const double* vd, - const double* ld, - const double* rd); - -/** -* @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of -* ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. -*/ -void add_linear_damping_term_kernel(int n_local, - double* yd, - const double* ixl, - const double* ixr, - double alpha, - double ct); +void set_linear_damping_term_kernel(int n_local, double* yd, const double* vd, const double* ld, const double* rd); + +/** + * @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + */ +void add_linear_damping_term_kernel(int n_local, double* yd, const double* ixl, const double* ixr, double alpha, double ct); /** @brief y[i] = 1.0 if x[i] is positive and id[i] = 1.0, otherwise y[i] = 0 */ -void is_posive_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id); +void is_posive_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id); /** @brief y[i] = x[i] if id[i] = 1.0, otherwise y[i] = val_else */ -void set_val_w_pattern_kernel(int n_local, - double* yd, - const double* xd, - const double* id, - double val_else); +void set_val_w_pattern_kernel(int n_local, double* yd, const double* xd, const double* id, double val_else); /** @brief Project solution into bounds */ void project_into_bounds_kernel(int n_local, @@ -183,11 +131,7 @@ void project_into_bounds_kernel(int n_local, double small_real); /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} */ -void fraction_to_the_boundry_kernel(int n_local, - double* yd, - const double* xd, - const double* dd, - double tau); +void fraction_to_the_boundry_kernel(int n_local, double* yd, const double* xd, const double* dd, double tau); /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern select */ void fraction_to_the_boundry_w_pattern_kernel(int n_local, @@ -204,25 +148,20 @@ void select_pattern_kernel(int n_local, double* yd, const double* id); void component_match_pattern_kernel(int n_local, int* yd, const double* xd, const double* id); /** @brief Adjusts duals. */ -void adjustDuals_plh_kernel(int n_local, - double* yd, - const double* xd, - const double* id, - double mu, - double kappa); +void adjustDuals_plh_kernel(int n_local, double* yd, const double* xd, const double* id, double mu, double kappa); /// @brief set int array 'arr', starting at `start` and ending at `end`, to the values in `arr_src` from 'start_src` -void set_array_from_to_kernel(int n_local, - hiop::hiopInterfaceBase::NonlinearityType* arr, - int start, - int length, +void set_array_from_to_kernel(int n_local, + hiop::hiopInterfaceBase::NonlinearityType* arr, + int start, + int length, const hiop::hiopInterfaceBase::NonlinearityType* arr_src, int start_src); /// @brief set int array 'arr', starting at `start` and ending at `end`, to the values in `arr_src` from 'start_src` -void set_array_from_to_kernel(int n_local, - hiop::hiopInterfaceBase::NonlinearityType* arr, - int start, +void set_array_from_to_kernel(int n_local, + hiop::hiopInterfaceBase::NonlinearityType* arr, + int start, int length, hiop::hiopInterfaceBase::NonlinearityType arr_src); @@ -251,12 +190,7 @@ double log_barr_obj_kernel(int n, double* d1, const double* id); /** @brief compute sum(d1[i]) */ double thrust_sum_kernel(int n, double* d1); /** @brief Linear damping term */ -double linear_damping_term_kernel(int n, - const double* vd, - const double* ld, - const double* rd, - double mu, - double kappa_d); +double linear_damping_term_kernel(int n, const double* vd, const double* ld, const double* rd, double mu, double kappa_d); /** @brief compute min(d1) */ double min_local_kernel(int n, double* d1); /** @brief Checks if selected elements of `d1` are positive */ @@ -268,11 +202,7 @@ bool check_bounds_kernel(int n, const double* xld, const double* xud); /** @brief compute max{a\in(0,1]| x+ad >=(1-tau)x} */ double min_frac_to_bds_kernel(int n, const double* xd, const double* dd, double tau); /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern id */ -double min_frac_to_bds_w_pattern_kernel(int n, - const double* xd, - const double* dd, - const double* id, - double tau); +double min_frac_to_bds_w_pattern_kernel(int n, const double* xd, const double* dd, const double* id, double tau); /** @brief Checks if `xd` matches nonzero pattern of `id`. */ bool match_pattern_kernel(int n, const double* xd, const double* id); /** @brief Checks if all x[i] = 0 */ @@ -289,16 +219,16 @@ int num_of_elem_less_than_kernel(int n, double* xd, double val); int num_of_elem_absless_than_kernel(int n, double* xd, double val); /// @brief Copy the entries in 'dd' where corresponding 'ix' is nonzero, to vd starting at start_index_in_dest. -void copyToStartingAt_w_pattern_kernel(int n_src, +void copyToStartingAt_w_pattern_kernel(int n_src, int n_dest, int start_index_in_dest, - int* nnz_cumsum, - double *vd, + int* nnz_cumsum, + double* vd, const double* dd); /// for hiopVectorIntHip /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * */ @@ -306,7 +236,6 @@ void set_to_linspace_kernel(int sz, int* buf, int i0, int di); /** @brief compute cusum from the given pattern*/ void compute_cusum_kernel(int sz, int* buf, const double* id); -} -} +} // namespace hip +} // namespace hiop #endif - diff --git a/src/LinAlg/hiopKrylovSolver.cpp b/src/LinAlg/hiopKrylovSolver.cpp index 25f8fe2c9..cebcbefb7 100644 --- a/src/LinAlg/hiopKrylovSolver.cpp +++ b/src/LinAlg/hiopKrylovSolver.cpp @@ -47,16 +47,16 @@ // product endorsement purposes. /* -* @file hiopKrylovSolver.cpp -* @ingroup LinearSolvers -* @author Nai-Yuan Chiang , LLNL -* @author Cosmin G. Petra , LLNL -*/ + * @file hiopKrylovSolver.cpp + * @ingroup LinearSolvers + * @author Nai-Yuan Chiang , LLNL + * @author Cosmin G. Petra , LLNL + */ /** - * Implementation of Krylov solvers + * Implementation of Krylov solvers */ - + #include #include "hiopKrylovSolver.hpp" @@ -68,16 +68,17 @@ #include -namespace hiop { +namespace hiop +{ - /* - * class hiopKrylovSolver - */ - hiopKrylovSolver::hiopKrylovSolver(int n, - hiopLinearOperator* A_opr, - hiopLinearOperator* Mleft_opr, - hiopLinearOperator* Mright_opr, - const hiopVector* x0) +/* + * class hiopKrylovSolver + */ +hiopKrylovSolver::hiopKrylovSolver(int n, + hiopLinearOperator* A_opr, + hiopLinearOperator* Mleft_opr, + hiopLinearOperator* Mright_opr, + const hiopVector* x0) : tol_{1e-9}, maxit_{8}, iter_{-1.}, @@ -85,38 +86,38 @@ namespace hiop { abs_resid_{-1.}, rel_resid_{-1.}, n_{n}, - A_opr_{A_opr}, + A_opr_{A_opr}, ML_opr_{Mleft_opr}, MR_opr_{Mright_opr}, x0_{nullptr}, b_{nullptr} - { - if(x0) { - x0_ = x0->new_copy(); - } - } - - hiopKrylovSolver::~hiopKrylovSolver() - { - delete x0_; - delete b_; +{ + if(x0) { + x0_ = x0->new_copy(); } +} - void hiopKrylovSolver::set_x0(const double xval) - { - if(x0_) { - x0_->setToConstant(xval); - } +hiopKrylovSolver::~hiopKrylovSolver() +{ + delete x0_; + delete b_; +} + +void hiopKrylovSolver::set_x0(const double xval) +{ + if(x0_) { + x0_->setToConstant(xval); } - - /* - * class hiopPCGSolver - */ - hiopPCGSolver::hiopPCGSolver(int n, - hiopLinearOperator* A_opr, - hiopLinearOperator* Mleft_opr, - hiopLinearOperator* Mright_opr, - const hiopVector* x0) +} + +/* + * class hiopPCGSolver + */ +hiopPCGSolver::hiopPCGSolver(int n, + hiopLinearOperator* A_opr, + hiopLinearOperator* Mleft_opr, + hiopLinearOperator* Mright_opr, + const hiopVector* x0) : hiopKrylovSolver(n, A_opr, Mleft_opr, Mright_opr, x0), xmin_{nullptr}, res_{nullptr}, @@ -124,18 +125,17 @@ namespace hiop { zk_{nullptr}, pk_{nullptr}, qk_{nullptr} - { - } +{} - hiopPCGSolver::~hiopPCGSolver() - { - delete xmin_; - delete res_; - delete yk_; - delete zk_; - delete pk_; - delete qk_; - } +hiopPCGSolver::~hiopPCGSolver() +{ + delete xmin_; + delete res_; + delete yk_; + delete zk_; + delete pk_; + delete qk_; +} bool hiopPCGSolver::solve(hiopIterate* xsol, const hiopResidual* bresid) { @@ -157,17 +157,17 @@ bool hiopPCGSolver::solve(hiopVector* b) return true; } - if(xmin_==nullptr) { - xmin_ = b->alloc_clone(); //iterate which has minimal residual so far - res_ = b->alloc_clone(); //minimal residual iterate - yk_ = b->alloc_clone(); //work vectors - zk_ = b->alloc_clone(); //work vectors - pk_ = b->alloc_clone(); //work vectors - qk_ = b->alloc_clone(); //work vectors + if(xmin_ == nullptr) { + xmin_ = b->alloc_clone(); // iterate which has minimal residual so far + res_ = b->alloc_clone(); // minimal residual iterate + yk_ = b->alloc_clone(); // work vectors + zk_ = b->alloc_clone(); // work vectors + pk_ = b->alloc_clone(); // work vectors + qk_ = b->alloc_clone(); // work vectors } if(nullptr == x0_) { - x0_ = b->alloc_clone(); //work vectors + x0_ = b->alloc_clone(); // work vectors x0_->setToZero(); } @@ -176,37 +176,37 @@ bool hiopPCGSolver::solve(hiopVector* b) ////////////////////////////////////////////////////////////////// hiopVector* xk_ = x0_; - + flag_ = 1; - index_type imin = 0; // iteration at which minimal residual is achieved - double tolb = tol_ * n2b; // relative tolerance + index_type imin = 0; // iteration at which minimal residual is achieved + double tolb = tol_ * n2b; // relative tolerance xmin_->copyFrom(*xk_); // compute residual: b-KKT*xk A_opr_->times_vec(*res_, *xk_); res_->axpy(-1.0, *b); - res_->scale(-1.0); + res_->scale(-1.0); double normr = res_->twonorm(); // Norm of residual abs_resid_ = normr; // initial guess is good enough - if(normr <= tolb) { + if(normr <= tolb) { b->copyFrom(*xk_); flag_ = 0; iter_ = 0.; rel_resid_ = normr / n2b; return true; } - + double normrmin = normr; // Two-norm of minimum residual double rho = 1.0; size_type stagsteps = 0; // stagnation of the method size_type moresteps = 0; double eps = std::numeric_limits::epsilon(); - - size_type maxmsteps = 100;//fmin(5, n_-maxit_); - maxmsteps = 100;//fmin(floor(n_/50), maxmsteps); + + size_type maxmsteps = 100; // fmin(5, n_-maxit_); + maxmsteps = 100; // fmin(floor(n_/50), maxmsteps); size_type maxstagsteps = 3; // main loop for PCG @@ -215,7 +215,6 @@ bool hiopPCGSolver::solve(hiopVector* b) double pq; index_type ii = 0; for(; ii < maxit_; ++ii) { - if(ML_opr_) { ML_opr_->times_vec(*yk_, *res_); } else { @@ -226,11 +225,11 @@ bool hiopPCGSolver::solve(hiopVector* b) } else { zk_->copyFrom(*yk_); } - + rho1 = rho; rho = res_->dotProductWith(*zk_); - //check for stagnation + // check for stagnation if((rho == 0) || std::abs(rho) > 1E+20) { flag_ = 4; iter_ = ii + 1; @@ -247,12 +246,12 @@ bool hiopPCGSolver::solve(hiopVector* b) break; } pk_->scale(beta); - pk_->axpy(1.0, *zk_); + pk_->axpy(1.0, *zk_); } A_opr_->times_vec(*qk_, *pk_); pq = pk_->dotProductWith(*qk_); - + if(pq <= 0.0 || std::abs(pq) > 1E+20) { flag_ = 4; iter_ = ii + 1; @@ -265,9 +264,9 @@ bool hiopPCGSolver::solve(hiopVector* b) iter_ = ii + 1; break; } - + // Check for stagnation of the method - if(pk_->twonorm()*std::abs(alpha) < eps * xk_->twonorm()) { + if(pk_->twonorm() * std::abs(alpha) < eps * xk_->twonorm()) { stagsteps++; } else { stagsteps = 0; @@ -276,19 +275,19 @@ bool hiopPCGSolver::solve(hiopVector* b) // new PCG iter xk_->axpy(alpha, *pk_); res_->axpy(-alpha, *qk_); - + normr = res_->twonorm(); abs_resid_ = normr; // check for convergence if(normr <= tolb || stagsteps >= maxstagsteps || moresteps) { // update residual: b-KKT*xk - A_opr_->times_vec(*res_,*xk_); - res_->axpy(-1.0,*b); - res_->scale(-1.0); + A_opr_->times_vec(*res_, *xk_); + res_->axpy(-1.0, *b); + res_->scale(-1.0); abs_resid_ = res_->twonorm(); - if(abs_resid_ <= tolb) { + if(abs_resid_ <= tolb) { b->copyFrom(*xk_); flag_ = 0; iter_ = ii + 1; @@ -317,21 +316,21 @@ bool hiopPCGSolver::solve(hiopVector* b) iter_ = ii + 1; break; } - } // end of for(; ii < maxit_; ++ii) + } // end of for(; ii < maxit_; ++ii) // returned solution is first with minimal residual if(flag_ == 0) { - rel_resid_ = abs_resid_/n2b; - ss_info_ << "PCG converged: actual normResid=" << abs_resid_ << " relResid=" << rel_resid_ - << " iter=" << iter_ << std::endl; - b->copyFrom(*xk_); + rel_resid_ = abs_resid_ / n2b; + ss_info_ << "PCG converged: actual normResid=" << abs_resid_ << " relResid=" << rel_resid_ << " iter=" << iter_ + << std::endl; + b->copyFrom(*xk_); } else { // update residual: b-KKT*xk A_opr_->times_vec(*res_, *xmin_); res_->axpy(-1.0, *b); - res_->scale(-1.0); + res_->scale(-1.0); double normr_comp = res_->twonorm(); - + if(normr_comp <= abs_resid_) { b->copyFrom(*xmin_); iter_ = imin + 1; @@ -344,24 +343,23 @@ bool hiopPCGSolver::solve(hiopVector* b) rel_resid_ = abs_resid_ / n2b; } - ss_info_ << "PCG did NOT converged after " << ii+1 << " iters. The solution from iter " - << imin << " was returned." << std::endl; - ss_info_ << "\t - Error code " << flag_ << "\n\t - Act res=" << abs_resid_ << "n\t - Rel res=" - << rel_resid_ << std::endl; + ss_info_ << "PCG did NOT converged after " << ii + 1 << " iters. The solution from iter " << imin << " was returned." + << std::endl; + ss_info_ << "\t - Error code " << flag_ << "\n\t - Act res=" << abs_resid_ << "n\t - Rel res=" << rel_resid_ + << std::endl; return false; } return true; } - - /* - * class hiopBiCGStabSolver - */ - hiopBiCGStabSolver::hiopBiCGStabSolver(int n, - hiopLinearOperator* A_opr, - hiopLinearOperator* Mleft_opr, - hiopLinearOperator* Mright_opr, - const hiopVector* x0) +/* + * class hiopBiCGStabSolver + */ +hiopBiCGStabSolver::hiopBiCGStabSolver(int n, + hiopLinearOperator* A_opr, + hiopLinearOperator* Mleft_opr, + hiopLinearOperator* Mright_opr, + const hiopVector* x0) : hiopKrylovSolver(n, A_opr, Mleft_opr, Mright_opr, x0), xmin_{nullptr}, res_{nullptr}, @@ -371,20 +369,19 @@ bool hiopPCGSolver::solve(hiopVector* b) sk_{nullptr}, t_{nullptr}, rt_{nullptr} - { - } +{} - hiopBiCGStabSolver::~hiopBiCGStabSolver() - { - delete xmin_; - delete res_; - delete pk_; - delete ph_; - delete v_; - delete sk_; - delete t_; - delete rt_; - } +hiopBiCGStabSolver::~hiopBiCGStabSolver() +{ + delete xmin_; + delete res_; + delete pk_; + delete ph_; + delete v_; + delete sk_; + delete t_; + delete rt_; +} bool hiopBiCGStabSolver::solve(hiopIterate* xsol, const hiopResidual* bresid) { @@ -406,25 +403,25 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) iter_ = 0.; rel_resid_ = 0; abs_resid_ = 0; - ss_info_ << "BiCGStab converged: actual normResid=" << abs_resid_ << " relResid=" << rel_resid_ - << " iter=" << iter_ << std::endl; + ss_info_ << "BiCGStab converged: actual normResid=" << abs_resid_ << " relResid=" << rel_resid_ << " iter=" << iter_ + << std::endl; return true; } - if(xmin_==nullptr) { - xmin_ = b->new_copy(); //iterate which has minimal residual so far + if(xmin_ == nullptr) { + xmin_ = b->new_copy(); // iterate which has minimal residual so far xmin_->setToZero(); - res_ = xmin_->new_copy(); //minimal residual iterate - pk_ = xmin_->new_copy(); //work vectors - ph_ = xmin_->new_copy(); //work vectors - v_ = xmin_->new_copy(); //work vectors - sk_ = xmin_->new_copy(); //work vectors - t_ = xmin_->new_copy(); //work vectors - rt_ = xmin_->new_copy(); //work vectors + res_ = xmin_->new_copy(); // minimal residual iterate + pk_ = xmin_->new_copy(); // work vectors + ph_ = xmin_->new_copy(); // work vectors + v_ = xmin_->new_copy(); // work vectors + sk_ = xmin_->new_copy(); // work vectors + t_ = xmin_->new_copy(); // work vectors + rt_ = xmin_->new_copy(); // work vectors } if(nullptr == x0_) { - x0_ = b->alloc_clone(); //work vectors + x0_ = b->alloc_clone(); // work vectors x0_->setToZero(); } @@ -434,30 +431,30 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) hiopVector* xk_ = x0_; flag_ = 1; - double imin = 0.; // iteration at which minimal residual is achieved - double tolb = tol_ * n2b; // relative tolerance + double imin = 0.; // iteration at which minimal residual is achieved + double tolb = tol_ * n2b; // relative tolerance xmin_->copyFrom(*xk_); // compute residual: b-KKT*xk A_opr_->times_vec(*res_, *xk_); res_->axpy(-1.0, *b); - res_->scale(-1.0); + res_->scale(-1.0); double normr = res_->twonorm(); // Norm of residual abs_resid_ = normr; - + // initial guess is good enough - if(normr <= tolb) { + if(normr <= tolb) { b->copyFrom(*xk_); flag_ = 0; iter_ = 0.; rel_resid_ = normr / n2b; abs_resid_ = normr; - ss_info_ << "BiCGStab converged: actual normResid=" << abs_resid_ << " relResid=" << rel_resid_ - << " iter=" << iter_ << std::endl; + ss_info_ << "BiCGStab converged: actual normResid=" << abs_resid_ << " relResid=" << rel_resid_ << " iter=" << iter_ + << std::endl; return true; } - + rt_->copyFrom(*res_); double normrmin = normr; // Two-norm of minimum residual double rho = 1.0; @@ -465,9 +462,9 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) size_type stagsteps = 0; // stagnation of the method size_type moresteps = 0; double eps = std::numeric_limits::epsilon(); - - size_type maxmsteps = 100;//fmin(5, n_-maxit_); - maxmsteps = 100;//fmin(floor(n_/50), maxmsteps); + + size_type maxmsteps = 100; // fmin(5, n_-maxit_); + maxmsteps = 100; // fmin(floor(n_/50), maxmsteps); size_type maxstagsteps = 3; // main loop for BICGStab @@ -475,11 +472,10 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) double rho1; index_type ii = 0; for(; ii < maxit_; ++ii) { - rho1 = rho; rho = rt_->dotProductWith(*res_); - //check for stagnation + // check for stagnation if((rho == 0) || std::abs(rho) > 1E+40) { flag_ = 4; iter_ = ii + 1 - 0.5; @@ -495,9 +491,9 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) iter_ = ii + 1 - 0.5; break; } - pk_->axpy(-omega, *v_); + pk_->axpy(-omega, *v_); pk_->scale(beta); - pk_->axpy(1.0, *res_); + pk_->axpy(1.0, *res_); } if(ML_opr_) { @@ -510,9 +506,9 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) } A_opr_->times_vec(*v_, *ph_); - + double rtv = rt_->dotProductWith(*v_); - + if(rtv == 0.0 || std::abs(rtv) > 1E+40) { flag_ = 4; iter_ = ii + 1 - 0.5; @@ -526,9 +522,9 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) iter_ = ii + 1 - 0.5; break; } - + // Check for stagnation of the method - if(ph_->twonorm()*std::abs(alpha) < eps * xk_->twonorm()) { + if(ph_->twonorm() * std::abs(alpha) < eps * xk_->twonorm()) { stagsteps++; } else { stagsteps = 0; @@ -538,19 +534,19 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) xk_->axpy(alpha, *ph_); sk_->copyFrom(*res_); sk_->axpy(-alpha, *v_); - + normr = sk_->twonorm(); abs_resid_ = normr; // check for convergence if(normr <= tolb || stagsteps >= maxstagsteps || moresteps) { // update residual: b-KKT*xk - A_opr_->times_vec(*sk_,*xk_); - sk_->axpy(-1.0,*b); - sk_->scale(-1.0); + A_opr_->times_vec(*sk_, *xk_); + sk_->axpy(-1.0, *b); + sk_->scale(-1.0); abs_resid_ = sk_->twonorm(); - - if(abs_resid_ <= tolb) { + + if(abs_resid_ <= tolb) { flag_ = 0; iter_ = ii + 1 - 0.5; break; @@ -592,7 +588,7 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) A_opr_->times_vec(*t_, *ph_); double tt = t_->dotProductWith(*t_); - + if(tt == 0.0 || std::abs(tt) > 1E+20) { iter_ = ii + 1; flag_ = 4; @@ -607,7 +603,7 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) break; } - if(ph_->twonorm()*std::abs(omega) < eps * xk_->twonorm()) { + if(ph_->twonorm() * std::abs(omega) < eps * xk_->twonorm()) { stagsteps++; } else { stagsteps = 0; @@ -617,19 +613,19 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) xk_->axpy(omega, *ph_); res_->copyFrom(*sk_); res_->axpy(-omega, *t_); - + normr = res_->twonorm(); abs_resid_ = normr; // check for convergence if(normr <= tolb || stagsteps >= maxstagsteps || moresteps) { // update residual: b-KKT*xk - A_opr_->times_vec(*res_,*xk_); - res_->axpy(-1.0,*b); - res_->scale(-1.0); + A_opr_->times_vec(*res_, *xk_); + res_->axpy(-1.0, *b); + res_->scale(-1.0); abs_resid_ = res_->twonorm(); - if(abs_resid_ <= tolb) { + if(abs_resid_ <= tolb) { flag_ = 0; iter_ = ii + 1; break; @@ -661,21 +657,21 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) break; } - } // end of for(; ii < maxit_; ++ii) + } // end of for(; ii < maxit_; ++ii) // returned solution is first with minimal residual if(flag_ == 0) { - rel_resid_ = abs_resid_/n2b; - ss_info_ << "BiCGStab converged: actual normResid=" << abs_resid_ << " relResid=" << rel_resid_ - << " iter=" << iter_ << std::endl; - b->copyFrom(*xk_); + rel_resid_ = abs_resid_ / n2b; + ss_info_ << "BiCGStab converged: actual normResid=" << abs_resid_ << " relResid=" << rel_resid_ << " iter=" << iter_ + << std::endl; + b->copyFrom(*xk_); } else { // update residual: b-KKT*xk A_opr_->times_vec(*res_, *xmin_); res_->axpy(-1.0, *b); - res_->scale(-1.0); + res_->scale(-1.0); double normr_comp = res_->twonorm(); - + if(normr_comp <= abs_resid_) { b->copyFrom(*xmin_); iter_ = imin + 1; @@ -688,15 +684,14 @@ bool hiopBiCGStabSolver::solve(hiopVector* b) rel_resid_ = abs_resid_ / n2b; } - ss_info_ << "BiCGStab did NOT converged after " << ii+1 << " iters. The solution from iter " - << imin << " was returned." << std::endl; - ss_info_ << "\t - Error code " << flag_ << "\n\t - Abs res=" << abs_resid_ << "n\t - Rel res=" - << rel_resid_ << std::endl; + ss_info_ << "BiCGStab did NOT converged after " << ii + 1 << " iters. The solution from iter " << imin + << " was returned." << std::endl; + ss_info_ << "\t - Error code " << flag_ << "\n\t - Abs res=" << abs_resid_ << "n\t - Rel res=" << rel_resid_ + << std::endl; ss_info_ << "\t - ||rhs||_2=" << n2b << " ||sol||_2=" << b->twonorm() << std::endl; return false; } return true; } - -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopKrylovSolver.hpp b/src/LinAlg/hiopKrylovSolver.hpp index 1c0bd2c50..52a0aedde 100644 --- a/src/LinAlg/hiopKrylovSolver.hpp +++ b/src/LinAlg/hiopKrylovSolver.hpp @@ -47,11 +47,11 @@ // product endorsement purposes. /* implements the Krylov iterative solver -* @file hiopKrylovSolver.hpp -* @ingroup LinearSolvers -* @author Nai-Yuan Chiang , LLNL -* @author Cosmin G. Petra , LLNL -*/ + * @file hiopKrylovSolver.hpp + * @ingroup LinearSolvers + * @author Nai-Yuan Chiang , LLNL + * @author Cosmin G. Petra , LLNL + */ #ifndef HIOP_KrylovSolver #define HIOP_KrylovSolver @@ -97,57 +97,53 @@ class hiopKrylovSolver virtual void set_x0(double xval); /// Set the maximun number of iteration - inline virtual void set_max_num_iter(int num_iter) {maxit_ = num_iter;} + inline virtual void set_max_num_iter(int num_iter) { maxit_ = num_iter; } /** - * Set Krylov solver tolerance relative to norm of the right-hand side, meaning + * Set Krylov solver tolerance relative to norm of the right-hand side, meaning * that the solver will stop when two-norm of the residual is less than the tolerance * times two-norm of the right-hand side. */ - inline virtual void set_tol(double tol) - { - tol_ = tol; - } - + inline virtual void set_tol(double tol) { tol_ = tol; } + /// Return the absolute residual at the end of Krylov solver - inline virtual double get_sol_abs_resid() {return abs_resid_;} + inline virtual double get_sol_abs_resid() { return abs_resid_; } /// Return the relative residual at the end of Krylov solver - inline virtual double get_sol_rel_resid() {return rel_resid_;} + inline virtual double get_sol_rel_resid() { return rel_resid_; } /// Return the number of iterations at the end of Krylov solver - inline virtual double get_sol_num_iter() {return iter_;} + inline virtual double get_sol_num_iter() { return iter_; } /// Return the message about the convergence - inline virtual std::string get_convergence_info() {return ss_info_.str();} + inline virtual std::string get_convergence_info() { return ss_info_.str(); } /** * Convergence flag: 0 for success, the other codes depending on the Krylov method - * used. Concrete message about the convergence can be obtained from + * used. Concrete message about the convergence can be obtained from * get_convergence_info. */ - inline virtual int get_convergence_flag() {return flag_;} + inline virtual int get_convergence_flag() { return flag_; } protected: + double tol_; // convergence tolerence + size_type maxit_; // maximun number of iteratiions + double iter_; // number of iterations at convergence + int flag_; // convergence flag + double abs_resid_; // absolute residual + double rel_resid_; // relative residual + const size_type n_; // size of the rhs + std::stringstream ss_info_; // message about the convergence - double tol_; // convergence tolerence - size_type maxit_; // maximun number of iteratiions - double iter_; // number of iterations at convergence - int flag_; // convergence flag - double abs_resid_; // absolute residual - double rel_resid_; // relative residual - const size_type n_; // size of the rhs - std::stringstream ss_info_; // message about the convergence - /// Memory space std::string mem_space_; - + /// Linear operator to apply the linear system matrix to a residual/vector hiopLinearOperator* A_opr_; /// Left preconditioner hiopLinearOperator* ML_opr_; - + /// Right preconditioners hiopLinearOperator* MR_opr_; @@ -156,7 +152,7 @@ class hiopKrylovSolver hiopVectorCompoundPD* b_; }; -/** +/** * a Krylov solver class implementing the PCG framework */ class hiopPCGSolver : public hiopKrylovSolver @@ -186,7 +182,7 @@ class hiopPCGSolver : public hiopKrylovSolver hiopVector* qk_; }; -/** +/** * a Krylov solver class implementing the BiCGStab framework */ class hiopBiCGStabSolver : public hiopKrylovSolver @@ -218,6 +214,6 @@ class hiopBiCGStabSolver : public hiopKrylovSolver hiopVector* rt_; }; -} //end namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopLinSolver.cpp b/src/LinAlg/hiopLinSolver.cpp index 868981175..311872d45 100644 --- a/src/LinAlg/hiopLinSolver.cpp +++ b/src/LinAlg/hiopLinSolver.cpp @@ -58,76 +58,69 @@ #include "hiopOptions.hpp" #include "LinAlgFactory.hpp" -namespace hiop { - hiopLinSolver::hiopLinSolver() - : nlp_(NULL), perf_report_(false) - { - } - hiopLinSolver::~hiopLinSolver() - { - } +namespace hiop +{ +hiopLinSolver::hiopLinSolver() + : nlp_(NULL), + perf_report_(false) +{} +hiopLinSolver::~hiopLinSolver() {} - /// Constructor allocates dense system matrix - hiopLinSolverSymDense::hiopLinSolverSymDense(int n, hiopNlpFormulation* nlp) - { - nlp_ = nlp; - perf_report_ = "on"==hiop::tolower(nlp_->options->GetString("time_kkt")); - M_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), n, n); - } +/// Constructor allocates dense system matrix +hiopLinSolverSymDense::hiopLinSolverSymDense(int n, hiopNlpFormulation* nlp) +{ + nlp_ = nlp; + perf_report_ = "on" == hiop::tolower(nlp_->options->GetString("time_kkt")); + M_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), n, n); +} - /// Default constructor is protected and should fail when called - hiopLinSolverSymDense::hiopLinSolverSymDense() +/// Default constructor is protected and should fail when called +hiopLinSolverSymDense::hiopLinSolverSymDense() : M_(nullptr) - { - assert(false); - } +{ + assert(false); +} - /// Destructor deletes the system matrix - hiopLinSolverSymDense::~hiopLinSolverSymDense() - { - delete M_; - } +/// Destructor deletes the system matrix +hiopLinSolverSymDense::~hiopLinSolverSymDense() { delete M_; } - /// Method to return reference to the system matrix - hiopMatrixDense& hiopLinSolverSymDense::sysMatrix() - { - return *M_; - } +/// Method to return reference to the system matrix +hiopMatrixDense& hiopLinSolverSymDense::sysMatrix() { return *M_; } - hiopLinSolverSymSparse::hiopLinSolverSymSparse(int n, int nnz, hiopNlpFormulation* nlp) - { - //we default to triplet matrix for now; derived classes using CSR matrices will not call - //this constructor (will call the 1-parameter constructor below) so they avoid creating - //the triplet matrix - M_ = LinearAlgebraFactory::create_matrix_sparse(nlp->options->GetString("mem_space"), n, n, nnz); - //this class will own `M_` - sys_mat_owned_ = true; - nlp_ = nlp; - perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); - } +hiopLinSolverSymSparse::hiopLinSolverSymSparse(int n, int nnz, hiopNlpFormulation* nlp) +{ + // we default to triplet matrix for now; derived classes using CSR matrices will not call + // this constructor (will call the 1-parameter constructor below) so they avoid creating + // the triplet matrix + M_ = LinearAlgebraFactory::create_matrix_sparse(nlp->options->GetString("mem_space"), n, n, nnz); + // this class will own `M_` + sys_mat_owned_ = true; + nlp_ = nlp; + perf_report_ = "on" == hiop::tolower(nlp->options->GetString("time_kkt")); +} - hiopLinSolverSymSparse::hiopLinSolverSymSparse(hiopNlpFormulation* nlp) - { - M_ = nullptr; - sys_mat_owned_ = false; - nlp_ = nlp; - perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); - } +hiopLinSolverSymSparse::hiopLinSolverSymSparse(hiopNlpFormulation* nlp) +{ + M_ = nullptr; + sys_mat_owned_ = false; + nlp_ = nlp; + perf_report_ = "on" == hiop::tolower(nlp->options->GetString("time_kkt")); +} - hiopLinSolverSymSparse::hiopLinSolverSymSparse(hiopMatrixSparse* M, hiopNlpFormulation* nlp) - { - M_ = M; - sys_mat_owned_ = false; - nlp_ = nlp; - perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); - } - - hiopLinSolverNonSymSparse::hiopLinSolverNonSymSparse(int n, int nnz, hiopNlpFormulation* nlp) - { - M_ = LinearAlgebraFactory::create_matrix_sparse(nlp->options->GetString("mem_space"), n, n, nnz); - sys_mat_owned_ = false; - nlp_ = nlp; - perf_report_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); - } +hiopLinSolverSymSparse::hiopLinSolverSymSparse(hiopMatrixSparse* M, hiopNlpFormulation* nlp) +{ + M_ = M; + sys_mat_owned_ = false; + nlp_ = nlp; + perf_report_ = "on" == hiop::tolower(nlp->options->GetString("time_kkt")); +} -} // namespace hiop +hiopLinSolverNonSymSparse::hiopLinSolverNonSymSparse(int n, int nnz, hiopNlpFormulation* nlp) +{ + M_ = LinearAlgebraFactory::create_matrix_sparse(nlp->options->GetString("mem_space"), n, n, nnz); + sys_mat_owned_ = false; + nlp_ = nlp; + perf_report_ = "on" == hiop::tolower(nlp->options->GetString("time_kkt")); +} + +} // namespace hiop diff --git a/src/LinAlg/hiopLinSolver.hpp b/src/LinAlg/hiopLinSolver.hpp index 79fe53bc5..395f922b8 100644 --- a/src/LinAlg/hiopLinSolver.hpp +++ b/src/LinAlg/hiopLinSolver.hpp @@ -70,7 +70,7 @@ namespace hiop { /** - * Abstract class specifying the linear solver interface needed by interior-point + * Abstract class specifying the linear solver interface needed by interior-point * methods of HiOp. Implementations of this abstract class should be wrappers * of existing CPU and GPU libraries for solving linear systems. */ @@ -87,19 +87,19 @@ class hiopLinSolver */ virtual int matrixChanged() = 0; - /** - * Method to solve the linear system once the factorization phase has been + /** + * Method to solve the linear system once the factorization phase has been * completed by matrixChanged(). - * - * @param x is on entry the right-hand side of the system to be solved. On exit + * + * @param x is on entry the right-hand side of the system to be solved. On exit * it contains the solution. */ virtual bool solve(hiopVector& x) = 0; - + /** - * Method to solve the linear system with multiple right-hand sides once the + * Method to solve the linear system with multiple right-hand sides once the * factorization phase has been completed by matrixChanged(). - * + * * @param x contains on entry the right-hand side(s) of the system to be solved * and storesthe solutions on exit. */ @@ -108,6 +108,7 @@ class hiopLinSolver assert(false && "not yet supported"); return false; } + public: hiopNlpFormulation* nlp_; bool perf_report_; @@ -121,36 +122,33 @@ class hiopLinSolverSymDense : public hiopLinSolver virtual ~hiopLinSolverSymDense(); hiopMatrixDense& sysMatrix(); + protected: hiopMatrixDense* M_; + protected: hiopLinSolverSymDense(); }; - -/** - * Base class for symmetric and non-symmetric sparse linear systems/solvers +/** + * Base class for symmetric and non-symmetric sparse linear systems/solvers */ class hiopLinSolverSparseBase : public hiopLinSolver { public: hiopLinSolverSparseBase() - : M_(nullptr), - sys_mat_owned_(true) - { - } - + : M_(nullptr), + sys_mat_owned_(true) + {} + virtual ~hiopLinSolverSparseBase() { if(sys_mat_owned_) { delete M_; } } - - inline hiopMatrixSparse* sys_matrix() - { - return M_; - } + + inline hiopMatrixSparse* sys_matrix() { return M_; } inline void set_sys_matrix(hiopMatrixSparse* M) { @@ -161,13 +159,14 @@ class hiopLinSolverSparseBase : public hiopLinSolver sys_mat_owned_ = false; M_ = M; } + protected: hiopMatrixSparse* M_; bool sys_mat_owned_; }; -/** - * Base class for symmetric (indefinite or positive definite) sparse solvers +/** + * Base class for symmetric (indefinite or positive definite) sparse solvers */ class hiopLinSolverSymSparse : public hiopLinSolverSparseBase { @@ -176,44 +175,41 @@ class hiopLinSolverSymSparse : public hiopLinSolverSparseBase hiopLinSolverSymSparse(size_type n, size_type nnz, hiopNlpFormulation* nlp); /** - * Constructor that uses the matrix passed as argument as internal system matrix. The system matrix will NOT be - * managed by this class - * + * Constructor that uses the matrix passed as argument as internal system matrix. The system matrix will NOT be + * managed by this class + * * @note This constructor should set `sys_mat_owned_` to `false`. */ hiopLinSolverSymSparse(hiopMatrixSparse* M, hiopNlpFormulation* nlp); /** * Barebone constructor that does not create or set internal system matrix. It should be used for cases when - * the system matrix is not available upon instantiation of this class. This system matrix should be subsequently + * the system matrix is not available upon instantiation of this class. This system matrix should be subsequently * set by the calling code by invoking `set_sys_matrix`. * * @note This constructor should set `sys_mat_owned_` to false. */ hiopLinSolverSymSparse(hiopNlpFormulation* nlp); - - virtual ~hiopLinSolverSymSparse() - { - } + + virtual ~hiopLinSolverSymSparse() {} + protected: hiopLinSolverSymSparse() {} }; -/** - * Base class for non-symmetric sparse solvers -*/ +/** + * Base class for non-symmetric sparse solvers + */ class hiopLinSolverNonSymSparse : public hiopLinSolverSparseBase { public: hiopLinSolverNonSymSparse(size_type n, size_type nnz, hiopNlpFormulation* nlp); - virtual ~hiopLinSolverNonSymSparse() - { - } - + virtual ~hiopLinSolverNonSymSparse() {} + protected: hiopLinSolverNonSymSparse() = delete; }; -} //end namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopLinSolverCholCuSparse.cpp b/src/LinAlg/hiopLinSolverCholCuSparse.cpp index 21d7b12a6..649547294 100644 --- a/src/LinAlg/hiopLinSolverCholCuSparse.cpp +++ b/src/LinAlg/hiopLinSolverCholCuSparse.cpp @@ -69,9 +69,9 @@ #include using Scalar = double; -//using SparseMatrixCSC = Eigen::SparseMatrix; +// using SparseMatrixCSC = Eigen::SparseMatrix; using SparseMatrixCSR = Eigen::SparseMatrix; -//using Triplet = Eigen::Triplet; +// using Triplet = Eigen::Triplet; using Ordering = Eigen::AMDOrdering; using PermutationMatrix = Ordering::PermutationType; #endif @@ -80,32 +80,32 @@ namespace hiop { hiopLinSolverCholCuSparse::hiopLinSolverCholCuSparse(hiopMatrixSparseCSR* M, hiopNlpFormulation* nlp) - : hiopLinSolverSymSparse(M, nlp), - buf_fact_(nullptr), - rowptr_(nullptr), - colind_(nullptr), - values_(nullptr), - P_(nullptr), - PT_(nullptr), - map_nnz_perm_(nullptr), - rhs_buf1_(nullptr), - rhs_buf2_(nullptr) + : hiopLinSolverSymSparse(M, nlp), + buf_fact_(nullptr), + rowptr_(nullptr), + colind_(nullptr), + values_(nullptr), + P_(nullptr), + PT_(nullptr), + map_nnz_perm_(nullptr), + rhs_buf1_(nullptr), + rhs_buf2_(nullptr) { nnz_ = M->numberOfNonzeros(); - + cusolverStatus_t ret; cusparseStatus_t ret_sp; - + ret_sp = cusparseCreate(&h_cusparse_); assert(ret_sp == CUSPARSE_STATUS_SUCCESS); - + ret = cusolverSpCreate(&h_cusolver_); assert(ret == CUSOLVER_STATUS_SUCCESS); ret = cusolverSpCreateCsrcholInfo(&info_); assert(ret == CUSOLVER_STATUS_SUCCESS); - - //matrix description + + // matrix description ret_sp = cusparseCreateMatDescr(&mat_descr_); assert(ret_sp == CUSPARSE_STATUS_SUCCESS); ret_sp = cusparseSetMatType(mat_descr_, CUSPARSE_MATRIX_TYPE_GENERAL); @@ -126,7 +126,7 @@ hiopLinSolverCholCuSparse::~hiopLinSolverCholCuSparse() cudaFree(map_nnz_perm_); map_nnz_perm_ = nullptr; - + cudaFree(buf_fact_); buf_fact_ = nullptr; @@ -134,11 +134,11 @@ hiopLinSolverCholCuSparse::~hiopLinSolverCholCuSparse() P_ = nullptr; cudaFree(PT_); PT_ = nullptr; - + cudaFree(rowptr_); cudaFree(colind_); cudaFree(values_); - + cusparseDestroyMatDescr(mat_descr_); cusolverSpDestroyCsrcholInfo(info_); cusolverSpDestroy(h_cusolver_); @@ -150,15 +150,15 @@ bool hiopLinSolverCholCuSparse::do_symb_analysis(const size_type n, const index_type* rowptr, const index_type* colind, const double* value, - index_type* perm) + index_type* perm) { auto ordering = nlp_->options->GetString("linear_solver_sparse_ordering"); cusolverStatus_t ret; nlp_->log->printf(hovScalars, "Chol CuSolver: using '%s' as ordering strategy.\n", ordering.c_str()); - + if("metis" == ordering) { - const int64_t *options = nullptr; //use default METIS options + const int64_t* options = nullptr; // use default METIS options ret = cusolverSpXcsrmetisndHost(h_cusolver_, n, nnz, mat_descr_, rowptr, colind, options, perm); assert(ret == CUSOLVER_STATUS_SUCCESS); } else if("symamd-cuda" == ordering) { @@ -166,21 +166,18 @@ bool hiopLinSolverCholCuSparse::do_symb_analysis(const size_type n, assert(ret == CUSOLVER_STATUS_SUCCESS); } else if("symamd-eigen" == ordering) { #ifdef HIOP_USE_EIGEN - Eigen::Map M(n, - n, - nnz, - const_cast(rowptr), - const_cast(colind), - const_cast(value)); + Eigen::Map M(n, n, nnz, const_cast(rowptr), const_cast(colind), const_cast(value)); PermutationMatrix P; Ordering ordering; ordering(M.selfadjointView(), P); - memcpy(perm, P.indices().data(), n*sizeof(int)); + memcpy(perm, P.indices().data(), n * sizeof(int)); #else - assert(false && "user option linear_solver_sparse_ordering=symamd-eigen is inconsistent (HiOp was not build with EIGEN)"); - nlp_->log->printf(hovError, - "option linear_solver_sparse_ordering=symamd-eigen is inconsistent (HiOp was not build with EIGEN).\n"); + assert(false && + "user option linear_solver_sparse_ordering=symamd-eigen is inconsistent (HiOp was not build with EIGEN)"); + nlp_->log->printf( + hovError, + "option linear_solver_sparse_ordering=symamd-eigen is inconsistent (HiOp was not build with EIGEN).\n"); #endif } else { assert("symrcm" == ordering && "unrecognized option for sparse solver ordering"); @@ -199,63 +196,58 @@ bool hiopLinSolverCholCuSparse::initial_setup() assert(nnz_ == mat_csr->numberOfNonzeros()); // - // allocate device CSR arrays; then + // allocate device CSR arrays; then // copy row and col arrays to the device - // + // assert(nullptr == rowptr_); - cudaMalloc(&rowptr_, (m+1)*sizeof(int)); + cudaMalloc(&rowptr_, (m + 1) * sizeof(int)); assert(nullptr == colind_); - cudaMalloc(&colind_, nnz_*sizeof(int)); - + cudaMalloc(&colind_, nnz_ * sizeof(int)); + assert(nullptr == values_); - cudaMalloc(&values_, nnz_*sizeof(double)); - + cudaMalloc(&values_, nnz_ * sizeof(double)); + assert(rowptr_); assert(colind_); assert(values_); - + hiopTimer t; std::stringstream ss_log; // // compute permutation to promote sparsity in the factors (on CPU/host) // - t.reset(); t.start(); - + t.reset(); + t.start(); + auto* P_h = new index_type[m]; - + hiopMatrixSparseCSRSeq mat_csr_h(mat_csr->m(), mat_csr->m(), mat_csr->numberOfNonzeros()); mat_csr->copy_to(mat_csr_h); - - - do_symb_analysis(mat_csr_h.m(), - mat_csr_h.numberOfNonzeros(), - mat_csr_h.i_row(), - mat_csr_h.j_col(), - mat_csr_h.M(), - P_h); + + do_symb_analysis(mat_csr_h.m(), mat_csr_h.numberOfNonzeros(), mat_csr_h.i_row(), mat_csr_h.j_col(), mat_csr_h.M(), P_h); ss_log << "\tOrdering: '" << nlp_->options->GetString("linear_solver_sparse_ordering") << "': "; - + t.stop(); ss_log << std::fixed << std::setprecision(4) << t.getElapsedTime() << " sec\n"; - - //compute transpose/inverse permutation + + // compute transpose/inverse permutation index_type* PT_h = new index_type[m]; - for(index_type i=0; i pattern of L + // analysis -> pattern of L // ret = cusolverSpXcsrcholAnalysis(h_cusolver_, m, nnz_, mat_descr_, rowptr_, colind_, info_); assert(ret == CUSOLVER_STATUS_SUCCESS); t.stop(); ss_log << "\tcsrcholAnalysis: " << t.getElapsedTime() << " sec" << std::endl; - // buffer size - size_t internalData; // in BYTEs - ret = cusolverSpDcsrcholBufferInfo(h_cusolver_, - m, - nnz_, - mat_descr_, - mat_csr->M(), //! don't we need to pass the permuted values? - rowptr_, - colind_, - info_, + size_t internalData; // in BYTEs + ret = cusolverSpDcsrcholBufferInfo(h_cusolver_, + m, + nnz_, + mat_descr_, + mat_csr->M(), //! don't we need to pass the permuted values? + rowptr_, + colind_, + info_, &internalData, &buf_fact_size_); assert(ret == CUSOLVER_STATUS_SUCCESS); @@ -383,13 +365,13 @@ bool hiopLinSolverCholCuSparse::initial_setup() if(perf_report_) { nlp_->log->printf(hovSummary, "CholCuSolver: initial setup times: \n%s", ss_log.str().c_str()); } - - cudaError_t ret_cu = cudaMalloc(&buf_fact_, sizeof(unsigned char)*buf_fact_size_); + + cudaError_t ret_cu = cudaMalloc(&buf_fact_, sizeof(unsigned char) * buf_fact_size_); assert(ret_cu == cudaSuccess); - + return true; } - + /* returns -1 if zero or negative pivots are encountered */ int hiopLinSolverCholCuSparse::matrixChanged() { @@ -401,47 +383,34 @@ int hiopLinSolverCholCuSparse::matrixChanged() hiopTimer t; if(nullptr == buf_fact_) { - t.start(); nlp_->runStats.linsolv.tmFactTime.start(); if(!initial_setup()) { - nlp_->log->printf(hovError, - "hiopLinSolverCholCuSparse: initial setup failed.\n"); + nlp_->log->printf(hovError, "hiopLinSolverCholCuSparse: initial setup failed.\n"); return -1; } nlp_->runStats.linsolv.tmFactTime.stop(); t.stop(); if(perf_report_) { - nlp_->log->printf(hovSummary, "CholCuSolver: initial setup total %.4f sec (includes device transfer)\n", t.getElapsedTime()); } } - + nlp_->runStats.linsolv.tmFactTime.start(); // - //permute nonzeros in values_buf_ into values_ accordingly to map_nnz_perm_ + // permute nonzeros in values_buf_ into values_ accordingly to map_nnz_perm_ // permute_vec(nnz_, mat_csr->M(), map_nnz_perm_, values_); - + // - //cuSOLVER factorization + // cuSOLVER factorization // - ret = cusolverSpDcsrcholFactor(h_cusolver_, - m, - nnz_, - mat_descr_, - values_, - rowptr_, - colind_, - info_, - buf_fact_); + ret = cusolverSpDcsrcholFactor(h_cusolver_, m, nnz_, mat_descr_, values_, rowptr_, colind_, info_, buf_fact_); if(ret != CUSOLVER_STATUS_SUCCESS) { // this does not return error when the factorization fails numerically - nlp_->log->printf(hovWarning, - "hiopLinSolverCholCuSparse: factorization failed: CUSOLVER_STATUS=%d.\n", - ret); + nlp_->log->printf(hovWarning, "hiopLinSolverCholCuSparse: factorization failed: CUSOLVER_STATUS=%d.\n", ret); return -1; } nlp_->runStats.linsolv.tmFactTime.stop(); @@ -454,14 +423,11 @@ int hiopLinSolverCholCuSparse::matrixChanged() int position = -1; ret = cusolverSpDcsrcholZeroPivot(h_cusolver_, info_, zero_piv_tol, &position); nlp_->runStats.linsolv.tmInertiaComp.stop(); - - if(position>=0) { - nlp_->log->printf(hovWarning, - "hiopLinSolverCholCuSparse: the %dth pivot is <=%.5e\n", - position, - zero_piv_tol); + + if(position >= 0) { + nlp_->log->printf(hovWarning, "hiopLinSolverCholCuSparse: the %dth pivot is <=%.5e\n", position, zero_piv_tol); return -1; - } + } return 0; } @@ -469,43 +435,41 @@ bool hiopLinSolverCholCuSparse::solve(hiopVector& x_in) { hiopTimer t; cusolverStatus_t ret; - + size_type m = M_->m(); assert(m == x_in.get_size()); if(!rhs_buf1_) { - cudaMalloc(&rhs_buf1_, m*sizeof(double)); + cudaMalloc(&rhs_buf1_, m * sizeof(double)); } if(!rhs_buf2_) { - cudaMalloc(&rhs_buf2_, m*sizeof(double)); + cudaMalloc(&rhs_buf2_, m * sizeof(double)); } nlp_->runStats.linsolv.tmDeviceTransfer.start(); - cudaMemcpy(rhs_buf1_, x_in.local_data(), m*sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(rhs_buf1_, x_in.local_data(), m * sizeof(double), cudaMemcpyHostToDevice); nlp_->runStats.linsolv.tmDeviceTransfer.stop(); - nlp_->runStats.linsolv.tmTriuSolves.start(); + nlp_->runStats.linsolv.tmTriuSolves.start(); // b = P*b permute_vec(m, rhs_buf1_, P_, rhs_buf2_); // - //solve -> two triangular solves + // solve -> two triangular solves // ret = cusolverSpDcsrcholSolve(h_cusolver_, m, rhs_buf2_, rhs_buf1_, info_, buf_fact_); - //x = P'*x + // x = P'*x permute_vec(m, rhs_buf1_, PT_, rhs_buf2_); nlp_->runStats.linsolv.tmTriuSolves.stop(); - - //transfer to host + + // transfer to host nlp_->runStats.linsolv.tmDeviceTransfer.start(); - cudaMemcpy(x_in.local_data(), rhs_buf2_, m*sizeof(double), cudaMemcpyDeviceToHost); + cudaMemcpy(x_in.local_data(), rhs_buf2_, m * sizeof(double), cudaMemcpyDeviceToHost); nlp_->runStats.linsolv.tmDeviceTransfer.stop(); - + if(ret != CUSOLVER_STATUS_SUCCESS) { - nlp_->log->printf(hovWarning, - "hiopLinSolverCholCuSparse: solve failed: CUSOLVER_STATUS=%d.\n", - ret); + nlp_->log->printf(hovWarning, "hiopLinSolverCholCuSparse: solve failed: CUSOLVER_STATUS=%d.\n", ret); return false; } @@ -516,40 +480,33 @@ bool hiopLinSolverCholCuSparse::permute_vec(int n, double* vec_in, index_type* p { cusparseStatus_t ret; #if CUSPARSE_VERSION >= 11400 - //the descr of the array going to be permuted + // the descr of the array going to be permuted cusparseSpVecDescr_t v_out; - //original nonzeros + // original nonzeros cusparseDnVecDescr_t v_in; - + // Create sparse vector (output) - ret = cusparseCreateSpVec(&v_out, - n, - n, - perm, - vec_out, - CUSPARSE_INDEX_32I, - CUSPARSE_INDEX_BASE_ZERO, - CUDA_R_64F); + ret = cusparseCreateSpVec(&v_out, n, n, perm, vec_out, CUSPARSE_INDEX_32I, CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); assert(CUSPARSE_STATUS_SUCCESS == ret); - + // Create dense vector (input) ret = cusparseCreateDnVec(&v_in, n, vec_in, CUDA_R_64F); assert(CUSPARSE_STATUS_SUCCESS == ret); - + ret = cusparseGather(h_cusparse_, v_in, v_out); assert(CUSPARSE_STATUS_SUCCESS == ret); cusparseDestroySpVec(v_out); cusparseDestroyDnVec(v_in); - -#else //CUSPARSE_VERSION < 11400 - + +#else // CUSPARSE_VERSION < 11400 + ret = cusparseDgthr(h_cusparse_, n, vec_in, vec_out, perm, CUSPARSE_INDEX_BASE_ZERO); assert(CUSPARSE_STATUS_SUCCESS == ret); -#endif +#endif return (CUSPARSE_STATUS_SUCCESS == ret); } -} // end of namespace +} // namespace hiop -#endif //HIOP_USE_CUDA +#endif // HIOP_USE_CUDA diff --git a/src/LinAlg/hiopLinSolverCholCuSparse.hpp b/src/LinAlg/hiopLinSolverCholCuSparse.hpp index 00814c193..2db323206 100644 --- a/src/LinAlg/hiopLinSolverCholCuSparse.hpp +++ b/src/LinAlg/hiopLinSolverCholCuSparse.hpp @@ -62,7 +62,7 @@ #include #include #include -#include +#include #include "hiopMatrixSparseCsrCuda.hpp" #include "hiopKKTLinSysSparseCondensed.hpp" @@ -73,23 +73,23 @@ namespace hiop * Wrapper class for cusolverSpXcsrchol Cholesky solver. */ -class hiopLinSolverCholCuSparse: public hiopLinSolverSymSparse +class hiopLinSolverCholCuSparse : public hiopLinSolverSymSparse { public: hiopLinSolverCholCuSparse(hiopMatrixSparseCSR* M, hiopNlpFormulation* nlp); virtual ~hiopLinSolverCholCuSparse(); /** - * Triggers a refactorization of the matrix, if necessary. - * Returns -1 if zero or negative pivots are encountered + * Triggers a refactorization of the matrix, if necessary. + * Returns -1 if zero or negative pivots are encountered */ int matrixChanged(); using hiopLinSolverSymSparse::solve; - + /** Solves a linear system. * param 'x' is on entry the right hand side(s) of the system to be solved. On - * exit is contains the solution(s). + * exit is contains the solution(s). */ bool solve(hiopVector& x_in); @@ -104,8 +104,8 @@ class hiopLinSolverCholCuSparse: public hiopLinSolverSymSparse const index_type* colind, const double* value, index_type* perm); - - /** + + /** * Permutes an array accordingly to given permutation. All pointers are on device and * the method executes on device. */ @@ -113,6 +113,7 @@ class hiopLinSolverCholCuSparse: public hiopLinSolverSymSparse /*const*/ double* vec_in, /*const*/ index_type* permutation, double* vec_out); + protected: /// Internal handle required by cuSPARSE functions cusparseHandle_t h_cusparse_; @@ -149,18 +150,15 @@ class hiopLinSolverCholCuSparse: public hiopLinSolverSymSparse /// internal buffers in the size of the linear system (on device) double* rhs_buf1_; double* rhs_buf2_; - + protected: - inline hiopMatrixSparseCSRCUDA* sys_mat_csr() - { - return dynamic_cast(M_); - } + inline hiopMatrixSparseCSRCUDA* sys_mat_csr() { return dynamic_cast(M_); } + private: - hiopLinSolverCholCuSparse() = delete; + hiopLinSolverCholCuSparse() = delete; }; +} // namespace hiop -} // end of namespace - -#endif //HIOP_USE_CUDA -#endif //HIOP_LINSOLVER_CHOL_CUSP +#endif // HIOP_USE_CUDA +#endif // HIOP_LINSOLVER_CHOL_CUSP diff --git a/src/LinAlg/hiopLinSolverMA86Z.cpp b/src/LinAlg/hiopLinSolverMA86Z.cpp index af89854f2..04b766217 100644 --- a/src/LinAlg/hiopLinSolverMA86Z.cpp +++ b/src/LinAlg/hiopLinSolverMA86Z.cpp @@ -4,179 +4,183 @@ namespace hiop { - hiopLinSolverMA86Z::hiopLinSolverMA86Z(hiopMatrixComplexSparseTriplet& sysmat, hiopNlpFormulation* nlp_/*=NULL*/) - : hiopLinSolver(), keep(NULL), ptr(NULL), row(NULL), order(NULL), vals(NULL), sys_mat(sysmat) - { - nlp = nlp_; +hiopLinSolverMA86Z::hiopLinSolverMA86Z(hiopMatrixComplexSparseTriplet& sysmat, hiopNlpFormulation* nlp_ /*=NULL*/) + : hiopLinSolver(), + keep(NULL), + ptr(NULL), + row(NULL), + order(NULL), + vals(NULL), + sys_mat(sysmat) +{ + nlp = nlp_; - n = sys_mat.n(); - nnz = sys_mat.numberOfNonzeros(); + n = sys_mat.n(); + nnz = sys_mat.numberOfNonzeros(); - ma86_default_control_z(&control); + ma86_default_control_z(&control); - ptr = new int[n+1]; - row = new int[nnz]; - vals = new double _Complex[nnz]; + ptr = new int[n + 1]; + row = new int[nnz]; + vals = new double _Complex[nnz]; - order = new int[n]; - for(int i=0; ii_row(); - const int* jcol = sys_mat.storage()->j_col(); - const std::complex* M = sys_mat.storage()->M(); - - //since - // 1. sys_mat is upper triangle - // 2. sys_mat is ordered on (i,j) (first on i and then on j) - // 3. ma86 expects lower triangular in column oriented - //we can - // i. do the update in linear time - //ii. copy sys_mat.j_col to this->row - //iii.copy sys_mat.M to this->vals - - //i. - ptr[0] = 0; - int next_col=1, it=0; - for(it=0; it=0); +hiopLinSolverMA86Z::~hiopLinSolverMA86Z() +{ + ma86_finalise(&keep, &control); + delete[] ptr; + delete[] row; + delete[] order; + delete[] vals; +} + +int hiopLinSolverMA86Z::matrixChanged() +{ + assert(n == sys_mat.n()); + assert(nnz == sys_mat.numberOfNonzeros()); + // + // update ptr, row, and vals from sys_mat + // + const int* irow = sys_mat.storage()->i_row(); + const int* jcol = sys_mat.storage()->j_col(); + const std::complex* M = sys_mat.storage()->M(); + + // since + // 1. sys_mat is upper triangle + // 2. sys_mat is ordered on (i,j) (first on i and then on j) + // 3. ma86 expects lower triangular in column oriented + // we can + // i. do the update in linear time + // ii. copy sys_mat.j_col to this->row + // iii.copy sys_mat.M to this->vals + + // i. + ptr[0] = 0; + int next_col = 1, it = 0; + for(it = 0; it < nnz; it++) { + if(irow[it] == next_col) { + ptr[next_col] = it; + next_col++; } - ptr[n] = nnz; + assert(next_col <= n); + assert(next_col >= 0); + } + ptr[n] = nnz; - //ii. - memcpy(row, jcol, sizeof(int)*nnz); + // ii. + memcpy(row, jcol, sizeof(int) * nnz); - double buffer[2]; - //iii. - for(int it=0; iti_row(); + const int* B_jcol = B.storage()->j_col(); + const auto* B_M = B.storage()->M(); + const int B_nnz = B.numberOfNonzeros(); + + // This is messy - MA86 expects X column oriented + // MA86 user manual + // "x is a rank-2 array with size x[nrhs][ldx]. On entry, x[j][i] must hold the ith component + // of the jth right-hand side; on exit, it holds the corresponding solution" + // hiopMatrixComplexDense X is row oriented + // + // We use an auxiliary buffer nrhs x ldx of _Complex double that stores X / RHS column oriented + // + const int dimM = n; + int dimN = nrhs; + _Complex double* X_buf = new _Complex double[dimM * dimN]; + + // TODO: solve only for a smaller number of rhs (64, 128) at once (requires calling ma86_solve in a loop) + // this would also reduce the buffer storage + + for(int i = 0; i < dimM * dimN; i++) X_buf[i] = 0.; + + double buffer[2]; + + for(int itnz = 0; itnz < B_nnz; itnz++) { + assert(B_jcol[itnz] >= 0 && B_jcol[itnz] < X.n()); + assert(B_irow[itnz] >= 0 && B_irow[itnz] < X.m()); + // X_buf[ B_jcol[itnz]*dimM + B_irow[itnz] ] = B_M[itnz].real() + I*B_M[itnz].imag(); + //! +#pragma message("revisit this code (MA86z class complex numbers handling) for performance considerations") + buffer[0] = B_M[itnz].real(); + buffer[1] = B_M[itnz].imag(); + memcpy(X_buf + B_jcol[itnz] * dimM + B_irow[itnz], buffer, sizeof X_buf[0]); } - void hiopLinSolverMA86Z::solve(hiopMatrix& X) - { - assert(false && "not yet implemented"); //not needed; + ma86_solve(0, dimN, ldx, X_buf, order, &keep, &control, &info, NULL); + if(info.flag < 0) { + printf("Failure during solve with info.flag = %i\n", info.flag); } - - void hiopLinSolverMA86Z::solve(const hiopMatrixComplexSparseTriplet& B, hiopMatrixComplexDense& X) - { - assert(X.n()==B.n()); - assert(n==B.m()); - assert(n==X.m()); - - int ldx = n; - int nrhs = X.n(); - - X.setToZero(); - - //copy from B to X. !!! - const int* B_irow = B.storage()->i_row(); - const int* B_jcol = B.storage()->j_col(); - const auto*B_M = B.storage()->M(); - const int B_nnz = B.numberOfNonzeros(); - - - // This is messy - MA86 expects X column oriented - // MA86 user manual - // "x is a rank-2 array with size x[nrhs][ldx]. On entry, x[j][i] must hold the ith component - // of the jth right-hand side; on exit, it holds the corresponding solution" - // hiopMatrixComplexDense X is row oriented - // - // We use an auxiliary buffer nrhs x ldx of _Complex double that stores X / RHS column oriented - // - const int dimM=n; int dimN=nrhs; - _Complex double* X_buf = new _Complex double[dimM*dimN]; - - // TODO: solve only for a smaller number of rhs (64, 128) at once (requires calling ma86_solve in a loop) - // this would also reduce the buffer storage - - for(int i=0; i=0 && B_jcol[itnz]=0 && B_irow[itnz]** X_M = X.get_M(); - for(int i=0; i** X_M = X.get_M(); + for(int i = 0; i < dimM; i++) { + for(int j = 0; j < dimN; j++) { + // X_M[i][j] = X_buf[j*dimM+i]; #pragma message("revisit this code (MA86z class complex numbers handling) for performance considerations") - memcpy(buffer, X_buf+j*dimM+i, sizeof X_buf[0]); - X_M[i][j] = std::complex(buffer[0], buffer[1]); - } + memcpy(buffer, X_buf + j * dimM + i, sizeof X_buf[0]); + X_M[i][j] = std::complex(buffer[0], buffer[1]); } - - delete[] X_buf; } -} //end namespace hiop + delete[] X_buf; +} + +} // end namespace hiop diff --git a/src/LinAlg/hiopLinSolverMA86Z.hpp b/src/LinAlg/hiopLinSolverMA86Z.hpp index b0ddddc54..dcb5f6dde 100644 --- a/src/LinAlg/hiopLinSolverMA86Z.hpp +++ b/src/LinAlg/hiopLinSolverMA86Z.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_LINSOLVER_MA86Z @@ -53,18 +53,19 @@ #include "hsl_ma86z.hpp" #ifdef HSL_MC69Z_HEADER_NOT_CPP_READY -#pragma message("hsl_mc69z.h file needs additional instrumentation to work with C++. " \ - "See issue #15 on github.com/llnl/hiop") -//#pragma message("See issue #17 on github.com/llnl/hiop") +#pragma message( \ + "hsl_mc69z.h file needs additional instrumentation to work with C++. " \ + "See issue #15 on github.com/llnl/hiop") +// #pragma message("See issue #17 on github.com/llnl/hiop") #endif #ifdef HSL_MA86Z_HEADER_NOT_CPP_READY -#pragma message("hsl_ma86z.h file needs additional instrumentation to work with C++. " \ - "See issue #15 on github.com/llnl/hiop") -//#error compilation aborted +#pragma message( \ + "hsl_ma86z.h file needs additional instrumentation to work with C++. " \ + "See issue #15 on github.com/llnl/hiop") +// #error compilation aborted #endif - #include "hiopNlpFormulation.hpp" #include "hiopLinSolver.hpp" #include "hiopMatrixComplexSparseTriplet.hpp" @@ -72,40 +73,39 @@ namespace hiop { - /* - Note: the following methods of hiopLinSolver are NOT - implemented in this class: - - solve (hiopVector) - - solve (hiopMatrix) - */ - class hiopLinSolverMA86Z : public hiopLinSolver - { - public: - hiopLinSolverMA86Z(hiopMatrixComplexSparseTriplet& sysmat, hiopNlpFormulation* nlp_=NULL); - virtual ~hiopLinSolverMA86Z(); - - /** Triggers a refactorization of the matrix, if necessary. - * Returns -1 if trouble in factorization is encountered. */ - virtual int matrixChanged(); - - /** solves a linear system. - * param 'x' is on entry the right hand side(s) of the system to be solved. On - * exit is contains the solution(s). */ - virtual void solve(hiopVector& x); - virtual void solve(hiopMatrix& X); - virtual void solve(const hiopMatrixComplexSparseTriplet& B, hiopMatrixComplexDense& X); +/* + Note: the following methods of hiopLinSolver are NOT + implemented in this class: + - solve (hiopVector) + - solve (hiopMatrix) +*/ +class hiopLinSolverMA86Z : public hiopLinSolver +{ +public: + hiopLinSolverMA86Z(hiopMatrixComplexSparseTriplet& sysmat, hiopNlpFormulation* nlp_ = NULL); + virtual ~hiopLinSolverMA86Z(); - private: - void* keep; - ma86_control_z control; - ma86_info_z info; + /** Triggers a refactorization of the matrix, if necessary. + * Returns -1 if trouble in factorization is encountered. */ + virtual int matrixChanged(); - int *ptr, *row, *order; - double _Complex *vals; - const hiopMatrixComplexSparseTriplet& sys_mat; - int n, nnz; - }; -} //end namespace hiop + /** solves a linear system. + * param 'x' is on entry the right hand side(s) of the system to be solved. On + * exit is contains the solution(s). */ + virtual void solve(hiopVector& x); + virtual void solve(hiopMatrix& X); + virtual void solve(const hiopMatrixComplexSparseTriplet& B, hiopMatrixComplexDense& X); -#endif +private: + void* keep; + ma86_control_z control; + ma86_info_z info; + int *ptr, *row, *order; + double _Complex* vals; + const hiopMatrixComplexSparseTriplet& sys_mat; + int n, nnz; +}; +} // end namespace hiop + +#endif diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp index 971ef699a..8755b8aeb 100644 --- a/src/LinAlg/hiopLinSolverSparseGinkgo.cpp +++ b/src/LinAlg/hiopLinSolverSparseGinkgo.cpp @@ -59,126 +59,129 @@ namespace hiop { - namespace { - std::shared_ptr> transferTripletToCSR(std::shared_ptr exec, int n_, hiopMatrixSparse* M_, int** index_covert_CSR2Triplet, int** index_covert_extra_Diag2CSR) { - // transfer triplet form to CSR form - // note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part are the additional diagonal elememts - // the 1st part is sorted by row - int nnz_{0}; - auto kRowPtr_ = new int[n_+1]{0}; - { - // - // compute nnz in each row - // - // off-diagonal part - kRowPtr_[0]=0; - for(int k=0; knumberOfNonzeros()-n_; k++) { - if(M_->i_row()[k]!=M_->j_col()[k]) { - kRowPtr_[M_->i_row()[k]+1]++; - kRowPtr_[M_->j_col()[k]+1]++; - nnz_ += 2; - } - } - // diagonal part - for(int i=0; inumberOfNonzeros() - n_; k++) { + if(M_->i_row()[k] != M_->j_col()[k]) { + kRowPtr_[M_->i_row()[k] + 1]++; + kRowPtr_[M_->j_col()[k] + 1]++; + nnz_ += 2; } - assert(nnz_==kRowPtr_[n_]); } - auto kVal_ = new double[nnz_]{0.0}; - auto jCol_ = new int[nnz_]{0}; - *index_covert_CSR2Triplet = new int[nnz_]; - *index_covert_extra_Diag2CSR = new int[n_]; - auto index_covert_CSR2Triplet_ = *index_covert_CSR2Triplet; - auto index_covert_extra_Diag2CSR_ = *index_covert_extra_Diag2CSR; - { - // - // set correct col index and value - // - - int *nnz_each_row_tmp = new int[n_]{0}; - int total_nnz_tmp{0}; - int nnz_tmp{0}; - int rowID_tmp; - int colID_tmp; - for(int k=0; knumberOfNonzeros()-n_; k++) { - rowID_tmp = M_->i_row()[k]; - colID_tmp = M_->j_col()[k]; - if(rowID_tmp==colID_tmp) { - nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; - jCol_[nnz_tmp] = colID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; - index_covert_CSR2Triplet_[nnz_tmp] = k; - - kVal_[nnz_tmp] += M_->M()[M_->numberOfNonzeros()-n_+rowID_tmp]; - index_covert_extra_Diag2CSR_[rowID_tmp] = nnz_tmp; - - nnz_each_row_tmp[rowID_tmp]++; - total_nnz_tmp++; - } else { - nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; - jCol_[nnz_tmp] = colID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; - index_covert_CSR2Triplet_[nnz_tmp] = k; - - nnz_tmp = nnz_each_row_tmp[colID_tmp] + kRowPtr_[colID_tmp]; - jCol_[nnz_tmp] = rowID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; - index_covert_CSR2Triplet_[nnz_tmp] = k; - - nnz_each_row_tmp[rowID_tmp]++; - nnz_each_row_tmp[colID_tmp]++; - total_nnz_tmp += 2; - } + for(int k = 0; k < M_->numberOfNonzeros() - n_; k++) { + rowID_tmp = M_->i_row()[k]; + colID_tmp = M_->j_col()[k]; + if(rowID_tmp == colID_tmp) { + nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; + jCol_[nnz_tmp] = colID_tmp; + kVal_[nnz_tmp] = M_->M()[k]; + index_covert_CSR2Triplet_[nnz_tmp] = k; + + kVal_[nnz_tmp] += M_->M()[M_->numberOfNonzeros() - n_ + rowID_tmp]; + index_covert_extra_Diag2CSR_[rowID_tmp] = nnz_tmp; + + nnz_each_row_tmp[rowID_tmp]++; + total_nnz_tmp++; + } else { + nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; + jCol_[nnz_tmp] = colID_tmp; + kVal_[nnz_tmp] = M_->M()[k]; + index_covert_CSR2Triplet_[nnz_tmp] = k; + + nnz_tmp = nnz_each_row_tmp[colID_tmp] + kRowPtr_[colID_tmp]; + jCol_[nnz_tmp] = rowID_tmp; + kVal_[nnz_tmp] = M_->M()[k]; + index_covert_CSR2Triplet_[nnz_tmp] = k; + + nnz_each_row_tmp[rowID_tmp]++; + nnz_each_row_tmp[colID_tmp]++; + total_nnz_tmp += 2; } - // correct the missing diagonal term - for(int i=0; iM()[M_->numberOfNonzeros() - n_ + i]; - index_covert_CSR2Triplet_[nnz_tmp] = M_->numberOfNonzeros() - n_ + i; - total_nnz_tmp += 1; - - std::vector ind_temp(kRowPtr_[i+1] - kRowPtr_[i]); - std::iota(ind_temp.begin(), ind_temp.end(), 0); - std::sort(ind_temp.begin(), ind_temp.end(),[&](int a, int b){ return jCol_[a+kRowPtr_[i]] < jCol_[b+kRowPtr_[i]]; }); - - reorder(kVal_+kRowPtr_[i],ind_temp,kRowPtr_[i+1] - kRowPtr_[i]); - reorder(index_covert_CSR2Triplet_+kRowPtr_[i],ind_temp,kRowPtr_[i+1] - kRowPtr_[i]); - std::sort(jCol_+kRowPtr_[i],jCol_+kRowPtr_[i+1]); - } + } + // correct the missing diagonal term + for(int i = 0; i < n_; i++) { + if(nnz_each_row_tmp[i] != kRowPtr_[i + 1] - kRowPtr_[i]) { + assert(nnz_each_row_tmp[i] == kRowPtr_[i + 1] - kRowPtr_[i] - 1); + nnz_tmp = nnz_each_row_tmp[i] + kRowPtr_[i]; + jCol_[nnz_tmp] = i; + kVal_[nnz_tmp] = M_->M()[M_->numberOfNonzeros() - n_ + i]; + index_covert_CSR2Triplet_[nnz_tmp] = M_->numberOfNonzeros() - n_ + i; + total_nnz_tmp += 1; + + std::vector ind_temp(kRowPtr_[i + 1] - kRowPtr_[i]); + std::iota(ind_temp.begin(), ind_temp.end(), 0); + std::sort(ind_temp.begin(), ind_temp.end(), [&](int a, int b) { + return jCol_[a + kRowPtr_[i]] < jCol_[b + kRowPtr_[i]]; + }); + + reorder(kVal_ + kRowPtr_[i], ind_temp, kRowPtr_[i + 1] - kRowPtr_[i]); + reorder(index_covert_CSR2Triplet_ + kRowPtr_[i], ind_temp, kRowPtr_[i + 1] - kRowPtr_[i]); + std::sort(jCol_ + kRowPtr_[i], jCol_ + kRowPtr_[i + 1]); } - - delete[] nnz_each_row_tmp; } - auto val_array = gko::array::view(exec, nnz_, kVal_); - auto row_ptrs = gko::array::view(exec, n_ + 1, kRowPtr_); - auto col_idxs = gko::array::view(exec, nnz_, jCol_); - auto mtx = gko::share(gko::matrix::Csr::create(exec, gko::dim<2>{(long unsigned int)n_, (long unsigned int)n_}, val_array, col_idxs, row_ptrs)); - return mtx; -} + delete[] nnz_each_row_tmp; + } + auto val_array = gko::array::view(exec, nnz_, kVal_); + auto row_ptrs = gko::array::view(exec, n_ + 1, kRowPtr_); + auto col_idxs = gko::array::view(exec, nnz_, jCol_); + auto mtx = gko::share(gko::matrix::Csr::create(exec, + gko::dim<2>{(long unsigned int)n_, (long unsigned int)n_}, + val_array, + col_idxs, + row_ptrs)); + return mtx; +} void update_matrix(hiopMatrixSparse* M_, std::shared_ptr> mtx, @@ -186,189 +189,166 @@ void update_matrix(hiopMatrixSparse* M_, int* index_covert_CSR2Triplet_, int* index_covert_extra_Diag2CSR_) { - int n_ = mtx->get_size()[0]; - int nnz_= mtx->get_num_stored_elements(); - auto values = host_mtx->get_values(); - for(int k=0; kM()[index_covert_CSR2Triplet_[k]]; - } - for(int i=0; iM()[M_->numberOfNonzeros() - n_ + i]; - } - } - auto exec = mtx->get_executor(); - if (exec != exec->get_master()) { - mtx->copy_from(host_mtx.get()); + int n_ = mtx->get_size()[0]; + int nnz_ = mtx->get_num_stored_elements(); + auto values = host_mtx->get_values(); + for(int k = 0; k < nnz_; k++) { + values[k] = M_->M()[index_covert_CSR2Triplet_[k]]; + } + for(int i = 0; i < n_; i++) { + if(index_covert_extra_Diag2CSR_[i] != -1) { + values[index_covert_extra_Diag2CSR_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i]; } + } + auto exec = mtx->get_executor(); + if(exec != exec->get_master()) { + mtx->copy_from(host_mtx.get()); + } } - std::shared_ptr create_exec(std::string executor_string) { - // The omp and dpcpp currently do not support LU factorization. - std::map()>> - exec_map{ - {"omp", [] { return gko::OmpExecutor::create(); }}, - {"cuda", - [] { - return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(), - true); - }}, - {"hip", - [] { - return gko::HipExecutor::create(0, gko::ReferenceExecutor::create(), - true); - }}, - {"dpcpp", - [] { - return gko::DpcppExecutor::create(0, - gko::ReferenceExecutor::create()); - }}, - {"reference", [] { return gko::ReferenceExecutor::create(); }}}; - - return exec_map.at(executor_string)(); + // The omp and dpcpp currently do not support LU factorization. + std::map()>> exec_map{ + {"omp", [] { return gko::OmpExecutor::create(); }}, + {"cuda", [] { return gko::CudaExecutor::create(0, gko::ReferenceExecutor::create(), true); }}, + {"hip", [] { return gko::HipExecutor::create(0, gko::ReferenceExecutor::create(), true); }}, + {"dpcpp", [] { return gko::DpcppExecutor::create(0, gko::ReferenceExecutor::create()); }}, + {"reference", [] { return gko::ReferenceExecutor::create(); }}}; + + return exec_map.at(executor_string)(); } - std::shared_ptr setup_solver_factory(std::shared_ptr exec, std::shared_ptr> mtx, gko::solver::trisolve_algorithm alg, - const unsigned gmres_iter, const double gmres_tol, const unsigned gmres_restart) + const unsigned gmres_iter, + const double gmres_tol, + const unsigned gmres_restart) { - auto preprocessing_fact = gko::share(gko::reorder::Mc64::build().on(exec)); - auto preprocessing = gko::share(preprocessing_fact->generate(mtx)); - auto lu_fact = gko::share(gko::experimental::factorization::Glu::build_reusable() - .on(exec, mtx.get(), preprocessing.get())); - auto inner_solver_fact = gko::share(gko::experimental::solver::Direct::build() - .with_factorization(lu_fact) - .with_algorithm(alg) - .on(exec)); - - std::shared_ptr solver_fact = inner_solver_fact; - if (gmres_iter > 0) { - solver_fact = gko::share(gko::solver::Gmres::build() - .with_criteria( - gko::stop::Iteration::build() - .with_max_iters(gmres_iter) - .on(exec), - gko::stop::ResidualNorm<>::build() - .with_baseline(gko::stop::mode::absolute) - .with_reduction_factor(gmres_tol) - .on(exec)) - .with_krylov_dim(gmres_restart) - .with_preconditioner(inner_solver_fact) - .on(exec)); - } + auto preprocessing_fact = gko::share(gko::reorder::Mc64::build().on(exec)); + auto preprocessing = gko::share(preprocessing_fact->generate(mtx)); + auto lu_fact = gko::share( + gko::experimental::factorization::Glu::build_reusable().on(exec, mtx.get(), preprocessing.get())); + auto inner_solver_fact = gko::share( + gko::experimental::solver::Direct::build().with_factorization(lu_fact).with_algorithm(alg).on(exec)); + + std::shared_ptr solver_fact = inner_solver_fact; + if(gmres_iter > 0) { + solver_fact = gko::share(gko::solver::Gmres::build() + .with_criteria(gko::stop::Iteration::build().with_max_iters(gmres_iter).on(exec), + gko::stop::ResidualNorm<>::build() + .with_baseline(gko::stop::mode::absolute) + .with_reduction_factor(gmres_tol) + .on(exec)) + .with_krylov_dim(gmres_restart) + .with_preconditioner(inner_solver_fact) + .on(exec)); + } - auto reusable_factory = gko::share(gko::solver::ScaledReordered<>::build() - .with_solver(solver_fact) - .with_reordering(preprocessing) - .on(exec)); - return reusable_factory; + auto reusable_factory = + gko::share(gko::solver::ScaledReordered<>::build().with_solver(solver_fact).with_reordering(preprocessing).on(exec)); + return reusable_factory; } +} // namespace -} - - const std::map - hiopLinSolverSymSparseGinkgo::alg_map_ = {{"syncfree", gko::solver::trisolve_algorithm::syncfree}, - {"sparselib", gko::solver::trisolve_algorithm::sparselib}}; +const std::map hiopLinSolverSymSparseGinkgo::alg_map_ = { + {"syncfree", gko::solver::trisolve_algorithm::syncfree}, + {"sparselib", gko::solver::trisolve_algorithm::sparselib}}; - hiopLinSolverSymSparseGinkgo::hiopLinSolverSymSparseGinkgo(const int& n, - const int& nnz, - hiopNlpFormulation* nlp) +hiopLinSolverSymSparseGinkgo::hiopLinSolverSymSparseGinkgo(const int& n, const int& nnz, hiopNlpFormulation* nlp) : hiopLinSolverSymSparse(n, nnz, nlp), n_{n}, nnz_{0}, index_covert_CSR2Triplet_{nullptr}, index_covert_extra_Diag2CSR_{nullptr} - {} +{} - hiopLinSolverSymSparseGinkgo::~hiopLinSolverSymSparseGinkgo() - { - delete [] index_covert_CSR2Triplet_; - delete [] index_covert_extra_Diag2CSR_; - } +hiopLinSolverSymSparseGinkgo::~hiopLinSolverSymSparseGinkgo() +{ + delete[] index_covert_CSR2Triplet_; + delete[] index_covert_extra_Diag2CSR_; +} - void hiopLinSolverSymSparseGinkgo::firstCall() - { - nlp_->log->printf(hovSummary, "Setting up Ginkgo solver ... \n"); - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - - exec_ = create_exec(nlp_->options->GetString("ginkgo_exec")); - auto alg = alg_map_.at(nlp_->options->GetString("ginkgo_trisolve")); - auto gmres_iter = nlp_->options->GetInteger("ir_inner_maxit"); - auto gmres_tol = nlp_->options->GetNumeric("ir_inner_tol"); - auto gmres_restart = nlp_->options->GetInteger("ir_inner_restart"); - iterative_refinement_ = gmres_iter > 0; - - host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_); - mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_); - nnz_ = mtx_->get_num_stored_elements(); - - reusable_factory_ = setup_solver_factory(exec_, mtx_, alg, gmres_iter, gmres_tol, gmres_restart); - } +void hiopLinSolverSymSparseGinkgo::firstCall() +{ + nlp_->log->printf(hovSummary, "Setting up Ginkgo solver ... \n"); + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + exec_ = create_exec(nlp_->options->GetString("ginkgo_exec")); + auto alg = alg_map_.at(nlp_->options->GetString("ginkgo_trisolve")); + auto gmres_iter = nlp_->options->GetInteger("ir_inner_maxit"); + auto gmres_tol = nlp_->options->GetNumeric("ir_inner_tol"); + auto gmres_restart = nlp_->options->GetInteger("ir_inner_restart"); + iterative_refinement_ = gmres_iter > 0; + + host_mtx_ = transferTripletToCSR(exec_->get_master(), n_, M_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_); + mtx_ = exec_ == (exec_->get_master()) ? host_mtx_ : gko::clone(exec_, host_mtx_); + nnz_ = mtx_->get_num_stored_elements(); + + reusable_factory_ = setup_solver_factory(exec_, mtx_, alg, gmres_iter, gmres_tol, gmres_restart); +} - int hiopLinSolverSymSparseGinkgo::matrixChanged() - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); +int hiopLinSolverSymSparseGinkgo::matrixChanged() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); - nlp_->runStats.linsolv.tmFactTime.start(); + nlp_->runStats.linsolv.tmFactTime.start(); - if( !mtx_ ) { - this->firstCall(); - } else { - update_matrix(M_, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_); - } - - gko_solver_ = gko::share(reusable_factory_->generate(mtx_)); - - // Temporary solution for the ginkgo GLU integration. - auto direct = iterative_refinement_ ? - gko::as>( - gko::as>( - gko::as>( - gko_solver_)->get_solver())->get_preconditioner()) : - gko::as>( - gko::as>(gko_solver_)->get_solver()); - auto status = direct->get_factorization_status(); - - return status == gko::experimental::factorization::status::success ? 0 : -1; + if(!mtx_) { + this->firstCall(); + } else { + update_matrix(M_, mtx_, host_mtx_, index_covert_CSR2Triplet_, index_covert_extra_Diag2CSR_); } - bool hiopLinSolverSymSparseGinkgo::solve ( hiopVector& x_ ) - { - using vec = gko::matrix::Dense; - using arr = gko::array; - auto host = exec_->get_master(); - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - assert(x_.get_size()==M_->n()); - - nlp_->runStats.linsolv.tmTriuSolves.start(); - - hiopVectorPar* x = dynamic_cast(&x_); - assert(x != NULL); - hiopVectorPar* rhs = dynamic_cast(x->new_copy()); - double* dx = x->local_data(); - double* drhs = rhs->local_data(); - const auto size = gko::dim<2>{(long unsigned int)n_, 1}; - auto dense_x_host = vec::create(host, size, arr::view(host, n_, dx), 1); - auto dense_x = vec::create(exec_, size); - dense_x->copy_from(dense_x_host.get()); - auto dense_b_host = vec::create(host, size, arr::view(host, n_, drhs), 1); - auto dense_b = vec::create(exec_, size); - dense_b->copy_from(dense_b_host.get()); - - gko_solver_->apply(dense_b.get(), dense_x.get()); - nlp_->runStats.linsolv.tmTriuSolves.stop(); - - dense_x_host->copy_from(dense_x.get()); - delete rhs; rhs=nullptr; - return 1; - } + gko_solver_ = gko::share(reusable_factory_->generate(mtx_)); + + // Temporary solution for the ginkgo GLU integration. + auto direct = iterative_refinement_ + ? gko::as>( + gko::as>(gko::as>(gko_solver_)->get_solver()) + ->get_preconditioner()) + : gko::as>( + gko::as>(gko_solver_)->get_solver()); + auto status = direct->get_factorization_status(); + + return status == gko::experimental::factorization::status::success ? 0 : -1; +} + +bool hiopLinSolverSymSparseGinkgo::solve(hiopVector& x_) +{ + using vec = gko::matrix::Dense; + using arr = gko::array; + auto host = exec_->get_master(); + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + assert(x_.get_size() == M_->n()); + + nlp_->runStats.linsolv.tmTriuSolves.start(); + + hiopVectorPar* x = dynamic_cast(&x_); + assert(x != NULL); + hiopVectorPar* rhs = dynamic_cast(x->new_copy()); + double* dx = x->local_data(); + double* drhs = rhs->local_data(); + const auto size = gko::dim<2>{(long unsigned int)n_, 1}; + auto dense_x_host = vec::create(host, size, arr::view(host, n_, dx), 1); + auto dense_x = vec::create(exec_, size); + dense_x->copy_from(dense_x_host.get()); + auto dense_b_host = vec::create(host, size, arr::view(host, n_, drhs), 1); + auto dense_b = vec::create(exec_, size); + dense_b->copy_from(dense_b_host.get()); + + gko_solver_->apply(dense_b.get(), dense_x.get()); + nlp_->runStats.linsolv.tmTriuSolves.stop(); + + dense_x_host->copy_from(dense_x.get()); + delete rhs; + rhs = nullptr; + return 1; +} -} //end namespace hiop +} // end namespace hiop diff --git a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp index 058c606a5..a9b4016a1 100644 --- a/src/LinAlg/hiopLinSolverSparseGinkgo.hpp +++ b/src/LinAlg/hiopLinSolverSparseGinkgo.hpp @@ -51,19 +51,18 @@ #include "hiopLinSolver.hpp" #include "hiopMatrixSparseTriplet.hpp" - #include - #include /** implements the linear solver class using Ginkgo * * @ingroup LinearSolvers */ -namespace hiop { +namespace hiop +{ -class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse +class hiopLinSolverSymSparseGinkgo : public hiopLinSolverSymSparse { public: hiopLinSolverSymSparseGinkgo(const int& n, const int& nnz, hiopNlpFormulation* nlp); @@ -76,13 +75,12 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse /** solves a linear system. * param 'x' is on entry the right hand side(s) of the system to be solved. On * exit is contains the solution(s). */ - bool solve ( hiopVector& x_ ); + bool solve(hiopVector& x_); private: - - int m_; // number of rows of the whole matrix - int n_; // number of cols of the whole matrix - int nnz_; // number of nonzeros in the matrix + int m_; // number of rows of the whole matrix + int n_; // number of cols of the whole matrix + int nnz_; // number of nonzeros in the matrix int* index_covert_CSR2Triplet_; int* index_covert_extra_Diag2CSR_; @@ -97,12 +95,10 @@ class hiopLinSolverSymSparseGinkgo: public hiopLinSolverSymSparse static const std::map alg_map_; public: - /** called the very first time a matrix is factored. Allocates space * for the factorization and performs ordering */ virtual void firstCall(); - }; -} // end namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopLinSolverSparsePARDISO.cpp b/src/LinAlg/hiopLinSolverSparsePARDISO.cpp index 6a7d56e46..b83a43880 100644 --- a/src/LinAlg/hiopLinSolverSparsePARDISO.cpp +++ b/src/LinAlg/hiopLinSolverSparsePARDISO.cpp @@ -2,54 +2,54 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /* implements the linear solver class using the PARDISO solver -* @file hiopLinSolverSparsePARDISO.cpp -* @ingroup LinearSolvers -* @author Nai-Yuan Chiang , LLNL -*/ + * @file hiopLinSolverSparsePARDISO.cpp + * @ingroup LinearSolvers + * @author Nai-Yuan Chiang , LLNL + */ #include "hiopLinSolverSparsePARDISO.hpp" @@ -59,459 +59,562 @@ namespace hiop { - /* - * PARDISO for symmetric indefinite sparse matrix - */ - hiopLinSolverSymSparsePARDISO::hiopLinSolverSymSparsePARDISO(const int& n, const int& nnz, hiopNlpFormulation* nlp) +/* + * PARDISO for symmetric indefinite sparse matrix + */ +hiopLinSolverSymSparsePARDISO::hiopLinSolverSymSparsePARDISO(const int& n, const int& nnz, hiopNlpFormulation* nlp) : hiopLinSolverSymSparse(n, nnz, nlp), - kRowPtr_{nullptr}, jCol_{nullptr}, kVal_{nullptr}, - rhs_{nullptr}, - n_{n}, nnz_{-1}, is_initialized_{false} - { - maxfct_ = 1; //max number of fact having same sparsity pattern to keep at the same time - mnum_ = 1; //actual matrix (as in index from 1 to maxfct) - msglvl_ = 0; //messaging level - mtype_ = -2; //real and symmetric indefinite - solver_ = 0; //sparse direct solver + kRowPtr_{nullptr}, + jCol_{nullptr}, + kVal_{nullptr}, + rhs_{nullptr}, + n_{n}, + nnz_{-1}, + is_initialized_{false} +{ + maxfct_ = 1; // max number of fact having same sparsity pattern to keep at the same time + mnum_ = 1; // actual matrix (as in index from 1 to maxfct) + msglvl_ = 0; // messaging level + mtype_ = -2; // real and symmetric indefinite + solver_ = 0; // sparse direct solver +} + +hiopLinSolverSymSparsePARDISO::~hiopLinSolverSymSparsePARDISO() +{ + /* Termination and release of memory */ + int phase = -1; /* Release internal memory . */ + pardiso_d(pt_, + &maxfct_, + &mnum_, + &mtype_, + &phase, + &n_, + kVal_, + kRowPtr_, + jCol_, + NULL, + NULL, + iparm_, + &msglvl_, + NULL, + NULL, + &error_, + dparm_); + + if(kRowPtr_) delete[] kRowPtr_; + if(jCol_) delete[] jCol_; + if(kVal_) delete[] kVal_; + if(index_covert_CSR2Triplet_) delete[] index_covert_CSR2Triplet_; + if(index_covert_extra_Diag2CSR_) delete[] index_covert_extra_Diag2CSR_; + + if(rhs_) { + delete rhs_; } +} - hiopLinSolverSymSparsePARDISO::~hiopLinSolverSymSparsePARDISO() - { - /* Termination and release of memory */ - int phase = -1; /* Release internal memory . */ - pardiso_d(pt_ , &maxfct_, &mnum_, &mtype_, &phase, - &n_, kVal_, kRowPtr_, jCol_, - NULL, NULL, - iparm_, &msglvl_, NULL, NULL, &error_, dparm_); - - if(kRowPtr_) - delete [] kRowPtr_; - if(jCol_) - delete [] jCol_; - if(kVal_) - delete [] kVal_; - if(index_covert_CSR2Triplet_) - delete [] index_covert_CSR2Triplet_; - if(index_covert_extra_Diag2CSR_) - delete [] index_covert_extra_Diag2CSR_; - - if(rhs_) { - delete rhs_; - } - - } +void hiopLinSolverSymSparsePARDISO::firstCall() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + kRowPtr_ = new int[n_ + 1]{0}; + nnz_ = 0; - void hiopLinSolverSymSparsePARDISO::firstCall() + // transfer triplet form to CSR upper triangular form + // note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part are the + // additional diagonal elememts the 1st part is sorted by row { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - - kRowPtr_ = new int[n_+1]{0}; - nnz_ = 0; - - // transfer triplet form to CSR upper triangular form - // note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part are the additional diagonal elememts - // the 1st part is sorted by row - { - // - // compute nnz in each row - // - // off-diagonal part - kRowPtr_[0]=0; - for(int k=0;knumberOfNonzeros()-n_;k++){ - if(M_->i_row()[k]!=M_->j_col()[k]){ - kRowPtr_[M_->j_col()[k]+1]++; - nnz_ += 1; - } - } - // diagonal part - for(int i=0;inumberOfNonzeros() - n_; k++) { + if(M_->i_row()[k] != M_->j_col()[k]) { + kRowPtr_[M_->j_col()[k] + 1]++; nnz_ += 1; } - // get correct row ptr index - for(int i=1;inumberOfNonzeros()-n_;k++) { - rowID_tmp = M_->i_row()[k]; - colID_tmp = M_->j_col()[k]; - if(rowID_tmp==colID_tmp){ - nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; - jCol_[nnz_tmp] = colID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; - index_covert_CSR2Triplet_[nnz_tmp] = k; - - kVal_[nnz_tmp] += M_->M()[M_->numberOfNonzeros()-n_+rowID_tmp]; - index_covert_extra_Diag2CSR_[rowID_tmp] = nnz_tmp; - - nnz_each_row_tmp[rowID_tmp]++; - total_nnz_tmp++; - }else{ - nnz_tmp = nnz_each_row_tmp[colID_tmp] + kRowPtr_[colID_tmp]; - jCol_[nnz_tmp] = rowID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; - index_covert_CSR2Triplet_[nnz_tmp] = k; - - nnz_each_row_tmp[colID_tmp]++; - total_nnz_tmp += 1; - } + for(int k = 0; k < M_->numberOfNonzeros() - n_; k++) { + rowID_tmp = M_->i_row()[k]; + colID_tmp = M_->j_col()[k]; + if(rowID_tmp == colID_tmp) { + nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; + jCol_[nnz_tmp] = colID_tmp; + kVal_[nnz_tmp] = M_->M()[k]; + index_covert_CSR2Triplet_[nnz_tmp] = k; + + kVal_[nnz_tmp] += M_->M()[M_->numberOfNonzeros() - n_ + rowID_tmp]; + index_covert_extra_Diag2CSR_[rowID_tmp] = nnz_tmp; + + nnz_each_row_tmp[rowID_tmp]++; + total_nnz_tmp++; + } else { + nnz_tmp = nnz_each_row_tmp[colID_tmp] + kRowPtr_[colID_tmp]; + jCol_[nnz_tmp] = rowID_tmp; + kVal_[nnz_tmp] = M_->M()[k]; + index_covert_CSR2Triplet_[nnz_tmp] = k; + + nnz_each_row_tmp[colID_tmp]++; + total_nnz_tmp += 1; } + } - // correct the missing diagonal term - for(int i=0;iM()[M_->numberOfNonzeros()-n_+i]; - index_covert_CSR2Triplet_[nnz_tmp] = M_->numberOfNonzeros()-n_+i; - total_nnz_tmp += 1; - - std::vector ind_temp(kRowPtr_[i+1]-kRowPtr_[i]); - std::iota(ind_temp.begin(), ind_temp.end(), 0); - std::sort(ind_temp.begin(), ind_temp.end(),[&](int a, int b){ return jCol_[a+kRowPtr_[i]]M()[M_->numberOfNonzeros() - n_ + i]; + index_covert_CSR2Triplet_[nnz_tmp] = M_->numberOfNonzeros() - n_ + i; + total_nnz_tmp += 1; + + std::vector ind_temp(kRowPtr_[i + 1] - kRowPtr_[i]); + std::iota(ind_temp.begin(), ind_temp.end(), 0); + std::sort(ind_temp.begin(), ind_temp.end(), [&](int a, int b) { + return jCol_[a + kRowPtr_[i]] < jCol_[b + kRowPtr_[i]]; + }); + + reorder(kVal_ + kRowPtr_[i], ind_temp, kRowPtr_[i + 1] - kRowPtr_[i]); + reorder(index_covert_CSR2Triplet_ + kRowPtr_[i], ind_temp, kRowPtr_[i + 1] - kRowPtr_[i]); + std::sort(jCol_ + kRowPtr_[i], jCol_ + kRowPtr_[i + 1]); } - - delete[] nnz_each_row_tmp; } - // need Fortran indexes - for( int i = 0; i < n_+1; i++) { - kRowPtr_[i] += 1; - } - for( int i = 0; i < nnz_; i++) { - jCol_[i] += 1; - } + delete[] nnz_each_row_tmp; + } - /* initialize PARDISO */ - pardisoinit_d(pt_, &mtype_, &solver_, iparm_, dparm_, &error_); - if (error_!=0) { - std::cout << "PardisoSolver ERROR during pardisoinit:" << error_ << "." << std::endl; - assert(false); - } + // need Fortran indexes + for(int i = 0; i < n_ + 1; i++) { + kRowPtr_[i] += 1; + } + for(int i = 0; i < nnz_; i++) { + jCol_[i] += 1; + } - /* Numbers of processors, value of OMP_NUM_THREADS */ - char *var = getenv("OMP_NUM_THREADS"); - if(var != NULL) { - sscanf( var, "%d", &num_threads_ ); - } else { - num_threads_ = 1; - } + /* initialize PARDISO */ + pardisoinit_d(pt_, &mtype_, &solver_, iparm_, dparm_, &error_); + if(error_ != 0) { + std::cout << "PardisoSolver ERROR during pardisoinit:" << error_ << "." << std::endl; + assert(false); + } - iparm_[2] = num_threads_; - iparm_[1] = 2; // 2 is for metis, 0 for min degree - iparm_[7] = 3; // # iterative refinements - iparm_[10] = 1; // scaling for IPM KKT; used with IPARM(13)=1 or 2 - iparm_[12] = 2; // improved accuracy for IPM KKT; used with IPARM(11)=1; - // if needed, use 2 for advanced matchings and higer accuracy. - iparm_[23] = 1; // Parallel Numerical Factorization - // (0=used in the last years, 1=two-level scheduling) - - /* symbolic analysis from PARDISO */ - int phase = 11; //analysis - int nrhs = 1; - - pardiso_d(pt_ , &maxfct_, &mnum_, &mtype_, &phase, - &n_, kVal_, kRowPtr_, jCol_, - NULL, &nrhs, - iparm_, &msglvl_, NULL, NULL, &error_, dparm_); - if ( error_ != 0) { - printf ("PardisoSolver - ERROR during symbolic factorization: %d\n", error_ ); - assert(false); - } - + /* Numbers of processors, value of OMP_NUM_THREADS */ + char* var = getenv("OMP_NUM_THREADS"); + if(var != NULL) { + sscanf(var, "%d", &num_threads_); + } else { + num_threads_ = 1; } - int hiopLinSolverSymSparsePARDISO::matrixChanged() - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - - nlp_->runStats.linsolv.tmFactTime.start(); - - if(!is_initialized_) { - this->firstCall(); - is_initialized_ = true; - } else { - // update matrix - int rowID_tmp{0}; - for(int k=0;kM()[index_covert_CSR2Triplet_[k]]; - } - for(int i=0;iM()[M_->numberOfNonzeros()-n_+i]; - } - } + iparm_[2] = num_threads_; + iparm_[1] = 2; // 2 is for metis, 0 for min degree + iparm_[7] = 3; // # iterative refinements + iparm_[10] = 1; // scaling for IPM KKT; used with IPARM(13)=1 or 2 + iparm_[12] = 2; // improved accuracy for IPM KKT; used with IPARM(11)=1; + // if needed, use 2 for advanced matchings and higer accuracy. + iparm_[23] = 1; // Parallel Numerical Factorization + // (0=used in the last years, 1=two-level scheduling) + + /* symbolic analysis from PARDISO */ + int phase = 11; // analysis + int nrhs = 1; + + pardiso_d(pt_, + &maxfct_, + &mnum_, + &mtype_, + &phase, + &n_, + kVal_, + kRowPtr_, + jCol_, + NULL, + &nrhs, + iparm_, + &msglvl_, + NULL, + NULL, + &error_, + dparm_); + if(error_ != 0) { + printf("PardisoSolver - ERROR during symbolic factorization: %d\n", error_); + assert(false); + } +} - /* do numerical factorization */ - int phase = 22; - int nrhs = 1; - - pardiso_d(pt_ , &maxfct_, &mnum_, &mtype_, &phase, - &n_, kVal_, kRowPtr_, jCol_, - NULL, &nrhs, - iparm_, &msglvl_, NULL, NULL, &error_, dparm_); - - if ( error_ != 0) { - printf ("PardisoSolver - ERROR during numerical factorization: %d\n", error_ ); - assert(false); +int hiopLinSolverSymSparsePARDISO::matrixChanged() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + nlp_->runStats.linsolv.tmFactTime.start(); + + if(!is_initialized_) { + this->firstCall(); + is_initialized_ = true; + } else { + // update matrix + int rowID_tmp{0}; + for(int k = 0; k < nnz_; k++) { + kVal_[k] = M_->M()[index_covert_CSR2Triplet_[k]]; } - - nlp_->runStats.linsolv.tmInertiaComp.start(); - - int negEigVal = iparm_[22]; - if(iparm_[21]+iparm_[22] != n_) { - // singular matrix - negEigVal = -1; + for(int i = 0; i < n_; i++) { + if(index_covert_extra_Diag2CSR_[i] != -1) + kVal_[index_covert_extra_Diag2CSR_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i]; } + } + + /* do numerical factorization */ + int phase = 22; + int nrhs = 1; + + pardiso_d(pt_, + &maxfct_, + &mnum_, + &mtype_, + &phase, + &n_, + kVal_, + kRowPtr_, + jCol_, + NULL, + &nrhs, + iparm_, + &msglvl_, + NULL, + NULL, + &error_, + dparm_); + + if(error_ != 0) { + printf("PardisoSolver - ERROR during numerical factorization: %d\n", error_); + assert(false); + } - nlp_->runStats.linsolv.tmInertiaComp.stop(); + nlp_->runStats.linsolv.tmInertiaComp.start(); - return negEigVal; + int negEigVal = iparm_[22]; + if(iparm_[21] + iparm_[22] != n_) { + // singular matrix + negEigVal = -1; } - bool hiopLinSolverSymSparsePARDISO::solve(hiopVector& b) - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - assert(b.get_size()==M_->n()); - - nlp_->runStats.linsolv.tmTriuSolves.start(); - - /* do backsolve */ - hiopVectorPar* x = dynamic_cast(&b); - assert(x != nullptr); - if(rhs_==nullptr) { - rhs_ = dynamic_cast(x->new_copy()); - } else { - rhs_->copyFrom(*x); - } - double* dx = x->local_data(); - double* drhs = rhs_->local_data(); - - int phase = 33; - int nrhs = 1; - - pardiso_d(pt_ , &maxfct_, &mnum_, &mtype_, &phase, - &n_, kVal_, kRowPtr_, jCol_, - NULL, &nrhs, - iparm_, &msglvl_, - drhs, dx, &error_, dparm_); - - if ( error_ != 0) { - printf ("PardisoSolver - ERROR during backsolve: %d\n", error_ ); - assert(false); - } + nlp_->runStats.linsolv.tmInertiaComp.stop(); - nlp_->runStats.linsolv.tmTriuSolves.stop(); - return 1; + return negEigVal; +} + +bool hiopLinSolverSymSparsePARDISO::solve(hiopVector& b) +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + assert(b.get_size() == M_->n()); + + nlp_->runStats.linsolv.tmTriuSolves.start(); + + /* do backsolve */ + hiopVectorPar* x = dynamic_cast(&b); + assert(x != nullptr); + if(rhs_ == nullptr) { + rhs_ = dynamic_cast(x->new_copy()); + } else { + rhs_->copyFrom(*x); + } + double* dx = x->local_data(); + double* drhs = rhs_->local_data(); + + int phase = 33; + int nrhs = 1; + + pardiso_d(pt_, + &maxfct_, + &mnum_, + &mtype_, + &phase, + &n_, + kVal_, + kRowPtr_, + jCol_, + NULL, + &nrhs, + iparm_, + &msglvl_, + drhs, + dx, + &error_, + dparm_); + + if(error_ != 0) { + printf("PardisoSolver - ERROR during backsolve: %d\n", error_); + assert(false); } + nlp_->runStats.linsolv.tmTriuSolves.stop(); + return 1; +} - /* - * PARDISO for unsymmetric sparse matrix - */ - hiopLinSolverNonSymSparsePARDISO::hiopLinSolverNonSymSparsePARDISO(const int& n, const int& nnz, hiopNlpFormulation* nlp) +/* + * PARDISO for unsymmetric sparse matrix + */ +hiopLinSolverNonSymSparsePARDISO::hiopLinSolverNonSymSparsePARDISO(const int& n, const int& nnz, hiopNlpFormulation* nlp) : hiopLinSolverNonSymSparse(n, nnz, nlp), - kRowPtr_{nullptr}, jCol_{nullptr}, kVal_{nullptr}, - rhs_{nullptr}, - n_{n}, nnz_{-1}, is_initialized_{false} - { - maxfct_ = 1; //max number of fact having same sparsity pattern to keep at the same time - mnum_ = 1; //actual matrix (as in index from 1 to maxfct) - msglvl_ = 0; //messaging level - mtype_ = 11; //real and unsymmetric - solver_ = 0; //sparse direct solver + kRowPtr_{nullptr}, + jCol_{nullptr}, + kVal_{nullptr}, + rhs_{nullptr}, + n_{n}, + nnz_{-1}, + is_initialized_{false} +{ + maxfct_ = 1; // max number of fact having same sparsity pattern to keep at the same time + mnum_ = 1; // actual matrix (as in index from 1 to maxfct) + msglvl_ = 0; // messaging level + mtype_ = 11; // real and unsymmetric + solver_ = 0; // sparse direct solver +} + +hiopLinSolverNonSymSparsePARDISO::~hiopLinSolverNonSymSparsePARDISO() +{ + /* Termination and release of memory */ + int phase = -1; /* Release internal memory . */ + pardiso_d(pt_, + &maxfct_, + &mnum_, + &mtype_, + &phase, + &n_, + kVal_, + kRowPtr_, + jCol_, + NULL, + NULL, + iparm_, + &msglvl_, + NULL, + NULL, + &error_, + dparm_); + + if(kRowPtr_) delete[] kRowPtr_; + if(jCol_) delete[] jCol_; + if(kVal_) delete[] kVal_; + if(index_covert_CSR2Triplet_) delete[] index_covert_CSR2Triplet_; + if(index_covert_extra_Diag2CSR_) delete[] index_covert_extra_Diag2CSR_; + + if(rhs_) { + delete rhs_; } +} - hiopLinSolverNonSymSparsePARDISO::~hiopLinSolverNonSymSparsePARDISO() - { - /* Termination and release of memory */ - int phase = -1; /* Release internal memory . */ - pardiso_d(pt_ , &maxfct_, &mnum_, &mtype_, &phase, - &n_, kVal_, kRowPtr_, jCol_, - NULL, NULL, - iparm_, &msglvl_, NULL, NULL, &error_, dparm_); - - if(kRowPtr_) - delete [] kRowPtr_; - if(jCol_) - delete [] jCol_; - if(kVal_) - delete [] kVal_; - if(index_covert_CSR2Triplet_) - delete [] index_covert_CSR2Triplet_; - if(index_covert_extra_Diag2CSR_) - delete [] index_covert_extra_Diag2CSR_; - - if(rhs_) { - delete rhs_; - } - +void hiopLinSolverNonSymSparsePARDISO::firstCall() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + // transfer triplet form to CSR form + // note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part are the + // additional diagonal elememts the 1st part is sorted by row + + M_->convert_to_csr_arrays(nnz_, + &kRowPtr_, + &jCol_, + &kVal_, + &index_covert_CSR2Triplet_, + &index_covert_extra_Diag2CSR_, + extra_diag_nnz_map); + + // need Fortran indexes + for(int i = 0; i < n_ + 1; i++) { + kRowPtr_[i] += 1; + } + for(int i = 0; i < nnz_; i++) { + jCol_[i] += 1; } - void hiopLinSolverNonSymSparsePARDISO::firstCall() - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - - // transfer triplet form to CSR form - // note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part are the additional diagonal elememts - // the 1st part is sorted by row - - M_->convert_to_csr_arrays(nnz_, &kRowPtr_, &jCol_, &kVal_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_, extra_diag_nnz_map); + /* initialize PARDISO */ + pardisoinit_d(pt_, &mtype_, &solver_, iparm_, dparm_, &error_); + if(error_ != 0) { + std::cout << "PardisoSolver ERROR during pardisoinit:" << error_ << "." << std::endl; + assert(false); + } - // need Fortran indexes - for( int i = 0; i < n_+1; i++) { - kRowPtr_[i] += 1; - } - for( int i = 0; i < nnz_; i++) { - jCol_[i] += 1; - } + /* Numbers of processors, value of OMP_NUM_THREADS */ + char* var = getenv("OMP_NUM_THREADS"); + if(var != NULL) { + sscanf(var, "%d", &num_threads_); + } else { + num_threads_ = 1; + } - /* initialize PARDISO */ - pardisoinit_d(pt_, &mtype_, &solver_, iparm_, dparm_, &error_); - if (error_!=0) { - std::cout << "PardisoSolver ERROR during pardisoinit:" << error_ << "." << std::endl; - assert(false); - } + iparm_[2] = num_threads_; + iparm_[1] = 2; // 2 is for metis, 0 for min degree + iparm_[7] = 3; // # iterative refinements + iparm_[10] = 1; // scaling for IPM KKT; used with IPARM(13)=1 or 2 + iparm_[12] = 2; // improved accuracy for IPM KKT; used with IPARM(11)=1; + // if needed, use 2 for advanced matchings and higer accuracy. + iparm_[23] = 1; // Parallel Numerical Factorization + // (0=used in the last years, 1=two-level scheduling) + + /* symbolic analysis from PARDISO */ + int phase = 11; // analysis + int nrhs = 1; + + pardiso_d(pt_, + &maxfct_, + &mnum_, + &mtype_, + &phase, + &n_, + kVal_, + kRowPtr_, + jCol_, + NULL, + &nrhs, + iparm_, + &msglvl_, + NULL, + NULL, + &error_, + dparm_); + if(error_ != 0) { + printf("PardisoSolver - ERROR during symbolic factorization: %d\n", error_); + assert(false); + } +} - /* Numbers of processors, value of OMP_NUM_THREADS */ - char *var = getenv("OMP_NUM_THREADS"); - if(var != NULL) { - sscanf( var, "%d", &num_threads_ ); - } else { - num_threads_ = 1; +int hiopLinSolverNonSymSparsePARDISO::matrixChanged() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + nlp_->runStats.linsolv.tmFactTime.start(); + + if(!is_initialized_) { + this->firstCall(); + is_initialized_ = true; + } else { + // update matrix + int rowID_tmp{0}; + for(int k = 0; k < nnz_; k++) { + kVal_[k] = M_->M()[index_covert_CSR2Triplet_[k]]; } - - iparm_[2] = num_threads_; - iparm_[1] = 2; // 2 is for metis, 0 for min degree - iparm_[7] = 3; // # iterative refinements - iparm_[10] = 1; // scaling for IPM KKT; used with IPARM(13)=1 or 2 - iparm_[12] = 2; // improved accuracy for IPM KKT; used with IPARM(11)=1; - // if needed, use 2 for advanced matchings and higer accuracy. - iparm_[23] = 1; // Parallel Numerical Factorization - // (0=used in the last years, 1=two-level scheduling) - - /* symbolic analysis from PARDISO */ - int phase = 11; //analysis - int nrhs = 1; - - pardiso_d(pt_ , &maxfct_, &mnum_, &mtype_, &phase, - &n_, kVal_, kRowPtr_, jCol_, - NULL, &nrhs, - iparm_, &msglvl_, NULL, NULL, &error_, dparm_); - if ( error_ != 0) { - printf ("PardisoSolver - ERROR during symbolic factorization: %d\n", error_ ); - assert(false); + for(auto p: extra_diag_nnz_map) { + kVal_[p.first] += M_->M()[p.second]; } - } - int hiopLinSolverNonSymSparsePARDISO::matrixChanged() - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - - nlp_->runStats.linsolv.tmFactTime.start(); - - if(!is_initialized_) { - this->firstCall(); - is_initialized_ = true; - } else { - // update matrix - int rowID_tmp{0}; - for(int k=0;kM()[index_covert_CSR2Triplet_[k]]; - } - for(auto p: extra_diag_nnz_map) { - kVal_[p.first] += M_->M()[p.second]; - } - } + /* do numerical factorization */ + int phase = 22; + int nrhs = 1; + + pardiso_d(pt_, + &maxfct_, + &mnum_, + &mtype_, + &phase, + &n_, + kVal_, + kRowPtr_, + jCol_, + NULL, + &nrhs, + iparm_, + &msglvl_, + NULL, + NULL, + &error_, + dparm_); + + if(error_ != 0) { + printf("PardisoSolver - ERROR during numerical factorization: %d\n", error_); + assert(false); + } - /* do numerical factorization */ - int phase = 22; - int nrhs = 1; - - pardiso_d(pt_ , &maxfct_, &mnum_, &mtype_, &phase, - &n_, kVal_, kRowPtr_, jCol_, - NULL, &nrhs, - iparm_, &msglvl_, NULL, NULL, &error_, dparm_); - - if ( error_ != 0) { - printf ("PardisoSolver - ERROR during numerical factorization: %d\n", error_ ); - assert(false); - } + nlp_->runStats.linsolv.tmInertiaComp.start(); + nlp_->runStats.linsolv.tmInertiaComp.stop(); - nlp_->runStats.linsolv.tmInertiaComp.start(); - nlp_->runStats.linsolv.tmInertiaComp.stop(); + return 0; +} - return 0; +bool hiopLinSolverNonSymSparsePARDISO::solve(hiopVector& b) +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + assert(b.get_size() == M_->n()); + + nlp_->runStats.linsolv.tmTriuSolves.start(); + + /* do backsolve */ + hiopVectorPar* x = dynamic_cast(&b); + assert(x != nullptr); + if(rhs_ == nullptr) { + rhs_ = dynamic_cast(x->new_copy()); + } else { + rhs_->copyFrom(*x); } - - bool hiopLinSolverNonSymSparsePARDISO::solve(hiopVector& b) - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - assert(b.get_size()==M_->n()); - - nlp_->runStats.linsolv.tmTriuSolves.start(); - - /* do backsolve */ - hiopVectorPar* x = dynamic_cast(&b); - assert(x != nullptr); - if(rhs_==nullptr) { - rhs_ = dynamic_cast(x->new_copy()); - } else { - rhs_->copyFrom(*x); - } - double* dx = x->local_data(); - double* drhs = rhs_->local_data(); - - int phase = 33; - int nrhs = 1; - - pardiso_d(pt_ , &maxfct_, &mnum_, &mtype_, &phase, - &n_, kVal_, kRowPtr_, jCol_, - NULL, &nrhs, - iparm_, &msglvl_, - drhs, dx, &error_, dparm_); - - if ( error_ != 0) { - printf ("PardisoSolver - ERROR during backsolve: %d\n", error_ ); - return false; - } - - nlp_->runStats.linsolv.tmTriuSolves.stop(); - return true; + double* dx = x->local_data(); + double* drhs = rhs_->local_data(); + + int phase = 33; + int nrhs = 1; + + pardiso_d(pt_, + &maxfct_, + &mnum_, + &mtype_, + &phase, + &n_, + kVal_, + kRowPtr_, + jCol_, + NULL, + &nrhs, + iparm_, + &msglvl_, + drhs, + dx, + &error_, + dparm_); + + if(error_ != 0) { + printf("PardisoSolver - ERROR during backsolve: %d\n", error_); + return false; } -} //end namespace hiop + nlp_->runStats.linsolv.tmTriuSolves.stop(); + return true; +} + +} // end namespace hiop diff --git a/src/LinAlg/hiopLinSolverSparsePARDISO.hpp b/src/LinAlg/hiopLinSolverSparsePARDISO.hpp index 3781e2e0d..a9f2b5fe1 100644 --- a/src/LinAlg/hiopLinSolverSparsePARDISO.hpp +++ b/src/LinAlg/hiopLinSolverSparsePARDISO.hpp @@ -2,54 +2,54 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /* implements the linear solver class using the PARDISO solver -* @file hiopLinSolverSparsePARDISO.hpp -* @ingroup LinearSolvers -* @author Nai-Yuan Chiang , LLNL -*/ + * @file hiopLinSolverSparsePARDISO.hpp + * @ingroup LinearSolvers + * @author Nai-Yuan Chiang , LLNL + */ #ifndef HIOP_LINSOLVER_PARDISO #define HIOP_LINSOLVER_PARDISO @@ -57,21 +57,20 @@ #include "hiopLinSolver.hpp" #include "hiopMatrixSparseTriplet.hpp" -namespace hiop { +namespace hiop +{ /* PARDISO prototpye */ extern "C" void pardisoinit_d(void*, int*, int*, int*, double*, int*); -extern "C" void pardiso_d(void*, int*, int*, int*, int*, int*, - double*, int*, int*, int*, int*, int*, - int*, double*, double*, int*, double*); +extern "C" void +pardiso_d(void*, int*, int*, int*, int*, int*, double*, int*, int*, int*, int*, int*, int*, double*, double*, int*, double*); extern "C" void pardiso_chkmatrix_d(int*, int*, double*, int*, int*, int*); extern "C" void pardiso_chkvec_d(int*, int*, double*, int*); -extern "C" void pardiso_printstats_d(int*, int*, double*, int*, int *, int*, double*, int*); +extern "C" void pardiso_printstats_d(int*, int*, double*, int*, int*, int*, double*, int*); extern "C" void pardiso_get_schur_d(void*, int*, int*, int*, double*, int*, int*); - /** Wrapper for PARDISO */ -class hiopLinSolverSymSparsePARDISO: public hiopLinSolverSymSparse +class hiopLinSolverSymSparsePARDISO : public hiopLinSolverSymSparse { public: hiopLinSolverSymSparsePARDISO(const int& n, const int& nnz, hiopNlpFormulation* nlp); @@ -84,51 +83,48 @@ class hiopLinSolverSymSparsePARDISO: public hiopLinSolverSymSparse /** solves a linear system. * param 'x' is on entry the right hand side(s) of the system to be solved. On * exit is contains the solution(s). */ - bool solve ( hiopVector& x_ ); + bool solve(hiopVector& x_); private: + int m_; // number of rows of the whole matrix + int n_; // number of cols of the whole matrix + int nnz_; // number of nonzeros in the matrix - int m_; // number of rows of the whole matrix - int n_; // number of cols of the whole matrix - int nnz_; // number of nonzeros in the matrix - - int *kRowPtr_; // row pointer for nonzeros - int *jCol_; // column indexes for nonzeros - double *kVal_; // storage for sparse matrix + int* kRowPtr_; // row pointer for nonzeros + int* jCol_; // column indexes for nonzeros + double* kVal_; // storage for sparse matrix - int *index_covert_CSR2Triplet_; - int *index_covert_extra_Diag2CSR_; + int* index_covert_CSR2Triplet_; + int* index_covert_extra_Diag2CSR_; // pardiso parameters - void *pt_[64]; + void* pt_[64]; int iparm_[64]; int num_threads_; double dparm_[64]; - int maxfct_; //max number of fact having same sparsity pattern to keep at the same time - int mnum_; //actual matrix (as in index from 1 to maxfct) - int msglvl_; //messaging level + int maxfct_; // max number of fact having same sparsity pattern to keep at the same time + int mnum_; // actual matrix (as in index from 1 to maxfct) + int msglvl_; // messaging level int mtype_; int solver_; int error_; bool is_initialized_; /* temporary storage for the factorization process */ - double* nvec_; //temporary vec - double* sol_; //solution - int sz_sol_; //allocated size - + double* nvec_; // temporary vec + double* sol_; // solution + int sz_sol_; // allocated size + hiopVectorPar* rhs_; public: - /** called the very first time a matrix is factored. Allocates space * for the factorization and performs ordering */ virtual void firstCall(); - }; -class hiopLinSolverNonSymSparsePARDISO: public hiopLinSolverNonSymSparse +class hiopLinSolverNonSymSparsePARDISO : public hiopLinSolverNonSymSparse { public: hiopLinSolverNonSymSparsePARDISO(const int& n, const int& nnz, hiopNlpFormulation* nlp); @@ -142,53 +138,49 @@ class hiopLinSolverNonSymSparsePARDISO: public hiopLinSolverNonSymSparse /** solves a linear system. * param 'x' is on entry the right hand side(s) of the system to be solved. On * exit is contains the solution(s). */ - bool solve ( hiopVector& x_ ); + bool solve(hiopVector& x_); private: + int m_; // number of rows of the whole matrix + int n_; // number of cols of the whole matrix + int nnz_; // number of nonzeros in the matrix - int m_; // number of rows of the whole matrix - int n_; // number of cols of the whole matrix - int nnz_; // number of nonzeros in the matrix + int* kRowPtr_; // row pointer for nonzeros + int* jCol_; // column indexes for nonzeros + double* kVal_; // storage for sparse matrix - int *kRowPtr_; // row pointer for nonzeros - int *jCol_; // column indexes for nonzeros - double *kVal_; // storage for sparse matrix - - int *index_covert_CSR2Triplet_; - int *index_covert_extra_Diag2CSR_; - std::unordered_map extra_diag_nnz_map; + int* index_covert_CSR2Triplet_; + int* index_covert_extra_Diag2CSR_; + std::unordered_map extra_diag_nnz_map; // pardiso parameters - void *pt_[64]; + void* pt_[64]; int iparm_[64]; int num_threads_; double dparm_[64]; - int maxfct_; //max number of fact having same sparsity pattern to keep at the same time - int mnum_; //actual matrix (as in index from 1 to maxfct) - int msglvl_; //messaging level + int maxfct_; // max number of fact having same sparsity pattern to keep at the same time + int mnum_; // actual matrix (as in index from 1 to maxfct) + int msglvl_; // messaging level int mtype_; int solver_; int error_; bool is_initialized_; /* temporary storage for the factorization process */ - double* nvec_; //temporary vec - double* sol_; //solution - int sz_sol_; //allocated size - + double* nvec_; // temporary vec + double* sol_; // solution + int sz_sol_; // allocated size + hiopVectorPar* rhs_; public: - /** called the very first time a matrix is factored. Allocates space * for the factorization and performs ordering */ void firstCall(); -//friend class hiopLinSolverSymSparsePARDISO; - + // friend class hiopLinSolverSymSparsePARDISO; }; - -} // end namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopLinSolverSparseReSolve.cpp b/src/LinAlg/hiopLinSolverSparseReSolve.cpp index 5ae3c6d0d..312c9a099 100644 --- a/src/LinAlg/hiopLinSolverSparseReSolve.cpp +++ b/src/LinAlg/hiopLinSolverSparseReSolve.cpp @@ -67,502 +67,493 @@ #define checkCudaErrors(val) hiopCheckCudaError((val), __FILE__, __LINE__) - /** * @brief Map elements of one array to the other - * + * * for(int k = 0; k < nnz_; k++) { * vals[k] = M_->M()[index_convert_CSR2Triplet_host_[k]]; * } - * + * */ -template -__global__ void -mapArraysKernel(T* dst, const T* src, const I* mapidx, I n) +template +__global__ void mapArraysKernel(T* dst, const T* src, const I* mapidx, I n) { I tid = blockDim.x * blockIdx.x + threadIdx.x; - if (tid < n) - { - dst[tid] = src[ mapidx[tid] ]; + if(tid < n) { + dst[tid] = src[mapidx[tid]]; } } /** * @brief Map elements of one array to the other - * + * * for(int i = 0; i < n_; i++) { * if(index_convert_extra_Diag2CSR_host_[i] != -1) * vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i]; * } - * + * */ -template -__global__ void -addToArrayKernel(T* dst, const T* src, const I* mapidx, I n, I nnz) +template +__global__ void addToArrayKernel(T* dst, const T* src, const I* mapidx, I n, I nnz) { I tid = blockDim.x * blockIdx.x + threadIdx.x; - if (tid < n) - { - if(mapidx[tid] != -1) - dst[ mapidx[tid] ] += src[nnz - n + tid]; + if(tid < n) { + if(mapidx[tid] != -1) dst[mapidx[tid]] += src[nnz - n + tid]; } } - namespace hiop { - hiopLinSolverSymSparseReSolve::hiopLinSolverSymSparseReSolve(const int& n, - const int& nnz, - hiopNlpFormulation* nlp) - : hiopLinSolverSymSparse(n, nnz, nlp), - index_convert_CSR2Triplet_host_{ nullptr }, - index_convert_extra_Diag2CSR_host_{ nullptr }, - index_convert_CSR2Triplet_device_{ nullptr }, - index_convert_extra_Diag2CSR_device_{ nullptr }, - n_{ n }, - nnz_{ 0 }, - factorizationSetupSucc_{ 0 }, - is_first_call_{ true } - { - // Create ReSolve solver and allocate rhs temporary storage - solver_ = new ReSolve::RefactorizationSolver(n); - - // If memory space is device, allocate host mirror for HiOp's KKT matrix in triplet format - if(nlp_->options->GetString("mem_space") == "device") { - M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz); - } - - // Set verbosity of ReSolve based on HiOp verbosity - if(nlp_->options->GetInteger("verbosity_level") >= 3) { - solver_->set_silent_output(false); - } - - // Select matrix ordering - int ordering = 1; - std::string ord = nlp_->options->GetString("linear_solver_sparse_ordering"); - if(ord == "amd_ssparse") { - ordering = 0; - } else if(ord == "colamd_ssparse") { - ordering = 1; - } else { - nlp_->log->printf(hovWarning, - "Ordering %s not compatible with cuSOLVER LU, using default ...\n", - ord.c_str()); - ordering = 1; - } - solver_->ordering() = ordering; - nlp_->log->printf(hovSummary, "Ordering: %d\n", solver_->ordering()); - - // Select factorization - std::string fact; - fact = nlp_->options->GetString("resolve_factorization"); - if(fact != "klu") { - nlp_->log->printf(hovWarning, - "Factorization %s not compatible with cuSOLVER LU, using default ...\n", - fact.c_str()); - fact = "klu"; - } - solver_->fact() = fact; - nlp_->log->printf(hovSummary, "Factorization: %s\n", solver_->fact().c_str()); - - // Select refactorization - std::string refact; - refact = nlp_->options->GetString("resolve_refactorization"); - if(refact != "glu" && refact != "rf") { - nlp_->log->printf(hovWarning, - "Refactorization %s not compatible with cuSOLVER LU, using default ...\n", - refact.c_str()); - refact = "glu"; - } - solver_->refact() = refact; - nlp_->log->printf(hovSummary, "Refactorization: %s\n", solver_->refact().c_str()); - - // by default, dont use iterative refinement - std::string use_ir; - int maxit_test = nlp_->options->GetInteger("ir_inner_maxit"); - - if ((maxit_test < 0) || (maxit_test > 1000)){ - nlp_->log->printf(hovWarning, - "Wrong maxit value: %d. Use int maxit value between 0 and 1000. Setting default (50) ...\n", - maxit_test); - maxit_test = 50; - } - use_ir = "no"; - if(maxit_test > 0){ - use_ir = "yes"; - solver_->enable_iterative_refinement(); - solver_->ir()->maxit() = maxit_test; - } - if(use_ir == "yes") { - if((refact == "rf")) { - - solver_->ir()->restart() = nlp_->options->GetInteger("ir_inner_restart"); - - if ((solver_->ir()->restart() <0) || (solver_->ir()->restart() >100)){ - nlp_->log->printf(hovWarning, - "Wrong restart value: %d. Use int restart value between 1 and 100. Setting default (20) ...\n", - solver_->ir()->restart()); - solver_->ir()->restart() = 20; - } - - - solver_->ir()->tol() = nlp_->options->GetNumeric("ir_inner_tol"); - if ((solver_->ir()->tol() <0) || (solver_->ir()->tol() >1)){ - nlp_->log->printf(hovWarning, - "Wrong tol value: %e. Use double tol value between 0 and 1. Setting default (1e-12) ...\n", - solver_->ir()->tol()); - solver_->ir()->tol() = 1e-12; - } - solver_->ir()->orth_option() = nlp_->options->GetString("ir_inner_gs_scheme"); - - /* 0) "Standard" GMRES and FGMRES (Saad and Schultz, 1986, Saad, 1992) use Modified Gram-Schmidt ("mgs") to keep the Krylov vectors orthogonal. - * Modified Gram-Schmidt requires k synchronization (due to inner products) in iteration k and this becomes a scaling bottleneck for - * GPU-accelerated implementation and it becomes even more pronouced for MPI+GPU-acceleration. - * Modified Gram-Schidt can be replaced by a different scheme. - * - * 1) One can use Classical Gram-Schmidt ("cgs") which is numerically unstable or reorthogonalized Classical Gram-Schmidt ("cgs2"), which - * is numerically stable and requires 3 synchrnozations and each iteration. Reorthogonalized Classical Gram-Schmidt makes two passes of - * Classical Gram-Schmidt. And two passes are enough to get vectors orthogonal to machine precision (Bjorck 1967). - * - * 2) An alternative is a low-sych version (Swirydowicz and Thomas, 2020), which reformulates Modified Gram-Schmidt to be a (very small) triangular solve. - * It requires extra storage for the matrix used in triangular solve (kxk at iteration k), but only two sycnhronizations are needed per iteration. - * The inner producats are performed in bulk, which quarantees better GPU utilization. The second synchronization comes from normalizing the vector and - * can be eliminated if the norm is postponed to the next iteration, but also makes code more complicated. This is why we use two-synch method ("mgs_two_synch") - * - * 3) A recently submitted paper by Stephen Thomas (Thomas 202*) takes the triangular solve idea further and uses a different approximation for - * the inverse of a triangular matrix. It requires two (very small) triangular solves and two sychroniztions (if the norm is NOT delayed). It also guarantees - * that the vectors are orthogonal to the machine epsilon, as in cgs2. Since Stephen's paper is named "post modern GMRES", we call this Gram-Schmidt scheme "mgs_pm". - */ - if(solver_->ir()->orth_option() != "mgs" && solver_->ir()->orth_option() != "cgs2" && solver_->ir()->orth_option() != "mgs_two_synch" && solver_->ir()->orth_option() != "mgs_pm") { - nlp_->log->printf(hovWarning, - "mgs option : %s is wrong. Use 'mgs', 'cgs2', 'mgs_two_synch' or 'mgs_pm'. Switching to default (mgs) ...\n", - use_ir.c_str()); - solver_->ir()->orth_option() = "mgs"; - } - - solver_->ir()->conv_cond() = nlp_->options->GetInteger("ir_inner_conv_cond"); - - if ((solver_->ir()->conv_cond() <0) || (solver_->ir()->conv_cond() >2)){ - nlp_->log->printf(hovWarning, - "Wrong IR convergence condition: %d. Use int value: 0, 1 or 2. Setting default (0) ...\n", - solver_->ir()->conv_cond()); - solver_->ir()->conv_cond() = 0; - } - - } else { - nlp_->log->printf(hovWarning, - "Currently, inner iterative refinement works ONLY with cuSolverRf ... \n"); - use_ir = "no"; - } - } - solver_->use_ir() = use_ir; - nlp_->log->printf(hovSummary, "Use IR: %s\n", solver_->use_ir().c_str()); - } // constructor +hiopLinSolverSymSparseReSolve::hiopLinSolverSymSparseReSolve(const int& n, const int& nnz, hiopNlpFormulation* nlp) + : hiopLinSolverSymSparse(n, nnz, nlp), + index_convert_CSR2Triplet_host_{nullptr}, + index_convert_extra_Diag2CSR_host_{nullptr}, + index_convert_CSR2Triplet_device_{nullptr}, + index_convert_extra_Diag2CSR_device_{nullptr}, + n_{n}, + nnz_{0}, + factorizationSetupSucc_{0}, + is_first_call_{true} +{ + // Create ReSolve solver and allocate rhs temporary storage + solver_ = new ReSolve::RefactorizationSolver(n); - hiopLinSolverSymSparseReSolve::~hiopLinSolverSymSparseReSolve() - { - delete solver_; + // If memory space is device, allocate host mirror for HiOp's KKT matrix in triplet format + if(nlp_->options->GetString("mem_space") == "device") { + M_host_ = LinearAlgebraFactory::create_matrix_sparse("default", n, n, nnz); + } - // If memory space is device, delete allocated host mirrors - if(nlp_->options->GetString("mem_space") == "device") { - delete M_host_; - } + // Set verbosity of ReSolve based on HiOp verbosity + if(nlp_->options->GetInteger("verbosity_level") >= 3) { + solver_->set_silent_output(false); + } - // Delete CSR <--> triplet mappings - delete[] index_convert_CSR2Triplet_host_; - delete[] index_convert_extra_Diag2CSR_host_; - checkCudaErrors(cudaFree(index_convert_CSR2Triplet_device_)); - checkCudaErrors(cudaFree(index_convert_extra_Diag2CSR_device_)); + // Select matrix ordering + int ordering = 1; + std::string ord = nlp_->options->GetString("linear_solver_sparse_ordering"); + if(ord == "amd_ssparse") { + ordering = 0; + } else if(ord == "colamd_ssparse") { + ordering = 1; + } else { + nlp_->log->printf(hovWarning, "Ordering %s not compatible with cuSOLVER LU, using default ...\n", ord.c_str()); + ordering = 1; + } + solver_->ordering() = ordering; + nlp_->log->printf(hovSummary, "Ordering: %d\n", solver_->ordering()); + + // Select factorization + std::string fact; + fact = nlp_->options->GetString("resolve_factorization"); + if(fact != "klu") { + nlp_->log->printf(hovWarning, "Factorization %s not compatible with cuSOLVER LU, using default ...\n", fact.c_str()); + fact = "klu"; } + solver_->fact() = fact; + nlp_->log->printf(hovSummary, "Factorization: %s\n", solver_->fact().c_str()); + + // Select refactorization + std::string refact; + refact = nlp_->options->GetString("resolve_refactorization"); + if(refact != "glu" && refact != "rf") { + nlp_->log->printf(hovWarning, "Refactorization %s not compatible with cuSOLVER LU, using default ...\n", refact.c_str()); + refact = "glu"; + } + solver_->refact() = refact; + nlp_->log->printf(hovSummary, "Refactorization: %s\n", solver_->refact().c_str()); + + // by default, dont use iterative refinement + std::string use_ir; + int maxit_test = nlp_->options->GetInteger("ir_inner_maxit"); + + if((maxit_test < 0) || (maxit_test > 1000)) { + nlp_->log->printf(hovWarning, + "Wrong maxit value: %d. Use int maxit value between 0 and 1000. Setting default (50) ...\n", + maxit_test); + maxit_test = 50; + } + use_ir = "no"; + if(maxit_test > 0) { + use_ir = "yes"; + solver_->enable_iterative_refinement(); + solver_->ir()->maxit() = maxit_test; + } + if(use_ir == "yes") { + if((refact == "rf")) { + solver_->ir()->restart() = nlp_->options->GetInteger("ir_inner_restart"); + + if((solver_->ir()->restart() < 0) || (solver_->ir()->restart() > 100)) { + nlp_->log->printf(hovWarning, + "Wrong restart value: %d. Use int restart value between 1 and 100. Setting default (20) ...\n", + solver_->ir()->restart()); + solver_->ir()->restart() = 20; + } - int hiopLinSolverSymSparseReSolve::matrixChanged() - { - assert(n_ == M_->n() && M_->n() == M_->m()); - assert(n_ > 0); + solver_->ir()->tol() = nlp_->options->GetNumeric("ir_inner_tol"); + if((solver_->ir()->tol() < 0) || (solver_->ir()->tol() > 1)) { + nlp_->log->printf(hovWarning, + "Wrong tol value: %e. Use double tol value between 0 and 1. Setting default (1e-12) ...\n", + solver_->ir()->tol()); + solver_->ir()->tol() = 1e-12; + } + solver_->ir()->orth_option() = nlp_->options->GetString("ir_inner_gs_scheme"); + + /* 0) "Standard" GMRES and FGMRES (Saad and Schultz, 1986, Saad, 1992) use Modified Gram-Schmidt ("mgs") to keep the + * Krylov vectors orthogonal. Modified Gram-Schmidt requires k synchronization (due to inner products) in iteration k + * and this becomes a scaling bottleneck for GPU-accelerated implementation and it becomes even more pronouced for + * MPI+GPU-acceleration. Modified Gram-Schidt can be replaced by a different scheme. + * + * 1) One can use Classical Gram-Schmidt ("cgs") which is numerically unstable or reorthogonalized Classical + * Gram-Schmidt ("cgs2"), which is numerically stable and requires 3 synchrnozations and each iteration. + * Reorthogonalized Classical Gram-Schmidt makes two passes of Classical Gram-Schmidt. And two passes are enough to get + * vectors orthogonal to machine precision (Bjorck 1967). + * + * 2) An alternative is a low-sych version (Swirydowicz and Thomas, 2020), which reformulates Modified Gram-Schmidt to + * be a (very small) triangular solve. It requires extra storage for the matrix used in triangular solve (kxk at + * iteration k), but only two sycnhronizations are needed per iteration. The inner producats are performed in bulk, + * which quarantees better GPU utilization. The second synchronization comes from normalizing the vector and can be + * eliminated if the norm is postponed to the next iteration, but also makes code more complicated. This is why we use + * two-synch method ("mgs_two_synch") + * + * 3) A recently submitted paper by Stephen Thomas (Thomas 202*) takes the triangular solve idea further and uses a + * different approximation for the inverse of a triangular matrix. It requires two (very small) triangular solves and + * two sychroniztions (if the norm is NOT delayed). It also guarantees that the vectors are orthogonal to the machine + * epsilon, as in cgs2. Since Stephen's paper is named "post modern GMRES", we call this Gram-Schmidt scheme "mgs_pm". + */ + if(solver_->ir()->orth_option() != "mgs" && solver_->ir()->orth_option() != "cgs2" && + solver_->ir()->orth_option() != "mgs_two_synch" && solver_->ir()->orth_option() != "mgs_pm") { + nlp_->log->printf( + hovWarning, + "mgs option : %s is wrong. Use 'mgs', 'cgs2', 'mgs_two_synch' or 'mgs_pm'. Switching to default (mgs) ...\n", + use_ir.c_str()); + solver_->ir()->orth_option() = "mgs"; + } - nlp_->runStats.linsolv.tmFactTime.start(); + solver_->ir()->conv_cond() = nlp_->options->GetInteger("ir_inner_conv_cond"); - if(is_first_call_) { - firstCall(); - } else { - update_matrix_values(); - } - - if(factorizationSetupSucc_ == 0) { - int retval = solver_->factorize(); - if(retval == -1) { - nlp_->log->printf(hovWarning, "Numeric klu factorization failed. Regularizing ...\n"); - // This is not a catastrophic failure - // The matrix is singular so return -1 to regularaize! - return -1; - } else { // Numeric was succesfull so now can set up - solver_->setup_refactorization(); - factorizationSetupSucc_ = 1; - nlp_->log->printf(hovScalars, "Numeric klu factorization succesful! \n"); + if((solver_->ir()->conv_cond() < 0) || (solver_->ir()->conv_cond() > 2)) { + nlp_->log->printf(hovWarning, + "Wrong IR convergence condition: %d. Use int value: 0, 1 or 2. Setting default (0) ...\n", + solver_->ir()->conv_cond()); + solver_->ir()->conv_cond() = 0; } - } else { // factorizationSetupSucc_ == 1 - solver_->refactorize(); - } - nlp_->runStats.linsolv.tmFactTime.stop(); - return 0; + } else { + nlp_->log->printf(hovWarning, "Currently, inner iterative refinement works ONLY with cuSolverRf ... \n"); + use_ir = "no"; + } } + solver_->use_ir() = use_ir; + nlp_->log->printf(hovSummary, "Use IR: %s\n", solver_->use_ir().c_str()); +} // constructor - bool hiopLinSolverSymSparseReSolve::solve(hiopVector& x) - { - assert(n_ == M_->n() && M_->n() == M_->m()); - assert(n_ > 0); - assert(x.get_size() == M_->n()); +hiopLinSolverSymSparseReSolve::~hiopLinSolverSymSparseReSolve() +{ + delete solver_; - nlp_->runStats.linsolv.tmTriuSolves.start(); + // If memory space is device, delete allocated host mirrors + if(nlp_->options->GetString("mem_space") == "device") { + delete M_host_; + } - // Set IR tolerance - double ir_tol = nlp_->options->GetNumeric("ir_inner_tol"); + // Delete CSR <--> triplet mappings + delete[] index_convert_CSR2Triplet_host_; + delete[] index_convert_extra_Diag2CSR_host_; + checkCudaErrors(cudaFree(index_convert_CSR2Triplet_device_)); + checkCudaErrors(cudaFree(index_convert_extra_Diag2CSR_device_)); +} - std::string mem_space = nlp_->options->GetString("mem_space"); - double* dx = x.local_data(); +int hiopLinSolverSymSparseReSolve::matrixChanged() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); - bool retval = solver_->triangular_solve(dx, ir_tol, mem_space); - if(!retval) { - nlp_->log->printf(hovError, // catastrophic failure - "ReSolve triangular solver failed\n"); - } + nlp_->runStats.linsolv.tmFactTime.start(); - nlp_->runStats.linsolv.tmTriuSolves.stop(); - return true; + if(is_first_call_) { + firstCall(); + } else { + update_matrix_values(); } - void hiopLinSolverSymSparseReSolve::firstCall() - { - assert(n_ == M_->n() && M_->n() == M_->m()); - assert(n_ > 0); - - // If the matrix is on device, copy it to the host mirror - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "device") { - checkCudaErrors(cudaMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost)); - checkCudaErrors(cudaMemcpy(M_host_->i_row(), M_->i_row(), sizeof(index_type) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost)); - checkCudaErrors(cudaMemcpy(M_host_->j_col(), M_->j_col(), sizeof(index_type) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost)); - } - - // Transfer triplet to CSR form - - // Allocate row pointers and compute number of nonzeros. - solver_->mat_A_csr()->allocate_size(n_); - compute_nnz(); - solver_->set_nnz(nnz_); - - // Allocate column indices and matrix values - solver_->mat_A_csr()->allocate_nnz(nnz_); - - // Set column indices and matrix values. - set_csr_indices_values(); - - // Copy matrix to device - solver_->mat_A_csr()->update_from_host_mirror(); - - if(solver_->use_ir() == "yes") { - solver_->setup_iterative_refinement_matrix(n_, nnz_); + if(factorizationSetupSucc_ == 0) { + int retval = solver_->factorize(); + if(retval == -1) { + nlp_->log->printf(hovWarning, "Numeric klu factorization failed. Regularizing ...\n"); + // This is not a catastrophic failure + // The matrix is singular so return -1 to regularaize! + return -1; + } else { // Numeric was succesfull so now can set up + solver_->setup_refactorization(); + factorizationSetupSucc_ = 1; + nlp_->log->printf(hovScalars, "Numeric klu factorization succesful! \n"); } - /* - * initialize matrix factorization - */ - if(solver_->setup_factorization() < 0) { - nlp_->log->printf(hovError, // catastrophic failure - "Symbolic factorization failed!\n"); - return; - }; - is_first_call_ = false; + } else { // factorizationSetupSucc_ == 1 + solver_->refactorize(); } - /// nnz_ is number of nonzeros in CSR matrix - /// M_->numberOfNonzeros() is number of zeros in symmetric triplet matrix - void hiopLinSolverSymSparseReSolve::update_matrix_values() - { - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "device") { + nlp_->runStats.linsolv.tmFactTime.stop(); + return 0; +} - double* csr_vals = solver_->mat_A_csr()->get_vals(); - double* coo_vals = M_->M(); - int coo_nnz = M_->numberOfNonzeros(); +bool hiopLinSolverSymSparseReSolve::solve(hiopVector& x) +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + assert(x.get_size() == M_->n()); - const int blocksize = 512; - int gridsize = (nnz_ + blocksize - 1) / blocksize; - mapArraysKernel<<< gridsize, blocksize >>>(csr_vals, coo_vals, index_convert_CSR2Triplet_device_, nnz_); + nlp_->runStats.linsolv.tmTriuSolves.start(); - gridsize = (n_ + blocksize - 1) / blocksize; - addToArrayKernel<<< gridsize, blocksize>>>(csr_vals, coo_vals, index_convert_extra_Diag2CSR_device_, n_, coo_nnz); + // Set IR tolerance + double ir_tol = nlp_->options->GetNumeric("ir_inner_tol"); - // If factorization was not successful, we need a copy of values on the host - if(factorizationSetupSucc_ == 0) - checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->get_vals_host(), solver_->mat_A_csr()->get_vals(), sizeof(double) * nnz_, cudaMemcpyDeviceToHost)); + std::string mem_space = nlp_->options->GetString("mem_space"); + double* dx = x.local_data(); - } else { - // KKT matrix is on the host - double* vals = solver_->mat_A_csr()->get_vals_host(); - // update matrix - for(int k = 0; k < nnz_; k++) { - vals[k] = M_->M()[index_convert_CSR2Triplet_host_[k]]; - } - for(int i = 0; i < n_; i++) { - if(index_convert_extra_Diag2CSR_host_[i] != -1) - vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i]; - } - checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->get_vals(), solver_->mat_A_csr()->get_vals_host(), sizeof(double) * nnz_, cudaMemcpyHostToDevice)); - } + bool retval = solver_->triangular_solve(dx, ir_tol, mem_space); + if(!retval) { + nlp_->log->printf(hovError, // catastrophic failure + "ReSolve triangular solver failed\n"); } - /// @pre Data is either on the host or the host mirror is synced with the device - void hiopLinSolverSymSparseReSolve::compute_nnz() - { - // - // compute nnz in each row - // - int* row_ptr = solver_->mat_A_csr()->get_irows_host(); - - // If the data is on device, fetch it from the host mirror - hiopMatrixSparse* M_host = nullptr; - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "host" || mem_space == "default") { - M_host = M_; - } else if(mem_space == "device") { - M_host = M_host_; - } else { - nlp_->log->printf(hovError, "Memory space %s incompatible with ReSolve.\n", mem_space.c_str()); - } + nlp_->runStats.linsolv.tmTriuSolves.stop(); + return true; +} +void hiopLinSolverSymSparseReSolve::firstCall() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + // If the matrix is on device, copy it to the host mirror + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "device") { + checkCudaErrors(cudaMemcpy(M_host_->M(), M_->M(), sizeof(double) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost)); + checkCudaErrors( + cudaMemcpy(M_host_->i_row(), M_->i_row(), sizeof(index_type) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost)); + checkCudaErrors( + cudaMemcpy(M_host_->j_col(), M_->j_col(), sizeof(index_type) * M_->numberOfNonzeros(), cudaMemcpyDeviceToHost)); + } - // off-diagonal part - row_ptr[0] = 0; - for(int k = 0; k < M_host->numberOfNonzeros() - n_; k++) { - if(M_host->i_row()[k] != M_host->j_col()[k]) { - row_ptr[M_host->i_row()[k] + 1]++; - row_ptr[M_host->j_col()[k] + 1]++; - nnz_ += 2; - } + // Transfer triplet to CSR form + + // Allocate row pointers and compute number of nonzeros. + solver_->mat_A_csr()->allocate_size(n_); + compute_nnz(); + solver_->set_nnz(nnz_); + + // Allocate column indices and matrix values + solver_->mat_A_csr()->allocate_nnz(nnz_); + + // Set column indices and matrix values. + set_csr_indices_values(); + + // Copy matrix to device + solver_->mat_A_csr()->update_from_host_mirror(); + + if(solver_->use_ir() == "yes") { + solver_->setup_iterative_refinement_matrix(n_, nnz_); + } + /* + * initialize matrix factorization + */ + if(solver_->setup_factorization() < 0) { + nlp_->log->printf(hovError, // catastrophic failure + "Symbolic factorization failed!\n"); + return; + }; + is_first_call_ = false; +} + +/// nnz_ is number of nonzeros in CSR matrix +/// M_->numberOfNonzeros() is number of zeros in symmetric triplet matrix +void hiopLinSolverSymSparseReSolve::update_matrix_values() +{ + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "device") { + double* csr_vals = solver_->mat_A_csr()->get_vals(); + double* coo_vals = M_->M(); + int coo_nnz = M_->numberOfNonzeros(); + + const int blocksize = 512; + int gridsize = (nnz_ + blocksize - 1) / blocksize; + mapArraysKernel<<>>(csr_vals, coo_vals, index_convert_CSR2Triplet_device_, nnz_); + + gridsize = (n_ + blocksize - 1) / blocksize; + addToArrayKernel + <<>>(csr_vals, coo_vals, index_convert_extra_Diag2CSR_device_, n_, coo_nnz); + + // If factorization was not successful, we need a copy of values on the host + if(factorizationSetupSucc_ == 0) + checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->get_vals_host(), + solver_->mat_A_csr()->get_vals(), + sizeof(double) * nnz_, + cudaMemcpyDeviceToHost)); + + } else { + // KKT matrix is on the host + double* vals = solver_->mat_A_csr()->get_vals_host(); + // update matrix + for(int k = 0; k < nnz_; k++) { + vals[k] = M_->M()[index_convert_CSR2Triplet_host_[k]]; } - // diagonal part for(int i = 0; i < n_; i++) { - row_ptr[i + 1]++; - nnz_ += 1; - } - // get correct row ptr index - for(int i = 1; i < n_ + 1; i++) { - row_ptr[i] += row_ptr[i - 1]; + if(index_convert_extra_Diag2CSR_host_[i] != -1) + vals[index_convert_extra_Diag2CSR_host_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i]; } - assert(nnz_ == row_ptr[n_]); + checkCudaErrors(cudaMemcpy(solver_->mat_A_csr()->get_vals(), + solver_->mat_A_csr()->get_vals_host(), + sizeof(double) * nnz_, + cudaMemcpyHostToDevice)); } +} - /// @pre Data is either on the host or the host mirror is synced with the device - void hiopLinSolverSymSparseReSolve::set_csr_indices_values() - { - // If the data is on device, fetch it from the host mirror - hiopMatrixSparse* M_host = nullptr; - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "host" || mem_space == "default") { - M_host = M_; - } else if(mem_space == "device") { - M_host = M_host_; - } else { - nlp_->log->printf(hovError, "Memory space %s incompatible with ReSolve.\n", mem_space.c_str()); +/// @pre Data is either on the host or the host mirror is synced with the device +void hiopLinSolverSymSparseReSolve::compute_nnz() +{ + // + // compute nnz in each row + // + int* row_ptr = solver_->mat_A_csr()->get_irows_host(); + + // If the data is on device, fetch it from the host mirror + hiopMatrixSparse* M_host = nullptr; + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "host" || mem_space == "default") { + M_host = M_; + } else if(mem_space == "device") { + M_host = M_host_; + } else { + nlp_->log->printf(hovError, "Memory space %s incompatible with ReSolve.\n", mem_space.c_str()); + } + + // off-diagonal part + row_ptr[0] = 0; + for(int k = 0; k < M_host->numberOfNonzeros() - n_; k++) { + if(M_host->i_row()[k] != M_host->j_col()[k]) { + row_ptr[M_host->i_row()[k] + 1]++; + row_ptr[M_host->j_col()[k] + 1]++; + nnz_ += 2; } + } + // diagonal part + for(int i = 0; i < n_; i++) { + row_ptr[i + 1]++; + nnz_ += 1; + } + // get correct row ptr index + for(int i = 1; i < n_ + 1; i++) { + row_ptr[i] += row_ptr[i - 1]; + } + assert(nnz_ == row_ptr[n_]); +} - // - // set correct col index and value - // - const int* row_ptr = solver_->mat_A_csr()->get_irows_host(); - int* col_idx = solver_->mat_A_csr()->get_jcols_host(); - double* vals = solver_->mat_A_csr()->get_vals_host(); +/// @pre Data is either on the host or the host mirror is synced with the device +void hiopLinSolverSymSparseReSolve::set_csr_indices_values() +{ + // If the data is on device, fetch it from the host mirror + hiopMatrixSparse* M_host = nullptr; + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "host" || mem_space == "default") { + M_host = M_; + } else if(mem_space == "device") { + M_host = M_host_; + } else { + nlp_->log->printf(hovError, "Memory space %s incompatible with ReSolve.\n", mem_space.c_str()); + } - index_convert_CSR2Triplet_host_ = new int[nnz_]; - index_convert_extra_Diag2CSR_host_ = new int[n_]; - checkCudaErrors(cudaMalloc(&index_convert_CSR2Triplet_device_, nnz_ * sizeof(int))); - checkCudaErrors(cudaMalloc(&index_convert_extra_Diag2CSR_device_, n_ * sizeof(int))); + // + // set correct col index and value + // + const int* row_ptr = solver_->mat_A_csr()->get_irows_host(); + int* col_idx = solver_->mat_A_csr()->get_jcols_host(); + double* vals = solver_->mat_A_csr()->get_vals_host(); - int* nnz_each_row_tmp = new int[n_]{ 0 }; - int total_nnz_tmp{ 0 }, nnz_tmp{ 0 }, rowID_tmp, colID_tmp; + index_convert_CSR2Triplet_host_ = new int[nnz_]; + index_convert_extra_Diag2CSR_host_ = new int[n_]; + checkCudaErrors(cudaMalloc(&index_convert_CSR2Triplet_device_, nnz_ * sizeof(int))); + checkCudaErrors(cudaMalloc(&index_convert_extra_Diag2CSR_device_, n_ * sizeof(int))); - for(int k = 0; k < n_; k++) { - index_convert_extra_Diag2CSR_host_[k] = -1; - } + int* nnz_each_row_tmp = new int[n_]{0}; + int total_nnz_tmp{0}, nnz_tmp{0}, rowID_tmp, colID_tmp; - for(int k = 0; k < M_host->numberOfNonzeros() - n_; k++) { - rowID_tmp = M_host->i_row()[k]; - colID_tmp = M_host->j_col()[k]; - if(rowID_tmp == colID_tmp) { - nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp]; - col_idx[nnz_tmp] = colID_tmp; - vals[nnz_tmp] = M_host->M()[k]; - index_convert_CSR2Triplet_host_[nnz_tmp] = k; - - vals[nnz_tmp] += M_host->M()[M_host->numberOfNonzeros() - n_ + rowID_tmp]; - index_convert_extra_Diag2CSR_host_[rowID_tmp] = nnz_tmp; - - nnz_each_row_tmp[rowID_tmp]++; - total_nnz_tmp++; - } else { - nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp]; - col_idx[nnz_tmp] = colID_tmp; - vals[nnz_tmp] = M_host->M()[k]; - index_convert_CSR2Triplet_host_[nnz_tmp] = k; - - nnz_tmp = nnz_each_row_tmp[colID_tmp] + row_ptr[colID_tmp]; - col_idx[nnz_tmp] = rowID_tmp; - vals[nnz_tmp] = M_host->M()[k]; - index_convert_CSR2Triplet_host_[nnz_tmp] = k; - - nnz_each_row_tmp[rowID_tmp]++; - nnz_each_row_tmp[colID_tmp]++; - total_nnz_tmp += 2; - } - } - // correct the missing dia_gonal term - for(int i = 0; i < n_; i++) { - if(nnz_each_row_tmp[i] != row_ptr[i + 1] - row_ptr[i]) { - assert(nnz_each_row_tmp[i] == row_ptr[i + 1] - row_ptr[i] - 1); - nnz_tmp = nnz_each_row_tmp[i] + row_ptr[i]; - col_idx[nnz_tmp] = i; - vals[nnz_tmp] = M_host->M()[M_host->numberOfNonzeros() - n_ + i]; - index_convert_CSR2Triplet_host_[nnz_tmp] = M_host->numberOfNonzeros() - n_ + i; - total_nnz_tmp += 1; - - std::vector ind_temp(row_ptr[i + 1] - row_ptr[i]); - std::iota(ind_temp.begin(), ind_temp.end(), 0); - std::sort(ind_temp.begin(), ind_temp.end(), - [&](int a, int b) { - return col_idx[a + row_ptr[i]] < col_idx[b + row_ptr[i]]; - } - ); - - reorder(vals + row_ptr[i], ind_temp, row_ptr[i + 1] - row_ptr[i]); - reorder(index_convert_CSR2Triplet_host_ + row_ptr[i], ind_temp, row_ptr[i + 1] - row_ptr[i]); - std::sort(col_idx + row_ptr[i], col_idx + row_ptr[i + 1]); - } - } - checkCudaErrors(cudaMemcpy(index_convert_CSR2Triplet_device_, index_convert_CSR2Triplet_host_, nnz_ * sizeof(int), cudaMemcpyHostToDevice)); - checkCudaErrors(cudaMemcpy(index_convert_extra_Diag2CSR_device_, index_convert_extra_Diag2CSR_host_, n_ * sizeof(int), cudaMemcpyHostToDevice)); - delete[] nnz_each_row_tmp; + for(int k = 0; k < n_; k++) { + index_convert_extra_Diag2CSR_host_[k] = -1; } - // Error checking utility for CUDA - // KS: might later become part of src/Utils, putting it here for now - template - void hiopLinSolverSymSparseReSolve::hiopCheckCudaError(T result, - const char* const file, - int const line) - { - if(result) { - nlp_->log->printf(hovError, - "CUDA error at %s:%d, error# %d\n", - file, - line, - result); - assert(false); + for(int k = 0; k < M_host->numberOfNonzeros() - n_; k++) { + rowID_tmp = M_host->i_row()[k]; + colID_tmp = M_host->j_col()[k]; + if(rowID_tmp == colID_tmp) { + nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp]; + col_idx[nnz_tmp] = colID_tmp; + vals[nnz_tmp] = M_host->M()[k]; + index_convert_CSR2Triplet_host_[nnz_tmp] = k; + + vals[nnz_tmp] += M_host->M()[M_host->numberOfNonzeros() - n_ + rowID_tmp]; + index_convert_extra_Diag2CSR_host_[rowID_tmp] = nnz_tmp; + + nnz_each_row_tmp[rowID_tmp]++; + total_nnz_tmp++; + } else { + nnz_tmp = nnz_each_row_tmp[rowID_tmp] + row_ptr[rowID_tmp]; + col_idx[nnz_tmp] = colID_tmp; + vals[nnz_tmp] = M_host->M()[k]; + index_convert_CSR2Triplet_host_[nnz_tmp] = k; + + nnz_tmp = nnz_each_row_tmp[colID_tmp] + row_ptr[colID_tmp]; + col_idx[nnz_tmp] = rowID_tmp; + vals[nnz_tmp] = M_host->M()[k]; + index_convert_CSR2Triplet_host_[nnz_tmp] = k; + + nnz_each_row_tmp[rowID_tmp]++; + nnz_each_row_tmp[colID_tmp]++; + total_nnz_tmp += 2; } } + // correct the missing dia_gonal term + for(int i = 0; i < n_; i++) { + if(nnz_each_row_tmp[i] != row_ptr[i + 1] - row_ptr[i]) { + assert(nnz_each_row_tmp[i] == row_ptr[i + 1] - row_ptr[i] - 1); + nnz_tmp = nnz_each_row_tmp[i] + row_ptr[i]; + col_idx[nnz_tmp] = i; + vals[nnz_tmp] = M_host->M()[M_host->numberOfNonzeros() - n_ + i]; + index_convert_CSR2Triplet_host_[nnz_tmp] = M_host->numberOfNonzeros() - n_ + i; + total_nnz_tmp += 1; + + std::vector ind_temp(row_ptr[i + 1] - row_ptr[i]); + std::iota(ind_temp.begin(), ind_temp.end(), 0); + std::sort(ind_temp.begin(), ind_temp.end(), [&](int a, int b) { + return col_idx[a + row_ptr[i]] < col_idx[b + row_ptr[i]]; + }); + + reorder(vals + row_ptr[i], ind_temp, row_ptr[i + 1] - row_ptr[i]); + reorder(index_convert_CSR2Triplet_host_ + row_ptr[i], ind_temp, row_ptr[i + 1] - row_ptr[i]); + std::sort(col_idx + row_ptr[i], col_idx + row_ptr[i + 1]); + } + } + checkCudaErrors(cudaMemcpy(index_convert_CSR2Triplet_device_, + index_convert_CSR2Triplet_host_, + nnz_ * sizeof(int), + cudaMemcpyHostToDevice)); + checkCudaErrors(cudaMemcpy(index_convert_extra_Diag2CSR_device_, + index_convert_extra_Diag2CSR_host_, + n_ * sizeof(int), + cudaMemcpyHostToDevice)); + delete[] nnz_each_row_tmp; +} +// Error checking utility for CUDA +// KS: might later become part of src/Utils, putting it here for now +template +void hiopLinSolverSymSparseReSolve::hiopCheckCudaError(T result, const char* const file, int const line) +{ + if(result) { + nlp_->log->printf(hovError, "CUDA error at %s:%d, error# %d\n", file, line, result); + assert(false); + } +} -} // namespace hiop - +} // namespace hiop diff --git a/src/LinAlg/hiopLinSolverSparseReSolve.hpp b/src/LinAlg/hiopLinSolverSparseReSolve.hpp index bc33e9835..ae60024ee 100644 --- a/src/LinAlg/hiopLinSolverSparseReSolve.hpp +++ b/src/LinAlg/hiopLinSolverSparseReSolve.hpp @@ -67,18 +67,17 @@ * @ingroup LinearSolvers */ - -namespace ReSolve { - // Forward declaration of inner IR class - class IterativeRefinement; - class MatrixCsr; - class RefactorizationSolver; -} +namespace ReSolve +{ +// Forward declaration of inner IR class +class IterativeRefinement; +class MatrixCsr; +class RefactorizationSolver; +} // namespace ReSolve namespace hiop { - class hiopLinSolverSymSparseReSolve : public hiopLinSolverSymSparse { public: @@ -86,25 +85,24 @@ class hiopLinSolverSymSparseReSolve : public hiopLinSolverSymSparse hiopLinSolverSymSparseReSolve(const int& n, const int& nnz, hiopNlpFormulation* nlp); virtual ~hiopLinSolverSymSparseReSolve(); - /** + /** * @brief Triggers a refactorization of the matrix, if necessary. * Overload from base class. * In this case, KLU (SuiteSparse) is used to refactor */ virtual int matrixChanged(); - /** + /** * @brief Solves a linear system. - * + * * @param x is on entry the right hand side(s) of the system to be solved. - * + * * @post On exit `x` is overwritten with the solution(s). */ - virtual bool solve(hiopVector& x_); + virtual bool solve(hiopVector& x_); /** Multiple rhs not supported yet */ - virtual bool - solve(hiopMatrix& /* x */) + virtual bool solve(hiopMatrix& /* x */) { assert(false && "not yet supported"); return false; @@ -113,9 +111,9 @@ class hiopLinSolverSymSparseReSolve : public hiopLinSolverSymSparse protected: ReSolve::RefactorizationSolver* solver_; - int m_; ///< number of rows of the whole matrix - int n_; ///< number of cols of the whole matrix - int nnz_; ///< number of nonzeros in the matrix + int m_; ///< number of rows of the whole matrix + int n_; ///< number of cols of the whole matrix + int nnz_; ///< number of nonzeros in the matrix // Mapping on the host int* index_convert_CSR2Triplet_host_; @@ -129,7 +127,7 @@ class hiopLinSolverSymSparseReSolve : public hiopLinSolverSymSparse int factorizationSetupSucc_; bool is_first_call_; - hiopMatrixSparse* M_host_{ nullptr }; ///< Host mirror for the KKT matrix + hiopMatrixSparse* M_host_{nullptr}; ///< Host mirror for the KKT matrix /* private function: creates a cuSolver data structure from KLU data * structures. */ @@ -143,7 +141,7 @@ class hiopLinSolverSymSparseReSolve : public hiopLinSolverSymSparse /** * @brief Updates matrix values from HiOp object. - * + * * @note This function maps data from HiOp supplied matrix M_ to data structures * used by the linear solver. */ @@ -154,10 +152,10 @@ class hiopLinSolverSymSparseReSolve : public hiopLinSolverSymSparse /** Function to compute column indices and matrix values arrays */ void set_csr_indices_values(); - template void hiopCheckCudaError(T result, const char* const file, int const line); + template + void hiopCheckCudaError(T result, const char* const file, int const line); }; -} // namespace hiop - +} // namespace hiop #endif diff --git a/src/LinAlg/hiopLinSolverSparseSTRUMPACK.cpp b/src/LinAlg/hiopLinSolverSparseSTRUMPACK.cpp index 84eee8627..f6cff573a 100644 --- a/src/LinAlg/hiopLinSolverSparseSTRUMPACK.cpp +++ b/src/LinAlg/hiopLinSolverSparseSTRUMPACK.cpp @@ -60,7 +60,7 @@ using namespace strumpack; namespace hiop { - hiopLinSolverSymSparseSTRUMPACK::hiopLinSolverSymSparseSTRUMPACK(const int& n, const int& nnz, hiopNlpFormulation* nlp) +hiopLinSolverSymSparseSTRUMPACK::hiopLinSolverSymSparseSTRUMPACK(const int& n, const int& nnz, hiopNlpFormulation* nlp) : hiopLinSolverSymSparse(n, nnz, nlp), kRowPtr_{nullptr}, jCol_{nullptr}, @@ -69,236 +69,229 @@ namespace hiop index_covert_extra_Diag2CSR_{nullptr}, n_{n}, nnz_{0} - {} +{} - hiopLinSolverSymSparseSTRUMPACK::~hiopLinSolverSymSparseSTRUMPACK() - { - if(kRowPtr_) - delete [] kRowPtr_; - if(jCol_) - delete [] jCol_; - if(kVal_) - delete [] kVal_; - if(index_covert_CSR2Triplet_) - delete [] index_covert_CSR2Triplet_; - if(index_covert_extra_Diag2CSR_) - delete [] index_covert_extra_Diag2CSR_; - } +hiopLinSolverSymSparseSTRUMPACK::~hiopLinSolverSymSparseSTRUMPACK() +{ + if(kRowPtr_) delete[] kRowPtr_; + if(jCol_) delete[] jCol_; + if(kVal_) delete[] kVal_; + if(index_covert_CSR2Triplet_) delete[] index_covert_CSR2Triplet_; + if(index_covert_extra_Diag2CSR_) delete[] index_covert_extra_Diag2CSR_; +} + +void hiopLinSolverSymSparseSTRUMPACK::firstCall() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + kRowPtr_ = new int[n_ + 1]{0}; + + // transfer triplet form to CSR form + // note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part are the + // additional diagonal elememts the 1st part is sorted by row - void hiopLinSolverSymSparseSTRUMPACK::firstCall() { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - - kRowPtr_ = new int[n_+1]{0}; - - // transfer triplet form to CSR form - // note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part are the additional diagonal elememts - // the 1st part is sorted by row - - { - // - // compute nnz in each row - // - // off-diagonal part - kRowPtr_[0]=0; - for(int k=0;knumberOfNonzeros()-n_;k++){ - if(M_->i_row()[k]!=M_->j_col()[k]){ - kRowPtr_[M_->i_row()[k]+1]++; - kRowPtr_[M_->j_col()[k]+1]++; - nnz_ += 2; - } + // + // compute nnz in each row + // + // off-diagonal part + kRowPtr_[0] = 0; + for(int k = 0; k < M_->numberOfNonzeros() - n_; k++) { + if(M_->i_row()[k] != M_->j_col()[k]) { + kRowPtr_[M_->i_row()[k] + 1]++; + kRowPtr_[M_->j_col()[k] + 1]++; + nnz_ += 2; } - // diagonal part - for(int i=0;inumberOfNonzeros() - n_; k++) { + rowID_tmp = M_->i_row()[k]; + colID_tmp = M_->j_col()[k]; + if(rowID_tmp == colID_tmp) { + nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; + jCol_[nnz_tmp] = colID_tmp; + kVal_[nnz_tmp] = M_->M()[k]; + index_covert_CSR2Triplet_[nnz_tmp] = k; + + kVal_[nnz_tmp] += M_->M()[M_->numberOfNonzeros() - n_ + rowID_tmp]; + index_covert_extra_Diag2CSR_[rowID_tmp] = nnz_tmp; + + nnz_each_row_tmp[rowID_tmp]++; + total_nnz_tmp++; + } else { + nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; + jCol_[nnz_tmp] = colID_tmp; + kVal_[nnz_tmp] = M_->M()[k]; + index_covert_CSR2Triplet_[nnz_tmp] = k; + + nnz_tmp = nnz_each_row_tmp[colID_tmp] + kRowPtr_[colID_tmp]; + jCol_[nnz_tmp] = rowID_tmp; + kVal_[nnz_tmp] = M_->M()[k]; + index_covert_CSR2Triplet_[nnz_tmp] = k; + + nnz_each_row_tmp[rowID_tmp]++; + nnz_each_row_tmp[colID_tmp]++; + total_nnz_tmp += 2; } - // get correct row ptr index - for(int i=1;iM()[M_->numberOfNonzeros() - n_ + i]; + index_covert_CSR2Triplet_[nnz_tmp] = M_->numberOfNonzeros() - n_ + i; + total_nnz_tmp += 1; + + std::vector ind_temp(kRowPtr_[i + 1] - kRowPtr_[i]); + std::iota(ind_temp.begin(), ind_temp.end(), 0); + std::sort(ind_temp.begin(), ind_temp.end(), [&](int a, int b) { + return jCol_[a + kRowPtr_[i]] < jCol_[b + kRowPtr_[i]]; + }); + + reorder(kVal_ + kRowPtr_[i], ind_temp, kRowPtr_[i + 1] - kRowPtr_[i]); + reorder(index_covert_CSR2Triplet_ + kRowPtr_[i], ind_temp, kRowPtr_[i + 1] - kRowPtr_[i]); + std::sort(jCol_ + kRowPtr_[i], jCol_ + kRowPtr_[i + 1]); } - assert(nnz_==kRowPtr_[n_]); + } - kVal_ = new double[nnz_]{0.0}; - jCol_ = new int[nnz_]{0}; + delete[] nnz_each_row_tmp; + } - } - { - // - // set correct col index and value - // - index_covert_CSR2Triplet_ = new int[nnz_]; - index_covert_extra_Diag2CSR_ = new int[n_]; - - int *nnz_each_row_tmp = new int[n_]{0}; - int total_nnz_tmp{0},nnz_tmp{0}, rowID_tmp, colID_tmp; - for(int k=0;knumberOfNonzeros()-n_;k++){ - rowID_tmp = M_->i_row()[k]; - colID_tmp = M_->j_col()[k]; - if(rowID_tmp==colID_tmp){ - nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; - jCol_[nnz_tmp] = colID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; - index_covert_CSR2Triplet_[nnz_tmp] = k; - - kVal_[nnz_tmp] += M_->M()[M_->numberOfNonzeros()-n_+rowID_tmp]; - index_covert_extra_Diag2CSR_[rowID_tmp] = nnz_tmp; - - nnz_each_row_tmp[rowID_tmp]++; - total_nnz_tmp++; - }else{ - nnz_tmp = nnz_each_row_tmp[rowID_tmp] + kRowPtr_[rowID_tmp]; - jCol_[nnz_tmp] = colID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; - index_covert_CSR2Triplet_[nnz_tmp] = k; - - nnz_tmp = nnz_each_row_tmp[colID_tmp] + kRowPtr_[colID_tmp]; - jCol_[nnz_tmp] = rowID_tmp; - kVal_[nnz_tmp] = M_->M()[k]; - index_covert_CSR2Triplet_[nnz_tmp] = k; - - nnz_each_row_tmp[rowID_tmp]++; - nnz_each_row_tmp[colID_tmp]++; - total_nnz_tmp += 2; - } - } - // correct the missing diagonal term - for(int i=0;iM()[M_->numberOfNonzeros()-n_+i]; - index_covert_CSR2Triplet_[nnz_tmp] = M_->numberOfNonzeros()-n_+i; - total_nnz_tmp += 1; - - std::vector ind_temp(kRowPtr_[i+1]-kRowPtr_[i]); - std::iota(ind_temp.begin(), ind_temp.end(), 0); - std::sort(ind_temp.begin(), ind_temp.end(),[&](int a, int b){ return jCol_[a+kRowPtr_[i]]options->GetString("compute_mode") == "cpu") { + spss.options().disable_gpu(); + } + spss.options().set_verbose(false); - delete[] nnz_each_row_tmp; - } + spss.set_csr_matrix(n_, kRowPtr_, jCol_, kVal_, true); + // spss.reorder(n_, n_); +} - /* - * initialize strumpack parameters - */ - - // possible values for MatchingJob (from STRUMPACK's source code) - //NONE, /*!< Don't do anything, but it can provide inertia info - // MC64 provides non-symmetric permutation and hence inertia is inaccurate */ - //MAX_CARDINALITY, /*!< Maximum cardinality */ - //MAX_SMALLEST_DIAGONAL, /*!< Maximum smallest diagonal value */ - //MAX_SMALLEST_DIAGONAL_2, /*!< Same as MAX_SMALLEST_DIAGONAL, - // but different algorithm */ - //MAX_DIAGONAL_SUM, /*!< Maximum sum of diagonal values */ - //MAX_DIAGONAL_PRODUCT_SCALING, /*!< Maximum product of diagonal values - // and row and column scaling */ - //COMBBLAS /*!< Use AWPM from CombBLAS */ - // - // If you encounter numerical issues during the factorization - //(such as small pivots, failure in LU factorization), you can - // also try a different matching (MatchingJob::MAX_DIAGONAL_PRODUCT_SCALING - // is the most robust option) - - - spss.options().set_matching(MatchingJob::NONE); - spss.options().enable_METIS_NodeNDP(); - - if(nlp_->options->GetString("compute_mode")=="cpu") - { - spss.options().disable_gpu(); +int hiopLinSolverSymSparseSTRUMPACK::matrixChanged() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + int num_neg_eig_val{0}; + int num_zero_eig_val{0}; + int num_pos_eig_val{0}; + + nlp_->runStats.linsolv.tmFactTime.start(); + + if(!kRowPtr_) { + this->firstCall(); + } else { + // update matrix + int rowID_tmp{0}; + for(int k = 0; k < nnz_; k++) { + kVal_[k] = M_->M()[index_covert_CSR2Triplet_[k]]; + } + for(int i = 0; i < n_; i++) { + if(index_covert_extra_Diag2CSR_[i] != -1) + kVal_[index_covert_extra_Diag2CSR_[i]] += M_->M()[M_->numberOfNonzeros() - n_ + i]; } - spss.options().set_verbose(false); spss.set_csr_matrix(n_, kRowPtr_, jCol_, kVal_, true); -// spss.reorder(n_, n_); } - int hiopLinSolverSymSparseSTRUMPACK::matrixChanged() - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - - int num_neg_eig_val{0}; - int num_zero_eig_val{0}; - int num_pos_eig_val{0}; - - nlp_->runStats.linsolv.tmFactTime.start(); - - if( !kRowPtr_ ){ - this->firstCall(); - }else{ - // update matrix - int rowID_tmp{0}; - for(int k=0;kM()[index_covert_CSR2Triplet_[k]]; - } - for(int i=0;iM()[M_->numberOfNonzeros()-n_+i]; - } + strumpack::ReturnCode retval = spss.factor(); // not really necessary, called if needed by solve - spss.set_csr_matrix(n_, kRowPtr_, jCol_, kVal_, true); - } - - strumpack::ReturnCode retval = spss.factor(); // not really necessary, called if needed by solve - - if(strumpack::ReturnCode::ZERO_PIVOT==retval) { - return -1; - } else if(strumpack::ReturnCode::SUCCESS==retval) { - retval = spss.inertia(num_neg_eig_val, num_zero_eig_val, num_pos_eig_val); - if(strumpack::ReturnCode::SUCCESS == retval) { - num_neg_eig_val = num_neg_eig_val; - } else { - if(nlp_->options->GetString("fact_acceptor")=="inertia_correction") { - nlp_->log->printf(hovError, "strumpack: failed to provide accurate inertia infomation. Please use inertia-free approach."); - } - } + if(strumpack::ReturnCode::ZERO_PIVOT == retval) { + return -1; + } else if(strumpack::ReturnCode::SUCCESS == retval) { + retval = spss.inertia(num_neg_eig_val, num_zero_eig_val, num_pos_eig_val); + if(strumpack::ReturnCode::SUCCESS == retval) { + num_neg_eig_val = num_neg_eig_val; } else { - // unknown error - assert(false && "unknown error from strumpack factor()"); + if(nlp_->options->GetString("fact_acceptor") == "inertia_correction") { + nlp_->log->printf(hovError, + "strumpack: failed to provide accurate inertia infomation. Please use inertia-free approach."); + } } - - nlp_->runStats.linsolv.tmInertiaComp.start(); - nlp_->runStats.linsolv.tmInertiaComp.stop(); - return num_neg_eig_val; + } else { + // unknown error + assert(false && "unknown error from strumpack factor()"); } - - bool hiopLinSolverSymSparseSTRUMPACK::solve ( hiopVector& x_ ) - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - assert(x_.get_size()==M_->n()); - nlp_->runStats.linsolv.tmTriuSolves.start(); + nlp_->runStats.linsolv.tmInertiaComp.start(); + nlp_->runStats.linsolv.tmInertiaComp.stop(); + return num_neg_eig_val; +} - hiopVectorPar* x = dynamic_cast(&x_); - assert(x != NULL); - hiopVectorPar* rhs = dynamic_cast(x->new_copy()); - double* dx = x->local_data(); - double* drhs = rhs->local_data(); +bool hiopLinSolverSymSparseSTRUMPACK::solve(hiopVector& x_) +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + assert(x_.get_size() == M_->n()); - spss.solve(drhs, dx); + nlp_->runStats.linsolv.tmTriuSolves.start(); - nlp_->runStats.linsolv.tmTriuSolves.stop(); - - delete rhs; rhs=nullptr; - return 1; - } + hiopVectorPar* x = dynamic_cast(&x_); + assert(x != NULL); + hiopVectorPar* rhs = dynamic_cast(x->new_copy()); + double* dx = x->local_data(); + double* drhs = rhs->local_data(); + spss.solve(drhs, dx); + nlp_->runStats.linsolv.tmTriuSolves.stop(); + delete rhs; + rhs = nullptr; + return 1; +} - hiopLinSolverNonSymSparseSTRUMPACK::hiopLinSolverNonSymSparseSTRUMPACK(const int& n, const int& nnz, hiopNlpFormulation* nlp) +hiopLinSolverNonSymSparseSTRUMPACK::hiopLinSolverNonSymSparseSTRUMPACK(const int& n, const int& nnz, hiopNlpFormulation* nlp) : hiopLinSolverNonSymSparse(n, nnz, nlp), kRowPtr_{nullptr}, jCol_{nullptr}, @@ -307,118 +300,119 @@ namespace hiop index_covert_extra_Diag2CSR_{nullptr}, n_{n}, nnz_{0} - {} +{} - hiopLinSolverNonSymSparseSTRUMPACK::~hiopLinSolverNonSymSparseSTRUMPACK() - { - if(kRowPtr_) - delete [] kRowPtr_; - if(jCol_) - delete [] jCol_; - if(kVal_) - delete [] kVal_; - if(index_covert_CSR2Triplet_) - delete [] index_covert_CSR2Triplet_; - if(index_covert_extra_Diag2CSR_) - delete [] index_covert_extra_Diag2CSR_; +hiopLinSolverNonSymSparseSTRUMPACK::~hiopLinSolverNonSymSparseSTRUMPACK() +{ + if(kRowPtr_) delete[] kRowPtr_; + if(jCol_) delete[] jCol_; + if(kVal_) delete[] kVal_; + if(index_covert_CSR2Triplet_) delete[] index_covert_CSR2Triplet_; + if(index_covert_extra_Diag2CSR_) delete[] index_covert_extra_Diag2CSR_; +} + +void hiopLinSolverNonSymSparseSTRUMPACK::firstCall() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + // transfer triplet form to CSR form + // note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part are the + // additional diagonal elememts the 1st part is sorted by row + hiop::hiopMatrixSparseTriplet* M_triplet = dynamic_cast(M_); + if(M_triplet == nullptr) { + nlp_->log->printf(hovError, "M_triplet is nullptr"); + return; } - void hiopLinSolverNonSymSparseSTRUMPACK::firstCall() - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - - // transfer triplet form to CSR form - // note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part are the additional diagonal elememts - // the 1st part is sorted by row - hiop::hiopMatrixSparseTriplet* M_triplet = dynamic_cast(M_); - if(M_triplet == nullptr) { - nlp_->log->printf(hovError, "M_triplet is nullptr"); - return; - } - - M_triplet->convert_to_csr_arrays(nnz_, &kRowPtr_, &jCol_, &kVal_, &index_covert_CSR2Triplet_, &index_covert_extra_Diag2CSR_, extra_diag_nnz_map); - - /* - * initialize strumpack parameters - */ - - // possible values for MatchingJob (from STRUMPACK's source code) - //NONE, /*!< Don't do anything */ - //MAX_CARDINALITY, /*!< Maximum cardinality */ - //MAX_SMALLEST_DIAGONAL, /*!< Maximum smallest diagonal value */ - //MAX_SMALLEST_DIAGONAL_2, /*!< Same as MAX_SMALLEST_DIAGONAL, - // but different algorithm */ - //MAX_DIAGONAL_SUM, /*!< Maximum sum of diagonal values */ - //MAX_DIAGONAL_PRODUCT_SCALING, /*!< Maximum product of diagonal values - // and row and column scaling */ - //COMBBLAS /*!< Use AWPM from CombBLAS */ - // - // If you encounter numerical issues during the factorization - //(such as small pivots, failure in LU factorization), you can - // also try a different matching (MatchingJob::MAX_DIAGONAL_PRODUCT_SCALING - // is the most robust option) - - //spss.options().set_matching(MatchingJob::MAX_DIAGONAL_PRODUCT_SCALING); - //spss.options().enable_METIS_NodeNDP(); - if(nlp_->options->GetString("compute_mode")=="cpu") { - spss.options().disable_gpu(); - } - spss.options().set_verbose(false); - - spss.set_csr_matrix(n_, kRowPtr_, jCol_, kVal_, true); -// spss.reorder(n_, n_); + M_triplet->convert_to_csr_arrays(nnz_, + &kRowPtr_, + &jCol_, + &kVal_, + &index_covert_CSR2Triplet_, + &index_covert_extra_Diag2CSR_, + extra_diag_nnz_map); + + /* + * initialize strumpack parameters + */ + + // possible values for MatchingJob (from STRUMPACK's source code) + // NONE, /*!< Don't do anything */ + // MAX_CARDINALITY, /*!< Maximum cardinality */ + // MAX_SMALLEST_DIAGONAL, /*!< Maximum smallest diagonal value */ + // MAX_SMALLEST_DIAGONAL_2, /*!< Same as MAX_SMALLEST_DIAGONAL, + // but different algorithm */ + // MAX_DIAGONAL_SUM, /*!< Maximum sum of diagonal values */ + // MAX_DIAGONAL_PRODUCT_SCALING, /*!< Maximum product of diagonal values + // and row and column scaling */ + // COMBBLAS /*!< Use AWPM from CombBLAS */ + // + // If you encounter numerical issues during the factorization + //(such as small pivots, failure in LU factorization), you can + // also try a different matching (MatchingJob::MAX_DIAGONAL_PRODUCT_SCALING + // is the most robust option) + + // spss.options().set_matching(MatchingJob::MAX_DIAGONAL_PRODUCT_SCALING); + // spss.options().enable_METIS_NodeNDP(); + if(nlp_->options->GetString("compute_mode") == "cpu") { + spss.options().disable_gpu(); } + spss.options().set_verbose(false); - int hiopLinSolverNonSymSparseSTRUMPACK::matrixChanged() - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - - nlp_->runStats.linsolv.tmFactTime.start(); - - if( !kRowPtr_ ) { - this->firstCall(); - } else { - // update matrix - int rowID_tmp{0}; - for(int k=0;kM()[index_covert_CSR2Triplet_[k]]; - } - for(auto p: extra_diag_nnz_map) { - kVal_[p.first] += M_->M()[p.second]; - } + spss.set_csr_matrix(n_, kRowPtr_, jCol_, kVal_, true); + // spss.reorder(n_, n_); +} - spss.set_csr_matrix(n_, kRowPtr_, jCol_, kVal_, true); +int hiopLinSolverNonSymSparseSTRUMPACK::matrixChanged() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + + nlp_->runStats.linsolv.tmFactTime.start(); + + if(!kRowPtr_) { + this->firstCall(); + } else { + // update matrix + int rowID_tmp{0}; + for(int k = 0; k < nnz_; k++) { + kVal_[k] = M_->M()[index_covert_CSR2Triplet_[k]]; + } + for(auto p: extra_diag_nnz_map) { + kVal_[p.first] += M_->M()[p.second]; } - spss.factor(); // not really necessary, called if needed by solve - - nlp_->runStats.linsolv.tmInertiaComp.start(); - nlp_->runStats.linsolv.tmInertiaComp.stop(); - return 0; + spss.set_csr_matrix(n_, kRowPtr_, jCol_, kVal_, true); } - bool hiopLinSolverNonSymSparseSTRUMPACK::solve(hiopVector& x_) - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(n_>0); - assert(x_.get_size()==M_->n()); + spss.factor(); // not really necessary, called if needed by solve - nlp_->runStats.linsolv.tmTriuSolves.start(); + nlp_->runStats.linsolv.tmInertiaComp.start(); + nlp_->runStats.linsolv.tmInertiaComp.stop(); + return 0; +} - hiopVectorPar* x = dynamic_cast(&x_); - assert(x != NULL); - hiopVectorPar* rhs = dynamic_cast(x->new_copy()); - double* dx = x->local_data(); - double* drhs = rhs->local_data(); +bool hiopLinSolverNonSymSparseSTRUMPACK::solve(hiopVector& x_) +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(n_ > 0); + assert(x_.get_size() == M_->n()); - spss.solve(drhs, dx); + nlp_->runStats.linsolv.tmTriuSolves.start(); - nlp_->runStats.linsolv.tmTriuSolves.stop(); - delete rhs; rhs=nullptr; - return 1; - } + hiopVectorPar* x = dynamic_cast(&x_); + assert(x != NULL); + hiopVectorPar* rhs = dynamic_cast(x->new_copy()); + double* dx = x->local_data(); + double* drhs = rhs->local_data(); + + spss.solve(drhs, dx); + nlp_->runStats.linsolv.tmTriuSolves.stop(); + delete rhs; + rhs = nullptr; + return 1; +} -} //end namespace hiop +} // end namespace hiop diff --git a/src/LinAlg/hiopLinSolverSparseSTRUMPACK.hpp b/src/LinAlg/hiopLinSolverSparseSTRUMPACK.hpp index 91c91dd88..06c2db778 100644 --- a/src/LinAlg/hiopLinSolverSparseSTRUMPACK.hpp +++ b/src/LinAlg/hiopLinSolverSparseSTRUMPACK.hpp @@ -61,13 +61,14 @@ using namespace strumpack; -namespace hiop { +namespace hiop +{ /** Wrapper for STRUMPACK */ -class hiopLinSolverSymSparseSTRUMPACK: public hiopLinSolverSymSparse +class hiopLinSolverSymSparseSTRUMPACK : public hiopLinSolverSymSparse { public: - hiopLinSolverSymSparseSTRUMPACK(const int& n, const int& nnz, hiopNlpFormulation* nlp); + hiopLinSolverSymSparseSTRUMPACK(const int &n, const int &nnz, hiopNlpFormulation *nlp); virtual ~hiopLinSolverSymSparseSTRUMPACK(); /** Triggers a refactorization of the matrix, if necessary. @@ -77,44 +78,40 @@ class hiopLinSolverSymSparseSTRUMPACK: public hiopLinSolverSymSparse /** solves a linear system. * param 'x' is on entry the right hand side(s) of the system to be solved. On * exit is contains the solution(s). */ - bool solve ( hiopVector& x_ ); + bool solve(hiopVector &x_); -//protected: -// int* ipiv; -// hiopVector* dwork; + // protected: + // int* ipiv; + // hiopVector* dwork; private: + int m_; // number of rows of the whole matrix + int n_; // number of cols of the whole matrix + int nnz_; // number of nonzeros in the matrix - int m_; // number of rows of the whole matrix - int n_; // number of cols of the whole matrix - int nnz_; // number of nonzeros in the matrix - - int *kRowPtr_; // row pointer for nonzeros - int *jCol_; // column indexes for nonzeros - double *kVal_; // storage for sparse matrix + int *kRowPtr_; // row pointer for nonzeros + int *jCol_; // column indexes for nonzeros + double *kVal_; // storage for sparse matrix int *index_covert_CSR2Triplet_; int *index_covert_extra_Diag2CSR_; // strumpack object - StrumpackSparseSolver spss; - + StrumpackSparseSolver spss; public: - /** called the very first time a matrix is factored. Allocates space * for the factorization and performs ordering */ virtual void firstCall(); -// virtual void diagonalChanged( int idiag, int extent ); - -friend class hiopLinSolverNonSymSparseSTRUMPACK; + // virtual void diagonalChanged( int idiag, int extent ); + friend class hiopLinSolverNonSymSparseSTRUMPACK; }; -class hiopLinSolverNonSymSparseSTRUMPACK: public hiopLinSolverNonSymSparse +class hiopLinSolverNonSymSparseSTRUMPACK : public hiopLinSolverNonSymSparse { public: - hiopLinSolverNonSymSparseSTRUMPACK(const int& n, const int& nnz, hiopNlpFormulation* nlp); + hiopLinSolverNonSymSparseSTRUMPACK(const int &n, const int &nnz, hiopNlpFormulation *nlp); virtual ~hiopLinSolverNonSymSparseSTRUMPACK(); @@ -125,40 +122,36 @@ class hiopLinSolverNonSymSparseSTRUMPACK: public hiopLinSolverNonSymSparse /** solves a linear system. * param 'x' is on entry the right hand side(s) of the system to be solved. On * exit is contains the solution(s). */ - bool solve ( hiopVector& x_ ); + bool solve(hiopVector &x_); -//protected: -// int* ipiv; -// hiopVector* dwork; + // protected: + // int* ipiv; + // hiopVector* dwork; private: + int m_; // number of rows of the whole matrix + int n_; // number of cols of the whole matrix + int nnz_; // number of nonzeros in the matrix - int m_; // number of rows of the whole matrix - int n_; // number of cols of the whole matrix - int nnz_; // number of nonzeros in the matrix - - int *kRowPtr_; // row pointer for nonzeros - int *jCol_; // column indexes for nonzeros - double *kVal_; // storage for sparse matrix + int *kRowPtr_; // row pointer for nonzeros + int *jCol_; // column indexes for nonzeros + double *kVal_; // storage for sparse matrix int *index_covert_CSR2Triplet_; int *index_covert_extra_Diag2CSR_; - std::unordered_map extra_diag_nnz_map; + std::unordered_map extra_diag_nnz_map; // strumpack object - StrumpackSparseSolver spss; + StrumpackSparseSolver spss; public: - /** called the very first time a matrix is factored. Allocates space * for the factorization and performs ordering */ void firstCall(); -// virtual void diagonalChanged( int idiag, int extent ); - -friend class hiopLinSolverSymSparseSTRUMPACK; + // virtual void diagonalChanged( int idiag, int extent ); + friend class hiopLinSolverSymSparseSTRUMPACK; }; - -} // end namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopLinSolverSymDenseLapack.hpp b/src/LinAlg/hiopLinSolverSymDenseLapack.hpp index 39127e35b..3eae15f91 100644 --- a/src/LinAlg/hiopLinSolverSymDenseLapack.hpp +++ b/src/LinAlg/hiopLinSolverSymDenseLapack.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_LINSOLVER_LAPACK @@ -51,46 +51,47 @@ #include "hiopLinSolver.hpp" -namespace hiop { +namespace hiop +{ /** Wrapper for LAPACK's DSYTRF */ class hiopLinSolverSymDenseLapack : public hiopLinSolverSymDense { public: hiopLinSolverSymDenseLapack(int n, hiopNlpFormulation* nlp) - : hiopLinSolverSymDense(n, nlp) + : hiopLinSolverSymDense(n, nlp) { ipiv = new int[n]; - //a "legacy" hiopVector within in the CPU memory space is sufficient + // a "legacy" hiopVector within in the CPU memory space is sufficient dwork = LinearAlgebraFactory::create_vector("DEFAULT", 0); } virtual ~hiopLinSolverSymDenseLapack() { - delete [] ipiv; + delete[] ipiv; delete dwork; } - /** Triggers a refactorization of the matrix, if necessary. + /** Triggers a refactorization of the matrix, if necessary. * Overload from base class. */ int matrixChanged() { assert(M_->n() == M_->m()); - int N=M_->n(), lda = N, info; - if(N==0) return 0; + int N = M_->n(), lda = N, info; + if(N == 0) return 0; nlp_->runStats.linsolv.tmFactTime.start(); - + double dwork_tmp; - char uplo='L'; // M is upper in C++ so it's lower in fortran + char uplo = 'L'; // M is upper in C++ so it's lower in fortran // - //query sizes + // query sizes // - int lwork=-1; - DSYTRF(&uplo, &N, M_->local_data(), &lda, ipiv, &dwork_tmp, &lwork, &info ); - assert(info==0); + int lwork = -1; + DSYTRF(&uplo, &N, M_->local_data(), &lda, ipiv, &dwork_tmp, &lwork, &info); + assert(info == 0); - lwork=(int)dwork_tmp; + lwork = (int)dwork_tmp; if(lwork != dwork->get_size()) { delete dwork; dwork = LinearAlgebraFactory::create_vector("DEFAULT", lwork); @@ -99,108 +100,108 @@ class hiopLinSolverSymDenseLapack : public hiopLinSolverSymDense // // factorization // - DSYTRF(&uplo, &N, M_->local_data(), &lda, ipiv, dwork->local_data(), &lwork, &info ); - if(info<0) { - nlp_->log->printf(hovError, - "hiopLinSolverSymDense error: %d argument to dsytrf has an illegal value.\n", - -info); + DSYTRF(&uplo, &N, M_->local_data(), &lda, ipiv, dwork->local_data(), &lwork, &info); + if(info < 0) { + nlp_->log->printf(hovError, "hiopLinSolverSymDense error: %d argument to dsytrf has an illegal value.\n", -info); return -1; } else { - if(info>0) { - nlp_->log->printf(hovWarning, - "hiopLinSolverSymDense error: %d entry in the factorization's diagonal\n" - "is exactly zero. Division by zero will occur if it a solve is attempted.\n", - info); - //matrix is singular - return -1; + if(info > 0) { + nlp_->log->printf(hovWarning, + "hiopLinSolverSymDense error: %d entry in the factorization's diagonal\n" + "is exactly zero. Division by zero will occur if it a solve is attempted.\n", + info); + // matrix is singular + return -1; } } - assert(info==0); + assert(info == 0); nlp_->runStats.linsolv.tmFactTime.stop(); - + nlp_->runStats.linsolv.tmInertiaComp.start(); // // Compute the inertia. Only negative eigenvalues are returned. // Code originally written by M. Schanenfor PIPS based on // LINPACK's dsidi Fortran routine (http://www.netlib.org/linpack/dsidi.f) // 04/08/2020 - petra: fixed the test for non-positive pivots (was only for negative pivots) - int negEigVal=0; - [[maybe_unused]] int posEigVal=0; - int nullEigVal=0; - double t=0; + int negEigVal = 0; + [[maybe_unused]] int posEigVal = 0; + int nullEigVal = 0; + double t = 0; double* MM = M_->local_data(); - for(int k=0; krunStats.linsolv.tmInertiaComp.stop(); - - if(nullEigVal>0) return -1; + + if(nullEigVal > 0) return -1; return negEigVal; } - + /** solves a linear system. * param 'x' is on entry the right hand side(s) of the system to be solved. On * exit is contains the solution(s). */ - bool solve ( hiopVector& x ) + bool solve(hiopVector& x) { assert(M_->n() == M_->m()); - assert(x.get_size()==M_->n()); - int N=M_->n(), LDA = N, info; - if(N==0) return true; + assert(x.get_size() == M_->n()); + int N = M_->n(), LDA = N, info; + if(N == 0) return true; nlp_->runStats.linsolv.tmTriuSolves.start(); - - char uplo='L'; // M is upper in C++ so it's lower in fortran - int NRHS=1, LDB=N; + + char uplo = 'L'; // M is upper in C++ so it's lower in fortran + int NRHS = 1, LDB = N; DSYTRS(&uplo, &N, &NRHS, M_->local_data(), &LDA, ipiv, x.local_data(), &LDB, &info); - if(info<0) { + if(info < 0) { nlp_->log->printf(hovError, "hiopLinSolverSymDenseLapack: DSYTRS returned error %d\n", info); - } else if(info>0) { + } else if(info > 0) { nlp_->log->printf(hovError, "hiopLinSolverSymDenseLapack: DSYTRS returned warning %d\n", info); } nlp_->runStats.linsolv.tmTriuSolves.stop(); - return info==0; + return info == 0; } protected: int* ipiv; hiopVector* dwork; + private: hiopLinSolverSymDenseLapack() - : ipiv(NULL), dwork(NULL) + : ipiv(NULL), + dwork(NULL) { assert(false); } }; -} // end namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopLinSolverSymDenseMagma.cpp b/src/LinAlg/hiopLinSolverSymDenseMagma.cpp index c29ec4f78..c9988a776 100644 --- a/src/LinAlg/hiopLinSolverSymDenseMagma.cpp +++ b/src/LinAlg/hiopLinSolverSymDenseMagma.cpp @@ -57,424 +57,406 @@ namespace hiop { - hiopLinSolverSymDenseMagmaBuKa::hiopLinSolverSymDenseMagmaBuKa(int n, hiopNlpFormulation* nlp_) +hiopLinSolverSymDenseMagmaBuKa::hiopLinSolverSymDenseMagmaBuKa(int n, hiopNlpFormulation* nlp_) : hiopLinSolverSymDense(n, nlp_) - { - magma_int_t ndevices; - magma_device_t devices[MagmaMaxGPUs]; - magma_getdevices(devices, MagmaMaxGPUs, &ndevices); - assert(ndevices>=1); +{ + magma_int_t ndevices; + magma_device_t devices[MagmaMaxGPUs]; + magma_getdevices(devices, MagmaMaxGPUs, &ndevices); + assert(ndevices >= 1); - int device = 0; - magma_setdevice(device); + int device = 0; + magma_setdevice(device); - magma_queue_create(devices[device], &magma_device_queue_); + magma_queue_create(devices[device], &magma_device_queue_); - int magmaRet; + int magmaRet; - const int align=32; - ldda_ = magma_roundup(n, align ); // multiple of 'align', small power of 2 (i.e., 32) - - const int nrhs=1; - lddb_ = ldda_; + const int align = 32; + ldda_ = magma_roundup(n, align); // multiple of 'align', small power of 2 (i.e., 32) - magmaRet = magma_malloc((void**)&dinert_, 3*sizeof(int)); - assert(MAGMA_SUCCESS == magmaRet); + const int nrhs = 1; + lddb_ = ldda_; + magmaRet = magma_malloc((void**)&dinert_, 3 * sizeof(int)); + assert(MAGMA_SUCCESS == magmaRet); - magmaRet = magma_imalloc_pinned(&ipiv_, ldda_); - assert(MAGMA_SUCCESS == magmaRet); + magmaRet = magma_imalloc_pinned(&ipiv_, ldda_); + assert(MAGMA_SUCCESS == magmaRet); - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "default" || mem_space == "host") { - magmaRet = magma_dmalloc(&device_M_, n*ldda_); - assert(MAGMA_SUCCESS == magmaRet); - magmaRet = magma_dmalloc(&device_rhs_, nrhs*lddb_ ); - assert(MAGMA_SUCCESS == magmaRet); - } else { - device_M_ = nullptr; - device_rhs_ = nullptr; - //overwrite leading dimensions so that it aligns with the internal representation from - //HiOp dense matrix - ldda_ = n; - lddb_ = ldda_; - - } + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "default" || mem_space == "host") { + magmaRet = magma_dmalloc(&device_M_, n * ldda_); + assert(MAGMA_SUCCESS == magmaRet); + magmaRet = magma_dmalloc(&device_rhs_, nrhs * lddb_); + assert(MAGMA_SUCCESS == magmaRet); + } else { + device_M_ = nullptr; + device_rhs_ = nullptr; + // overwrite leading dimensions so that it aligns with the internal representation from + // HiOp dense matrix + ldda_ = n; + lddb_ = ldda_; } +} - hiopLinSolverSymDenseMagmaBuKa::~hiopLinSolverSymDenseMagmaBuKa() - { - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "default" || mem_space == "host") { - magma_free(device_M_); - magma_free(device_rhs_); - } +hiopLinSolverSymDenseMagmaBuKa::~hiopLinSolverSymDenseMagmaBuKa() +{ + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "default" || mem_space == "host") { + magma_free(device_M_); + magma_free(device_rhs_); + } - magma_free_pinned(ipiv_); - magma_free(dinert_); + magma_free_pinned(ipiv_); + magma_free(dinert_); - magma_queue_destroy(magma_device_queue_); - magma_device_queue_ = NULL; + magma_queue_destroy(magma_device_queue_); + magma_device_queue_ = NULL; +} +/** Triggers a refactorization of the matrix, if necessary. */ +int hiopLinSolverSymDenseMagmaBuKa::matrixChanged() +{ + assert(M_->n() == M_->m()); + int N = M_->n(); + int lda = N; + int info; + if(N == 0) { + return 0; } - /** Triggers a refactorization of the matrix, if necessary. */ - int hiopLinSolverSymDenseMagmaBuKa::matrixChanged() - { - assert(M_->n() == M_->m()); - int N=M_->n(); - int lda = N; - int info; - if(N==0) { - return 0; - } - nlp_->runStats.linsolv.tmFactTime.start(); + nlp_->runStats.linsolv.tmFactTime.start(); - magma_uplo_t uplo=MagmaLower; // M is upper in C++ so it's lower in fortran + magma_uplo_t uplo = MagmaLower; // M is upper in C++ so it's lower in fortran #ifdef HIOP_USE_HIP - uplo = MagmaUpper; // M is upper in C++ so it's lower in fortran - M_->symmetrize(); + uplo = MagmaUpper; // M is upper in C++ so it's lower in fortran + M_->symmetrize(); #endif - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "default" || mem_space == "host") { - nlp_->runStats.linsolv.tmDeviceTransfer.start(); - magma_dsetmatrix(N, N, M_->local_data(), lda, device_M_, ldda_, magma_device_queue_); - nlp_->runStats.linsolv.tmDeviceTransfer.stop(); - } else { - device_M_ = M_->local_data(); - } + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "default" || mem_space == "host") { + nlp_->runStats.linsolv.tmDeviceTransfer.start(); + magma_dsetmatrix(N, N, M_->local_data(), lda, device_M_, ldda_, magma_device_queue_); + nlp_->runStats.linsolv.tmDeviceTransfer.stop(); + } else { + device_M_ = M_->local_data(); + } - // - //factorization - // - magma_dsytrf_gpu(uplo, N, device_M_, ldda_, ipiv_, &info ); + // + // factorization + // + magma_dsytrf_gpu(uplo, N, device_M_, ldda_, ipiv_, &info); - nlp_->runStats.linsolv.tmFactTime.stop(); + nlp_->runStats.linsolv.tmFactTime.stop(); - const double tflops = FLOPS_DPOTRF( N ) / 1e12; - nlp_->runStats.linsolv.flopsFact += tflops; + const double tflops = FLOPS_DPOTRF(N) / 1e12; + nlp_->runStats.linsolv.flopsFact += tflops; - if(info<0) { - nlp_->log->printf(hovError, - "hiopLinSolverMagmaBuka error: argument %d to dsytrf has an illegal value.\n", - -info); + if(info < 0) { + nlp_->log->printf(hovError, "hiopLinSolverMagmaBuka error: argument %d to dsytrf has an illegal value.\n", -info); + return -1; + } else { + if(info > 0) { + nlp_->log->printf(hovWarning, + "hiopLinSolverMagmaBuka error: entry %d in the factorization's diagonal\n" + "is exactly zero. Division by zero will occur if it a solve is attempted.\n", + info); + // matrix is singular return -1; - } else { - if(info>0) { - nlp_->log->printf(hovWarning, - "hiopLinSolverMagmaBuka error: entry %d in the factorization's diagonal\n" - "is exactly zero. Division by zero will occur if it a solve is attempted.\n", - info); - //matrix is singular - return -1; - } } - assert(info==0); - int negEigVal, posEigVal, nullEigVal; + } + assert(info == 0); + int negEigVal, posEigVal, nullEigVal; - if(!compute_inertia(N, ipiv_, posEigVal, negEigVal, nullEigVal)) { - return -1; - } + if(!compute_inertia(N, ipiv_, posEigVal, negEigVal, nullEigVal)) { + return -1; + } - if(nullEigVal>0) { - return -1; - } - return negEigVal; + if(nullEigVal > 0) { + return -1; } + return negEigVal; +} - bool hiopLinSolverSymDenseMagmaBuKa:: - compute_inertia(int N, int* ipiv, int& posEigVal, int& negEigVal, int& nullEigVal) - { - int inert[3]; - int info; - magma_uplo_t uplo=MagmaLower; // M is upper in C++ so it's lower in fortran +bool hiopLinSolverSymDenseMagmaBuKa::compute_inertia(int N, int* ipiv, int& posEigVal, int& negEigVal, int& nullEigVal) +{ + int inert[3]; + int info; + magma_uplo_t uplo = MagmaLower; // M is upper in C++ so it's lower in fortran #ifdef HIOP_USE_HIP - uplo = MagmaUpper; // M is upper in C++ so it's lower in fortran + uplo = MagmaUpper; // M is upper in C++ so it's lower in fortran #endif - nlp_->runStats.linsolv.tmInertiaComp.start(); + nlp_->runStats.linsolv.tmInertiaComp.start(); - info = magmablas_dsiinertia(uplo, N, device_M_, ldda_, ipiv, dinert_, magma_device_queue_); + info = magmablas_dsiinertia(uplo, N, device_M_, ldda_, ipiv, dinert_, magma_device_queue_); - magma_getvector(3, sizeof(int), dinert_, 1, inert, 1, magma_device_queue_); + magma_getvector(3, sizeof(int), dinert_, 1, inert, 1, magma_device_queue_); - if(0!=info) { - nlp_->log->printf(hovError, - "Magma dsidi inertia computation failed with [%d] (MagmaBuKa)\n", - info); - posEigVal = negEigVal = nullEigVal = -1; - return false; - } + if(0 != info) { + nlp_->log->printf(hovError, "Magma dsidi inertia computation failed with [%d] (MagmaBuKa)\n", info); + posEigVal = negEigVal = nullEigVal = -1; + return false; + } - posEigVal = inert[0]; - negEigVal = inert[1]; - nullEigVal = inert[2]; - - nlp_->log->printf(hovScalars, - "BuKa dsidi eigs: pos/neg/null %d %d %d \n", - posEigVal, - negEigVal, - nullEigVal); - fflush(stdout); - nlp_->runStats.linsolv.tmInertiaComp.stop(); - return info==0; + posEigVal = inert[0]; + negEigVal = inert[1]; + nullEigVal = inert[2]; + + nlp_->log->printf(hovScalars, "BuKa dsidi eigs: pos/neg/null %d %d %d \n", posEigVal, negEigVal, nullEigVal); + fflush(stdout); + nlp_->runStats.linsolv.tmInertiaComp.stop(); + return info == 0; +} + +bool hiopLinSolverSymDenseMagmaBuKa::solve(hiopVector& x) +{ + assert(M_->n() == M_->m()); + assert(x.get_size() == M_->n()); + int N = M_->n(); + int LDB = N; + int NRHS = 1; + if(N == 0) { + return true; } - bool hiopLinSolverSymDenseMagmaBuKa::solve(hiopVector& x) - { - assert(M_->n() == M_->m()); - assert(x.get_size() == M_->n()); - int N = M_->n(); - int LDB = N; - int NRHS = 1; - if(N == 0) { - return true; - } + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "default" || mem_space == "host") { + nlp_->runStats.linsolv.tmDeviceTransfer.start(); + magma_dsetmatrix(N, NRHS, x.local_data(), LDB, device_rhs_, lddb_, magma_device_queue_); + nlp_->runStats.linsolv.tmDeviceTransfer.stop(); + } else { + device_rhs_ = x.local_data(); + } - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "default" || mem_space == "host") { - nlp_->runStats.linsolv.tmDeviceTransfer.start(); - magma_dsetmatrix(N, NRHS, x.local_data(), LDB, device_rhs_, lddb_, magma_device_queue_); - nlp_->runStats.linsolv.tmDeviceTransfer.stop(); - } else { - device_rhs_ = x.local_data(); - } + nlp_->runStats.linsolv.tmTriuSolves.start(); - nlp_->runStats.linsolv.tmTriuSolves.start(); - - magma_uplo_t uplo=MagmaLower; // M is upper in C++ so it's lower in fortran + magma_uplo_t uplo = MagmaLower; // M is upper in C++ so it's lower in fortran #ifdef HIOP_USE_HIP - uplo = MagmaUpper; // M is upper in C++ so it's lower in fortran + uplo = MagmaUpper; // M is upper in C++ so it's lower in fortran #endif - int info; - magma_dsytrs_gpu(uplo, N, NRHS, device_M_, ldda_, ipiv_, device_rhs_, lddb_, &info, magma_device_queue_); - - if(info<0) { - nlp_->log->printf(hovError, "hiopLinSolverMagmaBuKa: DSYTRS_GPU returned error %d\n", info); - assert(false); - } else if(info>0) { - nlp_->log->printf(hovWarning, "hiopLinSolverMagmaBuKa: DSYTRS_GPU returned warning %d\n", info); - } - nlp_->runStats.linsolv.tmTriuSolves.stop(); - - const double tflops = FLOPS_DPOTRS(N, NRHS) / 1e12; - nlp_->runStats.linsolv.flopsTriuSolves += tflops; + int info; + magma_dsytrs_gpu(uplo, N, NRHS, device_M_, ldda_, ipiv_, device_rhs_, lddb_, &info, magma_device_queue_); + + if(info < 0) { + nlp_->log->printf(hovError, "hiopLinSolverMagmaBuKa: DSYTRS_GPU returned error %d\n", info); + assert(false); + } else if(info > 0) { + nlp_->log->printf(hovWarning, "hiopLinSolverMagmaBuKa: DSYTRS_GPU returned warning %d\n", info); + } + nlp_->runStats.linsolv.tmTriuSolves.stop(); - if(mem_space == "default" || mem_space == "host") { - nlp_->runStats.linsolv.tmDeviceTransfer.start(); - magma_dgetmatrix(N, NRHS, device_rhs_, lddb_, x.local_data(), LDB, magma_device_queue_); - nlp_->runStats.linsolv.tmDeviceTransfer.stop(); - } + const double tflops = FLOPS_DPOTRS(N, NRHS) / 1e12; + nlp_->runStats.linsolv.flopsTriuSolves += tflops; - return info==0; + if(mem_space == "default" || mem_space == "host") { + nlp_->runStats.linsolv.tmDeviceTransfer.start(); + magma_dgetmatrix(N, NRHS, device_rhs_, lddb_, x.local_data(), LDB, magma_device_queue_); + nlp_->runStats.linsolv.tmDeviceTransfer.stop(); } + return info == 0; +} - /******************************************************************************************************* - * MAGMA indefinite solver without pivoting (fast) - *******************************************************************************************************/ - hiopLinSolverSymDenseMagmaNopiv::hiopLinSolverSymDenseMagmaNopiv(int n, hiopNlpFormulation* nlp) - : hiopLinSolverSymDense(n, nlp) - { - magma_int_t ndevices; - magma_device_t devices[MagmaMaxGPUs]; - magma_getdevices(devices, MagmaMaxGPUs, &ndevices); - assert(ndevices>=1); +/******************************************************************************************************* + * MAGMA indefinite solver without pivoting (fast) + *******************************************************************************************************/ +hiopLinSolverSymDenseMagmaNopiv::hiopLinSolverSymDenseMagmaNopiv(int n, hiopNlpFormulation* nlp) + : hiopLinSolverSymDense(n, nlp) +{ + magma_int_t ndevices; + magma_device_t devices[MagmaMaxGPUs]; + magma_getdevices(devices, MagmaMaxGPUs, &ndevices); + assert(ndevices >= 1); - int device = 0; - magma_setdevice(device); + int device = 0; + magma_setdevice(device); - magma_queue_create(devices[device], &magma_device_queue_); + magma_queue_create(devices[device], &magma_device_queue_); - int magmaRet; + int magmaRet; - const int align=32; - const int nrhs=1; - ldda_ = magma_roundup(n, align ); // multiple of 32 by default - lddb_ = ldda_; + const int align = 32; + const int nrhs = 1; + ldda_ = magma_roundup(n, align); // multiple of 32 by default + lddb_ = ldda_; - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "default" || mem_space == "host") { - magmaRet = magma_dmalloc(&device_M_, n*ldda_); - assert(MAGMA_SUCCESS == magmaRet); - magmaRet = magma_dmalloc(&device_rhs_, nrhs*lddb_ ); - assert(MAGMA_SUCCESS == magmaRet); - } else { - device_M_ = nullptr; - device_rhs_ = nullptr; - //overwrite leading dimensions so that it aligns with the internal representation from - //HiOp dense matrix - ldda_ = n; - lddb_ = ldda_; - } + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "default" || mem_space == "host") { + magmaRet = magma_dmalloc(&device_M_, n * ldda_); + assert(MAGMA_SUCCESS == magmaRet); + magmaRet = magma_dmalloc(&device_rhs_, nrhs * lddb_); + assert(MAGMA_SUCCESS == magmaRet); + } else { + device_M_ = nullptr; + device_rhs_ = nullptr; + // overwrite leading dimensions so that it aligns with the internal representation from + // HiOp dense matrix + ldda_ = n; + lddb_ = ldda_; } +} - hiopLinSolverSymDenseMagmaNopiv::~hiopLinSolverSymDenseMagmaNopiv() - { - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "default" || mem_space == "host") { - magma_free(device_M_); - magma_free(device_rhs_); - } - magma_queue_destroy(magma_device_queue_); - magma_device_queue_ = NULL; +hiopLinSolverSymDenseMagmaNopiv::~hiopLinSolverSymDenseMagmaNopiv() +{ + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "default" || mem_space == "host") { + magma_free(device_M_); + magma_free(device_rhs_); } + magma_queue_destroy(magma_device_queue_); + magma_device_queue_ = NULL; +} - /** Triggers a refactorization of the matrix, if necessary. */ - int hiopLinSolverSymDenseMagmaNopiv::matrixChanged() - { - assert(M_->n() == M_->m()); - int N=M_->n(); - int LDA = N; - if(N==0) { - return true; - } - - magma_int_t info; - magma_uplo_t uplo=MagmaLower; // M is upper in C++ so it's lower in fortran - - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "default" || mem_space == "host") { - nlp_->runStats.linsolv.tmDeviceTransfer.start(); - magma_dsetmatrix(N, N, M_->local_data(), LDA, device_M_, ldda_, magma_device_queue_); - nlp_->runStats.linsolv.tmDeviceTransfer.stop(); - } else { - device_M_ = M_->local_data(); - } - nlp_->runStats.linsolv.tmFactTime.start(); - - // - // Factorization of the matrix - // - magma_dsytrf_nopiv_gpu(uplo, N, device_M_, ldda_, &info); - - nlp_->runStats.linsolv.tmFactTime.stop(); +/** Triggers a refactorization of the matrix, if necessary. */ +int hiopLinSolverSymDenseMagmaNopiv::matrixChanged() +{ + assert(M_->n() == M_->m()); + int N = M_->n(); + int LDA = N; + if(N == 0) { + return true; + } - const double tflops = FLOPS_DPOTRF( N ) / 1e12; - nlp_->runStats.linsolv.flopsFact += tflops; + magma_int_t info; + magma_uplo_t uplo = MagmaLower; // M is upper in C++ so it's lower in fortran - if(info<0) { - nlp_->log->printf(hovError, - "hiopLinSolverSymDenseNoPiv error: %d argument to dsytrf has an illegal value.\n", - -info); - return -1; - } else { - if(info>0) { - nlp_->log->printf(hovScalars, - "hiopLinSolverSymDenseNoPiv error: %d entry in the factorization's diagonal\n" - "is exactly zero. Division by zero will occur if it a solve is attempted.\n", - info); - //matrix is singular - return -1; - } - } - assert(info==0); - int negEigVal, posEigVal, nullEigVal; - - if(!compute_inertia(N, posEigVal, negEigVal, nullEigVal)) { + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "default" || mem_space == "host") { + nlp_->runStats.linsolv.tmDeviceTransfer.start(); + magma_dsetmatrix(N, N, M_->local_data(), LDA, device_M_, ldda_, magma_device_queue_); + nlp_->runStats.linsolv.tmDeviceTransfer.stop(); + } else { + device_M_ = M_->local_data(); + } + nlp_->runStats.linsolv.tmFactTime.start(); + + // + // Factorization of the matrix + // + magma_dsytrf_nopiv_gpu(uplo, N, device_M_, ldda_, &info); + + nlp_->runStats.linsolv.tmFactTime.stop(); + + const double tflops = FLOPS_DPOTRF(N) / 1e12; + nlp_->runStats.linsolv.flopsFact += tflops; + + if(info < 0) { + nlp_->log->printf(hovError, "hiopLinSolverSymDenseNoPiv error: %d argument to dsytrf has an illegal value.\n", -info); + return -1; + } else { + if(info > 0) { + nlp_->log->printf(hovScalars, + "hiopLinSolverSymDenseNoPiv error: %d entry in the factorization's diagonal\n" + "is exactly zero. Division by zero will occur if it a solve is attempted.\n", + info); + // matrix is singular return -1; } + } + assert(info == 0); + int negEigVal, posEigVal, nullEigVal; - if(nullEigVal>0) return -1; - return negEigVal; + if(!compute_inertia(N, posEigVal, negEigVal, nullEigVal)) { + return -1; } - bool hiopLinSolverSymDenseMagmaNopiv::compute_inertia(int n, - int& posEigvals, - int& negEigvals, - int& zeroEigvals) - { - assert(device_M_); + if(nullEigVal > 0) return -1; + return negEigVal; +} - // - // inertia - // - int info; - int *dinert, inert[3]; - if(MAGMA_SUCCESS != magma_malloc((void**)&dinert, 3*sizeof(int))) { - nlp_->log->printf(hovError, - "hiopLinSolverMagmaNopiv: error in allocating memory on the device " - "(MAGMA_ERR_INVALID_PTR).\n"); - return false; - - } +bool hiopLinSolverSymDenseMagmaNopiv::compute_inertia(int n, int& posEigvals, int& negEigvals, int& zeroEigvals) +{ + assert(device_M_); + + // + // inertia + // + int info; + int *dinert, inert[3]; + if(MAGMA_SUCCESS != magma_malloc((void**)&dinert, 3 * sizeof(int))) { + nlp_->log->printf(hovError, + "hiopLinSolverMagmaNopiv: error in allocating memory on the device " + "(MAGMA_ERR_INVALID_PTR).\n"); + return false; + } - info = magmablas_ddiinertia(n, device_M_, ldda_, dinert, magma_device_queue_); - if(MAGMA_SUCCESS != info) { - nlp_->log->printf(hovError, - "hiopLinSolverMagmaNopiv: magmablas_ddiinertia returned error %d [%s]\n", - info, magma_strerror(info)); - posEigvals = negEigvals = zeroEigvals = -1; - return false; - } - magma_getvector( 3, sizeof(int), dinert, 1, inert, 1, magma_device_queue_); - magma_free( dinert ); + info = magmablas_ddiinertia(n, device_M_, ldda_, dinert, magma_device_queue_); + if(MAGMA_SUCCESS != info) { + nlp_->log->printf(hovError, + "hiopLinSolverMagmaNopiv: magmablas_ddiinertia returned error %d [%s]\n", + info, + magma_strerror(info)); + posEigvals = negEigvals = zeroEigvals = -1; + return false; + } + magma_getvector(3, sizeof(int), dinert, 1, inert, 1, magma_device_queue_); + magma_free(dinert); - posEigvals = inert[0]; - negEigvals = inert[1]; - zeroEigvals = inert[2]; + posEigvals = inert[0]; + negEigvals = inert[1]; + zeroEigvals = inert[2]; - nlp_->log->printf(hovScalars, - "inertia: positive / negative / zero = %d / %d / %d\n", - inert[0], inert[1], inert[2]); + nlp_->log->printf(hovScalars, "inertia: positive / negative / zero = %d / %d / %d\n", inert[0], inert[1], inert[2]); + return true; +} + +bool hiopLinSolverSymDenseMagmaNopiv::solve(hiopVector& x) +{ + assert(M_->n() == M_->m()); + assert(x.get_size() == M_->n()); + int N = M_->n(); + int LDB = N; + if(N == 0) { return true; } - bool hiopLinSolverSymDenseMagmaNopiv::solve( hiopVector& x ) - { - assert(M_->n() == M_->m()); - assert(x.get_size()==M_->n()); - int N=M_->n(); - int LDB=N; - if(N==0) { - return true; - } + magma_int_t info; + magma_uplo_t uplo = MagmaLower; // M is upper in C++ so it's lower in fortran + magma_int_t NRHS = 1; + + std::string mem_space = nlp_->options->GetString("mem_space"); + if(mem_space == "default" || mem_space == "host") { + nlp_->runStats.linsolv.tmDeviceTransfer.start(); + magma_dsetmatrix(N, NRHS, x.local_data(), LDB, device_rhs_, lddb_, magma_device_queue_); + nlp_->runStats.linsolv.tmDeviceTransfer.stop(); + } else { + device_rhs_ = x.local_data(); + } - magma_int_t info; - magma_uplo_t uplo=MagmaLower; // M is upper in C++ so it's lower in fortran - magma_int_t NRHS=1; + nlp_->runStats.linsolv.tmTriuSolves.start(); - std::string mem_space = nlp_->options->GetString("mem_space"); - if(mem_space == "default" || mem_space == "host") { - nlp_->runStats.linsolv.tmDeviceTransfer.start(); - magma_dsetmatrix(N, NRHS, x.local_data(), LDB, device_rhs_, lddb_, magma_device_queue_); - nlp_->runStats.linsolv.tmDeviceTransfer.stop(); - } else { - device_rhs_ = x.local_data(); - } - - nlp_->runStats.linsolv.tmTriuSolves.start(); + assert(device_M_); + assert(device_rhs_); + // + // the call + // + // magma_dsysv_nopiv_gpu(uplo, N, NRHS, device_M_, ldda_, device_rhs_, lddb_, &info); + magma_dsytrs_nopiv_gpu(uplo, N, NRHS, device_M_, ldda_, device_rhs_, lddb_, &info); - assert(device_M_); - assert(device_rhs_); - // - // the call - // - //magma_dsysv_nopiv_gpu(uplo, N, NRHS, device_M_, ldda_, device_rhs_, lddb_, &info); - magma_dsytrs_nopiv_gpu(uplo, N, NRHS, device_M_, ldda_, device_rhs_, lddb_, &info); - - nlp_->runStats.linsolv.tmTriuSolves.stop(); + nlp_->runStats.linsolv.tmTriuSolves.stop(); - const double tflops = FLOPS_DPOTRS(N, NRHS) / 1e12; - nlp_->runStats.linsolv.flopsTriuSolves += tflops; - - if(info != 0) { - nlp_->log->printf(hovError, - "hiopLinSolverMagmaNopiv: dsytrs_nopiv_gpu returned error %d [%s]\n", - info, magma_strerror(info)); - return false; - } + const double tflops = FLOPS_DPOTRS(N, NRHS) / 1e12; + nlp_->runStats.linsolv.flopsTriuSolves += tflops; - if(mem_space == "default" || mem_space == "host") { - nlp_->runStats.linsolv.tmDeviceTransfer.start(); - magma_dgetmatrix(N, NRHS, device_rhs_, lddb_, x.local_data(), LDB, magma_device_queue_); - nlp_->runStats.linsolv.tmDeviceTransfer.stop(); - } + if(info != 0) { + nlp_->log->printf(hovError, + "hiopLinSolverMagmaNopiv: dsytrs_nopiv_gpu returned error %d [%s]\n", + info, + magma_strerror(info)); + return false; + } - return true; + if(mem_space == "default" || mem_space == "host") { + nlp_->runStats.linsolv.tmDeviceTransfer.start(); + magma_dgetmatrix(N, NRHS, device_rhs_, lddb_, x.local_data(), LDB, magma_device_queue_); + nlp_->runStats.linsolv.tmDeviceTransfer.stop(); } + return true; +} + #if 0 hiopLinSolverSymDenseMagmaBuKa_old2::hiopLinSolverSymDenseMagmaBuKa_old2(int n, hiopNlpFormulation* nlp_) : hiopLinSolverSymDense(n, nlp_), work_(NULL) @@ -606,6 +588,6 @@ namespace hiop return info==0; } -#endif //0 +#endif // 0 -} // end namespace +} // namespace hiop diff --git a/src/LinAlg/hiopLinSolverSymDenseMagma.hpp b/src/LinAlg/hiopLinSolverSymDenseMagma.hpp index a1129cd53..b0cc1f939 100644 --- a/src/LinAlg/hiopLinSolverSymDenseMagma.hpp +++ b/src/LinAlg/hiopLinSolverSymDenseMagma.hpp @@ -58,7 +58,6 @@ #include "hiopNlpFormulation.hpp" #include "hiopLinSolver.hpp" - #ifdef HIOP_USE_MAGMA #include "magma_v2.h" @@ -66,33 +65,33 @@ * */ -//these FLOPS counter are from MAGMA -#define FADDS_POTRF(n_) ((n_) * (((1. / 6.) * (n_) ) * (n_) - (1. / 6.))) -#define FADDS_POTRI(n_) ( (n_) * ((1. / 6.) + (n_) * ((1. / 3.) * (n_) - 0.5)) ) -#define FADDS_POTRS(n_, nrhs_) ((nrhs_) * (n_) * ((n_) - 1 )) +// these FLOPS counter are from MAGMA +#define FADDS_POTRF(n_) ((n_) * (((1. / 6.) * (n_)) * (n_) - (1. / 6.))) +#define FADDS_POTRI(n_) ((n_) * ((1. / 6.) + (n_) * ((1. / 3.) * (n_) - 0.5))) +#define FADDS_POTRS(n_, nrhs_) ((nrhs_) * (n_) * ((n_) - 1)) #define FMULS_POTRF(n_) ((n_) * (((1. / 6.) * (n_) + 0.5) * (n_) + (1. / 3.))) -#define FMULS_POTRI(n_) ( (n_) * ((2. / 3.) + (n_) * ((1. / 3.) * (n_) + 1. )) ) -#define FMULS_POTRS(n_, nrhs_) ((nrhs_) * (n_) * ((n_) + 1 )) -#define FLOPS_DPOTRF(n_) ( FMULS_POTRF((double)(n_)) + FADDS_POTRF((double)(n_)) ) -#define FLOPS_DPOTRI(n_) ( FMULS_POTRI((double)(n_)) + FADDS_POTRI((double)(n_)) ) -#define FLOPS_DPOTRS(n_, nrhs_) ( FMULS_POTRS((double)(n_), (double)(nrhs_)) + FADDS_POTRS((double)(n_), (double)(nrhs_)) ) - +#define FMULS_POTRI(n_) ((n_) * ((2. / 3.) + (n_) * ((1. / 3.) * (n_) + 1.))) +#define FMULS_POTRS(n_, nrhs_) ((nrhs_) * (n_) * ((n_) + 1)) +#define FLOPS_DPOTRF(n_) (FMULS_POTRF((double)(n_)) + FADDS_POTRF((double)(n_))) +#define FLOPS_DPOTRI(n_) (FMULS_POTRI((double)(n_)) + FADDS_POTRI((double)(n_))) +#define FLOPS_DPOTRS(n_, nrhs_) (FMULS_POTRS((double)(n_), (double)(nrhs_)) + FADDS_POTRS((double)(n_), (double)(nrhs_))) -namespace hiop { +namespace hiop +{ -/** - * Solver based on Magma's GPU interface for 'dsytrf' using Bunch-Kaufmann. This is +/** + * Solver based on Magma's GPU interface for 'dsytrf' using Bunch-Kaufmann. This is * a numerically stable factorization with decent peak performance on large matrices. - * + * * @note: The option "compute_mode" decides whether this class is instantiated (values - * "gpu" and "hybrid"); otherwise, for "cpu", the LAPACK-based counterpart linear solver - * class is used. However, this class works with all the values for "mem_space" option. - * Regardless of the value of the "mem_space" option (and when instructed by the - * "compute_mode" option mentioned above) this class will always perform the factorization - * and the triangular solves on the device. The host-device communication will be minimal - * (only scalars) for gpu compute mode; under hybrid compute mode, the class will offload + * "gpu" and "hybrid"); otherwise, for "cpu", the LAPACK-based counterpart linear solver + * class is used. However, this class works with all the values for "mem_space" option. + * Regardless of the value of the "mem_space" option (and when instructed by the + * "compute_mode" option mentioned above) this class will always perform the factorization + * and the triangular solves on the device. The host-device communication will be minimal + * (only scalars) for gpu compute mode; under hybrid compute mode, the class will offload * the matrix and rhs to the device. - * + * */ class hiopLinSolverSymDenseMagmaBuKa : public hiopLinSolverSymDense @@ -106,35 +105,34 @@ class hiopLinSolverSymDenseMagmaBuKa : public hiopLinSolverSymDense /** Solves a linear system. * param 'x' is on entry the right hand side(s) of the system to be solved. On - * exit is contains the solution(s). + * exit is contains the solution(s). */ bool solve(hiopVector& x_in); - inline hiopMatrixDense& sysMatrix() - { - return *M_; - } + inline hiopMatrixDense& sysMatrix() { return *M_; } + protected: /** - * Computes inertia of matrix, namely the triplet of non-negative numbers + * Computes inertia of matrix, namely the triplet of non-negative numbers * consisting of the counts of positive, negative, and null eigenvalues * - * @pre The system matrix is factorized and, as a result, `ipiv` has been + * @pre The system matrix is factorized and, as a result, `ipiv` has been * also updated properly. */ - virtual bool compute_inertia(int n, int *ipiv_in, int& posEig, int& negEig, int& zeroEig); + virtual bool compute_inertia(int n, int* ipiv_in, int& posEig, int& negEig, int& zeroEig); + protected: int* ipiv_; /// array storing the inertia (on the device pointer) - int* dinert_; + int* dinert_; magma_queue_t magma_device_queue_; magmaDouble_ptr device_M_, device_rhs_; magma_int_t ldda_, lddb_; + private: hiopLinSolverSymDenseMagmaBuKa() { assert(false); } }; - /** * Solver class for MAGMA symmetric indefinite GPU factorization "_nopiv". This * is as much as twice faster but numerically less stable than the above @@ -152,36 +150,32 @@ class hiopLinSolverSymDenseMagmaNopiv : public hiopLinSolverSymDense /** Triggers a refactorization of the matrix, if necessary. */ int matrixChanged(); - /** + /** * Solves a linear system with the right-hand side `x`. This is also an out * parameter and on exit it contains the solution. */ bool solve(hiopVector& x); - inline hiopMatrixDense& sysMatrix() - { - return *M_; - } + inline hiopMatrixDense& sysMatrix() { return *M_; } + protected: /** - * Computes inertia of matrix, namely the triplet of non-negative numbers + * Computes inertia of matrix, namely the triplet of non-negative numbers * of positive, negative, and null eigenvalues. This method runs on device and * accesses the device pointer(s). All the parameters reside on device. * * @pre The system matrix is factorized and is present on the device. - * + * */ - bool compute_inertia(int n, int& posEigvals, int& negEigvals, int& zeroEigvals); + bool compute_inertia(int n, int& posEigvals, int& negEigvals, int& zeroEigvals); protected: magma_queue_t magma_device_queue_; magmaDouble_ptr device_M_, device_rhs_; magma_int_t ldda_, lddb_; + private: - hiopLinSolverSymDenseMagmaNopiv() - { - assert(false); - } + hiopLinSolverSymDenseMagmaNopiv() { assert(false); } }; #if 0 @@ -234,10 +228,9 @@ class hiopLinSolverSymDenseMagmaBuKa_old2 : public hiopLinSolverSymDense hiopLinSolverSymDenseMagmaBuKa_old2() { assert(false); } }; -#endif //0 - +#endif // 0 -} //end namespace hiop +} // end namespace hiop -#endif //of HIOP_USE_MAGMA +#endif // of HIOP_USE_MAGMA #endif diff --git a/src/LinAlg/hiopLinSolverSymSparseMA57.cpp b/src/LinAlg/hiopLinSolverSymSparseMA57.cpp index 2d2a2b206..3663badec 100644 --- a/src/LinAlg/hiopLinSolverSymSparseMA57.cpp +++ b/src/LinAlg/hiopLinSolverSymSparseMA57.cpp @@ -4,260 +4,256 @@ namespace hiop { - hiopLinSolverSymSparseMA57::hiopLinSolverSymSparseMA57(const int& n, const int& nnz, hiopNlpFormulation* nlp) +hiopLinSolverSymSparseMA57::hiopLinSolverSymSparseMA57(const int& n, const int& nnz, hiopNlpFormulation* nlp) : hiopLinSolverSymSparse(n, nnz, nlp) - { - constructor_part(); - n_ = n; - nnz_ = nnz; - } +{ + constructor_part(); + n_ = n; + nnz_ = nnz; +} - hiopLinSolverSymSparseMA57::hiopLinSolverSymSparseMA57(hiopMatrixSparse* M, hiopNlpFormulation* nlp) +hiopLinSolverSymSparseMA57::hiopLinSolverSymSparseMA57(hiopMatrixSparse* M, hiopNlpFormulation* nlp) : hiopLinSolverSymSparse(M, nlp) - { - constructor_part(); - } - hiopLinSolverSymSparseMA57::~hiopLinSolverSymSparseMA57() - { - delete [] irowM_; - delete [] jcolM_; - delete [] ifact_; - delete [] fact_; - delete [] keep_; - delete [] iwork_; - delete [] dwork_; - delete resid_; - delete rhs_; - } +{ + constructor_part(); +} +hiopLinSolverSymSparseMA57::~hiopLinSolverSymSparseMA57() +{ + delete[] irowM_; + delete[] jcolM_; + delete[] ifact_; + delete[] fact_; + delete[] keep_; + delete[] iwork_; + delete[] dwork_; + delete resid_; + delete rhs_; +} + +void hiopLinSolverSymSparseMA57::constructor_part() +{ + irowM_ = nullptr; + jcolM_ = nullptr; + lifact_ = 0; + ifact_ = nullptr; + lfact_ = 0; + fact_ = nullptr; + lkeep_ = 0; + keep_ = nullptr; + iwork_ = nullptr; + dwork_ = nullptr; + ipessimism_ = 1.05; + rpessimism_ = 1.05; + n_ = 0; + nnz_ = 0; + rhs_ = nullptr; + resid_ = nullptr; + pivot_tol_ = 1e-8; + pivot_max_ = 1e-4; + pivot_changed_ = false; + + MA57ID(cntl_, icntl_); + + /* + * initialize MA57 parameters + */ + icntl_[1 - 1] = 0; // don't print warning messages + icntl_[2 - 1] = 0; // no Warning messages + icntl_[4 - 1] = 1; // no statistics messages + icntl_[5 - 1] = 0; // no Print messages. + icntl_[6 - 1] = 2; // 2 use MC47; + // 3 min degree ordering as in MA27; + // 4 use Metis; + // 5 automatic choice(MA47 or Metis); + icntl_[7 - 1] = 1; // Pivoting strategy. + icntl_[9 - 1] = 10; // up to 10 steps of iterative refinement + icntl_[11 - 1] = 16; + icntl_[12 - 1] = 16; + icntl_[15 - 1] = 0; + icntl_[16 - 1] = 0; + + cntl_[1 - 1] = pivot_tol_; // pivot tolerance +} + +void hiopLinSolverSymSparseMA57::firstCall() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(nnz_ <= M_->numberOfNonzeros()); + assert(n_ > 0); - void hiopLinSolverSymSparseMA57::constructor_part() - { - irowM_ = nullptr; - jcolM_ = nullptr; - lifact_ = 0; - ifact_ = nullptr; - lfact_ = 0; - fact_ = nullptr; - lkeep_ = 0; - keep_ = nullptr; - iwork_ = nullptr; - dwork_ = nullptr; - ipessimism_ = 1.05; - rpessimism_ = 1.05; - n_ = 0; - nnz_ = 0; - rhs_ = nullptr; - resid_ = nullptr; - pivot_tol_ = 1e-8; - pivot_max_ = 1e-4; - pivot_changed_ = false; - - MA57ID( cntl_, icntl_ ); - - /* - * initialize MA57 parameters - */ - icntl_[1-1] = 0; // don't print warning messages - icntl_[2-1] = 0; // no Warning messages - icntl_[4-1] = 1; // no statistics messages - icntl_[5-1] = 0; // no Print messages. - icntl_[6-1] = 2; // 2 use MC47; - // 3 min degree ordering as in MA27; - // 4 use Metis; - // 5 automatic choice(MA47 or Metis); - icntl_[7-1] = 1; // Pivoting strategy. - icntl_[9-1] = 10; // up to 10 steps of iterative refinement - icntl_[11-1] = 16; - icntl_[12-1] = 16; - icntl_[15-1] = 0; - icntl_[16-1] = 0; - - cntl_[1-1] = pivot_tol_; // pivot tolerance - } + assert(nullptr == irowM_); - void hiopLinSolverSymSparseMA57::firstCall() - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(nnz_<=M_->numberOfNonzeros()); - assert(n_>0); - - assert(nullptr==irowM_); - - irowM_ = new int[nnz_]; - jcolM_ = new int[nnz_]; - - fill_triplet_index_arrays(); - - lkeep_ = ( nnz_ > n_ ) ? (5 * n_ + 2 * nnz_ + 42) : (6 * n_ + nnz_ + 42); - keep_ = new int[lkeep_]{0}; // Initialize to 0 as otherwise MA57ED can sometimes fail - - iwork_ = new int[5 * n_]; - dwork_ = new double[n_]; - - MA57AD( &n_, &nnz_, irowM_, jcolM_, &lkeep_, keep_, iwork_, icntl_, info_, rinfo_ ); - - lfact_ = (int) (rpessimism_ * info_[8]); - fact_ = new double[lfact_]; - - lifact_ = (int) (ipessimism_ * info_[9]); - ifact_ = new int[lifact_]; - } + irowM_ = new int[nnz_]; + jcolM_ = new int[nnz_]; + + fill_triplet_index_arrays(); + lkeep_ = (nnz_ > n_) ? (5 * n_ + 2 * nnz_ + 42) : (6 * n_ + nnz_ + 42); + keep_ = new int[lkeep_]{0}; // Initialize to 0 as otherwise MA57ED can sometimes fail - int hiopLinSolverSymSparseMA57::matrixChanged() - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(nnz_<=M_->numberOfNonzeros()); - assert(n_>0); + iwork_ = new int[5 * n_]; + dwork_ = new double[n_]; - nlp_->runStats.linsolv.tmFactTime.start(); + MA57AD(&n_, &nnz_, irowM_, jcolM_, &lkeep_, keep_, iwork_, icntl_, info_, rinfo_); - if(!keep_) { - this->firstCall(); - } + lfact_ = (int)(rpessimism_ * info_[8]); + fact_ = new double[lfact_]; - bool done{false}; - bool is_singular{false}; + lifact_ = (int)(ipessimism_ * info_[9]); + ifact_ = new int[lifact_]; +} - //get the triplet values array and copy the entries into it (different behavior for triplet and csr implementations) - double* Mvals = get_triplet_values_array(); - fill_triplet_values_array(Mvals); - - do { - MA57BD(&n_, &nnz_, Mvals, fact_, &lfact_, ifact_, &lifact_, &lkeep_, keep_, iwork_, icntl_, cntl_, info_, rinfo_ ); +int hiopLinSolverSymSparseMA57::matrixChanged() +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(nnz_ <= M_->numberOfNonzeros()); + assert(n_ > 0); - switch( info_[0] ) { - case 0: + nlp_->runStats.linsolv.tmFactTime.start(); + + if(!keep_) { + this->firstCall(); + } + + bool done{false}; + bool is_singular{false}; + + // get the triplet values array and copy the entries into it (different behavior for triplet and csr implementations) + double* Mvals = get_triplet_values_array(); + fill_triplet_values_array(Mvals); + + do { + MA57BD(&n_, &nnz_, Mvals, fact_, &lfact_, ifact_, &lifact_, &lkeep_, keep_, iwork_, icntl_, cntl_, info_, rinfo_); + + switch(info_[0]) { + case 0: + done = true; + break; + case -3: { + // Failure due to insufficient REAL space on a call to MA57B/BD + int ic{0}, intTemp{0}; + int lnfact = (int)(info_[16] * rpessimism_); + double* newfact = new double[lnfact]; + + MA57ED(&n_, &ic, keep_, fact_, &info_[1], newfact, &lnfact, ifact_, &info_[1], &intTemp, &lifact_, info_); + + delete[] fact_; + fact_ = newfact; + lfact_ = lnfact; + }; break; + case -4: { + // Failure due to insufficient INTEGER space on a call to MA57B/BD + int ic = 1; + int lnifact = (int)(info_[17] * ipessimism_); + int* nifact = new int[lnifact]; + MA57ED(&n_, &ic, keep_, fact_, &lfact_, fact_, &lfact_, ifact_, &lifact_, nifact, &lnifact, info_); + delete[] ifact_; + ifact_ = nifact; + lifact_ = lnifact; + }; break; + case 4: { + // Matrix is rank deficient on exit from MA57B/BD. + is_singular = true; + done = true; + }; break; + default: + if(info_[0] >= 0) { done = true; - break; - case -3: { - //Failure due to insufficient REAL space on a call to MA57B/BD - int ic{0}, intTemp{0}; - int lnfact = (int) (info_[16] * rpessimism_); - double * newfact = new double[lnfact]; - - MA57ED(&n_, &ic, keep_, fact_, &info_[1], newfact, &lnfact, ifact_, &info_[1], &intTemp, &lifact_, info_ ); - - delete [] fact_; - fact_ = newfact; - lfact_ = lnfact; - }; - break; - case -4: { - // Failure due to insufficient INTEGER space on a call to MA57B/BD - int ic = 1; - int lnifact = (int) (info_[17] * ipessimism_); - int * nifact = new int[ lnifact ]; - MA57ED(&n_, &ic, keep_, fact_, &lfact_, fact_, &lfact_, ifact_, &lifact_, nifact, &lnifact, info_ ); - delete [] ifact_; - ifact_ = nifact; - lifact_ = lnifact; - }; - break; - case 4: { - //Matrix is rank deficient on exit from MA57B/BD. - is_singular=true; - done=true; - }; - break; - default: - if(info_[0] >= 0) { - done = true; - } - assert( "unknown error!" && 0 ); - } // end switch - } while( !done ); - - nlp_->runStats.linsolv.tmFactTime.stop(); - nlp_->runStats.linsolv.tmInertiaComp.start(); - - int negEigVal{0}; - if(is_singular) { - negEigVal = -1; - } else { - negEigVal = info_[24-1]; - } - - nlp_->runStats.linsolv.tmInertiaComp.stop(); - - return negEigVal; + } + assert("unknown error!" && 0); + } // end switch + } while(!done); + + nlp_->runStats.linsolv.tmFactTime.stop(); + nlp_->runStats.linsolv.tmInertiaComp.start(); + + int negEigVal{0}; + if(is_singular) { + negEigVal = -1; + } else { + negEigVal = info_[24 - 1]; } - bool hiopLinSolverSymSparseMA57::solve(hiopVector& x_in) - { - assert(n_==M_->n() && M_->n()==M_->m()); - assert(nnz_<=M_->numberOfNonzeros()); - assert(n_>0); - assert(x_in.get_size()==M_->n()); - - nlp_->runStats.linsolv.tmTriuSolves.start(); - - int job = 1; // full solve - icntl_[9-1] = 1; // do one step of iterative refinement - - hiopVector* x = dynamic_cast(&x_in); - assert(x!=nullptr); - - if(nullptr==rhs_) { - rhs_ = dynamic_cast(x->new_copy()); - assert(rhs_); - assert(nullptr==resid_); - resid_ = dynamic_cast(x->new_copy()); - assert(resid_); - } else { - rhs_->copyFrom(*x); - resid_->copyFrom(*x); - } - - double* dx = x->local_data(); - double* drhs = rhs_->local_data(); - double* dresid = resid_->local_data(); - -// MA57CD( &job, &n_, fact_, &lfact_, ifact_, &lifact_, -// &one, drhs, &n_, dwork_, &n_, iwork_, icntl_, info_ ); -// x->copyFrom(*rhs_); - - // M_->M() for triplet or internal triplet values (values_) for CSR - double* Mvals = get_triplet_values_array(); - - MA57DD(&job, - &n_, - &nnz_, - Mvals, - irowM_, - jcolM_, - fact_, - &lfact_, - ifact_, - &lifact_, - drhs, - dx, - dresid, - dwork_, - iwork_, - icntl_, - cntl_, - info_, - rinfo_ ); - - if (info_[0]<0){ - nlp_->log->printf(hovError, "hiopLinSolverSymSparseMA57: MA57 returned error %d\n", info_[0]); - } else if(info_[0]>0) { - nlp_->log->printf(hovError, "hiopLinSolverSymSparseMA57: MA57 returned warning %d\n", info_[0]); - } - - nlp_->runStats.linsolv.tmTriuSolves.stop(); - - return info_[0]==0; + nlp_->runStats.linsolv.tmInertiaComp.stop(); + + return negEigVal; +} + +bool hiopLinSolverSymSparseMA57::solve(hiopVector& x_in) +{ + assert(n_ == M_->n() && M_->n() == M_->m()); + assert(nnz_ <= M_->numberOfNonzeros()); + assert(n_ > 0); + assert(x_in.get_size() == M_->n()); + + nlp_->runStats.linsolv.tmTriuSolves.start(); + + int job = 1; // full solve + icntl_[9 - 1] = 1; // do one step of iterative refinement + + hiopVector* x = dynamic_cast(&x_in); + assert(x != nullptr); + + if(nullptr == rhs_) { + rhs_ = dynamic_cast(x->new_copy()); + assert(rhs_); + assert(nullptr == resid_); + resid_ = dynamic_cast(x->new_copy()); + assert(resid_); + } else { + rhs_->copyFrom(*x); + resid_->copyFrom(*x); } - bool hiopLinSolverSymSparseMA57::increase_pivot_tol() - { - pivot_changed_ = false; - if(pivot_tol_ < pivot_max_) { - pivot_tol_ = fmin(pivot_max_, pow(pivot_tol_, 0.75)); - pivot_changed_ = true; - } - return pivot_changed_; + double* dx = x->local_data(); + double* drhs = rhs_->local_data(); + double* dresid = resid_->local_data(); + + // MA57CD( &job, &n_, fact_, &lfact_, ifact_, &lifact_, + // &one, drhs, &n_, dwork_, &n_, iwork_, icntl_, info_ ); + // x->copyFrom(*rhs_); + + // M_->M() for triplet or internal triplet values (values_) for CSR + double* Mvals = get_triplet_values_array(); + + MA57DD(&job, + &n_, + &nnz_, + Mvals, + irowM_, + jcolM_, + fact_, + &lfact_, + ifact_, + &lifact_, + drhs, + dx, + dresid, + dwork_, + iwork_, + icntl_, + cntl_, + info_, + rinfo_); + + if(info_[0] < 0) { + nlp_->log->printf(hovError, "hiopLinSolverSymSparseMA57: MA57 returned error %d\n", info_[0]); + } else if(info_[0] > 0) { + nlp_->log->printf(hovError, "hiopLinSolverSymSparseMA57: MA57 returned warning %d\n", info_[0]); + } + + nlp_->runStats.linsolv.tmTriuSolves.stop(); + + return info_[0] == 0; +} + +bool hiopLinSolverSymSparseMA57::increase_pivot_tol() +{ + pivot_changed_ = false; + if(pivot_tol_ < pivot_max_) { + pivot_tol_ = fmin(pivot_max_, pow(pivot_tol_, 0.75)); + pivot_changed_ = true; } -} //end namespace hiop + return pivot_changed_; +} +} // end namespace hiop diff --git a/src/LinAlg/hiopLinSolverSymSparseMA57.hpp b/src/LinAlg/hiopLinSolverSymSparseMA57.hpp index af5a55128..a8a2a00bd 100644 --- a/src/LinAlg/hiopLinSolverSymSparseMA57.hpp +++ b/src/LinAlg/hiopLinSolverSymSparseMA57.hpp @@ -54,82 +54,127 @@ #include "hiopMatrixSparseCSRSeq.hpp" #include "FortranCInterface.hpp" -#define MA57ID FC_GLOBAL(ma57id, MA57ID) -#define MA57AD FC_GLOBAL(ma57ad, MA57AD) -#define MA57BD FC_GLOBAL(ma57bd, MA57BD) -#define MA57CD FC_GLOBAL(ma57cd, MA57CD) -#define MA57DD FC_GLOBAL(ma57dd, MA57DD) -#define MA57ED FC_GLOBAL(ma57ed, MA57ED) +#define MA57ID FC_GLOBAL(ma57id, MA57ID) +#define MA57AD FC_GLOBAL(ma57ad, MA57AD) +#define MA57BD FC_GLOBAL(ma57bd, MA57BD) +#define MA57CD FC_GLOBAL(ma57cd, MA57CD) +#define MA57DD FC_GLOBAL(ma57dd, MA57DD) +#define MA57ED FC_GLOBAL(ma57ed, MA57ED) /** implements the linear solver class using the HSL MA57 solver * * @ingroup LinearSolvers */ -namespace hiop { +namespace hiop +{ extern "C" { - void MA57ID( double cntl[], int icntl[] ); - - void MA57AD( int * n, int * ne, int irn[], - int jcn[], int * lkeep, int keep[], - int iwork[], int icntl[], int info[], - double rinfo[] ); - - void MA57BD( int * n, int * ne, double a[], - double fact[], int * lfact, int ifact[], - int * lifact, int * lkeep, int keep[], - int ppos[], int * icntl, double cntl[], - int info[], double rinfo[] ); - void MA57CD( int * job, int * n, double fact[], - int * lfact, int ifact[], int * lifact, - int * nrhs, double rhs[], int * lrhs, - double w[], int * lw, int iw1[], - int icntl[], int info[]); - void MA57DD( int * job, int * n, int * ne, - double a[], int irn[], int jcn[], - double fact[], int * lfact, int ifact[], - int * lifact, double rhs[], double x[], - double resid[], double w[], int iw[], - int icntl[], double cntl[], int info[], - double rinfo[] ); - void MA57ED( int * n, int * ic, int keep[], - double fact[], int * lfact, double * newfac, - int * lnew, int ifact[], int * lifact, - int newifc[], int * linew, int * info ); +void MA57ID(double cntl[], int icntl[]); + +void MA57AD(int* n, + int* ne, + int irn[], + int jcn[], + int* lkeep, + int keep[], + int iwork[], + int icntl[], + int info[], + double rinfo[]); + +void MA57BD(int* n, + int* ne, + double a[], + double fact[], + int* lfact, + int ifact[], + int* lifact, + int* lkeep, + int keep[], + int ppos[], + int* icntl, + double cntl[], + int info[], + double rinfo[]); +void MA57CD(int* job, + int* n, + double fact[], + int* lfact, + int ifact[], + int* lifact, + int* nrhs, + double rhs[], + int* lrhs, + double w[], + int* lw, + int iw1[], + int icntl[], + int info[]); +void MA57DD(int* job, + int* n, + int* ne, + double a[], + int irn[], + int jcn[], + double fact[], + int* lfact, + int ifact[], + int* lifact, + double rhs[], + double x[], + double resid[], + double w[], + int iw[], + int icntl[], + double cntl[], + int info[], + double rinfo[]); +void MA57ED(int* n, + int* ic, + int keep[], + double fact[], + int* lfact, + double* newfac, + int* lnew, + int ifact[], + int* lifact, + int newifc[], + int* linew, + int* info); } - -/** +/** * Wrapper class for using MA57 solver to solve symmetric sparse indefinite KKT linearizations. - * - * This class uses a triplet sparse matrix (member `M_`) to store the KKT linear system. This matrix + * + * This class uses a triplet sparse matrix (member `M_`) to store the KKT linear system. This matrix * is populated by the KKT linsys classes. -*/ -class hiopLinSolverSymSparseMA57: public hiopLinSolverSymSparse + */ +class hiopLinSolverSymSparseMA57 : public hiopLinSolverSymSparse { public: /// Constructor that allocates and ownes the system matrix hiopLinSolverSymSparseMA57(const int& n, const int& nnz, hiopNlpFormulation* nlp); /** - * Constructor that does not create nor owns the system matrix. Used by specializations of this + * Constructor that does not create nor owns the system matrix. Used by specializations of this * class that takes CSR matrix as input. */ hiopLinSolverSymSparseMA57(hiopMatrixSparse* M, hiopNlpFormulation* nlp); virtual ~hiopLinSolverSymSparseMA57(); + protected: - hiopLinSolverSymSparseMA57()=delete; + hiopLinSolverSymSparseMA57() = delete; /// Method holding the code common to the constructors. Initializes MA57 global parameters void constructor_part(); + public: - /** Triggers a refactorization of the matrix, if necessary. * Overload from base class. */ int matrixChanged(); - /** + /** * Solves a linear system. * @param x is on entry the right hand side(s) of the system to be solved. On * exit is contains the solution(s). */ @@ -137,146 +182,138 @@ class hiopLinSolverSymSparseMA57: public hiopLinSolverSymSparse protected: /** - * Fill `irowM_` and `jcolM_` by copying row and col indexes from the member matrix `M_`. Overridden by + * Fill `irowM_` and `jcolM_` by copying row and col indexes from the member matrix `M_`. Overridden by * specialized classes, such as the one that takes as input a CSR matrix. - * - * Note: the indexes should be only for the lower or only for the upper triangular part, as per MA57 + * + * Note: the indexes should be only for the lower or only for the upper triangular part, as per MA57 * requirement. Also, the indexes should be 1-based. */ virtual void fill_triplet_index_arrays() { assert(nnz_ == M_->numberOfNonzeros()); - for(int k=0; ki_row()[k]+1; - jcolM_[k] = M_->j_col()[k]+1; - } + for(int k = 0; k < nnz_; k++) { + irowM_[k] = M_->i_row()[k] + 1; + jcolM_[k] = M_->j_col()[k] + 1; + } } /// Return the pointer to array of triplet values that should be passed to MA57 - virtual double* get_triplet_values_array() - { - return M_->M(); - } - + virtual double* get_triplet_values_array() { return M_->M(); } + /// Fill the array passed as argument with the triplet nonzeros referred to by `irowM_` and `jcolM_` virtual void fill_triplet_values_array(double* values_triplet) { // no fill is required for this base "triplet" implementation since the triplet array is already populated assert(dynamic_cast(M_)); - assert(M_->M() == values_triplet); //pointers should coincide + assert(M_->M() == values_triplet); // pointers should coincide } - + protected: - int icntl_[20]; - int info_[40]; - double cntl_[5]; - double rinfo_[20]; + int icntl_[20]; + int info_[40]; + double cntl_[5]; + double rinfo_[20]; - int n_; // dimension of the whole matrix - int nnz_; // number of nonzeros in the matrix + int n_; // dimension of the whole matrix + int nnz_; // number of nonzeros in the matrix /// row indexes used by the factorization int* irowM_; - + /// col indexes used by the factorization - int* jcolM_; + int* jcolM_; // note: the values array is reused (from the sys matrix) - int lkeep_; // temporary storage - int* keep_; // temporary storage - int lifact_; // temporary storage - int* ifact_; // temporary storage - int lfact_; // temporary storage for the factorization process - double* fact_; // storage for the factors - double ipessimism_; // amounts by which to increase allocated factorization space - double rpessimism_; // amounts by which to increase allocated factorization space + int lkeep_; // temporary storage + int* keep_; // temporary storage + int lifact_; // temporary storage + int* ifact_; // temporary storage + int lfact_; // temporary storage for the factorization process + double* fact_; // storage for the factors + double ipessimism_; // amounts by which to increase allocated factorization space + double rpessimism_; // amounts by which to increase allocated factorization space int* iwork_; double* dwork_; - /// Right-hand side working array + /// Right-hand side working array hiopVector* rhs_; - /// Working array used for residual computation + /// Working array used for residual computation hiopVector* resid_; - + /// parameters to control pivoting double pivot_tol_; double pivot_max_; bool pivot_changed_; public: - - /** - * Called the very first time a matrix is factorized, this method allocates space for the - * factorization and performs ordering. + /** + * Called the very first time a matrix is factorized, this method allocates space for the + * factorization and performs ordering. */ virtual void firstCall(); // increase pivot tolarence virtual bool increase_pivot_tol(); +}; - }; - -/** - * MA57 solver class that takes CSR sparse input and offers the boilerplate to copy this into the internal - * triplet matrix used with MA57 API. - * - * The CSR matrix is understood to be symmetric. The underlying CSR storage can contain all the nonzero entries - * or only the lower triangular part. In both cases, this class will copy ONLY the lower triangular entries to +/** + * MA57 solver class that takes CSR sparse input and offers the boilerplate to copy this into the internal + * triplet matrix used with MA57 API. + * + * The CSR matrix is understood to be symmetric. The underlying CSR storage can contain all the nonzero entries + * or only the lower triangular part. In both cases, this class will copy ONLY the lower triangular entries to * the underlying triplet storage. - */ + */ class hiopLinSolverSparseCsrMa57 : public hiopLinSolverSymSparseMA57 { public: - /// Constructor that takes a CSR matrix as input. + /// Constructor that takes a CSR matrix as input. hiopLinSolverSparseCsrMa57(hiopMatrixSparseCSRSeq* csr_in, hiopNlpFormulation* nlp_in) - : hiopLinSolverSymSparseMA57(csr_in, nlp_in), //csr input pointer not owned - values_(nullptr) + : hiopLinSolverSymSparseMA57(csr_in, nlp_in), // csr input pointer not owned + values_(nullptr) { - //count nnz for the lower triangle in the csr input + // count nnz for the lower triangle in the csr input index_type* i_rowptr = M_->i_row(); index_type* j_colidx = M_->j_col(); n_ = M_->m(); - + nnz_ = 0; - for(int r=0; r=j_colidx[itnz]) { + for(int r = 0; r < n_; ++r) { + for(int itnz = i_rowptr[r]; itnz < i_rowptr[r + 1]; ++itnz) { + if(r >= j_colidx[itnz]) { nnz_++; } } } values_ = new double[nnz_]; } - - virtual ~hiopLinSolverSparseCsrMa57() - { - delete[] values_; - } - + + virtual ~hiopLinSolverSparseCsrMa57() { delete[] values_; } + protected: hiopLinSolverSparseCsrMa57() = delete; /** * Fill `irowM_` and `jcolM_` by copying row and col indexes from the CSR matrix `mat_csr_`. - * - * Note: the indexes should be only for the lower or only for the upper triangular part, as per MA57 + * + * Note: the indexes should be only for the lower or only for the upper triangular part, as per MA57 * requirement. Also, the indexes should be 1-based. */ virtual void fill_triplet_index_arrays() { - assert(n_==M_->m()); - assert(nnz_<=M_->numberOfNonzeros()); + assert(n_ == M_->m()); + assert(nnz_ <= M_->numberOfNonzeros()); index_type* i_rowptr = M_->i_row(); index_type* j_colidx = M_->j_col(); #ifdef HIOP_DEEPCHECKS bool is_upper_tri = true; - for(index_type r=0; rj_colidx[itnz]) { + for(index_type r = 0; r < n_ && is_upper_tri; ++r) { + for(index_type itnz = i_rowptr[r]; itnz < i_rowptr[r + 1]; ++itnz) { + if(r > j_colidx[itnz]) { is_upper_tri = false; break; } @@ -289,33 +326,33 @@ class hiopLinSolverSparseCsrMa57 : public hiopLinSolverSymSparseMA57 } #endif index_type nnz_triplet = 0; - - for(index_type r=0; r=j_colidx[itnz]) { - irowM_[nnz_triplet] = r+1; - jcolM_[nnz_triplet] = j_colidx[itnz]+1; + + for(index_type r = 0; r < n_; ++r) { + for(index_type itnz = i_rowptr[r]; itnz < i_rowptr[r + 1]; ++itnz) { + if(r >= j_colidx[itnz]) { + irowM_[nnz_triplet] = r + 1; + jcolM_[nnz_triplet] = j_colidx[itnz] + 1; nnz_triplet++; } } } assert(nnz_ == nnz_triplet); } - + /// Fill the array passed as argument with nonzeros corresponding to triplet entries from `irowM_` and `jcolM_` virtual void fill_triplet_values_array(double* values_triplet) { - assert(values_ == values_triplet); //pointers should coincide - //copy lower triangular elements from M_ (which is CSR) to values_ - assert(n_==M_->m()); - assert(nnz_<=M_->numberOfNonzeros()); + assert(values_ == values_triplet); // pointers should coincide + // copy lower triangular elements from M_ (which is CSR) to values_ + assert(n_ == M_->m()); + assert(nnz_ <= M_->numberOfNonzeros()); const index_type* i_rowptr = M_->i_row(); const index_type* j_colidx = M_->j_col(); const double* Mvals = M_->M(); index_type nnz_triplet = 0; - for(index_type r=0; r=j_colidx[itnz]) { + for(index_type r = 0; r < n_; ++r) { + for(index_type itnz = i_rowptr[r]; itnz < i_rowptr[r + 1]; ++itnz) { + if(r >= j_colidx[itnz]) { values_triplet[nnz_triplet++] = Mvals[itnz]; } } @@ -324,15 +361,11 @@ class hiopLinSolverSparseCsrMa57 : public hiopLinSolverSymSparseMA57 } /// Return the pointer to array of triplet values that should be passed to MA57 - virtual double* get_triplet_values_array() - { - return values_; - } + virtual double* get_triplet_values_array() { return values_; } - protected: double* values_; }; -} // end namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopLinSolverUMFPACKZ.cpp b/src/LinAlg/hiopLinSolverUMFPACKZ.cpp index 8ef8c94e9..22863c06b 100644 --- a/src/LinAlg/hiopLinSolverUMFPACKZ.cpp +++ b/src/LinAlg/hiopLinSolverUMFPACKZ.cpp @@ -2,282 +2,285 @@ namespace hiop { - hiopLinSolverUMFPACKZ::hiopLinSolverUMFPACKZ(hiopMatrixComplexSparseTriplet& sysmat, - hiopNlpFormulation* nlp_/*=NULL*/) - : m_symbolic(nullptr), +hiopLinSolverUMFPACKZ::hiopLinSolverUMFPACKZ(hiopMatrixComplexSparseTriplet& sysmat, hiopNlpFormulation* nlp_ /*=NULL*/) + : m_symbolic(nullptr), m_numeric(nullptr), m_null(nullptr), nlp(nlp_), sys_mat(sysmat) - { - n = sys_mat.n(); - nnz = sys_mat.numberOfNonzeros(); +{ + n = sys_mat.n(); + nnz = sys_mat.numberOfNonzeros(); - m_colptr = new int[n+1]; - m_rowidx = new int[nnz]; - m_vals = new double[2*nnz]; + m_colptr = new int[n + 1]; + m_rowidx = new int[nnz]; + m_vals = new double[2 * nnz]; - // - // initialize UMFPACK control - // - //get the default control parameters - umfpack_zi_defaults(m_control); + // + // initialize UMFPACK control + // + // get the default control parameters + umfpack_zi_defaults(m_control); - //change the default controls - //m_control[UMFPACK_PRL] = 4; //printing/verbosity + // change the default controls + // m_control[UMFPACK_PRL] = 4; //printing/verbosity - // print the control parameters - umfpack_zi_report_control(m_control); + // print the control parameters + umfpack_zi_report_control(m_control); - //others: [UMFPACK_STRATEGY], [UMFPACK_ORDERING] + // others: [UMFPACK_STRATEGY], [UMFPACK_ORDERING] - // - // m_info needs no initialization - // + // + // m_info needs no initialization + // +} +hiopLinSolverUMFPACKZ::~hiopLinSolverUMFPACKZ() +{ + if(m_symbolic) { + umfpack_di_free_symbolic(&m_symbolic); + m_symbolic = NULL; } - hiopLinSolverUMFPACKZ::~hiopLinSolverUMFPACKZ() - { - if(m_symbolic) { - umfpack_di_free_symbolic(&m_symbolic); - m_symbolic = NULL; - } - if(m_numeric) { - umfpack_di_free_numeric(&m_numeric) ; - m_numeric = NULL; - } - - delete[] m_colptr; - delete[] m_rowidx; - delete[] m_vals; - //delete[] m_valsim; + if(m_numeric) { + umfpack_di_free_numeric(&m_numeric); + m_numeric = NULL; } - - int hiopLinSolverUMFPACKZ::matrixChanged() - { - assert(n==sys_mat.n()); - assert(nnz == sys_mat.numberOfNonzeros()); - //UMFPACK does not handle zero-dimensioned arrays - if(n==0) return 0; - int status; - - // - // copy from sys_mat triplets to UMFPACK's column form sparse format - // - const int* irow = sys_mat.storage()->i_row(); - const int* jcol = sys_mat.storage()->j_col(); - const std::complex* M = sys_mat.storage()->M(); - - //Note: sys_mat is ordered on (i,j) (first on i and then on j) - //but we'll just use the umfpack's conversion routine - - // oh boy - { - //double Aval[nnz], Avalz[nnz]; - //for(int i=0; i(M); - - //for(int it=0;it<10; it++) - //printf("[%d,%d]=%g+%g*i\n", irow[it], jcol[it], Aval[2*it], Aval[2*it+1]); - //printf("n=%d nnz=%d\n", n, nnz); - //printf("begin-------------------------------------------------\n"); - //umfpack_zi_report_triplet(n, n, nnz, irow, jcol, Aval, NULL, m_control); - //printf("end -------------------------------------------------\n"); - - // activate the so-called "packed" complex form by passing Avalz=NULL and - // Avals with real and imaginary interleaved - //Note that complex interleaves real with imag (as per C++ standard) - double* Avalz = NULL; - status = umfpack_zi_triplet_to_col(n, n, nnz, - irow, jcol, Aval, Avalz, - m_colptr, m_rowidx, m_vals, (double*) NULL, (int*) NULL); - if(status<0) { - umfpack_zi_report_status (m_control, status); - printf("umfpack_zi_triplet_to_col failed\n"); - return -1; - } - // print the column-form of A - //printf ("\nA: "); - //umfpack_zi_report_matrix (n, n, m_colptr, m_rowidx, m_vals, (double*) NULL, 1, m_control) ; - } - - status = umfpack_zi_symbolic(n, n, m_colptr, m_rowidx, m_vals, (double*) NULL, - &m_symbolic, m_control, m_info); - if(status<0) { - //printf("[start]report info on symbolic factorization\n"); - umfpack_zi_report_info (m_control, m_info); - //printf("[done ]report info on symbolic factorization\n"); - - umfpack_zi_report_status (m_control, status); - printf("UMFPACK: error in the symbolic factorization: status=%d\n", status); - return -1; - } - //umfpack_zi_report_symbolic (m_symbolic, m_control) ; - - status = umfpack_zi_numeric(m_colptr, m_rowidx, m_vals, (double*) NULL, - m_symbolic, &m_numeric, m_control, m_info); - if(status<0) { - umfpack_zi_report_info (m_control, m_info) ; - umfpack_zi_report_status (m_control, status) ; - printf("[%d] UMFPACK: error in the numeric factorization: status=%d\n", - UMFPACK_ERROR_n_nonpositive, status); - return -1; - } - // print the numeric factorization - //printf ("\nNumeric factorization of A: ") ; - //(void) umfpack_zi_report_numeric (Numeric, Control) ; - - return 0; - } + delete[] m_colptr; + delete[] m_rowidx; + delete[] m_vals; + // delete[] m_valsim; +} + +int hiopLinSolverUMFPACKZ::matrixChanged() +{ + assert(n == sys_mat.n()); + assert(nnz == sys_mat.numberOfNonzeros()); + // UMFPACK does not handle zero-dimensioned arrays + if(n == 0) return 0; + int status; + + // + // copy from sys_mat triplets to UMFPACK's column form sparse format + // + const int* irow = sys_mat.storage()->i_row(); + const int* jcol = sys_mat.storage()->j_col(); + const std::complex* M = sys_mat.storage()->M(); - bool hiopLinSolverUMFPACKZ::solve(const std::complex* rhs_in, std::complex* x) + // Note: sys_mat is ordered on (i,j) (first on i and then on j) + // but we'll just use the umfpack's conversion routine + + // oh boy { - const double* rhs = reinterpret_cast(rhs_in); - double* sol = reinterpret_cast(x); - int status = umfpack_zi_solve(UMFPACK_A, m_colptr, m_rowidx, m_vals, (double*) NULL, - sol, (double*) NULL, - rhs, (double*) NULL, - m_numeric, m_control, m_info); - if(status<0) { - umfpack_zi_report_info(m_control, m_info); + // double Aval[nnz], Avalz[nnz]; + // for(int i=0; i(M); + + // for(int it=0;it<10; it++) + // printf("[%d,%d]=%g+%g*i\n", irow[it], jcol[it], Aval[2*it], Aval[2*it+1]); + // printf("n=%d nnz=%d\n", n, nnz); + // printf("begin-------------------------------------------------\n"); + // umfpack_zi_report_triplet(n, n, nnz, irow, jcol, Aval, NULL, m_control); + // printf("end -------------------------------------------------\n"); + + // activate the so-called "packed" complex form by passing Avalz=NULL and + // Avals with real and imaginary interleaved + // Note that complex interleaves real with imag (as per C++ standard) + double* Avalz = NULL; + status = + umfpack_zi_triplet_to_col(n, n, nnz, irow, jcol, Aval, Avalz, m_colptr, m_rowidx, m_vals, (double*)NULL, (int*)NULL); + if(status < 0) { umfpack_zi_report_status(m_control, status); - printf("umfpack_zi_solve failed\n"); - return false; + printf("umfpack_zi_triplet_to_col failed\n"); + return -1; } - return true; - - //norm of residual - //double resnrm = resid_abs_norm(n, m_colptr, m_rowidx, m_vals, sol, rhs); - //printf("solve %d -> abs resid abs nrm: %g\n", col_current, resnrm); + // print the column-form of A + // printf ("\nA: "); + // umfpack_zi_report_matrix (n, n, m_colptr, m_rowidx, m_vals, (double*) NULL, 1, m_control) ; } - - bool hiopLinSolverUMFPACKZ::solve(hiopVector& x) - { - assert(false && "not yet implemented"); //not needed; also there is no complex vector at this point - return true; + + status = umfpack_zi_symbolic(n, n, m_colptr, m_rowidx, m_vals, (double*)NULL, &m_symbolic, m_control, m_info); + if(status < 0) { + // printf("[start]report info on symbolic factorization\n"); + umfpack_zi_report_info(m_control, m_info); + // printf("[done ]report info on symbolic factorization\n"); + + umfpack_zi_report_status(m_control, status); + printf("UMFPACK: error in the symbolic factorization: status=%d\n", status); + return -1; } + // umfpack_zi_report_symbolic (m_symbolic, m_control) ; - bool hiopLinSolverUMFPACKZ::solve(hiopMatrix& X) - { - assert(false && "not yet implemented"); //not needed; - return true; + status = umfpack_zi_numeric(m_colptr, m_rowidx, m_vals, (double*)NULL, m_symbolic, &m_numeric, m_control, m_info); + if(status < 0) { + umfpack_zi_report_info(m_control, m_info); + umfpack_zi_report_status(m_control, status); + printf("[%d] UMFPACK: error in the numeric factorization: status=%d\n", UMFPACK_ERROR_n_nonpositive, status); + return -1; } - - bool hiopLinSolverUMFPACKZ::solve(const hiopMatrixComplexSparseTriplet& B, hiopMatrixComplexDense& X) - { - assert(X.n()==B.n()); - assert(n==B.m()); - assert(n==X.m()); - - if(n==0) return true; - - int nrhs = X.n(); - if(0==nrhs) return true; - - const int* B_irow = B.storage()->i_row(); - const int* B_jcol = B.storage()->j_col(); - const auto*B_M = B.storage()->M(); - const int B_nnz = B.numberOfNonzeros(); - std::complex** X_M = X.get_M(); - - double rhs[2*n]; - double sol[2*n]; - - // Columns of B need to be copied into the rhs array. - // B is triplet format, ordered after rows then after cols. - // To avoid scanning B for each rhs / column of B, we keep indexes (array - // of size n) of each of (i, col) of the last seen column 'col' in B_irow and B_jcol - - int idxsB_col[n]; - idxsB_col[0]=0; - - int status; - for(int col_current=0; col_current abs resid abs nrm: %g\n", col_current, resnrm); - - //copy to X - for(int row=0; row(sol[2*row], sol[2*row+1]); - } - } //end of for loop over columns - return true; - // printf ("\nx (solution of Ax=b): ") ; - // (void) umfpack_zi_report_vector (n, x, xz, Control) ; - // rnorm = resid (FALSE, Ap, Ai, Ax, Az) ; - // printf ("maxnorm of residual: %g\n\n", rnorm) ; +bool hiopLinSolverUMFPACKZ::solve(const std::complex* rhs_in, std::complex* x) +{ + const double* rhs = reinterpret_cast(rhs_in); + double* sol = reinterpret_cast(x); + int status = umfpack_zi_solve(UMFPACK_A, + m_colptr, + m_rowidx, + m_vals, + (double*)NULL, + sol, + (double*)NULL, + rhs, + (double*)NULL, + m_numeric, + m_control, + m_info); + if(status < 0) { + umfpack_zi_report_info(m_control, m_info); + umfpack_zi_report_status(m_control, status); + printf("umfpack_zi_solve failed\n"); + return false; } + return true; - double hiopLinSolverUMFPACKZ::resid_abs_norm(int n, int* Ap, int* Ai, double* Ax/*packed*/, - double* x, double* b) - { - double resid[2*n]; - - for(int i=0; i<2*n; i++) resid[i]=-b[i]; - int i; - for(int j=0; j abs resid abs nrm: %g\n", col_current, resnrm); +} + +bool hiopLinSolverUMFPACKZ::solve(hiopVector& x) +{ + assert(false && "not yet implemented"); // not needed; also there is no complex vector at this point + return true; +} + +bool hiopLinSolverUMFPACKZ::solve(hiopMatrix& X) +{ + assert(false && "not yet implemented"); // not needed; + return true; +} + +bool hiopLinSolverUMFPACKZ::solve(const hiopMatrixComplexSparseTriplet& B, hiopMatrixComplexDense& X) +{ + assert(X.n() == B.n()); + assert(n == B.m()); + assert(n == X.m()); + + if(n == 0) return true; + + int nrhs = X.n(); + if(0 == nrhs) return true; + + const int* B_irow = B.storage()->i_row(); + const int* B_jcol = B.storage()->j_col(); + const auto* B_M = B.storage()->M(); + const int B_nnz = B.numberOfNonzeros(); + std::complex** X_M = X.get_M(); + + double rhs[2 * n]; + double sol[2 * n]; + + // Columns of B need to be copied into the rhs array. + // B is triplet format, ordered after rows then after cols. + // To avoid scanning B for each rhs / column of B, we keep indexes (array + // of size n) of each of (i, col) of the last seen column 'col' in B_irow and B_jcol + + int idxsB_col[n]; + idxsB_col[0] = 0; + + int status; + for(int col_current = 0; col_current < nrhs; col_current++) { + // update idxB_col + for(int row = 0; row < n; row++) { + if(row != 0) idxsB_col[row] = idxsB_col[row - 1]; + + assert(idxsB_col[row] <= B_nnz); + // skip all elems in previous rows + while(idxsB_col[row] < B_nnz && B_irow[idxsB_col[row]] < row) { + idxsB_col[row]++; + } + // skip elems in current row till 'col_current' is found or an higher column + // is found, which means elem at (row,col_current) is 0.0 + while(idxsB_col[row] < B_nnz && B_irow[idxsB_col[row]] == row && B_jcol[idxsB_col[row]] < col_current) { + idxsB_col[row]++; } + assert(idxsB_col[row] <= B_nnz); + + if(idxsB_col[row] < B_nnz && B_irow[idxsB_col[row]] == row && B_jcol[idxsB_col[row]] == col_current) { + rhs[2 * row] = B_M[idxsB_col[row]].real(); + rhs[2 * row + 1] = B_M[idxsB_col[row]].imag(); + } else { + rhs[2 * row] = rhs[2 * row + 1] = 0.; + } + } + + // solve for rhs. NULL pointers mean we work with packed complex arrays (re and imag + // are interleaved contiguously) + status = umfpack_zi_solve(UMFPACK_A, + m_colptr, + m_rowidx, + m_vals, + (double*)NULL, + sol, + (double*)NULL, + rhs, + (double*)NULL, + m_numeric, + m_control, + m_info); + if(status < 0) { + umfpack_zi_report_info(m_control, m_info); + umfpack_zi_report_status(m_control, status); + printf("eumfpack_zi_solve failed for rhs=%d", col_current); + return false; + } + + // norm of residual + // double resnrm = resid_abs_norm(n, m_colptr, m_rowidx, m_vals, sol, rhs); + // printf("solve %d -> abs resid abs nrm: %g\n", col_current, resnrm); + + // copy to X + for(int row = 0; row < n; row++) { + X_M[row][col_current] = std::complex(sol[2 * row], sol[2 * row + 1]); } + } // end of for loop over columns + return true; + // printf ("\nx (solution of Ax=b): ") ; + // (void) umfpack_zi_report_vector (n, x, xz, Control) ; + // rnorm = resid (FALSE, Ap, Ai, Ax, Az) ; + // printf ("maxnorm of residual: %g\n\n", rnorm) ; +} + +double hiopLinSolverUMFPACKZ::resid_abs_norm(int n, int* Ap, int* Ai, double* Ax /*packed*/, double* x, double* b) +{ + double resid[2 * n]; - char chnorm='M'; - int M=1, N=n, LDA=1; - return ZLANGE(&chnorm, &M, &N, reinterpret_cast(resid), &LDA, NULL); + for(int i = 0; i < 2 * n; i++) resid[i] = -b[i]; + int i; + for(int j = 0; j < n; j++) { + for(int p = Ap[j]; p < Ap[j + 1]; p++) { + i = Ai[p]; + resid[2 * i] += Ax[2 * p] * x[2 * j]; + resid[2 * i] -= Ax[2 * p + 1] * x[2 * j + 1]; + + resid[2 * i + 1] += Ax[2 * p + 1] * x[2 * j]; + resid[2 * i + 1] += Ax[2 * p] * x[2 * j + 1]; + } } - -} //end namespace hiop + + char chnorm = 'M'; + int M = 1, N = n, LDA = 1; + return ZLANGE(&chnorm, &M, &N, reinterpret_cast(resid), &LDA, NULL); +} + +} // end namespace hiop diff --git a/src/LinAlg/hiopLinSolverUMFPACKZ.hpp b/src/LinAlg/hiopLinSolverUMFPACKZ.hpp index fd233e6ff..97a44dea4 100644 --- a/src/LinAlg/hiopLinSolverUMFPACKZ.hpp +++ b/src/LinAlg/hiopLinSolverUMFPACKZ.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_LINSOLVER_UMFPACKZ @@ -58,52 +58,50 @@ namespace hiop { - /* - Note: the following methods of hiopLinSolver are NOT - implemented in this class: - - solve (hiopVector) - - solve (hiopMatrix) - */ - class hiopLinSolverUMFPACKZ : public hiopLinSolver - { - public: - hiopLinSolverUMFPACKZ(hiopMatrixComplexSparseTriplet& sysmat, hiopNlpFormulation* nlp_=NULL); - virtual ~hiopLinSolverUMFPACKZ(); - - /** Triggers a refactorization of the matrix, if necessary. - * Returns -1 if trouble in factorization is encountered. */ - virtual int matrixChanged(); - - /** solves a linear system. - * param 'x' is on entry the right hand side(s) of the system to be solved. On - * exit is contains the solution(s). */ - virtual bool solve(hiopVector& x); - virtual bool solve(hiopMatrix& X); - virtual bool solve(const hiopMatrixComplexSparseTriplet& B, hiopMatrixComplexDense& X); +/* + Note: the following methods of hiopLinSolver are NOT + implemented in this class: + - solve (hiopVector) + - solve (hiopMatrix) +*/ +class hiopLinSolverUMFPACKZ : public hiopLinSolver +{ +public: + hiopLinSolverUMFPACKZ(hiopMatrixComplexSparseTriplet& sysmat, hiopNlpFormulation* nlp_ = NULL); + virtual ~hiopLinSolverUMFPACKZ(); - /** same as above but right-side and solution are separated */ - virtual bool solve(const std::complex* rhs, std::complex* x); - private: - void* m_symbolic; - void* m_numeric; - double* m_null; + /** Triggers a refactorization of the matrix, if necessary. + * Returns -1 if trouble in factorization is encountered. */ + virtual int matrixChanged(); - hiopNlpFormulation* nlp; - - int *m_colptr, *m_rowidx; - double *m_vals; //size 2*nnz !!! - const hiopMatrixComplexSparseTriplet& sys_mat; - int n, nnz; + /** solves a linear system. + * param 'x' is on entry the right hand side(s) of the system to be solved. On + * exit is contains the solution(s). */ + virtual bool solve(hiopVector& x); + virtual bool solve(hiopMatrix& X); + virtual bool solve(const hiopMatrixComplexSparseTriplet& B, hiopMatrixComplexDense& X); - double m_control [UMFPACK_CONTROL], m_info [UMFPACK_INFO]; + /** same as above but right-side and solution are separated */ + virtual bool solve(const std::complex* rhs, std::complex* x); - private: - //returns the "abs" norm of the residual A*x-b - double resid_abs_norm(int n, int* Ap, int* Ai, double* Ax/*packed*/, - double* x/*packed*/, - double* b/*packed*/); - }; -} //end namespace hiop +private: + void* m_symbolic; + void* m_numeric; + double* m_null; -#endif + hiopNlpFormulation* nlp; + + int *m_colptr, *m_rowidx; + double* m_vals; // size 2*nnz !!! + const hiopMatrixComplexSparseTriplet& sys_mat; + int n, nnz; + double m_control[UMFPACK_CONTROL], m_info[UMFPACK_INFO]; + +private: + // returns the "abs" norm of the residual A*x-b + double resid_abs_norm(int n, int* Ap, int* Ai, double* Ax /*packed*/, double* x /*packed*/, double* b /*packed*/); +}; +} // end namespace hiop + +#endif diff --git a/src/LinAlg/hiopLinearOperator.cpp b/src/LinAlg/hiopLinearOperator.cpp index cf900b941..9fae6bd2e 100644 --- a/src/LinAlg/hiopLinearOperator.cpp +++ b/src/LinAlg/hiopLinearOperator.cpp @@ -47,11 +47,11 @@ // product endorsement purposes. /* implements the linear solver class using the PARDISO solver -* @file hiopLinearOperator.cpp -* @ingroup LinearSolvers -* @author Nai-Yuan Chiang , LLNL -* @author Cosmin G. Petra , LLNL -*/ + * @file hiopLinearOperator.cpp + * @ingroup LinearSolvers + * @author Nai-Yuan Chiang , LLNL + * @author Cosmin G. Petra , LLNL + */ #include "hiopVector.hpp" #include "hiopMatrix.hpp" @@ -64,36 +64,34 @@ namespace hiop { /********************************************************************** - * hiopMatVecOpr implementation - **********************************************************************/ - hiopMatVecOpr::hiopMatVecOpr(hiopMatrix* mat) + * hiopMatVecOpr implementation + **********************************************************************/ +hiopMatVecOpr::hiopMatVecOpr(hiopMatrix* mat) : mMat_(mat) - { - } +{} - bool hiopMatVecOpr::times_vec(hiopVector& y, const hiopVector& x) - { - if(mMat_) { - assert(x.get_local_size() == mMat_->n()); - assert(y.get_local_size() == mMat_->m()); - mMat_->timesVec(0.0, y, 1.0, x); - } else { - y.copyFrom(x); - } - return true; +bool hiopMatVecOpr::times_vec(hiopVector& y, const hiopVector& x) +{ + if(mMat_) { + assert(x.get_local_size() == mMat_->n()); + assert(y.get_local_size() == mMat_->m()); + mMat_->timesVec(0.0, y, 1.0, x); + } else { + y.copyFrom(x); } + return true; +} - bool hiopMatVecOpr::trans_times_vec(hiopVector& y, const hiopVector& x) - { - if(mMat_) { - assert(x.get_local_size() == mMat_->m()); - assert(y.get_local_size() == mMat_->n()); - mMat_->transTimesVec(0.0, y, 1.0, x); - } else { - y.copyFrom(x); - } - return true; +bool hiopMatVecOpr::trans_times_vec(hiopVector& y, const hiopVector& x) +{ + if(mMat_) { + assert(x.get_local_size() == mMat_->m()); + assert(y.get_local_size() == mMat_->n()); + mMat_->transTimesVec(0.0, y, 1.0, x); + } else { + y.copyFrom(x); } + return true; +} -}; - +}; // namespace hiop diff --git a/src/LinAlg/hiopLinearOperator.hpp b/src/LinAlg/hiopLinearOperator.hpp index 6bb8c3b91..6203ac8d5 100644 --- a/src/LinAlg/hiopLinearOperator.hpp +++ b/src/LinAlg/hiopLinearOperator.hpp @@ -47,11 +47,11 @@ // product endorsement purposes. /* implements the linear solver class using the PARDISO solver -* @file hiopLinearOperator.hpp -* @ingroup LinearSolvers -* @author Nai-Yuan Chiang , LLNL -* @author Cosmin G. Petra , LLNL -*/ + * @file hiopLinearOperator.hpp + * @ingroup LinearSolvers + * @author Nai-Yuan Chiang , LLNL + * @author Cosmin G. Petra , LLNL + */ #ifndef HIOP_MATVECOPR #define HIOP_MATVECOPR @@ -66,14 +66,14 @@ namespace hiop class hiopKKTLinSys; /** - * The abstract interface to a mat-vec operation required by + * The abstract interface to a mat-vec operation required by * the iterative solvers. */ class hiopLinearOperator { public: - hiopLinearOperator(){}; - virtual ~hiopLinearOperator(){}; + hiopLinearOperator() {}; + virtual ~hiopLinearOperator() {}; /** y = Mat * x */ virtual bool times_vec(hiopVector& y, const hiopVector& x) = 0; @@ -82,11 +82,12 @@ class hiopLinearOperator virtual bool trans_times_vec(hiopVector& y, const hiopVector& x) = 0; }; -/** +/** * An implementation of the abstract class @hiopLinearOperator that performs a mat-vec operation * with both the matrix and vector being on the same processor. */ -class hiopMatVecOpr : public hiopLinearOperator { +class hiopMatVecOpr : public hiopLinearOperator +{ public: hiopMatVecOpr(hiopMatrix* mat); virtual ~hiopMatVecOpr() {}; @@ -101,6 +102,6 @@ class hiopMatVecOpr : public hiopLinearOperator { hiopMatrix* mMat_; }; -}; +}; // namespace hiop #endif diff --git a/src/LinAlg/hiopMatrix.hpp b/src/LinAlg/hiopMatrix.hpp index 6b9f543f9..3b7f97dcc 100644 --- a/src/LinAlg/hiopMatrix.hpp +++ b/src/LinAlg/hiopMatrix.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov and Juraj Kardos // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_MATRIX @@ -59,62 +59,62 @@ class hiopVector; class hiopVectorPar; class hiopMatrixDense; -/* See readme.md for some conventions on matrices */ +/* See readme.md for some conventions on matrices */ class hiopMatrix { public: hiopMatrix() {} virtual ~hiopMatrix() {} - virtual hiopMatrix* alloc_clone() const=0; - virtual hiopMatrix* new_copy() const=0; + virtual hiopMatrix* alloc_clone() const = 0; + virtual hiopMatrix* new_copy() const = 0; - virtual void setToZero()=0; - virtual void setToConstant(double c)=0; + virtual void setToZero() = 0; + virtual void setToConstant(double c) = 0; /// @brief y = beta * y + alpha * this * x - virtual void timesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x ) const = 0; + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const = 0; /// @brief y = beta * y + alpha * this^T * x - virtual void transTimesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x ) const = 0; + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const = 0; /// @brief W = beta*W + alpha*this*X virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const = 0; /// @brief W = beta*W + alpha*this^T*X - virtual void transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const =0; + virtual void transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const = 0; /// @brief W = beta*W + alpha*this*X^T - virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const =0; + virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const = 0; /// @brief this += alpha * (sub)diag virtual void addDiagonal(const double& alpha, const hiopVector& d_) = 0; virtual void addDiagonal(const double& value) = 0; - + /** - * @brief subdigonal(this) += alpha*d + * @brief subdigonal(this) += alpha*d * - * Adds elements of 'd' to the diagonal of 'this' starting at 'start_on_dest_diag'. + * Adds elements of 'd' to the diagonal of 'this' starting at 'start_on_dest_diag'. * Precondition: start_on_dest_diag + length(d) <= n_local_ - * + * * @pre _this_ is local/non-distributed */ virtual void addSubDiagonal(const double& alpha, index_type start_on_dest_diag, const hiopVector& d) = 0; - /** + /** * @brief subdigonal(this) += alpha*d * * Adds to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' + * 'd' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * if num_elems>=0, otherwise the remaining elems in 'd' starting at 'start_on_src_vec'. * * @pre _this_ is local/non-distributed */ virtual void addSubDiagonal(int start_on_dest_diag, - const double& alpha, const hiopVector& d, - int start_on_src_vec, int num_elems=-1) = 0; + const double& alpha, + const hiopVector& d, + int start_on_src_vec, + int num_elems = -1) = 0; - /** + /** * @brief subdiagonal(this) += c * * Adds the constant @param c to the diagonal starting at @param start_on_dest_diag @@ -122,9 +122,8 @@ class hiopMatrix * * @pre _this_ is local/non-distributed */ - virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, - const double& c) = 0; - + virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) = 0; + /// @brief this += alpha*X virtual void addMatrix(double alpha, const hiopMatrix& X) = 0; @@ -136,20 +135,22 @@ class hiopMatrix * The functionality of this method is needed only for general (non-symmetric) matrices and, for this * reason, only general matrices classes implement/need to implement this method. * - * @pre transpose of 'this' fits in the upper triangle of W + * @pre transpose of 'this' fits in the upper triangle of W * @pre W.n() == W.m() * @pre 'this' and W are local/non-distributed matrices */ - virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, int col_dest_start, - double alpha, hiopMatrixDense& W) const = 0; + virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, + hiopMatrixDense& W) const = 0; /** * @brief diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where * 'this' should start to contribute to. - * + * * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK * and only the upper triangle of 'this' is accessed. - * + * * This functionality of this method is needed only for symmetric matrices and, for this reason, * only symmetric matrices classes implement/need to implement it. * @@ -157,52 +158,50 @@ class hiopMatrix * @pre W.n() == W.m() * @pre 'this' and W are local/non-distributed matrices */ - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, - hiopMatrixDense& W) const = 0; + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const = 0; /** * @brief Copy 'n_rows' rows specified by 'rows_idxs' (array of size 'n_rows') from 'src' to 'this' - * + * * @pre 'this' has exactly 'n_rows' rows * @pre 'src' and 'this' must have same number of columns * @pre number of rows in 'src' must be at least the number of rows in 'this' */ virtual void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows) = 0; - + virtual double max_abs_value() = 0; /** - * @brief Find the maximum absolute value in each row of `this` matrix, and return them in `ret_vec` - * - * @pre 'ret_vec' has exactly same number of rows as `this` matrix - */ - virtual void row_max_abs_value(hiopVector &ret_vec) = 0; + * @brief Find the maximum absolute value in each row of `this` matrix, and return them in `ret_vec` + * + * @pre 'ret_vec' has exactly same number of rows as `this` matrix + */ + virtual void row_max_abs_value(hiopVector& ret_vec) = 0; /** - * @brief Scale each row of `this` matrix, according to the component of `ret_vec`. - * - * if inv_scale=false: - * this[i] = ret_vec[i]*this[i] - * else - * this[i] = (1/ret_vec[i])*this[i] - * - * @pre 'ret_vec' has exactly same number of rows as `this` matrix - */ - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale) = 0; + * @brief Scale each row of `this` matrix, according to the component of `ret_vec`. + * + * if inv_scale=false: + * this[i] = ret_vec[i]*this[i] + * else + * this[i] = (1/ret_vec[i])*this[i] + * + * @pre 'ret_vec' has exactly same number of rows as `this` matrix + */ + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale) = 0; /** @brief return false is any of the entry is a nan, inf, or denormalized */ virtual bool isfinite() const = 0; - + /** * @brief call with -1 to print all rows, all columns, or on all ranks; otherwise will * will print the first rows and/or columns on the specified rank. - * - * If the underlying matrix is sparse, maxCols is ignored and a max number elements + * + * If the underlying matrix is sparse, maxCols is ignored and a max number elements * given by the value of 'maxRows' will be printed. If this value is negative, all * elements will be printed. */ - virtual void print(FILE* f=NULL, const char* msg=NULL, int maxRows=-1, int maxCols=-1, int rank=-1) const = 0; + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const = 0; /// @brief number of rows virtual size_type m() const = 0; @@ -210,15 +209,15 @@ class hiopMatrix /// @brief number of columns virtual size_type n() const = 0; #ifdef HIOP_DEEPCHECKS - /** + /** * @brief Checks symmetry for locally/non-distributed matrices: returns true if the absolute difference * (i,j) and (j,i) entries is less than @param tol, otherwise return false and assert(false) * * For distributed matrices, this function returns false (and assert(false)). */ - virtual bool assertSymmetry(double tol=1e-16) const = 0; + virtual bool assertSymmetry(double tol = 1e-16) const = 0; #endif }; -} +} // namespace hiop #endif diff --git a/src/LinAlg/hiopMatrixComplexDense.cpp b/src/LinAlg/hiopMatrixComplexDense.cpp index ec3dbea25..48646ddc3 100644 --- a/src/LinAlg/hiopMatrixComplexDense.cpp +++ b/src/LinAlg/hiopMatrixComplexDense.cpp @@ -6,315 +6,321 @@ namespace hiop { - hiopMatrixComplexDense::hiopMatrixComplexDense(const size_type& m, - const size_type& glob_n, - index_type* col_part/*=NULL*/, - MPI_Comm comm/*=MPI_COMM_SELF*/, - const size_type& m_max_alloc/*=-1*/) - { - m_local_=m; n_global_=glob_n; - comm_=comm; - int P=0; - if(col_part) { +hiopMatrixComplexDense::hiopMatrixComplexDense(const size_type& m, + const size_type& glob_n, + index_type* col_part /*=NULL*/, + MPI_Comm comm /*=MPI_COMM_SELF*/, + const size_type& m_max_alloc /*=-1*/) +{ + m_local_ = m; + n_global_ = glob_n; + comm_ = comm; + int P = 0; + if(col_part) { #ifdef HIOP_USE_MPI - int ierr=MPI_Comm_rank(comm_, &P); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Comm_rank(comm_, &P); + assert(ierr == MPI_SUCCESS); #endif - glob_jl_=col_part[P]; glob_ju_=col_part[P+1]; - } else { - glob_jl_=0; glob_ju_=n_global_; - } - n_local_=glob_ju_-glob_jl_; - - myrank_ = P; - - max_rows_=m_max_alloc; - if(max_rows_==-1) max_rows_=m_local_; - assert(max_rows_>=m_local_ && - "the requested extra allocation is smaller than the allocation needed by the matrix"); - - M=new std::complex*[max_rows_==0?1:max_rows_]; - M[0] = max_rows_==0?NULL:new std::complex[max_rows_*n_local_]; - for(int i=1; i*[max_rows_==0?1:max_rows_]; - - M[0] = max_rows_==0?NULL:new std::complex[max_rows_*n_local_]; - - for(int i=1; i)); - } - } - - void hiopMatrixComplexDense::copyFrom(const std::complex* buffer) - { - if(NULL==buffer) { - M[0] = NULL; - } else { - memcpy(M[0], buffer, m_local_*n_local_*sizeof(std::complex)); - } - } + myrank_ = P; - void hiopMatrixComplexDense::copyRowsFrom(const hiopMatrix& src_gen, - const index_type* rows_idxs, - size_type n_rows) - { - const hiopMatrixComplexDense& src = dynamic_cast(src_gen); - assert(n_global_==src.n_global_); - assert(n_local_==src.n_local_); - assert(n_rows<=src.m_local_); - assert(n_rows == m_local_); - - // todo //! opt - copy multiple consecutive rows at once ?!? - - //int i should suffice for this container - for(int i=0; i)); - } - } + max_rows_ = m_max_alloc; + if(max_rows_ == -1) max_rows_ = m_local_; + assert(max_rows_ >= m_local_ && "the requested extra allocation is smaller than the allocation needed by the matrix"); + + M = new std::complex*[max_rows_ == 0 ? 1 : max_rows_]; + M[0] = max_rows_ == 0 ? NULL : new std::complex[max_rows_ * n_local_]; + for(int i = 1; i < max_rows_; i++) M[i] = M[0] + i * n_local_; + + //! valgrind reports a shit load of errors without this; check this + for(int i = 0; i < max_rows_ * n_local_; i++) M[0][i] = 0.0; - void hiopMatrixComplexDense::setToZero() - { - setToConstant(0.0); + // internal buffers + buff_mxnlocal_ = NULL; +} +hiopMatrixComplexDense::~hiopMatrixComplexDense() +{ + if(buff_mxnlocal_) delete[] buff_mxnlocal_; + if(M) { + if(M[0]) delete[] M[0]; + delete[] M; } - void hiopMatrixComplexDense::setToConstant(double c) - { - std::complex cc=c; - setToConstant(cc); +} + +hiopMatrixComplexDense::hiopMatrixComplexDense(const hiopMatrixComplexDense& dm) +{ + n_local_ = dm.n_local_; + m_local_ = dm.m_local_; + n_global_ = dm.n_global_; + glob_jl_ = dm.glob_jl_; + glob_ju_ = dm.glob_ju_; + comm_ = dm.comm_; + myrank_ = dm.myrank_; + + // M=new double*[m_local_==0?1:m_local_]; + max_rows_ = dm.max_rows_; + M = new std::complex*[max_rows_ == 0 ? 1 : max_rows_]; + + M[0] = max_rows_ == 0 ? NULL : new std::complex[max_rows_ * n_local_]; + + for(int i = 1; i < max_rows_; i++) M[i] = M[0] + i * n_local_; + + buff_mxnlocal_ = NULL; +} + +void hiopMatrixComplexDense::copyFrom(const hiopMatrixComplexDense& dm) +{ + assert(n_local_ == dm.n_local_); + assert(m_local_ == dm.m_local_); + assert(n_global_ == dm.n_global_); + assert(glob_jl_ == dm.glob_jl_); + assert(glob_ju_ == dm.glob_ju_); + if(NULL == dm.M[0]) { + M[0] = NULL; + } else { + memcpy(M[0], dm.M[0], m_local_ * n_local_ * sizeof(std::complex)); } - void hiopMatrixComplexDense::setToConstant(std::complex& c) - { - auto buf=M[0]; - //! optimization needed -> use zcopy if exists - for(int j=0; j* buffer) +{ + if(NULL == buffer) { + M[0] = NULL; + } else { + memcpy(M[0], buffer, m_local_ * n_local_ * sizeof(std::complex)); } +} + +void hiopMatrixComplexDense::copyRowsFrom(const hiopMatrix& src_gen, const index_type* rows_idxs, size_type n_rows) +{ + const hiopMatrixComplexDense& src = dynamic_cast(src_gen); + assert(n_global_ == src.n_global_); + assert(n_local_ == src.n_local_); + assert(n_rows <= src.m_local_); + assert(n_rows == m_local_); - void hiopMatrixComplexDense::negate() - { - auto buf=M[0]; - for(int j=0; j)); } +} + +void hiopMatrixComplexDense::setToZero() { setToConstant(0.0); } +void hiopMatrixComplexDense::setToConstant(double c) +{ + std::complex cc = c; + setToConstant(cc); +} +void hiopMatrixComplexDense::setToConstant(std::complex& c) +{ + auto buf = M[0]; + //! optimization needed -> use zcopy if exists + for(int j = 0; j < n_local_ * m_local_; j++) *(buf++) = c; +} + +void hiopMatrixComplexDense::negate() +{ + auto buf = M[0]; + for(int j = 0; j < n_local_ * m_local_; j++) buf[j] = -buf[j]; +} + +void hiopMatrixComplexDense::timesVec(std::complex beta_in, + std::complex* ya_, + std::complex alpha_in, + const std::complex* xa_in) const +{ + char fortranTrans = 'T'; + int MM = m_local_, NN = n_local_, incx_y = 1; + + dcomplex beta; + beta.re = beta_in.real(); + beta.im = beta_in.imag(); + + dcomplex alpha; + alpha.re = alpha_in.real(); + alpha.im = alpha_in.imag(); - void hiopMatrixComplexDense::timesVec(std::complex beta_in, - std::complex* ya_, - std::complex alpha_in, - const std::complex* xa_in) const - { - char fortranTrans='T'; - int MM=m_local_, NN=n_local_, incx_y=1; - - dcomplex beta; - beta.re = beta_in.real(); - beta.im = beta_in.imag(); - - dcomplex alpha; - alpha.re = alpha_in.real(); - alpha.im = alpha_in.imag(); - - dcomplex* ya = reinterpret_cast(ya_); - const dcomplex* xa = reinterpret_cast(xa_in); - dcomplex* Ma = reinterpret_cast(&M[0][0]); + dcomplex* ya = reinterpret_cast(ya_); + const dcomplex* xa = reinterpret_cast(xa_in); + dcomplex* Ma = reinterpret_cast(&M[0][0]); #ifdef HIOP_USE_MPI - assert(n_local_ == n_global_ && "timesVec for distributed matrices not supported/not needed"); + assert(n_local_ == n_global_ && "timesVec for distributed matrices not supported/not needed"); #endif - if( MM != 0 && NN != 0 ) { - // the arguments seem reversed but so is trans='T' - // required since we keep the matrix row-wise, while the Fortran/BLAS expects them column-wise - ZGEMV( &fortranTrans, &NN, &MM, &alpha, Ma, &NN, - xa, &incx_y, &beta, ya, &incx_y ); + if(MM != 0 && NN != 0) { + // the arguments seem reversed but so is trans='T' + // required since we keep the matrix row-wise, while the Fortran/BLAS expects them column-wise + ZGEMV(&fortranTrans, &NN, &MM, &alpha, Ma, &NN, xa, &incx_y, &beta, ya, &incx_y); + } else { + if(MM != 0) { + int one = 1; + ZSCAL(&NN, &beta, ya, &one); } else { - if( MM != 0 ) { - int one=1; - ZSCAL(&NN, &beta, ya, &one); - } else { - assert(MM==0); - return; - } - } - } - - bool hiopMatrixComplexDense::isfinite() const - { - for(int i=0; im_local_) maxRows=m_local_; - if(maxCols>n_local_) maxCols=n_local_; - - if(msg) { - fprintf(f, "%s (local_dims=[%d,%d])\n", msg, m_local_,n_local_); - } else { - fprintf(f, - "hiopMatrixComplexDense::printing max=[%d,%d] (local_dims=[%d,%d], on rank=%d)\n", - maxRows, maxCols, m_local_, n_local_, myrank_); +} + +bool hiopMatrixComplexDense::isfinite() const +{ + for(int i = 0; i < m_local_; i++) + for(int j = 0; j < n_local_; j++) + if(false == std::isfinite(M[i][j].real()) || false == std::isfinite(M[i][j].imag())) return false; + return true; +} + +void hiopMatrixComplexDense::print(FILE* f, + const char* msg /*=NULL*/, + int maxRows /*=-1*/, + int maxCols /*=-1*/, + int rank /*=-1*/) const +{ + if(myrank_ == rank || rank == -1) { + if(NULL == f) f = stdout; + if(maxRows > m_local_) maxRows = m_local_; + if(maxCols > n_local_) maxCols = n_local_; + + if(msg) { + fprintf(f, "%s (local_dims=[%d,%d])\n", msg, m_local_, n_local_); + } else { + fprintf(f, + "hiopMatrixComplexDense::printing max=[%d,%d] (local_dims=[%d,%d], on rank=%d)\n", + maxRows, + maxCols, + m_local_, + n_local_, + myrank_); + } + maxRows = maxRows >= 0 ? maxRows : m_local_; + maxCols = maxCols >= 0 ? maxCols : n_local_; + fprintf(f, "["); + for(int i = 0; i < maxRows; i++) { + if(i > 0) { + fprintf(f, " "); + } + for(int j = 0; j < maxCols; j++) { + fprintf(f, "%8.5e+%8.5ei; ", M[i][j].real(), M[i][j].imag()); } - maxRows = maxRows>=0?maxRows:m_local_; - maxCols = maxCols>=0?maxCols:n_local_; - fprintf(f, "["); - for(int i=0; i0) { - fprintf(f, " "); - } - for(int j=0; jcopyFrom(*this); - return c; - } - double hiopMatrixComplexDense::max_abs_value() - { - char norm='M'; int one=1; int N=get_local_size_n() * get_local_size_m(); - hiop::dcomplex* MM = reinterpret_cast(M[0]); - - double maxv = ZLANGE(&norm, &one, &N, MM, &one, NULL); - return maxv; - } +hiopMatrixComplexDense* hiopMatrixComplexDense::alloc_clone() const +{ + hiopMatrixComplexDense* c = new hiopMatrixComplexDense(*this); + return c; +} - void hiopMatrixComplexDense::addMatrix(double alpha, const hiopMatrix& X_) - { - const hiopMatrixComplexDense& X = dynamic_cast(X_); - addMatrix(std::complex(alpha,0), X); - } - void hiopMatrixComplexDense::addMatrix(const std::complex& alpha, const hiopMatrixComplexDense& X) - { +hiopMatrixComplexDense* hiopMatrixComplexDense::new_copy() const +{ + hiopMatrixComplexDense* c = new hiopMatrixComplexDense(*this); + c->copyFrom(*this); + return c; +} +double hiopMatrixComplexDense::max_abs_value() +{ + char norm = 'M'; + int one = 1; + int N = get_local_size_n() * get_local_size_m(); + hiop::dcomplex* MM = reinterpret_cast(M[0]); + + double maxv = ZLANGE(&norm, &one, &N, MM, &one, NULL); + return maxv; +} + +void hiopMatrixComplexDense::addMatrix(double alpha, const hiopMatrix& X_) +{ + const hiopMatrixComplexDense& X = dynamic_cast(X_); + addMatrix(std::complex(alpha, 0), X); +} +void hiopMatrixComplexDense::addMatrix(const std::complex& alpha, const hiopMatrixComplexDense& X) +{ #ifdef HIOP_DEEPCHECKS - assert(m_local_==X.m_local_); - assert(n_local_==X.n_local_); + assert(m_local_ == X.m_local_); + assert(n_local_ == X.n_local_); #endif - hiop::dcomplex* Mdest= reinterpret_cast(M[0]); - hiop::dcomplex* Msrc = reinterpret_cast(X.M[0]); - hiop::dcomplex a; a.re=alpha.real(); a.im=alpha.imag(); - int N=m_local_*n_local_, inc=1; - ZAXPY(&N, &a, Msrc, &inc, Mdest, &inc); - } + hiop::dcomplex* Mdest = reinterpret_cast(M[0]); + hiop::dcomplex* Msrc = reinterpret_cast(X.M[0]); + hiop::dcomplex a; + a.re = alpha.real(); + a.im = alpha.imag(); + int N = m_local_ * n_local_, inc = 1; + ZAXPY(&N, &a, Msrc, &inc, Mdest, &inc); +} - /* this = this + alpha*X - * X is a general sparse matrix in triplet format (rows and cols indexes are assumed to be ordered) - */ - void hiopMatrixComplexDense::addSparseMatrix(const std::complex& alpha, - const hiopMatrixComplexSparseTriplet& X) - { - assert(m()==n()); - assert(X.m()==X.n()); - assert(m()==X.m()); - - if(alpha==0.) return; - - const int* X_irow = X.storage()->i_row(); - const int* X_jcol = X.storage()->j_col(); - const std::complex* X_M = X.storage()->M(); - - int nnz = X.numberOfNonzeros(); - - for(int it=0; it& alpha, - const hiopMatrixComplexSparseTriplet& X) - { - assert(m()==n()); - assert(X.m()==X.n()); - assert(m()==X.m()); - - if(alpha==0.) return; - - const int* X_irow = X.storage()->i_row(); - const int* X_jcol = X.storage()->j_col(); - const std::complex* X_M = X.storage()->M(); - - int nnz = X.numberOfNonzeros(); - - for(int it=0; it& alpha, const hiopMatrixComplexSparseTriplet& X) +{ + assert(m() == n()); + assert(X.m() == X.n()); + assert(m() == X.m()); + + if(alpha == 0.) return; + + const int* X_irow = X.storage()->i_row(); + const int* X_jcol = X.storage()->j_col(); + const std::complex* X_M = X.storage()->M(); + + int nnz = X.numberOfNonzeros(); + + for(int it = 0; it < nnz; it++) { + assert(X_irow[it] < m()); + assert(X_jcol[it] < n()); + M[X_irow[it]][X_jcol[it]] += alpha * X_M[it]; } +} + +/* uppertriangle(this) += uppertriangle(X) + * where X is a sparse matrix stored in triplet format holding only upper triangle elements*/ +void hiopMatrixComplexDense::addSparseSymUpperTriangleToSymDenseMatrixUpperTriangle(const std::complex& alpha, + const hiopMatrixComplexSparseTriplet& X) +{ + assert(m() == n()); + assert(X.m() == X.n()); + assert(m() == X.m()); + + if(alpha == 0.) return; + + const int* X_irow = X.storage()->i_row(); + const int* X_jcol = X.storage()->j_col(); + const std::complex* X_M = X.storage()->M(); -#ifdef HIOP_DEEPCHECKS - bool hiopMatrixComplexDense::assertSymmetry(double tol/*=1e-16*/) const - { - assert(n_global_==n_local_ && "not yet implemented for distributed matrices"); - if(n_global_!=n_local_) return false; - if(n_local_!=m_local_) return false; - - for(int i=0; itol) - return false; - return true; + int nnz = X.numberOfNonzeros(); + + for(int it = 0; it < nnz; it++) { + assert(X_irow[it] <= X_jcol[it]); + M[X_irow[it]][X_jcol[it]] += alpha * X_M[it]; } +} + +#ifdef HIOP_DEEPCHECKS +bool hiopMatrixComplexDense::assertSymmetry(double tol /*=1e-16*/) const +{ + assert(n_global_ == n_local_ && "not yet implemented for distributed matrices"); + if(n_global_ != n_local_) return false; + if(n_local_ != m_local_) return false; + + for(int i = 0; i < m_local_; i++) + for(int j = i + 1; j < n_local_; j++) + if(std::abs(M[i][j] - M[j][i]) > tol) return false; + return true; +} #endif -} //end namespace +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixComplexDense.hpp b/src/LinAlg/hiopMatrixComplexDense.hpp index 0c441a9ca..650aa2109 100644 --- a/src/LinAlg/hiopMatrixComplexDense.hpp +++ b/src/LinAlg/hiopMatrixComplexDense.hpp @@ -9,256 +9,246 @@ #include "hiopMatrixComplexSparseTriplet.hpp" - namespace hiop { - /* - Note: the following methods of hiopMatrix are NOT - implemented in this class: - - timesVec (both overloads) - - transTimesVec (both overloads) - - timesMat - - timesMat_local - - transTimesMat - - timesMatTran - - timesMatTran_local - - addDiagonal (both overloads) - - addSubDiagonal (all three overloads) - - transAddToSymDenseMatrixUpperTriangle - - addUpperTriangleToSymDenseMatrixUpperTriangle - - copyRowsFrom - - copyBlockFromMatrix - - copyFromMatrixBlock - */ - class hiopMatrixComplexDense : public hiopMatrix +/* +Note: the following methods of hiopMatrix are NOT +implemented in this class: + - timesVec (both overloads) + - transTimesVec (both overloads) + - timesMat + - timesMat_local + - transTimesMat + - timesMatTran + - timesMatTran_local + - addDiagonal (both overloads) + - addSubDiagonal (all three overloads) + - transAddToSymDenseMatrixUpperTriangle + - addUpperTriangleToSymDenseMatrixUpperTriangle + - copyRowsFrom + - copyBlockFromMatrix + - copyFromMatrixBlock +*/ +class hiopMatrixComplexDense : public hiopMatrix +{ +public: + hiopMatrixComplexDense(const size_type& m, + const size_type& glob_n, + index_type* col_part = NULL, + MPI_Comm comm = MPI_COMM_SELF, + const size_type& m_max_alloc = -1); + virtual ~hiopMatrixComplexDense(); + + virtual void setToZero(); + virtual void setToConstant(double c); + virtual void setToConstant(std::complex& c); + virtual void copyFrom(const hiopMatrixComplexDense& dm); + virtual void copyFrom(const std::complex* buffer); + + virtual void negate(); + + /* Copy 'n_rows' rows specified by 'rows_idxs' (array of size 'n_rows') from 'src' to 'this' + * + * Preconditions + * 1. 'this' has exactly 'n_rows' rows + * 2. 'src' and 'this' must have same number of columns + * 3. number of rows in 'src' must be at least the number of rows in 'this' + */ + void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows); + + virtual void timesVec(std::complex beta, + std::complex* y, + std::complex alpha, + const std::complex* x) const; + + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { - public: - hiopMatrixComplexDense(const size_type& m, - const size_type& glob_n, - index_type* col_part=NULL, - MPI_Comm comm=MPI_COMM_SELF, - const size_type& m_max_alloc=-1); - virtual ~hiopMatrixComplexDense(); - - virtual void setToZero(); - virtual void setToConstant(double c); - virtual void setToConstant(std::complex& c); - virtual void copyFrom(const hiopMatrixComplexDense& dm); - virtual void copyFrom(const std::complex* buffer); - - virtual void negate(); - - /* Copy 'n_rows' rows specified by 'rows_idxs' (array of size 'n_rows') from 'src' to 'this' - * - * Preconditions - * 1. 'this' has exactly 'n_rows' rows - * 2. 'src' and 'this' must have same number of columns - * 3. number of rows in 'src' must be at least the number of rows in 'this' - */ - void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows); - - virtual void timesVec(std::complex beta, - std::complex* y, - std::complex alpha, - const std::complex* x) const; - - virtual void timesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const - { - assert(false && "not yet supported"); - } - /* same as above for mostly for internal use - avoid using it */ - virtual void timesVec(double beta, double* y, - double alpha, const double* x) const - { - assert(false && "not yet supported"); - } + assert(false && "not yet supported"); + } + /* same as above for mostly for internal use - avoid using it */ + virtual void timesVec(double beta, double* y, double alpha, const double* x) const + { + assert(false && "not yet supported"); + } - virtual void transTimesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const - { - assert(false && "not yet supported"); - } - /* same as above for mostly for internal use - avoid using it */ - virtual void transTimesVec(double beta, double* y, - double alpha, const double* x) const - { - assert(false && "not yet supported"); - } - - // All methods taking an arguments 'hiopMatrix' will dynamic_cast the argument to - // 'complex' dense matrix (this class). Specialized multiplications with sparse matrices - // are to be done by the sparse matrix classes. Multiplications with double dense matrices - // are to be determined. - - /* W = beta*W + alpha*this*X */ - virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const - { - assert(false && "not yet implemented"); - } - virtual void timesMat_local(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const - { - assert(false && "not yet implemented"); - } + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const + { + assert(false && "not yet supported"); + } + /* same as above for mostly for internal use - avoid using it */ + virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const + { + assert(false && "not yet supported"); + } - //to be used only locally - virtual void transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const - { - assert(false && "not yet implemented"); - } - //to be used only locally - virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const - { - assert(false && "not yet implemented"); - } - virtual void timesMatTrans_local(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const - { - assert(false && "not yet implemented"); - } - virtual void addDiagonal(const double& alpha, const hiopVector& d_) - { - assert(false && "not yet supported"); - } - virtual void addDiagonal(const double& value) - { - assert(false && "not yet supported"); - } - virtual void addSubDiagonal(const double& alpha, index_type start_on_dest_diag, const hiopVector& d_) - { - assert(false && "not yet supported"); - } - /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' - * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, const double& alpha, - const hiopVector& d_, int start_on_src_vec, int num_elems=-1) - { - assert(false && "not yet supported"); - } - virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) - { - assert(false && "not yet supported"); - } - - virtual void addMatrix(double alpha, const hiopMatrix& X); - virtual void addMatrix(const std::complex& alpha, const hiopMatrixComplexDense& X); - - /* this = this + alpha*X - * X is a general sparse matrix in triplet format (rows and cols indexes are assumed to be ordered) - */ - void addSparseMatrix(const std::complex& alpha, - const hiopMatrixComplexSparseTriplet& X); - - /* uppertriangle(this) += alpha*uppertriangle(X) - * where X is a sparse matrix stored in triplet format holding only upper triangle elements */ - void addSparseSymUpperTriangleToSymDenseMatrixUpperTriangle(const std::complex& alpha, - const hiopMatrixComplexSparseTriplet& X); - - /* block of W += alpha*transpose(this) - * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK - * Preconditions: - * 1. transpose of 'this' has to fit in the upper triangle of W - * 2. W.n() == W.m() - */ - virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, int col_dest_start, - double alpha, hiopMatrixDense& W) const - { - assert(false && "not supported"); - } - - /* diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where - * 'this' should start to contribute. - * - * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK - * and only the upper triangle of 'this' is accessed - * - * Preconditions: - * 1. this->n()==this->m() - * 2. W.n() == W.m() - */ - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const - { - assert(false && "not supported"); - } - - virtual double max_abs_value(); - - virtual void row_max_abs_value(hiopVector &ret_vec){assert(0&&"not yet");} - - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale){assert(0&&"not yet");} - - virtual bool isfinite() const; - - //virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; - virtual void print(FILE* f=NULL, const char* msg=NULL, int maxRows=-1, int maxCols=-1, int rank=-1) const; - - // - // below are methods specific to this class - // - virtual hiopMatrixComplexDense* alloc_clone() const; - virtual hiopMatrixComplexDense* new_copy() const; - - - /* copy 'num_rows' rows from 'src' in this starting at 'row_dest' */ - void copyRowsFrom(const hiopMatrixComplexDense& src, int num_rows, int row_dest) - { - assert(false && "not yet implemented"); - } - /* copies 'src' into this as a block starting at (i_block_start,j_block_start) */ - /* copyMatrixAsBlock */ - void copyBlockFromMatrix(const index_type i_block_start, const index_type j_block_start, - const hiopMatrixComplexDense& src) - { - assert(false && "not yet implemented"); - } - - /* overwrites 'this' with 'src''s block starting at (i_src_block_start,j_src_block_start) - * and dimensions of this */ - void copyFromMatrixBlock(const hiopMatrixComplexDense& src, - const int i_src_block_start, - const int j_src_block_start) - { - assert(false && "not yet implemented"); - } - - inline size_type get_local_size_n() const { return n_local_; } - inline size_type get_local_size_m() const { return m_local_; } - - //TODO: this is not kosher! - inline std::complex** local_data() const { return M; } - inline std::complex* local_buffer() const { return M[0]; } - //do not use this unless you sure you know what you're doing - inline std::complex** get_M() { return M; } - - virtual size_type m() const {return m_local_;} - virtual size_type n() const {return n_global_;} + // All methods taking an arguments 'hiopMatrix' will dynamic_cast the argument to + // 'complex' dense matrix (this class). Specialized multiplications with sparse matrices + // are to be done by the sparse matrix classes. Multiplications with double dense matrices + // are to be determined. + + /* W = beta*W + alpha*this*X */ + virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not yet implemented"); + } + virtual void timesMat_local(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not yet implemented"); + } + + // to be used only locally + virtual void transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not yet implemented"); + } + // to be used only locally + virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not yet implemented"); + } + virtual void timesMatTrans_local(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not yet implemented"); + } + virtual void addDiagonal(const double& alpha, const hiopVector& d_) { assert(false && "not yet supported"); } + virtual void addDiagonal(const double& value) { assert(false && "not yet supported"); } + virtual void addSubDiagonal(const double& alpha, index_type start_on_dest_diag, const hiopVector& d_) + { + assert(false && "not yet supported"); + } + /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' + * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, + const hiopVector& d_, + int start_on_src_vec, + int num_elems = -1) + { + assert(false && "not yet supported"); + } + virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) + { + assert(false && "not yet supported"); + } + + virtual void addMatrix(double alpha, const hiopMatrix& X); + virtual void addMatrix(const std::complex& alpha, const hiopMatrixComplexDense& X); + + /* this = this + alpha*X + * X is a general sparse matrix in triplet format (rows and cols indexes are assumed to be ordered) + */ + void addSparseMatrix(const std::complex& alpha, const hiopMatrixComplexSparseTriplet& X); + + /* uppertriangle(this) += alpha*uppertriangle(X) + * where X is a sparse matrix stored in triplet format holding only upper triangle elements */ + void addSparseSymUpperTriangleToSymDenseMatrixUpperTriangle(const std::complex& alpha, + const hiopMatrixComplexSparseTriplet& X); + + /* block of W += alpha*transpose(this) + * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK + * Preconditions: + * 1. transpose of 'this' has to fit in the upper triangle of W + * 2. W.n() == W.m() + */ + virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, + hiopMatrixDense& W) const + { + assert(false && "not supported"); + } + + /* diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where + * 'this' should start to contribute. + * + * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK + * and only the upper triangle of 'this' is accessed + * + * Preconditions: + * 1. this->n()==this->m() + * 2. W.n() == W.m() + */ + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const + { + assert(false && "not supported"); + } + + virtual double max_abs_value(); + + virtual void row_max_abs_value(hiopVector& ret_vec) { assert(0 && "not yet"); } + + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale) { assert(0 && "not yet"); } + + virtual bool isfinite() const; + + // virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const; + + // + // below are methods specific to this class + // + virtual hiopMatrixComplexDense* alloc_clone() const; + virtual hiopMatrixComplexDense* new_copy() const; + + /* copy 'num_rows' rows from 'src' in this starting at 'row_dest' */ + void copyRowsFrom(const hiopMatrixComplexDense& src, int num_rows, int row_dest) + { + assert(false && "not yet implemented"); + } + /* copies 'src' into this as a block starting at (i_block_start,j_block_start) */ + /* copyMatrixAsBlock */ + void copyBlockFromMatrix(const index_type i_block_start, const index_type j_block_start, const hiopMatrixComplexDense& src) + { + assert(false && "not yet implemented"); + } + + /* overwrites 'this' with 'src''s block starting at (i_src_block_start,j_src_block_start) + * and dimensions of this */ + void copyFromMatrixBlock(const hiopMatrixComplexDense& src, const int i_src_block_start, const int j_src_block_start) + { + assert(false && "not yet implemented"); + } + + inline size_type get_local_size_n() const { return n_local_; } + inline size_type get_local_size_m() const { return m_local_; } + + // TODO: this is not kosher! + inline std::complex** local_data() const { return M; } + inline std::complex* local_buffer() const { return M[0]; } + // do not use this unless you sure you know what you're doing + inline std::complex** get_M() { return M; } + + virtual size_type m() const { return m_local_; } + virtual size_type n() const { return n_global_; } #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const; + virtual bool assertSymmetry(double tol = 1e-16) const; #endif - private: - std::complex** M; //local storage - size_type n_global_; //total / global number of columns - int m_local_, n_local_; //local number of rows and cols, respectively - size_type glob_jl_, glob_ju_; - MPI_Comm comm_; - int myrank_; - - mutable std::complex* buff_mxnlocal_; - - //this is very private do not touch :) - size_type max_rows_; - private: - hiopMatrixComplexDense() {}; - /** copy constructor, for internal/private use only (it doesn't copy the values) */ - hiopMatrixComplexDense(const hiopMatrixComplexDense&); - - inline std::complex* new_mxnlocal_buff() const { - if(buff_mxnlocal_==NULL) { - buff_mxnlocal_ = new std::complex[max_rows_*n_local_]; - } - return buff_mxnlocal_; +private: + std::complex** M; // local storage + size_type n_global_; // total / global number of columns + int m_local_, n_local_; // local number of rows and cols, respectively + size_type glob_jl_, glob_ju_; + MPI_Comm comm_; + int myrank_; + + mutable std::complex* buff_mxnlocal_; + + // this is very private do not touch :) + size_type max_rows_; + +private: + hiopMatrixComplexDense() {}; + /** copy constructor, for internal/private use only (it doesn't copy the values) */ + hiopMatrixComplexDense(const hiopMatrixComplexDense&); + + inline std::complex* new_mxnlocal_buff() const + { + if(buff_mxnlocal_ == NULL) { + buff_mxnlocal_ = new std::complex[max_rows_ * n_local_]; } - }; //end class -}//end namespace + return buff_mxnlocal_; + } +}; // end class +} // namespace hiop #endif diff --git a/src/LinAlg/hiopMatrixComplexSparseTriplet.cpp b/src/LinAlg/hiopMatrixComplexSparseTriplet.cpp index ec322a3b9..4ef1a4c71 100644 --- a/src/LinAlg/hiopMatrixComplexSparseTriplet.cpp +++ b/src/LinAlg/hiopMatrixComplexSparseTriplet.cpp @@ -6,526 +6,520 @@ #include #include -#include // std::cout, std::fixed -#include // std::setprecision +#include // std::cout, std::fixed +#include // std::setprecision namespace hiop { - hiopMatrixComplexSparseTriplet::hiopMatrixComplexSparseTriplet(int rows, int cols, int nnz) - { - stM = new hiopMatrixSparseTripletStorage >(rows, cols, nnz); - } - hiopMatrixComplexSparseTriplet::~hiopMatrixComplexSparseTriplet() - { - delete stM; - } - - hiopMatrix* hiopMatrixComplexSparseTriplet::alloc_clone() const - { - return new hiopMatrixComplexSparseTriplet(stM->m(), stM->n(), stM->numberOfNonzeros()); - } - - hiopMatrix* hiopMatrixComplexSparseTriplet::new_copy() const - { - assert(false); - return NULL; +hiopMatrixComplexSparseTriplet::hiopMatrixComplexSparseTriplet(int rows, int cols, int nnz) +{ + stM = new hiopMatrixSparseTripletStorage >(rows, cols, nnz); +} +hiopMatrixComplexSparseTriplet::~hiopMatrixComplexSparseTriplet() { delete stM; } + +hiopMatrix* hiopMatrixComplexSparseTriplet::alloc_clone() const +{ + return new hiopMatrixComplexSparseTriplet(stM->m(), stM->n(), stM->numberOfNonzeros()); +} + +hiopMatrix* hiopMatrixComplexSparseTriplet::new_copy() const +{ + assert(false); + return NULL; +} + +void hiopMatrixComplexSparseTriplet::setToZero() +{ + auto* values = stM->M(); + for(int i = 0; i < stM->numberOfNonzeros(); i++) values[i] = 0.; +} +void hiopMatrixComplexSparseTriplet::setToConstant(double c) +{ + auto* values = stM->M(); + for(int i = 0; i < stM->numberOfNonzeros(); i++) values[i] = c; +} +void hiopMatrixComplexSparseTriplet::setToConstant(std::complex c) +{ + auto* values = stM->M(); + for(int i = 0; i < stM->numberOfNonzeros(); i++) values[i] = c; +} + +double hiopMatrixComplexSparseTriplet::max_abs_value() +{ + char norm = 'M'; + int one = 1, nnz = stM->numberOfNonzeros(); + hiop::dcomplex* M = reinterpret_cast(stM->M()); + + double maxv = ZLANGE(&norm, &one, &nnz, M, &one, NULL); + return maxv; +} + +void hiopMatrixComplexSparseTriplet::timesVec(double beta, + std::complex* y, + double alpha, + const std::complex* x) const +{ + int nrows = stM->m(); + // y= beta*y + if(beta != 0.) { + for(int i = 0; i < nrows; i++) { + y[i] *= beta; + } + } else { + for(int i = 0; i < nrows; i++) { + y[i] = 0.; + } } - void hiopMatrixComplexSparseTriplet::setToZero() - { - auto* values = stM->M(); - for(int i=0; inumberOfNonzeros(); i++) - values[i]=0.; + auto* values = stM->M(); + int* iRow = stM->i_row(); + int* jCol = stM->j_col(); + int nnz = stM->numberOfNonzeros(); + int ncols = stM->n(); + // y += alpha*this*x + for(int i = 0; i < nnz; i++) { + assert(iRow[i] < nrows); + assert(jCol[i] < ncols); + y[iRow[i]] += alpha * x[jCol[i]] * values[i]; } - void hiopMatrixComplexSparseTriplet::setToConstant(double c) - { - auto* values = stM->M(); - for(int i=0; inumberOfNonzeros(); i++) - values[i]=c; +} +/* W = beta*W + alpha*this^T*X + * + * Only supports W and X of the type 'hiopMatrixComplexDense' + */ +void hiopMatrixComplexSparseTriplet::transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const +{ + assert(m() == X.m()); + assert(n() == W.m()); + assert(W.n() == X.n()); + + hiopMatrixComplexDense* Wd = dynamic_cast(&W); + if(Wd == NULL) { + std::cerr << "hiopMatrixComplexSparseTriplet::transTimesMat received an unsuported type (1)\n"; + return; } - void hiopMatrixComplexSparseTriplet::setToConstant(std::complex c) - { - auto* values = stM->M(); - for(int i=0; inumberOfNonzeros(); i++) - values[i]=c; + + const hiopMatrixComplexDense* Xd = dynamic_cast(&X); + if(Xd == NULL) { + std::cerr << "hiopMatrixComplexSparseTriplet::transTimesMat received an unsuported type (2)\n"; + return; } - double hiopMatrixComplexSparseTriplet::max_abs_value() - { - char norm='M'; int one=1, nnz=stM->numberOfNonzeros(); - hiop::dcomplex* M = reinterpret_cast(stM->M()); - - double maxv = ZLANGE(&norm, &one, &nnz, M, &one, NULL); - return maxv; + auto* W_M = Wd->get_M(); + const auto* X_M = Xd->local_data(); // same as get_M but with has const qualifier + + if(beta == 0.) { + Wd->setToZero(); + } else { + int N = W.m() * W.n(); + dcomplex zalpha; + zalpha.re = beta; + zalpha.im = 0.; + int one = 1; + ZSCAL(&N, &zalpha, reinterpret_cast(*W_M), &one); } - void hiopMatrixComplexSparseTriplet::timesVec(double beta, std::complex* y, - double alpha, const std::complex* x) const - { - int nrows = stM->m(); - // y= beta*y - if(beta != 0.) { - for(int i=0; ii_row(); + int* this_jcol = storage()->j_col(); + std::complex* this_M = storage()->M(); + int nnz = numberOfNonzeros(); - auto* values = stM->M(); - int* iRow = stM->i_row(); - int* jCol = stM->j_col(); - int nnz = stM->numberOfNonzeros(); - int ncols = stM->n(); - // y += alpha*this*x - for(int i=0; i aux = alpha * this_M[it]; + for(int j = 0; j < X.n(); j++) { + W_M[this_jcol[it]][j] += aux * X_M[this_irow[it]][j]; } } - - /* W = beta*W + alpha*this^T*X - * - * Only supports W and X of the type 'hiopMatrixComplexDense' - */ - void hiopMatrixComplexSparseTriplet:: - transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const - { - assert(m()==X.m()); - assert(n()==W.m()); - assert(W.n()==X.n()); - - hiopMatrixComplexDense* Wd = dynamic_cast(&W); - if(Wd==NULL) { - std::cerr << "hiopMatrixComplexSparseTriplet::transTimesMat received an unsuported type (1)\n"; - return; - } +} - const hiopMatrixComplexDense* Xd = dynamic_cast(&X); - if(Xd==NULL) { - std::cerr << "hiopMatrixComplexSparseTriplet::transTimesMat received an unsuported type (2)\n"; - return; +hiopMatrixComplexSparseTriplet* hiopMatrixComplexSparseTriplet::new_slice(const int* row_idxs, + int nrows, + const int* col_idxs, + int ncols) const +{ + int* src_i = this->storage()->i_row(); + int* src_j = this->storage()->j_col(); + // + // count nnz first + // + int dest_nnz = 0, src_itnz = 0, src_nnz = this->stM->numberOfNonzeros(); + for(int ki = 0; ki < nrows; ki++) { + const int& row = row_idxs[ki]; + assert(row < m()); +#ifndef NDEBUG + if(ki > 0) { + assert(row_idxs[ki] > row_idxs[ki - 1] && "slice row indexes need to be increasingly ordered"); } +#endif - auto* W_M = Wd->get_M(); - const auto* X_M = Xd->local_data(); //same as get_M but with has const qualifier + while(src_itnz < src_nnz && src_i[src_itnz] < row) src_itnz++; - if(beta==0.) { - Wd->setToZero(); - } else { - int N = W.m()*W.n(); - dcomplex zalpha; zalpha.re=beta; zalpha.im=0.; - int one = 1; - ZSCAL(&N, &zalpha, reinterpret_cast(*W_M), &one); + for(int kj = 0; kj < ncols; kj++) { + const int& col = col_idxs[kj]; + assert(col < n()); + +#ifndef NDEBUG + if(kj > 0) { + assert(col_idxs[kj] > col_idxs[kj - 1] && "slice column indexes need to be increasingly ordered"); + } +#endif + while(src_itnz < src_nnz && src_i[src_itnz] == row && src_j[src_itnz] < col) { + src_itnz++; + } + + if(src_itnz < src_nnz && src_i[src_itnz] == row && src_j[src_itnz] == col) { + // std::complex* src_M = this->storage()->M(); + // printf("[%d,%d] -> %g+%g*i (1)\n", ki, kj, src_M[src_itnz].real(), src_M[src_itnz].imag()); + dest_nnz++; + src_itnz++; + } } - + } + assert(src_itnz <= src_nnz); + assert(src_itnz >= dest_nnz); + + const int dest_nnz2 = dest_nnz; + hiopMatrixComplexSparseTriplet* newMat = new hiopMatrixComplexSparseTriplet(nrows, ncols, dest_nnz2); + // + // populate the new slice matrix + // + // first pass -> populate with elements on the upper triangle of 'this' + int* dest_i = newMat->storage()->i_row(); + int* dest_j = newMat->storage()->j_col(); + std::complex* dest_M = newMat->storage()->M(); + std::complex* src_M = this->storage()->M(); + + dest_nnz = 0; + src_itnz = 0; + for(int ki = 0; ki < nrows; ki++) { + const int& row = row_idxs[ki]; + + while(src_itnz < src_nnz && src_i[src_itnz] < row) src_itnz++; + + for(int kj = 0; kj < ncols; kj++) { + const int& col = col_idxs[kj]; + + while(src_itnz < src_nnz && src_i[src_itnz] == row && src_j[src_itnz] < col) { + src_itnz++; + } + if(src_itnz < src_nnz && src_i[src_itnz] == row && src_j[src_itnz] == col) { + dest_i[dest_nnz] = ki; + dest_j[dest_nnz] = kj; + dest_M[dest_nnz] = src_M[src_itnz]; - int* this_irow = storage()->i_row(); - int* this_jcol = storage()->j_col(); - std::complex* this_M = storage()->M(); - int nnz = numberOfNonzeros(); + // printf("[%d,%d] -> %g+%g*i (2)\n", ki, kj, src_M[src_itnz].real(), src_M[src_itnz].imag()); - for(int it=0; it aux = alpha*this_M[it]; - for(int j=0; jstorage()->i_row(); - int* src_j = this->storage()->j_col(); - // - //count nnz first - // - int dest_nnz=0, src_itnz=0, src_nnz=this->stM->numberOfNonzeros(); - for(int ki=0; kistorage()->sort_indexes(); + return newMat; +} + +// builds submatrix nrows x ncols with rows and cols specified by row_idxs and cols_idx +// assumes the 'this' is symmetric +hiopMatrixComplexSparseTriplet* hiopMatrixComplexSparseTriplet::new_sliceFromSym(const int* row_idxs, + int nrows, + const int* col_idxs, + int ncols) const +{ + int* src_i = this->storage()->i_row(); + int* src_j = this->storage()->j_col(); + + // count nnz first + int dest_nnz = 0, src_itnz = 0, src_nnz = this->stM->numberOfNonzeros(); + for(int ki = 0; ki < nrows; ki++) { + const int& row = row_idxs[ki]; + assert(row < m()); #ifndef NDEBUG - if(ki>0) { - assert(row_idxs[ki]>row_idxs[ki-1] && "slice row indexes need to be increasingly ordered"); - } + if(ki > 0) { + assert(row_idxs[ki] > row_idxs[ki - 1] && "slice row indexes need to be increasingly ordered"); + } #endif - - while(src_itnz0) { - assert(col_idxs[kj]>col_idxs[kj-1] && "slice column indexes need to be increasingly ordered"); - } + if(kj > 0) { + assert(col_idxs[kj] > col_idxs[kj - 1] && "slice column indexes need to be increasingly ordered"); + } #endif - while(src_itnz* src_M = this->storage()->M(); - //printf("[%d,%d] -> %g+%g*i (1)\n", ki, kj, src_M[src_itnz].real(), src_M[src_itnz].imag()); - dest_nnz++; - src_itnz++; - } + // we won't have entry (row, col) with row>col since 'this' is upper triangular. Avoid the extra + // checks below and continue the for loop + if(col < row) continue; + + while(src_itnz < src_nnz && src_i[src_itnz] == row && src_j[src_itnz] < col) { + src_itnz++; } - } - assert(src_itnz <= src_nnz); - assert(src_itnz >= dest_nnz); - - const int dest_nnz2 = dest_nnz; - hiopMatrixComplexSparseTriplet* newMat = new hiopMatrixComplexSparseTriplet(nrows, ncols, dest_nnz2); - // - //populate the new slice matrix - // - //first pass -> populate with elements on the upper triangle of 'this' - int* dest_i = newMat->storage()->i_row(); - int* dest_j = newMat->storage()->j_col(); - std::complex* dest_M = newMat->storage()->M(); - std::complex* src_M = this->storage()->M(); - - dest_nnz=0; src_itnz=0; - for(int ki=0; ki %g+%g*i (2)\n", ki, kj, src_M[src_itnz].real(), src_M[src_itnz].imag()); - - dest_nnz++; - src_itnz++; - assert(dest_nnz<=dest_nnz2); - } + + if(src_itnz < src_nnz && src_i[src_itnz] == row && src_j[src_itnz] == col) { + assert(row <= col); + dest_nnz++; + src_itnz++; } } - assert(src_itnz <= src_nnz); - assert(dest_nnz == dest_nnz2); - - newMat->storage()->sort_indexes(); - return newMat; } + assert(src_itnz <= src_nnz); + assert(src_itnz >= dest_nnz); - - //builds submatrix nrows x ncols with rows and cols specified by row_idxs and cols_idx - //assumes the 'this' is symmetric - hiopMatrixComplexSparseTriplet* - hiopMatrixComplexSparseTriplet::new_sliceFromSym(const int* row_idxs, int nrows, - const int* col_idxs, int ncols) const - { - int* src_i = this->storage()->i_row(); - int* src_j = this->storage()->j_col(); - - //count nnz first - int dest_nnz=0, src_itnz=0, src_nnz=this->stM->numberOfNonzeros(); - for(int ki=0; ki0) { - assert(row_idxs[ki]>row_idxs[ki-1] && "slice row indexes need to be increasingly ordered"); - } -#endif - - while(src_itnz0) { - assert(col_idxs[kj]>col_idxs[kj-1] && "slice column indexes need to be increasingly ordered"); - } -#endif - //we won't have entry (row, col) with row>col since 'this' is upper triangular. Avoid the extra - //checks below and continue the for loop - if(colcol since 'this' is upper triangular + // also entries for which row==col were already counted in the above loop + // so avoid the extra checks below and continue the for loop + if(col <= row) continue; + + while(src_itnz < src_nnz && src_i[src_itnz] == row && src_j[src_itnz] < col) { + src_itnz++; } - } - assert(src_itnz <= src_nnz); - assert(src_itnz >= dest_nnz); - - //one more iteration over, like above looking in the lower triangular this time - //this can be done by changing the order of the for(ki) and for(kj) loops - src_itnz=0; //reinitialize - for(int kj=0; kjcol since 'this' is upper triangular - //also entries for which row==col were already counted in the above loop - //so avoid the extra checks below and continue the for loop - if(col<=row) continue; - - while(src_itnz populate with elements on the upper triangle of 'this' - int* dest_i = newMat->storage()->i_row(); - int* dest_j = newMat->storage()->j_col(); - std::complex* dest_M = newMat->storage()->M(); - std::complex* src_M = this->storage()->M(); + // populate the new slice matrix - dest_nnz=0; src_itnz=0; - for(int ki=0; ki populate with elements on the upper triangle of 'this' + int* dest_i = newMat->storage()->i_row(); + int* dest_j = newMat->storage()->j_col(); + std::complex* dest_M = newMat->storage()->M(); + std::complex* src_M = this->storage()->M(); - while(src_itnz populate with elements on the upper triangle of 'this' - src_itnz=0; - for(int kj=0; kj populate with elements on the upper triangle of 'this' + src_itnz = 0; + for(int kj = 0; kj < ncols; kj++) { + const int& row = col_idxs[kj]; - while(src_itnzstorage()->sort_indexes(); + newMat->storage()->sort_indexes(); - return newMat; - } - - //extract a symmetric matrix (only upper triangle is stored) - hiopMatrixComplexSparseTriplet* - hiopMatrixComplexSparseTriplet::new_sliceFromSymToSym(const int* row_col_idxs, int ndim) const - { - int* src_i = this->storage()->i_row(); - int* src_j = this->storage()->j_col(); - - int dest_nnz=0, src_itnz=0, src_nnz=this->stM->numberOfNonzeros(); - for(int ki=0; kistorage()->i_row(); + int* src_j = this->storage()->j_col(); + + int dest_nnz = 0, src_itnz = 0, src_nnz = this->stM->numberOfNonzeros(); + for(int ki = 0; ki < ndim; ki++) { + const int& row = row_col_idxs[ki]; + assert(row < m()); #ifndef NDEBUG - if(ki>0) { - assert(row_col_idxs[ki]>row_col_idxs[ki-1] && "slice indexes need to be increasingly ordered"); - } + if(ki > 0) { + assert(row_col_idxs[ki] > row_col_idxs[ki - 1] && "slice indexes need to be increasingly ordered"); + } #endif - while(src_itnz= dest_nnz); - - hiopMatrixComplexSparseTriplet* newMat = new hiopMatrixComplexSparseTriplet(ndim, ndim, dest_nnz); - - int* dest_i = newMat->storage()->i_row(); - int* dest_j = newMat->storage()->j_col(); - std::complex* dest_M = newMat->storage()->M(); - std::complex* src_M = this->storage()->M(); - - dest_nnz=0; src_itnz=0; - for(int ki=0; ki= dest_nnz); + + hiopMatrixComplexSparseTriplet* newMat = new hiopMatrixComplexSparseTriplet(ndim, ndim, dest_nnz); + + int* dest_i = newMat->storage()->i_row(); + int* dest_j = newMat->storage()->j_col(); + std::complex* dest_M = newMat->storage()->M(); + std::complex* src_M = this->storage()->M(); + + dest_nnz = 0; + src_itnz = 0; + for(int ki = 0; ki < ndim; ki++) { + const int& row = row_col_idxs[ki]; + + while(src_itnz < src_nnz && src_i[src_itnz] < row) src_itnz++; + + for(int kj = ki; kj < ndim; kj++) { + const int& col = row_col_idxs[kj]; + + while(src_itnz < src_nnz && src_i[src_itnz] == row && src_j[src_itnz] < col) { + src_itnz++; + } + if(src_itnz < src_nnz && src_i[src_itnz] == row && src_j[src_itnz] == col) { + dest_i[dest_nnz] = ki; + dest_j[dest_nnz] = kj; + assert(dest_i[dest_nnz] <= dest_j[dest_nnz]); + dest_M[dest_nnz] = src_M[src_itnz]; + dest_nnz++; + src_itnz++; } } - assert(src_itnz <= src_nnz); - assert(src_itnz >= dest_nnz); - - return newMat; } - - - void hiopMatrixComplexSparseTriplet::print(FILE* file, const char* msg/*=NULL*/, - int maxRows/*=-1*/, int maxCols/*=-1*/, - int rank/*=-1*/) const - { - int myrank_=0, numranks=1; //this is a local object => always print - - int max_elems = maxRows>=0 ? maxRows : stM->numberOfNonzeros(); - max_elems = std::min(max_elems, stM->numberOfNonzeros()); - - if(file==NULL) file=stdout; - - if(myrank_==rank || rank==-1) { - std::stringstream ss; - if(NULL==msg) { - if(numranks>1) { - fprintf(file, "matrix of size %d %d and nonzeros %d, printing %d elems (on rank=%d)\n", - m(), n(), numberOfNonzeros(), max_elems, myrank_); - ss << "matrix of size " << m() << " " << n() << " and nonzeros " - << numberOfNonzeros() << ", printing " << max_elems << " elems (on rank=" - << myrank_ << ")" << std::endl; - } else { - //fprintf(file, "matrix of size %d %d and nonzeros %d, printing %d elems\n", - // m(), n(), numberOfNonzeros(), max_elems); - ss << "matrix of size " << m() << " " << n() << " and nonzeros " - << numberOfNonzeros() << ", printing " << max_elems << " elems" << std::endl; - } + assert(src_itnz <= src_nnz); + assert(src_itnz >= dest_nnz); + + return newMat; +} + +void hiopMatrixComplexSparseTriplet::print(FILE* file, + const char* msg /*=NULL*/, + int maxRows /*=-1*/, + int maxCols /*=-1*/, + int rank /*=-1*/) const +{ + int myrank_ = 0, numranks = 1; // this is a local object => always print + + int max_elems = maxRows >= 0 ? maxRows : stM->numberOfNonzeros(); + max_elems = std::min(max_elems, stM->numberOfNonzeros()); + + if(file == NULL) file = stdout; + + if(myrank_ == rank || rank == -1) { + std::stringstream ss; + if(NULL == msg) { + if(numranks > 1) { + fprintf(file, + "matrix of size %d %d and nonzeros %d, printing %d elems (on rank=%d)\n", + m(), + n(), + numberOfNonzeros(), + max_elems, + myrank_); + ss << "matrix of size " << m() << " " << n() << " and nonzeros " << numberOfNonzeros() << ", printing " << max_elems + << " elems (on rank=" << myrank_ << ")" << std::endl; } else { - ss << msg << " "; - //fprintf(file, "%s ", msg); - } - -#ifdef AAAAA - // output matlab indices and input format - fprintf(file, "iRow=["); - for(int it=0; itirow[it]+1); - fprintf(file, "];\n"); - - fprintf(file, "jCol=["); - for(int it=0; itjcol[it]+1); - fprintf(file, "];\n"); - - fprintf(file, "v=["); - for(int it=0; itvalues[it].real(), stM->values[it].imag()); - fprintf(file, "%.6g+%.6gi; ", stM->values[it].real(), stM->values[it].imag()); - fprintf(file, "];\n"); - -#else - for(int it=0; itirow[it]+1, - // stM->jcol[it]+1, - // stM->values[it].real(), - // stM->values[it].imag()); - ss << "[" << stM->irow[it]+1 << "," << stM->jcol[it]+1 << " = " - << std::setprecision(6) - << stM->values[it].real() << "+" - << stM->values[it].imag() << ";" << std::endl; + // fprintf(file, "matrix of size %d %d and nonzeros %d, printing %d elems\n", + // m(), n(), numberOfNonzeros(), max_elems); + ss << "matrix of size " << m() << " " << n() << " and nonzeros " << numberOfNonzeros() << ", printing " << max_elems + << " elems" << std::endl; } - fprintf(file, "%s", ss.str().c_str()); -#endif + } else { + ss << msg << " "; + // fprintf(file, "%s ", msg); } + +#ifdef AAAAA + // output matlab indices and input format + fprintf(file, "iRow=["); + for(int it = 0; it < max_elems; it++) fprintf(file, "%d; ", stM->irow[it] + 1); + fprintf(file, "];\n"); + + fprintf(file, "jCol=["); + for(int it = 0; it < max_elems; it++) fprintf(file, "%d; ", stM->jcol[it] + 1); + fprintf(file, "];\n"); + + fprintf(file, "v=["); + for(int it = 0; it < max_elems; it++) + // fprintf(file, "%22.16e+%22.16ei; ", stM->values[it].real(), stM->values[it].imag()); + fprintf(file, "%.6g+%.6gi; ", stM->values[it].real(), stM->values[it].imag()); + fprintf(file, "];\n"); + +#else + for(int it = 0; it < max_elems; it++) { + // fprintf(file, + // "[%3d,%3d] = %.6g+%.6gi\n", + // stM->irow[it]+1, + // stM->jcol[it]+1, + // stM->values[it].real(), + // stM->values[it].imag()); + ss << "[" << stM->irow[it] + 1 << "," << stM->jcol[it] + 1 << " = " << std::setprecision(6) << stM->values[it].real() + << "+" << stM->values[it].imag() << ";" << std::endl; + } + fprintf(file, "%s", ss.str().c_str()); +#endif + } } -}//end namespace +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixComplexSparseTriplet.hpp b/src/LinAlg/hiopMatrixComplexSparseTriplet.hpp index 5e5a904c5..0096a02a9 100644 --- a/src/LinAlg/hiopMatrixComplexSparseTriplet.hpp +++ b/src/LinAlg/hiopMatrixComplexSparseTriplet.hpp @@ -7,234 +7,222 @@ namespace hiop { - /** Sparse matrix of complex numbers in triplet format - it is not distributed - * - * Note: most methods expect (i,j) ordered: first on rows 'i' and then on cols 'j'. The - * class hiopMatrixSparseTripletStorage offers this functionality. +/** Sparse matrix of complex numbers in triplet format - it is not distributed + * + * Note: most methods expect (i,j) ordered: first on rows 'i' and then on cols 'j'. The + * class hiopMatrixSparseTripletStorage offers this functionality. + * + * Existing limitations: this class is mostly used as storage both for symmetric and + * rectangular matrices. Some of the ("not yet implemented") methods are ambiguous + * or simply cannot be implemented without i. having this class specialized for + * rectangular matrices and ii. derive a new specialization for symmetric matrices. + */ +/* + Note: the following methods of hiopMatrix are NOT + implemented in this class: + - copyRowsFrom + - timesVec + - transTimesVec + - timesMat + - transTimesMat + - timesMatTrans + - addDiagonal (both overloads) + - addSubDiagonal (all three overloads) + - addMatrix + - addToSymDenseMatrixUpperTriangle + - transAddToSymDenseMatrixUpperTriangle + - addUpperTriangleToSymDenseMatrixUpperTriangle + - isfinite + - assertSymmetry +*/ +class hiopMatrixComplexSparseTriplet : public hiopMatrix +{ +public: + hiopMatrixComplexSparseTriplet(int rows, int cols, int nnz); + virtual ~hiopMatrixComplexSparseTriplet(); + + virtual hiopMatrix* alloc_clone() const; + virtual hiopMatrix* new_copy() const; + + virtual void setToZero(); + virtual void setToConstant(double c); + virtual void setToConstant(std::complex c); + + void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows) + { + assert(false && "not yet implemented"); + } + + /** y = beta * y + alpha * this * x */ + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const + { + assert(false && "not yet implemented"); + } + /** y = beta * y + alpha * this * x + */ + virtual void timesVec(double beta, std::complex* y, double alpha, const std::complex* x) const; + + /** y = beta * y + alpha * this^T * x */ + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const + { + assert(false && "not yet implemented"); + } + + /* W = beta*W + alpha*this*X */ + virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not yet implemented"); + } + + /* W = beta*W + alpha*this^T*X * - * Existing limitations: this class is mostly used as storage both for symmetric and - * rectangular matrices. Some of the ("not yet implemented") methods are ambiguous - * or simply cannot be implemented without i. having this class specialized for - * rectangular matrices and ii. derive a new specialization for symmetric matrices. + * Only supports W and X of the type 'hiopMatrixComplexDense' */ - /* - Note: the following methods of hiopMatrix are NOT - implemented in this class: - - copyRowsFrom - - timesVec - - transTimesVec - - timesMat - - transTimesMat - - timesMatTrans - - addDiagonal (both overloads) - - addSubDiagonal (all three overloads) - - addMatrix - - addToSymDenseMatrixUpperTriangle - - transAddToSymDenseMatrixUpperTriangle - - addUpperTriangleToSymDenseMatrixUpperTriangle - - isfinite - - assertSymmetry - */ - class hiopMatrixComplexSparseTriplet : public hiopMatrix + virtual void transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const; + + /* W = beta*W + alpha*this*X^T */ + virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const { - public: - hiopMatrixComplexSparseTriplet(int rows, int cols, int nnz); - virtual ~hiopMatrixComplexSparseTriplet(); - - virtual hiopMatrix* alloc_clone() const; - virtual hiopMatrix* new_copy() const; - - virtual void setToZero(); - virtual void setToConstant(double c); - virtual void setToConstant(std::complex c); - - void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows) - { - assert(false && "not yet implemented"); - } - - /** y = beta * y + alpha * this * x */ - virtual void timesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x ) const - { - assert(false && "not yet implemented"); - } - /** y = beta * y + alpha * this * x - */ - virtual void timesVec(double beta, std::complex* y, - double alpha, const std::complex* x ) const; - - /** y = beta * y + alpha * this^T * x */ - virtual void transTimesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x ) const - { - assert(false && "not yet implemented"); - } - - /* W = beta*W + alpha*this*X */ - virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const - { - assert(false && "not yet implemented"); - } - - /* W = beta*W + alpha*this^T*X - * - * Only supports W and X of the type 'hiopMatrixComplexDense' - */ - virtual void transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const; - - /* W = beta*W + alpha*this*X^T */ - virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const - { - assert(false && "not yet implemented"); - } - - /* this += alpha * (sub)diag */ - virtual void addDiagonal(const double& alpha, const hiopVector& d_) - { - assert(false && "not yet implemented"); - } - - virtual void addDiagonal(const double& value) - { - assert(false && "not yet implemented"); - } - - virtual void addSubDiagonal(const double& alpha, index_type start_on_dest_diag, const hiopVector& d_) - { - assert(false && "not yet implemented"); - } - - /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' - * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, const double& alpha, - const hiopVector& d_, int start_on_src_vec, int num_elems=-1) - { - assert(false && "not yet implemented"); - } - virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) - { - assert(false && "not yet implemented"); - } - - - /* this += alpha*X */ - virtual void addMatrix(double alpha, const hiopMatrix& X) - { - assert(false && "not yet implemented"); - } - - - /* block of W += alpha*this - * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK - * Preconditions: - * 1. 'this' has to fit in the upper triangle of W - * 2. W.n() == W.m() - */ - virtual void addToSymDenseMatrixUpperTriangle(int row_dest_start, int col_dest_start, - double alpha, hiopMatrixDense& W) const - { - assert(false && "not yet implemented"); - } - - /* block of W += alpha*transpose(this) - * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK - * Preconditions: - * 1. transpose of 'this' has to fit in the upper triangle of W - * 2. W.n() == W.m() - * - */ - virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, int col_dest_start, - double alpha, hiopMatrixDense& W) const - { - assert(false && "not yet implemented"); - } - - /* diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where - * 'this' should start to contribute. - * - * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK - * and only the upper triangle of 'this' is accessed - * - * Preconditions: - * 1. this->n()==this-m() - * 2. W.n() == W.m() - */ - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const - { - assert(false && "not yet implemented"); - } - - virtual double max_abs_value(); - - virtual void row_max_abs_value(hiopVector &ret_vec){assert(0&&"not yet");} - - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale){assert(0&&"not yet");} - - /* return false is any of the entry is a nan, inf, or denormalized */ - virtual bool isfinite() const - { - assert(false && "not yet implemented"); - return false; - } - - /* call with -1 to print all rows, all columns, or on all ranks; otherwise will - * will print the first rows and/or columns on the specified rank. - * - * If the underlying matrix is sparse, maxCols is ignored and a max number elements - * given by the value of 'maxRows' will be printed. If this value is negative, all - * elements will be printed. - */ - virtual void print(FILE* f=NULL, const char* msg=NULL, - int maxRows=-1, int maxCols=-1, int rank=-1) const; - - /* number of rows */ - virtual size_type m() const { return stM->m(); } - /* number of columns */ - virtual size_type n() const { return stM->n(); } + assert(false && "not yet implemented"); + } + + /* this += alpha * (sub)diag */ + virtual void addDiagonal(const double& alpha, const hiopVector& d_) { assert(false && "not yet implemented"); } + + virtual void addDiagonal(const double& value) { assert(false && "not yet implemented"); } + + virtual void addSubDiagonal(const double& alpha, index_type start_on_dest_diag, const hiopVector& d_) + { + assert(false && "not yet implemented"); + } + + /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' + * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, + const hiopVector& d_, + int start_on_src_vec, + int num_elems = -1) + { + assert(false && "not yet implemented"); + } + virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) + { + assert(false && "not yet implemented"); + } + + /* this += alpha*X */ + virtual void addMatrix(double alpha, const hiopMatrix& X) { assert(false && "not yet implemented"); } + + /* block of W += alpha*this + * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK + * Preconditions: + * 1. 'this' has to fit in the upper triangle of W + * 2. W.n() == W.m() + */ + virtual void addToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, + hiopMatrixDense& W) const + { + assert(false && "not yet implemented"); + } + + /* block of W += alpha*transpose(this) + * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK + * Preconditions: + * 1. transpose of 'this' has to fit in the upper triangle of W + * 2. W.n() == W.m() + * + */ + virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, + hiopMatrixDense& W) const + { + assert(false && "not yet implemented"); + } + + /* diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where + * 'this' should start to contribute. + * + * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK + * and only the upper triangle of 'this' is accessed + * + * Preconditions: + * 1. this->n()==this-m() + * 2. W.n() == W.m() + */ + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const + { + assert(false && "not yet implemented"); + } + + virtual double max_abs_value(); + + virtual void row_max_abs_value(hiopVector& ret_vec) { assert(0 && "not yet"); } + + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale) { assert(0 && "not yet"); } + + /* return false is any of the entry is a nan, inf, or denormalized */ + virtual bool isfinite() const + { + assert(false && "not yet implemented"); + return false; + } + + /* call with -1 to print all rows, all columns, or on all ranks; otherwise will + * will print the first rows and/or columns on the specified rank. + * + * If the underlying matrix is sparse, maxCols is ignored and a max number elements + * given by the value of 'maxRows' will be printed. If this value is negative, all + * elements will be printed. + */ + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const; + + /* number of rows */ + virtual size_type m() const { return stM->m(); } + /* number of columns */ + virtual size_type n() const { return stM->n(); } #ifdef HIOP_DEEPCHECKS - /* check symmetry */ - virtual bool assertSymmetry(double tol=1e-16) const - { - assert(false && "not yet implemented"); - return false; - } + /* check symmetry */ + virtual bool assertSymmetry(double tol = 1e-16) const + { + assert(false && "not yet implemented"); + return false; + } #endif - // these are not part of the hiopMatrix - - //Builds/extracts submatrix nrows x ncols with rows and cols specified by row_idxs and cols_idx - //Assumes - // - 'this' is unsymmetric - // - 'row_idxs' and 'col_idxs' are ordered - hiopMatrixComplexSparseTriplet* new_slice(const int* row_idxs, int nrows, - const int* col_idxs, int ncols) const; - - - //Builds/extracts submatrix nrows x ncols with rows and cols specified by row_idxs and cols_idx - //Assumes - // - 'this' is symmetric (only upper triangle is stored) - // - 'row_idxs' and 'col_idxs' are ordered - hiopMatrixComplexSparseTriplet* new_sliceFromSym(const int* row_idxs, int nrows, - const int* col_idxs, int ncols) const; - - //Extracts a symmetric matrix (for which only the upper triangle is stored) - //Assumes - // - 'this' is symmetric (only upper triangle is stored) - // - 'row_col_idxs' is ordered - hiopMatrixComplexSparseTriplet* new_sliceFromSymToSym(const int* row_col_idxs, int ndim) const; - - - inline void copyFrom(const int* irow_, const int* jcol_, const std::complex* values_) - { - stM->copyFrom(irow_, jcol_, values_); - } - inline size_type numberOfNonzeros() const { return stM->numberOfNonzeros(); } - inline hiopMatrixSparseTripletStorage >* storage() const { return stM; } - private: - hiopMatrixSparseTripletStorage > *stM; - }; -} //end of namespace + // these are not part of the hiopMatrix + + // Builds/extracts submatrix nrows x ncols with rows and cols specified by row_idxs and cols_idx + // Assumes + // - 'this' is unsymmetric + // - 'row_idxs' and 'col_idxs' are ordered + hiopMatrixComplexSparseTriplet* new_slice(const int* row_idxs, int nrows, const int* col_idxs, int ncols) const; + + // Builds/extracts submatrix nrows x ncols with rows and cols specified by row_idxs and cols_idx + // Assumes + // - 'this' is symmetric (only upper triangle is stored) + // - 'row_idxs' and 'col_idxs' are ordered + hiopMatrixComplexSparseTriplet* new_sliceFromSym(const int* row_idxs, int nrows, const int* col_idxs, int ncols) const; + + // Extracts a symmetric matrix (for which only the upper triangle is stored) + // Assumes + // - 'this' is symmetric (only upper triangle is stored) + // - 'row_col_idxs' is ordered + hiopMatrixComplexSparseTriplet* new_sliceFromSymToSym(const int* row_col_idxs, int ndim) const; + + inline void copyFrom(const int* irow_, const int* jcol_, const std::complex* values_) + { + stM->copyFrom(irow_, jcol_, values_); + } + inline size_type numberOfNonzeros() const { return stM->numberOfNonzeros(); } + inline hiopMatrixSparseTripletStorage >* storage() const { return stM; } + +private: + hiopMatrixSparseTripletStorage >* stM; +}; +} // namespace hiop #endif diff --git a/src/LinAlg/hiopMatrixDense.hpp b/src/LinAlg/hiopMatrixDense.hpp index 51c25b50d..9bf2b9dd0 100644 --- a/src/LinAlg/hiopMatrixDense.hpp +++ b/src/LinAlg/hiopMatrixDense.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #pragma once @@ -61,96 +61,130 @@ class hiopMatrixDense : public hiopMatrix { public: hiopMatrixDense(const size_type& m, const size_type& glob_n, MPI_Comm comm = MPI_COMM_SELF) - : m_local_(m) - , n_global_(glob_n) - , comm_(comm) - { - } - virtual ~hiopMatrixDense() - { - } + : m_local_(m), + n_global_(glob_n), + comm_(comm) + {} + virtual ~hiopMatrixDense() {} - virtual void setToZero(){assert(false && "not implemented in base class");} - virtual void setToConstant(double c){assert(false && "not implemented in base class");} - virtual void copyFrom(const hiopMatrixDense& dm){assert(false && "not implemented in base class");} - virtual void copyFrom(const double* buffer){assert(false && "not implemented in base class");} + virtual void setToZero() { assert(false && "not implemented in base class"); } + virtual void setToConstant(double c) { assert(false && "not implemented in base class"); } + virtual void copyFrom(const hiopMatrixDense& dm) { assert(false && "not implemented in base class"); } + virtual void copyFrom(const double* buffer) { assert(false && "not implemented in base class"); } /** * @brief copy to a double array - * + * * @pre double array is big enough to hold all the values from this dense matrix */ - virtual void copy_to(double* buffer){assert(false && "not implemented in base class");} + virtual void copy_to(double* buffer) { assert(false && "not implemented in base class"); } - virtual void timesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const{assert(false && "not implemented in base class");} + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const + { + assert(false && "not implemented in base class"); + } /* same as above for mostly internal use - avoid using it */ - virtual void timesVec(double beta, double* y, - double alpha, const double* x) const{assert(false && "not implemented in base class");} + virtual void timesVec(double beta, double* y, double alpha, const double* x) const + { + assert(false && "not implemented in base class"); + } - virtual void transTimesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const{assert(false && "not implemented in base class");} + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const + { + assert(false && "not implemented in base class"); + } /* same as above for mostly for internal use - avoid using it */ - virtual void transTimesVec(double beta, double* y, - double alpha, const double* x) const{assert(false && "not implemented in base class");} + virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const + { + assert(false && "not implemented in base class"); + } /** - * @brief W = beta*W + alpha*this*X + * @brief W = beta*W + alpha*this*X * - * @pre W, 'this', and 'X' need to be local matrices (not distributed). All multiplications - * of distributed matrices needed by HiOp internally can be done efficiently in parallel using the + * @pre W, 'this', and 'X' need to be local matrices (not distributed). All multiplications + * of distributed matrices needed by HiOp internally can be done efficiently in parallel using the * 'timesMatTrans' and 'transTimesMat' methods below. - */ - virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const{assert(false && "not implemented in base class");} - + */ + virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not implemented in base class"); + } + /** - * @brief W = beta*W + alpha*this*X + * @brief W = beta*W + alpha*this*X * Contains the implementation internals of the above; can be used on its own. */ - virtual void timesMat_local(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const{assert(false && "not implemented in base class");} + virtual void timesMat_local(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not implemented in base class"); + } /** - * @brief W = beta*W + alpha*this^T*X - * + * @brief W = beta*W + alpha*this^T*X + * * @pre 'this' should be local/non-distributed. 'X' (and 'W') can be distributed. * * Note: no inter-process communication occurs in the parallel case */ - virtual void transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const{assert(false && "not implemented in base class");} + virtual void transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not implemented in base class"); + } - /** - * @brief W = beta*W + alpha*this*X^T + /** + * @brief W = beta*W + alpha*this*X^T * @pre 'W' need to be local/non-distributed. * * 'this' and 'X' can be distributed, in which case communication will occur. */ - virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const{assert(false && "not implemented in base class");} + virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not implemented in base class"); + } /* Contains dgemm wrapper needed by the above */ - virtual void timesMatTrans_local(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const{assert(false && "not implemented in base class");} + virtual void timesMatTrans_local(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const + { + assert(false && "not implemented in base class"); + } + + virtual void addDiagonal(const double& alpha, const hiopVector& d_) { assert(false && "not implemented in base class"); } + virtual void addDiagonal(const double& value) { assert(false && "not implemented in base class"); } + virtual void addSubDiagonal(const double& alpha, index_type start_on_dest_diag, const hiopVector& d_) + { + assert(false && "not implemented in base class"); + } - virtual void addDiagonal(const double& alpha, const hiopVector& d_){assert(false && "not implemented in base class");} - virtual void addDiagonal(const double& value){assert(false && "not implemented in base class");} - virtual void addSubDiagonal(const double& alpha, index_type start_on_dest_diag, const hiopVector& d_){assert(false && "not implemented in base class");} - - /** + /** * @brief add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, const double& alpha, - const hiopVector& d_, int start_on_src_vec, int num_elems=-1){assert(false && "not implemented in base class");} - virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c){assert(false && "not implemented in base class");} - - virtual void addMatrix(double alpha, const hiopMatrix& X){assert(false && "not implemented in base class");} + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, + const hiopVector& d_, + int start_on_src_vec, + int num_elems = -1) + { + assert(false && "not implemented in base class"); + } + virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) + { + assert(false && "not implemented in base class"); + } + + virtual void addMatrix(double alpha, const hiopMatrix& X) { assert(false && "not implemented in base class"); } /** * @brief block of W += alpha*transpose(this) * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK * - * @pre transpose of 'this' has to fit in the upper triangle of W + * @pre transpose of 'this' has to fit in the upper triangle of W * @pre W.n() == W.m() */ - virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, int col_dest_start, - double alpha, hiopMatrixDense& W) const + virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, + hiopMatrixDense& W) const { assert(false && "not implemented in base class"); } @@ -158,88 +192,127 @@ class hiopMatrixDense : public hiopMatrix /** * @brief diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where * 'this' should start to contribute. - * + * * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK * and only the upper triangle of 'this' is accessed - * + * * @pre this->n()==this->m() * @pre W.n() == W.m() */ - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const + { + assert(false && "not implemented in base class"); + } + + virtual double max_abs_value() { assert(false && "not implemented in base class"); + return -1.0; } - virtual double max_abs_value(){assert(false && "not implemented in base class"); return -1.0;} + virtual void row_max_abs_value(hiopVector& ret_vec) { assert(false && "not implemented in base class"); } - virtual void row_max_abs_value(hiopVector &ret_vec){assert(false && "not implemented in base class");} + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale) { assert(false && "not implemented in base class"); } - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale){assert(false && "not implemented in base class");} + virtual bool isfinite() const + { + assert(false && "not implemented in base class"); + return false; + } - virtual bool isfinite() const{assert(false && "not implemented in base class"); return false;} - - virtual void print(FILE* f=NULL, const char* msg=NULL, int maxRows=-1, int maxCols=-1, int rank=-1) const + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const { assert(false && "not implemented in base class"); } - virtual hiopMatrixDense* alloc_clone() const=0; - virtual hiopMatrixDense* new_copy() const=0; + virtual hiopMatrixDense* alloc_clone() const = 0; + virtual hiopMatrixDense* new_copy() const = 0; - virtual void appendRow(const hiopVector& row){assert(false && "not implemented in base class");} + virtual void appendRow(const hiopVector& row) { assert(false && "not implemented in base class"); } /// @brief copies the first 'num_rows' rows from 'src' to 'this' starting at 'row_dest' - virtual void copyRowsFrom(const hiopMatrixDense& src, int num_rows, int row_dest) + virtual void copyRowsFrom(const hiopMatrixDense& src, int num_rows, int row_dest) { assert(false && "not implemented in base class"); } - + /** * @brief Copy 'n_rows' rows specified by 'rows_idxs' (array of size 'n_rows') from 'src' to 'this' - * + * * @pre 'this' has exactly 'n_rows' rows * @pre 'src' and 'this' must have same number of columns * @pre number of rows in 'src' must be at least the number of rows in 'this' */ - virtual void copyRowsFrom(const hiopMatrix& src_gen, const index_type* rows_idxs, size_type n_rows){assert(false && "not implemented in base class");} - + virtual void copyRowsFrom(const hiopMatrix& src_gen, const index_type* rows_idxs, size_type n_rows) + { + assert(false && "not implemented in base class"); + } + /// @brief copies 'src' into this as a block starting at (i_block_start,j_block_start) - virtual void copyBlockFromMatrix(const index_type i_block_start, const index_type j_block_start, - const hiopMatrixDense& src){assert(false && "not implemented in base class");} - + virtual void copyBlockFromMatrix(const index_type i_block_start, + const index_type j_block_start, + const hiopMatrixDense& src) + { + assert(false && "not implemented in base class"); + } + /** - * @brief overwrites 'this' with 'src''s block that starts at (i_src_block_start,j_src_block_start) + * @brief overwrites 'this' with 'src''s block that starts at (i_src_block_start,j_src_block_start) * and has dimensions of 'this' */ - virtual void copyFromMatrixBlock(const hiopMatrixDense& src, const int i_src_block_start, const int j_src_block_start){assert(false && "not implemented in base class");} + virtual void copyFromMatrixBlock(const hiopMatrixDense& src, const int i_src_block_start, const int j_src_block_start) + { + assert(false && "not implemented in base class"); + } /// @brief shift<0 -> up; shift>0 -> down - virtual void shiftRows(size_type shift){assert(false && "not implemented in base class");} - virtual void replaceRow(index_type row, const hiopVector& vec){assert(false && "not implemented in base class");} + virtual void shiftRows(size_type shift) { assert(false && "not implemented in base class"); } + virtual void replaceRow(index_type row, const hiopVector& vec) { assert(false && "not implemented in base class"); } /// @brief copies row 'irow' in the vector 'row_vec' (sizes should match) - virtual void getRow(index_type irow, hiopVector& row_vec){assert(false && "not implemented in base class");} + virtual void getRow(index_type irow, hiopVector& row_vec) { assert(false && "not implemented in base class"); } /// @brief build Hess for FR problem, from the base problem `Hess`. - virtual void set_Hess_FR(const hiopMatrixDense& Hess, const hiopVector& add_diag_de){assert(false && "not implemented in base class");} + virtual void set_Hess_FR(const hiopMatrixDense& Hess, const hiopVector& add_diag_de) + { + assert(false && "not implemented in base class"); + } - /// @brief build Jac for FR problem, from the base problem `Jac_c` and `Jac_d`. - virtual void set_Jac_FR(const hiopMatrixDense& Jac_c, - const hiopMatrixDense& Jac_d) {assert(false && "not implemented in base class");} + /// @brief build Jac for FR problem, from the base problem `Jac_c` and `Jac_d`. + virtual void set_Jac_FR(const hiopMatrixDense& Jac_c, const hiopMatrixDense& Jac_d) + { + assert(false && "not implemented in base class"); + } #ifdef HIOP_DEEPCHECKS - virtual void overwriteUpperTriangleWithLower(){assert(false && "not implemented in base class");} - virtual void overwriteLowerTriangleWithUpper(){assert(false && "not implemented in base class");} + virtual void overwriteUpperTriangleWithLower() { assert(false && "not implemented in base class"); } + virtual void overwriteLowerTriangleWithUpper() { assert(false && "not implemented in base class"); } #endif - virtual size_type get_local_size_n() const {assert(false && "not implemented in base class"); return -1;} - virtual size_type get_local_size_m() const {assert(false && "not implemented in base class"); return -1;} + virtual size_type get_local_size_n() const + { + assert(false && "not implemented in base class"); + return -1; + } + virtual size_type get_local_size_m() const + { + assert(false && "not implemented in base class"); + return -1; + } virtual MPI_Comm get_mpi_comm() const { return comm_; } - virtual double* local_data_const() const {assert(false && "not implemented in base class"); return nullptr;} - virtual double* local_data() {assert(false && "not implemented in base class"); return nullptr;} + virtual double* local_data_const() const + { + assert(false && "not implemented in base class"); + return nullptr; + } + virtual double* local_data() + { + assert(false && "not implemented in base class"); + return nullptr; + } + public: - virtual size_type m() const {return m_local_;} - virtual size_type n() const {return n_global_;} + virtual size_type m() const { return m_local_; } + virtual size_type n() const { return n_global_; } #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const + virtual bool assertSymmetry(double tol = 1e-16) const { assert(false && "not implemented in base class"); return true; @@ -249,7 +322,7 @@ class hiopMatrixDense : public hiopMatrix protected: size_type m_local_; - size_type n_global_; //total / global number of columns + size_type n_global_; // total / global number of columns MPI_Comm comm_; int myrank_; int comm_size_; @@ -257,8 +330,7 @@ class hiopMatrixDense : public hiopMatrix protected: hiopMatrixDense() {}; /** copy constructor, for internal/private use only (it doesn't copy the values) */ - hiopMatrixDense(const hiopMatrixDense&){assert(false && "not implemented in base class");} + hiopMatrixDense(const hiopMatrixDense&) { assert(false && "not implemented in base class"); } }; -} // namespace hiop - +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixDenseRaja.hpp b/src/LinAlg/hiopMatrixDenseRaja.hpp index c816095c5..dbc5f6c34 100644 --- a/src/LinAlg/hiopMatrixDenseRaja.hpp +++ b/src/LinAlg/hiopMatrixDenseRaja.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -68,9 +68,9 @@ namespace hiop { -/** +/** * @brief Dense matrix stored row-wise and distributed column-wise - * + * * Local methods (not MPI distributed) * timesMat * addDiagonal @@ -83,17 +83,16 @@ namespace hiop * copyBlockFromMatrix * copyFromMatrixBlock */ - + template class hiopMatrixDenseRaja : public hiopMatrixDense { public: - - hiopMatrixDenseRaja(const size_type& m, + hiopMatrixDenseRaja(const size_type& m, const size_type& glob_n, - std::string mem_space, - index_type* col_part = NULL, - MPI_Comm comm = MPI_COMM_SELF, + std::string mem_space, + index_type* col_part = NULL, + MPI_Comm comm = MPI_COMM_SELF, const size_type& m_max_alloc = -1); virtual ~hiopMatrixDenseRaja(); @@ -103,46 +102,42 @@ class hiopMatrixDenseRaja : public hiopMatrixDense virtual void copyFrom(const double* buffer); virtual void copy_to(double* buffer); - virtual void timesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const; + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; /* same as above for mostly internal use - avoid using it */ - virtual void timesVec(double beta, double* y, - double alpha, const double* x) const; + virtual void timesVec(double beta, double* y, double alpha, const double* x) const; - virtual void transTimesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const; + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; /* same as above for mostly for internal use - avoid using it */ - virtual void transTimesVec(double beta, double* y, - double alpha, const double* x) const; + virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const; /** - * @brief W = beta*W + alpha*this*X + * @brief W = beta*W + alpha*this*X * - * @pre W, 'this', and 'X' need to be local matrices (not distributed). All multiplications - * of distributed matrices needed by HiOp internally can be done efficiently in parallel using the + * @pre W, 'this', and 'X' need to be local matrices (not distributed). All multiplications + * of distributed matrices needed by HiOp internally can be done efficiently in parallel using the * 'timesMatTrans' and 'transTimesMat' methods below. - */ + */ virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const; - + /** - * @brief W = beta*W + alpha*this*X + * @brief W = beta*W + alpha*this*X * Contains the implementation internals of the above; can be used on its own. */ virtual void timesMat_local(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const; /** - * @brief W = beta*W + alpha*this^T*X - * + * @brief W = beta*W + alpha*this^T*X + * * @pre 'this' should be local/non-distributed. 'X' (and 'W') can be distributed. * * Note: no inter-process communication occurs in the parallel case */ virtual void transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const; - /** - * @brief W = beta*W + alpha*this*X^T + /** + * @brief W = beta*W + alpha*this*X^T * @pre 'W' need to be local/non-distributed. * * 'this' and 'X' can be distributed, in which case communication will occur. @@ -154,61 +149,67 @@ class hiopMatrixDenseRaja : public hiopMatrixDense virtual void addDiagonal(const double& alpha, const hiopVector& d_); virtual void addDiagonal(const double& value); virtual void addSubDiagonal(const double& alpha, index_type start_on_dest_diag, const hiopVector& d_); - - /** + + /** * @brief add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, const double& alpha, - const hiopVector& d_, int start_on_src_vec, int num_elems=-1); + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, + const hiopVector& d_, + int start_on_src_vec, + int num_elems = -1); virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c); - + virtual void addMatrix(double alpha, const hiopMatrix& X); /** * @brief block of W += alpha*this * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK * - * @pre 'this' has to fit in the upper triangle of W + * @pre 'this' has to fit in the upper triangle of W * @pre W.n() == W.m() */ - virtual void addToSymDenseMatrixUpperTriangle(int row_dest_start, int col_dest_start, - double alpha, hiopMatrixDense& W) const; + virtual void addToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, + hiopMatrixDense& W) const; /** * @brief block of W += alpha*transpose(this) * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK * - * @pre transpose of 'this' has to fit in the upper triangle of W + * @pre transpose of 'this' has to fit in the upper triangle of W * @pre W.n() == W.m() */ - virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, int col_dest_start, - double alpha, hiopMatrixDense& W) const; + virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, + hiopMatrixDense& W) const; /** * @brief diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where * 'this' should start to contribute. - * + * * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK * and only the upper triangle of 'this' is accessed - * + * * @pre this->n()==this->m() * @pre W.n() == W.m() */ - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const; + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const; virtual double max_abs_value(); - virtual void row_max_abs_value(hiopVector &ret_vec); + virtual void row_max_abs_value(hiopVector& ret_vec); - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale=false); + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale = false); virtual bool isfinite() const; - - //virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; - virtual void print(FILE* f=NULL, const char* msg=NULL, int maxRows=-1, int maxCols=-1, int rank=-1) const; + + // virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const; virtual hiopMatrixDense* alloc_clone() const; virtual hiopMatrixDense* new_copy() const; @@ -217,22 +218,21 @@ class hiopMatrixDenseRaja : public hiopMatrixDense /// @brief copies the first 'num_rows' rows from 'src' to 'this' starting at 'row_dest' void copyRowsFrom(const hiopMatrixDense& src, int num_rows, int row_dest); - + /** * @brief Copy 'n_rows' rows specified by 'rows_idxs' (array of size 'n_rows') from 'src' to 'this' - * + * * @pre 'this' has exactly 'n_rows' rows * @pre 'src' and 'this' must have same number of columns * @pre number of rows in 'src' must be at least the number of rows in 'this' */ void copyRowsFrom(const hiopMatrix& src_gen, const index_type* rows_idxs, size_type n_rows); - + /// @brief copies 'src' into this as a block starting at (i_block_start,j_block_start) - void copyBlockFromMatrix(const index_type i_block_start, const index_type j_block_start, - const hiopMatrixDense& src); - + void copyBlockFromMatrix(const index_type i_block_start, const index_type j_block_start, const hiopMatrixDense& src); + /** - * @brief overwrites 'this' with 'src''s block that starts at (i_src_block_start,j_src_block_start) + * @brief overwrites 'this' with 'src''s block that starts at (i_src_block_start,j_src_block_start) * and has dimensions of 'this' */ void copyFromMatrixBlock(const hiopMatrixDense& src, const int i_src_block_start, const int j_src_block_start); @@ -253,14 +253,15 @@ class hiopMatrixDenseRaja : public hiopMatrixDense virtual size_type get_local_size_m() const { return m_local_; } virtual MPI_Comm get_mpi_comm() const { return comm_; } - inline double* local_data_host() const {return data_host_; } - double* local_data() {return data_dev_; } + inline double* local_data_host() const { return data_host_; } + double* local_data() { return data_dev_; } double* local_data_const() const { return data_dev_; } + public: - virtual size_type m() const {return m_local_;} - virtual size_type n() const {return n_global_;} + virtual size_type m() const { return m_local_; } + virtual size_type n() const { return n_global_; } #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const; + virtual bool assertSymmetry(double tol = 1e-16) const; #endif virtual bool symmetrize(); @@ -268,32 +269,31 @@ class hiopMatrixDenseRaja : public hiopMatrixDense void copyFromDev(); private: - mutable ExecSpace exec_space_; using MEMBACKENDHOST = typename MEMBACKEND::MemBackendHost; - //EXECPOLICYRAJA is used internally as a execution policy. EXECPOLICYHOST is not used internally - //in this class. EXECPOLICYHOST can be any host policy as long as memory allocations and - //and transfers within and from `exec_space_host_` work with EXECPOLICYHOST (currently all such - //combinations work). + // EXECPOLICYRAJA is used internally as a execution policy. EXECPOLICYHOST is not used internally + // in this class. EXECPOLICYHOST can be any host policy as long as memory allocations and + // and transfers within and from `exec_space_host_` work with EXECPOLICYHOST (currently all such + // combinations work). using EXECPOLICYHOST = hiop::ExecPolicySeq; mutable ExecSpace exec_space_host_; - std::string mem_space_; - double* data_host_; ///< pointer to host mirror of matrix data - double* data_dev_; ///< pointer to memory space of matrix data - int n_local_; ///< local number of columns - size_type glob_jl_; ///< global index of first column in the local data block - size_type glob_ju_; ///< global index of first column in the next data block + double* data_host_; ///< pointer to host mirror of matrix data + double* data_dev_; ///< pointer to memory space of matrix data + int n_local_; ///< local number of columns + size_type glob_jl_; ///< global index of first column in the local data block + size_type glob_ju_; ///< global index of first column in the next data block double* yglob_host_; double* ya_host_; - mutable double* buff_mxnlocal_host_; ///< host data buffer + mutable double* buff_mxnlocal_host_; ///< host data buffer + + // this is very private do not touch :) + size_type max_rows_; - //this is very private do not touch :) - size_type max_rows_; private: hiopMatrixDenseRaja() {}; /** copy constructor, for internal/private use only (it doesn't copy the values) */ @@ -302,4 +302,4 @@ class hiopMatrixDenseRaja : public hiopMatrixDense double* new_mxnlocal_host_buff() const; }; -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixDenseRajaCuda.cpp b/src/LinAlg/hiopMatrixDenseRajaCuda.cpp index 12a99175a..c2809c39b 100644 --- a/src/LinAlg/hiopMatrixDenseRajaCuda.cpp +++ b/src/LinAlg/hiopMatrixDenseRajaCuda.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -65,7 +65,7 @@ namespace hiop using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exec; using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; using matrix_exec = ExecRajaPoliciesBackend::matrix_exec; -} +} // namespace hiop #include "hiopMatrixDenseRajaImpl.hpp" @@ -73,8 +73,8 @@ namespace hiop { // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopMatrixDenseRaja; template class hiopMatrixDenseRaja; -} +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixDenseRajaHip.cpp b/src/LinAlg/hiopMatrixDenseRajaHip.cpp index 9d55b8865..5cddca043 100644 --- a/src/LinAlg/hiopMatrixDenseRajaHip.cpp +++ b/src/LinAlg/hiopMatrixDenseRajaHip.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -65,7 +65,7 @@ namespace hiop using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exec; using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; using matrix_exec = ExecRajaPoliciesBackend::matrix_exec; -} +} // namespace hiop #include "hiopMatrixDenseRajaImpl.hpp" @@ -73,8 +73,8 @@ namespace hiop { // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopMatrixDenseRaja; template class hiopMatrixDenseRaja; -} +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixDenseRajaImpl.hpp b/src/LinAlg/hiopMatrixDenseRajaImpl.hpp index def5051e5..55b196288 100644 --- a/src/LinAlg/hiopMatrixDenseRajaImpl.hpp +++ b/src/LinAlg/hiopMatrixDenseRajaImpl.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -68,29 +68,29 @@ namespace hiop { template -hiopMatrixDenseRaja:: -hiopMatrixDenseRaja(const size_type& m, - const size_type& glob_n, - std::string mem_space, - index_type* col_part /* = nullptr */, - MPI_Comm comm /* = MPI_COMM_SELF */, - const size_type& m_max_alloc /* = -1 */) - : hiopMatrixDense(m, glob_n, comm), - exec_space_(ExecSpace(MEMBACKEND(mem_space))), - exec_space_host_(ExecSpace(MEMBACKENDHOST::new_backend_host())), - mem_space_(mem_space), - buff_mxnlocal_host_(nullptr) +hiopMatrixDenseRaja::hiopMatrixDenseRaja(const size_type& m, + const size_type& glob_n, + std::string mem_space, + index_type* col_part /* = nullptr */, + MPI_Comm comm /* = MPI_COMM_SELF */, + const size_type& m_max_alloc /* = -1 */) + : hiopMatrixDense(m, glob_n, comm), + exec_space_(ExecSpace(MEMBACKEND(mem_space))), + exec_space_host_(ExecSpace(MEMBACKENDHOST::new_backend_host())), + mem_space_(mem_space), + buff_mxnlocal_host_(nullptr) { - m_local_ = m; + m_local_ = m; n_global_ = glob_n; - comm_ = comm; - int P = 0; + comm_ = comm; + int P = 0; if(col_part) { #ifdef HIOP_USE_MPI - int ierr = MPI_Comm_rank(comm_, &P); assert(ierr == MPI_SUCCESS); + int ierr = MPI_Comm_rank(comm_, &P); + assert(ierr == MPI_SUCCESS); #endif glob_jl_ = col_part[P]; - glob_ju_ = col_part[P+1]; + glob_ju_ = col_part[P + 1]; } else { glob_jl_ = 0; glob_ju_ = n_global_; @@ -100,19 +100,18 @@ hiopMatrixDenseRaja(const size_type& m, myrank_ = P; max_rows_ = m_max_alloc; - if(max_rows_ == -1) - max_rows_ = m_local_; - assert(max_rows_>=m_local_ && "the requested extra allocation is smaller than the allocation needed by the matrix"); + if(max_rows_ == -1) max_rows_ = m_local_; + assert(max_rows_ >= m_local_ && "the requested extra allocation is smaller than the allocation needed by the matrix"); #ifndef HIOP_USE_GPU - assert(mem_space_ == "HOST"); + assert(mem_space_ == "HOST"); #endif - data_dev_ = exec_space_.template alloc_array(n_local_*max_rows_); + data_dev_ = exec_space_.template alloc_array(n_local_ * max_rows_); if(exec_space_.mem_backend().is_device()) { // If memory space is on device, create a host mirror - data_host_ = exec_space_host_.template alloc_array(n_local_*max_rows_); + data_host_ = exec_space_host_.template alloc_array(n_local_ * max_rows_); yglob_host_ = exec_space_host_.template alloc_array(m_local_); ya_host_ = exec_space_host_.template alloc_array(m_local_); } else { @@ -136,10 +135,10 @@ hiopMatrixDenseRaja::~hiopMatrixDenseRaja() } exec_space_.dealloc_array(data_dev_); - data_host_ = nullptr; - data_dev_ = nullptr; + data_host_ = nullptr; + data_dev_ = nullptr; yglob_host_ = nullptr; - ya_host_ = nullptr; + ya_host_ = nullptr; if(buff_mxnlocal_host_ != nullptr) { exec_space_host_.dealloc_array(buff_mxnlocal_host_); @@ -149,30 +148,30 @@ hiopMatrixDenseRaja::~hiopMatrixDenseRaja() /** * @brief Matrix copy constructor - * + * */ template hiopMatrixDenseRaja::hiopMatrixDenseRaja(const hiopMatrixDenseRaja& dm) { - n_local_ = dm.n_local_; - m_local_ = dm.m_local_; + n_local_ = dm.n_local_; + m_local_ = dm.m_local_; n_global_ = dm.n_global_; - glob_jl_ = dm.glob_jl_; - glob_ju_ = dm.glob_ju_; - comm_ = dm.comm_; - myrank_ = dm.myrank_; + glob_jl_ = dm.glob_jl_; + glob_ju_ = dm.glob_ju_; + comm_ = dm.comm_; + myrank_ = dm.myrank_; max_rows_ = dm.max_rows_; mem_space_ = dm.mem_space_; - data_dev_ = exec_space_.template alloc_array(n_local_*max_rows_); + data_dev_ = exec_space_.template alloc_array(n_local_ * max_rows_); if(exec_space_.mem_backend().is_device()) { // If memory space is on device, create a host mirror - data_host_ = exec_space_host_.template alloc_array(n_local_*max_rows_); + data_host_ = exec_space_host_.template alloc_array(n_local_ * max_rows_); yglob_host_ = exec_space_host_.template alloc_array(m_local_); ya_host_ = exec_space_host_.template alloc_array(m_local_); buff_mxnlocal_host_ = nullptr; if(dm.buff_mxnlocal_host_ != nullptr) { - buff_mxnlocal_host_ = exec_space_host_.template alloc_array(max_rows_*n_local_); + buff_mxnlocal_host_ = exec_space_host_.template alloc_array(max_rows_ * n_local_); } } else { data_host_ = data_dev_; @@ -181,14 +180,14 @@ hiopMatrixDenseRaja::hiopMatrixDenseRaja(const hiopMatr ya_host_ = exec_space_.template alloc_array(m_local_); buff_mxnlocal_host_ = nullptr; if(dm.buff_mxnlocal_host_ != nullptr) { - buff_mxnlocal_host_ = exec_space_.template alloc_array(max_rows_*n_local_); + buff_mxnlocal_host_ = exec_space_.template alloc_array(max_rows_ * n_local_); } } } /** * @brief Appends the contents of an input vector to the row past the end in this matrix. - * + * * @pre The length of the vector must equal this->n_local_ * @pre m_local < max_rows_ */ @@ -196,7 +195,7 @@ template void hiopMatrixDenseRaja::appendRow(const hiopVector& rowvec) { const auto& row = dynamic_cast&>(rowvec); -#ifdef HIOP_DEEPCHECKS +#ifdef HIOP_DEEPCHECKS assert(row.get_local_size() == n_local_); assert(m_local_ < max_rows_ && "no more space to append rows ... should have preallocated more rows."); #endif @@ -205,12 +204,11 @@ void hiopMatrixDenseRaja::appendRow(const hiopVector& r m_local_++; } - /** * @brief Copies the elements of the input matrix to this matrix. - * + * * @param[in] dm - Matrix whose elements will be copied. - * + * * @pre The input and this matrix must have the same size and partitioning. * @post Matrix `dm` is unchanged. * @post Elements of `this` are overwritten @@ -220,18 +218,18 @@ void hiopMatrixDenseRaja::copyFrom(const hiopMatrixDens { const auto& dm = dynamic_cast(dmmat); // Verify sizes and partitions - assert(n_local_ == dm.n_local_ ); - assert(m_local_ == dm.m_local_ ); + assert(n_local_ == dm.n_local_); + assert(m_local_ == dm.m_local_); assert(n_global_ == dm.n_global_); - assert(glob_jl_ == dm.glob_jl_ ); - assert(glob_ju_ == dm.glob_ju_ ); + assert(glob_jl_ == dm.glob_jl_); + assert(glob_ju_ == dm.glob_ju_); - exec_space_.copy(data_dev_, dm.data_dev_, n_local_*max_rows_, dm.exec_space_); + exec_space_.copy(data_dev_, dm.data_dev_, n_local_ * max_rows_, dm.exec_space_); } /** * @brief Copies the elements of `this` matrix to output buffer. - * + * * @pre The input buffer is big enough to hold the entire matrix. * @pre The memory pointed at by the input is in the same execution space as `this`. */ @@ -239,15 +237,15 @@ template void hiopMatrixDenseRaja::copy_to(double* dest) { if(nullptr != dest) { - exec_space_.copy(dest, data_dev_, n_local_*max_rows_); + exec_space_.copy(dest, data_dev_, n_local_ * max_rows_); } } /** * @brief Copies the elements of the input buffer to this matrix. - * + * * @param[in] buffer - The beginning of a matrix - * + * * @pre The input matrix is a pointer to the beginning of a row-major 2D * data block with the same dimensions as this matrix. * @pre The memory pointed at by the input is in the same execution space as `this`. @@ -256,35 +254,33 @@ template void hiopMatrixDenseRaja::copyFrom(const double* src) { if(nullptr != src) { - exec_space_.copy(data_dev_, src, m_local_*n_local_); + exec_space_.copy(data_dev_, src, m_local_ * n_local_); } } /** * @brief Copies rows from a source matrix to this matrix. - * + * * @param[in] src - Matrix whose rows will be copied. * @param[in] num_rows - Number of rows to copy. * @param[in] row_dest - Starting row in this matrix to copy to. - * + * * @pre this->n_global_ == src.n_global_ && this->n_local_ == src.n_local_ * @pre row_dest + num_rows <= this->m_local_ * @pre num_rows <= src.m_local_ */ template -void hiopMatrixDenseRaja::copyRowsFrom(const hiopMatrixDense& srcmat, - int num_rows, - int row_dest) +void hiopMatrixDenseRaja::copyRowsFrom(const hiopMatrixDense& srcmat, int num_rows, int row_dest) { - if(num_rows==0) { + if(num_rows == 0) { return; - } + } const auto& src = dynamic_cast(srcmat); #ifdef HIOP_DEEPCHECKS assert(row_dest >= 0); assert(n_global_ == src.n_global_); - assert(n_local_ == src.n_local_); + assert(n_local_ == src.n_local_); assert(row_dest + num_rows <= m_local_); assert(num_rows <= src.m_local_); #endif @@ -295,103 +291,104 @@ void hiopMatrixDenseRaja::copyRowsFrom(const hiopMatrix /** * @brief Copies rows from a source matrix to this matrix. - * + * * @param[in] src_mat - source matrix - * @param[in] rows_idxs - indices of rows in src_mat to be copied; a device pointer to an array + * @param[in] rows_idxs - indices of rows in src_mat to be copied; a device pointer to an array * of n_rows indeces * @param[in] n_rows - number of rows to be copied - * + * * @pre `src_mat` and `this` have same number of columns and partitioning. * @pre m_local_ == n_rows <= src_mat.m_local_ - * + * */ template -void hiopMatrixDenseRaja:: -copyRowsFrom(const hiopMatrix& src_mat, const index_type* rows_idxs, size_type n_rows) +void hiopMatrixDenseRaja::copyRowsFrom(const hiopMatrix& src_mat, + const index_type* rows_idxs, + size_type n_rows) { const hiopMatrixDenseRaja& src = dynamic_cast(src_mat); - assert(n_global_==src.n_global_); - assert(n_local_==src.n_local_); - assert(n_rows<=src.m_local_); + assert(n_global_ == src.n_global_); + assert(n_local_ == src.n_local_); + assert(n_rows <= src.m_local_); assert(n_rows == m_local_); RAJA::View> Mview(this->data_dev_, m_local_, n_local_); RAJA::View> Sview(src.data_dev_, src.m_local_, src.n_local_); - - for(int i=0; i(&Sview(rows_idxs[i], 0)), n_local_ * sizeof(double)); - RAJA::forall(RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type j) - { - Mview(i, j) = Sview(rows_idxs[i], j); - }); + + for(int i = 0; i < n_rows; ++i) { + // rm.copy(&Mview(i, 0), const_cast(&Sview(rows_idxs[i], 0)), n_local_ * sizeof(double)); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type j) { Mview(i, j) = Sview(rows_idxs[i], j); }); } } - + /** * @brief Copies the content of `src` into a location in `this` matrix. - * + * * @param[in] i_start Starting row of this matrix to copy to. * @param[in] j_start Starting column of this matrix to copy to. * @param[in] src Source matrix to copy into this one. - * + * * @pre The size of _src_ plus the starting location must be within the bounds * of this matrix. * @pre This method should only be used with non-distributed matrices. */ template -void hiopMatrixDenseRaja:: -copyBlockFromMatrix(const index_type i_start, const index_type j_start, const hiopMatrixDense& srcmat) +void hiopMatrixDenseRaja::copyBlockFromMatrix(const index_type i_start, + const index_type j_start, + const hiopMatrixDense& srcmat) { const auto& src = dynamic_cast(srcmat); - assert(n_local_==n_global_ && "this method should be used only in 'serial' mode"); - assert(src.n_local_==src.n_global_ && "this method should be used only in 'serial' mode"); - assert(m_local_>=i_start+src.m_local_ && "the matrix does not fit as a sublock in 'this' at specified coordinates"); - assert(n_local_>=j_start+src.n_local_ && "the matrix does not fit as a sublock in 'this' at specified coordinates"); - - //quick returns for empty source matrices - if(src.n()==0) return; - if(src.m()==0) return; + assert(n_local_ == n_global_ && "this method should be used only in 'serial' mode"); + assert(src.n_local_ == src.n_global_ && "this method should be used only in 'serial' mode"); + assert(m_local_ >= i_start + src.m_local_ && "the matrix does not fit as a sublock in 'this' at specified coordinates"); + assert(n_local_ >= j_start + src.n_local_ && "the matrix does not fit as a sublock in 'this' at specified coordinates"); + + // quick returns for empty source matrices + if(src.n() == 0) return; + if(src.m() == 0) return; #ifdef HIOP_DEEPCHECKS - assert(i_start=0); assert(j_start>=0); + assert(i_start < m_local_ || !m_local_); + assert(j_start < n_local_ || !n_local_); + assert(i_start >= 0); + assert(j_start >= 0); #endif RAJA::View> Mview(this->data_dev_, m_local_, n_local_); RAJA::View> Sview(src.data_dev_, src.m_local_, src.n_local_); - for(index_type ii=0; ii -void hiopMatrixDenseRaja:: -copyFromMatrixBlock(const hiopMatrixDense& srcmat, const int i_block, const int j_block) +void hiopMatrixDenseRaja::copyFromMatrixBlock(const hiopMatrixDense& srcmat, + const int i_block, + const int j_block) { const auto& src = dynamic_cast(srcmat); - assert(n_local_==n_global_ && "this method should be used only in 'serial' mode"); - assert(src.n_local_==src.n_global_ && "this method should be used only in 'serial' mode"); - assert(m_local_+i_block<=src.m_local_ && "the source does not enough rows to fill 'this'"); - assert(n_local_+j_block<=src.n_local_ && "the source does not enough cols to fill 'this'"); - + assert(n_local_ == n_global_ && "this method should be used only in 'serial' mode"); + assert(src.n_local_ == src.n_global_ && "this method should be used only in 'serial' mode"); + assert(m_local_ + i_block <= src.m_local_ && "the source does not enough rows to fill 'this'"); + assert(n_local_ + j_block <= src.n_local_ && "the source does not enough cols to fill 'this'"); + RAJA::View> Mview(this->data_dev_, m_local_, n_local_); RAJA::View> Sview(src.data_dev_, src.m_local_, src.n_local_); - + if(n_local_ == src.n_local_) { // j_block=0 exec_space_.copy(&Mview(0, 0), &Sview(i_block, 0), n_local_ * m_local_, src.exec_space_); } else { - for(int i=0; i < m_local_; i++) { + for(int i = 0; i < m_local_; i++) { exec_space_.copy(&Mview(i, 0), &Sview(i + i_block, j_block), n_local_, src.exec_space_); } } @@ -399,56 +396,54 @@ copyFromMatrixBlock(const hiopMatrixDense& srcmat, const int i_block, const int /** * @brief Shifts the rows of this matrix up or down by a specified amount. - * + * * @todo Document this better. */ template void hiopMatrixDenseRaja::shiftRows(size_type shift) { - if(shift == 0) - return; - if(abs(shift) == m_local_) - return; //nothing to shift - if(m_local_ <= 1) - return; //nothing to shift - + if(shift == 0) return; + if(abs(shift) == m_local_) return; // nothing to shift + if(m_local_ <= 1) return; // nothing to shift + assert(abs(shift) < m_local_); - //at this point m_local_ should be >=2 + // at this point m_local_ should be >=2 assert(m_local_ >= 2); - //and + // and assert(m_local_ - abs(shift) >= 1); #if defined(HIOP_DEEPCHECKS) && !defined(NDEBUG) copyFromDev(); double test1 = 8.3, test2 = -98.3; - if(n_local_>0) - { - //not sure if memcpy is copying sequentially on all systems. we check this. - //let's at least check it - //!test1=shift<0 ? M_host_[-shift][0] : M_host_[m_local_-shift-1][0]; - test1=shift<0 ? data_host_[-shift*n_local_] : data_host_[(m_local_-shift-1)*n_local_]; - //!test2=shift<0 ? M_host_[-shift][n_local_-1] : M_host_[m_local_-shift-1][n_local_-1]; - test2=shift<0 ? data_host_[-shift*n_local_ + n_local_-1] : data_host_[(m_local_-shift-1)*n_local_ + n_local_-1]; + if(n_local_ > 0) { + // not sure if memcpy is copying sequentially on all systems. we check this. + // let's at least check it + //! test1=shift<0 ? M_host_[-shift][0] : M_host_[m_local_-shift-1][0]; + test1 = shift < 0 ? data_host_[-shift * n_local_] : data_host_[(m_local_ - shift - 1) * n_local_]; + //! test2=shift<0 ? M_host_[-shift][n_local_-1] : M_host_[m_local_-shift-1][n_local_-1]; + test2 = shift < 0 ? data_host_[-shift * n_local_ + n_local_ - 1] + : data_host_[(m_local_ - shift - 1) * n_local_ + n_local_ - 1]; } #endif RAJA::View> Mview(this->data_dev_, m_local_, n_local_); - if(shift<0) { + if(shift < 0) { for(int row = 0; row < m_local_ + shift; row++) { exec_space_.copy(&Mview(row, 0), &Mview(row - shift, 0), n_local_); } } else { - for(int row = m_local_-1; row >= shift; row--) { + for(int row = m_local_ - 1; row >= shift; row--) { exec_space_.copy(&Mview(row, 0), &Mview(row - shift, 0), n_local_); } } - + #if defined(HIOP_DEEPCHECKS) && !defined(NDEBUG) copyFromDev(); - if(n_local_>0) - { - assert(test1==data_host_[n_local_*(shift<0?0:m_local_-1)] && "a different copy technique than memcpy is needed on this system"); - assert(test2==data_host_[n_local_*(shift<0?0:m_local_-1) + n_local_-1] && "a different copy technique than memcpy is needed on this system"); + if(n_local_ > 0) { + assert(test1 == data_host_[n_local_ * (shift < 0 ? 0 : m_local_ - 1)] && + "a different copy technique than memcpy is needed on this system"); + assert(test2 == data_host_[n_local_ * (shift < 0 ? 0 : m_local_ - 1) + n_local_ - 1] && + "a different copy technique than memcpy is needed on this system"); } #endif } @@ -462,9 +457,7 @@ void hiopMatrixDenseRaja::replaceRow(index_type row, co assert(row >= 0); assert(row < m_local_); size_type vec_size = rvec.get_local_size(); - exec_space_.copy(&Mview(row, 0), - rvec.local_data_const(), - (vec_size >= n_local_ ? n_local_ : vec_size)); + exec_space_.copy(&Mview(row, 0), rvec.local_data_const(), (vec_size >= n_local_ ? n_local_ : vec_size)); } /// Overwrites the values in row_vec with those from a specified row in this matrix @@ -472,16 +465,15 @@ template void hiopMatrixDenseRaja::getRow(index_type irow, hiopVector& row_vec) { RAJA::View> Mview(this->data_dev_, m_local_, n_local_); - assert(irow>=0); - assert(irow= 0); + assert(irow < m_local_); auto& vec = dynamic_cast&>(row_vec); assert(n_local_ == vec.get_local_size()); exec_space_.copy(vec.local_data(), &Mview(irow, 0), n_local_, vec.exec_space()); } template -void hiopMatrixDenseRaja:: -set_Hess_FR(const hiopMatrixDense& Hess, const hiopVector& add_diag_de) +void hiopMatrixDenseRaja::set_Hess_FR(const hiopMatrixDense& Hess, const hiopVector& add_diag_de) { double one{1.0}; copyFrom(Hess); @@ -492,30 +484,28 @@ set_Hess_FR(const hiopMatrixDense& Hess, const hiopVector& add_diag_de) template void hiopMatrixDenseRaja::overwriteUpperTriangleWithLower() { - assert(n_local_==n_global_ && "Use only with local, non-distributed matrices"); + assert(n_local_ == n_global_ && "Use only with local, non-distributed matrices"); int n_local = n_local_; double* data = data_dev_; RAJA::View> Mview(data, get_local_size_m(), get_local_size_n()); - RAJA::forall(RAJA::RangeSegment(0, m_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - for (int j = i + 1; j < n_local; j++) - Mview(i, j) = Mview(j, i); - }); + RAJA::forall( + RAJA::RangeSegment(0, m_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + for(int j = i + 1; j < n_local; j++) Mview(i, j) = Mview(j, i); + }); } template void hiopMatrixDenseRaja::overwriteLowerTriangleWithUpper() { - assert(n_local_==n_global_ && "Use only with local, non-distributed matrices"); + assert(n_local_ == n_global_ && "Use only with local, non-distributed matrices"); double* data = data_dev_; RAJA::View> Mview(data, get_local_size_m(), get_local_size_n()); - RAJA::forall(RAJA::RangeSegment(1, m_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - for (int j = 0; j < i; j++) - Mview(i, j) = Mview(j, i); - }); + RAJA::forall( + RAJA::RangeSegment(1, m_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + for(int j = 0; j < i; j++) Mview(i, j) = Mview(j, i); + }); } #endif @@ -551,11 +541,7 @@ template void hiopMatrixDenseRaja::setToConstant(double c) { double* dd = this->data_dev_; - RAJA::forall(RAJA::RangeSegment(0, n_local_ * m_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] = c; - }); + RAJA::forall(RAJA::RangeSegment(0, n_local_ * m_local_), RAJA_LAMBDA(RAJA::Index_type i) { dd[i] = c; }); } /** @@ -567,53 +553,56 @@ bool hiopMatrixDenseRaja::isfinite() const { double* dd = this->data_dev_; RAJA::ReduceSum any(0); - RAJA::forall(RAJA::RangeSegment(0, n_local_ * m_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if (!std::isfinite(dd[i])) - any += 1; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_ * m_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(!std::isfinite(dd[i])) any += 1; + }); return any.get() == 0; } /** * @brief Print matrix to a file - * + * * @note This is I/O function and takes place on the host. Need to move * matrix data to the host mirror memory. */ template -void hiopMatrixDenseRaja::print(FILE* f, - const char* msg/*=NULL*/, - int maxRows/*=-1*/, - int maxCols/*=-1*/, - int rank/*=-1*/) const +void hiopMatrixDenseRaja::print(FILE* f, + const char* msg /*=NULL*/, + int maxRows /*=-1*/, + int maxCols /*=-1*/, + int rank /*=-1*/) const { - if(myrank_==rank || rank==-1) { - if(NULL==f) f=stdout; - if(maxRows>m_local_) maxRows=m_local_; - if(maxCols>n_local_) maxCols=n_local_; + if(myrank_ == rank || rank == -1) { + if(NULL == f) f = stdout; + if(maxRows > m_local_) maxRows = m_local_; + if(maxCols > n_local_) maxCols = n_local_; if(msg) { - fprintf(f, "%s (local_dims=[%d,%d])\n", msg, m_local_,n_local_); - } else { - fprintf(f, "hiopMatrixDenseRaja::printing max=[%d,%d] (local_dims=[%d,%d], on rank=%d)\n", - maxRows, maxCols, m_local_,n_local_,myrank_); + fprintf(f, "%s (local_dims=[%d,%d])\n", msg, m_local_, n_local_); + } else { + fprintf(f, + "hiopMatrixDenseRaja::printing max=[%d,%d] (local_dims=[%d,%d], on rank=%d)\n", + maxRows, + maxCols, + m_local_, + n_local_, + myrank_); } - maxRows = maxRows>=0?maxRows:m_local_; - maxCols = maxCols>=0?maxCols:n_local_; + maxRows = maxRows >= 0 ? maxRows : m_local_; + maxCols = maxCols >= 0 ? maxCols : n_local_; fprintf(f, "["); - for(int i=0; i0) fprintf(f, " "); - for(int j=0; j 0) fprintf(f, " "); + for(int j = 0; j < maxCols; j++) fprintf(f, "%20.12e ", data_host_[i * n_local_ + j]); + if(i < maxRows - 1) fprintf(f, "; ...\n"); else fprintf(f, "];\n"); - } - } // if(myrank_==rank || rank==-1) + } + } // if(myrank_==rank || rank==-1) } #include @@ -621,15 +610,15 @@ void hiopMatrixDenseRaja::print(FILE* f, /** * @brief Multiplies this matrix by a vector and * stores the result in another vector. - * + * * The full operation performed is: * _y_ = _beta_ * _y_ + _alpha_ * _this_ * _x_ - * + * * @param[in] beta Amount to scale _y_ by * @param[out] y Vector to store result in * @param[in] alpha Amount to scale result of this * _x_ by * @param[in] x Vector to multiply this matrix with - * + * * @pre The length of _x_ equals the number of column * of this matrix (n_local_). * @pre The length of _y_ equals the number of rows @@ -649,20 +638,19 @@ void hiopMatrixDenseRaja::timesVec(double beta, const auto& x = dynamic_cast&>(x_); #ifdef HIOP_DEEPCHECKS assert(y.get_local_size() == m_local_); - assert(y.get_size() == m_local_); //y should not be distributed + assert(y.get_size() == m_local_); // y should not be distributed assert(x.get_local_size() == n_local_); assert(x.get_size() == n_global_); - if(beta != 0) - { + if(beta != 0) { assert(y.isfinite_local() && "pre timesvec"); } assert(x.isfinite_local()); #endif - + timesVec(beta, y.local_data(), alpha, x.local_data_const()); -#ifdef HIOP_DEEPCHECKS +#ifdef HIOP_DEEPCHECKS assert(y.isfinite_local() && "post timesVec"); #endif } @@ -670,43 +658,38 @@ void hiopMatrixDenseRaja::timesVec(double beta, /** * @brief Multiplies this matrix by a vector and * stores the result in another vector. - * + * * @pre The input and output vectors are pointers to memory allocated in the same * execution/memory space. * @todo GPU BLAS (GEMV, SCAL) for this - * - * see timesVec for more detail + * + * see timesVec for more detail */ template -void hiopMatrixDenseRaja::timesVec(double beta, - double* ya, - double alpha, - const double* xa) const +void hiopMatrixDenseRaja::timesVec(double beta, double* ya, double alpha, const double* xa) const { #ifdef HIOP_USE_MPI - //only add beta*y on one processor (rank 0) - if (myrank_ != 0) - beta = 0.0; + // only add beta*y on one processor (rank 0) + if(myrank_ != 0) beta = 0.0; #endif double* data = data_dev_; int m_local = m_local_; int n_local = n_local_; // y = beta * y + alpha * this * x RAJA::View> Mview(data, m_local, n_local); - RAJA::forall(RAJA::RangeSegment(0, m_local), - RAJA_LAMBDA(RAJA::Index_type i) - { - double dot = 0; - for (int j = 0; j < n_local; j++) - dot += Mview(i, j) * xa[j]; - ya[i] = beta * ya[i] + alpha * dot; - }); + RAJA::forall( + RAJA::RangeSegment(0, m_local), + RAJA_LAMBDA(RAJA::Index_type i) { + double dot = 0; + for(int j = 0; j < n_local; j++) dot += Mview(i, j) * xa[j]; + ya[i] = beta * ya[i] + alpha * dot; + }); #ifdef HIOP_USE_MPI - //here m_local_ is > 0 + // here m_local_ is > 0 exec_space_host_.copy(ya_host_, ya, m_local_, exec_space_); int ierr = MPI_Allreduce(ya_host_, yglob_host_, m_local_, MPI_DOUBLE, MPI_SUM, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); exec_space_.copy(ya, yglob_host_, m_local_, exec_space_host_); #endif } @@ -715,10 +698,10 @@ void hiopMatrixDenseRaja::timesVec(double beta, * @brief Multiplies the transpose of this matrix by a vector and * stores the result in another vector: * y = beta * y + alpha * transpose(this) * x - * + * * @todo GPU BLAS (DGEMV, DSCAL) for this - * - * see timesVec for more detail + * + * see timesVec for more detail */ template void hiopMatrixDenseRaja::transTimesVec(double beta, @@ -730,7 +713,7 @@ void hiopMatrixDenseRaja::transTimesVec(double beta, const auto& x = dynamic_cast&>(x_); #ifdef HIOP_DEEPCHECKS assert(x.get_local_size() == m_local_); - assert(x.get_size() == m_local_); //x should not be distributed + assert(x.get_size() == m_local_); // x should not be distributed assert(y.get_local_size() == n_local_); assert(y.get_size() == n_global_); assert(y.isfinite_local()); @@ -742,12 +725,12 @@ void hiopMatrixDenseRaja::transTimesVec(double beta, /** * @brief Multiplies the transpose of this matrix by a vector and * stores the result in another vector. - * + * * @pre The input and output vectors are pointers to the same * execution/memory space. * @todo GPU BLAS (DGEMV, DSCAL) for this - * - * see timesVec for more detail + * + * see timesVec for more detail */ template void hiopMatrixDenseRaja::transTimesVec(double beta, @@ -763,25 +746,24 @@ void hiopMatrixDenseRaja::transTimesVec(double beta, // TODO: consider performance benefits of using nested RAJA loop RAJA::View> Mview(data, m_local, n_local); - RAJA::forall(RAJA::RangeSegment(0, n_local), - RAJA_LAMBDA(RAJA::Index_type j) - { - double dot = 0; - for (int i = 0; i < m_local; i++) - dot += Mview(i, j) * xa[i]; - ya[j] = beta * ya[j] + alpha * dot; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local), + RAJA_LAMBDA(RAJA::Index_type j) { + double dot = 0; + for(int i = 0; i < m_local; i++) dot += Mview(i, j) * xa[i]; + ya[j] = beta * ya[j] + alpha * dot; + }); } /** * @brief Multiplies this matrix by a matrix and * stores the result in another matrix. - * + * * @param[in] beta Amount to scale _W_ by * @param[out] W Matrix to store result in * @param[in] alpha Amount to scale result of this * _X_ by * @param[in] X Matrix to multiply this matrix with - * + * * @pre this is an mxn matrix * @pre _X_ is an nxk matrix * @pre _W_ is an mxk matrix @@ -790,7 +772,7 @@ void hiopMatrixDenseRaja::transTimesVec(double beta, * @post _W_ will have its elements modified to reflect * the below calculation. * @todo GPU BLAS (DGEMV, DSCAL) for this - * + * * The operation that is performed is: * W = beta * W + alpha * this * X * @warning MPI parallel computations are _not_ supported. @@ -805,24 +787,22 @@ void hiopMatrixDenseRaja::timesMat(double beta, timesMat_local(beta, Wmat, alpha, Xmat); #else auto& W = dynamic_cast(Wmat); - //double* WM = W.local_data_host(); + // double* WM = W.local_data_host(); const auto& X = dynamic_cast(Xmat); - + assert(W.m() == this->m()); assert(X.m() == this->n()); - assert(W.n() == X.n() ); + assert(W.n() == X.n()); - if(W.m() == 0 || X.m() == 0 || W.n() == 0) - return; -#ifdef HIOP_DEEPCHECKS + if(W.m() == 0 || X.m() == 0 || W.n() == 0) return; +#ifdef HIOP_DEEPCHECKS assert(W.isfinite()); assert(X.isfinite()); #endif - if(X.n_local_ != X.n_global_ || this->n_local_ != this->n_global_) - { + if(X.n_local_ != X.n_global_ || this->n_local_ != this->n_global_) { assert(false && "'timesMat' involving distributed matrices is not needed/supported" && - "also, it cannot be performed efficiently with the data distribution used by this class"); + "also, it cannot be performed efficiently with the data distribution used by this class"); W.setToConstant(beta); return; } @@ -830,18 +810,18 @@ void hiopMatrixDenseRaja::timesMat(double beta, // if(0==myrank_) timesMat_local(beta,W_,alpha,X_); // else timesMat_local(0., W_,alpha,X_); - // int n2Red=W.m()*W.n(); + // int n2Red=W.m()*W.n(); // double* Wglob = new_mxnlocal_buff(); //[n2Red]; // int ierr = MPI_Allreduce(WM[0], Wglob, n2Red, MPI_DOUBLE, MPI_SUM,comm); assert(ierr==MPI_SUCCESS); // memcpy(WM[0], Wglob, n2Red*sizeof(double)); -#endif // HIOP_USE_MPI +#endif // HIOP_USE_MPI } /** * @brief Multiplies this matrix by a matrix and stores the result in another matrix. - * + * * @todo GPU BLAS call for this (DGEMM) - * + * * see timesMat for more detail * MPI NOT SUPPORTED */ @@ -853,157 +833,158 @@ void hiopMatrixDenseRaja::timesMat_local(double beta, { const hiopMatrixDenseRaja& X = dynamic_cast(X_); hiopMatrixDenseRaja& W = dynamic_cast(W_); -#ifdef HIOP_DEEPCHECKS - assert(W.m()==this->m()); - assert(X.m()==this->n()); - assert(W.n()==X.n()); +#ifdef HIOP_DEEPCHECKS + assert(W.m() == this->m()); + assert(X.m() == this->n()); + assert(W.n() == X.n()); assert(W.isfinite()); assert(X.isfinite()); #endif - assert(W.n_local_==W.n_global_ && "requested multiplication is not supported, see timesMat"); - - double* data = data_dev_; + assert(W.n_local_ == W.n_global_ && "requested multiplication is not supported, see timesMat"); + + double* data = data_dev_; double* xdata = X.data_dev_; double* wdata = W.data_dev_; - RAJA::View> Mview(data, m_local_, n_local_); + RAJA::View> Mview(data, m_local_, n_local_); RAJA::View> Xview(xdata, X.m_local_, X.n_local_); RAJA::View> Wview(wdata, W.m_local_, W.n_local_); RAJA::RangeSegment row_range(0, W.m_local_); RAJA::RangeSegment col_range(0, W.n_local_); auto n_local = n_local_; - RAJA::kernel(RAJA::make_tuple(col_range, row_range), - RAJA_LAMBDA(int col, int row) - { - double dot = 0; - for (int k = 0; k < n_local; k++) - dot += Mview(row, k) * Xview(k, col); - Wview(row, col) = beta * Wview(row, col) + alpha * dot; - }); + RAJA::kernel( + RAJA::make_tuple(col_range, row_range), + RAJA_LAMBDA(int col, int row) { + double dot = 0; + for(int k = 0; k < n_local; k++) dot += Mview(row, k) * Xview(k, col); + Wview(row, col) = beta * Wview(row, col) + alpha * dot; + }); } /** * @brief Multiplies the transpose of this matrix by a matrix, storing the result * in an output matrix. - * + * * @pre Size of `this` is mxn, X is mxk, W is nxk - * + * * See timesMat for more detail. * Operation performed is: W = beta * W + alpha * this^T * X - * + * * @warning This method is not MPI distributed! */ template -void hiopMatrixDenseRaja:: -transTimesMat(double beta, hiopMatrix& W_, double alpha, const hiopMatrix& X_) const +void hiopMatrixDenseRaja::transTimesMat(double beta, + hiopMatrix& W_, + double alpha, + const hiopMatrix& X_) const { const hiopMatrixDenseRaja& X = dynamic_cast(X_); hiopMatrixDenseRaja& W = dynamic_cast(W_); - assert(W.m()==n_local_); - assert(X.m()==m_local_); - assert(W.n()==X.n()); + assert(W.m() == n_local_); + assert(X.m() == m_local_); + assert(W.n() == X.n()); #ifdef HIOP_DEEPCHECKS assert(W.isfinite()); assert(X.isfinite()); #endif - if(W.m()==0) return; + if(W.m() == 0) return; - assert(this->n_global_==this->n_local_ && "requested parallel multiplication is not supported"); + assert(this->n_global_ == this->n_local_ && "requested parallel multiplication is not supported"); - double* data = data_dev_; + double* data = data_dev_; double* xdata = X.data_dev_; double* wdata = W.data_dev_; - RAJA::View> Mview(data, m_local_, n_local_); + RAJA::View> Mview(data, m_local_, n_local_); RAJA::View> Xview(xdata, X.m_local_, X.n_local_); RAJA::View> Wview(wdata, W.m_local_, W.n_local_); RAJA::RangeSegment row_range(0, W.m_local_); RAJA::RangeSegment col_range(0, W.n_local_); auto Mm = m_local_; - RAJA::kernel(RAJA::make_tuple(col_range, row_range), - RAJA_LAMBDA(int col, int row) - { - double dot = 0; - for (int k = 0; k < Mm; k++) - dot += Mview(k, row) * Xview(k, col); - Wview(row, col) = beta * Wview(row, col) + alpha * dot; - }); + RAJA::kernel( + RAJA::make_tuple(col_range, row_range), + RAJA_LAMBDA(int col, int row) { + double dot = 0; + for(int k = 0; k < Mm; k++) dot += Mview(k, row) * Xview(k, col); + Wview(row, col) = beta * Wview(row, col) + alpha * dot; + }); } /** * @brief Multiplies this matrix by the transpose of a matrix and * stores the result in another matrix. - * + * * @todo GPU BLAS call for this (GEMM) * @todo Fix the distributed version of this method. * (Something to think about: how to find the dot product of two rows * of distributed matrices? This is what we're solving here.) - * + * * see timesMat for more detail */ template -void hiopMatrixDenseRaja:: -timesMatTrans_local(double beta, hiopMatrix& W_, double alpha, const hiopMatrix& X_) const +void hiopMatrixDenseRaja::timesMatTrans_local(double beta, + hiopMatrix& W_, + double alpha, + const hiopMatrix& X_) const { const auto& X = dynamic_cast(X_); auto& W = dynamic_cast(W_); #ifdef HIOP_DEEPCHECKS - assert(W.m()==m_local_); - assert(X.n_local_==n_local_); - assert(W.n()==X.m()); + assert(W.m() == m_local_); + assert(X.n_local_ == n_local_); + assert(W.n() == X.m()); #endif - assert(W.n_local_==W.n_global_ && "not intended for the case when the result matrix is distributed."); - if(W.m()==0) - return; - if(W.n()==0) - return; + assert(W.n_local_ == W.n_global_ && "not intended for the case when the result matrix is distributed."); + if(W.m() == 0) return; + if(W.n() == 0) return; - double* data = data_dev_; + double* data = data_dev_; double* xdata = X.data_dev_; double* wdata = W.data_dev_; - RAJA::View> Mview(data, m_local_, n_local_); + RAJA::View> Mview(data, m_local_, n_local_); RAJA::View> Xview(xdata, X.m_local_, X.n_local_); RAJA::View> Wview(wdata, W.m_local_, W.n_local_); RAJA::RangeSegment row_range(0, W.m_local_); RAJA::RangeSegment col_range(0, W.n_local_); auto Mn = n_local_; - RAJA::kernel(RAJA::make_tuple(col_range, row_range), - RAJA_LAMBDA(int col, int row) - { - double dot = 0; - for (int k = 0; k < Mn; k++) - dot += Mview(row, k) * Xview(col, k); // X^T - Wview(row, col) = beta * Wview(row, col) + alpha * dot; - }); + RAJA::kernel( + RAJA::make_tuple(col_range, row_range), + RAJA_LAMBDA(int col, int row) { + double dot = 0; + for(int k = 0; k < Mn; k++) dot += Mview(row, k) * Xview(col, k); // X^T + Wview(row, col) = beta * Wview(row, col) + alpha * dot; + }); } /** * @brief Multiplies this matrix by the transpose of a matrix and * stores the result in another matrix: `W = beta*W + alpha*this*X^T` - * + * * @todo Fix the distributed version of this method. - * + * * see timesMat for more detail */ template -void hiopMatrixDenseRaja:: -timesMatTrans(double beta, hiopMatrix& Wmat, double alpha, const hiopMatrix& Xmat) const +void hiopMatrixDenseRaja::timesMatTrans(double beta, + hiopMatrix& Wmat, + double alpha, + const hiopMatrix& Xmat) const { auto& W = dynamic_cast(Wmat); - assert(W.n_local_==W.n_global_ && "not intended for the case when the result matrix is distributed."); + assert(W.n_local_ == W.n_global_ && "not intended for the case when the result matrix is distributed."); #ifdef HIOP_DEEPCHECKS const auto& X = dynamic_cast(Xmat); assert(W.isfinite()); assert(X.isfinite()); - assert(this->n()==X.n()); - assert(this->m()==W.m()); - assert(X.m()==W.n()); + assert(this->n() == X.n()); + assert(this->m() == W.m()); + assert(X.m() == W.n()); #endif - if(W.m()==0) return; - if(W.n()==0) return; + if(W.m() == 0) return; + if(W.n() == 0) return; // only apply W * beta on one rank if(myrank_ == 0) @@ -1012,12 +993,13 @@ timesMatTrans(double beta, hiopMatrix& Wmat, double alpha, const hiopMatrix& Xma timesMatTrans_local(0.0, Wmat, alpha, Xmat); #ifdef HIOP_USE_MPI -//printf("W.m: %d, W.n: %d\n", W.m(), W.n()); + // printf("W.m: %d, W.n: %d\n", W.m(), W.n()); int n2Red = W.m() * W.n(); double* Wdata_host = W.data_host_; W.copyFromDev(); double* Wglob = W.new_mxnlocal_host_buff(); - int ierr = MPI_Allreduce(Wdata_host, Wglob, n2Red, MPI_DOUBLE, MPI_SUM, comm_); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Allreduce(Wdata_host, Wglob, n2Red, MPI_DOUBLE, MPI_SUM, comm_); + assert(ierr == MPI_SUCCESS); memcpy(Wdata_host, Wglob, n2Red * sizeof(double)); W.copyToDev(); #endif @@ -1025,13 +1007,13 @@ timesMatTrans(double beta, hiopMatrix& Wmat, double alpha, const hiopMatrix& Xma /** * @brief Adds the values of a vector to the diagonal of this matrix. - * + * * @param[in] alpha - Amount to scale values of _d_ by. * @param[in] dvec - Vector to add to this matrix's diagonal. - * + * * @pre This matrix is square. * @pre The length of _dvec_ equals the length of the diagonal of this matrix. - * + * * @warning This method is not MPI distributed */ template @@ -1040,28 +1022,26 @@ void hiopMatrixDenseRaja::addDiagonal(const double& alp const auto& d = dynamic_cast&>(dvec); #ifdef HIOP_DEEPCHECKS - assert(d.get_size()==n()); - assert(d.get_size()==m()); - assert(d.get_local_size()==m_local_); - assert(d.get_local_size()==n_local_); + assert(d.get_size() == n()); + assert(d.get_size() == m()); + assert(d.get_local_size() == m_local_); + assert(d.get_local_size() == n_local_); #endif // the min() is symbolic as n/m_local_ should be equal int diag = std::min(get_local_size_m(), get_local_size_n()); double* data = data_dev_; const double* dd = d.local_data_const(); - RAJA::View> Mview(data, get_local_size_m(), get_local_size_n()); // matrix - RAJA::View> Dview(dd, d.get_size()); // vector - RAJA::forall(RAJA::RangeSegment(0, diag), - RAJA_LAMBDA(RAJA::Index_type i) - { - Mview(i, i) += Dview(i) * alpha; - }); + RAJA::View> Mview(data, get_local_size_m(), get_local_size_n()); // matrix + RAJA::View> Dview(dd, d.get_size()); // vector + RAJA::forall( + RAJA::RangeSegment(0, diag), + RAJA_LAMBDA(RAJA::Index_type i) { Mview(i, i) += Dview(i) * alpha; }); } /** * @brief Adds a constant to the diagonal of this matrix. - * + * * @param[in] value Adding constant. */ template @@ -1070,21 +1050,17 @@ void hiopMatrixDenseRaja::addDiagonal(const double& val int diag = std::min(get_local_size_m(), get_local_size_n()); double* data = data_dev_; RAJA::View> Mview(data, get_local_size_m(), get_local_size_n()); - RAJA::forall(RAJA::RangeSegment(0, diag), - RAJA_LAMBDA(RAJA::Index_type i) - { - Mview(i, i) += value; - }); + RAJA::forall(RAJA::RangeSegment(0, diag), RAJA_LAMBDA(RAJA::Index_type i) { Mview(i, i) += value; }); } /** * @brief Adds the values of a vector to the diagonal of this matrix, * starting at an offset in the diagonal. - * + * * @param[in] alpha Amount to scale values of _d_ by. * @param[in] start Offset from beginning of this matrix's diagonal. * @param[in] d Vector whose elements will be added to the diagonal. - * + * * @post The elements written will be in the range [start, start + _d_.len) */ template @@ -1093,153 +1069,149 @@ void hiopMatrixDenseRaja::addSubDiagonal(const double& const hiopVector& dvec) { const auto& d = dynamic_cast&>(dvec); - size_type dlen=d.get_size(); + size_type dlen = d.get_size(); #ifdef HIOP_DEEPCHECKS - assert(start>=0); - assert(start+dlen<=n_local_); + assert(start >= 0); + assert(start + dlen <= n_local_); #endif double* data = data_dev_; const double* dd = d.local_data_const(); - RAJA::View> Mview(data, get_local_size_m(), get_local_size_n()); // matrix - RAJA::View> Dview(dd, dlen); // vector - RAJA::forall(RAJA::RangeSegment(start, start+dlen), - RAJA_LAMBDA(RAJA::Index_type i) - { - Mview(i, i) += Dview(i - start) * alpha; - }); + RAJA::View> Mview(data, get_local_size_m(), get_local_size_n()); // matrix + RAJA::View> Dview(dd, dlen); // vector + RAJA::forall( + RAJA::RangeSegment(start, start + dlen), + RAJA_LAMBDA(RAJA::Index_type i) { Mview(i, i) += Dview(i - start) * alpha; }); } /** * @brief Adds the values of vector _dvec_ to the diagonal of this matrix, * starting at an offset in both destination and source. - * + * * @param[in] start_on_dest_diag - Offset on `this` matrix's diagonal. * @param[in] alpha - Amount to scale values of _d_ by. * @param[in] dvec - Vector whose elements will be added to the diagonal. * @param[in] start_on_src_vec - Offset in vector. * @param[in] num_elems - Number of elements to add if >= 0, otherwise * the remaining elements on _dvec_ starting at _start_on_src_vec_ - * + * * @pre This matrix must be non-distributed and symmetric/square */ template void hiopMatrixDenseRaja::addSubDiagonal(int start_on_dest_diag, - const double& alpha, + const double& alpha, const hiopVector& dvec, int start_on_src_vec, int num_elems /* = -1 */) { const auto& d = dynamic_cast&>(dvec); - if(num_elems < 0) - num_elems = d.get_size() - start_on_src_vec; + if(num_elems < 0) num_elems = d.get_size() - start_on_src_vec; assert(num_elems <= d.get_size()); assert(n_local_ == n_global_ && "method supported only for non-distributed matrices"); - assert(n_local_ == m_local_ && "method supported only for symmetric matrices"); + assert(n_local_ == m_local_ && "method supported only for symmetric matrices"); - assert(start_on_dest_diag>=0 && start_on_dest_diag= 0 && start_on_dest_diag < m_local_); + num_elems = std::min(num_elems, m_local_ - start_on_dest_diag); double* data = data_dev_; const double* dd = d.local_data_const(); RAJA::View> Mview(data, get_local_size_m(), get_local_size_n()); - RAJA::View> Dview(dd, d.get_size()); // vector - RAJA::forall(RAJA::RangeSegment(0, num_elems), - RAJA_LAMBDA(RAJA::Index_type i) - { - const int loc = i + start_on_dest_diag; - Mview(loc, loc) += Dview(i + start_on_src_vec) * alpha; - }); + RAJA::View> Dview(dd, d.get_size()); // vector + RAJA::forall( + RAJA::RangeSegment(0, num_elems), + RAJA_LAMBDA(RAJA::Index_type i) { + const int loc = i + start_on_dest_diag; + Mview(loc, loc) += Dview(i + start_on_src_vec) * alpha; + }); } /** * @brief Adds a value to a subdiagonal of this matrix, starting * at an offset. - * + * * @param[in] start_on_dest_diag Start on this matrix's diagonal. * @param[in] num_elems Number of elements to add. * @param[in] c Constant to add. - * + * * @pre _num_elems_ must be >= 0 * @pre This matrix must be non-distributed and symmetric/square */ template -void hiopMatrixDenseRaja:: -addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) +void hiopMatrixDenseRaja::addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) { assert(num_elems >= 0); - assert(start_on_dest_diag>=0 && start_on_dest_diag+num_elems<=n_local_); + assert(start_on_dest_diag >= 0 && start_on_dest_diag + num_elems <= n_local_); assert(n_local_ == n_global_ && "method supported only for non-distributed matrices"); - assert(n_local_ == m_local_ && "method supported only for symmetric matrices"); + assert(n_local_ == m_local_ && "method supported only for symmetric matrices"); - double* data = data_dev_; + double* data = data_dev_; RAJA::View> Mview(data, get_local_size_m(), get_local_size_n()); - RAJA::forall(RAJA::RangeSegment(0, num_elems), - RAJA_LAMBDA(RAJA::Index_type i) - { - const int loc = i + start_on_dest_diag; - Mview(loc, loc) += c; - }); + RAJA::forall( + RAJA::RangeSegment(0, num_elems), + RAJA_LAMBDA(RAJA::Index_type i) { + const int loc = i + start_on_dest_diag; + Mview(loc, loc) += c; + }); } /** * @brief Adds the elements of a matrix to the elements of this matrix. - * + * * @param[in] alpha Value to scale the elements of _X_ by. * @param[in] X_ Matrix to add to this one. - * + * * @pre _X_ and this matrix must have matching dimensions. - * + * * @todo update with GPU BLAS call (AXPY) */ template void hiopMatrixDenseRaja::addMatrix(double alpha, const hiopMatrix& X_) { - const hiopMatrixDenseRaja& X = dynamic_cast(X_); + const hiopMatrixDenseRaja& X = dynamic_cast(X_); #ifdef HIOP_DEEPCHECKS - assert(m_local_==X.m_local_); - assert(n_local_==X.n_local_); + assert(m_local_ == X.m_local_); + assert(n_local_ == X.n_local_); #endif double* dd = this->data_dev_; double* dd2 = X.data_dev_; - RAJA::forall(RAJA::RangeSegment(0, n_local_ * m_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] += dd2[i] * alpha; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_ * m_local_), + RAJA_LAMBDA(RAJA::Index_type i) { dd[i] += dd2[i] * alpha; }); } /** - * @brief Adds the values of this matrix to a corresponding section in + * @brief Adds the values of this matrix to a corresponding section in * the upper triangle of the input matrix. (W += alpha*this @ offset) - * + * * @param[in] row_start Row offset into target matrix. * @param[in] col_start Column offset into target matrix. * @param[in] alpha Amount to scale values of this matrix by when adding. * @param[out] W Matrix to be added to. - * + * * @pre _row_start_ and _col_start_ must be >= 0 * @pre _row_start_ + this->m() <= W.m() * @pre _col_start_ + this->n() <= W.n() * @pre _W_ must be a square matrix - * + * * @post The modified section of _W_ will be a square in the upper triangle * starting at (_row_start_, _col_start_) with the dimensions of this matrix. */ template -void hiopMatrixDenseRaja:: -addToSymDenseMatrixUpperTriangle(int row_start, int col_start, double alpha, hiopMatrixDense& Wmat) const +void hiopMatrixDenseRaja::addToSymDenseMatrixUpperTriangle(int row_start, + int col_start, + double alpha, + hiopMatrixDense& Wmat) const { hiopMatrixDenseRaja& W = dynamic_cast(Wmat); double* wdata = W.data_dev_; - double* data = data_dev_; + double* data = data_dev_; assert(row_start >= 0 && m() + row_start <= W.m()); assert(col_start >= 0 && n() + col_start <= W.n()); assert(W.n() == W.m()); - RAJA::View> Mview(data, m_local_, n_local_); + RAJA::View> Mview(data, m_local_, n_local_); RAJA::View> Wview(wdata, W.get_local_size_m(), W.get_local_size_n()); RAJA::RangeSegment row_range(0, m_local_); RAJA::RangeSegment col_range(0, n_local_); @@ -1251,93 +1223,93 @@ addToSymDenseMatrixUpperTriangle(int row_start, int col_start, double alpha, hio assert(iWmax <= jWmin && "source entries need to map inside the upper triangular part of destination"); #endif - RAJA::kernel(RAJA::make_tuple(col_range, row_range), - RAJA_LAMBDA(int jcol, int irow) - { - const int iW = irow + row_start; - const int jW = jcol + col_start; - Wview(iW, jW) += alpha * Mview(irow, jcol); - }); + RAJA::kernel( + RAJA::make_tuple(col_range, row_range), + RAJA_LAMBDA(int jcol, int irow) { + const int iW = irow + row_start; + const int jW = jcol + col_start; + Wview(iW, jW) += alpha * Mview(irow, jcol); + }); } /** - * @brief Adds the values of the transpose of this matrix to a + * @brief Adds the values of the transpose of this matrix to a * corresponding section in the upper triangle of the input matrix. * W += alpha*this' @ offset - * + * * see addToSymDenseMatrixUpperTriangle for more information. */ template -void hiopMatrixDenseRaja:: -transAddToSymDenseMatrixUpperTriangle(int row_start, int col_start, double alpha, hiopMatrixDense& Wmat) const +void hiopMatrixDenseRaja::transAddToSymDenseMatrixUpperTriangle(int row_start, + int col_start, + double alpha, + hiopMatrixDense& Wmat) const { hiopMatrixDenseRaja& W = dynamic_cast(Wmat); double* wdata = W.data_dev_; - double* data = data_dev_; + double* data = data_dev_; assert(row_start >= 0 && n() + row_start <= W.m()); assert(col_start >= 0 && m() + col_start <= W.n()); assert(W.n() == W.m()); - RAJA::View> Mview(data, this->get_local_size_m(), this->get_local_size_n()); + RAJA::View> Mview(data, this->get_local_size_m(), this->get_local_size_n()); RAJA::View> Wview(wdata, W.get_local_size_m(), W.get_local_size_n()); RAJA::RangeSegment row_range(0, m_local_); RAJA::RangeSegment col_range(0, n_local_); - RAJA::kernel(RAJA::make_tuple(col_range, row_range), - RAJA_LAMBDA(int jcol, int irow) - { - const int jW = irow + col_start; - const int iW = jcol + row_start; + RAJA::kernel( + RAJA::make_tuple(col_range, row_range), + RAJA_LAMBDA(int jcol, int irow) { + const int jW = irow + col_start; + const int iW = jcol + row_start; #ifdef HIOP_DEEPCHECKS - assert(iW <= jW && "source entries need to map inside the upper triangular part of destination"); + assert(iW <= jW && "source entries need to map inside the upper triangular part of destination"); #endif - Wview(iW, jW) += alpha * Mview(irow, jcol); - }); + Wview(iW, jW) += alpha * Mview(irow, jcol); + }); } /** * @brief diagonal block of W += alpha*this with 'diag_start' indicating the * diagonal entry of W where 'this' should start to contribute. - * + * * @param[in] diag_start * For efficiency, only upper triangle of W is updated since this will be * eventually sent to LAPACK and only the upper triangle of 'this' is accessed - * + * * @pre this->n()==this->m() * @pre W.n() == W.m() * */ template -void hiopMatrixDenseRaja:: -addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, - hiopMatrixDense& Wmat) const +void hiopMatrixDenseRaja::addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, + double alpha, + hiopMatrixDense& Wmat) const { hiopMatrixDenseRaja& W = dynamic_cast(Wmat); double* wdata = W.data_dev_; - double* data = data_dev_; + double* data = data_dev_; - RAJA::View> Mview(data, this->get_local_size_m(), this->get_local_size_n()); + RAJA::View> Mview(data, this->get_local_size_m(), this->get_local_size_n()); RAJA::View> Wview(wdata, W.get_local_size_m(), W.get_local_size_n()); RAJA::RangeSegment row_range(0, m_local_); RAJA::RangeSegment col_range(0, n_local_); - RAJA::kernel(RAJA::make_tuple(col_range, row_range), - RAJA_LAMBDA(RAJA::Index_type j, RAJA::Index_type i) - { - if(j < i) - return; - auto iw = i + diag_start; - auto jw = j + diag_start; - Wview(iw, jw) += alpha * Mview(i, j); - }); + RAJA::kernel( + RAJA::make_tuple(col_range, row_range), + RAJA_LAMBDA(RAJA::Index_type j, RAJA::Index_type i) { + if(j < i) return; + auto iw = i + diag_start; + auto jw = j + diag_start; + Wview(iw, jw) += alpha * Mview(i, j); + }); } /** * @brief Returns the value of the element with the maximum absolute value. - * + * * @todo Consider using BLAS call (LANGE) */ template @@ -1345,16 +1317,15 @@ double hiopMatrixDenseRaja::max_abs_value() { double* dd = this->data_dev_; RAJA::ReduceMax norm(0.0); - RAJA::forall(RAJA::RangeSegment(0, n_local_ * m_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - norm.max(fabs(dd[i])); - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_ * m_local_), + RAJA_LAMBDA(RAJA::Index_type i) { norm.max(fabs(dd[i])); }); double maxv = static_cast(norm.get()); #ifdef HIOP_USE_MPI double maxvg; - int ierr=MPI_Allreduce(&maxv,&maxvg,1,MPI_DOUBLE,MPI_MAX,comm_); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Allreduce(&maxv, &maxvg, 1, MPI_DOUBLE, MPI_MAX, comm_); + assert(ierr == MPI_SUCCESS); return maxvg; #endif return maxv; @@ -1362,87 +1333,77 @@ double hiopMatrixDenseRaja::max_abs_value() /** * @brief Returns the value of the element with the maximum absolute value. - * + * * @todo Consider using BLAS call (LANGE) */ template -void hiopMatrixDenseRaja::row_max_abs_value(hiopVector &ret_vec) -{ +void hiopMatrixDenseRaja::row_max_abs_value(hiopVector& ret_vec) +{ assert(ret_vec.get_size() == m()); ret_vec.setToZero(); if(0 == m_local_) { return; - } + } auto& vec = dynamic_cast&>(ret_vec); double* vd = vec.local_data(); - + double* data = data_dev_; int m_local = m_local_; int n_local = n_local_; - + RAJA::View> Mview(data, m_local, n_local); RAJA::forall( - RAJA::RangeSegment(0, m_local), - RAJA_LAMBDA(RAJA::Index_type i) - { - for(int j = 0; j < n_local; j++) { - double abs_val = fabs(Mview(i, j)); - vd[i] = (vd[i] > abs_val) ? vd[i] : abs_val; - } - } - ); + RAJA::RangeSegment(0, m_local), + RAJA_LAMBDA(RAJA::Index_type i) { + for(int j = 0; j < n_local; j++) { + double abs_val = fabs(Mview(i, j)); + vd[i] = (vd[i] > abs_val) ? vd[i] : abs_val; + } + }); #ifdef HIOP_USE_MPI hiopVectorPar maxvg(m_local_); hiopVectorPar vec_host(m_local_); vec.copy_to_vectorpar(vec_host); - int ierr = MPI_Allreduce(vec_host.local_data(), - maxvg.local_data(), - m_local_, - MPI_DOUBLE, - MPI_MAX, - comm_); - assert(ierr==MPI_SUCCESS); + int ierr = MPI_Allreduce(vec_host.local_data(), maxvg.local_data(), m_local_, MPI_DOUBLE, MPI_MAX, comm_); + assert(ierr == MPI_SUCCESS); vec.copy_from_vectorpar(maxvg); #endif } /// Scale each row of matrix, according to the scale factor in `ret_vec` template -void hiopMatrixDenseRaja::scale_row(hiopVector &vec_scal, const bool inv_scale) +void hiopMatrixDenseRaja::scale_row(hiopVector& vec_scal, const bool inv_scale) { double* data = data_dev_; auto& vec = dynamic_cast&>(vec_scal); double* vd = vec.local_data(); - + int m_local = m_local_; int n_local = n_local_; - + RAJA::View> Mview(data, m_local, n_local); RAJA::forall( - RAJA::RangeSegment(0, m_local), - RAJA_LAMBDA(RAJA::Index_type i) - { - for (int j = 0; j < n_local; j++) - { - Mview(i,j) *= vd[i]; - } - } - ); + RAJA::RangeSegment(0, m_local), + RAJA_LAMBDA(RAJA::Index_type i) { + for(int j = 0; j < n_local; j++) { + Mview(i, j) *= vd[i]; + } + }); } #ifdef HIOP_DEEPCHECKS template bool hiopMatrixDenseRaja::assertSymmetry(double tol) const { - if(n_local_!=n_global_) { + if(n_local_ != n_global_) { assert(false && "should be used only for local matrices"); return false; } - //must be square - if(m_local_!=n_global_) { + // must be square + if(m_local_ != n_global_) { assert(false); return false; } @@ -1451,31 +1412,30 @@ bool hiopMatrixDenseRaja::assertSymmetry(double tol) co RAJA::View> Mview(data, n_local_, n_local_); RAJA::RangeSegment range(0, n_local_); - //symmetry + // symmetry RAJA::ReduceSum any(0); - RAJA::kernel(RAJA::make_tuple(range, range), - RAJA_LAMBDA(int j, int i) - { - double ij = Mview(i, j); - double ji = Mview(j, i); - double relerr= fabs(ij - ji) / (1 + fabs(ij)); - assert(relerr < tol); - if(relerr >= tol) - any += 1; - }); + RAJA::kernel( + RAJA::make_tuple(range, range), + RAJA_LAMBDA(int j, int i) { + double ij = Mview(i, j); + double ji = Mview(j, i); + double relerr = fabs(ij - ji) / (1 + fabs(ij)); + assert(relerr < tol); + if(relerr >= tol) any += 1; + }); return any.get() == 0; } #endif template -bool hiopMatrixDenseRaja::symmetrize() +bool hiopMatrixDenseRaja::symmetrize() { - if(n_local_!=n_global_) { + if(n_local_ != n_global_) { assert(false && "should be used only for local matrices"); return false; } - //must be square - if(m_local_!=n_global_) { + // must be square + if(m_local_ != n_global_) { assert(false); return false; } @@ -1484,15 +1444,15 @@ bool hiopMatrixDenseRaja::symmetrize() RAJA::View> Mview(data, n_local_, n_local_); RAJA::RangeSegment range(0, n_local_); - //symmetrize --- copy the upper triangular part to lower tirangular part - RAJA::kernel(RAJA::make_tuple(range, range), - RAJA_LAMBDA(int j, int i) - { - double ij = Mview(i, j); - if(i < j) { - Mview(j, i) = ij; - } - }); + // symmetrize --- copy the upper triangular part to lower tirangular part + RAJA::kernel( + RAJA::make_tuple(range, range), + RAJA_LAMBDA(int j, int i) { + double ij = Mview(i, j); + if(i < j) { + Mview(j, i) = ij; + } + }); return true; } @@ -1500,8 +1460,8 @@ bool hiopMatrixDenseRaja::symmetrize() template void hiopMatrixDenseRaja::copyToDev() { - if(data_dev_!=data_host_) { - exec_space_.copy(data_dev_, data_host_, n_local_*max_rows_, exec_space_host_); + if(data_dev_ != data_host_) { + exec_space_.copy(data_dev_, data_host_, n_local_ * max_rows_, exec_space_host_); } } @@ -1509,8 +1469,8 @@ void hiopMatrixDenseRaja::copyToDev() template void hiopMatrixDenseRaja::copyFromDev() { - if(data_dev_!=data_host_) { - exec_space_host_.copy(data_host_, data_dev_, n_local_*max_rows_, exec_space_); + if(data_dev_ != data_host_) { + exec_space_host_.copy(data_host_, data_dev_, n_local_ * max_rows_, exec_space_); } } @@ -1518,9 +1478,9 @@ template double* hiopMatrixDenseRaja::new_mxnlocal_host_buff() const { if(buff_mxnlocal_host_ == nullptr) { - buff_mxnlocal_host_ = exec_space_host_.template alloc_array(max_rows_*n_local_); + buff_mxnlocal_host_ = exec_space_host_.template alloc_array(max_rows_ * n_local_); } return buff_mxnlocal_host_; } -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixDenseRajaOmp.cpp b/src/LinAlg/hiopMatrixDenseRajaOmp.cpp index 7ba20d394..9401b9b4b 100644 --- a/src/LinAlg/hiopMatrixDenseRajaOmp.cpp +++ b/src/LinAlg/hiopMatrixDenseRajaOmp.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -59,13 +59,12 @@ #include "MemBackendCppImpl.hpp" #include "ExecPoliciesRajaOmpImpl.hpp" - namespace hiop { using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exec; using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; using matrix_exec = ExecRajaPoliciesBackend::matrix_exec; -} +} // namespace hiop #include "hiopMatrixDenseRajaImpl.hpp" @@ -73,8 +72,8 @@ namespace hiop { // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopMatrixDenseRaja; template class hiopMatrixDenseRaja; -} +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixDenseRowMajor.cpp b/src/LinAlg/hiopMatrixDenseRowMajor.cpp index 478ac897b..e46760f32 100644 --- a/src/LinAlg/hiopMatrixDenseRowMajor.cpp +++ b/src/LinAlg/hiopMatrixDenseRowMajor.cpp @@ -3,53 +3,53 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #include "hiopMatrixDenseRowMajor.hpp" #include -#include //for memcpy +#include //for memcpy #include #include #include @@ -61,42 +61,45 @@ namespace hiop { -hiopMatrixDenseRowMajor::hiopMatrixDenseRowMajor(const size_type& m, - const size_type& glob_n, - index_type* col_part/*=NULL*/, - MPI_Comm comm/*=MPI_COMM_SELF*/, - const size_type& m_max_alloc/*=-1*/) - : hiopMatrixDense(m, glob_n, comm) +hiopMatrixDenseRowMajor::hiopMatrixDenseRowMajor(const size_type& m, + const size_type& glob_n, + index_type* col_part /*=NULL*/, + MPI_Comm comm /*=MPI_COMM_SELF*/, + const size_type& m_max_alloc /*=-1*/) + : hiopMatrixDense(m, glob_n, comm) { - int P=0; + int P = 0; comm_size_ = 1; if(col_part) { #ifdef HIOP_USE_MPI - int ierr = MPI_Comm_rank(comm_, &P); assert(ierr==MPI_SUCCESS); - ierr = MPI_Comm_size(comm_, &comm_size_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Comm_rank(comm_, &P); + assert(ierr == MPI_SUCCESS); + ierr = MPI_Comm_size(comm_, &comm_size_); + assert(MPI_SUCCESS == ierr); #endif - glob_jl_=col_part[P]; glob_ju_=col_part[P+1]; + glob_jl_ = col_part[P]; + glob_ju_ = col_part[P + 1]; } else { - glob_jl_=0; glob_ju_=n_global_; + glob_jl_ = 0; + glob_ju_ = n_global_; } - n_local_=glob_ju_-glob_jl_; + n_local_ = glob_ju_ - glob_jl_; myrank_ = P; - max_rows_=m_max_alloc; - if(max_rows_==-1) max_rows_=m_local_; - assert(max_rows_>=m_local_ && "the requested extra allocation is smaller than the allocation needed by the matrix"); + max_rows_ = m_max_alloc; + if(max_rows_ == -1) max_rows_ = m_local_; + assert(max_rows_ >= m_local_ && "the requested extra allocation is smaller than the allocation needed by the matrix"); - M_=new double*[max_rows_==0?1:max_rows_]; - M_[0] = max_rows_==0?NULL:new double[max_rows_*n_local_]; - for(int i=1; i(dmmat); - assert(n_local_==dm.n_local_); assert(m_local_==dm.m_local_); assert(n_global_==dm.n_global_); - assert(glob_jl_==dm.glob_jl_); assert(glob_ju_==dm.glob_ju_); - if(NULL==dm.M_[0]) { + assert(n_local_ == dm.n_local_); + assert(m_local_ == dm.m_local_); + assert(n_global_ == dm.n_global_); + assert(glob_jl_ == dm.glob_jl_); + assert(glob_ju_ == dm.glob_ju_); + if(NULL == dm.M_[0]) { M_[0] = NULL; } else { - memcpy(M_[0], dm.M_[0], m_local_*n_local_*sizeof(double)); + memcpy(M_[0], dm.M_[0], m_local_ * n_local_ * sizeof(double)); } } void hiopMatrixDenseRowMajor::copyFrom(const double* buffer) { - if(NULL==buffer) { + if(NULL == buffer) { M_[0] = NULL; } else { - memcpy(M_[0], buffer, m_local_*n_local_*sizeof(double)); + memcpy(M_[0], buffer, m_local_ * n_local_ * sizeof(double)); } } void hiopMatrixDenseRowMajor::copy_to(double* buffer) { - if(NULL==buffer) { + if(NULL == buffer) { return; } else { - memcpy(buffer, M_[0], m_local_*n_local_*sizeof(double)); + memcpy(buffer, M_[0], m_local_ * n_local_ * sizeof(double)); } } @@ -170,125 +179,126 @@ void hiopMatrixDenseRowMajor::copyRowsFrom(const hiopMatrixDense& srcmat, int nu { const auto& src = dynamic_cast(srcmat); #ifdef HIOP_DEEPCHECKS - assert(row_dest>=0); - assert(n_local_==src.n_local_); - assert(row_dest+num_rows<=m_local_); - assert(num_rows<=src.m_local_); + assert(row_dest >= 0); + assert(n_local_ == src.n_local_); + assert(row_dest + num_rows <= m_local_); + assert(num_rows <= src.m_local_); #endif - if(num_rows>0) - memcpy(M_[row_dest], src.M_[0], n_local_*num_rows*sizeof(double)); + if(num_rows > 0) memcpy(M_[row_dest], src.M_[0], n_local_ * num_rows * sizeof(double)); } void hiopMatrixDenseRowMajor::copyRowsFrom(const hiopMatrix& src_gen, const index_type* rows_idxs, size_type n_rows) { const auto& src = dynamic_cast(src_gen); - assert(n_global_==src.n_global_); - assert(n_local_==src.n_local_); - assert(n_rows<=src.m_local_); + assert(n_global_ == src.n_global_); + assert(n_local_ == src.n_local_); + assert(n_rows <= src.m_local_); assert(n_rows == m_local_); // todo //! opt -> copy multiple (consecutive rows at the time -> maybe keep blocks of eq and ineq, - //instead of indexes) + // instead of indexes) - //int i should suffice for dense matrices - for(int i=0; i(srcmat); - assert(n_local_==n_global_ && "this method should be used only in 'serial' mode"); - assert(src.n_local_==src.n_global_ && "this method should be used only in 'serial' mode"); - assert(m_local_>=i_start+src.m_local_ && "the matrix does not fit as a sublock in 'this' at specified coordinates"); - assert(n_local_>=j_start+src.n_local_ && "the matrix does not fit as a sublock in 'this' at specified coordinates"); - - //quick returns for empty source matrices - if(src.n()==0) return; - if(src.m()==0) return; + assert(n_local_ == n_global_ && "this method should be used only in 'serial' mode"); + assert(src.n_local_ == src.n_global_ && "this method should be used only in 'serial' mode"); + assert(m_local_ >= i_start + src.m_local_ && "the matrix does not fit as a sublock in 'this' at specified coordinates"); + assert(n_local_ >= j_start + src.n_local_ && "the matrix does not fit as a sublock in 'this' at specified coordinates"); + + // quick returns for empty source matrices + if(src.n() == 0) return; + if(src.m() == 0) return; #ifdef HIOP_DEEPCHECKS - assert(i_start=0); assert(j_start>=0); + assert(i_start < m_local_ || !m_local_); + assert(j_start < n_local_ || !n_local_); + assert(i_start >= 0); + assert(j_start >= 0); #endif - const size_t buffsize = src.n_local_*sizeof(double); - for(index_type ii=0; ii(srcmat); - assert(n_local_==n_global_ && "this method should be used only in 'serial' mode"); - assert(src.n_local_==src.n_global_ && "this method should be used only in 'serial' mode"); - assert(m_local_+i_block<=src.m_local_ && "the source does not enough rows to fill 'this'"); - assert(n_local_+j_block<=src.n_local_ && "the source does not enough cols to fill 'this'"); + assert(n_local_ == n_global_ && "this method should be used only in 'serial' mode"); + assert(src.n_local_ == src.n_global_ && "this method should be used only in 'serial' mode"); + assert(m_local_ + i_block <= src.m_local_ && "the source does not enough rows to fill 'this'"); + assert(n_local_ + j_block <= src.n_local_ && "the source does not enough cols to fill 'this'"); - if(n_local_==src.n_local_) //and j_block=0 - memcpy(M_[0], src.M_[i_block], n_local_*m_local_*sizeof(double)); + if(n_local_ == src.n_local_) // and j_block=0 + memcpy(M_[0], src.M_[i_block], n_local_ * m_local_ * sizeof(double)); else { - for(int i=0; i=2 - assert(m_local_>=2); - //and - assert(m_local_-std::abs(shift)>=1); + // at this point m_local_ should be >=2 + assert(m_local_ >= 2); + // and + assert(m_local_ - std::abs(shift) >= 1); #ifdef HIOP_DEEPCHECKS - double test1=8.3, test2=-98.3; - if(n_local_>0) { - //not sure if memcpy is copying sequentially on all systems. we check this. - //let's at least check it - test1=shift<0 ? M_[-shift][0] : M_[m_local_-shift-1][0]; - test2=shift<0 ? M_[-shift][n_local_-1] : M_[m_local_-shift-1][n_local_-1]; + double test1 = 8.3, test2 = -98.3; + if(n_local_ > 0) { + // not sure if memcpy is copying sequentially on all systems. we check this. + // let's at least check it + test1 = shift < 0 ? M_[-shift][0] : M_[m_local_ - shift - 1][0]; + test2 = shift < 0 ? M_[-shift][n_local_ - 1] : M_[m_local_ - shift - 1][n_local_ - 1]; } #endif - //shift < 0 -> up; shift > 0 -> down - //if(shift<0) memcpy(M[0], M[-shift], n_local_*(m_local_+shift)*sizeof(double)); - //else memcpy(M[shift], M[0], n_local_*(m_local_-shift)*sizeof(double)); - if(shift<0) { - for(int row=0; row up; shift > 0 -> down + // if(shift<0) memcpy(M[0], M[-shift], n_local_*(m_local_+shift)*sizeof(double)); + // else memcpy(M[shift], M[0], n_local_*(m_local_-shift)*sizeof(double)); + if(shift < 0) { + for(int row = 0; row < m_local_ + shift; row++) memcpy(M_[row], M_[row - shift], n_local_ * sizeof(double)); } else { - for(int row=m_local_-1; row>=shift; row--) { - memcpy(M_[row], M_[row-shift], n_local_*sizeof(double)); + for(int row = m_local_ - 1; row >= shift; row--) { + memcpy(M_[row], M_[row - shift], n_local_ * sizeof(double)); } } - + #ifdef HIOP_DEEPCHECKS - if(n_local_>0) { - assert(test1==M_[shift<0?0:m_local_-1][0] && "a different copy technique than memcpy is needed on this system"); - assert(test2==M_[shift<0?0:m_local_-1][n_local_-1] && "a different copy technique than memcpy is needed on this system"); + if(n_local_ > 0) { + assert(test1 == M_[shift < 0 ? 0 : m_local_ - 1][0] && + "a different copy technique than memcpy is needed on this system"); + assert(test2 == M_[shift < 0 ? 0 : m_local_ - 1][n_local_ - 1] && + "a different copy technique than memcpy is needed on this system"); } #endif } void hiopMatrixDenseRowMajor::replaceRow(index_type row, const hiopVector& vec) { - assert(row>=0); assert(row=n_local_?n_local_:vec_size)*sizeof(double)); + assert(row >= 0); + assert(row < m_local_); + size_type vec_size = vec.get_local_size(); + memcpy(M_[row], vec.local_data_const(), (vec_size >= n_local_ ? n_local_ : vec_size) * sizeof(double)); } void hiopMatrixDenseRowMajor::getRow(index_type irow, hiopVector& row_vec) { - assert(irow>=0); assert(irow(row_vec); - assert(n_local_==vec.get_local_size()); - memcpy(vec.local_data(), M_[irow], n_local_*sizeof(double)); + assert(irow >= 0); + assert(irow < m_local_); + hiopVectorPar& vec = dynamic_cast(row_vec); + assert(n_local_ == vec.get_local_size()); + memcpy(vec.local_data(), M_[irow], n_local_ * sizeof(double)); } void hiopMatrixDenseRowMajor::set_Hess_FR(const hiopMatrixDense& Hess, const hiopVector& add_diag_de) @@ -303,7 +313,7 @@ void hiopMatrixDenseRowMajor::set_Jac_FR(const hiopMatrixDense& Jac_c, const hio assert(Jac_c.n() == Jac_d.n()); assert(Jac_c.get_local_size_n() == Jac_d.get_local_size_n()); assert(Jac_c.m() + Jac_d.m() == this->m()); - assert(Jac_c.n() <= this->n()); // we will add variable p, n on the top of base prob + assert(Jac_c.n() <= this->n()); // we will add variable p, n on the top of base prob const auto& Jeq = dynamic_cast(Jac_c); const auto& Jin = dynamic_cast(Jac_d); @@ -320,7 +330,7 @@ void hiopMatrixDenseRowMajor::set_Jac_FR(const hiopMatrixDense& Jac_c, const hio // Jac for c(x) - p + n size_type m_eq_local_base = Jeq.m(); size_type n_local_base = Jeq.get_local_size_n(); - + for(int i = 0; i < m_eq_local_base; ++i) { memcpy(M_[i], Jeq.M_[i], n_local_base * sizeof(double)); M_[i][n_local_base + i] = -1.0; @@ -340,17 +350,15 @@ void hiopMatrixDenseRowMajor::set_Jac_FR(const hiopMatrixDense& Jac_c, const hio #ifdef HIOP_DEEPCHECKS void hiopMatrixDenseRowMajor::overwriteUpperTriangleWithLower() { - assert(n_local_==n_global_ && "Use only with local, non-distributed matrices"); - for(int i=0; im_local_) maxRows=m_local_; - if(maxCols>n_local_) maxCols=n_local_; + if(myrank_ == rank || rank == -1) { + if(NULL == f) f = stdout; + if(maxRows > m_local_) maxRows = m_local_; + if(maxCols > n_local_) maxCols = n_local_; if(msg) { - fprintf(f, "%s (local_dims=[%d,%d])\n", msg, m_local_,n_local_); - } else { - fprintf(f, "hiopMatrixDenseRowMajor::printing max=[%d,%d] (local_dims=[%d,%d], on rank=%d)\n", - maxRows, maxCols, m_local_,n_local_,myrank_); + fprintf(f, "%s (local_dims=[%d,%d])\n", msg, m_local_, n_local_); + } else { + fprintf(f, + "hiopMatrixDenseRowMajor::printing max=[%d,%d] (local_dims=[%d,%d], on rank=%d)\n", + maxRows, + maxCols, + m_local_, + n_local_, + myrank_); } - maxRows = maxRows>=0?maxRows:m_local_; - maxCols = maxCols>=0?maxCols:n_local_; + maxRows = maxRows >= 0 ? maxRows : m_local_; + maxCols = maxCols >= 0 ? maxCols : n_local_; fprintf(f, "["); - for(int i=0; i0) fprintf(f, " "); - for(int j=0; j 0) fprintf(f, " "); + for(int j = 0; j < maxCols; j++) fprintf(f, "%20.12e ", M_[i][j]); + if(i < maxRows - 1) + fprintf(f, "; ...\n"); else - fprintf(f, "];\n"); + fprintf(f, "];\n"); } } } -/* y = beta * y + alpha * this * x +/* y = beta * y + alpha * this * x * * Sizes: y is m_local_, x is n_local_, the matrix is m_local_ x n_global_, and the - * local chunk is m_local_ x n_local_ -*/ -void hiopMatrixDenseRowMajor:: -timesVec(double beta, hiopVector& y_, double alpha, const hiopVector& x_) const + * local chunk is m_local_ x n_local_ + */ +void hiopMatrixDenseRowMajor::timesVec(double beta, hiopVector& y_, double alpha, const hiopVector& x_) const { hiopVectorPar& y = dynamic_cast(y_); const hiopVectorPar& x = dynamic_cast(x_); #ifdef HIOP_DEEPCHECKS assert(y.get_local_size() == m_local_); - assert(y.get_size() == m_local_); //y should not be distributed + assert(y.get_size() == m_local_); // y should not be distributed assert(x.get_local_size() == n_local_); assert(x.get_size() == n_global_); - if(beta!=0) assert(y.isfinite_local()); + if(beta != 0) assert(y.isfinite_local()); assert(x.isfinite_local()); #endif - + timesVec(beta, y.local_data(), alpha, x.local_data_const()); -#ifdef HIOP_DEEPCHECKS +#ifdef HIOP_DEEPCHECKS assert(y.isfinite_local()); #endif } -void hiopMatrixDenseRowMajor:: -timesVec(double beta, double* ya, double alpha, const double* xa) const +void hiopMatrixDenseRowMajor::timesVec(double beta, double* ya, double alpha, const double* xa) const { - char fortranTrans='T'; - int MM=m_local_, NN=n_local_, incx_y=1; + char fortranTrans = 'T'; + int MM = m_local_, NN = n_local_, incx_y = 1; #ifdef HIOP_USE_MPI - //only add beta*y on one processor (rank 0) - if(myrank_!=0) beta=0.0; + // only add beta*y on one processor (rank 0) + if(myrank_ != 0) beta = 0.0; #endif - if( MM != 0 && NN != 0 ) { - // the arguments seem reversed but so is trans='T' + if(MM != 0 && NN != 0) { + // the arguments seem reversed but so is trans='T' // required since we keep the matrix row-wise, while the Fortran/BLAS expects them column-wise - DGEMV( &fortranTrans, &NN, &MM, &alpha, &M_[0][0], &NN, xa, &incx_y, &beta, ya, &incx_y ); + DGEMV(&fortranTrans, &NN, &MM, &alpha, &M_[0][0], &NN, xa, &incx_y, &beta, ya, &incx_y); } else { - if( MM != 0 ) { - //y.scale( beta ); + if(MM != 0) { + // y.scale( beta ); if(beta != 1.) { - int one=1; - DSCAL(&MM, &beta, ya, &one); + int one = 1; + DSCAL(&MM, &beta, ya, &one); } } else { - assert(MM==0); + assert(MM == 0); return; } } #ifdef HIOP_USE_MPI - //here m_local_ is > 0 - double yglob[m_local_]; - int ierr=MPI_Allreduce(ya, yglob, m_local_, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); - memcpy(ya, yglob, m_local_*sizeof(double)); + // here m_local_ is > 0 + double yglob[m_local_]; + int ierr = MPI_Allreduce(ya, yglob, m_local_, MPI_DOUBLE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); + memcpy(ya, yglob, m_local_ * sizeof(double)); #endif - } /* y = beta * y + alpha * transpose(this) * x */ -void hiopMatrixDenseRowMajor:: -transTimesVec(double beta, hiopVector& y_, double alpha, const hiopVector& x_) const +void hiopMatrixDenseRowMajor::transTimesVec(double beta, hiopVector& y_, double alpha, const hiopVector& x_) const { hiopVectorPar& y = dynamic_cast(y_); const hiopVectorPar& x = dynamic_cast(x_); #ifdef HIOP_DEEPCHECKS assert(x.get_local_size() == m_local_); - assert(x.get_size() == m_local_); //x should not be distributed + assert(x.get_size() == m_local_); // x should not be distributed assert(y.get_local_size() == n_local_); assert(y.get_size() == n_global_); assert(y.isfinite_local()); @@ -507,110 +513,107 @@ transTimesVec(double beta, hiopVector& y_, double alpha, const hiopVector& x_) c transTimesVec(beta, y.local_data(), alpha, x.local_data_const()); } -void hiopMatrixDenseRowMajor::transTimesVec(double beta, double* ya, - double alpha, const double* xa) const +void hiopMatrixDenseRowMajor::transTimesVec(double beta, double* ya, double alpha, const double* xa) const { - char fortranTrans='N'; - int MM=m_local_, NN=n_local_, incx_y=1; + char fortranTrans = 'N'; + int MM = m_local_, NN = n_local_, incx_y = 1; - if( MM!=0 && NN!=0 ) { - // the arguments seem reversed but so is trans='T' + if(MM != 0 && NN != 0) { + // the arguments seem reversed but so is trans='T' // required since we keep the matrix row-wise, while the Fortran/BLAS expects them column-wise - DGEMV( &fortranTrans, &NN, &MM, &alpha, &M_[0][0], &NN, - xa, &incx_y, &beta, ya, &incx_y ); + DGEMV(&fortranTrans, &NN, &MM, &alpha, &M_[0][0], &NN, xa, &incx_y, &beta, ya, &incx_y); } else { - if( NN != 0 ) { - //y.scale( beta ); - int one=1; + if(NN != 0) { + // y.scale( beta ); + int one = 1; DSCAL(&NN, &beta, ya, &one); } } } -/* W = beta*W + alpha*this*X +/* W = beta*W + alpha*this*X * -- this is 'M' mxn, X is nxk, W is mxk * * Precondition: - * - W, this, and X need to be local matrices (not distributed). All multiplications of distributed + * - W, this, and X need to be local matrices (not distributed). All multiplications of distributed * matrices needed by HiOp can be done efficiently in parallel using 'transTimesMat' */ void hiopMatrixDenseRowMajor::timesMat(double beta, hiopMatrix& W_, double alpha, const hiopMatrix& X_) const { #ifndef HIOP_USE_MPI - timesMat_local(beta,W_,alpha,X_); + timesMat_local(beta, W_, alpha, X_); #else auto& W = dynamic_cast(W_); - const auto& X = dynamic_cast(X_); - - assert(W.m()==this->m()); - assert(X.m()==this->n()); - assert(W.n()==X.n()); - - if(W.m()==0 || X.m()==0 || W.n()==0) return; -#ifdef HIOP_DEEPCHECKS + const auto& X = dynamic_cast(X_); + + assert(W.m() == this->m()); + assert(X.m() == this->n()); + assert(W.n() == X.n()); + + if(W.m() == 0 || X.m() == 0 || W.n() == 0) return; +#ifdef HIOP_DEEPCHECKS assert(W.isfinite()); assert(X.isfinite()); #endif - if(X.n_local_!=X.n_global_ || this->n_local_!=this->n_global_) { + if(X.n_local_ != X.n_global_ || this->n_local_ != this->n_global_) { assert(false && "'timesMat' involving distributed matrices is not needed/supported" && - "also, it cannot be performed efficiently with the data distribution used by this class"); + "also, it cannot be performed efficiently with the data distribution used by this class"); W.setToConstant(beta); return; } - timesMat_local(beta,W_,alpha,X_); + timesMat_local(beta, W_, alpha, X_); // if(0==myrank_) timesMat_local(beta,W_,alpha,X_); // else timesMat_local(0., W_,alpha,X_); - // int n2Red=W.m()*W.n(); + // int n2Red=W.m()*W.n(); // double* Wglob = new_mxnlocal_buff(); //[n2Red]; // int ierr = MPI_Allreduce(WM[0], Wglob, n2Red, MPI_DOUBLE, MPI_SUM,comm); assert(ierr==MPI_SUCCESS); // memcpy(WM[0], Wglob, n2Red*sizeof(double)); - -#endif +#endif } -/* W = beta*W + alpha*this*X +/* W = beta*W + alpha*this*X * -- this is 'M' mxn, X is nxk, W is mxk */ void hiopMatrixDenseRowMajor::timesMat_local(double beta, hiopMatrix& W_, double alpha, const hiopMatrix& X_) const { const auto& X = dynamic_cast(X_); auto& W = dynamic_cast(W_); -#ifdef HIOP_DEEPCHECKS - assert(W.m()==this->m()); - assert(X.m()==this->n()); - assert(W.n()==X.n()); +#ifdef HIOP_DEEPCHECKS + assert(W.m() == this->m()); + assert(X.m() == this->n()); + assert(W.n() == X.n()); assert(W.isfinite()); assert(X.isfinite()); #endif - assert(W.n_local_==W.n_global_ && "requested multiplication is not supported, see timesMat"); - + assert(W.n_local_ == W.n_global_ && "requested multiplication is not supported, see timesMat"); + /* C = alpha*op(A)*op(B) + beta*C in our case is Wt= alpha* Xt *Mt + beta*Wt */ - char trans='N'; - int M=X.n(), N=m_local_, K=X.m(); - int ldx=X.n(), ldm=n_local_, ldw=X.n(); + char trans = 'N'; + int M = X.n(), N = m_local_, K = X.m(); + int ldx = X.n(), ldm = n_local_, ldw = X.n(); - double* XM=X.local_data_const(); - double* WM=W.local_data(); - //DGEMM(&trans,&trans, &M,&N,&K, &alpha,XM[0],&ldx, this->M_[0],&ldm, &beta,WM[0],&ldw); + double* XM = X.local_data_const(); + double* WM = W.local_data(); + // DGEMM(&trans,&trans, &M,&N,&K, &alpha,XM[0],&ldx, this->M_[0],&ldm, &beta,WM[0],&ldw); - DGEMM(&trans,&trans, &M,&N,&K, &alpha,XM,&ldx, this->M_[0],&ldm, &beta,WM,&ldw); + DGEMM(&trans, &trans, &M, &N, &K, &alpha, XM, &ldx, this->M_[0], &ldm, &beta, WM, &ldw); /* C = alpha*op(A)*op(B) + beta*C in our case is Wt= alpha* Xt *Mt + beta*Wt */ - //char trans='T'; - //int lda=X.m(), ldb=n_local_, ldc=W.n(); - //int M=X.n(), N=this->m(), K=this->n_local_; + // char trans='T'; + // int lda=X.m(), ldb=n_local_, ldc=W.n(); + // int M=X.n(), N=this->m(), K=this->n_local_; - //DGEMM(&trans,&trans, &M,&N,&K, &alpha,XM[0],&lda, this->M[0],&ldb, &beta,WM[0],&ldc); + // DGEMM(&trans,&trans, &M,&N,&K, &alpha,XM[0],&lda, this->M[0],&ldb, &beta,WM[0],&ldc); } -/* W = beta*W + alpha*this^T*X +/* W = beta*W + alpha*this^T*X * -- this is mxn, X is mxk, W is nxk */ void hiopMatrixDenseRowMajor::transTimesMat(double beta, hiopMatrix& W_, double alpha, const hiopMatrix& X_) const @@ -618,26 +621,26 @@ void hiopMatrixDenseRowMajor::transTimesMat(double beta, hiopMatrix& W_, double const auto& X = dynamic_cast(X_); auto& W = dynamic_cast(W_); - assert(W.m()==n_local_); - assert(X.m()==m_local_); - assert(W.n()==X.n()); + assert(W.m() == n_local_); + assert(X.m() == m_local_); + assert(W.n() == X.n()); #ifdef HIOP_DEEPCHECKS assert(W.isfinite()); assert(X.isfinite()); #endif - if(W.m()==0) return; + if(W.m() == 0) return; + + assert(this->n_global_ == this->n_local_ && "requested parallel multiplication is not supported"); - assert(this->n_global_==this->n_local_ && "requested parallel multiplication is not supported"); - /* C = alpha*op(A)*op(B) + beta*C in our case is Wt= alpha* Xt *M + beta*Wt */ - char transX='N', transM='T'; - int ldx=X.n_local_, ldm=n_local_, ldw=W.n_local_; - int M=X.n_local_, N=n_local_, K=X.m(); - double* XM=X.local_data_const(); - double* WM=W.local_data(); - - //DGEMM(&transX, &transM, &M,&N,&K, &alpha,XM,&ldx, this->M_[0],&ldm, &beta,WM,&ldw); - DGEMM(&transX, &transM, &M,&N,&K, &alpha,XM,&ldx, this->local_data_const(),&ldm, &beta,WM,&ldw); + char transX = 'N', transM = 'T'; + int ldx = X.n_local_, ldm = n_local_, ldw = W.n_local_; + int M = X.n_local_, N = n_local_, K = X.m(); + double* XM = X.local_data_const(); + double* WM = W.local_data(); + + // DGEMM(&transX, &transM, &M,&N,&K, &alpha,XM,&ldx, this->M_[0],&ldm, &beta,WM,&ldw); + DGEMM(&transX, &transM, &M, &N, &K, &alpha, XM, &ldx, this->local_data_const(), &ldm, &beta, WM, &ldw); } /* W = beta*W + alpha*this*X^T @@ -648,287 +651,289 @@ void hiopMatrixDenseRowMajor::timesMatTrans_local(double beta, hiopMatrix& W_, d const auto& X = dynamic_cast(X_); auto& W = dynamic_cast(W_); #ifdef HIOP_DEEPCHECKS - assert(W.m()==m_local_); - //assert(X.n()==n_local_); - assert(W.n()==X.m()); + assert(W.m() == m_local_); + // assert(X.n()==n_local_); + assert(W.n() == X.m()); #endif - assert(W.n_local_==W.n_global_ && "not intended for the case when the result matrix is distributed."); - if(W.m()==0) return; - if(W.n()==0) return; - if(n_local_==0) { - if(beta!=1.0) { - int one=1; int mn=W.m()*W.n(); + assert(W.n_local_ == W.n_global_ && "not intended for the case when the result matrix is distributed."); + if(W.m() == 0) return; + if(W.n() == 0) return; + if(n_local_ == 0) { + if(beta != 1.0) { + int one = 1; + int mn = W.m() * W.n(); DSCAL(&mn, &beta, W.M_[0], &one); } return; } /* C = alpha*op(A)*op(B) + beta*C in our case is Wt= alpha* X *Mt + beta*Wt */ - char transX='T', transM='N'; - int ldx=n_local_;//=X.n(); (modified to support the parallel case) - int ldm=n_local_, ldw=W.n(); - int M=X.m(), N=m_local_, K=n_local_; - double* XM=X.local_data_const(); double* WM=W.local_data(); + char transX = 'T', transM = 'N'; + int ldx = n_local_; //=X.n(); (modified to support the parallel case) + int ldm = n_local_, ldw = W.n(); + int M = X.m(), N = m_local_, K = n_local_; + double* XM = X.local_data_const(); + double* WM = W.local_data(); - DGEMM(&transX, &transM, &M,&N,&K, &alpha,XM,&ldx, this->local_data_const(),&ldm, &beta,WM,&ldw); + DGEMM(&transX, &transM, &M, &N, &K, &alpha, XM, &ldx, this->local_data_const(), &ldm, &beta, WM, &ldw); } /* W = beta*W + alpha*this*X^T */ void hiopMatrixDenseRowMajor::timesMatTrans(double beta, hiopMatrix& W_, double alpha, const hiopMatrix& X_) const { - auto& W = dynamic_cast(W_); - assert(W.n_local_==W.n_global_ && "not intended for the case when the result matrix is distributed."); + auto& W = dynamic_cast(W_); + assert(W.n_local_ == W.n_global_ && "not intended for the case when the result matrix is distributed."); #ifdef HIOP_DEEPCHECKS const auto& X = dynamic_cast(X_); assert(W.isfinite()); assert(X.isfinite()); - assert(this->n()==X.n()); - assert(this->m()==W.m()); - assert(X.m()==W.n()); + assert(this->n() == X.n()); + assert(this->m() == W.m()); + assert(X.m() == W.n()); #endif - if(W.m()==0) return; - if(W.n()==0) return; + if(W.m() == 0) return; + if(W.n() == 0) return; - if(0==myrank_) timesMatTrans_local(beta,W_,alpha,X_); - else timesMatTrans_local(0., W_,alpha,X_); + if(0 == myrank_) + timesMatTrans_local(beta, W_, alpha, X_); + else + timesMatTrans_local(0., W_, alpha, X_); #ifdef HIOP_USE_MPI - int n2Red=W.m()*W.n(); - double* WM=W.local_data(); - double* Wglob= W.new_mxnlocal_buff(); - int ierr = MPI_Allreduce(WM, Wglob, n2Red, MPI_DOUBLE, MPI_SUM, comm_); assert(ierr==MPI_SUCCESS); - memcpy(WM, Wglob, n2Red*sizeof(double)); + int n2Red = W.m() * W.n(); + double* WM = W.local_data(); + double* Wglob = W.new_mxnlocal_buff(); + int ierr = MPI_Allreduce(WM, Wglob, n2Red, MPI_DOUBLE, MPI_SUM, comm_); + assert(ierr == MPI_SUCCESS); + memcpy(WM, Wglob, n2Red * sizeof(double)); #endif } void hiopMatrixDenseRowMajor::addDiagonal(const double& alpha, const hiopVector& d_) { const hiopVectorPar& d = dynamic_cast(d_); #ifdef HIOP_DEEPCHECKS - assert(d.get_size()==n()); - assert(d.get_size()==m()); - assert(d.get_local_size()==m_local_); - assert(d.get_local_size()==n_local_); + assert(d.get_size() == n()); + assert(d.get_size() == m()); + assert(d.get_local_size() == m_local_); + assert(d.get_local_size() == n_local_); #endif - const double* dd=d.local_data_const(); - for(int i=0; i(d_); - size_type dlen=d.get_size(); + size_type dlen = d.get_size(); #ifdef HIOP_DEEPCHECKS - assert(start>=0); - assert(start+dlen<=n_local_); + assert(start >= 0); + assert(start + dlen <= n_local_); #endif - const double* dd=d.local_data_const(); - for(int i=start; i=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ void hiopMatrixDenseRowMajor::addSubDiagonal(int start_on_dest_diag, - const double& alpha, + const double& alpha, const hiopVector& d_, int start_on_src_vec, - int num_elems/*=-1*/) + int num_elems /*=-1*/) { const hiopVectorPar& d = dynamic_cast(d_); - if(num_elems<0) num_elems = d.get_size()-start_on_src_vec; + if(num_elems < 0) num_elems = d.get_size() - start_on_src_vec; assert(num_elems <= d.get_size()); assert(n_local_ == n_global_ && "method supported only for non-distributed matrices"); - assert(n_local_ == m_local_ && "method supported only for symmetric matrices"); + assert(n_local_ == m_local_ && "method supported only for symmetric matrices"); - assert(start_on_dest_diag>=0 && start_on_dest_diag= 0 && start_on_dest_diag < m_local_); + num_elems = std::min(num_elems, m_local_ - start_on_dest_diag); - const double* dd=d.local_data_const(); - for(int i=0; i=0); - assert(start_on_dest_diag>=0 && start_on_dest_diag+num_elems<=n_local_); + assert(num_elems >= 0); + assert(start_on_dest_diag >= 0 && start_on_dest_diag + num_elems <= n_local_); assert(n_local_ == n_global_ && "method supported only for non-distributed matrices"); - assert(n_local_ == m_local_ && "method supported only for symmetric matrices"); + assert(n_local_ == m_local_ && "method supported only for symmetric matrices"); - for(int i=0; i(X_); + const auto& X = dynamic_cast(X_); #ifdef HIOP_DEEPCHECKS - assert(m_local_==X.m_local_); - assert(n_local_==X.n_local_); + assert(m_local_ == X.m_local_); + assert(n_local_ == X.n_local_); #endif - int N=m_local_*n_local_, inc=1; + int N = m_local_ * n_local_, inc = 1; DAXPY(&N, &alpha, X.M_[0], &inc, M_[0], &inc); } /* block of W += alpha*this' */ -void hiopMatrixDenseRowMajor:: -transAddToSymDenseMatrixUpperTriangle(int row_start, int col_start, - double alpha, hiopMatrixDense& W) const -{ - assert(row_start>=0 && n()+row_start<=W.m()); - assert(col_start>=0 && m()+col_start<=W.n()); - assert(W.n()==W.m()); - +void hiopMatrixDenseRowMajor::transAddToSymDenseMatrixUpperTriangle(int row_start, + int col_start, + double alpha, + hiopMatrixDense& W) const +{ + assert(row_start >= 0 && n() + row_start <= W.m()); + assert(col_start >= 0 && m() + col_start <= W.n()); + assert(W.n() == W.m()); + int n_W = W.n(); double* WM = W.local_data(); - for(int ir=0; irM_[ir][jc]; - WM[iW*n_W+jW] += alpha*this->M_[ir][jc]; + for(int ir = 0; ir < m_local_; ir++) { + const int jW = ir + col_start; + for(int jc = 0; jc < n_local_; jc++) { + const int iW = jc + row_start; + assert(iW <= jW && "source entries need to map inside the upper triangular part of destination"); + // WM[iW][jW] += alpha*this->M_[ir][jc]; + WM[iW * n_W + jW] += alpha * this->M_[ir][jc]; } } } - /* diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where - * 'this' should start to contribute. - * - * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK - * and only the upper triangle of 'this' is accessed - * - * Preconditions: - * 1. this->n()==this->m() - * 2. W.n() == W.m() - */ -void hiopMatrixDenseRowMajor:: -addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const +/* diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where + * 'this' should start to contribute. + * + * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK + * and only the upper triangle of 'this' is accessed + * + * Preconditions: + * 1. this->n()==this->m() + * 2. W.n() == W.m() + */ +void hiopMatrixDenseRowMajor::addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, + double alpha, + hiopMatrixDense& W) const { int n_W = W.n(); - assert(W.n()==W.m()); - assert(this->n()==this->m()); - assert(diag_start+this->n() <= W.n()); + assert(W.n() == W.m()); + assert(this->n() == this->m()); + assert(diag_start + this->n() <= W.n()); double* WM = W.local_data(); - for(int i=0; iM_[i][j]; - WM[iW*n_W+jW] += alpha*this->M_[i][j]; + for(int i = 0; i < n_local_; i++) { + const int iW = i + diag_start; + for(int j = i; j < m_local_; j++) { + const int jW = j + diag_start; + assert(iW <= jW && "source entries need to map inside the upper triangular part of destination"); + assert(iW < W.n() && jW < W.m()); + // WM[iW][jW] += alpha*this->M_[i][j]; + WM[iW * n_W + jW] += alpha * this->M_[i][j]; } } } - double hiopMatrixDenseRowMajor::max_abs_value() { - char norm='M'; + char norm = 'M'; double maxv = DLANGE(&norm, &n_local_, &m_local_, M_[0], &n_local_, NULL); #ifdef HIOP_USE_MPI double maxvg; - int ierr=MPI_Allreduce(&maxv,&maxvg,1,MPI_DOUBLE,MPI_MAX,comm_); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Allreduce(&maxv, &maxvg, 1, MPI_DOUBLE, MPI_MAX, comm_); + assert(ierr == MPI_SUCCESS); return maxvg; #endif return maxv; } -void hiopMatrixDenseRowMajor::row_max_abs_value(hiopVector &ret_vec) +void hiopMatrixDenseRowMajor::row_max_abs_value(hiopVector& ret_vec) { - char norm='M'; - int one=1; + char norm = 'M'; + int one = 1; double maxv; - - hiopVectorPar& vec=dynamic_cast(ret_vec); - assert(m_local_==vec.get_local_size()); - - for(int irow=0; irow(ret_vec); + assert(m_local_ == vec.get_local_size()); + + for(int irow = 0; irow < m_local_; irow++) { + maxv = DLANGE(&norm, &one, &n_local_, M_[0] + (irow * n_local_), &one, nullptr); #ifdef HIOP_USE_MPI double maxvg; - int ierr=MPI_Allreduce(&maxv,&maxvg,1,MPI_DOUBLE,MPI_MAX,comm_); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Allreduce(&maxv, &maxvg, 1, MPI_DOUBLE, MPI_MAX, comm_); + assert(ierr == MPI_SUCCESS); maxv = maxvg; #endif vec.local_data()[irow] = maxv; - } + } } -void hiopMatrixDenseRowMajor::scale_row(hiopVector &vec_scal, const bool inv_scale) +void hiopMatrixDenseRowMajor::scale_row(hiopVector& vec_scal, const bool inv_scale) { - int one=1; + int one = 1; double scal; - - hiopVectorPar& vec=dynamic_cast(vec_scal); - assert(m_local_==vec.get_local_size()); + + hiopVectorPar& vec = dynamic_cast(vec_scal); + assert(m_local_ == vec.get_local_size()); double* vd = vec.local_data(); - - for(int irow=0; irow=tol) { - return false; + // symmetry + for(int i = 0; i < n_local_; i++) + for(int j = 0; j < n_local_; j++) { + double ij = M_[i][j], ji = M_[j][i]; + double relerr = std::abs(ij - ji) / (1 + std::abs(ij)); + assert(relerr < tol); + if(relerr >= tol) { + return false; } } return true; } #endif -bool hiopMatrixDenseRowMajor::symmetrize() +bool hiopMatrixDenseRowMajor::symmetrize() { - if(n_local_!=n_global_) { + if(n_local_ != n_global_) { assert(false && "should be used only for local matrices"); return false; } - //must be square - if(m_local_!=n_global_) { + // must be square + if(m_local_ != n_global_) { assert(false); return false; } - //symmetrize --- copy the upper triangular part to lower tirangular part - for(index_type i=0; i=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, const double& alpha, - const hiopVector& d_, int start_on_src_vec, int num_elems=-1); + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, + const hiopVector& d_, + int start_on_src_vec, + int num_elems = -1); virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c); - + virtual void addMatrix(double alpha, const hiopMatrix& X); /** * @brief block of W += alpha*transpose(this) * For efficiency, only upper triangular matrix is updated since this will be eventually sent to LAPACK * - * @pre transpose of 'this' has to fit in the upper triangle of W + * @pre transpose of 'this' has to fit in the upper triangle of W * @pre W.n() == W.m() */ - virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, int col_dest_start, - double alpha, hiopMatrixDense& W) const; + virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, + hiopMatrixDense& W) const; /** * @brief diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where * 'this' should start to contribute. - * + * * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK * and only the upper triangle of 'this' is accessed - * + * * This functionality of this method is needed only for symmetric matrices and, for this reason, * only symmetric matrices classes implement/need to implement it. * * @pre this->n()==this->m() * @pre W.n() == W.m() */ - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const; + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const; virtual double max_abs_value(); - virtual void row_max_abs_value(hiopVector &ret_vec); + virtual void row_max_abs_value(hiopVector& ret_vec); - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale=false); + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale = false); virtual bool isfinite() const; - - //virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; - virtual void print(FILE* f=NULL, const char* msg=NULL, int maxRows=-1, int maxCols=-1, int rank=-1) const; + + // virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const; virtual hiopMatrixDense* alloc_clone() const; virtual hiopMatrixDense* new_copy() const; @@ -188,22 +187,21 @@ class hiopMatrixDenseRowMajor : public hiopMatrixDense /// @brief copies the first 'num_rows' rows from 'src' to 'this' starting at 'row_dest' void copyRowsFrom(const hiopMatrixDense& src, int num_rows, int row_dest); - + /** * @brief Copy 'n_rows' rows specified by 'rows_idxs' (array of size 'n_rows') from 'src' to 'this' - * + * * @pre 'this' has exactly 'n_rows' rows * @pre 'src' and 'this' must have same number of columns * @pre number of rows in 'src' must be at least the number of rows in 'this' */ void copyRowsFrom(const hiopMatrix& src_gen, const index_type* rows_idxs, size_type n_rows); - + /// @brief copies 'src' into this as a block starting at (i_block_start,j_block_start) - void copyBlockFromMatrix(const index_type i_block_start, const index_type j_block_start, - const hiopMatrixDense& src); - + void copyBlockFromMatrix(const index_type i_block_start, const index_type j_block_start, const hiopMatrixDense& src); + /** - * @brief overwrites 'this' with 'src''s block that starts at (i_src_block_start,j_src_block_start) + * @brief overwrites 'this' with 'src''s block that starts at (i_src_block_start,j_src_block_start) * and has dimensions of 'this' */ void copyFromMatrixBlock(const hiopMatrixDense& src, const int i_src_block_start, const int j_src_block_start); @@ -216,9 +214,8 @@ class hiopMatrixDenseRowMajor : public hiopMatrixDense /// @brief build Hess for FR problem, from the base problem `Hess`. virtual void set_Hess_FR(const hiopMatrixDense& Hess, const hiopVector& add_diag_de); - /// @brief build Jac for FR problem, from the base problem `Jac_c` and `Jac_d`. - virtual void set_Jac_FR(const hiopMatrixDense& Jac_c, - const hiopMatrixDense& Jac_d); + /// @brief build Jac for FR problem, from the base problem `Jac_c` and `Jac_d`. + virtual void set_Jac_FR(const hiopMatrixDense& Jac_c, const hiopMatrixDense& Jac_d); #ifdef HIOP_DEEPCHECKS void overwriteUpperTriangleWithLower(); @@ -228,40 +225,43 @@ class hiopMatrixDenseRowMajor : public hiopMatrixDense virtual size_type get_local_size_m() const { return m_local_; } virtual MPI_Comm get_mpi_comm() const { return comm_; } - double* local_data_const() const {return M_[0]; } - double* local_data() {return M_[0]; } + double* local_data_const() const { return M_[0]; } + double* local_data() { return M_[0]; } + protected: - //do not use this unless you sure you know what you're doing + // do not use this unless you sure you know what you're doing inline double** get_M() { return M_; } + public: - virtual size_type m() const {return m_local_;} - virtual size_type n() const {return n_global_;} + virtual size_type m() const { return m_local_; } + virtual size_type n() const { return n_global_; } #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const; + virtual bool assertSymmetry(double tol = 1e-16) const; #endif virtual bool symmetrize(); private: - double** M_; //local storage - int n_local_; //local number of rows and cols, respectively + double** M_; // local storage + int n_local_; // local number of rows and cols, respectively size_type glob_jl_, glob_ju_; - mutable double* buff_mxnlocal_; + mutable double* buff_mxnlocal_; - //this is very private do not touch :) + // this is very private do not touch :) size_type max_rows_; + private: hiopMatrixDenseRowMajor() {}; /** copy constructor, for internal/private use only (it doesn't copy the values) */ hiopMatrixDenseRowMajor(const hiopMatrixDenseRowMajor&); - inline double* new_mxnlocal_buff() const { - if(buff_mxnlocal_==NULL) { - buff_mxnlocal_ = new double[max_rows_*n_local_]; - } + inline double* new_mxnlocal_buff() const + { + if(buff_mxnlocal_ == NULL) { + buff_mxnlocal_ = new double[max_rows_ * n_local_]; + } return buff_mxnlocal_; } }; -} // namespace hiop - +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixMDS.cpp b/src/LinAlg/hiopMatrixMDS.cpp index 503f1f2a0..a51700a04 100644 --- a/src/LinAlg/hiopMatrixMDS.cpp +++ b/src/LinAlg/hiopMatrixMDS.cpp @@ -21,8 +21,8 @@ void hiopMatrixMDS::set_Jac_FR(const hiopMatrixMDS& Jac_c, assert(J_c_de.n() == mDe->n() && J_d_de.n() == mDe->n()); mDe->copyRowsFrom(J_c_de, J_c_de.m(), 0); mDe->copyRowsFrom(J_d_de, J_d_de.m(), J_c_de.m()); - - mDe->copy_to(JacD); + + mDe->copy_to(JacD); } void hiopMatrixSymBlockDiagMDS::set_Hess_FR(const hiopMatrixSymBlockDiagMDS& Hess, @@ -41,7 +41,4 @@ void hiopMatrixSymBlockDiagMDS::set_Hess_FR(const hiopMatrixSymBlockDiagMDS& Hes mDe->copy_to(MHDD); } - -} //end of namespace - - +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixMDS.hpp b/src/LinAlg/hiopMatrixMDS.hpp index 9998bc81a..4d8ff15ca 100644 --- a/src/LinAlg/hiopMatrixMDS.hpp +++ b/src/LinAlg/hiopMatrixMDS.hpp @@ -14,7 +14,7 @@ namespace hiop { /** Mixed Sparse-Dense blocks matrix - it is not distributed * M = [S D] where S is sparse and D is dense - * Note: the following methods of hiopMatrix are NOT + * Note: the following methods of hiopMatrix are NOT * implemented in this class: * - timesMat * - transTimesMat @@ -49,10 +49,10 @@ class hiopMatrixMDS : public hiopMatrix } /** - * @note should this method be called, an assertion will be thrown in + * @note should this method be called, an assertion will be thrown in * hiopMatrixSparseTriplet if that is the relevant implementation. */ - virtual void copyFrom(const hiopMatrixMDS& m) + virtual void copyFrom(const hiopMatrixMDS& m) { mSp->copyFrom(*m.mSp); mDe->copyFrom(*m.mDe); @@ -64,23 +64,21 @@ class hiopMatrixMDS : public hiopMatrix mSp->copyRowsFrom(*src.mSp, rows_idxs, n_rows); mDe->copyRowsFrom(*src.mDe, rows_idxs, n_rows); } - - virtual void timesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const + + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { - assert(x.get_size() == mSp->n()+mDe->n()); + assert(x.get_size() == mSp->n() + mDe->n()); mSp->timesVec(beta, y.local_data(), alpha, x.local_data_const()); - mDe->timesVec(1., y.local_data(), alpha, x.local_data_const()+mSp->n()); + mDe->timesVec(1., y.local_data(), alpha, x.local_data_const() + mSp->n()); } - virtual void transTimesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { - assert(y.get_size() == mSp->n()+mDe->n()); - mSp->transTimesVec(beta, y.local_data(), alpha, x.local_data_const()); - mDe->transTimesVec(beta, y.local_data()+mSp->n(), alpha, x.local_data_const()); + assert(y.get_size() == mSp->n() + mDe->n()); + mSp->transTimesVec(beta, y.local_data(), alpha, x.local_data_const()); + mDe->transTimesVec(beta, y.local_data() + mSp->n(), alpha, x.local_data_const()); } - /* W = beta*W + alpha*this*X */ + /* W = beta*W + alpha*this*X */ virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const { assert(false && "not yet implemented"); @@ -93,30 +91,26 @@ class hiopMatrixMDS : public hiopMatrix virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const { - const hiopMatrixMDS &X_mds = dynamic_cast(X); - + const hiopMatrixMDS& X_mds = dynamic_cast(X); + mDe->timesMatTrans(beta, W, 1.0, *X_mds.de_mat()); mSp->timesMatTrans(1.0, W, 1.0, *X_mds.sp_mat()); } - - virtual void addDiagonal(const double& alpha, const hiopVector& d_) - { - assert(false && "not supported"); - } - virtual void addDiagonal(const double& value) - { - assert(false && "not supported"); - } + virtual void addDiagonal(const double& alpha, const hiopVector& d_) { assert(false && "not supported"); } + virtual void addDiagonal(const double& value) { assert(false && "not supported"); } virtual void addSubDiagonal(const double& alpha, index_type start, const hiopVector& d_) { assert(false && "not supported"); } /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, const double& alpha, - const hiopVector& d_, int start_on_src_vec, int num_elems=-1) + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, + const hiopVector& d_, + int start_on_src_vec, + int num_elems = -1) { assert(false && "not needed / implemented"); } @@ -127,8 +121,8 @@ class hiopMatrixMDS : public hiopMatrix virtual void addMatrix(double alpha, const hiopMatrix& X) { - const hiopMatrixMDS* pX=dynamic_cast(&X); - if(pX==NULL) { + const hiopMatrixMDS* pX = dynamic_cast(&X); + if(pX == NULL) { assert(false && "operation only supported for hiopMatrixMDS left operand"); } mSp->addMatrix(alpha, *pX->mSp); @@ -145,100 +139,88 @@ class hiopMatrixMDS : public hiopMatrix /* block of W += alpha*this' */ virtual void transAddToSymDenseMatrixUpperTriangle(int row_start, int col_start, double alpha, hiopMatrixDense& W) const { - mSp->transAddToSymDenseMatrixUpperTriangle(row_start, col_start, alpha, W); - mDe->transAddToSymDenseMatrixUpperTriangle(row_start+mSp->n(), col_start, alpha, W); + mSp->transAddToSymDenseMatrixUpperTriangle(row_start, col_start, alpha, W); + mDe->transAddToSymDenseMatrixUpperTriangle(row_start + mSp->n(), col_start, alpha, W); } /* diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where * 'this' should start to contribute. - * + * * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK * and only the upper triangle of 'this' is accessed - * - * Preconditions: + * + * Preconditions: * 1. this->n()==this-m() * 2. W.n() == W.m() */ - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const { assert(false && "not needed for general/nonsymmetric matrices."); } - virtual double max_abs_value() - { - return std::max(mSp->max_abs_value(), mDe->max_abs_value()); - } - - virtual void row_max_abs_value(hiopVector &ret_vec) + virtual double max_abs_value() { return std::max(mSp->max_abs_value(), mDe->max_abs_value()); } + + virtual void row_max_abs_value(hiopVector& ret_vec) { auto ret_vec_dense = ret_vec.new_copy(); - + mSp->row_max_abs_value(ret_vec); mDe->row_max_abs_value(*ret_vec_dense); - + ret_vec.component_max(*ret_vec_dense); - + delete ret_vec_dense; } - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale) + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale) { mSp->scale_row(vec_scal, inv_scale); mDe->scale_row(vec_scal, inv_scale); } - - virtual bool isfinite() const - { - return mSp->isfinite() && mDe->isfinite(); - } - - virtual void print(FILE* f=NULL, const char* msg=NULL, int maxRows=-1, int maxCols=-1, int rank=-1) const + + virtual bool isfinite() const { return mSp->isfinite() && mDe->isfinite(); } + + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const { - mSp->print(f,msg,maxRows,maxCols,rank); - mDe->print(f,msg,maxRows,maxCols,rank); + mSp->print(f, msg, maxRows, maxCols, rank); + mDe->print(f, msg, maxRows, maxCols, rank); } virtual hiopMatrix* alloc_clone() const { hiopMatrixMDS* m = new hiopMatrixMDS(); - assert(m->mSp==NULL); assert(m->mDe==NULL); + assert(m->mSp == NULL); + assert(m->mDe == NULL); m->mSp = mSp->alloc_clone(); m->mDe = mDe->alloc_clone(); - assert(m->mSp!=NULL); assert(m->mDe!=NULL); + assert(m->mSp != NULL); + assert(m->mDe != NULL); return m; } virtual hiopMatrix* new_copy() const { hiopMatrixMDS* m = new hiopMatrixMDS(); - assert(m->mSp==NULL); assert(m->mDe==NULL); + assert(m->mSp == NULL); + assert(m->mDe == NULL); m->mSp = mSp->new_copy(); m->mDe = mDe->new_copy(); - assert(m->mSp!=NULL); assert(m->mDe!=NULL); + assert(m->mSp != NULL); + assert(m->mDe != NULL); return m; } - virtual inline size_type m() const {return mSp->m();} - virtual inline size_type n() const {return mSp->n()+mDe->n();} - inline size_type n_sp() const {return mSp->n();} - inline size_type n_de() const {return mDe->n();} + virtual inline size_type m() const { return mSp->m(); } + virtual inline size_type n() const { return mSp->n() + mDe->n(); } + inline size_type n_sp() const { return mSp->n(); } + inline size_type n_de() const { return mDe->n(); } inline const hiopMatrixSparse* sp_mat() const { return mSp; } inline const hiopMatrixDense* de_mat() const { return mDe; } inline int sp_nnz() const { return mSp->numberOfNonzeros(); } - inline int* sp_irow() - { - return mSp->i_row(); - } - inline int* sp_jcol() - { - return mSp->j_col(); - } - inline double* sp_M() - { - return mSp->M(); - } + inline int* sp_irow() { return mSp->i_row(); } + inline int* sp_jcol() { return mSp->j_col(); } + inline double* sp_M() { return mSp->M(); } inline double* de_local_data() { return mDe->local_data(); } /// @brief extend base problem Jac to the Jac in feasibility problem @@ -250,18 +232,21 @@ class hiopMatrixMDS : public hiopMatrix double* JacD); #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const { return false; } + virtual bool assertSymmetry(double tol = 1e-16) const { return false; } #endif private: hiopMatrixSparse* mSp; hiopMatrixDense* mDe; + private: - hiopMatrixMDS() : mSp(NULL), mDe(NULL) {}; + hiopMatrixMDS() + : mSp(NULL), + mDe(NULL) {}; hiopMatrixMDS(const hiopMatrixMDS&) {}; }; /* - * Note: the following methods of hiopMatrix are NOT + * Note: the following methods of hiopMatrix are NOT * implemented in this class: * - timesMat * - transTimesMat @@ -294,7 +279,7 @@ class hiopMatrixSymBlockDiagMDS : public hiopMatrix mSp->setToConstant(c); mDe->setToConstant(c); } - virtual void copyFrom(const hiopMatrixSymBlockDiagMDS& m) + virtual void copyFrom(const hiopMatrixSymBlockDiagMDS& m) { mSp->copyFrom(*m.mSp); mDe->copyFrom(*m.mDe); @@ -306,23 +291,21 @@ class hiopMatrixSymBlockDiagMDS : public hiopMatrix mSp->copyRowsFrom(src, rows_idxs, n_rows); mDe->copyRowsFrom(src, rows_idxs, n_rows); } - - virtual void timesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const + + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { - assert(x.get_size() == mSp->n()+mDe->n()); - assert(y.get_size() == mSp->n()+mDe->n()); + assert(x.get_size() == mSp->n() + mDe->n()); + assert(y.get_size() == mSp->n() + mDe->n()); - mSp->timesVec(beta, y.local_data(), alpha, x.local_data_const()); - mDe->timesVec(beta, y.local_data()+mSp->n(), alpha, x.local_data_const()+mSp->n()); + mSp->timesVec(beta, y.local_data(), alpha, x.local_data_const()); + mDe->timesVec(beta, y.local_data() + mSp->n(), alpha, x.local_data_const() + mSp->n()); } - virtual void transTimesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { timesVec(beta, y, alpha, x); } - /* W = beta*W + alpha*this*X */ + /* W = beta*W + alpha*this*X */ virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const { assert(false && "not yet implemented"); @@ -338,25 +321,21 @@ class hiopMatrixSymBlockDiagMDS : public hiopMatrix assert(false && "not yet implemented"); } - - virtual void addDiagonal(const double& alpha, const hiopVector& d_) - { - assert(false && "not supported"); - } - virtual void addDiagonal(const double& value) - { - assert(false && "not supported"); - } + virtual void addDiagonal(const double& alpha, const hiopVector& d_) { assert(false && "not supported"); } + virtual void addDiagonal(const double& value) { assert(false && "not supported"); } virtual void addSubDiagonal(const double& alpha, index_type start, const hiopVector& d_) { assert(false && "not supported"); } /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, const double& alpha, - const hiopVector& d_, int start_on_src_vec, int num_elems=-1) + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, + const hiopVector& d_, + int start_on_src_vec, + int num_elems = -1) { assert(false && "not needed / implemented"); } @@ -374,91 +353,87 @@ class hiopMatrixSymBlockDiagMDS : public hiopMatrix /** * block of W += alpha*this - * + * * @warning This method should never be called/is never needed for symmetric matrixes. * Use addUpperTriangleToSymDenseMatrixUpperTriangle instead. */ - virtual void transAddToSymDenseMatrixUpperTriangle(int row_start, int col_start, - double alpha, hiopMatrixDense& W) const + virtual void transAddToSymDenseMatrixUpperTriangle(int row_start, int col_start, double alpha, hiopMatrixDense& W) const { assert(0 && "This should not be called for MDS symmetric matrices."); } /* diagonal block of W += alpha*this with 'diag_start' indicating the diagonal entry of W where * 'this' should start to contribute. - * + * * For efficiency, only upper triangle of W is updated since this will be eventually sent to LAPACK * and only the upper triangle of 'this' is accessed - * - * Preconditions: + * + * Preconditions: * 1. this->n()==this->m() * 2. W.n() == W.m() */ - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const { assert(mSp->m() == mSp->n()); - mSp->addUpperTriangleToSymDenseMatrixUpperTriangle(diag_start, alpha, W); - mDe->addUpperTriangleToSymDenseMatrixUpperTriangle(diag_start+mSp->m(), alpha, W); + mSp->addUpperTriangleToSymDenseMatrixUpperTriangle(diag_start, alpha, W); + mDe->addUpperTriangleToSymDenseMatrixUpperTriangle(diag_start + mSp->m(), alpha, W); } - virtual double max_abs_value() - { - return std::max(mSp->max_abs_value(), mDe->max_abs_value()); - } + virtual double max_abs_value() { return std::max(mSp->max_abs_value(), mDe->max_abs_value()); } - virtual void row_max_abs_value(hiopVector &ret_vec) + virtual void row_max_abs_value(hiopVector& ret_vec) { auto ret_vec_dense = ret_vec.new_copy(); - + mSp->row_max_abs_value(ret_vec); mDe->row_max_abs_value(*ret_vec_dense); - + ret_vec.component_max(*ret_vec_dense); - - delete ret_vec_dense; + + delete ret_vec_dense; } - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale) + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale) { mSp->scale_row(vec_scal, inv_scale); mDe->scale_row(vec_scal, inv_scale); } - virtual bool isfinite() const - { - return mSp->isfinite() && mDe->isfinite(); - } - - virtual void print(FILE* f=NULL, const char* msg=NULL, int maxRows=-1, int maxCols=-1, int rank=-1) const + virtual bool isfinite() const { return mSp->isfinite() && mDe->isfinite(); } + + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const { - mSp->print(f,msg,maxRows,maxCols,rank); - mDe->print(f,msg,maxRows,maxCols,rank); + mSp->print(f, msg, maxRows, maxCols, rank); + mDe->print(f, msg, maxRows, maxCols, rank); } virtual hiopMatrix* alloc_clone() const { hiopMatrixSymBlockDiagMDS* m = new hiopMatrixSymBlockDiagMDS(); - assert(m->mSp==NULL); assert(m->mDe==NULL); + assert(m->mSp == NULL); + assert(m->mDe == NULL); m->mSp = mSp->alloc_clone(); m->mDe = mDe->alloc_clone(); - assert(m->mSp!=NULL); assert(m->mDe!=NULL); + assert(m->mSp != NULL); + assert(m->mDe != NULL); return m; } virtual hiopMatrix* new_copy() const { hiopMatrixSymBlockDiagMDS* m = new hiopMatrixSymBlockDiagMDS(); - assert(m->mSp==NULL); assert(m->mDe==NULL); + assert(m->mSp == NULL); + assert(m->mDe == NULL); m->mSp = mSp->new_copy(); m->mDe = mDe->new_copy(); - assert(m->mSp!=NULL); assert(m->mDe!=NULL); + assert(m->mSp != NULL); + assert(m->mDe != NULL); return m; } - virtual inline size_type m() const {return n();} - virtual inline size_type n() const {return mSp->n()+mDe->n();} - inline size_type n_sp() const {return mSp->n();} - inline size_type n_de() const {return mDe->n();} + virtual inline size_type m() const { return n(); } + virtual inline size_type n() const { return mSp->n() + mDe->n(); } + inline size_type n_sp() const { return mSp->n(); } + inline size_type n_de() const { return mDe->n(); } inline const hiopMatrixSparse* sp_mat() const { return mSp; } inline const hiopMatrixDense* de_mat() const { return mDe; } @@ -479,7 +454,7 @@ class hiopMatrixSymBlockDiagMDS : public hiopMatrix const hiopVector& add_diag_de); #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const + virtual bool assertSymmetry(double tol = 1e-16) const { if(mSp->assertSymmetry(tol)) return mDe->assertSymmetry(tol); @@ -488,13 +463,15 @@ class hiopMatrixSymBlockDiagMDS : public hiopMatrix } #endif private: - hiopMatrixSparse* mSp; ///< Symmetric sparse matrix - hiopMatrixDense* mDe; ///< Row-major dense matrix + hiopMatrixSparse* mSp; ///< Symmetric sparse matrix + hiopMatrixDense* mDe; ///< Row-major dense matrix private: - hiopMatrixSymBlockDiagMDS() : mSp(NULL), mDe(NULL) {}; + hiopMatrixSymBlockDiagMDS() + : mSp(NULL), + mDe(NULL) {}; hiopMatrixSymBlockDiagMDS(const hiopMatrixMDS&) {}; }; -} //end of namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopMatrixRajaSparseTriplet.hpp b/src/LinAlg/hiopMatrixRajaSparseTriplet.hpp index 7e57f10c6..25e6043ba 100644 --- a/src/LinAlg/hiopMatrixRajaSparseTriplet.hpp +++ b/src/LinAlg/hiopMatrixRajaSparseTriplet.hpp @@ -70,7 +70,7 @@ namespace hiop { -/** +/** * @brief Sparse matrix of doubles in triplet format - it is not distributed * @note for now (i,j) are expected ordered: first on rows 'i' and then on cols 'j' */ @@ -79,7 +79,7 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse { public: hiopMatrixRajaSparseTriplet(int rows, int cols, int nnz, std::string memspace); - virtual ~hiopMatrixRajaSparseTriplet(); + virtual ~hiopMatrixRajaSparseTriplet(); virtual void setToZero(); virtual void setToConstant(double c); @@ -88,9 +88,9 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse virtual void copy_to(hiopMatrixDense& W); virtual void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows); - - virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; - virtual void timesVec(double beta, double* y, double alpha, const double* x) const; + + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + virtual void timesVec(double beta, double* y, double alpha, const double* x) const; virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const; @@ -106,24 +106,24 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse virtual void addSubDiagonal(const double& alpha, index_type start, const hiopVector& d_); /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ virtual void addSubDiagonal(int start_on_dest_diag, const double& alpha, const hiopVector& d_, int start_on_src_vec, - int num_elems=-1) + int num_elems = -1) { assert(false && "not needed / implemented"); } - virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) + virtual void addSubDiagonal(int start_on_dest_diag, int num_elems, const double& c) { assert(false && "not needed / implemented"); } /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'vec_d' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', scaled by 'scal' - */ + * 'vec_d' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', scaled by 'scal' + */ virtual void copySubDiagonalFrom(const index_type& start_on_dest_diag, const size_type& num_elems, const hiopVector& vec_d, @@ -131,8 +131,8 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse double scal); /* add constant 'c' to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements. - * The number of elements added is 'num_elems' - */ + * The number of elements added is 'num_elems' + */ virtual void setSubDiagonalTo(const index_type& start_on_dest_diag, const size_type& num_elems, const double& c, @@ -145,21 +145,17 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse int col_dest_start, double alpha, hiopMatrixDense& W) const; - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, - hiopMatrixDense& W) const + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const { assert(false && "counterpart method of hiopMatrixRajaSymSparseTriplet should be used"); } - virtual void addUpperTriangleToSymSparseMatrixUpperTriangle(int diag_start, - double alpha, - hiopMatrixSparse& W) const + virtual void addUpperTriangleToSymSparseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixSparse& W) const { assert(false && "counterpart method of hiopMatrixRajaSymSparseTriplet should be used"); } - /* diag block of W += alpha * M * D^{-1} * transpose(M), where M=this + /* diag block of W += alpha * M * D^{-1} * transpose(M), where M=this * * Only the upper triangular entries of W are updated. */ @@ -168,13 +164,13 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse const hiopVector& D, hiopMatrixDense& W) const; - /* block of W += alpha * M * D^{-1} * transpose(N), where M=this + /* block of W += alpha * M * D^{-1} * transpose(N), where M=this * - * Warning: The product matrix M * D^{-1} * transpose(N) with start offsets 'row_dest_start' and - * 'col_dest_start' needs to fit completely in the upper triangle of W. If this is NOT the - * case, the method will assert(false) in debug; in release, the method will issue a - * warning with HIOP_DEEPCHECKS (otherwise NO warning will be issue) and will silently update - * the (strictly) lower triangular elements (these are ignored later on since only the upper + * Warning: The product matrix M * D^{-1} * transpose(N) with start offsets 'row_dest_start' and + * 'col_dest_start' needs to fit completely in the upper triangle of W. If this is NOT the + * case, the method will assert(false) in debug; in release, the method will issue a + * warning with HIOP_DEEPCHECKS (otherwise NO warning will be issue) and will silently update + * the (strictly) lower triangular elements (these are ignored later on since only the upper * triangular part of W will be accessed) */ virtual void addMDinvNtransToSymDeMatUTri(int row_dest_start, @@ -189,20 +185,20 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse const size_type& n_rows, const index_type& rows_dest_idx_st, const size_type& dest_nnz_st); - + /** - * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' - * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to - * this->numOfNumbers() - * @pre User must know the nonzero pattern of src and dest matrices. The method assumes - * that non-zero patterns does not change between calls and that 'src_gen' is a valid - * submatrix of 'this' - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' + * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to + * this->numOfNumbers() + * @pre User must know the nonzero pattern of src and dest matrices. The method assumes + * that non-zero patterns does not change between calls and that 'src_gen' is a valid + * submatrix of 'this' + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFrom(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, @@ -210,13 +206,13 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse const bool offdiag_only = false); /** - * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner - * 'dest_row_st' and 'dest_col_st'. - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner + * 'dest_row_st' and 'dest_col_st'. + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFromTrans(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, @@ -224,12 +220,12 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse const bool offdiag_only = false); /** - * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_colpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -238,12 +234,12 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse const hiopVector& ix); /** - * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -252,70 +248,67 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse const hiopVector& ix); /** - * @brief Copy a diagonal matrix to destination. - * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. - * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. - * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replased. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy a diagonal matrix to destination. + * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. + * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. + * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replased. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copyDiagMatrixToSubblock(const double& src_val, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, const size_type& nnz_to_copy); - /** - * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. - * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. - * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - * @pre 'pattern' has same size as `dx` - * @pre 'pattern` has exactly `nnz_to_copy` nonzeros - */ + /** + * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. + * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. + * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + * @pre 'pattern' has same size as `dx` + * @pre 'pattern` has exactly `nnz_to_copy` nonzeros + */ virtual void copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, const size_type& nnz_to_copy, const hiopVector& pattern); - + virtual double max_abs_value(); - virtual void row_max_abs_value(hiopVector &ret_vec); - - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale=false); + virtual void row_max_abs_value(hiopVector& ret_vec); + + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale = false); virtual bool isfinite() const; - - //virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; - virtual void print(FILE* f=NULL, const char* msg=NULL, int maxRows=-1, int maxCols=-1, int rank=-1) const; + + // virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const; virtual void startingAtAddSubDiagonalToStartingAt(int diag_src_start, - const double& alpha, + const double& alpha, hiopVector& vec_dest, int vec_start, - int num_elems=-1) const + int num_elems = -1) const { assert(0 && "This method should be used only for symmetric matrices.\n"); } - virtual void convert_to_csr_arrays(int &csr_nnz, - int **csr_kRowPtr, - int **csr_jCol, - double **csr_kVal, - int **index_covert_CSR2Triplet, - int **index_covert_extra_Diag2CSR) + virtual void convert_to_csr_arrays(int& csr_nnz, + int** csr_kRowPtr, + int** csr_jCol, + double** csr_kVal, + int** index_covert_CSR2Triplet, + int** index_covert_extra_Diag2CSR) { assert(0 && "not implemented"); } virtual bool is_diagonal() const; - virtual void extract_diagonal(hiopVector& diag_out) const - { - assert(false && "not yet implemented"); - } + virtual void extract_diagonal(hiopVector& diag_out) const { assert(false && "not yet implemented"); } virtual size_type numberOfOffDiagNonzeros() const { @@ -329,11 +322,7 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse int* jJacS, double* MJacS); - virtual void set_Hess_FR(const hiopMatrixSparse& Hess, - int* iHSS, - int* jHSS, - double* MHSS, - const hiopVector& add_diag) + virtual void set_Hess_FR(const hiopMatrixSparse& Hess, int* iHSS, int* jHSS, double* MHSS, const hiopVector& add_diag) { assert(false && "not needed / implemented"); } @@ -348,7 +337,7 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse inline const int* i_row() const { return iRow_; } inline const int* j_col() const { return jCol_; } inline const double* M() const { return values_; } - + inline int* i_row_host() { return iRow_host_; } inline int* j_col_host() { return jCol_host_; } inline double* M_host() { return values_host_; } @@ -361,46 +350,46 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse void copyFromDev() const; #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const { return false; } + virtual bool assertSymmetry(double tol = 1e-16) const { return false; } virtual bool checkIndexesAreOrdered() const; #endif protected: mutable ExecSpace exec_space_; using MEMBACKENDHOST = typename MEMBACKEND::MemBackendHost; - //EXECPOLICYRAJA is used internally as a execution policy. EXECPOLICYHOST is not used internally - //in this class. EXECPOLICYHOST can be any host policy as long as memory allocations and - //and transfers within and from `exec_space_host_` work with EXECPOLICYHOST (currently all such - //combinations work). + // EXECPOLICYRAJA is used internally as a execution policy. EXECPOLICYHOST is not used internally + // in this class. EXECPOLICYHOST can be any host policy as long as memory allocations and + // and transfers within and from `exec_space_host_` work with EXECPOLICYHOST (currently all such + // combinations work). using EXECPOLICYHOST = hiop::ExecPolicySeq; mutable ExecSpace exec_space_host_; - int* iRow_; ///< row indices of the nonzero entries - int* jCol_; ///< column indices of the nonzero entries - double* values_; ///< values of the nonzero entries + int* iRow_; ///< row indices of the nonzero entries + int* jCol_; ///< column indices of the nonzero entries + double* values_; ///< values of the nonzero entries mutable int* iRow_host_; mutable int* jCol_host_; mutable double* values_host_; - std::string mem_space_;// = "DEVICE"; + std::string mem_space_; // = "DEVICE"; protected: struct RowStartsInfo { - index_type *idx_start_; //size num_rows+1 - index_type *idx_start_host_; //size num_rows+1 + index_type* idx_start_; // size num_rows+1 + index_type* idx_start_host_; // size num_rows+1 index_type register_row_st_; size_type num_rows_; std::string mem_space_; RowStartsInfo() - : idx_start_(nullptr), - register_row_st_{0}, - num_rows_(0) + : idx_start_(nullptr), + register_row_st_{0}, + num_rows_(0) {} RowStartsInfo(size_type n_rows, std::string memspace); virtual ~RowStartsInfo(); - + void copy_from_dev(); void copy_to_dev(); }; @@ -409,17 +398,20 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse protected: RowStartsInfo* allocAndBuildRowStarts() const; - RowStartsInfo* allocRowStarts(size_type sz, std::string memspace) const - { - return new RowStartsInfo(sz, memspace); - } + RowStartsInfo* allocRowStarts(size_type sz, std::string memspace) const { return new RowStartsInfo(sz, memspace); } + private: - hiopMatrixRajaSparseTriplet() - : hiopMatrixSparse(0, 0, 0), iRow_(NULL), jCol_(NULL), values_(NULL) - { - } - hiopMatrixRajaSparseTriplet(const hiopMatrixRajaSparseTriplet&) - : hiopMatrixSparse(0, 0, 0), iRow_(NULL), jCol_(NULL), values_(NULL) + hiopMatrixRajaSparseTriplet() + : hiopMatrixSparse(0, 0, 0), + iRow_(NULL), + jCol_(NULL), + values_(NULL) + {} + hiopMatrixRajaSparseTriplet(const hiopMatrixRajaSparseTriplet&) + : hiopMatrixSparse(0, 0, 0), + iRow_(NULL), + jCol_(NULL), + values_(NULL) { assert(false); } @@ -429,18 +421,17 @@ class hiopMatrixRajaSparseTriplet : public hiopMatrixSparse template class hiopMatrixRajaSymSparseTriplet : public hiopMatrixRajaSparseTriplet { -public: +public: hiopMatrixRajaSymSparseTriplet(int n, int nnz, std::string memspace) - : hiopMatrixRajaSparseTriplet(n, n, nnz, memspace), - nnz_offdiag_{-1} - { - } - virtual ~hiopMatrixRajaSymSparseTriplet() {} + : hiopMatrixRajaSparseTriplet(n, n, nnz, memspace), + nnz_offdiag_{-1} + {} + virtual ~hiopMatrixRajaSymSparseTriplet() {} /** y = beta * y + alpha * this * x */ - virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; - virtual void timesVec(double beta, double* y, double alpha, const double* x) const; - + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + virtual void timesVec(double beta, double* y, double alpha, const double* x) const; + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { return timesVec(beta, y, alpha, x); @@ -450,33 +441,31 @@ class hiopMatrixRajaSymSparseTriplet : public hiopMatrixRajaSparseTriplet=0, 'num_elems' are copied; otherwise copies as many as * are available in 'vec_dest' starting at 'vec_start' */ virtual void startingAtAddSubDiagonalToStartingAt(int diag_src_start, - const double& alpha, + const double& alpha, hiopVector& vec_dest, - int vec_start, int num_elems=-1) const; - + int vec_start, + int num_elems = -1) const; virtual hiopMatrixSparse* alloc_clone() const; virtual hiopMatrixSparse* new_copy() const; #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const { return true; } + virtual bool assertSymmetry(double tol = 1e-16) const { return true; } #endif - virtual void extract_diagonal(hiopVector& diag_out) const - { - assert(false && "not yet implemented"); - } + virtual void extract_diagonal(hiopVector& diag_out) const { assert(false && "not yet implemented"); } virtual size_type numberOfOffDiagNonzeros() const; @@ -486,16 +475,12 @@ class hiopMatrixRajaSymSparseTriplet : public hiopMatrixRajaSparseTriplet::hiop_raja_ex using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; using matrix_exec = ExecRajaPoliciesBackend::matrix_exec; using hiop_raja_atomic = ExecRajaPoliciesBackend::hiop_raja_atomic; -} +} // namespace hiop #include "hiopMatrixRajaSparseTripletImpl.hpp" @@ -74,10 +74,10 @@ namespace hiop { // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopMatrixRajaSparseTriplet; template class hiopMatrixRajaSparseTriplet; template class hiopMatrixRajaSymSparseTriplet; template class hiopMatrixRajaSymSparseTriplet; -} +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixRajaSparseTripletHip.cpp b/src/LinAlg/hiopMatrixRajaSparseTripletHip.cpp index 4d52dd861..2b67e2185 100644 --- a/src/LinAlg/hiopMatrixRajaSparseTripletHip.cpp +++ b/src/LinAlg/hiopMatrixRajaSparseTripletHip.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -66,7 +66,7 @@ using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exe using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; using matrix_exec = ExecRajaPoliciesBackend::matrix_exec; using hiop_raja_atomic = ExecRajaPoliciesBackend::hiop_raja_atomic; -} +} // namespace hiop #include "hiopMatrixRajaSparseTripletImpl.hpp" @@ -74,10 +74,10 @@ namespace hiop { // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopMatrixRajaSparseTriplet; template class hiopMatrixRajaSparseTriplet; template class hiopMatrixRajaSymSparseTriplet; template class hiopMatrixRajaSymSparseTriplet; -} +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixRajaSparseTripletImpl.hpp b/src/LinAlg/hiopMatrixRajaSparseTripletImpl.hpp index 844858e42..01fe323b7 100644 --- a/src/LinAlg/hiopMatrixRajaSparseTripletImpl.hpp +++ b/src/LinAlg/hiopMatrixRajaSparseTripletImpl.hpp @@ -55,14 +55,14 @@ * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL * -*/ + */ #include "hiopMatrixRajaSparseTriplet.hpp" #include "hiopVectorRaja.hpp" #include "hiop_blasdefs.hpp" -#include //for std::min -#include //for std::isfinite +#include //for std::min +#include //for std::isfinite #include #include @@ -70,21 +70,19 @@ namespace hiop { - + /// @brief Constructs a hiopMatrixRajaSparseTriplet with the given dimensions and memory space template -hiopMatrixRajaSparseTriplet:: -hiopMatrixRajaSparseTriplet(int rows, - int cols, - int _nnz, - std::string memspace) - : hiopMatrixSparse(rows, cols, _nnz), - mem_space_(memspace), - row_starts_(nullptr) +hiopMatrixRajaSparseTriplet::hiopMatrixRajaSparseTriplet(int rows, + int cols, + int _nnz, + std::string memspace) + : hiopMatrixSparse(rows, cols, _nnz), + mem_space_(memspace), + row_starts_(nullptr) { - if(rows==0 || cols==0) - { - assert(nnz_==0 && "number of nonzeros must be zero when any of the dimensions are 0"); + if(rows == 0 || cols == 0) { + assert(nnz_ == 0 && "number of nonzeros must be zero when any of the dimensions are 0"); nnz_ = 0; } @@ -92,7 +90,7 @@ hiopMatrixRajaSparseTriplet(int rows, mem_space_ = "HOST"; #endif - //printf("Memory space: %s\n", mem_space_.c_str()); + // printf("Memory space: %s\n", mem_space_.c_str()); auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator devAlloc = resmgr.getAllocator(mem_space_); @@ -103,14 +101,11 @@ hiopMatrixRajaSparseTriplet(int rows, values_ = static_cast(devAlloc.allocate(nnz_ * sizeof(double))); // create host mirror if memory space is on the device - if (mem_space_ == "DEVICE") - { + if(mem_space_ == "DEVICE") { iRow_host_ = static_cast(hostAlloc.allocate(nnz_ * sizeof(int))); jCol_host_ = static_cast(hostAlloc.allocate(nnz_ * sizeof(int))); values_host_ = static_cast(hostAlloc.allocate(nnz_ * sizeof(double))); - } - else - { + } else { iRow_host_ = iRow_; jCol_host_ = jCol_; values_host_ = values_; @@ -131,8 +126,7 @@ hiopMatrixRajaSparseTriplet::~hiopMatrixRajaSparseTripl devAlloc.deallocate(values_); // deallocate host mirror if memory space is on device - if (mem_space_ == "DEVICE") - { + if(mem_space_ == "DEVICE") { hostAlloc.deallocate(iRow_host_); hostAlloc.deallocate(jCol_host_); hostAlloc.deallocate(values_host_); @@ -150,7 +144,7 @@ void hiopMatrixRajaSparseTriplet::setToZero() /** * @brief Sets all the values of this matrix to some constant. - * + * * @param c A real number. */ template @@ -158,34 +152,29 @@ void hiopMatrixRajaSparseTriplet::setToConstant(double { double* dd = this->values_; auto nz = nnz_; - RAJA::forall(RAJA::RangeSegment(0, nz), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] = c; - }); + RAJA::forall(RAJA::RangeSegment(0, nz), RAJA_LAMBDA(RAJA::Index_type i) { dd[i] = c; }); } /** * @brief Multiplies this matrix by a vector and stores it in an output vector. - * + * * @param beta Amount to scale the output vector by before adding to it. * @param y The output vector. * @param alpha The amount to scale this matrix by before multiplying. * @param x The vector by which to multiply this matrix. - * + * * @pre _x_'s length must equal the number of columns in this matrix. * @pre _y_'s length must equal the number of rows in this matrix. * @post _y_ will contain the output of the following equation: - * + * * The full operation performed is: * _y_ = _beta_ * _y_ + _alpha_ * this * _x_ */ template -void hiopMatrixRajaSparseTriplet:: -timesVec(double beta, - hiopVector& y, - double alpha, - const hiopVector& x) const +void hiopMatrixRajaSparseTriplet::timesVec(double beta, + hiopVector& y, + double alpha, + const hiopVector& x) const { assert(x.get_size() == ncols_); assert(y.get_size() == nrows_); @@ -198,26 +187,21 @@ timesVec(double beta, timesVec(beta, y_data, alpha, x_data); } - + /** * @brief Multiplies this matrix by a vector and stores it in an output vector. - * + * * @see above timesVec function for more detail. This overload takes raw data * pointers rather than hiop constructs. */ template -void hiopMatrixRajaSparseTriplet:: -timesVec(double beta, - double* y, - double alpha, - const double* x) const +void hiopMatrixRajaSparseTriplet::timesVec(double beta, + double* y, + double alpha, + const double* x) const { // y = beta * y - RAJA::forall(RAJA::RangeSegment(0, nrows_), - RAJA_LAMBDA(RAJA::Index_type i) - { - y[i] *= beta; - }); + RAJA::forall(RAJA::RangeSegment(0, nrows_), RAJA_LAMBDA(RAJA::Index_type i) { y[i] *= beta; }); // nrs and ncs are used in assert statements only #ifndef NDEBUG @@ -230,68 +214,62 @@ timesVec(double beta, auto vls = values_; // atomic is needed to prevent data race from ocurring; // y[jCol_[i]] can be referenced by multiple threads concurrently - RAJA::forall(RAJA::RangeSegment(0, nnz_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(irw[i] < nrs); - assert(jcl[i] < ncs); - RAJA::AtomicRef yy(&y[irw[i]]); - yy += alpha * x[jcl[i]] * vls[i]; - }); + RAJA::forall( + RAJA::RangeSegment(0, nnz_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(irw[i] < nrs); + assert(jcl[i] < ncs); + RAJA::AtomicRef yy(&y[irw[i]]); + yy += alpha * x[jcl[i]] * vls[i]; + }); } /** - * @brief Multiplies the transpose of this matrix by a vector and stores it + * @brief Multiplies the transpose of this matrix by a vector and stores it * in an output vector. - * + * * @see above timesVec function for more detail. This function implicitly transposes * this matrix for the multiplication. - * + * * The full operation performed is: * y = beta * y + alpha * this^T * x */ template -void hiopMatrixRajaSparseTriplet:: -transTimesVec(double beta, - hiopVector& y, - double alpha, - const hiopVector& x) const +void hiopMatrixRajaSparseTriplet::transTimesVec(double beta, + hiopVector& y, + double alpha, + const hiopVector& x) const { assert(x.get_size() == nrows_); assert(y.get_size() == ncols_); auto& yy = dynamic_cast&>(y); const auto& xx = dynamic_cast&>(x); - + double* y_data = yy.local_data(); const double* x_data = xx.local_data_const(); - + transTimesVec(beta, y_data, alpha, x_data); } - + /** - * @brief Multiplies the transpose of this matrix by a vector and stores it + * @brief Multiplies the transpose of this matrix by a vector and stores it * in an output vector. - * + * * @see above transTimesVec function for more detail. This overload takes raw data * pointers rather than hiop constructs. - * + * * The full operation performed is: * y = beta * y + alpha * this^T * x */ template -void hiopMatrixRajaSparseTriplet:: -transTimesVec(double beta, - double* y, - double alpha, - const double* x ) const +void hiopMatrixRajaSparseTriplet::transTimesVec(double beta, + double* y, + double alpha, + const double* x) const { - RAJA::forall(RAJA::RangeSegment(0, ncols_), - RAJA_LAMBDA(RAJA::Index_type i) - { - y[i] *= beta; - }); - + RAJA::forall(RAJA::RangeSegment(0, ncols_), RAJA_LAMBDA(RAJA::Index_type i) { y[i] *= beta; }); + // num_rows and num_columns are used in assert statements only #ifndef NDEBUG int num_rows = nrows_; @@ -303,32 +281,30 @@ transTimesVec(double beta, double* values = values_; // atomic is needed to prevent data race from ocurring; // y[jCol_[i]] can be referenced by multiple threads concurrently - RAJA::forall(RAJA::RangeSegment(0, nnz_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(iRow[i] < num_rows); - assert(jCol[i] < num_cols); - RAJA::AtomicRef yy(&y[jCol[i]]); - yy += alpha * x[iRow[i]] * values[i]; - }); + RAJA::forall( + RAJA::RangeSegment(0, nnz_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(iRow[i] < num_rows); + assert(jCol[i] < num_cols); + RAJA::AtomicRef yy(&y[jCol[i]]); + yy += alpha * x[iRow[i]] * values[i]; + }); } template -void hiopMatrixRajaSparseTriplet:: -timesMat(double beta, - hiopMatrix& W, - double alpha, - const hiopMatrix& X) const +void hiopMatrixRajaSparseTriplet::timesMat(double beta, + hiopMatrix& W, + double alpha, + const hiopMatrix& X) const { assert(false && "not needed"); } template -void hiopMatrixRajaSparseTriplet:: -transTimesMat(double beta, - hiopMatrix& W, - double alpha, - const hiopMatrix& X) const +void hiopMatrixRajaSparseTriplet::transTimesMat(double beta, + hiopMatrix& W, + double alpha, + const hiopMatrix& X) const { assert(false && "not needed"); } @@ -338,35 +314,35 @@ transTimesMat(double beta, * Sizes: M1(this) is (m1 x nx) and M2 is (m2, nx). */ template -void hiopMatrixRajaSparseTriplet:: -timesMatTrans(double beta, hiopMatrix& Wmat, double alpha, const hiopMatrix& M2mat) const +void hiopMatrixRajaSparseTriplet::timesMatTrans(double beta, + hiopMatrix& Wmat, + double alpha, + const hiopMatrix& M2mat) const { auto& W = dynamic_cast(Wmat); const auto& M2 = dynamic_cast(M2mat); - + const int m1 = nrows_; const int m2 = M2.nrows_; assert(ncols_ == M2.ncols_); - assert(m1==W.m()); - assert(m2==W.n()); + assert(m1 == W.m()); + assert(m2 == W.n()); - //double** WM = W.get_M(); + // double** WM = W.get_M(); RAJA::View> WM(W.local_data(), W.m(), W.n()); // TODO: allocAndBuildRowStarts -> should create row_starts_ internally (name='prepareRowStarts' ?) - if(this->row_starts_ == nullptr) - this->row_starts_ = this->allocAndBuildRowStarts(); + if(this->row_starts_ == nullptr) this->row_starts_ = this->allocAndBuildRowStarts(); assert(this->row_starts_); - if(M2.row_starts_==NULL) - M2.row_starts_ = M2.allocAndBuildRowStarts(); + if(M2.row_starts_ == NULL) M2.row_starts_ = M2.allocAndBuildRowStarts(); assert(M2.row_starts_); // M1nnz and M2nnz are used in assert statements only #ifndef NDEBUG int M1nnz = this->nnz_; - int M2nnz = M2.nnz_; + int M2nnz = M2.nnz_; #endif index_type* M1_idx_start = this->row_starts_->idx_start_; @@ -377,58 +353,50 @@ timesMatTrans(double beta, hiopMatrix& Wmat, double alpha, const hiopMatrix& M2m double* M1values = this->values_; double* M2values = M2.values_; - RAJA::forall(RAJA::RangeSegment(0, m1), - RAJA_LAMBDA(RAJA::Index_type i) - { - for(int j=0; j( + RAJA::RangeSegment(0, m1), + RAJA_LAMBDA(RAJA::Index_type i) { + for(int j = 0; j < m2; j++) { + // dest[i,j] = weigthed_dotprod(M1_row_i,M2_row_j) + double acc = 0.; + index_type ki = M1_idx_start[i]; + index_type kj = M2_idx_start[j]; + + while(ki < M1_idx_start[i + 1] && kj < M2_idx_start[j + 1]) { + assert(ki < M1nnz); + assert(kj < M2nnz); + + if(M1jCol[ki] == M2jCol[kj]) { + acc += M1values[ki] * M2values[kj]; + ki++; + kj++; + } else if(M1jCol[ki] < M2jCol[kj]) { + ki++; + } else { + kj++; + } + } // end of while(ki... && kj...) + WM(i, j) = beta * WM(i, j) + alpha * acc; + } // end j + }); } template -void hiopMatrixRajaSparseTriplet:: -addDiagonal(const double& alpha, const hiopVector& d_) +void hiopMatrixRajaSparseTriplet::addDiagonal(const double& alpha, const hiopVector& d_) { assert(false && "not needed"); } template -void hiopMatrixRajaSparseTriplet:: -addDiagonal(const double& value) +void hiopMatrixRajaSparseTriplet::addDiagonal(const double& value) { assert(false && "not needed"); } template -void hiopMatrixRajaSparseTriplet:: -addSubDiagonal(const double& alpha, index_type start, const hiopVector& d_) +void hiopMatrixRajaSparseTriplet::addSubDiagonal(const double& alpha, + index_type start, + const hiopVector& d_) { assert(false && "not needed"); } @@ -436,16 +404,15 @@ addSubDiagonal(const double& alpha, index_type start, const hiopVector& d_) /// @brief: set a subdiagonal block, whose diagonal values come from the input vector `vec_d` /// @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! template -void hiopMatrixRajaSparseTriplet:: -copySubDiagonalFrom(const index_type& start_on_dest_diag, - const size_type& num_elems, - const hiopVector& vec_d, - const index_type& start_on_nnz_idx, - double scal) +void hiopMatrixRajaSparseTriplet::copySubDiagonalFrom(const index_type& start_on_dest_diag, + const size_type& num_elems, + const hiopVector& vec_d, + const index_type& start_on_nnz_idx, + double scal) { const auto& vd = dynamic_cast&>(vec_d); - assert(num_elems<=vd.get_size()); - assert(start_on_dest_diag>=0 && start_on_dest_diag+num_elems<=this->nrows_); + assert(num_elems <= vd.get_size()); + assert(start_on_dest_diag >= 0 && start_on_dest_diag + num_elems <= this->nrows_); const double* v = vd.local_data_const(); // local copy for RAJA access @@ -454,27 +421,24 @@ copySubDiagonalFrom(const index_type& start_on_dest_diag, double* values = values_; RAJA::forall( - RAJA::RangeSegment(0, num_elems), - RAJA_LAMBDA(RAJA::Index_type row_src) - { - const index_type row_dest = row_src + start_on_dest_diag; - const index_type nnz_dest = row_src + start_on_nnz_idx; - iRow[nnz_dest] = jCol[nnz_dest] = row_dest; - values[nnz_dest] = scal*v[row_src]; - } - ); + RAJA::RangeSegment(0, num_elems), + RAJA_LAMBDA(RAJA::Index_type row_src) { + const index_type row_dest = row_src + start_on_dest_diag; + const index_type nnz_dest = row_src + start_on_nnz_idx; + iRow[nnz_dest] = jCol[nnz_dest] = row_dest; + values[nnz_dest] = scal * v[row_src]; + }); } /// @brief: set a subdiagonal block, whose diagonal values are set to `c` /// @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION!! template -void hiopMatrixRajaSparseTriplet:: -setSubDiagonalTo(const index_type& start_on_dest_diag, - const size_type& num_elems, - const double& c, - const index_type& start_on_nnz_idx) +void hiopMatrixRajaSparseTriplet::setSubDiagonalTo(const index_type& start_on_dest_diag, + const size_type& num_elems, + const double& c, + const index_type& start_on_nnz_idx) { - assert(start_on_dest_diag>=0 && start_on_dest_diag+num_elems<=this->nrows_); + assert(start_on_dest_diag >= 0 && start_on_dest_diag + num_elems <= this->nrows_); // local copy for RAJA access index_type* iRow = iRow_; @@ -482,64 +446,60 @@ setSubDiagonalTo(const index_type& start_on_dest_diag, double* values = values_; RAJA::forall( - RAJA::RangeSegment(0, num_elems), - RAJA_LAMBDA(RAJA::Index_type row_src) - { - const index_type row_dest = row_src + start_on_dest_diag; - const index_type nnz_dest = row_src + start_on_nnz_idx; - iRow[nnz_dest] = row_dest; - jCol[nnz_dest] = row_dest; - values[nnz_dest] = c; - } - ); + RAJA::RangeSegment(0, num_elems), + RAJA_LAMBDA(RAJA::Index_type row_src) { + const index_type row_dest = row_src + start_on_dest_diag; + const index_type nnz_dest = row_src + start_on_nnz_idx; + iRow[nnz_dest] = row_dest; + jCol[nnz_dest] = row_dest; + values[nnz_dest] = c; + }); } template -void hiopMatrixRajaSparseTriplet:: -addMatrix(double alpha, const hiopMatrix& X) +void hiopMatrixRajaSparseTriplet::addMatrix(double alpha, const hiopMatrix& X) { assert(false && "not needed"); } /** * @brief Adds the transpose of this matrix to a block within a dense matrix. - * + * * @todo Test this function * @todo Better document this function - * - * block of W += alpha*transpose(this) + * + * block of W += alpha*transpose(this) * Note W; contains only the upper triangular entries */ template -void hiopMatrixRajaSparseTriplet:: -transAddToSymDenseMatrixUpperTriangle(int row_start, - int col_start, - double alpha, - hiopMatrixDense& W) const +void hiopMatrixRajaSparseTriplet::transAddToSymDenseMatrixUpperTriangle(int row_start, + int col_start, + double alpha, + hiopMatrixDense& W) const { auto Wm = W.m(); auto Wn = W.n(); - assert(row_start>=0 && row_start+ncols_<=Wm); - assert(col_start>=0 && col_start+nrows_<=Wn); - assert(Wn==Wm); + assert(row_start >= 0 && row_start + ncols_ <= Wm); + assert(col_start >= 0 && col_start + nrows_ <= Wn); + assert(Wn == Wm); RAJA::View> WM(W.local_data(), W.m(), W.n()); int* iRow = iRow_; int* jCol = jCol_; double* values = values_; - RAJA::forall(RAJA::RangeSegment(0, nnz_), - RAJA_LAMBDA(RAJA::Index_type it) - { - const int i = jCol[it] + row_start; - const int j = iRow[it] + col_start; + RAJA::forall( + RAJA::RangeSegment(0, nnz_), + RAJA_LAMBDA(RAJA::Index_type it) { + const int i = jCol[it] + row_start; + const int j = iRow[it] + col_start; #ifdef HIOP_DEEPCHECKS - assert(i < Wm && j < Wn); - assert(i>=0 && j>=0); - assert(i<=j && "source entries need to map inside the upper triangular part of destination"); + assert(i < Wm && j < Wn); + assert(i >= 0 && j >= 0); + assert(i <= j && "source entries need to map inside the upper triangular part of destination"); #endif - WM(i, j) += alpha * values[it]; - }); + WM(i, j) += alpha * values[it]; + }); } /** @@ -550,11 +510,7 @@ double hiopMatrixRajaSparseTriplet::max_abs_value() { double* values = values_; RAJA::ReduceMax norm(0.0); - RAJA::forall(RAJA::RangeSegment(0, nnz_), - RAJA_LAMBDA(RAJA::Index_type i) - { - norm.max(fabs(values[i])); - }); + RAJA::forall(RAJA::RangeSegment(0, nnz_), RAJA_LAMBDA(RAJA::Index_type i) { norm.max(fabs(values[i])); }); double maxv = static_cast(norm.get()); return maxv; } @@ -576,12 +532,12 @@ void hiopMatrixRajaSparseTriplet::row_max_abs_value(hio ret_vec.setToZero(); if(0 == nrows_) { return; - } - + } + auto& vec = dynamic_cast&>(ret_vec); double* vd = vec.local_data(); - if(row_starts_==NULL) { + if(row_starts_ == NULL) { row_starts_ = allocAndBuildRowStarts(); } assert(row_starts_); @@ -591,42 +547,37 @@ void hiopMatrixRajaSparseTriplet::row_max_abs_value(hio double* values = values_; RAJA::forall( - RAJA::RangeSegment(0, num_rows), - RAJA_LAMBDA(RAJA::Index_type row_id) - { - for(index_type itnz=idx_start[row_id]; itnz abs_val) ? vd[row_id] : abs_val; - } - } - ); + RAJA::RangeSegment(0, num_rows), + RAJA_LAMBDA(RAJA::Index_type row_id) { + for(index_type itnz = idx_start[row_id]; itnz < idx_start[row_id + 1]; itnz++) { + double abs_val = fabs(values[itnz]); + vd[row_id] = (vd[row_id] > abs_val) ? vd[row_id] : abs_val; + } + }); } template -void hiopMatrixRajaSparseTriplet:: -scale_row(hiopVector &vec_scal, const bool inv_scale) +void hiopMatrixRajaSparseTriplet::scale_row(hiopVector& vec_scal, const bool inv_scale) { assert(vec_scal.get_size() == nrows_); - + auto& vec = dynamic_cast&>(vec_scal); double* vd = vec.local_data(); auto iRow = this->iRow_; auto values = this->values_; - + RAJA::forall( - RAJA::RangeSegment(0, nnz_), - RAJA_LAMBDA(RAJA::Index_type itnz) - { - double scal; - if(inv_scale) { - scal = 1./vd[iRow[itnz]]; - } else { - scal = vd[iRow[itnz]]; - } - values[itnz] *= scal; - } - ); + RAJA::RangeSegment(0, nnz_), + RAJA_LAMBDA(RAJA::Index_type itnz) { + double scal; + if(inv_scale) { + scal = 1. / vd[iRow[itnz]]; + } else { + scal = vd[iRow[itnz]]; + } + values[itnz] *= scal; + }); } /** @@ -640,12 +591,11 @@ bool hiopMatrixRajaSparseTriplet::isfinite() const #endif double* values = values_; RAJA::ReduceSum any(0); - RAJA::forall(RAJA::RangeSegment(0, nnz_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if (!std::isfinite(values[i])) - any += 1; - }); + RAJA::forall( + RAJA::RangeSegment(0, nnz_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(!std::isfinite(values[i])) any += 1; + }); return any.get() == 0; } @@ -669,10 +619,7 @@ hiopMatrixSparse* hiopMatrixRajaSparseTriplet::new_copy #ifdef HIOP_DEEPCHECKS assert(this->checkIndexesAreOrdered()); #endif - hiopMatrixRajaSparseTriplet* copy = new hiopMatrixRajaSparseTriplet(nrows_, - ncols_, - nnz_, - mem_space_); + hiopMatrixRajaSparseTriplet* copy = new hiopMatrixRajaSparseTriplet(nrows_, ncols_, nnz_, mem_space_); auto& resmgr = umpire::ResourceManager::getInstance(); resmgr.copy(copy->iRow_, iRow_); resmgr.copy(copy->jCol_, jCol_); @@ -684,8 +631,7 @@ hiopMatrixSparse* hiopMatrixRajaSparseTriplet::new_copy } template -void hiopMatrixRajaSparseTriplet:: -copyFrom(const hiopMatrixSparse& dm) +void hiopMatrixRajaSparseTriplet::copyFrom(const hiopMatrixSparse& dm) { assert(false && "this is to be implemented - method def too vague for now"); } @@ -693,8 +639,7 @@ copyFrom(const hiopMatrixSparse& dm) /// @brief copy to 3 arrays. /// @pre these 3 arrays are not nullptr template -void hiopMatrixRajaSparseTriplet:: -copy_to(int* irow, int* jcol, double* val) +void hiopMatrixRajaSparseTriplet::copy_to(int* irow, int* jcol, double* val) { assert(irow && jcol && val); auto& resmgr = umpire::ResourceManager::getInstance(); @@ -704,34 +649,31 @@ copy_to(int* irow, int* jcol, double* val) } template -void hiopMatrixRajaSparseTriplet:: -copy_to(hiopMatrixDense& W) +void hiopMatrixRajaSparseTriplet::copy_to(hiopMatrixDense& W) { assert(W.m() == nrows_); assert(W.n() == ncols_); W.setToZero(); - + RAJA::View> WM(W.local_data(), W.m(), W.n()); - + size_type nnz = this->nnz_; size_type nrows = this->nrows_; size_type ncols = this->ncols_; index_type* jCol = jCol_; index_type* iRow = iRow_; double* values = values_; - + // atomic is needed to prevent data race from ocurring; RAJA::forall( - RAJA::RangeSegment(0, nnz), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(iRow[i] < nrows); - assert(jCol[i] < ncols); - - RAJA::AtomicRef yy(&WM(iRow[i], jCol[i])); - yy += values[i]; - } - ); + RAJA::RangeSegment(0, nnz), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(iRow[i] < nrows); + assert(jCol[i] < ncols); + + RAJA::AtomicRef yy(&WM(iRow[i], jCol[i])); + yy += values[i]; + }); } #ifdef HIOP_DEEPCHECKS @@ -740,16 +682,12 @@ template bool hiopMatrixRajaSparseTriplet::checkIndexesAreOrdered() const { copyFromDev(); - if(nnz_==0) - return true; - for(int i=1; i::checkIndexesAreOrdere /** * @brief This function cannot be described briefly. See below for more detail. - * + * * @param rowAndCol_dest_start Starting row & col within _W_ to be added to * in the operation. * @param alpha Amount to scale this matrix's values by in the operation. @@ -765,23 +703,23 @@ bool hiopMatrixRajaSparseTriplet::checkIndexesAreOrdere * matrix's values in the operation. * @param W The output matrix, a block of which's values will be added to in * the operation. - * + * * @pre rowAndCol_dest_start >= 0 * @pre rowAndCol_dest_start + this->nrows_ <= W.m() * @pre rowAndCol_dest_start + this->nrows_ <= W.n() * @pre D.get_size() == this->ncols_ - * + * * @post A this->nrows_^2 block will be written to in _W_, containing the output - * of the operation. - * + * of the operation. + * * The full operation performed is: * diag block of _W_ += _alpha_ * this * _D_^{-1} * transpose(this) */ template -void hiopMatrixRajaSparseTriplet:: -addMDinvMtransToDiagBlockOfSymDeMatUTri(int rowAndCol_dest_start, - const double& alpha, - const hiopVector& D, hiopMatrixDense& W) const +void hiopMatrixRajaSparseTriplet::addMDinvMtransToDiagBlockOfSymDeMatUTri(int rowAndCol_dest_start, + const double& alpha, + const hiopVector& D, + hiopMatrixDense& W) const { const int row_dest_start = rowAndCol_dest_start, col_dest_start = rowAndCol_dest_start; @@ -790,66 +728,59 @@ addMDinvMtransToDiagBlockOfSymDeMatUTri(int rowAndCol_dest_start, int nnz = this->nnz_; #endif - assert(row_dest_start >= 0 && row_dest_start+nrows_ <= W.m()); - assert(col_dest_start >= 0 && col_dest_start+nrows_ <= W.n()); + assert(row_dest_start >= 0 && row_dest_start + nrows_ <= W.m()); + assert(col_dest_start >= 0 && col_dest_start + nrows_ <= W.n()); assert(D.get_size() == ncols_); RAJA::View> WM(W.local_data(), W.m(), W.n()); const double* DM = D.local_data_const(); - - if(row_starts_==NULL) - row_starts_ = allocAndBuildRowStarts(); + + if(row_starts_ == NULL) row_starts_ = allocAndBuildRowStarts(); assert(row_starts_); int nrows = this->nrows_; index_type* idx_start = row_starts_->idx_start_; int* jCol = jCol_; double* values = values_; - RAJA::forall(RAJA::RangeSegment(0, nrows), - RAJA_LAMBDA(RAJA::Index_type i) - { - //j==i - double acc = 0.; - for(index_type k=idx_start[i]; ki - for(int j = i+1; j < nrows; j++) - { - //dest[i,j] = weigthed_dotprod(this_row_i,this_row_j) - acc = 0.; - - index_type ki = idx_start[i]; - index_type kj = idx_start[j]; - while(ki < idx_start[i+1] && kj < idx_start[j+1]) - { - assert(ki < nnz); - assert(kj < nnz); - if(jCol[ki] == jCol[kj]) - { - acc += values[ki] / DM[jCol[ki]] * values[kj]; - ki++; - kj++; - } - else - { - if(jCol[ki] < jCol[kj]) + RAJA::forall( + RAJA::RangeSegment(0, nrows), + RAJA_LAMBDA(RAJA::Index_type i) { + // j==i + double acc = 0.; + for(index_type k = idx_start[i]; k < idx_start[i + 1]; k++) { + acc += values[k] / DM[jCol[k]] * values[k]; + } + WM(i + row_dest_start, i + col_dest_start) += alpha * acc; + + // j>i + for(int j = i + 1; j < nrows; j++) { + // dest[i,j] = weigthed_dotprod(this_row_i,this_row_j) + acc = 0.; + + index_type ki = idx_start[i]; + index_type kj = idx_start[j]; + while(ki < idx_start[i + 1] && kj < idx_start[j + 1]) { + assert(ki < nnz); + assert(kj < nnz); + if(jCol[ki] == jCol[kj]) { + acc += values[ki] / DM[jCol[ki]] * values[kj]; ki++; - else kj++; - } - } //end of loop over ki and kj + } else { + if(jCol[ki] < jCol[kj]) + ki++; + else + kj++; + } + } // end of loop over ki and kj - WM(i + row_dest_start, j + col_dest_start) += alpha*acc; - } //end j - }); + WM(i + row_dest_start, j + col_dest_start) += alpha * acc; + } // end j + }); } /** * @brief This function cannot be described briefly. See below for more detail. - * + * * @param row_dest_start Starting row in destination block. * @param col_dest_start Starting col in destination block. * @param alpha Amount to scale this matrix by during the operation. @@ -857,51 +788,48 @@ addMDinvMtransToDiagBlockOfSymDeMatUTri(int rowAndCol_dest_start, * matrix's values in the operation. * @param M2mat Another sparse matrix, the transpose of which will be multiplied in * the following operation. - * @param W A dense matrix, a block in which will be used to store the result of + * @param W A dense matrix, a block in which will be used to store the result of * the operation. - * + * * @pre this->ncols_ == M2mat.ncols_ * @pre D.get_size() == this->ncols_ - * @pre row_dest_start >= 0 + * @pre row_dest_start >= 0 * @pre row_dest_start + this->nrows_ <= W.m() * @pre col_dest_start >= 0 * @pre col_dest_start + M2mat.nrows_ <= W.n() - * + * * The full operation performed is: * block of _W_ += _alpha_ * this * _D_^{-1} * transpose(_M2mat_) * Sizes: M1 is (m1 x nx); D is vector of len nx, M2 is (m2, nx). */ template -void hiopMatrixRajaSparseTriplet:: -addMDinvNtransToSymDeMatUTri(int row_dest_start, - int col_dest_start, - const double& alpha, - const hiopVector& D, - const hiopMatrixSparse& M2mat, - hiopMatrixDense& W) const +void hiopMatrixRajaSparseTriplet::addMDinvNtransToSymDeMatUTri(int row_dest_start, + int col_dest_start, + const double& alpha, + const hiopVector& D, + const hiopMatrixSparse& M2mat, + hiopMatrixDense& W) const { const auto& M2 = dynamic_cast(M2mat); - + const int m1 = nrows_; const int m2 = M2.nrows_; assert(ncols_ == M2.ncols_); assert(D.get_size() == ncols_); - //does it fit in W ? - assert(row_dest_start>=0 && row_dest_start+m1<=W.m()); - assert(col_dest_start>=0 && col_dest_start+m2<=W.n()); + // does it fit in W ? + assert(row_dest_start >= 0 && row_dest_start + m1 <= W.m()); + assert(col_dest_start >= 0 && col_dest_start + m2 <= W.n()); - //double** WM = W.get_M(); + // double** WM = W.get_M(); RAJA::View> WM(W.local_data(), W.m(), W.n()); const double* DM = D.local_data_const(); // TODO: allocAndBuildRowStarts -> should create row_starts_ internally (name='prepareRowStarts' ?) - if(this->row_starts_==NULL) - this->row_starts_ = this->allocAndBuildRowStarts(); + if(this->row_starts_ == NULL) this->row_starts_ = this->allocAndBuildRowStarts(); assert(this->row_starts_); - if(M2.row_starts_==NULL) - M2.row_starts_ = M2.allocAndBuildRowStarts(); + if(M2.row_starts_ == NULL) M2.row_starts_ = M2.allocAndBuildRowStarts(); assert(M2.row_starts_); index_type* M1_idx_start = this->row_starts_->idx_start_; @@ -917,62 +845,56 @@ addMDinvNtransToSymDeMatUTri(int row_dest_start, int* M2jCol = M2.jCol_; double* M1values = this->values_; double* M2values = M2.values_; - RAJA::forall(RAJA::RangeSegment(0, m1), - RAJA_LAMBDA(RAJA::Index_type i) - { - for(int j=0; j( + RAJA::RangeSegment(0, m1), + RAJA_LAMBDA(RAJA::Index_type i) { + for(int j = 0; j < m2; j++) { + // dest[i,j] = weigthed_dotprod(M1_row_i,M2_row_j) + double acc = 0.; + index_type ki = M1_idx_start[i]; + index_type kj = M2_idx_start[j]; + + while(ki < M1_idx_start[i + 1] && kj < M2_idx_start[j + 1]) { + assert(ki < M1nnz); + assert(kj < M2nnz); + + if(M1jCol[ki] == M2jCol[kj]) { + acc += M1values[ki] / DM[M1jCol[ki]] * M2values[kj]; ki++; - else kj++; - } - } //end of loop over ki and kj + } else { + if(M1jCol[ki] < M2jCol[kj]) + ki++; + else + kj++; + } + } // end of loop over ki and kj #ifdef HIOP_DEEPCHECKS - if(i+row_dest_start > j+col_dest_start) - printf("[warning] lower triangular element updated in addMDinvNtransToSymDeMatUTri\n"); - assert(i+row_dest_start <= j+col_dest_start); + if(i + row_dest_start > j + col_dest_start) + printf("[warning] lower triangular element updated in addMDinvNtransToSymDeMatUTri\n"); + assert(i + row_dest_start <= j + col_dest_start); #endif - WM(i+row_dest_start, j+col_dest_start) += alpha*acc; - } //end j - }); + WM(i + row_dest_start, j + col_dest_start) += alpha * acc; + } // end j + }); } - /** * @brief Generates a pointer to a single RowStartsInfo struct containing * the number of rows and indices at which row data starts from this matrix. - * + * * Assumes triplets are ordered. */ template -typename hiopMatrixRajaSparseTriplet::RowStartsInfo* +typename hiopMatrixRajaSparseTriplet::RowStartsInfo* hiopMatrixRajaSparseTriplet::allocAndBuildRowStarts() const { assert(nrows_ >= 0); - RowStartsInfo* rsi = new RowStartsInfo(nrows_, mem_space_); assert(rsi); - if(nrows_<=0) - { + RowStartsInfo* rsi = new RowStartsInfo(nrows_, mem_space_); + assert(rsi); + if(nrows_ <= 0) { return rsi; } @@ -982,19 +904,16 @@ hiopMatrixRajaSparseTriplet::allocAndBuildRowStarts() c int it_triplet = 0; rsi->idx_start_host_[0] = 0; - for(int i = 1; i <= this->nrows_; i++) - { - rsi->idx_start_host_[i] = rsi->idx_start_host_[i-1]; - - while(it_triplet < this->nnz_ && this->iRow_host_[it_triplet] == i - 1) - { + for(int i = 1; i <= this->nrows_; i++) { + rsi->idx_start_host_[i] = rsi->idx_start_host_[i - 1]; + + while(it_triplet < this->nnz_ && this->iRow_host_[it_triplet] == i - 1) { #ifdef HIOP_DEEPCHECKS - if(it_triplet>=1) - { - assert(iRow_host_[it_triplet-1]<=iRow_host_[it_triplet] && "row indices are not sorted"); - //assert(iCol[it_triplet-1]<=iCol[it_triplet]); - if(iRow_host_[it_triplet-1]==iRow_host_[it_triplet]) - assert(jCol_host_[it_triplet-1] < jCol_host_[it_triplet] && "col indices are not sorted"); + if(it_triplet >= 1) { + assert(iRow_host_[it_triplet - 1] <= iRow_host_[it_triplet] && "row indices are not sorted"); + // assert(iCol[it_triplet-1]<=iCol[it_triplet]); + if(iRow_host_[it_triplet - 1] == iRow_host_[it_triplet]) + assert(jCol_host_[it_triplet - 1] < jCol_host_[it_triplet] && "col indices are not sorted"); } #endif rsi->idx_start_host_[i]++; @@ -1002,7 +921,7 @@ hiopMatrixRajaSparseTriplet::allocAndBuildRowStarts() c } assert(rsi->idx_start_host_[i] == it_triplet); } - assert(it_triplet==this->nnz_); + assert(it_triplet == this->nnz_); rsi->copy_to_dev(); @@ -1011,7 +930,7 @@ hiopMatrixRajaSparseTriplet::allocAndBuildRowStarts() c /** * @brief Copies rows from another sparse matrix into this one. - * + * * @pre 'src' is sorted * @pre 'this' has exactly 'n_rows' rows * @pre 'src' and 'this' must have same number of columns @@ -1019,10 +938,9 @@ hiopMatrixRajaSparseTriplet::allocAndBuildRowStarts() c * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! */ template -void hiopMatrixRajaSparseTriplet:: -copyRowsFrom(const hiopMatrix& src_gen, - const index_type* rows_idxs, - size_type n_rows) +void hiopMatrixRajaSparseTriplet::copyRowsFrom(const hiopMatrix& src_gen, + const index_type* rows_idxs, + size_type n_rows) { const hiopMatrixRajaSparseTriplet& src = dynamic_cast(src_gen); assert(this->m() == n_rows); @@ -1047,7 +965,7 @@ copyRowsFrom(const hiopMatrix& src_gen, if(row_starts_ == nullptr) { row_starts_ = new RowStartsInfo(nrows_, mem_space_); assert(row_starts_); - + // // The latest CPU code can be found in 342eb99ec16d45f57a492be1bf1e39cce73995a5 // It is replaced by RAJA::inclusive_scan after that commit @@ -1056,46 +974,39 @@ copyRowsFrom(const hiopMatrix& src_gen, index_type* dst_row_st_init = row_starts_->idx_start_; RAJA::forall( - RAJA::RangeSegment(0, n_rows+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - dst_row_st_init[i] = 0; - } - ); + RAJA::RangeSegment(0, n_rows + 1), + RAJA_LAMBDA(RAJA::Index_type i) { dst_row_st_init[i] = 0; }); // comput nnz in each row from source RAJA::forall( - RAJA::RangeSegment(0, n_rows), - RAJA_LAMBDA(RAJA::Index_type row_dst) - { - const index_type row_src = rows_idxs[row_dst]; - dst_row_st_init[row_dst+1] = src_row_st[row_src+1] - src_row_st[row_src]; - } - ); - RAJA::inclusive_scan_inplace(RAJA::make_span(dst_row_st_init, n_rows+1), RAJA::operators::plus()); + RAJA::RangeSegment(0, n_rows), + RAJA_LAMBDA(RAJA::Index_type row_dst) { + const index_type row_src = rows_idxs[row_dst]; + dst_row_st_init[row_dst + 1] = src_row_st[row_src + 1] - src_row_st[row_src]; + }); + RAJA::inclusive_scan_inplace(RAJA::make_span(dst_row_st_init, n_rows + 1), + RAJA::operators::plus()); } index_type* dst_row_st = row_starts_->idx_start_; index_type* src_row_st = src.row_starts_->idx_start_; RAJA::forall( - RAJA::RangeSegment(0, n_rows), - RAJA_LAMBDA(RAJA::Index_type row_dst) - { - const index_type row_src = rows_idxs[row_dst]; - index_type k_dst = dst_row_st[row_dst]; - index_type k_src = src_row_st[row_src]; - - // copy from src - while(k_src < src_row_st[row_src+1]) { - iRow[k_dst] = row_dst; - jCol[k_dst] = jCol_src[k_src]; - values[k_dst] = values_src[k_src]; - k_dst++; - k_src++; - } - } - ); + RAJA::RangeSegment(0, n_rows), + RAJA_LAMBDA(RAJA::Index_type row_dst) { + const index_type row_src = rows_idxs[row_dst]; + index_type k_dst = dst_row_st[row_dst]; + index_type k_src = src_row_st[row_src]; + + // copy from src + while(k_src < src_row_st[row_src + 1]) { + iRow[k_dst] = row_dst; + jCol[k_dst] = jCol_src[k_src]; + values[k_dst] = values_src[k_src]; + k_dst++; + k_src++; + } + }); } /** @@ -1107,12 +1018,11 @@ copyRowsFrom(const hiopMatrix& src_gen, * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! */ template -void hiopMatrixRajaSparseTriplet:: -copyRowsBlockFrom(const hiopMatrix& src_gen, - const index_type& rows_src_idx_st, - const size_type& n_rows, - const index_type& rows_dst_idx_st, - const size_type& dest_nnz_st) +void hiopMatrixRajaSparseTriplet::copyRowsBlockFrom(const hiopMatrix& src_gen, + const index_type& rows_src_idx_st, + const size_type& n_rows, + const index_type& rows_dst_idx_st, + const size_type& dest_nnz_st) { if(n_rows <= 0) { return; @@ -1139,7 +1049,7 @@ copyRowsBlockFrom(const hiopMatrix& src_gen, assert(src.row_starts_); index_type* src_row_st = src.row_starts_->idx_start_; - + // // The latest CPU code can be found in 342eb99ec16d45f57a492be1bf1e39cce73995a5 // It is replaced by RAJA::inclusive_scan after that commit @@ -1150,105 +1060,97 @@ copyRowsBlockFrom(const hiopMatrix& src_gen, index_type* dst_row_st_init = row_starts_->idx_start_; RAJA::forall( - RAJA::RangeSegment(0, n_rows_dst+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - dst_row_st_init[i] = 0; - } - ); + RAJA::RangeSegment(0, n_rows_dst + 1), + RAJA_LAMBDA(RAJA::Index_type i) { dst_row_st_init[i] = 0; }); } index_type* dst_row_st_dev = row_starts_->idx_start_; auto& rm = umpire::ResourceManager::getInstance(); umpire::Allocator hostalloc = rm.getAllocator("HOST"); - int *next_row_nnz = static_cast(hostalloc.allocate(sizeof(size_type))); + int* next_row_nnz = static_cast(hostalloc.allocate(sizeof(size_type))); index_type register_row_st = row_starts_->register_row_st_; - rm.copy(next_row_nnz, dst_row_st_dev+1+rows_dst_idx_st, 1*sizeof(size_type)); + rm.copy(next_row_nnz, dst_row_st_dev + 1 + rows_dst_idx_st, 1 * sizeof(size_type)); if(next_row_nnz[0] == 0) { assert(rows_dst_idx_st >= register_row_st); // comput nnz in each row from source RAJA::forall( - RAJA::RangeSegment(0, n_rows), - RAJA_LAMBDA(RAJA::Index_type row_add) - { - const index_type row_src = rows_src_idx_st + row_add; - const index_type row_dst = rows_dst_idx_st + row_add; - dst_row_st_dev[row_dst+1] = src_row_st[row_src+1] - src_row_st[row_src]; - } - ); - RAJA::inclusive_scan_inplace(RAJA::make_span(dst_row_st_dev+register_row_st, n_rows+1+rows_dst_idx_st-register_row_st), RAJA::operators::plus()); + RAJA::RangeSegment(0, n_rows), + RAJA_LAMBDA(RAJA::Index_type row_add) { + const index_type row_src = rows_src_idx_st + row_add; + const index_type row_dst = rows_dst_idx_st + row_add; + dst_row_st_dev[row_dst + 1] = src_row_st[row_src + 1] - src_row_st[row_src]; + }); + RAJA::inclusive_scan_inplace( + RAJA::make_span(dst_row_st_dev + register_row_st, n_rows + 1 + rows_dst_idx_st - register_row_st), + RAJA::operators::plus()); row_starts_->register_row_st_ = n_rows + rows_dst_idx_st; } index_type* dst_row_st = row_starts_->idx_start_; - + RAJA::forall( - RAJA::RangeSegment(0, n_rows), - RAJA_LAMBDA(RAJA::Index_type row_add) - { - const index_type row_src = rows_src_idx_st + row_add; - const index_type row_dst = rows_dst_idx_st + row_add; - index_type k_src = src_row_st[row_src]; - index_type k_dst = dst_row_st[row_dst] - dst_row_st[rows_dst_idx_st] + dest_nnz_st; - - // copy from src - while(k_src < src_row_st[row_src+1]) { - iRow[k_dst] = row_dst; - jCol[k_dst] = jCol_src[k_src]; - values[k_dst] = values_src[k_src]; - k_dst++; - k_src++; - } - } - ); -// delete [] next_row_nnz; + RAJA::RangeSegment(0, n_rows), + RAJA_LAMBDA(RAJA::Index_type row_add) { + const index_type row_src = rows_src_idx_st + row_add; + const index_type row_dst = rows_dst_idx_st + row_add; + index_type k_src = src_row_st[row_src]; + index_type k_dst = dst_row_st[row_dst] - dst_row_st[rows_dst_idx_st] + dest_nnz_st; + + // copy from src + while(k_src < src_row_st[row_src + 1]) { + iRow[k_dst] = row_dst; + jCol[k_dst] = jCol_src[k_src]; + values[k_dst] = values_src[k_src]; + k_dst++; + k_src++; + } + }); + // delete [] next_row_nnz; } /// @brief Prints the contents of this function to a file. template void hiopMatrixRajaSparseTriplet::print(FILE* file, - const char* msg/*=NULL*/, - int maxRows/*=-1*/, - int maxCols/*=-1*/, - int rank/*=-1*/) const + const char* msg /*=NULL*/, + int maxRows /*=-1*/, + int maxCols /*=-1*/, + int rank /*=-1*/) const { - int myrank_=0, numranks=1; //this is a local object => always print + int myrank_ = 0, numranks = 1; // this is a local object => always print copyFromDev(); - if(file==NULL) file = stdout; + if(file == NULL) file = stdout; - int max_elems = maxRows>=0 ? maxRows : nnz_; + int max_elems = maxRows >= 0 ? maxRows : nnz_; max_elems = std::min(max_elems, nnz_); - if(myrank_==rank || rank==-1) { - - if(NULL==msg) { + if(myrank_ == rank || rank == -1) { + if(NULL == msg) { std::stringstream ss; - ss << "matrix of size " << m() << " " << n() << " and nonzeros " - << numberOfNonzeros() << ", printing " << max_elems - << " elems"; - if(numranks>1) { + ss << "matrix of size " << m() << " " << n() << " and nonzeros " << numberOfNonzeros() << ", printing " << max_elems + << " elems"; + if(numranks > 1) { ss << " (on rank=" << myrank_ << ")"; } ss << "\n"; fprintf(file, "%s", ss.str().c_str()); } else { fprintf(file, "%s ", msg); - } + } // using matlab indices fprintf(file, "iRow_host_=["); - for(int it=0; it::print(FILE* file, template void hiopMatrixRajaSparseTriplet::copyToDev() { - if(mem_space_ == "DEVICE") - { + if(mem_space_ == "DEVICE") { auto& resmgr = umpire::ResourceManager::getInstance(); resmgr.copy(iRow_, iRow_host_); resmgr.copy(jCol_, jCol_host_); @@ -1270,8 +1171,7 @@ void hiopMatrixRajaSparseTriplet::copyToDev() template void hiopMatrixRajaSparseTriplet::copyFromDev() const { - if(mem_space_ == "DEVICE") - { + if(mem_space_ == "DEVICE") { auto& resmgr = umpire::ResourceManager::getInstance(); resmgr.copy(iRow_host_, iRow_); resmgr.copy(jCol_host_, jCol_); @@ -1280,11 +1180,10 @@ void hiopMatrixRajaSparseTriplet::copyFromDev() const } template -hiopMatrixRajaSparseTriplet::RowStartsInfo:: -RowStartsInfo(size_type n_rows, std::string memspace) - : register_row_st_{0}, - num_rows_(n_rows), - mem_space_(memspace) +hiopMatrixRajaSparseTriplet::RowStartsInfo::RowStartsInfo(size_type n_rows, std::string memspace) + : register_row_st_{0}, + num_rows_(n_rows), + mem_space_(memspace) { auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator alloc = resmgr.getAllocator(mem_space_); @@ -1303,7 +1202,7 @@ hiopMatrixRajaSparseTriplet::RowStartsInfo::~RowStartsI auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator devalloc = resmgr.getAllocator(mem_space_); devalloc.deallocate(idx_start_); - if (mem_space_ == "DEVICE") { + if(mem_space_ == "DEVICE") { umpire::Allocator hostalloc = resmgr.getAllocator("HOST"); hostalloc.deallocate(idx_start_host_); } @@ -1314,7 +1213,7 @@ hiopMatrixRajaSparseTriplet::RowStartsInfo::~RowStartsI template void hiopMatrixRajaSparseTriplet::RowStartsInfo::copy_from_dev() { - if (idx_start_ != idx_start_host_) { + if(idx_start_ != idx_start_host_) { auto& resmgr = umpire::ResourceManager::getInstance(); resmgr.copy(idx_start_host_, idx_start_); } @@ -1323,29 +1222,28 @@ void hiopMatrixRajaSparseTriplet::RowStartsInfo::copy_f template void hiopMatrixRajaSparseTriplet::RowStartsInfo::copy_to_dev() { - if (idx_start_ != idx_start_host_) { + if(idx_start_ != idx_start_host_) { auto& resmgr = umpire::ResourceManager::getInstance(); resmgr.copy(idx_start_, idx_start_host_); } } /* -* extend original Jac to [Jac -I I] -*/ + * extend original Jac to [Jac -I I] + */ template -void hiopMatrixRajaSparseTriplet:: -set_Jac_FR(const hiopMatrixSparse& Jac_c, - const hiopMatrixSparse& Jac_d, - int* iJacS, - int* jJacS, - double* MJacS) +void hiopMatrixRajaSparseTriplet::set_Jac_FR(const hiopMatrixSparse& Jac_c, + const hiopMatrixSparse& Jac_d, + int* iJacS, + int* jJacS, + double* MJacS) { const auto& J_c = dynamic_cast(Jac_c); const auto& J_d = dynamic_cast(Jac_d); - + // shortcut to the original Jac - const int *jcol_c = J_c.jCol_; - const int *jcol_d = J_d.jCol_; + const int* jcol_c = J_c.jCol_; + const int* jcol_d = J_d.jCol_; // assuming original Jac is sorted! int nnz_Jac_c = J_c.numberOfNonzeros(); @@ -1355,12 +1253,12 @@ set_Jac_FR(const hiopMatrixSparse& Jac_c, int n_c = J_c.ncols_; int n_d = J_d.ncols_; assert(n_c == n_d); - assert(ncols_ == n_c + 2*m_c + 2*m_d); + assert(ncols_ == n_c + 2 * m_c + 2 * m_d); - int nnz_Jac_c_new = nnz_Jac_c + 2*m_c; + int nnz_Jac_c_new = nnz_Jac_c + 2 * m_c; + + assert(nnz_ == nnz_Jac_c_new + nnz_Jac_d + 2 * m_d); - assert(nnz_ == nnz_Jac_c_new + nnz_Jac_d + 2*m_d); - if(J_c.row_starts_ == nullptr) { J_c.row_starts_ = J_c.allocAndBuildRowStarts(); } @@ -1381,62 +1279,57 @@ set_Jac_FR(const hiopMatrixSparse& Jac_c, // Jac for c(x) - p + n RAJA::forall( - RAJA::RangeSegment(0, m_c), - RAJA_LAMBDA(RAJA::Index_type i) - { - index_type k_base = Jc_row_st[i]; - index_type k = k_base + 2*i; // append 2 nnz in each row - - // copy from base Jac_c - while(k_base < Jc_row_st[i+1]) { + RAJA::RangeSegment(0, m_c), + RAJA_LAMBDA(RAJA::Index_type i) { + index_type k_base = Jc_row_st[i]; + index_type k = k_base + 2 * i; // append 2 nnz in each row + + // copy from base Jac_c + while(k_base < Jc_row_st[i + 1]) { + iRow[k] = iJacS[k] = i; + jCol[k] = jJacS[k] = jcol_c[k_base]; + k++; + k_base++; + } + + // extra parts for p and n iRow[k] = iJacS[k] = i; - jCol[k] = jJacS[k] = jcol_c[k_base]; + jCol[k] = jJacS[k] = n_c + i; k++; - k_base++; - } - // extra parts for p and n - iRow[k] = iJacS[k] = i; - jCol[k] = jJacS[k] = n_c + i; - k++; - - iRow[k] = iJacS[k] = i; - jCol[k] = jJacS[k] = n_c + m_c + i; - k++; - } - ); + iRow[k] = iJacS[k] = i; + jCol[k] = jJacS[k] = n_c + m_c + i; + k++; + }); // Jac for d(x) - p + n RAJA::forall( - RAJA::RangeSegment(0, m_d), - RAJA_LAMBDA(RAJA::Index_type i) - { - index_type k_base = Jd_row_st[i]; - index_type k = nnz_Jac_c_new + k_base + 2*i; // append 2 nnz in each row - - // copy from base Jac_c - while(k_base < Jd_row_st[i+1]) { + RAJA::RangeSegment(0, m_d), + RAJA_LAMBDA(RAJA::Index_type i) { + index_type k_base = Jd_row_st[i]; + index_type k = nnz_Jac_c_new + k_base + 2 * i; // append 2 nnz in each row + + // copy from base Jac_c + while(k_base < Jd_row_st[i + 1]) { + iRow[k] = iJacS[k] = m_c + i; + jCol[k] = jJacS[k] = jcol_d[k_base]; + k++; + k_base++; + } + + // extra parts for p and n iRow[k] = iJacS[k] = m_c + i; - jCol[k] = jJacS[k] = jcol_d[k_base]; + jCol[k] = jJacS[k] = n_d + 2 * m_c + i; k++; - k_base++; - } - // extra parts for p and n - iRow[k] = iJacS[k] = m_c + i; - jCol[k] = jJacS[k] = n_d + 2*m_c + i; - k++; - - iRow[k] = iJacS[k] = m_c + i; - jCol[k] = jJacS[k] = n_d + 2*m_c + m_d + i; - k++; - } - ); + iRow[k] = iJacS[k] = m_c + i; + jCol[k] = jJacS[k] = n_d + 2 * m_c + m_d + i; + k++; + }); } // extend Jac to the p and n parts --- element if(MJacS != nullptr) { - // local copy for RAJA access double* values = values_; @@ -1445,71 +1338,66 @@ set_Jac_FR(const hiopMatrixSparse& Jac_c, // Jac for c(x) - p + n RAJA::forall( - RAJA::RangeSegment(0, m_c), - RAJA_LAMBDA(RAJA::Index_type i) - { - index_type k_base = Jc_row_st[i]; - index_type k = k_base + 2*i; // append 2 nnz in each row - - // copy from base Jac_c - while(k_base < Jc_row_st[i+1]) { - values[k] = MJacS[k] = J_c_val[k_base]; + RAJA::RangeSegment(0, m_c), + RAJA_LAMBDA(RAJA::Index_type i) { + index_type k_base = Jc_row_st[i]; + index_type k = k_base + 2 * i; // append 2 nnz in each row + + // copy from base Jac_c + while(k_base < Jc_row_st[i + 1]) { + values[k] = MJacS[k] = J_c_val[k_base]; + k++; + k_base++; + } + + // extra parts for p and n + values[k] = MJacS[k] = -1.0; k++; - k_base++; - } - // extra parts for p and n - values[k] = MJacS[k] = -1.0; - k++; - - values[k] = MJacS[k] = 1.0; - k++; - } - ); + values[k] = MJacS[k] = 1.0; + k++; + }); // Jac for d(x) - p + n RAJA::forall( - RAJA::RangeSegment(0, m_d), - RAJA_LAMBDA(RAJA::Index_type i) - { - index_type k_base = Jd_row_st[i]; - index_type k = nnz_Jac_c_new + k_base + 2*i; // append 2 nnz in each row - - // copy from base Jac_c - while(k_base < Jd_row_st[i+1]) { - values[k] = MJacS[k] = J_d_val[k_base]; + RAJA::RangeSegment(0, m_d), + RAJA_LAMBDA(RAJA::Index_type i) { + index_type k_base = Jd_row_st[i]; + index_type k = nnz_Jac_c_new + k_base + 2 * i; // append 2 nnz in each row + + // copy from base Jac_c + while(k_base < Jd_row_st[i + 1]) { + values[k] = MJacS[k] = J_d_val[k_base]; + k++; + k_base++; + } + + // extra parts for p and n + values[k] = MJacS[k] = -1.0; k++; - k_base++; - } - // extra parts for p and n - values[k] = MJacS[k] = -1.0; - k++; - - values[k] = MJacS[k] = 1.0; - k++; - } - ); + values[k] = MJacS[k] = 1.0; + k++; + }); } copyFromDev(); } -/// @brief copy a submatrix from another matrix. +/// @brief copy a submatrix from another matrix. /// @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! template -void hiopMatrixRajaSparseTriplet:: -copySubmatrixFrom(const hiopMatrix& src_gen, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const bool offdiag_only) +void hiopMatrixRajaSparseTriplet::copySubmatrixFrom(const hiopMatrix& src_gen, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const bool offdiag_only) { const hiopMatrixRajaSparseTriplet& src = dynamic_cast(src_gen); auto m_rows = src.m(); auto n_cols = src.n(); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); - assert(n_cols + dest_col_st <= this->n() ); + assert(n_cols + dest_col_st <= this->n()); assert(m_rows + dest_row_st <= this->m()); assert(dest_nnz_st + src.numberOfNonzeros() <= this->numberOfNonzeros()); @@ -1524,35 +1412,32 @@ copySubmatrixFrom(const hiopMatrix& src_gen, double* values = values_; RAJA::forall( - RAJA::RangeSegment(0, src_nnz), - RAJA_LAMBDA(RAJA::Index_type src_k) - { - if(!offdiag_only || src_iRow[src_k]!=src_jCol[src_k]) { - index_type dest_k = dest_nnz_st + src_k; - iRow[dest_k] = dest_row_st + src_iRow[src_k]; - jCol[dest_k] = dest_col_st + src_jCol[src_k]; - values[dest_k] = src_val[src_k]; - } - } - ); + RAJA::RangeSegment(0, src_nnz), + RAJA_LAMBDA(RAJA::Index_type src_k) { + if(!offdiag_only || src_iRow[src_k] != src_jCol[src_k]) { + index_type dest_k = dest_nnz_st + src_k; + iRow[dest_k] = dest_row_st + src_iRow[src_k]; + jCol[dest_k] = dest_col_st + src_jCol[src_k]; + values[dest_k] = src_val[src_k]; + } + }); } -/// @brief copy a submatrix from a transpose of another matrix. +/// @brief copy a submatrix from a transpose of another matrix. /// @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! template -void hiopMatrixRajaSparseTriplet:: -copySubmatrixFromTrans(const hiopMatrix& src_gen, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const bool offdiag_only) +void hiopMatrixRajaSparseTriplet::copySubmatrixFromTrans(const hiopMatrix& src_gen, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const bool offdiag_only) { const hiopMatrixRajaSparseTriplet& src = dynamic_cast(src_gen); auto m_rows = src.m(); auto n_cols = src.n(); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); - assert(n_cols + dest_col_st <= this->n() ); + assert(n_cols + dest_col_st <= this->n()); assert(m_rows + dest_row_st <= this->m()); assert(dest_nnz_st + src.numberOfNonzeros() <= this->numberOfNonzeros()); @@ -1567,41 +1452,39 @@ copySubmatrixFromTrans(const hiopMatrix& src_gen, double* values = values_; RAJA::forall( - RAJA::RangeSegment(0, src_nnz), - RAJA_LAMBDA(RAJA::Index_type src_k) - { - if(!offdiag_only || src_iRow[src_k]!=src_jCol[src_k]) { - index_type dest_k = dest_nnz_st + src_k; - iRow[dest_k] = dest_row_st + src_iRow[src_k]; - jCol[dest_k] = dest_col_st + src_jCol[src_k]; - values[dest_k] = src_val[src_k]; - } - } - ); + RAJA::RangeSegment(0, src_nnz), + RAJA_LAMBDA(RAJA::Index_type src_k) { + if(!offdiag_only || src_iRow[src_k] != src_jCol[src_k]) { + index_type dest_k = dest_nnz_st + src_k; + iRow[dest_k] = dest_row_st + src_iRow[src_k]; + jCol[dest_k] = dest_col_st + src_jCol[src_k]; + values[dest_k] = src_val[src_k]; + } + }); } /** -* @brief Copy selected cols of a diagonal matrix (a constant 'scalar' times identity), -* into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' -* The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. -* -* @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. -* @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! -*/ + * @brief Copy selected cols of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ template -void hiopMatrixRajaSparseTriplet:: -setSubmatrixToConstantDiag_w_colpattern(const double& scalar, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const size_type& nnz_to_copy, - const hiopVector& ix) +void hiopMatrixRajaSparseTriplet::setSubmatrixToConstantDiag_w_colpattern( + const double& scalar, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy, + const hiopVector& ix) { assert(ix.get_local_size() + dest_col_st <= this->n()); - assert(nnz_to_copy + dest_row_st <= this->m() ); + assert(nnz_to_copy + dest_row_st <= this->m()); assert(dest_nnz_st + nnz_to_copy <= this->numberOfNonzeros()); - const auto& selected= dynamic_cast&>(ix); + const auto& selected = dynamic_cast&>(ix); size_type n = ix.get_local_size(); @@ -1613,13 +1496,13 @@ setSubmatrixToConstantDiag_w_colpattern(const double& scalar, #ifdef HIOP_DEEPCHECKS RAJA::ReduceSum sum(0); - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(pattern[i]!=0.0){ - sum += 1; - } - }); + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { + if(pattern[i] != 0.0) { + sum += 1; + } + }); size_type nrm = sum.get(); assert(nrm == nnz_to_copy); #endif @@ -1630,66 +1513,62 @@ setSubmatrixToConstantDiag_w_colpattern(const double& scalar, // auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator devalloc = resmgr.getAllocator(mem_space_); - index_type* row_start_dev = static_cast(devalloc.allocate((n+1)*sizeof(index_type))); + index_type* row_start_dev = static_cast(devalloc.allocate((n + 1) * sizeof(index_type))); RAJA::forall( - RAJA::RangeSegment(0, n+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(i==0) { - row_start_dev[i] = 0; - } else { - // from i=1..n - if(pattern[i-1]!=0.0){ - row_start_dev[i] = 1; + RAJA::RangeSegment(0, n + 1), + RAJA_LAMBDA(RAJA::Index_type i) { + if(i == 0) { + row_start_dev[i] = 0; } else { - row_start_dev[i] = 0; + // from i=1..n + if(pattern[i - 1] != 0.0) { + row_start_dev[i] = 1; + } else { + row_start_dev[i] = 0; + } } - } - } - ); - RAJA::inclusive_scan_inplace(RAJA::make_span(row_start_dev,n+1), RAJA::operators::plus()); + }); + RAJA::inclusive_scan_inplace(RAJA::make_span(row_start_dev, n + 1), RAJA::operators::plus()); RAJA::forall( - RAJA::RangeSegment(1, n+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(row_start_dev[i] != row_start_dev[i-1]){ - index_type ele_add = row_start_dev[i] - 1; - assert(ele_add >= 0 && ele_add < nnz_to_copy); - index_type itnz_dest = dest_nnz_st + ele_add; - iRow[itnz_dest] = dest_row_st + i - 1; - jCol[itnz_dest] = dest_col_st + ele_add; - values[itnz_dest] = scalar; - } - } - ); + RAJA::RangeSegment(1, n + 1), + RAJA_LAMBDA(RAJA::Index_type i) { + if(row_start_dev[i] != row_start_dev[i - 1]) { + index_type ele_add = row_start_dev[i] - 1; + assert(ele_add >= 0 && ele_add < nnz_to_copy); + index_type itnz_dest = dest_nnz_st + ele_add; + iRow[itnz_dest] = dest_row_st + i - 1; + jCol[itnz_dest] = dest_col_st + ele_add; + values[itnz_dest] = scalar; + } + }); devalloc.deallocate(row_start_dev); } /** -* @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), -* into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' -* The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. -* -* @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. -* @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! -*/ + * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ template -void hiopMatrixRajaSparseTriplet:: -setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const size_type& nnz_to_copy, - const hiopVector& ix) +void hiopMatrixRajaSparseTriplet::setSubmatrixToConstantDiag_w_rowpattern( + const double& scalar, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy, + const hiopVector& ix) { assert(ix.get_local_size() + dest_row_st <= this->m()); - assert(nnz_to_copy + dest_col_st <= this->n() ); + assert(nnz_to_copy + dest_col_st <= this->n()); assert(dest_nnz_st + nnz_to_copy <= this->numberOfNonzeros()); - const auto& selected= dynamic_cast&>(ix); + const auto& selected = dynamic_cast&>(ix); size_type n = ix.get_local_size(); @@ -1701,13 +1580,13 @@ setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, #ifdef HIOP_DEEPCHECKS RAJA::ReduceSum sum(0); - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(pattern[i]!=0.0){ - sum += 1; - } - }); + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { + if(pattern[i] != 0.0) { + sum += 1; + } + }); size_type nrm = sum.get(); assert(nrm == nnz_to_copy); #endif @@ -1718,61 +1597,56 @@ setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, // auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator devalloc = resmgr.getAllocator(mem_space_); - index_type* row_start_dev = static_cast(devalloc.allocate((n+1)*sizeof(index_type))); + index_type* row_start_dev = static_cast(devalloc.allocate((n + 1) * sizeof(index_type))); RAJA::forall( - RAJA::RangeSegment(0, n+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(i==0) { - row_start_dev[i] = 0; - } else { - // from i=1..n - if(pattern[i-1]!=0.0){ - row_start_dev[i] = 1; + RAJA::RangeSegment(0, n + 1), + RAJA_LAMBDA(RAJA::Index_type i) { + if(i == 0) { + row_start_dev[i] = 0; } else { - row_start_dev[i] = 0; + // from i=1..n + if(pattern[i - 1] != 0.0) { + row_start_dev[i] = 1; + } else { + row_start_dev[i] = 0; + } } - } - } - ); - RAJA::inclusive_scan_inplace(RAJA::make_span(row_start_dev,n+1), RAJA::operators::plus()); + }); + RAJA::inclusive_scan_inplace(RAJA::make_span(row_start_dev, n + 1), RAJA::operators::plus()); RAJA::forall( - RAJA::RangeSegment(1, n+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(row_start_dev[i] != row_start_dev[i-1]){ - index_type ele_add = row_start_dev[i] - 1; - assert(ele_add >= 0 && ele_add < nnz_to_copy); - index_type itnz_dest = dest_nnz_st + ele_add; - iRow[itnz_dest] = dest_row_st + ele_add; - jCol[itnz_dest] = dest_col_st + i - 1; - values[itnz_dest] = scalar; - } - } - ); + RAJA::RangeSegment(1, n + 1), + RAJA_LAMBDA(RAJA::Index_type i) { + if(row_start_dev[i] != row_start_dev[i - 1]) { + index_type ele_add = row_start_dev[i] - 1; + assert(ele_add >= 0 && ele_add < nnz_to_copy); + index_type itnz_dest = dest_nnz_st + ele_add; + iRow[itnz_dest] = dest_row_st + ele_add; + jCol[itnz_dest] = dest_col_st + i - 1; + values[itnz_dest] = scalar; + } + }); devalloc.deallocate(row_start_dev); } /** -* @brief Copy a diagonal matrix to destination. -* This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. -* The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. -* At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. -* @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. -* @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! -*/ + * @brief Copy a diagonal matrix to destination. + * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. + * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. + * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ template -void hiopMatrixRajaSparseTriplet:: -copyDiagMatrixToSubblock(const double& src_val, - const index_type& dest_row_st, - const index_type& col_dest_st, - const size_type& dest_nnz_st, - const size_type &nnz_to_copy) +void hiopMatrixRajaSparseTriplet::copyDiagMatrixToSubblock(const double& src_val, + const index_type& dest_row_st, + const index_type& col_dest_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy) { - assert(this->numberOfNonzeros() >= nnz_to_copy+dest_nnz_st); + assert(this->numberOfNonzeros() >= nnz_to_copy + dest_nnz_st); assert(this->n() >= nnz_to_copy); assert(nnz_to_copy + dest_row_st <= this->m()); assert(nnz_to_copy + col_dest_st <= this->n()); @@ -1783,35 +1657,32 @@ copyDiagMatrixToSubblock(const double& src_val, double* values = values_; RAJA::forall( - RAJA::RangeSegment(0, nnz_to_copy), - RAJA_LAMBDA(RAJA::Index_type ele_add) - { - index_type itnz_dest = dest_nnz_st + ele_add; - iRow[itnz_dest] = dest_row_st + ele_add; - jCol[itnz_dest] = col_dest_st + ele_add; - values[itnz_dest] = src_val; - } - ); + RAJA::RangeSegment(0, nnz_to_copy), + RAJA_LAMBDA(RAJA::Index_type ele_add) { + index_type itnz_dest = dest_nnz_st + ele_add; + iRow[itnz_dest] = dest_row_st + ele_add; + jCol[itnz_dest] = col_dest_st + ele_add; + values[itnz_dest] = src_val; + }); } -/** -* @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. -* At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. -* @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. -* @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! -* @pre 'pattern' has same size as `dx` -* @pre 'pattern` has exactly `nnz_to_copy` nonzeros -*/ +/** + * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. + * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. + * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + * @pre 'pattern' has same size as `dx` + * @pre 'pattern` has exactly `nnz_to_copy` nonzeros + */ template -void hiopMatrixRajaSparseTriplet:: -copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const size_type& nnz_to_copy, - const hiopVector& pattern) +void hiopMatrixRajaSparseTriplet::copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy, + const hiopVector& pattern) { - assert(this->numberOfNonzeros() >= nnz_to_copy+dest_nnz_st); + assert(this->numberOfNonzeros() >= nnz_to_copy + dest_nnz_st); assert(this->n() >= nnz_to_copy); assert(nnz_to_copy + dest_row_st <= this->m()); assert(nnz_to_copy + dest_col_st <= this->n()); @@ -1831,13 +1702,13 @@ copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, #ifdef HIOP_DEEPCHECKS RAJA::ReduceSum sum(0); - RAJA::forall(RAJA::RangeSegment(0, n), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(pattern_dev[i]!=0.0){ - sum += 1; - } - }); + RAJA::forall( + RAJA::RangeSegment(0, n), + RAJA_LAMBDA(RAJA::Index_type i) { + if(pattern_dev[i] != 0.0) { + sum += 1; + } + }); size_type nrm = sum.get(); assert(nrm == nnz_to_copy); #endif @@ -1848,40 +1719,36 @@ copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, // auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator devalloc = resmgr.getAllocator(mem_space_); - index_type* row_start_dev = static_cast(devalloc.allocate((n+1)*sizeof(index_type))); + index_type* row_start_dev = static_cast(devalloc.allocate((n + 1) * sizeof(index_type))); RAJA::forall( - RAJA::RangeSegment(0, n+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(i==0) { - row_start_dev[i] = 0; - } else { - // from i=1..n - if(pattern_dev[i-1]!=0.0){ - row_start_dev[i] = 1; + RAJA::RangeSegment(0, n + 1), + RAJA_LAMBDA(RAJA::Index_type i) { + if(i == 0) { + row_start_dev[i] = 0; } else { - row_start_dev[i] = 0; + // from i=1..n + if(pattern_dev[i - 1] != 0.0) { + row_start_dev[i] = 1; + } else { + row_start_dev[i] = 0; + } } - } - } - ); - RAJA::inclusive_scan_inplace(RAJA::make_span(row_start_dev,n+1), RAJA::operators::plus()); + }); + RAJA::inclusive_scan_inplace(RAJA::make_span(row_start_dev, n + 1), RAJA::operators::plus()); RAJA::forall( - RAJA::RangeSegment(1, n+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(row_start_dev[i] != row_start_dev[i-1]){ - index_type ele_add = row_start_dev[i] - 1; - assert(ele_add >= 0 && ele_add < nnz_to_copy); - index_type itnz_dest = dest_nnz_st + ele_add; - iRow[itnz_dest] = dest_row_st + ele_add; - jCol[itnz_dest] = dest_col_st + ele_add; - values[itnz_dest] = x[i-1]; - } - } - ); + RAJA::RangeSegment(1, n + 1), + RAJA_LAMBDA(RAJA::Index_type i) { + if(row_start_dev[i] != row_start_dev[i - 1]) { + index_type ele_add = row_start_dev[i] - 1; + assert(ele_add >= 0 && ele_add < nnz_to_copy); + index_type itnz_dest = dest_nnz_st + ele_add; + iRow[itnz_dest] = dest_row_st + ele_add; + jCol[itnz_dest] = dest_col_st + ele_add; + values[itnz_dest] = x[i - 1]; + } + }); devalloc.deallocate(row_start_dev); } @@ -1890,7 +1757,7 @@ template bool hiopMatrixRajaSparseTriplet::is_diagonal() const { bool bret{false}; - + if(ncols_ != nrows_) { bret = false; return bret; @@ -1902,28 +1769,25 @@ bool hiopMatrixRajaSparseTriplet::is_diagonal() const RAJA::ReduceSum sum_no_diag(0); RAJA::forall( - RAJA::RangeSegment(0, nnz_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if (iRow[i]!=jCol[i]) { - sum_no_diag += 1; - } - } - ); - bret = ((sum_no_diag.get())==0); - + RAJA::RangeSegment(0, nnz_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(iRow[i] != jCol[i]) { + sum_no_diag += 1; + } + }); + bret = ((sum_no_diag.get()) == 0); + return bret; } /********************************************************************************** - * Sparse symmetric matrix in triplet format. Only the UPPER triangle is stored - **********************************************************************************/ + * Sparse symmetric matrix in triplet format. Only the UPPER triangle is stored + **********************************************************************************/ template -void hiopMatrixRajaSymSparseTriplet:: -timesVec(double beta, - hiopVector& y, - double alpha, - const hiopVector& x ) const +void hiopMatrixRajaSymSparseTriplet::timesVec(double beta, + hiopVector& y, + double alpha, + const hiopVector& x) const { assert(this->ncols_ == this->nrows_); assert(x.get_size() == this->ncols_); @@ -1937,19 +1801,17 @@ timesVec(double beta, timesVec(beta, y_data, alpha, x_data); } - + /** y = beta * y + alpha * this * x */ template -void hiopMatrixRajaSymSparseTriplet:: -timesVec(double beta, double* y, double alpha, const double* x) const +void hiopMatrixRajaSymSparseTriplet::timesVec(double beta, + double* y, + double alpha, + const double* x) const { assert(this->ncols_ == this->nrows_); - - RAJA::forall(RAJA::RangeSegment(0, this->nrows_), - RAJA_LAMBDA(RAJA::Index_type i) - { - y[i] *= beta; - }); + + RAJA::forall(RAJA::RangeSegment(0, this->nrows_), RAJA_LAMBDA(RAJA::Index_type i) { y[i] *= beta; }); // addition to y[iRow[i]] must be atomic auto iRow = this->iRow_; @@ -1962,19 +1824,18 @@ timesVec(double beta, double* y, double alpha, const double* x) const auto ncols = this->ncols_; #endif - RAJA::forall(RAJA::RangeSegment(0, this->nnz_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(iRow[i] < nrows); - assert(jCol[i] < ncols); - RAJA::AtomicRef yy1(&y[iRow[i]]); - yy1 += alpha * x[jCol[i]] * values[i]; - if(iRow[i] != jCol[i]) - { - RAJA::AtomicRef yy2(&y[jCol[i]]); - yy2 += alpha * x[iRow[i]] * values[i]; - } - }); + RAJA::forall( + RAJA::RangeSegment(0, this->nnz_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(iRow[i] < nrows); + assert(jCol[i] < ncols); + RAJA::AtomicRef yy1(&y[iRow[i]]); + yy1 += alpha * x[jCol[i]] * values[i]; + if(iRow[i] != jCol[i]) { + RAJA::AtomicRef yy2(&y[jCol[i]]); + yy2 += alpha * x[iRow[i]] * values[i]; + } + }); } template @@ -1999,54 +1860,53 @@ hiopMatrixSparse* hiopMatrixRajaSymSparseTriplet::new_c return copy; } -/** - * @brief block of W += alpha*this +/** + * @brief block of W += alpha*this * @note W contains only the upper triangular entries - */ + */ template -void hiopMatrixRajaSymSparseTriplet:: -addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, - hiopMatrixDense& W) const +void hiopMatrixRajaSymSparseTriplet::addUpperTriangleToSymDenseMatrixUpperTriangle( + int diag_start, + double alpha, + hiopMatrixDense& W) const { - assert(diag_start>=0 && diag_start + this->nrows_ <= W.m()); + assert(diag_start >= 0 && diag_start + this->nrows_ <= W.m()); assert(diag_start + this->ncols_ <= W.n()); - assert(W.n()==W.m()); + assert(W.n() == W.m()); // double** WM = W.get_M(); - RAJA::View> WM(W.local_data(), - W.get_local_size_m(), - W.get_local_size_n()); + RAJA::View> WM(W.local_data(), W.get_local_size_m(), W.get_local_size_n()); auto Wm = W.m(); auto Wn = W.n(); auto iRow = this->iRow_; auto jCol = this->jCol_; auto values = this->values_; - RAJA::forall(RAJA::RangeSegment(0, this->nnz_), - RAJA_LAMBDA(RAJA::Index_type it) - { - assert(iRow[it]<=jCol[it] && "sparse symmetric matrices should contain only upper triangular entries"); - const int i = iRow[it]+diag_start; - const int j = jCol[it]+diag_start; - assert(i=0 && j>=0); - assert(i<=j && "symMatrices not aligned; source entries need to map inside the upper triangular part of destination"); - WM(i, j) += alpha * values[it]; - }); - } - -/** - * @brief block of W += alpha*(this)^T + RAJA::forall( + RAJA::RangeSegment(0, this->nnz_), + RAJA_LAMBDA(RAJA::Index_type it) { + assert(iRow[it] <= jCol[it] && "sparse symmetric matrices should contain only upper triangular entries"); + const int i = iRow[it] + diag_start; + const int j = jCol[it] + diag_start; + assert(i < Wm && j < Wn); + assert(i >= 0 && j >= 0); + assert(i <= j && + "symMatrices not aligned; source entries need to map inside the upper triangular part of destination"); + WM(i, j) += alpha * values[it]; + }); +} + +/** + * @brief block of W += alpha*(this)^T * @note W contains only the upper triangular entries - * + * * @warning This method should not be called directly. * Use addUpperTriangleToSymDenseMatrixUpperTriangle instead. */ template -void hiopMatrixRajaSymSparseTriplet:: -transAddToSymDenseMatrixUpperTriangle(int row_start, - int col_start, - double alpha, - hiopMatrixDense& W) const +void hiopMatrixRajaSymSparseTriplet::transAddToSymDenseMatrixUpperTriangle(int row_start, + int col_start, + double alpha, + hiopMatrixDense& W) const { assert(0 && "This method should not be called for symmetric matrices."); } @@ -2056,58 +1916,52 @@ transAddToSymDenseMatrixUpperTriangle(int row_start, * are available in 'vec_dest' starting at 'vec_start' */ template -void hiopMatrixRajaSymSparseTriplet:: -startingAtAddSubDiagonalToStartingAt(int diag_src_start, - const double& alpha, - hiopVector& vec_dest, - int vec_start, - int num_elems/*=-1*/) const +void hiopMatrixRajaSymSparseTriplet::startingAtAddSubDiagonalToStartingAt( + int diag_src_start, + const double& alpha, + hiopVector& vec_dest, + int vec_start, + int num_elems /*=-1*/) const { auto& vd = dynamic_cast&>(vec_dest); - if(num_elems < 0) - num_elems = vd.get_size(); - assert(num_elems<=vd.get_size()); + if(num_elems < 0) num_elems = vd.get_size(); + assert(num_elems <= vd.get_size()); - assert(diag_src_start>=0 && diag_src_start+num_elems<=this->nrows_); + assert(diag_src_start >= 0 && diag_src_start + num_elems <= this->nrows_); double* v = vd.local_data(); auto vds = vd.get_size(); auto iRow = this->iRow_; auto jCol = this->jCol_; auto values = this->values_; - RAJA::forall(RAJA::RangeSegment(0, this->nnz_), - RAJA_LAMBDA(RAJA::Index_type itnz) - { - const int row = iRow[itnz]; - if(row == jCol[itnz]) - { - if(row >= diag_src_start && row < diag_src_start + num_elems) - { - assert(row+vec_start < vds); - v[vec_start + row] += alpha * values[itnz]; + RAJA::forall( + RAJA::RangeSegment(0, this->nnz_), + RAJA_LAMBDA(RAJA::Index_type itnz) { + const int row = iRow[itnz]; + if(row == jCol[itnz]) { + if(row >= diag_src_start && row < diag_src_start + num_elems) { + assert(row + vec_start < vds); + v[vec_start + row] += alpha * values[itnz]; + } } - } - }); + }); } template -size_type hiopMatrixRajaSymSparseTriplet:: -numberOfOffDiagNonzeros() const +size_type hiopMatrixRajaSymSparseTriplet::numberOfOffDiagNonzeros() const { - if(-1==nnz_offdiag_) { - this->nnz_offdiag_= this->nnz_; - int *irow = this->iRow_; - int *jcol = this->jCol_; + if(-1 == nnz_offdiag_) { + this->nnz_offdiag_ = this->nnz_; + int* irow = this->iRow_; + int* jcol = this->jCol_; RAJA::ReduceSum sum(0); RAJA::forall( - RAJA::RangeSegment(0, this->nnz_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if (irow[i]==jcol[i]) { - sum += 1; - } - } - ); + RAJA::RangeSegment(0, this->nnz_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(irow[i] == jcol[i]) { + sum += 1; + } + }); this->nnz_offdiag_ -= static_cast(sum.get()); } @@ -2115,20 +1969,19 @@ numberOfOffDiagNonzeros() const } /* -* extend original Hess to [Hess+diag_term] -*/ + * extend original Hess to [Hess+diag_term] + */ template -void hiopMatrixRajaSymSparseTriplet:: -set_Hess_FR(const hiopMatrixSparse& Hess, - int* iHSS, - int* jHSS, - double* MHSS, - const hiopVector& add_diag) +void hiopMatrixRajaSymSparseTriplet::set_Hess_FR(const hiopMatrixSparse& Hess, + int* iHSS, + int* jHSS, + double* MHSS, + const hiopVector& add_diag) { if(this->nnz_ == 0) { return; } - + hiopMatrixRajaSymSparseTriplet& M1 = *this; const auto& M2 = dynamic_cast&>(Hess); @@ -2139,19 +1992,19 @@ set_Hess_FR(const hiopMatrixSparse& Hess, const int n2 = M2.n(); int m_row = add_diag.get_size(); - assert(n1==m1); - assert(n2==m2); - assert(m2<=m1); + assert(n1 == m1); + assert(n2 == m2); + assert(m2 <= m1); - // note that nnz2 can be zero, i.e., original hess is empty. + // note that nnz2 can be zero, i.e., original hess is empty. // Hence we use add_diag.get_size() to detect the length of x in the base problem - assert(m_row==m2 || m2==0); - + assert(m_row == m2 || m2 == 0); + int nnz1 = m_row + M2.numberOfOffDiagNonzeros(); assert(this->nnz_ == nnz1); - if(M2.row_starts_==NULL) { + if(M2.row_starts_ == NULL) { M2.row_starts_ = M2.allocAndBuildRowStarts(); } assert(M2.row_starts_); @@ -2164,145 +2017,131 @@ set_Hess_FR(const hiopMatrixSparse& Hess, // extend Hess to the p and n parts --- sparsity // sparsity may change due to te new obj term zeta*DR^2.*(x-x_ref) if(iHSS != nullptr && jHSS != nullptr) { - int* M1iRow = M1.i_row(); int* M1jCol = M1.j_col(); - + if(m2 > 0) { - if(M1.row_starts_==nullptr) { + if(M1.row_starts_ == nullptr) { M1.row_starts_ = nullptr; M1.row_starts_ = hiopMatrixRajaSparseTriplet::allocRowStarts(m1, this->mem_space_); - M1_row_start = M1.row_starts_->idx_start_; + M1_row_start = M1.row_starts_->idx_start_; // // The latest CPU code can be found in 342eb99ec16d45f57a492be1bf1e39cce73995a5 // It is replaced by RAJA::inclusive_scan after that commit // RAJA::forall( - RAJA::RangeSegment(0, m1+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(i>0) { - M1_row_start[i] = 1; - } else { - M1_row_start[i] = 0; - } - } - ); + RAJA::RangeSegment(0, m1 + 1), + RAJA_LAMBDA(RAJA::Index_type i) { + if(i > 0) { + M1_row_start[i] = 1; + } else { + M1_row_start[i] = 0; + } + }); RAJA::forall( - RAJA::RangeSegment(0, m2), - RAJA_LAMBDA(RAJA::Index_type i) - { - index_type k_base = M2_row_start[i]; - index_type nnz_in_row = M2_row_start[i+1] - k_base; - - if(nnz_in_row > 0 && M2iRow[k_base] == M2jCol[k_base]) { - // first nonzero in this row is a diagonal term - // skip it since we will defined the diagonal nonezero - M1_row_start[i+1] += nnz_in_row-1; - } else { - M1_row_start[i+1] += nnz_in_row; - } - } - ); - - RAJA::inclusive_scan_inplace(RAJA::make_span(M1_row_start,m1+1), RAJA::operators::plus()); + RAJA::RangeSegment(0, m2), + RAJA_LAMBDA(RAJA::Index_type i) { + index_type k_base = M2_row_start[i]; + index_type nnz_in_row = M2_row_start[i + 1] - k_base; + + if(nnz_in_row > 0 && M2iRow[k_base] == M2jCol[k_base]) { + // first nonzero in this row is a diagonal term + // skip it since we will defined the diagonal nonezero + M1_row_start[i + 1] += nnz_in_row - 1; + } else { + M1_row_start[i + 1] += nnz_in_row; + } + }); + + RAJA::inclusive_scan_inplace(RAJA::make_span(M1_row_start, m1 + 1), + RAJA::operators::plus()); } RAJA::forall( - RAJA::RangeSegment(0, m2), - RAJA_LAMBDA(RAJA::Index_type i) - { - index_type k = M1_row_start[i]; - index_type k_base = M2_row_start[i]; - size_type nnz_in_row = M2_row_start[i+1] - k_base; - - // insert diagonal entry due to the new obj term - M1iRow[k] = iHSS[k] = i; - M1jCol[k] = jHSS[k] = i; - k++; + RAJA::RangeSegment(0, m2), + RAJA_LAMBDA(RAJA::Index_type i) { + index_type k = M1_row_start[i]; + index_type k_base = M2_row_start[i]; + size_type nnz_in_row = M2_row_start[i + 1] - k_base; - if(nnz_in_row > 0 && M2iRow[k_base] == M2jCol[k_base]) { - // first nonzero in this row is a diagonal term - // skip it since we have defined the diagonal nonezero - k_base++; - } - - // copy from base Hess - while(k_base < M2_row_start[i+1]) { + // insert diagonal entry due to the new obj term M1iRow[k] = iHSS[k] = i; - M1jCol[k] = jHSS[k] = M2jCol[k_base]; + M1jCol[k] = jHSS[k] = i; k++; - k_base++; - } - } - ); + + if(nnz_in_row > 0 && M2iRow[k_base] == M2jCol[k_base]) { + // first nonzero in this row is a diagonal term + // skip it since we have defined the diagonal nonezero + k_base++; + } + + // copy from base Hess + while(k_base < M2_row_start[i + 1]) { + M1iRow[k] = iHSS[k] = i; + M1jCol[k] = jHSS[k] = M2jCol[k_base]; + k++; + k_base++; + } + }); } else { // hess in the base problem is empty. just insert the new diag elements RAJA::forall( - RAJA::RangeSegment(0, m_row), - RAJA_LAMBDA(RAJA::Index_type i) - { - M1iRow[i] = iHSS[i] = i; - M1jCol[i] = jHSS[i] = i; - } - ); + RAJA::RangeSegment(0, m_row), + RAJA_LAMBDA(RAJA::Index_type i) { + M1iRow[i] = iHSS[i] = i; + M1jCol[i] = jHSS[i] = i; + }); } } // extend Hess to the p and n parts --- element - if(MHSS != nullptr) { + if(MHSS != nullptr) { assert(M1.row_starts_); index_type* M1_row_start = M1.row_starts_->idx_start_; double* M1values = M1.M(); const double* M2values = M2.M(); - + const double* diag_data = add_diag.local_data_const(); - + if(m2 > 0) { RAJA::forall( - RAJA::RangeSegment(0, m2), - RAJA_LAMBDA(RAJA::Index_type i) - { - index_type k = M1_row_start[i]; - index_type k_base = M2_row_start[i]; - size_type nnz_in_row_base = M2_row_start[i+1] - k_base; - - // insert diagonal entry due to the new obj term - M1values[k] = MHSS[k] = diag_data[i]; - - if(nnz_in_row_base > 0 && M2iRow[k_base] == M2jCol[k_base]) { - // first nonzero in this row is a diagonal term - // add it since we will defined the diagonal nonezero - M1values[k] += M2values[k_base]; - MHSS[k] = M1values[k]; - k_base++; - } - k++; - - // copy from base Hess - while(k_base < M2_row_start[i+1]) { - M1values[k] = MHSS[k] = M2values[k_base]; + RAJA::RangeSegment(0, m2), + RAJA_LAMBDA(RAJA::Index_type i) { + index_type k = M1_row_start[i]; + index_type k_base = M2_row_start[i]; + size_type nnz_in_row_base = M2_row_start[i + 1] - k_base; + + // insert diagonal entry due to the new obj term + M1values[k] = MHSS[k] = diag_data[i]; + + if(nnz_in_row_base > 0 && M2iRow[k_base] == M2jCol[k_base]) { + // first nonzero in this row is a diagonal term + // add it since we will defined the diagonal nonezero + M1values[k] += M2values[k_base]; + MHSS[k] = M1values[k]; + k_base++; + } k++; - k_base++; - } - } - ); + + // copy from base Hess + while(k_base < M2_row_start[i + 1]) { + M1values[k] = MHSS[k] = M2values[k_base]; + k++; + k_base++; + } + }); } else { // hess in the base problem is empty. just insert the new diag elements RAJA::forall( - RAJA::RangeSegment(0, m_row), - RAJA_LAMBDA(RAJA::Index_type i) - { - M1values[i] = MHSS[i] = diag_data[i]; - } - ); + RAJA::RangeSegment(0, m_row), + RAJA_LAMBDA(RAJA::Index_type i) { M1values[i] = MHSS[i] = diag_data[i]; }); } } this->copyFromDev(); } -} //end of namespace +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixRajaSparseTripletOmp.cpp b/src/LinAlg/hiopMatrixRajaSparseTripletOmp.cpp index 0da9afef0..42c645da1 100644 --- a/src/LinAlg/hiopMatrixRajaSparseTripletOmp.cpp +++ b/src/LinAlg/hiopMatrixRajaSparseTripletOmp.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -65,7 +65,7 @@ using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exe using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; using matrix_exec = ExecRajaPoliciesBackend::matrix_exec; using hiop_raja_atomic = ExecRajaPoliciesBackend::hiop_raja_atomic; -} +} // namespace hiop #include "hiopMatrixRajaSparseTripletImpl.hpp" @@ -73,10 +73,10 @@ namespace hiop { // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopMatrixRajaSparseTriplet; template class hiopMatrixRajaSparseTriplet; template class hiopMatrixRajaSymSparseTriplet; template class hiopMatrixRajaSymSparseTriplet; -} +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixSparse.hpp b/src/LinAlg/hiopMatrixSparse.hpp index f5c9f11fc..ff7900e65 100644 --- a/src/LinAlg/hiopMatrixSparse.hpp +++ b/src/LinAlg/hiopMatrixSparse.hpp @@ -68,22 +68,19 @@ class hiopMatrixSparse : public hiopMatrix { public: hiopMatrixSparse(int rows, int cols, int nnz) - : nrows_(rows), - ncols_(cols), - nnz_(nnz) - { - } - virtual ~hiopMatrixSparse() - { - } + : nrows_(rows), + ncols_(cols), + nnz_(nnz) + {} + virtual ~hiopMatrixSparse() {} virtual void setToZero() = 0; virtual void setToConstant(double c) = 0; virtual void copyFrom(const hiopMatrixSparse& dm) = 0; - - /* @brief copy the nonzeros into 3 arrays, in their triplet form. - * This function is not used right now. - * + + /* @brief copy the nonzeros into 3 arrays, in their triplet form. + * This function is not used right now. + * * TODO: Unit test is missing. */ virtual void copy_to(int* irow, int* jcol, double* val) = 0; @@ -93,9 +90,9 @@ class hiopMatrixSparse : public hiopMatrix */ virtual void copy_to(hiopMatrixDense& W) = 0; - /* @brief copy `n_rows` rows from `src` into `this`, i.e., the ith row of this is copied from + /* @brief copy `n_rows` rows from `src` into `this`, i.e., the ith row of this is copied from * the rows_idx[i]_th row in `src`. - * + * * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! */ virtual void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows) = 0; @@ -113,8 +110,8 @@ class hiopMatrixSparse : public hiopMatrix virtual void timesMatTrans(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const = 0; /** - * Adds alpha times the i-th entry of `D` to the i-th diagonal of `this`. - * + * Adds alpha times the i-th entry of `D` to the i-th diagonal of `this`. + * * @pre Sparse matrices should have the diagonal entries allocated as nonzeros. * @pre `this` is expected to be symmetric. * @pre Size of `D` should match the size(s) of `M`. @@ -126,8 +123,11 @@ class hiopMatrixSparse : public hiopMatrix /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ - virtual void addSubDiagonal(int start_on_dest_diag, const double& alpha, const hiopVector& d_, - int start_on_src_vec, int num_elems = -1) + virtual void addSubDiagonal(int start_on_dest_diag, + const double& alpha, + const hiopVector& d_, + int start_on_src_vec, + int num_elems = -1) { assert(false && "not needed / implemented"); } @@ -137,21 +137,21 @@ class hiopMatrixSparse : public hiopMatrix } /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', scaled by 'scal' - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', scaled by 'scal' + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubDiagonalFrom(const index_type& start_on_dest_diag, const size_type& num_elems, const hiopVector& d_, const index_type& start_on_nnz_idx, - double scal=1.0) = 0; - - /* - * @brief: add constant 'c' to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements. - * The number of elements added is 'num_elems' - * - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + double scal = 1.0) = 0; + + /* + * @brief: add constant 'c' to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements. + * The number of elements added is 'num_elems' + * + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubDiagonalTo(const index_type& start_on_dest_diag, const size_type& num_elems, const double& c, @@ -161,15 +161,13 @@ class hiopMatrixSparse : public hiopMatrix virtual void addMatrix(double alpha, const hiopMatrix& X) = 0; /* block of W += alpha*transpose(this) */ - virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, - int col_dest_start, - double alpha, + virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, hiopMatrixDense& W) const = 0; - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle( - int diag_start, double alpha, hiopMatrixDense& W) const = 0; + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const = 0; - virtual void addUpperTriangleToSymSparseMatrixUpperTriangle( - int diag_start, double alpha, hiopMatrixSparse& W) const + virtual void addUpperTriangleToSymSparseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixSparse& W) const { assert(false && "counterpart method of hiopMatrixSymSparse should be used"); } @@ -178,8 +176,10 @@ class hiopMatrixSparse : public hiopMatrix * * Only the upper triangular entries of W are updated. */ - virtual void addMDinvMtransToDiagBlockOfSymDeMatUTri( - int rowCol_dest_start, const double& alpha, const hiopVector& D, hiopMatrixDense& W) const = 0; + virtual void addMDinvMtransToDiagBlockOfSymDeMatUTri(int rowCol_dest_start, + const double& alpha, + const hiopVector& D, + hiopMatrixDense& W) const = 0; /* block of W += alpha * M * D^{-1} * transpose(N), where M=this * @@ -190,43 +190,44 @@ class hiopMatrixSparse : public hiopMatrix * the (strictly) lower triangular elements (these are ignored later on since only the upper * triangular part of W will be accessed) */ - virtual void addMDinvNtransToSymDeMatUTri(int row_dest_start, + virtual void addMDinvNtransToSymDeMatUTri(int row_dest_start, int col_dest_start, - const double& alpha, - const hiopVector& D, - const hiopMatrixSparse& N, + const double& alpha, + const hiopVector& D, + const hiopMatrixSparse& N, hiopMatrixDense& W) const = 0; /** - * @brief Copy 'n_rows' rows from matrix 'src_gen', started from 'rows_src_idx_st', to the rows started from 'B_rows_st' in 'this'. - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @brief Copy 'n_rows' rows from matrix 'src_gen', started from 'rows_src_idx_st', to the rows started from 'B_rows_st' in + * 'this'. The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. * * @pre 'src_gen' must have exactly, or more than 'n_rows' rows after row 'rows_src_idx_st' * @pre 'this' must have exactly, or more than 'n_rows' rows after row 'rows_dest_idx_st' * @pre 'dest_nnz_st' + the number of non-zeros in the copied the rows must be less or equal to this->numOfNumbers() - * @pre User must know the nonzero pattern of src and dest matrices. Assume non-zero patterns of these two wont change, and 'src_gen' is a submatrix of 'this' + * @pre User must know the nonzero pattern of src and dest matrices. Assume non-zero patterns of these two wont change, and + * 'src_gen' is a submatrix of 'this' * @pre Otherwise, this function may replace the non-zero values and nonzero patterns for the undesired elements. * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! */ virtual void copyRowsBlockFrom(const hiopMatrix& src_gen, - const index_type& rows_src_idx_st, + const index_type& rows_src_idx_st, const size_type& n_rows, - const index_type& rows_dest_idx_st, + const index_type& rows_dest_idx_st, const size_type& dest_nnz_st) = 0; /** - * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' - * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to - * this->numOfNumbers() - * @pre User must know the nonzero pattern of src and dest matrices. The method assumes - * that non-zero patterns does not change between calls and that 'src_gen' is a valid - * submatrix of 'this' - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' + * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to + * this->numOfNumbers() + * @pre User must know the nonzero pattern of src and dest matrices. The method assumes + * that non-zero patterns does not change between calls and that 'src_gen' is a valid + * submatrix of 'this' + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFrom(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, @@ -234,13 +235,13 @@ class hiopMatrixSparse : public hiopMatrix const bool offdiag_only = false) = 0; /** - * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner - * 'dest_row_st' and 'dest_col_st'. - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner + * 'dest_row_st' and 'dest_col_st'. + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFromTrans(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, @@ -248,12 +249,12 @@ class hiopMatrixSparse : public hiopMatrix const bool offdiag_only = false) = 0; /** - * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_colpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -262,12 +263,12 @@ class hiopMatrixSparse : public hiopMatrix const hiopVector& ix) = 0; /** - * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -276,28 +277,28 @@ class hiopMatrixSparse : public hiopMatrix const hiopVector& ix) = 0; /** - * @brief Sets the diagonal of a subblock of `this` to a constant times identity matrix. - * - * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. - * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. - * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replased. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Sets the diagonal of a subblock of `this` to a constant times identity matrix. + * + * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. + * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. + * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replased. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copyDiagMatrixToSubblock(const double& src_val, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, const size_type& nnz_to_copy) = 0; - /** - * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. - * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. - * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - * @pre 'pattern' has same size as `x`. - * @pre 'pattern` has exactly `nnz_to_copy` nonzeros. - */ + /** + * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. + * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. + * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + * @pre 'pattern' has same size as `x`. + * @pre 'pattern` has exactly `nnz_to_copy` nonzeros. + */ virtual void copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, const index_type& dest_row_st, const index_type& dest_col_st, @@ -307,42 +308,38 @@ class hiopMatrixSparse : public hiopMatrix virtual double max_abs_value() = 0; - virtual void row_max_abs_value(hiopVector &ret_vec) = 0; + virtual void row_max_abs_value(hiopVector& ret_vec) = 0; - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale) = 0; + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale) = 0; virtual bool isfinite() const = 0; // virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; - virtual void print(FILE* f = NULL, - const char* msg = NULL, - int maxRows = -1, - int maxCols = -1, - int rank = -1) const = 0; + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const = 0; /* extract subdiagonal from 'this' (source) and adds the entries to 'vec_dest' starting at * index 'vec_start'. If num_elems>=0, 'num_elems' are copied; otherwise copies as many as * are available in 'vec_dest' starting at 'vec_start' */ - virtual void startingAtAddSubDiagonalToStartingAt(int diag_src_start, + virtual void startingAtAddSubDiagonalToStartingAt(int diag_src_start, const double& alpha, - hiopVector& vec_dest, - int vec_start, - int num_elems=-1) const = 0; - + hiopVector& vec_dest, + int vec_start, + int num_elems = -1) const = 0; virtual hiopMatrixSparse* alloc_clone() const = 0; virtual hiopMatrixSparse* new_copy() const = 0; virtual index_type* i_row() = 0; virtual index_type* j_col() = 0; - virtual double* M() = 0; + virtual double* M() = 0; virtual const index_type* i_row() const = 0; virtual const index_type* j_col() const = 0; - virtual const double* M() const = 0; + virtual const double* M() const = 0; virtual size_type numberOfOffDiagNonzeros() const = 0; - - /// @brief build Jac for FR problem, from the base problem `Jac_c` and `Jac_d`. Set sparsity if `task`=0, otherwise set values + + /// @brief build Jac for FR problem, from the base problem `Jac_c` and `Jac_d`. Set sparsity if `task`=0, otherwise set + /// values virtual void set_Jac_FR(const hiopMatrixSparse& Jac_c, const hiopMatrixSparse& Jac_d, int* iJacS, @@ -350,40 +347,24 @@ class hiopMatrixSparse : public hiopMatrix double* MJacS) = 0; /// @brief build Hess for FR problem, from the base problem `Hess`. - virtual void set_Hess_FR(const hiopMatrixSparse& Hess, - int* iHSS, - int* jHSS, - double* MHSS, - const hiopVector& add_diag) = 0; + virtual void set_Hess_FR(const hiopMatrixSparse& Hess, int* iHSS, int* jHSS, double* MHSS, const hiopVector& add_diag) = 0; - inline size_type m() const - { - return nrows_; - } - inline size_type n() const - { - return ncols_; - } - inline size_type numberOfNonzeros() const - { - return nnz_; - } + inline size_type m() const { return nrows_; } + inline size_type n() const { return ncols_; } + inline size_type numberOfNonzeros() const { return nnz_; } virtual bool is_diagonal() const = 0; virtual void extract_diagonal(hiopVector& diag_out) const = 0; #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol = 1e-16) const - { - return false; - } + virtual bool assertSymmetry(double tol = 1e-16) const { return false; } virtual bool checkIndexesAreOrdered() const = 0; #endif protected: - size_type nrows_; ///< number of rows - size_type ncols_; ///< number of columns - size_type nnz_; ///< number of nonzero entries + size_type nrows_; ///< number of rows + size_type ncols_; ///< number of columns + size_type nnz_; ///< number of nonzero entries }; -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixSparseCSR.hpp b/src/LinAlg/hiopMatrixSparseCSR.hpp index e13f623cb..059e45988 100644 --- a/src/LinAlg/hiopMatrixSparseCSR.hpp +++ b/src/LinAlg/hiopMatrixSparseCSR.hpp @@ -72,18 +72,14 @@ class hiopMatrixSparseCSR : public hiopMatrixSparse { public: hiopMatrixSparseCSR(int num_rows, int num_cols, int nnz) - : hiopMatrixSparse(num_rows, num_cols, nnz) - { - } - + : hiopMatrixSparse(num_rows, num_cols, nnz) + {} + hiopMatrixSparseCSR() - : hiopMatrixSparseCSR(0, 0, 0) - { - } - - virtual ~hiopMatrixSparseCSR() - { - } + : hiopMatrixSparseCSR(0, 0, 0) + {} + + virtual ~hiopMatrixSparseCSR() {} ///////////////////////////////////////////////////////////////////// // Below are CSR-specific methods (addition to hiopMatrixSparse) @@ -103,59 +99,59 @@ class hiopMatrixSparseCSR : public hiopMatrixSparse * @pre `this` is expected to store the diagonal entries as nonzero elements. */ virtual void set_diagonal(const double& val) = 0; - + /** - * Allocates a CSR matrix capable of storing the multiplication result of M = X*Y, where X + * Allocates a CSR matrix capable of storing the multiplication result of M = X*Y, where X * is the calling matrix class (`this`) and Y is the `Y` argument of the method. * * @note Should be used in conjunction with `times_mat_symbolic` and `times_mat_numeric` - * + * * @pre The dimensions of the matrices should be consistent with the multiplication. - * + * */ virtual hiopMatrixSparseCSR* times_mat_alloc(const hiopMatrixSparseCSR& Y) const = 0; - + /** * Computes sparsity pattern, meaning computes row pointers and column indexes of `M`, - * of M = X*Y, where X is the calling matrix class (`this`) and Y is the second argument. + * of M = X*Y, where X is the calling matrix class (`this`) and Y is the second argument. * * @note The output matrix `M` will have unique and ordered column indexes (with the same * row) * * @note Specializations of this class may only be able to compute the sparsity pattern in - * tandem with the numerical multiplications (for example, because of API limitations). - * In this cases, the `times_mat_numeric` will take over sparsity computations and the + * tandem with the numerical multiplications (for example, because of API limitations). + * In this cases, the `times_mat_numeric` will take over sparsity computations and the * arrays with row pointers and column indexes may be uninitialized after this call. - * + * * @pre The dimensions of the matrices should be consistent with the multiplication. - * + * * @pre The column indexes within the same row must be unique and ordered for `Y`. - * - * @pre The internal arrays of `M` should have enough storage to hold the sparsity - * pattern (row pointers and column indexes) and values of the multiplication result. + * + * @pre The internal arrays of `M` should have enough storage to hold the sparsity + * pattern (row pointers and column indexes) and values of the multiplication result. * This preallocation can be done by calling `times_mat_alloc` prior to this method. - * + * */ - virtual void times_mat_symbolic(hiopMatrixSparseCSR& M, const hiopMatrixSparseCSR& Y) const = 0; + virtual void times_mat_symbolic(hiopMatrixSparseCSR& M, const hiopMatrixSparseCSR& Y) const = 0; /** * Computes (numerical values of) M = beta*M + alpha*X*D*Y, where X is the calling matrix * class (`this`), beta and alpha are scalars passed as arguments, and M and Y are matrices * of appropriate sizes passed as arguments. * - * @note Generally, only the nonzero values of the input/output argument `M` are updated + * @note Generally, only the nonzero values of the input/output argument `M` are updated * since the sparsity pattern (row pointers and column indexes) of `M` should have been * already computed by `times_mat_symbolic`. Some specializations of this method may be - * restricted to performing both phases in inside this method. + * restricted to performing both phases in inside this method. * * @pre The dimensions of the matrices should be consistent with the multiplication. * * @pre The column indexes within the same row must be unique and ordered both for input * matrices and result matrix `M`. * - * @pre The indexes arrays of `this`, `Y`, and `M` should not have changed since the + * @pre The indexes arrays of `this`, `Y`, and `M` should not have changed since the * last call to `times_diag_times_mat`. - * + * * Example of usage: * //initially allocate and compute M * auto* M = X.times_mat_alloc(Y); @@ -164,12 +160,9 @@ class hiopMatrixSparseCSR : public hiopMatrixSparse * ... calculations .... * //if only nonzero entries of X and Y have changed, call the fast multiplication routine * X.times_mat_numeric(0.0, M, 1.0, Y); - * + * */ - virtual void times_mat_numeric(double beta, - hiopMatrixSparseCSR& M, - double alpha, - const hiopMatrixSparseCSR& Y) = 0; + virtual void times_mat_numeric(double beta, hiopMatrixSparseCSR& M, double alpha, const hiopMatrixSparseCSR& Y) = 0; /// @brief Column scaling or right multiplication by a diagonal: `this`=`this`*D virtual void scale_cols(const hiopVector& D) = 0; @@ -177,11 +170,10 @@ class hiopMatrixSparseCSR : public hiopMatrixSparse /// @brief Row scaling or left multiplication by a diagonal: `this`=D*`this` virtual void scale_rows(const hiopVector& D) = 0; - /** - * Allocates and populates the sparsity pattern of `this` as the CSR representation + * Allocates and populates the sparsity pattern of `this` as the CSR representation * of the triplet matrix `M`. - * + * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. */ @@ -190,80 +182,79 @@ class hiopMatrixSparseCSR : public hiopMatrixSparse /** * Copies the numerical values of the triplet matrix M into the CSR matrix `this` * - * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be + * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be * allocated and populated, possibly by a previous call to `form_from_symbolic` * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. */ virtual void form_from_numeric(const hiopMatrixSparseTriplet& M) = 0; - + /** - * Allocates and populates the sparsity pattern of `this` as the CSR representation + * Allocates and populates the sparsity pattern of `this` as the CSR representation * of transpose of the triplet matrix `M`. - * + * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. */ virtual void form_transpose_from_symbolic(const hiopMatrixSparseTriplet& M) = 0; - + /** - * Copies the numerical values of the transpose of the triplet matrix M into the + * Copies the numerical values of the transpose of the triplet matrix M into the * CSR matrix `this` * - * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be + * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be * allocated and populated, possibly by a previous call to `form_transpose_from_symbolic` * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. - */ + */ virtual void form_transpose_from_numeric(const hiopMatrixSparseTriplet& M) = 0; /** - * Allocates and populates the sparsity pattern of `this` as the CSR representation + * Allocates and populates the sparsity pattern of `this` as the CSR representation * of transpose of the CSR matrix `M`. - * + * * @pre The input argument should have the column indexes sorted and unique within a row. */ virtual void form_transpose_from_symbolic(const hiopMatrixSparseCSR& M) = 0; - + /** * Copies the numerical values of the transpose of the CSR matrix M into the CSR matrix `this`. * - * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be + * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be * allocated and populated, possibly by a previous call to `form_transpose_from_symbolic` * * @pre The input argument should have the column indexes sorted and unique within a row. - */ + */ virtual void form_transpose_from_numeric(const hiopMatrixSparseCSR& M) = 0; - /** * (Re)Initializes `this` to a diagonal matrix with diagonal entries given by D. */ virtual void form_diag_from_symbolic(const hiopVector& D) = 0; - + /** * Sets the diagonal entries of `this` equal to entries of D - * + * * @pre Length of `D` should be equal to size(s) of `this` - * + * * @pre `this` should be a diagonal matrix (in CSR format) with storage for * all the diagonal entries, which can be ensured by calling the sister method * `form_diag_from_symbolic` */ virtual void form_diag_from_numeric(const hiopVector& D) = 0; - + /** - * Allocates and returns CSR matrix `M` capable of holding M = X+Y, where X is + * Allocates and returns CSR matrix `M` capable of holding M = X+Y, where X is * the calling matrix class (`this`) and Y is the argument passed to the method. */ virtual hiopMatrixSparseCSR* add_matrix_alloc(const hiopMatrixSparseCSR& Y) const = 0; /** - * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and + * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and * column indexes arrays) of `M`. `X` is `this`. - * + * * @pre `this` and `Y` should hold matrices of identical dimensions. * */ @@ -272,38 +263,33 @@ class hiopMatrixSparseCSR : public hiopMatrixSparse /** * Performs matrix addition M = alpha*X + beta*Y numerically, where * X is `this` and alpha and beta are scalars. - * + * * @pre `M`, `this` and `Y` should hold matrices of identical dimensions. - * - * @pre `M` and `X+Y` should have identical sparsity pattern, namely the + * + * @pre `M` and `X+Y` should have identical sparsity pattern, namely the * `add_matrix_symbolic` should have been called previously. * */ - virtual void add_matrix_numeric(hiopMatrixSparseCSR& M, - double alpha, - const hiopMatrixSparseCSR& Y, - double beta) const = 0; + virtual void add_matrix_numeric(hiopMatrixSparseCSR& M, double alpha, const hiopMatrixSparseCSR& Y, double beta) const = 0; - /** Performs a quick check and returns false if the CSR indexes are not ordered. - * + /** Performs a quick check and returns false if the CSR indexes are not ordered. + * * Should be used with caution, for example only under HIOP_DEEPCHECKS or for debugging purposes - * because it is a computationally intensive method for GPU implementations as transfers the - * matrix data from device to host. + * because it is a computationally intensive method for GPU implementations as transfers the + * matrix data from device to host. */ virtual bool check_csr_is_ordered() = 0; ///////////////////////////////////////////////////////////////////// // end of new CSR-specific methods ///////////////////////////////////////////////////////////////////// - protected: //// inherits nrows_, ncols_, and nnz_ from parent hiopSparseMatrix - + private: hiopMatrixSparseCSR(const hiopMatrixSparseCSR&) = delete; }; - -} //end of namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopMatrixSparseCSRSeq.cpp b/src/LinAlg/hiopMatrixSparseCSRSeq.cpp index e9d4373f6..cafdb9688 100644 --- a/src/LinAlg/hiopMatrixSparseCSRSeq.cpp +++ b/src/LinAlg/hiopMatrixSparseCSRSeq.cpp @@ -57,15 +57,15 @@ #include "hiop_blasdefs.hpp" -#include //for std::min -#include //for std::isfinite +#include //for std::min +#include //for std::isfinite #include #include #include #include #include -#include // std::cout, std::fixed -#include // std::setprecision +#include // std::cout, std::fixed +#include // std::setprecision #include "MemBackendCppImpl.hpp" #include "hiopCppStdUtils.hpp" @@ -76,15 +76,15 @@ namespace hiop { hiopMatrixSparseCSRSeq::hiopMatrixSparseCSRSeq(size_type rows, size_type cols, size_type nnz) - : hiopMatrixSparseCSR(rows, cols, nnz), - irowptr_(nullptr), - jcolind_(nullptr), - values_(nullptr), - buf_col_(nullptr), - row_starts_(nullptr) + : hiopMatrixSparseCSR(rows, cols, nnz), + irowptr_(nullptr), + jcolind_(nullptr), + values_(nullptr), + buf_col_(nullptr), + row_starts_(nullptr) { - if(rows==0 || cols==0) { - assert(nnz_==0 && "number of nonzeros must be zero when any of the dimensions are 0"); + if(rows == 0 || cols == 0) { + assert(nnz_ == 0 && "number of nonzeros must be zero when any of the dimensions are 0"); nnz_ = 0; } else { alloc(); @@ -92,20 +92,15 @@ hiopMatrixSparseCSRSeq::hiopMatrixSparseCSRSeq(size_type rows, size_type cols, s } hiopMatrixSparseCSRSeq::hiopMatrixSparseCSRSeq() - : hiopMatrixSparseCSR(0, 0, 0), - irowptr_(nullptr), - jcolind_(nullptr), - values_(nullptr), - buf_col_(nullptr), - row_starts_(nullptr) -{ -} + : hiopMatrixSparseCSR(0, 0, 0), + irowptr_(nullptr), + jcolind_(nullptr), + values_(nullptr), + buf_col_(nullptr), + row_starts_(nullptr) +{} - -hiopMatrixSparseCSRSeq::~hiopMatrixSparseCSRSeq() -{ - dealloc(); -} +hiopMatrixSparseCSRSeq::~hiopMatrixSparseCSRSeq() { dealloc(); } void hiopMatrixSparseCSRSeq::alloc() { @@ -113,17 +108,16 @@ void hiopMatrixSparseCSRSeq::alloc() assert(jcolind_ == nullptr); assert(values_ == nullptr); - irowptr_ = new index_type[nrows_+1]; + irowptr_ = new index_type[nrows_ + 1]; jcolind_ = new index_type[nnz_]; values_ = new double[nnz_]; assert(buf_col_ == nullptr); - //buf_col_ remains null since it is allocated on demand + // buf_col_ remains null since it is allocated on demand assert(row_starts_ == nullptr); - //row_starts_ remains null since it is allocated on demand + // row_starts_ remains null since it is allocated on demand } - void hiopMatrixSparseCSRSeq::dealloc() { delete[] row_starts_; @@ -137,25 +131,22 @@ void hiopMatrixSparseCSRSeq::dealloc() jcolind_ = nullptr; values_ = nullptr; } - + void hiopMatrixSparseCSRSeq::setToZero() { - for(index_type i=0; i=0 && start_on_dest_diag+num_elems<=this->nrows_); + assert(start_on_dest_diag >= 0 && start_on_dest_diag + num_elems <= this->nrows_); assert(false && "not implemented"); } -void hiopMatrixSparseCSRSeq::addMatrix(double alpha, const hiopMatrix& X) -{ - assert(false && "not needed"); -} +void hiopMatrixSparseCSRSeq::addMatrix(double alpha, const hiopMatrix& X) { assert(false && "not needed"); } /* block of W += alpha*transpose(this) * Note W; contains only the upper triangular entries */ -void hiopMatrixSparseCSRSeq:: -transAddToSymDenseMatrixUpperTriangle(index_type row_start, - index_type col_start, - double alpha, - hiopMatrixDense& W) const +void hiopMatrixSparseCSRSeq::transAddToSymDenseMatrixUpperTriangle(index_type row_start, + index_type col_start, + double alpha, + hiopMatrixDense& W) const { - assert(row_start>=0 && row_start+ncols_<=W.m()); - assert(col_start>=0 && col_start+nrows_<=W.n()); - assert(W.n()==W.m()); + assert(row_start >= 0 && row_start + ncols_ <= W.m()); + assert(col_start >= 0 && col_start + nrows_ <= W.n()); + assert(W.n() == W.m()); assert(false && "not yet implemented"); } double hiopMatrixSparseCSRSeq::max_abs_value() { - char norm='M'; size_type one=1; + char norm = 'M'; + size_type one = 1; double maxv = DLANGE(&norm, &one, &nnz_, values_, &one, nullptr); return maxv; } -void hiopMatrixSparseCSRSeq::row_max_abs_value(hiopVector &ret_vec) +void hiopMatrixSparseCSRSeq::row_max_abs_value(hiopVector& ret_vec) { assert(ret_vec.get_local_size() == nrows_); @@ -321,7 +291,7 @@ void hiopMatrixSparseCSRSeq::row_max_abs_value(hiopVector &ret_vec) assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRSeq::scale_row(hiopVector &vec_scal, const bool inv_scale) +void hiopMatrixSparseCSRSeq::scale_row(hiopVector& vec_scal, const bool inv_scale) { assert(vec_scal.get_local_size() == nrows_); @@ -330,32 +300,29 @@ void hiopMatrixSparseCSRSeq::scale_row(hiopVector &vec_scal, const bool inv_scal bool hiopMatrixSparseCSRSeq::isfinite() const { - for(index_type i=0; iirowptr_, irowptr_, (nrows_+1)*sizeof(index_type)); - memcpy(copy->jcolind_, jcolind_, nnz_*sizeof(index_type)); - memcpy(copy->values_, values_, nnz_*sizeof(double)); + memcpy(copy->irowptr_, irowptr_, (nrows_ + 1) * sizeof(index_type)); + memcpy(copy->jcolind_, jcolind_, nnz_ * sizeof(index_type)); + memcpy(copy->values_, values_, nnz_ * sizeof(double)); return copy; } void hiopMatrixSparseCSRSeq::copyFrom(const hiopMatrixSparse& dm) { - assert(nnz_==dm.numberOfNonzeros() && nrows_==dm.m()); + assert(nnz_ == dm.numberOfNonzeros() && nrows_ == dm.m()); const hiopMatrixSparseCSRSeq& src = dynamic_cast(dm); - - memcpy(irowptr_, src.irowptr_, (nrows_+1)*sizeof(index_type)); - memcpy(jcolind_, src.jcolind_, nnz_*sizeof(index_type)); - memcpy(values_, src.values_, nnz_*sizeof(double)); + + memcpy(irowptr_, src.irowptr_, (nrows_ + 1) * sizeof(index_type)); + memcpy(jcolind_, src.jcolind_, nnz_ * sizeof(index_type)); + memcpy(values_, src.values_, nnz_ * sizeof(double)); } /// @brief copy to 3 arrays. @@ -363,9 +330,9 @@ void hiopMatrixSparseCSRSeq::copyFrom(const hiopMatrixSparse& dm) void hiopMatrixSparseCSRSeq::copy_to(index_type* irow, index_type* jcol, double* val) { assert(irow && jcol && val); - memcpy(irow, irowptr_, (1+nrows_)*sizeof(index_type)); - memcpy(jcol, jcolind_, nnz_*sizeof(index_type)); - memcpy(val, values_, nnz_*sizeof(double)); + memcpy(irow, irowptr_, (1 + nrows_) * sizeof(index_type)); + memcpy(jcol, jcolind_, nnz_ * sizeof(index_type)); + memcpy(val, values_, nnz_ * sizeof(double)); } void hiopMatrixSparseCSRSeq::copy_to(hiopMatrixDense& W) @@ -375,10 +342,10 @@ void hiopMatrixSparseCSRSeq::copy_to(hiopMatrixDense& W) assert(W.n() == ncols_); } -void hiopMatrixSparseCSRSeq:: -addMDinvMtransToDiagBlockOfSymDeMatUTri(index_type rowAndCol_dest_start, - const double& alpha, - const hiopVector& D, hiopMatrixDense& W) const +void hiopMatrixSparseCSRSeq::addMDinvMtransToDiagBlockOfSymDeMatUTri(index_type rowAndCol_dest_start, + const double& alpha, + const hiopVector& D, + hiopMatrixDense& W) const { assert(false && "not needed"); } @@ -387,20 +354,17 @@ addMDinvMtransToDiagBlockOfSymDeMatUTri(index_type rowAndCol_dest_start, * block of W += alpha * M1 * D^{-1} * transpose(M2), where M1=this * Sizes: M1 is (m1 x nx); D is vector of len nx, M2 is (m2, nx) */ -void hiopMatrixSparseCSRSeq:: -addMDinvNtransToSymDeMatUTri(index_type row_dest_start, - index_type col_dest_start, - const double& alpha, - const hiopVector& D, - const hiopMatrixSparse& M2mat, - hiopMatrixDense& W) const +void hiopMatrixSparseCSRSeq::addMDinvNtransToSymDeMatUTri(index_type row_dest_start, + index_type col_dest_start, + const double& alpha, + const hiopVector& D, + const hiopMatrixSparse& M2mat, + hiopMatrixDense& W) const { assert(false && "not needed"); } -void hiopMatrixSparseCSRSeq::copyRowsFrom(const hiopMatrix& src_gen, - const index_type* rows_idxs, - size_type n_rows) +void hiopMatrixSparseCSRSeq::copyRowsFrom(const hiopMatrix& src_gen, const index_type* rows_idxs, size_type n_rows) { const hiopMatrixSparseCSRSeq& src = dynamic_cast(src_gen); assert(this->m() == n_rows); @@ -419,8 +383,10 @@ void hiopMatrixSparseCSRSeq::copyRowsFrom(const hiopMatrix& src_gen, * @pre 'this' must have exactly, or more cols than 'src' */ void hiopMatrixSparseCSRSeq::copyRowsBlockFrom(const hiopMatrix& src_gen, - const index_type& rows_src_idx_st, const size_type& n_rows, - const index_type& rows_dest_idx_st, const size_type& dest_nnz_st) + const index_type& rows_src_idx_st, + const size_type& n_rows, + const index_type& rows_dest_idx_st, + const size_type& dest_nnz_st) { const hiopMatrixSparseCSRSeq& src = dynamic_cast(src_gen); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); @@ -442,7 +408,7 @@ void hiopMatrixSparseCSRSeq::copySubmatrixFrom(const hiopMatrix& src_gen, auto n_cols = src.n(); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); - assert(n_cols + dest_col_st <= this->n() ); + assert(n_cols + dest_col_st <= this->n()); assert(m_rows + dest_row_st <= this->m()); assert(dest_nnz_st <= this->numberOfNonzeros()); @@ -460,51 +426,48 @@ void hiopMatrixSparseCSRSeq::copySubmatrixFromTrans(const hiopMatrix& src_gen, auto n_cols = src.m(); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); - assert(n_cols + dest_col_st <= this->n() ); + assert(n_cols + dest_col_st <= this->n()); assert(m_rows + dest_row_st <= this->m()); assert(dest_nnz_st <= this->numberOfNonzeros()); assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRSeq:: -setSubmatrixToConstantDiag_w_colpattern(const double& scalar, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const size_type& nnz_to_copy, - const hiopVector& ix) +void hiopMatrixSparseCSRSeq::setSubmatrixToConstantDiag_w_colpattern(const double& scalar, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy, + const hiopVector& ix) { assert(ix.get_local_size() + dest_row_st <= this->m()); - assert(nnz_to_copy + dest_col_st <= this->n() ); + assert(nnz_to_copy + dest_col_st <= this->n()); assert(dest_nnz_st + nnz_to_copy <= this->numberOfNonzeros()); - + assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRSeq:: -setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const size_type& nnz_to_copy, - const hiopVector& ix) +void hiopMatrixSparseCSRSeq::setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy, + const hiopVector& ix) { assert(nnz_to_copy + dest_row_st <= this->m()); - assert(ix.get_local_size() + dest_col_st <= this->n() ); + assert(ix.get_local_size() + dest_col_st <= this->n()); assert(dest_nnz_st + nnz_to_copy <= this->numberOfNonzeros()); - + assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRSeq:: -copyDiagMatrixToSubblock(const double& src_val, - const index_type& dest_row_st, - const index_type& col_dest_st, - const size_type& dest_nnz_st, - const size_type &nnz_to_copy) +void hiopMatrixSparseCSRSeq::copyDiagMatrixToSubblock(const double& src_val, + const index_type& dest_row_st, + const index_type& col_dest_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy) { - assert(this->numberOfNonzeros() >= nnz_to_copy+dest_nnz_st); + assert(this->numberOfNonzeros() >= nnz_to_copy + dest_nnz_st); assert(this->n() >= nnz_to_copy); assert(nnz_to_copy + dest_row_st <= this->m()); assert(nnz_to_copy + col_dest_st <= this->n()); @@ -512,15 +475,14 @@ copyDiagMatrixToSubblock(const double& src_val, assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRSeq:: -copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const size_type &nnz_to_copy, - const hiopVector& ix) +void hiopMatrixSparseCSRSeq::copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy, + const hiopVector& ix) { - assert(this->numberOfNonzeros() >= nnz_to_copy+dest_nnz_st); + assert(this->numberOfNonzeros() >= nnz_to_copy + dest_nnz_st); assert(this->n() >= nnz_to_copy); assert(nnz_to_copy + dest_row_st <= this->m()); assert(nnz_to_copy + dest_col_st <= this->n()); @@ -528,65 +490,65 @@ copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRSeq::print(FILE* file, const char* msg/*=nullptr*/, - int maxRows/*=-1*/, int maxCols/*=-1*/, - int rank/*=-1*/) const +void hiopMatrixSparseCSRSeq::print(FILE* file, + const char* msg /*=nullptr*/, + int maxRows /*=-1*/, + int maxCols /*=-1*/, + int rank /*=-1*/) const { - int myrank_=0, numranks=1; //this is a local object => always print + int myrank_ = 0, numranks = 1; // this is a local object => always print - if(file==nullptr) file = stdout; + if(file == nullptr) file = stdout; - int max_elems = maxRows>=0 ? maxRows : nnz_; + int max_elems = maxRows >= 0 ? maxRows : nnz_; max_elems = std::min(max_elems, nnz_); - - if(myrank_==rank || rank==-1) { + + if(myrank_ == rank || rank == -1) { std::stringstream ss; - if(nullptr==msg) { - if(numranks>1) { - ss << "CSR matrix of size " << m() << " " << n() << " and nonzeros " - << numberOfNonzeros() << ", printing " << max_elems << " elems (on rank=" - << myrank_ << ")" << std::endl; + if(nullptr == msg) { + if(numranks > 1) { + ss << "CSR matrix of size " << m() << " " << n() << " and nonzeros " << numberOfNonzeros() << ", printing " + << max_elems << " elems (on rank=" << myrank_ << ")" << std::endl; } else { - ss << "CSR matrix of size " << m() << " " << n() << " and nonzeros " - << numberOfNonzeros() << ", printing " << max_elems << " elems" << std::endl; + ss << "CSR matrix of size " << m() << " " << n() << " and nonzeros " << numberOfNonzeros() << ", printing " + << max_elems << " elems" << std::endl; } } else { ss << msg << " "; } // using matlab indices (starting at 1) - //fprintf(file, "iRow_=["); + // fprintf(file, "iRow_=["); ss << "iRow_=["; - for(index_type i=0; i computes nnz in M and allocates M -//By convention, M is mxn, X is mxK and Y is Kxn +// M = X*D*Y -> computes nnz in M and allocates M +// By convention, M is mxn, X is mxK and Y is Kxn hiopMatrixSparseCSR* hiopMatrixSparseCSRSeq::times_mat_alloc(const hiopMatrixSparseCSR& Y) const { const index_type* irowptrY = Y.i_row(); @@ -600,129 +562,127 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRSeq::times_mat_alloc(const hiopMatrixSpa const index_type K = this->n(); assert(Y.m() == K); - + index_type nnzM = 0; - // count the number of entries in the result M + // count the number of entries in the result M char* flag = new char[n]; - - for(int i=0; i=0); //overflow?!? + assert(nnzM >= 0); // overflow?!? delete[] flag; - //allocate result M + // allocate result M return new hiopMatrixSparseCSRSeq(m, n, nnzM); -} +} /** - * M = X*D*Y -> computes nnz in M and allocates M + * M = X*D*Y -> computes nnz in M and allocates M * By convention, M is mxn, X is mxK, Y is Kxn, and D is size K. - * + * * The algorithm uses the fact that the sparsity pattern of the i-th row of M is * K * M_{i*} = sum x_{ik} Y_{k*} (see Tim Davis book p.17) * k=1 * Therefore, to get sparsity pattern of the i-th row of M: * 1. we k-iterate over nonzeros (i,k) in the i-th row of X - * 2. for each such k we j-iterate over the nonzeros (k,j) in the k-th row of Y and - * 3. count (i,j) as nonzero of M + * 2. for each such k we j-iterate over the nonzeros (k,j) in the k-th row of Y and + * 3. count (i,j) as nonzero of M */ -void hiopMatrixSparseCSRSeq::times_mat_symbolic(hiopMatrixSparseCSR& M_in, - const hiopMatrixSparseCSR& Y_in) const +void hiopMatrixSparseCSRSeq::times_mat_symbolic(hiopMatrixSparseCSR& M_in, const hiopMatrixSparseCSR& Y_in) const { auto& M = dynamic_cast(M_in); auto& Y = dynamic_cast(Y_in); const index_type* irowptrY = Y.i_row(); const index_type* jcolindY = Y.j_col(); - + const index_type* irowptrX = irowptr_; const index_type* jcolindX = jcolind_; index_type* irowptrM = M.i_row(); index_type* jcolindM = M.j_col(); - + const index_type m = this->m(); const index_type n = Y.n(); - assert(M.m()==m && M.n()==n); - + assert(M.m() == m && M.n() == n); + const index_type K = this->n(); assert(Y.m() == K); - - //if(nullptr == M.buf_col_) { - // M.buf_col_ = new double[n]; - //} - //double* W = M.buf_col_; - - //char* flag=new char[n]; - - //for(int it=0; it j_idxs; - - int nnzM=0; - for(int i=0; i::iterator it=j_idxs.begin(); it!=j_idxs.end(); ++it) { - assert(nnzM::iterator it = j_idxs.begin(); it != j_idxs.end(); ++it) { + assert(nnzM < M.numberOfNonzeros()); jcolindM[nnzM++] = *it; } } irowptrM[m] = nnzM; - //delete[] flag; + // delete[] flag; } void hiopMatrixSparseCSRSeq::times_mat_numeric(double beta, @@ -735,7 +695,7 @@ void hiopMatrixSparseCSRSeq::times_mat_numeric(double beta, const index_type* irowptrY = Y.i_row(); const index_type* jcolindY = Y.j_col(); const double* valuesY = Y.M(); - + const index_type* irowptrX = irowptr_; const index_type* jcolindX = jcolind_; const double* valuesX = values_; @@ -743,78 +703,77 @@ void hiopMatrixSparseCSRSeq::times_mat_numeric(double beta, index_type* irowptrM = M.i_row(); index_type* jcolindM = M.j_col(); double* valuesM = M.M(); - + const index_type m = this->m(); const index_type n = Y.n(); - assert(M.m()==m && M.n()==n); - + assert(M.m() == m && M.n() == n); + const index_type K = this->n(); assert(Y.m() == K); - if(beta!=1.0) { + if(beta != 1.0) { int NN = M.numberOfNonzeros(); - if(beta==0.0) { - //just in case M comes uninitialized - for(index_type i=0; i=0); - if(nnz_<=0) { + assert(nnz_ >= 0); + if(nnz_ <= 0) { return; } - + assert(irowptr_); assert(jcolind_); assert(values_); @@ -834,22 +793,22 @@ void hiopMatrixSparseCSRSeq::form_from_symbolic(const hiopMatrixSparseTriplet& M const index_type* Mirow = M.i_row(); const index_type* Mjcol = M.j_col(); - //storage the row count + // storage the row count std::vector w(nrows_, 0); - - for(int it=0; it0) { - assert(Mirow[it] >= Mirow[it-1] && "row indexes of the triplet format are not ordered."); - if(Mirow[it] == Mirow[it-1]) { - assert(Mjcol[it] > Mjcol[it-1] && "col indexes of the triplet format are not ordered or unique."); + if(it > 0) { + assert(Mirow[it] >= Mirow[it - 1] && "row indexes of the triplet format are not ordered."); + if(Mirow[it] == Mirow[it - 1]) { + assert(Mjcol[it] > Mjcol[it - 1] && "col indexes of the triplet format are not ordered or unique."); } } #endif - assert(row_idx=0); - assert(Mjcol[it]=0); + assert(row_idx < nrows_ && row_idx >= 0); + assert(Mjcol[it] < ncols_ && Mjcol[it] >= 0); w[row_idx]++; @@ -857,8 +816,8 @@ void hiopMatrixSparseCSRSeq::form_from_symbolic(const hiopMatrixSparseTriplet& M } irowptr_[0] = 0; - for(int i=0; i=0); - if(nnz_<=0) { + assert(nnz_ >= 0); + if(nnz_ <= 0) { return; } - + assert(irowptr_); assert(jcolind_); assert(values_); @@ -901,61 +860,60 @@ void hiopMatrixSparseCSRSeq::form_transpose_from_symbolic(const hiopMatrixSparse assert(nullptr == row_starts_); row_starts_ = new index_type[nrows_]; - //in this method we use the row_starts_ as working buffer to count nz on each row of `this` - //at the end of this method row_starts_ keeps row starts, used by the numeric method to - //speed up computations + // in this method we use the row_starts_ as working buffer to count nz on each row of `this` + // at the end of this method row_starts_ keeps row starts, used by the numeric method to + // speed up computations { index_type* w = row_starts_; - + // initialize nz per row to zero - for(index_type i=0; i=1; --i) { - row_starts_[i] = row_starts_[i-1]; + // rollback row_starts_ + for(int i = nrows_ - 1; i >= 1; --i) { + row_starts_[i] = row_starts_[i - 1]; } - row_starts_[0]=0; + row_starts_[0] = 0; #ifndef NDEBUG - for(int i=0; i jcolind_[itnz-1] && - "something wrong: col indexes not sorted or not unique"); + for(int i = 0; i < nrows_; i++) { + for(int itnz = irowptr_[i] + 1; itnz < irowptr_[i + 1]; ++itnz) { + assert(jcolind_[itnz] > jcolind_[itnz - 1] && "something wrong: col indexes not sorted or not unique"); } } #endif @@ -967,49 +925,48 @@ void hiopMatrixSparseCSRSeq::form_transpose_from_numeric(const hiopMatrixSparseT assert(nrows_ == M.n()); assert(ncols_ == M.m()); assert(nnz_ == M.numberOfNonzeros()); - + #ifndef NDEBUG - for(int i=0; i jcolind_[itnz-1] && - "something wrong: col indexes not sorted or not unique"); + for(int i = 0; i < nrows_; i++) { + for(int itnz = irowptr_[i] + 1; itnz < irowptr_[i + 1]; ++itnz) { + assert(jcolind_[itnz] > jcolind_[itnz - 1] && "something wrong: col indexes not sorted or not unique"); } } #endif const index_type* Mirow = M.i_row(); const index_type* Mjcol = M.j_col(); - const double* Mvalues = M.M(); + const double* Mvalues = M.M(); - //populate values_ - for(index_type it=0; it=1; --i) { - row_starts_[i] = row_starts_[i-1]; + for(int i = nrows_ - 1; i >= 1; --i) { + row_starts_[i] = row_starts_[i - 1]; } - row_starts_[0]=0; + row_starts_[0] = 0; } void hiopMatrixSparseCSRSeq::form_transpose_from_symbolic(const hiopMatrixSparseCSR& M_in) { auto& M = dynamic_cast(M_in); - if(M.m()!=ncols_ || M.n()!=nrows_ || M.numberOfNonzeros()!=nnz_) { + if(M.m() != ncols_ || M.n() != nrows_ || M.numberOfNonzeros() != nnz_) { dealloc(); - + nrows_ = M.n(); ncols_ = M.m(); nnz_ = M.numberOfNonzeros(); @@ -1017,11 +974,11 @@ void hiopMatrixSparseCSRSeq::form_transpose_from_symbolic(const hiopMatrixSparse alloc(); } - assert(nnz_>=0); - if(nnz_<=0) { + assert(nnz_ >= 0); + if(nnz_ <= 0) { return; } - + assert(irowptr_); assert(jcolind_); assert(values_); @@ -1032,63 +989,61 @@ void hiopMatrixSparseCSRSeq::form_transpose_from_symbolic(const hiopMatrixSparse assert(nullptr == row_starts_); row_starts_ = new index_type[nrows_]; - //in this method we use the row_starts_ as working buffer to count nz on each row of `this` - //at the end of this method row_starts_ keeps row starts, used by the numeric method to - //speed up computations + // in this method we use the row_starts_ as working buffer to count nz on each row of `this` + // at the end of this method row_starts_ keeps row starts, used by the numeric method to + // speed up computations { index_type* w = row_starts_; - + // initialize nz per row to zero - for(index_type i=0; i=1; --i) { - row_starts_[i] = row_starts_[i-1]; + // rollback row_starts_ + for(int i = nrows_ - 1; i >= 1; --i) { + row_starts_[i] = row_starts_[i - 1]; } - row_starts_[0]=0; + row_starts_[0] = 0; #ifndef NDEBUG - for(int i=0; i jcolind_[itnz-1] && - "something wrong: col indexes not sorted or not unique"); + for(int i = 0; i < nrows_; i++) { + for(int itnz = irowptr_[i] + 1; itnz < irowptr_[i + 1]; ++itnz) { + assert(jcolind_[itnz] > jcolind_[itnz - 1] && "something wrong: col indexes not sorted or not unique"); } } #endif @@ -1101,56 +1056,51 @@ void hiopMatrixSparseCSRSeq::form_transpose_from_numeric(const hiopMatrixSparseC assert(nrows_ == M.n()); assert(ncols_ == M.m()); assert(nnz_ == M.numberOfNonzeros()); - + #ifndef NDEBUG - for(int i=0; i jcolind_[itnz-1] && - "something wrong: col indexes not sorted or not unique"); + for(int i = 0; i < nrows_; i++) { + for(int itnz = irowptr_[i] + 1; itnz < irowptr_[i + 1]; ++itnz) { + assert(jcolind_[itnz] > jcolind_[itnz - 1] && "something wrong: col indexes not sorted or not unique"); } } #endif const index_type* Mirow = M.i_row(); const index_type* Mjcol = M.j_col(); - const double* Mvalues = M.M(); + const double* Mvalues = M.M(); - //iterate over nonzeros of M to populate populate values_ - for(index_type i=0; i=1; --i) { - row_starts_[i] = row_starts_[i-1]; + // rollback row_starts_ + for(int i = nrows_ - 1; i >= 1; --i) { + row_starts_[i] = row_starts_[i - 1]; } - row_starts_[0]=0; + row_starts_[0] = 0; } - - - - void hiopMatrixSparseCSRSeq::form_diag_from_symbolic(const hiopVector& D) { int m = D.get_size(); - if(m!=ncols_ || m!=nrows_ || m!=nnz_) { + if(m != ncols_ || m != nrows_ || m != nnz_) { dealloc(); - + nrows_ = m; ncols_ = m; nnz_ = m; @@ -1162,37 +1112,30 @@ void hiopMatrixSparseCSRSeq::form_diag_from_symbolic(const hiopVector& D) assert(jcolind_); assert(values_); - for(index_type i=0; ijY ptY++; } - } - } // end of while - assert(ptX==irowptrX[i+1] || ptY==irowptrY[i+1]); - for(; ptX=0); //overflow?!? - //allocate result M + + } // end of for over rows + assert(nnzM >= 0); // overflow?!? + // allocate result M return new hiopMatrixSparseCSRSeq(nrows_, ncols_, nnzM); } /** - * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and + * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and * column indexes arrays) of `M`. * */ -void hiopMatrixSparseCSRSeq:: -add_matrix_symbolic(hiopMatrixSparseCSR& M_in, const hiopMatrixSparseCSR& Y_in) const +void hiopMatrixSparseCSRSeq::add_matrix_symbolic(hiopMatrixSparseCSR& M_in, const hiopMatrixSparseCSR& Y_in) const { auto& M = dynamic_cast(M_in); auto& Y = dynamic_cast(Y_in); @@ -1284,61 +1225,60 @@ add_matrix_symbolic(hiopMatrixSparseCSR& M_in, const hiopMatrixSparseCSR& Y_in) index_type* irowptrM = M.i_row(); index_type* jcolindM = M.j_col(); - // counter for nz in M + // counter for nz in M index_type itnnzM = 0; - - for(int i=0; ijY - jcolindM[itnnzM] = jY; + jcolindM[itnnzM] = jY; ptY++; } } itnnzM++; - } // end of while - assert(ptX==irowptrX[i+1] || ptY==irowptrY[i+1]); - for(; ptXjY #ifdef HIOP_DEEPCHECKS - assert(jY==jcolindM[itnnzM]); + assert(jY == jcolindM[itnnzM]); #endif - valuesM[itnnzM] += beta*valuesY[ptY]; + valuesM[itnnzM] += beta * valuesY[ptY]; ptY++; } } itnnzM++; - } // end of while "sorted merge" iteration - assert(ptX==irowptrX[i+1] || ptY==irowptrY[i+1]); + } // end of while "sorted merge" iteration + assert(ptX == irowptrX[i + 1] || ptY == irowptrY[i + 1]); // iterate over remaining col indexes of (i row of) X - for(; ptX jcolind_[pt]) { + } else if(jcolind_[pt - 1] > jcolind_[pt]) { printf("in row %4d, index j=%d is before j=%d (positions in jcolind are %d and %d)\n", i, - jcolind_[pt-1], + jcolind_[pt - 1], jcolind_[pt], - pt-1, + pt - 1, pt); return false; } @@ -1489,5 +1428,4 @@ bool hiopMatrixSparseCSRSeq::check_csr_is_ordered() return true; } -} //end of namespace - +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixSparseCSRSeq.hpp b/src/LinAlg/hiopMatrixSparseCSRSeq.hpp index abbb6404e..753f503ea 100644 --- a/src/LinAlg/hiopMatrixSparseCSRSeq.hpp +++ b/src/LinAlg/hiopMatrixSparseCSRSeq.hpp @@ -71,8 +71,8 @@ namespace hiop * @brief Sparse matrix of doubles in compressed row format for use on CPU/host. Data * is not (memory, MPI) distributed. * - * @note The methods of this class expect and maintains unique and ordered column indexes - * within the same row. + * @note The methods of this class expect and maintains unique and ordered column indexes + * within the same row. * * Note: most of the methods are not implemented (TODO) as this is work in progress (wip). */ @@ -91,11 +91,11 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR virtual void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows); - virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; - virtual void timesVec(double beta, double* y, double alpha, const double* x) const; + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + virtual void timesVec(double beta, double* y, double alpha, const double* x) const; - virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; - virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const; + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const; virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const; @@ -106,7 +106,7 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR virtual void addDiagonal(const double& alpha, const hiopVector& d_); virtual void addDiagonal(const double& value); virtual void addSubDiagonal(const double& alpha, index_type start, const hiopVector& d_); - + /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ @@ -114,7 +114,7 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR const double& alpha, const hiopVector& d_, index_type start_on_src_vec, - int num_elems=-1) + int num_elems = -1) { assert(false && "not needed / implemented"); } @@ -124,18 +124,18 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR } /* Add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', - * scaled by 'scal' - */ + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', + * scaled by 'scal' + */ virtual void copySubDiagonalFrom(const index_type& start_on_dest_diag, const size_type& num_elems, const hiopVector& d_, const index_type& start_on_nnz_idx, - double scal=1.0); + double scal = 1.0); /* add constant 'c' to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements. - * The number of elements added is 'num_elems' - */ + * The number of elements added is 'num_elems' + */ virtual void setSubDiagonalTo(const index_type& start_on_dest_diag, const size_type& num_elems, const double& c, @@ -148,16 +148,12 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR index_type col_dest_start, double alpha, hiopMatrixDense& W) const; - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(index_type diag_start, - double alpha, - hiopMatrixDense& W) const + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(index_type diag_start, double alpha, hiopMatrixDense& W) const { assert(false && "not yet implemented"); } - virtual void addUpperTriangleToSymSparseMatrixUpperTriangle(index_type diag_start, - double alpha, - hiopMatrixSparse& W) const + virtual void addUpperTriangleToSymSparseMatrixUpperTriangle(index_type diag_start, double alpha, hiopMatrixSparse& W) const { assert(false && "not yet implemented"); } @@ -168,7 +164,7 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR */ virtual void addMDinvMtransToDiagBlockOfSymDeMatUTri(index_type rowCol_dest_start, const double& alpha, - const hiopVector& D, + const hiopVector& D, hiopMatrixDense& W) const; /* Block of W += alpha * M * D^{-1} * transpose(N), where M=this @@ -194,32 +190,32 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR const size_type& dest_nnz_st); /** - * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' - * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to - * this->numOfNumbers() - * @pre User must know the nonzero pattern of src and dest matrices. The method assumes - * that non-zero patterns does not change between calls and that 'src_gen' is a valid - * submatrix of 'this' - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' + * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to + * this->numOfNumbers() + * @pre User must know the nonzero pattern of src and dest matrices. The method assumes + * that non-zero patterns does not change between calls and that 'src_gen' is a valid + * submatrix of 'this' + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFrom(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, const bool offdiag_only = false); - + /** - * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner - * 'dest_row_st' and 'dest_col_st'. - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner + * 'dest_row_st' and 'dest_col_st'. + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFromTrans(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, @@ -227,12 +223,12 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR const bool offdiag_only = false); /** - * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_colpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -241,12 +237,12 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR const hiopVector& ix); /** - * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -255,46 +251,46 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR const hiopVector& ix); /** - * @brief Copy a diagonal matrix to destination. - * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. - * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. USE WITH CAUTION! - */ + * @brief Copy a diagonal matrix to destination. + * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. + * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. USE WITH CAUTION! + */ virtual void copyDiagMatrixToSubblock(const double& src_val, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, - const size_type &nnz_to_copy); - - /** - * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. - * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. - * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - * @pre 'pattern' has same size as `x`. - * @pre 'pattern` has exactly `nnz_to_copy` nonzeros. - */ + const size_type& nnz_to_copy); + + /** + * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. + * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. + * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + * @pre 'pattern' has same size as `x`. + * @pre 'pattern` has exactly `nnz_to_copy` nonzeros. + */ virtual void copyDiagMatrixToSubblock_w_pattern(const hiopVector& x, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, - const size_type &nnz_to_copy, + const size_type& nnz_to_copy, const hiopVector& pattern); virtual double max_abs_value(); - virtual void row_max_abs_value(hiopVector &ret_vec); - - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale=false); + virtual void row_max_abs_value(hiopVector& ret_vec); + + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale = false); virtual bool isfinite() const; - virtual void print(FILE* f=nullptr, const char* msg=nullptr, int maxRows=-1, int maxCols=-1, int rank=-1) const; + virtual void print(FILE* f = nullptr, const char* msg = nullptr, int maxRows = -1, int maxCols = -1, int rank = -1) const; virtual void startingAtAddSubDiagonalToStartingAt(index_type diag_src_start, const double& alpha, hiopVector& vec_dest, index_type vec_start, - size_type num_elems=-1) const + size_type num_elems = -1) const { assert(0 && "not implemented; should be used only for symmetric matrices."); } @@ -334,41 +330,20 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR virtual hiopMatrixSparse* alloc_clone() const; virtual hiopMatrixSparse* new_copy() const; - inline index_type* i_row() - { - return irowptr_; - } - inline index_type* j_col() - { - return jcolind_; - } - inline double* M() - { - return values_; - } - inline const index_type* i_row() const - { - return irowptr_; - } - inline const index_type* j_col() const - { - return jcolind_; - } - inline const double* M() const - { - return values_; - } + inline index_type* i_row() { return irowptr_; } + inline index_type* j_col() { return jcolind_; } + inline double* M() { return values_; } + inline const index_type* i_row() const { return irowptr_; } + inline const index_type* j_col() const { return jcolind_; } + inline const double* M() const { return values_; } #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const + virtual bool assertSymmetry(double tol = 1e-16) const { assert(false && "not yet implemented"); return false; } - virtual bool checkIndexesAreOrdered() const - { - return true; - } + virtual bool checkIndexesAreOrdered() const { return true; } #endif /** @@ -376,10 +351,7 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR * * @pre `this` matrix needs to be symmetric and of same size(s) as `diag_out` */ - virtual void extract_diagonal(hiopVector& diag_out) const - { - assert(false && "wip"); - } + virtual void extract_diagonal(hiopVector& diag_out) const { assert(false && "wip"); } /** * Sets the diagonal of `this` to the constant `val`. If `val` is zero, the sparsity pattern @@ -388,59 +360,59 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR * @pre `this` is expected to store the diagonal entries as nonzero elements. */ virtual void set_diagonal(const double& val); - + /** - * Allocates a CSR matrix capable of storing the multiplication result of M = X*Y, where X + * Allocates a CSR matrix capable of storing the multiplication result of M = X*Y, where X * is the calling matrix class (`this`) and Y is the `Y` argument of the method. * * @note Should be used in conjunction with `times_mat_symbolic` and `times_mat_numeric` - * + * * @pre The dimensions of the matrices should be consistent with the multiplication. - * + * */ hiopMatrixSparseCSR* times_mat_alloc(const hiopMatrixSparseCSR& Y) const; - + /** * Computes sparsity pattern, meaning computes row pointers and column indexes of `M`, - * of M = X*Y, where X is the calling matrix class (`this`) and Y is the second argument. + * of M = X*Y, where X is the calling matrix class (`this`) and Y is the second argument. * * @note The output matrix `M` will have unique and ordered column indexes (with the same * row) * * @note Specializations of this class may only be able to compute the sparsity pattern in - * tandem with the numerical multiplications (for example, because of API limitations). - * In this cases, the `times_mat_numeric` will take over sparsity computations and the + * tandem with the numerical multiplications (for example, because of API limitations). + * In this cases, the `times_mat_numeric` will take over sparsity computations and the * arrays with row pointers and column indexes may be uninitialized after this call. - * + * * @pre The dimensions of the matrices should be consistent with the multiplication. - * + * * @pre The column indexes within the same row must be unique and ordered for `Y`. - * - * @pre The internal arrays of `M` should have enough storage to hold the sparsity - * pattern (row pointers and column indexes) and values of the multiplication result. + * + * @pre The internal arrays of `M` should have enough storage to hold the sparsity + * pattern (row pointers and column indexes) and values of the multiplication result. * This preallocation can be done by calling `times_mat_alloc` prior to this method. - * + * */ - void times_mat_symbolic(hiopMatrixSparseCSR& M, const hiopMatrixSparseCSR& Y) const; + void times_mat_symbolic(hiopMatrixSparseCSR& M, const hiopMatrixSparseCSR& Y) const; /** * Computes (numerical values of) M = beta*M + alpha*X*D*Y, where X is the calling matrix * class (`this`), beta and alpha are scalars passed as arguments, and M and Y are matrices * of appropriate sizes passed as arguments. * - * @note Generally, only the nonzero values of the input/output argument `M` are updated + * @note Generally, only the nonzero values of the input/output argument `M` are updated * since the sparsity pattern (row pointers and column indexes) of `M` should have been * already computed by `times_mat_symbolic`. Some specializations of this method may be - * restricted to performing both phases in inside this method. + * restricted to performing both phases in inside this method. * * @pre The dimensions of the matrices should be consistent with the multiplication. * * @pre The column indexes within the same row must be unique and ordered both for input * matrices and result matrix `M`. * - * @pre The indexes arrays of `this`, `Y`, and `M` should not have changed since the + * @pre The indexes arrays of `this`, `Y`, and `M` should not have changed since the * last call to `times_diag_times_mat`. - * + * * Example of usage: * //initially allocate and compute M * auto* M = X.times_mat_alloc(Y); @@ -449,12 +421,9 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR * ... calculations .... * //if only nonzero entries of X and Y have changed, call the fast multiplication routine * X.times_mat_numeric(0.0, M, 1.0, Y); - * + * */ - void times_mat_numeric(double beta, - hiopMatrixSparseCSR& M, - double alpha, - const hiopMatrixSparseCSR& Y); + void times_mat_numeric(double beta, hiopMatrixSparseCSR& M, double alpha, const hiopMatrixSparseCSR& Y); /// @brief Column scaling or right multiplication by a diagonal: `this`=`this`*D void scale_cols(const hiopVector& D); @@ -462,11 +431,10 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR /// @brief Row scaling or left multiplication by a diagonal: `this`=D*`this` void scale_rows(const hiopVector& D); - /** - * Allocates and populates the sparsity pattern of `this` as the CSR representation + * Allocates and populates the sparsity pattern of `this` as the CSR representation * of the triplet matrix `M`. - * + * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. */ @@ -476,7 +444,7 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR /** * Copies the numerical values of the triplet matrix M into the CSR matrix `this` * - * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be + * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be * allocated and populated, possibly by a previous call to `form_from_symbolic` * * @pre The input argument should have the nonzeros sorted by row and then by column @@ -484,73 +452,73 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR */ //// note: only device cuda memcpy void form_from_numeric(const hiopMatrixSparseTriplet& M); - + /** - * Allocates and populates the sparsity pattern of `this` as the CSR representation + * Allocates and populates the sparsity pattern of `this` as the CSR representation * of transpose of the triplet matrix `M`. - * + * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. */ //// note: cusparseCsr2cscEx2 void form_transpose_from_symbolic(const hiopMatrixSparseTriplet& M); - + /** - * Copies the numerical values of the transpose of the triplet matrix M into the + * Copies the numerical values of the transpose of the triplet matrix M into the * CSR matrix `this` * - * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be + * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be * allocated and populated, possibly by a previous call to `form_transpose_from_symbolic` * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. - */ + */ void form_transpose_from_numeric(const hiopMatrixSparseTriplet& M); /** - * Allocates and populates the sparsity pattern of `this` as the CSR representation + * Allocates and populates the sparsity pattern of `this` as the CSR representation * of transpose of the CSR matrix `M`. - * + * * @pre The input argument should have the column indexes sorted and unique within a row. */ virtual void form_transpose_from_symbolic(const hiopMatrixSparseCSR& M); - + /** * Copies the numerical values of the transpose of the CSR matrix M into the CSR matrix `this`. * - * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be + * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be * allocated and populated, possibly by a previous call to `form_transpose_from_symbolic` * * @pre The input argument should have the column indexes sorted and unique within a row. - */ + */ virtual void form_transpose_from_numeric(const hiopMatrixSparseCSR& M); - + /** * (Re)Initializes `this` to a diagonal matrix with diagonal entries given by D. */ void form_diag_from_symbolic(const hiopVector& D); - + /** * Sets the diagonal entries of `this` equal to entries of D - * + * * @pre Length of `D` should be equal to size(s) of `this` - * + * * @pre `this` should be a diagonal matrix (in CSR format) with storage for * all the diagonal entries, which can be ensured by calling the sister method * `form_diag_from_symbolic` */ void form_diag_from_numeric(const hiopVector& D); - + /** - * Allocates and returns CSR matrix `M` capable of holding M = X+Y, where X is + * Allocates and returns CSR matrix `M` capable of holding M = X+Y, where X is * the calling matrix class (`this`) and Y is the argument passed to the method. */ hiopMatrixSparseCSR* add_matrix_alloc(const hiopMatrixSparseCSR& Y) const; /** - * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and + * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and * column indexes arrays) of `M`. `X` is this. - * + * * @pre `this` and `Y` should hold matrices of identical dimensions. * */ @@ -559,17 +527,14 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR /** * Performs matrix addition M = gamma*M + alpha*X + beta*Y numerically, where * X is `this` and alpha and beta are scalars. - * + * * @pre `M`, `this` and `Y` should hold matrices of identical dimensions. - * - * @pre `M` and `X+Y` should have identical sparsity pattern, namely the + * + * @pre `M` and `X+Y` should have identical sparsity pattern, namely the * `add_matrix_symbolic` should have been called previously. * */ - void add_matrix_numeric(hiopMatrixSparseCSR& M, - double alpha, - const hiopMatrixSparseCSR& Y, - double beta) const; + void add_matrix_numeric(hiopMatrixSparseCSR& M, double alpha, const hiopMatrixSparseCSR& Y, double beta) const; /// @brief Performs a quick check and returns false if the CSR indexes are not ordered bool check_csr_is_ordered(); @@ -580,12 +545,13 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR private: void alloc(); void dealloc(); + protected: friend class hiopMatrixSparseCSRCUDA; ExecSpace exec_space_; - + //// inherits nrows_, ncols_, and nnz_ from hiopSparseMatrix - + /// Row pointers (starting indexes) in the column and values arrays index_type* irowptr_; @@ -599,16 +565,15 @@ class hiopMatrixSparseCSRSeq : public hiopMatrixSparseCSR double* buf_col_; /** - * Storage for the row starts used by `form_transpose_from_xxx` methods (allocated on + * Storage for the row starts used by `form_transpose_from_xxx` methods (allocated on * demand, only the above mentioned methods are called) */ index_type* row_starts_; - + private: - hiopMatrixSparseCSRSeq(const hiopMatrixSparseCSRSeq&) = delete; + hiopMatrixSparseCSRSeq(const hiopMatrixSparseCSRSeq&) = delete; }; - -} //end of namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopMatrixSparseCsrCuda.cpp b/src/LinAlg/hiopMatrixSparseCsrCuda.cpp index 818b967b6..2d719a35b 100644 --- a/src/LinAlg/hiopMatrixSparseCsrCuda.cpp +++ b/src/LinAlg/hiopMatrixSparseCsrCuda.cpp @@ -62,15 +62,15 @@ #include "MatrixSparseCsrCudaKernels.hpp" #include "MemBackendCudaImpl.hpp" -#include //for std::min -#include //for std::isfinite +#include //for std::min +#include //for std::isfinite #include #include #include #include #include -#include // std::cout, std::fixed -#include // std::setprecision +#include // std::cout, std::fixed +#include // std::setprecision #include "hiopCppStdUtils.hpp" #include @@ -79,16 +79,16 @@ namespace hiop { hiopMatrixSparseCSRCUDA::hiopMatrixSparseCSRCUDA(size_type rows, size_type cols, size_type nnz) - : hiopMatrixSparseCSR(rows, cols, nnz), - irowptr_(nullptr), - jcolind_(nullptr), - values_(nullptr), - buffer_csc2csr_(nullptr), - buffer_geam2_(nullptr), - buffer_gemm3_(nullptr), - buffer_gemm4_(nullptr), - buffer_gemm5_(nullptr), - mat_sp_descr_(nullptr) + : hiopMatrixSparseCSR(rows, cols, nnz), + irowptr_(nullptr), + jcolind_(nullptr), + values_(nullptr), + buffer_csc2csr_(nullptr), + buffer_geam2_(nullptr), + buffer_gemm3_(nullptr), + buffer_gemm4_(nullptr), + buffer_gemm5_(nullptr), + mat_sp_descr_(nullptr) { cusparseStatus_t ret_sp = cusparseCreate(&h_cusparse_); assert(ret_sp == CUSPARSE_STATUS_SUCCESS); @@ -107,16 +107,16 @@ hiopMatrixSparseCSRCUDA::hiopMatrixSparseCSRCUDA(size_type rows, size_type cols, } hiopMatrixSparseCSRCUDA::hiopMatrixSparseCSRCUDA() - : hiopMatrixSparseCSR(0, 0, 0), - irowptr_(nullptr), - jcolind_(nullptr), - values_(nullptr), - buffer_csc2csr_(nullptr), - buffer_geam2_(nullptr), - buffer_gemm3_(nullptr), - buffer_gemm4_(nullptr), - buffer_gemm5_(nullptr), - mat_sp_descr_(nullptr) + : hiopMatrixSparseCSR(0, 0, 0), + irowptr_(nullptr), + jcolind_(nullptr), + values_(nullptr), + buffer_csc2csr_(nullptr), + buffer_geam2_(nullptr), + buffer_gemm3_(nullptr), + buffer_gemm4_(nullptr), + buffer_gemm5_(nullptr), + mat_sp_descr_(nullptr) { cusparseStatus_t ret_sp = cusparseCreate(&h_cusparse_); assert(ret_sp == CUSPARSE_STATUS_SUCCESS); @@ -131,7 +131,7 @@ hiopMatrixSparseCSRCUDA::hiopMatrixSparseCSRCUDA() st = cusparseSpGEMM_createDescr(&gemm_sp_descr_); assert(st == CUSPARSE_STATUS_SUCCESS); } - + hiopMatrixSparseCSRCUDA::~hiopMatrixSparseCSRCUDA() { dealloc(); @@ -142,14 +142,14 @@ hiopMatrixSparseCSRCUDA::~hiopMatrixSparseCSRCUDA() assert(cudaSuccess == cret); cret = cudaFree(buffer_gemm3_); assert(cudaSuccess == cret); - + cret = cudaFree(buffer_geam2_); assert(cudaSuccess == cret); cret = cudaFree(buffer_csc2csr_); assert(cudaSuccess == cret); - + cusparseDestroy(h_cusparse_); - //cusolverSpDestroy(h_cusolver_); + // cusolverSpDestroy(h_cusolver_); cusparseStatus_t st = cusparseDestroyMatDescr(mat_descr_); assert(st == CUSPARSE_STATUS_SUCCESS); @@ -161,13 +161,13 @@ hiopMatrixSparseCSRCUDA::~hiopMatrixSparseCSRCUDA() void hiopMatrixSparseCSRCUDA::alloc() { cudaError_t err; - err = cudaMalloc(&irowptr_, (nrows_+1)*sizeof(index_type)); + err = cudaMalloc(&irowptr_, (nrows_ + 1) * sizeof(index_type)); assert(cudaSuccess == err && irowptr_); - - err = cudaMalloc(&jcolind_, nnz_*sizeof(index_type)); + + err = cudaMalloc(&jcolind_, nnz_ * sizeof(index_type)); assert(cudaSuccess == err && jcolind_); - - err = cudaMalloc(&values_, nnz_*sizeof(double)); + + err = cudaMalloc(&values_, nnz_ * sizeof(double)); assert(cudaSuccess == err && values_); assert(nullptr == mat_sp_descr_); @@ -186,7 +186,7 @@ void hiopMatrixSparseCSRCUDA::alloc() } void hiopMatrixSparseCSRCUDA::dealloc() -{ +{ auto st = cusparseDestroySpMat(mat_sp_descr_); assert(st == CUSPARSE_STATUS_SUCCESS); mat_sp_descr_ = nullptr; @@ -199,26 +199,17 @@ void hiopMatrixSparseCSRCUDA::dealloc() err = cudaFree(jcolind_); assert(cudaSuccess == err); jcolind_ = nullptr; - + err = cudaFree(irowptr_); assert(cudaSuccess == err); irowptr_ = nullptr; } -void hiopMatrixSparseCSRCUDA::setToZero() -{ - assert(false && "work in progress"); -} -void hiopMatrixSparseCSRCUDA::setToConstant(double c) -{ - assert(false && "work in progress"); -} +void hiopMatrixSparseCSRCUDA::setToZero() { assert(false && "work in progress"); } +void hiopMatrixSparseCSRCUDA::setToConstant(double c) { assert(false && "work in progress"); } /** y = beta * y + alpha * this * x */ -void hiopMatrixSparseCSRCUDA::timesVec(double beta, - hiopVector& y, - double alpha, - const hiopVector& x) const +void hiopMatrixSparseCSRCUDA::timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { assert(false && "work in progress"); assert(x.get_size() == ncols_); @@ -234,62 +225,44 @@ void hiopMatrixSparseCSRCUDA::timesVec(double beta, } /** y = beta * y + alpha * this * x */ -void hiopMatrixSparseCSRCUDA::timesVec(double beta, - double* y, - double alpha, - const double* x) const +void hiopMatrixSparseCSRCUDA::timesVec(double beta, double* y, double alpha, const double* x) const { assert(false && "not yet implemented"); } /** y = beta * y + alpha * this^T * x */ -void hiopMatrixSparseCSRCUDA::transTimesVec(double beta, - hiopVector& y, - double alpha, - const hiopVector& x) const +void hiopMatrixSparseCSRCUDA::transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { assert(false && "work in progress"); assert(x.get_size() == nrows_); assert(y.get_size() == ncols_); - + hiopVectorPar& yy = dynamic_cast(y); const hiopVectorPar& xx = dynamic_cast(x); - + double* y_data = yy.local_data(); const double* x_data = xx.local_data_const(); - + transTimesVec(beta, y_data, alpha, x_data); } /** y = beta * y + alpha * this^T * x */ -void hiopMatrixSparseCSRCUDA::transTimesVec(double beta, - double* y, - double alpha, - const double* x) const +void hiopMatrixSparseCSRCUDA::transTimesVec(double beta, double* y, double alpha, const double* x) const { assert(false && "not yet implemented"); -} +} -void hiopMatrixSparseCSRCUDA::timesMat(double beta, - hiopMatrix& W, - double alpha, - const hiopMatrix& X) const +void hiopMatrixSparseCSRCUDA::timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const { assert(false && "not needed"); } -void hiopMatrixSparseCSRCUDA::transTimesMat(double beta, - hiopMatrix& W, - double alpha, - const hiopMatrix& X) const +void hiopMatrixSparseCSRCUDA::transTimesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const { assert(false && "not needed"); } -void hiopMatrixSparseCSRCUDA::timesMatTrans(double beta, - hiopMatrix& Wmat, - double alpha, - const hiopMatrix& M2mat) const +void hiopMatrixSparseCSRCUDA::timesMatTrans(double beta, hiopMatrix& Wmat, double alpha, const hiopMatrix& M2mat) const { assert(false && "not needed"); } @@ -340,46 +313,42 @@ void hiopMatrixSparseCSRCUDA::setSubDiagonalTo(const index_type& start_on_dest_d const double& c, const index_type& start_on_nnz_idx) { - assert(start_on_dest_diag>=0 && start_on_dest_diag+num_elems<=this->nrows_); + assert(start_on_dest_diag >= 0 && start_on_dest_diag + num_elems <= this->nrows_); assert(false && "not implemented"); } -void hiopMatrixSparseCSRCUDA::addMatrix(double alpha, const hiopMatrix& X) -{ - assert(false && "not needed"); -} +void hiopMatrixSparseCSRCUDA::addMatrix(double alpha, const hiopMatrix& X) { assert(false && "not needed"); } /* block of W += alpha*transpose(this) * Note W; contains only the upper triangular entries */ -void hiopMatrixSparseCSRCUDA:: -transAddToSymDenseMatrixUpperTriangle(index_type row_start, - index_type col_start, - double alpha, - hiopMatrixDense& W) const +void hiopMatrixSparseCSRCUDA::transAddToSymDenseMatrixUpperTriangle(index_type row_start, + index_type col_start, + double alpha, + hiopMatrixDense& W) const { - assert(row_start>=0 && row_start+ncols_<=W.m()); - assert(col_start>=0 && col_start+nrows_<=W.n()); - assert(W.n()==W.m()); - + assert(row_start >= 0 && row_start + ncols_ <= W.m()); + assert(col_start >= 0 && col_start + nrows_ <= W.n()); + assert(W.n() == W.m()); + assert(false && "not yet implemented"); } double hiopMatrixSparseCSRCUDA::max_abs_value() { assert(false && "work in progress"); - //char norm='M'; size_type one=1; - //double maxv = DLANGE(&norm, &one, &nnz_, values_, &one, nullptr); - //return maxv; + // char norm='M'; size_type one=1; + // double maxv = DLANGE(&norm, &one, &nnz_, values_, &one, nullptr); + // return maxv; return 0.0; } -void hiopMatrixSparseCSRCUDA::row_max_abs_value(hiopVector &ret_vec) +void hiopMatrixSparseCSRCUDA::row_max_abs_value(hiopVector& ret_vec) { assert(ret_vec.get_local_size() == nrows_); assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRCUDA::scale_row(hiopVector &vec_scal, const bool inv_scale) +void hiopMatrixSparseCSRCUDA::scale_row(hiopVector& vec_scal, const bool inv_scale) { assert(vec_scal.get_local_size() == nrows_); assert(false && "not yet implemented"); @@ -388,15 +357,12 @@ void hiopMatrixSparseCSRCUDA::scale_row(hiopVector &vec_scal, const bool inv_sca bool hiopMatrixSparseCSRCUDA::isfinite() const { assert(false && "work in progress"); - for(index_type i=0; ii_row(), 1+nrows_, exec_space_); + W.exec_space_.copy(W.i_row(), this->i_row(), 1 + nrows_, exec_space_); W.exec_space_.copy(W.j_col(), this->j_col(), nnz_, exec_space_); W.exec_space_.copy(W.M(), this->M(), nnz_, exec_space_); } -void hiopMatrixSparseCSRCUDA:: -addMDinvMtransToDiagBlockOfSymDeMatUTri(index_type rowAndCol_dest_start, - const double& alpha, - const hiopVector& D, hiopMatrixDense& W) const +void hiopMatrixSparseCSRCUDA::addMDinvMtransToDiagBlockOfSymDeMatUTri(index_type rowAndCol_dest_start, + const double& alpha, + const hiopVector& D, + hiopMatrixDense& W) const { assert(false && "not needed"); } @@ -450,20 +416,17 @@ addMDinvMtransToDiagBlockOfSymDeMatUTri(index_type rowAndCol_dest_start, * block of W += alpha * M1 * D^{-1} * transpose(M2), where M1=this * Sizes: M1 is (m1 x nx); D is vector of len nx, M2 is (m2, nx) */ -void hiopMatrixSparseCSRCUDA:: -addMDinvNtransToSymDeMatUTri(index_type row_dest_start, - index_type col_dest_start, - const double& alpha, - const hiopVector& D, - const hiopMatrixSparse& M2mat, - hiopMatrixDense& W) const +void hiopMatrixSparseCSRCUDA::addMDinvNtransToSymDeMatUTri(index_type row_dest_start, + index_type col_dest_start, + const double& alpha, + const hiopVector& D, + const hiopMatrixSparse& M2mat, + hiopMatrixDense& W) const { assert(false && "not needed"); } -void hiopMatrixSparseCSRCUDA::copyRowsFrom(const hiopMatrix& src_gen, - const index_type* rows_idxs, - size_type n_rows) +void hiopMatrixSparseCSRCUDA::copyRowsFrom(const hiopMatrix& src_gen, const index_type* rows_idxs, size_type n_rows) { const hiopMatrixSparseCSRCUDA& src = dynamic_cast(src_gen); assert(this->m() == n_rows); @@ -482,8 +445,10 @@ void hiopMatrixSparseCSRCUDA::copyRowsFrom(const hiopMatrix& src_gen, * @pre 'this' must have exactly, or more cols than 'src' */ void hiopMatrixSparseCSRCUDA::copyRowsBlockFrom(const hiopMatrix& src_gen, - const index_type& rows_src_idx_st, const size_type& n_rows, - const index_type& rows_dest_idx_st, const size_type& dest_nnz_st) + const index_type& rows_src_idx_st, + const size_type& n_rows, + const index_type& rows_dest_idx_st, + const size_type& dest_nnz_st) { const hiopMatrixSparseCSRCUDA& src = dynamic_cast(src_gen); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); @@ -505,7 +470,7 @@ void hiopMatrixSparseCSRCUDA::copySubmatrixFrom(const hiopMatrix& src_gen, auto n_cols = src.n(); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); - assert(n_cols + dest_col_st <= this->n() ); + assert(n_cols + dest_col_st <= this->n()); assert(m_rows + dest_row_st <= this->m()); assert(dest_nnz_st <= this->numberOfNonzeros()); @@ -523,51 +488,48 @@ void hiopMatrixSparseCSRCUDA::copySubmatrixFromTrans(const hiopMatrix& src_gen, auto n_cols = src.m(); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); - assert(n_cols + dest_col_st <= this->n() ); + assert(n_cols + dest_col_st <= this->n()); assert(m_rows + dest_row_st <= this->m()); assert(dest_nnz_st <= this->numberOfNonzeros()); assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRCUDA:: -setSubmatrixToConstantDiag_w_colpattern(const double& scalar, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const size_type& nnz_to_copy, - const hiopVector& ix) +void hiopMatrixSparseCSRCUDA::setSubmatrixToConstantDiag_w_colpattern(const double& scalar, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy, + const hiopVector& ix) { assert(ix.get_local_size() + dest_row_st <= this->m()); - assert(nnz_to_copy + dest_col_st <= this->n() ); + assert(nnz_to_copy + dest_col_st <= this->n()); assert(dest_nnz_st + nnz_to_copy <= this->numberOfNonzeros()); - + assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRCUDA:: -setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const size_type& nnz_to_copy, - const hiopVector& ix) +void hiopMatrixSparseCSRCUDA::setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy, + const hiopVector& ix) { assert(nnz_to_copy + dest_row_st <= this->m()); - assert(ix.get_local_size() + dest_col_st <= this->n() ); + assert(ix.get_local_size() + dest_col_st <= this->n()); assert(dest_nnz_st + nnz_to_copy <= this->numberOfNonzeros()); - + assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRCUDA:: -copyDiagMatrixToSubblock(const double& src_val, - const index_type& dest_row_st, - const index_type& col_dest_st, - const size_type& dest_nnz_st, - const size_type &nnz_to_copy) +void hiopMatrixSparseCSRCUDA::copyDiagMatrixToSubblock(const double& src_val, + const index_type& dest_row_st, + const index_type& col_dest_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy) { - assert(this->numberOfNonzeros() >= nnz_to_copy+dest_nnz_st); + assert(this->numberOfNonzeros() >= nnz_to_copy + dest_nnz_st); assert(this->n() >= nnz_to_copy); assert(nnz_to_copy + dest_row_st <= this->m()); assert(nnz_to_copy + col_dest_st <= this->n()); @@ -575,15 +537,14 @@ copyDiagMatrixToSubblock(const double& src_val, assert(false && "not yet implemented"); } -void hiopMatrixSparseCSRCUDA:: -copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, - const index_type& dest_row_st, - const index_type& dest_col_st, - const size_type& dest_nnz_st, - const size_type &nnz_to_copy, - const hiopVector& ix) +void hiopMatrixSparseCSRCUDA::copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, + const index_type& dest_row_st, + const index_type& dest_col_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy, + const hiopVector& ix) { - assert(this->numberOfNonzeros() >= nnz_to_copy+dest_nnz_st); + assert(this->numberOfNonzeros() >= nnz_to_copy + dest_nnz_st); assert(this->n() >= nnz_to_copy); assert(nnz_to_copy + dest_row_st <= this->m()); assert(nnz_to_copy + dest_col_st <= this->n()); @@ -592,67 +553,64 @@ copyDiagMatrixToSubblock_w_pattern(const hiopVector& dx, } void hiopMatrixSparseCSRCUDA::print(FILE* file, - const char* msg/*=nullptr*/, - int maxRows/*=-1*/, - int maxCols/*=-1*/, - int rank/*=-1*/) const + const char* msg /*=nullptr*/, + int maxRows /*=-1*/, + int maxCols /*=-1*/, + int rank /*=-1*/) const { - - int myrank_=0, numranks=1; //this is a local object => always print + int myrank_ = 0, numranks = 1; // this is a local object => always print - if(file==nullptr) file = stdout; + if(file == nullptr) file = stdout; - int max_elems = maxRows>=0 ? maxRows : nnz_; + int max_elems = maxRows >= 0 ? maxRows : nnz_; max_elems = std::min(max_elems, nnz_); - - if(myrank_==rank || rank==-1) { - index_type* irowptr = new index_type[nrows_+1]; + if(myrank_ == rank || rank == -1) { + index_type* irowptr = new index_type[nrows_ + 1]; index_type* jcolind = new index_type[nnz_]; double* values = new double[nnz_]; - - cudaMemcpy(irowptr, irowptr_, (nrows_+1)*sizeof(index_type), cudaMemcpyDeviceToHost); - cudaMemcpy(jcolind, jcolind_, nnz_*sizeof(index_type), cudaMemcpyDeviceToHost); - cudaMemcpy(values, values_, nnz_*sizeof(double), cudaMemcpyDeviceToHost); - + + cudaMemcpy(irowptr, irowptr_, (nrows_ + 1) * sizeof(index_type), cudaMemcpyDeviceToHost); + cudaMemcpy(jcolind, jcolind_, nnz_ * sizeof(index_type), cudaMemcpyDeviceToHost); + cudaMemcpy(values, values_, nnz_ * sizeof(double), cudaMemcpyDeviceToHost); + std::stringstream ss; - if(nullptr==msg) { - if(numranks>1) { - ss << "CSR CUDA matrix of size " << m() << " " << n() << " and nonzeros " - << numberOfNonzeros() << ", printing " << max_elems << " elems (on rank=" - << myrank_ << ")" << std::endl; + if(nullptr == msg) { + if(numranks > 1) { + ss << "CSR CUDA matrix of size " << m() << " " << n() << " and nonzeros " << numberOfNonzeros() << ", printing " + << max_elems << " elems (on rank=" << myrank_ << ")" << std::endl; } else { - ss << "CSR CUDA matrix of size " << m() << " " << n() << " and nonzeros " - << numberOfNonzeros() << ", printing " << max_elems << " elems" << std::endl; + ss << "CSR CUDA matrix of size " << m() << " " << n() << " and nonzeros " << numberOfNonzeros() << ", printing " + << max_elems << " elems" << std::endl; } } else { ss << msg << " "; } // using matlab indices (starting at 1) - //fprintf(file, "iRow_=["); + // fprintf(file, "iRow_=["); ss << "iRow_=["; - for(index_type i=0; i(Y_in); auto& X = *this; - + assert(ncols_ == Y.m()); - + cusparseStatus_t st; cudaError_t cret; - + // // create a temporary matrix descriptor for M cusparseSpMatDescr_t mat_descrM; @@ -690,15 +647,15 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::times_mat_alloc(const hiopMatrixSp CUSPARSE_INDEX_BASE_ZERO, CUDA_R_64F); assert(st == CUSPARSE_STATUS_SUCCESS); - + cusparseSpGEMMDescr_t spgemmDesc; st = cusparseSpGEMM_createDescr(&spgemmDesc); assert(st == CUSPARSE_STATUS_SUCCESS); - + cusparseOperation_t opX = CUSPARSE_OPERATION_NON_TRANSPOSE; cusparseOperation_t opY = CUSPARSE_OPERATION_NON_TRANSPOSE; - - //inquire buffer size + + // inquire buffer size size_t buff_size = 0; st = cusparseSpGEMMreuse_workEstimation(h_cusparse_, opX, @@ -711,13 +668,13 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::times_mat_alloc(const hiopMatrixSp &buff_size, nullptr); assert(st == CUSPARSE_STATUS_SUCCESS); - - //allocate buffer + + // allocate buffer void* buff_gemm1 = nullptr; cret = cudaMalloc((void**)&buff_gemm1, buff_size); assert(cret == cudaSuccess); - //inspect input matrices to determine memory requirements for the next steps + // inspect input matrices to determine memory requirements for the next steps st = cusparseSpGEMMreuse_workEstimation(h_cusparse_, opX, opY, @@ -729,8 +686,8 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::times_mat_alloc(const hiopMatrixSp &buff_size, buff_gemm1); assert(st == CUSPARSE_STATUS_SUCCESS); - - //inquire buffer size for nnz call + + // inquire buffer size for nnz call size_t buff_size2 = 0; size_t buff_size3 = 0; size_t buff_size4 = 0; @@ -761,7 +718,6 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::times_mat_alloc(const hiopMatrixSp cret = cudaMalloc((void**)&buff_gemm4, buff_size4); assert(cret == cudaSuccess); - st = cusparseSpGEMMreuse_nnz(h_cusparse_, opX, opY, @@ -775,7 +731,7 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::times_mat_alloc(const hiopMatrixSp &buff_size3, buff_gemm3, &buff_size4, - buff_gemm4 ); + buff_gemm4); assert(st == CUSPARSE_STATUS_SUCCESS); cret = cudaFree(buff_gemm1); @@ -784,7 +740,7 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::times_mat_alloc(const hiopMatrixSp cret = cudaFree(buff_gemm2); assert(cret == cudaSuccess); - //get sizes of M + // get sizes of M int64_t M_m, M_n, M_nnz; st = cusparseSpMatGetSize(mat_descrM, &M_m, &M_n, &M_nnz); assert(st == CUSPARSE_STATUS_SUCCESS); @@ -799,23 +755,22 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::times_mat_alloc(const hiopMatrixSp M->use_sparse_mat_descriptor(mat_descrM); return M; -} +} // M = X*D*Y, where X is `this`. M is mxn, X is mxK and Y is Kxn -void hiopMatrixSparseCSRCUDA::times_mat_symbolic(hiopMatrixSparseCSR& M_in, - const hiopMatrixSparseCSR& Y_in) const +void hiopMatrixSparseCSRCUDA::times_mat_symbolic(hiopMatrixSparseCSR& M_in, const hiopMatrixSparseCSR& Y_in) const { auto& M = dynamic_cast(M_in); auto& Y = dynamic_cast(Y_in); auto& X = *this; - auto cret = cudaMemset(M.values_, 0x0, M.nnz_*sizeof(double)); + auto cret = cudaMemset(M.values_, 0x0, M.nnz_ * sizeof(double)); assert(cudaSuccess == cret); - + cusparseOperation_t opX = CUSPARSE_OPERATION_NON_TRANSPOSE; cusparseOperation_t opY = CUSPARSE_OPERATION_NON_TRANSPOSE; - //inquire size + // inquire size size_t buff_size5 = 0; auto st = cusparseSpGEMMreuse_copy(h_cusparse_, opX, @@ -829,10 +784,10 @@ void hiopMatrixSparseCSRCUDA::times_mat_symbolic(hiopMatrixSparseCSR& M_in, nullptr); assert(st == CUSPARSE_STATUS_SUCCESS); - //allocate buffer5 + // allocate buffer5 auto* buffer_gemm5 = M.alloc_gemm_buffer5(buff_size5); - - //the actual call + + // the actual call st = cusparseSpGEMMreuse_copy(h_cusparse_, opX, opY, @@ -845,7 +800,7 @@ void hiopMatrixSparseCSRCUDA::times_mat_symbolic(hiopMatrixSparseCSR& M_in, buffer_gemm5); assert(st == CUSPARSE_STATUS_SUCCESS); - //buffer3 not needed anymore + // buffer3 not needed anymore M.dealloc_gemm_buffer3(); } @@ -859,11 +814,11 @@ void hiopMatrixSparseCSRCUDA::times_mat_numeric(double beta, auto& Y = dynamic_cast(Y_in); auto& X = *this; - if(beta==0.0) { - auto cret = cudaMemset(M.values_, 0x0, M.nnz_*sizeof(double)); + if(beta == 0.0) { + auto cret = cudaMemset(M.values_, 0x0, M.nnz_ * sizeof(double)); assert(cudaSuccess == cret); } - + cusparseOperation_t opX = CUSPARSE_OPERATION_NON_TRANSPOSE; cusparseOperation_t opY = CUSPARSE_OPERATION_NON_TRANSPOSE; cudaDataType compute_type = CUDA_R_64F; @@ -884,9 +839,9 @@ void hiopMatrixSparseCSRCUDA::times_mat_numeric(double beta, void hiopMatrixSparseCSRCUDA::form_from_symbolic(const hiopMatrixSparseTriplet& M) { - if(M.m()!=nrows_ || M.n()!=ncols_ || M.numberOfNonzeros()!=nnz_) { + if(M.m() != nrows_ || M.n() != ncols_ || M.numberOfNonzeros() != nnz_) { dealloc(); - + nrows_ = M.m(); ncols_ = M.n(); nnz_ = M.numberOfNonzeros(); @@ -894,34 +849,29 @@ void hiopMatrixSparseCSRCUDA::form_from_symbolic(const hiopMatrixSparseTriplet& alloc(); } - assert(nnz_>=0); - if(nnz_<=0) { + assert(nnz_ >= 0); + if(nnz_ <= 0) { return; } - + assert(irowptr_); assert(jcolind_); assert(values_); - //transfer coo/triplet to device - int* d_rowind=nullptr; + // transfer coo/triplet to device + int* d_rowind = nullptr; d_rowind = exec_space_.alloc_array(nnz_); assert(d_rowind); exec_space_.copy(d_rowind, M.i_row(), nnz_, M.exec_space_); - //use cuda API - cusparseStatus_t st = cusparseXcoo2csr(h_cusparse_, - d_rowind, - nnz_, - nrows_, - irowptr_, - CUSPARSE_INDEX_BASE_ZERO); + // use cuda API + cusparseStatus_t st = cusparseXcoo2csr(h_cusparse_, d_rowind, nnz_, nrows_, irowptr_, CUSPARSE_INDEX_BASE_ZERO); assert(CUSPARSE_STATUS_SUCCESS == st); exec_space_.dealloc_array(d_rowind); - - //j indexes can be just transfered - cudaMemcpy(jcolind_, M.j_col(), nnz_*sizeof(index_type), cudaMemcpyHostToDevice); + + // j indexes can be just transfered + cudaMemcpy(jcolind_, M.j_col(), nnz_ * sizeof(index_type), cudaMemcpyHostToDevice); } void hiopMatrixSparseCSRCUDA::form_from_numeric(const hiopMatrixSparseTriplet& M) @@ -931,7 +881,7 @@ void hiopMatrixSparseCSRCUDA::form_from_numeric(const hiopMatrixSparseTriplet& M assert(ncols_ == M.n()); assert(nnz_ == M.numberOfNonzeros()); - cudaMemcpy(values_, M.M(), nnz_*sizeof(double), cudaMemcpyHostToDevice); + cudaMemcpy(values_, M.M(), nnz_ * sizeof(double), cudaMemcpyHostToDevice); } void hiopMatrixSparseCSRCUDA::form_transpose_from_symbolic(const hiopMatrixSparseTriplet& M) @@ -946,9 +896,9 @@ void hiopMatrixSparseCSRCUDA::form_transpose_from_numeric(const hiopMatrixSparse void hiopMatrixSparseCSRCUDA::form_transpose_from_symbolic(const hiopMatrixSparseCSR& M) { - if(M.m()!=ncols_ || M.n()!=nrows_ || M.numberOfNonzeros()!=nnz_) { + if(M.m() != ncols_ || M.n() != nrows_ || M.numberOfNonzeros() != nnz_) { dealloc(); - + nrows_ = M.n(); ncols_ = M.m(); nnz_ = M.numberOfNonzeros(); @@ -956,11 +906,11 @@ void hiopMatrixSparseCSRCUDA::form_transpose_from_symbolic(const hiopMatrixSpars alloc(); } - assert(nnz_>=0); - if(nnz_<=0) { + assert(nnz_ >= 0); + if(nnz_ <= 0) { return; } - + assert(irowptr_); assert(jcolind_); assert(values_); @@ -978,12 +928,12 @@ void hiopMatrixSparseCSRCUDA::form_transpose_from_symbolic(const hiopMatrixSpars irowptr_, jcolind_, CUDA_R_64F, - CUSPARSE_ACTION_SYMBOLIC, + CUSPARSE_ACTION_SYMBOLIC, CUSPARSE_INDEX_BASE_ZERO, CUSPARSE_CSR2CSC_ALG1, &buffer_size); assert(CUSPARSE_STATUS_SUCCESS == st); - cudaError_t ret = cudaMalloc(&buffer_csc2csr_, sizeof(char)*buffer_size); + cudaError_t ret = cudaMalloc(&buffer_csc2csr_, sizeof(char) * buffer_size); assert(cudaSuccess == ret); } @@ -1014,13 +964,12 @@ void hiopMatrixSparseCSRCUDA::form_transpose_from_numeric(const hiopMatrixSparse assert(CUSPARSE_STATUS_SUCCESS == st); } - void hiopMatrixSparseCSRCUDA::form_diag_from_symbolic(const hiopVector& D) { const int m = D.get_size(); - if(m!=ncols_ || m!=nrows_ || m!=nnz_) { + if(m != ncols_ || m != nrows_ || m != nnz_) { dealloc(); - + nrows_ = m; ncols_ = m; nnz_ = m; @@ -1035,19 +984,16 @@ void hiopMatrixSparseCSRCUDA::form_diag_from_symbolic(const hiopVector& D) void hiopMatrixSparseCSRCUDA::form_diag_from_numeric(const hiopVector& D) { - assert(D.get_size()==ncols_ && D.get_size()==nrows_ && D.get_size()==nnz_); + assert(D.get_size() == ncols_ && D.get_size() == nrows_ && D.get_size() == nnz_); assert(irowptr_ && jcolind_ && values_); assert(dynamic_cast(&D) && "input vector must be CUDA"); - cudaError_t ret = cudaMemcpy(values_, - D.local_data_const(), - nrows_*sizeof(double), - cudaMemcpyDeviceToDevice); + cudaError_t ret = cudaMemcpy(values_, D.local_data_const(), nrows_ * sizeof(double), cudaMemcpyDeviceToDevice); assert(cudaSuccess == ret); } -///Column scaling or right multiplication by a diagonal: `this`=`this`*D +/// Column scaling or right multiplication by a diagonal: `this`=`this`*D void hiopMatrixSparseCSRCUDA::scale_cols(const hiopVector& D) { assert(false && "work in progress"); @@ -1058,7 +1004,7 @@ void hiopMatrixSparseCSRCUDA::scale_cols(const hiopVector& D) void hiopMatrixSparseCSRCUDA::scale_rows(const hiopVector& D) { assert(nrows_ == D.get_size()); - + assert(dynamic_cast(&D) && "input vector must be CUDA"); hiop::cuda::csr_scalerows_kernel(nrows_, @@ -1076,16 +1022,16 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::add_matrix_alloc(const hiopMatrixS { auto& Y = dynamic_cast(Y_in); auto& X = *this; - + assert(nrows_ == Y.m()); assert(ncols_ == Y.n()); - + cusparseStatus_t st; cudaError_t cret; - - double alpha = 1.0; //dummy + + double alpha = 1.0; // dummy double beta = 1.0; - size_t buffer_size; + size_t buffer_size; // // create a (dummy) math descriptor @@ -1097,10 +1043,10 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::add_matrix_alloc(const hiopMatrixS cusparseMatDescr_t mat_descrM; st = cusparseCreateMatDescr(&mat_descrM); assert(st == CUSPARSE_STATUS_SUCCESS); - + int* irowptrM = nullptr; - cret = cudaMalloc((void**)&irowptrM, sizeof(int)*(nrows_+1)); - assert(cudaSuccess==cret); + cret = cudaMalloc((void**)&irowptrM, sizeof(int) * (nrows_ + 1)); + assert(cudaSuccess == cret); assert(irowptrM); // get size of buffer needed internally @@ -1120,16 +1066,16 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::add_matrix_alloc(const hiopMatrixS Y.irowptr_, Y.jcolind_, mat_descrM, - NULL,//valuesM, + NULL, // valuesM, irowptrM, - NULL,//jcolindM, + NULL, // jcolindM, &buffer_size); assert(CUSPARSE_STATUS_SUCCESS == st); - - //prepare and allocate buffer + + // prepare and allocate buffer void* buffer_geam2; - cret = cudaMalloc((void**)& buffer_geam2, sizeof(char)*buffer_size); - assert(cudaSuccess==cret); + cret = cudaMalloc((void**)&buffer_geam2, sizeof(char) * buffer_size); + assert(cudaSuccess == cret); assert(buffer_geam2); int nnzM; @@ -1151,35 +1097,33 @@ hiopMatrixSparseCSR* hiopMatrixSparseCSRCUDA::add_matrix_alloc(const hiopMatrixS assert(CUSPARSE_STATUS_SUCCESS == st); - //mat descriptor not needed anymore + // mat descriptor not needed anymore st = cusparseDestroyMatDescr(mat_descrM); assert(st == CUSPARSE_STATUS_SUCCESS); - hiopMatrixSparseCSRCUDA* M = new hiopMatrixSparseCSRCUDA(nrows_, ncols_, nnzM); - //play it safe and copy (instead of switching pointers) - cret = cudaMemcpy(M->irowptr_, (void*)irowptrM, (nrows_+1)*sizeof(int), cudaMemcpyDeviceToDevice); - assert(cudaSuccess==cret); + // play it safe and copy (instead of switching pointers) + cret = cudaMemcpy(M->irowptr_, (void*)irowptrM, (nrows_ + 1) * sizeof(int), cudaMemcpyDeviceToDevice); + assert(cudaSuccess == cret); cret = cudaFree(irowptrM); - assert(cudaSuccess==cret); - - //have the buffer_geam2 stay with M - assert(nullptr==M->buffer_geam2_); + assert(cudaSuccess == cret); + + // have the buffer_geam2 stay with M + assert(nullptr == M->buffer_geam2_); M->buffer_geam2_ = buffer_geam2; buffer_geam2 = nullptr; - + return M; } /** - * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and + * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and * column indexes arrays) of `M`. * */ -void hiopMatrixSparseCSRCUDA:: -add_matrix_symbolic(hiopMatrixSparseCSR& M_in, const hiopMatrixSparseCSR& Y_in) const +void hiopMatrixSparseCSRCUDA::add_matrix_symbolic(hiopMatrixSparseCSR& M_in, const hiopMatrixSparseCSR& Y_in) const { auto& M = dynamic_cast(M_in); auto& Y = dynamic_cast(Y_in); @@ -1191,7 +1135,7 @@ add_matrix_symbolic(hiopMatrixSparseCSR& M_in, const hiopMatrixSparseCSR& Y_in) assert(M.m() == Y.m()); // - //nothing to do for this CUDA, geam2-based implementation + // nothing to do for this CUDA, geam2-based implementation // } @@ -1214,7 +1158,7 @@ void hiopMatrixSparseCSRCUDA::add_matrix_numeric(hiopMatrixSparseCSR& M_in, auto& X = *this; assert(M.buffer_geam2_); - + cusparseStatus_t st; st = cusparseDcsrgeam2(h_cusparse_, nrows_, @@ -1271,6 +1215,6 @@ bool hiopMatrixSparseCSRCUDA::check_csr_is_ordered() return mat_h.check_csr_is_ordered(); } -} //end of namespace +} // namespace hiop -#endif //#ifdef HIOP_USE_CUDA +#endif // #ifdef HIOP_USE_CUDA diff --git a/src/LinAlg/hiopMatrixSparseCsrCuda.hpp b/src/LinAlg/hiopMatrixSparseCsrCuda.hpp index ef29e8a50..5cf73fb29 100644 --- a/src/LinAlg/hiopMatrixSparseCsrCuda.hpp +++ b/src/LinAlg/hiopMatrixSparseCsrCuda.hpp @@ -58,7 +58,7 @@ #include "ExecSpace.hpp" -#ifdef HIOP_USE_CUDA +#ifdef HIOP_USE_CUDA #include #include @@ -79,8 +79,8 @@ namespace hiop * @brief Sparse matrix of doubles in compressed row format for use on CUDA GPUs. Data * is not (memory, MPI) distributed. * - * @note The methods of this class expect and maintains unique and ordered column indexes - * within the same row. + * @note The methods of this class expect and maintains unique and ordered column indexes + * within the same row. * * Note: most of the methods are not implemented (TODO) as this is work in progress (wip). */ @@ -101,16 +101,16 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR * code that will be removed in the future. */ void copy_to(hiopMatrixSparseCSRSeq& src); - + virtual void copy_to(hiopMatrixDense& W); virtual void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows); - virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; - virtual void timesVec(double beta, double* y, double alpha, const double* x) const; + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + virtual void timesVec(double beta, double* y, double alpha, const double* x) const; - virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; - virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const; + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const; virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const; @@ -121,7 +121,7 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR virtual void addDiagonal(const double& alpha, const hiopVector& d_); virtual void addDiagonal(const double& value); virtual void addSubDiagonal(const double& alpha, index_type start, const hiopVector& d_); - + /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ @@ -129,7 +129,7 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR const double& alpha, const hiopVector& d_, index_type start_on_src_vec, - int num_elems=-1) + int num_elems = -1) { assert(false && "not needed / implemented"); } @@ -139,18 +139,18 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR } /* Add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', - * scaled by 'scal' - */ + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', + * scaled by 'scal' + */ virtual void copySubDiagonalFrom(const index_type& start_on_dest_diag, const size_type& num_elems, const hiopVector& d_, const index_type& start_on_nnz_idx, - double scal=1.0); + double scal = 1.0); /* add constant 'c' to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements. - * The number of elements added is 'num_elems' - */ + * The number of elements added is 'num_elems' + */ virtual void setSubDiagonalTo(const index_type& start_on_dest_diag, const size_type& num_elems, const double& c, @@ -163,16 +163,12 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR index_type col_dest_start, double alpha, hiopMatrixDense& W) const; - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(index_type diag_start, - double alpha, - hiopMatrixDense& W) const + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(index_type diag_start, double alpha, hiopMatrixDense& W) const { assert(false && "not yet implemented"); } - virtual void addUpperTriangleToSymSparseMatrixUpperTriangle(index_type diag_start, - double alpha, - hiopMatrixSparse& W) const + virtual void addUpperTriangleToSymSparseMatrixUpperTriangle(index_type diag_start, double alpha, hiopMatrixSparse& W) const { assert(false && "not yet implemented"); } @@ -183,7 +179,7 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR */ virtual void addMDinvMtransToDiagBlockOfSymDeMatUTri(index_type rowCol_dest_start, const double& alpha, - const hiopVector& D, + const hiopVector& D, hiopMatrixDense& W) const; /* Block of W += alpha * M * D^{-1} * transpose(N), where M=this @@ -209,32 +205,32 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR const size_type& dest_nnz_st); /** - * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' - * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to - * this->numOfNumbers() - * @pre User must know the nonzero pattern of src and dest matrices. The method assumes - * that non-zero patterns does not change between calls and that 'src_gen' is a valid - * submatrix of 'this' - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' + * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to + * this->numOfNumbers() + * @pre User must know the nonzero pattern of src and dest matrices. The method assumes + * that non-zero patterns does not change between calls and that 'src_gen' is a valid + * submatrix of 'this' + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFrom(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, const bool offdiag_only = false); - + /** - * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner - * 'dest_row_st' and 'dest_col_st'. - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner + * 'dest_row_st' and 'dest_col_st'. + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFromTrans(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, @@ -242,12 +238,12 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR const bool offdiag_only = false); /** - * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_colpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -256,12 +252,12 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR const hiopVector& ix); /** - * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -270,46 +266,46 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR const hiopVector& ix); /** - * @brief Copy a diagonal matrix to destination. - * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. - * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. USE WITH CAUTION! - */ + * @brief Copy a diagonal matrix to destination. + * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. + * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. USE WITH CAUTION! + */ virtual void copyDiagMatrixToSubblock(const double& src_val, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, - const size_type &nnz_to_copy); - - /** - * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. - * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. - * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - * @pre 'pattern' has same size as `x`. - * @pre 'pattern` has exactly `nnz_to_copy` nonzeros. - */ + const size_type& nnz_to_copy); + + /** + * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. + * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. + * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + * @pre 'pattern' has same size as `x`. + * @pre 'pattern` has exactly `nnz_to_copy` nonzeros. + */ virtual void copyDiagMatrixToSubblock_w_pattern(const hiopVector& x, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, - const size_type &nnz_to_copy, + const size_type& nnz_to_copy, const hiopVector& pattern); virtual double max_abs_value(); - virtual void row_max_abs_value(hiopVector &ret_vec); - - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale=false); + virtual void row_max_abs_value(hiopVector& ret_vec); + + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale = false); virtual bool isfinite() const; - virtual void print(FILE* f=nullptr, const char* msg=nullptr, int maxRows=-1, int maxCols=-1, int rank=-1) const; + virtual void print(FILE* f = nullptr, const char* msg = nullptr, int maxRows = -1, int maxCols = -1, int rank = -1) const; virtual void startingAtAddSubDiagonalToStartingAt(index_type diag_src_start, const double& alpha, hiopVector& vec_dest, index_type vec_start, - size_type num_elems=-1) const + size_type num_elems = -1) const { assert(0 && "not implemented; should be used only for symmetric matrices."); } @@ -349,41 +345,20 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR virtual hiopMatrixSparse* alloc_clone() const; virtual hiopMatrixSparse* new_copy() const; - inline index_type* i_row() - { - return irowptr_; - } - inline index_type* j_col() - { - return jcolind_; - } - inline double* M() - { - return values_; - } - inline const index_type* i_row() const - { - return irowptr_; - } - inline const index_type* j_col() const - { - return jcolind_; - } - inline const double* M() const - { - return values_; - } + inline index_type* i_row() { return irowptr_; } + inline index_type* j_col() { return jcolind_; } + inline double* M() { return values_; } + inline const index_type* i_row() const { return irowptr_; } + inline const index_type* j_col() const { return jcolind_; } + inline const double* M() const { return values_; } #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const + virtual bool assertSymmetry(double tol = 1e-16) const { assert(false && "not yet implemented"); return false; } - virtual bool checkIndexesAreOrdered() const - { - return true; - } + virtual bool checkIndexesAreOrdered() const { return true; } #endif /** @@ -402,57 +377,57 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR virtual void set_diagonal(const double& val); /** - * Allocates a CSR matrix capable of storing the multiplication result of M = X*Y, where X + * Allocates a CSR matrix capable of storing the multiplication result of M = X*Y, where X * is the calling matrix class (`this`) and Y is the `Y` argument of the method. * * @note Should be used in conjunction with `times_mat_symbolic` and `times_mat_numeric` - * + * * @pre The dimensions of the matrices should be consistent with the multiplication. - * + * */ hiopMatrixSparseCSR* times_mat_alloc(const hiopMatrixSparseCSR& Y) const; - + /** * Computes sparsity pattern, meaning computes row pointers and column indexes of `M`, - * of M = X*Y, where X is the calling matrix class (`this`) and Y is the second argument. + * of M = X*Y, where X is the calling matrix class (`this`) and Y is the second argument. * * @note The output matrix `M` will have unique and ordered column indexes (with the same * row) * * @note Specializations of this class may only be able to compute the sparsity pattern in - * tandem with the numerical multiplications (for example, because of API limitations). - * In this cases, the `times_mat_numeric` will take over sparsity computations and the + * tandem with the numerical multiplications (for example, because of API limitations). + * In this cases, the `times_mat_numeric` will take over sparsity computations and the * arrays with row pointers and column indexes may be uninitialized after this call. - * + * * @pre The dimensions of the matrices should be consistent with the multiplication. - * + * * @pre The column indexes within the same row must be unique and ordered for `Y`. - * - * @pre The internal arrays of `M` should have enough storage to hold the sparsity - * pattern (row pointers and column indexes) and values of the multiplication result. + * + * @pre The internal arrays of `M` should have enough storage to hold the sparsity + * pattern (row pointers and column indexes) and values of the multiplication result. * This preallocation can be done by calling `times_mat_alloc` prior to this method. - * + * */ - void times_mat_symbolic(hiopMatrixSparseCSR& M, const hiopMatrixSparseCSR& Y) const; + void times_mat_symbolic(hiopMatrixSparseCSR& M, const hiopMatrixSparseCSR& Y) const; /** * Computes (numerical values of) M = beta*M + alpha*X*D*Y, where X is the calling matrix * class (`this`), beta and alpha are scalars passed as arguments, and M and Y are matrices * of appropriate sizes passed as arguments. * - * @note Generally, only the nonzero values of the input/output argument `M` are updated + * @note Generally, only the nonzero values of the input/output argument `M` are updated * since the sparsity pattern (row pointers and column indexes) of `M` should have been * already computed by `times_mat_symbolic`. Some specializations of this method may be - * restricted to performing both phases in inside this method. + * restricted to performing both phases in inside this method. * * @pre The dimensions of the matrices should be consistent with the multiplication. * * @pre The column indexes within the same row must be unique and ordered both for input * matrices and result matrix `M`. * - * @pre The indexes arrays of `this`, `Y`, and `M` should not have changed since the + * @pre The indexes arrays of `this`, `Y`, and `M` should not have changed since the * last call to `times_diag_times_mat`. - * + * * Example of usage: * //initially allocate and compute M * auto* M = X.times_mat_alloc(Y); @@ -461,12 +436,9 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR * ... calculations .... * //if only nonzero entries of X and Y have changed, call the fast multiplication routine * X.times_mat_numeric(0.0, M, 1.0, Y); - * + * */ - void times_mat_numeric(double beta, - hiopMatrixSparseCSR& M, - double alpha, - const hiopMatrixSparseCSR& Y); + void times_mat_numeric(double beta, hiopMatrixSparseCSR& M, double alpha, const hiopMatrixSparseCSR& Y); /// @brief Column scaling or right multiplication by a diagonal: `this`=`this`*D void scale_cols(const hiopVector& D); @@ -474,11 +446,10 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR /// @brief Row scaling or left multiplication by a diagonal: `this`=D*`this` void scale_rows(const hiopVector& D); - /** - * Allocates and populates the sparsity pattern of `this` as the CSR representation + * Allocates and populates the sparsity pattern of `this` as the CSR representation * of the triplet matrix `M`. - * + * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. */ @@ -487,7 +458,7 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR /** * Copies the numerical values of the triplet matrix M into the CSR matrix `this` * - * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be + * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be * allocated and populated, possibly by a previous call to `form_from_symbolic` * * @pre The input argument should have the nonzeros sorted by row and then by column @@ -495,72 +466,72 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR */ //// note: only device cuda memcpy void form_from_numeric(const hiopMatrixSparseTriplet& M); - + /** - * Allocates and populates the sparsity pattern of `this` as the CSR representation + * Allocates and populates the sparsity pattern of `this` as the CSR representation * of transpose of the triplet matrix `M`. - * + * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. */ void form_transpose_from_symbolic(const hiopMatrixSparseTriplet& M); - /** - * Allocates and populates the sparsity pattern of `this` as the CSR representation + /** + * Allocates and populates the sparsity pattern of `this` as the CSR representation * of transpose of the CSR matrix `M`. - * + * * @pre The input argument should have the column indexes sorted and unique within a row. */ virtual void form_transpose_from_symbolic(const hiopMatrixSparseCSR& M); - + /** * Copies the numerical values of the transpose of the CSR matrix M into the CSR matrix `this`. * - * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be + * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be * allocated and populated, possibly by a previous call to `form_transpose_from_symbolic` * * @pre The input argument should have the column indexes sorted and unique within a row. - */ + */ virtual void form_transpose_from_numeric(const hiopMatrixSparseCSR& M); - + /** - * Copies the numerical values of the transpose of the triplet matrix M into the + * Copies the numerical values of the transpose of the triplet matrix M into the * CSR matrix `this` * - * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be + * @pre The sparsity pattern (row pointers and column indexes arrays) of `this` should be * allocated and populated, possibly by a previous call to `form_transpose_from_symbolic` * * @pre The input argument should have the nonzeros sorted by row and then by column * indexes. - */ + */ void form_transpose_from_numeric(const hiopMatrixSparseTriplet& M); /** * Forms `this` as a diagonal matrix with diagonal entries given by D. */ void form_diag_from_symbolic(const hiopVector& D); - + /** * Sets the diagonal entries of `this` equal to entries of D - * + * * @pre Length of `D` should be equal to size(s) of `this` - * + * * @pre `this` should be a diagonal matrix (in CSR format) with storage for * all the diagonal entries, which can be ensured by calling the sister method * `form_diag_from_symbolic` */ void form_diag_from_numeric(const hiopVector& D); - + /** - * Allocates and returns CSR matrix `M` capable of holding M = X+Y, where X is + * Allocates and returns CSR matrix `M` capable of holding M = X+Y, where X is * the calling matrix class (`this`) and Y is the argument passed to the method. */ hiopMatrixSparseCSR* add_matrix_alloc(const hiopMatrixSparseCSR& Y) const; /** - * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and + * Computes sparsity pattern of M = X+Y (i.e., populates the row pointers and * column indexes arrays) of `M`. X is `this`. - * + * * @pre `this` and `Y` should hold matrices of identical dimensions. * */ @@ -569,33 +540,29 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR /** * Performs matrix addition M = alpha*X + beta*Y numerically, where * X is `this` and alpha and beta are scalars. - * + * * @pre `M`, `this` and `Y` should hold matrices of identical dimensions. - * - * @pre `M` and `X+Y` should have identical sparsity pattern, namely the + * + * @pre `M` and `X+Y` should have identical sparsity pattern, namely the * `add_matrix_symbolic` should have been called previously. * */ - void add_matrix_numeric(hiopMatrixSparseCSR& M, - double alpha, - const hiopMatrixSparseCSR& Y, - double beta) const; + void add_matrix_numeric(hiopMatrixSparseCSR& M, double alpha, const hiopMatrixSparseCSR& Y, double beta) const; - /** Performs a quick check and returns false if the CSR indexes are not ordered. - * + /** Performs a quick check and returns false if the CSR indexes are not ordered. + * * Should be used with caution, for example only under HIOP_DEEPCHECKS or for debugging purposes - * because it is a computationally intensive method for GPU implementations as transfers the - * matrix data from device to host. + * because it is a computationally intensive method for GPU implementations as transfers the + * matrix data from device to host. */ bool check_csr_is_ordered(); - + ///////////////////////////////////////////////////////////////////// // end of new CSR-specific methods ///////////////////////////////////////////////////////////////////// //(re)setters for internals protected: - /** Set the sparse gemm descriptor that was used before the instantiation of this class. This class * takes ownership of the input descriptor/pointer. */ @@ -615,7 +582,7 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR assert(st == CUSPARSE_STATUS_SUCCESS); mat_sp_descr_ = sp_mat_descr; - //set pointers since the input descriptor does not have the values_ pointer + // set pointers since the input descriptor does not have the values_ pointer st = cusparseCsrSetPointers(mat_sp_descr_, irowptr_, jcolind_, values_); assert(st == CUSPARSE_STATUS_SUCCESS); } @@ -623,8 +590,8 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR ///@brief Set and take ownwership of the buffer inline void set_gemm_buffer3(void* buff_in) { - //should not be previously allocated - assert(buffer_gemm3_==nullptr); + // should not be previously allocated + assert(buffer_gemm3_ == nullptr); //`this` takes ownership of the pointer buffer_gemm3_ = buff_in; } @@ -639,8 +606,8 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR ///@brief Set and take ownwership of the buffer inline void set_gemm_buffer4(void* buff_in) { - //should not be previously allocated - assert(buffer_gemm4_==nullptr); + // should not be previously allocated + assert(buffer_gemm4_ == nullptr); //`this` takes ownership of the pointer buffer_gemm4_ = buff_in; } @@ -653,14 +620,16 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR assert(cudaSuccess == cret); return buffer_gemm5_; } + private: void alloc(); void dealloc(); + protected: - ExecSpace exec_space_; - + ExecSpace exec_space_; + //// inherits nrows_, ncols_, and nnz_ from hiopSparseMatrix - + /// Row pointers (starting indexes) in the column and values arrays index_type* irowptr_; @@ -670,21 +639,21 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR /// Nonzero values double* values_; - ///Internal buffer used by transpose/csr2csc (allocated on demand) + /// Internal buffer used by transpose/csr2csc (allocated on demand) void* buffer_csc2csr_; - ///Internal buffer used by add_matrix/geam2 + /// Internal buffer used by add_matrix/geam2 void* buffer_geam2_; - ///Internal buffer used by times_mat/SpGEMMreuse + /// Internal buffer used by times_mat/SpGEMMreuse void* buffer_gemm3_; - ///Internal buffer used by times_mat/SpGEMMreuse + /// Internal buffer used by times_mat/SpGEMMreuse void* buffer_gemm4_; - ///Internal buffer used by times_mat/SpGEMMreuse + /// Internal buffer used by times_mat/SpGEMMreuse void* buffer_gemm5_; - + /// Internal handle required by cuSPARSE functions cusparseHandle_t h_cusparse_; @@ -696,12 +665,12 @@ class hiopMatrixSparseCSRCUDA : public hiopMatrixSparseCSR /// Internal cuSPARSE gemm descriptor cusparseSpGEMMDescr_t gemm_sp_descr_; + private: hiopMatrixSparseCSRCUDA(const hiopMatrixSparseCSRCUDA&) = delete; }; +} // namespace hiop -} //end of namespace - -#endif //#ifdef HIOP_USE_CUDA +#endif // #ifdef HIOP_USE_CUDA #endif diff --git a/src/LinAlg/hiopMatrixSparseTriplet.cpp b/src/LinAlg/hiopMatrixSparseTriplet.cpp index 6b81c4f04..34a7369fa 100644 --- a/src/LinAlg/hiopMatrixSparseTriplet.cpp +++ b/src/LinAlg/hiopMatrixSparseTriplet.cpp @@ -3,15 +3,15 @@ #include "hiop_blasdefs.hpp" -#include //for std::min -#include //for std::isfinite +#include //for std::min +#include //for std::isfinite #include #include #include #include #include -#include // std::cout, std::fixed -#include // std::setprecision +#include // std::cout, std::fixed +#include // std::setprecision #include "MemBackendCppImpl.hpp" #include "hiopCppStdUtils.hpp" @@ -19,43 +19,38 @@ namespace hiop { hiopMatrixSparseTriplet::hiopMatrixSparseTriplet(int rows, int cols, int nnz) - : hiopMatrixSparse(rows, cols, nnz) - , row_starts_(NULL) + : hiopMatrixSparse(rows, cols, nnz), + row_starts_(NULL) { - if(rows==0 || cols==0) { - assert(nnz_==0 && "number of nonzeros must be zero when any of the dimensions are 0"); + if(rows == 0 || cols == 0) { + assert(nnz_ == 0 && "number of nonzeros must be zero when any of the dimensions are 0"); nnz_ = 0; } - iRow_ = new int[nnz_]; + iRow_ = new int[nnz_]; jCol_ = new int[nnz_]; values_ = new double[nnz_]; } hiopMatrixSparseTriplet::~hiopMatrixSparseTriplet() { - delete [] iRow_; - delete [] jCol_; - delete [] values_; + delete[] iRow_; + delete[] jCol_; + delete[] values_; delete row_starts_; } void hiopMatrixSparseTriplet::setToZero() { - for(int i=0; i(Wmat); const auto& M2 = dynamic_cast(M2mat); const hiopMatrixSparseTriplet& M1 = *this; const int m1 = M1.nrows_, nx = M1.ncols_, m2 = M2.nrows_; - assert(nx==M1.ncols_); - assert(nx==M2.ncols_); + assert(nx == M1.ncols_); + assert(nx == M2.ncols_); assert(M2.ncols_ == nx); - assert(m1==W.m()); - assert(m2==W.n()); - + assert(m1 == W.m()); + assert(m2 == W.n()); + double* WM = W.local_data(); auto n_W = W.n(); - + // TODO: allocAndBuildRowStarts -> should create row_starts internally (name='prepareRowStarts' ?) - if(M1.row_starts_==NULL) M1.row_starts_ = M1.allocAndBuildRowStarts(); + if(M1.row_starts_ == NULL) M1.row_starts_ = M1.allocAndBuildRowStarts(); assert(M1.row_starts_); - if(M2.row_starts_==NULL) M2.row_starts_ = M2.allocAndBuildRowStarts(); + if(M2.row_starts_ == NULL) M2.row_starts_ = M2.allocAndBuildRowStarts(); assert(M2.row_starts_); double acc; - for(int i=0; i=i - for(int j=0; jidx_start_[i]; - index_type kj=M2.row_starts_->idx_start_[j]; + index_type ki = M1.row_starts_->idx_start_[i]; + index_type kj = M2.row_starts_->idx_start_[j]; - while(kiidx_start_[i+1] && kjidx_start_[j+1]) { - assert(kiidx_start_[i + 1] && kj < M2.row_starts_->idx_start_[j + 1]) { + assert(ki < M1.nnz_); + assert(kj < M2.nnz_); if(M1.jCol_[ki] == M2.jCol_[kj]) { - // same col, so multiply and increment + // same col, so multiply and increment acc += M1.values_[ki] * M2.values_[kj]; ki++; kj++; } else { - if(M1.jCol_[ki](d_in); - assert(num_elems<=vd.get_size()); - assert(start_on_dest_diag>=0 && start_on_dest_diag+num_elems<=this->nrows_); + assert(num_elems <= vd.get_size()); + assert(start_on_dest_diag >= 0 && start_on_dest_diag + num_elems <= this->nrows_); const double* v = vd.local_data_const(); - for(auto row_src=0; row_srcvalues_[nnz_dest] = scal*v[row_src]; + this->values_[nnz_dest] = scal * v[row_src]; } } @@ -237,61 +207,61 @@ void hiopMatrixSparseTriplet::setSubDiagonalTo(const index_type& start_on_dest_d const double& c, const index_type& start_on_nnz_idx) { - assert(start_on_dest_diag>=0 && start_on_dest_diag+num_elems<=this->nrows_); + assert(start_on_dest_diag >= 0 && start_on_dest_diag + num_elems <= this->nrows_); - for(auto row_src=0; row_srcvalues_[nnz_dest] = c; } } -void hiopMatrixSparseTriplet::addMatrix(double alpha, const hiopMatrix& X) -{ - assert(false && "not needed"); -} +void hiopMatrixSparseTriplet::addMatrix(double alpha, const hiopMatrix& X) { assert(false && "not needed"); } /* block of W += alpha*transpose(this) * Note W; contains only the upper triangular entries */ -void hiopMatrixSparseTriplet:: -transAddToSymDenseMatrixUpperTriangle(int row_start, int col_start, - double alpha, hiopMatrixDense& W) const +void hiopMatrixSparseTriplet::transAddToSymDenseMatrixUpperTriangle(int row_start, + int col_start, + double alpha, + hiopMatrixDense& W) const { - assert(row_start>=0 && row_start+ncols_<=W.m()); - assert(col_start>=0 && col_start+nrows_<=W.n()); - assert(W.n()==W.m()); + assert(row_start >= 0 && row_start + ncols_ <= W.m()); + assert(col_start >= 0 && col_start + nrows_ <= W.n()); + assert(W.n() == W.m()); int m_W = W.m(); double* WM = W.local_data(); - for(int it=0; it=0 && j>=0); - assert(i<=j && "source entries need to map inside the upper triangular part of destination"); - //WM[i][j] += alpha*values_[it]; - WM[i*m_W+j] += alpha*values_[it]; + for(int it = 0; it < nnz_; it++) { + const int i = jCol_[it] + row_start; + const int j = iRow_[it] + col_start; + assert(i < W.m() && j < W.n()); + assert(i >= 0 && j >= 0); + assert(i <= j && "source entries need to map inside the upper triangular part of destination"); + // WM[i][j] += alpha*values_[it]; + WM[i * m_W + j] += alpha * values_[it]; } } double hiopMatrixSparseTriplet::max_abs_value() { - char norm='M'; int one=1; + char norm = 'M'; + int one = 1; double maxv = DLANGE(&norm, &one, &nnz_, values_, &one, NULL); return maxv; } -void hiopMatrixSparseTriplet::row_max_abs_value(hiopVector &ret_vec) +void hiopMatrixSparseTriplet::row_max_abs_value(hiopVector& ret_vec) { assert(ret_vec.get_local_size() == nrows_); hiopVectorPar& yy = dynamic_cast(ret_vec); yy.setToZero(); - + double* y_data = yy.local_data(); - - for(int it=0; it(vec_scal); + hiopVectorPar& vscal = dynamic_cast(vec_scal); double* vd = vscal.local_data(); double scal; - - for(int it=0; itcheckIndexesAreOrdered()); #endif - for(int i=0; icheckIndexesAreOrdered()); #endif hiopMatrixSparseTriplet* copy = new hiopMatrixSparseTriplet(nrows_, ncols_, nnz_); - memcpy(copy->iRow_, iRow_, nnz_*sizeof(int)); - memcpy(copy->jCol_, jCol_, nnz_*sizeof(int)); - memcpy(copy->values_, values_, nnz_*sizeof(double)); + memcpy(copy->iRow_, iRow_, nnz_ * sizeof(int)); + memcpy(copy->jCol_, jCol_, nnz_ * sizeof(int)); + memcpy(copy->values_, values_, nnz_ * sizeof(double)); return copy; } void hiopMatrixSparseTriplet::copyFrom(const hiopMatrixSparse& dm) @@ -355,9 +321,9 @@ void hiopMatrixSparseTriplet::copyFrom(const hiopMatrixSparse& dm) void hiopMatrixSparseTriplet::copy_to(int* irow, int* jcol, double* val) { assert(irow && jcol && val); - memcpy(irow, iRow_, nnz_*sizeof(int)); - memcpy(jcol, jCol_, nnz_*sizeof(int)); - memcpy(val, values_, nnz_*sizeof(double)); + memcpy(irow, iRow_, nnz_ * sizeof(int)); + memcpy(jcol, jCol_, nnz_ * sizeof(int)); + memcpy(val, values_, nnz_ * sizeof(double)); } void hiopMatrixSparseTriplet::copy_to(hiopMatrixDense& W) @@ -367,111 +333,113 @@ void hiopMatrixSparseTriplet::copy_to(hiopMatrixDense& W) W.setToZero(); double* WM = W.local_data(); int n_W = W.n(); - - for(int k=0; knrows_; - assert(row_dest_start>=0 && row_dest_start+n<=W.m()); - assert(col_dest_start>=0 && col_dest_start+nrows_<=W.n()); + assert(row_dest_start >= 0 && row_dest_start + n <= W.m()); + assert(col_dest_start >= 0 && col_dest_start + nrows_ <= W.n()); assert(D.get_size() == this->ncols_); double* WM = W.local_data(); int m_W = W.m(); const double* DM = D.local_data_const(); - if(row_starts_==NULL) row_starts_ = allocAndBuildRowStarts(); + if(row_starts_ == NULL) row_starts_ = allocAndBuildRowStarts(); assert(row_starts_); double acc; - for(int i=0; inrows_; i++) { - //j==i + for(int i = 0; i < this->nrows_; i++) { + // j==i acc = 0.; - for(index_type k=row_starts_->idx_start_[i]; kidx_start_[i+1]; k++) + for(index_type k = row_starts_->idx_start_[i]; k < row_starts_->idx_start_[i + 1]; k++) acc += this->values_[k] / DM[this->jCol_[k]] * this->values_[k]; - //WM[i+row_dest_start][i+col_dest_start] += alpha*acc; - WM[(i+row_dest_start)*m_W+i+col_dest_start] += alpha*acc; + // WM[i+row_dest_start][i+col_dest_start] += alpha*acc; + WM[(i + row_dest_start) * m_W + i + col_dest_start] += alpha * acc; - //j>i - for(int j=i+1; jnrows_; j++) { - //dest[i,j] = weigthed_dotprod(this_row_i,this_row_j) + // j>i + for(int j = i + 1; j < this->nrows_; j++) { + // dest[i,j] = weigthed_dotprod(this_row_i,this_row_j) acc = 0.; - index_type ki=row_starts_->idx_start_[i], kj=row_starts_->idx_start_[j]; - while(kiidx_start_[i+1] && kjidx_start_[j+1]) { - assert(kinnz_); - assert(kjnnz_); + index_type ki = row_starts_->idx_start_[i], kj = row_starts_->idx_start_[j]; + while(ki < row_starts_->idx_start_[i + 1] && kj < row_starts_->idx_start_[j + 1]) { + assert(ki < this->nnz_); + assert(kj < this->nnz_); if(this->jCol_[ki] == this->jCol_[kj]) { acc += this->values_[ki] / DM[this->jCol_[ki]] * this->values_[kj]; ki++; kj++; } else { - if(this->jCol_[ki]jCol_[kj]) ki++; - else kj++; + if(this->jCol_[ki] < this->jCol_[kj]) + ki++; + else + kj++; } - } //end of loop over ki and kj - - //WM[i+row_dest_start][j+col_dest_start] += alpha*acc; - WM[(i+row_dest_start)*m_W + j+col_dest_start] += alpha*acc; - } //end j - } // end i + } // end of loop over ki and kj + // WM[i+row_dest_start][j+col_dest_start] += alpha*acc; + WM[(i + row_dest_start) * m_W + j + col_dest_start] += alpha * acc; + } // end j + } // end i } /* * block of W += alpha * M1 * D^{-1} * transpose(M2), where M1=this * Sizes: M1 is (m1 x nx); D is vector of len nx, M2 is (m2, nx) */ -void hiopMatrixSparseTriplet:: -addMDinvNtransToSymDeMatUTri(int row_dest_start, int col_dest_start, - const double& alpha, - const hiopVector& D, const hiopMatrixSparse& M2mat, - hiopMatrixDense& W) const +void hiopMatrixSparseTriplet::addMDinvNtransToSymDeMatUTri(int row_dest_start, + int col_dest_start, + const double& alpha, + const hiopVector& D, + const hiopMatrixSparse& M2mat, + hiopMatrixDense& W) const { const hiopMatrixSparseTriplet& M2 = dynamic_cast(M2mat); const hiopMatrixSparseTriplet& M1 = *this; const int m1 = M1.nrows_, nx = M1.ncols_, m2 = M2.nrows_; - assert(nx==M1.ncols_); - assert(nx==M2.ncols_); + assert(nx == M1.ncols_); + assert(nx == M2.ncols_); assert(D.get_size() == nx); assert(M2.ncols_ == nx); - //does it fit in W ? - assert(row_dest_start>=0 && row_dest_start+m1<=W.m()); - assert(col_dest_start>=0 && col_dest_start+m2<=W.n()); + // does it fit in W ? + assert(row_dest_start >= 0 && row_dest_start + m1 <= W.m()); + assert(col_dest_start >= 0 && col_dest_start + m2 <= W.n()); double* WM = W.local_data(); int m_W = W.m(); - + const double* DM = D.local_data_const(); // TODO: allocAndBuildRowStarts -> should create row_starts internally (name='prepareRowStarts' ?) - if(M1.row_starts_==NULL) M1.row_starts_ = M1.allocAndBuildRowStarts(); + if(M1.row_starts_ == NULL) M1.row_starts_ = M1.allocAndBuildRowStarts(); assert(M1.row_starts_); - if(M2.row_starts_==NULL) M2.row_starts_ = M2.allocAndBuildRowStarts(); + if(M2.row_starts_ == NULL) M2.row_starts_ = M2.allocAndBuildRowStarts(); assert(M2.row_starts_); double acc; @@ -487,67 +455,64 @@ addMDinvNtransToSymDeMatUTri(int row_dest_start, int col_dest_start, // thread execution time // // compressed row/col patterns? - for(int i=0; i=i - for(int j=0; jidx_start_[i]; - index_type kj=M2.row_starts_->idx_start_[j]; + index_type ki = M1.row_starts_->idx_start_[i]; + index_type kj = M2.row_starts_->idx_start_[j]; - while(kiidx_start_[i+1] && kjidx_start_[j+1]) { - assert(kiidx_start_[i + 1] && kj < M2.row_starts_->idx_start_[j + 1]) { + assert(ki < M1.nnz_); + assert(kj < M2.nnz_); if(M1.jCol_[ki] == M2.jCol_[kj]) { - acc += M1.values_[ki] / DM[this->jCol_[ki]] * M2.values_[kj]; ki++; kj++; } else { - if(M1.jCol_[ki] j+col_dest_start) + if(i + row_dest_start > j + col_dest_start) printf("[warning] lower triangular element updated in addMDinvNtransToSymDeMatUTri\n"); #endif - assert(i+row_dest_start <= j+col_dest_start); - //WM[i+row_dest_start][j+col_dest_start] += alpha*acc; - WM[(i+row_dest_start)*m_W + j+col_dest_start] += alpha*acc; - - } //end j - } // end i + assert(i + row_dest_start <= j + col_dest_start); + // WM[i+row_dest_start][j+col_dest_start] += alpha*acc; + WM[(i + row_dest_start) * m_W + j + col_dest_start] += alpha * acc; + } // end j + } // end i } - // //assumes triplets are ordered -hiopMatrixSparseTriplet::RowStartsInfo* -hiopMatrixSparseTriplet::allocAndBuildRowStarts() const +hiopMatrixSparseTriplet::RowStartsInfo* hiopMatrixSparseTriplet::allocAndBuildRowStarts() const { - assert(nrows_>=0); + assert(nrows_ >= 0); - RowStartsInfo* rsi = new RowStartsInfo(nrows_); assert(rsi); + RowStartsInfo* rsi = new RowStartsInfo(nrows_); + assert(rsi); - if(nrows_<=0) return rsi; + if(nrows_ <= 0) return rsi; - size_type it_triplet=0; - rsi->idx_start_[0]=0; - for(index_type i=1; i<=this->nrows_; i++) { + size_type it_triplet = 0; + rsi->idx_start_[0] = 0; + for(index_type i = 1; i <= this->nrows_; i++) { + rsi->idx_start_[i] = rsi->idx_start_[i - 1]; - rsi->idx_start_[i]=rsi->idx_start_[i-1]; - - while(it_tripletnnz_ && this->iRow_[it_triplet]==i-1) { + while(it_triplet < this->nnz_ && this->iRow_[it_triplet] == i - 1) { #ifdef HIOP_DEEPCHECKS - if(it_triplet>=1) { - assert(iRow_[it_triplet-1]<=iRow_[it_triplet] && "row indexes are not sorted"); - //assert(iCol[it_triplet-1]<=iCol[it_triplet]); - if(iRow_[it_triplet-1]==iRow_[it_triplet]) - assert(jCol_[it_triplet-1] < jCol_[it_triplet] && "col indexes are not sorted"); + if(it_triplet >= 1) { + assert(iRow_[it_triplet - 1] <= iRow_[it_triplet] && "row indexes are not sorted"); + // assert(iCol[it_triplet-1]<=iCol[it_triplet]); + if(iRow_[it_triplet - 1] == iRow_[it_triplet]) + assert(jCol_[it_triplet - 1] < jCol_[it_triplet] && "col indexes are not sorted"); } #endif rsi->idx_start_[i]++; @@ -555,13 +520,11 @@ hiopMatrixSparseTriplet::allocAndBuildRowStarts() const } assert(rsi->idx_start_[i] == it_triplet); } - assert(it_triplet==this->nnz_); + assert(it_triplet == this->nnz_); return rsi; } -void hiopMatrixSparseTriplet::copyRowsFrom(const hiopMatrix& src_gen, - const index_type* rows_idxs, - size_type n_rows) +void hiopMatrixSparseTriplet::copyRowsFrom(const hiopMatrix& src_gen, const index_type* rows_idxs, size_type n_rows) { const hiopMatrixSparseTriplet& src = dynamic_cast(src_gen); assert(this->m() == n_rows); @@ -573,37 +536,37 @@ void hiopMatrixSparseTriplet::copyRowsFrom(const hiopMatrix& src_gen, const int* jCol_src = src.j_col(); const double* values_src = src.M(); int nnz_src = src.numberOfNonzeros(); - int itnz_src=0; - int itnz_dest=0; - //int iterators should suffice - for(int row_dest=0; row_dest0) { - assert(iRow_src[itnz_src]>=iRow_src[itnz_src-1] && "row indexes are not sorted"); - if(iRow_src[itnz_src]==iRow_src[itnz_src-1]) - assert(jCol_src[itnz_src] >= jCol_src[itnz_src-1] && "col indexes are not sorted"); + if(itnz_src > 0) { + assert(iRow_src[itnz_src] >= iRow_src[itnz_src - 1] && "row indexes are not sorted"); + if(iRow_src[itnz_src] == iRow_src[itnz_src - 1]) + assert(jCol_src[itnz_src] >= jCol_src[itnz_src - 1] && "col indexes are not sorted"); } #endif ++itnz_src; } - while(itnz_src0) { - assert(iRow_src[itnz_src]>=iRow_src[itnz_src-1] && "row indexes are not sorted"); - if(iRow_src[itnz_src]==iRow_src[itnz_src-1]) - assert(jCol_src[itnz_src] >= jCol_src[itnz_src-1] && "col indexes are not sorted"); + if(itnz_src > 0) { + assert(iRow_src[itnz_src] >= iRow_src[itnz_src - 1] && "row indexes are not sorted"); + if(iRow_src[itnz_src] == iRow_src[itnz_src - 1]) + assert(jCol_src[itnz_src] >= jCol_src[itnz_src - 1] && "col indexes are not sorted"); } #endif - iRow_[itnz_dest] = row_dest;//iRow_src[itnz_src]; + iRow_[itnz_dest] = row_dest; // iRow_src[itnz_src]; jCol_[itnz_dest] = jCol_src[itnz_src]; values_[itnz_dest++] = values_src[itnz_src++]; - assert(itnz_dest<=nnz_); + assert(itnz_dest <= nnz_); } } assert(itnz_dest == nnz_); @@ -617,9 +580,9 @@ void hiopMatrixSparseTriplet::copyRowsFrom(const hiopMatrix& src_gen, * @pre 'this' must have exactly, or more cols than 'src' */ void hiopMatrixSparseTriplet::copyRowsBlockFrom(const hiopMatrix& src_gen, - const index_type& rows_src_idx_st, + const index_type& rows_src_idx_st, const size_type& n_rows, - const index_type& rows_dest_idx_st, + const index_type& rows_dest_idx_st, const size_type& dest_nnz_st) { const hiopMatrixSparse& src = dynamic_cast(src_gen); @@ -632,82 +595,83 @@ void hiopMatrixSparseTriplet::copyRowsBlockFrom(const hiopMatrix& src_gen, const int* jCol_src = src.j_col(); const double* values_src = src.M(); int nnz_src = src.numberOfNonzeros(); - int itnz_src=0; - int itnz_dest=dest_nnz_st; - //int iterators should suffice - for(auto row_add=0; row_add0) { - assert(iRow_src[itnz_src]>=iRow_src[itnz_src-1] && "row indexes are not sorted"); - if(iRow_src[itnz_src]==iRow_src[itnz_src-1]) - assert(jCol_src[itnz_src] >= jCol_src[itnz_src-1] && "col indexes are not sorted"); + if(itnz_src > 0) { + assert(iRow_src[itnz_src] >= iRow_src[itnz_src - 1] && "row indexes are not sorted"); + if(iRow_src[itnz_src] == iRow_src[itnz_src - 1]) + assert(jCol_src[itnz_src] >= jCol_src[itnz_src - 1] && "col indexes are not sorted"); } #endif ++itnz_src; } - while(itnz_src0) { - assert(iRow_src[itnz_src]>=iRow_src[itnz_src-1] && "row indexes are not sorted"); - if(iRow_src[itnz_src]==iRow_src[itnz_src-1]) - assert(jCol_src[itnz_src] >= jCol_src[itnz_src-1] && "col indexes are not sorted"); + if(itnz_src > 0) { + assert(iRow_src[itnz_src] >= iRow_src[itnz_src - 1] && "row indexes are not sorted"); + if(iRow_src[itnz_src] == iRow_src[itnz_src - 1]) + assert(jCol_src[itnz_src] >= jCol_src[itnz_src - 1] && "col indexes are not sorted"); } #endif - iRow_[itnz_dest] = row_dest;//iRow_src[itnz_src]; + iRow_[itnz_dest] = row_dest; // iRow_src[itnz_src]; jCol_[itnz_dest] = jCol_src[itnz_src]; values_[itnz_dest++] = values_src[itnz_src++]; - assert(itnz_dest<=nnz_); + assert(itnz_dest <= nnz_); } } } -void hiopMatrixSparseTriplet:: -copyDiagMatrixToSubblock(const double& src_val, - const index_type& dest_row_st, const index_type& col_dest_st, - const size_type& dest_nnz_st, const size_type &nnz_to_copy) +void hiopMatrixSparseTriplet::copyDiagMatrixToSubblock(const double& src_val, + const index_type& dest_row_st, + const index_type& col_dest_st, + const size_type& dest_nnz_st, + const size_type& nnz_to_copy) { - assert(this->numberOfNonzeros() >= nnz_to_copy+dest_nnz_st); + assert(this->numberOfNonzeros() >= nnz_to_copy + dest_nnz_st); assert(this->n() >= nnz_to_copy); assert(nnz_to_copy + dest_row_st <= this->m()); assert(nnz_to_copy + col_dest_st <= this->n()); - int itnz_dest=dest_nnz_st; - for(auto ele_add=0; ele_addnumberOfNonzeros() >= nnz_to_copy+dest_nnz_st); + assert(this->numberOfNonzeros() >= nnz_to_copy + dest_nnz_st); assert(this->n() >= nnz_to_copy); assert(nnz_to_copy + dest_row_st <= this->m()); assert(nnz_to_copy + dest_col_st <= this->n()); const hiopVectorPar& selected = dynamic_cast(ix); const hiopVectorPar& xx = dynamic_cast(dx); - const double *x=xx.local_data_const(), *pattern=selected.local_data_const(); + const double *x = xx.local_data_const(), *pattern = selected.local_data_const(); int dest_k = dest_nnz_st; int n = ix.get_local_size(); - int nnz_find=0; + int nnz_find = 0; - for(int i=0; i always print + int myrank_ = 0, numranks = 1; // this is a local object => always print - if(file==NULL) file = stdout; + if(file == NULL) file = stdout; - int max_elems = maxRows>=0 ? maxRows : nnz_; + int max_elems = maxRows >= 0 ? maxRows : nnz_; max_elems = std::min(max_elems, nnz_); - - if(myrank_==rank || rank==-1) { + + if(myrank_ == rank || rank == -1) { std::stringstream ss; - if(NULL==msg) { - if(numranks>1) { - //fprintf(file, - // "matrix of size %d %d and nonzeros %d, printing %d elems (on rank=%d)\n", - // m(), n(), numberOfNonzeros(), max_elems, myrank_); - ss << "matrix of size " << m() << " " << n() << " and nonzeros " - << numberOfNonzeros() << ", printing " << max_elems << " elems (on rank=" - << myrank_ << ")" << std::endl; + if(NULL == msg) { + if(numranks > 1) { + // fprintf(file, + // "matrix of size %d %d and nonzeros %d, printing %d elems (on rank=%d)\n", + // m(), n(), numberOfNonzeros(), max_elems, myrank_); + ss << "matrix of size " << m() << " " << n() << " and nonzeros " << numberOfNonzeros() << ", printing " << max_elems + << " elems (on rank=" << myrank_ << ")" << std::endl; } else { - ss << "matrix of size " << m() << " " << n() << " and nonzeros " - << numberOfNonzeros() << ", printing " << max_elems << " elems" << std::endl; + ss << "matrix of size " << m() << " " << n() << " and nonzeros " << numberOfNonzeros() << ", printing " << max_elems + << " elems" << std::endl; // fprintf(file, // "matrix of size %d %d and nonzeros %d, printing %d elems\n", // m(), n(), numberOfNonzeros(), max_elems); } } else { ss << msg << " "; - //fprintf(file, "%s ", msg); + // fprintf(file, "%s ", msg); } // using matlab indices - //fprintf(file, "iRow_=["); + // fprintf(file, "iRow_=["); ss << "iRow_=["; - for(int it=0; it(Jac_c); const auto& J_d = dynamic_cast(Jac_d); - + // assuming original Jac is sorted! int nnz_Jac_c = J_c.numberOfNonzeros(); int nnz_Jac_d = J_d.numberOfNonzeros(); @@ -805,43 +770,43 @@ void hiopMatrixSparseTriplet::set_Jac_FR(const hiopMatrixSparse& Jac_c, int n_d = J_d.n(); assert(n_c == n_d); - int nnz_Jac_c_new = nnz_Jac_c + 2*m_c; - int nnz_Jac_d_new = nnz_Jac_d + 2*m_d; + int nnz_Jac_c_new = nnz_Jac_c + 2 * m_c; + int nnz_Jac_d_new = nnz_Jac_d + 2 * m_d; assert(nnz_ == nnz_Jac_c_new + nnz_Jac_d_new); - - if(J_c.row_starts_ == nullptr){ + + if(J_c.row_starts_ == nullptr) { J_c.row_starts_ = J_c.allocAndBuildRowStarts(); } assert(J_c.row_starts_); - - if(J_d.row_starts_ == nullptr){ + + if(J_d.row_starts_ == nullptr) { J_d.row_starts_ = J_d.allocAndBuildRowStarts(); } assert(J_d.row_starts_); - + // extend Jac to the p and n parts --- sparsity if(iJacS != nullptr && jJacS != nullptr) { int k = 0; - + // Jac for c(x) - p + n const int* J_c_col = J_c.j_col(); for(int i = 0; i < m_c; ++i) { index_type k_base = J_c.row_starts_->idx_start_[i]; - + // copy from base Jac_c - while(k_base < J_c.row_starts_->idx_start_[i+1]) { + while(k_base < J_c.row_starts_->idx_start_[i + 1]) { iRow_[k] = iJacS[k] = i; jCol_[k] = jJacS[k] = J_c_col[k_base]; k++; k_base++; } - + // extra parts for p and n iRow_[k] = iJacS[k] = i; jCol_[k] = jJacS[k] = n_c + i; k++; - + iRow_[k] = iJacS[k] = i; jCol_[k] = jJacS[k] = n_c + m_c + i; k++; @@ -851,47 +816,47 @@ void hiopMatrixSparseTriplet::set_Jac_FR(const hiopMatrixSparse& Jac_c, const int* J_d_col = J_d.j_col(); for(int i = 0; i < m_d; ++i) { index_type k_base = J_d.row_starts_->idx_start_[i]; - + // copy from base Jac_d - while(k_base < J_d.row_starts_->idx_start_[i+1]) { + while(k_base < J_d.row_starts_->idx_start_[i + 1]) { iRow_[k] = iJacS[k] = i + m_c; jCol_[k] = jJacS[k] = J_d_col[k_base]; k++; k_base++; } - + // extra parts for p and n iRow_[k] = iJacS[k] = i + m_c; - jCol_[k] = jJacS[k] = n_d + 2*m_c + i; + jCol_[k] = jJacS[k] = n_d + 2 * m_c + i; k++; - + iRow_[k] = iJacS[k] = i + m_c; - jCol_[k] = jJacS[k] = n_d + 2*m_c + m_d + i; + jCol_[k] = jJacS[k] = n_d + 2 * m_c + m_d + i; k++; } assert(k == nnz_); } - + // extend Jac to the p and n parts --- element - if(MJacS != nullptr) { + if(MJacS != nullptr) { int k = 0; // Jac for c(x) - p + n const double* J_c_val = J_c.M(); for(int i = 0; i < m_c; ++i) { index_type k_base = J_c.row_starts_->idx_start_[i]; - + // copy from base Jac_c - while(k_base < J_c.row_starts_->idx_start_[i+1]) { + while(k_base < J_c.row_starts_->idx_start_[i + 1]) { values_[k] = MJacS[k] = J_c_val[k_base]; k++; k_base++; } - + // extra parts for p and n values_[k] = MJacS[k] = -1.0; k++; - values_[k] = MJacS[k] = 1.0; + values_[k] = MJacS[k] = 1.0; k++; } @@ -899,18 +864,18 @@ void hiopMatrixSparseTriplet::set_Jac_FR(const hiopMatrixSparse& Jac_c, const double* J_d_val = J_d.M(); for(int i = 0; i < m_d; ++i) { index_type k_base = J_d.row_starts_->idx_start_[i]; - + // copy from base Jac_d - while(k_base < J_d.row_starts_->idx_start_[i+1]) { + while(k_base < J_d.row_starts_->idx_start_[i + 1]) { values_[k] = MJacS[k] = J_d_val[k_base]; k++; k_base++; } - + // extra parts for p and n values_[k] = MJacS[k] = -1.0; k++; - values_[k] = MJacS[k] = 1.0; + values_[k] = MJacS[k] = 1.0; k++; } assert(k == nnz_); @@ -918,11 +883,10 @@ void hiopMatrixSparseTriplet::set_Jac_FR(const hiopMatrixSparse& Jac_c, } /********************************************************************************** - * Sparse symmetric matrix in triplet format. Only the lower triangle is stored - ********************************************************************************* -*/ -void hiopMatrixSymSparseTriplet::timesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x ) const + * Sparse symmetric matrix in triplet format. Only the lower triangle is stored + ********************************************************************************* + */ +void hiopMatrixSymSparseTriplet::timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { assert(ncols_ == nrows_); assert(x.get_size() == ncols_); @@ -938,22 +902,20 @@ void hiopMatrixSymSparseTriplet::timesVec(double beta, hiopVector& y, } /** y = beta * y + alpha * this * x */ -void hiopMatrixSymSparseTriplet::timesVec(double beta, double* y, - double alpha, const double* x ) const +void hiopMatrixSymSparseTriplet::timesVec(double beta, double* y, double alpha, const double* x) const { assert(ncols_ == nrows_); // y:= beta*y - for (int i = 0; i < nrows_; i++) { + for(int i = 0; i < nrows_; i++) { y[i] *= beta; } // y += alpha*this*x - for (int i = 0; i < nnz_; i++) { + for(int i = 0; i < nnz_; i++) { assert(iRow_[i] < nrows_); assert(jCol_[i] < ncols_); y[iRow_[i]] += alpha * x[jCol_[i]] * values_[i]; - if(iRow_[i]!=jCol_[i]) - y[jCol_[i]] += alpha * x[iRow_[i]] * values_[i]; + if(iRow_[i] != jCol_[i]) y[jCol_[i]] += alpha * x[iRow_[i]] * values_[i]; } } @@ -966,9 +928,9 @@ hiopMatrixSparse* hiopMatrixSymSparseTriplet::new_copy() const { assert(nrows_ == ncols_); hiopMatrixSymSparseTriplet* copy = new hiopMatrixSymSparseTriplet(nrows_, nnz_); - memcpy(copy->iRow_, iRow_, nnz_*sizeof(int)); - memcpy(copy->jCol_, jCol_, nnz_*sizeof(int)); - memcpy(copy->values_, values_, nnz_*sizeof(double)); + memcpy(copy->iRow_, iRow_, nnz_ * sizeof(int)); + memcpy(copy->jCol_, jCol_, nnz_ * sizeof(int)); + memcpy(copy->values_, values_, nnz_ * sizeof(double)); return copy; } @@ -976,24 +938,25 @@ hiopMatrixSparse* hiopMatrixSymSparseTriplet::new_copy() const * @brief block of W += alpha*this * @note W contains only the upper triangular entries */ -void hiopMatrixSymSparseTriplet:: -addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const +void hiopMatrixSymSparseTriplet::addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, + double alpha, + hiopMatrixDense& W) const { - assert(diag_start>=0 && diag_start+nrows_<=W.m()); - assert(diag_start+ncols_<=W.n()); - assert(W.n()==W.m()); + assert(diag_start >= 0 && diag_start + nrows_ <= W.m()); + assert(diag_start + ncols_ <= W.n()); + assert(W.n() == W.m()); const auto m_W = W.m(); double* WM = W.local_data(); - for(int it=0; it=0 && j>=0); - assert(i<=j && "symMatrices not aligned; source entries need to map inside the upper triangular part of destination"); - //WM[i][j] += alpha*values_[it]; - WM[i*m_W+j] += alpha*values_[it]; + for(int it = 0; it < nnz_; it++) { + assert(iRow_[it] <= jCol_[it] && "sparse symmetric matrices should contain only upper triangular entries"); + const int i = iRow_[it] + diag_start; + const int j = jCol_[it] + diag_start; + assert(i < W.m() && j < W.n()); + assert(i >= 0 && j >= 0); + assert(i <= j && "symMatrices not aligned; source entries need to map inside the upper triangular part of destination"); + // WM[i][j] += alpha*values_[it]; + WM[i * m_W + j] += alpha * values_[it]; } } @@ -1004,9 +967,10 @@ addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, * @warning This method should not be called directly. * Use addUpperTriangleToSymDenseMatrixUpperTriangle instead. */ -void hiopMatrixSymSparseTriplet:: -transAddToSymDenseMatrixUpperTriangle(int row_start, int col_start, - double alpha, hiopMatrixDense& W) const +void hiopMatrixSymSparseTriplet::transAddToSymDenseMatrixUpperTriangle(int row_start, + int col_start, + double alpha, + hiopMatrixDense& W) const { assert(0 && "This method should not be called for symmetric matrices."); } @@ -1019,21 +983,21 @@ void hiopMatrixSymSparseTriplet::startingAtAddSubDiagonalToStartingAt(int diag_s const double& alpha, hiopVector& vec_dest, int vec_start, - int num_elems/*=-1*/) const + int num_elems /*=-1*/) const { hiopVectorPar& vd = dynamic_cast(vec_dest); - if(num_elems<0) num_elems = vd.get_size(); - assert(num_elems<=vd.get_size()); + if(num_elems < 0) num_elems = vd.get_size(); + assert(num_elems <= vd.get_size()); - assert(diag_src_start>=0 && diag_src_start+num_elems<=this->nrows_); + assert(diag_src_start >= 0 && diag_src_start + num_elems <= this->nrows_); double* v = vd.local_data(); - for(int itnz=0; itnz=diag_src_start && rowvalues_[itnz]; + if(row == jCol_[itnz]) { + if(row >= diag_src_start && row < diag_src_start + num_elems) { + assert(row + vec_start < vd.get_size()); + v[vec_start + row] += alpha * this->values_[itnz]; } } } @@ -1050,7 +1014,7 @@ void hiopMatrixSparseTriplet::copySubmatrixFrom(const hiopMatrix& src_gen, auto n_cols = src.n(); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); - assert(n_cols + dest_col_st <= this->n() ); + assert(n_cols + dest_col_st <= this->n()); assert(m_rows + dest_row_st <= this->m()); assert(dest_nnz_st <= this->numberOfNonzeros()); @@ -1062,7 +1026,7 @@ void hiopMatrixSparseTriplet::copySubmatrixFrom(const hiopMatrix& src_gen, // FIXME: irow and jcol only need to be assigned once; should we save a map for the indexes? for(auto src_k = 0; src_k < src_nnz; ++src_k) { - if(offdiag_only && src_iRow[src_k]==src_jCol[src_k]) { + if(offdiag_only && src_iRow[src_k] == src_jCol[src_k]) { continue; } iRow_[dest_k] = dest_row_st + src_iRow[src_k]; @@ -1084,7 +1048,7 @@ void hiopMatrixSparseTriplet::copySubmatrixFromTrans(const hiopMatrix& src_gen, auto n_cols = src.m(); assert(this->numberOfNonzeros() >= src.numberOfNonzeros()); - assert(n_cols + dest_col_st <= this->n() ); + assert(n_cols + dest_col_st <= this->n()); assert(m_rows + dest_row_st <= this->m()); assert(dest_nnz_st <= this->numberOfNonzeros()); @@ -1096,7 +1060,7 @@ void hiopMatrixSparseTriplet::copySubmatrixFromTrans(const hiopMatrix& src_gen, // FIXME: irow and jcol only need to be assigned once; should we save a map for the indexes? for(auto src_k = 0; src_k < src_nnz; ++src_k) { - if(offdiag_only && src_iRow[src_k]==src_jCol[src_k]) { + if(offdiag_only && src_iRow[src_k] == src_jCol[src_k]) { continue; } iRow_[dest_k] = dest_row_st + src_iRow[src_k]; @@ -1115,18 +1079,18 @@ void hiopMatrixSparseTriplet::setSubmatrixToConstantDiag_w_colpattern(const doub const hiopVector& ix) { assert(ix.get_local_size() + dest_row_st <= this->m()); - assert(nnz_to_copy + dest_col_st <= this->n() ); + assert(nnz_to_copy + dest_col_st <= this->n()); assert(dest_nnz_st + nnz_to_copy <= this->numberOfNonzeros()); - const hiopVectorPar& selected= dynamic_cast(ix); - const double *pattern=selected.local_data_const(); + const hiopVectorPar& selected = dynamic_cast(ix); + const double* pattern = selected.local_data_const(); int dest_k = dest_nnz_st; int n = ix.get_local_size(); - int nnz_find=0; + int nnz_find = 0; - for(int i=0; im()); - assert(ix.get_local_size() + dest_col_st <= this->n() ); + assert(ix.get_local_size() + dest_col_st <= this->n()); assert(dest_nnz_st + nnz_to_copy <= this->numberOfNonzeros()); - const hiopVectorPar& selected= dynamic_cast(ix); - const double *pattern = selected.local_data_const(); + const hiopVectorPar& selected = dynamic_cast(ix); + const double* pattern = selected.local_data_const(); int dest_k = dest_nnz_st; int n = ix.get_local_size(); - int nnz_find=0; + int nnz_find = 0; - for(int i=0; i& extra_diag_nnz_map) +void hiopMatrixSparseTriplet::convert_to_csr_arrays(int& csr_nnz, + int** csr_kRowPtr_in, + int** csr_jCol_in, + double** csr_kVal_in, + int** index_convert_CSR2Triplet_in, + int** index_convert_extra_Diag2CSR_in, + std::unordered_map& extra_diag_nnz_map) { - assert(*csr_kRowPtr_in==nullptr && *index_convert_CSR2Triplet_in==nullptr); + assert(*csr_kRowPtr_in == nullptr && *index_convert_CSR2Triplet_in == nullptr); int n = this->n(); int nnz = numberOfNonzeros(); - *csr_kRowPtr_in = new int[n+1]{}; + *csr_kRowPtr_in = new int[n + 1]{}; - int *csr_kRowPtr = *csr_kRowPtr_in; + int* csr_kRowPtr = *csr_kRowPtr_in; csr_nnz = 0; /* Transfer triplet form to CSR form - * - * Note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part containts additional - * diagonal elements. - */ - std::unordered_map extra_diag_nnz_map_temp; - int *diag_defined = new int[n]; + * + * Note that input is in lower triangular triplet form. First part is the sparse matrix, and the 2nd part containts + * additional diagonal elements. + */ + std::unordered_map extra_diag_nnz_map_temp; + int* diag_defined = new int[n]; // compute nnz in each row { - for(int i=0;i(csr_nnz+extra_diag_nnz_map_temp.size())==nnz); + assert(csr_nnz == csr_kRowPtr[n]); + assert(static_cast(csr_nnz + extra_diag_nnz_map_temp.size()) == nnz); *csr_kVal_in = new double[csr_nnz]; *csr_jCol_in = new int[csr_nnz]; } - double *csr_kVal = *csr_kVal_in; - int *csr_jCol = *csr_jCol_in; + double* csr_kVal = *csr_kVal_in; + int* csr_jCol = *csr_jCol_in; - int *index_convert_extra_Diag2CSR_temp = new int[n]; - int *nnz_each_row_tmp = new int[n]{}; + int* index_convert_extra_Diag2CSR_temp = new int[n]; + int* nnz_each_row_tmp = new int[n]{}; // set correct col index and value { *index_convert_CSR2Triplet_in = new int[csr_nnz]; *index_convert_extra_Diag2CSR_in = new int[n]; - int *index_convert_CSR2Triplet = *index_convert_CSR2Triplet_in; - int *index_convert_extra_Diag2CSR = *index_convert_extra_Diag2CSR_in; + int* index_convert_CSR2Triplet = *index_convert_CSR2Triplet_in; + int* index_convert_extra_Diag2CSR = *index_convert_extra_Diag2CSR_in; - for(int i=0;isecond]; index_convert_extra_Diag2CSR_temp[p->first] = nnz_tmp; } @@ -1289,33 +1250,34 @@ void hiopMatrixSparseTriplet::convert_to_csr_arrays(int &csr_nnz, } // correct the missing diagonal term and sort the nonzeros - for(int i=0; i ind_temp(csr_kRowPtr[i+1]-csr_kRowPtr[i]); + std::vector ind_temp(csr_kRowPtr[i + 1] - csr_kRowPtr[i]); std::iota(ind_temp.begin(), ind_temp.end(), 0); - std::sort(ind_temp.begin(), ind_temp.end(),[&](int a, int b){ return csr_jCol[a+csr_kRowPtr[i]]::iterator p = std::find(ind_temp.begin(),ind_temp.end(),old_nnz_in_row); - assert(p != ind_temp.end()); - int new_nnz_idx = (int) std::distance (ind_temp.begin(), p) + csr_kRowPtr[i]; - assert(new_nnz_idx>=0); + std::vector::iterator p = std::find(ind_temp.begin(), ind_temp.end(), old_nnz_in_row); + assert(p != ind_temp.end()); + int new_nnz_idx = (int)std::distance(ind_temp.begin(), p) + csr_kRowPtr[i]; + assert(new_nnz_idx >= 0); index_convert_extra_Diag2CSR[i] = new_nnz_idx; extra_diag_nnz_map[new_nnz_idx] = extra_diag_nnz_map_temp[i]; } - } + } } - delete [] nnz_each_row_tmp; - delete [] diag_defined; - delete [] index_convert_extra_Diag2CSR_temp; + delete[] nnz_each_row_tmp; + delete[] diag_defined; + delete[] index_convert_extra_Diag2CSR_temp; } /* sort by first row and then col */ @@ -1326,24 +1288,23 @@ void hiopMatrixSparseTriplet::sort() // sort the nonzeros std::vector ind_temp(nnz); std::iota(ind_temp.begin(), ind_temp.end(), 0); - std::sort(ind_temp.begin(), ind_temp.end(),[&](index_type i, index_type j) { - return (iRow_[i] != iRow_[j]) ? iRow_[i](diag_out); double* v_data = vec.local_data(); - + vec.setToZero(); - for(index_type itnnz=0; itnnz(Hess); // assuming original Hess is sorted, and in upper-triangle format @@ -1388,14 +1349,14 @@ void hiopMatrixSymSparseTriplet::set_Hess_FR(const hiopMatrixSparse& Hess, int m_h = Hess.m(); int n_h = Hess.n(); assert(n_h == m_h); - - // note that n_h can be zero, i.e., original hess is empty. + + // note that n_h can be zero, i.e., original hess is empty. // Hence we use add_diag.get_size() to detect the length of x in the base problem - int nnz_h_FR = add_diag.get_size() + Hess_base.numberOfOffDiagNonzeros() ; + int nnz_h_FR = add_diag.get_size() + Hess_base.numberOfOffDiagNonzeros(); assert(nnz_ == nnz_h_FR); - - if(Hess_base.row_starts_ == nullptr){ + + if(Hess_base.row_starts_ == nullptr) { Hess_base.row_starts_ = Hess_base.allocAndBuildRowStarts(); } assert(Hess_base.row_starts_); @@ -1404,27 +1365,27 @@ void hiopMatrixSymSparseTriplet::set_Hess_FR(const hiopMatrixSparse& Hess, // sparsity may change due to te new obj term zeta*DR^2.*(x-x_ref) if(iHSS != nullptr && jHSS != nullptr) { int k = 0; - + const int* Hess_row = Hess_base.i_row(); const int* Hess_col = Hess_base.j_col(); if(m_h > 0) { for(int i = 0; i < m_h; ++i) { index_type k_base = Hess_base.row_starts_->idx_start_[i]; - size_type nnz_in_row = Hess_base.row_starts_->idx_start_[i+1] - k_base; - + size_type nnz_in_row = Hess_base.row_starts_->idx_start_[i + 1] - k_base; + // insert diagonal entry due to the new obj term iRow_[k] = iHSS[k] = i; jCol_[k] = jHSS[k] = i; k++; - + if(nnz_in_row > 0 && Hess_row[k_base] == Hess_col[k_base]) { - // first nonzero in this row is a diagonal term + // first nonzero in this row is a diagonal term // skip it since we have already defined the diagonal nonezero k_base++; } // copy from base Hess - while(k_base < Hess_base.row_starts_->idx_start_[i+1]) { + while(k_base < Hess_base.row_starts_->idx_start_[i + 1]) { iRow_[k] = iHSS[k] = i; jCol_[k] = jHSS[k] = Hess_col[k_base]; k++; @@ -1436,17 +1397,17 @@ void hiopMatrixSymSparseTriplet::set_Hess_FR(const hiopMatrixSparse& Hess, for(int i = 0; i < add_diag.get_size(); ++i) { iRow_[k] = iHSS[k] = i; jCol_[k] = jHSS[k] = i; - k++; + k++; } } assert(k == nnz_); } - + // extend Hess to the p and n parts --- element - if(MHSS != nullptr) { + if(MHSS != nullptr) { int k = 0; - + const int* Hess_row = Hess_base.i_row(); const int* Hess_col = Hess_base.j_col(); const double* Hess_val = Hess_base.M(); @@ -1457,13 +1418,13 @@ void hiopMatrixSymSparseTriplet::set_Hess_FR(const hiopMatrixSparse& Hess, if(m_h > 0) { for(int i = 0; i < m_h; ++i) { index_type k_base = Hess_base.row_starts_->idx_start_[i]; - size_type nnz_in_row = Hess_base.row_starts_->idx_start_[i+1] - k_base; - + size_type nnz_in_row = Hess_base.row_starts_->idx_start_[i + 1] - k_base; + // add diagonal entry due to the new obj term values_[k] = MHSS[k] = diag_data[i]; - + if(nnz_in_row > 0 && Hess_row[k_base] == Hess_col[k_base]) { - // first nonzero in this row is a diagonal term + // first nonzero in this row is a diagonal term // add this element to the existing diag term values_[k] += Hess_val[k_base]; MHSS[k] = values_[k]; @@ -1472,7 +1433,7 @@ void hiopMatrixSymSparseTriplet::set_Hess_FR(const hiopMatrixSparse& Hess, k++; // copy off-diag entries from base Hess - while(k_base < Hess_base.row_starts_->idx_start_[i+1]) { + while(k_base < Hess_base.row_starts_->idx_start_[i + 1]) { values_[k] = MHSS[k] = Hess_val[k_base]; k++; k_base++; @@ -1482,13 +1443,11 @@ void hiopMatrixSymSparseTriplet::set_Hess_FR(const hiopMatrixSparse& Hess, // hess in the base problem is empty. just insert the new elements for(int i = 0; i < add_diag.get_size(); ++i) { values_[k] = MHSS[k] = diag_data[k]; - k++; - } + k++; + } } assert(k == nnz_); } } - -} //end of namespace - +} // namespace hiop diff --git a/src/LinAlg/hiopMatrixSparseTriplet.hpp b/src/LinAlg/hiopMatrixSparseTriplet.hpp index b617979a9..0eff4d8c9 100644 --- a/src/LinAlg/hiopMatrixSparseTriplet.hpp +++ b/src/LinAlg/hiopMatrixSparseTriplet.hpp @@ -37,11 +37,11 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse virtual void copyRowsFrom(const hiopMatrix& src, const index_type* rows_idxs, size_type n_rows); - virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; - virtual void timesVec(double beta, double* y, double alpha, const double* x) const; + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + virtual void timesVec(double beta, double* y, double alpha, const double* x) const; - virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; - virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const; + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const; virtual void timesMat(double beta, hiopMatrix& W, double alpha, const hiopMatrix& X) const; @@ -52,7 +52,7 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse virtual void addDiagonal(const double& alpha, const hiopVector& d_); virtual void addDiagonal(const double& value); virtual void addSubDiagonal(const double& alpha, index_type start, const hiopVector& d_); - + /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems' * when num_elems>=0, or the remaining elems on 'd_' starting at 'start_on_src_vec'. */ @@ -60,7 +60,7 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse const double& alpha, const hiopVector& d_, int start_on_src_vec, - int num_elems=-1) + int num_elems = -1) { assert(false && "not needed / implemented"); } @@ -70,17 +70,17 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse } /* add to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements of - * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', scaled by 'scal' - */ + * 'd_' (source) starting at index 'start_on_src_vec'. The number of elements added is 'num_elems', scaled by 'scal' + */ virtual void copySubDiagonalFrom(const index_type& start_on_dest_diag, const size_type& num_elems, const hiopVector& d_, const index_type& start_on_nnz_idx, - double scal=1.0); + double scal = 1.0); /* add constant 'c' to the diagonal of 'this' (destination) starting at 'start_on_dest_diag' elements. - * The number of elements added is 'num_elems' - */ + * The number of elements added is 'num_elems' + */ virtual void setSubDiagonalTo(const index_type& start_on_dest_diag, const size_type& num_elems, const double& c, @@ -93,16 +93,12 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse int col_dest_start, double alpha, hiopMatrixDense& W) const; - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, - hiopMatrixDense& W) const + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const { assert(false && "counterpart method of hiopMatrixSymSparseTriplet should be used"); } - virtual void addUpperTriangleToSymSparseMatrixUpperTriangle(int diag_start, - double alpha, - hiopMatrixSparse& W) const + virtual void addUpperTriangleToSymSparseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixSparse& W) const { assert(false && "counterpart method of hiopMatrixSymSparseTriplet should be used"); } @@ -113,7 +109,7 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse */ virtual void addMDinvMtransToDiagBlockOfSymDeMatUTri(int rowCol_dest_start, const double& alpha, - const hiopVector& D, + const hiopVector& D, hiopMatrixDense& W) const; /* block of W += alpha * M * D^{-1} * transpose(N), where M=this @@ -139,32 +135,32 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse const size_type& dest_nnz_st); /** - * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' - * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to - * this->numOfNumbers() - * @pre User must know the nonzero pattern of src and dest matrices. The method assumes - * that non-zero patterns does not change between calls and that 'src_gen' is a valid - * submatrix of 'this' - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy matrix 'src_gen', into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre 'this' must have enough rows and cols after row 'dest_row_st' and col 'dest_col_st' + * @pre 'dest_nnz_st' + the number of non-zeros in the copied matrix must be less or equal to + * this->numOfNumbers() + * @pre User must know the nonzero pattern of src and dest matrices. The method assumes + * that non-zero patterns does not change between calls and that 'src_gen' is a valid + * submatrix of 'this' + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFrom(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, const bool offdiag_only = false); - + /** - * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner - * 'dest_row_st' and 'dest_col_st'. - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. - * - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy the transpose of matrix 'src_gen', into 'this' as a submatrix from corner + * 'dest_row_st' and 'dest_col_st'. + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * When `offdiag_only` is set to true, only the off-diagonal part of `src_gen` is copied. + * + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void copySubmatrixFromTrans(const hiopMatrix& src_gen, const index_type& dest_row_st, const index_type& dest_col_st, @@ -172,12 +168,12 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse const bool offdiag_only = false); /** - * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected columns of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_colpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -186,12 +182,12 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse const hiopVector& ix); /** - * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), - * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' - * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. - * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - */ + * @brief Copy selected rows of a diagonal matrix (a constant 'scalar' times identity), + * into 'this' as a submatrix from corner 'dest_row_st' and 'dest_col_st' + * The non-zero elements start from 'dest_nnz_st' will be replaced by the new elements. + * @pre The diagonal entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre this function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + */ virtual void setSubmatrixToConstantDiag_w_rowpattern(const double& scalar, const index_type& dest_row_st, const index_type& dest_col_st, @@ -200,62 +196,62 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse const hiopVector& ix); /** - * @brief Copy a diagonal matrix to destination. - * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. - * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. USE WITH CAUTION! - */ + * @brief Copy a diagonal matrix to destination. + * This diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. + * The destination is updated from the start row 'row_dest_st' and start column 'col_dest_st'. USE WITH CAUTION! + */ virtual void copyDiagMatrixToSubblock(const double& src_val, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, - const size_type &nnz_to_copy); - - /** - * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. - * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. - * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. - * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! - * @pre 'pattern' has same size as `x`. - * @pre 'pattern` has exactly `nnz_to_copy` nonzeros. - */ + const size_type& nnz_to_copy); + + /** + * @brief same as @copyDiagMatrixToSubblock, but copies only diagonal entries specified by `pattern`. + * At the destination, 'nnz_to_copy` nonzeros starting from index `dest_nnz_st` will be replaced. + * @pre The added entries in the destination need to be contiguous in the sparse triplet arrays of the destinations. + * @pre This function does NOT preserve the sorted row/col indices. USE WITH CAUTION! + * @pre 'pattern' has same size as `x`. + * @pre 'pattern` has exactly `nnz_to_copy` nonzeros. + */ virtual void copyDiagMatrixToSubblock_w_pattern(const hiopVector& x, const index_type& dest_row_st, const index_type& dest_col_st, const size_type& dest_nnz_st, - const size_type &nnz_to_copy, + const size_type& nnz_to_copy, const hiopVector& pattern); virtual double max_abs_value(); - virtual void row_max_abs_value(hiopVector &ret_vec); - - virtual void scale_row(hiopVector &vec_scal, const bool inv_scale=false); + virtual void row_max_abs_value(hiopVector& ret_vec); + + virtual void scale_row(hiopVector& vec_scal, const bool inv_scale = false); virtual bool isfinite() const; - //virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; - virtual void print(FILE* f=NULL, const char* msg=NULL, int maxRows=-1, int maxCols=-1, int rank=-1) const; + // virtual void print(int maxRows=-1, int maxCols=-1, int rank=-1) const; + virtual void print(FILE* f = NULL, const char* msg = NULL, int maxRows = -1, int maxCols = -1, int rank = -1) const; virtual void startingAtAddSubDiagonalToStartingAt(int diag_src_start, const double& alpha, hiopVector& vec_dest, int vec_start, - int num_elems=-1) const + int num_elems = -1) const { assert(0 && "This method should be used only for symmetric matrices.\n"); } - /** - * Converts `this` to CSR sparse matrix 3-array representation. This method is intended for VERY specific uses and - * should NOT be used to convert general triplet matrices to general CSR matrices. + /** + * Converts `this` to CSR sparse matrix 3-array representation. This method is intended for VERY specific uses and + * should NOT be used to convert general triplet matrices to general CSR matrices. * - * `this` stores a KKT matrix with the diagonal nonzeros added at the end of the nonzeros array(s) (hence, the + * `this` stores a KKT matrix with the diagonal nonzeros added at the end of the nonzeros array(s) (hence, the * indexes may not be sorted). These diagonal entries may end up being duplicated. Upon conversion, on output, the - * CSR arrays are ordered, first by row indexes and, for a given row index, by column indexes. - * + * CSR arrays are ordered, first by row indexes and, for a given row index, by column indexes. + * * @pre `this` contains only the lower triangular part. * @pre The double (**) pointers should be null on entry. - * + * * @param `csr_nnz` output nnz for CSR * @param `csr_kRowPtr` output row pointers * @param `csr_jCol` output column pointers @@ -264,36 +260,40 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse * @param `indexes_extra_Diag2CSR` output array with mapping from the index on the diagonal into the nonzero CSR index * @param `extra_diag_nnz_map` output, maps from the CSR indexes of the diagonals into the triplet indexes of the * the diagonals. - * + * * @note Indexes of the sparse triplet representation of `this` are usually NOT ordered. - * + * * @note All double (**) pointers are allocated internally and should be deallocated by the calling code. * */ - virtual void convert_to_csr_arrays(int &csr_nnz, - int **csr_kRowPtr, - int **csr_jCol, - double **csr_vals, - int **indexes_CSR2Triplet, - int **indexes_extra_Diag2CSR, - std::unordered_map& extra_diag_nnz_map); + virtual void convert_to_csr_arrays(int& csr_nnz, + int** csr_kRowPtr, + int** csr_jCol, + double** csr_vals, + int** indexes_CSR2Triplet, + int** indexes_extra_Diag2CSR, + std::unordered_map& extra_diag_nnz_map); /* @brief sort the nonzeros from index `first` to `last`, by row and then by column. - * @pre assuming there is no duplicate nonzero element - * @remark member variables irow_, jcol_ and values_ will be recomputed - */ + * @pre assuming there is no duplicate nonzero element + * @remark member variables irow_, jcol_ and values_ will be recomputed + */ virtual void sort(); /* @brief check if `this` matrix is a diagonal matrix - */ + */ virtual bool is_diagonal() const; /* @brief extract the diagonals to vector `diag_out` - * @pre assuming `this` matrix is sorted by row and then by column - */ + * @pre assuming `this` matrix is sorted by row and then by column + */ virtual void extract_diagonal(hiopVector& diag_out) const; - virtual size_type numberOfOffDiagNonzeros() const {assert("not implemented"&&0);return 0;}; + virtual size_type numberOfOffDiagNonzeros() const + { + assert("not implemented" && 0); + return 0; + }; /// @brief extend base problem Jac to the Jac in feasibility problem virtual void set_Jac_FR(const hiopMatrixSparse& Jac_c, @@ -303,11 +303,10 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse double* MJacS); /// @brief extend base problem Hess to the Hess in feasibility problem - virtual void set_Hess_FR(const hiopMatrixSparse& Hess, - int* iHSS, - int* jHSS, - double* MHSS, - const hiopVector& add_diag) {assert("not implemented"&&0);} + virtual void set_Hess_FR(const hiopMatrixSparse& Hess, int* iHSS, int* jHSS, double* MHSS, const hiopVector& add_diag) + { + assert("not implemented" && 0); + } virtual hiopMatrixSparse* alloc_clone() const; virtual hiopMatrixSparse* new_copy() const; @@ -321,42 +320,48 @@ class hiopMatrixSparseTriplet : public hiopMatrixSparse inline const double* M() const { return values_; } #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const { return false; } + virtual bool assertSymmetry(double tol = 1e-16) const { return false; } virtual bool checkIndexesAreOrdered() const; #endif protected: friend class hiopMatrixSparseCSRCUDA; ExecSpace exec_space_; - int* iRow_; ///< row indices of the nonzero entries - int* jCol_; ///< column indices of the nonzero entries - double* values_; ///< values_ of the nonzero entries + int* iRow_; ///< row indices of the nonzero entries + int* jCol_; ///< column indices of the nonzero entries + double* values_; ///< values_ of the nonzero entries protected: struct RowStartsInfo { - index_type *idx_start_; //size num_rows+1 + index_type* idx_start_; // size num_rows+1 size_type num_rows_; RowStartsInfo() - : idx_start_(NULL), num_rows_(0) + : idx_start_(NULL), + num_rows_(0) {} RowStartsInfo(size_type n_rows) - : idx_start_(new index_type[n_rows+1]), num_rows_(n_rows) + : idx_start_(new index_type[n_rows + 1]), + num_rows_(n_rows) {} - virtual ~RowStartsInfo() - { - delete[] idx_start_; - } + virtual ~RowStartsInfo() { delete[] idx_start_; } }; mutable RowStartsInfo* row_starts_; + protected: RowStartsInfo* allocAndBuildRowStarts() const; + private: hiopMatrixSparseTriplet() - : hiopMatrixSparse(0, 0, 0), iRow_(NULL), jCol_(NULL), values_(NULL) - { - } + : hiopMatrixSparse(0, 0, 0), + iRow_(NULL), + jCol_(NULL), + values_(NULL) + {} hiopMatrixSparseTriplet(const hiopMatrixSparseTriplet&) - : hiopMatrixSparse(0, 0, 0), iRow_(NULL), jCol_(NULL), values_(NULL) + : hiopMatrixSparse(0, 0, 0), + iRow_(NULL), + jCol_(NULL), + values_(NULL) { assert(false); } @@ -367,45 +372,46 @@ class hiopMatrixSymSparseTriplet : public hiopMatrixSparseTriplet { public: hiopMatrixSymSparseTriplet(int n, int nnz) - : hiopMatrixSparseTriplet(n, n, nnz), nnz_offdiag_{-1} + : hiopMatrixSparseTriplet(n, n, nnz), + nnz_offdiag_{-1} {} virtual ~hiopMatrixSymSparseTriplet() {} /** y = beta * y + alpha * this * x */ - virtual void timesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const; - virtual void timesVec(double beta, double* y, - double alpha, const double* x) const; + virtual void timesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const; + virtual void timesVec(double beta, double* y, double alpha, const double* x) const; - virtual void transTimesVec(double beta, hiopVector& y, - double alpha, const hiopVector& x) const + virtual void transTimesVec(double beta, hiopVector& y, double alpha, const hiopVector& x) const { return timesVec(beta, y, alpha, x); } - virtual void transTimesVec(double beta, double* y, - double alpha, const double* x) const + virtual void transTimesVec(double beta, double* y, double alpha, const double* x) const { return timesVec(beta, y, alpha, x); } - virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, int col_dest_start, - double alpha, hiopMatrixDense& W) const; + virtual void transAddToSymDenseMatrixUpperTriangle(int row_dest_start, + int col_dest_start, + double alpha, + hiopMatrixDense& W) const; - virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, - double alpha, hiopMatrixDense& W) const; + virtual void addUpperTriangleToSymDenseMatrixUpperTriangle(int diag_start, double alpha, hiopMatrixDense& W) const; - /* extract subdiagonal from 'this' (source) and adds the entries to 'vec_dest' starting at + /* extract subdiagonal from 'this' (source) and adds the entries to 'vec_dest' starting at * index 'vec_start'. If num_elems>=0, 'num_elems' are copied; otherwise copies as many as * are available in 'vec_dest' starting at 'vec_start' */ - virtual void startingAtAddSubDiagonalToStartingAt(int diag_src_start, const double& alpha, - hiopVector& vec_dest, int vec_start, int num_elems=-1) const; + virtual void startingAtAddSubDiagonalToStartingAt(int diag_src_start, + const double& alpha, + hiopVector& vec_dest, + int vec_start, + int num_elems = -1) const; virtual hiopMatrixSparse* alloc_clone() const; virtual hiopMatrixSparse* new_copy() const; #ifdef HIOP_DEEPCHECKS - virtual bool assertSymmetry(double tol=1e-16) const { return true; } + virtual bool assertSymmetry(double tol = 1e-16) const { return true; } #endif virtual size_type numberOfOffDiagNonzeros() const; @@ -415,20 +421,18 @@ class hiopMatrixSymSparseTriplet : public hiopMatrixSparseTriplet const hiopMatrixSparse& Jac_d, int* iJacS, int* jJacS, - double* MJacS){assert("not implemented"&&0);}; + double* MJacS) + { + assert("not implemented" && 0); + }; /// @brief extend base problem Hess to the Hess in feasibility problem - virtual void set_Hess_FR(const hiopMatrixSparse& Hess, - int* iHSS, - int* jHSS, - double* MHSS, - const hiopVector& add_diag); + virtual void set_Hess_FR(const hiopMatrixSparse& Hess, int* iHSS, int* jHSS, double* MHSS, const hiopVector& add_diag); protected: - mutable int nnz_offdiag_; ///< number of nonzero entries - + mutable int nnz_offdiag_; ///< number of nonzero entries }; -} //end of namespace +} // namespace hiop #endif diff --git a/src/LinAlg/hiopMatrixSparseTripletStorage.cpp b/src/LinAlg/hiopMatrixSparseTripletStorage.cpp index ccffb54f2..ec6e1fdb3 100644 --- a/src/LinAlg/hiopMatrixSparseTripletStorage.cpp +++ b/src/LinAlg/hiopMatrixSparseTripletStorage.cpp @@ -1,2 +1 @@ #include "hiopMatrixSparseTripletStorage.hpp" - diff --git a/src/LinAlg/hiopMatrixSparseTripletStorage.hpp b/src/LinAlg/hiopMatrixSparseTripletStorage.hpp index 4dfeed326..a58a6cb4f 100644 --- a/src/LinAlg/hiopMatrixSparseTripletStorage.hpp +++ b/src/LinAlg/hiopMatrixSparseTripletStorage.hpp @@ -10,135 +10,138 @@ #include #include -namespace hiop { +namespace hiop +{ - //container for sparse matrices in triplet format; implements minimal functionality for matrix ops - template - class hiopMatrixSparseTripletStorage +// container for sparse matrices in triplet format; implements minimal functionality for matrix ops +template +class hiopMatrixSparseTripletStorage +{ +public: + hiopMatrixSparseTripletStorage() + : nrows(0), + ncols(0), + nnz(0), + irow(NULL), + jcol(NULL), + values(NULL) + {} + hiopMatrixSparseTripletStorage(Tidx num_rows, Tidx num_cols, Tidx num_nz) + : nrows(num_rows), + ncols(num_cols), + nnz(num_nz) { - public: - hiopMatrixSparseTripletStorage() - : nrows(0), ncols(0), nnz(0), irow(NULL), jcol(NULL), values(NULL) - { - - } - hiopMatrixSparseTripletStorage(Tidx num_rows, Tidx num_cols, Tidx num_nz) - : nrows(num_rows), ncols(num_cols), nnz(num_nz) - { - irow = new Tidx[nnz]; - jcol = new Tidx[nnz]; - values = new Tval[nnz]; - } - - virtual ~hiopMatrixSparseTripletStorage() + irow = new Tidx[nnz]; + jcol = new Tidx[nnz]; + values = new Tval[nnz]; + } + + virtual ~hiopMatrixSparseTripletStorage() + { + if(values) delete[] values; + if(jcol) delete[] jcol; + if(irow) delete[] irow; + } + + void copyFrom(const Tidx* irow_, const Tidx* jcol_, const Tval* values_) + { + memcpy(irow, irow_, nnz * sizeof(Tidx)); + memcpy(jcol, jcol_, nnz * sizeof(Tidx)); + memcpy(values, values_, nnz * sizeof(Tval)); + } + + // sorts the (i,j) in increasing order of 'i' and for equal 'i's in increasing order of 'j' + // Complexity: n*log(n) + // + // Warning: irow, jcol, and values pointers will changes inside this method. Corresponding + // accessor methods i(), j(), M() should be called again to get the correct pointers + void sort_indexes() + { + std::vector vIdx(nnz); + std::iota(vIdx.begin(), vIdx.end(), 0); + sort(vIdx.begin(), vIdx.end(), [&](const int& i1, const int& i2) { + if(irow[i1] < irow[i2]) return true; + if(irow[i1] > irow[i2]) return false; + return jcol[i1] < jcol[i2]; + }); + + // permute irow, jcol, and M using additional storage + + // irow and jcol can use the same buffer { - if(values) delete[] values; - if(jcol) delete[] jcol; - if(irow) delete[] irow; + Tidx* buffer = new Tidx[nnz]; + for(int itnz = 0; itnz < nnz; itnz++) buffer[itnz] = irow[vIdx[itnz]]; + + // avoid copy back + Tidx* buffer2 = irow; + irow = buffer; + buffer = buffer2; + + for(int itnz = 0; itnz < nnz; itnz++) buffer[itnz] = jcol[vIdx[itnz]]; + + delete[] jcol; + jcol = buffer; } - void copyFrom(const Tidx* irow_, const Tidx* jcol_, const Tval* values_) + // M { - memcpy(irow, irow_, nnz*sizeof(Tidx)); - memcpy(jcol, jcol_, nnz*sizeof(Tidx)); - memcpy(values, values_, nnz*sizeof(Tval)); + Tval* buffer = new Tval[nnz]; + + for(int itnz = 0; itnz < nnz; itnz++) buffer[itnz] = values[vIdx[itnz]]; + + delete[] values; + values = buffer; } + } - //sorts the (i,j) in increasing order of 'i' and for equal 'i's in increasing order of 'j' - //Complexity: n*log(n) - // - // Warning: irow, jcol, and values pointers will changes inside this method. Corresponding - // accessor methods i(), j(), M() should be called again to get the correct pointers - void sort_indexes() { - std::vector vIdx(nnz); - std::iota(vIdx.begin(), vIdx.end(), 0); - sort(vIdx.begin(), vIdx.end(), - [&](const int& i1, const int& i2) { - if(irow[i1]irow[i2]) return false; - return jcol[i1]=0, 'num_elems' will be copied; + * Copy `this` (source) starting at `start_idx_in_src` to `dest` + * starting at index 'int start_idx_dest'. If num_elems>=0, 'num_elems' will be copied; * * @param[in] start_idx_in_src - position in `this` from where to copy * @param[out] dest - destination vector to where to copy vector data @@ -352,13 +348,13 @@ class hiopVector virtual void startingAtCopyToStartingAt(index_type start_idx_in_src, hiopVector& dest, index_type start_idx_dest, - size_type num_elems=-1) const = 0; + size_type num_elems = -1) const = 0; /** * @brief Copy elements of `this` vector to `dest` with offsets. * - * Copy `this` (source) starting at `start_idx_in_src` to `dest` - * starting at index 'int start_idx_dest'. If num_elems>=0, 'num_elems' will be copied; + * Copy `this` (source) starting at `start_idx_in_src` to `dest` + * starting at index 'int start_idx_dest'. If num_elems>=0, 'num_elems' will be copied; * * @param[in] start_idx_in_src - position in `this` from where to copy * @param[out] dest - destination vector to where to copy vector data @@ -377,7 +373,7 @@ class hiopVector hiopVector& dest, index_type start_idx_dest, const hiopVector& selec_dest, - size_type num_elems=-1) const = 0; + size_type num_elems = -1) const = 0; /** * @brief L2 vector norm. @@ -424,10 +420,10 @@ class hiopVector * @pre `this` and `vec` have same partitioning. * @post `vec` is not modified */ - virtual void componentMult( const hiopVector& vec ) = 0; + virtual void componentMult(const hiopVector& vec) = 0; /** - * @brief Divide `this` vector elemenwise in-place by `vec`. + * @brief Divide `this` vector elemenwise in-place by `vec`. * * @param[in] vec - input vector * @@ -435,11 +431,11 @@ class hiopVector * @pre vec[i] != 0 forall i * @post `vec` is not modified */ - virtual void componentDiv ( const hiopVector& vec ) = 0; + virtual void componentDiv(const hiopVector& vec) = 0; /** * @brief Divide `this` vector elemenwise in-place by `vec` - * with pattern selection. + * with pattern selection. * * @param[in] vec - input vector * @param[in] select - pattern selection @@ -501,7 +497,7 @@ class hiopVector virtual void component_sqrt() = 0; /** - * @brief Scale `this` vector by `c` + * @brief Scale `this` vector by `c` * * @param[in] c - scaling factor */ @@ -521,7 +517,7 @@ class hiopVector virtual void axpy(double alpha, const hiopVector& xvec) = 0; /** - * @brief Implementation of AXPY kernel, for selected entries. + * @brief Implementation of AXPY kernel, for selected entries. * this[i] += alpha * x[i] for all i where select[i] == 1.0; * * @param[in] alpha - scaling factor @@ -538,7 +534,7 @@ class hiopVector /** * @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. * - * @param[in] alpha - scaling factor + * @param[in] alpha - scaling factor * @param[in] xvec - vector of doubles to be axpy-ed to this * (size equal to size of i and less than or equal to size of this) * @param[in] i - vector of indexes in this to which the axpy operation is performed @@ -547,25 +543,25 @@ class hiopVector * @pre The entries of i must be valid (zero-based) indexes in this */ virtual void axpy(double alpha, const hiopVector& xvec, const hiopVectorInt& i) = 0; - + /** * @brief this[i] += alpha*x[i]*z[i] forall i * * @param[in] alpha - scaling factor - * @param[in] xvec - vector of doubles to be axzpy-ed to this - * @param[in] zvec - vector of doubles to be axzpy-ed to this + * @param[in] xvec - vector of doubles to be axzpy-ed to this + * @param[in] zvec - vector of doubles to be axzpy-ed to this * * @pre `this`, `xvec` and `zvec` have same partitioning. * @post `xvec` and `zvec` are not modified */ - virtual void axzpy (double alpha, const hiopVector& xvec, const hiopVector& zvec) = 0; + virtual void axzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec) = 0; /** * @brief this[i] += alpha*x[i]/z[i] forall i * * @param[in] alpha - scaling factor - * @param[in] xvec - vector of doubles to be axdzpy-ed to this - * @param[in] zvec - vector of doubles to be axdzpy-ed to this + * @param[in] xvec - vector of doubles to be axdzpy-ed to this + * @param[in] zvec - vector of doubles to be axdzpy-ed to this * * @pre `this`, `xvec` and `zvec` have same partitioning. * @pre zvec[i] != 0 forall i @@ -577,18 +573,15 @@ class hiopVector * @brief this[i] += alpha*x[i]/z[i] forall i with pattern selection * * @param[in] alpha - scaling factor - * @param[in] xvec - vector of doubles to be axdzpy-ed to this - * @param[in] zvec - vector of doubles to be axdzpy-ed to this + * @param[in] xvec - vector of doubles to be axdzpy-ed to this + * @param[in] zvec - vector of doubles to be axdzpy-ed to this * @param[in] select - pattern selection * * @pre `this`, `xvec`, `zvec` and `select` have same partitioning. * @pre zvec[i] != 0 when select[i] = 1 * @post `xvec`, `zvec` and `select` are not modified */ - virtual void axdzpy_w_pattern(double alpha, - const hiopVector& xvec, - const hiopVector& zvec, - const hiopVector& select) = 0; + virtual void axdzpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& zvec, const hiopVector& select) = 0; /** * @brief this[i] += c forall i @@ -670,7 +663,7 @@ class hiopVector /** * @brief Linear damping term - * Computes the log barrier's linear damping term of the Filter-IPM method of + * Computes the log barrier's linear damping term of the Filter-IPM method of * WaectherBiegler (see paper, section 3.7). * Essentially compute kappa_d*mu* \sum { this[i] | ixleft[i]==1 and ixright[i]==0 } * @@ -692,14 +685,14 @@ class hiopVector /** * @brief add linear damping term - * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of - * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. * * Supports distributed/MPI vectors, but performs only elementwise operations and do not * require communication. * * This method is used to add gradient contributions from the (linear) damping term used - * to handle unbounded problems. The damping terms are used for variables that are + * to handle unbounded problems. The damping terms are used for variables that are * bounded on one side only. * * @param[in] ixleft - pattern selection 1 @@ -771,7 +764,7 @@ class hiopVector * * @warning This is local method only! */ - virtual bool projectIntoBounds_local(const hiopVector& xlo, + virtual bool projectIntoBounds_local(const hiopVector& xlo, const hiopVector& ixl, const hiopVector& xup, const hiopVector& ixu, @@ -844,11 +837,8 @@ class hiopVector * @post `xvec` and `ixvec` are not modified * * @note Implementation probably inefficient. - */ - virtual void adjustDuals_plh(const hiopVector& xvec, - const hiopVector& ixvec, - const double& mu, - const double& kappa) = 0; + */ + virtual void adjustDuals_plh(const hiopVector& xvec, const hiopVector& ixvec, const double& mu, const double& kappa) = 0; /** * @brief Check if all elements of the vector are zero @@ -885,13 +875,13 @@ class hiopVector * @warning This is local method only! */ virtual bool isfinite_local() const = 0; - + /** * @brief Prints vector data to a file in Matlab format. * * @pre Vector data was moved from the memory space to the host mirror. */ - virtual void print(FILE* file=nullptr, const char* message=nullptr, int max_elems=-1, int rank=-1) const = 0; + virtual void print(FILE* file = nullptr, const char* message = nullptr, int max_elems = -1, int rank = -1) const = 0; /** * @brief allocates a vector that mirrors this, but doesn't copy the values @@ -901,7 +891,7 @@ class hiopVector /** * @brief allocates a vector that mirrors this, and copies the values */ - virtual hiopVector* new_copy () const = 0; + virtual hiopVector* new_copy() const = 0; /** * @brief return the global size of `this` vector @@ -932,7 +922,7 @@ class hiopVector * @brief accessor to the local data of `this` vector */ virtual const double* local_data_host_const() const = 0; - + /** * @brief get the number of values that are less than the given tolerance 'val'. * @@ -941,7 +931,7 @@ class hiopVector * @post `val` is not modified * @todo: add unit test */ - virtual size_type numOfElemsLessThan(const double &val) const = 0; + virtual size_type numOfElemsLessThan(const double& val) const = 0; /** * @brief get the number of values whose absolute value are less than the given tolerance 'val'. @@ -951,10 +941,10 @@ class hiopVector * @post `val` is not modified * @todo: add unit test */ - virtual size_type numOfElemsAbsLessThan(const double &val) const = 0; + virtual size_type numOfElemsAbsLessThan(const double& val) const = 0; /** - * @brief set enum-type array 'arr', starting at `start` and ending at `end`, + * @brief set enum-type array 'arr', starting at `start` and ending at `end`, * to the values in array `arr_src` from 'start_src` * * @param[out] arr - array of used to define hiopInterfaceBase::NonlinearityType @@ -969,14 +959,14 @@ class hiopVector * @post `arr_src` is not modified * @todo: add unit test */ - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType* arr_src, const int start_src) const = 0; /** - * @brief set enum-type array 'arr', starting at `start` and ending at `end`, + * @brief set enum-type array 'arr', starting at `start` and ending at `end`, * to the values in array `arr_src` from 'start_src` * * @param[out] arr - array of used to define hiopInterfaceBase::NonlinearityType @@ -989,9 +979,9 @@ class hiopVector * @post `arr_src` is not modified * @todo: add unit test */ - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType arr_src) const = 0; /** @@ -1003,15 +993,13 @@ class hiopVector virtual bool is_equal(const hiopVector& vec) const = 0; protected: - size_type n_; //we assume sequential data + size_type n_; // we assume sequential data protected: /** * @brief for internal use only; derived classes may use copy constructor and always allocate data_ */ hiopVector(const hiopVector& v) - : n_(v.n_) - { - }; + : n_(v.n_) {}; }; -} +} // namespace hiop diff --git a/src/LinAlg/hiopVectorCompoundPD.cpp b/src/LinAlg/hiopVectorCompoundPD.cpp index 9f7d31fe5..fda099cf7 100644 --- a/src/LinAlg/hiopVectorCompoundPD.cpp +++ b/src/LinAlg/hiopVectorCompoundPD.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -67,8 +67,8 @@ namespace hiop { hiopVectorCompoundPD::hiopVectorCompoundPD(bool own_vectors) -: n_parts_{0}, - own_vectors_{own_vectors} + : n_parts_{0}, + own_vectors_{own_vectors} { n_ = 0; } @@ -101,51 +101,51 @@ hiopVectorCompoundPD::hiopVectorCompoundPD(const hiopIterate* dir) n_ = 0; own_vectors_ = false; - //hiopVector* x = dir->x->alloc_clone(); + // hiopVector* x = dir->x->alloc_clone(); n_ += dir->x->get_size(); vectors_.push_back(dir->x); - //hiopVector* d = dir->d->alloc_clone(); + // hiopVector* d = dir->d->alloc_clone(); n_ += dir->d->get_size(); vectors_.push_back(dir->d); - //hiopVector* yc = dir->yc->alloc_clone(); + // hiopVector* yc = dir->yc->alloc_clone(); n_ += dir->yc->get_size(); vectors_.push_back(dir->yc); - //hiopVector* yd = dir->yd->alloc_clone(); + // hiopVector* yd = dir->yd->alloc_clone(); n_ += dir->yd->get_size(); vectors_.push_back(dir->yd); - //hiopVector* sxl = dir->sxl->alloc_clone(); + // hiopVector* sxl = dir->sxl->alloc_clone(); n_ += dir->sxl->get_size(); vectors_.push_back(dir->sxl); - //hiopVector* sxu = dir->sxu->alloc_clone(); + // hiopVector* sxu = dir->sxu->alloc_clone(); n_ += dir->sxu->get_size(); vectors_.push_back(dir->sxu); - //hiopVector* sdl = dir->sdl->alloc_clone(); + // hiopVector* sdl = dir->sdl->alloc_clone(); n_ += dir->sdl->get_size(); vectors_.push_back(dir->sdl); - //hiopVector* sdu = dir->sdu->alloc_clone(); + // hiopVector* sdu = dir->sdu->alloc_clone(); n_ += dir->sdu->get_size(); vectors_.push_back(dir->sdu); - //hiopVector* zl = dir->zl->alloc_clone(); + // hiopVector* zl = dir->zl->alloc_clone(); n_ += dir->zl->get_size(); vectors_.push_back(dir->zl); - //hiopVector* zu = dir->zu->alloc_clone(); + // hiopVector* zu = dir->zu->alloc_clone(); n_ += dir->zu->get_size(); vectors_.push_back(dir->zu); - //hiopVector* vl = dir->vl->alloc_clone(); + // hiopVector* vl = dir->vl->alloc_clone(); n_ += dir->vl->get_size(); vectors_.push_back(dir->vl); - //hiopVector* vu = dir->vu->alloc_clone(); + // hiopVector* vu = dir->vu->alloc_clone(); n_ += dir->vu->get_size(); vectors_.push_back(dir->vu); @@ -157,51 +157,51 @@ hiopVectorCompoundPD::hiopVectorCompoundPD(const hiopResidual* resid) n_ = 0; own_vectors_ = false; - //hiopVector* x = resid->rx->alloc_clone(); + // hiopVector* x = resid->rx->alloc_clone(); n_ += resid->rx->get_size(); vectors_.push_back(resid->rx); - //hiopVector* d = resid->rd->alloc_clone(); + // hiopVector* d = resid->rd->alloc_clone(); n_ += resid->rd->get_size(); vectors_.push_back(resid->rd); - //hiopVector* yc = resid->ryc->alloc_clone(); + // hiopVector* yc = resid->ryc->alloc_clone(); n_ += resid->ryc->get_size(); vectors_.push_back(resid->ryc); - //hiopVector* yd = resid->ryd->alloc_clone(); + // hiopVector* yd = resid->ryd->alloc_clone(); n_ += resid->ryd->get_size(); vectors_.push_back(resid->ryd); - //hiopVector* sxl = resid->rxl->alloc_clone(); + // hiopVector* sxl = resid->rxl->alloc_clone(); n_ += resid->rxl->get_size(); vectors_.push_back(resid->rxl); - //hiopVector* sxu = resid->rxu->alloc_clone(); + // hiopVector* sxu = resid->rxu->alloc_clone(); n_ += resid->rxu->get_size(); vectors_.push_back(resid->rxu); - //hiopVector* sdl = resid->rdl->alloc_clone(); + // hiopVector* sdl = resid->rdl->alloc_clone(); n_ += resid->rdl->get_size(); vectors_.push_back(resid->rdl); - //hiopVector* sdu = resid->rdu->alloc_clone(); + // hiopVector* sdu = resid->rdu->alloc_clone(); n_ += resid->rdu->get_size(); vectors_.push_back(resid->rdu); - //hiopVector* zl = resid->rszl->alloc_clone(); + // hiopVector* zl = resid->rszl->alloc_clone(); n_ += resid->rszl->get_size(); vectors_.push_back(resid->rszl); - //hiopVector* zu = resid->rszu->alloc_clone(); + // hiopVector* zu = resid->rszu->alloc_clone(); n_ += resid->rszu->get_size(); vectors_.push_back(resid->rszu); - //hiopVector* vl = resid->rsvl->alloc_clone(); + // hiopVector* vl = resid->rsvl->alloc_clone(); n_ += resid->rsvl->get_size(); vectors_.push_back(resid->rsvl); - //hiopVector* vu = resid->rsvu->alloc_clone(); + // hiopVector* vu = resid->rsvu->alloc_clone(); n_ += resid->rsvu->get_size(); vectors_.push_back(resid->rsvu); @@ -215,7 +215,7 @@ hiopVector* hiopVectorCompoundPD::alloc_clone() const return v; } -hiopVector* hiopVectorCompoundPD::new_copy () const +hiopVector* hiopVectorCompoundPD::new_copy() const { hiopVector* v = new hiopVectorCompoundPD(*this); assert(v); @@ -285,7 +285,7 @@ void hiopVectorCompoundPD::setToConstant_w_patternSelect(double c, const hiopVec } } -void hiopVectorCompoundPD::copyFrom(const hiopVector& v_in ) +void hiopVectorCompoundPD::copyFrom(const hiopVector& v_in) { const hiopVectorCompoundPD& v = dynamic_cast(v_in); assert(this->get_num_parts() == v.get_num_parts()); @@ -304,10 +304,7 @@ void hiopVectorCompoundPD::copy_to_vectorpar(hiopVectorPar& vdest) const assert(0 && "TODO: change this method to copy_to_host? host-device memory transfer for each component."); } -void hiopVectorCompoundPD::copyFrom(const double* v_local_data ) -{ - assert(0 && "not required."); -} +void hiopVectorCompoundPD::copyFrom(const double* v_local_data) { assert(0 && "not required."); } /// @brief Copy from vec the elements specified by the indices in index_in_src void hiopVectorCompoundPD::copy_from_w_pattern(const hiopVector& vv, const hiopVector& select) @@ -317,7 +314,7 @@ void hiopVectorCompoundPD::copy_from_w_pattern(const hiopVector& vv, const hiopV assert(n_ == ix.n_); assert(n_ == v.n_); - + for(index_type i = 0; i < n_parts_; i++) { vectors_[i]->copy_from_w_pattern(v.getVector(i), ix.getVector(i)); } @@ -348,7 +345,7 @@ void hiopVectorCompoundPD::copyFromStarting(int start_index_in_this, const doubl assert(0 && "not required."); } -void hiopVectorCompoundPD::copyFromStarting(int start_index/*_in_src*/,const hiopVector& v_) +void hiopVectorCompoundPD::copyFromStarting(int start_index /*_in_src*/, const hiopVector& v_) { assert(0 && "not required."); } @@ -358,90 +355,78 @@ void hiopVectorCompoundPD::copy_from_starting_at(const double* v, int start_inde assert(0 && "not required."); } -void hiopVectorCompoundPD::startingAtCopyFromStartingAt(int start_idx_dest, - const hiopVector& v_in, - int start_idx_src) +void hiopVectorCompoundPD::startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& v_in, int start_idx_src) { assert(0 && "not required."); } -void hiopVectorCompoundPD::copyToStarting(int start_index, hiopVector& v_) const -{ - assert(0 && "not required."); -} +void hiopVectorCompoundPD::copyToStarting(int start_index, hiopVector& v_) const { assert(0 && "not required."); } -void hiopVectorCompoundPD::copyToStarting(hiopVector& vec, int start_index_in_dest) const -{ - assert(0 && "not required."); -} +void hiopVectorCompoundPD::copyToStarting(hiopVector& vec, int start_index_in_dest) const { assert(0 && "not required."); } void hiopVectorCompoundPD::copyToStartingAt_w_pattern(hiopVector& v_, - index_type start_index/*_in_dest*/, - const hiopVector& select) const + index_type start_index /*_in_dest*/, + const hiopVector& select) const { assert(0 && "not required."); } /* copy 'c' and `d` into `this`, according to the map 'c_map` and `d_map`, respectively. -* e.g., this[c_map[i]] = c[i]; -* -* @pre the size of `this` = the size of `c` + the size of `d`. -* @pre `c_map` \Union `d_map` = {0, ..., size_of_this_vec-1} -*/ -void hiopVectorCompoundPD::copy_from_two_vec_w_pattern(const hiopVector& c, - const hiopVectorInt& c_map, - const hiopVector& d, - const hiopVectorInt& d_map) + * e.g., this[c_map[i]] = c[i]; + * + * @pre the size of `this` = the size of `c` + the size of `d`. + * @pre `c_map` \Union `d_map` = {0, ..., size_of_this_vec-1} + */ +void hiopVectorCompoundPD::copy_from_two_vec_w_pattern(const hiopVector& c, + const hiopVectorInt& c_map, + const hiopVector& d, + const hiopVectorInt& d_map) { assert(0 && "not required."); } /* split `this` to `c` and `d`, according to the map 'c_map` and `d_map`, respectively. -* -* @pre the size of `this` = the size of `c` + the size of `d`. -* @pre `c_map` \Union `d_map` = {0, ..., size_of_this_vec-1} -*/ -void hiopVectorCompoundPD::copy_to_two_vec_w_pattern(hiopVector& c, - const hiopVectorInt& c_map, - hiopVector& d, - const hiopVectorInt& d_map) const + * + * @pre the size of `this` = the size of `c` + the size of `d`. + * @pre `c_map` \Union `d_map` = {0, ..., size_of_this_vec-1} + */ +void hiopVectorCompoundPD::copy_to_two_vec_w_pattern(hiopVector& c, + const hiopVectorInt& c_map, + hiopVector& d, + const hiopVectorInt& d_map) const { assert(0 && "not required."); } -/* copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' +/* copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' * If num_elems>=0, 'num_elems' will be copied; if num_elems<0, elements will be copied till the end of * either source ('this') or destination ('dest') is reached */ -void hiopVectorCompoundPD:: -startingAtCopyToStartingAt(index_type start_idx_in_src, - hiopVector& dest_, - index_type start_idx_dest, - size_type num_elems/*=-1*/) const +void hiopVectorCompoundPD::startingAtCopyToStartingAt(index_type start_idx_in_src, + hiopVector& dest_, + index_type start_idx_dest, + size_type num_elems /*=-1*/) const { assert(0 && "not required."); } void hiopVectorCompoundPD::startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, - hiopVector& dest_, - index_type start_idx_dest, - const hiopVector& selec_dest, - size_type num_elems/*=-1*/) const + hiopVector& dest_, + index_type start_idx_dest, + const hiopVector& selec_dest, + size_type num_elems /*=-1*/) const { assert(0 && "not required."); } -void hiopVectorCompoundPD::copyTo(double* dest) const -{ - assert(0 && "not required."); -} +void hiopVectorCompoundPD::copyTo(double* dest) const { assert(0 && "not required."); } -double hiopVectorCompoundPD::twonorm() const +double hiopVectorCompoundPD::twonorm() const { double nrm = 0.; for(index_type i = 0; i < n_parts_; i++) { double arg = vectors_[i]->twonorm(); - nrm += arg*arg; + nrm += arg * arg; } nrm = std::sqrt(nrm); return nrm; @@ -466,7 +451,7 @@ double hiopVectorCompoundPD::infnorm() const for(index_type i = 0; i < n_parts_; i++) { double arg = vectors_[i]->infnorm(); - nrm = (nrm>arg)?nrm:arg; + nrm = (nrm > arg) ? nrm : arg; } return nrm; } @@ -477,7 +462,7 @@ double hiopVectorCompoundPD::infnorm_local() const for(index_type i = 0; i < n_parts_; i++) { double arg = vectors_[i]->infnorm_local(); - nrm = (nrm>arg)?nrm:arg; + nrm = (nrm > arg) ? nrm : arg; } return nrm; } @@ -522,10 +507,10 @@ void hiopVectorCompoundPD::componentDiv(const hiopVector& v_) } } -void hiopVectorCompoundPD::componentDiv_w_selectPattern( const hiopVector& v_, const hiopVector& ix_) +void hiopVectorCompoundPD::componentDiv_w_selectPattern(const hiopVector& v_, const hiopVector& ix_) { const hiopVectorCompoundPD& v = dynamic_cast(v_); - const hiopVectorCompoundPD& ix= dynamic_cast(ix_); + const hiopVectorCompoundPD& ix = dynamic_cast(ix_); assert(this->get_num_parts() == v.get_num_parts()); assert(this->get_num_parts() == ix.get_num_parts()); @@ -637,7 +622,7 @@ void hiopVectorCompoundPD::axzpy(double alpha, const hiopVector& x_, const hiopV void hiopVectorCompoundPD::axdzpy(double alpha, const hiopVector& x_, const hiopVector& z_) { - if(alpha==0.) return; + if(alpha == 0.) return; const hiopVectorCompoundPD& vx = dynamic_cast(x_); const hiopVectorCompoundPD& vz = dynamic_cast(z_); assert(this->get_num_parts() == vx.get_num_parts()); @@ -646,11 +631,14 @@ void hiopVectorCompoundPD::axdzpy(double alpha, const hiopVector& x_, const hiop } } -void hiopVectorCompoundPD::axdzpy_w_pattern(double alpha, const hiopVector& x_, const hiopVector& z_, const hiopVector& select) +void hiopVectorCompoundPD::axdzpy_w_pattern(double alpha, + const hiopVector& x_, + const hiopVector& z_, + const hiopVector& select) { const hiopVectorCompoundPD& vx = dynamic_cast(x_); const hiopVectorCompoundPD& vz = dynamic_cast(z_); - const hiopVectorCompoundPD& sel= dynamic_cast(select); + const hiopVectorCompoundPD& sel = dynamic_cast(select); assert(this->get_num_parts() == vx.get_num_parts()); for(index_type i = 0; i < n_parts_; i++) { vectors_[i]->axdzpy_w_pattern(alpha, vx.getVector(i), vz.getVector(i), sel.getVector(i)); @@ -664,7 +652,7 @@ void hiopVectorCompoundPD::addConstant(double c) } } -void hiopVectorCompoundPD::addConstant_w_patternSelect(double c, const hiopVector& ix_) +void hiopVectorCompoundPD::addConstant_w_patternSelect(double c, const hiopVector& ix_) { const hiopVectorCompoundPD& ix = dynamic_cast(ix_); assert(this->get_num_parts() == ix.get_num_parts()); @@ -678,7 +666,7 @@ double hiopVectorCompoundPD::min() const double ret_val = std::numeric_limits::max(); for(index_type i = 0; i < n_parts_; i++) { double arg = vectors_[i]->min(); - ret_val = (ret_val(select); assert(this->get_num_parts() == ix.get_num_parts()); - + double ret_val = std::numeric_limits::max(); for(index_type i = 0; i < n_parts_; i++) { double arg = vectors_[i]->min_w_pattern(ix.getVector(i)); - ret_val = (ret_val(select); - assert(this->get_num_parts() == ix.get_num_parts()); + assert(this->get_num_parts() == ix.get_num_parts()); for(index_type i = 0; i < n_parts_; i++) { sum += vectors_[i]->logBarrier_local(ix.getVector(i)); } return sum; } -double hiopVectorCompoundPD::sum_local() const +double hiopVectorCompoundPD::sum_local() const { double sum = 0.0; for(index_type i = 0; i < n_parts_; i++) { @@ -741,32 +726,32 @@ void hiopVectorCompoundPD::addLogBarrierGrad(double alpha, const hiopVector& vx, { const hiopVectorCompoundPD& ix = dynamic_cast(select); const hiopVectorCompoundPD& x = dynamic_cast(select); - assert(this->get_num_parts() == ix.get_num_parts()); + assert(this->get_num_parts() == ix.get_num_parts()); for(index_type i = 0; i < n_parts_; i++) { vectors_[i]->addLogBarrierGrad(alpha, x.getVector(i), ix.getVector(i)); } } -double hiopVectorCompoundPD::linearDampingTerm_local(const hiopVector& ixleft, - const hiopVector& ixright, - const double& mu, - const double& kappa_d) const +double hiopVectorCompoundPD::linearDampingTerm_local(const hiopVector& ixleft, + const hiopVector& ixright, + const double& mu, + const double& kappa_d) const { const hiopVectorCompoundPD& ixl = dynamic_cast(ixleft); const hiopVectorCompoundPD& ixr = dynamic_cast(ixright); assert(this->get_num_parts() == ixl.get_num_parts()); assert(this->get_num_parts() == ixr.get_num_parts()); - double term=0.0; + double term = 0.0; for(index_type i = 0; i < n_parts_; i++) { - term += vectors_[i]->linearDampingTerm_local(ixl.getVector(i), ixr.getVector(i), mu, kappa_d); + term += vectors_[i]->linearDampingTerm_local(ixl.getVector(i), ixr.getVector(i), mu, kappa_d); } return term; } void hiopVectorCompoundPD::addLinearDampingTerm(const hiopVector& ixleft, - const hiopVector& ixright, - const double& alpha, - const double& ct) + const hiopVector& ixright, + const double& alpha, + const double& ct) { const hiopVectorCompoundPD& ixl = dynamic_cast(ixleft); const hiopVectorCompoundPD& ixr = dynamic_cast(ixright); @@ -774,13 +759,13 @@ void hiopVectorCompoundPD::addLinearDampingTerm(const hiopVector& ixleft, assert(this->get_num_parts() == ixr.get_num_parts()); for(index_type i = 0; i < n_parts_; i++) { - vectors_[i]->addLinearDampingTerm(ixl.getVector(i), ixr.getVector(i), alpha, ct); + vectors_[i]->addLinearDampingTerm(ixl.getVector(i), ixr.getVector(i), alpha, ct); } } int hiopVectorCompoundPD::allPositive() { - int allPos=true; + int allPos = true; for(index_type i = 0; i < n_parts_; i++) { if(!vectors_[i]->allPositive()) { allPos = false; @@ -791,37 +776,38 @@ int hiopVectorCompoundPD::allPositive() } bool hiopVectorCompoundPD::projectIntoBounds_local(const hiopVector& xl_, - const hiopVector& ixl_, - const hiopVector& xu_, - const hiopVector& ixu_, - double kappa1, - double kappa2) + const hiopVector& ixl_, + const hiopVector& xu_, + const hiopVector& ixu_, + double kappa1, + double kappa2) { - const hiopVectorCompoundPD& xl = dynamic_cast(xl_); + const hiopVectorCompoundPD& xl = dynamic_cast(xl_); const hiopVectorCompoundPD& ixl = dynamic_cast(ixl_); - const hiopVectorCompoundPD& xu = dynamic_cast(xu_); + const hiopVectorCompoundPD& xu = dynamic_cast(xu_); const hiopVectorCompoundPD& ixu = dynamic_cast(ixu_); - assert(this->get_num_parts() == xl.get_num_parts()); + assert(this->get_num_parts() == xl.get_num_parts()); assert(this->get_num_parts() == ixl.get_num_parts()); - assert(this->get_num_parts() == xu.get_num_parts()); + assert(this->get_num_parts() == xu.get_num_parts()); assert(this->get_num_parts() == ixu.get_num_parts()); for(index_type i = 0; i < n_parts_; i++) { - vectors_[i]->projectIntoBounds_local(xl.getVector(i),ixl.getVector(i),xu.getVector(i),ixu.getVector(i),kappa1,kappa2); + vectors_[i] + ->projectIntoBounds_local(xl.getVector(i), ixl.getVector(i), xu.getVector(i), ixu.getVector(i), kappa1, kappa2); } return true; } /* max{a\in(0,1]| x+ad >=(1-tau)x} */ -double hiopVectorCompoundPD::fractionToTheBdry_local(const hiopVector& dx, const double& tau) const +double hiopVectorCompoundPD::fractionToTheBdry_local(const hiopVector& dx, const double& tau) const { const hiopVectorCompoundPD& x = dynamic_cast(dx); assert(this->get_num_parts() == x.get_num_parts()); - - double alpha=1.0, aux; + + double alpha = 1.0, aux; for(index_type i = 0; i < n_parts_; i++) { aux = vectors_[i]->fractionToTheBdry_local(x.getVector(i), tau); - if(aux=(1-tau)x} */ -double hiopVectorCompoundPD:: -fractionToTheBdry_w_pattern_local(const hiopVector& dx, const double& tau, const hiopVector& select) const +double hiopVectorCompoundPD::fractionToTheBdry_w_pattern_local(const hiopVector& dx, + const double& tau, + const hiopVector& select) const { const hiopVectorCompoundPD& x = dynamic_cast(dx); const hiopVectorCompoundPD& ix = dynamic_cast(select); assert(this->get_num_parts() == x.get_num_parts()); assert(this->get_num_parts() == ix.get_num_parts()); - - double alpha=1.0, aux; + + double alpha = 1.0, aux; for(index_type i = 0; i < n_parts_; i++) { aux = vectors_[i]->fractionToTheBdry_w_pattern_local(x.getVector(i), tau, ix.getVector(i)); - if(aux(select); assert(this->get_num_parts() == ix.get_num_parts()); @@ -874,7 +861,7 @@ bool hiopVectorCompoundPD::matchesPattern(const hiopVector& select) int hiopVectorCompoundPD::allPositive_w_patternSelect(const hiopVector& select) { - int allPos=1; + int allPos = 1; const hiopVectorCompoundPD& ix = dynamic_cast(select); assert(this->get_num_parts() == ix.get_num_parts()); @@ -883,14 +870,14 @@ int hiopVectorCompoundPD::allPositive_w_patternSelect(const hiopVector& select) allPos = false; break; } - } + } return allPos; } void hiopVectorCompoundPD::adjustDuals_plh(const hiopVector& x_, - const hiopVector& select, - const double& mu, - const double& kappa) + const hiopVector& select, + const double& mu, + const double& kappa) { const hiopVectorCompoundPD& x = dynamic_cast(x_); const hiopVectorCompoundPD& ix = dynamic_cast(select); @@ -910,7 +897,7 @@ bool hiopVectorCompoundPD::is_zero() const all_zero = false; break; } - } + } return all_zero; } @@ -944,9 +931,12 @@ bool hiopVectorCompoundPD::isfinite_local() const return true; } -void hiopVectorCompoundPD::print(FILE* file/*=nullptr*/, const char* msg/*=nullptr*/, int max_elems/*=-1*/, int rank/*=-1*/) const +void hiopVectorCompoundPD::print(FILE* file /*=nullptr*/, + const char* msg /*=nullptr*/, + int max_elems /*=-1*/, + int rank /*=-1*/) const { - int myrank_=0; + int myrank_ = 0; if(nullptr == file) { file = stdout; @@ -955,12 +945,14 @@ void hiopVectorCompoundPD::print(FILE* file/*=nullptr*/, const char* msg/*=nullp #ifdef HIOP_USE_MPI int numranks = 1; MPI_Comm comm_ = MPI_COMM_SELF; - if(rank>=0) { - int err = MPI_Comm_rank(comm_, &myrank_); assert(err==MPI_SUCCESS); - err = MPI_Comm_size(comm_, &numranks); assert(err==MPI_SUCCESS); + if(rank >= 0) { + int err = MPI_Comm_rank(comm_, &myrank_); + assert(err == MPI_SUCCESS); + err = MPI_Comm_size(comm_, &numranks); + assert(err == MPI_SUCCESS); } #endif - if(myrank_==rank || rank==-1) { + if(myrank_ == rank || rank == -1) { for(index_type i = 0; i < n_parts_; i++) { fprintf(file, "Compound vector with %d parts. Printing %d-th part \n", n_parts_, i); vectors_[i]->print(file, msg, max_elems, rank); @@ -968,8 +960,7 @@ void hiopVectorCompoundPD::print(FILE* file/*=nullptr*/, const char* msg/*=nullp } } - -size_type hiopVectorCompoundPD::numOfElemsLessThan(const double &val) const +size_type hiopVectorCompoundPD::numOfElemsLessThan(const double& val) const { size_type ret_num = 0; for(index_type i = 0; i < n_parts_; i++) { @@ -978,7 +969,7 @@ size_type hiopVectorCompoundPD::numOfElemsLessThan(const double &val) const return ret_num; } -size_type hiopVectorCompoundPD::numOfElemsAbsLessThan(const double &val) const +size_type hiopVectorCompoundPD::numOfElemsAbsLessThan(const double& val) const { size_type ret_num = 0; for(index_type i = 0; i < n_parts_; i++) { @@ -988,42 +979,33 @@ size_type hiopVectorCompoundPD::numOfElemsAbsLessThan(const double &val) const return ret_num; } -void hiopVectorCompoundPD::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, - const hiopInterfaceBase::NonlinearityType* arr_src, - const int start_src) const +void hiopVectorCompoundPD::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, + const hiopInterfaceBase::NonlinearityType* arr_src, + const int start_src) const { assert(0 && "not required."); } -void hiopVectorCompoundPD::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, - const hiopInterfaceBase::NonlinearityType arr_src) const +void hiopVectorCompoundPD::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, + const hiopInterfaceBase::NonlinearityType arr_src) const { assert(0 && "not required."); } -bool hiopVectorCompoundPD::is_equal(const hiopVector& vec) const -{ - assert(0 && "not required."); -} +bool hiopVectorCompoundPD::is_equal(const hiopVector& vec) const { assert(0 && "not required."); } -void hiopVectorCompoundPD::addVector(hiopVector *v) +void hiopVectorCompoundPD::addVector(hiopVector* v) { vectors_.push_back(v); n_ += v->get_size(); } -hiopVector& hiopVectorCompoundPD::getVector(index_type index) const -{ - return *(vectors_[index]); -} +hiopVector& hiopVectorCompoundPD::getVector(index_type index) const { return *(vectors_[index]); } -size_type hiopVectorCompoundPD::get_num_parts() const -{ - return n_parts_; -} +size_type hiopVectorCompoundPD::get_num_parts() const { return n_parts_; } -}; +}; // namespace hiop diff --git a/src/LinAlg/hiopVectorCompoundPD.hpp b/src/LinAlg/hiopVectorCompoundPD.hpp index 2dfc0a517..4b58b6e1b 100644 --- a/src/LinAlg/hiopVectorCompoundPD.hpp +++ b/src/LinAlg/hiopVectorCompoundPD.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -72,7 +72,7 @@ namespace hiop /** * @brief A vector that consists of different type of hiopVector. - * + * * @note all the functions that requires hiopVectorIntCompoundPD are not used in the current hiop implementation */ @@ -85,7 +85,7 @@ class hiopVectorCompoundPD : public hiopVector ~hiopVectorCompoundPD(); - void addVector(hiopVector *v); + void addVector(hiopVector* v); hiopVector& getVector(index_type index) const; @@ -93,7 +93,7 @@ class hiopVectorCompoundPD : public hiopVector size_type get_num_parts() const; virtual void setToZero(); - virtual void setToConstant( double c ); + virtual void setToConstant(double c); virtual void set_to_random_uniform(double minv, double maxv); virtual void setToConstant_w_patternSelect(double c, const hiopVector& select); @@ -104,14 +104,14 @@ class hiopVectorCompoundPD : public hiopVector /** Copy to `this` the array content of the hiopVectorPar vector passed as argument. * * @pre `this` and source vector should have the same size. - * @pre `this` and source vector should have the same MPI distributions (and, + * @pre `this` and source vector should have the same MPI distributions (and, * hence, same number of local elements) when applicable. */ virtual void copy_from_vectorpar(const hiopVectorPar& vsrc); /** - * @brief Copy from src the elements specified by the indices in index_in_src. + * @brief Copy from src the elements specified by the indices in index_in_src. * - * @pre All vectors must reside in the same memory space. + * @pre All vectors must reside in the same memory space. * @pre Size of src must be greater or equal than size of this * @pre Size of index_in_src must be equal to size of this * @pre Elements of index_in_src must be valid (zero-based) indexes in src @@ -119,9 +119,9 @@ class hiopVectorCompoundPD : public hiopVector */ virtual void copy_from_indexes(const hiopVector& src, const hiopVectorInt& index_in_src); /** - * @brief Copy from src the elements specified by the indices in index_in_src. + * @brief Copy from src the elements specified by the indices in index_in_src. * - * @pre All vectors and arrays must reside in the same memory space. + * @pre All vectors and arrays must reside in the same memory space. * @pre Size of src must be greater or equal than size of this * @pre Size of index_in_src must be equal to size of this * @pre Elements of index_in_src must be valid (zero-based) indexes in src @@ -137,10 +137,10 @@ class hiopVectorCompoundPD : public hiopVector /* * @brief Copy from 'v' starting at 'start_idx_src' to 'this' starting at 'start_idx_dest' * - * Elements are copied into 'this' till the end of the 'this' is reached, more exactly a number + * Elements are copied into 'this' till the end of the 'this' is reached, more exactly a number * of lenght(this) - start_idx_dest elements are copied. * - * Precondition: The method expects that in 'v' there are at least as many elements starting + * Precondition: The method expects that in 'v' there are at least as many elements starting * 'start_idx_src' as 'this' has starting at 'start_idx_dest', or in other words, * length(this) - start_idx_dest <= length(v) - start_idx_src */ @@ -151,7 +151,7 @@ class hiopVectorCompoundPD : public hiopVector /** Copy the array content `this` in the hiopVectorPar passed as argument * * @pre `this` and destination vector should have the same size. - * @pre `this` and destination vector should have the same MPI distributions (and, + * @pre `this` and destination vector should have the same MPI distributions (and, * hence, same number of local elements) when applicable. */ virtual void copy_to_vectorpar(hiopVectorPar& vdest) const; @@ -160,23 +160,21 @@ class hiopVectorCompoundPD : public hiopVector /// @brief Copy 'this' to v starting at start_index in 'v'. virtual void copyToStarting(hiopVector& vec, int start_index_in_dest) const; /// @brief Copy the entries in 'this' where corresponding 'ix' is nonzero, to v starting at start_index in 'v'. - virtual void copyToStartingAt_w_pattern(hiopVector& vec, - index_type start_index_in_dest, - const hiopVector& ix) const; + virtual void copyToStartingAt_w_pattern(hiopVector& vec, index_type start_index_in_dest, const hiopVector& ix) const; /// @brief Copy the entries in `c` and `d` to `this`, according to the mapping in `c_map` and `d_map` - virtual void copy_from_two_vec_w_pattern(const hiopVector& c, - const hiopVectorInt& c_map, - const hiopVector& d, + virtual void copy_from_two_vec_w_pattern(const hiopVector& c, + const hiopVectorInt& c_map, + const hiopVector& d, const hiopVectorInt& d_map); - virtual void copy_to_two_vec_w_pattern(hiopVector& c, - const hiopVectorInt& c_map, - hiopVector& d, + virtual void copy_to_two_vec_w_pattern(hiopVector& c, + const hiopVectorInt& c_map, + hiopVector& d, const hiopVectorInt& d_map) const; /** - * @brief copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'start_idx_dest' + * @brief copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'start_idx_dest' * * If num_elems>=0, 'num_elems' will be copied; if num_elems<0, elements will be copied till the end of * either source ('this') or destination ('dest') is reached @@ -185,13 +183,13 @@ class hiopVectorCompoundPD : public hiopVector virtual void startingAtCopyToStartingAt(index_type start_idx_in_src, hiopVector& dest, index_type start_idx_dest, - size_type num_elems=-1) const; + size_type num_elems = -1) const; virtual void startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, hiopVector& dest, index_type start_idx_dest, const hiopVector& selec_dest, - size_type num_elems=-1) const; + size_type num_elems = -1) const; virtual double twonorm() const; virtual double dotProductWith(const hiopVector& vec) const; @@ -199,9 +197,9 @@ class hiopVectorCompoundPD : public hiopVector virtual double infnorm_local() const; virtual double onenorm() const; virtual double onenorm_local() const; - virtual void componentMult( const hiopVector& v ); - virtual void componentDiv ( const hiopVector& v ); - virtual void componentDiv_w_selectPattern( const hiopVector& v, const hiopVector& ix); + virtual void componentMult(const hiopVector& v); + virtual void componentDiv(const hiopVector& v); + virtual void componentDiv_w_selectPattern(const hiopVector& v, const hiopVector& ix); virtual void component_min(const double constant); virtual void component_min(const hiopVector& vec); virtual void component_max(const double constant); @@ -210,18 +208,18 @@ class hiopVectorCompoundPD : public hiopVector virtual void component_sgn(); virtual void component_sqrt(); - virtual void scale( double alpha ); + virtual void scale(double alpha); /// @brief this += alpha * x - virtual void axpy ( double alpha, const hiopVector& x ); + virtual void axpy(double alpha, const hiopVector& x); /// @brief this += alpha * x, for the entries in 'this' where corresponding 'select' is nonzero. virtual void axpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& select); /** * @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. - * - * @param alpha scaling factor + * + * @param alpha scaling factor * @param x vector of doubles to be axpy-ed to this (size equal to size of i and less than or equal to size of this) - * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than + * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than * or equal to size of this) * * @pre The entries of i must be valid (zero-based) indexes in this @@ -230,13 +228,10 @@ class hiopVectorCompoundPD : public hiopVector virtual void axpy(double alpha, const hiopVector& xvec, const hiopVectorInt& i); /// @brief this += alpha * x * z - virtual void axzpy (double alpha, const hiopVector& xvec, const hiopVector& zvec); + virtual void axzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); /// @brief this += alpha * x / z virtual void axdzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); - virtual void axdzpy_w_pattern(double alpha, - const hiopVector& xvec, - const hiopVector& zvec, - const hiopVector& select); + virtual void axdzpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& zvec, const hiopVector& select); /// @brief Add c to the elements of this virtual void addConstant(double c); virtual void addConstant_w_patternSelect(double c, const hiopVector& select); @@ -254,16 +249,16 @@ class hiopVectorCompoundPD : public hiopVector const double& mu, const double& kappa_d) const; - /** - * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of - * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + /** + * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. * * Supports distributed/MPI vectors, but performs only elementwise operations and do not * require communication. * * This method is used to add gradient contributions from the (linear) damping term used - * to handle unbounded problems. The damping terms are used for variables that are - * bounded on one side only. + * to handle unbounded problems. The damping terms are used for variables that are + * bounded on one side only. */ virtual void addLinearDampingTerm(const hiopVector& ixleft, const hiopVector& ixright, @@ -272,52 +267,63 @@ class hiopVectorCompoundPD : public hiopVector virtual int allPositive(); virtual int allPositive_w_patternSelect(const hiopVector& select); - virtual bool projectIntoBounds_local(const hiopVector& xl, + virtual bool projectIntoBounds_local(const hiopVector& xl, const hiopVector& ixl, const hiopVector& xu, const hiopVector& ixu, double kappa1, double kappa2); virtual double fractionToTheBdry_local(const hiopVector& dvec, const double& tau) const; - virtual double fractionToTheBdry_w_pattern_local(const hiopVector& dvec, - const double& tau, - const hiopVector& ix) const; + virtual double fractionToTheBdry_w_pattern_local(const hiopVector& dvec, const double& tau, const hiopVector& ix) const; virtual void selectPattern(const hiopVector& select); virtual bool matchesPattern(const hiopVector& select); virtual hiopVector* alloc_clone() const; - virtual hiopVector* new_copy () const; + virtual hiopVector* new_copy() const; - virtual void adjustDuals_plh(const hiopVector& x, - const hiopVector& ix, - const double& mu, - const double& kappa); + virtual void adjustDuals_plh(const hiopVector& x, const hiopVector& ix, const double& mu, const double& kappa); virtual bool is_zero() const; virtual bool isnan_local() const; virtual bool isinf_local() const; virtual bool isfinite_local() const; - - virtual void print(FILE* file=nullptr, const char* message=nullptr,int max_elems=-1, int rank=-1) const; + + virtual void print(FILE* file = nullptr, const char* message = nullptr, int max_elems = -1, int rank = -1) const; /* more accessors */ virtual size_type get_local_size() const { return n_; } - virtual double* local_data() { assert(0 && "not required."); return nullptr; } - virtual const double* local_data_const() const { assert(0 && "not required."); return nullptr; } - virtual inline double* local_data_host() { assert(0 && "not required."); return nullptr; } - virtual inline const double* local_data_host_const() const { assert(0 && "not required."); return nullptr; } - - virtual size_type numOfElemsLessThan(const double &val) const; - virtual size_type numOfElemsAbsLessThan(const double &val) const; - - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual double* local_data() + { + assert(0 && "not required."); + return nullptr; + } + virtual const double* local_data_const() const + { + assert(0 && "not required."); + return nullptr; + } + virtual inline double* local_data_host() + { + assert(0 && "not required."); + return nullptr; + } + virtual inline const double* local_data_host_const() const + { + assert(0 && "not required."); + return nullptr; + } + + virtual size_type numOfElemsLessThan(const double& val) const; + virtual size_type numOfElemsAbsLessThan(const double& val) const; + + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType* arr_src, const int start_src) const; - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType arr_src) const; virtual bool is_equal(const hiopVector& vec) const; @@ -325,7 +331,7 @@ class hiopVectorCompoundPD : public hiopVector void copy_from_resid(const hiopResidual* resid); void copy_from_iterate(const hiopIterate* it); -private: +private: std::vector vectors_; size_type n_parts_; bool own_vectors_; @@ -333,4 +339,4 @@ class hiopVectorCompoundPD : public hiopVector hiopVectorCompoundPD(const hiopVectorCompoundPD&); }; -} +} // namespace hiop diff --git a/src/LinAlg/hiopVectorCuda.cpp b/src/LinAlg/hiopVectorCuda.cpp index 4584c697f..1a81bc788 100644 --- a/src/LinAlg/hiopVectorCuda.cpp +++ b/src/LinAlg/hiopVectorCuda.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -70,9 +70,9 @@ namespace hiop { hiopVectorCuda::hiopVectorCuda(const size_type& glob_n, index_type* col_part, MPI_Comm comm) - : hiopVector(), - comm_(comm), - idx_cumsum_{nullptr} + : hiopVector(), + comm_(comm), + idx_cumsum_{nullptr} { n_ = glob_n; @@ -80,18 +80,18 @@ hiopVectorCuda::hiopVectorCuda(const size_type& glob_n, index_type* col_part, MP // if this is a serial vector, make sure it has a valid comm in the mpi case if(comm_ == MPI_COMM_NULL) { comm_ = MPI_COMM_SELF; - } + } #endif - int P = 0; + int P = 0; if(col_part) { #ifdef HIOP_USE_MPI - int ierr=MPI_Comm_rank(comm_, &P); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Comm_rank(comm_, &P); + assert(ierr == MPI_SUCCESS); #endif glob_il_ = col_part[P]; - glob_iu_ = col_part[P+1]; - } - else { + glob_iu_ = col_part[P + 1]; + } else { glob_il_ = 0; glob_iu_ = n_; } @@ -106,8 +106,8 @@ hiopVectorCuda::hiopVectorCuda(const size_type& glob_n, index_type* col_part, MP } hiopVectorCuda::hiopVectorCuda(const hiopVectorCuda& v) - : hiopVector(), - idx_cumsum_{nullptr} + : hiopVector(), + idx_cumsum_{nullptr} { n_local_ = v.get_local_size(); n_ = v.get_size(); @@ -127,7 +127,7 @@ hiopVectorCuda::~hiopVectorCuda() { exec_space_host_.dealloc_array(data_host_mirror_); exec_space_.dealloc_array(data_); - data_ = nullptr; + data_ = nullptr; data_host_mirror_ = nullptr; // Delete workspaces and handles @@ -137,23 +137,17 @@ hiopVectorCuda::~hiopVectorCuda() } /// @brief Set all elements to zero. -void hiopVectorCuda::setToZero() -{ - hiop::cuda::thrust_fill_kernel(n_local_, data_, 0.0); -} +void hiopVectorCuda::setToZero() { hiop::cuda::thrust_fill_kernel(n_local_, data_, 0.0); } /// @brief Set all elements to c -void hiopVectorCuda::setToConstant(double c) -{ - hiop::cuda::thrust_fill_kernel(n_local_, data_, c); -} +void hiopVectorCuda::setToConstant(double c) { hiop::cuda::thrust_fill_kernel(n_local_, data_, c); } /// @brief Set all elements to random values uniformly distributed between `minv` and `maxv`. void hiopVectorCuda::set_to_random_uniform(double minv, double maxv) { double* data = data_; hiop::cuda::array_random_uniform_kernel(n_local_, data, minv, maxv); -} // namespace hiop +} // namespace hiop /// @brief Set all elements that are not zero in ix to c, and the rest to 0 void hiopVectorCuda::setToConstant_w_patternSelect(double c, const hiopVector& select) @@ -204,56 +198,51 @@ void hiopVectorCuda::copy_from_w_pattern(const hiopVector& vv, const hiopVector& /// @brief Copy the `n` elements of v starting at `start_index_in_dest` in `this` void hiopVectorCuda::copyFromStarting(int start_index_in_dest, const double* v, int nv) { - assert(start_index_in_dest+nv <= n_local_); - auto b = exec_space_.copy(data_+start_index_in_dest, v, nv); + assert(start_index_in_dest + nv <= n_local_); + auto b = exec_space_.copy(data_ + start_index_in_dest, v, nv); assert(b); } /// @brief Copy v_src into `this` starting at start_index_in_dest in `this`. */ void hiopVectorCuda::copyFromStarting(int start_index_in_dest, const hiopVector& v_src) { - assert(n_local_==n_ && "only for local/non-distributed vectors"); - assert(start_index_in_dest+v_src.get_local_size() <= n_local_); + assert(n_local_ == n_ && "only for local/non-distributed vectors"); + assert(start_index_in_dest + v_src.get_local_size() <= n_local_); const hiopVectorCuda& v = dynamic_cast(v_src); - auto b = exec_space_.copy(data_+start_index_in_dest, - v.data_, - v.n_local_, - v.exec_space()); + auto b = exec_space_.copy(data_ + start_index_in_dest, v.data_, v.n_local_, v.exec_space()); assert(b); } /// @brief Copy the `n` elements of v starting at `start_index_in_v` into `this` void hiopVectorCuda::copy_from_starting_at(const double* v, int start_index_in_v, int nv) { - auto b = exec_space_.copy(data_, v+start_index_in_v, nv); + auto b = exec_space_.copy(data_, v + start_index_in_v, nv); assert(b); } -/// @brief Copy from src the elements specified by the indices in index_in_src. -void hiopVectorCuda::copy_from_indexes(const hiopVector& src, const hiopVectorInt& index_in_src) +/// @brief Copy from src the elements specified by the indices in index_in_src. +void hiopVectorCuda::copy_from_indexes(const hiopVector& src, const hiopVectorInt& index_in_src) { assert(index_in_src.get_local_size() == n_local_); int* id = const_cast(index_in_src.local_data_const()); double* dd = data_; const double* vd = src.local_data_const(); - + hiop::cuda::copy_from_index_kernel(n_local_, dd, vd, id); } -/// @brief Copy from src the elements specified by the indices in index_in_src. +/// @brief Copy from src the elements specified by the indices in index_in_src. void hiopVectorCuda::copy_from_indexes(const double* src, const hiopVectorInt& index_in_src) { assert(index_in_src.get_local_size() == n_local_); - + hiop::cuda::copy_from_index_kernel(n_local_, data_, src, index_in_src.local_data_const()); } /// @brief Copy from `v` starting at `start_idx_src` to `this` starting at `start_idx_dest` -void hiopVectorCuda::startingAtCopyFromStartingAt(int start_idx_dest, - const hiopVector& vec_src, - int start_idx_src) +void hiopVectorCuda::startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& vec_src, int start_idx_src) { size_type howManyToCopyDest = this->n_local_ - start_idx_dest; @@ -261,9 +250,9 @@ void hiopVectorCuda::startingAtCopyFromStartingAt(int start_idx_dest, assert(n_local_ == n_ && "are you sure you want to call this?"); #endif int v_size = vec_src.get_local_size(); - assert((start_idx_dest >= 0 && start_idx_dest < this->n_local_) || this->n_local_==0); - assert((start_idx_src >=0 && start_idx_src < v_size) || v_size==0 || v_size==start_idx_src); - const size_type howManyToCopySrc = v_size - start_idx_src; + assert((start_idx_dest >= 0 && start_idx_dest < this->n_local_) || this->n_local_ == 0); + assert((start_idx_src >= 0 && start_idx_src < v_size) || v_size == 0 || v_size == start_idx_src); + const size_type howManyToCopySrc = v_size - start_idx_src; if(howManyToCopyDest == 0 || howManyToCopySrc == 0) { return; @@ -272,7 +261,7 @@ void hiopVectorCuda::startingAtCopyFromStartingAt(int start_idx_dest, assert(howManyToCopyDest <= howManyToCopySrc); auto& v_src = dynamic_cast(vec_src); - exec_space_.copy(data_+start_idx_dest, v_src.data_+start_idx_src, howManyToCopyDest, v_src.exec_space()); + exec_space_.copy(data_ + start_idx_dest, v_src.data_ + start_idx_src, howManyToCopyDest, v_src.exec_space()); } /// @brief Copy `this` to double array, which is assumed to be at least of `n_local_` size. @@ -294,25 +283,23 @@ void hiopVectorCuda::copyToStarting(int start_index, hiopVector& dst) const assert(start_index + v_size <= n_local_); // If nothing to copy, return. - if(v_size == 0) - return; + if(v_size == 0) return; auto& dst_cu = dynamic_cast(dst); - dst_cu.exec_space().copy(dst_cu.data_, data_+start_index, v_size, exec_space_); + dst_cu.exec_space().copy(dst_cu.data_, data_ + start_index, v_size, exec_space_); } /// @brief Copy `this` to `dst` starting at `start_index` in `dst`. void hiopVectorCuda::copyToStarting(hiopVector& dst, int start_index) const { int v_size = dst.get_local_size(); - assert(start_index+n_local_ <= v_size); + assert(start_index + n_local_ <= v_size); // If there is nothing to copy, return. - if(n_local_ == 0) - return; + if(n_local_ == 0) return; auto& dst_cu = dynamic_cast(dst); - dst_cu.exec_space().copy(dst_cu.data_+start_index, data_, n_local_, exec_space_); + dst_cu.exec_space().copy(dst_cu.data_ + start_index, data_, n_local_, exec_space_); } /// @brief Copy the entries in `this` where corresponding `ix` is nonzero, to v starting at start_index in `v`. @@ -321,27 +308,22 @@ void hiopVectorCuda::copyToStartingAt_w_pattern(hiopVector& vec, int start_index if(n_local_ == 0) { return; } - + double* dd = data_; double* vd = vec.local_data(); const double* pattern = select.local_data_const(); if(nullptr == idx_cumsum_) { - idx_cumsum_ = LinearAlgebraFactory::create_vector_int("CUDA", n_local_+1); + idx_cumsum_ = LinearAlgebraFactory::create_vector_int("CUDA", n_local_ + 1); index_type* nnz_in_row = idx_cumsum_->local_data(); - hiop::cuda::compute_cusum_kernel(n_local_+1, nnz_in_row, pattern); + hiop::cuda::compute_cusum_kernel(n_local_ + 1, nnz_in_row, pattern); } index_type* nnz_cumsum = idx_cumsum_->local_data(); index_type v_n_local = vec.get_local_size(); - hiop::cuda::copyToStartingAt_w_pattern_kernel(n_local_, - v_n_local, - start_index_in_dest, - nnz_cumsum, - vd, - dd); + hiop::cuda::copyToStartingAt_w_pattern_kernel(n_local_, v_n_local, start_index_in_dest, nnz_cumsum, vd, dd); } /// @brief Copy the entries in `c` and `d` to `this`, according to the mapping in `c_map` and `d_map` @@ -353,9 +335,9 @@ void hiopVectorCuda::copy_from_two_vec_w_pattern(const hiopVector& c, const int c_size = c.get_size(); const int d_size = d.get_size(); - assert( c_size == c_map.get_local_size() ); - assert( d_size == d_map.get_local_size() ); - assert( c_size + d_size == n_local_); + assert(c_size == c_map.get_local_size()); + assert(d_size == d_map.get_local_size()); + assert(c_size + d_size == n_local_); hiop::cuda::copy_src_to_mapped_dest_kernel(c_size, c.local_data_const(), local_data(), c_map.local_data_const()); hiop::cuda::copy_src_to_mapped_dest_kernel(d_size, d.local_data_const(), local_data(), d_map.local_data_const()); @@ -370,68 +352,67 @@ void hiopVectorCuda::copy_to_two_vec_w_pattern(hiopVector& c, const int c_size = c.get_size(); const int d_size = d.get_size(); - assert( c_size == c_map.get_local_size() ); - assert( d_size == d_map.get_local_size() ); - assert( c_size + d_size == n_local_); + assert(c_size == c_map.get_local_size()); + assert(d_size == d_map.get_local_size()); + assert(c_size + d_size == n_local_); hiop::cuda::copy_mapped_src_to_dest_kernel(c_size, local_data_const(), c.local_data(), c_map.local_data_const()); hiop::cuda::copy_mapped_src_to_dest_kernel(d_size, local_data_const(), d.local_data(), d_map.local_data_const()); } -/// @brief Copy `this` (source) starting at `start_idx_in_src` to `dest` starting at index `int start_idx_dest` +/// @brief Copy `this` (source) starting at `start_idx_in_src` to `dest` starting at index `int start_idx_dest` void hiopVectorCuda::startingAtCopyToStartingAt(index_type start_idx_in_src, - hiopVector& dest, + hiopVector& dest, index_type start_idx_dest, - size_type num_elems /* = -1 */) const + size_type num_elems /* = -1 */) const { #ifdef HIOP_DEEPCHECKS - assert(n_local_==n_ && "only for local/non-distributed vectors"); -#endif + assert(n_local_ == n_ && "only for local/non-distributed vectors"); +#endif assert(start_idx_in_src >= 0 && start_idx_in_src <= this->n_local_); - assert(start_idx_dest >= 0 && start_idx_dest <= dest.get_local_size()); + assert(start_idx_dest >= 0 && start_idx_dest <= dest.get_local_size()); const int dest_size = dest.get_local_size(); -#ifndef NDEBUG - if(start_idx_dest==dest_size || start_idx_in_src==this->n_local_) assert((num_elems==-1 || num_elems==0)); +#ifndef NDEBUG + if(start_idx_dest == dest_size || start_idx_in_src == this->n_local_) assert((num_elems == -1 || num_elems == 0)); #endif - if(num_elems<0) { - num_elems = std::min(this->n_local_ - start_idx_in_src, dest_size- start_idx_dest); + if(num_elems < 0) { + num_elems = std::min(this->n_local_ - start_idx_in_src, dest_size - start_idx_dest); } else { - assert(num_elems+start_idx_in_src <= this->n_local_); - assert(num_elems+start_idx_dest <= dest_size); - //make sure everything stays within bounds (in release) - num_elems = std::min(num_elems, (int) (this->n_local_-start_idx_in_src)); - num_elems = std::min(num_elems, (int) (dest_size-start_idx_dest)); + assert(num_elems + start_idx_in_src <= this->n_local_); + assert(num_elems + start_idx_dest <= dest_size); + // make sure everything stays within bounds (in release) + num_elems = std::min(num_elems, (int)(this->n_local_ - start_idx_in_src)); + num_elems = std::min(num_elems, (int)(dest_size - start_idx_dest)); } - if(num_elems == 0) - return; + if(num_elems == 0) return; auto& dest_cu = dynamic_cast(dest); - dest_cu.exec_space().copy(dest_cu.data_+start_idx_dest, data_+start_idx_in_src, num_elems, exec_space_); + dest_cu.exec_space().copy(dest_cu.data_ + start_idx_dest, data_ + start_idx_in_src, num_elems, exec_space_); } /** -* @brief Copy `this` (source) starting at `start_idx_in_src` to `dest` starting at index `int start_idx_dest` -* The values are copy to `dest` where the corresponding entry in `selec_dest` is nonzero -*/ + * @brief Copy `this` (source) starting at `start_idx_in_src` to `dest` starting at index `int start_idx_dest` + * The values are copy to `dest` where the corresponding entry in `selec_dest` is nonzero + */ void hiopVectorCuda::startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, hiopVector& destination, index_type start_idx_dest, const hiopVector& selec_dest, - size_type num_elems/*=-1*/) const + size_type num_elems /*=-1*/) const { - assert(false&&"TODO --- only used in the full linear system"); + assert(false && "TODO --- only used in the full linear system"); } /** @brief Return the two norm */ double hiopVectorCuda::twonorm() const { - int one = 1; + int one = 1; double nrm = 0.; - if(n_local_>0) { + if(n_local_ > 0) { cublasStatus_t ret_cublas = cublasDnrm2(handle_cublas_, n_local_, data_, one, &nrm); assert(ret_cublas == CUBLAS_STATUS_SUCCESS); } @@ -439,9 +420,10 @@ double hiopVectorCuda::twonorm() const #ifdef HIOP_USE_MPI nrm *= nrm; double nrmG; - int ierr = MPI_Allreduce(&nrm, &nrmG, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&nrm, &nrmG, 1, MPI_DOUBLE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); nrm = std::sqrt(nrmG); -#endif +#endif return nrm; } @@ -452,7 +434,7 @@ double hiopVectorCuda::infnorm() const #ifdef HIOP_USE_MPI double nrm_global; int ierr = MPI_Allreduce(&nrm, &nrm_global, 1, MPI_DOUBLE, MPI_MAX, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return nrm_global; #endif @@ -460,10 +442,7 @@ double hiopVectorCuda::infnorm() const } /** @brief inf norm on single rank */ -double hiopVectorCuda::infnorm_local() const -{ - return hiop::cuda::infnorm_local_kernel(n_local_, data_); -} +double hiopVectorCuda::infnorm_local() const { return hiop::cuda::infnorm_local_kernel(n_local_, data_); } /** @brief Return the one norm */ double hiopVectorCuda::onenorm() const @@ -471,47 +450,42 @@ double hiopVectorCuda::onenorm() const double norm1 = onenorm_local(); #ifdef HIOP_USE_MPI double nrm1_global; - int ierr = MPI_Allreduce(&norm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&norm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); return nrm1_global; #endif return norm1; } /** @brief L1 norm on single rank */ -double hiopVectorCuda::onenorm_local() const -{ - return hiop::cuda::onenorm_local_kernel(n_local_, data_); -} +double hiopVectorCuda::onenorm_local() const { return hiop::cuda::onenorm_local_kernel(n_local_, data_); } /** @brief Multiply the components of this by the components of v. */ -void hiopVectorCuda::componentMult( const hiopVector& vec ) +void hiopVectorCuda::componentMult(const hiopVector& vec) { assert(n_local_ == vec.get_local_size()); hiop::cuda::thrust_component_mult_kernel(n_local_, data_, vec.local_data_const()); } /** @brief Divide the components of this hiopVector by the components of v. */ -void hiopVectorCuda::componentDiv( const hiopVector& vec ) +void hiopVectorCuda::componentDiv(const hiopVector& vec) { assert(n_local_ == vec.get_local_size()); hiop::cuda::thrust_component_div_kernel(n_local_, data_, vec.local_data_const()); } /** -* @brief Elements of this that corespond to nonzeros in ix are divided by elements of v. -* The rest of elements of this are set to zero. -*/ -void hiopVectorCuda::componentDiv_w_selectPattern( const hiopVector& vec, const hiopVector& select) + * @brief Elements of this that corespond to nonzeros in ix are divided by elements of v. + * The rest of elements of this are set to zero. + */ +void hiopVectorCuda::componentDiv_w_selectPattern(const hiopVector& vec, const hiopVector& select) { assert(n_local_ == vec.get_local_size()); hiop::cuda::component_div_w_pattern_kernel(n_local_, data_, vec.local_data_const(), select.local_data_const()); } /** @brief Set each component of this hiopVector to the minimum of itself and the given constant. */ -void hiopVectorCuda::component_min(const double constant) -{ - hiop::cuda::component_min_kernel(n_local_, data_, constant); -} +void hiopVectorCuda::component_min(const double constant) { hiop::cuda::component_min_kernel(n_local_, data_, constant); } /** @brief Set each component of this hiopVector to the minimum of itself and the corresponding component of `v`. */ void hiopVectorCuda::component_min(const hiopVector& vec) @@ -522,10 +496,7 @@ void hiopVectorCuda::component_min(const hiopVector& vec) } /** @brief Set each component of this hiopVector to the maximum of itself and the given constant. */ -void hiopVectorCuda::component_max(const double constant) -{ - hiop::cuda::component_max_kernel(n_local_, data_, constant); -} +void hiopVectorCuda::component_max(const double constant) { hiop::cuda::component_max_kernel(n_local_, data_, constant); } /** @brief Set each component of this hiopVector to the maximum of itself and the corresponding component of `v`. */ void hiopVectorCuda::component_max(const hiopVector& vec) @@ -533,32 +504,23 @@ void hiopVectorCuda::component_max(const hiopVector& vec) assert(vec.get_local_size() == n_local_); const double* vd = vec.local_data_const(); - + hiop::cuda::component_max_kernel(n_local_, data_, vd); } /** @brief Set each component to its absolute value */ -void hiopVectorCuda::component_abs() -{ - hiop::cuda::thrust_component_abs_kernel(n_local_, data_); -} +void hiopVectorCuda::component_abs() { hiop::cuda::thrust_component_abs_kernel(n_local_, data_); } /** @brief Apply sign function to each component */ -void hiopVectorCuda::component_sgn () -{ - hiop::cuda::thrust_component_sgn_kernel(n_local_, data_); -} +void hiopVectorCuda::component_sgn() { hiop::cuda::thrust_component_sgn_kernel(n_local_, data_); } /** @brief compute sqrt of each component */ -void hiopVectorCuda::component_sqrt() -{ - hiop::cuda::thrust_component_sqrt_kernel(n_local_, data_); -} +void hiopVectorCuda::component_sqrt() { hiop::cuda::thrust_component_sqrt_kernel(n_local_, data_); } /// @brief Scale each element of this by the constant alpha void hiopVectorCuda::scale(double alpha) { - int one = 1; + int one = 1; cublasStatus_t ret_cublas = cublasDscal(handle_cublas_, n_local_, &alpha, data_, one); assert(ret_cublas == CUBLAS_STATUS_SUCCESS); } @@ -572,7 +534,7 @@ void hiopVectorCuda::axpy(double alpha, const hiopVector& xvec) } /// @brief this += alpha * x, for the entries in `this` where corresponding `select` is nonzero. -void hiopVectorCuda::axpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& select) +void hiopVectorCuda::axpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& select) { axpy(alpha, xvec); componentMult(select); @@ -581,10 +543,10 @@ void hiopVectorCuda::axpy_w_pattern(double alpha, const hiopVector& xvec, const /// @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. void hiopVectorCuda::axpy(double alpha, const hiopVector& xvec, const hiopVectorInt& i) { - assert(xvec.get_size()==i.get_local_size()); - assert(xvec.get_local_size()==i.get_local_size()); - assert(i.get_local_size()<=n_local_); - + assert(xvec.get_size() == i.get_local_size()); + assert(xvec.get_local_size() == i.get_local_size()); + assert(i.get_local_size() <= n_local_); + double* yd = data_; const double* xd = const_cast(xvec.local_data_const()); int* id = const_cast(i.local_data_const()); @@ -597,9 +559,9 @@ void hiopVectorCuda::axzpy(double alpha, const hiopVector& xvec, const hiopVecto { #ifdef HIOP_DEEPCHECKS assert(xvec.get_local_size() == zvec.get_local_size()); - assert( n_local_ == zvec.get_local_size()); -#endif - double* dd = data_; + assert(n_local_ == zvec.get_local_size()); +#endif + double* dd = data_; const double* xd = xvec.local_data_const(); const double* zd = zvec.local_data_const(); @@ -611,9 +573,9 @@ void hiopVectorCuda::axdzpy(double alpha, const hiopVector& xvec, const hiopVect { #ifdef HIOP_DEEPCHECKS assert(xvec.get_local_size() == zvec.get_local_size()); - assert( n_local_ == zvec.get_local_size()); -#endif - double* yd = data_; + assert(n_local_ == zvec.get_local_size()); +#endif + double* yd = data_; const double* xd = xvec.local_data_const(); const double* zd = zvec.local_data_const(); @@ -621,14 +583,11 @@ void hiopVectorCuda::axdzpy(double alpha, const hiopVector& xvec, const hiopVect } /** @brief this[i] += alpha*x[i]/z[i] forall i with pattern selection */ -void hiopVectorCuda::axdzpy_w_pattern(double alpha, - const hiopVector& xvec, - const hiopVector& zvec, - const hiopVector& select) +void hiopVectorCuda::axdzpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& zvec, const hiopVector& select) { #ifdef HIOP_DEEPCHECKS - assert(xvec.get_local_size()==zvec.get_local_size()); - assert( n_local_==zvec.get_local_size()); + assert(xvec.get_local_size() == zvec.get_local_size()); + assert(n_local_ == zvec.get_local_size()); #endif double* yd = data_; const double* xd = xvec.local_data_const(); @@ -639,13 +598,10 @@ void hiopVectorCuda::axdzpy_w_pattern(double alpha, } /** @brief this[i] += c forall i */ -void hiopVectorCuda::addConstant(double c) -{ - hiop::cuda::add_constant_kernel(n_local_, data_, c); -} +void hiopVectorCuda::addConstant(double c) { hiop::cuda::add_constant_kernel(n_local_, data_, c); } /** @brief this[i] += c forall i with pattern selection */ -void hiopVectorCuda::addConstant_w_patternSelect(double c, const hiopVector& select) +void hiopVectorCuda::addConstant_w_patternSelect(double c, const hiopVector& select) { assert(this->n_local_ == select.get_local_size()); const double* id = select.local_data_const(); @@ -654,17 +610,17 @@ void hiopVectorCuda::addConstant_w_patternSelect(double c, const hiopVector& se } /** @brief Return the dot product of this hiopVector with v */ -double hiopVectorCuda::dotProductWith( const hiopVector& v ) const +double hiopVectorCuda::dotProductWith(const hiopVector& v) const { int one = 1; - double retval; + double retval; cublasStatus_t ret_cublas = cublasDdot(handle_cublas_, n_local_, v.local_data_const(), one, data_, one, &retval); assert(ret_cublas == CUBLAS_STATUS_SUCCESS); #ifdef HIOP_USE_MPI double dotprodG; int ierr = MPI_Allreduce(&retval, &dotprodG, 1, MPI_DOUBLE, MPI_SUM, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); retval = dotprodG; #endif @@ -672,16 +628,10 @@ double hiopVectorCuda::dotProductWith( const hiopVector& v ) const } /// @brief Negate all the elements of this -void hiopVectorCuda::negate() -{ - hiop::cuda::thrust_negate_kernel(n_local_, data_); -} +void hiopVectorCuda::negate() { hiop::cuda::thrust_negate_kernel(n_local_, data_); } /// @brief Invert (1/x) the elements of this -void hiopVectorCuda::invert() -{ - hiop::cuda::invert_kernel(n_local_, data_); -} +void hiopVectorCuda::invert() { hiop::cuda::invert_kernel(n_local_, data_); } /** @brief Sum all selected log(this[i]) */ double hiopVectorCuda::logBarrier_local(const hiopVector& select) const @@ -693,9 +643,7 @@ double hiopVectorCuda::logBarrier_local(const hiopVector& select) const } /* @brief adds the gradient of the log barrier, namely this=this+alpha*1/select(x) */ -void hiopVectorCuda::addLogBarrierGrad(double alpha, - const hiopVector& xvec, - const hiopVector& select) +void hiopVectorCuda::addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select) { #ifdef HIOP_DEEPCHECKS assert(n_local_ == xvec.get_local_size()); @@ -709,10 +657,7 @@ void hiopVectorCuda::addLogBarrierGrad(double alpha, } /** @brief Sum all elements */ -double hiopVectorCuda::sum_local() const -{ - return hiop::cuda::thrust_sum_kernel(n_local_, data_); -} +double hiopVectorCuda::sum_local() const { return hiop::cuda::thrust_sum_kernel(n_local_, data_); } /** @brief Linear damping term */ double hiopVectorCuda::linearDampingTerm_local(const hiopVector& ixleft, @@ -731,21 +676,20 @@ double hiopVectorCuda::linearDampingTerm_local(const hiopVector& ixleft, return hiop::cuda::linear_damping_term_kernel(n_local_, vd, ld, rd, mu, kappa_d); } -/** -* @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when exactly one of -* ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. -*/ +/** + * @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when exactly one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + */ void hiopVectorCuda::addLinearDampingTerm(const hiopVector& ixleft, const hiopVector& ixright, const double& alpha, const double& ct) { - assert(ixleft.get_local_size() == n_local_); assert(ixright.get_local_size() == n_local_); - const double* ixl= ixleft.local_data_const(); - const double* ixr= ixright.local_data_const(); + const double* ixl = ixleft.local_data_const(); + const double* ixr = ixright.local_data_const(); double* data = data_; @@ -762,7 +706,8 @@ int hiopVectorCuda::allPositive() #ifdef HIOP_USE_MPI int allPosG; - int ierr=MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); return allPosG; #endif return allPos; @@ -773,21 +718,21 @@ int hiopVectorCuda::allPositive_w_patternSelect(const hiopVector& wvec) { #ifdef HIOP_DEEPCHECKS assert(wvec.get_local_size() == n_local_); -#endif +#endif const double* id = wvec.local_data_const(); const double* data = data_; int allPos = hiop::cuda::all_positive_w_pattern_kernel(n_local_, data, id); - - allPos = (allPos==n_local_) ? 1 : 0; - + + allPos = (allPos == n_local_) ? 1 : 0; + #ifdef HIOP_USE_MPI int allPosG; int ierr = MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return allPosG; -#endif +#endif return allPos; } @@ -798,7 +743,8 @@ double hiopVectorCuda::min() const #ifdef HIOP_USE_MPI double resultG; - double ierr=MPI_Allreduce(&result, &resultG, 1, MPI_DOUBLE, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + double ierr = MPI_Allreduce(&result, &resultG, 1, MPI_DOUBLE, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); return resultG; #endif return result; @@ -810,26 +756,24 @@ double hiopVectorCuda::min_w_pattern(const hiopVector& select) const assert(this->n_local_ == select.get_local_size()); const double* data = data_; const double* id = select.local_data_const(); - + double max_val = std::numeric_limits::max(); double result = hiop::cuda::min_w_pattern_kernel(n_local_, data, id, max_val); #ifdef HIOP_USE_MPI double resultG; - double ierr=MPI_Allreduce(&result, &resultG, 1, MPI_DOUBLE, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + double ierr = MPI_Allreduce(&result, &resultG, 1, MPI_DOUBLE, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); return resultG; #endif return result; } /// @brief Return the minimum value in this vector, and the index at which it occurs. TODO -void hiopVectorCuda::min( double& /* m */, int& /* index */) const -{ - assert(false && "not implemented"); -} +void hiopVectorCuda::min(double& /* m */, int& /* index */) const { assert(false && "not implemented"); } /** @brief Project solution into bounds */ -bool hiopVectorCuda::projectIntoBounds_local(const hiopVector& xlo, +bool hiopVectorCuda::projectIntoBounds_local(const hiopVector& xlo, const hiopVector& ixl, const hiopVector& xup, const hiopVector& ixu, @@ -839,8 +783,8 @@ bool hiopVectorCuda::projectIntoBounds_local(const hiopVector& xlo, #ifdef HIOP_DEEPCHECKS assert(xlo.get_local_size() == n_local_); assert(ixl.get_local_size() == n_local_); - assert(xup.get_local_size()== n_local_); - assert(ixu.get_local_size()== n_local_); + assert(xup.get_local_size() == n_local_); + assert(ixu.get_local_size() == n_local_); #endif const double* xld = xlo.local_data_const(); @@ -848,15 +792,14 @@ bool hiopVectorCuda::projectIntoBounds_local(const hiopVector& xlo, const double* xud = xup.local_data_const(); const double* iud = ixu.local_data_const(); double* xd = data_; - + // Perform preliminary check to see of all upper value < lower value bool bval = hiop::cuda::check_bounds_kernel(n_local_, xld, xud); - if(false == bval) - return false; + if(false == bval) return false; const double small_real = std::numeric_limits::min() * 100; - + hiop::cuda::project_into_bounds_kernel(n_local_, xd, xld, ild, xud, iud, kappa1, kappa2, small_real); return true; @@ -868,7 +811,7 @@ double hiopVectorCuda::fractionToTheBdry_local(const hiopVector& dvec, const dou #ifdef HIOP_DEEPCHECKS assert(dvec.get_local_size() == n_local_); assert(tau > 0); - assert(tau < 1); // TODO: per documentation above it should be tau <= 1 (?). + assert(tau < 1); // TODO: per documentation above it should be tau <= 1 (?). #endif const double* dd = dvec.local_data_const(); @@ -881,14 +824,14 @@ double hiopVectorCuda::fractionToTheBdry_local(const hiopVector& dvec, const dou /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern select */ double hiopVectorCuda::fractionToTheBdry_w_pattern_local(const hiopVector& dvec, - const double& tau, + const double& tau, const hiopVector& select) const { #ifdef HIOP_DEEPCHECKS assert(dvec.get_local_size() == n_local_); assert(select.get_local_size() == n_local_); - assert(tau>0); - assert(tau<1); + assert(tau > 0); + assert(tau < 1); #endif const double* dd = dvec.local_data_const(); const double* xd = data_; @@ -903,7 +846,7 @@ double hiopVectorCuda::fractionToTheBdry_w_pattern_local(const hiopVector& dvec, void hiopVectorCuda::selectPattern(const hiopVector& select) { #ifdef HIOP_DEEPCHECKS - assert(select.get_local_size()==n_local_); + assert(select.get_local_size() == n_local_); #endif double* data = data_; @@ -917,7 +860,7 @@ void hiopVectorCuda::selectPattern(const hiopVector& select) bool hiopVectorCuda::matchesPattern(const hiopVector& pattern) { #ifdef HIOP_DEEPCHECKS - assert(pattern.get_local_size()==n_local_); + assert(pattern.get_local_size() == n_local_); #endif double* xd = data_; @@ -928,61 +871,49 @@ bool hiopVectorCuda::matchesPattern(const hiopVector& pattern) #ifdef HIOP_USE_MPI int mismatch_glob = bret; int ierr = MPI_Allreduce(&bret, &mismatch_glob, 1, MPI_INT, MPI_MIN, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return (mismatch_glob != 0); #endif return bret; } /** @brief Adjusts duals. */ -void hiopVectorCuda::adjustDuals_plh(const hiopVector& xvec, - const hiopVector& ixvec, - const double& mu, - const double& kappa) +void hiopVectorCuda::adjustDuals_plh(const hiopVector& xvec, const hiopVector& ixvec, const double& mu, const double& kappa) { #ifdef HIOP_DEEPCHECKS - assert(xvec.get_local_size()==n_local_); - assert(ixvec.get_local_size()==n_local_); + assert(xvec.get_local_size() == n_local_); + assert(ixvec.get_local_size() == n_local_); #endif - const double* xd = xvec.local_data_const(); + const double* xd = xvec.local_data_const(); const double* id = ixvec.local_data_const(); - double* zd = data_; //the dual + double* zd = data_; // the dual hiop::cuda::adjustDuals_plh_kernel(n_local_, zd, xd, id, mu, kappa); } /** @brief Check if all elements of the vector are zero */ -bool hiopVectorCuda::is_zero() const -{ - return hiop::cuda::is_zero_kernel(n_local_, data_); -} +bool hiopVectorCuda::is_zero() const { return hiop::cuda::is_zero_kernel(n_local_, data_); } /** @brief Returns true if any element of `this` is NaN. */ -bool hiopVectorCuda::isnan_local() const -{ - return hiop::cuda::isnan_kernel(n_local_, data_); -} +bool hiopVectorCuda::isnan_local() const { return hiop::cuda::isnan_kernel(n_local_, data_); } /** * @brief Returns true if any element of `this` is Inf. - * + * * @post `this` is not modified - * + * * @warning This is local method only! */ -bool hiopVectorCuda::isinf_local() const -{ - return hiop::cuda::isinf_kernel(n_local_, data_); -} +bool hiopVectorCuda::isinf_local() const { return hiop::cuda::isinf_kernel(n_local_, data_); } /** @brief Returns true if all elements of `this` are finite. */ -bool hiopVectorCuda::isfinite_local() const -{ - return hiop::cuda::isfinite_kernel(n_local_, data_); -} +bool hiopVectorCuda::isfinite_local() const { return hiop::cuda::isfinite_kernel(n_local_, data_); } /** @brief Prints vector data to a file in Matlab format. */ -void hiopVectorCuda::print(FILE* file/*=nullptr*/, const char* msg/*=nullptr*/, int max_elems/*=-1*/, int rank/*=-1*/) const +void hiopVectorCuda::print(FILE* file /*=nullptr*/, + const char* msg /*=nullptr*/, + int max_elems /*=-1*/, + int rank /*=-1*/) const { // TODO. no fprintf. use printf to print everything on screen? // Alternative: create a hiopVectorPar copy and use hiopVectorPar::print @@ -992,36 +923,32 @@ void hiopVectorCuda::print(FILE* file/*=nullptr*/, const char* msg/*=nullptr*/, /// @brief allocates a vector that mirrors this, but doesn't copy the values hiopVector* hiopVectorCuda::alloc_clone() const { - hiopVector* v = new hiopVectorCuda(*this); assert(v); + hiopVector* v = new hiopVectorCuda(*this); + assert(v); return v; } /// @brief allocates a vector that mirrors this, and copies the values -hiopVector* hiopVectorCuda::new_copy () const +hiopVector* hiopVectorCuda::new_copy() const { - hiopVector* v = new hiopVectorCuda(*this); assert(v); + hiopVector* v = new hiopVectorCuda(*this); + assert(v); v->copyFrom(*this); return v; } /// @brief copy data from host mirror to device -void hiopVectorCuda::copyToDev() -{ - exec_space_.copy(data_, data_host_mirror_, n_local_, exec_space_host_); -} +void hiopVectorCuda::copyToDev() { exec_space_.copy(data_, data_host_mirror_, n_local_, exec_space_host_); } /// @brief copy data from device to host mirror -void hiopVectorCuda::copyFromDev() -{ - exec_space_host_.copy(data_host_mirror_, data_, n_local_, exec_space_); -} +void hiopVectorCuda::copyFromDev() { exec_space_host_.copy(data_host_mirror_, data_, n_local_, exec_space_); } /// @brief copy data from host mirror to device void hiopVectorCuda::copyToDev() const { auto* this_nonconst = const_cast(this); assert(nullptr != this_nonconst); - + this_nonconst->copyToDev(); } @@ -1030,26 +957,26 @@ void hiopVectorCuda::copyFromDev() const { auto* this_nonconst = const_cast(this); assert(nullptr != this_nonconst); - + this_nonconst->copyFromDev(); } /// @brief get number of values that are less than the given value `val`. TODO: add unit test -size_type hiopVectorCuda::numOfElemsLessThan(const double &val) const +size_type hiopVectorCuda::numOfElemsLessThan(const double& val) const { return hiop::cuda::num_of_elem_less_than_kernel(n_local_, data_, val); } /// @brief get number of values whose absolute value are less than the given value `val`. TODO: add unit test -size_type hiopVectorCuda::numOfElemsAbsLessThan(const double &val) const +size_type hiopVectorCuda::numOfElemsAbsLessThan(const double& val) const { return hiop::cuda::num_of_elem_absless_than_kernel(n_local_, data_, val); } /// @brief set int array `arr`, starting at `start` and ending at `end`, to the values in `arr_src` from `start_src` -void hiopVectorCuda::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, +void hiopVectorCuda::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType* arr_src, const int start_src) const { @@ -1065,9 +992,9 @@ void hiopVectorCuda::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, } /// @brief set int array `arr`, starting at `start` and ending at `end`, to the values in `arr_src` from `start_src` -void hiopVectorCuda::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, +void hiopVectorCuda::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType arr_src) const { assert(arr && arr_src); @@ -1082,11 +1009,6 @@ void hiopVectorCuda::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, } /// @brief check if `this` vector is identical to `vec` -bool hiopVectorCuda::is_equal(const hiopVector& vec) const -{ - assert(false&&"NOT needed. Remove this func. TODO"); -} - - -} // namespace hiop +bool hiopVectorCuda::is_equal(const hiopVector& vec) const { assert(false && "NOT needed. Remove this func. TODO"); } +} // namespace hiop diff --git a/src/LinAlg/hiopVectorCuda.hpp b/src/LinAlg/hiopVectorCuda.hpp index ebc281759..5c7197056 100644 --- a/src/LinAlg/hiopVectorCuda.hpp +++ b/src/LinAlg/hiopVectorCuda.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -69,19 +69,19 @@ namespace hiop { -//Forward declarations +// Forward declarations class hiopVectorPar; -//Forward declarations of tester classes that needs to be friends with this vector +// Forward declarations of tester classes that needs to be friends with this vector namespace tests { class VectorTestsCuda; } - + /// Implementation of abstract class hiopVector using CUDA API class hiopVectorCuda : public hiopVector { public: - hiopVectorCuda(const size_type& glob_n, index_type* col_part=nullptr, MPI_Comm comm=MPI_COMM_SELF); + hiopVectorCuda(const size_type& glob_n, index_type* col_part = nullptr, MPI_Comm comm = MPI_COMM_SELF); virtual ~hiopVectorCuda(); /// @brief Set all elements to zero. @@ -105,16 +105,16 @@ class hiopVectorCuda : public hiopVector /// @brief Copy the `n` elements of v starting at `start_index_in_v` into `this` virtual void copy_from_starting_at(const double* v, int start_index_in_v, int n); - /// @brief Copy from src the elements specified by the indices in index_in_src. + /// @brief Copy from src the elements specified by the indices in index_in_src. virtual void copy_from_indexes(const hiopVector& src, const hiopVectorInt& index_in_src); - /// @brief Copy from src the elements specified by the indices in index_in_src. + /// @brief Copy from src the elements specified by the indices in index_in_src. virtual void copy_from_indexes(const double* src, const hiopVectorInt& index_in_src); /// @brief Copy entries from a hiopVectorPar, see method documentation in the parent class. void copy_from_vectorpar(const hiopVectorPar& vsrc); /// @brief Copy entries to a hiopVectorPar, see method documentation in the parent class. void copy_to_vectorpar(hiopVectorPar& vdest) const; - + /// @brief Copy from `v` starting at `start_idx_src` to `this` starting at `start_idx_dest` virtual void startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& v, int start_idx_src); @@ -128,34 +128,37 @@ class hiopVectorCuda : public hiopVector virtual void copyToStartingAt_w_pattern(hiopVector& vec, int start_index_in_dest, const hiopVector& ix) const; /// @brief Copy the entries in `c` and `d` to `this`, according to the mapping in `c_map` and `d_map` - virtual void copy_from_two_vec_w_pattern(const hiopVector& c, - const hiopVectorInt& c_map, - const hiopVector& d, + virtual void copy_from_two_vec_w_pattern(const hiopVector& c, + const hiopVectorInt& c_map, + const hiopVector& d, const hiopVectorInt& d_map); /// @brief Copy the entries in `this` to `c` and `d`, according to the mapping `c_map` and `d_map` - virtual void copy_to_two_vec_w_pattern(hiopVector& c, - const hiopVectorInt& c_map, - hiopVector& d, + virtual void copy_to_two_vec_w_pattern(hiopVector& c, + const hiopVectorInt& c_map, + hiopVector& d, const hiopVectorInt& d_map) const; /** - * copy `this` (source) starting at `start_idx_in_src` to `dest` starting at index `int start_idx_dest` + * copy `this` (source) starting at `start_idx_in_src` to `dest` starting at index `int start_idx_dest` * If num_elems>=0, `num_elems` will be copied; if num_elems<0, elements will be copied till the end of * either source (`this`) or destination (`dest`) is reached - */ - virtual void startingAtCopyToStartingAt(int start_idx_in_src, hiopVector& dest, int start_idx_dest, int num_elems=-1) const; + */ + virtual void startingAtCopyToStartingAt(int start_idx_in_src, + hiopVector& dest, + int start_idx_dest, + int num_elems = -1) const; /** * @brief Copy `this` (source) starting at `start_idx_in_src` to `dest` starting at index `int start_idx_dest` * If num_elems>=0, `num_elems` will be copied; if num_elems<0, elements will be copied till the end of * either source (`this`) or destination (`dest`) is reached * The values are copy to `dest` where the corresponding entry in `selec_dest` is nonzero - */ + */ virtual void startingAtCopyToStartingAt_w_pattern(int start_idx_in_src, hiopVector& dest, int start_idx_dest, const hiopVector& selec_dest, - int num_elems=-1) const; + int num_elems = -1) const; /** @brief Return the two norm */ virtual double twonorm() const; @@ -168,15 +171,15 @@ class hiopVectorCuda : public hiopVector /** @brief L1 norm on single rank */ virtual double onenorm_local() const; /** @brief Multiply the components of this by the components of v. */ - virtual void componentMult( const hiopVector& v ); + virtual void componentMult(const hiopVector& v); /** @brief Divide the components of this hiopVector by the components of v. */ - virtual void componentDiv ( const hiopVector& v ); + virtual void componentDiv(const hiopVector& v); /** * @brief Elements of this that corespond to nonzeros in ix are divided by elements of v. * The rest of elements of this are set to zero. */ - virtual void componentDiv_w_selectPattern( const hiopVector& v, const hiopVector& ix); + virtual void componentDiv_w_selectPattern(const hiopVector& v, const hiopVector& ix); /** @brief Set each component of this hiopVector to the minimum of itself and the given constant. */ virtual void component_min(const double constant); @@ -202,10 +205,10 @@ class hiopVectorCuda : public hiopVector /** * @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. - * - * @param alpha scaling factor + * + * @param alpha scaling factor * @param x vector of doubles to be axpy-ed to this (size equal to size of i and less than or equal to size of this) - * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than + * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than * or equal to size of this) * * @pre The entries of i must be valid (zero-based) indexes in this @@ -214,14 +217,11 @@ class hiopVectorCuda : public hiopVector virtual void axpy(double alpha, const hiopVector& xvec, const hiopVectorInt& i); /// @brief this += alpha * x * z - virtual void axzpy (double alpha, const hiopVector& xvec, const hiopVector& zvec); + virtual void axzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); /// @brief this += alpha * x / z virtual void axdzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); /// @brief this += alpha * x / z on entries `i` for which select[i]==1. - virtual void axdzpy_w_pattern(double alpha, - const hiopVector& xvec, - const hiopVector& zvec, - const hiopVector& select); + virtual void axdzpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& zvec, const hiopVector& select); /// @brief Add c to the elements of this virtual void addConstant(double c); virtual void addConstant_w_patternSelect(double c, const hiopVector& select); @@ -239,25 +239,25 @@ class hiopVectorCuda : public hiopVector virtual double sum_local() const; /** - * @brief Computes the log barrier's linear damping term of the Filter-IPM method of + * @brief Computes the log barrier's linear damping term of the Filter-IPM method of * WaectherBiegler (see paper, section 3.7). * Essentially compute kappa_d*mu* \sum { this[i] | ixleft[i]==1 and ixright[i]==0 } */ virtual double linearDampingTerm_local(const hiopVector& ixleft, const hiopVector& ixright, - const double& mu, + const double& mu, const double& kappa_d) const; - /** - * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of - * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + /** + * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. * * Supports distributed/MPI vectors, but performs only elementwise operations and do not * require communication. * * This method is used to add gradient contributions from the (linear) damping term used - * to handle unbounded problems. The damping terms are used for variables that are - * bounded on one side only. + * to handle unbounded problems. The damping terms are used for variables that are + * bounded on one side only. */ virtual void addLinearDampingTerm(const hiopVector& ixleft, const hiopVector& ixright, @@ -290,10 +290,7 @@ class hiopVectorCuda : public hiopVector /// @brief checks whether entries in this matches pattern in ix virtual bool matchesPattern(const hiopVector& select); /// @brief dual adjustment -> see hiopIterate::adjustDuals_primalLogHessian - virtual void adjustDuals_plh(const hiopVector& xvec, - const hiopVector& ixvec, - const double& mu, - const double& kappa); + virtual void adjustDuals_plh(const hiopVector& xvec, const hiopVector& ixvec, const double& mu, const double& kappa); /// @brief True if all elements of this are zero. TODO: add unit test virtual bool is_zero() const; @@ -305,11 +302,11 @@ class hiopVectorCuda : public hiopVector virtual bool isfinite_local() const; /// @brief prints up to max_elems (by default all), on rank `rank` (by default on all) - virtual void print(FILE* file=nullptr, const char* message=nullptr,int max_elems=-1, int rank=-1) const; + virtual void print(FILE* file = nullptr, const char* message = nullptr, int max_elems = -1, int rank = -1) const; /// @brief allocates a vector that mirrors this, but doesn't copy the values virtual hiopVector* alloc_clone() const; /// @brief allocates a vector that mirrors this, and copies the values - virtual hiopVector* new_copy () const; + virtual hiopVector* new_copy() const; /* more accessors */ inline size_type get_local_size() const { return n_local_; } @@ -317,47 +314,40 @@ class hiopVectorCuda : public hiopVector inline const double* local_data_const() const { return data_; } inline double* local_data_host() { return data_host_mirror_; } inline const double* local_data_host_const() const { return data_host_mirror_; } + private: virtual void copyToDev(); virtual void copyFromDev(); virtual void copyToDev() const; virtual void copyFromDev() const; friend class tests::VectorTestsCuda; + public: /// @brief get number of values that are less than the given value `val`. TODO: add unit test - virtual size_type numOfElemsLessThan(const double &val) const; + virtual size_type numOfElemsLessThan(const double& val) const; /// @brief get number of values whose absolute value are less than the given value `val`. TODO: add unit test - virtual size_type numOfElemsAbsLessThan(const double &val) const; + virtual size_type numOfElemsAbsLessThan(const double& val) const; /// @brief set int array `arr`, starting at `start` and ending at `end`, to the values in `arr_src` from `start_src` /// TODO: add unit test - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType* arr_src, const int start_src) const; - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType arr_src) const; - - - /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; /* functions for this class */ inline MPI_Comm get_mpi_comm() const { return comm_; } - ExecSpace& exec_space() - { - return exec_space_; - } - const ExecSpace& exec_space() const - { - return exec_space_; - } + ExecSpace& exec_space() { return exec_space_; } + const ExecSpace& exec_space() const { return exec_space_; } private: ExecSpace exec_space_; @@ -376,8 +366,7 @@ class hiopVectorCuda : public hiopVector /** copy constructor, for internal/private use only (it doesn't copy the elements.) */ hiopVectorCuda(const hiopVectorCuda&); - }; -} // namespace hiop +} // namespace hiop #endif diff --git a/src/LinAlg/hiopVectorHip.cpp b/src/LinAlg/hiopVectorHip.cpp index d55f2d421..dafb0e66e 100644 --- a/src/LinAlg/hiopVectorHip.cpp +++ b/src/LinAlg/hiopVectorHip.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -70,9 +70,9 @@ namespace hiop { hiopVectorHip::hiopVectorHip(const size_type& glob_n, index_type* col_part, MPI_Comm comm) - : hiopVector(), - comm_(comm), - idx_cumsum_{nullptr} + : hiopVector(), + comm_(comm), + idx_cumsum_{nullptr} { n_ = glob_n; @@ -80,18 +80,18 @@ hiopVectorHip::hiopVectorHip(const size_type& glob_n, index_type* col_part, MPI_ // if this is a serial vector, make sure it has a valid comm in the mpi case if(comm_ == MPI_COMM_NULL) { comm_ = MPI_COMM_SELF; - } + } #endif - int P = 0; + int P = 0; if(col_part) { #ifdef HIOP_USE_MPI - int ierr=MPI_Comm_rank(comm_, &P); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Comm_rank(comm_, &P); + assert(ierr == MPI_SUCCESS); #endif glob_il_ = col_part[P]; - glob_iu_ = col_part[P+1]; - } - else { + glob_iu_ = col_part[P + 1]; + } else { glob_il_ = 0; glob_iu_ = n_; } @@ -110,8 +110,8 @@ hiopVectorHip::hiopVectorHip(const size_type& glob_n, index_type* col_part, MPI_ } hiopVectorHip::hiopVectorHip(const hiopVectorHip& v) - : hiopVector(), - idx_cumsum_{nullptr} + : hiopVector(), + idx_cumsum_{nullptr} { n_local_ = v.get_local_size(); n_ = v.get_size(); @@ -131,7 +131,7 @@ hiopVectorHip::~hiopVectorHip() { exec_space_host_.dealloc_array(data_host_mirror_); exec_space_.dealloc_array(data_); - data_ = nullptr; + data_ = nullptr; data_host_mirror_ = nullptr; // Delete workspaces and handles @@ -141,23 +141,17 @@ hiopVectorHip::~hiopVectorHip() } /// @brief Set all elements to zero. -void hiopVectorHip::setToZero() -{ - hiop::hip::thrust_fill_kernel(n_local_, data_, 0.0); -} +void hiopVectorHip::setToZero() { hiop::hip::thrust_fill_kernel(n_local_, data_, 0.0); } /// @brief Set all elements to c -void hiopVectorHip::setToConstant(double c) -{ - hiop::hip::thrust_fill_kernel(n_local_, data_, c); -} +void hiopVectorHip::setToConstant(double c) { hiop::hip::thrust_fill_kernel(n_local_, data_, c); } /// @brief Set all elements to random values uniformly distributed between `minv` and `maxv`. void hiopVectorHip::set_to_random_uniform(double minv, double maxv) { double* data = data_; hiop::hip::array_random_uniform_kernel(n_local_, data, minv, maxv); -} // namespace hiop +} // namespace hiop /// @brief Set all elements that are not zero in ix to c, and the rest to 0 void hiopVectorHip::setToConstant_w_patternSelect(double c, const hiopVector& select) @@ -208,55 +202,50 @@ void hiopVectorHip::copy_from_w_pattern(const hiopVector& vv, const hiopVector& /// @brief Copy the 'n' elements of v starting at 'start_index_in_dest' in 'this' void hiopVectorHip::copyFromStarting(int start_index_in_dest, const double* v, int nv) { - assert(start_index_in_dest+nv <= n_local_); - auto b = exec_space_.copy(data_+start_index_in_dest, v, nv); + assert(start_index_in_dest + nv <= n_local_); + auto b = exec_space_.copy(data_ + start_index_in_dest, v, nv); assert(b); } /// @brief Copy v_src into 'this' starting at start_index_in_dest in 'this'. */ void hiopVectorHip::copyFromStarting(int start_index_in_dest, const hiopVector& v_src) { - assert(n_local_==n_ && "only for local/non-distributed vectors"); - assert(start_index_in_dest+v_src.get_local_size() <= n_local_); + assert(n_local_ == n_ && "only for local/non-distributed vectors"); + assert(start_index_in_dest + v_src.get_local_size() <= n_local_); const hiopVectorHip& v = dynamic_cast(v_src); - auto b = exec_space_.copy(data_+start_index_in_dest, - v.data_, - v.n_local_, - v.exec_space()); + auto b = exec_space_.copy(data_ + start_index_in_dest, v.data_, v.n_local_, v.exec_space()); assert(b); } /// @brief Copy the 'n' elements of v starting at 'start_index_in_v' into 'this' void hiopVectorHip::copy_from_starting_at(const double* v, int start_index_in_v, int nv) { - auto b = exec_space_.copy(data_, v+start_index_in_v, nv); + auto b = exec_space_.copy(data_, v + start_index_in_v, nv); assert(b); } -/// @brief Copy from src the elements specified by the indices in index_in_src. -void hiopVectorHip::copy_from_indexes(const hiopVector& src, const hiopVectorInt& index_in_src) +/// @brief Copy from src the elements specified by the indices in index_in_src. +void hiopVectorHip::copy_from_indexes(const hiopVector& src, const hiopVectorInt& index_in_src) { assert(index_in_src.get_local_size() == n_local_); int* id = const_cast(index_in_src.local_data_const()); double* dd = data_; const double* vd = src.local_data_const(); - + hiop::hip::copy_from_index_kernel(n_local_, dd, vd, id); } -/// @brief Copy from src the elements specified by the indices in index_in_src. +/// @brief Copy from src the elements specified by the indices in index_in_src. void hiopVectorHip::copy_from_indexes(const double* src, const hiopVectorInt& index_in_src) { assert(index_in_src.get_local_size() == n_local_); - + hiop::hip::copy_from_index_kernel(n_local_, data_, src, index_in_src.local_data_const()); } /// @brief Copy from 'v' starting at 'start_idx_src' to 'this' starting at 'start_idx_dest' -void hiopVectorHip::startingAtCopyFromStartingAt(int start_idx_dest, - const hiopVector& vec_src, - int start_idx_src) +void hiopVectorHip::startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& vec_src, int start_idx_src) { size_type howManyToCopyDest = this->n_local_ - start_idx_dest; @@ -264,9 +253,9 @@ void hiopVectorHip::startingAtCopyFromStartingAt(int start_idx_dest, assert(n_local_ == n_ && "are you sure you want to call this?"); #endif int v_size = vec_src.get_local_size(); - assert((start_idx_dest >= 0 && start_idx_dest < this->n_local_) || this->n_local_==0); - assert((start_idx_src >=0 && start_idx_src < v_size) || v_size==0 || v_size==start_idx_src); - const size_type howManyToCopySrc = v_size - start_idx_src; + assert((start_idx_dest >= 0 && start_idx_dest < this->n_local_) || this->n_local_ == 0); + assert((start_idx_src >= 0 && start_idx_src < v_size) || v_size == 0 || v_size == start_idx_src); + const size_type howManyToCopySrc = v_size - start_idx_src; if(howManyToCopyDest == 0 || howManyToCopySrc == 0) { return; @@ -275,7 +264,7 @@ void hiopVectorHip::startingAtCopyFromStartingAt(int start_idx_dest, assert(howManyToCopyDest <= howManyToCopySrc); auto& v_src = dynamic_cast(vec_src); - exec_space_.copy(data_+start_idx_dest, v_src.data_+start_idx_src, howManyToCopyDest, v_src.exec_space()); + exec_space_.copy(data_ + start_idx_dest, v_src.data_ + start_idx_src, howManyToCopyDest, v_src.exec_space()); } /// @brief Copy 'this' to double array, which is assumed to be at least of 'n_local_' size. @@ -297,25 +286,23 @@ void hiopVectorHip::copyToStarting(int start_index, hiopVector& dst) const assert(start_index + v_size <= n_local_); // If nothing to copy, return. - if(v_size == 0) - return; + if(v_size == 0) return; auto& dst_hip = dynamic_cast(dst); - dst_hip.exec_space().copy(dst_hip.data_, data_+start_index, v_size, exec_space_); + dst_hip.exec_space().copy(dst_hip.data_, data_ + start_index, v_size, exec_space_); } /// @brief Copy 'this' to dst starting at start_index in 'dst'. void hiopVectorHip::copyToStarting(hiopVector& dst, int start_index) const { int v_size = dst.get_local_size(); - assert(start_index+n_local_ <= v_size); + assert(start_index + n_local_ <= v_size); // If there is nothing to copy, return. - if(n_local_ == 0) - return; + if(n_local_ == 0) return; auto& dst_hip = dynamic_cast(dst); - dst_hip.exec_space().copy(dst_hip.data_+start_index, data_, n_local_, exec_space_); + dst_hip.exec_space().copy(dst_hip.data_ + start_index, data_, n_local_, exec_space_); } /// @brief Copy the entries in 'this' where corresponding 'ix' is nonzero, to v starting at start_index in 'v'. @@ -324,41 +311,36 @@ void hiopVectorHip::copyToStartingAt_w_pattern(hiopVector& vec, int start_index_ if(n_local_ == 0) { return; } - + double* dd = data_; double* vd = vec.local_data(); const double* pattern = select.local_data_const(); if(nullptr == idx_cumsum_) { - idx_cumsum_ = LinearAlgebraFactory::create_vector_int("HIP", n_local_+1); + idx_cumsum_ = LinearAlgebraFactory::create_vector_int("HIP", n_local_ + 1); index_type* nnz_in_row = idx_cumsum_->local_data(); - hiop::hip::compute_cusum_kernel(n_local_+1, nnz_in_row, pattern); + hiop::hip::compute_cusum_kernel(n_local_ + 1, nnz_in_row, pattern); } index_type* nnz_cumsum = idx_cumsum_->local_data(); index_type v_n_local = vec.get_local_size(); - hiop::hip::copyToStartingAt_w_pattern_kernel(n_local_, - v_n_local, - start_index_in_dest, - nnz_cumsum, - vd, - dd); + hiop::hip::copyToStartingAt_w_pattern_kernel(n_local_, v_n_local, start_index_in_dest, nnz_cumsum, vd, dd); } /// @brief Copy the entries in `c` and `d` to `this`, according to the mapping in `c_map` and `d_map` void hiopVectorHip::copy_from_two_vec_w_pattern(const hiopVector& c, - const hiopVectorInt& c_map, - const hiopVector& d, - const hiopVectorInt& d_map) + const hiopVectorInt& c_map, + const hiopVector& d, + const hiopVectorInt& d_map) { const int c_size = c.get_size(); const int d_size = d.get_size(); - assert( c_size == c_map.get_local_size() ); - assert( d_size == d_map.get_local_size() ); - assert( c_size + d_size == n_local_); + assert(c_size == c_map.get_local_size()); + assert(d_size == d_map.get_local_size()); + assert(c_size + d_size == n_local_); hiop::hip::copy_src_to_mapped_dest_kernel(c_size, c.local_data_const(), local_data(), c_map.local_data_const()); hiop::hip::copy_src_to_mapped_dest_kernel(d_size, d.local_data_const(), local_data(), d_map.local_data_const()); @@ -366,47 +348,47 @@ void hiopVectorHip::copy_from_two_vec_w_pattern(const hiopVector& c, /// @brief Copy the entries in `this` to `c` and `d`, according to the mapping `c_map` and `d_map` void hiopVectorHip::copy_to_two_vec_w_pattern(hiopVector& c, - const hiopVectorInt& c_map, - hiopVector& d, - const hiopVectorInt& d_map) const + const hiopVectorInt& c_map, + hiopVector& d, + const hiopVectorInt& d_map) const { const int c_size = c.get_size(); const int d_size = d.get_size(); - assert( c_size == c_map.get_local_size() ); - assert( d_size == d_map.get_local_size() ); - assert( c_size + d_size == n_local_); + assert(c_size == c_map.get_local_size()); + assert(d_size == d_map.get_local_size()); + assert(c_size + d_size == n_local_); hiop::hip::copy_mapped_src_to_dest_kernel(c_size, local_data_const(), c.local_data(), c_map.local_data_const()); hiop::hip::copy_mapped_src_to_dest_kernel(d_size, local_data_const(), d.local_data(), d_map.local_data_const()); } -/// @brief Copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' +/// @brief Copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' void hiopVectorHip::startingAtCopyToStartingAt(index_type start_idx_in_src, - hiopVector& dest, + hiopVector& dest, index_type start_idx_dest, - size_type num_elems /* = -1 */) const + size_type num_elems /* = -1 */) const { #ifdef HIOP_DEEPCHECKS - assert(n_local_==n_ && "only for local/non-distributed vectors"); -#endif + assert(n_local_ == n_ && "only for local/non-distributed vectors"); +#endif assert(start_idx_in_src >= 0 && start_idx_in_src <= this->n_local_); - assert(start_idx_dest >= 0 && start_idx_dest <= dest.get_local_size()); + assert(start_idx_dest >= 0 && start_idx_dest <= dest.get_local_size()); const int dest_size = dest.get_local_size(); -#ifndef NDEBUG - if(start_idx_dest==dest_size || start_idx_in_src==this->n_local_) assert((num_elems==-1 || num_elems==0)); +#ifndef NDEBUG + if(start_idx_dest == dest_size || start_idx_in_src == this->n_local_) assert((num_elems == -1 || num_elems == 0)); #endif - if(num_elems<0) { - num_elems = std::min(this->n_local_ - start_idx_in_src, dest_size- start_idx_dest); + if(num_elems < 0) { + num_elems = std::min(this->n_local_ - start_idx_in_src, dest_size - start_idx_dest); } else { - assert(num_elems+start_idx_in_src <= this->n_local_); - assert(num_elems+start_idx_dest <= dest_size); - //make sure everything stays within bounds (in release) - num_elems = std::min(num_elems, (int) (this->n_local_-start_idx_in_src)); - num_elems = std::min(num_elems, (int) (dest_size-start_idx_dest)); + assert(num_elems + start_idx_in_src <= this->n_local_); + assert(num_elems + start_idx_dest <= dest_size); + // make sure everything stays within bounds (in release) + num_elems = std::min(num_elems, (int)(this->n_local_ - start_idx_in_src)); + num_elems = std::min(num_elems, (int)(dest_size - start_idx_dest)); } if(num_elems == 0) { @@ -414,28 +396,28 @@ void hiopVectorHip::startingAtCopyToStartingAt(index_type start_idx_in_src, } auto& dest_hip = dynamic_cast(dest); - dest_hip.exec_space().copy(dest_hip.data_+start_idx_dest, data_+start_idx_in_src, num_elems, exec_space_); + dest_hip.exec_space().copy(dest_hip.data_ + start_idx_dest, data_ + start_idx_in_src, num_elems, exec_space_); } /** -* @brief Copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' -* The values are copy to 'dest' where the corresponding entry in 'selec_dest' is nonzero -*/ + * @brief Copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' + * The values are copy to 'dest' where the corresponding entry in 'selec_dest' is nonzero + */ void hiopVectorHip::startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, - hiopVector& destination, - index_type start_idx_dest, - const hiopVector& selec_dest, - size_type num_elems/*=-1*/) const + hiopVector& destination, + index_type start_idx_dest, + const hiopVector& selec_dest, + size_type num_elems /*=-1*/) const { - assert(false&&"TODO --- only used in the full linear system"); + assert(false && "TODO --- only used in the full linear system"); } /** @brief Return the two norm */ double hiopVectorHip::twonorm() const { - int one = 1; + int one = 1; double nrm = 0.; - if(n_local_>0) { + if(n_local_ > 0) { hipblasStatus_t ret_hipblas = hipblasDnrm2(handle_hipblas_, n_local_, data_, one, &nrm); assert(ret_hipblas == HIPBLAS_STATUS_SUCCESS); } @@ -443,9 +425,10 @@ double hiopVectorHip::twonorm() const #ifdef HIOP_USE_MPI nrm *= nrm; double nrmG; - int ierr = MPI_Allreduce(&nrm, &nrmG, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&nrm, &nrmG, 1, MPI_DOUBLE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); nrm = std::sqrt(nrmG); -#endif +#endif return nrm; } @@ -456,7 +439,7 @@ double hiopVectorHip::infnorm() const #ifdef HIOP_USE_MPI double nrm_global; int ierr = MPI_Allreduce(&nrm, &nrm_global, 1, MPI_DOUBLE, MPI_MAX, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return nrm_global; #endif @@ -464,10 +447,7 @@ double hiopVectorHip::infnorm() const } /** @brief inf norm on single rank */ -double hiopVectorHip::infnorm_local() const -{ - return hiop::hip::infnorm_local_kernel(n_local_, data_); -} +double hiopVectorHip::infnorm_local() const { return hiop::hip::infnorm_local_kernel(n_local_, data_); } /** @brief Return the one norm */ double hiopVectorHip::onenorm() const @@ -475,47 +455,42 @@ double hiopVectorHip::onenorm() const double norm1 = onenorm_local(); #ifdef HIOP_USE_MPI double nrm1_global; - int ierr = MPI_Allreduce(&norm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&norm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); return nrm1_global; #endif return norm1; } /** @brief L1 norm on single rank */ -double hiopVectorHip::onenorm_local() const -{ - return hiop::hip::onenorm_local_kernel(n_local_, data_); -} +double hiopVectorHip::onenorm_local() const { return hiop::hip::onenorm_local_kernel(n_local_, data_); } /** @brief Multiply the components of this by the components of v. */ -void hiopVectorHip::componentMult( const hiopVector& vec ) +void hiopVectorHip::componentMult(const hiopVector& vec) { assert(n_local_ == vec.get_local_size()); hiop::hip::thrust_component_mult_kernel(n_local_, data_, vec.local_data_const()); } /** @brief Divide the components of this hiopVector by the components of v. */ -void hiopVectorHip::componentDiv( const hiopVector& vec ) +void hiopVectorHip::componentDiv(const hiopVector& vec) { assert(n_local_ == vec.get_local_size()); hiop::hip::thrust_component_div_kernel(n_local_, data_, vec.local_data_const()); } /** -* @brief Elements of this that corespond to nonzeros in ix are divided by elements of v. -* The rest of elements of this are set to zero. -*/ -void hiopVectorHip::componentDiv_w_selectPattern( const hiopVector& vec, const hiopVector& select) + * @brief Elements of this that corespond to nonzeros in ix are divided by elements of v. + * The rest of elements of this are set to zero. + */ +void hiopVectorHip::componentDiv_w_selectPattern(const hiopVector& vec, const hiopVector& select) { assert(n_local_ == vec.get_local_size()); hiop::hip::component_div_w_pattern_kernel(n_local_, data_, vec.local_data_const(), select.local_data_const()); } /** @brief Set each component of this hiopVector to the minimum of itself and the given constant. */ -void hiopVectorHip::component_min(const double constant) -{ - hiop::hip::component_min_kernel(n_local_, data_, constant); -} +void hiopVectorHip::component_min(const double constant) { hiop::hip::component_min_kernel(n_local_, data_, constant); } /** @brief Set each component of this hiopVector to the minimum of itself and the corresponding component of 'v'. */ void hiopVectorHip::component_min(const hiopVector& vec) @@ -526,10 +501,7 @@ void hiopVectorHip::component_min(const hiopVector& vec) } /** @brief Set each component of this hiopVector to the maximum of itself and the given constant. */ -void hiopVectorHip::component_max(const double constant) -{ - hiop::hip::component_max_kernel(n_local_, data_, constant); -} +void hiopVectorHip::component_max(const double constant) { hiop::hip::component_max_kernel(n_local_, data_, constant); } /** @brief Set each component of this hiopVector to the maximum of itself and the corresponding component of 'v'. */ void hiopVectorHip::component_max(const hiopVector& vec) @@ -537,32 +509,23 @@ void hiopVectorHip::component_max(const hiopVector& vec) assert(vec.get_local_size() == n_local_); const double* vd = vec.local_data_const(); - + hiop::hip::component_max_kernel(n_local_, data_, vd); } /** @brief Set each component to its absolute value */ -void hiopVectorHip::component_abs() -{ - hiop::hip::thrust_component_abs_kernel(n_local_, data_); -} +void hiopVectorHip::component_abs() { hiop::hip::thrust_component_abs_kernel(n_local_, data_); } /** @brief Apply sign function to each component */ -void hiopVectorHip::component_sgn () -{ - hiop::hip::thrust_component_sgn_kernel(n_local_, data_); -} +void hiopVectorHip::component_sgn() { hiop::hip::thrust_component_sgn_kernel(n_local_, data_); } /** @brief compute sqrt of each component */ -void hiopVectorHip::component_sqrt() -{ - hiop::hip::thrust_component_sqrt_kernel(n_local_, data_); -} +void hiopVectorHip::component_sqrt() { hiop::hip::thrust_component_sqrt_kernel(n_local_, data_); } /// @brief Scale each element of this by the constant alpha void hiopVectorHip::scale(double alpha) { - int one = 1; + int one = 1; hipblasStatus_t ret_hipblas = hipblasDscal(handle_hipblas_, n_local_, &alpha, data_, one); assert(ret_hipblas == HIPBLAS_STATUS_SUCCESS); } @@ -576,7 +539,7 @@ void hiopVectorHip::axpy(double alpha, const hiopVector& xvec) } /// @brief this += alpha * x, for the entries in 'this' where corresponding 'select' is nonzero. -void hiopVectorHip::axpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& select) +void hiopVectorHip::axpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& select) { axpy(alpha, xvec); componentMult(select); @@ -585,10 +548,10 @@ void hiopVectorHip::axpy_w_pattern(double alpha, const hiopVector& xvec, const h /// @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. void hiopVectorHip::axpy(double alpha, const hiopVector& xvec, const hiopVectorInt& i) { - assert(xvec.get_size()==i.get_local_size()); - assert(xvec.get_local_size()==i.get_local_size()); - assert(i.get_local_size()<=n_local_); - + assert(xvec.get_size() == i.get_local_size()); + assert(xvec.get_local_size() == i.get_local_size()); + assert(i.get_local_size() <= n_local_); + double* yd = data_; const double* xd = const_cast(xvec.local_data_const()); int* id = const_cast(i.local_data_const()); @@ -601,9 +564,9 @@ void hiopVectorHip::axzpy(double alpha, const hiopVector& xvec, const hiopVector { #ifdef HIOP_DEEPCHECKS assert(xvec.get_local_size() == zvec.get_local_size()); - assert( n_local_ == zvec.get_local_size()); -#endif - double* dd = data_; + assert(n_local_ == zvec.get_local_size()); +#endif + double* dd = data_; const double* xd = xvec.local_data_const(); const double* zd = zvec.local_data_const(); @@ -615,9 +578,9 @@ void hiopVectorHip::axdzpy(double alpha, const hiopVector& xvec, const hiopVecto { #ifdef HIOP_DEEPCHECKS assert(xvec.get_local_size() == zvec.get_local_size()); - assert( n_local_ == zvec.get_local_size()); -#endif - double* yd = data_; + assert(n_local_ == zvec.get_local_size()); +#endif + double* yd = data_; const double* xd = xvec.local_data_const(); const double* zd = zvec.local_data_const(); @@ -625,14 +588,11 @@ void hiopVectorHip::axdzpy(double alpha, const hiopVector& xvec, const hiopVecto } /** @brief this[i] += alpha*x[i]/z[i] forall i with pattern selection */ -void hiopVectorHip::axdzpy_w_pattern(double alpha, - const hiopVector& xvec, - const hiopVector& zvec, - const hiopVector& select) +void hiopVectorHip::axdzpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& zvec, const hiopVector& select) { #ifdef HIOP_DEEPCHECKS - assert(xvec.get_local_size()==zvec.get_local_size()); - assert( n_local_==zvec.get_local_size()); + assert(xvec.get_local_size() == zvec.get_local_size()); + assert(n_local_ == zvec.get_local_size()); #endif double* yd = data_; const double* xd = xvec.local_data_const(); @@ -643,13 +603,10 @@ void hiopVectorHip::axdzpy_w_pattern(double alpha, } /** @brief this[i] += c forall i */ -void hiopVectorHip::addConstant(double c) -{ - hiop::hip::add_constant_kernel(n_local_, data_, c); -} +void hiopVectorHip::addConstant(double c) { hiop::hip::add_constant_kernel(n_local_, data_, c); } /** @brief this[i] += c forall i with pattern selection */ -void hiopVectorHip::addConstant_w_patternSelect(double c, const hiopVector& select) +void hiopVectorHip::addConstant_w_patternSelect(double c, const hiopVector& select) { assert(this->n_local_ == select.get_local_size()); const double* id = select.local_data_const(); @@ -658,17 +615,17 @@ void hiopVectorHip::addConstant_w_patternSelect(double c, const hiopVector& sel } /** @brief Return the dot product of this hiopVector with v */ -double hiopVectorHip::dotProductWith( const hiopVector& v ) const +double hiopVectorHip::dotProductWith(const hiopVector& v) const { int one = 1; - double retval; + double retval; hipblasStatus_t ret_hipblas = hipblasDdot(handle_hipblas_, n_local_, v.local_data_const(), one, data_, one, &retval); assert(ret_hipblas == HIPBLAS_STATUS_SUCCESS); #ifdef HIOP_USE_MPI double dotprodG; int ierr = MPI_Allreduce(&retval, &dotprodG, 1, MPI_DOUBLE, MPI_SUM, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); retval = dotprodG; #endif @@ -676,16 +633,10 @@ double hiopVectorHip::dotProductWith( const hiopVector& v ) const } /// @brief Negate all the elements of this -void hiopVectorHip::negate() -{ - hiop::hip::thrust_negate_kernel(n_local_, data_); -} +void hiopVectorHip::negate() { hiop::hip::thrust_negate_kernel(n_local_, data_); } /// @brief Invert (1/x) the elements of this -void hiopVectorHip::invert() -{ - hiop::hip::invert_kernel(n_local_, data_); -} +void hiopVectorHip::invert() { hiop::hip::invert_kernel(n_local_, data_); } /** @brief Sum all selected log(this[i]) */ double hiopVectorHip::logBarrier_local(const hiopVector& select) const @@ -697,9 +648,7 @@ double hiopVectorHip::logBarrier_local(const hiopVector& select) const } /* @brief adds the gradient of the log barrier, namely this=this+alpha*1/select(x) */ -void hiopVectorHip::addLogBarrierGrad(double alpha, - const hiopVector& xvec, - const hiopVector& select) +void hiopVectorHip::addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select) { #ifdef HIOP_DEEPCHECKS assert(n_local_ == xvec.get_local_size()); @@ -713,16 +662,13 @@ void hiopVectorHip::addLogBarrierGrad(double alpha, } /** @brief Sum all elements */ -double hiopVectorHip::sum_local() const -{ - return hiop::hip::thrust_sum_kernel(n_local_, data_); -} +double hiopVectorHip::sum_local() const { return hiop::hip::thrust_sum_kernel(n_local_, data_); } /** @brief Linear damping term */ double hiopVectorHip::linearDampingTerm_local(const hiopVector& ixleft, - const hiopVector& ixright, - const double& mu, - const double& kappa_d) const + const hiopVector& ixright, + const double& mu, + const double& kappa_d) const { #ifdef HIOP_DEEPCHECKS assert(n_local_ == ixleft.get_local_size()); @@ -735,21 +681,20 @@ double hiopVectorHip::linearDampingTerm_local(const hiopVector& ixleft, return hiop::hip::linear_damping_term_kernel(n_local_, vd, ld, rd, mu, kappa_d); } -/** -* @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of -* ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. -*/ +/** + * @brief Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + */ void hiopVectorHip::addLinearDampingTerm(const hiopVector& ixleft, - const hiopVector& ixright, - const double& alpha, - const double& ct) + const hiopVector& ixright, + const double& alpha, + const double& ct) { - assert(ixleft.get_local_size() == n_local_); assert(ixright.get_local_size() == n_local_); - const double* ixl= ixleft.local_data_const(); - const double* ixr= ixright.local_data_const(); + const double* ixl = ixleft.local_data_const(); + const double* ixr = ixright.local_data_const(); double* data = data_; @@ -766,7 +711,8 @@ int hiopVectorHip::allPositive() #ifdef HIOP_USE_MPI int allPosG; - int ierr=MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); return allPosG; #endif return allPos; @@ -777,21 +723,21 @@ int hiopVectorHip::allPositive_w_patternSelect(const hiopVector& wvec) { #ifdef HIOP_DEEPCHECKS assert(wvec.get_local_size() == n_local_); -#endif +#endif const double* id = wvec.local_data_const(); const double* data = data_; int allPos = hiop::hip::all_positive_w_pattern_kernel(n_local_, data, id); - - allPos = (allPos==n_local_) ? 1 : 0; - + + allPos = (allPos == n_local_) ? 1 : 0; + #ifdef HIOP_USE_MPI int allPosG; int ierr = MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return allPosG; -#endif +#endif return allPos; } @@ -802,7 +748,8 @@ double hiopVectorHip::min() const #ifdef HIOP_USE_MPI double resultG; - double ierr=MPI_Allreduce(&result, &resultG, 1, MPI_DOUBLE, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + double ierr = MPI_Allreduce(&result, &resultG, 1, MPI_DOUBLE, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); return resultG; #endif return result; @@ -814,37 +761,35 @@ double hiopVectorHip::min_w_pattern(const hiopVector& select) const assert(this->n_local_ == select.get_local_size()); const double* data = data_; const double* id = select.local_data_const(); - + double max_val = std::numeric_limits::max(); double result = hiop::hip::min_w_pattern_kernel(n_local_, data, id, max_val); #ifdef HIOP_USE_MPI double resultG; - double ierr=MPI_Allreduce(&result, &resultG, 1, MPI_DOUBLE, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + double ierr = MPI_Allreduce(&result, &resultG, 1, MPI_DOUBLE, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); return resultG; #endif return result; } /// @brief Return the minimum value in this vector, and the index at which it occurs. TODO -void hiopVectorHip::min( double& /* m */, int& /* index */) const -{ - assert(false && "not implemented"); -} +void hiopVectorHip::min(double& /* m */, int& /* index */) const { assert(false && "not implemented"); } /** @brief Project solution into bounds */ -bool hiopVectorHip::projectIntoBounds_local(const hiopVector& xlo, - const hiopVector& ixl, - const hiopVector& xup, - const hiopVector& ixu, - double kappa1, - double kappa2) +bool hiopVectorHip::projectIntoBounds_local(const hiopVector& xlo, + const hiopVector& ixl, + const hiopVector& xup, + const hiopVector& ixu, + double kappa1, + double kappa2) { #ifdef HIOP_DEEPCHECKS assert(xlo.get_local_size() == n_local_); assert(ixl.get_local_size() == n_local_); - assert(xup.get_local_size()== n_local_); - assert(ixu.get_local_size()== n_local_); + assert(xup.get_local_size() == n_local_); + assert(ixu.get_local_size() == n_local_); #endif const double* xld = xlo.local_data_const(); @@ -852,15 +797,14 @@ bool hiopVectorHip::projectIntoBounds_local(const hiopVector& xlo, const double* xud = xup.local_data_const(); const double* iud = ixu.local_data_const(); double* xd = data_; - + // Perform preliminary check to see of all upper value < lower value bool bval = hiop::hip::check_bounds_kernel(n_local_, xld, xud); - if(false == bval) - return false; + if(false == bval) return false; const double small_real = std::numeric_limits::min() * 100; - + hiop::hip::project_into_bounds_kernel(n_local_, xd, xld, ild, xud, iud, kappa1, kappa2, small_real); return true; @@ -872,7 +816,7 @@ double hiopVectorHip::fractionToTheBdry_local(const hiopVector& dvec, const doub #ifdef HIOP_DEEPCHECKS assert(dvec.get_local_size() == n_local_); assert(tau > 0); - assert(tau < 1); // TODO: per documentation above it should be tau <= 1 (?). + assert(tau < 1); // TODO: per documentation above it should be tau <= 1 (?). #endif const double* dd = dvec.local_data_const(); @@ -885,14 +829,14 @@ double hiopVectorHip::fractionToTheBdry_local(const hiopVector& dvec, const doub /** @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern select */ double hiopVectorHip::fractionToTheBdry_w_pattern_local(const hiopVector& dvec, - const double& tau, - const hiopVector& select) const + const double& tau, + const hiopVector& select) const { #ifdef HIOP_DEEPCHECKS assert(dvec.get_local_size() == n_local_); assert(select.get_local_size() == n_local_); - assert(tau>0); - assert(tau<1); + assert(tau > 0); + assert(tau < 1); #endif const double* dd = dvec.local_data_const(); const double* xd = data_; @@ -907,7 +851,7 @@ double hiopVectorHip::fractionToTheBdry_w_pattern_local(const hiopVector& dvec, void hiopVectorHip::selectPattern(const hiopVector& select) { #ifdef HIOP_DEEPCHECKS - assert(select.get_local_size()==n_local_); + assert(select.get_local_size() == n_local_); #endif double* data = data_; @@ -921,7 +865,7 @@ void hiopVectorHip::selectPattern(const hiopVector& select) bool hiopVectorHip::matchesPattern(const hiopVector& pattern) { #ifdef HIOP_DEEPCHECKS - assert(pattern.get_local_size()==n_local_); + assert(pattern.get_local_size() == n_local_); #endif double* xd = data_; @@ -932,61 +876,49 @@ bool hiopVectorHip::matchesPattern(const hiopVector& pattern) #ifdef HIOP_USE_MPI int mismatch_glob = bret; int ierr = MPI_Allreduce(&bret, &mismatch_glob, 1, MPI_INT, MPI_MIN, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return (mismatch_glob != 0); #endif return bret; } /** @brief Adjusts duals. */ -void hiopVectorHip::adjustDuals_plh(const hiopVector& xvec, - const hiopVector& ixvec, - const double& mu, - const double& kappa) +void hiopVectorHip::adjustDuals_plh(const hiopVector& xvec, const hiopVector& ixvec, const double& mu, const double& kappa) { #ifdef HIOP_DEEPCHECKS - assert(xvec.get_local_size()==n_local_); - assert(ixvec.get_local_size()==n_local_); + assert(xvec.get_local_size() == n_local_); + assert(ixvec.get_local_size() == n_local_); #endif - const double* xd = xvec.local_data_const(); + const double* xd = xvec.local_data_const(); const double* id = ixvec.local_data_const(); - double* zd = data_; //the dual + double* zd = data_; // the dual hiop::hip::adjustDuals_plh_kernel(n_local_, zd, xd, id, mu, kappa); } /** @brief Check if all elements of the vector are zero */ -bool hiopVectorHip::is_zero() const -{ - return hiop::hip::is_zero_kernel(n_local_, data_); -} +bool hiopVectorHip::is_zero() const { return hiop::hip::is_zero_kernel(n_local_, data_); } /** @brief Returns true if any element of `this` is NaN. */ -bool hiopVectorHip::isnan_local() const -{ - return hiop::hip::isnan_kernel(n_local_, data_); -} +bool hiopVectorHip::isnan_local() const { return hiop::hip::isnan_kernel(n_local_, data_); } /** * @brief Returns true if any element of `this` is Inf. - * + * * @post `this` is not modified - * + * * @warning This is local method only! */ -bool hiopVectorHip::isinf_local() const -{ - return hiop::hip::isinf_kernel(n_local_, data_); -} +bool hiopVectorHip::isinf_local() const { return hiop::hip::isinf_kernel(n_local_, data_); } /** @brief Returns true if all elements of `this` are finite. */ -bool hiopVectorHip::isfinite_local() const -{ - return hiop::hip::isfinite_kernel(n_local_, data_); -} +bool hiopVectorHip::isfinite_local() const { return hiop::hip::isfinite_kernel(n_local_, data_); } /** @brief Prints vector data to a file in Matlab format. */ -void hiopVectorHip::print(FILE* file/*=nullptr*/, const char* msg/*=nullptr*/, int max_elems/*=-1*/, int rank/*=-1*/) const +void hiopVectorHip::print(FILE* file /*=nullptr*/, + const char* msg /*=nullptr*/, + int max_elems /*=-1*/, + int rank /*=-1*/) const { // TODO. no fprintf. use printf to print everything on screen? // Alternative: create a hiopVectorPar copy and use hiopVectorPar::print @@ -996,36 +928,32 @@ void hiopVectorHip::print(FILE* file/*=nullptr*/, const char* msg/*=nullptr*/, i /// @brief allocates a vector that mirrors this, but doesn't copy the values hiopVector* hiopVectorHip::alloc_clone() const { - hiopVector* v = new hiopVectorHip(*this); assert(v); + hiopVector* v = new hiopVectorHip(*this); + assert(v); return v; } /// @brief allocates a vector that mirrors this, and copies the values -hiopVector* hiopVectorHip::new_copy () const +hiopVector* hiopVectorHip::new_copy() const { - hiopVector* v = new hiopVectorHip(*this); assert(v); + hiopVector* v = new hiopVectorHip(*this); + assert(v); v->copyFrom(*this); return v; } /// @brief copy data from host mirror to device -void hiopVectorHip::copyToDev() -{ - exec_space_.copy(data_, data_host_mirror_, n_local_, exec_space_host_); -} +void hiopVectorHip::copyToDev() { exec_space_.copy(data_, data_host_mirror_, n_local_, exec_space_host_); } /// @brief copy data from device to host mirror -void hiopVectorHip::copyFromDev() -{ - exec_space_host_.copy(data_host_mirror_, data_, n_local_, exec_space_); -} +void hiopVectorHip::copyFromDev() { exec_space_host_.copy(data_host_mirror_, data_, n_local_, exec_space_); } /// @brief copy data from host mirror to device void hiopVectorHip::copyToDev() const { auto* this_nonconst = const_cast(this); assert(nullptr != this_nonconst); - + this_nonconst->copyToDev(); } @@ -1039,23 +967,23 @@ void hiopVectorHip::copyFromDev() const } /// @brief get number of values that are less than the given value 'val'. TODO: add unit test -size_type hiopVectorHip::numOfElemsLessThan(const double &val) const +size_type hiopVectorHip::numOfElemsLessThan(const double& val) const { return hiop::hip::num_of_elem_less_than_kernel(n_local_, data_, val); } /// @brief get number of values whose absolute value are less than the given value 'val'. TODO: add unit test -size_type hiopVectorHip::numOfElemsAbsLessThan(const double &val) const +size_type hiopVectorHip::numOfElemsAbsLessThan(const double& val) const { return hiop::hip::num_of_elem_absless_than_kernel(n_local_, data_, val); } /// @brief set int array 'arr', starting at `start` and ending at `end`, to the values in `arr_src` from 'start_src` -void hiopVectorHip::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, - const hiopInterfaceBase::NonlinearityType* arr_src, - const int start_src) const +void hiopVectorHip::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, + const hiopInterfaceBase::NonlinearityType* arr_src, + const int start_src) const { assert(arr && arr_src); assert(end <= n_local_ && start <= end && start >= 0); @@ -1069,10 +997,10 @@ void hiopVectorHip::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, } /// @brief set int array 'arr', starting at `start` and ending at `end`, to the values in `arr_src` from 'start_src` -void hiopVectorHip::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, - const hiopInterfaceBase::NonlinearityType arr_src) const +void hiopVectorHip::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, + const hiopInterfaceBase::NonlinearityType arr_src) const { assert(arr && arr_src); assert(end <= n_local_ && start <= end && start >= 0); @@ -1086,11 +1014,6 @@ void hiopVectorHip::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, } /// @brief check if `this` vector is identical to `vec` -bool hiopVectorHip::is_equal(const hiopVector& vec) const -{ - assert(false&&"NOT needed. Remove this func. TODO"); -} - - -} // namespace hiop +bool hiopVectorHip::is_equal(const hiopVector& vec) const { assert(false && "NOT needed. Remove this func. TODO"); } +} // namespace hiop diff --git a/src/LinAlg/hiopVectorHip.hpp b/src/LinAlg/hiopVectorHip.hpp index addaf1d03..648ba8d0a 100644 --- a/src/LinAlg/hiopVectorHip.hpp +++ b/src/LinAlg/hiopVectorHip.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -69,19 +69,19 @@ namespace hiop { -//Forward declarations +// Forward declarations class hiopVectorPar; -//Forward declarations of tester classes that needs to be friends with this vector +// Forward declarations of tester classes that needs to be friends with this vector namespace tests { class VectorTestsHip; } - + /// Implementation of abstract class hiopVector using HIP API class hiopVectorHip : public hiopVector { public: - hiopVectorHip(const size_type& glob_n, index_type* col_part=nullptr, MPI_Comm comm=MPI_COMM_SELF); + hiopVectorHip(const size_type& glob_n, index_type* col_part = nullptr, MPI_Comm comm = MPI_COMM_SELF); virtual ~hiopVectorHip(); /// @brief Set all elements to zero. @@ -105,16 +105,16 @@ class hiopVectorHip : public hiopVector /// @brief Copy the 'n' elements of v starting at 'start_index_in_v' into 'this' virtual void copy_from_starting_at(const double* v, int start_index_in_v, int n); - /// @brief Copy from src the elements specified by the indices in index_in_src. + /// @brief Copy from src the elements specified by the indices in index_in_src. virtual void copy_from_indexes(const hiopVector& src, const hiopVectorInt& index_in_src); - /// @brief Copy from src the elements specified by the indices in index_in_src. + /// @brief Copy from src the elements specified by the indices in index_in_src. virtual void copy_from_indexes(const double* src, const hiopVectorInt& index_in_src); /// @brief Copy entries from a hiopVectorPar, see method documentation in the parent class. void copy_from_vectorpar(const hiopVectorPar& vsrc); /// @brief Copy entries to a hiopVectorPar, see method documentation in the parent class. void copy_to_vectorpar(hiopVectorPar& vdest) const; - + /// @brief Copy from 'v' starting at 'start_idx_src' to 'this' starting at 'start_idx_dest' virtual void startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& v, int start_idx_src); @@ -128,34 +128,37 @@ class hiopVectorHip : public hiopVector virtual void copyToStartingAt_w_pattern(hiopVector& vec, int start_index_in_dest, const hiopVector& ix) const; /// @brief Copy the entries in `c` and `d` to `this`, according to the mapping in `c_map` and `d_map` - virtual void copy_from_two_vec_w_pattern(const hiopVector& c, - const hiopVectorInt& c_map, - const hiopVector& d, + virtual void copy_from_two_vec_w_pattern(const hiopVector& c, + const hiopVectorInt& c_map, + const hiopVector& d, const hiopVectorInt& d_map); /// @brief Copy the entries in `this` to `c` and `d`, according to the mapping `c_map` and `d_map` - virtual void copy_to_two_vec_w_pattern(hiopVector& c, - const hiopVectorInt& c_map, - hiopVector& d, + virtual void copy_to_two_vec_w_pattern(hiopVector& c, + const hiopVectorInt& c_map, + hiopVector& d, const hiopVectorInt& d_map) const; /** - * copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' + * copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' * If num_elems>=0, 'num_elems' will be copied; if num_elems<0, elements will be copied till the end of * either source ('this') or destination ('dest') is reached - */ - virtual void startingAtCopyToStartingAt(int start_idx_in_src, hiopVector& dest, int start_idx_dest, int num_elems=-1) const; + */ + virtual void startingAtCopyToStartingAt(int start_idx_in_src, + hiopVector& dest, + int start_idx_dest, + int num_elems = -1) const; /** * @brief Copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' * If num_elems>=0, 'num_elems' will be copied; if num_elems<0, elements will be copied till the end of * either source ('this') or destination ('dest') is reached * The values are copy to 'dest' where the corresponding entry in 'selec_dest' is nonzero - */ + */ virtual void startingAtCopyToStartingAt_w_pattern(int start_idx_in_src, hiopVector& dest, int start_idx_dest, const hiopVector& selec_dest, - int num_elems=-1) const; + int num_elems = -1) const; /** @brief Return the two norm */ virtual double twonorm() const; @@ -168,15 +171,15 @@ class hiopVectorHip : public hiopVector /** @brief L1 norm on single rank */ virtual double onenorm_local() const; /** @brief Multiply the components of this by the components of v. */ - virtual void componentMult( const hiopVector& v ); + virtual void componentMult(const hiopVector& v); /** @brief Divide the components of this hiopVector by the components of v. */ - virtual void componentDiv ( const hiopVector& v ); + virtual void componentDiv(const hiopVector& v); /** * @brief Elements of this that corespond to nonzeros in ix are divided by elements of v. * The rest of elements of this are set to zero. */ - virtual void componentDiv_w_selectPattern( const hiopVector& v, const hiopVector& ix); + virtual void componentDiv_w_selectPattern(const hiopVector& v, const hiopVector& ix); /** @brief Set each component of this hiopVector to the minimum of itself and the given constant. */ virtual void component_min(const double constant); @@ -202,10 +205,10 @@ class hiopVectorHip : public hiopVector /** * @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. - * - * @param alpha scaling factor + * + * @param alpha scaling factor * @param x vector of doubles to be axpy-ed to this (size equal to size of i and less than or equal to size of this) - * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than + * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than * or equal to size of this) * * @pre The entries of i must be valid (zero-based) indexes in this @@ -214,14 +217,11 @@ class hiopVectorHip : public hiopVector virtual void axpy(double alpha, const hiopVector& xvec, const hiopVectorInt& i); /// @brief this += alpha * x * z - virtual void axzpy (double alpha, const hiopVector& xvec, const hiopVector& zvec); + virtual void axzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); /// @brief this += alpha * x / z virtual void axdzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); /// @brief this += alpha * x / z on entries 'i' for which select[i]==1. - virtual void axdzpy_w_pattern(double alpha, - const hiopVector& xvec, - const hiopVector& zvec, - const hiopVector& select); + virtual void axdzpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& zvec, const hiopVector& select); /// @brief Add c to the elements of this virtual void addConstant(double c); virtual void addConstant_w_patternSelect(double c, const hiopVector& select); @@ -239,25 +239,25 @@ class hiopVectorHip : public hiopVector virtual double sum_local() const; /** - * @brief Computes the log barrier's linear damping term of the Filter-IPM method of + * @brief Computes the log barrier's linear damping term of the Filter-IPM method of * WaectherBiegler (see paper, section 3.7). * Essentially compute kappa_d*mu* \sum { this[i] | ixleft[i]==1 and ixright[i]==0 } */ virtual double linearDampingTerm_local(const hiopVector& ixleft, const hiopVector& ixright, - const double& mu, + const double& mu, const double& kappa_d) const; - /** - * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of - * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + /** + * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. * * Supports distributed/MPI vectors, but performs only elementwise operations and do not * require communication. * * This method is used to add gradient contributions from the (linear) damping term used - * to handle unbounded problems. The damping terms are used for variables that are - * bounded on one side only. + * to handle unbounded problems. The damping terms are used for variables that are + * bounded on one side only. */ virtual void addLinearDampingTerm(const hiopVector& ixleft, const hiopVector& ixright, @@ -290,10 +290,7 @@ class hiopVectorHip : public hiopVector /// @brief checks whether entries in this matches pattern in ix virtual bool matchesPattern(const hiopVector& select); /// @brief dual adjustment -> see hiopIterate::adjustDuals_primalLogHessian - virtual void adjustDuals_plh(const hiopVector& xvec, - const hiopVector& ixvec, - const double& mu, - const double& kappa); + virtual void adjustDuals_plh(const hiopVector& xvec, const hiopVector& ixvec, const double& mu, const double& kappa); /// @brief True if all elements of this are zero. TODO: add unit test virtual bool is_zero() const; @@ -305,11 +302,11 @@ class hiopVectorHip : public hiopVector virtual bool isfinite_local() const; /// @brief prints up to max_elems (by default all), on rank 'rank' (by default on all) - virtual void print(FILE* file=nullptr, const char* message=nullptr,int max_elems=-1, int rank=-1) const; + virtual void print(FILE* file = nullptr, const char* message = nullptr, int max_elems = -1, int rank = -1) const; /// @brief allocates a vector that mirrors this, but doesn't copy the values virtual hiopVector* alloc_clone() const; /// @brief allocates a vector that mirrors this, and copies the values - virtual hiopVector* new_copy () const; + virtual hiopVector* new_copy() const; /* more accessors */ inline size_type get_local_size() const { return n_local_; } @@ -327,39 +324,30 @@ class hiopVectorHip : public hiopVector public: /// @brief get number of values that are less than the given value 'val'. TODO: add unit test - virtual size_type numOfElemsLessThan(const double &val) const; + virtual size_type numOfElemsLessThan(const double& val) const; /// @brief get number of values whose absolute value are less than the given value 'val'. TODO: add unit test - virtual size_type numOfElemsAbsLessThan(const double &val) const; + virtual size_type numOfElemsAbsLessThan(const double& val) const; /// @brief set int array 'arr', starting at `start` and ending at `end`, to the values in `arr_src` from 'start_src` /// TODO: add unit test - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType* arr_src, const int start_src) const; - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType arr_src) const; - - - /// @brief check if `this` vector is identical to `vec` virtual bool is_equal(const hiopVector& vec) const; /* functions for this class */ inline MPI_Comm get_mpi_comm() const { return comm_; } - ExecSpace& exec_space() - { - return exec_space_; - } - const ExecSpace& exec_space() const - { - return exec_space_; - } + ExecSpace& exec_space() { return exec_space_; } + const ExecSpace& exec_space() const { return exec_space_; } private: ExecSpace exec_space_; @@ -378,8 +366,7 @@ class hiopVectorHip : public hiopVector /** copy constructor, for internal/private use only (it doesn't copy the elements.) */ hiopVectorHip(const hiopVectorHip&); - }; -} // namespace hiop +} // namespace hiop #endif diff --git a/src/LinAlg/hiopVectorInt.hpp b/src/LinAlg/hiopVectorInt.hpp index 303d58374..6eadf2466 100644 --- a/src/LinAlg/hiopVectorInt.hpp +++ b/src/LinAlg/hiopVectorInt.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #pragma once @@ -61,59 +61,58 @@ namespace hiop // "forward" definitions class hiopVectorIntSeq; - + class hiopVectorInt { protected: size_type sz_; public: - hiopVectorInt(size_type sz) : sz_(sz) { } - virtual ~hiopVectorInt() { } + hiopVectorInt(size_type sz) + : sz_(sz) + {} + virtual ~hiopVectorInt() {} - virtual size_type get_local_size() const - { - return sz_; - } + virtual size_type get_local_size() const { return sz_; } virtual index_type* local_data() = 0; virtual const index_type* local_data_const() const = 0; virtual index_type* local_data_host() = 0; virtual const index_type* local_data_host_const() const = 0; - + virtual void copy_from(const index_type* v_local) = 0; - /** Copy array content of `hiopVectorIntSeq` into `this`. Host-device + /** Copy array content of `hiopVectorIntSeq` into `this`. Host-device * communication occurs when `this` is a device vector. * * @pre Sizes must match. */ virtual void copy_from_vectorseq(const hiopVectorIntSeq& src) = 0; - /** Copy array content of `this` into `hiopVectorIntSeq`. Host-device + /** Copy array content of `this` into `hiopVectorIntSeq`. Host-device * communication occurs when `this` is a device vector. * * @pre Sizes must match. */ virtual void copy_to_vectorseq(hiopVectorIntSeq& dest) const = 0; - + /// @brief Set all elements to zero. virtual void set_to_zero() = 0; /// @brief Set all elements to c - virtual void set_to_constant( const index_type c ) = 0; + virtual void set_to_constant(const index_type c) = 0; /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * * @pre The elements of the linear space should not overflow the index_type type - * + * * @param i0 the starting element in the linear space (entry 0 in vector) * @param di the increment for subsequent entries in the vector * - */ + */ virtual void linspace(const index_type& i0, const index_type& di) = 0; }; -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntCompoundPD.cpp b/src/LinAlg/hiopVectorIntCompoundPD.cpp index 638a6fbe4..79fb9c5db 100644 --- a/src/LinAlg/hiopVectorIntCompoundPD.cpp +++ b/src/LinAlg/hiopVectorIntCompoundPD.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -56,16 +56,15 @@ #include "hiopVectorIntCompoundPD.hpp" #include "MemBackendCppImpl.hpp" -#include //for memcpy +#include //for memcpy namespace hiop { -hiopVectorIntCompoundPD::hiopVectorIntCompoundPD() - : hiopVectorInt(0), - n_parts_(0) -{ -} +hiopVectorIntCompoundPD::hiopVectorIntCompoundPD() + : hiopVectorInt(0), + n_parts_(0) +{} hiopVectorIntCompoundPD::~hiopVectorIntCompoundPD() { @@ -76,7 +75,7 @@ hiopVectorIntCompoundPD::~hiopVectorIntCompoundPD() vectors_.clear(); n_parts_ = 0; } - + void hiopVectorIntCompoundPD::set_to_zero() { for(index_type i = 0; i < n_parts_; i++) { @@ -91,26 +90,17 @@ void hiopVectorIntCompoundPD::set_to_constant(const index_type c) } } -void hiopVectorIntCompoundPD::addVector(hiopVectorInt *v) +void hiopVectorIntCompoundPD::addVector(hiopVectorInt* v) { vectors_.push_back(v); sz_ += v->get_local_size(); n_parts_++; } -hiopVectorInt& hiopVectorIntCompoundPD::getVector(index_type index) const -{ - return *(vectors_[index]); -} +hiopVectorInt& hiopVectorIntCompoundPD::getVector(index_type index) const { return *(vectors_[index]); } -size_type hiopVectorIntCompoundPD::get_local_size() const -{ - return sz_; -} +size_type hiopVectorIntCompoundPD::get_local_size() const { return sz_; } -size_type hiopVectorIntCompoundPD::get_num_parts() const -{ - return n_parts_; -} +size_type hiopVectorIntCompoundPD::get_num_parts() const { return n_parts_; } -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntCompoundPD.hpp b/src/LinAlg/hiopVectorIntCompoundPD.hpp index 407563f54..073ff87ab 100644 --- a/src/LinAlg/hiopVectorIntCompoundPD.hpp +++ b/src/LinAlg/hiopVectorIntCompoundPD.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #pragma once @@ -65,7 +65,7 @@ namespace hiop /** * @brief A vector that consists of different type of hiopVectorInt. - * + * * @note this class is not used in the current hiop implementation */ class hiopVectorIntCompoundPD : public hiopVectorInt @@ -75,31 +75,47 @@ class hiopVectorIntCompoundPD : public hiopVectorInt ~hiopVectorIntCompoundPD(); - void addVector(hiopVectorInt *v); + void addVector(hiopVectorInt* v); hiopVectorInt& getVector(index_type index) const; size_type get_local_size() const; - + /* @brief return the number of parts in this compound vector */ size_type get_num_parts() const; virtual void copy_to_dev() {} virtual void copy_from_dev() {} - virtual index_type* local_data() { assert(0 && "not required."); return nullptr; } - - virtual const index_type* local_data_const() const { assert(0 && "not required."); return nullptr; } - - virtual inline index_type* local_data_host() { assert(0 && "not required."); return nullptr; } + virtual index_type* local_data() + { + assert(0 && "not required."); + return nullptr; + } + + virtual const index_type* local_data_const() const + { + assert(0 && "not required."); + return nullptr; + } + + virtual inline index_type* local_data_host() + { + assert(0 && "not required."); + return nullptr; + } + + virtual inline const index_type* local_data_host_const() const + { + assert(0 && "not required."); + return nullptr; + } - virtual inline const index_type* local_data_host_const() const { assert(0 && "not required."); return nullptr; } - virtual void copy_from(const index_type* v_local) { assert(0 && "not required."); } virtual void copy_from_vectorseq(const hiopVectorIntSeq& src) { assert(0 && "not required."); } virtual void copy_to_vectorseq(hiopVectorIntSeq& src) const { assert(0 && "not required."); } - + /// @brief Set all elements to zero. virtual void set_to_zero(); @@ -107,21 +123,20 @@ class hiopVectorIntCompoundPD : public hiopVectorInt virtual void set_to_constant(const index_type c); /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * * @pre The elements of the linear space should not overflow the index_type type - * + * * @param i0 the starting element in the linear space (entry 0 in vector) * @param di the increment for subsequent entries in the vector * - */ - virtual void linspace(const index_type& i0, const index_type& di) {assert(0 && "not required.");} + */ + virtual void linspace(const index_type& i0, const index_type& di) { assert(0 && "not required."); } private: std::vector vectors_; size_type n_parts_; - }; -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntCuda.cpp b/src/LinAlg/hiopVectorIntCuda.cpp index f76b8285e..37c366494 100644 --- a/src/LinAlg/hiopVectorIntCuda.cpp +++ b/src/LinAlg/hiopVectorIntCuda.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -66,7 +66,7 @@ namespace hiop { hiopVectorIntCuda::hiopVectorIntCuda(size_type sz, std::string mem_space) - : hiopVectorInt(sz) + : hiopVectorInt(sz) { buf_ = exec_space_.alloc_array(sz); // Create host mirror if the memory space is on device @@ -77,7 +77,7 @@ hiopVectorIntCuda::~hiopVectorIntCuda() { exec_space_host_.dealloc_array(buf_host_); exec_space_.dealloc_array(buf_); - buf_ = nullptr; + buf_ = nullptr; buf_host_ = nullptr; } @@ -116,21 +116,18 @@ void hiopVectorIntCuda::set_to_constant(const index_type c) } /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * * @pre The elements of the linear space should not overflow the index_type type - * + * * @param i0 the starting element in the linear space (entry 0 in vector) * @param di the increment for subsequent entries in the vector * - */ + */ void hiopVectorIntCuda::linspace(const index_type& i0, const index_type& di) { hiop::cuda::set_to_linspace_kernel(sz_, buf_, i0, di); } - -} // namespace hiop - - +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntCuda.hpp b/src/LinAlg/hiopVectorIntCuda.hpp index ed57f8dfa..271462d7f 100644 --- a/src/LinAlg/hiopVectorIntCuda.hpp +++ b/src/LinAlg/hiopVectorIntCuda.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #pragma once @@ -63,11 +63,11 @@ namespace hiop // "forward" declarations class hiopVectorIntSeq; - + class hiopVectorIntCuda : public hiopVectorInt { public: - hiopVectorIntCuda(size_type sz, std::string mem_space="HOST"); + hiopVectorIntCuda(size_type sz, std::string mem_space = "HOST"); ~hiopVectorIntCuda(); @@ -78,7 +78,7 @@ class hiopVectorIntCuda : public hiopVectorInt virtual inline index_type* local_data() { return buf_; } virtual inline const index_type* local_data_const() const { return buf_; } - + virtual void copy_from(const index_type* v_local); virtual void copy_from_vectorseq(const hiopVectorIntSeq& src); @@ -91,32 +91,26 @@ class hiopVectorIntCuda : public hiopVectorInt virtual void set_to_constant(const index_type c); /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * * @pre The elements of the linear space should not overflow the index_type type - * + * * @param i0 the starting element in the linear space (entry 0 in vector) * @param di the increment for subsequent entries in the vector * - */ + */ virtual void linspace(const index_type& i0, const index_type& di); - ExecSpace& exec_space() - { - return exec_space_; - } - const ExecSpace& exec_space() const - { - return exec_space_; - } + ExecSpace& exec_space() { return exec_space_; } + const ExecSpace& exec_space() const { return exec_space_; } private: ExecSpace exec_space_; ExecSpace exec_space_host_; - index_type *buf_host_; - index_type *buf_; + index_type* buf_host_; + index_type* buf_; }; -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntHip.cpp b/src/LinAlg/hiopVectorIntHip.cpp index 48fd6a89e..7958286aa 100644 --- a/src/LinAlg/hiopVectorIntHip.cpp +++ b/src/LinAlg/hiopVectorIntHip.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -65,7 +65,7 @@ namespace hiop { hiopVectorIntHip::hiopVectorIntHip(size_type sz, std::string mem_space) - : hiopVectorInt(sz) + : hiopVectorInt(sz) { buf_ = exec_space_.alloc_array(sz); // Create host mirror if the memory space is on device @@ -76,7 +76,7 @@ hiopVectorIntHip::~hiopVectorIntHip() { exec_space_host_.dealloc_array(buf_host_); exec_space_.dealloc_array(buf_); - buf_ = nullptr; + buf_ = nullptr; buf_host_ = nullptr; } @@ -115,21 +115,18 @@ void hiopVectorIntHip::set_to_constant(const index_type c) } /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * * @pre The elements of the linear space should not overflow the index_type type - * + * * @param i0 the starting element in the linear space (entry 0 in vector) * @param di the increment for subsequent entries in the vector * - */ + */ void hiopVectorIntHip::linspace(const index_type& i0, const index_type& di) { hiop::hip::set_to_linspace_kernel(sz_, buf_, i0, di); } - -} // namespace hiop - - +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntHip.hpp b/src/LinAlg/hiopVectorIntHip.hpp index b9f4c3ff2..958afb419 100644 --- a/src/LinAlg/hiopVectorIntHip.hpp +++ b/src/LinAlg/hiopVectorIntHip.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #pragma once @@ -64,7 +64,7 @@ namespace hiop class hiopVectorIntHip : public hiopVectorInt { public: - hiopVectorIntHip(size_type sz, std::string mem_space="HOST"); + hiopVectorIntHip(size_type sz, std::string mem_space = "HOST"); ~hiopVectorIntHip(); @@ -75,7 +75,7 @@ class hiopVectorIntHip : public hiopVectorInt virtual inline index_type* local_data() { return buf_; } virtual inline const index_type* local_data_const() const { return buf_; } - + virtual void copy_from(const index_type* v_local); virtual void copy_from_vectorseq(const hiopVectorIntSeq& src); @@ -88,32 +88,26 @@ class hiopVectorIntHip : public hiopVectorInt virtual void set_to_constant(const index_type c); /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * * @pre The elements of the linear space should not overflow the index_type type - * + * * @param i0 the starting element in the linear space (entry 0 in vector) * @param di the increment for subsequent entries in the vector * - */ + */ virtual void linspace(const index_type& i0, const index_type& di); - ExecSpace& exec_space() - { - return exec_space_; - } - const ExecSpace& exec_space() const - { - return exec_space_; - } + ExecSpace& exec_space() { return exec_space_; } + const ExecSpace& exec_space() const { return exec_space_; } private: ExecSpace exec_space_; ExecSpace exec_space_host_; - index_type *buf_host_; - index_type *buf_; + index_type* buf_host_; + index_type* buf_; }; -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntRaja.hpp b/src/LinAlg/hiopVectorIntRaja.hpp index 2a02b0f36..827c2d896 100644 --- a/src/LinAlg/hiopVectorIntRaja.hpp +++ b/src/LinAlg/hiopVectorIntRaja.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #pragma once @@ -62,18 +62,18 @@ namespace hiop { -//forward declarations of the "friend" testing classes +// forward declarations of the "friend" testing classes namespace tests { class VectorTestsIntRaja; class MatrixTestsRajaSparseTriplet; -} - +} // namespace tests + template class hiopVectorIntRaja : public hiopVectorInt { public: - hiopVectorIntRaja(size_type sz, std::string mem_space="HOST"); + hiopVectorIntRaja(size_type sz, std::string mem_space = "HOST"); hiopVectorIntRaja(const hiopVectorIntRaja&) = delete; ~hiopVectorIntRaja(); @@ -84,13 +84,12 @@ class hiopVectorIntRaja : public hiopVectorInt virtual inline index_type* local_data() { return buf_; } virtual inline const index_type* local_data_const() const { return buf_; } - + virtual void copy_from(const index_type* v_local); virtual void copy_from_vectorseq(const hiopVectorIntSeq& src); virtual void copy_to_vectorseq(hiopVectorIntSeq& dest) const; - /// @brief Set all elements to zero. virtual void set_to_zero(); @@ -98,22 +97,20 @@ class hiopVectorIntRaja : public hiopVectorInt virtual void set_to_constant(const index_type c); /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * * @pre The elements of the linear space should not overflow the index_type type - * + * * @param i0 the starting element in the linear space (entry 0 in vector) * @param di the increment for subsequent entries in the vector * - */ + */ virtual void linspace(const index_type& i0, const index_type& di); /// Return a const reference to the internal execution space object - const ExecSpace& exec_space() const - { - return exec_space_; - } + const ExecSpace& exec_space() const { return exec_space_; } + private: friend class tests::VectorTestsIntRaja; friend class tests::MatrixTestsRajaSparseTriplet; @@ -135,16 +132,16 @@ class hiopVectorIntRaja : public hiopVectorInt ExecSpace exec_space_; using MEMBACKENDHOST = typename MEMBACKEND::MemBackendHost; - //EXECPOLICYRAJA is used internally as a execution policy. EXECPOLICYHOST is not used internally - //in this class. EXECPOLICYHOST can be any host policy as long as memory allocations and - //and transfers within and from `exec_space_host_` work with EXECPOLICYHOST (currently all such - //combinations work). + // EXECPOLICYRAJA is used internally as a execution policy. EXECPOLICYHOST is not used internally + // in this class. EXECPOLICYHOST can be any host policy as long as memory allocations and + // and transfers within and from `exec_space_host_` work with EXECPOLICYHOST (currently all such + // combinations work). using EXECPOLICYHOST = hiop::ExecPolicySeq; ExecSpace exec_space_host_; - index_type *buf_host_; - index_type *buf_; + index_type* buf_host_; + index_type* buf_; std::string mem_space_; }; -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntRajaCuda.cpp b/src/LinAlg/hiopVectorIntRajaCuda.cpp index 9d53816bb..e0b83d7bf 100644 --- a/src/LinAlg/hiopVectorIntRajaCuda.cpp +++ b/src/LinAlg/hiopVectorIntRajaCuda.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -60,23 +60,22 @@ #include "MemBackendCppImpl.hpp" #include "ExecPoliciesRajaCudaImpl.hpp" - namespace hiop { using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exec; using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; -} +} // namespace hiop #include "hiopVectorIntRajaImpl.hpp" namespace hiop { -//CUDA-specific method implementation goes here +// CUDA-specific method implementation goes here // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopVectorIntRaja; template class hiopVectorIntRaja; -} +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntRajaHip.cpp b/src/LinAlg/hiopVectorIntRajaHip.cpp index 99bedb4e4..75f6ddfa8 100644 --- a/src/LinAlg/hiopVectorIntRajaHip.cpp +++ b/src/LinAlg/hiopVectorIntRajaHip.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -60,23 +60,22 @@ #include "MemBackendCppImpl.hpp" #include "ExecPoliciesRajaHipImpl.hpp" - namespace hiop { using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exec; using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; -} +} // namespace hiop #include "hiopVectorIntRajaImpl.hpp" namespace hiop { -//CUDA-specific method implementation goes here +// CUDA-specific method implementation goes here // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopVectorIntRaja; template class hiopVectorIntRaja; -} +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntRajaImpl.hpp b/src/LinAlg/hiopVectorIntRajaImpl.hpp index 850306916..8e2d824b9 100644 --- a/src/LinAlg/hiopVectorIntRajaImpl.hpp +++ b/src/LinAlg/hiopVectorIntRajaImpl.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -62,10 +62,10 @@ namespace hiop template hiopVectorIntRaja::hiopVectorIntRaja(size_type sz, std::string mem_space) - : hiopVectorInt(sz), - exec_space_(ExecSpace(MEMBACKEND(mem_space))), - exec_space_host_(ExecSpace(MEMBACKENDHOST::new_backend_host())), - mem_space_(mem_space) + : hiopVectorInt(sz), + exec_space_(ExecSpace(MEMBACKEND(mem_space))), + exec_space_host_(ExecSpace(MEMBACKENDHOST::new_backend_host())), + mem_space_(mem_space) { #ifndef HIOP_USE_GPU assert(mem_space_ == "HOST"); @@ -86,7 +86,7 @@ hiopVectorIntRaja::~hiopVectorIntRaja() exec_space_host_.dealloc_array(buf_host_); } exec_space_.dealloc_array(buf_); - + buf_host_ = nullptr; buf_ = nullptr; } @@ -121,7 +121,6 @@ void hiopVectorIntRaja::copy_to_vectorseq(hiopVectorInt dest.exec_space().copy(dest.local_data(), buf_, sz_, exec_space_); } - template void hiopVectorIntRaja::copy_from(const index_type* v_local) { @@ -140,19 +139,15 @@ template void hiopVectorIntRaja::set_to_constant(const index_type c) { index_type* data = buf_; - RAJA::forall(RAJA::RangeSegment(0, sz_), - RAJA_LAMBDA(RAJA::Index_type i) - { - data[i] = c; - }); + RAJA::forall(RAJA::RangeSegment(0, sz_), RAJA_LAMBDA(RAJA::Index_type i) { data[i] = c; }); } /** - * @brief Set the vector entries to be a linear space of starting at i0 containing evenly + * @brief Set the vector entries to be a linear space of starting at i0 containing evenly * incremented integers up to i0+(n-1)di, when n is the length of this vector * * @pre The elements of the linear space should not overflow the index_type type - * + * * @param i0 the starting element in the linear space (entry 0 in vector) * @param di the increment for subsequent entries in the vector * @@ -161,11 +156,7 @@ template void hiopVectorIntRaja::linspace(const index_type& i0, const index_type& di) { index_type* data = buf_; - RAJA::forall(RAJA::RangeSegment(0, sz_), - RAJA_LAMBDA(RAJA::Index_type i) - { - data[i] = i0+i*di; - }); + RAJA::forall(RAJA::RangeSegment(0, sz_), RAJA_LAMBDA(RAJA::Index_type i) { data[i] = i0 + i * di; }); } - -} // namespace hiop + +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntRajaOmp.cpp b/src/LinAlg/hiopVectorIntRajaOmp.cpp index cdb9554fa..9fd3d3d47 100644 --- a/src/LinAlg/hiopVectorIntRajaOmp.cpp +++ b/src/LinAlg/hiopVectorIntRajaOmp.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -58,12 +58,11 @@ #include "MemBackendUmpireImpl.hpp" #include "ExecPoliciesRajaOmpImpl.hpp" - namespace hiop { using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exec; using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; -} +} // namespace hiop #include "hiopVectorIntRajaImpl.hpp" @@ -71,8 +70,8 @@ namespace hiop { // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // -template class hiopVectorIntRaja; -template class hiopVectorIntRaja; -} +template class hiopVectorIntRaja; +template class hiopVectorIntRaja; +} // namespace hiop diff --git a/src/LinAlg/hiopVectorIntSeq.cpp b/src/LinAlg/hiopVectorIntSeq.cpp index 6b68a6690..266cc1efe 100644 --- a/src/LinAlg/hiopVectorIntSeq.cpp +++ b/src/LinAlg/hiopVectorIntSeq.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -56,12 +56,13 @@ #include "hiopVectorIntSeq.hpp" #include "MemBackendCppImpl.hpp" -#include //for memcpy +#include //for memcpy namespace hiop { -hiopVectorIntSeq::hiopVectorIntSeq(size_type sz) : hiopVectorInt(sz) +hiopVectorIntSeq::hiopVectorIntSeq(size_type sz) + : hiopVectorInt(sz) { buf_ = exec_space_.template alloc_array(sz_); } @@ -84,44 +85,44 @@ void hiopVectorIntSeq::copy_from_vectorseq(const hiopVectorIntSeq& src) assert(src.sz_ == sz_); exec_space_.copy(buf_, src.buf_, sz_, src.exec_space_); } - + void hiopVectorIntSeq::copy_to_vectorseq(hiopVectorIntSeq& src) const { assert(src.sz_ == sz_); src.exec_space_.copy(src.buf_, buf_, sz_, exec_space_); } - + void hiopVectorIntSeq::set_to_zero() { - for(index_type i=0; i& exec_space() const - { - return exec_space_; - } - ExecSpace& exec_space() - { - return exec_space_; - } + const ExecSpace& exec_space() const { return exec_space_; } + ExecSpace& exec_space() { return exec_space_; } + private: ExecSpace exec_space_; - index_type *buf_; + index_type* buf_; }; -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopVectorPar.cpp b/src/LinAlg/hiopVectorPar.cpp index d3587e22f..a7ad806b2 100644 --- a/src/LinAlg/hiopVectorPar.cpp +++ b/src/LinAlg/hiopVectorPar.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #include "hiopVectorPar.hpp" @@ -51,7 +51,7 @@ #include "hiopCppStdUtils.hpp" #include -#include //for memcpy +#include //for memcpy #include #include #include @@ -65,26 +65,29 @@ namespace hiop { -hiopVectorPar::hiopVectorPar(const size_type& glob_n, index_type* col_part/*=NULL*/, MPI_Comm comm/*=MPI_COMM_NULL*/) - : comm_(comm) +hiopVectorPar::hiopVectorPar(const size_type& glob_n, index_type* col_part /*=NULL*/, MPI_Comm comm /*=MPI_COMM_NULL*/) + : comm_(comm) { n_ = glob_n; - assert(n_>=0); + assert(n_ >= 0); #ifdef HIOP_USE_MPI // if this is a serial vector, make sure it has a valid comm in the mpi case - if(comm_==MPI_COMM_NULL) comm_=MPI_COMM_SELF; + if(comm_ == MPI_COMM_NULL) comm_ = MPI_COMM_SELF; #endif - int P=0; + int P = 0; if(col_part) { #ifdef HIOP_USE_MPI - int ierr=MPI_Comm_rank(comm_, &P); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Comm_rank(comm_, &P); + assert(ierr == MPI_SUCCESS); #endif - glob_il_=col_part[P]; glob_iu_=col_part[P+1]; - } else { - glob_il_=0; glob_iu_=n_; - } - n_local_=glob_iu_-glob_il_; + glob_il_ = col_part[P]; + glob_iu_ = col_part[P + 1]; + } else { + glob_il_ = 0; + glob_iu_ = n_; + } + n_local_ = glob_iu_ - glob_il_; data_ = exec_space_.template alloc_array(n_local_); } @@ -99,7 +102,7 @@ hiopVectorPar::hiopVectorPar(const hiopVectorPar& v) comm_ = v.comm_; data_ = exec_space_.template alloc_array(n_local_); } - + hiopVectorPar::~hiopVectorPar() { exec_space_.dealloc_array(data_); @@ -108,26 +111,28 @@ hiopVectorPar::~hiopVectorPar() hiopVector* hiopVectorPar::alloc_clone() const { - hiopVector* v = new hiopVectorPar(*this); assert(v); + hiopVector* v = new hiopVectorPar(*this); + assert(v); return v; } -hiopVector* hiopVectorPar::new_copy () const +hiopVector* hiopVectorPar::new_copy() const { - hiopVector* v = new hiopVectorPar(*this); assert(v); + hiopVector* v = new hiopVectorPar(*this); + assert(v); v->copyFrom(*this); return v; } void hiopVectorPar::setToZero() { - for(int i=0; i(select); const double* svec = s.data_; - for(int i=0; i(v_in); copy_from_vectorpar(v); @@ -156,17 +161,15 @@ void hiopVectorPar::copyFrom(const hiopVector& v_in ) void hiopVectorPar::copy_from_vectorpar(const hiopVectorPar& v) { - assert(n_local_==v.n_local_); - assert(glob_il_==v.glob_il_); assert(glob_iu_==v.glob_iu_); + assert(n_local_ == v.n_local_); + assert(glob_il_ == v.glob_il_); + assert(glob_iu_ == v.glob_iu_); exec_space_.copy(this->data_, v.data_, n_local_, v.exec_space_); } -void hiopVectorPar::copy_to_vectorpar(hiopVectorPar& vdest) const -{ - vdest.copy_from_vectorpar(*this); -} +void hiopVectorPar::copy_to_vectorpar(hiopVectorPar& vdest) const { vdest.copy_from_vectorpar(*this); } -void hiopVectorPar::copyFrom(const double* v_local_data ) +void hiopVectorPar::copyFrom(const double* v_local_data) { if(v_local_data) { exec_space_.copy(this->data_, v_local_data, n_local_); @@ -182,11 +185,11 @@ void hiopVectorPar::copy_from_w_pattern(const hiopVector& vv, const hiopVector& assert(n_local_ == ix.n_local_); const double* ix_vec = ix.data_; const double* v_vec = v.data_; - - for(index_type i=0; i(vv); assert(indexes.get_local_size() == n_local_); - + const index_type* index_arr = indexes.local_data_const(); size_type nv = v.get_local_size(); - for(index_type i=0; idata_[i] = v.data_[index_arr[i]]; } } @@ -213,7 +216,7 @@ void hiopVectorPar::copy_from_indexes(const double* vv, const hiopVectorInt& ind const hiopVectorIntSeq& indexes = dynamic_cast(index_in_src); const index_type* index_arr = indexes.local_data_const(); assert(indexes.get_local_size() == n_local_); - for(int i=0; idata_[i] = vv[index_arr[i]]; } } @@ -221,52 +224,51 @@ void hiopVectorPar::copy_from_indexes(const double* vv, const hiopVectorInt& ind void hiopVectorPar::copyFromStarting(int start_index_in_this, const double* v, int nv) { - assert(start_index_in_this+nv <= n_local_); - exec_space_.copy(data_+start_index_in_this, v, nv); + assert(start_index_in_this + nv <= n_local_); + exec_space_.copy(data_ + start_index_in_this, v, nv); } -void hiopVectorPar::copyFromStarting(int start_index/*_in_src*/,const hiopVector& v_) +void hiopVectorPar::copyFromStarting(int start_index /*_in_src*/, const hiopVector& v_) { #ifdef HIOP_DEEPCHECKS - assert(n_local_==n_ && "only for local/non-distributed vectors"); + assert(n_local_ == n_ && "only for local/non-distributed vectors"); #endif const hiopVectorPar& v = dynamic_cast(v_); - assert(start_index+v.n_local_ <= n_local_); - exec_space_.copy(data_+start_index, v.data_, v.n_local_, v.exec_space_); + assert(start_index + v.n_local_ <= n_local_); + exec_space_.copy(data_ + start_index, v.data_, v.n_local_, v.exec_space_); } void hiopVectorPar::copy_from_starting_at(const double* v, int start_index_in_global_v, int global_nv) { -// exec_space_.copy(data_, v+start_index_in_global_v, global_nv); -// return; + // exec_space_.copy(data_, v+start_index_in_global_v, global_nv); + // return; if(n_local_ > 0) { if(global_nv == n_local_ && n_local_ == n_) { - exec_space_.copy(data_, v+start_index_in_global_v, global_nv); + exec_space_.copy(data_, v + start_index_in_global_v, global_nv); } else { - if( glob_il_ <= start_index_in_global_v + global_nv && glob_iu_ >= start_index_in_global_v ) { - int local_start_idx = (glob_il_>=start_index_in_global_v)?0:start_index_in_global_v-glob_il_; - int local_end_idx = (glob_iu_<=start_index_in_global_v+global_nv)?glob_iu_-glob_il_:start_index_in_global_v-glob_il_; - exec_space_.copy(data_, v+local_start_idx, local_end_idx-local_start_idx); + if(glob_il_ <= start_index_in_global_v + global_nv && glob_iu_ >= start_index_in_global_v) { + int local_start_idx = (glob_il_ >= start_index_in_global_v) ? 0 : start_index_in_global_v - glob_il_; + int local_end_idx = + (glob_iu_ <= start_index_in_global_v + global_nv) ? glob_iu_ - glob_il_ : start_index_in_global_v - glob_il_; + exec_space_.copy(data_, v + local_start_idx, local_end_idx - local_start_idx); } } } } -void hiopVectorPar::startingAtCopyFromStartingAt(int start_idx_dest, - const hiopVector& v_in, - int start_idx_src) +void hiopVectorPar::startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& v_in, int start_idx_src) { size_type howManyToCopyDest = this->n_local_ - start_idx_dest; const hiopVectorPar& v = dynamic_cast(v_in); #ifdef HIOP_DEEPCHECKS - assert(n_local_==n_ && "only for local/non-distributed vectors"); - assert(v.n_local_==v.n_ && "only for local/non-distributed vectors"); + assert(n_local_ == n_ && "only for local/non-distributed vectors"); + assert(v.n_local_ == v.n_ && "only for local/non-distributed vectors"); #endif - assert((start_idx_dest>=0 && start_idx_destn_local_) || this->n_local_==0); - assert((start_idx_src>=0 && start_idx_src= 0 && start_idx_dest < this->n_local_) || this->n_local_ == 0); + assert((start_idx_src >= 0 && start_idx_src < v.n_local_) || v.n_local_ == 0 || v.n_local_ == start_idx_src); + size_type howManyToCopySrc = v.n_local_ - start_idx_src; if(howManyToCopyDest == 0 || howManyToCopySrc == 0) { return; @@ -274,28 +276,28 @@ void hiopVectorPar::startingAtCopyFromStartingAt(int start_idx_dest, assert(howManyToCopyDest <= howManyToCopySrc); - //just to be safe when not NDEBUG + // just to be safe when not NDEBUG if(howManyToCopyDest > howManyToCopySrc) howManyToCopyDest = howManyToCopySrc; - assert(howManyToCopyDest>=0); - exec_space_.copy(data_+start_idx_dest, v.data_+start_idx_src, howManyToCopyDest, v.exec_space_); + assert(howManyToCopyDest >= 0); + exec_space_.copy(data_ + start_idx_dest, v.data_ + start_idx_src, howManyToCopyDest, v.exec_space_); } void hiopVectorPar::copyToStarting(int start_index, hiopVector& v_) const { hiopVectorPar& v = dynamic_cast(v_); #ifdef HIOP_DEEPCHECKS - assert(n_local_==n_ && "are you sure you want to call this?"); + assert(n_local_ == n_ && "are you sure you want to call this?"); #endif - assert(start_index+v.n_local_ <= n_local_); - v.exec_space_.copy(v.data_, data_+start_index, v.n_local_, exec_space_); + assert(start_index + v.n_local_ <= n_local_); + v.exec_space_.copy(v.data_, data_ + start_index, v.n_local_, exec_space_); } /* Copy 'this' to dest starting at start_index in 'dest'. */ void hiopVectorPar::copyToStarting(hiopVector& dest, int start_global_index_in_dest) const { hiopVectorPar& v = dynamic_cast(dest); -// v.exec_space_.copy(v.data_+start_global_index_in_dest, data_, n_local_, exec_space_); -// return; + // v.exec_space_.copy(v.data_+start_global_index_in_dest, data_, n_local_, exec_space_); + // return; assert(start_global_index_in_dest + this->get_size() <= v.get_size()); if(n_local_ > 0) { @@ -305,15 +307,15 @@ void hiopVectorPar::copyToStarting(hiopVector& dest, int start_global_index_in_d } else if(start_global_index_in_dest >= v.glob_iu_) { start_local_index_in_dest = -1; } - if( start_local_index_in_dest >= 0) { + if(start_local_index_in_dest >= 0) { assert(n_local_ + start_local_index_in_dest <= v.n_local_); - v.exec_space_.copy(v.data_+start_local_index_in_dest, data_, n_local_, exec_space_); + v.exec_space_.copy(v.data_ + start_local_index_in_dest, data_, n_local_, exec_space_); } } } void hiopVectorPar::copyToStartingAt_w_pattern(hiopVector& v_, - index_type start_index/*_in_dest*/, + index_type start_index /*_in_dest*/, const hiopVector& select) const { hiopVectorPar& v = dynamic_cast(v_); @@ -321,135 +323,128 @@ void hiopVectorPar::copyToStartingAt_w_pattern(hiopVector& v_, assert(n_local_ == ix.n_local_); const double* ix_vec = ix.data_; int find_nnz = 0; - - for(index_type i=0; i copy into whole lambda array - for(int i=0; i copy into whole lambda array + for(int i = 0; i < c_size; ++i) { arr[c_map.local_data_host_const()[i]] = c_arr[i]; } - for(int i=0; i copy into whole lambda array - for(int i=0; i copy into whole lambda array + for(int i = 0; i < c_size; ++i) { c_arr[i] = arr[c_map.local_data_host_const()[i]]; } - for(int i=0; i=0, 'num_elems' will be copied; if num_elems<0, elements will be copied till the end of * either source ('this') or destination ('dest') is reached */ -void hiopVectorPar:: -startingAtCopyToStartingAt(index_type start_idx_in_src, - hiopVector& dest_, - index_type start_idx_dest, - size_type num_elems/*=-1*/) const +void hiopVectorPar::startingAtCopyToStartingAt(index_type start_idx_in_src, + hiopVector& dest_, + index_type start_idx_dest, + size_type num_elems /*=-1*/) const { #ifdef HIOP_DEEPCHECKS - assert(n_local_==n_ && "only for local/non-distributed vectors"); -#endif - assert(start_idx_in_src>=0 && start_idx_in_src<=this->n_local_); -#ifndef NDEBUG - if(start_idx_in_src==this->n_local_) assert((num_elems==-1 || num_elems==0)); + assert(n_local_ == n_ && "only for local/non-distributed vectors"); +#endif + assert(start_idx_in_src >= 0 && start_idx_in_src <= this->n_local_); +#ifndef NDEBUG + if(start_idx_in_src == this->n_local_) assert((num_elems == -1 || num_elems == 0)); #endif hiopVectorPar& dest = dynamic_cast(dest_); - assert(start_idx_dest>=0 && start_idx_dest<=dest.n_local_); -#ifndef NDEBUG - if(start_idx_dest==dest.n_local_) assert((num_elems==-1 || num_elems==0)); + assert(start_idx_dest >= 0 && start_idx_dest <= dest.n_local_); +#ifndef NDEBUG + if(start_idx_dest == dest.n_local_) assert((num_elems == -1 || num_elems == 0)); #endif - if(num_elems<0) { - num_elems = std::min(this->n_local_-start_idx_in_src, dest.n_local_-start_idx_dest); + if(num_elems < 0) { + num_elems = std::min(this->n_local_ - start_idx_in_src, dest.n_local_ - start_idx_dest); } else { - assert(num_elems+start_idx_in_src <= this->n_local_); - assert(num_elems+start_idx_dest <= dest.n_local_); - //make sure everything stays within bounds (in release) - num_elems = std::min(num_elems, (int)this->n_local_-start_idx_in_src); - num_elems = std::min(num_elems, (int)dest.n_local_-start_idx_dest); + assert(num_elems + start_idx_in_src <= this->n_local_); + assert(num_elems + start_idx_dest <= dest.n_local_); + // make sure everything stays within bounds (in release) + num_elems = std::min(num_elems, (int)this->n_local_ - start_idx_in_src); + num_elems = std::min(num_elems, (int)dest.n_local_ - start_idx_dest); } - dest.exec_space_.copy(dest.data_+start_idx_dest, - this->data_+start_idx_in_src, - num_elems, - this->exec_space_); + dest.exec_space_.copy(dest.data_ + start_idx_dest, this->data_ + start_idx_in_src, num_elems, this->exec_space_); } void hiopVectorPar::startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, hiopVector& dest_, index_type start_idx_dest, const hiopVector& selec_dest, - size_type num_elems/*=-1*/) const + size_type num_elems /*=-1*/) const { - assert(start_idx_in_src>=0 && start_idx_in_src<=n_local_); + assert(start_idx_in_src >= 0 && start_idx_in_src <= n_local_); hiopVectorPar& dest = dynamic_cast(dest_); const hiopVectorPar& ix = dynamic_cast(selec_dest); - assert(start_idx_dest>=0 && start_idx_dest<=dest.n_local_); - if(num_elems<0) { - num_elems = std::min(n_local_-start_idx_in_src, dest.n_local_-start_idx_dest); + assert(start_idx_dest >= 0 && start_idx_dest <= dest.n_local_); + if(num_elems < 0) { + num_elems = std::min(n_local_ - start_idx_in_src, dest.n_local_ - start_idx_dest); } else { - assert(num_elems+start_idx_in_src <= this->n_local_); - assert(num_elems+start_idx_dest <= dest.n_local_); - //make sure everything stays within bounds (in release) - num_elems = std::min(num_elems, (int)this->n_local_-start_idx_in_src); - num_elems = std::min(num_elems, (int)dest.n_local_-start_idx_dest); + assert(num_elems + start_idx_in_src <= this->n_local_); + assert(num_elems + start_idx_dest <= dest.n_local_); + // make sure everything stays within bounds (in release) + num_elems = std::min(num_elems, (int)this->n_local_ - start_idx_in_src); + num_elems = std::min(num_elems, (int)dest.n_local_ - start_idx_dest); } int find_nnz = 0; const double* ix_vec = ix.data_; - for(int i=start_idx_dest; idata_, n_local_, exec_space_); } -double hiopVectorPar::twonorm() const +double hiopVectorPar::twonorm() const { - int one=1; int n=n_local_; + int one = 1; + int n = n_local_; double nrm = 0.; - if(n>0) { - nrm = DNRM2(&n,data_,&one); + if(n > 0) { + nrm = DNRM2(&n, data_, &one); } #ifdef HIOP_USE_MPI nrm *= nrm; double nrmG; - int ierr = MPI_Allreduce(&nrm, &nrmG, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); - nrm=sqrt(nrmG); -#endif + int ierr = MPI_Allreduce(&nrm, &nrmG, 1, MPI_DOUBLE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); + nrm = sqrt(nrmG); +#endif return nrm; } double hiopVectorPar::dotProductWith(const hiopVector& v_) const { const hiopVectorPar& v = dynamic_cast(v_); - int one=1; int n=n_local_; - assert(this->n_local_==v.n_local_); + int one = 1; + int n = n_local_; + assert(this->n_local_ == v.n_local_); double dotprod; - if(n>0) { + if(n > 0) { dotprod = DDOT(&n, this->data_, &one, v.data_, &one); } else { dotprod = 0.; } #ifdef HIOP_USE_MPI double dotprodG; - int ierr = MPI_Allreduce(&dotprod, &dotprodG, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); - dotprod=dotprodG; + int ierr = MPI_Allreduce(&dotprod, &dotprodG, 1, MPI_DOUBLE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); + dotprod = dotprodG; #endif return dotprod; @@ -500,20 +499,21 @@ double hiopVectorPar::dotProductWith(const hiopVector& v_) const double hiopVectorPar::infnorm() const { - assert(n_local_>=0); - double nrm=0.; - if(n_local_!=0) { - nrm=fabs(data_[0]); + assert(n_local_ >= 0); + double nrm = 0.; + if(n_local_ != 0) { + nrm = fabs(data_[0]); double aux; - - for(int i=1; inrm) nrm=aux; + + for(int i = 1; i < n_local_; i++) { + aux = fabs(data_[i]); + if(aux > nrm) nrm = aux; } } #ifdef HIOP_USE_MPI double nrm_glob; - int ierr = MPI_Allreduce(&nrm, &nrm_glob, 1, MPI_DOUBLE, MPI_MAX, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&nrm, &nrm_glob, 1, MPI_DOUBLE, MPI_MAX, comm_); + assert(MPI_SUCCESS == ierr); return nrm_glob; #endif @@ -522,31 +522,30 @@ double hiopVectorPar::infnorm() const double hiopVectorPar::infnorm_local() const { - assert(n_local_>=0); - double nrm=0.; - if(n_local_>0) { - nrm = fabs(data_[0]); + assert(n_local_ >= 0); + double nrm = 0.; + if(n_local_ > 0) { + nrm = fabs(data_[0]); double aux; - - for(int i=1; inrm) nrm=aux; + + for(int i = 1; i < n_local_; i++) { + aux = fabs(data_[i]); + if(aux > nrm) nrm = aux; } } return nrm; } - double hiopVectorPar::onenorm() const { - double nrm1=0.; - for(int i=0; i(v_); - assert(n_local_==v.n_local_); - for(int i=0; i(v_); - assert(n_local_==v.n_local_); - for(int i=0; i(v_); - const hiopVectorPar& ix= dynamic_cast(ix_); + const hiopVectorPar& ix = dynamic_cast(ix_); #ifdef HIOP_DEEPCHECKS - assert(v.n_local_==n_local_); - assert(n_local_==ix.n_local_); + assert(v.n_local_ == n_local_); + assert(n_local_ == ix.n_local_); #endif - double *s=this->data_, *x=v.data_, *pattern=ix.data_; - for(int i=0; idata_, *x = v.data_, *pattern = ix.data_; + for(int i = 0; i < n_local_; i++) + if(pattern[i] == 0.0) + s[i] = 0.0; + else + s[i] /= x[i]; } void hiopVectorPar::component_min(const double constant) { - for(int i=0; iconstant) { + for(int i = 0; i < n_local_; i++) { + if(data_[i] > constant) { data_[i] = constant; } } @@ -603,9 +604,9 @@ void hiopVectorPar::component_min(const double constant) void hiopVectorPar::component_min(const hiopVector& v_) { const hiopVectorPar& v = dynamic_cast(v_); - assert(n_local_==v.n_local_); - for(int i=0; iv.data_[i]) { + assert(n_local_ == v.n_local_); + for(int i = 0; i < n_local_; i++) { + if(data_[i] > v.data_[i]) { data_[i] = v.data_[i]; } } @@ -613,8 +614,8 @@ void hiopVectorPar::component_min(const hiopVector& v_) void hiopVectorPar::component_max(const double constant) { - for(int i=0; i(v_); - assert(n_local_==v.n_local_); - for(int i=0; i(sign); + data_[i] = static_cast(sign); } } void hiopVectorPar::component_sqrt() { - for(int i=0; i=0); - data_[i] = std::sqrt(data_[i]); + for(int i = 0; i < n_local_; i++) { + assert(data_[i] >= 0); + data_[i] = std::sqrt(data_[i]); } } void hiopVectorPar::scale(double num) { - if(1.0==num) return; - int one=1; int n=n_local_; + if(1.0 == num) return; + int one = 1; + int n = n_local_; DSCAL(&n, &num, data_, &one); } @@ -666,7 +668,7 @@ void hiopVectorPar::axpy(double alpha, const hiopVector& x_in) const hiopVectorPar& x = dynamic_cast(x_in); int one = 1; int n = n_local_; - DAXPY( &n, &alpha, x.data_, &one, data_, &one ); + DAXPY(&n, &alpha, x.data_, &one, data_, &one); } /// @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. @@ -680,12 +682,11 @@ void hiopVectorPar::axpy(double alpha, const hiopVector& x, const hiopVectorInt& const double* xd = xx.local_data_const(); const index_type* id = idxs.local_data_const(); - - for(index_type j=0; j(x_); const hiopVectorPar& vz = dynamic_cast(z_); #ifdef HIOP_DEEPCHECKS - assert(vx.n_local_==vz.n_local_); - assert( n_local_==vz.n_local_); -#endif + assert(vx.n_local_ == vz.n_local_); + assert(n_local_ == vz.n_local_); +#endif // this += alpha * x * z (data+=alpha*x*z) - const double *x = vx.local_data_const(), *z=vz.local_data_const(); + const double *x = vx.local_data_const(), *z = vz.local_data_const(); - if(alpha==1.0) { - for(int i=0; i(x_); const hiopVectorPar& vz = dynamic_cast(z_); #ifdef HIOP_DEEPCHECKS - assert(vx.n_local_==vz.n_local_); - assert( n_local_==vz.n_local_); -#endif - // this += alpha * x / z - const double *x = vx.local_data_const(), *z=vz.local_data_const(); + assert(vx.n_local_ == vz.n_local_); + assert(n_local_ == vz.n_local_); +#endif + // this += alpha * x / z + const double *x = vx.local_data_const(), *z = vz.local_data_const(); if(alpha == 1.0) { - - for(int i=0; i(x_); const hiopVectorPar& vz = dynamic_cast(z_); - const hiopVectorPar& sel= dynamic_cast(select); + const hiopVectorPar& sel = dynamic_cast(select); #ifdef HIOP_DEEPCHECKS - assert(vx.n_local_==vz.n_local_); - assert( n_local_==vz.n_local_); -#endif + assert(vx.n_local_ == vz.n_local_); + assert(n_local_ == vz.n_local_); +#endif // this += alpha * x / z (y+=alpha*x/z) - double*y = data_; - const double *x = vx.local_data_const(), *z=vz.local_data_const(), *s=sel.local_data_const(); + double* y = data_; + const double *x = vx.local_data_const(), *z = vz.local_data_const(), *s = sel.local_data_const(); int it; - if(alpha==1.0) { - for(it=0;it(ix_); assert(this->n_local_ == ix.n_local_); const double* ix_vec = ix.data_; - for(int i=0; i::max(); - for(int i=0; i(select); assert(this->n_local_ == ix.n_local_); - + double ret_val = std::numeric_limits::max(); const double* ix_vec = ix.data_; - for(int i=0; i(select); assert(this->n_local_ == ix.n_local_); const double* ix_vec = ix.data_; - for(int i=0; in_local_ == dynamic_cast(ix).n_local_); - assert(this->n_local_ == dynamic_cast( x).n_local_); + assert(this->n_local_ == dynamic_cast(x).n_local_); #endif const double* ix_vec = dynamic_cast(ix).data_; - const double* x_vec = dynamic_cast( x).data_; + const double* x_vec = dynamic_cast(x).data_; - for(int i=0; i(ixleft)).local_data_const(); - const double* ixr= (dynamic_cast(ixright)).local_data_const(); + const double* ixl = (dynamic_cast(ixleft)).local_data_const(); + const double* ixr = (dynamic_cast(ixright)).local_data_const(); #ifdef HIOP_DEEPCHECKS - assert(n_local_==(dynamic_cast(ixleft) ).n_local_); - assert(n_local_==(dynamic_cast(ixright) ).n_local_); + assert(n_local_ == (dynamic_cast(ixleft)).n_local_); + assert(n_local_ == (dynamic_cast(ixright)).n_local_); #endif - double term=0.0; - for(size_type i=0; i(ixleft)).local_data_const(); - const double* ixr= (dynamic_cast(ixright)).local_data_const(); + const double* ixl = (dynamic_cast(ixleft)).local_data_const(); + const double* ixr = (dynamic_cast(ixright)).local_data_const(); double* v = this->local_data(); #ifdef HIOP_DEEPCHECKS - assert(n_local_==(dynamic_cast(ixleft) ).n_local_); - assert(n_local_==(dynamic_cast(ixright) ).n_local_); + assert(n_local_ == (dynamic_cast(ixleft)).n_local_); + assert(n_local_ == (dynamic_cast(ixright)).n_local_); #endif - for(index_type i=0; i(xl_) ).n_local_==n_local_); - assert((dynamic_cast(ixl_)).n_local_==n_local_); - assert((dynamic_cast(xu_) ).n_local_==n_local_); - assert((dynamic_cast(ixu_)).n_local_==n_local_); + assert((dynamic_cast(xl_)).n_local_ == n_local_); + assert((dynamic_cast(ixl_)).n_local_ == n_local_); + assert((dynamic_cast(xu_)).n_local_ == n_local_); + assert((dynamic_cast(ixu_)).n_local_ == n_local_); #endif - const double* xl = (dynamic_cast(xl_) ).local_data_const(); - const double* ixl= (dynamic_cast(ixl_)).local_data_const(); - const double* xu = (dynamic_cast(xu_) ).local_data_const(); - const double* ixu= (dynamic_cast(ixu_)).local_data_const(); - double* x0=data_; + const double* xl = (dynamic_cast(xl_)).local_data_const(); + const double* ixl = (dynamic_cast(ixl_)).local_data_const(); + const double* xu = (dynamic_cast(xu_)).local_data_const(); + const double* ixu = (dynamic_cast(ixu_)).local_data_const(); + double* x0 = data_; const double small_double = std::numeric_limits::min() * 100; double aux, aux2; - for(size_type i=0; ixu[i]) return false; - aux=kappa2*(xu[i]-xl[i])-small_double; - aux2=xl[i]+fmin(kappa1*fmax(1., fabs(xl[i])),aux); - if(x0[i] xu[i]) return false; + aux = kappa2 * (xu[i] - xl[i]) - small_double; + aux2 = xl[i] + fmin(kappa1 * fmax(1., fabs(xl[i])), aux); + if(x0[i] < aux2) { + x0[i] = aux2; } else { - aux2=xu[i]-fmin(kappa1*fmax(1., fabs(xu[i])),aux); - if(x0[i]>aux2) { - x0[i]=aux2; + aux2 = xu[i] - fmin(kappa1 * fmax(1., fabs(xu[i])), aux); + if(x0[i] > aux2) { + x0[i] = aux2; } } #ifdef HIOP_DEEPCHECKS - //if(x0[i]>xl[i] && x0[i]xl[i] && x0[i] HiOp bug"); + // if(x0[i]>xl[i] && x0[i] xl[i] && x0[i] < xu[i] && "this should not happen -> HiOp bug"); #endif } else { - if(ixl[i]!=0.) - x0[i] = fmax(x0[i], xl[i]+kappa1*fmax(1, fabs(xl[i]))-small_double); - else - if(ixu[i]!=0) - x0[i] = fmin(x0[i], xu[i]-kappa1*fmax(1, fabs(xu[i]))-small_double); - else { /*nothing for free vars */ } + if(ixl[i] != 0.) + x0[i] = fmax(x0[i], xl[i] + kappa1 * fmax(1, fabs(xl[i])) - small_double); + else if(ixu[i] != 0) + x0[i] = fmin(x0[i], xu[i] - kappa1 * fmax(1, fabs(xu[i])) - small_double); + else { /*nothing for free vars */ + } } } return true; } /* max{a\in(0,1]| x+ad >=(1-tau)x} */ -double hiopVectorPar::fractionToTheBdry_local(const hiopVector& dx, const double& tau) const +double hiopVectorPar::fractionToTheBdry_local(const hiopVector& dx, const double& tau) const { #ifdef HIOP_DEEPCHECKS - assert((dynamic_cast(dx) ).n_local_==n_local_); - assert(tau>0); - assert(tau<1); + assert((dynamic_cast(dx)).n_local_ == n_local_); + assert(tau > 0); + assert(tau < 1); #endif - double alpha=1.0, aux; - const double* d = (dynamic_cast(dx) ).local_data_const(); + double alpha = 1.0, aux; + const double* d = (dynamic_cast(dx)).local_data_const(); const double* x = data_; - for(int i=0; i0); + assert(x[i] > 0); #endif - if(d[i]>=0) continue; - aux = -tau*x[i]/d[i]; - if(aux= 0) continue; + aux = -tau * x[i] / d[i]; + if(aux < alpha) alpha = aux; } return alpha; } /* max{a\in(0,1]| x+ad >=(1-tau)x} */ -double hiopVectorPar:: -fractionToTheBdry_w_pattern_local(const hiopVector& dx, const double& tau, const hiopVector& ix) const +double hiopVectorPar::fractionToTheBdry_w_pattern_local(const hiopVector& dx, const double& tau, const hiopVector& ix) const { #ifdef HIOP_DEEPCHECKS - assert((dynamic_cast(dx) ).n_local_==n_local_); - assert((dynamic_cast(ix) ).n_local_==n_local_); - assert(tau>0); - assert(tau<1); + assert((dynamic_cast(dx)).n_local_ == n_local_); + assert((dynamic_cast(ix)).n_local_ == n_local_); + assert(tau > 0); + assert(tau < 1); #endif - double alpha=1.0, aux; - const double* d = (dynamic_cast(dx) ).local_data_const(); + double alpha = 1.0, aux; + const double* d = (dynamic_cast(dx)).local_data_const(); const double* x = data_; - const double* pat = (dynamic_cast(ix) ).local_data_const(); - for(int i=0; i=0) continue; - if(pat[i]==0) continue; + const double* pat = (dynamic_cast(ix)).local_data_const(); + for(int i = 0; i < n_local_; i++) { + if(d[i] >= 0) continue; + if(pat[i] == 0) continue; #ifdef HIOP_DEEPCHECKS - assert(x[i]>0); + assert(x[i] > 0); #endif - aux = -tau*x[i]/d[i]; - if(aux(ix_) ).n_local_==n_local_); + assert((dynamic_cast(ix_)).n_local_ == n_local_); #endif - const double* ix = (dynamic_cast(ix_) ).local_data_const(); - double* x=data_; - for(int i=0; i(ix_)).local_data_const(); + double* x = data_; + for(int i = 0; i < n_local_; i++) + if(ix[i] == 0.0) x[i] = 0.0; } bool hiopVectorPar::matchesPattern(const hiopVector& ix_) { #ifdef HIOP_DEEPCHECKS - assert((dynamic_cast(ix_) ).n_local_==n_local_); + assert((dynamic_cast(ix_)).n_local_ == n_local_); #endif - const double* ix = (dynamic_cast(ix_) ).local_data_const(); - int bmatches=true; - double* x=data_; - for(int i=0; (i(ix_)).local_data_const(); + int bmatches = true; + double* x = data_; + for(int i = 0; (i < n_local_) && bmatches; i++) { + if(ix[i] == 0.0 && x[i] != 0.0) { + bmatches = false; } } #ifdef HIOP_USE_MPI - int bmatches_glob=bmatches; - int ierr=MPI_Allreduce(&bmatches, &bmatches_glob, 1, MPI_INT, MPI_LAND, comm_); - assert(MPI_SUCCESS==ierr); + int bmatches_glob = bmatches; + int ierr = MPI_Allreduce(&bmatches, &bmatches_glob, 1, MPI_INT, MPI_LAND, comm_); + assert(MPI_SUCCESS == ierr); return bmatches_glob; #endif return bmatches; @@ -1095,51 +1093,50 @@ bool hiopVectorPar::matchesPattern(const hiopVector& ix_) int hiopVectorPar::allPositive_w_patternSelect(const hiopVector& w_) { #ifdef HIOP_DEEPCHECKS - assert((dynamic_cast(w_) ).n_local_==n_local_); -#endif - const double* w = (dynamic_cast(w_) ).local_data_const(); - const double* x=data_; - int allPos=1; - for(int i=0; i(w_)).n_local_ == n_local_); +#endif + const double* w = (dynamic_cast(w_)).local_data_const(); + const double* x = data_; + int allPos = 1; + for(int i = 0; i < n_local_ && allPos; i++) { + if(w[i] != 0.0 && x[i] <= 0.) { + allPos = 0; } } #ifdef HIOP_USE_MPI - int allPosG=allPos; + int allPosG = allPos; int ierr = MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return allPosG; -#endif +#endif return allPos; } -void hiopVectorPar::adjustDuals_plh(const hiopVector& x_, - const hiopVector& ix_, - const double& mu, - const double& kappa) +void hiopVectorPar::adjustDuals_plh(const hiopVector& x_, const hiopVector& ix_, const double& mu, const double& kappa) { #ifdef HIOP_DEEPCHECKS - assert((dynamic_cast(x_) ).n_local_==n_local_); - assert((dynamic_cast(ix_)).n_local_==n_local_); + assert((dynamic_cast(x_)).n_local_ == n_local_); + assert((dynamic_cast(ix_)).n_local_ == n_local_); #endif - const double* x = (dynamic_cast(x_ )).local_data_const(); + const double* x = (dynamic_cast(x_)).local_data_const(); const double* ix = (dynamic_cast(ix_)).local_data_const(); - double* z=data_; //the dual - double a,b; - for(size_type i=0; i=b - if(a<=b) { - *z=b; - } else { //a>b - if(a<*z) { - *z=a; + double* z = data_; // the dual + double a, b; + for(size_type i = 0; i < n_local_; i++) { + if(ix[i] == 1.) { + a = mu / x[i]; + b = a / kappa; + a = a * kappa; + if(*z < b) { + *z = b; + } else { // z[i]>=b + if(a <= b) { + *z = b; + } else { // a>b + if(a < *z) { + *z = a; } - //else a>=z[i] then *z=*z (z[i] does not need adjustment) + // else a>=z[i] then *z=*z (z[i] does not need adjustment) } } } @@ -1151,14 +1148,15 @@ bool hiopVectorPar::is_zero() const { int all_zero = true; int i{0}; - while(i=0) { - int err = MPI_Comm_rank(comm_, &myrank_); assert(err==MPI_SUCCESS); - err = MPI_Comm_size(comm_, &numranks); assert(err==MPI_SUCCESS); + if(rank >= 0) { + int err = MPI_Comm_rank(comm_, &myrank_); + assert(err == MPI_SUCCESS); + err = MPI_Comm_size(comm_, &numranks); + assert(err == MPI_SUCCESS); } #endif - if(myrank_==rank || rank==-1) { - if(max_elems>n_local_) max_elems=n_local_; + if(myrank_ == rank || rank == -1) { + if(max_elems > n_local_) max_elems = n_local_; - if(nullptr==msg) { - if(numranks>1){ + if(nullptr == msg) { + if(numranks > 1) { fprintf(file, "vector of size %d, printing %d elems (on rank=%d)\n", n_, max_elems, myrank_); - } - else{ + } else { fprintf(file, "vector of size %d, printing %d elems (serial)\n", n_, max_elems); } } else { fprintf(file, "%s ", msg); - } + } fprintf(file, "=["); - max_elems = max_elems>=0?max_elems:n_local_; - for(int it=0; it= 0 ? max_elems : n_local_; + for(int it = 0; it < max_elems; it++) { fprintf(file, "%24.18e ; ", data_[it]); } fprintf(file, "];\n"); } } - -size_type hiopVectorPar::numOfElemsLessThan(const double &val) const +size_type hiopVectorPar::numOfElemsLessThan(const double& val) const { size_type ret_num = 0; - for(size_type i=0; i= 0 && start_src >= 0); - const index_type how_many = end-start; - if(how_many>0) { + const index_type how_many = end - start; + if(how_many > 0) { ExecSpace exec_space_dest; - exec_space_dest.copy(arr+start, arr_src+start_src, how_many, exec_space_); + exec_space_dest.copy(arr + start, arr_src + start_src, how_many, exec_space_); } } -void hiopVectorPar::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, +void hiopVectorPar::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType arr_src) const { assert(end <= n_local_ && start <= end); - for(int i=start; i=0, 'num_elems' will be copied; if num_elems<0, elements will be copied till the end of * either source ('this') or destination ('dest') is reached @@ -171,13 +169,13 @@ class hiopVectorPar : public hiopVector virtual void startingAtCopyToStartingAt(index_type start_idx_in_src, hiopVector& dest, index_type start_idx_dest, - size_type num_elems=-1) const; + size_type num_elems = -1) const; virtual void startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, hiopVector& dest, index_type start_idx_dest, const hiopVector& selec_dest, - size_type num_elems=-1) const; + size_type num_elems = -1) const; virtual double twonorm() const; virtual double dotProductWith(const hiopVector& vec) const; @@ -185,9 +183,9 @@ class hiopVectorPar : public hiopVector virtual double infnorm_local() const; virtual double onenorm() const; virtual double onenorm_local() const; - virtual void componentMult( const hiopVector& v ); - virtual void componentDiv ( const hiopVector& v ); - virtual void componentDiv_w_selectPattern( const hiopVector& v, const hiopVector& ix); + virtual void componentMult(const hiopVector& v); + virtual void componentDiv(const hiopVector& v); + virtual void componentDiv_w_selectPattern(const hiopVector& v, const hiopVector& ix); virtual void component_min(const double constant); virtual void component_min(const hiopVector& vec); virtual void component_max(const double constant); @@ -196,18 +194,18 @@ class hiopVectorPar : public hiopVector virtual void component_sgn(); virtual void component_sqrt(); - virtual void scale( double alpha ); + virtual void scale(double alpha); /// @brief this += alpha * x - virtual void axpy ( double alpha, const hiopVector& x ); + virtual void axpy(double alpha, const hiopVector& x); /// @brief this += alpha * x, for the entries in 'this' where corresponding 'select' is nonzero. virtual void axpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& select); /** * @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. - * - * @param alpha scaling factor + * + * @param alpha scaling factor * @param x vector of doubles to be axpy-ed to this (size equal to size of i and less than or equal to size of this) - * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than + * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than * or equal to size of this) * * @pre The entries of i must be valid (zero-based) indexes in this @@ -216,13 +214,10 @@ class hiopVectorPar : public hiopVector virtual void axpy(double alpha, const hiopVector& xvec, const hiopVectorInt& i); /// @brief this += alpha * x * z - virtual void axzpy (double alpha, const hiopVector& xvec, const hiopVector& zvec); + virtual void axzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); /// @brief this += alpha * x / z virtual void axdzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); - virtual void axdzpy_w_pattern(double alpha, - const hiopVector& xvec, - const hiopVector& zvec, - const hiopVector& select); + virtual void axdzpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& zvec, const hiopVector& select); /// @brief Add c to the elements of this virtual void addConstant(double c); virtual void addConstant_w_patternSelect(double c, const hiopVector& select); @@ -240,16 +235,16 @@ class hiopVectorPar : public hiopVector const double& mu, const double& kappa_d) const; - /** - * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of - * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + /** + * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. * * Supports distributed/MPI vectors, but performs only elementwise operations and do not * require communication. * * This method is used to add gradient contributions from the (linear) damping term used - * to handle unbounded problems. The damping terms are used for variables that are - * bounded on one side only. + * to handle unbounded problems. The damping terms are used for variables that are + * bounded on one side only. */ virtual void addLinearDampingTerm(const hiopVector& ixleft, const hiopVector& ixright, @@ -258,33 +253,28 @@ class hiopVectorPar : public hiopVector virtual int allPositive(); virtual int allPositive_w_patternSelect(const hiopVector& select); - virtual bool projectIntoBounds_local(const hiopVector& xl, + virtual bool projectIntoBounds_local(const hiopVector& xl, const hiopVector& ixl, const hiopVector& xu, const hiopVector& ixu, double kappa1, double kappa2); virtual double fractionToTheBdry_local(const hiopVector& dvec, const double& tau) const; - virtual double fractionToTheBdry_w_pattern_local(const hiopVector& dvec, - const double& tau, - const hiopVector& ix) const; + virtual double fractionToTheBdry_w_pattern_local(const hiopVector& dvec, const double& tau, const hiopVector& ix) const; virtual void selectPattern(const hiopVector& select); virtual bool matchesPattern(const hiopVector& select); virtual hiopVector* alloc_clone() const; - virtual hiopVector* new_copy () const; + virtual hiopVector* new_copy() const; - virtual void adjustDuals_plh(const hiopVector& x, - const hiopVector& ix, - const double& mu, - const double& kappa); + virtual void adjustDuals_plh(const hiopVector& x, const hiopVector& ix, const double& mu, const double& kappa); virtual bool is_zero() const; virtual bool isnan_local() const; virtual bool isinf_local() const; virtual bool isfinite_local() const; - - virtual void print(FILE* file=nullptr, const char* message=nullptr,int max_elems=-1, int rank=-1) const; + + virtual void print(FILE* file = nullptr, const char* message = nullptr, int max_elems = -1, int rank = -1) const; /* more accessors */ virtual size_type get_local_size() const { return n_local_; } @@ -293,18 +283,18 @@ class hiopVectorPar : public hiopVector virtual MPI_Comm get_mpi_comm() const { return comm_; } virtual inline double* local_data_host() { return local_data(); } virtual inline const double* local_data_host_const() const { return local_data_const(); } - - virtual size_type numOfElemsLessThan(const double &val) const; - virtual size_type numOfElemsAbsLessThan(const double &val) const; - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual size_type numOfElemsLessThan(const double& val) const; + virtual size_type numOfElemsAbsLessThan(const double& val) const; + + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType* arr_src, const int start_src) const; - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType arr_src) const; virtual bool is_equal(const hiopVector& vec) const; @@ -312,18 +302,12 @@ class hiopVectorPar : public hiopVector /** * @brief accessor to the execution policy */ - ExecSpace& exec_space() - { - return exec_space_; - } + ExecSpace& exec_space() { return exec_space_; } /** * @brief accessor to the execution policy */ - const ExecSpace& exec_space() const - { - return exec_space_; - } + const ExecSpace& exec_space() const { return exec_space_; } protected: ExecSpace exec_space_; @@ -331,10 +315,10 @@ class hiopVectorPar : public hiopVector double* data_; size_type glob_il_, glob_iu_; size_type n_local_; + private: /// @brief copy constructor, for internal/private use only (it doesn't copy the elements.) hiopVectorPar(const hiopVectorPar&); - }; -} +} // namespace hiop diff --git a/src/LinAlg/hiopVectorRaja.hpp b/src/LinAlg/hiopVectorRaja.hpp index 31cd4a2e9..8541c0e03 100644 --- a/src/LinAlg/hiopVectorRaja.hpp +++ b/src/LinAlg/hiopVectorRaja.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -73,40 +73,40 @@ namespace hiop { -//forward declarations of the test classes that are friends to this class +// forward declarations of the test classes that are friends to this class namespace tests { class VectorTestsRajaPar; class MatrixTestsRajaDense; class MatrixTestsRajaSparseTriplet; class MatrixTestsRajaSymSparseTriplet; -} - +} // namespace tests + template class hiopVectorRaja : public hiopVector { public: - hiopVectorRaja(const size_type& glob_n, std::string mem_space, index_type* col_part=NULL, MPI_Comm comm=MPI_COMM_SELF); + hiopVectorRaja(const size_type& glob_n, std::string mem_space, index_type* col_part = NULL, MPI_Comm comm = MPI_COMM_SELF); hiopVectorRaja() = delete; virtual ~hiopVectorRaja(); virtual void setToZero(); - virtual void setToConstant( double c ); + virtual void setToConstant(double c); virtual void set_to_random_uniform(double minv, double maxv); virtual void setToConstant_w_patternSelect(double c, const hiopVector& select); virtual void copyFrom(const hiopVector& vec); - virtual void copyFrom(const double* local_array); //v should be of length at least n_local + virtual void copyFrom(const double* local_array); // v should be of length at least n_local virtual void copy_from_w_pattern(const hiopVector& src, const hiopVector& select); /// @brief Copy entries from a hiopVectorPar, see method documentation in the parent class. void copy_from_vectorpar(const hiopVectorPar& vsrc); /// @brief Copy entries to a hiopVectorPar, see method documentation in the parent class. void copy_to_vectorpar(hiopVectorPar& vdest) const; - + /** - * @brief Copy from src the elements specified by the indices in index_in_src. + * @brief Copy from src the elements specified by the indices in index_in_src. * - * @pre All vectors must reside in the same memory space. + * @pre All vectors must reside in the same memory space. * @pre Size of src must be greater or equal than size of this * @pre Size of index_in_src must be equal to size of this * @pre Elements of index_in_src must be valid (zero-based) indexes in src @@ -115,9 +115,9 @@ class hiopVectorRaja : public hiopVector virtual void copy_from_indexes(const hiopVector& src, const hiopVectorInt& index_in_src); /** - * @brief Copy from src the elements specified by the indices in index_in_src. + * @brief Copy from src the elements specified by the indices in index_in_src. * - * @pre All vectors must reside in the same memory space. + * @pre All vectors must reside in the same memory space. * @pre Size of src must be greater or equal than size of this * @pre Size of index_in_src must be equal to size of this * @pre Elements of index_in_src must be valid (zero-based) indexes in src @@ -125,9 +125,9 @@ class hiopVectorRaja : public hiopVector */ /** - * @brief Copy from src the elements specified by the indices in index_in_src. + * @brief Copy from src the elements specified by the indices in index_in_src. * - * @pre All vectors and arrays must reside in the same memory space. + * @pre All vectors and arrays must reside in the same memory space. * @pre Size of src must be greater or equal than size of this * @pre Size of index_in_src must be equal to size of this * @pre Elements of index_in_src must be valid (zero-based) indexes in src @@ -147,45 +147,43 @@ class hiopVectorRaja : public hiopVector virtual void copyToStarting(int start_index, hiopVector& dst) const; /* Copy 'this' to v starting at start_index in 'v'. */ virtual void copyToStarting(hiopVector& vec, int start_index_in_dest) const; - virtual void copyToStartingAt_w_pattern(hiopVector& vec, - index_type start_index_in_dest, - const hiopVector& ix) const; + virtual void copyToStartingAt_w_pattern(hiopVector& vec, index_type start_index_in_dest, const hiopVector& ix) const; /// @brief Copy the entries in `c` and `d` to `this`, according to the mapping in `c_map` and `d_map` - virtual void copy_from_two_vec_w_pattern(const hiopVector& c, - const hiopVectorInt& c_map, - const hiopVector& d, + virtual void copy_from_two_vec_w_pattern(const hiopVector& c, + const hiopVectorInt& c_map, + const hiopVector& d, const hiopVectorInt& d_map); /// @brief Copy the entries in `this` to `c` and `d`, according to the mapping `c_map` and `d_map` - virtual void copy_to_two_vec_w_pattern(hiopVector& c, - const hiopVectorInt& c_map, - hiopVector& d, + virtual void copy_to_two_vec_w_pattern(hiopVector& c, + const hiopVectorInt& c_map, + hiopVector& d, const hiopVectorInt& d_map) const; - /* copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' + /* copy 'this' (source) starting at 'start_idx_in_src' to 'dest' starting at index 'int start_idx_dest' * If num_elems>=0, 'num_elems' will be copied; if num_elems<0, elements will be copied till the end of * either source ('this') or destination ('dest') is reached * if 'selec_dest' is given, the values are copy to 'dest' where the corresponding entry in 'selec_dest' is nonzero */ virtual void startingAtCopyToStartingAt(index_type start_idx_in_src, hiopVector& dest, index_type start_idx_dest, - size_type num_elems=-1) const; + size_type num_elems = -1) const; virtual void startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, hiopVector& dest, index_type start_idx_dest, const hiopVector& selec_dest, - size_type num_elems=-1) const; + size_type num_elems = -1) const; virtual double twonorm() const; virtual double dotProductWith(const hiopVector& vec) const; virtual double infnorm() const; virtual double infnorm_local() const; virtual double onenorm() const; - virtual double onenorm_local() const; - virtual void componentMult( const hiopVector& v ); - virtual void componentDiv ( const hiopVector& v ); - virtual void componentDiv_w_selectPattern( const hiopVector& v, const hiopVector& ix); + virtual double onenorm_local() const; + virtual void componentMult(const hiopVector& v); + virtual void componentDiv(const hiopVector& v); + virtual void componentDiv_w_selectPattern(const hiopVector& v, const hiopVector& ix); virtual void component_min(const double constant); virtual void component_min(const hiopVector& vec); virtual void component_max(const double constant); @@ -193,59 +191,57 @@ class hiopVectorRaja : public hiopVector virtual void component_abs(); virtual void component_sgn(); virtual void component_sqrt(); - virtual void scale( double alpha ); + virtual void scale(double alpha); /** this += alpha * x */ - virtual void axpy ( double alpha, const hiopVector& x ); + virtual void axpy(double alpha, const hiopVector& x); /// @brief this += alpha * x, for the entries in 'this' where corresponding 'select' is nonzero. virtual void axpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& select); /** * @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. - * - * @param alpha scaling factor + * + * @param alpha scaling factor * @param x vector of doubles to be axpy-ed to this (size equal to size of i and less than or equal to size of this) - * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than + * @param i vector of indexes in this to which the axpy operation is performed (size equal to size of x and less than * or equal to size of this) * * @pre The entries of i must be valid (zero-based) indexes in this * */ virtual void axpy(double alpha, const hiopVector& xvec, const hiopVectorInt& i); - - + /** this += alpha * x * z */ - virtual void axzpy (double alpha, const hiopVector& xvec, const hiopVector& zvec); + virtual void axzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); /** this += alpha * x / z */ virtual void axdzpy(double alpha, const hiopVector& xvec, const hiopVector& zvec); - virtual void axdzpy_w_pattern(double alpha, - const hiopVector& xvec, - const hiopVector& zvec, - const hiopVector& select); + virtual void axdzpy_w_pattern(double alpha, const hiopVector& xvec, const hiopVector& zvec, const hiopVector& select); /** Add c to the elements of this */ virtual void addConstant(double c); virtual void addConstant_w_patternSelect(double c, const hiopVector& select); virtual double min() const; virtual void min(double& minval, int& index) const; - virtual double min_w_pattern(const hiopVector& select) const; + virtual double min_w_pattern(const hiopVector& select) const; virtual void negate(); virtual void invert(); virtual double logBarrier_local(const hiopVector& select) const; virtual double sum_local() const; virtual void addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select); - virtual double linearDampingTerm_local(const hiopVector& ixl_select, const hiopVector& ixu_select, - const double& mu, const double& kappa_d) const; + virtual double linearDampingTerm_local(const hiopVector& ixl_select, + const hiopVector& ixu_select, + const double& mu, + const double& kappa_d) const; - /** - * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of - * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. + /** + * Performs `this[i] = alpha*this[i] + sign*ct` where sign=1 when EXACTLY one of + * ixleft[i] and ixright[i] is 1.0 and sign=0 otherwise. * * Supports distributed/MPI vectors, but performs only elementwise operations and do not * require communication. * * This method is used to add gradient contributions from the (linear) damping term used - * to handle unbounded problems. The damping terms are used for variables that are - * bounded on one side only. + * to handle unbounded problems. The damping terms are used for variables that are + * bounded on one side only. */ virtual void addLinearDampingTerm(const hiopVector& ixleft, @@ -255,7 +251,7 @@ class hiopVectorRaja : public hiopVector virtual int allPositive(); virtual int allPositive_w_patternSelect(const hiopVector& select); - virtual bool projectIntoBounds_local(const hiopVector& xlo, + virtual bool projectIntoBounds_local(const hiopVector& xlo, const hiopVector& ixl, const hiopVector& xup, const hiopVector& ixu, @@ -267,19 +263,16 @@ class hiopVectorRaja : public hiopVector virtual bool matchesPattern(const hiopVector& select); virtual hiopVector* alloc_clone() const; - virtual hiopVector* new_copy () const; + virtual hiopVector* new_copy() const; - virtual void adjustDuals_plh(const hiopVector& xvec, - const hiopVector& ixvec, - const double& mu, - const double& kappa); + virtual void adjustDuals_plh(const hiopVector& xvec, const hiopVector& ixvec, const double& mu, const double& kappa); virtual bool is_zero() const; virtual bool isnan_local() const; virtual bool isinf_local() const; virtual bool isfinite_local() const; - - virtual void print(FILE*, const char* withMessage=NULL, int max_elems=-1, int rank=-1) const; + + virtual void print(FILE*, const char* withMessage = NULL, int max_elems = -1, int rank = -1) const; virtual void print() const; /* more accessors */ @@ -289,6 +282,7 @@ class hiopVectorRaja : public hiopVector inline double* local_data() { return data_dev_; } inline const double* local_data_const() const { return data_dev_; } inline MPI_Comm get_mpi_comm() const { return comm_; } + private: void copyToDev(); void copyFromDev(); @@ -296,34 +290,33 @@ class hiopVectorRaja : public hiopVector friend class tests::MatrixTestsRajaDense; friend class tests::MatrixTestsRajaSparseTriplet; friend class tests::MatrixTestsRajaSymSparseTriplet; + public: - virtual size_type numOfElemsLessThan(const double &val) const; - virtual size_type numOfElemsAbsLessThan(const double &val) const; + virtual size_type numOfElemsLessThan(const double& val) const; + virtual size_type numOfElemsAbsLessThan(const double& val) const; - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType* arr_src, const int start_src) const; - virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, + virtual void set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType arr_src) const; virtual bool is_equal(const hiopVector& vec) const; - const ExecSpace& exec_space() const - { - return exec_space_; - } + const ExecSpace& exec_space() const { return exec_space_; } + private: ExecSpace exec_space_; using MEMBACKENDHOST = typename MEMBACKEND::MemBackendHost; - //EXECPOLICYRAJA is used internally as a execution policy. EXECPOLICYHOST is not used internally - //in this class. EXECPOLICYHOST can be any host policy as long as memory allocations and - //and transfers within and from `exec_space_host_` work with EXECPOLICYHOST (currently all such - //combinations work). + // EXECPOLICYRAJA is used internally as a execution policy. EXECPOLICYHOST is not used internally + // in this class. EXECPOLICYHOST can be any host policy as long as memory allocations and + // and transfers within and from `exec_space_host_` work with EXECPOLICYHOST (currently all such + // combinations work). using EXECPOLICYHOST = hiop::ExecPolicySeq; ExecSpace exec_space_host_; @@ -338,7 +331,6 @@ class hiopVectorRaja : public hiopVector hiopVectorRaja(const hiopVectorRaja&); }; -} // namespace hiop - +} // namespace hiop -#endif // HIOP_VECTOR_RAJA +#endif // HIOP_VECTOR_RAJA diff --git a/src/LinAlg/hiopVectorRajaCuda.cpp b/src/LinAlg/hiopVectorRajaCuda.cpp index e1f94f5f2..2d548c6a8 100644 --- a/src/LinAlg/hiopVectorRajaCuda.cpp +++ b/src/LinAlg/hiopVectorRajaCuda.cpp @@ -60,12 +60,11 @@ #include "MemBackendCppImpl.hpp" #include "ExecPoliciesRajaCudaImpl.hpp" - namespace hiop { using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exec; using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; -} +} // namespace hiop #include "hiopVectorRajaImpl.hpp" #include "MathKernelsCuda.hpp" @@ -73,19 +72,21 @@ using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_ namespace hiop { -template<> void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) +template<> +void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) { hiop::cuda::array_random_uniform_kernel(n_local_, data_dev_, minv, maxv); } -template<> void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) +template<> +void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) { hiop::cuda::array_random_uniform_kernel(n_local_, data_dev_, minv, maxv); } // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopVectorRaja; template class hiopVectorRaja; -} +} // namespace hiop diff --git a/src/LinAlg/hiopVectorRajaHip.cpp b/src/LinAlg/hiopVectorRajaHip.cpp index 9d19bdab2..fe7875f92 100644 --- a/src/LinAlg/hiopVectorRajaHip.cpp +++ b/src/LinAlg/hiopVectorRajaHip.cpp @@ -58,12 +58,11 @@ #include "MemBackendHipImpl.hpp" #include "ExecPoliciesRajaHipImpl.hpp" - namespace hiop { using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exec; using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; -} +} // namespace hiop #include "hiopVectorRajaImpl.hpp" #include "MathKernelsHip.hpp" @@ -71,19 +70,21 @@ using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_r namespace hiop { -template<> void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) +template<> +void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) { hiop::hip::array_random_uniform_kernel(n_local_, data_dev_, minv, maxv); } -template<> void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) +template<> +void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) { hiop::hip::array_random_uniform_kernel(n_local_, data_dev_, minv, maxv); } // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // template class hiopVectorRaja; template class hiopVectorRaja; -} +} // namespace hiop diff --git a/src/LinAlg/hiopVectorRajaImpl.hpp b/src/LinAlg/hiopVectorRajaImpl.hpp index f1a950226..bfe1e67fe 100644 --- a/src/LinAlg/hiopVectorRajaImpl.hpp +++ b/src/LinAlg/hiopVectorRajaImpl.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -64,7 +64,7 @@ #include "hiopVectorIntRaja.hpp" #include -#include //for memcpy +#include //for memcpy #include #include #include @@ -87,67 +87,60 @@ using global_index_type = index_type; // Define constants static constexpr real_type zero = 0.0; -static constexpr real_type one = 1.0; +static constexpr real_type one = 1.0; template -hiopVectorRaja:: -hiopVectorRaja(const size_type& glob_n, - std::string mem_space /* = "HOST" */, - index_type* col_part /* = NULL */, - MPI_Comm comm /* = MPI_COMM_NULL */) - : hiopVector(), - exec_space_(ExecSpace(MEMBACKEND(mem_space))), - exec_space_host_(ExecSpace(MEMBACKENDHOST::new_backend_host())), - mem_space_(mem_space), - comm_(comm), - idx_cumsum_{nullptr} +hiopVectorRaja::hiopVectorRaja(const size_type& glob_n, + std::string mem_space /* = "HOST" */, + index_type* col_part /* = NULL */, + MPI_Comm comm /* = MPI_COMM_NULL */) + : hiopVector(), + exec_space_(ExecSpace(MEMBACKEND(mem_space))), + exec_space_host_(ExecSpace(MEMBACKENDHOST::new_backend_host())), + mem_space_(mem_space), + comm_(comm), + idx_cumsum_{nullptr} { n_ = glob_n; #ifdef HIOP_USE_MPI // if this is a serial vector, make sure it has a valid comm in the mpi case - if(comm_ == MPI_COMM_NULL) - comm_ = MPI_COMM_SELF; + if(comm_ == MPI_COMM_NULL) comm_ = MPI_COMM_SELF; #endif - int P = 0; - if(col_part) - { + int P = 0; + if(col_part) { #ifdef HIOP_USE_MPI - int ierr=MPI_Comm_rank(comm_, &P); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Comm_rank(comm_, &P); + assert(ierr == MPI_SUCCESS); #endif glob_il_ = col_part[P]; - glob_iu_ = col_part[P+1]; - } - else - { + glob_iu_ = col_part[P + 1]; + } else { glob_il_ = 0; glob_iu_ = n_; } n_local_ = glob_iu_ - glob_il_; #ifndef HIOP_USE_GPU - assert(mem_space_ == "HOST"); + assert(mem_space_ == "HOST"); #endif data_dev_ = exec_space_.template alloc_array(n_local_); - if(exec_space_.mem_backend().is_device()) - { + if(exec_space_.mem_backend().is_device()) { // Create host mirror if the memory space is on device - data_host_ = exec_space_host_.template alloc_array(n_local_); - } - else - { + data_host_ = exec_space_host_.template alloc_array(n_local_); + } else { data_host_ = data_dev_; } } template hiopVectorRaja::hiopVectorRaja(const hiopVectorRaja& v) - : hiopVector(), - exec_space_(v.exec_space_), - exec_space_host_(v.exec_space_host_), - idx_cumsum_{nullptr} + : hiopVector(), + exec_space_(v.exec_space_), + exec_space_host_(v.exec_space_host_), + idx_cumsum_{nullptr} { n_local_ = v.n_local_; n_ = v.n_; @@ -157,17 +150,14 @@ hiopVectorRaja::hiopVectorRaja(const hiopVectorRaja& v) mem_space_ = v.mem_space_; #ifndef HIOP_USE_GPU - assert(mem_space_ == "HOST"); + assert(mem_space_ == "HOST"); #endif data_dev_ = exec_space_.template alloc_array(n_local_); - if(exec_space_.mem_backend().is_device()) - { + if(exec_space_.mem_backend().is_device()) { // Create host mirror if the memory space is on device - data_host_ = exec_space_host_.template alloc_array(n_local_); - } - else - { + data_host_ = exec_space_host_.template alloc_array(n_local_); + } else { data_host_ = data_dev_; } } @@ -179,7 +169,7 @@ hiopVectorRaja::~hiopVectorRaja() exec_space_host_.dealloc_array(data_host_); } exec_space_.dealloc_array(data_dev_); - data_dev_ = nullptr; + data_dev_ = nullptr; data_host_ = nullptr; delete idx_cumsum_; } @@ -192,7 +182,7 @@ hiopVector* hiopVectorRaja::alloc_clone() const return v; } template -hiopVector* hiopVectorRaja::new_copy () const +hiopVector* hiopVectorRaja::new_copy() const { hiopVector* v = new hiopVectorRaja(*this); assert(v); @@ -205,46 +195,41 @@ hiopVector* hiopVectorRaja::new_copy () const // /// Set all vector elements to zero -template +template void hiopVectorRaja::setToZero() { setToConstant(0.0); } /// Set all vector elements to constant c -template +template void hiopVectorRaja::setToConstant(double c) { double* data = data_dev_; - RAJA::forall< hiop_raja_exec >(RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - data[i] = c; - }); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { data[i] = c; }); } /// Set selected elements to constant, zero otherwise -template +template void hiopVectorRaja::setToConstant_w_patternSelect(double c, const hiopVector& select) { const auto& s = dynamic_cast&>(select); const double* pattern = s.local_data_const(); double* data = data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) { - data[i] = pattern[i]*c; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { data[i] = pattern[i] * c; }); } /** * @brief Copy data from `vec` to this vector - * + * * @param[in] vec - Vector from which to copy into `this` - * + * * @pre `vec` and `this` must have same partitioning. * @post Elements of `this` are overwritten with elements of `vec` */ -template +template void hiopVectorRaja::copyFrom(const hiopVector& vec) { const auto& v = dynamic_cast&>(vec); @@ -255,38 +240,38 @@ void hiopVectorRaja::copyFrom(const hiopVector& vec) exec_space_.copy(data_dev_, v.data_dev_, n_local_, v.exec_space_); } -template +template void hiopVectorRaja::copy_from_vectorpar(const hiopVectorPar& v) { assert(n_local_ == v.get_local_size()); exec_space_.copy(data_dev_, v.local_data_const(), n_local_, v.exec_space()); } -template +template void hiopVectorRaja::copy_to_vectorpar(hiopVectorPar& v) const { - assert(n_local_ == v.get_local_size()); + assert(n_local_ == v.get_local_size()); v.exec_space().copy(v.local_data(), data_dev_, n_local_, exec_space_); } - + /** * @brief Copy data from local_array to this vector - * + * * @param[in] local_array - A raw array from which to copy into `this` - * - * @pre `local_array` is allocated by same memory backend and in the same + * + * @pre `local_array` is allocated by same memory backend and in the same * memory space used by `this`. * @pre `local_array` must be of same size as the data block of `this`. * @post Elements of `this` are overwritten with elements of `local_array`. - * + * * @warning Method has no way to check for the size of `local_array`. May * read past the end of the array. - * + * * @warning Method casts away const from the `local_array`. - * + * * @warning Not tested - not part of the hiopVector interface. */ -template +template void hiopVectorRaja::copyFrom(const double* local_array) { if(local_array) { @@ -295,78 +280,73 @@ void hiopVectorRaja::copyFrom(const double* local_array) } /// @brief Copy from vec the elements specified by the indices in index_in_src. -template +template void hiopVectorRaja::copy_from_w_pattern(const hiopVector& vec, const hiopVector& select) { const auto& v = dynamic_cast&>(vec); const auto& ix = dynamic_cast&>(select); assert(n_local_ == ix.n_local_); - + double* dd = data_dev_; double* vd = v.data_dev_; double* id = ix.data_dev_; - RAJA::forall< hiop_raja_exec >(RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(id[i] == one) { - dd[i] = vd[i]; - } - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(id[i] == one) { + dd[i] = vd[i]; + } + }); } /// @brief Copy from vec the elements specified by the indices in index_in_src -template +template void hiopVectorRaja::copy_from_indexes(const hiopVector& vv, const hiopVectorInt& index_in_src) { - const auto& indexes = dynamic_cast &>(index_in_src); - const auto& v = dynamic_cast &>(vv); + const auto& indexes = dynamic_cast&>(index_in_src); + const auto& v = dynamic_cast&>(vv); assert(indexes.get_local_size() == n_local_); - + index_type* id = const_cast(indexes.local_data_const()); double* dd = data_dev_; double* vd = v.data_dev_; size_type nv = v.get_local_size(); - - RAJA::forall< hiop_raja_exec >(RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(id[i]( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(id[i] < nv); + dd[i] = vd[id[i]]; + }); } /// @brief Copy from vec the elements specified by the indices in index_in_src template void hiopVectorRaja::copy_from_indexes(const double* vv, const hiopVectorInt& index_in_src) { - if(nullptr==vv) { + if(nullptr == vv) { return; } - const auto& indexes = dynamic_cast &>(index_in_src); + const auto& indexes = dynamic_cast&>(index_in_src); assert(indexes.get_local_size() == n_local_); index_type* id = const_cast(indexes.local_data_const()); double* dd = data_dev_; - - RAJA::forall< hiop_raja_exec >(RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] = vv[id[i]]; - }); -} + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { dd[i] = vv[id[i]]; }); +} /** * @brief Copy `nv` elements from array `v` to this vector starting from `start_index_in_this` - * + * * @param[in] start_index_in_this - position in this where to copy * @param[in] v - a raw array from which to copy into `this` * @param[in] nv - how many elements of `v` to copy - * + * * @pre Size of `v` must be >= nv. * @pre start_index_in_this+nv <= n_local_ * @pre `this` is not distributed @@ -377,22 +357,21 @@ void hiopVectorRaja::copy_from_indexes(const double* vv, const hiopVec template void hiopVectorRaja::copyFromStarting(int start_index_in_this, const double* v, int nv) { - assert(start_index_in_this+nv <= n_local_); + assert(start_index_in_this + nv <= n_local_); - // If nothing to copy, return. - if(nv == 0) - return; + // If nothing to copy, return. + if(nv == 0) return; - //TODO: data_dev_+start_index_in_this -> is not portable, may not work on the device. RAJA loop should be used - exec_space_.copy(data_dev_+start_index_in_this, v, nv); + // TODO: data_dev_+start_index_in_this -> is not portable, may not work on the device. RAJA loop should be used + exec_space_.copy(data_dev_ + start_index_in_this, v, nv); } /** * @brief Copy `vec` to this vector starting from `start_index` in `this`. - * + * * @param[in] start_index - position in `this` where to copy * @param[in] src - the source vector from which to copy into `this` - * + * * @pre Size of `src` must be >= nv. * @pre start_index + src.n_local_ <= n_local_ * @pre `this` is not distributed @@ -407,11 +386,10 @@ void hiopVectorRaja::copyFromStarting(int start_index, const hiopVecto assert(start_index + v.n_local_ <= n_local_); // If there is nothing to copy, return. - if(v.n_local_ == 0) - return; + if(v.n_local_ == 0) return; - //TODO: data_dev_+start_index -> is not portable, may not work on the device. RAJA loop should be used - exec_space_.copy(data_dev_+start_index, v.data_dev_, v.n_local_, v.exec_space_); + // TODO: data_dev_+start_index -> is not portable, may not work on the device. RAJA loop should be used + exec_space_.copy(data_dev_ + start_index, v.data_dev_, v.n_local_, v.exec_space_); } /** @@ -432,27 +410,26 @@ template void hiopVectorRaja::copy_from_starting_at(const double* v, int start_index_in_v, int nv) { // If nothing to copy, return. - if(nv == 0) - return; - - //TODO: v+start_index_in_v -> is not portable, may not work on the device. RAJA loop should be used - exec_space_.copy(data_dev_, v+start_index_in_v, nv); + if(nv == 0) return; + + // TODO: v+start_index_in_v -> is not portable, may not work on the device. RAJA loop should be used + exec_space_.copy(data_dev_, v + start_index_in_v, nv); } /** * @brief Copy from `vec_src` starting at `start_idx_src` into * `this` vector starting at `start_idx_dest`. - * + * * @pre `vec_src` and `this` are not distributed. * @pre `start_idx_dest` + `howManyToCopySrc` <= `n_local_` * @pre `start_idx_src` + `howManyToCopySrc` <= `vec_src.n_local_` * @post Elements of `vec_src` are unchanged. * @post All elements of `this` starting from `start_idx_dest` are overwritten - * + * * @todo Implentation differs from CPU - check with upstream what is correct! */ -template void hiopVectorRaja:: -startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& vec_src, int start_idx_src) +template +void hiopVectorRaja::startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& vec_src, int start_idx_src) { size_type howManyToCopyDest = this->n_local_ - start_idx_dest; @@ -460,10 +437,10 @@ startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& vec_src, int assert(n_local_ == n_ && "are you sure you want to call this?"); #endif - assert((start_idx_dest >= 0 && start_idx_dest < this->n_local_) || this->n_local_==0); + assert((start_idx_dest >= 0 && start_idx_dest < this->n_local_) || this->n_local_ == 0); const auto& v = dynamic_cast&>(vec_src); - assert((start_idx_src >=0 && start_idx_src < v.n_local_) || v.n_local_==0 || v.n_local_==start_idx_src); - const size_type howManyToCopySrc = v.n_local_-start_idx_src; + assert((start_idx_src >= 0 && start_idx_src < v.n_local_) || v.n_local_ == 0 || v.n_local_ == start_idx_src); + const size_type howManyToCopySrc = v.n_local_ - start_idx_src; if(howManyToCopyDest == 0 || howManyToCopySrc == 0) { return; @@ -471,19 +448,16 @@ startingAtCopyFromStartingAt(int start_idx_dest, const hiopVector& vec_src, int assert(howManyToCopyDest <= howManyToCopySrc); - //TODO: this also looks like is not portable - exec_space_.copy(data_dev_+start_idx_dest, - v.data_dev_+start_idx_src, - howManyToCopyDest, - v.exec_space_); + // TODO: this also looks like is not portable + exec_space_.copy(data_dev_ + start_idx_dest, v.data_dev_ + start_idx_src, howManyToCopyDest, v.exec_space_); } /** * @brief Copy to `vec` elements of `this` vector starting from `start_index`. - * + * * @param[in] start_index - position in `this` from where to copy * @param[out] dst - the destination vector where to copy elements of `this` - * + * * @pre start_index + dst.n_local_ <= n_local_ * @pre `this` and `dst` are not distributed */ @@ -498,19 +472,18 @@ void hiopVectorRaja::copyToStarting(int start_index, hiopVector& dst) assert(start_index + v.n_local_ <= n_local_); // If nowhere to copy, return. - if(v.n_local_ == 0) - return; + if(v.n_local_ == 0) return; - //TODO: pointer arithmetic on host should be avoided + // TODO: pointer arithmetic on host should be avoided v.exec_space_.copy(v.data_dev_, this->data_dev_ + start_index, v.n_local_, exec_space_); } /** * @brief Copy elements of `this` vector to `vec` starting at `start_index_in_dest`. - * + * * @param[out] vec - a vector where to copy elements of `this` * @param[in] start_index_in_dest - position in `vec` where to copy - * + * * @pre start_index_in_dest + vec.n_local_ <= n_local_ * @pre `this` and `vec` are not distributed */ @@ -518,78 +491,74 @@ template void hiopVectorRaja::copyToStarting(hiopVector& vec, int start_index_in_dest) const { auto& v = dynamic_cast&>(vec); - assert(start_index_in_dest+n_local_ <= v.n_local_); + assert(start_index_in_dest + n_local_ <= v.n_local_); // If there is nothing to copy, return. - if(n_local_ == 0) - return; + if(n_local_ == 0) return; - //TODO: pointer arithmetic on host should be avoided + // TODO: pointer arithmetic on host should be avoided v.exec_space_.copy(v.data_dev_ + start_index_in_dest, data_dev_, n_local_, exec_space_); } template -void hiopVectorRaja:: -copyToStartingAt_w_pattern(hiopVector& vec, int start_index_in_dest, const hiopVector& select) const +void hiopVectorRaja::copyToStartingAt_w_pattern(hiopVector& vec, + int start_index_in_dest, + const hiopVector& select) const { if(n_local_ == 0) { return; } - + hiopVectorRaja& v = dynamic_cast&>(vec); const hiopVectorRaja& selected = dynamic_cast&>(select); assert(n_local_ == selected.n_local_); - + double* dd = data_dev_; double* vd = v.data_dev_; const double* pattern = selected.local_data_const(); if(nullptr == idx_cumsum_) { - idx_cumsum_ = LinearAlgebraFactory::create_vector_int(mem_space_, n_local_+1); + idx_cumsum_ = LinearAlgebraFactory::create_vector_int(mem_space_, n_local_ + 1); index_type* nnz_in_row = idx_cumsum_->local_data(); - + RAJA::forall( - RAJA::RangeSegment(0, n_local_+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(i==0) { - nnz_in_row[i] = 0; - } else { - // from i=1..n - if(pattern[i-1]!=0.0){ - nnz_in_row[i] = 1; + RAJA::RangeSegment(0, n_local_ + 1), + RAJA_LAMBDA(RAJA::Index_type i) { + if(i == 0) { + nnz_in_row[i] = 0; } else { - nnz_in_row[i] = 0; + // from i=1..n + if(pattern[i - 1] != 0.0) { + nnz_in_row[i] = 1; + } else { + nnz_in_row[i] = 0; + } } - } - } - ); - RAJA::inclusive_scan_inplace(RAJA::make_span(nnz_in_row,n_local_+1), RAJA::operators::plus()); + }); + RAJA::inclusive_scan_inplace(RAJA::make_span(nnz_in_row, n_local_ + 1), + RAJA::operators::plus()); } index_type* nnz_cumsum = idx_cumsum_->local_data(); index_type v_n_local = v.n_local_; RAJA::forall( - RAJA::RangeSegment(1, n_local_+1), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(nnz_cumsum[i] != nnz_cumsum[i-1]){ - assert(nnz_cumsum[i] == nnz_cumsum[i-1] + 1); - index_type idx_dest = nnz_cumsum[i-1] + start_index_in_dest; - assert(idx_dest < v_n_local); - vd[idx_dest] = dd[i-1]; - } - } - ); - + RAJA::RangeSegment(1, n_local_ + 1), + RAJA_LAMBDA(RAJA::Index_type i) { + if(nnz_cumsum[i] != nnz_cumsum[i - 1]) { + assert(nnz_cumsum[i] == nnz_cumsum[i - 1] + 1); + index_type idx_dest = nnz_cumsum[i - 1] + start_index_in_dest; + assert(idx_dest < v_n_local); + vd[idx_dest] = dd[i - 1]; + } + }); } /* copy 'c' and `d` into `this`, according to the map 'c_map` and `d_map`, respectively. -* e.g., this[c_map[i]] = c[i]; -* -* @pre the size of `this` = the size of `c` + the size of `d`. -* @pre `c_map` \Union `d_map` = {0, ..., size_of_this_vec-1} -*/ + * e.g., this[c_map[i]] = c[i]; + * + * @pre the size of `this` = the size of `c` + the size of `d`. + * @pre `c_map` \Union `d_map` = {0, ..., size_of_this_vec-1} + */ template void hiopVectorRaja::copy_from_two_vec_w_pattern(const hiopVector& c, const hiopVectorInt& c_map, @@ -600,7 +569,7 @@ void hiopVectorRaja::copy_from_two_vec_w_pattern(const hiopVector& c, const auto& v2 = dynamic_cast&>(d); const auto& ix1 = dynamic_cast&>(c_map); const auto& ix2 = dynamic_cast&>(d_map); - + size_type n1_local = v1.n_local_; size_type n2_local = v2.n_local_; @@ -608,40 +577,36 @@ void hiopVectorRaja::copy_from_two_vec_w_pattern(const hiopVector& c, assert(n1_local + n2_local == n_local_); assert(n_local_ == ix1.get_local_size() + ix2.get_local_size()); #endif - double* dd = data_dev_; - double* vd1 = v1.data_dev_; - double* vd2 = v2.data_dev_; + double* dd = data_dev_; + double* vd1 = v1.data_dev_; + double* vd2 = v2.data_dev_; const index_type* id1 = ix1.local_data_const(); const index_type* id2 = ix2.local_data_const(); - - int n1_local_int = (int) n1_local; - int n2_local_int = (int) n2_local; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n1_local_int), - RAJA_LAMBDA(RAJA::Index_type i) - { - int idx = id1[i]; - dd[idx] = vd1[i]; - } - ); + int n1_local_int = (int)n1_local; + int n2_local_int = (int)n2_local; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n2_local_int), - RAJA_LAMBDA(RAJA::Index_type i) - { - int idx = id2[i]; - dd[idx] = vd2[i]; - } - ); + RAJA::forall( + RAJA::RangeSegment(0, n1_local_int), + RAJA_LAMBDA(RAJA::Index_type i) { + int idx = id1[i]; + dd[idx] = vd1[i]; + }); + + RAJA::forall( + RAJA::RangeSegment(0, n2_local_int), + RAJA_LAMBDA(RAJA::Index_type i) { + int idx = id2[i]; + dd[idx] = vd2[i]; + }); } /* split `this` to `c` and `d`, according to the map 'c_map` and `d_map`, respectively. -* -* @pre the size of `this` = the size of `c` + the size of `d`. -* @pre `c_map` \Union `d_map` = {0, ..., size_of_this_vec-1} -*/ + * + * @pre the size of `this` = the size of `c` + the size of `d`. + * @pre `c_map` \Union `d_map` = {0, ..., size_of_this_vec-1} + */ template void hiopVectorRaja::copy_to_two_vec_w_pattern(hiopVector& c, const hiopVectorInt& c_map, @@ -652,7 +617,7 @@ void hiopVectorRaja::copy_to_two_vec_w_pattern(hiopVector& c, const auto& v2 = dynamic_cast&>(d); const auto& ix1 = dynamic_cast&>(c_map); const auto& ix2 = dynamic_cast&>(d_map); - + size_type n1_local = v1.n_local_; size_type n2_local = v2.n_local_; @@ -660,42 +625,38 @@ void hiopVectorRaja::copy_to_two_vec_w_pattern(hiopVector& c, assert(n1_local + n2_local == n_local_); assert(n_local_ == ix1.get_local_size() + ix2.get_local_size()); #endif - double* dd = data_dev_; - double* vd1 = v1.data_dev_; - double* vd2 = v2.data_dev_; + double* dd = data_dev_; + double* vd1 = v1.data_dev_; + double* vd2 = v2.data_dev_; const index_type* id1 = ix1.local_data_const(); const index_type* id2 = ix2.local_data_const(); - - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, (int)n1_local), - RAJA_LAMBDA(RAJA::Index_type i) - { - int idx = id1[i]; - vd1[i] = dd[idx]; - } - ); - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, (int)n2_local), - RAJA_LAMBDA(RAJA::Index_type i) - { - int idx = id2[i]; - vd2[i] = dd[idx]; - } - ); + RAJA::forall( + RAJA::RangeSegment(0, (int)n1_local), + RAJA_LAMBDA(RAJA::Index_type i) { + int idx = id1[i]; + vd1[i] = dd[idx]; + }); + + RAJA::forall( + RAJA::RangeSegment(0, (int)n2_local), + RAJA_LAMBDA(RAJA::Index_type i) { + int idx = id2[i]; + vd2[i] = dd[idx]; + }); } /** * @brief Copy elements of `this` vector to `dest` with offsets. - * - * Copy `this` (source) starting at `start_idx_in_src` to `dest` - * starting at index 'int start_idx_dest'. If num_elems>=0, 'num_elems' will be copied; - * + * + * Copy `this` (source) starting at `start_idx_in_src` to `dest` + * starting at index 'int start_idx_dest'. If num_elems>=0, 'num_elems' will be copied; + * * @param[in] start_idx_in_src - position in `this` from where to copy * @param[out] dest - destination vector to where to copy vector data * @param[in] start_idx_dest - position in `dest` to where to copy * @param[in] num_elems - number of elements to copy - * + * * @pre start_idx_in_src <= n_local_ * @pre start_idx_dest <= dest.n_local_ * @pre `this` and `dest` are not distributed @@ -709,48 +670,42 @@ void hiopVectorRaja::startingAtCopyToStartingAt(index_type start_idx_i index_type start_idx_dest, size_type num_elems /* = -1 */) const { - #ifdef HIOP_DEEPCHECKS - assert(n_local_==n_ && "only for local/non-distributed vectors"); -#endif + assert(n_local_ == n_ && "only for local/non-distributed vectors"); +#endif hiopVectorRaja& dest_raja = dynamic_cast&>(dest); assert(start_idx_in_src >= 0 && start_idx_in_src <= this->n_local_); - assert(start_idx_dest >= 0 && start_idx_dest <= dest_raja.n_local_); + assert(start_idx_dest >= 0 && start_idx_dest <= dest_raja.n_local_); -#ifndef NDEBUG - if(start_idx_dest==dest_raja.n_local_ || start_idx_in_src==this->n_local_) assert((num_elems==-1 || num_elems==0)); +#ifndef NDEBUG + if(start_idx_dest == dest_raja.n_local_ || start_idx_in_src == this->n_local_) assert((num_elems == -1 || num_elems == 0)); #endif - if(num_elems<0) - { + if(num_elems < 0) { num_elems = std::min(this->n_local_ - start_idx_in_src, dest_raja.n_local_ - start_idx_dest); - } - else - { - assert(num_elems+start_idx_in_src <= this->n_local_); - assert(num_elems+start_idx_dest <= dest_raja.n_local_); - //make sure everything stays within bounds (in release) - num_elems = std::min(num_elems, (int)this->n_local_-start_idx_in_src); - num_elems = std::min(num_elems, (int)dest_raja.n_local_-start_idx_dest); + } else { + assert(num_elems + start_idx_in_src <= this->n_local_); + assert(num_elems + start_idx_dest <= dest_raja.n_local_); + // make sure everything stays within bounds (in release) + num_elems = std::min(num_elems, (int)this->n_local_ - start_idx_in_src); + num_elems = std::min(num_elems, (int)dest_raja.n_local_ - start_idx_dest); } - if(num_elems == 0) - return; + if(num_elems == 0) return; - //rm.copy(dest.data_dev_ + start_idx_dest, this->data_dev_ + start_idx_in_src, num_elems*sizeof(double)); - //TODO: fix pointer arithmetic on host - dest_raja.exec_space_.copy(dest_raja.data_dev_+start_idx_dest, data_dev_+start_idx_in_src, num_elems, exec_space_); + // rm.copy(dest.data_dev_ + start_idx_dest, this->data_dev_ + start_idx_in_src, num_elems*sizeof(double)); + // TODO: fix pointer arithmetic on host + dest_raja.exec_space_.copy(dest_raja.data_dev_ + start_idx_dest, data_dev_ + start_idx_in_src, num_elems, exec_space_); } template -void hiopVectorRaja:: -startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, - hiopVector& destination, - index_type start_idx_dest, - const hiopVector& selec_dest, - size_type num_elems/*=-1*/) const +void hiopVectorRaja::startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, + hiopVector& destination, + index_type start_idx_dest, + const hiopVector& selec_dest, + size_type num_elems /*=-1*/) const { #if 0 hiopVectorRaja& dest = dynamic_cast&>(destination); @@ -788,12 +743,12 @@ startingAtCopyToStartingAt_w_pattern(index_type start_idx_in_src, assert(false && "not needed / implemented"); #endif } - - /** + +/** * @brief Copy `this` vector local data to `dest` buffer. - * + * * @param[out] dest - destination buffer where to copy vector data - * + * * @pre Size of `dest` must be >= n_local_ * @pre `dest` should be on the same memory space/backend as `this` * @@ -809,9 +764,9 @@ void hiopVectorRaja::copyTo(double* dest) const /** * @brief L2 vector norm. - * + * * @post `this` is not modified - * + * * @todo Consider implementing with BLAS call (NRM2). */ template @@ -819,11 +774,9 @@ double hiopVectorRaja::twonorm() const { double* self_dev = data_dev_; RAJA::ReduceSum sum(0.0); - RAJA::forall(RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - sum += self_dev[i] * self_dev[i]; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { sum += self_dev[i] * self_dev[i]; }); double nrm = sum.get(); #ifdef HIOP_USE_MPI @@ -831,18 +784,18 @@ double hiopVectorRaja::twonorm() const int ierr = MPI_Allreduce(&nrm, &nrm_global, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS == ierr); return std::sqrt(nrm_global); -#endif +#endif return std::sqrt(nrm); } /** * @brief scalar (dot) product. - * + * * @param[in] vec - vector which is scalar-multiplied to `this`. - * + * * @pre `vec` has same size and partitioning as `this`. * @post `this` and `vec` are not modified. - * + * * @todo Consider implementing with BLAS call (DOT). */ template @@ -854,17 +807,14 @@ double hiopVectorRaja::dotProductWith(const hiopVector& vec) const double* dd = data_dev_; double* vd = v.data_dev_; RAJA::ReduceSum dot(0.0); - RAJA::forall( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) { - dot += dd[i] * vd[i]; - }); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { dot += dd[i] * vd[i]; }); double dotprod = dot.get(); #ifdef HIOP_USE_MPI double dotprodG; int ierr = MPI_Allreduce(&dotprod, &dotprodG, 1, MPI_DOUBLE, MPI_SUM, comm_); - assert(MPI_SUCCESS==ierr); - dotprod=dotprodG; + assert(MPI_SUCCESS == ierr); + dotprod = dotprodG; #endif return dotprod; @@ -872,9 +822,9 @@ double hiopVectorRaja::dotProductWith(const hiopVector& vec) const /** * @brief L-infinity (max) vector norm. - * + * * @post `this` is not modified - * + * */ template double hiopVectorRaja::infnorm() const @@ -883,7 +833,7 @@ double hiopVectorRaja::infnorm() const #ifdef HIOP_USE_MPI double nrm_global; int ierr = MPI_Allreduce(&nrm, &nrm_global, 1, MPI_DOUBLE, MPI_MAX, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return nrm_global; #endif @@ -892,30 +842,28 @@ double hiopVectorRaja::infnorm() const /** * @brief Local L-infinity (max) vector norm. - * + * * @pre `this` is not empty vector * @post `this` is not modified - * + * */ template double hiopVectorRaja::infnorm_local() const { assert(n_local_ >= 0); double* data = data_dev_; - RAJA::ReduceMax< hiop_raja_reduce, double > norm(0.0); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - norm.max(fabs(data[i])); - }); + RAJA::ReduceMax norm(0.0); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { norm.max(fabs(data[i])); }); return norm.get(); } /** * @brief 1-norm of `this` vector. - * + * * @post `this` is not modified - * + * */ template double hiopVectorRaja::onenorm() const @@ -923,7 +871,8 @@ double hiopVectorRaja::onenorm() const double norm1 = onenorm_local(); #ifdef HIOP_USE_MPI double nrm1_global; - int ierr = MPI_Allreduce(&norm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&norm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); return nrm1_global; #endif return norm1; @@ -931,30 +880,26 @@ double hiopVectorRaja::onenorm() const /** * @brief Local 1-norm of `this` vector. - * + * * @pre `this` is not empty vector * @post `this` is not modified - * + * */ template double hiopVectorRaja::onenorm_local() const { double* data = data_dev_; - RAJA::ReduceSum< hiop_raja_reduce, double > sum(0.0); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - sum += fabs(data[i]); - }); + RAJA::ReduceSum sum(0.0); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { sum += fabs(data[i]); }); return sum.get(); } /** * @brief Multiply `this` by `vec` elementwise and store result in `this`. - * + * * @pre `this` and `vec` have same partitioning. * @post `vec` is not modified - * + * */ template void hiopVectorRaja::componentMult(const hiopVector& vec) @@ -963,49 +908,41 @@ void hiopVectorRaja::componentMult(const hiopVector& vec) assert(n_local_ == v.n_local_); double* dd = data_dev_; double* vd = v.data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] *= vd[i]; - }); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { dd[i] *= vd[i]; }); } /** - * @brief Divide `this` vector elemenwise in-place by `vec`. - * + * @brief Divide `this` vector elemenwise in-place by `vec`. + * * @pre `this` and `vec` have same partitioning. * @pre vec[i] != 0 forall i * @post `vec` is not modified - * + * */ template -void hiopVectorRaja::componentDiv (const hiopVector& vec) +void hiopVectorRaja::componentDiv(const hiopVector& vec) { const hiopVectorRaja& v = dynamic_cast&>(vec); assert(n_local_ == v.n_local_); double* dd = data_dev_; double* vd = v.data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] /= vd[i]; - }); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { dd[i] /= vd[i]; }); } /** * @brief Divide `this` vector elemenwise in-place by `vec` - * with pattern selection. - * + * with pattern selection. + * * @pre `this`, `select` and `vec` have same partitioning. * @pre vec[i] != 0 when select[i] = 1 * @post `vec` and `select` are not modified - * + * */ template void hiopVectorRaja::componentDiv_w_selectPattern(const hiopVector& vec, const hiopVector& select) { const hiopVectorRaja& v = dynamic_cast&>(vec); - const hiopVectorRaja& ix= dynamic_cast&>(select); + const hiopVectorRaja& ix = dynamic_cast&>(select); #ifdef HIOP_DEEPCHECKS assert(v.n_local_ == n_local_); assert(n_local_ == ix.n_local_); @@ -1013,15 +950,15 @@ void hiopVectorRaja::componentDiv_w_selectPattern(const hiopVector& ve double* dd = data_dev_; double* vd = v.data_dev_; double* id = ix.data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(id[i] == zero || id[i] == one); - if(id[i] == zero) - dd[i] = zero; - else - dd[i] /= vd[i]; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(id[i] == zero || id[i] == one); + if(id[i] == zero) + dd[i] = zero; + else + dd[i] /= vd[i]; + }); } /** @@ -1031,23 +968,21 @@ template void hiopVectorRaja::component_min(const double constant) { double* dd = data_dev_; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(dd[i]>constant) { - dd[i] = constant; - } - } - ); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(dd[i] > constant) { + dd[i] = constant; + } + }); } /** * @brief Set `this` vector elemenwise to the minimum of itself and the corresponding component of 'vec'. - * + * * @pre `this` and `vec` have same partitioning. * @post `vec` is not modified - * + * */ template void hiopVectorRaja::component_min(const hiopVector& vec) @@ -1056,15 +991,13 @@ void hiopVectorRaja::component_min(const hiopVector& vec) assert(n_local_ == v.n_local_); double* dd = data_dev_; double* vd = v.data_dev_; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(dd[i]>vd[i]) { - dd[i] = vd[i]; - } - } - ); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(dd[i] > vd[i]) { + dd[i] = vd[i]; + } + }); } /** @@ -1074,23 +1007,21 @@ template void hiopVectorRaja::component_max(const double constant) { double* dd = data_dev_; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(dd[i]( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(dd[i] < constant) { + dd[i] = constant; + } + }); } /** * @brief Set `this` vector elemenwise to the maximum of itself and the corresponding component of 'vec'. - * + * * @pre `this` and `vec` have same partitioning. * @post `vec` is not modified - * + * */ template void hiopVectorRaja::component_max(const hiopVector& vec) @@ -1099,48 +1030,38 @@ void hiopVectorRaja::component_max(const hiopVector& vec) assert(n_local_ == v.n_local_); double* dd = data_dev_; double* vd = v.data_dev_; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(dd[i]( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(dd[i] < vd[i]) { + dd[i] = vd[i]; + } + }); } /** * @brief Set each component to its absolute value */ template -void hiopVectorRaja::component_abs () +void hiopVectorRaja::component_abs() { double* dd = data_dev_; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] = fabs(dd[i]); - } - ); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { dd[i] = fabs(dd[i]); }); } /** * @brief Apply sign function to each component */ template -void hiopVectorRaja::component_sgn () +void hiopVectorRaja::component_sgn() { double* dd = data_dev_; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - int sign = (0.0 < dd[i]) - (dd[i] < 0.0); - dd[i] = static_cast(sign); - } - ); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + int sign = (0.0 < dd[i]) - (dd[i] < 0.0); + dd[i] = static_cast(sign); + }); } /** @@ -1151,55 +1072,44 @@ template void hiopVectorRaja::component_sqrt() { double* dd = data_dev_; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] = sqrt(dd[i]); - } - ); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { dd[i] = sqrt(dd[i]); }); } /** - * @brief Scale `this` vector by `c` - * + * @brief Scale `this` vector by `c` + * * @note Consider implementing with BLAS call (SCAL) */ template void hiopVectorRaja::scale(double c) { - if(1.0==c) - return; - + if(1.0 == c) return; + double* data = data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - data[i] *= c; - }); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { data[i] *= c; }); } /** - * @brief Implementation of AXPY kernel - * + * @brief Implementation of AXPY kernel + * * @pre `this` and `xvec` have same partitioning. * @post `xvec` is not modified - * + * * @note Consider implementing with BLAS call (AXPY) */ template void hiopVectorRaja::axpy(double alpha, const hiopVector& xvec) { const hiopVectorRaja& x = dynamic_cast&>(xvec); - + double* yd = data_dev_; double* xd = x.data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - // y := a * x + y - yd[i] = alpha * xd[i] + yd[i]; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + // y := a * x + y + yd[i] = alpha * xd[i] + yd[i]; + }); } /// @brief Performs axpy, this += alpha*x, on the indexes in this specified by i. @@ -1209,22 +1119,22 @@ void hiopVectorRaja::axpy(double alpha, const hiopVector& xvec, const const auto& x = dynamic_cast&>(xvec); const auto& idxs = dynamic_cast&>(i); - assert(x.get_size()==i.get_local_size()); - assert(x.get_local_size()==i.get_local_size()); - assert(i.get_local_size()<=n_local_); - + assert(x.get_size() == i.get_local_size()); + assert(x.get_local_size() == i.get_local_size()); + assert(i.get_local_size() <= n_local_); + double* dd = data_dev_; double* xd = const_cast(x.data_dev_); index_type* id = const_cast(idxs.local_data_const()); auto tmp_n_local = n_local_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(id[i]( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(id[i] < tmp_n_local); + // y := a * x + y + dd[id[i]] = alpha * xd[i] + dd[id[i]]; + }); } /// @brief Performs axpy, this += alpha*x, for selected entries @@ -1235,21 +1145,19 @@ void hiopVectorRaja::axpy_w_pattern(double alpha, const hiopVector& xv const hiopVectorRaja& sel = dynamic_cast&>(select); #ifdef HIOP_DEEPCHECKS assert(x.n_local_ == sel.n_local_); - assert( n_local_ == sel.n_local_); -#endif - double *dd = data_dev_; - const double *xd = x.local_data_const(); - const double *id = sel.local_data_const(); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] += alpha * xd[i] * id[i]; - }); + assert(n_local_ == sel.n_local_); +#endif + double* dd = data_dev_; + const double* xd = x.local_data_const(); + const double* id = sel.local_data_const(); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { dd[i] += alpha * xd[i] * id[i]; }); } /** * @brief this[i] += alpha*x[i]*z[i] forall i - * + * * @pre `this`, `xvec` and `zvec` have same partitioning. * @post `xvec` and `zvec` are not modified */ @@ -1260,21 +1168,19 @@ void hiopVectorRaja::axzpy(double alpha, const hiopVector& xvec, const const hiopVectorRaja& z = dynamic_cast&>(zvec); #ifdef HIOP_DEEPCHECKS assert(x.n_local_ == z.n_local_); - assert( n_local_ == z.n_local_); -#endif - double *dd = data_dev_; - const double *xd = x.local_data_const(); - const double *zd = z.local_data_const(); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - dd[i] += alpha*xd[i]*zd[i]; - }); + assert(n_local_ == z.n_local_); +#endif + double* dd = data_dev_; + const double* xd = x.local_data_const(); + const double* zd = z.local_data_const(); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { dd[i] += alpha * xd[i] * zd[i]; }); } /** * @brief this[i] += alpha*x[i]/z[i] forall i - * + * * @pre `this`, `xvec` and `zvec` have same partitioning. * @pre zvec[i] != 0 forall i * @post `xvec` and `zvec` are not modified @@ -1285,29 +1191,27 @@ void hiopVectorRaja::axdzpy(double alpha, const hiopVector& xvec, cons const hiopVectorRaja& x = dynamic_cast&>(xvec); const hiopVectorRaja& z = dynamic_cast&>(zvec); #ifdef HIOP_DEEPCHECKS - assert(x.n_local_==z.n_local_); - assert( n_local_==z.n_local_); -#endif - double *yd = data_dev_; - const double *xd = x.local_data_const(); - const double *zd = z.local_data_const(); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - yd[i] += alpha*xd[i]/zd[i]; - }); + assert(x.n_local_ == z.n_local_); + assert(n_local_ == z.n_local_); +#endif + double* yd = data_dev_; + const double* xd = x.local_data_const(); + const double* zd = z.local_data_const(); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { yd[i] += alpha * xd[i] / zd[i]; }); } /** * @brief this[i] += alpha*x[i]/z[i] forall i with pattern selection - * + * * @pre `this`, `xvec`, `zvec` and `select` have same partitioning. * @pre zvec[i] != 0 when select[i] = 1 * @post `xvec`, `zvec` and `select` are not modified */ template void hiopVectorRaja::axdzpy_w_pattern(double alpha, - const hiopVector& xvec, + const hiopVector& xvec, const hiopVector& zvec, const hiopVector& select) { @@ -1315,40 +1219,35 @@ void hiopVectorRaja::axdzpy_w_pattern(double alpha, const hiopVectorRaja& z = dynamic_cast&>(zvec); const hiopVectorRaja& sel = dynamic_cast&>(select); #ifdef HIOP_DEEPCHECKS - assert(x.n_local_==z.n_local_); - assert( n_local_==z.n_local_); -#endif + assert(x.n_local_ == z.n_local_); + assert(n_local_ == z.n_local_); +#endif double* yd = data_dev_; const double* xd = x.local_data_const(); - const double* zd = z.local_data_const(); + const double* zd = z.local_data_const(); const double* id = sel.local_data_const(); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(id[i] == one || id[i] == zero); - if(id[i] == one) - yd[i] += alpha * xd[i] / zd[i]; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(id[i] == one || id[i] == zero); + if(id[i] == one) yd[i] += alpha * xd[i] / zd[i]; + }); } /** * @brief this[i] += c forall i - * + * */ template void hiopVectorRaja::addConstant(double c) { - double *yd = data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - yd[i] += c; - }); + double* yd = data_dev_; + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { yd[i] += c; }); } /** * @brief this[i] += c forall i with pattern selection - * + * * @pre `this` and `select` have same partitioning. * @post `select` is not modified */ @@ -1357,14 +1256,14 @@ void hiopVectorRaja::addConstant_w_patternSelect(double c, const hiopV { const hiopVectorRaja& sel = dynamic_cast&>(select); assert(this->n_local_ == sel.n_local_); - double *data = data_dev_; - const double *id = sel.local_data_const(); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(id[i] == one || id[i] == zero); - data[i] += id[i]*c; - }); + double* data = data_dev_; + const double* id = sel.local_data_const(); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(id[i] == one || id[i] == zero); + data[i] += id[i] * c; + }); } /// Find minimum vector element @@ -1372,19 +1271,14 @@ template double hiopVectorRaja::min() const { double* data = data_dev_; - RAJA::ReduceMin< hiop_raja_reduce, double > minimum(std::numeric_limits::max()); - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - minimum.min(data[i]); - } - ); + RAJA::ReduceMin minimum(std::numeric_limits::max()); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { minimum.min(data[i]); }); double ret_val = minimum.get(); #ifdef HIOP_USE_MPI double ret_val_g; - int ierr=MPI_Allreduce(&ret_val, &ret_val_g, 1, MPI_DOUBLE, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&ret_val, &ret_val_g, 1, MPI_DOUBLE, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); ret_val = ret_val_g; #endif return ret_val; @@ -1398,22 +1292,21 @@ double hiopVectorRaja::min_w_pattern(const hiopVector& select) const assert(this->n_local_ == sel.n_local_); double* data = data_dev_; const double* id = sel.local_data_const(); - - RAJA::ReduceMin< hiop_raja_reduce, double > minimum(std::numeric_limits::max()); - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(id[i] == one) { - minimum.min(data[i]); - } - } - ); + + RAJA::ReduceMin minimum(std::numeric_limits::max()); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(id[i] == one) { + minimum.min(data[i]); + } + }); double ret_val = minimum.get(); #ifdef HIOP_USE_MPI double ret_val_g; - int ierr=MPI_Allreduce(&ret_val, &ret_val_g, 1, MPI_DOUBLE, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&ret_val, &ret_val_g, 1, MPI_DOUBLE, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); ret_val = ret_val_g; #endif return ret_val; @@ -1421,33 +1314,29 @@ double hiopVectorRaja::min_w_pattern(const hiopVector& select) const /// Find minimum vector element template -void hiopVectorRaja::min( double& /* m */, int& /* index */) const +void hiopVectorRaja::min(double& /* m */, int& /* index */) const { assert(false && "not implemented"); } /** * @brief Negate all vector elements - * + * * @note Consider implementing with BLAS call (SCAL) */ template void hiopVectorRaja::negate() { double* data = data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - data[i] *= -1; - }); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { data[i] *= -1; }); } /** * @brief Invert vector elements - * + * * @pre this[i] != 0 forall i * @post `this` is overwritten - * + * * @todo Consider having HiOp-wide `small_real` constant defined. */ template @@ -1458,24 +1347,24 @@ void hiopVectorRaja::invert() const double small_real = 1e-35; #endif #endif - double *data = data_dev_; - RAJA::forall< hiop_raja_exec >(RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { + double* data = data_dev_; + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { #ifdef HIOP_DEEPCHECKS - assert(fabs(data[i]) > small_real); + assert(fabs(data[i]) > small_real); #endif - data[i] = one/data[i]; - }); + data[i] = one / data[i]; + }); } /** * @brief Sum all selected log(this[i]) - * + * * @pre `this` and `select` have same partitioning. * @pre Selected elements of `this` are > 0. * @post `this` and `select` are not modified - * + * * @warning This is local method only! */ template @@ -1486,17 +1375,15 @@ double hiopVectorRaja::logBarrier_local(const hiopVector& select) cons double* data = data_dev_; const double* id = sel.local_data_const(); - RAJA::ReduceSum< hiop_raja_reduce, double > sum(0.0); - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { + RAJA::ReduceSum sum(0.0); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { #ifdef HIOP_DEEPCHECKS - assert(id[i] == one || id[i] == zero); + assert(id[i] == one || id[i] == zero); #endif - if(id[i] == one) - sum += std::log(data[i]); - }); + if(id[i] == one) sum += std::log(data[i]); + }); return sum.get(); } @@ -1508,32 +1395,24 @@ template double hiopVectorRaja::sum_local() const { double* data = data_dev_; - RAJA::ReduceSum< hiop_raja_reduce, double > sum(0.0); - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - sum += data[i]; - } - ); + RAJA::ReduceSum sum(0.0); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { sum += data[i]; }); return sum.get(); } /** - * @brief adds the gradient of the log barrier, namely this[i]=this[i]+alpha*1/select(x[i]) - * + * @brief adds the gradient of the log barrier, namely this[i]=this[i]+alpha*1/select(x[i]) + * * @pre `this`, `xvec` and `select` have same partitioning. * @pre xvec[i] != 0 forall i * @post `xvec` and `select` are not modified */ template -void hiopVectorRaja::addLogBarrierGrad(double alpha, - const hiopVector& xvec, - const hiopVector& select) +void hiopVectorRaja::addLogBarrierGrad(double alpha, const hiopVector& xvec, const hiopVector& select) { const hiopVectorRaja& x = dynamic_cast&>(xvec); - const hiopVectorRaja& sel = dynamic_cast&>(select); + const hiopVectorRaja& sel = dynamic_cast&>(select); #ifdef HIOP_DEEPCHECKS assert(n_local_ == x.n_local_); assert(n_local_ == sel.n_local_); @@ -1541,21 +1420,20 @@ void hiopVectorRaja::addLogBarrierGrad(double alpha, double* data = data_dev_; const double* xd = x.local_data_const(); const double* id = sel.local_data_const(); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if (id[i] == 1.0) - data[i] += alpha/xd[i]; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(id[i] == 1.0) data[i] += alpha / xd[i]; + }); } /** * @brief Linear damping term - * + * * @pre `this`, `ixleft` and `ixright` have same partitioning. * @pre `ixleft` and `ixright` elements are 0 or 1 only. * @post `this`, `ixleft` and `ixright` are not modified - * + * * @warning This is local method only! */ template @@ -1565,7 +1443,7 @@ double hiopVectorRaja::linearDampingTerm_local(const hiopVector& ixlef const double& kappa_d) const { const hiopVectorRaja& ixl = dynamic_cast&>(ixleft); - const hiopVectorRaja& ixr = dynamic_cast&>(ixright); + const hiopVectorRaja& ixr = dynamic_cast&>(ixright); #ifdef HIOP_DEEPCHECKS assert(n_local_ == ixl.n_local_); assert(n_local_ == ixr.n_local_); @@ -1573,16 +1451,14 @@ double hiopVectorRaja::linearDampingTerm_local(const hiopVector& ixlef const double* ld = ixl.local_data_const(); const double* rd = ixr.local_data_const(); double* data = data_dev_; - RAJA::ReduceSum< hiop_raja_reduce, double > sum(zero); - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if (ld[i] == one && rd[i] == zero) - sum += data[i]; - }); + RAJA::ReduceSum sum(zero); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(ld[i] == one && rd[i] == zero) sum += data[i]; + }); double term = sum.get(); - term *= mu; + term *= mu; term *= kappa_d; return term; } @@ -1593,44 +1469,39 @@ void hiopVectorRaja::addLinearDampingTerm(const hiopVector& ixleft, const double& alpha, const double& ct) { - assert((dynamic_cast&>(ixleft)).n_local_ == n_local_); assert((dynamic_cast&>(ixright)).n_local_ == n_local_); - const double* ixl= (dynamic_cast&>(ixleft)).local_data_const(); - const double* ixr= (dynamic_cast&>(ixright)).local_data_const(); + const double* ixl = (dynamic_cast&>(ixleft)).local_data_const(); + const double* ixr = (dynamic_cast&>(ixright)).local_data_const(); double* data = data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - // y := a * x + ... - data[i] = alpha * data[i] + ct*(ixl[i]-ixr[i]); - }); - + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + // y := a * x + ... + data[i] = alpha * data[i] + ct * (ixl[i] - ixr[i]); + }); } /** * @brief Check if all elements of the vector are positive - * + * * @post `this` is not modified */ template int hiopVectorRaja::allPositive() { double* data = data_dev_; - RAJA::ReduceMin< hiop_raja_reduce, double > minimum(one); - RAJA::forall< hiop_raja_exec >(RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - minimum.min(data[i]); - }); + RAJA::ReduceMin minimum(one); + RAJA::forall(RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { minimum.min(data[i]); }); int allPos = minimum.get() > zero ? 1 : 0; #ifdef HIOP_USE_MPI int allPosG; - int ierr=MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); return allPosG; #endif return allPos; @@ -1638,15 +1509,15 @@ int hiopVectorRaja::allPositive() /** * @brief Project solution into bounds - * + * * @pre `this`, `xlo`, `ixl`, `xup` and `ixu` have same partitioning. * @pre `ixl` and `ixu` elements are 0 or 1 only. * @post `xlo`, `ixl`, `xup` and `ixu` are not modified - * + * * @warning This is local method only! */ template -bool hiopVectorRaja::projectIntoBounds_local(const hiopVector& xlo, +bool hiopVectorRaja::projectIntoBounds_local(const hiopVector& xlo, const hiopVector& ixl, const hiopVector& xup, const hiopVector& ixu, @@ -1669,63 +1540,53 @@ bool hiopVectorRaja::projectIntoBounds_local(const hiopVector& xlo, const double* ild = il.local_data_const(); const double* xud = xu.local_data_const(); const double* iud = iu.local_data_const(); - double* xd = data_dev_; + double* xd = data_dev_; // Perform preliminary check to see of all upper value - RAJA::ReduceMin< hiop_raja_reduce, double > minimum(one); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - minimum.min(xud[i] - xld[i]); - }); - if (minimum.get() < zero) - return false; + RAJA::ReduceMin minimum(one); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { minimum.min(xud[i] - xld[i]); }); + if(minimum.get() < zero) return false; const double small_real = std::numeric_limits::min() * 100; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - double aux = zero; - double aux2 = zero; - if(ild[i] != zero && iud[i] != zero) - { - aux = kappa2*(xud[i] - xld[i]) - small_real; - aux2 = xld[i] + fmin(kappa1 * fmax(one, fabs(xld[i])), aux); - if(xd[i] < aux2) - { - xd[i] = aux2; - } - else - { - aux2 = xud[i] - fmin(kappa1 * fmax(one, fabs(xud[i])), aux); - if(xd[i] > aux2) - { + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + double aux = zero; + double aux2 = zero; + if(ild[i] != zero && iud[i] != zero) { + aux = kappa2 * (xud[i] - xld[i]) - small_real; + aux2 = xld[i] + fmin(kappa1 * fmax(one, fabs(xld[i])), aux); + if(xd[i] < aux2) { xd[i] = aux2; + } else { + aux2 = xud[i] - fmin(kappa1 * fmax(one, fabs(xud[i])), aux); + if(xd[i] > aux2) { + xd[i] = aux2; + } } - } #ifdef HIOP_DEEPCHECKS - assert(xd[i] > xld[i] && xd[i] < xud[i] && "this should not happen -> HiOp bug"); + assert(xd[i] > xld[i] && xd[i] < xud[i] && "this should not happen -> HiOp bug"); #endif - } - else - { - if(ild[i] != zero) - xd[i] = fmax(xd[i], xld[i] + kappa1*fmax(one, fabs(xld[i])) - small_real); - else - if(iud[i] != zero) - xd[i] = fmin(xd[i], xud[i] - kappa1*fmax(one, fabs(xud[i])) - small_real); - else { /*nothing for free vars */ } - } - }); + } else { + if(ild[i] != zero) + xd[i] = fmax(xd[i], xld[i] + kappa1 * fmax(one, fabs(xld[i])) - small_real); + else if(iud[i] != zero) + xd[i] = fmin(xd[i], xud[i] - kappa1 * fmax(one, fabs(xud[i])) - small_real); + else { /*nothing for free vars */ + } + } + }); return true; } /** * @brief max{a\in(0,1]| x+ad >=(1-tau)x} - * + * * @pre `this` and `dvec` have same partitioning. * @post `this` and `dvec` are not modified - * + * * @warning This is local method only! */ template @@ -1735,38 +1596,37 @@ double hiopVectorRaja::fractionToTheBdry_local(const hiopVector& dvec, #ifdef HIOP_DEEPCHECKS assert(d.n_local_ == n_local_); assert(tau > 0); - assert(tau < 1); // TODO: per documentation above it should be tau <= 1 (?). + assert(tau < 1); // TODO: per documentation above it should be tau <= 1 (?). #endif const double* dd = d.local_data_const(); const double* xd = data_dev_; - RAJA::ReduceMin< hiop_raja_reduce, double > minimum(one); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(dd[i] >= zero) - return; + RAJA::ReduceMin minimum(one); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(dd[i] >= zero) return; #ifdef HIOP_DEEPCHECKS - assert(xd[i] > zero); + assert(xd[i] > zero); #endif - minimum.min(-tau*xd[i]/dd[i]); - }); + minimum.min(-tau * xd[i] / dd[i]); + }); return minimum.get(); } /** * @brief max{a\in(0,1]| x+ad >=(1-tau)x} with pattern select - * + * * @pre `this`, `select` and `dvec` have same partitioning. * @pre Elements of `select` are either 0 or 1. * @post `this`, `select` and `dvec` are not modified - * + * * @warning This is local method only! */ template double hiopVectorRaja::fractionToTheBdry_w_pattern_local(const hiopVector& dvec, - const double& tau, + const double& tau, const hiopVector& select) const { const hiopVectorRaja& d = dynamic_cast&>(dvec); @@ -1775,32 +1635,31 @@ double hiopVectorRaja::fractionToTheBdry_w_pattern_local(const hiopVec #ifdef HIOP_DEEPCHECKS assert(d.n_local_ == n_local_); assert(s.n_local_ == n_local_); - assert(tau>0); - assert(tau<1); + assert(tau > 0); + assert(tau < 1); #endif const double* dd = d.local_data_const(); const double* xd = data_dev_; const double* id = s.local_data_const(); - RAJA::ReduceMin< hiop_raja_reduce, double > aux(one); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - assert(id[i] == one || id[i] == zero); - if(dd[i] < 0 && id[i] == one) - { + RAJA::ReduceMin aux(one); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + assert(id[i] == one || id[i] == zero); + if(dd[i] < 0 && id[i] == one) { #ifdef HIOP_DEEPCHECKS - assert(xd[i] > 0); + assert(xd[i] > 0); #endif - aux.min(-tau*xd[i]/dd[i]); - } - }); + aux.min(-tau * xd[i] / dd[i]); + } + }); return aux.get(); } /** * @brief Set elements of `this` to zero based on `select`. - * + * * @pre `this` and `select` have same partitioning. * @pre Elements of `select` are either 0 or 1. * @post `select` is not modified @@ -1810,48 +1669,46 @@ void hiopVectorRaja::selectPattern(const hiopVector& select) { const hiopVectorRaja& s = dynamic_cast&>(select); #ifdef HIOP_DEEPCHECKS - assert(s.n_local_==n_local_); + assert(s.n_local_ == n_local_); #endif double* data = data_dev_; double* sd = s.data_dev_; - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) { - if(sd[i] == zero) - data[i] = zero; - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(sd[i] == zero) data[i] = zero; + }); } /** * @brief Checks if `this` matches nonzero pattern of `select`. - * + * * @pre `this` and `select` have same partitioning. * @pre Elements of `select` are either 0 or 1. * @post `select` is not modified */ template bool hiopVectorRaja::matchesPattern(const hiopVector& pattern) -{ +{ const hiopVectorRaja& p = dynamic_cast&>(pattern); #ifdef HIOP_DEEPCHECKS - assert(p.n_local_==n_local_); + assert(p.n_local_ == n_local_); #endif double* data = data_dev_; double* pd = p.data_dev_; RAJA::ReduceSum sum(0); - RAJA::forall( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - sum += (data[i] != 0.0 && pd[i] == 0.0); - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { sum += (data[i] != 0.0 && pd[i] == 0.0); }); int mismatch = sum.get(); #ifdef HIOP_USE_MPI int mismatch_glob = mismatch; int ierr = MPI_Allreduce(&mismatch, &mismatch_glob, 1, MPI_INT, MPI_SUM, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return (mismatch_glob == 0); #endif return (mismatch == 0); @@ -1859,7 +1716,7 @@ bool hiopVectorRaja::matchesPattern(const hiopVector& pattern) /** * @brief Checks if selected elements of `this` are positive. - * + * * @pre `this` and `select` have same partitioning. * @pre Elements of `select` are either 0 or 1. * @post `select` is not modified @@ -1871,100 +1728,99 @@ int hiopVectorRaja::allPositive_w_patternSelect(const hiopVector& sele #ifdef HIOP_DEEPCHECKS assert(w.n_local_ == n_local_); -#endif +#endif const double* wd = w.local_data_const(); const double* data = data_dev_; - RAJA::ReduceSum< hiop_raja_reduce, int > sum(0); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(wd[i] != zero && data[i] <= zero) - sum += 1; - }); + RAJA::ReduceSum sum(0); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(wd[i] != zero && data[i] <= zero) sum += 1; + }); int allPos = (sum.get() == 0); - + #ifdef HIOP_USE_MPI int allPosG; int ierr = MPI_Allreduce(&allPos, &allPosG, 1, MPI_INT, MPI_MIN, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return allPosG; -#endif +#endif return allPos; } /** * @brief Adjusts duals. - * + * * @pre `this`, `xvec` and `ixvec` have same partitioning. * @pre Elements of `ixvec` are either 0 or 1. * @post `xvec` and `ixvec` are not modified - * + * * @note Implementation probably inefficient. */ template -void hiopVectorRaja::adjustDuals_plh(const hiopVector& xvec, +void hiopVectorRaja::adjustDuals_plh(const hiopVector& xvec, const hiopVector& ixvec, const double& mu, const double& kappa) { - const hiopVectorRaja& x = dynamic_cast&>(xvec) ; + const hiopVectorRaja& x = dynamic_cast&>(xvec); const hiopVectorRaja& ix = dynamic_cast&>(ixvec); #ifdef HIOP_DEEPCHECKS - assert(x.n_local_==n_local_); - assert(ix.n_local_==n_local_); + assert(x.n_local_ == n_local_); + assert(ix.n_local_ == n_local_); #endif - const double* xd = x.local_data_const(); + const double* xd = x.local_data_const(); const double* id = ix.local_data_const(); - double* z = data_dev_; //the dual + double* z = data_dev_; // the dual - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - double a,b; - // preemptive loop to reduce number of iterations? - if(id[i] == 1.) { + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + double a, b; + // preemptive loop to reduce number of iterations? + if(id[i] == 1.) { // precompute a and b in another loop? - a = mu/xd[i]; - b = a/kappa; - a = a*kappa; + a = mu / xd[i]; + b = a / kappa; + a = a * kappa; // Necessary conditionals - if(z[i]=b - if(a<=b) - z[i]=b; - else //a>b - if(a=z[i] then *z=*z (z[i] does not need adjustment) - } - }); + if(z[i] < b) + z[i] = b; + else // z[i]>=b + if(a <= b) + z[i] = b; + else // a>b + if(a < z[i]) z[i] = a; + // - - - - + // else a>=z[i] then *z=*z (z[i] does not need adjustment) + } + }); } /** * @brief Check if all elements of the vector are zero - * + * * @post `this` is not modified */ template bool hiopVectorRaja::is_zero() const { double* data = data_dev_; - RAJA::ReduceSum< hiop_raja_reduce, int > sum(0); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(data[i] != 0.0) { - sum += 1; - } - }); + RAJA::ReduceSum sum(0); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(data[i] != 0.0) { + sum += 1; + } + }); int all_zero = (sum.get() == 0) ? 1 : 0; #ifdef HIOP_USE_MPI int all_zero_G; - int ierr=MPI_Allreduce(&all_zero, &all_zero_G, 1, MPI_INT, MPI_MIN, comm_); assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&all_zero, &all_zero_G, 1, MPI_INT, MPI_MIN, comm_); + assert(MPI_SUCCESS == ierr); return all_zero_G; #endif return all_zero; @@ -1972,114 +1828,105 @@ bool hiopVectorRaja::is_zero() const /** * @brief Returns true if any element of `this` is NaN. - * + * * @post `this` is not modified - * + * * @warning This is local method only! */ template bool hiopVectorRaja::isnan_local() const { double* data = data_dev_; - RAJA::ReduceSum< hiop_raja_reduce, int > any(0); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(std::isnan(data[i])) - any += 1; - }); + RAJA::ReduceSum any(0); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(std::isnan(data[i])) any += 1; + }); return any.get(); } /** * @brief Returns true if any element of `this` is Inf. - * + * * @post `this` is not modified - * + * * @warning This is local method only! */ template bool hiopVectorRaja::isinf_local() const { double* data = data_dev_; - RAJA::ReduceSum< hiop_raja_reduce, int > any(0); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(std::isinf(data[i])) - any += 1; - }); + RAJA::ReduceSum any(0); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(std::isinf(data[i])) any += 1; + }); return any.get(); } /** * @brief Returns true if all elements of `this` are finite. - * + * * @post `this` is not modified - * + * * @warning This is local method only! */ template bool hiopVectorRaja::isfinite_local() const { double* data = data_dev_; - RAJA::ReduceMin< hiop_raja_reduce, int > smallest(1); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(!std::isfinite(data[i])) - smallest.min(0); - }); + RAJA::ReduceMin smallest(1); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(!std::isfinite(data[i])) smallest.min(0); + }); return smallest.get(); } /** * @brief Prints vector data to a file in Matlab format. - * + * * @pre Vector data was moved from the memory space to the host mirror. */ template -void hiopVectorRaja:: -print(FILE* file, const char* msg/*=NULL*/, int max_elems/*=-1*/, int rank/*=-1*/) const +void hiopVectorRaja::print(FILE* file, const char* msg /*=NULL*/, int max_elems /*=-1*/, int rank /*=-1*/) const { - int myrank=0, numranks=1; + int myrank = 0, numranks = 1; #ifdef HIOP_USE_MPI if(rank >= 0) { - int err = MPI_Comm_rank(comm_, &myrank); assert(err==MPI_SUCCESS); - err = MPI_Comm_size(comm_, &numranks); assert(err==MPI_SUCCESS); + int err = MPI_Comm_rank(comm_, &myrank); + assert(err == MPI_SUCCESS); + err = MPI_Comm_size(comm_, &numranks); + assert(err == MPI_SUCCESS); } #endif - if(nullptr==file) { + if(nullptr == file) { file = stdout; } - - if(myrank == rank || rank == -1) - { - if(max_elems>n_local_) - max_elems=n_local_; - if(NULL==msg) - { + if(myrank == rank || rank == -1) { + if(max_elems > n_local_) max_elems = n_local_; + + if(NULL == msg) { std::stringstream ss; ss << "vector of size " << n_ << ", printing " << max_elems << " elems "; - if(numranks>1) { + if(numranks > 1) { ss << "(on rank=" << myrank << ")"; - } - else { + } else { ss << "(serial)"; } ss << "\n"; fprintf(file, "%s", ss.str().c_str()); - } - else - { + } else { fprintf(file, "%s ", msg); - } + } fprintf(file, "=["); max_elems = max_elems >= 0 ? max_elems : n_local_; - for(int it=0; it void hiopVectorRaja::print() const { - auto* inst = const_cast* >(this); + auto* inst = const_cast*>(this); assert(nullptr != inst); inst->copyFromDev(); - for(index_type it=0; it::print() const template void hiopVectorRaja::copyToDev() { - if(data_dev_ == data_host_) - return; + if(data_dev_ == data_host_) return; assert(exec_space_.mem_backend().is_device() && "should have data_dev_==data_host_"); exec_space_.copy(data_dev_, data_host_, n_local_, exec_space_host_); } @@ -2108,21 +1954,16 @@ void hiopVectorRaja::copyToDev() template void hiopVectorRaja::copyFromDev() { - if(data_dev_ == data_host_) - return; + if(data_dev_ == data_host_) return; exec_space_host_.copy(data_host_, data_dev_, n_local_, exec_space_); } template -size_type hiopVectorRaja::numOfElemsLessThan(const double &val) const -{ +size_type hiopVectorRaja::numOfElemsLessThan(const double& val) const +{ double* data = data_dev_; RAJA::ReduceSum sum(0); - RAJA::forall( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - sum += (data[i](RAJA::RangeSegment(0, n_local_), RAJA_LAMBDA(RAJA::Index_type i) { sum += (data[i] < val); }); size_type nrm = sum.get(); @@ -2137,15 +1978,13 @@ size_type hiopVectorRaja::numOfElemsLessThan(const double &val) const } template -size_type hiopVectorRaja::numOfElemsAbsLessThan(const double &val) const -{ +size_type hiopVectorRaja::numOfElemsAbsLessThan(const double& val) const +{ double* data = data_dev_; RAJA::ReduceSum sum(0); - RAJA::forall( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - sum += static_cast(fabs(data[i]) < val); - }); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { sum += static_cast(fabs(data[i]) < val); }); size_type nrm = sum.get(); @@ -2160,47 +1999,34 @@ size_type hiopVectorRaja::numOfElemsAbsLessThan(const double &val) con } template -void hiopVectorRaja::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, +void hiopVectorRaja::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType* arr_src, const int start_src) const { assert(end <= n_local_ && start <= end && start >= 0 && start_src >= 0); // If there is nothing to copy, return. - if(end - start == 0) - return; - - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(0, end-start), - RAJA_LAMBDA(RAJA::Index_type i) - { - arr[start+i] = arr_src[start_src+i]; - } - ); + if(end - start == 0) return; + RAJA::forall( + RAJA::RangeSegment(0, end - start), + RAJA_LAMBDA(RAJA::Index_type i) { arr[start + i] = arr_src[start_src + i]; }); } template -void hiopVectorRaja::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, - const int start, - const int end, +void hiopVectorRaja::set_array_from_to(hiopInterfaceBase::NonlinearityType* arr, + const int start, + const int end, const hiopInterfaceBase::NonlinearityType arr_src) const { assert(end <= n_local_ && start <= end && start >= 0); // If there is nothing to copy, return. - if(end - start == 0) - return; + if(end - start == 0) return; - RAJA::forall< hiop_raja_exec >( - RAJA::RangeSegment(start, end), - RAJA_LAMBDA(RAJA::Index_type i) - { - arr[i] = arr_src; - } - ); + RAJA::forall(RAJA::RangeSegment(start, end), RAJA_LAMBDA(RAJA::Index_type i) { arr[i] = arr_src; }); } template @@ -2209,27 +2035,27 @@ bool hiopVectorRaja::is_equal(const hiopVector& vec) const #ifdef HIOP_DEEPCHECKS const hiopVectorRaja& v = dynamic_cast&>(vec); assert(v.n_local_ == n_local_); -#endif +#endif const double* data_v = vec.local_data_const(); const double* data = data_dev_; - RAJA::ReduceSum< hiop_raja_reduce, int > sum(0); - RAJA::forall< hiop_raja_exec >( RAJA::RangeSegment(0, n_local_), - RAJA_LAMBDA(RAJA::Index_type i) - { - if(data[i]!=data_v[i]) { - sum += 1; - } - }); + RAJA::ReduceSum sum(0); + RAJA::forall( + RAJA::RangeSegment(0, n_local_), + RAJA_LAMBDA(RAJA::Index_type i) { + if(data[i] != data_v[i]) { + sum += 1; + } + }); int all_equal = (sum.get() == 0); - + #ifdef HIOP_USE_MPI int all_equalG; int ierr = MPI_Allreduce(&all_equal, &all_equalG, 1, MPI_INT, MPI_MIN, comm_); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); return all_equalG; -#endif +#endif return all_equal; } -} // namespace hiop +} // namespace hiop diff --git a/src/LinAlg/hiopVectorRajaOmp.cpp b/src/LinAlg/hiopVectorRajaOmp.cpp index 6cef78136..525ff92d4 100644 --- a/src/LinAlg/hiopVectorRajaOmp.cpp +++ b/src/LinAlg/hiopVectorRajaOmp.cpp @@ -3,12 +3,11 @@ #include "MemBackendUmpireImpl.hpp" #include "ExecPoliciesRajaOmpImpl.hpp" - namespace hiop { using hiop_raja_exec = ExecRajaPoliciesBackend::hiop_raja_exec; using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_reduce; -} +} // namespace hiop #include "hiopVectorRajaImpl.hpp" #include "MathKernelsHost.hpp" @@ -16,19 +15,21 @@ using hiop_raja_reduce = ExecRajaPoliciesBackend::hiop_raja_r namespace hiop { -template<> void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) +template<> +void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) { hiop::host::array_random_uniform_kernel(n_local_, data_dev_, minv, maxv); } -template<> void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) +template<> +void hiopVectorRaja::set_to_random_uniform(double minv, double maxv) { hiop::host::array_random_uniform_kernel(n_local_, data_dev_, minv, maxv); } // -//Explicit instantiations: force compilation +// Explicit instantiations: force compilation // -template class hiopVectorRaja; -template class hiopVectorRaja; -} +template class hiopVectorRaja; +template class hiopVectorRaja; +} // namespace hiop diff --git a/src/LinAlg/hiop_blasdefs.hpp b/src/LinAlg/hiop_blasdefs.hpp index dacdbd486..2ebaf597a 100644 --- a/src/LinAlg/hiop_blasdefs.hpp +++ b/src/LinAlg/hiop_blasdefs.hpp @@ -3,59 +3,85 @@ #include "FortranCInterface.hpp" -#define DDOT FC_GLOBAL(ddot, DDOT) -#define DNRM2 FC_GLOBAL(dnrm2, DNRM2) -#define DSCAL FC_GLOBAL(dscal, DSCAL) -#define ZSCAL FC_GLOBAL(zscal, ZSCAL) -#define DAXPY FC_GLOBAL(daxpy, DAXPY) -#define ZAXPY FC_GLOBAL(zaxpy, ZAXPY) -#define DCOPY FC_GLOBAL(dcopy, DCOPY) -#define DGEMV FC_GLOBAL(dgemv, DGEMV) -#define ZGEMV FC_GLOBAL(zgemv, ZGEMV) -#define DGEMM FC_GLOBAL(dgemm, DGEMM) -#define DTRSM FC_GLOBAL(dtrsm, DTRSM) -#define DPOTRF FC_GLOBAL(dpotrf, DPOTRF) -#define DPOTRS FC_GLOBAL(dpotrs, DPOTRS) -#define DSYTRF FC_GLOBAL(dsytrf, DSYTRF) -#define DSYTRS FC_GLOBAL(dsytrs, DSYTRS) -#define DLANGE FC_GLOBAL(dlange, DLANGE) -#define ZLANGE FC_GLOBAL(zlange, ZLANGE) -#define DPOSVX FC_GLOBAL(dposvx, DPOSVC) +#define DDOT FC_GLOBAL(ddot, DDOT) +#define DNRM2 FC_GLOBAL(dnrm2, DNRM2) +#define DSCAL FC_GLOBAL(dscal, DSCAL) +#define ZSCAL FC_GLOBAL(zscal, ZSCAL) +#define DAXPY FC_GLOBAL(daxpy, DAXPY) +#define ZAXPY FC_GLOBAL(zaxpy, ZAXPY) +#define DCOPY FC_GLOBAL(dcopy, DCOPY) +#define DGEMV FC_GLOBAL(dgemv, DGEMV) +#define ZGEMV FC_GLOBAL(zgemv, ZGEMV) +#define DGEMM FC_GLOBAL(dgemm, DGEMM) +#define DTRSM FC_GLOBAL(dtrsm, DTRSM) +#define DPOTRF FC_GLOBAL(dpotrf, DPOTRF) +#define DPOTRS FC_GLOBAL(dpotrs, DPOTRS) +#define DSYTRF FC_GLOBAL(dsytrf, DSYTRF) +#define DSYTRS FC_GLOBAL(dsytrs, DSYTRS) +#define DLANGE FC_GLOBAL(dlange, DLANGE) +#define ZLANGE FC_GLOBAL(zlange, ZLANGE) +#define DPOSVX FC_GLOBAL(dposvx, DPOSVC) #define DPOSVXX FC_GLOBAL(dposvxx, DPOSVXX) namespace hiop { -//#ifdef __cplusplus +// #ifdef __cplusplus extern "C" { -//#endif - typedef struct { - double re, im; - } dcomplex; -//#ifdef __cplusplus +// #endif +typedef struct +{ + double re, im; +} dcomplex; +// #ifdef __cplusplus } -//#endif +// #endif - extern "C" double DNRM2(int* n, double* x, int* incx); extern "C" double DDOT(int* n, double* dx, int* incx, double* dy, int* incy); -extern "C" void DSCAL(int* n, double* da, double* dx, int* incx); -extern "C" void ZSCAL(int* n, dcomplex* da, dcomplex* dx, int* incx); -extern "C" void DAXPY(int* n, double* da, double* dx, int* incx, double* dy, int* incy ); -extern "C" void ZAXPY(int* n, dcomplex* da, dcomplex* dx, int* incx, dcomplex* dy, int* incy ); -extern "C" void DCOPY(int* n, double* da, int* incx, double* dy, int* incy); -extern "C" void DGEMV(char* trans, int* m, int* n, double* alpha, double* a, int* lda, - const double* x, int* incx, double* beta, double* y, int* incy ); -extern "C" void ZGEMV(char* trans, int* m, int* n, dcomplex* alpha, dcomplex* a, int* lda, - const dcomplex* x, int* incx, dcomplex* beta, dcomplex* y, int* incy ); +extern "C" void DSCAL(int* n, double* da, double* dx, int* incx); +extern "C" void ZSCAL(int* n, dcomplex* da, dcomplex* dx, int* incx); +extern "C" void DAXPY(int* n, double* da, double* dx, int* incx, double* dy, int* incy); +extern "C" void ZAXPY(int* n, dcomplex* da, dcomplex* dx, int* incx, dcomplex* dy, int* incy); +extern "C" void DCOPY(int* n, double* da, int* incx, double* dy, int* incy); +extern "C" void DGEMV(char* trans, + int* m, + int* n, + double* alpha, + double* a, + int* lda, + const double* x, + int* incx, + double* beta, + double* y, + int* incy); +extern "C" void ZGEMV(char* trans, + int* m, + int* n, + dcomplex* alpha, + dcomplex* a, + int* lda, + const dcomplex* x, + int* incx, + dcomplex* beta, + dcomplex* y, + int* incy); /* C := alpha*op( A )*op( B ) + beta*C * op( A ) an m by k matrix, op( B ) a k by n matrix and C an m by n matrix */ -extern "C" void DGEMM(char* transA, char* transB, int* m, int* n, int* k, - double* alpha, double* a, int* lda, - double* b, int* ldb, - double* beta, double* C, int*ldc); - +extern "C" void DGEMM(char* transA, + char* transB, + int* m, + int* n, + int* k, + double* alpha, + double* a, + int* lda, + double* b, + int* ldb, + double* beta, + double* C, + int* ldc); /* op( A )*X = alpha*B, or X*op( A ) = alpha*B, * where alpha is a scalar, X and B are m by n matrices, A is a unit, or @@ -64,29 +90,32 @@ extern "C" void DGEMM(char* transA, char* transB, int* m, int* n, int* k, * * The matrix X is overwritten on B. */ -//!opt DTPTRS packed format triangular solve -extern "C" void DTRSM(char* side, char* uplo, char* transA, char* diag, - int* M, int* N, - double* alpha, - const double* a, int* lda, - double* b, int* ldb); +//! opt DTPTRS packed format triangular solve +extern "C" void DTRSM(char* side, + char* uplo, + char* transA, + char* diag, + int* M, + int* N, + double* alpha, + const double* a, + int* lda, + double* b, + int* ldb); /* Cholesky factorization of a real symmetric positive definite matrix A. * The factorization has the form * A = U**T * U, if UPLO = 'U', or A = L * L**T, if UPLO = 'L', * where U is an upper triangular matrix and L is lower triangular. */ -extern "C" void DPOTRF(char* uplo, int* N, double* A, int* lda, int* info); +extern "C" void DPOTRF(char* uplo, int* N, double* A, int* lda, int* info); /* solves a system of linear equations A*X = B with a symmetric * positive definite matrix A using the Cholesky factorization * A = U**T*U or A = L*L**T computed by DPOTRF * A contains the triangular factor U or L -*/ -extern "C" void DPOTRS(char* uplo, int* N, int* NRHS, - double*A, int* lda, - double* B, int* ldb, - int* info); + */ +extern "C" void DPOTRS(char* uplo, int* N, int* NRHS, double* A, int* lda, double* B, int* ldb, int* info); /* DSYTRF computes the factorization of a real symmetric matrix A using * the Bunch-Kaufman diagonal pivoting method. The form of the @@ -98,7 +127,7 @@ extern "C" void DPOTRS(char* uplo, int* N, int* NRHS, * * This is the blocked version of the algorithm, calling Level 3 BLAS. */ -extern "C" void DSYTRF( char* UPLO, int* N, double* A, int* LDA, int* IPIV, double* WORK, int* LWORK, int* INFO ); +extern "C" void DSYTRF(char* UPLO, int* N, double* A, int* LDA, int* IPIV, double* WORK, int* LWORK, int* INFO); /* DSYTRS solves a system of linear equations A*X = B with a real * symmetric matrix A using the factorization A = U*D*U**T or @@ -106,14 +135,14 @@ extern "C" void DSYTRF( char* UPLO, int* N, double* A, int* LDA, int* IPIV, doub * * To improve the solution using LAPACK one needs to use DSYRFS. */ -extern "C" void DSYTRS( char* UPLO, int* N, int* NRHS, double* A, int* LDA, int* IPIV, double*B, int* LDB, int* INFO ); +extern "C" void DSYTRS(char* UPLO, int* N, int* NRHS, double* A, int* LDA, int* IPIV, double* B, int* LDB, int* INFO); /* returns the value of the one norm, or the Frobenius norm, or * the infinity norm, or the element of largest absolute value of a * real matrix A. */ extern "C" double DLANGE(char* norm, int* M, int* N, double* A, int* lda, double* work); -extern "C" double ZLANGE(char* norm, int* M, int* N, dcomplex* A, int* lda, double* work); +extern "C" double ZLANGE(char* norm, int* M, int* N, dcomplex* A, int* lda, double* work); /* DPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to compute the solution to a real system of linear equations @@ -124,16 +153,26 @@ extern "C" double ZLANGE(char* norm, int* M, int* N, dcomplex* A, int* lda, dou Error bounds on the solution and a condition estimate are also provided. */ -extern "C" void DPOSVX(char* FACT, char* UPLO, int* N, int* NRHS, - double* A, int* LDA, - double* AF, int* LDAF, - char* EQUED, - double* S, - double* B, int* LDB, - double* X, int* LDX, - double* RCOND, double* FERR, double* BERR, - double* WORK, int* IWORK, - int* INFO); +extern "C" void DPOSVX(char* FACT, + char* UPLO, + int* N, + int* NRHS, + double* A, + int* LDA, + double* AF, + int* LDAF, + char* EQUED, + double* S, + double* B, + int* LDB, + double* X, + int* LDX, + double* RCOND, + double* FERR, + double* BERR, + double* WORK, + int* IWORK, + int* INFO); /* DPOSVXX uses the Cholesky factorization A = U**T*U or A = L*L**T to compute the solution to a double precision system of linear equations A * X = B, where A is an N-by-N symmetric positive definite matrix @@ -154,18 +193,31 @@ extern "C" void DPOSVX(char* FACT, char* UPLO, int* N, int* NRHS, user-provided factorizations and equilibration factors if they differ from what DPOSVXX would itself produce. */ -extern "C" void DPOSVXX(char* FACT, char* UPLO, int* N, int* NRHS, - double* A, int* LDA, - double* AF, int* LDAF, - char* EQUED, - double* S, - double* B, int* LDB, - double* X, int* LDX, - double* RCOND, double* RPVGRW, double* BERR, - int* N_ERR_BNDS, double* ERR_BNDS_NORM, double* ERR_BNDS_COMP, - int* NPARAMS, double* PARAMS, - double* WORK, int* IWORK, - int* INFO); - -}; +extern "C" void DPOSVXX(char* FACT, + char* UPLO, + int* N, + int* NRHS, + double* A, + int* LDA, + double* AF, + int* LDAF, + char* EQUED, + double* S, + double* B, + int* LDB, + double* X, + int* LDX, + double* RCOND, + double* RPVGRW, + double* BERR, + int* N_ERR_BNDS, + double* ERR_BNDS_NORM, + double* ERR_BNDS_COMP, + int* NPARAMS, + double* PARAMS, + double* WORK, + int* IWORK, + int* INFO); + +}; // namespace hiop #endif diff --git a/src/LinAlg/test_hiopLinalgComplex.cpp b/src/LinAlg/test_hiopLinalgComplex.cpp index 15171289f..a28e1c027 100644 --- a/src/LinAlg/test_hiopLinalgComplex.cpp +++ b/src/LinAlg/test_hiopLinalgComplex.cpp @@ -8,142 +8,136 @@ using namespace hiop; int main() { bool all_tests_ok = true; - { //TEST sparse complex matrix - //this is a rectangular matrix to test with - // [ 1+i 0 0 1-i 0 ] - // [ 0 0 i 0 1 ] - // [ 2-i 0 0 1.5-0.5i 0.5-0.5i ] + { // TEST sparse complex matrix + // this is a rectangular matrix to test with + // [ 1+i 0 0 1-i 0 ] + // [ 0 0 i 0 1 ] + // [ 2-i 0 0 1.5-0.5i 0.5-0.5i ] // - int m=3, n=5; + int m = 3, n = 5; int Mrow[] = {0, 0, 1, 1, 2, 2, 2}; int Mcol[] = {0, 3, 2, 4, 0, 3, 4}; - std::complex Mval[] = {{1,1}, {1,-1}, {0,1}, {1,0}, {2,-1}, {1.5,-0.5}, {0.5, -0.5}}; - + std::complex Mval[] = {{1, 1}, {1, -1}, {0, 1}, {1, 0}, {2, -1}, {1.5, -0.5}, {0.5, -0.5}}; + int nnz = sizeof(Mrow) / sizeof(Mrow[0]); assert(nnz == sizeof Mcol / sizeof Mcol[0]); assert(nnz == sizeof Mval / sizeof Mval[0]); - - hiopMatrixComplexSparseTriplet mat(m,n,nnz); + + hiopMatrixComplexSparseTriplet mat(m, n, nnz); mat.copyFrom(Mrow, Mcol, Mval); - - //test1 + + // test1 double abs_nrm = mat.max_abs_value(); - double diff = std::fabs(abs_nrm-2.23606797749979); - if(diff>1e-12) { + double diff = std::fabs(abs_nrm - 2.23606797749979); + if(diff > 1e-12) { printf("error: max_abs_value did not return the correct value. Difference: %6.3e\n", diff); - all_tests_ok=false; + all_tests_ok = false; } - //mat.print(); - + // mat.print(); + mat.storage()->sort_indexes(); - //mat.print(); - + // mat.print(); + mat.storage()->sum_up_duplicates(); double abs_nrm2 = mat.max_abs_value(); - diff = std::fabs(abs_nrm2-abs_nrm); - if(diff>1e-15) { + diff = std::fabs(abs_nrm2 - abs_nrm); + if(diff > 1e-15) { printf("error: postprocessing check failed\n"); - all_tests_ok=false; + all_tests_ok = false; } - //slicing -> row and cols idxs need to be sorted + // slicing -> row and cols idxs need to be sorted std::vector rows = {1}, cols = {1, 2}; auto* subMat = mat.new_slice(rows.data(), rows.size(), cols.data(), cols.size()); - //subMat->print(); + // subMat->print(); if(subMat->numberOfNonzeros() != 1) { printf("error: new_slice did not return the correct nnz.\n"); - all_tests_ok=false; + all_tests_ok = false; } delete subMat; - rows = {1}; cols = {1, 2}; + rows = {1}; + cols = {1, 2}; subMat = mat.new_slice(rows.data(), rows.size(), cols.data(), cols.size()); abs_nrm = subMat->max_abs_value(); - diff = std::fabs(abs_nrm-1.0); - if(diff>1e-12) { - printf("error: check of 'new_slice' failed. Difference: %6.3e [should be %20.16e]\n", - diff, abs_nrm); - all_tests_ok=false; + diff = std::fabs(abs_nrm - 1.0); + if(diff > 1e-12) { + printf("error: check of 'new_slice' failed. Difference: %6.3e [should be %20.16e]\n", diff, abs_nrm); + all_tests_ok = false; } delete subMat; } - { //TEST dense complex matrix - hiopMatrixComplexDense mat(3,4); + { // TEST dense complex matrix + hiopMatrixComplexDense mat(3, 4); std::complex** M = mat.get_M(); - for(int i=0; i(i,j); - - //hiopMatrixComplexDense mat2(3,4); - //std::complex** M2 = mat2.get_M(); - //for(int i=0; i(i, j); + + // hiopMatrixComplexDense mat2(3,4); + // std::complex** M2 = mat2.get_M(); + // for(int i=0; i(i,-2.*j); - //mat.print(); - //mat2.print(); - //mat2.addMatrix(std::complex(1.,0), mat); + // mat.print(); + // mat2.print(); + // mat2.addMatrix(std::complex(1.,0), mat); - //mat2.print(); + // mat2.print(); - //test1 + // test1 double abs_nrm = mat.max_abs_value(); - double diff = std::fabs(abs_nrm-3.605551275463989); - if(diff>1e-12) { - printf("error: max_abs_value did not return the correct value. Difference: %6.3e [should be %20.16e]\n", - diff, abs_nrm); + double diff = std::fabs(abs_nrm - 3.605551275463989); + if(diff > 1e-12) { + printf("error: max_abs_value did not return the correct value. Difference: %6.3e [should be %20.16e]\n", + diff, + abs_nrm); return -1; } } - //test for sparse complex symmetric + // test for sparse complex symmetric { - int n=5; + int n = 5; int Mrow[] = {0, 0, 0, 1, 2, 2, 3, 3, 4}; int Mcol[] = {0, 2, 3, 2, 2, 3, 3, 4, 4}; - std::complex Mval[] = {{1,1}, {1,-1}, {0.001,3}, - {1,2}, - {2,2}, {2,3.333}, - {3,3}, {3,4}, - {4,4}}; + std::complex Mval[] = {{1, 1}, {1, -1}, {0.001, 3}, {1, 2}, {2, 2}, {2, 3.333}, {3, 3}, {3, 4}, {4, 4}}; int nnz = sizeof(Mrow) / sizeof(Mrow[0]); assert(nnz == sizeof Mcol / sizeof Mcol[0]); assert(nnz == sizeof Mval / sizeof Mval[0]); - hiopMatrixComplexSparseTriplet mat(n,n,nnz); + hiopMatrixComplexSparseTriplet mat(n, n, nnz); mat.copyFrom(Mrow, Mcol, Mval); - //mat.print(); + // mat.print(); - std::vector idxs = {1,2,4}; + std::vector idxs = {1, 2, 4}; hiopMatrixComplexSparseTriplet* submat_sym = mat.new_sliceFromSymToSym(idxs.data(), idxs.size()); - //submat_sym->print(); - //test2 + // submat_sym->print(); + // test2 double abs_nrm = submat_sym->max_abs_value(); - double diff = std::fabs(abs_nrm-5.6568542494923806); - if(diff>1e-12) { - printf("error: check of '.new_sliceFromSymToSym' failed. Difference: %6.3e [should be %20.16e]\n", - diff, abs_nrm); - all_tests_ok=false; + double diff = std::fabs(abs_nrm - 5.6568542494923806); + if(diff > 1e-12) { + printf("error: check of '.new_sliceFromSymToSym' failed. Difference: %6.3e [should be %20.16e]\n", diff, abs_nrm); + all_tests_ok = false; } delete submat_sym; - std::vector idxs_row={0,2,3}; - std::vector idxs_col={1,2,4}; + std::vector idxs_row = {0, 2, 3}; + std::vector idxs_col = {1, 2, 4}; auto* submat_gen = mat.new_sliceFromSym(idxs_row.data(), idxs_row.size(), idxs_col.data(), idxs_col.size()); - //submat_gen->print(); - //test2 + // submat_gen->print(); + // test2 abs_nrm = submat_gen->max_abs_value(); - diff = std::fabs(abs_nrm-5.0); - if(diff>1e-12) { - printf("error: check of 'new_sliceFromSym' failed. Difference: %6.3e [should be %20.16e]\n", - diff, abs_nrm); - all_tests_ok=false; + diff = std::fabs(abs_nrm - 5.0); + if(diff > 1e-12) { + printf("error: check of 'new_sliceFromSym' failed. Difference: %6.3e [should be %20.16e]\n", diff, abs_nrm); + all_tests_ok = false; } delete submat_gen; } - + if(all_tests_ok) printf("All checks passed\n"); return 0; } diff --git a/src/Optimization/KktLinSysLowRank.cpp b/src/Optimization/KktLinSysLowRank.cpp index 11178a121..5bc5b49b0 100644 --- a/src/Optimization/KktLinSysLowRank.cpp +++ b/src/Optimization/KktLinSysLowRank.cpp @@ -58,15 +58,13 @@ namespace hiop { KktLinSysLowRank::KktLinSysLowRank(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressedXYcYd(nlp) + : hiopKKTLinSysCompressedXYcYd(nlp) { auto* nlpd = dynamic_cast(nlp_); - kxn_mat_ = nlpd->alloc_multivector_primal(nlpd->m()); + kxn_mat_ = nlpd->alloc_multivector_primal(nlpd->m()); assert("DEFAULT" == toupper(nlpd->options->GetString("mem_space"))); - N_ = LinearAlgebraFactory::create_matrix_dense(nlpd->options->GetString("mem_space"), - nlpd->m(), - nlpd->m()); + N_ = LinearAlgebraFactory::create_matrix_dense(nlpd->options->GetString("mem_space"), nlpd->m(), nlpd->m()); #ifdef HIOP_DEEPCHECKS Nmat_ = N_->alloc_clone(); #endif @@ -93,11 +91,12 @@ bool KktLinSysLowRank::update(const hiopIterate* iter, iter_ = iter; grad_f_ = dynamic_cast(grad_f); - Jac_c_ = Jac_c; Jac_d_ = Jac_d; + Jac_c_ = Jac_c; + Jac_d_ = Jac_d; Hess_ = hess_low_rank; - //compute the diagonals - //Dx=(Sxl)^{-1}Zl + (Sxu)^{-1}Zu + // compute the diagonals + // Dx=(Sxl)^{-1}Zl + (Sxu)^{-1}Zu Dx_->setToZero(); Dx_->axdzpy_w_pattern(1.0, *iter_->zl, *iter_->sxl, nlp_->get_ixl()); Dx_->axdzpy_w_pattern(1.0, *iter_->zu, *iter_->sxu, nlp_->get_ixu()); @@ -105,12 +104,12 @@ bool KktLinSysLowRank::update(const hiopIterate* iter, hess_low_rank->update_logbar_diag(*Dx_); - //Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu + // Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu Dd_inv_->setToZero(); Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl()); Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vu, *iter_->sdu, nlp_->get_idu()); #ifdef HIOP_DEEPCHECKS - assert(true==Dd_inv_->allPositive()); + assert(true == Dd_inv_->allPositive()); #endif Dd_->copyFrom(*Dd_inv_); Dd_inv_->invert(); @@ -121,7 +120,6 @@ bool KktLinSysLowRank::update(const hiopIterate* iter, return true; } - /* Solves the system corresponding to directions for x, yc, and yd, namely * [ H_BFGS + Dx Jc^T Jd^T ] [ dx] [ rx ] * [ Jc 0 0 ] [dyc] = [ ryc ] @@ -143,9 +141,9 @@ bool KktLinSysLowRank::solveCompressed(hiopVector& rx, hiopVector& dyd) { #ifdef HIOP_DEEPCHECKS - //some outputing + // some outputing nlp_->log->write("KKT Low rank: solve compressed RHS", hovIteration); - nlp_->log->write(" rx: ", rx, hovIteration); + nlp_->log->write(" rx: ", rx, hovIteration); nlp_->log->write(" ryc: ", ryc, hovIteration); nlp_->log->write(" ryd: ", ryd, hovIteration); nlp_->log->write(" Jc: ", *Jac_c_, hovMatrices); @@ -155,18 +153,20 @@ bool KktLinSysLowRank::solveCompressed(hiopVector& rx, #endif hiopMatrixDense& J = *kxn_mat_; - const hiopMatrixDense* Jac_c_de = dynamic_cast(Jac_c_); assert(Jac_c_de); - const hiopMatrixDense* Jac_d_de = dynamic_cast(Jac_d_); assert(Jac_d_de); - J.copyRowsFrom(*Jac_c_de, nlp_->m_eq(), 0); //!opt - J.copyRowsFrom(*Jac_d_de, nlp_->m_ineq(), nlp_->m_eq());//!opt + const hiopMatrixDense* Jac_c_de = dynamic_cast(Jac_c_); + assert(Jac_c_de); + const hiopMatrixDense* Jac_d_de = dynamic_cast(Jac_d_); + assert(Jac_d_de); + J.copyRowsFrom(*Jac_c_de, nlp_->m_eq(), 0); //! opt + J.copyRowsFrom(*Jac_d_de, nlp_->m_ineq(), nlp_->m_eq()); //! opt auto* hess_low_rank = dynamic_cast(Hess_); - - //N = J*(Hess\J') - //Hess->symmetricTimesMat(0.0, *N, 1.0, J); + + // N = J*(Hess\J') + // Hess->symmetricTimesMat(0.0, *N, 1.0, J); hess_low_rank->sym_mat_times_inverse_times_mattrans(0.0, *N_, 1.0, J); - //subdiag of N += 1., Dd_inv + // subdiag of N += 1., Dd_inv N_->addSubDiagonal(1., nlp_->m_eq(), *Dd_inv_); #ifdef HIOP_DEEPCHECKS assert(J.isfinite()); @@ -176,8 +176,8 @@ bool KktLinSysLowRank::solveCompressed(hiopVector& rx, N_->assertSymmetry(1e-10); #endif - //compute the rhs of the lin sys involving N - // 1. first compute (H+Dx)^{-1} rx_tilde and store it temporarily in dx + // compute the rhs of the lin sys involving N + // 1. first compute (H+Dx)^{-1} rx_tilde and store it temporarily in dx hess_low_rank->solve(rx, dx); #ifdef HIOP_DEEPCHECKS assert(rx.isfinite_local() && "Something bad happened: nan or inf value"); @@ -186,7 +186,7 @@ bool KktLinSysLowRank::solveCompressed(hiopVector& rx, // 2 . then rhs = [ Jc(H+Dx)^{-1}*rx - ryc ] // [ Jd(H+dx)^{-1}*rx - ryd ] - hiopVector& rhs=*k_vec1_; + hiopVector& rhs = *k_vec1_; rhs.copyFromStarting(0, ryc); rhs.copyFromStarting(nlp_->m_eq(), ryd); J.timesVec(-1.0, rhs, 1.0, dx); @@ -195,35 +195,35 @@ bool KktLinSysLowRank::solveCompressed(hiopVector& rx, nlp_->log->write("solveCompressed: dx sol is", dx, hovMatrices); nlp_->log->write("solveCompressed: rhs for N is", rhs, hovMatrices); Nmat_->copyFrom(*N_); - hiopVector* r=rhs.new_copy(); //save the rhs to check the norm of the residual + hiopVector* r = rhs.new_copy(); // save the rhs to check the norm of the residual #endif // - //solve N * dyc_dyd = rhs + // solve N * dyc_dyd = rhs // - int ierr = solveWithRefin(*N_,rhs); - //int ierr = solve(*N,rhs); + int ierr = solveWithRefin(*N_, rhs); + // int ierr = solve(*N,rhs); - hiopVector& dyc_dyd= rhs; - dyc_dyd.copyToStarting(0, dyc); + hiopVector& dyc_dyd = rhs; + dyc_dyd.copyToStarting(0, dyc); dyc_dyd.copyToStarting(nlp_->m_eq(), dyd); - //now solve for dx = - (H+Dx)^{-1}*(Jc^T*dyc+Jd^T*dyd - rx) - //first rx = -(Jc^T*dyc+Jd^T*dyd - rx) + // now solve for dx = - (H+Dx)^{-1}*(Jc^T*dyc+Jd^T*dyd - rx) + // first rx = -(Jc^T*dyc+Jd^T*dyd - rx) J.transTimesVec(1.0, rx, -1.0, dyc_dyd); - //then dx = (H+Dx)^{-1} rx + // then dx = (H+Dx)^{-1} rx hess_low_rank->solve(rx, dx); #ifdef HIOP_DEEPCHECKS - //some outputing + // some outputing nlp_->log->write("KKT Low rank: solve compressed SOL", hovIteration); - nlp_->log->write(" dx: ", dx, hovIteration); + nlp_->log->write(" dx: ", dx, hovIteration); nlp_->log->write(" dyc: ", dyc, hovIteration); nlp_->log->write(" dyd: ", dyd, hovIteration); delete r; #endif - return ierr==0; + return ierr == 0; } int KktLinSysLowRank::solveWithRefin(hiopMatrixDense& M, hiopVector& rhs) @@ -234,28 +234,28 @@ int KktLinSysLowRank::solveWithRefin(hiopMatrixDense& M, hiopVector& rhs) // does not always provide a small enough residual since it stops (possibly without refinement) based on // the forward and backward estimates - int N=M.n(); - if(N<=0) return 0; + int N = M.n(); + if(N <= 0) return 0; hiopMatrixDense* Aref = M.new_copy(); hiopVector* rhsref = rhs.new_copy(); - char FACT='E'; - char UPLO='L'; + char FACT = 'E'; + char UPLO = 'L'; - int NRHS=1; - double* A=M.local_data(); - int LDA=N; - double* AF=new double[N*N]; - int LDAF=N; - char EQUED='N'; //it is an output if FACT='E' + int NRHS = 1; + double* A = M.local_data(); + int LDA = N; + double* AF = new double[N * N]; + int LDAF = N; + char EQUED = 'N'; // it is an output if FACT='E' double* S = new double[N]; double* B = rhs.local_data(); - int LDB=N; + int LDB = N; double* X = new double[N]; int LDX = N; double RCOND, FERR, BERR; - double* WORK = new double[3*N]; + double* WORK = new double[3 * N]; int* IWORK = new int[N]; int INFO; @@ -263,16 +263,17 @@ int KktLinSysLowRank::solveWithRefin(hiopMatrixDense& M, hiopVector& rhs) // 1. solve // DPOSVX(&FACT, &UPLO, &N, &NRHS, A, &LDA, AF, &LDAF, &EQUED, S, B, &LDB, X, &LDX, &RCOND, &FERR, &BERR, WORK, IWORK, &INFO); - //printf("INFO ===== %d RCOND=%g FERR=%g BERR=%g EQUED=%c\n", INFO, RCOND, FERR, BERR, EQUED); + // printf("INFO ===== %d RCOND=%g FERR=%g BERR=%g EQUED=%c\n", INFO, RCOND, FERR, BERR, EQUED); // - // 2. check residual + // 2. check residual // hiopVector* x = rhs.alloc_clone(); - hiopVector* dx = rhs.alloc_clone(); + hiopVector* dx = rhs.alloc_clone(); hiopVector* resid = rhs.alloc_clone(); - int nIterRefin=0;double nrmResid; + int nIterRefin = 0; + double nrmResid; int info; - const int MAX_ITER_REFIN=3; + const int MAX_ITER_REFIN = 3; while(true) { x->copyFrom(X); resid->copyFrom(*rhsref); @@ -280,51 +281,51 @@ int KktLinSysLowRank::solveWithRefin(hiopMatrixDense& M, hiopVector& rhs) nlp_->log->write("resid", *resid, hovLinAlgScalars); - nrmResid= resid->infnorm(); + nrmResid = resid->infnorm(); nlp_->log->printf(hovScalars, "KktLinSysLowRank::solveWithRefin iterrefin=%d residual norm=%g\n", nIterRefin, nrmResid); - if(nrmResid<1e-8) break; + if(nrmResid < 1e-8) break; - if(nIterRefin>=MAX_ITER_REFIN) { + if(nIterRefin >= MAX_ITER_REFIN) { nlp_->log->write("N", *Aref, hovMatrices); nlp_->log->write("sol", *x, hovMatrices); nlp_->log->write("rhs", *rhsref, hovMatrices); - nlp_->log->printf(hovWarning, - "KktLinSysLowRank::solveWithRefin reduced residual to ONLY (inf-norm) %g after %d iterative refinements\n", - nrmResid, - nIterRefin); + nlp_->log->printf( + hovWarning, + "KktLinSysLowRank::solveWithRefin reduced residual to ONLY (inf-norm) %g after %d iterative refinements\n", + nrmResid, + nIterRefin); break; - //assert(false && "too many refinements"); + // assert(false && "too many refinements"); } - if(0) { //iter refin based on symmetric indefinite factorization+solve - + if(0) { // iter refin based on symmetric indefinite factorization+solve int _V_ipiv_vec[1000]; double _V_work_vec[1000]; - int lwork=1000; + int lwork = 1000; M.copyFrom(*Aref); DSYTRF(&UPLO, &N, M.local_data(), &LDA, _V_ipiv_vec, _V_work_vec, &lwork, &info); - assert(info==0); + assert(info == 0); DSYTRS(&UPLO, &N, &NRHS, M.local_data(), &LDA, _V_ipiv_vec, resid->local_data(), &LDB, &info); - assert(info==0); + assert(info == 0); } else { - //iter refin based on symmetric positive definite factorization+solve + // iter refin based on symmetric positive definite factorization+solve M.copyFrom(*Aref); DPOTRF(&UPLO, &N, M.local_data(), &LDA, &info); - if(info>0) { - nlp_->log->printf(hovError, + if(info > 0) { + nlp_->log->printf(hovError, "KktLinSysLowRank::factorizeMat: dpotrf (Chol fact) detected %d minor being indefinite.\n", info); } else { - if(info<0) { - nlp_->log->printf(hovError, "KktLinSysLowRank::factorizeMat: dpotrf returned error %d\n", info); + if(info < 0) { + nlp_->log->printf(hovError, "KktLinSysLowRank::factorizeMat: dpotrf returned error %d\n", info); } } - DPOTRS(&UPLO,&N, &NRHS, M.local_data(), &LDA, resid->local_data(), &LDA, &info); - if(info<0) { - nlp_->log->printf(hovError, "KktLinSysLowRank::solveWithFactors: dpotrs returned error %d\n", info); + DPOTRS(&UPLO, &N, &NRHS, M.local_data(), &LDA, resid->local_data(), &LDA, &info); + if(info < 0) { + nlp_->log->printf(hovError, "KktLinSysLowRank::solveWithFactors: dpotrs returned error %d\n", info); } } @@ -350,22 +351,22 @@ int KktLinSysLowRank::solveWithRefin(hiopMatrixDense& M, hiopVector& rhs) int KktLinSysLowRank::solve(hiopMatrixDense& M, hiopVector& rhs) { - char FACT='E'; - char UPLO='L'; - int N=M.n(); - int NRHS=1; - double* A=M.local_data(); - int LDA=N; - double* AF=new double[N*N]; - int LDAF=N; - char EQUED='N'; //it is an output if FACT='E' + char FACT = 'E'; + char UPLO = 'L'; + int N = M.n(); + int NRHS = 1; + double* A = M.local_data(); + int LDA = N; + double* AF = new double[N * N]; + int LDAF = N; + char EQUED = 'N'; // it is an output if FACT='E' double* S = new double[N]; double* B = rhs.local_data(); - int LDB=N; + int LDB = N; double* X = new double[N]; int LDX = N; double RCOND, FERR, BERR; - double* WORK = new double[3*N]; + double* WORK = new double[3 * N]; int* IWORK = new int[N]; int INFO; @@ -375,11 +376,11 @@ int KktLinSysLowRank::solve(hiopMatrixDense& M, hiopVector& rhs) nlp_->log->write("Scaling S", rhs, hovSummary); rhs.copyFrom(X); - delete [] AF; - delete [] S; - delete [] X; - delete [] WORK; - delete [] IWORK; + delete[] AF; + delete[] S; + delete[] X; + delete[] WORK; + delete[] IWORK; return 0; } @@ -415,17 +416,17 @@ int KktLinSysLowRank::solveWithRefin(hiopMatrixDense& M, hiopVectorPar& rhs) int INFO; dposvxx_(&FACT, &UPLO, &N, &NRHS, - A, &LDA, - AF, &LDAF, - &EQUED, - S, - B, &LDB, - X, &LDX, - &RCOND, &RPVGRW, &BERR, - &N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, - &NPARAMS, PARAMS, - WORK, IWORK, - &INFO); + A, &LDA, + AF, &LDAF, + &EQUED, + S, + B, &LDB, + X, &LDX, + &RCOND, &RPVGRW, &BERR, + &N_ERR_BNDS, ERR_BNDS_NORM, ERR_BNDS_COMP, + &NPARAMS, PARAMS, + WORK, IWORK, + &INFO); //rhs.copyFrom(S); //nlp_->log->write("Scaling S", rhs, hovSummary); @@ -434,8 +435,9 @@ int KktLinSysLowRank::solveWithRefin(hiopMatrixDense& M, hiopVectorPar& rhs) //nlp_->log->write("Factoriz ", M, hovSummary); printf("INFO ===== %d RCOND=%g RPVGRW=%g BERR=%g EQUED=%c\n", INFO, RCOND, RPVGRW, BERR, EQUED); - printf(" ERR_BNDS_NORM=%g %g %g ERR_BNDS_COMP=%g %g %g \n", ERR_BNDS_NORM[0], ERR_BNDS_NORM[1], ERR_BNDS_NORM[2], ERR_BNDS_COMP[0], ERR_BNDS_COMP[1], ERR_BNDS_COMP[2]); - printf(" PARAMS=%g %g %g \n", PARAMS[0], PARAMS[1], PARAMS[2]); + printf(" ERR_BNDS_NORM=%g %g %g ERR_BNDS_COMP=%g %g %g \n", ERR_BNDS_NORM[0], ERR_BNDS_NORM[1], +ERR_BNDS_NORM[2], ERR_BNDS_COMP[0], ERR_BNDS_COMP[1], ERR_BNDS_COMP[2]); printf(" PARAMS=%g %g %g \n", +PARAMS[0], PARAMS[1], PARAMS[2]); rhs.copyFrom(X); @@ -461,47 +463,47 @@ double KktLinSysLowRank::errorCompressedLinsys(const hiopVector& rx, { nlp_->log->printf(hovLinAlgScalars, "KktLinSysLowRank::errorCompressedLinsys residuals norm:\n"); auto* hess_low_rank = dynamic_cast(Hess_); - + double derr = -1.; double aux; hiopVector* RX = rx.new_copy(); - //RX=rx-H*dx-J'c*dyc-J'*dyd + // RX=rx-H*dx-J'c*dyc-J'*dyd hess_low_rank->timesVec(1.0, *RX, -1.0, dx); - //RX->axzpy(-1.0,*Dx,dx); + // RX->axzpy(-1.0,*Dx,dx); Jac_c_->transTimesVec(1.0, *RX, -1.0, dyc); Jac_d_->transTimesVec(1.0, *RX, -1.0, dyd); aux = RX->twonorm(); - derr = fmax(derr,aux); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >>> rx=%g\n", aux); - delete RX; + delete RX; hiopVector* RC = ryc.new_copy(); - Jac_c_->timesVec(1.0,*RC, -1.0,dx); + Jac_c_->timesVec(1.0, *RC, -1.0, dx); aux = RC->twonorm(); - derr = fmax(derr,aux); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >>> ryc=%g\n", aux); delete RC; hiopVector* RD = ryd.new_copy(); - Jac_d_->timesVec(1.0,*RD, -1.0, dx); + Jac_d_->timesVec(1.0, *RD, -1.0, dx); RD->axzpy(1.0, *Dd_inv_, dyd); aux = RD->twonorm(); - derr=fmax(derr,aux); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >>> ryd=%g\n", aux); - delete RD; + delete RD; return derr; } -double KktLinSysLowRank::solveError(const hiopMatrixDense& M, const hiopVector& x, hiopVector& rhs) +double KktLinSysLowRank::solveError(const hiopMatrixDense& M, const hiopVector& x, hiopVector& rhs) { double relError; - M.timesVec(1.0,rhs,-1.0,x); + M.timesVec(1.0, rhs, -1.0, x); double resnorm = rhs.infnorm(); - relError=resnorm;// / (1+rhsnorm); + relError = resnorm; // / (1+rhsnorm); return relError; } #endif -} //end namespace +} // namespace hiop diff --git a/src/Optimization/KktLinSysLowRank.hpp b/src/Optimization/KktLinSysLowRank.hpp index eb08259fc..71a845013 100644 --- a/src/Optimization/KktLinSysLowRank.hpp +++ b/src/Optimization/KktLinSysLowRank.hpp @@ -63,15 +63,15 @@ namespace hiop /** * @brief Encapsulates solves with the KKT system of IPM filter. * - * This class is for problems where the Hessian of the Lagrangian is a or is approximated + * This class is for problems where the Hessian of the Lagrangian is a or is approximated * by low-rank matrix plus a multiple of identity and the number of the constraints is not - * too large. - * - * It works with Hessian being a HessianDiagPlusLowRank class and the constraints Jacobian - * being hiopMatrixDense. + * too large. * - * This class solves the XYcYd compression of the full KKT. See solveCompressed method - * for details on the approach used to solve the linear system. + * It works with Hessian being a HessianDiagPlusLowRank class and the constraints Jacobian + * being hiopMatrixDense. + * + * This class solves the XYcYd compression of the full KKT. See solveCompressed method + * for details on the approach used to solve the linear system. */ class KktLinSysLowRank : public hiopKKTLinSysCompressedXYcYd @@ -82,15 +82,15 @@ class KktLinSysLowRank : public hiopKKTLinSysCompressedXYcYd /// @brief Updates the KKT system with new info at current iteration bool update(const hiopIterate* iter, - const hiopVector* grad_f, - const hiopMatrix* Jac_c, + const hiopVector* grad_f, + const hiopMatrix* Jac_c, const hiopMatrix* Jac_d, - hiopMatrix* Hess) + hiopMatrix* Hess) { const hiopMatrixDense* Jac_c_ = dynamic_cast(Jac_c); const hiopMatrixDense* Jac_d_ = dynamic_cast(Jac_d); HessianDiagPlusRowRank* Hess_ = dynamic_cast(Hess); - if(Jac_c_==nullptr || Jac_d_==nullptr || Hess_==nullptr) { + if(Jac_c_ == nullptr || Jac_d_ == nullptr || Hess_ == nullptr) { assert(false); return false; } @@ -99,10 +99,10 @@ class KktLinSysLowRank : public hiopKKTLinSysCompressedXYcYd /// @brief Updates the KKT system with new info at current iteration virtual bool update(const hiopIterate* iter, - const hiopVector* grad_f, - const hiopMatrixDense* Jac_c, + const hiopVector* grad_f, + const hiopMatrixDense* Jac_c, const hiopMatrixDense* Jac_d, - HessianDiagPlusRowRank* Hess); + HessianDiagPlusRowRank* Hess); virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg) { @@ -112,7 +112,7 @@ class KktLinSysLowRank : public hiopKKTLinSysCompressedXYcYd /** * Solves the compressed linear system, part of the KKT Linear System interface - * + * * Solves the system corresponding to directions for x, yc, and yd, namely * [ H_BFGS + Dx Jc^T Jd^T ] [ dx] [ rx ] * [ Jc 0 0 ] [dyc] = [ ryc] @@ -132,17 +132,18 @@ class KktLinSysLowRank : public hiopKKTLinSysCompressedXYcYd hiopVector& dyc, hiopVector& dyd); - //LAPACK wrappers + // LAPACK wrappers int solve(hiopMatrixDense& M, hiopVector& rhs); int solveWithRefin(hiopMatrixDense& M, hiopVector& rhs); #ifdef HIOP_DEEPCHECKS - static double solveError(const hiopMatrixDense& M, const hiopVector& x, hiopVector& rhs); + static double solveError(const hiopMatrixDense& M, const hiopVector& x, hiopVector& rhs); double errorCompressedLinsys(const hiopVector& rx, const hiopVector& ryc, const hiopVector& ryd, - const hiopVector& dx, + const hiopVector& dx, const hiopVector& dyc, const hiopVector& dyd); + protected: /// @brief perform y=beta*y+alpha*H*x without the log barrier term from H void HessianTimesVec_noLogBarrierTerm(double beta, hiopVector& y, double alpha, const hiopVector& x) @@ -155,15 +156,15 @@ class KktLinSysLowRank : public hiopKKTLinSysCompressedXYcYd private: /// The kxk reduced matrix - hiopMatrixDense* N_; + hiopMatrixDense* N_; #ifdef HIOP_DEEPCHECKS /// A copy of the above to compute the residual - hiopMatrixDense* Nmat_; + hiopMatrixDense* Nmat_; #endif - //internal buffers: k is usually 2 x quasi-Newton memory; n is the size of primal variable vector - hiopMatrixDense* kxn_mat_; + // internal buffers: k is usually 2 x quasi-Newton memory; n is the size of primal variable vector + hiopMatrixDense* kxn_mat_; hiopVector* k_vec1_; }; -} //end namespace +} // namespace hiop -#endif // HIOP_KKTLINSYSY_LOWRANK +#endif // HIOP_KKTLINSYSY_LOWRANK diff --git a/src/Optimization/hiopAlgFilterIPM.cpp b/src/Optimization/hiopAlgFilterIPM.cpp index 4d13f9d86..9ecb9747b 100644 --- a/src/Optimization/hiopAlgFilterIPM.cpp +++ b/src/Optimization/hiopAlgFilterIPM.cpp @@ -82,24 +82,24 @@ namespace hiop { hiopAlgFilterIPMBase::hiopAlgFilterIPMBase(hiopNlpFormulation* nlp_in, const bool within_FR) - : nlp(nlp_in), - logbar(nullptr), - it_curr(nullptr), - it_trial(nullptr), - dir(nullptr), - soc_dir(nullptr), - resid(nullptr), - resid_trial(nullptr), - iter_num_(0), - iter_num_total_(0), - onenorm_pr_curr_(0.0), - c_soc(nullptr), - d_soc(nullptr), - within_FR_(within_FR), - pd_perturb_(nullptr), - fact_acceptor_(nullptr) + : nlp(nlp_in), + logbar(nullptr), + it_curr(nullptr), + it_trial(nullptr), + dir(nullptr), + soc_dir(nullptr), + resid(nullptr), + resid_trial(nullptr), + iter_num_(0), + iter_num_total_(0), + onenorm_pr_curr_(0.0), + c_soc(nullptr), + d_soc(nullptr), + within_FR_(within_FR), + pd_perturb_(nullptr), + fact_acceptor_(nullptr) { - //force completion of the nlp's initialization + // force completion of the nlp's initialization nlp->finalizeInitialization(); } @@ -148,12 +148,12 @@ void hiopAlgFilterIPMBase::alloc_alg_objects() it_trial = it_curr->alloc_clone(); dir = it_curr->alloc_clone(); - if(nlp->options->GetString("KKTLinsys")=="full") { + if(nlp->options->GetString("KKTLinsys") == "full") { it_curr->selectPattern(); it_trial->selectPattern(); dir->selectPattern(); } - + logbar = new hiopLogBarProblem(nlp); _f_nlp = 0.; @@ -183,21 +183,21 @@ void hiopAlgFilterIPMBase::alloc_alg_objects() d_soc = nlp->alloc_dual_ineq_vec(); soc_dir = it_curr->alloc_clone(); } - + void hiopAlgFilterIPMBase::reInitializeNlpObjects() { dealloc_alg_objects(); alloc_alg_objects(); - //0 LSQ (default), 1 linear update (more stable) - duals_update_type = nlp->options->GetString("duals_update_type")=="lsq"?0:1; - //0 LSQ (default), 1 set to zero - dualsInitializ = nlp->options->GetString("duals_init")=="lsq"?0:1; + // 0 LSQ (default), 1 linear update (more stable) + duals_update_type = nlp->options->GetString("duals_update_type") == "lsq" ? 0 : 1; + // 0 LSQ (default), 1 set to zero + dualsInitializ = nlp->options->GetString("duals_init") == "lsq" ? 0 : 1; - if(duals_update_type==0) { + if(duals_update_type == 0) { hiopNlpDenseConstraints* nlpd = dynamic_cast(nlp); - if(NULL==nlpd) { + if(NULL == nlpd) { duals_update_type = 1; dualsInitializ = 1; nlp->log->printf(hovWarning, @@ -206,49 +206,51 @@ void hiopAlgFilterIPMBase::reInitializeNlpObjects() } } - //parameter based initialization - if(duals_update_type==0) { - //lsq update - //dualsUpdate_ = new hiopDualsLsqUpdate(nlp); + // parameter based initialization + if(duals_update_type == 0) { + // lsq update + // dualsUpdate_ = new hiopDualsLsqUpdate(nlp); dualsUpdate_ = nlp->alloc_duals_lsq_updater(); } else { - if(duals_update_type==1) { + if(duals_update_type == 1) { dualsUpdate_ = new hiopDualsNewtonLinearUpdate(nlp); - } else { assert(false && "duals_update_type has an unrecognized value"); } + } else { + assert(false && "duals_update_type has an unrecognized value"); + } } } void hiopAlgFilterIPMBase::reload_options() { - //algorithm parameters parameters - mu0=_mu = nlp->options->GetNumeric("mu0"); - kappa_mu = nlp->options->GetNumeric("kappa_mu"); //linear decrease factor - theta_mu = nlp->options->GetNumeric("theta_mu"); //exponent for higher than linear decrease of mu - tau_min = nlp->options->GetNumeric("tau_min"); //min value for the fraction-to-the-boundary - eps_tol = nlp->options->GetNumeric("tolerance"); //absolute error for the nlp - cons_tol_ = nlp->options->GetNumeric("cons_tol"); //absolute error for the constraints - dual_tol_ = nlp->options->GetNumeric("dual_tol"); //absolute error for the dual optimality - comp_tol_ = nlp->options->GetNumeric("comp_tol"); //absolute error for the complementary - eps_rtol = nlp->options->GetNumeric("rel_tolerance"); //relative error (to errors for the initial point) - kappa_eps= nlp->options->GetNumeric("kappa_eps"); //relative (to mu) error for the log barrier - - kappa1 = nlp->options->GetNumeric("kappa1"); //projection params for starting point (default 1e-2) - kappa2 = nlp->options->GetNumeric("kappa2"); - p_smax = nlp->options->GetNumeric("smax"); //threshold for the magnitude of the multipliers - - max_n_it = nlp->options->GetInteger("max_iter"); - - accep_n_it = nlp->options->GetInteger("acceptable_iterations"); + // algorithm parameters parameters + mu0 = _mu = nlp->options->GetNumeric("mu0"); + kappa_mu = nlp->options->GetNumeric("kappa_mu"); // linear decrease factor + theta_mu = nlp->options->GetNumeric("theta_mu"); // exponent for higher than linear decrease of mu + tau_min = nlp->options->GetNumeric("tau_min"); // min value for the fraction-to-the-boundary + eps_tol = nlp->options->GetNumeric("tolerance"); // absolute error for the nlp + cons_tol_ = nlp->options->GetNumeric("cons_tol"); // absolute error for the constraints + dual_tol_ = nlp->options->GetNumeric("dual_tol"); // absolute error for the dual optimality + comp_tol_ = nlp->options->GetNumeric("comp_tol"); // absolute error for the complementary + eps_rtol = nlp->options->GetNumeric("rel_tolerance"); // relative error (to errors for the initial point) + kappa_eps = nlp->options->GetNumeric("kappa_eps"); // relative (to mu) error for the log barrier + + kappa1 = nlp->options->GetNumeric("kappa1"); // projection params for starting point (default 1e-2) + kappa2 = nlp->options->GetNumeric("kappa2"); + p_smax = nlp->options->GetNumeric("smax"); // threshold for the magnitude of the multipliers + + max_n_it = nlp->options->GetInteger("max_iter"); + + accep_n_it = nlp->options->GetInteger("acceptable_iterations"); eps_tol_accep = nlp->options->GetNumeric("acceptable_tolerance"); - //0 LSQ (default), 1 linear update (more stable) - duals_update_type = nlp->options->GetString("duals_update_type")=="lsq"?0:1; - //0 LSQ (default), 1 set to zero - dualsInitializ = nlp->options->GetString("duals_init")=="lsq"?0:1; + // 0 LSQ (default), 1 linear update (more stable) + duals_update_type = nlp->options->GetString("duals_update_type") == "lsq" ? 0 : 1; + // 0 LSQ (default), 1 set to zero + dualsInitializ = nlp->options->GetString("duals_init") == "lsq" ? 0 : 1; - if(duals_update_type==0) { + if(duals_update_type == 0) { hiopNlpDenseConstraints* nlpd = dynamic_cast(nlp); - if(NULL==nlpd){ + if(NULL == nlpd) { // this is sparse or mds linear algebra duals_update_type = 1; nlp->log->printf(hovWarning, @@ -257,28 +259,28 @@ void hiopAlgFilterIPMBase::reload_options() } } - gamma_theta = 1e-5; //sufficient progress parameters for the feasibility violation - gamma_phi=1e-8; //and log barrier objective - s_theta=1.1; //parameters in the switch condition of - s_phi=2.3; // the linearsearch (equation 19) in - delta=1.; // the WachterBiegler paper + gamma_theta = 1e-5; // sufficient progress parameters for the feasibility violation + gamma_phi = 1e-8; // and log barrier objective + s_theta = 1.1; // parameters in the switch condition of + s_phi = 2.3; // the linearsearch (equation 19) in + delta = 1.; // the WachterBiegler paper // parameter in the Armijo rule - eta_phi=nlp->options->GetNumeric("eta_phi"); - //parameter in resetting the duals to guarantee closedness of the primal-dual logbar Hessian to the primal - //logbar Hessian - kappa_Sigma = 1e10; - _tau=fmax(tau_min,1.0-_mu); + eta_phi = nlp->options->GetNumeric("eta_phi"); + // parameter in resetting the duals to guarantee closedness of the primal-dual logbar Hessian to the primal + // logbar Hessian + kappa_Sigma = 1e10; + _tau = fmax(tau_min, 1.0 - _mu); theta_max_fact_ = nlp->options->GetNumeric("theta_max_fact"); theta_min_fact_ = nlp->options->GetNumeric("theta_min_fact"); - theta_max = 1e7; //temporary - will be updated after ini pt is computed - theta_min = 1e7; //temporary - will be updated after ini pt is computed + theta_max = 1e7; // temporary - will be updated after ini pt is computed + theta_min = 1e7; // temporary - will be updated after ini pt is computed - perf_report_kkt_ = "on"==hiop::tolower(nlp->options->GetString("time_kkt")); + perf_report_kkt_ = "on" == hiop::tolower(nlp->options->GetString("time_kkt")); // Set memory space for computations - //hiop::LinearAlgebraFactory::set_mem_space(nlp->options->GetString("mem_space")); + // hiop::LinearAlgebraFactory::set_mem_space(nlp->options->GetString("mem_space")); } void hiopAlgFilterIPMBase::resetSolverStatus() @@ -289,7 +291,7 @@ void hiopAlgFilterIPMBase::resetSolverStatus() } int hiopAlgFilterIPMBase::startingProcedure(hiopIterate& it_ini, - double &f, + double& f, hiopVector& c, hiopVector& d, hiopVector& gradf, @@ -300,8 +302,8 @@ int hiopAlgFilterIPMBase::startingProcedure(hiopIterate& it_ini, bool slacks_avail = false; bool warmstart_avail = false; bool ret_bool = false; - - if(nlp->options->GetString("warm_start")=="yes") { + + if(nlp->options->GetString("warm_start") == "yes") { ret_bool = nlp->get_warmstart_point(*it_ini.get_x(), *it_ini.get_zl(), *it_ini.get_zu(), @@ -316,17 +318,16 @@ int hiopAlgFilterIPMBase::startingProcedure(hiopIterate& it_ini, duals_avail, *it_ini.get_zl(), *it_ini.get_zu(), - *it_ini.get_yc(), + *it_ini.get_yc(), *it_ini.get_yd(), slacks_avail, *it_ini.get_d()); - } if(!ret_bool) { nlp->log->printf(hovWarning, "user did not provide a starting point; will be set to all zeros\n"); it_ini.get_x()->setToZero(); - //in case user wrongly set this to true when he/she returned false + // in case user wrongly set this to true when he/she returned false warmstart_avail = duals_avail = slacks_avail = false; } @@ -335,7 +336,6 @@ int hiopAlgFilterIPMBase::startingProcedure(hiopIterate& it_ini, // that the user's NLP evaluator functions, in particular the Hessian of the Lagrangian, // receives initialized arrays - if(!duals_avail) { // initialization for yc and yd it_ini.setEqualityDualsToConstant(0.); @@ -350,7 +350,7 @@ int hiopAlgFilterIPMBase::startingProcedure(hiopIterate& it_ini, } [[maybe_unused]] const bool do_nlp_scaling = nlp->apply_scaling(c, d, gradf, Jac_c, Jac_d); - + nlp->runStats.tmSolverInternal.start(); nlp->runStats.tmStartingPoint.start(); @@ -381,7 +381,7 @@ int hiopAlgFilterIPMBase::startingProcedure(hiopIterate& it_ini, int num_adjusted_slacks = it_ini.compute_safe_slacks(it_ini, mu0); // adjust small/negative slacks - if(num_adjusted_slacks > 0) { + if(num_adjusted_slacks > 0) { nlp->log->printf(hovWarning, "%d slacks are too small. Adjust corresponding variable slacks!\n", num_adjusted_slacks); nlp->adjust_bounds(it_ini); } @@ -395,36 +395,36 @@ int hiopAlgFilterIPMBase::startingProcedure(hiopIterate& it_ini, // compute vl and vu from vl = mu e ./ sdl and vu = mu e ./ sdu // sdl and sdu were initialized above in 'determineSlacks' if(!warmstart_avail) { - it_ini.determineDualsBounds_d(mu0); + it_ini.determineDualsBounds_d(mu0); } } if(!duals_avail) { - if(0==dualsInitializ) { - //LSQ-based initialization of yc and yd + if(0 == dualsInitializ) { + // LSQ-based initialization of yc and yd - //is the dualsUpdate_ already the LSQ-based updater? + // is the dualsUpdate_ already the LSQ-based updater? hiopDualsLsqUpdate* updater = dynamic_cast(dualsUpdate_); bool deleteUpdater = false; if(updater == nullptr) { - //updater = new hiopDualsLsqUpdate(nlp); + // updater = new hiopDualsLsqUpdate(nlp); updater = nlp->alloc_duals_lsq_updater(); deleteUpdater = true; } - //this will update yc and yd in it_ini + // this will update yc and yd in it_ini updater->compute_initial_duals_eq(it_ini, gradf, Jac_c, Jac_d); if(deleteUpdater) delete updater; } else { it_ini.setEqualityDualsToConstant(0.); } - } // end of if(!duals_avail) + } // end of if(!duals_avail) else { // duals eq ('yc' and 'yd') were provided by the user } - //we have the duals + // we have the duals if(!this->evalNlp_HessOnly(it_ini, *_Hess_Lagr)) { assert(false); return false; @@ -440,7 +440,7 @@ int hiopAlgFilterIPMBase::startingProcedure(hiopIterate& it_ini, } bool hiopAlgFilterIPMBase::evalNlp(hiopIterate& iter, - double &f, + double& f, hiopVector& c, hiopVector& d, hiopVector& gradf, @@ -448,42 +448,44 @@ bool hiopAlgFilterIPMBase::evalNlp(hiopIterate& iter, hiopMatrix& Jac_d, hiopMatrix& Hess_L) { - bool new_x=true; + bool new_x = true; // hiopVector& it_x = *iter.get_x(); // double* x = it_x.local_data();//local_data_const(); // //f(x) // if(!nlp->eval_f(x, new_x, f)) { hiopVector& x = *iter.get_x(); - //f(x) + // f(x) if(!nlp->eval_f(x, new_x, f)) { nlp->log->printf(hovError, "Error occured in user objective evaluation\n"); return false; } - new_x= false; //same x for the rest + new_x = false; // same x for the rest if(!nlp->eval_grad_f(x, new_x, gradf)) { nlp->log->printf(hovError, "Error occured in user gradient evaluation\n"); return false; } - //bret = nlp->eval_c (x, new_x, c.local_data()); assert(bret); - //bret = nlp->eval_d (x, new_x, d.local_data()); assert(bret); + // bret = nlp->eval_c (x, new_x, c.local_data()); assert(bret); + // bret = nlp->eval_d (x, new_x, d.local_data()); assert(bret); if(!nlp->eval_c_d(x, new_x, c, d)) { nlp->log->printf(hovError, "Error occured in user constraint(s) function evaluation\n"); return false; } - //nlp->log->write("Eq body c:", c, hovFcnEval); - //nlp->log->write("Ineq body d:", d, hovFcnEval); + // nlp->log->write("Eq body c:", c, hovFcnEval); + // nlp->log->write("Ineq body d:", d, hovFcnEval); - //bret = nlp->eval_Jac_c (x, new_x, Jac_c); assert(bret); - //bret = nlp->eval_Jac_d (x, new_x, Jac_d); assert(bret); + // bret = nlp->eval_Jac_c (x, new_x, Jac_c); assert(bret); + // bret = nlp->eval_Jac_d (x, new_x, Jac_d); assert(bret); if(!nlp->eval_Jac_c_d(x, new_x, Jac_c, Jac_d)) { nlp->log->printf(hovError, "Error occured in user Jacobian function evaluation\n"); return false; } - const hiopVector* yc = iter.get_yc(); assert(yc); - const hiopVector* yd = iter.get_yd(); assert(yd); + const hiopVector* yc = iter.get_yc(); + assert(yc); + const hiopVector* yd = iter.get_yd(); + assert(yd); const int new_lambda = true; if(!nlp->eval_Hess_Lagr(x, new_x, 1., *yc, *yd, new_lambda, Hess_L)) { @@ -494,43 +496,43 @@ bool hiopAlgFilterIPMBase::evalNlp(hiopIterate& iter, } bool hiopAlgFilterIPMBase::evalNlp_noHess(hiopIterate& iter, - double &f, + double& f, hiopVector& c, hiopVector& d, hiopVector& gradf, hiopMatrix& Jac_c, hiopMatrix& Jac_d) { - bool new_x=true; - //hiopVectorPar& it_x = dynamic_cast(*iter.get_x()); - //hiopVectorPar& c=dynamic_cast(c_); - //hiopVectorPar& d=dynamic_cast(d_); - //hiopVectorPar& gradf=dynamic_cast(gradf_); + bool new_x = true; + // hiopVectorPar& it_x = dynamic_cast(*iter.get_x()); + // hiopVectorPar& c=dynamic_cast(c_); + // hiopVectorPar& d=dynamic_cast(d_); + // hiopVectorPar& gradf=dynamic_cast(gradf_); hiopVector& x = *iter.get_x(); - //f(x) + // f(x) if(!nlp->eval_f(x, new_x, f)) { nlp->log->printf(hovError, "Error occured in user objective evaluation\n"); return false; } - new_x= false; //same x for the rest + new_x = false; // same x for the rest if(!nlp->eval_grad_f(x, new_x, gradf)) { nlp->log->printf(hovError, "Error occured in user gradient evaluation\n"); return false; } - //bret = nlp->eval_c (x, new_x, c.local_data()); assert(bret); - //bret = nlp->eval_d (x, new_x, d.local_data()); assert(bret); + // bret = nlp->eval_c (x, new_x, c.local_data()); assert(bret); + // bret = nlp->eval_d (x, new_x, d.local_data()); assert(bret); if(!nlp->eval_c_d(x, new_x, c, d)) { nlp->log->printf(hovError, "Error occured in user constraint(s) function evaluation\n"); return false; } - //nlp->log->write("Eq body c:", c, hovFcnEval); - //nlp->log->write("Ineq body d:", d, hovFcnEval); + // nlp->log->write("Eq body c:", c, hovFcnEval); + // nlp->log->write("Ineq body d:", d, hovFcnEval); - //bret = nlp->eval_Jac_c (x, new_x, Jac_c); assert(bret); - //bret = nlp->eval_Jac_d (x, new_x, Jac_d); assert(bret); + // bret = nlp->eval_Jac_c (x, new_x, Jac_c); assert(bret); + // bret = nlp->eval_Jac_d (x, new_x, Jac_d); assert(bret); if(!nlp->eval_Jac_c_d(x, new_x, Jac_c, Jac_d)) { nlp->log->printf(hovError, "Error occured in user Jacobian function evaluation\n"); return false; @@ -540,10 +542,12 @@ bool hiopAlgFilterIPMBase::evalNlp_noHess(hiopIterate& iter, bool hiopAlgFilterIPMBase::evalNlp_HessOnly(hiopIterate& iter, hiopMatrix& Hess_L) { - const bool new_x = false; //precondition is that 'evalNlp_noHess' was called just before + const bool new_x = false; // precondition is that 'evalNlp_noHess' was called just before - const hiopVector* yc = iter.get_yc(); assert(yc); - const hiopVector* yd = iter.get_yd(); assert(yd); + const hiopVector* yc = iter.get_yc(); + assert(yc); + const hiopVector* yd = iter.get_yd(); + assert(yd); const int new_lambda = true; hiopVector& x = *iter.get_x(); @@ -561,26 +565,26 @@ bool hiopAlgFilterIPMBase::update_log_barrier_params(hiopIterate& it, double& mu_new, double& tau_new) { - const double target_comp_tol = comp_tol_/nlp->get_obj_scale(); - double new_mu = std::fmax(0.0, std::fmin(kappa_mu*mu_curr, std::pow(mu_curr,theta_mu))); - new_mu = std::fmax(new_mu, std::fmin(eps_tol, target_comp_tol)/(10.+1.) ); - if(fabs(new_mu-mu_curr)<1e-16) { + const double target_comp_tol = comp_tol_ / nlp->get_obj_scale(); + double new_mu = std::fmax(0.0, std::fmin(kappa_mu * mu_curr, std::pow(mu_curr, theta_mu))); + new_mu = std::fmax(new_mu, std::fmin(eps_tol, target_comp_tol) / (10. + 1.)); + if(fabs(new_mu - mu_curr) < 1e-16) { return false; } - mu_new = new_mu; - tau_new = fmax(tau_min,1.0-mu_new); - + mu_new = new_mu; + tau_new = fmax(tau_min, 1.0 - mu_new); + if(elastic_mode_on) { const double target_mu = eps_tol; const double bound_relax_perturb_init = nlp->options->GetNumeric("elastic_mode_bound_relax_initial"); const double bound_relax_perturb_min = nlp->options->GetNumeric("elastic_mode_bound_relax_final"); double bound_relax_perturb = bound_relax_perturb_init; - - if(nlp->options->GetString("elastic_bound_strategy")=="mu_scaled") { - bound_relax_perturb = 0.995*mu_new; - } else if(nlp->options->GetString("elastic_bound_strategy")=="mu_projected") { - bound_relax_perturb = (mu_new - target_mu) / (mu0 - target_mu) * (bound_relax_perturb_init-bound_relax_perturb_min) - + bound_relax_perturb_min; + + if(nlp->options->GetString("elastic_bound_strategy") == "mu_scaled") { + bound_relax_perturb = 0.995 * mu_new; + } else if(nlp->options->GetString("elastic_bound_strategy") == "mu_projected") { + bound_relax_perturb = (mu_new - target_mu) / (mu0 - target_mu) * (bound_relax_perturb_init - bound_relax_perturb_min) + + bound_relax_perturb_min; } if(bound_relax_perturb > bound_relax_perturb_init) { @@ -595,45 +599,44 @@ bool hiopAlgFilterIPMBase::update_log_barrier_params(hiopIterate& it, nlp->reset_bounds(bound_relax_perturb); - if(nlp->options->GetString("elastic_mode")!="tighten_bound") { - assert(nlp->options->GetString("elastic_mode")=="correct_it" || - nlp->options->GetString("elastic_mode")=="correct_it_adjust_bound"); + if(nlp->options->GetString("elastic_mode") != "tighten_bound") { + assert(nlp->options->GetString("elastic_mode") == "correct_it" || + nlp->options->GetString("elastic_mode") == "correct_it_adjust_bound"); // recompute slacks according to the new bounds int num_adjusted_slacks = it.compute_safe_slacks(it, mu_new); // adjust small/negative slacks - if(num_adjusted_slacks > 0) { + if(num_adjusted_slacks > 0) { nlp->log->printf(hovLinAlgScalars, "update_log_barrier_params: %d slacks are too small after tightening the bounds. " - "Adjust corresponding slacks!\n", + "Adjust corresponding slacks!\n", num_adjusted_slacks); // adjust bounds according to `it` - if(nlp->options->GetString("elastic_mode")=="correct_it_adjust_bound") { + if(nlp->options->GetString("elastic_mode") == "correct_it_adjust_bound") { nlp->adjust_bounds(it); } - + // adjust duals - bool bret = it.adjustDuals_primalLogHessian(mu_new,kappa_Sigma); + bool bret = it.adjustDuals_primalLogHessian(mu_new, kappa_Sigma); assert(bret); } } - //compute infeasibility theta at trial point, since slacks and/or bounds are modified + // compute infeasibility theta at trial point, since slacks and/or bounds are modified [[maybe_unused]] const double theta_temp = resid->compute_nlp_infeasib_onenorm(*it_trial, *_c_trial, *_d_trial); - } // end of if elastic_mode_on - + } // end of if elastic_mode_on + return true; } double hiopAlgFilterIPMBase::thetaLogBarrier(const hiopIterate& it, const hiopResidual& resid, const double& mu) { - //actual nlp errors + // actual nlp errors double optim, feas, complem, cons_violation; resid.getNlpErrors(optim, feas, complem, cons_violation); return feas; } - bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, const hiopResidual& resid, const double& mu, @@ -649,45 +652,49 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, { nlp->runStats.tmSolverInternal.start(); - size_type n=nlp->n_complem(), m=nlp->m(); - //the one norms - //double nrmDualBou=it.normOneOfBoundDuals(); - //double nrmDualEqu=it.normOneOfEqualityDuals(); + size_type n = nlp->n_complem(), m = nlp->m(); + // the one norms + // double nrmDualBou=it.normOneOfBoundDuals(); + // double nrmDualEqu=it.normOneOfEqualityDuals(); double nrmDualBou, nrmDualEqu; it.normOneOfDuals(nrmDualEqu, nrmDualBou); nlp->log->printf(hovScalars, "nrmOneDualEqu %g nrmOneDualBo %g\n", nrmDualEqu, nrmDualBou); - if(nrmDualBou>1e+10) { - nlp->log->printf(hovWarning, "Unusually large bound dual variables (norm1=%g) occured, " + if(nrmDualBou > 1e+10) { + nlp->log->printf(hovWarning, + "Unusually large bound dual variables (norm1=%g) occured, " "which may cause numerical instabilities if it persists. Convergence " " issues or inacurate optimal solutions may be experienced. Possible causes: " " tight bounds or bad scaling of the optimization variables.\n", nrmDualBou); - if(nlp->options->GetString("fixed_var")=="remove") { - nlp->log->printf(hovWarning, "For example, increase 'fixed_var_tolerance' to remove " + if(nlp->options->GetString("fixed_var") == "remove") { + nlp->log->printf(hovWarning, + "For example, increase 'fixed_var_tolerance' to remove " "additional variables.\n"); - } else if(nlp->options->GetString("fixed_var")=="relax") { - nlp->log->printf(hovWarning, "For example, increase 'fixed_var_tolerance' to relax " - "aditional (tight) variables and/or increase 'fixed_var_perturb' " - "to decrease the tightness.\n"); + } else if(nlp->options->GetString("fixed_var") == "relax") { + nlp->log->printf(hovWarning, + "For example, increase 'fixed_var_tolerance' to relax " + "aditional (tight) variables and/or increase 'fixed_var_perturb' " + "to decrease the tightness.\n"); } else { - nlp->log->printf(hovWarning, "Potential fixes: fix or relax variables with tight bounds " + nlp->log->printf(hovWarning, + "Potential fixes: fix or relax variables with tight bounds " "(see 'fixed_var' option) or rescale variables.\n"); } } - //scaling factors - double sd = fmax(p_smax,(nrmDualBou+nrmDualEqu)/(n+m)) / p_smax; - double sc = n==0?0:fmax(p_smax,nrmDualBou/n) / p_smax; + // scaling factors + double sd = fmax(p_smax, (nrmDualBou + nrmDualEqu) / (n + m)) / p_smax; + double sc = n == 0 ? 0 : fmax(p_smax, nrmDualBou / n) / p_smax; sd = fmin(sd, 1e+8); sc = fmin(sc, 1e+8); - //actual nlp errors + // actual nlp errors resid.getNlpErrors(nlpoptim, nlpfeas, nlpcomplem, cons_violation); - //finally, the scaled nlp error - nlpoverall = fmax(nlpoptim/sd, fmax(cons_violation, nlpcomplem/sc)); + // finally, the scaled nlp error + nlpoverall = fmax(nlpoptim / sd, fmax(cons_violation, nlpcomplem / sc)); nlp->log->printf(hovScalars, "nlpoverall %g nloptim %g sd %g nlpfeas %g nlpcomplem %g sc %g cons_violation %g\n", @@ -699,18 +706,18 @@ bool hiopAlgFilterIPMBase::evalNlpAndLogErrors(const hiopIterate& it, cons_violation, sc); - //actual log errors + // actual log errors resid.getBarrierErrors(logoptim, logfeas, logcomplem); - //finally, the scaled barrier error - logoverall = fmax(logoptim/sd, fmax(cons_violation, logcomplem/sc)); + // finally, the scaled barrier error + logoverall = fmax(logoptim / sd, fmax(cons_violation, logcomplem / sc)); nlp->runStats.tmSolverInternal.stop(); return true; } bool hiopAlgFilterIPMBase::evalNlp_funcOnly(hiopIterate& iter, double& f, hiopVector& c, hiopVector& d) { - bool new_x=true; + bool new_x = true; // hiopVector& it_x = *iter.get_x(); // double* x = it_x.local_data(); // if(!nlp->eval_f(x, new_x, f)) { @@ -719,7 +726,7 @@ bool hiopAlgFilterIPMBase::evalNlp_funcOnly(hiopIterate& iter, double& f, hiopVe nlp->log->printf(hovError, "Error occured in user objective evaluation\n"); return false; } - new_x= false; //same x for the rest + new_x = false; // same x for the rest if(!nlp->eval_c_d(x, new_x, c, d)) { nlp->log->printf(hovError, "Error occured in user constraint(s) function evaluation\n"); return false; @@ -733,7 +740,7 @@ bool hiopAlgFilterIPMBase::evalNlp_derivOnly(hiopIterate& iter, hiopMatrix& Jac_d, hiopMatrix& Hess_L) { - bool new_x=false; //functions were previously evaluated in the line search + bool new_x = false; // functions were previously evaluated in the line search // hiopVector& it_x = *iter.get_x(); // double* x = it_x.local_data(); hiopVector& x = *iter.get_x(); @@ -746,8 +753,10 @@ bool hiopAlgFilterIPMBase::evalNlp_derivOnly(hiopIterate& iter, return false; } - const hiopVector* yc = iter.get_yc(); assert(yc); - const hiopVector* yd = iter.get_yd(); assert(yd); + const hiopVector* yc = iter.get_yc(); + assert(yc); + const hiopVector* yd = iter.get_yd(); + assert(yd); const int new_lambda = true; if(!nlp->eval_Hess_Lagr(x, new_x, 1., *yc, *yd, new_lambda, Hess_L)) { nlp->log->printf(hovError, "Error occured in user Hessian function evaluation\n"); @@ -759,89 +768,81 @@ bool hiopAlgFilterIPMBase::evalNlp_derivOnly(hiopIterate& iter, /* returns the objective value; valid only after 'run' method has been called */ double hiopAlgFilterIPMBase::getObjective() const { - if(solver_status_==NlpSolve_IncompleteInit || solver_status_ == NlpSolve_SolveNotCalled) { - nlp->log-> - printf(hovError, "getObjective: HiOp did not initialize entirely or the 'run' function was not called."); + if(solver_status_ == NlpSolve_IncompleteInit || solver_status_ == NlpSolve_SolveNotCalled) { + nlp->log->printf(hovError, "getObjective: HiOp did not initialize entirely or the 'run' function was not called."); } - if(solver_status_==NlpSolve_Pending) { - nlp->log-> - printf(hovWarning, "getObjective: HiOp has not completed and objective value may not be optimal."); + if(solver_status_ == NlpSolve_Pending) { + nlp->log->printf(hovWarning, "getObjective: HiOp has not completed and objective value may not be optimal."); } return nlp->user_obj(_f_nlp); } /* returns the primal vector x; valid only after 'run' method has been called */ void hiopAlgFilterIPMBase::getSolution(double* x) const { - if(solver_status_==NlpSolve_IncompleteInit || solver_status_ == NlpSolve_SolveNotCalled) { - nlp->log-> - printf(hovError, "getSolution: HiOp did not initialize entirely or the 'run' function was not called."); + if(solver_status_ == NlpSolve_IncompleteInit || solver_status_ == NlpSolve_SolveNotCalled) { + nlp->log->printf(hovError, "getSolution: HiOp did not initialize entirely or the 'run' function was not called."); } - if(solver_status_==NlpSolve_Pending) { - nlp->log-> - printf(hovWarning, "getSolution: HiOp has not completed yet and solution returned may not be optimal."); + if(solver_status_ == NlpSolve_Pending) { + nlp->log->printf(hovWarning, "getSolution: HiOp has not completed yet and solution returned may not be optimal."); } hiopVector& it_x = *it_curr->get_x(); - //it_curr->get_x()->copyTo(x); + // it_curr->get_x()->copyTo(x); nlp->user_x(it_x, x); } void hiopAlgFilterIPMBase::getDualSolutions(double* zl_a, double* zu_a, double* lambda_a) { - if(solver_status_==NlpSolve_IncompleteInit || solver_status_ == NlpSolve_SolveNotCalled) { + if(solver_status_ == NlpSolve_IncompleteInit || solver_status_ == NlpSolve_SolveNotCalled) { nlp->log->printf(hovError, "getDualSolutions: HiOp did not initialize entirely or the 'run' function was not called."); } - if(solver_status_==NlpSolve_Pending) { + if(solver_status_ == NlpSolve_Pending) { nlp->log->printf(hovWarning, "getSolution: HiOp has not completed yet and solution returned may not be optimal."); } [[maybe_unused]] hiopVector& zl = *it_curr->get_zl(); - [[maybe_unused]] hiopVector& zu = *it_curr->get_zu(); + [[maybe_unused]] hiopVector& zu = *it_curr->get_zu(); nlp->get_dual_solutions(*it_curr, zl_a, zu_a, lambda_a); } int hiopAlgFilterIPMBase::getNumIterations() const { - if(solver_status_==NlpSolve_IncompleteInit || solver_status_ == NlpSolve_SolveNotCalled) - nlp->log-> - printf(hovError, "getNumIterations: HiOp did not initialize or the 'run' function was not called."); - if(solver_status_==NlpSolve_Pending) - nlp->log-> - printf(hovWarning, "getNumIterations: HiOp has not completed upon this call of 'getNumIterations'"); + if(solver_status_ == NlpSolve_IncompleteInit || solver_status_ == NlpSolve_SolveNotCalled) + nlp->log->printf(hovError, "getNumIterations: HiOp did not initialize or the 'run' function was not called."); + if(solver_status_ == NlpSolve_Pending) + nlp->log->printf(hovWarning, "getNumIterations: HiOp has not completed upon this call of 'getNumIterations'"); return nlp->runStats.nIter; } - -bool hiopAlgFilterIPMBase:: -checkTermination(const double& err_nlp, const int& iter_num, hiopSolveStatus& status) +bool hiopAlgFilterIPMBase::checkTermination(const double& err_nlp, const int& iter_num, hiopSolveStatus& status) { - if(err_nlp<=eps_tol && - _err_nlp_optim <= dual_tol_ && - _err_cons_violation <= cons_tol_ && + if(err_nlp <= eps_tol && _err_nlp_optim <= dual_tol_ && _err_cons_violation <= cons_tol_ && _err_nlp_complem <= comp_tol_) { solver_status_ = Solve_Success; return true; } - if(iter_num>=max_n_it) { + if(iter_num >= max_n_it) { solver_status_ = Max_Iter_Exceeded; return true; } - if(eps_rtol>0) { - if(_err_nlp_optim <= eps_rtol * _err_nlp_optim0 && - _err_nlp_feas <= eps_rtol * _err_nlp_feas0 && - _err_nlp_complem <= std::max(eps_rtol,1e-6) * std::min(1.,_err_nlp_complem0)) { + if(eps_rtol > 0) { + if(_err_nlp_optim <= eps_rtol * _err_nlp_optim0 && _err_nlp_feas <= eps_rtol * _err_nlp_feas0 && + _err_nlp_complem <= std::max(eps_rtol, 1e-6) * std::min(1., _err_nlp_complem0)) { solver_status_ = Solve_Success_RelTol; return true; } } - if(err_nlp<=eps_tol_accep) { + if(err_nlp <= eps_tol_accep) { n_accep_iters_++; } else { n_accep_iters_ = 0; } - if(n_accep_iters_>=accep_n_it) { solver_status_ = Solve_Acceptable_Level; return true; } + if(n_accep_iters_ >= accep_n_it) { + solver_status_ = Solve_Acceptable_Level; + return true; + } return false; } @@ -850,34 +851,27 @@ void hiopAlgFilterIPMBase::displayTerminationMsg() { std::string strStatsReport = nlp->runStats.get_summary() + nlp->runStats.kkt.get_summary_total(); switch(solver_status_) { - case Solve_Success: - { + case Solve_Success: { nlp->log->printf(hovSummary, "Successfull termination.\n%s\n", strStatsReport.c_str()); break; } - case Solve_Success_RelTol: - { + case Solve_Success_RelTol: { nlp->log->printf(hovSummary, "Successfull termination (error within the relative tolerance).\n%s\n", strStatsReport.c_str()); break; } - case Solve_Acceptable_Level: - { - nlp->log->printf(hovSummary, - "Solve to only to the acceptable tolerance(s).\n%s\n", - strStatsReport.c_str()); + case Solve_Acceptable_Level: { + nlp->log->printf(hovSummary, "Solve to only to the acceptable tolerance(s).\n%s\n", strStatsReport.c_str()); break; } - case Max_Iter_Exceeded: - { + case Max_Iter_Exceeded: { nlp->log->printf(hovSummary, "Maximum number of iterations reached.\n%s\n", - strStatsReport.c_str());//nlp->runStats.getSummary().c_str()); + strStatsReport.c_str()); // nlp->runStats.getSummary().c_str()); break; } - case Steplength_Too_Small: - { + case Steplength_Too_Small: { nlp->log->printf(hovSummary, "Couldn't solve the problem.\n"); nlp->log->printf(hovSummary, "Linesearch returned unsuccessfully (small step). Probable cause: " @@ -885,39 +879,28 @@ void hiopAlgFilterIPMBase::displayTerminationMsg() nlp->log->printf(hovSummary, "%s\n", strStatsReport.c_str()); break; } - case User_Stopped: - { + case User_Stopped: { nlp->log->printf(hovSummary, "Stopped by the user through the user provided iterate callback.\n%s\n", strStatsReport.c_str()); break; } - case Error_In_FR: - { - nlp->log->printf(hovSummary, - "Feasibility restoration problem failed to converge.\n%s\n", - strStatsReport.c_str()); + case Error_In_FR: { + nlp->log->printf(hovSummary, "Feasibility restoration problem failed to converge.\n%s\n", strStatsReport.c_str()); break; } - case Infeasible_Problem: - { + case Infeasible_Problem: { nlp->log->printf(hovSummary, "Inaccurate gradients/Jacobians or locally infeasible problem.\n%s\n", strStatsReport.c_str()); break; } - case Err_Step_Computation: - { - nlp->log->printf(hovSummary, - "Error in step computation/linear algebra (unrecoverable)\n%s\n", - strStatsReport.c_str()); + case Err_Step_Computation: { + nlp->log->printf(hovSummary, "Error in step computation/linear algebra (unrecoverable)\n%s\n", strStatsReport.c_str()); break; - } - default: - { - nlp->log->printf(hovSummary, - "Unclear why HiOp stopped. This shouldn't happen. \n%s\n", - strStatsReport.c_str()); + } + default: { + nlp->log->printf(hovSummary, "Unclear why HiOp stopped. This shouldn't happen. \n%s\n", strStatsReport.c_str()); assert(false && "Do not know why hiop stopped. This shouldn't happen."); break; } @@ -927,20 +910,19 @@ void hiopAlgFilterIPMBase::displayTerminationMsg() /////////////////////////////////////////////////////////////////////////////////////////////////// // hiopAlgFilterIPMQuasiNewton /////////////////////////////////////////////////////////////////////////////////////////////////// -hiopAlgFilterIPMQuasiNewton::hiopAlgFilterIPMQuasiNewton(hiopNlpDenseConstraints* nlp_in, - const bool within_FR) - : hiopAlgFilterIPMBase(nlp_in, within_FR), - load_state_api_called_(false) +hiopAlgFilterIPMQuasiNewton::hiopAlgFilterIPMQuasiNewton(hiopNlpDenseConstraints* nlp_in, const bool within_FR) + : hiopAlgFilterIPMBase(nlp_in, within_FR), + load_state_api_called_(false) { nlpdc = nlp_in; reload_options(); alloc_alg_objects(); - //parameter based initialization - if(duals_update_type==0) { + // parameter based initialization + if(duals_update_type == 0) { dualsUpdate_ = nlp->alloc_duals_lsq_updater(); - } else if(duals_update_type==1) { + } else if(duals_update_type == 1) { dualsUpdate_ = new hiopDualsNewtonLinearUpdate(nlp); } else { assert(false && "duals_update_type has an unrecognized value"); @@ -949,28 +931,26 @@ hiopAlgFilterIPMQuasiNewton::hiopAlgFilterIPMQuasiNewton(hiopNlpDenseConstraints resetSolverStatus(); } -hiopAlgFilterIPMQuasiNewton::~hiopAlgFilterIPMQuasiNewton() -{ -} +hiopAlgFilterIPMQuasiNewton::~hiopAlgFilterIPMQuasiNewton() {} hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() { - //hiopNlpFormulation nlp may need an update since user may have changed options and - //reruning with the same hiopAlgFilterIPMQuasiNewton instance + // hiopNlpFormulation nlp may need an update since user may have changed options and + // reruning with the same hiopAlgFilterIPMQuasiNewton instance nlp->finalizeInitialization(); - //also reload options + // also reload options reload_options(); - //if nlp changed internally, we need to reinitialize 'this' - if(it_curr->get_x()->get_size()!=nlp->n() || - //Jac_c->get_local_size_n()!=nlpdc->n_local()) { <- this is prone to racing conditions - _Jac_c->n()!=nlpdc->n()) { - //size of the nlp changed internally -> reInitializeNlpObjects(); + // if nlp changed internally, we need to reinitialize 'this' + if(it_curr->get_x()->get_size() != nlp->n() || + // Jac_c->get_local_size_n()!=nlpdc->n_local()) { <- this is prone to racing conditions + _Jac_c->n() != nlpdc->n()) { + // size of the nlp changed internally -> reInitializeNlpObjects(); reInitializeNlpObjects(); } resetSolverStatus(); - //types of linear algebra objects are known now + // types of linear algebra objects are known now auto* Hess = dynamic_cast(_Hess_Lagr); nlp->runStats.initialize(); @@ -1000,33 +980,33 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() // - checkpoint from file (option "checkpoint_load_on_start") // if(nlp->options->GetString("checkpoint_load_on_start") != "yes" && !load_state_api_called_) { - //this also evaluates the nlp - startingProcedure(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); - _mu=mu0; + // this also evaluates the nlp + startingProcedure(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); + _mu = mu0; iter_num_total_ = 0; } else { if(!load_state_api_called_) { // - //checkpoint load from file + // checkpoint load from file // -#ifdef HIOP_USE_AXOM - //load from file: will populate it_curr, _Hess_lagr, and algorithmic parameters +#ifdef HIOP_USE_AXOM + // load from file: will populate it_curr, _Hess_lagr, and algorithmic parameters auto chkpnt_ok = load_state_from_file(nlp->options->GetString("checkpoint_file")); if(!chkpnt_ok) { nlp->log->printf(hovWarning, "Using default starting procedure (no checkpoint load!).\n"); iter_num_total_ = 0; - //fall back on the default starting procedure (it also evaluates the nlp) - startingProcedure(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); - _mu=mu0; + // fall back on the default starting procedure (it also evaluates the nlp) + startingProcedure(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); + _mu = mu0; iter_num_total_ = 0; } #else nlp->log->printf(hovWarning, "Unexpected checkpoint misconfiguration. " - "Will use user-provided starting point.\n"); + "Will use user-provided starting point.\n"); #endif } - //additionally: need to evaluate the nlp + // additionally: need to evaluate the nlp if(!this->evalNlp_noHess(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d)) { nlp->log->printf(hovError, "Failure in evaluating user NLP functions at loaded checkpoint."); return Error_In_User_Function; @@ -1034,19 +1014,19 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() solver_status_ = NlpSolve_SolveNotCalled; } - //update log bar + // update log bar logbar->updateWithNlpInfo(*it_curr, _mu, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); nlp->log->printf(hovScalars, "log bar obj: %g\n", logbar->f_logbar); - //recompute the residuals - resid->update(*it_curr,_f_nlp, *_c, *_d,*_grad_f,*_Jac_c,*_Jac_d, *logbar); + // recompute the residuals + resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); nlp->log->write("First residual-------------", *resid, hovIteration); nlp->runStats.nIter = iter_num_; - bool disableLS = nlp->options->GetString("accept_every_trial_step")=="yes"; + bool disableLS = nlp->options->GetString("accept_every_trial_step") == "yes"; - theta_max = theta_max_fact_*fmax(1.0,resid->get_theta()); - theta_min = theta_min_fact_*fmax(1.0,resid->get_theta()); + theta_max = theta_max_fact_ * fmax(1.0, resid->get_theta()); + theta_min = theta_min_fact_ * fmax(1.0, resid->get_theta()); KktLinSysLowRank* kkt = new KktLinSysLowRank(nlp); assert(kkt != nullptr); @@ -1059,7 +1039,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() } kkt->set_PD_perturb_calc(pd_perturb_); kkt->set_logbar_mu(_mu); - + if(fact_acceptor_) { delete fact_acceptor_; fact_acceptor_ = nullptr; @@ -1069,7 +1049,9 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() _alpha_primal = _alpha_dual = 0; - _err_nlp_optim0=-1.; _err_nlp_feas0=-1.; _err_nlp_complem0=-1; + _err_nlp_optim0 = -1.; + _err_nlp_feas0 = -1.; + _err_nlp_complem0 = -1; // --- Algorithm status 'algStatus ---- //-1 couldn't solve the problem (most likely because small search step. Restauration phase likely needed) @@ -1077,19 +1059,18 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() // 1 max iter reached // 2 user stop via the iteration callback - //int algStatus=0; - bool bret=true; - int lsStatus=-1, lsNum=0; + // int algStatus=0; + bool bret = true; + int lsStatus = -1, lsNum = 0; int use_soc = 0; int use_fr = 0; int num_adjusted_slacks = 0; - bool linsol_safe_mode_on = true; // always use safe mode in the quasi-newton solver - bool linsol_forcequick = false; // always use safe mode in the quasi-newton solver - bool elastic_mode_on = nlp->options->GetString("elastic_mode")!="none"; + bool linsol_safe_mode_on = true; // always use safe mode in the quasi-newton solver + bool linsol_forcequick = false; // always use safe mode in the quasi-newton solver + bool elastic_mode_on = nlp->options->GetString("elastic_mode") != "none"; solver_status_ = NlpSolve_Pending; while(true) { - bret = evalNlpAndLogErrors(*it_curr, *resid, _mu, @@ -1109,13 +1090,14 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() return Error_In_User_Function; } - nlp->log->printf(hovScalars, - " Nlp errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n cons_violation: %23.17e\n", - _err_nlp_feas, - _err_nlp_optim, - _err_nlp_complem, - _err_nlp, - _err_cons_violation); + nlp->log->printf( + hovScalars, + " Nlp errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n cons_violation: %23.17e\n", + _err_nlp_feas, + _err_nlp_optim, + _err_nlp_complem, + _err_nlp, + _err_cons_violation); nlp->log->printf(hovScalars, " LogBar errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n", _err_log_feas, @@ -1124,11 +1106,13 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() _err_log); outputIteration(lsStatus, lsNum, use_soc, use_fr); - if(_err_nlp_optim0<0) { // && _err_nlp_feas0<0 && _err_nlp_complem0<0 - _err_nlp_optim0=_err_nlp_optim; _err_nlp_feas0=_err_nlp_feas; _err_nlp_complem0=_err_nlp_complem; + if(_err_nlp_optim0 < 0) { // && _err_nlp_feas0<0 && _err_nlp_complem0<0 + _err_nlp_optim0 = _err_nlp_optim; + _err_nlp_feas0 = _err_nlp_feas; + _err_nlp_complem0 = _err_nlp_complem; } - //user callback + // user callback if(!nlp->user_callback_iterate(iter_num_, _f_nlp, logbar->f_logbar, @@ -1139,7 +1123,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() *_c, *_d, *it_curr->get_yc(), - *it_curr->get_yd(), //lambda, + *it_curr->get_yd(), // lambda, _err_nlp_feas, _err_nlp_optim, onenorm_pr_curr_, @@ -1147,63 +1131,80 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() _alpha_dual, _alpha_primal, lsNum)) { - solver_status_ = User_Stopped; break; + solver_status_ = User_Stopped; + break; } -#ifdef HIOP_USE_AXOM - //checkpointing - based on options provided by the user +#ifdef HIOP_USE_AXOM + // checkpointing - based on options provided by the user checkpointing_stuff(); #endif - + /************************************************* * Termination check ************************************************/ if(checkTermination(_err_nlp, iter_num_, solver_status_)) { break; } - if(NlpSolve_Pending!=solver_status_) break; //failure of the line search or user stopped. + if(NlpSolve_Pending != solver_status_) break; // failure of the line search or user stopped. /************************************************ * update mu and other parameters ************************************************/ - while(_err_log<=kappa_eps * _mu) { - //update mu and tau (fraction-to-boundary) + while(_err_log <= kappa_eps * _mu) { + // update mu and tau (fraction-to-boundary) auto mu_updated = update_log_barrier_params(*it_curr, _mu, _tau, elastic_mode_on, _mu, _tau); if(!mu_updated) { break; } nlp->log->printf(hovScalars, "Iter[%d] barrier params reduced: mu=%g tau=%g\n", iter_num_, _mu, _tau); - //update only logbar problem and residual (the NLP didn't change) + // update only logbar problem and residual (the NLP didn't change) logbar->updateWithNlpInfo(*it_curr, _mu, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); - + //! should perform only a partial update since NLP didn't change - resid->update(*it_curr,_f_nlp, *_c, *_d,*_grad_f,*_Jac_c,*_Jac_d, *logbar); - bret = evalNlpAndLogErrors(*it_curr, *resid, _mu, - _err_nlp_optim, _err_nlp_feas, _err_nlp_complem, _err_nlp, - _err_log_optim, _err_log_feas, _err_log_complem, _err_log, + resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); + bret = evalNlpAndLogErrors(*it_curr, + *resid, + _mu, + _err_nlp_optim, + _err_nlp_feas, + _err_nlp_complem, + _err_nlp, + _err_log_optim, + _err_log_feas, + _err_log_complem, + _err_log, _err_cons_violation); if(!bret) { solver_status_ = Error_In_User_Function; delete kkt; return Error_In_User_Function; } - nlp->log->printf(hovScalars, - " Nlp errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n cons_violation:%23.17e\n", - _err_nlp_feas, _err_nlp_optim, _err_nlp_complem, _err_nlp, _err_cons_violation); + nlp->log->printf( + hovScalars, + " Nlp errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n cons_violation:%23.17e\n", + _err_nlp_feas, + _err_nlp_optim, + _err_nlp_complem, + _err_nlp, + _err_cons_violation); nlp->log->printf(hovScalars, " LogBar errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n", - _err_log_feas, _err_log_optim, _err_log_complem, _err_log); + _err_log_feas, + _err_log_optim, + _err_log_complem, + _err_log); filter.reinitialize(theta_max); if(elastic_mode_on) { - //reduce mu only once under elastic mode so that bounds do not get tighten too agressively, - //which may result in small steps and invocation of FR + // reduce mu only once under elastic mode so that bounds do not get tighten too agressively, + // which may result in small steps and invocation of FR break; } } - nlp->log->printf(hovScalars, "Iter[%d] logbarObj=%23.17e (mu=%12.5e)\n", iter_num_, logbar->f_logbar,_mu); + nlp->log->printf(hovScalars, "Iter[%d] logbarObj=%23.17e (mu=%12.5e)\n", iter_num_, logbar->f_logbar, _mu); /**************************************************** * Search direction calculation ***************************************************/ @@ -1212,28 +1213,27 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() nlp->runStats.kkt.start_optimiz_iteration(); - //update the Hessian and kkt system - Hess->update(*it_curr,*_grad_f,*_Jac_c,*_Jac_d); + // update the Hessian and kkt system + Hess->update(*it_curr, *_grad_f, *_Jac_c, *_Jac_d); if(!kkt->update(it_curr, _grad_f, _Jac_c, _Jac_d, _Hess_Lagr)) { - nlp->log->write("Unrecoverable error in step computation (factorization) [1]. Will exit here.", - hovError); + nlp->log->write("Unrecoverable error in step computation (factorization) [1]. Will exit here.", hovError); delete kkt; return solver_status_ = Err_Step_Computation; - } // end of if(!kkt->update(it_curr, _grad_f, _Jac_c, _Jac_d, _Hess_Lagr)) + } // end of if(!kkt->update(it_curr, _grad_f, _Jac_c, _Jac_d, _Hess_Lagr)) - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); if(fact_acceptor_ic) { - //compute_search_direction call below updates linsol safe mode flag and linsol_safe_mode_lastiter + // compute_search_direction call below updates linsol safe mode flag and linsol_safe_mode_lastiter if(!compute_search_direction(kkt, linsol_safe_mode_on, linsol_forcequick, iter_num_)) { delete kkt; return solver_status_ = Err_Step_Computation; } } else { - auto* fact_acceptor_dwd = dynamic_cast (fact_acceptor_); + auto* fact_acceptor_dwd = dynamic_cast(fact_acceptor_); assert(fact_acceptor_dwd); - //compute_search_direction call below updates linsol safe mode flag and linsol_safe_mode_lastiter + // compute_search_direction call below updates linsol safe mode flag and linsol_safe_mode_lastiter if(!compute_search_direction_inertia_free(kkt, linsol_safe_mode_on, linsol_forcequick, iter_num_)) { - //it failed under safe mode + // it failed under safe mode delete kkt; return solver_status_ = Err_Step_Computation; } @@ -1251,55 +1251,59 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() ****************************************************************/ nlp->runStats.tmSolverInternal.start(); - //maximum step + // maximum step bret = it_curr->fractionToTheBdry(*dir, _tau, _alpha_primal, _alpha_dual); assert(bret); - //Step `_alpha_primal` may be reduced when option 'moving_lim_abs' or 'moving_lim_rel' is active. - //Returned bool indicates if reduction was done or not. + // Step `_alpha_primal` may be reduced when option 'moving_lim_abs' or 'moving_lim_rel' is active. + // Returned bool indicates if reduction was done or not. bret = ensure_moving_lims(*it_curr, *dir, _alpha_primal); - - double theta = onenorm_pr_curr_ = resid->get_theta(); //at it_curr + + double theta = onenorm_pr_curr_ = resid->get_theta(); // at it_curr double theta_trial; nlp->runStats.tmSolverInternal.stop(); - //lsStatus: line search status for the accepted trial point. Needed to update the filter + // lsStatus: line search status for the accepted trial point. Needed to update the filter //-1 uninitialized (first iteration) - //0 unsuccessful (small step size) - //1 "sufficient decrease" when far away from solution (theta_trial>theta_min) - //2 close to solution but switching condition does not hold, so trial accepted based on "sufficient decrease" - //3 close to solution and switching condition is true; trial accepted based on Armijo - lsStatus=0; lsNum=0; + // 0 unsuccessful (small step size) + // 1 "sufficient decrease" when far away from solution (theta_trial>theta_min) + // 2 close to solution but switching condition does not hold, so trial accepted based on "sufficient decrease" + // 3 close to solution and switching condition is true; trial accepted based on Armijo + lsStatus = 0; + lsNum = 0; use_soc = 0; use_fr = 0; - bool grad_phi_dx_computed=false, iniStep=true; double grad_phi_dx; + bool grad_phi_dx_computed = false, iniStep = true; + double grad_phi_dx; - //this will cache the primal infeasibility norm for (reuse)use in the dual updating - double infeas_nrm_trial=-1.; + // this will cache the primal infeasibility norm for (reuse)use in the dual updating + double infeas_nrm_trial = -1.; // - //this is the linesearch loop + // this is the linesearch loop // double min_ls_step_size = nlp->options->GetNumeric("min_step_size"); while(true) { - nlp->runStats.tmSolverInternal.start(); //--- + nlp->runStats.tmSolverInternal.start(); //--- // check the step against the minimum step size, but accept small // fractionToTheBdry since these may occur for tight bounds at the first iteration(s) - if(!iniStep && _alpha_primallog->write("Minimum step size reached. The problem may be locally infeasible or the " - "gradient inaccurate. Will try to restore feasibility.", - hovError); + if(!iniStep && _alpha_primal < min_ls_step_size) { + nlp->log->write( + "Minimum step size reached. The problem may be locally infeasible or the " + "gradient inaccurate. Will try to restore feasibility.", + hovError); solver_status_ = Steplength_Too_Small; nlp->runStats.tmSolverInternal.stop(); break; } - bret = it_trial->takeStep_primals(*it_curr, *dir, _alpha_primal, _alpha_dual); assert(bret); + bret = it_trial->takeStep_primals(*it_curr, *dir, _alpha_primal, _alpha_dual); + assert(bret); num_adjusted_slacks = it_trial->compute_safe_slacks(*it_curr, _mu); - nlp->runStats.tmSolverInternal.stop(); //--- + nlp->runStats.tmSolverInternal.stop(); //--- - //evaluate the problem at the trial iterate (functions only) + // evaluate the problem at the trial iterate (functions only) if(!this->evalNlp_funcOnly(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial)) { solver_status_ = Error_In_User_Function; nlp->runStats.tmOptimizTotal.stop(); @@ -1308,9 +1312,9 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() logbar->updateWithNlpInfo_trial_funcOnly(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial); - nlp->runStats.tmSolverInternal.start(); //--- + nlp->runStats.tmSolverInternal.start(); //--- - //compute infeasibility theta at trial point. + // compute infeasibility theta at trial point. infeas_nrm_trial = theta_trial = resid->compute_nlp_infeasib_onenorm(*it_trial, *_c_trial, *_d_trial); lsNum++; @@ -1333,14 +1337,14 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() lsStatus = accept_line_search_conditions(theta, theta_trial, _alpha_primal, grad_phi_dx_computed, grad_phi_dx); - if(lsStatus>0) { + if(lsStatus > 0) { nlp->runStats.tmSolverInternal.stop(); break; } nlp->runStats.tmSolverInternal.start(); // second order correction - if(iniStep && theta<=theta_trial) { + if(iniStep && theta <= theta_trial) { bool grad_phi_dx_soc_computed = false; double grad_phi_dx_soc = 0.0; int num_adjusted_slacks_soc = 0; @@ -1350,7 +1354,7 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() grad_phi_dx_soc_computed, grad_phi_dx_soc, num_adjusted_slacks_soc); - if(lsStatus>0) { + if(lsStatus > 0) { num_adjusted_slacks = num_adjusted_slacks_soc; grad_phi_dx_computed = grad_phi_dx_soc_computed; grad_phi_dx = grad_phi_dx_soc; @@ -1363,64 +1367,63 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() assert(lsStatus == 0); _alpha_primal *= 0.5; - iniStep=false; + iniStep = false; nlp->runStats.tmSolverInternal.stop(); - } //end of while for the linesearch loop - + } // end of while for the linesearch loop // adjust slacks and bounds if necessary if(num_adjusted_slacks > 0) { - nlp->log->printf(hovWarning, "%d slacks are too small. Adjust corresponding variable slacks!\n", - num_adjusted_slacks); + nlp->log->printf(hovWarning, "%d slacks are too small. Adjust corresponding variable slacks!\n", num_adjusted_slacks); nlp->adjust_bounds(*it_trial); - //compute infeasibility theta at trial point, since bounds changed --- note that the returned value won't change + // compute infeasibility theta at trial point, since bounds changed --- note that the returned value won't change [[maybe_unused]] const double theta_temp = resid->compute_nlp_infeasib_onenorm(*it_trial, *_c_trial, *_d_trial); #ifndef NDEBUG - if(0==use_soc) { - // TODO: check why this assertion fails - //assert(theta_temp == theta_trial); - } + if(0 == use_soc) { + // TODO: check why this assertion fails + // assert(theta_temp == theta_trial); + } #endif } - //post line-search stuff - //filter is augmented whenever the switching condition or Armijo rule do not hold for the trial point that was just accepted - if(nlp->options->GetString("force_resto")=="yes" && !within_FR_ && iter_num_ == 1) { + // post line-search stuff + // filter is augmented whenever the switching condition or Armijo rule do not hold for the trial point that was just + // accepted + if(nlp->options->GetString("force_resto") == "yes" && !within_FR_ && iter_num_ == 1) { use_fr = apply_feasibility_restoration(kkt); if(use_fr) { // continue iterations if FR is accepted solver_status_ = NlpSolve_Pending; nlp->runStats.tmSolverInternal.stop(); } - } else if(lsStatus==1) { - //need to check switching cond and Armijo to decide if filter is augmented + } else if(lsStatus == 1) { + // need to check switching cond and Armijo to decide if filter is augmented if(!grad_phi_dx_computed) { grad_phi_dx = logbar->directionalDerivative(*dir); - grad_phi_dx_computed=true; + grad_phi_dx_computed = true; } - //this is the actual switching condition - if(grad_phi_dx<0 && _alpha_primal*pow(-grad_phi_dx,s_phi)>delta*pow(theta,s_theta)) { - //check armijo - if(logbar->f_logbar_trial <= logbar->f_logbar + eta_phi*_alpha_primal*grad_phi_dx) { - //filter does not change + // this is the actual switching condition + if(grad_phi_dx < 0 && _alpha_primal * pow(-grad_phi_dx, s_phi) > delta * pow(theta, s_theta)) { + // check armijo + if(logbar->f_logbar_trial <= logbar->f_logbar + eta_phi * _alpha_primal * grad_phi_dx) { + // filter does not change } else { - //Armijo does not hold + // Armijo does not hold filter.add(theta_trial, logbar->f_logbar_trial); } - } else { //switching condition does not hold + } else { // switching condition does not hold filter.add(theta_trial, logbar->f_logbar_trial); } nlp->runStats.tmSolverInternal.stop(); - } else if(lsStatus==2) { - //switching condition does not hold for the trial + } else if(lsStatus == 2) { + // switching condition does not hold for the trial filter.add(theta_trial, logbar->f_logbar_trial); nlp->runStats.tmSolverInternal.stop(); - } else if(lsStatus==3) { - //Armijo (and switching condition) hold, nothing to do. + } else if(lsStatus == 3) { + // Armijo (and switching condition) hold, nothing to do. nlp->runStats.tmSolverInternal.stop(); - } else if(lsStatus==0) { - //small step + } else if(lsStatus == 0) { + // small step if(linsol_safe_mode_on) { // try to do FR use_fr = apply_feasibility_restoration(kkt); @@ -1433,15 +1436,15 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() // exit the linear solve (compute_search_direction) loop nlp->runStats.tmSolverInternal.stop(); break; - } + } nlp->runStats.tmSolverInternal.stop(); } else { nlp->runStats.tmSolverInternal.stop(); assert(false && "unrecognized value for lsStatus"); } - if(NlpSolve_Pending!=solver_status_) { - break; //failure of the line search or user stopped. + if(NlpSolve_Pending != solver_status_) { + break; // failure of the line search or user stopped. } nlp->log->printf(hovScalars, @@ -1460,14 +1463,25 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() // update and adjust the duals // this needs to be done before evalNlp_derivOnly so that the user's NLP functions // get the updated duals - assert(infeas_nrm_trial>=0 && "this should not happen"); - bret = dualsUpdate_->go(*it_curr, *it_trial, - _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *dir, - _alpha_primal, _alpha_dual, _mu, kappa_Sigma, infeas_nrm_trial); + assert(infeas_nrm_trial >= 0 && "this should not happen"); + bret = dualsUpdate_->go(*it_curr, + *it_trial, + _f_nlp, + *_c, + *_d, + *_grad_f, + *_Jac_c, + *_Jac_d, + *dir, + _alpha_primal, + _alpha_dual, + _mu, + kappa_Sigma, + infeas_nrm_trial); assert(bret); nlp->runStats.tmSolverInternal.stop(); - //evaluate derivatives at the trial (and to be accepted) trial point + // evaluate derivatives at the trial (and to be accepted) trial point if(!this->evalNlp_derivOnly(*it_trial, *_grad_f, *_Jac_c, *_Jac_d, *_Hess_Lagr)) { solver_status_ = Error_In_User_Function; nlp->runStats.tmOptimizTotal.stop(); @@ -1476,24 +1490,31 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() } } - nlp->runStats.tmSolverInternal.start(); //----- - //reuse function values - _f_nlp=_f_nlp_trial; - hiopVector* pvec=_c_trial; _c_trial=_c; _c=pvec; pvec=_d_trial; _d_trial=_d; _d=pvec; + nlp->runStats.tmSolverInternal.start(); //----- + // reuse function values + _f_nlp = _f_nlp_trial; + hiopVector* pvec = _c_trial; + _c_trial = _c; + _c = pvec; + pvec = _d_trial; + _d_trial = _d; + _d = pvec; - //update current iterate (do a fast swap of the pointers) - hiopIterate* pit=it_curr; it_curr=it_trial; it_trial=pit; + // update current iterate (do a fast swap of the pointers) + hiopIterate* pit = it_curr; + it_curr = it_trial; + it_trial = pit; nlp->log->printf(hovIteration, "Iter[%d] -> full iterate:", iter_num_); nlp->log->write("", *it_curr, hovIteration); - nlp->runStats.tmSolverInternal.stop(); //----- + nlp->runStats.tmSolverInternal.stop(); //----- - //notify logbar about the changes + // notify logbar about the changes _f_log = _f_nlp; logbar->updateWithNlpInfo(*it_curr, _mu, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); - //update residual - resid->update(*it_curr,_f_nlp, *_c, *_d,*_grad_f,*_Jac_c,*_Jac_d, *logbar); + // update residual + resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); nlp->log->printf(hovIteration, "Iter[%d] full residual:-------------\n", iter_num_); nlp->log->write("", *resid, hovIteration); @@ -1501,10 +1522,10 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() nlp->runStats.tmOptimizTotal.stop(); - //solver_status_ contains the termination information + // solver_status_ contains the termination information displayTerminationMsg(); - //user callback + // user callback nlp->user_callback_solution(solver_status_, *it_curr->get_x(), *it_curr->get_zl(), @@ -1521,31 +1542,49 @@ hiopSolveStatus hiopAlgFilterIPMQuasiNewton::run() void hiopAlgFilterIPMQuasiNewton::outputIteration(int lsStatus, int lsNum, int use_soc, int use_fr) { - if(iter_num_/10*10==iter_num_) + if(iter_num_ / 10 * 10 == iter_num_) nlp->log->printf(hovSummary, "iter objective inf_pr inf_du lg(mu) alpha_du alpha_pr linesrch\n"); - if(lsStatus==-1) - nlp->log->printf(hovSummary, "%4d %14.7e %7.3e %7.3e %6.2f %7.3e %7.3e -(-)\n", - iter_num_total_, _f_nlp/nlp->get_obj_scale(), _err_nlp_feas, _err_nlp_optim, - log10(_mu), _alpha_dual, _alpha_primal); + if(lsStatus == -1) + nlp->log->printf(hovSummary, + "%4d %14.7e %7.3e %7.3e %6.2f %7.3e %7.3e -(-)\n", + iter_num_total_, + _f_nlp / nlp->get_obj_scale(), + _err_nlp_feas, + _err_nlp_optim, + log10(_mu), + _alpha_dual, + _alpha_primal); else { char stepType[2]; - if(lsStatus==1) strcpy(stepType, "s"); - else if(lsStatus==2) strcpy(stepType, "h"); - else if(lsStatus==3) strcpy(stepType, "f"); - else strcpy(stepType, "?"); + if(lsStatus == 1) + strcpy(stepType, "s"); + else if(lsStatus == 2) + strcpy(stepType, "h"); + else if(lsStatus == 3) + strcpy(stepType, "f"); + else + strcpy(stepType, "?"); if(use_soc && lsStatus >= 1 && lsStatus <= 3) { - stepType[0] = (char) ::toupper(stepType[0]); + stepType[0] = (char)::toupper(stepType[0]); } - if(use_fr){ + if(use_fr) { strcpy(stepType, "R"); } - nlp->log->printf(hovSummary, "%4d %14.7e %7.3e %7.3e %6.2f %7.3e %7.3e %d(%s)\n", - iter_num_total_, _f_nlp/nlp->get_obj_scale(), _err_nlp_feas, _err_nlp_optim, - log10(_mu), _alpha_dual, _alpha_primal, lsNum, stepType); + nlp->log->printf(hovSummary, + "%4d %14.7e %7.3e %7.3e %6.2f %7.3e %7.3e %d(%s)\n", + iter_num_total_, + _f_nlp / nlp->get_obj_scale(), + _err_nlp_feas, + _err_nlp_optim, + log10(_mu), + _alpha_dual, + _alpha_primal, + lsNum, + stepType); } } @@ -1565,9 +1604,9 @@ bool hiopAlgFilterIPMQuasiNewton::save_state_to_file(const ::std::string& path) return true; } catch(const std::exception& exp) { nlp->log->printf(hovError, "Error when saving checkpoint to file '%s'\n", path.c_str()); - nlp->log->printf(hovError, " Addtl info: %s\n", exp.what()); - return false; - } + nlp->log->printf(hovError, " Addtl info: %s\n", exp.what()); + return false; + } } bool hiopAlgFilterIPMQuasiNewton::load_state_from_file(const ::std::string& path) noexcept @@ -1585,7 +1624,7 @@ bool hiopAlgFilterIPMQuasiNewton::load_state_from_file(const ::std::string& path return true; } catch(const std::exception& exp) { nlp->log->printf(hovError, "Error in loading checkpoint from file '%s'\n", path.c_str()); - nlp->log->printf(hovError, " Addtl info: %s\n", exp.what()); + nlp->log->printf(hovError, " Addtl info: %s\n", exp.what()); return false; } } @@ -1594,11 +1633,11 @@ void hiopAlgFilterIPMQuasiNewton::save_state_to_sidre_group(::axom::sidre::Group { using IndType = sidre::IndexType; - //iterate state - //create views for each member that needs to be saved + // iterate state + // create views for each member that needs to be saved SidreHelper::copy_iterate_to_views(group, "alg_iterate_", *it_curr); - - //state of quasi-Newton Hessian approximation + + // state of quasi-Newton Hessian approximation HessianDiagPlusRowRank& hqn = dynamic_cast(*_Hess_Lagr); const double hqn_params[] = {(double)hqn.l_max_, (double)hqn.l_curr_, @@ -1608,25 +1647,24 @@ void hiopAlgFilterIPMQuasiNewton::save_state_to_sidre_group(::axom::sidre::Group const size_type nhqn_params = sizeof(hqn_params) / sizeof(double); SidreHelper::copy_array_to_view(group, "Hess_quasiNewton_params", hqn_params, nhqn_params); - //quasi-Newton Hessian stores the previous iterate and corresponding derivatives + // quasi-Newton Hessian stores the previous iterate and corresponding derivatives SidreHelper::copy_iterate_to_views(group, "Hess_quasiNewton_prev_iter", *hqn.it_prev_); SidreHelper::copy_vec_to_view(group, "Hess_quasiNewton_prev_grad", *hqn.grad_f_prev_); - + auto* Jac_c = hqn.Jac_c_prev_; SidreHelper::copy_array_to_view(group, "Hess_quasiNewton_prev_Jacc", Jac_c->local_data_const(), Jac_c->get_local_size_n() * Jac_c->get_local_size_m()); - - + auto* Jac_d = hqn.Jac_d_prev_; SidreHelper::copy_array_to_view(group, "Hess_quasiNewton_prev_Jacd", Jac_d->local_data_const(), Jac_d->get_local_size_n() * Jac_d->get_local_size_m()); - //quasi-Newton Hessian internal states - //memory matrices and internal representation + // quasi-Newton Hessian internal states + // memory matrices and internal representation SidreHelper::copy_array_to_view(group, "Hess_quasiNewton_St", hqn.St_->local_data_const(), @@ -1640,15 +1678,14 @@ void hiopAlgFilterIPMQuasiNewton::save_state_to_sidre_group(::axom::sidre::Group "Hess_quasiNewton_L", hqn.L_->local_data_const(), hqn.L_->get_local_size_n() * hqn.L_->get_local_size_m()); - - - //algorithmic parameters for this state - //mu, iteration number, num MPI ranks - int nranks=1; -#ifdef HIOP_USE_MPI + + // algorithmic parameters for this state + // mu, iteration number, num MPI ranks + int nranks = 1; +#ifdef HIOP_USE_MPI MPI_Comm_size(get_nlp()->get_comm(), &nranks); #endif - constexpr double version = HIOP_VERSION_MAJOR*100 + HIOP_VERSION_MINOR*10 + HIOP_VERSION_PATCH; + constexpr double version = HIOP_VERSION_MAJOR * 100 + HIOP_VERSION_MINOR * 10 + HIOP_VERSION_PATCH; const double alg_params[] = {_mu, (double)iter_num_total_, (double)nranks, version}; const size_type nparams = sizeof(alg_params) / sizeof(double); SidreHelper::copy_array_to_view(group, "alg_params", alg_params, nparams); @@ -1662,7 +1699,6 @@ void hiopAlgFilterIPMQuasiNewton::save_state_to_sidre_group(::axom::sidre::Group nranks, _mu, iter_num_total_); - } void hiopAlgFilterIPMQuasiNewton::load_state_from_sidre_group(const sidre::Group& group) @@ -1670,30 +1706,29 @@ void hiopAlgFilterIPMQuasiNewton::load_state_from_sidre_group(const sidre::Group load_state_api_called_ = true; // - //algorithmic parameters + // algorithmic parameters // //!!!note: nparams needs to match the nparams from save_state_to_data_store - const int nparams = 4; + const int nparams = 4; double alg_params[nparams]; SidreHelper::copy_array_from_view(group, "alg_params", alg_params, nparams); //!!! dev note: match order in save_state_to_data_store _mu = alg_params[0]; iter_num_total_ = alg_params[1]; - - int nranks=1; -#ifdef HIOP_USE_MPI + + int nranks = 1; +#ifdef HIOP_USE_MPI MPI_Comm_size(get_nlp()->get_comm(), &nranks); #endif - if( (int)alg_params[2] != nranks ) { + if((int)alg_params[2] != nranks) { ::std::stringstream ss; - ss << "Mismatch in the number of MPI ranks used to checkpoint. Checkpointing was " << - "done on " << (int)alg_params[2] << " ranks while HiOp currently runs on " << - nranks << " ranks.\n"; + ss << "Mismatch in the number of MPI ranks used to checkpoint. Checkpointing was " << "done on " << (int)alg_params[2] + << " ranks while HiOp currently runs on " << nranks << " ranks.\n"; throw std::runtime_error(ss.str()); } const int ver_major = ((int)alg_params[3] / 100); - const int ver_minor = ((int)alg_params[3] - ver_major*100)/10; - const int ver_patch = (int)alg_params[3] - ver_major*100 - ver_minor*10; + const int ver_minor = ((int)alg_params[3] - ver_major * 100) / 10; + const int ver_patch = (int)alg_params[3] - ver_major * 100 - ver_minor * 10; nlp->log->printf(hovScalars, "Loaded checkpoint from sidre::Group. Found ver %d.%d.%d on %d MPI ranks at " "mu=%12.5e from iter=%d.\n", @@ -1705,12 +1740,12 @@ void hiopAlgFilterIPMQuasiNewton::load_state_from_sidre_group(const sidre::Group iter_num_total_); // - //iterate states + // iterate states // SidreHelper::copy_iterate_from_views(group, "alg_iterate_", *it_curr); // - //state of quasi-Newton Hessian approximation + // state of quasi-Newton Hessian approximation // HessianDiagPlusRowRank& hqn = dynamic_cast(*_Hess_Lagr); //!!!note: nparams needs to match the # of params from save_state_to_sidre_group @@ -1718,21 +1753,21 @@ void hiopAlgFilterIPMQuasiNewton::load_state_from_sidre_group(const sidre::Group double hqn_params[nhqn_params]; SidreHelper::copy_array_from_view(group, "Hess_quasiNewton_params", hqn_params, nhqn_params); - const size_type lim_mem_length = (size_type) hqn_params[1]; - //ensure the internals are allocated for this mem length + const size_type lim_mem_length = (size_type)hqn_params[1]; + // ensure the internals are allocated for this mem length hqn.alloc_for_limited_mem(lim_mem_length); - - hqn.l_max_ = (size_type) hqn_params[0]; + + hqn.l_max_ = (size_type)hqn_params[0]; hqn.l_curr_ = lim_mem_length; hqn.sigma_ = hqn_params[2]; hqn.sigma0_ = hqn_params[3]; hqn.matrix_changed_ = hqn_params[4]; assert(hqn.it_prev_); - //quasi-Newton Hessian stores the previous iterate and corresponding derivatives + // quasi-Newton Hessian stores the previous iterate and corresponding derivatives SidreHelper::copy_iterate_from_views(group, "Hess_quasiNewton_prev_iter", *hqn.it_prev_); SidreHelper::copy_vec_from_view(group, "Hess_quasiNewton_prev_grad", *hqn.grad_f_prev_); - + auto* Jac_c = hqn.Jac_c_prev_; SidreHelper::copy_array_from_view(group, "Hess_quasiNewton_prev_Jacc", @@ -1745,8 +1780,8 @@ void hiopAlgFilterIPMQuasiNewton::load_state_from_sidre_group(const sidre::Group Jac_d->local_data(), Jac_d->get_local_size_n() * Jac_d->get_local_size_m()); - //quasi-Newton Hessian internal states - //memory matrices + // quasi-Newton Hessian internal states + // memory matrices SidreHelper::copy_array_from_view(group, "Hess_quasiNewton_St", hqn.St_->local_data(), @@ -1757,19 +1792,19 @@ void hiopAlgFilterIPMQuasiNewton::load_state_from_sidre_group(const sidre::Group hqn.Yt_->get_local_size_n() * hqn.Yt_->get_local_size_m()); SidreHelper::copy_vec_from_view(group, "Hess_quasiNewton_D", *hqn.D_); SidreHelper::copy_array_from_view(group, - "Hess_quasiNewton_L", - hqn.L_->local_data(), - hqn.L_->get_local_size_n() * hqn.L_->get_local_size_m()); + "Hess_quasiNewton_L", + hqn.L_->local_data(), + hqn.L_->get_local_size_n() * hqn.L_->get_local_size_m()); } void hiopAlgFilterIPMQuasiNewton::checkpointing_stuff() { - if(nlp->options->GetString("checkpoint_save")=="no") { + if(nlp->options->GetString("checkpoint_save") == "no") { return; } int chk_every_N = nlp->options->GetInteger("checkpoint_save_every_N_iter"); - //check iteration - if(iter_num_>0 && iter_num_ % chk_every_N==0) { + // check iteration + if(iter_num_ > 0 && iter_num_ % chk_every_N == 0) { using ::std::string; // replace "#" in checkpointing file with iteration number string path = nlp->options->GetString("checkpoint_file"); @@ -1781,46 +1816,43 @@ void hiopAlgFilterIPMQuasiNewton::checkpointing_stuff() } nlp->log->printf(hovSummary, "Saving checkpoint at iter %d in '%s'.\n", iter_num_, path.c_str()); - //actual checkpointing via axom::sidre + // actual checkpointing via axom::sidre save_state_to_file(path); } } -#endif // HIOP_USE_AXOM +#endif // HIOP_USE_AXOM /****************************************************************************************************** * FULL NEWTON IPM *****************************************************************************************************/ hiopAlgFilterIPMNewton::hiopAlgFilterIPMNewton(hiopNlpFormulation* nlp_in, const bool within_FR) - : hiopAlgFilterIPMBase(nlp_in, within_FR) + : hiopAlgFilterIPMBase(nlp_in, within_FR) { reload_options(); alloc_alg_objects(); - //parameter based initialization - if(duals_update_type==0) { + // parameter based initialization + if(duals_update_type == 0) { dualsUpdate_ = nlp->alloc_duals_lsq_updater(); - } else if(duals_update_type==1) { + } else if(duals_update_type == 1) { dualsUpdate_ = new hiopDualsNewtonLinearUpdate(nlp); } else { assert(false && "duals_update_type has an unrecognized value"); } - resetSolverStatus(); + resetSolverStatus(); } -hiopAlgFilterIPMNewton::~hiopAlgFilterIPMNewton() -{ -} +hiopAlgFilterIPMNewton::~hiopAlgFilterIPMNewton() {} void hiopAlgFilterIPMNewton::reload_options() { auto hess_opt_val = nlp->options->GetString("Hessian"); if(hess_opt_val != "analytical_exact") { - //it can occur since "analytical_exact" is not the default value + // it can occur since "analytical_exact" is not the default value nlp->options->set_val("Hessian", "analytical_exact"); if(nlp->options->is_user_defined("Hessian")) { - nlp->log->printf(hovWarning, "Option Hessian=%s not compatible with the requested NLP formulation and will " "be set to 'analytical_exact'\n", @@ -1833,7 +1865,7 @@ void hiopAlgFilterIPMNewton::reload_options() // 'duals_update_type' should be 'lsq' or 'linear' for 'Hessian=quasinewton_approx' // 'duals_update_type' can only be 'linear' for Newton methods 'Hessian=analytical_exact' - //warn only if these are defined by the user (option file or via SetXXX methods) + // warn only if these are defined by the user (option file or via SetXXX methods) if(nlp->options->is_user_defined("duals_update_type")) { nlp->log->printf(hovWarning, "The option 'duals_update_type=%s' is not valid with 'Hessian=analytical_exact'. " @@ -1842,17 +1874,16 @@ void hiopAlgFilterIPMNewton::reload_options() } nlp->options->set_val("duals_update_type", "linear"); } - + hiopAlgFilterIPMBase::reload_options(); } hiopKKTLinSys* hiopAlgFilterIPMNewton::decideAndCreateLinearSystem(hiopNlpFormulation* nlp) { - //hiopNlpMDS* nlpMDS = nullptr; + // hiopNlpMDS* nlpMDS = nullptr; hiopNlpMDS* nlpMDS = dynamic_cast(nlp); - if(nullptr == nlpMDS) - { + if(nullptr == nlpMDS) { hiopNlpSparse* nlpSp = dynamic_cast(nlp); if(nullptr == nlpSp) { // this is dense linear system. This is the default case. @@ -1873,8 +1904,8 @@ hiopKKTLinSys* hiopAlgFilterIPMNewton::decideAndCreateLinearSystem(hiopNlpFormul return new hiopKKTLinSysCompressedSparseXDYcYd(nlp); } else if(strKKT == "condensed") { return new hiopKKTLinSysCondensedSparse(nlp); - } else if(strKKT == "normaleqn" ) { - if(nlp->m()>0) { + } else if(strKKT == "normaleqn") { + if(nlp->m() > 0) { return new hiopKKTLinSysSparseNormalEqn(nlp); } else { if(nlp->options->is_user_defined("KKTLinsys")) { @@ -1894,57 +1925,56 @@ hiopKKTLinSys* hiopAlgFilterIPMNewton::decideAndCreateLinearSystem(hiopNlpFormul } else { return new hiopKKTLinSysCompressedMDSXYcYd(nlp); } - assert(false && + assert(false && "Could not match linear algebra to NLP formulation. Likely, HiOp was not built with " "all linear algebra modules/options or with an incorrect combination of them"); return nullptr; } -hiopKKTLinSys* -hiopAlgFilterIPMNewton::switch_to_safer_KKT(hiopKKTLinSys* kkt_curr, - const double& mu, - const int& iter_num, - bool& linsol_safe_mode_on, - const int& linsol_safe_mode_max_iters, - int& linsol_safe_mode_last_iter_switched_on, - double& theta_mu, - double& kappa_mu, - bool& switched) +hiopKKTLinSys* hiopAlgFilterIPMNewton::switch_to_safer_KKT(hiopKKTLinSys* kkt_curr, + const double& mu, + const int& iter_num, + bool& linsol_safe_mode_on, + const int& linsol_safe_mode_max_iters, + int& linsol_safe_mode_last_iter_switched_on, + double& theta_mu, + double& kappa_mu, + bool& switched) { #ifdef HIOP_SPARSE if(linsol_safe_mode_on) { - //attempt switching only when running under "condensed" KKT formulation + // attempt switching only when running under "condensed" KKT formulation auto* kkt_condensed = dynamic_cast(kkt_curr); if(kkt_condensed) { assert(nlp->options->GetString("KKTLinsys") == "condensed"); delete kkt_condensed; - - //allocate the "safer" KKT formulation + + // allocate the "safer" KKT formulation auto* kkt = new hiopKKTLinSysCompressedSparseXDYcYd(nlp); - + switched = true; - - //more aggressive mu reduction (this is safe with the stable KKT above) - theta_mu=1.2; - kappa_mu=0.4; - + + // more aggressive mu reduction (this is safe with the stable KKT above) + theta_mu = 1.2; + kappa_mu = 0.4; + kkt->set_safe_mode(linsol_safe_mode_on); - - pd_perturb_->initialize(nlp); + + pd_perturb_->initialize(nlp); pd_perturb_->set_mu(_mu); kkt->set_PD_perturb_calc(pd_perturb_); - - delete fact_acceptor_; - //use inertia correction just be safe - fact_acceptor_ = new hiopFactAcceptorIC(pd_perturb_, nlp->m_eq()+nlp->m_ineq()); - //fact_acceptor_ = decideAndCreateFactAcceptor(pd_perturb_, nlp, kkt); + + delete fact_acceptor_; + // use inertia correction just be safe + fact_acceptor_ = new hiopFactAcceptorIC(pd_perturb_, nlp->m_eq() + nlp->m_ineq()); + // fact_acceptor_ = decideAndCreateFactAcceptor(pd_perturb_, nlp, kkt); kkt->set_fact_acceptor(fact_acceptor_); - + linsol_safe_mode_last_iter_switched_on = iter_num; - + return kkt; - } // end of if(kkt) + } // end of if(kkt) } #endif @@ -1981,34 +2011,30 @@ hiopAlgFilterIPMNewton::switch_to_safer_KKT(hiopKKTLinSys* kkt_curr, return kkt_curr; } -hiopKKTLinSys* -hiopAlgFilterIPMNewton::switch_to_fast_KKT(hiopKKTLinSys* kkt_curr, - const double& mu, - const int& iter_num, - bool& linsol_safe_mode_on, - int& linsol_safe_mode_max_iters, - int& linsol_safe_mode_last_iter_switched_on, - double& theta_mu, - double& kappa_mu, - bool& switched) +hiopKKTLinSys* hiopAlgFilterIPMNewton::switch_to_fast_KKT(hiopKKTLinSys* kkt_curr, + const double& mu, + const int& iter_num, + bool& linsol_safe_mode_on, + int& linsol_safe_mode_max_iters, + int& linsol_safe_mode_last_iter_switched_on, + double& theta_mu, + double& kappa_mu, + bool& switched) { - assert("speculative"==hiop::tolower(nlp->options->GetString("linsol_mode"))); - + assert("speculative" == hiop::tolower(nlp->options->GetString("linsol_mode"))); + #ifdef HIOP_SPARSE // // Switch to quick mode for condensed // auto* kkt = dynamic_cast(kkt_curr); - //KKT should not be a condensed KKT (this is what we switch to) and we should be under - //the condensed KKT user option - - if(nullptr==kkt && nlp->options->GetString("KKTLinsys") == "condensed") { - - if( linsol_safe_mode_on && - (iter_num - linsol_safe_mode_last_iter_switched_on > linsol_safe_mode_max_iters) && - (mu>1e-6) ) - { + // KKT should not be a condensed KKT (this is what we switch to) and we should be under + // the condensed KKT user option + + if(nullptr == kkt && nlp->options->GetString("KKTLinsys") == "condensed") { + if(linsol_safe_mode_on && (iter_num - linsol_safe_mode_last_iter_switched_on > linsol_safe_mode_max_iters) && + (mu > 1e-6)) { linsol_safe_mode_on = false; delete kkt; @@ -2016,28 +2042,28 @@ hiopAlgFilterIPMNewton::switch_to_fast_KKT(hiopKKTLinSys* kkt_curr, switched = true; kkt->set_safe_mode(linsol_safe_mode_on); - - //let safe mode do more iterations next time we switch to safe mode + + // let safe mode do more iterations next time we switch to safe mode linsol_safe_mode_max_iters *= 2; - //reset last iter safe mode was switched on + // reset last iter safe mode was switched on linsol_safe_mode_last_iter_switched_on = 100000; - - //decrease mu reduction strategies since they are numerically friendlier with the Cholesky solve - theta_mu=1.05; - kappa_mu=0.8; - + + // decrease mu reduction strategies since they are numerically friendlier with the Cholesky solve + theta_mu = 1.05; + kappa_mu = 0.8; + pd_perturb_->initialize(nlp); pd_perturb_->set_mu(mu); kkt->set_PD_perturb_calc(pd_perturb_); - + delete fact_acceptor_; - //use options passed by the user for the IC acceptor + // use options passed by the user for the IC acceptor fact_acceptor_ = decideAndCreateFactAcceptor(pd_perturb_, nlp, kkt); kkt->set_fact_acceptor(fact_acceptor_); return kkt; - } + } } #endif @@ -2045,17 +2071,16 @@ hiopAlgFilterIPMNewton::switch_to_fast_KKT(hiopKKTLinSys* kkt_curr, // if linsol_mode = speculative, linsol_safe_mode_on = false by initialization, and hiop starts from fast mode. // if in safe mode and mu is large, switch back to fast model after couple of iters. // - if(nullptr!=dynamic_cast(nlp)) { - if( linsol_safe_mode_on && - (iter_num - linsol_safe_mode_last_iter_switched_on > linsol_safe_mode_max_iters) && - (mu>1e-6) ) { + if(nullptr != dynamic_cast(nlp)) { + if(linsol_safe_mode_on && (iter_num - linsol_safe_mode_last_iter_switched_on > linsol_safe_mode_max_iters) && + (mu > 1e-6)) { linsol_safe_mode_on = false; switched = true; - - //let safe mode do more iterations next time we switch to safe mode + + // let safe mode do more iterations next time we switch to safe mode linsol_safe_mode_max_iters *= 2; - //reset last iter safe mode was switched on + // reset last iter safe mode was switched on linsol_safe_mode_last_iter_switched_on = 100000; return kkt_curr; @@ -2067,52 +2092,48 @@ hiopAlgFilterIPMNewton::switch_to_fast_KKT(hiopKKTLinSys* kkt_curr, return kkt_curr; } - -hiopFactAcceptor* hiopAlgFilterIPMBase:: -decideAndCreateFactAcceptor(hiopPDPerturbation* p, hiopNlpFormulation* nlp, hiopKKTLinSys* kkt) +hiopFactAcceptor* hiopAlgFilterIPMBase::decideAndCreateFactAcceptor(hiopPDPerturbation* p, + hiopNlpFormulation* nlp, + hiopKKTLinSys* kkt) { std::string strKKT = nlp->options->GetString("fact_acceptor"); - if(strKKT == "inertia_free") - { + if(strKKT == "inertia_free") { #ifdef HIOP_SPARSE if(nullptr != dynamic_cast(kkt)) { // for LinSysCondensedSparse correct inertia is different - assert(nullptr != dynamic_cast(nlp) && - "wrong combination of optimization objects was created"); - return new hiopFactAcceptorInertiaFreeDWD(p, 0); + assert(nullptr != dynamic_cast(nlp) && "wrong combination of optimization objects was created"); + return new hiopFactAcceptorInertiaFreeDWD(p, 0); } #endif - return new hiopFactAcceptorInertiaFreeDWD(p, nlp->m_eq()+nlp->m_ineq()); + return new hiopFactAcceptorInertiaFreeDWD(p, nlp->m_eq() + nlp->m_ineq()); } else { -#ifdef HIOP_SPARSE +#ifdef HIOP_SPARSE #ifdef HIOP_USE_RAJA if(nullptr != dynamic_cast(kkt)) { // for LinSysCondensedSparse correct inertia is different - assert(nullptr != dynamic_cast(nlp) && - "wrong combination of optimization objects was created"); - return new hiopFactAcceptorIC(p, 0); + assert(nullptr != dynamic_cast(nlp) && "wrong combination of optimization objects was created"); + return new hiopFactAcceptorIC(p, 0); } #endif -#endif - return new hiopFactAcceptorIC(p, nlp->m_eq()+nlp->m_ineq()); - - } +#endif + return new hiopFactAcceptorIC(p, nlp->m_eq() + nlp->m_ineq()); + } } hiopSolveStatus hiopAlgFilterIPMNewton::run() { - //hiopNlpFormulation nlp may need an update since user may have changed options and - //reruning with the same hiopAlgFilterIPMNewton instance + // hiopNlpFormulation nlp may need an update since user may have changed options and + // reruning with the same hiopAlgFilterIPMNewton instance nlp->finalizeInitialization(); - //also reload options + // also reload options reload_options(); - //if nlp changed internally, we need to reinitialize `this` - if(it_curr->get_x()->get_size()!=nlp->n() || - //Jac_c->get_local_size_n()!=nlpdc->n_local()) { <- this is prone to racing conditions - _Jac_c->n()!=nlp->n()) { - //size of the nlp changed internally -> reInitializeNlpObjects(); + // if nlp changed internally, we need to reinitialize `this` + if(it_curr->get_x()->get_size() != nlp->n() || + // Jac_c->get_local_size_n()!=nlpdc->n_local()) { <- this is prone to racing conditions + _Jac_c->n() != nlp->n()) { + // size of the nlp changed internally -> reInitializeNlpObjects(); reInitializeNlpObjects(); } resetSolverStatus(); @@ -2120,9 +2141,9 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() nlp->runStats.initialize(); nlp->runStats.kkt.initialize(); - //todo: have this as option maybe - //number of safe mode iteration to run once linsol mode is switched to on - //double every time linsol mode is switched on + // todo: have this as option maybe + // number of safe mode iteration to run once linsol mode is switched to on + // double every time linsol mode is switched on int linsol_safe_mode_max_iters = 10; //////////////////////////////////////////////////////////////////////////////////// // run baby run @@ -2140,36 +2161,37 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() nlp->runStats.tmOptimizTotal.start(); - startingProcedure(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); //this also evaluates the nlp - _mu=mu0; + startingProcedure(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); // this also evaluates the nlp + _mu = mu0; - //update log bar + // update log bar logbar->updateWithNlpInfo(*it_curr, _mu, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); nlp->log->printf(hovScalars, "log bar obj: %g\n", logbar->f_logbar); - //recompute the residuals - resid->update(*it_curr,_f_nlp, *_c, *_d,*_grad_f,*_Jac_c,*_Jac_d, *logbar); + // recompute the residuals + resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); nlp->log->write("First residual-------------", *resid, hovIteration); iter_num_ = 0; iter_num_total_ = 0; nlp->runStats.nIter = iter_num_; - bool disableLS = nlp->options->GetString("accept_every_trial_step")=="yes"; + bool disableLS = nlp->options->GetString("accept_every_trial_step") == "yes"; - theta_max = theta_max_fact_*fmax(1.0,resid->get_theta()); - theta_min = theta_min_fact_*fmax(1.0,resid->get_theta()); + theta_max = theta_max_fact_ * fmax(1.0, resid->get_theta()); + theta_min = theta_min_fact_ * fmax(1.0, resid->get_theta()); hiopKKTLinSys* kkt = decideAndCreateLinearSystem(nlp); assert(kkt != NULL); - - if(nlp->options->GetString("normaleqn_regularization_priority")=="dual_first" && nlp->options->GetString("KKTLinsys")=="normaleqn") { - if(nlp->options->GetString("regularization_method")=="randomized") { + + if(nlp->options->GetString("normaleqn_regularization_priority") == "dual_first" && + nlp->options->GetString("KKTLinsys") == "normaleqn") { + if(nlp->options->GetString("regularization_method") == "randomized") { pd_perturb_ = new hiopPDPerturbationDualFirstRand(); } else { pd_perturb_ = new hiopPDPerturbationDualFirstScalar(); } } else { - if(nlp->options->GetString("regularization_method")=="randomized") { + if(nlp->options->GetString("regularization_method") == "randomized") { pd_perturb_ = new hiopPDPerturbationPrimalFirstRand(); } else { pd_perturb_ = new hiopPDPerturbationPrimalFirstScalar(); @@ -2180,7 +2202,7 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() delete kkt; return SolveInitializationError; } - + kkt->set_PD_perturb_calc(pd_perturb_); kkt->set_logbar_mu(_mu); @@ -2190,10 +2212,12 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() } fact_acceptor_ = decideAndCreateFactAcceptor(pd_perturb_, nlp, kkt); kkt->set_fact_acceptor(fact_acceptor_); - + _alpha_primal = _alpha_dual = 0; - _err_nlp_optim0=-1.; _err_nlp_feas0=-1.; _err_nlp_complem0=-1; + _err_nlp_optim0 = -1.; + _err_nlp_feas0 = -1.; + _err_nlp_complem0 = -1; // --- Algorithm status `algStatus` ---- //-1 couldn't solve the problem (most likely because small search step. Restauration phase likely needed) @@ -2201,19 +2225,18 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() // 1 max iter reached // 2 user stop via the iteration callback - bool bret=true; - int lsStatus=-1, lsNum=0; + bool bret = true; + int lsStatus = -1, lsNum = 0; int use_soc = 0; int use_fr = 0; int num_adjusted_slacks = 0; int linsol_safe_mode_last_iter_switched_on = 100000; - bool linsol_safe_mode_on = "stable"==hiop::tolower(nlp->options->GetString("linsol_mode")); - bool linsol_forcequick = "forcequick"==hiop::tolower(nlp->options->GetString("linsol_mode")); - bool elastic_mode_on = nlp->options->GetString("elastic_mode")!="none"; + bool linsol_safe_mode_on = "stable" == hiop::tolower(nlp->options->GetString("linsol_mode")); + bool linsol_forcequick = "forcequick" == hiop::tolower(nlp->options->GetString("linsol_mode")); + bool elastic_mode_on = nlp->options->GetString("elastic_mode") != "none"; solver_status_ = NlpSolve_Pending; while(true) { - bret = evalNlpAndLogErrors(*it_curr, *resid, _mu, @@ -2233,28 +2256,29 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() return Error_In_User_Function; } - nlp->log-> - printf(hovScalars, - " Nlp errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n cons_violation:%23.17e\n", - _err_nlp_feas, - _err_nlp_optim, - _err_nlp_complem, - _err_nlp, - _err_cons_violation); - nlp->log-> - printf(hovScalars, - " LogBar errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n", - _err_log_feas, - _err_log_optim, - _err_log_complem, - _err_log); + nlp->log->printf( + hovScalars, + " Nlp errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n cons_violation:%23.17e\n", + _err_nlp_feas, + _err_nlp_optim, + _err_nlp_complem, + _err_nlp, + _err_cons_violation); + nlp->log->printf(hovScalars, + " LogBar errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n", + _err_log_feas, + _err_log_optim, + _err_log_complem, + _err_log); outputIteration(lsStatus, lsNum, use_soc, use_fr); - if(_err_nlp_optim0<0) { // && _err_nlp_feas0<0 && _err_nlp_complem0<0 - _err_nlp_optim0=_err_nlp_optim; _err_nlp_feas0=_err_nlp_feas; _err_nlp_complem0=_err_nlp_complem; + if(_err_nlp_optim0 < 0) { // && _err_nlp_feas0<0 && _err_nlp_complem0<0 + _err_nlp_optim0 = _err_nlp_optim; + _err_nlp_feas0 = _err_nlp_feas; + _err_nlp_complem0 = _err_nlp_complem; } - //user callback + // user callback if(!nlp->user_callback_iterate(iter_num_, _f_nlp, logbar->f_logbar, @@ -2265,7 +2289,7 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() *_c, *_d, *it_curr->get_yc(), - *it_curr->get_yd(), //lambda, + *it_curr->get_yd(), // lambda, _err_nlp_feas, _err_nlp_optim, onenorm_pr_curr_, @@ -2273,70 +2297,85 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() _alpha_dual, _alpha_primal, lsNum)) { - solver_status_ = User_Stopped; break; + solver_status_ = User_Stopped; + break; } /************************************************* * Termination check - ************************************************/ + ************************************************/ if(checkTermination(_err_nlp, iter_num_, solver_status_)) { break; } - if(NlpSolve_Pending!=solver_status_) break; //failure of the line search or user stopped. + if(NlpSolve_Pending != solver_status_) break; // failure of the line search or user stopped. /************************************************ * update mu and other parameters ************************************************/ - while(_err_log<=kappa_eps * _mu) { - //update mu and tau (fraction-to-boundary) + while(_err_log <= kappa_eps * _mu) { + // update mu and tau (fraction-to-boundary) auto mu_updated = update_log_barrier_params(*it_curr, _mu, _tau, elastic_mode_on, _mu, _tau); if(!mu_updated) { break; } nlp->log->printf(hovScalars, "Iter[%d] barrier params reduced: mu=%g tau=%g\n", iter_num_, _mu, _tau); - - //update only logbar problem and residual (the NLP didn't change) + + // update only logbar problem and residual (the NLP didn't change) logbar->updateWithNlpInfo(*it_curr, _mu, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); - + //! should perform only a partial update since NLP didn't change - resid->update(*it_curr,_f_nlp, *_c, *_d,*_grad_f,*_Jac_c,*_Jac_d, *logbar); - - bret = evalNlpAndLogErrors(*it_curr, *resid, _mu, - _err_nlp_optim, _err_nlp_feas, _err_nlp_complem, _err_nlp, - _err_log_optim, _err_log_feas, _err_log_complem, _err_log, + resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); + + bret = evalNlpAndLogErrors(*it_curr, + *resid, + _mu, + _err_nlp_optim, + _err_nlp_feas, + _err_nlp_complem, + _err_nlp, + _err_log_optim, + _err_log_feas, + _err_log_complem, + _err_log, _err_cons_violation); if(!bret) { solver_status_ = Error_In_User_Function; delete kkt; return Error_In_User_Function; } - nlp->log-> - printf(hovScalars, - " Nlp errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n cons_violation:%23.17e\n", - _err_nlp_feas, _err_nlp_optim, _err_nlp_complem, _err_nlp, _err_cons_violation); - nlp->log-> - printf(hovScalars, - " LogBar errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n", - _err_log_feas, _err_log_optim, _err_log_complem, _err_log); - + nlp->log->printf( + hovScalars, + " Nlp errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n cons_violation:%23.17e\n", + _err_nlp_feas, + _err_nlp_optim, + _err_nlp_complem, + _err_nlp, + _err_cons_violation); + nlp->log->printf(hovScalars, + " LogBar errs: pr-infeas:%23.17e dual-infeas:%23.17e comp:%23.17e overall:%23.17e\n", + _err_log_feas, + _err_log_optim, + _err_log_complem, + _err_log); + filter.reinitialize(theta_max); - + if(elastic_mode_on) { - //reduce mu only once under elastic mode so that bounds do not get tighten too agressively, - //which may result in small steps and invocation of FR + // reduce mu only once under elastic mode so that bounds do not get tighten too agressively, + // which may result in small steps and invocation of FR break; } } - nlp->log->printf(hovScalars, "Iter[%d] logbarObj=%23.17e (mu=%12.5e)\n", iter_num_, logbar->f_logbar,_mu); + nlp->log->printf(hovScalars, "Iter[%d] logbarObj=%23.17e (mu=%12.5e)\n", iter_num_, logbar->f_logbar, _mu); /**************************************************** * Search direction calculation ***************************************************/ kkt->set_logbar_mu(_mu); pd_perturb_->set_mu(_mu); - //this will cache the primal infeasibility norm for (re)use in the dual updating + // this will cache the primal infeasibility norm for (re)use in the dual updating double infeas_nrm_trial; - + nlp->runStats.kkt.start_optimiz_iteration(); // // this is the linear solve (compute_search_direction) loop that iterates at most two times @@ -2353,9 +2392,8 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() // here linsol mode is speculative or stable // - //see if safe mode needs to be switched off - if("speculative"==hiop::tolower(nlp->options->GetString("linsol_mode"))) { - + // see if safe mode needs to be switched off + if("speculative" == hiop::tolower(nlp->options->GetString("linsol_mode"))) { bool switched; kkt = switch_to_fast_KKT(kkt, _mu, @@ -2368,18 +2406,17 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() switched); if(switched) { nlp->log->printf(hovWarning, "Switched to the fast KKT linsys\n"); - assert(false==linsol_safe_mode_on); + assert(false == linsol_safe_mode_on); } - + } else { - assert("stable"==hiop::tolower(nlp->options->GetString("linsol_mode"))); + assert("stable" == hiop::tolower(nlp->options->GetString("linsol_mode"))); linsol_safe_mode_on = true; } } } - for(int linsolve=1; linsolve<=2; ++linsolve) { - + for(int linsolve = 1; linsolve <= 2; ++linsolve) { bool switched; kkt = switch_to_safer_KKT(kkt, _mu, @@ -2394,71 +2431,66 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() nlp->log->printf(hovWarning, "Switched to a stable/safe KKT formulation\n"); } kkt->set_safe_mode(linsol_safe_mode_on); - + // - //update the Hessian and kkt system; usually a matrix factorization occurs + // update the Hessian and kkt system; usually a matrix factorization occurs // if(!kkt->update(it_curr, _grad_f, _Jac_c, _Jac_d, _Hess_Lagr)) { if(linsol_safe_mode_on) { - nlp->log->write("Unrecoverable error in step computation (factorization) [1]. Will exit here.", - hovError); + nlp->log->write("Unrecoverable error in step computation (factorization) [1]. Will exit here.", hovError); delete kkt; return solver_status_ = Err_Step_Computation; } else { - - //failed with 'linsol_mode'='forcequick' means unrecoverable + // failed with 'linsol_mode'='forcequick' means unrecoverable if(linsol_forcequick) { - - nlp->log->write("Unrecoverable error in step computation (factorization) [2]. Will exit here.", - hovError); + nlp->log->write("Unrecoverable error in step computation (factorization) [2]. Will exit here.", hovError); delete kkt; return solver_status_ = Err_Step_Computation; } - //turn on safe mode to repeat linear solve (kkt->update(...) and kkt->compute_directions_w_IR(...) + // turn on safe mode to repeat linear solve (kkt->update(...) and kkt->compute_directions_w_IR(...) //(meaning additional accuracy and stability is requested, possibly from a new kkt class) linsol_safe_mode_on = true; - //linsol_safe_mode_lastiter = iter_num; + // linsol_safe_mode_lastiter = iter_num; nlp->log->printf(hovWarning, - "Requesting additional accuracy and stability from the KKT linear system " - "at iteration %d (safe mode ON) [1]\n", + "Requesting additional accuracy and stability from the KKT linear system " + "at iteration %d (safe mode ON) [1]\n", iter_num_); continue; } - } // end of if(!kkt->update(it_curr, _grad_f, _Jac_c, _Jac_d, _Hess_Lagr)) - - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); + } // end of if(!kkt->update(it_curr, _grad_f, _Jac_c, _Jac_d, _Hess_Lagr)) + + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); if(fact_acceptor_ic) { bool linsol_safe_mode_on_before = linsol_safe_mode_on; - //compute_search_direction call below updates linsol safe mode flag and linsol_safe_mode_lastiter + // compute_search_direction call below updates linsol safe mode flag and linsol_safe_mode_lastiter if(!compute_search_direction(kkt, linsol_safe_mode_on, linsol_forcequick, iter_num_)) { - if(linsol_safe_mode_on_before || linsol_forcequick) { - //it fails under safe mode, this is fatal + // it fails under safe mode, this is fatal delete kkt; return solver_status_ = Err_Step_Computation; } - // safe mode was turned on in the above call because kkt->compute_directions_w_IR(...) failed + // safe mode was turned on in the above call because kkt->compute_directions_w_IR(...) failed continue; - } + } } else { - auto* fact_acceptor_dwd = dynamic_cast (fact_acceptor_); + auto* fact_acceptor_dwd = dynamic_cast(fact_acceptor_); assert(fact_acceptor_dwd); bool linsol_safe_mode_on_before = linsol_safe_mode_on; - //compute_search_direction call below updates linsol safe mode flag and linsol_safe_mode_lastiter + // compute_search_direction call below updates linsol safe mode flag and linsol_safe_mode_lastiter if(!compute_search_direction_inertia_free(kkt, linsol_safe_mode_on, linsol_forcequick, iter_num_)) { if(linsol_safe_mode_on_before || linsol_forcequick) { - //it failed under safe mode + // it failed under safe mode delete kkt; return solver_status_ = Err_Step_Computation; } // safe mode was turned on in the above call because kkt->compute_directions_w_IR(...) failed or the number // of inertia corrections reached max number allowed continue; - } - } - + } + } + nlp->runStats.kkt.end_optimiz_iteration(); if(perf_report_kkt_) { nlp->log->printf(hovSummary, "%s", nlp->runStats.kkt.get_summary_last_iter().c_str()); @@ -2471,45 +2503,48 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() ****************************************************************/ nlp->runStats.tmSolverInternal.start(); - //maximum step - bret = it_curr->fractionToTheBdry(*dir, _tau, _alpha_primal, _alpha_dual); assert(bret); - //Step `_alpha_primal` may be reduced when option 'moving_lim_abs' or 'moving_lim_rel' is active. - //Returned bool indicates if reduction was done or not. + // maximum step + bret = it_curr->fractionToTheBdry(*dir, _tau, _alpha_primal, _alpha_dual); + assert(bret); + // Step `_alpha_primal` may be reduced when option 'moving_lim_abs' or 'moving_lim_rel' is active. + // Returned bool indicates if reduction was done or not. bret = ensure_moving_lims(*it_curr, *dir, _alpha_primal); - - double theta = onenorm_pr_curr_ = resid->get_theta(); //at it_curr + + double theta = onenorm_pr_curr_ = resid->get_theta(); // at it_curr double theta_trial; nlp->runStats.tmSolverInternal.stop(); - //lsStatus: line search status for the accepted trial point. Needed to update the filter + // lsStatus: line search status for the accepted trial point. Needed to update the filter //-1 uninitialized (first iteration) - //0 unsuccessful (small step size) - //1 "sufficient decrease" when far away from solution (theta_trial>theta_min) - //2 close to solution but switching condition does not hold; trial accepted based on "sufficient decrease" - //3 close to solution and switching condition is true; trial accepted based on Armijo - lsStatus=0; lsNum=0; + // 0 unsuccessful (small step size) + // 1 "sufficient decrease" when far away from solution (theta_trial>theta_min) + // 2 close to solution but switching condition does not hold; trial accepted based on "sufficient decrease" + // 3 close to solution and switching condition is true; trial accepted based on Armijo + lsStatus = 0; + lsNum = 0; use_soc = 0; use_fr = 0; - bool grad_phi_dx_computed=false, iniStep=true; double grad_phi_dx; + bool grad_phi_dx_computed = false, iniStep = true; + double grad_phi_dx; - //this will cache the primal infeasibility norm for (re)use in the dual updating - infeas_nrm_trial=-1.; + // this will cache the primal infeasibility norm for (re)use in the dual updating + infeas_nrm_trial = -1.; // // linesearch loop // double min_ls_step_size = nlp->options->GetNumeric("min_step_size"); while(true) { - nlp->runStats.tmSolverInternal.start(); //--- + nlp->runStats.tmSolverInternal.start(); //--- // check the step against the minimum step size, but accept small // fractionToTheBdry since these may occur for tight bounds at the first iteration(s) - if(!iniStep && _alpha_primallog->write("Minimum step size reached. The problem may be locally infeasible or the " - "gradient inaccurate. Will try to restore feasibility.", - hovError); + nlp->log->write( + "Minimum step size reached. The problem may be locally infeasible or the " + "gradient inaccurate. Will try to restore feasibility.", + hovError); solver_status_ = Steplength_Too_Small; } else { // (silently) take the step if not under safe mode @@ -2518,11 +2553,12 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() nlp->runStats.tmSolverInternal.stop(); break; } - bret = it_trial->takeStep_primals(*it_curr, *dir, _alpha_primal, _alpha_dual); assert(bret); + bret = it_trial->takeStep_primals(*it_curr, *dir, _alpha_primal, _alpha_dual); + assert(bret); num_adjusted_slacks = it_trial->compute_safe_slacks(*it_curr, _mu); - nlp->runStats.tmSolverInternal.stop(); //--- + nlp->runStats.tmSolverInternal.stop(); //--- - //evaluate the problem at the trial iterate (functions only) + // evaluate the problem at the trial iterate (functions only) if(!this->evalNlp_funcOnly(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial)) { solver_status_ = Error_In_User_Function; nlp->runStats.tmOptimizTotal.stop(); @@ -2532,16 +2568,22 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() logbar->updateWithNlpInfo_trial_funcOnly(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial); - nlp->runStats.tmSolverInternal.start(); //--- + nlp->runStats.tmSolverInternal.start(); //--- - //compute infeasibility theta at trial point. + // compute infeasibility theta at trial point. infeas_nrm_trial = theta_trial = resid->compute_nlp_infeasib_onenorm(*it_trial, *_c_trial, *_d_trial); lsNum++; - nlp->log->printf(hovLinesearch, " trial point %d: alphaPrimal=%14.8e barier:(%22.16e)>%15.9e " + nlp->log->printf(hovLinesearch, + " trial point %d: alphaPrimal=%14.8e barier:(%22.16e)>%15.9e " "theta:(%22.16e)>%22.16e\n", - lsNum, _alpha_primal, logbar->f_logbar, logbar->f_logbar_trial, theta, theta_trial); + lsNum, + _alpha_primal, + logbar->f_logbar, + logbar->f_logbar_trial, + theta, + theta_trial); if(disableLS) { nlp->runStats.tmSolverInternal.stop(); @@ -2552,24 +2594,23 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() lsStatus = accept_line_search_conditions(theta, theta_trial, _alpha_primal, grad_phi_dx_computed, grad_phi_dx); - if(lsStatus>0) { + if(lsStatus > 0) { nlp->runStats.tmSolverInternal.stop(); break; } - // second order correction - if(iniStep && theta<=theta_trial) { + if(iniStep && theta <= theta_trial) { bool grad_phi_dx_soc_computed = false; double grad_phi_dx_soc = 0.0; int num_adjusted_slacks_soc = 0; lsStatus = apply_second_order_correction(kkt, theta, - theta_trial, + theta_trial, grad_phi_dx_soc_computed, grad_phi_dx_soc, num_adjusted_slacks_soc); - if(lsStatus>0) { + if(lsStatus > 0) { num_adjusted_slacks = num_adjusted_slacks_soc; grad_phi_dx_computed = grad_phi_dx_soc_computed; grad_phi_dx = grad_phi_dx_soc; @@ -2582,29 +2623,30 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() assert(lsStatus == 0); _alpha_primal *= 0.5; - iniStep=false; + iniStep = false; nlp->runStats.tmSolverInternal.stop(); - } //end of while for the linesearch loop + } // end of while for the linesearch loop nlp->runStats.tmSolverInternal.start(); // adjust slacks and bounds if necessary if(num_adjusted_slacks > 0) { - nlp->log->printf(hovWarning, "%d slacks are too small. Adjust corresponding variable slacks!\n", + nlp->log->printf(hovWarning, + "%d slacks are too small. Adjust corresponding variable slacks!\n", num_adjusted_slacks); nlp->adjust_bounds(*it_trial); - //compute infeasibility theta at trial point, since bounds changed --- note that the returned value won't change + // compute infeasibility theta at trial point, since bounds changed --- note that the returned value won't change [[maybe_unused]] const double theta_temp = resid->compute_nlp_infeasib_onenorm(*it_trial, *_c_trial, *_d_trial); #ifndef NDEBUG - if(0==use_soc) { + if(0 == use_soc) { // TODO: check why this assertion fails - //assert(theta_temp == theta_trial); + // assert(theta_temp == theta_trial); } #endif } // post line-search: filter is augmented whenever the switching condition or Armijo rule do not // hold for the trial point that was just accepted - if(nlp->options->GetString("force_resto")=="yes" && !within_FR_ && iter_num_ == 1) { + if(nlp->options->GetString("force_resto") == "yes" && !within_FR_ && iter_num_ == 1) { use_fr = apply_feasibility_restoration(kkt); if(use_fr) { // continue iterations if FR is accepted @@ -2612,50 +2654,47 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() nlp->runStats.tmSolverInternal.stop(); break; } - } else if(lsStatus==1) { - - //need to check switching cond and Armijo to decide if filter is augmented + } else if(lsStatus == 1) { + // need to check switching cond and Armijo to decide if filter is augmented if(!grad_phi_dx_computed) { grad_phi_dx = logbar->directionalDerivative(*dir); - grad_phi_dx_computed=true; + grad_phi_dx_computed = true; } - //this is the actual switching condition - if(grad_phi_dx<0 && (_alpha_primal*pow(-grad_phi_dx,s_phi) > delta*pow(theta,s_theta))) { - //check armijo - if(logbar->f_logbar_trial <= logbar->f_logbar + eta_phi*_alpha_primal*grad_phi_dx) { - //filter does not change + // this is the actual switching condition + if(grad_phi_dx < 0 && (_alpha_primal * pow(-grad_phi_dx, s_phi) > delta * pow(theta, s_theta))) { + // check armijo + if(logbar->f_logbar_trial <= logbar->f_logbar + eta_phi * _alpha_primal * grad_phi_dx) { + // filter does not change } else { - //Armijo does not hold + // Armijo does not hold filter.add(theta_trial, logbar->f_logbar_trial); } - } else { //switching condition does not hold + } else { // switching condition does not hold filter.add(theta_trial, logbar->f_logbar_trial); } nlp->runStats.tmSolverInternal.stop(); - break; //from the linear solve (compute_search_direction) loop + break; // from the linear solve (compute_search_direction) loop - } else if(lsStatus==2) { - //switching condition does not hold for the trial + } else if(lsStatus == 2) { + // switching condition does not hold for the trial filter.add(theta_trial, logbar->f_logbar_trial); nlp->runStats.tmSolverInternal.stop(); - break; //from the linear solve (compute_search_direction) loop + break; // from the linear solve (compute_search_direction) loop - } else if(lsStatus==3) { - //Armijo (and switching condition) hold, nothing to do. + } else if(lsStatus == 3) { + // Armijo (and switching condition) hold, nothing to do. nlp->runStats.tmSolverInternal.stop(); - break; //from the linear solve (compute_search_direction) loop - - } else if(lsStatus==0) { + break; // from the linear solve (compute_search_direction) loop + } else if(lsStatus == 0) { // - //small step + // small step // if(linsol_safe_mode_on) { - // try to do FR use_fr = apply_feasibility_restoration(kkt); @@ -2668,10 +2707,10 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() nlp->runStats.tmSolverInternal.stop(); break; } else { - //here false == linsol_safe_mode_on + // here false == linsol_safe_mode_on if(linsol_forcequick) { // this is likely catastrophic as under 'linsol_mode'='forcequick' we deliberately - //won't switch to safe mode + // won't switch to safe mode // // however take the update; // if the update doesn't pass the convergence test, the optimiz. loop will exit @@ -2682,7 +2721,7 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() } linsol_safe_mode_on = true; - //linsol_safe_mode_lastiter = iter_num; + // linsol_safe_mode_lastiter = iter_num; nlp->log->printf(hovWarning, "Requesting additional accuracy and stability from the KKT linear system " @@ -2693,16 +2732,15 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() // and stability is requested) nlp->runStats.tmSolverInternal.stop(); continue; - } } else { nlp->runStats.tmSolverInternal.stop(); assert(false && "unrecognized value for lsStatus"); } - } // end of the linear solve (compute_search_direction) loop + } // end of the linear solve (compute_search_direction) loop - if(NlpSolve_Pending!=solver_status_) { - break; //failure of the line search or user stopped. + if(NlpSolve_Pending != solver_status_) { + break; // failure of the line search or user stopped. } nlp->log->printf(hovScalars, @@ -2720,14 +2758,25 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() // update and adjust the duals // this needs to be done before evalNlp_derivOnly so that the user's NLP functions // get the updated duals - assert(infeas_nrm_trial>=0 && "this should not happen"); - bret = dualsUpdate_->go(*it_curr, *it_trial, - _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *dir, - _alpha_primal, _alpha_dual, _mu, kappa_Sigma, infeas_nrm_trial); + assert(infeas_nrm_trial >= 0 && "this should not happen"); + bret = dualsUpdate_->go(*it_curr, + *it_trial, + _f_nlp, + *_c, + *_d, + *_grad_f, + *_Jac_c, + *_Jac_d, + *dir, + _alpha_primal, + _alpha_dual, + _mu, + kappa_Sigma, + infeas_nrm_trial); assert(bret); nlp->runStats.tmSolverInternal.stop(); - //evaluate derivatives at the trial (and to be accepted) trial point + // evaluate derivatives at the trial (and to be accepted) trial point if(!this->evalNlp_derivOnly(*it_trial, *_grad_f, *_Jac_c, *_Jac_d, *_Hess_Lagr)) { solver_status_ = Error_In_User_Function; nlp->runStats.tmOptimizTotal.stop(); @@ -2736,26 +2785,33 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() } } - nlp->runStats.tmSolverInternal.start(); //----- - //reuse function values - _f_nlp=_f_nlp_trial; - hiopVector* pvec=_c_trial; _c_trial=_c; _c=pvec; pvec=_d_trial; _d_trial=_d; _d=pvec; + nlp->runStats.tmSolverInternal.start(); //----- + // reuse function values + _f_nlp = _f_nlp_trial; + hiopVector* pvec = _c_trial; + _c_trial = _c; + _c = pvec; + pvec = _d_trial; + _d_trial = _d; + _d = pvec; // - //update current iterate (do a fast swap of the pointers) + // update current iterate (do a fast swap of the pointers) // - hiopIterate* pit=it_curr; it_curr=it_trial; it_trial=pit; + hiopIterate* pit = it_curr; + it_curr = it_trial; + it_trial = pit; nlp->log->printf(hovIteration, "Iter[%d] -> full iterate:", iter_num_); nlp->log->write("", *it_curr, hovIteration); - nlp->runStats.tmSolverInternal.stop(); //----- + nlp->runStats.tmSolverInternal.stop(); //----- - //notify logbar about the changes + // notify logbar about the changes _f_log = _f_nlp; logbar->updateWithNlpInfo(*it_curr, _mu, _f_log, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d); - //update residual - resid->update(*it_curr,_f_nlp, *_c, *_d,*_grad_f,*_Jac_c,*_Jac_d, *logbar); + // update residual + resid->update(*it_curr, _f_nlp, *_c, *_d, *_grad_f, *_Jac_c, *_Jac_d, *logbar); nlp->log->printf(hovIteration, "Iter[%d] full residual:-------------\n", iter_num_); nlp->log->write("", *resid, hovIteration); @@ -2763,10 +2819,10 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() nlp->runStats.tmOptimizTotal.stop(); - //solver_status_ contains the termination information + // solver_status_ contains the termination information displayTerminationMsg(); - //user callback + // user callback nlp->user_callback_solution(solver_status_, *it_curr->get_x(), *it_curr->get_zl(), @@ -2783,39 +2839,57 @@ hiopSolveStatus hiopAlgFilterIPMNewton::run() void hiopAlgFilterIPMNewton::outputIteration(int lsStatus, int lsNum, int use_soc, int use_fr) { - if(iter_num_/10*10==iter_num_) + if(iter_num_ / 10 * 10 == iter_num_) nlp->log->printf(hovSummary, "iter objective inf_pr inf_du lg(mu) alpha_du alpha_pr linesrch\n"); - if(lsStatus==-1) - nlp->log->printf(hovSummary, "%4d %14.7e %7.3e %7.3e %6.2f %7.3e %7.3e -(-)\n", - iter_num_total_, _f_nlp/nlp->get_obj_scale(), _err_nlp_feas, _err_nlp_optim, - log10(_mu), _alpha_dual, _alpha_primal); + if(lsStatus == -1) + nlp->log->printf(hovSummary, + "%4d %14.7e %7.3e %7.3e %6.2f %7.3e %7.3e -(-)\n", + iter_num_total_, + _f_nlp / nlp->get_obj_scale(), + _err_nlp_feas, + _err_nlp_optim, + log10(_mu), + _alpha_dual, + _alpha_primal); else { char stepType[2]; - if(lsStatus==1) strcpy(stepType, "s"); - else if(lsStatus==2) strcpy(stepType, "h"); - else if(lsStatus==3) strcpy(stepType, "f"); - else strcpy(stepType, "?"); + if(lsStatus == 1) + strcpy(stepType, "s"); + else if(lsStatus == 2) + strcpy(stepType, "h"); + else if(lsStatus == 3) + strcpy(stepType, "f"); + else + strcpy(stepType, "?"); if(use_soc && lsStatus >= 1 && lsStatus <= 3) { - stepType[0] = (char) ::toupper(stepType[0]); + stepType[0] = (char)::toupper(stepType[0]); } - if(use_fr){ + if(use_fr) { lsNum = 0; strcpy(stepType, "R"); } - nlp->log->printf(hovSummary, "%4d %14.7e %7.3e %7.3e %6.2f %7.3e %7.3e %d(%s)\n", - iter_num_total_, _f_nlp/nlp->get_obj_scale(), _err_nlp_feas, _err_nlp_optim, - log10(_mu), _alpha_dual, _alpha_primal, lsNum, stepType); + nlp->log->printf(hovSummary, + "%4d %14.7e %7.3e %7.3e %6.2f %7.3e %7.3e %d(%s)\n", + iter_num_total_, + _f_nlp / nlp->get_obj_scale(), + _err_nlp_feas, + _err_nlp_optim, + log10(_mu), + _alpha_dual, + _alpha_primal, + lsNum, + stepType); } } bool hiopAlgFilterIPMBase::ensure_moving_lims(const hiopIterate& it, const hiopIterate& dir, double& alpha_pr) { auto moving_lim_rel = nlp->options->GetNumeric("moving_lim_rel"); - if(moving_lim_rel>0) { + if(moving_lim_rel > 0) { const auto alpha_pr_in = alpha_pr; alpha_pr = moving_lim_rel * alpha_pr; nlp->log->printf(hovLinesearch, @@ -2827,11 +2901,11 @@ bool hiopAlgFilterIPMBase::ensure_moving_lims(const hiopIterate& it, const hiopI } auto moving_lim_abs = nlp->options->GetNumeric("moving_lim_abs"); - if(moving_lim_abs>0) { + if(moving_lim_abs > 0) { const auto alpha_pr_in = alpha_pr; auto x_nrm = dir.get_x()->infnorm(); - auto step_nrm = alpha_pr*x_nrm; - if(step_nrm>moving_lim_abs) { + auto step_nrm = alpha_pr * x_nrm; + if(step_nrm > moving_lim_abs) { alpha_pr = moving_lim_abs / step_nrm; nlp->log->printf(hovLinesearch, "Moving lim (absolute, [%7.3e]): step reduced: %7.3e -> %7.3e.\n", @@ -2843,43 +2917,42 @@ bool hiopAlgFilterIPMBase::ensure_moving_lims(const hiopIterate& it, const hiopI nlp->log->printf(hovLinesearch, "Moving lim (absolute, [%7.3e]) satisfied, step (norm %7.3e) not reduced.\n", moving_lim_abs, - step_nrm); + step_nrm); } - } // end of moving lim abs + } // end of moving lim abs return false; } - int hiopAlgFilterIPMBase::accept_line_search_conditions(const double theta_curr, const double theta_trial, const double alpha_primal, - bool &grad_phi_dx_computed, - double &grad_phi_dx) + bool& grad_phi_dx_computed, + double& grad_phi_dx) { int bret = 0; trial_is_rejected_by_filter = false; // Do the cheap, "sufficient progress" test first, before more involved/expensive tests. - // This simple test is good enough when iterate is far away from solution - if(theta_curr>=theta_min) { - - //check the sufficient decrease condition (18) - if(theta_trial<=(1-gamma_theta)*theta_curr || - logbar->f_logbar_trial<=logbar->f_logbar - gamma_phi*theta_curr) { - //trial good to go - nlp->log->printf(hovLinesearchVerb, "Linesearch: accepting based on suff. decrease " + // This simple test is good enough when iterate is far away from solution + if(theta_curr >= theta_min) { + // check the sufficient decrease condition (18) + if(theta_trial <= (1 - gamma_theta) * theta_curr || + logbar->f_logbar_trial <= logbar->f_logbar - gamma_phi * theta_curr) { + // trial good to go + nlp->log->printf(hovLinesearchVerb, + "Linesearch: accepting based on suff. decrease " "(far from solution)\n"); bret = 1; } else { - //there is no sufficient progress + // there is no sufficient progress trial_is_rejected_by_filter = false; bret = 0; return bret; } - - //check filter condition - if(filter.contains(theta_trial,logbar->f_logbar_trial)) { - //it is in the filter, reject this trial point + + // check filter condition + if(filter.contains(theta_trial, logbar->f_logbar_trial)) { + // it is in the filter, reject this trial point trial_is_rejected_by_filter = true; bret = 0; } @@ -2889,70 +2962,67 @@ int hiopAlgFilterIPMBase::accept_line_search_conditions(const double theta_curr, // first compute grad_phi^T d_x if it hasn't already been computed if(!grad_phi_dx_computed) { grad_phi_dx = logbar->directionalDerivative(*dir); - grad_phi_dx_computed=true; + grad_phi_dx_computed = true; } nlp->log->printf(hovLinesearch, "Linesearch: grad_phi_dx = %22.15e\n", grad_phi_dx); // this is the actual switching condition (19) - if(grad_phi_dx<0. && alpha_primal*pow(-grad_phi_dx,s_phi)>delta*pow(theta_curr,s_theta)) { + if(grad_phi_dx < 0. && alpha_primal * pow(-grad_phi_dx, s_phi) > delta * pow(theta_curr, s_theta)) { // test Armijo - if(logbar->f_logbar_trial <= logbar->f_logbar + eta_phi*alpha_primal*grad_phi_dx) { - nlp->log->printf(hovLinesearchVerb, - "Linesearch: accepting based on Armijo (switch cond also passed)\n"); + if(logbar->f_logbar_trial <= logbar->f_logbar + eta_phi * alpha_primal * grad_phi_dx) { + nlp->log->printf(hovLinesearchVerb, "Linesearch: accepting based on Armijo (switch cond also passed)\n"); - //iterate good to go since it satisfies Armijo + // iterate good to go since it satisfies Armijo bret = 3; } else { - //Armijo is not satisfied + // Armijo is not satisfied trial_is_rejected_by_filter = false; bret = 0; return bret; } - //check filter condition - if(filter.contains(theta_trial,logbar->f_logbar_trial)) { - //it is in the filter, reject this trial point + // check filter condition + if(filter.contains(theta_trial, logbar->f_logbar_trial)) { + // it is in the filter, reject this trial point trial_is_rejected_by_filter = true; bret = 0; } return bret; - } else {//switching condition does not hold - - //ok to go with "sufficient progress" condition even when close to solution, provided the - //switching condition is not satisfied - - //check the filter and the sufficient decrease condition (18) - if(theta_trial<=(1-gamma_theta)*theta_curr || - logbar->f_logbar_trial <= logbar->f_logbar - gamma_phi*theta_curr) { - //trial good to go - nlp->log->printf(hovLinesearchVerb, - "Linesearch: accepting based on suff. decrease (switch cond also passed)\n"); - bret=2; + } else { // switching condition does not hold + + // ok to go with "sufficient progress" condition even when close to solution, provided the + // switching condition is not satisfied + + // check the filter and the sufficient decrease condition (18) + if(theta_trial <= (1 - gamma_theta) * theta_curr || + logbar->f_logbar_trial <= logbar->f_logbar - gamma_phi * theta_curr) { + // trial good to go + nlp->log->printf(hovLinesearchVerb, "Linesearch: accepting based on suff. decrease (switch cond also passed)\n"); + bret = 2; } else { - //there is no sufficient progress + // there is no sufficient progress trial_is_rejected_by_filter = false; return bret; } - - //check filter condition - if(filter.contains(theta_trial,logbar->f_logbar_trial)) { - //it is in the filter, reject this trial point + + // check filter condition + if(filter.contains(theta_trial, logbar->f_logbar_trial)) { + // it is in the filter, reject this trial point trial_is_rejected_by_filter = true; bret = 0; } return bret; - } // end of else: switching condition does not hold - assert(0&&"cannot reach here!"); - } //end of else: theta_trialoptions->GetInteger("max_soc_iter"); double kappa_soc = nlp->options->GetNumeric("kappa_soc"); @@ -2963,11 +3033,11 @@ int hiopAlgFilterIPMBase::apply_second_order_correction(hiopKKTLinSys* kkt, if(!soc_dir) { soc_dir = dir->alloc_clone(); - if(nlp->options->GetString("KKTLinsys")=="full") { + if(nlp->options->GetString("KKTLinsys") == "full") { soc_dir->selectPattern(); - } + } c_soc = nlp->alloc_dual_eq_vec(); - d_soc = nlp->alloc_dual_ineq_vec(); + d_soc = nlp->alloc_dual_ineq_vec(); } double theta_trial_last = 0.; @@ -2978,55 +3048,55 @@ int hiopAlgFilterIPMBase::apply_second_order_correction(hiopKKTLinSys* kkt, int num_soc = 0; bool bret = true; int ls_status = 0; - + // set initial c/d for soc c_soc->copyFrom(nlp->get_crhs()); c_soc->axpy(-1.0, *_c); d_soc->copyFrom(*it_curr->get_d()); d_soc->axpy(-1.0, *_d); - - while(num_socscale(alpha_primal_soc); c_soc->axpy(1.0, nlp->get_crhs()); c_soc->axpy(-1.0, *_c_trial); - + d_soc->scale(alpha_primal_soc); d_soc->axpy(1.0, *it_trial->get_d()); d_soc->axpy(-1.0, *_d_trial); - + // compute rhs for soc. Use resid_trial since it hasn't been used - resid_trial->update_soc(*it_curr, *c_soc, *d_soc, *_grad_f,*_Jac_c,*_Jac_d, *logbar); + resid_trial->update_soc(*it_curr, *c_soc, *d_soc, *_grad_f, *_Jac_c, *_Jac_d, *logbar); // solve for search directions - bret = kkt->computeDirections(resid_trial, soc_dir); + bret = kkt->computeDirections(resid_trial, soc_dir); assert(bret); // Compute step size - bret = it_curr->fractionToTheBdry(*soc_dir, _tau, alpha_primal_soc, alpha_dual_soc); + bret = it_curr->fractionToTheBdry(*soc_dir, _tau, alpha_primal_soc, alpha_dual_soc); assert(bret); - + // Compute trial point - bret = it_trial->takeStep_primals(*it_curr, *soc_dir, alpha_primal_soc, alpha_dual_soc); + bret = it_trial->takeStep_primals(*it_curr, *soc_dir, alpha_primal_soc, alpha_dual_soc); assert(bret); num_adjusted_slacks = it_trial->compute_safe_slacks(*it_curr, _mu); - //evaluate the problem at the trial iterate (functions only) + // evaluate the problem at the trial iterate (functions only) if(!this->evalNlp_funcOnly(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial)) { solver_status_ = Error_In_User_Function; return Error_In_User_Function; } logbar->updateWithNlpInfo_trial_funcOnly(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial); - - //compute infeasibility theta at trial point. + + // compute infeasibility theta at trial point. theta_trial = resid_trial->compute_nlp_infeasib_onenorm(*it_trial, *_c_trial, *_d_trial); ls_status = accept_line_search_conditions(theta_curr, theta_trial, _alpha_primal, grad_phi_dx_computed, grad_phi_dx); - if(ls_status>0) { + if(ls_status > 0) { _alpha_primal = alpha_primal_soc; dir->copyFrom(*soc_dir); resid->copyFrom(*resid_trial); @@ -3036,7 +3106,6 @@ int hiopAlgFilterIPMBase::apply_second_order_correction(hiopKKTLinSys* kkt, } } return ls_status; - } bool hiopAlgFilterIPMBase::apply_feasibility_restoration(hiopKKTLinSys* kkt) @@ -3050,11 +3119,11 @@ bool hiopAlgFilterIPMBase::apply_feasibility_restoration(hiopKKTLinSys* kkt) // variables have already been updated inside the above function return true; } - + // continue robust FR hiopNlpFormulation* nlpFR{nullptr}; hiopNlpMDS* nlpMDS = dynamic_cast(nlp); - if (nlpMDS == nullptr) { + if(nlpMDS == nullptr) { hiopNlpSparse* nlpSp = dynamic_cast(nlp); if(nullptr == nlpSp) { hiopNlpDenseConstraints* nlpD = dynamic_cast(nlp); @@ -3081,7 +3150,7 @@ bool hiopAlgFilterIPMBase::apply_feasibility_restoration(hiopKKTLinSys* kkt) it_trial->get_x()->copyFrom(nlp_fr_interface.get_fr_sol_x()); it_trial->get_d()->copyFrom(nlp_fr_interface.get_fr_sol_d()); reset_var_from_fr_sol(kkt, reset_dual = true); - } + } } } else { // this is MDS system @@ -3138,7 +3207,7 @@ bool hiopAlgFilterIPMBase::solve_feasibility_restoration(hiopKKTLinSys* kkt, hio nlpFR.options->SetStringValue("scaling_type", "none"); // set mu0 to be the maximun of the current barrier parameter mu and norm_inf(|c|)*/ - double theta_ref = resid->getInfeasInfNorm(); //at current point, i.e., reference point + double theta_ref = resid->getInfeasInfNorm(); // at current point, i.e., reference point double mu_FR = std::max(_mu, theta_ref); nlpFR.options->SetNumericValue("mu0", mu_FR); @@ -3177,12 +3246,12 @@ bool hiopAlgFilterIPMBase::reset_var_from_fr_sol(hiopKKTLinSys* kkt, bool reset_ nlp->log->printf(hovScalars, "FR: Update slacks and duals from the modified primals.\n"); } // determine other slacks -// it_trial->determineSlacks(); // TODO: adjust small slacks after hard FR? + // it_trial->determineSlacks(); // TODO: adjust small slacks after hard FR? int num_adjusted_slacks = it_trial->compute_safe_slacks(*it_curr, mu0); // adjust small/negative slacks - if(num_adjusted_slacks > 0) { + if(num_adjusted_slacks > 0) { nlp->log->printf(hovWarning, "%d slacks are too small. Adjust corresponding variable slacks!\n", num_adjusted_slacks); nlp->adjust_bounds(*it_trial); } @@ -3216,7 +3285,7 @@ bool hiopAlgFilterIPMBase::reset_var_from_fr_sol(hiopKKTLinSys* kkt, bool reset_ updater = nlp->alloc_duals_lsq_updater(); deleteUpdater = true; } - //this will update yc and yd in it_trial + // this will update yc and yd in it_trial updater->go(*it_trial, *_grad_f, *_Jac_c, *_Jac_d); if(deleteUpdater) { delete updater; @@ -3229,38 +3298,39 @@ bool hiopAlgFilterIPMBase::reset_var_from_fr_sol(hiopKKTLinSys* kkt, bool reset_ // set step size to 1 _alpha_primal = 1.0; _alpha_dual = 1.0; - + return true; } bool hiopAlgFilterIPMBase::solve_soft_feasibility_restoration(hiopKKTLinSys* kkt) { - int max_soft_fr_iter = 10; //nlp->options->GetInteger("max_soft_fr_iter"); - double kappa_f = 0.999; //nlp->options->GetNumeric("kappa_f"); + int max_soft_fr_iter = 10; // nlp->options->GetInteger("max_soft_fr_iter"); + double kappa_f = 0.999; // nlp->options->GetNumeric("kappa_f"); int num_soft_fr = 0; if(max_soft_fr_iter == 0 || kappa_f == 0.0) { return false; } - + // use vectors from second order correction if(!soc_dir) { soc_dir = dir->alloc_clone(); - if(nlp->options->GetString("KKTLinsys")=="full") { + if(nlp->options->GetString("KKTLinsys") == "full") { soc_dir->selectPattern(); - } + } c_soc = nlp->alloc_dual_eq_vec(); - d_soc = nlp->alloc_dual_ineq_vec(); + d_soc = nlp->alloc_dual_ineq_vec(); } // shortcut --- use soc_dir as a temporary solution - hiopIterate *soft_dir = soc_dir; + hiopIterate* soft_dir = soc_dir; - double kkt_err_curr = resid->get_nrmOne_bar_optim() + resid->get_nrmOne_bar_feasib();; + double kkt_err_curr = resid->get_nrmOne_bar_optim() + resid->get_nrmOne_bar_feasib(); + ; double kkt_err_trial; double alpha_primal_soft; double alpha_dual_soft; - double infeas_nrm_soft=0; // to avoid uninitialized use below + double infeas_nrm_soft = 0; // to avoid uninitialized use below bool bret = false; @@ -3273,44 +3343,55 @@ bool hiopAlgFilterIPMBase::solve_soft_feasibility_restoration(hiopKKTLinSys* kkt bret = true; } else { - //evaluate the problem at the trial iterate (functions only) + // evaluate the problem at the trial iterate (functions only) if(!this->evalNlp_funcOnly(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial)) { solver_status_ = Error_In_User_Function; return true; } // compute rhs for soft feasibility restoration. Use resid_trial since it hasn't been used - resid_trial->update(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial, *_grad_f,*_Jac_c,*_Jac_d, *logbar); - bret = kkt->compute_directions_w_IR(resid_trial, soft_dir); - } + resid_trial->update(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial, *_grad_f, *_Jac_c, *_Jac_d, *logbar); + bret = kkt->compute_directions_w_IR(resid_trial, soft_dir); + } assert(bret); // Compute step size - bret = it_curr->fractionToTheBdry(*soft_dir, _tau, alpha_primal_soft, alpha_dual_soft); - alpha_primal_soft = std::min(alpha_primal_soft,alpha_dual_soft); + bret = it_curr->fractionToTheBdry(*soft_dir, _tau, alpha_primal_soft, alpha_dual_soft); + alpha_primal_soft = std::min(alpha_primal_soft, alpha_dual_soft); alpha_dual_soft = alpha_primal_soft; assert(bret); // Compute trial point - bret = it_trial->takeStep_primals(*it_curr, *soft_dir, alpha_primal_soft, alpha_dual_soft); + bret = it_trial->takeStep_primals(*it_curr, *soft_dir, alpha_primal_soft, alpha_dual_soft); assert(bret); - it_trial->determineSlacks(); // TODO: adjust small slacks in soft FR? - - //evaluate the problem at the trial iterate (functions only) + it_trial->determineSlacks(); // TODO: adjust small slacks in soft FR? + + // evaluate the problem at the trial iterate (functions only) if(!this->evalNlp_funcOnly(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial)) { solver_status_ = Error_In_User_Function; return true; } - //update and adjust the duals - bret = dualsUpdate_->go(*it_curr, *it_trial, - _f_nlp_trial, *_c_trial, *_d_trial, *_grad_f, *_Jac_c, *_Jac_d, *soft_dir, - alpha_primal_soft, alpha_dual_soft, _mu, kappa_Sigma, infeas_nrm_soft); + // update and adjust the duals + bret = dualsUpdate_->go(*it_curr, + *it_trial, + _f_nlp_trial, + *_c_trial, + *_d_trial, + *_grad_f, + *_Jac_c, + *_Jac_d, + *soft_dir, + alpha_primal_soft, + alpha_dual_soft, + _mu, + kappa_Sigma, + infeas_nrm_soft); assert(bret); logbar->updateWithNlpInfo_trial_funcOnly(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial); - - //compute primal-dual error at trial point. - resid_trial->update(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial, *_grad_f,*_Jac_c,*_Jac_d, *logbar); + + // compute primal-dual error at trial point. + resid_trial->update(*it_trial, _f_nlp_trial, *_c_trial, *_d_trial, *_grad_f, *_Jac_c, *_Jac_d, *logbar); kkt_err_trial = resid_trial->get_nrmOne_bar_optim() + resid_trial->get_nrmOne_bar_feasib(); // sufficient reduction in the KKT error is not achieved, return @@ -3318,14 +3399,14 @@ bool hiopAlgFilterIPMBase::solve_soft_feasibility_restoration(hiopKKTLinSys* kkt bret = false; break; } - - //check filter condition + + // check filter condition double theta_trial = resid_trial->get_nrmOne_bar_feasib(); - if(filter.contains(theta_trial,logbar->f_logbar_trial)) { - //it is in the filter, reject this trial point and continue the iterates + if(filter.contains(theta_trial, logbar->f_logbar_trial)) { + // it is in the filter, reject this trial point and continue the iterates num_soft_fr++; } else { - // continue the regular iterate from the trial point + // continue the regular iterate from the trial point bret = true; break; } @@ -3342,32 +3423,31 @@ bool hiopAlgFilterIPMBase::compute_search_direction(hiopKKTLinSys* kkt, // solve for search directions // if(!kkt->compute_directions_w_IR(resid, dir)) { - if(linsol_safe_mode_on) { nlp->log->write("Unrecoverable error in step computation (solve)[1]. Will exit here.", hovError); - return false; // will trigger a solver_status_ = Err_Step_Computation; + return false; // will trigger a solver_status_ = Err_Step_Computation; } else { if(linsol_forcequick) { nlp->log->write("Unrecoverable error in step computation (solve)[2]. Will exit here.", hovError); - return false; // will trigger a solver_status_ = Err_Step_Computation; + return false; // will trigger a solver_status_ = Err_Step_Computation; } linsol_safe_mode_on = true; - //linsol_safe_mode_lastiter = iter_num; + // linsol_safe_mode_lastiter = iter_num; nlp->log->printf(hovWarning, - "Requesting additional accuracy and stability from the KKT linear system " - "at iteration %d (safe mode ON) [3]\n", + "Requesting additional accuracy and stability from the KKT linear system " + "at iteration %d (safe mode ON) [3]\n", iter_num); // return false and use safe mode to repeat linear solve (kkt->update(...) and kkt->compute_directions_w_IR(...) // (meaning additional accuracy and stability is requested, possibly from a new kkt class) return false; } - } // end of if(!kkt->compute_directions_w_IR(resid, dir)) + } // end of if(!kkt->compute_directions_w_IR(resid, dir)) - //at this point all is good in terms of searchDirections computations as far as the linear solve - //is concerned; the search direction can be of ascent because some fast factorizations do not - //support inertia calculation; this case will be handled later on in the optimization loop + // at this point all is good in terms of searchDirections computations as far as the linear solve + // is concerned; the search direction can be of ascent because some fast factorizations do not + // support inertia calculation; this case will be handled later on in the optimization loop return true; } @@ -3380,39 +3460,37 @@ bool hiopAlgFilterIPMBase::compute_search_direction_inertia_free(hiopKKTLinSys* size_type num_refact = 0; const size_t max_refactorization = 10u; - while(true) - { + while(true) { // // solve for search directions // if(!kkt->compute_directions_w_IR(resid, dir)) { - if(linsol_safe_mode_on) { nlp->log->write("Unrecoverable error in step computation (solve)[1]. Will exit here.", hovError); - return false; //solver_status_ = Err_Step_Computation; + return false; // solver_status_ = Err_Step_Computation; } else { if(linsol_forcequick) { nlp->log->write("Unrecoverable error in step computation (solve)[2]. Will exit here.", hovError); - return false; //solver_status_ = Err_Step_Computation; + return false; // solver_status_ = Err_Step_Computation; } linsol_safe_mode_on = true; nlp->log->printf(hovWarning, - "Requesting additional accuracy and stability from the KKT linear system " - "at iteration %d (safe mode ON)[4]\n", + "Requesting additional accuracy and stability from the KKT linear system " + "at iteration %d (safe mode ON)[4]\n", iter_num); // return false and use safe mode to repeat linear solve (kkt->update(...) and kkt->compute_directions_w_IR(...) // (meaning additional accuracy and stability is requested, possibly from a new kkt class) return false; } - } // end of if(!kkt->compute_directions_w_IR(resid, dir)) - - //at this point all is good in terms of searchDirections computations as far as the linear solve - //is concerned; the search direction can be of ascent because some fast factorizations do not - //support inertia calculation; this case will be handled later on in this loop + } // end of if(!kkt->compute_directions_w_IR(resid, dir)) + + // at this point all is good in terms of searchDirections computations as far as the linear solve + // is concerned; the search direction can be of ascent because some fast factorizations do not + // support inertia calculation; this case will be handled later on in this loop //( //! todo nopiv inertia calculation )) - + if(kkt->test_direction(dir, _Hess_Lagr)) { break; } else { @@ -3431,4 +3509,4 @@ bool hiopAlgFilterIPMBase::compute_search_direction_inertia_free(hiopKKTLinSys* return true; } -} //end namespace +} // namespace hiop diff --git a/src/Optimization/hiopAlgFilterIPM.hpp b/src/Optimization/hiopAlgFilterIPM.hpp index 10653bbe5..624fe60e9 100644 --- a/src/Optimization/hiopAlgFilterIPM.hpp +++ b/src/Optimization/hiopAlgFilterIPM.hpp @@ -68,11 +68,13 @@ #include "hiopFactAcceptor.hpp" #ifdef HIOP_USE_AXOM -namespace axom { -namespace sidre { -class Group; // forward declaration -} +namespace axom +{ +namespace sidre +{ +class Group; // forward declaration } +} // namespace axom #endif #include "hiopTimer.hpp" @@ -80,7 +82,8 @@ class Group; // forward declaration namespace hiop { -class hiopAlgFilterIPMBase { +class hiopAlgFilterIPMBase +{ public: hiopAlgFilterIPMBase(hiopNlpFormulation* nlp_, const bool within_FR = false); virtual ~hiopAlgFilterIPMBase(); @@ -90,7 +93,7 @@ class hiopAlgFilterIPMBase { /** computes primal-dual point and returns the evaluation of the problem at this point */ virtual int startingProcedure(hiopIterate& it_ini, - double &f, + double& f, hiopVector& c_, hiopVector& d_, hiopVector& grad_, @@ -107,8 +110,8 @@ class hiopAlgFilterIPMBase { /* returns the number of iterations */ int getNumIterations() const; /* returns the logbar object */ - hiopLogBarProblem* get_logbar(){return logbar;} - + hiopLogBarProblem* get_logbar() { return logbar; } + inline hiopNlpFormulation* get_nlp() const { return nlp; } inline hiopIterate* get_it_curr() const { return it_curr; } inline hiopIterate* get_it_trial() const { return it_trial; } @@ -121,9 +124,9 @@ class hiopAlgFilterIPMBase { inline hiopVector* get_c() const { return _c; } inline hiopVector* get_d() const { return _d; } inline hiopResidual* get_resid() const { return resid; } - inline bool filter_contains(const double theta, const double logbar_obj) const - { - return filter.contains(theta, logbar_obj); + inline bool filter_contains(const double theta, const double logbar_obj) const + { + return filter.contains(theta, logbar_obj); } /// Setter for the primal steplength. @@ -131,7 +134,7 @@ class hiopAlgFilterIPMBase { protected: bool evalNlp(hiopIterate& iter, - double &f, + double& f, hiopVector& c_, hiopVector& d_, hiopVector& grad_, @@ -145,7 +148,7 @@ class hiopAlgFilterIPMBase { * to be evaluated at a later time. */ bool evalNlp_noHess(hiopIterate& iter, - double &f, + double& f, hiopVector& c_, hiopVector& d_, hiopVector& grad_, @@ -184,7 +187,7 @@ class hiopAlgFilterIPMBase { /** * Reduces log barrier parameters `mu` and `tau` and returns true if it was possible to reduce them. The - * parameter `mu` may reach its min value and may not be reduced (same for `tau`), in which case the + * parameter `mu` may reach its min value and may not be reduced (same for `tau`), in which case the * method returns false. */ bool update_log_barrier_params(hiopIterate& it, @@ -193,23 +196,25 @@ class hiopAlgFilterIPMBase { const bool& elastic_mode_on, double& mu_new, double& tau_new); + protected: // second order correction virtual int apply_second_order_correction(hiopKKTLinSys* kkt, const double theta_curr, const double theta_trial0, - bool &grad_phi_dx_computed, - double &grad_phi_dx, - int &num_adjusted_slacks); + bool& grad_phi_dx_computed, + double& grad_phi_dx, + int& num_adjusted_slacks); // check if all the line search conditions are accepted or not virtual int accept_line_search_conditions(const double theta_curr, const double theta_trial, const double alpha_primal, - bool &grad_phi_dx_computed, - double &grad_phi_dx); + bool& grad_phi_dx_computed, + double& grad_phi_dx); /// @brief Step-length `alpha_pr` may be reduced when option 'moving_lim_abs' or 'moving_lim_rel' is active. bool ensure_moving_lims(const hiopIterate& it, const hiopIterate& dir, double& alpha_pr); + public: /// @brief do feasibility restoration virtual bool apply_feasibility_restoration(hiopKKTLinSys* kkt); @@ -219,7 +224,7 @@ class hiopAlgFilterIPMBase { virtual void outputIteration(int lsStatus, int lsNum, int use_soc = 0, int use_fr = 0) = 0; - //returns whether the algorithm should stop and set an appropriate solve status + // returns whether the algorithm should stop and set an appropriate solve status bool checkTermination(const double& _err_nlp, const int& iter_num, hiopSolveStatus& status); void displayTerminationMsg(); @@ -228,9 +233,7 @@ class hiopAlgFilterIPMBase { virtual void reload_options(); /// @brief Decides and creates regularization objects based on user options and NLP formulation. - virtual hiopFactAcceptor* decideAndCreateFactAcceptor(hiopPDPerturbation* p, - hiopNlpFormulation* nlp, - hiopKKTLinSys* kkt); + virtual hiopFactAcceptor* decideAndCreateFactAcceptor(hiopPDPerturbation* p, hiopNlpFormulation* nlp, hiopKKTLinSys* kkt); virtual bool compute_search_direction(hiopKKTLinSys* kkt, bool& linsol_safe_mode_on, @@ -246,11 +249,12 @@ class hiopAlgFilterIPMBase { /* Helper method containing all the allocations done by the base algorithm class. * * @note: Should not be virtual nor be overridden since it is called in the constructor. - */ + */ void alloc_alg_objects(); - /// Helper method containing all the deallocations done by the base algorithm class. Avoid overidding it. + /// Helper method containing all the deallocations done by the base algorithm class. Avoid overidding it. void dealloc_alg_objects(); + protected: hiopNlpFormulation* nlp; hiopFilter filter; @@ -263,21 +267,21 @@ class hiopAlgFilterIPMBase { hiopIterate* dir; hiopIterate* soc_dir; - hiopResidual* resid, *resid_trial; + hiopResidual *resid, *resid_trial; - /// Iteration number maintained internally by the algorithm and reset at each solve/run + /// Iteration number maintained internally by the algorithm and reset at each solve/run int iter_num_; /// Total iteration number over multiple solves/restarts using checkpoints. int iter_num_total_; - - double _err_nlp_optim, _err_nlp_feas, _err_nlp_complem;//not scaled by sd, sc, and sc - double _err_nlp_optim0,_err_nlp_feas0,_err_nlp_complem0;//initial errors, not scaled by sd, sc, and sc - double _err_log_optim, _err_log_feas, _err_log_complem;//not scaled by sd, sc, and sc - double _err_nlp, _err_log; //max of the above (scaled) - double _err_cons_violation; // constraint violation (Note: this is slightly different from _err_nlp_feas) - double onenorm_pr_curr_; //one norm of the constraint infeasibility - - //class for updating the duals multipliers + + double _err_nlp_optim, _err_nlp_feas, _err_nlp_complem; // not scaled by sd, sc, and sc + double _err_nlp_optim0, _err_nlp_feas0, _err_nlp_complem0; // initial errors, not scaled by sd, sc, and sc + double _err_log_optim, _err_log_feas, _err_log_complem; // not scaled by sd, sc, and sc + double _err_nlp, _err_log; // max of the above (scaled) + double _err_cons_violation; // constraint violation (Note: this is slightly different from _err_nlp_feas) + double onenorm_pr_curr_; // one norm of the constraint infeasibility + + // class for updating the duals multipliers hiopDualsUpdater* dualsUpdate_; /* Log-barrier problem data @@ -287,58 +291,58 @@ class hiopAlgFilterIPMBase { * such as lower or upper bounds, is in the NlpFormulation */ double _f_nlp, _f_log, _f_nlp_trial, _f_log_trial; - hiopVector *_c,*_d, *_c_trial, *_d_trial; + hiopVector *_c, *_d, *_c_trial, *_d_trial; hiopVector *c_soc, *d_soc; - hiopVector* _grad_f, *_grad_f_trial; //gradient of the log-barrier objective function - hiopMatrix* _Jac_c, *_Jac_c_trial; //Jacobian of c(x), the equality part - hiopMatrix* _Jac_d, *_Jac_d_trial; //Jacobian of d(x), the inequality part + hiopVector *_grad_f, *_grad_f_trial; // gradient of the log-barrier objective function + hiopMatrix *_Jac_c, *_Jac_c_trial; // Jacobian of c(x), the equality part + hiopMatrix *_Jac_d, *_Jac_d_trial; // Jacobian of d(x), the inequality part hiopMatrix* _Hess_Lagr; /** Algorithms's working quantities */ double _mu, _tau, _alpha_primal, _alpha_dual; - //initialized to 1e4*max{1,\theta(x_0)} and used in the filter as an upper acceptability - //limit for infeasibility + // initialized to 1e4*max{1,\theta(x_0)} and used in the filter as an upper acceptability + // limit for infeasibility double theta_max; - //1e-4*max{1,\theta(x_0)} used in the switching condition during the line search + // 1e-4*max{1,\theta(x_0)} used in the switching condition during the line search double theta_min; double theta_max_fact_; double theta_min_fact_; /*** Algorithm's parameters ***/ - double mu0; //intial mu - double kappa_mu; //linear decrease factor in mu - double theta_mu; //exponent for a Mehtrotra-style decrease of mu - double eps_tol; //abs tolerance for the NLP error - double eps_rtol; //rel tolerance for the NLP error - double dual_tol_; //abs tolerance for the dual infeasibility - double cons_tol_; //abs tolerance for the constraint violation - double comp_tol_; //abs tolerance for the complementary conditions - double tau_min; //min value for the fraction-to-the-boundary parameter: tau_k=max{tau_min,1-\mu_k} - double kappa_eps; //tolerance for the barrier problem, relative to mu: error<=kappa_eps*mu - double kappa1,kappa2; //params for default starting point - double p_smax; //threshold for the magnitude of the multipliers used in the error estimation - double gamma_theta, //sufficient progress parameters for the feasibility violation - gamma_phi; //and log barrier objective - double s_theta, //parameters in the switch condition of the linearsearch (eq 19) - s_phi, delta; - double eta_phi; //parameter in the Armijo rule - double kappa_Sigma; //parameter in resetting the duals to guarantee closedness of the - //primal-dual logbar Hessian to the primal logbar Hessian - int duals_update_type;//type of the update for dual multipliers: 0 LSQ (default, recommended - //for quasi-Newton); 1 Newton + double mu0; // intial mu + double kappa_mu; // linear decrease factor in mu + double theta_mu; // exponent for a Mehtrotra-style decrease of mu + double eps_tol; // abs tolerance for the NLP error + double eps_rtol; // rel tolerance for the NLP error + double dual_tol_; // abs tolerance for the dual infeasibility + double cons_tol_; // abs tolerance for the constraint violation + double comp_tol_; // abs tolerance for the complementary conditions + double tau_min; // min value for the fraction-to-the-boundary parameter: tau_k=max{tau_min,1-\mu_k} + double kappa_eps; // tolerance for the barrier problem, relative to mu: error<=kappa_eps*mu + double kappa1, kappa2; // params for default starting point + double p_smax; // threshold for the magnitude of the multipliers used in the error estimation + double gamma_theta, // sufficient progress parameters for the feasibility violation + gamma_phi; // and log barrier objective + double s_theta, // parameters in the switch condition of the linearsearch (eq 19) + s_phi, delta; + double eta_phi; // parameter in the Armijo rule + double kappa_Sigma; // parameter in resetting the duals to guarantee closedness of the + // primal-dual logbar Hessian to the primal logbar Hessian + int duals_update_type; // type of the update for dual multipliers: 0 LSQ (default, recommended + // for quasi-Newton); 1 Newton int max_n_it; - int dualsInitializ; //type of initialization for the duals of constraints: 0 LSQ (default), 1 set to zero - int accep_n_it; //after how many iterations with acceptable tolerance should the alg. stop - double eps_tol_accep;//acceptable tolerance + int dualsInitializ; // type of initialization for the duals of constraints: 0 LSQ (default), 1 set to zero + int accep_n_it; // after how many iterations with acceptable tolerance should the alg. stop + double eps_tol_accep; // acceptable tolerance - //internal flags related to the state of the solver + // internal flags related to the state of the solver hiopSolveStatus solver_status_; int n_accep_iters_; bool trial_is_rejected_by_filter; /* Flag for timing and timing breakdown report for the KKT solve */ bool perf_report_kkt_; - + /* Flag to tell if this is a FR problem */ bool within_FR_; @@ -362,12 +366,12 @@ class hiopAlgFilterIPMQuasiNewton : public hiopAlgFilterIPMBase * @param group a reference to the group where state will be saved to * * @exception std::runtime indicates the group contains a view whose size does not match - * the size of the corresponding HiOp algorithm state variable of parameter. + * the size of the corresponding HiOp algorithm state variable of parameter. * - * @details - * Each state variable of each parameter of HiOp algorithm will be saved in a named - * view within the group. A new view will be created within the group if it does not - * already exist. If it exists, the view must have same number of elements as the + * @details + * Each state variable of each parameter of HiOp algorithm will be saved in a named + * view within the group. A new view will be created within the group if it does not + * already exist. If it exists, the view must have same number of elements as the * as the size of the corresponding state variable. This means that this method will * throw an exception if an existing group is reused to save a problem that changed * sizes since the group was created. @@ -379,20 +383,20 @@ class hiopAlgFilterIPMQuasiNewton : public hiopAlgFilterIPMBase * * @param group a pointer to group containing the a prevously saved HiOp algorithm state. * - * @exception std::runtime indicates the group does not contain a view expected by this + * @exception std::runtime indicates the group does not contain a view expected by this * method or the view's number of elements mismatches the size of the corresponding HiOp * state. The latter can occur if the file was saved with a different number of MPI ranks. * - * @details + * @details * Copies views from the sidre::Group passed as argument to HiOp algorithm's state variables - * and parameters. The group should be created by first calling save_state_to_sidre_group - * for a problem/NLP of the same sizes as the problem for which this method is called. - * The method expects views within the group with certain names. If one such view is not - * found or has a number of elements different than the size of the corresponding HiOp state, - * then a std::runtime_error exception is thrown. The latter can occur when the loading + * and parameters. The group should be created by first calling save_state_to_sidre_group + * for a problem/NLP of the same sizes as the problem for which this method is called. + * The method expects views within the group with certain names. If one such view is not + * found or has a number of elements different than the size of the corresponding HiOp state, + * then a std::runtime_error exception is thrown. The latter can occur when the loading * occurs for a instance of HiOp that is not ran on the same number of MPI ranks used to * save the file. - */ + */ virtual void load_state_from_sidre_group(const ::axom::sidre::Group& group); /** @@ -400,7 +404,7 @@ class hiopAlgFilterIPMQuasiNewton : public hiopAlgFilterIPMBase * * @param path the name of the file * @return true if successful, false otherwise - * + * * @details * Internally, HiOp uses axom::sidre::DataStore and sidre's scalable IO. A detailed * error description is sent to the log if an error or exception is caught. @@ -408,25 +412,25 @@ class hiopAlgFilterIPMQuasiNewton : public hiopAlgFilterIPMBase bool save_state_to_file(const ::std::string& path) noexcept; /** - * @brief Load the state of the algorithm from checkpoint file. + * @brief Load the state of the algorithm from checkpoint file. * * @param path the name of the file to load from * @return true if successful, false otherwise - * - * @details - * The file should contains a axom::sidre::DataStore that was previously saved using - * save_state_to_file(). A detailed error description is sent to the log if an error + * + * @details + * The file should contains a axom::sidre::DataStore that was previously saved using + * save_state_to_file(). A detailed error description is sent to the log if an error * or exception is caught. */ bool load_state_from_file(const ::std::string& path) noexcept; -#endif // HIOP_USE_AXOM +#endif // HIOP_USE_AXOM private: virtual void outputIteration(int lsStatus, int lsNum, int use_soc = 0, int use_fr = 0); -#ifdef HIOP_USE_AXOM +#ifdef HIOP_USE_AXOM ///@brief The options-based logic for saving checkpoint and the call to save_state(). void checkpointing_stuff(); -#endif // HIOP_USE_AXOM +#endif // HIOP_USE_AXOM private: hiopNlpDenseConstraints* nlpdc; @@ -434,15 +438,15 @@ class hiopAlgFilterIPMQuasiNewton : public hiopAlgFilterIPMBase bool load_state_api_called_; private: - hiopAlgFilterIPMQuasiNewton() : hiopAlgFilterIPMBase(NULL) {}; - hiopAlgFilterIPMQuasiNewton(const hiopAlgFilterIPMQuasiNewton& ) : hiopAlgFilterIPMBase(NULL){}; - hiopAlgFilterIPMQuasiNewton& operator=(const hiopAlgFilterIPMQuasiNewton&) {return *this;}; + hiopAlgFilterIPMQuasiNewton() + : hiopAlgFilterIPMBase(NULL) {}; + hiopAlgFilterIPMQuasiNewton(const hiopAlgFilterIPMQuasiNewton&) + : hiopAlgFilterIPMBase(NULL) {}; + hiopAlgFilterIPMQuasiNewton& operator=(const hiopAlgFilterIPMQuasiNewton&) { return *this; }; }; -//for backward compatibility we make 'hiopAlgFilterIPM' name available +// for backward compatibility we make 'hiopAlgFilterIPM' name available typedef hiopAlgFilterIPMQuasiNewton hiopAlgFilterIPM; - - class hiopAlgFilterIPMNewton : public hiopAlgFilterIPMBase { public: @@ -458,25 +462,25 @@ class hiopAlgFilterIPMNewton : public hiopAlgFilterIPMBase virtual hiopKKTLinSys* decideAndCreateLinearSystem(hiopNlpFormulation* nlp); /** - * Switch to the safer (more stable) KKT formulation and linear solver. - * - * This is currently done only for `hiopNlpSparseIneq` NLP formulation. In this case + * Switch to the safer (more stable) KKT formulation and linear solver. + * + * This is currently done only for `hiopNlpSparseIneq` NLP formulation. In this case * `hiopKKTLinSysCondensedSparse` is the quick KKT formulation and `hiopKKTLinSysCompressedSparseXDYcYd` * is the safe KKT formulation. For other combinations of NLP and KKT formulations the method * returns the KKT passed as argument. */ virtual hiopKKTLinSys* switch_to_safer_KKT(hiopKKTLinSys* kkt_curr, - const double& mu, - const int& iter_num, - bool& linsol_safe_mode_on, - const int& linsol_safe_mode_max_iters, - int& linsol_safe_mode_last_iter_switched_on, - double& theta_mu, - double& kappa_mu, - bool& switched); + const double& mu, + const int& iter_num, + bool& linsol_safe_mode_on, + const int& linsol_safe_mode_max_iters, + int& linsol_safe_mode_last_iter_switched_on, + double& theta_mu, + double& kappa_mu, + bool& switched); /** - * Switch to the quick KKT formulation and linear solver is switching conditions are met. + * Switch to the quick KKT formulation and linear solver is switching conditions are met. */ virtual hiopKKTLinSys* switch_to_fast_KKT(hiopKKTLinSys* kkt_curr, const double& mu, @@ -490,11 +494,14 @@ class hiopAlgFilterIPMNewton : public hiopAlgFilterIPMBase /// Overridden method from base class that does some preprocessing specific to Newton solver void reload_options(); + private: - hiopAlgFilterIPMNewton() : hiopAlgFilterIPMBase(NULL) {}; - hiopAlgFilterIPMNewton(const hiopAlgFilterIPMNewton& ) : hiopAlgFilterIPMBase(NULL){}; - hiopAlgFilterIPMNewton& operator=(const hiopAlgFilterIPMNewton&) {return *this;}; + hiopAlgFilterIPMNewton() + : hiopAlgFilterIPMBase(NULL) {}; + hiopAlgFilterIPMNewton(const hiopAlgFilterIPMNewton&) + : hiopAlgFilterIPMBase(NULL) {}; + hiopAlgFilterIPMNewton& operator=(const hiopAlgFilterIPMNewton&) { return *this; }; }; -} //end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopAlgPrimalDecomp.cpp b/src/Optimization/hiopAlgPrimalDecomp.cpp index 9b3eea835..f428d56c9 100644 --- a/src/Optimization/hiopAlgPrimalDecomp.cpp +++ b/src/Optimization/hiopAlgPrimalDecomp.cpp @@ -52,7 +52,6 @@ * */ - #include "hiopAlgPrimalDecomp.hpp" #include "hiopInterfacePrimalDecomp.hpp" #include "hiopLogger.hpp" @@ -65,152 +64,154 @@ using namespace std; namespace hiop { #ifdef HIOP_USE_MPI - /** This struct provides the info necessary for the recourse approximation function - * buffer[n+1] contains both the function value and gradient w.r.t x. - * buffer[0] is the function value and buffer[1:n] the gradient. - * Contains send and receive functionalities for the values in buffer. - */ - struct ReqRecourseApprox +/** This struct provides the info necessary for the recourse approximation function + * buffer[n+1] contains both the function value and gradient w.r.t x. + * buffer[0] is the function value and buffer[1:n] the gradient. + * Contains send and receive functionalities for the values in buffer. + */ +struct ReqRecourseApprox +{ + ReqRecourseApprox() + : ReqRecourseApprox(1) + {} + ReqRecourseApprox(const int& n) + { + n_ = n; + buffer = LinearAlgebraFactory::create_vector("DEFAULT", n_ + 1); + request_ = MPI_REQUEST_NULL; + } + virtual ~ReqRecourseApprox() { delete buffer; } + int test() { - ReqRecourseApprox() : ReqRecourseApprox(1) {} - ReqRecourseApprox(const int& n) - { - n_ = n; - buffer = LinearAlgebraFactory::create_vector("DEFAULT", n_+1); + int mpi_test_flag; + MPI_Status mpi_status; + int ierr = MPI_Test(&request_, &mpi_test_flag, &mpi_status); + assert(MPI_SUCCESS == ierr); + if(mpi_test_flag) { request_ = MPI_REQUEST_NULL; } - virtual ~ReqRecourseApprox() - { - delete buffer; - } - int test() - { - int mpi_test_flag; MPI_Status mpi_status; - int ierr = MPI_Test(&request_, &mpi_test_flag, &mpi_status); - assert(MPI_SUCCESS == ierr); - if (mpi_test_flag) { - request_ = MPI_REQUEST_NULL; - } - return mpi_test_flag; - } + return mpi_test_flag; + } - void wait() { - auto ierr = MPI_Wait(&request_, MPI_STATUS_IGNORE); - assert(MPI_SUCCESS == ierr); - request_ = MPI_REQUEST_NULL; - } + void wait() + { + auto ierr = MPI_Wait(&request_, MPI_STATUS_IGNORE); + assert(MPI_SUCCESS == ierr); + request_ = MPI_REQUEST_NULL; + } - // only receive signal (that computation is finished), no actual functional information - void post_recv_end_signal(int tag, int rank_from, MPI_Comm comm) - { - assert(request_ == MPI_REQUEST_NULL); - int recv_sign = 0; - int ierr = MPI_Irecv(&recv_sign, 1, MPI_INT, rank_from, tag, comm, &request_); - assert(MPI_SUCCESS == ierr); - } - // only sende signal (that computation is finished), no actual functional information - void post_send_end_signal(int tag, int rank_to, MPI_Comm comm) - { - assert(request_ == MPI_REQUEST_NULL); - int send_sign = 0; - int ierr = MPI_Isend(&send_sign, 1, MPI_INT, rank_to, tag, comm, &request_); - assert(MPI_SUCCESS == ierr); - } + // only receive signal (that computation is finished), no actual functional information + void post_recv_end_signal(int tag, int rank_from, MPI_Comm comm) + { + assert(request_ == MPI_REQUEST_NULL); + int recv_sign = 0; + int ierr = MPI_Irecv(&recv_sign, 1, MPI_INT, rank_from, tag, comm, &request_); + assert(MPI_SUCCESS == ierr); + } + // only sende signal (that computation is finished), no actual functional information + void post_send_end_signal(int tag, int rank_to, MPI_Comm comm) + { + assert(request_ == MPI_REQUEST_NULL); + int send_sign = 0; + int ierr = MPI_Isend(&send_sign, 1, MPI_INT, rank_to, tag, comm, &request_); + assert(MPI_SUCCESS == ierr); + } - void post_recv(int tag, int rank_from, MPI_Comm comm) - { - assert(request_ == MPI_REQUEST_NULL); - double* buffer_arr = buffer->local_data(); - int ierr = MPI_Irecv(buffer_arr, n_+1, MPI_DOUBLE, rank_from, tag, comm, &request_); - assert(MPI_SUCCESS == ierr); - } - void post_send(int tag, int rank_to, MPI_Comm comm) - { - assert(request_ == MPI_REQUEST_NULL); - double* buffer_arr = buffer->local_data(); - int ierr = MPI_Isend(buffer_arr, n_+1, MPI_DOUBLE, rank_to, tag, comm, &request_); - assert(MPI_SUCCESS == ierr); - } - double value(){return buffer->local_data()[0];} - void set_value(const double v){buffer->local_data()[0]=v;} - double grad(int i){return buffer->local_data()[i+1];} - void set_grad(const double* g) - { - buffer->copyFromStarting(1,g,n_); - } + void post_recv(int tag, int rank_from, MPI_Comm comm) + { + assert(request_ == MPI_REQUEST_NULL); + double* buffer_arr = buffer->local_data(); + int ierr = MPI_Irecv(buffer_arr, n_ + 1, MPI_DOUBLE, rank_from, tag, comm, &request_); + assert(MPI_SUCCESS == ierr); + } + void post_send(int tag, int rank_to, MPI_Comm comm) + { + assert(request_ == MPI_REQUEST_NULL); + double* buffer_arr = buffer->local_data(); + int ierr = MPI_Isend(buffer_arr, n_ + 1, MPI_DOUBLE, rank_to, tag, comm, &request_); + assert(MPI_SUCCESS == ierr); + } + double value() { return buffer->local_data()[0]; } + void set_value(const double v) { buffer->local_data()[0] = v; } + double grad(int i) { return buffer->local_data()[i + 1]; } + void set_grad(const double* g) { buffer->copyFromStarting(1, g, n_); } - MPI_Request request_; - private: - int n_; - hiopVector* buffer; - }; + MPI_Request request_; - /** This struct is used to post receive and request for contingency - * index that is to be solved by the solver ranks. - */ - struct ReqContingencyIdx - { - ReqContingencyIdx() : ReqContingencyIdx(-1) {} - ReqContingencyIdx(const int& idx_) +private: + int n_; + hiopVector* buffer; +}; + +/** This struct is used to post receive and request for contingency + * index that is to be solved by the solver ranks. + */ +struct ReqContingencyIdx +{ + ReqContingencyIdx() + : ReqContingencyIdx(-1) + {} + ReqContingencyIdx(const int& idx_) : request_(MPI_REQUEST_NULL) - { - idx=idx_; - } + { + idx = idx_; + } - int test() { - int mpi_test_flag; MPI_Status mpi_status; - int ierr = MPI_Test(&request_, &mpi_test_flag, &mpi_status); - assert(MPI_SUCCESS == ierr); - if (mpi_test_flag) { - request_ = MPI_REQUEST_NULL; - } - return mpi_test_flag; - } - void wait() { - int ierr = MPI_Wait(&request_, MPI_STATUS_IGNORE); - assert(MPI_SUCCESS == ierr); + int test() + { + int mpi_test_flag; + MPI_Status mpi_status; + int ierr = MPI_Test(&request_, &mpi_test_flag, &mpi_status); + assert(MPI_SUCCESS == ierr); + if(mpi_test_flag) { request_ = MPI_REQUEST_NULL; } - void post_recv(int tag, int rank_from, MPI_Comm comm) - { - assert(request_ == MPI_REQUEST_NULL); - int ierr = MPI_Irecv(&idx, 1, MPI_INT, rank_from, tag, comm, &request_); - assert(MPI_SUCCESS == ierr); - } - void post_send(int tag, int rank_to, MPI_Comm comm) - { - assert(request_ == MPI_REQUEST_NULL); - int ierr = MPI_Isend(&idx, 1, MPI_INT, rank_to, tag, comm, &request_); - assert(MPI_SUCCESS == ierr); - } - int value(){return idx;} - void set_idx(const int& i){idx = i;} - MPI_Request request_; - private: - int idx; - }; -#endif - + return mpi_test_flag; + } + void wait() + { + int ierr = MPI_Wait(&request_, MPI_STATUS_IGNORE); + assert(MPI_SUCCESS == ierr); + request_ = MPI_REQUEST_NULL; + } + void post_recv(int tag, int rank_from, MPI_Comm comm) + { + assert(request_ == MPI_REQUEST_NULL); + int ierr = MPI_Irecv(&idx, 1, MPI_INT, rank_from, tag, comm, &request_); + assert(MPI_SUCCESS == ierr); + } + void post_send(int tag, int rank_to, MPI_Comm comm) + { + assert(request_ == MPI_REQUEST_NULL); + int ierr = MPI_Isend(&idx, 1, MPI_INT, rank_to, tag, comm, &request_); + assert(MPI_SUCCESS == ierr); + } + int value() { return idx; } + void set_idx(const int& i) { idx = i; } + MPI_Request request_; +private: + int idx; +}; +#endif -hiopAlgPrimalDecomposition::HessianApprox:: -HessianApprox(hiopInterfacePriDecProblem* priDecProb, - hiopOptions* options_pridec, - MPI_Comm comm_world) - : HessianApprox(-1, priDecProb, options_pridec, comm_world) +hiopAlgPrimalDecomposition::HessianApprox::HessianApprox(hiopInterfacePriDecProblem* priDecProb, + hiopOptions* options_pridec, + MPI_Comm comm_world) + : HessianApprox(-1, priDecProb, options_pridec, comm_world) { comm_world_ = comm_world; log_ = new hiopLogger(options_, stdout, 0, comm_world); } -hiopAlgPrimalDecomposition::HessianApprox:: -HessianApprox(const int& n, - [[maybe_unused]] hiopInterfacePriDecProblem* priDecProb, - hiopOptions* options_pridec, - MPI_Comm comm_world) - : options_(options_pridec), comm_world_(comm_world) +hiopAlgPrimalDecomposition::HessianApprox::HessianApprox(const int& n, + [[maybe_unused]] hiopInterfacePriDecProblem* priDecProb, + hiopOptions* options_pridec, + MPI_Comm comm_world) + : options_(options_pridec), + comm_world_(comm_world) { - n_=n; + n_ = n; fkm1 = 1e20; fk = 1e20; fkm1_lin = 1e20; @@ -222,18 +223,17 @@ HessianApprox(const int& n, ykm1 = xkm1->alloc_clone(); // g_{k-1} gkm1 = xkm1->alloc_clone(); - + comm_world_ = comm_world; log_ = new hiopLogger(options_, stdout, 0, comm_world_); } -hiopAlgPrimalDecomposition::HessianApprox:: -HessianApprox(const int& n, - const double ratio, - hiopInterfacePriDecProblem* priDecProb, - hiopOptions* options_pridec, - MPI_Comm comm_world) - : HessianApprox(n, priDecProb, options_pridec, comm_world) +hiopAlgPrimalDecomposition::HessianApprox::HessianApprox(const int& n, + const double ratio, + hiopInterfacePriDecProblem* priDecProb, + hiopOptions* options_pridec, + MPI_Comm comm_world) + : HessianApprox(n, priDecProb, options_pridec, comm_world) { ratio_ = ratio; } @@ -244,207 +244,196 @@ hiopAlgPrimalDecomposition::HessianApprox::~HessianApprox() delete xkm1; delete skm1; delete ykm1; - delete gkm1; + delete gkm1; } /** n_ is the dimension of x, hence the dimension of g_k, skm1, etc */ -void hiopAlgPrimalDecomposition::HessianApprox::set_n(const int n) -{ - n_ = n; -} +void hiopAlgPrimalDecomposition::HessianApprox::set_n(const int n) { n_ = n; } -void hiopAlgPrimalDecomposition::HessianApprox:: -set_xkm1(const hiopVector& xk) +void hiopAlgPrimalDecomposition::HessianApprox::set_xkm1(const hiopVector& xk) { - if(xkm1==NULL) { - assert(n_!=-1); + if(xkm1 == NULL) { + assert(n_ != -1); xkm1 = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), n_); } else { xkm1->copyFromStarting(0, xk.local_data_const(), n_); } } -void hiopAlgPrimalDecomposition::HessianApprox:: -set_gkm1(const hiopVector& grad) +void hiopAlgPrimalDecomposition::HessianApprox::set_gkm1(const hiopVector& grad) { - if(gkm1==NULL) { - assert(n_!=-1); + if(gkm1 == NULL) { + assert(n_ != -1); gkm1 = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), n_); } else { gkm1->copyFromStarting(0, grad.local_data_const(), n_); } } -void hiopAlgPrimalDecomposition::HessianApprox:: -initialize(const double f_val, const hiopVector& xk, const hiopVector& grad) +void hiopAlgPrimalDecomposition::HessianApprox::initialize(const double f_val, const hiopVector& xk, const hiopVector& grad) { fk = f_val; - if(xkm1==NULL) { - assert(n_!=-1); + if(xkm1 == NULL) { + assert(n_ != -1); xkm1 = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), n_); } else { xkm1->copyFromStarting(0, xk.local_data_const(), n_); } - if(gkm1==NULL) { - assert(n_!=-1); + if(gkm1 == NULL) { + assert(n_ != -1); gkm1 = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), n_); } else { gkm1->copyFromStarting(0, grad.local_data_const(), n_); } - if(skm1==NULL) { + if(skm1 == NULL) { skm1 = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), n_); - assert(n_!=-1); + assert(n_ != -1); skm1->copyFromStarting(0, xk.local_data_const(), n_); } - if(ykm1==NULL) { + if(ykm1 == NULL) { ykm1 = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), n_); - assert(n_!=-1); + assert(n_ != -1); ykm1->copyFromStarting(0, xk.local_data_const(), n_); } } - -void hiopAlgPrimalDecomposition::HessianApprox:: -update_hess_coeff(const hiopVector& xk, - const hiopVector& gk, - const double& f_val) + +void hiopAlgPrimalDecomposition::HessianApprox::update_hess_coeff(const hiopVector& xk, + const hiopVector& gk, + const double& f_val) { fkm1 = fk; fk = f_val; - assert(skm1!=NULL && ykm1!=NULL); + assert(skm1 != NULL && ykm1 != NULL); - assert(xk.get_local_size()==skm1->get_local_size()); + assert(xk.get_local_size() == skm1->get_local_size()); skm1->copyFrom(xk); skm1->axpy(-1.0, *xkm1); ykm1->copyFrom(gk); ykm1->axpy(-1.0, *gkm1); - - assert(xkm1->get_local_size()==xk.get_local_size()); + + assert(xkm1->get_local_size() == xk.get_local_size()); xkm1->copyFrom(xk); - fkm1_lin = gkm1->dotProductWith(*skm1); + fkm1_lin = gkm1->dotProductWith(*skm1); gkm1->copyFrom(gk); - //update_ratio(); //update ratio relies on gk not gkm1 + // update_ratio(); //update ratio relies on gk not gkm1 } - + void hiopAlgPrimalDecomposition::HessianApprox::update_ratio() { - double rk = fkm1+fkm1_lin; + double rk = fkm1 + fkm1_lin; - rk += 0.5*alpha_*(skm1->twonorm())*(skm1->twonorm()); + rk += 0.5 * alpha_ * (skm1->twonorm()) * (skm1->twonorm()); // printf("recourse estimate inside HessianApprox %18.12e\n",rk); - double rho_k = (fkm1-fk)/(fkm1-rk); - + double rho_k = (fkm1 - fk) / (fkm1 - rk); + log_->printf(hovSummary, " previous val %18.12e,", fkm1); log_->printf(hovSummary, " real val %18.12e,", fk); log_->printf(hovSummary, " predicted val %18.12e,", rk); log_->printf(hovSummary, " rho_k %18.12e\n", rho_k); - + // a measure for when alpha should be decreasing (in addition to being good approximation) - double quanorm = 0.; double gradnorm=0.; + double quanorm = 0.; + double gradnorm = 0.; quanorm += skm1->dotProductWith(*skm1); - gradnorm += fkm1_lin; // equivalent to gradnorm += gkm1->dotProductWith(*skm1); - quanorm = alpha_*quanorm; + gradnorm += fkm1_lin; // equivalent to gradnorm += gkm1->dotProductWith(*skm1); + quanorm = alpha_ * quanorm; - double alpha_g_ratio = quanorm/fabs(gradnorm); - if(ver_ >=outlevel2) { - printf("alpha norm ratio %18.12e",alpha_g_ratio); + double alpha_g_ratio = quanorm / fabs(gradnorm); + if(ver_ >= outlevel2) { + printf("alpha norm ratio %18.12e", alpha_g_ratio); } - //using a trust region criteria for adjusting ratio - update_ratio_tr(rho_k,fkm1, fk, alpha_g_ratio, ratio_); -} - -void hiopAlgPrimalDecomposition::HessianApprox:: -update_ratio_tr(const double rhok, - const double rkm1, - const double rk, - const double alpha_g_ratio, - double& alpha_ratio) + // using a trust region criteria for adjusting ratio + update_ratio_tr(rho_k, fkm1, fk, alpha_g_ratio, ratio_); +} + +void hiopAlgPrimalDecomposition::HessianApprox::update_ratio_tr(const double rhok, + const double rkm1, + const double rk, + const double alpha_g_ratio, + double& alpha_ratio) { - if(rhok>0 && rhok < 1/4. && (rkm1-rk>0)) { - alpha_ratio = alpha_ratio/0.75; + if(rhok > 0 && rhok < 1 / 4. && (rkm1 - rk > 0)) { + alpha_ratio = alpha_ratio / 0.75; log_->printf(hovSummary, "increasing alpha ratio or increasing minimum for quadratic coefficient\n"); - } else if(rhok<0 && (rkm1-rk)<0) { - alpha_ratio = alpha_ratio/0.75; + } else if(rhok < 0 && (rkm1 - rk) < 0) { + alpha_ratio = alpha_ratio / 0.75; log_->printf(hovSummary, "increasing alpha ratio or increasing minimum for quadratic coefficient\n"); } else { - if(rhok > 0.75 && rhok<1.333 &&(rkm1-rk>0) && alpha_g_ratio>0.1) { + if(rhok > 0.75 && rhok < 1.333 && (rkm1 - rk > 0) && alpha_g_ratio > 0.1) { alpha_ratio *= 0.75; log_->printf(hovSummary, "decreasing alpha ratio or decreasing minimum for quadratic coefficient\n"); - } else if(rhok>1.333 && (rkm1-rk<0)) { - alpha_ratio = alpha_ratio/0.75; + } else if(rhok > 1.333 && (rkm1 - rk < 0)) { + alpha_ratio = alpha_ratio / 0.75; log_->printf(hovSummary, "recourse increasing and increased more in real contingency, so increasing alpha\n"); } } - if((rhok>0 &&rhok<1/8. && (rkm1-rk>0)) || (rhok<0 && rkm1-rk<0 )) { + if((rhok > 0 && rhok < 1 / 8. && (rkm1 - rk > 0)) || (rhok < 0 && rkm1 - rk < 0)) { log_->printf(hovWarning, "This step is rejected.\n"); - //sol_base = solm1; // when rejected, return to the previous iteration point. this mechanism has yet to be implemented. - //f = fm1; - //gradf = gkm1; + // sol_base = solm1; // when rejected, return to the previous iteration point. this mechanism has yet to be implemented. + // f = fm1; + // gradf = gkm1; } - alpha_ratio = std::max(ratio_min,alpha_ratio); - alpha_ratio = std::min(ratio_max,alpha_ratio); + alpha_ratio = std::max(ratio_min, alpha_ratio); + alpha_ratio = std::min(ratio_max, alpha_ratio); } -void hiopAlgPrimalDecomposition::HessianApprox:: -update_ratio(const double base_v, const double base_vm1) +void hiopAlgPrimalDecomposition::HessianApprox::update_ratio(const double base_v, const double base_vm1) { - double rk = fkm1+fkm1_lin; + double rk = fkm1 + fkm1_lin; + + rk += 0.5 * alpha_ * (skm1->twonorm()) * (skm1->twonorm()); // include basecase objective + // printf("recourse estimate inside HessianApprox %18.12e\n",rk); + double rho_k = (base_vm1 + fkm1 - fk - base_v) / (fkm1 + base_vm1 - rk - base_v); - rk += 0.5*alpha_*(skm1->twonorm())*(skm1->twonorm()); // include basecase objective - //printf("recourse estimate inside HessianApprox %18.12e\n",rk); - double rho_k = (base_vm1+fkm1-fk-base_v)/(fkm1+base_vm1-rk-base_v); - log_->printf(hovSummary, "previous base %18.12e,", base_vm1); - log_->printf(hovSummary, " current base %18.12e,", base_v); + log_->printf(hovSummary, " current base %18.12e,", base_v); log_->printf(hovSummary, " previous val %18.12e,", fkm1); log_->printf(hovSummary, " real val %18.12e,", fk); log_->printf(hovSummary, " predicted val %18.12e,", rk); log_->printf(hovSummary, " rho_k %18.12e\n", rho_k); - + // using a trust region criteria for adjusting ratio update_ratio_tr(rho_k, ratio_); - //TODO: give choice of two update rules - //tr_ratio_ = 1.0; - //update_ratio_tr(rho_k, tr_ratio_); + // TODO: give choice of two update rules + // tr_ratio_ = 1.0; + // update_ratio_tr(rho_k, tr_ratio_); } - -void hiopAlgPrimalDecomposition::HessianApprox:: -update_ratio_tr(const double rhok, - double& alpha_ratio) +void hiopAlgPrimalDecomposition::HessianApprox::update_ratio_tr(const double rhok, double& alpha_ratio) { - if(rhok < 1/4. ) { - alpha_ratio = alpha_ratio/0.75; - log_->printf(hovSummary,"increasing alpha ratio or increasing minimum for quadratic coefficient\n"); + if(rhok < 1 / 4.) { + alpha_ratio = alpha_ratio / 0.75; + log_->printf(hovSummary, "increasing alpha ratio or increasing minimum for quadratic coefficient\n"); } else { - if(rhok > 0.75) { + if(rhok > 0.75) { alpha_ratio *= 0.75; - log_->printf(hovSummary,"decreasing alpha ratio or decreasing minimum for quadratic coefficient\n"); + log_->printf(hovSummary, "decreasing alpha ratio or decreasing minimum for quadratic coefficient\n"); } } - if(rhok<1/8.) { - log_->printf(hovSummary,"This step needs to be rejected.\n"); - //sol_base = solm1; - //f = fm1; - //gradf = gkm1; // rejection mechanism to be implemented + if(rhok < 1 / 8.) { + log_->printf(hovSummary, "This step needs to be rejected.\n"); + // sol_base = solm1; + // f = fm1; + // gradf = gkm1; // rejection mechanism to be implemented } - alpha_ratio = std::max(ratio_min,alpha_ratio); - alpha_ratio = std::min(ratio_max,alpha_ratio); + alpha_ratio = std::max(ratio_min, alpha_ratio); + alpha_ratio = std::min(ratio_max, alpha_ratio); } double hiopAlgPrimalDecomposition::HessianApprox::get_alpha_BB() { double temp1 = 0.; double temp2 = 0.; - + temp1 = skm1->dotProductWith(*skm1); temp2 = skm1->dotProductWith(*ykm1); - - alpha_ = temp2/temp1; - alpha_ = std::max(alpha_min,alpha_); - alpha_ = std::min(alpha_max,alpha_); + + alpha_ = temp2 / temp1; + alpha_ = std::max(alpha_min, alpha_); + alpha_ = std::min(alpha_max, alpha_); // printf("alpha max %18.12e\n",alpha_max); return alpha_; } @@ -454,23 +443,23 @@ double hiopAlgPrimalDecomposition::HessianApprox::get_alpha_f(const hiopVector& double temp3 = 0.; // call update first, gkm1 is already gk - temp3 = gk.twonorm()*gk.twonorm(); + temp3 = gk.twonorm() * gk.twonorm(); - alpha_ = temp3/2.0/fk; + alpha_ = temp3 / 2.0 / fk; // printf("alpha check %18.12e\n",temp3/2.0); alpha_ *= ratio_; - alpha_ = std::max(alpha_min,alpha_); - alpha_ = std::min(alpha_max,alpha_); - log_->printf(hovScalars,"alpha ratio %18.12e\n",ratio_); + alpha_ = std::max(alpha_min, alpha_); + alpha_ = std::min(alpha_max, alpha_); + log_->printf(hovScalars, "alpha ratio %18.12e\n", ratio_); return alpha_; } double hiopAlgPrimalDecomposition::HessianApprox::get_alpha_tr() { alpha_ *= tr_ratio_; - alpha_ = std::max(alpha_min,alpha_); - alpha_ = std::min(alpha_max,alpha_); - log_->printf(hovScalars,"alpha ratio %18.12e\n",ratio_); + alpha_ = std::max(alpha_min, alpha_); + alpha_ = std::min(alpha_max, alpha_); + log_->printf(hovScalars, "alpha ratio %18.12e\n", ratio_); return alpha_; } @@ -481,88 +470,75 @@ double hiopAlgPrimalDecomposition::HessianApprox::check_convergence_grad(const h double temp2 = 0.; double temp3 = 0.; double temp4 = 0.; - + hiopVector* temp; - temp = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), skm1->get_local_size()); - temp->copyFrom(*skm1); + temp = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), skm1->get_local_size()); + temp->copyFrom(*skm1); temp->scale(-alpha_); - temp4 = temp->twonorm()*temp->twonorm(); - + temp4 = temp->twonorm() * temp->twonorm(); + temp3 = ykm1->twonorm(); - temp->axpy(1.0,*ykm1); + temp->axpy(1.0, *ykm1); temp1 = temp->twonorm(); temp2 = gk.twonorm(); - double convg = temp1/temp2; - log_->printf(hovScalars, "alpha %18.12e \n",alpha_); + double convg = temp1 / temp2; + log_->printf(hovScalars, "alpha %18.12e \n", alpha_); log_->printf(hovScalars, "temp1 %18.12e,", temp1); log_->printf(hovScalars, " temp2 %18.12e,", temp2); log_->printf(hovScalars, " temp3 %18.12e,", temp3); log_->printf(hovScalars, " temp4 %18.12e\n", temp4); - + delete temp; return convg; } // stopping criteria based on function value change of both basecase and recourse -double hiopAlgPrimalDecomposition::HessianApprox:: -check_convergence_fcn(const double base_v, const double base_vm1) +double hiopAlgPrimalDecomposition::HessianApprox::check_convergence_fcn(const double base_v, const double base_vm1) { double predicted_decrease = fkm1_lin; - assert(n_==gkm1->get_local_size()); - predicted_decrease += 0.5*alpha_*(skm1->twonorm())*(skm1->twonorm()); + assert(n_ == gkm1->get_local_size()); + predicted_decrease += 0.5 * alpha_ * (skm1->twonorm()) * (skm1->twonorm()); + + log_->printf(hovScalars, "predicted decrease %18.12e\n", predicted_decrease); - log_->printf(hovScalars,"predicted decrease %18.12e\n", predicted_decrease); - predicted_decrease += base_v - base_vm1; predicted_decrease = fabs(predicted_decrease); return predicted_decrease; } -double hiopAlgPrimalDecomposition::HessianApprox:: -compute_base(const double val) +double hiopAlgPrimalDecomposition::HessianApprox::compute_base(const double val) { - double rec_appx = fkm1+fkm1_lin; - rec_appx += 0.5*alpha_*(skm1->twonorm())*(skm1->twonorm()); - return val-rec_appx; + double rec_appx = fkm1 + fkm1_lin; + rec_appx += 0.5 * alpha_ * (skm1->twonorm()) * (skm1->twonorm()); + return val - rec_appx; } void hiopAlgPrimalDecomposition::HessianApprox::set_verbosity(const int i) { - assert(i<=3 && i>=0); + assert(i <= 3 && i >= 0); ver_ = i; } -void hiopAlgPrimalDecomposition::HessianApprox:: -set_alpha_ratio_min(const double alp_ratio_min) +void hiopAlgPrimalDecomposition::HessianApprox::set_alpha_ratio_min(const double alp_ratio_min) { ratio_min = alp_ratio_min; } -void hiopAlgPrimalDecomposition::HessianApprox:: -set_alpha_ratio_max(const double alp_ratio_max) +void hiopAlgPrimalDecomposition::HessianApprox::set_alpha_ratio_max(const double alp_ratio_max) { ratio_max = alp_ratio_max; } -void hiopAlgPrimalDecomposition::HessianApprox:: -set_alpha_min(const double alp_min) -{ - alpha_min = alp_min; -} +void hiopAlgPrimalDecomposition::HessianApprox::set_alpha_min(const double alp_min) { alpha_min = alp_min; } -void hiopAlgPrimalDecomposition::HessianApprox:: -set_alpha_max(const double alp_max) -{ - alpha_max = alp_max; -} +void hiopAlgPrimalDecomposition::HessianApprox::set_alpha_max(const double alp_max) { alpha_max = alp_max; } -hiopAlgPrimalDecomposition:: -hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, - MPI_Comm comm_world/*=MPI_COMM_WORLD*/) - : comm_world_(comm_world), - master_prob_(prob_in) +hiopAlgPrimalDecomposition::hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, + MPI_Comm comm_world /*=MPI_COMM_WORLD*/) + : comm_world_(comm_world), + master_prob_(prob_in) { S_ = master_prob_->get_num_rterms(); n_ = master_prob_->get_num_vars(); @@ -572,34 +548,36 @@ hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, // determine rank and rank type // only two rank types for now, master and evaluator/worker - #ifdef HIOP_USE_MPI - int ierr = MPI_Comm_rank(comm_world_, &my_rank_); assert(ierr == MPI_SUCCESS); - int ret = MPI_Comm_size(comm_world, &comm_size_); assert(ret==MPI_SUCCESS); - if(my_rank_==0) { - my_rank_type_ = 0; - } else { - my_rank_type_ = 1; - } - request_ = new MPI_Request[4]; - #endif - +#ifdef HIOP_USE_MPI + int ierr = MPI_Comm_rank(comm_world_, &my_rank_); + assert(ierr == MPI_SUCCESS); + int ret = MPI_Comm_size(comm_world, &comm_size_); + assert(ret == MPI_SUCCESS); + if(my_rank_ == 0) { + my_rank_type_ = 0; + } else { + my_rank_type_ = 1; + } + request_ = new MPI_Request[4]; +#endif + // use "hiop_pridec.options" - if the file does not exist, built-in default options will be used options_ = new hiopOptionsPriDec(hiopOptions::default_filename_pridec_solver); set_tolerance(options_->GetNumeric("tolerance")); - + set_acceptable_tolerance(options_->GetNumeric("acceptable_tolerance")); - + set_acceptable_count(options_->GetInteger("acceptable_iterations")); - + set_max_iteration(options_->GetInteger("max_iter")); - + set_alpha_max(options_->GetNumeric("alpha_max")); - + set_alpha_min(options_->GetNumeric("alpha_min")); - + set_local_accum(options_->GetString("accum_local")); - + assert(alpha_max_ > alpha_min_); set_verbosity(options_->GetInteger("verbosity_level")); @@ -610,17 +588,16 @@ hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, x_ = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), n_); xc_idx_ = LinearAlgebraFactory::create_vector_int(options_->GetString("mem_space"), nc_); - xc_idx_->linspace(0,1); + xc_idx_->linspace(0, 1); } -hiopAlgPrimalDecomposition:: -hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, - const int nc, - const int* xc_index, - MPI_Comm comm_world/*=MPI_COMM_WORLD*/) - : comm_world_(comm_world), - master_prob_(prob_in), - nc_(nc) +hiopAlgPrimalDecomposition::hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, + const int nc, + const int* xc_index, + MPI_Comm comm_world /*=MPI_COMM_WORLD*/) + : comm_world_(comm_world), + master_prob_(prob_in), + nc_(nc) { S_ = master_prob_->get_num_rterms(); n_ = master_prob_->get_num_vars(); @@ -628,28 +605,30 @@ hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, // only two rank types for now, master and evaluator/worker #ifdef HIOP_USE_MPI - int ierr = MPI_Comm_rank(comm_world_, &my_rank_); assert(ierr == MPI_SUCCESS); - int ret = MPI_Comm_size(comm_world_, &comm_size_); assert(ret==MPI_SUCCESS); - if(my_rank_==0) { + int ierr = MPI_Comm_rank(comm_world_, &my_rank_); + assert(ierr == MPI_SUCCESS); + int ret = MPI_Comm_size(comm_world_, &comm_size_); + assert(ret == MPI_SUCCESS); + if(my_rank_ == 0) { my_rank_type_ = 0; } else { my_rank_type_ = 1; } - request_ = new MPI_Request[4]; + request_ = new MPI_Request[4]; #endif - //use "hiop_pridec.options" - if the file does not exist, built-in default options will be used + // use "hiop_pridec.options" - if the file does not exist, built-in default options will be used options_ = new hiopOptionsPriDec(hiopOptions::default_filename_pridec_solver); set_tolerance(options_->GetNumeric("tolerance")); - + set_acceptable_tolerance(options_->GetNumeric("acceptable_tolerance")); - + set_acceptable_count(options_->GetInteger("acceptable_iterations")); - + set_max_iteration(options_->GetInteger("max_iter")); set_alpha_max(options_->GetNumeric("alpha_max")); - + set_alpha_min(options_->GetNumeric("alpha_min")); assert(alpha_max_ > alpha_min_); @@ -671,45 +650,36 @@ hiopAlgPrimalDecomposition::~hiopAlgPrimalDecomposition() delete options_; delete log_; #ifdef HIOP_USE_MPI - delete [] request_; + delete[] request_; #endif } -double hiopAlgPrimalDecomposition::getObjective() const -{ - return master_prob_->get_objective(); -} +double hiopAlgPrimalDecomposition::getObjective() const { return master_prob_->get_objective(); } void hiopAlgPrimalDecomposition::getSolution(hiopVector& x) const { double* x_vec = x.local_data(); master_prob_->get_solution(x_vec); } - + void hiopAlgPrimalDecomposition::getDualSolutions(double* zl, double* zu, double* lambda) { assert(false && "not implemented"); } -inline hiopSolveStatus hiopAlgPrimalDecomposition::getSolveStatus() const -{ - return solver_status_; -} +inline hiopSolveStatus hiopAlgPrimalDecomposition::getSolveStatus() const { return solver_status_; } + +int hiopAlgPrimalDecomposition::getNumIterations() const { return it_; } -int hiopAlgPrimalDecomposition::getNumIterations() const -{ - return it_; -} - bool hiopAlgPrimalDecomposition::stopping_criteria(const int it, const double convg, const int accp_count) { // gradient based stopping criteria - if(convgprintf(hovSummary,"reaching error tolerance, successfully found solution\n"); + if(convg < tol_) { + log_->printf(hovSummary, "reaching error tolerance, successfully found solution\n"); return true; } // stopping criteria based on the change in objective function - if(it == max_iter_-1) { + if(it == max_iter_ - 1) { log_->printf(hovSummary, "reached maximum iterations, optimization stops.\n"); return true; @@ -722,1068 +692,1060 @@ bool hiopAlgPrimalDecomposition::stopping_criteria(const int it, const double co } return false; } - -double hiopAlgPrimalDecomposition:: -step_size_inf(const int nc, const hiopVectorInt& idx, const hiopVector& x, const hiopVector& x0) + +double hiopAlgPrimalDecomposition::step_size_inf(const int nc, + const hiopVectorInt& idx, + const hiopVector& x, + const hiopVector& x0) { double step = -1e20; - hiopVector* temp = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), x0.get_local_size()); - temp->copy_from_indexes(x, idx); - temp->axpy(-1.0, x0); - //step = temp->infnorm(); // using infinity norm + hiopVector* temp = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), x0.get_local_size()); + temp->copy_from_indexes(x, idx); + temp->axpy(-1.0, x0); + // step = temp->infnorm(); // using infinity norm step = temp->twonorm(); delete temp; return step; } -void hiopAlgPrimalDecomposition::set_max_iteration(const int max_it) -{ - max_iter_ = max_it; -} +void hiopAlgPrimalDecomposition::set_max_iteration(const int max_it) { max_iter_ = max_it; } void hiopAlgPrimalDecomposition::set_verbosity(const int i) { - assert(i<=12 && i>=0); + assert(i <= 12 && i >= 0); ver_ = i; } -void hiopAlgPrimalDecomposition::set_tolerance(const double tol) -{ - tol_ = tol; -} +void hiopAlgPrimalDecomposition::set_tolerance(const double tol) { tol_ = tol; } -void hiopAlgPrimalDecomposition::set_acceptable_tolerance(const double tol) -{ - accp_tol_ = tol; -} +void hiopAlgPrimalDecomposition::set_acceptable_tolerance(const double tol) { accp_tol_ = tol; } -void hiopAlgPrimalDecomposition::set_acceptable_count(const int count) -{ - accp_count_ = count; -} +void hiopAlgPrimalDecomposition::set_acceptable_count(const int count) { accp_count_ = count; } void hiopAlgPrimalDecomposition::set_initial_alpha_ratio(const double alpha) { - assert(alpha>=0&&alpha<10.); + assert(alpha >= 0 && alpha < 10.); alpha_ratio_ = alpha; } -void hiopAlgPrimalDecomposition::set_alpha_min(const double alp_min) -{ - alpha_min_ = alp_min; -} +void hiopAlgPrimalDecomposition::set_alpha_min(const double alp_min) { alpha_min_ = alp_min; } -void hiopAlgPrimalDecomposition::set_alpha_max(const double alp_max) -{ - alpha_max_ = alp_max; -} +void hiopAlgPrimalDecomposition::set_alpha_max(const double alp_max) { alpha_max_ = alp_max; } -void hiopAlgPrimalDecomposition::set_local_accum(const std::string local_accum) -{ - local_accum_ = local_accum; -} +void hiopAlgPrimalDecomposition::set_local_accum(const std::string local_accum) { local_accum_ = local_accum; } /** MPI engine for pridec solver */ #ifdef HIOP_USE_MPI - hiopSolveStatus hiopAlgPrimalDecomposition::run() +hiopSolveStatus hiopAlgPrimalDecomposition::run() { - log_->printf(hovSummary, "===============\nHiop Primal Decomposition SOLVER\n===============\n"); - if(options_->GetString("print_options") != "no") { - log_->write(nullptr, *options_, hovSummary); - } - - if(local_accum_ == "yes") { // if worker ranks accumulate solution locally before tranferring to master rank - return run_local(); + log_->printf(hovSummary, "===============\nHiop Primal Decomposition SOLVER\n===============\n"); + if(options_->GetString("print_options") != "no") { + log_->write(nullptr, *options_, hovSummary); + } + + if(local_accum_ == "yes") { // if worker ranks accumulate solution locally before tranferring to master rank + return run_local(); + } + + if(comm_size_ == 1) { + return run_single(); // call the serial solver + } + if(my_rank_ == 0) { + log_->printf(hovSummary, "total number of recourse problems %lu\n", S_); + log_->printf(hovSummary, "total ranks %d\n", comm_size_); + } + // initial point set to all zero, for now + x_->setToConstant(0.0); + + bool bret; + int rank_master = 0; // master rank is also the rank that solves the master problem + // Define the values and gradients as needed in the master rank + double rval = 0.; + // double grad_r[nc_]; + + hiopVector* grad_r; + grad_r = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), nc_); + grad_r->setToZero(); + double* grad_r_vec = grad_r->local_data(); + + hiopVector* hess_appx = grad_r->alloc_clone(); + hess_appx->setToZero(); + + hiopVector* x0 = grad_r->alloc_clone(); + x0->setToZero(); + double* x0_vec = x0->local_data(); + + hiopVector* grad_aux = x0->alloc_clone(); + grad_aux->setToZero(); + // local recourse terms for each evaluator, defined accross all processors + double rec_val = 0.; + hiopVector* grad_acc = grad_r->alloc_clone(); + grad_acc->setToZero(); + double* grad_acc_vec = grad_acc->local_data(); + + // hess_appx_2 is declared by all ranks while only rank 0 uses it + HessianApprox* hess_appx_2 = new HessianApprox(nc_, alpha_ratio_, master_prob_, options_); + hess_appx_2->set_alpha_min(alpha_min_); + hess_appx_2->set_alpha_max(alpha_max_); + + if(ver_ >= hovSummary) { + hess_appx_2->set_verbosity(ver_); + } + + double base_val = 0.; // basecase objective value + double base_valm1 = 0.; // basecase objective value from the previous step + double recourse_val = 0.; // recourse objective value + double dinf = 0.; // step size + + double convg = 1e20; // convergence measure + double convg_g = 1e20; + double convg_f = 1e20; + int accp_count = 0; + + int end_signal = 0; + double t1 = 0; + double t2 = 0; + hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator = + new hiopInterfacePriDecProblem::RecourseApproxEvaluator(nc_, + S_, + xc_idx_->local_data(), + options_->GetString("mem_space")); + + double* x_vec = x_->local_data(); + + std::string options_file_master_prob; + + // Outer loop starts + for(int it = 0; it < max_iter_; it++) { + if(my_rank_ == 0) { + t1 = MPI_Wtime(); } - - if(comm_size_==1) { - return run_single();//call the serial solver + it_ = it; + // solve the basecase first + if(my_rank_ == 0 && it == 0) { // initial solve + // log_->printf(hovIteration, "my rank for solver %d\n", my_rank_); + // solve master problem basecase on master and iteration 0 + + options_file_master_prob = options_->GetString("options_file_master_prob"); + + solver_status_ = master_prob_->solve_master(*x_, false, 0, 0, 0, options_file_master_prob.c_str()); + + if(solver_status_) { + // to do, what if solve fails? + } + + log_->write(nullptr, *x_, hovFcnEval); + + base_val = master_prob_->get_objective(); + base_valm1 = master_prob_->get_objective(); } - if(my_rank_==0) { - log_->printf(hovSummary, "total number of recourse problems %lu\n", S_); - log_->printf(hovSummary, "total ranks %d\n",comm_size_); + + // send basecase solutions to all ranks + + int ierr = MPI_Bcast(x_vec, n_, MPI_DOUBLE, rank_master, comm_world_); + assert(ierr == MPI_SUCCESS); + + // set up recourse problem send/recv interface + std::vector rec_prob; + for(int r = 0; r < comm_size_; r++) { + rec_prob.push_back(new ReqRecourseApprox(nc_)); } - // initial point set to all zero, for now - x_->setToConstant(0.0); - - bool bret; - int rank_master=0; // master rank is also the rank that solves the master problem - // Define the values and gradients as needed in the master rank - double rval = 0.; - // double grad_r[nc_]; - - hiopVector* grad_r; - grad_r = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), nc_) ; - grad_r->setToZero(); - double* grad_r_vec=grad_r->local_data(); - - hiopVector* hess_appx = grad_r->alloc_clone(); - hess_appx->setToZero(); - - hiopVector* x0 = grad_r->alloc_clone(); - x0->setToZero(); - double* x0_vec=x0->local_data(); - - hiopVector* grad_aux = x0->alloc_clone(); - grad_aux->setToZero(); - // local recourse terms for each evaluator, defined accross all processors - double rec_val = 0.; - hiopVector* grad_acc = grad_r->alloc_clone(); - grad_acc->setToZero(); - double* grad_acc_vec = grad_acc->local_data(); - - //hess_appx_2 is declared by all ranks while only rank 0 uses it - HessianApprox* hess_appx_2 = new HessianApprox(nc_, alpha_ratio_, master_prob_, options_); - hess_appx_2->set_alpha_min(alpha_min_); - hess_appx_2->set_alpha_max(alpha_max_); - - if(ver_ >= hovSummary) { - hess_appx_2->set_verbosity(ver_); + + std::vector req_cont_idx; + for(int r = 0; r < comm_size_; r++) { + req_cont_idx.push_back(new ReqContingencyIdx(0)); } - double base_val = 0.; // basecase objective value - double base_valm1 = 0.; // basecase objective value from the previous step - double recourse_val = 0.; // recourse objective value - double dinf = 0.; // step size - - double convg = 1e20; // convergence measure - double convg_g = 1e20; - double convg_f = 1e20; - int accp_count = 0; - - int end_signal = 0; - double t1 = 0; - double t2 = 0; - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator = new hiopInterfacePriDecProblem:: - RecourseApproxEvaluator(nc_, S_, xc_idx_->local_data(), options_->GetString("mem_space")); - - double* x_vec = x_->local_data(); - - std::string options_file_master_prob; - - // Outer loop starts - for(int it=0; itsetToZero(); + + std::vector cont_idx(S_); + for(int i = 0; i < static_cast(S_); i++) { + cont_idx[i] = i; + } + // The number of contigencies/recourse problems should be larger than the number of processors + assert(static_cast(S_) >= comm_size_ - 1); + // idx is the next contingency to be sent out from the master + int idx = 0; + // Initilize the recourse communication by sending indices to the evaluator + // Using Blocking send here + for(int r = 1; r < comm_size_; r++) { + int cur_idx = cont_idx[idx]; + int ierr = MPI_Send(&cur_idx, 1, MPI_INT, r, 1, comm_world_); + assert(MPI_SUCCESS == ierr); + // log_->printf(hovIteration, "rank %d to get contingency index %d\n", r, cur_idx); //verbosity level 10 + idx += 1; + } + // Posting initial receive of recourse solutions from evaluators + for(int r = 1; r < comm_size_; r++) { + // int cur_idx = cont_idx[idx]; + rec_prob[r]->post_recv(2, r, comm_world_); // 2 is the tag, r is the rank source + } + // Both finish_flag and last_loop are used to deal with the final round remaining contingencies/recourse problems. + // Some ranks are finished while others are not. The loop needs to continue to fetch the results. + // hiopVectorInt* finish_flag = LinearAlgebraFactory::createVectorInt(comm_size_); + // finish_flag->setToZero(); + std::vector finish_flag(comm_size_); // standard vector will be replaced by hiopVectorInt + for(int i = 0; i < comm_size_; i++) { + finish_flag[i] = 0; } - it_ = it; - // solve the basecase first - if(my_rank_ == 0 && it==0) {//initial solve - // log_->printf(hovIteration, "my rank for solver %d\n", my_rank_); - // solve master problem basecase on master and iteration 0 - - options_file_master_prob = options_->GetString("options_file_master_prob"); - - solver_status_ = master_prob_->solve_master(*x_, false, 0, 0, 0, options_file_master_prob.c_str()); - - if(solver_status_) { - // to do, what if solve fails? + int last_loop = 0; + // log_->printf(hovIteration, "total idx %d\n", S_); + t2 = MPI_Wtime(); + + log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n", it, t2 - t1); + + while(idx <= static_cast(S_) || last_loop) { + for(int r = 1; r < comm_size_; r++) { + int mpi_test_flag = rec_prob[r]->test(); + if(mpi_test_flag && (finish_flag[r] == 0)) { // receive completed + if(!last_loop && idx < static_cast(S_)) { + log_->printf(hovLinesearch, "idx %d sent to rank %d\n", idx, r); + } else { + log_->printf(hovLinesearch, "last loop for rank %d\n", r); + } + // add to the master rank variables + rval += rec_prob[r]->value(); + for(int i = 0; i < static_cast(nc_); i++) { + grad_r_vec[i] += rec_prob[r]->grad(i); + } + if(last_loop) { + finish_flag[r] = 1; + } + // this is for dealing with the end of contingencies where some ranks have already finished + if(idx < static_cast(S_)) { + req_cont_idx[r]->wait(); // Ensure previous cont idx send has completed. + req_cont_idx[r]->set_idx(cont_idx[idx]); + req_cont_idx[r]->post_send(1, r, comm_world_); + rec_prob[r]->post_recv(2, r, comm_world_); // 2 is the tag, r is the rank source + // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_prob[r]->value()); + } else { + finish_flag[r] = 1; + last_loop = 1; + } + idx += 1; + } } - - log_->write(nullptr, *x_, hovFcnEval); - base_val = master_prob_->get_objective(); - base_valm1 = master_prob_->get_objective(); + // Current way of ending the loop while accounting for all the last round of results + if(last_loop) { + last_loop = 0; + for(int r = 1; r < comm_size_; r++) { + if(finish_flag[r] == 0) { + last_loop = 1; + } + } + } } + rval /= S_; + grad_r->scale(1.0 / S_); + // send end signal to all evaluators + for(int r = 1; r < comm_size_; r++) { + req_cont_idx[r]->wait(); // Ensure previous idx send has completed. + req_cont_idx[r]->set_idx(-1); + req_cont_idx[r]->post_send(1, r, comm_world_); + } + t2 = MPI_Wtime(); + log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n", it, t2 - t1); + } - // send basecase solutions to all ranks + // evaluators + if(my_rank_ != 0) { + /* old sychronous implementation of contingencies + * int cpr = S_/(comm_size_-1); //contingency per rank + * int cr = S_%(comm_size_-1); //contingency remained + * log_->printf(hovIteration, "my rank start evaluating work %d)\n", my_rank_); + */ + std::vector cont_idx(1); // currently sending/receiving one contingency index at a time + int cont_i = 0; + cont_idx[0] = 0; + // Receive the index of the contingency to evaluate + int mpi_test_flag = 0; + int ierr = MPI_Recv(&cont_i, 1, MPI_INT, rank_master, 1, comm_world_, &status_); + assert(MPI_SUCCESS == ierr); + cont_idx[0] = cont_i; + // log_->printf(hovIteration, "contingency index %d, rank %d)\n", cont_idx[0],my_rank_); + // compute the recourse function values and gradients + rec_val = 0.; - int ierr = MPI_Bcast(x_vec, n_, MPI_DOUBLE, rank_master, comm_world_); - assert(ierr == MPI_SUCCESS); + grad_acc->setToZero(); + double aux = 0.; - // set up recourse problem send/recv interface - std::vector rec_prob; - for(int r=0; r req_cont_idx; - for(int r=0; rcopy_from_indexes(*x_, *xc_idx_); + } else { + assert(nc_ == n_); + x0->copyFromStarting(0, *x_); } + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + aux = 0.; + int idx_temp = cont_idx[ri]; - // master rank communication - if(my_rank_ == 0) { - // array for number of indices, currently the indices are in [0,S_] - rval = 0.; - grad_r->setToZero(); - - std::vector cont_idx(S_); - for(int i=0; i(S_); i++) { - cont_idx[i] = i; - } - // The number of contigencies/recourse problems should be larger than the number of processors - assert(static_cast(S_)>=comm_size_-1); - // idx is the next contingency to be sent out from the master - int idx = 0; - // Initilize the recourse communication by sending indices to the evaluator - // Using Blocking send here - for(int r=1; r< comm_size_;r++) { - int cur_idx = cont_idx[idx]; - int ierr = MPI_Send(&cur_idx, 1, MPI_INT, r, 1,comm_world_); - assert(MPI_SUCCESS == ierr); - // log_->printf(hovIteration, "rank %d to get contingency index %d\n", r, cur_idx); //verbosity level 10 - idx += 1; - } - // Posting initial receive of recourse solutions from evaluators - for(int r=1; rpost_recv(2,r,comm_world_);// 2 is the tag, r is the rank source - } - // Both finish_flag and last_loop are used to deal with the final round remaining contingencies/recourse problems. - // Some ranks are finished while others are not. The loop needs to continue to fetch the results. - // hiopVectorInt* finish_flag = LinearAlgebraFactory::createVectorInt(comm_size_); - // finish_flag->setToZero(); - std::vector finish_flag(comm_size_); // standard vector will be replaced by hiopVectorInt - for(int i=0;ieval_f_rterm(idx_temp, nc_, x0_vec, aux); // solving the recourse problem + if(!bret) { + // TODO } - int last_loop = 0; - // log_->printf(hovIteration, "total idx %d\n", S_); - t2 = MPI_Wtime(); - - log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n",it, t2 - t1); - - while(idx<=static_cast(S_) || last_loop) { - for(int r=1; r< comm_size_;r++) { - int mpi_test_flag = rec_prob[r]->test(); - if(mpi_test_flag && (finish_flag[r]==0)) {// receive completed - if(!last_loop && idx(S_)) { - log_->printf(hovLinesearch, "idx %d sent to rank %d\n", idx,r); - } else { - log_->printf(hovLinesearch, "last loop for rank %d\n", r ); - } - // add to the master rank variables - rval += rec_prob[r]->value(); - for(int i=0;i(nc_);i++) { - grad_r_vec[i] += rec_prob[r]->grad(i); - } - if(last_loop) { - finish_flag[r]=1; - } - // this is for dealing with the end of contingencies where some ranks have already finished - if(idx(S_)) { - req_cont_idx[r]->wait(); // Ensure previous cont idx send has completed. - req_cont_idx[r]->set_idx(cont_idx[idx]); - req_cont_idx[r]->post_send(1,r,comm_world_); - rec_prob[r]->post_recv(2,r,comm_world_);// 2 is the tag, r is the rank source - // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_prob[r]->value()); - } else { - finish_flag[r] = 1; - last_loop = 1; - } - idx += 1; - } - } + rec_val += aux; + } + // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_val); - // Current way of ending the loop while accounting for all the last round of results - if(last_loop) { - last_loop=0; - for(int r=1; r< comm_size_;r++) { - if(finish_flag[r]==0) { - last_loop=1; - } - } - } + grad_aux->setToZero(); + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + int idx_temp = cont_idx[ri]; + bret = master_prob_->eval_grad_rterm(idx_temp, nc_, x0_vec, *grad_aux); + if(!bret) { + // TODO } - rval /= S_; - grad_r->scale(1.0/S_); - // send end signal to all evaluators - for(int r=1; rwait(); // Ensure previous idx send has completed. - req_cont_idx[r]->set_idx(-1); - req_cont_idx[r]->post_send(1,r,comm_world_); - } - t2 = MPI_Wtime(); - log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n",it, t2 - t1); + grad_acc->axpy(1.0, *grad_aux); } + rec_prob[my_rank_]->wait(); // Ensure send buffer is safe to use. + rec_prob[my_rank_]->set_value(rec_val); - //evaluators - if(my_rank_ != 0) { - /* old sychronous implementation of contingencies - * int cpr = S_/(comm_size_-1); //contingency per rank - * int cr = S_%(comm_size_-1); //contingency remained - * log_->printf(hovIteration, "my rank start evaluating work %d)\n", my_rank_); - */ - std::vector cont_idx(1); // currently sending/receiving one contingency index at a time - int cont_i = 0; - cont_idx[0] = 0; - // Receive the index of the contingency to evaluate - int mpi_test_flag = 0; - int ierr = MPI_Recv(&cont_i, 1, MPI_INT, rank_master, 1, comm_world_, &status_); - assert(MPI_SUCCESS == ierr); - cont_idx[0] = cont_i; - // log_->printf(hovIteration, "contingency index %d, rank %d)\n", cont_idx[0],my_rank_); - // compute the recourse function values and gradients - rec_val = 0.; - - grad_acc->setToZero(); - double aux=0.; - - if(nc_copy_from_indexes(*x_, *xc_idx_); - } else { - assert(nc_==n_); - x0->copyFromStarting(0, *x_); - } - for(int ri=0; ri(cont_idx.size()); ri++) { - aux = 0.; - int idx_temp = cont_idx[ri]; + rec_prob[my_rank_]->set_grad(grad_acc_vec); + rec_prob[my_rank_]->post_send(2, rank_master, comm_world_); - bret = master_prob_->eval_f_rterm(idx_temp, nc_, x0_vec, aux); // solving the recourse problem - if(!bret) { - //TODO + req_cont_idx[my_rank_]->post_recv(1, rank_master, comm_world_); + while(cont_idx[0] != -1) { // loop until end signal received + mpi_test_flag = req_cont_idx[my_rank_]->test(); + /* contigency starts at 0 + * sychronous implmentation of contingencist + */ + if(mpi_test_flag) { + // log_->printf(hovIteration, "contingency index %d, rank %d)\n", cont_idx[0],my_rank_); + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + cont_idx[ri] = req_cont_idx[my_rank_]->value(); } - rec_val += aux; - } - // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_val); - - grad_aux->setToZero(); - - for(int ri=0; ri(cont_idx.size()); ri++) { - int idx_temp = cont_idx[ri]; - bret = master_prob_->eval_grad_rterm(idx_temp, nc_, x0_vec, *grad_aux); - if(!bret) { - //TODO + if(cont_idx[0] == -1) { + break; } - grad_acc->axpy(1.0, *grad_aux); - } - rec_prob[my_rank_]->wait(); // Ensure send buffer is safe to use. - rec_prob[my_rank_]->set_value(rec_val); - - rec_prob[my_rank_]->set_grad(grad_acc_vec); - rec_prob[my_rank_]->post_send(2, rank_master, comm_world_); - - req_cont_idx[my_rank_]->post_recv(1, rank_master, comm_world_); - while(cont_idx[0]!=-1) {//loop until end signal received - mpi_test_flag = req_cont_idx[my_rank_]->test(); - /* contigency starts at 0 - * sychronous implmentation of contingencist - */ - if(mpi_test_flag) { - // log_->printf(hovIteration, "contingency index %d, rank %d)\n", cont_idx[0],my_rank_); - for(int ri=0; ri(cont_idx.size()); ri++) { - cont_idx[ri] = req_cont_idx[my_rank_]->value(); - } - if(cont_idx[0]==-1) { - break; - } - rec_val = 0.; - grad_acc->setToZero(); + rec_val = 0.; + grad_acc->setToZero(); + + double aux = 0.; + if(nc_ < n_) { + x0->copy_from_indexes(*x_, *xc_idx_); + } else { + assert(nc_ == n_); + x0->copyFromStarting(0, *x_); + } + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + aux = 0.; + int idx_temp = cont_idx[ri]; - double aux=0.; - if(nc_copy_from_indexes(*x_, *xc_idx_); - } else { - assert(nc_==n_); - x0->copyFromStarting(0, *x_); + bret = master_prob_->eval_f_rterm(idx_temp, nc_, x0_vec, aux); // need to add extra time here + if(!bret) { + // TODO } - for(int ri=0; ri(cont_idx.size()); ri++) { - aux = 0.; - int idx_temp = cont_idx[ri]; - - bret = master_prob_->eval_f_rterm(idx_temp, nc_, x0_vec, aux); //need to add extra time here - if(!bret) { - //TODO - } - rec_val += aux; - } - // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_val); - grad_aux->setToZero(); - - for(int ri=0; ri(cont_idx.size()); ri++) { - int idx_temp = cont_idx[ri]; - bret = master_prob_->eval_grad_rterm(idx_temp, nc_, x0_vec, *grad_aux); - if(!bret) { - //TODO - } - grad_acc->axpy(1.0, *grad_aux); + rec_val += aux; + } + // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_val); + grad_aux->setToZero(); + + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + int idx_temp = cont_idx[ri]; + bret = master_prob_->eval_grad_rterm(idx_temp, nc_, x0_vec, *grad_aux); + if(!bret) { + // TODO } - - rec_prob[my_rank_]->wait(); // Ensure send buffer is safe to use. - rec_prob[my_rank_]->set_value(rec_val); - - rec_prob[my_rank_]->set_grad(grad_acc_vec); - rec_prob[my_rank_]->post_send(2, rank_master, comm_world_); - // do something with the func eval and gradient to determine the quadratic regularization - // log_->printf(hovIteration, "send recourse value flag for test %d \n", mpi_test_flag); - - // post recv for new index - req_cont_idx[my_rank_]->post_recv(1, rank_master, comm_world_); - // ierr = MPI_Irecv(&cont_idx[0], 1, MPI_INT, rank_master, 1, comm_world_, &request_[0]); + grad_acc->axpy(1.0, *grad_aux); } - } - } - - if(my_rank_==0) { - for(int r=1; rwait(); - req_cont_idx[r]->wait(); - } - -#ifndef NDEBUG - // Ensure we've completed all NB operations. - for(auto curr : rec_prob) { - assert(curr->request_ == MPI_REQUEST_NULL); - } - for(auto curr : req_cont_idx) { - assert(curr->request_ == MPI_REQUEST_NULL); - } -#endif // NDEBUG - - recourse_val = rval; - log_->printf(hovSummary, "real rval %18.12e\n", rval); + rec_prob[my_rank_]->wait(); // Ensure send buffer is safe to use. + rec_prob[my_rank_]->set_value(rec_val); - hess_appx->setToConstant(1.0); + rec_prob[my_rank_]->set_grad(grad_acc_vec); + rec_prob[my_rank_]->post_send(2, rank_master, comm_world_); + // do something with the func eval and gradient to determine the quadratic regularization + // log_->printf(hovIteration, "send recourse value flag for test %d \n", mpi_test_flag); - if(nc_copy_from_indexes(*x_, *xc_idx_); - } else { - assert(nc_==n_); - x0->copyFromStarting(0, *x_); + // post recv for new index + req_cont_idx[my_rank_]->post_recv(1, rank_master, comm_world_); + // ierr = MPI_Irecv(&cont_idx[0], 1, MPI_INT, rank_master, 1, comm_world_, &request_[0]); } + } + } - if(it==0) { - hess_appx_2->initialize(rval, *x0, *grad_r); - double alp_temp = hess_appx_2->get_alpha_f(*grad_r); - // double alp_temp = hess_appx_2->get_alpha_tr(); // alternative update rule for alpha - log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); - - hess_appx->setToConstant(alp_temp); - } else { - hess_appx_2->update_hess_coeff(*x0, *grad_r, rval); - //update basecase objective, this requires updated skm1 and ykm1 - base_valm1 = base_val; - base_val = hess_appx_2->compute_base(master_prob_->get_objective()); - - //hess_appx_2->update_ratio(); - hess_appx_2->update_ratio(base_val, base_valm1); - - double alp_temp = hess_appx_2->get_alpha_f(*grad_r); - //double alp_temp = hess_appx_2->get_alpha_tr(); - - //double alp_temp2 = hess_appx_2->get_alpha_BB(); - log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); - // log_->printf(hovSummary, "alpd BB %18.12e\n", alp_temp2); - convg_g = hess_appx_2->check_convergence_grad(*grad_r); - log_->printf(hovSummary,"gradient convergence measure %18.12e\n", convg_g); - convg_f = hess_appx_2->check_convergence_fcn(base_val, base_valm1); - log_->printf(hovSummary,"function val convergence measure %18.12e\n", convg_f); - convg = std::min(convg_f,convg_g); - hess_appx->setToConstant(alp_temp); + if(my_rank_ == 0) { + for(int r = 1; r < comm_size_; r++) { + rec_prob[r]->wait(); + req_cont_idx[r]->wait(); + } - } +#ifndef NDEBUG + // Ensure we've completed all NB operations. + for(auto curr: rec_prob) { + assert(curr->request_ == MPI_REQUEST_NULL); + } + for(auto curr: req_cont_idx) { + assert(curr->request_ == MPI_REQUEST_NULL); + } +#endif // NDEBUG - // wait for the sending/receiving to finish - // for debugging purpose print out the recourse gradient - log_->write(nullptr, *grad_r, hovFcnEval); - - if(it>0) { - log_->printf(hovSummary, "iteration objective residual " - "step_size convg\n"); - - log_->printf(hovSummary, "%d %18.12e %18.12e %18.12e " - "%18.12e\n", it, base_val+recourse_val, convg_f, dinf, convg_g); - - fflush(stdout); - } + recourse_val = rval; - assert(evaluator->get_rgrad()!=NULL);// evaluator should be defined - evaluator->set_rval(rval); - evaluator->set_rgrad(nc_,*grad_r); - evaluator->set_rhess(nc_,*hess_appx); - evaluator->set_x0(nc_,*x0); + log_->printf(hovSummary, "real rval %18.12e\n", rval); - bret = master_prob_->set_recourse_approx_evaluator(nc_, evaluator); - if(!bret) { - //TODO - } - - options_file_master_prob = options_->GetString("options_file_master_prob"); - - // log_->printf(hovIteration, "solving full problem starts, iteration %d \n", it); - solver_status_ = master_prob_->solve_master(*x_, true, 0, 0, 0, options_file_master_prob.c_str()); - - log_->printf(hovSummary, "solved full problem with objective %18.12e\n", master_prob_->get_objective()); - - log_->write(nullptr, *x_, hovFcnEval); - - t2 = MPI_Wtime(); - log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n",it, t2 - t1); - - dinf = step_size_inf(nc_, *xc_idx_, *x_, *x0); - + hess_appx->setToConstant(1.0); + + if(nc_ < n_) { + x0->copy_from_indexes(*x_, *xc_idx_); } else { - // evaluator ranks do nothing + assert(nc_ == n_); + x0->copyFromStarting(0, *x_); } - if(convg <= accp_tol_) { - accp_count += 1; + + if(it == 0) { + hess_appx_2->initialize(rval, *x0, *grad_r); + double alp_temp = hess_appx_2->get_alpha_f(*grad_r); + // double alp_temp = hess_appx_2->get_alpha_tr(); // alternative update rule for alpha + log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); + + hess_appx->setToConstant(alp_temp); } else { - accp_count = 0; + hess_appx_2->update_hess_coeff(*x0, *grad_r, rval); + // update basecase objective, this requires updated skm1 and ykm1 + base_valm1 = base_val; + base_val = hess_appx_2->compute_base(master_prob_->get_objective()); + + // hess_appx_2->update_ratio(); + hess_appx_2->update_ratio(base_val, base_valm1); + + double alp_temp = hess_appx_2->get_alpha_f(*grad_r); + // double alp_temp = hess_appx_2->get_alpha_tr(); + + // double alp_temp2 = hess_appx_2->get_alpha_BB(); + log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); + // log_->printf(hovSummary, "alpd BB %18.12e\n", alp_temp2); + convg_g = hess_appx_2->check_convergence_grad(*grad_r); + log_->printf(hovSummary, "gradient convergence measure %18.12e\n", convg_g); + convg_f = hess_appx_2->check_convergence_fcn(base_val, base_valm1); + log_->printf(hovSummary, "function val convergence measure %18.12e\n", convg_f); + convg = std::min(convg_f, convg_g); + hess_appx->setToConstant(alp_temp); } - if(stopping_criteria(it, convg, accp_count)) { - end_signal = 1; - } - ierr = MPI_Bcast(&end_signal, 1, MPI_INT, rank_master, comm_world_); - assert(ierr == MPI_SUCCESS); - - for(auto it : rec_prob) { - delete it; + // wait for the sending/receiving to finish + // for debugging purpose print out the recourse gradient + log_->write(nullptr, *grad_r, hovFcnEval); + + if(it > 0) { + log_->printf(hovSummary, + "iteration objective residual " + "step_size convg\n"); + + log_->printf(hovSummary, + "%d %18.12e %18.12e %18.12e " + "%18.12e\n", + it, + base_val + recourse_val, + convg_f, + dinf, + convg_g); + + fflush(stdout); } - for(auto it : req_cont_idx) { - delete it; - } - - if(end_signal) { - break; + assert(evaluator->get_rgrad() != NULL); // evaluator should be defined + evaluator->set_rval(rval); + evaluator->set_rgrad(nc_, *grad_r); + evaluator->set_rhess(nc_, *hess_appx); + evaluator->set_x0(nc_, *x0); + + bret = master_prob_->set_recourse_approx_evaluator(nc_, evaluator); + if(!bret) { + // TODO } - } - delete grad_r; - delete hess_appx; - delete x0; - delete grad_aux; - delete grad_acc; - delete hess_appx_2; - delete evaluator; - - if(my_rank_==0) { - return solver_status_; + options_file_master_prob = options_->GetString("options_file_master_prob"); + + // log_->printf(hovIteration, "solving full problem starts, iteration %d \n", it); + solver_status_ = master_prob_->solve_master(*x_, true, 0, 0, 0, options_file_master_prob.c_str()); + + log_->printf(hovSummary, "solved full problem with objective %18.12e\n", master_prob_->get_objective()); + + log_->write(nullptr, *x_, hovFcnEval); + + t2 = MPI_Wtime(); + log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n", it, t2 - t1); + + dinf = step_size_inf(nc_, *xc_idx_, *x_, *x0); + } else { - return Solve_Success; + // evaluator ranks do nothing + } + if(convg <= accp_tol_) { + accp_count += 1; + } else { + accp_count = 0; } - } - /** - * A different asynchronous communication scheme where evaluator ranks - * transfer data only once with the master rank - * - */ - hiopSolveStatus hiopAlgPrimalDecomposition::run_local() - { - log_->printf(hovSummary, "===============\nHiop Primal Decomposition SOLVER\n===============\n"); - log_->printf(hovSummary, "===============\nUsing local accumulation OPTION\n===============\n"); - if(options_->GetString("print_options") != "no") { - log_->write(nullptr, *options_, hovSummary); + if(stopping_criteria(it, convg, accp_count)) { + end_signal = 1; } - if(comm_size_==1) { - return run_single();//call the serial solver + ierr = MPI_Bcast(&end_signal, 1, MPI_INT, rank_master, comm_world_); + assert(ierr == MPI_SUCCESS); + + for(auto it: rec_prob) { + delete it; } - if(my_rank_==0) { - log_->printf(hovSummary, "total number of recourse problems %lu\n", S_); - log_->printf(hovSummary, "total ranks %d\n",comm_size_); + + for(auto it: req_cont_idx) { + delete it; } - // initial point set to all zero, for now - x_->setToConstant(0.0); - - bool bret; - int rank_master=0; // master rank is also the rank that solves the master problem - // Define the values and gradients as needed as well as a receving buffer on the master rank - double rval = 0.; - - double rval_main = 0.; - - hiopVector* grad_r; - grad_r = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), nc_) ; - grad_r->setToZero(); - double* grad_r_vec=grad_r->local_data(); - - // grad_r_main is only needed on the master rank - hiopVector* grad_r_main; - grad_r_main = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), nc_) ; - grad_r_main->setToZero(); - double* grad_r_main_vec=grad_r_main->local_data(); - - hiopVector* hess_appx = grad_r->alloc_clone(); - hess_appx->setToZero(); - - hiopVector* x0 = grad_r->alloc_clone(); - x0->setToZero(); - double* x0_vec=x0->local_data(); - - hiopVector* grad_aux = x0->alloc_clone(); - grad_aux->setToZero(); - - // local recourse terms for each evaluator, defined accross all processors - // it is only necessary if a batch of recourse indices are sent at the same time - // therefore in run_local() no longer defined - //double rec_val = 0.; - //hiopVector* grad_acc = grad_r->alloc_clone(); - //grad_acc->setToZero(); - //double* grad_acc_vec = grad_acc->local_data(); - - //hess_appx_2 is declared by all ranks while only rank 0 uses it - HessianApprox* hess_appx_2 = new HessianApprox(nc_, alpha_ratio_, master_prob_, options_); - hess_appx_2->set_alpha_min(alpha_min_); - hess_appx_2->set_alpha_max(alpha_max_); - - if(ver_ >= hovSummary) { - hess_appx_2->set_verbosity(ver_); + + if(end_signal) { + break; } + } - double base_val = 0.; // basecase objective value - double base_valm1 = 0.; // basecase objective value from the previous step - double recourse_val = 0.; // recourse objective value - double dinf = 0.; // step size - - double convg = 1e20; // convergence measure - double convg_g = 1e20; - double convg_f = 1e20; - int accp_count = 0; - - int end_signal = 0; - double t1 = 0; - double t2 = 0; - hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator = new hiopInterfacePriDecProblem:: - RecourseApproxEvaluator(nc_, S_, xc_idx_->local_data(), options_->GetString("mem_space")); - double* x_vec = x_->local_data(); - - std::string options_file_master_prob; - // Outer loop starts - for(int it=0; itprintf(hovIteration, "my rank for solver %d\n", my_rank_); + delete grad_r; + delete hess_appx; + delete x0; + delete grad_aux; + delete grad_acc; + delete hess_appx_2; + delete evaluator; - options_file_master_prob = options_->GetString("options_file_master_prob"); - solver_status_ = master_prob_->solve_master(*x_, false, 0, 0, 0, options_file_master_prob.c_str()); + if(my_rank_ == 0) { + return solver_status_; + } else { + return Solve_Success; + } +} - if(solver_status_) { - // to do, what if solve fails? - } - - log_->write(nullptr, *x_, hovFcnEval); - base_val = master_prob_->get_objective(); - base_valm1 = master_prob_->get_objective(); +/** + * A different asynchronous communication scheme where evaluator ranks + * transfer data only once with the master rank + * + */ +hiopSolveStatus hiopAlgPrimalDecomposition::run_local() +{ + log_->printf(hovSummary, "===============\nHiop Primal Decomposition SOLVER\n===============\n"); + log_->printf(hovSummary, "===============\nUsing local accumulation OPTION\n===============\n"); + if(options_->GetString("print_options") != "no") { + log_->write(nullptr, *options_, hovSummary); + } + if(comm_size_ == 1) { + return run_single(); // call the serial solver + } + if(my_rank_ == 0) { + log_->printf(hovSummary, "total number of recourse problems %lu\n", S_); + log_->printf(hovSummary, "total ranks %d\n", comm_size_); + } + // initial point set to all zero, for now + x_->setToConstant(0.0); + + bool bret; + int rank_master = 0; // master rank is also the rank that solves the master problem + // Define the values and gradients as needed as well as a receving buffer on the master rank + double rval = 0.; + + double rval_main = 0.; + + hiopVector* grad_r; + grad_r = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), nc_); + grad_r->setToZero(); + double* grad_r_vec = grad_r->local_data(); + + // grad_r_main is only needed on the master rank + hiopVector* grad_r_main; + grad_r_main = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), nc_); + grad_r_main->setToZero(); + double* grad_r_main_vec = grad_r_main->local_data(); + + hiopVector* hess_appx = grad_r->alloc_clone(); + hess_appx->setToZero(); + + hiopVector* x0 = grad_r->alloc_clone(); + x0->setToZero(); + double* x0_vec = x0->local_data(); + + hiopVector* grad_aux = x0->alloc_clone(); + grad_aux->setToZero(); + + // local recourse terms for each evaluator, defined accross all processors + // it is only necessary if a batch of recourse indices are sent at the same time + // therefore in run_local() no longer defined + // double rec_val = 0.; + // hiopVector* grad_acc = grad_r->alloc_clone(); + // grad_acc->setToZero(); + // double* grad_acc_vec = grad_acc->local_data(); + + // hess_appx_2 is declared by all ranks while only rank 0 uses it + HessianApprox* hess_appx_2 = new HessianApprox(nc_, alpha_ratio_, master_prob_, options_); + hess_appx_2->set_alpha_min(alpha_min_); + hess_appx_2->set_alpha_max(alpha_max_); + + if(ver_ >= hovSummary) { + hess_appx_2->set_verbosity(ver_); + } + + double base_val = 0.; // basecase objective value + double base_valm1 = 0.; // basecase objective value from the previous step + double recourse_val = 0.; // recourse objective value + double dinf = 0.; // step size + + double convg = 1e20; // convergence measure + double convg_g = 1e20; + double convg_f = 1e20; + int accp_count = 0; + + int end_signal = 0; + double t1 = 0; + double t2 = 0; + hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator = + new hiopInterfacePriDecProblem::RecourseApproxEvaluator(nc_, + S_, + xc_idx_->local_data(), + options_->GetString("mem_space")); + double* x_vec = x_->local_data(); + + std::string options_file_master_prob; + // Outer loop starts + for(int it = 0; it < max_iter_; it++) { + if(my_rank_ == 0) { + t1 = MPI_Wtime(); + } + it_ = it; + // solve the basecase first + if(my_rank_ == 0 && it == 0) { // initial solve + // log_->printf(hovIteration, "my rank for solver %d\n", my_rank_); + + options_file_master_prob = options_->GetString("options_file_master_prob"); + solver_status_ = master_prob_->solve_master(*x_, false, 0, 0, 0, options_file_master_prob.c_str()); + + if(solver_status_) { + // to do, what if solve fails? } - // send basecase solutions to all ranks - int ierr = MPI_Bcast(x_vec, n_, MPI_DOUBLE, rank_master, comm_world_); - assert(ierr == MPI_SUCCESS); + log_->write(nullptr, *x_, hovFcnEval); + base_val = master_prob_->get_objective(); + base_valm1 = master_prob_->get_objective(); + } + + // send basecase solutions to all ranks + int ierr = MPI_Bcast(x_vec, n_, MPI_DOUBLE, rank_master, comm_world_); + assert(ierr == MPI_SUCCESS); + + // set up recourse problem send/recv interface + std::vector rec_prob; + for(int r = 0; r < comm_size_; r++) { + rec_prob.push_back(new ReqRecourseApprox(nc_)); + } + + std::vector req_cont_idx; + for(int r = 0; r < comm_size_; r++) { + req_cont_idx.push_back(new ReqContingencyIdx(0)); + } - // set up recourse problem send/recv interface - std::vector rec_prob; - for(int r=0; rsetToZero(); + + rval_main = 0.; + grad_r_main->setToZero(); + + // master rank communication + if(my_rank_ == 0) { + // array for number of indices, currently the indices are in [0,S_] + + std::vector cont_idx(S_); + for(int i = 0; i < static_cast(S_); i++) { + cont_idx[i] = i; + } + // The number of contigencies/recourse problems should be larger than the number of processors + assert(static_cast(S_) >= comm_size_ - 1); + // idx is the next contingency to be sent out from the master + int idx = 0; + // Initilize the recourse communication by sending indices to the evaluator + // Using Blocking send here + for(int r = 1; r < comm_size_; r++) { + int cur_idx = cont_idx[idx]; + int ierr = MPI_Send(&cur_idx, 1, MPI_INT, r, 1, comm_world_); + assert(MPI_SUCCESS == ierr); + // log_->printf(hovIteration, "rank %d to get contingency index %d\n", r, cur_idx); //verbosity level 10 + idx += 1; + } + // Posting initial receive of recourse solutions from evaluators + for(int r = 1; r < comm_size_; r++) { + // rec_prob[r]->post_recv(2,r,comm_world_);// 2 is the tag, r is the rank source + rec_prob[r]->post_recv_end_signal(2, r, comm_world_); // 2 is the tag, r is the rank source } - - std::vector req_cont_idx; - for(int r=0; rsetToZero(); + std::vector finish_flag(comm_size_); // standard vector will be replaced by hiopVectorInt + for(int i = 0; i < comm_size_; i++) { + finish_flag[i] = 0; } + int last_loop = 0; + // log_->printf(hovIteration, "total idx %d\n", S_); + t2 = MPI_Wtime(); + + log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n", it, t2 - t1); + + while(idx <= static_cast(S_) || last_loop) { + for(int r = 1; r < comm_size_; r++) { + int mpi_test_flag = rec_prob[r]->test(); + if(mpi_test_flag && (finish_flag[r] == 0)) { // receive completed + if(!last_loop && idx < static_cast(S_)) { + log_->printf(hovLinesearch, "idx %d sent to rank %d\n", idx, r); + } else { + log_->printf(hovLinesearch, "last loop for rank %d\n", r); + } + // no need to add to the master rank variables + /* + rval += rec_prob[r]->value(); + for(int i=0;igrad(i); + } + */ - rval = 0.; - grad_r->setToZero(); - - rval_main = 0.; - grad_r_main->setToZero(); - - // master rank communication - if(my_rank_ == 0) { - // array for number of indices, currently the indices are in [0,S_] - - std::vector cont_idx(S_); - for(int i=0; i(S_); i++) { - cont_idx[i] = i; - } - // The number of contigencies/recourse problems should be larger than the number of processors - assert(static_cast(S_)>=comm_size_-1); - // idx is the next contingency to be sent out from the master - int idx = 0; - // Initilize the recourse communication by sending indices to the evaluator - // Using Blocking send here - for(int r=1; r< comm_size_;r++) { - int cur_idx = cont_idx[idx]; - int ierr = MPI_Send(&cur_idx, 1, MPI_INT, r, 1,comm_world_); - assert(MPI_SUCCESS == ierr); - // log_->printf(hovIteration, "rank %d to get contingency index %d\n", r, cur_idx); //verbosity level 10 - idx += 1; - } - // Posting initial receive of recourse solutions from evaluators - for(int r=1; rpost_recv(2,r,comm_world_);// 2 is the tag, r is the rank source - rec_prob[r]->post_recv_end_signal(2,r,comm_world_);// 2 is the tag, r is the rank source - } - // Both finish_flag and last_loop are used to deal with the final round remaining contingencies/recourse problems. - // Some ranks are finished while others are not. The loop needs to continue to fetch the results. - // hiopVectorInt* finish_flag = LinearAlgebraFactory::createVectorInt(comm_size_); - // finish_flag->setToZero(); - std::vector finish_flag(comm_size_); // standard vector will be replaced by hiopVectorInt - for(int i=0; iprintf(hovIteration, "total idx %d\n", S_); - t2 = MPI_Wtime(); - - log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n",it, t2 - t1); - - while(idx<=static_cast(S_) || last_loop) { - for(int r=1; r< comm_size_; r++) { - int mpi_test_flag = rec_prob[r]->test(); - if(mpi_test_flag && (finish_flag[r]==0)) {// receive completed - if(!last_loop && idx(S_)) { - log_->printf(hovLinesearch, "idx %d sent to rank %d\n", idx,r); - } else { - log_->printf(hovLinesearch, "last loop for rank %d\n", r ); - } - - // no need to add to the master rank variables - /* - rval += rec_prob[r]->value(); - for(int i=0;igrad(i); - } - */ - - if(last_loop) { - finish_flag[r]=1; - } - // this is for dealing with the end of contingencies where some ranks have already finished - if(idx(S_)) { - req_cont_idx[r]->wait(); // Ensure previous send has completed. - req_cont_idx[r]->set_idx(cont_idx[idx]); - req_cont_idx[r]->post_send(1,r,comm_world_); - - rec_prob[r]->post_recv_end_signal(2,r,comm_world_);// 2 is the tag, r is the rank source - // rec_prob[r]->post_recv(2,r,comm_world_);// 2 is the tag, r is the rank source - // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_prob[r]->value()); - } else { - finish_flag[r] = 1; - last_loop = 1; - } - idx += 1; - } + if(last_loop) { + finish_flag[r] = 1; + } + // this is for dealing with the end of contingencies where some ranks have already finished + if(idx < static_cast(S_)) { + req_cont_idx[r]->wait(); // Ensure previous send has completed. + req_cont_idx[r]->set_idx(cont_idx[idx]); + req_cont_idx[r]->post_send(1, r, comm_world_); + + rec_prob[r]->post_recv_end_signal(2, r, comm_world_); // 2 is the tag, r is the rank source + // rec_prob[r]->post_recv(2,r,comm_world_);// 2 is the tag, r is the rank source + // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_prob[r]->value()); + } else { + finish_flag[r] = 1; + last_loop = 1; + } + idx += 1; } + } - // Current way of ending the loop while accounting for all the last round of results - if(last_loop) { - last_loop=0; - for(int r=1; r< comm_size_;r++) { - if(finish_flag[r]==0) { - last_loop=1; - } + // Current way of ending the loop while accounting for all the last round of results + if(last_loop) { + last_loop = 0; + for(int r = 1; r < comm_size_; r++) { + if(finish_flag[r] == 0) { + last_loop = 1; } } - } - //rval /= S_; - //grad_r->scale(1.0/S_); - // send end signal to all evaluators - for(int r=1; rwait(); // Ensure previous send has completed. - req_cont_idx[r]->set_idx(-1); - req_cont_idx[r]->post_send(1,r,comm_world_); + } + // rval /= S_; + // grad_r->scale(1.0/S_); + // send end signal to all evaluators + for(int r = 1; r < comm_size_; r++) { + req_cont_idx[r]->wait(); // Ensure previous send has completed. + req_cont_idx[r]->set_idx(-1); + req_cont_idx[r]->post_send(1, r, comm_world_); + } + t2 = MPI_Wtime(); + log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n", it, t2 - t1); + } + + // evaluators + if(my_rank_ != 0) { + /* old sychronous implementation of contingencies + * int cpr = S_/(comm_size_-1); //contingency per rank + * int cr = S_%(comm_size_-1); //contingency remained + * log_->printf(hovIteration, "my rank start evaluating work %d)\n", my_rank_); + */ + std::vector cont_idx(1); // currently sending/receiving one contingency index at a time + int cont_i = 0; + cont_idx[0] = 0; + // Receive the index of the contingency to evaluate + int mpi_test_flag = 0; + int ierr = MPI_Recv(&cont_i, 1, MPI_INT, rank_master, 1, comm_world_, &status_); + assert(MPI_SUCCESS == ierr); + cont_idx[0] = cont_i; + // log_->printf(hovIteration, "contingency index %d, rank %d)\n", cont_idx[0],my_rank_); + // compute the recourse function values and gradients + + // accumulate locally so cannot set to zero + // rec_val = 0.; + // grad_acc->setToZero(); + + double aux = 0.; + + if(nc_ < n_) { + x0->copy_from_indexes(*x_, *xc_idx_); + } else { + assert(nc_ == n_); + x0->copyFromStarting(0, *x_); + } + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + aux = 0.; + int idx_temp = cont_idx[ri]; + + bret = master_prob_->eval_f_rterm(idx_temp, nc_, x0_vec, aux); // solving the recourse problem + if(!bret) { + // TODO } - t2 = MPI_Wtime(); - log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n",it, t2 - t1); + // rec_val += aux; + rval += aux; } - //evaluators - if(my_rank_ != 0) { - /* old sychronous implementation of contingencies - * int cpr = S_/(comm_size_-1); //contingency per rank - * int cr = S_%(comm_size_-1); //contingency remained - * log_->printf(hovIteration, "my rank start evaluating work %d)\n", my_rank_); - */ - std::vector cont_idx(1); // currently sending/receiving one contingency index at a time - int cont_i = 0; - cont_idx[0] = 0; - // Receive the index of the contingency to evaluate - int mpi_test_flag = 0; - int ierr = MPI_Recv(&cont_i, 1, MPI_INT, rank_master, 1, comm_world_, &status_); - assert(MPI_SUCCESS == ierr); - cont_idx[0] = cont_i; - // log_->printf(hovIteration, "contingency index %d, rank %d)\n", cont_idx[0],my_rank_); - // compute the recourse function values and gradients - - // accumulate locally so cannot set to zero - //rec_val = 0.; - //grad_acc->setToZero(); - - double aux=0.; - - if(nc_copy_from_indexes(*x_, *xc_idx_); - } else { - assert(nc_==n_); - x0->copyFromStarting(0, *x_); + // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_val); + grad_aux->setToZero(); + + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + int idx_temp = cont_idx[ri]; + bret = master_prob_->eval_grad_rterm(idx_temp, nc_, x0_vec, *grad_aux); + if(!bret) { + // TODO } - for(int ri=0; ri(cont_idx.size()); ri++) { - aux = 0.; - int idx_temp = cont_idx[ri]; + // grad_acc->axpy(1.0, *grad_aux); + grad_r->axpy(1.0, *grad_aux); + } - bret = master_prob_->eval_f_rterm(idx_temp, nc_, x0_vec, aux); // solving the recourse problem - if(!bret) { - //TODO + // no need to set values for rec_prob anymore + // rec_prob[my_rank_]->set_value(rec_val); + // rec_prob[my_rank_]->set_grad(grad_acc_vec); + + // rec_prob[my_rank_]->post_send(2, rank_master, comm_world_); + // send signal that subproblem has been solved + rec_prob[my_rank_]->wait(); // Ensure send buffer is safe to use. + rec_prob[my_rank_]->post_send_end_signal(2, rank_master, comm_world_); + + // request the next subproblem index + req_cont_idx[my_rank_]->post_recv(1, rank_master, comm_world_); + while(cont_idx[0] != -1) { // loop until end signal received + mpi_test_flag = req_cont_idx[my_rank_]->test(); + /* contigency starts at 0 + * sychronous implmentation of contingencist + */ + if(mpi_test_flag) { + // log_->printf(hovIteration, "contingency index %d, rank %d)\n", cont_idx[0],my_rank_); + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + cont_idx[ri] = req_cont_idx[my_rank_]->value(); + } + if(cont_idx[0] == -1) { + break; } - //rec_val += aux; - rval += aux; - } - // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_val); - grad_aux->setToZero(); + // accumulate locally so cannot set to zero + // rec_val = 0.; + // grad_acc->setToZero(); - for(int ri=0; ri(cont_idx.size()); ri++) { - int idx_temp = cont_idx[ri]; - bret = master_prob_->eval_grad_rterm(idx_temp, nc_, x0_vec, *grad_aux); - if(!bret) { - //TODO + double aux = 0.; + if(nc_ < n_) { + x0->copy_from_indexes(*x_, *xc_idx_); + } else { + assert(nc_ == n_); + x0->copyFromStarting(0, *x_); } - //grad_acc->axpy(1.0, *grad_aux); - grad_r->axpy(1.0, *grad_aux); - } + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + aux = 0.; + int idx_temp = cont_idx[ri]; - // no need to set values for rec_prob anymore - //rec_prob[my_rank_]->set_value(rec_val); - //rec_prob[my_rank_]->set_grad(grad_acc_vec); - - //rec_prob[my_rank_]->post_send(2, rank_master, comm_world_); - // send signal that subproblem has been solved - rec_prob[my_rank_]->wait(); // Ensure send buffer is safe to use. - rec_prob[my_rank_]->post_send_end_signal(2, rank_master, comm_world_); - - // request the next subproblem index - req_cont_idx[my_rank_]->post_recv(1, rank_master, comm_world_); - while(cont_idx[0]!=-1) {//loop until end signal received - mpi_test_flag = req_cont_idx[my_rank_]->test(); - /* contigency starts at 0 - * sychronous implmentation of contingencist - */ - if(mpi_test_flag) { - // log_->printf(hovIteration, "contingency index %d, rank %d)\n", cont_idx[0],my_rank_); - for(int ri=0; ri(cont_idx.size()); ri++) { - cont_idx[ri] = req_cont_idx[my_rank_]->value(); - } - if(cont_idx[0]==-1) { - break; - } - - // accumulate locally so cannot set to zero - //rec_val = 0.; - //grad_acc->setToZero(); - - double aux=0.; - if(nc_copy_from_indexes(*x_, *xc_idx_); - } else { - assert(nc_==n_); - x0->copyFromStarting(0, *x_); + bret = master_prob_->eval_f_rterm(idx_temp, nc_, x0_vec, aux); // need to add extra time here + if(!bret) { + // TODO } - for(int ri=0; ri(cont_idx.size()); ri++) { - aux = 0.; - int idx_temp = cont_idx[ri]; - - bret = master_prob_->eval_f_rterm(idx_temp, nc_, x0_vec, aux); //need to add extra time here - if(!bret) { - //TODO - } - //rec_val += aux; - rval += aux; - } - // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_val); - grad_aux->setToZero(); - - for(int ri=0; ri(cont_idx.size()); ri++) { - int idx_temp = cont_idx[ri]; - bret = master_prob_->eval_grad_rterm(idx_temp, nc_, x0_vec, *grad_aux); - if(!bret) { - //TODO - } - //grad_acc->axpy(1.0, *grad_aux); - grad_r->axpy(1.0, *grad_aux); + // rec_val += aux; + rval += aux; + } + // log_->printf(hovFcnEval, "recourse value: is %18.12e)\n", rec_val); + grad_aux->setToZero(); + + for(int ri = 0; ri < static_cast(cont_idx.size()); ri++) { + int idx_temp = cont_idx[ri]; + bret = master_prob_->eval_grad_rterm(idx_temp, nc_, x0_vec, *grad_aux); + if(!bret) { + // TODO } + // grad_acc->axpy(1.0, *grad_aux); + grad_r->axpy(1.0, *grad_aux); + } - //rec_prob[my_rank_]->set_value(rec_val); - //rec_prob[my_rank_]->set_grad(grad_acc_vec); - - // send signal that the subproblem has been solved - rec_prob[my_rank_]->wait(); // Ensure send buffer is safe to use. - rec_prob[my_rank_]->post_send_end_signal(2, rank_master, comm_world_); - //rec_prob[my_rank_]->post_send(2, rank_master, comm_world_); - // log_->printf(hovIteration, "send recourse value flag for test %d \n", mpi_test_flag); - - // post recv for new index - req_cont_idx[my_rank_]->post_recv(1, rank_master, comm_world_); - // ierr = MPI_Irecv(&cont_idx[0], 1, MPI_INT, rank_master, 1, comm_world_, &request_[0]); + // rec_prob[my_rank_]->set_value(rec_val); + // rec_prob[my_rank_]->set_grad(grad_acc_vec); - } + // send signal that the subproblem has been solved + rec_prob[my_rank_]->wait(); // Ensure send buffer is safe to use. + rec_prob[my_rank_]->post_send_end_signal(2, rank_master, comm_world_); + // rec_prob[my_rank_]->post_send(2, rank_master, comm_world_); + // log_->printf(hovIteration, "send recourse value flag for test %d \n", mpi_test_flag); + + // post recv for new index + req_cont_idx[my_rank_]->post_recv(1, rank_master, comm_world_); + // ierr = MPI_Irecv(&cont_idx[0], 1, MPI_INT, rank_master, 1, comm_world_, &request_[0]); } - //rval = rec_val; - //grad_r->copyFrom(grad_acc_vec); } + // rval = rec_val; + // grad_r->copyFrom(grad_acc_vec); + } - if(my_rank_==0) { - assert(rval == 0); - for(int i=0; i(nc_); i++ ) { - assert(grad_r_vec[i] == 0.); - } - for(int r=1; rwait(); - req_cont_idx[r]->wait(); - } + if(my_rank_ == 0) { + assert(rval == 0); + for(int i = 0; i < static_cast(nc_); i++) { + assert(grad_r_vec[i] == 0.); + } + for(int r = 1; r < comm_size_; r++) { + rec_prob[r]->wait(); + req_cont_idx[r]->wait(); + } #ifndef NDEBUG - // Ensure we've completed all NB operations. - for(auto curr : rec_prob) { - assert(curr->request_ == MPI_REQUEST_NULL); - } - for(auto curr : req_cont_idx) { - assert(curr->request_ == MPI_REQUEST_NULL); - } -#endif // NDEBUG + // Ensure we've completed all NB operations. + for(auto curr: rec_prob) { + assert(curr->request_ == MPI_REQUEST_NULL); + } + for(auto curr: req_cont_idx) { + assert(curr->request_ == MPI_REQUEST_NULL); } +#endif // NDEBUG + } - //std::cout<<"my rank "<copyFrom(grad_r_main_vec); - - rval /= S_; - grad_r->scale(1.0/S_); - - log_->printf(hovSummary, "real rval %18.12e\n", rval); - - hess_appx->setToConstant(1.0); - - if(nc_copy_from_indexes(*x_, *xc_idx_); - } else { - assert(nc_==n_); - x0->copyFromStarting(0, *x_); - } + // std::cout<<"my rank "<initialize(rval, *x0, *grad_r); - double alp_temp = hess_appx_2->get_alpha_f(*grad_r); - // double alp_temp = hess_appx_2->get_alpha_tr(); // alternative update rule for alpha - log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); - - hess_appx->setToConstant(alp_temp); - } else { - hess_appx_2->update_hess_coeff(*x0, *grad_r, rval); - //update basecase objective, this requires updated skm1 and ykm1 - base_valm1 = base_val; - base_val = hess_appx_2->compute_base(master_prob_->get_objective()); - - //hess_appx_2->update_ratio(); - hess_appx_2->update_ratio(base_val, base_valm1); - - double alp_temp = hess_appx_2->get_alpha_f(*grad_r); - //double alp_temp = hess_appx_2->get_alpha_tr(); - - //double alp_temp2 = hess_appx_2->get_alpha_BB(); - log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); - // log_->printf(hovSummary, "alpd BB %18.12e\n", alp_temp2); - convg_g = hess_appx_2->check_convergence_grad(*grad_r); - log_->printf(hovSummary,"gradient convergence measure %18.12e\n", convg_g); - convg_f = hess_appx_2->check_convergence_fcn(base_val, base_valm1); - log_->printf(hovSummary,"function val convergence measure %18.12e\n", convg_f); - convg = std::min(convg_f,convg_g); - hess_appx->setToConstant(alp_temp); + if(my_rank_ == 0) { + // std::cout<<"real rval %18.12e\n "<< rval_main<copyFrom(grad_r_main_vec); - } + rval /= S_; + grad_r->scale(1.0 / S_); - // wait for the sending/receiving to finish - // for debugging purpose print out the recourse gradient - log_->write(nullptr, *grad_r, hovFcnEval); - - if(it>0) { - log_->printf(hovSummary, "iteration objective residual " - "step_size convg\n"); - - log_->printf(hovSummary, "%d %18.12e %18.12e %18.12e " - "%18.12e\n", it, base_val+recourse_val, convg_f, dinf, convg_g); - - fflush(stdout); - } + log_->printf(hovSummary, "real rval %18.12e\n", rval); - assert(evaluator->get_rgrad()!=NULL);// evaluator should be defined - evaluator->set_rval(rval); - evaluator->set_rgrad(nc_,*grad_r); - evaluator->set_rhess(nc_,*hess_appx); - evaluator->set_x0(nc_,*x0); + hess_appx->setToConstant(1.0); - bret = master_prob_->set_recourse_approx_evaluator(nc_, evaluator); - if(!bret) { - //TODO - } - - options_file_master_prob = options_->GetString("options_file_master_prob"); - - // log_->printf(hovIteration, "solving full problem starts, iteration %d \n", it); - solver_status_ = master_prob_->solve_master(*x_, true, 0, 0, 0, options_file_master_prob.c_str()); - - log_->printf(hovSummary, "solved full problem with objective %18.12e\n", master_prob_->get_objective()); - - log_->write(nullptr, *x_, hovFcnEval); - - t2 = MPI_Wtime(); - log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n",it, t2 - t1); - - dinf = step_size_inf(nc_, *xc_idx_, *x_, *x0); - + if(nc_ < n_) { + x0->copy_from_indexes(*x_, *xc_idx_); } else { - // evaluator ranks do nothing + assert(nc_ == n_); + x0->copyFromStarting(0, *x_); } - if(convg <= accp_tol_) { - accp_count += 1; + + if(it == 0) { + hess_appx_2->initialize(rval, *x0, *grad_r); + double alp_temp = hess_appx_2->get_alpha_f(*grad_r); + // double alp_temp = hess_appx_2->get_alpha_tr(); // alternative update rule for alpha + log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); + + hess_appx->setToConstant(alp_temp); } else { - accp_count = 0; + hess_appx_2->update_hess_coeff(*x0, *grad_r, rval); + // update basecase objective, this requires updated skm1 and ykm1 + base_valm1 = base_val; + base_val = hess_appx_2->compute_base(master_prob_->get_objective()); + + // hess_appx_2->update_ratio(); + hess_appx_2->update_ratio(base_val, base_valm1); + + double alp_temp = hess_appx_2->get_alpha_f(*grad_r); + // double alp_temp = hess_appx_2->get_alpha_tr(); + + // double alp_temp2 = hess_appx_2->get_alpha_BB(); + log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); + // log_->printf(hovSummary, "alpd BB %18.12e\n", alp_temp2); + convg_g = hess_appx_2->check_convergence_grad(*grad_r); + log_->printf(hovSummary, "gradient convergence measure %18.12e\n", convg_g); + convg_f = hess_appx_2->check_convergence_fcn(base_val, base_valm1); + log_->printf(hovSummary, "function val convergence measure %18.12e\n", convg_f); + convg = std::min(convg_f, convg_g); + hess_appx->setToConstant(alp_temp); } - if(stopping_criteria(it, convg, accp_count)) { - end_signal = 1; - } - ierr = MPI_Bcast(&end_signal, 1, MPI_INT, rank_master, comm_world_); - assert(ierr == MPI_SUCCESS); - - for(auto it : rec_prob) { - delete it; + // wait for the sending/receiving to finish + // for debugging purpose print out the recourse gradient + log_->write(nullptr, *grad_r, hovFcnEval); + + if(it > 0) { + log_->printf(hovSummary, + "iteration objective residual " + "step_size convg\n"); + + log_->printf(hovSummary, + "%d %18.12e %18.12e %18.12e " + "%18.12e\n", + it, + base_val + recourse_val, + convg_f, + dinf, + convg_g); + + fflush(stdout); } - for(auto it : req_cont_idx) { - delete it; - } - - if(end_signal) { - break; + assert(evaluator->get_rgrad() != NULL); // evaluator should be defined + evaluator->set_rval(rval); + evaluator->set_rgrad(nc_, *grad_r); + evaluator->set_rhess(nc_, *hess_appx); + evaluator->set_x0(nc_, *x0); + + bret = master_prob_->set_recourse_approx_evaluator(nc_, evaluator); + if(!bret) { + // TODO } - } - delete grad_r; - delete grad_r_main; - delete hess_appx; - delete x0; - delete grad_aux; - //delete grad_acc; - delete hess_appx_2; - delete evaluator; - - if(my_rank_==0) { - return solver_status_; + options_file_master_prob = options_->GetString("options_file_master_prob"); + + // log_->printf(hovIteration, "solving full problem starts, iteration %d \n", it); + solver_status_ = master_prob_->solve_master(*x_, true, 0, 0, 0, options_file_master_prob.c_str()); + + log_->printf(hovSummary, "solved full problem with objective %18.12e\n", master_prob_->get_objective()); + + log_->write(nullptr, *x_, hovFcnEval); + + t2 = MPI_Wtime(); + log_->printf(hovFcnEval, "Elapsed time for entire iteration %d is %f\n", it, t2 - t1); + + dinf = step_size_inf(nc_, *xc_idx_, *x_, *x0); + } else { - return Solve_Success; + // evaluator ranks do nothing } + if(convg <= accp_tol_) { + accp_count += 1; + } else { + accp_count = 0; + } + + if(stopping_criteria(it, convg, accp_count)) { + end_signal = 1; + } + ierr = MPI_Bcast(&end_signal, 1, MPI_INT, rank_master, comm_world_); + assert(ierr == MPI_SUCCESS); + + for(auto it: rec_prob) { + delete it; + } + + for(auto it: req_cont_idx) { + delete it; + } + + if(end_signal) { + break; + } + } + + delete grad_r; + delete grad_r_main; + delete hess_appx; + delete x0; + delete grad_aux; + // delete grad_acc; + delete hess_appx_2; + delete evaluator; + + if(my_rank_ == 0) { + return solver_status_; + } else { + return Solve_Success; } +} #else hiopSolveStatus hiopAlgPrimalDecomposition::run() @@ -1792,13 +1754,10 @@ hiopSolveStatus hiopAlgPrimalDecomposition::run() if(options_->GetString("print_options") != "no") { log_->write(nullptr, *options_, hovSummary); } - return run_single(); // call the serial solver + return run_single(); // call the serial solver } #endif - - - /* Solve problem in serial with only one rank */ hiopSolveStatus hiopAlgPrimalDecomposition::run_single() @@ -1806,17 +1765,17 @@ hiopSolveStatus hiopAlgPrimalDecomposition::run_single() printf("total number of recourse problems %lu\n", S_); // initial point for now set to all zero x_->setToZero(); - + bool bret; // Define the values and gradients as needed in the master rank double rval = 0.; // double grad_r[nc_]; hiopVector* grad_r; - grad_r = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), nc_) ; - + grad_r = LinearAlgebraFactory::create_vector(options_->GetString("mem_space"), nc_); + hiopVector* hess_appx; hess_appx = grad_r->alloc_clone(); - + hiopVector* x0 = grad_r->alloc_clone(); double* x0_vec = x0->local_data(); @@ -1826,14 +1785,17 @@ hiopSolveStatus hiopAlgPrimalDecomposition::run_single() HessianApprox* hess_appx_2 = new HessianApprox(nc_, alpha_ratio_, master_prob_, options_); hess_appx_2->set_alpha_min(alpha_min_); hess_appx_2->set_alpha_max(alpha_max_); - + hiopInterfacePriDecProblem::RecourseApproxEvaluator* evaluator = - new hiopInterfacePriDecProblem::RecourseApproxEvaluator(nc_, S_, xc_idx_->local_data(), options_->GetString("mem_space")); + new hiopInterfacePriDecProblem::RecourseApproxEvaluator(nc_, + S_, + xc_idx_->local_data(), + options_->GetString("mem_space")); - double base_val = 0.; // basecase objective value - double base_valm1 = 0.; // basecase objective value from previous iteration + double base_val = 0.; // basecase objective value + double base_valm1 = 0.; // basecase objective value from previous iteration double recourse_val = 0.; // recourse objective value - double dinf = 0.; // step size + double dinf = 0.; // step size double convg = 1e20; double convg_f = 1e20; double convg_g = 1e20; @@ -1842,16 +1804,15 @@ hiopSolveStatus hiopAlgPrimalDecomposition::run_single() std::string options_file_master_prob; // Outer loop starts - for(int it=0; itprintf(hovIteration, "iteration %d\n", it); + for(int it = 0; it < max_iter_; it++) { + // log_->printf(hovIteration, "iteration %d\n", it); // solve the basecase it_ = it; - if(it==0) { + if(it == 0) { options_file_master_prob = options_->GetString("options_file_master_prob"); - // solve master problem basecase (solver rank supposed to do it) + // solve master problem basecase (solver rank supposed to do it) solver_status_ = master_prob_->solve_master(*x_, false, 0, 0, 0, options_file_master_prob.c_str()); - if(solver_status_) { - + if(solver_status_) { } base_val = master_prob_->get_objective(); base_valm1 = base_val; @@ -1862,78 +1823,78 @@ hiopSolveStatus hiopAlgPrimalDecomposition::run_single() grad_r->setToZero(); std::vector cont_idx(S_); - for(int i=0; i(S_); i++) { + for(int i = 0; i < static_cast(S_); i++) { cont_idx[i] = i; } // The number of contigencies should be larger than the number of processors, which is 1 // idx is the next contingency to be sent out from the master - if(nc_copy_from_indexes(*x_, *xc_idx_); } else { - assert(nc_==n_); + assert(nc_ == n_); x0->copyFromStarting(0, *x_); } - for(int i=0; i< static_cast(S_);i++) { + for(int i = 0; i < static_cast(S_); i++) { int idx_temp = cont_idx[i]; - double aux=0.; - bret = master_prob_->eval_f_rterm(idx_temp, nc_, x0_vec, aux); //need to add extra time here + double aux = 0.; + bret = master_prob_->eval_f_rterm(idx_temp, nc_, x0_vec, aux); // need to add extra time here if(!bret) { - //TODO + // TODO } rval += aux; // assert("for debugging" && false); //for debugging purpose // TODO: allocate this outside the loop hiopVector* grad_aux = grad_r->alloc_clone(); - grad_aux->setToZero(); + grad_aux->setToZero(); // double grad_aux[nc_]; bret = master_prob_->eval_grad_rterm(idx_temp, nc_, x0_vec, *grad_aux); if(!bret) { - //TODO + // TODO } - grad_r->axpy(1.0,*grad_aux); + grad_r->axpy(1.0, *grad_aux); delete grad_aux; - } + } rval /= S_; - grad_r->scale(1.0/S_); + grad_r->scale(1.0 / S_); log_->printf(hovSummary, "real rval %18.12e\n", rval); - + recourse_val = rval; hess_appx->setToConstant(1e6); - - if(it==0) { + + if(it == 0) { hess_appx_2->initialize(rval, *x0, *grad_r); double alp_temp = hess_appx_2->get_alpha_f(*grad_r); // double alp_temp = hess_appx_2->get_alpha_tr(); // alternative update rule for alpha log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); - + hess_appx->setToConstant(alp_temp); } else { hess_appx_2->update_hess_coeff(*x0, *grad_r, rval); - + base_valm1 = base_val; base_val = hess_appx_2->compute_base(master_prob_->get_objective()); - + // hess_appx_2->update_ratio(); hess_appx_2->update_ratio(base_val, base_valm1); - + double alp_temp = hess_appx_2->get_alpha_f(*grad_r); // double alp_temp = hess_appx_2->get_alpha_tr(); log_->printf(hovSummary, "alpd %18.12e\n", alp_temp); - + convg_g = hess_appx_2->check_convergence_grad(*grad_r); log_->printf(hovSummary, "convergence measure %18.12e\n", convg_g); - + convg_f = hess_appx_2->check_convergence_fcn(base_val, base_valm1); - + log_->printf(hovSummary, "function val convergence measure %18.12e\n", convg_f); - - convg = std::min(convg_f,convg_g); + + convg = std::min(convg_f, convg_g); hess_appx->setToConstant(alp_temp); } @@ -1942,41 +1903,47 @@ hiopSolveStatus hiopAlgPrimalDecomposition::run_single() // nc_ is the demesnion of coupled x - if(it>0) { - log_->printf(hovSummary, "iteration objective residual " - "step_size convg\n"); - log_->printf(hovSummary, "%d %18.12e %18.12e %18.12e " - "%18.12e\n", it, base_val+recourse_val, convg_f, dinf, convg_g); + if(it > 0) { + log_->printf(hovSummary, + "iteration objective residual " + "step_size convg\n"); + log_->printf(hovSummary, + "%d %18.12e %18.12e %18.12e " + "%18.12e\n", + it, + base_val + recourse_val, + convg_f, + dinf, + convg_g); fflush(stdout); } - assert(evaluator->get_rgrad()!=NULL);// should be defined + assert(evaluator->get_rgrad() != NULL); // should be defined evaluator->set_rval(rval); - evaluator->set_rgrad(nc_,*grad_r); - evaluator->set_rhess(nc_,*hess_appx); - evaluator->set_x0(nc_,*x0); - + evaluator->set_rgrad(nc_, *grad_r); + evaluator->set_rhess(nc_, *hess_appx); + evaluator->set_x0(nc_, *x0); bret = master_prob_->set_recourse_approx_evaluator(nc_, evaluator); if(!bret) { - //TODO + // TODO } options_file_master_prob = options_->GetString("options_file_master_prob"); - log_->printf(hovSummary, "solving full problem starts, iteration %d \n", it); + log_->printf(hovSummary, "solving full problem starts, iteration %d \n", it); solver_status_ = master_prob_->solve_master(*x_, true, 0, 0, 0, options_file_master_prob.c_str()); - - dinf = step_size_inf(nc_, *xc_idx_, *x_, *x0); + + dinf = step_size_inf(nc_, *xc_idx_, *x_, *x0); // print solution x at the end of a full solve log_->write(nullptr, *x_, hovFcnEval); - + // assert("for debugging" && false); //for debugging purpose if(convg <= accp_tol_) { accp_count += 1; } else { accp_count = 0; } - log_->printf(hovIteration, "count %d \n", accp_count); + log_->printf(hovIteration, "count %d \n", accp_count); if(stopping_criteria(it, convg, accp_count)) { break; } @@ -1987,7 +1954,7 @@ hiopSolveStatus hiopAlgPrimalDecomposition::run_single() delete x0; delete hess_appx_2; delete evaluator; - return Solve_Success; + return Solve_Success; } -}//end namespace +} // namespace hiop diff --git a/src/Optimization/hiopAlgPrimalDecomp.hpp b/src/Optimization/hiopAlgPrimalDecomp.hpp index fee5bcd13..a0b6b407a 100644 --- a/src/Optimization/hiopAlgPrimalDecomp.hpp +++ b/src/Optimization/hiopAlgPrimalDecomp.hpp @@ -70,28 +70,28 @@ namespace hiop { class hiopLogger; - + // temporary output levels, aiming to integrate with hiop verbosity -enum MPIout { - outlevel0=0, //print nothing during from the MPI engine - outlevel1=1, //print standard output: objective, step size - outlevel2=2, //print the details needed to debug the algorithm, including alpha info, - //also prints elapsed time and output x and gradient - outlevel3=3, //print the send and receive messages - outlevel4=4 //print details about the algorithm -}; - -/** This class defines the main serial/MPI solver for solving a class of problems with primal decomposition. +enum MPIout +{ + outlevel0 = 0, // print nothing during from the MPI engine + outlevel1 = 1, // print standard output: objective, step size + outlevel2 = 2, // print the details needed to debug the algorithm, including alpha info, + // also prints elapsed time and output x and gradient + outlevel3 = 3, // print the send and receive messages + outlevel4 = 4 // print details about the algorithm +}; + +/** This class defines the main serial/MPI solver for solving a class of problems with primal decomposition. * The master problem is the user defined class that should be able to solve both - * the basecase and full problem depending whether a recourse approximation is included. + * the basecase and full problem depending whether a recourse approximation is included. * Available options to be set in hiop_pridec.options file: - * mem_space, alpha_max, alpha_min, tolerance, acceptable_tolerance, acceptable_iterations, + * mem_space, alpha_max, alpha_min, tolerance, acceptable_tolerance, acceptable_iterations, * max_iter, verbosity_level, print_options. */ class hiopAlgPrimalDecomposition { public: - /** * Creates a primal decomposition algorithm for the primal decomposable problem * passed as an argument @@ -100,8 +100,7 @@ class hiopAlgPrimalDecomposition * @param comm_world the communicator whose ranks should be used to schedule the tasks * (subproblems of the primal decomposable problem prob_in) */ - hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, - MPI_Comm comm_world = MPI_COMM_WORLD); + hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, MPI_Comm comm_world = MPI_COMM_WORLD); /** * Creates a primal decomposition algorithm for the primal decomposable problem @@ -109,14 +108,14 @@ class hiopAlgPrimalDecomposition * * @param prob_in the primal decomposable problem * @param nc the number of coupling variables - * @param xc_index array with the the indexes of the coupling variables - * in the full vector (primal) variables for the basecase/master problem within the primal + * @param xc_index array with the the indexes of the coupling variables + * in the full vector (primal) variables for the basecase/master problem within the primal * decomposable problem prob_in. * @param comm_world the communicator whose ranks should be used to schedule the tasks * (subproblems of the primal decomposable problem prob_in) */ hiopAlgPrimalDecomposition(hiopInterfacePriDecProblem* prob_in, - const int nc, + const int nc, const int* xc_index, MPI_Comm comm_world = MPI_COMM_WORLD); @@ -126,22 +125,22 @@ class hiopAlgPrimalDecomposition hiopSolveStatus run(); /** Main function to run the optimization in parallel with local accumulation of recourse function (and subgradient) */ hiopSolveStatus run_local(); - + /** Main function to run the optimization in serial */ hiopSolveStatus run_single(); /** returns the objective of the master problem */ double getObjective() const; - + /** returns the solution to the master problem */ void getSolution(hiopVector& x) const; - - /** not implemented */ + + /** not implemented */ void getDualSolutions(double* zl, double* zu, double* lambda); - + /** returns the status of the solver */ inline hiopSolveStatus getSolveStatus() const; - + /** returns the number of iterations, meaning how many times the master was solved */ int getNumIterations() const; @@ -151,7 +150,7 @@ class hiopAlgPrimalDecomposition * 2. maximum number of iterations have been reached * 3. the number of iterations that reached accpetable tolerance consecutively has reached accpetable account * - * @param it the number of iterations + * @param it the number of iterations * @param convg current error value, equivalent to the predicted decrease by the (approximation) objective model * @param accp_count number of consecutive iterations that have reached acceptable tolerance (smaller than tol_) */ @@ -162,60 +161,61 @@ class hiopAlgPrimalDecomposition /** set up the initial alpha update ratio, a parameter of the algorithm */ void set_initial_alpha_ratio(const double ratio); - - /** set up the lower bound of the quadratic coefficient alpha */ - void set_alpha_min(const double alp_min); - - /** set up the upper bound of the quadratic coefficient alpha */ + + /** set up the lower bound of the quadratic coefficient alpha */ + void set_alpha_min(const double alp_min); + + /** set up the upper bound of the quadratic coefficient alpha */ void set_alpha_max(const double alp_max); - - /** set the maximum number of iterations allowed before termination */ - void set_max_iteration(const int max_it); - - /** set the error tolerance */ + + /** set the maximum number of iterations allowed before termination */ + void set_max_iteration(const int max_it); + + /** set the error tolerance */ void set_tolerance(const double tol); - - /** set the acceptable tolerance, smaller than error tolerance */ + + /** set the acceptable tolerance, smaller than error tolerance */ void set_acceptable_tolerance(const double tol); - - /** set the number of iterations that consecutively reach acceptable tolerance before convergence is considered obtained */ + + /** set the number of iterations that consecutively reach acceptable tolerance before convergence is considered obtained */ void set_acceptable_count(const int count); - - /** compute the step size of the coupled variable, nc components of x - * default is two-norm, not inf norm. Naming due to older version. - */ + + /** compute the step size of the coupled variable, nc components of x + * default is two-norm, not inf norm. Naming due to older version. + */ double step_size_inf(const int nc, const hiopVectorInt& idx, const hiopVector& x, const hiopVector& x0); - - /** set the variable local_accum_ */ - void set_local_accum(const std::string local_accum); + /** set the variable local_accum_ */ + void set_local_accum(const std::string local_accum); - /** Contains information of a previous solution step including function value + /** Contains information of a previous solution step including function value * and gradient. Used for storing the solution for the previous iteration * This struct is intended for internal use of hiopAlgPrimalDecomposition class only. * Further, it is not implemented yet in the current version of the algorithm. */ - struct Prevsol{ + struct Prevsol + { Prevsol(const int n, const double f, const double* grad, const double* x) { n_ = n; f_ = f; grad_ = new double[n]; - memcpy(grad_, grad, n_*sizeof(double)); + memcpy(grad_, grad, n_ * sizeof(double)); x_ = new double[n]; - memcpy(x_, x, n_*sizeof(double)); + memcpy(x_, x, n_ * sizeof(double)); } void update(const double f, const double* grad, const double* x) { - assert(grad!=NULL); - memcpy(grad_, grad, n_*sizeof(double)); - memcpy(x_, x, n_*sizeof(double)); + assert(grad != NULL); + memcpy(grad_, grad, n_ * sizeof(double)); + memcpy(x_, x, n_ * sizeof(double)); f_ = f; } - double get_f(){return f_;} - double* get_grad(){return grad_;} - double* get_x(){return x_;} + double get_f() { return f_; } + double* get_grad() { return grad_; } + double* get_x() { return x_; } + private: int n_; double f_; @@ -224,61 +224,60 @@ class hiopAlgPrimalDecomposition }; /** Struct to update the quadratic coefficient alpha in the recourse approximation - * function. It contains quantities such as s_{k-1} = x_k-x_{k-1} that is + * function. It contains quantities such as s_{k-1} = x_k-x_{k-1} that is * otherwise not computed but useful for certian update rules for alpha, * as well as the convergence measure. The update function is called * every iteration to ensure the values are up to date. * The x_k here should only be the coupled x. * This struct is intened for internal use of hiopAlgPrimalDecomposition class only. - * It is emphasized that it is used for the coupled variable between basecase and recourse problems. + * It is emphasized that it is used for the coupled variable between basecase and recourse problems. */ - struct HessianApprox { + struct HessianApprox + { /** Constructor * @param priDecProb the primal decomposable problem - * @param options_pridec options for pridec solver + * @param options_pridec options for pridec solver * @param comm_world the communicator whose ranks should be used to schedule the tasks * (subproblems of the primal decomposable problem priDecProb) */ - HessianApprox(hiopInterfacePriDecProblem* priDecProb, - hiopOptions* options_pridec, - MPI_Comm comm_world=MPI_COMM_WORLD); - HessianApprox(const int& n, - hiopInterfacePriDecProblem* priDecProb, + HessianApprox(hiopInterfacePriDecProblem* priDecProb, hiopOptions* options_pridec, MPI_Comm comm_world = MPI_COMM_WORLD); + HessianApprox(const int& n, + hiopInterfacePriDecProblem* priDecProb, hiopOptions* options_pridec, - MPI_Comm comm_world=MPI_COMM_WORLD); - + MPI_Comm comm_world = MPI_COMM_WORLD); + /** ratio is used to compute alpha in alpha_f */ HessianApprox(const int& n, const double ratio, hiopInterfacePriDecProblem* priDecProb, hiopOptions* options_pridec, - MPI_Comm comm_world=MPI_COMM_WORLD); + MPI_Comm comm_world = MPI_COMM_WORLD); ~HessianApprox(); /** n_ is the dimension of COUPLED x, hence the dimension of g_k, skm1, etc. - * Not to be confused with the full length of the basecase or recourse optimization variable. + * Not to be confused with the full length of the basecase or recourse optimization variable. * It is often hiopAlgPrimalDecomposition->nc_, NOT hiopAlgPrimalDecomposition->n_. */ void set_n(const int n); void set_xkm1(const hiopVector& xk); - + void set_gkm1(const hiopVector& grad); void initialize(const double f_val, const hiopVector& xk, const hiopVector& grad); - + /* updating variables for the current iteration */ void update_hess_coeff(const hiopVector& xk, const hiopVector& gk, const double& f_val); - - /** updating ratio_ used to compute alpha + + /** updating ratio_ used to compute alpha * Using trust-region notations, * rhok = (f_{k-1}-f_k)/(m(0)-m(p_k)), where m(p)=f_{k-1}+g_{k-1}^Tp+0.5 alpha_{k-1} p^Tp. * Therefore, m(0) = f_{k-1}. rhok is the ratio between real change in recourse function value * and the estimate change. Trust-region algorithms use a set heuristics to update alpha_k * based on rhok. * rk: m(p_k) - * The ratio of quadratic objective and linear objective is also used in the heuristics. + * The ratio of quadratic objective and linear objective is also used in the heuristics. * User can provide a global maximum and minimum for alpha through options_pridec. * This function does not use basecase objective in this update. OUTDATED. */ @@ -290,28 +289,30 @@ class hiopAlgPrimalDecomposition * @param rhok trust-region ratio on the true and predicted objective of recourse only * @param rkm1 recourse objective iteration k-1 * @param rk recourse objective iteration k - * @param alpha_g_ratio the ratio of quadratic part of the recourse objective versus the linear part of the recourse objective. - * Used as a supplement to rhok to determine how alpha_ratio is updated. Again heuristics. + * @param alpha_g_ratio the ratio of quadratic part of the recourse objective versus the linear part of the recourse + * objective. Used as a supplement to rhok to determine how alpha_ratio is updated. Again heuristics. * @param alpha_ratio the value to be updated */ - void update_ratio_tr(const double rhok, const double rkm1, const double rk, - const double alpha_g_ratio, double& alpha_ratio); + void update_ratio_tr(const double rhok, + const double rkm1, + const double rk, + const double alpha_g_ratio, + double& alpha_ratio); /** Updating ratio_ using both basecase and recourse objective function - * @param base_v basecase objective value at iteration k + * @param base_v basecase objective value at iteration k * @param base_vm1 basecase objective at iteration k-1 */ void update_ratio(const double base_v, const double base_vm1); /** Classic trust region update rule for alpha_ratio/ratio_ - * Only rhok is needed. Much simplier heuristics than the other update_ratio_tr(...) function. + * Only rhok is needed. Much simplier heuristics than the other update_ratio_tr(...) function. * Current workhorse as its rhok takes into account basecase objective. * @param rhok trust-region type of ratio between predicted full objective and true full objective * @alpha_ratio the output variable to be updated, typically ratio_ that is then used to update alpha */ void update_ratio_tr(const double rhok, double& alpha_ratio); - /** One of the multiple ways to compute alpha, one is to use the Barzilai-Borwein rule. * Using skm1, ykm1 interal variables, similar to quasi-Newton method. */ @@ -322,81 +323,84 @@ class hiopAlgPrimalDecomposition * min{f_k+g_k^T(x-x_k)+0.5 alpha_k|x-x_k|^2 >= beta_k f} * So alpha_f is based on the constraint on the minimum of recourse * approximition. This is one way to ensure good approximation. Works for some functions. - */ + */ double get_alpha_f(const hiopVector& gk); /** Computing alpha through alpha = alpha_*ratio_ * Not based on function information, purely on trust-region update of ratio_. * Notice alpha is not updated directly through trust-region method. A feature that could be added. - */ + */ double get_alpha_tr(); - + /** Function to check convergence based on gradient */ double check_convergence_grad(const hiopVector& gk); - + /** Function to check convergence based on function value */ double check_convergence_fcn(const double base_v, const double base_vm1); - + /** Compute the basecase objective at the kth step by subtracting * recourse approximation value from the full objective. rval is the real - * recourse function value at x_{k-1}, val is the master problem + * recourse function value at x_{k-1}, val is the master problem * objective which is the sum of the basecase value and the recourse function value. * This requires the info from previous steps to compute, hence in the HessianApprox class. */ double compute_base(const double val); - // setting the output level for the Hessian approximation - void set_verbosity(const int i); + // setting the output level for the Hessian approximation + void set_verbosity(const int i); - // setting the lower bound alpha_ratio. Notice this is not on alpha directly. - void set_alpha_ratio_min(const double alp_ratio_min); - - // setting the upper bound alpha_ratio. + // setting the lower bound alpha_ratio. Notice this is not on alpha directly. + void set_alpha_ratio_min(const double alp_ratio_min); + + // setting the upper bound alpha_ratio. void set_alpha_ratio_max(const double alp_ratio_max); - - // setting the lower bound on alpha. These bounds should be the same and passed by the outer hiopAlgPrimalDecomposition class - void set_alpha_min(const double alp_min); - - // setting the upper bound on alpha. + + // setting the lower bound on alpha. These bounds should be the same and passed by the outer hiopAlgPrimalDecomposition + // class + void set_alpha_min(const double alp_min); + + // setting the upper bound on alpha. void set_alpha_max(const double alp_max); private: int n_; - double alpha_ = 1e6; // this parameter is the quadratic coefficient alpha in the pridec paper, see user manual for details + double alpha_ = + 1e6; // this parameter is the quadratic coefficient alpha in the pridec paper, see user manual for details double ratio_ = 1.0; double tr_ratio_ = 1.0; - double ratio_min = 0.5; - double ratio_max = 5.0; - double alpha_min = 1e-5; - double alpha_max = 1e6; - - double fk; /// current RECOURSE objective value at k. Due to initial design, this is not the full objective, - /// meaning it does not contain the basecase objective. - double fkm1; /// recourse objective at k-1 - double fkm1_lin; /// linear part of the recourse objective at k-1 - hiopVector* xkm1; /// coupled part of the solution at k-1 - hiopVector* gkm1; /// coupled part of the gradient at k-1 - hiopVector* skm1; /// xk-xkm1 - hiopVector* ykm1; /// gk-gkm1 - size_t ver_ = 2; //output level for HessianApprox class, being phased out. + double ratio_min = 0.5; + double ratio_max = 5.0; + double alpha_min = 1e-5; + double alpha_max = 1e6; + + double fk; /// current RECOURSE objective value at k. Due to initial design, this is not the full objective, + /// meaning it does not contain the basecase objective. + double fkm1; /// recourse objective at k-1 + double fkm1_lin; /// linear part of the recourse objective at k-1 + hiopVector* xkm1; /// coupled part of the solution at k-1 + hiopVector* gkm1; /// coupled part of the gradient at k-1 + hiopVector* skm1; /// xk-xkm1 + hiopVector* ykm1; /// gk-gkm1 + size_t ver_ = 2; // output level for HessianApprox class, being phased out. hiopOptions* options_; /// options is given by on pridec options. hiopLogger* log_; MPI_Comm comm_world_; }; -private: + +private: #ifdef HIOP_USE_MPI MPI_Request* request_; - MPI_Status status_; - int my_rank_,comm_size_; + MPI_Status status_; + int my_rank_, comm_size_; int my_rank_type_; #endif MPI_Comm comm_world_; - //master/solver(0), or worker(1:total rank) + // master/solver(0), or worker(1:total rank) // communication strategy; if true, worker ranks accumulate information locally // no effect if only one rank - std::string local_accum_ = "no"; + std::string local_accum_ = "no"; /// maximum number of outer iterations, can be user specified int max_iter_ = 200; @@ -405,7 +409,7 @@ class hiopAlgPrimalDecomposition /// pointer to the master problem to be solved (passed as argument) hiopInterfacePriDecProblem* master_prob_; hiopSolveStatus solver_status_; - + /// current primal iterate hiopVector* x_; @@ -423,27 +427,26 @@ class hiopAlgPrimalDecomposition /// Indices of the coupled x in the full x of the basecase/master problem hiopVectorInt* xc_idx_; - + /// tolerance of the convergence stopping criteria. User options from options file via hiop_pridec.options double tol_ = 1e-8; - /// acceptable tolerance is used to terminate hiop if NLP residuals are below the + /// acceptable tolerance is used to terminate hiop if NLP residuals are below the /// default value for 10 consecutive iterations double accp_tol_ = 1e-6; /// consecutive iteration count where NLP residual is lower than acceptable tolerance int accp_count_ = 10; /// initial alpha_ratio if used double alpha_ratio_ = 1.0; - - double alpha_min_ = 1e-5; - double alpha_max_ = 1e6; - + + double alpha_min_ = 1e-5; + double alpha_max_ = 1e6; + protected: hiopOptions* options_; hiopLogger* log_; - }; -} //end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopDualsUpdater.cpp b/src/Optimization/hiopDualsUpdater.cpp index 510fae0cf..bd19c2a2d 100644 --- a/src/Optimization/hiopDualsUpdater.cpp +++ b/src/Optimization/hiopDualsUpdater.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -52,14 +52,13 @@ * @author Nai-Yuan Chiang , LLNL * */ - + #include "hiopDualsUpdater.hpp" #include "LinAlgFactory.hpp" #include "hiopLinSolverSymDenseLapack.hpp" #include "hiopLinSolverSymDenseMagma.hpp" - #include "hiop_blasdefs.hpp" #ifdef HIOP_SPARSE @@ -83,16 +82,19 @@ namespace hiop { -hiopDualsLsqUpdate::hiopDualsLsqUpdate(hiopNlpFormulation* nlp) - : hiopDualsUpdater(nlp), - rhs_(nullptr), rhsc_(nullptr), rhsd_(nullptr), - vec_n_(nullptr),vec_mi_(nullptr) +hiopDualsLsqUpdate::hiopDualsLsqUpdate(hiopNlpFormulation* nlp) + : hiopDualsUpdater(nlp), + rhs_(nullptr), + rhsc_(nullptr), + rhsd_(nullptr), + vec_n_(nullptr), + vec_mi_(nullptr) { vec_n_ = nlp_->alloc_primal_vec(); - rhsc_ = nlp_->alloc_dual_eq_vec(); + rhsc_ = nlp_->alloc_dual_eq_vec(); rhsd_ = nlp_->alloc_dual_ineq_vec(); - vec_mi_ = rhsd_->alloc_clone(); - + vec_mi_ = rhsd_->alloc_clone(); + rhsc_->setToZero(); rhsd_->setToZero(); }; @@ -100,7 +102,7 @@ hiopDualsLsqUpdate::hiopDualsLsqUpdate(hiopNlpFormulation* nlp) hiopDualsLsqUpdate::~hiopDualsLsqUpdate() { if(rhs_) delete rhs_; - if(rhsc_) delete rhsc_; + if(rhsc_) delete rhsc_; if(rhsd_) delete rhsd_; if(vec_n_) delete vec_n_; if(vec_mi_) delete vec_mi_; @@ -122,27 +124,28 @@ bool hiopDualsLsqUpdate::go(const hiopIterate& iter, const double& infeas_nrm_trial) { hiopNlpDenseConstraints* nlpd = dynamic_cast(nlp_); - assert(nullptr!=nlpd); + assert(nullptr != nlpd); - //first update the duals using steplength along the search directions. This is fine for - //signed duals z_l, z_u, v_l, and v_u. The rest of the duals, yc and yd, will be found as a - //solution to the above LSQ problem + // first update the duals using steplength along the search directions. This is fine for + // signed duals z_l, z_u, v_l, and v_u. The rest of the duals, yc and yd, will be found as a + // solution to the above LSQ problem if(!iter_plus.takeStep_duals(iter, search_dir, alpha_primal, alpha_dual)) { nlp_->log->printf(hovError, "dual lsq update: error in standard update of the duals"); return false; } - if(!iter_plus.adjustDuals_primalLogHessian(mu,kappa_sigma)) { + if(!iter_plus.adjustDuals_primalLogHessian(mu, kappa_sigma)) { nlp_->log->printf(hovError, "dual lsq update: error in adjustDuals"); return false; } const double recalc_lsq_duals_tol = nlp_->options->GetNumeric("recalc_lsq_duals_tol"); - //return if the constraint violation (primal infeasibility) is not below the tol for the LSQ update + // return if the constraint violation (primal infeasibility) is not below the tol for the LSQ update if(infeas_nrm_trial > recalc_lsq_duals_tol) { nlp_->log->printf(hovScalars, "will not perform the dual lsq update since the primal infeasibility (%g) " "is not under the tolerance recalc_lsq_duals_tol=%g.\n", - infeas_nrm_trial, recalc_lsq_duals_tol); + infeas_nrm_trial, + recalc_lsq_duals_tol); return true; } @@ -150,34 +153,23 @@ bool hiopDualsLsqUpdate::go(const hiopIterate& iter, } hiopDualsLsqUpdateLinsysRedDense::hiopDualsLsqUpdateLinsysRedDense(hiopNlpFormulation* nlp) - : hiopDualsLsqUpdate(nlp), - mexme_(nullptr), - mexmi_(nullptr), - mixmi_(nullptr), - mxm_(nullptr) + : hiopDualsLsqUpdate(nlp), + mexme_(nullptr), + mexmi_(nullptr), + mixmi_(nullptr), + mxm_(nullptr) { - mexme_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), - nlp_->m_eq(), - nlp_->m_eq()); - mexmi_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), - nlp_->m_eq(), - nlp_->m_ineq()); - mixmi_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), - nlp_->m_ineq(), - nlp_->m_ineq()); - mxm_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), - nlp_->m(), - nlp_->m()); - - rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), - nlp_->m()); - + mexme_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), nlp_->m_eq(), nlp_->m_eq()); + mexmi_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), nlp_->m_eq(), nlp_->m_ineq()); + mixmi_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), nlp_->m_ineq(), nlp_->m_ineq()); + mxm_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), nlp_->m(), nlp_->m()); + + rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nlp_->m()); + #ifdef HIOP_DEEPCHECKS - M_copy_ = nullptr; //delayed allocation - rhs_copy_ = rhs_->alloc_clone(); - mixme_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), - nlp_->m_ineq(), - nlp_->m_eq()); + M_copy_ = nullptr; // delayed allocation + rhs_copy_ = rhs_->alloc_clone(); + mixme_ = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), nlp_->m_ineq(), nlp_->m_eq()); #endif } @@ -191,10 +183,10 @@ hiopDualsLsqUpdateLinsysRedDense::~hiopDualsLsqUpdateLinsysRedDense() delete M_copy_; delete rhs_copy_; delete mixme_; -#endif +#endif } -/** Given xk, zk_l, zk_u, vk_l, and vk_u (contained in 'iter'), this method solves an LSQ problem +/** Given xk, zk_l, zk_u, vk_l, and vk_u (contained in 'iter'), this method solves an LSQ problem * corresponding to dual infeasibility equation * min_{y_c,y_d} || \nabla f(xk) + J^T_c(xk) y_c + J_d^T(xk) y_d - zk_l+zk_u ||^2 * || - y_d - vk_l + vk_u ||_2, @@ -202,27 +194,27 @@ hiopDualsLsqUpdateLinsysRedDense::~hiopDualsLsqUpdateLinsysRedDense() * min_{y_c, y_d} || [ J_c^T J_d^T ] [ y_c ] - [ -\nabla f(xk) + zk_l-zk_u ] ||^2 * || [ 0 I ] [ y_d ] [ - vk_l + vk_u ] ||_2 * ****************************** - * NLPs with dense constraints + * NLPs with dense constraints * ****************************** - * For NLPs with dense constraints, the above LSQ problem is solved by solving the linear + * For NLPs with dense constraints, the above LSQ problem is solved by solving the linear * system in y_c and y_d: - * [ J_c J_c^T J_c J_d^T ] [ y_c ] = [ J_c 0 ] [ -\nabla f(xk) + zk_l-zk_u ] + * [ J_c J_c^T J_c J_d^T ] [ y_c ] = [ J_c 0 ] [ -\nabla f(xk) + zk_l-zk_u ] * [ J_d J_c^T J_d J_d^T + I ] [ y_d ] [ J_d I ] [ - vk_l + vk_u ] * This linear system is small (of size m=m_E+m_I) (so it is replicated for all MPI ranks). - * + * * The matrix of the above system is stored in the member variable M_ of this class and the * right-hand side in 'rhs_'. - * + * * ************** * MDS NLPs * ************** - * For MDS NLPs, the linear system exploits the block structure of the Jacobians Jc and Jd. + * For MDS NLPs, the linear system exploits the block structure of the Jacobians Jc and Jd. * Namely, since Jc = [Jxdc Jxsc] and Jd = [Jxdd Jxsd], the following * dense linear system is to be solved for y_c and y_d * * [ Jxdc Jxdc^T + Jxsc Jxsc^T Jxdc Jxdd^T + Jxsc Jxsd^T ] [ y_c ] = same rhs as * [ Jxdd Jxdc^T + Jxsd Jxsc^T Jxdd Jxdd^T + Jxsd Jxsd^T + I ] [ y_d ] above - * The above linear system is solved as a dense linear system. + * The above linear system is solved as a dense linear system. * * *********************** * Sparse (general) NLPs @@ -234,7 +226,7 @@ hiopDualsLsqUpdateLinsysRedDense::~hiopDualsLsqUpdateLinsysRedDense() * [ Jd -I 0 0 ] [dyd] [ 0 ] * * The matrix of the above system is stored in the member variable M_ of this class and the - * right-hand side in 'rhs'. * + * right-hand side in 'rhs'. * */ bool hiopDualsLsqUpdateLinsysRedDense::do_lsq_update(hiopIterate& iter, const hiopVector& grad_f, @@ -247,16 +239,16 @@ bool hiopDualsLsqUpdateLinsysRedDense::do_lsq_update(hiopIterate& iter, hiopMatrixDense* mexmi = dynamic_cast(mexmi_); hiopMatrixDense* mixmi = dynamic_cast(mixmi_); - //compute terms in M: Jc * Jc^T, J_c * J_d^T, and J_d * J_d^T + // compute terms in M: Jc * Jc^T, J_c * J_d^T, and J_d * J_d^T //! streamline the communication (use mxm as a global buffer for the MPI_Allreduce) jac_c.timesMatTrans(0.0, *mexme, 1.0, jac_c); jac_c.timesMatTrans(0.0, *mexmi, 1.0, jac_d); jac_d.timesMatTrans(0.0, *mixmi, 1.0, jac_d); mixmi->addDiagonal(1.0); - M->copyBlockFromMatrix(0,0,*mexme); + M->copyBlockFromMatrix(0, 0, *mexme); M->copyBlockFromMatrix(0, nlp_->m_eq(), *mexmi); - M->copyBlockFromMatrix(nlp_->m_eq(),nlp_->m_eq(), *mixmi); + M->copyBlockFromMatrix(nlp_->m_eq(), nlp_->m_eq(), *mixmi); #ifdef HIOP_DEEPCHECKS if(M_copy_ == nullptr) { @@ -271,21 +263,21 @@ bool hiopDualsLsqUpdateLinsysRedDense::do_lsq_update(hiopIterate& iter, M_copy->assertSymmetry(1e-12); #endif - //bailout in case there is an error in the Cholesky factorization + // bailout in case there is an error in the Cholesky factorization bool ret = this->factorize_mat(); if(!ret) { nlp_->log->printf(hovError, "dual lsq update: error in the dense factorization.\n"); return false; } - // compute rhs_=[rhsc_,rhsd_]. - // [ rhsc_ ] = - [ J_c 0 ] [ vecx ] + // compute rhs_=[rhsc_,rhsd_]. + // [ rhsc_ ] = - [ J_c 0 ] [ vecx ] // [ rhsd_ ] [ J_d I ] [ vecd ] - // [vecx,vecd] = - [ -\nabla f(xk) + zk_l-zk_u, - vk_l + vk_u]. + // [vecx,vecd] = - [ -\nabla f(xk) + zk_l-zk_u, - vk_l + vk_u]. hiopVector& vecx = *vec_n_; vecx.copyFrom(grad_f); vecx.axpy(-1.0, *iter.get_zl()); - vecx.axpy( 1.0, *iter.get_zu()); + vecx.axpy(1.0, *iter.get_zu()); hiopVector& vecd = *vec_mi_; vecd.copyFrom(*iter.get_vl()); vecd.axpy(-1.0, *iter.get_vu()); @@ -294,88 +286,83 @@ bool hiopDualsLsqUpdateLinsysRedDense::do_lsq_update(hiopIterate& iter, rhsd_->axpy(-1.0, vecd); rhs_->copyFromStarting(0, *rhsc_); rhs_->copyFromStarting(nlp_->m_eq(), *rhsd_); - //nlp_->log->write("rhs_", *rhs_, hovSummary); + // nlp_->log->write("rhs_", *rhs_, hovSummary); #ifdef HIOP_DEEPCHECKS rhs_copy_->copyFrom(*rhs_); #endif - //solve for this rhs_ + // solve for this rhs_ if(!this->solve_with_factors(*rhs_)) { nlp_->log->printf(hovError, "dual lsq update: error in the solution process (dense solve).\n"); return false; } - //update yc and yd in iter_plus + // update yc and yd in iter_plus rhs_->copyToStarting(0, *iter.get_yc()); rhs_->copyToStarting(nlp_->m_eq(), *iter.get_yd()); #ifdef HIOP_DEEPCHECKS assert(M_copy_); double nrmrhs = rhs_copy_->twonorm(); - M_copy_->timesVec(-1.0, *rhs_copy_, 1.0, *rhs_); - double nrmres = rhs_copy_->twonorm() / (1+nrmrhs); - if(nrmres>1e-4) { + M_copy_->timesVec(-1.0, *rhs_copy_, 1.0, *rhs_); + double nrmres = rhs_copy_->twonorm() / (1 + nrmrhs); + if(nrmres > 1e-4) { nlp_->log->printf(hovError, "hiopDualsLsqUpdateDense::do_lsq_update linear system residual is dangerously high: %g\n", nrmres); assert(false && "hiopDualsLsqUpdateDense::do_lsq_update linear system residual is dangerously high"); return false; } else { - if(nrmres>1e-6) + if(nrmres > 1e-6) nlp_->log->printf(hovWarning, "hiopDualsLsqUpdate::do_lsq_update linear system residual is dangerously high: %g\n", nrmres); } #endif - //nlp_->log->write("yc ini", *iter.get_yc(), hovSummary); - //nlp_->log->write("yd ini", *iter.get_yd(), hovSummary); + // nlp_->log->write("yc ini", *iter.get_yc(), hovSummary); + // nlp_->log->write("yd ini", *iter.get_yd(), hovSummary); return true; }; hiopDualsLsqUpdateLinsysAugSparse::hiopDualsLsqUpdateLinsysAugSparse(hiopNlpFormulation* nlp) - : hiopDualsLsqUpdate(nlp), - lin_sys_(nullptr) + : hiopDualsLsqUpdate(nlp), + lin_sys_(nullptr) { #ifndef HIOP_SPARSE assert(0 && "should not reach here!"); -#endif // HIOP_SPARSE - rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), - nlp_->n() + nlp_->m_ineq() + nlp_->m()); +#endif // HIOP_SPARSE + rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nlp_->n() + nlp_->m_ineq() + nlp_->m()); } -hiopDualsLsqUpdateLinsysAugSparse::~hiopDualsLsqUpdateLinsysAugSparse() -{ - delete lin_sys_; -} +hiopDualsLsqUpdateLinsysAugSparse::~hiopDualsLsqUpdateLinsysAugSparse() { delete lin_sys_; } -bool hiopDualsLsqUpdateLinsysAugSparse:: -instantiate_linear_solver(const char* linsol_opt, - hiopIterate& iter, - const hiopVector& grad_f, - const hiopMatrix& jac_c, - const hiopMatrix& jac_d) +bool hiopDualsLsqUpdateLinsysAugSparse::instantiate_linear_solver(const char* linsol_opt, + hiopIterate& iter, + const hiopVector& grad_f, + const hiopMatrix& jac_c, + const hiopMatrix& jac_d) { const hiopMatrixSparse& Jac_cSp = dynamic_cast(jac_c); const hiopMatrixSparse& Jac_dSp = dynamic_cast(jac_d); - + std::stringstream ss_log; - + size_type nx = Jac_cSp.n(); - size_type nd=Jac_dSp.m(); - size_type neq=Jac_cSp.m(); - size_type nineq=Jac_dSp.m(); - [[maybe_unused]] const size_type n = nx + nineq + neq + nineq; - [[maybe_unused]] const size_type nnz = nx + nd + Jac_cSp.numberOfNonzeros() + Jac_dSp.numberOfNonzeros() + nd + (nx + nd + neq + nineq); + size_type nd = Jac_dSp.m(); + size_type neq = Jac_cSp.m(); + size_type nineq = Jac_dSp.m(); + [[maybe_unused]] const size_type n = nx + nineq + neq + nineq; + [[maybe_unused]] const size_type nnz = + nx + nd + Jac_cSp.numberOfNonzeros() + Jac_dSp.numberOfNonzeros() + nd + (nx + nd + neq + nineq); auto linear_solver = nlp_->options->GetString(linsol_opt); auto compute_mode = nlp_->options->GetString("compute_mode"); auto fact_acceptor = nlp_->options->GetString("fact_acceptor"); #ifndef HIOP_USE_GPU - assert( (compute_mode == "cpu" || compute_mode == "auto") && + assert((compute_mode == "cpu" || compute_mode == "auto") && "the value for compute_mode is invalid and should have been corrected during user options processing"); #endif - + if(!lin_sys_) { - if(compute_mode == "cpu") { ///////////////////////////////////////////////////////////////////////////////////////// // compute mode CPU @@ -383,38 +370,38 @@ instantiate_linear_solver(const char* linsol_opt, assert(nullptr == lin_sys_); if(linear_solver == "ma57" || linear_solver == "auto") { #ifdef HIOP_USE_COINHSL - ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: MA57 size " << n - << " cons " << (neq+nineq) << " nnz " << nnz; - lin_sys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); -#endif // HIOP_USE_COINHSL + ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: MA57 size " << n << " cons " << (neq + nineq) << " nnz " + << nnz; + lin_sys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); +#endif // HIOP_USE_COINHSL } - if( (nullptr == lin_sys_ && linear_solver == "auto") || linear_solver == "pardiso") { - //ma57 is not available or user requested pardiso + if((nullptr == lin_sys_ && linear_solver == "auto") || linear_solver == "pardiso") { + // ma57 is not available or user requested pardiso #ifdef HIOP_USE_PARDISO - ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: PARDISO size " << n - << " cons " << (neq+nineq) << " nnz " << nnz; - lin_sys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_); + ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: PARDISO size " << n << " cons " << (neq + nineq) + << " nnz " << nnz; + lin_sys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_); #endif // HIOP_USE_PARDISO } - if( (NULL == lin_sys_ && linear_solver == "auto") || linear_solver == "ginkgo") { - //ma57 and pardiso are not available or user requested ginkgo + if((NULL == lin_sys_ && linear_solver == "auto") || linear_solver == "ginkgo") { + // ma57 and pardiso are not available or user requested ginkgo #ifdef HIOP_USE_GINKGO ss_log << "LSQ with GINKGO: create "; - hiopLinSolverSymSparseGinkgo *p = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_); - + hiopLinSolverSymSparseGinkgo* p = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_); + nlp_->log->printf(hovSummary, "LSQ Duals Initialization --- KKT_SPARSE_XDYcYd linsys: using GINKGO on CPU as an " "indefinite solver, size %d (%d cons)\n", - n, neq+nineq); + n, + neq + nineq); lin_sys_ = p; #endif // HIOP_USE_GINKGO } - if(NULL == lin_sys_) { - //ma57, pardiso and ginkgo are not available or user requested strumpack + // ma57, pardiso and ginkgo are not available or user requested strumpack #ifdef HIOP_USE_STRUMPACK assert((linear_solver == "strumpack" || linear_solver == "auto") && "the value for duals_init_linear_solver_sparse is invalid and should have been corrected during " @@ -426,23 +413,23 @@ instantiate_linear_solver(const char* linsol_opt, assert(false); return false; } - ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: PARDISO size " << n - << " cons " << (neq+nineq) << " nnz " << nnz; - lin_sys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); + ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: PARDISO size " << n << " cons " << (neq + nineq) + << " nnz " << nnz; + lin_sys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); #endif // HIOP_USE_STRUMPACK } - //KS: /end of CPU mode/ do not put CU SOLVER anywhere above this!!!!! + // KS: /end of CPU mode/ do not put CU SOLVER anywhere above this!!!!! nlp_->log->printf(hovSummary, "%s (option '%s' '%s')\n", ss_log.str().c_str(), linsol_opt, linear_solver.c_str()); - } else { + } else { // // We're on device // // Under hybrid compute_mode, LSQ-based initialization can be done using CPU sparse linear solvers. // Under gpu compute_mode, which is work in progress, the initialization should be done only using // GPU sparse linear solvers. - -#ifdef HIOP_USE_RESOLVE + +#ifdef HIOP_USE_RESOLVE if(compute_mode == "gpu") { assert((linear_solver == "resolve" || linear_solver == "auto") && "the value for duals_init_linear_solver_sparse is invalid and should have been corrected during " @@ -457,12 +444,12 @@ instantiate_linear_solver(const char* linsol_opt, } // This is our first choice on the device. if(linear_solver == "resolve" || linear_solver == "auto") { - ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: ReSolve size " << n - << " cons " << (neq+nineq) << " nnz " << nnz; + ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: ReSolve size " << n << " cons " << (neq + nineq) + << " nnz " << nnz; lin_sys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_); } -#else // of #ifdef HIOP_USE_RESOLVE - //under compute mode gpu, at this point we don't have a sparse linear solver +#else // of #ifdef HIOP_USE_RESOLVE + // under compute mode gpu, at this point we don't have a sparse linear solver if(compute_mode == "gpu") { if(linear_solver == "auto") { nlp_->log->printf(hovError, @@ -471,7 +458,7 @@ instantiate_linear_solver(const char* linsol_opt, "mode to hybrid, which will allow using a CPU sparse solver.", linsol_opt, linear_solver.c_str()); - assert(false); + assert(false); } else { nlp_->log->printf(hovError, "Impossible to deploy the (CPU?) sparse linear solver specified by option '%s' set to " @@ -487,53 +474,55 @@ instantiate_linear_solver(const char* linsol_opt, assert(compute_mode == "hybrid" || compute_mode == "auto"); #if defined(HIOP_USE_STRUMPACK) - if( (nullptr == lin_sys_) && (linear_solver == "strumpack" || linear_solver == "auto") ) { - ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: STRUMPACK size " << n - << " cons " << (neq+nineq) << " nnz " << nnz; + if((nullptr == lin_sys_) && (linear_solver == "strumpack" || linear_solver == "auto")) { + ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: STRUMPACK size " << n << " cons " << (neq + nineq) + << " nnz " << nnz; lin_sys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); } #endif // HIOP_USE_STRUMPACK - + #ifdef HIOP_USE_COINHSL if(nullptr == lin_sys_) { // we get here if no other linear solvers are available or when the linear solver is set to be ma57 assert((linear_solver == "ma57" || linear_solver == "auto") && "the value for duals_init_linear_solver_sparse is invalid and should have been corrected during " "options processing"); - ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: MA57 size " << n - << " cons " << (neq+nineq) << " nnz " << nnz; + ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: MA57 size " << n << " cons " << (neq + nineq) << " nnz " + << nnz; lin_sys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); } -#endif // HIOP_USE_COINHSL +#endif // HIOP_USE_COINHSL #ifdef HIOP_USE_PARDISO if(nullptr == lin_sys_) { // we get here if no other linear solvers are available or when the linear solver is set to pardiso assert((linear_solver == "pardiso" || linear_solver == "auto") && "the value for duals_init_linear_solver_sparse is invalid and should have been corrected during " "options processing"); - ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: MA57 size " << n - << " cons " << (neq+nineq) << " nnz " << nnz; + ss_log << "LSQ linear solver --- KKT_SPARSE_XDYcYd linsys: MA57 size " << n << " cons " << (neq + nineq) << " nnz " + << nnz; lin_sys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_); } -#endif // HIOP_USE_PARDISO +#endif // HIOP_USE_PARDISO if(NULL == lin_sys_) { - // we get here if strumpack, ma57 and pardiso are not available or is available but the duals_init_linear_solver_sparse was - //set to be ginkgo + // we get here if strumpack, ma57 and pardiso are not available or is available but the + // duals_init_linear_solver_sparse was + // set to be ginkgo #ifdef HIOP_USE_GINKGO ss_log << "LSQ with GINKGO: create "; - hiopLinSolverSymSparseGinkgo *p = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_); - + hiopLinSolverSymSparseGinkgo* p = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_); + nlp_->log->printf(hovSummary, "LSQ Duals Initialization --- KKT_SPARSE_XDYcYd linsys: using GINKGO on CPU as an " "indefinite solver, size %d (%d cons)\n", - n, neq+nineq); + n, + neq + nineq); lin_sys_ = p; #endif // HIOP_USE_GINKGO } - } // end of else compute_mode=='cpu' - } //end of else if(!linsys) + } // end of else compute_mode=='cpu' + } // end of else if(!linsys) - //return false, which will trigger a backup to LSQ computation(s), if it is not possible to instantiate a linear solver + // return false, which will trigger a backup to LSQ computation(s), if it is not possible to instantiate a linear solver return (nullptr != lin_sys_); } @@ -543,15 +532,15 @@ bool hiopDualsLsqUpdateLinsysAugSparse::do_lsq_update(hiopIterate& iter, const hiopMatrix& jac_d) { hiopNlpSparse* nlpsp = dynamic_cast(nlp_); - assert(nullptr!=nlpsp); - + assert(nullptr != nlpsp); + const hiopMatrixSparse& Jac_cSp = dynamic_cast(jac_c); const hiopMatrixSparse& Jac_dSp = dynamic_cast(jac_d); hiopTimer t; std::stringstream ss_log; - - int nx = Jac_cSp.n(), nd=Jac_dSp.m(), neq=Jac_cSp.m(), nineq=Jac_dSp.m(); + + int nx = Jac_cSp.n(), nd = Jac_dSp.m(), neq = Jac_cSp.m(), nineq = Jac_dSp.m(); t.reset(); t.start(); @@ -560,7 +549,7 @@ bool hiopDualsLsqUpdateLinsysAugSparse::do_lsq_update(hiopIterate& iter, if(nullptr == lin_sys_) { return false; } - hiopLinSolverSymSparse* linSys = dynamic_cast (lin_sys_); + hiopLinSolverSymSparse* linSys = dynamic_cast(lin_sys_); assert(linSys); t.stop(); @@ -575,28 +564,28 @@ bool hiopDualsLsqUpdateLinsysAugSparse::do_lsq_update(hiopIterate& iter, // copy Jac and Hes to the full iterate matrix size_type dest_nnz_st{0}; - Msys.copyDiagMatrixToSubblock(1., 0, 0, dest_nnz_st, nx+nd); - dest_nnz_st += nx+nd; - Msys.copyRowsBlockFrom(Jac_cSp, 0, neq, nx+nd, dest_nnz_st); + Msys.copyDiagMatrixToSubblock(1., 0, 0, dest_nnz_st, nx + nd); + dest_nnz_st += nx + nd; + Msys.copyRowsBlockFrom(Jac_cSp, 0, neq, nx + nd, dest_nnz_st); dest_nnz_st += Jac_cSp.numberOfNonzeros(); - Msys.copyRowsBlockFrom(Jac_dSp, 0, nineq, nx+nd+neq, dest_nnz_st); + Msys.copyRowsBlockFrom(Jac_dSp, 0, nineq, nx + nd + neq, dest_nnz_st); dest_nnz_st += Jac_dSp.numberOfNonzeros(); // minus identity matrix for slack variables - Msys.copyDiagMatrixToSubblock(-1., nx+nd+neq, nx, dest_nnz_st, nineq); + Msys.copyDiagMatrixToSubblock(-1., nx + nd + neq, nx, dest_nnz_st, nineq); dest_nnz_st += nineq; - //add 0.0 to diagonal block linSys starting at (0,0) - Msys.setSubDiagonalTo(0, nx+nd+neq+nineq, 0.0, dest_nnz_st); - dest_nnz_st += nx+nd+neq+nineq; - + // add 0.0 to diagonal block linSys starting at (0,0) + Msys.setSubDiagonalTo(0, nx + nd + neq + nineq, 0.0, dest_nnz_st); + dest_nnz_st += nx + nd + neq + nineq; + /* we've just done - * - * [ I 0 Jc^T Jd^T ] [ dx] [ rx_tilde ] - * [ 0 I 0 -I ] [ dd] [ rd_tilde ] - * [ Jc 0 0 0 ] [dyc] = [ ryc ] - * [ Jd -I 0 0 ] [dyd] [ ryd ] - */ + * + * [ I 0 Jc^T Jd^T ] [ dx] [ rx_tilde ] + * [ 0 I 0 -I ] [ dd] [ rd_tilde ] + * [ Jc 0 0 0 ] [dyc] = [ ryc ] + * [ Jd -I 0 0 ] [dyd] [ ryd ] + */ nlp_->log->write("LSQ Dual Updater --- KKT_SPARSE_XDYcYd linsys:", Msys, hovMatrices); } t.stop(); @@ -607,22 +596,22 @@ bool hiopDualsLsqUpdateLinsysAugSparse::do_lsq_update(hiopIterate& iter, int ret_val = linSys->matrixChanged(); t.stop(); ss_log << " factor linsys " << t.getElapsedTime() << " sec\n"; - - if(ret_val<0) { + + if(ret_val < 0) { nlp_->log->printf(hovError, "dual lsq update: error %d in the factorization.\n", ret_val); return false; - } + } t.reset(); t.start(); - // compute rhs_=[rhsx, rhss, rhsc_, rhsd_]. + // compute rhs_=[rhsx, rhss, rhsc_, rhsd_]. // rhsx = - [ \nabla f(xk) - zk_l + zk_u ] // rhss = - [ -vk_l + vk_u ] // rhsc_ = rhsd_ = 0 hiopVector& rhsx = *vec_n_; rhsx.copyFrom(grad_f); rhsx.negate(); - rhsx.axpy( 1.0, *iter.get_zl()); + rhsx.axpy(1.0, *iter.get_zl()); rhsx.axpy(-1.0, *iter.get_zu()); hiopVector& rhss = *vec_mi_; @@ -631,10 +620,10 @@ bool hiopDualsLsqUpdateLinsysAugSparse::do_lsq_update(hiopIterate& iter, rhs_->copyFromStarting(0, rhsx); rhs_->copyFromStarting(nx, rhss); - rhs_->copyFromStarting(nx+nd, *rhsc_); - rhs_->copyFromStarting(nx+nd+neq, *rhsd_); + rhs_->copyFromStarting(nx + nd, *rhsc_); + rhs_->copyFromStarting(nx + nd + neq, *rhsd_); - //solve for this rhs_ + // solve for this rhs_ bool linsol_ok = lin_sys_->solve(*rhs_); if(!linsol_ok) { @@ -642,9 +631,9 @@ bool hiopDualsLsqUpdateLinsysAugSparse::do_lsq_update(hiopIterate& iter, iter.get_yc()->setToZero(); iter.get_yd()->setToZero(); } else { - //update yc and yd in iter_plus - rhs_->copyToStarting(nx+nd, *iter.get_yc()); - rhs_->copyToStarting(nx+nd+neq, *iter.get_yd()); + // update yc and yd in iter_plus + rhs_->copyToStarting(nx + nd, *iter.get_yc()); + rhs_->copyToStarting(nx + nd + neq, *iter.get_yd()); } t.stop(); ss_log << " solve linsys " << t.getElapsedTime() << " sec\n"; @@ -659,12 +648,12 @@ bool hiopDualsLsqUpdateLinsysAugSparse::do_lsq_update(hiopIterate& iter, // MAGMA specialization //////////////////////////////////////////////////////////////////////////////////////////////// hiopDualsLsqUpdateLinsysRedDenseSym::hiopDualsLsqUpdateLinsysRedDenseSym(hiopNlpFormulation* nlp) - : hiopDualsLsqUpdateLinsysRedDense(nlp) + : hiopDualsLsqUpdateLinsysRedDense(nlp) { #ifdef HIOP_USE_MAGMA linsys_ = new hiopLinSolverSymDenseMagmaBuKa(nlp_->m(), nlp_); #else - assert(false && + assert(false && "hiopDualsLsqUpdateLinsysRedDenseSym is meant to be used with MAGMA, but" "MAGMA is not available within HiOp."); linsys_ = new hiopLinSolverSymDenseLapack(nlp_->m(), nlp_); @@ -674,14 +663,10 @@ hiopDualsLsqUpdateLinsysRedDenseSym::hiopDualsLsqUpdateLinsysRedDenseSym(hiopNlp bool hiopDualsLsqUpdateLinsysRedDenseSym::factorize_mat() { int ret = linsys_->matrixChanged(); - return (ret==0); -} - -bool hiopDualsLsqUpdateLinsysRedDenseSym::solve_with_factors(hiopVector& r) -{ - return linsys_->solve(r); + return (ret == 0); } +bool hiopDualsLsqUpdateLinsysRedDenseSym::solve_with_factors(hiopVector& r) { return linsys_->solve(r); } ///////////////////////////////////////////////////////////////////////////////////////////////// // LAPACK specialization @@ -691,45 +676,48 @@ bool hiopDualsLsqUpdateLinsysRedDenseSymPD::solve_with_factors(hiopVector& r) { assert(M_); #ifdef HIOP_DEEPCHECKS - assert(M_->m()==M_->n()); + assert(M_->m() == M_->n()); #endif - if(M_->m()==0) { + if(M_->m() == 0) { return true; } - char uplo='L'; //we have upper triangular in C++, but this is lower in fortran - int N=M_->n(), lda=N, nrhs=1, info; - DPOTRS(&uplo,&N, &nrhs, M_->local_data(), &lda, r.local_data(), &lda, &info); - if(info<0) { - nlp_->log->printf(hovError, "hiopDualsLsqUpdateLinsysRedDenseSymPD::solveWithFactors: dpotrs " - "returned error %d\n", info); + char uplo = 'L'; // we have upper triangular in C++, but this is lower in fortran + int N = M_->n(), lda = N, nrhs = 1, info; + DPOTRS(&uplo, &N, &nrhs, M_->local_data(), &lda, r.local_data(), &lda, &info); + if(info < 0) { + nlp_->log->printf(hovError, + "hiopDualsLsqUpdateLinsysRedDenseSymPD::solveWithFactors: dpotrs " + "returned error %d\n", + info); } #ifdef HIOP_DEEPCHECKS - assert(info<=0); + assert(info <= 0); #endif - return (info==0); + return (info == 0); } - bool hiopDualsLsqUpdateLinsysRedDenseSymPD::factorize_mat() { #ifdef HIOP_DEEPCHECKS - assert(M_->m()==M_->n()); + assert(M_->m() == M_->n()); #endif - if(M_->m()==0) { + if(M_->m() == 0) { return true; } - char uplo='L'; int N=M_->n(), lda=N, info; + char uplo = 'L'; + int N = M_->n(), lda = N, info; DPOTRF(&uplo, &N, M_->local_data(), &lda, &info); - if(info>0) { + if(info > 0) { nlp_->log->printf(hovError, "hiopDualsLsqUpdateLinsysRedDense::factorizeMat: dpotrf (Chol fact) detected " - "%d minor being indefinite.\n", info); + "%d minor being indefinite.\n", + info); } else { - if(info<0) { + if(info < 0) { nlp_->log->printf(hovError, "hiopKKTLinSysLowRank::factorizeMat: dpotrf returned error %d\n", info); } } - return (info==0); + return (info == 0); } -}; //~ end of namespace +}; // namespace hiop diff --git a/src/Optimization/hiopDualsUpdater.hpp b/src/Optimization/hiopDualsUpdater.hpp index 8f67eeab2..754f7a4be 100644 --- a/src/Optimization/hiopDualsUpdater.hpp +++ b/src/Optimization/hiopDualsUpdater.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -52,7 +52,7 @@ * @author Cosmin G. Petra , LLNL * */ - + #ifndef HIOP_DUALSUPDATER #define HIOP_DUALSUPDATER @@ -68,22 +68,23 @@ namespace hiop class hiopDualsUpdater { public: - hiopDualsUpdater(hiopNlpFormulation* nlp) : nlp_(nlp) {}; + hiopDualsUpdater(hiopNlpFormulation* nlp) + : nlp_(nlp) {}; virtual ~hiopDualsUpdater() {}; - /* The method is called after each iteration to update the duals. Implementations for different - * multiplier updating strategies are provided by child classes + /* The method is called after each iteration to update the duals. Implementations for different + * multiplier updating strategies are provided by child classes * - linear (Newton) update in hiopDualsNewtonLinearUpdate * - lsq in hiopDualsLsqUpdate * The parameters are: * - iter: incumbent iterate that is going to be updated with iter_plus by the caller of this method. - * - iter_plus: [in/out] on return the duals should be updated; primals are already updated, but + * - iter_plus: [in/out] on return the duals should be updated; primals are already updated, but * the function should not rely on this. If a particular implementation of this method requires - * accessing primals, it should do so by working with 'iter'. In the algorithm class, iter_plus + * accessing primals, it should do so by working with 'iter'. In the algorithm class, iter_plus * corresponds to 'iter_trial'. - * - f,c,d: fcn evals at iter_plus + * - f,c,d: fcn evals at iter_plus * - grad_f, jac_c, jac_d: derivatives at iter_plus - * - search_dir: search direction (already used to update primals, potentially to be used to + * - search_dir: search direction (already used to update primals, potentially to be used to * update duals (in linear update)) * - alpha_primal: step taken for primals (also taken for eq. duals for the linear Newton duals update) * - alpha_dual: max step for the duals based on the fraction-to-the-boundary rule (not used @@ -102,15 +103,17 @@ class hiopDualsUpdater const double& alpha_dual, const double& mu, const double& kappa_sigma, - const double& infeas_nrm_trial)=0; + const double& infeas_nrm_trial) = 0; + protected: hiopNlpFormulation* nlp_; -protected: + +protected: hiopDualsUpdater() {}; + private: hiopDualsUpdater(const hiopDualsUpdater&) {}; - void operator=(const hiopDualsUpdater&) {}; - + void operator=(const hiopDualsUpdater&) {}; }; class hiopDualsLsqUpdate : public hiopDualsUpdater @@ -136,14 +139,11 @@ class hiopDualsLsqUpdate : public hiopDualsUpdater const double& infeas_nrm_trial); /** LSQ update of the constraints duals (yc and yd). Source file describe the math. */ - virtual bool go(hiopIterate& it_ini, - const hiopVector& grad_f, - const hiopMatrix& jac_c, - const hiopMatrix& jac_d) + virtual bool go(hiopIterate& it_ini, const hiopVector& grad_f, const hiopMatrix& jac_c, const hiopMatrix& jac_d) { bool bret = init_for_duals_update(it_ini, grad_f, jac_c, jac_d); if(!bret) { - //non-fatal + // non-fatal nlp_->log->printf(hovScalars, "LSQ Duals update error in initialization.\n"); } bret = do_lsq_update(it_ini, grad_f, jac_c, jac_d); @@ -158,18 +158,18 @@ class hiopDualsLsqUpdate : public hiopDualsUpdater { bool bret = init_for_ini_duals_comp(it_ini, grad_f, jac_c, jac_d); if(!bret) { - //non-fatal + // non-fatal nlp_->log->printf(hovScalars, "Initial Duals error in initialization.\n"); return false; } - + bret = do_lsq_update(it_ini, grad_f, jac_c, jac_d); - + double ycnrm = it_ini.get_yc()->infnorm(); double ydnrm = it_ini.get_yd()->infnorm(); double ynrm = (ycnrm > ydnrm) ? ycnrm : ydnrm; - // do not use the LSQ duals if their norm is greater than 'duals_lsq_ini_max'; instead, + // do not use the LSQ duals if their norm is greater than 'duals_lsq_ini_max'; instead, double lsq_dual_init_max = nlp_->options->GetNumeric("duals_lsq_ini_max"); if(ynrm > lsq_dual_init_max || !bret) { it_ini.get_yc()->setToZero(); @@ -184,13 +184,14 @@ class hiopDualsLsqUpdate : public hiopDualsUpdater } return bret; } + protected: /// Helper method doing the work for both `go` and `compute_initial_duals_eq` virtual bool do_lsq_update(hiopIterate& it, const hiopVector& grad_f, const hiopMatrix& jac_c, const hiopMatrix& jac_d) = 0; - + /// Performs internal (re)initializations related to computations of initial duals that are needed by `do_lsq_update` virtual bool init_for_ini_duals_comp(hiopIterate& it, const hiopVector& grad_f, @@ -199,7 +200,7 @@ class hiopDualsLsqUpdate : public hiopDualsUpdater { return true; } - + /// Performs internal (re)initializations related to computations LSQ-based duals that are needed by `do_lsq_update` virtual bool init_for_duals_update(hiopIterate& it, const hiopVector& grad_f, @@ -208,17 +209,18 @@ class hiopDualsLsqUpdate : public hiopDualsUpdater { return true; } + protected: hiopVector *rhs_, *rhsc_, *rhsd_; hiopVector *vec_n_, *vec_mi_; -private: +private: hiopDualsLsqUpdate() {}; hiopDualsLsqUpdate(const hiopDualsLsqUpdate&) {}; - void operator=(const hiopDualsLsqUpdate&) {}; + void operator=(const hiopDualsLsqUpdate&) {}; }; -/** Given xk, zk_l, zk_u, vk_l, and vk_u (contained in 'iter'), this method solves an LSQ problem +/** Given xk, zk_l, zk_u, vk_l, and vk_u (contained in 'iter'), this method solves an LSQ problem * corresponding to dual infeasibility equation * min_{y_c,y_d} || \nabla f(xk) + J^T_c(xk) y_c + J_d^T(xk) y_d - zk_l+zk_u ||^2 * || - y_d - vk_l + vk_u ||_2, @@ -226,29 +228,29 @@ class hiopDualsLsqUpdate : public hiopDualsUpdater * min_{y_c, y_d} || [ J_c^T J_d^T ] [ y_c ] - [ -\nabla f(xk) + zk_l-zk_u ] ||^2 * || [ 0 I ] [ y_d ] [ - vk_l + vk_u ] ||_2 * ****************************** - * NLPs with dense constraints + * NLPs with dense constraints * ****************************** - * For NLPs with dense constraints, the above LSQ problem is solved by solving the linear + * For NLPs with dense constraints, the above LSQ problem is solved by solving the linear * system in y_c and y_d: - * [ J_c J_c^T J_c J_d^T ] [ y_c ] = [ J_c 0 ] [ -\nabla f(xk) + zk_l-zk_u ] + * [ J_c J_c^T J_c J_d^T ] [ y_c ] = [ J_c 0 ] [ -\nabla f(xk) + zk_l-zk_u ] * [ J_d J_c^T J_d J_d^T + I ] [ y_d ] [ J_d I ] [ - vk_l + vk_u ] * This linear system is small (of size m=m_E+m_I) (so it is replicated for all MPI ranks). - * + * * The matrix of the above system is stored in the member variable M_ of this class and the * right-hand side in 'rhs_'. - * + * * ************** * MDS NLPs * ************** - * For MDS NLPs, the linear system exploits the block structure of the Jacobians Jc and Jd. + * For MDS NLPs, the linear system exploits the block structure of the Jacobians Jc and Jd. * Namely, since Jc = [Jxdc Jxsc] and Jd = [Jxdd Jxsd], the following * dense linear system is to be solved for y_c and y_d * * [ Jxdc Jxdc^T + Jxsc Jxsc^T Jxdc Jxdd^T + Jxsc Jxsd^T ] [ y_c ] = same rhs as * [ Jxdd Jxdc^T + Jxsd Jxsc^T Jxdd Jxdd^T + Jxsd Jxsd^T + I ] [ y_d ] above - * - * The above linear systems are solved as dense linear systems using Cholesky factorization - * of LAPACK or MAGMA. + * + * The above linear systems are solved as dense linear systems using Cholesky factorization + * of LAPACK or MAGMA. * */ class hiopDualsLsqUpdateLinsysRedDense : public hiopDualsLsqUpdate @@ -256,13 +258,12 @@ class hiopDualsLsqUpdateLinsysRedDense : public hiopDualsLsqUpdate public: hiopDualsLsqUpdateLinsysRedDense(hiopNlpFormulation* nlp); virtual ~hiopDualsLsqUpdateLinsysRedDense(); + private: - virtual bool do_lsq_update(hiopIterate& it, - const hiopVector& grad_f, - const hiopMatrix& jac_c, - const hiopMatrix& jac_d); + virtual bool do_lsq_update(hiopIterate& it, const hiopVector& grad_f, const hiopMatrix& jac_c, const hiopMatrix& jac_d); + protected: - //not part of hiopDualsLsqUpdate but overridden in child classes + // not part of hiopDualsLsqUpdate but overridden in child classes /* Returns reference to the underlying system matrix, which is maintained / allocated differently * by child classes @@ -272,15 +273,16 @@ class hiopDualsLsqUpdateLinsysRedDense : public hiopDualsLsqUpdate /// Factorizes the LSQ matrix and returns true if successfull, otherwise returns false virtual bool factorize_mat() = 0; - /* Performs triangular solves based on the factorize matrix and returns true if successfull, + /* Performs triangular solves based on the factorize matrix and returns true if successfull, * otherwise returns false */ virtual bool solve_with_factors(hiopVector& r) = 0; + private: hiopMatrix *mexme_, *mexmi_, *mixmi_, *mxm_; #ifdef HIOP_DEEPCHECKS hiopMatrix* M_copy_; - hiopVector *rhs_copy_; + hiopVector* rhs_copy_; hiopMatrix* mixme_; #endif }; @@ -292,21 +294,16 @@ class hiopDualsLsqUpdateLinsysRedDenseSym : public hiopDualsLsqUpdateLinsysRedDe { public: hiopDualsLsqUpdateLinsysRedDenseSym(hiopNlpFormulation* nlp); - - virtual ~hiopDualsLsqUpdateLinsysRedDenseSym() - { - delete linsys_; - } + + virtual ~hiopDualsLsqUpdateLinsysRedDenseSym() { delete linsys_; } protected: /// Returns reference to the underlying system matrix, which is maintained by the linear solver - hiopMatrixDense* get_lsq_sysmatrix() - { - return &linsys_->sysMatrix(); - } - + hiopMatrixDense* get_lsq_sysmatrix() { return &linsys_->sysMatrix(); } + bool factorize_mat(); bool solve_with_factors(hiopVector& r); + protected: hiopLinSolverSymDense* linsys_; }; @@ -317,34 +314,29 @@ class hiopDualsLsqUpdateLinsysRedDenseSymPD : public hiopDualsLsqUpdateLinsysRed { public: hiopDualsLsqUpdateLinsysRedDenseSymPD(hiopNlpFormulation* nlp) - : hiopDualsLsqUpdateLinsysRedDense(nlp) + : hiopDualsLsqUpdateLinsysRedDense(nlp) { - M_ = LinearAlgebraFactory::create_matrix_dense(nlp->options->GetString("mem_space"), nlp_->m(), nlp_->m()); - } - - virtual ~hiopDualsLsqUpdateLinsysRedDenseSymPD() - { - delete M_; + M_ = LinearAlgebraFactory::create_matrix_dense(nlp->options->GetString("mem_space"), nlp_->m(), nlp_->m()); } + virtual ~hiopDualsLsqUpdateLinsysRedDenseSymPD() { delete M_; } + protected: /// Returns reference to the underlying system matrix, which is maintained by the linear solver - hiopMatrixDense* get_lsq_sysmatrix() - { - return M_; - } - + hiopMatrixDense* get_lsq_sysmatrix() { return M_; } + bool factorize_mat(); bool solve_with_factors(hiopVector& r); + protected: hiopMatrixDense* M_; }; /** * @brief LSQ-based initialization for sparse linear algebra (NLPs with sparse Jac/Hes) - * + * * With the same notation used above for hiopDualsLsqUpdateLinsysRedDense class, - * for sparse NLPs, the corresponding LSQ problem is the following augmented + * for sparse NLPs, the corresponding LSQ problem is the following augmented * linear system: * [ I 0 Jc^T Jd^T ] [ dx] [ \nabla f(xk) - zk_l + zk_u ] * [ 0 I 0 -I ] [ dd] [ -vk_l + vk_u ] @@ -359,12 +351,10 @@ class hiopDualsLsqUpdateLinsysAugSparse : public hiopDualsLsqUpdate public: hiopDualsLsqUpdateLinsysAugSparse(hiopNlpFormulation* nlp); virtual ~hiopDualsLsqUpdateLinsysAugSparse(); + private: - virtual bool do_lsq_update(hiopIterate& iter, - const hiopVector& grad_f, - const hiopMatrix& jac_c, - const hiopMatrix& jac_d); - + virtual bool do_lsq_update(hiopIterate& iter, const hiopVector& grad_f, const hiopMatrix& jac_c, const hiopMatrix& jac_d); + /// Performs internal (re)initializations related to computations of initial duals that are needed by `do_lsq_update` virtual bool init_for_ini_duals_comp(hiopIterate& iter, const hiopVector& grad_f, @@ -373,7 +363,7 @@ class hiopDualsLsqUpdateLinsysAugSparse : public hiopDualsLsqUpdate { return instantiate_linear_solver("duals_init_linear_solver_sparse", iter, grad_f, jac_c, jac_d); } - + /// Performs internal (re)initializations related to computations LSQ-based duals that are needed by `do_lsq_update` virtual bool init_for_duals_update(hiopIterate& iter, const hiopVector& grad_f, @@ -389,19 +379,20 @@ class hiopDualsLsqUpdateLinsysAugSparse : public hiopDualsLsqUpdate const hiopVector& grad_f, const hiopMatrix& jac_c, const hiopMatrix& jac_d); + private: hiopLinSolver* lin_sys_; }; - -/** +/** * Performs Newton update for the duals, which is a simple linear update along the dual Newton direction * with a given dual step. - */ + */ class hiopDualsNewtonLinearUpdate : public hiopDualsUpdater { public: - hiopDualsNewtonLinearUpdate(hiopNlpFormulation* nlp) : hiopDualsUpdater(nlp) {}; + hiopDualsNewtonLinearUpdate(hiopNlpFormulation* nlp) + : hiopDualsUpdater(nlp) {}; virtual ~hiopDualsNewtonLinearUpdate() {}; /* Linear update of step length alpha_primal in eq. duals yc and yd and step length @@ -422,20 +413,19 @@ class hiopDualsNewtonLinearUpdate : public hiopDualsUpdater const double& mu, const double& kappa_sigma, const double& infeas_nrm_trial) - { + { if(!iter_plus.takeStep_duals(iter, search_dir, alpha_primal, alpha_dual)) { nlp_->log->printf(hovError, "dual Newton updater: error in standard update of the duals"); return false; } - return iter_plus.adjustDuals_primalLogHessian(mu,kappa_sigma); + return iter_plus.adjustDuals_primalLogHessian(mu, kappa_sigma); } -private: +private: hiopDualsNewtonLinearUpdate() {}; hiopDualsNewtonLinearUpdate(const hiopDualsNewtonLinearUpdate&) {}; - void operator=(const hiopDualsNewtonLinearUpdate&) {}; - + void operator=(const hiopDualsNewtonLinearUpdate&) {}; }; -} +} // namespace hiop #endif diff --git a/src/Optimization/hiopFRProb.cpp b/src/Optimization/hiopFRProb.cpp index d9039ccd7..6c2ef7dc3 100644 --- a/src/Optimization/hiopFRProb.cpp +++ b/src/Optimization/hiopFRProb.cpp @@ -57,27 +57,27 @@ #include "hiopVector.hpp" #include -#include //for memcpy +#include //for memcpy #include #include namespace hiop { -/* -* Specialized interface for feasibility restoration problem with sparse blocks in the Jacobian and Hessian. -*/ +/* + * Specialized interface for feasibility restoration problem with sparse blocks in the Jacobian and Hessian. + */ hiopFRProbSparse::hiopFRProbSparse(hiopAlgFilterIPMBase& solver_base) - : solver_base_(solver_base), - last_x_{nullptr}, - last_d_{nullptr} + : solver_base_(solver_base), + last_x_{nullptr}, + last_d_{nullptr} { nlp_base_ = dynamic_cast(solver_base.get_nlp()); n_x_ = nlp_base_->n(); m_eq_ = nlp_base_->m_eq(); m_ineq_ = nlp_base_->m_ineq(); - n_ = n_x_ + 2*m_eq_ + 2*m_ineq_; + n_ = n_x_ + 2 * m_eq_ + 2 * m_ineq_; m_ = m_eq_ + m_ineq_; pe_st_ = n_x_; @@ -108,28 +108,26 @@ hiopFRProbSparse::hiopFRProbSparse(hiopAlgFilterIPMBase& solver_base) // nnz for sparse matrices; nnz_Jac_c_ = nlp_base_->get_nnz_Jaceq() + 2 * m_eq_; nnz_Jac_d_ = nlp_base_->get_nnz_Jacineq() + 2 * m_ineq_; - + // not sure if Hess has diagonal terms, compute nnz_hess here // assuming hess is in upper_triangular form hiopMatrixSparse* Hess_base = dynamic_cast(solver_base_.get_Hess_Lagr()); nnz_Hess_Lag_ = n_x_ + Hess_base->numberOfOffDiagNonzeros(); - + Jac_cd_ = LinearAlgebraFactory::create_matrix_sparse(nlp_base_->options->GetString("mem_space"), m_, n_, nnz_Jac_c_ + nnz_Jac_d_); - Hess_cd_ = LinearAlgebraFactory::create_matrix_sym_sparse(nlp_base_->options->GetString("mem_space"), - n_, - nnz_Hess_Lag_); - + Hess_cd_ = LinearAlgebraFactory::create_matrix_sym_sparse(nlp_base_->options->GetString("mem_space"), n_, nnz_Hess_Lag_); + // set mu0 to be the maximun of the current barrier parameter mu and norm_inf(|c|)*/ - theta_ref_ = solver_base_.get_resid()->get_theta(); //at current point, i.e., reference point + theta_ref_ = solver_base_.get_resid()->get_theta(); // at current point, i.e., reference point nrmInf_feas_ref_ = solver_base_.get_resid()->get_nrmInf_bar_feasib(); mu_ = solver_base.get_mu(); mu_ = std::max(mu_, nrmInf_feas_ref_); zeta_ = std::sqrt(mu_); - rho_ = 1000; // FIXME: make this as an user option + rho_ = 1000; // FIXME: make this as an user option } hiopFRProbSparse::~hiopFRProbSparse() @@ -154,8 +152,8 @@ hiopFRProbSparse::~hiopFRProbSparse() } } -bool hiopFRProbSparse::get_MPI_comm(MPI_Comm& comm_out) -{ +bool hiopFRProbSparse::get_MPI_comm(MPI_Comm& comm_out) +{ #ifdef HIOP_USE_MPI comm_out = nlp_base_->get_comm(); #else @@ -180,7 +178,7 @@ bool hiopFRProbSparse::get_prob_info(NonlinearityType& type) return true; } -bool hiopFRProbSparse::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool hiopFRProbSparse::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { assert(n == n_); @@ -190,11 +188,11 @@ bool hiopFRProbSparse::get_vars_info(const size_type& n, double *xlow, double* x // x, p and n wrk_primal_->setToConstant(0.0); - xl.copyToStarting(*wrk_primal_,0); + xl.copyToStarting(*wrk_primal_, 0); wrk_primal_->copyTo(xlow); wrk_primal_->setToConstant(1e+20); - xu.copyToStarting(*wrk_primal_,0); + xu.copyToStarting(*wrk_primal_, 0); wrk_primal_->copyTo(xupp); wrk_primal_->set_array_from_to(type, 0, n_x_, var_type, 0); @@ -247,9 +245,9 @@ bool hiopFRProbSparse::eval_f(const size_type& n, const double* x, bool new_x, d assert(n == n_); obj_value = 0.; - wrk_primal_->copy_from_starting_at(x, 0, n_); // [x pe ne pi ni] - wrk_x_->copy_from_starting_at(x, 0, n_x_); // [x] - + wrk_primal_->copy_from_starting_at(x, 0, n_); // [x pe ne pi ni] + wrk_x_->copy_from_starting_at(x, 0, n_x_); // [x] + // rho*sum(p+n) obj_value += rho_ * (wrk_primal_->sum_local() - wrk_x_->sum_local()); @@ -261,7 +259,7 @@ bool hiopFRProbSparse::eval_f(const size_type& n, const double* x, bool new_x, d obj_value += 0.5 * zeta_ * wrk_db * wrk_db; // keep a copy of the original objective value - nlp_base_->eval_f(*wrk_x_, new_x, obj_base_); + nlp_base_->eval_f(*wrk_x_, new_x, obj_base_); return true; } @@ -279,7 +277,7 @@ bool hiopFRProbSparse::eval_grad_f(const size_type& n, const double* x, bool new wrk_x_->componentMult(*DR_); wrk_x_->componentMult(*DR_); wrk_x_->scale(zeta_); - wrk_x_->copyToStarting(*wrk_primal_,0); + wrk_x_->copyToStarting(*wrk_primal_, 0); wrk_primal_->copyTo(gradf); @@ -297,11 +295,7 @@ bool hiopFRProbSparse::eval_cons(const size_type& n, return false; } -bool hiopFRProbSparse::eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) +bool hiopFRProbSparse::eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { assert(n == n_); assert(m == m_); @@ -311,15 +305,15 @@ bool hiopFRProbSparse::eval_cons(const size_type& n, nlp_base_->eval_c_d(*wrk_x_, new_x, *wrk_c_, *wrk_d_); // compute FR equality constratint body c-pe+ne - wrk_eq_->copy_from_starting_at(x, pe_st_, m_eq_); //pe + wrk_eq_->copy_from_starting_at(x, pe_st_, m_eq_); // pe wrk_c_->axpy(-1.0, *wrk_eq_); - wrk_eq_->copy_from_starting_at(x, ne_st_, m_eq_); //ne + wrk_eq_->copy_from_starting_at(x, ne_st_, m_eq_); // ne wrk_c_->axpy(1.0, *wrk_eq_); // compute FR inequality constratint body d-pi+ni - wrk_ineq_->copy_from_starting_at(x, pi_st_, m_ineq_); //pi + wrk_ineq_->copy_from_starting_at(x, pi_st_, m_ineq_); // pi wrk_d_->axpy(-1.0, *wrk_ineq_); - wrk_ineq_->copy_from_starting_at(x, ni_st_, m_ineq_); //ni + wrk_ineq_->copy_from_starting_at(x, ni_st_, m_ineq_); // ni wrk_d_->axpy(1.0, *wrk_ineq_); // assemble the full vector @@ -331,7 +325,8 @@ bool hiopFRProbSparse::eval_cons(const size_type& n, return true; } -bool hiopFRProbSparse::eval_Jac_cons(const size_type& n, const size_type& m, +bool hiopFRProbSparse::eval_Jac_cons(const size_type& n, + const size_type& m, const size_type& num_cons, const index_type* idx_cons, const double* x, @@ -354,8 +349,8 @@ bool hiopFRProbSparse::eval_Jac_cons(const size_type& n, int* jJacS, double* MJacS) { - assert( n == n_); - assert( m == m_); + assert(n == n_); + assert(m == m_); assert(nnzJacS == nlp_base_->get_nnz_Jaceq() + nlp_base_->get_nnz_Jacineq() + 2 * (m_)); @@ -368,11 +363,11 @@ bool hiopFRProbSparse::eval_Jac_cons(const size_type& n, wrk_x_->copy_from_starting_at(x, 0, n_x_); // get Jac_c and Jac_d for the x part --- use original Jac_c/Jac_d as buffers - nlp_base_->eval_Jac_c_d(*wrk_x_, new_x, Jac_c, Jac_d); + nlp_base_->eval_Jac_c_d(*wrk_x_, new_x, Jac_c, Jac_d); } Jac_cd_->set_Jac_FR(Jac_c, Jac_d, iJacS, jJacS, MJacS); - + return true; } @@ -404,7 +399,7 @@ bool hiopFRProbSparse::eval_Hess_Lagr(const size_type& n, double obj_factor = 0.0; // get Hess for the x part --- use original Hess as buffers nlp_base_->eval_Hess_Lagr(*wrk_x_, new_x, obj_factor, *wrk_eq_, *wrk_ineq_, new_lambda, Hess); - + // additional diag Hess for x: zeta*DR^2 wrk_x_->setToConstant(zeta_); wrk_x_->componentMult(*DR_); @@ -413,22 +408,22 @@ bool hiopFRProbSparse::eval_Hess_Lagr(const size_type& n, // extend Hes to the p and n parts Hess_cd_->set_Hess_FR(Hess, iHSS, jHSS, MHSS, *wrk_x_); - + return true; } bool hiopFRProbSparse::get_warmstart_point(const size_type& n, const size_type& m, double* x0, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, double* ineq_slack, double* vl0, - double* vu0 ) + double* vu0) { - assert( n == n_); - assert( m == m_); + assert(n == n_); + assert(m == m_); hiopVector* c = solver_base_.get_c(); hiopVector* d = solver_base_.get_d(); @@ -446,14 +441,14 @@ bool hiopFRProbSparse::get_warmstart_point(const size_type& n, s->copyTo(ineq_slack); /* - * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) - */ + * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) + */ // firstly use pe as a temp vec - double tmp_db = mu_/(2*rho_); + double tmp_db = mu_ / (2 * rho_); wrk_cbody_->copyFrom(*c); - wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) + wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) wrk_c_->setToConstant(tmp_db); - wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) + wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) // compute ne (wrk_eq_) wrk_eq_->copyFrom(*wrk_c_); @@ -467,13 +462,13 @@ bool hiopFRProbSparse::get_warmstart_point(const size_type& n, wrk_c_->axpy(1.0, *wrk_eq_); /* - * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) - */ + * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) + */ // firstly use pi as a temp vec wrk_dbody_->copyFrom(*d); - wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) + wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) wrk_d_->setToConstant(tmp_db); - wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) + wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) // compute ni (wrk_ineq_) wrk_ineq_->copyFrom(*wrk_d_); @@ -487,13 +482,13 @@ bool hiopFRProbSparse::get_warmstart_point(const size_type& n, wrk_d_->axpy(1.0, *wrk_ineq_); /* - * assemble x0 - */ + * assemble x0 + */ wrk_x_->copyToStarting(*wrk_primal_, 0); - wrk_c_->copyToStarting(*wrk_primal_, n_x_); // pe - wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); // ne - wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_); // pi - wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_ + m_ineq_); // ni + wrk_c_->copyToStarting(*wrk_primal_, n_x_); // pe + wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); // ne + wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_); // pi + wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_ + m_ineq_); // ni wrk_primal_->copyTo(x0); @@ -514,10 +509,10 @@ bool hiopFRProbSparse::get_warmstart_point(const size_type& n, // assemble zl wrk_x_->copyToStarting(*wrk_primal_, 0); - wrk_c_->copyToStarting(*wrk_primal_, n_x_); // pe - wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); // ne - wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_); // pi - wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_ + m_ineq_); // ni + wrk_c_->copyToStarting(*wrk_primal_, n_x_); // pe + wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); // ne + wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_); // pi + wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_ + m_ineq_); // ni wrk_primal_->copyTo(z_bndL0); // get zu @@ -574,8 +569,8 @@ bool hiopFRProbSparse::iterate_callback(int iter, nlp_base_->eval_c_d(*wrk_x_, true, *wrk_cbody_, *wrk_dbody_); // compute theta for base problem - wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) - wrk_dbody_->axpy(-1.0, *wrk_d_); // wrk_dbody_ = (d-s) + wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) + wrk_dbody_->axpy(-1.0, *wrk_d_); // wrk_dbody_ = (d-s) double theta_ori = 0.0; theta_ori += wrk_cbody_->onenorm(); @@ -588,25 +583,26 @@ bool hiopFRProbSparse::iterate_callback(int iter, double max_nrmInf_feas = nlp_base_->options->GetNumeric("kappa_resto") * nrmInf_feas_ref_; // termination condition 1) theta_curr <= kappa_resto*theta_ref - if(nrmInf_feas_ori <= max_nrmInf_feas && iter>0) { + if(nrmInf_feas_ori <= max_nrmInf_feas && iter > 0) { // termination condition 2) (theta and logbar) are not in the original filter // check (original) filter condition // compute the original logbar objective from the trial point given by the FR problem // Note that this function will updates the slack and dual variables - - // set original trial (x,d) to the soltion from FR problem + + // set original trial (x,d) to the soltion from FR problem hiopIterate* it_base_trial = solver_base_.get_it_trial_nonconst(); - const hiopIterate* it_base_curr = solver_base_.get_it_curr(); + const hiopIterate* it_base_curr = solver_base_.get_it_curr(); it_base_trial->get_x()->copyFrom(*wrk_x_); it_base_trial->get_d()->copyFrom(*wrk_d_); // compute other slacks in the base problem - [[maybe_unused]] const size_type n_adjusted_slacks = it_base_trial->compute_safe_slacks(*it_base_curr, solver_base_.get_mu()); + [[maybe_unused]] const size_type n_adjusted_slacks = + it_base_trial->compute_safe_slacks(*it_base_curr, solver_base_.get_mu()); - // evaluate base problem log barr + // evaluate base problem log barr solver_base_.get_logbar()->updateWithNlpInfo_trial_funcOnly(*it_base_trial, obj_base_, *wrk_cbody_, *wrk_dbody_); - + double trial_bar_obj_ori = solver_base_.get_logbar()->f_logbar_trial; if(!solver_base_.filter_contains(theta_ori, trial_bar_obj_ori)) { @@ -626,7 +622,7 @@ bool hiopFRProbSparse::iterate_callback(int iter, bool hiopFRProbSparse::force_update_x(const int n, double* x) { // this function is used in FR in FR, see eq (33) - assert( n == n_); + assert(n == n_); hiopVector* c = solver_base_.get_c(); hiopVector* d = solver_base_.get_d(); @@ -637,14 +633,14 @@ bool hiopFRProbSparse::force_update_x(const int n, double* x) wrk_x_->copy_from_starting_at(x, 0, n_x_); /* - * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) - */ + * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) + */ // firstly use pe as a temp vec - double tmp_db = mu_/(2*rho_); + double tmp_db = mu_ / (2 * rho_); wrk_cbody_->copyFrom(*c); - wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) + wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) wrk_c_->setToConstant(tmp_db); - wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) + wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) // compute ne (wrk_eq_) wrk_eq_->copyFrom(*wrk_c_); @@ -658,13 +654,13 @@ bool hiopFRProbSparse::force_update_x(const int n, double* x) wrk_c_->axpy(1.0, *wrk_eq_); /* - * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) - */ + * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) + */ // firstly use pi as a temp vec wrk_dbody_->copyFrom(*d); - wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) + wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) wrk_d_->setToConstant(tmp_db); - wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) + wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) // compute ni (wrk_ineq_) wrk_ineq_->copyFrom(*wrk_d_); @@ -678,31 +674,26 @@ bool hiopFRProbSparse::force_update_x(const int n, double* x) wrk_d_->axpy(1.0, *wrk_ineq_); /* - * assemble x = [x pe ne pi ni - */ + * assemble x = [x pe ne pi ni + */ wrk_x_->copyToStarting(*wrk_primal_, 0); wrk_c_->copyToStarting(*wrk_primal_, n_x_); wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); - wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_); - wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_ + m_ineq_); + wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_); + wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_ + m_ineq_); wrk_primal_->copyTo(x); return true; } - - - - - -/* -* Specialized interface for feasibility restoration problem with MDS blocks in the Jacobian and Hessian. -*/ +/* + * Specialized interface for feasibility restoration problem with MDS blocks in the Jacobian and Hessian. + */ hiopFRProbMDS::hiopFRProbMDS(hiopAlgFilterIPMBase& solver_base) - : solver_base_(solver_base), - last_x_{nullptr}, - last_d_{nullptr} + : solver_base_(solver_base), + last_x_{nullptr}, + last_d_{nullptr} { nlp_base_ = dynamic_cast(solver_base.get_nlp()); n_x_ = nlp_base_->n(); @@ -711,8 +702,8 @@ hiopFRProbMDS::hiopFRProbMDS(hiopAlgFilterIPMBase& solver_base) m_eq_ = nlp_base_->m_eq(); m_ineq_ = nlp_base_->m_ineq(); - n_ = n_x_ + 2*m_eq_ + 2*m_ineq_; - n_sp_ = n_x_sp_ + 2*m_eq_ + 2*m_ineq_; + n_ = n_x_ + 2 * m_eq_ + 2 * m_ineq_; + n_sp_ = n_x_sp_ + 2 * m_eq_ + 2 * m_ineq_; n_de_ = n_x_de_; m_ = m_eq_ + m_ineq_; @@ -756,17 +747,17 @@ hiopFRProbMDS::hiopFRProbMDS(hiopAlgFilterIPMBase& solver_base) nnz_sp_Hess_Lagr_SS_ = n_x_sp_ + Hess_SS->sp_mat()->numberOfOffDiagNonzeros(); nnz_sp_Hess_Lagr_SD_ = 0; - Jac_cd_ = new hiopMatrixMDS(m_, n_sp_, n_de_, nnz_sp_Jac_c_+nnz_sp_Jac_d_, nlp_base_->options->GetString("mem_space")); + Jac_cd_ = new hiopMatrixMDS(m_, n_sp_, n_de_, nnz_sp_Jac_c_ + nnz_sp_Jac_d_, nlp_base_->options->GetString("mem_space")); Hess_cd_ = new hiopMatrixSymBlockDiagMDS(n_sp_, n_de_, nnz_sp_Hess_Lagr_SS_, nlp_base_->options->GetString("mem_space")); // set mu0 to be the maximun of the current barrier parameter mu and norm_inf(|c|)*/ - theta_ref_ = solver_base_.get_resid()->get_theta(); //at current point, i.e., reference point + theta_ref_ = solver_base_.get_resid()->get_theta(); // at current point, i.e., reference point nrmInf_feas_ref_ = solver_base_.get_resid()->get_nrmInf_bar_feasib(); mu_ = solver_base.get_mu(); mu_ = std::max(mu_, nrmInf_feas_ref_); zeta_ = std::sqrt(mu_); - rho_ = 1000; // FIXME: make this as an user option + rho_ = 1000; // FIXME: make this as an user option } hiopFRProbMDS::~hiopFRProbMDS() @@ -784,7 +775,7 @@ hiopFRProbMDS::~hiopFRProbMDS() delete wrk_x_sp_; delete wrk_x_de_; - + delete Jac_cd_; delete Hess_cd_; if(last_x_) { @@ -795,8 +786,8 @@ hiopFRProbMDS::~hiopFRProbMDS() } } -bool hiopFRProbMDS::get_MPI_comm(MPI_Comm& comm_out) -{ +bool hiopFRProbMDS::get_MPI_comm(MPI_Comm& comm_out) +{ #ifdef HIOP_USE_MPI comm_out = nlp_base_->get_comm(); #else @@ -821,7 +812,7 @@ bool hiopFRProbMDS::get_prob_info(NonlinearityType& type) return true; } -bool hiopFRProbMDS::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool hiopFRProbMDS::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { assert(n == n_); @@ -841,9 +832,9 @@ bool hiopFRProbMDS::get_vars_info(const size_type& n, double *xlow, double* xupp wrk_primal_->copyTo(xupp); wrk_primal_->set_array_from_to(type, 0, n_, hiopLinear); - wrk_primal_->set_array_from_to(type, x_sp_st_, x_sp_st_+n_x_sp_, var_type, 0); - wrk_primal_->set_array_from_to(type, x_de_st_, x_de_st_+n_x_de_, var_type, n_x_sp_); - + wrk_primal_->set_array_from_to(type, x_sp_st_, x_sp_st_ + n_x_sp_, var_type, 0); + wrk_primal_->set_array_from_to(type, x_de_st_, x_de_st_ + n_x_de_, var_type, n_x_sp_); + return true; } @@ -895,10 +886,10 @@ bool hiopFRProbMDS::eval_f(const size_type& n, const double* x, bool new_x, doub assert(n == n_); obj_value = 0.; - wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] - wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // [xsp] - wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // [xde] - + wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] + wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // [xsp] + wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // [xde] + // rho*sum(p+n) obj_value += rho_ * (wrk_primal_->sum_local() - wrk_x_->sum_local()); @@ -909,7 +900,7 @@ bool hiopFRProbMDS::eval_f(const size_type& n, const double* x, bool new_x, doub obj_value += 0.5 * zeta_ * wrk_db * wrk_db; - nlp_base_->eval_f(*wrk_x_, new_x, obj_base_); + nlp_base_->eval_f(*wrk_x_, new_x, obj_base_); return true; } @@ -919,10 +910,10 @@ bool hiopFRProbMDS::eval_grad_f(const size_type& n, const double* x, bool new_x, assert(n == n_); // x - wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] - wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] - wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] - + wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] + wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] + wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] + // p and n wrk_primal_->setToConstant(rho_); @@ -931,9 +922,9 @@ bool hiopFRProbMDS::eval_grad_f(const size_type& n, const double* x, bool new_x, wrk_x_->componentMult(*DR_); wrk_x_->componentMult(*DR_); wrk_x_->scale(zeta_); - wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // x = [xsp pe ne pi ni xde] - wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // x = [xsp pe ne pi ni xde] - + wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // x = [xsp pe ne pi ni xde] + wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // x = [xsp pe ne pi ni xde] + wrk_primal_->copyTo(gradf); return true; @@ -950,31 +941,27 @@ bool hiopFRProbMDS::eval_cons(const size_type& n, return false; } -bool hiopFRProbMDS::eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) +bool hiopFRProbMDS::eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { assert(n == n_); assert(m == m_); // evaluate base case c and d - wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] - wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] - wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] + wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] + wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] + wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] nlp_base_->eval_c_d(*wrk_x_, new_x, *wrk_c_, *wrk_d_); // compute FR equality constratint body c-pe+ne - wrk_eq_->copy_from_starting_at(x, pe_st_, m_eq_); //pe + wrk_eq_->copy_from_starting_at(x, pe_st_, m_eq_); // pe wrk_c_->axpy(-1.0, *wrk_eq_); - wrk_eq_->copy_from_starting_at(x, ne_st_, m_eq_); //ne + wrk_eq_->copy_from_starting_at(x, ne_st_, m_eq_); // ne wrk_c_->axpy(1.0, *wrk_eq_); // compute FR inequality constratint body d-pi+ni - wrk_ineq_->copy_from_starting_at(x, pi_st_, m_ineq_); //pi + wrk_ineq_->copy_from_starting_at(x, pi_st_, m_ineq_); // pi wrk_d_->axpy(-1.0, *wrk_ineq_); - wrk_ineq_->copy_from_starting_at(x, ni_st_, m_ineq_); //ni + wrk_ineq_->copy_from_starting_at(x, ni_st_, m_ineq_); // ni wrk_d_->axpy(1.0, *wrk_ineq_); // assemble the full vector @@ -990,7 +977,7 @@ bool hiopFRProbMDS::eval_Jac_cons(const size_type& n, const size_type& m, const size_type& num_cons, const index_type* idx_cons, - const double* x, + const double* x, bool new_x, const size_type& nsparse, const size_type& ndense, @@ -1004,7 +991,7 @@ bool hiopFRProbMDS::eval_Jac_cons(const size_type& n, } /// @pre assuming Jac of the original prob is sorted -bool hiopFRProbMDS::eval_Jac_cons(const size_type& n, +bool hiopFRProbMDS::eval_Jac_cons(const size_type& n, const size_type& m, const double* x, bool new_x, @@ -1016,11 +1003,11 @@ bool hiopFRProbMDS::eval_Jac_cons(const size_type& n, double* MJacS, double* JacD) { - assert( n == n_); - assert( m == m_); - assert( nsparse == n_sp_); - assert( ndense == n_de_); - assert( nnzJacS == nlp_base_->get_nnz_sp_Jaceq() + nlp_base_->get_nnz_sp_Jacineq() + 2 * (m_)); + assert(n == n_); + assert(m == m_); + assert(nsparse == n_sp_); + assert(ndense == n_de_); + assert(nnzJacS == nlp_base_->get_nnz_sp_Jaceq() + nlp_base_->get_nnz_sp_Jacineq() + 2 * (m_)); hiopMatrixMDS* Jac_c = dynamic_cast(solver_base_.get_Jac_c()); hiopMatrixMDS* Jac_d = dynamic_cast(solver_base_.get_Jac_d()); @@ -1029,12 +1016,12 @@ bool hiopFRProbMDS::eval_Jac_cons(const size_type& n, // extend Jac to the p and n parts if(MJacS != nullptr) { // get x for the original problem - wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] - wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] - wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] + wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] + wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] + wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] // get Jac_c and Jac_d for the x part --- use original Jac_c/Jac_d as buffers - nlp_base_->eval_Jac_c_d(*wrk_x_, new_x, *Jac_c, *Jac_d); + nlp_base_->eval_Jac_c_d(*wrk_x_, new_x, *Jac_c, *Jac_d); } Jac_cd_->set_Jac_FR(*Jac_c, *Jac_d, iJacS, jJacS, MJacS, JacD); @@ -1045,15 +1032,15 @@ bool hiopFRProbMDS::eval_Jac_cons(const size_type& n, bool hiopFRProbMDS::get_warmstart_point(const size_type& n, const size_type& m, double* x0, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, double* ineq_slack, double* vl0, - double* vu0 ) + double* vu0) { - assert( n == n_); - assert( m == m_); + assert(n == n_); + assert(m == m_); hiopVector* c = solver_base_.get_c(); hiopVector* d = solver_base_.get_d(); @@ -1073,14 +1060,14 @@ bool hiopFRProbMDS::get_warmstart_point(const size_type& n, s->copyTo(ineq_slack); /* - * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) - */ + * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) + */ // firstly use pe as a temp vec - double tmp_db = mu_/(2*rho_); + double tmp_db = mu_ / (2 * rho_); wrk_cbody_->copyFrom(*c); - wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) + wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) wrk_c_->setToConstant(tmp_db); - wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) + wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) // compute ne (wrk_eq_) wrk_eq_->copyFrom(*wrk_c_); @@ -1094,13 +1081,13 @@ bool hiopFRProbMDS::get_warmstart_point(const size_type& n, wrk_c_->axpy(1.0, *wrk_eq_); /* - * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) - */ + * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) + */ // firstly use pi as a temp vec wrk_dbody_->copyFrom(*d); - wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) + wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) wrk_d_->setToConstant(tmp_db); - wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) + wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) // compute ni (wrk_ineq_) wrk_ineq_->copyFrom(*wrk_d_); @@ -1114,14 +1101,14 @@ bool hiopFRProbMDS::get_warmstart_point(const size_type& n, wrk_d_->axpy(1.0, *wrk_ineq_); /* - * assemble x0 - */ - wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // [xsp pe ne pi ni xde] - wrk_c_->copyToStarting(*wrk_primal_, pe_st_); // pe - wrk_eq_->copyToStarting(*wrk_primal_, ne_st_); // ne - wrk_d_->copyToStarting(*wrk_primal_, pi_st_); // pi - wrk_ineq_->copyToStarting(*wrk_primal_, ni_st_); // ni - wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // [xsp pe ne pi ni xde] + * assemble x0 + */ + wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // [xsp pe ne pi ni xde] + wrk_c_->copyToStarting(*wrk_primal_, pe_st_); // pe + wrk_eq_->copyToStarting(*wrk_primal_, ne_st_); // ne + wrk_d_->copyToStarting(*wrk_primal_, pi_st_); // pi + wrk_ineq_->copyToStarting(*wrk_primal_, ni_st_); // ni + wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // [xsp pe ne pi ni xde] wrk_primal_->copyTo(x0); @@ -1141,20 +1128,20 @@ bool hiopFRProbMDS::get_warmstart_point(const size_type& n, wrk_ineq_->scale(mu_); // assemble zl - wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // [xsp pe ne pi ni xde] - wrk_c_->copyToStarting(*wrk_primal_, pe_st_); // pe - wrk_eq_->copyToStarting(*wrk_primal_, ne_st_); // ne - wrk_d_->copyToStarting(*wrk_primal_, pi_st_); // pi - wrk_ineq_->copyToStarting(*wrk_primal_, ni_st_); // ni - wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // [xsp pe ne pi ni xde] + wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // [xsp pe ne pi ni xde] + wrk_c_->copyToStarting(*wrk_primal_, pe_st_); // pe + wrk_eq_->copyToStarting(*wrk_primal_, ne_st_); // ne + wrk_d_->copyToStarting(*wrk_primal_, pi_st_); // pi + wrk_ineq_->copyToStarting(*wrk_primal_, ni_st_); // ni + wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // [xsp pe ne pi ni xde] wrk_primal_->copyTo(z_bndL0); // get zu wrk_primal_->setToZero(); wrk_x_->copyFrom(*zu); wrk_x_->component_min(rho_); - wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // [xsp pe ne pi ni xde] - wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // [xsp pe ne pi ni xde] + wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // [xsp pe ne pi ni xde] + wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // [xsp pe ne pi ni xde] wrk_primal_->copyTo(z_bndU0); // compute vl vu @@ -1199,15 +1186,15 @@ bool hiopFRProbMDS::iterate_callback(int iter, const hiopVector& crhs = nlp_base_->get_crhs(); // evaluate c_body and d_body in base problem - wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] - wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] - wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] + wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] + wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] + wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] wrk_d_->copy_from_starting_at(s, 0, m_ineq_); nlp_base_->eval_c_d(*wrk_x_, true, *wrk_cbody_, *wrk_dbody_); // compute theta for base problem - wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) - wrk_dbody_->axpy(-1.0, *wrk_d_); // wrk_dbody_ = (d-s) + wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) + wrk_dbody_->axpy(-1.0, *wrk_d_); // wrk_dbody_ = (d-s) double theta_ori = 0.0; theta_ori += wrk_cbody_->onenorm(); @@ -1220,25 +1207,26 @@ bool hiopFRProbMDS::iterate_callback(int iter, double max_nrmInf_feas = nlp_base_->options->GetNumeric("kappa_resto") * nrmInf_feas_ref_; // termination condition 1) theta_curr <= kappa_resto*theta_ref - if(nrmInf_feas_ori <= max_nrmInf_feas && iter>0) { + if(nrmInf_feas_ori <= max_nrmInf_feas && iter > 0) { // termination condition 2) (theta and logbar) are not in the original filter // check (original) filter condition // compute the original logbar objective from the trial point given by the FR problem // Note that this function will updates the slack and dual variables - - // set original trial (x,d) to the soltion from FR problem + + // set original trial (x,d) to the soltion from FR problem hiopIterate* it_base_trial = solver_base_.get_it_trial_nonconst(); - const hiopIterate* it_base_curr = solver_base_.get_it_curr(); + const hiopIterate* it_base_curr = solver_base_.get_it_curr(); it_base_trial->get_x()->copyFrom(*wrk_x_); it_base_trial->get_d()->copyFrom(*wrk_d_); // compute other slacks in the base problem - [[maybe_unused]] const size_type n_adjusted_slacks = it_base_trial->compute_safe_slacks(*it_base_curr, solver_base_.get_mu()); + [[maybe_unused]] const size_type n_adjusted_slacks = + it_base_trial->compute_safe_slacks(*it_base_curr, solver_base_.get_mu()); - // evaluate base problem log barr + // evaluate base problem log barr solver_base_.get_logbar()->updateWithNlpInfo_trial_funcOnly(*it_base_trial, obj_base_, *wrk_cbody_, *wrk_dbody_); - + double trial_bar_obj_ori = solver_base_.get_logbar()->f_logbar_trial; if(!solver_base_.filter_contains(theta_ori, trial_bar_obj_ori)) { @@ -1258,7 +1246,7 @@ bool hiopFRProbMDS::iterate_callback(int iter, bool hiopFRProbMDS::force_update_x(const int n, double* x) { // this function is used in FR in FR, see eq (33) - assert( n == n_); + assert(n == n_); hiopVector* c = solver_base_.get_c(); hiopVector* d = solver_base_.get_d(); @@ -1266,19 +1254,19 @@ bool hiopFRProbMDS::force_update_x(const int n, double* x) const hiopVector& crhs = nlp_base_->get_crhs(); // x is fixed - wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] - wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] - wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] + wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] + wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] + wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] /* - * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) - */ + * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) + */ // firstly use pe as a temp vec - double tmp_db = mu_/(2*rho_); + double tmp_db = mu_ / (2 * rho_); wrk_cbody_->copyFrom(*c); - wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) + wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) wrk_c_->setToConstant(tmp_db); - wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) + wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) // compute ne (wrk_eq_) wrk_eq_->copyFrom(*wrk_c_); @@ -1292,13 +1280,13 @@ bool hiopFRProbMDS::force_update_x(const int n, double* x) wrk_c_->axpy(1.0, *wrk_eq_); /* - * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) - */ + * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) + */ // firstly use pi as a temp vec wrk_dbody_->copyFrom(*d); - wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) + wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) wrk_d_->setToConstant(tmp_db); - wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) + wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) // compute ni (wrk_ineq_) wrk_ineq_->copyFrom(*wrk_d_); @@ -1312,14 +1300,14 @@ bool hiopFRProbMDS::force_update_x(const int n, double* x) wrk_d_->axpy(1.0, *wrk_ineq_); /* - * assemble x - */ - wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // [xsp pe ne pi ni xde] - wrk_c_->copyToStarting(*wrk_primal_, pe_st_); // pe - wrk_eq_->copyToStarting(*wrk_primal_, ne_st_); // ne - wrk_d_->copyToStarting(*wrk_primal_, pi_st_); // pi - wrk_ineq_->copyToStarting(*wrk_primal_, ni_st_); // ni - wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // [xsp pe ne pi ni xde] + * assemble x + */ + wrk_x_->startingAtCopyToStartingAt(0, *wrk_primal_, x_sp_st_, n_x_sp_); // [xsp pe ne pi ni xde] + wrk_c_->copyToStarting(*wrk_primal_, pe_st_); // pe + wrk_eq_->copyToStarting(*wrk_primal_, ne_st_); // ne + wrk_d_->copyToStarting(*wrk_primal_, pi_st_); // pi + wrk_ineq_->copyToStarting(*wrk_primal_, ni_st_); // ni + wrk_x_->startingAtCopyToStartingAt(n_x_sp_, *wrk_primal_, x_de_st_, n_x_de_); // [xsp pe ne pi ni xde] wrk_primal_->copyTo(x); @@ -1354,9 +1342,9 @@ bool hiopFRProbMDS::eval_Hess_Lagr(const size_type& n, if(MHSS != nullptr) { // get x for the original problem - wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] - wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] - wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] + wrk_primal_->copy_from_starting_at(x, 0, n_); // [xsp pe ne pi ni xde] + wrk_primal_->startingAtCopyToStartingAt(x_sp_st_, *wrk_x_, 0, n_x_sp_); // x = [xsp xde] + wrk_primal_->startingAtCopyToStartingAt(x_de_st_, *wrk_x_, n_x_sp_, n_x_de_); // x = [xsp xde] // split lambda wrk_eq_->copy_from_starting_at(lambda, 0, m_eq_); @@ -1365,14 +1353,14 @@ bool hiopFRProbMDS::eval_Hess_Lagr(const size_type& n, double obj_factor = 0.0; // get Hess for the x part --- use original Hess as buffers nlp_base_->eval_Hess_Lagr(*wrk_x_, new_x, obj_factor, *wrk_eq_, *wrk_ineq_, new_lambda, *Hess); - + // additional diag Hess for x: zeta*DR^2 wrk_x_->setToConstant(zeta_); wrk_x_->componentMult(*DR_); wrk_x_->componentMult(*DR_); - + wrk_x_->copyToStarting(0, *wrk_x_sp_); - wrk_x_->copyToStarting(n_x_sp_, *wrk_x_de_); + wrk_x_->copyToStarting(n_x_sp_, *wrk_x_de_); } // extend Hes to the p and n parts @@ -1382,11 +1370,11 @@ bool hiopFRProbMDS::eval_Hess_Lagr(const size_type& n, } /* -index_type hiopFRProbDense::denseVecBase2FR(hiopVector* vec_base, hiopVector* vec_fr) -{ +index_type hiopFRProbDense::denseVecBase2FR(hiopVector* vec_base, hiopVector* vec_fr) +{ assert(vec_base->get_size() == n_x_); assert(vec_fr->get_size() == n_); - + { return idx_local + col_partition_[my_rank]; } @@ -1396,13 +1384,13 @@ index_type hiopFRProbDense::denseVecBase2FR(hiopVector* vec_base, hiopVector* ve } */ -/* -* Specialized interface for feasibility restoration problem with MDS blocks in the Jacobian and Hessian. -*/ +/* + * Specialized interface for feasibility restoration problem with MDS blocks in the Jacobian and Hessian. + */ hiopFRProbDense::hiopFRProbDense(hiopAlgFilterIPMBase& solver_base) - : solver_base_(solver_base), - last_x_{nullptr}, - last_d_{nullptr} + : solver_base_(solver_base), + last_x_{nullptr}, + last_d_{nullptr} { nlp_base_ = dynamic_cast(solver_base.get_nlp()); n_x_ = nlp_base_->n(); @@ -1411,7 +1399,7 @@ hiopFRProbDense::hiopFRProbDense(hiopAlgFilterIPMBase& solver_base) #ifdef HIOP_USE_MPI vec_distrib_base_ = nlp_base_->getVecDistInfo(); #endif - n_ = n_x_ + 2*m_eq_ + 2*m_ineq_; + n_ = n_x_ + 2 * m_eq_ + 2 * m_ineq_; m_ = m_eq_ + m_ineq_; pe_st_ = n_x_; @@ -1437,7 +1425,7 @@ hiopFRProbDense::hiopFRProbDense(hiopAlgFilterIPMBase& solver_base) #endif // assign col_partition_ - col_partition_ = new index_type[comm_size_+1]; + col_partition_ = new index_type[comm_size_ + 1]; col_partition_[0] = 0; col_partition_[comm_size_] = n_; @@ -1448,8 +1436,7 @@ hiopFRProbDense::hiopFRProbDense(hiopAlgFilterIPMBase& solver_base) } } if(col_partition_) { - wrk_primal_ = LinearAlgebraFactory::create_vector(nlp_base_->options->GetString("mem_space"), n_, - col_partition_, comm_); + wrk_primal_ = LinearAlgebraFactory::create_vector(nlp_base_->options->GetString("mem_space"), n_, col_partition_, comm_); Jac_cd_ = LinearAlgebraFactory::create_matrix_dense("DEFAULT", m_, n_, col_partition_, comm_); } else { wrk_primal_ = LinearAlgebraFactory::create_vector(nlp_base_->options->GetString("mem_space"), n_); @@ -1473,13 +1460,13 @@ hiopFRProbDense::hiopFRProbDense(hiopAlgFilterIPMBase& solver_base) last_d_ = wrk_d_->alloc_clone(); // set mu0 to be the maximun of the current barrier parameter mu and norm_inf(|c|)*/ - theta_ref_ = solver_base_.get_resid()->get_theta(); //at current point, i.e., reference point + theta_ref_ = solver_base_.get_resid()->get_theta(); // at current point, i.e., reference point nrmInf_feas_ref_ = solver_base_.get_resid()->get_nrmInf_bar_feasib(); mu_ = solver_base.get_mu(); mu_ = std::max(mu_, nrmInf_feas_ref_); zeta_ = std::sqrt(mu_); - rho_ = 1000; // FIXME: make this as an user option + rho_ = 1000; // FIXME: make this as an user option } hiopFRProbDense::~hiopFRProbDense() @@ -1494,7 +1481,7 @@ hiopFRProbDense::~hiopFRProbDense() delete wrk_primal_; delete wrk_dual_; delete DR_; - + delete Jac_cd_; if(last_x_) { @@ -1506,8 +1493,8 @@ hiopFRProbDense::~hiopFRProbDense() delete[] col_partition_; } -bool hiopFRProbDense::get_MPI_comm(MPI_Comm& comm_out) -{ +bool hiopFRProbDense::get_MPI_comm(MPI_Comm& comm_out) +{ comm_out = comm_; return true; } @@ -1518,7 +1505,7 @@ bool hiopFRProbDense::get_vecdistrib_info(size_type global_n, index_type* cols) for(int i = 0; i <= comm_size_; i++) { cols[i] = col_partition_[i]; } - } else { + } else { assert(false && "You shouldn't need distrib info for this size."); } return true; @@ -1540,7 +1527,7 @@ bool hiopFRProbDense::get_prob_info(NonlinearityType& type) return true; } -bool hiopFRProbDense::get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type) +bool hiopFRProbDense::get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type) { assert(n == n_); @@ -1552,18 +1539,18 @@ bool hiopFRProbDense::get_vars_info(const size_type& n, double *xlow, double* xu // build new lower bound wrk_primal_->setToConstant(0.0); // FIXME; add global methed. now we only have local method - xl.copyToStarting(*wrk_primal_,0); + xl.copyToStarting(*wrk_primal_, 0); wrk_primal_->copyTo(xlow); // build new upper bound wrk_primal_->setToConstant(1e+20); - xu.copyToStarting(*wrk_primal_,0); + xu.copyToStarting(*wrk_primal_, 0); wrk_primal_->copyTo(xupp); for(index_type i_local = 0; i_local < xl.get_local_size(); ++i_local) { type[i_local] = hiopNonlinear; } - + return true; } @@ -1600,16 +1587,17 @@ bool hiopFRProbDense::eval_f(const size_type& n, const double* x, bool new_x, do obj_value = 0.; // FIXME; add global methed. now we only have local method, or sync mpi results here - wrk_primal_->copy_from_starting_at(x, 0, n_); // [x pe ne pi ni] - wrk_x_->copy_from_starting_at(x, 0, n_x_); // [x] - + wrk_primal_->copy_from_starting_at(x, 0, n_); // [x pe ne pi ni] + wrk_x_->copy_from_starting_at(x, 0, n_x_); // [x] + // rho*sum(p+n) // FIXME; add global methed. now we only have local method, or sync mpi results here obj_value += rho_ * (wrk_primal_->sum_local() - wrk_x_->sum_local()); #ifdef HIOP_USE_MPI double obj_global; - int ierr = MPI_Allreduce(&obj_value, &obj_global, 1, MPI_DOUBLE, MPI_SUM, comm_); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Allreduce(&obj_value, &obj_global, 1, MPI_DOUBLE, MPI_SUM, comm_); + assert(ierr == MPI_SUCCESS); obj_value = obj_global; #endif @@ -1640,21 +1628,17 @@ bool hiopFRProbDense::eval_grad_f(const size_type& n, const double* x, bool new_ wrk_x_->componentMult(*DR_); wrk_x_->componentMult(*DR_); wrk_x_->scale(zeta_); - + // build [x p n] // FIXME; add global methed. now we only have local method - wrk_x_->copyToStarting(*wrk_primal_,0); - + wrk_x_->copyToStarting(*wrk_primal_, 0); + wrk_primal_->copyTo(gradf); return true; } -bool hiopFRProbDense::eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons) +bool hiopFRProbDense::eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons) { assert(n == n_); assert(m == m_); @@ -1669,41 +1653,41 @@ bool hiopFRProbDense::eval_cons(const size_type& n, // p and n are only located in the last rank // compute FR equality constratint body c-pe+ne if(m_eq_ > 0) { - if(rank_ == comm_size_-1) { - wrk_eq_->copy_from_starting_at(x+pe_st_-col_partition_[rank_], 0, m_eq_); //pe + if(rank_ == comm_size_ - 1) { + wrk_eq_->copy_from_starting_at(x + pe_st_ - col_partition_[rank_], 0, m_eq_); // pe wrk_c_->axpy(-1.0, *wrk_eq_); - wrk_eq_->copy_from_starting_at(x+ne_st_-col_partition_[rank_], 0, m_eq_); //ne + wrk_eq_->copy_from_starting_at(x + ne_st_ - col_partition_[rank_], 0, m_eq_); // ne wrk_c_->axpy(1.0, *wrk_eq_); } - int ierr = MPI_Bcast(wrk_c_->local_data(), m_eq_, MPI_DOUBLE, comm_size_-1, comm_); - assert(ierr==MPI_SUCCESS); + int ierr = MPI_Bcast(wrk_c_->local_data(), m_eq_, MPI_DOUBLE, comm_size_ - 1, comm_); + assert(ierr == MPI_SUCCESS); } // compute FR equality constratint body d-pi+ni if(m_ineq_ > 0) { - if(rank_ == comm_size_-1) { - wrk_ineq_->copy_from_starting_at(x+pi_st_-col_partition_[rank_], 0, m_ineq_); //pi + if(rank_ == comm_size_ - 1) { + wrk_ineq_->copy_from_starting_at(x + pi_st_ - col_partition_[rank_], 0, m_ineq_); // pi wrk_d_->axpy(-1.0, *wrk_ineq_); - wrk_ineq_->copy_from_starting_at(x+ni_st_-col_partition_[rank_], 0, m_ineq_); //ni + wrk_ineq_->copy_from_starting_at(x + ni_st_ - col_partition_[rank_], 0, m_ineq_); // ni wrk_d_->axpy(1.0, *wrk_ineq_); } - int ierr = MPI_Bcast(wrk_d_->local_data(), m_ineq_, MPI_DOUBLE, comm_size_-1, comm_); - assert(ierr==MPI_SUCCESS); + int ierr = MPI_Bcast(wrk_d_->local_data(), m_ineq_, MPI_DOUBLE, comm_size_ - 1, comm_); + assert(ierr == MPI_SUCCESS); } #else // compute FR equality constratint body c-pe+ne if(m_eq_ > 0) { - wrk_eq_->copy_from_starting_at(x+pe_st_, 0, m_eq_); //pe + wrk_eq_->copy_from_starting_at(x + pe_st_, 0, m_eq_); // pe wrk_c_->axpy(-1.0, *wrk_eq_); - wrk_eq_->copy_from_starting_at(x+ne_st_, 0, m_eq_); //ne + wrk_eq_->copy_from_starting_at(x + ne_st_, 0, m_eq_); // ne wrk_c_->axpy(1.0, *wrk_eq_); } // compute FR equality constratint body d-pi+ni if(m_ineq_ > 0) { - wrk_ineq_->copy_from_starting_at(x+pi_st_, 0, m_ineq_); //pi + wrk_ineq_->copy_from_starting_at(x + pi_st_, 0, m_ineq_); // pi wrk_d_->axpy(-1.0, *wrk_ineq_); - wrk_ineq_->copy_from_starting_at(x+ni_st_, 0, m_ineq_); //ni + wrk_ineq_->copy_from_starting_at(x + ni_st_, 0, m_ineq_); // ni wrk_d_->axpy(1.0, *wrk_ineq_); } #endif @@ -1719,14 +1703,10 @@ bool hiopFRProbDense::eval_cons(const size_type& n, } /// @pre assuming Jac of the original prob is sorted -bool hiopFRProbDense::eval_Jac_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* Jac) +bool hiopFRProbDense::eval_Jac_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* Jac) { - assert( n == n_); - assert( m == m_); + assert(n == n_); + assert(m == m_); hiopMatrixDense* Jac_c = dynamic_cast(solver_base_.get_Jac_c()); hiopMatrixDense* Jac_d = dynamic_cast(solver_base_.get_Jac_d()); @@ -1739,12 +1719,12 @@ bool hiopFRProbDense::eval_Jac_cons(const size_type& n, wrk_x_->copy_from_starting_at(x, 0, n_x_); // get Jac_c and Jac_d for the x part --- use original Jac_c/Jac_d as buffers - nlp_base_->eval_Jac_c_d(*wrk_x_, new_x, *Jac_c, *Jac_d); - + nlp_base_->eval_Jac_c_d(*wrk_x_, new_x, *Jac_c, *Jac_d); + // FIXME add set_Jac_FR in hiopMatrixDense Jac_cd_->set_Jac_FR(*Jac_c, *Jac_d); - - Jac_cd_->copy_to(Jac); + + Jac_cd_->copy_to(Jac); return true; } @@ -1752,15 +1732,15 @@ bool hiopFRProbDense::eval_Jac_cons(const size_type& n, bool hiopFRProbDense::get_warmstart_point(const size_type& n, const size_type& m, double* x0, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, double* ineq_slack, double* vl0, double* vu0) { - assert( n == n_); - assert( m == m_); + assert(n == n_); + assert(m == m_); hiopVector* c = solver_base_.get_c(); hiopVector* d = solver_base_.get_d(); @@ -1778,14 +1758,14 @@ bool hiopFRProbDense::get_warmstart_point(const size_type& n, s->copyTo(ineq_slack); /* - * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) - */ + * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) + */ // firstly use pe as a temp vec - double tmp_db = mu_/(2*rho_); + double tmp_db = mu_ / (2 * rho_); wrk_cbody_->copyFrom(*c); - wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) + wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) wrk_c_->setToConstant(tmp_db); - wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) + wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) // compute ne (wrk_eq_) wrk_eq_->copyFrom(*wrk_c_); @@ -1799,13 +1779,13 @@ bool hiopFRProbDense::get_warmstart_point(const size_type& n, wrk_c_->axpy(1.0, *wrk_eq_); /* - * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) - */ + * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) + */ // firstly use pi as a temp vec wrk_dbody_->copyFrom(*d); - wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) + wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) wrk_d_->setToConstant(tmp_db); - wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) + wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) // compute ni (wrk_ineq_) wrk_ineq_->copyFrom(*wrk_d_); @@ -1819,15 +1799,15 @@ bool hiopFRProbDense::get_warmstart_point(const size_type& n, wrk_d_->axpy(1.0, *wrk_ineq_); /* - * assemble x0 - */ + * assemble x0 + */ // FIXME; add global methed. now we only have local method wrk_primal_->setToConstant(0.0); wrk_x_->copyToStarting(*wrk_primal_, 0); - wrk_c_->copyToStarting(*wrk_primal_, n_x_); // pe - wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); // ne - wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_); // pi - wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_ + m_ineq_); // ni + wrk_c_->copyToStarting(*wrk_primal_, n_x_); // pe + wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); // ne + wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_); // pi + wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_ + m_ineq_); // ni wrk_primal_->copyTo(x0); @@ -1849,10 +1829,10 @@ bool hiopFRProbDense::get_warmstart_point(const size_type& n, // assemble zl // FIXME; add global methed. now we only have local method wrk_x_->copyToStarting(*wrk_primal_, 0); - wrk_c_->copyToStarting(*wrk_primal_, n_x_); // pe - wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); // ne - wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_); // pi - wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_ + m_ineq_); // ni + wrk_c_->copyToStarting(*wrk_primal_, n_x_); // pe + wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); // ne + wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_); // pi + wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_ + m_ineq_); // ni wrk_primal_->copyTo(z_bndL0); // get zu @@ -1910,8 +1890,8 @@ bool hiopFRProbDense::iterate_callback(int iter, nlp_base_->eval_c_d(*wrk_x_, true, *wrk_cbody_, *wrk_dbody_); // compute theta for base problem - wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) - wrk_dbody_->axpy(-1.0, *wrk_d_); // wrk_dbody_ = (d-s) + wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) + wrk_dbody_->axpy(-1.0, *wrk_d_); // wrk_dbody_ = (d-s) double theta_ori = 0.0; theta_ori += wrk_cbody_->onenorm(); @@ -1924,26 +1904,26 @@ bool hiopFRProbDense::iterate_callback(int iter, double max_nrmInf_feas = nlp_base_->options->GetNumeric("kappa_resto") * nrmInf_feas_ref_; // termination condition 1) theta_curr <= kappa_resto*theta_ref - if(nrmInf_feas_ori <= max_nrmInf_feas && iter>0) { + if(nrmInf_feas_ori <= max_nrmInf_feas && iter > 0) { // termination condition 2) (theta and logbar) are not in the original filter // check (original) filter condition - [[maybe_unused]] double trial_obj_ori = obj_base_; // obj_base_ has been updated in the FR loop when we evaluate obj + [[maybe_unused]] double trial_obj_ori = obj_base_; // obj_base_ has been updated in the FR loop when we evaluate obj // compute the original logbar objective from the trial point given by the FR problem // Note that this function will updates the slack and dual variables - - // set original trial (x,d) to the soltion from FR problem + + // set original trial (x,d) to the soltion from FR problem hiopIterate* it_base_trial = solver_base_.get_it_trial_nonconst(); - const hiopIterate* it_base_curr = solver_base_.get_it_curr(); + const hiopIterate* it_base_curr = solver_base_.get_it_curr(); it_base_trial->get_x()->copyFrom(*wrk_x_); it_base_trial->get_d()->copyFrom(*wrk_d_); // compute other slacks in the base problem [[maybe_unused]] size_type n_adjusted_slacks = it_base_trial->compute_safe_slacks(*it_base_curr, solver_base_.get_mu()); - // evaluate base problem log barr + // evaluate base problem log barr solver_base_.get_logbar()->updateWithNlpInfo_trial_funcOnly(*it_base_trial, obj_base_, *wrk_cbody_, *wrk_dbody_); - + double trial_bar_obj_ori = solver_base_.get_logbar()->f_logbar_trial; if(!solver_base_.filter_contains(theta_ori, trial_bar_obj_ori)) { @@ -1963,7 +1943,7 @@ bool hiopFRProbDense::iterate_callback(int iter, bool hiopFRProbDense::force_update_x(const int n, double* x) { // this function is used in FR in FR, see eq (33) - assert( n == n_); + assert(n == n_); hiopVector* c = solver_base_.get_c(); hiopVector* d = solver_base_.get_d(); @@ -1975,14 +1955,14 @@ bool hiopFRProbDense::force_update_x(const int n, double* x) wrk_x_->copy_from_starting_at(x, 0, n_x_); /* - * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) - */ + * compute pe (wrk_c_) and ne (wrk_eq_) rom equation (33) + */ // firstly use pe as a temp vec - double tmp_db = mu_/(2*rho_); + double tmp_db = mu_ / (2 * rho_); wrk_cbody_->copyFrom(*c); - wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) + wrk_cbody_->axpy(-1.0, crhs); // wrk_cbody_ = (c-crhs) wrk_c_->setToConstant(tmp_db); - wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) + wrk_c_->axpy(-0.5, *wrk_cbody_); // wrk_c_ = (mu-rho*(c-crhs))/(2*rho) // compute ne (wrk_eq_) wrk_eq_->copyFrom(*wrk_c_); @@ -1996,13 +1976,13 @@ bool hiopFRProbDense::force_update_x(const int n, double* x) wrk_c_->axpy(1.0, *wrk_eq_); /* - * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) - */ + * compute pi (wrk_d_) and ni (wrk_ineq_) rom equation (33) + */ // firstly use pi as a temp vec wrk_dbody_->copyFrom(*d); - wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) + wrk_dbody_->axpy(-1.0, *s); // wrk_dbody_ = (d-s) wrk_d_->setToConstant(tmp_db); - wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) + wrk_d_->axpy(-0.5, *wrk_dbody_); // wrk_c_ = (mu-rho*(d-s))/(2*rho) // compute ni (wrk_ineq_) wrk_ineq_->copyFrom(*wrk_d_); @@ -2016,20 +1996,18 @@ bool hiopFRProbDense::force_update_x(const int n, double* x) wrk_d_->axpy(1.0, *wrk_ineq_); /* - * assemble x = [x pe ne pi ni] - */ + * assemble x = [x pe ne pi ni] + */ // FIXME; add global methed. now we only have local method wrk_x_->copyToStarting(*wrk_primal_, 0); wrk_c_->copyToStarting(*wrk_primal_, n_x_); wrk_eq_->copyToStarting(*wrk_primal_, n_x_ + m_eq_); - wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_); - wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2*m_eq_ + m_ineq_); + wrk_d_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_); + wrk_ineq_->copyToStarting(*wrk_primal_, n_x_ + 2 * m_eq_ + m_ineq_); wrk_primal_->copyTo(x); return true; } - - -}; +}; // namespace hiop diff --git a/src/Optimization/hiopFRProb.hpp b/src/Optimization/hiopFRProb.hpp index 50e51e2ab..ae05192f8 100644 --- a/src/Optimization/hiopFRProb.hpp +++ b/src/Optimization/hiopFRProb.hpp @@ -93,12 +93,9 @@ class hiopFRProbSparse : public hiopInterfaceSparse virtual bool get_prob_sizes(size_type& n, size_type& m); virtual bool get_prob_info(NonlinearityType& type); - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); - virtual bool get_sparse_blocks_info(int& nx, - int& nnz_sparse_Jaceq, - int& nnz_sparse_Jacineq, - int& nnz_sparse_Hess_Lagr); + virtual bool get_sparse_blocks_info(int& nx, int& nnz_sparse_Jaceq, int& nnz_sparse_Jacineq, int& nnz_sparse_Hess_Lagr); virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); virtual bool eval_cons(const size_type& n, @@ -108,13 +105,10 @@ class hiopFRProbSparse : public hiopInterfaceSparse const double* x, bool new_x, double* cons); - virtual bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons); + virtual bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons); virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); - virtual bool eval_Jac_cons(const size_type& n, const size_type& m, + virtual bool eval_Jac_cons(const size_type& n, + const size_type& m, const size_type& num_cons, const index_type* idx_cons, const double* x, @@ -135,7 +129,7 @@ class hiopFRProbSparse : public hiopInterfaceSparse virtual bool get_warmstart_point(const size_type& n, const size_type& m, double* x0, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, double* ineq_slack, @@ -176,8 +170,8 @@ class hiopFRProbSparse : public hiopInterfaceSparse virtual bool force_update_x(const int n, double* x); - virtual const hiopVector& get_fr_sol_x () const { return *last_x_; } - virtual const hiopVector& get_fr_sol_d () const { return *last_d_; } + virtual const hiopVector& get_fr_sol_x() const { return *last_x_; } + virtual const hiopVector& get_fr_sol_d() const { return *last_d_; } private: size_type n_; @@ -204,7 +198,7 @@ class hiopFRProbSparse : public hiopInterfaceSparse hiopVector* wrk_cbody_; hiopVector* wrk_dbody_; hiopVector* wrk_primal_; // [x pe ne pi ni] - hiopVector* wrk_dual_; // [c d] + hiopVector* wrk_dual_; // [c d] hiopMatrixSparse* Jac_cd_; hiopMatrixSparse* Hess_cd_; @@ -219,10 +213,10 @@ class hiopFRProbSparse : public hiopInterfaceSparse double rho_; double obj_base_; - int pe_st_; // the 1st index of pe in the full primal space - int ne_st_; // the 1st index of ne in the full primal space - int pi_st_; // the 1st index of pi in the full primal space - int ni_st_; // the 1st index of ni in the full primal space + int pe_st_; // the 1st index of pe in the full primal space + int ne_st_; // the 1st index of ne in the full primal space + int pi_st_; // the 1st index of pi in the full primal space + int ni_st_; // the 1st index of ni in the full primal space }; /** Specialized interface for feasibility restoration problem with MDS blocks in the Jacobian and Hessian. @@ -242,7 +236,7 @@ class hiopFRProbMDS : public hiopInterfaceMDS virtual ~hiopFRProbMDS(); virtual bool get_MPI_comm(MPI_Comm& comm_out); - virtual bool get_sparse_dense_blocks_info(int& nx_sparse, + virtual bool get_sparse_dense_blocks_info(int& nx_sparse, int& nx_dense, int& nnz_sparse_Jaceq, int& nnz_sparse_Jacineq, @@ -274,7 +268,7 @@ class hiopFRProbMDS : public hiopInterfaceMDS index_type* jJacS, double* MJacS, double* JacD); - + virtual bool eval_Hess_Lagr(const size_type& n, const size_type& m, const double* x, @@ -296,7 +290,7 @@ class hiopFRProbMDS : public hiopInterfaceMDS virtual bool get_prob_sizes(size_type& n, size_type& m); virtual bool get_prob_info(NonlinearityType& type); - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); @@ -307,17 +301,13 @@ class hiopFRProbMDS : public hiopInterfaceMDS const double* x, bool new_x, double* cons); - virtual bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons); + virtual bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons); virtual bool eval_grad_f(const size_type& n, const double* x, bool new_x, double* gradf); virtual bool get_warmstart_point(const size_type& n, const size_type& m, double* x0, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, double* ineq_slack, @@ -346,8 +336,8 @@ class hiopFRProbMDS : public hiopInterfaceMDS virtual bool force_update_x(const int n, double* x); - virtual const hiopVector& get_fr_sol_x () const { return *last_x_; } - virtual const hiopVector& get_fr_sol_d () const { return *last_d_; } + virtual const hiopVector& get_fr_sol_x() const { return *last_x_; } + virtual const hiopVector& get_fr_sol_d() const { return *last_d_; } private: size_type n_; @@ -381,9 +371,9 @@ class hiopFRProbMDS : public hiopInterfaceMDS hiopVector* wrk_primal_; // [xsp pe ne pi ni xde] hiopVector* wrk_dual_; // [c d] - hiopVector* wrk_x_sp_; // the sparse part of x, xsp - hiopVector* wrk_x_de_; // the dense part of x, xde - + hiopVector* wrk_x_sp_; // the sparse part of x, xsp + hiopVector* wrk_x_de_; // the dense part of x, xde + hiopMatrixMDS* Jac_cd_; hiopMatrixSymBlockDiagMDS* Hess_cd_; @@ -397,12 +387,12 @@ class hiopFRProbMDS : public hiopInterfaceMDS double rho_; double obj_base_; - int x_sp_st_; // the 1st index of x_sp in the full primal space - int pe_st_; // the 1st index of pe in the full primal space - int ne_st_; // the 1st index of ne in the full primal space - int pi_st_; // the 1st index of pi in the full primal space - int ni_st_; // the 1st index of ni in the full primal space - int x_de_st_; // the 1st index of x_de in the full primal space + int x_sp_st_; // the 1st index of x_sp in the full primal space + int pe_st_; // the 1st index of pe in the full primal space + int ne_st_; // the 1st index of ne in the full primal space + int pi_st_; // the 1st index of pi in the full primal space + int ni_st_; // the 1st index of ni in the full primal space + int x_de_st_; // the 1st index of x_de in the full primal space }; /** Specialized interface for feasibility restoration problem with dense blocks in the Jacobian and Hessian. @@ -418,7 +408,7 @@ class hiopFRProbMDS : public hiopInterfaceMDS * distributed across MPI ranks ('get_vecdistrib_info' should return 'false'). * Acceleration can be however obtained using OpenMP and CUDA via Raja * abstraction layer that HiOp uses and via linear solver. - * + * */ class hiopFRProbDense : public hiopInterfaceDenseConstraints { @@ -434,17 +424,16 @@ class hiopFRProbDense : public hiopInterfaceDenseConstraints const index_type* idx_cons, const double* x, bool new_x, - double* Jac) { return false; } + double* Jac) + { + return false; + } - virtual bool eval_Jac_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* Jac); + virtual bool eval_Jac_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* Jac); virtual bool get_prob_sizes(size_type& n, size_type& m); virtual bool get_prob_info(NonlinearityType& type); - virtual bool get_vars_info(const size_type& n, double *xlow, double* xupp, NonlinearityType* type); + virtual bool get_vars_info(const size_type& n, double* xlow, double* xupp, NonlinearityType* type); virtual bool get_cons_info(const size_type& m, double* clow, double* cupp, NonlinearityType* type); virtual bool eval_f(const size_type& n, const double* x, bool new_x, double& obj_value); @@ -455,16 +444,15 @@ class hiopFRProbDense : public hiopInterfaceDenseConstraints const index_type* idx_cons, const double* x, bool new_x, - double* cons) { return false; } - virtual bool eval_cons(const size_type& n, - const size_type& m, - const double* x, - bool new_x, - double* cons); + double* cons) + { + return false; + } + virtual bool eval_cons(const size_type& n, const size_type& m, const double* x, bool new_x, double* cons); virtual bool get_warmstart_point(const size_type& n, const size_type& m, double* x0, - double* z_bndL0, + double* z_bndL0, double* z_bndU0, double* lambda0, double* ineq_slack, @@ -493,8 +481,8 @@ class hiopFRProbDense : public hiopInterfaceDenseConstraints virtual bool force_update_x(const int n, double* x); - virtual const hiopVector& get_fr_sol_x () const { return *last_x_; } - virtual const hiopVector& get_fr_sol_d () const { return *last_d_; } + virtual const hiopVector& get_fr_sol_x() const { return *last_x_; } + virtual const hiopVector& get_fr_sol_d() const { return *last_d_; } private: size_type n_; @@ -518,7 +506,7 @@ class hiopFRProbDense : public hiopInterfaceDenseConstraints hiopVector* wrk_dbody_; hiopVector* wrk_primal_; // [xde pe ne pi ni] hiopVector* wrk_dual_; // [c d] - + hiopMatrixDense* Jac_cd_; hiopVector* last_x_; @@ -539,15 +527,11 @@ class hiopFRProbDense : public hiopInterfaceDenseConstraints index_type* vec_distrib_base_; #endif - int pe_st_; // the 1st index of pe in the full primal space - int ne_st_; // the 1st index of ne in the full primal space - int pi_st_; // the 1st index of pi in the full primal space - int ni_st_; // the 1st index of ni in the full primal space + int pe_st_; // the 1st index of pe in the full primal space + int ne_st_; // the 1st index of ne in the full primal space + int pi_st_; // the 1st index of pi in the full primal space + int ni_st_; // the 1st index of ni in the full primal space }; - - - - -} //end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopFactAcceptor.cpp b/src/Optimization/hiopFactAcceptor.cpp index 0f1d58414..364718dce 100644 --- a/src/Optimization/hiopFactAcceptor.cpp +++ b/src/Optimization/hiopFactAcceptor.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** * @file hiopFactAcceptor.cpp @@ -51,7 +51,7 @@ * @author Cosmin G. Petra , LLNL * */ - + #include "hiopFactAcceptor.hpp" #include "LinAlgFactory.hpp" #include "hiopPDPerturbation.hpp" @@ -61,44 +61,43 @@ namespace hiop { -int hiopFactAcceptorIC::requireReFactorization(const hiopNlpFormulation& nlp, - const int& n_neg_eig, - const bool force_reg) +int hiopFactAcceptorIC::requireReFactorization(const hiopNlpFormulation& nlp, const int& n_neg_eig, const bool force_reg) { int continue_re_fact{1}; - if(n_required_neg_eig_>0) { + if(n_required_neg_eig_ > 0) { if(n_neg_eig < 0) { - //matrix singular + // matrix singular nlp.log->printf(hovScalars, "linsys is singular.\n"); - if(!perturb_calc_->compute_perturb_singularity()) {\ + if(!perturb_calc_->compute_perturb_singularity()) { continue_re_fact = -1; } } else if(n_neg_eig != n_required_neg_eig_) { - //wrong inertia - nlp.log->printf(hovScalars, "linsys negative eigs mismatch: has %d expected %d.\n", - n_neg_eig, n_required_neg_eig_); + // wrong inertia + nlp.log->printf(hovScalars, "linsys negative eigs mismatch: has %d expected %d.\n", n_neg_eig, n_required_neg_eig_); if(!perturb_calc_->compute_perturb_wrong_inertia()) { nlp.log->printf(hovWarning, "linsys: computing inertia perturbation failed.\n"); continue_re_fact = -1; } } else { - //all is good + // all is good continue_re_fact = 0; } } else if(n_neg_eig != 0) { - //correct for wrong intertia - nlp.log->printf(hovScalars, "linsys has wrong inertia (no constraints): factoriz " - "ret code %d\n.", n_neg_eig); - if(!perturb_calc_->compute_perturb_wrong_inertia()) { - nlp.log->printf(hovWarning, "linsys: computing inertia perturbation failed (2).\n"); - continue_re_fact = -1; - } + // correct for wrong intertia + nlp.log->printf(hovScalars, + "linsys has wrong inertia (no constraints): factoriz " + "ret code %d\n.", + n_neg_eig); + if(!perturb_calc_->compute_perturb_wrong_inertia()) { + nlp.log->printf(hovWarning, "linsys: computing inertia perturbation failed (2).\n"); + continue_re_fact = -1; + } } else { - //all is good - continue_re_fact = 0; + // all is good + continue_re_fact = 0; } return continue_re_fact; } @@ -108,9 +107,9 @@ int hiopFactAcceptorInertiaFreeDWD::requireReFactorization(const hiopNlpFormulat const bool force_reg) { int continue_re_fact{1}; - if(n_required_neg_eig_>0) { + if(n_required_neg_eig_ > 0) { if(n_neg_eig < 0) { - //matrix singular + // matrix singular nlp.log->printf(hovScalars, "linsys is singular.\n"); if(!perturb_calc_->compute_perturb_singularity()) { @@ -122,7 +121,7 @@ int hiopFactAcceptorInertiaFreeDWD::requireReFactorization(const hiopNlpFormulat continue_re_fact = 0; } else { // add regularization and accept current factorization (we do curvature test after backsolve) - nlp.log->printf(hovScalars, "linsys has wrong curvature. \n"); + nlp.log->printf(hovScalars, "linsys has wrong curvature. \n"); if(!perturb_calc_->compute_perturb_wrong_inertia()) { nlp.log->printf(hovWarning, "linsys: computing inertia perturbation failed (2).\n"); continue_re_fact = -1; @@ -132,7 +131,7 @@ int hiopFactAcceptorInertiaFreeDWD::requireReFactorization(const hiopNlpFormulat } else { if(n_neg_eig < 0) { // Cholesky solver failes due to the lack of positive definiteness - nlp.log->printf(hovScalars, "Cholesky solver: factoriz ret code %d\n.", n_neg_eig); + nlp.log->printf(hovScalars, "Cholesky solver: factoriz ret code %d\n.", n_neg_eig); if(!perturb_calc_->compute_perturb_wrong_inertia()) { nlp.log->printf(hovWarning, "linsys: computing inertia perturbation failed (2).\n"); continue_re_fact = -1; @@ -143,7 +142,7 @@ int hiopFactAcceptorInertiaFreeDWD::requireReFactorization(const hiopNlpFormulat continue_re_fact = 0; } else { // add regularization and accept current factorization (we do curvature test after backsolve) - nlp.log->printf(hovScalars, "linsys has wrong curvature. \n"); + nlp.log->printf(hovScalars, "linsys has wrong curvature. \n"); if(!perturb_calc_->compute_perturb_wrong_inertia()) { nlp.log->printf(hovWarning, "linsys: computing inertia perturbation failed (2).\n"); continue_re_fact = -1; @@ -154,6 +153,5 @@ int hiopFactAcceptorInertiaFreeDWD::requireReFactorization(const hiopNlpFormulat return continue_re_fact; } - -} //end of namespace +} // namespace hiop diff --git a/src/Optimization/hiopFactAcceptor.hpp b/src/Optimization/hiopFactAcceptor.hpp index 31724fac3..88abe2be7 100644 --- a/src/Optimization/hiopFactAcceptor.hpp +++ b/src/Optimization/hiopFactAcceptor.hpp @@ -47,12 +47,12 @@ // /** - * @file hiopFactAcceptor.cpp - * - * @author Nai-Yuan Chiang , LLNL - * @author Cosmin G. Petra , LLNL - * - */ + * @file hiopFactAcceptor.cpp + * + * @author Nai-Yuan Chiang , LLNL + * @author Cosmin G. Petra , LLNL + * + */ #ifndef HIOP_FACT_ACCEPTOR #define HIOP_FACT_ACCEPTOR @@ -66,80 +66,69 @@ namespace hiop class hiopFactAcceptor { public: - /** - * Default constructor + /** + * Default constructor * Determine if a factorization is acceptable or not */ hiopFactAcceptor(hiopPDPerturbation* p) - : perturb_calc_{p} + : perturb_calc_{p} {} - virtual ~hiopFactAcceptor() - {} - - /** + virtual ~hiopFactAcceptor() {} + + /** * @brief method to check if current factorization is acceptable or/and if - * a re-factorization is reqired by increasing 'delta_wx'-'delta_cd'. - * + * a re-factorization is reqired by increasing 'delta_wx'-'delta_cd'. + * * Returns '1' if current factorization is rejected * Returns '0' if current factorization is ok * Returns '-1' if current factorization failed due to singularity */ - virtual int requireReFactorization(const hiopNlpFormulation& nlp, - const int& n_neg_eig, - const bool force_reg=false) = 0; - -protected: + virtual int requireReFactorization(const hiopNlpFormulation& nlp, const int& n_neg_eig, const bool force_reg = false) = 0; + +protected: hiopPDPerturbation* perturb_calc_; - }; - + class hiopFactAcceptorIC : public hiopFactAcceptor { public: - /** - * Default constructor + /** + * Default constructor * Check inertia condition to determine if a factorization is acceptable or not */ hiopFactAcceptorIC(hiopPDPerturbation* p, const size_type n_required_neg_eig) - : hiopFactAcceptor(p), - n_required_neg_eig_(n_required_neg_eig) + : hiopFactAcceptor(p), + n_required_neg_eig_(n_required_neg_eig) {} - virtual ~hiopFactAcceptorIC() - {} - - virtual int requireReFactorization(const hiopNlpFormulation& nlp, - const int& n_neg_eig, - const bool force_reg=false); - + virtual ~hiopFactAcceptorIC() {} + + virtual int requireReFactorization(const hiopNlpFormulation& nlp, const int& n_neg_eig, const bool force_reg = false); + protected: - int n_required_neg_eig_; + int n_required_neg_eig_; }; class hiopFactAcceptorInertiaFreeDWD : public hiopFactAcceptor { public: - /** - * Default constructor + /** + * Default constructor * Check inertia condition to determine if a factorization is acceptable or not */ hiopFactAcceptorInertiaFreeDWD(hiopPDPerturbation* p, const size_type n_required_neg_eig) - : hiopFactAcceptor(p), - n_required_neg_eig_(n_required_neg_eig) + : hiopFactAcceptor(p), + n_required_neg_eig_(n_required_neg_eig) {} - virtual ~hiopFactAcceptorInertiaFreeDWD() - {} - - virtual int requireReFactorization(const hiopNlpFormulation& nlp, - const int& n_neg_eig, - const bool force_reg=false); - + virtual ~hiopFactAcceptorInertiaFreeDWD() {} + + virtual int requireReFactorization(const hiopNlpFormulation& nlp, const int& n_neg_eig, const bool force_reg = false); + protected: int n_required_neg_eig_; - }; -} //end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopFilter.cpp b/src/Optimization/hiopFilter.cpp index 095ad35f6..54988c1ad 100644 --- a/src/Optimization/hiopFilter.cpp +++ b/src/Optimization/hiopFilter.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #include "hiopFilter.hpp" @@ -56,10 +56,10 @@ namespace hiop bool hiopFilter::contains(const double& theta, const double& phi) const { list::const_iterator it = entries.begin(); - bool bFound=false; - while(it!=entries.end()) { - if(theta>=it->theta && phi>=it->phi) { - bFound=true; + bool bFound = false; + while(it != entries.end()) { + if(theta >= it->theta && phi >= it->phi) { + bFound = true; break; } ++it; @@ -72,15 +72,14 @@ void hiopFilter::print(FILE* file, const char* msg) const if(msg) fprintf(file, "%s", msg); fprintf(file, " (theta, phi) pairs: "); - for(auto& fe : entries) { + for(auto& fe: entries) { fprintf(file, "(%22.16e, %22.16e) ", fe.theta, fe.phi); } - if(entries.size()==0) { + if(entries.size() == 0) { fprintf(file, "filter is empty"); } fprintf(file, "\n"); } - -}; +}; // namespace hiop diff --git a/src/Optimization/hiopFilter.hpp b/src/Optimization/hiopFilter.hpp index 421febb1e..17fbd1ef3 100644 --- a/src/Optimization/hiopFilter.hpp +++ b/src/Optimization/hiopFilter.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_FILTER @@ -59,32 +59,42 @@ namespace hiop class hiopFilter { public: - hiopFilter() { }; - ~hiopFilter() { }; - inline void initialize (const double& theta_max) { entries.clear(); entries.push_front(FilterEntry(theta_max,-1e20)); } + hiopFilter() {}; + ~hiopFilter() {}; + inline void initialize(const double& theta_max) + { + entries.clear(); + entries.push_front(FilterEntry(theta_max, -1e20)); + } inline void reinitialize(const double& theta_max) { initialize(theta_max); } inline void clear() { entries.clear(); } - - //entries are pushed at the front since these are most likely to reject new iterates - inline void add(const double& theta, const double& phi) - { - entries.push_front(FilterEntry(theta,phi)); - } - + + // entries are pushed at the front since these are most likely to reject new iterates + inline void add(const double& theta, const double& phi) { entries.push_front(FilterEntry(theta, phi)); } + bool contains(const double& theta, const double& phi) const; void print(FILE* file, const char* msg) const; + private: - struct FilterEntry { - FilterEntry(const double& t, const double& p) : theta(t), phi(p) {}; - double theta,phi; + struct FilterEntry + { + FilterEntry(const double& t, const double& p) + : theta(t), + phi(p) {}; + double theta, phi; #ifdef HIOP_DEEPCHECKS - FilterEntry() : theta(0.), phi(0.) { assert(true); } + FilterEntry() + : theta(0.), + phi(0.) + { + assert(true); + } #endif }; std::list entries; }; -} +} // namespace hiop #endif diff --git a/src/Optimization/hiopIterate.cpp b/src/Optimization/hiopIterate.cpp index b47913970..81c9d6887 100644 --- a/src/Optimization/hiopIterate.cpp +++ b/src/Optimization/hiopIterate.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -64,12 +64,12 @@ namespace hiop { hiopIterate::hiopIterate(const hiopNlpFormulation* nlp_) - : sx_arg1_{nullptr}, - sx_arg2_{nullptr}, - sx_arg3_{nullptr}, - sd_arg1_{nullptr}, - sd_arg2_{nullptr}, - sd_arg3_{nullptr} + : sx_arg1_{nullptr}, + sx_arg2_{nullptr}, + sx_arg3_{nullptr}, + sd_arg1_{nullptr}, + sd_arg2_{nullptr}, + sd_arg3_{nullptr} { nlp = nlp_; x = nlp->alloc_primal_vec(); @@ -81,7 +81,7 @@ hiopIterate::hiopIterate(const hiopNlpFormulation* nlp_) sdl = d->new_copy(); sdu = d->new_copy(); - //duals + // duals yc = nlp->alloc_dual_eq_vec(); yc->setToZero(); yd = d->new_copy(); @@ -114,11 +114,7 @@ hiopIterate::~hiopIterate() } /* cloning and copying */ -hiopIterate* hiopIterate::alloc_clone() const -{ - return new hiopIterate(this->nlp); -} - +hiopIterate* hiopIterate::alloc_clone() const { return new hiopIterate(this->nlp); } hiopIterate* hiopIterate::new_copy() const { @@ -127,64 +123,59 @@ hiopIterate* hiopIterate::new_copy() const return copy; } -void hiopIterate::copyFrom(const hiopIterate& src) +void hiopIterate::copyFrom(const hiopIterate& src) { x->copyFrom(*src.x); d->copyFrom(*src.d); - yc->copyFrom(*src.yc); + yc->copyFrom(*src.yc); yd->copyFrom(*src.yd); - sxl->copyFrom(*src.sxl); + sxl->copyFrom(*src.sxl); sxu->copyFrom(*src.sxu); sdl->copyFrom(*src.sdl); sdu->copyFrom(*src.sdu); - zl->copyFrom(*src.zl); + zl->copyFrom(*src.zl); zu->copyFrom(*src.zu); vl->copyFrom(*src.vl); vu->copyFrom(*src.vu); } -void hiopIterate::print(FILE* f, const char* msg/*=NULL*/) const +void hiopIterate::print(FILE* f, const char* msg /*=NULL*/) const { - if(NULL==msg) fprintf(f, "hiopIterate:\n"); - else fprintf(f, "%s\n", msg); - - x->print( f, "x: "); - d->print( f, "d: "); - yc->print( f, "yc: "); - yd->print( f, "yd: "); - sxl->print(f, "sxl: "); + if(NULL == msg) + fprintf(f, "hiopIterate:\n"); + else + fprintf(f, "%s\n", msg); + + x->print(f, "x: "); + d->print(f, "d: "); + yc->print(f, "yc: "); + yd->print(f, "yd: "); + sxl->print(f, "sxl: "); sxu->print(f, "sxu: "); sdl->print(f, "sdl: "); sdu->print(f, "sdu: "); - zl->print( f, "zl: "); - zu->print( f, "zu: "); - vl->print( f, "vl: "); - vu->print( f, "vu: "); + zl->print(f, "zl: "); + zu->print(f, "zu: "); + vl->print(f, "vl: "); + vu->print(f, "vu: "); } - -void hiopIterate:: -projectPrimalsXIntoBounds(double kappa1, double kappa2) +void hiopIterate::projectPrimalsXIntoBounds(double kappa1, double kappa2) { - if(!x->projectIntoBounds_local(nlp->get_xl(),nlp->get_ixl(), - nlp->get_xu(),nlp->get_ixu(), - kappa1,kappa2)) { - nlp->log->printf(hovError, + if(!x->projectIntoBounds_local(nlp->get_xl(), nlp->get_ixl(), nlp->get_xu(), nlp->get_ixu(), kappa1, kappa2)) { + nlp->log->printf(hovError, "Problem is infeasible due to inconsistent bounds for the variables (lower>upper). " "Please fix this. In the meanwhile, HiOp will exit (ungracefully).\n"); exit(-1); } } -void hiopIterate:: -projectPrimalsDIntoBounds(double kappa1, double kappa2) +void hiopIterate::projectPrimalsDIntoBounds(double kappa1, double kappa2) { - if(!d->projectIntoBounds_local(nlp->get_dl(),nlp->get_idl(), - nlp->get_du(),nlp->get_idu(), - kappa1,kappa2)) { - nlp->log->printf(hovError, + if(!d->projectIntoBounds_local(nlp->get_dl(), nlp->get_idl(), nlp->get_du(), nlp->get_idu(), kappa1, kappa2)) { + nlp->log->printf(hovError, "Problem is infeasible due to inconsistent inequality constraints (lower>upper). " "Please fix this. In the meanwhile, HiOp will exit (ungracefully).\n"); exit(-1); @@ -213,12 +204,13 @@ double hiopIterate::normOneOfBoundDuals() const assert(vl->matchesPattern(nlp->get_idl())); assert(vu->matchesPattern(nlp->get_idu())); #endif - //work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. - double nrm1=zl->onenorm_local() + zu->onenorm_local(); + // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. + double nrm1 = zl->onenorm_local() + zu->onenorm_local(); #ifdef HIOP_USE_MPI double nrm1_global; - int ierr=MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); assert(MPI_SUCCESS==ierr); - nrm1=nrm1_global; + int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); + assert(MPI_SUCCESS == ierr); + nrm1 = nrm1_global; #endif nrm1 += vl->onenorm_local() + vu->onenorm_local(); return nrm1; @@ -232,12 +224,13 @@ double hiopIterate::normOneOfEqualityDuals() const assert(vl->matchesPattern(nlp->get_idl())); assert(vu->matchesPattern(nlp->get_idu())); #endif - //work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. - double nrm1=zl->onenorm_local() + zu->onenorm_local(); + // work locally with all the vectors. This will result in only one MPI_Allreduce call instead of two. + double nrm1 = zl->onenorm_local() + zu->onenorm_local(); #ifdef HIOP_USE_MPI double nrm1_global; - int ierr=MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); assert(MPI_SUCCESS==ierr); - nrm1=nrm1_global; + int ierr = MPI_Allreduce(&nrm1, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); + assert(MPI_SUCCESS == ierr); + nrm1 = nrm1_global; #endif nrm1 += vl->onenorm_local() + vu->onenorm_local() + yc->onenorm_local() + yd->onenorm_local(); return nrm1; @@ -251,23 +244,23 @@ void hiopIterate::normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const assert(vl->matchesPattern(nlp->get_idl())); assert(vu->matchesPattern(nlp->get_idu())); #endif - //work locally with all the vectors. This will result in only one MPI_Allreduce call + // work locally with all the vectors. This will result in only one MPI_Allreduce call nrm1Bnd = zl->onenorm_local() + zu->onenorm_local(); #ifdef HIOP_USE_MPI double nrm1_global; - int ierr=MPI_Allreduce(&nrm1Bnd, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); - assert(MPI_SUCCESS==ierr); - nrm1Bnd=nrm1_global; + int ierr = MPI_Allreduce(&nrm1Bnd, &nrm1_global, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); + assert(MPI_SUCCESS == ierr); + nrm1Bnd = nrm1_global; #endif nrm1Bnd += vl->onenorm_local() + vu->onenorm_local(); - nrm1Eq = yc->onenorm_local() + yd->onenorm_local(); + nrm1Eq = yc->onenorm_local() + yd->onenorm_local(); } void hiopIterate::selectPattern() { sxl->selectPattern(nlp->get_ixl()); zl->selectPattern(nlp->get_ixl()); - + sxu->selectPattern(nlp->get_ixu()); zu->selectPattern(nlp->get_ixu()); @@ -285,7 +278,7 @@ void hiopIterate::determineSlacks() sxl->selectPattern(nlp->get_ixl()); sxu->copyFrom(nlp->get_xu()); - sxu->axpy(-1., *x); + sxu->axpy(-1., *x); sxu->selectPattern(nlp->get_ixu()); sdl->copyFrom(*d); @@ -293,7 +286,7 @@ void hiopIterate::determineSlacks() sdl->selectPattern(nlp->get_idl()); sdu->copyFrom(nlp->get_du()); - sdu->axpy(-1., *d); + sdu->axpy(-1., *d); sdu->selectPattern(nlp->get_idu()); } @@ -302,12 +295,16 @@ size_type hiopIterate::compute_safe_slacks(const hiopIterate& iter_curr, const d #if 1 determineSlacks(); #else - sxl->copyFrom(*iter.sxl); sxl->axpy(alphaprimal,*dir.sxl); - sxu->copyFrom(*iter.sxu); sxu->axpy(alphaprimal,*dir.sxu); - sdl->copyFrom(*iter.sdl); sdl->axpy(alphaprimal,*dir.sdl); - sdu->copyFrom(*iter.sdu); sdu->axpy(alphaprimal,*dir.sdu); -#endif // 1 - + sxl->copyFrom(*iter.sxl); + sxl->axpy(alphaprimal, *dir.sxl); + sxu->copyFrom(*iter.sxu); + sxu->axpy(alphaprimal, *dir.sxu); + sdl->copyFrom(*iter.sdl); + sdl->axpy(alphaprimal, *dir.sdl); + sdu->copyFrom(*iter.sdu); + sdu->axpy(alphaprimal, *dir.sdu); +#endif // 1 + size_type retval = adjust_small_slacks(iter_curr, mu); #ifdef HIOP_DEEPCHECKS @@ -332,61 +329,75 @@ void hiopIterate::determineDualsBounds_d(const double& mu) vu->componentDiv_w_selectPattern(*sdu, nlp->get_idu()); } -bool hiopIterate:: -fractionToTheBdry(const hiopIterate& dir, const double& tau, double& alphaprimal, double& alphadual) const +bool hiopIterate::fractionToTheBdry(const hiopIterate& dir, const double& tau, double& alphaprimal, double& alphadual) const { - alphaprimal=alphadual=10.0; - double alpha=0; - alpha=sxl->fractionToTheBdry_w_pattern_local(*dir.sxl, tau, nlp->get_ixl()); - alphaprimal=fmin(alphaprimal,alpha); - - alpha=sxu->fractionToTheBdry_w_pattern_local(*dir.sxu, tau, nlp->get_ixu()); - alphaprimal=fmin(alphaprimal,alpha); - - alpha=sdl->fractionToTheBdry_w_pattern_local(*dir.sdl, tau, nlp->get_idl()); - alphaprimal=fmin(alphaprimal,alpha); - - alpha=sdu->fractionToTheBdry_w_pattern_local(*dir.sdu, tau, nlp->get_idu()); - alphaprimal=fmin(alphaprimal,alpha); - - //for dual variables - alpha=zl->fractionToTheBdry_w_pattern_local(*dir.zl, tau, nlp->get_ixl()); - alphadual=fmin(alphadual,alpha); - - alpha=zu->fractionToTheBdry_w_pattern_local(*dir.zu, tau, nlp->get_ixu()); - alphadual=fmin(alphadual,alpha); - - alpha=vl->fractionToTheBdry_w_pattern_local(*dir.vl, tau, nlp->get_idl()); - alphadual=fmin(alphadual,alpha); - - alpha=vu->fractionToTheBdry_w_pattern_local(*dir.vu, tau, nlp->get_idu()); - alphadual=fmin(alphadual,alpha); + alphaprimal = alphadual = 10.0; + double alpha = 0; + alpha = sxl->fractionToTheBdry_w_pattern_local(*dir.sxl, tau, nlp->get_ixl()); + alphaprimal = fmin(alphaprimal, alpha); + + alpha = sxu->fractionToTheBdry_w_pattern_local(*dir.sxu, tau, nlp->get_ixu()); + alphaprimal = fmin(alphaprimal, alpha); + + alpha = sdl->fractionToTheBdry_w_pattern_local(*dir.sdl, tau, nlp->get_idl()); + alphaprimal = fmin(alphaprimal, alpha); + + alpha = sdu->fractionToTheBdry_w_pattern_local(*dir.sdu, tau, nlp->get_idu()); + alphaprimal = fmin(alphaprimal, alpha); + + // for dual variables + alpha = zl->fractionToTheBdry_w_pattern_local(*dir.zl, tau, nlp->get_ixl()); + alphadual = fmin(alphadual, alpha); + + alpha = zu->fractionToTheBdry_w_pattern_local(*dir.zu, tau, nlp->get_ixu()); + alphadual = fmin(alphadual, alpha); + + alpha = vl->fractionToTheBdry_w_pattern_local(*dir.vl, tau, nlp->get_idl()); + alphadual = fmin(alphadual, alpha); + + alpha = vu->fractionToTheBdry_w_pattern_local(*dir.vu, tau, nlp->get_idu()); + alphadual = fmin(alphadual, alpha); #ifdef HIOP_USE_MPI - double aux[2]={alphaprimal,alphadual}, aux_g[2]; - int ierr=MPI_Allreduce(aux, aux_g, 2, MPI_DOUBLE, MPI_MIN, nlp->get_comm()); assert(MPI_SUCCESS==ierr); - alphaprimal=aux_g[0]; alphadual=aux_g[1]; + double aux[2] = {alphaprimal, alphadual}, aux_g[2]; + int ierr = MPI_Allreduce(aux, aux_g, 2, MPI_DOUBLE, MPI_MIN, nlp->get_comm()); + assert(MPI_SUCCESS == ierr); + alphaprimal = aux_g[0]; + alphadual = aux_g[1]; #endif return true; } - -bool hiopIterate::takeStep_primals(const hiopIterate& iter, const hiopIterate& dir, const double& alphaprimal, const double& alphadual) +bool hiopIterate::takeStep_primals(const hiopIterate& iter, + const hiopIterate& dir, + const double& alphaprimal, + const double& alphadual) { - x->copyFrom(*iter.x); x->axpy(alphaprimal, *dir.x); - d->copyFrom(*iter.d); d->axpy(alphaprimal, *dir.d); + x->copyFrom(*iter.x); + x->axpy(alphaprimal, *dir.x); + d->copyFrom(*iter.d); + d->axpy(alphaprimal, *dir.d); return true; } -bool hiopIterate::takeStep_duals(const hiopIterate& iter, const hiopIterate& dir, const double& alphaprimal, const double& alphadual) -{ - yd->copyFrom(*iter.yd); yd->axpy(alphaprimal, *dir.yd); - yc->copyFrom(*iter.yc); yc->axpy(alphaprimal, *dir.yc); - zl->copyFrom(*iter.zl); zl->axpy(alphadual, *dir.zl); - zu->copyFrom(*iter.zu); zu->axpy(alphadual, *dir.zu); - vl->copyFrom(*iter.vl); vl->axpy(alphadual, *dir.vl); - vu->copyFrom(*iter.vu); vu->axpy(alphadual, *dir.vu); +bool hiopIterate::takeStep_duals(const hiopIterate& iter, + const hiopIterate& dir, + const double& alphaprimal, + const double& alphadual) +{ + yd->copyFrom(*iter.yd); + yd->axpy(alphaprimal, *dir.yd); + yc->copyFrom(*iter.yc); + yc->axpy(alphaprimal, *dir.yc); + zl->copyFrom(*iter.zl); + zl->axpy(alphadual, *dir.zl); + zu->copyFrom(*iter.zu); + zu->axpy(alphadual, *dir.zu); + vl->copyFrom(*iter.vl); + vl->axpy(alphadual, *dir.vl); + vu->copyFrom(*iter.vu); + vu->axpy(alphadual, *dir.vu); #ifdef HIOP_DEEPCHECKS assert(zl->matchesPattern(nlp->get_ixl())); assert(zu->matchesPattern(nlp->get_ixu())); @@ -428,24 +439,23 @@ int hiopIterate::adjust_small_slacks(hiopVector& slack, hiopVector& arg3) { int num_adjusted_slack = 0; - double zero=0.0; + double zero = 0.0; if(slack.get_size() > 0) { double slack_min; - double small_val = std::numeric_limits::epsilon()* fmin(1., mu); + double small_val = std::numeric_limits::epsilon() * fmin(1., mu); double scale_fact = pow(std::numeric_limits::epsilon(), 0.75); /** * if slack < small_val, - * new_slack = last_slack + min( max(mu/slack_dual,small_val), scale_fact * max(1.0,|bound|) ), + * new_slack = last_slack + min( max(mu/slack_dual,small_val), scale_fact * max(1.0,|bound|) ), */ slack_min = slack.min_w_pattern(select); if(slack_min < small_val) { - arg1.copyFrom(slack); // correct variable bound to avoid numerical difficulty - arg1.addConstant_w_patternSelect(-small_val,select); + arg1.addConstant_w_patternSelect(-small_val, select); arg1.component_min(0.0); num_adjusted_slack = arg1.numOfElemsLessThan(zero); @@ -479,45 +489,45 @@ int hiopIterate::adjust_small_slacks(hiopVector& slack, slack.copyFrom(arg1); #ifndef NDEBUG - assert(slack.matchesPattern(select)); + assert(slack.matchesPattern(select)); #endif } } - return num_adjusted_slack; + return num_adjusted_slack; } int hiopIterate::adjust_small_slacks(const hiopIterate& iter_curr, const double& mu) { int num_adjusted_slacks = 0; - if(nullptr==sx_arg1_) { + if(nullptr == sx_arg1_) { sx_arg1_ = sxl->alloc_clone(); sx_arg2_ = sxl->alloc_clone(); - sx_arg3_ = sxl->alloc_clone(); + sx_arg3_ = sxl->alloc_clone(); sd_arg1_ = sdl->alloc_clone(); sd_arg2_ = sdl->alloc_clone(); sd_arg3_ = sdl->alloc_clone(); } - num_adjusted_slacks += adjust_small_slacks(*sxl, nlp->get_xl(), *(iter_curr.get_zl()), (nlp->get_ixl()), mu, - *sx_arg1_, *sx_arg2_, *sx_arg3_); - num_adjusted_slacks += adjust_small_slacks(*sxu, nlp->get_xu(), *(iter_curr.get_zu()), (nlp->get_ixu()), mu, - *sx_arg1_, *sx_arg2_, *sx_arg3_); - num_adjusted_slacks += adjust_small_slacks(*sdl, nlp->get_dl(), *(iter_curr.get_vl()), (nlp->get_idl()), mu, - *sd_arg1_, *sd_arg2_, *sd_arg3_); - num_adjusted_slacks += adjust_small_slacks(*sdu, nlp->get_du(), *(iter_curr.get_vu()), (nlp->get_idu()), mu, - *sd_arg1_, *sd_arg2_, *sd_arg3_); + num_adjusted_slacks += + adjust_small_slacks(*sxl, nlp->get_xl(), *(iter_curr.get_zl()), (nlp->get_ixl()), mu, *sx_arg1_, *sx_arg2_, *sx_arg3_); + num_adjusted_slacks += + adjust_small_slacks(*sxu, nlp->get_xu(), *(iter_curr.get_zu()), (nlp->get_ixu()), mu, *sx_arg1_, *sx_arg2_, *sx_arg3_); + num_adjusted_slacks += + adjust_small_slacks(*sdl, nlp->get_dl(), *(iter_curr.get_vl()), (nlp->get_idl()), mu, *sd_arg1_, *sd_arg2_, *sd_arg3_); + num_adjusted_slacks += + adjust_small_slacks(*sdu, nlp->get_du(), *(iter_curr.get_vu()), (nlp->get_idu()), mu, *sd_arg1_, *sd_arg2_, *sd_arg3_); - return num_adjusted_slacks; + return num_adjusted_slacks; } bool hiopIterate::adjustDuals_primalLogHessian(const double& mu, const double& kappa_Sigma) { - zl->adjustDuals_plh(*sxl,nlp->get_ixl(),mu,kappa_Sigma); - zu->adjustDuals_plh(*sxu,nlp->get_ixu(),mu,kappa_Sigma); - vl->adjustDuals_plh(*sdl,nlp->get_idl(),mu,kappa_Sigma); - vu->adjustDuals_plh(*sdu,nlp->get_idu(),mu,kappa_Sigma); + zl->adjustDuals_plh(*sxl, nlp->get_ixl(), mu, kappa_Sigma); + zu->adjustDuals_plh(*sxu, nlp->get_ixu(), mu, kappa_Sigma); + vl->adjustDuals_plh(*sdl, nlp->get_idl(), mu, kappa_Sigma); + vu->adjustDuals_plh(*sdu, nlp->get_idu(), mu, kappa_Sigma); #ifdef HIOP_DEEPCHECKS assert(zl->matchesPattern(nlp->get_ixl())); assert(zu->matchesPattern(nlp->get_ixu())); @@ -531,67 +541,69 @@ double hiopIterate::evalLogBarrier() const { double barrier; barrier = sxl->logBarrier_local(nlp->get_ixl()); - barrier+= sxu->logBarrier_local(nlp->get_ixu()); + barrier += sxu->logBarrier_local(nlp->get_ixu()); #ifdef HIOP_USE_MPI double res; - int ierr = MPI_Allreduce(&barrier, &res, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); assert(ierr==MPI_SUCCESS); - barrier=res; + int ierr = MPI_Allreduce(&barrier, &res, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); + assert(ierr == MPI_SUCCESS); + barrier = res; #endif - barrier+= sdl->logBarrier_local(nlp->get_idl()); - barrier+= sdu->logBarrier_local(nlp->get_idu()); + barrier += sdl->logBarrier_local(nlp->get_idl()); + barrier += sdu->logBarrier_local(nlp->get_idu()); return barrier; } -void hiopIterate::addLogBarGrad_x(const double& mu, hiopVector& gradx) const +void hiopIterate::addLogBarGrad_x(const double& mu, hiopVector& gradx) const { // gradx = grad - mu / sxl = grad - mu * select/sxl gradx.addLogBarrierGrad(-mu, *sxl, nlp->get_ixl()); - gradx.addLogBarrierGrad( mu, *sxu, nlp->get_ixu()); + gradx.addLogBarrierGrad(mu, *sxu, nlp->get_ixu()); } -void hiopIterate::addLogBarGrad_d(const double& mu, hiopVector& gradd) const +void hiopIterate::addLogBarGrad_d(const double& mu, hiopVector& gradd) const { gradd.addLogBarrierGrad(-mu, *sdl, nlp->get_idl()); - gradd.addLogBarrierGrad( mu, *sdu, nlp->get_idu()); + gradd.addLogBarrierGrad(mu, *sdu, nlp->get_idu()); } double hiopIterate::linearDampingTerm(const double& mu, const double& kappa_d) const { double term; - term = sxl->linearDampingTerm_local(nlp->get_ixl(), nlp->get_ixu(), mu, kappa_d); + term = sxl->linearDampingTerm_local(nlp->get_ixl(), nlp->get_ixu(), mu, kappa_d); term += sxu->linearDampingTerm_local(nlp->get_ixu(), nlp->get_ixl(), mu, kappa_d); #ifdef HIOP_USE_MPI double res; - int ierr = MPI_Allreduce(&term, &res, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); assert(ierr==MPI_SUCCESS); + int ierr = MPI_Allreduce(&term, &res, 1, MPI_DOUBLE, MPI_SUM, nlp->get_comm()); + assert(ierr == MPI_SUCCESS); term = res; -#endif +#endif term += sdl->linearDampingTerm_local(nlp->get_idl(), nlp->get_idu(), mu, kappa_d); term += sdu->linearDampingTerm_local(nlp->get_idu(), nlp->get_idl(), mu, kappa_d); return term; } -void hiopIterate::addLinearDampingTermToGrad_x(const double& mu, - const double& kappa_d, - const double& beta, +void hiopIterate::addLinearDampingTermToGrad_x(const double& mu, + const double& kappa_d, + const double& beta, hiopVector& grad_x) const { - assert(x->get_local_size()==grad_x.get_local_size()); + assert(x->get_local_size() == grad_x.get_local_size()); - const double ct=kappa_d*mu*beta; + const double ct = kappa_d * mu * beta; grad_x.addLinearDampingTerm(nlp->get_ixl(), nlp->get_ixu(), 1.0, ct); } -void hiopIterate::addLinearDampingTermToGrad_d(const double& mu, - const double& kappa_d, - const double& beta, +void hiopIterate::addLinearDampingTermToGrad_d(const double& mu, + const double& kappa_d, + const double& beta, hiopVector& grad_d) const { - assert(d->get_local_size()==grad_d.get_local_size()); - - const double ct=kappa_d*mu*beta; + assert(d->get_local_size() == grad_d.get_local_size()); + + const double ct = kappa_d * mu * beta; grad_d.addLinearDampingTerm(nlp->get_idl(), nlp->get_idu(), 1.0, ct); } -}; +}; // namespace hiop diff --git a/src/Optimization/hiopIterate.hpp b/src/Optimization/hiopIterate.hpp index 6e0e3e0e1..a2ad86d2f 100644 --- a/src/Optimization/hiopIterate.hpp +++ b/src/Optimization/hiopIterate.hpp @@ -74,7 +74,7 @@ class hiopIterate virtual void setEqualityDualsToConstant(const double& v); /** * Computes the slacks given the primals: sxl=x-xl, sxu=xu-x, and similar - * for sdl and sdu. This function will adjust the variable bounds + * for sdl and sdu. This function will adjust the variable bounds * if the corresponding slack is too small. * return the number of adjusted variables on exit */ @@ -88,14 +88,17 @@ class hiopIterate virtual void determineDualsBounds_d(const double& mu); /* max{a\in(0,1]| x+ad >=(1-tau)x} */ - bool fractionToTheBdry(const hiopIterate& dir, const double& tau, - double& alphaprimal, double& alphadual) const; + bool fractionToTheBdry(const hiopIterate& dir, const double& tau, double& alphaprimal, double& alphadual) const; /* take the step: this = iter+alpha*dir */ - virtual bool takeStep_primals(const hiopIterate& iter, const hiopIterate& dir, - const double& alphaprimal, const double& alphadual); - virtual bool takeStep_duals(const hiopIterate& iter, const hiopIterate& dir, - const double& alphaprimal, const double& alphadual); + virtual bool takeStep_primals(const hiopIterate& iter, + const hiopIterate& dir, + const double& alphaprimal, + const double& alphadual); + virtual bool takeStep_duals(const hiopIterate& iter, + const hiopIterate& dir, + const double& alphaprimal, + const double& alphadual); /// @brief adjust slack variables if they are negative, or if they are positive but too small virtual int adjust_small_slacks(const hiopIterate& iter_curr, const double& mu); @@ -121,25 +124,25 @@ class hiopIterate * negative) to each elements of the gradient that corresponds to a variable x bounded only * from below (above). The parameter `beta` is 1.0 or -1.0 indicating whether one should * add or substract kappa_d*mu; this is to accomodate also residuals computations. */ - virtual void addLinearDampingTermToGrad_x(const double& mu, - const double& kappa_d, + virtual void addLinearDampingTermToGrad_x(const double& mu, + const double& kappa_d, const double& beta, - hiopVector& grad_x) const; + hiopVector& grad_x) const; /* @brief Adds the d-damping term to the gradient, essentially adds mu*kappa_d*beta (or its * negative) to each elements of the gradient that corresponds to a variable d bounded only * from below (above). The parameter `beta` is 1.0 or -1.0 indicating whether one should * add or substract kappa_d*mu; this is to accomodate also residuals computation. */ - virtual void addLinearDampingTermToGrad_d(const double& mu, - const double& kappa_d, + virtual void addLinearDampingTermToGrad_d(const double& mu, + const double& kappa_d, const double& beta, - hiopVector& grad_d) const; + hiopVector& grad_d) const; /** norms for individual parts of the iterate (on demand computation) */ virtual double normOneOfBoundDuals() const; virtual double normOneOfEqualityDuals() const; /* same as above but computed in one shot to save on communication and computation */ - virtual void normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const; + virtual void normOneOfDuals(double& nrm1Eq, double& nrm1Bnd) const; /// @brief Entries corresponding to zeros in ix are set to zero virtual void selectPattern(); @@ -150,20 +153,20 @@ class hiopIterate void copyFrom(const hiopIterate& src); /* accessors */ - inline hiopVector* get_x() const {return x;} - inline hiopVector* get_d() const {return d;} - inline hiopVector* get_sxl() const {return sxl;} - inline hiopVector* get_sxu() const {return sxu;} - inline hiopVector* get_sdl() const {return sdl;} - inline hiopVector* get_sdu() const {return sdu;} - inline hiopVector* get_yc() const {return yc;} - inline hiopVector* get_yd() const {return yd;} - inline hiopVector* get_zl() const {return zl;} - inline hiopVector* get_zu() const {return zu;} - inline hiopVector* get_vl() const {return vl;} - inline hiopVector* get_vu() const {return vu;} - - void print(FILE* f, const char* msg=NULL) const; + inline hiopVector* get_x() const { return x; } + inline hiopVector* get_d() const { return d; } + inline hiopVector* get_sxl() const { return sxl; } + inline hiopVector* get_sxu() const { return sxu; } + inline hiopVector* get_sdl() const { return sdl; } + inline hiopVector* get_sdu() const { return sdu; } + inline hiopVector* get_yc() const { return yc; } + inline hiopVector* get_yd() const { return yd; } + inline hiopVector* get_zl() const { return zl; } + inline hiopVector* get_zu() const { return zu; } + inline hiopVector* get_vl() const { return vl; } + inline hiopVector* get_vu() const { return vu; } + + void print(FILE* f, const char* msg = NULL) const; friend class hiopResidual; friend class hiopKKTLinSys; @@ -185,38 +188,39 @@ class hiopIterate private: /** Primal variables */ - hiopVector* x; //the original decision x - hiopVector* d; //the adtl decisions d, d=d(x) - hiopVector* sxl; //slacks for x. in x: x-sxl=xl - hiopVector* sxu; //slacks for x. in x: x+sxu=xu - hiopVector* sdl; //slacks for d. in d: d-sdl=dl - hiopVector* sdu; //slacks for d. in d: d+sdu=du - hiopVector* sx_arg1_; //workspace for slacks for x - hiopVector* sx_arg2_; //workspace for slacks for x - hiopVector* sx_arg3_; //workspace for slacks for x - hiopVector* sd_arg1_; //workspace for slacks for d - hiopVector* sd_arg2_; //workspace for slacks for d - hiopVector* sd_arg3_; //workspace for slacks for d + hiopVector* x; // the original decision x + hiopVector* d; // the adtl decisions d, d=d(x) + hiopVector* sxl; // slacks for x. in x: x-sxl=xl + hiopVector* sxu; // slacks for x. in x: x+sxu=xu + hiopVector* sdl; // slacks for d. in d: d-sdl=dl + hiopVector* sdu; // slacks for d. in d: d+sdu=du + hiopVector* sx_arg1_; // workspace for slacks for x + hiopVector* sx_arg2_; // workspace for slacks for x + hiopVector* sx_arg3_; // workspace for slacks for x + hiopVector* sd_arg1_; // workspace for slacks for d + hiopVector* sd_arg2_; // workspace for slacks for d + hiopVector* sd_arg3_; // workspace for slacks for d /** Dual variables */ - hiopVector* yc; //for c(x)=crhs - hiopVector* yd; //for d(x)-d=0 - hiopVector* zl; //for slacks eq. in x: x-sxl=xl - hiopVector* zu; //for slacks eq. in x: x+sxu=xu - hiopVector* vl; //for slacks eq. in d: d-sdl=dl - hiopVector* vu; //for slacks eq. in d: d+sdu=du + hiopVector* yc; // for c(x)=crhs + hiopVector* yd; // for d(x)-d=0 + hiopVector* zl; // for slacks eq. in x: x-sxl=xl + hiopVector* zu; // for slacks eq. in x: x+sxu=xu + hiopVector* vl; // for slacks eq. in d: d-sdl=dl + hiopVector* vu; // for slacks eq. in d: d+sdu=du private: - //associated info from problem formulation - const hiopNlpFormulation * nlp; + // associated info from problem formulation + const hiopNlpFormulation* nlp; + private: /** * @brief adjust slack variables if they are negative, or if they are positive but too small * if slack < small_val, compute new_slack as * new_slack = last_slack + min( max(mu/slack_dual,small_val), scale_fact * max(1.0,|bound|) ) */ - virtual int adjust_small_slacks(hiopVector& slack, - const hiopVector& bound, - const hiopVector& slack_dual, + virtual int adjust_small_slacks(hiopVector& slack, + const hiopVector& bound, + const hiopVector& slack_dual, const hiopVector& select, const double& mu, hiopVector& arg1, @@ -225,8 +229,8 @@ class hiopIterate hiopIterate() {}; hiopIterate(const hiopIterate&) {}; - hiopIterate& operator=(const hiopIterate& o) {return *this;} + hiopIterate& operator=(const hiopIterate& o) { return *this; } }; -} +} // namespace hiop #endif diff --git a/src/Optimization/hiopKKTLinSys.cpp b/src/Optimization/hiopKKTLinSys.cpp index a2704909a..d388d1f15 100644 --- a/src/Optimization/hiopKKTLinSys.cpp +++ b/src/Optimization/hiopKKTLinSys.cpp @@ -57,24 +57,24 @@ namespace hiop { hiopKKTLinSys::hiopKKTLinSys(hiopNlpFormulation* nlp) - : nlp_(nlp), - iter_(NULL), - grad_f_(NULL), - Jac_c_(NULL), - Jac_d_(NULL), - Hess_(NULL), - perturb_calc_(NULL), - safe_mode_(true), - kkt_opr_(nullptr), - prec_opr_(nullptr), - bicgIR_(nullptr), - delta_wx_(nullptr), - delta_wd_(nullptr), - delta_cc_(nullptr), - delta_cd_(nullptr) - + : nlp_(nlp), + iter_(NULL), + grad_f_(NULL), + Jac_c_(NULL), + Jac_d_(NULL), + Hess_(NULL), + perturb_calc_(NULL), + safe_mode_(true), + kkt_opr_(nullptr), + prec_opr_(nullptr), + bicgIR_(nullptr), + delta_wx_(nullptr), + delta_wd_(nullptr), + delta_cc_(nullptr), + delta_cd_(nullptr) + { - perf_report_ = "on"==hiop::tolower(nlp_->options->GetString("time_kkt")); + perf_report_ = "on" == hiop::tolower(nlp_->options->GetString("time_kkt")); mu_ = nlp_->options->GetNumeric("mu0"); } @@ -85,7 +85,7 @@ hiopKKTLinSys::~hiopKKTLinSys() delete bicgIR_; } -//computes the solve error for the KKT Linear system; used only for correctness checking +// computes the solve error for the KKT Linear system; used only for correctness checking double hiopKKTLinSys::errorKKT(const hiopResidual* resid, const hiopIterate* sol) { nlp_->log->printf(hovLinAlgScalars, "KKT LinSys::errorKKT KKT_large residuals norm:\n"); @@ -95,160 +95,158 @@ double hiopKKTLinSys::errorKKT(const hiopResidual* resid, const hiopIterate* sol delta_wx_ = perturb_calc_->get_curr_delta_wx(); delta_wd_ = perturb_calc_->get_curr_delta_wd(); delta_cc_ = perturb_calc_->get_curr_delta_cc(); - delta_cd_ = perturb_calc_->get_curr_delta_cd(); + delta_cd_ = perturb_calc_->get_curr_delta_cd(); } else { - } - double derr=1e20, aux; - hiopVector *RX=resid->rx->new_copy(); + double derr = 1e20, aux; + hiopVector* RX = resid->rx->new_copy(); - //RX = rx-H*dx-J'c*dyc-J'*dyd +dzl-dzu + // RX = rx-H*dx-J'c*dyc-J'*dyd +dzl-dzu HessianTimesVec_noLogBarrierTerm(1.0, *RX, -1.0, *sol->x); RX->axzpy(-1., *delta_wx_, *sol->x); Jac_c_->transTimesVec(1.0, *RX, -1.0, *sol->yc); Jac_d_->transTimesVec(1.0, *RX, -1.0, *sol->yd); - RX->axpy( 1.0, *sol->zl); + RX->axpy(1.0, *sol->zl); RX->axpy(-1.0, *sol->zu); - aux=RX->twonorm(); - derr=fmax(aux,derr); + aux = RX->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rx=%g\n", aux); - - //RD = rd - (-dyd - dvl + dvu + delta_wd_*dd) + // RD = rd - (-dyd - dvl + dvu + delta_wd_*dd) hiopVector* RD = resid->rd->new_copy(); RD->axpy(+1., *sol->yd); RD->axpy(+1., *sol->vl); RD->axpy(-1., *sol->vu); RD->axzpy(-1., *delta_wd_, *sol->d); - aux=RD->twonorm(); - derr=fmax(aux,derr); + aux = RD->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rd=%g\n", aux); - //RYC = ryc - Jc*dx + delta_cc_*dyc - hiopVector* RYC=resid->ryc->new_copy(); + // RYC = ryc - Jc*dx + delta_cc_*dyc + hiopVector* RYC = resid->ryc->new_copy(); Jac_c_->timesVec(1.0, *RYC, -1.0, *sol->x); RYC->axzpy(1., *delta_cc_, *sol->yc); - aux=RYC->twonorm(); - derr=fmax(aux,derr); + aux = RYC->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- ryc=%g\n", aux); delete RYC; - //RYD=ryd - Jd*dx + dd + delta_cd_*dyd - hiopVector* RYD=resid->ryd->new_copy(); + // RYD=ryd - Jd*dx + dd + delta_cd_*dyd + hiopVector* RYD = resid->ryd->new_copy(); Jac_d_->timesVec(1.0, *RYD, -1.0, *sol->x); RYD->axpy(1.0, *sol->d); RYD->axzpy(1., *delta_cd_, *sol->yd); - aux=RYD->infnorm(); - derr=fmax(aux,derr); + aux = RYD->infnorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- ryd=%g\n", aux); delete RYD; - //RXL=rxl+x-sxl + // RXL=rxl+x-sxl RX->copyFrom(*resid->rxl); - RX->axpy( 1.0, *sol->x); + RX->axpy(1.0, *sol->x); RX->axpy(-1.0, *sol->sxl); RX->selectPattern(nlp_->get_ixl()); - aux=RX->twonorm(); - derr=fmax(aux,derr); + aux = RX->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rxl=%g\n", aux); - //RXU=rxu-x-sxu + // RXU=rxu-x-sxu RX->copyFrom(*resid->rxu); RX->axpy(-1.0, *sol->x); RX->axpy(-1.0, *sol->sxu); RX->selectPattern(nlp_->get_ixu()); - aux=RX->twonorm(); - derr=fmax(aux,derr); + aux = RX->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rxu=%g\n", aux); - //RDL=rdl+d-sdl + // RDL=rdl+d-sdl RD->copyFrom(*resid->rdl); - RD->axpy( 1.0, *sol->d); + RD->axpy(1.0, *sol->d); RD->axpy(-1.0, *sol->sdl); RD->selectPattern(nlp_->get_idl()); - aux=RD->twonorm(); - derr=fmax(aux,derr); + aux = RD->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rdl=%g\n", aux); - //RDU=rdu-d-sdu + // RDU=rdu-d-sdu RD->copyFrom(*resid->rdu); RD->axpy(-1.0, *sol->d); RD->axpy(-1.0, *sol->sdu); RD->selectPattern(nlp_->get_idu()); - aux=RD->twonorm(); - derr=fmax(aux,derr); + aux = RD->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rdu=%g\n", aux); - //complementarity residuals checks: rszl - Sxl dzxl - Zxl dsxl + // complementarity residuals checks: rszl - Sxl dzxl - Zxl dsxl RX->copyFrom(*resid->rszl); - RX->axzpy(-1.0,*iter_->sxl,*sol->zl); - RX->axzpy(-1.0,*iter_->zl, *sol->sxl); - aux=RX->twonorm(); - derr=fmax(aux,derr); + RX->axzpy(-1.0, *iter_->sxl, *sol->zl); + RX->axzpy(-1.0, *iter_->zl, *sol->sxl); + aux = RX->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rszl=%g\n", aux); - //rszl - Sxl dzxl - Zxl dsxl + // rszl - Sxl dzxl - Zxl dsxl RX->copyFrom(*resid->rszu); - RX->axzpy(-1.0,*iter_->sxu,*sol->zu); - RX->axzpy(-1.0,*iter_->zu, *sol->sxu); - aux=RX->twonorm(); - derr=fmax(aux,derr); + RX->axzpy(-1.0, *iter_->sxu, *sol->zu); + RX->axzpy(-1.0, *iter_->zu, *sol->sxu); + aux = RX->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rszu=%g\n", aux); - delete RX; RX=NULL; + delete RX; + RX = NULL; - //complementarity residuals checks: rsvl - Sdl dvl - Vl dsdl + // complementarity residuals checks: rsvl - Sdl dvl - Vl dsdl RD->copyFrom(*resid->rsvl); - RD->axzpy(-1.0,*iter_->sdl,*sol->vl); - RD->axzpy(-1.0,*iter_->vl, *sol->sdl); - aux=RD->twonorm(); - derr=fmax(aux,derr); + RD->axzpy(-1.0, *iter_->sdl, *sol->vl); + RD->axzpy(-1.0, *iter_->vl, *sol->sdl); + aux = RD->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rsvl=%g\n", aux); - //complementarity residuals checks: rsvu - Sdu dvu - Vu dsdu + // complementarity residuals checks: rsvu - Sdu dvu - Vu dsdu RD->copyFrom(*resid->rsvu); - RD->axzpy(-1.0,*iter_->sdu,*sol->vu); - RD->axzpy(-1.0,*iter_->vu, *sol->sdu); - aux=RD->twonorm(); - derr=fmax(aux,derr); + RD->axzpy(-1.0, *iter_->sdu, *sol->vu); + RD->axzpy(-1.0, *iter_->vu, *sol->sdu); + aux = RD->twonorm(); + derr = fmax(aux, derr); nlp_->log->printf(hovLinAlgScalars, " --- rsvu=%g\n", aux); delete RD; return derr; } -bool hiopKKTLinSys::compute_directions_for_full_space(const hiopResidual* resid, - hiopIterate* dir) +bool hiopKKTLinSys::compute_directions_for_full_space(const hiopResidual* resid, hiopIterate* dir) { nlp_->runStats.kkt.tmSolveRhsManip.start(); - const hiopResidual &r=*resid; + const hiopResidual& r = *resid; /*********************************************************************** * compute the rest of the directions * */ - //dsxl = rxl + dx and dzl= [Sxl]^{-1} ( - Zl*dsxl + rszl) + // dsxl = rxl + dx and dzl= [Sxl]^{-1} ( - Zl*dsxl + rszl) if(nlp_->n_low_local()) { dir->sxl->copyFrom(*r.rxl); - dir->sxl->axpy( 1.0,*dir->x); + dir->sxl->axpy(1.0, *dir->x); dir->sxl->selectPattern(nlp_->get_ixl()); dir->zl->copyFrom(*r.rszl); - dir->zl->axzpy(-1.0,*iter_->zl,*dir->sxl); + dir->zl->axzpy(-1.0, *iter_->zl, *dir->sxl); dir->zl->componentDiv_w_selectPattern(*iter_->sxl, nlp_->get_ixl()); } else { dir->sxl->setToZero(); dir->zl->setToZero(); } - //dir->sxl->print(); - //dir->zl->print(); - //dsxu = rxu - dx and dzu = [Sxu]^{-1} ( - Zu*dsxu + rszu) + // dir->sxl->print(); + // dir->zl->print(); + // dsxu = rxu - dx and dzu = [Sxu]^{-1} ( - Zu*dsxu + rszu) if(nlp_->n_upp_local()) { dir->sxu->copyFrom(*r.rxu); - dir->sxu->axpy(-1.0,*dir->x); + dir->sxu->axpy(-1.0, *dir->x); dir->sxu->selectPattern(nlp_->get_ixu()); dir->zu->copyFrom(*r.rszu); - dir->zu->axzpy(-1.0,*iter_->zu,*dir->sxu); + dir->zu->axzpy(-1.0, *iter_->zu, *dir->sxu); dir->zu->selectPattern(nlp_->get_ixu()); dir->zu->componentDiv_w_selectPattern(*iter_->sxu, nlp_->get_ixu()); } else { @@ -256,16 +254,16 @@ bool hiopKKTLinSys::compute_directions_for_full_space(const hiopResidual* resid, dir->zu->setToZero(); } - //dir->sxu->print(); - //dir->zu->print(); - //dsdl = rdl + dd and dvl = [Sdl]^{-1} ( - Vl*dsdl + rsvl) + // dir->sxu->print(); + // dir->zu->print(); + // dsdl = rdl + dd and dvl = [Sdl]^{-1} ( - Vl*dsdl + rsvl) if(nlp_->m_ineq_low()) { dir->sdl->copyFrom(*r.rdl); - dir->sdl->axpy( 1.0,*dir->d); + dir->sdl->axpy(1.0, *dir->d); dir->sdl->selectPattern(nlp_->get_idl()); dir->vl->copyFrom(*r.rsvl); - dir->vl->axzpy(-1.0,*iter_->vl,*dir->sdl); + dir->vl->axzpy(-1.0, *iter_->vl, *dir->sdl); dir->vl->selectPattern(nlp_->get_idl()); dir->vl->componentDiv_w_selectPattern(*iter_->sdl, nlp_->get_idl()); } else { @@ -273,14 +271,14 @@ bool hiopKKTLinSys::compute_directions_for_full_space(const hiopResidual* resid, dir->vl->setToZero(); } - //dsdu = rdu - dd and dvu = [Sdu]^{-1} ( - Vu*dsdu + rsvu ) - if(nlp_->m_ineq_upp()>0) { + // dsdu = rdu - dd and dvu = [Sdu]^{-1} ( - Vu*dsdu + rsvu ) + if(nlp_->m_ineq_upp() > 0) { dir->sdu->copyFrom(*r.rdu); - dir->sdu->axpy(-1.0,*dir->d); + dir->sdu->axpy(-1.0, *dir->d); dir->sdu->selectPattern(nlp_->get_idu()); dir->vu->copyFrom(*r.rsvu); - dir->vu->axzpy(-1.0,*iter_->vu,*dir->sdu); + dir->vu->axzpy(-1.0, *iter_->vu, *dir->sdu); dir->vu->selectPattern(nlp_->get_idu()); dir->vu->componentDiv_w_selectPattern(*iter_->sdu, nlp_->get_idu()); } else { @@ -288,7 +286,7 @@ bool hiopKKTLinSys::compute_directions_for_full_space(const hiopResidual* resid, dir->vu->setToZero(); } nlp_->runStats.kkt.tmSolveRhsManip.stop(); - + #ifdef HIOP_DEEPCHECKS nlp_->runStats.kkt.tmResid.start(); assert(dir->sxl->matchesPattern(nlp_->get_ixl())); @@ -300,18 +298,15 @@ bool hiopKKTLinSys::compute_directions_for_full_space(const hiopResidual* resid, assert(dir->vl->matchesPattern(nlp_->get_idl())); assert(dir->vu->matchesPattern(nlp_->get_idu())); - //CHECK THE SOLUTION - errorKKT(resid,dir); + // CHECK THE SOLUTION + errorKKT(resid, dir); nlp_->runStats.kkt.tmResid.stop(); #endif return true; } -int hiopKKTLinSysCurvCheck::factorizeWithCurvCheck() -{ - return linSys_->matrixChanged(); -} +int hiopKKTLinSysCurvCheck::factorizeWithCurvCheck() { return linSys_->matrixChanged(); } bool hiopKKTLinSysCurvCheck::factorize() { @@ -331,14 +326,17 @@ bool hiopKKTLinSysCurvCheck::factorize() delta_wd_ = perturb_calc_->get_curr_delta_wd(); delta_cc_ = perturb_calc_->get_curr_delta_cc(); delta_cd_ = perturb_calc_->get_curr_delta_cd(); - + while(num_refactorization <= max_refactorization) { #ifdef HIOP_DEEPCHECKS assert(perturb_calc_->check_consistency() && "something went wrong with IC"); #endif if(hovScalars <= nlp_->options->GetInteger("verbosity_level")) { - nlp_->log->printf(hovScalars, "linsys: norminf(delta_w)=%12.5e norminf(delta_c)=%12.5e (ic %d)\n", - delta_wx_->infnorm(), delta_cc_->infnorm(), num_refactorization); + nlp_->log->printf(hovScalars, + "linsys: norminf(delta_w)=%12.5e norminf(delta_c)=%12.5e (ic %d)\n", + delta_wx_->infnorm(), + delta_cc_->infnorm(), + num_refactorization); } // the update of the linear system, including IC perturbations @@ -352,22 +350,22 @@ bool hiopKKTLinSysCurvCheck::factorize() nlp_->runStats.kkt.tmUpdateInnerFact.stop(); continue_re_fact = fact_acceptor_->requireReFactorization(*nlp_, n_neg_eig); - - if(-1==continue_re_fact) { + + if(-1 == continue_re_fact) { return false; - } else if(0==continue_re_fact) { + } else if(0 == continue_re_fact) { break; } // will do an inertia correction num_refactorization++; nlp_->runStats.kkt.nUpdateICCorr++; - } // end of IC loop + } // end of IC loop - if(num_refactorization>max_refactorization) { + if(num_refactorization > max_refactorization) { nlp_->log->printf(hovError, - "Reached max number (%d) of refactorization within an outer iteration.\n", - max_refactorization); + "Reached max number (%d) of refactorization within an outer iteration.\n", + max_refactorization); return false; } return true; @@ -388,11 +386,13 @@ bool hiopKKTLinSysCurvCheck::factorize_inertia_free() continue_re_fact = fact_acceptor_->requireReFactorization(*nlp_, non_singular_mat, true); #ifdef HIOP_DEEPCHECKS - assert(perturb_calc_->check_consistency() && "something went wrong with IC"); + assert(perturb_calc_->check_consistency() && "something went wrong with IC"); #endif if(hovScalars <= nlp_->options->GetInteger("verbosity_level")) { - nlp_->log->printf(hovScalars, "linsys: norminf(delta_w)=%12.5e norminf(delta_c)=%12.5e \n", - delta_wx_->infnorm(), delta_cc_->infnorm()); + nlp_->log->printf(hovScalars, + "linsys: norminf(delta_w)=%12.5e norminf(delta_c)=%12.5e \n", + delta_wx_->infnorm(), + delta_cc_->infnorm()); } // the update of the linear system, including IC perturbations @@ -402,28 +402,30 @@ bool hiopKKTLinSysCurvCheck::factorize_inertia_free() // factorization int solver_flag = factorizeWithCurvCheck(); - + // if solver_flag<0, matrix becomes singular, or not pd (in condensed system) after adding regularization // this should not happen, but some linear solver may have numerical difficulty. // adding more regularization till it succeeds const size_t max_refactorization = 10; size_t num_refactorization = 0; - while(num_refactorization<=max_refactorization && solver_flag < 0) { + while(num_refactorization <= max_refactorization && solver_flag < 0) { nlp_->log->printf(hovWarning, "linsys: matrix becomes singular after adding primal regularization!\n"); continue_re_fact = fact_acceptor_->requireReFactorization(*nlp_, solver_flag); - - if(-1==continue_re_fact) { + + if(-1 == continue_re_fact) { return false; } else { // this while loop is used to correct singularity - assert(1==continue_re_fact); + assert(1 == continue_re_fact); } if(hovScalars <= nlp_->options->GetInteger("verbosity_level")) { - nlp_->log->printf(hovScalars, "linsys: norminf(delta_w)=%12.5e norminf(delta_c)=%12.5e \n", - delta_wx_->infnorm(), delta_cc_->infnorm()); + nlp_->log->printf(hovScalars, + "linsys: norminf(delta_w)=%12.5e norminf(delta_c)=%12.5e \n", + delta_wx_->infnorm(), + delta_cc_->infnorm()); } // the update of the linear system, including IC perturbations @@ -439,14 +441,13 @@ bool hiopKKTLinSysCurvCheck::factorize_inertia_free() // will do an inertia correction num_refactorization++; nlp_->runStats.kkt.nUpdateICCorr++; - } // end of IC loop + } // end of IC loop nlp_->runStats.kkt.tmUpdateInnerFact.stop(); return true; } - //////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// // hiopKKTLinSysCompressed @@ -475,11 +476,11 @@ bool hiopKKTLinSysCompressed::test_direction(const hiopIterate* dir, hiopMatrix* delta_wd_ = perturb_calc_->get_curr_delta_wd(); delta_cc_ = perturb_calc_->get_curr_delta_cc(); delta_cd_ = perturb_calc_->get_curr_delta_cd(); - + /* compute xWx = x(H+Dx_)x (for primal var [x,d] */ Hess_->timesVec(0.0, *x_wrk_, 1.0, *sol_x); dWd += x_wrk_->dotProductWith(*sol_x); - + x_wrk_->copyFrom(*sol_x); x_wrk_->componentMult(*Dx_); x_wrk_->axzpy(1., *delta_wx_, *sol_x); @@ -492,9 +493,9 @@ bool hiopKKTLinSysCompressed::test_direction(const hiopIterate* dir, hiopMatrix* /* compute rhs for the dWd test */ dbl_wrk = sol_x->twonorm(); - xs_nrmsq += dbl_wrk*dbl_wrk; + xs_nrmsq += dbl_wrk * dbl_wrk; dbl_wrk = sol_d->twonorm(); - xs_nrmsq += dbl_wrk*dbl_wrk; + xs_nrmsq += dbl_wrk * dbl_wrk; if(dWd < xs_nrmsq * nlp_->options->GetNumeric("neg_curv_test_fact")) { // have negative curvature. Add regularization and re-factorize the matrix @@ -513,7 +514,7 @@ bool hiopKKTLinSysCompressed::test_direction(const hiopIterate* dir, hiopMatrix* // hiopKKTLinSysCompressedXYcYd //////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// -/** +/** * Provides the functionality for reducing the KKT linear system to the * compressed linear below in dx, dd, dyc, and dyd variables and then to perform * the basic ops needed to compute the remaining directions. @@ -526,7 +527,7 @@ bool hiopKKTLinSysCompressed::test_direction(const hiopIterate* dir, hiopMatrix* * [ Jd 0 -Dd^{-1}] [dyd] [ ryd_tilde] */ hiopKKTLinSysCompressedXYcYd::hiopKKTLinSysCompressedXYcYd(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressed(nlp) + : hiopKKTLinSysCompressed(nlp) { Dd_inv_ = dynamic_cast(nlp_->alloc_dual_ineq_vec()); assert(Dd_inv_ != NULL); @@ -552,14 +553,17 @@ bool hiopKKTLinSysCompressedXYcYd::update(const hiopIterate* iter, iter_ = iter; grad_f_ = dynamic_cast(grad_f); - Jac_c_ = Jac_c; Jac_d_ = Jac_d; - Hess_=Hess; + Jac_c_ = Jac_c; + Jac_d_ = Jac_d; + Hess_ = Hess; - int nx = Hess_->m(); - assert(nx==Hess_->n()); assert(nx==Jac_c_->n()); assert(nx==Jac_d_->n()); + int nx = Hess_->m(); + assert(nx == Hess_->n()); + assert(nx == Jac_c_->n()); + assert(nx == Jac_d_->n()); - //compute and put the barrier diagonals in - //Dx=(Sxl)^{-1}Zl + (Sxu)^{-1}Zu + // compute and put the barrier diagonals in + // Dx=(Sxl)^{-1}Zl + (Sxu)^{-1}Zu Dx_->setToZero(); Dx_->axdzpy_w_pattern(1.0, *iter_->zl, *iter_->sxl, nlp_->get_ixl()); Dx_->axdzpy_w_pattern(1.0, *iter_->zu, *iter_->sxu, nlp_->get_ixu()); @@ -571,24 +575,23 @@ bool hiopKKTLinSysCompressedXYcYd::update(const hiopIterate* iter, Dd_->axdzpy_w_pattern(1.0, *iter_->vu, *iter_->sdu, nlp_->get_idu()); nlp_->log->write("Dd in KKT", *Dd_, hovMatrices); #ifdef HIOP_DEEPCHECKS - assert(true==Dd_->allPositive()); + assert(true == Dd_->allPositive()); #endif nlp_->runStats.kkt.tmUpdateInit.stop(); - //factorization + inertia correction if needed + // factorization + inertia correction if needed bool retval = factorize(); nlp_->runStats.tmSolverInternal.stop(); return retval; } -bool hiopKKTLinSysCompressedXYcYd::computeDirections(const hiopResidual* resid, - hiopIterate* dir) -{ +bool hiopKKTLinSysCompressedXYcYd::computeDirections(const hiopResidual* resid, hiopIterate* dir) +{ nlp_->runStats.tmSolverInternal.start(); nlp_->runStats.kkt.tmSolveRhsManip.start(); - const hiopResidual &r=*resid; + const hiopResidual& r = *resid; /*********************************************************************** * perform the reduction to the compressed linear system @@ -600,60 +603,61 @@ bool hiopKKTLinSysCompressedXYcYd::computeDirections(const hiopResidual* resid, * yd_tilde = ryd + Dd_inv*rd_tilde */ rx_tilde_->copyFrom(*r.rx); - if(nlp_->n_low_local()>0) { + if(nlp_->n_low_local() > 0) { // rl:=rszl-Zl*rxl (using dir->x as working buffer) - hiopVector&rl=*(dir->x);//temporary working buffer + hiopVector& rl = *(dir->x); // temporary working buffer rl.copyFrom(*r.rszl); rl.axzpy(-1.0, *iter_->zl, *r.rxl); - //rx_tilde = rx+Sxl^{-1}*rl - rx_tilde_->axdzpy_w_pattern( 1.0, rl, *iter_->sxl, nlp_->get_ixl()); + // rx_tilde = rx+Sxl^{-1}*rl + rx_tilde_->axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); } - if(nlp_->n_upp_local()>0) { - //ru:=rszu-Zu*rxu (using dir->x as working buffer) - hiopVector&ru=*(dir->x);//temporary working buffer - ru.copyFrom(*r.rszu); ru.axzpy(-1.0,*iter_->zu, *r.rxu); - //rx_tilde = rx_tilde - Sxu^{-1}*ru + if(nlp_->n_upp_local() > 0) { + // ru:=rszu-Zu*rxu (using dir->x as working buffer) + hiopVector& ru = *(dir->x); // temporary working buffer + ru.copyFrom(*r.rszu); + ru.axzpy(-1.0, *iter_->zu, *r.rxu); + // rx_tilde = rx_tilde - Sxu^{-1}*ru rx_tilde_->axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); } - //for ryd_tilde: + // for ryd_tilde: ryd_tilde_->copyFrom(*r.ryd); // 1. the diag (Sdl^{-1}Vl+Sdu^{-1}Vu)^{-1} has already computed in Dd_inv in 'update' // 2. compute the left multiplicand in ryd2 (using buffer dir->sdl), that is // ryd2 = [rd + Sdl^{-1}*(rsvl-Vl*rdl)-Sdu^{-1}(rsvu-Vu*rdu)] (this is \tilde{r}_d in the notes) // Inner ops are performed by accumulating in rd2 (buffer dir->sdu) - hiopVector&ryd2=*dir->sdl; + hiopVector& ryd2 = *dir->sdl; ryd2.copyFrom(*r.rd); - if(nlp_->m_ineq_low()>0) { - hiopVector& rd2=*dir->sdu; - //rd2=rsvl-Vl*rdl + if(nlp_->m_ineq_low() > 0) { + hiopVector& rd2 = *dir->sdu; + // rd2=rsvl-Vl*rdl rd2.copyFrom(*r.rsvl); rd2.axzpy(-1.0, *iter_->vl, *r.rdl); - //ryd2 += Sdl^{-1}*(rsvl-Vl*rdl) + // ryd2 += Sdl^{-1}*(rsvl-Vl*rdl) ryd2.axdzpy_w_pattern(1.0, rd2, *iter_->sdl, nlp_->get_idl()); } - if(nlp_->m_ineq_upp()>0) { - hiopVector& rd2=*dir->sdu; - //rd2=rsvu-Vu*rdu + if(nlp_->m_ineq_upp() > 0) { + hiopVector& rd2 = *dir->sdu; + // rd2=rsvu-Vu*rdu rd2.copyFrom(*r.rsvu); rd2.axzpy(-1.0, *iter_->vu, *r.rdu); - //ryd2 += -Sdu^{-1}(rsvu-Vu*rdu) + // ryd2 += -Sdu^{-1}(rsvu-Vu*rdu) ryd2.axdzpy_w_pattern(-1.0, rd2, *iter_->sdu, nlp_->get_idu()); } nlp_->log->write("Dinv (in computeDirections)", *Dd_inv_, hovMatrices); - //now the final ryd_tilde += Dd^{-1}*ryd2 + // now the final ryd_tilde += Dd^{-1}*ryd2 ryd_tilde_->axzpy(1.0, ryd2, *Dd_inv_); - + nlp_->runStats.kkt.tmSolveRhsManip.stop(); - + #ifdef HIOP_DEEPCHECKS nlp_->runStats.kkt.tmResid.start(); - hiopVector* rx_tilde_save=rx_tilde_->new_copy(); - hiopVector* ryc_save=r.ryc->new_copy(); - hiopVector* ryd_tilde_save=ryd_tilde_->new_copy(); + hiopVector* rx_tilde_save = rx_tilde_->new_copy(); + hiopVector* ryc_save = r.ryc->new_copy(); + hiopVector* ryd_tilde_save = ryd_tilde_->new_copy(); nlp_->runStats.kkt.tmResid.stop(); #endif @@ -662,39 +666,42 @@ bool hiopKKTLinSysCompressedXYcYd::computeDirections(const hiopResidual* resid, * (be aware that rx_tilde is reused/modified inside this function) ***********************************************************************/ bool sol_ok = solveCompressed(*rx_tilde_, *r.ryc, *ryd_tilde_, *dir->x, *dir->yc, *dir->yd); - + nlp_->runStats.kkt.tmSolveRhsManip.start(); - //recover dir->d = (D)^{-1}*(dir->yd + ryd2) + // recover dir->d = (D)^{-1}*(dir->yd + ryd2) dir->d->copyFrom(ryd2); - dir->d->axpy(1.0,*dir->yd); + dir->d->axpy(1.0, *dir->yd); dir->d->componentMult(*Dd_inv_); nlp_->runStats.kkt.tmSolveRhsManip.stop(); - //dir->d->print(); + // dir->d->print(); #ifdef HIOP_DEEPCHECKS nlp_->runStats.kkt.tmResid.start(); - errorCompressedLinsys(*rx_tilde_save,*ryc_save,*ryd_tilde_save, *dir->x, *dir->yc, *dir->yd); + errorCompressedLinsys(*rx_tilde_save, *ryc_save, *ryd_tilde_save, *dir->x, *dir->yc, *dir->yd); delete rx_tilde_save; delete ryc_save; delete ryd_tilde_save; nlp_->runStats.kkt.tmResid.stop(); #endif - if(false==sol_ok) { + if(false == sol_ok) { return false; } - const bool bret = compute_directions_for_full_space(resid, dir); + const bool bret = compute_directions_for_full_space(resid, dir); nlp_->runStats.tmSolverInternal.stop(); return bret; } #ifdef HIOP_DEEPCHECKS -//this method needs a bit of revisiting if becomes critical (mainly avoid dynamic allocations) -double hiopKKTLinSysCompressedXYcYd:: -errorCompressedLinsys(const hiopVector& rx, const hiopVector& ryc, const hiopVector& ryd, - const hiopVector& dx, const hiopVector& dyc, const hiopVector& dyd) +// this method needs a bit of revisiting if becomes critical (mainly avoid dynamic allocations) +double hiopKKTLinSysCompressedXYcYd::errorCompressedLinsys(const hiopVector& rx, + const hiopVector& ryc, + const hiopVector& ryd, + const hiopVector& dx, + const hiopVector& dyc, + const hiopVector& dyd) { nlp_->log->printf(hovLinAlgScalars, "hiopKKTLinSysDenseXYcYd::errorCompressedLinsys residuals norm:\n"); assert(perturb_calc_); @@ -703,35 +710,37 @@ errorCompressedLinsys(const hiopVector& rx, const hiopVector& ryc, const hiopVec delta_cc_ = perturb_calc_->get_curr_delta_cc(); delta_cd_ = perturb_calc_->get_curr_delta_cd(); - double derr=1e20, aux; - hiopVector *RX=rx.new_copy(); - //RX=rx-H*dx-J'c*dyc-J'*dyd + double derr = 1e20, aux; + hiopVector* RX = rx.new_copy(); + // RX=rx-H*dx-J'c*dyc-J'*dyd Hess_->timesVec(1.0, *RX, -1.0, dx); RX->axzpy(-1.0, *Dx_, dx); RX->axzpy(-1., *delta_wx_, dx); Jac_c_->transTimesVec(1.0, *RX, -1.0, dyc); Jac_d_->transTimesVec(1.0, *RX, -1.0, dyd); - aux=RX->twonorm(); - derr=fmax(derr,aux); + aux = RX->twonorm(); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >> rx=%g\n", aux); - delete RX; RX=NULL; + delete RX; + RX = NULL; - hiopVector* RC=ryc.new_copy(); + hiopVector* RC = ryc.new_copy(); Jac_c_->timesVec(1.0, *RC, -1.0, dx); RC->axzpy(1., *delta_cc_, dyc); aux = RC->twonorm(); - derr=fmax(derr,aux); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >> ryc=%g\n", aux); - delete RC; RC=NULL; + delete RC; + RC = NULL; - hiopVector* RD=ryd.new_copy(); + hiopVector* RD = ryd.new_copy(); Jac_d_->timesVec(1.0, *RD, -1.0, dx); RD->axzpy(1.0, *Dd_inv_, dyd); RD->axzpy(1., *delta_cd_, dyd); aux = RD->twonorm(); - derr=fmax(derr,aux); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >> ryd=%g\n", aux); delete RD; @@ -758,37 +767,41 @@ errorCompressedLinsys(const hiopVector& rx, const hiopVector& ryc, const hiopVec * and then to compute the rest of the search directions */ hiopKKTLinSysCompressedXDYcYd::hiopKKTLinSysCompressedXDYcYd(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressed(nlp) + : hiopKKTLinSysCompressed(nlp) { -// Dd_ = dynamic_cast(nlp_->alloc_dual_ineq_vec()); -// assert(Dd_ != NULL); + // Dd_ = dynamic_cast(nlp_->alloc_dual_ineq_vec()); + // assert(Dd_ != NULL); rd_tilde_ = Dd_->alloc_clone(); } hiopKKTLinSysCompressedXDYcYd::~hiopKKTLinSysCompressedXDYcYd() { -// delete Dd_; + // delete Dd_; delete rd_tilde_; } -bool hiopKKTLinSysCompressedXDYcYd::update( const hiopIterate* iter, - const hiopVector* grad_f, - const hiopMatrix* Jac_c, - const hiopMatrix* Jac_d, - hiopMatrix* Hess) +bool hiopKKTLinSysCompressedXDYcYd::update(const hiopIterate* iter, + const hiopVector* grad_f, + const hiopMatrix* Jac_c, + const hiopMatrix* Jac_d, + hiopMatrix* Hess) { nlp_->runStats.linsolv.reset(); nlp_->runStats.tmSolverInternal.start(); nlp_->runStats.kkt.tmUpdateInit.start(); - + iter_ = iter; grad_f_ = dynamic_cast(grad_f); - Jac_c_ = Jac_c; Jac_d_ = Jac_d; + Jac_c_ = Jac_c; + Jac_d_ = Jac_d; - Hess_=Hess; + Hess_ = Hess; - int nx = Hess_->m(); assert(nx==Hess_->n()); assert(nx==Jac_c_->n()); assert(nx==Jac_d_->n()); + int nx = Hess_->m(); + assert(nx == Hess_->n()); + assert(nx == Jac_c_->n()); + assert(nx == Jac_d_->n()); // compute barrier diagonals (these change only between outer optimiz iterations) // Dx=(Sxl)^{-1}Zl + (Sxu)^{-1}Zu @@ -803,25 +816,23 @@ bool hiopKKTLinSysCompressedXDYcYd::update( const hiopIterate* iter, Dd_->axdzpy_w_pattern(1.0, *iter_->vu, *iter_->sdu, nlp_->get_idu()); nlp_->log->write("Dd in KKT", *Dd_, hovMatrices); #ifdef HIOP_DEEPCHECKS - assert(true==Dd_->allPositive()); + assert(true == Dd_->allPositive()); #endif nlp_->runStats.kkt.tmUpdateInit.stop(); - - //factorization + inertia correction if needed + + // factorization + inertia correction if needed bool retval = factorize(); nlp_->runStats.tmSolverInternal.stop(); return retval; } - -bool hiopKKTLinSysCompressedXDYcYd::computeDirections(const hiopResidual* resid, - hiopIterate* dir) +bool hiopKKTLinSysCompressedXDYcYd::computeDirections(const hiopResidual* resid, hiopIterate* dir) { nlp_->runStats.tmSolverInternal.start(); nlp_->runStats.kkt.tmSolveRhsManip.start(); - const hiopResidual &r=*resid; + const hiopResidual& r = *resid; /*********************************************************************** * perform the reduction to the compressed linear system @@ -831,42 +842,43 @@ bool hiopKKTLinSysCompressedXDYcYd::computeDirections(const hiopResidual* resid, rx_tilde_->copyFrom(*r.rx); if(nlp_->n_low_local()) { // rl:=rszl-Zl*rxl (using dir->x as working buffer) - hiopVector &rl=*(dir->x);//temporary working buffer + hiopVector& rl = *(dir->x); // temporary working buffer rl.copyFrom(*r.rszl); rl.axzpy(-1.0, *iter_->zl, *r.rxl); - //rx_tilde = rx+Sxl^{-1}*rl - rx_tilde_->axdzpy_w_pattern( 1.0, rl, *iter_->sxl, nlp_->get_ixl()); + // rx_tilde = rx+Sxl^{-1}*rl + rx_tilde_->axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); } if(nlp_->n_upp_local()) { - //ru:=rszu-Zu*rxu (using dir->x as working buffer) - hiopVector &ru=*(dir->x);//temporary working buffer - ru.copyFrom(*r.rszu); ru.axzpy(-1.0,*iter_->zu, *r.rxu); - //rx_tilde = rx_tilde - Sxu^{-1}*ru + // ru:=rszu-Zu*rxu (using dir->x as working buffer) + hiopVector& ru = *(dir->x); // temporary working buffer + ru.copyFrom(*r.rszu); + ru.axzpy(-1.0, *iter_->zu, *r.rxu); + // rx_tilde = rx_tilde - Sxu^{-1}*ru rx_tilde_->axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); } - //for rd_tilde = rd + Sdl^{-1}*(rsvl-Vl*rdl)-Sdu^{-1}(rsvu-Vu*rdu) + // for rd_tilde = rd + Sdl^{-1}*(rsvl-Vl*rdl)-Sdu^{-1}(rsvu-Vu*rdu) rd_tilde_->copyFrom(*r.rd); if(nlp_->m_ineq_low()) { - hiopVector& rd2=*dir->sdu; - //rd2=rsvl-Vl*rdl + hiopVector& rd2 = *dir->sdu; + // rd2=rsvl-Vl*rdl rd2.copyFrom(*r.rsvl); rd2.axzpy(-1.0, *iter_->vl, *r.rdl); - //rd_tilde += Sdl^{-1}*(rsvl-Vl*rdl) + // rd_tilde += Sdl^{-1}*(rsvl-Vl*rdl) rd_tilde_->axdzpy_w_pattern(1.0, rd2, *iter_->sdl, nlp_->get_idl()); } - if(nlp_->m_ineq_upp()>0) { - hiopVector& rd2=*dir->sdu; - //rd2=rsvu-Vu*rdu + if(nlp_->m_ineq_upp() > 0) { + hiopVector& rd2 = *dir->sdu; + // rd2=rsvu-Vu*rdu rd2.copyFrom(*r.rsvu); rd2.axzpy(-1.0, *iter_->vu, *r.rdu); - //rd_tilde += -Sdu^{-1}(rsvu-Vu*rdu) + // rd_tilde += -Sdu^{-1}(rsvu-Vu*rdu) rd_tilde_->axdzpy_w_pattern(-1.0, rd2, *iter_->sdu, nlp_->get_idu()); } nlp_->log->write("Dd (in computeDirections)", *Dd_, hovMatrices); - + nlp_->runStats.kkt.tmSolveRhsManip.stop(); - + #ifdef HIOP_DEEPCHECKS nlp_->runStats.kkt.tmResid.start(); hiopVector* rx_tilde_save = rx_tilde_->new_copy(); @@ -875,20 +887,18 @@ bool hiopKKTLinSysCompressedXDYcYd::computeDirections(const hiopResidual* resid, hiopVector* ryd_save = r.ryd->new_copy(); nlp_->runStats.kkt.tmResid.stop(); #endif - + /*********************************************************************** * solve the compressed system * (be aware that rx_tilde is reused/modified inside this function) ***********************************************************************/ bool sol_ok = solveCompressed(*rx_tilde_, *rd_tilde_, *r.ryc, *r.ryd, *dir->x, *dir->d, *dir->yc, *dir->yd); - + #ifdef HIOP_DEEPCHECKS nlp_->runStats.kkt.tmResid.start(); double derr = - errorCompressedLinsys(*rx_tilde_save, *rd_tilde_save, *ryc_save, *ryd_save, - *dir->x, *dir->d, *dir->yc, *dir->yd); - if(derr>1e-8) - nlp_->log->printf(hovWarning, "solve compressed high absolute resid norm (=%12.5e)\n", derr); + errorCompressedLinsys(*rx_tilde_save, *rd_tilde_save, *ryc_save, *ryd_save, *dir->x, *dir->d, *dir->yc, *dir->yd); + if(derr > 1e-8) nlp_->log->printf(hovWarning, "solve compressed high absolute resid norm (=%12.5e)\n", derr); delete rx_tilde_save; delete ryc_save; delete rd_tilde_save; @@ -900,31 +910,30 @@ bool hiopKKTLinSysCompressedXDYcYd::computeDirections(const hiopResidual* resid, nlp_->runStats.tmSolverInternal.stop(); return false; } - + const bool bret = compute_directions_for_full_space(resid, dir); - + nlp_->runStats.tmSolverInternal.stop(); return bret; } - bool hiopKKTLinSys::compute_directions_w_IR(const hiopResidual* resid, hiopIterate* dir) { nlp_->runStats.tmSolverInternal.start(); - + // skip IR if user set ir_outer_maxit to 0 or negative values if(0 >= nlp_->options->GetInteger("ir_outer_maxit")) { nlp_->runStats.tmSolverInternal.stop(); - return computeDirections(resid,dir); + return computeDirections(resid, dir); } - const hiopResidual &r=*resid; + const hiopResidual& r = *resid; // in the order of rx, rd, ryc, ryd, rxl, rxu, rdl, rdu, rszl, rszu, rsvl, rsvu const size_type nx = r.rx->get_local_size(); const size_type nd = r.rd->get_local_size(); const size_type nyc = r.ryc->get_local_size(); const size_type nyd = r.ryd->get_local_size(); - size_type dim_rhs = 5*nx + 5*nd + nyc + nyd; + size_type dim_rhs = 5 * nx + 5 * nd + nyc + nyd; /*********************************************************************** * solve the compressed system as a preconditioner * (be aware that rx_tilde is reused/modified inside this function) @@ -938,8 +947,9 @@ bool hiopKKTLinSys::compute_directions_w_IR(const hiopResidual* resid, hiopItera // need to reset the pointer to the current iter, since the outer loop keeps swtiching between curr_iter and trial_iter kkt_opr_->reset_curr_iter(iter_); - - double tol = std::min(mu_*nlp_->options->GetNumeric("ir_outer_tol_factor"), nlp_->options->GetNumeric("ir_outer_tol_min")); + + double tol = + std::min(mu_ * nlp_->options->GetNumeric("ir_outer_tol_factor"), nlp_->options->GetNumeric("ir_outer_tol_min")); bicgIR_->set_max_num_iter(nlp_->options->GetInteger("ir_outer_maxit")); bicgIR_->set_tol(tol); bicgIR_->set_x0(0.0); @@ -960,68 +970,69 @@ bool hiopKKTLinSys::compute_directions_w_IR(const hiopResidual* resid, hiopItera return bret; } - - #ifdef HIOP_DEEPCHECKS -double hiopKKTLinSysCompressedXDYcYd:: -errorCompressedLinsys(const hiopVector& rx, const hiopVector& rd, - const hiopVector& ryc, const hiopVector& ryd, - const hiopVector& dx, const hiopVector& dd, - const hiopVector& dyc, const hiopVector& dyd) +double hiopKKTLinSysCompressedXDYcYd::errorCompressedLinsys(const hiopVector& rx, + const hiopVector& rd, + const hiopVector& ryc, + const hiopVector& ryd, + const hiopVector& dx, + const hiopVector& dd, + const hiopVector& dyc, + const hiopVector& dyd) { nlp_->log->printf(hovLinAlgScalars, "hiopKKTLinSysDenseXDYcYd::errorCompressedLinsys residuals norm:\n"); assert(perturb_calc_); delta_wx_ = perturb_calc_->get_curr_delta_wx(); delta_wd_ = perturb_calc_->get_curr_delta_wd(); delta_cc_ = perturb_calc_->get_curr_delta_cc(); - delta_cd_ = perturb_calc_->get_curr_delta_cd(); + delta_cd_ = perturb_calc_->get_curr_delta_cd(); - double derr=-1., aux; - hiopVector *RX=rx.new_copy(); - //RX=rx-H*dx-J'c*dyc-J'*dyd + double derr = -1., aux; + hiopVector* RX = rx.new_copy(); + // RX=rx-H*dx-J'c*dyc-J'*dyd Hess_->timesVec(1.0, *RX, -1.0, dx); RX->axzpy(-1.0, *Dx_, dx); - RX->axzpy(-1,*delta_wx_, dx); + RX->axzpy(-1, *delta_wx_, dx); Jac_c_->transTimesVec(1.0, *RX, -1.0, dyc); Jac_d_->transTimesVec(1.0, *RX, -1.0, dyd); - aux=RX->twonorm(); - derr=fmax(derr,aux); + aux = RX->twonorm(); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >> rx=%g\n", aux); delete RX; - //RD = rd + dyd - Dd*dd - hiopVector* RD=rd.new_copy(); - RD->axpy( 1., dyd); + // RD = rd + dyd - Dd*dd + hiopVector* RD = rd.new_copy(); + RD->axpy(1., dyd); RD->axzpy(-1., *Dd_, dd); - RD->axzpy(-1.,*delta_wd_, dd); - aux=RD->twonorm(); - derr=fmax(derr,aux); + RD->axzpy(-1., *delta_wd_, dd); + aux = RD->twonorm(); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >> rd=%g\n", aux); delete RD; - hiopVector* RC=ryc.new_copy(); + hiopVector* RC = ryc.new_copy(); Jac_c_->timesVec(1.0, *RC, -1.0, dx); RC->axzpy(1., *delta_cc_, dyc); aux = RC->twonorm(); - derr=fmax(derr,aux); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >> ryc=%g\n", aux); delete RC; - //RYD = ryd+dyd - Jd*dx - hiopVector* RYD=ryd.new_copy(); + // RYD = ryd+dyd - Jd*dx + hiopVector* RYD = ryd.new_copy(); Jac_d_->timesVec(1.0, *RYD, -1.0, dx); RYD->axpy(1.0, dd); RYD->axzpy(1., *delta_cd_, dyd); aux = RYD->twonorm(); - derr=fmax(derr,aux); + derr = fmax(derr, aux); nlp_->log->printf(hovLinAlgScalars, " >> ryd=%g\n", aux); - delete RYD; RYD=NULL; + delete RYD; + RYD = NULL; return derr; } #endif - //////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// // hiopKKTLinSysFull @@ -1033,10 +1044,9 @@ bool hiopKKTLinSysFull::update(const hiopIterate* iter, const hiopMatrix* Jac_d, hiopMatrix* Hess) { - iter_ = iter; grad_f_ = dynamic_cast(grad_f); - Jac_c_ = Jac_c; + Jac_c_ = Jac_c; Jac_d_ = Jac_d; Hess_ = Hess; nlp_->runStats.linsolv.reset(); @@ -1049,38 +1059,51 @@ bool hiopKKTLinSysFull::update(const hiopIterate* iter, return retval; } - - -bool hiopKKTLinSysFull::computeDirections(const hiopResidual* resid, - hiopIterate* dir) +bool hiopKKTLinSysFull::computeDirections(const hiopResidual* resid, hiopIterate* dir) { nlp_->runStats.tmSolverInternal.start(); - const hiopResidual &r=*resid; + const hiopResidual& r = *resid; /*********************************************************************** * solve the full system * (be aware that rx_tilde is reused/modified inside this function) ***********************************************************************/ - bool sol_ok = solve(*r.rx, *r.ryc, *r.ryd, *r.rd, - *r.rdl, *r.rdu, *r.rxl, *r.rxu, - *r.rsvl, *r.rsvu, *r.rszl, *r.rszu, - *dir->x, *dir->yc, *dir->yd, *dir->d, - *dir->vl, *dir->vu, *dir->zl, *dir->zu, - *dir->sdl, *dir->sdu, *dir->sxl, *dir->sxu); + bool sol_ok = solve(*r.rx, + *r.ryc, + *r.ryd, + *r.rd, + *r.rdl, + *r.rdu, + *r.rxl, + *r.rxu, + *r.rsvl, + *r.rsvu, + *r.rszl, + *r.rszu, + *dir->x, + *dir->yc, + *dir->yd, + *dir->d, + *dir->vl, + *dir->vu, + *dir->zl, + *dir->zu, + *dir->sdl, + *dir->sdu, + *dir->sxl, + *dir->sxu); nlp_->runStats.tmSolverInternal.stop(); return sol_ok; } - //////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// // hiopMatVecKKTFullOpr //////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// -hiopMatVecKKTFullOpr::hiopMatVecKKTFullOpr(hiopKKTLinSys* kkt, - const hiopIterate* iter) +hiopMatVecKKTFullOpr::hiopMatVecKKTFullOpr(hiopKKTLinSys* kkt, const hiopIterate* iter) : kkt_(kkt), iter_(iter), resid_(nullptr), @@ -1094,15 +1117,9 @@ hiopMatVecKKTFullOpr::hiopMatVecKKTFullOpr(hiopKKTLinSys* kkt, res_cv_ = new hiopVectorCompoundPD(resid_); } -bool hiopMatVecKKTFullOpr::split_vec_to_build_it(const hiopVector& x) -{ - return true; -} +bool hiopMatVecKKTFullOpr::split_vec_to_build_it(const hiopVector& x) { return true; } -bool hiopMatVecKKTFullOpr::combine_res_to_build_vec(hiopVector& y) -{ - return true; -} +bool hiopMatVecKKTFullOpr::combine_res_to_build_vec(hiopVector& y) { return true; } /** * Full KKT matrix is @@ -1144,13 +1161,13 @@ bool hiopMatVecKKTFullOpr::times_vec(hiopVector& yvec, const hiopVector& xvec) hiopVectorCompoundPD& y = dynamic_cast(yvec); const hiopVectorCompoundPD& x = dynamic_cast(xvec); - assert(x.get_num_parts()==y.get_num_parts() && x.get_num_parts() == 12); + assert(x.get_num_parts() == y.get_num_parts() && x.get_num_parts() == 12); hiopVector* dx_ = &(x.getVector(0)); hiopVector* dd_ = &(x.getVector(1)); hiopVector* dyc_ = &(x.getVector(2)); hiopVector* dyd_ = &(x.getVector(3)); - hiopVector* dsxl_= &(x.getVector(4)); + hiopVector* dsxl_ = &(x.getVector(4)); hiopVector* dsxu_ = &(x.getVector(5)); hiopVector* dsdl_ = &(x.getVector(6)); hiopVector* dsdu_ = &(x.getVector(7)); @@ -1158,7 +1175,7 @@ bool hiopMatVecKKTFullOpr::times_vec(hiopVector& yvec, const hiopVector& xvec) hiopVector* dzu_ = &(x.getVector(9)); hiopVector* dvl_ = &(x.getVector(10)); hiopVector* dvu_ = &(x.getVector(11)); - + hiopVector* yrx_ = &(y.getVector(0)); hiopVector* yrd_ = &(y.getVector(1)); hiopVector* yryc_ = &(y.getVector(2)); @@ -1172,69 +1189,69 @@ bool hiopMatVecKKTFullOpr::times_vec(hiopVector& yvec, const hiopVector& xvec) hiopVector* yrvl_ = &(y.getVector(10)); hiopVector* yrvu_ = &(y.getVector(11)); - //rx = H*dx + delta_wx*I*dx + Jc'*dyc + Jd'*dyd - dzl + dzu + // rx = H*dx + delta_wx*I*dx + Jc'*dyc + Jd'*dyd - dzl + dzu Hess->timesVec(0.0, *yrx_, +1.0, *dx_); yrx_->axzpy(1., *delta_wx, *dx_); Jac_c->transTimesVec(1.0, *yrx_, 1.0, *dyc_); Jac_d->transTimesVec(1.0, *yrx_, 1.0, *dyd_); yrx_->axpy(-1.0, *dzl_); - yrx_->axpy( 1.0, *dzu_); + yrx_->axpy(1.0, *dzu_); - //RD = delta_wd_*dd - dyd - dvl + dvu + // RD = delta_wd_*dd - dyd - dvl + dvu yrd_->setToZero(); yrd_->axpy(-1., *dyd_); yrd_->axpy(-1., *dvl_); yrd_->axpy(+1., *dvu_); yrd_->axzpy(1., *delta_wd, *dd_); - //RYC = Jc*dx - delta_cc_*dyc + // RYC = Jc*dx - delta_cc_*dyc Jac_c->timesVec(0.0, *yryc_, 1.0, *dx_); yryc_->axzpy(-1., *delta_cc, *dyc_); - //RYD = Jd*dx - dd - delta_cd_*dyd + // RYD = Jd*dx - dd - delta_cd_*dyd Jac_d->timesVec(0.0, *yryd_, 1.0, *dx_); yryd_->axpy(-1.0, *dd_); yryd_->axzpy(-1., *delta_cd, *dyd_); - //RXL = -dx + dsxl + // RXL = -dx + dsxl yrsxl_->copyFrom(*dsxl_); yrsxl_->axpy(-1.0, *dx_); yrsxl_->selectPattern(kkt_->nlp_->get_ixl()); - //RXU = dx + dsxu + // RXU = dx + dsxu yrsxu_->copyFrom(*dsxu_); - yrsxu_->axpy( 1.0, *dx_); + yrsxu_->axpy(1.0, *dx_); yrsxu_->selectPattern(kkt_->nlp_->get_ixu()); - //RDL = -dd + dsdl + // RDL = -dd + dsdl yrsdl_->copyFrom(*dsdl_); yrsdl_->axpy(-1.0, *dd_); yrsdl_->selectPattern(kkt_->nlp_->get_idl()); - //RDU = dd + dsdu + // RDU = dd + dsdu yrsdu_->copyFrom(*dsdu_); - yrsdu_->axpy( 1.0, *dd_); + yrsdu_->axpy(1.0, *dd_); yrsdu_->selectPattern(kkt_->nlp_->get_idu()); // rszl = Sxl dzxl + Zxl dsxl yrzl_->setToZero(); - yrzl_->axzpy(1.0,*iter_->get_sxl(), *dzl_); - yrzl_->axzpy(1.0,*iter_->get_zl(), *dsxl_); + yrzl_->axzpy(1.0, *iter_->get_sxl(), *dzl_); + yrzl_->axzpy(1.0, *iter_->get_zl(), *dsxl_); // rszu = Sxu dzxu + Zxu dsxu yrzu_->setToZero(); - yrzu_->axzpy(1.0,*iter_->get_sxu(),*dzu_); - yrzu_->axzpy(1.0,*iter_->get_zu(), *dsxu_); + yrzu_->axzpy(1.0, *iter_->get_sxu(), *dzu_); + yrzu_->axzpy(1.0, *iter_->get_zu(), *dsxu_); // rsvl = Sdl dzdl + Zdl dsdl yrvl_->setToZero(); - yrvl_->axzpy(1.0,*iter_->get_sdl(), *dvl_); - yrvl_->axzpy(1.0,*iter_->get_vl(), *dsdl_); + yrvl_->axzpy(1.0, *iter_->get_sdl(), *dvl_); + yrvl_->axzpy(1.0, *iter_->get_vl(), *dsdl_); // rszu = Sdu dzdu + Zdu dsdu yrvu_->setToZero(); - yrvu_->axzpy(1.0,*iter_->get_sdu(),*dvu_); - yrvu_->axzpy(1.0,*iter_->get_vu(), *dsdu_); + yrvu_->axzpy(1.0, *iter_->get_sdu(), *dvu_); + yrvu_->axzpy(1.0, *iter_->get_vu(), *dsdu_); return true; } @@ -1280,13 +1297,13 @@ bool hiopMatVecKKTFullOpr::trans_times_vec(hiopVector& yvec, const hiopVector& x delta_cc = kkt_->perturb_calc_->get_curr_delta_cc(); delta_cd = kkt_->perturb_calc_->get_curr_delta_cd(); - assert(x.get_num_parts()==y.get_num_parts() && x.get_num_parts() == 12); + assert(x.get_num_parts() == y.get_num_parts() && x.get_num_parts() == 12); hiopVector* dx_ = &(x.getVector(0)); hiopVector* dd_ = &(x.getVector(1)); hiopVector* dyc_ = &(x.getVector(2)); hiopVector* dyd_ = &(x.getVector(3)); - hiopVector* dsxl_= &(x.getVector(4)); + hiopVector* dsxl_ = &(x.getVector(4)); hiopVector* dsxu_ = &(x.getVector(5)); hiopVector* dsdl_ = &(x.getVector(6)); hiopVector* dsdu_ = &(x.getVector(7)); @@ -1294,7 +1311,7 @@ bool hiopMatVecKKTFullOpr::trans_times_vec(hiopVector& yvec, const hiopVector& x hiopVector* dzu_ = &(x.getVector(9)); hiopVector* dvl_ = &(x.getVector(10)); hiopVector* dvu_ = &(x.getVector(11)); - + hiopVector* yrx_ = &(y.getVector(0)); hiopVector* yrd_ = &(y.getVector(1)); hiopVector* yryc_ = &(y.getVector(2)); @@ -1308,68 +1325,68 @@ bool hiopMatVecKKTFullOpr::trans_times_vec(hiopVector& yvec, const hiopVector& x hiopVector* yrvl_ = &(y.getVector(10)); hiopVector* yrvu_ = &(y.getVector(11)); - //rx = H*dx + delta_wx_*I*dx + Jc'*dyc + Jd'*dyd - dzl + dzu + // rx = H*dx + delta_wx_*I*dx + Jc'*dyc + Jd'*dyd - dzl + dzu Hess->timesVec(0.0, *yrx_, +1.0, *dx_); yrx_->axzpy(1., *delta_wx, *dx_); Jac_c->transTimesVec(1.0, *yrx_, 1.0, *dyc_); Jac_d->transTimesVec(1.0, *yrx_, 1.0, *dyd_); yrx_->axpy(-1.0, *dzl_); - yrx_->axpy( 1.0, *dzu_); + yrx_->axpy(1.0, *dzu_); - //RD = delta_wd_*dd - dyd - dvl + dvu + // RD = delta_wd_*dd - dyd - dvl + dvu yrd_->setToZero(); yrd_->axpy(-1., *dyd_); yrd_->axpy(-1., *dvl_); yrd_->axpy(+1., *dvu_); yrd_->axzpy(1., *delta_wd, *dd_); - //RYC = Jc*dx - delta_cc_*dyc + // RYC = Jc*dx - delta_cc_*dyc Jac_c->timesVec(0.0, *yryc_, 1.0, *dx_); yryc_->axzpy(-1., *delta_cc, *dyc_); - //RYD = Jd*dx - dd - delta_cd_*dyd + // RYD = Jd*dx - dd - delta_cd_*dyd Jac_d->timesVec(0.0, *yryd_, 1.0, *dx_); yryd_->axpy(-1.0, *dd_); yryd_->axzpy(-1., *delta_cd, *dyd_); - //RXL = -dx + Sxl*dsxl + // RXL = -dx + Sxl*dsxl yrsxl_->setToZero(); yrsxl_->axpy(-1.0, *dx_); - yrsxl_->axzpy(1.0,*iter_->get_sxl(), *dsxl_); + yrsxl_->axzpy(1.0, *iter_->get_sxl(), *dsxl_); yrsxl_->selectPattern(kkt_->nlp_->get_ixl()); - - //RXU = dx + Sxu*dsxu + + // RXU = dx + Sxu*dsxu yrsxu_->copyFrom(*dx_); - yrsxu_->axzpy(1.0,*iter_->get_sxu(), *dsxu_); + yrsxu_->axzpy(1.0, *iter_->get_sxu(), *dsxu_); yrsxu_->selectPattern(kkt_->nlp_->get_ixu()); - //RDL = -dd + Sdl*dsdl + // RDL = -dd + Sdl*dsdl yrsdl_->setToZero(); - yrsdl_->axpy( -1.0, *dd_); - yrsdl_->axzpy(1.0,*iter_->get_sdl(), *dsdl_); + yrsdl_->axpy(-1.0, *dd_); + yrsdl_->axzpy(1.0, *iter_->get_sdl(), *dsdl_); yrsdl_->selectPattern(kkt_->nlp_->get_idl()); - //RDU = dd + Sdu*dsdu + // RDU = dd + Sdu*dsdu yrsdu_->setToZero(); - yrsdu_->axpy( 1.0, *dd_); - yrsdu_->axzpy(1.0,*iter_->get_sdu(), *dsdu_); + yrsdu_->axpy(1.0, *dd_); + yrsdu_->axzpy(1.0, *iter_->get_sdu(), *dsdu_); yrsdu_->selectPattern(kkt_->nlp_->get_idu()); // rszl = dzxl + Zxl*dsxl yrzl_->copyFrom(*dzl_); - yrzl_->axzpy(1.0,*iter_->get_zl(), *dsxl_); + yrzl_->axzpy(1.0, *iter_->get_zl(), *dsxl_); // rszu = dzxu + Zxu*dsxu yrzu_->copyFrom(*dzu_); - yrzu_->axzpy(1.0,*iter_->get_zu(), *dsxu_); + yrzu_->axzpy(1.0, *iter_->get_zu(), *dsxu_); // rsvl = dzdl + Zdl dsdl yrvl_->copyFrom(*dvl_); - yrvl_->axzpy(1.0,*iter_->get_vl(), *dsdl_); + yrvl_->axzpy(1.0, *iter_->get_vl(), *dsdl_); // rszu = dzdu + Zdu dsdu yrvu_->copyFrom(*dvu_); - yrvu_->axzpy(1.0,*iter_->get_vu(), *dsdu_); + yrvu_->axzpy(1.0, *iter_->get_vu(), *dsdu_); return true; } @@ -1379,12 +1396,11 @@ bool hiopMatVecKKTFullOpr::trans_times_vec(hiopVector& yvec, const hiopVector& x // hiopPrecondKKTOpr //////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// -hiopPrecondKKTOpr::hiopPrecondKKTOpr(hiopKKTLinSys* kkt, - const hiopIterate* iter) - : kkt_(kkt), - iter_(iter), - resid_(nullptr), - dir_(nullptr) +hiopPrecondKKTOpr::hiopPrecondKKTOpr(hiopKKTLinSys* kkt, const hiopIterate* iter) + : kkt_(kkt), + iter_(iter), + resid_(nullptr), + dir_(nullptr) { resid_ = new hiopResidual(kkt_->nlp_); dir_ = new hiopIterate(kkt_->nlp_); @@ -1394,22 +1410,16 @@ hiopPrecondKKTOpr::hiopPrecondKKTOpr(hiopKKTLinSys* kkt, res_cv_ = new hiopVectorCompoundPD(resid_); } -bool hiopPrecondKKTOpr::split_vec_to_build_res(const hiopVector& vec) -{ - return true; -} +bool hiopPrecondKKTOpr::split_vec_to_build_res(const hiopVector& vec) { return true; } -bool hiopPrecondKKTOpr::combine_dir_to_build_vec(hiopVector& vec) -{ - return true; -} +bool hiopPrecondKKTOpr::combine_dir_to_build_vec(hiopVector& vec) { return true; } bool hiopPrecondKKTOpr::times_vec(hiopVector& y, const hiopVector& x) { res_cv_->copyFrom(x); - const bool bret = kkt_->computeDirections(resid_, dir_); - + const bool bret = kkt_->computeDirections(resid_, dir_); + y.copyFrom(*dir_cv_); return bret; @@ -1418,14 +1428,13 @@ bool hiopPrecondKKTOpr::times_vec(hiopVector& y, const hiopVector& x) bool hiopPrecondKKTOpr::trans_times_vec(hiopVector& y, const hiopVector& x) { // compressed preconditioner is symmetric - return times_vec(y,x); + return times_vec(y, x); } - hiopKKTLinSysNormalEquation::hiopKKTLinSysNormalEquation(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressed(nlp) + : hiopKKTLinSysCompressed(nlp) { - rd_tilde_ = Dd_->alloc_clone(); + rd_tilde_ = Dd_->alloc_clone(); ryc_tilde_ = nlp->alloc_dual_eq_vec(); ryd_tilde_ = Dd_->alloc_clone(); Hx_ = Dx_->alloc_clone(); @@ -1454,17 +1463,17 @@ bool hiopKKTLinSysNormalEquation::update(const hiopIterate* iter, nlp_->runStats.linsolv.reset(); nlp_->runStats.tmSolverInternal.start(); nlp_->runStats.kkt.tmUpdateInit.start(); - + iter_ = iter; grad_f_ = dynamic_cast(grad_f); Jac_c_ = Jac_c; Jac_d_ = Jac_d; Hess_ = Hess; - size_type nx = Hess_->m(); - assert(nx==Hess_->n()); - assert(nx==Jac_c_->n()); - assert(nx==Jac_d_->n()); + size_type nx = Hess_->m(); + assert(nx == Hess_->n()); + assert(nx == Jac_c_->n()); + assert(nx == Jac_d_->n()); // compute barrier diagonals (these change only between outer optimiz iterations) // Dx=(Sxl)^{-1}Zl + (Sxu)^{-1}Zu @@ -1479,24 +1488,23 @@ bool hiopKKTLinSysNormalEquation::update(const hiopIterate* iter, Dd_->axdzpy_w_pattern(1.0, *iter_->vu, *iter_->sdu, nlp_->get_idu()); nlp_->log->write("Dd in KKT", *Dd_, hovMatrices); #ifdef HIOP_DEEPCHECKS - assert(true==Dd_->allPositive()); + assert(true == Dd_->allPositive()); #endif nlp_->runStats.kkt.tmUpdateInit.stop(); - - //factorization + inertia correction if needed + + // factorization + inertia correction if needed bool retval = factorize(); nlp_->runStats.tmSolverInternal.stop(); return retval; } - bool hiopKKTLinSysNormalEquation::computeDirections(const hiopResidual* resid, hiopIterate* dir) { nlp_->runStats.tmSolverInternal.start(); nlp_->runStats.kkt.tmSolveRhsManip.start(); - const hiopResidual &r = *resid; + const hiopResidual& r = *resid; /*********************************************************************** * perform the reduction to the compressed linear system @@ -1506,49 +1514,50 @@ bool hiopKKTLinSysNormalEquation::computeDirections(const hiopResidual* resid, h rx_tilde_->copyFrom(*r.rx); if(nlp_->n_low_local()) { // rl:=rszl-Zl*rxl (using dir->x as working buffer) - hiopVector &rl=*(dir->x);//temporary working buffer + hiopVector& rl = *(dir->x); // temporary working buffer rl.copyFrom(*r.rszl); rl.axzpy(-1.0, *iter_->zl, *r.rxl); - //rx_tilde = rx+Sxl^{-1}*rl - rx_tilde_->axdzpy_w_pattern( 1.0, rl, *iter_->sxl, nlp_->get_ixl()); + // rx_tilde = rx+Sxl^{-1}*rl + rx_tilde_->axdzpy_w_pattern(1.0, rl, *iter_->sxl, nlp_->get_ixl()); } if(nlp_->n_upp_local()) { - //ru:=rszu-Zu*rxu (using dir->x as working buffer) - hiopVector &ru=*(dir->x);//temporary working buffer - ru.copyFrom(*r.rszu); ru.axzpy(-1.0,*iter_->zu, *r.rxu); - //rx_tilde = rx_tilde - Sxu^{-1}*ru + // ru:=rszu-Zu*rxu (using dir->x as working buffer) + hiopVector& ru = *(dir->x); // temporary working buffer + ru.copyFrom(*r.rszu); + ru.axzpy(-1.0, *iter_->zu, *r.rxu); + // rx_tilde = rx_tilde - Sxu^{-1}*ru rx_tilde_->axdzpy_w_pattern(-1.0, ru, *iter_->sxu, nlp_->get_ixu()); } - //for rd_tilde = rd + Sdl^{-1}*(rsvl-Vl*rdl)-Sdu^{-1}(rsvu-Vu*rdu) + // for rd_tilde = rd + Sdl^{-1}*(rsvl-Vl*rdl)-Sdu^{-1}(rsvu-Vu*rdu) rd_tilde_->copyFrom(*r.rd); if(nlp_->m_ineq_low()) { - hiopVector& rd2=*dir->sdu; - //rd2=rsvl-Vl*rdl + hiopVector& rd2 = *dir->sdu; + // rd2=rsvl-Vl*rdl rd2.copyFrom(*r.rsvl); rd2.axzpy(-1.0, *iter_->vl, *r.rdl); - //rd_tilde += Sdl^{-1}*(rsvl-Vl*rdl) + // rd_tilde += Sdl^{-1}*(rsvl-Vl*rdl) rd_tilde_->axdzpy_w_pattern(1.0, rd2, *iter_->sdl, nlp_->get_idl()); } - if(nlp_->m_ineq_upp()>0) { - hiopVector& rd2=*dir->sdu; - //rd2=rsvu-Vu*rdu + if(nlp_->m_ineq_upp() > 0) { + hiopVector& rd2 = *dir->sdu; + // rd2=rsvu-Vu*rdu rd2.copyFrom(*r.rsvu); rd2.axzpy(-1.0, *iter_->vu, *r.rdu); - //rd_tilde += -Sdu^{-1}(rsvu-Vu*rdu) + // rd_tilde += -Sdu^{-1}(rsvu-Vu*rdu) rd_tilde_->axdzpy_w_pattern(-1.0, rd2, *iter_->sdu, nlp_->get_idu()); } /*********************************************************************** * perform the reduction to the compressed linear system - * [ ryc_tilde ] = [ Jc 0 ] [ H+Dx+delta_wx_ 0 ]^{-1} [ rx_tilde ] - [ ryc ] + * [ ryc_tilde ] = [ Jc 0 ] [ H+Dx+delta_wx_ 0 ]^{-1} [ rx_tilde ] - [ ryc ] * [ ryd_tilde ] [ Jd -I ] [ 0 Dd+delta_wd_ ] [ rd_tilde ] [ ryd ] */ /*********************************************************************** * TODO: now we assume H is empty or diagonal - * hence we have - * [ ryc_tilde ] = [ Jc ] [H+Dx+delta_wx_]^{-1} [ rx_tilde ] - [ ryc ] + * hence we have + * [ ryc_tilde ] = [ Jc ] [H+Dx+delta_wx_]^{-1} [ rx_tilde ] - [ ryc ] * [ ryd_tilde ] [ Jd ] [H+Dx+delta_wx_]^{-1} [ rx_tilde ] - [ Dd+delta_wd_ ]^{-1} [ rd_tilde ] - [ ryd ] */ { @@ -1558,7 +1567,7 @@ bool hiopKKTLinSysNormalEquation::computeDirections(const hiopResidual* resid, h ryc_tilde_->copyFrom(*r.ryc); Jac_c_->timesVec(-1.0, *ryc_tilde_, 1.0, *x_wrk_); - + /* d_wrk_ = [ Dd+delta_wd_ ]^{-1} [ rd_tilde ] */ d_wrk_->copyFrom(*rd_tilde_); d_wrk_->componentDiv(*Hd_); @@ -1578,36 +1587,35 @@ bool hiopKKTLinSysNormalEquation::computeDirections(const hiopResidual* resid, h nlp_->runStats.kkt.tmSolveRhsManip.start(); /*********************************************************************** - * TODO: now we assume H is empty or diagonal - * hence from - * [ H+Dx+delta_wx_ 0 ] [dx] = [ rx_tilde ] - [ Jc^T Jd^T] [dyc] - * [ 0 Dd+delta_wd_ ] [dd] [ rd_tilde ] [ 0 -I ] [dyd] - * we can recover - * [dx] = [ H+Dx+delta_wx_ ]^{-1} ( [ rx_tilde ] - [ Jc^T ] [dyc] - [Jd^T] [dyd] ) - * [dd] = [ Dd+delta_wd_ ]^{-1} ( [ rd_tilde ] + [dyd] ) - */ + * TODO: now we assume H is empty or diagonal + * hence from + * [ H+Dx+delta_wx_ 0 ] [dx] = [ rx_tilde ] - [ Jc^T Jd^T] [dyc] + * [ 0 Dd+delta_wd_ ] [dd] [ rd_tilde ] [ 0 -I ] [dyd] + * we can recover + * [dx] = [ H+Dx+delta_wx_ ]^{-1} ( [ rx_tilde ] - [ Jc^T ] [dyc] - [Jd^T] [dyd] ) + * [dd] = [ Dd+delta_wd_ ]^{-1} ( [ rd_tilde ] + [dyd] ) + */ dir->x->copyFrom(*rx_tilde_); Jac_c_->transTimesVec(1.0, *dir->x, -1.0, *dir->yc); Jac_d_->transTimesVec(1.0, *dir->x, -1.0, *dir->yd); dir->x->componentDiv(*Hx_); dir->d->copyFrom(*rd_tilde_); - dir->d->axpy(1.0,*dir->yd); + dir->d->axpy(1.0, *dir->yd); dir->d->componentDiv(*Hd_); nlp_->runStats.kkt.tmSolveRhsManip.stop(); - + if(false == sol_ok) { nlp_->runStats.tmSolverInternal.stop(); return false; } - + const bool bret = compute_directions_for_full_space(resid, dir); - + nlp_->runStats.tmSolverInternal.stop(); return bret; } - bool hiopKKTLinSysFull::test_direction(const hiopIterate* dir, hiopMatrix* Hess) { bool retval; @@ -1637,7 +1645,7 @@ bool hiopKKTLinSysFull::test_direction(const hiopIterate* dir, hiopMatrix* Hess) /* compute xWx = x(H+Dx_)x (for primal var [x,d] */ Hess_->timesVec(0.0, *x_wrk_, 1.0, *sol_x); dWd += x_wrk_->dotProductWith(*sol_x); - + // Dx=(Sxl)^{-1}Zl + (Sxu)^{-1}Zu x_wrk_->setToZero(); x_wrk_->axdzpy_w_pattern(1.0, *iter_->zl, *iter_->sxl, nlp_->get_ixl()); @@ -1656,9 +1664,9 @@ bool hiopKKTLinSysFull::test_direction(const hiopIterate* dir, hiopMatrix* Hess) /* compute rhs for the dWd test */ dbl_wrk = sol_x->twonorm(); - xs_nrmsq += dbl_wrk*dbl_wrk; + xs_nrmsq += dbl_wrk * dbl_wrk; dbl_wrk = sol_d->twonorm(); - xs_nrmsq += dbl_wrk*dbl_wrk; + xs_nrmsq += dbl_wrk * dbl_wrk; if(dWd < xs_nrmsq * nlp_->options->GetNumeric("neg_curv_test_fact")) { // have negative curvature. Add regularization and re-factorize the matrix @@ -1667,12 +1675,9 @@ bool hiopKKTLinSysFull::test_direction(const hiopIterate* dir, hiopMatrix* Hess) // have positive curvature. Accept this factoraizaiton and direction. retval = true; } - + nlp_->runStats.tmSolverInternal.stop(); return retval; } - - -}; - +}; // namespace hiop diff --git a/src/Optimization/hiopKKTLinSys.hpp b/src/Optimization/hiopKKTLinSys.hpp index 98dee25e4..530e43b94 100644 --- a/src/Optimization/hiopKKTLinSys.hpp +++ b/src/Optimization/hiopKKTLinSys.hpp @@ -62,7 +62,7 @@ namespace hiop { - + class hiopMatVecKKTFullOpr; class hiopPrecondKKTOpr; @@ -75,15 +75,17 @@ class hiopKKTLinSys /** * Updates the parts in KKT system that are dependent on the iterate. * It may trigger a refactorization for direct linear systems, or it may not do - * anything, for example, LowRank KKT linear system + * anything, for example, LowRank KKT linear system */ virtual bool update(const hiopIterate* iter, - const hiopVector* grad_f, - const hiopMatrix* Jac_c, const hiopMatrix* Jac_d, hiopMatrix* Hess) = 0; - + const hiopVector* grad_f, + const hiopMatrix* Jac_c, + const hiopMatrix* Jac_d, + hiopMatrix* Hess) = 0; + /** * Forms the residual of the underlying linear system. It uses the factorization - * computed by `update` to compute the "reduced-space" (i.e., compressed, condensed, etc.) + * computed by `update` to compute the "reduced-space" (i.e., compressed, condensed, etc.) * search directions by solving with the factors, then computes the "full-space" directions */ virtual bool computeDirections(const hiopResidual* resid, hiopIterate* direction) = 0; virtual bool compute_directions_w_IR(const hiopResidual* resid, hiopIterate* direction); @@ -92,80 +94,62 @@ class hiopKKTLinSys virtual bool factorize_inertia_free() = 0; - /* curvature test for inertia-free approach */ + /* curvature test for inertia-free approach */ virtual bool test_direction(const hiopIterate* dir, hiopMatrix* Hess) = 0; - virtual void set_PD_perturb_calc(hiopPDPerturbation* p) - { - perturb_calc_ = p; - } + virtual void set_PD_perturb_calc(hiopPDPerturbation* p) { perturb_calc_ = p; } - virtual void set_fact_acceptor(hiopFactAcceptor* p_fact_acceptor) - { - fact_acceptor_ = p_fact_acceptor; - } - - inline void set_safe_mode(bool val) - { - safe_mode_ = val; - } + virtual void set_fact_acceptor(hiopFactAcceptor* p_fact_acceptor) { fact_acceptor_ = p_fact_acceptor; } + + inline void set_safe_mode(bool val) { safe_mode_ = val; } /// @brief Sets the log barrier parameter `mu` - inline void set_logbar_mu(double mu) - { - mu_ = mu; - } + inline void set_logbar_mu(double mu) { mu_ = mu; } /** * Returns the absolute residual norm at the last KKT solve. * - * The returned norm can be an only hint/approximation of the true residual norm in cases the last + * The returned norm can be an only hint/approximation of the true residual norm in cases the last * solve is successful. If the KKT solve fails (i.e., one of the `compute_directions` methods fails) * the KKT class should return a good approximation of the norm of residual; if this is not feasible, - * it is better to return an optimistic underestimate (lower than the true residual norm) so that the + * it is better to return an optimistic underestimate (lower than the true residual norm) so that the * IPM does not activate agressive regularization strategies unnecessarily. */ - virtual double get_resid_norm_abs() const - { - return 0.0; - } - + virtual double get_resid_norm_abs() const { return 0.0; } + /** * Returns the relative residual norm at the last KKT solve. * - * The returned norm can be an only hint/approximation of the true residual norm in cases the last + * The returned norm can be an only hint/approximation of the true residual norm in cases the last * solve is successful. If the KKT solve fails (i.e., one of the `compute_directions` methods fails) * the KKT class should return a good approximation of the norm of residual; if this is not feasible, - * it is better to return an optimistic underestimate (lower than the true residual norm) so that the + * it is better to return an optimistic underestimate (lower than the true residual norm) so that the * IPM does not activate agressive regularization strategies unnecessarily. */ - virtual double get_resid_norm_rel() const - { - return 0.0; - } + virtual double get_resid_norm_rel() const { return 0.0; } /** - * Compute the inf norm of residual for the KKT linear system. + * Compute the inf norm of residual for the KKT linear system. * - * This is not currently used by the IPM algorithm since small-enough residual error + * This is not currently used by the IPM algorithm since small-enough residual error * for the inner linear system, as reported by the linear solver, is indicative of - * small KKT error. The method is called under HIOP_DEEPCHECKS to report residuals of + * small KKT error. The method is called under HIOP_DEEPCHECKS to report residuals of * large inf-norm. */ virtual double errorKKT(const hiopResidual* resid, const hiopIterate* sol); - - inline hiopPDPerturbation* get_perturb_calc() const {return perturb_calc_;} + + inline hiopPDPerturbation* get_perturb_calc() const { return perturb_calc_; } + protected: - /** + /** * @brief y=beta*y+alpha*H*x - * + * * @pre Should not include log barrier diagonal terms * @pre Should not include IC perturbations * * A default implementation is below */ - virtual void HessianTimesVec_noLogBarrierTerm(double beta, hiopVector& y, - double alpha, const hiopVector&x) + virtual void HessianTimesVec_noLogBarrierTerm(double beta, hiopVector& y, double alpha, const hiopVector& x) { Hess_->timesVec(beta, y, alpha, x); } @@ -177,20 +161,20 @@ class hiopKKTLinSys const hiopMatrix *Jac_c_, *Jac_d_; hiopMatrix* Hess_; hiopPDPerturbation* perturb_calc_; - hiopFactAcceptor* fact_acceptor_; + hiopFactAcceptor* fact_acceptor_; bool perf_report_; bool safe_mode_; double mu_; /// Matrix operator performing mat-vec with given kkt linear system - hiopMatVecKKTFullOpr *kkt_opr_; + hiopMatVecKKTFullOpr* kkt_opr_; /// Preconditioner operator that solves with the given (usually compressed) KKT system - hiopPrecondKKTOpr *prec_opr_; + hiopPrecondKKTOpr* prec_opr_; /// iterative refinement from BiCGStab solver hiopBiCGStabSolver* bicgIR_; - + friend class hiopMatVecKKTFullOpr; friend class hiopPrecondKKTOpr; @@ -205,14 +189,11 @@ class hiopKKTLinSysCurvCheck : public hiopKKTLinSys { public: hiopKKTLinSysCurvCheck(hiopNlpFormulation* nlp) - : hiopKKTLinSys(nlp), linSys_{nullptr} - { - } + : hiopKKTLinSys(nlp), + linSys_{nullptr} + {} - virtual ~hiopKKTLinSysCurvCheck() - { - delete linSys_; - } + virtual ~hiopKKTLinSysCurvCheck() { delete linSys_; } virtual bool update(const hiopIterate* iter, const hiopVector* grad_f, @@ -223,42 +204,40 @@ class hiopKKTLinSysCurvCheck : public hiopKKTLinSys virtual bool computeDirections(const hiopResidual* resid, hiopIterate* direction) = 0; virtual bool factorize(); - + virtual bool factorize_inertia_free(); - /* curvature test for inertia-free approach */ + /* curvature test for inertia-free approach */ virtual bool test_direction(const hiopIterate* dir, hiopMatrix* Hess) = 0; - + /** * @brief factorize the matrix and check curvature - */ + */ virtual int factorizeWithCurvCheck(); - /** + /** * @brief updates the iterate matrix, given regularizations 'delta_wx', 'delta_wd', 'delta_cc' and 'delta_cd'. */ virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg) = 0; hiopLinSolver* linSys_; - }; - class hiopKKTLinSysCompressed : public hiopKKTLinSysCurvCheck { public: hiopKKTLinSysCompressed(hiopNlpFormulation* nlp) - : hiopKKTLinSysCurvCheck(nlp), - Dx_(nullptr), - Dd_(nullptr), - rx_tilde_(nullptr), - x_wrk_(nullptr), - d_wrk_(nullptr) + : hiopKKTLinSysCurvCheck(nlp), + Dx_(nullptr), + Dd_(nullptr), + rx_tilde_(nullptr), + x_wrk_(nullptr), + d_wrk_(nullptr) { Dx_ = nlp->alloc_primal_vec(); assert(Dx_ != nullptr); - rx_tilde_ = Dx_->alloc_clone(); - Dd_ = nlp->alloc_dual_ineq_vec(); + rx_tilde_ = Dx_->alloc_clone(); + Dd_ = nlp->alloc_dual_ineq_vec(); } virtual ~hiopKKTLinSysCompressed() { @@ -273,14 +252,17 @@ class hiopKKTLinSysCompressed : public hiopKKTLinSysCurvCheck } } virtual bool update(const hiopIterate* iter, - const hiopVector* grad_f, - const hiopMatrix* Jac_c, const hiopMatrix* Jac_d, hiopMatrix* Hess) = 0; + const hiopVector* grad_f, + const hiopMatrix* Jac_c, + const hiopMatrix* Jac_d, + hiopMatrix* Hess) = 0; virtual bool test_direction(const hiopIterate* dir, hiopMatrix* Hess); virtual bool computeDirections(const hiopResidual* resid, hiopIterate* direction) = 0; virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg) = 0; + protected: hiopVector* Dx_; hiopVector* Dd_; @@ -304,31 +286,35 @@ class hiopKKTLinSysCompressedXYcYd : public hiopKKTLinSysCompressed hiopKKTLinSysCompressedXYcYd(hiopNlpFormulation* nlp); virtual ~hiopKKTLinSysCompressedXYcYd(); - virtual bool update(const hiopIterate* iter, - const hiopVector* grad_f, - const hiopMatrix* Jac_c, const hiopMatrix* Jac_d, + virtual bool update(const hiopIterate* iter, + const hiopVector* grad_f, + const hiopMatrix* Jac_c, + const hiopMatrix* Jac_d, hiopMatrix* Hess); - virtual bool computeDirections(const hiopResidual* resid, hiopIterate* direction); virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg) = 0; - virtual bool solveCompressed(hiopVector& rx, hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dyc, hiopVector& dyd) = 0; + virtual bool solveCompressed(hiopVector& rx, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dyc, + hiopVector& dyd) = 0; #ifdef HIOP_DEEPCHECKS virtual double errorCompressedLinsys(const hiopVector& rx, - const hiopVector& ryc, - const hiopVector& ryd, - const hiopVector& dx, - const hiopVector& dyc, - const hiopVector& dyd); + const hiopVector& ryc, + const hiopVector& ryd, + const hiopVector& dx, + const hiopVector& dyc, + const hiopVector& dyd); #endif protected: - hiopVector *Dd_inv_; - hiopVector *ryd_tilde_; + hiopVector* Dd_inv_; + hiopVector* ryd_tilde_; }; /* Provides the functionality for reducing the KKT linear system to the @@ -349,42 +335,48 @@ class hiopKKTLinSysCompressedXDYcYd : public hiopKKTLinSysCompressed hiopKKTLinSysCompressedXDYcYd(hiopNlpFormulation* nlp); virtual ~hiopKKTLinSysCompressedXDYcYd(); - virtual bool update(const hiopIterate* iter, - const hiopVector* grad_f, - const hiopMatrix* Jac_c, const hiopMatrix* Jac_d, hiopMatrix* Hess); + virtual bool update(const hiopIterate* iter, + const hiopVector* grad_f, + const hiopMatrix* Jac_c, + const hiopMatrix* Jac_d, + hiopMatrix* Hess); virtual bool computeDirections(const hiopResidual* resid, hiopIterate* direction); virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg) = 0; - virtual bool solveCompressed(hiopVector& rx, hiopVector& rd, - hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dd, - hiopVector& dyc, hiopVector& dyd) = 0; + virtual bool solveCompressed(hiopVector& rx, + hiopVector& rd, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dd, + hiopVector& dyc, + hiopVector& dyd) = 0; #ifdef HIOP_DEEPCHECKS - virtual double errorCompressedLinsys(const hiopVector& rx, const hiopVector& rd, - const hiopVector& ryc, const hiopVector& ryd, - const hiopVector& dx, const hiopVector& dd, - const hiopVector& dyc, const hiopVector& dyd); + virtual double errorCompressedLinsys(const hiopVector& rx, + const hiopVector& rd, + const hiopVector& ryc, + const hiopVector& ryd, + const hiopVector& dx, + const hiopVector& dd, + const hiopVector& dyc, + const hiopVector& dyd); #endif protected: hiopVector* rd_tilde_; #ifdef HIOP_DEEPCHECKS - //y=beta*y+alpha*H*x - virtual void HessianTimesVec_noLogBarrierTerm(double beta, hiopVector& y, - double alpha, const hiopVector&x) + // y=beta*y+alpha*H*x + virtual void HessianTimesVec_noLogBarrierTerm(double beta, hiopVector& y, double alpha, const hiopVector& x) { Hess_->timesVec(beta, y, alpha, x); } #endif }; - - - /* * Solves hiopKKTLinSysFull by exploiting the sparse structure * @@ -424,43 +416,64 @@ class hiopKKTLinSysCompressedXDYcYd : public hiopKKTLinSysCompressed * [ 0 0 0 0 | 0 0 Sl^d 0 | 0 0 Vl 0 ] [dsdl] [ rsvl ] * [ 0 0 0 0 | 0 0 0 Su^d | 0 0 0 Vu ] [dsdu] [ rsvu ] */ -class hiopKKTLinSysFull: public hiopKKTLinSysCurvCheck +class hiopKKTLinSysFull : public hiopKKTLinSysCurvCheck { public: hiopKKTLinSysFull(hiopNlpFormulation* nlp) - : hiopKKTLinSysCurvCheck(nlp), - x_wrk_{nullptr}, - d_wrk_{nullptr} + : hiopKKTLinSysCurvCheck(nlp), + x_wrk_{nullptr}, + d_wrk_{nullptr} {} virtual ~hiopKKTLinSysFull() { delete x_wrk_; - delete d_wrk_; + delete d_wrk_; } virtual bool update(const hiopIterate* iter, const hiopVector* grad_f, - const hiopMatrix* Jac_c, const hiopMatrix* Jac_d, hiopMatrix* Hess); + const hiopMatrix* Jac_c, + const hiopMatrix* Jac_d, + hiopMatrix* Hess); virtual bool test_direction(const hiopIterate* dir, hiopMatrix* Hess); virtual bool computeDirections(const hiopResidual* resid, hiopIterate* direction); virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg) = 0; - - virtual bool solve( hiopVector& rx, hiopVector& ryc, hiopVector& ryd, hiopVector& rd, - hiopVector& rdl, hiopVector& rdu, hiopVector& rxl, hiopVector& rxu, - hiopVector& rsvl, hiopVector& rsvu, hiopVector& rszl, hiopVector& rszu, - hiopVector& dx, hiopVector& dyc, hiopVector& dyd, hiopVector& dd, - hiopVector& dvl, hiopVector& dvu, hiopVector& dzl, hiopVector& dzu, - hiopVector& dsdl, hiopVector& dsdu, hiopVector& dsxl, hiopVector& dsxu)=0; + + virtual bool solve(hiopVector& rx, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& rd, + hiopVector& rdl, + hiopVector& rdu, + hiopVector& rxl, + hiopVector& rxu, + hiopVector& rsvl, + hiopVector& rsvu, + hiopVector& rszl, + hiopVector& rszu, + hiopVector& dx, + hiopVector& dyc, + hiopVector& dyd, + hiopVector& dd, + hiopVector& dvl, + hiopVector& dvu, + hiopVector& dzl, + hiopVector& dzu, + hiopVector& dsdl, + hiopVector& dsdu, + hiopVector& dsxl, + hiopVector& dsxu) = 0; + protected: hiopVector* x_wrk_; hiopVector* d_wrk_; }; -/** +/** * @brief Provides the functionality for reducing the KKT linear system to the * normal equation system below in dyc and dyd variables and then to perform * the basic ops needed to compute the remaining directions @@ -470,13 +483,13 @@ class hiopKKTLinSysFull: public hiopKKTLinSysCurvCheck * [ Jc 0 ] [ H + Dx 0 ]^{-1} [ Jc^T Jd^T] [dyc] = [ ryc_tilde ] * [ Jd -I ] [ 0 Dd ] [ 0 -I ] [dyd] [ ryd_tilde ] * - * [ ryc_tilde ] = [ Jc 0 ] [ H+Dx+delta_wx 0 ]^{-1} [ rx_tilde ] - [ ryc ] + * [ ryc_tilde ] = [ Jc 0 ] [ H+Dx+delta_wx 0 ]^{-1} [ rx_tilde ] - [ ryc ] * [ ryd_tilde ] [ Jd -I ] [ 0 Dd+delta_wd ] [ rd_tilde ] [ ryd ] - * + * * and then to compute the rest of the search directions from * [ H+Dx+delta_wx 0 ] [dx] = [ rx_tilde ] - [ Jc^T Jd^T] [dyc] * [ 0 Dd+delta_wd ] [dd] [ rd_tilde ] [ 0 -I ] [dyd] - * + * */ class hiopKKTLinSysNormalEquation : public hiopKKTLinSysCompressed { @@ -484,8 +497,8 @@ class hiopKKTLinSysNormalEquation : public hiopKKTLinSysCompressed hiopKKTLinSysNormalEquation(hiopNlpFormulation* nlp); virtual ~hiopKKTLinSysNormalEquation(); - virtual bool update(const hiopIterate* iter, - const hiopVector* grad_f, + virtual bool update(const hiopIterate* iter, + const hiopVector* grad_f, const hiopMatrix* Jac_c, const hiopMatrix* Jac_d, hiopMatrix* Hess); @@ -494,14 +507,11 @@ class hiopKKTLinSysNormalEquation : public hiopKKTLinSysCompressed virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg) = 0; - virtual bool solveCompressed(hiopVector& ryc_tilde, - hiopVector& ryd_tilde, - hiopVector& dyc, - hiopVector& dyd) = 0; + virtual bool solveCompressed(hiopVector& ryc_tilde, hiopVector& ryd_tilde, hiopVector& dyc, hiopVector& dyd) = 0; /** * @brief factorize the matrix and check curvature - */ + */ virtual int factorizeWithCurvCheck() = 0; protected: @@ -512,14 +522,13 @@ class hiopKKTLinSysNormalEquation : public hiopKKTLinSysCompressed hiopVector* Hx_; // [diag(H)+Dx+delta_wx] hiopVector* Hd_; // [Dd+delta_wd ] - hiopVector *x_wrk_; - hiopVector *d_wrk_; + hiopVector* x_wrk_; + hiopVector* d_wrk_; }; - -/** +/** * operators for KKT mat-vec operations - * + * * Full KKT matrix is * [ H 0 Jc^T Jd^T | -I I 0 0 | 0 0 0 0 ] [ dx] [ rx ] * [ 0 0 0 -I | 0 0 -I I | 0 0 0 0 ] [ dd] [ rd ] @@ -544,7 +553,7 @@ class hiopMatVecKKTFullOpr : public hiopLinearOperator virtual ~hiopMatVecKKTFullOpr() { - delete resid_; + delete resid_; delete dir_; delete dir_cv_; delete res_cv_; @@ -557,7 +566,7 @@ class hiopMatVecKKTFullOpr : public hiopLinearOperator virtual bool trans_times_vec(hiopVector& y, const hiopVector& x); /* need to reset the pointer to the current iter, since the outer loop keeps swtiching between curr_iter and trial_iter */ - inline void reset_curr_iter(const hiopIterate* iter) {iter_ = iter;} + inline void reset_curr_iter(const hiopIterate* iter) { iter_ = iter; } private: hiopKKTLinSys* kkt_; @@ -566,30 +575,30 @@ class hiopMatVecKKTFullOpr : public hiopLinearOperator hiopIterate* dir_; hiopMatVecKKTFullOpr() - : kkt_(nullptr), - resid_(nullptr), - dir_(nullptr) + : kkt_(nullptr), + resid_(nullptr), + dir_(nullptr) { assert(false && "this constructor should not be used"); } - /** @brief split a large vector to build a hiopIterate object. + /** @brief split a large vector to build a hiopIterate object. * Note that the size of vector is equal to the size of full KKT. * TODO: revisit this function after we implement compound vector */ bool split_vec_to_build_it(const hiopVector& vec); - /** @brief combine vectors from a hiopResidual object into a large vector. + /** @brief combine vectors from a hiopResidual object into a large vector. * Note that the size of vector is equal to the size of full KKT. * TODO: revisit this function after we implement compound vector */ bool combine_res_to_build_vec(hiopVector& vec); - + hiopVectorCompoundPD* dir_cv_; hiopVectorCompoundPD* res_cv_; }; -/** +/** * operators for KKT preconditioner */ class hiopPrecondKKTOpr : public hiopLinearOperator @@ -599,7 +608,7 @@ class hiopPrecondKKTOpr : public hiopLinearOperator virtual ~hiopPrecondKKTOpr() { - delete resid_; + delete resid_; delete dir_; delete dir_cv_; delete res_cv_; @@ -618,20 +627,20 @@ class hiopPrecondKKTOpr : public hiopLinearOperator hiopIterate* dir_; hiopPrecondKKTOpr() - : kkt_(nullptr), - resid_(nullptr), - dir_(nullptr) + : kkt_(nullptr), + resid_(nullptr), + dir_(nullptr) { assert(false && "this constructor should not be used"); } - - /** @brief split a large vector to build a hiopResidual object. + + /** @brief split a large vector to build a hiopResidual object. * Note that the size of vector is equal to the size of full KKT. * TODO: revisit this function after we implement compound vector */ virtual bool split_vec_to_build_res(const hiopVector& vec); - /** @brief combine vectors from a hiopIterate object into a large vector. + /** @brief combine vectors from a hiopIterate object into a large vector. * Note that the size of vector is equal to the size of full KKT. * TODO: revisit this function after we implement compound vector */ @@ -641,6 +650,6 @@ class hiopPrecondKKTOpr : public hiopLinearOperator hiopVectorCompoundPD* res_cv_; }; -}; +}; // namespace hiop #endif diff --git a/src/Optimization/hiopKKTLinSysDense.hpp b/src/Optimization/hiopKKTLinSysDense.hpp index 907904088..956fc2c0c 100644 --- a/src/Optimization/hiopKKTLinSysDense.hpp +++ b/src/Optimization/hiopKKTLinSysDense.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_KKTLINSYSY_DENSE @@ -61,26 +61,24 @@ namespace hiop { -/* KKT system treated as dense; used for development/testing purposes mainly - * updates the parts in KKT system that are dependent on the iterate. - * Triggers a refactorization for the dense linear system +/* KKT system treated as dense; used for development/testing purposes mainly + * updates the parts in KKT system that are dependent on the iterate. + * Triggers a refactorization for the dense linear system * Forms the linear system * [ H + Dx Jc^T Jd^T ] [ dx] [ rx_tilde ] * [ Jc 0 0 ] [dyc] = [ ryc ] - * [ Jd 0 -Dd^{-1} ] [dyd] [ ryd ] - */ + * [ Jd 0 -Dd^{-1} ] [dyd] [ ryd ] + */ class hiopKKTLinSysDenseXYcYd : public hiopKKTLinSysCompressedXYcYd { public: hiopKKTLinSysDenseXYcYd(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressedXYcYd(nlp),rhsXYcYd(NULL), - write_linsys_counter(-1), csr_writer(nlp) - { - } - virtual ~hiopKKTLinSysDenseXYcYd() - { - delete rhsXYcYd; - } + : hiopKKTLinSysCompressedXYcYd(nlp), + rhsXYcYd(NULL), + write_linsys_counter(-1), + csr_writer(nlp) + {} + virtual ~hiopKKTLinSysDenseXYcYd() { delete rhsXYcYd; } virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg) { @@ -91,64 +89,59 @@ class hiopKKTLinSysDenseXYcYd : public hiopKKTLinSysCompressedXYcYd assert(nlp_); - int nx = Hess_->m(); - assert(nx==Hess_->n()); assert(nx==Jac_c_->n()); assert(nx==Jac_d_->n()); + int nx = Hess_->m(); + assert(nx == Hess_->n()); + assert(nx == Jac_c_->n()); + assert(nx == Jac_d_->n()); int neq = Jac_c_->m(), nineq = Jac_d_->m(); - - if(NULL==linSys_) { - int n=Jac_c_->m() + Jac_d_->m() + Hess_->m(); - if(nlp_->options->GetString("compute_mode")=="hybrid" || - nlp_->options->GetString("compute_mode")=="gpu") { + if(NULL == linSys_) { + int n = Jac_c_->m() + Jac_d_->m() + Hess_->m(); + + if(nlp_->options->GetString("compute_mode") == "hybrid" || nlp_->options->GetString("compute_mode") == "gpu") { #ifdef HIOP_USE_MAGMA - linSys_ = new hiopLinSolverSymDenseMagmaNopiv(n, nlp_); - nlp_->log->printf(hovScalars, - "LinSysDenseXYcYd: instantiating Magma for a matrix of size %d\n", - n); + linSys_ = new hiopLinSolverSymDenseMagmaNopiv(n, nlp_); + nlp_->log->printf(hovScalars, "LinSysDenseXYcYd: instantiating Magma for a matrix of size %d\n", n); #else - linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); - nlp_->log->printf(hovScalars, - "LinSysDenseXYcYd: instantiating Lapack for a matrix of size %d\n", - n); + linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); + nlp_->log->printf(hovScalars, "LinSysDenseXYcYd: instantiating Lapack for a matrix of size %d\n", n); #endif } else { - linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); - nlp_->log->printf(hovScalars, - "LinSysDenseXYcYd: instantiating Lapack for a matrix of size %d\n", - n); + linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); + nlp_->log->printf(hovScalars, "LinSysDenseXYcYd: instantiating Lapack for a matrix of size %d\n", n); } } - hiopLinSolverSymDense* linSys = dynamic_cast (linSys_); + hiopLinSolverSymDense* linSys = dynamic_cast(linSys_); hiopMatrixDense& Msys = linSys->sysMatrix(); - + // // update linSys system matrix, including IC perturbations // nlp_->runStats.kkt.tmUpdateLinsys.start(); - + Msys.setToZero(); - + int alpha = 1.; Hess_->addUpperTriangleToSymDenseMatrixUpperTriangle(0, alpha, Msys); - - Jac_c_->transAddToSymDenseMatrixUpperTriangle(0, nx, alpha, Msys); - Jac_d_->transAddToSymDenseMatrixUpperTriangle(0, nx+neq, alpha, Msys); - + + Jac_c_->transAddToSymDenseMatrixUpperTriangle(0, nx, alpha, Msys); + Jac_d_->transAddToSymDenseMatrixUpperTriangle(0, nx + neq, alpha, Msys); + Msys.addSubDiagonal(alpha, 0, *Dx_); Msys.addSubDiagonal(alpha, 0, *delta_wx_); - //Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu + delta_wd*I + // Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu + delta_wd*I Dd_inv_->copyFrom(*delta_wd_); Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl()); Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vu, *iter_->sdu, nlp_->get_idu()); #ifdef HIOP_DEEPCHECKS - assert(true==Dd_inv_->allPositive()); + assert(true == Dd_inv_->allPositive()); #endif Dd_inv_->invert(); - - alpha=-1.; - Msys.addSubDiagonal(alpha, nx+neq, *Dd_inv_); + + alpha = -1.; + Msys.addSubDiagonal(alpha, nx + neq, *Dd_inv_); #ifdef HIOP_DEEPCHECKS assert(perturb_calc_->check_consistency() && "something went wrong with IC"); @@ -157,50 +150,53 @@ class hiopKKTLinSysDenseXYcYd : public hiopKKTLinSysCompressedXYcYd nlp_->log->write("KKT Linsys:", Msys, hovMatrices); - //write matrix to file if requested + // write matrix to file if requested if(nlp_->options->GetString("write_kkt") == "yes") { write_linsys_counter++; } - if(write_linsys_counter>=0) { + if(write_linsys_counter >= 0) { csr_writer.writeMatToFile(Msys, write_linsys_counter, nx, neq, nineq); } - + return true; } - virtual bool solveCompressed(hiopVector& rx, hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dyc, hiopVector& dyd) + virtual bool solveCompressed(hiopVector& rx, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dyc, + hiopVector& dyd) { - hiopLinSolverSymDense* linSys = dynamic_cast (linSys_); + hiopLinSolverSymDense* linSys = dynamic_cast(linSys_); assert(linSys && "fail to get an object for correct linear system"); - int nx=rx.get_size(), nyc=ryc.get_size(), nyd=ryd.get_size(); + int nx = rx.get_size(), nyc = ryc.get_size(), nyd = ryd.get_size(); if(rhsXYcYd == nullptr) { - rhsXYcYd = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), - nx+nyc+nyd); + rhsXYcYd = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx + nyc + nyd); } - nlp_->log->write("RHS KKT XYcYd rx: ", rx, hovIteration); + nlp_->log->write("RHS KKT XYcYd rx: ", rx, hovIteration); nlp_->log->write("RHS KKT XYcYd ryc:", ryc, hovIteration); nlp_->log->write("RHS KKT XYcYd ryd:", ryd, hovIteration); - rx. copyToStarting(*rhsXYcYd, 0); + rx.copyToStarting(*rhsXYcYd, 0); ryc.copyToStarting(*rhsXYcYd, nx); - ryd.copyToStarting(*rhsXYcYd, nx+nyc); + ryd.copyToStarting(*rhsXYcYd, nx + nyc); - if(write_linsys_counter>=0) csr_writer.writeRhsToFile(*rhsXYcYd, write_linsys_counter); + if(write_linsys_counter >= 0) csr_writer.writeRhsToFile(*rhsXYcYd, write_linsys_counter); //! todo: iterative refinement bool sol_ok = linSys->solve(*rhsXYcYd); - if(write_linsys_counter>=0) csr_writer.writeSolToFile(*rhsXYcYd, write_linsys_counter); + if(write_linsys_counter >= 0) csr_writer.writeSolToFile(*rhsXYcYd, write_linsys_counter); - if(false==sol_ok) return false; + if(false == sol_ok) return false; - rhsXYcYd->copyToStarting(0, dx); - rhsXYcYd->copyToStarting(nx, dyc); - rhsXYcYd->copyToStarting(nx+nyc, dyd); + rhsXYcYd->copyToStarting(0, dx); + rhsXYcYd->copyToStarting(nx, dyc); + rhsXYcYd->copyToStarting(nx + nyc, dyd); - nlp_->log->write("SOL KKT XYcYd dx: ", dx, hovMatrices); + nlp_->log->write("SOL KKT XYcYd dx: ", dx, hovMatrices); nlp_->log->write("SOL KKT XYcYd dyc:", dyc, hovMatrices); nlp_->log->write("SOL KKT XYcYd dyd:", dyd, hovMatrices); return true; @@ -208,18 +204,20 @@ class hiopKKTLinSysDenseXYcYd : public hiopKKTLinSysCompressedXYcYd protected: hiopVector* rhsXYcYd; - + /** -1 when disabled; otherwise acts like a counter, 0,1,... * incremented each time 'solveCompressed' is called depends on the 'write_kkt' option */ - int write_linsys_counter; + int write_linsys_counter; hiopCSR_IO csr_writer; + private: - hiopKKTLinSysDenseXYcYd() - : hiopKKTLinSysCompressedXYcYd(NULL), - write_linsys_counter(-1), csr_writer(NULL) - { - assert(false); + hiopKKTLinSysDenseXYcYd() + : hiopKKTLinSysCompressedXYcYd(NULL), + write_linsys_counter(-1), + csr_writer(NULL) + { + assert(false); } }; @@ -228,24 +226,21 @@ class hiopKKTLinSysDenseXDYcYd : public hiopKKTLinSysCompressedXDYcYd { public: hiopKKTLinSysDenseXDYcYd(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressedXDYcYd(nlp), rhsXDYcYd(NULL), - write_linsys_counter(-1), csr_writer(nlp) - { - } - virtual ~hiopKKTLinSysDenseXDYcYd() - { - delete rhsXDYcYd; - } - + : hiopKKTLinSysCompressedXDYcYd(nlp), + rhsXDYcYd(NULL), + write_linsys_counter(-1), + csr_writer(nlp) + {} + virtual ~hiopKKTLinSysDenseXDYcYd() { delete rhsXDYcYd; } /* Updates the parts in KKT system that are dependent on the iterate. - * Triggers a refactorization for the dense linear system - * Forms the linear system - * [ H + Dx 0 Jc^T Jd^T ] [ dx] [ rx_tilde ] - * [ 0 Dd 0 -I ] [ dd] [ rd_tilde ] - * [ Jc 0 0 0 ] [dyc] = [ ryc ] - * [ Jd -I 0 0 ] [dyd] [ ryd ] - */ + * Triggers a refactorization for the dense linear system + * Forms the linear system + * [ H + Dx 0 Jc^T Jd^T ] [ dx] [ rx_tilde ] + * [ 0 Dd 0 -I ] [ dd] [ rd_tilde ] + * [ Jc 0 0 0 ] [dyc] = [ ryc ] + * [ Jd -I 0 0 ] [dyd] [ ryd ] + */ virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg) { assert(nlp_); @@ -254,73 +249,77 @@ class hiopKKTLinSysDenseXDYcYd : public hiopKKTLinSysCompressedXDYcYd delta_cc_ = perturb_calc_->get_curr_delta_cc(); delta_cd_ = perturb_calc_->get_curr_delta_cd(); - int nx = Hess_->m(); assert(nx==Hess_->n()); assert(nx==Jac_c_->n()); assert(nx==Jac_d_->n()); + int nx = Hess_->m(); + assert(nx == Hess_->n()); + assert(nx == Jac_c_->n()); + assert(nx == Jac_d_->n()); int neq = Jac_c_->m(), nineq = Jac_d_->m(); - assert(nx==Hess_->n()); assert(nx==Jac_c_->n()); assert(nx==Jac_d_->n()); - - if(NULL==linSys_) { - int n=nx+neq+2*nineq; + assert(nx == Hess_->n()); + assert(nx == Jac_c_->n()); + assert(nx == Jac_d_->n()); + + if(NULL == linSys_) { + int n = nx + neq + 2 * nineq; - if(nlp_->options->GetString("compute_mode")=="hybrid" || - nlp_->options->GetString("compute_mode")=="gpu") { + if(nlp_->options->GetString("compute_mode") == "hybrid" || nlp_->options->GetString("compute_mode") == "gpu") { #ifdef HIOP_USE_MAGMA - nlp_->log->printf(hovScalars, "LinSysDenseDXYcYd: instantiating Magma for a matrix of size %d\n", n); - linSys_ = new hiopLinSolverSymDenseMagmaNopiv(n, nlp_); + nlp_->log->printf(hovScalars, "LinSysDenseDXYcYd: instantiating Magma for a matrix of size %d\n", n); + linSys_ = new hiopLinSolverSymDenseMagmaNopiv(n, nlp_); #else - nlp_->log->printf(hovScalars, "LinSysDenseXDYcYd: instantiating Lapack for a matrix of size %d\n", n); - linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); + nlp_->log->printf(hovScalars, "LinSysDenseXDYcYd: instantiating Lapack for a matrix of size %d\n", n); + linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); #endif } else { - nlp_->log->printf(hovScalars, "LinSysDenseXDYcYd instantiating Lapack for a matrix of size %d\n", n); - linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); - } + nlp_->log->printf(hovScalars, "LinSysDenseXDYcYd instantiating Lapack for a matrix of size %d\n", n); + linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); + } } - hiopLinSolverSymDense* linSys = dynamic_cast (linSys_); + hiopLinSolverSymDense* linSys = dynamic_cast(linSys_); hiopMatrixDense& Msys = linSys->sysMatrix(); - + // // update linSys system matrix, including IC perturbations // Msys.setToZero(); - + const int alpha = 1.; Hess_->addUpperTriangleToSymDenseMatrixUpperTriangle(0, alpha, Msys); - - Jac_c_->transAddToSymDenseMatrixUpperTriangle(0, nx+nineq, alpha, Msys); - Jac_d_->transAddToSymDenseMatrixUpperTriangle(0, nx+nineq+neq, alpha, Msys); - - //add diagonals and IC perturbations + + Jac_c_->transAddToSymDenseMatrixUpperTriangle(0, nx + nineq, alpha, Msys); + Jac_d_->transAddToSymDenseMatrixUpperTriangle(0, nx + nineq + neq, alpha, Msys); + + // add diagonals and IC perturbations Msys.addSubDiagonal(alpha, 0, *Dx_); Msys.addSubDiagonal(alpha, 0, *delta_wx_); Msys.addSubDiagonal(alpha, nx, *Dd_); Msys.addSubDiagonal(alpha, nx, *delta_wd_); - - //add -I (of size nineq) starting at index (nx, nx+nineq+neq) - int col_start = nx+nineq+neq; + + // add -I (of size nineq) starting at index (nx, nx+nineq+neq) + int col_start = nx + nineq + neq; double* MsysM = Msys.local_data(); int m_Msys = Msys.m(); assert(m_Msys == Msys.n()); - for(int i=nx; iis_equal(*delta_cd_)); + assert(delta_cc_->is_equal(*delta_cd_)); #endif - Msys.addSubDiagonal(-alpha, nx+nineq, *delta_cd_); + Msys.addSubDiagonal(-alpha, nx + nineq, *delta_cd_); nlp_->log->write("KKT Linsys:", Msys, hovMatrices); - //write matrix to file if requested + // write matrix to file if requested if(nlp_->options->GetString("write_kkt") == "yes") { write_linsys_counter++; } - if(write_linsys_counter>=0) { + if(write_linsys_counter >= 0) { csr_writer.writeMatToFile(Msys, write_linsys_counter, nx, neq, nineq); } @@ -328,42 +327,47 @@ class hiopKKTLinSysDenseXDYcYd : public hiopKKTLinSysCompressedXDYcYd return true; } - virtual bool solveCompressed(hiopVector& rx, hiopVector& rd, hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dd, hiopVector& dyc, hiopVector& dyd) + virtual bool solveCompressed(hiopVector& rx, + hiopVector& rd, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dd, + hiopVector& dyc, + hiopVector& dyd) { - hiopLinSolverSymDense* linSys = dynamic_cast (linSys_); + hiopLinSolverSymDense* linSys = dynamic_cast(linSys_); - int nx=rx.get_size(), nyc=ryc.get_size(), nyd=ryd.get_size(); + int nx = rx.get_size(), nyc = ryc.get_size(), nyd = ryd.get_size(); if(rhsXDYcYd == nullptr) { - rhsXDYcYd = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), - nx+nyc+2*nyd); + rhsXDYcYd = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx + nyc + 2 * nyd); } - nlp_->log->write("RHS KKT XDycYd rx: ", rx, hovMatrices); - nlp_->log->write("RHS KKT XDycYd rd: ", rd, hovMatrices); + nlp_->log->write("RHS KKT XDycYd rx: ", rx, hovMatrices); + nlp_->log->write("RHS KKT XDycYd rd: ", rd, hovMatrices); nlp_->log->write("RHS KKT XDycYd ryc:", ryc, hovMatrices); nlp_->log->write("RHS KKT XDycYd ryd:", ryd, hovMatrices); - rx. copyToStarting(*rhsXDYcYd, 0); - rd. copyToStarting(*rhsXDYcYd, nx); - ryc.copyToStarting(*rhsXDYcYd, nx+nyd); - ryd.copyToStarting(*rhsXDYcYd, nx+nyd+nyc); + rx.copyToStarting(*rhsXDYcYd, 0); + rd.copyToStarting(*rhsXDYcYd, nx); + ryc.copyToStarting(*rhsXDYcYd, nx + nyd); + ryd.copyToStarting(*rhsXDYcYd, nx + nyd + nyc); - if(write_linsys_counter>=0) csr_writer.writeRhsToFile(*rhsXDYcYd, write_linsys_counter); + if(write_linsys_counter >= 0) csr_writer.writeRhsToFile(*rhsXDYcYd, write_linsys_counter); bool sol_ok = linSys->solve(*rhsXDYcYd); - if(write_linsys_counter>=0) csr_writer.writeSolToFile(*rhsXDYcYd, write_linsys_counter); + if(write_linsys_counter >= 0) csr_writer.writeSolToFile(*rhsXDYcYd, write_linsys_counter); - if(false==sol_ok) return false; + if(false == sol_ok) return false; - rhsXDYcYd->copyToStarting(0, dx); - rhsXDYcYd->copyToStarting(nx, dd); - rhsXDYcYd->copyToStarting(nx+nyd, dyc); - rhsXDYcYd->copyToStarting(nx+nyd+nyc, dyd); + rhsXDYcYd->copyToStarting(0, dx); + rhsXDYcYd->copyToStarting(nx, dd); + rhsXDYcYd->copyToStarting(nx + nyd, dyc); + rhsXDYcYd->copyToStarting(nx + nyd + nyc, dyd); - nlp_->log->write("SOL KKT XDYcYd dx: ", dx, hovMatrices); - nlp_->log->write("SOL KKT XDYcYd dd: ", dd, hovMatrices); + nlp_->log->write("SOL KKT XDYcYd dx: ", dx, hovMatrices); + nlp_->log->write("SOL KKT XDYcYd dd: ", dd, hovMatrices); nlp_->log->write("SOL KKT XDYcYd dyc:", dyc, hovMatrices); nlp_->log->write("SOL KKT XDYcYd dyd:", dyd, hovMatrices); return true; @@ -372,19 +376,20 @@ class hiopKKTLinSysDenseXDYcYd : public hiopKKTLinSysCompressedXDYcYd protected: hiopVector* rhsXDYcYd; //-1 when disabled; otherwise acts like a counter, 0,1,... incremented each time 'solveCompressed' is called - //depends on the 'write_kkt' option - int write_linsys_counter; + // depends on the 'write_kkt' option + int write_linsys_counter; hiopCSR_IO csr_writer; + private: - hiopKKTLinSysDenseXDYcYd() - : hiopKKTLinSysCompressedXDYcYd(NULL), - write_linsys_counter(-1), csr_writer(NULL) - { - assert(false && "not intended to be used"); + hiopKKTLinSysDenseXDYcYd() + : hiopKKTLinSysCompressedXDYcYd(NULL), + write_linsys_counter(-1), + csr_writer(NULL) + { + assert(false && "not intended to be used"); } }; - -} //end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopKKTLinSysMDS.cpp b/src/Optimization/hiopKKTLinSysMDS.cpp index 40830c13a..61040c16c 100644 --- a/src/Optimization/hiopKKTLinSysMDS.cpp +++ b/src/Optimization/hiopKKTLinSysMDS.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #include "hiopKKTLinSysMDS.hpp" @@ -56,429 +56,457 @@ namespace hiop { - hiopKKTLinSysCompressedMDSXYcYd::hiopKKTLinSysCompressedMDSXYcYd(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressedXYcYd(nlp), - rhs_(NULL), _buff_xs_(NULL), - Hxs_(NULL), Hxs_wrk_(nullptr), - HessMDS_(NULL), Jac_cMDS_(NULL), Jac_dMDS_(NULL), - write_linsys_counter_(-1), csr_writer_(nlp) - { - nlpMDS_ = dynamic_cast(nlp_); - assert(nlpMDS_); +hiopKKTLinSysCompressedMDSXYcYd::hiopKKTLinSysCompressedMDSXYcYd(hiopNlpFormulation* nlp) + : hiopKKTLinSysCompressedXYcYd(nlp), + rhs_(NULL), + _buff_xs_(NULL), + Hxs_(NULL), + Hxs_wrk_(nullptr), + HessMDS_(NULL), + Jac_cMDS_(NULL), + Jac_dMDS_(NULL), + write_linsys_counter_(-1), + csr_writer_(nlp) +{ + nlpMDS_ = dynamic_cast(nlp_); + assert(nlpMDS_); +} + +hiopKKTLinSysCompressedMDSXYcYd::~hiopKKTLinSysCompressedMDSXYcYd() +{ + delete rhs_; + delete _buff_xs_; + delete Hxs_; + delete Hxs_wrk_; +} + +int hiopKKTLinSysCompressedMDSXYcYd::factorizeWithCurvCheck() +{ + // factorization + int n_neg_eig = hiopKKTLinSysCurvCheck::factorizeWithCurvCheck(); + + int n_neg_eig_11 = 0; + if(n_neg_eig >= 0) { + // 'n_neg_eig' is the number of negative eigenvalues of the "dense" (reduced) KKT + // + // One can compute the number of negative eigenvalues of the whole MDS or XYcYd + // linear system using Haynsworth inertia additivity formula, namely, + // count the negative eigenvalues of the sparse Hessian block. + int n_neg_eig_Hxs = Hxs_->numOfElemsLessThan(-1e-14); + int n_zero_eig_Hxs = Hxs_->numOfElemsAbsLessThan(1e-14); + n_neg_eig_11 += n_neg_eig_Hxs; + if(n_zero_eig_Hxs > 0) { + n_neg_eig_11 = -1; + } } - hiopKKTLinSysCompressedMDSXYcYd::~hiopKKTLinSysCompressedMDSXYcYd() - { - delete rhs_; - delete _buff_xs_; - delete Hxs_; - delete Hxs_wrk_; + if(n_neg_eig_11 < 0) { + nlp_->log->printf(hovWarning, "KKT_MDS_XYcYd linsys: Detected null eigenvalues in (1,1) sparse block.\n"); + assert(n_neg_eig_11 == -1); + n_neg_eig = -1; + } else if(n_neg_eig_11 > 0) { + n_neg_eig += n_neg_eig_11; + nlp_->log->printf(hovScalars, "KKT_MDS_XYcYd linsys: Detected negative eigenvalues in (1,1) sparse block.\n"); + } + return n_neg_eig; +} + +bool hiopKKTLinSysCompressedMDSXYcYd::update(const hiopIterate* iter, + const hiopVector* grad_f, + const hiopMatrix* Jac_c, + const hiopMatrix* Jac_d, + hiopMatrix* Hess) +{ + if(!nlpMDS_) { + assert(false); + return false; } - int hiopKKTLinSysCompressedMDSXYcYd::factorizeWithCurvCheck() - { - //factorization - int n_neg_eig = hiopKKTLinSysCurvCheck::factorizeWithCurvCheck(); - - int n_neg_eig_11 = 0; - if(n_neg_eig>=0) { - // 'n_neg_eig' is the number of negative eigenvalues of the "dense" (reduced) KKT - // - // One can compute the number of negative eigenvalues of the whole MDS or XYcYd - // linear system using Haynsworth inertia additivity formula, namely, - // count the negative eigenvalues of the sparse Hessian block. - int n_neg_eig_Hxs = Hxs_->numOfElemsLessThan(-1e-14); - int n_zero_eig_Hxs = Hxs_->numOfElemsAbsLessThan(1e-14); - n_neg_eig_11 += n_neg_eig_Hxs; - if (n_zero_eig_Hxs > 0) - { - n_neg_eig_11 = -1; - } - } + nlp_->runStats.linsolv.reset(); + nlp_->runStats.tmSolverInternal.start(); + nlp_->runStats.kkt.tmUpdateInit.start(); - if(n_neg_eig_11 < 0) { - nlp_->log->printf(hovWarning, - "KKT_MDS_XYcYd linsys: Detected null eigenvalues in (1,1) sparse block.\n"); - assert(n_neg_eig_11 == -1); - n_neg_eig = -1; - } else if(n_neg_eig_11 > 0) { - n_neg_eig += n_neg_eig_11; - nlp_->log->printf(hovScalars, - "KKT_MDS_XYcYd linsys: Detected negative eigenvalues in (1,1) sparse block.\n"); - } - return n_neg_eig; + iter_ = iter; + grad_f_ = grad_f; + Jac_c_ = Jac_c; + Jac_d_ = Jac_d; + Hess_ = Hess; + + HessMDS_ = dynamic_cast(Hess); + if(!HessMDS_) { + assert(false); + return false; } - bool hiopKKTLinSysCompressedMDSXYcYd::update(const hiopIterate* iter, - const hiopVector* grad_f, - const hiopMatrix* Jac_c, - const hiopMatrix* Jac_d, - hiopMatrix* Hess) - { - if(!nlpMDS_) { assert(false); return false; } + Jac_cMDS_ = dynamic_cast(Jac_c); + if(!Jac_cMDS_) { + assert(false); + return false; + } - nlp_->runStats.linsolv.reset(); - nlp_->runStats.tmSolverInternal.start(); - nlp_->runStats.kkt.tmUpdateInit.start(); + Jac_dMDS_ = dynamic_cast(Jac_d); + if(!Jac_dMDS_) { + assert(false); + return false; + } - iter_ = iter; - grad_f_ = grad_f; - Jac_c_ = Jac_c; Jac_d_ = Jac_d; Hess_=Hess; + int nxs = HessMDS_->n_sp(), nxd = HessMDS_->n_de(), nx = HessMDS_->n(); + int neq = Jac_cMDS_->m(), nineq = Jac_dMDS_->m(); - HessMDS_ = dynamic_cast(Hess); - if(!HessMDS_) { assert(false); return false; } + assert(nx == nxs + nxd); + assert(nx == Jac_cMDS_->n_sp() + Jac_cMDS_->n_de()); + assert(nx == Jac_dMDS_->n_sp() + Jac_dMDS_->n_de()); - Jac_cMDS_ = dynamic_cast(Jac_c); - if(!Jac_cMDS_) { assert(false); return false; } + // + // based on safe_mode_, decide whether to go with the nopiv (fast) or Bunch-Kaufman (stable) linear solve + // + linSys_ = determineAndCreateLinsys(nxd, neq, nineq); - Jac_dMDS_ = dynamic_cast(Jac_d); - if(!Jac_dMDS_) { assert(false); return false; } + // + // update/compute KKT + // - int nxs = HessMDS_->n_sp(), nxd = HessMDS_->n_de(), nx = HessMDS_->n(); - int neq = Jac_cMDS_->m(), nineq = Jac_dMDS_->m(); + // Dx (<-- log-barrier diagonal, for both sparse (Dxs) and dense (Dxd) + assert(Dx_->get_local_size() == nxs + nxd); + Dx_->setToZero(); + Dx_->axdzpy_w_pattern(1.0, *iter->zl, *iter->sxl, nlp_->get_ixl()); + Dx_->axdzpy_w_pattern(1.0, *iter->zu, *iter->sxu, nlp_->get_ixu()); + nlp_->log->write("Dx in KKT", *Dx_, hovMatrices); - assert(nx==nxs+nxd); - assert(nx==Jac_cMDS_->n_sp()+Jac_cMDS_->n_de()); - assert(nx==Jac_dMDS_->n_sp()+Jac_dMDS_->n_de()); + nlp_->runStats.kkt.tmUpdateInit.stop(); - // - //based on safe_mode_, decide whether to go with the nopiv (fast) or Bunch-Kaufman (stable) linear solve - // - linSys_ = determineAndCreateLinsys(nxd, neq, nineq); + // + // factorization + inertia correction if needed + // + const bool retval = factorize(); - // - //update/compute KKT - // + nlp_->runStats.tmSolverInternal.stop(); + return retval; +} - //Dx (<-- log-barrier diagonal, for both sparse (Dxs) and dense (Dxd) - assert(Dx_->get_local_size() == nxs+nxd); - Dx_->setToZero(); - Dx_->axdzpy_w_pattern(1.0, *iter->zl, *iter->sxl, nlp_->get_ixl()); - Dx_->axdzpy_w_pattern(1.0, *iter->zu, *iter->sxu, nlp_->get_ixu()); - nlp_->log->write("Dx in KKT", *Dx_, hovMatrices); +bool hiopKKTLinSysCompressedMDSXYcYd::build_kkt_matrix(const hiopPDPerturbation& pdreg) +{ + assert(linSys_); + hiopLinSolverSymDense* linSys = dynamic_cast(linSys_); + assert(linSys); - nlp_->runStats.kkt.tmUpdateInit.stop(); + delta_wx_ = perturb_calc_->get_curr_delta_wx(); + delta_wd_ = perturb_calc_->get_curr_delta_wd(); + delta_cc_ = perturb_calc_->get_curr_delta_cc(); + delta_cd_ = perturb_calc_->get_curr_delta_cd(); - // - //factorization + inertia correction if needed - // - const bool retval = factorize(); - - nlp_->runStats.tmSolverInternal.stop(); - return retval; + int nxs = HessMDS_->n_sp(), nxd = HessMDS_->n_de(); + int neq = Jac_cMDS_->m(), nineq = Jac_dMDS_->m(); + + hiopMatrixDense& Msys = linSys->sysMatrix(); + if(perf_report_) { + nlp_->log->printf(hovSummary, "KKT_MDS_XYcYd linsys: Low-level linear system size: %d\n", Msys.n()); } + nlp_->runStats.kkt.tmUpdateLinsys.start(); - bool hiopKKTLinSysCompressedMDSXYcYd::build_kkt_matrix(const hiopPDPerturbation& pdreg) - { - assert(linSys_); - hiopLinSolverSymDense* linSys = dynamic_cast (linSys_); - assert(linSys); + // update linSys system matrix, including IC perturbations + Msys.setToZero(); - delta_wx_ = perturb_calc_->get_curr_delta_wx(); - delta_wd_ = perturb_calc_->get_curr_delta_wd(); - delta_cc_ = perturb_calc_->get_curr_delta_cc(); - delta_cd_ = perturb_calc_->get_curr_delta_cd(); + int alpha = 1.; - int nxs = HessMDS_->n_sp(), nxd = HessMDS_->n_de(); - int neq = Jac_cMDS_->m(), nineq = Jac_dMDS_->m(); + // perf eval + // hiopTimer tm; + // tm.start(); - hiopMatrixDense& Msys = linSys->sysMatrix(); - if(perf_report_) { - nlp_->log->printf(hovSummary, - "KKT_MDS_XYcYd linsys: Low-level linear system size: %d\n", - Msys.n()); - } + HessMDS_->de_mat()->addUpperTriangleToSymDenseMatrixUpperTriangle(0, alpha, Msys); + Jac_cMDS_->de_mat()->transAddToSymDenseMatrixUpperTriangle(0, nxd, alpha, Msys); + Jac_dMDS_->de_mat()->transAddToSymDenseMatrixUpperTriangle(0, nxd + neq, alpha, Msys); - nlp_->runStats.kkt.tmUpdateLinsys.start(); + // tm.stop(); + // printf("the three add methods took %g sec\n", tm.getElapsedTime()); + // tm.reset(); - // update linSys system matrix, including IC perturbations - Msys.setToZero(); - - int alpha = 1.; + // update -> add Dxd to (1,1) block of KKT matrix (Hd = HessMDS_->de_mat already added above) + Msys.addSubDiagonal(0, alpha, *Dx_, nxs, nxd); + // add perturbation 'delta_wx' for xd + Msys.addSubDiagonal(0, alpha, *delta_wx_, nxs, nxd); - // perf eval - //hiopTimer tm; - //tm.start(); + // build the diagonal Hxs = Hsparse+Dxs + if(NULL == Hxs_) { + Hxs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nxs); + Hxs_wrk_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nxs); + assert(Hxs_); + } + Hxs_->startingAtCopyFromStartingAt(0, *Dx_, 0); + + // a good time to add the IC 'delta_wx' perturbation + Hxs_wrk_->startingAtCopyFromStartingAt(0, *delta_wx_, 0); + Hxs_->axpy(1., *Hxs_wrk_); + + // Hxs += diag(HessMDS->sp_mat()); + // todo: make sure we check that the HessMDS->sp_mat() is a diagonal + HessMDS_->sp_mat()->startingAtAddSubDiagonalToStartingAt(0, alpha, *Hxs_, 0); + nlp_->log->write("Hxs in KKT_MDS_X", *Hxs_, hovMatrices); + + // add - Jac_c_sp * (Hxs)^{-1} Jac_c_sp^T to diagonal block linSys starting at (nxd, nxd) + alpha = -1.; + + // perf eval + // tm.start(); + Jac_cMDS_->sp_mat()->addMDinvMtransToDiagBlockOfSymDeMatUTri(nxd, alpha, *Hxs_, Msys); + + // tm.stop(); + // printf("addMDinvMtransToDiagBlockOfSymDeMatUTri 111 took %g sec\n", tm.getElapsedTime()); + // tm.reset(); + + Msys.addSubDiagonal(-1., nxd, *delta_cc_); + + /* we've just done above the (1,1) and (2,2) blocks of + * + * [ Hd+Dxd+delta_wx*I Jcd^T Jdd^T ] + * [ Jcd -Jcs(Hs+Dxs+delta_wx*I)^{-1}Jcs^T-delta_cc*I K_21 ] + * [ Jdd K_21 M_{33} ] + * + * where + * K_21 = - Jcs * (Hs+Dxs+delta_wx)^{-1} * Jds^T + * + * M_{33} = -Jds(Hs+Dxs+delta_wx)^{-1}Jds^T - (Dd+delta_wd)*I^{-1} - delta_cd*I + * is performed below + */ + + alpha = -1.; + // add - Jac_d_sp * (Hxs+Dxs+delta_wx*I)^{-1} * Jac_d_sp^T to diagonal block + // linSys starting at (nxd+neq, nxd+neq) + + // perf eval + // tm.start(); + + Jac_dMDS_->sp_mat()->addMDinvMtransToDiagBlockOfSymDeMatUTri(nxd + neq, alpha, *Hxs_, Msys); + + // tm.stop(); + // printf("addMDinvMtransToDiagBlockOfSymDeMatUTri 222 took %g sec\n", tm.getElapsedTime()); + + // K_21 = - Jcs * (Hs+Dxs+delta_wx)^{-1} * Jds^T + alpha = -1.; + Jac_cMDS_->sp_mat()->addMDinvNtransToSymDeMatUTri(nxd, nxd + neq, alpha, *Hxs_, *Jac_dMDS_->sp_mat(), Msys); + + // add -{Dd}^{-1} + // Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu + delta_wd * I + Dd_inv_->copyFrom(*delta_wd_); + Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl()); + Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vu, *iter_->sdu, nlp_->get_idu()); +#ifdef HIOP_DEEPCHECKS + assert(true == Dd_inv_->allPositive()); +#endif + Dd_inv_->invert(); - HessMDS_->de_mat()->addUpperTriangleToSymDenseMatrixUpperTriangle(0, alpha, Msys); - Jac_cMDS_->de_mat()->transAddToSymDenseMatrixUpperTriangle(0, nxd, alpha, Msys); - Jac_dMDS_->de_mat()->transAddToSymDenseMatrixUpperTriangle(0, nxd+neq, alpha, Msys); + alpha = -1.; + Msys.addSubDiagonal(alpha, nxd + neq, *Dd_inv_); + Msys.addSubDiagonal(alpha, nxd + neq, *delta_cd_); - //tm.stop(); - //printf("the three add methods took %g sec\n", tm.getElapsedTime()); - //tm.reset(); + nlp_->log->write("KKT_MDS_XYcYd linsys:", Msys, hovMatrices); - //update -> add Dxd to (1,1) block of KKT matrix (Hd = HessMDS_->de_mat already added above) - Msys.addSubDiagonal(0, alpha, *Dx_, nxs, nxd); - //add perturbation 'delta_wx' for xd - Msys.addSubDiagonal(0, alpha, *delta_wx_, nxs, nxd); + nlp_->runStats.kkt.tmUpdateLinsys.stop(); - //build the diagonal Hxs = Hsparse+Dxs - if(NULL == Hxs_) { - Hxs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nxs); - Hxs_wrk_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nxs); - assert(Hxs_); - } - Hxs_->startingAtCopyFromStartingAt(0, *Dx_, 0); - - //a good time to add the IC 'delta_wx' perturbation - Hxs_wrk_->startingAtCopyFromStartingAt(0, *delta_wx_, 0); - Hxs_->axpy(1., *Hxs_wrk_); - - //Hxs += diag(HessMDS->sp_mat()); - //todo: make sure we check that the HessMDS->sp_mat() is a diagonal - HessMDS_->sp_mat()->startingAtAddSubDiagonalToStartingAt(0, alpha, *Hxs_, 0); - nlp_->log->write("Hxs in KKT_MDS_X", *Hxs_, hovMatrices); - - //add - Jac_c_sp * (Hxs)^{-1} Jac_c_sp^T to diagonal block linSys starting at (nxd, nxd) - alpha = -1.; - - // perf eval - //tm.start(); - Jac_cMDS_->sp_mat()->addMDinvMtransToDiagBlockOfSymDeMatUTri(nxd, alpha, *Hxs_, Msys); - - //tm.stop(); - //printf("addMDinvMtransToDiagBlockOfSymDeMatUTri 111 took %g sec\n", tm.getElapsedTime()); - //tm.reset(); - - Msys.addSubDiagonal(-1., nxd, *delta_cc_); - - /* we've just done above the (1,1) and (2,2) blocks of - * - * [ Hd+Dxd+delta_wx*I Jcd^T Jdd^T ] - * [ Jcd -Jcs(Hs+Dxs+delta_wx*I)^{-1}Jcs^T-delta_cc*I K_21 ] - * [ Jdd K_21 M_{33} ] - * - * where - * K_21 = - Jcs * (Hs+Dxs+delta_wx)^{-1} * Jds^T - * - * M_{33} = -Jds(Hs+Dxs+delta_wx)^{-1}Jds^T - (Dd+delta_wd)*I^{-1} - delta_cd*I - * is performed below - */ - - alpha = -1.; - // add - Jac_d_sp * (Hxs+Dxs+delta_wx*I)^{-1} * Jac_d_sp^T to diagonal block - // linSys starting at (nxd+neq, nxd+neq) - - // perf eval - //tm.start(); - - Jac_dMDS_->sp_mat()-> - addMDinvMtransToDiagBlockOfSymDeMatUTri(nxd+neq, alpha, *Hxs_, Msys); - - //tm.stop(); - //printf("addMDinvMtransToDiagBlockOfSymDeMatUTri 222 took %g sec\n", tm.getElapsedTime()); - - //K_21 = - Jcs * (Hs+Dxs+delta_wx)^{-1} * Jds^T - alpha = -1.; - Jac_cMDS_->sp_mat()-> - addMDinvNtransToSymDeMatUTri(nxd, nxd+neq, alpha, *Hxs_, *Jac_dMDS_->sp_mat(), Msys); - - // add -{Dd}^{-1} - // Dd=(Sdl)^{-1}Vu + (Sdu)^{-1}Vu + delta_wd * I - Dd_inv_->copyFrom(*delta_wd_); - Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl()); - Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vu, *iter_->sdu, nlp_->get_idu()); -#ifdef HIOP_DEEPCHECKS - assert(true==Dd_inv_->allPositive()); -#endif - Dd_inv_->invert(); - - alpha=-1.; - Msys.addSubDiagonal(alpha, nxd+neq, *Dd_inv_); - Msys.addSubDiagonal(alpha, nxd+neq, *delta_cd_); - - nlp_->log->write("KKT_MDS_XYcYd linsys:", Msys, hovMatrices); - - nlp_->runStats.kkt.tmUpdateLinsys.stop(); - - //write matrix to file if requested - if(nlp_->options->GetString("write_kkt") == "yes") { - write_linsys_counter_++; - } - if(write_linsys_counter_>=0) { - csr_writer_.writeMatToFile(Msys, write_linsys_counter_, nxd+nxs, neq, nineq); - } - - return true; + // write matrix to file if requested + if(nlp_->options->GetString("write_kkt") == "yes") { + write_linsys_counter_++; + } + if(write_linsys_counter_ >= 0) { + csr_writer_.writeMatToFile(Msys, write_linsys_counter_, nxd + nxs, neq, nineq); } - bool hiopKKTLinSysCompressedMDSXYcYd:: - solveCompressed(hiopVector& rx, hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dyc, hiopVector& dyd) - { - hiopLinSolverSymDense* linSys = dynamic_cast (linSys_); - - if(!nlpMDS_) { assert(false); return false; } - if(!HessMDS_) { assert(false); return false; } - if(!Jac_cMDS_) { assert(false); return false; } - if(!Jac_dMDS_) { assert(false); return false; } - - nlp_->runStats.kkt.tmSolveRhsManip.start(); - - int nx=rx.get_size(), nyc=ryc.get_size(), nyd=ryd.get_size(); - int nxsp=Hxs_->get_size(); assert(nxsp<=nx); - int nxde = nlpMDS_->nx_de(); - assert(nxsp+nxde==nx); - if(rhs_ == NULL) { - rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nxde+nyc+nyd); - } - if(_buff_xs_==NULL) { - _buff_xs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nxsp); - } + return true; +} - nlp_->log->write("RHS KKT_MDS_XYcYd rx: ", rx, hovIteration); - nlp_->log->write("RHS KKT_MDS_XYcYd ryc:", ryc, hovIteration); - nlp_->log->write("RHS KKT_MDS_XYcYd ryd:", ryd, hovIteration); +bool hiopKKTLinSysCompressedMDSXYcYd::solveCompressed(hiopVector& rx, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dyc, + hiopVector& dyd) +{ + hiopLinSolverSymDense* linSys = dynamic_cast(linSys_); - hiopVector& rxs = *_buff_xs_; - //rxs = Hxs^{-1} * rx_sparse - rx.startingAtCopyToStartingAt(0, rxs, 0, nxsp); - rxs.componentDiv(*Hxs_); + if(!nlpMDS_) { + assert(false); + return false; + } + if(!HessMDS_) { + assert(false); + return false; + } + if(!Jac_cMDS_) { + assert(false); + return false; + } + if(!Jac_dMDS_) { + assert(false); + return false; + } - //ryc = ryc - Jac_c_sp * Hxs^{-1} * rxs - //use dyc as working buffer to avoid altering ryc, which refers directly in the hiopResidual class - assert(dyc.get_size()==ryc.get_size()); - dyc.copyFrom(ryc); - Jac_cMDS_->sp_mat()->timesVec(1.0, dyc, -1., rxs); + nlp_->runStats.kkt.tmSolveRhsManip.start(); - //ryd = ryd - Jac_d_sp * Hxs^{-1} * rxs - Jac_dMDS_->sp_mat()->timesVec(1.0, ryd, -1., rxs); + int nx = rx.get_size(), nyc = ryc.get_size(), nyd = ryd.get_size(); + int nxsp = Hxs_->get_size(); + assert(nxsp <= nx); + int nxde = nlpMDS_->nx_de(); + assert(nxsp + nxde == nx); + if(rhs_ == NULL) { + rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nxde + nyc + nyd); + } + if(_buff_xs_ == NULL) { + _buff_xs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nxsp); + } - // - // form the rhs for the MDS linSys - // - //rhs[0:nxde-1] = rx[nxs:(nxsp+nxde-1)] - rx.startingAtCopyToStartingAt(nxsp, *rhs_, 0, nxde); - //rhs[nxde:nxde+nyc-1] = ryc - dyc.copyToStarting(*rhs_, nxde); - //ths[nxde+nyc:nxde+nyc+nyd-1] = ryd - ryd.copyToStarting(*rhs_, nxde+nyc); - - if(write_linsys_counter_>=0) { - csr_writer_.writeRhsToFile(*rhs_, write_linsys_counter_); - } - - nlp_->runStats.kkt.tmSolveRhsManip.stop(); - nlp_->runStats.kkt.tmSolveInner.start(); - - // solve - bool linsol_ok = linSys->solve(*rhs_); - nlp_->runStats.kkt.tmSolveInner.stop(); - - if(perf_report_) { - nlp_->log->printf(hovSummary, "(summary for linear solver from KKT_MDS_XYcYd)\n%s", - nlp_->runStats.linsolv.get_summary_last_solve().c_str()); - } - - if(write_linsys_counter_>=0) { - csr_writer_.writeSolToFile(*rhs_, write_linsys_counter_); - } - if(false==linsol_ok) return false; - - nlp_->runStats.kkt.tmSolveRhsManip.start(); - - // unpack - rhs_->startingAtCopyToStartingAt(0, dx, nxsp, nxde); - rhs_->startingAtCopyToStartingAt(nxde, dyc, 0); - rhs_->startingAtCopyToStartingAt(nxde+nyc, dyd, 0); - - // compute dxs - hiopVector& dxs = *_buff_xs_; - // dxs = (Hxs)^{-1} ( rxs - Jac_c_sp^T dyc - Jac_d_sp^T dyd) - rx.startingAtCopyToStartingAt(0, dxs, 0, nxsp); - Jac_cMDS_->sp_mat()->transTimesVec(1., dxs, -1., dyc); - Jac_dMDS_->sp_mat()->transTimesVec(1., dxs, -1., dyd); - dxs.componentDiv(*Hxs_); - //copy to dx - dxs.startingAtCopyToStartingAt(0, dx, 0); - - nlp_->log->write("SOL KKT_MDS_XYcYd dx: ", dx, hovMatrices); - nlp_->log->write("SOL KKT_MDS_XYcYd dyc:", dyc, hovMatrices); - nlp_->log->write("SOL KKT_MDS_XYcYd dyd:", dyd, hovMatrices); - - nlp_->runStats.kkt.tmSolveRhsManip.stop(); - return true; + nlp_->log->write("RHS KKT_MDS_XYcYd rx: ", rx, hovIteration); + nlp_->log->write("RHS KKT_MDS_XYcYd ryc:", ryc, hovIteration); + nlp_->log->write("RHS KKT_MDS_XYcYd ryd:", ryd, hovIteration); + + hiopVector& rxs = *_buff_xs_; + // rxs = Hxs^{-1} * rx_sparse + rx.startingAtCopyToStartingAt(0, rxs, 0, nxsp); + rxs.componentDiv(*Hxs_); + + // ryc = ryc - Jac_c_sp * Hxs^{-1} * rxs + // use dyc as working buffer to avoid altering ryc, which refers directly in the hiopResidual class + assert(dyc.get_size() == ryc.get_size()); + dyc.copyFrom(ryc); + Jac_cMDS_->sp_mat()->timesVec(1.0, dyc, -1., rxs); + + // ryd = ryd - Jac_d_sp * Hxs^{-1} * rxs + Jac_dMDS_->sp_mat()->timesVec(1.0, ryd, -1., rxs); + + // + // form the rhs for the MDS linSys + // + // rhs[0:nxde-1] = rx[nxs:(nxsp+nxde-1)] + rx.startingAtCopyToStartingAt(nxsp, *rhs_, 0, nxde); + // rhs[nxde:nxde+nyc-1] = ryc + dyc.copyToStarting(*rhs_, nxde); + // ths[nxde+nyc:nxde+nyc+nyd-1] = ryd + ryd.copyToStarting(*rhs_, nxde + nyc); + + if(write_linsys_counter_ >= 0) { + csr_writer_.writeRhsToFile(*rhs_, write_linsys_counter_); } - hiopLinSolverSymDense* hiopKKTLinSysCompressedMDSXYcYd::determineAndCreateLinsys(int nxd, int neq, int nineq) - { + nlp_->runStats.kkt.tmSolveRhsManip.stop(); + nlp_->runStats.kkt.tmSolveInner.start(); + + // solve + bool linsol_ok = linSys->solve(*rhs_); + nlp_->runStats.kkt.tmSolveInner.stop(); + + if(perf_report_) { + nlp_->log->printf(hovSummary, + "(summary for linear solver from KKT_MDS_XYcYd)\n%s", + nlp_->runStats.linsolv.get_summary_last_solve().c_str()); + } + + if(write_linsys_counter_ >= 0) { + csr_writer_.writeSolToFile(*rhs_, write_linsys_counter_); + } + if(false == linsol_ok) return false; + + nlp_->runStats.kkt.tmSolveRhsManip.start(); + + // unpack + rhs_->startingAtCopyToStartingAt(0, dx, nxsp, nxde); + rhs_->startingAtCopyToStartingAt(nxde, dyc, 0); + rhs_->startingAtCopyToStartingAt(nxde + nyc, dyd, 0); + + // compute dxs + hiopVector& dxs = *_buff_xs_; + // dxs = (Hxs)^{-1} ( rxs - Jac_c_sp^T dyc - Jac_d_sp^T dyd) + rx.startingAtCopyToStartingAt(0, dxs, 0, nxsp); + Jac_cMDS_->sp_mat()->transTimesVec(1., dxs, -1., dyc); + Jac_dMDS_->sp_mat()->transTimesVec(1., dxs, -1., dyd); + dxs.componentDiv(*Hxs_); + // copy to dx + dxs.startingAtCopyToStartingAt(0, dx, 0); + + nlp_->log->write("SOL KKT_MDS_XYcYd dx: ", dx, hovMatrices); + nlp_->log->write("SOL KKT_MDS_XYcYd dyc:", dyc, hovMatrices); + nlp_->log->write("SOL KKT_MDS_XYcYd dyd:", dyd, hovMatrices); + + nlp_->runStats.kkt.tmSolveRhsManip.stop(); + return true; +} + +hiopLinSolverSymDense* hiopKKTLinSysCompressedMDSXYcYd::determineAndCreateLinsys(int nxd, int neq, int nineq) +{ #ifdef HIOP_USE_MAGMA - bool switched_linsolvers = false; - if(safe_mode_) { - hiopLinSolverSymDenseMagmaBuKa* p = dynamic_cast(linSys_); - if(p==NULL) { - //we have a nopiv linear solver or linear solver has not been created yet - if(linSys_) switched_linsolvers = true; - delete linSys_; - linSys_ = NULL; - } else { - return p; - } + bool switched_linsolvers = false; + if(safe_mode_) { + hiopLinSolverSymDenseMagmaBuKa* p = dynamic_cast(linSys_); + if(p == NULL) { + // we have a nopiv linear solver or linear solver has not been created yet + if(linSys_) switched_linsolvers = true; + delete linSys_; + linSys_ = NULL; } else { - hiopLinSolverSymDenseMagmaNopiv* p = dynamic_cast(linSys_); - if(p==NULL) { - //we have a BuKa linear solver or linear solver has not been created yet - if(linSys_) switched_linsolvers = true; - delete linSys_; - linSys_ = NULL; - } else { - return p; - } + return p; } + } else { + hiopLinSolverSymDenseMagmaNopiv* p = dynamic_cast(linSys_); + if(p == NULL) { + // we have a BuKa linear solver or linear solver has not been created yet + if(linSys_) switched_linsolvers = true; + delete linSys_; + linSys_ = NULL; + } else { + return p; + } + } #endif - if(NULL==linSys_) { - int n = nxd + neq + nineq; + if(NULL == linSys_) { + int n = nxd + neq + nineq; - if("cpu" == nlp_->options->GetString("compute_mode")) { - nlp_->log->printf(hovScalars, "KKT_MDS_XYcYd linsys: Lapack for a matrix of size %d [1]\n", n); - linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); - return dynamic_cast(linSys_); - } + if("cpu" == nlp_->options->GetString("compute_mode")) { + nlp_->log->printf(hovScalars, "KKT_MDS_XYcYd linsys: Lapack for a matrix of size %d [1]\n", n); + linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); + return dynamic_cast(linSys_); + } #ifdef HIOP_USE_MAGMA - if(nlp_->options->GetString("compute_mode")=="hybrid" || - nlp_->options->GetString("compute_mode")=="gpu" || - nlp_->options->GetString("compute_mode")=="auto") { - - if(safe_mode_) { - - auto hovLevel = hovScalars; - if(switched_linsolvers) hovLevel = hovWarning; - - nlp_->log->printf(hovLevel, - "KKT_MDS_XYcYd linsys: MagmaBuKa size %d (%d cons) (safe_mode=%d)\n", - n, neq+nineq, safe_mode_); - - linSys_ = new hiopLinSolverSymDenseMagmaBuKa(n, nlp_); - } else { - - auto hovLevel = hovScalars; - if(switched_linsolvers) hovLevel = hovWarning; - - nlp_->log->printf(hovLevel, - "KKT_MDS_XYcYd linsys: MagmaNopiv size %d (%d cons) (safe_mode=%d)\n", - n, neq+nineq, safe_mode_); - - linSys_ = new hiopLinSolverSymDenseMagmaNopiv(n, nlp_); - //hiopLinSolverSymDenseMagmaNopiv* p = new hiopLinSolverSymDenseMagmaNopiv(n, nlp_); - //linSys_ = p; - //p->set_fake_inertia(neq + nineq); - } + if(nlp_->options->GetString("compute_mode") == "hybrid" || nlp_->options->GetString("compute_mode") == "gpu" || + nlp_->options->GetString("compute_mode") == "auto") { + if(safe_mode_) { + auto hovLevel = hovScalars; + if(switched_linsolvers) hovLevel = hovWarning; + + nlp_->log->printf(hovLevel, + "KKT_MDS_XYcYd linsys: MagmaBuKa size %d (%d cons) (safe_mode=%d)\n", + n, + neq + nineq, + safe_mode_); + + linSys_ = new hiopLinSolverSymDenseMagmaBuKa(n, nlp_); } else { - nlp_->log->printf(hovScalars, "KKT_MDS_XYcYd linsys: Lapack for a matrix of size %d [2]\n", n); - linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); - return dynamic_cast(linSys_); + auto hovLevel = hovScalars; + if(switched_linsolvers) hovLevel = hovWarning; + + nlp_->log->printf(hovLevel, + "KKT_MDS_XYcYd linsys: MagmaNopiv size %d (%d cons) (safe_mode=%d)\n", + n, + neq + nineq, + safe_mode_); + + linSys_ = new hiopLinSolverSymDenseMagmaNopiv(n, nlp_); + // hiopLinSolverSymDenseMagmaNopiv* p = new hiopLinSolverSymDenseMagmaNopiv(n, nlp_); + // linSys_ = p; + // p->set_fake_inertia(neq + nineq); } -#else - nlp_->log->printf(hovScalars, "KKT_MDS_XYcYd linsys: Lapack for a matrix of size %d [3]\n", n); + } else { + nlp_->log->printf(hovScalars, "KKT_MDS_XYcYd linsys: Lapack for a matrix of size %d [2]\n", n); linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); return dynamic_cast(linSys_); -#endif } +#else + nlp_->log->printf(hovScalars, "KKT_MDS_XYcYd linsys: Lapack for a matrix of size %d [3]\n", n); + linSys_ = new hiopLinSolverSymDenseLapack(n, nlp_); return dynamic_cast(linSys_); +#endif } - -} // end of namespace + return dynamic_cast(linSys_); +} + +} // namespace hiop diff --git a/src/Optimization/hiopKKTLinSysMDS.hpp b/src/Optimization/hiopKKTLinSysMDS.hpp index 5fadccb08..5b9b75337 100644 --- a/src/Optimization/hiopKKTLinSysMDS.hpp +++ b/src/Optimization/hiopKKTLinSysMDS.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_KKTLINSYSMDS @@ -57,8 +57,7 @@ namespace hiop { - -/* +/* * Solves KKTLinSysCompressedXYcYd by exploiting the mixed dense-sparse (MDS) * structure of the problem * @@ -72,19 +71,19 @@ namespace hiop * [ 0 Hd+Dxd Jcd^T Jdd^T ] [dxd] [ rxd_tilde ] * [ Jcs Jcd 0 0 ] [dyc] = [ ryc ] * [ Jds Jdd 0 -Dd^{-1} ] [dyd] [ ryd_tilde] - * where + * where * - Jcs and Jds contain the sparse columns of the Jacobians Jc and Jd * - Jcd and Jdd contain the dense columns of the Jacobians Jc and Jd * - Hs is a diagonal matrix (sparse part of the Hessian) * - Hd is the dense part of the Hessian - * - Dxs and Dxd are diagonals corresponding to sparse (xs) and dense (xd) + * - Dxs and Dxd are diagonals corresponding to sparse (xs) and dense (xd) * variables in the log-barrier diagonal Dx, respectively * * 'solveCompressed' performs a reduction to - * [ Hd+Dxd Jcd^T Jdd^T ] [dxd] - * [ Jcd -Jcs(Hs+Dxs)^{-1}Jcs^T K_21 ] [dyc] = - * [ Jdd K_21^T -Jds(Hs+Dxs)^{-1}Jds^T-Dd^{-1} ] [dyd] - * + * [ Hd+Dxd Jcd^T Jdd^T ] [dxd] + * [ Jcd -Jcs(Hs+Dxs)^{-1}Jcs^T K_21 ] [dyc] = + * [ Jdd K_21^T -Jds(Hs+Dxs)^{-1}Jds^T-Dd^{-1} ] [dyd] + * * [ rxd_tilde ] * = [ ryc - Jcs(Hs+Dxs)^{-1}rxs_tilde ] * [ ryd_tilde - Jds(Hs+Dxs)^{-1}rxs_tilde ] @@ -102,35 +101,40 @@ class hiopKKTLinSysCompressedMDSXYcYd : public hiopKKTLinSysCompressedXYcYd virtual int factorizeWithCurvCheck(); - virtual bool update(const hiopIterate* iter, - const hiopVector* grad_f, - const hiopMatrix* Jac_c, const hiopMatrix* Jac_d, - hiopMatrix* Hess); + virtual bool update(const hiopIterate* iter, + const hiopVector* grad_f, + const hiopMatrix* Jac_c, + const hiopMatrix* Jac_d, + hiopMatrix* Hess); virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg); - virtual bool solveCompressed(hiopVector& rx, hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dyc, hiopVector& dyd); + virtual bool solveCompressed(hiopVector& rx, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dyc, + hiopVector& dyd); protected: - hiopVector *rhs_; //[rxdense, ryc, ryd] - hiopVector *_buff_xs_; //an auxiliary buffer + hiopVector* rhs_; //[rxdense, ryc, ryd] + hiopVector* _buff_xs_; // an auxiliary buffer // - //from the parent class we also use + // from the parent class we also use // // hiopVectorPar *Dd_inv; // hiopVectorPar *ryd_tilde; - //from the parent's parent class (hiopKKTLinSysCompressed) we also use - // hiopVectorPar *Dx; - // hiopVectorPar *rx_tilde; + // from the parent's parent class (hiopKKTLinSysCompressed) we also use + // hiopVectorPar *Dx; + // hiopVectorPar *rx_tilde; // Keeps Hxs = HessMDS->sp_mat() + Dxs (Dx=log-barrier diagonal for xs) - hiopVector *Hxs_; - hiopVector *Hxs_wrk_; + hiopVector* Hxs_; + hiopVector* Hxs_wrk_; - //just dynamic_cast-ed pointers + // just dynamic_cast-ed pointers hiopNlpMDS* nlpMDS_; hiopMatrixSymBlockDiagMDS* HessMDS_; const hiopMatrixMDS* Jac_cMDS_; @@ -138,14 +142,14 @@ class hiopKKTLinSysCompressedMDSXYcYd : public hiopKKTLinSysCompressedXYcYd // -1 when disabled; otherwise acts like a counter, 0,1,... incremented each time // 'solveCompressed' is called; activated by the 'write_kkt' option - int write_linsys_counter_; + int write_linsys_counter_; hiopCSR_IO csr_writer_; private: - //placeholder for the code that decides which linear solver to used based on safe_mode_ + // placeholder for the code that decides which linear solver to used based on safe_mode_ hiopLinSolverSymDense* determineAndCreateLinsys(int nxd, int neq, int nineq); }; -} // end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopKKTLinSysSparse.cpp b/src/Optimization/hiopKKTLinSysSparse.cpp index c41a8579a..e9af09d04 100644 --- a/src/Optimization/hiopKKTLinSysSparse.cpp +++ b/src/Optimization/hiopKKTLinSysSparse.cpp @@ -69,729 +69,776 @@ namespace hiop { - /* ************************************************************************* - * For class hiopKKTLinSysCompressedSparseXYcYd - * ************************************************************************* - */ - hiopKKTLinSysCompressedSparseXYcYd::hiopKKTLinSysCompressedSparseXYcYd(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressedXYcYd(nlp), rhs_(NULL), - Hx_(NULL), HessSp_(NULL), Jac_cSp_(NULL), Jac_dSp_(NULL), - write_linsys_counter_(-1), csr_writer_(nlp) - { - nlpSp_ = dynamic_cast(nlp_); - assert(nlpSp_); +/* ************************************************************************* + * For class hiopKKTLinSysCompressedSparseXYcYd + * ************************************************************************* + */ +hiopKKTLinSysCompressedSparseXYcYd::hiopKKTLinSysCompressedSparseXYcYd(hiopNlpFormulation* nlp) + : hiopKKTLinSysCompressedXYcYd(nlp), + rhs_(NULL), + Hx_(NULL), + HessSp_(NULL), + Jac_cSp_(NULL), + Jac_dSp_(NULL), + write_linsys_counter_(-1), + csr_writer_(nlp) +{ + nlpSp_ = dynamic_cast(nlp_); + assert(nlpSp_); +} + +hiopKKTLinSysCompressedSparseXYcYd::~hiopKKTLinSysCompressedSparseXYcYd() +{ + delete rhs_; + delete Hx_; +} + +bool hiopKKTLinSysCompressedSparseXYcYd::build_kkt_matrix(const hiopPDPerturbation& pdreg) +{ + delta_wx_ = perturb_calc_->get_curr_delta_wx(); + delta_wd_ = perturb_calc_->get_curr_delta_wd(); + delta_cc_ = perturb_calc_->get_curr_delta_cc(); + delta_cd_ = perturb_calc_->get_curr_delta_cd(); + + HessSp_ = dynamic_cast(Hess_); + if(!HessSp_) { + assert(false); + return false; } - hiopKKTLinSysCompressedSparseXYcYd::~hiopKKTLinSysCompressedSparseXYcYd() - { - delete rhs_; - delete Hx_; + Jac_cSp_ = dynamic_cast(Jac_c_); + if(!Jac_cSp_) { + assert(false); + return false; + } + + Jac_dSp_ = dynamic_cast(Jac_d_); + if(!Jac_dSp_) { + assert(false); + return false; } - bool hiopKKTLinSysCompressedSparseXYcYd::build_kkt_matrix(const hiopPDPerturbation& pdreg) + size_type nx = HessSp_->n(), neq = Jac_cSp_->m(), nineq = Jac_dSp_->m(); + int nnz = HessSp_->numberOfNonzeros() + Jac_cSp_->numberOfNonzeros() + Jac_dSp_->numberOfNonzeros(); + nnz += nx + neq + nineq; + + linSys_ = determineAndCreateLinsys(nx, neq, nineq, nnz); + + auto* linSys = dynamic_cast(linSys_); + assert(linSys); + + auto* Msys = dynamic_cast(linSys->sys_matrix()); + assert(Msys); + + if(perf_report_) { + nlp_->log->printf(hovSummary, "KKT_Sparse_XYcYd linsys: Low-level linear system size: %d\n", Msys->n()); + } + + // update linSys system matrix, including IC perturbations { - delta_wx_ = perturb_calc_->get_curr_delta_wx(); - delta_wd_ = perturb_calc_->get_curr_delta_wd(); - delta_cc_ = perturb_calc_->get_curr_delta_cc(); - delta_cd_ = perturb_calc_->get_curr_delta_cd(); - - HessSp_ = dynamic_cast(Hess_); - if(!HessSp_) { assert(false); return false; } - - Jac_cSp_ = dynamic_cast(Jac_c_); - if(!Jac_cSp_) { assert(false); return false; } - - Jac_dSp_ = dynamic_cast(Jac_d_); - if(!Jac_dSp_) { assert(false); return false; } - - size_type nx = HessSp_->n(), neq=Jac_cSp_->m(), nineq=Jac_dSp_->m(); - int nnz = HessSp_->numberOfNonzeros() + Jac_cSp_->numberOfNonzeros() + Jac_dSp_->numberOfNonzeros(); - nnz += nx + neq + nineq; - - linSys_ = determineAndCreateLinsys(nx, neq, nineq, nnz); - - auto* linSys = dynamic_cast (linSys_); - assert(linSys); - - auto* Msys = dynamic_cast(linSys->sys_matrix()); - assert(Msys); - - if(perf_report_) { - nlp_->log->printf(hovSummary, - "KKT_Sparse_XYcYd linsys: Low-level linear system size: %d\n", - Msys->n()); + nlp_->runStats.kkt.tmUpdateLinsys.start(); + + Msys->setToZero(); + + // copy Jac and Hes to the full iterate matrix + size_type dest_nnz_st{0}; + Msys->copyRowsBlockFrom(*HessSp_, 0, nx, 0, dest_nnz_st); + dest_nnz_st += HessSp_->numberOfNonzeros(); + Msys->copyRowsBlockFrom(*Jac_cSp_, 0, neq, nx, dest_nnz_st); + dest_nnz_st += Jac_cSp_->numberOfNonzeros(); + Msys->copyRowsBlockFrom(*Jac_dSp_, 0, nineq, nx + neq, dest_nnz_st); + dest_nnz_st += Jac_dSp_->numberOfNonzeros(); + + // build the diagonal Hx = Dx + delta_wx + if(NULL == Hx_) { + Hx_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx); + assert(Hx_); } + Hx_->startingAtCopyFromStartingAt(0, *Dx_, 0); - // update linSys system matrix, including IC perturbations - { - nlp_->runStats.kkt.tmUpdateLinsys.start(); - - Msys->setToZero(); - - // copy Jac and Hes to the full iterate matrix - size_type dest_nnz_st{0}; - Msys->copyRowsBlockFrom(*HessSp_, 0, nx, 0, dest_nnz_st); - dest_nnz_st += HessSp_->numberOfNonzeros(); - Msys->copyRowsBlockFrom(*Jac_cSp_, 0, neq, nx, dest_nnz_st); - dest_nnz_st += Jac_cSp_->numberOfNonzeros(); - Msys->copyRowsBlockFrom(*Jac_dSp_, 0, nineq, nx+neq, dest_nnz_st); - dest_nnz_st += Jac_dSp_->numberOfNonzeros(); - - //build the diagonal Hx = Dx + delta_wx - if(NULL == Hx_) { - Hx_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx); - assert(Hx_); - } - Hx_->startingAtCopyFromStartingAt(0, *Dx_, 0); + // a good time to add the IC 'delta_wx' perturbation + Hx_->axpy(1., *delta_wx_); - //a good time to add the IC 'delta_wx' perturbation - Hx_->axpy(1., *delta_wx_); + Msys->copySubDiagonalFrom(0, nx, *Hx_, dest_nnz_st); + dest_nnz_st += nx; - Msys->copySubDiagonalFrom(0, nx, *Hx_, dest_nnz_st); dest_nnz_st += nx; + // add -delta_cc to diagonal block linSys starting at (nx, nx) + Msys->copySubDiagonalFrom(nx, neq, *delta_cc_, dest_nnz_st, -1.); + dest_nnz_st += neq; - //add -delta_cc to diagonal block linSys starting at (nx, nx) - Msys->copySubDiagonalFrom(nx, neq, *delta_cc_, dest_nnz_st, -1.); dest_nnz_st += neq; + /* we've just done above the (1,1) and (2,2) blocks of + * + * [ Hx+Dxd+delta_wx*I Jcd^T Jdd^T ] + * [ Jcd -delta_cc*I 0 ] + * [ Jdd 0 M_{33} ] + * + * where + * M_{33} = - (Dd+delta_wd)*I^{-1} - delta_cd*I = - Dd_inv - delta_cd*I is performed below + */ - /* we've just done above the (1,1) and (2,2) blocks of - * - * [ Hx+Dxd+delta_wx*I Jcd^T Jdd^T ] - * [ Jcd -delta_cc*I 0 ] - * [ Jdd 0 M_{33} ] - * - * where - * M_{33} = - (Dd+delta_wd)*I^{-1} - delta_cd*I = - Dd_inv - delta_cd*I is performed below - */ - - // Dd = (Sdl)^{-1}Vu + (Sdu)^{-1}Vu + delta_wd * I - Dd_inv_->axpy(1., *delta_wd_); - Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl()); - Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vu, *iter_->sdu, nlp_->get_idu()); + // Dd = (Sdl)^{-1}Vu + (Sdu)^{-1}Vu + delta_wd * I + Dd_inv_->axpy(1., *delta_wd_); + Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vl, *iter_->sdl, nlp_->get_idl()); + Dd_inv_->axdzpy_w_pattern(1.0, *iter_->vu, *iter_->sdu, nlp_->get_idu()); #ifdef HIOP_DEEPCHECKS - assert(true==Dd_inv_->allPositive()); + assert(true == Dd_inv_->allPositive()); #endif - Dd_inv_->invert(); - Dd_inv_->axpy(1., *delta_cd_); - - Msys->copySubDiagonalFrom(nx+neq, nineq, *Dd_inv_, dest_nnz_st, -1); dest_nnz_st += nineq; + Dd_inv_->invert(); + Dd_inv_->axpy(1., *delta_cd_); + Msys->copySubDiagonalFrom(nx + neq, nineq, *Dd_inv_, dest_nnz_st, -1); + dest_nnz_st += nineq; - nlp_->log->write("KKT_SPARSE_XYcYd linsys:", *Msys, hovMatrices); - nlp_->runStats.kkt.tmUpdateLinsys.stop(); - } // end of update of the linear system + nlp_->log->write("KKT_SPARSE_XYcYd linsys:", *Msys, hovMatrices); + nlp_->runStats.kkt.tmUpdateLinsys.stop(); + } // end of update of the linear system - //write matrix to file if requested - if(nlp_->options->GetString("write_kkt") == "yes") { - write_linsys_counter_++; - } - if(write_linsys_counter_>=0) { + // write matrix to file if requested + if(nlp_->options->GetString("write_kkt") == "yes") { + write_linsys_counter_++; + } + if(write_linsys_counter_ >= 0) { #ifndef HIOP_USE_GPU - auto* MsysSp = dynamic_cast(linSys->sys_matrix()); - csr_writer_.writeMatToFile(*MsysSp, write_linsys_counter_, nx, neq, nineq); + auto* MsysSp = dynamic_cast(linSys->sys_matrix()); + csr_writer_.writeMatToFile(*MsysSp, write_linsys_counter_, nx, neq, nineq); #else - //TODO csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); + // TODO csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); #endif - } + } - return true; + return true; +} + +bool hiopKKTLinSysCompressedSparseXYcYd::solveCompressed(hiopVector& rx, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dyc, + hiopVector& dyd) +{ + if(!nlpSp_) { + assert(false); + return false; + } + if(!HessSp_) { + assert(false); + return false; + } + if(!Jac_cSp_) { + assert(false); + return false; + } + if(!Jac_dSp_) { + assert(false); + return false; } - bool hiopKKTLinSysCompressedSparseXYcYd:: - solveCompressed(hiopVector& rx, hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dyc, hiopVector& dyd) - { - if(!nlpSp_) { assert(false); return false; } - if(!HessSp_) { assert(false); return false; } - if(!Jac_cSp_) { assert(false); return false; } - if(!Jac_dSp_) { assert(false); return false; } - - nlp_->runStats.kkt.tmSolveRhsManip.start(); - - int nx=rx.get_size(), nyc=ryc.get_size(), nyd=ryd.get_size(); - int nxsp=Hx_->get_size(); - assert(nxsp==nx); - if(rhs_ == NULL) { - rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), - nx+nyc+nyd); - } + nlp_->runStats.kkt.tmSolveRhsManip.start(); - nlp_->log->write("RHS KKT_SPARSE_XYcYd rx: ", rx, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_XYcYd ryc:", ryc, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_XYcYd ryd:", ryd, hovIteration); + int nx = rx.get_size(), nyc = ryc.get_size(), nyd = ryd.get_size(); + int nxsp = Hx_->get_size(); + assert(nxsp == nx); + if(rhs_ == NULL) { + rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx + nyc + nyd); + } - // - // form the rhs for the sparse linSys - // - rx.copyToStarting(*rhs_, 0); - ryc.copyToStarting(*rhs_, nx); - ryd.copyToStarting(*rhs_, nx+nyc); + nlp_->log->write("RHS KKT_SPARSE_XYcYd rx: ", rx, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_XYcYd ryc:", ryc, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_XYcYd ryd:", ryd, hovIteration); - if(write_linsys_counter_>=0) { - csr_writer_.writeRhsToFile(*rhs_, write_linsys_counter_); - } - nlp_->runStats.kkt.tmSolveRhsManip.stop(); - - nlp_->runStats.kkt.tmSolveInner.start(); - // - // solve - // - bool linsol_ok = linSys_->solve(*rhs_); - nlp_->runStats.kkt.tmSolveInner.stop(); - - if(perf_report_) { - nlp_->log->printf(hovSummary, "(summary for linear solver from KKT_SPARSE_XYcYd)\n%s", - nlp_->runStats.linsolv.get_summary_last_solve().c_str()); - } + // + // form the rhs for the sparse linSys + // + rx.copyToStarting(*rhs_, 0); + ryc.copyToStarting(*rhs_, nx); + ryd.copyToStarting(*rhs_, nx + nyc); - if(write_linsys_counter_>=0) { - csr_writer_.writeSolToFile(*rhs_, write_linsys_counter_); - } - if(false==linsol_ok) return false; + if(write_linsys_counter_ >= 0) { + csr_writer_.writeRhsToFile(*rhs_, write_linsys_counter_); + } + nlp_->runStats.kkt.tmSolveRhsManip.stop(); + + nlp_->runStats.kkt.tmSolveInner.start(); + // + // solve + // + bool linsol_ok = linSys_->solve(*rhs_); + nlp_->runStats.kkt.tmSolveInner.stop(); + + if(perf_report_) { + nlp_->log->printf(hovSummary, + "(summary for linear solver from KKT_SPARSE_XYcYd)\n%s", + nlp_->runStats.linsolv.get_summary_last_solve().c_str()); + } - nlp_->runStats.kkt.tmSolveRhsManip.start(); + if(write_linsys_counter_ >= 0) { + csr_writer_.writeSolToFile(*rhs_, write_linsys_counter_); + } + if(false == linsol_ok) return false; - // - // unpack - // - rhs_->startingAtCopyToStartingAt(0, dx, 0); - rhs_->startingAtCopyToStartingAt(nx, dyc, 0); - rhs_->startingAtCopyToStartingAt(nx+nyc, dyd, 0); + nlp_->runStats.kkt.tmSolveRhsManip.start(); - nlp_->log->write("SOL KKT_SPARSE_XYcYd dx: ", dx, hovMatrices); - nlp_->log->write("SOL KKT_SPARSE_XYcYd dyc:", dyc, hovMatrices); - nlp_->log->write("SOL KKT_SPARSE_XYcYd dyd:", dyd, hovMatrices); + // + // unpack + // + rhs_->startingAtCopyToStartingAt(0, dx, 0); + rhs_->startingAtCopyToStartingAt(nx, dyc, 0); + rhs_->startingAtCopyToStartingAt(nx + nyc, dyd, 0); - nlp_->runStats.kkt.tmSolveRhsManip.stop(); - return true; - } + nlp_->log->write("SOL KKT_SPARSE_XYcYd dx: ", dx, hovMatrices); + nlp_->log->write("SOL KKT_SPARSE_XYcYd dyc:", dyc, hovMatrices); + nlp_->log->write("SOL KKT_SPARSE_XYcYd dyd:", dyd, hovMatrices); - hiopLinSolverSymSparse* - hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLinsys(int nx, int neq, int nineq, int nnz) - { - if(nullptr==linSys_) { - int n = nx + neq + nineq; + nlp_->runStats.kkt.tmSolveRhsManip.stop(); + return true; +} - assert(false == safe_mode_ && "KKT_SPARSE_XYcYd linsys does not support safe mode."); +hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXYcYd::determineAndCreateLinsys(int nx, int neq, int nineq, int nnz) +{ + if(nullptr == linSys_) { + int n = nx + neq + nineq; + + assert(false == safe_mode_ && "KKT_SPARSE_XYcYd linsys does not support safe mode."); - auto compute_mode = nlp_->options->GetString("compute_mode"); - auto linear_solver = nlp_->options->GetString("linear_solver_sparse"); - std::string linsol_actual = "[?]"; - if(compute_mode == "cpu") { - //////////////////////////////////////////////////////////////////////////////////////////////// - // compute mode CPU - //////////////////////////////////////////////////////////////////////////////////////////////// - assert(nullptr == linSys_); + auto compute_mode = nlp_->options->GetString("compute_mode"); + auto linear_solver = nlp_->options->GetString("linear_solver_sparse"); + std::string linsol_actual = "[?]"; + if(compute_mode == "cpu") { + //////////////////////////////////////////////////////////////////////////////////////////////// + // compute mode CPU + //////////////////////////////////////////////////////////////////////////////////////////////// + assert(nullptr == linSys_); - if(linear_solver == "ma57" || linear_solver == "auto") { + if(linear_solver == "ma57" || linear_solver == "auto") { #ifdef HIOP_USE_COINHSL - linsol_actual = "MA57"; - linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); -#endif // HIOP_USE_COINHSL - } + linsol_actual = "MA57"; + linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); +#endif // HIOP_USE_COINHSL + } - if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "pardiso") { - //ma57 is not available or user requested pardiso + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "pardiso") { + // ma57 is not available or user requested pardiso #ifdef HIOP_USE_PARDISO - linsol_actual = "PARDISO"; - linSys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_); -#endif // HIOP_USE_PARDISO - } + linsol_actual = "PARDISO"; + linSys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_); +#endif // HIOP_USE_PARDISO + } - if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "strumpack") { - //ma57 and pardiso are not available or user requested strumpack + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "strumpack") { + // ma57 and pardiso are not available or user requested strumpack #ifdef HIOP_USE_STRUMPACK - linsol_actual = "STRUMPACK"; - linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); - - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); - if(fact_acceptor_ic) { - nlp_->log->printf(hovError, - "KKT_SPARSE_XYcYd linsys with STRUMPACK does not support inertia correction. " - "Please set option 'fact_acceptor' to 'inertia_free'.\n"); - assert(false); - return nullptr; - } -#endif // HIOP_USE_STRUMPACK + linsol_actual = "STRUMPACK"; + linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); + + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); + if(fact_acceptor_ic) { + nlp_->log->printf(hovError, + "KKT_SPARSE_XYcYd linsys with STRUMPACK does not support inertia correction. " + "Please set option 'fact_acceptor' to 'inertia_free'.\n"); + assert(false); + return nullptr; } +#endif // HIOP_USE_STRUMPACK + } - if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") { - //ma57, pardiso and strumpack are not available or user requested ginkgo -#ifdef HIOP_USE_GINKGO - nlp_->log->printf(hovScalars, - "KKT_SPARSE_XYcYd linsys: alloc GINKGO with matrix size %d (%d cons)\n", - n, neq+nineq); - linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_); -#endif // HIOP_USE_GINKGO - } + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") { + // ma57, pardiso and strumpack are not available or user requested ginkgo +#ifdef HIOP_USE_GINKGO + nlp_->log->printf(hovScalars, + "KKT_SPARSE_XYcYd linsys: alloc GINKGO with matrix size %d (%d cons)\n", + n, + neq + nineq); + linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_); +#endif // HIOP_USE_GINKGO + } - } else { //hybrid / gpu - //////////////////////////////////////////////////////////////////////////////////////////////// - // compute mode hybrid or gpu - //////////////////////////////////////////////////////////////////////////////////////////////// - assert(false == safe_mode_); - assert(nullptr == linSys_); - assert(compute_mode != "gpu" && - "KKT_SPARSE_XYcYd linsys: GPU compute mode not supported at this time."); + } else { // hybrid / gpu + //////////////////////////////////////////////////////////////////////////////////////////////// + // compute mode hybrid or gpu + //////////////////////////////////////////////////////////////////////////////////////////////// + assert(false == safe_mode_); + assert(nullptr == linSys_); + assert(compute_mode != "gpu" && "KKT_SPARSE_XYcYd linsys: GPU compute mode not supported at this time."); - if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "resolve") { + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "resolve") { #if defined(HIOP_USE_RESOLVE) - linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_); - linsol_actual = "ReSolve"; - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); - if(fact_acceptor_ic) { - nlp_->log->printf(hovError, - "KKT_SPARSE_XYcYd linsys with ReSolve does not support inertia correction. " - "Please set option 'fact_acceptor' to 'inertia_free'.\n"); - assert(false); - return nullptr; - } -#endif + linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_); + linsol_actual = "ReSolve"; + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); + if(fact_acceptor_ic) { + nlp_->log->printf(hovError, + "KKT_SPARSE_XYcYd linsys with ReSolve does not support inertia correction. " + "Please set option 'fact_acceptor' to 'inertia_free'.\n"); + assert(false); + return nullptr; } +#endif + } - if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "strumpack") { -#if defined(HIOP_USE_STRUMPACK) - linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); - linsol_actual = "STRUMPACK"; - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); - if(fact_acceptor_ic) { - nlp_->log->printf(hovError, - "KKT_SPARSE_XYcYd linsys with STRUMPACK does not support inertia correction. " - "Please set option 'fact_acceptor' to 'inertia_free'.\n"); - assert(false); - return nullptr; - } -#endif //HIOP_USE_STRUMPACK + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "strumpack") { +#if defined(HIOP_USE_STRUMPACK) + linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); + linsol_actual = "STRUMPACK"; + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); + if(fact_acceptor_ic) { + nlp_->log->printf(hovError, + "KKT_SPARSE_XYcYd linsys with STRUMPACK does not support inertia correction. " + "Please set option 'fact_acceptor' to 'inertia_free'.\n"); + assert(false); + return nullptr; } +#endif // HIOP_USE_STRUMPACK + } - if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ma57") { + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ma57") { #if defined(HIOP_USE_COINHSL) - linsol_actual = "MA57"; - linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); -#endif //HIOP_USE_COINHSL - } - - if( (nullptr == linSys_&& linear_solver == "auto") || linear_solver == "pardiso") { + linsol_actual = "MA57"; + linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); +#endif // HIOP_USE_COINHSL + } + + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "pardiso") { #if defined(HIOP_USE_PARDISO) - linsol_actual = "PARDISO"; - linSys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_); -#endif // HIOP_USE_PARDISO - } + linsol_actual = "PARDISO"; + linSys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_); +#endif // HIOP_USE_PARDISO + } - if(linSys_) { - nlp_->log->printf(hovScalars, - "KKT_SPARSE_XYcYd linsys: alloc [%s] size %d (%d cons)(%s)\n", - linsol_actual.c_str(), - n, - neq+nineq, - compute_mode.c_str()); - } + if(linSys_) { + nlp_->log->printf(hovScalars, + "KKT_SPARSE_XYcYd linsys: alloc [%s] size %d (%d cons)(%s)\n", + linsol_actual.c_str(), + n, + neq + nineq, + compute_mode.c_str()); } - assert(linSys_&& "KKT_SPARSE_XYcYd linsys: cannot instantiate backend linear solver"); } - return dynamic_cast (linSys_); + assert(linSys_ && "KKT_SPARSE_XYcYd linsys: cannot instantiate backend linear solver"); } - - - - /* ************************************************************************* - * For class hiopKKTLinSysCompressedSparseXDYcYd - * ************************************************************************* - */ - hiopKKTLinSysCompressedSparseXDYcYd::hiopKKTLinSysCompressedSparseXDYcYd(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressedXDYcYd(nlp), rhs_{nullptr}, + return dynamic_cast(linSys_); +} + +/* ************************************************************************* + * For class hiopKKTLinSysCompressedSparseXDYcYd + * ************************************************************************* + */ +hiopKKTLinSysCompressedSparseXDYcYd::hiopKKTLinSysCompressedSparseXDYcYd(hiopNlpFormulation* nlp) + : hiopKKTLinSysCompressedXDYcYd(nlp), + rhs_{nullptr}, Hx_{nullptr}, Hd_{nullptr}, - HessSp_{nullptr}, - Jac_cSp_{nullptr}, + HessSp_{nullptr}, + Jac_cSp_{nullptr}, Jac_dSp_{nullptr}, - write_linsys_counter_(-1), + write_linsys_counter_(-1), csr_writer_(nlp) - { - nlpSp_ = dynamic_cast(nlp_); - assert(nlpSp_); +{ + nlpSp_ = dynamic_cast(nlp_); + assert(nlpSp_); +} + +hiopKKTLinSysCompressedSparseXDYcYd::~hiopKKTLinSysCompressedSparseXDYcYd() +{ + delete rhs_; + delete Hx_; + delete Hd_; +} + +bool hiopKKTLinSysCompressedSparseXDYcYd::build_kkt_matrix(const hiopPDPerturbation& pdreg) +{ + delta_wx_ = perturb_calc_->get_curr_delta_wx(); + delta_wd_ = perturb_calc_->get_curr_delta_wd(); + delta_cc_ = perturb_calc_->get_curr_delta_cc(); + delta_cd_ = perturb_calc_->get_curr_delta_cd(); + + HessSp_ = dynamic_cast(Hess_); + Jac_cSp_ = dynamic_cast(Jac_c_); + Jac_dSp_ = dynamic_cast(Jac_d_); + + if(!HessSp_) { + assert(false); + return false; + } + if(!Jac_cSp_) { + assert(false); + return false; + } + if(!Jac_dSp_) { + assert(false); + return false; } - hiopKKTLinSysCompressedSparseXDYcYd::~hiopKKTLinSysCompressedSparseXDYcYd() - { - delete rhs_; - delete Hx_; - delete Hd_; + size_type nx = HessSp_->n(), nd = Jac_dSp_->m(), neq = Jac_cSp_->m(), nineq = Jac_dSp_->m(); + int nnz = + HessSp_->numberOfNonzeros() + Jac_cSp_->numberOfNonzeros() + Jac_dSp_->numberOfNonzeros() + nd + nx + nd + neq + nineq; + + linSys_ = determineAndCreateLinsys(nx, neq, nineq, nnz); + + auto* linSys = dynamic_cast(linSys_); + assert(linSys); + + auto* Msys = dynamic_cast(linSys->sys_matrix()); + assert(Msys); + if(perf_report_) { + nlp_->log->printf(hovSummary, "KKT_SPARSE_XDYcYd linsys: Low-level linear system size: %d\n", Msys->n()); } - bool hiopKKTLinSysCompressedSparseXDYcYd::build_kkt_matrix(const hiopPDPerturbation& pdreg) + // update linSys system matrix, including IC perturbations { - delta_wx_ = perturb_calc_->get_curr_delta_wx(); - delta_wd_ = perturb_calc_->get_curr_delta_wd(); - delta_cc_ = perturb_calc_->get_curr_delta_cc(); - delta_cd_ = perturb_calc_->get_curr_delta_cd(); - - HessSp_ = dynamic_cast(Hess_); - Jac_cSp_ = dynamic_cast(Jac_c_); - Jac_dSp_ = dynamic_cast(Jac_d_); - - if(!HessSp_) { assert(false); return false; } - if(!Jac_cSp_) { assert(false); return false; } - if(!Jac_dSp_) { assert(false); return false; } - - size_type nx = HessSp_->n(), nd=Jac_dSp_->m(), neq=Jac_cSp_->m(), nineq=Jac_dSp_->m(); - int nnz = HessSp_->numberOfNonzeros() + Jac_cSp_->numberOfNonzeros() + Jac_dSp_->numberOfNonzeros() + nd + nx + nd + neq + nineq; - - linSys_ = determineAndCreateLinsys(nx, neq, nineq, nnz); - - auto* linSys = dynamic_cast (linSys_); - assert(linSys); - - auto* Msys = dynamic_cast(linSys->sys_matrix()); - assert(Msys); - if(perf_report_) { - nlp_->log->printf(hovSummary, - "KKT_SPARSE_XDYcYd linsys: Low-level linear system size: %d\n", - Msys->n()); + nlp_->runStats.kkt.tmUpdateLinsys.start(); + + Msys->setToZero(); + + // copy Jac and Hes to the full iterate matrix + size_type dest_nnz_st{0}; + Msys->copyRowsBlockFrom(*HessSp_, 0, nx, 0, dest_nnz_st); + dest_nnz_st += HessSp_->numberOfNonzeros(); + Msys->copyRowsBlockFrom(*Jac_cSp_, 0, neq, nx + nd, dest_nnz_st); + dest_nnz_st += Jac_cSp_->numberOfNonzeros(); + Msys->copyRowsBlockFrom(*Jac_dSp_, 0, nineq, nx + nd + neq, dest_nnz_st); + dest_nnz_st += Jac_dSp_->numberOfNonzeros(); + + // minus identity matrix for slack variables + Msys->copyDiagMatrixToSubblock(-1., nx + nd + neq, nx, dest_nnz_st, nineq); + dest_nnz_st += nineq; + + // build the diagonal Hx = Dx + delta_wx + if(NULL == Hx_) { + Hx_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx); + assert(Hx_); } + Hx_->startingAtCopyFromStartingAt(0, *Dx_, 0); - // update linSys system matrix, including IC perturbations - { - nlp_->runStats.kkt.tmUpdateLinsys.start(); - - Msys->setToZero(); - - // copy Jac and Hes to the full iterate matrix - size_type dest_nnz_st{0}; - Msys->copyRowsBlockFrom(*HessSp_, 0, nx, 0, dest_nnz_st); - dest_nnz_st += HessSp_->numberOfNonzeros(); - Msys->copyRowsBlockFrom(*Jac_cSp_, 0, neq, nx+nd, dest_nnz_st); - dest_nnz_st += Jac_cSp_->numberOfNonzeros(); - Msys->copyRowsBlockFrom(*Jac_dSp_, 0, nineq, nx+nd+neq, dest_nnz_st); - dest_nnz_st += Jac_dSp_->numberOfNonzeros(); - - // minus identity matrix for slack variables - Msys->copyDiagMatrixToSubblock(-1., nx+nd+neq, nx, dest_nnz_st, nineq); - dest_nnz_st += nineq; - - //build the diagonal Hx = Dx + delta_wx - if(NULL == Hx_) { - Hx_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx); - assert(Hx_); - } - Hx_->startingAtCopyFromStartingAt(0, *Dx_, 0); - - //a good time to add the IC 'delta_wx' perturbation - Hx_->axpy(1., *delta_wx_); + // a good time to add the IC 'delta_wx' perturbation + Hx_->axpy(1., *delta_wx_); - Msys->copySubDiagonalFrom(0, nx, *Hx_, dest_nnz_st); - dest_nnz_st += nx; + Msys->copySubDiagonalFrom(0, nx, *Hx_, dest_nnz_st); + dest_nnz_st += nx; - //build the diagonal Hd = Dd + delta_wd - if(NULL == Hd_) { - Hd_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nd); - assert(Hd_); - } - Hd_->startingAtCopyFromStartingAt(0, *Dd_, 0); - Hd_->axpy(1., *delta_wd_); - Msys->copySubDiagonalFrom(nx, nd, *Hd_, dest_nnz_st); - dest_nnz_st += nd; - - //add -delta_cc to diagonal block linSys starting at (nx+nd, nx+nd) - Msys->copySubDiagonalFrom(nx+nd, neq, *delta_cc_, dest_nnz_st, -1.); - dest_nnz_st += neq; - - //add -delta_cd to diagonal block linSys starting at (nx+nd+neq, nx+nd+neq) - Msys->copySubDiagonalFrom(nx+nd+neq, nineq, *delta_cd_, dest_nnz_st, -1.); - dest_nnz_st += nineq; - - /* we've just done - * - * [ H+Dx+delta_wx 0 Jc^T Jd^T ] [ dx] [ rx_tilde ] - * [ 0 Dd+delta_wd 0 -I ] [ dd] [ rd_tilde ] - * [ Jc 0 -delta_cc 0 ] [dyc] = [ ryc ] - * [ Jd -I 0 -delta_cd ] [dyd] [ ryd ] - */ - nlp_->log->write("KKT_SPARSE_XDYcYd linsys:", *Msys, hovMatrices); - nlp_->runStats.kkt.tmUpdateLinsys.stop(); + // build the diagonal Hd = Dd + delta_wd + if(NULL == Hd_) { + Hd_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nd); + assert(Hd_); } + Hd_->startingAtCopyFromStartingAt(0, *Dd_, 0); + Hd_->axpy(1., *delta_wd_); + Msys->copySubDiagonalFrom(nx, nd, *Hd_, dest_nnz_st); + dest_nnz_st += nd; + + // add -delta_cc to diagonal block linSys starting at (nx+nd, nx+nd) + Msys->copySubDiagonalFrom(nx + nd, neq, *delta_cc_, dest_nnz_st, -1.); + dest_nnz_st += neq; + + // add -delta_cd to diagonal block linSys starting at (nx+nd+neq, nx+nd+neq) + Msys->copySubDiagonalFrom(nx + nd + neq, nineq, *delta_cd_, dest_nnz_st, -1.); + dest_nnz_st += nineq; + + /* we've just done + * + * [ H+Dx+delta_wx 0 Jc^T Jd^T ] [ dx] [ rx_tilde ] + * [ 0 Dd+delta_wd 0 -I ] [ dd] [ rd_tilde ] + * [ Jc 0 -delta_cc 0 ] [dyc] = [ ryc ] + * [ Jd -I 0 -delta_cd ] [dyd] [ ryd ] + */ + nlp_->log->write("KKT_SPARSE_XDYcYd linsys:", *Msys, hovMatrices); + nlp_->runStats.kkt.tmUpdateLinsys.stop(); + } - //write matrix to file if requested - if(nlp_->options->GetString("write_kkt") == "yes") { - write_linsys_counter_++; - } - if(write_linsys_counter_>=0) { + // write matrix to file if requested + if(nlp_->options->GetString("write_kkt") == "yes") { + write_linsys_counter_++; + } + if(write_linsys_counter_ >= 0) { #ifndef HIOP_USE_GPU - auto* MsysSp = dynamic_cast(linSys->sys_matrix()); - csr_writer_.writeMatToFile(*MsysSp, write_linsys_counter_, nx, neq, nineq); + auto* MsysSp = dynamic_cast(linSys->sys_matrix()); + csr_writer_.writeMatToFile(*MsysSp, write_linsys_counter_, nx, neq, nineq); #else - //TODO csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); + // TODO csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); #endif - } + } - return true; + return true; +} + +bool hiopKKTLinSysCompressedSparseXDYcYd::solveCompressed(hiopVector& rx, + hiopVector& rd, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dd, + hiopVector& dyc, + hiopVector& dyd) +{ + if(!nlpSp_) { + assert(false); + return false; + } + if(!HessSp_) { + assert(false); + return false; + } + if(!Jac_cSp_) { + assert(false); + return false; + } + if(!Jac_dSp_) { + assert(false); + return false; } - bool hiopKKTLinSysCompressedSparseXDYcYd:: - solveCompressed(hiopVector& rx, hiopVector& rd, hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dd, hiopVector& dyc, hiopVector& dyd) - { - if(!nlpSp_) { assert(false); return false; } - if(!HessSp_) { assert(false); return false; } - if(!Jac_cSp_) { assert(false); return false; } - if(!Jac_dSp_) { assert(false); return false; } - - nlp_->runStats.kkt.tmSolveRhsManip.start(); - - int nx=rx.get_size(), nd=rd.get_size(), nyc=ryc.get_size(), nyd=ryd.get_size(); - int nxsp=Hx_->get_size(); - assert(nxsp==nx); - if(rhs_ == NULL) { - rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), - nx+nd+nyc+nyd); - } + nlp_->runStats.kkt.tmSolveRhsManip.start(); - nlp_->log->write("RHS KKT_SPARSE_XDYcYd rx: ", rx, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_XDYcYd rx: ", rd, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_XDYcYd ryc:", ryc, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_XDYcYd ryd:", ryd, hovIteration); - - // - // form the rhs for the sparse linSys - // - rx.copyToStarting(*rhs_, 0); - rd.copyToStarting(*rhs_, nx); - ryc.copyToStarting(*rhs_, nx+nd); - ryd.copyToStarting(*rhs_, nx+nd+nyc); - - if(write_linsys_counter_>=0) { - csr_writer_.writeRhsToFile(*rhs_, write_linsys_counter_); - } - nlp_->runStats.kkt.tmSolveRhsManip.stop(); - - nlp_->runStats.kkt.tmSolveInner.start(); - // - // solve - // - bool linsol_ok = linSys_->solve(*rhs_); - nlp_->runStats.kkt.tmSolveInner.stop(); - - if(perf_report_) { - nlp_->log->printf(hovSummary, "(summary for linear solver from KKT_SPARSE_XDYcYd)\n%s", - nlp_->runStats.linsolv.get_summary_last_solve().c_str()); - } + int nx = rx.get_size(), nd = rd.get_size(), nyc = ryc.get_size(), nyd = ryd.get_size(); + int nxsp = Hx_->get_size(); + assert(nxsp == nx); + if(rhs_ == NULL) { + rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx + nd + nyc + nyd); + } - if(write_linsys_counter_>=0) { - csr_writer_.writeSolToFile(*rhs_, write_linsys_counter_); - } - if(false==linsol_ok) return false; - - nlp_->runStats.kkt.tmSolveRhsManip.start(); - - // - // unpack - // - rhs_->startingAtCopyToStartingAt(0, dx, 0); - rhs_->startingAtCopyToStartingAt(nx, dd, 0); - rhs_->startingAtCopyToStartingAt(nx+nd, dyc, 0); - rhs_->startingAtCopyToStartingAt(nx+nd+nyc, dyd, 0); - nlp_->log->write("SOL KKT_SPARSE_XDYcYd dx: ", dx, hovMatrices); - nlp_->log->write("SOL KKT_SPARSE_XDYcYd dd: ", dd, hovMatrices); - nlp_->log->write("SOL KKT_SPARSE_XDYcYd dyc:", dyc, hovMatrices); - nlp_->log->write("SOL KKT_SPARSE_XDYcYd dyd:", dyd, hovMatrices); - - nlp_->runStats.kkt.tmSolveRhsManip.stop(); - return true; + nlp_->log->write("RHS KKT_SPARSE_XDYcYd rx: ", rx, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_XDYcYd rx: ", rd, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_XDYcYd ryc:", ryc, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_XDYcYd ryd:", ryd, hovIteration); + + // + // form the rhs for the sparse linSys + // + rx.copyToStarting(*rhs_, 0); + rd.copyToStarting(*rhs_, nx); + ryc.copyToStarting(*rhs_, nx + nd); + ryd.copyToStarting(*rhs_, nx + nd + nyc); + + if(write_linsys_counter_ >= 0) { + csr_writer_.writeRhsToFile(*rhs_, write_linsys_counter_); + } + nlp_->runStats.kkt.tmSolveRhsManip.stop(); + + nlp_->runStats.kkt.tmSolveInner.start(); + // + // solve + // + bool linsol_ok = linSys_->solve(*rhs_); + nlp_->runStats.kkt.tmSolveInner.stop(); + + if(perf_report_) { + nlp_->log->printf(hovSummary, + "(summary for linear solver from KKT_SPARSE_XDYcYd)\n%s", + nlp_->runStats.linsolv.get_summary_last_solve().c_str()); } - hiopLinSolverSymSparse* - hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateLinsys(int nx, int neq, int nineq, int nnz) - { - if(nullptr==linSys_) { - int n = nx + nineq + neq + nineq; - auto compute_mode = nlp_->options->GetString("compute_mode"); + if(write_linsys_counter_ >= 0) { + csr_writer_.writeSolToFile(*rhs_, write_linsys_counter_); + } + if(false == linsol_ok) return false; + + nlp_->runStats.kkt.tmSolveRhsManip.start(); + + // + // unpack + // + rhs_->startingAtCopyToStartingAt(0, dx, 0); + rhs_->startingAtCopyToStartingAt(nx, dd, 0); + rhs_->startingAtCopyToStartingAt(nx + nd, dyc, 0); + rhs_->startingAtCopyToStartingAt(nx + nd + nyc, dyd, 0); + nlp_->log->write("SOL KKT_SPARSE_XDYcYd dx: ", dx, hovMatrices); + nlp_->log->write("SOL KKT_SPARSE_XDYcYd dd: ", dd, hovMatrices); + nlp_->log->write("SOL KKT_SPARSE_XDYcYd dyc:", dyc, hovMatrices); + nlp_->log->write("SOL KKT_SPARSE_XDYcYd dyd:", dyd, hovMatrices); + + nlp_->runStats.kkt.tmSolveRhsManip.stop(); + return true; +} + +hiopLinSolverSymSparse* hiopKKTLinSysCompressedSparseXDYcYd::determineAndCreateLinsys(int nx, int neq, int nineq, int nnz) +{ + if(nullptr == linSys_) { + int n = nx + nineq + neq + nineq; + auto compute_mode = nlp_->options->GetString("compute_mode"); + + ///////////////////////////////////////////////////////////////////////////////////////////// + // safe mode + ///////////////////////////////////////////////////////////////////////////////////////////// + if(safe_mode_) { + assert((compute_mode == "hybrid" || compute_mode == "cpu" || compute_mode == "auto") && + "KKT_SPARSE_XDYcYd linsys: safe mode not currently supported with gpu compute mode."); + + // for now we can only rely on MA57 as the safe mode linear solver +#if defined(HIOP_USE_COINHSL) + linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); + nlp_->log->printf(hovWarning, + "KKT_SPARSE_XDYcYd linsys: alloc MA57 on CPU size %d (%d cons) (safe_mode=%d)\n", + n, + neq + nineq, + safe_mode_); + return dynamic_cast(linSys_); +#else // end of if defined(HIOP_USE_COINHSL) + assert(false && + "HiOp was not built with the safe(r) sparse linear solver MA57 and cannot switch to " + "safe mode as requested. "); + return nullptr; +#endif + } // end of if(safe_mode_) + auto linear_solver = nlp_->options->GetString("linear_solver_sparse"); + std::string actual_lin_solver = "[?]"; + if(compute_mode == "cpu") { ///////////////////////////////////////////////////////////////////////////////////////////// - // safe mode + // CPU compute mode ///////////////////////////////////////////////////////////////////////////////////////////// - if(safe_mode_) { - assert( (compute_mode == "hybrid" || compute_mode == "cpu" || compute_mode == "auto") && - "KKT_SPARSE_XDYcYd linsys: safe mode not currently supported with gpu compute mode."); - - //for now we can only rely on MA57 as the safe mode linear solver -#if defined(HIOP_USE_COINHSL) - linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); - nlp_->log->printf(hovWarning, - "KKT_SPARSE_XDYcYd linsys: alloc MA57 on CPU size %d (%d cons) (safe_mode=%d)\n", - n, - neq+nineq, - safe_mode_); - return dynamic_cast (linSys_); -#else // end of if defined(HIOP_USE_COINHSL) - assert(false && - "HiOp was not built with the safe(r) sparse linear solver MA57 and cannot switch to " - "safe mode as requested. "); - return nullptr; -#endif - } // end of if(safe_mode_) - - auto linear_solver = nlp_->options->GetString("linear_solver_sparse"); - std::string actual_lin_solver = "[?]"; - if(compute_mode == "cpu") { - - ///////////////////////////////////////////////////////////////////////////////////////////// - // CPU compute mode - ///////////////////////////////////////////////////////////////////////////////////////////// - if(linear_solver == "ma57" || linear_solver == "auto") { + if(linear_solver == "ma57" || linear_solver == "auto") { #ifdef HIOP_USE_COINHSL - linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); - actual_lin_solver = "MA57"; -#endif // HIOP_USE_COINHSL - } + linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); + actual_lin_solver = "MA57"; +#endif // HIOP_USE_COINHSL + } - if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "pardiso") { - //ma57 is not available or user requested pardiso + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "pardiso") { + // ma57 is not available or user requested pardiso #ifdef HIOP_USE_PARDISO - linSys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_); - actual_lin_solver = "PARDISO"; -#endif // HIOP_USE_PARDISO - } + linSys_ = new hiopLinSolverSymSparsePARDISO(n, nnz, nlp_); + actual_lin_solver = "PARDISO"; +#endif // HIOP_USE_PARDISO + } - if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "strumpack") { - //ma57 and pardiso are not available or user requested strumpack -#ifdef HIOP_USE_STRUMPACK - linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); - actual_lin_solver = "STRUMPACK"; - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); - if(fact_acceptor_ic) { - nlp_->log->printf(hovError, - "KKT_SPARSE_XDYcYd linsys with STRUMPACK does not support inertia correction. " - "Please set option 'fact_acceptor' to 'inertia_free'.\n"); - assert(false); - return nullptr; - } -#endif // HIOP_USE_STRUMPACK + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "strumpack") { + // ma57 and pardiso are not available or user requested strumpack +#ifdef HIOP_USE_STRUMPACK + linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); + actual_lin_solver = "STRUMPACK"; + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); + if(fact_acceptor_ic) { + nlp_->log->printf(hovError, + "KKT_SPARSE_XDYcYd linsys with STRUMPACK does not support inertia correction. " + "Please set option 'fact_acceptor' to 'inertia_free'.\n"); + assert(false); + return nullptr; } +#endif // HIOP_USE_STRUMPACK + } - if( (nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") { - //ma57, pardiso and strumpack are not available or user requested ginkgo + if((nullptr == linSys_ && linear_solver == "auto") || linear_solver == "ginkgo") { + // ma57, pardiso and strumpack are not available or user requested ginkgo #ifdef HIOP_USE_GINKGO - linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_); - actual_lin_solver = "GINKGO"; -#endif // HIOP_USE_GINKGO - } + linSys_ = new hiopLinSolverSymSparseGinkgo(n, nnz, nlp_); + actual_lin_solver = "GINKGO"; +#endif // HIOP_USE_GINKGO + } - if(linSys_) { - nlp_->log->printf(hovScalars, - "KKT_SPARSE_XDYcYd linsys: alloc [%s] size %d (%d cons)(cpu)\n", - actual_lin_solver.c_str(), - n, - neq+nineq); - } - } else if(compute_mode == "hybrid" || compute_mode == "auto") { - assert(false == safe_mode_); - ///////////////////////////////////////////////////////////////////////////////////////////// - // hybrid compute mode - // - // We still allow for CPU linear solver, but in a different order than for cpu compute mode - ///////////////////////////////////////////////////////////////////////////////////////////// - - //our first choice is cuSolver on hybrid compute mode - assert(nullptr == linSys_); - if(linear_solver == "resolve" || linear_solver == "auto") { + if(linSys_) { + nlp_->log->printf(hovScalars, + "KKT_SPARSE_XDYcYd linsys: alloc [%s] size %d (%d cons)(cpu)\n", + actual_lin_solver.c_str(), + n, + neq + nineq); + } + } else if(compute_mode == "hybrid" || compute_mode == "auto") { + assert(false == safe_mode_); + ///////////////////////////////////////////////////////////////////////////////////////////// + // hybrid compute mode + // + // We still allow for CPU linear solver, but in a different order than for cpu compute mode + ///////////////////////////////////////////////////////////////////////////////////////////// + + // our first choice is cuSolver on hybrid compute mode + assert(nullptr == linSys_); + if(linear_solver == "resolve" || linear_solver == "auto") { #if defined(HIOP_USE_RESOLVE) - actual_lin_solver = "ReSolve"; - linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_); - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); - if(fact_acceptor_ic) { - nlp_->log->printf(hovError, - "KKT_SPARSE_XDYcYd linsys with ReSolve does not support inertia correction. " - "Please set option 'fact_acceptor' to 'inertia_free'.\n"); - assert(false); - return nullptr; - } + actual_lin_solver = "ReSolve"; + linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_); + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); + if(fact_acceptor_ic) { + nlp_->log->printf(hovError, + "KKT_SPARSE_XDYcYd linsys with ReSolve does not support inertia correction. " + "Please set option 'fact_acceptor' to 'inertia_free'.\n"); + assert(false); + return nullptr; + } #endif - } //end resolve + } // end resolve - if(nullptr == linSys_ && (linear_solver == "strumpack" || linear_solver == "auto")) { + if(nullptr == linSys_ && (linear_solver == "strumpack" || linear_solver == "auto")) { #if defined(HIOP_USE_STRUMPACK) - actual_lin_solver = "STRUMPACK"; - linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); - if(fact_acceptor_ic) { - nlp_->log->printf(hovError, - "KKT_SPARSE_XDYcYd linsys with STRUMPACK does not support inertia correction. " - "Please set option 'fact_acceptor' to 'inertia_free'.\n"); - assert(false); - return nullptr; - } -#endif - } //end strumpack - - if(linear_solver == "auto") { - //assert if a GPU sparse linear solver is not present for auto - assert(linSys_!=nullptr && - "HiOp was built without a sparse linear solver for GPU/device and is likely better to " - " run with 'compute_mode' set to 'cpu' instead of the current 'hybrid' " - "(from hiopKKTLinSysCompressedSparseXDYcYd)"); + actual_lin_solver = "STRUMPACK"; + linSys_ = new hiopLinSolverSymSparseSTRUMPACK(n, nnz, nlp_); + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); + if(fact_acceptor_ic) { + nlp_->log->printf(hovError, + "KKT_SPARSE_XDYcYd linsys with STRUMPACK does not support inertia correction. " + "Please set option 'fact_acceptor' to 'inertia_free'.\n"); + assert(false); return nullptr; } - // if user requests CPU solvers under hybrid mode, let it run with it he's likely to be aware of the setup - assert(linear_solver != "auto"); - - if(nullptr == linSys_ && linear_solver == "ma57") { +#endif + } // end strumpack + + if(linear_solver == "auto") { + // assert if a GPU sparse linear solver is not present for auto + assert(linSys_ != nullptr && + "HiOp was built without a sparse linear solver for GPU/device and is likely better to " + " run with 'compute_mode' set to 'cpu' instead of the current 'hybrid' " + "(from hiopKKTLinSysCompressedSparseXDYcYd)"); + return nullptr; + } + // if user requests CPU solvers under hybrid mode, let it run with it he's likely to be aware of the setup + assert(linear_solver != "auto"); + + if(nullptr == linSys_ && linear_solver == "ma57") { #ifdef HIOP_USE_COINHSL - actual_lin_solver = "MA57"; - linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); -#endif // HIOP_USE_COINHSL - } - - if(nullptr == linSys_&& linear_solver == "pardiso") { + actual_lin_solver = "MA57"; + linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); +#endif // HIOP_USE_COINHSL + } + + if(nullptr == linSys_ && linear_solver == "pardiso") { #ifdef HIOP_USE_PARDISO - actual_lin_solver = "PARDISO"; - linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); -#endif // HIOP_USE_PARDISO - } + actual_lin_solver = "PARDISO"; + linSys_ = new hiopLinSolverSymSparseMA57(n, nnz, nlp_); +#endif // HIOP_USE_PARDISO + } - if(linSys_) { - nlp_->log->printf(hovScalars, - "KKT_SPARSE_XDYcYd linsys: alloc [%s] size %d (%d cons) (hybrid)\n", - actual_lin_solver.c_str(), - n, - neq+nineq); + if(linSys_) { + nlp_->log->printf(hovScalars, + "KKT_SPARSE_XDYcYd linsys: alloc [%s] size %d (%d cons) (hybrid)\n", + actual_lin_solver.c_str(), + n, + neq + nineq); + } + } else if(compute_mode == "gpu") { + ///////////////////////////////////////////////////////////////////////////////////////////// + // gpu compute mode + // + // We don't allow CPU linear solvers. + ///////////////////////////////////////////////////////////////////////////////////////////// + // assert(false && "KKT_SPARSE_XDYcYd linsys: GPU compute mode not yet supported."); + // assert(false == safe_mode_); + assert(nullptr == linSys_); + + if(linear_solver == "resolve" || linear_solver == "auto") { +#if defined(HIOP_USE_RESOLVE) + linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_); + nlp_->log->printf(hovScalars, "KKT_SPARSE_XDYcYd linsys: alloc ReSolve size %d (%d cons) (gpu)\n", n, neq + nineq); + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); + if(fact_acceptor_ic) { + nlp_->log->printf(hovError, + "KKT_SPARSE_XDYcYd linsys with ReSolve does not support inertia correction. " + "Please set option 'fact_acceptor' to 'inertia_free'.\n"); + assert(false); + return nullptr; } - } else if(compute_mode == "gpu") { - ///////////////////////////////////////////////////////////////////////////////////////////// - // gpu compute mode - // - // We don't allow CPU linear solvers. - ///////////////////////////////////////////////////////////////////////////////////////////// - // assert(false && "KKT_SPARSE_XDYcYd linsys: GPU compute mode not yet supported."); - // assert(false == safe_mode_); - assert(nullptr == linSys_); - - if(linear_solver == "resolve" || linear_solver == "auto") { -#if defined(HIOP_USE_RESOLVE) - linSys_ = new hiopLinSolverSymSparseReSolve(n, nnz, nlp_); - nlp_->log->printf(hovScalars, - "KKT_SPARSE_XDYcYd linsys: alloc ReSolve size %d (%d cons) (gpu)\n", - n, - neq+nineq); - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); - if(fact_acceptor_ic) { - nlp_->log->printf(hovError, - "KKT_SPARSE_XDYcYd linsys with ReSolve does not support inertia correction. " - "Please set option 'fact_acceptor' to 'inertia_free'.\n"); - assert(false); - return nullptr; - } #endif - } //end resolve - } // end of compute mode gpu - } - assert(linSys_&& "KKT_SPARSE_XDYcYd linsys: cannot instantiate backend linear solver"); - return dynamic_cast (linSys_); + } // end resolve + } // end of compute mode gpu } - - /* ************************************************************************* - * For class hiopKKTLinSysSparseFull - * ************************************************************************* - */ - hiopKKTLinSysSparseFull::hiopKKTLinSysSparseFull(hiopNlpFormulation* nlp) + assert(linSys_ && "KKT_SPARSE_XDYcYd linsys: cannot instantiate backend linear solver"); + return dynamic_cast(linSys_); +} + +/* ************************************************************************* + * For class hiopKKTLinSysSparseFull + * ************************************************************************* + */ +hiopKKTLinSysSparseFull::hiopKKTLinSysSparseFull(hiopNlpFormulation* nlp) : hiopKKTLinSysFull(nlp), rhs_(nullptr), Hx_(nullptr), @@ -801,381 +848,421 @@ namespace hiop Jac_dSp_(nullptr), write_linsys_counter_(-1), csr_writer_(nlp) - { - nlpSp_ = dynamic_cast(nlp_); - assert(nlpSp_); - } +{ + nlpSp_ = dynamic_cast(nlp_); + assert(nlpSp_); +} - hiopKKTLinSysSparseFull::~hiopKKTLinSysSparseFull() - { - delete rhs_; - delete Hx_; - delete Hd_; - } +hiopKKTLinSysSparseFull::~hiopKKTLinSysSparseFull() +{ + delete rhs_; + delete Hx_; + delete Hd_; +} - hiopLinSolverNonSymSparse* - hiopKKTLinSysSparseFull::determineAndCreateLinsys(const int &n, const int &n_con, const int &nnz) - { - if(NULL==linSys_) { +hiopLinSolverNonSymSparse* hiopKKTLinSysSparseFull::determineAndCreateLinsys(const int& n, const int& n_con, const int& nnz) +{ + if(NULL == linSys_) { + if(safe_mode_) { + nlp_->log->printf(hovError, "Safe mode is not supported KKT_SPARSE_FULL_KKT linsys\n"); + assert(false); + return nullptr; + } - if(safe_mode_) { - nlp_->log->printf(hovError, "Safe mode is not supported KKT_SPARSE_FULL_KKT linsys\n"); - assert(false); - return nullptr; - } + auto* fact_acceptor_ic = dynamic_cast(fact_acceptor_); + if(fact_acceptor_ic) { + nlp_->log->printf(hovError, + "KKT_SPARSE_FULL_KKT linsys does not support inertia correction. " + "Please try setting option 'fact_acceptor' to 'inertia_free'.\n"); + assert(false); + return nullptr; + } - auto* fact_acceptor_ic = dynamic_cast (fact_acceptor_); - if(fact_acceptor_ic) { - nlp_->log->printf(hovError, - "KKT_SPARSE_FULL_KKT linsys does not support inertia correction. " - "Please try setting option 'fact_acceptor' to 'inertia_free'.\n"); - assert(false); - return nullptr; - } + auto compute_mode = nlp_->options->GetString("compute_mode"); + assert((compute_mode == "hybrid" || compute_mode == "cpu") && + "KKT_SPARSE_FULL_KKT linsys does not currently support gpu compute mode"); - auto compute_mode = nlp_->options->GetString("compute_mode"); - assert( (compute_mode == "hybrid" || compute_mode == "cpu") && - "KKT_SPARSE_FULL_KKT linsys does not currently support gpu compute mode"); - #ifdef HIOP_USE_PARDISO - nlp_->log->printf(hovWarning, - "KKT_SPARSE_FULL_KKT linsys: alloc PARDISO size %d (%d cons)\n", - n, - n_con); - hiopLinSolverNonSymSparsePARDISO *p = new hiopLinSolverNonSymSparsePARDISO(n, nnz, nlp_); - linSys_ = p; + nlp_->log->printf(hovWarning, "KKT_SPARSE_FULL_KKT linsys: alloc PARDISO size %d (%d cons)\n", n, n_con); + hiopLinSolverNonSymSparsePARDISO* p = new hiopLinSolverNonSymSparsePARDISO(n, nnz, nlp_); + linSys_ = p; #elif defined(HIOP_USE_STRUMPACK) - hiopLinSolverNonSymSparseSTRUMPACK *p = new hiopLinSolverNonSymSparseSTRUMPACK(n, nnz, nlp_); - nlp_->log->printf(hovWarning, - "KKT_SPARSE_FULL_KKT linsys: alloc STRUMPACK size %d (%d cons)\n", - n, - n_con); - linSys_ = p; -#endif // CUSOLVER - if(NULL==linSys_) { - nlp_->log->printf(hovError, - "KKT_SPARSE_FULL_KKT linsys: cannot instantiate backend linear solver " - "because HIOP was not built with CUSOLVER, STRUMPACK, or PARDISO.\n"); - assert(false); - return nullptr; - } + hiopLinSolverNonSymSparseSTRUMPACK* p = new hiopLinSolverNonSymSparseSTRUMPACK(n, nnz, nlp_); + nlp_->log->printf(hovWarning, "KKT_SPARSE_FULL_KKT linsys: alloc STRUMPACK size %d (%d cons)\n", n, n_con); + linSys_ = p; +#endif // CUSOLVER + if(NULL == linSys_) { + nlp_->log->printf(hovError, + "KKT_SPARSE_FULL_KKT linsys: cannot instantiate backend linear solver " + "because HIOP was not built with CUSOLVER, STRUMPACK, or PARDISO.\n"); + assert(false); + return nullptr; } - return dynamic_cast (linSys_); } + return dynamic_cast(linSys_); +} - bool hiopKKTLinSysSparseFull::build_kkt_matrix(const hiopPDPerturbation& pdreg) - { - delta_wx_ = perturb_calc_->get_curr_delta_wx(); - delta_wd_ = perturb_calc_->get_curr_delta_wd(); - delta_cc_ = perturb_calc_->get_curr_delta_cc(); - delta_cd_ = perturb_calc_->get_curr_delta_cd(); - - HessSp_ = dynamic_cast(Hess_); - if(!HessSp_) { assert(false); return false; } - - Jac_cSp_ = dynamic_cast(Jac_c_); - if(!Jac_cSp_) { assert(false); return false; } - - Jac_dSp_ = dynamic_cast(Jac_d_); - if(!Jac_dSp_) { assert(false); return false; } - - size_type nx = HessSp_->n(); - size_type nd = Jac_dSp_->m(); - size_type neq = Jac_cSp_->m(); - size_type nineq=Jac_dSp_->m(); - size_type ndl = nlp_->m_ineq_low(); - size_type ndu = nlp_->m_ineq_upp(); - size_type nxl = nlp_->n_low(); - size_type nxu = nlp_->n_upp(); - - // note that hess may be saved as a triangular matrix - int n2st = nx + neq + nineq; - int n3st = n2st + nd; - int n4st = n3st + ndl + ndu + nxl + nxu; // shortcut for each subbloock - int n = n4st + ndl + ndu + nxl + nxu; - int n_reg = n3st; - - int required_num_neg_eig = neq+nineq; - int nnz = HessSp_->numberOfNonzeros() + HessSp_->numberOfOffDiagNonzeros() - + 2*Jac_cSp_->numberOfNonzeros() + 2*Jac_dSp_->numberOfNonzeros() - + 2*(nd + ndl + ndu + nxl + nxu + ndl + ndu + nxl + nxu) - + ndl + ndu + nxl + nxu - + n_reg; - - linSys_ = determineAndCreateLinsys(n, required_num_neg_eig, nnz); - - auto* linSys = dynamic_cast (linSys_); - assert(linSys); - - auto* Msys = dynamic_cast(linSys->sys_matrix()); - assert(Msys); - if(perf_report_) { - nlp_->log->printf(hovSummary, - "KKT_SPARSE_FULL linsys: Low-level linear system size: %d\n", - Msys->n()); - } +bool hiopKKTLinSysSparseFull::build_kkt_matrix(const hiopPDPerturbation& pdreg) +{ + delta_wx_ = perturb_calc_->get_curr_delta_wx(); + delta_wd_ = perturb_calc_->get_curr_delta_wd(); + delta_cc_ = perturb_calc_->get_curr_delta_cc(); + delta_cd_ = perturb_calc_->get_curr_delta_cd(); + + HessSp_ = dynamic_cast(Hess_); + if(!HessSp_) { + assert(false); + return false; + } - // update linSys system matrix, including IC perturbations - { - nlp_->runStats.kkt.tmUpdateLinsys.start(); - - Msys->setToZero(); - - // copy Jac and Hes to the full iterate matrix, use Dx_ and Dd_ as temp vector - size_type dest_nnz_st{0}; - - // H is triangular - // [ H Jc^T Jd^T | 0 | 0 0 -I I | 0 0 0 0 ] [ dx] [ rx ] - Msys->copySubmatrixFrom(*HessSp_, 0, 0, dest_nnz_st, true); - dest_nnz_st += HessSp_->numberOfOffDiagNonzeros(); - Msys->copySubmatrixFromTrans(*HessSp_, 0, 0, dest_nnz_st); - dest_nnz_st += HessSp_->numberOfNonzeros(); - - Msys->copySubmatrixFromTrans(*Jac_cSp_, 0, nx, dest_nnz_st); - dest_nnz_st += Jac_cSp_->numberOfNonzeros(); - Msys->copySubmatrixFromTrans(*Jac_dSp_, 0, nx+neq, dest_nnz_st); - dest_nnz_st += Jac_dSp_->numberOfNonzeros(); - Msys->setSubmatrixToConstantDiag_w_colpattern(-1., 0, n3st+ndl+ndu, dest_nnz_st, nxl, nlp_->get_ixl()); - dest_nnz_st += nxl; - Msys->setSubmatrixToConstantDiag_w_colpattern(1., 0, n3st+ndl+ndu+nxl, dest_nnz_st, nxu, nlp_->get_ixu()); - dest_nnz_st += nxu; - - // [ Jc 0 0 | 0 | 0 0 0 0 | 0 0 0 0 ] [ dyc] = [ ryc ] - Msys->copySubmatrixFrom(*Jac_cSp_, nx, 0, dest_nnz_st); - dest_nnz_st += Jac_cSp_->numberOfNonzeros(); - - // [ Jd 0 0 |-I | 0 0 0 0 | 0 0 0 0 ] [ dyd] [ ryd ] - Msys->copySubmatrixFrom(*Jac_dSp_, nx+neq, 0, dest_nnz_st); - dest_nnz_st += Jac_dSp_->numberOfNonzeros(); - Msys->copyDiagMatrixToSubblock(-1., nx+neq, n2st, dest_nnz_st, nd); - dest_nnz_st += nd; - - // [ 0 0 -I | 0 | -I I 0 0 | 0 0 0 0 ] [ dd] [ rd ] - Msys->copyDiagMatrixToSubblock(-1., n2st, nx+neq, dest_nnz_st, nd); - dest_nnz_st += nd; - Msys->setSubmatrixToConstantDiag_w_colpattern(-1., n2st, n3st, dest_nnz_st, ndl, nlp_->get_idl()); - dest_nnz_st += ndl; - Msys->setSubmatrixToConstantDiag_w_colpattern(1., n2st, n3st+ndl, dest_nnz_st, ndu, nlp_->get_idu()); - dest_nnz_st += ndu; - - // part3 - // [ 0 0 0 |-I | 0 0 0 0 | I 0 0 0 ] [ dvl] [ rvl ] - Msys->setSubmatrixToConstantDiag_w_rowpattern(-1., n3st, n2st, dest_nnz_st, ndl, nlp_->get_idl()); - dest_nnz_st += ndl; - Msys->copyDiagMatrixToSubblock(1., n3st, n4st, dest_nnz_st, ndl); - dest_nnz_st += ndl; - - // [ 0 0 0 | I | 0 0 0 0 | 0 I 0 0 ] [ dvu] [ rvu ] - Msys->setSubmatrixToConstantDiag_w_rowpattern(1., n3st+ndl, n2st, dest_nnz_st, ndu, nlp_->get_idu()); - dest_nnz_st += ndu; - Msys->copyDiagMatrixToSubblock(1., n3st+ndl, n4st+ndl, dest_nnz_st, ndu); - dest_nnz_st += ndu; - - // [ -I 0 0 | 0 | 0 0 0 0 | 0 0 I 0 ] [ dzl] [ rzl ] - Msys->setSubmatrixToConstantDiag_w_rowpattern(-1., n3st+ndl+ndu, 0, dest_nnz_st, nxl, nlp_->get_ixl()); - dest_nnz_st += nxl; - Msys->copyDiagMatrixToSubblock(1., n3st+ndl+ndu, n4st+ndl+ndu, dest_nnz_st, nxl); - dest_nnz_st += nxl; - - // [ I 0 0 | 0 | 0 0 0 0 | 0 0 0 I ] [ dzu] [ rzu ] - Msys->setSubmatrixToConstantDiag_w_rowpattern(1., n3st+ndl+ndu+nxl, 0, dest_nnz_st, nxu, nlp_->get_ixu()); - dest_nnz_st += nxu; - Msys->copyDiagMatrixToSubblock(1., n3st+ndl+ndu+nxl, n4st+ndl+ndu+nxl, dest_nnz_st, nxu); - dest_nnz_st += nxu; - - // part 4 - // [ 0 0 0 | 0 | Sl^d 0 0 0 | Vl 0 0 0 ] [dsdl] [ rsdl ] - Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->sdl, n4st, n3st, dest_nnz_st, ndl, nlp_->get_idl()); - dest_nnz_st += ndl; - Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->vl, n4st, n4st, dest_nnz_st, ndl, nlp_->get_idl()); - dest_nnz_st += ndl; - - // [ 0 0 0 | 0 | 0 Su^d 0 0 | 0 Vu 0 0 ] [dsdu] [ rsdu ] - Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->sdu, n4st+ndl, n3st+ndl, dest_nnz_st, ndu, nlp_->get_idu()); - dest_nnz_st += ndu; - Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->vu, n4st+ndl, n4st+ndl, dest_nnz_st, ndu, nlp_->get_idu()); - dest_nnz_st += ndu; - - // [ 0 0 0 | 0 | 0 0 Sl^x 0 | 0 0 Zl 0 ] [dsxl] [ rsxl ] - Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->sxl, n4st+ndl+ndu, n3st+ndl+ndu, dest_nnz_st, nxl, nlp_->get_ixl()); - dest_nnz_st += nxl; - Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->zl, n4st+ndl+ndu, n4st+ndl+ndu, dest_nnz_st, nxl, nlp_->get_ixl()); - dest_nnz_st += nxl; - - // [ 0 0 0 | 0 | 0 0 0 Su^x | 0 0 0 Zu ] [dsxu] [ rsxu ] - Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->sxu, - n4st+ndl+ndu+nxl, - n3st+ndl+ndu+nxl, - dest_nnz_st, - nxu, - nlp_->get_ixu()); - dest_nnz_st += nxu; - Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->zu, - n4st+ndl+ndu+nxl, - n4st+ndl+ndu+nxl, - dest_nnz_st, - nxu, - nlp_->get_ixu()); - dest_nnz_st += nxu; - - //build the diagonal Hx = delta_wx - if(nullptr == Hx_) { - Hx_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx); - assert(Hx_); - } - Hx_->axpy(1., *delta_wx_); - Msys->copySubDiagonalFrom(0, nx, *Hx_, dest_nnz_st); dest_nnz_st += nx; + Jac_cSp_ = dynamic_cast(Jac_c_); + if(!Jac_cSp_) { + assert(false); + return false; + } - //build the diagonal Hd = delta_wd - if(nullptr == Hd_) { - Hd_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nd); - assert(Hd_); - } + Jac_dSp_ = dynamic_cast(Jac_d_); + if(!Jac_dSp_) { + assert(false); + return false; + } + + size_type nx = HessSp_->n(); + size_type nd = Jac_dSp_->m(); + size_type neq = Jac_cSp_->m(); + size_type nineq = Jac_dSp_->m(); + size_type ndl = nlp_->m_ineq_low(); + size_type ndu = nlp_->m_ineq_upp(); + size_type nxl = nlp_->n_low(); + size_type nxu = nlp_->n_upp(); + + // note that hess may be saved as a triangular matrix + int n2st = nx + neq + nineq; + int n3st = n2st + nd; + int n4st = n3st + ndl + ndu + nxl + nxu; // shortcut for each subbloock + int n = n4st + ndl + ndu + nxl + nxu; + int n_reg = n3st; + + int required_num_neg_eig = neq + nineq; + int nnz = HessSp_->numberOfNonzeros() + HessSp_->numberOfOffDiagNonzeros() + 2 * Jac_cSp_->numberOfNonzeros() + + 2 * Jac_dSp_->numberOfNonzeros() + 2 * (nd + ndl + ndu + nxl + nxu + ndl + ndu + nxl + nxu) + ndl + ndu + nxl + + nxu + n_reg; + + linSys_ = determineAndCreateLinsys(n, required_num_neg_eig, nnz); + + auto* linSys = dynamic_cast(linSys_); + assert(linSys); + + auto* Msys = dynamic_cast(linSys->sys_matrix()); + assert(Msys); + if(perf_report_) { + nlp_->log->printf(hovSummary, "KKT_SPARSE_FULL linsys: Low-level linear system size: %d\n", Msys->n()); + } - Hd_->axpy(1., *delta_wd_); - Msys->copySubDiagonalFrom(n2st, nd, *Hd_, dest_nnz_st); - dest_nnz_st += nd; + // update linSys system matrix, including IC perturbations + { + nlp_->runStats.kkt.tmUpdateLinsys.start(); + + Msys->setToZero(); + + // copy Jac and Hes to the full iterate matrix, use Dx_ and Dd_ as temp vector + size_type dest_nnz_st{0}; + + // H is triangular + // [ H Jc^T Jd^T | 0 | 0 0 -I I | 0 0 0 0 ] [ dx] [ rx ] + Msys->copySubmatrixFrom(*HessSp_, 0, 0, dest_nnz_st, true); + dest_nnz_st += HessSp_->numberOfOffDiagNonzeros(); + Msys->copySubmatrixFromTrans(*HessSp_, 0, 0, dest_nnz_st); + dest_nnz_st += HessSp_->numberOfNonzeros(); + + Msys->copySubmatrixFromTrans(*Jac_cSp_, 0, nx, dest_nnz_st); + dest_nnz_st += Jac_cSp_->numberOfNonzeros(); + Msys->copySubmatrixFromTrans(*Jac_dSp_, 0, nx + neq, dest_nnz_st); + dest_nnz_st += Jac_dSp_->numberOfNonzeros(); + Msys->setSubmatrixToConstantDiag_w_colpattern(-1., 0, n3st + ndl + ndu, dest_nnz_st, nxl, nlp_->get_ixl()); + dest_nnz_st += nxl; + Msys->setSubmatrixToConstantDiag_w_colpattern(1., 0, n3st + ndl + ndu + nxl, dest_nnz_st, nxu, nlp_->get_ixu()); + dest_nnz_st += nxu; + + // [ Jc 0 0 | 0 | 0 0 0 0 | 0 0 0 0 ] [ dyc] = [ ryc ] + Msys->copySubmatrixFrom(*Jac_cSp_, nx, 0, dest_nnz_st); + dest_nnz_st += Jac_cSp_->numberOfNonzeros(); + + // [ Jd 0 0 |-I | 0 0 0 0 | 0 0 0 0 ] [ dyd] [ ryd ] + Msys->copySubmatrixFrom(*Jac_dSp_, nx + neq, 0, dest_nnz_st); + dest_nnz_st += Jac_dSp_->numberOfNonzeros(); + Msys->copyDiagMatrixToSubblock(-1., nx + neq, n2st, dest_nnz_st, nd); + dest_nnz_st += nd; + + // [ 0 0 -I | 0 | -I I 0 0 | 0 0 0 0 ] [ dd] [ rd ] + Msys->copyDiagMatrixToSubblock(-1., n2st, nx + neq, dest_nnz_st, nd); + dest_nnz_st += nd; + Msys->setSubmatrixToConstantDiag_w_colpattern(-1., n2st, n3st, dest_nnz_st, ndl, nlp_->get_idl()); + dest_nnz_st += ndl; + Msys->setSubmatrixToConstantDiag_w_colpattern(1., n2st, n3st + ndl, dest_nnz_st, ndu, nlp_->get_idu()); + dest_nnz_st += ndu; + + // part3 + // [ 0 0 0 |-I | 0 0 0 0 | I 0 0 0 ] [ dvl] [ rvl ] + Msys->setSubmatrixToConstantDiag_w_rowpattern(-1., n3st, n2st, dest_nnz_st, ndl, nlp_->get_idl()); + dest_nnz_st += ndl; + Msys->copyDiagMatrixToSubblock(1., n3st, n4st, dest_nnz_st, ndl); + dest_nnz_st += ndl; + + // [ 0 0 0 | I | 0 0 0 0 | 0 I 0 0 ] [ dvu] [ rvu ] + Msys->setSubmatrixToConstantDiag_w_rowpattern(1., n3st + ndl, n2st, dest_nnz_st, ndu, nlp_->get_idu()); + dest_nnz_st += ndu; + Msys->copyDiagMatrixToSubblock(1., n3st + ndl, n4st + ndl, dest_nnz_st, ndu); + dest_nnz_st += ndu; + + // [ -I 0 0 | 0 | 0 0 0 0 | 0 0 I 0 ] [ dzl] [ rzl ] + Msys->setSubmatrixToConstantDiag_w_rowpattern(-1., n3st + ndl + ndu, 0, dest_nnz_st, nxl, nlp_->get_ixl()); + dest_nnz_st += nxl; + Msys->copyDiagMatrixToSubblock(1., n3st + ndl + ndu, n4st + ndl + ndu, dest_nnz_st, nxl); + dest_nnz_st += nxl; + + // [ I 0 0 | 0 | 0 0 0 0 | 0 0 0 I ] [ dzu] [ rzu ] + Msys->setSubmatrixToConstantDiag_w_rowpattern(1., n3st + ndl + ndu + nxl, 0, dest_nnz_st, nxu, nlp_->get_ixu()); + dest_nnz_st += nxu; + Msys->copyDiagMatrixToSubblock(1., n3st + ndl + ndu + nxl, n4st + ndl + ndu + nxl, dest_nnz_st, nxu); + dest_nnz_st += nxu; + + // part 4 + // [ 0 0 0 | 0 | Sl^d 0 0 0 | Vl 0 0 0 ] [dsdl] [ rsdl ] + Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->sdl, n4st, n3st, dest_nnz_st, ndl, nlp_->get_idl()); + dest_nnz_st += ndl; + Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->vl, n4st, n4st, dest_nnz_st, ndl, nlp_->get_idl()); + dest_nnz_st += ndl; + + // [ 0 0 0 | 0 | 0 Su^d 0 0 | 0 Vu 0 0 ] [dsdu] [ rsdu ] + Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->sdu, n4st + ndl, n3st + ndl, dest_nnz_st, ndu, nlp_->get_idu()); + dest_nnz_st += ndu; + Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->vu, n4st + ndl, n4st + ndl, dest_nnz_st, ndu, nlp_->get_idu()); + dest_nnz_st += ndu; + + // [ 0 0 0 | 0 | 0 0 Sl^x 0 | 0 0 Zl 0 ] [dsxl] [ rsxl ] + Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->sxl, + n4st + ndl + ndu, + n3st + ndl + ndu, + dest_nnz_st, + nxl, + nlp_->get_ixl()); + dest_nnz_st += nxl; + Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->zl, + n4st + ndl + ndu, + n4st + ndl + ndu, + dest_nnz_st, + nxl, + nlp_->get_ixl()); + dest_nnz_st += nxl; + + // [ 0 0 0 | 0 | 0 0 0 Su^x | 0 0 0 Zu ] [dsxu] [ rsxu ] + Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->sxu, + n4st + ndl + ndu + nxl, + n3st + ndl + ndu + nxl, + dest_nnz_st, + nxu, + nlp_->get_ixu()); + dest_nnz_st += nxu; + Msys->copyDiagMatrixToSubblock_w_pattern(*iter_->zu, + n4st + ndl + ndu + nxl, + n4st + ndl + ndu + nxl, + dest_nnz_st, + nxu, + nlp_->get_ixu()); + dest_nnz_st += nxu; + + // build the diagonal Hx = delta_wx + if(nullptr == Hx_) { + Hx_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx); + assert(Hx_); + } + Hx_->axpy(1., *delta_wx_); + Msys->copySubDiagonalFrom(0, nx, *Hx_, dest_nnz_st); + dest_nnz_st += nx; + + // build the diagonal Hd = delta_wd + if(nullptr == Hd_) { + Hd_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nd); + assert(Hd_); + } - //add -delta_cc to diagonal block linSys starting at (nx, nx) - Msys->copySubDiagonalFrom(nx, neq, *delta_cc_, dest_nnz_st, -1.); - dest_nnz_st += neq; + Hd_->axpy(1., *delta_wd_); + Msys->copySubDiagonalFrom(n2st, nd, *Hd_, dest_nnz_st); + dest_nnz_st += nd; - //add -delta_cd to diagonal block linSys starting at (nx+neq, nx+neq) - Msys->copySubDiagonalFrom(nx+neq, nineq, *delta_cd_, dest_nnz_st, -1.); - dest_nnz_st += nineq; + // add -delta_cc to diagonal block linSys starting at (nx, nx) + Msys->copySubDiagonalFrom(nx, neq, *delta_cc_, dest_nnz_st, -1.); + dest_nnz_st += neq; - assert(dest_nnz_st==nnz); - nlp_->log->write("KKT_SPARSE_FULL linsys:", *Msys, hovMatrices); - nlp_->runStats.kkt.tmUpdateLinsys.stop(); - } + // add -delta_cd to diagonal block linSys starting at (nx+neq, nx+neq) + Msys->copySubDiagonalFrom(nx + neq, nineq, *delta_cd_, dest_nnz_st, -1.); + dest_nnz_st += nineq; - //write matrix to file if requested - if(nlp_->options->GetString("write_kkt") == "yes") { - write_linsys_counter_++; - } - if(write_linsys_counter_>=0) { + assert(dest_nnz_st == nnz); + nlp_->log->write("KKT_SPARSE_FULL linsys:", *Msys, hovMatrices); + nlp_->runStats.kkt.tmUpdateLinsys.stop(); + } + + // write matrix to file if requested + if(nlp_->options->GetString("write_kkt") == "yes") { + write_linsys_counter_++; + } + if(write_linsys_counter_ >= 0) { #ifndef HIOP_USE_GPU - auto* MsysSp = dynamic_cast(linSys->sys_matrix()); - csr_writer_.writeMatToFile(*MsysSp, write_linsys_counter_, nx, neq, nineq); + auto* MsysSp = dynamic_cast(linSys->sys_matrix()); + csr_writer_.writeMatToFile(*MsysSp, write_linsys_counter_, nx, neq, nineq); #else - //TODO csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); + // TODO csr_writer_.writeMatToFile(*Msys, write_linsys_counter_, nx, neq, nineq); #endif - } - - return true; } + return true; +} + +bool hiopKKTLinSysSparseFull::solve(hiopVector& rx, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& rd, + hiopVector& rvl, + hiopVector& rvu, + hiopVector& rzl, + hiopVector& rzu, + hiopVector& rsdl, + hiopVector& rsdu, + hiopVector& rsxl, + hiopVector& rsxu, + hiopVector& dx, + hiopVector& dyc, + hiopVector& dyd, + hiopVector& dd, + hiopVector& dvl, + hiopVector& dvu, + hiopVector& dzl, + hiopVector& dzu, + hiopVector& dsdl, + hiopVector& dsdu, + hiopVector& dsxl, + hiopVector& dsxu) +{ + if(!nlpSp_) { + assert(false); + return false; + } + if(!HessSp_) { + assert(false); + return false; + } + if(!Jac_cSp_) { + assert(false); + return false; + } + if(!Jac_dSp_) { + assert(false); + return false; + } - bool hiopKKTLinSysSparseFull::solve( hiopVector& rx, hiopVector& ryc, hiopVector& ryd, hiopVector& rd, - hiopVector& rvl, hiopVector& rvu, hiopVector& rzl, hiopVector& rzu, - hiopVector& rsdl, hiopVector& rsdu, hiopVector& rsxl, hiopVector& rsxu, - hiopVector& dx, hiopVector& dyc, hiopVector& dyd, hiopVector& dd, - hiopVector& dvl, hiopVector& dvu, hiopVector& dzl, hiopVector& dzu, - hiopVector& dsdl, hiopVector& dsdu, hiopVector& dsxl, hiopVector& dsxu) - { - if(!nlpSp_) { assert(false); return false; } - if(!HessSp_) { assert(false); return false; } - if(!Jac_cSp_) { assert(false); return false; } - if(!Jac_dSp_) { assert(false); return false; } - - nlp_->runStats.kkt.tmSolveRhsManip.start(); - - size_type nx=rx.get_size(), nd=rd.get_size(), neq=ryc.get_size(), nineq=ryd.get_size(), - ndl = nlp_->m_ineq_low(), ndu = nlp_->m_ineq_upp(), nxl = nlp_->n_low(), nxu = nlp_->n_upp(); - size_type nxsp=Hx_->get_size(); - assert(nxsp==nx); - int n = nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl + ndu + nxl + nxu; + nlp_->runStats.kkt.tmSolveRhsManip.start(); - if(rhs_ == nullptr) { - rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), n); - } + size_type nx = rx.get_size(), nd = rd.get_size(), neq = ryc.get_size(), nineq = ryd.get_size(), ndl = nlp_->m_ineq_low(), + ndu = nlp_->m_ineq_upp(), nxl = nlp_->n_low(), nxu = nlp_->n_upp(); + size_type nxsp = Hx_->get_size(); + assert(nxsp == nx); + int n = nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl + ndu + nxl + nxu; - {//write to log - nlp_->log->write("RHS KKT_SPARSE_FULL rx: ", rx, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL ryc:", ryc, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL ryd:", ryd, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL rd: ", rd, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL rvl: ", rvl, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL rvu: ", rvu, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL rzl: ", rzl, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL rzu: ", rzu, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL rsdl: ", rsdl, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL rsdu: ", rsdu, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL rsxl: ", rsxl, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL rsxu: ", rsxu, hovIteration); - } - - // form the rhs for the sparse linSys - rx.copyToStarting(*rhs_, 0); - ryc.copyToStarting(*rhs_, nx); - ryd.copyToStarting(*rhs_, nx+neq); - rd.copyToStarting(*rhs_, nx + neq + nineq); - rvl.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd, nlp_->get_idl()); - rvu.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl, nlp_->get_idu()); - rzl.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu, nlp_->get_ixl()); - rzu.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl, nlp_->get_ixu()); - rsdl.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl + nxu, nlp_->get_idl()); - rsdu.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl, nlp_->get_idu()); - rsxl.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl + ndu, nlp_->get_ixl()); - rsxu.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl + ndu + nxl, nlp_->get_ixu()); - - if(write_linsys_counter_>=0) - csr_writer_.writeRhsToFile(*rhs_, write_linsys_counter_); - - nlp_->runStats.kkt.tmSolveRhsManip.stop(); - - nlp_->runStats.kkt.tmSolveInner.start(); - - // solve - bool linsol_ok = linSys_->solve(*rhs_); - nlp_->runStats.kkt.tmSolveInner.stop(); - - if(perf_report_) { - nlp_->log->printf(hovSummary, "(summary for linear solver from KKT_SPARSE_XDYcYd)\n%s", - nlp_->runStats.linsolv.get_summary_last_solve().c_str()); - } + if(rhs_ == nullptr) { + rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), n); + } - if(write_linsys_counter_>=0) - csr_writer_.writeSolToFile(*rhs_, write_linsys_counter_); - - if(false==linsol_ok) return false; - - nlp_->runStats.kkt.tmSolveRhsManip.start(); - - // unpack - rhs_->startingAtCopyToStartingAt(0, dx, 0); - rhs_->startingAtCopyToStartingAt(nx, dyc, 0); - rhs_->startingAtCopyToStartingAt(nx+neq, dyd, 0); - rhs_->startingAtCopyToStartingAt(nx+neq+nineq, dd, 0); - rhs_->startingAtCopyToStartingAt_w_pattern(nx+neq+nineq+nd, dvl, 0, nlp_->get_idl() ); - rhs_->startingAtCopyToStartingAt_w_pattern(nx+neq+nineq+nd+ndl, dvu, 0, nlp_->get_idu()); - rhs_->startingAtCopyToStartingAt_w_pattern(nx+neq+nineq+nd+ndl+ndu, dzl, 0, nlp_->get_ixl()); - rhs_->startingAtCopyToStartingAt_w_pattern(nx+neq+nineq+nd+ndl+ndu+nxl, dzu, 0, nlp_->get_ixu()); - rhs_->startingAtCopyToStartingAt_w_pattern(nx+neq+nineq+nd+ndl+ndu+nxl+nxu, dsdl, 0, nlp_->get_idl()); - rhs_->startingAtCopyToStartingAt_w_pattern(nx+neq+nineq+nd+ndl+ndu+nxl+nxu+ndl, dsdu, 0, nlp_->get_idu()); - rhs_->startingAtCopyToStartingAt_w_pattern(nx+neq+nineq+nd+ndl+ndu+nxl+nxu+ndl+ndu, dsxl, 0, nlp_->get_ixl()); - rhs_->startingAtCopyToStartingAt_w_pattern(nx+neq+nineq+nd+ndl+ndu+nxl+nxu+ndl+ndu+nxl, dsxu, 0, nlp_->get_ixu()); - - {//write to log - nlp_->log->write("RHS KKT_SPARSE_FULL dx: ", dx, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dyc:", dyc, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dyd:", dyd, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dd: ", dd, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dvl: ", dvl, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dvu: ", dvu, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dzl: ", dzl, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dzu: ", dzu, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dsdl: ", dsdl, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dsdu: ", dsdu, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dsxl: ", dsxl, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_FULL dsxu: ", dsxu, hovIteration); - } + { // write to log + nlp_->log->write("RHS KKT_SPARSE_FULL rx: ", rx, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL ryc:", ryc, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL ryd:", ryd, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL rd: ", rd, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL rvl: ", rvl, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL rvu: ", rvu, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL rzl: ", rzl, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL rzu: ", rzu, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL rsdl: ", rsdl, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL rsdu: ", rsdu, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL rsxl: ", rsxl, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL rsxu: ", rsxu, hovIteration); + } - nlp_->runStats.kkt.tmSolveRhsManip.stop(); - return true; + // form the rhs for the sparse linSys + rx.copyToStarting(*rhs_, 0); + ryc.copyToStarting(*rhs_, nx); + ryd.copyToStarting(*rhs_, nx + neq); + rd.copyToStarting(*rhs_, nx + neq + nineq); + rvl.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd, nlp_->get_idl()); + rvu.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl, nlp_->get_idu()); + rzl.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu, nlp_->get_ixl()); + rzu.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl, nlp_->get_ixu()); + rsdl.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl + nxu, nlp_->get_idl()); + rsdu.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl, nlp_->get_idu()); + rsxl.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl + ndu, nlp_->get_ixl()); + rsxu.copyToStartingAt_w_pattern(*rhs_, nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl + ndu + nxl, nlp_->get_ixu()); + + if(write_linsys_counter_ >= 0) csr_writer_.writeRhsToFile(*rhs_, write_linsys_counter_); + + nlp_->runStats.kkt.tmSolveRhsManip.stop(); + + nlp_->runStats.kkt.tmSolveInner.start(); + + // solve + bool linsol_ok = linSys_->solve(*rhs_); + nlp_->runStats.kkt.tmSolveInner.stop(); + + if(perf_report_) { + nlp_->log->printf(hovSummary, + "(summary for linear solver from KKT_SPARSE_XDYcYd)\n%s", + nlp_->runStats.linsolv.get_summary_last_solve().c_str()); } + if(write_linsys_counter_ >= 0) csr_writer_.writeSolToFile(*rhs_, write_linsys_counter_); + + if(false == linsol_ok) return false; + + nlp_->runStats.kkt.tmSolveRhsManip.start(); + + // unpack + rhs_->startingAtCopyToStartingAt(0, dx, 0); + rhs_->startingAtCopyToStartingAt(nx, dyc, 0); + rhs_->startingAtCopyToStartingAt(nx + neq, dyd, 0); + rhs_->startingAtCopyToStartingAt(nx + neq + nineq, dd, 0); + rhs_->startingAtCopyToStartingAt_w_pattern(nx + neq + nineq + nd, dvl, 0, nlp_->get_idl()); + rhs_->startingAtCopyToStartingAt_w_pattern(nx + neq + nineq + nd + ndl, dvu, 0, nlp_->get_idu()); + rhs_->startingAtCopyToStartingAt_w_pattern(nx + neq + nineq + nd + ndl + ndu, dzl, 0, nlp_->get_ixl()); + rhs_->startingAtCopyToStartingAt_w_pattern(nx + neq + nineq + nd + ndl + ndu + nxl, dzu, 0, nlp_->get_ixu()); + rhs_->startingAtCopyToStartingAt_w_pattern(nx + neq + nineq + nd + ndl + ndu + nxl + nxu, dsdl, 0, nlp_->get_idl()); + rhs_->startingAtCopyToStartingAt_w_pattern(nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl, dsdu, 0, nlp_->get_idu()); + rhs_->startingAtCopyToStartingAt_w_pattern(nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl + ndu, + dsxl, + 0, + nlp_->get_ixl()); + rhs_->startingAtCopyToStartingAt_w_pattern(nx + neq + nineq + nd + ndl + ndu + nxl + nxu + ndl + ndu + nxl, + dsxu, + 0, + nlp_->get_ixu()); + + { // write to log + nlp_->log->write("RHS KKT_SPARSE_FULL dx: ", dx, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dyc:", dyc, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dyd:", dyd, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dd: ", dd, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dvl: ", dvl, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dvu: ", dvu, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dzl: ", dzl, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dzu: ", dzu, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dsdl: ", dsdl, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dsdu: ", dsdu, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dsxl: ", dsxl, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_FULL dsxu: ", dsxu, hovIteration); + } + nlp_->runStats.kkt.tmSolveRhsManip.stop(); + return true; +} -} // end of namespace +} // namespace hiop diff --git a/src/Optimization/hiopKKTLinSysSparse.hpp b/src/Optimization/hiopKKTLinSysSparse.hpp index 20d97e861..1b34cd21f 100644 --- a/src/Optimization/hiopKKTLinSysSparse.hpp +++ b/src/Optimization/hiopKKTLinSysSparse.hpp @@ -79,26 +79,30 @@ class hiopKKTLinSysCompressedSparseXYcYd : public hiopKKTLinSysCompressedXYcYd virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg); - virtual bool solveCompressed(hiopVector& rx, hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dyc, hiopVector& dyd); + virtual bool solveCompressed(hiopVector& rx, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dyc, + hiopVector& dyd); protected: - hiopVector *rhs_; //[rx_tilde, ryc_tilde, ryd_tilde] + hiopVector* rhs_; //[rx_tilde, ryc_tilde, ryd_tilde] // - //from the parent class we also use + // from the parent class we also use // // hiopVectorPar *Dd_inv; // hiopVectorPar *ryd_tilde; - //from the parent's parent class (hiopKKTLinSysCompressed) we also use - // hiopVectorPar *Dx; - // hiopVectorPar *rx_tilde; + // from the parent's parent class (hiopKKTLinSysCompressed) we also use + // hiopVectorPar *Dx; + // hiopVectorPar *rx_tilde; // Keeps Hx = HessSp_->sp_mat() + Dxs (Dx=log-barrier diagonal for x) - hiopVector *Hx_; + hiopVector* Hx_; - //just dynamic_cast-ed pointers + // just dynamic_cast-ed pointers hiopNlpSparse* nlpSp_; hiopMatrixSparse* HessSp_; const hiopMatrixSparse* Jac_cSp_; @@ -110,11 +114,10 @@ class hiopKKTLinSysCompressedSparseXYcYd : public hiopKKTLinSysCompressedXYcYd hiopCSR_IO csr_writer_; private: - //placeholder for the code that decides which linear solver to used based on safe_mode_ + // placeholder for the code that decides which linear solver to used based on safe_mode_ hiopLinSolverSymSparse* determineAndCreateLinsys(int nxd, int neq, int nineq, int nnz); }; - /* * Solves KKTLinSysCompressedXDYcYd by exploiting the sparse structure * @@ -138,27 +141,33 @@ class hiopKKTLinSysCompressedSparseXDYcYd : public hiopKKTLinSysCompressedXDYcYd virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg); - virtual bool solveCompressed(hiopVector& rx, hiopVector& rd, hiopVector& ryc, hiopVector& ryd, - hiopVector& dx, hiopVector& dd, hiopVector& dyc, hiopVector& dyd); + virtual bool solveCompressed(hiopVector& rx, + hiopVector& rd, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& dx, + hiopVector& dd, + hiopVector& dyc, + hiopVector& dyd); protected: - hiopVector *rhs_; //[rx_tilde, rd_tilde, ryc, ryd] + hiopVector* rhs_; //[rx_tilde, rd_tilde, ryc, ryd] // - //from the parent class we also use + // from the parent class we also use // // hiopVectorPar *Dd; // hiopVectorPar *ryd_tilde; - //from the parent's parent class (hiopKKTLinSysCompressed) we also use - // hiopVectorPar *Dx; - // hiopVectorPar *rx_tilde; + // from the parent's parent class (hiopKKTLinSysCompressed) we also use + // hiopVectorPar *Dx; + // hiopVectorPar *rx_tilde; // Keeps Hx = Dx (Dx=log-barrier diagonal for x) + regularization // Keeps Hd = Dd (Dd=log-barrier diagonal for slack variable) + regularization hiopVector *Hx_, *Hd_; - //just dynamic_cast-ed pointers + // just dynamic_cast-ed pointers hiopNlpSparse* nlpSp_; hiopMatrixSparse* HessSp_; const hiopMatrixSparse* Jac_cSp_; @@ -170,11 +179,10 @@ class hiopKKTLinSysCompressedSparseXDYcYd : public hiopKKTLinSysCompressedXDYcYd hiopCSR_IO csr_writer_; private: - //placeholder for the code that decides which linear solver to used based on safe_mode_ + // placeholder for the code that decides which linear solver to used based on safe_mode_ hiopLinSolverSymSparse* determineAndCreateLinsys(int nxd, int neq, int nineq, int nnz); }; - /* * Solves KKTLinSysCompressedXYcYd by exploiting the sparse structure * @@ -208,19 +216,37 @@ class hiopKKTLinSysSparseFull : public hiopKKTLinSysFull virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg); - bool solve(hiopVector& rx, hiopVector& ryc, hiopVector& ryd, hiopVector& rd, - hiopVector& rvl, hiopVector& rvu, hiopVector& rzl, hiopVector& rzu, - hiopVector& rsdl, hiopVector& rsdu, hiopVector& rsxl, hiopVector& rsxu, - hiopVector& dx, hiopVector& dyc, hiopVector& dyd, hiopVector& dd, - hiopVector& dvl, hiopVector& dvu, hiopVector& dzl, hiopVector& dzu, - hiopVector& dsdl, hiopVector& dsdu, hiopVector& dsxl, hiopVector& dsxu); + bool solve(hiopVector& rx, + hiopVector& ryc, + hiopVector& ryd, + hiopVector& rd, + hiopVector& rvl, + hiopVector& rvu, + hiopVector& rzl, + hiopVector& rzu, + hiopVector& rsdl, + hiopVector& rsdu, + hiopVector& rsxl, + hiopVector& rsxu, + hiopVector& dx, + hiopVector& dyc, + hiopVector& dyd, + hiopVector& dd, + hiopVector& dvl, + hiopVector& dvu, + hiopVector& dzl, + hiopVector& dzu, + hiopVector& dsdl, + hiopVector& dsdu, + hiopVector& dsxl, + hiopVector& dsxu); protected: - hiopVector *rhs_; + hiopVector* rhs_; hiopVector *Hx_, *Hd_; - //just dynamic_cast-ed pointers + // just dynamic_cast-ed pointers hiopNlpSparse* nlpSp_; hiopMatrixSparse* HessSp_; const hiopMatrixSparse* Jac_cSp_; @@ -232,10 +258,10 @@ class hiopKKTLinSysSparseFull : public hiopKKTLinSysFull hiopCSR_IO csr_writer_; private: - //placeholder for the code that decides which linear solver to used based on safe_mode_ - hiopLinSolverNonSymSparse* determineAndCreateLinsys(const int &n, const int &n_con, const int &nnz); + // placeholder for the code that decides which linear solver to used based on safe_mode_ + hiopLinSolverNonSymSparse* determineAndCreateLinsys(const int& n, const int& n_con, const int& nnz); }; -} // end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopKKTLinSysSparseCondensed.cpp b/src/Optimization/hiopKKTLinSysSparseCondensed.cpp index 67b1d26d4..2373126c4 100644 --- a/src/Optimization/hiopKKTLinSysSparseCondensed.cpp +++ b/src/Optimization/hiopKKTLinSysSparseCondensed.cpp @@ -61,29 +61,28 @@ #include "hiopLinSolverCholCuSparse.hpp" #include "hiopMatrixSparseCsrCuda.hpp" #include "hiopVectorCuda.hpp" -#endif // HIOP_USE_CUDA +#endif // HIOP_USE_CUDA #include "hiopMatrixSparseTripletStorage.hpp" #include "hiopMatrixSparseCSRSeq.hpp" namespace hiop -{ -hiopKKTLinSysCondensedSparse::hiopKKTLinSysCondensedSparse(hiopNlpFormulation* nlp) - : hiopKKTLinSysCompressedSparseXDYcYd(nlp), - JacD_(nullptr), - JacDt_(nullptr), - Hess_lower_csr_(nullptr), - Hess_upper_csr_(nullptr), - Hess_csr_(nullptr), - JtDiagJ_(nullptr), - M_condensed_(nullptr), - Hess_upper_plus_diag_(nullptr), - Diag_Dx_deltawx_(nullptr), - Dx_plus_deltawx_(nullptr), - deltawx_(nullptr), - Hd_copy_(nullptr) { -} +hiopKKTLinSysCondensedSparse::hiopKKTLinSysCondensedSparse(hiopNlpFormulation* nlp) + : hiopKKTLinSysCompressedSparseXDYcYd(nlp), + JacD_(nullptr), + JacDt_(nullptr), + Hess_lower_csr_(nullptr), + Hess_upper_csr_(nullptr), + Hess_csr_(nullptr), + JtDiagJ_(nullptr), + M_condensed_(nullptr), + Hess_upper_plus_diag_(nullptr), + Diag_Dx_deltawx_(nullptr), + Dx_plus_deltawx_(nullptr), + deltawx_(nullptr), + Hd_copy_(nullptr) +{} hiopKKTLinSysCondensedSparse::~hiopKKTLinSysCondensedSparse() { @@ -114,66 +113,65 @@ bool hiopKKTLinSysCondensedSparse::build_kkt_matrix(const hiopPDPerturbation& pd nlp_->runStats.kkt.tmUpdateInit.start(); hiopMatrixSymSparseTriplet* Hess_triplet = dynamic_cast(Hess_); - HessSp_ = Hess_triplet; //dynamic_cast(Hess_); - - Jac_cSp_ = nullptr; //not used by this class + HessSp_ = Hess_triplet; // dynamic_cast(Hess_); + + Jac_cSp_ = nullptr; // not used by this class const hiopMatrixSparseTriplet* Jac_triplet = dynamic_cast(Jac_d_); Jac_dSp_ = Jac_triplet; - + assert(HessSp_ && Jac_dSp_); - if(nullptr==Jac_dSp_ || nullptr==HessSp_) { + if(nullptr == Jac_dSp_ || nullptr == HessSp_) { nlp_->runStats.kkt.tmUpdateInit.stop(); - //incorrect linear algebra objects were provided to this class + // incorrect linear algebra objects were provided to this class return false; } - assert(0 == Jac_c_->m() && - "Detected NLP with equality constraints. Please use hiopNlpSparseIneq formulation"); - + assert(0 == Jac_c_->m() && "Detected NLP with equality constraints. Please use hiopNlpSparseIneq formulation"); + size_type nx = HessSp_->n(); size_type nineq = Jac_dSp_->m(); assert(nineq == Dd_->get_size()); assert(nx == Dx_->get_size()); // NOTE: - // hybrid compute mode -> linear algebra objects used internally by the class will be allocated on the device. Most of the inputs - // to this class will be however on HOST under hybrid mode, so some objects are copied/replicated/transfered to device - // gpu copute mode -> not yet supported - // cpu compute mode -> all objects on HOST, however, some objects will still be copied (e.g., Hd_) to ensure code homogeneity + // hybrid compute mode -> linear algebra objects used internally by the class will be allocated on the device. Most of the + // inputs to this class will be however on HOST under hybrid mode, so some objects are copied/replicated/transfered to + // device gpu copute mode -> not yet supported cpu compute mode -> all objects on HOST, however, some objects will still be + // copied (e.g., Hd_) to ensure code homogeneity // - // REMARK: The objects that are copied/replicated are temporary and will be removed later on as the remaining sparse KKT computations - // will be ported to device + // REMARK: The objects that are copied/replicated are temporary and will be removed later on as the remaining sparse KKT + // computations will be ported to device - //determine the "internal" memory space, see above note + // determine the "internal" memory space, see above note std::string mem_space_internal = determine_memory_space_internal(nlp_->options->GetString("compute_mode")); - - //allocate on the first call + + // allocate on the first call if(nullptr == Hd_) { - //HOST + // HOST Hd_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nineq); assert(nullptr == Hd_copy_); - //temporary: make a copy of Hd on the "internal" mem_space + // temporary: make a copy of Hd on the "internal" mem_space Hd_copy_ = LinearAlgebraFactory::create_vector(mem_space_internal, nineq); - assert(nullptr == Dx_plus_deltawx_); //should be also not allocated - assert(nullptr == deltawx_); //should be also not allocated - //allocate this internal vector on the device if hybrid compute mode + assert(nullptr == Dx_plus_deltawx_); // should be also not allocated + assert(nullptr == deltawx_); // should be also not allocated + // allocate this internal vector on the device if hybrid compute mode Dx_plus_deltawx_ = LinearAlgebraFactory::create_vector(mem_space_internal, Dx_->get_size()); deltawx_ = LinearAlgebraFactory::create_vector(mem_space_internal, Dx_->get_size()); } - Hd_->copyFrom(*Dd_); + Hd_->copyFrom(*Dd_); Hd_->axpy(1., delta_wd_in); - //temporary code, see above note + // temporary code, see above note { if(mem_space_internal == "CUDA") { #ifdef HIOP_USE_CUDA auto Hd_cuda = dynamic_cast(Hd_copy_); - auto Hd_par = dynamic_cast(Hd_); + auto Hd_par = dynamic_cast(Hd_); assert(Hd_cuda && "incorrect type for vector class"); - assert(Hd_par && "incorrect type for vector class"); + assert(Hd_par && "incorrect type for vector class"); Hd_cuda->copy_from_vectorpar(*Hd_par); auto Dx_delta_cuda = dynamic_cast(Dx_plus_deltawx_); @@ -189,7 +187,7 @@ bool hiopKKTLinSysCondensedSparse::build_kkt_matrix(const hiopPDPerturbation& pd Hd_copy_->copyFrom(*Hd_); Dx_plus_deltawx_->copyFrom(*Dx_); deltawx_->copyFrom(delta_wx_in); -#endif +#endif } else { assert(dynamic_cast(Hd_) && "incorrect type for vector class"); Hd_copy_->copyFrom(*Hd_); @@ -200,54 +198,57 @@ bool hiopKKTLinSysCondensedSparse::build_kkt_matrix(const hiopPDPerturbation& pd // Dx_ + delta_wx*I Dx_plus_deltawx_->axpy(1.0, *deltawx_); - + nlp_->runStats.kkt.tmUpdateInit.stop(); nlp_->runStats.kkt.tmUpdateLinsys.start(); - + // // compute condensed linear system J'*D*J + H + Dx + delta_wx*I // hiopTimer t; - + // symbolic conversion from triplet to CSR if(nullptr == JacD_) { - t.reset(); t.start(); + t.reset(); + t.start(); JacD_ = LinearAlgebraFactory::create_matrix_sparse_csr(mem_space_internal); JacD_->form_from_symbolic(*Jac_triplet); assert(nullptr == JacDt_); JacDt_ = LinearAlgebraFactory::create_matrix_sparse_csr(mem_space_internal); JacDt_->form_transpose_from_symbolic(*JacD_); - //t.stop(); printf("JacD JacDt-symb from csr took %.5f\n", t.getElapsedTime()); + // t.stop(); printf("JacD JacDt-symb from csr took %.5f\n", t.getElapsedTime()); } // numeric conversion from triplet to CSR - t.reset(); t.start(); + t.reset(); + t.start(); JacD_->form_from_numeric(*Jac_triplet); JacDt_->form_transpose_from_numeric(*JacD_); - //t.stop(); printf("JacD JacDt-nume csr took %.5f\n", t.getElapsedTime()); + // t.stop(); printf("JacD JacDt-nume csr took %.5f\n", t.getElapsedTime()); - - //symbolic multiplication for JacD'*D*J + // symbolic multiplication for JacD'*D*J if(nullptr == JtDiagJ_) { - t.reset(); t.start(); - + t.reset(); + t.start(); + // D * J - //nothing to do symbolically since we just numerically scale columns of Jt by D - + // nothing to do symbolically since we just numerically scale columns of Jt by D + // Jt* (D*J) (D is not used since it does not change the sparsity pattern) JtDiagJ_ = JacDt_->times_mat_alloc(*JacD_); JacDt_->times_mat_symbolic(*JtDiagJ_, *JacD_); - //t.stop(); printf("J*D*J'-symb took %.5f\n", t.getElapsedTime()); + // t.stop(); printf("J*D*J'-symb took %.5f\n", t.getElapsedTime()); } - - //numeric multiplication for JacD'*D*J - t.reset(); t.start(); + + // numeric multiplication for JacD'*D*J + t.reset(); + t.start(); // Jt * D JacD_->scale_rows(*Hd_copy_); // (Jt*D) * J JacDt_->times_mat_numeric(0.0, *JtDiagJ_, 1.0, *JacD_); - //t.stop(); printf("J*D*J'-nume took %.5f\n", t.getElapsedTime()); + // t.stop(); printf("J*D*J'-nume took %.5f\n", t.getElapsedTime()); #ifdef HIOP_DEEPCHECKS JtDiagJ_->check_csr_is_ordered(); @@ -263,28 +264,28 @@ bool hiopKKTLinSysCondensedSparse::build_kkt_matrix(const hiopPDPerturbation& pd Hess_upper_csr_ = LinearAlgebraFactory::create_matrix_sparse_csr(mem_space_internal); Hess_upper_csr_->form_from_symbolic(*Hess_triplet); Hess_upper_csr_->form_from_numeric(*Hess_triplet); - + assert(nullptr == Hess_lower_csr_); Hess_lower_csr_ = LinearAlgebraFactory::create_matrix_sparse_csr(mem_space_internal); Hess_lower_csr_->form_transpose_from_symbolic(*Hess_upper_csr_); Hess_lower_csr_->form_transpose_from_numeric(*Hess_upper_csr_); - //zero out diagonal of the upper triangle to avoid adding it twice + // zero out diagonal of the upper triangle to avoid adding it twice Hess_upper_csr_->set_diagonal(0.0); - + assert(Hess_lower_csr_->numberOfNonzeros() == Hess_upper_csr_->numberOfNonzeros()); assert(nullptr == Diag_Dx_deltawx_); Diag_Dx_deltawx_ = LinearAlgebraFactory::create_matrix_sparse_csr(mem_space_internal); Diag_Dx_deltawx_->form_diag_from_symbolic(*Dx_plus_deltawx_); Diag_Dx_deltawx_->form_diag_from_numeric(*Dx_plus_deltawx_); - + // Hess_upper_plus_diag_ = Hess_upper_csr_ + Dx + delta_wx*I assert(nullptr == Hess_upper_plus_diag_); Hess_upper_plus_diag_ = Hess_upper_csr_->add_matrix_alloc(*Diag_Dx_deltawx_); Hess_upper_csr_->add_matrix_symbolic(*Hess_upper_plus_diag_, *Diag_Dx_deltawx_); Hess_upper_csr_->add_matrix_numeric(*Hess_upper_plus_diag_, 1.0, *Diag_Dx_deltawx_, 1.0); - + // form full Hess_csr_ = Hess_lower_csr_ + ( Hess_upper_csr_ + Dx + delta_wx*I ) assert(nullptr == Hess_csr_); Hess_csr_ = Hess_lower_csr_->add_matrix_alloc(*Hess_upper_plus_diag_); @@ -295,25 +296,26 @@ bool hiopKKTLinSysCondensedSparse::build_kkt_matrix(const hiopPDPerturbation& pd M_condensed_ = Hess_csr_->add_matrix_alloc(*JtDiagJ_); Hess_csr_->add_matrix_symbolic(*M_condensed_, *JtDiagJ_); Hess_csr_->add_matrix_numeric(*M_condensed_, 1.0, *JtDiagJ_, 1.0); - - //t.stop(); printf("ADD-symb took %.5f\n", t.getElapsedTime()); + + // t.stop(); printf("ADD-symb took %.5f\n", t.getElapsedTime()); } else { assert(linSys_); assert(M_condensed_); - //todo assert(M_condensed_ == linSys_->sys_matrix()); - - t.reset(); t.start(); + // todo assert(M_condensed_ == linSys_->sys_matrix()); + + t.reset(); + t.start(); // compute M_condensed_ = M_condensed_ + Hess_csr_ + JtDiagJ_ + Dx_ + delta_wx*I - //form lower and upper + // form lower and upper Hess_upper_csr_->form_from_numeric(*Hess_triplet); Hess_lower_csr_->form_transpose_from_numeric(*Hess_upper_csr_); - //zero out diagonal of the upper triangle to avoid adding it twice + // zero out diagonal of the upper triangle to avoid adding it twice Hess_upper_csr_->set_diagonal(0.0); Diag_Dx_deltawx_->form_diag_from_numeric(*Dx_plus_deltawx_); Hess_upper_csr_->add_matrix_numeric(*Hess_upper_plus_diag_, 1.0, *Diag_Dx_deltawx_, 1.0); Hess_lower_csr_->add_matrix_numeric(*Hess_csr_, 1.0, *Hess_upper_plus_diag_, 1.0); Hess_csr_->add_matrix_numeric(*M_condensed_, 1.0, *JtDiagJ_, 1.0); - //t.stop(); printf("ADD-nume took %.5f\n", t.getElapsedTime()); + // t.stop(); printf("ADD-nume took %.5f\n", t.getElapsedTime()); } fflush(stdout); @@ -324,22 +326,22 @@ bool hiopKKTLinSysCondensedSparse::build_kkt_matrix(const hiopPDPerturbation& pd linSys_ = determine_and_create_linsys(); nlp_->runStats.kkt.tmUpdateLinsys.stop(); - + if(perf_report_) { nlp_->log->printf(hovSummary, "KKT_SPARSE_Condensed linsys: Low-level linear system size %d nnz %d\n", - nx, + nx, M_condensed_->numberOfNonzeros()); } - //write matrix to file if requested + // write matrix to file if requested if(nlp_->options->GetString("write_kkt") == "yes") { write_linsys_counter_++; } - if(write_linsys_counter_>=0) { + if(write_linsys_counter_ >= 0) { // TODO csr_writer_.writeMatToFile(Msys, write_linsys_counter_, nx, 0, nineq); } - return true; + return true; } bool hiopKKTLinSysCondensedSparse::solve_compressed_direct(hiopVector& rx, @@ -361,13 +363,13 @@ bool hiopKKTLinSysCondensedSparse::solve_compressed_direct(hiopVector& rx, assert(rhs_); assert(rhs_->get_size() == nx); - /* (H+Dx+Jd^T*(Dd+delta_wd*I)*Jd)dx = rx + Jd^T*Dd*ryd + Jd^T*rd + /* (H+Dx+Jd^T*(Dd+delta_wd*I)*Jd)dx = rx + Jd^T*Dd*ryd + Jd^T*rd * dd = Jd*dx - ryd * dyd = (Dd+delta_wd*I)*dd - rd = (Dd+delta_wd*I)*Jd*dx - (Dd+delta_wd*I)*ryd - rd */ rhs_->copyFrom(rx); - //working buffers in the size of nineq/nd using output as storage + // working buffers in the size of nineq/nd using output as storage hiopVector& Dd_x_ryd = dyd; Dd_x_ryd.copyFrom(ryd); Dd_x_ryd.componentMult(*Hd_); @@ -381,8 +383,8 @@ bool hiopKKTLinSysCondensedSparse::solve_compressed_direct(hiopVector& rx, // solve // bool linsol_ok = linSys_->solve(*rhs_); - - if(false==linsol_ok) { + + if(false == linsol_ok) { return false; } dx.copyFrom(*rhs_); @@ -413,7 +415,7 @@ bool hiopKKTLinSysCondensedSparse::solveCompressed(hiopVector& rx, bool bret; nlp_->runStats.kkt.tmSolveInner.start(); - + size_type nx = rx.get_size(); // this is rhs used by the direct "condensed" solve @@ -422,8 +424,8 @@ bool hiopKKTLinSysCondensedSparse::solveCompressed(hiopVector& rx, } assert(rhs_->get_size() == nx); - nlp_->log->write("RHS KKT_SPARSE_Condensed rx: ", rx, hovIteration); - nlp_->log->write("RHS KKT_SPARSE_Condensed rd: ", rd, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_Condensed rx: ", rx, hovIteration); + nlp_->log->write("RHS KKT_SPARSE_Condensed rd: ", rd, hovIteration); nlp_->log->write("RHS KKT_SPARSE_Condensed ryc:", ryc, hovIteration); nlp_->log->write("RHS KKT_SPARSE_Condensed ryd:", ryd, hovIteration); @@ -432,78 +434,73 @@ bool hiopKKTLinSysCondensedSparse::solveCompressed(hiopVector& rx, // Code for iterative refinement of the XDYcYd KKT system was removed since the parent // KKT class performs this now. Old code residing in this class can be found at - // header file: https://github.com/LLNL/hiop/blob/fa61c1993128afd65a3cb21301c1f131922ceef8/src/Optimization/hiopKKTLinSysSparseCondensed.hpp#L209 - // implementation file: https://github.com/LLNL/hiop/blob/fa61c1993128afd65a3cb21301c1f131922ceef8/src/Optimization/hiopKKTLinSysSparseCondensed.cpp#L731 + // header file: + // https://github.com/LLNL/hiop/blob/fa61c1993128afd65a3cb21301c1f131922ceef8/src/Optimization/hiopKKTLinSysSparseCondensed.hpp#L209 + // implementation file: + // https://github.com/LLNL/hiop/blob/fa61c1993128afd65a3cb21301c1f131922ceef8/src/Optimization/hiopKKTLinSysSparseCondensed.cpp#L731 - if(perf_report_) { nlp_->log->printf(hovSummary, "(summary for linear solver from KKT_SPARSE_Condensed(direct))\n%s", nlp_->runStats.linsolv.get_summary_last_solve().c_str()); } - - nlp_->log->write("SOL KKT_SPARSE_Condensed dx: ", dx, hovMatrices); - nlp_->log->write("SOL KKT_SPARSE_Condensed dd: ", dd, hovMatrices); + nlp_->log->write("SOL KKT_SPARSE_Condensed dx: ", dx, hovMatrices); + nlp_->log->write("SOL KKT_SPARSE_Condensed dd: ", dd, hovMatrices); nlp_->log->write("SOL KKT_SPARSE_Condensed dyc:", dyc, hovMatrices); nlp_->log->write("SOL KKT_SPARSE_Condensed dyd:", dyd, hovMatrices); return bret; } - -hiopLinSolverSymSparse* -hiopKKTLinSysCondensedSparse::determine_and_create_linsys() -{ +hiopLinSolverSymSparse* hiopKKTLinSysCondensedSparse::determine_and_create_linsys() +{ if(linSys_) { - return dynamic_cast (linSys_); + return dynamic_cast(linSys_); } - + int n = M_condensed_->m(); auto linsolv = nlp_->options->GetString("linear_solver_sparse"); if(nlp_->options->GetString("compute_mode") == "cpu") { - - //TODO: - //add support for linear_solver == "cholmod" - // maybe add pardiso as an option in the future + // TODO: + // add support for linear_solver == "cholmod" + // maybe add pardiso as an option in the future // - assert((linsolv=="ma57" || linsolv=="auto") && "Only MA57 or auto is supported on cpu."); - + assert((linsolv == "ma57" || linsolv == "auto") && "Only MA57 or auto is supported on cpu."); + #ifdef HIOP_USE_COINHSL - nlp_->log->printf(hovWarning, - "KKT_SPARSE_Condensed linsys: alloc MA57 for matrix of size %d (0 cons)\n", n); + nlp_->log->printf(hovWarning, "KKT_SPARSE_Condensed linsys: alloc MA57 for matrix of size %d (0 cons)\n", n); - //we need to get CPU CSR matrix + // we need to get CPU CSR matrix auto* M_csr = dynamic_cast(M_condensed_); assert(M_csr); linSys_ = new hiopLinSolverSparseCsrMa57(M_csr, nlp_); #else assert(false && "HiOp was built without a sparse linear solver needed by the condensed KKT approach"); -#endif // HIOP_USE_COINHSL - +#endif // HIOP_USE_COINHSL + } else { // // on device: compute_mode is hybrid, auto, or gpu // - assert(nullptr==linSys_); + assert(nullptr == linSys_); + + assert((linsolv == "cusolver-chol" || linsolv == "auto") && "Only cusolver-chol or auto is supported on gpu."); - assert((linsolv=="cusolver-chol" || linsolv=="auto") && "Only cusolver-chol or auto is supported on gpu."); - #ifdef HIOP_USE_CUDA - nlp_->log->printf(hovWarning, - "KKT_SPARSE_Condensed linsys: alloc cuSOLVER-chol matrix size %d\n", n); + nlp_->log->printf(hovWarning, "KKT_SPARSE_Condensed linsys: alloc cuSOLVER-chol matrix size %d\n", n); assert(M_condensed_); linSys_ = new hiopLinSolverCholCuSparse(M_condensed_, nlp_); -#endif - - //Return NULL (and assert) if a GPU sparse linear solver is not present - assert(linSys_!=nullptr && +#endif + + // Return NULL (and assert) if a GPU sparse linear solver is not present + assert(linSys_ != nullptr && "HiOp was built without a sparse linear solver for GPU/device and cannot run on the " "device as instructed by the 'compute_mode' option. Change the 'compute_mode' to 'cpu'"); } - - assert(linSys_&& "KKT_SPARSE_Condensed linsys: cannot instantiate backend linear solver"); - return dynamic_cast (linSys_); + assert(linSys_ && "KKT_SPARSE_Condensed linsys: cannot instantiate backend linear solver"); + + return dynamic_cast(linSys_); } -} // end of namespace +} // namespace hiop diff --git a/src/Optimization/hiopKKTLinSysSparseCondensed.hpp b/src/Optimization/hiopKKTLinSysSparseCondensed.hpp index cb9c6fc8d..b6d95d55c 100644 --- a/src/Optimization/hiopKKTLinSysSparseCondensed.hpp +++ b/src/Optimization/hiopKKTLinSysSparseCondensed.hpp @@ -63,8 +63,8 @@ namespace hiop { /** - * Solves a sparse KKT linear system by exploiting the sparse structure, namely reduces - * the so-called XDYcYd KKT system + * Solves a sparse KKT linear system by exploiting the sparse structure, namely reduces + * the so-called XDYcYd KKT system * [ H + Dx 0 Jd^T ] [ dx] [ rx_tilde ] * [ 0 Dd -I ] [ dd] = [ rd_tilde ] * [ Jd -I 0 ] [dyd] [ ryd ] @@ -73,14 +73,14 @@ namespace hiop * dd = Jd*dx - ryd * dyd = Dd*dd - rd_tilde = Dd*Jd*dx - Dd*ryd - rd_tilde - * Here Jd is sparse Jacobians for inequalities, H is a sparse Hessian matrix, Dx is - * log-barrier diagonal corresponding to x variables, Dd is the log-barrier diagonal - * corresponding to the inequality slacks, and I is the identity matrix. + * Here Jd is sparse Jacobians for inequalities, H is a sparse Hessian matrix, Dx is + * log-barrier diagonal corresponding to x variables, Dd is the log-barrier diagonal + * corresponding to the inequality slacks, and I is the identity matrix. * - * @note: the NLP is assumed to have no equality constraints (or have been relaxed to + * @note: the NLP is assumed to have no equality constraints (or have been relaxed to * two-sided inequality constraints). * - * Dual regularization may be not enforced as it requires repeated divisions that are + * Dual regularization may be not enforced as it requires repeated divisions that are * prone to round-off error accumulation. When/If the class is going to be updated to * use dual regularization, the regularized XDYcYd KKT system reads: * [ H+Dx+delta_wx*I 0 Jd^T ] [ dx] [ rx_tilde ] @@ -94,14 +94,14 @@ namespace hiop * * From (Dd+delta_wd*I)*dd - dyd = rd_tilde one can write * -> (Dd+delta_wd*I)*(Jd*dx - delta_cd*dyd - ryd) - dyd = rd_tilde - * -> [I+delta_cd*(Dd+delta_wd*I)] dyd = (Dd+delta_wd*I)*(Jd*dx - ryd) - rd_tilde + * -> [I+delta_cd*(Dd+delta_wd*I)] dyd = (Dd+delta_wd*I)*(Jd*dx - ryd) - rd_tilde * dyd = (I+delta_cd*(Dd+delta_wd*I))^{-1} [ (Dd+delta_wd*I)*(Jd*dx - ryd) - rd_tilde ] * dyd = Dd2 [ (Dd+delta_wd*I)*(Jd*dx - ryd) - rd_tilde ] - * dyd = Dd3*Jd*dx - Dd3*ryd - Dd2 rd_tilde + * dyd = Dd3*Jd*dx - Dd3*ryd - Dd2 rd_tilde * * (H+Dx+delta_wx*I + Jd^T * Dd3 * Jd) dx = rx_tilde + Jd^T*Dd3*ryd + Jd^T*Dd2*rd_tilde */ - + class hiopKKTLinSysCondensedSparse : public hiopKKTLinSysCompressedSparseXDYcYd { public: @@ -119,11 +119,12 @@ class hiopKKTLinSysCondensedSparse : public hiopKKTLinSysCompressedSparseXDYcYd hiopVector& dd, hiopVector& dyc, hiopVector& dyd); + protected: /** - * Solves the compressed XDYcYd system by using direct solves with Cholesky factors of the + * Solves the compressed XDYcYd system by using direct solves with Cholesky factors of the * condensed linear system and appropriately manipulate the XDYcYD rhs/sol to condensed rhs/sol. - * + * * The method is used as a preconditioner solve in the Krylov-based iterative refinement from * solve_compressed method. */ @@ -135,25 +136,24 @@ class hiopKKTLinSysCondensedSparse : public hiopKKTLinSysCompressedSparseXDYcYd hiopVector& dd, hiopVector& dyc, hiopVector& dyd); - + protected: //// ////from the parent class and its parents we also use //// - //right-hand side [rx_tilde, rd_tilde, ((ryc->empty)), ryd] - // hiopVector *rhs_; + // right-hand side [rx_tilde, rd_tilde, ((ryc->empty)), ryd] + // hiopVector *rhs_; - // hiopVectorPar *Dd; // hiopVectorPar *ryd_tilde; - //from the parent's parent class (hiopKKTLinSysCompressed) we also use - // hiopVectorPar *Dx; - // hiopVectorPar *rx_tilde; + // from the parent's parent class (hiopKKTLinSysCompressed) we also use + // hiopVectorPar *Dx; + // hiopVectorPar *rx_tilde; - //keep Hx = Dx (Dx=log-barrier diagonal for x) + regularization - //keep Hd = Dd (Dd=log-barrier diagonal for slack variable) + regularization - // hiopVector *Hx_, *Hd_; + // keep Hx = Dx (Dx=log-barrier diagonal for x) + regularization + // keep Hd = Dd (Dd=log-barrier diagonal for slack variable) + regularization + // hiopVector *Hx_, *Hd_; // // hiopNlpSparse* nlpSp_; @@ -166,7 +166,7 @@ class hiopKKTLinSysCondensedSparse : public hiopKKTLinSysCompressedSparseXDYcYd /// Member for JacD in CSR format hiopMatrixSparseCSR* JacD_; - + /// Member for JacD' in CSR format hiopMatrixSparseCSR* JacDt_; @@ -175,10 +175,10 @@ class hiopKKTLinSysCondensedSparse : public hiopKKTLinSysCompressedSparseXDYcYd /// Member for upper triangular part of Hess hiopMatrixSparseCSR* Hess_upper_csr_; - + /// Member for Hess hiopMatrixSparseCSR* Hess_csr_; - + /// Member for JacD'*Dd*JacD hiopMatrixSparseCSR* JtDiagJ_; @@ -190,13 +190,14 @@ class hiopKKTLinSysCondensedSparse : public hiopKKTLinSysCompressedSparseXDYcYd /// Member for storing the auxiliary sum of Dx + delta_wx*I hiopMatrixSparseCSR* Diag_Dx_deltawx_; - + /// Stores Dx plus delta_wx for more efficient updates of the condensed system matrix hiopVector* Dx_plus_deltawx_; hiopVector* deltawx_; /// Stores a copy of Hd_ on the device (to be later removed) hiopVector* Hd_copy_; + private: /// Decides which linear solver to be used. Call only after `M_condended_` has been computed. hiopLinSolverSymSparse* determine_and_create_linsys(); @@ -214,11 +215,11 @@ class hiopKKTLinSysCondensedSparse : public hiopKKTLinSysCompressedSparseXDYcYd #else assert(false && "compute mode not supported without HIOP_USE_CUDA build"); return "DEFAULT"; -#endif // HIOP_USE_CUDA +#endif // HIOP_USE_CUDA } } }; - -} // end of namespace + +} // namespace hiop #endif diff --git a/src/Optimization/hiopKKTLinSysSparseNormalEqn.cpp b/src/Optimization/hiopKKTLinSysSparseNormalEqn.cpp index a56553659..e6e33d3d3 100644 --- a/src/Optimization/hiopKKTLinSysSparseNormalEqn.cpp +++ b/src/Optimization/hiopKKTLinSysSparseNormalEqn.cpp @@ -60,36 +60,36 @@ #ifdef HIOP_USE_CUDA #include "hiopLinSolverCholCuSparse.hpp" #include "hiopVectorCuda.hpp" -#endif // HIOP_USE_CUDA +#endif // HIOP_USE_CUDA #include "hiopMatrixSparseCSRSeq.hpp" namespace hiop -{ +{ hiopKKTLinSysSparseNormalEqn::hiopKKTLinSysSparseNormalEqn(hiopNlpFormulation* nlp) - : hiopKKTLinSysNormalEquation(nlp), - rhs_{nullptr}, - Hess_diag_{nullptr}, - deltawx_{nullptr}, - deltawd_{nullptr}, - deltacc_{nullptr}, - deltacd_{nullptr}, - dual_reg_copy_{nullptr}, - Hess_diag_copy_{nullptr}, - Hx_copy_{nullptr}, - Hd_copy_{nullptr}, - Hxd_inv_copy_{nullptr}, - write_linsys_counter_(-1), - csr_writer_(nlp), - nlpSp_{nullptr}, - HessSp_{nullptr}, - Jac_cSp_{nullptr}, - Jac_dSp_{nullptr}, - JacD_{nullptr}, - JacDt_{nullptr}, - JDiagJt_{nullptr}, - Diag_dualreg_{nullptr}, - M_normaleqn_{nullptr} + : hiopKKTLinSysNormalEquation(nlp), + rhs_{nullptr}, + Hess_diag_{nullptr}, + deltawx_{nullptr}, + deltawd_{nullptr}, + deltacc_{nullptr}, + deltacd_{nullptr}, + dual_reg_copy_{nullptr}, + Hess_diag_copy_{nullptr}, + Hx_copy_{nullptr}, + Hd_copy_{nullptr}, + Hxd_inv_copy_{nullptr}, + write_linsys_counter_(-1), + csr_writer_(nlp), + nlpSp_{nullptr}, + HessSp_{nullptr}, + Jac_cSp_{nullptr}, + Jac_dSp_{nullptr}, + JacD_{nullptr}, + JacDt_{nullptr}, + JDiagJt_{nullptr}, + Diag_dualreg_{nullptr}, + M_normaleqn_{nullptr} { nlpSp_ = dynamic_cast(nlp_); assert(nlpSp_); @@ -117,9 +117,8 @@ hiopKKTLinSysSparseNormalEqn::~hiopKKTLinSysSparseNormalEqn() bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pdreg) { - #ifdef HIOP_DEEPCHECKS - assert(perturb_calc_->check_consistency() && "something went wrong with IC"); + assert(perturb_calc_->check_consistency() && "something went wrong with IC"); #endif delta_wx_ = perturb_calc_->get_curr_delta_wx(); @@ -128,7 +127,7 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd delta_cd_ = perturb_calc_->get_curr_delta_cd(); HessSp_ = dynamic_cast(Hess_); - if(!HessSp_) { + if(!HessSp_) { assert(false); return false; } @@ -149,16 +148,16 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd /* TODO: here we assume Hess is diagonal!*/ assert(HessSp_->is_diagonal()); - + hiopMatrixSymSparseTriplet* Hess_triplet = dynamic_cast(Hess_); - + assert(HessSp_ && Jac_cSp_ && Jac_dSp_); - if(nullptr==Jac_dSp_ || nullptr==HessSp_) { + if(nullptr == Jac_dSp_ || nullptr == HessSp_) { nlp_->runStats.kkt.tmUpdateInit.stop(); - //incorrect linear algebra objects were provided to this class + // incorrect linear algebra objects were provided to this class return false; } - + size_type nx = HessSp_->n(); size_type neq = Jac_cSp_->m(); size_type nineq = Jac_dSp_->m(); @@ -166,42 +165,42 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd assert(nx == Dx_->get_size()); // NOTE: - // hybrid compute mode -> linear algebra objects used internally by the class will be allocated on the device. Most of the inputs - // to this class will be however on HOST under hybrid mode, so some objects are copied/replicated/transfered to device - // gpu compute mode -> not yet supported - // cpu compute mode -> all objects on HOST, however, some objects will still be copied (e.g., Hd_) to ensure code homogeneity + // hybrid compute mode -> linear algebra objects used internally by the class will be allocated on the device. Most of the + // inputs to this class will be however on HOST under hybrid mode, so some objects are copied/replicated/transfered to + // device gpu compute mode -> not yet supported cpu compute mode -> all objects on HOST, however, some objects will still + // be copied (e.g., Hd_) to ensure code homogeneity // - // REMARK: The objects that are copied/replicated are temporary and will be removed later on as the remaining sparse KKT computations - // will be ported to device + // REMARK: The objects that are copied/replicated are temporary and will be removed later on as the remaining sparse KKT + // computations will be ported to device - //determine the "internal" memory space, see above note + // determine the "internal" memory space, see above note std::string mem_space_internal = determine_memory_space_internal(nlp_->options->GetString("compute_mode")); - //allocate on the first call + // allocate on the first call if(nullptr == Hess_diag_) { - //HOST + // HOST Hess_diag_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx); Hx_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nx); Hd_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nineq); Hess_triplet->extract_diagonal(*Hess_diag_); - assert(nullptr == Hd_copy_); //should be also not allocated - //temporary: make a copy of Hd on the "internal" mem_space + assert(nullptr == Hd_copy_); // should be also not allocated + // temporary: make a copy of Hd on the "internal" mem_space Hess_diag_copy_ = LinearAlgebraFactory::create_vector(mem_space_internal, nx); Hx_copy_ = LinearAlgebraFactory::create_vector(mem_space_internal, nx); Hd_copy_ = LinearAlgebraFactory::create_vector(mem_space_internal, nineq); Hxd_inv_copy_ = LinearAlgebraFactory::create_vector(mem_space_internal, nx + nineq); - - assert(nullptr == deltawx_); //should be also not allocated - //allocate this internal vector on the device if hybrid compute mode + + assert(nullptr == deltawx_); // should be also not allocated + // allocate this internal vector on the device if hybrid compute mode deltawx_ = LinearAlgebraFactory::create_vector(mem_space_internal, nx); deltawd_ = LinearAlgebraFactory::create_vector(mem_space_internal, nineq); deltacc_ = LinearAlgebraFactory::create_vector(mem_space_internal, neq); deltacd_ = LinearAlgebraFactory::create_vector(mem_space_internal, nineq); dual_reg_copy_ = LinearAlgebraFactory::create_vector(mem_space_internal, neq + nineq); } - //build the diagonal Hx = Dx + delta_wx + diag(Hess) + // build the diagonal Hx = Dx + delta_wx + diag(Hess) Hx_->copyFrom(*Dx_); Hx_->axpy(1., *delta_wx_); Hx_->axpy(1., *Hess_diag_); @@ -210,18 +209,18 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd Hd_->copyFrom(*Dd_); Hd_->axpy(1., *delta_wd_); - //temporary code, see above note + // temporary code, see above note { if(mem_space_internal == "CUDA") { #ifdef HIOP_USE_CUDA auto Hess_diag_cuda = dynamic_cast(Hess_diag_copy_); auto Hess_diag_par = dynamic_cast(Hess_diag_); auto Hx_cuda = dynamic_cast(Hx_copy_); - auto Hx_par = dynamic_cast(Hx_); + auto Hx_par = dynamic_cast(Hx_); auto Hd_cuda = dynamic_cast(Hd_copy_); - auto Hd_par = dynamic_cast(Hd_); + auto Hd_par = dynamic_cast(Hd_); assert(Hx_cuda && "incorrect type for vector class"); - assert(Hx_par && "incorrect type for vector class"); + assert(Hx_par && "incorrect type for vector class"); Hess_diag_cuda->copy_from_vectorpar(*Hess_diag_par); Hx_cuda->copy_from_vectorpar(*Hx_par); Hd_cuda->copy_from_vectorpar(*Hd_par); @@ -241,7 +240,7 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd deltacd_cuda->copy_from_vectorpar(deltacd_host); #else assert(false && "compute mode not available under current build: enable CUDA."); -#endif +#endif } else { assert(dynamic_cast(Hd_) && "incorrect type for vector class"); Hess_diag_copy_->copyFrom(*Hess_diag_); @@ -256,29 +255,31 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd nlp_->runStats.kkt.tmUpdateInit.stop(); nlp_->runStats.kkt.tmUpdateLinsys.start(); - + /* - * compute condensed linear system - * ( [ Jc 0 ] [ H+Dx+delta_wx 0 ]^{-1} [ Jc^T Jd^T ] + [ delta_cc 0 ] ) - * ( [ Jd -I ] [ 0 Dd+delta_wd ] [ 0 -I ] [ 0 delta_cd ] ) - */ + * compute condensed linear system + * ( [ Jc 0 ] [ H+Dx+delta_wx 0 ]^{-1} [ Jc^T Jd^T ] + [ delta_cc 0 ] ) + * ( [ Jd -I ] [ 0 Dd+delta_wd ] [ 0 -I ] [ 0 delta_cd ] ) + */ - // TODO: jump to the steps where we add dual regularization, if delta_wx is not changed and this function is called due to refactorization + // TODO: jump to the steps where we add dual regularization, if delta_wx is not changed and this function is called due to + // refactorization hiopTimer t; if(nullptr == JDiagJt_) { - t.reset(); t.start(); + t.reset(); + t.start(); // first time this is called // form sparse matrix in triplet form on HOST - size_type nnz_jac_con = Jac_cSp_->numberOfNonzeros()+Jac_dSp_->numberOfNonzeros()+nineq; - auto* Jac_triplet_tmp = new hiopMatrixSparseTriplet(neq+nineq, nx+nineq, nnz_jac_con); + size_type nnz_jac_con = Jac_cSp_->numberOfNonzeros() + Jac_dSp_->numberOfNonzeros() + nineq; + auto* Jac_triplet_tmp = new hiopMatrixSparseTriplet(neq + nineq, nx + nineq, nnz_jac_con); Jac_triplet_tmp->setToZero(); // build [ Jc 0 ] // [ Jd -I ] // copy Jac to the full iterate matrix size_type dest_nnz_st{0}; - Jac_triplet_tmp->copyRowsBlockFrom(*Jac_cSp_, 0, neq, 0, dest_nnz_st); + Jac_triplet_tmp->copyRowsBlockFrom(*Jac_cSp_, 0, neq, 0, dest_nnz_st); dest_nnz_st += Jac_cSp_->numberOfNonzeros(); Jac_triplet_tmp->copyRowsBlockFrom(*Jac_dSp_, 0, nineq, neq, dest_nnz_st); dest_nnz_st += Jac_dSp_->numberOfNonzeros(); @@ -290,7 +291,7 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd /// TODO: now we assume Jc and Jd won't change, i.e., LP or QP. hence we build JacD_ and JacDt_ once and save them Jac_triplet_tmp->sort(); - assert( nullptr == JacD_ && nullptr == JacDt_ && nullptr == JDiagJt_ ); + assert(nullptr == JacD_ && nullptr == JacDt_ && nullptr == JDiagJt_); JacD_ = LinearAlgebraFactory::create_matrix_sparse_csr(mem_space_internal); JacD_->form_from_symbolic(*Jac_triplet_tmp); @@ -298,17 +299,18 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd JacDt_ = LinearAlgebraFactory::create_matrix_sparse_csr(mem_space_internal); JacDt_->form_transpose_from_symbolic(*JacD_); - JacDt_->form_transpose_from_numeric(*JacD_); // need this line before calling JacD_->times_mat_alloc(*JacDt_) + JacDt_->form_transpose_from_numeric(*JacD_); // need this line before calling JacD_->times_mat_alloc(*JacDt_) - //symbolic multiplication for JacD*Diag*JacDt - // J * (D*Jt) (D is not used since it does not change the sparsity pattern) + // symbolic multiplication for JacD*Diag*JacDt + // J * (D*Jt) (D is not used since it does not change the sparsity pattern) JDiagJt_ = JacD_->times_mat_alloc(*JacDt_); JacD_->times_mat_symbolic(*JDiagJt_, *JacDt_); delete Jac_triplet_tmp; } - t.reset(); t.start(); + t.reset(); + t.start(); if(pdreg.get_curr_delta_type() != hiopPDPerturbation::DeltasUpdateType::DualUpdate) { // build the diagonal Hxd_inv_copy_ = [H+Dx+delta_wx, Dd+delta_wd ]^{-1} @@ -326,18 +328,20 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd } if(nullptr == M_normaleqn_) { - t.reset(); t.start(); + t.reset(); + t.start(); Diag_dualreg_ = LinearAlgebraFactory::create_matrix_sparse_csr(mem_space_internal); Diag_dualreg_->form_diag_from_symbolic(*dual_reg_copy_); - //form sparsity pattern of M_normaleqn_ = JacD*Diag*JacDt + delta_dual*I + // form sparsity pattern of M_normaleqn_ = JacD*Diag*JacDt + delta_dual*I M_normaleqn_ = Diag_dualreg_->add_matrix_alloc(*JDiagJt_); Diag_dualreg_->add_matrix_symbolic(*M_normaleqn_, *JDiagJt_); } - t.reset(); t.start(); + t.reset(); + t.start(); Diag_dualreg_->set_diagonal(0.0); - //if(!delta_cc_in.is_zero() || !delta_cd_in.is_zero()) // TODO: for efficiency? + // if(!delta_cc_in.is_zero() || !delta_cd_in.is_zero()) // TODO: for efficiency? { deltacc_->copyToStarting(*dual_reg_copy_, 0); deltacd_->copyToStarting(*dual_reg_copy_, neq); @@ -352,7 +356,7 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd linSys_ = determine_and_create_linsys(); nlp_->runStats.kkt.tmUpdateLinsys.stop(); - + if(perf_report_) { nlp_->log->printf(hovSummary, "KKT_SPARSE_NormalEqn linsys: Low-level linear system size %d nnz %d\n", @@ -360,74 +364,68 @@ bool hiopKKTLinSysSparseNormalEqn::build_kkt_matrix(const hiopPDPerturbation& pd M_normaleqn_->numberOfNonzeros()); } - //write matrix to file if requested + // write matrix to file if requested if(nlp_->options->GetString("write_kkt") == "yes") { write_linsys_counter_++; } - if(write_linsys_counter_>=0) { + if(write_linsys_counter_ >= 0) { // TODO csr_writer_.writeMatToFile(Msys, write_linsys_counter_, nx, 0, nineq); } - return true; + return true; } hiopLinSolverSymSparse* hiopKKTLinSysSparseNormalEqn::determine_and_create_linsys() { if(linSys_) { - return dynamic_cast (linSys_); + return dynamic_cast(linSys_); } size_type n = M_normaleqn_->m(); auto linsolv = nlp_->options->GetString("linear_solver_sparse"); if(nlp_->options->GetString("compute_mode") == "cpu") { - - //TODO: - //add support for linear_solver == "cholmod" - // maybe add pardiso as an option in the future + // TODO: + // add support for linear_solver == "cholmod" + // maybe add pardiso as an option in the future // - assert((linsolv=="ma57" || linsolv=="auto") && "Only MA57 or auto is supported on cpu."); + assert((linsolv == "ma57" || linsolv == "auto") && "Only MA57 or auto is supported on cpu."); #ifdef HIOP_USE_COINHSL - nlp_->log->printf(hovWarning, - "KKT_SPARSE_NormalEqn linsys: alloc MA57 for matrix of size %d (0 cons)\n", n); + nlp_->log->printf(hovWarning, "KKT_SPARSE_NormalEqn linsys: alloc MA57 for matrix of size %d (0 cons)\n", n); - //we need to get CPU CSR matrix + // we need to get CPU CSR matrix auto* M_csr = dynamic_cast(M_normaleqn_); assert(M_csr); linSys_ = new hiopLinSolverSparseCsrMa57(M_csr, nlp_); #else assert(false && "HiOp was built without a sparse linear solver needed by the condensed KKT approach"); -#endif // HIOP_USE_COINHSL +#endif // HIOP_USE_COINHSL } else { // // on device: compute_mode is hybrid, auto, or gpu // - assert(nullptr==linSys_); + assert(nullptr == linSys_); - assert((linsolv=="cusolver-chol" || linsolv=="auto") && "Only cusolver-chol or auto is supported on gpu."); + assert((linsolv == "cusolver-chol" || linsolv == "auto") && "Only cusolver-chol or auto is supported on gpu."); #ifdef HIOP_USE_CUDA - nlp_->log->printf(hovWarning, - "KKT_SPARSE_NormalEqn linsys: alloc cuSOLVER-chol matrix size %d\n", n); + nlp_->log->printf(hovWarning, "KKT_SPARSE_NormalEqn linsys: alloc cuSOLVER-chol matrix size %d\n", n); assert(M_normaleqn_); linSys_ = new hiopLinSolverCholCuSparse(M_normaleqn_, nlp_); #endif - //Return NULL (and assert) if a GPU sparse linear solver is not present - assert(linSys_!=nullptr && + // Return NULL (and assert) if a GPU sparse linear solver is not present + assert(linSys_ != nullptr && "HiOp was built without a sparse linear solver for GPU/device and cannot run on the " "device as instructed by the 'compute_mode' option. Change the 'compute_mode' to 'cpu'"); } - assert(linSys_&& "KKT_SPARSE_NormalEqn linsys: cannot instantiate backend linear solver"); + assert(linSys_ && "KKT_SPARSE_NormalEqn linsys: cannot instantiate backend linear solver"); - return dynamic_cast (linSys_); + return dynamic_cast(linSys_); } -bool hiopKKTLinSysSparseNormalEqn::solveCompressed(hiopVector& ryc, - hiopVector& ryd, - hiopVector& dyc, - hiopVector& dyd) +bool hiopKKTLinSysSparseNormalEqn::solveCompressed(hiopVector& ryc, hiopVector& ryd, hiopVector& dyc, hiopVector& dyd) { bool bret{false}; @@ -438,7 +436,7 @@ bool hiopKKTLinSysSparseNormalEqn::solveCompressed(hiopVector& ryc, // this is rhs used by the direct "condensed" solve if(rhs_ == NULL) { - rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nyc+nyd); + rhs_ = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), nyc + nyd); } nlp_->log->write("RHS KKT_SPARSE_NormalEqn ryc:", ryc, hovIteration); @@ -450,7 +448,7 @@ bool hiopKKTLinSysSparseNormalEqn::solveCompressed(hiopVector& ryc, ryc.copyToStarting(*rhs_, 0); ryd.copyToStarting(*rhs_, nyc); - if(write_linsys_counter_>=0) { + if(write_linsys_counter_ >= 0) { csr_writer_.writeRhsToFile(*rhs_, write_linsys_counter_); } nlp_->runStats.kkt.tmSolveRhsManip.stop(); @@ -467,7 +465,7 @@ bool hiopKKTLinSysSparseNormalEqn::solveCompressed(hiopVector& ryc, "(summary for linear solver from KKT_SPARSE_NormalEqn(direct))\n%s", nlp_->runStats.linsolv.get_summary_last_solve().c_str()); } - if(write_linsys_counter_>=0) { + if(write_linsys_counter_ >= 0) { csr_writer_.writeSolToFile(*rhs_, write_linsys_counter_); } @@ -476,31 +474,31 @@ bool hiopKKTLinSysSparseNormalEqn::solveCompressed(hiopVector& ryc, // // unpack // - rhs_->startingAtCopyToStartingAt(0, dyc, 0); + rhs_->startingAtCopyToStartingAt(0, dyc, 0); rhs_->startingAtCopyToStartingAt(nyc, dyd, 0); nlp_->log->write("SOL KKT_SPARSE_NormalEqn dyc:", dyc, hovMatrices); nlp_->log->write("SOL KKT_SPARSE_NormalEqn dyd:", dyd, hovMatrices); nlp_->runStats.kkt.tmSolveRhsManip.stop(); - + return bret; } int hiopKKTLinSysSparseNormalEqn::factorizeWithCurvCheck() { - //factorization + // factorization size_type n_neg_eig = hiopKKTLinSysCurvCheck::factorizeWithCurvCheck(); if(n_neg_eig == -1) { - nlp_->log->printf(hovScalars, - "KKT_SPARSE_NormalEqn linsys: Detected null eigenvalues.\n"); + nlp_->log->printf(hovScalars, "KKT_SPARSE_NormalEqn linsys: Detected null eigenvalues.\n"); n_neg_eig = -1; } else { // Cholesky factorization succeeds. Matrix is PD and hence the corresponding Augmented system has correct inertia - n_neg_eig = Jac_c_->m() + Jac_d_->m();; + n_neg_eig = Jac_c_->m() + Jac_d_->m(); + ; } return n_neg_eig; } -} // end of namespace +} // namespace hiop diff --git a/src/Optimization/hiopKKTLinSysSparseNormalEqn.hpp b/src/Optimization/hiopKKTLinSysSparseNormalEqn.hpp index f74347a80..98ba6410e 100644 --- a/src/Optimization/hiopKKTLinSysSparseNormalEqn.hpp +++ b/src/Optimization/hiopKKTLinSysSparseNormalEqn.hpp @@ -63,7 +63,7 @@ namespace hiop { -/** +/** * @brief Provides the functionality for reducing the KKT linear system to the * normal equation system below in dyc and dyd variables and then to perform * the basic ops needed to compute the remaining directions @@ -73,9 +73,9 @@ namespace hiop * ( [ Jc 0 ] [ H+Dx+delta_wx 0 ]^{-1} [ Jc^T Jd^T ] + [ delta_cc 0 ] ) [dyc] = [ ryc_tilde ] * ( [ Jd -I ] [ 0 Dd+delta_wd ] [ 0 -I ] [ 0 delta_cd ] ) [dyd] [ ryd_tilde ] * - * [ ryc_tilde ] = [ Jc 0 ] [ H+Dx+delta_wx 0 ]^{-1} [ rx_tilde ] - [ ryc ] + * [ ryc_tilde ] = [ Jc 0 ] [ H+Dx+delta_wx 0 ]^{-1} [ rx_tilde ] - [ ryc ] * [ ryd_tilde ] [ Jd -I ] [ 0 Dd+delta_wd ] [ rd_tilde ] [ ryd ] - * + * * where * - Jc and Jd present the sparse Jacobians for equalities and inequalities * - H is a sparse Hessian matrix @@ -83,7 +83,7 @@ namespace hiop * * REMARK: This linear system fits LP/QP best, where H is empty and hence only diagonal matrices are inversed. * If H is diagonal, the normal equation matrix becomes: - * [ Jc(H+Dx+delta_wx)^{-1}Jc^T Jc(H+Dx+delta_wx)^{-1}Jd^T ] + [ delta_cc 0 ] + * [ Jc(H+Dx+delta_wx)^{-1}Jc^T Jc(H+Dx+delta_wx)^{-1}Jd^T ] + [ delta_cc 0 ] * [ Jd(H+Dx+delta_wx)^{-1}Jc^T Jd(H+Dx+delta_wx)^{-1}Jd^T + ( Dd+delta_wd)^{-1} ] [ 0 delta_cd ] * */ @@ -95,14 +95,11 @@ class hiopKKTLinSysSparseNormalEqn : public hiopKKTLinSysNormalEquation virtual bool build_kkt_matrix(const hiopPDPerturbation& pdreg); - virtual bool solveCompressed(hiopVector& ryc_tilde, - hiopVector& ryd_tilde, - hiopVector& dyc, - hiopVector& dyd); + virtual bool solveCompressed(hiopVector& ryc_tilde, hiopVector& ryd_tilde, hiopVector& dyc, hiopVector& dyd); /** * @brief factorize the matrix and check curvature - */ + */ virtual int factorizeWithCurvCheck(); protected: @@ -129,22 +126,22 @@ class hiopKKTLinSysSparseNormalEqn : public hiopKKTLinSysNormalEquation int write_linsys_counter_; hiopCSR_IO csr_writer_; - //just dynamic_cast-ed pointers + // just dynamic_cast-ed pointers hiopNlpSparse* nlpSp_; hiopMatrixSparse* HessSp_; const hiopMatrixSparse* Jac_cSp_; const hiopMatrixSparse* Jac_dSp_; /** - * Member for ( [ Jc 0 ] [ H+Dx+delta_wx 0 ]^{-1} [ Jc^T Jd^T ] + [ delta_cc 0 ] ) - * ( [ Jd -I ] [ 0 Dd+delta_wd ] [ 0 -I ] [ 0 delta_cd ] ) - * let JacD_ = [Jc 0; Jd -I] - * @pre: now we assume Jc and Jd won't change, i.e., LP or QP. hence we build JacD_ and JacDt_ once and save them - */ + * Member for ( [ Jc 0 ] [ H+Dx+delta_wx 0 ]^{-1} [ Jc^T Jd^T ] + [ delta_cc 0 ] ) + * ( [ Jd -I ] [ 0 Dd+delta_wd ] [ 0 -I ] [ 0 delta_cd ] ) + * let JacD_ = [Jc 0; Jd -I] + * @pre: now we assume Jc and Jd won't change, i.e., LP or QP. hence we build JacD_ and JacDt_ once and save them + */ /// Member for JacD in CSR format hiopMatrixSparseCSR* JacD_; - + /// Member for JacD' in CSR format hiopMatrixSparseCSR* JacDt_; @@ -158,7 +155,7 @@ class hiopKKTLinSysSparseNormalEqn : public hiopKKTLinSysNormalEquation hiopMatrixSparseCSR* M_normaleqn_; private: - //placeholder for the code that decides which linear solver to used based on safe_mode_ + // placeholder for the code that decides which linear solver to used based on safe_mode_ hiopLinSolverSymSparse* determine_and_create_linsys(); /// Determines memory space used internally based on the "mem_space" and "compute_mode" options. This is temporary @@ -175,11 +172,11 @@ class hiopKKTLinSysSparseNormalEqn : public hiopKKTLinSysNormalEquation #else assert(false && "compute mode not supported without HIOP_USE_CUDA build"); return "DEFAULT"; -#endif // HIOP_USE_CUDA +#endif // HIOP_USE_CUDA } } }; -} // end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopLogBarProblem.hpp b/src/Optimization/hiopLogBarProblem.hpp index b17553e0b..4d3134078 100644 --- a/src/Optimization/hiopLogBarProblem.hpp +++ b/src/Optimization/hiopLogBarProblem.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_LOGBARRPROB @@ -55,8 +55,9 @@ namespace hiop class hiopLogBarProblem { public: - hiopLogBarProblem(hiopNlpFormulation* nlp_) - : kappa_d(1e-5), nlp(nlp_) + hiopLogBarProblem(hiopNlpFormulation* nlp_) + : kappa_d(1e-5), + nlp(nlp_) { _grad_x_logbar = nlp->alloc_primal_vec(); _grad_d_logbar = nlp->alloc_dual_ineq_vec(); @@ -66,38 +67,49 @@ class hiopLogBarProblem delete _grad_x_logbar; delete _grad_d_logbar; }; -public: //members + +public: // members double mu; double f_logbar, f_logbar_trial; - hiopVector *_grad_x_logbar, *_grad_d_logbar; //of the log barrier - //just proxies: keeps pointers to the problem's data and updates LogBar func, grad and all that on the fly + hiopVector *_grad_x_logbar, *_grad_d_logbar; // of the log barrier + // just proxies: keeps pointers to the problem's data and updates LogBar func, grad and all that on the fly const hiopIterate *iter, *iter_trial; - const hiopVector *c_nlp,*d_nlp, *c_nlp_trial, *d_nlp_trial; + const hiopVector *c_nlp, *d_nlp, *c_nlp_trial, *d_nlp_trial; const hiopMatrix *Jac_c_nlp, *Jac_d_nlp; - //algorithm's parameters + // algorithm's parameters // factor in computing the linear damping terms used to control unboundness in the log-barrier problem (Section 3.7) */ - double kappa_d; + double kappa_d; + public: - //update with the NLP problem data given by the parameters - inline void - updateWithNlpInfo(const hiopIterate& iter_, const double& mu_, - const double &f, const hiopVector& c_, const hiopVector& d_, - const hiopVector& gradf_, const hiopMatrix& Jac_c_, const hiopMatrix& Jac_d_) + // update with the NLP problem data given by the parameters + inline void updateWithNlpInfo(const hiopIterate& iter_, + const double& mu_, + const double& f, + const hiopVector& c_, + const hiopVector& d_, + const hiopVector& gradf_, + const hiopMatrix& Jac_c_, + const hiopMatrix& Jac_d_) { nlp->runStats.tmSolverInternal.start(); - mu=mu_; c_nlp=&c_; d_nlp=&d_; Jac_c_nlp=&Jac_c_; Jac_d_nlp=&Jac_d_; iter=&iter_; + mu = mu_; + c_nlp = &c_; + d_nlp = &d_; + Jac_c_nlp = &Jac_c_; + Jac_d_nlp = &Jac_d_; + iter = &iter_; _grad_x_logbar->copyFrom(gradf_); - _grad_d_logbar->setToZero(); - //add log terms to function - double aux=-mu * iter->evalLogBarrier(); + _grad_d_logbar->setToZero(); + // add log terms to function + double aux = -mu * iter->evalLogBarrier(); f_logbar = f + aux; #ifdef HIOP_DEEPCHECKS nlp->log->write("gradx_log_bar grad_f:", *_grad_x_logbar, hovLinesearchVerb); #endif - //add log terms to gradient + // add log terms to gradient iter->addLogBarGrad_x(mu, *_grad_x_logbar); iter->addLogBarGrad_d(mu, *_grad_d_logbar); @@ -105,12 +117,12 @@ class hiopLogBarProblem nlp->log->write("gradx_log_bar grad_log:", *_grad_x_logbar, hovLinesearchVerb); #endif - //add damping terms - if(kappa_d>0.) { - iter->addLinearDampingTermToGrad_x(mu,kappa_d,1.0,*_grad_x_logbar); - iter->addLinearDampingTermToGrad_d(mu,kappa_d,1.0,*_grad_d_logbar); + // add damping terms + if(kappa_d > 0.) { + iter->addLinearDampingTermToGrad_x(mu, kappa_d, 1.0, *_grad_x_logbar); + iter->addLinearDampingTermToGrad_d(mu, kappa_d, 1.0, *_grad_d_logbar); - f_logbar += iter->linearDampingTerm(mu,kappa_d); + f_logbar += iter->linearDampingTerm(mu, kappa_d); #ifdef HIOP_DEEPCHECKS nlp->log->write("gradx_log_bar final, with damping:", *_grad_x_logbar, hovLinesearchVerb); nlp->log->write("gradd_log_bar final, with damping:", *_grad_d_logbar, hovLinesearchVerb); @@ -118,15 +130,18 @@ class hiopLogBarProblem nlp->runStats.tmSolverInternal.stop(); } } - inline void - updateWithNlpInfo_trial_funcOnly(const hiopIterate& iter_, - const double &f, const hiopVector& c_, const hiopVector& d_) + inline void updateWithNlpInfo_trial_funcOnly(const hiopIterate& iter_, + const double& f, + const hiopVector& c_, + const hiopVector& d_) { nlp->runStats.tmSolverInternal.start(); - - c_nlp_trial=&c_; d_nlp_trial=&d_; iter_trial=&iter_; + + c_nlp_trial = &c_; + d_nlp_trial = &d_; + iter_trial = &iter_; f_logbar_trial = f - mu * iter_trial->evalLogBarrier(); - if(kappa_d>0.) f_logbar_trial += iter_trial->linearDampingTerm(mu,kappa_d); + if(kappa_d > 0.) f_logbar_trial += iter_trial->linearDampingTerm(mu, kappa_d); nlp->runStats.tmSolverInternal.stop(); } @@ -134,33 +149,34 @@ class hiopLogBarProblem /* @brief Adds beta*(damping terms) to the gradient `gradx` w.r.t. x */ inline void addNonLogBarTermsToGrad_x(const double& beta, hiopVector& gradx) const { - if(kappa_d>0.) iter->addLinearDampingTermToGrad_x(mu, kappa_d, beta, gradx); + if(kappa_d > 0.) iter->addLinearDampingTermToGrad_x(mu, kappa_d, beta, gradx); } /* @brief Adds beta*(damping terms) to the gradient `gradx` w.r.t. d */ inline void addNonLogBarTermsToGrad_d(const double& beta, hiopVector& gradd) const { - //if(kappa_d>0.) iter->addLinearDampingTermToGrad_d(mu,kappa_d,beta,gradd); - if(kappa_d>0.) iter->addLinearDampingTermToGrad_d(mu, kappa_d, beta, gradd); + // if(kappa_d>0.) iter->addLinearDampingTermToGrad_d(mu,kappa_d,beta,gradd); + if(kappa_d > 0.) iter->addLinearDampingTermToGrad_d(mu, kappa_d, beta, gradd); } - /* grad_log^T * [ dx ] = grad_f^T * dx + grad_x_dampingTerm^T * dx + grad_d_dampingTerm^T *ds - [ dd ] + /* grad_log^T * [ dx ] = grad_f^T * dx + grad_x_dampingTerm^T * dx + grad_d_dampingTerm^T *ds + [ dd ] */ - inline double directionalDerivative(const hiopIterate& dir) + inline double directionalDerivative(const hiopIterate& dir) { nlp->runStats.tmSolverInternal.start(); double tr = dir.get_x()->dotProductWith(*_grad_x_logbar); - tr += dir.get_d()->dotProductWith(*_grad_d_logbar); + tr += dir.get_d()->dotProductWith(*_grad_d_logbar); nlp->runStats.tmSolverInternal.stop(); return tr; } protected: hiopNlpFormulation* nlp; + private: hiopLogBarProblem() {}; hiopLogBarProblem(const hiopLogBarProblem&) {}; - hiopLogBarProblem& operator=(const hiopLogBarProblem&) {return *this;}; + hiopLogBarProblem& operator=(const hiopLogBarProblem&) { return *this; }; }; -} +} // namespace hiop #endif diff --git a/src/Optimization/hiopNlpFormulation.cpp b/src/Optimization/hiopNlpFormulation.cpp index 7ffb404de..fff3acf3c 100644 --- a/src/Optimization/hiopNlpFormulation.cpp +++ b/src/Optimization/hiopNlpFormulation.cpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -62,7 +62,7 @@ #include "hiopVectorIntSeq.hpp" -#include /* exit, EXIT_FAILURE */ +#include /* exit, EXIT_FAILURE */ #include using namespace std; @@ -70,41 +70,44 @@ namespace hiop { hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char* option_file) - : + : #ifdef HIOP_USE_MPI - mpi_init_called(false), + mpi_init_called(false), #endif - prob_type_(hiopInterfaceBase::hiopNonlinear), - nlp_evaluated_(false), - nlp_transformations_(this), - interface_base(interface_) + prob_type_(hiopInterfaceBase::hiopNonlinear), + nlp_evaluated_(false), + nlp_transformations_(this), + interface_base(interface_) { - strFixedVars_ = ""; //uninitialized - dFixedVarsTol_ = -1.; //uninitialized + strFixedVars_ = ""; // uninitialized + dFixedVarsTol_ = -1.; // uninitialized bool bret; #ifdef HIOP_USE_MPI - bret = interface_base.get_MPI_comm(comm_); assert(bret); + bret = interface_base.get_MPI_comm(comm_); + assert(bret); int nret; - //MPI may not be initialized: this occurs when a serial driver call HiOp built with MPI support on + // MPI may not be initialized: this occurs when a serial driver call HiOp built with MPI support on int initialized; - nret = MPI_Initialized( &initialized ); + nret = MPI_Initialized(&initialized); if(!initialized) { - mpi_init_called=true; - nret = MPI_Init(NULL,NULL); - assert(MPI_SUCCESS==nret); - } - - nret=MPI_Comm_rank(comm_, &rank_); assert(MPI_SUCCESS==nret); - nret=MPI_Comm_size(comm_, &num_ranks_); assert(MPI_SUCCESS==nret); + mpi_init_called = true; + nret = MPI_Init(NULL, NULL); + assert(MPI_SUCCESS == nret); + } + + nret = MPI_Comm_rank(comm_, &rank_); + assert(MPI_SUCCESS == nret); + nret = MPI_Comm_size(comm_, &num_ranks_); + assert(MPI_SUCCESS == nret); #else - //fake communicator (defined by hiop) + // fake communicator (defined by hiop) MPI_Comm comm_ = MPI_COMM_SELF; #endif options = new hiopOptionsNLP(option_file); - //logger will output on stdout on rank 0 of the MPI 'comm' communicator + // logger will output on stdout on rank 0 of the MPI 'comm' communicator log = new hiopLogger(options, stdout, 0, comm_); options->SetLog(log); @@ -112,27 +115,28 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char runStats = hiopRunStats(comm_); /* NLP members intialization */ - bret = interface_base.get_prob_sizes(n_vars_, n_cons_); assert(bret); + bret = interface_base.get_prob_sizes(n_vars_, n_cons_); + assert(bret); xl_ = nullptr; xu_ = nullptr; vars_type_ = nullptr; - ixl_ = nullptr; - ixu_ = nullptr; + ixl_ = nullptr; + ixu_ = nullptr; c_rhs_ = nullptr; cons_eq_type_ = nullptr; dl_ = nullptr; du_ = nullptr; cons_ineq_type_ = nullptr; - cons_eq_mapping_= nullptr; - cons_ineq_mapping_= nullptr; + cons_eq_mapping_ = nullptr; + cons_ineq_mapping_ = nullptr; idl_ = nullptr; idu_ = nullptr; #ifdef HIOP_USE_MPI - vec_distrib_=nullptr; + vec_distrib_ = nullptr; #endif cons_eval_type_ = -1; cons_body_ = nullptr; - cons_Jac_ = nullptr; + cons_Jac_ = nullptr; cons_lambdas_ = nullptr; temp_eq_ = nullptr; temp_ineq_ = nullptr; @@ -142,7 +146,7 @@ hiopNlpFormulation::hiopNlpFormulation(hiopInterfaceBase& interface_, const char } hiopNlpFormulation::~hiopNlpFormulation() -{ +{ delete xl_; delete xu_; delete ixl_; @@ -166,36 +170,36 @@ hiopNlpFormulation::~hiopNlpFormulation() delete options; #ifdef HIOP_USE_MPI - //some (serial) drivers call (MPI) HiOp repeatedly in an outer loop - //if we finalize here, subsequent calls to MPI will fail and break this outer loop. So we don't finalize + // some (serial) drivers call (MPI) HiOp repeatedly in an outer loop + // if we finalize here, subsequent calls to MPI will fail and break this outer loop. So we don't finalize - //if(mpi_init_called) { - // int nret=MPI_Finalize(); assert(MPI_SUCCESS==nret); - //} + // if(mpi_init_called) { + // int nret=MPI_Finalize(); assert(MPI_SUCCESS==nret); + // } #endif delete cons_body_; delete cons_Jac_; delete cons_lambdas_; delete temp_eq_; delete temp_ineq_; - delete temp_x_; + delete temp_x_; /// nlp_scaling_ and relax_bounds_ are deleted inside nlp_transformations_ } bool hiopNlpFormulation::finalizeInitialization() { - //check if there was a change in the user options that requires reinitialization of 'this' - bool doinit = false; + // check if there was a change in the user options that requires reinitialization of 'this' + bool doinit = false; if(strFixedVars_ != options->GetString("fixed_var")) { - doinit=true; + doinit = true; } const double fixedVarTol = options->GetNumeric("fixed_var_tolerance"); if(dFixedVarsTol_ != fixedVarTol) { - doinit=true; + doinit = true; } - //more checks whether we should reinitialize go here (for example change in the rescaling option) - + // more checks whether we should reinitialize go here (for example change in the rescaling option) + if(!doinit) { return true; } @@ -207,7 +211,8 @@ bool hiopNlpFormulation::finalizeInitialization() /////////////////////////////////////////////////////////////////////////// // LOWER and UPPER bound allocation and processing //////////////////////////////////////////////////////////////////////////// - bool bret = interface_base.get_prob_sizes(n_vars_, n_cons_); assert(bret); + bool bret = interface_base.get_prob_sizes(n_vars_, n_cons_); + assert(bret); nlp_transformations_.clear(); nlp_transformations_.setUserNlpNumVars(n_vars_); @@ -216,110 +221,104 @@ bool hiopNlpFormulation::finalizeInitialization() delete[] vars_type_; #ifdef HIOP_USE_MPI delete[] vec_distrib_; - vec_distrib_ = new index_type[num_ranks_+1]; - if(interface_base.get_vecdistrib_info(n_vars_,vec_distrib_)) { + vec_distrib_ = new index_type[num_ranks_ + 1]; + if(interface_base.get_vecdistrib_info(n_vars_, vec_distrib_)) { xl_ = LinearAlgebraFactory::create_vector(mem_space, n_vars_, vec_distrib_, comm_); } else { - xl_ = LinearAlgebraFactory::create_vector(mem_space, n_vars_); + xl_ = LinearAlgebraFactory::create_vector(mem_space, n_vars_); delete[] vec_distrib_; vec_distrib_ = nullptr; } #else xl_ = LinearAlgebraFactory::create_vector(mem_space, n_vars_); -#endif +#endif xu_ = xl_->alloc_clone(); bret = interface_base.get_prob_info(prob_type_); assert(bret); - int nlocal=xl_->get_local_size(); + int nlocal = xl_->get_local_size(); nlp_transformations_.setUserNlpNumLocalVars(nlocal); vars_type_ = new hiopInterfaceBase::NonlinearityType[nlocal]; // get variable bounds info from user - bret = interface_base.get_vars_info(n_vars_, xl_->local_data(), xu_->local_data(), vars_type_); + bret = interface_base.get_vars_info(n_vars_, xl_->local_data(), xu_->local_data(), vars_type_); assert(bret); - //allocate and build ixl(ow) and ix(upp) vectors + // allocate and build ixl(ow) and ix(upp) vectors delete ixl_; delete ixu_; - - ixl_ = xu_->alloc_clone(); + + ixl_ = xu_->alloc_clone(); ixu_ = xu_->alloc_clone(); // - //preprocess variables bounds - this is curently done on the CPU + // preprocess variables bounds - this is curently done on the CPU // size_type nfixed_vars_local; - process_bounds(n_bnds_low_local_,n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local); + process_bounds(n_bnds_low_local_, n_bnds_upp_local_, n_bnds_lu_, nfixed_vars_local); /////////////////////////////////////////////////////////////////////////// // Handling of fixed variables ////////////////////////////////////////////////////////////////////////// - dFixedVarsTol_ = fixedVarTol; - size_type nfixed_vars=nfixed_vars_local; + dFixedVarsTol_ = fixedVarTol; + size_type nfixed_vars = nfixed_vars_local; #ifdef HIOP_USE_MPI - int ierr = MPI_Allreduce(&nfixed_vars_local, &nfixed_vars, 1, MPI_HIOP_SIZE_TYPE, MPI_SUM, comm_); - assert(MPI_SUCCESS==ierr); + int ierr = MPI_Allreduce(&nfixed_vars_local, &nfixed_vars, 1, MPI_HIOP_SIZE_TYPE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); #endif hiopFixedVarsRemover* fixedVarsRemover = NULL; - if(nfixed_vars>0) { + if(nfixed_vars > 0) { log->printf(hovWarning, "Detected %lld fixed variables out of a total of %lld.\n", nfixed_vars, n_vars_); - if(options->GetString("fixed_var")=="remove") { + if(options->GetString("fixed_var") == "remove") { // // remove free variables // log->printf(hovWarning, "Fixed variables will be removed internally.\n"); - fixedVarsRemover = new hiopFixedVarsRemover(this, - *xl_, - *xu_, - fixedVarTol, - nfixed_vars, - nfixed_vars_local); - + fixedVarsRemover = new hiopFixedVarsRemover(this, *xl_, *xu_, fixedVarTol, nfixed_vars, nfixed_vars_local); #ifdef HIOP_USE_MPI - fixedVarsRemover->setFSVectorDistrib(vec_distrib_,num_ranks_); + fixedVarsRemover->setFSVectorDistrib(vec_distrib_, num_ranks_); fixedVarsRemover->setMPIComm(comm_); #endif - bret = fixedVarsRemover->setupDecisionVectorPart(); - assert(bret && "error while removing fixed variables"); - + bret = fixedVarsRemover->setupDecisionVectorPart(); + assert(bret && "error while removing fixed variables"); + n_vars_ = fixedVarsRemover->rs_n(); #ifdef HIOP_USE_MPI index_type* vec_distrib_rs = fixedVarsRemover->allocRSVectorDistrib(); delete[] vec_distrib_; vec_distrib_ = vec_distrib_rs; #endif - + hiopVector* xl_rs; #ifdef HIOP_USE_MPI if(vec_distrib_ != nullptr) { xl_rs = LinearAlgebraFactory::create_vector(mem_space, n_vars_, vec_distrib_, comm_); } else { - xl_rs = LinearAlgebraFactory::create_vector(mem_space, n_vars_); + xl_rs = LinearAlgebraFactory::create_vector(mem_space, n_vars_); } #else - xl_rs = LinearAlgebraFactory::create_vector(mem_space, n_vars_); -#endif // HIOP_USE_MPI - - hiopVector* xu_rs = xl_rs->alloc_clone(); - hiopVector* ixl_rs = xl_rs->alloc_clone(); + xl_rs = LinearAlgebraFactory::create_vector(mem_space, n_vars_); +#endif // HIOP_USE_MPI + + hiopVector* xu_rs = xl_rs->alloc_clone(); + hiopVector* ixl_rs = xl_rs->alloc_clone(); hiopVector* ixu_rs = xu_rs->alloc_clone(); - - fixedVarsRemover->copyFsToRs( *xl_, *xl_rs); - fixedVarsRemover->copyFsToRs( *xu_, *xu_rs); + + fixedVarsRemover->copyFsToRs(*xl_, *xl_rs); + fixedVarsRemover->copyFsToRs(*xu_, *xu_rs); fixedVarsRemover->copyFsToRs(*ixl_, *ixl_rs); fixedVarsRemover->copyFsToRs(*ixu_, *ixu_rs); - - nlocal=xl_rs->get_local_size(); + + nlocal = xl_rs->get_local_size(); hiopInterfaceBase::NonlinearityType* vars_type_rs = new hiopInterfaceBase::NonlinearityType[nlocal]; fixedVarsRemover->copyFsToRs(vars_type_, vars_type_rs); - + delete xl_; delete xu_; delete ixl_; @@ -330,34 +329,33 @@ bool hiopNlpFormulation::finalizeInitialization() ixl_ = ixl_rs; ixu_ = ixu_rs; vars_type_ = vars_type_rs; - + n_bnds_low_local_ -= nfixed_vars_local; n_bnds_upp_local_ -= nfixed_vars_local; - n_bnds_lu_ -= nfixed_vars_local; - + n_bnds_lu_ -= nfixed_vars_local; + nlp_transformations_.append(fixedVarsRemover); } else { /* - * Relax fixed variables according to 2 conditions: - * 1. bound_relax_perturb==0.0: Relax fixed variables according to fixed_var_perturb and fixed_var_tolerance. - * Other variables are not relaxed. hiopFixedVarsRelaxer is used to relax fixed var - * 2. bound_relax_perturb!=0.0: Later we will use hiopBoundsRelaxer to relax the variable and inequlity bounds, - * according to bound_relax_perturb. It will also relax the fixed variables, hence we can skip relax fixed var here. - */ - if(options->GetString("fixed_var")=="relax" && options->GetNumeric("bound_relax_perturb") == 0.0) { + * Relax fixed variables according to 2 conditions: + * 1. bound_relax_perturb==0.0: Relax fixed variables according to fixed_var_perturb and fixed_var_tolerance. + * Other variables are not relaxed. hiopFixedVarsRelaxer is used to relax fixed var + * 2. bound_relax_perturb!=0.0: Later we will use hiopBoundsRelaxer to relax the variable and inequlity bounds, + * according to bound_relax_perturb. It will also relax the fixed variables, hence we can skip relax fixed var here. + */ + if(options->GetString("fixed_var") == "relax" && options->GetNumeric("bound_relax_perturb") == 0.0) { log->printf(hovWarning, "Fixed variables will be relaxed internally.\n"); - auto* fixedVarsRelaxer = - new hiopFixedVarsRelaxer(this, *xl_, *xu_, nfixed_vars, nfixed_vars_local); + auto* fixedVarsRelaxer = new hiopFixedVarsRelaxer(this, *xl_, *xu_, nfixed_vars, nfixed_vars_local); fixedVarsRelaxer->setup(); const double fv_tol = options->GetNumeric("fixed_var_tolerance"); const double fv_per = options->GetNumeric("fixed_var_perturb"); fixedVarsRelaxer->relax(fv_tol, fv_per, *xl_, *xu_); - + nlp_transformations_.append(fixedVarsRelaxer); } else if(options->GetNumeric("bound_relax_perturb") == 0.0) { - log->printf(hovError, + log->printf(hovError, "detected fixed variables but HiOp was not instructed how to deal with them (option " "'fixed_var' is 'none').\n"); exit(EXIT_FAILURE); @@ -368,28 +366,28 @@ bool hiopNlpFormulation::finalizeInitialization() // RHS, LOWER, and UPPER bounds allocation and processing (for constraints) //////////////////////////////////////////////////////////////////////////// if(!process_constraints()) { - log->printf(hovError, "Initial processing of constraints failed.\n"); + log->printf(hovError, "Initial processing of constraints failed.\n"); return false; } if(fixedVarsRemover) { fixedVarsRemover->setupConstraintsPart(n_cons_eq_, n_cons_ineq_); } - //save the new value of 'fixed_var' option + // save the new value of 'fixed_var' option strFixedVars_ = options->GetString("fixed_var"); - //compute the overall n_low and n_upp + // compute the overall n_low and n_upp #ifdef HIOP_USE_MPI - size_type aux[3]={n_bnds_low_local_, n_bnds_upp_local_, n_bnds_lu_}; + size_type aux[3] = {n_bnds_low_local_, n_bnds_upp_local_, n_bnds_lu_}; size_type aux_g[3]; - ierr=MPI_Allreduce(aux, aux_g, 3, MPI_HIOP_SIZE_TYPE, MPI_SUM, comm_); - assert(MPI_SUCCESS==ierr); + ierr = MPI_Allreduce(aux, aux_g, 3, MPI_HIOP_SIZE_TYPE, MPI_SUM, comm_); + assert(MPI_SUCCESS == ierr); n_bnds_low_ = aux_g[0]; n_bnds_upp_ = aux_g[1]; n_bnds_lu_ = aux_g[2]; #else n_bnds_low_ = n_bnds_low_local_; - n_bnds_upp_ = n_bnds_upp_local_; //n_bnds_lu is ok + n_bnds_upp_ = n_bnds_upp_local_; // n_bnds_lu is ok #endif // @@ -401,17 +399,17 @@ bool hiopNlpFormulation::finalizeInitialization() if(options->GetString("elastic_mode") == "none") { relax_bounds_->relax(options->GetNumeric("bound_relax_perturb"), *xl_, *xu_, *dl_, *du_); } else { - relax_bounds_->relax(options->GetNumeric("elastic_mode_bound_relax_initial"), *xl_, *xu_, *dl_, *du_); + relax_bounds_->relax(options->GetNumeric("elastic_mode_bound_relax_initial"), *xl_, *xu_, *dl_, *du_); } nlp_transformations_.append(relax_bounds_); } - //reset/release info and data related to one-call constraints evaluation + // reset/release info and data related to one-call constraints evaluation cons_eval_type_ = -1; - + delete cons_body_; cons_body_ = nullptr; - + delete cons_Jac_; cons_Jac_ = NULL; @@ -435,7 +433,6 @@ bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, size_type& n_bnds_lu, size_type& nfixed_vars) { - n_bnds_low = 0; n_bnds_upp = 0; n_bnds_lu = 0; @@ -444,28 +441,29 @@ bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, #if !defined(HIOP_USE_MPI) int* vec_distrib_ = nullptr; MPI_Comm comm_ = MPI_COMM_SELF; -#endif +#endif hiopVectorPar xl_tmp(n_vars_, vec_distrib_, comm_); hiopVectorPar xu_tmp(n_vars_, vec_distrib_, comm_); hiopVectorPar ixl_tmp(n_vars_, vec_distrib_, comm_); hiopVectorPar ixu_tmp(n_vars_, vec_distrib_, comm_); - + this->xl_->copy_to_vectorpar(xl_tmp); this->xu_->copy_to_vectorpar(xu_tmp); this->ixl_->copy_to_vectorpar(ixl_tmp); this->ixu_->copy_to_vectorpar(ixu_tmp); - - double *ixl_vec = ixl_tmp.local_data_host(); - double *ixu_vec = ixu_tmp.local_data_host(); + + double* ixl_vec = ixl_tmp.local_data_host(); + double* ixu_vec = ixu_tmp.local_data_host(); double* xl_vec = xl_tmp.local_data_host(); double* xu_vec = xu_tmp.local_data_host(); #ifdef HIOP_DEEPCHECKS - const int maxBndsCloseMsgs=3; int nBndsClose=0; + const int maxBndsCloseMsgs = 3; + int nBndsClose = 0; #endif const double fixedVarTol = options->GetNumeric("fixed_var_tolerance"); - int nlocal=xl_->get_local_size(); - for(int i=0;iget_local_size(); + for(int i = 0; i < nlocal; i++) { if(xl_vec[i] > -1e20) { ixl_vec[i] = 1.; n_bnds_low++; @@ -487,24 +485,25 @@ bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, assert(xl_vec[i] <= xu_vec[i] && "please fix the inconsistent bounds, otherwise the problem is infeasible"); #endif - //if(xl_vec[i]==xu_vec[i]) { - if( xu_vec[i]<1e20 && - fabs(xl_vec[i]-xu_vec[i]) <= fixedVarTol*fmax(1.,fabs(xu_vec[i]))) { + // if(xl_vec[i]==xu_vec[i]) { + if(xu_vec[i] < 1e20 && fabs(xl_vec[i] - xu_vec[i]) <= fixedVarTol * fmax(1., fabs(xu_vec[i]))) { nfixed_vars++; } else { #ifdef HIOP_DEEPCHECKS #define min_dist 1e-8 - if(fixedVarTolprintf(hovWarning, + if(fixedVarTol < min_dist) { + if(nBndsClose < maxBndsCloseMsgs) { + if(fabs(xl_vec[i] - xu_vec[i]) / std::max(1., fabs(xu_vec[i])) < min_dist) { + log->printf(hovWarning, "Lower (%g) and upper bound (%g) for variable %d are very close. " "Consider fixing this variable or increase 'fixed_var_tolerance'.\n", - i, xl_vec[i], xu_vec[i]); + i, + xl_vec[i], + xu_vec[i]); nBndsClose++; } - } - if(nBndsClose==maxBndsCloseMsgs) { + } + if(nBndsClose == maxBndsCloseMsgs) { log->printf(hovWarning, "[further messages were surpressed]\n"); nBndsClose++; } @@ -512,25 +511,25 @@ bool hiopNlpFormulation::process_bounds(size_type& n_bnds_low, #endif } } - + this->xl_->copy_from_vectorpar(xl_tmp); this->xu_->copy_from_vectorpar(xu_tmp); this->ixl_->copy_from_vectorpar(ixl_tmp); this->ixu_->copy_from_vectorpar(ixu_tmp); return true; -} +} bool hiopNlpFormulation::process_constraints() { bool bret; // deallocate if previously allocated - delete c_rhs_; + delete c_rhs_; delete[] cons_eq_type_; delete dl_; delete du_; - delete idl_; + delete idl_; delete idu_; delete[] cons_ineq_type_; delete cons_eq_mapping_; @@ -538,21 +537,21 @@ bool hiopNlpFormulation::process_constraints() string mem_space = options->GetString("mem_space"); - hiopVector* gl = LinearAlgebraFactory::create_vector(mem_space, n_cons_); + hiopVector* gl = LinearAlgebraFactory::create_vector(mem_space, n_cons_); hiopVector* gu = LinearAlgebraFactory::create_vector(mem_space, n_cons_); hiopInterfaceBase::NonlinearityType* cons_type = new hiopInterfaceBase::NonlinearityType[n_cons_]; - //get constraints information and transfer to host for pre-processing + // get constraints information and transfer to host for pre-processing bret = interface_base.get_cons_info(n_cons_, gl->local_data(), gu->local_data(), cons_type); if(!bret) { assert(bret); return false; } - assert(gl->get_local_size()==n_cons_); - assert(gu->get_local_size()==n_cons_); + assert(gl->get_local_size() == n_cons_); + assert(gu->get_local_size() == n_cons_); - // transfer to host + // transfer to host hiopVectorPar gl_host(n_cons_); hiopVectorPar gu_host(n_cons_); gl->copy_to_vectorpar(gl_host); @@ -561,21 +560,21 @@ bool hiopNlpFormulation::process_constraints() double* gl_vec = gl_host.local_data(); double* gu_vec = gu_host.local_data(); n_cons_eq_ = 0; - n_cons_ineq_ = 0; - for(int i=0;i-1e20) { - idl_vec[i]=1.; - n_ineq_low_++; - if(du_vec[i]< 1e20) { + for(int i = 0; i < n_cons_ineq_; i++) { + if(dl_vec[i] > -1e20) { + idl_vec[i] = 1.; + n_ineq_low_++; + if(du_vec[i] < 1e20) { n_ineq_lu_++; } - } - else { - idl_vec[i]=0.; + } else { + idl_vec[i] = 0.; } - if(du_vec[i]< 1e20) { - idu_vec[i]=1.; - n_ineq_upp_++; + if(du_vec[i] < 1e20) { + idu_vec[i] = 1.; + n_ineq_upp_++; } else { - idu_vec[i]=0.; + idu_vec[i] = 0.; } } @@ -649,33 +648,32 @@ bool hiopNlpFormulation::process_constraints() // c_rhs_ = LinearAlgebraFactory::create_vector(mem_space, n_cons_eq_); c_rhs_->copy_from_vectorpar(c_rhs_host); - + dl_ = LinearAlgebraFactory::create_vector(mem_space, n_cons_ineq_); dl_->copy_from_vectorpar(dl_host); du_ = dl_->alloc_clone(); du_->copy_from_vectorpar(du_host); - + cons_eq_mapping_ = LinearAlgebraFactory::create_vector_int(mem_space, n_cons_eq_); cons_eq_mapping_->copy_from_vectorseq(cons_eq_mapping_host); cons_ineq_mapping_ = LinearAlgebraFactory::create_vector_int(mem_space, n_cons_ineq_); cons_ineq_mapping_->copy_from_vectorseq(cons_ineq_mapping_host); - + idl_ = dl_->alloc_clone(); idl_->copy_from_vectorpar(idl_host); idu_ = du_->alloc_clone(); idu_->copy_from_vectorpar(idu_host); - + return true; } -bool hiopNlpFormulation::apply_scaling(hiopVector& c, hiopVector& d, hiopVector& gradf, - hiopMatrix& Jac_c, hiopMatrix& Jac_d) +bool hiopNlpFormulation::apply_scaling(hiopVector& c, hiopVector& d, hiopVector& gradf, hiopMatrix& Jac_c, hiopMatrix& Jac_d) { - //check if we need to do scaling + // check if we need to do scaling if("none" == options->GetString("scaling_type")) { return false; } - + const double max_grad = options->GetNumeric("scaling_max_grad"); const double max_obj_grad = options->GetNumeric("scaling_max_obj_grad"); const double max_con_grad = options->GetNumeric("scaling_max_con_grad"); @@ -688,54 +686,34 @@ bool hiopNlpFormulation::apply_scaling(hiopVector& c, hiopVector& d, hiopVector& con_grad_target = max_con_grad; } - if(gradf.infnorm() < obj_grad_target && - Jac_c.max_abs_value() < con_grad_target && - Jac_d.max_abs_value() < con_grad_target) - { + if(gradf.infnorm() < obj_grad_target && Jac_c.max_abs_value() < con_grad_target && + Jac_d.max_abs_value() < con_grad_target) { return false; } - - nlp_scaling_ = new hiopNLPObjGradScaling(this, - c, - d, - gradf, - Jac_c, - Jac_d, - *cons_eq_mapping_, - *cons_ineq_mapping_); - + + nlp_scaling_ = new hiopNLPObjGradScaling(this, c, d, gradf, Jac_c, Jac_d, *cons_eq_mapping_, *cons_ineq_mapping_); + c_rhs_ = nlp_scaling_->apply_to_cons_eq(*c_rhs_, n_cons_eq_); dl_ = nlp_scaling_->apply_to_cons_ineq(*dl_, n_cons_ineq_); du_ = nlp_scaling_->apply_to_cons_ineq(*du_, n_cons_ineq_); nlp_transformations_.append(nlp_scaling_); - + return true; } +hiopVector* hiopNlpFormulation::alloc_primal_vec() const { return xl_->alloc_clone(); } -hiopVector* hiopNlpFormulation::alloc_primal_vec() const -{ - return xl_->alloc_clone(); -} - -hiopVector* hiopNlpFormulation::alloc_dual_eq_vec() const -{ - return c_rhs_->alloc_clone(); -} +hiopVector* hiopNlpFormulation::alloc_dual_eq_vec() const { return c_rhs_->alloc_clone(); } -hiopVector* hiopNlpFormulation::alloc_dual_ineq_vec() const -{ - return dl_->alloc_clone(); -} +hiopVector* hiopNlpFormulation::alloc_dual_ineq_vec() const { return dl_->alloc_clone(); } hiopVector* hiopNlpFormulation::alloc_dual_vec() const { - assert(n_cons_eq_+n_cons_ineq_ == n_cons_); - hiopVector* ret = LinearAlgebraFactory::create_vector(options->GetString("mem_space"), - n_cons_); + assert(n_cons_eq_ + n_cons_ineq_ == n_cons_); + hiopVector* ret = LinearAlgebraFactory::create_vector(options->GetString("mem_space"), n_cons_); #ifdef HIOP_DEEPCHECKS - assert(ret!=NULL); + assert(ret != NULL); #endif return ret; } @@ -746,7 +724,8 @@ bool hiopNlpFormulation::eval_f(hiopVector& x, bool new_x, double& f) runStats.tmEvalObj.start(); bool bret = interface_base.eval_f(nlp_transformations_.n_pre(), xx->local_data_const(), new_x, f); - runStats.tmEvalObj.stop(); runStats.nEvalObj++; + runStats.tmEvalObj.stop(); + runStats.nEvalObj++; f = nlp_transformations_.apply_to_obj(f); return bret; @@ -754,8 +733,7 @@ bool hiopNlpFormulation::eval_f(hiopVector& x, bool new_x, double& f) bool hiopNlpFormulation::eval_grad_f(hiopVector& x, bool new_x, hiopVector& gradf) { - if((prob_type_==hiopInterfaceBase::hiopLinear || prob_type_==hiopInterfaceBase::hiopQuadratic) - && nlp_evaluated_) { + if((prob_type_ == hiopInterfaceBase::hiopLinear || prob_type_ == hiopInterfaceBase::hiopQuadratic) && nlp_evaluated_) { return true; } @@ -765,7 +743,8 @@ bool hiopNlpFormulation::eval_grad_f(hiopVector& x, bool new_x, hiopVector& grad bool bret; runStats.tmEvalGrad_f.start(); bret = interface_base.eval_grad_f(nlp_transformations_.n_pre(), xx->local_data_const(), new_x, gradff->local_data()); - runStats.tmEvalGrad_f.stop(); runStats.nEvalGrad_f++; + runStats.tmEvalGrad_f.stop(); + runStats.nEvalGrad_f++; gradf = *(nlp_transformations_.apply_to_grad_obj(*gradff)); @@ -781,18 +760,17 @@ bool hiopNlpFormulation::get_starting_point(hiopVector& x0_for_hiop, bool& slacks_avail, hiopVector& d0) { - bool bret; + bool bret; - hiopVector* lambdas = hiop::LinearAlgebraFactory:: - create_vector(options->GetString("mem_space"), - yc0_for_hiop.get_size() + yd0_for_hiop.get_size()); + hiopVector* lambdas = hiop::LinearAlgebraFactory::create_vector(options->GetString("mem_space"), + yc0_for_hiop.get_size() + yd0_for_hiop.get_size()); hiopVector* x0_for_user = nlp_transformations_.apply_inv_to_x(x0_for_hiop, true); double* zL0_for_user = zL0_for_hiop.local_data(); double* zU0_for_user = zU0_for_hiop.local_data(); double* lambda_for_user = lambdas->local_data(); double* d_for_user = d0.local_data(); - + bret = interface_base.get_starting_point(nlp_transformations_.n_pre(), n_cons_, x0_for_user->local_data(), @@ -803,18 +781,17 @@ bool hiopNlpFormulation::get_starting_point(hiopVector& x0_for_hiop, slacks_avail, d_for_user); if(duals_avail) { - - assert(n_cons_eq_ == yc0_for_hiop.get_size() && "when did the cons change?"); + assert(n_cons_eq_ == yc0_for_hiop.get_size() && "when did the cons change?"); assert(n_cons_ineq_ == yd0_for_hiop.get_size() && "when did the cons change?"); - assert(n_cons_eq_+n_cons_ineq_ == n_cons_); - - //copy back + assert(n_cons_eq_ + n_cons_ineq_ == n_cons_); + + // copy back lambdas->copy_to_two_vec_w_pattern(yc0_for_hiop, *cons_eq_mapping_, yd0_for_hiop, *cons_ineq_mapping_); } if(!bret) { bret = interface_base.get_starting_point(nlp_transformations_.n_pre(), x0_for_user->local_data()); } - + if(bret) { nlp_transformations_.apply_to_x(*x0_for_user, x0_for_hiop); } @@ -833,11 +810,11 @@ bool hiopNlpFormulation::get_warmstart_point(hiopVector& x0_for_hiop, hiopVector& vl0, hiopVector& vu0) { - bool bret; + bool bret; hiopVector* lambdas = hiop::LinearAlgebraFactory::create_vector(options->GetString("mem_space"), yc0_for_hiop.get_size() + yd0_for_hiop.get_size()); - + hiopVector* x0_for_user = nlp_transformations_.apply_inv_to_x(x0_for_hiop, true); double* zL0_for_user = zL0_for_hiop.local_data(); double* zU0_for_user = zU0_for_hiop.local_data(); @@ -845,7 +822,7 @@ bool hiopNlpFormulation::get_warmstart_point(hiopVector& x0_for_hiop, double* d_for_user = d0.local_data(); double* vl_for_user = vl0.local_data(); double* vu_for_user = vu0.local_data(); - + bret = interface_base.get_warmstart_point(nlp_transformations_.n_pre(), n_cons_, x0_for_user->local_data(), @@ -856,18 +833,18 @@ bool hiopNlpFormulation::get_warmstart_point(hiopVector& x0_for_hiop, vl_for_user, vu_for_user); { - assert(n_cons_eq_ == yc0_for_hiop.get_size() && "when did the cons change?"); + assert(n_cons_eq_ == yc0_for_hiop.get_size() && "when did the cons change?"); assert(n_cons_ineq_ == yd0_for_hiop.get_size() && "when did the cons change?"); - assert(n_cons_eq_+n_cons_ineq_ == n_cons_); - - //copy back + assert(n_cons_eq_ + n_cons_ineq_ == n_cons_); + + // copy back lambdas->copy_to_two_vec_w_pattern(yc0_for_hiop, *cons_eq_mapping_, yd0_for_hiop, *cons_ineq_mapping_); } - + if(!bret) { bret = interface_base.get_starting_point(nlp_transformations_.n_pre(), x0_for_user->local_data()); } - + if(bret) { nlp_transformations_.apply_to_x(*x0_for_user, x0_for_hiop); } @@ -878,14 +855,11 @@ bool hiopNlpFormulation::get_warmstart_point(hiopVector& x0_for_hiop, return bret; } - - bool hiopNlpFormulation::eval_c(hiopVector& x, bool new_x, hiopVector& c) { hiopVector* xx = nlp_transformations_.apply_inv_to_x(x, new_x); hiopVector* cc = &c; // nlp_transformations_.apply_inv_to_cons_eq(c, n_cons_eq_); // NOT required - runStats.tmEvalCons.start(); bool bret = interface_base.eval_cons(nlp_transformations_.n_pre(), @@ -895,7 +869,8 @@ bool hiopNlpFormulation::eval_c(hiopVector& x, bool new_x, hiopVector& c) xx->local_data_const(), new_x, cc->local_data()); - runStats.tmEvalCons.stop(); runStats.nEvalCons_eq++; + runStats.tmEvalCons.stop(); + runStats.nEvalCons_eq++; // scale the constraint c = *(nlp_transformations_.apply_to_cons_eq(c, n_cons_eq_)); @@ -915,7 +890,8 @@ bool hiopNlpFormulation::eval_d(hiopVector& x, bool new_x, hiopVector& d) xx->local_data_const(), new_x, dd->local_data()); - runStats.tmEvalCons.stop(); runStats.nEvalCons_ineq++; + runStats.tmEvalCons.stop(); + runStats.nEvalCons_ineq++; // scale the constraint d = *(nlp_transformations_.apply_to_cons_ineq(d, n_cons_ineq_)); @@ -929,7 +905,7 @@ bool hiopNlpFormulation::eval_c_d(hiopVector& x, bool new_x, hiopVector& c, hiop assert(cons_body_ == nullptr); assert(NULL == cons_Jac_); if(!eval_c(x, new_x, c)) { - //test if eval_d also fails; this means we should use one-call constraints/Jacobian evaluation + // test if eval_d also fails; this means we should use one-call constraints/Jacobian evaluation if(!eval_d(x, new_x, d)) { cons_eval_type_ = 1; cons_body_ = this->alloc_dual_vec(); @@ -946,9 +922,10 @@ bool hiopNlpFormulation::eval_c_d(hiopVector& x, bool new_x, hiopVector& c, hiop } if(0 == cons_eval_type_) { - if(do_eval_c) if(!eval_c(x, new_x, c)) { - return false; - } + if(do_eval_c) + if(!eval_c(x, new_x, c)) { + return false; + } if(!eval_d(x, new_x, d)) { return false; } @@ -967,27 +944,26 @@ bool hiopNlpFormulation::eval_c_d(hiopVector& x, bool new_x, hiopVector& c, hiop xx->local_data_const(), new_x, cons_body_->local_data()); - //copy back to c and d + // copy back to c and d cons_body_->copy_to_two_vec_w_pattern(c, *cons_eq_mapping_, d, *cons_ineq_mapping_); - + // scale c c = *(nlp_transformations_.apply_to_cons_eq(c, n_cons_eq_)); - + // scale d d = *(nlp_transformations_.apply_to_cons_ineq(d, n_cons_ineq_)); - + runStats.tmEvalCons.stop(); runStats.nEvalCons_eq++; runStats.nEvalCons_ineq++; - + return bret; } } bool hiopNlpFormulation::eval_Jac_c_d(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) { - if((prob_type_==hiopInterfaceBase::hiopLinear || prob_type_==hiopInterfaceBase::hiopQuadratic) - && nlp_evaluated_) { + if((prob_type_ == hiopInterfaceBase::hiopLinear || prob_type_ == hiopInterfaceBase::hiopQuadratic) && nlp_evaluated_) { return true; } @@ -997,7 +973,7 @@ bool hiopNlpFormulation::eval_Jac_c_d(hiopVector& x, bool new_x, hiopMatrix& Jac assert(cons_body_ == nullptr); assert(NULL == cons_Jac_); if(!eval_Jac_c(x, new_x, Jac_c)) { - //test if eval_d also fails; this means we should use one-call constraints/Jacobian evaluation + // test if eval_d also fails; this means we should use one-call constraints/Jacobian evaluation if(!eval_Jac_d(x, new_x, Jac_d)) { cons_eval_type_ = 1; cons_body_ = this->alloc_dual_vec(); @@ -1026,26 +1002,25 @@ bool hiopNlpFormulation::eval_Jac_c_d(hiopVector& x, bool new_x, hiopMatrix& Jac assert(1 == cons_eval_type_); assert(cons_body_); assert(cons_Jac_); - + return eval_Jac_c_d_interface_impl(x, new_x, Jac_c, Jac_d); } return true; } -void hiopNlpFormulation:: -get_dual_solutions(const hiopIterate& it, double* zl_a, double* zu_a, double* lambda_a) +void hiopNlpFormulation::get_dual_solutions(const hiopIterate& it, double* zl_a, double* zu_a, double* lambda_a) { - if(nullptr==nlp_scaling_) { + if(nullptr == nlp_scaling_) { const hiopVector& zl = *it.get_zl(); const hiopVector& zu = *it.get_zu(); zl.copyTo(zl_a); zu.copyTo(zu_a); - + if(cons_lambdas_ == nullptr) { cons_lambdas_ = this->alloc_dual_vec(); } cons_lambdas_->copy_from_two_vec_w_pattern(*it.get_yc(), *cons_eq_mapping_, *it.get_yd(), *cons_ineq_mapping_); - cons_lambdas_->copyTo(lambda_a); + cons_lambdas_->copyTo(lambda_a); } else { const double obj_scale_ext_to_hiop = this->get_obj_scale(); if(temp_x_ == nullptr) { @@ -1067,13 +1042,12 @@ get_dual_solutions(const hiopIterate& it, double* zl_a, double* zu_a, double* la if(cons_lambdas_ == nullptr) { cons_lambdas_ = this->alloc_dual_vec(); } - temp_eq_ = nlp_transformations_.apply_to_cons_eq(*temp_eq_, n_cons_eq_); + temp_eq_ = nlp_transformations_.apply_to_cons_eq(*temp_eq_, n_cons_eq_); temp_ineq_ = nlp_transformations_.apply_to_cons_ineq(*temp_ineq_, n_cons_ineq_); cons_lambdas_->copy_from_two_vec_w_pattern(*temp_eq_, *cons_eq_mapping_, *temp_ineq_, *cons_ineq_mapping_); cons_lambdas_->scale(obj_scale_ext_to_hiop); cons_lambdas_->copyTo(lambda_a); } - } void hiopNlpFormulation::user_callback_solution(hiopSolveStatus status, @@ -1084,9 +1058,9 @@ void hiopNlpFormulation::user_callback_solution(hiopSolveStatus status, hiopVector& d, hiopVector& y_c, hiopVector& y_d, - double obj_value) + double obj_value) { - assert(x.get_size()==n_vars_); + assert(x.get_size() == n_vars_); assert(y_c.get_size() == n_cons_eq_); assert(y_d.get_size() == n_cons_ineq_); @@ -1095,7 +1069,7 @@ void hiopNlpFormulation::user_callback_solution(hiopSolveStatus status, cons_lambdas_ = this->alloc_dual_vec(); } if(nlp_scaling_) { - // return unscaled values. + // return unscaled values. // it's safe to modify these values since this function is called in the end // y_unscaled = y_scale*y_scaled/obj_scale, z_unscaled = z_scaled/obj_scale y_c = *(nlp_transformations_.apply_to_cons_eq(y_c, n_cons_eq_)); @@ -1107,12 +1081,12 @@ void hiopNlpFormulation::user_callback_solution(hiopSolveStatus status, } cons_lambdas_->copy_from_two_vec_w_pattern(y_c, *cons_eq_mapping_, y_d, *cons_ineq_mapping_); - //concatenate 'c' and 'd' into user's constraint body + // concatenate 'c' and 'd' into user's constraint body if(cons_body_ == nullptr) { cons_body_ = cons_lambdas_->alloc_clone(); } if(nlp_scaling_) { - // return unscaled values + // return unscaled values c = *(nlp_transformations_.apply_inv_to_cons_eq(c, n_cons_eq_)); d = *(nlp_transformations_.apply_inv_to_cons_ineq(d, n_cons_ineq_)); } @@ -1121,12 +1095,11 @@ void hiopNlpFormulation::user_callback_solution(hiopSolveStatus status, //! todo -> test this when fixed variables are removed -> the internal //! zl and zu may have different sizes than what user expects since HiOp removes //! variables internally - if(options->GetString("callback_mem_space")=="host" && options->GetString("mem_space")=="device") { - + if(options->GetString("callback_mem_space") == "host" && options->GetString("mem_space") == "device") { #if !defined(HIOP_USE_MPI) int* vec_distrib_ = nullptr; MPI_Comm comm_ = MPI_COMM_SELF; -#endif +#endif hiopVectorPar x_host(n_vars_, vec_distrib_, comm_); hiopVectorPar zl_host(n_vars_, vec_distrib_, comm_); hiopVectorPar zu_host(n_vars_, vec_distrib_, comm_); @@ -1140,26 +1113,25 @@ void hiopNlpFormulation::user_callback_solution(hiopSolveStatus status, cons_lambdas_->copy_to_vectorpar(cons_lambdas_host); interface_base.solution_callback(status, - (int)n_vars_, - x_host.local_data_const(), - zl_host.local_data_const(), - zu_host.local_data_const(), - (int)n_cons_, - cons_body_host.local_data_const(), - cons_lambdas_host.local_data_const(), - obj_value/obj_scale_ext_to_hiop); + (int)n_vars_, + x_host.local_data_const(), + zl_host.local_data_const(), + zu_host.local_data_const(), + (int)n_cons_, + cons_body_host.local_data_const(), + cons_lambdas_host.local_data_const(), + obj_value / obj_scale_ext_to_hiop); } else { interface_base.solution_callback(status, - (int)n_vars_, - x.local_data_const(), - z_L.local_data_const(), - z_U.local_data_const(), - (int)n_cons_, - cons_body_->local_data_const(), - cons_lambdas_->local_data_const(), - obj_value/obj_scale_ext_to_hiop); - } - + (int)n_vars_, + x.local_data_const(), + z_L.local_data_const(), + z_U.local_data_const(), + (int)n_cons_, + cons_body_->local_data_const(), + cons_lambdas_->local_data_const(), + obj_value / obj_scale_ext_to_hiop); + } } bool hiopNlpFormulation::user_callback_iterate(int iter, @@ -1181,8 +1153,8 @@ bool hiopNlpFormulation::user_callback_iterate(int iter, double alpha_pr, int ls_trials) { - assert(x.get_size()==n_vars_); - assert(c.get_size()+d.get_size()==n_cons_); + assert(x.get_size() == n_vars_); + assert(c.get_size() + d.get_size() == n_cons_); assert(y_c.get_size() == n_cons_eq_); assert(y_d.get_size() == n_cons_ineq_); @@ -1192,7 +1164,7 @@ bool hiopNlpFormulation::user_callback_iterate(int iter, } cons_lambdas_->copy_from_two_vec_w_pattern(y_c, *cons_eq_mapping_, y_d, *cons_ineq_mapping_); - //concatenate 'c' and 'd' into user's constrainty body + // concatenate 'c' and 'd' into user's constrainty body if(cons_body_ == NULL) { cons_body_ = cons_lambdas_->alloc_clone(); } @@ -1204,32 +1176,31 @@ bool hiopNlpFormulation::user_callback_iterate(int iter, bool bret{false}; - if(options->GetString("callback_mem_space")=="host" && options->GetString("mem_space")=="device") { - + if(options->GetString("callback_mem_space") == "host" && options->GetString("mem_space") == "device") { #if !defined(HIOP_USE_MPI) int* vec_distrib_ = nullptr; MPI_Comm comm_ = MPI_COMM_SELF; -#endif +#endif hiopVectorPar x_host(n_vars_, vec_distrib_, comm_); x.copy_to_vectorpar(x_host); hiopVectorPar s_host(n_cons_ineq_, vec_distrib_, comm_); s.copy_to_vectorpar(s_host); - + hiopVectorPar zl_host(n_vars_, vec_distrib_, comm_); z_L.copy_to_vectorpar(zl_host); - + hiopVectorPar zu_host(n_vars_, vec_distrib_, comm_); z_U.copy_to_vectorpar(zu_host); - + hiopVectorPar cons_body_host(n_cons_, vec_distrib_, comm_); cons_body_->copy_to_vectorpar(cons_body_host); - + hiopVectorPar cons_lambdas_host(n_cons_); cons_lambdas_->copy_to_vectorpar(cons_lambdas_host); bret = interface_base.iterate_callback(iter, - obj_value/this->get_obj_scale(), + obj_value / this->get_obj_scale(), logbar_obj_value, (int)n_vars_, x_host.local_data_const(), @@ -1249,7 +1220,7 @@ bool hiopNlpFormulation::user_callback_iterate(int iter, ls_trials); } else { bret = interface_base.iterate_callback(iter, - obj_value/this->get_obj_scale(), + obj_value / this->get_obj_scale(), logbar_obj_value, (int)n_vars_, x.local_data_const(), @@ -1267,8 +1238,8 @@ bool hiopNlpFormulation::user_callback_iterate(int iter, alpha_du, alpha_pr, ls_trials); - } - return bret; + } + return bret; } bool hiopNlpFormulation::user_callback_full_iterate(hiopVector& x, @@ -1280,24 +1251,23 @@ bool hiopNlpFormulation::user_callback_full_iterate(hiopVector& x, hiopVector& v_L, hiopVector& v_U) { - assert(x.get_size()==n_vars_); + assert(x.get_size() == n_vars_); assert(y_c.get_size() == n_cons_eq_); assert(y_d.get_size() == n_cons_ineq_); bool bret{false}; - if(options->GetString("callback_mem_space")=="host" && options->GetString("mem_space")=="device") { - + if(options->GetString("callback_mem_space") == "host" && options->GetString("mem_space") == "device") { #if !defined(HIOP_USE_MPI) int* vec_distrib_ = nullptr; MPI_Comm comm_ = MPI_COMM_SELF; -#endif +#endif hiopVectorPar x_host(n_vars_, vec_distrib_, comm_); x.copy_to_vectorpar(x_host); - + hiopVectorPar zl_host(n_vars_, vec_distrib_, comm_); z_L.copy_to_vectorpar(zl_host); - + hiopVectorPar zu_host(n_vars_, vec_distrib_, comm_); z_U.copy_to_vectorpar(zu_host); @@ -1312,9 +1282,9 @@ bool hiopNlpFormulation::user_callback_full_iterate(hiopVector& x, hiopVectorPar vl_host(n_cons_ineq_, vec_distrib_, comm_); v_L.copy_to_vectorpar(zl_host); - + hiopVectorPar vu_host(n_cons_ineq_, vec_distrib_, comm_); - v_U.copy_to_vectorpar(zu_host); + v_U.copy_to_vectorpar(zu_host); bret = interface_base.iterate_full_callback(x_host.local_data_const(), zl_host.local_data_const(), @@ -1333,11 +1303,10 @@ bool hiopNlpFormulation::user_callback_full_iterate(hiopVector& x, s.local_data_const(), v_L.local_data_const(), v_U.local_data_const()); - } - return bret; + } + return bret; } - bool hiopNlpFormulation::user_force_update(int iter, double& obj_value, hiopVector& x, @@ -1352,15 +1321,15 @@ bool hiopNlpFormulation::user_force_update(int iter, double& alpha_pr) { bool retval; - assert(x.get_size()==n_vars_); - assert(c.get_size()+d.get_size()==n_cons_); + assert(x.get_size() == n_vars_); + assert(c.get_size() + d.get_size() == n_cons_); assert(y_c.get_size() == n_cons_eq_); assert(y_d.get_size() == n_cons_ineq_); // force update x retval = interface_base.force_update_x((int)n_vars_, x.local_data()); - + assert(retval); return true; @@ -1368,40 +1337,39 @@ bool hiopNlpFormulation::user_force_update(int iter, void hiopNlpFormulation::print(FILE* f, const char* msg, int rank) const { - int myrank=0; + int myrank = 0; #ifdef HIOP_USE_MPI - if(rank>=0) { - int ierr = MPI_Comm_rank(comm_, &myrank); assert(ierr==MPI_SUCCESS); - } + if(rank >= 0) { + int ierr = MPI_Comm_rank(comm_, &myrank); + assert(ierr == MPI_SUCCESS); + } #endif - if(myrank==rank || rank==-1) { - if(NULL==f) f=stdout; + if(myrank == rank || rank == -1) { + if(NULL == f) f = stdout; if(msg) { fprintf(f, "%s\n", msg); - } else { + } else { fprintf(f, "NLP summary\n"); } fprintf(f, "Total number of variables: %d\n", n_vars_); - fprintf(f, " lower/upper/lower_and_upper bounds: %d / %d / %d\n", - n_bnds_low_, n_bnds_upp_, n_bnds_lu_); + fprintf(f, " lower/upper/lower_and_upper bounds: %d / %d / %d\n", n_bnds_low_, n_bnds_upp_, n_bnds_lu_); fprintf(f, "Total number of equality constraints: %d\n", n_cons_eq_); fprintf(f, "Total number of inequality constraints: %d\n", n_cons_ineq_); - fprintf(f, " lower/upper/lower_and_upper bounds: %d / %d / %d\n", - n_ineq_low_, n_ineq_upp_, n_ineq_lu_); - } + fprintf(f, " lower/upper/lower_and_upper bounds: %d / %d / %d\n", n_ineq_low_, n_ineq_upp_, n_ineq_lu_); + } } -double hiopNlpFormulation::get_obj_scale() const +double hiopNlpFormulation::get_obj_scale() const { - if(nlp_scaling_){ + if(nlp_scaling_) { return nlp_scaling_->get_obj_scale(); } return 1.0; } void hiopNlpFormulation::adjust_bounds(const hiopIterate& it) -{ +{ xl_->copy_from_w_pattern(*it.get_x(), *ixl_); xl_->axpy_w_pattern(-1.0, *it.get_sxl(), *ixl_); @@ -1421,24 +1389,18 @@ void hiopNlpFormulation::reset_bounds(double bound_relax_perturb) } /* *********************************************************************************** - * hiopNlpDenseConstraints class implementation + * hiopNlpDenseConstraints class implementation * *********************************************************************************** -*/ + */ -hiopNlpDenseConstraints::hiopNlpDenseConstraints(hiopInterfaceDenseConstraints& interface_, - const char* option_file) - : hiopNlpFormulation(interface_, option_file), interface(interface_) -{ -} +hiopNlpDenseConstraints::hiopNlpDenseConstraints(hiopInterfaceDenseConstraints& interface_, const char* option_file) + : hiopNlpFormulation(interface_, option_file), + interface(interface_) +{} -hiopNlpDenseConstraints::~hiopNlpDenseConstraints() -{ -} +hiopNlpDenseConstraints::~hiopNlpDenseConstraints() {} -bool hiopNlpDenseConstraints::finalizeInitialization() -{ - return hiopNlpFormulation::finalizeInitialization(); -} +bool hiopNlpDenseConstraints::finalizeInitialization() { return hiopNlpFormulation::finalizeInitialization(); } hiopDualsLsqUpdate* hiopNlpDenseConstraints::alloc_duals_lsq_updater() { @@ -1446,7 +1408,7 @@ hiopDualsLsqUpdate* hiopNlpDenseConstraints::alloc_duals_lsq_updater() } bool hiopNlpDenseConstraints::eval_Jac_c(hiopVector& x, bool new_x, double* Jac_c) -{ +{ #if 0 if((prob_type_==hiopInterfaceBase::hiopLinear || prob_type_==hiopInterfaceBase::hiopQuadratic) && nlp_evaluated_) { @@ -1463,9 +1425,9 @@ bool hiopNlpDenseConstraints::eval_Jac_c(hiopVector& x, bool new_x, double* Jac_ runStats.tmEvalJac_con.stop(); runStats.nEvalJac_con_eq++; Jac_c = nlp_transformations_.apply_to_jacob_eq(Jac_c_user, n_cons_eq_); -#endif // 0 +#endif // 0 - assert(0&&"not needed"); + assert(0 && "not needed"); return false; } bool hiopNlpDenseConstraints::eval_Jac_d(hiopVector& x, bool new_x, double* Jac_d) @@ -1486,15 +1448,13 @@ bool hiopNlpDenseConstraints::eval_Jac_d(hiopVector& x, bool new_x, double* Jac_ runStats.tmEvalJac_con.stop(); runStats.nEvalJac_con_ineq++; Jac_d = nlp_transformations_.apply_to_jacob_ineq(Jac_d_user, n_cons_ineq_); -#endif // 0 +#endif // 0 - assert(0&&"not needed"); + assert(0 && "not needed"); return false; } -bool hiopNlpDenseConstraints::eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, - hiopMatrix& Jac_c, - hiopMatrix& Jac_d) +bool hiopNlpDenseConstraints::eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) { hiopMatrixDense* cons_Jac_de = dynamic_cast(cons_Jac_); if(cons_Jac_de == NULL) { @@ -1511,27 +1471,29 @@ bool hiopNlpDenseConstraints::eval_Jac_c_d_interface_impl(hiopVector& x, bool ne log->printf(hovError, "[internal error] hiopNlpDenseConstraints NLP received an unexpected matrix\n"); return false; } - + runStats.tmEvalJac_con.start(); - bool bret = interface.eval_Jac_cons(nlp_transformations_.n_pre(), n_cons_, - x_user->local_data_const(), new_x, + bool bret = interface.eval_Jac_cons(nlp_transformations_.n_pre(), + n_cons_, + x_user->local_data_const(), + new_x, cons_Jac_user_de->local_data()); - + cons_Jac_ = nlp_transformations_.apply_to_jacob_cons(*Jac_user, n_cons_); - + hiopMatrixDense* Jac_cde = dynamic_cast(&Jac_c); hiopMatrixDense* Jac_dde = dynamic_cast(&Jac_d); - if(Jac_cde==NULL || Jac_dde==NULL) { + if(Jac_cde == NULL || Jac_dde == NULL) { log->printf(hovError, "[internal error] hiopNlpDenseConstraints NLP works only with dense matrices\n"); return false; - } - + } + assert(cons_Jac_de->local_data() == Jac_consde && "mismatch between Jacobian mem adress pre- and post-transformations should not happen"); Jac_cde->copyRowsFrom(*cons_Jac_, cons_eq_mapping_->local_data_const(), n_cons_eq_); Jac_dde->copyRowsFrom(*cons_Jac_, cons_ineq_mapping_->local_data_const(), n_cons_ineq_); - + // scale Jacobian matrices Jac_c = *(nlp_transformations_.apply_inv_to_jacob_eq(Jac_c, n_cons_eq_)); Jac_d = *(nlp_transformations_.apply_inv_to_jacob_ineq(Jac_d, n_cons_ineq_)); @@ -1545,20 +1507,18 @@ bool hiopNlpDenseConstraints::eval_Jac_c_d_interface_impl(hiopVector& x, bool ne bool hiopNlpDenseConstraints::eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& Jac_c) { - if((prob_type_==hiopInterfaceBase::hiopLinear || prob_type_==hiopInterfaceBase::hiopQuadratic) - && nlp_evaluated_) { + if((prob_type_ == hiopInterfaceBase::hiopLinear || prob_type_ == hiopInterfaceBase::hiopQuadratic) && nlp_evaluated_) { return true; } hiopMatrixDense* Jac_cde = dynamic_cast(&Jac_c); - if(Jac_cde==NULL) { + if(Jac_cde == NULL) { log->printf(hovError, "[internal error] hiopNlpDenseConstraints NLP works only with dense matrices\n"); return false; } else { - hiopVector* x_user = nlp_transformations_.apply_inv_to_x(x, new_x); hiopMatrix* Jac_c_user = nlp_transformations_.apply_inv_to_jacob_eq(Jac_c, n_cons_eq_); - if(Jac_c_user==nullptr) { + if(Jac_c_user == nullptr) { log->printf(hovError, "[internal error] hiopFixedVarsRemover works only with dense matrices\n"); return false; } @@ -1566,16 +1526,21 @@ bool hiopNlpDenseConstraints::eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& assert(Jac_c_user_de); runStats.tmEvalJac_con.start(); - bool bret = interface.eval_Jac_cons(nlp_transformations_.n_pre(), n_cons_, - n_cons_eq_, cons_eq_mapping_->local_data_const(), - x_user->local_data_const(), new_x, Jac_c_user_de->local_data()); - runStats.tmEvalJac_con.stop(); runStats.nEvalJac_con_eq++; + bool bret = interface.eval_Jac_cons(nlp_transformations_.n_pre(), + n_cons_, + n_cons_eq_, + cons_eq_mapping_->local_data_const(), + x_user->local_data_const(), + new_x, + Jac_c_user_de->local_data()); + runStats.tmEvalJac_con.stop(); + runStats.nEvalJac_con_eq++; auto* Jac_c_p = nlp_transformations_.apply_to_jacob_eq(*Jac_c_user, n_cons_eq_); - if(Jac_c_p==nullptr) { + if(Jac_c_p == nullptr) { log->printf(hovError, "[internal error] hiopFixedVarsRemover works only with dense matrices\n"); return false; - } + } Jac_c = *Jac_c_p; return bret; } @@ -1583,20 +1548,18 @@ bool hiopNlpDenseConstraints::eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& bool hiopNlpDenseConstraints::eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d) { - if((prob_type_==hiopInterfaceBase::hiopLinear || prob_type_==hiopInterfaceBase::hiopQuadratic) - && nlp_evaluated_) { + if((prob_type_ == hiopInterfaceBase::hiopLinear || prob_type_ == hiopInterfaceBase::hiopQuadratic) && nlp_evaluated_) { return true; } hiopMatrixDense* Jac_dde = dynamic_cast(&Jac_d); - if(Jac_dde==NULL) { + if(Jac_dde == NULL) { log->printf(hovError, "[internal error] hiopNlpDenseConstraints NLP works only with dense matrices\n"); return false; } else { - hiopVector* x_user = nlp_transformations_.apply_inv_to_x(x, new_x); hiopMatrix* Jac_d_user = nlp_transformations_.apply_inv_to_jacob_ineq(Jac_d, n_cons_ineq_); - if(Jac_d_user==nullptr) { + if(Jac_d_user == nullptr) { log->printf(hovError, "[internal error] hiopFixedVarsRemover works only with dense matrices\n"); return false; } @@ -1604,13 +1567,18 @@ bool hiopNlpDenseConstraints::eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& assert(Jac_d_user_de); runStats.tmEvalJac_con.start(); - bool bret = interface.eval_Jac_cons(nlp_transformations_.n_pre(), n_cons_, - n_cons_ineq_, cons_ineq_mapping_->local_data_const(), - x_user->local_data_const(), new_x,Jac_d_user_de->local_data()); - runStats.tmEvalJac_con.stop(); runStats.nEvalJac_con_ineq++; + bool bret = interface.eval_Jac_cons(nlp_transformations_.n_pre(), + n_cons_, + n_cons_ineq_, + cons_ineq_mapping_->local_data_const(), + x_user->local_data_const(), + new_x, + Jac_d_user_de->local_data()); + runStats.tmEvalJac_con.stop(); + runStats.nEvalJac_con_ineq++; auto* Jac_d_p = nlp_transformations_.apply_to_jacob_ineq(*Jac_d_user, n_cons_ineq_); - if(Jac_d_p==nullptr) { + if(Jac_d_p == nullptr) { log->printf(hovError, "[internal error] hiopFixedVarsRemover works only with dense matrices\n"); return false; } @@ -1619,46 +1587,36 @@ bool hiopNlpDenseConstraints::eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& } } -hiopMatrixDense* hiopNlpDenseConstraints::alloc_Jac_c() -{ - return alloc_multivector_primal(n_cons_eq_); -} +hiopMatrixDense* hiopNlpDenseConstraints::alloc_Jac_c() { return alloc_multivector_primal(n_cons_eq_); } -hiopMatrixDense* hiopNlpDenseConstraints::alloc_Jac_d() -{ - return alloc_multivector_primal(n_cons_ineq_); -} +hiopMatrixDense* hiopNlpDenseConstraints::alloc_Jac_d() { return alloc_multivector_primal(n_cons_ineq_); } -hiopMatrixDense* hiopNlpDenseConstraints::alloc_Jac_cons() -{ - return alloc_multivector_primal(n_cons_); -} +hiopMatrixDense* hiopNlpDenseConstraints::alloc_Jac_cons() { return alloc_multivector_primal(n_cons_); } hiopMatrix* hiopNlpDenseConstraints::alloc_Hess_Lagr() { return new HessianDiagPlusRowRank(this, this->options->GetInteger("secant_memory_len")); } -hiopMatrixDense* hiopNlpDenseConstraints::alloc_multivector_primal(int nrows, int maxrows/*=-1*/) const +hiopMatrixDense* hiopNlpDenseConstraints::alloc_multivector_primal(int nrows, int maxrows /*=-1*/) const { hiopMatrixDense* M; #ifdef HIOP_USE_MPI - //index_type* vec_distrib_=new index_type[num_ranks_+1]; - //if(true==interface.get_vecdistrib_info(n_vars_,vec_distrib_)) - if(vec_distrib_) - { + // index_type* vec_distrib_=new index_type[num_ranks_+1]; + // if(true==interface.get_vecdistrib_info(n_vars_,vec_distrib_)) + if(vec_distrib_) { M = LinearAlgebraFactory::create_matrix_dense("DEFAULT", nrows, n_vars_, vec_distrib_, comm_, maxrows); } else { - //the if is not really needed, but let's keep it clear, costs only a comparison - if(-1==maxrows) - M = LinearAlgebraFactory::create_matrix_dense("DEFAULT", nrows, n_vars_); + // the if is not really needed, but let's keep it clear, costs only a comparison + if(-1 == maxrows) + M = LinearAlgebraFactory::create_matrix_dense("DEFAULT", nrows, n_vars_); else M = LinearAlgebraFactory::create_matrix_dense("DEFAULT", nrows, n_vars_, NULL, MPI_COMM_SELF, maxrows); } #else - //the if is not really needed, but let's keep it clear, costs only a comparison - if(-1==maxrows) - M = LinearAlgebraFactory::create_matrix_dense("DEFAULT", nrows, n_vars_); + // the if is not really needed, but let's keep it clear, costs only a comparison + if(-1 == maxrows) + M = LinearAlgebraFactory::create_matrix_dense("DEFAULT", nrows, n_vars_); else M = LinearAlgebraFactory::create_matrix_dense("DEFAULT", nrows, n_vars_, NULL, MPI_COMM_SELF, maxrows); #endif @@ -1666,29 +1624,27 @@ hiopMatrixDense* hiopNlpDenseConstraints::alloc_multivector_primal(int nrows, in } /* *********************************************************************************** - * hiopNlpMDS class implementation + * hiopNlpMDS class implementation * *********************************************************************************** -*/ + */ hiopDualsLsqUpdate* hiopNlpMDS::alloc_duals_lsq_updater() { #ifdef HIOP_USE_MAGMA - if(this->options->GetString("compute_mode")=="hybrid" || - this->options->GetString("compute_mode")=="gpu" || - this->options->GetString("compute_mode")=="auto") { - return new hiopDualsLsqUpdateLinsysRedDenseSym(this); - } + if(this->options->GetString("compute_mode") == "hybrid" || this->options->GetString("compute_mode") == "gpu" || + this->options->GetString("compute_mode") == "auto") { + return new hiopDualsLsqUpdateLinsysRedDenseSym(this); + } #endif - //at this point use LAPACK Cholesky since we have that - //i. cpu compute mode OR - //ii. MAGMA is not available to handle the LSQ linear system on the device + // at this point use LAPACK Cholesky since we have that + // i. cpu compute mode OR + // ii. MAGMA is not available to handle the LSQ linear system on the device return new hiopDualsLsqUpdateLinsysRedDenseSymPD(this); } bool hiopNlpMDS::eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& Jac_c) { - if((prob_type_==hiopInterfaceBase::hiopLinear || prob_type_==hiopInterfaceBase::hiopQuadratic) - && nlp_evaluated_) { + if((prob_type_ == hiopInterfaceBase::hiopLinear || prob_type_ == hiopInterfaceBase::hiopQuadratic) && nlp_evaluated_) { return true; } @@ -1696,19 +1652,26 @@ bool hiopNlpMDS::eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& Jac_c) assert(pJac_c); if(pJac_c) { hiopVector* x_user = nlp_transformations_.apply_inv_to_x(x, new_x); - + // NOT needed for now -// hiopMatrix* Jac_c_user = nlp_transformations_.apply_inv_to_jacob_eq(Jac_c, n_cons_eq); -// assert(Jac_c_user); + // hiopMatrix* Jac_c_user = nlp_transformations_.apply_inv_to_jacob_eq(Jac_c, n_cons_eq); + // assert(Jac_c_user); runStats.tmEvalJac_con.start(); - + int nnz = pJac_c->sp_nnz(); - bool bret = interface.eval_Jac_cons(n_vars_, n_cons_, - n_cons_eq_, cons_eq_mapping_->local_data_const(), - x_user->local_data_const(), new_x, - pJac_c->n_sp(), pJac_c->n_de(), - nnz, pJac_c->sp_irow(), pJac_c->sp_jcol(), pJac_c->sp_M(), + bool bret = interface.eval_Jac_cons(n_vars_, + n_cons_, + n_cons_eq_, + cons_eq_mapping_->local_data_const(), + x_user->local_data_const(), + new_x, + pJac_c->n_sp(), + pJac_c->n_de(), + nnz, + pJac_c->sp_irow(), + pJac_c->sp_jcol(), + pJac_c->sp_M(), pJac_c->de_local_data()); // scale the matrix @@ -1724,29 +1687,35 @@ bool hiopNlpMDS::eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& Jac_c) bool hiopNlpMDS::eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d) { - if((prob_type_==hiopInterfaceBase::hiopLinear || prob_type_==hiopInterfaceBase::hiopQuadratic) - && nlp_evaluated_) { + if((prob_type_ == hiopInterfaceBase::hiopLinear || prob_type_ == hiopInterfaceBase::hiopQuadratic) && nlp_evaluated_) { return true; } hiopMatrixMDS* pJac_d = dynamic_cast(&Jac_d); assert(pJac_d); if(pJac_d) { - hiopVector* x_user = nlp_transformations_.apply_inv_to_x(x, new_x); - + hiopVector* x_user = nlp_transformations_.apply_inv_to_x(x, new_x); + // NOT needed for now -// hiopMatrix* Jac_d_user = nlp_transformations_.apply_inv_to_jacob_ineq(Jac_d, n_cons_ineq_); -// assert(Jac_d_user); - + // hiopMatrix* Jac_d_user = nlp_transformations_.apply_inv_to_jacob_ineq(Jac_d, n_cons_ineq_); + // assert(Jac_d_user); + runStats.tmEvalJac_con.start(); - + int nnz = pJac_d->sp_nnz(); - bool bret = interface.eval_Jac_cons(n_vars_, n_cons_, - n_cons_ineq_, cons_ineq_mapping_->local_data_const(), - x_user->local_data_const(), new_x, - pJac_d->n_sp(), pJac_d->n_de(), - nnz, pJac_d->sp_irow(), pJac_d->sp_jcol(), pJac_d->sp_M(), - pJac_d->de_local_data()); + bool bret = interface.eval_Jac_cons(n_vars_, + n_cons_, + n_cons_ineq_, + cons_ineq_mapping_->local_data_const(), + x_user->local_data_const(), + new_x, + pJac_d->n_sp(), + pJac_d->n_de(), + nnz, + pJac_d->sp_irow(), + pJac_d->sp_jcol(), + pJac_d->sp_M(), + pJac_d->de_local_data()); // scale the matrix Jac_d = *(nlp_transformations_.apply_to_jacob_ineq(Jac_d, n_cons_ineq_)); @@ -1758,36 +1727,38 @@ bool hiopNlpMDS::eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d) } } -bool hiopNlpMDS::eval_Jac_c_d_interface_impl(hiopVector& x, - bool new_x, - hiopMatrix& Jac_c, - hiopMatrix& Jac_d) +bool hiopNlpMDS::eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) { hiopMatrixMDS* pJac_c = dynamic_cast(&Jac_c); hiopMatrixMDS* pJac_d = dynamic_cast(&Jac_d); hiopMatrixMDS* cons_Jac = dynamic_cast(cons_Jac_); if(pJac_c && pJac_d) { assert(cons_Jac); - if(NULL == cons_Jac) - return false; + if(NULL == cons_Jac) return false; assert(cons_Jac->n_de() == pJac_d->n_de()); assert(cons_Jac->n_sp() == pJac_d->n_sp()); assert(cons_Jac->sp_nnz() == pJac_c->sp_nnz() + pJac_d->sp_nnz()); - + hiopVector* x_user = nlp_transformations_.apply_inv_to_x(x, new_x); //! todo -> need hiopNlpTransformation::apply_to_jacob_ineq to work with MDS Jacobian - //double** Jac_d_user = nlp_transformations_.apply_inv_to_jacob_ineq(Jac_d, n_cons_ineq_); - + // double** Jac_d_user = nlp_transformations_.apply_inv_to_jacob_ineq(Jac_d, n_cons_ineq_); + runStats.tmEvalJac_con.start(); int nnz = cons_Jac->sp_nnz(); - bool bret = interface.eval_Jac_cons(n_vars_, n_cons_, - x_user->local_data_const(), new_x, - pJac_d->n_sp(), pJac_d->n_de(), - nnz, cons_Jac->sp_irow(), cons_Jac->sp_jcol(), cons_Jac->sp_M(), + bool bret = interface.eval_Jac_cons(n_vars_, + n_cons_, + x_user->local_data_const(), + new_x, + pJac_d->n_sp(), + pJac_d->n_de(), + nnz, + cons_Jac->sp_irow(), + cons_Jac->sp_jcol(), + cons_Jac->sp_M(), cons_Jac->de_local_data()); - //copy back to Jac_c and Jac_d + // copy back to Jac_c and Jac_d pJac_c->copyRowsFrom(*cons_Jac, cons_eq_mapping_->local_data_const(), n_cons_eq_); pJac_d->copyRowsFrom(*cons_Jac, cons_ineq_mapping_->local_data_const(), n_cons_ineq_); @@ -1798,7 +1769,7 @@ bool hiopNlpMDS::eval_Jac_c_d_interface_impl(hiopVector& x, runStats.tmEvalJac_con.stop(); runStats.nEvalJac_con_eq++; runStats.nEvalJac_con_ineq++; - + return bret; } else { return false; @@ -1814,7 +1785,7 @@ bool hiopNlpMDS::eval_Hess_Lagr(const hiopVector& x, bool new_lambdas, hiopMatrix& Hess_L) { - if(prob_type_==hiopInterfaceBase::hiopLinear && nlp_evaluated_) { + if(prob_type_ == hiopInterfaceBase::hiopLinear && nlp_evaluated_) { return true; } @@ -1825,68 +1796,76 @@ bool hiopNlpMDS::eval_Hess_Lagr(const hiopVector& x, bool bret = false; if(pHessL) { - if(n_cons_eq_ + n_cons_ineq_ != buf_lambda_->get_size()) { delete buf_lambda_; buf_lambda_ = this->alloc_dual_vec(); } assert(buf_lambda_); - buf_lambda_->copyFromStarting(0, lambda_eq.local_data_const(), n_cons_eq_); + buf_lambda_->copyFromStarting(0, lambda_eq.local_data_const(), n_cons_eq_); buf_lambda_->copyFromStarting(n_cons_eq_, lambda_ineq.local_data_const(), n_cons_ineq_); // scale lambda before passing it to user interface to compute Hess int n_cons_eq_ineq = n_cons_eq_ + n_cons_ineq_; buf_lambda_ = nlp_transformations_.apply_to_cons(*buf_lambda_, n_cons_eq_ineq); - double obj_factor_with_scale = obj_factor*get_obj_scale(); + double obj_factor_with_scale = obj_factor * get_obj_scale(); int nnzHSS = pHessL->sp_nnz(), nnzHSD = 0; - - bret = interface.eval_Hess_Lagr(n_vars_, n_cons_, x.local_data_const(), new_x, + + bret = interface.eval_Hess_Lagr(n_vars_, + n_cons_, + x.local_data_const(), + new_x, obj_factor_with_scale, - buf_lambda_->local_data(), new_lambdas, - pHessL->n_sp(), pHessL->n_de(), - nnzHSS, pHessL->sp_irow(), pHessL->sp_jcol(), pHessL->sp_M(), + buf_lambda_->local_data(), + new_lambdas, + pHessL->n_sp(), + pHessL->n_de(), + nnzHSS, + pHessL->sp_irow(), + pHessL->sp_jcol(), + pHessL->sp_M(), pHessL->de_local_data(), - nnzHSD, NULL, NULL, NULL); - assert(nnzHSD==0); - assert(nnzHSS==pHessL->sp_nnz()); - + nnzHSD, + NULL, + NULL, + NULL); + assert(nnzHSD == 0); + assert(nnzHSS == pHessL->sp_nnz()); + } else { bret = false; } runStats.tmEvalHessL.stop(); runStats.nEvalHessL++; - + return bret; } bool hiopNlpMDS::finalizeInitialization() { - if(!interface.get_sparse_dense_blocks_info(nx_sparse, nx_dense, - nnz_sparse_Jaceq, nnz_sparse_Jacineq, - nnz_sparse_Hess_Lagr_SS, + if(!interface.get_sparse_dense_blocks_info(nx_sparse, + nx_dense, + nnz_sparse_Jaceq, + nnz_sparse_Jacineq, + nnz_sparse_Hess_Lagr_SS, nnz_sparse_Hess_Lagr_SD)) { return false; } - assert(0==nnz_sparse_Hess_Lagr_SD); + assert(0 == nnz_sparse_Hess_Lagr_SD); return hiopNlpFormulation::finalizeInitialization(); } /* *********************************************************************************** * hiopNlpSparse class implementation * *********************************************************************************** -*/ -hiopDualsLsqUpdate* hiopNlpSparse::alloc_duals_lsq_updater() -{ - return new hiopDualsLsqUpdateLinsysAugSparse(this); -} + */ +hiopDualsLsqUpdate* hiopNlpSparse::alloc_duals_lsq_updater() { return new hiopDualsLsqUpdateLinsysAugSparse(this); } bool hiopNlpSparse::eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& Jac_c) { - if((prob_type_==hiopInterfaceBase::hiopLinear || prob_type_==hiopInterfaceBase::hiopQuadratic) - && nlp_evaluated_) { + if((prob_type_ == hiopInterfaceBase::hiopLinear || prob_type_ == hiopInterfaceBase::hiopQuadratic) && nlp_evaluated_) { return true; } @@ -1894,7 +1873,7 @@ bool hiopNlpSparse::eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& Jac_c) assert(pJac_c); if(pJac_c) { hiopVector* x_user = nlp_transformations_.apply_inv_to_x(x, new_x); - + runStats.tmEvalJac_con.start(); int nnz = pJac_c->numberOfNonzeros(); @@ -1922,8 +1901,7 @@ bool hiopNlpSparse::eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& Jac_c) bool hiopNlpSparse::eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d) { - if((prob_type_==hiopInterfaceBase::hiopLinear || prob_type_==hiopInterfaceBase::hiopQuadratic) - && nlp_evaluated_) { + if((prob_type_ == hiopInterfaceBase::hiopLinear || prob_type_ == hiopInterfaceBase::hiopQuadratic) && nlp_evaluated_) { return true; } @@ -1936,16 +1914,16 @@ bool hiopNlpSparse::eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d) int nnz = pJac_d->numberOfNonzeros(); - bool bret = interface.eval_Jac_cons(n_vars_, - n_cons_, - n_cons_ineq_, - cons_ineq_mapping_->local_data_const(), - x_user->local_data_const(), - new_x, - nnz, - pJac_d->i_row(), - pJac_d->j_col(), - pJac_d->M()); + bool bret = interface.eval_Jac_cons(n_vars_, + n_cons_, + n_cons_ineq_, + cons_ineq_mapping_->local_data_const(), + x_user->local_data_const(), + new_x, + nnz, + pJac_d->i_row(), + pJac_d->j_col(), + pJac_d->M()); // scale the matrix Jac_d = *(nlp_transformations_.apply_to_jacob_ineq(Jac_d, n_cons_ineq_)); @@ -1958,18 +1936,14 @@ bool hiopNlpSparse::eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d) } } -bool hiopNlpSparse::eval_Jac_c_d_interface_impl(hiopVector& x, - bool new_x, - hiopMatrix& Jac_c, - hiopMatrix& Jac_d) +bool hiopNlpSparse::eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) { hiopMatrixSparse* pJac_c = dynamic_cast(&Jac_c); hiopMatrixSparse* pJac_d = dynamic_cast(&Jac_d); hiopMatrixSparse* cons_Jac = dynamic_cast(cons_Jac_); if(pJac_c && pJac_d) { assert(cons_Jac); - if(NULL == cons_Jac) - return false; + if(NULL == cons_Jac) return false; assert(cons_Jac->numberOfNonzeros() == pJac_c->numberOfNonzeros() + pJac_d->numberOfNonzeros()); @@ -1978,30 +1952,23 @@ bool hiopNlpSparse::eval_Jac_c_d_interface_impl(hiopVector& x, runStats.tmEvalJac_con.start(); int nnz = cons_Jac->numberOfNonzeros(); - bool bret=false; - if(0==num_jac_eval_) - { - bret = interface.eval_Jac_cons(n_vars_, + bool bret = false; + if(0 == num_jac_eval_) { + bret = interface.eval_Jac_cons(n_vars_, n_cons_, - x_user->local_data_const(), + x_user->local_data_const(), new_x, - nnz, - cons_Jac->i_row(), - cons_Jac->j_col(), + nnz, + cons_Jac->i_row(), + cons_Jac->j_col(), nullptr); num_jac_eval_++; } - - bret = interface.eval_Jac_cons(n_vars_, - n_cons_, - x_user->local_data_const(), - new_x, - nnz, - nullptr, - nullptr, - cons_Jac->M()); - - //copy back to Jac_c and Jac_d + + bret = + interface.eval_Jac_cons(n_vars_, n_cons_, x_user->local_data_const(), new_x, nnz, nullptr, nullptr, cons_Jac->M()); + + // copy back to Jac_c and Jac_d pJac_c->copyRowsFrom(*cons_Jac, cons_eq_mapping_->local_data_const(), n_cons_eq_); pJac_d->copyRowsFrom(*cons_Jac, cons_ineq_mapping_->local_data_const(), n_cons_ineq_); @@ -2020,45 +1987,42 @@ bool hiopNlpSparse::eval_Jac_c_d_interface_impl(hiopVector& x, return true; } -bool hiopNlpSparse::eval_Hess_Lagr(const hiopVector& x, - bool new_x, +bool hiopNlpSparse::eval_Hess_Lagr(const hiopVector& x, + bool new_x, const double& obj_factor, - const hiopVector& lambda_eq, - const hiopVector& lambda_ineq, + const hiopVector& lambda_eq, + const hiopVector& lambda_ineq, bool new_lambdas, hiopMatrix& Hess_L) { - if(prob_type_==hiopInterfaceBase::hiopLinear && nlp_evaluated_) { + if(prob_type_ == hiopInterfaceBase::hiopLinear && nlp_evaluated_) { return true; } hiopMatrixSparse* pHessL = dynamic_cast(&Hess_L); assert(pHessL); - + runStats.tmEvalHessL.start(); bool bret = false; if(pHessL) { if(n_cons_eq_ + n_cons_ineq_ != buf_lambda_->get_size()) { delete buf_lambda_; - buf_lambda_ = LinearAlgebraFactory::create_vector(options->GetString("mem_space"), - n_cons_eq_ + n_cons_ineq_); + buf_lambda_ = LinearAlgebraFactory::create_vector(options->GetString("mem_space"), n_cons_eq_ + n_cons_ineq_); } assert(buf_lambda_); - - buf_lambda_-> - copy_from_two_vec_w_pattern(lambda_eq, *cons_eq_mapping_, lambda_ineq, *cons_ineq_mapping_); + + buf_lambda_->copy_from_two_vec_w_pattern(lambda_eq, *cons_eq_mapping_, lambda_ineq, *cons_ineq_mapping_); // scale lambda before passing it to user interface to compute Hess int n_cons_eq_ineq = n_cons_eq_ + n_cons_ineq_; buf_lambda_ = nlp_transformations_.apply_to_cons(*buf_lambda_, n_cons_eq_ineq); - - double obj_factor_with_scale = obj_factor*get_obj_scale(); + + double obj_factor_with_scale = obj_factor * get_obj_scale(); int nnzHSS = pHessL->numberOfNonzeros(); - if(0==num_hess_eval_) - { + if(0 == num_hess_eval_) { bret = interface.eval_Hess_Lagr(n_vars_, n_cons_, x.local_data_const(), @@ -2084,7 +2048,7 @@ bool hiopNlpSparse::eval_Hess_Lagr(const hiopVector& x, nullptr, nullptr, pHessL->M()); - assert(nnzHSS==pHessL->numberOfNonzeros()); + assert(nnzHSS == pHessL->numberOfNonzeros()); } else { bret = false; @@ -2099,10 +2063,7 @@ bool hiopNlpSparse::eval_Hess_Lagr(const hiopVector& x, bool hiopNlpSparse::finalizeInitialization() { int nx = 0; - if(!interface.get_sparse_blocks_info(nx, - nnz_sparse_Jaceq_, - nnz_sparse_Jacineq_, - nnz_sparse_Hess_Lagr_)) { + if(!interface.get_sparse_blocks_info(nx, nnz_sparse_Jaceq_, nnz_sparse_Jacineq_, nnz_sparse_Hess_Lagr_)) { return false; } assert(nx == n_vars_); @@ -2115,19 +2076,14 @@ bool hiopNlpSparse::finalizeInitialization() bool hiopNlpSparseIneq::finalizeInitialization() { int nx = 0; - if(!interface.get_sparse_blocks_info(nx, - nnz_sparse_Jaceq_, - nnz_sparse_Jacineq_, - nnz_sparse_Hess_Lagr_)) { + if(!interface.get_sparse_blocks_info(nx, nnz_sparse_Jaceq_, nnz_sparse_Jacineq_, nnz_sparse_Hess_Lagr_)) { return false; } assert(nx == n_vars_); nnz_sparse_Jacineq_ += nnz_sparse_Jaceq_; nnz_sparse_Jaceq_ = 0.; - return hiopNlpFormulation::finalizeInitialization(); - } bool hiopNlpSparseIneq::process_constraints() @@ -2135,7 +2091,7 @@ bool hiopNlpSparseIneq::process_constraints() bool bret; // deallocate if previously allocated - delete c_rhs_; + delete c_rhs_; delete[] cons_eq_type_; delete dl_; delete du_; @@ -2146,20 +2102,20 @@ bool hiopNlpSparseIneq::process_constraints() delete cons_ineq_mapping_; string mem_space = options->GetString("mem_space"); - - hiopVector* gl = LinearAlgebraFactory::create_vector(mem_space, n_cons_); + + hiopVector* gl = LinearAlgebraFactory::create_vector(mem_space, n_cons_); hiopVector* gu = LinearAlgebraFactory::create_vector(mem_space, n_cons_); auto* cons_type = new hiopInterfaceBase::NonlinearityType[n_cons_]; - //get constraints information and transfer to host for pre-processing - bret = interface_base.get_cons_info(n_cons_, gl->local_data(), gu->local_data(), cons_type); + // get constraints information and transfer to host for pre-processing + bret = interface_base.get_cons_info(n_cons_, gl->local_data(), gu->local_data(), cons_type); if(!bret) { assert(bret); return false; } - assert(gl->get_local_size()==n_cons_); - assert(gl->get_local_size()==n_cons_); + assert(gl->get_local_size() == n_cons_); + assert(gl->get_local_size() == n_cons_); // transfer to host for processing hiopVectorPar gl_host(n_cons_); @@ -2170,21 +2126,21 @@ bool hiopNlpSparseIneq::process_constraints() double* gl_vec = gl_host.local_data(); double* gu_vec = gu_host.local_data(); n_cons_eq_ = 0; - n_cons_ineq_ = n_cons_; + n_cons_ineq_ = n_cons_; /* Allocate host temporary vectors/arrays for on host processing. */ hiopVectorPar dl_host(n_cons_ineq_); hiopVectorPar du_host(n_cons_ineq_); - cons_ineq_type_ = new hiopInterfaceBase::NonlinearityType[n_cons_ineq_]; + cons_ineq_type_ = new hiopInterfaceBase::NonlinearityType[n_cons_ineq_]; - //will only use ineq mapping since all the constraints will become inequalities + // will only use ineq mapping since all the constraints will become inequalities hiopVectorIntSeq cons_ineq_mapping_host(n_cons_ineq_); /* copy lower and upper bounds - constraints */ double* dl_vec = dl_host.local_data(); double* du_vec = du_host.local_data(); - index_type *cons_ineq_mapping = cons_ineq_mapping_host.local_data(); + index_type* cons_ineq_mapping = cons_ineq_mapping_host.local_data(); // // two-sided relaxed bounds for equalities @@ -2192,88 +2148,84 @@ bool hiopNlpSparseIneq::process_constraints() eq_relax_value_ = options->GetNumeric("eq_relax_factor"); n_cons_eq_origNLP_ = 0; - for(int i=0; i-1e20) { - idl_vec[i]=1.; - n_ineq_low_++; - if(du_vec[i]< 1e20) { + for(int i = 0; i < n_cons_ineq_; i++) { + if(dl_vec[i] > -1e20) { + idl_vec[i] = 1.; + n_ineq_low_++; + if(du_vec[i] < 1e20) { n_ineq_lu_++; } } else { - //no lower bound on constraint - idl_vec[i]=0.; + // no lower bound on constraint + idl_vec[i] = 0.; } - if(du_vec[i]< 1e20) { - idu_vec[i]=1.; - n_ineq_upp_++; + if(du_vec[i] < 1e20) { + idu_vec[i] = 1.; + n_ineq_upp_++; } else { - //no upper bound on constraint - idu_vec[i]=0.; + // no upper bound on constraint + idu_vec[i] = 0.; } } if(n_cons_eq_origNLP_) { - std::string strEquality = n_cons_eq_origNLP_==1 ? "equality" : "equalities"; + std::string strEquality = n_cons_eq_origNLP_ == 1 ? "equality" : "equalities"; log->printf(hovSummary, "%d %s will be treated as relaxed (two-sided) in%s.\n", n_cons_eq_origNLP_, strEquality.c_str(), strEquality.c_str()); - log->printf(hovScalars, - "Equality right-hand sides were relaxed by a factor of %.5e.\n", - eq_relax_value_); + log->printf(hovScalars, "Equality right-hand sides were relaxed by a factor of %.5e.\n", eq_relax_value_); } - // pass the constraints info from host back to (possibly) device vectors - assert(n_cons_eq_==0); //address line below - //since n_cons_eq_==0, no copies will be done for anything equality-related. + assert(n_cons_eq_ == 0); // address line below + // since n_cons_eq_==0, no copies will be done for anything equality-related. c_rhs_ = LinearAlgebraFactory::create_vector(mem_space, n_cons_eq_); cons_eq_type_ = new hiopInterfaceBase::NonlinearityType[n_cons_eq_]; cons_eq_mapping_ = LinearAlgebraFactory::create_vector_int(mem_space, n_cons_eq_); - + dl_ = LinearAlgebraFactory::create_vector(mem_space, n_cons_ineq_); dl_->copy_from_vectorpar(dl_host); du_ = dl_->alloc_clone(); du_->copy_from_vectorpar(du_host); - + cons_ineq_mapping_ = LinearAlgebraFactory::create_vector_int(mem_space, n_cons_ineq_); cons_ineq_mapping_->copy_from_vectorseq(cons_ineq_mapping_host); @@ -2282,7 +2234,6 @@ bool hiopNlpSparseIneq::process_constraints() idu_ = du_->alloc_clone(); idu_->copy_from_vectorpar(idu_host); - return true; } -}; +}; // namespace hiop diff --git a/src/Optimization/hiopNlpFormulation.hpp b/src/Optimization/hiopNlpFormulation.hpp index a9467ae59..4d0df76a7 100644 --- a/src/Optimization/hiopNlpFormulation.hpp +++ b/src/Optimization/hiopNlpFormulation.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -52,7 +52,7 @@ * @author Nai-Yuan Chiang , LLNL * */ - + #ifndef HIOP_NLP_FORMULATION #define HIOP_NLP_FORMULATION @@ -62,7 +62,7 @@ #include "hiopMatrixMDS.hpp" #ifdef HIOP_USE_MPI -#include "mpi.h" +#include "mpi.h" #endif #include "hiopNlpTransforms.hpp" @@ -78,18 +78,18 @@ namespace hiop { -//some forward decls +// some forward decls class hiopDualsLsqUpdate; - -/** Class for a general NlpFormulation with general constraints and bounds on the variables. - * This class also acts as a factory for linear algebra objects (derivative + +/** Class for a general NlpFormulation with general constraints and bounds on the variables. + * This class also acts as a factory for linear algebra objects (derivative * matrices, KKT system) whose types are decided based on the hiopInterfaceXXX object passed in the * constructor. - * - * This formulation assumes that optimiz variables, rhs, and gradient are VECTORS: contiguous + * + * This formulation assumes that optimiz variables, rhs, and gradient are VECTORS: contiguous * double arrays for which only local part is accessed (no inter-process comm). - * Derivatives are generic MATRICES, whose type depend on - * i. the NLP formulation (sparse general or NLP with few dense constraints) + * Derivatives are generic MATRICES, whose type depend on + * i. the NLP formulation (sparse general or NLP with few dense constraints) * ii. the interface provided (general sparse (not yet supported), mixed sparse-dense, or dense * constraints). * Exact matching of MATRICES and hiopInterface is to be done by specializations of this class. @@ -101,33 +101,35 @@ class hiopNlpFormulation virtual ~hiopNlpFormulation(); virtual bool finalizeInitialization(); - virtual bool apply_scaling(hiopVector& c, hiopVector& d, hiopVector& gradf, - hiopMatrix& Jac_c, hiopMatrix& Jac_d); + virtual bool apply_scaling(hiopVector& c, hiopVector& d, hiopVector& gradf, hiopMatrix& Jac_c, hiopMatrix& Jac_d); /** - * Wrappers for the interface calls. + * Wrappers for the interface calls. * Can be overridden for specialized formulations required by the algorithm. */ virtual bool eval_f(hiopVector& x, bool new_x, double& f); virtual bool eval_grad_f(hiopVector& x, bool new_x, hiopVector& gradf); - + virtual bool eval_c(hiopVector& x, bool new_x, hiopVector& c); virtual bool eval_d(hiopVector& x, bool new_x, hiopVector& d); virtual bool eval_c_d(hiopVector& x, bool new_x, hiopVector& c, hiopVector& d); /* the implementation of the next two methods depends both on the interface and on the formulation */ - virtual bool eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& Jac_c)=0; - virtual bool eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d)=0; + virtual bool eval_Jac_c(hiopVector& x, bool new_x, hiopMatrix& Jac_c) = 0; + virtual bool eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d) = 0; virtual bool eval_Jac_c_d(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d); + protected: - //calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments + // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d) = 0; + public: - virtual bool eval_Hess_Lagr(const hiopVector& x, bool new_x, - const double& obj_factor, - const hiopVector& lambda_eq, - const hiopVector& lambda_ineq, - bool new_lambdas, - hiopMatrix& Hess_L)=0; + virtual bool eval_Hess_Lagr(const hiopVector& x, + bool new_x, + const double& obj_factor, + const hiopVector& lambda_eq, + const hiopVector& lambda_ineq, + bool new_lambdas, + hiopMatrix& Hess_L) = 0; /* starting point */ virtual bool get_starting_point(hiopVector& x0, bool& duals_avail, @@ -149,7 +151,7 @@ class hiopNlpFormulation /* Allocates the LSQ duals update class. */ virtual hiopDualsLsqUpdate* alloc_duals_lsq_updater() = 0; - + /** linear algebra factory */ virtual hiopVector* alloc_primal_vec() const; virtual hiopVector* alloc_dual_eq_vec() const; @@ -161,113 +163,100 @@ class hiopNlpFormulation virtual hiopMatrix* alloc_Jac_cons() = 0; virtual hiopMatrix* alloc_Hess_Lagr() = 0; - virtual - void user_callback_solution(hiopSolveStatus status, - const hiopVector& x, - hiopVector& z_L, - hiopVector& z_U, - hiopVector& c, - hiopVector& d, - hiopVector& y_c, - hiopVector& y_d, - double obj_value); - - virtual - bool user_callback_iterate(int iter, - double obj_value, - double logbar_obj_value, - const hiopVector& x, - const hiopVector& z_L, - const hiopVector& z_U, - const hiopVector& s, // the slack for inequalities - const hiopVector& c, - const hiopVector& d, - const hiopVector& yc, - const hiopVector& yd, - double inf_pr, - double inf_du, - double onenorm_pr, - double mu, - double alpha_du, - double alpha_pr, - int ls_trials); - - virtual - bool user_callback_full_iterate(hiopVector& x, - hiopVector& z_L, - hiopVector& z_U, - hiopVector& y_c, - hiopVector& y_d, - hiopVector& s, - hiopVector& v_L, - hiopVector& v_U); - - virtual - bool user_force_update(int iter, - double& obj_value, - hiopVector& x, - hiopVector& z_L, - hiopVector& z_U, - hiopVector& c, - hiopVector& d, - hiopVector& y_c, - hiopVector& y_d, - double& mu, - double& alpha_du, - double& alpha_pr); - + virtual void user_callback_solution(hiopSolveStatus status, + const hiopVector& x, + hiopVector& z_L, + hiopVector& z_U, + hiopVector& c, + hiopVector& d, + hiopVector& y_c, + hiopVector& y_d, + double obj_value); + + virtual bool user_callback_iterate(int iter, + double obj_value, + double logbar_obj_value, + const hiopVector& x, + const hiopVector& z_L, + const hiopVector& z_U, + const hiopVector& s, // the slack for inequalities + const hiopVector& c, + const hiopVector& d, + const hiopVector& yc, + const hiopVector& yd, + double inf_pr, + double inf_du, + double onenorm_pr, + double mu, + double alpha_du, + double alpha_pr, + int ls_trials); + + virtual bool user_callback_full_iterate(hiopVector& x, + hiopVector& z_L, + hiopVector& z_U, + hiopVector& y_c, + hiopVector& y_d, + hiopVector& s, + hiopVector& v_L, + hiopVector& v_U); + + virtual bool user_force_update(int iter, + double& obj_value, + hiopVector& x, + hiopVector& z_L, + hiopVector& z_U, + hiopVector& c, + hiopVector& d, + hiopVector& y_c, + hiopVector& y_d, + double& mu, + double& alpha_du, + double& alpha_pr); + /** const accessors */ - inline const hiopVector& get_xl () const { return *xl_; } - inline const hiopVector& get_xu () const { return *xu_; } - inline const hiopVector& get_ixl() const { return *ixl_; } - inline const hiopVector& get_ixu() const { return *ixu_; } - inline const hiopVector& get_dl () const { return *dl_; } - inline const hiopVector& get_du () const { return *du_; } - inline const hiopVector& get_idl() const { return *idl_; } - inline const hiopVector& get_idu() const { return *idu_; } - inline const hiopVector& get_crhs() const { return *c_rhs_;} - - inline hiopInterfaceBase::NonlinearityType* get_var_type() const {return vars_type_;} - inline hiopInterfaceBase::NonlinearityType* get_cons_eq_type() const {return cons_eq_type_;} - inline hiopInterfaceBase::NonlinearityType* get_cons_ineq_type() const {return cons_ineq_type_;} - inline hiopInterfaceBase::NonlinearityType get_prob_type() const {return prob_type_;} + inline const hiopVector& get_xl() const { return *xl_; } + inline const hiopVector& get_xu() const { return *xu_; } + inline const hiopVector& get_ixl() const { return *ixl_; } + inline const hiopVector& get_ixu() const { return *ixu_; } + inline const hiopVector& get_dl() const { return *dl_; } + inline const hiopVector& get_du() const { return *du_; } + inline const hiopVector& get_idl() const { return *idl_; } + inline const hiopVector& get_idu() const { return *idu_; } + inline const hiopVector& get_crhs() const { return *c_rhs_; } + + inline hiopInterfaceBase::NonlinearityType* get_var_type() const { return vars_type_; } + inline hiopInterfaceBase::NonlinearityType* get_cons_eq_type() const { return cons_eq_type_; } + inline hiopInterfaceBase::NonlinearityType* get_cons_ineq_type() const { return cons_ineq_type_; } + inline hiopInterfaceBase::NonlinearityType get_prob_type() const { return prob_type_; } /** const accessors */ - inline size_type n() const {return n_vars_;} - inline size_type m() const {return n_cons_;} - inline size_type m_eq() const {return n_cons_eq_;} - inline size_type m_ineq() const {return n_cons_ineq_;} - inline size_type n_low() const {return n_bnds_low_;} - inline size_type n_upp() const {return n_bnds_upp_;} - inline size_type m_ineq_low() const {return n_ineq_low_;} - inline size_type m_ineq_upp() const {return n_ineq_upp_;} - inline size_type n_complem() const {return m_ineq_low()+m_ineq_upp()+n_low()+n_upp();} - - inline size_type n_local() const - { - return xl_->get_local_size(); - } - inline size_type n_low_local() const {return n_bnds_low_local_;} - inline size_type n_upp_local() const {return n_bnds_upp_local_;} + inline size_type n() const { return n_vars_; } + inline size_type m() const { return n_cons_; } + inline size_type m_eq() const { return n_cons_eq_; } + inline size_type m_ineq() const { return n_cons_ineq_; } + inline size_type n_low() const { return n_bnds_low_; } + inline size_type n_upp() const { return n_bnds_upp_; } + inline size_type m_ineq_low() const { return n_ineq_low_; } + inline size_type m_ineq_upp() const { return n_ineq_upp_; } + inline size_type n_complem() const { return m_ineq_low() + m_ineq_upp() + n_low() + n_upp(); } + + inline size_type n_local() const { return xl_->get_local_size(); } + inline size_type n_low_local() const { return n_bnds_low_local_; } + inline size_type n_upp_local() const { return n_bnds_upp_local_; } /* methods for transforming the internal objects to corresponding user objects */ - inline double user_obj(double hiop_f) + inline double user_obj(double hiop_f) { return nlp_transformations_.apply_inv_to_obj(hiop_f); } + inline void user_x(hiopVector& hiop_x, double* user_x) { - return nlp_transformations_.apply_inv_to_obj(hiop_f); - } - inline void user_x(hiopVector& hiop_x, double* user_x) - { - //double *hiop_xa = hiop_x.local_data(); - hiopVector *x = nlp_transformations_.apply_inv_to_x(hiop_x,/*new_x=*/true); - //memcpy(user_x, user_xa, hiop_x.get_local_size()*sizeof(double)); - memcpy(user_x, x->local_data(), nlp_transformations_.n_post_local()*sizeof(double)); + // double *hiop_xa = hiop_x.local_data(); + hiopVector* x = nlp_transformations_.apply_inv_to_x(hiop_x, /*new_x=*/true); + // memcpy(user_x, user_xa, hiop_x.get_local_size()*sizeof(double)); + memcpy(user_x, x->local_data(), nlp_transformations_.n_post_local() * sizeof(double)); } /* copies/unpacks duals of the bounds and of constraints from 'it' to the three arrays */ - void get_dual_solutions(const hiopIterate& it, - double* zl_a, - double* zu_a, - double* lambda_a); + void get_dual_solutions(const hiopIterate& it, double* zl_a, double* zu_a, double* lambda_a); /// @brief return the scaling fact for objective double get_obj_scale() const; @@ -282,37 +271,23 @@ class hiopNlpFormulation hiopLogger* log; hiopRunStats runStats; hiopOptions* options; - //prints a summary of the problem - virtual void print(FILE* f=NULL, const char* msg=NULL, int rank=-1) const; + // prints a summary of the problem + virtual void print(FILE* f = NULL, const char* msg = NULL, int rank = -1) const; #ifdef HIOP_USE_MPI - inline MPI_Comm get_comm() const - { - return comm_; - } - inline int get_rank() const - { - return rank_; - } - inline int get_num_ranks() const - { - return num_ranks_; - } - inline index_type* getVecDistInfo() - { - return vec_distrib_; - } + inline MPI_Comm get_comm() const { return comm_; } + inline int get_rank() const { return rank_; } + inline int get_num_ranks() const { return num_ranks_; } + inline index_type* getVecDistInfo() { return vec_distrib_; } #endif protected: /* Preprocess bounds in a form supported by the NLP formulation. Returns counts of - * the variables with lower, upper, and lower and lower bounds, as well of the fixed - * variables. + * the variables with lower, upper, and lower and lower bounds, as well of the fixed + * variables. */ - virtual bool process_bounds(size_type& n_bnds_low, - size_type& n_bnds_upp, - size_type& n_bnds_lu, - size_type& nfixed_vars); + virtual bool process_bounds(size_type& n_bnds_low, size_type& n_bnds_upp, size_type& n_bnds_lu, size_type& nfixed_vars); /* Preprocess constraints in a form supported the NLP formulation. */ virtual bool process_constraints(); + protected: #ifdef HIOP_USE_MPI MPI_Comm comm_; @@ -334,13 +309,13 @@ class hiopNlpFormulation size_type n_ineq_upp_; size_type n_bnds_lu_; size_type n_ineq_lu_; - hiopVector *xl_, *xu_, *ixu_, *ixl_; //these will/can be global, memory distributed - hiopInterfaceBase::NonlinearityType* vars_type_; //C array containing the types for local vars + hiopVector *xl_, *xu_, *ixu_, *ixl_; // these will/can be global, memory distributed + hiopInterfaceBase::NonlinearityType* vars_type_; // C array containing the types for local vars - hiopVector *c_rhs_; //local + hiopVector* c_rhs_; // local hiopInterfaceBase::NonlinearityType* cons_eq_type_; - hiopVector *dl_, *du_, *idl_, *idu_; //these will be local + hiopVector *dl_, *du_, *idl_, *idu_; // these will be local hiopInterfaceBase::NonlinearityType* cons_ineq_type_; /** @@ -352,10 +327,10 @@ class hiopNlpFormulation bool nlp_evaluated_; // keep track of the constraints indexes in the original, user's formulation - hiopVectorInt *cons_eq_mapping_, *cons_ineq_mapping_; + hiopVectorInt *cons_eq_mapping_, *cons_ineq_mapping_; - //options for which this class was setup - std::string strFixedVars_; //"none", "fixed", "relax" + // options for which this class was setup + std::string strFixedVars_; //"none", "fixed", "relax" double dFixedVarsTol_; /** @@ -363,16 +338,15 @@ class hiopNlpFormulation * problem rescalings. */ hiopNlpTransformations nlp_transformations_; - - //internal NLP transformations (currently gradient scaling implemented) + + // internal NLP transformations (currently gradient scaling implemented) hiopNLPObjGradScaling* nlp_scaling_; /// @brief internal NLP transformations that relaxes the bounds hiopBoundsRelaxer* relax_bounds_; - #ifdef HIOP_USE_MPI - //inter-process distribution of vectors + // inter-process distribution of vectors index_type* vec_distrib_; #endif @@ -387,26 +361,26 @@ class hiopNlpFormulation * 1 : at once */ int cons_eval_type_; - - /** - * Internal buffer for constraints. Used only when constraints and Jacobian are evaluated at + + /** + * Internal buffer for constraints. Used only when constraints and Jacobian are evaluated at * once (cons_eval_type_==1), otherwise NULL. */ hiopVector* cons_body_; - - /** - * Internal buffer for the Jacobian. Used only when constraints and Jacobian are evaluated at + + /** + * Internal buffer for the Jacobian. Used only when constraints and Jacobian are evaluated at * once (cons_eval_type_==1), otherwise NULL. */ hiopMatrix* cons_Jac_; - /** + /** * Internal buffer for the multipliers of the constraints use to copy the multipliers of eq. and * ineq. into and to return it to the user via @user_callback_solution and @user_callback_iterate */ hiopVector* cons_lambdas_; - /** + /** * Internal buffers. These vectors are used in unscaling the corresponding values. */ hiopVector* temp_eq_; @@ -415,8 +389,9 @@ class hiopNlpFormulation private: hiopNlpFormulation(const hiopNlpFormulation& s) - : nlp_transformations_(this), interface_base(s.interface_base) - + : nlp_transformations_(this), + interface_base(s.interface_base) + {}; }; @@ -438,62 +413,60 @@ class hiopNlpDenseConstraints : public hiopNlpFormulation /* specialized evals to avoid overhead of dynamic cast. Generic variants available above. */ virtual bool eval_Jac_c(hiopVector& x, bool new_x, double* Jac_c); virtual bool eval_Jac_d(hiopVector& x, bool new_x, double* Jac_d); + protected: - //calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of - //hiopMatrix arguments + // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of + // hiopMatrix arguments virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d); + public: virtual bool eval_Hess_Lagr(const hiopVector& x, - bool new_x, - const double& obj_factor, - const hiopVector& lambda_eq, - const hiopVector& lambda_ineq, - bool new_lambda, - hiopMatrix& Hess_L) + bool new_x, + const double& obj_factor, + const hiopVector& lambda_eq, + const hiopVector& lambda_ineq, + bool new_lambda, + hiopMatrix& Hess_L) { - //silently ignore the call since we're in the quasi-Newton case + // silently ignore the call since we're in the quasi-Newton case return true; } /* Allocates the LSQ duals update class. */ virtual hiopDualsLsqUpdate* alloc_duals_lsq_updater(); - + virtual hiopMatrixDense* alloc_Jac_c(); virtual hiopMatrixDense* alloc_Jac_d(); virtual hiopMatrixDense* alloc_Jac_cons(); - //returns HessianDiagPlusRowRank which (fakely) inherits from hiopMatrix + // returns HessianDiagPlusRowRank which (fakely) inherits from hiopMatrix virtual hiopMatrix* alloc_Hess_Lagr(); - /* this is in general for a dense matrix with n_vars cols and a small number of + /* this is in general for a dense matrix with n_vars cols and a small number of * 'nrows' rows. The second argument indicates how much total memory should the * matrix (pre)allocate. */ - virtual hiopMatrixDense* alloc_multivector_primal(int nrows, int max_rows=-1) const; + virtual hiopMatrixDense* alloc_multivector_primal(int nrows, int max_rows = -1) const; private: /* interface implemented and provided by the user */ hiopInterfaceDenseConstraints& interface; }; - - /* ************************************************************************* * Class is for general NLPs that have mixed sparse-dense (MDS) derivatives - * blocks. + * blocks. * ************************************************************************* */ class hiopNlpMDS : public hiopNlpFormulation { public: hiopNlpMDS(hiopInterfaceMDS& interface_, const char* option_file = nullptr) - : hiopNlpFormulation(interface_, option_file), interface(interface_) + : hiopNlpFormulation(interface_, option_file), + interface(interface_) { buf_lambda_ = LinearAlgebraFactory::create_vector(options->GetString("mem_space"), 0); } - virtual ~hiopNlpMDS() - { - delete buf_lambda_; - } + virtual ~hiopNlpMDS() { delete buf_lambda_; } virtual bool finalizeInitialization(); @@ -501,52 +474,53 @@ class hiopNlpMDS : public hiopNlpFormulation virtual bool eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d); protected: - //calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments + // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d); + public: virtual bool eval_Hess_Lagr(const hiopVector& x, - bool new_x, - const double& obj_factor, - const hiopVector& lambda_eq, - const hiopVector& lambda_ineq, - bool new_lambdas, - hiopMatrix& Hess_L); - + bool new_x, + const double& obj_factor, + const hiopVector& lambda_eq, + const hiopVector& lambda_ineq, + bool new_lambdas, + hiopMatrix& Hess_L); + /* Allocates the LSQ duals update class. */ virtual hiopDualsLsqUpdate* alloc_duals_lsq_updater(); - - virtual hiopMatrix* alloc_Jac_c() + + virtual hiopMatrix* alloc_Jac_c() { - assert(n_vars_ == nx_sparse+nx_dense); + assert(n_vars_ == nx_sparse + nx_dense); return new hiopMatrixMDS(n_cons_eq_, nx_sparse, nx_dense, nnz_sparse_Jaceq, options->GetString("mem_space")); } - virtual hiopMatrix* alloc_Jac_d() + virtual hiopMatrix* alloc_Jac_d() { - assert(n_vars_ == nx_sparse+nx_dense); + assert(n_vars_ == nx_sparse + nx_dense); return new hiopMatrixMDS(n_cons_ineq_, nx_sparse, nx_dense, nnz_sparse_Jacineq, options->GetString("mem_space")); } virtual hiopMatrix* alloc_Jac_cons() { - assert(n_vars_ == nx_sparse+nx_dense); + assert(n_vars_ == nx_sparse + nx_dense); return new hiopMatrixMDS(n_cons_, nx_sparse, nx_dense, - nnz_sparse_Jaceq+nnz_sparse_Jacineq, + nnz_sparse_Jaceq + nnz_sparse_Jacineq, options->GetString("mem_space")); } virtual hiopMatrix* alloc_Hess_Lagr() { - assert(0==nnz_sparse_Hess_Lagr_SD); + assert(0 == nnz_sparse_Hess_Lagr_SD); return new hiopMatrixSymBlockDiagMDS(nx_sparse, nx_dense, nnz_sparse_Hess_Lagr_SS, options->GetString("mem_space")); } /** const accessors */ virtual size_type nx_sp() const { return nx_sparse; } virtual size_type nx_de() const { return nx_dense; } - inline int get_nnz_sp_Jaceq() const { return nnz_sparse_Jaceq; } - inline int get_nnz_sp_Jacineq() const { return nnz_sparse_Jacineq; } - inline int get_nnz_sp_Hess_Lagr_SS() const { return nnz_sparse_Hess_Lagr_SS; } - inline int get_nnz_sp_Hess_Lagr_SD() const { return nnz_sparse_Hess_Lagr_SD; } + inline int get_nnz_sp_Jaceq() const { return nnz_sparse_Jaceq; } + inline int get_nnz_sp_Jacineq() const { return nnz_sparse_Jacineq; } + inline int get_nnz_sp_Hess_Lagr_SS() const { return nnz_sparse_Hess_Lagr_SS; } + inline int get_nnz_sp_Hess_Lagr_SD() const { return nnz_sparse_Hess_Lagr_SD; } private: hiopInterfaceMDS& interface; @@ -557,7 +531,6 @@ class hiopNlpMDS : public hiopNlpFormulation hiopVector* buf_lambda_; }; - /* ************************************************************************* * Class is for general NLPs that have sparse derivatives blocks. * ************************************************************************* @@ -566,15 +539,14 @@ class hiopNlpSparse : public hiopNlpFormulation { public: hiopNlpSparse(hiopInterfaceSparse& interface_, const char* option_file = nullptr) - : hiopNlpFormulation(interface_, option_file), interface(interface_), - num_jac_eval_{0}, num_hess_eval_{0} + : hiopNlpFormulation(interface_, option_file), + interface(interface_), + num_jac_eval_{0}, + num_hess_eval_{0} { buf_lambda_ = LinearAlgebraFactory::create_vector(options->GetString("mem_space"), 0); } - virtual ~hiopNlpSparse() - { - delete buf_lambda_; - } + virtual ~hiopNlpSparse() { delete buf_lambda_; } virtual bool finalizeInitialization(); @@ -582,46 +554,52 @@ class hiopNlpSparse : public hiopNlpFormulation virtual bool eval_Jac_d(hiopVector& x, bool new_x, hiopMatrix& Jac_d); protected: - //calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments + // calls specific hiopInterfaceXXX::eval_Jac_cons and deals with specializations of hiopMatrix arguments virtual bool eval_Jac_c_d_interface_impl(hiopVector& x, bool new_x, hiopMatrix& Jac_c, hiopMatrix& Jac_d); public: virtual bool eval_Hess_Lagr(const hiopVector& x, - bool new_x, - const double& obj_factor, - const hiopVector& lambda_eq, - const hiopVector& lambda_ineq, - bool new_lambdas, - hiopMatrix& Hess_L); + bool new_x, + const double& obj_factor, + const hiopVector& lambda_eq, + const hiopVector& lambda_ineq, + bool new_lambdas, + hiopMatrix& Hess_L); /* Allocates the LSQ duals update class. */ virtual hiopDualsLsqUpdate* alloc_duals_lsq_updater(); - + virtual hiopMatrix* alloc_Jac_c() { - return LinearAlgebraFactory::create_matrix_sparse(options->GetString("mem_space"), n_cons_eq_, n_vars_, nnz_sparse_Jaceq_); - //return new hiopMatrixSparseTriplet(n_cons_eq_, n_vars_, nnz_sparse_Jaceq_); + return LinearAlgebraFactory::create_matrix_sparse(options->GetString("mem_space"), + n_cons_eq_, + n_vars_, + nnz_sparse_Jaceq_); + // return new hiopMatrixSparseTriplet(n_cons_eq_, n_vars_, nnz_sparse_Jaceq_); } virtual hiopMatrix* alloc_Jac_d() { - return LinearAlgebraFactory::create_matrix_sparse(options->GetString("mem_space"), n_cons_ineq_, n_vars_, nnz_sparse_Jacineq_); - //return new hiopMatrixSparseTriplet(n_cons_ineq_, n_vars_, nnz_sparse_Jacineq_); + return LinearAlgebraFactory::create_matrix_sparse(options->GetString("mem_space"), + n_cons_ineq_, + n_vars_, + nnz_sparse_Jacineq_); + // return new hiopMatrixSparseTriplet(n_cons_ineq_, n_vars_, nnz_sparse_Jacineq_); } virtual hiopMatrix* alloc_Jac_cons() { - return LinearAlgebraFactory::create_matrix_sparse(options->GetString("mem_space"),n_cons_, n_vars_, nnz_sparse_Jaceq_ + nnz_sparse_Jacineq_); - //return new hiopMatrixSparseTriplet(n_cons_, n_vars_, nnz_sparse_Jaceq_ + nnz_sparse_Jacineq_); + return LinearAlgebraFactory::create_matrix_sparse(options->GetString("mem_space"), + n_cons_, + n_vars_, + nnz_sparse_Jaceq_ + nnz_sparse_Jacineq_); + // return new hiopMatrixSparseTriplet(n_cons_, n_vars_, nnz_sparse_Jaceq_ + nnz_sparse_Jacineq_); } virtual hiopMatrix* alloc_Hess_Lagr() { - return LinearAlgebraFactory::create_matrix_sym_sparse(options->GetString("mem_space"),n_vars_, nnz_sparse_Hess_Lagr_); - //return new hiopMatrixSymSparseTriplet(n_vars_, nnz_sparse_Hess_Lagr_); - } - virtual size_type nx() const - { - return n_vars_; + return LinearAlgebraFactory::create_matrix_sym_sparse(options->GetString("mem_space"), n_vars_, nnz_sparse_Hess_Lagr_); + // return new hiopMatrixSymSparseTriplet(n_vars_, nnz_sparse_Hess_Lagr_); } + virtual size_type nx() const { return n_vars_; } - //not inherited from NlpFormulation + // not inherited from NlpFormulation /** * @brief Allocates a non-MPI vector with size given by the size of primal plus dual spaces. @@ -629,16 +607,15 @@ class hiopNlpSparse : public hiopNlpFormulation */ virtual hiopVector* alloc_primal_dual_vec() const { - assert(n_cons_ == n_cons_eq_+n_cons_ineq_); - return LinearAlgebraFactory::create_vector(options->GetString("mem_space"), - n_vars_+n_cons_); + assert(n_cons_ == n_cons_eq_ + n_cons_ineq_); + return LinearAlgebraFactory::create_vector(options->GetString("mem_space"), n_vars_ + n_cons_); } /** const accessors */ - inline int get_nnz_Jaceq() const { return nnz_sparse_Jaceq_; } - inline int get_nnz_Jacineq() const { return nnz_sparse_Jacineq_; } - inline int get_nnz_Hess_Lagr() const { return nnz_sparse_Hess_Lagr_; } - + inline int get_nnz_Jaceq() const { return nnz_sparse_Jaceq_; } + inline int get_nnz_Jacineq() const { return nnz_sparse_Jacineq_; } + inline int get_nnz_Hess_Lagr() const { return nnz_sparse_Hess_Lagr_; } + protected: hiopInterfaceSparse& interface; int nnz_sparse_Jaceq_; @@ -651,26 +628,24 @@ class hiopNlpSparse : public hiopNlpFormulation }; /** - * Specialized NLP formulation class that poses equalities as relaxed two-sided + * Specialized NLP formulation class that poses equalities as relaxed two-sided * inequalities */ class hiopNlpSparseIneq : public hiopNlpSparse { public: hiopNlpSparseIneq(hiopInterfaceSparse& interface_, const char* option_file = nullptr) - : hiopNlpSparse(interface_, option_file), - n_cons_eq_origNLP_(0), - eq_relax_value_(1e-8) - { - } - virtual ~hiopNlpSparseIneq() - { - } + : hiopNlpSparse(interface_, option_file), + n_cons_eq_origNLP_(0), + eq_relax_value_(1e-8) + {} + virtual ~hiopNlpSparseIneq() {} /* Preprocess constraints so that equalities are posed as relaxed two-sided inequalities. */ virtual bool process_constraints(); /* Perform initialization and preprocessing. */ - virtual bool finalizeInitialization(); + virtual bool finalizeInitialization(); + protected: /* Number of equalities in the original NLP formulation. */ size_type n_cons_eq_origNLP_; @@ -678,5 +653,5 @@ class hiopNlpSparseIneq : public hiopNlpSparse /* Maximum violation of the equalities relative to the magnitude of the right-hand side. */ double eq_relax_value_; }; -} // end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopNlpTransforms.cpp b/src/Optimization/hiopNlpTransforms.cpp index d97696b56..a4d77e12b 100644 --- a/src/Optimization/hiopNlpTransforms.cpp +++ b/src/Optimization/hiopNlpTransforms.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -53,7 +53,7 @@ * @author Nai-Yuan Chiang , LLNL * */ - + #include "hiopNlpTransforms.hpp" #include "hiopNlpFormulation.hpp" @@ -61,32 +61,34 @@ namespace hiop { -hiopFixedVarsRemover:: -hiopFixedVarsRemover(hiopNlpFormulation* nlp, - const hiopVector& xl, - const hiopVector& xu, - const double& fixedVarTol_, - const size_type& numFixedVars, - const size_type& numFixedVars_local) - : hiopNlpTransformation(nlp), - n_fixed_vars_local(numFixedVars_local), fixedVarTol(fixedVarTol_), - Jacc_fs(nullptr), Jacd_fs(nullptr), - Jacc_rs_ref(nullptr), Jacd_rs_ref(nullptr), - fs2rs_idx_map(xl.get_local_size()), - x_rs_ref_(nullptr) +hiopFixedVarsRemover::hiopFixedVarsRemover(hiopNlpFormulation* nlp, + const hiopVector& xl, + const hiopVector& xu, + const double& fixedVarTol_, + const size_type& numFixedVars, + const size_type& numFixedVars_local) + : hiopNlpTransformation(nlp), + n_fixed_vars_local(numFixedVars_local), + fixedVarTol(fixedVarTol_), + Jacc_fs(nullptr), + Jacd_fs(nullptr), + Jacc_rs_ref(nullptr), + Jacd_rs_ref(nullptr), + fs2rs_idx_map(xl.get_local_size()), + x_rs_ref_(nullptr) { xl_fs = xl.new_copy(); xu_fs = xu.new_copy(); - x_fs = xl.alloc_clone(); + x_fs = xl.alloc_clone(); grad_fs = xl.alloc_clone(); n_fs = xl.get_size(); - n_rs = n_fs-numFixedVars; + n_rs = n_fs - numFixedVars; }; hiopFixedVarsRemover::~hiopFixedVarsRemover() { - delete xl_fs; + delete xl_fs; delete xu_fs; delete x_fs; delete grad_fs; @@ -98,96 +100,93 @@ hiopFixedVarsRemover::~hiopFixedVarsRemover() /* saves the inter-process distribution of (primal) vectors distribution */ void hiopFixedVarsRemover::setFSVectorDistrib(index_type* vec_distrib_in, int num_ranks) { - assert(vec_distrib_in!=NULL); - fs_vec_distrib.resize(num_ranks+1); - std::copy(vec_distrib_in, vec_distrib_in+num_ranks+1, fs_vec_distrib.begin()); + assert(vec_distrib_in != NULL); + fs_vec_distrib.resize(num_ranks + 1); + std::copy(vec_distrib_in, vec_distrib_in + num_ranks + 1, fs_vec_distrib.begin()); }; /* allocates and returns the reduced-space column partitioning to be used internally by HiOp */ index_type* hiopFixedVarsRemover::allocRSVectorDistrib() { - size_type nlen = fs_vec_distrib.size(); //nlen==nranks+1 - assert(nlen>=1); + size_type nlen = fs_vec_distrib.size(); // nlen==nranks+1 + assert(nlen >= 1); index_type* rsVecDistrib = new index_type[nlen]; - rsVecDistrib[0]=0; + rsVecDistrib[0] = 0; #ifdef HIOP_DEEPCHECKS - assert(fs_vec_distrib[0]==0); - assert(nlen>=1); + assert(fs_vec_distrib[0] == 0); + assert(nlen >= 1); #endif #ifdef HIOP_USE_MPI int ierr; #ifdef HIOP_DEEPCHECKS - int nRanks=-1; + int nRanks = -1; ierr = MPI_Comm_size(comm, &nRanks); - assert(nRanks==nlen-1); + assert(nRanks == nlen - 1); #endif - //first gather on all ranks the number of variables fixed on each rank - ierr = MPI_Allgather(&n_fixed_vars_local, - 1, - MPI_HIOP_SIZE_TYPE, - rsVecDistrib+1, - 1, - MPI_HIOP_INDEX_TYPE, - comm); - assert(ierr==MPI_SUCCESS); + // first gather on all ranks the number of variables fixed on each rank + ierr = MPI_Allgather(&n_fixed_vars_local, 1, MPI_HIOP_SIZE_TYPE, rsVecDistrib + 1, 1, MPI_HIOP_INDEX_TYPE, comm); + assert(ierr == MPI_SUCCESS); #else - assert(nlen==1); + assert(nlen == 1); #endif - assert(rsVecDistrib[0]==0); + assert(rsVecDistrib[0] == 0); const int intNLEN(static_cast(nlen)); - //then accumulate these - for(int r=1; rget_local_size(); - double *xl_vec= xl_fs->local_data(), *xu_vec= xu_fs->local_data(); + int n_fs_local = xl_fs->get_local_size(); + double *xl_vec = xl_fs->local_data(), *xu_vec = xu_fs->local_data(); /* build the map from full-space to reduced-space */ - int it_rs=0; - for(int i=0;ioptions->GetString("mem_space"),neq, n_fs, fs_vec_distrib.data(), comm); - Jacd_fs = LinearAlgebraFactory:: - create_matrix_dense(nlp_->options->GetString("mem_space"), nineq, n_fs, fs_vec_distrib.data(), comm); + Jacc_fs = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), + neq, + n_fs, + fs_vec_distrib.data(), + comm); + Jacd_fs = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), + nineq, + n_fs, + fs_vec_distrib.data(), + comm); } else { - Jacc_fs = LinearAlgebraFactory:: - create_matrix_dense(nlp_->options->GetString("mem_space"), neq, n_fs, NULL, comm); - Jacd_fs = LinearAlgebraFactory:: - create_matrix_dense(nlp_->options->GetString("mem_space"), nineq, n_fs, NULL, comm); + Jacc_fs = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), neq, n_fs, NULL, comm); + Jacd_fs = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), nineq, n_fs, NULL, comm); } #else Jacc_fs = LinearAlgebraFactory::create_matrix_dense(nlp_->options->GetString("mem_space"), neq, n_fs); @@ -197,22 +196,21 @@ bool hiopFixedVarsRemover::setupConstraintsPart(const int& neq, const int& nineq } /* "copies" a full space vector/array to a reduced space vector/array */ -void hiopFixedVarsRemover::copyFsToRs(const hiopVector& fsVec, hiopVector& rsVec) +void hiopFixedVarsRemover::copyFsToRs(const hiopVector& fsVec, hiopVector& rsVec) { - assert(fsVec.get_local_size()==static_cast(fs2rs_idx_map.size())); + assert(fsVec.get_local_size() == static_cast(fs2rs_idx_map.size())); apply_to_vector(&fsVec, &rsVec); } -void hiopFixedVarsRemover:: -copyFsToRs(const hiopInterfaceBase::NonlinearityType* fs, hiopInterfaceBase::NonlinearityType* rs) +void hiopFixedVarsRemover::copyFsToRs(const hiopInterfaceBase::NonlinearityType* fs, hiopInterfaceBase::NonlinearityType* rs) { int rs_idx; const int Size(static_cast(fs2rs_idx_map.size())); - for(int i=0; i=0) { + if(rs_idx >= 0) { rs[rs_idx] = fs[i]; - } + } } } @@ -224,9 +222,9 @@ void hiopFixedVarsRemover::apply_inv_to_vector(const hiopVector* vec_rs, hiopVec double* vec_fs_arr = vec_fs->local_data(); int rs_idx; const int Size(static_cast(fs2rs_idx_map.size())); - for(int i=0; ilocal_data_const(); int rs_idx; const int Size(fs2rs_idx_map.size()); - for(int i=0; i=0) { - vec_rs_arr[rs_idx]=vec_fs_arr[i]; + if(rs_idx >= 0) { + vec_rs_arr[rs_idx] = vec_fs_arr[i]; } } } @@ -257,15 +255,15 @@ void hiopFixedVarsRemover::applyToMatrix(const double* M_rs, const int& m_in, do assert(nfs == fs_n_local()); const int nrs = rs_n_local(); - for(int i=0; i=0) { - M_rs[i*nrs+rs_idx] = M_fs[i*nfs+j]; + if(rs_idx >= 0) { + M_rs[i * nrs + rs_idx] = M_fs[i * nfs + j]; } } } } -hiopFixedVarsRelaxer:: -hiopFixedVarsRelaxer(hiopNlpFormulation* nlp, - const hiopVector& xl, - const hiopVector& xu, - const size_type& numFixedVars, - const size_type& numFixedVars_local) - : hiopNlpTransformation(nlp), - xl_copy(NULL), xu_copy(NULL), n_vars(xl.get_size()), n_vars_local(xl.get_local_size()) +hiopFixedVarsRelaxer::hiopFixedVarsRelaxer(hiopNlpFormulation* nlp, + const hiopVector& xl, + const hiopVector& xu, + const size_type& numFixedVars, + const size_type& numFixedVars_local) + : hiopNlpTransformation(nlp), + xl_copy(NULL), + xu_copy(NULL), + n_vars(xl.get_size()), + n_vars_local(xl.get_local_size()) { - //xl_copy = xl.new_copy(); // no need to copy at this point - //xu_copy = xu.new_copy(); // no need to copy at this point + // xl_copy = xl.new_copy(); // no need to copy at this point + // xu_copy = xu.new_copy(); // no need to copy at this point } hiopFixedVarsRelaxer::~hiopFixedVarsRelaxer() @@ -308,38 +308,42 @@ hiopFixedVarsRelaxer::~hiopFixedVarsRelaxer() if(xu_copy) delete xu_copy; } -void hiopFixedVarsRelaxer:: -relax(const double& fixed_var_tol, const double& fixed_var_perturb, hiopVector& xl, hiopVector& xu) +void hiopFixedVarsRelaxer::relax(const double& fixed_var_tol, + const double& fixed_var_perturb, + hiopVector& xl, + hiopVector& xu) { - double *xla=xl.local_data(), *xua=xu.local_data(); - size_type n=xl.get_local_size(); + double *xla = xl.local_data(), *xua = xu.local_data(); + size_type n = xl.get_local_size(); double xuabs; - for(index_type i=0; ioptions->GetNumeric("scaling_max_grad"); const double min_grad = nlp_->options->GetNumeric("scaling_min_grad"); const double max_obj_grad = nlp_->options->GetNumeric("scaling_max_obj_grad"); const double max_con_grad = nlp_->options->GetNumeric("scaling_max_con_grad"); - const double gradf_infnorm = gradf.infnorm(); + const double gradf_infnorm = gradf.infnorm(); scale_factor_obj = 1.; if(max_obj_grad == 0.) { if(gradf_infnorm > max_grad) { @@ -452,19 +462,18 @@ hiopNLPObjGradScaling::hiopNLPObjGradScaling(hiopNlpFormulation* nlp, if(min_grad > 0.0 && scale_factor_obj < min_grad) { scale_factor_obj = min_grad; } - + scale_factor_c = c.new_copy(); scale_factor_d = d.new_copy(); - scale_factor_cd = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), - n_eq + n_ineq); + scale_factor_cd = LinearAlgebraFactory::create_vector(nlp_->options->GetString("mem_space"), n_eq + n_ineq); Jac_c.row_max_abs_value(*scale_factor_c); - scale_factor_c->scale(1./max_grad); + scale_factor_c->scale(1. / max_grad); scale_factor_c->component_max(1.0); scale_factor_c->invert(); Jac_d.row_max_abs_value(*scale_factor_d); - scale_factor_d->scale(1./max_grad); + scale_factor_d->scale(1. / max_grad); scale_factor_d->component_max(1.0); scale_factor_d->invert(); @@ -473,18 +482,18 @@ hiopNLPObjGradScaling::hiopNLPObjGradScaling(hiopNlpFormulation* nlp, Jac_c.row_max_abs_value(*scale_factor_c); Jac_d.row_max_abs_value(*scale_factor_d); if(max_con_grad == 0.) { - if(scale_factor_c->infnorm() > max_grad){ - scale_factor_c->scale(1./max_grad); + if(scale_factor_c->infnorm() > max_grad) { + scale_factor_c->scale(1. / max_grad); scale_factor_c->component_max(1.0); - scale_factor_c->invert(); + scale_factor_c->invert(); } else { scale_factor_c->setToConstant(1.0); } - - if(scale_factor_d->infnorm() > max_grad){ - scale_factor_d->scale(1./max_grad); + + if(scale_factor_d->infnorm() > max_grad) { + scale_factor_d->scale(1. / max_grad); scale_factor_d->component_max(1.0); - scale_factor_d->invert(); + scale_factor_d->invert(); } else { scale_factor_d->setToConstant(1.0); } @@ -505,9 +514,4 @@ hiopNLPObjGradScaling::~hiopNLPObjGradScaling() if(scale_factor_cd) delete scale_factor_cd; } - - - - - -} //end of namespace +} // namespace hiop diff --git a/src/Optimization/hiopNlpTransforms.hpp b/src/Optimization/hiopNlpTransforms.hpp index 0548f5e74..cf62026bf 100644 --- a/src/Optimization/hiopNlpTransforms.hpp +++ b/src/Optimization/hiopNlpTransforms.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -68,10 +68,10 @@ namespace hiop { class hiopNlpFormulation; - + /** Template class for internal NLP manipulation/transformation. * - * Examples of such transformations are removing fix variables, relaxing fixed + * Examples of such transformations are removing fix variables, relaxing fixed * variables, and problem rescaling. * * applyToXXX returns the transformation @@ -84,64 +84,64 @@ class hiopNlpTransformation virtual bool setup() = 0; /* number of vars in the NLP after the transformation */ - virtual size_type n_post()=0; - virtual size_type n_post_local()=0; + virtual size_type n_post() = 0; + virtual size_type n_post_local() = 0; /* number of vars in the NLP to which the transformation is to be applied */ - virtual size_type n_pre()=0; - virtual size_type n_pre_local()=0; + virtual size_type n_pre() = 0; + virtual size_type n_pre_local() = 0; /* transforms variable vector, from transformed ones to original ones*/ virtual inline hiopVector* apply_inv_to_x(hiopVector& x, const bool& new_x) { return &x; }; /* transforms variable vector, from original ones to transformed ones*/ virtual inline hiopVector* apply_to_x(hiopVector& x) { return &x; } - virtual inline void apply_to_x(hiopVector& x_in, hiopVector& x_out) - { - //default implementation should have x_in as x_out's internal data array + virtual inline void apply_to_x(hiopVector& x_in, hiopVector& x_out) + { + // default implementation should have x_in as x_out's internal data array assert(x_in.local_data() == x_out.local_data()); } - virtual inline double apply_inv_to_obj (double& f_in) { return f_in;} - virtual inline double apply_to_obj (double& f_in) { return f_in;} + virtual inline double apply_inv_to_obj(double& f_in) { return f_in; } + virtual inline double apply_to_obj(double& f_in) { return f_in; } virtual inline hiopVector* apply_inv_to_grad_obj(hiopVector& grad_in) { return &grad_in; } - virtual inline hiopVector* apply_to_grad_obj (hiopVector& grad_in) { return &grad_in; } + virtual inline hiopVector* apply_to_grad_obj(hiopVector& grad_in) { return &grad_in; } - virtual inline hiopVector* apply_inv_to_cons_eq (hiopVector& c_in, const int& m_in) { return &c_in; } - virtual inline hiopVector* apply_to_cons_eq (hiopVector& c_in, const int& m_in) { return &c_in; } + virtual inline hiopVector* apply_inv_to_cons_eq(hiopVector& c_in, const int& m_in) { return &c_in; } + virtual inline hiopVector* apply_to_cons_eq(hiopVector& c_in, const int& m_in) { return &c_in; } virtual inline hiopVector* apply_inv_to_cons_ineq(hiopVector& c_in, const int& m_in) { return &c_in; } - virtual inline hiopVector* apply_to_cons_ineq (hiopVector& c_in, const int& m_in) { return &c_in; } - - //the following two are for when the underlying NLP formulation works with full body constraints, - //that is, evaluates both equalities and inequalities at once (a.k.a. one-call constraints and - //and Jacobian evaluations) - virtual inline hiopVector* apply_inv_to_cons (hiopVector& cons_in, const int& m_in) { return &cons_in; } - virtual inline hiopVector* apply_to_cons (hiopVector& cons_in, const int& m_in) { return &cons_in; } + virtual inline hiopVector* apply_to_cons_ineq(hiopVector& c_in, const int& m_in) { return &c_in; } + + // the following two are for when the underlying NLP formulation works with full body constraints, + // that is, evaluates both equalities and inequalities at once (a.k.a. one-call constraints and + // and Jacobian evaluations) + virtual inline hiopVector* apply_inv_to_cons(hiopVector& cons_in, const int& m_in) { return &cons_in; } + virtual inline hiopVector* apply_to_cons(hiopVector& cons_in, const int& m_in) { return &cons_in; } virtual inline hiopMatrix* apply_inv_to_jacob_eq(hiopMatrix& Jac_in, const int& m_in) { return &Jac_in; } - virtual inline hiopMatrix* apply_to_jacob_eq (hiopMatrix& Jac_in, const int& m_in) { return &Jac_in; } + virtual inline hiopMatrix* apply_to_jacob_eq(hiopMatrix& Jac_in, const int& m_in) { return &Jac_in; } virtual inline hiopMatrix* apply_inv_to_jacob_ineq(hiopMatrix& Jac_in, const int& m_in) { return &Jac_in; } - virtual inline hiopMatrix* apply_to_jacob_ineq (hiopMatrix& Jac_in, const int& m_in) { return &Jac_in; } + virtual inline hiopMatrix* apply_to_jacob_ineq(hiopMatrix& Jac_in, const int& m_in) { return &Jac_in; } virtual inline hiopMatrix* apply_inv_to_jacob_cons(hiopMatrix& Jac_in, const int& m_in) { return &Jac_in; } - virtual inline hiopMatrix* apply_to_jacob_cons (hiopMatrix& Jac_in, const int& m_in) { return &Jac_in; } + virtual inline hiopMatrix* apply_to_jacob_cons(hiopMatrix& Jac_in, const int& m_in) { return &Jac_in; } virtual inline hiopMatrix* apply_inv_to_larg_hess(hiopMatrix& Hess_in, const int& m_in) { return &Hess_in; } - virtual inline hiopMatrix* apply_to_larg_hess (hiopMatrix& Hess_in, const int& m_in) { return &Hess_in; } - + virtual inline hiopMatrix* apply_to_larg_hess(hiopMatrix& Hess_in, const int& m_in) { return &Hess_in; } + public: hiopNlpTransformation(hiopNlpFormulation* nlp) - : nlp_(nlp) - {}; + : nlp_(nlp) {}; virtual ~hiopNlpTransformation() {}; + protected: hiopNlpFormulation* nlp_; }; /** Removes fixed variables from the NLP formulation. * - * applyToXXX: takes the internal (reduced-space) XXX object (variables vector, function, gradient, etc) - * of the NLP formulation and returns it in the full-space (including the fixed + * applyToXXX: takes the internal (reduced-space) XXX object (variables vector, function, gradient, etc) + * of the NLP formulation and returns it in the full-space (including the fixed * variables) so that it is ready to be passed to user's calling code. * * applyInvToXXX: takes XXX as seen by the user calling code and returns the corresponding @@ -157,46 +157,50 @@ class hiopFixedVarsRemover : public hiopNlpTransformation const size_type& numFixedVars, const size_type& numFixedVars_local); ~hiopFixedVarsRemover(); + public: /** inherited from the parent class */ /* more setup methods (specific to this class) are defined below */ - virtual inline bool setup() {return true;} + virtual inline bool setup() { return true; } /* number of vars in the NLP after the tranformation */ virtual inline size_type n_post() { return rs_n(); } /* number of vars in the NLP to which the tranformation is to be applied */ - virtual inline size_type n_pre () { return fs_n(); } + virtual inline size_type n_pre() { return fs_n(); } virtual inline size_type n_post_local() { return rs_n_local(); } virtual inline size_type n_pre_local() { return fs_n_local(); } /* from reduced space to full space */ - inline hiopVector* apply_inv_to_x(hiopVector& x, const bool& new_x) - { + inline hiopVector* apply_inv_to_x(hiopVector& x, const bool& new_x) + { x_rs_ref_ = &x; - if(!new_x) { return x_fs; } + if(!new_x) { + return x_fs; + } apply_inv_to_vector(&x, x_fs); return x_fs; }; /* from full space to reduced space (fixed vars removed) */ - inline hiopVector* apply_to_x(hiopVector& x_fs_in) - { - assert(x_rs_ref_!=NULL); assert(x_fs_in.local_data()==x_fs->local_data()); + inline hiopVector* apply_to_x(hiopVector& x_fs_in) + { + assert(x_rs_ref_ != NULL); + assert(x_fs_in.local_data() == x_fs->local_data()); apply_inv_to_vector(&x_fs_in, x_rs_ref_); - return x_rs_ref_; + return x_rs_ref_; } - + /* from fs to rs */ - inline void apply_to_x(hiopVector& x_in, hiopVector& xv_out) + inline void apply_to_x(hiopVector& x_in, hiopVector& xv_out) { #ifdef HIOP_DEEPCHECKS - assert(xv_out.get_size()get_size()); + assert(xv_out.get_size() < xl_fs->get_size()); #endif apply_to_vector(&x_in, &xv_out); } - + /* from rs to fs and return the fs*/ inline hiopVector* apply_inv_to_grad_obj(hiopVector& grad_in) { @@ -207,7 +211,7 @@ class hiopFixedVarsRemover : public hiopNlpTransformation /* from fs to rs */ inline hiopVector* apply_to_grad_obj(hiopVector& grad_in) { - assert(&grad_in==grad_fs); + assert(&grad_in == grad_fs); apply_to_vector(&grad_in, grad_rs_ref); return grad_rs_ref; } @@ -215,42 +219,42 @@ class hiopFixedVarsRemover : public hiopNlpTransformation inline hiopMatrix* apply_inv_to_jacob_eq(hiopMatrix& Jac_in, const int& m_in) { hiopMatrixDense* Jac_de = dynamic_cast(&Jac_in); - if(Jac_de==nullptr) { + if(Jac_de == nullptr) { return nullptr; } Jacc_rs_ref = Jac_de; - assert(Jacc_fs->m()==m_in); + assert(Jacc_fs->m() == m_in); applyToMatrix(Jac_de->local_data(), m_in, Jacc_fs->local_data()); return Jacc_fs; } - inline hiopMatrix* apply_to_jacob_eq(hiopMatrix& Jac_in, const int& m_in) + inline hiopMatrix* apply_to_jacob_eq(hiopMatrix& Jac_in, const int& m_in) { hiopMatrixDense* Jac_de = dynamic_cast(&Jac_in); - if(Jac_de==NULL) { + if(Jac_de == NULL) { return nullptr; - } - assert(Jacc_fs->m()==m_in); + } + assert(Jacc_fs->m() == m_in); applyInvToMatrix(Jac_de->local_data(), m_in, Jacc_rs_ref->local_data()); return Jacc_rs_ref; } inline hiopMatrix* apply_inv_to_jacob_ineq(hiopMatrix& Jac_in, const int& m_in) { hiopMatrixDense* Jac_de = dynamic_cast(&Jac_in); - if(Jac_de==NULL) { + if(Jac_de == NULL) { return nullptr; } Jacd_rs_ref = Jac_de; - assert(Jacd_fs->m()==m_in); + assert(Jacd_fs->m() == m_in); applyToMatrix(Jac_de->local_data(), m_in, Jacd_fs->local_data()); - return Jacd_fs; + return Jacd_fs; } inline hiopMatrix* apply_to_jacob_ineq(hiopMatrix& Jac_in, const int& m_in) { hiopMatrixDense* Jac_de = dynamic_cast(&Jac_in); - if(Jac_de==NULL) { + if(Jac_de == NULL) { return nullptr; - } - assert(Jacd_fs->m()==m_in); + } + assert(Jacd_fs->m() == m_in); applyInvToMatrix(Jac_de->local_data(), m_in, Jacd_rs_ref->local_data()); return Jacd_rs_ref; } @@ -260,64 +264,73 @@ class hiopFixedVarsRemover : public hiopNlpTransformation bool setupConstraintsPart(const int& neq, const int& nineq); #ifdef HIOP_USE_MPI /* saves the inter-process distribution of (primal) vectors distribution */ - void setFSVectorDistrib(index_type* vec_distrib,int num_ranks); + void setFSVectorDistrib(index_type* vec_distrib, int num_ranks); /* allocates and returns the reduced-space column partitioning to be used internally by HiOp */ index_type* allocRSVectorDistrib(); inline void setMPIComm(const MPI_Comm& commIn) { comm = commIn; } #endif /* "copies" a full space vector to a reduced space vector */ - void copyFsToRs(const hiopVector& fsVec, hiopVector& rsVec); + void copyFsToRs(const hiopVector& fsVec, hiopVector& rsVec); void copyFsToRs(const hiopInterfaceBase::NonlinearityType* fs, hiopInterfaceBase::NonlinearityType* rs); - - inline size_type fs_n() const { return n_fs;} - inline size_type rs_n() const { return n_rs;} - inline size_type fs_n_local() const { assert(xl_fs); return xl_fs->get_local_size(); } - inline size_type rs_n_local() const { assert(xl_fs); return fs_n_local()-n_fixed_vars_local;} -protected: -#if 0 //old interface + + inline size_type fs_n() const { return n_fs; } + inline size_type rs_n() const { return n_rs; } + inline size_type fs_n_local() const + { + assert(xl_fs); + return xl_fs->get_local_size(); + } + inline size_type rs_n_local() const + { + assert(xl_fs); + return fs_n_local() - n_fixed_vars_local; + } + +protected: +#if 0 // old interface void applyToArray (const double* vec_rs, double* vec_fs); void applyInvToArray(const double* vec_fs, double* vec_rs); #endif - void apply_inv_to_vector (const hiopVector* vec_rs, hiopVector* vec_fs); + void apply_inv_to_vector(const hiopVector* vec_rs, hiopVector* vec_fs); void apply_to_vector(const hiopVector* vec_fs, hiopVector* vec_rs); - - void applyToMatrix (const double* M_rs, const int& m_in, double* M_fs); + + void applyToMatrix(const double* M_rs, const int& m_in, double* M_fs); void applyInvToMatrix(const double* M_fs, const int& m_in, double* M_rs); + protected: size_type n_fixed_vars_local; size_type n_fixed_vars; double fixedVarTol; - size_type n_fs; //full-space n - size_type n_rs; //reduced-space n + size_type n_fs; // full-space n + size_type n_rs; // reduced-space n - //working buffer used to hold the full-space (user's) vector of decision variables and other optimiz objects - hiopVector*x_fs, *grad_fs; - //working buffers for the full-space Jacobians + // working buffer used to hold the full-space (user's) vector of decision variables and other optimiz objects + hiopVector *x_fs, *grad_fs; + // working buffers for the full-space Jacobians hiopMatrixDense *Jacc_fs, *Jacd_fs; - - hiopMatrixDense *Jacc_rs_ref; - hiopMatrixDense *Jacd_rs_ref; - //a copy of the lower and upper bounds provided by user - hiopVector*xl_fs, *xu_fs; - //indexes corresponding to fixed variables (local indexes) + hiopMatrixDense* Jacc_rs_ref; + hiopMatrixDense* Jacd_rs_ref; + + // a copy of the lower and upper bounds provided by user + hiopVector *xl_fs, *xu_fs; + // indexes corresponding to fixed variables (local indexes) std::vector fs2rs_idx_map; - //references to reduced-space buffers - returned in applyInvXXX + // references to reduced-space buffers - returned in applyInvXXX hiopVector* x_rs_ref_; hiopVector* grad_rs_ref; #ifdef HIOP_USE_MPI std::vector fs_vec_distrib; MPI_Comm comm; #endif - }; class hiopFixedVarsRelaxer : public hiopNlpTransformation { -public: +public: hiopFixedVarsRelaxer(hiopNlpFormulation* nlp, const hiopVector& xl, const hiopVector& xu, @@ -326,23 +339,24 @@ class hiopFixedVarsRelaxer : public hiopNlpTransformation virtual ~hiopFixedVarsRelaxer(); /* number of vars in the NLP after the tranformation */ - inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); } + inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } // xl_copy->get_size(); } /* number of vars in the NLP to which the tranformation is to be applied */ - virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; } //xl_copy->get_size(); } + virtual size_type n_pre() { /*assert(xl_copy);*/ return n_vars; } // xl_copy->get_size(); } - inline size_type n_post_local() { return n_vars_local; } //xl_copy->get_local_size(); } - inline size_type n_pre_local() { return n_vars_local; } //xl_copy->get_local_size(); } + inline size_type n_post_local() { return n_vars_local; } // xl_copy->get_local_size(); } + inline size_type n_pre_local() { return n_vars_local; } // xl_copy->get_local_size(); } inline bool setup() { return true; } - void relax(const double& fixed_var_tol, const double& fixed_var_perturb, - hiopVector& xl, hiopVector& xu); + void relax(const double& fixed_var_tol, const double& fixed_var_perturb, hiopVector& xl, hiopVector& xu); + private: - hiopVector*xl_copy, *xu_copy; - size_type n_vars; int n_vars_local; + hiopVector *xl_copy, *xu_copy; + size_type n_vars; + int n_vars_local; }; -/** +/** * @brief Scale the NLP formulation before solving the problem * * scale the NLP objective using the maximum gradient approach. @@ -352,46 +366,44 @@ class hiopNLPObjGradScaling : public hiopNlpTransformation { public: hiopNLPObjGradScaling(hiopNlpFormulation* nlp, - hiopVector& c, - hiopVector& d, + hiopVector& c, + hiopVector& d, hiopVector& gradf, - hiopMatrix& Jac_c, - hiopMatrix& Jac_d, - hiopVectorInt& cons_eq_mapping, + hiopMatrix& Jac_c, + hiopMatrix& Jac_d, + hiopVectorInt& cons_eq_mapping, hiopVectorInt& cons_ineq_mapping); ~hiopNLPObjGradScaling(); + public: /** inherited from the parent class */ /* more setup methods (specific to this class) are defined below */ - virtual inline bool setup() {return true;} + virtual inline bool setup() { return true; } /* number of vars in the NLP after the tranformation */ - inline size_type n_post() { return n_vars; } + inline size_type n_post() { return n_vars; } /* number of vars in the NLP to which the tranformation is to be applied */ - virtual size_type n_pre () { return n_vars; } + virtual size_type n_pre() { return n_vars; } - inline size_type n_post_local() { return n_vars_local; } - inline size_type n_pre_local() { return n_vars_local; } + inline size_type n_post_local() { return n_vars_local; } + inline size_type n_pre_local() { return n_vars_local; } + + inline hiopVector* apply_to_x(hiopVector& x) { return hiopNlpTransformation::apply_to_x(x); } + inline void apply_to_x(hiopVector& x_in, hiopVector& x_out) {} - inline hiopVector* apply_to_x(hiopVector& x) - { - return hiopNlpTransformation::apply_to_x(x); - } - inline void apply_to_x(hiopVector& x_in, hiopVector& x_out){} - /// @brief return the scaling fact for objective - inline double get_obj_scale() const {return scale_factor_obj;} + inline double get_obj_scale() const { return scale_factor_obj; } /* from scaled to unscaled objective*/ - inline double apply_inv_to_obj(double& f_in) { return f_in/scale_factor_obj;} + inline double apply_inv_to_obj(double& f_in) { return f_in / scale_factor_obj; } /* from unscaled to scaled objective*/ - inline double apply_to_obj(double& f_in) { return scale_factor_obj*f_in;} + inline double apply_to_obj(double& f_in) { return scale_factor_obj * f_in; } /* from scaled to unscaled*/ inline hiopVector* apply_inv_to_grad_obj(hiopVector& grad_in) { - grad_in.scale(1./scale_factor_obj); + grad_in.scale(1. / scale_factor_obj); return &grad_in; } @@ -404,48 +416,48 @@ class hiopNLPObjGradScaling : public hiopNlpTransformation /* from scaled to unscaled*/ inline hiopVector* apply_inv_to_cons_eq(hiopVector& c_in, const int& m_in) - { - assert(n_eq==m_in); + { + assert(n_eq == m_in); c_in.componentDiv(*scale_factor_c); return &c_in; } /* from unscaled to scaled*/ - inline hiopVector* apply_to_cons_eq(hiopVector& c_in, const int& m_in) - { - assert(n_eq==m_in); + inline hiopVector* apply_to_cons_eq(hiopVector& c_in, const int& m_in) + { + assert(n_eq == m_in); c_in.componentMult(*scale_factor_c); return &c_in; } - + /* from scaled to unscaled*/ inline hiopVector* apply_inv_to_cons_ineq(hiopVector& d_in, const int& m_in) - { - assert(n_ineq==m_in); + { + assert(n_ineq == m_in); d_in.componentDiv(*scale_factor_d); return &d_in; } /* from unscaled to scaled*/ inline hiopVector* apply_to_cons_ineq(hiopVector& d_in, const int& m_in) - { - assert(n_ineq==m_in); + { + assert(n_ineq == m_in); d_in.componentMult(*scale_factor_d); return &d_in; } /* from scaled to unscaled*/ inline hiopVector* apply_inv_to_cons(hiopVector& cd_in, const int& m_in) - { - assert(n_ineq+n_eq==m_in); + { + assert(n_ineq + n_eq == m_in); cd_in.componentDiv(*scale_factor_cd); return &cd_in; } /* from unscaled to scaled*/ inline hiopVector* apply_to_cons(hiopVector& cd_in, const int& m_in) - { - assert(n_ineq+n_eq==m_in); + { + assert(n_ineq + n_eq == m_in); cd_in.componentMult(*scale_factor_cd); return &cd_in; } @@ -453,7 +465,7 @@ class hiopNLPObjGradScaling : public hiopNlpTransformation /* from scaled to unscaled*/ inline hiopMatrix* apply_inv_to_jacob_eq(hiopMatrix& Jac_in, const int& m_in) { - assert(n_eq==m_in); + assert(n_eq == m_in); Jac_in.scale_row(*scale_factor_c, true); return &Jac_in; } @@ -461,7 +473,7 @@ class hiopNLPObjGradScaling : public hiopNlpTransformation /* from scaled to unscaled*/ inline hiopMatrix* apply_to_jacob_eq(hiopMatrix& Jac_in, const int& m_in) { - assert(n_eq==m_in); + assert(n_eq == m_in); Jac_in.scale_row(*scale_factor_c, false); return &Jac_in; } @@ -469,7 +481,7 @@ class hiopNLPObjGradScaling : public hiopNlpTransformation /* from scaled to unscaled*/ inline hiopMatrix* apply_inv_to_jacob_ineq(hiopMatrix& Jac_in, const int& m_in) { - assert(n_ineq==m_in); + assert(n_ineq == m_in); Jac_in.scale_row(*scale_factor_d, true); return &Jac_in; } @@ -477,7 +489,7 @@ class hiopNLPObjGradScaling : public hiopNlpTransformation /* from scaled to unscaled*/ inline hiopMatrix* apply_to_jacob_ineq(hiopMatrix& Jac_in, const int& m_in) { - assert(n_ineq==m_in); + assert(n_ineq == m_in); Jac_in.scale_row(*scale_factor_d, false); return &Jac_in; } @@ -488,7 +500,7 @@ class hiopNLPObjGradScaling : public hiopNlpTransformation void applyToMatrix (const double* M_rs, const int& m_in, double* M_fs); void applyInvToMatrix(const double* M_fs, const int& m_in, double* M_rs); -#endif +#endif private: size_type n_vars, n_vars_local; @@ -500,12 +512,12 @@ class hiopNLPObjGradScaling : public hiopNlpTransformation hiopMatrix *Jacc_unscaled, *Jacd_unscaled; hiopMatrix *Hess_scaled; hiopMatrix *Hess_unscaled; -#endif // 0 +#endif // 0 }; class hiopBoundsRelaxer : public hiopNlpTransformation { -public: +public: hiopBoundsRelaxer(hiopNlpFormulation* nlp, const hiopVector& xl, const hiopVector& xu, @@ -513,185 +525,168 @@ class hiopBoundsRelaxer : public hiopNlpTransformation const hiopVector& du); virtual ~hiopBoundsRelaxer(); - inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } - virtual size_type n_pre () { /*assert(xl_copy);*/ return n_vars; } - inline size_type n_post_local() { return n_vars_local; } - inline size_type n_pre_local() { return n_vars_local; } + inline size_type n_post() { /*assert(xl_copy);*/ return n_vars; } + virtual size_type n_pre() { /*assert(xl_copy);*/ return n_vars; } + inline size_type n_post_local() { return n_vars_local; } + inline size_type n_pre_local() { return n_vars_local; } inline bool setup() { return true; } - inline hiopVector* apply_to_x(hiopVector& x) - { - return hiopNlpTransformation::apply_to_x(x); - } + inline hiopVector* apply_to_x(hiopVector& x) { return hiopNlpTransformation::apply_to_x(x); } - inline void apply_to_x(hiopVector& x_in, hiopVector& x_out) - { - } + inline void apply_to_x(hiopVector& x_in, hiopVector& x_out) {} - void relax(const double& bound_relax_perturb, - hiopVector& xl, - hiopVector& xu, - hiopVector& dl, - hiopVector& du); + void relax(const double& bound_relax_perturb, hiopVector& xl, hiopVector& xu, hiopVector& dl, hiopVector& du); - void relax_from_ori(const double& bound_relax_perturb, - hiopVector& xl, - hiopVector& xu, - hiopVector& dl, - hiopVector& du); + void relax_from_ori(const double& bound_relax_perturb, hiopVector& xl, hiopVector& xu, hiopVector& dl, hiopVector& du); private: hiopVector* xl_ori; hiopVector* xu_ori; hiopVector* dl_ori; hiopVector* du_ori; - size_type n_vars; + size_type n_vars; size_type n_vars_local; size_type n_ineq; }; - - class hiopNlpTransformations : public hiopNlpTransformation { public: hiopNlpTransformations(hiopNlpFormulation* nlp) - : hiopNlpTransformation(nlp), - n_vars_usernlp(-1), - n_vars_local_usernlp(-1) - {}; - virtual ~hiopNlpTransformations() + : hiopNlpTransformation(nlp), + n_vars_usernlp(-1), + n_vars_local_usernlp(-1) {}; + virtual ~hiopNlpTransformations() { std::list::iterator it; - for(it=list_trans_.begin(); it!=list_trans_.end(); it++) - delete (*it); + for(it = list_trans_.begin(); it != list_trans_.end(); it++) delete(*it); }; inline bool setup() { return true; } inline void setUserNlpNumVars(const size_type& n_vars) { n_vars_usernlp = n_vars; } inline void setUserNlpNumLocalVars(const size_type& n_vars) { n_vars_local_usernlp = n_vars; } inline void append(hiopNlpTransformation* t) { list_trans_.push_back(t); } - inline void clear() { + inline void clear() + { std::list::iterator it; - for(it=list_trans_.begin(); it!=list_trans_.end(); it++) - delete (*it); - list_trans_.clear(); + for(it = list_trans_.begin(); it != list_trans_.end(); it++) delete(*it); + list_trans_.clear(); } /* number of vars in the NLP after the tranformation */ - inline virtual size_type n_post() - { + inline virtual size_type n_post() + { #ifdef HIOP_DEEPCHECKS - assert(n_vars_usernlp>0); -#endif + assert(n_vars_usernlp > 0); +#endif if(list_trans_.empty()) { return n_vars_usernlp; } else { #ifdef HIOP_DEEPCHECKS - assert(n_vars_usernlp==list_trans_.front()->n_pre()); -#endif - return list_trans_.back()->n_post(); + assert(n_vars_usernlp == list_trans_.front()->n_pre()); +#endif + return list_trans_.back()->n_post(); } } - inline virtual size_type n_post_local() - { + inline virtual size_type n_post_local() + { #ifdef HIOP_DEEPCHECKS - assert(n_vars_usernlp>0); -#endif + assert(n_vars_usernlp > 0); +#endif if(list_trans_.empty()) { return n_vars_local_usernlp; } else { #ifdef HIOP_DEEPCHECKS - assert(n_vars_usernlp==list_trans_.front()->n_pre()); - assert(n_vars_local_usernlp==list_trans_.front()->n_pre_local()); -#endif - return list_trans_.back()->n_post_local(); + assert(n_vars_usernlp == list_trans_.front()->n_pre()); + assert(n_vars_local_usernlp == list_trans_.front()->n_pre_local()); +#endif + return list_trans_.back()->n_post_local(); } } /* number of vars in the NLP to which the tranformation is to be applied */ - inline virtual size_type n_pre() - { + inline virtual size_type n_pre() + { #ifdef HIOP_DEEPCHECKS - assert(n_vars_usernlp>0); -#endif + assert(n_vars_usernlp > 0); +#endif if(list_trans_.empty()) { return n_vars_usernlp; } else { #ifdef HIOP_DEEPCHECKS - assert(n_vars_usernlp==list_trans_.front()->n_pre()); + assert(n_vars_usernlp == list_trans_.front()->n_pre()); #endif - return list_trans_.front()->n_pre(); + return list_trans_.front()->n_pre(); } } /* number of local vars in the NLP to which the tranformation is to be applied */ - inline virtual size_type n_pre_local() - { + inline virtual size_type n_pre_local() + { #ifdef HIOP_DEEPCHECKS - assert(n_vars_usernlp>0); -#endif + assert(n_vars_usernlp > 0); +#endif if(list_trans_.empty()) { return n_vars_local_usernlp; } else { #ifdef HIOP_DEEPCHECKS - assert(n_vars_usernlp==list_trans_.front()->n_pre()); + assert(n_vars_usernlp == list_trans_.front()->n_pre()); #endif - return list_trans_.front()->n_pre_local(); + return list_trans_.front()->n_pre_local(); } } - - hiopVector* apply_inv_to_x(hiopVector& x, const bool& new_x) + + hiopVector* apply_inv_to_x(hiopVector& x, const bool& new_x) { hiopVector* ret = &x; - for(std::list::reverse_iterator it=list_trans_.rbegin(); it!=list_trans_.rend(); ++it) { - ret = (*it)->apply_inv_to_x(*ret ,new_x); + for(std::list::reverse_iterator it = list_trans_.rbegin(); it != list_trans_.rend(); ++it) { + ret = (*it)->apply_inv_to_x(*ret, new_x); } return ret; } virtual hiopVector* apply_to_x(hiopVector& x) - { + { assert(false && "This overload of apply_to_x is not implemented in hiopNlpTransformations class\n"); return nullptr; } - void apply_to_x(hiopVector& x_in, hiopVector& x_out) + void apply_to_x(hiopVector& x_in, hiopVector& x_out) { - for(std::list::iterator it=list_trans_.begin(); it!=list_trans_.end(); ++it) { + for(std::list::iterator it = list_trans_.begin(); it != list_trans_.end(); ++it) { (*it)->apply_to_x(x_in, x_out); } } - double apply_inv_to_obj(double& f_in) + double apply_inv_to_obj(double& f_in) { double ret = f_in; - for(std::list::reverse_iterator it=list_trans_.rbegin(); it!=list_trans_.rend(); ++it) { + for(std::list::reverse_iterator it = list_trans_.rbegin(); it != list_trans_.rend(); ++it) { ret = (*it)->apply_inv_to_obj(ret); } return ret; } - double apply_to_obj(double& f_in) + double apply_to_obj(double& f_in) { double ret = f_in; - for(std::list::iterator it=list_trans_.begin(); it!=list_trans_.end(); ++it) { + for(std::list::iterator it = list_trans_.begin(); it != list_trans_.end(); ++it) { ret = (*it)->apply_to_obj(ret); } return ret; - } - - hiopVector* apply_inv_to_grad_obj(hiopVector& grad_in) + } + + hiopVector* apply_inv_to_grad_obj(hiopVector& grad_in) { hiopVector* ret = &grad_in; - for(std::list::reverse_iterator it=list_trans_.rbegin(); it!=list_trans_.rend(); ++it) { + for(std::list::reverse_iterator it = list_trans_.rbegin(); it != list_trans_.rend(); ++it) { ret = (*it)->apply_inv_to_grad_obj(*ret); } return ret; } - hiopVector* apply_to_grad_obj(hiopVector& grad_in) + hiopVector* apply_to_grad_obj(hiopVector& grad_in) { hiopVector* ret = &grad_in; - for(std::list::iterator it=list_trans_.begin(); it!=list_trans_.end(); ++it) { + for(std::list::iterator it = list_trans_.begin(); it != list_trans_.end(); ++it) { ret = (*it)->apply_to_grad_obj(*ret); } return ret; @@ -700,7 +695,7 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopVector* apply_inv_to_cons_eq(hiopVector& c_in, const int& m_in) { hiopVector* ret = &c_in; - for(std::list::reverse_iterator it=list_trans_.rbegin(); it!=list_trans_.rend(); ++it) { + for(std::list::reverse_iterator it = list_trans_.rbegin(); it != list_trans_.rend(); ++it) { ret = (*it)->apply_inv_to_cons_eq(*ret, m_in); } return ret; @@ -709,7 +704,7 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopVector* apply_to_cons_eq(hiopVector& c_in, const int& m_in) { hiopVector* ret = &c_in; - for(std::list::iterator it=list_trans_.begin(); it!=list_trans_.end(); ++it) { + for(std::list::iterator it = list_trans_.begin(); it != list_trans_.end(); ++it) { ret = (*it)->apply_to_cons_eq(*ret, m_in); } return ret; @@ -718,7 +713,7 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopVector* apply_inv_to_cons_ineq(hiopVector& c_in, const int& m_in) { hiopVector* ret = &c_in; - for(std::list::reverse_iterator it=list_trans_.rbegin(); it!=list_trans_.rend(); ++it) { + for(std::list::reverse_iterator it = list_trans_.rbegin(); it != list_trans_.rend(); ++it) { ret = (*it)->apply_inv_to_cons_ineq(*ret, m_in); } return ret; @@ -727,7 +722,7 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopVector* apply_to_cons_ineq(hiopVector& c_in, const int& m_in) { hiopVector* ret = &c_in; - for(std::list::iterator it=list_trans_.begin(); it!=list_trans_.end(); ++it) { + for(std::list::iterator it = list_trans_.begin(); it != list_trans_.end(); ++it) { ret = (*it)->apply_to_cons_ineq(*ret, m_in); } return ret; @@ -736,7 +731,7 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopVector* apply_inv_to_cons(hiopVector& c_in, const int& m_in) { hiopVector* ret = &c_in; - for(std::list::reverse_iterator it=list_trans_.rbegin(); it!=list_trans_.rend(); ++it) { + for(std::list::reverse_iterator it = list_trans_.rbegin(); it != list_trans_.rend(); ++it) { ret = (*it)->apply_inv_to_cons(*ret, m_in); } return ret; @@ -745,7 +740,7 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopVector* apply_to_cons(hiopVector& c_in, const int& m_in) { hiopVector* ret = &c_in; - for(std::list::iterator it=list_trans_.begin(); it!=list_trans_.end(); ++it) { + for(std::list::iterator it = list_trans_.begin(); it != list_trans_.end(); ++it) { ret = (*it)->apply_to_cons(*ret, m_in); } return ret; @@ -754,7 +749,7 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopMatrix* apply_inv_to_jacob_eq(hiopMatrix& Jac_in, const int& m_in) { hiopMatrix* ret = &Jac_in; - for(std::list::reverse_iterator it=list_trans_.rbegin(); it!=list_trans_.rend(); ++it) { + for(std::list::reverse_iterator it = list_trans_.rbegin(); it != list_trans_.rend(); ++it) { ret = (*it)->apply_inv_to_jacob_eq(*ret, m_in); } return ret; @@ -763,7 +758,7 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopMatrix* apply_to_jacob_eq(hiopMatrix& Jac_in, const int& m_in) { hiopMatrix* ret = &Jac_in; - for(std::list::iterator it=list_trans_.begin(); it!=list_trans_.end(); ++it) { + for(std::list::iterator it = list_trans_.begin(); it != list_trans_.end(); ++it) { ret = (*it)->apply_to_jacob_eq(*ret, m_in); } return ret; @@ -772,7 +767,7 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopMatrix* apply_inv_to_jacob_ineq(hiopMatrix& Jac_in, const int& m_in) { hiopMatrix* ret = &Jac_in; - for(std::list::reverse_iterator it=list_trans_.rbegin(); it!=list_trans_.rend(); ++it) { + for(std::list::reverse_iterator it = list_trans_.rbegin(); it != list_trans_.rend(); ++it) { ret = (*it)->apply_inv_to_jacob_ineq(*ret, m_in); } return ret; @@ -781,17 +776,16 @@ class hiopNlpTransformations : public hiopNlpTransformation hiopMatrix* apply_to_jacob_ineq(hiopMatrix& Jac_in, const int& m_in) { hiopMatrix* ret = &Jac_in; - for(std::list::iterator it=list_trans_.begin(); it!=list_trans_.end(); ++it) { + for(std::list::iterator it = list_trans_.begin(); it != list_trans_.end(); ++it) { ret = (*it)->apply_to_jacob_ineq(*ret, m_in); } return ret; } - private: std::list list_trans_; - size_type n_vars_usernlp, n_vars_local_usernlp; + size_type n_vars_usernlp, n_vars_local_usernlp; }; -} +} // namespace hiop #endif diff --git a/src/Optimization/hiopPDPerturbation.cpp b/src/Optimization/hiopPDPerturbation.cpp index e73125ace..3d99cf6a2 100644 --- a/src/Optimization/hiopPDPerturbation.cpp +++ b/src/Optimization/hiopPDPerturbation.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** @@ -53,7 +53,7 @@ * @author Nai-Yuan Chiang , LLNL * */ - + #include "hiopPDPerturbation.hpp" #include #include @@ -62,209 +62,206 @@ namespace hiop { - /** Initializes and reinitializes object based on the 'options' parameters of the - * 'nlp_' object. - * Returns 'false' if something goes wrong, otherwise 'true' - */ - bool hiopPDPerturbation::initialize(hiopNlpFormulation* nlp) - { - nlp_ = nlp; - delta_w_min_bar_ = nlp->options->GetNumeric("delta_w_min_bar"); - delta_w_max_bar_ = nlp->options->GetNumeric("delta_w_max_bar"); - delta_w_0_bar_ = nlp->options->GetNumeric("delta_0_bar"); - kappa_w_minus_ = nlp->options->GetNumeric("kappa_w_minus"); - kappa_w_plus_bar_= nlp->options->GetNumeric("kappa_w_plus_bar"); - kappa_w_plus_ = nlp->options->GetNumeric("kappa_w_plus"); - delta_c_bar_ = nlp->options->GetNumeric("delta_c_bar"); - kappa_c_ = nlp->options->GetNumeric("kappa_c"); - - if(nullptr == delta_wx_curr_) { - delta_wx_curr_ = nlp_->alloc_primal_vec(); - delta_wd_curr_ = nlp_->alloc_dual_ineq_vec(); - delta_cc_curr_ = nlp_->alloc_dual_eq_vec(); - delta_cd_curr_ = nlp_->alloc_dual_ineq_vec(); - delta_wx_last_ = nlp_->alloc_primal_vec(); - delta_wd_last_ = nlp_->alloc_dual_ineq_vec(); - delta_cc_last_ = nlp_->alloc_dual_eq_vec(); - delta_cd_last_ = nlp_->alloc_dual_ineq_vec(); - } +/** Initializes and reinitializes object based on the 'options' parameters of the + * 'nlp_' object. + * Returns 'false' if something goes wrong, otherwise 'true' + */ +bool hiopPDPerturbation::initialize(hiopNlpFormulation* nlp) +{ + nlp_ = nlp; + delta_w_min_bar_ = nlp->options->GetNumeric("delta_w_min_bar"); + delta_w_max_bar_ = nlp->options->GetNumeric("delta_w_max_bar"); + delta_w_0_bar_ = nlp->options->GetNumeric("delta_0_bar"); + kappa_w_minus_ = nlp->options->GetNumeric("kappa_w_minus"); + kappa_w_plus_bar_ = nlp->options->GetNumeric("kappa_w_plus_bar"); + kappa_w_plus_ = nlp->options->GetNumeric("kappa_w_plus"); + delta_c_bar_ = nlp->options->GetNumeric("delta_c_bar"); + kappa_c_ = nlp->options->GetNumeric("kappa_c"); + + if(nullptr == delta_wx_curr_) { + delta_wx_curr_ = nlp_->alloc_primal_vec(); + delta_wd_curr_ = nlp_->alloc_dual_ineq_vec(); + delta_cc_curr_ = nlp_->alloc_dual_eq_vec(); + delta_cd_curr_ = nlp_->alloc_dual_ineq_vec(); + delta_wx_last_ = nlp_->alloc_primal_vec(); + delta_wd_last_ = nlp_->alloc_dual_ineq_vec(); + delta_cc_last_ = nlp_->alloc_dual_eq_vec(); + delta_cd_last_ = nlp_->alloc_dual_ineq_vec(); + } + + delta_wx_curr_->setToZero(); + delta_wd_curr_->setToZero(); + delta_cc_curr_->setToZero(); + delta_cd_curr_->setToZero(); + delta_wx_last_->setToZero(); + delta_wd_last_->setToZero(); + delta_cc_last_->setToZero(); + delta_cd_last_->setToZero(); - delta_wx_curr_->setToZero(); - delta_wd_curr_->setToZero(); - delta_cc_curr_->setToZero(); - delta_cd_curr_->setToZero(); - delta_wx_last_->setToZero(); - delta_wd_last_->setToZero(); - delta_cc_last_->setToZero(); - delta_cd_last_->setToZero(); + num_degen_iters_ = 0; + + deltas_test_type_ = dttNoTest; + deltas_curr_update_ = None; + return true; +} + +/** Decides degeneracy @hess_degenerate_ and @jac_degenerate_ based on @deltas_test_type_ + * when the @num_degen_iters_ > @num_degen_max_iters_ + */ +void hiopPDPerturbation::update_degeneracy_type() +{ + switch(deltas_test_type_) { + case dttNoTest: + return; + case dttDeltac0Deltaw0: + if(hess_degenerate_ == dtNotEstablished && jac_degenerate_ == dtNotEstablished) { + hess_degenerate_ = dtNotDegenerate; + jac_degenerate_ = dtNotDegenerate; + } else if(hess_degenerate_ == dtNotEstablished) { + hess_degenerate_ = dtNotDegenerate; + } else if(jac_degenerate_ == dtNotEstablished) { + jac_degenerate_ = dtNotDegenerate; + } + break; + case dttDeltacposDeltaw0: + if(hess_degenerate_ == dtNotEstablished) { + hess_degenerate_ = dtNotDegenerate; + } - num_degen_iters_ = 0; + if(jac_degenerate_ == dtNotEstablished) { + num_degen_iters_++; + if(num_degen_iters_ >= num_degen_max_iters_) { + jac_degenerate_ = dtDegenerate; + } + } + break; + case dttDeltac0Deltawpos: + if(jac_degenerate_ == dtNotEstablished) { + jac_degenerate_ = dtNotDegenerate; + } + if(hess_degenerate_ == dtNotEstablished) { + num_degen_iters_++; + if(num_degen_iters_ >= num_degen_max_iters_) { + hess_degenerate_ = dtDegenerate; + } + } + break; + case dttDeltacposDeltawpos: + num_degen_iters_++; + if(num_degen_iters_ >= num_degen_max_iters_) { + hess_degenerate_ = dtDegenerate; + jac_degenerate_ = dtDegenerate; + } + break; + } +} +/** Called when a new linear system is attempted to be factorized + */ +bool hiopPDPerturbationPrimalFirstScalar::compute_initial_deltas() +{ + double delta_temp = 0.0; + double delta_temp2 = 0.0; + update_degeneracy_type(); + + if(delta_wx_curr_db_ > 0.) { + delta_wx_last_db_ = delta_wx_curr_db_; + } + if(delta_wd_curr_db_ > 0.) { + delta_wd_last_db_ = delta_wd_curr_db_; + } + if(delta_cc_curr_db_ > 0.) { + delta_cc_last_db_ = delta_cc_curr_db_; + } + if(delta_cd_curr_db_ > 0.) { + delta_cd_last_db_ = delta_cd_curr_db_; + } + + set_delta_last_vec(PDUpdate); + + if(hess_degenerate_ == dtNotEstablished || jac_degenerate_ == dtNotEstablished) { + deltas_test_type_ = dttDeltac0Deltaw0; + } else { deltas_test_type_ = dttNoTest; - deltas_curr_update_ = None; - return true; } - /** Decides degeneracy @hess_degenerate_ and @jac_degenerate_ based on @deltas_test_type_ - * when the @num_degen_iters_ > @num_degen_max_iters_ - */ - void hiopPDPerturbation::update_degeneracy_type() - { - switch (deltas_test_type_) { - case dttNoTest: - return; - case dttDeltac0Deltaw0: - if(hess_degenerate_ == dtNotEstablished && - jac_degenerate_ == dtNotEstablished) { - hess_degenerate_ = dtNotDegenerate; - jac_degenerate_ = dtNotDegenerate; - } else if(hess_degenerate_ == dtNotEstablished) { - hess_degenerate_ = dtNotDegenerate; - } else if(jac_degenerate_ == dtNotEstablished) { - jac_degenerate_ = dtNotDegenerate; - } - break; - case dttDeltacposDeltaw0: - if(hess_degenerate_ == dtNotEstablished) { - hess_degenerate_ = dtNotDegenerate; - } - - if(jac_degenerate_ == dtNotEstablished) { - num_degen_iters_++; - if(num_degen_iters_ >= num_degen_max_iters_) { - jac_degenerate_ = dtDegenerate; - } - } - break; - case dttDeltac0Deltawpos: - if(jac_degenerate_ == dtNotEstablished) { - jac_degenerate_ = dtNotDegenerate; - - } - if(hess_degenerate_ == dtNotEstablished) { - num_degen_iters_++; - if(num_degen_iters_ >= num_degen_max_iters_) { - hess_degenerate_ = dtDegenerate; - } - } - break; - case dttDeltacposDeltawpos: - num_degen_iters_++; - if(num_degen_iters_ >= num_degen_max_iters_) { - hess_degenerate_ = dtDegenerate; - jac_degenerate_ = dtDegenerate; - } - break; - } - } - - /** Called when a new linear system is attempted to be factorized - */ - bool hiopPDPerturbationPrimalFirstScalar::compute_initial_deltas() - { - double delta_temp = 0.0; - double delta_temp2 = 0.0; - update_degeneracy_type(); - - if(delta_wx_curr_db_>0.) { - delta_wx_last_db_ = delta_wx_curr_db_; - } - if(delta_wd_curr_db_>0.) { - delta_wd_last_db_ = delta_wd_curr_db_; - } - if(delta_cc_curr_db_>0.) { - delta_cc_last_db_ = delta_cc_curr_db_; - } - if(delta_cd_curr_db_>0.) { - delta_cd_last_db_= delta_cd_curr_db_; + if(jac_degenerate_ == dtDegenerate) { + delta_temp = compute_delta_c(mu_); + } else { + delta_temp = 0.0; + } + delta_cc_curr_db_ = delta_temp; + delta_cd_curr_db_ = delta_temp; + + if(hess_degenerate_ == dtDegenerate) { + delta_wx_curr_db_ = 0.; + delta_wd_curr_db_ = 0.; + if(!guts_of_compute_perturb_wrong_inertia(delta_temp, delta_temp2)) { + return false; } + } else { + delta_temp = delta_temp2 = 0.; + } + delta_wx_curr_db_ = delta_temp; + delta_wd_curr_db_ = delta_temp2; - set_delta_last_vec(PDUpdate); + set_delta_curr_vec(PDUpdate); - if(hess_degenerate_ == dtNotEstablished || jac_degenerate_ == dtNotEstablished) { - deltas_test_type_ = dttDeltac0Deltaw0; - } else { - deltas_test_type_ = dttNoTest; - } + deltas_curr_update_ = Initialized; + return true; +} - if(jac_degenerate_ == dtDegenerate) { - delta_temp = compute_delta_c(mu_); - } else { - delta_temp = 0.0; - } - delta_cc_curr_db_ = delta_temp; - delta_cd_curr_db_ = delta_temp; +/** Method for correcting inertia */ +bool hiopPDPerturbationPrimalFirstScalar::compute_perturb_wrong_inertia() +{ + update_degeneracy_type(); + assert(delta_wx_curr_db_ == delta_wd_curr_db_); + assert(delta_cc_curr_db_ == delta_cd_curr_db_); + + double delta_wx_temp{0.0}; + double delta_wd_temp{0.0}; + + bool ret = guts_of_compute_perturb_wrong_inertia(delta_wx_temp, delta_wd_temp); + if(!ret && delta_cc_curr_db_ == 0.) { + delta_wx_curr_db_ = delta_wd_curr_db_ = 0.; + delta_cc_curr_db_ = delta_cd_curr_db_ = compute_delta_c(mu_); + deltas_test_type_ = dttNoTest; if(hess_degenerate_ == dtDegenerate) { - delta_wx_curr_db_ = 0.; - delta_wd_curr_db_ = 0.; - if(!guts_of_compute_perturb_wrong_inertia(delta_temp, delta_temp2)) { - return false; - } - } else { - delta_temp = delta_temp2 = 0.; + hess_degenerate_ = dtNotEstablished; } - delta_wx_curr_db_ = delta_temp; - delta_wd_curr_db_ = delta_temp2; - + ret = guts_of_compute_perturb_wrong_inertia(delta_wx_temp, delta_wd_temp); set_delta_curr_vec(PDUpdate); - - deltas_curr_update_ = Initialized; - return true; - } - - /** Method for correcting inertia */ - bool hiopPDPerturbationPrimalFirstScalar::compute_perturb_wrong_inertia() - { - update_degeneracy_type(); - - assert(delta_wx_curr_db_ == delta_wd_curr_db_); - assert(delta_cc_curr_db_ == delta_cd_curr_db_); - - double delta_wx_temp{0.0}; - double delta_wd_temp{0.0}; - - bool ret = guts_of_compute_perturb_wrong_inertia(delta_wx_temp, delta_wd_temp); - if(!ret && delta_cc_curr_db_ == 0.) { - delta_wx_curr_db_ = delta_wd_curr_db_ = 0.; - delta_cc_curr_db_ = delta_cd_curr_db_ = compute_delta_c(mu_); - deltas_test_type_ = dttNoTest; - if(hess_degenerate_ == dtDegenerate) { - hess_degenerate_ = dtNotEstablished; - } - ret = guts_of_compute_perturb_wrong_inertia(delta_wx_temp, delta_wd_temp); - set_delta_curr_vec(PDUpdate); - } else { - set_delta_curr_vec(PrimalUpdate); - } - - nlp_->log->printf(hovScalars, - "primal regularization: %12.5e, dual regularization: %12.5e \n", - delta_wx_curr_db_,delta_cc_curr_db_); - return ret; + } else { + set_delta_curr_vec(PrimalUpdate); } - /** Method for correcting singular Jacobian - * (follows Ipopt closely since the paper seems to be outdated) - */ - bool hiopPDPerturbationPrimalFirstScalar::compute_perturb_singularity() - { - assert(delta_wx_curr_db_ == delta_wd_curr_db_); - assert(delta_cc_curr_db_ == delta_cd_curr_db_); - double delta_wx_temp{0.0}; - double delta_wd_temp{0.0}; - bool bret = true; - - if (hess_degenerate_ == dtNotEstablished || - jac_degenerate_ == dtNotEstablished) { - switch (deltas_test_type_) { + nlp_->log->printf(hovScalars, + "primal regularization: %12.5e, dual regularization: %12.5e \n", + delta_wx_curr_db_, + delta_cc_curr_db_); + return ret; +} + +/** Method for correcting singular Jacobian + * (follows Ipopt closely since the paper seems to be outdated) + */ +bool hiopPDPerturbationPrimalFirstScalar::compute_perturb_singularity() +{ + assert(delta_wx_curr_db_ == delta_wd_curr_db_); + assert(delta_cc_curr_db_ == delta_cd_curr_db_); + double delta_wx_temp{0.0}; + double delta_wd_temp{0.0}; + bool bret = true; + + if(hess_degenerate_ == dtNotEstablished || jac_degenerate_ == dtNotEstablished) { + switch(deltas_test_type_) { case dttDeltac0Deltaw0: - //this is the first call - if (jac_degenerate_ == dtNotEstablished) { + // this is the first call + if(jac_degenerate_ == dtNotEstablished) { delta_cc_curr_db_ = delta_cd_curr_db_ = compute_delta_c(mu_); deltas_test_type_ = dttDeltacposDeltaw0; - } - else { + } else { assert(hess_degenerate_ == dtNotEstablished); if(!guts_of_compute_perturb_wrong_inertia(delta_wx_temp, delta_wd_temp)) { bret = false; @@ -300,413 +297,404 @@ namespace hiop break; case dttNoTest: assert(false && "something went wrong - should not get here"); + } + } else { + if(delta_cc_curr_db_ > 0.) { + // If we already used a perturbation for the constraints, we do + // the same thing as if we were encountering negative curvature + if(!guts_of_compute_perturb_wrong_inertia(delta_wx_temp, delta_wd_temp)) { + // todo: need some error message (so that we know what and more important + // where something went wrong) + bret = false; } } else { - if(delta_cc_curr_db_ > 0.) { - // If we already used a perturbation for the constraints, we do - // the same thing as if we were encountering negative curvature - if(!guts_of_compute_perturb_wrong_inertia(delta_wx_temp, delta_wd_temp)) { - //todo: need some error message (so that we know what and more important - //where something went wrong) - bret = false; - } - } else { - // Otherwise we now perturb the Jacobian part - delta_cd_curr_db_ = delta_cc_curr_db_ = compute_delta_c(mu_); - } + // Otherwise we now perturb the Jacobian part + delta_cd_curr_db_ = delta_cc_curr_db_ = compute_delta_c(mu_); } - - set_delta_curr_vec(PDUpdate); - - nlp_->log->printf(hovScalars, - "primal regularization: %12.5e, dual regularization: %12.5e \n", - delta_wx_curr_db_,delta_cc_curr_db_); - - return bret; } - /** - * Internal method implementing the computation of delta_w's to correct wrong inertia - */ - bool hiopPDPerturbationPrimalFirstScalar::guts_of_compute_perturb_wrong_inertia(double& delta_wx, double& delta_wd) - { - assert(delta_wx_curr_db_ == delta_wd_curr_db_ && "these should be equal"); - assert(delta_wx_last_db_ == delta_wd_last_db_ && "these should be equal"); - if(delta_wx_curr_db_ == 0.) { - if(delta_wx_last_db_ == 0.) { - delta_wx_curr_db_ = delta_w_0_bar_; - } else { - delta_wx_curr_db_ = std::fmax(delta_w_min_bar_, delta_wx_last_db_*kappa_w_minus_); - } - } else { //delta_wx_curr_ != 0. - if(delta_wx_last_db_==0. || 1e5*delta_wx_last_db_ delta_w_max_bar_) { - //Hessian perturbation becoming too large - delta_wx_last_db_ = delta_wd_last_db_ = 0.; - set_delta_last_vec(PrimalUpdate); - return false; - } - - return true; - } - - double hiopPDPerturbationPrimalFirstScalar::compute_delta_c(const double& mu) const - { - return delta_c_bar_ * std::pow(mu, kappa_c_); - } + nlp_->log->printf(hovScalars, + "primal regularization: %12.5e, dual regularization: %12.5e \n", + delta_wx_curr_db_, + delta_cc_curr_db_); - bool hiopPDPerturbationPrimalFirstScalar::check_consistency() - { - return (delta_wx_curr_db_ == delta_wd_curr_db_) && (delta_cc_curr_db_ == delta_cd_curr_db_); - } + return bret; +} - void hiopPDPerturbationPrimalFirstScalar::set_delta_curr_vec(DeltasUpdateType taskid) - { - deltas_curr_update_ = taskid; - if(DualUpdate == taskid) { - // only update dual deltas - delta_cc_curr_->setToConstant(delta_cc_curr_db_); - delta_cd_curr_->setToConstant(delta_cd_curr_db_); - } else if(PrimalUpdate == taskid) { - // only update primal deltas - delta_wx_curr_->setToConstant(delta_wx_curr_db_); - delta_wd_curr_->setToConstant(delta_wd_curr_db_); +/** + * Internal method implementing the computation of delta_w's to correct wrong inertia + */ +bool hiopPDPerturbationPrimalFirstScalar::guts_of_compute_perturb_wrong_inertia(double& delta_wx, double& delta_wd) +{ + assert(delta_wx_curr_db_ == delta_wd_curr_db_ && "these should be equal"); + assert(delta_wx_last_db_ == delta_wd_last_db_ && "these should be equal"); + if(delta_wx_curr_db_ == 0.) { + if(delta_wx_last_db_ == 0.) { + delta_wx_curr_db_ = delta_w_0_bar_; } else { - // update all deltas - delta_cc_curr_->setToConstant(delta_cc_curr_db_); - delta_cd_curr_->setToConstant(delta_cd_curr_db_); - delta_wx_curr_->setToConstant(delta_wx_curr_db_); - delta_wd_curr_->setToConstant(delta_wd_curr_db_); + delta_wx_curr_db_ = std::fmax(delta_w_min_bar_, delta_wx_last_db_ * kappa_w_minus_); } - } - - void hiopPDPerturbationPrimalFirstScalar::set_delta_last_vec(DeltasUpdateType taskid) - { - if(DualUpdate == taskid) { - // only update dual deltas - delta_cc_last_->setToConstant(delta_cc_last_db_); - delta_cd_last_->setToConstant(delta_cd_last_db_); - } else if(PrimalUpdate == taskid) { - // only update primal deltas - delta_wx_last_->setToConstant(delta_wx_last_db_); - delta_wd_last_->setToConstant(delta_wd_last_db_); + } else { // delta_wx_curr_ != 0. + if(delta_wx_last_db_ == 0. || 1e5 * delta_wx_last_db_ < delta_wx_curr_db_) { + delta_wx_curr_db_ = kappa_w_plus_bar_ * delta_wx_curr_db_; } else { - // update all deltas - delta_cc_last_->setToConstant(delta_cc_last_db_); - delta_cd_last_->setToConstant(delta_cd_last_db_); - delta_wx_last_->setToConstant(delta_wx_last_db_); - delta_wd_last_->setToConstant(delta_wd_last_db_); + delta_wx_curr_db_ = kappa_w_plus_ * delta_wx_curr_db_; } } + delta_wd_curr_db_ = delta_wx_curr_db_; + set_delta_curr_vec(PrimalUpdate); + + if(delta_wx_curr_db_ > delta_w_max_bar_) { + // Hessian perturbation becoming too large + delta_wx_last_db_ = delta_wd_last_db_ = 0.; + set_delta_last_vec(PrimalUpdate); + return false; + } + return true; +} - /* - * class hiopPDPerturbationPrimalFirstRand - */ - void hiopPDPerturbationPrimalFirstRand::set_delta_last_vec(DeltasUpdateType taskid) - { - if(DualUpdate == taskid) { - // only update dual deltas - delta_cc_last_->set_to_random_uniform(min_uniform_ratio_*delta_cc_last_db_, max_uniform_ratio_*delta_cc_last_db_); - delta_cd_last_->set_to_random_uniform(min_uniform_ratio_*delta_cd_last_db_, max_uniform_ratio_*delta_cd_last_db_); - } else if(PrimalUpdate == taskid) { - // only update primal deltas - delta_wx_last_->set_to_random_uniform(min_uniform_ratio_*delta_wx_last_db_, max_uniform_ratio_*delta_wx_last_db_); - delta_wd_last_->set_to_random_uniform(min_uniform_ratio_*delta_wd_last_db_, max_uniform_ratio_*delta_wd_last_db_); - } else { - // update all deltas - delta_wx_last_->set_to_random_uniform(min_uniform_ratio_*delta_wx_last_db_, max_uniform_ratio_*delta_wx_last_db_); - delta_wd_last_->set_to_random_uniform(min_uniform_ratio_*delta_wd_last_db_, max_uniform_ratio_*delta_wd_last_db_); - delta_cc_last_->set_to_random_uniform(min_uniform_ratio_*delta_cc_last_db_, max_uniform_ratio_*delta_cc_last_db_); - delta_cd_last_->set_to_random_uniform(min_uniform_ratio_*delta_cd_last_db_, max_uniform_ratio_*delta_cd_last_db_); - } - } +double hiopPDPerturbationPrimalFirstScalar::compute_delta_c(const double& mu) const +{ + return delta_c_bar_ * std::pow(mu, kappa_c_); +} - void hiopPDPerturbationPrimalFirstRand::set_delta_curr_vec(DeltasUpdateType taskid) - { - deltas_curr_update_ = taskid; - if(DualUpdate == taskid) { - // only update dual deltas - delta_cc_curr_->set_to_random_uniform(min_uniform_ratio_*delta_cc_curr_db_, max_uniform_ratio_*delta_cc_curr_db_); - delta_cd_curr_->set_to_random_uniform(min_uniform_ratio_*delta_cd_curr_db_, max_uniform_ratio_*delta_cd_curr_db_); - } else if(PrimalUpdate == taskid) { - // only update primal deltas - delta_wx_curr_->set_to_random_uniform(min_uniform_ratio_*delta_wx_curr_db_, max_uniform_ratio_*delta_wx_curr_db_); - delta_wd_curr_->set_to_random_uniform(min_uniform_ratio_*delta_wd_curr_db_, max_uniform_ratio_*delta_wd_curr_db_); - } else { - // update all deltas - delta_wx_curr_->set_to_random_uniform(min_uniform_ratio_*delta_wx_curr_db_, max_uniform_ratio_*delta_wx_curr_db_); - delta_wd_curr_->set_to_random_uniform(min_uniform_ratio_*delta_wd_curr_db_, max_uniform_ratio_*delta_wd_curr_db_); - delta_cc_curr_->set_to_random_uniform(min_uniform_ratio_*delta_cc_curr_db_, max_uniform_ratio_*delta_cc_curr_db_); - delta_cd_curr_->set_to_random_uniform(min_uniform_ratio_*delta_cd_curr_db_, max_uniform_ratio_*delta_cd_curr_db_); - } +bool hiopPDPerturbationPrimalFirstScalar::check_consistency() +{ + return (delta_wx_curr_db_ == delta_wd_curr_db_) && (delta_cc_curr_db_ == delta_cd_curr_db_); +} + +void hiopPDPerturbationPrimalFirstScalar::set_delta_curr_vec(DeltasUpdateType taskid) +{ + deltas_curr_update_ = taskid; + if(DualUpdate == taskid) { + // only update dual deltas + delta_cc_curr_->setToConstant(delta_cc_curr_db_); + delta_cd_curr_->setToConstant(delta_cd_curr_db_); + } else if(PrimalUpdate == taskid) { + // only update primal deltas + delta_wx_curr_->setToConstant(delta_wx_curr_db_); + delta_wd_curr_->setToConstant(delta_wd_curr_db_); + } else { + // update all deltas + delta_cc_curr_->setToConstant(delta_cc_curr_db_); + delta_cd_curr_->setToConstant(delta_cd_curr_db_); + delta_wx_curr_->setToConstant(delta_wx_curr_db_); + delta_wd_curr_->setToConstant(delta_wd_curr_db_); } +} +void hiopPDPerturbationPrimalFirstScalar::set_delta_last_vec(DeltasUpdateType taskid) +{ + if(DualUpdate == taskid) { + // only update dual deltas + delta_cc_last_->setToConstant(delta_cc_last_db_); + delta_cd_last_->setToConstant(delta_cd_last_db_); + } else if(PrimalUpdate == taskid) { + // only update primal deltas + delta_wx_last_->setToConstant(delta_wx_last_db_); + delta_wd_last_->setToConstant(delta_wd_last_db_); + } else { + // update all deltas + delta_cc_last_->setToConstant(delta_cc_last_db_); + delta_cd_last_->setToConstant(delta_cd_last_db_); + delta_wx_last_->setToConstant(delta_wx_last_db_); + delta_wd_last_->setToConstant(delta_wd_last_db_); + } +} +/* + * class hiopPDPerturbationPrimalFirstRand + */ +void hiopPDPerturbationPrimalFirstRand::set_delta_last_vec(DeltasUpdateType taskid) +{ + if(DualUpdate == taskid) { + // only update dual deltas + delta_cc_last_->set_to_random_uniform(min_uniform_ratio_ * delta_cc_last_db_, max_uniform_ratio_ * delta_cc_last_db_); + delta_cd_last_->set_to_random_uniform(min_uniform_ratio_ * delta_cd_last_db_, max_uniform_ratio_ * delta_cd_last_db_); + } else if(PrimalUpdate == taskid) { + // only update primal deltas + delta_wx_last_->set_to_random_uniform(min_uniform_ratio_ * delta_wx_last_db_, max_uniform_ratio_ * delta_wx_last_db_); + delta_wd_last_->set_to_random_uniform(min_uniform_ratio_ * delta_wd_last_db_, max_uniform_ratio_ * delta_wd_last_db_); + } else { + // update all deltas + delta_wx_last_->set_to_random_uniform(min_uniform_ratio_ * delta_wx_last_db_, max_uniform_ratio_ * delta_wx_last_db_); + delta_wd_last_->set_to_random_uniform(min_uniform_ratio_ * delta_wd_last_db_, max_uniform_ratio_ * delta_wd_last_db_); + delta_cc_last_->set_to_random_uniform(min_uniform_ratio_ * delta_cc_last_db_, max_uniform_ratio_ * delta_cc_last_db_); + delta_cd_last_->set_to_random_uniform(min_uniform_ratio_ * delta_cd_last_db_, max_uniform_ratio_ * delta_cd_last_db_); + } +} +void hiopPDPerturbationPrimalFirstRand::set_delta_curr_vec(DeltasUpdateType taskid) +{ + deltas_curr_update_ = taskid; + if(DualUpdate == taskid) { + // only update dual deltas + delta_cc_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_cc_curr_db_, max_uniform_ratio_ * delta_cc_curr_db_); + delta_cd_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_cd_curr_db_, max_uniform_ratio_ * delta_cd_curr_db_); + } else if(PrimalUpdate == taskid) { + // only update primal deltas + delta_wx_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_wx_curr_db_, max_uniform_ratio_ * delta_wx_curr_db_); + delta_wd_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_wd_curr_db_, max_uniform_ratio_ * delta_wd_curr_db_); + } else { + // update all deltas + delta_wx_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_wx_curr_db_, max_uniform_ratio_ * delta_wx_curr_db_); + delta_wd_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_wd_curr_db_, max_uniform_ratio_ * delta_wd_curr_db_); + delta_cc_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_cc_curr_db_, max_uniform_ratio_ * delta_cc_curr_db_); + delta_cd_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_cd_curr_db_, max_uniform_ratio_ * delta_cd_curr_db_); + } +} - /* - * class hiopPDPerturbationDualFirstScalar - */ - hiopPDPerturbationDualFirstScalar::hiopPDPerturbationDualFirstScalar() +/* + * class hiopPDPerturbationDualFirstScalar + */ +hiopPDPerturbationDualFirstScalar::hiopPDPerturbationDualFirstScalar() : hiopPDPerturbation(), delta_c_min_bar_(1e-20), kappa_c_plus_(10.) - { - } +{} + +hiopPDPerturbationDualFirstScalar::~hiopPDPerturbationDualFirstScalar() {} - hiopPDPerturbationDualFirstScalar::~hiopPDPerturbationDualFirstScalar() - { +bool hiopPDPerturbationDualFirstScalar::compute_initial_deltas() +{ + update_degeneracy_type(); + + if(delta_wx_curr_db_ > 0.) { + delta_wx_last_db_ = delta_wx_curr_db_; + } + if(delta_wd_curr_db_ > 0.) { + delta_wd_last_db_ = delta_wd_curr_db_; + } + if(delta_cc_curr_db_ > 0.) { + delta_cc_last_db_ = delta_cc_curr_db_; + } + if(delta_cd_curr_db_ > 0.) { + delta_cd_last_db_ = delta_cd_curr_db_; } - bool hiopPDPerturbationDualFirstScalar::compute_initial_deltas() - { - update_degeneracy_type(); - - if(delta_wx_curr_db_>0.) { - delta_wx_last_db_ = delta_wx_curr_db_; - } - if(delta_wd_curr_db_>0.) { - delta_wd_last_db_ = delta_wd_curr_db_; - } - if(delta_cc_curr_db_>0.) { - delta_cc_last_db_ = delta_cc_curr_db_; - } - if(delta_cd_curr_db_>0.) { - delta_cd_last_db_= delta_cd_curr_db_; - } + if(hess_degenerate_ == dtNotEstablished || jac_degenerate_ == dtNotEstablished) { + deltas_test_type_ = dttDeltac0Deltaw0; + } else { + deltas_test_type_ = dttNoTest; + } - if(hess_degenerate_ == dtNotEstablished || jac_degenerate_ == dtNotEstablished) { - deltas_test_type_ = dttDeltac0Deltaw0; - } else { - deltas_test_type_ = dttNoTest; + delta_cc_curr_db_ = delta_cd_curr_db_ = 0.; + if(jac_degenerate_ == dtDegenerate) { + if(!compute_dual_perturb_impl(mu_)) { + return false; } + } - delta_cc_curr_db_ = delta_cd_curr_db_ = 0.; - if(jac_degenerate_ == dtDegenerate) { - if(!compute_dual_perturb_impl(mu_)) { - return false; - } - } - - delta_wx_curr_db_ = delta_wd_curr_db_ = 0.; - if(hess_degenerate_ == dtDegenerate) { - if(!compute_primal_perturb_impl()) { - return false; - } + delta_wx_curr_db_ = delta_wd_curr_db_ = 0.; + if(hess_degenerate_ == dtDegenerate) { + if(!compute_primal_perturb_impl()) { + return false; } + } - set_delta_curr_vec(PDUpdate); - set_delta_last_vec(PDUpdate); - - deltas_curr_update_ = Initialized; + set_delta_curr_vec(PDUpdate); + set_delta_last_vec(PDUpdate); - return true; - } + deltas_curr_update_ = Initialized; - bool hiopPDPerturbationDualFirstScalar::compute_perturb_wrong_inertia() - { - /** - * for normal equation, wrong inertia means the KKT 1x1 matrix is not PD - * we try to corret the dual regularization first, and then primal regularizaion - * */ - update_degeneracy_type(); + return true; +} - assert(delta_wx_curr_db_ == delta_wd_curr_db_); - assert(delta_cc_curr_db_ == delta_cd_curr_db_); +bool hiopPDPerturbationDualFirstScalar::compute_perturb_wrong_inertia() +{ + /** + * for normal equation, wrong inertia means the KKT 1x1 matrix is not PD + * we try to corret the dual regularization first, and then primal regularizaion + * */ + update_degeneracy_type(); - bool ret = compute_dual_perturb_impl(mu_); - if(!ret && delta_wx_curr_db_==0.) { - delta_cc_curr_db_ = delta_cd_curr_db_ = 0.; - ret = compute_primal_perturb_impl(); - if(!ret) { - return ret; - } - deltas_test_type_ = dttNoTest; - if(jac_degenerate_ == dtDegenerate) { - jac_degenerate_ = dtNotEstablished; - } - ret = compute_dual_perturb_impl(mu_); + assert(delta_wx_curr_db_ == delta_wd_curr_db_); + assert(delta_cc_curr_db_ == delta_cd_curr_db_); - set_delta_curr_vec(PrimalUpdate); - } - - set_delta_curr_vec(DualUpdate); - - nlp_->log->printf(hovScalars, - "primal regularization (mean): %12.5e, dual regularization (mean): %12.5e \n", - delta_wx_curr_db_,delta_cc_curr_db_); - return ret; - } - - bool hiopPDPerturbationDualFirstScalar::compute_perturb_singularity() - { - /** - * we try to corret the dual regularization first, and then primal regularizaion - * same implementation as hiopPDPerturbationDualFirstScalar::compute_perturb_wrong_inertia - */ - return compute_perturb_wrong_inertia(); - } - - bool hiopPDPerturbationDualFirstScalar::compute_dual_perturb_impl(const double& mu) - { - assert(delta_cc_curr_db_ == delta_cd_curr_db_ && "these should be equal"); - assert(delta_cc_last_db_ == delta_cd_last_db_ && "these should be equal"); - - if(delta_cc_curr_db_ == 0.) { - if(delta_cc_last_db_ == 0.) { - delta_cc_curr_db_ = std::fmax(delta_c_min_bar_, delta_c_bar_ * std::pow(mu, kappa_c_)); - } else { - delta_cc_curr_db_ = std::fmax(delta_c_min_bar_, delta_cc_last_db_*kappa_w_minus_); - } - } else { //delta_cc_curr_db_ != 0. - if(delta_cc_last_db_==0. || 1e5*delta_cc_last_db_ delta_w_max_bar_) { - //dual perturbation becoming too large - delta_cc_last_db_ = delta_cd_last_db_ = 0.; - set_delta_last_vec(DualUpdate); - return false; + deltas_test_type_ = dttNoTest; + if(jac_degenerate_ == dtDegenerate) { + jac_degenerate_ = dtNotEstablished; } + ret = compute_dual_perturb_impl(mu_); - return true; + set_delta_curr_vec(PrimalUpdate); } - bool hiopPDPerturbationDualFirstScalar::compute_primal_perturb_impl() - { - assert(delta_wx_curr_db_ == delta_wd_curr_db_ && "these should be equal"); - assert(delta_wx_last_db_ == delta_wd_last_db_ && "these should be equal"); - bool bval = true; - - if(delta_wx_curr_db_ == 0.) { - if(delta_wx_last_db_ == 0.) { - delta_wx_curr_db_ = delta_w_0_bar_; - } else { - delta_wx_curr_db_ = std::fmax(delta_w_min_bar_, delta_wx_last_db_*kappa_w_minus_); - } - } else { //delta_wx_curr_ != 0. - if(delta_wx_last_db_==0. || 1e5*delta_wx_last_db_ delta_w_max_bar_) { - //Hessian perturbation becoming too large - delta_wx_last_db_ = delta_wd_last_db_ = 0.; - set_delta_last_vec(PrimalUpdate); - bval = false; - } + set_delta_curr_vec(DualUpdate); - return bval; - } + nlp_->log->printf(hovScalars, + "primal regularization (mean): %12.5e, dual regularization (mean): %12.5e \n", + delta_wx_curr_db_, + delta_cc_curr_db_); + return ret; +} - bool hiopPDPerturbationDualFirstScalar::check_consistency() - { - return (delta_wx_curr_db_ == delta_wd_curr_db_) && (delta_cc_curr_db_ == delta_cd_curr_db_); - } +bool hiopPDPerturbationDualFirstScalar::compute_perturb_singularity() +{ + /** + * we try to corret the dual regularization first, and then primal regularizaion + * same implementation as hiopPDPerturbationDualFirstScalar::compute_perturb_wrong_inertia + */ + return compute_perturb_wrong_inertia(); +} + +bool hiopPDPerturbationDualFirstScalar::compute_dual_perturb_impl(const double& mu) +{ + assert(delta_cc_curr_db_ == delta_cd_curr_db_ && "these should be equal"); + assert(delta_cc_last_db_ == delta_cd_last_db_ && "these should be equal"); - void hiopPDPerturbationDualFirstScalar::set_delta_curr_vec(DeltasUpdateType taskid) - { - deltas_curr_update_ = taskid; - if(DualUpdate == taskid) { - // only update dual deltas - delta_cc_curr_->setToConstant(delta_cc_curr_db_); - delta_cd_curr_->setToConstant(delta_cd_curr_db_); - } else if(PrimalUpdate == taskid) { - // only update primal deltas - delta_wx_curr_->setToConstant(delta_wx_curr_db_); - delta_wd_curr_->setToConstant(delta_wd_curr_db_); + if(delta_cc_curr_db_ == 0.) { + if(delta_cc_last_db_ == 0.) { + delta_cc_curr_db_ = std::fmax(delta_c_min_bar_, delta_c_bar_ * std::pow(mu, kappa_c_)); } else { - // update all deltas - delta_cc_curr_->setToConstant(delta_cc_curr_db_); - delta_cd_curr_->setToConstant(delta_cd_curr_db_); - delta_wx_curr_->setToConstant(delta_wx_curr_db_); - delta_wd_curr_->setToConstant(delta_wd_curr_db_); + delta_cc_curr_db_ = std::fmax(delta_c_min_bar_, delta_cc_last_db_ * kappa_w_minus_); } - } - - void hiopPDPerturbationDualFirstScalar::set_delta_last_vec(DeltasUpdateType taskid) - { - if(DualUpdate == taskid) { - // only update dual deltas - delta_cc_last_->setToConstant(delta_cc_last_db_); - delta_cd_last_->setToConstant(delta_cd_last_db_); - } else if(PrimalUpdate == taskid) { - // only update primal deltas - delta_wx_last_->setToConstant(delta_wx_last_db_); - delta_wd_last_->setToConstant(delta_wd_last_db_); + } else { // delta_cc_curr_db_ != 0. + if(delta_cc_last_db_ == 0. || 1e5 * delta_cc_last_db_ < delta_cc_curr_db_) { + delta_cc_curr_db_ = kappa_w_plus_bar_ * delta_cc_curr_db_; } else { - // update all deltas - delta_cc_last_->setToConstant(delta_cc_last_db_); - delta_cd_last_->setToConstant(delta_cd_last_db_); - delta_wx_last_->setToConstant(delta_wx_last_db_); - delta_wd_last_->setToConstant(delta_wd_last_db_); + delta_cc_curr_db_ = kappa_c_plus_ * delta_cc_curr_db_; } } + delta_cd_curr_db_ = delta_cc_curr_db_; + set_delta_curr_vec(DualUpdate); + + if(delta_cc_curr_db_ > delta_w_max_bar_) { + // dual perturbation becoming too large + delta_cc_last_db_ = delta_cd_last_db_ = 0.; + set_delta_last_vec(DualUpdate); + return false; + } + + return true; +} +bool hiopPDPerturbationDualFirstScalar::compute_primal_perturb_impl() +{ + assert(delta_wx_curr_db_ == delta_wd_curr_db_ && "these should be equal"); + assert(delta_wx_last_db_ == delta_wd_last_db_ && "these should be equal"); + bool bval = true; - /* - * class hiopPDPerturbationDualFirstRand - */ - void hiopPDPerturbationDualFirstRand::set_delta_last_vec(DeltasUpdateType taskid) - { - if(DualUpdate == taskid) { - // only update dual deltas - delta_cc_last_->set_to_random_uniform(min_uniform_ratio_*delta_cc_last_db_, max_uniform_ratio_*delta_cc_last_db_); - delta_cd_last_->set_to_random_uniform(min_uniform_ratio_*delta_cd_last_db_, max_uniform_ratio_*delta_cd_last_db_); - } else if(PrimalUpdate == taskid) { - // only update primal deltas - delta_wx_last_->set_to_random_uniform(min_uniform_ratio_*delta_wx_last_db_, max_uniform_ratio_*delta_wx_last_db_); - delta_wd_last_->set_to_random_uniform(min_uniform_ratio_*delta_wd_last_db_, max_uniform_ratio_*delta_wd_last_db_); + if(delta_wx_curr_db_ == 0.) { + if(delta_wx_last_db_ == 0.) { + delta_wx_curr_db_ = delta_w_0_bar_; } else { - // update all deltas - delta_wx_last_->set_to_random_uniform(min_uniform_ratio_*delta_wx_last_db_, max_uniform_ratio_*delta_wx_last_db_); - delta_wd_last_->set_to_random_uniform(min_uniform_ratio_*delta_wd_last_db_, max_uniform_ratio_*delta_wd_last_db_); - delta_cc_last_->set_to_random_uniform(min_uniform_ratio_*delta_cc_last_db_, max_uniform_ratio_*delta_cc_last_db_); - delta_cd_last_->set_to_random_uniform(min_uniform_ratio_*delta_cd_last_db_, max_uniform_ratio_*delta_cd_last_db_); + delta_wx_curr_db_ = std::fmax(delta_w_min_bar_, delta_wx_last_db_ * kappa_w_minus_); } - } - - void hiopPDPerturbationDualFirstRand::set_delta_curr_vec(DeltasUpdateType taskid) - { - deltas_curr_update_ = taskid; - if(DualUpdate == taskid) { - // only update dual deltas - delta_cc_curr_->set_to_random_uniform(min_uniform_ratio_*delta_cc_curr_db_, max_uniform_ratio_*delta_cc_curr_db_); - delta_cd_curr_->set_to_random_uniform(min_uniform_ratio_*delta_cd_curr_db_, max_uniform_ratio_*delta_cd_curr_db_); - } else if(PrimalUpdate == taskid) { - // only update primal deltas - delta_wx_curr_->set_to_random_uniform(min_uniform_ratio_*delta_wx_curr_db_, max_uniform_ratio_*delta_wx_curr_db_); - delta_wd_curr_->set_to_random_uniform(min_uniform_ratio_*delta_wd_curr_db_, max_uniform_ratio_*delta_wd_curr_db_); + } else { // delta_wx_curr_ != 0. + if(delta_wx_last_db_ == 0. || 1e5 * delta_wx_last_db_ < delta_wx_curr_db_) { + delta_wx_curr_db_ = kappa_w_plus_bar_ * delta_wx_curr_db_; } else { - // update all deltas - delta_wx_curr_->set_to_random_uniform(min_uniform_ratio_*delta_wx_curr_db_, max_uniform_ratio_*delta_wx_curr_db_); - delta_wd_curr_->set_to_random_uniform(min_uniform_ratio_*delta_wd_curr_db_, max_uniform_ratio_*delta_wd_curr_db_); - delta_cc_curr_->set_to_random_uniform(min_uniform_ratio_*delta_cc_curr_db_, max_uniform_ratio_*delta_cc_curr_db_); - delta_cd_curr_->set_to_random_uniform(min_uniform_ratio_*delta_cd_curr_db_, max_uniform_ratio_*delta_cd_curr_db_); + delta_wx_curr_db_ = kappa_w_plus_ * delta_wx_curr_db_; } } + delta_wd_curr_db_ = delta_wx_curr_db_; + set_delta_curr_vec(PrimalUpdate); + + if(delta_wx_curr_db_ > delta_w_max_bar_) { + // Hessian perturbation becoming too large + delta_wx_last_db_ = delta_wd_last_db_ = 0.; + set_delta_last_vec(PrimalUpdate); + bval = false; + } + return bval; +} +bool hiopPDPerturbationDualFirstScalar::check_consistency() +{ + return (delta_wx_curr_db_ == delta_wd_curr_db_) && (delta_cc_curr_db_ == delta_cd_curr_db_); +} +void hiopPDPerturbationDualFirstScalar::set_delta_curr_vec(DeltasUpdateType taskid) +{ + deltas_curr_update_ = taskid; + if(DualUpdate == taskid) { + // only update dual deltas + delta_cc_curr_->setToConstant(delta_cc_curr_db_); + delta_cd_curr_->setToConstant(delta_cd_curr_db_); + } else if(PrimalUpdate == taskid) { + // only update primal deltas + delta_wx_curr_->setToConstant(delta_wx_curr_db_); + delta_wd_curr_->setToConstant(delta_wd_curr_db_); + } else { + // update all deltas + delta_cc_curr_->setToConstant(delta_cc_curr_db_); + delta_cd_curr_->setToConstant(delta_cd_curr_db_); + delta_wx_curr_->setToConstant(delta_wx_curr_db_); + delta_wd_curr_->setToConstant(delta_wd_curr_db_); + } +} + +void hiopPDPerturbationDualFirstScalar::set_delta_last_vec(DeltasUpdateType taskid) +{ + if(DualUpdate == taskid) { + // only update dual deltas + delta_cc_last_->setToConstant(delta_cc_last_db_); + delta_cd_last_->setToConstant(delta_cd_last_db_); + } else if(PrimalUpdate == taskid) { + // only update primal deltas + delta_wx_last_->setToConstant(delta_wx_last_db_); + delta_wd_last_->setToConstant(delta_wd_last_db_); + } else { + // update all deltas + delta_cc_last_->setToConstant(delta_cc_last_db_); + delta_cd_last_->setToConstant(delta_cd_last_db_); + delta_wx_last_->setToConstant(delta_wx_last_db_); + delta_wd_last_->setToConstant(delta_wd_last_db_); + } +} + +/* + * class hiopPDPerturbationDualFirstRand + */ +void hiopPDPerturbationDualFirstRand::set_delta_last_vec(DeltasUpdateType taskid) +{ + if(DualUpdate == taskid) { + // only update dual deltas + delta_cc_last_->set_to_random_uniform(min_uniform_ratio_ * delta_cc_last_db_, max_uniform_ratio_ * delta_cc_last_db_); + delta_cd_last_->set_to_random_uniform(min_uniform_ratio_ * delta_cd_last_db_, max_uniform_ratio_ * delta_cd_last_db_); + } else if(PrimalUpdate == taskid) { + // only update primal deltas + delta_wx_last_->set_to_random_uniform(min_uniform_ratio_ * delta_wx_last_db_, max_uniform_ratio_ * delta_wx_last_db_); + delta_wd_last_->set_to_random_uniform(min_uniform_ratio_ * delta_wd_last_db_, max_uniform_ratio_ * delta_wd_last_db_); + } else { + // update all deltas + delta_wx_last_->set_to_random_uniform(min_uniform_ratio_ * delta_wx_last_db_, max_uniform_ratio_ * delta_wx_last_db_); + delta_wd_last_->set_to_random_uniform(min_uniform_ratio_ * delta_wd_last_db_, max_uniform_ratio_ * delta_wd_last_db_); + delta_cc_last_->set_to_random_uniform(min_uniform_ratio_ * delta_cc_last_db_, max_uniform_ratio_ * delta_cc_last_db_); + delta_cd_last_->set_to_random_uniform(min_uniform_ratio_ * delta_cd_last_db_, max_uniform_ratio_ * delta_cd_last_db_); + } +} + +void hiopPDPerturbationDualFirstRand::set_delta_curr_vec(DeltasUpdateType taskid) +{ + deltas_curr_update_ = taskid; + if(DualUpdate == taskid) { + // only update dual deltas + delta_cc_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_cc_curr_db_, max_uniform_ratio_ * delta_cc_curr_db_); + delta_cd_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_cd_curr_db_, max_uniform_ratio_ * delta_cd_curr_db_); + } else if(PrimalUpdate == taskid) { + // only update primal deltas + delta_wx_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_wx_curr_db_, max_uniform_ratio_ * delta_wx_curr_db_); + delta_wd_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_wd_curr_db_, max_uniform_ratio_ * delta_wd_curr_db_); + } else { + // update all deltas + delta_wx_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_wx_curr_db_, max_uniform_ratio_ * delta_wx_curr_db_); + delta_wd_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_wd_curr_db_, max_uniform_ratio_ * delta_wd_curr_db_); + delta_cc_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_cc_curr_db_, max_uniform_ratio_ * delta_cc_curr_db_); + delta_cd_curr_->set_to_random_uniform(min_uniform_ratio_ * delta_cd_curr_db_, max_uniform_ratio_ * delta_cd_curr_db_); + } } +} // namespace hiop diff --git a/src/Optimization/hiopPDPerturbation.hpp b/src/Optimization/hiopPDPerturbation.hpp index d9a108a1b..c863b25ee 100644 --- a/src/Optimization/hiopPDPerturbation.hpp +++ b/src/Optimization/hiopPDPerturbation.hpp @@ -10,57 +10,55 @@ namespace hiop class hiopPDPerturbation { public: - enum DeltasUpdateType { - None = -2, - Initialized = -1, - DualUpdate = 0, - PrimalUpdate = 1, - PDUpdate = 2 + None = -2, + Initialized = -1, + DualUpdate = 0, + PrimalUpdate = 1, + PDUpdate = 2 }; - /** Default constructor + /** Default constructor * Provides complete initialization, but uses algorithmic parameters from the Ipopt * implementation paper. \ref initialize(hiopNlpFormulation*) should be used to initialize * this class based on HiOp option file or HiOp user-supplied runtime options. */ hiopPDPerturbation() - : delta_wx_curr_(nullptr), - delta_wd_curr_(nullptr), - delta_cc_curr_(nullptr), - delta_cd_curr_(nullptr), - delta_wx_last_(nullptr), - delta_wd_last_(nullptr), - delta_cc_last_(nullptr), - delta_cd_last_(nullptr), - delta_wx_curr_db_{0.}, - delta_wd_curr_db_{0.}, - delta_cc_curr_db_{0.}, - delta_cd_curr_db_{0.}, - delta_wx_last_db_{0.}, - delta_wd_last_db_{0.}, - delta_cc_last_db_{0.}, - delta_cd_last_db_{0.}, - delta_w_min_bar_(1e-20), - delta_w_max_bar_(1e+40), - delta_w_0_bar_(1e-4), - kappa_w_minus_(1./3), - kappa_w_plus_bar_(100.), - kappa_w_plus_(8.), - delta_c_bar_(1e-8), - kappa_c_(0.25), - min_uniform_ratio_{0.9}, - max_uniform_ratio_{1.0}, - hess_degenerate_(dtNotEstablished), - jac_degenerate_(dtNotEstablished), - num_degen_iters_(0), - num_degen_max_iters_(3), - deltas_test_type_(dttNoTest), - mu_(1e-8), - nlp_(nullptr) - { - } + : delta_wx_curr_(nullptr), + delta_wd_curr_(nullptr), + delta_cc_curr_(nullptr), + delta_cd_curr_(nullptr), + delta_wx_last_(nullptr), + delta_wd_last_(nullptr), + delta_cc_last_(nullptr), + delta_cd_last_(nullptr), + delta_wx_curr_db_{0.}, + delta_wd_curr_db_{0.}, + delta_cc_curr_db_{0.}, + delta_cd_curr_db_{0.}, + delta_wx_last_db_{0.}, + delta_wd_last_db_{0.}, + delta_cc_last_db_{0.}, + delta_cd_last_db_{0.}, + delta_w_min_bar_(1e-20), + delta_w_max_bar_(1e+40), + delta_w_0_bar_(1e-4), + kappa_w_minus_(1. / 3), + kappa_w_plus_bar_(100.), + kappa_w_plus_(8.), + delta_c_bar_(1e-8), + kappa_c_(0.25), + min_uniform_ratio_{0.9}, + max_uniform_ratio_{1.0}, + hess_degenerate_(dtNotEstablished), + jac_degenerate_(dtNotEstablished), + num_degen_iters_(0), + num_degen_max_iters_(3), + deltas_test_type_(dttNoTest), + mu_(1e-8), + nlp_(nullptr) + {} virtual ~hiopPDPerturbation() { @@ -81,24 +79,21 @@ class hiopPDPerturbation virtual bool initialize(hiopNlpFormulation* nlp); /** Set log-barrier mu. */ - virtual inline void set_mu(const double& mu) - { - mu_ = mu; - } + virtual inline void set_mu(const double& mu) { mu_ = mu; } - /** Called when a new linear system is attempted to be factorized + /** Called when a new linear system is attempted to be factorized */ virtual bool compute_initial_deltas() = 0; /** Method for correcting inertia */ virtual bool compute_perturb_wrong_inertia() = 0; - /** Method for correcting singular Jacobian + /** Method for correcting singular Jacobian * (follows Ipopt closely since the paper seems to be outdated) */ virtual bool compute_perturb_singularity() = 0; - inline bool copy_from_curr_perturbations(hiopVector& delta_wx, + inline bool copy_from_curr_perturbations(hiopVector& delta_wx, hiopVector& delta_wd, hiopVector& delta_cc, hiopVector& delta_cd) @@ -110,12 +105,12 @@ class hiopPDPerturbation return true; } - inline hiopVector* get_curr_delta_wx() const {return delta_wx_curr_;} - inline hiopVector* get_curr_delta_wd() const {return delta_wd_curr_;} - inline hiopVector* get_curr_delta_cc() const {return delta_cc_curr_;} - inline hiopVector* get_curr_delta_cd() const {return delta_cd_curr_;} + inline hiopVector* get_curr_delta_wx() const { return delta_wx_curr_; } + inline hiopVector* get_curr_delta_wd() const { return delta_wd_curr_; } + inline hiopVector* get_curr_delta_cc() const { return delta_cc_curr_; } + inline hiopVector* get_curr_delta_cd() const { return delta_cd_curr_; } - inline DeltasUpdateType get_curr_delta_type() const {return deltas_curr_update_;} + inline DeltasUpdateType get_curr_delta_type() const { return deltas_curr_update_; } virtual bool check_consistency() = 0; @@ -140,7 +135,7 @@ class hiopPDPerturbation double delta_wd_last_db_; double delta_cc_last_db_; double delta_cd_last_db_; - + /** Algorithmic parameters */ /** Smallest possible perturbation for Hessian (for primal 'x' and 's' variables). */ @@ -155,17 +150,17 @@ class hiopPDPerturbation double kappa_w_plus_bar_; /** Increase factor for delta_w for later perturbations. */ double kappa_w_plus_; - + /** Factor for regularization for potentially rank-deficient Jacobian. */ double delta_c_bar_; /** Exponent of mu when computing regularization for Jacobian. */ double kappa_c_; - + double min_uniform_ratio_; double max_uniform_ratio_; - + /** Degeneracy is handled as in Ipopt*/ - + /** Type for degeneracy flags */ enum DegeneracyType { @@ -183,7 +178,7 @@ class hiopPDPerturbation int num_degen_iters_; /* Max number of iters after which to conclude matrix is degenerate. */ const int num_degen_max_iters_; - + /** Status of current trial configuration */ enum DeltasTestType { @@ -197,14 +192,14 @@ class hiopPDPerturbation /** Current status */ DeltasTestType deltas_test_type_; DeltasUpdateType deltas_curr_update_; - + /** Log barrier mu in the outer loop. */ double mu_; hiopNlpFormulation* nlp_; -protected: //methods - /** Decides degeneracy @hess_degenerate_ and @jac_degenerate_ based on @deltas_test_type_ +protected: // methods + /** Decides degeneracy @hess_degenerate_ and @jac_degenerate_ based on @deltas_test_type_ * when the @num_degen_iters_ > @num_degen_max_iters_ */ virtual void update_degeneracy_type(); @@ -216,31 +211,33 @@ class hiopPDPerturbation class hiopPDPerturbationNull : public hiopPDPerturbation { public: - /** Default constructor + /** Default constructor * Provides complete initialization, but uses algorithmic parameters from the Ipopt * implementation paper. \ref initialize(hiopNlpFormulation*) should be used to initialize * this class based on HiOp option file or HiOp user-supplied runtime options. */ - hiopPDPerturbationNull() : hiopPDPerturbation() {} + hiopPDPerturbationNull() + : hiopPDPerturbation() + {} virtual ~hiopPDPerturbationNull() {} - /** Called when a new linear system is attempted to be factorized + /** Called when a new linear system is attempted to be factorized */ - virtual bool compute_initial_deltas() {return true;} + virtual bool compute_initial_deltas() { return true; } /** Method for correcting inertia */ - virtual bool compute_perturb_wrong_inertia() {return true;} + virtual bool compute_perturb_wrong_inertia() { return true; } - /** Method for correcting singular Jacobian + /** Method for correcting singular Jacobian * (follows Ipopt closely since the paper seems to be outdated) */ - virtual bool compute_perturb_singularity() {return true;} - - virtual bool check_consistency() {return true;} + virtual bool compute_perturb_singularity() { return true; } + + virtual bool check_consistency() { return true; } -protected: //methods +protected: // methods virtual void set_delta_curr_vec(DeltasUpdateType taskid) {} - virtual void set_delta_last_vec(DeltasUpdateType taskid) {} + virtual void set_delta_last_vec(DeltasUpdateType taskid) {} }; /** @@ -250,42 +247,41 @@ class hiopPDPerturbationNull : public hiopPDPerturbation class hiopPDPerturbationPrimalFirstScalar : public hiopPDPerturbation { public: - /** Default constructor + /** Default constructor * Provides complete initialization, but uses algorithmic parameters from the Ipopt * implementation paper. \ref initialize(hiopNlpFormulation*) should be used to initialize * this class based on HiOp option file or HiOp user-supplied runtime options. */ - hiopPDPerturbationPrimalFirstScalar() : hiopPDPerturbation() {} + hiopPDPerturbationPrimalFirstScalar() + : hiopPDPerturbation() + {} virtual ~hiopPDPerturbationPrimalFirstScalar() {} - /** Called when a new linear system is attempted to be factorized + /** Called when a new linear system is attempted to be factorized */ virtual bool compute_initial_deltas(); /** Method for correcting inertia */ virtual bool compute_perturb_wrong_inertia(); - /** Method for correcting singular Jacobian + /** Method for correcting singular Jacobian * (follows Ipopt closely since the paper seems to be outdated) */ virtual bool compute_perturb_singularity(); - - virtual bool check_consistency(); - -protected: + virtual bool check_consistency(); -protected: // methods - virtual void set_delta_curr_vec(DeltasUpdateType taskid); +protected: +protected: // methods + virtual void set_delta_curr_vec(DeltasUpdateType taskid); virtual void set_delta_last_vec(DeltasUpdateType taskid); /** Internal method implementing the computation of delta_w's to correct wrong inertia - * + * */ virtual bool guts_of_compute_perturb_wrong_inertia(double& delta_wx, double& delta_wd); - virtual double compute_delta_c(const double& mu) const; - + virtual double compute_delta_c(const double& mu) const; }; /** @@ -296,13 +292,15 @@ class hiopPDPerturbationPrimalFirstScalar : public hiopPDPerturbation class hiopPDPerturbationPrimalFirstRand : public hiopPDPerturbationPrimalFirstScalar { public: - hiopPDPerturbationPrimalFirstRand() : hiopPDPerturbationPrimalFirstScalar() {} + hiopPDPerturbationPrimalFirstRand() + : hiopPDPerturbationPrimalFirstScalar() + {} virtual ~hiopPDPerturbationPrimalFirstRand() {} -protected: // methods - virtual void set_delta_curr_vec(DeltasUpdateType taskid); - virtual void set_delta_last_vec(DeltasUpdateType taskid); +protected: // methods + virtual void set_delta_curr_vec(DeltasUpdateType taskid); + virtual void set_delta_last_vec(DeltasUpdateType taskid); }; /** @@ -316,38 +314,37 @@ class hiopPDPerturbationDualFirstScalar : public hiopPDPerturbation virtual ~hiopPDPerturbationDualFirstScalar(); - /** Called when a new linear system is attempted to be factorized + /** Called when a new linear system is attempted to be factorized */ virtual bool compute_initial_deltas(); /** Method for correcting inertia */ virtual bool compute_perturb_wrong_inertia(); - - /** Method for correcting singular Jacobian + + /** Method for correcting singular Jacobian * (follows Ipopt closely since the paper seems to be outdated) - */ + */ virtual bool compute_perturb_singularity(); virtual bool check_consistency(); - -protected: //variables + +protected: // variables double delta_c_min_bar_; double kappa_c_plus_; -protected: // methods - virtual void set_delta_curr_vec(DeltasUpdateType taskid); +protected: // methods + virtual void set_delta_curr_vec(DeltasUpdateType taskid); virtual void set_delta_last_vec(DeltasUpdateType taskid); - /** + /** * Internal method implementing the computation of delta_w's to correct wrong inertia */ virtual bool compute_primal_perturb_impl(); - - /** + + /** * Internal method implementing the computation of delta_c's to correct wrong inertia */ virtual bool compute_dual_perturb_impl(const double& mu); - }; /** @@ -358,14 +355,16 @@ class hiopPDPerturbationDualFirstScalar : public hiopPDPerturbation class hiopPDPerturbationDualFirstRand : public hiopPDPerturbationDualFirstScalar { public: - hiopPDPerturbationDualFirstRand() : hiopPDPerturbationDualFirstScalar() {} + hiopPDPerturbationDualFirstRand() + : hiopPDPerturbationDualFirstScalar() + {} virtual ~hiopPDPerturbationDualFirstRand() {} -protected: // methods - virtual void set_delta_curr_vec(DeltasUpdateType taskid); - virtual void set_delta_last_vec(DeltasUpdateType taskid); +protected: // methods + virtual void set_delta_curr_vec(DeltasUpdateType taskid); + virtual void set_delta_last_vec(DeltasUpdateType taskid); }; -} //end of namespace +} // namespace hiop #endif diff --git a/src/Optimization/hiopResidual.cpp b/src/Optimization/hiopResidual.cpp index f5a56c048..3f44bfe83 100644 --- a/src/Optimization/hiopResidual.cpp +++ b/src/Optimization/hiopResidual.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #include "hiopResidual.hpp" @@ -85,65 +85,65 @@ hiopResidual::hiopResidual(hiopNlpFormulation* nlp_) hiopResidual::~hiopResidual() { - if(rx) delete rx; - if(rd) delete rd; - if(rxl) delete rxl; - if(rxu) delete rxu; - if(rdl) delete rdl; - if(rdu) delete rdu; - if(ryc) delete ryc; - if(ryd) delete ryd; + if(rx) delete rx; + if(rd) delete rd; + if(rxl) delete rxl; + if(rxu) delete rxu; + if(rdl) delete rdl; + if(rdu) delete rdu; + if(ryc) delete ryc; + if(ryd) delete ryd; if(rszl) delete rszl; if(rszu) delete rszu; if(rsvl) delete rsvl; if(rsvu) delete rsvu; } -double hiopResidual::compute_nlp_infeasib_onenorm (const hiopIterate& it, - const hiopVector& c, - const hiopVector& d) +double hiopResidual::compute_nlp_infeasib_onenorm(const hiopIterate& it, const hiopVector& c, const hiopVector& d) { nlp->runStats.tmSolverInternal.start(); double nrmOne_infeasib = 0.; - size_type nx_loc=rx->get_local_size(); - //ryc + size_type nx_loc = rx->get_local_size(); + // ryc ryc->copyFrom(nlp->get_crhs()); - ryc->axpy(-1.0,c); + ryc->axpy(-1.0, c); nrmOne_infeasib += ryc->onenorm(); - //ryd + // ryd ryd->copyFrom(*it.d); ryd->axpy(-1.0, d); nrmOne_infeasib += ryd->onenorm(); - //rxl=x-sxl-xl - if(nlp->n_low_local()>0) { + // rxl=x-sxl-xl + if(nlp->n_low_local() > 0) { rxl->copyFrom(*it.x); - rxl->axpy(-1.0,*it.sxl); - rxl->axpy(-1.0,nlp->get_xl()); - //zero out entries in the resid that don't correspond to a finite low bound - if(nlp->n_low_local()axpy(-1.0, *it.sxl); + rxl->axpy(-1.0, nlp->get_xl()); + // zero out entries in the resid that don't correspond to a finite low bound + if(nlp->n_low_local() < nx_loc) { rxl->selectPattern(nlp->get_ixl()); } } - //rxu=-x-sxu+xu - if(nlp->n_upp_local()>0) { - rxu->copyFrom(nlp->get_xu()); rxu->axpy(-1.0,*it.x); rxu->axpy(-1.0,*it.sxu); - if(nlp->n_upp_local()n_upp_local() > 0) { + rxu->copyFrom(nlp->get_xu()); + rxu->axpy(-1.0, *it.x); + rxu->axpy(-1.0, *it.sxu); + if(nlp->n_upp_local() < nx_loc) { rxu->selectPattern(nlp->get_ixu()); } } - //rdl=d-sdl-dl - if(nlp->m_ineq_low()>0) { + // rdl=d-sdl-dl + if(nlp->m_ineq_low() > 0) { rdl->copyFrom(*it.d); - rdl->axpy(-1.0,*it.sdl); - rdl->axpy(-1.0,nlp->get_dl()); + rdl->axpy(-1.0, *it.sdl); + rdl->axpy(-1.0, nlp->get_dl()); rdl->selectPattern(nlp->get_idl()); } - //rdu=-d-sdu+du - if(nlp->m_ineq_upp()>0) { + // rdu=-d-sdu+du + if(nlp->m_ineq_upp() > 0) { rdu->copyFrom(nlp->get_du()); - rdu->axpy(-1.0,*it.sdu); - rdu->axpy(-1.0,*it.d); + rdu->axpy(-1.0, *it.sdu); + rdu->axpy(-1.0, *it.d); rdu->selectPattern(nlp->get_idu()); } @@ -151,10 +151,14 @@ double hiopResidual::compute_nlp_infeasib_onenorm (const hiopIterate& it, return nrmOne_infeasib; } -int hiopResidual::update(const hiopIterate& it, - const double& f, const hiopVector& c, const hiopVector& d, - const hiopVector& grad, const hiopMatrix& jac_c, const hiopMatrix& jac_d, - const hiopLogBarProblem& logprob) +int hiopResidual::update(const hiopIterate& it, + const double& f, + const hiopVector& c, + const hiopVector& d, + const hiopVector& grad, + const hiopMatrix& jac_c, + const hiopMatrix& jac_d, + const hiopLogBarProblem& logprob) { nlp->runStats.tmSolverInternal.start(); nrmInf_nlp_optim = nrmInf_nlp_feasib = nrmInf_nlp_complem = 0; @@ -163,8 +167,8 @@ int hiopResidual::update(const hiopIterate& it, nrmOne_nlp_optim = nrmOne_bar_optim = 0.; nrmInf_cons_violation = 0.; - size_type nx_loc=rx->get_local_size(); - const double& mu=logprob.mu; + size_type nx_loc = rx->get_local_size(); + const double& mu = logprob.mu; double buf; #ifdef HIOP_DEEPCHECKS assert(it.zl->matchesPattern(nlp->get_ixl())); @@ -177,221 +181,225 @@ int hiopResidual::update(const hiopIterate& it, jac_c.transTimesVec(1.0, *rx, 1.0, *it.yc); jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); rx->axpy(-1.0, *it.zl); - rx->axpy( 1.0, *it.zu); + rx->axpy(1.0, *it.zu); buf = rx->infnorm_local(); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); nrmOne_nlp_optim += rx->onenorm(); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rx=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rx=%22.17e\n", buf); logprob.addNonLogBarTermsToGrad_x(1.0, *rx); rx->negate(); nrmInf_bar_optim = fmax(nrmInf_bar_optim, rx->infnorm_local()); nrmOne_bar_optim += rx->onenorm(); //~ done with rx - // rd + // rd rd->copyFrom(*it.yd); - rd->axpy( 1.0, *it.vl); + rd->axpy(1.0, *it.vl); rd->axpy(-1.0, *it.vu); buf = rd->infnorm_local(); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); nrmOne_nlp_optim += rd->onenorm(); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rd=%22.17e\n", buf); - logprob.addNonLogBarTermsToGrad_d(-1.0,*rd); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rd=%22.17e\n", buf); + logprob.addNonLogBarTermsToGrad_d(-1.0, *rd); nrmInf_bar_optim = fmax(nrmInf_bar_optim, rd->infnorm_local()); nrmOne_bar_optim += rd->onenorm(); - //ryc + // ryc ryc->copyFrom(nlp->get_crhs()); - ryc->axpy(-1.0,c); + ryc->axpy(-1.0, c); buf = ryc->infnorm_local(); nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nrmOne_nlp_feasib += ryc->onenorm(); nrmInf_cons_violation = fmax(nrmInf_cons_violation, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm ryc=%22.17e\n", buf); - + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm ryc=%22.17e\n", buf); + // compute constraint violation. Use ryd as a temporary vector double arg1 = 0.0; ryd->copyFrom(d); ryd->axpy(-1.0, nlp->get_dl()); arg1 = ryd->min_w_pattern(nlp->get_idl()); - buf = (arg1<0.)? -arg1 : 0.0; + buf = (arg1 < 0.) ? -arg1 : 0.0; nrmInf_cons_violation = fmax(nrmInf_cons_violation, buf); - + ryd->copyFrom(nlp->get_du()); ryd->axpy(-1.0, d); arg1 = ryd->min_w_pattern(nlp->get_idu()); - buf = (arg1<0.)? -arg1 : 0.0; + buf = (arg1 < 0.) ? -arg1 : 0.0; nrmInf_cons_violation = fmax(nrmInf_cons_violation, buf); - //ryd + // ryd ryd->copyFrom(*it.d); ryd->axpy(-1.0, d); buf = ryd->infnorm_local(); nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nrmOne_nlp_feasib += ryd->onenorm(); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm ryd=%22.17e\n", buf); - - //rxl=x-sxl-xl - if(nlp->n_low_local()>0) { + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm ryd=%22.17e\n", buf); + + // rxl=x-sxl-xl + if(nlp->n_low_local() > 0) { rxl->copyFrom(*it.x); - rxl->axpy(-1.0,*it.sxl); - rxl->axpy(-1.0,nlp->get_xl()); - //zero out entries in the resid that don't correspond to a finite low bound - if(nlp->n_low_local()axpy(-1.0, *it.sxl); + rxl->axpy(-1.0, nlp->get_xl()); + // zero out entries in the resid that don't correspond to a finite low bound + if(nlp->n_low_local() < nx_loc) { rxl->selectPattern(nlp->get_ixl()); } buf = rxl->infnorm_local(); - //nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rxl=%22.17e\n", buf); + // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxl=%22.17e\n", buf); } - //printf(" %10.4e (xl)", nrmInf_nlp_feasib); - //rxu=-x-sxu+xu - if(nlp->n_upp_local()>0) { + // printf(" %10.4e (xl)", nrmInf_nlp_feasib); + // rxu=-x-sxu+xu + if(nlp->n_upp_local() > 0) { rxu->copyFrom(nlp->get_xu()); - rxu->axpy(-1.0,*it.x); - rxu->axpy(-1.0,*it.sxu); - if(nlp->n_upp_local()axpy(-1.0, *it.x); + rxu->axpy(-1.0, *it.sxu); + if(nlp->n_upp_local() < nx_loc) { rxu->selectPattern(nlp->get_ixu()); } buf = rxu->infnorm_local(); - //nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rxu=%22.17e\n", buf); - } - //printf(" %10.4e (xu)", nrmInf_nlp_feasib); - //rdl=d-sdl-dl - if(nlp->m_ineq_low()>0) { - rdl->copyFrom(*it.d); rdl->axpy(-1.0,*it.sdl); rdl->axpy(-1.0,nlp->get_dl()); + // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxu=%22.17e\n", buf); + } + // printf(" %10.4e (xu)", nrmInf_nlp_feasib); + // rdl=d-sdl-dl + if(nlp->m_ineq_low() > 0) { + rdl->copyFrom(*it.d); + rdl->axpy(-1.0, *it.sdl); + rdl->axpy(-1.0, nlp->get_dl()); rdl->selectPattern(nlp->get_idl()); buf = rdl->infnorm_local(); - //nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rdl=%22.17e\n", buf); + // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rdl=%22.17e\n", buf); } - //printf(" %10.4e (dl)", nrmInf_nlp_feasib); - //rdu=-d-sdu+du - if(nlp->m_ineq_upp()>0) { + // printf(" %10.4e (dl)", nrmInf_nlp_feasib); + // rdu=-d-sdu+du + if(nlp->m_ineq_upp() > 0) { rdu->copyFrom(nlp->get_du()); - rdu->axpy(-1.0,*it.sdu); - rdu->axpy(-1.0,*it.d); + rdu->axpy(-1.0, *it.sdu); + rdu->axpy(-1.0, *it.d); rdu->selectPattern(nlp->get_idu()); buf = rdu->infnorm_local(); - //nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rdu=%22.17e\n", buf); + // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rdu=%22.17e\n", buf); } - //printf(" %10.4e (du)\n", nrmInf_nlp_feasib); - //set the feasibility error for the log barrier problem + // printf(" %10.4e (du)\n", nrmInf_nlp_feasib); + // set the feasibility error for the log barrier problem nrmInf_bar_feasib = nrmInf_nlp_feasib; nrmOne_bar_feasib = nrmOne_nlp_feasib; - //rszl = \mu e - sxl * zl - if(nlp->n_low_local()>0) { + // rszl = \mu e - sxl * zl + if(nlp->n_low_local() > 0) { rszl->setToZero(); rszl->axzpy(-1.0, *it.sxl, *it.zl); - if(nlp->n_low_local()n_low_local() < nx_loc) { rszl->selectPattern(nlp->get_ixl()); } nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, rszl->infnorm_local()); - - rszl->addConstant_w_patternSelect(mu,nlp->get_ixl()); + + rszl->addConstant_w_patternSelect(mu, nlp->get_ixl()); buf = rszl->infnorm_local(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rszl=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszl=%22.17e\n", buf); } - //rszu = \mu e - sxu * zu - if(nlp->n_upp_local()>0) { + // rszu = \mu e - sxu * zu + if(nlp->n_upp_local() > 0) { rszu->setToZero(); rszu->axzpy(-1.0, *it.sxu, *it.zu); - if(nlp->n_upp_local()n_upp_local() < nx_loc) { rszu->selectPattern(nlp->get_ixu()); } buf = rszu->infnorm_local(); nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); - rszu->addConstant_w_patternSelect(mu,nlp->get_ixu()); + rszu->addConstant_w_patternSelect(mu, nlp->get_ixu()); buf = rszu->infnorm_local(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rszu=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszu=%22.17e\n", buf); } - //rsvl = \mu e - sdl * vl - if(nlp->m_ineq_low()>0) { + // rsvl = \mu e - sdl * vl + if(nlp->m_ineq_low() > 0) { rsvl->setToZero(); rsvl->axzpy(-1.0, *it.sdl, *it.vl); - if(nlp->m_ineq_low()m_ineq()) { + if(nlp->m_ineq_low() < nlp->m_ineq()) { rsvl->selectPattern(nlp->get_idl()); } buf = rsvl->infnorm_local(); nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); - //add mu - rsvl->addConstant_w_patternSelect(mu,nlp->get_idl()); + // add mu + rsvl->addConstant_w_patternSelect(mu, nlp->get_idl()); buf = rsvl->infnorm_local(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rsvl=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rsvl=%22.17e\n", buf); } - //rsvu = \mu e - sdu * vu - if(nlp->m_ineq_upp()>0) { + // rsvu = \mu e - sdu * vu + if(nlp->m_ineq_upp() > 0) { rsvu->setToZero(); rsvu->axzpy(-1.0, *it.sdu, *it.vu); - if(nlp->m_ineq_upp()m_ineq()) { + if(nlp->m_ineq_upp() < nlp->m_ineq()) { rsvu->selectPattern(nlp->get_idu()); } buf = rsvu->infnorm_local(); nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); - //add mu - rsvu->addConstant_w_patternSelect(mu,nlp->get_idu()); + // add mu + rsvu->addConstant_w_patternSelect(mu, nlp->get_idu()); buf = rsvu->infnorm_local(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rsvu=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rsvu=%22.17e\n", buf); } - + #ifdef HIOP_USE_MPI - //here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles - //otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double - double aux[6]={nrmInf_nlp_optim, - nrmInf_nlp_feasib, - nrmInf_nlp_complem, - nrmInf_bar_optim, - nrmInf_bar_feasib, - nrmInf_bar_complem}; + // here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles + // otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double + double aux[6] = + {nrmInf_nlp_optim, nrmInf_nlp_feasib, nrmInf_nlp_complem, nrmInf_bar_optim, nrmInf_bar_feasib, nrmInf_bar_complem}; double aux_g[6]; - int ierr = MPI_Allreduce(aux, aux_g, 6, MPI_DOUBLE, MPI_MAX, nlp->get_comm()); - assert(MPI_SUCCESS==ierr); - nrmInf_nlp_optim=aux_g[0]; - nrmInf_nlp_feasib=aux_g[1]; - nrmInf_nlp_complem=aux_g[2]; - nrmInf_bar_optim=aux_g[3]; - nrmInf_bar_feasib=aux_g[4]; - nrmInf_bar_complem=aux_g[5]; + int ierr = MPI_Allreduce(aux, aux_g, 6, MPI_DOUBLE, MPI_MAX, nlp->get_comm()); + assert(MPI_SUCCESS == ierr); + nrmInf_nlp_optim = aux_g[0]; + nrmInf_nlp_feasib = aux_g[1]; + nrmInf_nlp_complem = aux_g[2]; + nrmInf_bar_optim = aux_g[3]; + nrmInf_bar_feasib = aux_g[4]; + nrmInf_bar_complem = aux_g[5]; #endif nlp->runStats.tmSolverInternal.stop(); return true; } -void hiopResidual::print(FILE* f, const char* msg/*=NULL*/, int max_elems/*=-1*/, int rank/*=-1*/) const +void hiopResidual::print(FILE* f, const char* msg /*=NULL*/, int max_elems /*=-1*/, int rank /*=-1*/) const { - if(NULL==msg) fprintf(f, "hiopResidual print\n"); - else fprintf(f, "%s\n", msg); - - rx->print( f, " rx:", max_elems, rank); - rd->print( f, " rd:", max_elems, rank); - ryc->print( f, " ryc:", max_elems, rank); - ryd->print( f, " ryd:", max_elems, rank); - rszl->print(f, " rszl:", max_elems, rank); - rszu->print(f, " rszu:", max_elems, rank); - rsvl->print(f, " rsvl:", max_elems, rank); - rsvu->print(f, " rsvu:", max_elems, rank); - rxl->print( f, " rxl:", max_elems, rank); - rxu->print( f, " rxu:", max_elems, rank); - rdl->print( f, " rdl:", max_elems, rank); - rdu->print( f, " rdu:", max_elems, rank); - printf(" errors (optim/feasib/complem) nlp : %26.16e %25.16e %25.16e\n", - nrmInf_nlp_optim, nrmInf_nlp_feasib, nrmInf_nlp_complem); - printf(" errors (optim/feasib/complem) barrier: %25.16e %25.16e %25.16e\n", - nrmInf_bar_optim, nrmInf_bar_feasib, nrmInf_bar_complem); + if(NULL == msg) + fprintf(f, "hiopResidual print\n"); + else + fprintf(f, "%s\n", msg); + + rx->print(f, " rx:", max_elems, rank); + rd->print(f, " rd:", max_elems, rank); + ryc->print(f, " ryc:", max_elems, rank); + ryd->print(f, " ryd:", max_elems, rank); + rszl->print(f, " rszl:", max_elems, rank); + rszu->print(f, " rszu:", max_elems, rank); + rsvl->print(f, " rsvl:", max_elems, rank); + rsvu->print(f, " rsvu:", max_elems, rank); + rxl->print(f, " rxl:", max_elems, rank); + rxu->print(f, " rxu:", max_elems, rank); + rdl->print(f, " rdl:", max_elems, rank); + rdu->print(f, " rdu:", max_elems, rank); + printf(" errors (optim/feasib/complem) nlp : %26.16e %25.16e %25.16e\n", + nrmInf_nlp_optim, + nrmInf_nlp_feasib, + nrmInf_nlp_complem); + printf(" errors (optim/feasib/complem) barrier: %25.16e %25.16e %25.16e\n", + nrmInf_bar_optim, + nrmInf_bar_feasib, + nrmInf_bar_complem); } -void hiopResidual::copyFrom(const hiopResidual& resid_src) +void hiopResidual::copyFrom(const hiopResidual& resid_src) { rx->copyFrom(*resid_src.get_rx()); rd->copyFrom(*resid_src.get_rd()); @@ -406,7 +414,7 @@ void hiopResidual::copyFrom(const hiopResidual& resid_src) rszl->copyFrom(*resid_src.get_rszl()); rszu->copyFrom(*resid_src.get_rszu()); rsvl->copyFrom(*resid_src.get_rsvl()); - rsvu->copyFrom(*resid_src.get_rsvu()); + rsvu->copyFrom(*resid_src.get_rsvu()); nrmInf_nlp_optim = resid_src.get_nrmInf_nlp_optim(); nrmInf_bar_optim = resid_src.get_nrmInf_bar_optim(); @@ -418,7 +426,7 @@ void hiopResidual::copyFrom(const hiopResidual& resid_src) nrmOne_bar_feasib = resid_src.get_nrmOne_bar_feasib(); nrmOne_nlp_optim = resid_src.get_nrmOne_nlp_optim(); nrmOne_bar_optim = resid_src.get_nrmOne_bar_optim(); - + nlp = resid_src.nlp; } @@ -437,8 +445,8 @@ void hiopResidual::update_soc(const hiopIterate& it, nrmOne_nlp_feasib = nrmOne_bar_feasib = 0.; nrmOne_nlp_optim = nrmOne_bar_optim = 0.; - size_type nx_loc=rx->get_local_size(); - const double& mu=logprob.mu; + size_type nx_loc = rx->get_local_size(); + const double& mu = logprob.mu; double buf; #ifdef HIOP_DEEPCHECKS assert(it.zl->matchesPattern(nlp->get_ixl())); @@ -451,175 +459,177 @@ void hiopResidual::update_soc(const hiopIterate& it, jac_c.transTimesVec(1.0, *rx, 1.0, *it.yc); jac_d.transTimesVec(1.0, *rx, 1.0, *it.yd); rx->axpy(-1.0, *it.zl); - rx->axpy( 1.0, *it.zu); + rx->axpy(1.0, *it.zu); buf = rx->infnorm_local(); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); nrmOne_nlp_optim += rx->onenorm(); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rx=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rx=%22.17e\n", buf); logprob.addNonLogBarTermsToGrad_x(1.0, *rx); rx->negate(); nrmInf_bar_optim = fmax(nrmInf_bar_optim, rx->infnorm_local()); nrmOne_bar_optim += rx->onenorm(); - - // rd + + // rd rd->copyFrom(*it.yd); - rd->axpy( 1.0, *it.vl); + rd->axpy(1.0, *it.vl); rd->axpy(-1.0, *it.vu); buf = rd->infnorm_local(); nrmInf_nlp_optim = fmax(nrmInf_nlp_optim, buf); nrmOne_nlp_optim += rd->onenorm(); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rd=%22.17e\n", buf); - logprob.addNonLogBarTermsToGrad_d(-1.0,*rd); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rd=%22.17e\n", buf); + logprob.addNonLogBarTermsToGrad_d(-1.0, *rd); nrmInf_bar_optim = fmax(nrmInf_bar_optim, rd->infnorm_local()); nrmOne_bar_optim += rd->onenorm(); - - //ryc for soc: \alpha*c + c_trial + + // ryc for soc: \alpha*c + c_trial ryc->copyFrom(c_soc); buf = ryc->infnorm_local(); nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nrmOne_nlp_feasib += ryc->onenorm(); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm ryc=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm ryc=%22.17e\n", buf); - //ryd for soc: \alpha*(slack-d_soc) + (slack_trial-c_trial) + // ryd for soc: \alpha*(slack-d_soc) + (slack_trial-c_trial) ryd->copyFrom(d_soc); buf = ryd->infnorm_local(); nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); nrmOne_nlp_feasib += ryd->onenorm(); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm ryd=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm ryd=%22.17e\n", buf); - //rxl=x-sxl-xl - if(nlp->n_low_local()>0) { + // rxl=x-sxl-xl + if(nlp->n_low_local() > 0) { rxl->copyFrom(*it.x); - rxl->axpy(-1.0,*it.sxl); - rxl->axpy(-1.0,nlp->get_xl()); - //zero out entries in the resid that don't correspond to a finite low bound - if(nlp->n_low_local()axpy(-1.0, *it.sxl); + rxl->axpy(-1.0, nlp->get_xl()); + // zero out entries in the resid that don't correspond to a finite low bound + if(nlp->n_low_local() < nx_loc) { rxl->selectPattern(nlp->get_ixl()); } buf = rxl->infnorm_local(); - //nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rxl=%22.17e\n", buf); + // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxl=%22.17e\n", buf); } - //printf(" %10.4e (xl)", nrmInf_nlp_feasib); - //rxu=-x-sxu+xu - if(nlp->n_upp_local()>0) { + // printf(" %10.4e (xl)", nrmInf_nlp_feasib); + // rxu=-x-sxu+xu + if(nlp->n_upp_local() > 0) { rxu->copyFrom(nlp->get_xu()); - rxu->axpy(-1.0,*it.x); - rxu->axpy(-1.0,*it.sxu); - if(nlp->n_upp_local()axpy(-1.0, *it.x); + rxu->axpy(-1.0, *it.sxu); + if(nlp->n_upp_local() < nx_loc) { rxu->selectPattern(nlp->get_ixu()); } buf = rxu->infnorm_local(); - //nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rxu=%22.17e\n", buf); - } - //printf(" %10.4e (xu)", nrmInf_nlp_feasib); - //rdl=d-sdl-dl - if(nlp->m_ineq_low()>0) { - rdl->copyFrom(*it.d); rdl->axpy(-1.0,*it.sdl); rdl->axpy(-1.0,nlp->get_dl()); + // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rxu=%22.17e\n", buf); + } + // printf(" %10.4e (xu)", nrmInf_nlp_feasib); + // rdl=d-sdl-dl + if(nlp->m_ineq_low() > 0) { + rdl->copyFrom(*it.d); + rdl->axpy(-1.0, *it.sdl); + rdl->axpy(-1.0, nlp->get_dl()); rdl->selectPattern(nlp->get_idl()); buf = rdl->infnorm_local(); - //nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rdl=%22.17e\n", buf); + // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rdl=%22.17e\n", buf); } - //printf(" %10.4e (dl)", nrmInf_nlp_feasib); - //rdu=-d-sdu+du - if(nlp->m_ineq_upp()>0) { + // printf(" %10.4e (dl)", nrmInf_nlp_feasib); + // rdu=-d-sdu+du + if(nlp->m_ineq_upp() > 0) { rdu->copyFrom(nlp->get_du()); - rdu->axpy(-1.0,*it.sdu); - rdu->axpy(-1.0,*it.d); + rdu->axpy(-1.0, *it.sdu); + rdu->axpy(-1.0, *it.d); rdu->selectPattern(nlp->get_idu()); buf = rdu->infnorm_local(); -// nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rdl=%22.17e\n", buf); + // nrmInf_nlp_feasib = fmax(nrmInf_nlp_feasib, buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rdl=%22.17e\n", buf); } - //printf(" %10.4e (du)\n", nrmInf_nlp_feasib); - //set the feasibility error for the log barrier problem + // printf(" %10.4e (du)\n", nrmInf_nlp_feasib); + // set the feasibility error for the log barrier problem nrmInf_bar_feasib = nrmInf_nlp_feasib; nrmOne_bar_feasib = nrmOne_nlp_feasib; - //rszl = \mu e - sxl * zl - if(nlp->n_low_local()>0) { + // rszl = \mu e - sxl * zl + if(nlp->n_low_local() > 0) { rszl->setToZero(); rszl->axzpy(-1.0, *it.sxl, *it.zl); - if(nlp->n_low_local()n_low_local() < nx_loc) { rszl->selectPattern(nlp->get_ixl()); } nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, rszl->infnorm_local()); - - rszl->addConstant_w_patternSelect(mu,nlp->get_ixl()); + + rszl->addConstant_w_patternSelect(mu, nlp->get_ixl()); buf = rszl->infnorm_local(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rszl=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszl=%22.17e\n", buf); } - //rszu = \mu e - sxu * zu - if(nlp->n_upp_local()>0) { + // rszu = \mu e - sxu * zu + if(nlp->n_upp_local() > 0) { rszu->setToZero(); rszu->axzpy(-1.0, *it.sxu, *it.zu); - if(nlp->n_upp_local()n_upp_local() < nx_loc) { rszu->selectPattern(nlp->get_ixu()); } buf = rszu->infnorm_local(); nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); - rszu->addConstant_w_patternSelect(mu,nlp->get_ixu()); + rszu->addConstant_w_patternSelect(mu, nlp->get_ixu()); buf = rszu->infnorm_local(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rszu=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rszu=%22.17e\n", buf); } - //rsvl = \mu e - sdl * vl - if(nlp->m_ineq_low()>0) { + // rsvl = \mu e - sdl * vl + if(nlp->m_ineq_low() > 0) { rsvl->setToZero(); rsvl->axzpy(-1.0, *it.sdl, *it.vl); - if(nlp->m_ineq_low()m_ineq()) { + if(nlp->m_ineq_low() < nlp->m_ineq()) { rsvl->selectPattern(nlp->get_idl()); } buf = rsvl->infnorm_local(); nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); - //add mu - rsvl->addConstant_w_patternSelect(mu,nlp->get_idl()); + // add mu + rsvl->addConstant_w_patternSelect(mu, nlp->get_idl()); buf = rsvl->infnorm_local(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rsvl=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rsvl=%22.17e\n", buf); } - //rsvu = \mu e - sdu * vu - if(nlp->m_ineq_upp()>0) { + // rsvu = \mu e - sdu * vu + if(nlp->m_ineq_upp() > 0) { rsvu->setToZero(); rsvu->axzpy(-1.0, *it.sdu, *it.vu); - if(nlp->m_ineq_upp()m_ineq()) { + if(nlp->m_ineq_upp() < nlp->m_ineq()) { rsvu->selectPattern(nlp->get_idu()); } buf = rsvu->infnorm_local(); nrmInf_nlp_complem = fmax(nrmInf_nlp_complem, buf); - //add mu - rsvu->addConstant_w_patternSelect(mu,nlp->get_idu()); + // add mu + rsvu->addConstant_w_patternSelect(mu, nlp->get_idu()); buf = rsvu->infnorm_local(); nrmInf_bar_complem = fmax(nrmInf_bar_complem, buf); - nlp->log->printf(hovScalars,"NLP resid [update]: inf norm rsvu=%22.17e\n", buf); + nlp->log->printf(hovScalars, "NLP resid [update]: inf norm rsvu=%22.17e\n", buf); } #ifdef HIOP_USE_MPI - //here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles - //otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double - double aux[6] = {nrmInf_nlp_optim,nrmInf_nlp_feasib,nrmInf_nlp_complem,nrmInf_bar_optim,nrmInf_bar_feasib,nrmInf_bar_complem}, aux_g[6]; + // here we reduce each of the norm together for a total cost of 1 Allreduce of 3 doubles + // otherwise, if calling infnorm() for each vector, there will be 12 Allreduce's, each of 1 double + double aux[6] = + {nrmInf_nlp_optim, nrmInf_nlp_feasib, nrmInf_nlp_complem, nrmInf_bar_optim, nrmInf_bar_feasib, nrmInf_bar_complem}, + aux_g[6]; int ierr = MPI_Allreduce(aux, aux_g, 6, MPI_DOUBLE, MPI_MAX, nlp->get_comm()); - assert(MPI_SUCCESS==ierr); + assert(MPI_SUCCESS == ierr); nrmInf_nlp_optim = aux_g[0]; nrmInf_nlp_feasib = aux_g[1]; nrmInf_nlp_complem = aux_g[2]; nrmInf_bar_optim = aux_g[3]; nrmInf_bar_feasib = aux_g[4]; - nrmInf_bar_complem = aux_g[5]; + nrmInf_bar_complem = aux_g[5]; #endif nlp->runStats.tmSolverInternal.stop(); - } - -}; +}; // namespace hiop // void hiopResidual:: // projectPrimalsIntoBounds(double kappa1, double kappa2) @@ -649,7 +659,6 @@ void hiopResidual::update_soc(const hiopIterate& it, // yd->setToConstant(v); // } - // void hiopResidual::determineSlacks() // { // sxl->copyFrom(*x); @@ -657,7 +666,7 @@ void hiopResidual::update_soc(const hiopIterate& it, // sxl->selectPattern(nlp->get_ixl()); // sxu->copyFrom(nlp->get_xu()); -// sxu->axpy(-1., *x); +// sxu->axpy(-1., *x); // sxu->selectPattern(nlp->get_ixu()); // sdl->copyFrom(*d); @@ -665,7 +674,7 @@ void hiopResidual::update_soc(const hiopIterate& it, // sdl->selectPattern(nlp->get_idl()); // sdu->copyFrom(nlp->get_du()); -// sdu->axpy(-1., *d); +// sdu->axpy(-1., *d); // sdu->selectPattern(nlp->get_idu()); // #ifdef HIOP_DEEPCHECKS diff --git a/src/Optimization/hiopResidual.hpp b/src/Optimization/hiopResidual.hpp index 57082cae8..4ad02969d 100644 --- a/src/Optimization/hiopResidual.hpp +++ b/src/Optimization/hiopResidual.hpp @@ -65,20 +65,24 @@ class hiopResidual virtual ~hiopResidual(); virtual int update(const hiopIterate& it, - const double& f, const hiopVector& c, const hiopVector& d, - const hiopVector& gradf, const hiopMatrix& jac_c, const hiopMatrix& jac_d, - const hiopLogBarProblem& logbar); - - /// + const double& f, + const hiopVector& c, + const hiopVector& d, + const hiopVector& gradf, + const hiopMatrix& jac_c, + const hiopMatrix& jac_d, + const hiopLogBarProblem& logbar); + + /// /// @pre /** * @brief Evaluate the infeasibilities at the new second-order-correction iterate. - * - * Evaluate the the infeasibilities at the new second-order-correction iterate. Save + * + * Evaluate the the infeasibilities at the new second-order-correction iterate. Save * The method modifies 'this', in particular ryd,ryc, rxl,rxu, rdl, rdu in an attempt - * to reuse storage/buffers, and it updates the cached `nrmXXX` members. + * to reuse storage/buffers, and it updates the cached `nrmXXX` members. * - * @pre unlike method `update`, here vector `c_soc` and `d_soc` is the constraint + * @pre unlike method `update`, here vector `c_soc` and `d_soc` is the constraint * violation including the constraint right-hand-side/slack, i.e., * c_soc = c_rhs - c_body and d_soc = d - d_body * @@ -95,78 +99,72 @@ class hiopResidual /* Return the Nlp and Log-bar errors computed at the previous update call. */ inline void getNlpErrors(double& optim, double& feas, double& comple, double& cons_violation) const - { + { optim = nrmInf_nlp_optim; feas = nrmInf_nlp_feasib; comple = nrmInf_nlp_complem; cons_violation = nrmInf_cons_violation; }; inline void getBarrierErrors(double& optim, double& feas, double& comple) const - { + { optim = nrmInf_bar_optim; feas = nrmInf_bar_feasib; comple = nrmInf_bar_complem; }; /* get the previously computed Infeasibility */ - inline double getInfeasInfNorm() const { - return nrmInf_nlp_feasib; - } + inline double getInfeasInfNorm() const { return nrmInf_nlp_feasib; } /* get the previously computed Infeasibility */ - inline double get_theta() const { - return nrmOne_nlp_feasib; - } + inline double get_theta() const { return nrmOne_nlp_feasib; } /* evaluate the Infeasibility at the new iterate, which has eq and ineq functions * computed in c_eval and d_eval, respectively. * The method modifies 'this', in particular ryd,ryc, rxl,rxu, rdl, rdu in an attempt - * to reuse storage/buffers, but does not update the cached nrmInf_XXX members. + * to reuse storage/buffers, but does not update the cached nrmInf_XXX members. * It computes and returns the one norm of [ryc ryd] */ - double compute_nlp_infeasib_onenorm (const hiopIterate& iter, - const hiopVector& c_eval, - const hiopVector& d_eval); + double compute_nlp_infeasib_onenorm(const hiopIterate& iter, const hiopVector& c_eval, const hiopVector& d_eval); /* residual printing function - calls hiopVector::print * prints up to max_elems (by default all), on rank 'rank' (by default on all) */ - virtual void print(FILE*, const char* msg=NULL, int max_elems=-1, int rank=-1) const; - + virtual void print(FILE*, const char* msg = NULL, int max_elems = -1, int rank = -1) const; + /* accessors */ - inline hiopVector* get_rx() const {return rx;} - inline hiopVector* get_rd() const {return rd;} - inline hiopVector* get_rxl() const {return rxl;} - inline hiopVector* get_rxu() const {return rxu;} - inline hiopVector* get_rdl() const {return rdl;} - inline hiopVector* get_rdu() const {return rdu;} - inline hiopVector* get_ryc() const {return ryc;} - inline hiopVector* get_ryd() const {return ryd;} - inline hiopVector* get_rszl() const {return rszl;} - inline hiopVector* get_rszu() const {return rszu;} - inline hiopVector* get_rsvl() const {return rsvl;} - inline hiopVector* get_rsvu() const {return rsvu;} - - inline double get_nrmInf_nlp_optim() const {return nrmInf_nlp_optim;} - inline double get_nrmInf_nlp_feasib() const {return nrmInf_nlp_feasib;} - inline double get_nrmInf_nlp_complem() const {return nrmInf_nlp_complem;} - inline double get_nrmInf_bar_optim() const {return nrmInf_bar_optim;} - inline double get_nrmInf_bar_feasib() const {return nrmInf_bar_feasib;} - inline double get_nrmInf_bar_complem() const {return nrmInf_bar_complem;} - inline double get_nrmOne_nlp_feasib() const {return nrmOne_nlp_feasib;} - inline double get_nrmOne_bar_feasib() const {return nrmOne_bar_feasib;} - inline double get_nrmOne_nlp_optim() const {return nrmOne_nlp_optim;} - inline double get_nrmOne_bar_optim() const {return nrmOne_bar_optim;} - inline hiopNlpFormulation * get_nlp() const {return nlp;} - + inline hiopVector* get_rx() const { return rx; } + inline hiopVector* get_rd() const { return rd; } + inline hiopVector* get_rxl() const { return rxl; } + inline hiopVector* get_rxu() const { return rxu; } + inline hiopVector* get_rdl() const { return rdl; } + inline hiopVector* get_rdu() const { return rdu; } + inline hiopVector* get_ryc() const { return ryc; } + inline hiopVector* get_ryd() const { return ryd; } + inline hiopVector* get_rszl() const { return rszl; } + inline hiopVector* get_rszu() const { return rszu; } + inline hiopVector* get_rsvl() const { return rsvl; } + inline hiopVector* get_rsvu() const { return rsvu; } + + inline double get_nrmInf_nlp_optim() const { return nrmInf_nlp_optim; } + inline double get_nrmInf_nlp_feasib() const { return nrmInf_nlp_feasib; } + inline double get_nrmInf_nlp_complem() const { return nrmInf_nlp_complem; } + inline double get_nrmInf_bar_optim() const { return nrmInf_bar_optim; } + inline double get_nrmInf_bar_feasib() const { return nrmInf_bar_feasib; } + inline double get_nrmInf_bar_complem() const { return nrmInf_bar_complem; } + inline double get_nrmOne_nlp_feasib() const { return nrmOne_nlp_feasib; } + inline double get_nrmOne_bar_feasib() const { return nrmOne_bar_feasib; } + inline double get_nrmOne_nlp_optim() const { return nrmOne_nlp_optim; } + inline double get_nrmOne_bar_optim() const { return nrmOne_bar_optim; } + inline hiopNlpFormulation* get_nlp() const { return nlp; } + void copyFrom(const hiopResidual& resid_src); private: - hiopVector*rx; // -\grad f - J_c^t y_c - J_d^t y_d + z_l - z_u - hiopVector*rd; // y_d + v_l - v_u - hiopVector*rxl,*rxu; // x - sxl-xl, -x-sxu+xu - hiopVector*rdl,*rdu; // as above but for d + hiopVector* rx; // -\grad f - J_c^t y_c - J_d^t y_d + z_l - z_u + hiopVector* rd; // y_d + v_l - v_u + hiopVector *rxl, *rxu; // x - sxl-xl, -x-sxu+xu + hiopVector *rdl, *rdu; // as above but for d - hiopVector*ryc; // -c(x) (c(x)=0!//!) - hiopVector*ryd; //for d- d(x) + hiopVector* ryc; // -c(x) (c(x)=0!//!) + hiopVector* ryd; // for d- d(x) - hiopVector*rszl,*rszu; // \mu e-sxl zl, \mu e - sxu zu - hiopVector*rsvl,*rsvu; // \mu e-sdl vl, \mu e - sdu vu + hiopVector *rszl, *rszu; // \mu e-sxl zl, \mu e - sxu zu + hiopVector *rsvl, *rsvu; // \mu e-sdl vl, \mu e - sdu vu /** storage for the norm of [rx,rd], [rxl,...,rdu,ryc,ryd], and [rszl,...,rsvu] * for the nlp (\mu=0) @@ -178,21 +176,22 @@ class hiopResidual double nrmInf_bar_optim, nrmInf_bar_feasib, nrmInf_bar_complem; /** storage for the one norm of [ryc,ryd]. This is the one norm of constraint violations. - */ + */ double nrmOne_nlp_feasib; double nrmOne_bar_feasib; double nrmOne_nlp_optim; double nrmOne_bar_optim; - + /** inf norm of constraint violation */ double nrmInf_cons_violation; // and associated info from problem formulation - hiopNlpFormulation * nlp; + hiopNlpFormulation* nlp; + private: hiopResidual() {}; hiopResidual(const hiopResidual&) {}; - hiopResidual& operator=(const hiopResidual& o) {return *this;}; + hiopResidual& operator=(const hiopResidual& o) { return *this; }; friend class hiopKKTLinSysFull; friend class hiopKKTLinSysCompressed; friend class hiopKKTLinSysCompressedXYcYd; @@ -203,5 +202,5 @@ class hiopResidual friend class hiopVectorCompoundPD; }; -} +} // namespace hiop #endif diff --git a/src/Utils/MathKernelsCuda.hpp b/src/Utils/MathKernelsCuda.hpp index f5b9be9c7..3777c1540 100644 --- a/src/Utils/MathKernelsCuda.hpp +++ b/src/Utils/MathKernelsCuda.hpp @@ -60,22 +60,22 @@ namespace hiop { namespace cuda { - // Generates uniformly distributed double-precision floating-point values, from minv to maxv - int array_random_uniform_kernel(int n, double* d_array, double minv, double maxv); +// Generates uniformly distributed double-precision floating-point values, from minv to maxv +int array_random_uniform_kernel(int n, double* d_array, double minv, double maxv); - // Generates uniformly distributed double-precision floating-point values, from 0.0 to 1.0 - int array_random_uniform_kernel(int n, double* d_array); +// Generates uniformly distributed double-precision floating-point values, from 0.0 to 1.0 +int array_random_uniform_kernel(int n, double* d_array); - // set all elements to `val' - void set_to_val_kernel(int n, double* values, double val); +// set all elements to `val' +void set_to_val_kernel(int n, double* values, double val); - /// set dest[mapping[i]] = src[i]; - void copy_src_to_mapped_dest_kernel(int n, const double* src, double* dest, const int* mapping); +/// set dest[mapping[i]] = src[i]; +void copy_src_to_mapped_dest_kernel(int n, const double* src, double* dest, const int* mapping); - /// set dest[i] = src[mapping[i]]; - void copy_mapped_src_to_dest_kernel(int n, const double* src, double* dest, const int* mapping); +/// set dest[i] = src[mapping[i]]; +void copy_mapped_src_to_dest_kernel(int n, const double* src, double* dest, const int* mapping); -} //end of namespace device -} //end of namespace hiop +} // namespace cuda +} // end of namespace hiop #endif diff --git a/src/Utils/MathKernelsHip.cpp b/src/Utils/MathKernelsHip.cpp index ba9a86f74..b3ef50f3a 100644 --- a/src/Utils/MathKernelsHip.cpp +++ b/src/Utils/MathKernelsHip.cpp @@ -61,51 +61,46 @@ #include #include "hiopCppStdUtils.hpp" -__global__ -void array_random_uniform_hip(int n, double* d_array, unsigned long seed, double minv, double maxv) +__global__ void array_random_uniform_hip(int n, double* d_array, unsigned long seed, double minv, double maxv) { - const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - const double delta = maxv - minv; - hiprandState state; - hiprand_init(seed, tid, 0, &state); - for (int i = tid; i < n; i += num_threads) { - const double ranv = hiprand_uniform_double( &state ); // from 0 to 1 - d_array[i] = ranv * delta + minv; - } + const int num_threads = blockDim.x * gridDim.x; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + const double delta = maxv - minv; + hiprandState state; + hiprand_init(seed, tid, 0, &state); + for(int i = tid; i < n; i += num_threads) { + const double ranv = hiprand_uniform_double(&state); // from 0 to 1 + d_array[i] = ranv * delta + minv; + } } -__global__ void set_to_constant_hip(int n, double *vec, double val) +__global__ void set_to_constant_hip(int n, double* vec, double val) { - const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - vec[i] = val; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + vec[i] = val; } } __global__ void copy_to_mapped_dest_hip(int n, const double* src, double* dest, const int* mapping) { - const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - dest[mapping[i]] = src[i]; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + dest[mapping[i]] = src[i]; } } __global__ void copy_from_mapped_src_hip(int n, const double* src, double* dest, const int* mapping) { - const int num_threads = blockDim.x * gridDim.x; - const int tid = blockIdx.x * blockDim.x + threadIdx.x; - for (int i = tid; i < n; i += num_threads) { - dest[i] = src[mapping[i]]; + const int tid = blockIdx.x * blockDim.x + threadIdx.x; + for(int i = tid; i < n; i += num_threads) { + dest[i] = src[mapping[i]]; } } - namespace hiop { namespace hip @@ -113,24 +108,24 @@ namespace hip int array_random_uniform_kernel(int n, double* d_array, double minv, double maxv) { - int block_size=256; - int grid_size = (n+block_size-1)/block_size; - + int block_size = 256; + int grid_size = (n + block_size - 1) / block_size; + unsigned long seed = generate_seed(); - array_random_uniform_hip<<>>(n, d_array, seed, minv, maxv); + array_random_uniform_hip<<>>(n, d_array, seed, minv, maxv); hipDeviceSynchronize(); return 1; } int array_random_uniform_kernel(int n, double* d_array) -{ +{ unsigned long seed = generate_seed(); hiprandGenerator_t generator; hiprandCreateGenerator(&generator, HIPRAND_RNG_PSEUDO_DEFAULT); hiprandSetPseudoRandomGeneratorSeed(generator, seed); - + // generate random val from 0 to 1 hiprandGenerateUniformDouble(generator, d_array, n); @@ -141,25 +136,24 @@ int array_random_uniform_kernel(int n, double* d_array) void set_to_val_kernel(int n, double* values, double val) { - int block_size=256; - int num_blocks = (n+block_size-1)/block_size; - set_to_constant_hip<<>>(n, values, val); + int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + set_to_constant_hip<<>>(n, values, val); } void copy_src_to_mapped_dest_kernel(int n, const double* src, double* dest, const int* mapping) { - int block_size=256; - int num_blocks = (n+block_size-1)/block_size; - copy_to_mapped_dest_hip<<>>(n, src, dest, mapping); + int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + copy_to_mapped_dest_hip<<>>(n, src, dest, mapping); } void copy_mapped_src_to_dest_kernel(int n, const double* src, double* dest, const int* mapping) { - int block_size=256; - int num_blocks = (n+block_size-1)/block_size; - copy_from_mapped_src_hip<<>>(n, src, dest, mapping); + int block_size = 256; + int num_blocks = (n + block_size - 1) / block_size; + copy_from_mapped_src_hip<<>>(n, src, dest, mapping); } -} //end of namespace hip -} //end of namespace hiop - +} // end of namespace hip +} // end of namespace hiop diff --git a/src/Utils/MathKernelsHip.hpp b/src/Utils/MathKernelsHip.hpp index 411135806..4c298951a 100644 --- a/src/Utils/MathKernelsHip.hpp +++ b/src/Utils/MathKernelsHip.hpp @@ -60,22 +60,22 @@ namespace hiop { namespace hip { - // Generates uniformly distributed double-precision floating-point values, from minv to maxv - int array_random_uniform_kernel(int n, double* d_array, double minv, double maxv); +// Generates uniformly distributed double-precision floating-point values, from minv to maxv +int array_random_uniform_kernel(int n, double* d_array, double minv, double maxv); - // Generates uniformly distributed double-precision floating-point values, from 0.0 to 1.0 - int array_random_uniform_kernel(int n, double* d_array); +// Generates uniformly distributed double-precision floating-point values, from 0.0 to 1.0 +int array_random_uniform_kernel(int n, double* d_array); - // set all elements to `val' - void set_to_val_kernel(int n, double* values, double val); +// set all elements to `val' +void set_to_val_kernel(int n, double* values, double val); - /// set dest[mapping[i]] = src[i]; - void copy_src_to_mapped_dest_kernel(int n, const double* src, double* dest, const int* mapping); +/// set dest[mapping[i]] = src[i]; +void copy_src_to_mapped_dest_kernel(int n, const double* src, double* dest, const int* mapping); - /// set dest[i] = src[mapping[i]]; - void copy_mapped_src_to_dest_kernel(int n, const double* src, double* dest, const int* mapping); +/// set dest[i] = src[mapping[i]]; +void copy_mapped_src_to_dest_kernel(int n, const double* src, double* dest, const int* mapping); -} //end of namespace device -} //end of namespace hiop +} // namespace hip +} // end of namespace hiop #endif diff --git a/src/Utils/MathKernelsHost.cpp b/src/Utils/MathKernelsHost.cpp index fd68b1f00..d693a7958 100644 --- a/src/Utils/MathKernelsHost.cpp +++ b/src/Utils/MathKernelsHost.cpp @@ -68,12 +68,12 @@ namespace host int array_random_uniform_kernel(int n, double* d_array, double minv, double maxv) { - std::uniform_real_distribution unif(minv,maxv); + std::uniform_real_distribution unif(minv, maxv); std::default_random_engine re; re.seed(generate_seed()); - for(auto i=0; i, LLNL * */ @@ -63,7 +63,8 @@ #include #endif -namespace hiop { +namespace hiop +{ std::string get_umpire_mem_address_info(void* address) { @@ -73,7 +74,7 @@ std::string get_umpire_mem_address_info(void* address) auto found_allocator = rm.getAllocator(address); std::stringstream ss; ss << "Allocated on '" << found_allocator.getName() << "' "; - ss << "Platform " << static_cast(found_allocator.getPlatform()) << " "; + ss << "Platform " << static_cast(found_allocator.getPlatform()) << " "; ss << "Size [" << found_allocator.getSize(address) << "]"; return ss.str(); } else { @@ -84,6 +85,4 @@ std::string get_umpire_mem_address_info(void* address) #endif } - - -} //end namespace +} // namespace hiop diff --git a/src/Utils/RajaUmpireUtils.hpp b/src/Utils/RajaUmpireUtils.hpp index f4bc32feb..35970ccca 100644 --- a/src/Utils/RajaUmpireUtils.hpp +++ b/src/Utils/RajaUmpireUtils.hpp @@ -3,52 +3,52 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. /** * @file RajaUmpireUtils.hpp - * + * * @author Cosmin G. Petra , LLNL * */ @@ -57,17 +57,18 @@ #define HIOP_RAJA_UMPIRE_UTILS #include -#include +#include -namespace hiop { - /** - * Returns a string with memory space Umpire keeps about the address passed as the argument, - * "__not_managed_by_umpire__" if Umpire does not have a record of the address, or - * "__info_not_available__HiOp_not_built_with_Umpire__" if HiOp was not built with Umpire. - * - * The function should be used for debugging purposes only. - */ - std::string get_umpire_mem_address_info(void* address); +namespace hiop +{ +/** + * Returns a string with memory space Umpire keeps about the address passed as the argument, + * "__not_managed_by_umpire__" if Umpire does not have a record of the address, or + * "__info_not_available__HiOp_not_built_with_Umpire__" if HiOp was not built with Umpire. + * + * The function should be used for debugging purposes only. + */ +std::string get_umpire_mem_address_info(void* address); -} // namespace hiop -#endif // HIOP_RAJA_UMPIRE_UTILS +} // namespace hiop +#endif // HIOP_RAJA_UMPIRE_UTILS diff --git a/src/Utils/SidreHelper.hpp b/src/Utils/SidreHelper.hpp index 69bca66bf..ac485fd85 100644 --- a/src/Utils/SidreHelper.hpp +++ b/src/Utils/SidreHelper.hpp @@ -69,25 +69,25 @@ namespace hiop { /** * @brief Holder of functionality needed by HiOp for checkpointing based on axom::sidre - */ + */ class SidreHelper { public: /** - * @brief Copy raw array to sidre::View within specified sidre::Group. - * + * @brief Copy raw array to sidre::View within specified sidre::Group. + * * @params group contains the view where the copy should be made to. - * @params view_name is the name of the view where to copy + * @params view_name is the name of the view where to copy * @params arr_src is the source double array * @params size is the number of elements of the array * * @exception std::runtime indicates the group contains a view with a number of elements * different than expected size. - * - * @details A view with the specified name will be created if does not already exist. If + * + * @details A view with the specified name will be created if does not already exist. If * exists, the view should have the same number of elements as the argument `size`. */ - + static void copy_array_to_view(::axom::sidre::Group& group, const ::std::string& view_name, const double* arr_src, @@ -96,78 +96,75 @@ class SidreHelper auto view = get_or_create_view(group, view_name, size); if(view->getNumElements() != size) { ::std::stringstream ss; - ss << "Size mismatch between HiOp state and existing sidre::view '" << view_name << - "' when copying to view. HiOp state has " << size << " doubles, while the view " << - "has " << view->getNumElements() << " double elements.\n"; + ss << "Size mismatch between HiOp state and existing sidre::view '" << view_name + << "' when copying to view. HiOp state has " << size << " doubles, while the view " << "has " + << view->getNumElements() << " double elements.\n"; throw ::std::runtime_error(ss.str()); } const auto stride(view->getStride()); - double *const arr_dest(view->getArray()); - if(1==stride) { - ::std::copy(arr_src, arr_src+size, arr_dest); + double* const arr_dest(view->getArray()); + if(1 == stride) { + ::std::copy(arr_src, arr_src + size, arr_dest); } else { - for(::axom::sidre::IndexType i=0; igetNumElements() != size) { ::std::stringstream ss; - ss << "Size mismatch between HiOp state and sidre::View '" << view_name << - "' when copying from the view. HiOp state is " << size << " doubles, "<< - "while the view has " << view_const->getNumElements() << " double elements.\n"; + ss << "Size mismatch between HiOp state and sidre::View '" << view_name + << "' when copying from the view. HiOp state is " << size << " doubles, " << "while the view has " + << view_const->getNumElements() << " double elements.\n"; throw ::std::runtime_error(ss.str()); } // const_cast becase View does not have a const getArray() auto view = const_cast<::axom::sidre::View*>(view_const); assert(view); - const double *const arr_src = view->getArray(); + const double* const arr_src = view->getArray(); const auto stride(view->getStride()); - if(1==stride) { - ::std::copy(arr_src, arr_src+size, arr_dest); + if(1 == stride) { + ::std::copy(arr_src, arr_src + size, arr_dest); } else { - for(hiop::index_type i=0; i will create/overwrite kkt_linsys_counter.iajaaa file and will write - * the matrix passed as argument - * 2. writeRhsToFile -> will append the rhs - * 3. writeSolToFile -> will append the sol - * - * The format of .iajaaa files is described in src/LinAlg/csr_iajaaa.md - */ - class hiopCSR_IO { - public: - // masterrank=-1 means all ranks save - hiopCSR_IO(hiopNlpFormulation* nlp, int masterrank=0) +/** + * @brief This class saves a dense or other matrices in the CSR format. The implementation + * assumes the following order of calls + * 1. writeMatToFile -> will create/overwrite kkt_linsys_counter.iajaaa file and will write + * the matrix passed as argument + * 2. writeRhsToFile -> will append the rhs + * 3. writeSolToFile -> will append the sol + * + * The format of .iajaaa files is described in src/LinAlg/csr_iajaaa.md + */ +class hiopCSR_IO +{ +public: + // masterrank=-1 means all ranks save + hiopCSR_IO(hiopNlpFormulation* nlp, int masterrank = 0) : _nlp(nlp), #ifdef HIOP_USE_MPI _master_rank(masterrank), #endif m(-1), last_counter(-1) - { - } + {} - virtual ~hiopCSR_IO() - { - } + virtual ~hiopCSR_IO() {} - /** - * @brief Appends a right-hand side vector to the .iajaaa file - * - * @param rhs is the right-hand side vector to be written - * @param counter specifies the suffix in the filename, usually is the iteration number - */ - void writeRhsToFile(const hiopVector& rhs, const int& counter) - { + /** + * @brief Appends a right-hand side vector to the .iajaaa file + * + * @param rhs is the right-hand side vector to be written + * @param counter specifies the suffix in the filename, usually is the iteration number + */ + void writeRhsToFile(const hiopVector& rhs, const int& counter) + { #ifdef HIOP_USE_MPI - if(_master_rank>=0 && _master_rank != _nlp->get_rank()) return; + if(_master_rank >= 0 && _master_rank != _nlp->get_rank()) return; #endif - assert(counter == last_counter); - assert(m == rhs.get_size()); - - std::string fname = "kkt_linsys_"; - fname += std::to_string(counter); - fname += ".iajaaa"; - FILE* f = fopen(fname.c_str(), "a+"); - if(NULL==f) { - _nlp->log->printf(hovError, "Could not open '%s' for writing the rhs/sol.\n", fname.c_str()); - return; - } + assert(counter == last_counter); + assert(m == rhs.get_size()); - const double* v = rhs.local_data_const(); - for(int i=0; ilog->printf(hovError, "Could not open '%s' for writing the rhs/sol.\n", fname.c_str()); + return; } - /** - * @brief Appends a solution vector to the .iajaaa file - * - * @param sol is the solution vector to be written - * @param counter specifies the suffix in the filename, usually is the iteration number - */ + const double* v = rhs.local_data_const(); + for(int i = 0; i < m; i++) fprintf(f, "%.20f ", v[i]); + fprintf(f, "\n"); + fclose(f); + } - inline void writeSolToFile(const hiopVector& sol, const int& counter) - { - writeRhsToFile(sol, counter); - } + /** + * @brief Appends a solution vector to the .iajaaa file + * + * @param sol is the solution vector to be written + * @param counter specifies the suffix in the filename, usually is the iteration number + */ - /** - * @brief Writes a dense matrix in the sparse iajaaa format (zero elements are not written) - * - * @param Msys is the matrix to be written - * @param counter specifies the suffix in the filename, usually is the iteration number - * @param nx specifies the number of primal variables - * @param meq specifies the number of equality constraints - * @param mineq specifies the number of inequality constraints - */ - void writeMatToFile(hiopMatrixDense& Msys, - const int& counter, - const int& nx, - const int& meq, - const int& mineq) - { + inline void writeSolToFile(const hiopVector& sol, const int& counter) { writeRhsToFile(sol, counter); } + + /** + * @brief Writes a dense matrix in the sparse iajaaa format (zero elements are not written) + * + * @param Msys is the matrix to be written + * @param counter specifies the suffix in the filename, usually is the iteration number + * @param nx specifies the number of primal variables + * @param meq specifies the number of equality constraints + * @param mineq specifies the number of inequality constraints + */ + void writeMatToFile(hiopMatrixDense& Msys, const int& counter, const int& nx, const int& meq, const int& mineq) + { #ifdef HIOP_USE_MPI - if(_master_rank>=0 && _master_rank != _nlp->get_rank()) return; + if(_master_rank >= 0 && _master_rank != _nlp->get_rank()) return; #endif - last_counter = counter; - m = Msys.m(); - - std::string fname = "kkt_linsys_"; - fname += std::to_string(counter); - fname += ".iajaaa"; - FILE* f = fopen(fname.c_str(), "w+"); - if(NULL==f) { - _nlp->log->printf(hovError, "Could not open '%s' for writing the linsys.\n", fname.c_str()); - return; - } - - //count nnz - const double zero_tol = 1e-25; - int nnz=0; - double* M = Msys.local_data(); - for(int i=0; izero_tol) { - nnz++; - } + last_counter = counter; + m = Msys.m(); + + std::string fname = "kkt_linsys_"; + fname += std::to_string(counter); + fname += ".iajaaa"; + FILE* f = fopen(fname.c_str(), "w+"); + if(NULL == f) { + _nlp->log->printf(hovError, "Could not open '%s' for writing the linsys.\n", fname.c_str()); + return; + } + + // count nnz + const double zero_tol = 1e-25; + int nnz = 0; + double* M = Msys.local_data(); + for(int i = 0; i < m; i++) { + for(int j = i; j < m; j++) { + if(fabs(M[i * m + j]) > zero_tol) { + nnz++; } } - - //start writing -> indexes are starting at 1 - fprintf(f, "%d\n%d\n%d\n%d\n%d\n", m, nx, meq, mineq, nnz); - - //array of pointers/offsets in of the first nonzero of each row; first entry is 1 and the last entry is nnz+1 - int offset = 1; + } + + // start writing -> indexes are starting at 1 + fprintf(f, "%d\n%d\n%d\n%d\n%d\n", m, nx, meq, mineq, nnz); + + // array of pointers/offsets in of the first nonzero of each row; first entry is 1 and the last entry is nnz+1 + int offset = 1; + fprintf(f, "%d ", offset); + for(int i = 0; i < m; i++) { + for(int j = i; j < m; j++) + if(fabs(M[i * m + j]) > zero_tol) offset++; + fprintf(f, "%d ", offset); - for(int i=0; izero_tol) - offset++; - - fprintf(f, "%d ", offset); - } - assert(offset == nnz+1); - fprintf(f, "\n"); - - //array of the column indexes of nonzeros - for(int i=0; izero_tol) - fprintf(f, "%d ", j+1); } - fprintf(f, "\n"); - - //array of nonzero entries of the matrix - for(int i=0; izero_tol) - fprintf(f, "%.20f ", M[i*m+j]); - } - fprintf(f, "\n"); - - fclose(f); + assert(offset == nnz + 1); + fprintf(f, "\n"); + + // array of the column indexes of nonzeros + for(int i = 0; i < m; i++) { + for(int j = i; j < m; j++) + if(fabs(M[i * m + j]) > zero_tol) fprintf(f, "%d ", j + 1); } - - /** - * @brief Writes a dense matrix in the sparse iajaaa format (zero elements are not written) - * - * @param Msys is the matrix to be written - * @param counter specifies the suffix in the filename, usually is the iteration number - * @param nx specifies the number of primal variables - * @param meq specifies the number of equality constraints - * @param mineq specifies the number of inequality constraints - */ - void writeMatToFile(hiopMatrixSparseTriplet& Msys, - const int& counter, - const int& nx, - const int& meq, - const int& mineq) - { + fprintf(f, "\n"); + + // array of nonzero entries of the matrix + for(int i = 0; i < m; i++) { + for(int j = i; j < m; j++) + if(fabs(M[i * m + j]) > zero_tol) fprintf(f, "%.20f ", M[i * m + j]); + } + fprintf(f, "\n"); + + fclose(f); + } + + /** + * @brief Writes a dense matrix in the sparse iajaaa format (zero elements are not written) + * + * @param Msys is the matrix to be written + * @param counter specifies the suffix in the filename, usually is the iteration number + * @param nx specifies the number of primal variables + * @param meq specifies the number of equality constraints + * @param mineq specifies the number of inequality constraints + */ + void writeMatToFile(hiopMatrixSparseTriplet& Msys, const int& counter, const int& nx, const int& meq, const int& mineq) + { #ifdef HIOP_USE_MPI - if(_master_rank>=0 && _master_rank != _nlp->get_rank()) return; + if(_master_rank >= 0 && _master_rank != _nlp->get_rank()) return; #endif - last_counter = counter; - m = Msys.m(); - - std::string fname = "kkt_linsys_"; - fname += std::to_string(counter); - fname += ".iajaaa"; - FILE* f = fopen(fname.c_str(), "w+"); - if(NULL==f) { - _nlp->log->printf(hovError, "Could not open '%s' for writing the linsys.\n", fname.c_str()); - return; - } + last_counter = counter; + m = Msys.m(); - int csr_nnz; - int *csr_kRowPtr{nullptr}, *csr_jCol{nullptr}, *index_covert_CSR2Triplet{nullptr}, *index_covert_extra_Diag2CSR{nullptr}; - double *csr_kVal{nullptr}; - std::unordered_map extra_diag_nnz_map; - - Msys.convert_to_csr_arrays(csr_nnz, &csr_kRowPtr, &csr_jCol, &csr_kVal, &index_covert_CSR2Triplet, &index_covert_extra_Diag2CSR, extra_diag_nnz_map); - - if(index_covert_CSR2Triplet) { - delete [] index_covert_CSR2Triplet; - index_covert_CSR2Triplet = nullptr; - } - if(index_covert_extra_Diag2CSR) { - delete [] index_covert_extra_Diag2CSR; - index_covert_extra_Diag2CSR = nullptr; - } - - //start writing -> indexes are starting at 1 - fprintf(f, "%d\n%d\n%d\n%d\n%d\n", m, nx, meq, mineq, csr_nnz); - - //array of pointers/offsets in of the first nonzero of each row; first entry is 1 and the last entry is nnz+1 - for(int i=0; ilog->printf(hovError, "Could not open '%s' for writing the linsys.\n", fname.c_str()); + return; + } + + int csr_nnz; + int *csr_kRowPtr{nullptr}, *csr_jCol{nullptr}, *index_covert_CSR2Triplet{nullptr}, *index_covert_extra_Diag2CSR{nullptr}; + double* csr_kVal{nullptr}; + std::unordered_map extra_diag_nnz_map; + + Msys.convert_to_csr_arrays(csr_nnz, + &csr_kRowPtr, + &csr_jCol, + &csr_kVal, + &index_covert_CSR2Triplet, + &index_covert_extra_Diag2CSR, + extra_diag_nnz_map); + + if(index_covert_CSR2Triplet) { + delete[] index_covert_CSR2Triplet; + index_covert_CSR2Triplet = nullptr; + } + if(index_covert_extra_Diag2CSR) { + delete[] index_covert_extra_Diag2CSR; + index_covert_extra_Diag2CSR = nullptr; } - - private: - hiopNlpFormulation* _nlp; + + // start writing -> indexes are starting at 1 + fprintf(f, "%d\n%d\n%d\n%d\n%d\n", m, nx, meq, mineq, csr_nnz); + + // array of pointers/offsets in of the first nonzero of each row; first entry is 1 and the last entry is nnz+1 + for(int i = 0; i < m + 1; i++) { + fprintf(f, "%d ", csr_kRowPtr[i] + 1); + } + assert(csr_kRowPtr[m] == csr_nnz); + fprintf(f, "\n"); + + // array of the column indexes of nonzeros + for(int i = 0; i < csr_nnz; i++) { + fprintf(f, "%d ", csr_jCol[i] + 1); + } + fprintf(f, "\n"); + + // array of nonzero entries of the matrix + for(int i = 0; i < csr_nnz; i++) { + fprintf(f, "%.20f ", csr_kVal[i]); + } + fprintf(f, "\n"); + + fclose(f); + + if(csr_kRowPtr) { + delete[] csr_kRowPtr; + csr_kRowPtr = nullptr; + } + if(csr_jCol) { + delete[] csr_jCol; + csr_jCol = nullptr; + } + if(csr_kVal) { + delete[] csr_kVal; + csr_kVal = nullptr; + } + } + +private: + hiopNlpFormulation* _nlp; #ifdef HIOP_USE_MPI - int _master_rank; + int _master_rank; #endif - int m; - int last_counter; //used only for consistency (such as order of calls) checks - }; -} // end namespace - + int m; + int last_counter; // used only for consistency (such as order of calls) checks +}; +} // namespace hiop #endif diff --git a/src/Utils/hiopCppStdUtils.hpp b/src/Utils/hiopCppStdUtils.hpp index 4147cc6bb..a2642ea36 100644 --- a/src/Utils/hiopCppStdUtils.hpp +++ b/src/Utils/hiopCppStdUtils.hpp @@ -14,90 +14,88 @@ #include #include -namespace hiop { - template inline void printvec(const std::vector& v, const std::string& msg="") - { - std::cout.precision(6); - std::cout << msg << " size:" << v.size() << std::endl; - std::cout << std::scientific; - typename std::vector::const_iterator it=v.begin(); - for(;it!=v.end(); ++it) std::cout << (*it) << " "; - std::cout << std::endl; - } - - template inline void printlist(const std::list& v, const std::string& msg="") - { - std::cout.precision(6); - std::cout << msg << " size:" << v.size() << std::endl; - std::cout << std::scientific; - typename std::list::const_iterator it=v.begin(); - for(;it!=v.end(); ++it) std::cout << (*it) << " "; - std::cout << std::endl; - } +namespace hiop +{ +template +inline void printvec(const std::vector& v, const std::string& msg = "") +{ + std::cout.precision(6); + std::cout << msg << " size:" << v.size() << std::endl; + std::cout << std::scientific; + typename std::vector::const_iterator it = v.begin(); + for(; it != v.end(); ++it) std::cout << (*it) << " "; + std::cout << std::endl; +} +template +inline void printlist(const std::list& v, const std::string& msg = "") +{ + std::cout.precision(6); + std::cout << msg << " size:" << v.size() << std::endl; + std::cout << std::scientific; + typename std::list::const_iterator it = v.begin(); + for(; it != v.end(); ++it) std::cout << (*it) << " "; + std::cout << std::endl; +} - template inline void printvecvec(const std::vector >& v, const std::string& msg="") - { - std::cout.precision(6); - std::cout << msg << " size:" << v.size() << std::endl; - std::cout << std::scientific; - for(auto& l: v) { - for(auto& c: l) std::cout << c << " "; - std::cout << std::endl; - } - } - template inline void hardclear(std::vector& in) - { - std::vector().swap(in); +template +inline void printvecvec(const std::vector >& v, const std::string& msg = "") +{ + std::cout.precision(6); + std::cout << msg << " size:" << v.size() << std::endl; + std::cout << std::scientific; + for(auto& l: v) { + for(auto& c: l) std::cout << c << " "; + std::cout << std::endl; } +} +template +inline void hardclear(std::vector& in) +{ + std::vector().swap(in); +} - static inline std::string tolower(const std::string& str_in) - { - auto str_out = str_in; - std::transform(str_out.begin(), str_out.end(), str_out.begin(), ::tolower); - return str_out; - } +static inline std::string tolower(const std::string& str_in) +{ + auto str_out = str_in; + std::transform(str_out.begin(), str_out.end(), str_out.begin(), ::tolower); + return str_out; +} - static inline void tolower(std::string& str_in) - { - std::transform(str_in.begin(), str_in.end(), str_in.begin(), ::tolower); - } +static inline void tolower(std::string& str_in) { std::transform(str_in.begin(), str_in.end(), str_in.begin(), ::tolower); } - static inline std::string toupper(const std::string& str_in) - { - auto str_out = str_in; - std::transform(str_out.begin(), str_out.end(), str_out.begin(), ::toupper); - return str_out; - } - static inline void toupper(std::string& str_in) - { - std::transform(str_in.begin(), str_in.end(), str_in.begin(), ::toupper); - } +static inline std::string toupper(const std::string& str_in) +{ + auto str_out = str_in; + std::transform(str_out.begin(), str_out.end(), str_out.begin(), ::toupper); + return str_out; +} +static inline void toupper(std::string& str_in) { std::transform(str_in.begin(), str_in.end(), str_in.begin(), ::toupper); } - // Function to reorder elements of arr[] according to index[] - template inline void reorder(T *arr, std::vector index, int n) - { - T temp[n]; +// Function to reorder elements of arr[] according to index[] +template +inline void reorder(T* arr, std::vector index, int n) +{ + T temp[n]; - // arr[i] should be present at index[i] index - for (int i=0; i& idx_nonaux_buses, + const std::vector& idx_aux_buses, + const hiopMatrixComplexSparseTriplet& Ybus, + hiopMatrixComplexDense& Ybus_red) +{ + // printvec(idx_aux_buses, "aux="); + // printvec(idx_nonaux_buses, "nonaux="); + + // Ybus.print(); + // int nnz = Ybus.numberOfNonzeros(); + // printf("Ybus has %d nnz\n", nnz); + + // Yaa = Matrix(Ybus[nonaux, nonaux]) + auto* Yaa = + Ybus.new_slice(idx_nonaux_buses.data(), idx_nonaux_buses.size(), idx_nonaux_buses.data(), idx_nonaux_buses.size()); + + auto* Ybb = Ybus.new_slice(idx_aux_buses.data(), idx_aux_buses.size(), idx_aux_buses.data(), idx_aux_buses.size()); + + auto* Yba = Ybus.new_slice(idx_aux_buses.data(), idx_aux_buses.size(), idx_nonaux_buses.data(), idx_nonaux_buses.size()); + + if(NULL != linsolver_) { + assert(false); delete linsolver_; - delete map_nonaux_to_aux_; - } - - bool hiopKronReduction::go(const std::vector& idx_nonaux_buses, - const std::vector& idx_aux_buses, - const hiopMatrixComplexSparseTriplet& Ybus, - hiopMatrixComplexDense& Ybus_red) - { - //printvec(idx_aux_buses, "aux="); - //printvec(idx_nonaux_buses, "nonaux="); - - //Ybus.print(); - //int nnz = Ybus.numberOfNonzeros(); - //printf("Ybus has %d nnz\n", nnz); - - //Yaa = Matrix(Ybus[nonaux, nonaux]) - auto* Yaa = Ybus.new_slice(idx_nonaux_buses.data(), - idx_nonaux_buses.size(), - idx_nonaux_buses.data(), - idx_nonaux_buses.size()); - - auto* Ybb = Ybus.new_slice(idx_aux_buses.data(), - idx_aux_buses.size(), - idx_aux_buses.data(), - idx_aux_buses.size()); - - auto* Yba = Ybus.new_slice(idx_aux_buses.data(), - idx_aux_buses.size(), - idx_nonaux_buses.data(), - idx_nonaux_buses.size()); - - if(NULL != linsolver_) { - assert(false); - delete linsolver_; - } - - linsolver_ = new hiopLinSolverUMFPACKZ(*Ybb); - - int nret = linsolver_->matrixChanged(); - if(nret>=0) { - - // - //Yaa - Yab*(Ybb\Yba) - // - - //Ybb\Yba - //hiopMatrixComplexDense Ybbinv_Yba(Yba_->m(), Yba_->n()); - assert(map_nonaux_to_aux_==NULL); - delete map_nonaux_to_aux_; - map_nonaux_to_aux_ = new hiopMatrixComplexDense(Yba->m(), Yba->n()); - linsolver_->solve(*Yba, *map_nonaux_to_aux_); - - map_nonaux_to_aux_->negate(); - //Ybbinv_Yba.print(); - delete Ybb; - delete linsolver_; - linsolver_ = NULL; - - //Ybus_red = - Yab*(Ybb\Yba) - Yba->transTimesMat(0.0, Ybus_red, 1.0, *map_nonaux_to_aux_); - delete Yba; - - Ybus_red.addSparseMatrix(std::complex(1.0, 0.0), *Yaa); - delete Yaa; - - } else { - printf("Error occured while performing the Kron reduction (factorization issue)\n"); - delete linsolver_; - linsolver_ = NULL; - delete Yaa; - delete Ybb; - delete Yba; - return false; - } - return true; } + linsolver_ = new hiopLinSolverUMFPACKZ(*Ybb); + + int nret = linsolver_->matrixChanged(); + if(nret >= 0) { + // + // Yaa - Yab*(Ybb\Yba) + // + + // Ybb\Yba + // hiopMatrixComplexDense Ybbinv_Yba(Yba_->m(), Yba_->n()); + assert(map_nonaux_to_aux_ == NULL); + delete map_nonaux_to_aux_; + map_nonaux_to_aux_ = new hiopMatrixComplexDense(Yba->m(), Yba->n()); + linsolver_->solve(*Yba, *map_nonaux_to_aux_); + + map_nonaux_to_aux_->negate(); + // Ybbinv_Yba.print(); + delete Ybb; + delete linsolver_; + linsolver_ = NULL; + + // Ybus_red = - Yab*(Ybb\Yba) + Yba->transTimesMat(0.0, Ybus_red, 1.0, *map_nonaux_to_aux_); + delete Yba; + + Ybus_red.addSparseMatrix(std::complex(1.0, 0.0), *Yaa); + delete Yaa; - /** - * Performs v_aux_out = (Ybb\Yba)* v_nonaux_in - */ - bool hiopKronReduction::apply_nonaux_to_aux(const std::vector >& v_nonaux_in, - std::vector >& v_aux_out) - { - - assert(map_nonaux_to_aux_); - if(NULL==map_nonaux_to_aux_) return false; - - assert((size_type) v_nonaux_in.size() == map_nonaux_to_aux_->n()); - assert((size_type) v_aux_out.size() == map_nonaux_to_aux_->m()); - - map_nonaux_to_aux_->timesVec(std::complex(0.,0.), - v_aux_out.data(), - std::complex(1.,0.), - v_nonaux_in.data()); - - - // assert(linsolver_); - // std::complex Yba_x_vnonaux[Yba_->n()]; - - // for(int i=0; in(); i++) { - // Yba_x_vnonaux[i]=0.; - // } - - // Yba_->timesVec(0., Yba_x_vnonaux, 1., v_nonaux_in.data()); - - // assert(Yba_->m() == v_aux_out.size()); - // linsolver_->solve(Yba_x_vnonaux, v_aux_out.data()); - - return true; + } else { + printf("Error occured while performing the Kron reduction (factorization issue)\n"); + delete linsolver_; + linsolver_ = NULL; + delete Yaa; + delete Ybb; + delete Yba; + return false; } + return true; +} + +/** + * Performs v_aux_out = (Ybb\Yba)* v_nonaux_in + */ +bool hiopKronReduction::apply_nonaux_to_aux(const std::vector >& v_nonaux_in, + std::vector >& v_aux_out) +{ + assert(map_nonaux_to_aux_); + if(NULL == map_nonaux_to_aux_) return false; + + assert((size_type)v_nonaux_in.size() == map_nonaux_to_aux_->n()); + assert((size_type)v_aux_out.size() == map_nonaux_to_aux_->m()); + + map_nonaux_to_aux_->timesVec(std::complex(0., 0.), + v_aux_out.data(), + std::complex(1., 0.), + v_nonaux_in.data()); + + // assert(linsolver_); + // std::complex Yba_x_vnonaux[Yba_->n()]; + + // for(int i=0; in(); i++) { + // Yba_x_vnonaux[i]=0.; + // } + + // Yba_->timesVec(0., Yba_x_vnonaux, 1., v_nonaux_in.data()); + + // assert(Yba_->m() == v_aux_out.size()); + // linsolver_->solve(Yba_x_vnonaux, v_aux_out.data()); + + return true; +} -}//end namespace +} // namespace hiop diff --git a/src/Utils/hiopKronReduction.hpp b/src/Utils/hiopKronReduction.hpp index 8ddbc8b8c..b7520dd5d 100644 --- a/src/Utils/hiopKronReduction.hpp +++ b/src/Utils/hiopKronReduction.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_KRONRED @@ -59,48 +59,47 @@ namespace hiop { - //forward defs - class hiopLinSolverUMFPACKZ; - class hiopMatrixComplexDense; - - /* Utility to perform the Kron reduction of the Ybus matrix (sparse symmetric complex) - * into the reduced Ybus (dense symmetric complex) matrix +// forward defs +class hiopLinSolverUMFPACKZ; +class hiopMatrixComplexDense; + +/* Utility to perform the Kron reduction of the Ybus matrix (sparse symmetric complex) + * into the reduced Ybus (dense symmetric complex) matrix + */ +class hiopKronReduction +{ +public: + hiopKronReduction(); + virtual ~hiopKronReduction(); + + /* Performs the Kron reduction (computes Schur complement) + * In parameters + * - idx_nonaux_buses, idx_aux_buses: indexes of the auxiliary and non-auxiliary + * buses (in the rows/cols of Ybus) + * - Ybus + * Out parameters + * - Ybus_red: reduced Ybus of size (nonaux,nonaux) = Yaa - Yab'*(Ybb\Yba) + * + * The function factorizes Ybb and stores the factorization for later use, for example for use + * in @axpy_nonaux_to_aux + */ + bool go(const std::vector& idx_nonaux_buses, + const std::vector& idx_aux_buses, + const hiopMatrixComplexSparseTriplet& Ybus, + hiopMatrixComplexDense& Ybus_red); + + /** + * Performs v_aux_out = (Ybb\Yba)* v_nonaux_in */ - class hiopKronReduction - { - public: - hiopKronReduction(); - virtual ~hiopKronReduction(); - - /* Performs the Kron reduction (computes Schur complement) - * In parameters - * - idx_nonaux_buses, idx_aux_buses: indexes of the auxiliary and non-auxiliary - * buses (in the rows/cols of Ybus) - * - Ybus - * Out parameters - * - Ybus_red: reduced Ybus of size (nonaux,nonaux) = Yaa - Yab'*(Ybb\Yba) - * - * The function factorizes Ybb and stores the factorization for later use, for example for use - * in @axpy_nonaux_to_aux - */ - bool go(const std::vector& idx_nonaux_buses, const std::vector& idx_aux_buses, - const hiopMatrixComplexSparseTriplet& Ybus, - hiopMatrixComplexDense& Ybus_red); + bool apply_nonaux_to_aux(const std::vector >& v_nonaux_in, + std::vector >& v_aux_out); - /** - * Performs v_aux_out = (Ybb\Yba)* v_nonaux_in - */ - bool apply_nonaux_to_aux(const std::vector >& v_nonaux_in, - std::vector >& v_aux_out); + const hiopMatrixComplexDense& map_nonaux_to_aux() const { return *map_nonaux_to_aux_; } - const hiopMatrixComplexDense& map_nonaux_to_aux() const - { - return *map_nonaux_to_aux_; - } - private: - hiopLinSolverUMFPACKZ* linsolver_; - hiopMatrixComplexDense* map_nonaux_to_aux_; - }; +private: + hiopLinSolverUMFPACKZ* linsolver_; + hiopMatrixComplexDense* map_nonaux_to_aux_; +}; -} //end namespace +} // namespace hiop #endif diff --git a/src/Utils/hiopLogger.cpp b/src/Utils/hiopLogger.cpp index 705c19e01..795eeb741 100644 --- a/src/Utils/hiopLogger.cpp +++ b/src/Utils/hiopLogger.cpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #include "hiopLogger.hpp" @@ -57,116 +57,114 @@ namespace hiop { - -void hiopLogger::write(const char* msg, const hiopVector& vec, hiopOutVerbosity v, int loggerid/*=0*/) +void hiopLogger::write(const char* msg, const hiopVector& vec, hiopOutVerbosity v, int loggerid /*=0*/) { - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; vec.print(f_, msg); } -void hiopLogger::write(const char* msg, const hiopMatrix& M, hiopOutVerbosity v, int loggerid/*=0*/) +void hiopLogger::write(const char* msg, const hiopMatrix& M, hiopOutVerbosity v, int loggerid /*=0*/) { if(master_rank_ != my_rank_) return; - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; M.print(f_, msg); } -void hiopLogger::write(const char* msg, const hiopResidual& r, hiopOutVerbosity v, int loggerid/*=0*/) +void hiopLogger::write(const char* msg, const hiopResidual& r, hiopOutVerbosity v, int loggerid /*=0*/) { if(master_rank_ != my_rank_) return; - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; - r.print(f_,msg); + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; + r.print(f_, msg); } -void hiopLogger::write(const char* msg, hiopOutVerbosity v, int loggerid/*=0*/) -{ +void hiopLogger::write(const char* msg, hiopOutVerbosity v, int loggerid /*=0*/) +{ if(master_rank_ != my_rank_) return; - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; - fprintf(f_, "%s\n", msg); + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; + fprintf(f_, "%s\n", msg); } -void hiopLogger::write(const char* msg, const hiopIterate& it, hiopOutVerbosity v, int loggerid/*=0*/) +void hiopLogger::write(const char* msg, const hiopIterate& it, hiopOutVerbosity v, int loggerid /*=0*/) { if(master_rank_ != my_rank_) return; - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; it.print(f_, msg); } #ifdef HIOP_DEEPCHECKS -void hiopLogger::write(const char* msg, - const HessianDiagPlusRowRank& Hess, - hiopOutVerbosity v, - int loggerid/*=0*/) +void hiopLogger::write(const char* msg, const HessianDiagPlusRowRank& Hess, hiopOutVerbosity v, int loggerid /*=0*/) { if(master_rank_ != my_rank_) return; - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; Hess.print(f_, v, msg); } #endif -void hiopLogger::write(const char* msg, const hiopOptions& options, hiopOutVerbosity v, int loggerid/*=0*/) +void hiopLogger::write(const char* msg, const hiopOptions& options, hiopOutVerbosity v, int loggerid /*=0*/) { if(master_rank_ != my_rank_) return; - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; options.print(f_, msg); } -void hiopLogger::write(const char* msg, const hiopNlpFormulation& nlp, hiopOutVerbosity v, int loggerid) +void hiopLogger::write(const char* msg, const hiopNlpFormulation& nlp, hiopOutVerbosity v, int loggerid) { if(master_rank_ != my_rank_) return; - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; nlp.print(f_, msg); } -void hiopLogger::write(const char* msg, const hiopFilter& filt, hiopOutVerbosity v, int loggerid/*=0*/) +void hiopLogger::write(const char* msg, const hiopFilter& filt, hiopOutVerbosity v, int loggerid /*=0*/) { if(master_rank_ != my_rank_) return; - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; filt.print(f_, msg); } - //only for loggerid=0 for now +// only for loggerid=0 for now void hiopLogger::printf(hiopOutVerbosity v, const char* format, ...) { if(master_rank_ != my_rank_) return; - hiopOutVerbosity _verb = (hiopOutVerbosity) options_->GetInteger("verbosity_level"); - if(v>_verb) return; - - char label[16];label[0]='\0'; - if(v==hovError) strcpy(label, "[Error] "); - else if(v==hovWarning) strcpy(label, "[Warning] "); + hiopOutVerbosity _verb = (hiopOutVerbosity)options_->GetInteger("verbosity_level"); + if(v > _verb) return; + + char label[16]; + label[0] = '\0'; + if(v == hovError) + strcpy(label, "[Error] "); + else if(v == hovWarning) + strcpy(label, "[Warning] "); fprintf(f_, "%s", label); va_list args; va_start(args, format); - vsnprintf(buff_,4096,format, args); - fprintf(f_,"%s",buff_); + vsnprintf(buff_, 4096, format, args); + fprintf(f_, "%s", buff_); va_end(args); - }; void hiopLogger::printf_error(hiopOutVerbosity v, const char* format, ...) { char buff[4096]; va_list args; - va_start (args, format); + va_start(args, format); vsnprintf(buff, 4096, format, args); - if(v<=hovError) { + if(v <= hovError) { fprintf(stderr, "ERROR: %s", buff); - } else if(v<=hovWarning) { + } else if(v <= hovWarning) { fprintf(stderr, "WARNING: %s", buff); } else { fprintf(stderr, "%s", buff); } - va_end (args); + va_end(args); }; -}; +}; // namespace hiop diff --git a/src/Utils/hiopLogger.hpp b/src/Utils/hiopLogger.hpp index 8bb68e524..71e49743c 100644 --- a/src/Utils/hiopLogger.hpp +++ b/src/Utils/hiopLogger.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read "Additional BSD Notice" below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_LOGGER @@ -68,28 +68,31 @@ class hiopOptions; class hiopFilter; /* Verbosity 0 to 9 */ -enum hiopOutVerbosity { - hovError=-1, - hovVerySilent=0, - hovWarning=1, - hovNoOutput=2, - hovSummary=3, //summary of the problem and each iteration - hovScalars=4, //additional, usually scalars, such as norm of resids, nlp and log bar errors, etc - hovFcnEval=5, //the above plus info about the number of function, gradient and Hessians - hovLinesearch=6, //linesearch info - hovLinAlgScalars=7, //print out various scalars: e.g., linear systems residuals - hovLinesearchVerb=8, //linesearch with more output - hovLinAlgScalarsVerb=9, //additional scalars, e.g., BFGS updating info - hovIteration=10, //print out iteration - hovMatrices=11, - hovMaxVerbose=12 +enum hiopOutVerbosity +{ + hovError = -1, + hovVerySilent = 0, + hovWarning = 1, + hovNoOutput = 2, + hovSummary = 3, // summary of the problem and each iteration + hovScalars = 4, // additional, usually scalars, such as norm of resids, nlp and log bar errors, etc + hovFcnEval = 5, // the above plus info about the number of function, gradient and Hessians + hovLinesearch = 6, // linesearch info + hovLinAlgScalars = 7, // print out various scalars: e.g., linear systems residuals + hovLinesearchVerb = 8, // linesearch with more output + hovLinAlgScalarsVerb = 9, // additional scalars, e.g., BFGS updating info + hovIteration = 10, // print out iteration + hovMatrices = 11, + hovMaxVerbose = 12 }; class hiopLogger { public: - hiopLogger(hiopOptions* options, FILE* f, int masterrank=0, MPI_Comm comm_wrld=MPI_COMM_WORLD) - : options_(options), f_(f), master_rank_(masterrank) + hiopLogger(hiopOptions* options, FILE* f, int masterrank = 0, MPI_Comm comm_wrld = MPI_COMM_WORLD) + : options_(options), + f_(f), + master_rank_(masterrank) { #ifdef HIOP_USE_MPI int ierr = MPI_Comm_rank(comm_wrld, &my_rank_); @@ -100,33 +103,34 @@ class hiopLogger }; virtual ~hiopLogger() {}; /* outputs a vector. loggerid indicates which logger should be used, by default stdout*/ - void write(const char* msg, const hiopVector& vec, hiopOutVerbosity v, int loggerid=0); - void write(const char* msg, const hiopResidual& r, hiopOutVerbosity v, int loggerid=0); - void write(const char* msg, const hiopIterate& r, hiopOutVerbosity v, int loggerid=0); - void write(const char* msg, const hiopMatrix& M, hiopOutVerbosity v, int loggerid=0); + void write(const char* msg, const hiopVector& vec, hiopOutVerbosity v, int loggerid = 0); + void write(const char* msg, const hiopResidual& r, hiopOutVerbosity v, int loggerid = 0); + void write(const char* msg, const hiopIterate& r, hiopOutVerbosity v, int loggerid = 0); + void write(const char* msg, const hiopMatrix& M, hiopOutVerbosity v, int loggerid = 0); #ifdef HIOP_DEEPCHECKS - void write(const char* msg, const HessianDiagPlusRowRank& Hess, hiopOutVerbosity v, int loggerid=0); + void write(const char* msg, const HessianDiagPlusRowRank& Hess, hiopOutVerbosity v, int loggerid = 0); #endif - void write(const char* msg, const hiopNlpFormulation& nlp, hiopOutVerbosity v, int loggerid=0); - void write(const char* msg, const hiopOptions& options, hiopOutVerbosity v, int loggerid=0); - void write(const char* msg, const hiopFilter& filt, hiopOutVerbosity v, int loggerid=0); - void write(const char* msg, hiopOutVerbosity v, int loggerid=0); - //only for loggerid=0 for now - void printf(hiopOutVerbosity v, const char* format, ...); + void write(const char* msg, const hiopNlpFormulation& nlp, hiopOutVerbosity v, int loggerid = 0); + void write(const char* msg, const hiopOptions& options, hiopOutVerbosity v, int loggerid = 0); + void write(const char* msg, const hiopFilter& filt, hiopOutVerbosity v, int loggerid = 0); + void write(const char* msg, hiopOutVerbosity v, int loggerid = 0); + // only for loggerid=0 for now + void printf(hiopOutVerbosity v, const char* format, ...); /** * This static method is to be used before NLP created its internal instance of hiopLogger. To be used - * for displaying errors (on stderr) that occur during initialization of the NLP or PriDec solver class. + * for displaying errors (on stderr) that occur during initialization of the NLP or PriDec solver class. */ - static void printf_error(hiopOutVerbosity v, const char* format, ...); + static void printf_error(hiopOutVerbosity v, const char* format, ...); protected: hiopOptions* options_; FILE* f_; char buff_[4096]; + private: int master_rank_; int my_rank_; }; -} +} // namespace hiop #endif diff --git a/src/Utils/hiopMPI.hpp b/src/Utils/hiopMPI.hpp index 420724ebc..76bdf74e2 100644 --- a/src/Utils/hiopMPI.hpp +++ b/src/Utils/hiopMPI.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #pragma once @@ -51,16 +51,16 @@ #include "hiop_defs.hpp" #ifdef HIOP_USE_MPI - #include "mpi.h" -#else - #ifndef MPI_COMM - #define MPI_Comm int - #endif - #ifndef MPI_COMM_SELF - #define MPI_COMM_SELF 0 - #endif - #ifndef MPI_COMM_WORLD - #define MPI_COMM_WORLD 0 - #endif - #include -#endif +#include "mpi.h" +#else +#ifndef MPI_COMM +#define MPI_Comm int +#endif +#ifndef MPI_COMM_SELF +#define MPI_COMM_SELF 0 +#endif +#ifndef MPI_COMM_WORLD +#define MPI_COMM_WORLD 0 +#endif +#include +#endif diff --git a/src/Utils/hiopOptions.cpp b/src/Utils/hiopOptions.cpp index 7ce51f916..c8d5f1799 100644 --- a/src/Utils/hiopOptions.cpp +++ b/src/Utils/hiopOptions.cpp @@ -59,50 +59,49 @@ namespace hiop { using namespace std; - + const char* hiopOptions::default_filename = "hiop.options"; const char* hiopOptions::default_filename_pridec_solver = "hiop_pridec.options"; const char* hiopOptions::default_filename_pridec_masterNLP = "hiop_pridec_master.options"; const char* hiopOptions::default_filename_fr = "hiop_fr.options"; - + hiopOptions::hiopOptions() - : log_(nullptr) -{ -} + : log_(nullptr) +{} hiopOptions::~hiopOptions() { map::iterator it = mOptions_.begin(); - for(;it!=mOptions_.end(); it++) delete it->second; + for(; it != mOptions_.end(); it++) delete it->second; } double hiopOptions::GetNumeric(const char* name) const { map::const_iterator it = mOptions_.find(name); - assert(it!=mOptions_.end()); - assert(it->second!=NULL); + assert(it != mOptions_.end()); + assert(it->second != NULL); OptionNum* option = dynamic_cast(it->second); - assert(option!=NULL); + assert(option != NULL); return option->val; } int hiopOptions::GetInteger(const char* name) const { map::const_iterator it = mOptions_.find(name); - assert(it!=mOptions_.end()); - assert(it->second!=NULL); + assert(it != mOptions_.end()); + assert(it->second != NULL); OptionInt* option = dynamic_cast(it->second); - assert(option!=NULL); + assert(option != NULL); return option->val; } -string hiopOptions::GetString (const char* name) const +string hiopOptions::GetString(const char* name) const { map::const_iterator it = mOptions_.find(name); - assert(it!=mOptions_.end()); - assert(it->second!=NULL); + assert(it != mOptions_.end()); + assert(it->second != NULL); OptionStr* option = dynamic_cast(it->second); - assert(option!=NULL); + assert(option != NULL); return option->val; } @@ -112,7 +111,7 @@ void hiopOptions::register_num_option(const std::string& name, double upp, const char* description) { - mOptions_[name]=new OptionNum(defaultValue, low, upp, description); + mOptions_[name] = new OptionNum(defaultValue, low, upp, description); } void hiopOptions::register_str_option(const std::string& name, @@ -120,81 +119,70 @@ void hiopOptions::register_str_option(const std::string& name, const std::vector& range, const char* description) { - mOptions_[name]=new OptionStr(defaultValue, range, description); + mOptions_[name] = new OptionStr(defaultValue, range, description); } void hiopOptions::register_str_option(const std::string& name, const std::string& defaultValue, const char* description) { - vector empty_range; //empty range for a OptionStr means the option can take any values + vector empty_range; // empty range for a OptionStr means the option can take any values mOptions_[name] = new OptionStr(defaultValue, empty_range, description); } -void hiopOptions::register_int_option(const std::string& name, - int defaultValue, - int low, - int upp, - const char* description) +void hiopOptions::register_int_option(const std::string& name, int defaultValue, int low, int upp, const char* description) { - mOptions_[name]=new OptionInt(defaultValue, low, upp, description); + mOptions_[name] = new OptionInt(defaultValue, low, upp, description); } -static inline std::string <rim(std::string &s) +static inline std::string& ltrim(std::string& s) { - //s.erase(s.begin(), std::find_if(s.begin(), s.end(), - // std::not1(std::ptr_fun(std::isspace)))); - s.erase(s.begin(), - std::find_if(s.begin(), - s.end(), - [](int c) {return !std::isspace(c);} - ) - ); - return s; + // s.erase(s.begin(), std::find_if(s.begin(), s.end(), + // std::not1(std::ptr_fun(std::isspace)))); + s.erase(s.begin(), std::find_if(s.begin(), s.end(), [](int c) { return !std::isspace(c); })); + return s; } void hiopOptions::load_from_file(const char* filename) { - if(NULL==filename) { + if(NULL == filename) { log_printf(hovError, "Option file name not valid"); return; } - ifstream input( filename ); + ifstream input(filename); if(input.fail()) { if(strcmp(default_filename, filename)) { - log_printf(hovWarning, - "Failed to read option file '%s'. Hiop will use default options.\n", - filename); + log_printf(hovWarning, "Failed to read option file '%s'. Hiop will use default options.\n", filename); return; } } - string line; string name, value; - for( std::string line; getline( input, line ); ) { - + string line; + string name, value; + for(std::string line; getline(input, line);) { line = ltrim(line); - if(line.size()==0) continue; - if(line[0]=='#') continue; + if(line.size() == 0) continue; + if(line[0] == '#') continue; istringstream iss(line); if(!(iss >> name >> value)) { - log_printf(hovWarning, - "Hiop could not parse and ignored line '%s' from the option file\n", - line.c_str()); + log_printf(hovWarning, "Hiop could not parse and ignored line '%s' from the option file\n", line.c_str()); continue; } - //find the Option object in mOptions_ corresponding to 'optname' and set his value to 'optval' - OptionNum* on; OptionInt* oi; OptionStr* os; + // find the Option object in mOptions_ corresponding to 'optname' and set his value to 'optval' + OptionNum* on; + OptionInt* oi; + OptionStr* os; map::iterator it = mOptions_.find(name); - if(it!=mOptions_.end()) { + if(it != mOptions_.end()) { Option* option = it->second; on = dynamic_cast(option); - - if(on!=NULL) { + + if(on != NULL) { stringstream ss(value); double val; - if(ss>>val) { + if(ss >> val) { SetNumericValue(name.c_str(), val, true); } else { log_printf(hovWarning, @@ -206,14 +194,16 @@ void hiopOptions::load_from_file(const char* filename) } } else { os = dynamic_cast(option); - if(os!=NULL) { + if(os != NULL) { SetStringValue(name.c_str(), value.c_str(), true); } else { oi = dynamic_cast(option); - if(oi!=NULL) { - stringstream ss(value); int val; - if(ss>>val) { SetIntegerValue(name.c_str(), val, true); } - else { + if(oi != NULL) { + stringstream ss(value); + int val; + if(ss >> val) { + SetIntegerValue(name.c_str(), val, true); + } else { log_printf(hovWarning, "Hiop could not parse value '%s' as int for option '%s' specified in " "the option file and will use default value '%d'\n", @@ -227,8 +217,8 @@ void hiopOptions::load_from_file(const char* filename) } } } - - } else { // else from it!=mOptions_.end() + + } else { // else from it!=mOptions_.end() // option not recognized/found/registered log_printf(hovWarning, "Hiop does not understand option '%s' specified in the option file and will " @@ -236,13 +226,13 @@ void hiopOptions::load_from_file(const char* filename) name.c_str(), value.c_str()); } - } //end of the for over the lines + } // end of the for over the lines } bool hiopOptions::is_user_defined(const char* option_name) { map::iterator it = mOptions_.find(option_name); - if(it==mOptions_.end()) { + if(it == mOptions_.end()) { return false; } return (it->second->specifiedInFile || it->second->specifiedAtRuntime); @@ -251,13 +241,12 @@ bool hiopOptions::is_user_defined(const char* option_name) bool hiopOptions::set_val(const char* name, const double& value) { map::iterator it = mOptions_.find(name); - if(it!=mOptions_.end()) { + if(it != mOptions_.end()) { OptionNum* option = dynamic_cast(it->second); - if(NULL==option) { + if(NULL == option) { assert(false && "mismatch between name and type happened in internal 'set_val'"); } else { - - if(valuelb || value>option->ub) { + if(value < option->lb || value > option->ub) { assert(false && "incorrect use of internal 'set_val': value out of bounds\n"); } else { option->val = value; @@ -268,19 +257,19 @@ bool hiopOptions::set_val(const char* name, const double& value) } return true; } -bool hiopOptions::SetNumericValue (const char* name, const double& value, const bool& setFromFile/*=false*/) +bool hiopOptions::SetNumericValue(const char* name, const double& value, const bool& setFromFile /*=false*/) { map::iterator it = mOptions_.find(name); - if(it!=mOptions_.end()) { + if(it != mOptions_.end()) { OptionNum* option = dynamic_cast(it->second); - if(NULL==option) { + if(NULL == option) { log_printf(hovWarning, "Hiop does not know option '%s' as 'numeric'. Maybe it is an 'integer' or 'string' " "value? The option will be ignored.\n", name); } else { - if(true==option->specifiedInFile) { - if(false==setFromFile) { + if(true == option->specifiedInFile) { + if(false == setFromFile) { log_printf(hovWarning, "Hiop will ignore value '%g' set for option '%s' at runtime since this option is " "already specified in the option file.\n", @@ -289,17 +278,18 @@ bool hiopOptions::SetNumericValue (const char* name, const double& value, const return true; } } - + if(setFromFile) { - option->specifiedInFile=true; + option->specifiedInFile = true; } else { - option->specifiedAtRuntime=true; + option->specifiedAtRuntime = true; } - - if(valuelb || value>option->ub) { + + if(value < option->lb || value > option->ub) { log_printf(hovWarning, "Hiop: option '%s' must be in [%g,%g]. Default value %g will be used.\n", - name, option->lb, + name, + option->lb, option->ub, option->val); } else { @@ -307,26 +297,21 @@ bool hiopOptions::SetNumericValue (const char* name, const double& value, const } } } else { - log_printf(hovWarning, - "Hiop does not understand option '%s' and will ignore its value '%g'.\n", - name, - value); + log_printf(hovWarning, "Hiop does not understand option '%s' and will ignore its value '%g'.\n", name, value); } ensure_consistence(); return true; } - bool hiopOptions::set_val(const char* name, const int& value) { map::iterator it = mOptions_.find(name); - if(it!=mOptions_.end()) { + if(it != mOptions_.end()) { OptionInt* option = dynamic_cast(it->second); - if(NULL==option) { + if(NULL == option) { assert(false && "mismatch between name and type happened in internal 'set_val'"); } else { - - if(valuelb || value>option->ub) { + if(value < option->lb || value > option->ub) { assert(false && "incorrect use of internal 'set_val': value out of bounds\n"); } else { option->val = value; @@ -338,20 +323,19 @@ bool hiopOptions::set_val(const char* name, const int& value) return true; } - -bool hiopOptions::SetIntegerValue(const char* name, const int& value, const bool& setFromFile/*=false*/) +bool hiopOptions::SetIntegerValue(const char* name, const int& value, const bool& setFromFile /*=false*/) { map::iterator it = mOptions_.find(name); - if(it!=mOptions_.end()) { + if(it != mOptions_.end()) { OptionInt* option = dynamic_cast(it->second); - if(NULL==option) { + if(NULL == option) { log_printf(hovWarning, "Hiop does not know option '%s' as 'integer'. Maybe it is an 'numeric' " "or a 'string' option? The option will be ignored.\n", name); } else { - if(true==option->specifiedInFile) { - if(false==setFromFile) { + if(true == option->specifiedInFile) { + if(false == setFromFile) { log_printf(hovWarning, "Hiop will ignore value '%d' set for option '%s' at runtime since this " "option is already specified in the option file.\n", @@ -360,11 +344,11 @@ bool hiopOptions::SetIntegerValue(const char* name, const int& value, const bool return true; } } - + if(setFromFile) { - option->specifiedInFile=true; + option->specifiedInFile = true; } - if(valuelb || value>option->ub) { + if(value < option->lb || value > option->ub) { log_printf(hovWarning, "Hiop: option '%s' must be in [%d, %d]. Default value %d will be used.\n", name, @@ -376,10 +360,7 @@ bool hiopOptions::SetIntegerValue(const char* name, const int& value, const bool } } } else { - log_printf(hovWarning, - "Hiop does not understand option '%s' and will ignore its value '%d'.\n", - name, - value); + log_printf(hovWarning, "Hiop does not understand option '%s' and will ignore its value '%d'.\n", name, value); } ensure_consistence(); return true; @@ -388,17 +369,17 @@ bool hiopOptions::SetIntegerValue(const char* name, const int& value, const bool bool hiopOptions::set_val(const char* name, const char* value_in) { map::iterator it = mOptions_.find(name); - if(it!=mOptions_.end()) { + if(it != mOptions_.end()) { OptionStr* option = dynamic_cast(it->second); - if(NULL==option) { + if(NULL == option) { assert(false && "mismatch between name and type happened in internal 'set_val'"); } else { string value(value_in); transform(value.begin(), value.end(), value.begin(), ::tolower); - //see if it is in the range (of supported values) - bool inrange=false; - for(int it=0; it(option->range.size()) && !inrange; it++) { - inrange = (option->range[it]==value); + // see if it is in the range (of supported values) + bool inrange = false; + for(int it = 0; it < static_cast(option->range.size()) && !inrange; it++) { + inrange = (option->range[it] == value); } if(!inrange && !option->range.empty()) { @@ -413,19 +394,19 @@ bool hiopOptions::set_val(const char* name, const char* value_in) return true; } -bool hiopOptions::SetStringValue (const char* name, const char* value, const bool& setFromFile/*=false*/) +bool hiopOptions::SetStringValue(const char* name, const char* value, const bool& setFromFile /*=false*/) { map::iterator it = mOptions_.find(name); - if(it!=mOptions_.end()) { + if(it != mOptions_.end()) { OptionStr* option = dynamic_cast(it->second); - if(NULL==option) { + if(NULL == option) { log_printf(hovWarning, "Hiop does not know option '%s' as 'string'. Maybe it is an 'integer' or a " "'string' option? The option will be ignored.\n", name); } else { - if(true==option->specifiedInFile) { - if(false==setFromFile) { + if(true == option->specifiedInFile) { + if(false == setFromFile) { log_printf(hovWarning, "Hiop will ignore value '%s' set for option '%s' at runtime since this option " "is already specified in the option file.\n", @@ -434,25 +415,26 @@ bool hiopOptions::SetStringValue (const char* name, const char* value, const bo return true; } } - + if(setFromFile) { option->specifiedInFile = true; } string strValue(value); transform(strValue.begin(), strValue.end(), strValue.begin(), ::tolower); - //see if it is in the range (of supported values) - bool inrange=false; - for(int it=0; it(option->range.size()) && !inrange; it++) { - inrange = (option->range[it]==strValue); + // see if it is in the range (of supported values) + bool inrange = false; + for(int it = 0; it < static_cast(option->range.size()) && !inrange; it++) { + inrange = (option->range[it] == strValue); } - - //empty range means the option can take any value and no range check is needed + + // empty range means the option can take any value and no range check is needed if(!inrange && !option->range.empty()) { - stringstream ssRange; ssRange << " "; - for(int it=0; it(option->range.size()); it++) { + stringstream ssRange; + ssRange << " "; + for(int it = 0; it < static_cast(option->range.size()); it++) { ssRange << option->range[it] << " "; } - + log_printf(hovWarning, "Hiop: value '%s' for option '%s' must be one of [%s]. Default value '%s' will be used.\n", value, @@ -464,10 +446,7 @@ bool hiopOptions::SetStringValue (const char* name, const char* value, const bo } } } else { - log_printf(hovWarning, - "Hiop does not understand option '%s' and will ignore its value '%s'.\n", - name, - value); + log_printf(hovWarning, "Hiop does not understand option '%s' and will ignore its value '%s'.\n", name, value); } ensure_consistence(); return true; @@ -477,29 +456,31 @@ void hiopOptions::log_printf(hiopOutVerbosity v, const char* format, ...) { char buff[1024]; va_list args; - va_start (args, format); - vsnprintf(buff,1024,format, args); + va_start(args, format); + vsnprintf(buff, 1024, format, args); if(log_) { - log_->printf(v,buff); + log_->printf(v, buff); } else { - hiopLogger::printf_error(v,buff); + hiopLogger::printf_error(v, buff); } - //fprintf(stderr,buff); - va_end (args); + // fprintf(stderr,buff); + va_end(args); } void hiopOptions::print(FILE* file, const char* msg) const { - if(nullptr==msg) fprintf(file, "#\n# Hiop options\n#\n"); - else fprintf(file, "%s ", msg); + if(nullptr == msg) + fprintf(file, "#\n# Hiop options\n#\n"); + else + fprintf(file, "%s ", msg); bool short_ver{false}; - if(GetString("print_options") == "short"){ + if(GetString("print_options") == "short") { short_ver = true; } - map::const_iterator it = mOptions_.begin(); - for(; it!=mOptions_.end(); it++) { + map::const_iterator it = mOptions_.begin(); + for(; it != mOptions_.end(); it++) { fprintf(file, "%s ", it->first.c_str()); it->second->print(file, short_ver); fprintf(file, "\n"); @@ -526,7 +507,7 @@ void hiopOptions::OptionInt::print(FILE* f, bool short_ver) const void hiopOptions::OptionStr::print(FILE* f, bool short_ver) const { - //empty range means the string option is not bound to a range of values + // empty range means the string option is not bound to a range of values if(range.empty()) { if(!short_ver) { fprintf(f, "%s \t# (string) [%s]", val.c_str(), descr.c_str()); @@ -535,8 +516,9 @@ void hiopOptions::OptionStr::print(FILE* f, bool short_ver) const } } else { if(!short_ver) { - stringstream ssRange; ssRange << " "; - for(int i=0; i(range.size()); i++) { + stringstream ssRange; + ssRange << " "; + for(int i = 0; i < static_cast(range.size()); i++) { ssRange << range[i] << " "; } fprintf(f, "%s \t# (string) one of [%s] [%s]", val.c_str(), ssRange.str().c_str(), descr.c_str()); @@ -549,16 +531,14 @@ void hiopOptions::OptionStr::print(FILE* f, bool short_ver) const ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // hiopOptionsNLP ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -hiopOptionsNLP::hiopOptionsNLP(const char* opt_filename/*=nullptr*/) - : hiopOptions() +hiopOptionsNLP::hiopOptionsNLP(const char* opt_filename /*=nullptr*/) + : hiopOptions() { register_options(); - load_from_file(opt_filename==nullptr ? hiopOptions::default_filename : opt_filename); + load_from_file(opt_filename == nullptr ? hiopOptions::default_filename : opt_filename); ensure_consistence(); } -hiopOptionsNLP::~hiopOptionsNLP() -{ -} +hiopOptionsNLP::~hiopOptionsNLP() {} void hiopOptionsNLP::register_options() { @@ -583,11 +563,7 @@ void hiopOptionsNLP::register_options() 1e+10, "Absolute error tolerance for the constraint violation (default 1e-4)"); - register_num_option("dual_tol", - 1.0, - 1e-16, - 1e+10, - "Absolute error tolerance for the dual infeasibility (default 1.0)"); + register_num_option("dual_tol", 1.0, 1e-16, 1e+10, "Absolute error tolerance for the dual infeasibility (default 1.0)"); register_num_option("comp_tol", 1e-4, @@ -607,7 +583,11 @@ void hiopOptionsNLP::register_options() 0.99999, "Fraction-to-the-boundary parameter used in the line-search to back-off a bit " "(see eqn (8) in the Filt-IPM paper) (default 0.99)"); - register_num_option("kappa_eps", 10., 1e-6, 1e+3, "mu is reduced when when log-bar error is below kappa_eps*mu (default 10.)"); + register_num_option("kappa_eps", + 10., + 1e-6, + 1e+3, + "mu is reduced when when log-bar error is below kappa_eps*mu (default 10.)"); register_num_option("kappa1", 1e-2, 1e-16, @@ -628,35 +608,41 @@ void hiopOptionsNLP::register_options() // 'duals_update_type' should be 'lsq' or 'linear' for 'Hessian=quasinewton_approx' // 'duals_update_type' can only be 'linear' for Newton methods 'Hessian=analytical_exact' - //here will set the default value to 'lsq' and this will be adjusted later in 'ensure_consistency' - //to a valid value depending on the 'Hessian' value - vector range(2); range[0]="lsq"; range[1]="linear"; + // here will set the default value to 'lsq' and this will be adjusted later in 'ensure_consistency' + // to a valid value depending on the 'Hessian' value + vector range(2); + range[0] = "lsq"; + range[1] = "linear"; register_str_option("duals_update_type", "lsq", range, "Type of update of the multipliers of the eq. constraints " "(default is 'lsq' when 'Hessian' is 'quasinewton_approx' and " "'linear' when 'Hessian is 'analytical_exact')"); - + register_num_option("recalc_lsq_duals_tol", 1e-6, 0., - 1e10, - "Threshold for infeasibility under which LSQ computation of duals will be employed " - "(requires 'duals_update_type' to be 'lsq'"); + 1e10, + "Threshold for infeasibility under which LSQ computation of duals will be employed " + "(requires 'duals_update_type' to be 'lsq'"); } { - vector range(2); range[0]="lsq"; range[1]="zero"; - register_str_option("duals_init", "lsq", range, "Type of initialization of the multipliers of the eq. cons. (default lsq)"); + vector range(2); + range[0] = "lsq"; + range[1] = "zero"; + register_str_option("duals_init", + "lsq", + range, + "Type of initialization of the multipliers of the eq. cons. (default lsq)"); register_num_option("duals_lsq_ini_max", 1e3, 1e-16, - 1e+10, + 1e+10, "Max inf-norm allowed for initials duals computed with LSQ; if norm is greater, the duals for" "equality constraints will be set to zero."); - } register_int_option("max_iter", 3000, 1, 1e6, "Max number of iterations (default 3000)"); @@ -670,52 +656,62 @@ void hiopOptionsNLP::register_options() register_int_option("acceptable_iterations", 10, 1, - 1e6, "Number of iterations of acceptable tolerance after which HiOp terminates (default 10)"); + 1e6, + "Number of iterations of acceptable tolerance after which HiOp terminates (default 10)"); register_num_option("sigma0", 1., 0., 1e+7, "Initial value of the initial multiplier of the identity in the secant approximation (default 1.0)"); - //step length controls + // step length controls { - vector range(2); range[0] = "no"; range[1] = "yes"; + vector range(2); + range[0] = "no"; + range[1] = "yes"; register_str_option("accept_every_trial_step", "no", range, "Disable line-search and take close-to-boundary step"); - + register_num_option("min_step_size", 1e-16, 0., 1e6, - "Minimum step size allowed in line-search (default 1e-16). If step size is less than this number, " + "Minimum step size allowed in line-search (default 1e-16). If step size is less than this number, " "feasibility restoration problem is activated."); - auto d_abs = "Max allowed update of the 'x' primal variables during the line-search. Primal step-length may be reduced " - "so that the inf norm of the 'x' update is less than or equal with the option's value. Default value: " - "0 (disabled)."; + auto d_abs = + "Max allowed update of the 'x' primal variables during the line-search. Primal step-length may be reduced " + "so that the inf norm of the 'x' update is less than or equal with the option's value. Default value: " + "0 (disabled)."; register_num_option("moving_lim_abs", 0., 0., 1e+8, d_abs); - auto d_rel = "Max allowed update of the primal variables relative to fraction-to-boundary (FTB) step. Line-search " - "will be started using the FTB step scaled by the option's value. Default value: 0 (disabled)."; + auto d_rel = + "Max allowed update of the primal variables relative to fraction-to-boundary (FTB) step. Line-search " + "will be started using the FTB step scaled by the option's value. Default value: 0 (disabled)."; register_num_option("moving_lim_rel", 0., 0., 1., d_rel); - - register_num_option("theta_max_fact", - 1e+4, - 0.0, - 1e+7, - "Maximum constraint violation (theta_max) is scaled by this factor before using in the filter line-search " - "algorithm (default 1e+4). (eqn (21) in Filt-IPM paper)"); - register_num_option("theta_min_fact", - 1e-4, - 0.0, - 1e+7, - "Minimum constraint violation (theta_min) is scaled by this factor before using in the filter line-search " - "algorithm (default 1e-4). (eqn (21) in Filt-IPM paper)"); + register_num_option( + "theta_max_fact", + 1e+4, + 0.0, + 1e+7, + "Maximum constraint violation (theta_max) is scaled by this factor before using in the filter line-search " + "algorithm (default 1e+4). (eqn (21) in Filt-IPM paper)"); + + register_num_option( + "theta_min_fact", + 1e-4, + 0.0, + 1e+7, + "Minimum constraint violation (theta_min) is scaled by this factor before using in the filter line-search " + "algorithm (default 1e-4). (eqn (21) in Filt-IPM paper)"); } { vector range(5); - range[0]="sigma0"; range[1]="sty"; range[2]="sty_inv"; - range[3]="snrm_ynrm"; range[4]="sty_srnm_ynrm"; + range[0] = "sigma0"; + range[1] = "sty"; + range[2] = "sty_inv"; + range[3] = "snrm_ynrm"; + range[4] = "sty_srnm_ynrm"; register_str_option("sigma_update_strategy", range[1], range, @@ -723,12 +719,18 @@ void hiopOptionsNLP::register_options() } register_int_option("secant_memory_len", 6, 0, 256, "Size of the memory of the Hessian secant approximation"); - register_int_option("verbosity_level", 3, 0, 12, + register_int_option("verbosity_level", + 3, + 0, + 12, "Verbosity level: 0 no output (only errors), 1=0+warnings, 2=1 (reserved), " "3=2+optimization output, 4=3+scalars; larger values explained in hiopLogger.hpp"); { - vector range(3); range[0]="remove"; range[1]="relax"; range[2]="none"; + vector range(3); + range[0] = "remove"; + range[1] = "relax"; + range[2] = "none"; register_str_option("fixed_var", "none", range, @@ -753,23 +755,24 @@ void hiopOptionsNLP::register_options() // warm_start { - vector range(2); range[0] = "no"; range[1] = "yes"; - register_str_option("warm_start", - "no", - range, - "Warm start from the user provided primal-dual point. (default no)"); + vector range(2); + range[0] = "no"; + range[1] = "yes"; + register_str_option("warm_start", "no", range, "Warm start from the user provided primal-dual point. (default no)"); } // scaling { - vector range(2); range[0]="none"; range[1]="gradient"; + vector range(2); + range[0] = "none"; + range[1] = "gradient"; register_str_option("scaling_type", "gradient", range, "The method used for scaling the problem before solving it. " "Setting this option to 'gradient' will scale the problem such that the inf-norm of gradient at the " "initial point is less or equal to to the value of scaling_max_grad option (default 'gradient')"); - + register_num_option("scaling_max_grad", 100, 1e-20, @@ -798,7 +801,8 @@ void hiopOptionsNLP::register_options() 0.0, 1e+20, "a positive value for this option will be used as a lower bound for (and will overwrite) " - "the scaling factors computed as instructed by options scaling_max_grad, scaling_max_obj_grad and scaling_max_con_grad."); + "the scaling factors computed as instructed by options scaling_max_grad, scaling_max_obj_grad and " + "scaling_max_con_grad."); } // outer iterative refinement @@ -820,19 +824,22 @@ void hiopOptionsNLP::register_options() register_int_option("ir_outer_maxit", 8, 0, - 100, + 100, "Max number of outer iterative refinement iterations (default 8). " "Setting it to 0 deactivates the outer iterative refinement"); } - + // relax bounds { - register_num_option("bound_relax_perturb", 1e-8, 0.0, 1e20, + register_num_option("bound_relax_perturb", + 1e-8, + 0.0, + 1e20, "Perturbation of the lower and upper bounds for variables and constraints relative" "to its magnitude: lower/upper_bound -=/+= bound_relax_perturb*max(abs(lower/upper_bound),1)" "(default 1e-8)"); - //relax equalities internally to two-sided inequalties and pose the NLP as an NLP with inequalities only + // relax equalities internally to two-sided inequalties and pose the NLP as an NLP with inequalities only register_num_option("eq_relax_factor", 1e-8, 1e-15, @@ -845,16 +852,18 @@ void hiopOptionsNLP::register_options() // second order correction { register_int_option("max_soc_iter", 4, 0, 1000000, "Max number of iterations in second order correction (default 4)"); - - register_num_option("kappa_soc", 0.99, 0.0, 1e+20, "Factor to decrease the constraint violation in second order correction."); + + register_num_option("kappa_soc", + 0.99, + 0.0, + 1e+20, + "Factor to decrease the constraint violation in second order correction."); } // feasibility restoration { - //name of the options file to be passed to the FR solver - register_str_option("options_file_fr_prob", - hiopOptions::default_filename_fr, - "Options file for the FR solver."); + // name of the options file to be passed to the FR solver + register_str_option("options_file_fr_prob", hiopOptions::default_filename_fr, "Options file for the FR solver."); register_num_option("kappa_resto", 0.9, @@ -862,20 +871,24 @@ void hiopOptionsNLP::register_options() 1.0, "Factor to decrease the constraint violation in feasibility restoration. (default 0.9)"); - vector range(2); range[0] = "no"; range[1] = "yes"; + vector range(2); + range[0] = "no"; + range[1] = "yes"; register_str_option("force_resto", "no", range, "Force applying feasibility restoration phase"); } - //optimization method used + // optimization method used { - vector range(2); range[0]="quasinewton_approx"; range[1]="analytical_exact"; + vector range(2); + range[0] = "quasinewton_approx"; + range[1] = "analytical_exact"; register_str_option("Hessian", "quasinewton_approx", range, "Type of Hessian used with the filter IPM: 'quasinewton_approx' built internally " "by HiOp (default option) or 'analytical_exact' provided by the user"); } - //linear algebra + // linear algebra { vector range = {"auto", "xycyd", "xdycyd", "full", "condensed", "normaleqn"}; register_str_option("KKTLinsys", @@ -908,7 +921,7 @@ void hiopOptionsNLP::register_options() // - 'gpu' compute mode: work in progress { - vector range {"auto", "ma57", "pardiso", "strumpack", "resolve", "ginkgo", "cusolver-chol"}; + vector range{"auto", "ma57", "pardiso", "strumpack", "resolve", "ginkgo", "cusolver-chol"}; register_str_option("linear_solver_sparse", "auto", @@ -922,7 +935,7 @@ void hiopOptionsNLP::register_options() // - when GPU mode is on, STRUMPACK is chosen by 'auto' if available // - choosing option ma57 or pardiso with GPU being on, it results in no device being used in the linear solve! { - vector range {"auto", "ma57", "pardiso", "resolve", "strumpack", "ginkgo"}; + vector range{"auto", "ma57", "pardiso", "resolve", "strumpack", "ginkgo"}; register_str_option("duals_init_linear_solver_sparse", "auto", @@ -934,7 +947,7 @@ void hiopOptionsNLP::register_options() // - Default is 'reference' which uses sequential CPU implementations // - 'cuda' uses NVIDIA, 'hip' uses AMD GPUs (if available) { - vector range {"cuda", "hip", "reference"}; + vector range{"cuda", "hip", "reference"}; register_str_option("ginkgo_exec", "reference", @@ -942,20 +955,15 @@ void hiopOptionsNLP::register_options() "Selects the hardware architecture to run the Ginkgo linear solver on."); } - // choose triangular solver implementation in Ginkgo. // - Default is 'sparselib' which uses vendor triangular solvers // - 'syncfree' uses the busy waiting loop based Ginkgo implementation { - vector range {"syncfree", "sparselib"}; + vector range{"syncfree", "sparselib"}; - register_str_option("ginkgo_trisolve", - "syncfree", - range, - "Selects the triangular solver for Ginkgo."); + register_str_option("ginkgo_trisolve", "syncfree", range, "Selects the triangular solver for Ginkgo."); } - // choose sparsity permutation (to reduce nz in the factors). This option is available only when using // Cholesky linear solvers // - metis: use CUDA function csrmetisnd, which is a wrapper of METIS_NodeND; requires linking with @@ -967,13 +975,13 @@ void hiopOptionsNLP::register_options() // - amd-ssparse: symmetric approximate minimum degree (AMD) from Suite Sparse library. // - colamd-ssparse: column approximate minimum degree (COLAMD) from Suite Sparse library. { - vector range = { "metis", "symamd-cuda", "symamd-eigen", "symrcm", "amd-ssparse", "colamd-ssparse"}; + vector range = {"metis", "symamd-cuda", "symamd-eigen", "symrcm", "amd-ssparse", "colamd-ssparse"}; auto default_value = range[1]; #ifdef HIOP_USE_EIGEN default_value = range[2]; #endif register_str_option("linear_solver_sparse_ordering", - default_value, + default_value, range, "permutation to promote sparsity in the (Chol) factorization: 'metis' based on a wrapper of " "METIS_NodeND, 'symamd-cuda', 'symamd-eigen' (default), and 'symrcm' are the well-known " @@ -985,10 +993,7 @@ void hiopOptionsNLP::register_options() { vector range = {"klu"}; auto default_value = range[0]; - register_str_option("resolve_factorization", - default_value, - range, - "So far, only 'klu' option is available. "); + register_str_option("resolve_factorization", default_value, range, "So far, only 'klu' option is available. "); } // resolve refactorization options @@ -1002,41 +1007,17 @@ void hiopOptionsNLP::register_options() "'glu' is experimental and 'rf' is NVIDIA's stable refactorization. "); } - register_int_option("ir_inner_restart", - 20, - 1, - 100, - "(F)GMRES restart value (default is 20). "); + register_int_option("ir_inner_restart", 20, 1, 100, "(F)GMRES restart value (default is 20). "); - register_num_option("ir_inner_tol", - 1e-12, - 1e-16, - 1e-1, - "(F)GMRES tolerance (default is 1e-12). "); + register_num_option("ir_inner_tol", 1e-12, 1e-16, 1e-1, "(F)GMRES tolerance (default is 1e-12). "); - register_num_option("ir_inner_tol_min", - 1e-6, - 1e-16, - 1e-1, - "FGMRES minimum tolerance (default is 1e-6). "); - register_int_option("ir_inner_conv_cond", - 0, - 0, - 2, - "FGMRES convergence check for IR (default is 0) "); - register_num_option("ir_inner_tol_factor", - 1e-2, - 1e-20, - 1.0, - "FGMRES tolerance factor multiplying mu. (default 1e-2)"); + register_num_option("ir_inner_tol_min", 1e-6, 1e-16, 1e-1, "FGMRES minimum tolerance (default is 1e-6). "); + register_int_option("ir_inner_conv_cond", 0, 0, 2, "FGMRES convergence check for IR (default is 0) "); + register_num_option("ir_inner_tol_factor", 1e-2, 1e-20, 1.0, "FGMRES tolerance factor multiplying mu. (default 1e-2)"); - register_int_option("ir_inner_maxit", - 50, - 0, - 1000, - "(F)GMRES maximum number of iterations (default is 50). "); + register_int_option("ir_inner_maxit", 50, 0, 1000, "(F)GMRES maximum number of iterations (default is 50). "); -{ + { vector range = {"mgs", "cgs2", "mgs_two_synch", "mgs_pm"}; auto default_value = range[0]; register_str_option("ir_inner_gs_scheme", @@ -1049,17 +1030,24 @@ void hiopOptionsNLP::register_options() "mgs_pm: post-modern MGS, two synchs. "); } - //linsol_mode -> mostly related to magma and MDS linear algebra + // linsol_mode -> mostly related to magma and MDS linear algebra { - vector range(3); range[0]="stable"; range[1]="speculative"; range[2]="forcequick"; - register_str_option("linsol_mode", "stable", range, + vector range(3); + range[0] = "stable"; + range[1] = "speculative"; + range[2] = "forcequick"; + register_str_option("linsol_mode", + "stable", + range, "'stable'=using stable factorization; 'speculative'=try faster linear solvers when is safe " "to do so (experimental); 'forcequick'=always rely on faster solvers (experimental, avoid)"); } - - //factorization acceptor + + // factorization acceptor { - vector range(2); range[0] = "inertia_correction"; range[1]="inertia_free"; + vector range(2); + range[0] = "inertia_correction"; + range[1] = "inertia_free"; register_str_option("fact_acceptor", "inertia_correction", range, @@ -1072,11 +1060,11 @@ void hiopOptionsNLP::register_options() "Apply curvature test to check if a factorization is acceptable. " "This is the scaling factor used to determines if the " "direction is considered to have sufficiently positive curvature (1e-11 by default)"); - } + } - //inertia correction and Jacobian regularization + // inertia correction and Jacobian regularization { - //Hessian related + // Hessian related register_num_option("delta_w_min_bar", 1e-20, 0, @@ -1087,28 +1075,25 @@ void hiopOptionsNLP::register_options() 1e-40, 1e+40, "Largest perturbation of the Hessian block for inertia correction"); - register_num_option("delta_0_bar", - 1e-4, - 0, - 1e+40, - "First perturbation of the Hessian block for inertia correction"); - register_num_option("kappa_w_minus", 1./3, + register_num_option("delta_0_bar", 1e-4, 0, 1e+40, "First perturbation of the Hessian block for inertia correction"); + register_num_option("kappa_w_minus", + 1. / 3, 1e-20, 1 - 1e-20, "Factor to decrease the most recent successful perturbation for inertia correction"); register_num_option("kappa_w_plus", 8., - 1+1e-20, + 1 + 1e-20, 1e+40, "Factor to increase perturbation when it did not provide correct " "inertia correction (not first iteration)"); register_num_option("kappa_w_plus_bar", 100., - 1+1e-20, + 1 + 1e-20, 1e+40, "Factor to increase perturbation when it did not provide correct " "inertia correction (first iteration when scale not known)"); - //Jacobian related + // Jacobian related register_num_option("delta_c_bar", 1e-8, 1e-20, @@ -1122,7 +1107,7 @@ void hiopOptionsNLP::register_options() "Exponent of mu when computing regularization for potentially rank-deficient " "Jacobian (delta_c=delta_c_bar*mu^kappa_c)"); - vector range = {"primal_first", "dual_first"}; + vector range = {"primal_first", "dual_first"}; register_str_option("normaleqn_regularization_priority", "dual_first", range, @@ -1138,7 +1123,6 @@ void hiopOptionsNLP::register_options() "The method used to compute regularizations. By default, `scalar` sets all the primal " "regularizations to a constant computed by HiOp. `randomized` approach sets regularization " "to a randomized vector around a constant."); - } // performance profiling { @@ -1154,7 +1138,7 @@ void hiopOptionsNLP::register_options() // elastic mode { - vector range = { "none", "tighten_bound", "correct_it", "correct_it_adjust_bound"}; + vector range = {"none", "tighten_bound", "correct_it", "correct_it_adjust_bound"}; register_str_option("elastic_mode", "none", range, @@ -1192,19 +1176,21 @@ void hiopOptionsNLP::register_options() "HiOp will use the default values for both parameters."); } - //other options + // other options { - vector range(2); range[0]="no"; range[1]="yes"; + vector range(2); + range[0] = "no"; + range[1] = "yes"; register_str_option("write_kkt", range[0], range, "write internal KKT linear system (matrix, rhs, sol) to file (default 'no')"); register_str_option("print_options", - "no", // default value for the option - vector({"yes", "no", "short"}), // range + "no", // default value for the option + vector({"yes", "no", "short"}), // range "prints options before algorithm starts (default 'no')"); } - + // memory space selection { #ifdef HIOP_USE_RAJA @@ -1228,7 +1214,11 @@ void hiopOptionsNLP::register_options() // compute mode { //! todo: proposing to remove this option - vector range(4); range[0]="auto"; range[1]="cpu"; range[2]="hybrid"; range[3]="gpu"; + vector range(4); + range[0] = "auto"; + range[1] = "cpu"; + range[2] = "hybrid"; + range[3] = "gpu"; register_str_option("compute_mode", "auto", range, @@ -1254,12 +1244,9 @@ void hiopOptionsNLP::register_options() #endif #if defined(HIOP_USE_HIP) range.push_back("hip"); -#endif +#endif - register_str_option("mem_backend", - "auto", - range, - "'auto', 'stdcpp', 'umpire', 'cuda', 'hip'"); + register_str_option("mem_backend", "auto", range, "'auto', 'stdcpp', 'umpire', 'cuda', 'hip'"); } // execution policies { @@ -1281,12 +1268,9 @@ void hiopOptionsNLP::register_options() #endif #if defined(HIOP_USE_HIP) range.push_back("hip"); -#endif +#endif - register_str_option("exec_policies", - "auto", - range, - ""); + register_str_option("exec_policies", "auto", range, ""); } // checkpointing and restarting @@ -1296,24 +1280,25 @@ void hiopOptionsNLP::register_options() vector range = {"yes", "no"}; constexpr char msgcs[] = "Save state of NLP solver to file indicated by 'checkpoint_file'."; register_str_option("checkpoint_save", range[1], range, msgcs); - + constexpr char msgcsN[] = "Iteration frequency of saving checkpoints to disk."; register_int_option("checkpoint_save_every_N_iter", 10, 1, 1e+6, msgcsN); constexpr char msgcf[] = "Path to checkpoint file to load from or save to."; register_str_option("checkpoint_file", "hiop_state_chk", msgcf); - constexpr char msgclos[] = "On (re)start the NLP solver will load checkpoint file " - "specified by 'checkpoint_file' option."; + constexpr char msgclos[] = + "On (re)start the NLP solver will load checkpoint file " + "specified by 'checkpoint_file' option."; register_str_option("checkpoint_load_on_start", range[1], range, msgclos); } } void hiopOptionsNLP::ensure_consistence() { - //check that the values of different options are consistent - //do not check is the values of a particular option is valid; this is done in the Set methods + // check that the values of different options are consistent + // do not check is the values of a particular option is valid; this is done in the Set methods double eps_tol_accep = GetNumeric("acceptable_tolerance"); - double eps_tol = GetNumeric("tolerance"); + double eps_tol = GetNumeric("tolerance"); if(eps_tol_accep < eps_tol) { if(is_user_defined("acceptable_tolerance")) { log_printf(hovWarning, @@ -1353,25 +1338,26 @@ void hiopOptionsNLP::ensure_consistence() } } - if(GetString("Hessian")=="quasinewton_approx") { + if(GetString("Hessian") == "quasinewton_approx") { string strKKT = GetString("KKTLinsys"); - if(strKKT=="xycyd" || strKKT=="xdycyd" || strKKT=="full" || strKKT=="normaleqn") { + if(strKKT == "xycyd" || strKKT == "xdycyd" || strKKT == "full" || strKKT == "normaleqn") { if(is_user_defined("Hessian")) { log_printf(hovWarning, "The option 'KKTLinsys=%s' is not valid with 'Hessian=quasiNewtonApprox'. " - "Will use 'KKTLinsys=auto'\n", strKKT.c_str()); + "Will use 'KKTLinsys=auto'\n", + strKKT.c_str()); set_val("KKTLinsys", "auto"); } } } - if(GetString("Hessian")=="analytical_exact") { + if(GetString("Hessian") == "analytical_exact") { string duals_update_type = GetString("duals_update_type"); if("linear" != duals_update_type) { // 'duals_update_type' should be 'lsq' or 'linear' for 'Hessian=quasinewton_approx' // 'duals_update_type' can only be 'linear' for Newton methods 'Hessian=analytical_exact' - //warn only if these are defined by the user (option file or via SetXXX methods) + // warn only if these are defined by the user (option file or via SetXXX methods) if(is_user_defined("duals_update_type")) { log_printf(hovWarning, "The option 'duals_update_type=%s' is not valid with 'Hessian=analytical_exact'. " @@ -1388,7 +1374,7 @@ void hiopOptionsNLP::ensure_consistence() auto kkt_linsys = GetString("KKTLinsys"); auto sol_sp = GetString("linear_solver_sparse"); if(kkt_linsys == "full") { - if(sol_sp!="resolve" && sol_sp!="pardiso" && sol_sp!="strumpack" && sol_sp!="auto") { + if(sol_sp != "resolve" && sol_sp != "pardiso" && sol_sp != "strumpack" && sol_sp != "auto") { if(is_user_defined("linear_solver_sparse")) { log_printf(hovWarning, "The option 'linear_solver_sparse=%s' is not valid with option 'KKTLinsys=full'. " @@ -1399,7 +1385,7 @@ void hiopOptionsNLP::ensure_consistence() } } else { if(kkt_linsys == "condensed") { - if(sol_sp!="cusolver-chol" && sol_sp!="auto") { + if(sol_sp != "cusolver-chol" && sol_sp != "auto") { if(is_user_defined("linear_solver_sparse")) { log_printf(hovWarning, "The option 'linear_solver_sparse=%s' is not valid with option 'KKTLinsys=condensed'. " @@ -1414,14 +1400,14 @@ void hiopOptionsNLP::ensure_consistence() #ifndef HIOP_USE_CUDA if(sol_sp == "resolve" || sol_sp == "cusolver-chol") { if(is_user_defined("linear_solver_sparse")) { - log_printf(hovWarning, - "The option 'linear_solver_sparse=%s' is not valid without CUDA support enabled." - " Will use 'linear_solver_sparse=auto'.\n", - GetString("linear_solver_sparse").c_str()); + log_printf(hovWarning, + "The option 'linear_solver_sparse=%s' is not valid without CUDA support enabled." + " Will use 'linear_solver_sparse=auto'.\n", + GetString("linear_solver_sparse").c_str()); } - set_val("linear_solver_sparse", "auto"); + set_val("linear_solver_sparse", "auto"); } -#endif // HIOP_USE_CUDA +#endif // HIOP_USE_CUDA #ifdef HIOP_USE_GINKGO auto exec_string = GetString("ginkgo_exec"); @@ -1435,7 +1421,7 @@ void hiopOptionsNLP::ensure_consistence() } set_val("ginkgo_exec", "reference"); } -#endif // HIOP_USE_CUDA +#endif // HIOP_USE_CUDA #ifndef HIOP_USE_HIP if(sol_sp == "ginkgo" && exec_string == "hip") { if(is_user_defined("linear_solver_sparse")) { @@ -1446,10 +1432,10 @@ void hiopOptionsNLP::ensure_consistence() } set_val("ginkgo_exec", "reference"); } -#endif // HIOP_USE_HIP -#endif // HIOP_USE_GINKGO +#endif // HIOP_USE_HIP +#endif // HIOP_USE_GINKGO - //linear_solver_sparse_ordering checks and warnings + // linear_solver_sparse_ordering checks and warnings #ifndef HIOP_USE_CUDA if(is_user_defined("linear_solver_sparse_ordering")) { @@ -1457,21 +1443,20 @@ void hiopOptionsNLP::ensure_consistence() } #else #ifndef HIOP_USE_EIGEN - if(GetString("linear_solver_sparse_ordering")=="symamd-eigen") { + if(GetString("linear_solver_sparse_ordering") == "symamd-eigen") { if(is_user_defined("linear_solver_sparse_ordering")) { log_printf(hovWarning, "option linear_solver_sparse_ordering=symamd-eigen was changed to 'symamd-cuda' since HiOp was " "built without EIGEN.\n"); - } set_val("linear_solver_sparse_ordering", "symamd-cuda"); } #endif #endif - + // When RAJA is not enabled ... #ifndef HIOP_USE_RAJA - if(GetString("compute_mode")=="gpu") { + if(GetString("compute_mode") == "gpu") { if(is_user_defined("compute_mode")) { log_printf(hovWarning, "option compute_mode=gpu was changed to 'hybrid' since HiOp was built without " @@ -1479,25 +1464,26 @@ void hiopOptionsNLP::ensure_consistence() } set_val("compute_mode", "hybrid"); } - if(GetString("mem_space")!="default") { + if(GetString("mem_space") != "default") { std::string memory_space = GetString("mem_space"); if(is_user_defined("compute_mode")) { log_printf(hovWarning, "option mem_space=%s was changed to 'default' since HiOp was built without " - "RAJA/Umpire support.\n", memory_space.c_str()); + "RAJA/Umpire support.\n", + memory_space.c_str()); } set_val("mem_space", "default"); } #endif if(GetString("mem_space") != GetString("callback_mem_space")) { - if( (is_user_defined("callback_mem_space") && GetString("mem_space")!="device") - || (GetString("callback_mem_space")=="um" && GetString("mem_space")=="device") ) { + if((is_user_defined("callback_mem_space") && GetString("mem_space") != "device") || + (GetString("callback_mem_space") == "um" && GetString("mem_space") == "device")) { log_printf(hovWarning, - "option 'callback_mem_space' was changed to the value '%s' of 'mem_space' options since the provided " - "value '%s' is not supported by HiOp with the provided values of 'mem_space'.\n", - GetString("mem_space").c_str(), - GetString("callback_mem_space").c_str()); + "option 'callback_mem_space' was changed to the value '%s' of 'mem_space' options since the provided " + "value '%s' is not supported by HiOp with the provided values of 'mem_space'.\n", + GetString("mem_space").c_str(), + GetString("callback_mem_space").c_str()); set_val("callback_mem_space", GetString("mem_space").c_str()); } else if(GetString("callback_mem_space") == "default") { // user didn't specify this option, set it to the value of `mem_space` @@ -1507,7 +1493,7 @@ void hiopOptionsNLP::ensure_consistence() // No hybrid or GPU compute mode if HiOp is built without GPU linear solvers #ifndef HIOP_USE_GPU - if(GetString("compute_mode")=="hybrid") { + if(GetString("compute_mode") == "hybrid") { if(is_user_defined("compute_mode")) { log_printf(hovWarning, "option compute_mode=hybrid was changed to 'cpu' since HiOp was built without " @@ -1515,21 +1501,19 @@ void hiopOptionsNLP::ensure_consistence() } set_val("compute_mode", "cpu"); } - if(GetString("compute_mode")=="gpu") { - log_printf(hovWarning, - "option compute_mode=gpu was changed to 'cpu' since HiOp was built without GPU support.\n"); + if(GetString("compute_mode") == "gpu") { + log_printf(hovWarning, "option compute_mode=gpu was changed to 'cpu' since HiOp was built without GPU support.\n"); set_val("compute_mode", "cpu"); } - - if(GetString("compute_mode")=="auto") { + + if(GetString("compute_mode") == "auto") { set_val("compute_mode", "cpu"); } #endif - + // No removing of fixed variables in GPU compute mode ... - if(GetString("compute_mode")=="gpu") { - if(GetString("fixed_var")=="remove") { - + if(GetString("compute_mode") == "gpu") { + if(GetString("fixed_var") == "remove") { log_printf(hovWarning, "option fixed_var=remove was changed to 'relax' since only 'relax'" "is supported in GPU compute mode.\n"); @@ -1538,19 +1522,18 @@ void hiopOptionsNLP::ensure_consistence() } // use inertia-free approach if 1) solver is strumpack or resolve, or 2) if linsys is full - if(GetString("KKTLinsys")=="full") { - if(GetString("fact_acceptor")=="inertia_correction") { + if(GetString("KKTLinsys") == "full") { + if(GetString("fact_acceptor") == "inertia_correction") { if(is_user_defined("fact_acceptor")) { log_printf(hovWarning, "Option fact_acceptor=inertia_correction was changed to 'inertia_free' since the requested " "KKTLinsys option 'full' does not have support for inertia computation.\n"); - } set_val("fact_acceptor", "inertia_free"); } } else if(GetString("linear_solver_sparse") == "strumpack" || GetString("linear_solver_sparse") == "resolve") { - if(GetString("fact_acceptor")=="inertia_correction") { - if(is_user_defined("fact_acceptor") && is_user_defined("linear_solver_sparse") ) { + if(GetString("fact_acceptor") == "inertia_correction") { + if(is_user_defined("fact_acceptor") && is_user_defined("linear_solver_sparse")) { log_printf(hovWarning, "Option fact_acceptor=inertia_correction was changed to 'inertia_free' since the requested " "linear solver '%s' does not support inertia calculation.\n", @@ -1574,43 +1557,41 @@ void hiopOptionsNLP::ensure_consistence() "checkpoint_save_every_N_iter", "checkpoint_file", "checkpoint_load_on_start"}; - for(string opt : chkpnt_opts) { + for(string opt: chkpnt_opts) { if(is_user_defined(opt.c_str())) { log_printf(hovWarning, "Checkpointing not available since HiOp was not built with AXOM. All checkpointing options " "are ignored.\n"); - //reset them to as not being user defined to avoid triggering the message. - for(auto opt2 : chkpnt_opts) { + // reset them to as not being user defined to avoid triggering the message. + for(auto opt2: chkpnt_opts) { mOptions_[opt2]->specifiedInFile = false; mOptions_[opt2]->specifiedAtRuntime = false; } break; } } -#endif +#endif } ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// // hiopOptionsPriDec ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// -hiopOptionsPriDec::hiopOptionsPriDec(const char* opt_filename/*=nullptr*/) - : hiopOptions() +hiopOptionsPriDec::hiopOptionsPriDec(const char* opt_filename /*=nullptr*/) + : hiopOptions() { register_options(); - load_from_file(opt_filename==nullptr ? hiopOptions::default_filename_pridec_solver : opt_filename); + load_from_file(opt_filename == nullptr ? hiopOptions::default_filename_pridec_solver : opt_filename); ensure_consistence(); } -hiopOptionsPriDec::~hiopOptionsPriDec() -{ -} +hiopOptionsPriDec::~hiopOptionsPriDec() {} void hiopOptionsPriDec::register_options() { // // Primal decomposition (PriDec) solver // - - //name of the options file to be passed to the master solver (by the NLP solver, e.g., HiOp or Ipopt or other) + + // name of the options file to be passed to the master solver (by the NLP solver, e.g., HiOp or Ipopt or other) { register_str_option("options_file_master_prob", hiopOptions::default_filename_pridec_masterNLP, @@ -1637,12 +1618,12 @@ void hiopOptionsPriDec::register_options() "Determines the memory space used by PriDec solver for linear algebra objects. Must match the " "the memory space in which the master solve is going to be done."); } - + // option for local accumulation of function value and subgradient on evaluator ranks, then reduce { register_str_option("accum_local", - "false", // default value for the option - vector({"yes", "no"}), // range + "false", // default value for the option + vector({"yes", "no"}), // range "Accumulates recourse problem solutions locally on evaluator ranks (default 'false')"); } @@ -1650,70 +1631,56 @@ void hiopOptionsPriDec::register_options() // convergence and stopping criteria // { - register_num_option("alpha_max", - 1e6, - 1, - 1e14, - "Upper bound of quadratic coefficient alpha (default 1e6)"); + register_num_option("alpha_max", 1e6, 1, 1e14, "Upper bound of quadratic coefficient alpha (default 1e6)"); - register_num_option("alpha_min", - 1e-5, - 1e-8, - 1e3, - "Lower bound of quadratic coefficient alpha (default 1e6)"); + register_num_option("alpha_min", 1e-5, 1e-8, 1e3, "Lower bound of quadratic coefficient alpha (default 1e6)"); - - //TODO: Frank check these and add others as needed in the primal decomposition algorithm - register_num_option("tolerance", - 1e-5, - 1e-14, - 1e-1, - "Absolute error tolerance for the PriDec solver (default 1e-5)"); + // TODO: Frank check these and add others as needed in the primal decomposition algorithm + register_num_option("tolerance", 1e-5, 1e-14, 1e-1, "Absolute error tolerance for the PriDec solver (default 1e-5)"); + + // register_num_option("rel_tolerance", 0., 0., 0.1, + // "Error tolerance for the NLP relative to errors at the initial point. A null " + // "value disables this option (default 0.)"); - //register_num_option("rel_tolerance", 0., 0., 0.1, - // "Error tolerance for the NLP relative to errors at the initial point. A null " - // "value disables this option (default 0.)"); - register_num_option("acceptable_tolerance", 1e-3, 1e-14, 1e-1, "HiOp PriDec terminates if the error is below 'acceptable tolerance' for 'acceptable_iterations' " "many consecutive iterations (default 1e-3)"); - + register_int_option("acceptable_iterations", 25, 1, 1e6, "Number of iterations of acceptable tolerance after which HiOp terminates (default 25)"); - + register_int_option("max_iter", 30000, 1, 1e9, "Max number of iterations (default 30000)"); } - + // - // misc options + // misc options // - //TODO: Frank check/implement these in PriDecSolver and add others as needed + // TODO: Frank check/implement these in PriDecSolver and add others as needed register_int_option("verbosity_level", 2, 0, 12, "Verbosity level: 0 no output (only errors), 1=0+warnings, 2=1 (reserved), " "3=2+optimization output, 4=3+scalars; larger values explained in hiopLogger.hpp"); - + register_str_option("print_options", - "no", // default value for the option - vector({"yes", "no"}), // range + "no", // default value for the option + vector({"yes", "no"}), // range "Prints options before algorithm starts (default 'no')"); - } void hiopOptionsPriDec::ensure_consistence() { - //check that the values of different options are consistent - //do not check is the values of a particular option is valid; this is done in the Set methods + // check that the values of different options are consistent + // do not check is the values of a particular option is valid; this is done in the Set methods double eps_tol_accep = GetNumeric("acceptable_tolerance"); - double eps_tol = GetNumeric("tolerance"); + double eps_tol = GetNumeric("tolerance"); if(eps_tol_accep < eps_tol) { if(is_user_defined("acceptable_tolerance")) { log_printf(hovWarning, @@ -1726,11 +1693,13 @@ void hiopOptionsPriDec::ensure_consistence() void hiopOptionsPriDec::print(FILE* file, const char* msg) const { - if(nullptr==msg) fprintf(file, "#\n# Hiop PriDec Solver options\n#\n"); - else fprintf(file, "%s ", msg); + if(nullptr == msg) + fprintf(file, "#\n# Hiop PriDec Solver options\n#\n"); + else + fprintf(file, "%s ", msg); - map::const_iterator it = mOptions_.begin(); - for(; it!=mOptions_.end(); it++) { + map::const_iterator it = mOptions_.begin(); + for(; it != mOptions_.end(); it++) { fprintf(file, "%s ", it->first.c_str()); it->second->print(file); fprintf(file, "\n"); @@ -1738,4 +1707,4 @@ void hiopOptionsPriDec::print(FILE* file, const char* msg) const fprintf(file, "# end of Hiop PriDec Solver options\n\n"); } -} //~end namespace +} // namespace hiop diff --git a/src/Utils/hiopOptions.hpp b/src/Utils/hiopOptions.hpp index b01140253..1c51b7d92 100644 --- a/src/Utils/hiopOptions.hpp +++ b/src/Utils/hiopOptions.hpp @@ -2,47 +2,47 @@ // Produced at the Lawrence Livermore National Laboratory (LLNL). // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_OPTIONS @@ -57,66 +57,66 @@ namespace hiop { class hiopLogger; - + class hiopOptions { public: hiopOptions(); virtual ~hiopOptions(); - //Seters for options values that should be self explanatory with the exception of the last parameter. + // Seters for options values that should be self explanatory with the exception of the last parameter. // - //Passing 'setFromFile' with non-default, 'true' value is for expert use-only. It indicates that the option - //value comes from the options file (hiop.options) and will overwrite any options set at runtime by the - //user's code. However, passing 'setFromFile' with 'true' at runtime is perfectly fine and will - //conveniently "overwrite the overwriting" file options - - virtual bool SetNumericValue (const char* name, const double& value, const bool& setFromFile=false); - virtual bool SetIntegerValue(const char* name, const int& value, const bool& setFromFile=false); - virtual bool SetStringValue (const char* name, const char* value, const bool& setFromFile=false); - - virtual double GetNumeric(const char* name) const; - virtual int GetInteger(const char* name) const; - virtual std::string GetString (const char* name) const; + // Passing 'setFromFile' with non-default, 'true' value is for expert use-only. It indicates that the option + // value comes from the options file (hiop.options) and will overwrite any options set at runtime by the + // user's code. However, passing 'setFromFile' with 'true' at runtime is perfectly fine and will + // conveniently "overwrite the overwriting" file options + + virtual bool SetNumericValue(const char* name, const double& value, const bool& setFromFile = false); + virtual bool SetIntegerValue(const char* name, const int& value, const bool& setFromFile = false); + virtual bool SetStringValue(const char* name, const char* value, const bool& setFromFile = false); + + virtual double GetNumeric(const char* name) const; + virtual int GetInteger(const char* name) const; + virtual std::string GetString(const char* name) const; void SetLog(hiopLogger* log_in) { log_ = log_in; ensure_consistence(); } - virtual void print(FILE* file, const char* msg=NULL) const; + virtual void print(FILE* file, const char* msg = NULL) const; /** - * Default name for the options file for the HiOp NLP solver. If the file does not exist, - * the options object will be created with default options. + * Default name for the options file for the HiOp NLP solver. If the file does not exist, + * the options object will be created with default options. * * When the PriDec solver is used and the worker (a.k.a., contingency or recourse) NLP solver * is HiOp's NLP solver, this filename will be used by the worker NLP solver to load the options. */ static const char* default_filename; - + /** * Default name for the options file for the PriDec solver. If the file does not exist, the * options object will be created with default options. - * + * * This filename is not used by the master or worker NLP solvers employed by the PriDec solver. */ static const char* default_filename_pridec_solver; /** - * Default name for the options file for the master (a.k.a basecase) NLP solver within PriDec solver. - * This is passed by the PriDec solver to the user's routine that solves the master NLP. The + * Default name for the options file for the master (a.k.a basecase) NLP solver within PriDec solver. + * This is passed by the PriDec solver to the user's routine that solves the master NLP. The * filename can be changed in the PriDec solver options files via the option 'options_file_master_prob'. * - * The behavior for the case when the file does not exist is dependent on the underlying NLP solver - * used to solve the master. If the file does not exist and HiOp is used as a master solver, HiOp NLP - * will create an option object with default option values. + * The behavior for the case when the file does not exist is dependent on the underlying NLP solver + * used to solve the master. If the file does not exist and HiOp is used as a master solver, HiOp NLP + * will create an option object with default option values. */ static const char* default_filename_pridec_masterNLP; /** - * Default name for the options file for the feasibility restoration problem. If the file does not exist, - * the options object will be created with default options. + * Default name for the options file for the feasibility restoration problem. If the file does not exist, + * the options object will be created with default options. */ static const char* default_filename_fr; @@ -130,9 +130,9 @@ class hiopOptions const std::string& defaultValue, const std::vector& range, const char* description); - /// register a string option that can take any value + /// register a string option that can take any value void register_str_option(const std::string& name, const std::string& defaultValue, const char* description); - + virtual void register_options() = 0; void load_from_file(const char* szFilename); @@ -144,43 +144,58 @@ class hiopOptions virtual bool set_val(const char* name, const double& value); // Setter method used to ensure consistence. Does not alter 'specifiedInFile' and 'specifiedAtRuntime' virtual bool set_val(const char* name, const int& value); - // Setter method used to ensure consistence. Does not alter 'specifiedInFile' and 'specifiedAtRuntime' + // Setter method used to ensure consistence. Does not alter 'specifiedInFile' and 'specifiedAtRuntime' virtual bool set_val(const char* name, const char* value); - //Returns true if an option was set by the user (via options file or at runtime) or false if the option was not set - //by the user or cannot be found + // Returns true if an option was set by the user (via options file or at runtime) or false if the option was not set + // by the user or cannot be found virtual bool is_user_defined(const char* option_name); + protected: void log_printf(hiopOutVerbosity v, const char* format, ...); - struct Option { // option entry + struct Option + { // option entry Option(const char* description) - : descr(description), specifiedInFile(false), specifiedAtRuntime(false) {}; + : descr(description), + specifiedInFile(false), + specifiedAtRuntime(false) {}; virtual ~Option() {}; std::string descr; bool specifiedInFile; bool specifiedAtRuntime; - virtual void print(FILE* f, bool short_ver=false) const =0; + virtual void print(FILE* f, bool short_ver = false) const = 0; }; - struct OptionInt : public Option { - OptionInt(int v, int low, int upp, const char* description) - : Option(description), val(v), lb(low), ub(upp) {}; - int val, lb, ub; - void print(FILE* f, bool short_ver=false) const; + struct OptionInt : public Option + { + OptionInt(int v, int low, int upp, const char* description) + : Option(description), + val(v), + lb(low), + ub(upp) {}; + int val, lb, ub; + void print(FILE* f, bool short_ver = false) const; }; - struct OptionNum : public Option { + struct OptionNum : public Option + { OptionNum(double v, double low, double upp, const char* description) - : Option(description), val(v), lb(low), ub(upp) {}; - double val, lb, ub; - void print(FILE* f, bool short_ver=false) const; + : Option(description), + val(v), + lb(low), + ub(upp) {}; + double val, lb, ub; + void print(FILE* f, bool short_ver = false) const; }; - struct OptionStr : public Option { - OptionStr(std::string v, const std::vector& range_, const char* description) - : Option(description), val(v), range(range_) {}; + struct OptionStr : public Option + { + OptionStr(std::string v, const std::vector& range_, const char* description) + : Option(description), + val(v), + range(range_) {}; std::string val; std::vector range; - void print(FILE* f, bool short_ver=false) const; + void print(FILE* f, bool short_ver = false) const; }; std::map mOptions_; @@ -188,7 +203,6 @@ class hiopOptions hiopLogger* log_; }; - /** * @brief Options class specialized for the NLP solver * @@ -196,14 +210,14 @@ class hiopOptions class hiopOptionsNLP : public hiopOptions { public: - hiopOptionsNLP(const char* opt_filename=nullptr); + hiopOptionsNLP(const char* opt_filename = nullptr); virtual ~hiopOptionsNLP(); + protected: virtual void register_options(); virtual void ensure_consistence(); }; - /** * @brief Options class specialized for the PriDec solver * @@ -211,15 +225,15 @@ class hiopOptionsNLP : public hiopOptions class hiopOptionsPriDec : public hiopOptions { public: - hiopOptionsPriDec(const char* opt_filename=nullptr); + hiopOptionsPriDec(const char* opt_filename = nullptr); virtual ~hiopOptionsPriDec(); - - void print(FILE* file, const char* msg=NULL) const; + + void print(FILE* file, const char* msg = NULL) const; protected: virtual void register_options(); virtual void ensure_consistence(); }; -} // ~namespace -#endif +} // namespace hiop +#endif diff --git a/src/Utils/hiopRunStats.hpp b/src/Utils/hiopRunStats.hpp index 9d02b64cc..0ee87903c 100644 --- a/src/Utils/hiopRunStats.hpp +++ b/src/Utils/hiopRunStats.hpp @@ -3,47 +3,47 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. #ifndef HIOP_RUNSTATS @@ -56,23 +56,17 @@ #include #ifdef HIOP_USE_MPI -#include "mpi.h" +#include "mpi.h" #endif namespace hiop { - class hiopRunKKTSolStats { public: - hiopRunKKTSolStats() - { - initialize(); - }; + hiopRunKKTSolStats() { initialize(); }; - virtual ~hiopRunKKTSolStats() - { - }; + virtual ~hiopRunKKTSolStats() {}; // // at each optimization iteration @@ -83,23 +77,23 @@ class hiopRunKKTSolStats /// Records time of the initial boilerplate, before any expensive matrix update or factorization (at current iteration) hiopTimer tmUpdateInit; - + /** * Records time in the update of the linsys at current iteration. Multiple updates can occur if the inertia correction or * regularization procedures kick in. */ hiopTimer tmUpdateLinsys; - + /** * Records time spent in lower level factorizations at current iteration. Multiple factorizations can occur if the inertia * correction or regularization procedures kick in. */ hiopTimer tmUpdateInnerFact; - + /// Number of inertia corrections or regularizations int nUpdateICCorr; - /** + /** * Records time spent in compressing or decompressing rhs (or in other words, pre- and post-inner solve). Should * not include rhs manipulations done in the inner solve, which are recorded by `tmSolveInner`. */ @@ -110,13 +104,13 @@ class hiopRunKKTSolStats * in the inner solve, which are recorded by `tmSolveInner`. */ hiopTimer tmResid; - + /** * Records the time spent in the inner solve. The inner solve is generally the call from `solveCompressed` to the * linear solver, such as to Magma, MA57, BiCGStab, etc. * - * The inner solve can be the triangular solves when a direct solver is used without iterative refinement or can - * be the Krylov-based iterative refinement (IR) solve, which can consist of the triangular solves, matrix applies + * The inner solve can be the triangular solves when a direct solver is used without iterative refinement or can + * be the Krylov-based iterative refinement (IR) solve, which can consist of the triangular solves, matrix applies * and residual computations needed in the IR, iterative refinement updates, and preconditioner applies, if any. */ hiopTimer tmSolveInner; @@ -128,16 +122,16 @@ class hiopRunKKTSolStats double nIterRefinInner; /// (TODO) Records the number of outer IR steps (on the full KKT system) - //double nIterRefinOuter; + // double nIterRefinOuter; // // total - // - + // + /// Records total time in KKT-related computations over the life of the algorithm double tmTotal; // - //constituents of total time from `tmTotal`-> map into timers used to time each optimization iteration + // constituents of total time from `tmTotal`-> map into timers used to time each optimization iteration // /// Total time recorded by `tmUpdateInit` double tmTotalUpdateInit; @@ -153,8 +147,9 @@ class hiopRunKKTSolStats double tmTotalResid; /// Total number of inner IR steps double nTotalIterRefinInner; - - inline void initialize() { + + inline void initialize() + { tmTotalPerIter.reset(); tmUpdateInit.reset(); tmUpdateLinsys.reset(); @@ -164,12 +159,12 @@ class hiopRunKKTSolStats tmSolveInner.reset(); tmResid.reset(); nIterRefinInner = 0.; - + tmTotal = 0.; tmTotalUpdateInit = 0.; tmTotalUpdateLinsys = 0.; tmTotalUpdateInnerFact = 0.; - tmTotalSolveRhsManip = 0.; + tmTotalSolveRhsManip = 0.; tmTotalSolveInner = 0.; tmTotalResid = 0.; nTotalIterRefinInner = 0.; @@ -179,7 +174,7 @@ class hiopRunKKTSolStats { tmTotalPerIter.reset(); tmTotalPerIter.start(); - + tmUpdateInit.reset(); tmUpdateLinsys.reset(); tmUpdateInnerFact.reset(); @@ -188,7 +183,7 @@ class hiopRunKKTSolStats tmSolveInner.reset(); tmResid.reset(); nIterRefinInner = 0.; - } + } inline void end_optimiz_iteration() { tmTotalPerIter.stop(); @@ -197,49 +192,51 @@ class hiopRunKKTSolStats tmTotalUpdateInit += tmUpdateInit.getElapsedTime(); tmTotalUpdateLinsys += tmUpdateLinsys.getElapsedTime(); tmTotalUpdateInnerFact += tmUpdateInnerFact.getElapsedTime(); - tmTotalSolveRhsManip += tmSolveRhsManip.getElapsedTime(); + tmTotalSolveRhsManip += tmSolveRhsManip.getElapsedTime(); tmTotalSolveInner += tmSolveInner.getElapsedTime(); tmTotalResid += tmResid.getElapsedTime(); nTotalIterRefinInner += nIterRefinInner; } - inline std::string get_summary_last_iter() { + inline std::string get_summary_last_iter() + { std::stringstream ss; ss << std::fixed << std::setprecision(3); ss << "Iteration KKT time " << tmTotalPerIter.getElapsedTime() << "s " << std::endl; ss << "\tupdate init " << std::setprecision(3) << tmUpdateInit.getElapsedTime() << "s " - << "update linsys " << tmUpdateLinsys.getElapsedTime() << "s " - << "fact " << tmUpdateInnerFact.getElapsedTime() << "s " + << "update linsys " << tmUpdateLinsys.getElapsedTime() << "s " + << "fact " << tmUpdateInnerFact.getElapsedTime() << "s " << "inertia corrections " << nUpdateICCorr << std::endl; - ss << "\tsolve rhs-manip " <0) { - ss << " at " << flopsFact/tmFactTime.getElapsedTime() << "TFlops/s" ; + if(flopsFact > 0) { + ss << " at " << flopsFact / tmFactTime.getElapsedTime() << "TFlops/s"; } - ss << " inertia " << tmInertiaComp.getElapsedTime() << "s" + ss << " inertia " << tmInertiaComp.getElapsedTime() << "s" << " triu. solves " << tmTriuSolves.getElapsedTime() << "s"; - if(flopsTriuSolves>0) { - ss << " at " << flopsTriuSolves/tmTriuSolves.getElapsedTime() << "TFlops/s"; + if(flopsTriuSolves > 0) { + ss << " at " << flopsTriuSolves / tmTriuSolves.getElapsedTime() << "TFlops/s"; } - ss << " device transfer " << tmDeviceTransfer.getElapsedTime() << "s" - << std::endl; + ss << " device transfer " << tmDeviceTransfer.getElapsedTime() << "s" << std::endl; return ss.str(); } }; - class hiopRunStats { public: - hiopRunStats(MPI_Comm comm_=MPI_COMM_WORLD) -#ifdef HIOP_USE_MPI - : comm(comm_) + hiopRunStats(MPI_Comm comm_ = MPI_COMM_WORLD) +#ifdef HIOP_USE_MPI + : comm(comm_) #endif - { + { initialize(); }; @@ -322,71 +317,75 @@ class hiopRunStats hiopTimer tmEvalObj, tmEvalGrad_f, tmEvalCons, tmEvalJac_con, tmEvalHessL; int nEvalObj, nEvalGrad_f, nEvalCons_eq, nEvalCons_ineq, nEvalJac_con_eq, nEvalJac_con_ineq; int nEvalHessL; - + int nIter; hiopRunKKTSolStats kkt; hiopLinSolStats linsolv; - inline virtual void initialize() { + inline virtual void initialize() + { tmOptimizTotal = tmSolverInternal = tmSearchDir = tmStartingPoint = tmMultUpdate = tmComm = tmInit = 0.; - tmEvalObj = tmEvalGrad_f = tmEvalCons = tmEvalJac_con = tmEvalHessL = 0.; - nEvalObj = nEvalGrad_f = nEvalCons_eq = nEvalCons_ineq = nEvalJac_con_eq = nEvalJac_con_ineq = 0; + tmEvalObj = tmEvalGrad_f = tmEvalCons = tmEvalJac_con = tmEvalHessL = 0.; + nEvalObj = nEvalGrad_f = nEvalCons_eq = nEvalCons_ineq = nEvalJac_con_eq = nEvalJac_con_ineq = 0; nEvalHessL = 0; - nIter = 0; + nIter = 0; } - inline std::string get_summary(int masterRank=0) { + inline std::string get_summary(int masterRank = 0) + { std::stringstream ss; - ss << "Total time " << std::fixed << std::setprecision(3) - << tmOptimizTotal.getElapsedTime() << "s " << std::endl; + ss << "Total time " << std::fixed << std::setprecision(3) << tmOptimizTotal.getElapsedTime() << "s " << std::endl; - ss << "Hiop internal time: " << std::setprecision(3) - << " total " << std::setprecision(3) << tmSolverInternal.getElapsedTime() << "s " - << " avg iter " << (tmSolverInternal.getElapsedTime()/nIter) << "s " << std::endl; + ss << "Hiop internal time: " << std::setprecision(3) << " total " << std::setprecision(3) + << tmSolverInternal.getElapsedTime() << "s " + << " avg iter " << (tmSolverInternal.getElapsedTime() / nIter) << "s " << std::endl; #ifdef HIOP_USE_MPI int nranks; - int ierr = MPI_Comm_size(comm, &nranks); assert(MPI_SUCCESS==ierr); - - double loc=tmSolverInternal.getElapsedTime(), mean; - ierr = MPI_Allreduce(&loc, &mean, 1, MPI_DOUBLE, MPI_SUM, comm); assert(MPI_SUCCESS==ierr); - mean = mean/nranks; - loc = tmSolverInternal.getElapsedTime()-mean; loc = loc*loc; + int ierr = MPI_Comm_size(comm, &nranks); + assert(MPI_SUCCESS == ierr); + + double loc = tmSolverInternal.getElapsedTime(), mean; + ierr = MPI_Allreduce(&loc, &mean, 1, MPI_DOUBLE, MPI_SUM, comm); + assert(MPI_SUCCESS == ierr); + mean = mean / nranks; + loc = tmSolverInternal.getElapsedTime() - mean; + loc = loc * loc; double stddev; - ierr = MPI_Allreduce(&loc, &stddev, 1, MPI_DOUBLE, MPI_SUM, comm); assert(MPI_SUCCESS==ierr); + ierr = MPI_Allreduce(&loc, &stddev, 1, MPI_DOUBLE, MPI_SUM, comm); + assert(MPI_SUCCESS == ierr); stddev = sqrt(stddev); stddev /= nranks; - ss << " internal total std dev across ranks " << (stddev/mean*100) << " percent" << std::endl; + ss << " internal total std dev across ranks " << (stddev / mean * 100) << " percent" << std::endl; #endif - ss << std::setprecision(3) - << "Fcn/deriv time: total=" << (tmEvalObj.getElapsedTime() + - tmEvalGrad_f.getElapsedTime() + - tmEvalCons.getElapsedTime() + - tmEvalJac_con.getElapsedTime() + - tmEvalHessL.getElapsedTime()) << "s " - << "( obj=" << tmEvalObj.getElapsedTime() - << " grad=" << tmEvalGrad_f.getElapsedTime() - << " cons=" << tmEvalCons.getElapsedTime() - << " Jac=" << tmEvalJac_con.getElapsedTime() + ss << std::setprecision(3) << "Fcn/deriv time: total=" + << (tmEvalObj.getElapsedTime() + tmEvalGrad_f.getElapsedTime() + tmEvalCons.getElapsedTime() + + tmEvalJac_con.getElapsedTime() + tmEvalHessL.getElapsedTime()) + << "s " + << "( obj=" << tmEvalObj.getElapsedTime() << " grad=" << tmEvalGrad_f.getElapsedTime() + << " cons=" << tmEvalCons.getElapsedTime() << " Jac=" << tmEvalJac_con.getElapsedTime() << " Hess=" << tmEvalHessL.getElapsedTime() << ") " << std::endl; #ifdef HIOP_USE_MPI - loc=tmEvalObj.getElapsedTime() + tmEvalGrad_f.getElapsedTime() + tmEvalCons.getElapsedTime() + tmEvalJac_con.getElapsedTime(); - - ierr = MPI_Allreduce(&loc, &mean, 1, MPI_DOUBLE, MPI_SUM, comm); assert(MPI_SUCCESS==ierr); - mean = mean/nranks; - loc = tmEvalObj.getElapsedTime() + tmEvalGrad_f.getElapsedTime() + tmEvalCons.getElapsedTime() + tmEvalJac_con.getElapsedTime() - mean; - loc = loc*loc; - - ierr = MPI_Allreduce(&loc, &stddev, 1, MPI_DOUBLE, MPI_SUM, comm); assert(MPI_SUCCESS==ierr); + loc = tmEvalObj.getElapsedTime() + tmEvalGrad_f.getElapsedTime() + tmEvalCons.getElapsedTime() + + tmEvalJac_con.getElapsedTime(); + + ierr = MPI_Allreduce(&loc, &mean, 1, MPI_DOUBLE, MPI_SUM, comm); + assert(MPI_SUCCESS == ierr); + mean = mean / nranks; + loc = tmEvalObj.getElapsedTime() + tmEvalGrad_f.getElapsedTime() + tmEvalCons.getElapsedTime() + + tmEvalJac_con.getElapsedTime() - mean; + loc = loc * loc; + + ierr = MPI_Allreduce(&loc, &stddev, 1, MPI_DOUBLE, MPI_SUM, comm); + assert(MPI_SUCCESS == ierr); stddev = sqrt(stddev); stddev /= nranks; - ss << " Fcn/deriv total std dev across ranks " << (stddev/mean*100) << " percent" << std::endl; + ss << " Fcn/deriv total std dev across ranks " << (stddev / mean * 100) << " percent" << std::endl; #endif - ss << "Fcn/deriv #: obj " << nEvalObj << " grad " << nEvalGrad_f - << " eq cons " << nEvalCons_eq << " ineq cons " << nEvalCons_ineq - << " eq Jac " << nEvalJac_con_eq << " ineq Jac " << nEvalJac_con_ineq << std::endl; + ss << "Fcn/deriv #: obj " << nEvalObj << " grad " << nEvalGrad_f << " eq cons " << nEvalCons_eq << " ineq cons " + << nEvalCons_ineq << " eq Jac " << nEvalJac_con_eq << " ineq Jac " << nEvalJac_con_ineq << std::endl; return ss.str(); } @@ -397,5 +396,5 @@ class hiopRunStats #endif }; -} +} // namespace hiop #endif diff --git a/src/Utils/hiopTimer.hpp b/src/Utils/hiopTimer.hpp index 7f122b887..2e06977dc 100644 --- a/src/Utils/hiopTimer.hpp +++ b/src/Utils/hiopTimer.hpp @@ -3,50 +3,50 @@ // Written by Cosmin G. Petra, petra1@llnl.gov. // LLNL-CODE-742473. All rights reserved. // -// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp -// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). +// This file is part of HiOp. For details, see https://github.com/LLNL/hiop. HiOp +// is released under the BSD 3-clause license (https://opensource.org/licenses/BSD-3-Clause). // Please also read “Additional BSD Notice” below. // -// Redistribution and use in source and binary forms, with or without modification, +// Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: -// i. Redistributions of source code must retain the above copyright notice, this list +// i. Redistributions of source code must retain the above copyright notice, this list // of conditions and the disclaimer below. -// ii. Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the disclaimer (as noted below) in the documentation and/or +// ii. Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the disclaimer (as noted below) in the documentation and/or // other materials provided with the distribution. -// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to -// endorse or promote products derived from this software without specific prior written +// iii. Neither the name of the LLNS/LLNL nor the names of its contributors may be used to +// endorse or promote products derived from this software without specific prior written // permission. // -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT -// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED -// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES +// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT +// SHALL LAWRENCE LIVERMORE NATIONAL SECURITY, LLC, THE U.S. DEPARTMENT OF ENERGY OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +// OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED +// AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Additional BSD Notice -// 1. This notice is required to be provided under our contract with the U.S. Department -// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under +// 1. This notice is required to be provided under our contract with the U.S. Department +// of Energy (DOE). This work was produced at Lawrence Livermore National Laboratory under // Contract No. DE-AC52-07NA27344 with the DOE. -// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC -// nor any of their employees, makes any warranty, express or implied, or assumes any -// liability or responsibility for the accuracy, completeness, or usefulness of any +// 2. Neither the United States Government nor Lawrence Livermore National Security, LLC +// nor any of their employees, makes any warranty, express or implied, or assumes any +// liability or responsibility for the accuracy, completeness, or usefulness of any // information, apparatus, product, or process disclosed, or represents that its use would // not infringe privately-owned rights. -// 3. Also, reference herein to any specific commercial products, process, or services by -// trade name, trademark, manufacturer or otherwise does not necessarily constitute or -// imply its endorsement, recommendation, or favoring by the United States Government or -// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed -// herein do not necessarily state or reflect those of the United States Government or -// Lawrence Livermore National Security, LLC, and shall not be used for advertising or +// 3. Also, reference herein to any specific commercial products, process, or services by +// trade name, trademark, manufacturer or otherwise does not necessarily constitute or +// imply its endorsement, recommendation, or favoring by the United States Government or +// Lawrence Livermore National Security, LLC. The views and opinions of authors expressed +// herein do not necessarily state or reflect those of the United States Government or +// Lawrence Livermore National Security, LLC, and shall not be used for advertising or // product endorsement purposes. -#ifndef HIOP_TIMER +#ifndef HIOP_TIMER #define HIOP_TIMER #ifdef HIOP_USE_MPI @@ -57,7 +57,7 @@ #include -//to do: sys time: getrusage(RUSAGE_SELF,&usage); +// to do: sys time: getrusage(RUSAGE_SELF,&usage); namespace hiop { @@ -65,42 +65,48 @@ namespace hiop class hiopTimer { public: - hiopTimer() : tmElapsed(0.0), tmStart(0.0) {}; + hiopTimer() + : tmElapsed(0.0), + tmStart(0.0) {}; - //returns the elapsed time (accumulated between start/stop) in seconds + // returns the elapsed time (accumulated between start/stop) in seconds inline double getElapsedTime() const { return tmElapsed; } - inline void start() + inline void start() { -#ifdef HIOP_USE_MPI +#ifdef HIOP_USE_MPI tmStart = MPI_Wtime(); #else gettimeofday(&tv, NULL); - tmStart = ( static_cast(tv.tv_sec) + static_cast(tv.tv_usec)/1000000.0 ); + tmStart = (static_cast(tv.tv_sec) + static_cast(tv.tv_usec) / 1000000.0); #endif } inline void stop() { #ifdef HIOP_USE_MPI - tmElapsed += ( MPI_Wtime()-tmStart ); + tmElapsed += (MPI_Wtime() - tmStart); #else gettimeofday(&tv, NULL); - tmElapsed += ( static_cast(tv.tv_sec) + static_cast(tv.tv_usec)/1000000.0 - tmStart ); + tmElapsed += (static_cast(tv.tv_sec) + static_cast(tv.tv_usec) / 1000000.0 - tmStart); #endif } - inline void reset() { - tmElapsed=0.0; tmStart=0.0; + inline void reset() + { + tmElapsed = 0.0; + tmStart = 0.0; } - inline hiopTimer& operator=(const double& zero) { - assert(0==zero); - this->reset(); + inline hiopTimer& operator=(const double& zero) + { + assert(0 == zero); + this->reset(); return *this; } + private: - double tmElapsed; //in seconds + double tmElapsed; // in seconds double tmStart; #ifdef HIOP_USE_MPI @@ -108,5 +114,5 @@ class hiopTimer struct timeval tv; #endif }; -} +} // namespace hiop #endif diff --git a/src/_Internals/hsl_mc69z.hpp b/src/_Internals/hsl_mc69z.hpp index 67d400e05..ba72e77f5 100644 --- a/src/_Internals/hsl_mc69z.hpp +++ b/src/_Internals/hsl_mc69z.hpp @@ -1,2 +1 @@ #define HSL_MC69Z_HEADER_NOT_CPP_READY 1 - diff --git a/tests/LinAlg/matrixTestsDense.hpp b/tests/LinAlg/matrixTestsDense.hpp index 582afb52b..0b3e2518a 100644 --- a/tests/LinAlg/matrixTestsDense.hpp +++ b/tests/LinAlg/matrixTestsDense.hpp @@ -65,10 +65,13 @@ #include #include "testBase.hpp" -//for processing indexes/ints arrays on host +// for processing indexes/ints arrays on host #include "hiopVectorIntSeq.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Collection of tests for abstract hiopMatrixDense implementations. @@ -76,15 +79,15 @@ namespace hiop { namespace tests { * This class contains implementation of all dense matrix unit tests and abstract * interface for testing utility functions, which are specific to the particular * matrix and vector implementations. - * + * * To add a new test, simply add a new public method to this class and call it * from function runTests implemented in file testMatrixDense.cpp. Use helper * functions to abstract implementation specific details such as local data * size and memory space, accessing local data elements, etc. - * + * * If you want to add tests for a new dense matrix implementation (e.g. * column-major), you will need to reimplement helper functions, as well. - * + * * @warning HiOp distributed memory partitioning is 1-D and some of the unit * tests here implicitly assume that. When and if HiOp MPI partitioning * changes, these tests will have to be rewritten. @@ -94,7 +97,7 @@ class MatrixTestsDense : public TestBase { public: MatrixTestsDense() {} - virtual ~MatrixTestsDense(){} + virtual ~MatrixTestsDense() {} int matrixSetToZero(hiop::hiopMatrixDense& A, const int rank) { @@ -115,10 +118,7 @@ class MatrixTestsDense : public TestBase return reduceReturn(fail, &A); } - int matrixCopyFrom( - hiopMatrixDense &dst, - hiopMatrixDense &src, - const int rank) + int matrixCopyFrom(hiopMatrixDense& dst, hiopMatrixDense& src, const int rank) { assert(dst.n() == src.n() && "Did you pass in matrices of the same size?"); assert(dst.m() == src.m() && "Did you pass in matrices of the same size?"); @@ -144,7 +144,7 @@ class MatrixTestsDense : public TestBase return reduceReturn(fail, &dst); } - int matrix_copy_to(hiopMatrixDense &dst, hiopMatrixDense &src, const int rank) + int matrix_copy_to(hiopMatrixDense& dst, hiopMatrixDense& src, const int rank) { assert(dst.n() == src.n() && "Did you pass in matrices of the same size?"); assert(dst.m() == src.m() && "Did you pass in matrices of the same size?"); @@ -170,16 +170,12 @@ class MatrixTestsDense : public TestBase /* * y_{glob} \leftarrow \beta y_{glob} + \alpha A_{glob \times loc} x_{loc} */ - int matrixTimesVec( - hiop::hiopMatrixDense& A, - hiop::hiopVector& y, - hiop::hiopVector& x, - const int rank=0) + int matrixTimesVec(hiop::hiopMatrixDense& A, hiop::hiopVector& y, hiop::hiopVector& x, const int rank = 0) { assert(getLocalSize(&y) == getNumLocRows(&A) && "Did you pass in vectors of the correct sizes?"); assert(getLocalSize(&x) == getNumLocCols(&A) && "Did you pass in vectors of the correct sizes?"); const real_type alpha = one; - const real_type beta = one; + const real_type beta = one; const real_type A_val = one; const real_type y_val = three; const real_type x_val = three; @@ -205,11 +201,7 @@ class MatrixTestsDense : public TestBase * Notice that since A^T, x must not be distributed in this case, whereas * the plain `timesVec' nessecitated that x be distributed and y not be. */ - int matrixTransTimesVec( - hiop::hiopMatrixDense& A, - hiop::hiopVector& x, - hiop::hiopVector& y, - const int rank=0) + int matrixTransTimesVec(hiop::hiopMatrixDense& A, hiop::hiopVector& x, hiop::hiopVector& y, const int rank = 0) { const local_ordinal_type M = getNumLocRows(&A); const local_ordinal_type N = getNumLocCols(&A); @@ -217,7 +209,7 @@ class MatrixTestsDense : public TestBase assert(getLocalSize(&x) == getNumLocRows(&A) && "Did you pass in vectors of the correct sizes?"); assert(getLocalSize(&y) == getNumLocCols(&A) && "Did you pass in vectors of the correct sizes?"); const real_type alpha = one; - const real_type beta = one; + const real_type beta = one; const real_type A_val = one; const real_type y_val = three; const real_type x_val = three; @@ -227,7 +219,7 @@ class MatrixTestsDense : public TestBase // Index of row of A that will be set to zero, // and index of y that will be beta * y_val - const local_ordinal_type index_to_zero = N-1; + const local_ordinal_type index_to_zero = N - 1; A.setToConstant(A_val); y.setToConstant(y_val); @@ -238,21 +230,16 @@ class MatrixTestsDense : public TestBase * has its initial value as the first element, ensuring that * the matrix is correctly transposed. */ - for (int i=0; i real_type - { - const bool isZerodRow = (i == index_to_zero); - return isZerodRow ? - beta * y_val : - (beta * y_val) + (alpha * A_val * x_val * N_glob); - }); + fail += verifyAnswer(&y, [=](local_ordinal_type i) -> real_type { + const bool isZerodRow = (i == index_to_zero); + return isZerodRow ? beta * y_val : (beta * y_val) + (alpha * A_val * x_val * N_glob); + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); @@ -267,11 +254,7 @@ class MatrixTestsDense : public TestBase * W: KxN * all local */ - int matrixTimesMat( - hiop::hiopMatrixDense& A, - hiop::hiopMatrixDense& X, - hiop::hiopMatrixDense& W, - const int rank=0) + int matrixTimesMat(hiop::hiopMatrixDense& A, hiop::hiopMatrixDense& X, hiop::hiopMatrixDense& W, const int rank = 0) { const local_ordinal_type K = getNumLocCols(&A); assert(K == A.n()); @@ -279,11 +262,7 @@ class MatrixTestsDense : public TestBase assert(K == getNumLocRows(&X)); assert(getNumLocRows(&A) == getNumLocRows(&W)); assert(getNumLocCols(&X) == getNumLocCols(&W)); - const real_type A_val = two, - X_val = three, - W_val = two, - alpha = two, - beta = two; + const real_type A_val = two, X_val = three, W_val = two, alpha = two, beta = two; A.setToConstant(A_val); W.setToConstant(W_val); @@ -305,11 +284,7 @@ class MatrixTestsDense : public TestBase * X: kxn * */ - int matrixTransTimesMat( - hiop::hiopMatrixDense& A_local, - hiop::hiopMatrixDense& W, - hiop::hiopMatrixDense& X, - const int rank) + int matrixTransTimesMat(hiop::hiopMatrixDense& A_local, hiop::hiopMatrixDense& W, hiop::hiopMatrixDense& X, const int rank) { const local_ordinal_type K = getNumLocRows(&A_local); const global_ordinal_type N_loc = getNumLocCols(&X); @@ -317,11 +292,7 @@ class MatrixTestsDense : public TestBase assert(X.n() == W.n() && "Matrices have mismatched shapes"); assert(N_loc == getNumLocCols(&W) && "Matrices have mismatched shapes"); assert(K == getNumLocRows(&X) && "Matrices have mismatched shapes"); - const real_type A_val = two, - X_val = three, - W_val = two, - alpha = two, - beta = two; + const real_type A_val = two, X_val = three, W_val = two, alpha = two, beta = two; /* * One row of X will be set to zero to ensure @@ -335,8 +306,7 @@ class MatrixTestsDense : public TestBase X.setToConstant(X_val); // X[idx][:] = 0 - for (int i=0; i(A.n()); A.setToConstant(A_val); @@ -385,11 +347,9 @@ class MatrixTestsDense : public TestBase // Column of W with second term equal to zero local_ordinal_type idx_of_zero_col = getNumLocCols(&W_local) - 1; - int fail = verifyAnswer(&W_local, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - return j == idx_of_zero_col ? (beta * W_val) : (beta * W_val) + (alpha * A_val * X_val * Nglob); - }); + int fail = verifyAnswer(&W_local, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + return j == idx_of_zero_col ? (beta * W_val) : (beta * W_val) + (alpha * A_val * X_val * Nglob); + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); @@ -398,10 +358,7 @@ class MatrixTestsDense : public TestBase /* * this += alpha * diag */ - int matrixAddDiagonal( - hiop::hiopMatrixDense& A, - hiop::hiopVector& x, - const int rank=0) + int matrixAddDiagonal(hiop::hiopMatrixDense& A, hiop::hiopVector& x, const int rank = 0) { int fail = 0; assert(getNumLocCols(&A) == getLocalSize(&x)); @@ -409,28 +366,22 @@ class MatrixTestsDense : public TestBase assert(getNumLocRows(&A) == A.n()); assert(A.n() == x.get_size()); assert(A.m() == x.get_size()); - static const real_type alpha = two, - A_val = quarter, - x_val = half; + static const real_type alpha = two, A_val = quarter, x_val = half; A.setToConstant(A_val); x.setToConstant(x_val); A.addDiagonal(alpha, x); - fail += verifyAnswer(&A, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - const bool isOnDiagonal = (i == j); - return isOnDiagonal ? A_val + x_val * alpha : A_val; - }); + fail += verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + const bool isOnDiagonal = (i == j); + return isOnDiagonal ? A_val + x_val * alpha : A_val; + }); A.setToConstant(A_val); A.addDiagonal(alpha); - fail += verifyAnswer(&A, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - const bool isOnDiagonal = (i == j); - return isOnDiagonal ? A_val + alpha : A_val; - }); + fail += verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + const bool isOnDiagonal = (i == j); + return isOnDiagonal ? A_val + alpha : A_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); @@ -444,17 +395,14 @@ class MatrixTestsDense : public TestBase * - addSubDiagonal(int, const double&, const hiopVector&, int, int) * - addSubDiagonal(int, int, const double&) */ - int matrixAddSubDiagonal( - hiop::hiopMatrixDense& A, - hiop::hiopVector& x, - const int rank=0) + int matrixAddSubDiagonal(hiop::hiopMatrixDense& A, hiop::hiopVector& x, const int rank = 0) { - int fail = 0; - const local_ordinal_type N = getNumLocCols(&A); + int fail = 0; + const local_ordinal_type N = getNumLocCols(&A); const local_ordinal_type x_len = getLocalSize(&x); - const real_type alpha = half; - const real_type A_val = half; - const real_type x_val = one; + const real_type alpha = half; + const real_type A_val = half; + const real_type x_val = one; assert(N == A.n() && "Test should only be ran sequentially."); assert(N == A.m() && "Test should only run with symmetric matrices."); @@ -465,46 +413,38 @@ class MatrixTestsDense : public TestBase A.setToConstant(A_val); x.setToConstant(x_val); A.addSubDiagonal(alpha, start_idx, x); - fail += verifyAnswer(&A, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - const bool isOnSubDiagonal = (i>=start_idx && i==j); - return isOnSubDiagonal ? A_val + x_val * alpha : A_val; - }); + fail += verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + const bool isOnSubDiagonal = (i >= start_idx && i == j); + return isOnSubDiagonal ? A_val + x_val * alpha : A_val; + }); // We're only going to add n-1 elements of the vector // Test the overload that specifies subset of the vector // to be added to subdiagonal - local_ordinal_type start_idx_src = 1; + local_ordinal_type start_idx_src = 1; local_ordinal_type num_elements_to_add = x_len - start_idx_src; - local_ordinal_type start_idx_dest = (N - x_len) + start_idx_src; + local_ordinal_type start_idx_dest = (N - x_len) + start_idx_src; A.setToConstant(A_val); x.setToConstant(x_val); A.addSubDiagonal(start_idx_dest, alpha, x, start_idx_src, num_elements_to_add); - fail += verifyAnswer(&A, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - const bool isOnSubDiagonal = (i>=start_idx_dest && i==j); - return isOnSubDiagonal ? A_val + x_val * alpha : A_val; - }); + fail += verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + const bool isOnSubDiagonal = (i >= start_idx_dest && i == j); + return isOnSubDiagonal ? A_val + x_val * alpha : A_val; + }); // Operating on N-2 elements s.t. the first and last elements of the sub // diagonal are not operated on. - start_idx_dest = 1; - const double c = two; - const int num_elems = N - 2; + start_idx_dest = 1; + const double c = two; + const int num_elems = N - 2; A.setToConstant(A_val); A.addSubDiagonal(start_idx_dest, num_elems, c); - fail += verifyAnswer(&A, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - const bool isOperatedOn = i >= start_idx_dest && - i == j && - i < start_idx_dest + num_elems; - return isOperatedOn ? A_val + c : A_val; - }); + fail += verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + const bool isOperatedOn = i >= start_idx_dest && i == j && i < start_idx_dest + num_elems; + return isOperatedOn ? A_val + c : A_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); @@ -513,16 +453,11 @@ class MatrixTestsDense : public TestBase /* * A += alpha * B */ - int matrixAddMatrix( - hiop::hiopMatrixDense& A, - hiop::hiopMatrixDense& B, - const int rank) + int matrixAddMatrix(hiop::hiopMatrixDense& A, hiop::hiopMatrixDense& B, const int rank) { assert(getNumLocRows(&A) == getNumLocRows(&B)); assert(getNumLocCols(&A) == getNumLocCols(&B)); - const real_type alpha = half, - A_val = half, - B_val = one; + const real_type alpha = half, A_val = half, B_val = one; A.setToConstant(A_val); B.setToConstant(B_val); @@ -541,10 +476,7 @@ class MatrixTestsDense : public TestBase * * Precondition: W is square */ - int matrixTransAddToSymDenseMatrixUpperTriangle( - hiop::hiopMatrixDense& W, - hiop::hiopMatrixDense& A, - const int rank=0) + int matrixTransAddToSymDenseMatrixUpperTriangle(hiop::hiopMatrixDense& W, hiop::hiopMatrixDense& A, const int rank = 0) { const local_ordinal_type N_loc = getNumLocCols(&W); const local_ordinal_type A_M = getNumLocRows(&A); @@ -555,22 +487,17 @@ class MatrixTestsDense : public TestBase const local_ordinal_type start_idx_row = 0; const local_ordinal_type start_idx_col = N_loc - A_M; - const real_type alpha = half, - A_val = half, - W_val = one; + const real_type alpha = half, A_val = half, W_val = one; A.setToConstant(A_val); W.setToConstant(W_val); A.transAddToSymDenseMatrixUpperTriangle(start_idx_row, start_idx_col, alpha, W); - const int fail = verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - const bool isTransUpperTriangle = ( - i>=start_idx_row && i=start_idx_col && j real_type { + const bool isTransUpperTriangle = + (i >= start_idx_row && i < start_idx_row + A_N_loc && j >= start_idx_col && j < start_idx_col + A_M); - return isTransUpperTriangle ? W_val + A_val*alpha : W_val; - }); + return isTransUpperTriangle ? W_val + A_val * alpha : W_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); @@ -584,10 +511,9 @@ class MatrixTestsDense : public TestBase * A is square * degree of A <= degree of W */ - int matrixAddUpperTriangleToSymDenseMatrixUpperTriangle( - hiop::hiopMatrixDense& W, - hiop::hiopMatrixDense& A, - const int rank=0) + int matrixAddUpperTriangleToSymDenseMatrixUpperTriangle(hiop::hiopMatrixDense& W, + hiop::hiopMatrixDense& A, + const int rank = 0) { const local_ordinal_type A_M = getNumLocRows(&A); const local_ordinal_type A_N = getNumLocCols(&A); @@ -600,19 +526,15 @@ class MatrixTestsDense : public TestBase // at W's upper left corner const local_ordinal_type diag_start = 0; int fail = 0; - const real_type alpha = half, - A_val = half, - W_val = one; + const real_type alpha = half, A_val = half, W_val = one; A.setToConstant(A_val); W.setToConstant(W_val); A.addUpperTriangleToSymDenseMatrixUpperTriangle(diag_start, alpha, W); - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - bool isUpperTriangle = (i>=diag_start && i=i && j real_type { + bool isUpperTriangle = (i >= diag_start && i < diag_start + A_N && j >= i && j < diag_start + A_M); + return isUpperTriangle ? W_val + A_val * alpha : W_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); @@ -622,22 +544,20 @@ class MatrixTestsDense : public TestBase * Set bottom right value to ensure that all values * are checked. */ - int matrixMaxAbsValue( - hiop::hiopMatrixDense& A, - const int rank) + int matrixMaxAbsValue(hiop::hiopMatrixDense& A, const int rank) { - const local_ordinal_type last_row_idx = getNumLocRows(&A)-1; - const local_ordinal_type last_col_idx = getNumLocCols(&A)-1; + const local_ordinal_type last_row_idx = getNumLocRows(&A) - 1; + const local_ordinal_type last_col_idx = getNumLocCols(&A) - 1; int fail = 0; // Positive largest value A.setToConstant(zero); - if (rank == 0) setLocalElement(&A, last_row_idx, last_col_idx, one); + if(rank == 0) setLocalElement(&A, last_row_idx, last_col_idx, one); fail += A.max_abs_value() != one; // Negative largest value A.setToConstant(zero); - if (rank == 0) setLocalElement(&A, last_row_idx, last_col_idx, -one); + if(rank == 0) setLocalElement(&A, last_row_idx, last_col_idx, -one); fail += A.max_abs_value() != one; printMessage(fail, __func__, rank); @@ -648,29 +568,24 @@ class MatrixTestsDense : public TestBase * Set bottom right value to ensure that all values * are checked. */ - int matrix_row_max_abs_value( - hiop::hiopMatrixDense& A, - hiop::hiopVector& x, - const int rank) + int matrix_row_max_abs_value(hiop::hiopMatrixDense& A, hiop::hiopVector& x, const int rank) { - const local_ordinal_type last_row_idx = getNumLocRows(&A)-1; - const local_ordinal_type last_col_idx = getNumLocCols(&A)-1; + const local_ordinal_type last_row_idx = getNumLocRows(&A) - 1; + const local_ordinal_type last_col_idx = getNumLocCols(&A) - 1; int fail = 0; // set the last element to -2, others are set to 1 A.setToConstant(one); - if (rank == 0) { + if(rank == 0) { setLocalElement(&A, last_row_idx, last_col_idx, -two); } - + A.row_max_abs_value(x); - - fail += verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - const bool is_last_row = (i == last_row_idx); - return is_last_row ? two : one; - }); + + fail += verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { + const bool is_last_row = (i == last_row_idx); + return is_last_row ? two : one; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); @@ -679,10 +594,7 @@ class MatrixTestsDense : public TestBase /* * scale each row of A */ - int matrix_scale_row( - hiop::hiopMatrixDense& A, - hiop::hiopVector& x, - const int rank) + int matrix_scale_row(hiop::hiopMatrixDense& A, hiop::hiopVector& x, const int rank) { const real_type A_val = two; const real_type x_val = three; @@ -691,9 +603,9 @@ class MatrixTestsDense : public TestBase x.setToConstant(x_val); A.setToConstant(A_val); - A.scale_row(x,false); + A.scale_row(x, false); - real_type expected = A_val*x_val; + real_type expected = A_val * x_val; fail += verifyAnswer(&A, expected); printMessage(fail, __func__, rank); @@ -704,44 +616,39 @@ class MatrixTestsDense : public TestBase * Set bottom right value to ensure that all values * are checked. */ - int matrixIsFinite( - hiop::hiopMatrixDense& A, - const int rank) + int matrixIsFinite(hiop::hiopMatrixDense& A, const int rank) { - const local_ordinal_type last_row_idx = getNumLocRows(&A)-1; - const local_ordinal_type last_col_idx = getNumLocCols(&A)-1; + const local_ordinal_type last_row_idx = getNumLocRows(&A) - 1; + const local_ordinal_type last_col_idx = getNumLocCols(&A) - 1; int fail = 0; A.setToConstant(zero); - if (!A.isfinite()) fail++; + if(!A.isfinite()) fail++; A.setToConstant(zero); - if (rank == 0) setLocalElement(&A, last_row_idx, last_col_idx, INFINITY); - if (!A.isfinite() && rank != 0) fail++; + if(rank == 0) setLocalElement(&A, last_row_idx, last_col_idx, INFINITY); + if(!A.isfinite() && rank != 0) fail++; printMessage(fail, __func__, rank); return reduceReturn(fail, &A); } -////////////////////////////////////////////////////////////////////// -// Test for methods from hiopMatrixDense that are not part of the -// abstract class hiopMatrix -////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////// + // Test for methods from hiopMatrixDense that are not part of the + // abstract class hiopMatrix + ////////////////////////////////////////////////////////////////////// /** * @brief Test method for appending matrix row. - * + * * @pre Matrix `A` must have space allocated for appending the row. */ - int matrixAppendRow( - hiopMatrixDense& A, - hiopVector& vec, - const int rank) + int matrixAppendRow(hiopMatrixDense& A, hiopVector& vec, const int rank) { - assert(A.n() == vec.get_size() - && "Did you pass in a vector with the same length as the number of columns of the matrix?"); - assert(getNumLocCols(&A) == vec.get_local_size() - && "Did you pass in a vector with the same length as the number of columns of the matrix?"); + assert(A.n() == vec.get_size() && + "Did you pass in a vector with the same length as the number of columns of the matrix?"); + assert(getNumLocCols(&A) == vec.get_local_size() && + "Did you pass in a vector with the same length as the number of columns of the matrix?"); const global_ordinal_type init_num_rows = A.m(); const real_type A_val = one; const real_type vec_val = two; @@ -752,19 +659,16 @@ class MatrixTestsDense : public TestBase A.appendRow(vec); // Ensure A's num rows is updated - if (A.m() != init_num_rows + 1) - fail++; + if(A.m() != init_num_rows + 1) fail++; // Ensure vec's values are copied over to A's last row - fail += verifyAnswer(&A, - [=](local_ordinal_type i, local_ordinal_type j) -> real_type - { - // Rows are always global in HiOp (for now) - auto irow = static_cast(i); - (void)j; // j is unused - const bool isLastRow = (irow == init_num_rows); - return isLastRow ? vec_val : A_val; - }); + fail += verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + // Rows are always global in HiOp (for now) + auto irow = static_cast(i); + (void)j; // j is unused + const bool isLastRow = (irow == init_num_rows); + return isLastRow ? vec_val : A_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); @@ -775,10 +679,7 @@ class MatrixTestsDense : public TestBase * `dst_start_idx` in the same order. * */ - int matrixCopyRowsFrom( - hiopMatrixDense& dst, - hiopMatrixDense& src, - const int rank) + int matrixCopyRowsFrom(hiopMatrixDense& dst, hiopMatrixDense& src, const int rank) { assert(dst.n() == src.n()); assert(dst.m() > src.m()); @@ -796,15 +697,11 @@ class MatrixTestsDense : public TestBase dst.copyRowsFrom(src, num_rows_to_copy, dst_start_idx); - int fail = verifyAnswer(&dst, - [=](local_ordinal_type i, local_ordinal_type j) -> real_type - { - (void)j; // j is unused - const bool isRowCopiedOver = ( - i >= dst_start_idx && - i < dst_start_idx + src_num_rows); - return isRowCopiedOver ? src_val : dst_val; - }); + int fail = verifyAnswer(&dst, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + (void)j; // j is unused + const bool isRowCopiedOver = (i >= dst_start_idx && i < dst_start_idx + src_num_rows); + return isRowCopiedOver ? src_val : dst_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &dst); @@ -815,10 +712,7 @@ class MatrixTestsDense : public TestBase * specified by index array `row_idxs`. * */ - int matrixCopyRowsFromSelect(hiopMatrixDense& dst, - hiopMatrixDense& src, - hiopVectorInt& rows_idxs, - const int rank) + int matrixCopyRowsFromSelect(hiopMatrixDense& dst, hiopMatrixDense& src, hiopVectorInt& rows_idxs, const int rank) { assert(dst.n() == src.n()); assert(getNumLocCols(&dst) == getNumLocCols(&src)); @@ -834,7 +728,7 @@ class MatrixTestsDense : public TestBase hiopVectorIntSeq rows_idxs_host(rows_idxs.get_local_size()); index_type* rows_idxs_arr = rows_idxs_host.local_data(); - for (index_type i = 0; i < num_rows_to_copy; ++i) { + for(index_type i = 0; i < num_rows_to_copy; ++i) { rows_idxs_arr[i] = i; } rows_idxs_arr[0] = num_rows_to_copy - 1; @@ -845,28 +739,20 @@ class MatrixTestsDense : public TestBase dst.copyRowsFrom(src, rows_idxs.local_data(), num_rows_to_copy); - int fail = verifyAnswer(&dst, - [=](local_ordinal_type i, local_ordinal_type j) -> real_type - { - (void)j; // j is unused - return i == 0 ? zero : src_val; - }); + int fail = verifyAnswer(&dst, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + (void)j; // j is unused + return i == 0 ? zero : src_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &dst); } - int matrixCopyBlockFromMatrix( - hiopMatrixDense& src, - hiopMatrixDense& dst, - const int rank=0) + int matrixCopyBlockFromMatrix(hiopMatrixDense& src, hiopMatrixDense& dst, const int rank = 0) { - assert(src.n() < dst.n() - && "Src mat must be smaller than dst mat"); - assert(src.m() < dst.m() - && "Src mat must be smaller than dst mat"); - assert(getNumLocCols(&src) < getNumLocCols(&dst) - && "Src mat must be smaller than dst mat"); + assert(src.n() < dst.n() && "Src mat must be smaller than dst mat"); + assert(src.m() < dst.m() && "Src mat must be smaller than dst mat"); + assert(getNumLocCols(&src) < getNumLocCols(&dst) && "Src mat must be smaller than dst mat"); const real_type src_val = one; const real_type dst_val = two; @@ -881,30 +767,21 @@ class MatrixTestsDense : public TestBase dst.setToConstant(dst_val); dst.copyBlockFromMatrix(dst_start_row, dst_start_col, src); - const int fail = verifyAnswer(&dst, - [=](local_ordinal_type i, local_ordinal_type j) -> real_type - { - const bool isIdxCopiedFromSource = ( - i >= dst_start_row && i < dst_start_row + src_num_rows && - j >= dst_start_col && j < dst_start_col + src_num_cols); - return isIdxCopiedFromSource ? src_val : dst_val; - }); + const int fail = verifyAnswer(&dst, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + const bool isIdxCopiedFromSource = + (i >= dst_start_row && i < dst_start_row + src_num_rows && j >= dst_start_col && j < dst_start_col + src_num_cols); + return isIdxCopiedFromSource ? src_val : dst_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &dst); } - int matrixCopyFromMatrixBlock( - hiopMatrixDense& src, - hiopMatrixDense& dst, - const int rank=0) + int matrixCopyFromMatrixBlock(hiopMatrixDense& src, hiopMatrixDense& dst, const int rank = 0) { - assert(src.n() > dst.n() - && "Src mat must be larger than dst mat"); - assert(src.m() > dst.m() - && "Src mat must be larger than dst mat"); - assert(getNumLocCols(&src) > getNumLocCols(&dst) - && "Src mat must be larger than dst mat"); + assert(src.n() > dst.n() && "Src mat must be larger than dst mat"); + assert(src.m() > dst.m() && "Src mat must be larger than dst mat"); + assert(getNumLocCols(&src) > getNumLocCols(&dst) && "Src mat must be larger than dst mat"); const local_ordinal_type dst_m = getNumLocRows(&dst); const local_ordinal_type dst_n = getNumLocCols(&dst); const local_ordinal_type src_m = getNumLocRows(&src); @@ -915,28 +792,25 @@ class MatrixTestsDense : public TestBase const real_type src_val = one; const real_type dst_val = two; src.setToConstant(src_val); - if (rank == 0) - setLocalElement(&src, src_m - 1, src_n - 1, zero); + if(rank == 0) setLocalElement(&src, src_m - 1, src_n - 1, zero); dst.setToConstant(dst_val); dst.copyFromMatrixBlock(src, block_start_row, block_start_col); - const int fail = verifyAnswer(&dst, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - // This is the element set to zero in src - // before being copied over - if (i == dst_m && j == dst_n && rank == 0) - return zero; - else - return src_val; - }); + const int fail = verifyAnswer(&dst, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + // This is the element set to zero in src + // before being copied over + if(i == dst_m && j == dst_n && rank == 0) + return zero; + else + return src_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &dst); } - int matrix_set_Hess_FR(hiopMatrixDense& src, hiopMatrixDense& dst, hiopVector& diag, const int rank=0) + int matrix_set_Hess_FR(hiopMatrixDense& src, hiopMatrixDense& dst, hiopVector& diag, const int rank = 0) { assert(src.n() == src.m() && "Src mat must be square mat"); assert(src.n() == dst.n() && "Src mat must be equal to dst mat"); @@ -949,17 +823,15 @@ class MatrixTestsDense : public TestBase diag.setToConstant(diag_val); dst.set_Hess_FR(src, diag); - - const int fail = verifyAnswer(&dst, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - // This is the element set to zero in src - // before being copied over - if (i == j && rank == 0) - return src_val + diag_val; - else - return src_val; - }); + + const int fail = verifyAnswer(&dst, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + // This is the element set to zero in src + // before being copied over + if(i == j && rank == 0) + return src_val + diag_val; + else + return src_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &dst); @@ -976,9 +848,7 @@ class MatrixTestsDense : public TestBase * The uppermost row is not overwritten by the 1-row that would * wrap around and replace it. */ - int matrixShiftRows( - hiopMatrixDense& A, - const int rank) + int matrixShiftRows(hiopMatrixDense& A, const int rank) { const local_ordinal_type M = getNumLocRows(&A); local_ordinal_type uniq_row_idx = 0; @@ -993,15 +863,11 @@ class MatrixTestsDense : public TestBase setLocalRow(&A, uniq_row_idx, uniq_row_val); A.shiftRows(shift); - fail += verifyAnswer(&A, - [=](local_ordinal_type i, local_ordinal_type j) -> real_type - { - (void)j; // j is unused - const bool isUniqueRow = ( - i == (uniq_row_idx + shift) || - i == uniq_row_idx); - return isUniqueRow ? uniq_row_val : A_val; - }); + fail += verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + (void)j; // j is unused + const bool isUniqueRow = (i == (uniq_row_idx + shift) || i == uniq_row_idx); + return isUniqueRow ? uniq_row_val : A_val; + }); // Now check negative shift shift *= -1; @@ -1010,24 +876,17 @@ class MatrixTestsDense : public TestBase setLocalRow(&A, uniq_row_idx, uniq_row_val); A.shiftRows(shift); - fail += verifyAnswer(&A, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - (void)j; // j is unused - const bool isUniqueRow = ( - i == (uniq_row_idx + shift) || - i == uniq_row_idx); - return isUniqueRow ? uniq_row_val : A_val; - }); + fail += verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + (void)j; // j is unused + const bool isUniqueRow = (i == (uniq_row_idx + shift) || i == uniq_row_idx); + return isUniqueRow ? uniq_row_val : A_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); } - int matrixReplaceRow( - hiopMatrixDense& A, - hiopVector& vec, - const int rank) + int matrixReplaceRow(hiopMatrixDense& A, hiopVector& vec, const int rank) { const local_ordinal_type M = getNumLocRows(&A); assert(getNumLocCols(&A) == vec.get_local_size() && "Did you pass a vector and matrix of compatible lengths?"); @@ -1042,33 +901,26 @@ class MatrixTestsDense : public TestBase setLocalElement(&vec, col_idx, zero); A.replaceRow(row_idx, vec); - const int fail = verifyAnswer(&A, - [=](local_ordinal_type i, local_ordinal_type j) -> real_type - { - // Was the row replaced? - if (i == row_idx) - { - // Was the value at col_idx set to zero? - if (j == col_idx) - return zero; - else - return vec_val; - } - // The matrix should be otherwise unchanged. + const int fail = verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + // Was the row replaced? + if(i == row_idx) { + // Was the value at col_idx set to zero? + if(j == col_idx) + return zero; else - { - return A_val; - } - }); + return vec_val; + } + // The matrix should be otherwise unchanged. + else { + return A_val; + } + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); } - int matrixGetRow( - hiopMatrixDense& A, - hiopVector& vec, - const int rank) + int matrixGetRow(hiopMatrixDense& A, hiopVector& vec, const int rank) { const local_ordinal_type N = getNumLocCols(&A); const local_ordinal_type M = getNumLocRows(&A); @@ -1081,27 +933,22 @@ class MatrixTestsDense : public TestBase const real_type A_val = one; const real_type vec_val = two; A.setToConstant(A_val); - if (rank == 0) - setLocalElement(&A, row_idx, col_idx, zero); + if(rank == 0) setLocalElement(&A, row_idx, col_idx, zero); vec.setToConstant(vec_val); A.getRow(row_idx, vec); - const int fail = verifyAnswer(&vec, - [=](local_ordinal_type i) -> real_type - { - if (rank == 0 && i == col_idx) - return zero; - else - return A_val; - }); + const int fail = verifyAnswer(&vec, [=](local_ordinal_type i) -> real_type { + if(rank == 0 && i == col_idx) + return zero; + else + return A_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); } - int matrix_symmetrize( - hiop::hiopMatrixDense& A, - const int rank=0) + int matrix_symmetrize(hiop::hiopMatrixDense& A, const int rank = 0) { const local_ordinal_type M = getNumLocRows(&A); const local_ordinal_type N = getNumLocCols(&A); @@ -1113,8 +960,8 @@ class MatrixTestsDense : public TestBase A.setToZero(); // Set the upper triangular part to one - for(int i=0; i real_type - { - bool is_diagonal = ( j==i ); - return is_diagonal ? diag_val : upper_val; - }); + fail += verifyAnswer(&A, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + bool is_diagonal = (j == i); + return is_diagonal ? diag_val : upper_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &A); } #ifdef HIOP_DEEPCHECKS - int matrixAssertSymmetry( - hiop::hiopMatrixDense& A, - const int rank=0) + int matrixAssertSymmetry(hiop::hiopMatrixDense& A, const int rank = 0) { const local_ordinal_type M = getNumLocRows(&A); const local_ordinal_type N = getNumLocCols(&A); @@ -1149,14 +992,11 @@ class MatrixTestsDense : public TestBase fail += !A.assertSymmetry(eps); // Set first row and column to zero globally - for (int i=0; i expect) = 0; + virtual int verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) = 0; virtual bool reduceReturn(int failures, hiop::hiopMatrixDense* A) = 0; // Vector helper function - virtual void setLocalElement( - hiop::hiopVector *_x, - const local_ordinal_type i, - const real_type val) = 0; - virtual real_type getLocalElement( - const hiop::hiopVector* x, - local_ordinal_type i) = 0; + virtual void setLocalElement(hiop::hiopVector* _x, const local_ordinal_type i, const real_type val) = 0; + virtual real_type getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) = 0; virtual local_ordinal_type getLocalSize(const hiop::hiopVector* x) = 0; virtual int verifyAnswer(hiop::hiopVector* x, real_type answer) = 0; - virtual int verifyAnswer( - hiop::hiopVector* x, - std::function expect) = 0; + virtual int verifyAnswer(hiop::hiopVector* x, std::function expect) = 0; }; -}} // namespace hiop{ namespace tests{ +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsDenseRowMajor.cpp b/tests/LinAlg/matrixTestsDenseRowMajor.cpp index bff4e2bca..dd068a1e5 100644 --- a/tests/LinAlg/matrixTestsDenseRowMajor.cpp +++ b/tests/LinAlg/matrixTestsDenseRowMajor.cpp @@ -57,7 +57,10 @@ #include #include "matrixTestsDenseRowMajor.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ // // Matrix helper methods @@ -67,8 +70,7 @@ namespace hiop { namespace tests { local_ordinal_type MatrixTestsDenseRowMajor::getNumLocRows(const hiop::hiopMatrixDense* A) { const auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; // get_local_size_m returns global ordinal type! HiOp issue? return static_cast(amat->get_local_size_m()); @@ -79,8 +81,7 @@ local_ordinal_type MatrixTestsDenseRowMajor::getNumLocRows(const hiop::hiopMatri local_ordinal_type MatrixTestsDenseRowMajor::getNumLocCols(const hiop::hiopMatrixDense* A) { const auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; // Local sizes should be returned as local ordinal type return static_cast(amat->get_local_size_n()); @@ -88,60 +89,50 @@ local_ordinal_type MatrixTestsDenseRowMajor::getNumLocCols(const hiop::hiopMatri } /// Set local data element (i,j) of matrix _A_ to _val_. -void MatrixTestsDenseRowMajor::setLocalElement( - hiop::hiopMatrixDense* A, - local_ordinal_type i, - local_ordinal_type j, - real_type val) +void MatrixTestsDenseRowMajor::setLocalElement(hiop::hiopMatrixDense* A, + local_ordinal_type i, + local_ordinal_type j, + real_type val) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; real_type* data = amat->local_data(); local_ordinal_type ncols = getNumLocCols(A); - //data[i][j] = val; - data[i*ncols + j] = val; + // data[i][j] = val; + data[i * ncols + j] = val; } /// Method to set a single local row of matrix to a constant value -void MatrixTestsDenseRowMajor::setLocalRow( - hiop::hiopMatrixDense* A, - const local_ordinal_type row, - const real_type val) +void MatrixTestsDenseRowMajor::setLocalRow(hiop::hiopMatrixDense* A, const local_ordinal_type row, const real_type val) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; const local_ordinal_type N = getNumLocCols(amat); - for (int i=0; i(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; const real_type* data = amat->local_data_const(); local_ordinal_type ncols = getNumLocCols(A); - return data[i*ncols + j]; + return data[i * ncols + j]; } /// Get MPI communicator of matrix _A_ MPI_Comm MatrixTestsDenseRowMajor::getMPIComm(hiop::hiopMatrixDense* A) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; return amat->get_mpi_comm(); } @@ -150,8 +141,7 @@ MPI_Comm MatrixTestsDenseRowMajor::getMPIComm(hiop::hiopMatrixDense* A) const real_type* MatrixTestsDenseRowMajor::getLocalDataConst(hiop::hiopMatrixDense* A) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; return amat->local_data_const(); } @@ -160,8 +150,7 @@ const real_type* MatrixTestsDenseRowMajor::getLocalDataConst(hiop::hiopMatrixDen real_type* MatrixTestsDenseRowMajor::getLocalData(hiop::hiopMatrixDense* A) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; return amat->local_data(); } @@ -174,7 +163,7 @@ bool MatrixTestsDenseRowMajor::reduceReturn(int failures, hiop::hiopMatrixDense* #ifdef HIOP_USE_MPI MPI_Allreduce(&failures, &fail, 1, MPI_INT, MPI_SUM, getMPIComm(A)); #else - (void) A; + (void)A; fail = failures; #endif @@ -189,13 +178,10 @@ int MatrixTestsDenseRowMajor::verifyAnswer(hiop::hiopMatrixDense* A, const doubl local_ordinal_type ncols = getNumLocCols(A); int fail = 0; - for (local_ordinal_type i=0; i expect) +int MatrixTestsDenseRowMajor::verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) { local_ordinal_type mrows = getNumLocRows(A); local_ordinal_type ncols = getNumLocCols(A); int fail = 0; - for (local_ordinal_type i=0; i(x); - if(xvec == nullptr) - THROW_NULL_DEREF; + if(xvec == nullptr) THROW_NULL_DEREF; return static_cast(xvec->get_local_size()); } /// Sets a local data element of vector _x_ -void MatrixTestsDenseRowMajor::setLocalElement( - hiop::hiopVector* x, - const local_ordinal_type i, - const real_type val) +void MatrixTestsDenseRowMajor::setLocalElement(hiop::hiopVector* x, const local_ordinal_type i, const real_type val) { auto* xvec = dynamic_cast(x); if(xvec == nullptr) { THROW_NULL_DEREF; } - real_type* data = x->local_data(); - data[i] = val; + real_type* data = x->local_data(); + data[i] = val; } /// Returns local data element _i_ of vector _x_. -real_type MatrixTestsDenseRowMajor::getLocalElement( - const hiop::hiopVector* x, - local_ordinal_type i) +real_type MatrixTestsDenseRowMajor::getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) { const auto* xvec = dynamic_cast(x); - if(xvec == nullptr) - THROW_NULL_DEREF; + if(xvec == nullptr) THROW_NULL_DEREF; return xvec->local_data_const()[i]; } - /// Checks if _local_ vector elements are set to `answer`. [[nodiscard]] int MatrixTestsDenseRowMajor::verifyAnswer(hiop::hiopVector* x, double answer) @@ -283,10 +257,8 @@ int MatrixTestsDenseRowMajor::verifyAnswer(hiop::hiopVector* x, double answer) const local_ordinal_type N = getLocalSize(x); int local_fail = 0; - for(local_ordinal_type i=0; i expect) +int MatrixTestsDenseRowMajor::verifyAnswer(hiop::hiopVector* x, std::function expect) { const local_ordinal_type N = getLocalSize(x); int local_fail = 0; - for (int i=0; i expect) override; + virtual int verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) override; MPI_Comm getMPIComm(hiop::hiopMatrixDense* A); virtual bool reduceReturn(int failures, hiop::hiopMatrixDense* A) override; // Vector helper methods - virtual void setLocalElement( - hiop::hiopVector* x, - const local_ordinal_type i, - const real_type val) override; - virtual real_type getLocalElement( - const hiop::hiopVector* x, - local_ordinal_type i) override; - virtual local_ordinal_type getLocalSize(const hiop::hiopVector *x) override; - virtual int verifyAnswer(hiop::hiopVector *x, real_type answer) override; - virtual int verifyAnswer( - hiop::hiopVector *x, - std::function expect) override; + virtual void setLocalElement(hiop::hiopVector* x, const local_ordinal_type i, const real_type val) override; + virtual real_type getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) override; + virtual local_ordinal_type getLocalSize(const hiop::hiopVector* x) override; + virtual int verifyAnswer(hiop::hiopVector* x, real_type answer) override; + virtual int verifyAnswer(hiop::hiopVector* x, std::function expect) override; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsRajaDense.cpp b/tests/LinAlg/matrixTestsRajaDense.cpp index 2076da5df..b459b9012 100644 --- a/tests/LinAlg/matrixTestsRajaDense.cpp +++ b/tests/LinAlg/matrixTestsRajaDense.cpp @@ -59,10 +59,10 @@ #include #include "matrixTestsRajaDense.hpp" -//TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated -// template parameters for vector and matrix RAJA classes. Likely a better approach would be -// to revise the tests to try out multiple configurations of the memory backends and execution -// policies for RAJA dense matrix. +// TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated +// template parameters for vector and matrix RAJA classes. Likely a better approach would be +// to revise the tests to try out multiple configurations of the memory backends and execution +// policies for RAJA dense matrix. #if defined(HIOP_USE_CUDA) #include @@ -73,15 +73,17 @@ using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using hiopVectorRajaT = hiop::hiopVectorRaja; using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; #endif +namespace hiop +{ +namespace tests +{ -namespace hiop { namespace tests { - // // Matrix helper methods // @@ -90,8 +92,7 @@ namespace hiop { namespace tests { local_ordinal_type MatrixTestsRajaDense::getNumLocRows(const hiop::hiopMatrixDense* A) { const auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; return amat->get_local_size_m(); // ^^^ @@ -101,76 +102,62 @@ local_ordinal_type MatrixTestsRajaDense::getNumLocRows(const hiop::hiopMatrixDen local_ordinal_type MatrixTestsRajaDense::getNumLocCols(const hiop::hiopMatrixDense* A) { const auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; return amat->get_local_size_n(); // ^^^ } /// Set local data element (i,j) of matrix _A_ to _val_ in current memory space. -void MatrixTestsRajaDense::setLocalElement( - hiop::hiopMatrixDense* A, - local_ordinal_type i, - local_ordinal_type j, - real_type val) +void MatrixTestsRajaDense::setLocalElement(hiop::hiopMatrixDense* A, + local_ordinal_type i, + local_ordinal_type j, + real_type val) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; amat->copyFromDev(); - real_type* data = amat->local_data_host(); + real_type* data = amat->local_data_host(); local_ordinal_type ncols = getNumLocCols(A); - data[i*ncols + j] = val; + data[i * ncols + j] = val; amat->copyToDev(); } /// Set a single local row of matrix to a constant value in current memory space. -void MatrixTestsRajaDense::setLocalRow( - hiop::hiopMatrixDense* A, - const local_ordinal_type row, - const real_type val) +void MatrixTestsRajaDense::setLocalRow(hiop::hiopMatrixDense* A, const local_ordinal_type row, const real_type val) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; const local_ordinal_type N = getNumLocCols(A); amat->copyFromDev(); real_type* data = amat->local_data_host(); - for (int j=0; jcopyToDev(); } /// Returns by value local element (i,j) of matrix _A_. -real_type MatrixTestsRajaDense::getLocalElement( - const hiop::hiopMatrixDense* A, - local_ordinal_type i, - local_ordinal_type j) +real_type MatrixTestsRajaDense::getLocalElement(const hiop::hiopMatrixDense* A, local_ordinal_type i, local_ordinal_type j) { const auto* am = dynamic_cast(A); - if(am == nullptr) - THROW_NULL_DEREF; + if(am == nullptr) THROW_NULL_DEREF; auto* amat = const_cast(am); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; amat->copyFromDev(); const real_type* data = amat->local_data_host(); local_ordinal_type ncols = getNumLocCols(A); - return data[i*ncols + j]; + return data[i * ncols + j]; } /// Get MPI communicator of matrix _A_ MPI_Comm MatrixTestsRajaDense::getMPIComm(hiop::hiopMatrixDense* A) { const auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; return amat->get_mpi_comm(); } @@ -178,8 +165,7 @@ MPI_Comm MatrixTestsRajaDense::getMPIComm(hiop::hiopMatrixDense* A) const real_type* MatrixTestsRajaDense::getLocalDataConst(hiop::hiopMatrixDense* A) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; return amat->local_data_const(); } @@ -187,8 +173,7 @@ const real_type* MatrixTestsRajaDense::getLocalDataConst(hiop::hiopMatrixDense* real_type* MatrixTestsRajaDense::getLocalData(hiopMatrixDense* A) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; return amat->local_data(); } @@ -200,7 +185,7 @@ bool MatrixTestsRajaDense::reduceReturn(int failures, hiopMatrixDense* A) #ifdef HIOP_USE_MPI MPI_Allreduce(&failures, &fail, 1, MPI_INT, MPI_SUM, getMPIComm(A)); #else - (void) A; + (void)A; fail = failures; #endif @@ -212,8 +197,7 @@ bool MatrixTestsRajaDense::reduceReturn(int failures, hiopMatrixDense* A) int MatrixTestsRajaDense::verifyAnswer(hiop::hiopMatrixDense* A, const double answer) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; const local_ordinal_type M = getNumLocRows(amat); const local_ordinal_type N = getNumLocCols(amat); @@ -224,12 +208,9 @@ int MatrixTestsRajaDense::verifyAnswer(hiop::hiopMatrixDense* A, const double an int fail = 0; // RAJA matrix is stored in row-major format - for (local_ordinal_type i=0; i expect) +int MatrixTestsRajaDense::verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) { auto* amat = dynamic_cast(A); - if(amat == nullptr) - THROW_NULL_DEREF; + if(amat == nullptr) THROW_NULL_DEREF; const local_ordinal_type M = getNumLocRows(amat); const local_ordinal_type N = getNumLocCols(amat); @@ -260,15 +239,10 @@ int MatrixTestsRajaDense::verifyAnswer( const real_type* local_matrix_data = amat->local_data_host(); int fail = 0; - for (local_ordinal_type i=0; i(x); - if(xvec == nullptr) - THROW_NULL_DEREF; + if(xvec == nullptr) THROW_NULL_DEREF; return xvec->get_local_size(); } /// Sets a local data element of vector _x_ in current memory space -void MatrixTestsRajaDense::setLocalElement( - hiop::hiopVector* x, - const local_ordinal_type i, - const real_type val) +void MatrixTestsRajaDense::setLocalElement(hiop::hiopVector* x, const local_ordinal_type i, const real_type val) { auto* xvec = dynamic_cast(x); - if(xvec == nullptr) - THROW_NULL_DEREF; + if(xvec == nullptr) THROW_NULL_DEREF; xvec->copyFromDev(); real_type* data = xvec->local_data_host(); @@ -307,16 +276,12 @@ void MatrixTestsRajaDense::setLocalElement( } /// Returns local data element _i_ of vector _x_ by value on the host. -real_type MatrixTestsRajaDense::getLocalElement( - const hiop::hiopVector* x, - local_ordinal_type i) +real_type MatrixTestsRajaDense::getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) { const auto* xv = dynamic_cast(x); - if(xv == nullptr) - THROW_NULL_DEREF; + if(xv == nullptr) THROW_NULL_DEREF; auto* xvec = const_cast(xv); - if(xvec == nullptr) - THROW_NULL_DEREF; + if(xvec == nullptr) THROW_NULL_DEREF; xvec->copyFromDev(); return xvec->local_data_host_const()[i]; @@ -327,8 +292,7 @@ real_type MatrixTestsRajaDense::getLocalElement( int MatrixTestsRajaDense::verifyAnswer(hiop::hiopVector* x, double answer) { auto* xvec = dynamic_cast(x); - if(xvec == nullptr) - THROW_NULL_DEREF; + if(xvec == nullptr) THROW_NULL_DEREF; const local_ordinal_type N = getLocalSize(xvec); @@ -338,10 +302,8 @@ int MatrixTestsRajaDense::verifyAnswer(hiop::hiopVector* x, double answer) const real_type* local_data = xvec->local_data_host_const(); int local_fail = 0; - for(local_ordinal_type i=0; i expect) +int MatrixTestsRajaDense::verifyAnswer(hiop::hiopVector* x, std::function expect) { auto* xvec = dynamic_cast(x); - if(xvec == nullptr) - THROW_NULL_DEREF; + if(xvec == nullptr) THROW_NULL_DEREF; const local_ordinal_type N = getLocalSize(xvec); @@ -367,10 +326,8 @@ int MatrixTestsRajaDense::verifyAnswer( const real_type* local_data = xvec->local_data_host_const(); int local_fail = 0; - for(local_ordinal_type i=0; i expect) override; + virtual int verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) override; MPI_Comm getMPIComm(hiop::hiopMatrixDense* A); virtual bool reduceReturn(int failures, hiop::hiopMatrixDense* A) override; // Vector helper methods - virtual void setLocalElement( - hiop::hiopVector* x, - const local_ordinal_type i, - const real_type val) override; - virtual real_type getLocalElement( - const hiop::hiopVector* x, - local_ordinal_type i) override; - virtual local_ordinal_type getLocalSize(const hiop::hiopVector *x) override; - virtual int verifyAnswer(hiop::hiopVector *x, real_type answer) override; - virtual int verifyAnswer( - hiop::hiopVector *x, - std::function expect) override; + virtual void setLocalElement(hiop::hiopVector* x, const local_ordinal_type i, const real_type val) override; + virtual real_type getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) override; + virtual local_ordinal_type getLocalSize(const hiop::hiopVector* x) override; + virtual int verifyAnswer(hiop::hiopVector* x, real_type answer) override; + virtual int verifyAnswer(hiop::hiopVector* x, std::function expect) override; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsRajaSparseTriplet.cpp b/tests/LinAlg/matrixTestsRajaSparseTriplet.cpp index ac4699262..8afda657d 100644 --- a/tests/LinAlg/matrixTestsRajaSparseTriplet.cpp +++ b/tests/LinAlg/matrixTestsRajaSparseTriplet.cpp @@ -53,7 +53,7 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #include @@ -63,88 +63,84 @@ #include "matrixTestsRajaSparseTriplet.hpp" #include "hiopVectorIntRaja.hpp" -//TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated -// template parameters for vector and matrix RAJA classes. Likely a better approach would be -// to revise the tests to try out multiple configurations of the memory backends and execution -// policies for RAJA dense matrix. +// TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated +// template parameters for vector and matrix RAJA classes. Likely a better approach would be +// to revise the tests to try out multiple configurations of the memory backends and execution +// policies for RAJA dense matrix. #if defined(HIOP_USE_CUDA) #include using hiopVectorRajaT = hiop::hiopVectorRaja; using hiopVectorIntRajaT = hiop::hiopVectorIntRaja; using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; using hiopMatrixSparseTripletRajaT = hiop::hiopMatrixRajaSparseTriplet; -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #elif defined(HIOP_USE_HIP) #include using hiopVectorRajaT = hiop::hiopVectorRaja; using hiopVectorIntRajaT = hiop::hiopVectorIntRaja; using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; using hiopMatrixSparseTripletRajaT = hiop::hiopMatrixRajaSparseTriplet; -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using hiopVectorRajaT = hiop::hiopVectorRaja; using hiopVectorIntRajaT = hiop::hiopVectorIntRaja; using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; using hiopMatrixSparseTripletRajaT = hiop::hiopMatrixRajaSparseTriplet; -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #endif +namespace hiop +{ +namespace tests +{ -namespace hiop{ namespace tests { - -/// Set `i`th element of vector `x` -void MatrixTestsRajaSparseTriplet::setLocalElement( - hiop::hiopVector* xvec, - const local_ordinal_type i, - const real_type val) +/// Set `i`th element of vector `x` +void MatrixTestsRajaSparseTriplet::setLocalElement(hiop::hiopVector* xvec, const local_ordinal_type i, const real_type val) { auto x = dynamic_cast(xvec); - if(x != nullptr) - { + if(x != nullptr) { x->copyFromDev(); real_type* data = x->local_data_host(); data[i] = val; x->copyToDev(); - } - else THROW_NULL_DEREF; + } else + THROW_NULL_DEREF; } /// Returns element (i,j) of a dense matrix `A`. /// First need to retrieve hiopMatrixDense from the abstract interface -real_type MatrixTestsRajaSparseTriplet::getLocalElement( - const hiop::hiopMatrix* A, - local_ordinal_type row, - local_ordinal_type col) +real_type MatrixTestsRajaSparseTriplet::getLocalElement(const hiop::hiopMatrix* A, + local_ordinal_type row, + local_ordinal_type col) { const auto* mat = dynamic_cast(A); - - if (mat != nullptr) - { + + if(mat != nullptr) { auto* amat = const_cast(mat); amat->copyFromDev(); - //double** M = amat->get_M_host(); - //return M[row][col]; - return amat->local_data_const()[row*amat->get_local_size_n() + col]; - } - else THROW_NULL_DEREF; + // double** M = amat->get_M_host(); + // return M[row][col]; + return amat->local_data_const()[row * amat->get_local_size_n() + col]; + } else + THROW_NULL_DEREF; } /// Returns element _i_ of vector _x_. /// First need to retrieve hiopVectorPar from the abstract interface -real_type MatrixTestsRajaSparseTriplet::getLocalElement( - const hiop::hiopVector* x, - local_ordinal_type i) +real_type MatrixTestsRajaSparseTriplet::getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) { const auto* xvec = dynamic_cast(x); - if(xvec != nullptr) - { + if(xvec != nullptr) { auto* axvec = const_cast(xvec); axvec->copyFromDev(); return xvec->local_data_host_const()[i]; - } - else THROW_NULL_DEREF; + } else + THROW_NULL_DEREF; } real_type* MatrixTestsRajaSparseTriplet::getMatrixData(hiop::hiopMatrixSparse* A) @@ -157,14 +153,14 @@ real_type* MatrixTestsRajaSparseTriplet::getMatrixData(hiop::hiopMatrixSparse* A const local_ordinal_type* MatrixTestsRajaSparseTriplet::getRowIndices(const hiop::hiopMatrixSparse* A) { const auto* mat = dynamic_cast(A); - const_cast(mat)->copyFromDev(); // UB? + const_cast(mat)->copyFromDev(); // UB? return mat->i_row_host(); } const local_ordinal_type* MatrixTestsRajaSparseTriplet::getColumnIndices(const hiop::hiopMatrixSparse* A) { const auto* mat = dynamic_cast(A); - const_cast(mat)->copyFromDev(); // UB? + const_cast(mat)->copyFromDev(); // UB? return mat->j_col_host(); } @@ -174,7 +170,8 @@ int MatrixTestsRajaSparseTriplet::getLocalSize(const hiop::hiopVector* x) const auto* xvec = dynamic_cast(x); if(xvec != nullptr) return static_cast(xvec->get_local_size()); - else THROW_NULL_DEREF; + else + THROW_NULL_DEREF; } /** @@ -186,17 +183,14 @@ int MatrixTestsRajaSparseTriplet::getLocalSize(const hiop::hiopVector* x) [[nodiscard]] int MatrixTestsRajaSparseTriplet::verifyAnswer(hiop::hiopMatrixSparse* A, const double answer) { - if(A == nullptr) - return 1; + if(A == nullptr) return 1; auto* mat = dynamic_cast(A); mat->copyFromDev(); const local_ordinal_type nnz = mat->numberOfNonzeros(); const real_type* values = mat->M_host(); int fail = 0; - for (local_ordinal_type i=0; i(A); mat->copyFromDev(); const local_ordinal_type nnz = mat->numberOfNonzeros(); - if(nnz_to-nnz_from > nnz) { + if(nnz_to - nnz_from > nnz) { return 1; } const real_type* values = mat->M_host(); int fail = 0; - for (local_ordinal_type i=nnz_from; i expect) +[[nodiscard]] +int MatrixTestsRajaSparseTriplet::verifyAnswer(hiop::hiopMatrixDense* Amat, + std::function expect) { auto* A = dynamic_cast(Amat); assert(A->get_local_size_n() == A->n() && "Matrix should not be distributed"); @@ -247,13 +241,10 @@ int MatrixTestsRajaSparseTriplet::verifyAnswer( int fail = 0; A->copyFromDev(); const double* mat = A->local_data_host(); - for (local_ordinal_type i=0; i(x); @@ -273,10 +264,8 @@ int MatrixTestsRajaSparseTriplet::verifyAnswer(hiop::hiopVector* x, double answe const auto* vec = xvec->local_data_host_const(); int local_fail = 0; - for(local_ordinal_type i=0; i expect) +[[nodiscard]] +int MatrixTestsRajaSparseTriplet::verifyAnswer(hiop::hiopVector* x, std::function expect) { const local_ordinal_type N = getLocalSize(x); @@ -296,10 +283,8 @@ int MatrixTestsRajaSparseTriplet::verifyAnswer( const auto* vec = xvec->local_data_host_const(); int local_fail = 0; - for (int i=0; im()]; std::memset(sparsity_pattern, 0, sizeof(local_ordinal_type) * mat->m()); - for(local_ordinal_type i = 0; i < nnz; i++) - { + for(local_ordinal_type i = 0; i < nnz; i++) { sparsity_pattern[iRow[i]]++; } return sparsity_pattern; @@ -334,40 +318,34 @@ local_ordinal_type* MatrixTestsRajaSparseTriplet::numNonzerosPerCol(hiop::hiopMa auto sparsity_pattern = new local_ordinal_type[mat->n()]; std::memset(sparsity_pattern, 0, sizeof(local_ordinal_type) * mat->n()); - for(local_ordinal_type i = 0; i < nnz; i++) - { + for(local_ordinal_type i = 0; i < nnz; i++) { sparsity_pattern[jCol[i]]++; } return sparsity_pattern; } -void MatrixTestsRajaSparseTriplet::initializeMatrix( - hiop::hiopMatrixSparse* mat, - local_ordinal_type entries_per_row) +void MatrixTestsRajaSparseTriplet::initializeMatrix(hiop::hiopMatrixSparse* mat, local_ordinal_type entries_per_row) { auto* A = dynamic_cast(mat); - local_ordinal_type * iRow = A->i_row_host(); - local_ordinal_type * jCol = A->j_col_host(); - double * val = A->M_host(); + local_ordinal_type* iRow = A->i_row_host(); + local_ordinal_type* jCol = A->j_col_host(); + double* val = A->M_host(); local_ordinal_type m = A->m(); local_ordinal_type n = A->n(); assert(A->numberOfNonzeros() == m * entries_per_row && "Matrix initialized with insufficent number of non-zero entries"); A->copyFromDev(); - for(local_ordinal_type row = 0, col = 0, i = 0; row < m && i < A->numberOfNonzeros(); row++, col = 0) - { - for(local_ordinal_type j=0; jnumberOfNonzeros(); row++, col = 0) { + for(local_ordinal_type j = 0; j < entries_per_row - 1; i++, j++, col += n / entries_per_row) { iRow[i] = row; jCol[i] = col; val[i] = one; } iRow[i] = row; - jCol[i] = n-1; + jCol[i] = n - 1; val[i++] = one; - } A->copyToDev(); } @@ -377,16 +355,13 @@ void MatrixTestsRajaSparseTriplet::initializeMatrix( */ void MatrixTestsRajaSparseTriplet::maybeCopyToDev(hiop::hiopMatrixSparse* mat) { - if (auto* A = dynamic_cast(mat)) - { + if(auto* A = dynamic_cast(mat)) { A->copyToDev(); - } - else if (auto* A = dynamic_cast(mat)) - { + } else if(auto* A = dynamic_cast(mat)) { A->copyToDev(); + } else // do nothing, raja sparse mat class was not passed in + { } - else // do nothing, raja sparse mat class was not passed in - { } } /** @@ -395,16 +370,13 @@ void MatrixTestsRajaSparseTriplet::maybeCopyToDev(hiop::hiopMatrixSparse* mat) */ void MatrixTestsRajaSparseTriplet::maybeCopyFromDev(hiop::hiopMatrixSparse* mat) { - if (auto* A = dynamic_cast(mat)) - { + if(auto* A = dynamic_cast(mat)) { A->copyFromDev(); - } - else if (auto* A = dynamic_cast(mat)) - { + } else if(auto* A = dynamic_cast(mat)) { A->copyFromDev(); + } else // do nothing, raja sparse mat class was not passed in + { } - else // do nothing, raja sparse mat class was not passed in - { } } int MatrixTestsRajaSparseTriplet::getLocalElement(hiop::hiopVectorInt* xvec, int idx) const @@ -429,4 +401,5 @@ void MatrixTestsRajaSparseTriplet::setLocalElement(hiop::hiopVectorInt* xvec, in } } -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsRajaSparseTriplet.hpp b/tests/LinAlg/matrixTestsRajaSparseTriplet.hpp index 9c6888d2e..4c37743ef 100644 --- a/tests/LinAlg/matrixTestsRajaSparseTriplet.hpp +++ b/tests/LinAlg/matrixTestsRajaSparseTriplet.hpp @@ -53,51 +53,54 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #pragma once -//#include +// #include #include "matrixTestsSparse.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Tests are re-implemented here if necessary for SparseTriplet Matrices, * as the data layout is significantly different compares to dense matrices. * Any tests that would modify the sparsity pattern are not implemented. * Any tests that would make calls to non-implemented/needed functions are not implemented. - * -*/ + * + */ class MatrixTestsRajaSparseTriplet : public MatrixTestsSparse { public: MatrixTestsRajaSparseTriplet() {} - virtual ~MatrixTestsRajaSparseTriplet(){} - + virtual ~MatrixTestsRajaSparseTriplet() {} private: - virtual void setLocalElement( - hiop::hiopVector *_x, - const local_ordinal_type i, - const real_type val) override; - virtual real_type getLocalElement(const hiop::hiopMatrix *a, local_ordinal_type i, local_ordinal_type j) override; - virtual real_type getLocalElement(const hiop::hiopVector *x, local_ordinal_type i) override; + virtual void setLocalElement(hiop::hiopVector* _x, const local_ordinal_type i, const real_type val) override; + virtual real_type getLocalElement(const hiop::hiopMatrix* a, local_ordinal_type i, local_ordinal_type j) override; + virtual real_type getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) override; virtual real_type* getMatrixData(hiop::hiopMatrixSparse* a) override; - virtual real_type getMatrixData(hiop::hiopMatrixSparse* a, local_ordinal_type i, local_ordinal_type j) override {assert(false);return 0;}; + virtual real_type getMatrixData(hiop::hiopMatrixSparse* a, local_ordinal_type i, local_ordinal_type j) override + { + assert(false); + return 0; + }; virtual const local_ordinal_type* getRowIndices(const hiop::hiopMatrixSparse* a) override; virtual const local_ordinal_type* getColumnIndices(const hiop::hiopMatrixSparse* a) override; - virtual local_ordinal_type getLocalSize(const hiop::hiopVector *x) override; + virtual local_ordinal_type getLocalSize(const hiop::hiopVector* x) override; virtual int verifyAnswer(hiop::hiopMatrixSparse* A, real_type answer) override; - virtual int verifyAnswer(hiop::hiopMatrix* A, local_ordinal_type nnz_st, local_ordinal_type nnz_ed, const double answer) override; - virtual int verifyAnswer( - hiop::hiopMatrixDense* A, - std::function expect) override; - virtual int verifyAnswer(hiop::hiopVector *x, real_type answer) override; - virtual int verifyAnswer( - hiop::hiopVector *x, - std::function expect) override; + virtual int verifyAnswer(hiop::hiopMatrix* A, + local_ordinal_type nnz_st, + local_ordinal_type nnz_ed, + const double answer) override; + virtual int verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) override; + virtual int verifyAnswer(hiop::hiopVector* x, real_type answer) override; + virtual int verifyAnswer(hiop::hiopVector* x, std::function expect) override; virtual local_ordinal_type* numNonzerosPerRow(hiop::hiopMatrixSparse* mat) override; virtual local_ordinal_type* numNonzerosPerCol(hiop::hiopMatrixSparse* mat) override; virtual void maybeCopyToDev(hiop::hiopMatrixSparse*) override; @@ -105,8 +108,10 @@ class MatrixTestsRajaSparseTriplet : public MatrixTestsSparse virtual int getLocalElement(hiop::hiopVectorInt*, int) const override; virtual void setLocalElement(hiop::hiopVectorInt*, int, int) const override; + public: virtual void initializeMatrix(hiop::hiopMatrixSparse* mat, local_ordinal_type entries_per_row) override; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsRajaSymSparseTriplet.cpp b/tests/LinAlg/matrixTestsRajaSymSparseTriplet.cpp index 2bdcd196d..5cf270af9 100644 --- a/tests/LinAlg/matrixTestsRajaSymSparseTriplet.cpp +++ b/tests/LinAlg/matrixTestsRajaSymSparseTriplet.cpp @@ -53,7 +53,7 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #include @@ -62,63 +62,64 @@ #include #include "matrixTestsRajaSymSparseTriplet.hpp" -//TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated -// template parameters for vector and matrix RAJA classes. Likely a better approach would be -// to revise the tests to try out multiple configurations of the memory backends and execution -// policies for RAJA dense matrix. +// TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated +// template parameters for vector and matrix RAJA classes. Likely a better approach would be +// to revise the tests to try out multiple configurations of the memory backends and execution +// policies for RAJA dense matrix. #if defined(HIOP_USE_CUDA) #include using hiopVectorRajaT = hiop::hiopVectorRaja; using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #elif defined(HIOP_USE_HIP) #include using hiopVectorRajaT = hiop::hiopVectorRaja; using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using hiopVectorRajaT = hiop::hiopVectorRaja; using hiopMatrixRajaDense = hiop::hiopMatrixDenseRaja; -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #endif -namespace hiop{ namespace tests { +namespace hiop +{ +namespace tests +{ /// Returns element (i,j) of a dense matrix `A`. /// First need to retrieve hiopMatrixDense from the abstract interface -real_type MatrixTestsRajaSymSparseTriplet::getLocalElement( - const hiop::hiopMatrix* A, - local_ordinal_type row, - local_ordinal_type col) +real_type MatrixTestsRajaSymSparseTriplet::getLocalElement(const hiop::hiopMatrix* A, + local_ordinal_type row, + local_ordinal_type col) { const auto* mat = dynamic_cast(A); - - if (mat != nullptr) - { + + if(mat != nullptr) { auto* amat = const_cast(mat); amat->copyFromDev(); double* M = amat->local_data_const(); - return M[row*amat->get_local_size_n()+col]; - } - else THROW_NULL_DEREF; + return M[row * amat->get_local_size_n() + col]; + } else + THROW_NULL_DEREF; } /// Returns element _i_ of vector _x_. /// First need to retrieve hiopVectorPar from the abstract interface -real_type MatrixTestsRajaSymSparseTriplet::getLocalElement( - const hiop::hiopVector* x, - local_ordinal_type i) +real_type MatrixTestsRajaSymSparseTriplet::getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) { const auto* xvec = dynamic_cast(x); - if(xvec != nullptr) - { + if(xvec != nullptr) { auto* axvec = const_cast(xvec); axvec->copyFromDev(); return xvec->local_data_host_const()[i]; - } - else THROW_NULL_DEREF; + } else + THROW_NULL_DEREF; } real_type* MatrixTestsRajaSymSparseTriplet::getMatrixData(hiop::hiopMatrixSparse* A) @@ -131,14 +132,14 @@ real_type* MatrixTestsRajaSymSparseTriplet::getMatrixData(hiop::hiopMatrixSparse const local_ordinal_type* MatrixTestsRajaSymSparseTriplet::getRowIndices(const hiop::hiopMatrixSparse* A) { const auto* mat = dynamic_cast(A); - const_cast(mat)->copyFromDev(); // UB? + const_cast(mat)->copyFromDev(); // UB? return mat->i_row_host(); } const local_ordinal_type* MatrixTestsRajaSymSparseTriplet::getColumnIndices(const hiop::hiopMatrixSparse* A) { const auto* mat = dynamic_cast(A); - const_cast(mat)->copyFromDev(); // UB? + const_cast(mat)->copyFromDev(); // UB? return mat->j_col_host(); } @@ -148,18 +149,17 @@ int MatrixTestsRajaSymSparseTriplet::getLocalSize(const hiop::hiopVector* x) const auto* xvec = dynamic_cast(x); if(xvec != nullptr) return static_cast(xvec->get_local_size()); - else THROW_NULL_DEREF; + else + THROW_NULL_DEREF; } - /* * Pass a function-like object to calculate the expected * answer dynamically, based on the row and column */ - [[nodiscard]] -int MatrixTestsRajaSymSparseTriplet::verifyAnswer( - hiop::hiopMatrixDense* Amat, - std::function expect) +[[nodiscard]] +int MatrixTestsRajaSymSparseTriplet::verifyAnswer(hiop::hiopMatrixDense* Amat, + std::function expect) { auto* A = dynamic_cast(Amat); assert(A->get_local_size_n() == A->n() && "Matrix should not be distributed"); @@ -168,13 +168,10 @@ int MatrixTestsRajaSymSparseTriplet::verifyAnswer( int fail = 0; A->copyFromDev(); double* mat = A->local_data_host(); - for (local_ordinal_type i=0; i expect) +int MatrixTestsRajaSymSparseTriplet::verifyAnswer(hiop::hiopVector* x, std::function expect) { const local_ordinal_type N = getLocalSize(x); @@ -194,11 +189,9 @@ int MatrixTestsRajaSymSparseTriplet::verifyAnswer( const auto* vec = xvec->local_data_host_const(); int local_fail = 0; - for (int i=0; im()]; std::memset(sparsity_pattern, 0, sizeof(local_ordinal_type) * mat->m()); - for(local_ordinal_type i = 0; i < nnz; i++) - { + for(local_ordinal_type i = 0; i < nnz; i++) { sparsity_pattern[iRow[i]]++; - if(iRow[i] != jCol[i]) - { + if(iRow[i] != jCol[i]) { sparsity_pattern[jCol[i]]++; } } @@ -236,17 +227,14 @@ local_ordinal_type* MatrixTestsRajaSymSparseTriplet::numNonzerosPerCol(hiop::hio auto sparsity_pattern = new local_ordinal_type[mat->n()]; std::memset(sparsity_pattern, 0, sizeof(local_ordinal_type) * mat->n()); - for(local_ordinal_type i = 0; i < nnz; i++) - { + for(local_ordinal_type i = 0; i < nnz; i++) { sparsity_pattern[jCol[i]]++; - if(iRow[i] != jCol[i]) - { + if(iRow[i] != jCol[i]) { sparsity_pattern[iRow[i]]++; } - } return sparsity_pattern; } - -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsRajaSymSparseTriplet.hpp b/tests/LinAlg/matrixTestsRajaSymSparseTriplet.hpp index 32aa64edd..46f261999 100644 --- a/tests/LinAlg/matrixTestsRajaSymSparseTriplet.hpp +++ b/tests/LinAlg/matrixTestsRajaSymSparseTriplet.hpp @@ -53,7 +53,7 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #pragma once @@ -61,37 +61,37 @@ #include #include "matrixTestsSymSparse.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Tests are re-implemented here if necessary for SparseTriplet Matrices, * as the data layout is significantly different compares to dense matrices. * Any tests that would modify the sparsity pattern are not implemented. * Any tests that would make calls to non-implemented/needed functions are not implemented. - * -*/ + * + */ class MatrixTestsRajaSymSparseTriplet : public MatrixTestsSymSparse { public: MatrixTestsRajaSymSparseTriplet() {} - virtual ~MatrixTestsRajaSymSparseTriplet(){} - + virtual ~MatrixTestsRajaSymSparseTriplet() {} private: - virtual real_type getLocalElement(const hiop::hiopMatrix *a, local_ordinal_type i, local_ordinal_type j) override; - virtual real_type getLocalElement(const hiop::hiopVector *x, local_ordinal_type i) override; + virtual real_type getLocalElement(const hiop::hiopMatrix* a, local_ordinal_type i, local_ordinal_type j) override; + virtual real_type getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) override; virtual real_type* getMatrixData(hiop::hiopMatrixSparse* a) override; virtual const local_ordinal_type* getRowIndices(const hiop::hiopMatrixSparse* a) override; virtual const local_ordinal_type* getColumnIndices(const hiop::hiopMatrixSparse* a) override; - virtual local_ordinal_type getLocalSize(const hiop::hiopVector *x) override; - virtual int verifyAnswer( - hiop::hiopMatrixDense* A, - std::function expect) override; - virtual int verifyAnswer( - hiop::hiopVector *x, - std::function expect) override; + virtual local_ordinal_type getLocalSize(const hiop::hiopVector* x) override; + virtual int verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) override; + virtual int verifyAnswer(hiop::hiopVector* x, std::function expect) override; virtual local_ordinal_type* numNonzerosPerRow(hiop::hiopMatrixSparse* mat) override; virtual local_ordinal_type* numNonzerosPerCol(hiop::hiopMatrixSparse* mat) override; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsSparse.hpp b/tests/LinAlg/matrixTestsSparse.hpp index 33dae1633..988b3f816 100644 --- a/tests/LinAlg/matrixTestsSparse.hpp +++ b/tests/LinAlg/matrixTestsSparse.hpp @@ -53,7 +53,7 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #pragma once @@ -67,7 +67,10 @@ #include #include "testBase.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Tests are re-implemented here if necessary for SparseTriplet Matrices, @@ -75,13 +78,13 @@ namespace hiop { namespace tests { * * Any tests that would modify the sparsity pattern are not implemented. * Any tests that would make calls to non-implemented/needed functions are not implemented. - * -*/ + * + */ class MatrixTestsSparse : public TestBase { public: MatrixTestsSparse() {} - virtual ~MatrixTestsSparse(){} + virtual ~MatrixTestsSparse() {} /// @brief Verify function returning number of rows bool matrixNumRows(hiop::hiopMatrix& A, global_ordinal_type M) @@ -123,20 +126,13 @@ class MatrixTestsSparse : public TestBase printMessage(fail, __func__); return fail; } - + /// @brief Test y <- beta * y + alpha * A * x - bool matrixTimesVec( - hiop::hiopMatrixSparse& A, - hiop::hiopVector& y, - hiop::hiopVector& x) + bool matrixTimesVec(hiop::hiopMatrixSparse& A, hiop::hiopVector& y, hiop::hiopVector& x) { assert(y.get_size() == A.m() && "Did you pass in vectors of the correct sizes?"); assert(x.get_size() == A.n() && "Did you pass in vectors of the correct sizes?"); - const real_type alpha = two, - beta = half, - A_val = half, - y_val = two, - x_val = three; + const real_type alpha = two, beta = half, A_val = half, y_val = two, x_val = three; int fail = 0; y.setToConstant(y_val); @@ -146,31 +142,22 @@ class MatrixTestsSparse : public TestBase A.timesVec(beta, y, alpha, x); - fail += verifyAnswer(&y, - [=] (local_ordinal_type i) - { - const local_ordinal_type numValuesInRow = sparsity_pattern[i]; - return (beta * y_val) + (alpha * A_val * x_val * numValuesInRow); - }); + fail += verifyAnswer(&y, [=](local_ordinal_type i) { + const local_ordinal_type numValuesInRow = sparsity_pattern[i]; + return (beta * y_val) + (alpha * A_val * x_val * numValuesInRow); + }); - delete [] sparsity_pattern; + delete[] sparsity_pattern; printMessage(fail, __func__); return fail; } /// @brief Test: y <- beta * y + alpha * A^T * x - bool matrixTransTimesVec( - hiop::hiopMatrixSparse& A, - hiop::hiopVector& x, - hiop::hiopVector& y) + bool matrixTransTimesVec(hiop::hiopMatrixSparse& A, hiop::hiopVector& x, hiop::hiopVector& y) { assert(x.get_size() == A.m() && "Did you pass in vectors of the correct sizes?"); assert(y.get_size() == A.n() && "Did you pass in vectors of the correct sizes?"); - const real_type alpha = one, - beta = one, - A_val = one, - y_val = three, - x_val = three; + const real_type alpha = one, beta = one, A_val = one, y_val = three, x_val = three; int fail = 0; A.setToConstant(A_val); @@ -180,21 +167,17 @@ class MatrixTestsSparse : public TestBase A.transTimesVec(beta, y, alpha, x); - fail += verifyAnswer(&y, - [=] (local_ordinal_type i) -> real_type - { - return (beta * y_val) + (alpha * A_val * x_val * sparsity_pattern[i]); - }); + fail += verifyAnswer(&y, [=](local_ordinal_type i) -> real_type { + return (beta * y_val) + (alpha * A_val * x_val * sparsity_pattern[i]); + }); - delete [] sparsity_pattern; + delete[] sparsity_pattern; printMessage(fail, __func__); return fail; } /// @brief Test function that returns max-norm of each row in this matrix - bool matrixMaxAbsValue( - hiop::hiopMatrixSparse& A, - const int rank=0) + bool matrixMaxAbsValue(hiop::hiopMatrixSparse& A, const int rank = 0) { auto nnz = A.numberOfNonzeros(); auto val = getMatrixData(&A); @@ -220,15 +203,12 @@ class MatrixTestsSparse : public TestBase } /// @brief Test function that returns matrix element with maximum absolute value - bool matrix_row_max_abs_value( - hiop::hiopMatrixSparse& A, - hiop::hiopVector& x, - const int rank=0) + bool matrix_row_max_abs_value(hiop::hiopMatrixSparse& A, hiop::hiopVector& x, const int rank = 0) { const local_ordinal_type nnz = A.numberOfNonzeros(); auto val = getMatrixData(&A); - - const local_ordinal_type last_row_idx = A.m()-1; + + const local_ordinal_type last_row_idx = A.m() - 1; int fail = 0; @@ -237,25 +217,20 @@ class MatrixTestsSparse : public TestBase maybeCopyFromDev(&A); val[nnz - 1] = -two; maybeCopyToDev(&A); - + A.row_max_abs_value(x); - - fail += verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - const bool is_last_row = (i == last_row_idx); - return is_last_row ? two : one; - }); + + fail += verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { + const bool is_last_row = (i == last_row_idx); + return is_last_row ? two : one; + }); printMessage(fail, __func__); return fail; } /// @brief Test function that scale each row of A - bool matrix_scale_row( - hiop::hiopMatrixSparse& A, - hiop::hiopVector& x, - const int rank=0) + bool matrix_scale_row(hiop::hiopMatrixSparse& A, hiop::hiopVector& x, const int rank = 0) { const real_type A_val = two; const real_type x_val = three; @@ -264,9 +239,9 @@ class MatrixTestsSparse : public TestBase x.setToConstant(x_val); A.setToConstant(A_val); - A.scale_row(x,false); + A.scale_row(x, false); - real_type expected = A_val*x_val; + real_type expected = A_val * x_val; fail += verifyAnswer(&A, expected); printMessage(fail, __func__, rank); @@ -282,13 +257,11 @@ class MatrixTestsSparse : public TestBase int fail = 0; A.setToConstant(two); - if (!A.isfinite()) - fail++; + if(!A.isfinite()) fail++; val[nnz - 1] = INFINITY; maybeCopyToDev(&A); - if (A.isfinite()) - fail++; + if(A.isfinite()) fail++; printMessage(fail, __func__); return fail; @@ -298,135 +271,123 @@ class MatrixTestsSparse : public TestBase bool matrix_copy_subdiagonal_from(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, hiop::hiopVector& x, - const int rank=0) - { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A - - auto nnz = A.numberOfNonzeros(); + const int rank = 0) + { + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A + + auto nnz = A.numberOfNonzeros(); auto dim_x = x.get_size(); auto num_row_A = A.m(); assert(num_row_A >= dim_x); - + const real_type A_val = two; const real_type x_val = three; int fail = 0; x.setToConstant(x_val); A.setToConstant(A_val); - + // replace the last `dim_x` values to a diagonal sub matrix - A.copySubDiagonalFrom(num_row_A-dim_x, dim_x, x, nnz-dim_x); + A.copySubDiagonalFrom(num_row_A - dim_x, dim_x, x, nnz - dim_x); // copy to a dense matrix A.copy_to(W); const auto* iRow = getRowIndices(&A); const auto* jCol = getColumnIndices(&A); - - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - double ans = zero; - const bool indexExists = find_unsorted_pair(i, j, iRow, jCol, nnz-dim_x); - if(indexExists) { - if(i==j && i>=num_row_A-dim_x) { - // this ele is also defined in vector x as well - ans = x_val + A_val; - } else { - // this ele doesn't change - ans = A_val; - } - } else if(i==j && i>=num_row_A-dim_x) { - // this ele comes vector x - ans = x_val; + + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + double ans = zero; + const bool indexExists = find_unsorted_pair(i, j, iRow, jCol, nnz - dim_x); + if(indexExists) { + if(i == j && i >= num_row_A - dim_x) { + // this ele is also defined in vector x as well + ans = x_val + A_val; + } else { + // this ele doesn't change + ans = A_val; } - return ans; + } else if(i == j && i >= num_row_A - dim_x) { + // this ele comes vector x + ans = x_val; } - ); + return ans; + }); printMessage(fail, __func__, rank); return fail; } - - /// @brief test for mathod that set a sub-diagonal block from a vector - bool matrix_set_subdiagonal_to(hiop::hiopMatrixDense& W, - hiop::hiopMatrixSparse& A, - const int rank=0) - { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A - - auto nnz = A.numberOfNonzeros(); + + /// @brief test for mathod that set a sub-diagonal block from a vector + bool matrix_set_subdiagonal_to(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, const int rank = 0) + { + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A + + auto nnz = A.numberOfNonzeros(); auto num_row_A = A.m(); - int num_diag_ele = num_row_A/2; - + int num_diag_ele = num_row_A / 2; + const real_type A_val = two; const real_type x_val = three; int fail = 0; A.setToConstant(A_val); - + // replace the last `dim_x` values to a diagonal sub matrix - A.setSubDiagonalTo(num_row_A-num_diag_ele, num_diag_ele, x_val, nnz-num_diag_ele); + A.setSubDiagonalTo(num_row_A - num_diag_ele, num_diag_ele, x_val, nnz - num_diag_ele); // copy to a dense matrix A.copy_to(W); const auto* iRow = getRowIndices(&A); const auto* jCol = getColumnIndices(&A); - - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - double ans = zero; - const bool indexExists = find_unsorted_pair(i, j, iRow, jCol, nnz-num_diag_ele); - if(indexExists) { - if(i==j && i>=num_row_A-num_diag_ele) { - // this ele is also defined in vector x as well - ans = x_val + A_val; - } else { - // this ele doesn't change - ans = A_val; - } - } else if(i==j && i>=num_row_A-num_diag_ele) { - // this ele comes vector x - ans = x_val; + + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + double ans = zero; + const bool indexExists = find_unsorted_pair(i, j, iRow, jCol, nnz - num_diag_ele); + if(indexExists) { + if(i == j && i >= num_row_A - num_diag_ele) { + // this ele is also defined in vector x as well + ans = x_val + A_val; + } else { + // this ele doesn't change + ans = A_val; } - return ans; + } else if(i == j && i >= num_row_A - num_diag_ele) { + // this ele comes vector x + ans = x_val; } - ); + return ans; + }); printMessage(fail, __func__, rank); return fail; } - + /** * @brief Test for method [W] += A * D^(-1) * A^T - * + * * Size of A is m x n; size of D is n x n. - * The method adds the matrix product to a block above the diagonal of W. - * + * The method adds the matrix product to a block above the diagonal of W. + * * @param[in] A - sparse matrix object which invokes the method (this) * @param[in] D - diagonal matrix stored in a vector * @param[in] W - dense matrix where the product is stored * @param[in] offset - row/column offset in W, from where A*D^(-1)*A^T is added in place */ - int matrixAddMDinvMtransToDiagBlockOfSymDeMatUTri( - hiop::hiopMatrixSparse& A, - hiop::hiopVector& D, - hiop::hiopMatrixDense& W, - local_ordinal_type offset) + int matrixAddMDinvMtransToDiagBlockOfSymDeMatUTri(hiop::hiopMatrixSparse& A, + hiop::hiopVector& D, + hiop::hiopMatrixDense& W, + local_ordinal_type offset) { int fail = 0; // Assertion is using API calls. assert(D.get_size() == A.n() && "Did you pass in a diagonal matrix of the correct size?"); - const real_type alpha = half, - A_val = one, - d_val = half, - W_val = zero; + const real_type alpha = half, A_val = one, d_val = half, W_val = zero; D.setToConstant(d_val); W.setToConstant(W_val); @@ -438,54 +399,42 @@ class MatrixTestsSparse : public TestBase const local_ordinal_type* jCol = getColumnIndices(&A); const local_ordinal_type nnz = A.numberOfNonzeros(); - fail += verifyAnswer(&W, - [&] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - // Dense matrix elements that are not modified - if(i < offset || j < offset || i > j || i >= offset + A.m() || j >= offset + A.m()) - { - return W_val; - } - else - { - // The equivalent indices for the sparse matrices used in the calculation - local_ordinal_type d_i = i - offset; - local_ordinal_type d_j = j - offset; - - // Counting the number of columns with entries in rows d_i and d_j - local_ordinal_type count = 0; - - // Searching for the row index d_i in triplet structure - local_ordinal_type rs_di = 0; - while(iRow[rs_di] != d_i && rs_di < nnz) - ++rs_di; - - // Searching for the row index d_j in triplet structure - local_ordinal_type rs_dj = 0; - while(iRow[rs_dj] != d_j && rs_dj < nnz) - ++rs_dj; - - // Counting nonzero terms of the matrix product innermost loop - // \sum_k A_ik * A^T_jk / D_kk - while(rs_di < nnz && rs_dj < nnz && iRow[rs_di] == d_i && iRow[rs_dj] == d_j) - { - if(jCol[rs_di] == jCol[rs_dj]) - { - count++; - } - - if(jCol[rs_di] real_type { + // Dense matrix elements that are not modified + if(i < offset || j < offset || i > j || i >= offset + A.m() || j >= offset + A.m()) { + return W_val; + } else { + // The equivalent indices for the sparse matrices used in the calculation + local_ordinal_type d_i = i - offset; + local_ordinal_type d_j = j - offset; + + // Counting the number of columns with entries in rows d_i and d_j + local_ordinal_type count = 0; + + // Searching for the row index d_i in triplet structure + local_ordinal_type rs_di = 0; + while(iRow[rs_di] != d_i && rs_di < nnz) ++rs_di; + + // Searching for the row index d_j in triplet structure + local_ordinal_type rs_dj = 0; + while(iRow[rs_dj] != d_j && rs_dj < nnz) ++rs_dj; + + // Counting nonzero terms of the matrix product innermost loop + // \sum_k A_ik * A^T_jk / D_kk + while(rs_di < nnz && rs_dj < nnz && iRow[rs_di] == d_i && iRow[rs_dj] == d_j) { + if(jCol[rs_di] == jCol[rs_dj]) { + count++; + } + + if(jCol[rs_di] < jCol[rs_dj]) { + rs_di++; + } else { + rs_dj++; } - return W_val + (alpha * A_val * A_val / d_val * count); } - }); + return W_val + (alpha * A_val * A_val / d_val * count); + } + }); printMessage(fail, __func__); return fail; @@ -494,24 +443,21 @@ class MatrixTestsSparse : public TestBase /** * @brief Test for (W) = beta(W) + (alpha)*this * B^T W) += this * D^(-1) * B^T - * + * * The method adds the matrix product to a block above the diagonal of W. - * + * * @param[in] A - sparse matrix object which invokes the method (this) * @param[in] B - sparse matrix * @param[in] W - dense matrix where the product is stored */ - bool matrixTimesMatTrans( - hiop::hiopMatrixSparse& A, - hiop::hiopMatrixSparse& B, - hiop::hiopMatrixDense& W) + bool matrixTimesMatTrans(hiop::hiopMatrixSparse& A, hiop::hiopMatrixSparse& B, hiop::hiopMatrixDense& W) { int fail = 0; assert(A.n() == B.n() && "Did you pass in matrices with the same number of cols?"); const real_type alpha = half; - const real_type beta = two; + const real_type beta = two; const real_type A_val = one; const real_type B_val = one; const real_type W_val = zero; @@ -529,54 +475,45 @@ class MatrixTestsSparse : public TestBase const local_ordinal_type* B_jCol = getColumnIndices(&B); const local_ordinal_type B_nnz = B.numberOfNonzeros(); - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - // Counting the number of columns with entries in row i in A and row j in B - local_ordinal_type count = 0; - - local_ordinal_type d_i = i; - local_ordinal_type d_j = j; + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + // Counting the number of columns with entries in row i in A and row j in B + local_ordinal_type count = 0; + + local_ordinal_type d_i = i; + local_ordinal_type d_j = j; + + // Searching for the row index d_i in triplet structure + local_ordinal_type rs_di = 0; + while(A_iRow[rs_di] != d_i && rs_di < A_nnz) rs_di++; + // Searching for the row index d_j in triplet structure + local_ordinal_type rs_dj = 0; + while(B_iRow[rs_dj] != d_j && rs_dj < B_nnz) rs_dj++; + + // Counting nonzero terms of the matrix product innermost loop + // \sum_k A_ik * B^T_jk + while(rs_di < A_nnz && rs_dj < B_nnz && A_iRow[rs_di] == d_i && B_iRow[rs_dj] == d_j) { + if(A_jCol[rs_di] == B_jCol[rs_dj]) { + count++; + } - // Searching for the row index d_i in triplet structure - local_ordinal_type rs_di = 0; - while(A_iRow[rs_di] != d_i && rs_di < A_nnz) + if(A_jCol[rs_di] < B_jCol[rs_dj]) { rs_di++; - // Searching for the row index d_j in triplet structure - local_ordinal_type rs_dj = 0; - while(B_iRow[rs_dj] != d_j && rs_dj < B_nnz) + } else { rs_dj++; - - // Counting nonzero terms of the matrix product innermost loop - // \sum_k A_ik * B^T_jk - while(rs_di < A_nnz && rs_dj < B_nnz && A_iRow[rs_di] == d_i && B_iRow[rs_dj] == d_j) - { - if(A_jCol[rs_di] == B_jCol[rs_dj]) - { - count++; - } - - if(A_jCol[rs_di] real_type - { - // Dense matrix elements that are not modified - if(i < i_offset || j < j_offset || i > j || i >= i_max || j >= j_max) - { - return W_val; - } - else - { - // Counting the number of columns with entries in row d_i in A and row d_j in B - local_ordinal_type count = 0; + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + // Dense matrix elements that are not modified + if(i < i_offset || j < j_offset || i > j || i >= i_max || j >= j_max) { + return W_val; + } else { + // Counting the number of columns with entries in row d_i in A and row d_j in B + local_ordinal_type count = 0; + + local_ordinal_type d_i = i - i_offset; + local_ordinal_type d_j = j - j_offset; - local_ordinal_type d_i = i - i_offset; - local_ordinal_type d_j = j - j_offset; + // Searching for the row index d_i in triplet structure + local_ordinal_type rs_di = 0; + while(A_iRow[rs_di] != d_i && rs_di < A_nnz) rs_di++; + // Searching for the row index d_j in triplet structure + local_ordinal_type rs_dj = 0; + while(B_iRow[rs_dj] != d_j && rs_dj < B_nnz) rs_dj++; - // Searching for the row index d_i in triplet structure - local_ordinal_type rs_di = 0; - while(A_iRow[rs_di] != d_i && rs_di < A_nnz) + // Counting nonzero terms of the matrix product innermost loop + // \sum_k A_ik * B^T_jk / D_kk + while(rs_di < A_nnz && rs_dj < B_nnz && A_iRow[rs_di] == d_i && B_iRow[rs_dj] == d_j) { + if(A_jCol[rs_di] == B_jCol[rs_dj]) { + count++; + } + + if(A_jCol[rs_di] < B_jCol[rs_dj]) { rs_di++; - // Searching for the row index d_j in triplet structure - local_ordinal_type rs_dj = 0; - while(B_iRow[rs_dj] != d_j && rs_dj < B_nnz) + } else { rs_dj++; - - // Counting nonzero terms of the matrix product innermost loop - // \sum_k A_ik * B^T_jk / D_kk - while(rs_di < A_nnz && rs_dj < B_nnz && A_iRow[rs_di] == d_i && B_iRow[rs_dj] == d_j) - { - if(A_jCol[rs_di] == B_jCol[rs_dj]) - { - count++; - } - - if(A_jCol[rs_di]=start_idx_row && i=start_idx_col && j= i); @@ -735,14 +659,11 @@ class MatrixTestsSparse : public TestBase * location of the same call to addToSymDenseMatrixUpperTriangle * * Precondition: W is square - * + * * @todo Remove implementations specific code from this test!!! * @todo Format documentation correctly */ - bool symTransAddToSymDenseMatrixUpperTriangle( - hiop::hiopMatrixDense& W, - hiop::hiopMatrixSparse& A, - const int rank=0) + bool symTransAddToSymDenseMatrixUpperTriangle(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, const int rank = 0) { const local_ordinal_type N_loc = W.get_local_size_n(); const local_ordinal_type A_M = A.m(); @@ -753,9 +674,7 @@ class MatrixTestsSparse : public TestBase const local_ordinal_type start_idx_row = 0; const local_ordinal_type start_idx_col = N_loc - A_M; - const real_type alpha = half, - A_val = half, - W_val = one; + const real_type alpha = half, A_val = half, W_val = one; A.setToConstant(A_val); W.setToConstant(W_val); @@ -766,25 +685,22 @@ class MatrixTestsSparse : public TestBase const auto* iRow = getRowIndices(&A); const auto* jCol = getColumnIndices(&A); auto nnz = A.numberOfNonzeros(); - const int fail = verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - const bool isTransUpperTriangle = ( - i>=start_idx_row && i=start_idx_col && j real_type { + const bool isTransUpperTriangle = (i >= start_idx_row && i < start_idx_row + A_N_loc && // iCol is in A + j >= start_idx_col && j < start_idx_col + A_M && // jRow is in A + j <= i); // (i, j) are in upper triangle of W^T - const bool indexExists = find_unsorted_pair(j, i, iRow, jCol, nnz); - return (isTransUpperTriangle && indexExists) ? W_val + A_val*alpha : W_val; - }); + const bool indexExists = find_unsorted_pair(j, i, iRow, jCol, nnz); + return (isTransUpperTriangle && indexExists) ? W_val + A_val * alpha : W_val; + }); printMessage(fail, __func__, rank); return fail; } // /** - // * @brief Test for the method block of W += alpha*this, where `this' is sparse - // * The block of W is in the upper triangular part + // * @brief Test for the method block of W += alpha*this, where `this' is sparse + // * The block of W is in the upper triangular part // * @remark W; contains only the upper triangular entries as it is symmetric // * This test doesn't test if W itself is symmetric // * (i,j) are the indices of the upper triangle of W @@ -823,9 +739,9 @@ class MatrixTestsSparse : public TestBase // [=] (local_ordinal_type i, local_ordinal_type j) -> real_type // { // const bool isUpperTriangle = ( - // i>=start_idx_row && i=start_idx_col && j=start_idx_row && i=start_idx_col && j real_type - { - const bool isTransUpperTriangle = ( - i>=start_idx_row && i=start_idx_col && j real_type { + const bool isTransUpperTriangle = + (i >= start_idx_row && i < start_idx_row + A_N_loc && j >= start_idx_col && j < start_idx_col + A_M && i <= j); - const bool indexExists = find_unsorted_pair(i-start_idx_row, j-start_idx_col, jCol, iRow, nnz); - return (isTransUpperTriangle && indexExists) ? W_val + A_val*alpha : W_val; - }); + const bool indexExists = find_unsorted_pair(i - start_idx_row, j - start_idx_col, jCol, iRow, nnz); + return (isTransUpperTriangle && indexExists) ? W_val + A_val * alpha : W_val; + }); printMessage(fail, __func__, rank); return fail; @@ -894,10 +801,7 @@ class MatrixTestsSparse : public TestBase * A is square * degree of A <= degree of W */ - int addUpperTriangleToSymDenseMatrixUpperTriangle( - hiop::hiopMatrixDense& W, - hiop::hiopMatrixSparse& A, - const int rank=0) + int addUpperTriangleToSymDenseMatrixUpperTriangle(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, const int rank = 0) { const local_ordinal_type A_M = A.m(); const local_ordinal_type A_N = A.n(); @@ -905,14 +809,12 @@ class MatrixTestsSparse : public TestBase assert(A.m() == A.n()); assert(W.m() >= A.n()); assert(W.n() >= A.m()); - //auto W = dynamic_cast(&_W); - // Map the upper triangle of A to W starting - // at W's upper left corner + // auto W = dynamic_cast(&_W); + // Map the upper triangle of A to W starting + // at W's upper left corner const local_ordinal_type diag_start = 0; int fail = 0; - const real_type alpha = half, - A_val = half, - W_val = one; + const real_type alpha = half, A_val = half, W_val = one; A.setToConstant(A_val); W.setToConstant(W_val); @@ -923,23 +825,22 @@ class MatrixTestsSparse : public TestBase const auto* jCol = getColumnIndices(&A); auto nnz = A.numberOfNonzeros(); - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - bool isUpperTriangle = (i>=diag_start && i=i && j real_type { + bool isUpperTriangle = (i >= diag_start && i < diag_start + A_M && j >= i && j < diag_start + A_N); + const bool indexExists = find_unsorted_pair(i - diag_start, j - diag_start, iRow, jCol, nnz); + return (isUpperTriangle && indexExists) ? W_val + A_val * alpha : W_val; + }); printMessage(fail, __func__, rank); return fail; } - /// @brief Copies rows from another sparse matrix into this one, according to the patten `select`. ith row of A = select[i]_th row of B - int matrix_copy_rows_from( hiop::hiopMatrixSparse& A, hiop::hiopMatrixSparse& B, hiop::hiopVectorInt& select) + /// @brief Copies rows from another sparse matrix into this one, according to the patten `select`. ith row of A = + /// select[i]_th row of B + int matrix_copy_rows_from(hiop::hiopMatrixSparse& A, hiop::hiopMatrixSparse& B, hiop::hiopVectorInt& select) { int n_A_rows = A.m(); - int n_B_rows = B.m(); + int n_B_rows = B.m(); assert(A.n() == B.n()); assert(n_A_rows <= n_B_rows); @@ -949,28 +850,24 @@ class MatrixTestsSparse : public TestBase A.setToConstant(A_val); B.setToConstant(B_val); - for(int i=0; i real_type - { - const bool indexExists = find_unsorted_pair(i, i, iRow, jCol, nnz); - return (indexExists) ? (W_val + A_val * alpha) : W_val; - }); - + const auto fail = verifyAnswer(&W, [=](local_ordinal_type i) -> real_type { + const bool indexExists = find_unsorted_pair(i, i, iRow, jCol, nnz); + return (indexExists) ? (W_val + A_val * alpha) : W_val; + }); + printMessage(fail, __func__, rank); return fail; } /** - * @brief Copy 'n_rows' rows from matrix 'A', started from 'A_rows_st', to the rows started from 'B_rows_st' in 'B'. - * The non-zero elements start from 'B_nnz_st' will be replaced by the new elements. - * - * @pre 'A' must have exactly, or more than 'n_rows' rows after row 'A_rows_st' - * @pre 'B' must have exactly, or more than 'n_rows' rows after row 'B_rows_st' - * @pre 'B_nnz_st' + the number of non-zeros in the copied the rows must be less or equal to B.nnz - * @pre User must know the nonzero pattern of A and B. Assume non-zero patterns of A and B wont change, and A is a submatrix of B - * @pre Otherwise, this function may replace the non-zero values and nonzero patterns for the undesired elements. - */ + * @brief Copy 'n_rows' rows from matrix 'A', started from 'A_rows_st', to the rows started from 'B_rows_st' in 'B'. + * The non-zero elements start from 'B_nnz_st' will be replaced by the new elements. + * + * @pre 'A' must have exactly, or more than 'n_rows' rows after row 'A_rows_st' + * @pre 'B' must have exactly, or more than 'n_rows' rows after row 'B_rows_st' + * @pre 'B_nnz_st' + the number of non-zeros in the copied the rows must be less or equal to B.nnz + * @pre User must know the nonzero pattern of A and B. Assume non-zero patterns of A and B wont change, and A is a + * submatrix of B + * @pre Otherwise, this function may replace the non-zero values and nonzero patterns for the undesired elements. + */ int copy_rows_block_from(hiop::hiopMatrixSparse& A, hiop::hiopMatrixSparse& B, local_ordinal_type A_rows_st, @@ -1019,14 +915,12 @@ class MatrixTestsSparse : public TestBase const local_ordinal_type B_nnz = B.numberOfNonzeros(); local_ordinal_type nnz_A_need_to_copy{0}; - for(local_ordinal_type k=0;k= A_rows_st && A_iRow[k] < A_rows_st + n_rows ) - { + for(local_ordinal_type k = 0; k < A_nnz; ++k) { + if(A_iRow[k] >= A_rows_st && A_iRow[k] < A_rows_st + n_rows) { nnz_A_need_to_copy++; } // assume matrix element is ordered by row - if(A_iRow[k]>=A_rows_st + n_rows) - { + if(A_iRow[k] >= A_rows_st + n_rows) { break; } } @@ -1034,8 +928,8 @@ class MatrixTestsSparse : public TestBase assert(A.n() >= B.n()); assert(n_rows + A_rows_st <= A.m()); assert(n_rows + B_rows_st <= B.m()); - assert(nnz_A_need_to_copy<=A_nnz); - assert(nnz_A_need_to_copy+B_nnz_st<=B_nnz); + assert(nnz_A_need_to_copy <= A_nnz); + assert(nnz_A_need_to_copy + B_nnz_st <= B_nnz); const real_type A_val = one; const real_type B_val = half; @@ -1047,26 +941,26 @@ class MatrixTestsSparse : public TestBase B.copyRowsBlockFrom(A, A_rows_st, n_rows, B_rows_st, B_nnz_st); - fail += verifyAnswer(&B,0,B_nnz_st,B_val); - fail += verifyAnswer(&B,B_nnz_st,B_nnz_st+nnz_A_need_to_copy,A_val); - fail += verifyAnswer(&B,B_nnz_st+nnz_A_need_to_copy,B_nnz,B_val); + fail += verifyAnswer(&B, 0, B_nnz_st, B_val); + fail += verifyAnswer(&B, B_nnz_st, B_nnz_st + nnz_A_need_to_copy, A_val); + fail += verifyAnswer(&B, B_nnz_st + nnz_A_need_to_copy, B_nnz, B_val); printMessage(fail, __func__); return fail; - } /** - * @brief Copy matrix 'B' into `A` as a subblock starting from the corner point ('A_rows_st', 'A_cols_st'). - * The non-zero elements start from 'B_nnz_st' will be replaced by the new elements. - * - * @pre 'A' must have exactly, or more than 'B.n_rows' rows after row 'A_rows_st' - * @pre 'A' must have exactly, or more than 'B.n_cols' cols after row 'A_cols_st' - * @pre 'B_nnz_st' + the number of non-zeros in the copied the rows must be less or equal to B.nnz - * @pre User must know the nonzero pattern of A and B. We assume the non-zero patterns of A and B stay the same, and B is a submatrix of A. - * @pre This function may replace the non-zero values and nonzero patterns of A. - * @pre Allow up-to two elements setting to the same position in the sparse matrix. - */ + * @brief Copy matrix 'B' into `A` as a subblock starting from the corner point ('A_rows_st', 'A_cols_st'). + * The non-zero elements start from 'B_nnz_st' will be replaced by the new elements. + * + * @pre 'A' must have exactly, or more than 'B.n_rows' rows after row 'A_rows_st' + * @pre 'A' must have exactly, or more than 'B.n_cols' cols after row 'A_cols_st' + * @pre 'B_nnz_st' + the number of non-zeros in the copied the rows must be less or equal to B.nnz + * @pre User must know the nonzero pattern of A and B. We assume the non-zero patterns of A and B stay the same, and B is a + * submatrix of A. + * @pre This function may replace the non-zero values and nonzero patterns of A. + * @pre Allow up-to two elements setting to the same position in the sparse matrix. + */ int matrix_copy_submatrix_from(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, hiop::hiopMatrixSparse& B, @@ -1075,13 +969,13 @@ class MatrixTestsSparse : public TestBase local_ordinal_type A_nnz_st, const int rank = 0) { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A assert(A.m() >= B.m() + A_rows_st); assert(A.n() >= B.n() + A_cols_st); - assert(B.numberOfNonzeros()+A_nnz_st <= A.numberOfNonzeros()); + assert(B.numberOfNonzeros() + A_nnz_st <= A.numberOfNonzeros()); const real_type A_val = one; const real_type B_val = two; @@ -1103,47 +997,45 @@ class MatrixTestsSparse : public TestBase const auto* B_iRow = getRowIndices(&B); const auto* B_jCol = getColumnIndices(&B); auto B_nnz = B.numberOfNonzeros(); - - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type + + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + double ans = zero; + { - double ans = zero; - - { - const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); - const bool indexExists_in_B = find_unsorted_pair(i-A_rows_st, j-A_cols_st, B_iRow, B_jCol, B_nnz); - const bool indexExists_in_A_not_replaced_by_B = ( find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) - || find_unsorted_pair(i, j, iRow, jCol, A_nnz_st+B_nnz, nnz)); - if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { - // this ele comes from sparse matrix A and B - ans = B_val + A_val; - } else if(indexExists_in_B) { - // this ele comes from sparse matrix B - ans = B_val; - } else if(indexExists_in_A) { - // this ele comes from sparse matrix A - ans = A_val; - } + const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); + const bool indexExists_in_B = find_unsorted_pair(i - A_rows_st, j - A_cols_st, B_iRow, B_jCol, B_nnz); + const bool indexExists_in_A_not_replaced_by_B = (find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) || + find_unsorted_pair(i, j, iRow, jCol, A_nnz_st + B_nnz, nnz)); + if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { + // this ele comes from sparse matrix A and B + ans = B_val + A_val; + } else if(indexExists_in_B) { + // this ele comes from sparse matrix B + ans = B_val; + } else if(indexExists_in_A) { + // this ele comes from sparse matrix A + ans = A_val; } - return ans; } - ); + return ans; + }); printMessage(fail, __func__); return fail; } - + /** - * @brief Copy the transpose of matrix 'B' into `A` as a subblock starting from the corner point ('A_rows_st', 'A_cols_st'). - * The non-zero elements start from 'B_nnz_st' will be replaced by the new elements. - * - * @pre 'A' must have exactly, or more than 'B.n_cols' rows after row 'A_rows_st' - * @pre 'A' must have exactly, or more than 'B.n_rows' cols after row 'A_cols_st' - * @pre 'B_nnz_st' + the number of non-zeros in the copied the rows must be less or equal to B.nnz - * @pre User must know the nonzero pattern of A and B. We assume the non-zero patterns of A and B stay the same, and the transpose of B is a submatrix of A. - * @pre This function may replace the non-zero values and nonzero patterns of A. - * @pre Allow up-to two elements setting to the same position in the sparse matrix. - */ + * @brief Copy the transpose of matrix 'B' into `A` as a subblock starting from the corner point ('A_rows_st', + * 'A_cols_st'). The non-zero elements start from 'B_nnz_st' will be replaced by the new elements. + * + * @pre 'A' must have exactly, or more than 'B.n_cols' rows after row 'A_rows_st' + * @pre 'A' must have exactly, or more than 'B.n_rows' cols after row 'A_cols_st' + * @pre 'B_nnz_st' + the number of non-zeros in the copied the rows must be less or equal to B.nnz + * @pre User must know the nonzero pattern of A and B. We assume the non-zero patterns of A and B stay the same, and the + * transpose of B is a submatrix of A. + * @pre This function may replace the non-zero values and nonzero patterns of A. + * @pre Allow up-to two elements setting to the same position in the sparse matrix. + */ int matrix_copy_submatrix_from_trans(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, hiop::hiopMatrixSparse& B, @@ -1152,13 +1044,13 @@ class MatrixTestsSparse : public TestBase local_ordinal_type A_nnz_st, const int rank = 0) { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A assert(A.m() >= B.m() + A_rows_st); assert(A.n() >= B.n() + A_cols_st); - assert(B.numberOfNonzeros()+A_nnz_st <= A.numberOfNonzeros()); + assert(B.numberOfNonzeros() + A_nnz_st <= A.numberOfNonzeros()); const real_type A_val = one; const real_type B_val = two; @@ -1180,79 +1072,73 @@ class MatrixTestsSparse : public TestBase const auto* B_iRow = getRowIndices(&B); const auto* B_jCol = getColumnIndices(&B); auto B_nnz = B.numberOfNonzeros(); - - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type + + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + double ans = zero; + { - double ans = zero; - - { - const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); - const bool indexExists_in_B = find_unsorted_pair(j-A_cols_st, i-A_rows_st, B_iRow, B_jCol, B_nnz); - const bool indexExists_in_A_not_replaced_by_B = ( find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) - || find_unsorted_pair(i, j, iRow, jCol, A_nnz_st+B_nnz, nnz)); - if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { - // this ele comes from sparse matrix A and B - ans = B_val + A_val; - } else if(indexExists_in_B) { - // this ele comes from sparse matrix B - ans = B_val; - } else if(indexExists_in_A) { - // this ele comes from sparse matrix A - ans = A_val; - } + const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); + const bool indexExists_in_B = find_unsorted_pair(j - A_cols_st, i - A_rows_st, B_iRow, B_jCol, B_nnz); + const bool indexExists_in_A_not_replaced_by_B = (find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) || + find_unsorted_pair(i, j, iRow, jCol, A_nnz_st + B_nnz, nnz)); + if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { + // this ele comes from sparse matrix A and B + ans = B_val + A_val; + } else if(indexExists_in_B) { + // this ele comes from sparse matrix B + ans = B_val; + } else if(indexExists_in_A) { + // this ele comes from sparse matrix A + ans = A_val; } - return ans; } - ); + return ans; + }); printMessage(fail, __func__); return fail; } - + /** - * @brief copy a sparse matrix into a dense matrix - * - * @pre 'A' must have same dim as `W` - */ - bool matrix_copy_to( hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, const int rank = 0) + * @brief copy a sparse matrix into a dense matrix + * + * @pre 'A' must have same dim as `W` + */ + bool matrix_copy_to(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, const int rank = 0) { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A - + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A + const real_type A_val = one; const real_type W_val = two; W.setToConstant(W_val); - + A.copy_to(W); - + int fail = 0; const local_ordinal_type* iRow = getRowIndices(&A); const local_ordinal_type* jCol = getColumnIndices(&A); const local_ordinal_type nnz = A.numberOfNonzeros(); - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - const bool indexExists = find_unsorted_pair(i, j, iRow, jCol, nnz); - return (indexExists) ? A_val: zero; - } - ); + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + const bool indexExists = find_unsorted_pair(i, j, iRow, jCol, nnz); + return (indexExists) ? A_val : zero; + }); printMessage(fail, __func__); return fail; } /** - * @brief Copy a diagonal matrix into `A` as a subblock starting from the corner point ('A_rows_st', 'A_cols_st'). - * The non-zero elements start from 'A_nnz_st' will be replaced by the new elements. - * - * @pre 'A' must have exactly, or more than 'nnz_to_copy' rows after row 'A_rows_st' - * @pre 'A' must have exactly, or more than 'nnz_to_copy' rows after row 'A_cols_st' - * @pre The input diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. - * @pre User must know the nonzero pattern of A. - * @pre This function may replace the non-zero values and nonzero patterns for the undesired elements. - * @pre Allow up-to two elements setting to the same position in the sparse matrix. - * - */ + * @brief Copy a diagonal matrix into `A` as a subblock starting from the corner point ('A_rows_st', 'A_cols_st'). + * The non-zero elements start from 'A_nnz_st' will be replaced by the new elements. + * + * @pre 'A' must have exactly, or more than 'nnz_to_copy' rows after row 'A_rows_st' + * @pre 'A' must have exactly, or more than 'nnz_to_copy' rows after row 'A_cols_st' + * @pre The input diagonal matrix is 'src_val'*identity matrix with size 'nnz_to_copy'x'nnz_to_copy'. + * @pre User must know the nonzero pattern of A. + * @pre This function may replace the non-zero values and nonzero patterns for the undesired elements. + * @pre Allow up-to two elements setting to the same position in the sparse matrix. + * + */ int matrix_copy_diag_matrix_to_subblock(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, local_ordinal_type A_rows_st, @@ -1261,13 +1147,13 @@ class MatrixTestsSparse : public TestBase local_ordinal_type nnz_to_copy, const int rank = 0) { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A - + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A + assert(A.m() >= nnz_to_copy + A_rows_st); assert(A.n() >= nnz_to_copy + A_cols_st); - - assert(nnz_to_copy+A_nnz_st <= A.numberOfNonzeros()); + + assert(nnz_to_copy + A_nnz_st <= A.numberOfNonzeros()); const real_type A_val = half; const real_type src_val = two; @@ -1283,47 +1169,46 @@ class MatrixTestsSparse : public TestBase const local_ordinal_type* iRow = getRowIndices(&A); const local_ordinal_type* jCol = getColumnIndices(&A); const local_ordinal_type nnz = A.numberOfNonzeros(); - - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - double ans = zero; - const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); - const bool indexExists_in_B = ( (i-A_rows_st>=0) && ((i-A_rows_st) == (j-A_cols_st)) && (i-A_rows_st) real_type { + double ans = zero; + + const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); + const bool indexExists_in_B = + ((i - A_rows_st >= 0) && ((i - A_rows_st) == (j - A_cols_st)) && (i - A_rows_st) < nnz_to_copy); + const bool indexExists_in_A_not_replaced_by_B = (find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) || + find_unsorted_pair(i, j, iRow, jCol, A_nnz_st + nnz_to_copy, nnz)); + if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { + // this ele comes from sparse matrix A and B + ans = src_val + A_val; + } else if(indexExists_in_B) { + // this ele comes from diagonal matrix + ans = src_val; + } else if(indexExists_in_A) { + // this ele comes from sparse matrix A + ans = A_val; } - ); + return ans; + }); printMessage(fail, __func__); return fail; } /** - * @brief Copy a diagonal matrix into `A` as a subblock starting from the corner point ('A_rows_st', 'A_cols_st'). - * The non-zero elements start from 'A_nnz_st' will be replaced by the new elements. - * - * @pre 'A' must have exactly, or more than 'nnz_to_copy' rows after row 'A_rows_st' - * @pre 'A' must have exactly, or more than 'nnz_to_copy' rows after row 'A_cols_st' - * @pre The input diagonal matrix has leading diagonal elements from the nonzeros from `D`, i.e., `pattern` decides the non-zero pattern - * @pre The index vector `pattern` has same length as `D`, and `nnz_to_copy` nonzeros. - * @pre User must know the nonzero pattern of A. - * @pre Assuming the non-zero pattern of A won't change after the first call of this function - * @pre This function may replace the non-zero values and nonzero patterns for the undesired elements. - * @pre Allow up-to two elements setting to the same position in the sparse matrix. - */ + * @brief Copy a diagonal matrix into `A` as a subblock starting from the corner point ('A_rows_st', 'A_cols_st'). + * The non-zero elements start from 'A_nnz_st' will be replaced by the new elements. + * + * @pre 'A' must have exactly, or more than 'nnz_to_copy' rows after row 'A_rows_st' + * @pre 'A' must have exactly, or more than 'nnz_to_copy' rows after row 'A_cols_st' + * @pre The input diagonal matrix has leading diagonal elements from the nonzeros from `D`, i.e., `pattern` decides the + * non-zero pattern + * @pre The index vector `pattern` has same length as `D`, and `nnz_to_copy` nonzeros. + * @pre User must know the nonzero pattern of A. + * @pre Assuming the non-zero pattern of A won't change after the first call of this function + * @pre This function may replace the non-zero values and nonzero patterns for the undesired elements. + * @pre Allow up-to two elements setting to the same position in the sparse matrix. + */ int matrix_copy_diag_matrix_to_subblock_w_pattern(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, hiop::hiopVector& D, @@ -1334,24 +1219,24 @@ class MatrixTestsSparse : public TestBase local_ordinal_type nnz_to_copy, const int rank = 0) { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A - + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A + assert(A.m() >= nnz_to_copy + A_rows_st); assert(A.n() >= nnz_to_copy + A_cols_st); - - assert(nnz_to_copy+A_nnz_st <= A.numberOfNonzeros()); + + assert(nnz_to_copy + A_nnz_st <= A.numberOfNonzeros()); const local_ordinal_type N = getLocalSize(&D); assert(N == getLocalSize(&pattern)); - + const real_type A_val = half; const real_type D_val = two; A.setToConstant(A_val); D.setToConstant(D_val); pattern.setToConstant(zero); - if (rank== 0) { - for(int i=0; i real_type - { - double ans = zero; - const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); - const bool indexExists_in_B = ( (i-A_rows_st>=0) && ((i-A_rows_st) == (j-A_cols_st)) && (i-A_rows_st) real_type { + double ans = zero; + + const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); + const bool indexExists_in_B = + ((i - A_rows_st >= 0) && ((i - A_rows_st) == (j - A_cols_st)) && (i - A_rows_st) < nnz_to_copy); + const bool indexExists_in_A_not_replaced_by_B = (find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) || + find_unsorted_pair(i, j, iRow, jCol, A_nnz_st + nnz_to_copy, nnz)); + if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { + // this ele comes from sparse matrix A and B + ans = D_val + A_val; + } else if(indexExists_in_B) { + // this ele comes from diagonal matrix + ans = D_val; + } else if(indexExists_in_A) { + // this ele comes from sparse matrix A + ans = A_val; } - ); + return ans; + }); printMessage(fail, __func__); return fail; } /** - * @brief Copy selected columns from a diagonal matrix into `A` as a subblock starting from the corner point ('A_rows_st', 'A_cols_st'). - * The non-zero elements start from 'A_nnz_st' will be replaced by the new elements. - * - * @pre 'A' must have exactly, or more than 'getLocalSize(pattern)' rows after row 'A_rows_st' - * @pre 'A' must have exactly, or more than 'nnz_to_copy' columns after column 'A_cols_st' - * @pre The index vector `pattern` has `nnz_to_copy` nonzeros. - * @pre User must know the nonzero pattern of A. - * @pre Assuming the non-zero pattern of A won't change after the first call of this function - * @pre Otherwise, this function may replace the non-zero values and nonzero patterns for the undesired elements. - * @pre Allow up-to two elements setting to the same position in the sparse matrix. - */ + * @brief Copy selected columns from a diagonal matrix into `A` as a subblock starting from the corner point ('A_rows_st', + * 'A_cols_st'). The non-zero elements start from 'A_nnz_st' will be replaced by the new elements. + * + * @pre 'A' must have exactly, or more than 'getLocalSize(pattern)' rows after row 'A_rows_st' + * @pre 'A' must have exactly, or more than 'nnz_to_copy' columns after column 'A_cols_st' + * @pre The index vector `pattern` has `nnz_to_copy` nonzeros. + * @pre User must know the nonzero pattern of A. + * @pre Assuming the non-zero pattern of A won't change after the first call of this function + * @pre Otherwise, this function may replace the non-zero values and nonzero patterns for the undesired elements. + * @pre Allow up-to two elements setting to the same position in the sparse matrix. + */ int matrix_set_submatrix_to_constant_diag_w_colpattern(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, hiop::hiopVector& pattern, @@ -1414,23 +1297,23 @@ class MatrixTestsSparse : public TestBase local_ordinal_type nnz_to_copy, const int rank = 0) { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A - + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A + const local_ordinal_type N = getLocalSize(&pattern); - + assert(A.m() >= N + A_rows_st); assert(A.n() >= nnz_to_copy + A_cols_st); - - assert(nnz_to_copy+A_nnz_st <= A.numberOfNonzeros()); - + + assert(nnz_to_copy + A_nnz_st <= A.numberOfNonzeros()); + const real_type A_val = half; const real_type scal_val = two; A.setToConstant(A_val); pattern.setToConstant(zero); - if (rank== 0) { - for(int i=0; i real_type - { - double ans = zero; - const int i_src = i - A_rows_st; - const int j_src = j - A_cols_st; - const bool i_in_B = (i_src >= 0 && i_src < N ); - const bool j_in_B = (j_src >= 0 && j_src < nnz_to_copy); - const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); - const bool indexExists_in_B = i_in_B && j_in_B && (ix_v[i_src]!=0.0 && j_src == nnz_to_copy + i_src - N); - const bool indexExists_in_A_not_replaced_by_B = ( find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) - || find_unsorted_pair(i, j, iRow, jCol, A_nnz_st+nnz_to_copy, nnz)); - if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { - // this ele comes from sparse matrix A and B - ans = scal_val + A_val; - } else if(indexExists_in_B) { - // this ele comes from diagonal matrix - ans = scal_val; - } else if(indexExists_in_A) { - // this ele comes from sparse matrix A - ans = A_val; - } - return ans; + const double* ix_v = pattern.local_data_host_const(); + + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + double ans = zero; + const int i_src = i - A_rows_st; + const int j_src = j - A_cols_st; + const bool i_in_B = (i_src >= 0 && i_src < N); + const bool j_in_B = (j_src >= 0 && j_src < nnz_to_copy); + const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); + const bool indexExists_in_B = i_in_B && j_in_B && (ix_v[i_src] != 0.0 && j_src == nnz_to_copy + i_src - N); + const bool indexExists_in_A_not_replaced_by_B = (find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) || + find_unsorted_pair(i, j, iRow, jCol, A_nnz_st + nnz_to_copy, nnz)); + if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { + // this ele comes from sparse matrix A and B + ans = scal_val + A_val; + } else if(indexExists_in_B) { + // this ele comes from diagonal matrix + ans = scal_val; + } else if(indexExists_in_A) { + // this ele comes from sparse matrix A + ans = A_val; } - ); + return ans; + }); printMessage(fail, __func__); return fail; } /** - * @brief Copy selected rowsß from a diagonal matrix into `A` as a subblock starting from the corner point ('A_rows_st', 'A_cols_st'). - * The non-zero elements start from 'A_nnz_st' will be replaced by the new elements. - * - * @pre 'A' must have exactly, or more than 'getLocalSize(pattern)' columns after column 'A_col_st' - * @pre 'A' must have exactly, or more than 'nnz_to_copy' columns after column 'A_rowsß_st' - * @pre The index vector `pattern` has `nnz_to_copy` nonzeros. - * @pre User must know the nonzero pattern of A. - * @pre Assuming the non-zero pattern of A won't change after the first call of this function - * @pre Otherwise, this function may replace the non-zero values and nonzero patterns for the undesired elements. - * @pre Allow up-to two elements setting to the same position in the sparse matrix. - */ + * @brief Copy selected rowsß from a diagonal matrix into `A` as a subblock starting from the corner point ('A_rows_st', + * 'A_cols_st'). The non-zero elements start from 'A_nnz_st' will be replaced by the new elements. + * + * @pre 'A' must have exactly, or more than 'getLocalSize(pattern)' columns after column 'A_col_st' + * @pre 'A' must have exactly, or more than 'nnz_to_copy' columns after column 'A_rowsß_st' + * @pre The index vector `pattern` has `nnz_to_copy` nonzeros. + * @pre User must know the nonzero pattern of A. + * @pre Assuming the non-zero pattern of A won't change after the first call of this function + * @pre Otherwise, this function may replace the non-zero values and nonzero patterns for the undesired elements. + * @pre Allow up-to two elements setting to the same position in the sparse matrix. + */ int matrix_set_submatrix_to_constant_diag_w_rowpattern(hiop::hiopMatrixDense& W, hiop::hiopMatrixSparse& A, hiop::hiopVector& pattern, @@ -1498,23 +1378,23 @@ class MatrixTestsSparse : public TestBase local_ordinal_type nnz_to_copy, const int rank = 0) { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A - + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A + const local_ordinal_type N = getLocalSize(&pattern); - + assert(A.n() >= N + A_cols_st); assert(A.m() >= nnz_to_copy + A_rows_st); - - assert(nnz_to_copy+A_nnz_st <= A.numberOfNonzeros()); - + + assert(nnz_to_copy + A_nnz_st <= A.numberOfNonzeros()); + const real_type A_val = half; const real_type scal_val = two; A.setToConstant(A_val); pattern.setToConstant(zero); - if (rank== 0) { - for(int i=0; i real_type - { - double ans = zero; - const int i_src = i - A_rows_st; - const int j_src = j - A_cols_st; - const bool i_in_B = (i_src >= 0 && i_src < nnz_to_copy); - const bool j_in_B = (j_src >= 0 && j_src < N); - const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); - const bool indexExists_in_B = i_in_B && j_in_B && (ix_v[j_src]!=0.0 && i_src == nnz_to_copy + j_src - N); - const bool indexExists_in_A_not_replaced_by_B = ( find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) - || find_unsorted_pair(i, j, iRow, jCol, A_nnz_st+nnz_to_copy, nnz)); - if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { - // this ele comes from sparse matrix A and B - ans = scal_val + A_val; - } else if(indexExists_in_B) { - // this ele comes from diagonal matrix - ans = scal_val; - } else if(indexExists_in_A) { - // this ele comes from sparse matrix A - ans = A_val; - } - return ans; + const double* ix_v = pattern.local_data_host_const(); + + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + double ans = zero; + const int i_src = i - A_rows_st; + const int j_src = j - A_cols_st; + const bool i_in_B = (i_src >= 0 && i_src < nnz_to_copy); + const bool j_in_B = (j_src >= 0 && j_src < N); + const bool indexExists_in_A = find_unsorted_pair(i, j, iRow, jCol, nnz); + const bool indexExists_in_B = i_in_B && j_in_B && (ix_v[j_src] != 0.0 && i_src == nnz_to_copy + j_src - N); + const bool indexExists_in_A_not_replaced_by_B = (find_unsorted_pair(i, j, iRow, jCol, 0, A_nnz_st) || + find_unsorted_pair(i, j, iRow, jCol, A_nnz_st + nnz_to_copy, nnz)); + if(indexExists_in_A_not_replaced_by_B && indexExists_in_B) { + // this ele comes from sparse matrix A and B + ans = scal_val + A_val; + } else if(indexExists_in_B) { + // this ele comes from diagonal matrix + ans = scal_val; + } else if(indexExists_in_A) { + // this ele comes from sparse matrix A + ans = A_val; } - ); + return ans; + }); printMessage(fail, __func__); return fail; } /** - * @brief set matrix `A` as [C -I I 0 0; D 0 0 -I I] - * - * @pre 'C' must have same number of cols as `D` - * @pre nnz of 'A' is predetermined - */ - bool matrix_set_Jac_FR( hiop::hiopMatrixDense& W, - hiop::hiopMatrixSparse& A, - hiop::hiopMatrixSparse& C, - hiop::hiopMatrixSparse& D, - const int rank = 0) + * @brief set matrix `A` as [C -I I 0 0; D 0 0 -I I] + * + * @pre 'C' must have same number of cols as `D` + * @pre nnz of 'A' is predetermined + */ + bool matrix_set_Jac_FR(hiop::hiopMatrixDense& W, + hiop::hiopMatrixSparse& A, + hiop::hiopMatrixSparse& C, + hiop::hiopMatrixSparse& D, + const int rank = 0) { - assert(A.m() == W.m()); // W has same dimension as A - assert(A.n() == W.n()); // W has same dimension as A - assert(C.n() == D.n()); // C has same number of cols as D + assert(A.m() == W.m()); // W has same dimension as A + assert(A.n() == W.n()); // W has same dimension as A + assert(C.n() == D.n()); // C has same number of cols as D int fail = 0; const real_type C_val = half; @@ -1600,50 +1477,44 @@ class MatrixTestsSparse : public TestBase // copy to a dense matrix A.copy_to(W); - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - double ans = zero; - // this ele comes from sparse matrix C - if(i=mC && i=mC && i real_type { + double ans = zero; + // this ele comes from sparse matrix C + if(i < mC && j < nC) { + const bool indexExists = find_unsorted_pair(i, j, C_iRow, C_jCol, C_nnz); + if(indexExists) { + ans = C_val; + } + } else if(i < mC + mD && j < nD) { + // this ele comes from sparse matrix D + const bool indexExists = find_unsorted_pair(i - mC, j, D_iRow, D_jCol, D_nnz); + if(indexExists) { + ans = D_val; } - return ans; + } else if(i < mC && j == i + nC) { + // this is -I in [C -I I 0 0] + ans = -one; + } else if(i < mC && j == i + nC + mC) { + // this is I in [C -I I 0 0] + ans = one; + } else if(i >= mC && i < mC + mD && j == nC + mC + i) { + // this is -I in [D 0 0 -I I] + ans = -one; + } else if(i >= mC && i < mC + mD && j == nC + mC + mD + i) { + // this is I in [D 0 0 -I I] + ans = one; } - ); + return ans; + }); printMessage(fail, __func__, rank); return fail; } private: - /// TODO: The sparse matrix is not distributed - all is local. + /// TODO: The sparse matrix is not distributed - all is local. // Rename functions to remove redundant "local" from their names? - virtual void setLocalElement( - hiop::hiopVector* x, - const local_ordinal_type i, - const real_type val) = 0; + virtual void setLocalElement(hiop::hiopVector* x, const local_ordinal_type i, const real_type val) = 0; virtual real_type getLocalElement(const hiop::hiopMatrix* a, local_ordinal_type i, local_ordinal_type j) = 0; virtual real_type getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) = 0; virtual real_type* getMatrixData(hiop::hiopMatrixSparse* a) = 0; @@ -1652,14 +1523,14 @@ class MatrixTestsSparse : public TestBase virtual const local_ordinal_type* getColumnIndices(const hiop::hiopMatrixSparse* a) = 0; virtual local_ordinal_type getLocalSize(const hiop::hiopVector* x) = 0; virtual int verifyAnswer(hiop::hiopMatrixSparse* A, real_type answer) = 0; - virtual int verifyAnswer(hiop::hiopMatrix* A, local_ordinal_type nnz_st, local_ordinal_type nnz_ed, const double answer) = 0; - virtual int verifyAnswer( - hiop::hiopMatrixDense* A, - std::function expect) = 0; + virtual int verifyAnswer(hiop::hiopMatrix* A, + local_ordinal_type nnz_st, + local_ordinal_type nnz_ed, + const double answer) = 0; + virtual int verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) = 0; virtual int verifyAnswer(hiop::hiopVector* x, real_type answer) = 0; - virtual int verifyAnswer( - hiop::hiopVector* x, - std::function expect) = 0; + virtual int verifyAnswer(hiop::hiopVector* x, std::function expect) = 0; virtual local_ordinal_type* numNonzerosPerRow(hiop::hiopMatrixSparse* mat) = 0; virtual local_ordinal_type* numNonzerosPerCol(hiop::hiopMatrixSparse* mat) = 0; virtual void maybeCopyToDev(hiop::hiopMatrixSparse*) = 0; @@ -1679,10 +1550,8 @@ class MatrixTestsSparse : public TestBase // linearly scans an unsorted array static bool find_unsorted_pair(int valA, int valB, const int* arrA, const int* arrB, size_t arrslen) { - for (int i = 0; i < static_cast(arrslen); i++) - { - if (arrA[i] == valA && arrB[i] == valB) - { + for(int i = 0; i < static_cast(arrslen); i++) { + if(arrA[i] == valA && arrB[i] == valB) { return true; } } @@ -1692,10 +1561,8 @@ class MatrixTestsSparse : public TestBase // linearly scans an unsorted array within range [nnz_st, nnz_ed) static bool find_unsorted_pair(int valA, int valB, const int* arrA, const int* arrB, size_t idx_st, size_t idx_ed) { - for (int i = idx_st; i < static_cast(idx_ed); i++) - { - if (arrA[i] == valA && arrB[i] == valB) - { + for(int i = idx_st; i < static_cast(idx_ed); i++) { + if(arrA[i] == valA && arrB[i] == valB) { return true; } } @@ -1703,4 +1570,5 @@ class MatrixTestsSparse : public TestBase } }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsSparseTriplet.cpp b/tests/LinAlg/matrixTestsSparseTriplet.cpp index 1a83ce124..fca4c8b51 100644 --- a/tests/LinAlg/matrixTestsSparseTriplet.cpp +++ b/tests/LinAlg/matrixTestsSparseTriplet.cpp @@ -53,7 +53,7 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #include @@ -61,52 +61,49 @@ #include "matrixTestsSparseTriplet.hpp" #include -namespace hiop{ namespace tests { +namespace hiop +{ +namespace tests +{ -/// Set `i`th element of vector `x` -void MatrixTestsSparseTriplet::setLocalElement( - hiop::hiopVector* xvec, - const local_ordinal_type i, - const real_type val) +/// Set `i`th element of vector `x` +void MatrixTestsSparseTriplet::setLocalElement(hiop::hiopVector* xvec, const local_ordinal_type i, const real_type val) { auto x = dynamic_cast(xvec); - if(x != nullptr) - { + if(x != nullptr) { real_type* data = x->local_data(); data[i] = val; - } - else THROW_NULL_DEREF; + } else + THROW_NULL_DEREF; } /// Returns element (i,j) of a dense matrix `A`. /// First need to retrieve hiopMatrixDense from the abstract interface -real_type MatrixTestsSparseTriplet::getLocalElement( - const hiop::hiopMatrix* A, - local_ordinal_type row, - local_ordinal_type col) +real_type MatrixTestsSparseTriplet::getLocalElement(const hiop::hiopMatrix* A, + local_ordinal_type row, + local_ordinal_type col) { auto mat = dynamic_cast(A); - - if (mat != nullptr) - { + + if(mat != nullptr) { const double* M = mat->local_data_const(); - //return M[row][col]; - return M[row*mat->get_local_size_n()+col]; + // return M[row][col]; + return M[row * mat->get_local_size_n() + col]; } - else THROW_NULL_DEREF; + else + THROW_NULL_DEREF; } /// Returns element _i_ of vector _x_. /// First need to retrieve hiopVectorPar from the abstract interface -real_type MatrixTestsSparseTriplet::getLocalElement( - const hiop::hiopVector* x, - local_ordinal_type i) +real_type MatrixTestsSparseTriplet::getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) { const hiop::hiopVectorPar* xvec = dynamic_cast(x); if(xvec != nullptr) return xvec->local_data_const()[i]; - else THROW_NULL_DEREF; + else + THROW_NULL_DEREF; } real_type* MatrixTestsSparseTriplet::getMatrixData(hiop::hiopMatrixSparse* A) @@ -123,14 +120,12 @@ real_type MatrixTestsSparseTriplet::getMatrixData(hiop::hiopMatrixSparse* A, loc auto* jCol = mat->j_col(); auto nnz = mat->numberOfNonzeros(); - for (auto k=0; k< nnz; i++) - { - if(iRow[k]==i && jCol[k]==j){ + for(auto k = 0; k < nnz; i++) { + if(iRow[k] == i && jCol[k] == j) { return val[k]; } // assume elements are row-major ordered. - if(iRow[k]>=i) - break; + if(iRow[k] >= i) break; } return zero; } @@ -153,7 +148,8 @@ int MatrixTestsSparseTriplet::getLocalSize(const hiop::hiopVector* x) const hiop::hiopVectorPar* xvec = dynamic_cast(x); if(xvec != nullptr) return static_cast(xvec->get_local_size()); - else THROW_NULL_DEREF; + else + THROW_NULL_DEREF; } /** @@ -165,16 +161,13 @@ int MatrixTestsSparseTriplet::getLocalSize(const hiop::hiopVector* x) [[nodiscard]] int MatrixTestsSparseTriplet::verifyAnswer(hiop::hiopMatrixSparse* A, const double answer) { - if(A == nullptr) - return 1; + if(A == nullptr) return 1; auto mat = dynamic_cast(A); const local_ordinal_type nnz = mat->numberOfNonzeros(); const real_type* values = mat->M(); int fail = 0; - for (local_ordinal_type i=0; i(A); const real_type* values = mat->M(); int fail = 0; - for (local_ordinal_type i=nnz_st; i expect) +int MatrixTestsSparseTriplet::verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) { - //auto A = dynamic_cast(Amat); + // auto A = dynamic_cast(Amat); assert(A->get_local_size_n() == A->n() && "Matrix should not be distributed"); const local_ordinal_type M = A->get_local_size_m(); const local_ordinal_type N = A->get_local_size_n(); int fail = 0; - for (local_ordinal_type i=0; i expect) +[[nodiscard]] +int MatrixTestsSparseTriplet::verifyAnswer(hiop::hiopVector* x, std::function expect) { const local_ordinal_type N = getLocalSize(x); int local_fail = 0; - for (int i=0; im()]; std::memset(sparsity_pattern, 0, sizeof(local_ordinal_type) * mat->m()); - for(local_ordinal_type i = 0; i < nnz; i++) - { + for(local_ordinal_type i = 0; i < nnz; i++) { sparsity_pattern[iRow[i]]++; } return sparsity_pattern; @@ -293,38 +275,33 @@ local_ordinal_type* MatrixTestsSparseTriplet::numNonzerosPerCol(hiop::hiopMatrix auto sparsity_pattern = new local_ordinal_type[mat->n()]; std::memset(sparsity_pattern, 0, sizeof(local_ordinal_type) * mat->n()); - for(local_ordinal_type i = 0; i < nnz; i++) - { + for(local_ordinal_type i = 0; i < nnz; i++) { sparsity_pattern[jCol[i]]++; } return sparsity_pattern; } -void MatrixTestsSparseTriplet::initializeMatrix( - hiop::hiopMatrixSparse* mat, - local_ordinal_type entries_per_row) +void MatrixTestsSparseTriplet::initializeMatrix(hiop::hiopMatrixSparse* mat, local_ordinal_type entries_per_row) { auto* A = dynamic_cast(mat); - local_ordinal_type * iRow = A->i_row(); - local_ordinal_type * jCol = A->j_col(); - double * val = A->M(); + local_ordinal_type* iRow = A->i_row(); + local_ordinal_type* jCol = A->j_col(); + double* val = A->M(); local_ordinal_type m = A->m(); local_ordinal_type n = A->n(); assert(A->numberOfNonzeros() == m * entries_per_row && "Matrix initialized with insufficent number of non-zero entries"); - for(local_ordinal_type row = 0, col = 0, i = 0; row < m && i < A->numberOfNonzeros(); row++, col = 0) - { - for(local_ordinal_type j=0; jnumberOfNonzeros(); row++, col = 0) { + for(local_ordinal_type j = 0; j < entries_per_row - 1; i++, j++, col += n / entries_per_row) { iRow[i] = row; jCol[i] = col; val[i] = one; } iRow[i] = row; - jCol[i] = n-1; + jCol[i] = n - 1; val[i++] = one; } } @@ -335,13 +312,13 @@ void MatrixTestsSparseTriplet::initializeMatrix( * device memory will copy from device when this is called, CPU-bound classes * will no-op. */ -void MatrixTestsSparseTriplet::maybeCopyToDev(hiop::hiopMatrixSparse*) { } +void MatrixTestsSparseTriplet::maybeCopyToDev(hiop::hiopMatrixSparse*) {} /** * @brief placeholder on CPU-bound classes. * @see MatrixTestsSparseTriplet::maybeCopyToDev */ -void MatrixTestsSparseTriplet::maybeCopyFromDev(hiop::hiopMatrixSparse*) { } +void MatrixTestsSparseTriplet::maybeCopyFromDev(hiop::hiopMatrixSparse*) {} int MatrixTestsSparseTriplet::getLocalElement(hiop::hiopVectorInt* xvec, int idx) const { @@ -362,4 +339,5 @@ void MatrixTestsSparseTriplet::setLocalElement(hiop::hiopVectorInt* xvec, int id } } -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsSparseTriplet.hpp b/tests/LinAlg/matrixTestsSparseTriplet.hpp index 8a9f192f2..3ef2f058e 100644 --- a/tests/LinAlg/matrixTestsSparseTriplet.hpp +++ b/tests/LinAlg/matrixTestsSparseTriplet.hpp @@ -53,7 +53,7 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #pragma once @@ -62,43 +62,42 @@ #include #include "matrixTestsSparse.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Tests are re-implemented here if necessary for SparseTriplet Matrices, * as the data layout is significantly different compares to dense matrices. * Any tests that would modify the sparsity pattern are not implemented. * Any tests that would make calls to non-implemented/needed functions are not implemented. - * -*/ + * + */ class MatrixTestsSparseTriplet : public MatrixTestsSparse { public: MatrixTestsSparseTriplet() {} - virtual ~MatrixTestsSparseTriplet(){} - + virtual ~MatrixTestsSparseTriplet() {} private: - virtual void setLocalElement( - hiop::hiopVector *_x, - const local_ordinal_type i, - const real_type val) override; - virtual real_type getLocalElement(const hiop::hiopMatrix *a, local_ordinal_type i, local_ordinal_type j) override; - virtual real_type getLocalElement(const hiop::hiopVector *x, local_ordinal_type i) override; + virtual void setLocalElement(hiop::hiopVector* _x, const local_ordinal_type i, const real_type val) override; + virtual real_type getLocalElement(const hiop::hiopMatrix* a, local_ordinal_type i, local_ordinal_type j) override; + virtual real_type getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) override; virtual real_type* getMatrixData(hiop::hiopMatrixSparse* a) override; virtual real_type getMatrixData(hiop::hiopMatrixSparse* a, local_ordinal_type i, local_ordinal_type j) override; virtual const local_ordinal_type* getRowIndices(const hiop::hiopMatrixSparse* a) override; virtual const local_ordinal_type* getColumnIndices(const hiop::hiopMatrixSparse* a) override; - virtual local_ordinal_type getLocalSize(const hiop::hiopVector *x) override; + virtual local_ordinal_type getLocalSize(const hiop::hiopVector* x) override; virtual int verifyAnswer(hiop::hiopMatrixSparse* A, real_type answer) override; - virtual int verifyAnswer(hiop::hiopMatrix* A, local_ordinal_type nnz_st, local_ordinal_type nnz_ed, const double answer) override; - virtual int verifyAnswer( - hiop::hiopMatrixDense* A, - std::function expect) override; - virtual int verifyAnswer(hiop::hiopVector *x, real_type answer) override; - virtual int verifyAnswer( - hiop::hiopVector *x, - std::function expect) override; + virtual int verifyAnswer(hiop::hiopMatrix* A, + local_ordinal_type nnz_st, + local_ordinal_type nnz_ed, + const double answer) override; + virtual int verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) override; + virtual int verifyAnswer(hiop::hiopVector* x, real_type answer) override; + virtual int verifyAnswer(hiop::hiopVector* x, std::function expect) override; virtual local_ordinal_type* numNonzerosPerRow(hiop::hiopMatrixSparse* mat) override; virtual local_ordinal_type* numNonzerosPerCol(hiop::hiopMatrixSparse* mat) override; virtual void maybeCopyToDev(hiop::hiopMatrixSparse*) override; @@ -106,8 +105,10 @@ class MatrixTestsSparseTriplet : public MatrixTestsSparse virtual int getLocalElement(hiop::hiopVectorInt*, int) const override; virtual void setLocalElement(hiop::hiopVectorInt*, int, int) const override; + public: virtual void initializeMatrix(hiop::hiopMatrixSparse* mat, local_ordinal_type entries_per_row) override; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsSymSparse.hpp b/tests/LinAlg/matrixTestsSymSparse.hpp index c6c64e8ff..f9c0a872f 100644 --- a/tests/LinAlg/matrixTestsSymSparse.hpp +++ b/tests/LinAlg/matrixTestsSymSparse.hpp @@ -53,7 +53,7 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #pragma once @@ -67,7 +67,10 @@ #include #include "testBase.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Tests are re-implemented here if necessary for SparseTriplet Matrices, @@ -75,27 +78,20 @@ namespace hiop { namespace tests { * * Any tests that would modify the sparsity pattern are not implemented. * Any tests that would make calls to non-implemented/needed functions are not implemented. - * -*/ + * + */ class MatrixTestsSymSparse : public TestBase { public: MatrixTestsSymSparse() {} - virtual ~MatrixTestsSymSparse(){} - + virtual ~MatrixTestsSymSparse() {} + /// @brief Test y <- beta * y + alpha * A * x - bool matrixTimesVec( - hiop::hiopMatrixSparse& A, - hiop::hiopVector& y, - hiop::hiopVector& x) + bool matrixTimesVec(hiop::hiopMatrixSparse& A, hiop::hiopVector& y, hiop::hiopVector& x) { assert(y.get_size() == A.m() && "Did you pass in vectors of the correct sizes?"); assert(x.get_size() == A.n() && "Did you pass in vectors of the correct sizes?"); - const real_type alpha = two, - beta = half, - A_val = half, - y_val = two, - x_val = three; + const real_type alpha = two, beta = half, A_val = half, y_val = two, x_val = three; int fail = 0; y.setToConstant(y_val); @@ -105,30 +101,26 @@ class MatrixTestsSymSparse : public TestBase A.timesVec(beta, y, alpha, x); - fail += verifyAnswer(&y, - [=] (local_ordinal_type i) - { - const local_ordinal_type numValuesInRow = sparsity_pattern[i]; - return (beta * y_val) + (alpha * A_val * x_val * numValuesInRow); - }); + fail += verifyAnswer(&y, [=](local_ordinal_type i) { + const local_ordinal_type numValuesInRow = sparsity_pattern[i]; + return (beta * y_val) + (alpha * A_val * x_val * numValuesInRow); + }); - delete [] sparsity_pattern; + delete[] sparsity_pattern; printMessage(fail, __func__); return fail; } - /** * Block of W += alpha*A * * @pre W is square * @pre A is symmetric sparse matrix - * + * */ - bool matrixAddUpperTriangleToSymDenseMatrixUpperTriangle( - hiop::hiopMatrixDense& W, - hiop::hiopMatrixSparse& A, // sym sparse matrix - const int rank=0) + bool matrixAddUpperTriangleToSymDenseMatrixUpperTriangle(hiop::hiopMatrixDense& W, + hiop::hiopMatrixSparse& A, // sym sparse matrix + const int rank = 0) { const local_ordinal_type N_loc = W.get_local_size_n(); const local_ordinal_type A_M = A.m(); @@ -139,51 +131,42 @@ class MatrixTestsSymSparse : public TestBase // The offset must be on dense matrix diagonal const local_ordinal_type start_diag = N_loc - A_N_loc; - const real_type alpha = half, - A_val = half, - W_val = one; + const real_type alpha = half, A_val = half, W_val = one; int fail = 0; // Check with non-1 alpha A.setToConstant(A_val); W.setToConstant(W_val); A.addUpperTriangleToSymDenseMatrixUpperTriangle(start_diag, alpha, W); - + // get sparsity pattern const auto* iRow = getRowIndices(&A); const auto* jCol = getColumnIndices(&A); auto nnz = A.numberOfNonzeros(); - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - // check if (i, j) within bounds of A - // then check if (i, j) within upper triangle of W - const bool isUpperTriangle = ( - i>=start_diag && i=start_diag && j= i); - - int i_sp = i - start_diag; - int j_sp = j - start_diag; - // only nonzero entries in A will be added - const bool indexExists = (find_unsorted_pair(i_sp, j_sp, iRow, jCol, nnz) || find_unsorted_pair(j_sp, i_sp, iRow, jCol, nnz)); - real_type ans = (isUpperTriangle && indexExists) ? W_val + A_val*alpha : W_val; // 1 + .5 * .5 = 1.25 - return ans; - }); + fail += verifyAnswer(&W, [=](local_ordinal_type i, local_ordinal_type j) -> real_type { + // check if (i, j) within bounds of A + // then check if (i, j) within upper triangle of W + const bool isUpperTriangle = + (i >= start_diag && i < start_diag + A_M && j >= start_diag && j < start_diag + A_N_loc && j >= i); + + int i_sp = i - start_diag; + int j_sp = j - start_diag; + // only nonzero entries in A will be added + const bool indexExists = + (find_unsorted_pair(i_sp, j_sp, iRow, jCol, nnz) || find_unsorted_pair(j_sp, i_sp, iRow, jCol, nnz)); + real_type ans = (isUpperTriangle && indexExists) ? W_val + A_val * alpha : W_val; // 1 + .5 * .5 = 1.25 + return ans; + }); printMessage(fail, __func__, rank); return fail; } - /// @todo Document this test for `startingAtAddSubDiagonalToStartingAt` - bool matrixStartingAtAddSubDiagonalToStartingAt( - hiop::hiopVector& W, - hiop::hiopMatrixSparse& A, - const int rank = 0) + bool matrixStartingAtAddSubDiagonalToStartingAt(hiop::hiopVector& W, hiop::hiopMatrixSparse& A, const int rank = 0) { - assert(W.get_size() == A.m()); // A is square matrix - + assert(W.get_size() == A.m()); // A is square matrix + const auto start_src_idx = 0; const auto start_dest_idx = 0; const auto num_elems = W.get_size(); @@ -198,13 +181,11 @@ class MatrixTestsSymSparse : public TestBase const auto* iRow = getRowIndices(&A); const auto* jCol = getColumnIndices(&A); auto nnz = A.numberOfNonzeros(); - const auto fail = verifyAnswer(&W, - [=](local_ordinal_type i) -> real_type - { - const bool indexExists = find_unsorted_pair(i, i, iRow, jCol, nnz); - return (indexExists) ? (W_val + A_val * alpha) : W_val; - }); - + const auto fail = verifyAnswer(&W, [=](local_ordinal_type i) -> real_type { + const bool indexExists = find_unsorted_pair(i, i, iRow, jCol, nnz); + return (indexExists) ? (W_val + A_val * alpha) : W_val; + }); + printMessage(fail, __func__, rank); return fail; } @@ -216,16 +197,16 @@ class MatrixTestsSymSparse : public TestBase hiop::hiopVector& diag, const int rank = 0) { - assert(A.m() == A.n()); // A is square matrix - assert(diag.get_size() == B.m()); // B is square matrix - assert(A.m() >= B.m()); // A is larger or equal to B - assert(W.m() == W.n()); // W is square matrix - assert(W.m() == A.m()); // W has same dim as A - + assert(A.m() == A.n()); // A is square matrix + assert(diag.get_size() == B.m()); // B is square matrix + assert(A.m() >= B.m()); // A is larger or equal to B + assert(W.m() == W.n()); // W is square matrix + assert(W.m() == A.m()); // W has same dim as A + const local_ordinal_type A_M = A.m(); const local_ordinal_type A_N_loc = A.n(); const local_ordinal_type B_M = B.m(); - + const auto B_val = one; const auto W_val = zero; const auto D_val = two; @@ -235,69 +216,62 @@ class MatrixTestsSymSparse : public TestBase B.setToConstant(B_val); diag.setToConstant(D_val); - + A.set_Hess_FR(B, A.i_row(), A.j_col(), A.M(), diag); // copy to a dense matrix W.setToConstant(W_val); A.addUpperTriangleToSymDenseMatrixUpperTriangle(start_diag, alpha, W); - + // get sparsity pattern const auto* iRowB = getRowIndices(&B); const auto* jColB = getColumnIndices(&B); auto nnzB = B.numberOfNonzeros(); - fail += verifyAnswer(&W, - [=] (local_ordinal_type i, local_ordinal_type j) -> real_type - { - // check if (i, j) within bounds of A - // then check if (i, j) within upper triangle of W - const bool isUpperTriangle = ( - i>=start_diag && i=start_diag && j= i); - - // only nonzero entries in A will be added to W - int i_sp = i - start_diag; - int j_sp = j - start_diag; - const bool sp_index_B = (i real_type { + // check if (i, j) within bounds of A + // then check if (i, j) within upper triangle of W + const bool isUpperTriangle = + (i >= start_diag && i < start_diag + A_M && j >= start_diag && j < start_diag + A_N_loc && j >= i); + + // only nonzero entries in A will be added to W + int i_sp = i - start_diag; + int j_sp = j - start_diag; + const bool sp_index_B = (i < B_M && j < B_M) && (find_unsorted_pair(i_sp, j_sp, iRowB, jColB, nnzB) || + find_unsorted_pair(j_sp, i_sp, iRowB, jColB, nnzB)); + const bool diag_index = (i < B_M && j < B_M) && (i == j); + + real_type ans; + + if(isUpperTriangle && sp_index_B && diag_index) { + // found in sparse matirx B and it is a diagonal entry in B + ans = B_val + D_val; + } else if(isUpperTriangle && sp_index_B) { + // found in sparse matirx B + ans = B_val; + } else if(isUpperTriangle && diag_index) { + // NOT found in sparse matirx B. It comes from extra diag term + ans = D_val; + } else { + ans = W_val; } - ); + return ans; + }); printMessage(fail, __func__, rank); return fail; } protected: - /// TODO: The sparse matrix is not distributed - all is local. + /// TODO: The sparse matrix is not distributed - all is local. virtual real_type getLocalElement(const hiop::hiopMatrix* a, local_ordinal_type i, local_ordinal_type j) = 0; virtual real_type getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) = 0; virtual real_type* getMatrixData(hiop::hiopMatrixSparse* a) = 0; virtual const local_ordinal_type* getRowIndices(const hiop::hiopMatrixSparse* a) = 0; virtual const local_ordinal_type* getColumnIndices(const hiop::hiopMatrixSparse* a) = 0; virtual local_ordinal_type getLocalSize(const hiop::hiopVector* x) = 0; - virtual int verifyAnswer( - hiop::hiopMatrixDense* A, - std::function expect) = 0; - virtual int verifyAnswer( - hiop::hiopVector* x, - std::function expect) = 0; + virtual int verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) = 0; + virtual int verifyAnswer(hiop::hiopVector* x, std::function expect) = 0; virtual local_ordinal_type* numNonzerosPerRow(hiop::hiopMatrixSparse* mat) = 0; virtual local_ordinal_type* numNonzerosPerCol(hiop::hiopMatrixSparse* mat) = 0; @@ -305,10 +279,8 @@ class MatrixTestsSymSparse : public TestBase // linearly scans an unsorted array static bool find_unsorted_pair(int valA, int valB, const int* arrA, const int* arrB, size_t arrslen) { - for (int i = 0; i < static_cast(arrslen); i++) - { - if (arrA[i] == valA && arrB[i] == valB) - { + for(int i = 0; i < static_cast(arrslen); i++) { + if(arrA[i] == valA && arrB[i] == valB) { return true; } } @@ -316,4 +288,5 @@ class MatrixTestsSymSparse : public TestBase } }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsSymSparseTriplet.cpp b/tests/LinAlg/matrixTestsSymSparseTriplet.cpp index f5abadd27..83bbadfe4 100644 --- a/tests/LinAlg/matrixTestsSymSparseTriplet.cpp +++ b/tests/LinAlg/matrixTestsSymSparseTriplet.cpp @@ -53,44 +53,45 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #include #include #include "matrixTestsSymSparseTriplet.hpp" -namespace hiop{ namespace tests { +namespace hiop +{ +namespace tests +{ /// Returns element (i,j) of a dense matrix `A`. /// First need to retrieve hiopMatrixDense from the abstract interface -real_type MatrixTestsSymSparseTriplet::getLocalElement( - const hiop::hiopMatrix* A, - local_ordinal_type row, - local_ordinal_type col) +real_type MatrixTestsSymSparseTriplet::getLocalElement(const hiop::hiopMatrix* A, + local_ordinal_type row, + local_ordinal_type col) { auto mat = dynamic_cast(A); - - if (mat != nullptr) - { + + if(mat != nullptr) { double* M = mat->local_data_const(); - //return M[row][col]; - return M[row*mat->n()+col]; + // return M[row][col]; + return M[row * mat->n() + col]; } - else THROW_NULL_DEREF; + else + THROW_NULL_DEREF; } /// Returns element _i_ of vector _x_. /// First need to retrieve hiopVectorPar from the abstract interface -real_type MatrixTestsSymSparseTriplet::getLocalElement( - const hiop::hiopVector* x, - local_ordinal_type i) +real_type MatrixTestsSymSparseTriplet::getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) { const hiop::hiopVectorPar* xvec = dynamic_cast(x); if(xvec != nullptr) return xvec->local_data_const()[i]; - else THROW_NULL_DEREF; + else + THROW_NULL_DEREF; } real_type* MatrixTestsSymSparseTriplet::getMatrixData(hiop::hiopMatrixSparse* A) @@ -117,30 +118,26 @@ int MatrixTestsSymSparseTriplet::getLocalSize(const hiop::hiopVector* x) const hiop::hiopVectorPar* xvec = dynamic_cast(x); if(xvec != nullptr) return static_cast(xvec->get_local_size()); - else THROW_NULL_DEREF; + else + THROW_NULL_DEREF; } - /* * Pass a function-like object to calculate the expected * answer dynamically, based on the row and column */ [[nodiscard]] -int MatrixTestsSymSparseTriplet::verifyAnswer( - hiop::hiopMatrixDense* A, - std::function expect) +int MatrixTestsSymSparseTriplet::verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) { - //auto A = dynamic_cast(Amat); + // auto A = dynamic_cast(Amat); assert(A->get_local_size_n() == A->n() && "Matrix should not be distributed"); const local_ordinal_type M = A->get_local_size_m(); const local_ordinal_type N = A->get_local_size_n(); int fail = 0; - for (local_ordinal_type i=0; i expect) +int MatrixTestsSymSparseTriplet::verifyAnswer(hiop::hiopVector* x, std::function expect) { const local_ordinal_type N = getLocalSize(x); int local_fail = 0; - for (int i=0; im()]; std::memset(sparsity_pattern, 0, sizeof(local_ordinal_type) * mat->m()); - for(local_ordinal_type i = 0; i < nnz; i++) - { + for(local_ordinal_type i = 0; i < nnz; i++) { sparsity_pattern[iRow[i]]++; - if(iRow[i] != jCol[i]) - { + if(iRow[i] != jCol[i]) { sparsity_pattern[jCol[i]]++; } } @@ -198,16 +188,14 @@ local_ordinal_type* MatrixTestsSymSparseTriplet::numNonzerosPerCol(hiop::hiopMat auto sparsity_pattern = new local_ordinal_type[mat->n()]; std::memset(sparsity_pattern, 0, sizeof(local_ordinal_type) * mat->n()); - for(local_ordinal_type i = 0; i < nnz; i++) - { + for(local_ordinal_type i = 0; i < nnz; i++) { sparsity_pattern[jCol[i]]++; - if(iRow[i] != jCol[i]) - { + if(iRow[i] != jCol[i]) { sparsity_pattern[iRow[i]]++; } } return sparsity_pattern; } - -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/matrixTestsSymSparseTriplet.hpp b/tests/LinAlg/matrixTestsSymSparseTriplet.hpp index d523d5b69..0cf414293 100644 --- a/tests/LinAlg/matrixTestsSymSparseTriplet.hpp +++ b/tests/LinAlg/matrixTestsSymSparseTriplet.hpp @@ -53,7 +53,7 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #pragma once @@ -62,37 +62,37 @@ #include #include "matrixTestsSymSparse.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Tests are re-implemented here if necessary for SparseTriplet Matrices, * as the data layout is significantly different compares to dense matrices. * Any tests that would modify the sparsity pattern are not implemented. * Any tests that would make calls to non-implemented/needed functions are not implemented. - * -*/ + * + */ class MatrixTestsSymSparseTriplet : public MatrixTestsSymSparse { public: MatrixTestsSymSparseTriplet() {} - virtual ~MatrixTestsSymSparseTriplet(){} - + virtual ~MatrixTestsSymSparseTriplet() {} private: - virtual real_type getLocalElement(const hiop::hiopMatrix *a, local_ordinal_type i, local_ordinal_type j) override; - virtual real_type getLocalElement(const hiop::hiopVector *x, local_ordinal_type i) override; + virtual real_type getLocalElement(const hiop::hiopMatrix* a, local_ordinal_type i, local_ordinal_type j) override; + virtual real_type getLocalElement(const hiop::hiopVector* x, local_ordinal_type i) override; virtual real_type* getMatrixData(hiop::hiopMatrixSparse* a) override; virtual const local_ordinal_type* getRowIndices(const hiop::hiopMatrixSparse* a) override; virtual const local_ordinal_type* getColumnIndices(const hiop::hiopMatrixSparse* a) override; - virtual local_ordinal_type getLocalSize(const hiop::hiopVector *x) override; - virtual int verifyAnswer( - hiop::hiopMatrixDense* A, - std::function expect) override; - virtual int verifyAnswer( - hiop::hiopVector *x, - std::function expect) override; + virtual local_ordinal_type getLocalSize(const hiop::hiopVector* x) override; + virtual int verifyAnswer(hiop::hiopMatrixDense* A, + std::function expect) override; + virtual int verifyAnswer(hiop::hiopVector* x, std::function expect) override; virtual local_ordinal_type* numNonzerosPerRow(hiop::hiopMatrixSparse* mat) override; virtual local_ordinal_type* numNonzerosPerCol(hiop::hiopMatrixSparse* mat) override; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/testBase.hpp b/tests/LinAlg/testBase.hpp index f96c2704b..e9ad91667 100644 --- a/tests/LinAlg/testBase.hpp +++ b/tests/LinAlg/testBase.hpp @@ -61,11 +61,14 @@ #include #include -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ -using real_type = double; -using local_ordinal_type = int; -using global_ordinal_type = int; +using real_type = double; +using local_ordinal_type = int; +using global_ordinal_type = int; static const real_type zero = 0.0; static const real_type quarter = 0.25; @@ -73,65 +76,52 @@ static const real_type half = 0.5; static const real_type one = 1.0; static const real_type two = 2.0; static const real_type three = 3.0; -static const real_type eps = - 10*std::numeric_limits::epsilon(); +static const real_type eps = 10 * std::numeric_limits::epsilon(); static const int SKIP_TEST = -1; // must be const pointer and const dest for // const string declarations to pass // -Wwrite-strings -static const char * const RED = "\033[1;31m"; -static const char * const GREEN = "\033[1;32m"; -static const char * const YELLOW = "\033[1;33m"; -static const char * const CLEAR = "\033[0m"; +static const char* const RED = "\033[1;31m"; +static const char* const GREEN = "\033[1;32m"; +static const char* const YELLOW = "\033[1;33m"; +static const char* const CLEAR = "\033[0m"; class TestBase { public: TestBase() - : mem_space_("DEFAULT") - { - } - inline void set_mem_space(const std::string& mem_space) - { - mem_space_ = mem_space; - } - inline std::string get_mem_space() const - { - return mem_space_; - } + : mem_space_("DEFAULT") + {} + inline void set_mem_space(const std::string& mem_space) { mem_space_ = mem_space; } + inline std::string get_mem_space() const { return mem_space_; } + protected: /// Returns true if two real numbers are equal within tolerance - [[nodiscard]] static - bool isEqual(const real_type a, const real_type b) + [[nodiscard]] static bool isEqual(const real_type a, const real_type b) { - return (std::abs(a - b)/(1.0 + std::abs(b)) < eps); + return (std::abs(a - b) / (1.0 + std::abs(b)) < eps); } /// Prints error output for each rank - static void printMessage(const int fail, const char* funcname, const int rank=0) + static void printMessage(const int fail, const char* funcname, const int rank = 0) { - if(fail > 0) - { + if(fail > 0) { std::cout << RED << "--- FAIL: Test " << funcname << " on rank " << rank << CLEAR << "\n"; - } - else if (fail == SKIP_TEST) - { - if(rank == 0) - { + } else if(fail == SKIP_TEST) { + if(rank == 0) { std::cout << YELLOW << "--- SKIP: Test " << funcname << CLEAR << "\n"; } - } - else - { - if(rank == 0) - { + } else { + if(rank == 0) { std::cout << GREEN << "--- PASS: Test " << funcname << CLEAR << "\n"; } } } + protected: std::string mem_space_; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTests.hpp b/tests/LinAlg/vectorTests.hpp index dfe0ed29f..1a0b10d01 100644 --- a/tests/LinAlg/vectorTests.hpp +++ b/tests/LinAlg/vectorTests.hpp @@ -68,7 +68,10 @@ #include #include "testBase.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Collection of tests for abstract hiopVector implementations. @@ -89,8 +92,8 @@ namespace hiop { namespace tests { class VectorTests : public TestBase { public: - VectorTests(){} - virtual ~VectorTests(){} + VectorTests() {} + virtual ~VectorTests() {} /* * this[i] = 0 @@ -121,8 +124,7 @@ class VectorTests : public TestBase int fail = 0; local_ordinal_type N = getLocalSize(&x); - for(local_ordinal_type i=0; i0){ + if(fail > 0) { std::cout << "num fails = " << fail << std::endl; } - + printMessage(fail, __func__, rank); return reduceReturn(fail, &x); } /** - * @brief Test method: + * @brief Test method: * forall n in n_local if (pattern[n] != 0.0) this[n] = x_val */ - bool vectorSetToConstant_w_patternSelect( - hiop::hiopVector& x, - hiop::hiopVector& pattern, - const int rank) + bool vectorSetToConstant_w_patternSelect(hiop::hiopVector& x, hiop::hiopVector& pattern, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(N == getLocalSize(&pattern)); @@ -175,18 +173,14 @@ class VectorTests : public TestBase // Ensure that a single element (globally) is // set to zero in the pattern - if (rank == 0) - setLocalElement(&pattern, N-1, zero); + if(rank == 0) setLocalElement(&pattern, N - 1, zero); x.setToConstant_w_patternSelect(x_val, pattern); // Check that the last element of rank zero's vector is // zero, and that x_val was added to all other elements - const int fail = verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - return (rank == 0 && i == N-1) ? zero : x_val; - }); + const int fail = + verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { return (rank == 0 && i == N - 1) ? zero : x_val; }); printMessage(fail, __func__, rank); return reduceReturn(fail, &x); @@ -215,39 +209,31 @@ class VectorTests : public TestBase return reduceReturn(fail, &v); } - /* + /* * Test for function that copies to v the entries of from specified by idxs. */ bool vector_copy_from_indexes(hiop::hiopVector& v, hiop::hiopVector& from, hiop::hiopVectorInt& idxs) { local_ordinal_type N = getLocalSize(&v); assert(v.get_size() == idxs.get_local_size()); - assert(N == idxs.get_local_size()); + assert(N == idxs.get_local_size()); assert(N <= getLocalSize(&from)); - //copy indexes 0, 1, 2, ..., N (copy first N entries of from) + // copy indexes 0, 1, 2, ..., N (copy first N entries of from) idxs.linspace(0, 1); - + v.setToConstant(three); - + from.setToConstant(one); - setLocalElement(&from, N-1, two); - + setLocalElement(&from, N - 1, two); + v.copy_from_indexes(from, idxs); - - int fail = verifyAnswer(&v, - [=] (local_ordinal_type i) -> real_type - { - return (i == N-1) ? two : one; - }); - - real_type* from_buffer = from.local_data(); + + int fail = verifyAnswer(&v, [=](local_ordinal_type i) -> real_type { return (i == N - 1) ? two : one; }); + + real_type* from_buffer = from.local_data(); v.copy_from_indexes(from_buffer, idxs); - fail += verifyAnswer(&v, - [=] (local_ordinal_type i) -> real_type - { - return (i == N-1) ? two : one; - }); + fail += verifyAnswer(&v, [=](local_ordinal_type i) -> real_type { return (i == N - 1) ? two : one; }); printMessage(fail, __func__); return reduceReturn(fail, &v); @@ -255,15 +241,12 @@ class VectorTests : public TestBase /** * @brief Test vector method for copying data from another vector - * or data buffer. - * + * or data buffer. + * * @pre Vectors are not distributed. * @pre Memory space for hiop::LinearAlgebraFactory is set appropriately - */ - bool vectorCopyFromStarting( - hiop::hiopVector& x, - hiop::hiopVector& from, - const int rank=0) + */ + bool vectorCopyFromStarting(hiop::hiopVector& x, hiop::hiopVector& from, const int rank = 0) { int fail = 0; const local_ordinal_type Nx = getLocalSize(&x); @@ -274,31 +257,19 @@ class VectorTests : public TestBase real_type* from_buffer = createLocalBuffer(Nx, one); - x.copyFromStarting(1, from_buffer, Nx-1); - fail += verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - return (i == 0) ? two : one; - }); + x.copyFromStarting(1, from_buffer, Nx - 1); + fail += verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { return (i == 0) ? two : one; }); deleteLocalBuffer(from_buffer); x.setToConstant(two); from.setToConstant(one); x.copyFromStarting(Nx - Nfrom, from); - fail += verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - return (i < (Nx - Nfrom)) ? two : one; - }); + fail += verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { return (i < (Nx - Nfrom)) ? two : one; }); x.setToConstant(two); from.setToConstant(one); x.copyFromStarting(1, from); - fail += verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - return (i < 1 || i > (Nfrom)) ? two : one; - }); + fail += verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { return (i < 1 || i > (Nfrom)) ? two : one; }); // Testing copying from a zero size vector hiop::hiopVector* zero = hiop::LinearAlgebraFactory::create_vector(mem_space_, 0); @@ -322,14 +293,11 @@ class VectorTests : public TestBase /** * @brief Tests function that copies from one vector to another, specifying * both the start index in the source and the destination. - * + * * @pre `src` and `dest` are allocated to nonzero sizes and * size of `src` > size of `dest`. */ - bool vectorStartingAtCopyFromStartingAt( - hiop::hiopVector& dest, - hiop::hiopVector& src, - const int rank=0) + bool vectorStartingAtCopyFromStartingAt(hiop::hiopVector& dest, hiop::hiopVector& src, const int rank = 0) { const local_ordinal_type Ndest = getLocalSize(&dest); const local_ordinal_type Nsrc = getLocalSize(&src); @@ -337,25 +305,21 @@ class VectorTests : public TestBase assert(Ndest < Nsrc && "This test assumes source is bigger than destination vector"); const real_type dest_val = one; - const real_type src_val = two; + const real_type src_val = two; dest.setToConstant(dest_val); src.setToConstant(src_val); // Copy one element from `src` to `dest` local_ordinal_type start_dest = Ndest - 1; - local_ordinal_type start_src = Nsrc/2; + local_ordinal_type start_src = Nsrc / 2; dest.startingAtCopyFromStartingAt(start_dest, src, start_src); - int fail = verifyAnswer(&dest, - [=] (local_ordinal_type i) -> real_type - { - return i == start_dest ? src_val : dest_val; - }); + int fail = verifyAnswer(&dest, [=](local_ordinal_type i) -> real_type { return i == start_dest ? src_val : dest_val; }); // Restore destination values dest.setToConstant(dest_val); // Overwrite all `dest` elements with last Ndest elements of `src` start_dest = 0; - start_src = Nsrc - Ndest; + start_src = Nsrc - Ndest; dest.startingAtCopyFromStartingAt(start_dest, src, start_src); fail += verifyAnswer(&dest, src_val); @@ -365,7 +329,7 @@ class VectorTests : public TestBase /** * Test for function that copies data from `this` to a data buffer. - * + * * @note This test calls `local_data` vector method. Here this is OK, * because for as long copies between vectors and bufers are implemented * as public methods, `local_data` will be a public method, as well. @@ -389,22 +353,17 @@ class VectorTests : public TestBase /** * @brief Test vector method for copying data to another vector - * starting from prescribed index in destination vector. - * + * starting from prescribed index in destination vector. + * * @pre Vectors are not distributed. * @pre Memory space for hiop::LinearAlgebraFactory is set appropriately - */ - bool vectorCopyToStarting( - hiop::hiopVector& to, - hiop::hiopVector& from, - const int rank=0) + */ + bool vectorCopyToStarting(hiop::hiopVector& to, hiop::hiopVector& from, const int rank = 0) { const local_ordinal_type dest_size = getLocalSize(&to); const local_ordinal_type src_size = getLocalSize(&from); - assert(dest_size == to.get_size() - && "This test cannot be ran with distributed vectors"); - assert(dest_size > src_size - && "Must pass in a destination vector larger than source vector"); + assert(dest_size == to.get_size() && "This test cannot be ran with distributed vectors"); + assert(dest_size > src_size && "Must pass in a destination vector larger than source vector"); const int start_idx = dest_size - src_size; const real_type from_val = one; @@ -421,11 +380,7 @@ class VectorTests : public TestBase * greater than or equal to the start idx are set * to the source value */ - int fail = verifyAnswer(&to, - [=] (local_ordinal_type i) -> real_type - { - return i < start_idx ? to_val : from_val; - }); + int fail = verifyAnswer(&to, [=](local_ordinal_type i) -> real_type { return i < start_idx ? to_val : from_val; }); // Testing copying from a zero size vector hiop::hiopVector* zero = hiop::LinearAlgebraFactory::create_vector(mem_space_, 0); @@ -441,23 +396,20 @@ class VectorTests : public TestBase /** * @brief Test vector method for copying data to another vector - * starting from prescribed index in destination vector. - * + * starting from prescribed index in destination vector. + * * @pre Vectors are not distributed. * @pre Memory space for hiop::LinearAlgebraFactory is set appropriately - */ - bool vectorCopyToStartingAt_w_pattern( - hiop::hiopVector& from, - hiop::hiopVector& to, - hiop::hiopVector& pattern, - const int rank=0) + */ + bool vectorCopyToStartingAt_w_pattern(hiop::hiopVector& from, + hiop::hiopVector& to, + hiop::hiopVector& pattern, + const int rank = 0) { const local_ordinal_type to_size = getLocalSize(&to); const local_ordinal_type from_size = getLocalSize(&from); - assert(to_size == to.get_size() && to_size > from_size + 2 - && "This test cannot be ran with distributed vectors"); - assert(getLocalSize(&pattern) == from_size && from_size > 1 - && "pattern_size must be equal to the source size"); + assert(to_size == to.get_size() && to_size > from_size + 2 && "This test cannot be ran with distributed vectors"); + assert(getLocalSize(&pattern) == from_size && from_size > 1 && "pattern_size must be equal to the source size"); const int start_idx = to_size - from_size; const real_type from_val = one; const real_type from_val_st_ed = three; @@ -466,39 +418,36 @@ class VectorTests : public TestBase from.setToConstant(from_val); to.setToConstant(to_val); pattern.setToConstant(zero); - if (rank == 0) { + if(rank == 0) { setLocalElement(&from, 0, from_val_st_ed); - setLocalElement(&from, from_size-1, from_val_st_ed); + setLocalElement(&from, from_size - 1, from_val_st_ed); setLocalElement(&pattern, 0, one); - setLocalElement(&pattern, from_size-1, one); + setLocalElement(&pattern, from_size - 1, one); } from.copyToStartingAt_w_pattern(to, start_idx, pattern); // Check that the start and end values of `from' vector are copied to the `to' vector - const int fail = verifyAnswer(&to, - [=] (local_ordinal_type i) -> real_type - { - return (rank == 0 && (i == start_idx || i == start_idx + 1) ? from_val_st_ed : to_val); - }); + const int fail = verifyAnswer(&to, [=](local_ordinal_type i) -> real_type { + return (rank == 0 && (i == start_idx || i == start_idx + 1) ? from_val_st_ed : to_val); + }); printMessage(fail, __func__, rank); - return reduceReturn(fail, &from); + return reduceReturn(fail, &from); } - /** * @brief Test vector method for copying data from another two vectors - * + * * @pre Vectors are not distributed. * @pre Memory space for hiop::LinearAlgebraFactory is set appropriately - */ - bool vector_copy_from_two_vec( hiop::hiopVector& cd, - hiop::hiopVector& c, - hiop::hiopVectorInt& c_map, - hiop::hiopVector& d, - hiop::hiopVectorInt& d_map, - const int rank=0) + */ + bool vector_copy_from_two_vec(hiop::hiopVector& cd, + hiop::hiopVector& c, + hiop::hiopVectorInt& c_map, + hiop::hiopVector& d, + hiop::hiopVectorInt& d_map, + const int rank = 0) { const local_ordinal_type cd_size = getLocalSize(&cd); const local_ordinal_type c_size = getLocalSize(&c); @@ -521,11 +470,7 @@ class VectorTests : public TestBase cd.copy_from_two_vec_w_pattern(c, c_map, d, d_map); - int fail = verifyAnswer(&cd, - [=] (local_ordinal_type i) -> real_type - { - return i < c_size ? c_val : d_val; - }); + int fail = verifyAnswer(&cd, [=](local_ordinal_type i) -> real_type { return i < c_size ? c_val : d_val; }); printMessage(fail, __func__, rank); return reduceReturn(fail, &cd); @@ -533,16 +478,16 @@ class VectorTests : public TestBase /** * @brief Test vector method for copying data to another two vectors - * + * * @pre Vectors are not distributed. * @pre Memory space for hiop::LinearAlgebraFactory is set appropriately - */ - bool vector_copy_to_two_vec( hiop::hiopVector& cd, - hiop::hiopVector& c, - hiop::hiopVectorInt& c_map, - hiop::hiopVector& d, - hiop::hiopVectorInt& d_map, - const int rank=0) + */ + bool vector_copy_to_two_vec(hiop::hiopVector& cd, + hiop::hiopVector& c, + hiop::hiopVectorInt& c_map, + hiop::hiopVector& d, + hiop::hiopVectorInt& d_map, + const int rank = 0) { const local_ordinal_type cd_size = getLocalSize(&cd); const local_ordinal_type c_size = getLocalSize(&c); @@ -572,22 +517,17 @@ class VectorTests : public TestBase /** * @brief Test vector method for copying data to another vector * starting from prescribed indices in source and destination - * vectors. - * + * vectors. + * * @pre Vectors are not distributed. * @pre Memory space for hiop::LinearAlgebraFactory is set appropriately - */ - bool vectorStartingAtCopyToStartingAt( - hiop::hiopVector& to, - hiop::hiopVector& from, - const int rank=0) + */ + bool vectorStartingAtCopyToStartingAt(hiop::hiopVector& to, hiop::hiopVector& from, const int rank = 0) { const local_ordinal_type dest_size = getLocalSize(&to); const local_ordinal_type src_size = getLocalSize(&from); - assert(dest_size == to.get_size() - && "This test cannot be ran with distributed vectors"); - assert(dest_size > src_size - && "Must pass in a destination vector larger than source vector"); + assert(dest_size == to.get_size() && "This test cannot be ran with distributed vectors"); + assert(dest_size > src_size && "Must pass in a destination vector larger than source vector"); const real_type from_val = one; const real_type to_val = two; @@ -597,7 +537,7 @@ class VectorTests : public TestBase int num_elements_to_copy = -1; // Iteratively checking various edge cases for calls to the function - + hiop::hiopVector* zero = hiop::LinearAlgebraFactory::create_vector(mem_space_, 0); // Copying from a size 0 vector @@ -628,22 +568,16 @@ class VectorTests : public TestBase to.setToConstant(to_val); from.startingAtCopyToStartingAt(start_idx_src, to, start_idx_dst, num_elements_to_copy); - fail += verifyAnswer(&to, - [=] (local_ordinal_type i) -> real_type - { - int tmp; - if(num_elements_to_copy == -1) - { - tmp = src_size; - } - else - { - tmp = num_elements_to_copy; - } - const bool isValueCopied = (i >= start_idx_dst && - i < start_idx_dst + tmp); - return isValueCopied ? from_val : to_val; - }); + fail += verifyAnswer(&to, [=](local_ordinal_type i) -> real_type { + int tmp; + if(num_elements_to_copy == -1) { + tmp = src_size; + } else { + tmp = num_elements_to_copy; + } + const bool isValueCopied = (i >= start_idx_dst && i < start_idx_dst + tmp); + return isValueCopied ? from_val : to_val; + }); // Copying from start of from to end of to start_idx_dst = dest_size - src_size; @@ -651,22 +585,16 @@ class VectorTests : public TestBase to.setToConstant(to_val); from.startingAtCopyToStartingAt(start_idx_src, to, start_idx_dst, num_elements_to_copy); - fail += verifyAnswer(&to, - [=] (local_ordinal_type i) -> real_type - { - int tmp; - if(num_elements_to_copy == -1) - { - tmp = src_size; - } - else - { - tmp = num_elements_to_copy; - } - const bool isValueCopied = (i >= start_idx_dst && - i < start_idx_dst + tmp); - return isValueCopied ? from_val : to_val; - }); + fail += verifyAnswer(&to, [=](local_ordinal_type i) -> real_type { + int tmp; + if(num_elements_to_copy == -1) { + tmp = src_size; + } else { + tmp = num_elements_to_copy; + } + const bool isValueCopied = (i >= start_idx_dst && i < start_idx_dst + tmp); + return isValueCopied ? from_val : to_val; + }); // Not copying all elemtents num_elements_to_copy = num_elements_to_copy / 2; @@ -674,22 +602,16 @@ class VectorTests : public TestBase to.setToConstant(to_val); from.startingAtCopyToStartingAt(start_idx_src, to, start_idx_dst, num_elements_to_copy); - fail += verifyAnswer(&to, - [=] (local_ordinal_type i) -> real_type - { - int tmp; - if(num_elements_to_copy == -1) - { - tmp = src_size; - } - else - { - tmp = num_elements_to_copy; - } - const bool isValueCopied = (i >= start_idx_dst && - i < start_idx_dst + tmp); - return isValueCopied ? from_val : to_val; - }); + fail += verifyAnswer(&to, [=](local_ordinal_type i) -> real_type { + int tmp; + if(num_elements_to_copy == -1) { + tmp = src_size; + } else { + tmp = num_elements_to_copy; + } + const bool isValueCopied = (i >= start_idx_dst && i < start_idx_dst + tmp); + return isValueCopied ? from_val : to_val; + }); // Passing -1 as the number of elements num_elements_to_copy = -1; @@ -697,22 +619,16 @@ class VectorTests : public TestBase to.setToConstant(to_val); from.startingAtCopyToStartingAt(start_idx_src, to, start_idx_dst, num_elements_to_copy); - fail += verifyAnswer(&to, - [=] (local_ordinal_type i) -> real_type - { - int tmp; - if(num_elements_to_copy == -1) - { - tmp = src_size; - } - else - { - tmp = num_elements_to_copy; - } - const bool isValueCopied = (i >= start_idx_dst && - i < start_idx_dst + tmp); - return isValueCopied ? from_val : to_val; - }); + fail += verifyAnswer(&to, [=](local_ordinal_type i) -> real_type { + int tmp; + if(num_elements_to_copy == -1) { + tmp = src_size; + } else { + tmp = num_elements_to_copy; + } + const bool isValueCopied = (i >= start_idx_dst && i < start_idx_dst + tmp); + return isValueCopied ? from_val : to_val; + }); // Passing starting indices equal to the sizes start_idx_dst = src_size; @@ -721,22 +637,16 @@ class VectorTests : public TestBase to.setToConstant(to_val); from.startingAtCopyToStartingAt(start_idx_src, to, start_idx_dst, num_elements_to_copy); - fail += verifyAnswer(&to, - [=] (local_ordinal_type i) -> real_type - { - int tmp; - if(num_elements_to_copy == -1) - { - tmp = src_size; - } - else - { - tmp = num_elements_to_copy; - } - const bool isValueCopied = (i >= start_idx_dst && - i < start_idx_dst + tmp); - return isValueCopied ? from_val : to_val; - }); + fail += verifyAnswer(&to, [=](local_ordinal_type i) -> real_type { + int tmp; + if(num_elements_to_copy == -1) { + tmp = src_size; + } else { + tmp = num_elements_to_copy; + } + const bool isValueCopied = (i >= start_idx_dst && i < start_idx_dst + tmp); + return isValueCopied ? from_val : to_val; + }); delete zero; printMessage(fail, __func__, rank); @@ -747,10 +657,7 @@ class VectorTests : public TestBase * @brief Test: * this[i] = (pattern[i] == 0 ? 0 : this[i]) */ - bool vectorSelectPattern( - hiop::hiopVector& x, - hiop::hiopVector& pattern, - const int rank) + bool vectorSelectPattern(hiop::hiopVector& x, hiop::hiopVector& pattern, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(x.get_size() == pattern.get_size()); @@ -759,17 +666,14 @@ class VectorTests : public TestBase x.setToConstant(x_val); pattern.setToConstant(one); - if (rank== 0) - setLocalElement(&pattern, N - 1, zero); + if(rank == 0) setLocalElement(&pattern, N - 1, zero); x.selectPattern(pattern); - const int fail = verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - const bool isLastElementOnRank0 = (i == N-1 && rank == 0); - return isLastElementOnRank0 ? zero : x_val; - }); + const int fail = verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { + const bool isLastElementOnRank0 = (i == N - 1 && rank == 0); + return isLastElementOnRank0 ? zero : x_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &x); @@ -830,11 +734,10 @@ class VectorTests : public TestBase /** * @brief Test: this[i] = (pattern[i] == 0 ? 0 : this[i]/x[i]) */ - bool vectorComponentDiv_p_selectPattern( - hiop::hiopVector& v, - hiop::hiopVector& x, - hiop::hiopVector& pattern, - const int rank) + bool vectorComponentDiv_p_selectPattern(hiop::hiopVector& v, + hiop::hiopVector& x, + hiop::hiopVector& pattern, + const int rank) { const local_ordinal_type N = getLocalSize(&v); assert(v.get_size() == x.get_size()); @@ -847,20 +750,17 @@ class VectorTests : public TestBase x.setToConstant(x_val); v.setToConstant(v_val); pattern.setToConstant(one); - if (rank== 0) - { + if(rank == 0) { setLocalElement(&pattern, N - 1, zero); - setLocalElement(&x , N - 1, zero); + setLocalElement(&x, N - 1, zero); } v.componentDiv_w_selectPattern(x, pattern); - const int fail = verifyAnswer(&v, - [=] (local_ordinal_type i) -> real_type - { - const bool isLastElementOnRank0 = (i == N-1 && rank == 0); - return isLastElementOnRank0 ? zero : v_val / x_val; - }); + const int fail = verifyAnswer(&v, [=](local_ordinal_type i) -> real_type { + const bool isLastElementOnRank0 = (i == N - 1 && rank == 0); + return isLastElementOnRank0 ? zero : v_val / x_val; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &v); @@ -880,7 +780,7 @@ class VectorTests : public TestBase return reduceReturn(fail, &v); } - + /** * @brief Test: this[i] = min(this[i], x[i]) */ @@ -914,7 +814,7 @@ class VectorTests : public TestBase return reduceReturn(fail, &v); } - + /** * @brief Test: this[i] = max(this[i], x[i]) */ @@ -946,17 +846,15 @@ class VectorTests : public TestBase const real_type expected = half; if(rank == 0) { - setLocalElement(&x, N-1, expected); + setLocalElement(&x, N - 1, expected); } x.component_abs(); - const int fail = verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - const bool isLastElementOnRank0 = (i == N-1 && rank == 0); - return isLastElementOnRank0 ? fabs(expected) : fabs(x_val); - }); + const int fail = verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { + const bool isLastElementOnRank0 = (i == N - 1 && rank == 0); + return isLastElementOnRank0 ? fabs(expected) : fabs(x_val); + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &x); @@ -972,24 +870,22 @@ class VectorTests : public TestBase x.setToConstant(x_val); - const real_type expected = two*two; + const real_type expected = two * two; if(rank == 0) { - setLocalElement(&x, N-1, expected); + setLocalElement(&x, N - 1, expected); } x.component_sqrt(); - const int fail = verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - const bool isLastElementOnRank0 = (i == N-1 && rank == 0); - return isLastElementOnRank0 ? fabs(two) : fabs(half); - }); + const int fail = verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { + const bool isLastElementOnRank0 = (i == N - 1 && rank == 0); + return isLastElementOnRank0 ? fabs(two) : fabs(half); + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &x); } - + /** * @brief Test: this[i] = sgn(this[i]) */ @@ -1001,17 +897,15 @@ class VectorTests : public TestBase x.setToConstant(x_val); if(rank == 0) { - setLocalElement(&x, N-1, half); + setLocalElement(&x, N - 1, half); } x.component_sgn(); - const int fail = verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - const bool isLastElementOnRank0 = (i == N-1 && rank == 0); - return isLastElementOnRank0 ? one : -one; - }); + const int fail = verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { + const bool isLastElementOnRank0 = (i == N - 1 && rank == 0); + return isLastElementOnRank0 ? one : -one; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &x); @@ -1060,8 +954,7 @@ class VectorTests : public TestBase const real_type expected = two; v.setToConstant(one); - if (rank== 0) - setLocalElement(&v, N-1, -two); + if(rank == 0) setLocalElement(&v, N - 1, -two); real_type actual = v.infnorm(); int fail = (expected != actual); @@ -1070,7 +963,7 @@ class VectorTests : public TestBase return reduceReturn(fail, &v); } - /** + /** * @brief Test: * this[i] += alpha * x[i] */ @@ -1096,15 +989,11 @@ class VectorTests : public TestBase return reduceReturn(fail, &v); } - /** + /** * @brief Test: * this[i] += alpha * x[i] * z[i] */ - bool vectorAxzpy( - hiop::hiopVector& v, - hiop::hiopVector& x, - hiop::hiopVector& z, - const int rank) + bool vectorAxzpy(hiop::hiopVector& v, hiop::hiopVector& x, hiop::hiopVector& z, const int rank) { const local_ordinal_type N = getLocalSize(&v); assert(v.get_size() == x.get_size()); @@ -1128,15 +1017,11 @@ class VectorTests : public TestBase return reduceReturn(fail, &v); } - /** + /** * @brief Test: * this[i] += alpha * x[i] / z[i] */ - bool vectorAxdzpy( - hiop::hiopVector& v, - hiop::hiopVector& x, - hiop::hiopVector& z, - const int rank) + bool vectorAxdzpy(hiop::hiopVector& v, hiop::hiopVector& x, hiop::hiopVector& z, const int rank) { const local_ordinal_type N = getLocalSize(&v); assert(v.get_size() == x.get_size()); @@ -1160,16 +1045,15 @@ class VectorTests : public TestBase return reduceReturn(fail, &v); } - /** + /** * @brief Test: * this[i] += alpha * x[i] / z[i] */ - bool vectorAxdzpy_w_patternSelect( - hiop::hiopVector& v, - hiop::hiopVector& x, - hiop::hiopVector& z, - hiop::hiopVector& pattern, - const int rank) + bool vectorAxdzpy_w_patternSelect(hiop::hiopVector& v, + hiop::hiopVector& x, + hiop::hiopVector& z, + hiop::hiopVector& pattern, + const int rank) { const local_ordinal_type N = getLocalSize(&v); assert(v.get_size() == x.get_size()); @@ -1184,27 +1068,24 @@ class VectorTests : public TestBase z.setToConstant(z_val); v.setToConstant(v_val); pattern.setToConstant(one); - if (rank== 0) - { + if(rank == 0) { setLocalElement(&pattern, N - 1, zero); - setLocalElement(&z, N - 1, zero); + setLocalElement(&z, N - 1, zero); } const real_type expected = v_val + (alpha * x_val / z_val); v.axdzpy_w_pattern(alpha, x, z, pattern); - const int fail = verifyAnswer(&v, - [=] (local_ordinal_type i) -> real_type - { - const bool isLastElementOnRank0 = (i == N-1 && rank == 0); - return isLastElementOnRank0 ? v_val : expected; - }); + const int fail = verifyAnswer(&v, [=](local_ordinal_type i) -> real_type { + const bool isLastElementOnRank0 = (i == N - 1 && rank == 0); + return isLastElementOnRank0 ? v_val : expected; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &v); } - /** + /** * @brief Test: * this[i] += C forall i */ @@ -1221,14 +1102,11 @@ class VectorTests : public TestBase return reduceReturn(fail, &x); } - /** + /** * @brief Test: * if (pattern[i] > 0.0) this[i] += C forall i */ - bool vectorAddConstant_w_patternSelect( - hiop::hiopVector& x, - hiop::hiopVector& pattern, - const int rank) + bool vectorAddConstant_w_patternSelect(hiop::hiopVector& x, hiop::hiopVector& pattern, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(pattern.get_size() == x.get_size()); @@ -1236,30 +1114,23 @@ class VectorTests : public TestBase const real_type x_val = half; pattern.setToConstant(one); - if (rank== 0) - setLocalElement(&pattern, N - 1, zero); + if(rank == 0) setLocalElement(&pattern, N - 1, zero); x.setToConstant(zero); x.addConstant_w_patternSelect(x_val, pattern); - const int fail = verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - return (rank == 0 && i == N-1) ? zero : x_val; - }); + const int fail = + verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { return (rank == 0 && i == N - 1) ? zero : x_val; }); printMessage(fail, __func__, rank); return reduceReturn(fail, &x); } - /** + /** * @brief Test: * Dot product == \sum{this[i] * other[i]} */ - bool vectorDotProductWith( - hiop::hiopVector& x, - hiop::hiopVector& y, - const int rank) + bool vectorDotProductWith(hiop::hiopVector& x, hiop::hiopVector& y, const int rank) { // Must use global size, as every rank will get global const global_ordinal_type N = x.get_size(); @@ -1276,7 +1147,7 @@ class VectorTests : public TestBase return reduceReturn(fail, &x); } - /** + /** * @brief Test: * this[i] *= -1 forall i */ @@ -1289,7 +1160,7 @@ class VectorTests : public TestBase return reduceReturn(fail, &x); } - /** + /** * @brief Test: * this[i]^-1 forall i */ @@ -1302,14 +1173,11 @@ class VectorTests : public TestBase return reduceReturn(fail, &x); } - /** + /** * @brief Test: * sum{ln(x[i]): pattern[i] = 1} */ - bool vectorLogBarrier( - hiop::hiopVector& x, - hiop::hiopVector& pattern, - const int rank) + bool vectorLogBarrier(hiop::hiopVector& x, hiop::hiopVector& pattern, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(N == getLocalSize(&pattern)); @@ -1322,9 +1190,9 @@ class VectorTests : public TestBase const real_type x_val = one; x.setToConstant(x_val); // Make sure pattern eliminates the correct element - setLocalElement(&x, N - 1, 1000*three); + setLocalElement(&x, N - 1, 1000 * three); - real_type expected = (N-1) * std::log(x_val); + real_type expected = (N - 1) * std::log(x_val); real_type result = x.logBarrier_local(pattern); int fail = !isEqual(result, expected); @@ -1354,9 +1222,9 @@ class VectorTests : public TestBase // Ensure that only N-1 elements of x are // used in the log calculation x.setToConstant(half); - setLocalElement(&x, N-1, two); + setLocalElement(&x, N - 1, two); - real_type expected = (N-1) * half + two; + real_type expected = (N - 1) * half + two; real_type result = x.sum_local(); int fail = !isEqual(result, expected); @@ -1364,16 +1232,12 @@ class VectorTests : public TestBase printMessage(fail, __func__, rank); return reduceReturn(fail, &x); } - + /** * @brief Test: - * if(pattern[i] == 1) this[i] += alpha /x[i] forall i + * if(pattern[i] == 1) this[i] += alpha /x[i] forall i */ - bool vectorAddLogBarrierGrad( - hiop::hiopVector& x, - hiop::hiopVector& y, - hiop::hiopVector& pattern, - const int rank) + bool vectorAddLogBarrierGrad(hiop::hiopVector& x, hiop::hiopVector& y, hiop::hiopVector& pattern, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(N == getLocalSize(&pattern)); @@ -1386,18 +1250,15 @@ class VectorTests : public TestBase x.setToConstant(x_val); y.setToConstant(y_val); - if (rank == 0) - setLocalElement(&pattern, N-1, zero); + if(rank == 0) setLocalElement(&pattern, N - 1, zero); x.addLogBarrierGrad(alpha, y, pattern); const real_type logBarrierGradVal = x_val + (alpha / y_val); - const int fail = verifyAnswer(&x, - [=] (local_ordinal_type i) -> real_type - { - const bool isLastElementOnRank0 = (i == N-1 && rank == 0); - return isLastElementOnRank0 ? x_val : logBarrierGradVal; - }); + const int fail = verifyAnswer(&x, [=](local_ordinal_type i) -> real_type { + const bool isLastElementOnRank0 = (i == N - 1 && rank == 0); + return isLastElementOnRank0 ? x_val : logBarrierGradVal; + }); printMessage(fail, __func__, rank); return reduceReturn(fail, &x); @@ -1414,11 +1275,7 @@ class VectorTests : public TestBase * return term * @endverbatim */ - bool vectorLinearDampingTerm( - hiop::hiopVector& x, - hiop::hiopVector& left, - hiop::hiopVector& right, - const int rank) + bool vectorLinearDampingTerm(hiop::hiopVector& x, hiop::hiopVector& left, hiop::hiopVector& right, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(N == getLocalSize(&left)); @@ -1430,18 +1287,16 @@ class VectorTests : public TestBase left.setToConstant(one); right.setToConstant(zero); - if (rank == 0) - { - setLocalElement(&left, N-1, two); - setLocalElement(&right, N-1, two); + if(rank == 0) { + setLocalElement(&left, N - 1, two); + setLocalElement(&right, N - 1, two); } real_type expected = zero; - for (local_ordinal_type i=0; i=1) - { + if(N >= 1) { setLocalElement(&left, 0, zero); setLocalElement(&right, 0, one); } - //idx 1: left=0, right=0 - if(N>=2) - { + // idx 1: left=0, right=0 + if(N >= 2) { setLocalElement(&left, 1, zero); setLocalElement(&right, 1, zero); - } - //idx 2: left=1 right=1 - if(N>=3) - { + // idx 2: left=1 right=1 + if(N >= 3) { setLocalElement(&left, 2, one); setLocalElement(&right, 2, one); } - //idx 3: left=1 right=0 - if(N>=4) - { + // idx 3: left=1 right=0 + if(N >= 4) { setLocalElement(&left, 3, one); setLocalElement(&right, 3, zero); } real_type expected[4]; - // expected for idx 0 + // expected for idx 0 expected[0] = getLocalElement(&x, 0) * alpha - ct; - // expected for idx 1 - if(N>=2) - { + // expected for idx 1 + if(N >= 2) { expected[1] = getLocalElement(&x, 1) * alpha; } // expected for idx 2 - if(N>=3) - { + if(N >= 3) { expected[2] = getLocalElement(&x, 2) * alpha; } // expected for idx 3 - if(N>=4) - { + if(N >= 4) { expected[3] = getLocalElement(&x, 3) * alpha + ct; } @@ -1532,19 +1375,17 @@ class VectorTests : public TestBase x.addLinearDampingTerm(left, right, alpha, ct); // - // compare with actual values + // compare with actual values // bool fail = false; - for(local_ordinal_type test = 0; test < std::min(N,4) && !fail; ++test) - { + for(local_ordinal_type test = 0; test < std::min(N, 4) && !fail; ++test) { fail = !isEqual(expected[test], getLocalElement(&x, test)); } - + printMessage(fail, __func__, rank); return reduceReturn(fail, &x); } - /** * @brief Test: * this[i] > 0 @@ -1554,14 +1395,11 @@ class VectorTests : public TestBase const local_ordinal_type N = getLocalSize(&x); int fail = 0; x.setToConstant(one); - if (!x.allPositive()) - fail++; + if(!x.allPositive()) fail++; x.setToConstant(one); - if (rank == 0) - setLocalElement(&x, N-1, -one); - if (x.allPositive()) - fail++; + if(rank == 0) setLocalElement(&x, N - 1, -one); + if(x.allPositive()) fail++; printMessage(fail, __func__, rank); return fail; @@ -1571,10 +1409,7 @@ class VectorTests : public TestBase * @brief Test: * this[i] > 0 \lor pattern[i] != 1.0 */ - bool vectorAllPositive_w_patternSelect( - hiop::hiopVector& x, - hiop::hiopVector& pattern, - const int rank) + bool vectorAllPositive_w_patternSelect(hiop::hiopVector& x, hiop::hiopVector& pattern, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(N == getLocalSize(&pattern)); @@ -1583,18 +1418,14 @@ class VectorTests : public TestBase x.setToConstant(one); pattern.setToConstant(one); - if (!x.allPositive_w_patternSelect(pattern)) - fail++; + if(!x.allPositive_w_patternSelect(pattern)) fail++; x.setToConstant(-one); - if (x.allPositive_w_patternSelect(pattern)) - fail++; + if(x.allPositive_w_patternSelect(pattern)) fail++; x.setToConstant(one); - if (rank == 0) - setLocalElement(&x, N-1, -one); - if (x.allPositive_w_patternSelect(pattern)) - fail++; + if(rank == 0) setLocalElement(&x, N - 1, -one); + if(x.allPositive_w_patternSelect(pattern)) fail++; printMessage(fail, __func__, rank); return reduceReturn(fail, &x); @@ -1609,16 +1440,14 @@ class VectorTests : public TestBase int fail = 0; x.setToConstant(two); - if (rank == 0) - setLocalElement(&x, N-1, -one); + if(rank == 0) setLocalElement(&x, N - 1, -one); - fail += (x.min()!=-one); + fail += (x.min() != -one); x.setToConstant(one); - if (rank == 0) - setLocalElement(&x, N-1, two); + if(rank == 0) setLocalElement(&x, N - 1, two); - fail += (x.min()!=one); + fail += (x.min() != one); printMessage(fail, __func__, rank); return 0; @@ -1627,9 +1456,7 @@ class VectorTests : public TestBase /** * @brief Test: min value in a vector */ - bool vectorMin_w_pattern(hiop::hiopVector& x, - hiop::hiopVector& pattern, - const int rank) + bool vectorMin_w_pattern(hiop::hiopVector& x, hiop::hiopVector& pattern, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(N == getLocalSize(&pattern)); @@ -1637,14 +1464,12 @@ class VectorTests : public TestBase int fail = 0; x.setToConstant(one); - if (rank == 0) - setLocalElement(&x, N-1, -one); + if(rank == 0) setLocalElement(&x, N - 1, -one); pattern.setToConstant(one); - fail += (x.min_w_pattern(pattern)!=-one); + fail += (x.min_w_pattern(pattern) != -one); - if (rank == 0) - setLocalElement(&pattern, N-1, zero); - fail += (x.min_w_pattern(pattern)!=one); + if(rank == 0) setLocalElement(&pattern, N - 1, zero); + fail += (x.min_w_pattern(pattern) != one); printMessage(fail, __func__, rank); return 0; @@ -1653,13 +1478,12 @@ class VectorTests : public TestBase /** * @brief Test: Project vector into bounds */ - bool vectorProjectIntoBounds( - hiop::hiopVector& x, - hiop::hiopVector& lower, - hiop::hiopVector& upper, - hiop::hiopVector& lower_pattern, - hiop::hiopVector& upper_pattern, - const int rank) + bool vectorProjectIntoBounds(hiop::hiopVector& x, + hiop::hiopVector& lower, + hiop::hiopVector& upper, + hiop::hiopVector& lower_pattern, + hiop::hiopVector& upper_pattern, + const int rank) { // setup constants and make assertions const local_ordinal_type N = getLocalSize(&x); @@ -1677,11 +1501,7 @@ class VectorTests : public TestBase upper.setToConstant(-one); lower_pattern.setToConstant(one); upper_pattern.setToConstant(one); - if (x.projectIntoBounds_local( - lower, lower_pattern, - upper, upper_pattern, - kappa1, kappa2)) - fail++; + if(x.projectIntoBounds_local(lower, lower_pattern, upper, upper_pattern, kappa1, kappa2)) fail++; // check that patterns are correctly applied and // x[0] is left at 1 @@ -1694,9 +1514,7 @@ class VectorTests : public TestBase setLocalElement(&upper_pattern, 0, zero); // Call should return true - fail += !x.projectIntoBounds_local( - lower, lower_pattern, upper, - upper_pattern, kappa1, kappa2); + fail += !x.projectIntoBounds_local(lower, lower_pattern, upper, upper_pattern, kappa1, kappa2); // First element should be one fail += !isEqual(getLocalElement(&x, 0), one); @@ -1709,9 +1527,7 @@ class VectorTests : public TestBase upper.setToConstant(one); lower_pattern.setToConstant(one); upper_pattern.setToConstant(one); - x.projectIntoBounds_local( - lower, lower_pattern, upper, - upper_pattern, kappa1, kappa2); + x.projectIntoBounds_local(lower, lower_pattern, upper, upper_pattern, kappa1, kappa2); // x[i] == 1/2 \forall i \in [1, N) fail += verifyAnswer(&x, half); @@ -1726,9 +1542,7 @@ class VectorTests : public TestBase upper_pattern.setToConstant(one); // Call should return true - fail += !x.projectIntoBounds_local( - lower, lower_pattern, upper, - upper_pattern, kappa1, kappa2); + fail += !x.projectIntoBounds_local(lower, lower_pattern, upper, upper_pattern, kappa1, kappa2); // x[i] == 1/2 \forall i \in [1, N) fail += verifyAnswer(&x, half); @@ -1743,9 +1557,7 @@ class VectorTests : public TestBase upper_pattern.setToConstant(one); // Call should return true - fail += !x.projectIntoBounds_local( - lower, lower_pattern, upper, - upper_pattern, kappa1, kappa2); + fail += !x.projectIntoBounds_local(lower, lower_pattern, upper, upper_pattern, kappa1, kappa2); // x[i] == -1/2 \forall i \in [1, N) fail += verifyAnswer(&x, -half); @@ -1770,10 +1582,7 @@ class VectorTests : public TestBase * return auxilary * @endverbatim */ - bool vectorFractionToTheBdry( - hiop::hiopVector& x, - hiop::hiopVector& dx, - const int rank) + bool vectorFractionToTheBdry(hiop::hiopVector& x, hiop::hiopVector& dx, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(N == getLocalSize(&dx)); @@ -1791,10 +1600,10 @@ class VectorTests : public TestBase // Test minumum finding for dx < 0 dx.setToConstant(-one); - setLocalElement(&dx, N-1, -two); + setLocalElement(&dx, N - 1, -two); result = x.fractionToTheBdry_local(dx, tau); - expected = quarter; // -0.5*1/(-2) + expected = quarter; // -0.5*1/(-2) fail += !isEqual(result, expected); printMessage(fail, __func__, rank); @@ -1806,11 +1615,10 @@ class VectorTests : public TestBase * Same as fractionToTheBdry, except that * no x[i] where pattern[i]==0 will be calculated */ - bool vectorFractionToTheBdry_w_pattern( - hiop::hiopVector& x, - hiop::hiopVector& dx, - hiop::hiopVector& pattern, - const int rank) + bool vectorFractionToTheBdry_w_pattern(hiop::hiopVector& x, + hiop::hiopVector& dx, + hiop::hiopVector& pattern, + const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(N == getLocalSize(&dx)); @@ -1833,8 +1641,8 @@ class VectorTests : public TestBase // value of one pattern.setToConstant(one); dx.setToConstant(one); - setLocalElement(&pattern, N-1, zero); - setLocalElement(&dx, N-1, -half); + setLocalElement(&pattern, N - 1, zero); + setLocalElement(&dx, N - 1, -half); result = x.fractionToTheBdry_w_pattern_local(dx, tau, pattern); expected = one; // default value if dx >= 0 @@ -1843,10 +1651,10 @@ class VectorTests : public TestBase // Pattern all ones, dx will be <0 pattern.setToConstant(one); dx.setToConstant(-one); - setLocalElement(&dx, N-1, -two); + setLocalElement(&dx, N - 1, -two); result = x.fractionToTheBdry_w_pattern_local(dx, tau, pattern); - expected = quarter; // -0.5*1/(-2) + expected = quarter; // -0.5*1/(-2) fail += !isEqual(result, expected); printMessage(fail, __func__, rank); @@ -1857,10 +1665,7 @@ class VectorTests : public TestBase * @brief Test: * pattern != 0 \lor this == 0 */ - bool vectorMatchesPattern( - hiop::hiopVector& x, - hiop::hiopVector& pattern, - const int rank) + bool vectorMatchesPattern(hiop::hiopVector& x, hiop::hiopVector& pattern, const int rank) { const local_ordinal_type N = getLocalSize(&x); assert(N == getLocalSize(&pattern)); @@ -1868,17 +1673,17 @@ class VectorTests : public TestBase x.setToConstant(one); pattern.setToConstant(one); - if (!x.matchesPattern(pattern)) fail++; + if(!x.matchesPattern(pattern)) fail++; x.setToConstant(one); pattern.setToConstant(one); - if (rank == 0) setLocalElement(&pattern, N-1, 0); - if (x.matchesPattern(pattern)) fail++; + if(rank == 0) setLocalElement(&pattern, N - 1, 0); + if(x.matchesPattern(pattern)) fail++; x.setToConstant(one); pattern.setToConstant(one); - if (rank == 0) setLocalElement(&x, N-1, 0); - if (!x.matchesPattern(pattern)) fail++; + if(rank == 0) setLocalElement(&x, N - 1, 0); + if(!x.matchesPattern(pattern)) fail++; printMessage(fail, __func__, rank); return reduceReturn(fail, &x); @@ -1888,12 +1693,11 @@ class VectorTests : public TestBase * @brief Test that hiop correctly adjusts based on the * hessian of the duals function */ - bool vectorAdjustDuals_plh( - hiop::hiopVector& z1, - hiop::hiopVector& z2, - hiop::hiopVector& x, - hiop::hiopVector& pattern, - const int rank) + bool vectorAdjustDuals_plh(hiop::hiopVector& z1, + hiop::hiopVector& z2, + hiop::hiopVector& x, + hiop::hiopVector& pattern, + const int rank) { const local_ordinal_type N = getLocalSize(&z1); assert(N == getLocalSize(&z2)); @@ -1911,31 +1715,27 @@ class VectorTests : public TestBase static const real_type mu = half; static const real_type kappa = half; - z1.adjustDuals_plh( - x, - pattern, - mu, - kappa); + z1.adjustDuals_plh(x, pattern, mu, kappa); real_type a, b; - for (local_ordinal_type i=0; i(x->get_local_size()); - } + local_ordinal_type getLocalSize(const hiop::hiopVector* x) { return static_cast(x->get_local_size()); } /// Checks if _local_ vector elements are set to `answer`. int verifyAnswer(hiop::hiopVector* x, real_type answer) { const local_ordinal_type N = getLocalSize(x); const real_type* xdata = getLocalDataConst(x); - + int local_fail = 0; - for(local_ordinal_type i = 0; i < N; ++i) - { - if(!isEqual(xdata[i], answer)) - { + for(local_ordinal_type i = 0; i < N; ++i) { + if(!isEqual(xdata[i], answer)) { ++local_fail; } } @@ -2085,13 +1866,11 @@ class VectorTests : public TestBase { const local_ordinal_type N = getLocalSize(x); const real_type* xdata = getLocalDataConst(x); - + int local_fail = 0; - for(local_ordinal_type i = 0; i < N; ++i) - { - if(xdata[i] > max_val || xdata[i] max_val || xdata[i] < min_val) { std::cout << "minv: " << min_val << ", maxv: " << max_val << ", x:[" << i << "]:" << xdata[i] << std::endl; ++local_fail; } @@ -2105,19 +1884,15 @@ class VectorTests : public TestBase * \forall x in _local_ vector data at index i, * x == expect(i) */ - int verifyAnswer( - hiop::hiopVector* x, - std::function expect) + int verifyAnswer(hiop::hiopVector* x, std::function expect) { const local_ordinal_type N = getLocalSize(x); const real_type* xdata = getLocalDataConst(x); - + int local_fail = 0; - for(local_ordinal_type i = 0; i < N; ++i) - { - if(!isEqual(xdata[i], expect(i))) - { + for(local_ordinal_type i = 0; i < N; ++i) { + if(!isEqual(xdata[i], expect(i))) { ++local_fail; } } @@ -2135,4 +1910,5 @@ class VectorTests : public TestBase virtual bool reduceReturn(int failures, hiop::hiopVector* x) = 0; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsCuda.cpp b/tests/LinAlg/vectorTestsCuda.cpp index 686fee6a6..0beb3853a 100644 --- a/tests/LinAlg/vectorTestsCuda.cpp +++ b/tests/LinAlg/vectorTestsCuda.cpp @@ -52,24 +52,24 @@ * @author Asher Mancinelli , PNNL * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL - * @author Nai-Yuan Chiang , LLNL + * @author Nai-Yuan Chiang , LLNL * */ #include #include "vectorTestsCuda.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /// Returns const pointer to local vector data const real_type* VectorTestsCuda::getLocalDataConst(hiop::hiopVector* x_in) { - if(auto* x = dynamic_cast(x_in)) - { + if(auto* x = dynamic_cast(x_in)) { x->copyFromDev(); return x->local_data_host_const(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `VectorTestsRajaPar::getLocalDataConst`!"); THROW_NULL_DEREF; } @@ -78,15 +78,12 @@ const real_type* VectorTestsCuda::getLocalDataConst(hiop::hiopVector* x_in) /// Method to set vector _x_ element _i_ to _value_. void VectorTestsCuda::setLocalElement(hiop::hiopVector* x_in, local_ordinal_type i, real_type val) { - if(auto* x = dynamic_cast(x_in)) - { + if(auto* x = dynamic_cast(x_in)) { x->copyFromDev(); - real_type *xdat = x->local_data_host(); + real_type* xdat = x->local_data_host(); xdat[i] = val; x->copyToDev(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `vectorTestsCuda::setLocalElement`!"); THROW_NULL_DEREF; } @@ -95,12 +92,9 @@ void VectorTestsCuda::setLocalElement(hiop::hiopVector* x_in, local_ordinal_type /// Get communicator MPI_Comm VectorTestsCuda::getMPIComm(hiop::hiopVector* x) { - if(auto* xvec = dynamic_cast(x)) - { + if(auto* xvec = dynamic_cast(x)) { return xvec->get_mpi_comm(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `vectorTestsCuda::getMPIComm`!"); THROW_NULL_DEREF; } @@ -113,17 +107,16 @@ real_type* VectorTestsCuda::createLocalBuffer(local_ordinal_type N, real_type va real_type* dev_buffer = nullptr; // Set buffer elements to the initial value - for(local_ordinal_type i = 0; i < N; ++i) - buffer[i] = val; + for(local_ordinal_type i = 0; i < N; ++i) buffer[i] = val; #ifdef HIOP_USE_GPU // Allocate memory on GPU - cudaError_t cuerr = cudaMalloc((void**)&dev_buffer, N*sizeof(real_type)); + cudaError_t cuerr = cudaMalloc((void**)&dev_buffer, N * sizeof(real_type)); assert(cudaSuccess == cuerr); - cuerr = cudaMemcpy(dev_buffer, buffer, N*sizeof(real_type), cudaMemcpyHostToDevice); + cuerr = cudaMemcpy(dev_buffer, buffer, N * sizeof(real_type), cudaMemcpyHostToDevice); assert(cuerr == cudaSuccess); - delete [] buffer; + delete[] buffer; return dev_buffer; #endif @@ -134,19 +127,18 @@ local_ordinal_type* VectorTestsCuda::createIdxBuffer(local_ordinal_type N, local { local_ordinal_type* buffer = new local_ordinal_type[N]; // Set buffer elements to the initial value - for(local_ordinal_type i = 0; i < N; ++i) - buffer[i] = val; - buffer[N-1] = 0; + for(local_ordinal_type i = 0; i < N; ++i) buffer[i] = val; + buffer[N - 1] = 0; #ifdef HIOP_USE_GPU // Allocate memory on GPU local_ordinal_type* dev_buffer = nullptr; - cudaError_t cuerr = cudaMalloc((void**)&dev_buffer, N*sizeof(local_ordinal_type)); + cudaError_t cuerr = cudaMalloc((void**)&dev_buffer, N * sizeof(local_ordinal_type)); assert(cudaSuccess == cuerr); - cuerr = cudaMemcpy(dev_buffer, buffer, N*sizeof(local_ordinal_type), cudaMemcpyHostToDevice); + cuerr = cudaMemcpy(dev_buffer, buffer, N * sizeof(local_ordinal_type), cudaMemcpyHostToDevice); assert(cuerr == cudaSuccess); - delete [] buffer; + delete[] buffer; return dev_buffer; #endif @@ -156,11 +148,11 @@ local_ordinal_type* VectorTestsCuda::createIdxBuffer(local_ordinal_type N, local /// Wrap delete command void VectorTestsCuda::deleteLocalBuffer(real_type* buffer) { - #ifdef HIOP_USE_GPU +#ifdef HIOP_USE_GPU cudaFree(buffer); - return ; - #endif - delete [] buffer; + return; +#endif + delete[] buffer; } /// If test fails on any rank set fail flag on all ranks @@ -177,5 +169,5 @@ bool VectorTestsCuda::reduceReturn(int failures, hiop::hiopVector* x) return (fail != 0); } - -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsCuda.hpp b/tests/LinAlg/vectorTestsCuda.hpp index 55f07a958..a535776c9 100644 --- a/tests/LinAlg/vectorTestsCuda.hpp +++ b/tests/LinAlg/vectorTestsCuda.hpp @@ -53,13 +53,16 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Nai-Yuan Chiang , LLNL - * + * */ #pragma once #include "vectorTests.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Utilities for testing hiopVectorCuda class @@ -70,8 +73,8 @@ namespace hiop { namespace tests { class VectorTestsCuda : public VectorTests { public: - VectorTestsCuda(){} - virtual ~VectorTestsCuda(){} + VectorTestsCuda() {} + virtual ~VectorTestsCuda() {} private: virtual const real_type* getLocalDataConst(hiop::hiopVector* x); @@ -83,4 +86,5 @@ class VectorTestsCuda : public VectorTests MPI_Comm getMPIComm(hiop::hiopVector* x); }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsHip.cpp b/tests/LinAlg/vectorTestsHip.cpp index f0d587221..27ee8fd7f 100644 --- a/tests/LinAlg/vectorTestsHip.cpp +++ b/tests/LinAlg/vectorTestsHip.cpp @@ -49,24 +49,24 @@ /** * @file vectorTestsHip.cpp * - * @author Nai-Yuan Chiang , LLNL + * @author Nai-Yuan Chiang , LLNL * */ #include #include "vectorTestsHip.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /// Returns const pointer to local vector data const real_type* VectorTestsHip::getLocalDataConst(hiop::hiopVector* x_in) { - if(auto* x = dynamic_cast(x_in)) - { + if(auto* x = dynamic_cast(x_in)) { x->copyFromDev(); return x->local_data_host_const(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `VectorTestsRajaPar::getLocalDataConst`!"); THROW_NULL_DEREF; } @@ -75,15 +75,12 @@ const real_type* VectorTestsHip::getLocalDataConst(hiop::hiopVector* x_in) /// Method to set vector _x_ element _i_ to _value_. void VectorTestsHip::setLocalElement(hiop::hiopVector* x_in, local_ordinal_type i, real_type val) { - if(auto* x = dynamic_cast(x_in)) - { + if(auto* x = dynamic_cast(x_in)) { x->copyFromDev(); - real_type *xdat = x->local_data_host(); + real_type* xdat = x->local_data_host(); xdat[i] = val; x->copyToDev(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `vectorTestsHip::setLocalElement`!"); THROW_NULL_DEREF; } @@ -92,12 +89,9 @@ void VectorTestsHip::setLocalElement(hiop::hiopVector* x_in, local_ordinal_type /// Get communicator MPI_Comm VectorTestsHip::getMPIComm(hiop::hiopVector* x) { - if(auto* xvec = dynamic_cast(x)) - { + if(auto* xvec = dynamic_cast(x)) { return xvec->get_mpi_comm(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `vectorTestsHip::getMPIComm`!"); THROW_NULL_DEREF; } @@ -110,17 +104,16 @@ real_type* VectorTestsHip::createLocalBuffer(local_ordinal_type N, real_type val real_type* dev_buffer = nullptr; // Set buffer elements to the initial value - for(local_ordinal_type i = 0; i < N; ++i) - buffer[i] = val; + for(local_ordinal_type i = 0; i < N; ++i) buffer[i] = val; #ifdef HIOP_USE_GPU // Allocate memory on GPU - hipError_t cuerr = hipMalloc((void**)&dev_buffer, N*sizeof(real_type)); + hipError_t cuerr = hipMalloc((void**)&dev_buffer, N * sizeof(real_type)); assert(hipSuccess == cuerr); - cuerr = hipMemcpy(dev_buffer, buffer, N*sizeof(real_type), hipMemcpyHostToDevice); + cuerr = hipMemcpy(dev_buffer, buffer, N * sizeof(real_type), hipMemcpyHostToDevice); assert(cuerr == hipSuccess); - delete [] buffer; + delete[] buffer; return dev_buffer; #endif @@ -131,19 +124,18 @@ local_ordinal_type* VectorTestsHip::createIdxBuffer(local_ordinal_type N, local_ { local_ordinal_type* buffer = new local_ordinal_type[N]; // Set buffer elements to the initial value - for(local_ordinal_type i = 0; i < N; ++i) - buffer[i] = val; - buffer[N-1] = 0; + for(local_ordinal_type i = 0; i < N; ++i) buffer[i] = val; + buffer[N - 1] = 0; #ifdef HIOP_USE_GPU // Allocate memory on GPU local_ordinal_type* dev_buffer = nullptr; - hipError_t cuerr = hipMalloc((void**)&dev_buffer, N*sizeof(local_ordinal_type)); + hipError_t cuerr = hipMalloc((void**)&dev_buffer, N * sizeof(local_ordinal_type)); assert(hipSuccess == cuerr); - cuerr = hipMemcpy(dev_buffer, buffer, N*sizeof(local_ordinal_type), hipMemcpyHostToDevice); + cuerr = hipMemcpy(dev_buffer, buffer, N * sizeof(local_ordinal_type), hipMemcpyHostToDevice); assert(cuerr == hipSuccess); - delete [] buffer; + delete[] buffer; return dev_buffer; #endif @@ -153,11 +145,11 @@ local_ordinal_type* VectorTestsHip::createIdxBuffer(local_ordinal_type N, local_ /// Wrap delete command void VectorTestsHip::deleteLocalBuffer(real_type* buffer) { - #ifdef HIOP_USE_GPU +#ifdef HIOP_USE_GPU hipFree(buffer); - return ; - #endif - delete [] buffer; + return; +#endif + delete[] buffer; } /// If test fails on any rank set fail flag on all ranks @@ -174,5 +166,5 @@ bool VectorTestsHip::reduceReturn(int failures, hiop::hiopVector* x) return (fail != 0); } - -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsHip.hpp b/tests/LinAlg/vectorTestsHip.hpp index 0d03d6821..9d6f3d0ce 100644 --- a/tests/LinAlg/vectorTestsHip.hpp +++ b/tests/LinAlg/vectorTestsHip.hpp @@ -50,13 +50,16 @@ * @file vectorTestsHip.hpp * * @author Nai-Yuan Chiang , LLNL - * + * */ #pragma once #include "vectorTests.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Utilities for testing hiopVectorHip class @@ -67,8 +70,8 @@ namespace hiop { namespace tests { class VectorTestsHip : public VectorTests { public: - VectorTestsHip(){} - virtual ~VectorTestsHip(){} + VectorTestsHip() {} + virtual ~VectorTestsHip() {} private: virtual const real_type* getLocalDataConst(hiop::hiopVector* x); @@ -80,4 +83,5 @@ class VectorTestsHip : public VectorTests MPI_Comm getMPIComm(hiop::hiopVector* x); }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsInt.hpp b/tests/LinAlg/vectorTestsInt.hpp index 00850abaa..27163178d 100644 --- a/tests/LinAlg/vectorTestsInt.hpp +++ b/tests/LinAlg/vectorTestsInt.hpp @@ -57,7 +57,10 @@ #include #include "testBase.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Collection of tests for abstract hiopVectorInt implementations. @@ -70,14 +73,13 @@ namespace hiop { namespace tests { class VectorTestsInt : public TestBase { public: - VectorTestsInt(){} - virtual ~VectorTestsInt(){} + VectorTestsInt() {} + virtual ~VectorTestsInt() {} virtual bool vectorSize(hiop::hiopVectorInt& x, const int size) const { int fail = 0; - if (x.get_local_size() != size) - fail++; + if(x.get_local_size() != size) fail++; printMessage(fail, __func__); return fail; } @@ -89,9 +91,9 @@ class VectorTestsInt : public TestBase virtual bool vectorSetElement(hiop::hiopVectorInt& x) const { int fail = 0; - const int idx = x.get_local_size()/2; + const int idx = x.get_local_size() / 2; const int x_val = 1; - for(int i=0; i -//TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated -// vector template. Likely a better approach would be to revise the `runTests` in testVector.cpp -// to test multiple configurations of the memory backend and execution policies for RAJA vector. +// TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated +// vector template. Likely a better approach would be to revise the `runTests` in testVector.cpp +// to test multiple configurations of the memory backend and execution policies for RAJA vector. #if defined(HIOP_USE_CUDA) #include using hiopVectorIntRajaT = hiop::hiopVectorIntRaja; @@ -67,12 +67,15 @@ using hiopVectorIntRajaT = hiop::hiopVectorIntRaja using hiopVectorIntRajaT = hiop::hiopVectorIntRaja; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using hiopVectorIntRajaT = hiop::hiopVectorIntRaja; #endif -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ int VectorTestsIntRaja::getLocalElement(hiop::hiopVectorInt* xvec, int idx) const { @@ -106,4 +109,5 @@ void VectorTestsIntRaja::setLocalElement(hiop::hiopVectorInt* xvec, int value) c } } -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsIntRaja.hpp b/tests/LinAlg/vectorTestsIntRaja.hpp index 3f4540ae4..757738efa 100644 --- a/tests/LinAlg/vectorTestsIntRaja.hpp +++ b/tests/LinAlg/vectorTestsIntRaja.hpp @@ -57,7 +57,10 @@ #include #include "vectorTestsInt.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Collection of helper methods for hiopVectorIntRaja @@ -68,12 +71,14 @@ namespace hiop { namespace tests { class VectorTestsIntRaja : public VectorTestsInt { public: - VectorTestsIntRaja(){} - virtual ~VectorTestsIntRaja(){} + VectorTestsIntRaja() {} + virtual ~VectorTestsIntRaja() {} + private: virtual int getLocalElement(hiop::hiopVectorInt*, int) const; virtual void setLocalElement(hiop::hiopVectorInt*, int, int) const; virtual void setLocalElement(hiop::hiopVectorInt*, int) const; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsIntSeq.cpp b/tests/LinAlg/vectorTestsIntSeq.cpp index a5e8f5099..bc36b1099 100644 --- a/tests/LinAlg/vectorTestsIntSeq.cpp +++ b/tests/LinAlg/vectorTestsIntSeq.cpp @@ -57,7 +57,10 @@ #include "vectorTestsIntSeq.hpp" #include -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ int VectorTestsIntSeq::getLocalElement(hiop::hiopVectorInt* xvec, int idx) const { @@ -87,4 +90,5 @@ void VectorTestsIntSeq::setLocalElement(hiop::hiopVectorInt* xvec, int value) co } } -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsIntSeq.hpp b/tests/LinAlg/vectorTestsIntSeq.hpp index 8e57563e1..5bae04815 100644 --- a/tests/LinAlg/vectorTestsIntSeq.hpp +++ b/tests/LinAlg/vectorTestsIntSeq.hpp @@ -57,7 +57,10 @@ #include #include "vectorTestsInt.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Collection of helper methods for hiopVectorIntSeq @@ -68,8 +71,8 @@ namespace hiop { namespace tests { class VectorTestsIntSeq : public VectorTestsInt { public: - VectorTestsIntSeq(){} - virtual ~VectorTestsIntSeq(){} + VectorTestsIntSeq() {} + virtual ~VectorTestsIntSeq() {} private: virtual int getLocalElement(hiop::hiopVectorInt*, int) const; @@ -77,4 +80,5 @@ class VectorTestsIntSeq : public VectorTestsInt virtual void setLocalElement(hiop::hiopVectorInt*, int) const; }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsPar.cpp b/tests/LinAlg/vectorTestsPar.cpp index 1755c60b8..81c2b0d17 100644 --- a/tests/LinAlg/vectorTestsPar.cpp +++ b/tests/LinAlg/vectorTestsPar.cpp @@ -57,17 +57,17 @@ #include #include "vectorTestsPar.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /// Returns const pointer to local vector data const real_type* VectorTestsPar::getLocalDataConst(hiop::hiopVector* x) { - if(auto* xvec = dynamic_cast(x)) - { + if(auto* xvec = dynamic_cast(x)) { return xvec->local_data_const(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `VectorTestsPar::getLocalDataConst`!"); THROW_NULL_DEREF; } @@ -76,13 +76,10 @@ const real_type* VectorTestsPar::getLocalDataConst(hiop::hiopVector* x) /// Method to set vector _x_ element _i_ to _value_. void VectorTestsPar::setLocalElement(hiop::hiopVector* x, local_ordinal_type i, real_type val) { - if(auto* xvec = dynamic_cast(x)) - { - real_type *xdat = xvec->local_data(); + if(auto* xvec = dynamic_cast(x)) { + real_type* xdat = xvec->local_data(); xdat[i] = val; - } - else - { + } else { assert(false && "Wrong type of vector passed into `VectorTestsPar::setLocalElement`!"); THROW_NULL_DEREF; } @@ -91,12 +88,9 @@ void VectorTestsPar::setLocalElement(hiop::hiopVector* x, local_ordinal_type i, /// Get communicator MPI_Comm VectorTestsPar::getMPIComm(hiop::hiopVector* x) { - if(auto* xvec = dynamic_cast(x)) - { + if(auto* xvec = dynamic_cast(x)) { return xvec->get_mpi_comm(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `VectorTestsPar::getMPIComm`!"); THROW_NULL_DEREF; } @@ -106,25 +100,20 @@ MPI_Comm VectorTestsPar::getMPIComm(hiop::hiopVector* x) real_type* VectorTestsPar::createLocalBuffer(local_ordinal_type N, real_type val) { real_type* buffer = new real_type[N]; - for(local_ordinal_type i = 0; i < N; ++i) - buffer[i] = val; + for(local_ordinal_type i = 0; i < N; ++i) buffer[i] = val; return buffer; } local_ordinal_type* VectorTestsPar::createIdxBuffer(local_ordinal_type N, local_ordinal_type val) { local_ordinal_type* buffer = new local_ordinal_type[N]; - for(local_ordinal_type i = 0; i < N; ++i) - buffer[i] = val; - buffer[N-1] = 0; + for(local_ordinal_type i = 0; i < N; ++i) buffer[i] = val; + buffer[N - 1] = 0; return buffer; } /// Wrap delete command -void VectorTestsPar::deleteLocalBuffer(real_type* buffer) -{ - delete [] buffer; -} +void VectorTestsPar::deleteLocalBuffer(real_type* buffer) { delete[] buffer; } /// If test fails on any rank set fail flag on all ranks bool VectorTestsPar::reduceReturn(int failures, hiop::hiopVector* x) @@ -140,5 +129,5 @@ bool VectorTestsPar::reduceReturn(int failures, hiop::hiopVector* x) return (fail != 0); } - -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsPar.hpp b/tests/LinAlg/vectorTestsPar.hpp index 6d698e713..739ea549f 100644 --- a/tests/LinAlg/vectorTestsPar.hpp +++ b/tests/LinAlg/vectorTestsPar.hpp @@ -58,7 +58,10 @@ #include "vectorTests.hpp" -namespace hiop { namespace tests { +namespace hiop +{ +namespace tests +{ /** * @brief Utilities for testing hiopVectorPar class @@ -69,8 +72,8 @@ namespace hiop { namespace tests { class VectorTestsPar : public VectorTests { public: - VectorTestsPar(){} - virtual ~VectorTestsPar(){} + VectorTestsPar() {} + virtual ~VectorTestsPar() {} private: virtual const real_type* getLocalDataConst(hiop::hiopVector* x); @@ -82,4 +85,5 @@ class VectorTestsPar : public VectorTests MPI_Comm getMPIComm(hiop::hiopVector* x); }; -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsRajaPar.cpp b/tests/LinAlg/vectorTestsRajaPar.cpp index 537b563d6..cf012e4fc 100644 --- a/tests/LinAlg/vectorTestsRajaPar.cpp +++ b/tests/LinAlg/vectorTestsRajaPar.cpp @@ -62,9 +62,9 @@ #include #include "vectorTestsRajaPar.hpp" -//TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated -// vector template. Likely a better approach would be to revise the `runTests` in testVector.cpp -// to test multiple configurations of the memory backend and execution policies for RAJA vector. +// TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated +// vector template. Likely a better approach would be to revise the `runTests` in testVector.cpp +// to test multiple configurations of the memory backend and execution policies for RAJA vector. #if defined(HIOP_USE_CUDA) #include using hiopVectorRajaT = hiop::hiopVectorRaja; @@ -72,25 +72,23 @@ using hiopVectorRajaT = hiop::hiopVectorRaja using hiopVectorRajaT = hiop::hiopVectorRaja; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include using hiopVectorRajaT = hiop::hiopVectorRaja; #endif +namespace hiop +{ +namespace tests +{ -namespace hiop { namespace tests { - - /// Returns const pointer to local vector data const real_type* VectorTestsRajaPar::getLocalDataConst(hiop::hiopVector* x_in) { - if(auto* x = dynamic_cast(x_in)) - { + if(auto* x = dynamic_cast(x_in)) { x->copyFromDev(); return x->local_data_host_const(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `VectorTestsRajaPar::getLocalDataConst`!"); THROW_NULL_DEREF; } @@ -99,15 +97,12 @@ const real_type* VectorTestsRajaPar::getLocalDataConst(hiop::hiopVector* x_in) /// Method to set vector _x_ element _i_ to _value_. void VectorTestsRajaPar::setLocalElement(hiop::hiopVector* x_in, local_ordinal_type i, real_type val) { - if(auto* x = dynamic_cast(x_in)) - { + if(auto* x = dynamic_cast(x_in)) { x->copyFromDev(); - real_type *xdat = x->local_data_host(); + real_type* xdat = x->local_data_host(); xdat[i] = val; x->copyToDev(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `VectorTestsRajaPar::setLocalElement`!"); THROW_NULL_DEREF; } @@ -116,12 +111,9 @@ void VectorTestsRajaPar::setLocalElement(hiop::hiopVector* x_in, local_ordinal_t /// Get communicator MPI_Comm VectorTestsRajaPar::getMPIComm(hiop::hiopVector* x) { - if(auto* xvec = dynamic_cast(x)) - { + if(auto* xvec = dynamic_cast(x)) { return xvec->get_mpi_comm(); - } - else - { + } else { assert(false && "Wrong type of vector passed into `VectorTestsRajaPar::getMPIComm`!"); THROW_NULL_DEREF; } @@ -132,16 +124,15 @@ real_type* VectorTestsRajaPar::createLocalBuffer(local_ordinal_type N, real_type { auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator hal = resmgr.getAllocator("HOST"); - real_type* buffer = static_cast(hal.allocate(N*sizeof(real_type))); + real_type* buffer = static_cast(hal.allocate(N * sizeof(real_type))); // Set buffer elements to the initial value - for(local_ordinal_type i = 0; i < N; ++i) - buffer[i] = val; + for(local_ordinal_type i = 0; i < N; ++i) buffer[i] = val; #ifdef HIOP_USE_GPU umpire::Allocator dal = resmgr.getAllocator("DEVICE"); - real_type* dev_buffer = static_cast(dal.allocate(N*sizeof(real_type))); - resmgr.copy(dev_buffer, buffer, N*sizeof(real_type)); + real_type* dev_buffer = static_cast(dal.allocate(N * sizeof(real_type))); + resmgr.copy(dev_buffer, buffer, N * sizeof(real_type)); hal.deallocate(buffer); return dev_buffer; #endif @@ -153,18 +144,15 @@ local_ordinal_type* VectorTestsRajaPar::createIdxBuffer(local_ordinal_type N, lo { auto& resmgr = umpire::ResourceManager::getInstance(); umpire::Allocator hal = resmgr.getAllocator("HOST"); - local_ordinal_type* buffer = static_cast( - hal.allocate(N*sizeof(local_ordinal_type))); + local_ordinal_type* buffer = static_cast(hal.allocate(N * sizeof(local_ordinal_type))); // Set buffer elements to the initial value - for(local_ordinal_type i = 0; i < N; ++i) - buffer[i] = val; - buffer[N-1] = 0; + for(local_ordinal_type i = 0; i < N; ++i) buffer[i] = val; + buffer[N - 1] = 0; #ifdef HIOP_USE_GPU umpire::Allocator dal = resmgr.getAllocator("DEVICE"); - local_ordinal_type* dev_buffer = static_cast( - dal.allocate(N*sizeof(local_ordinal_type))); - resmgr.copy(dev_buffer, buffer, N*sizeof(local_ordinal_type)); + local_ordinal_type* dev_buffer = static_cast(dal.allocate(N * sizeof(local_ordinal_type))); + resmgr.copy(dev_buffer, buffer, N * sizeof(local_ordinal_type)); hal.deallocate(buffer); return dev_buffer; #endif @@ -172,14 +160,13 @@ local_ordinal_type* VectorTestsRajaPar::createIdxBuffer(local_ordinal_type N, lo return buffer; } - /// Wrap delete command void VectorTestsRajaPar::deleteLocalBuffer(real_type* buffer) { #ifdef HIOP_USE_GPU const std::string hiop_umpire_dev = "DEVICE"; #else - const std::string hiop_umpire_dev = "HOST"; + const std::string hiop_umpire_dev = "HOST"; #endif auto& resmgr = umpire::ResourceManager::getInstance(); @@ -201,4 +188,5 @@ bool VectorTestsRajaPar::reduceReturn(int failures, hiop::hiopVector* x) return (fail != 0); } -}} // namespace hiop::tests +} // namespace tests +} // namespace hiop diff --git a/tests/LinAlg/vectorTestsRajaPar.hpp b/tests/LinAlg/vectorTestsRajaPar.hpp index f544a478d..b1765bfd9 100644 --- a/tests/LinAlg/vectorTestsRajaPar.hpp +++ b/tests/LinAlg/vectorTestsRajaPar.hpp @@ -72,8 +72,8 @@ namespace tests class VectorTestsRajaPar : public VectorTests { public: - VectorTestsRajaPar(){} - virtual ~VectorTestsRajaPar(){} + VectorTestsRajaPar() {} + virtual ~VectorTestsRajaPar() {} private: virtual const real_type* getLocalDataConst(hiop::hiopVector* x); @@ -85,4 +85,5 @@ class VectorTestsRajaPar : public VectorTests MPI_Comm getMPIComm(hiop::hiopVector* x); }; -}} // namespace hiopTest +} // namespace tests +} // namespace hiop diff --git a/tests/testMatrixDense.cpp b/tests/testMatrixDense.cpp index 328ab401b..c92d42430 100644 --- a/tests/testMatrixDense.cpp +++ b/tests/testMatrixDense.cpp @@ -69,7 +69,7 @@ #include "LinAlg/matrixTestsRajaDense.hpp" #endif -template +template static int runTests(const char* mem_space, MPI_Comm comm); int main(int argc, char** argv) @@ -82,55 +82,47 @@ int main(int argc, char** argv) #ifdef HIOP_USE_MPI int err; - err = MPI_Init(&argc, &argv); assert(MPI_SUCCESS==err); + err = MPI_Init(&argc, &argv); + assert(MPI_SUCCESS == err); comm = MPI_COMM_WORLD; - err = MPI_Comm_rank(comm,&rank); assert(MPI_SUCCESS==err); - if(0 == rank && MPI_SUCCESS == err) - std::cout << "\nRunning MPI enabled tests ...\n"; + err = MPI_Comm_rank(comm, &rank); + assert(MPI_SUCCESS == err); + if(0 == rank && MPI_SUCCESS == err) std::cout << "\nRunning MPI enabled tests ...\n"; #endif - if(rank == 0 && argc > 1) - std::cout << "Executable " << argv[0] << " doesn't take any input."; + if(rank == 0 && argc > 1) std::cout << "Executable " << argv[0] << " doesn't take any input."; int fail = 0; // // Test HiOp Dense Matrices // - if (rank == 0) - std::cout << "\nTesting HiOp default dense matrix implementation:\n"; + if(rank == 0) std::cout << "\nTesting HiOp default dense matrix implementation:\n"; fail += runTests("default", comm); #ifdef HIOP_USE_RAJA #ifdef HIOP_USE_GPU - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp RAJA dense matrix implementation ...\n"; std::cout << " ... using device memory space:\n"; } fail += runTests("device", comm); - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp RAJA dense matrix implementation ...\n"; std::cout << " ... using unified virtual memory space:\n"; } fail += runTests("um", comm); #else - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp RAJA dense matrix implementation ...\n"; std::cout << " ... using unified host memory space:\n"; } fail += runTests("host", comm); -#endif // GPU -#endif // RAJA +#endif // GPU +#endif // RAJA - if (rank == 0) - { - if(fail) - { + if(rank == 0) { + if(fail) { std::cout << "\n" << fail << " dense matrix tests failed\n\n"; - } - else - { + } else { std::cout << "\nAll dense matrix tests passed\n\n"; } } @@ -143,14 +135,14 @@ int main(int argc, char** argv) } /// Driver for all dense matrix tests -template +template static int runTests(const char* mem_space, MPI_Comm comm) { using namespace hiop; using hiop::tests::global_ordinal_type; - int rank=0; - int numRanks=1; + int rank = 0; + int numRanks = 1; #ifdef HIOP_USE_MPI MPI_Comm_rank(comm, &rank); @@ -159,9 +151,9 @@ static int runTests(const char* mem_space, MPI_Comm comm) T test; test.set_mem_space(mem_space); - //hiopOptions options; - //options.SetStringValue("mem_space", mem_space); - //LinearAlgebraFactory::set_mem_space(mem_space); + // hiopOptions options; + // options.SetStringValue("mem_space", mem_space); + // LinearAlgebraFactory::set_mem_space(mem_space); int fail = 0; @@ -176,35 +168,28 @@ static int runTests(const char* mem_space, MPI_Comm comm) global_ordinal_type K_global = K_local * numRanks; global_ordinal_type N_global = N_local * numRanks; - auto n_partition = new global_ordinal_type[numRanks+1]; - auto k_partition = new global_ordinal_type[numRanks+1]; - auto m_partition = new global_ordinal_type[numRanks+1]; + auto n_partition = new global_ordinal_type[numRanks + 1]; + auto k_partition = new global_ordinal_type[numRanks + 1]; + auto m_partition = new global_ordinal_type[numRanks + 1]; n_partition[0] = 0; k_partition[0] = 0; m_partition[0] = 0; - for(int i = 1; i < numRanks + 1; ++i) - { - n_partition[i] = i*N_local; - k_partition[i] = i*K_local; - m_partition[i] = i*M_local; + for(int i = 1; i < numRanks + 1; ++i) { + n_partition[i] = i * N_local; + k_partition[i] = i * K_local; + m_partition[i] = i * M_local; } // Distributed matrices: - hiopMatrixDense* A_kxm = - LinearAlgebraFactory::create_matrix_dense(mem_space, K_local, M_global, m_partition, comm); - hiopMatrixDense* A_kxn = - LinearAlgebraFactory::create_matrix_dense(mem_space, K_local, N_global, n_partition, comm); - hiopMatrixDense* A_mxk = - LinearAlgebraFactory::create_matrix_dense(mem_space, M_local, K_global, k_partition, comm); - hiopMatrixDense* A_mxn = - LinearAlgebraFactory::create_matrix_dense(mem_space, M_local, N_global, n_partition, comm); - hiopMatrixDense* A_nxm = - LinearAlgebraFactory::create_matrix_dense(mem_space, N_local, M_global, m_partition, comm); - hiopMatrixDense* B_mxn = - LinearAlgebraFactory::create_matrix_dense(mem_space, M_local, N_global, n_partition, comm); + hiopMatrixDense* A_kxm = LinearAlgebraFactory::create_matrix_dense(mem_space, K_local, M_global, m_partition, comm); + hiopMatrixDense* A_kxn = LinearAlgebraFactory::create_matrix_dense(mem_space, K_local, N_global, n_partition, comm); + hiopMatrixDense* A_mxk = LinearAlgebraFactory::create_matrix_dense(mem_space, M_local, K_global, k_partition, comm); + hiopMatrixDense* A_mxn = LinearAlgebraFactory::create_matrix_dense(mem_space, M_local, N_global, n_partition, comm); + hiopMatrixDense* A_nxm = LinearAlgebraFactory::create_matrix_dense(mem_space, N_local, M_global, m_partition, comm); + hiopMatrixDense* B_mxn = LinearAlgebraFactory::create_matrix_dense(mem_space, M_local, N_global, n_partition, comm); hiopMatrixDense* A_mxn_extra_row = - LinearAlgebraFactory::create_matrix_dense(mem_space, M_local, N_global, n_partition, comm, M_local+1); + LinearAlgebraFactory::create_matrix_dense(mem_space, M_local, N_global, n_partition, comm, M_local + 1); // Non-distributed matrices: hiopMatrixDense* A_mxk_nodist = LinearAlgebraFactory::create_matrix_dense(mem_space, M_local, K_local); @@ -224,8 +209,8 @@ static int runTests(const char* mem_space, MPI_Comm comm) // Non-distributed vectors hiopVector* x_n_nodist = LinearAlgebraFactory::create_vector(mem_space, N_local); hiopVector* x_m_nodist = LinearAlgebraFactory::create_vector(mem_space, M_local); - - //indexes vectors + + // indexes vectors hiopVectorInt* rows_idxs = LinearAlgebraFactory::create_vector_int(mem_space, M_local); fail += test.matrixSetToZero(*A_mxn, rank); @@ -233,8 +218,7 @@ static int runTests(const char* mem_space, MPI_Comm comm) fail += test.matrixTimesVec(*A_mxn, *x_m_nodist, *x_n, rank); fail += test.matrixTransTimesVec(*A_mxn, *x_m_nodist, *x_n, rank); - if(rank == 0) - { + if(rank == 0) { // These methods are local fail += test.matrixTimesMat(*A_mxk_nodist, *A_kxn_nodist, *A_mxn_nodist); fail += test.matrixAddDiagonal(*A_nxn_nodist, *x_n_nodist); @@ -250,7 +234,7 @@ static int runTests(const char* mem_space, MPI_Comm comm) // Not part of hiopMatrix interface, specific to matrixTestsDenseRowMajor fail += test.matrixCopyBlockFromMatrix(*A_mxm_nodist, *A_kxn_nodist); fail += test.matrixCopyFromMatrixBlock(*A_kxn_nodist, *A_mxm_nodist); - + fail += test.matrix_set_Hess_FR(*A_nxn_nodist, *B_nxn_nodist, *x_n_nodist); } @@ -261,7 +245,7 @@ static int runTests(const char* mem_space, MPI_Comm comm) fail += test.matrix_row_max_abs_value(*A_mxn, *x_m_nodist, rank); fail += test.matrix_scale_row(*A_mxn, *x_m_nodist, rank); fail += test.matrixIsFinite(*A_mxn, rank); - fail += test.matrixNumRows(*A_mxn, M_local, rank); //<- no row partitioning + fail += test.matrixNumRows(*A_mxn, M_local, rank); //<- no row partitioning fail += test.matrixNumCols(*A_mxn, N_global, rank); // specific to matrixTestsDenseRowMajor diff --git a/tests/testMatrixSparse.cpp b/tests/testMatrixSparse.cpp index d0e9d4935..4159d693e 100644 --- a/tests/testMatrixSparse.cpp +++ b/tests/testMatrixSparse.cpp @@ -53,7 +53,7 @@ * @author Slaven Peles , PNNL * @author Cameron Rutherford , PNNL * @author Jake K. Ryan , PNNL - * + * */ #include #include @@ -83,7 +83,7 @@ int main(int argc, char** argv) hiop::hiopOptionsNLP options; local_ordinal_type M_local = 5; - local_ordinal_type N_local = 10*M_local; + local_ordinal_type N_local = 10 * M_local; // Sparse matrix is not distributed global_ordinal_type M_global = M_local; @@ -97,21 +97,18 @@ int main(int argc, char** argv) std::cout << "\nTesting hiopMatrixSparseTriplet\n"; hiop::tests::MatrixTestsSparseTriplet test; test.set_mem_space(mem_space); - + // Establishing sparsity pattern and initializing Matrix local_ordinal_type entries_per_row = 5; local_ordinal_type nnz = M_local * entries_per_row; - hiop::hiopMatrixSparse* mxn_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_local, nnz); + hiop::hiopMatrixSparse* mxn_sparse = hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_local, nnz); test.initializeMatrix(mxn_sparse, entries_per_row); - hiop::hiopMatrixSparse* mxn_empty = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_global, 0); + hiop::hiopMatrixSparse* mxn_empty = hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_global, 0); test.initializeMatrix(mxn_empty, 0); - hiop::hiopMatrixSparse* nullxn_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, 0, N_global, 0); + hiop::hiopMatrixSparse* nullxn_sparse = hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, 0, N_global, 0); test.initializeMatrix(nullxn_sparse, 0); hiop::hiopVectorPar vec_m(M_global); @@ -128,7 +125,7 @@ int main(int argc, char** argv) fail += test.matrix_row_max_abs_value(*mxn_sparse, vec_m); fail += test.matrix_scale_row(*mxn_sparse, vec_m); fail += test.matrixIsFinite(*mxn_sparse); - + // Need a dense matrix to store the output of the following tests global_ordinal_type W_delta = M_global * 10; hiop::hiopMatrixDenseRowMajor W_dense(N_global + W_delta, N_global + W_delta); @@ -136,22 +133,21 @@ int main(int argc, char** argv) // local_ordinal_type test_offset = 10; local_ordinal_type test_offset = 4; fail += test.matrixAddMDinvMtransToDiagBlockOfSymDeMatUTri(*mxn_sparse, vec_n, W_dense, test_offset); - + // Need a dense matrix that is big enough for the sparse matrix to map inside the upper triangular part of it - //hiop::hiopMatrixDenseRowMajor n2xn2_dense(2 * N_global, 2 * N_global); - //fail += test.addToSymDenseMatrixUpperTriangle(W_dense, *mxn_sparse); + // hiop::hiopMatrixDenseRowMajor n2xn2_dense(2 * N_global, 2 * N_global); + // fail += test.addToSymDenseMatrixUpperTriangle(W_dense, *mxn_sparse); fail += test.transAddToSymDenseMatrixUpperTriangle(W_dense, *mxn_sparse); // Initialise another sparse Matrix local_ordinal_type M2 = M_global * 2; local_ordinal_type nnz2 = M2 * (entries_per_row); - hiop::hiopMatrixSparse* m2xn_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M2, N_global, nnz2); + hiop::hiopMatrixSparse* m2xn_sparse = hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M2, N_global, nnz2); test.initializeMatrix(m2xn_sparse, entries_per_row); hiop::hiopMatrixDenseRowMajor mxm2_dense(M_global, M2); - + // Set offsets where to insert sparse matrix local_ordinal_type i_offset = 1; local_ordinal_type j_offset = M2 + 1; @@ -162,57 +158,83 @@ int main(int argc, char** argv) // copy sparse matrix to a dense matrix hiop::hiopMatrixDenseRowMajor mxn_dense(M_global, N_global); fail += test.matrix_copy_to(mxn_dense, *mxn_sparse); - + // extend a sparse matrix [C;D] to [C -I I 0 0; D 0 0 -I I] - hiop::hiopMatrixDenseRowMajor m3xn3_dense(M_global+M2, N_global+2*(M_global+M2)); - local_ordinal_type nnz3 = nnz + nnz2 + 2*M_global + 2*M2; - hiop::hiopMatrixSparse* m3xn3_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_global+M2, N_global+2*(M_global+M2), nnz3); + hiop::hiopMatrixDenseRowMajor m3xn3_dense(M_global + M2, N_global + 2 * (M_global + M2)); + local_ordinal_type nnz3 = nnz + nnz2 + 2 * M_global + 2 * M2; + hiop::hiopMatrixSparse* m3xn3_sparse = + hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_global + M2, N_global + 2 * (M_global + M2), nnz3); fail += test.matrix_set_Jac_FR(m3xn3_dense, *m3xn3_sparse, *mxn_sparse, *m2xn_sparse); // functions used to build large sparse matrix from small pieces fail += test.matrix_copy_subdiagonal_from(m3xn3_dense, *m3xn3_sparse, vec_m); fail += test.matrix_set_subdiagonal_to(m3xn3_dense, *m3xn3_sparse); - + hiop::hiopVectorIntSeq select(M_local); fail += test.matrix_copy_rows_from(*mxn_sparse, *m2xn_sparse, select); // copy the 1st row of mxn_sparse to the last row in m2xn_sparse // replace the nonzero index from "nnz-entries_per_row" - fail += test.copy_rows_block_from(*mxn_sparse, *m2xn_sparse,0, 1, M_global-1, mxn_sparse->numberOfNonzeros()-entries_per_row); - fail += test.copy_rows_block_from(*mxn_empty, *m2xn_sparse,0, 1, M_global-1, mxn_sparse->numberOfNonzeros()); - fail += test.copy_rows_block_from(*nullxn_sparse, *m2xn_sparse,0, 0, M_global-1, mxn_sparse->numberOfNonzeros()); - fail += test.copy_rows_block_from(*nullxn_sparse, *nullxn_sparse,0, 0, 0, nullxn_sparse->numberOfNonzeros()); - + fail += test.copy_rows_block_from(*mxn_sparse, + *m2xn_sparse, + 0, + 1, + M_global - 1, + mxn_sparse->numberOfNonzeros() - entries_per_row); + fail += test.copy_rows_block_from(*mxn_empty, *m2xn_sparse, 0, 1, M_global - 1, mxn_sparse->numberOfNonzeros()); + fail += test.copy_rows_block_from(*nullxn_sparse, *m2xn_sparse, 0, 0, M_global - 1, mxn_sparse->numberOfNonzeros()); + fail += test.copy_rows_block_from(*nullxn_sparse, *nullxn_sparse, 0, 0, 0, nullxn_sparse->numberOfNonzeros()); + // create a bigger matrix, to test copy_submatrix_from and opy_submatrix_from_trans - hiop::hiopMatrixDenseRowMajor m4xn4_dense(2*M_global+N_global, 2*M_global+N_global); - local_ordinal_type nnz4 = entries_per_row*(2*M_global+N_global); - assert(nnz4 < (2*M_global+N_global)*(2*M_global+N_global)); - hiop::hiopMatrixSparse* m4xn4_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, 2*M_global+N_global, 2*M_global+N_global, nnz4); + hiop::hiopMatrixDenseRowMajor m4xn4_dense(2 * M_global + N_global, 2 * M_global + N_global); + local_ordinal_type nnz4 = entries_per_row * (2 * M_global + N_global); + assert(nnz4 < (2 * M_global + N_global) * (2 * M_global + N_global)); + hiop::hiopMatrixSparse* m4xn4_sparse = + hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, 2 * M_global + N_global, 2 * M_global + N_global, nnz4); test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_copy_submatrix_from(m4xn4_dense, *m4xn4_sparse, *mxn_sparse, M_global, 2*M_global, nnz4-nnz); - + fail += test.matrix_copy_submatrix_from(m4xn4_dense, *m4xn4_sparse, *mxn_sparse, M_global, 2 * M_global, nnz4 - nnz); + // reset the sparsity, since previous function may change the sparsity test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_copy_submatrix_from_trans(m4xn4_dense, *m4xn4_sparse, *mxn_sparse, M_global, 2*(M_global), nnz4-nnz); + fail += + test.matrix_copy_submatrix_from_trans(m4xn4_dense, *m4xn4_sparse, *mxn_sparse, M_global, 2 * (M_global), nnz4 - nnz); hiop::hiopVectorPar v_patern(N_global); local_ordinal_type nnz_to_replace = M_global; - assert(N_global>nnz_to_replace); + assert(N_global > nnz_to_replace); // reset the sparsity, since previous function may change the sparsity test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_copy_diag_matrix_to_subblock(m4xn4_dense, *m4xn4_sparse, M_global, 2*M_global, nnz4-2*nnz, nnz); - + fail += + test.matrix_copy_diag_matrix_to_subblock(m4xn4_dense, *m4xn4_sparse, M_global, 2 * M_global, nnz4 - 2 * nnz, nnz); + // reset the sparsity, since previous function may change the sparsity test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_copy_diag_matrix_to_subblock_w_pattern(m4xn4_dense, *m4xn4_sparse, vec_n, v_patern, M_global, 2*M_global, nnz4-2*nnz, nnz_to_replace); + fail += test.matrix_copy_diag_matrix_to_subblock_w_pattern(m4xn4_dense, + *m4xn4_sparse, + vec_n, + v_patern, + M_global, + 2 * M_global, + nnz4 - 2 * nnz, + nnz_to_replace); // reset the sparsity, since previous function may change the sparsity test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_set_submatrix_to_constant_diag_w_colpattern(m4xn4_dense, *m4xn4_sparse, v_patern, M_global, 2*M_global, nnz4-2*nnz, nnz_to_replace); - fail += test.matrix_set_submatrix_to_constant_diag_w_rowpattern(m4xn4_dense, *m4xn4_sparse, v_patern, M_global, 2*M_global, nnz4-2*nnz, nnz_to_replace); + fail += test.matrix_set_submatrix_to_constant_diag_w_colpattern(m4xn4_dense, + *m4xn4_sparse, + v_patern, + M_global, + 2 * M_global, + nnz4 - 2 * nnz, + nnz_to_replace); + fail += test.matrix_set_submatrix_to_constant_diag_w_rowpattern(m4xn4_dense, + *m4xn4_sparse, + v_patern, + M_global, + 2 * M_global, + nnz4 - 2 * nnz, + nnz_to_replace); // Remove testing objects delete mxn_sparse; @@ -221,7 +243,6 @@ int main(int argc, char** argv) delete m2xn_sparse; delete m3xn3_sparse; delete m4xn4_sparse; - } #ifdef HIOP_USE_RAJA @@ -231,30 +252,27 @@ int main(int argc, char** argv) const std::string mem_space = "HOST"; #else const std::string mem_space = "DEVICE"; -#endif - std::cout << "\nTesting hiopMatrixRajaSparseTriplet mem_space=" << mem_space << "\n" ; +#endif + std::cout << "\nTesting hiopMatrixRajaSparseTriplet mem_space=" << mem_space << "\n"; hiop::tests::MatrixTestsRajaSparseTriplet test; test.set_mem_space(mem_space); - + // Establishing sparsity pattern and initializing Matrix local_ordinal_type entries_per_row = 5; local_ordinal_type nnz = M_local * entries_per_row; - hiop::hiopMatrixSparse* mxn_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_local, nnz); + hiop::hiopMatrixSparse* mxn_sparse = hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_local, nnz); test.initializeMatrix(mxn_sparse, entries_per_row); - hiop::hiopMatrixSparse* mxn_empty = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_global, 0); + hiop::hiopMatrixSparse* mxn_empty = hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_global, 0); test.initializeMatrix(mxn_empty, 0); - - hiop::hiopMatrixSparse* nullxn_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, 0, N_global, 0); + + hiop::hiopMatrixSparse* nullxn_sparse = hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, 0, N_global, 0); test.initializeMatrix(nullxn_sparse, 0); - //hiop::hiopVectorRajaPar vec_m(M_global, mem_space); + // hiop::hiopVectorRajaPar vec_m(M_global, mem_space); hiop::hiopVector* vec_m = hiop::LinearAlgebraFactory::create_vector(mem_space, M_global); hiop::hiopVector* vec_m_2 = hiop::LinearAlgebraFactory::create_vector(mem_space, M_global); hiop::hiopVector* vec_n = hiop::LinearAlgebraFactory::create_vector(mem_space, N_global); @@ -270,19 +288,19 @@ int main(int argc, char** argv) fail += test.matrixIsFinite(*mxn_sparse); fail += test.matrixTimesVec(*mxn_sparse, *vec_m, *vec_n); fail += test.matrixTransTimesVec(*mxn_sparse, *vec_m, *vec_n); - + // Need a dense matrix to store the output of the following tests global_ordinal_type W_delta = M_global * 10; - + hiop::hiopMatrixDense* W_dense = - hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, N_global + W_delta, N_global + W_delta); - + hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, N_global + W_delta, N_global + W_delta); + // local_ordinal_type test_offset = 10; local_ordinal_type test_offset = 4; fail += test.matrixAddMDinvMtransToDiagBlockOfSymDeMatUTri(*mxn_sparse, *vec_n, *W_dense, test_offset); - // testing adding sparse matrix to the upper triangular area of a symmetric dense matrix - //fail += test.addToSymDenseMatrixUpperTriangle(W_dense, *mxn_sparse); + // testing adding sparse matrix to the upper triangular area of a symmetric dense matrix + // fail += test.addToSymDenseMatrixUpperTriangle(W_dense, *mxn_sparse); fail += test.transAddToSymDenseMatrixUpperTriangle(*W_dense, *mxn_sparse); // Initialise another sparse Matrix @@ -290,13 +308,11 @@ int main(int argc, char** argv) local_ordinal_type nnz2 = M2 * (entries_per_row); /// @todo: use linear algebra factory for this - hiop::hiopMatrixSparse* m2xn_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M2, N_global, nnz2); + hiop::hiopMatrixSparse* m2xn_sparse = hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M2, N_global, nnz2); test.initializeMatrix(m2xn_sparse, entries_per_row); - hiop::hiopMatrixDense* mxm2_dense = - hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, M_global, M2); - + hiop::hiopMatrixDense* mxm2_dense = hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, M_global, M2); + // Set offsets where to insert sparse matrix local_ordinal_type i_offset = 1; local_ordinal_type j_offset = M2 + 1; @@ -305,71 +321,100 @@ int main(int argc, char** argv) fail += test.matrixAddMDinvNtransToSymDeMatUTri(*mxn_sparse, *m2xn_sparse, *vec_n, *W_dense, i_offset, j_offset); // copy sparse matrix to dense matrix - hiop::hiopMatrixDense* mxn_dense = - hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, M_global, N_global); - + hiop::hiopMatrixDense* mxn_dense = hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, M_global, N_global); + fail += test.matrix_copy_to(*mxn_dense, *mxn_sparse); - + // extend a sparse matrix [C;D] to [C -I I 0 0; D 0 0 -I I] hiop::hiopMatrixDense* m3xn3_dense = - hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, M_global+M2, N_global+2*(M_global+M2)); - - local_ordinal_type nnz3 = nnz + nnz2 + 2*M_global + 2*M2; - hiop::hiopMatrixSparse* m3xn3_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_global+M2, N_global+2*(M_global+M2), nnz3); + hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, M_global + M2, N_global + 2 * (M_global + M2)); + + local_ordinal_type nnz3 = nnz + nnz2 + 2 * M_global + 2 * M2; + hiop::hiopMatrixSparse* m3xn3_sparse = + hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_global + M2, N_global + 2 * (M_global + M2), nnz3); fail += test.matrix_set_Jac_FR(*m3xn3_dense, *m3xn3_sparse, *mxn_sparse, *m2xn_sparse); // functions used to build large sparse matrix from small pieces fail += test.matrix_copy_subdiagonal_from(*m3xn3_dense, *m3xn3_sparse, *vec_m); fail += test.matrix_set_subdiagonal_to(*m3xn3_dense, *m3xn3_sparse); - + hiop::hiopVectorInt* select = hiop::LinearAlgebraFactory::create_vector_int(mem_space, M_local); - hiop::hiopMatrixSparse* mxn_sparse_2 = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_local, nnz); + hiop::hiopMatrixSparse* mxn_sparse_2 = + hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, M_local, N_local, nnz); fail += test.matrix_copy_rows_from(*mxn_sparse_2, *m2xn_sparse, *select); delete select; - + // copy the 1st row of mxn_sparse to the last row in m2xn_sparse // replace the nonzero index from "nnz-entries_per_row" - fail += test.copy_rows_block_from(*mxn_sparse, *m2xn_sparse,0, 1, M_global-1, mxn_sparse->numberOfNonzeros()-entries_per_row); - fail += test.copy_rows_block_from(*mxn_empty, *m2xn_sparse,0, 1, M_global-1, mxn_sparse->numberOfNonzeros()); - fail += test.copy_rows_block_from(*nullxn_sparse, *m2xn_sparse,0, 0, M_global-1, mxn_sparse->numberOfNonzeros()); - fail += test.copy_rows_block_from(*nullxn_sparse, *nullxn_sparse,0, 0, 0, nullxn_sparse->numberOfNonzeros()); - + fail += test.copy_rows_block_from(*mxn_sparse, + *m2xn_sparse, + 0, + 1, + M_global - 1, + mxn_sparse->numberOfNonzeros() - entries_per_row); + fail += test.copy_rows_block_from(*mxn_empty, *m2xn_sparse, 0, 1, M_global - 1, mxn_sparse->numberOfNonzeros()); + fail += test.copy_rows_block_from(*nullxn_sparse, *m2xn_sparse, 0, 0, M_global - 1, mxn_sparse->numberOfNonzeros()); + fail += test.copy_rows_block_from(*nullxn_sparse, *nullxn_sparse, 0, 0, 0, nullxn_sparse->numberOfNonzeros()); + // create a bigger matrix, to test copy_submatrix_from and opy_submatrix_from_trans - //hiop::hiopMatrixRajaDense m4xn4_dense(2*M_global+N_global, 2*M_global+N_global,mem_space); + // hiop::hiopMatrixRajaDense m4xn4_dense(2*M_global+N_global, 2*M_global+N_global,mem_space); hiop::hiopMatrixDense* m4xn4_dense = - hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, 2*M_global+N_global, 2*M_global+N_global); - - local_ordinal_type nnz4 = entries_per_row*(2*M_global+N_global); - assert(nnz4 < (2*M_global+N_global)*(2*M_global+N_global)); - hiop::hiopMatrixSparse* m4xn4_sparse = - hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, 2*M_global+N_global, 2*M_global+N_global, nnz4); + hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, 2 * M_global + N_global, 2 * M_global + N_global); + + local_ordinal_type nnz4 = entries_per_row * (2 * M_global + N_global); + assert(nnz4 < (2 * M_global + N_global) * (2 * M_global + N_global)); + hiop::hiopMatrixSparse* m4xn4_sparse = + hiop::LinearAlgebraFactory::create_matrix_sparse(mem_space, 2 * M_global + N_global, 2 * M_global + N_global, nnz4); // reset the sparsity, since previous function may change the sparsity test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_copy_submatrix_from(*m4xn4_dense, *m4xn4_sparse, *mxn_sparse, M_global, 2*M_global, nnz4-nnz); - + fail += test.matrix_copy_submatrix_from(*m4xn4_dense, *m4xn4_sparse, *mxn_sparse, M_global, 2 * M_global, nnz4 - nnz); + // reset the sparsity, since previous function may change the sparsity test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_copy_submatrix_from_trans(*m4xn4_dense, *m4xn4_sparse, *mxn_sparse, M_global, 2*(M_global), nnz4-nnz); + fail += test.matrix_copy_submatrix_from_trans(*m4xn4_dense, + *m4xn4_sparse, + *mxn_sparse, + M_global, + 2 * (M_global), + nnz4 - nnz); hiop::hiopVector* v_pattern = hiop::LinearAlgebraFactory::create_vector(mem_space, N_global); local_ordinal_type nnz_to_replace = M_global; - assert(N_global>nnz_to_replace); + assert(N_global > nnz_to_replace); // reset the sparsity, since previous function may change the sparsity test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_copy_diag_matrix_to_subblock(*m4xn4_dense, *m4xn4_sparse, M_global, 2*M_global, nnz4-2*nnz, nnz); - + fail += + test.matrix_copy_diag_matrix_to_subblock(*m4xn4_dense, *m4xn4_sparse, M_global, 2 * M_global, nnz4 - 2 * nnz, nnz); + // reset the sparsity, since previous function may change the sparsity test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_copy_diag_matrix_to_subblock_w_pattern(*m4xn4_dense, *m4xn4_sparse, *vec_n, *v_pattern, M_global, 2*M_global, nnz4-2*nnz, nnz_to_replace); + fail += test.matrix_copy_diag_matrix_to_subblock_w_pattern(*m4xn4_dense, + *m4xn4_sparse, + *vec_n, + *v_pattern, + M_global, + 2 * M_global, + nnz4 - 2 * nnz, + nnz_to_replace); // reset the sparsity, since previous function may change the sparsity test.initializeMatrix(m4xn4_sparse, entries_per_row); - fail += test.matrix_set_submatrix_to_constant_diag_w_colpattern(*m4xn4_dense, *m4xn4_sparse, *v_pattern, M_global, 2*M_global, nnz4-2*nnz, nnz_to_replace); - fail += test.matrix_set_submatrix_to_constant_diag_w_rowpattern(*m4xn4_dense, *m4xn4_sparse, *v_pattern, M_global, 2*M_global, nnz4-2*nnz, nnz_to_replace); + fail += test.matrix_set_submatrix_to_constant_diag_w_colpattern(*m4xn4_dense, + *m4xn4_sparse, + *v_pattern, + M_global, + 2 * M_global, + nnz4 - 2 * nnz, + nnz_to_replace); + fail += test.matrix_set_submatrix_to_constant_diag_w_rowpattern(*m4xn4_dense, + *m4xn4_sparse, + *v_pattern, + M_global, + 2 * M_global, + nnz4 - 2 * nnz, + nnz_to_replace); // Remove testing objects delete mxn_sparse; @@ -385,7 +430,7 @@ int main(int argc, char** argv) delete mxn_dense; delete m3xn3_dense; delete m4xn4_dense; - + delete vec_m; delete vec_m_2; delete vec_n; @@ -393,13 +438,9 @@ int main(int argc, char** argv) } #endif - - if(fail) - { + if(fail) { std::cout << "\n" << fail << " sparse matrix tests failed!\n\n"; - } - else - { + } else { std::cout << "\nAll sparse matrix tests passed!\n\n"; } diff --git a/tests/testMatrixSymSparse.cpp b/tests/testMatrixSymSparse.cpp index b14b77335..d69dd7dc2 100644 --- a/tests/testMatrixSymSparse.cpp +++ b/tests/testMatrixSymSparse.cpp @@ -96,24 +96,20 @@ void initializeSymSparseMat(hiop::hiopMatrixSparse* mat) auto iRow_idx = 0; auto jCol_idx = 0; - for (auto i = 0; i < num_entries; i++) - { - if(i % density == 0) - { + for(auto i = 0; i < num_entries; i++) { + if(i % density == 0) { iRow[nonZerosUsed] = iRow_idx; jCol[nonZerosUsed] = jCol_idx; val[nonZerosUsed] = i; nonZerosUsed++; - if(nnz == nonZerosUsed) - { + if(nnz == nonZerosUsed) { break; - } + } } jCol_idx++; // If we are at the end of the current row - if (jCol_idx % n == 0) - { + if(jCol_idx % n == 0) { iRow_idx++; jCol_idx = iRow_idx; } @@ -122,20 +118,23 @@ void initializeSymSparseMat(hiop::hiopMatrixSparse* mat) } #ifdef HIOP_USE_RAJA -//TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated -// template parameters for vector and matrix RAJA classes. Likely a better approach would be -// to revise the tests to try out multiple configurations of the memory backends and execution -// policies for RAJA dense matrix. +// TODO: this is a quick hack. Will need to modify this class to be aware of the instantiated +// template parameters for vector and matrix RAJA classes. Likely a better approach would be +// to revise the tests to try out multiple configurations of the memory backends and execution +// policies for RAJA dense matrix. #if defined(HIOP_USE_CUDA) #include -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #elif defined(HIOP_USE_HIP) #include -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #endif /** @@ -160,22 +159,18 @@ void initializeRajaSymSparseMat(hiop::hiopMatrixSparse* mat) auto iRow_idx = 0; auto jCol_idx = 0; - for (auto i = 0; i < num_entries; i++) - { - if (i % density == 0) - { + for(auto i = 0; i < num_entries; i++) { + if(i % density == 0) { iRow[nonZerosUsed] = iRow_idx; jCol[nonZerosUsed] = jCol_idx; val[nonZerosUsed] = i; nonZerosUsed++; - if (nonZerosUsed == nnz) - break; + if(nonZerosUsed == nnz) break; } jCol_idx++; // If we are at the end of the current row - if (jCol_idx % n == 0) - { + if(jCol_idx % n == 0) { iRow_idx++; jCol_idx = iRow_idx; } @@ -205,7 +200,7 @@ int main(int argc, char** argv) std::cout << "\nTesting hiopMatrixSymSparseTriplet\n"; hiop::tests::MatrixTestsSymSparseTriplet test; test.set_mem_space(mem_space); - + // Establishing sparsity pattern and initializing Matrix local_ordinal_type entries_per_row = 5; local_ordinal_type nnz = M_global * entries_per_row; @@ -214,13 +209,11 @@ int main(int argc, char** argv) hiop::hiopVectorPar vec_m_2(M_global); hiop::hiopMatrixDenseRowMajor mxm_dense(2 * M_global, 2 * M_global); - hiop::hiopMatrixSparse* m_sym = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_global, nnz); + hiop::hiopMatrixSparse* m_sym = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_global, nnz); initializeSymSparseMat(m_sym); local_ordinal_type nnz_m2 = m_sym->numberOfOffDiagNonzeros() + M_global; - hiop::hiopMatrixSparse* m2_sym = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, 2*M_global, nnz_m2); + hiop::hiopMatrixSparse* m2_sym = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, 2 * M_global, nnz_m2); fail += test.matrixTimesVec(*m_sym, vec_m, vec_m_2); fail += test.matrixAddUpperTriangleToSymDenseMatrixUpperTriangle(mxm_dense, *m_sym); @@ -240,12 +233,12 @@ int main(int argc, char** argv) const std::string mem_space = "HOST"; #else const std::string mem_space = "DEVICE"; -#endif - std::cout << "\nTesting hiopMatrixRajaSymSparseTriplet mem_space=" << mem_space << "\n" ; +#endif + std::cout << "\nTesting hiopMatrixRajaSymSparseTriplet mem_space=" << mem_space << "\n"; hiop::tests::MatrixTestsRajaSymSparseTriplet test; test.set_mem_space(mem_space); - + // Establishing sparsity pattern and initializing Matrix local_ordinal_type entries_per_row = 5; local_ordinal_type nnz = M_local * entries_per_row; @@ -253,15 +246,13 @@ int main(int argc, char** argv) hiop::hiopVector* vec_m = hiop::LinearAlgebraFactory::create_vector(mem_space, M_global); hiop::hiopVector* vec_m_2 = hiop::LinearAlgebraFactory::create_vector(mem_space, M_global); hiop::hiopMatrixDense* mxm_dense = - hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, 2*M_global, 2*M_global); + hiop::LinearAlgebraFactory::create_matrix_dense(mem_space, 2 * M_global, 2 * M_global); - hiop::hiopMatrixSparse* m_sym = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); + hiop::hiopMatrixSparse* m_sym = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); initializeRajaSymSparseMat(m_sym); local_ordinal_type nnz_m2 = m_sym->numberOfOffDiagNonzeros() + M_global; - hiop::hiopMatrixSparse* m2_sym = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, 2*M_global, nnz_m2); + hiop::hiopMatrixSparse* m2_sym = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, 2 * M_global, nnz_m2); fail += test.matrixTimesVec(*m_sym, *vec_m, *vec_m_2); fail += test.matrixAddUpperTriangleToSymDenseMatrixUpperTriangle(*mxm_dense, *m_sym); diff --git a/tests/testVector.cpp b/tests/testVector.cpp index 65834293a..f0a362a75 100644 --- a/tests/testVector.cpp +++ b/tests/testVector.cpp @@ -82,13 +82,12 @@ #include #endif -template +template static int runTests(const char* mem_space, MPI_Comm comm); -template +template static int runIntTests(const char* mem_space); - /** * @brief Main body of vector implementation testing code. * @@ -101,15 +100,15 @@ int main(int argc, char** argv) { using namespace hiop::tests; - int rank=0; + int rank = 0; MPI_Comm comm = MPI_COMM_SELF; #ifdef HIOP_USE_MPI int err; - err = MPI_Init(&argc, &argv); + err = MPI_Init(&argc, &argv); assert(MPI_SUCCESS == err); comm = MPI_COMM_WORLD; - err = MPI_Comm_rank(comm, &rank); + err = MPI_Comm_rank(comm, &rank); assert(MPI_SUCCESS == err); if(0 == rank && MPI_SUCCESS == err) { std::cout << "\nRunning MPI enabled tests ...\n"; @@ -121,20 +120,17 @@ int main(int argc, char** argv) // // Test HiOp vectors // - if (rank == 0) - std::cout << "\nTesting HiOp default vector implementation:\n"; + if(rank == 0) std::cout << "\nTesting HiOp default vector implementation:\n"; fail += runTests("default", comm); #ifdef HIOP_USE_CUDA - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp CUDA vector\n"; std::cout << " ... using CUDA memory space:\n"; } fail += runTests("cuda", comm); #endif #ifdef HIOP_USE_HIP - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp HIP vector\n"; std::cout << " ... using HIP memory space:\n"; } @@ -142,65 +138,57 @@ int main(int argc, char** argv) #endif #ifdef HIOP_USE_RAJA #ifdef HIOP_USE_GPU - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp RAJA vector\n"; std::cout << " ... using device memory space:\n"; } fail += runTests("device", comm); - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp RAJA vector\n"; std::cout << " ... using unified virtual memory space:\n"; } fail += runTests("um", comm); #else - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp RAJA vector\n"; std::cout << " ... using host memory space:\n"; } fail += runTests("host", comm); -#endif // GPU -#endif // RAJA +#endif // GPU +#endif // RAJA // // Test HiOp integer vectors - // - if (rank == 0) - { + // + if(rank == 0) { std::cout << "\nTesting HiOp sequential int vector:\n"; fail += runIntTests("default"); #ifdef HIOP_USE_RAJA #ifdef HIOP_USE_GPU - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp RAJA int vector\n"; std::cout << " ... using device memory space:\n"; } fail += runIntTests("device"); - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp RAJA int vector\n"; std::cout << " ... using unified virtual memory space:\n"; } fail += runIntTests("um"); #else - if (rank == 0) - { + if(rank == 0) { std::cout << "\nTesting HiOp RAJA int vector\n"; std::cout << " ... using host memory space:\n"; } fail += runIntTests("host"); -#endif // GPU -#endif // RAJA +#endif // GPU +#endif // RAJA } - + // // Test summary // - if (rank == 0) - { + if(rank == 0) { if(fail) std::cout << "\n" << fail << " vector tests failed!\n\n"; else @@ -215,14 +203,14 @@ int main(int argc, char** argv) } /// Driver for all real type vector tests -template +template int runTests(const char* mem_space, MPI_Comm comm) { using namespace hiop; using hiop::tests::global_ordinal_type; - int rank=0; - int numRanks=1; + int rank = 0; + int numRanks = 1; #ifdef HIOP_USE_MPI MPI_Comm_rank(comm, &rank); @@ -234,17 +222,16 @@ int runTests(const char* mem_space, MPI_Comm comm) global_ordinal_type Nlocal = 1000; global_ordinal_type Mlocal = 500; - global_ordinal_type Nglobal = Nlocal*numRanks; + global_ordinal_type Nglobal = Nlocal * numRanks; - global_ordinal_type* n_partition = new global_ordinal_type [numRanks + 1]; - global_ordinal_type* m_partition = new global_ordinal_type [numRanks + 1]; + global_ordinal_type* n_partition = new global_ordinal_type[numRanks + 1]; + global_ordinal_type* m_partition = new global_ordinal_type[numRanks + 1]; n_partition[0] = 0; m_partition[0] = 0; - for(int i = 1; i < numRanks + 1; ++i) - { - n_partition[i] = i*Nlocal; - m_partition[i] = i*Mlocal; + for(int i = 1; i < numRanks + 1; ++i) { + n_partition[i] = i * Nlocal; + m_partition[i] = i * Mlocal; } hiopVector* a = LinearAlgebraFactory::create_vector(mem_space, Nglobal, n_partition, comm); @@ -257,10 +244,10 @@ int runTests(const char* mem_space, MPI_Comm comm) hiopVector* z = LinearAlgebraFactory::create_vector(mem_space, Nglobal, n_partition, comm); hiopVectorInt* v_smaller_idxs = LinearAlgebraFactory::create_vector_int(mem_space, Mlocal); - + hiopVectorInt* v_map = LinearAlgebraFactory::create_vector_int(mem_space, Mlocal); hiopVectorInt* v2_map = LinearAlgebraFactory::create_vector_int(mem_space, Mlocal); - + int fail = 0; fail += test.vectorGetSize(*x, Nglobal, rank); @@ -271,8 +258,7 @@ int runTests(const char* mem_space, MPI_Comm comm) fail += test.vectorCopyFrom(*x, *y, rank); fail += test.vectorCopyTo(*x, *y, rank); - if (rank == 0) - { + if(rank == 0) { fail += test.vector_copy_from_indexes(*v_smaller, *v, *v_smaller_idxs); fail += test.vectorCopyFromStarting(*v, *v_smaller); fail += test.vectorStartingAtCopyFromStartingAt(*v_smaller, *v); @@ -327,15 +313,14 @@ int runTests(const char* mem_space, MPI_Comm comm) fail += test.vectorMatchesPattern(*x, *y, rank); fail += test.vectorAdjustDuals_plh(*x, *y, *z, *a, rank); - if (rank == 0) - { + if(rank == 0) { fail += test.vectorIsnan(*v); fail += test.vectorIsinf(*v); fail += test.vectorIsfinite(*v); } // TODO: remove - //fail += test.vector_is_equal(*x, *y, rank); + // fail += test.vector_is_equal(*x, *y, rank); delete a; delete b; @@ -355,7 +340,7 @@ int runTests(const char* mem_space, MPI_Comm comm) } /// Driver for all integer vector tests -template +template int runIntTests(const char* mem_space) { using namespace hiop; @@ -374,10 +359,10 @@ int runIntTests(const char* mem_space) fail += test.vectorGetElement(*x); fail += test.vectorSetElement(*x); fail += test.vector_linspace(*x); - + auto* y = LinearAlgebraFactory::create_vector_int(mem_space, sz); fail += test.vector_copy_from(*x, *y); - + delete x; delete y; diff --git a/tests/test_bicgstab.cpp b/tests/test_bicgstab.cpp index 6f6b7c55d..7aab925c6 100644 --- a/tests/test_bicgstab.cpp +++ b/tests/test_bicgstab.cpp @@ -22,31 +22,30 @@ void initializeSymSparseMat(hiop::hiopMatrixSparse* mat, bool is_diag_pred) size_type m = A->m(); - for (auto i = 0; i < m; i++) - { + for(auto i = 0; i < m; i++) { iRow[nonZerosUsed] = i; jCol[nonZerosUsed] = i; if(is_diag_pred) { - val[nonZerosUsed] = 1.0/((i+1.0)*5.); + val[nonZerosUsed] = 1.0 / ((i + 1.0) * 5.); } else { - val[nonZerosUsed] = (i+1.0)*5.; + val[nonZerosUsed] = (i + 1.0) * 5.; } nonZerosUsed++; #if 1 if(!is_diag_pred) { - if(i+1 -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #elif defined(HIOP_USE_HIP) #include -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #endif /** @@ -86,31 +88,30 @@ void initializeRajaSymSparseMat(hiop::hiopMatrixSparse* mat, bool is_diag_pred) size_type m = A->m(); - for (auto i = 0; i < m; i++) - { + for(auto i = 0; i < m; i++) { iRow[nonZerosUsed] = i; jCol[nonZerosUsed] = i; if(is_diag_pred) { - val[nonZerosUsed] = 1.0/((i+1.0)*5.); + val[nonZerosUsed] = 1.0 / ((i + 1.0) * 5.); } else { - val[nonZerosUsed] = (i+1.0)*5.; + val[nonZerosUsed] = (i + 1.0) * 5.; } nonZerosUsed++; if(!is_diag_pred) { - if(i+11) { + + if(argc > 1) { n = std::atoi(argv[1]); - if(n<=0) { + if(n <= 0) { n = 50; } } - - printf("\nTesting hiopBiCGStabSolver with matrix_%dx%d\n\n",n,n); + + printf("\nTesting hiopBiCGStabSolver with matrix_%dx%d\n\n", n, n); // on host { @@ -148,21 +151,19 @@ int main(int argc, char **argv) size_type M_local = n; size_type N_local = M_local; - size_type nnz = M_local + M_local-1 + M_local-2; + size_type nnz = M_local + M_local - 1 + M_local - 2; hiop::hiopVector* rhs = hiop::LinearAlgebraFactory::create_vector(mem_space, N_local); rhs->setToConstant(1.0); // create a sysmetric matrix (only upper triangular part is needed by hiop) // it is an upper tridiagonal matrix - hiop::hiopMatrixSparse* A_mat = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); + hiop::hiopMatrixSparse* A_mat = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); initializeSymSparseMat(A_mat, false); // use the diagonal part as a preconditioner // build the inverse of the diagonal preconditioner as a simple hiopLinearOperator - hiop::hiopMatrixSparse* Minv_mat = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, N_local); + hiop::hiopMatrixSparse* Minv_mat = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, N_local); initializeSymSparseMat(Minv_mat, true); hiopMatVecOpr* A_opr = new hiopMatVecOpr(A_mat); @@ -188,26 +189,24 @@ int main(int argc, char **argv) size_type M_local = n; size_type N_local = M_local; - size_type nnz = M_local + M_local-1 + M_local-2; + size_type nnz = M_local + M_local - 1 + M_local - 2; hiop::hiopVector* rhs = hiop::LinearAlgebraFactory::create_vector(mem_space, N_local); rhs->setToConstant(1.0); // create a sysmetric matrix (only upper triangular part is needed by hiop) // it is an upper tridiagonal matrix - hiop::hiopMatrixSparse* A_mat = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); + hiop::hiopMatrixSparse* A_mat = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); initializeRajaSymSparseMat(A_mat, false); // use the diagonal part as a preconditioner // build the inverse of the diagonal preconditioner as a simple hiopLinearOperator - hiop::hiopMatrixSparse* Minv_mat = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, N_local); + hiop::hiopMatrixSparse* Minv_mat = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, N_local); initializeRajaSymSparseMat(Minv_mat, true); hiopMatVecOpr* A_opr = new hiopMatVecOpr(A_mat); hiopMatVecOpr* Minv_opr = new hiopMatVecOpr(Minv_mat); - + hiopBiCGStabSolver bicgstab_solver(N_local, A_opr, Minv_opr, nullptr, nullptr); bool is_solved = bicgstab_solver.solve(rhs); @@ -223,13 +222,11 @@ int main(int argc, char **argv) delete Minv_opr; delete A_mat; delete Minv_mat; - delete rhs; + delete rhs; } #endif #ifdef HIOP_USE_MPI MPI_Finalize(); #endif - } - diff --git a/tests/test_pcg.cpp b/tests/test_pcg.cpp index 732ebf27d..da7fadee5 100644 --- a/tests/test_pcg.cpp +++ b/tests/test_pcg.cpp @@ -22,31 +22,30 @@ void initializeSymSparseMat(hiop::hiopMatrixSparse* mat, bool is_diag_pred) size_type m = A->m(); - for (auto i = 0; i < m; i++) - { + for(auto i = 0; i < m; i++) { iRow[nonZerosUsed] = i; jCol[nonZerosUsed] = i; if(is_diag_pred) { - val[nonZerosUsed] = 1.0/((i+1.0)*5.); + val[nonZerosUsed] = 1.0 / ((i + 1.0) * 5.); } else { - val[nonZerosUsed] = (i+1.0)*5.; + val[nonZerosUsed] = (i + 1.0) * 5.; } nonZerosUsed++; if(!is_diag_pred) { - if(i+1 -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #elif defined(HIOP_USE_HIP) #include -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #else -//#if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) +// #if !defined(HIOP_USE_CUDA) && !defined(HIOP_USE_HIP) #include -using hiopMatrixSymSparseTripletRajaT = hiop::hiopMatrixRajaSymSparseTriplet; +using hiopMatrixSymSparseTripletRajaT = + hiop::hiopMatrixRajaSymSparseTriplet; #endif /** @@ -85,31 +87,30 @@ void initializeRajaSymSparseMat(hiop::hiopMatrixSparse* mat, bool is_diag_pred) size_type m = A->m(); - for (auto i = 0; i < m; i++) - { + for(auto i = 0; i < m; i++) { iRow[nonZerosUsed] = i; jCol[nonZerosUsed] = i; if(is_diag_pred) { - val[nonZerosUsed] = 1.0/((i+1.0)*5.); + val[nonZerosUsed] = 1.0 / ((i + 1.0) * 5.); } else { - val[nonZerosUsed] = (i+1.0)*5.; + val[nonZerosUsed] = (i + 1.0) * 5.; } nonZerosUsed++; if(!is_diag_pred) { - if(i+11) { + + if(argc > 1) { n = std::atoi(argv[1]); - if(n<=0) { + if(n <= 0) { n = 50; } } - - printf("\nTesting hiopPCGSolver with matrix_%dx%d\n",n,n); + + printf("\nTesting hiopPCGSolver with matrix_%dx%d\n", n, n); // on host { @@ -147,28 +150,26 @@ int main(int argc, char **argv) size_type M_local = n; size_type N_local = M_local; - size_type nnz = M_local + M_local-1 + M_local-2; + size_type nnz = M_local + M_local - 1 + M_local - 2; hiop::hiopVector* rhs = hiop::LinearAlgebraFactory::create_vector(mem_space, N_local); rhs->setToConstant(1.0); // create a sysmetric matrix (only upper triangular part is needed by hiop) // it is an upper tridiagonal matrix - hiop::hiopMatrixSparse* A_mat = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); + hiop::hiopMatrixSparse* A_mat = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); initializeSymSparseMat(A_mat, false); // use the diagonal part as a preconditioner // build the inverse of the diagonal preconditioner as a simple hiopLinearOperator - hiop::hiopMatrixSparse* Minv_mat = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, N_local); + hiop::hiopMatrixSparse* Minv_mat = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, N_local); initializeSymSparseMat(Minv_mat, true); hiopMatVecOpr* A_opr = new hiopMatVecOpr(A_mat); hiopMatVecOpr* Minv_opr = new hiopMatVecOpr(Minv_mat); - + hiopPCGSolver pcg_solver(N_local, A_opr, Minv_opr, nullptr, nullptr); - + [[maybe_unused]] bool is_solved = pcg_solver.solve(rhs); std::cout << mem_space << ": " << pcg_solver.get_convergence_info() << std::endl; @@ -180,7 +181,7 @@ int main(int argc, char **argv) delete Minv_mat; delete rhs; } - + #ifdef HIOP_USE_RAJA // with RAJA { @@ -188,49 +189,44 @@ int main(int argc, char **argv) size_type M_local = n; size_type N_local = M_local; - size_type nnz = M_local + M_local-1 + M_local-2; + size_type nnz = M_local + M_local - 1 + M_local - 2; hiop::hiopVector* rhs = hiop::LinearAlgebraFactory::create_vector(mem_space, N_local); rhs->setToConstant(1.0); // create a sysmetric matrix (only upper triangular part is needed by hiop) // it is an upper tridiagonal matrix - hiop::hiopMatrixSparse* A_mat = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); + hiop::hiopMatrixSparse* A_mat = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, nnz); initializeRajaSymSparseMat(A_mat, false); // use the diagonal part as a preconditioner // build the inverse of the diagonal preconditioner as a simple hiopLinearOperator - hiop::hiopMatrixSparse* Minv_mat = - hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, N_local); + hiop::hiopMatrixSparse* Minv_mat = hiop::LinearAlgebraFactory::create_matrix_sym_sparse(mem_space, M_local, N_local); initializeRajaSymSparseMat(Minv_mat, true); hiopMatVecOpr* A_opr = new hiopMatVecOpr(A_mat); hiopMatVecOpr* Minv_opr = new hiopMatVecOpr(Minv_mat); - + hiopPCGSolver pcg_solver(N_local, A_opr, Minv_opr, nullptr, nullptr); - + bool is_solved = pcg_solver.solve(rhs); - + if(is_solved) { std::cout << mem_space << ": " << pcg_solver.get_convergence_info() << std::endl; } else { std::cout << "Failed! " << mem_space << ": " << pcg_solver.get_convergence_info() << std::endl; } - + // Destroy testing objects delete A_opr; delete Minv_opr; delete A_mat; delete Minv_mat; - delete rhs; + delete rhs; } #endif #ifdef HIOP_USE_MPI MPI_Finalize(); #endif - - } -