diff --git a/src/HypreSystem.cpp b/src/HypreSystem.cpp index 7cf2d2a..66752f1 100644 --- a/src/HypreSystem.cpp +++ b/src/HypreSystem.cpp @@ -1,11 +1,10 @@ #include "HypreSystem.h" #include "GpuQualifiers.h" -#if defined (HYPER_USING_HIP) +#if defined(HYPER_USING_HIP) #include "laplace_3d_weak_scaling.hpp" #endif - namespace nalu { HypreSystem::HypreSystem(MPI_Comm comm, YAML::Node &inpfile) : comm_(comm), inpfile_(inpfile) { @@ -23,7 +22,7 @@ void HypreSystem::load() { get_optional(linsys, "type", "matrix_market"); if (iproc_ == 0) - printf("%s : Using %s mat_format\n", __FUNCTION__, mat_format.c_str()); + printf("%s : Using %s mat_format\n", __FUNCTION__, mat_format.c_str()); if (mat_format == "matrix_market") { load_matrix_market(); @@ -33,7 +32,8 @@ void HypreSystem::load() { #if defined(HYPRE_USING_HIP) build_27pt_stencil(); #else -throw std::runtime_error("Cannot use build_27pt_stencil() without Hypre HIP support"); + throw std::runtime_error( + "Cannot use build_27pt_stencil() without Hypre HIP support"); #endif } else { throw std::runtime_error("Invalid linear system format option: " + @@ -52,11 +52,13 @@ void HypreSystem::setup_precon_and_solver() { std::string preconditioner = solver["preconditioner"].as(); if (iproc_ == 0) - printf("%s : Using %s solver with %s preconditioner\n", - __FUNCTION__, method.c_str(), preconditioner.c_str()); + printf("%s : Using %s solver with %s preconditioner\n", __FUNCTION__, + method.c_str(), preconditioner.c_str()); if (preconditioner == "boomeramg") { setup_boomeramg_precond(); + } else if (preconditioner == "ilu") { + setup_ilu_precond(); } else if (preconditioner == "none") { usePrecond_ = false; } else { @@ -75,6 +77,8 @@ void HypreSystem::setup_precon_and_solver() { setup_boomeramg_solver(); } else if (!method.compare("cogmres")) { setup_cogmres(); + } else if (!method.compare("ilu")) { + setup_ilu(); } else { throw std::runtime_error("Invalid option for solver method provided: " + method); @@ -286,6 +290,28 @@ void HypreSystem::setup_boomeramg_precond() { precondDestroyPtr_ = &HYPRE_BoomerAMGDestroy; } +void HypreSystem::setup_ilu_precond() { + YAML::Node node = inpfile_["ilu_preconditioner_settings"]; + + HYPRE_ILUCreate(&precond_); + HYPRE_ILUSetType(precond_, get_optional(node, "ilu_type", 0)); + HYPRE_ILUSetMaxIter(precond_, get_optional(node, "max_iterations", 1)); + HYPRE_ILUSetPrintLevel(precond_, get_optional(node, "print_level", 1)); + HYPRE_ILUSetTol(precond_, get_optional(node, "tolerance", 0.0)); + + HYPRE_ILUSetIterativeSetupType(precond_, + get_optional(node, "algorithm_type", 0)); + HYPRE_ILUSetIterativeSetupMaxIter( + precond_, get_optional(node, "max_ilu_iterations", 1)); + HYPRE_ILUSetIterativeSetupTolerance( + precond_, get_optional(node, "iterative_ilu_tolerance", 1e-5)); + HYPRE_ILUSetTriSolve(precond_, get_optional(node, "trisolve", 1)); + + precondSetupPtr_ = &HYPRE_ILUSetup; + precondSolvePtr_ = &HYPRE_ILUSolve; + precondDestroyPtr_ = &HYPRE_ILUDestroy; +} + void HypreSystem::setup_cogmres() { YAML::Node node = inpfile_["solver_settings"]; @@ -360,10 +386,9 @@ void HypreSystem::setup_cg() { HYPRE_ParCSRPCGCreate(comm_, &solver_); HYPRE_ParCSRPCGSetTol(solver_, get_optional(node, "tolerance", 1.0e-5)); HYPRE_ParCSRPCGSetMaxIter(solver_, - get_optional(node, "max_iterations", 1000)); + get_optional(node, "max_iterations", 1000)); // HYPRE_ParCSRPCGSetKDim(solver_, get_optional(node, "kspace", 10)); - HYPRE_ParCSRPCGSetPrintLevel(solver_, - get_optional(node, "print_level", 4)); + HYPRE_ParCSRPCGSetPrintLevel(solver_, get_optional(node, "print_level", 4)); solverDestroyPtr_ = &HYPRE_ParCSRPCGDestroy; solverSetupPtr_ = &HYPRE_ParCSRPCGSetup; @@ -371,6 +396,27 @@ void HypreSystem::setup_cg() { solverSolvePtr_ = &HYPRE_ParCSRPCGSolve; } +void HypreSystem::setup_ilu() { + YAML::Node node = inpfile_["solver_settings"]; + HYPRE_ILUCreate(&solver_); + HYPRE_ILUSetType(solver_, get_optional(node, "ilu_type", 0)); + HYPRE_ILUSetMaxIter(solver_, get_optional(node, "max_iterations", 20)); + HYPRE_ILUSetPrintLevel(solver_, get_optional(node, "print_level", 4)); + + HYPRE_ILUSetIterativeSetupType(solver_, + get_optional(node, "algorithm_type", 0)); + HYPRE_ILUSetIterativeSetupMaxIter( + solver_, get_optional(node, "max_ilu_iterations", 1)); + HYPRE_ILUSetIterativeSetupTolerance( + solver_, get_optional(node, "iterative_ilu_tolerance", 1e-5)); + HYPRE_ILUSetTriSolve(solver_, get_optional(node, "trisolve", 1)); + + solverDestroyPtr_ = &HYPRE_ILUDestroy; + solverSetupPtr_ = &HYPRE_ILUSetup; + solverPrecondPtr_ = nullptr; + solverSolvePtr_ = &HYPRE_ILUSolve; +} + void HypreSystem::destroy_system() { if (mat_) HYPRE_IJMatrixDestroy(mat_); @@ -388,13 +434,20 @@ void HypreSystem::destroy_system() { if (precond_) precondDestroyPtr_(precond_); - if (d_vector_indices_) hypre_TFree(d_vector_indices_, HYPRE_MEMORY_DEVICE); - if (d_vector_vals_) hypre_TFree(d_vector_vals_, HYPRE_MEMORY_DEVICE); - if (d_rows_) hypre_TFree(d_rows_, HYPRE_MEMORY_DEVICE); - if (d_cols_) hypre_TFree(d_cols_, HYPRE_MEMORY_DEVICE); - if (d_offd_rows_) hypre_TFree(d_offd_rows_, HYPRE_MEMORY_DEVICE); - if (d_offd_cols_) hypre_TFree(d_offd_cols_, HYPRE_MEMORY_DEVICE); - if (d_vals_) hypre_TFree(d_vals_, HYPRE_MEMORY_DEVICE); + if (d_vector_indices_) + hypre_TFree(d_vector_indices_, HYPRE_MEMORY_DEVICE); + if (d_vector_vals_) + hypre_TFree(d_vector_vals_, HYPRE_MEMORY_DEVICE); + if (d_rows_) + hypre_TFree(d_rows_, HYPRE_MEMORY_DEVICE); + if (d_cols_) + hypre_TFree(d_cols_, HYPRE_MEMORY_DEVICE); + if (d_offd_rows_) + hypre_TFree(d_offd_rows_, HYPRE_MEMORY_DEVICE); + if (d_offd_cols_) + hypre_TFree(d_offd_cols_, HYPRE_MEMORY_DEVICE); + if (d_vals_) + hypre_TFree(d_vals_, HYPRE_MEMORY_DEVICE); } void HypreSystem::init_row_decomposition() { @@ -499,13 +552,20 @@ void HypreSystem::assemble_system() { MPI_Barrier(comm_); /* delete unneeded memory */ - if (d_vector_indices_) hypre_TFree(d_vector_indices_, HYPRE_MEMORY_DEVICE); - if (d_vector_vals_) hypre_TFree(d_vector_vals_, HYPRE_MEMORY_DEVICE); - if (d_rows_) hypre_TFree(d_rows_, HYPRE_MEMORY_DEVICE); - if (d_cols_) hypre_TFree(d_cols_, HYPRE_MEMORY_DEVICE); - if (d_offd_rows_) hypre_TFree(d_offd_rows_, HYPRE_MEMORY_DEVICE); - if (d_offd_cols_) hypre_TFree(d_offd_cols_, HYPRE_MEMORY_DEVICE); - if (d_vals_) hypre_TFree(d_vals_, HYPRE_MEMORY_DEVICE); + if (d_vector_indices_) + hypre_TFree(d_vector_indices_, HYPRE_MEMORY_DEVICE); + if (d_vector_vals_) + hypre_TFree(d_vector_vals_, HYPRE_MEMORY_DEVICE); + if (d_rows_) + hypre_TFree(d_rows_, HYPRE_MEMORY_DEVICE); + if (d_cols_) + hypre_TFree(d_cols_, HYPRE_MEMORY_DEVICE); + if (d_offd_rows_) + hypre_TFree(d_offd_rows_, HYPRE_MEMORY_DEVICE); + if (d_offd_cols_) + hypre_TFree(d_offd_cols_, HYPRE_MEMORY_DEVICE); + if (d_vals_) + hypre_TFree(d_vals_, HYPRE_MEMORY_DEVICE); checkMemory(); } @@ -521,27 +581,27 @@ void HypreSystem::checkMemory() { cudaDeviceProp prop; cudaGetDeviceProperties(&prop, device); if (iproc_ == 0) - printf("\trank=%d : %s %s %d : %s (cc=%d.%d): device=%d of %d : free " - "memory=%1.8g GB, total memory=%1.8g GB\n", - iproc_, __FUNCTION__, __FILE__, __LINE__, prop.name, prop.major, - prop.minor, device, count, free / 1.e9, total / 1.e9); + printf("\trank=%d : %s %s %d : %s (cc=%d.%d): device=%d of %d : free " + "memory=%1.8g GB, total memory=%1.8g GB\n", + iproc_, __FUNCTION__, __FILE__, __LINE__, prop.name, prop.major, + prop.minor, device, count, free / 1.e9, total / 1.e9); #endif #ifdef HYPRE_USING_HIP - int count; - hipGetDeviceCount(&count); - int device; - hipGetDevice(&device); - size_t free, total; - hipMemGetInfo(&free, &total); - hipDeviceProp_t prop; - hipGetDeviceProperties(&prop, device); - //if (iproc_ == 0) - printf("rank=%d : %s %s %d : %s arch=%s : device=%d of %d : free " - "memory=%1.8g GB, total memory=%1.8g GB\n", - iproc_, __FUNCTION__, __FILE__, __LINE__, prop.name, prop.gcnArchName, - device, count, free / 1.e9, total / 1.e9); - fflush(stdout); + int count; + hipGetDeviceCount(&count); + int device; + hipGetDevice(&device); + size_t free, total; + hipMemGetInfo(&free, &total); + hipDeviceProp_t prop; + hipGetDeviceProperties(&prop, device); + // if (iproc_ == 0) + printf("rank=%d : %s %s %d : %s arch=%s : device=%d of %d : free " + "memory=%1.8g GB, total memory=%1.8g GB\n", + iproc_, __FUNCTION__, __FILE__, __LINE__, prop.name, prop.gcnArchName, + device, count, free / 1.e9, total / 1.e9); + fflush(stdout); #endif } @@ -551,7 +611,7 @@ void HypreSystem::solve() { std::chrono::duration write_operators(0); std::chrono::duration solve(0); - //hypre_CSRMatrixGpuSpMVAnalysis(hypre_ParCSRMatrixDiag(parMat_)); + // hypre_CSRMatrixGpuSpMVAnalysis(hypre_ParCSRMatrixDiag(parMat_)); for (int i = 0; i < numSolves_; ++i) { if (iproc_ == 0) @@ -605,7 +665,7 @@ void HypreSystem::solve() { timers_.emplace_back("Preconditioner setup", setup.count()); if (writeAmgMatrices_) - timers_.emplace_back("Write AMG Matrices", write_operators.count()); + timers_.emplace_back("Write AMG Matrices", write_operators.count()); timers_.emplace_back("Solve", solve.count()); solveComplete_ = true; @@ -719,36 +779,30 @@ void HypreSystem::check_solution() { timers_.emplace_back("Check solution", elapsed.count()); } -void HypreSystem::retrieve_timers(std::vector& names, - std::vector> & data) { +void HypreSystem::retrieve_timers(std::vector &names, + std::vector> &data) { if (iproc_ != 0) return; - if (names.size()==0) - { - for (auto &timer : timers_) - names.push_back(std::string(timer.first)); - data.resize(names.size()); - int k=0; - for (auto &timer : timers_) - { + if (names.size() == 0) { + for (auto &timer : timers_) + names.push_back(std::string(timer.first)); + data.resize(names.size()); + int k = 0; + for (auto &timer : timers_) { + data[k].push_back(double(timer.second)); + k++; + } + } else { + for (auto &timer : timers_) { + auto it = std::find(names.begin(), names.end(), timer.first); + + // If element was found + if (it != names.end()) { + int k = it - names.begin(); data[k].push_back(double(timer.second)); - k++; - } - } - else - { - for (auto &timer : timers_) - { - auto it = std::find(names.begin(), names.end(), timer.first); - - // If element was found - if (it != names.end()) - { - int k = it - names.begin(); - data[k].push_back(double(timer.second)); - } - } + } + } } } @@ -771,7 +825,7 @@ void HypreSystem::summarize_timers() { void HypreSystem::hypre_matrix_set_values() { if (iproc_ == 0) - printf("%s : loading matrix into HYPRE_IJMatrix\n", __FUNCTION__); + printf("%s : loading matrix into HYPRE_IJMatrix\n", __FUNCTION__); auto start = std::chrono::system_clock::now(); @@ -801,9 +855,9 @@ void HypreSystem::hypre_matrix_set_values() { HYPRE_MEMORY_DEVICE, HYPRE_MEMORY_HOST); /* Use the fast path */ - //if (iproc_ == 0) - printf("rank=%d : %s %s %d : nnz_this_rank=%d\n", iproc_, __FILE__, - __FUNCTION__, __LINE__, nnz_this_rank); + // if (iproc_ == 0) + printf("rank=%d : %s %s %d : nnz_this_rank=%d\n", iproc_, __FILE__, + __FUNCTION__, __LINE__, nnz_this_rank); YAML::Node node = inpfile_["solver_settings"]; if (get_optional(node, "fast_matrix_assemble", 0)) { #if 0 @@ -832,7 +886,7 @@ void HypreSystem::hypre_matrix_set_values() { void HypreSystem::hypre_vector_set_values(std::vector &vec, int component) { if (iproc_ == 0) - printf("%s : loading vector into HYPRE_IJVector\n", __FUNCTION__); + printf("%s : loading vector into HYPRE_IJVector\n", __FUNCTION__); auto start = std::chrono::system_clock::now(); @@ -863,8 +917,8 @@ void HypreSystem::hypre_vector_set_values(std::vector &vec, /* Use the fast path. This probably doesn't work with multivectors yet */ if (iproc_ == 0) - printf("rank=%d : %s %s %d : N=%d\n", iproc_, __FILE__, __FUNCTION__, - __LINE__, N); + printf("rank=%d : %s %s %d : N=%d\n", iproc_, __FILE__, __FUNCTION__, + __LINE__, N); YAML::Node node = inpfile_["solver_settings"]; if (get_optional(node, "fast_vector_assemble", 0)) { #if 0 @@ -1059,7 +1113,7 @@ void HypreSystem::build_ij_matrix(std::string matfile, int nfiles) { // read the files if (iproc_ == 0) - printf("%s : Reading %d HYPRE IJ Matrix files\n", __FUNCTION__, nfiles); + printf("%s : Reading %d HYPRE IJ Matrix files\n", __FUNCTION__, nfiles); HYPRE_Int ilower, iupper, jlower, jupper; #if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT) @@ -1134,7 +1188,8 @@ void HypreSystem::build_ij_vector(std::vector &vecfiles, std::string vecfile = vecfiles[i]; if (iproc_ == 0) - printf("%s : Reading %d HYPRE IJ Vector files %s\n", __FUNCTION__, nfiles, vecfile.c_str()); + printf("%s : Reading %d HYPRE IJ Vector files %s\n", __FUNCTION__, nfiles, + vecfile.c_str()); HYPRE_Int ilower, iupper; #if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT) @@ -1193,156 +1248,171 @@ void HypreSystem::build_ij_vector(std::vector &vecfiles, } /********************************************************************************/ -/* Build 27 Pt Stencil */ +/* Build 27 Pt Stencil */ /********************************************************************************/ #if defined(HYPER_USING_HIP) GPU_GLOBAL void #if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT) -fillGlobalRowIndices(HYPRE_BigInt n, HYPRE_BigInt iLower, int * row_ptr, HYPRE_BigInt * global_row_inds) +fillGlobalRowIndices(HYPRE_BigInt n, HYPRE_BigInt iLower, int *row_ptr, + HYPRE_BigInt *global_row_inds) #else -fillGlobalRowIndices(HYPRE_Int n, HYPRE_Int iLower, int * row_ptr, HYPRE_Int * global_row_inds) +fillGlobalRowIndices(HYPRE_Int n, HYPRE_Int iLower, int *row_ptr, + HYPRE_Int *global_row_inds) #endif { - if (blockIdx.x hrows(nnz); - std::vector hcols(nnz); - HIP_CALL(hipMemcpy(hrows.data(), drows, nnz*sizeof(HYPRE_BigInt), hipMemcpyDeviceToHost)); - HIP_CALL(hipMemcpy(hcols.data(), dcols, nnz*sizeof(HYPRE_BigInt), hipMemcpyDeviceToHost)); - HYPRE_BigInt countr=0; - HYPRE_BigInt countc=0; - for (int i=0; iiUpper_) - { - countr++; - } - if (hcols[i]iUpper_) - { - countc++; - } - } - if (countr) printf("rank %d : Found %ld of %ld bad diag row indices\n", iproc_, countr, nnz); - if (countc) printf("rank %d : Found %ld of %ld bad diag col indices\n", iproc_, countc, nnz); - fflush(stdout); - return; +void HypreSystem::validateDiagData(HYPRE_BigInt nnz, HYPRE_BigInt *drows, + HYPRE_BigInt *dcols) { + std::vector hrows(nnz); + std::vector hcols(nnz); + HIP_CALL(hipMemcpy(hrows.data(), drows, nnz * sizeof(HYPRE_BigInt), + hipMemcpyDeviceToHost)); + HIP_CALL(hipMemcpy(hcols.data(), dcols, nnz * sizeof(HYPRE_BigInt), + hipMemcpyDeviceToHost)); + HYPRE_BigInt countr = 0; + HYPRE_BigInt countc = 0; + for (int i = 0; i < nnz; ++i) { + if (hrows[i] < iLower_ || hrows[i] > iUpper_) { + countr++; + } + if (hcols[i] < iLower_ || hcols[i] > iUpper_) { + countc++; + } + } + if (countr) + printf("rank %d : Found %ld of %ld bad diag row indices\n", iproc_, countr, + nnz); + if (countc) + printf("rank %d : Found %ld of %ld bad diag col indices\n", iproc_, countc, + nnz); + fflush(stdout); + return; } #else -void HypreSystem::validateDiagData(HYPRE_Int nnz, HYPRE_Int *drows, HYPRE_Int *dcols) -{ - std::vector hrows(nnz); - std::vector hcols(nnz); - HIP_CALL(hipMemcpy(hrows.data(), drows, nnz*sizeof(HYPRE_Int), hipMemcpyDeviceToHost)); - HIP_CALL(hipMemcpy(hcols.data(), dcols, nnz*sizeof(HYPRE_Int), hipMemcpyDeviceToHost)); - HYPRE_Int countr=0; - HYPRE_Int countc=0; - for (int i=0; iiUpper_) - { - countr++; - } - if (hcols[i]iUpper_) - { - countc++; - } - } - if (countr) printf("rank %d : Found %d of %d bad diag row indices\n", iproc_, countr, nnz); - if (countc) printf("rank %d : Found %d of %d bad diag col indices\n", iproc_, countc, nnz); - fflush(stdout); - return; +void HypreSystem::validateDiagData(HYPRE_Int nnz, HYPRE_Int *drows, + HYPRE_Int *dcols) { + std::vector hrows(nnz); + std::vector hcols(nnz); + HIP_CALL(hipMemcpy(hrows.data(), drows, nnz * sizeof(HYPRE_Int), + hipMemcpyDeviceToHost)); + HIP_CALL(hipMemcpy(hcols.data(), dcols, nnz * sizeof(HYPRE_Int), + hipMemcpyDeviceToHost)); + HYPRE_Int countr = 0; + HYPRE_Int countc = 0; + for (int i = 0; i < nnz; ++i) { + if (hrows[i] < iLower_ || hrows[i] > iUpper_) { + countr++; + } + if (hcols[i] < iLower_ || hcols[i] > iUpper_) { + countc++; + } + } + if (countr) + printf("rank %d : Found %d of %d bad diag row indices\n", iproc_, countr, + nnz); + if (countc) + printf("rank %d : Found %d of %d bad diag col indices\n", iproc_, countc, + nnz); + fflush(stdout); + return; } #endif #endif #if defined(HYPRE_USING_HIP) #if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT) -void HypreSystem::validateOffdData(HYPRE_BigInt nnz, HYPRE_BigInt *drows, HYPRE_BigInt *dcols) -{ - std::vector hrows(nnz); - std::vector hcols(nnz); - HIP_CALL(hipMemcpy(hrows.data(), drows, nnz*sizeof(HYPRE_BigInt), hipMemcpyDeviceToHost)); - HIP_CALL(hipMemcpy(hcols.data(), dcols, nnz*sizeof(HYPRE_BigInt), hipMemcpyDeviceToHost)); - HYPRE_BigInt countr=0; - HYPRE_BigInt countc=0; - for (int i=0; iiUpper_) - { - countr++; - } - if (hcols[i]>=iLower_ && hcols[i]<=iUpper_) - { - countc++; - } - } - if (countr) printf("rank %d : Found %ld of %ld bad offd row indices\n", iproc_, countr, nnz); - if (countc) printf("rank %d : Found %ld of %ld bad offd col indices\n", iproc_, countc, nnz); - fflush(stdout); - return; +void HypreSystem::validateOffdData(HYPRE_BigInt nnz, HYPRE_BigInt *drows, + HYPRE_BigInt *dcols) { + std::vector hrows(nnz); + std::vector hcols(nnz); + HIP_CALL(hipMemcpy(hrows.data(), drows, nnz * sizeof(HYPRE_BigInt), + hipMemcpyDeviceToHost)); + HIP_CALL(hipMemcpy(hcols.data(), dcols, nnz * sizeof(HYPRE_BigInt), + hipMemcpyDeviceToHost)); + HYPRE_BigInt countr = 0; + HYPRE_BigInt countc = 0; + for (int i = 0; i < nnz; ++i) { + if (hrows[i] < iLower_ || hrows[i] > iUpper_) { + countr++; + } + if (hcols[i] >= iLower_ && hcols[i] <= iUpper_) { + countc++; + } + } + if (countr) + printf("rank %d : Found %ld of %ld bad offd row indices\n", iproc_, countr, + nnz); + if (countc) + printf("rank %d : Found %ld of %ld bad offd col indices\n", iproc_, countc, + nnz); + fflush(stdout); + return; } #else -void HypreSystem::validateOffdData(HYPRE_Int nnz, HYPRE_Int *drows, HYPRE_Int *dcols) -{ - std::vector hrows(nnz); - std::vector hcols(nnz); - HIP_CALL(hipMemcpy(hrows.data(), drows, nnz*sizeof(HYPRE_Int), hipMemcpyDeviceToHost)); - HIP_CALL(hipMemcpy(hcols.data(), dcols, nnz*sizeof(HYPRE_Int), hipMemcpyDeviceToHost)); - HYPRE_Int countr=0; - HYPRE_Int countc=0; - for (int i=0; iiUpper_) - { - countr++; - } - if (hcols[i]>=iLower_ && hcols[i]<=iUpper_) - { - countc++; - } - } - if (countr) printf("rank %d : Found %d of %d bad offd row indices\n", iproc_, countr, nnz); - if (countc) printf("rank %d : Found %d of %d bad offd col indices\n", iproc_, countc, nnz); - fflush(stdout); - return; +void HypreSystem::validateOffdData(HYPRE_Int nnz, HYPRE_Int *drows, + HYPRE_Int *dcols) { + std::vector hrows(nnz); + std::vector hcols(nnz); + HIP_CALL(hipMemcpy(hrows.data(), drows, nnz * sizeof(HYPRE_Int), + hipMemcpyDeviceToHost)); + HIP_CALL(hipMemcpy(hcols.data(), dcols, nnz * sizeof(HYPRE_Int), + hipMemcpyDeviceToHost)); + HYPRE_Int countr = 0; + HYPRE_Int countc = 0; + for (int i = 0; i < nnz; ++i) { + if (hrows[i] < iLower_ || hrows[i] > iUpper_) { + countr++; + } + if (hcols[i] >= iLower_ && hcols[i] <= iUpper_) { + countc++; + } + } + if (countr) + printf("rank %d : Found %d of %d bad offd row indices\n", iproc_, countr, + nnz); + if (countc) + printf("rank %d : Found %d of %d bad offd col indices\n", iproc_, countc, + nnz); + fflush(stdout); + return; } #endif #endif @@ -1369,26 +1439,17 @@ void HypreSystem::build_27pt_stencil() { int nproc_z; compute_3d_process_distribution(nproc_, nproc_x, nproc_y, nproc_z); - if(iproc_ == 0) - { - printf("\tProcess distribution: %d x %d x %d\n", nproc_x, nproc_y, nproc_z); + if (iproc_ == 0) { + printf("\tProcess distribution: %d x %d x %d\n", nproc_x, nproc_y, nproc_z); } // Generate problem Data data; - generate_3d_laplacian_hip(nx_, - ny_, - nz_, - nproc_x, - nproc_y, - nproc_z, - &comm_, - iproc_, - nproc_, - &data); + generate_3d_laplacian_hip(nx_, ny_, nz_, nproc_x, nproc_y, nproc_z, &comm_, + iproc_, nproc_, &data); - totalRows_ = M_ = N_ = nx_*ny_*nz_*nproc_; + totalRows_ = M_ = N_ = nx_ * ny_ * nz_ * nproc_; // generic method for IJ and MM init_row_decomposition(); @@ -1412,10 +1473,12 @@ void HypreSystem::build_27pt_stencil() { d_offd_cols_ = hypre_TAlloc(HYPRE_Int, data.offd_nnz, HYPRE_MEMORY_DEVICE); #endif - fillGlobalRowIndices<<>>(numRows_, iLower_, data.diagonal_csr_row_ptr, d_rows_); + fillGlobalRowIndices<<>>(numRows_, iLower_, + data.diagonal_csr_row_ptr, d_rows_); HIP_CALL(hipGetLastError()); - fillGlobalRowIndices<<>>(numRows_, iLower_, data.offd_csr_row_ptr, d_offd_rows_); + fillGlobalRowIndices<<>>(numRows_, iLower_, + data.offd_csr_row_ptr, d_offd_rows_); HIP_CALL(hipGetLastError()); /* transform column indices to global */ @@ -1425,27 +1488,30 @@ void HypreSystem::build_27pt_stencil() { hipGetDeviceProperties(&prop, device); int CUcount = prop.multiProcessorCount; - fillGlobalColIndices<<>>(data.diagonal_nnz, iLower_, data.diagonal_csr_col_ind, d_cols_); + fillGlobalColIndices<<>>( + data.diagonal_nnz, iLower_, data.diagonal_csr_col_ind, d_cols_); HIP_CALL(hipGetLastError()); - fillGlobalColIndices<<>>(data.offd_nnz, ((iproc_==0) ? iUpper_+1 : 0), data.offd_csr_col_ind, d_offd_cols_); + fillGlobalColIndices<<>>( + data.offd_nnz, ((iproc_ == 0) ? iUpper_ + 1 : 0), data.offd_csr_col_ind, + d_offd_cols_); HIP_CALL(hipGetLastError()); /* Validate data */ - //validateDiagData(data.diagonal_nnz, d_rows_, d_cols_); - //validateOffdData(data.offd_nnz, d_offd_rows_, d_offd_cols_); + // validateDiagData(data.diagonal_nnz, d_rows_, d_cols_); + // validateOffdData(data.offd_nnz, d_offd_rows_, d_offd_cols_); /********************************************/ /* Call Hypre Matrix Assembly Routines */ /********************************************/ /* Set matrix diagonal values */ - HYPRE_IJMatrixSetValues2(mat_, data.diagonal_nnz, - NULL, d_rows_, NULL, d_cols_, data.diagonal_csr_val); + HYPRE_IJMatrixSetValues2(mat_, data.diagonal_nnz, NULL, d_rows_, NULL, + d_cols_, data.diagonal_csr_val); HIP_CALL(hipGetLastError()); /* Set matrix off diagonal values */ - HYPRE_IJMatrixAddToValues2(mat_, data.offd_nnz, - NULL, d_offd_rows_, NULL, d_offd_cols_, data.offd_csr_val); + HYPRE_IJMatrixAddToValues2(mat_, data.offd_nnz, NULL, d_offd_rows_, NULL, + d_offd_cols_, data.offd_csr_val); HIP_CALL(hipGetLastError()); /********************************************/ @@ -1458,14 +1524,15 @@ void HypreSystem::build_27pt_stencil() { v = rhs_[0]; HYPRE_IJVectorSetComponent(v, 0); } else { - /* Throw exception */ + /* Throw exception */ } #if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT) d_vector_indices_ = hypre_TAlloc(HYPRE_BigInt, numRows_, HYPRE_MEMORY_DEVICE); #else d_vector_indices_ = hypre_TAlloc(HYPRE_Int, numRows_, HYPRE_MEMORY_DEVICE); #endif - thrust::sequence(thrust::device, d_vector_indices_, d_vector_indices_ + numRows_, iLower_); + thrust::sequence(thrust::device, d_vector_indices_, + d_vector_indices_ + numRows_, iLower_); HIP_CALL(hipGetLastError()); HYPRE_IJVectorSetValues(v, numRows_, d_vector_indices_, data.rhs_val); @@ -1482,7 +1549,7 @@ void HypreSystem::build_27pt_stencil() { } #endif /********************************************************************************/ -/* Matrix Market Format */ +/* Matrix Market Format */ /********************************************************************************/ void HypreSystem::load_matrix_market() { @@ -1581,9 +1648,10 @@ void HypreSystem::determine_mm_system_sizes(std::string matfile) { * *******************/ void HypreSystem::build_mm_matrix(std::string matfile) { - MPI_Barrier(comm_); - if (iproc_ == 0) - printf("%s : Reading from %s into HYPRE_IJMatrix\n", __FUNCTION__, matfile.c_str()); + MPI_Barrier(comm_); + if (iproc_ == 0) + printf("%s : Reading from %s into HYPRE_IJMatrix\n", __FUNCTION__, + matfile.c_str()); auto start = std::chrono::system_clock::now(); @@ -1612,35 +1680,31 @@ void HypreSystem::build_mm_matrix(std::string matfile) { if (err != 0) throw std::runtime_error("Cannot read matrix sizes in file: " + matfile); - fclose(fh); int fd = open(matfile.c_str(), O_RDONLY); struct stat s; int status = fstat(fd, &s); int64_t size = s.st_size; - char * f = (char *) mmap (0, size, PROT_READ, MAP_FILE|MAP_PRIVATE, fd, 0); + char *f = (char *)mmap(0, size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0); std::string all_lines(f); std::string line; - int64_t found=0, pos=-1, rsize=0, len=0; - bool foundHeader=false; + int64_t found = 0, pos = -1, rsize = 0, len = 0; + bool foundHeader = false; rows_.resize(0); cols_.resize(0); vals_.resize(0); - while (rsize= iLower_ && irow <= iUpper_) { rows_.push_back(irow); cols_.push_back(icol); @@ -1685,7 +1749,8 @@ void HypreSystem::build_mm_vector(std::vector &mmfiles, for (int j = 0; j < numComps_; ++j) { std::string mmfile = mmfiles[j]; if (iproc_ == 0) - printf("%s : Reading from %s into HYPRE_IJVector\n", __FUNCTION__, mmfile.c_str()); + printf("%s : Reading from %s into HYPRE_IJVector\n", __FUNCTION__, + mmfile.c_str()); FILE *fh; MM_typecode matcode; @@ -1717,46 +1782,43 @@ void HypreSystem::build_mm_vector(std::vector &mmfiles, struct stat s; int status = fstat(fd, &s); int64_t size = s.st_size; - char * f = (char *) mmap (0, size, PROT_READ, MAP_FILE|MAP_PRIVATE, fd, 0); + char *f = (char *)mmap(0, size, PROT_READ, MAP_FILE | MAP_PRIVATE, fd, 0); std::string all_lines(f); std::string line; - int64_t found=0, pos=-1, rsize=0, len=0, i=0; - bool foundHeader=false; + int64_t found = 0, pos = -1, rsize = 0, len = 0, i = 0; + bool foundHeader = false; vector_indices_.resize(0); vector_values_.resize(0); - while (rsize= iLower_ && i <= iUpper_) { - sscanf(line.c_str(), "%lf", &value); - vector_values_.push_back(value); + sscanf(line.c_str(), "%lf", &value); + vector_values_.push_back(value); #if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT) - vector_indices_.push_back((HYPRE_BigInt)i); + vector_indices_.push_back((HYPRE_BigInt)i); #else - vector_indices_.push_back(i); + vector_indices_.push_back(i); #endif } i++; } - + int unmap_result = munmap(f, size); close(fd); - + /* build the vector */ hypre_vector_set_values(vec, j); } diff --git a/src/HypreSystem.h b/src/HypreSystem.h index 8aa3c3e..8fad7d5 100644 --- a/src/HypreSystem.h +++ b/src/HypreSystem.h @@ -1,10 +1,10 @@ #ifndef HYPRESYSTEM_H #define HYPRESYSTEM_H -#include #include -#include +#include #include +#include #include #if defined(__APPLE__) @@ -35,11 +35,10 @@ extern "C" { #include #if defined(HYPRE_USING_CUDA) || (HYPRE_USING_HIP) -#include #include +#include #endif - #define HIP_CALL(call) \ do { \ hipError_t err = call; \ @@ -81,8 +80,8 @@ class HypreSystem { void summarize_timers(); //! retrieve timers to dump to csv file - void retrieve_timers(std::vector& names, - std::vector>& data); + void retrieve_timers(std::vector &names, + std::vector> &data); //! Destroy hypre linear system void destroy_system(); @@ -133,7 +132,7 @@ class HypreSystem { void hypre_matrix_set_values(); //! Build the HYPRE_IJVector from data loaded from either IJ or matrix market - void hypre_vector_set_values(std::vector &vec, int component); + void hypre_vector_set_values(std::vector &vec, int component); //! Load the matrix into HYPRE_IJVector void build_mm_vector(std::vector &, @@ -157,6 +156,11 @@ class HypreSystem { void setup_bicg(); void setup_cg(); + //! Setup ILU + void setup_ilu_precond(); + + void setup_ilu(); + //! MPI Communicator object MPI_Comm comm_; @@ -169,13 +173,13 @@ class HypreSystem { #if defined(HYPRE_MIXEDINT) || defined(HYPRE_BIGINT) std::vector rows_; std::vector cols_; - HYPRE_BigInt *d_rows_=NULL; - HYPRE_BigInt *d_cols_=NULL; - HYPRE_BigInt *d_offd_rows_=NULL; - HYPRE_BigInt *d_offd_cols_=NULL; + HYPRE_BigInt *d_rows_ = NULL; + HYPRE_BigInt *d_cols_ = NULL; + HYPRE_BigInt *d_offd_rows_ = NULL; + HYPRE_BigInt *d_offd_cols_ = NULL; std::vector vector_indices_; - HYPRE_BigInt *d_vector_indices_=NULL; + HYPRE_BigInt *d_vector_indices_ = NULL; //! Global number of rows in the linear system HYPRE_BigInt totalRows_{0}; @@ -191,13 +195,13 @@ class HypreSystem { #else std::vector rows_; std::vector cols_; - HYPRE_Int *d_rows_=NULL; - HYPRE_Int *d_cols_=NULL; - HYPRE_Int *d_offd_rows_=NULL; - HYPRE_Int *d_offd_cols_=NULL; + HYPRE_Int *d_rows_ = NULL; + HYPRE_Int *d_cols_ = NULL; + HYPRE_Int *d_offd_rows_ = NULL; + HYPRE_Int *d_offd_cols_ = NULL; std::vector vector_indices_; - HYPRE_Int *d_vector_indices_=NULL; + HYPRE_Int *d_vector_indices_ = NULL; //! Global number of rows in the linear system HYPRE_Int totalRows_{0}; @@ -214,10 +218,10 @@ class HypreSystem { #endif std::vector vals_; - HYPRE_Complex *d_vals_=NULL; + HYPRE_Complex *d_vals_ = NULL; std::vector vector_values_; - HYPRE_Complex *d_vector_vals_=NULL; + HYPRE_Complex *d_vector_vals_ = NULL; //! Timers std::vector> timers_;