From 7fcc2aee8e2c6d48c863d66b1cc76517883df9bb Mon Sep 17 00:00:00 2001 From: Sam Shaner Date: Sat, 26 Mar 2016 15:15:43 -0400 Subject: [PATCH 1/3] implemented faster successive over-relaxation iteration scheme for linear solver --- src/Matrix.cpp | 135 ++++++++++++++++++++++++++++++++++++++++++++----- src/Matrix.h | 9 ++++ src/linalg.cpp | 51 +++++++------------ 3 files changed, 149 insertions(+), 46 deletions(-) diff --git a/src/Matrix.cpp b/src/Matrix.cpp index df9a470a1..947a20940 100644 --- a/src/Matrix.cpp +++ b/src/Matrix.cpp @@ -28,10 +28,15 @@ Matrix::Matrix(omp_lock_t* cell_locks, int num_x, int num_y, int num_groups) { _LIL.push_back(std::map()); _A = NULL; + _AD = NULL; _IA = NULL; + _IAD = NULL; _JA = NULL; + _JAD = NULL; _DIAG = NULL; _modified = true; + _NNZ = 0; + _NNZD = 0; /* Set OpenMP locks for each Matrix cell */ if (cell_locks == NULL) @@ -51,12 +56,21 @@ Matrix::~Matrix() { if (_A != NULL) delete [] _A; + if (_AD != NULL) + delete [] _AD; + if (_IA != NULL) delete [] _IA; + if (_IAD != NULL) + delete [] _IAD; + if (_JA != NULL) delete [] _JA; + if (_JAD != NULL) + delete [] _JAD; + if (_DIAG != NULL) delete [] _DIAG; @@ -175,32 +189,54 @@ void Matrix::convertToCSR() { /* Get number of nonzero values */ int NNZ = getNNZ(); + int NNZD = getNNZD(); - /* Deallocate memory for arrays if previously allocated */ - if (_A != NULL) - delete [] _A; + if (NNZ != _NNZ || NNZD != _NNZD) { - if (_IA != NULL) - delete [] _IA; + /* Deallocate memory for arrays if previously allocated */ + if (_A != NULL) + delete [] _A; - if (_JA != NULL) - delete [] _JA; + if (_AD != NULL) + delete [] _AD; - if (_DIAG != NULL) - delete [] _DIAG; + if (_IA != NULL) + delete [] _IA; + + if (_IAD != NULL) + delete [] _IAD; + + if (_JA != NULL) + delete [] _JA; + + if (_JAD != NULL) + delete [] _JAD; + + if (_DIAG != NULL) + delete [] _DIAG; + + /* Allocate memory for arrays */ + _A = new FP_PRECISION[NNZ]; + _AD = new FP_PRECISION[NNZD]; + _IA = new int[_num_rows+1]; + _IAD = new int[_num_rows+1]; + _JA = new int[NNZ]; + _JAD = new int[NNZD]; + _DIAG = new FP_PRECISION[_num_rows]; + + _NNZ = NNZ; + _NNZD = NNZD; + } - /* Allocate memory for arrays */ - _A = new FP_PRECISION[NNZ]; - _IA = new int[_num_rows+1]; - _JA = new int[NNZ]; - _DIAG = new FP_PRECISION[_num_rows]; std::fill_n(_DIAG, _num_rows, 0.0); /* Form arrays */ int j = 0; + int jd = 0; std::map::iterator iter; for (int row=0; row < _num_rows; row++) { _IA[row] = j; + _IAD[row] = jd; for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) { if (iter->second != 0.0) { _JA[j] = iter->first; @@ -208,6 +244,11 @@ void Matrix::convertToCSR() { if (row == iter->first) _DIAG[row] = iter->second; + else { + _JAD[jd] = iter->first; + _AD[jd] = iter->second; + jd++; + } j++; } @@ -215,6 +256,7 @@ void Matrix::convertToCSR() { } _IA[_num_rows] = NNZ; + _IAD[_num_rows] = NNZD; /* Reset flat indicating the CSR objects have the same values as the * LIL object */ @@ -282,6 +324,21 @@ FP_PRECISION* Matrix::getA() { } +/** + * @brief Get the A component (excluding the diagonal) of the CSR form of the + * matrix object. + * @return A pointer to the A component (excluding the diagonal) of the CSR + * form matrix object. + */ +FP_PRECISION* Matrix::getAD() { + + if (_modified) + convertToCSR(); + + return _AD; +} + + /** * @brief Get the IA component of the CSR form of the matrix object. * @return A pointer to the IA component of the CSR form matrix object. @@ -295,6 +352,21 @@ int* Matrix::getIA() { } +/** + * @brief Get the IA component (excluding the diagonal) of the CSR form of the + * matrix object. + * @return A pointer to the IA component (excluding the diagonal) of the CSR + * form matrix object. + */ +int* Matrix::getIAD() { + + if (_modified) + convertToCSR(); + + return _IAD; +} + + /** * @brief Get the JA component of the CSR form of the matrix object. * @return A pointer to the JA component of the CSR form matrix object. @@ -308,6 +380,21 @@ int* Matrix::getJA() { } +/** + * @brief Get the JA component (excluding the diagonal) of the CSR form of the + * matrix object. + * @return A pointer to the JA component (excluding the diagonal) of the CSR + * form matrix object. + */ +int* Matrix::getJAD() { + + if (_modified) + convertToCSR(); + + return _JAD; +} + + /** * @brief Get the diagonal component of the matrix object. * @return A pointer to the diagonal component of the matrix object. @@ -376,6 +463,26 @@ int Matrix::getNNZ() { } +/** + * @brief Get the number of non-zero values in the matrix, excluding the + * diagonal. + * @return The number of non-zero values in the matrix, excluding the diagonal. + */ +int Matrix::getNNZD() { + + int NNZD = 0; + std::map::iterator iter; + for (int row=0; row < _num_rows; row++) { + for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) { + if (iter->second != 0.0 || iter->first != row) + NNZD++; + } + } + + return NNZD; +} + + /** * @brief Set the number of cells in the x dimension. * @param num_x The number of cells in the x dimension. diff --git a/src/Matrix.h b/src/Matrix.h index c79b5d521..578ca46ce 100644 --- a/src/Matrix.h +++ b/src/Matrix.h @@ -34,8 +34,11 @@ class Matrix { /** The CSR matrix variables */ FP_PRECISION* _A; + FP_PRECISION* _AD; int* _IA; int* _JA; + int* _IAD; + int* _JAD; FP_PRECISION* _DIAG; bool _modified; @@ -43,6 +46,8 @@ class Matrix { int _num_y; int _num_groups; int _num_rows; + int _NNZ; + int _NNZD; /** OpenMP mutual exclusion locks for atomic cell updates */ omp_lock_t* _cell_locks; @@ -67,14 +72,18 @@ class Matrix { FP_PRECISION getValue(int cell_from, int group_from, int cell_to, int group_to); FP_PRECISION* getA(); + FP_PRECISION* getAD(); int* getIA(); + int* getIAD(); int* getJA(); + int* getJAD(); FP_PRECISION* getDiag(); int getNumX(); int getNumY(); int getNumGroups(); int getNumRows(); int getNNZ(); + int getNNZD(); omp_lock_t* getCellLocks(); /* Setter functions */ diff --git a/src/linalg.cpp b/src/linalg.cpp index 45d11db6d..783c6105a 100644 --- a/src/linalg.cpp +++ b/src/linalg.cpp @@ -138,15 +138,16 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol, int num_rows = X->getNumRows(); Vector X_old(cell_locks, num_x, num_y, num_groups); FP_PRECISION* x_old = X_old.getArray(); - int* IA = A->getIA(); - int* JA = A->getJA(); + int* IAD = A->getIAD(); + int* JAD = A->getJAD(); FP_PRECISION* DIAG = A->getDiag(); - FP_PRECISION* a = A->getA(); + FP_PRECISION* ad = A->getAD(); FP_PRECISION* x = X->getArray(); FP_PRECISION* b = B->getArray(); - int row, col; + int row; Vector old_source(cell_locks, num_x, num_y, num_groups); Vector new_source(cell_locks, num_x, num_y, num_groups); + FP_PRECISION val; /* Compute initial source */ matrixMultiplication(M, X, &old_source); @@ -156,37 +157,23 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol, /* Pass new flux to old flux */ X->copyTo(&X_old); - /* Iteration over red/black cells */ - for (int color = 0; color < 2; color++) { - for (int quad = 0; quad < 4; quad++) { -#pragma omp parallel for private(row, col) - for (int cy = (quad % 2) * num_y/2; cy < (quad % 2 + 1) * num_y/2; - cy++) { - for (int cx = (quad / 2) * num_x/2; cx < (quad / 2 + 1) * num_x/2; - cx++) { - - /* check for correct color */ - if (((cx % 2)+(cy % 2)) % 2 == color) { - - for (int g = 0; g < num_groups; g++) { - - row = (cy*num_x + cx)*num_groups + g; - - /* Over-relax the x array */ - x[row] = (1.0 - SOR_factor) * x[row]; + /* Perform parallel red/black SOR iteration */ + for (int color=0; color < 2; color++) { +#pragma omp parallel for private(row) + for (int yc=0; yc < num_y; yc++) { + for (int xc=(yc + color) % 2; xc < num_x; xc+=2) { + for (int g=0; g < num_groups; g++) { - for (int i = IA[row]; i < IA[row+1]; i++) { + /* Get the current matrix row */ + row = (yc * num_x + xc) * num_groups + g; - /* Get the column index */ - col = JA[i]; + /* Accumulate off diagonals multiplied by corresponding fluxes */ + val = 0.0; + for (int i = IAD[row]; i < IAD[row+1]; i++) + val += ad[i] * x[JAD[i]]; - if (row == col) - x[row] += SOR_factor * b[row] / DIAG[row]; - else - x[row] -= SOR_factor * a[i] * x[col] / DIAG[row]; - } - } - } + /* Update the flux for this row */ + x[row] += SOR_factor * ((b[row] - val) / DIAG[row] - x[row]); } } } From 36db653adaed154ec7b4fc879ca5b67389f0b1d3 Mon Sep 17 00:00:00 2001 From: Sam Shaner Date: Sat, 26 Mar 2016 15:34:16 -0400 Subject: [PATCH 2/3] fixed two issues in implementing new sor iteration scheme --- src/Matrix.cpp | 2 +- src/linalg.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Matrix.cpp b/src/Matrix.cpp index 947a20940..09e5efeb3 100644 --- a/src/Matrix.cpp +++ b/src/Matrix.cpp @@ -474,7 +474,7 @@ int Matrix::getNNZD() { std::map::iterator iter; for (int row=0; row < _num_rows; row++) { for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) { - if (iter->second != 0.0 || iter->first != row) + if (iter->second != 0.0 && iter->first != row) NNZD++; } } diff --git a/src/linalg.cpp b/src/linalg.cpp index 783c6105a..9c64c00e6 100644 --- a/src/linalg.cpp +++ b/src/linalg.cpp @@ -159,7 +159,7 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol, /* Perform parallel red/black SOR iteration */ for (int color=0; color < 2; color++) { -#pragma omp parallel for private(row) +#pragma omp parallel for private(row, val) for (int yc=0; yc < num_y; yc++) { for (int xc=(yc + color) % 2; xc < num_x; xc+=2) { for (int g=0; g < num_groups; g++) { From fb760862cef017241b6ec56d2a934f0a1fe189fe Mon Sep 17 00:00:00 2001 From: Sam Shaner Date: Sun, 27 Mar 2016 10:23:04 -0400 Subject: [PATCH 3/3] added more descriptions for matrix components and changed AD to LU --- src/Matrix.cpp | 138 +++++++++++++++++++++++++++---------------------- src/Matrix.h | 16 +++--- src/linalg.cpp | 10 ++-- 3 files changed, 89 insertions(+), 75 deletions(-) diff --git a/src/Matrix.cpp b/src/Matrix.cpp index 09e5efeb3..cf619f108 100644 --- a/src/Matrix.cpp +++ b/src/Matrix.cpp @@ -7,10 +7,14 @@ * a map of lists) to allow for easy setting and incrementing of the * values in the object. When the matrix is needed to perform linear * algebra operations, it is converted to compressed row storage (CSR) - * form. The matrix is ordered by cell (as opposed to by group) on the - * outside. Locks are used to make the matrix thread-safe against + * form [1]. The matrix is ordered by cell (as opposed to by group) on + * the outside. Locks are used to make the matrix thread-safe against * concurrent writes the same value. One lock locks out multiple rows of * the matrix at a time reprsenting multiple groups in the same cell. + * + * [1] "Sparse matrix", Wikipedia, + * https://en.wikipedia.org/wiki/Sparse_matrix. + * * @param cell_locks Omp locks for atomic cell operations * @param num_x The number of cells in the x direction. * @param num_y The number of cells in the y direction. @@ -28,15 +32,15 @@ Matrix::Matrix(omp_lock_t* cell_locks, int num_x, int num_y, int num_groups) { _LIL.push_back(std::map()); _A = NULL; - _AD = NULL; + _LU = NULL; _IA = NULL; - _IAD = NULL; + _ILU = NULL; _JA = NULL; - _JAD = NULL; + _JLU = NULL; _DIAG = NULL; _modified = true; _NNZ = 0; - _NNZD = 0; + _NNZLU = 0; /* Set OpenMP locks for each Matrix cell */ if (cell_locks == NULL) @@ -56,20 +60,20 @@ Matrix::~Matrix() { if (_A != NULL) delete [] _A; - if (_AD != NULL) - delete [] _AD; + if (_LU != NULL) + delete [] _LU; if (_IA != NULL) delete [] _IA; - if (_IAD != NULL) - delete [] _IAD; + if (_ILU != NULL) + delete [] _ILU; if (_JA != NULL) delete [] _JA; - if (_JAD != NULL) - delete [] _JAD; + if (_JLU != NULL) + delete [] _JLU; if (_DIAG != NULL) delete [] _DIAG; @@ -189,54 +193,54 @@ void Matrix::convertToCSR() { /* Get number of nonzero values */ int NNZ = getNNZ(); - int NNZD = getNNZD(); + int NNZLU = getNNZLU(); - if (NNZ != _NNZ || NNZD != _NNZD) { + if (NNZ != _NNZ || NNZLU != _NNZLU) { /* Deallocate memory for arrays if previously allocated */ if (_A != NULL) delete [] _A; - if (_AD != NULL) - delete [] _AD; + if (_LU != NULL) + delete [] _LU; if (_IA != NULL) delete [] _IA; - if (_IAD != NULL) - delete [] _IAD; + if (_ILU != NULL) + delete [] _ILU; if (_JA != NULL) delete [] _JA; - if (_JAD != NULL) - delete [] _JAD; + if (_JLU != NULL) + delete [] _JLU; if (_DIAG != NULL) delete [] _DIAG; /* Allocate memory for arrays */ _A = new FP_PRECISION[NNZ]; - _AD = new FP_PRECISION[NNZD]; + _LU = new FP_PRECISION[NNZLU]; _IA = new int[_num_rows+1]; - _IAD = new int[_num_rows+1]; + _ILU = new int[_num_rows+1]; _JA = new int[NNZ]; - _JAD = new int[NNZD]; + _JLU = new int[NNZLU]; _DIAG = new FP_PRECISION[_num_rows]; _NNZ = NNZ; - _NNZD = NNZD; + _NNZLU = NNZLU; } std::fill_n(_DIAG, _num_rows, 0.0); /* Form arrays */ int j = 0; - int jd = 0; + int jlu = 0; std::map::iterator iter; for (int row=0; row < _num_rows; row++) { _IA[row] = j; - _IAD[row] = jd; + _ILU[row] = jlu; for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) { if (iter->second != 0.0) { _JA[j] = iter->first; @@ -245,9 +249,9 @@ void Matrix::convertToCSR() { if (row == iter->first) _DIAG[row] = iter->second; else { - _JAD[jd] = iter->first; - _AD[jd] = iter->second; - jd++; + _JLU[jlu] = iter->first; + _LU[jlu] = iter->second; + jlu++; } j++; @@ -256,7 +260,7 @@ void Matrix::convertToCSR() { } _IA[_num_rows] = NNZ; - _IAD[_num_rows] = NNZD; + _ILU[_num_rows] = NNZLU; /* Reset flat indicating the CSR objects have the same values as the * LIL object */ @@ -312,11 +316,13 @@ FP_PRECISION Matrix::getValue(int cell_from, int group_from, /** - * @brief Get the A component of the CSR form of the matrix object. + * @brief Get the full matrix (A) component of the CSR form of the matrix + * object. * @return A pointer to the A component of the CSR form matrix object. */ FP_PRECISION* Matrix::getA() { + /* If the matrix has been modified, regenerate its' CSR attributes */ if (_modified) convertToCSR(); @@ -325,26 +331,29 @@ FP_PRECISION* Matrix::getA() { /** - * @brief Get the A component (excluding the diagonal) of the CSR form of the - * matrix object. - * @return A pointer to the A component (excluding the diagonal) of the CSR - * form matrix object. + * @brief Get the lower + upper (LU) component of the CSR form of the matrix + * object. + * @return A pointer to the lower + upper (LU) component of the CSR form matrix + * object. */ -FP_PRECISION* Matrix::getAD() { +FP_PRECISION* Matrix::getLU() { + /* If the matrix has been modified, regenerate its' CSR attributes */ if (_modified) convertToCSR(); - return _AD; + return _LU; } /** - * @brief Get the IA component of the CSR form of the matrix object. - * @return A pointer to the IA component of the CSR form matrix object. + * @brief Get an array of the row indices (I) component of the CSR form of the + * full matrix (A). + * @return A pointer to the I component of the CSR form of the full matrix (A). */ int* Matrix::getIA() { + /* If the matrix has been modified, regenerate its' CSR attributes */ if (_modified) convertToCSR(); @@ -353,26 +362,29 @@ int* Matrix::getIA() { /** - * @brief Get the IA component (excluding the diagonal) of the CSR form of the - * matrix object. - * @return A pointer to the IA component (excluding the diagonal) of the CSR - * form matrix object. + * @brief Get an array of the row indices (I) component of the CSR form of the + * lower + upper (LU) components of the matrix. + * @return A pointer to the I component of the CSR form of the LU components + * of the matrix. */ -int* Matrix::getIAD() { +int* Matrix::getILU() { + /* If the matrix has been modified, regenerate its' CSR attributes */ if (_modified) convertToCSR(); - return _IAD; + return _ILU; } /** - * @brief Get the JA component of the CSR form of the matrix object. - * @return A pointer to the JA component of the CSR form matrix object. + * @brief Get an array of the column indices (J) component of the CSR form of + * the full matrix (A). + * @return A pointer to the J component of the CSR form of the full matrix (A). */ int* Matrix::getJA() { + /* If the matrix has been modified, regenerate its' CSR attributes */ if (_modified) convertToCSR(); @@ -381,17 +393,18 @@ int* Matrix::getJA() { /** - * @brief Get the JA component (excluding the diagonal) of the CSR form of the - * matrix object. - * @return A pointer to the JA component (excluding the diagonal) of the CSR - * form matrix object. + * @brief Get an array of the column indices (J) component of the CSR form of + * the lower + upper (LU) components of the matrix. + * @return A pointer to the J component of the CSR form of the LU components + * of the matrix. */ -int* Matrix::getJAD() { +int* Matrix::getJLU() { + /* If the matrix has been modified, regenerate its' CSR attributes */ if (_modified) convertToCSR(); - return _JAD; + return _JLU; } @@ -445,8 +458,8 @@ int Matrix::getNumRows() { /** - * @brief Get the number of non-zero values in the matrix. - * @return The number of non-zero values in the matrix. + * @brief Get the number of non-zero values in the full matrix. + * @return The number of non-zero values in the full matrix. */ int Matrix::getNNZ() { @@ -464,22 +477,23 @@ int Matrix::getNNZ() { /** - * @brief Get the number of non-zero values in the matrix, excluding the - * diagonal. - * @return The number of non-zero values in the matrix, excluding the diagonal. + * @brief Get the number of non-zero values in the lower + upper components of + * the matrix. + * @return The number of non-zero values in the lower + upper components of the + * matrix. */ -int Matrix::getNNZD() { +int Matrix::getNNZLU() { - int NNZD = 0; + int NNZLU = 0; std::map::iterator iter; for (int row=0; row < _num_rows; row++) { for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) { if (iter->second != 0.0 && iter->first != row) - NNZD++; + NNZLU++; } } - return NNZD; + return NNZLU; } diff --git a/src/Matrix.h b/src/Matrix.h index 578ca46ce..fbb46aa7c 100644 --- a/src/Matrix.h +++ b/src/Matrix.h @@ -34,11 +34,11 @@ class Matrix { /** The CSR matrix variables */ FP_PRECISION* _A; - FP_PRECISION* _AD; + FP_PRECISION* _LU; int* _IA; int* _JA; - int* _IAD; - int* _JAD; + int* _ILU; + int* _JLU; FP_PRECISION* _DIAG; bool _modified; @@ -47,7 +47,7 @@ class Matrix { int _num_groups; int _num_rows; int _NNZ; - int _NNZD; + int _NNZLU; /** OpenMP mutual exclusion locks for atomic cell updates */ omp_lock_t* _cell_locks; @@ -72,18 +72,18 @@ class Matrix { FP_PRECISION getValue(int cell_from, int group_from, int cell_to, int group_to); FP_PRECISION* getA(); - FP_PRECISION* getAD(); + FP_PRECISION* getLU(); int* getIA(); - int* getIAD(); + int* getILU(); int* getJA(); - int* getJAD(); + int* getJLU(); FP_PRECISION* getDiag(); int getNumX(); int getNumY(); int getNumGroups(); int getNumRows(); int getNNZ(); - int getNNZD(); + int getNNZLU(); omp_lock_t* getCellLocks(); /* Setter functions */ diff --git a/src/linalg.cpp b/src/linalg.cpp index 9c64c00e6..fa17a73c8 100644 --- a/src/linalg.cpp +++ b/src/linalg.cpp @@ -138,10 +138,10 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol, int num_rows = X->getNumRows(); Vector X_old(cell_locks, num_x, num_y, num_groups); FP_PRECISION* x_old = X_old.getArray(); - int* IAD = A->getIAD(); - int* JAD = A->getJAD(); + int* ILU = A->getILU(); + int* JLU = A->getJLU(); FP_PRECISION* DIAG = A->getDiag(); - FP_PRECISION* ad = A->getAD(); + FP_PRECISION* lu = A->getLU(); FP_PRECISION* x = X->getArray(); FP_PRECISION* b = B->getArray(); int row; @@ -169,8 +169,8 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol, /* Accumulate off diagonals multiplied by corresponding fluxes */ val = 0.0; - for (int i = IAD[row]; i < IAD[row+1]; i++) - val += ad[i] * x[JAD[i]]; + for (int i = ILU[row]; i < ILU[row+1]; i++) + val += lu[i] * x[JLU[i]]; /* Update the flux for this row */ x[row] += SOR_factor * ((b[row] - val) / DIAG[row] - x[row]);