Matrix: keep a diagonal-free CSR copy next to the full CSR arrays

Adds _AD / _IAD / _JAD (values, row offsets and column indices of the
nonzero off-diagonal entries) to Matrix. They are initialised to NULL in
the constructor, released in the destructor, and filled by convertToCSR()
in the same pass that fills _A / _IA / _JA / _DIAG. The new accessors
getAD(), getIAD() and getJAD() call convertToCSR() first when the
_modified flag is set.

convertToCSR() now caches the nonzero counts in _NNZ / _NNZD and only
reallocates its arrays when the arrays have not been allocated yet
(_A == NULL) or when either count differs from the cached value. The
NULL test matters because the constructor initialises both cached counts
to 0: without it, a first conversion whose counts are also 0 would skip
the allocation and then write through the NULL _DIAG and _IA pointers.

diff --git a/src/Matrix.cpp b/src/Matrix.cpp
index df9a470a1..947a20940 100644
--- a/src/Matrix.cpp
+++ b/src/Matrix.cpp
@@ -28,10 +28,15 @@ Matrix::Matrix(omp_lock_t* cell_locks, int num_x, int num_y, int num_groups) {
     _LIL.push_back(std::map());
 
   _A = NULL;
+  _AD = NULL;
   _IA = NULL;
+  _IAD = NULL;
   _JA = NULL;
+  _JAD = NULL;
   _DIAG = NULL;
   _modified = true;
+  _NNZ = 0;
+  _NNZD = 0;
 
   /* Set OpenMP locks for each Matrix cell */
   if (cell_locks == NULL)
@@ -51,12 +56,21 @@ Matrix::~Matrix() {
   if (_A != NULL)
     delete [] _A;
 
+  if (_AD != NULL)
+    delete [] _AD;
+
   if (_IA != NULL)
     delete [] _IA;
 
+  if (_IAD != NULL)
+    delete [] _IAD;
+
   if (_JA != NULL)
     delete [] _JA;
 
+  if (_JAD != NULL)
+    delete [] _JAD;
+
   if (_DIAG != NULL)
     delete [] _DIAG;
 
@@ -175,32 +189,54 @@ void Matrix::convertToCSR() {
 
   /* Get number of nonzero values */
   int NNZ = getNNZ();
+  int NNZD = getNNZD();
 
-  /* Deallocate memory for arrays if previously allocated */
-  if (_A != NULL)
-    delete [] _A;
+  if (_A == NULL || NNZ != _NNZ || NNZD != _NNZD) {
 
-  if (_IA != NULL)
-    delete [] _IA;
+    /* Deallocate memory for arrays if previously allocated */
+    if (_A != NULL)
+      delete [] _A;
 
-  if (_JA != NULL)
-    delete [] _JA;
+    if (_AD != NULL)
+      delete [] _AD;
 
-  if (_DIAG != NULL)
-    delete [] _DIAG;
+    if (_IA != NULL)
+      delete [] _IA;
+
+    if (_IAD != NULL)
+      delete [] _IAD;
+
+    if (_JA != NULL)
+      delete [] _JA;
+
+    if (_JAD != NULL)
+      delete [] _JAD;
+
+    if (_DIAG != NULL)
+      delete [] _DIAG;
+
+    /* Allocate memory for arrays */
+    _A = new FP_PRECISION[NNZ];
+    _AD = new FP_PRECISION[NNZD];
+    _IA = new int[_num_rows+1];
+    _IAD = new int[_num_rows+1];
+    _JA = new int[NNZ];
+    _JAD = new int[NNZD];
+    _DIAG = new FP_PRECISION[_num_rows];
+
+    _NNZ = NNZ;
+    _NNZD = NNZD;
+  }
 
-  /* Allocate memory for arrays */
-  _A = new FP_PRECISION[NNZ];
-  _IA = new int[_num_rows+1];
-  _JA = new int[NNZ];
-  _DIAG = new FP_PRECISION[_num_rows];
   std::fill_n(_DIAG, _num_rows, 0.0);
 
   /* Form arrays */
   int j = 0;
+  int jd = 0;
   std::map::iterator iter;
   for (int row=0; row < _num_rows; row++) {
     _IA[row] = j;
+    _IAD[row] = jd;
     for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) {
       if (iter->second != 0.0) {
         _JA[j] = iter->first;
@@ -208,6 +244,11 @@ void Matrix::convertToCSR() {
 
         if (row == iter->first)
           _DIAG[row] = iter->second;
+        else {
+          _JAD[jd] = iter->first;
+          _AD[jd] = iter->second;
+          jd++;
+        }
 
         j++;
       }
@@ -215,6 +256,7 @@ void Matrix::convertToCSR() {
   }
 
   _IA[_num_rows] = NNZ;
+  _IAD[_num_rows] = NNZD;
 
   /* Reset flat indicating the CSR objects have the same values as the
    * LIL object */
@@ -282,6 +324,21 @@ FP_PRECISION* Matrix::getA() {
 }
 
 
+/**
+ * @brief Get the A component (excluding the diagonal) of the CSR form of the
+ *        matrix object.
+ * @return A pointer to the A component (excluding the diagonal) of the CSR
+ *         form matrix object.
+ */
+FP_PRECISION* Matrix::getAD() {
+
+  if (_modified)
+    convertToCSR();
+
+  return _AD;
+}
+
+
 /**
  * @brief Get the IA component of the CSR form of the matrix object.
  * @return A pointer to the IA component of the CSR form matrix object.
@@ -295,6 +352,21 @@ int* Matrix::getIA() {
 }
 
 
+/**
+ * @brief Get the IA component (excluding the diagonal) of the CSR form of the
+ *        matrix object.
+ * @return A pointer to the IA component (excluding the diagonal) of the CSR
+ *         form matrix object.
+ */
+int* Matrix::getIAD() {
+
+  if (_modified)
+    convertToCSR();
+
+  return _IAD;
+}
+
+
 /**
  * @brief Get the JA component of the CSR form of the matrix object.
  * @return A pointer to the JA component of the CSR form matrix object.
@@ -308,6 +380,21 @@ int* Matrix::getJA() {
 }
 
 
+/**
+ * @brief Get the JA component (excluding the diagonal) of the CSR form of the
+ *        matrix object.
+ * @return A pointer to the JA component (excluding the diagonal) of the CSR
+ *         form matrix object.
+ */
+int* Matrix::getJAD() {
+
+  if (_modified)
+    convertToCSR();
+
+  return _JAD;
+}
+
+
 /**
  * @brief Get the diagonal component of the matrix object.
  * @return A pointer to the diagonal component of the matrix object.
@@ -376,6 +463,28 @@ int Matrix::getNNZ() {
 }
 
 
+/**
+ * @brief Get the number of non-zero values in the matrix, excluding the
+ *        diagonal.
+ * @return The number of non-zero values in the matrix, excluding the diagonal.
+ */
+int Matrix::getNNZD() {
+
+  int NNZD = 0;
+  std::map::iterator iter;
+  for (int row=0; row < _num_rows; row++) {
+    for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) {
+      /* Count only what convertToCSR() stores in _AD/_JAD: entries that
+       * are both nonzero and off the diagonal */
+      if (iter->second != 0.0 && iter->first != row)
+        NNZD++;
+    }
+  }
+
+  return NNZD;
+}
+
+
 /**
  * @brief Set the number of cells in the x dimension.
  * @param num_x The number of cells in the x dimension.
diff --git a/src/Matrix.h b/src/Matrix.h
index c79b5d521..578ca46ce 100644
--- a/src/Matrix.h
+++ b/src/Matrix.h
@@ -34,8 +34,11 @@ class Matrix {
 
   /** The CSR matrix variables */
   FP_PRECISION* _A;
+  FP_PRECISION* _AD;
   int* _IA;
   int* _JA;
+  int* _IAD;
+  int* _JAD;
   FP_PRECISION* _DIAG;
 
   bool _modified;
@@ -43,6 +46,8 @@ class Matrix {
   int _num_y;
   int _num_groups;
   int _num_rows;
+  int _NNZ;
+  int _NNZD;
 
   /** OpenMP mutual exclusion locks for atomic cell updates */
   omp_lock_t* _cell_locks;
@@ -67,14 +72,18 @@ class Matrix {
   FP_PRECISION getValue(int cell_from, int group_from, int cell_to,
                         int group_to);
   FP_PRECISION* getA();
+  FP_PRECISION* getAD();
   int* getIA();
+  int* getIAD();
   int* getJA();
+  int* getJAD();
   FP_PRECISION* getDiag();
   int getNumX();
   int getNumY();
   int getNumGroups();
   int getNumRows();
   int getNNZ();
+  int getNNZD();
   omp_lock_t* getCellLocks();
 
   /* Setter functions */
diff --git a/src/linalg.cpp b/src/linalg.cpp
index 45d11db6d..783c6105a 100644
--- a/src/linalg.cpp
+++ b/src/linalg.cpp
@@ -138,15 +138,16 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol,
   int num_rows = X->getNumRows();
   Vector X_old(cell_locks, num_x, num_y, num_groups);
   FP_PRECISION* x_old = X_old.getArray();
-  int* IA = A->getIA();
-  int* JA = A->getJA();
+  int* IAD = A->getIAD();
+  int* JAD = A->getJAD();
   FP_PRECISION* DIAG = A->getDiag();
-  FP_PRECISION* a = A->getA();
+  FP_PRECISION* ad = A->getAD();
   FP_PRECISION* x = X->getArray();
   FP_PRECISION* b = B->getArray();
-  int row, col;
+  int row;
   Vector old_source(cell_locks, num_x, num_y, num_groups);
   Vector new_source(cell_locks, num_x, num_y, num_groups);
+  FP_PRECISION val;
 
   /* Compute initial source */
   matrixMultiplication(M, X, &old_source);
@@ -156,37 +157,23 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol,
     /* Pass new flux to old flux */
     X->copyTo(&X_old);
 
-    /* Iteration over red/black cells */
-    for (int color = 0; color < 2; color++) {
-      for (int quad = 0; quad < 4; quad++) {
-#pragma omp parallel for private(row, col)
-        for (int cy = (quad % 2) * num_y/2; cy < (quad % 2 + 1) * num_y/2;
-             cy++) {
-          for (int cx = (quad / 2) * num_x/2; cx < (quad / 2 + 1) * num_x/2;
-               cx++) {
-
-            /* check for correct color */
-            if (((cx % 2)+(cy % 2)) % 2 == color) {
-
-              for (int g = 0; g < num_groups; g++) {
-
-                row = (cy*num_x + cx)*num_groups + g;
-
-                /* Over-relax the x array */
-                x[row] = (1.0 - SOR_factor) * x[row];
+    /* Perform parallel red/black SOR iteration; row/val are thread-private */
+    for (int color=0; color < 2; color++) {
+#pragma omp parallel for private(row, val)
+      for (int yc=0; yc < num_y; yc++) {
+        for (int xc=(yc + color) % 2; xc < num_x; xc+=2) {
+          for (int g=0; g < num_groups; g++) {
 
-                for (int i = IA[row]; i < IA[row+1]; i++) {
+            /* Get the current matrix row */
+            row = (yc * num_x + xc) * num_groups + g;
 
-                  /* Get the column index */
-                  col = JA[i];
+            /* Accumulate off diagonals multiplied by corresponding fluxes */
+            val = 0.0;
+            for (int i = IAD[row]; i < IAD[row+1]; i++)
+              val += ad[i] * x[JAD[i]];
 
-                  if (row == col)
-                    x[row] += SOR_factor * b[row] / DIAG[row];
-                  else
-                    x[row] -= SOR_factor * a[i] * x[col] / DIAG[row];
-                }
-              }
-            }
+            /* Update the flux for this row */
+            x[row] += SOR_factor * ((b[row] - val) / DIAG[row] - x[row]);
           }
         }
       }