Skip to content

Commit

Permalink
Merge pull request #295 from samuelshaner/linear-solve-perf
Browse files Browse the repository at this point in the history
implemented faster successive over-relaxation iteration scheme for linear solver
  • Loading branch information
Will Boyd committed Mar 27, 2016
2 parents 7e4fa08 + fb76086 commit 414dc05
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 55 deletions.
167 changes: 144 additions & 23 deletions src/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@
* a map of lists) to allow for easy setting and incrementing of the
* values in the object. When the matrix is needed to perform linear
* algebra operations, it is converted to compressed row storage (CSR)
* form. The matrix is ordered by cell (as opposed to by group) on the
* outside. Locks are used to make the matrix thread-safe against
* form [1]. The matrix is ordered by cell (as opposed to by group) on
* the outside. Locks are used to make the matrix thread-safe against
* concurrent writes to the same value. One lock locks out multiple rows of
* the matrix at a time, representing multiple groups in the same cell.
*
* [1] "Sparse matrix", Wikipedia,
* https://en.wikipedia.org/wiki/Sparse_matrix.
*
* @param cell_locks Omp locks for atomic cell operations
* @param num_x The number of cells in the x direction.
* @param num_y The number of cells in the y direction.
Expand All @@ -28,10 +32,15 @@ Matrix::Matrix(omp_lock_t* cell_locks, int num_x, int num_y, int num_groups) {
_LIL.push_back(std::map<int, FP_PRECISION>());

_A = NULL;
_LU = NULL;
_IA = NULL;
_ILU = NULL;
_JA = NULL;
_JLU = NULL;
_DIAG = NULL;
_modified = true;
_NNZ = 0;
_NNZLU = 0;

/* Set OpenMP locks for each Matrix cell */
if (cell_locks == NULL)
Expand All @@ -51,12 +60,21 @@ Matrix::~Matrix() {
if (_A != NULL)
delete [] _A;

if (_LU != NULL)
delete [] _LU;

if (_IA != NULL)
delete [] _IA;

if (_ILU != NULL)
delete [] _ILU;

if (_JA != NULL)
delete [] _JA;

if (_JLU != NULL)
delete [] _JLU;

if (_DIAG != NULL)
delete [] _DIAG;

Expand Down Expand Up @@ -175,46 +193,74 @@ void Matrix::convertToCSR() {

/* Get number of nonzero values */
int NNZ = getNNZ();
int NNZLU = getNNZLU();

/* Deallocate memory for arrays if previously allocated */
if (_A != NULL)
delete [] _A;
if (NNZ != _NNZ || NNZLU != _NNZLU) {

if (_IA != NULL)
delete [] _IA;
/* Deallocate memory for arrays if previously allocated */
if (_A != NULL)
delete [] _A;

if (_JA != NULL)
delete [] _JA;
if (_LU != NULL)
delete [] _LU;

if (_DIAG != NULL)
delete [] _DIAG;
if (_IA != NULL)
delete [] _IA;

if (_ILU != NULL)
delete [] _ILU;

if (_JA != NULL)
delete [] _JA;

if (_JLU != NULL)
delete [] _JLU;

if (_DIAG != NULL)
delete [] _DIAG;

/* Allocate memory for arrays */
_A = new FP_PRECISION[NNZ];
_LU = new FP_PRECISION[NNZLU];
_IA = new int[_num_rows+1];
_ILU = new int[_num_rows+1];
_JA = new int[NNZ];
_JLU = new int[NNZLU];
_DIAG = new FP_PRECISION[_num_rows];

_NNZ = NNZ;
_NNZLU = NNZLU;
}

/* Allocate memory for arrays */
_A = new FP_PRECISION[NNZ];
_IA = new int[_num_rows+1];
_JA = new int[NNZ];
_DIAG = new FP_PRECISION[_num_rows];
std::fill_n(_DIAG, _num_rows, 0.0);

/* Form arrays */
int j = 0;
int jlu = 0;
std::map<int, FP_PRECISION>::iterator iter;
for (int row=0; row < _num_rows; row++) {
_IA[row] = j;
_ILU[row] = jlu;
for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) {
if (iter->second != 0.0) {
_JA[j] = iter->first;
_A[j] = iter->second;

if (row == iter->first)
_DIAG[row] = iter->second;
else {
_JLU[jlu] = iter->first;
_LU[jlu] = iter->second;
jlu++;
}

j++;
}
}
}

_IA[_num_rows] = NNZ;
_ILU[_num_rows] = NNZLU;

/* Reset flag indicating the CSR objects have the same values as the
* LIL object */
Expand Down Expand Up @@ -270,11 +316,13 @@ FP_PRECISION Matrix::getValue(int cell_from, int group_from,


/**
* @brief Get the A component of the CSR form of the matrix object.
* @brief Get the full matrix (A) component of the CSR form of the matrix
* object.
* @return A pointer to the A component of the CSR form matrix object.
*/
FP_PRECISION* Matrix::getA() {

/* If the matrix has been modified, regenerate its CSR attributes */
if (_modified)
convertToCSR();

Expand All @@ -283,11 +331,29 @@ FP_PRECISION* Matrix::getA() {


/**
* @brief Get the IA component of the CSR form of the matrix object.
* @return A pointer to the IA component of the CSR form matrix object.
* @brief Get the lower + upper (LU) component of the CSR form of the matrix
* object.
* @return A pointer to the lower + upper (LU) component of the CSR form matrix
* object.
*/
FP_PRECISION* Matrix::getLU() {

  /* The CSR arrays are still in sync with the matrix: hand back the
   * off-diagonal (lower + upper) values as they are */
  if (!_modified)
    return _LU;

  /* Otherwise rebuild the CSR arrays first, then return the refreshed
   * off-diagonal values */
  convertToCSR();
  return _LU;
}


/**
* @brief Get an array of the row indices (I) component of the CSR form of the
* full matrix (A).
* @return A pointer to the I component of the CSR form of the full matrix (A).
*/
int* Matrix::getIA() {

/* If the matrix has been modified, regenerate its CSR attributes */
if (_modified)
convertToCSR();

Expand All @@ -296,18 +362,52 @@ int* Matrix::getIA() {


/**
* @brief Get the JA component of the CSR form of the matrix object.
* @return A pointer to the JA component of the CSR form matrix object.
* @brief Get an array of the row indices (I) component of the CSR form of the
* lower + upper (LU) components of the matrix.
* @return A pointer to the I component of the CSR form of the LU components
* of the matrix.
*/
int* Matrix::getILU() {

  /* The CSR arrays are still in sync with the matrix: nothing to rebuild */
  if (!_modified)
    return _ILU;

  /* Rebuild the CSR arrays. Afterwards the off-diagonal entries of a row
   * occupy positions _ILU[row] up to (but excluding) _ILU[row+1] of the
   * LU value and column arrays */
  convertToCSR();
  return _ILU;
}


/**
 * @brief Get the column index (J) array of the CSR form of the full
 *        matrix (A).
 * @details The CSR arrays are regenerated first if the matrix has been
 *          modified since they were last built.
 * @return A pointer to the column index array of the full matrix (A) in
 *         CSR form.
 */
int* Matrix::getJA() {

  /* The CSR arrays are still in sync with the matrix: nothing to rebuild */
  if (!_modified)
    return _JA;

  /* Rebuild the CSR arrays before exposing the column indices */
  convertToCSR();
  return _JA;
}


/**
 * @brief Get the column index (J) array of the CSR form of the lower +
 *        upper (LU) components of the matrix.
 * @details The CSR arrays are regenerated first if the matrix has been
 *          modified since they were last built.
 * @return A pointer to the column index array of the LU components of the
 *         matrix in CSR form.
 */
int* Matrix::getJLU() {

  /* The CSR arrays are still in sync with the matrix: nothing to rebuild */
  if (!_modified)
    return _JLU;

  /* Rebuild the CSR arrays before exposing the off-diagonal column indices */
  convertToCSR();
  return _JLU;
}


/**
* @brief Get the diagonal component of the matrix object.
* @return A pointer to the diagonal component of the matrix object.
Expand Down Expand Up @@ -358,8 +458,8 @@ int Matrix::getNumRows() {


/**
* @brief Get the number of non-zero values in the matrix.
* @return The number of non-zero values in the matrix.
* @brief Get the number of non-zero values in the full matrix.
* @return The number of non-zero values in the full matrix.
*/
int Matrix::getNNZ() {

Expand All @@ -376,6 +476,27 @@ int Matrix::getNNZ() {
}


/**
* @brief Get the number of non-zero values in the lower + upper components of
* the matrix.
* @return The number of non-zero values in the lower + upper components of the
* matrix.
*/
int Matrix::getNNZLU() {

int NNZLU = 0;
std::map<int, FP_PRECISION>::iterator iter;
for (int row=0; row < _num_rows; row++) {
for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) {
if (iter->second != 0.0 && iter->first != row)
NNZLU++;
}
}

return NNZLU;
}


/**
* @brief Set the number of cells in the x dimension.
* @param num_x The number of cells in the x dimension.
Expand Down
9 changes: 9 additions & 0 deletions src/Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,20 @@ class Matrix {

/** The CSR matrix variables */
FP_PRECISION* _A;
FP_PRECISION* _LU;
int* _IA;
int* _JA;
int* _ILU;
int* _JLU;
FP_PRECISION* _DIAG;

bool _modified;
int _num_x;
int _num_y;
int _num_groups;
int _num_rows;
int _NNZ;
int _NNZLU;

/** OpenMP mutual exclusion locks for atomic cell updates */
omp_lock_t* _cell_locks;
Expand All @@ -67,14 +72,18 @@ class Matrix {
FP_PRECISION getValue(int cell_from, int group_from, int cell_to,
int group_to);
FP_PRECISION* getA();
FP_PRECISION* getLU();
int* getIA();
int* getILU();
int* getJA();
int* getJLU();
FP_PRECISION* getDiag();
int getNumX();
int getNumY();
int getNumGroups();
int getNumRows();
int getNNZ();
int getNNZLU();
omp_lock_t* getCellLocks();

/* Setter functions */
Expand Down
51 changes: 19 additions & 32 deletions src/linalg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,16 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol,
int num_rows = X->getNumRows();
Vector X_old(cell_locks, num_x, num_y, num_groups);
FP_PRECISION* x_old = X_old.getArray();
int* IA = A->getIA();
int* JA = A->getJA();
int* ILU = A->getILU();
int* JLU = A->getJLU();
FP_PRECISION* DIAG = A->getDiag();
FP_PRECISION* a = A->getA();
FP_PRECISION* lu = A->getLU();
FP_PRECISION* x = X->getArray();
FP_PRECISION* b = B->getArray();
int row, col;
int row;
Vector old_source(cell_locks, num_x, num_y, num_groups);
Vector new_source(cell_locks, num_x, num_y, num_groups);
FP_PRECISION val;

/* Compute initial source */
matrixMultiplication(M, X, &old_source);
Expand All @@ -156,37 +157,23 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol,
/* Pass new flux to old flux */
X->copyTo(&X_old);

/* Iteration over red/black cells */
for (int color = 0; color < 2; color++) {
for (int quad = 0; quad < 4; quad++) {
#pragma omp parallel for private(row, col)
for (int cy = (quad % 2) * num_y/2; cy < (quad % 2 + 1) * num_y/2;
cy++) {
for (int cx = (quad / 2) * num_x/2; cx < (quad / 2 + 1) * num_x/2;
cx++) {

/* check for correct color */
if (((cx % 2)+(cy % 2)) % 2 == color) {

for (int g = 0; g < num_groups; g++) {

row = (cy*num_x + cx)*num_groups + g;

/* Over-relax the x array */
x[row] = (1.0 - SOR_factor) * x[row];
/* Perform parallel red/black SOR iteration */
for (int color=0; color < 2; color++) {
#pragma omp parallel for private(row, val)
for (int yc=0; yc < num_y; yc++) {
for (int xc=(yc + color) % 2; xc < num_x; xc+=2) {
for (int g=0; g < num_groups; g++) {

for (int i = IA[row]; i < IA[row+1]; i++) {
/* Get the current matrix row */
row = (yc * num_x + xc) * num_groups + g;

/* Get the column index */
col = JA[i];
/* Accumulate off diagonals multiplied by corresponding fluxes */
val = 0.0;
for (int i = ILU[row]; i < ILU[row+1]; i++)
val += lu[i] * x[JLU[i]];

if (row == col)
x[row] += SOR_factor * b[row] / DIAG[row];
else
x[row] -= SOR_factor * a[i] * x[col] / DIAG[row];
}
}
}
/* Update the flux for this row */
x[row] += SOR_factor * ((b[row] - val) / DIAG[row] - x[row]);
}
}
}
Expand Down

0 comments on commit 414dc05

Please sign in to comment.