Skip to content

Commit

Permalink
implemented faster successive over-relaxation iteration scheme for li…
Browse files Browse the repository at this point in the history
…near solver
  • Loading branch information
Sam Shaner committed Mar 26, 2016
1 parent 7e4fa08 commit 7fcc2ae
Show file tree
Hide file tree
Showing 3 changed files with 149 additions and 46 deletions.
135 changes: 121 additions & 14 deletions src/Matrix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,15 @@ Matrix::Matrix(omp_lock_t* cell_locks, int num_x, int num_y, int num_groups) {
_LIL.push_back(std::map<int, FP_PRECISION>());

_A = NULL;
_AD = NULL;
_IA = NULL;
_IAD = NULL;
_JA = NULL;
_JAD = NULL;
_DIAG = NULL;
_modified = true;
_NNZ = 0;
_NNZD = 0;

/* Set OpenMP locks for each Matrix cell */
if (cell_locks == NULL)
Expand All @@ -51,12 +56,21 @@ Matrix::~Matrix() {
if (_A != NULL)
delete [] _A;

if (_AD != NULL)
delete [] _AD;

if (_IA != NULL)
delete [] _IA;

if (_IAD != NULL)
delete [] _IAD;

if (_JA != NULL)
delete [] _JA;

if (_JAD != NULL)
delete [] _JAD;

if (_DIAG != NULL)
delete [] _DIAG;

Expand Down Expand Up @@ -175,46 +189,74 @@ void Matrix::convertToCSR() {

/* Get number of nonzero values */
int NNZ = getNNZ();
int NNZD = getNNZD();

/* Deallocate memory for arrays if previously allocated */
if (_A != NULL)
delete [] _A;
if (NNZ != _NNZ || NNZD != _NNZD) {

if (_IA != NULL)
delete [] _IA;
/* Deallocate memory for arrays if previously allocated */
if (_A != NULL)
delete [] _A;

if (_JA != NULL)
delete [] _JA;
if (_AD != NULL)
delete [] _AD;

if (_DIAG != NULL)
delete [] _DIAG;
if (_IA != NULL)
delete [] _IA;

if (_IAD != NULL)
delete [] _IAD;

if (_JA != NULL)
delete [] _JA;

if (_JAD != NULL)
delete [] _JAD;

if (_DIAG != NULL)
delete [] _DIAG;

/* Allocate memory for arrays */
_A = new FP_PRECISION[NNZ];
_AD = new FP_PRECISION[NNZD];
_IA = new int[_num_rows+1];
_IAD = new int[_num_rows+1];
_JA = new int[NNZ];
_JAD = new int[NNZD];
_DIAG = new FP_PRECISION[_num_rows];

_NNZ = NNZ;
_NNZD = NNZD;
}

/* Allocate memory for arrays */
_A = new FP_PRECISION[NNZ];
_IA = new int[_num_rows+1];
_JA = new int[NNZ];
_DIAG = new FP_PRECISION[_num_rows];
std::fill_n(_DIAG, _num_rows, 0.0);

/* Form arrays */
int j = 0;
int jd = 0;
std::map<int, FP_PRECISION>::iterator iter;
for (int row=0; row < _num_rows; row++) {
_IA[row] = j;
_IAD[row] = jd;
for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) {
if (iter->second != 0.0) {
_JA[j] = iter->first;
_A[j] = iter->second;

if (row == iter->first)
_DIAG[row] = iter->second;
else {
_JAD[jd] = iter->first;
_AD[jd] = iter->second;
jd++;
}

j++;
}
}
}

_IA[_num_rows] = NNZ;
_IAD[_num_rows] = NNZD;

/* Reset flag indicating the CSR objects have the same values as the
* LIL object */
Expand Down Expand Up @@ -282,6 +324,21 @@ FP_PRECISION* Matrix::getA() {
}


/**
 * @brief Get the off-diagonal values array of the CSR form of the matrix
 *        object.
 * @details If the matrix is flagged as modified, the CSR arrays are
 *          regenerated with convertToCSR() before the pointer is returned.
 * @return A pointer to the A component (excluding the diagonal) of the CSR
 *         form matrix object.
 */
FP_PRECISION* Matrix::getAD() {

  /* Refresh the CSR arrays if the matrix is flagged as modified */
  if (_modified) {
    convertToCSR();
  }

  return _AD;
}


/**
* @brief Get the IA component of the CSR form of the matrix object.
* @return A pointer to the IA component of the CSR form matrix object.
Expand All @@ -295,6 +352,21 @@ int* Matrix::getIA() {
}


/**
 * @brief Get the row-offset array of the diagonal-free CSR form of the
 *        matrix object.
 * @details If the matrix is flagged as modified, the CSR arrays are
 *          regenerated with convertToCSR() before the pointer is returned.
 * @return A pointer to the IA component (excluding the diagonal) of the CSR
 *         form matrix object.
 */
int* Matrix::getIAD() {

  /* Refresh the CSR arrays if the matrix is flagged as modified */
  if (_modified) {
    convertToCSR();
  }

  return _IAD;
}


/**
* @brief Get the JA component of the CSR form of the matrix object.
* @return A pointer to the JA component of the CSR form matrix object.
Expand All @@ -308,6 +380,21 @@ int* Matrix::getJA() {
}


/**
 * @brief Get the column-index array of the diagonal-free CSR form of the
 *        matrix object.
 * @details If the matrix is flagged as modified, the CSR arrays are
 *          regenerated with convertToCSR() before the pointer is returned.
 * @return A pointer to the JA component (excluding the diagonal) of the CSR
 *         form matrix object.
 */
int* Matrix::getJAD() {

  /* Refresh the CSR arrays if the matrix is flagged as modified */
  if (_modified) {
    convertToCSR();
  }

  return _JAD;
}


/**
* @brief Get the diagonal component of the matrix object.
* @return A pointer to the diagonal component of the matrix object.
Expand Down Expand Up @@ -376,6 +463,26 @@ int Matrix::getNNZ() {
}


/**
 * @brief Get the number of non-zero values in the matrix, excluding the
 *        diagonal.
 * @details An entry is counted only when it is both non-zero and located off
 *          the diagonal. This is the same condition convertToCSR() uses when
 *          it stores entries in the diagonal-free arrays, so the returned
 *          count equals the number of entries written there and is a valid
 *          terminating row offset for them.
 * @return The number of non-zero values in the matrix, excluding the diagonal.
 */
int Matrix::getNNZD() {

  int NNZD = 0;
  std::map<int, FP_PRECISION>::iterator iter;
  for (int row=0; row < _num_rows; row++) {
    for (iter = _LIL[row].begin(); iter != _LIL[row].end(); ++iter) {

      /* Both conditions must hold: using a logical OR here would also count
       * non-zero diagonal entries and explicitly stored zero off-diagonals */
      if (iter->second != 0.0 && iter->first != row)
        NNZD++;
    }
  }

  return NNZD;
}


/**
* @brief Set the number of cells in the x dimension.
* @param num_x The number of cells in the x dimension.
Expand Down
9 changes: 9 additions & 0 deletions src/Matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,15 +34,20 @@ class Matrix {

/** The CSR matrix variables */
FP_PRECISION* _A;
FP_PRECISION* _AD;
int* _IA;
int* _JA;
int* _IAD;
int* _JAD;
FP_PRECISION* _DIAG;

bool _modified;
int _num_x;
int _num_y;
int _num_groups;
int _num_rows;
int _NNZ;
int _NNZD;

/** OpenMP mutual exclusion locks for atomic cell updates */
omp_lock_t* _cell_locks;
Expand All @@ -67,14 +72,18 @@ class Matrix {
FP_PRECISION getValue(int cell_from, int group_from, int cell_to,
int group_to);
FP_PRECISION* getA();
FP_PRECISION* getAD();
int* getIA();
int* getIAD();
int* getJA();
int* getJAD();
FP_PRECISION* getDiag();
int getNumX();
int getNumY();
int getNumGroups();
int getNumRows();
int getNNZ();
int getNNZD();
omp_lock_t* getCellLocks();

/* Setter functions */
Expand Down
51 changes: 19 additions & 32 deletions src/linalg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,16 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol,
int num_rows = X->getNumRows();
Vector X_old(cell_locks, num_x, num_y, num_groups);
FP_PRECISION* x_old = X_old.getArray();
int* IA = A->getIA();
int* JA = A->getJA();
int* IAD = A->getIAD();
int* JAD = A->getJAD();
FP_PRECISION* DIAG = A->getDiag();
FP_PRECISION* a = A->getA();
FP_PRECISION* ad = A->getAD();
FP_PRECISION* x = X->getArray();
FP_PRECISION* b = B->getArray();
int row, col;
int row;
Vector old_source(cell_locks, num_x, num_y, num_groups);
Vector new_source(cell_locks, num_x, num_y, num_groups);
FP_PRECISION val;

/* Compute initial source */
matrixMultiplication(M, X, &old_source);
Expand All @@ -156,37 +157,23 @@ void linearSolve(Matrix* A, Matrix* M, Vector* X, Vector* B, FP_PRECISION tol,
/* Pass new flux to old flux */
X->copyTo(&X_old);

/* Iteration over red/black cells */
for (int color = 0; color < 2; color++) {
for (int quad = 0; quad < 4; quad++) {
#pragma omp parallel for private(row, col)
for (int cy = (quad % 2) * num_y/2; cy < (quad % 2 + 1) * num_y/2;
cy++) {
for (int cx = (quad / 2) * num_x/2; cx < (quad / 2 + 1) * num_x/2;
cx++) {

/* check for correct color */
if (((cx % 2)+(cy % 2)) % 2 == color) {

for (int g = 0; g < num_groups; g++) {

row = (cy*num_x + cx)*num_groups + g;

/* Over-relax the x array */
x[row] = (1.0 - SOR_factor) * x[row];
/* Perform parallel red/black SOR iteration */
for (int color=0; color < 2; color++) {
#pragma omp parallel for private(row)
for (int yc=0; yc < num_y; yc++) {
for (int xc=(yc + color) % 2; xc < num_x; xc+=2) {
for (int g=0; g < num_groups; g++) {

for (int i = IA[row]; i < IA[row+1]; i++) {
/* Get the current matrix row */
row = (yc * num_x + xc) * num_groups + g;

/* Get the column index */
col = JA[i];
/* Accumulate off diagonals multiplied by corresponding fluxes */
val = 0.0;
for (int i = IAD[row]; i < IAD[row+1]; i++)
val += ad[i] * x[JAD[i]];

if (row == col)
x[row] += SOR_factor * b[row] / DIAG[row];
else
x[row] -= SOR_factor * a[i] * x[col] / DIAG[row];
}
}
}
/* Update the flux for this row */
x[row] += SOR_factor * ((b[row] - val) / DIAG[row] - x[row]);
}
}
}
Expand Down

0 comments on commit 7fcc2ae

Please sign in to comment.