Skip to content

Commit

Permalink
Merge pull request #3 from sirzooro/optimizations2
Browse files Browse the repository at this point in the history
Optimizations2
  • Loading branch information
sirzooro authored Feb 3, 2019
2 parents 3689bff + 422243d commit 21b6fc3
Show file tree
Hide file tree
Showing 13 changed files with 873 additions and 428 deletions.
28 changes: 28 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# RakeSearch
Rake search of Diagonal Latin Squares

## Compilation

The application is compiled using gcc and make. The Windows version is compiled using the MinGW gcc cross-compiler.

To compile everything, you must have BOINC client libraries. Make sure you compile them using the same gcc version as this app, otherwise you may get link errors.

When compiling app for x86/x86_64, you will need gcc 8.x (I used gcc 8.2). Older gcc versions may not support some options used in Makefile.

To compile the app, first enter the RakeSearch/RakeDiagSearch/RakeDiagSearch directory. Open the Makefile and update the `BOINC_DIR` variable so that it points to the location of the BOINC client library and its include files. Then type `make` to start the compilation.

The Makefile supports a number of extra parameters. Here are the ones used for x86 and x86_64:

- `SSE2=1` - enable SSE2 instructions (x86 and x86_64)
- `AVX=1` - enable AVX instructions (x86_64 only)
- `AVX2=1` - enable AVX2 and BMI1/2 instructions (x86_64 only)
- `AVX512=1` - enable AVX512 instructions (x86_64 only)

Note: SSE2 is always enabled on x86_64, because support for it is part of the AMD64 specification.

You can also specify target platform:
- `M32=1` - compile 32-bit app version (used for Linux app)
- `MinGW64=1` - compile 64-bit app for Windows using MinGW crosscompiler
- `MinGW32=1` - compile 32-bit app for Windows using MinGW crosscompiler

You can also compile the app for ARM (32-bit) and AARCH64 (64-bit). ARM may support NEON instructions, so there is a compilation option `NEON=1` to enable them. AARCH64 always supports NEON, so there is no special option for it.
284 changes: 168 additions & 116 deletions RakeDiagSearch/RakeDiagSearch/Generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,11 @@

#define ffs __builtin_ffs

// Square::Empty is equal to -1; all other values are non-negative.
// CPU sets sign bit in status register automatically when executing instructions,
// so sign check instead of value check can give faster code.
#define IsCellEmpty(val) ((val) < 0)


using namespace std;

Expand Down Expand Up @@ -265,7 +270,54 @@ void Generator::Read(std::istream& is)

// Read the number of generated squares
is >> squaresCount;


// Data loaded. Perform necessary post-loading tasks.
if (cellId == cellsInPath - 1)
{
// Start from WU
// Convert old checkpoint format to new one if used
int row = path[cellsInPath - 2][0], col = path[cellsInPath - 2][1];
if (0 != cellsHistory[row][col])
{
int tmpColumns[Rank];
int tmpRows[Rank];
memcpy(tmpColumns, columns, sizeof(columns));
memcpy(tmpRows, rows, sizeof(rows));

// Convert cellsHistory into candidates
for (int i = cellsInPath - 1; i >= 0; --i)
{
row = path[i][0];
col = path[i][1];
int bit = 1 << newSquare.Matrix[row][col];
tmpColumns[col] |= bit;
tmpRows[row] |= bit;
cellsHistory[row][col] &= tmpColumns[col] & tmpRows[row];

// Update rows/cols data for last cell in path, it is no longer set
if (i == cellsInPath - 1)
{
columns[col] = tmpColumns[col];
rows[row] = tmpRows[row];
}
}
}
}
else
{
// Start from checkpoint
// Check if there are no cells on diagonals in path
for (int i = 0; i < cellsInPath; i++)
{
int row = path[i][0], col = path[i][1];
if ((row == col) || (row == Rank - 1 - col))
{
std::cerr << "Error: Cell on diagonal in path! R=" << row << " C=" << col << std::endl;
return;
}
}
}

// Set initialization flag
isInitialized = Yes;
}
Expand Down Expand Up @@ -447,160 +499,160 @@ void Generator::CreateCheckpoint()
void Generator::Start()
{
// Check value of keyValue and pass result as a type to StartImpl
if (keyValue == Square::Empty)
if (IsCellEmpty(keyValue))
StartImpl<true_type>();
else
StartImpl<false_type>();
}

// Actual implementation of the squares generation
// Note: values on diagonal are preset in WU, so corresponding parts of code are commented out.
// It turned out that it was quite costly to have instructions which were doing nothing.
template<typename IsKeyValueEmpty>
inline void Generator::StartImpl()
{
int cellValue; // New value for the cell
int oldCellValue; // Old value from the cell
int cellValueCandidates; // Candidates for value for the cell

// Create constant copies of used fields to speedup calculations
const int cellsInPath = this->cellsInPath;
const int_fast32_t cellsInPath = this->cellsInPath;
const int keyValue = this->keyValue;
const int keyRowId = this->keyRowId;
const int keyColumnId = this->keyColumnId;
const int_fast32_t keyRowId = this->keyRowId;
const int_fast32_t keyColumnId = this->keyColumnId;

// Use registers for local variables instead of memory
int_fast32_t rowId, columnId;
int_fast32_t cellId = this->cellId;

// Checkpoint may be written after new ODLS is created only.
// Class members moved to registers above are constant in checkpoint
// file, so they can be set to proper values here.
this->rowId = path[cellsInPath - 1][0];
this->columnId = path[cellsInPath - 1][1];
this->cellId = cellsInPath - 1;

// Selection of the value for the next cell
// Read coordinates of the cell
rowId = path[cellId][0];
columnId = path[cellId][1];

// Generate new value for the cell (rowId, columnId)
// Select the value for the cell
// Check the i value for possibility to be written into the cell (rowId, columnId)
cellValueCandidates = columns[columnId] & rows[rowId];

if (isInitialized == Yes)
{
// Check if there are no candidates at the beginning, or if calculations are resumed from checkpoint
if ((cellId == cellsInPath - 1) || (0 == cellValueCandidates))
goto StepDown;

// Selection of the cells values
while(1)
{
// Selection of the value for the next cell
// Read coordinates of the cell
rowId = path[cellId][0];
columnId = path[cellId][1];

// Generate new value for the cell (rowId, columnId)
// Select the value for the cell
// Check the i value for possibility to be written into the cell (rowId, columnId)
cellValue = columns[columnId] & rows[rowId] & cellsHistory[rowId][columnId];

// Test the value: has it been used in diagonals
// Test the main diagonal
if(columnId == rowId)
{
cellValue &= primary;
}

// Test the secondary diagonal
if (rowId == Rank - 1 - columnId)
{
cellValue &= secondary;
}

// Process the search result
if (cellValue)
// 1st loop (used to be "if (cellValueCandidates)" part) - handle case when at least one cell value candidate is present
while (1)
{
// Get index of first bit set
cellValue = ffs(cellValue) - 1;
// Process the new found value
// Read the current value
oldCellValue = newSquare.Matrix[rowId][columnId];
// Write the new value
// Write the value into the square
newSquare.Matrix[rowId][columnId] = cellValue;
// Mark the value in columns
SetUsed(columns[columnId], cellValue);
// Mark the value in rows
SetUsed(rows[rowId], cellValue);
// Mark the value in diagonals
if (rowId == columnId)
{
SetUsed(primary, cellValue);
}
if (rowId == Rank - 1 - columnId)
{
SetUsed(secondary, cellValue);
}
// Mark the value in the history of cell values
SetUsed(cellsHistory[rowId][columnId], cellValue);

// Restore the previous value without clearing the history (because we are working with this cell)
if (oldCellValue != Square::Empty)
// Extract lowest bit set
int bit = (-cellValueCandidates) & cellValueCandidates;

// Write the value into the square
newSquare.Matrix[rowId][columnId] = __builtin_ctz(bit);

// Process the finish of the square generation
if (cellId == cellsInPath - 1)
{
// Process the found square
ProcessSquare();

// Check the finish condition of search
if (!IsKeyValueEmpty::value)
{
// Restore the value into columns
SetFree(columns[columnId], oldCellValue);
// Restore the value into rows
SetFree(rows[rowId], oldCellValue);
// Restore the value into diagonals
if (rowId == columnId)
{
SetFree(primary, oldCellValue);
}
if (rowId == Rank - 1 - columnId)
// Set the flag if the terminal value is other
if (newSquare.Matrix[keyRowId][keyColumnId] == keyValue)
{
SetFree(secondary, oldCellValue);
break;
}
}

// Process the finish of the square generation
if (cellId == cellsInPath - 1)
{
// Process the found square
ProcessSquare();
}
else

break;
}
else
{
// Mark the value in columns
columns[columnId] &= ~bit;
// Mark the value in rows
rows[rowId] &= ~bit;

// Mark the value in the history of cell values
cellsHistory[rowId][columnId] = cellValueCandidates & ~bit;

// Step forward
cellId++;

// Check the finish condition of search
if (!IsKeyValueEmpty::value)
{
// Step forward
cellId++;
// Set the flag if the terminal value is other
if (newSquare.Matrix[keyRowId][keyColumnId] == keyValue)
{
break;
}
}

// Selection of the value for the next cell
// Read coordinates of the cell
rowId = path[cellId][0];
columnId = path[cellId][1];

// Generate new value for the cell (rowId, columnId)
// Select the value for the cell
// Check the i value for possibility to be written into the cell (rowId, columnId)
cellValueCandidates = columns[columnId] & rows[rowId];

if (!cellValueCandidates)
break;
}
}
else

// 2nd loop (used to be "else" part) - handle case when there are no cell value candidates
StepDown:
while (1)
{
// Process the fact of not-founding a new value in the cell (rowId; columnId)
// Restore the previous value from the square into arrays
// Read the current value
cellValue = newSquare.Matrix[rowId][columnId];
// Restore the value into auxilary arrays
if (cellValue != Square::Empty)
{
// Restore the value into columns
SetFree(columns[columnId], cellValue);
// Restore the value into rows
SetFree(rows[rowId], cellValue);
// Restore the value into diagonals
if (rowId == columnId)
{
SetFree(primary, cellValue);
}
if (rowId == Rank - 1 - columnId)
{
SetFree(secondary, cellValue);
}
// Reset the cell of the square
newSquare.Matrix[rowId][columnId] = Square::Empty;
// Clear the history of the cell (rowId, columnId)
cellsHistory[rowId][columnId] = AllBitsMask(Rank);
}

// Step backward
cellId--;

// Check the finish condition of search
if (IsKeyValueEmpty::value)
{
// Set the flag if the terminal value is "-1" which means we must leave the cell
if (cellId < 0 && newSquare.Matrix[keyRowId][keyColumnId] == Square::Empty)
if (cellId < 0 /*&& IsCellEmpty(newSquare.Matrix[keyRowId][keyColumnId])*/)
{
break;
return;
}
}
}

// Check the finish condition of search
if (!IsKeyValueEmpty::value)
{
// Set the flag if the terminal value is other
if (newSquare.Matrix[keyRowId][keyColumnId] == keyValue)
{
break;
}

// Selection of the value for the next cell
// Read coordinates of the cell
rowId = path[cellId][0];
columnId = path[cellId][1];

// Process the fact of not-founding a new value in the cell (rowId; columnId)
// Restore the previous value from the square into arrays
// Read the current value
cellValue = newSquare.Matrix[rowId][columnId];

// Restore the value into auxilary arrays
// Restore the value into columns
SetFree(columns[columnId], cellValue);
// Restore the value into rows
SetFree(rows[rowId], cellValue);

cellValueCandidates = cellsHistory[rowId][columnId];

if (cellValueCandidates)
break;
}
}
}
Expand Down
14 changes: 14 additions & 0 deletions RakeDiagSearch/RakeDiagSearch/Helpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// Miscellaneous helper macros

#ifndef Helpers_h
#define Helpers_h

// ALIGNED expands to a GCC alignment attribute chosen from the instruction
// set the compiler is targeting:
//   - __AVX512F__ defined              -> 64-byte alignment
//   - __SSE2__ or __ARM_NEON defined   -> 32-byte alignment
//   - none of the above                -> expands to nothing
#if defined(__AVX512F__)
#  define ALIGNED __attribute__((aligned(64)))
#elif defined(__ARM_NEON) || defined(__SSE2__)
#  define ALIGNED __attribute__((aligned(32)))
#else
#  define ALIGNED
#endif

#endif // Helpers_h
Loading

0 comments on commit 21b6fc3

Please sign in to comment.