Skip to content

Commit

Permalink
NEON instruction logic fixes.
Browse files Browse the repository at this point in the history
  • Loading branch information
FinnWilkinson committed Oct 14, 2024
1 parent d5c631c commit 4ad3b6e
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 13 deletions.
14 changes: 12 additions & 2 deletions src/include/simeng/arch/aarch64/helpers/neon.hh
Original file line number Diff line number Diff line change
Expand Up @@ -568,9 +568,14 @@ RegisterValue vecUMaxP(srcValContainer& sourceValues) {
// Pairwise maximum (umaxp): the two source vectors are laid end to end and
// every adjacent pair of elements is reduced to the larger of the two.
// T is the element type; I is the number of elements in one source vector.
const T* n = sourceValues[0].getAsVector<T>();
const T* m = sourceValues[1].getAsVector<T>();

// Concatenate the vectors: m fills elements [0, I), n fills [I, 2*I).
// NOTE(review): the Arm pseudocode for pairwise ops builds
// `operand2:operand1`, i.e. Vn in the low half — confirm that this m-then-n
// order matches how sourceValues is populated for this instruction.
T temp[2 * I];
memcpy(temp, m, sizeof(T) * I);
// `temp` decays to T*, so the offset is counted in elements. Scaling it by
// sizeof(T) again would write past the end of `temp` for any multi-byte T.
memcpy(temp + I, n, sizeof(T) * I);
// Compare each adjacent pair of elements
T out[I];
for (int i = 0; i < I; i++) {
  out[i] = std::max(temp[2 * i], temp[2 * i + 1]);
}
return {out, 256};
}
Expand All @@ -585,9 +590,14 @@ RegisterValue vecUMinP(srcValContainer& sourceValues) {
// Pairwise minimum (uminp): the two source vectors are laid end to end and
// every adjacent pair of elements is reduced to the smaller of the two.
// T is the element type; I is the number of elements in one source vector.
const T* n = sourceValues[0].getAsVector<T>();
const T* m = sourceValues[1].getAsVector<T>();

// Concatenate the vectors: m fills elements [0, I), n fills [I, 2*I).
// NOTE(review): the Arm pseudocode for pairwise ops builds
// `operand2:operand1`, i.e. Vn in the low half — confirm that this m-then-n
// order matches how sourceValues is populated for this instruction.
T temp[2 * I];
memcpy(temp, m, sizeof(T) * I);
// `temp` decays to T*, so the offset is counted in elements. Scaling it by
// sizeof(T) again would write past the end of `temp` for any multi-byte T.
memcpy(temp + I, n, sizeof(T) * I);

// Compare each adjacent pair of elements
T out[I];
for (int i = 0; i < I; i++) {
  out[i] = std::min(temp[2 * i], temp[2 * i + 1]);
}
return {out, 256};
}
Expand Down
4 changes: 2 additions & 2 deletions src/lib/arch/aarch64/Instruction_execute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -838,9 +838,9 @@ void Instruction::execute() {
break;
}
case Opcode::AArch64_CMHSv16i8: { // cmhs vd.16b, vn.16b, vm.16b
results_[0] = vecCompare<int8_t, 16>(
results_[0] = vecCompare<uint8_t, 16>(
sourceValues_, false,
[](int8_t x, int8_t y) -> bool { return (x >= y); });
[](uint8_t x, uint8_t y) -> bool { return (x >= y); });
break;
}
case Opcode::AArch64_CMPEQ_PPzZI_B: { // cmpeq pd.b, pg/z, zn.b, #imm
Expand Down
18 changes: 9 additions & 9 deletions test/regression/aarch64/instructions/neon.cc
Original file line number Diff line number Diff line change
Expand Up @@ -726,8 +726,8 @@ TEST_P(InstNeon, cmhs) {
heap[1] = 0x7F;
heap[2] = INT8_MAX;
heap[3] = 1;
heap[4] = -128;
heap[5] = -1;
heap[4] = 128;
heap[5] = 1;
heap[6] = 0xAA;
heap[7] = 0xBB;
heap[8] = 0xCC;
Expand All @@ -743,7 +743,7 @@ TEST_P(InstNeon, cmhs) {
heap[16] = INT8_MAX;
heap[17] = 0x7F;
heap[18] = 0;
heap[19] = -128;
heap[19] = 128;
heap[20] = 1;
heap[21] = 0;
heap[22] = 0xAA;
Expand Down Expand Up @@ -771,10 +771,10 @@ TEST_P(InstNeon, cmhs) {
)");

CHECK_NEON(2, uint8_t,
{0x00, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
{0x00, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
0xFF, 0xFF, 0xFF, 0xFF, 0xFF});
CHECK_NEON(3, uint8_t,
{0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00,
{0xFF, 0xFF, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0x00,
0x00, 0xFF, 0xFF, 0xFF, 0xFF});
}

Expand Down Expand Up @@ -2683,8 +2683,8 @@ TEST_P(InstNeon, uminp) {
)");
CHECK_NEON(2, uint8_t,
{0x00, 0x00, 0xEE, 0x11, 0x22, 0x33, 0x44, 0x55, 0x01, 0x02, 0x03,
0x04, 0x05, 0x06, 0x07, 0x08});
{0x00, 0x11, 0x22, 0x44, 0xEE, 0xCC, 0xAA, 0x88, 0x00, 0xAA, 0xBB,
0xDD, 0x01, 0x03, 0x05, 0x07});
}
TEST_P(InstNeon, umaxp) {
// umaxp vd.16b vn.16b vm.16b
Expand Down Expand Up @@ -2741,8 +2741,8 @@ TEST_P(InstNeon, umaxp) {
)");
CHECK_NEON(2, uint8_t,
{0x01, 0x00, 0xFF, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0xEE, 0xDD,
0xCC, 0xBB, 0xAA, 0x99, 0x88});
{0x00, 0xEE, 0x33, 0x55, 0xFF, 0xDD, 0xBB, 0x99, 0x01, 0xFF, 0xCC,
0xEE, 0x02, 0x04, 0x06, 0x08});
}

TEST_P(InstNeon, smax) {
Expand Down

0 comments on commit 4ad3b6e

Please sign in to comment.