From 4ad3b6ef764b917367284d087267ff053e3dac60 Mon Sep 17 00:00:00 2001
From: Finn Wilkinson
Date: Mon, 14 Oct 2024 10:47:36 +0100
Subject: [PATCH] NEON instruction logic fixes.

- vecUMaxP / vecUMinP: concatenate the two source vectors into a
  temporary of 2 * I elements and take the max / min of each adjacent
  pair of that temporary, instead of comparing n[i] against m[i].
- CMHSv16i8: compare lanes as uint8_t instead of int8_t.
- Update the cmhs, uminp and umaxp regression tests to match.
---
Reviewer notes (ignored by `git am`):
- The second memcpy in vecUMaxP / vecUMinP targets `temp + I`. `temp` is a
  T*, so pointer arithmetic is already in units of T; the earlier
  `temp + (sizeof(T) * I)` only reached the upper half when sizeof(T) == 1
  and wrote past the end of `temp` for any wider element type.
- NOTE(review): this copy was recovered from a whitespace-collapsed file.
  Line breaks, indentation and the position of blank context lines were
  reconstructed from the hunk line counts - confirm against the tree (or
  apply with --ignore-whitespace).
- NOTE(review): angle-bracketed text appears to have been stripped (author
  e-mail, template arguments on getAsVector / vecCompare - the removed and
  added `vecCompare(` lines are identical as received). Presumably the
  template element type changed alongside the lambda; confirm and restore
  before applying.

 .../simeng/arch/aarch64/helpers/neon.hh      | 14 ++++++++++++--
 src/lib/arch/aarch64/Instruction_execute.cc  |  4 ++--
 test/regression/aarch64/instructions/neon.cc | 18 +++++++++---------
 3 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/src/include/simeng/arch/aarch64/helpers/neon.hh b/src/include/simeng/arch/aarch64/helpers/neon.hh
index 0fcf04f03f..79be89bce1 100644
--- a/src/include/simeng/arch/aarch64/helpers/neon.hh
+++ b/src/include/simeng/arch/aarch64/helpers/neon.hh
@@ -568,9 +568,14 @@ RegisterValue vecUMaxP(srcValContainer& sourceValues) {
   const T* n = sourceValues[0].getAsVector();
   const T* m = sourceValues[1].getAsVector();
 
+  // Concatenate the vectors (m first, then n); offsets are in elements of T
+  T temp[2 * I];
+  memcpy(temp, m, sizeof(T) * I);
+  memcpy(temp + I, n, sizeof(T) * I);
+  // Compare each adjacent pair of elements
   T out[I];
   for (int i = 0; i < I; i++) {
-    out[i] = std::max(n[i], m[i]);
+    out[i] = std::max(temp[2 * i], temp[2 * i + 1]);
   }
   return {out, 256};
 }
@@ -585,9 +590,14 @@ RegisterValue vecUMinP(srcValContainer& sourceValues) {
   const T* n = sourceValues[0].getAsVector();
   const T* m = sourceValues[1].getAsVector();
 
+  // Concatenate the vectors (m first, then n); offsets are in elements of T
+  T temp[2 * I];
+  memcpy(temp, m, sizeof(T) * I);
+  memcpy(temp + I, n, sizeof(T) * I);
+
   T out[I];
   for (int i = 0; i < I; i++) {
-    out[i] = std::min(n[i], m[i]);
+    out[i] = std::min(temp[2 * i], temp[2 * i + 1]);
   }
   return {out, 256};
 }
diff --git a/src/lib/arch/aarch64/Instruction_execute.cc b/src/lib/arch/aarch64/Instruction_execute.cc
index 9d6cc78f9d..7e2478b06a 100644
--- a/src/lib/arch/aarch64/Instruction_execute.cc
+++ b/src/lib/arch/aarch64/Instruction_execute.cc
@@ -838,9 +838,9 @@ void Instruction::execute() {
         break;
       }
       case Opcode::AArch64_CMHSv16i8: {  // cmhs vd.16b, vn.16b, vm.16b
-        results_[0] = vecCompare(
+        results_[0] = vecCompare(
             sourceValues_, false,
-            [](int8_t x, int8_t y) -> bool { return (x >= y); });
+            [](uint8_t x, uint8_t y) -> bool { return (x >= y); });
         break;
       }
       case Opcode::AArch64_CMPEQ_PPzZI_B: {  // cmpeq pd.b, pg/z, zn.b, #imm
diff --git a/test/regression/aarch64/instructions/neon.cc b/test/regression/aarch64/instructions/neon.cc
index e23573be9f..093be25bf0 100644
--- a/test/regression/aarch64/instructions/neon.cc
+++ b/test/regression/aarch64/instructions/neon.cc
@@ -726,8 +726,8 @@ TEST_P(InstNeon, cmhs) {
   heap[1] = 0x7F;
   heap[2] = INT8_MAX;
   heap[3] = 1;
-  heap[4] = -128;
-  heap[5] = -1;
+  heap[4] = 128;
+  heap[5] = 1;
   heap[6] = 0xAA;
   heap[7] = 0xBB;
   heap[8] = 0xCC;
@@ -743,7 +743,7 @@ TEST_P(InstNeon, cmhs) {
   heap[16] = INT8_MAX;
   heap[17] = 0x7F;
   heap[18] = 0;
-  heap[19] = -128;
+  heap[19] = 128;
   heap[20] = 1;
   heap[21] = 0;
   heap[22] = 0xAA;
@@ -771,10 +771,10 @@ TEST_P(InstNeon, cmhs) {
   )");
 
   CHECK_NEON(2, uint8_t,
-             {0x00, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+             {0x00, 0xFF, 0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
               0xFF, 0xFF, 0xFF, 0xFF, 0xFF});
   CHECK_NEON(3, uint8_t,
-             {0xFF, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00,
+             {0xFF, 0xFF, 0x00, 0xFF, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0x00,
               0x00, 0xFF, 0xFF, 0xFF, 0xFF});
 }
 
@@ -2683,8 +2683,8 @@ TEST_P(InstNeon, uminp) {
   )");
 
   CHECK_NEON(2, uint8_t,
-             {0x00, 0x00, 0xEE, 0x11, 0x22, 0x33, 0x44, 0x55, 0x01, 0x02, 0x03,
-              0x04, 0x05, 0x06, 0x07, 0x08});
+             {0x00, 0x11, 0x22, 0x44, 0xEE, 0xCC, 0xAA, 0x88, 0x00, 0xAA, 0xBB,
+              0xDD, 0x01, 0x03, 0x05, 0x07});
 }
 TEST_P(InstNeon, umaxp) {
   // umaxp vd.16b vn.16b vm.16b
@@ -2741,8 +2741,8 @@ TEST_P(InstNeon, umaxp) {
   )");
 
   CHECK_NEON(2, uint8_t,
-             {0x01, 0x00, 0xFF, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0xEE, 0xDD,
-              0xCC, 0xBB, 0xAA, 0x99, 0x88});
+             {0x00, 0xEE, 0x33, 0x55, 0xFF, 0xDD, 0xBB, 0x99, 0x01, 0xFF, 0xCC,
+              0xEE, 0x02, 0x04, 0x06, 0x08});
 }
 
 TEST_P(InstNeon, smax) {