Description
Issue Description:
The amoadd.d.aqrl instruction appears to malfunction when used in the code segment provided below, which implements a barrier built on atomic operations. When this code is executed, the barrier does not work correctly, most likely because of a problem with the amoadd.d.aqrl operation.
To Reproduce:
Environment:
sst --version
SST-Core Version (-dev)
sst-config --CXX
g++
sst-config --ELEMENT_CXXFLAGS
-std=c++17 -fPIC -DHAVE_CONFIG_H -I/home/aperdeus/git/sst-core/build/../sst-core-install/include
REV: devel (4a724e2)
REV compiled with: g++
compiler: clang (intel)
To reproduce this issue, please follow the steps below:
- Compile the code snippet provided.
- Launch REV with 3 Harts
- Set the entry point for the following functions: main, fix_stack_1, fix_stack_2.
- Expected behavior: the test exits without triggering the assertion. Current behavior: it hangs in an infinite loop.
Additional Information:
The provided code has been tested successfully on x86 and on RISC-V QEMU, where it passes without any issues. The problem appears only when running it on REV.
#include<stdint.h>
#include<unistd.h>
/* Number of harts taking part in the test: sizes localSense[] and stack[],
 * and is the participant count passed to barrierWait() from test(). */
#define CORECOUNT 3
/* Base value used by barrierWait() when turning getpid() into a 0-based index.
 * NOTE(review): presumably the first PID REV hands out -- confirm. */
#define REV_PID_START 1024
/* Iteration count handed to mySleep() for the busy-wait delays in test(). */
#define WAIT_TIME 10000
/*
 * Minimal assert replacement: when the condition is false, emit four zero
 * bytes into the instruction stream.
 * NOTE(review): presumably meant to trap as an illegal instruction on the
 * RISC-V target -- confirm.
 *
 * Wrapped in do { ... } while (0) so that `assert(x);` is a single statement
 * and stays correct inside an unbraced if/else.
 */
#define assert(x)                                                              \
  do {                                                                         \
    if (!(x)) {                                                                \
      asm(".byte 0x00");                                                       \
      asm(".byte 0x00");                                                       \
      asm(".byte 0x00");                                                       \
      asm(".byte 0x00");                                                       \
    }                                                                          \
  } while (0)
/* Arrival count for the current barrier round: atomically incremented by each
 * caller of barrierWait() and reset to 0 by the last one to arrive. */
volatile uint64_t counter;
/* Release flag: the last arriver stores its local sense here; the other
 * callers spin in barrierWait() until this value equals their own sense. */
volatile uint64_t flag;
/* Per-participant sense value, flipped between 0 and 1 at the start of every
 * barrierWait() call; indexed by the tid derived from getpid(). */
volatile uint64_t localSense[CORECOUNT];
/*
 * Sense-reversing barrier: blocks the caller until numOfProcess callers have
 * arrived, then releases all of them.
 *
 * numOfProcess: number of participants that must arrive before anyone is
 *               released.
 *
 * Uses the globals counter, flag and localSense[]. The statement order below
 * matters: the counter is reset before the flag is published.
 */
void barrierWait(uint64_t numOfProcess) {
/* Map the caller's PID to an index into localSense[].
 * NOTE(review): assumes getpid() returns a value in
 * [REV_PID_START, REV_PID_START + CORECOUNT - 1]; anything else indexes
 * localSense[] out of bounds -- confirm against REV's PID assignment. */
int tid = REV_PID_START + CORECOUNT - 1 - getpid();
/* Flip this participant's sense for the new barrier round. */
localSense[tid] = (localSense[tid] == 0) ? 1 : 0;
/* Atomically count this arrival. Per the report, this 64-bit sequentially
 * consistent add is the operation that is emitted as amoadd.d.aqrl. */
if (__atomic_add_fetch(&(counter), 1, __ATOMIC_SEQ_CST) == numOfProcess) {
/* Last arriver: reset the counter for the next round, then publish the new
 * sense so the spinning participants are released. */
__atomic_store_n(&(counter), 0, __ATOMIC_SEQ_CST);
__atomic_store(&(flag), &(localSense[tid]), __ATOMIC_SEQ_CST);
} else {
/* Not the last arriver: spin until the global flag matches our sense. */
while (__atomic_load_n(&(flag), __ATOMIC_SEQ_CST) != localSense[tid]) {
};
}
}
/* Scratch counter bumped by mySleep(); volatile so that every increment is a
 * real memory read and write. */
volatile int c;

/*
 * Busy-wait delay: increments the global c exactly t times.
 * A value of t that is zero or negative performs no increments.
 */
void mySleep(int t) {
  int remaining = t;

  while (remaining > 0) {
    c = c + 1;
    remaining--;
  }
}
void test(int rank);
/* Number of int elements in each row of stack[]. Parenthesized so the macro
 * expands as a single value in any surrounding expression. */
#define STACK_SIZE (1024 * 1024)
/* Backing storage for the private stacks installed by fix_stack_1() (row 0)
 * and fix_stack_2() (row 1); one row of STACK_SIZE ints per participant. */
int stack[CORECOUNT][STACK_SIZE];
void fix_stack_1() {
asm("mv sp, %0" ::"r"(&(stack[0][STACK_SIZE - 1])));
test(1);
return;
}
void fix_stack_2() {
asm("mv sp, %0" ::"r"(&(stack[1][STACK_SIZE - 1])));
test(2);
return;
}
/* result2 is written by rank 1 before the barrier; result1 receives a copy of
 * it from rank 2 after the barrier. */
int result1 = 0;
int result2 = 0;

/*
 * Test body executed by every participant.
 *
 * rank: 0, 1 or 2 in the visible callers. Rank 1 delays and then writes
 *       result2 = 3 before the barrier; rank 2 copies result2 into result1
 *       after the barrier; rank 0 only takes part in the barrier.
 *
 * Every rank then delays again and asserts result1 == 3. There is no second
 * barrier between rank 2's write to result1 and the other ranks' reads, so
 * the final check relies on the delay.
 */
void test(int rank) {
  const int writesBeforeBarrier = (rank == 1);
  const int copiesAfterBarrier = (rank == 2);

  if (writesBeforeBarrier) {
    mySleep(WAIT_TIME);
    result2 = 3;
  }

  barrierWait(CORECOUNT);

  if (copiesAfterBarrier) {
    result1 = result2;
  }

  mySleep(WAIT_TIME * 10);
  assert(result1 == 3);
}
/* Program entry point: runs the test body as rank 0 and returns 0. */
int main() {
  const int rank = 0;

  test(rank);
  return 0;
}