WIP: Support Short (8/16 bit) atomic RMW operations on RISCV #297

Draft · wants to merge 1 commit into base: main
4 changes: 4 additions & 0 deletions src/jit/Backend.cpp
@@ -1057,6 +1057,10 @@ void JITCompiler::compileFunction(JITFunction* jitFunc, bool isExternal)
ASSERT(m_context.trapBlocksStart == 0);
m_context.trapBlocksStart = 1;
}

if (sljit_emit_atomic_load(m_compiler, SLJIT_MOV_U16 | SLJIT_ATOMIC_TEST, SLJIT_R0, SLJIT_R1) != SLJIT_ERR_UNSUPPORTED) {
m_options |= JITCompiler::kHasShortAtomic;
}
}

#ifdef WALRUS_JITPERF
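The probe added above uses sljit's SLJIT_ATOMIC_TEST flag: with it, sljit_emit_atomic_load() only validates the requested operation, returning SLJIT_ERR_UNSUPPORTED when the target has no native atomic of that width, and emits no code. A minimal sketch of the same check factored into a helper (hypothetical name; the diff probes only the 16-bit form and treats 8-bit support as implied):

// Sketch only: returns true when the target can emit native 16-bit atomics.
// SLJIT_ATOMIC_TEST validates the operation without generating code.
static bool hasNativeShortAtomics(sljit_compiler* compiler)
{
    return sljit_emit_atomic_load(compiler, SLJIT_MOV_U16 | SLJIT_ATOMIC_TEST,
                                  SLJIT_R0, SLJIT_R1)
        != SLJIT_ERR_UNSUPPORTED;
}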
40 changes: 26 additions & 14 deletions src/jit/ByteCodeParser.cpp
@@ -1907,24 +1907,27 @@ static void compileFunction(JITCompiler* compiler)
}
break;
}
case ByteCode::I32AtomicRmwAddOpcode:
case ByteCode::I32AtomicRmw8AddUOpcode:
case ByteCode::I32AtomicRmw16AddUOpcode:
case ByteCode::I32AtomicRmwSubOpcode:
case ByteCode::I32AtomicRmw8SubUOpcode:
case ByteCode::I32AtomicRmw16SubUOpcode:
case ByteCode::I32AtomicRmwAndOpcode:
case ByteCode::I32AtomicRmw8AndUOpcode:
case ByteCode::I32AtomicRmw16AndUOpcode:
case ByteCode::I32AtomicRmwOrOpcode:
case ByteCode::I32AtomicRmw8OrUOpcode:
case ByteCode::I32AtomicRmw16OrUOpcode:
case ByteCode::I32AtomicRmwXorOpcode:
case ByteCode::I32AtomicRmw8XorUOpcode:
case ByteCode::I32AtomicRmw16XorUOpcode:
case ByteCode::I32AtomicRmwXchgOpcode:
case ByteCode::I32AtomicRmw8XchgUOpcode:
case ByteCode::I32AtomicRmw16XchgUOpcode: {
compiler->increaseStackTmpSize(16);
FALLTHROUGH;
}
case ByteCode::I32AtomicRmwAddOpcode:
case ByteCode::I32AtomicRmwSubOpcode:
case ByteCode::I32AtomicRmwAndOpcode:
case ByteCode::I32AtomicRmwOrOpcode:
case ByteCode::I32AtomicRmwXorOpcode:
case ByteCode::I32AtomicRmwXchgOpcode: {
info = Instruction::kIs32Bit;
requiredInit = OTAtomicRmwI32;
FALLTHROUGH;
@@ -1945,21 +1948,24 @@
}
case ByteCode::I64AtomicRmw8AddUOpcode:
case ByteCode::I64AtomicRmw16AddUOpcode:
case ByteCode::I64AtomicRmw32AddUOpcode:
case ByteCode::I64AtomicRmw8SubUOpcode:
case ByteCode::I64AtomicRmw16SubUOpcode:
case ByteCode::I64AtomicRmw32SubUOpcode:
case ByteCode::I64AtomicRmw8AndUOpcode:
case ByteCode::I64AtomicRmw16AndUOpcode:
case ByteCode::I64AtomicRmw32AndUOpcode:
case ByteCode::I64AtomicRmw8OrUOpcode:
case ByteCode::I64AtomicRmw16OrUOpcode:
case ByteCode::I64AtomicRmw32OrUOpcode:
case ByteCode::I64AtomicRmw8XorUOpcode:
case ByteCode::I64AtomicRmw16XorUOpcode:
case ByteCode::I64AtomicRmw32XorUOpcode:
case ByteCode::I64AtomicRmw8XchgUOpcode:
case ByteCode::I64AtomicRmw16XchgUOpcode:
case ByteCode::I64AtomicRmw16XchgUOpcode: {
compiler->increaseStackTmpSize(16);
FALLTHROUGH;
}
case ByteCode::I64AtomicRmw32AddUOpcode:
case ByteCode::I64AtomicRmw32SubUOpcode:
case ByteCode::I64AtomicRmw32AndUOpcode:
case ByteCode::I64AtomicRmw32OrUOpcode:
case ByteCode::I64AtomicRmw32XorUOpcode:
case ByteCode::I64AtomicRmw32XchgUOpcode: {
Instruction* instr = compiler->append(byteCode, Instruction::Atomic, opcode, 2, 1);
instr->addInfo(info);
@@ -1973,9 +1979,12 @@
operands[2] = STACK_OFFSET(atomicRmw->dstOffset());
break;
}
case ByteCode::I32AtomicRmwCmpxchgOpcode:
case ByteCode::I32AtomicRmw8CmpxchgUOpcode:
case ByteCode::I32AtomicRmw16CmpxchgUOpcode: {
compiler->increaseStackTmpSize(16);
FALLTHROUGH;
}
case ByteCode::I32AtomicRmwCmpxchgOpcode: {
info = Instruction::kIs32Bit;
requiredInit = OTAtomicRmwCmpxchgI32;
FALLTHROUGH;
@@ -1990,7 +1999,10 @@
FALLTHROUGH;
}
case ByteCode::I64AtomicRmw8CmpxchgUOpcode:
case ByteCode::I64AtomicRmw16CmpxchgUOpcode:
case ByteCode::I64AtomicRmw16CmpxchgUOpcode: {
compiler->increaseStackTmpSize(16);
FALLTHROUGH;
}
case ByteCode::I64AtomicRmw32CmpxchgUOpcode: {
Instruction* instr = compiler->append(byteCode, Instruction::Atomic, opcode, 3, 1);
instr->addInfo(info);
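The parser-side change is uniform: every 8/16-bit RMW and cmpxchg variant now calls increaseStackTmpSize(16) before falling through to the shared handling, reserving two word-sized stack slots that the emulation path in MemoryInl.h uses to keep the shifted mask and the bit shift live across the retry loop. A standalone C sketch of how the emitted AND/SHL sequence derives those two values (the address is a made-up example):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uintptr_t addr = 0x1002; /* hypothetical address of a 16-bit operand */
    unsigned size = 2;       /* access width in bytes; 1 for the 8-bit ops */

    /* Byte offset inside the aligned word, converted to a bit shift. */
    unsigned shift = (unsigned)(addr & 0x3) * 8;

    /* 0xff for 8-bit, 0xffff for 16-bit, moved into position. */
    uint32_t mask = (0xffffffffu >> ((4 - size) * 8)) << shift;

    /* The atomic access itself targets the word-aligned base. */
    uintptr_t base = addr & ~(uintptr_t)0x3;

    printf("base=%#lx shift=%u mask=%#x\n", (unsigned long)base, shift, mask);
    return 0;
}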
1 change: 1 addition & 0 deletions src/jit/Compiler.h
@@ -718,6 +718,7 @@ class JITCompiler {
#endif

static const uint32_t kHasCondMov = 1 << 0;
static const uint32_t kHasShortAtomic = 1 << 1;

JITCompiler(Module* module, uint32_t JITFlags);

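kHasShortAtomic takes the next free bit after kHasCondMov; the emulation paths in MemoryInl.h test it through options(), as in the condensed form below, and fall back to the widen-and-mask sequence when it is unset:

bool noShortAtomic = !(compiler->options() & JITCompiler::kHasShortAtomic);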
194 changes: 180 additions & 14 deletions src/jit/MemoryInl.h
@@ -1153,11 +1153,14 @@ static void emitAtomicRmwCmpxchg64(sljit_compiler* compiler, Instruction* instr)

static void emitAtomic(sljit_compiler* compiler, Instruction* instr)
{
bool noShortAtomic = !(CompileContext::get(compiler)->compiler->options() & JITCompiler::kHasShortAtomic);
sljit_s32 operationSize = SLJIT_MOV;
sljit_s32 size = 0;
sljit_s32 offset = 0;
sljit_s32 operation;
uint32_t options = MemAddress::CheckNaturalAlignment | MemAddress::AbsoluteAddress;
sljit_sw stackTmpStart = CompileContext::get(compiler)->stackTmpStart;


switch (instr->opcode()) {
case ByteCode::I64AtomicRmwCmpxchgOpcode: {
@@ -1390,21 +1393,79 @@ static void emitAtomic(sljit_compiler* compiler, Instruction* instr)
sljit_s32 baseReg = SLJIT_EXTRACT_REG(addr.memArg.arg);
sljit_s32 tmpReg = srcReg;

sljit_emit_atomic_load(compiler, operationSize, SLJIT_TMP_DEST_REG, baseReg);
JITArg memValue(operands + 0);
sljit_s32 memValueReg = SLJIT_EXTRACT_REG(memValue.arg);
sljit_s32 maskReg = SLJIT_TMP_R2;
sljit_s32 tempReg = noShortAtomic ? SLJIT_TMP_R0 : SLJIT_TMP_DEST_REG;

if (SLJIT_IS_IMM(memValueReg)) {
return;
}

if (noShortAtomic && size <= 2) {
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
maskReg = SLJIT_TMP_R1;
#endif /* SLJIT_32BIT_ARCHITECTURE */
operationSize = SLJIT_MOV32;

sljit_emit_op2(compiler, SLJIT_AND, maskReg, 0, baseReg, 0, SLJIT_IMM, 0x3);
sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, maskReg, 0, SLJIT_IMM, 3); // multiply by 8
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_AND, baseReg, 0, baseReg, 0, SLJIT_IMM, ~0x3);

sljit_emit_op2(compiler, SLJIT_AND, srcReg, 0, srcReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
sljit_emit_op2(compiler, SLJIT_SHL, srcReg, 0, srcReg, 0, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8), maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET, maskReg, 0);
}
sljit_emit_atomic_load(compiler, operationSize, tempReg, baseReg);

if (noShortAtomic && size <= 2) {
sljit_emit_op1(compiler, SLJIT_MOV, memValueReg, 0, tempReg, 0);
}

if (operation != OP_XCHG) {
tmpReg = instr->requiredReg(1);
sljit_emit_op2(compiler, operation, tmpReg, 0, SLJIT_TMP_DEST_REG, 0, srcReg, 0);
}

sljit_emit_atomic_store(compiler, operationSize | SLJIT_SET_ATOMIC_STORED, tmpReg, baseReg, SLJIT_TMP_DEST_REG);
if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_XOR, maskReg, 0, maskReg, 0, SLJIT_IMM, -1);
sljit_emit_op2(compiler, SLJIT_AND, memValueReg, 0, memValueReg, 0, maskReg, 0);
}

if (operation != OP_XCHG) {
sljit_emit_op2(compiler, operation, tmpReg, 0, tempReg, 0, srcReg, 0);
}

sljit_s32 returnReg = tempReg;
if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_OR, tmpReg, 0, tmpReg, 0, memValueReg, 0);
}

#if (defined SLJIT_CONFIG_ARM_32 && SLJIT_CONFIG_ARM_32)
returnReg = memValueReg;
sljit_emit_op1(compiler, SLJIT_MOV, memValueReg, 0, tempReg, 0);
#endif /* SLJIT_CONFIG_ARM_32 */

sljit_emit_atomic_store(compiler, operationSize | SLJIT_SET_ATOMIC_STORED, tmpReg, baseReg, tempReg);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), restartOnFailure);

if (noShortAtomic && size <= 2) {
sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET);
sljit_emit_op2(compiler, SLJIT_AND, returnReg, 0, returnReg, 0, maskReg, 0);

sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET);
sljit_emit_op2(compiler, SLJIT_LSHR, returnReg, 0, returnReg, 0, maskReg, 0);
}

sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, returnReg, 0);
#if (defined SLJIT_32BIT_ARCHITECTURE && SLJIT_32BIT_ARCHITECTURE)
if (dstPair.arg2 != 0) {
sljit_emit_op1(compiler, SLJIT_MOV, dstPair.arg2, dstPair.arg2w, SLJIT_IMM, 0);
}
#endif /* SLJIT_32BIT_ARCHITECTURE */
sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, SLJIT_TMP_DEST_REG, 0);
return;
}
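The sequence above is the standard widen-and-mask emulation: atomically load the 32-bit word containing the operand, splice the narrow value in at the computed shift, and retry the store until no other thread has touched the word in between. A hedged C11 sketch of the same algorithm for a 16-bit atomic add, assuming little-endian byte order (which the shift computation also assumes):

#include <stdatomic.h>
#include <stdint.h>

/* Sketch only: emulates a 16-bit atomic fetch-add with a 32-bit CAS loop,
   mirroring the widen/mask/retry structure of the emitted JIT code. */
static uint16_t atomic_add_u16_emulated(uint16_t* p, uint16_t v)
{
    uintptr_t addr = (uintptr_t)p;
    _Atomic uint32_t* word = (_Atomic uint32_t*)(addr & ~(uintptr_t)0x3);
    unsigned shift = (unsigned)(addr & 0x3) * 8; /* little-endian layout */
    uint32_t mask = 0xffffu << shift;

    uint32_t old = atomic_load(word);
    for (;;) {
        uint16_t narrow = (uint16_t)((old & mask) >> shift);
        uint32_t repl = (old & ~mask)
                        | ((uint32_t)(uint16_t)(narrow + v) << shift);
        /* On failure, old is refreshed with the current word contents. */
        if (atomic_compare_exchange_weak(word, &old, repl))
            return narrow; /* the RMW ops return the previous value */
    }
}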

@@ -1417,6 +1478,7 @@ static void emitAtomic(sljit_compiler* compiler, Instruction* instr)
JITArgPair dstPair, srcExpectedPair;
sljit_s32 tmpReg;
sljit_s32 srcExpectedReg;
sljit_s32 srcValueReg;

dstPair.arg2 = 0;

@@ -1428,6 +1490,7 @@
dst = JITArg(operands + 3);
tmpReg = GET_SOURCE_REG(tmp.arg, instr->requiredReg(1));
srcExpectedReg = GET_SOURCE_REG(srcExpected.arg, instr->requiredReg(2));
srcValueReg = GET_TARGET_REG(srcValue.arg, instr->requiredReg(0));
} else {
JITArgPair tmpPair(operands + 0);
JITArgPair srcValuePair(operands + 2);
@@ -1439,6 +1502,7 @@

srcValue.arg = srcValuePair.arg1;
srcValue.argw = srcValuePair.arg1w;
srcValueReg = GET_TARGET_REG(srcValuePair.arg1, instr->requiredReg(0));
dst.arg = dstPair.arg1;
dst.argw = dstPair.arg1w;
sljit_emit_op1(compiler, SLJIT_MOV, dstPair.arg2, dstPair.arg2w, SLJIT_IMM, 0);
@@ -1450,28 +1514,90 @@
struct sljit_label* restartOnFailure = sljit_emit_label(compiler);
sljit_s32 baseReg = SLJIT_EXTRACT_REG(addr.memArg.arg);

sljit_s32 memValueReg = tmpReg;
sljit_s32 maskReg = SLJIT_TMP_R1;
sljit_s32 tempReg = noShortAtomic ? SLJIT_TMP_R0 : SLJIT_TMP_DEST_REG;

if (SLJIT_IS_IMM(memValueReg)) {
return;
}

if (noShortAtomic && size <= 2) {
if (!(operationSize & SLJIT_32) && operationSize != SLJIT_MOV32) {
operationSize = SLJIT_MOV;
} else {
operationSize = SLJIT_MOV32;
}
}

if (!(operationSize & SLJIT_32) && operationSize != SLJIT_MOV32) {
compareTopFalse = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_IMM, 0, srcExpectedPair.arg2, srcExpectedPair.arg2w);
}
if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_AND, maskReg, 0, baseReg, 0, SLJIT_IMM, 0x3);
sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, maskReg, 0, SLJIT_IMM, 3); // multiply by 8
Review comment (Collaborator): 32 bit atomic should be present, so multiply by 4 should be enough
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_AND, baseReg, 0, baseReg, 0, SLJIT_IMM, ~0x3);

sljit_emit_op2(compiler, SLJIT_AND, srcValueReg, 0, srcValueReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
sljit_emit_op2(compiler, SLJIT_SHL, srcValueReg, 0, srcValueReg, 0, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8), maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET, maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET);
}

sljit_emit_op1(compiler, SLJIT_MOV, tmpReg, 0, srcValue.arg, srcValue.argw);

sljit_emit_atomic_load(compiler, operationSize, SLJIT_TMP_DEST_REG, baseReg);
compareFalse = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_TMP_DEST_REG, 0, srcExpectedReg, 0);
sljit_emit_atomic_store(compiler, operationSize | SLJIT_SET_ATOMIC_STORED, tmpReg, baseReg, SLJIT_TMP_DEST_REG);
sljit_emit_atomic_load(compiler, operationSize, tempReg, baseReg);

if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_AND, tmpReg, 0, tempReg, 0, maskReg, 0);
}

compareFalse = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, (noShortAtomic && size <= 2 ? tmpReg : tempReg), 0, srcExpectedReg, 0);

if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_AND, tmpReg, 0, tmpReg, 0, maskReg, 0);

sljit_emit_op1(compiler, SLJIT_MOV, srcExpectedReg, 0, tempReg, 0);
sljit_emit_op2(compiler, SLJIT_XOR, maskReg, 0, maskReg, 0, SLJIT_IMM, -1);
sljit_emit_op2(compiler, SLJIT_AND, srcExpectedReg, 0, srcExpectedReg, 0, maskReg, 0);
sljit_emit_op2(compiler, SLJIT_OR, tmpReg, 0, srcValueReg, 0, srcExpectedReg, 0);
}

if (noShortAtomic) {
sljit_emit_op1(compiler, operationSize, maskReg, 0, tempReg, 0);
}
sljit_emit_atomic_store(compiler, operationSize | SLJIT_SET_ATOMIC_STORED, tmpReg, baseReg, tempReg);
if (noShortAtomic) {
sljit_emit_op1(compiler, operationSize, tempReg, 0, maskReg, 0);
}
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), restartOnFailure);
storeSuccess = sljit_emit_jump(compiler, SLJIT_ATOMIC_STORED);

if (!(operationSize & SLJIT_32) && operationSize != SLJIT_MOV32) {
sljit_set_label(compareTopFalse, sljit_emit_label(compiler));
sljit_emit_op1(compiler, operationSize, SLJIT_TMP_DEST_REG, 0, addr.memArg.arg, addr.memArg.argw);
sljit_emit_op1(compiler, operationSize, tempReg, 0, addr.memArg.arg, addr.memArg.argw);
}
sljit_set_label(compareFalse, sljit_emit_label(compiler));
sljit_set_label(storeSuccess, sljit_emit_label(compiler));

sljit_set_label(compareFalse, sljit_emit_label(compiler));

if (noShortAtomic && size <= 2) {
sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET);
sljit_emit_op2(compiler, SLJIT_AND, tmpReg, 0, tempReg, 0, maskReg, 0);

sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET);
sljit_emit_op2(compiler, SLJIT_LSHR, tmpReg, 0, tmpReg, 0, maskReg, 0);
tempReg = tmpReg;
}

if (dstPair.arg2 != 0) {
sljit_emit_op1(compiler, SLJIT_MOV, dstPair.arg2, dstPair.arg2w, SLJIT_IMM, 0);
}
sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, SLJIT_TMP_DEST_REG, 0);
sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, tempReg, 0);
#else /* !SLJIT_32BIT_ARCHITECTURE */
sljit_s32 tmpReg;
sljit_s32 srcExpectedReg;
@@ -1481,19 +1607,59 @@ static void emitAtomic(sljit_compiler* compiler, Instruction* instr)
JITArg dst(operands + 3);
tmpReg = GET_SOURCE_REG(tmp.arg, instr->requiredReg(1));
srcExpectedReg = GET_SOURCE_REG(srcExpected.arg, instr->requiredReg(2));
sljit_s32 tempReg = SLJIT_TMP_DEST_REG;
sljit_s32 tempReg2 = SLJIT_TMP_R1;
sljit_s32 maskReg = SLJIT_TMP_R2;

struct sljit_jump* compareFalse;
struct sljit_label* restartOnFailure = sljit_emit_label(compiler);
sljit_s32 baseReg = SLJIT_EXTRACT_REG(addr.memArg.arg);

sljit_emit_op1(compiler, SLJIT_MOV, tmpReg, 0, srcValue.arg, srcValue.argw);
sljit_emit_atomic_load(compiler, operationSize, SLJIT_TMP_DEST_REG, baseReg);
compareFalse = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, SLJIT_TMP_DEST_REG, 0, srcExpectedReg, 0);
sljit_emit_atomic_store(compiler, operationSize | SLJIT_SET_ATOMIC_STORED, tmpReg, baseReg, SLJIT_TMP_DEST_REG);

if (noShortAtomic && size <= 2) {
operationSize = SLJIT_MOV_P;

sljit_emit_op2(compiler, SLJIT_AND, maskReg, 0, baseReg, 0, SLJIT_IMM, 0x7);
sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, maskReg, 0, SLJIT_IMM, 3); // multiply by 8
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_AND, baseReg, 0, baseReg, 0, SLJIT_IMM, ~0x7);

sljit_emit_op2(compiler, SLJIT_AND, tmpReg, 0, tmpReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
sljit_emit_op2(compiler, SLJIT_SHL, tmpReg, 0, tmpReg, 0, maskReg, 0);

sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8), maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), stackTmpStart + WORD_LOW_OFFSET, maskReg, 0);
sljit_emit_op1(compiler, SLJIT_MOV, maskReg, 0, SLJIT_MEM1(SLJIT_SP), stackTmpStart + 8 + WORD_LOW_OFFSET);
}

sljit_emit_atomic_load(compiler, operationSize, tempReg, baseReg);
if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_LSHR, tempReg2, 0, tempReg, 0, maskReg, 0);
sljit_emit_op2(compiler, SLJIT_AND, tempReg2, 0, tempReg2, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
}
compareFalse = sljit_emit_cmp(compiler, SLJIT_NOT_EQUAL, (noShortAtomic && size <= 2 ? tempReg2 : tempReg), 0, srcExpectedReg, 0);

if (noShortAtomic && size <= 2) {
sljit_emit_op1(compiler, SLJIT_MOV, srcExpectedReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
sljit_emit_op2(compiler, SLJIT_SHL, maskReg, 0, srcExpectedReg, 0, maskReg, 0);
sljit_emit_op2(compiler, SLJIT_XOR, maskReg, 0, maskReg, 0, SLJIT_IMM, -1);
sljit_emit_op2(compiler, SLJIT_AND, tempReg2, 0, tempReg, 0, maskReg, 0);
sljit_emit_op2(compiler, SLJIT_OR, tmpReg, 0, tmpReg, 0, tempReg2, 0);
sljit_emit_op1(compiler, SLJIT_MOV, tempReg2, 0, tempReg, 0);
}

sljit_emit_atomic_store(compiler, operationSize | SLJIT_SET_ATOMIC_STORED, tmpReg, baseReg, tempReg);
sljit_set_label(sljit_emit_jump(compiler, SLJIT_ATOMIC_NOT_STORED), restartOnFailure);

sljit_set_label(compareFalse, sljit_emit_label(compiler));
sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, SLJIT_TMP_DEST_REG, 0);

if (noShortAtomic && size <= 2) {
sljit_emit_op2(compiler, SLJIT_AND, tempReg, 0, tempReg, 0, SLJIT_IMM, (0xffffffff) >> ((4 - size) * 8));
}

sljit_emit_op1(compiler, SLJIT_MOV, dst.arg, dst.argw, tempReg, 0);
#endif /* SLJIT_32BIT_ARCHITECTURE */
}
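The compare-exchange path adds one wrinkle to the same pattern: the comparison must be made on the extracted narrow value, no store may happen on a mismatch, and the untouched bytes of the word must be preserved on success. A C11 sketch of a 16-bit compare-exchange built from the wide CAS, under the same little-endian assumption:

#include <stdatomic.h>
#include <stdint.h>

/* Sketch only: 16-bit compare-exchange emulated with a 32-bit CAS loop,
   mirroring the mask/compare/merge steps of the emitted code. Returns
   the previous 16-bit value, as the WASM cmpxchg opcodes require. */
static uint16_t atomic_cmpxchg_u16_emulated(uint16_t* p, uint16_t expected,
                                            uint16_t desired)
{
    uintptr_t addr = (uintptr_t)p;
    _Atomic uint32_t* word = (_Atomic uint32_t*)(addr & ~(uintptr_t)0x3);
    unsigned shift = (unsigned)(addr & 0x3) * 8; /* little-endian layout */
    uint32_t mask = 0xffffu << shift;

    uint32_t old = atomic_load(word);
    for (;;) {
        uint16_t narrow = (uint16_t)((old & mask) >> shift);
        if (narrow != expected)
            return narrow;                /* mismatch: no store at all */
        uint32_t repl = (old & ~mask) | ((uint32_t)desired << shift);
        if (atomic_compare_exchange_weak(word, &old, repl))
            return narrow;                /* stored: return old value */
        /* CAS failure refreshed old; loop and re-check the comparison. */
    }
}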
