Skip to content

Commit e7ab2f6

Browse files
authored
[RISC-V] Initial patch to fix RISCV64 interpreter (dotnet#94548)
* [RISC-V] Initial patch to fix RISCV64 interpreter * Code review feedback
1 parent 5127e07 commit e7ab2f6

File tree

5 files changed

+208
-11
lines changed

5 files changed

+208
-11
lines changed

src/coreclr/vm/interpreter.cpp

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,9 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
911911
// x8 through x15 are scratch registers on ARM64.
912912
IntReg x8 = IntReg(8);
913913
IntReg x9 = IntReg(9);
914+
915+
#elif defined(HOST_RISCV64)
916+
#else
914917
#error unsupported platform
915918
#endif
916919
}
@@ -1073,15 +1076,15 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
10731076
argState.AddArg(vaSigCookieIndex);
10741077
}
10751078

1076-
#if defined(HOST_ARM) || defined(HOST_AMD64) || defined(HOST_ARM64)
1079+
#if defined(HOST_ARM) || defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_RISCV64)
10771080
// Generics context comes before args on ARM. Would be better if I factored this out as a call,
10781081
// to avoid large swaths of duplicate code.
10791082
if (hasGenericsContextArg)
10801083
{
10811084
argPerm[genericsContextArgIndex] = physArgIndex; physArgIndex++;
10821085
argState.AddArg(genericsContextArgIndex);
10831086
}
1084-
#endif // HOST_ARM || HOST_AMD64 || HOST_ARM64
1087+
#endif // HOST_ARM || HOST_AMD64 || HOST_ARM64 || HOST_RISCV64
10851088

10861089
CORINFO_ARG_LIST_HANDLE argPtr = info->args.args;
10871090
// Some arguments have been passed in registers, some in memory. We must generate code that
@@ -1432,7 +1435,7 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
14321435
sl.X86EmitPopReg(kEBP);
14331436
sl.X86EmitReturn(static_cast<WORD>(argState.callerArgStackSlots * sizeof(void*)));
14341437
#elif defined(UNIX_AMD64_ABI)
1435-
bool hasTowRetSlots = info->args.retType == CORINFO_TYPE_VALUECLASS &&
1438+
bool hasTwoRetSlots = info->args.retType == CORINFO_TYPE_VALUECLASS &&
14361439
getClassSize(info->args.retTypeClass) == 16;
14371440

14381441
int fixedTwoSlotSize = 16;
@@ -1484,7 +1487,7 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
14841487
sl.X86EmitRegLoad(ARGUMENT_kREG1, reinterpret_cast<UINT_PTR>(interpMethInfo));
14851488

14861489
sl.X86EmitCall(sl.NewExternalCodeLabel(interpretMethodFunc), 0);
1487-
if (hasTowRetSlots) {
1490+
if (hasTwoRetSlots) {
14881491
sl.X86EmitEspOffset(0x8b, kRAX, 0);
14891492
sl.X86EmitEspOffset(0x8b, kRDX, 8);
14901493
}
@@ -1635,7 +1638,40 @@ CorJitResult Interpreter::GenerateInterpreterStub(CEEInfo* comp,
16351638
#elif defined(HOST_LOONGARCH64)
16361639
assert(!"unimplemented on LOONGARCH yet");
16371640
#elif defined(HOST_RISCV64)
1638-
assert(!"unimplemented on RISCV64 yet");
1641+
bool hasTwoRetSlots = info->args.retType == CORINFO_TYPE_VALUECLASS &&
1642+
getClassSize(info->args.retTypeClass) == 16;
1643+
1644+
UINT stackFrameSize = argState.numFPRegArgSlots;
1645+
1646+
sl.EmitProlog(argState.numRegArgs, argState.numFPRegArgSlots, hasTwoRetSlots ? 2 * sizeof(void*) : 0);
1647+
1648+
#if INTERP_ILSTUBS
1649+
if (pMD->IsILStub())
1650+
{
1651+
// Third argument is stubcontext, in t2 (METHODDESC_REGISTER).
1652+
sl.EmitMovReg(IntReg(12), IntReg(7));
1653+
}
1654+
else
1655+
#endif
1656+
{
1657+
// For a non-ILStub method, push NULL as the third StubContext argument.
1658+
sl.EmitMovConstant(IntReg(12), 0);
1659+
}
1660+
// Second arg is pointer to the base of the ILargs arr -- i.e., the current stack value.
1661+
sl.EmitAddImm(IntReg(11), RegSp, sl.GetSavedRegArgsOffset());
1662+
1663+
// First arg is the pointer to the interpMethodInfo structure
1664+
sl.EmitMovConstant(IntReg(10), reinterpret_cast<UINT64>(interpMethInfo));
1665+
1666+
sl.EmitCallLabel(sl.NewExternalCodeLabel((LPVOID)interpretMethodFunc), FALSE, FALSE);
1667+
if (hasTwoRetSlots)
1668+
{
1669+
// TODO: handle return registers to use int or float registers
1670+
sl.EmitLoad(IntReg(10), RegSp, 0);
1671+
sl.EmitLoad(IntReg(11), RegSp, sizeof(void*));
1672+
}
1673+
1674+
sl.EmitEpilog();
16391675
#else
16401676
#error unsupported platform
16411677
#endif
@@ -2430,6 +2466,14 @@ void Interpreter::ExecuteMethod(ARG_SLOT* retVal, _Out_ bool* pDoJmpCall, _Out_
24302466
//The Fixed Two slot return buffer address
24312467
memcpy(m_ilArgs-16, OpStackGet<void*>(0), sz);
24322468
}
2469+
#elif defined(TARGET_RISCV64)
2470+
// Is it a struct contained in two slots?
2471+
else if (m_methInfo->m_returnType == CORINFO_TYPE_VALUECLASS
2472+
&& sz == 16)
2473+
{
2474+
//The Fixed Two slot return buffer address
2475+
memcpy(m_ilArgs-32, OpStackGet<void*>(0), sz);
2476+
}
24332477
#endif
24342478
else if (CorInfoTypeIsFloatingPoint(m_methInfo->m_returnType) &&
24352479
CorInfoTypeIsFloatingPoint(retValIt.ToCorInfoType()))
@@ -9448,7 +9492,7 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T
94489492
HFAReturnArgSlots = (HFAReturnArgSlots + sizeof(ARG_SLOT) - 1) / sizeof(ARG_SLOT);
94499493
}
94509494
}
9451-
#elif defined(UNIX_AMD64_ABI)
9495+
#elif defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64)
94529496
unsigned HasTwoSlotBuf = sigInfo.retType == CORINFO_TYPE_VALUECLASS &&
94539497
getClassSize(sigInfo.retTypeClass) == 16;
94549498
#endif
@@ -9689,7 +9733,7 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T
96899733
// This is the argument slot that will be used to hold the return value.
96909734
// In UNIX_AMD64_ABI, the return type may need two ARG_SLOTs.
96919735
ARG_SLOT retVals[2] = {0, 0};
9692-
#if !defined(HOST_ARM) && !defined(UNIX_AMD64_ABI)
9736+
#if !defined(HOST_ARM) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_RISCV64)
96939737
_ASSERTE (NUMBER_RETURNVALUE_SLOTS == 1);
96949738
#endif
96959739

@@ -9968,7 +10012,7 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T
996810012
bool b = CycleTimer::GetThreadCyclesS(&startCycles); _ASSERTE(b);
996910013
#endif // INTERP_ILCYCLE_PROFILE
997010014

9971-
#if defined(UNIX_AMD64_ABI)
10015+
#if defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64)
997210016
mdcs.CallTargetWorker(args, retVals, HasTwoSlotBuf ? 16: 8);
997310017
#else
997410018
mdcs.CallTargetWorker(args, retVals, 8);
@@ -10114,7 +10158,7 @@ void Interpreter::DoCallWork(bool virtualCall, void* thisArg, CORINFO_RESOLVED_T
1011410158
{
1011510159
OpStackSet<INT64>(m_curStackHt, GetSmallStructValue(&smallStructRetVal, retTypeSz));
1011610160
}
10117-
#if defined(UNIX_AMD64_ABI)
10161+
#if defined(UNIX_AMD64_ABI) || defined(TARGET_RISCV64)
1011810162
else if (HasTwoSlotBuf)
1011910163
{
1012010164
void* dst = LargeStructOperandStackPush(16);

src/coreclr/vm/riscv64/cgencpu.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -365,7 +365,6 @@ class StubLinkerCPU : public StubLinker
365365
void EmitComputedInstantiatingMethodStub(MethodDesc* pSharedMD, struct ShuffleEntry *pShuffleEntryArray, void* extraArg);
366366
#endif // FEATURE_SHARE_GENERIC_CODE
367367

368-
private:
369368
void EmitMovConstant(IntReg target, UINT64 constant);
370369
void EmitJumpRegister(IntReg regTarget);
371370
void EmitMovReg(IntReg dest, IntReg source);
@@ -380,6 +379,9 @@ class StubLinkerCPU : public StubLinker
380379
void EmitLoad(FloatReg dest, IntReg srcAddr, int offset = 0);
381380
void EmitStore(IntReg src, IntReg destAddr, int offset = 0);
382381
void EmitStore(FloatReg src, IntReg destAddr, int offset = 0);
382+
383+
void EmitProlog(unsigned short cIntRegArgs, unsigned short cFpRegArgs, unsigned short cbStackSpace = 0);
384+
void EmitEpilog();
383385
};
384386

385387
extern "C" void SinglecastDelegateInvokeStub();

src/coreclr/vm/riscv64/stubs.cpp

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1104,6 +1104,116 @@ void StubLinkerCPU::EmitJumpRegister(IntReg regTarget)
11041104
Emit32(0x00000067 | (regTarget << 15));
11051105
}
11061106

1107+
// Emits the stub prolog: allocates a 16-byte-aligned frame, saves FP and RA, sets FP = SP, and then
// stores the floating point and integer argument registers into the frame.
//
// Arguments:
//    cIntRegArgs  - number of integer argument registers to store, starting at IntReg(10); asserted <= 8
//    cFpRegArgs   - number of floating point argument registers to store, starting at FloatReg(10); asserted <= 8
//    cbStackSpace - extra bytes to reserve at the low end of the frame; alignment padding is added to it
//
// Asserts that no prolog has been emitted yet (!m_fProlog). The computed layout is recorded through
// DescribeProlog before the size check. Throws kNotSupportedException if the padded frame exceeds 504 bytes.
void StubLinkerCPU::EmitProlog(unsigned short cIntRegArgs, unsigned short cFpRegArgs, unsigned short cbStackSpace)
1108+
{
1109+
_ASSERTE(!m_fProlog);
1110+
1111+
unsigned short numberOfEntriesOnStack = 2 + cIntRegArgs + cFpRegArgs; // 2 for fp, ra
1112+
1113+
// The stack needs to be 16-byte aligned. Compute the padded frame size before recording it.
1114+
unsigned short totalPaddedFrameSize = static_cast<unsigned short>(ALIGN_UP(cbStackSpace + numberOfEntriesOnStack * sizeof(void*), 2 * sizeof(void*)));
1115+
// The padding is folded into the local stack space (cbStackSpace now includes it).
1116+
cbStackSpace = totalPaddedFrameSize - numberOfEntriesOnStack * sizeof(void*);
1117+
1118+
// Record the parameters of this prolog so that we can generate a matching epilog and unwind info.
1119+
DescribeProlog(cIntRegArgs, cFpRegArgs, cbStackSpace);
1120+
1121+
1122+
// N.B. Although the range of a `sub sp` is stated as 4KB, we're limiting the frame to 504 bytes so that the emitted prolog is
// NOTE(review): confirm that the 4KB figure holds for RISC-V.
1123+
// efficiently expressible in unwind codes. The largest offset in the typical unwind info encodings that we use is 504,
1124+
// so allocations larger than 504 bytes would require setting the SP in multiple strides, which would complicate both
1125+
// prolog and epilog generation as well as unwind info generation.
1126+
_ASSERTE((totalPaddedFrameSize <= 504) && "NYI:RISCV64 Implement StubLinker prologs with larger than 504 bytes of frame size");
1127+
if (totalPaddedFrameSize > 504)
1128+
COMPlusThrow(kNotSupportedException);
1129+
1130+
// Here is what the stack will look like (the stack grows up in this diagram, towards lower addresses)
1131+
//            [Low Address]
1132+
//            +------------+
1133+
//      SP -> |            | <-+
1134+
//            :            :   | Stack Frame, (i.e. outgoing arguments) including padding
1135+
//            |            | <-+
1136+
//            +------------+
1137+
//            |     FP     |
1138+
//            +------------+
1139+
//            |     RA     |
1140+
//            +------------+
1141+
//            |    F10     | <-+
1142+
//            +------------+   |
1143+
//            :            :   | Fp Args
1144+
//            +------------+   |
1145+
//            |    F17     | <-+
1146+
//            +------------+
1147+
//            |    X10     | <-+
1148+
//            +------------+   |
1149+
//            :            :   | Int Args
1150+
//            +------------+   |
1151+
//            |    X17     | <-+
1152+
//            +------------+
1153+
//  Old SP -> |[Stack Args]|
1154+
//            [High Address]
1155+
1156+
// Regarding the order of operations in the prolog and epilog:
1157+
// If the prolog and the epilog match each other, we can simplify emitting the unwind codes and save a few
1158+
// bytes of unwind codes by making the prolog and epilog share the same unwind codes.
1159+
// In order to do that we need to make the epilog be the reverse of the prolog.
1160+
// But we wouldn't want to add restoring of the argument registers as that's completely unnecessary.
1161+
// Besides, saving argument registers cannot be expressed by the unwind code encodings.
1162+
// So, we'll push saving the argument registers to the very last in the prolog, skip restoring them in the epilog,
1163+
// and also skip reporting it to the OS.
1164+
//
1165+
// Another bit that we can save is resetting the frame pointer.
1166+
// This is not necessary when the SP doesn't get modified beyond prolog and epilog. (i.e. no alloca/localloc)
1167+
// And in that case we don't need to report setting up the FP either.
1168+
1169+
// 1. Relocate SP
1170+
EmitSubImm(RegSp, RegSp, totalPaddedFrameSize);
1171+
1172+
unsigned cbOffset = 2 * sizeof(void*) + cbStackSpace; // 2 is for fp, ra
1173+
1174+
// 2. Store FP/RA (directly above the local stack space)
1175+
EmitStore(RegFp, RegSp, cbStackSpace);
1176+
EmitStore(RegRa, RegSp, cbStackSpace + sizeof(void*));
1177+
1178+
// 3. Set the frame pointer
1179+
EmitMovReg(RegFp, RegSp);
1180+
1181+
// 4. Store floating point argument registers (FloatReg(10) onwards, starting at cbOffset)
1182+
_ASSERTE(cFpRegArgs <= 8);
1183+
for (unsigned short i = 0; i < cFpRegArgs; i++)
1184+
EmitStore(FloatReg(i + 10), RegSp, cbOffset + i * sizeof(void*));
1185+
1186+
// 5. Store int argument registers (IntReg(10) onwards, immediately after the FP argument slots)
1187+
cbOffset += cFpRegArgs * sizeof(void*);
1188+
_ASSERTE(cIntRegArgs <= 8);
1189+
for (unsigned short i = 0 ; i < cIntRegArgs; i++)
1190+
EmitStore(IntReg(i + 10), RegSp, cbOffset + i * sizeof(void*));
1191+
}
1192+
1193+
// Emits the stub epilog: reloads FP and RA from the frame (at m_cbStackSpace), adds GetStackFrameSize()
// back to SP, and jumps to RA. The steps are numbered in reverse; the argument registers are not restored.
// Asserts that a prolog has been described (m_fProlog).
void StubLinkerCPU::EmitEpilog()
1194+
{
1195+
_ASSERTE(m_fProlog);
1196+
1197+
// 5. Restore int argument registers
1198+
// nop: We don't need to. They are scratch registers
1199+
1200+
// 4. Restore floating point argument registers
1201+
// nop: We don't need to. They are scratch registers
1202+
1203+
// 3. Restore the SP from FP
1204+
// N.B. We're assuming that the stublinker stubs don't do alloca, hence nop
1205+
1206+
// 2. Restore FP/RA
1207+
EmitLoad(RegFp, RegSp, m_cbStackSpace);
1208+
EmitLoad(RegRa, RegSp, m_cbStackSpace + sizeof(void*));
1209+
1210+
// 1. Restore SP
1211+
EmitAddImm(RegSp, RegSp, GetStackFrameSize());
1212+
1213+
// jalr x0, 0(ra)
1214+
EmitJumpRegister(RegRa);
1215+
}
1216+
11071217
// Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
11081218
static unsigned ITypeInstr(unsigned opcode, unsigned funct3, unsigned rd, unsigned rs1, int imm12)
11091219
{

src/coreclr/vm/stublink.cpp

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,12 @@ StubLinker::StubLinker()
360360
m_cbStackFrame = 0;
361361
m_fPushArgRegs = FALSE;
362362
#endif
363+
#ifdef TARGET_RISCV64
364+
m_fProlog = FALSE;
365+
m_cIntRegArgs = 0;
366+
m_cFpRegArgs = 0;
367+
m_cbStackSpace = 0;
368+
#endif
363369
#ifdef STUBLINKER_GENERATES_UNWIND_INFO
364370
#ifdef _DEBUG
365371
m_pUnwindInfoCheckLabel = NULL;
@@ -1891,7 +1897,30 @@ UINT StubLinker::GetStackFrameSize()
18911897
return m_cbStackSpace + (2 + m_cCalleeSavedRegs + m_cIntRegArgs + m_cVecRegArgs)*sizeof(void*);
18921898
}
18931899

1894-
#endif // ifdef TARGET_ARM, elif defined(TARGET_ARM64)
1900+
#elif defined(TARGET_RISCV64)
1901+
// Records the parameters of the emitted prolog (int/FP argument register counts and the stack space
// in bytes) and marks the prolog as described, so that GetSavedRegArgsOffset and GetStackFrameSize
// can be used afterwards.
void StubLinker::DescribeProlog(UINT cIntRegArgs, UINT cFpRegArgs, UINT cbStackSpace)
1902+
{
1903+
m_fProlog = TRUE;
1904+
m_cIntRegArgs = cIntRegArgs;
1905+
m_cFpRegArgs = cFpRegArgs;
1906+
m_cbStackSpace = cbStackSpace;
1907+
}
1908+
1909+
// Returns the byte offset from SP of the saved argument register area: the recorded stack space plus
// two pointer-sized slots. Asserts that a prolog has been described (m_fProlog).
UINT StubLinker::GetSavedRegArgsOffset()
1910+
{
1911+
_ASSERTE(m_fProlog);
1912+
// This is the offset from SP
1913+
// We're assuming that the stublinker will push the arg registers to the bottom of the stack frame
1914+
return m_cbStackSpace + 2 * sizeof(void*); // 2 is for FP and RA
1915+
}
1916+
1917+
// Returns the total frame size in bytes: the recorded stack space plus one pointer-sized slot for each
// of FP, RA, and every recorded int and FP argument register. Asserts that a prolog has been described.
UINT StubLinker::GetStackFrameSize()
1918+
{
1919+
_ASSERTE(m_fProlog);
1920+
return m_cbStackSpace + (2 + m_cIntRegArgs + m_cFpRegArgs) * sizeof(void*); // 2 is for FP and RA
1921+
}
1922+
1923+
#endif // ifdef TARGET_ARM, elif defined(TARGET_ARM64), elif defined(TARGET_RISCV64)
18951924

18961925
#endif // #ifndef DACCESS_COMPILE
18971926

src/coreclr/vm/stublink.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,10 @@ class StubLinker
229229
void DescribeProlog(UINT cIntRegArgs, UINT cVecRegArgs, UINT cCalleeSavedRegs, UINT cbStackFrame);
230230
UINT GetSavedRegArgsOffset();
231231
UINT GetStackFrameSize();
232+
#elif defined(TARGET_RISCV64)
233+
void DescribeProlog(UINT cIntRegArgs, UINT cVecRegArgs, UINT cbStackFrame);
234+
UINT GetSavedRegArgsOffset();
235+
UINT GetStackFrameSize();
232236
#endif
233237

234238
//===========================================================================
@@ -304,6 +308,14 @@ class StubLinker
304308
UINT m_cbStackSpace; // Additional stack space for return buffer and stack alignment
305309
#endif // TARGET_ARM64
306310

311+
#ifdef TARGET_RISCV64
312+
protected:
313+
BOOL m_fProlog; // True if DescribeProlog has been called
314+
UINT m_cIntRegArgs; // Count of int register arguments (x10 - x17)
315+
UINT m_cFpRegArgs; // Count of FP register arguments (f10 - f17)
316+
UINT m_cbStackSpace; // Additional stack space for return buffer and stack alignment
317+
#endif // TARGET_RISCV64
318+
307319
#ifdef STUBLINKER_GENERATES_UNWIND_INFO
308320

309321
#ifdef _DEBUG

0 commit comments

Comments
 (0)