From 1f7a619c79c6708a8db2d0a0599f64305f2b0382 Mon Sep 17 00:00:00 2001 From: Ryan Houdek Date: Thu, 21 Dec 2023 03:25:22 -0800 Subject: [PATCH] OpcodeDispatcher: Fixes syscall rcx/r11 generation Noticed this while writing #3342. Fixes #3343 The syscall instruction is defined in the documentation that it will set RCX to the next instruction's RIP and R11 to be RFLAGS. We entirely skipped this which I noticed while writing unit tests. Adds unittests to test both 32-bit and 64-bit behaviour because our helper shares code with both. I don't know if anything actually relied on this behaviour but we should definitely support it. --- .../Interface/Core/OpcodeDispatcher.cpp | 19 +- .../Source/Interface/Core/OpcodeDispatcher.h | 1 + .../tests/signal/Syscall_state.32.cpp | 148 +++++++++++++++ .../tests/signal/Syscall_state.64.cpp | 175 ++++++++++++++++++ 4 files changed, 341 insertions(+), 2 deletions(-) create mode 100644 unittests/FEXLinuxTests/tests/signal/Syscall_state.32.cpp create mode 100644 unittests/FEXLinuxTests/tests/signal/Syscall_state.64.cpp diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp index 14cdacfc22..a02bbcd49d 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp @@ -36,6 +36,7 @@ using X86Tables::OpToIndex; #define OpcodeArgs [[maybe_unused]] FEXCore::X86Tables::DecodedOp Op +template void OpDispatchBuilder::SyscallOp(OpcodeArgs) { constexpr size_t SyscallArgs = 7; using SyscallArray = std::array; @@ -126,6 +127,20 @@ void OpDispatchBuilder::SyscallOp(OpcodeArgs) { Arguments[i] = LoadGPRRegister(GPRIndicesRef[i]); } + if (IsSyscallInst) { + // If this is the `Syscall` instruction rather than `int 0x80` then we need to do some additional work. + // RCX = RIP after this instruction + // R11 = EFlags + // Calculate flags. + CalculateDeferredFlags(); + + auto RFLAG = GetPackedRFLAG(); + StoreGPRRegister(X86State::REG_R11, RFLAG, 8); + + auto RIPAfterInst = GetRelocatedPC(Op); + StoreGPRRegister(X86State::REG_RCX, RIPAfterInst, 8); + } + auto SyscallOp = _Syscall( Arguments[0], Arguments[1], @@ -5233,7 +5248,7 @@ void OpDispatchBuilder::INTOp(OpcodeArgs) { #endif if (Literal == SYSCALL_LITERAL) { // Syscall on linux - SyscallOp(Op); + SyscallOp(Op); return; } @@ -6245,7 +6260,7 @@ void InstallOpcodeHandlers(Context::OperatingMode Mode) { }; constexpr std::tuple TwoByteOpTable_64[] = { - {0x05, 1, &OpDispatchBuilder::SyscallOp}, + {0x05, 1, &OpDispatchBuilder::SyscallOp}, }; #define OPD(group, prefix, Reg) (((group - FEXCore::X86Tables::TYPE_GROUP_1) << 6) | (prefix) << 3 | (Reg)) diff --git a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h index 901a0eec23..26f467ab1f 100644 --- a/FEXCore/Source/Interface/Core/OpcodeDispatcher.h +++ b/FEXCore/Source/Interface/Core/OpcodeDispatcher.h @@ -261,6 +261,7 @@ friend class FEXCore::IR::PassManager; template void ALUOp(OpcodeArgs); void INTOp(OpcodeArgs); + template void SyscallOp(OpcodeArgs); void ThunkOp(OpcodeArgs); void LEAOp(OpcodeArgs); diff --git a/unittests/FEXLinuxTests/tests/signal/Syscall_state.32.cpp b/unittests/FEXLinuxTests/tests/signal/Syscall_state.32.cpp new file mode 100644 index 0000000000..921cbce903 --- /dev/null +++ b/unittests/FEXLinuxTests/tests/signal/Syscall_state.32.cpp @@ -0,0 +1,148 @@ +#include +#include +#include +#include +#include +#include +#include + +struct CPUState { + uint32_t Registers[8]; + uint32_t eflags; +}; + +CPUState CapturedState{}; + +enum RegNums { + TEST_REG_EAX = 0, + TEST_REG_EBX, + TEST_REG_ECX, + TEST_REG_EDX, + TEST_REG_ESI, + TEST_REG_EDI, + TEST_REG_ESP, + TEST_REG_EBP, +}; + +__attribute__((naked)) +void DoZeroRegSyscallFault(CPUState State) { + // i386 stores arguments on the stack. + __asm volatile(R"( + // Load flags + push dword [esp + %[FlagsOffset]] + popfd + + // Do getpid syscall. + // Overwrites some arguments. + // Syscall num + mov eax, qword [esp + %[RAXOffset]] + + // Load remaining registers that we can + mov ebx, qword [esp + %[RBXOffset]]; + mov ecx, qword [esp + %[RCXOffset]]; + mov edx, qword [esp + %[RDXOffset]] + mov esi, qword [esp + %[RSIOffset]] + mov edi, qword [esp + %[RDIOffset]]; + mov ebp, qword [esp + %[RBPOffset]]; + // Can't load RSP + + int 0x80; + + // Immediately fault + hlt; + + // We long jump from the signal handler, so this won't continue. + )" + : + // integers are offset by 8 for some reason. + // But the stack is also offset by 4-bytes due to the call. + : [RAXOffset] "i" (offsetof(CPUState, Registers[TEST_REG_EAX]) - 4) + , [RDXOffset] "i" (offsetof(CPUState, Registers[TEST_REG_EDX]) - 4) + , [RSIOffset] "i" (offsetof(CPUState, Registers[TEST_REG_ESI]) - 4) + , [RDIOffset] "i" (offsetof(CPUState, Registers[TEST_REG_EDI]) - 4) + , [RBXOffset] "i" (offsetof(CPUState, Registers[TEST_REG_EBX]) - 4) + , [RCXOffset] "i" (offsetof(CPUState, Registers[TEST_REG_ECX]) - 4) + , [RBPOffset] "i" (offsetof(CPUState, Registers[TEST_REG_EBP]) - 4) + , [FlagsOffset] "i" (offsetof(CPUState, eflags) - 4) + + : "memory"); +} + +static jmp_buf LongJump{}; +static void CapturingHandler(int signal, siginfo_t *siginfo, void* context) { + ucontext_t* _context = (ucontext_t*)context; + + auto RAX = _context->uc_mcontext.gregs[REG_EAX]; + if (RAX > -4095U) { + // Failure to syscall + fprintf(stderr, "Parent thread failed to syscall: %d %s\n", static_cast(-RAX), strerror(-RAX)); + _exit(1); + } + + CPUState &State = CapturedState; + + // These aren't 1:1 mapped +#define COPY(REG) State.Registers[TEST_REG_##REG] = _context->uc_mcontext.gregs[REG_##REG]; + COPY(EAX); + COPY(EBX); + COPY(ECX); + COPY(EDX); + COPY(ESI); + COPY(EDI); + COPY(ESP); + COPY(EBP); + + longjmp(LongJump, 1); +} + + +TEST_CASE("getppid: State") { + // Set up a signal handler for SIGSEGV + struct sigaction act{}; + act.sa_sigaction = CapturingHandler; + act.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &act, nullptr); + + CPUState Object = { + .Registers = { + 0x1011'1213ULL, + 0x2022'2223ULL, + 0x3033'3233ULL, + 0x4044'4243ULL, + 0x5055'5253ULL, + 0x6066'6263ULL, + 0x7077'7273ULL, + 0x8088'8283ULL, + }, + .eflags = + (1U << 0) | // CF + (1U << 1) | // RA1 + (1U << 2) | // PF + (1U << 4) | // AF + (1U << 6) | // ZF + (1U << 7) | // CF + (1U << 9) | // IF (Is always 1 in userspace) + (1U << 11) // OF + }; + + constexpr uint64_t SyscallNum = SYS_sched_yield; + Object.Registers[TEST_REG_EAX] = SyscallNum; + int Value = setjmp(LongJump); + if (Value == 0) { + DoZeroRegSyscallFault(Object); + } + + for (size_t i = 0; i < (sizeof(Object.Registers) / sizeof(Object.Registers[0])); ++i) { + if (i == TEST_REG_ESP || i == TEST_REG_EAX) { + // Needs special handling. + continue; + } + + CHECK(Object.Registers[i] == CapturedState.Registers[i]); + } + + // Syscall success return + CHECK(CapturedState.Registers[TEST_REG_EAX] == 0); + // RSP is untested here. + +} diff --git a/unittests/FEXLinuxTests/tests/signal/Syscall_state.64.cpp b/unittests/FEXLinuxTests/tests/signal/Syscall_state.64.cpp new file mode 100644 index 0000000000..3d6427fb10 --- /dev/null +++ b/unittests/FEXLinuxTests/tests/signal/Syscall_state.64.cpp @@ -0,0 +1,175 @@ +#include +#include +#include +#include +#include +#include +#include + +struct CPUState { + uint64_t Registers[16]; + + uint64_t eflags; +}; + +CPUState CapturedState{}; + +// This refers to the label defined in the inline ASM below. +extern "C" uint64_t HLT_INST; +void * const HaltLocation = &HLT_INST; + +__attribute__((naked)) +void DoZeroRegSyscallFault(CPUState *State) { + // x86-64 ABI puts State pointer in to RDI + __asm volatile(R"( + // Save some registers + push rbx + push rbp + push r12 + push r13 + push r14 + push r15 + + // Load flags + push qword [rdi + %[FlagsOffset]] + popfq + + // Do getpid syscall. + // Overwrites some arguments. + // Syscall num + mov rax, qword [rdi + %[RAXOffset]] + + // Load remaining registers that we can + mov rbx, qword [rdi + %[RBXOffset]]; + mov rcx, qword [rdi + %[RCXOffset]]; + mov rdx, qword [rdi + %[RDXOffset]] + mov rsi, qword [rdi + %[RSIOffset]] + mov rbp, qword [rdi + %[RBPOffset]]; + // Can't load RSP + mov r8, qword [rdi + %[R8Offset]] + mov r9, qword [rdi + %[R9Offset]]; + mov r10, qword [rdi + %[R10Offset]] + mov r11, qword [rdi + %[R11Offset]]; + mov r12, qword [rdi + %[R12Offset]]; + mov r13, qword [rdi + %[R13Offset]]; + mov r14, qword [rdi + %[R14Offset]]; + mov r15, qword [rdi + %[R15Offset]]; + + // Overwrite RDI last. + mov rdi, qword [rdi + %[RDIOffset]]; + + syscall; + + // Immediately fault + HLT_INST: + hlt; + + // We long jump from the signal handler, so this won't continue. + )" + : + // integers are offset by 8 for some reason. + : [RAXOffset] "i" (offsetof(CPUState, Registers[REG_RAX]) - 8) + , [RDXOffset] "i" (offsetof(CPUState, Registers[REG_RDX]) - 8) + , [R10Offset] "i" (offsetof(CPUState, Registers[REG_R10]) - 8) + , [R8Offset] "i" (offsetof(CPUState, Registers[REG_R8]) - 8) + , [RSIOffset] "i" (offsetof(CPUState, Registers[REG_RSI]) - 8) + , [RDIOffset] "i" (offsetof(CPUState, Registers[REG_RDI]) - 8) + , [RBXOffset] "i" (offsetof(CPUState, Registers[REG_RBX]) - 8) + , [RCXOffset] "i" (offsetof(CPUState, Registers[REG_RCX]) - 8) + , [RBPOffset] "i" (offsetof(CPUState, Registers[REG_RBP]) - 8) + , [R9Offset] "i" (offsetof(CPUState, Registers[REG_R9]) - 8) + , [R11Offset] "i" (offsetof(CPUState, Registers[REG_R11]) - 8) + , [R12Offset] "i" (offsetof(CPUState, Registers[REG_R12]) - 8) + , [R13Offset] "i" (offsetof(CPUState, Registers[REG_R13]) - 8) + , [R14Offset] "i" (offsetof(CPUState, Registers[REG_R14]) - 8) + , [R15Offset] "i" (offsetof(CPUState, Registers[REG_R15]) - 8) + , [FlagsOffset] "i" (offsetof(CPUState, eflags) - 8) + + : "memory"); +} + + +static jmp_buf LongJump{}; +static void CapturingHandler(int signal, siginfo_t *siginfo, void* context) { + ucontext_t* _context = (ucontext_t*)context; + + auto RAX = _context->uc_mcontext.gregs[REG_RAX]; + if (RAX > -4095U) { + // Failure to syscall + fprintf(stderr, "Parent thread failed to syscall: %ld %s\n", static_cast(-RAX), strerror(-RAX)); + _exit(1); + } + + CPUState &State = CapturedState; + + memcpy(&State.Registers, _context->uc_mcontext.gregs, sizeof(State.Registers)); + + longjmp(LongJump, 1); +} + + +TEST_CASE("getppid: State") { + // Set up a signal handler for SIGSEGV + struct sigaction act{}; + act.sa_sigaction = CapturingHandler; + act.sa_flags = SA_SIGINFO; + sigaction(SIGSEGV, &act, nullptr); + + CPUState Object = { + .Registers = { + 0x1011'1213'1415'1617ULL, + 0x2022'2223'2425'2627ULL, + 0x3033'3233'3435'3637ULL, + 0x4044'4243'4445'4647ULL, + 0x5055'5253'5455'5657ULL, + 0x6066'6263'6465'6667ULL, + 0x7077'7273'7475'7677ULL, + 0x8088'8283'8485'8687ULL, + 0x9099'9293'9495'9697ULL, + 0xA0AA'A2A3'A4A5'A6A7ULL, + 0xB0BB'B2B3'B4B5'B6B7ULL, + 0xC0CC'C2C3'C4C5'C6C7ULL, + 0xD0DD'D2D3'D4D5'D6D7ULL, + 0xE0EE'E2E3'E4E5'E6E7ULL, + 0xF0FF'F2F3'F4F5'F6F7ULL, + 0x0000'0203'0405'0607ULL, + }, + .eflags = + (1U << 0) | // CF + (1U << 1) | // RA1 + (1U << 2) | // PF + (1U << 4) | // AF + (1U << 6) | // ZF + (1U << 7) | // CF + (1U << 9) | // IF (Is always 1 in userspace) + (1U << 11) // OF + }; + + constexpr uint64_t SyscallNum = SYS_sched_yield; + Object.Registers[REG_RAX] = SyscallNum; + int Value = setjmp(LongJump); + if (Value == 0) { + DoZeroRegSyscallFault(&Object); + } + + for (size_t i = 0; i < 16; ++i) { + if (i == REG_R11 || i == REG_RCX || i == REG_RSP || i == REG_RAX) { + // Needs special handling. + continue; + } + + CHECK(Object.Registers[i] == CapturedState.Registers[i]); + } + + // Syscall success return + CHECK(CapturedState.Registers[REG_RAX] == 0); + + // syscall instruction RCX return. + CHECK(CapturedState.Registers[REG_RCX] == reinterpret_cast(HaltLocation)); + + // Syscall instruction R11 eflags return. + CHECK(Object.eflags == CapturedState.Registers[REG_R11]); + + // RSP is untested here. + +}