Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

OpcodeDispatcher: Minor division improvement #4231

Merged
merged 2 commits on Dec 31, 2024 (source and target branch names were not captured)
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 9 additions & 9 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -3613,16 +3613,16 @@ void OpDispatchBuilder::DIVOp(OpcodeArgs) {
auto ResultAX = _Bfi(GPRSize, 8, 8, UDivOp, URemOp);
StoreGPRRegister(X86State::REG_RAX, ResultAX, OpSize::i16Bit);
} else if (Size == OpSize::i16Bit) {
- Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
- Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
+ Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
+ Ref Src2 = LoadGPRRegister(X86State::REG_RDX);
auto UDivOp = _LUDiv(OpSize::i16Bit, Src1, Src2, Divisor);
auto URemOp = _LURem(OpSize::i16Bit, Src1, Src2, Divisor);

StoreGPRRegister(X86State::REG_RAX, UDivOp, Size);
StoreGPRRegister(X86State::REG_RDX, URemOp, Size);
} else if (Size == OpSize::i32Bit) {
- Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
- Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
+ Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
+ Ref Src2 = LoadGPRRegister(X86State::REG_RDX);

Ref UDivOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LUDiv(OpSize::i32Bit, Src1, Src2, Divisor));
Ref URemOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LURem(OpSize::i32Bit, Src1, Src2, Divisor));
Expand Down Expand Up @@ -3654,7 +3654,7 @@ void OpDispatchBuilder::IDIVOp(OpcodeArgs) {
const auto Size = OpSizeFromSrc(Op);

if (Size == OpSize::i8Bit) {
- Ref Src1 = LoadGPRRegister(X86State::REG_RAX, OpSize::i16Bit);
+ Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
Src1 = _Sbfe(OpSize::i64Bit, 16, 0, Src1);
Divisor = _Sbfe(OpSize::i64Bit, 8, 0, Divisor);

Expand All @@ -3665,16 +3665,16 @@ void OpDispatchBuilder::IDIVOp(OpcodeArgs) {
auto ResultAX = _Bfi(GPRSize, 8, 8, UDivOp, URemOp);
StoreGPRRegister(X86State::REG_RAX, ResultAX, OpSize::i16Bit);
} else if (Size == OpSize::i16Bit) {
- Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
- Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
+ Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
+ Ref Src2 = LoadGPRRegister(X86State::REG_RDX);
auto UDivOp = _LDiv(OpSize::i16Bit, Src1, Src2, Divisor);
auto URemOp = _LRem(OpSize::i16Bit, Src1, Src2, Divisor);

StoreGPRRegister(X86State::REG_RAX, UDivOp, Size);
StoreGPRRegister(X86State::REG_RDX, URemOp, Size);
} else if (Size == OpSize::i32Bit) {
- Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
- Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
+ Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
+ Ref Src2 = LoadGPRRegister(X86State::REG_RDX);

Ref UDivOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LDiv(OpSize::i32Bit, Src1, Src2, Divisor));
Ref URemOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LRem(OpSize::i32Bit, Src1, Src2, Divisor));
Expand Down
21 changes: 9 additions & 12 deletions unittests/InstructionCountCI/FlagM/PrimaryGroup.json
Original file line number | Diff line number | Diff line change
Expand Up @@ -2196,12 +2196,11 @@
]
},
"idiv bl": {
- "ExpectedInstructionCount": 9,
+ "ExpectedInstructionCount": 8,
"Comment": "GROUP2 0xf6 /7",
"ExpectedArm64ASM": [
"uxtb w20, w6",
- "uxth w21, w4",
- "sxth x21, w21",
+ "sxth x21, w4",
"sxtb x20, w20",
"sdiv x22, x21, x20",
"sdiv x0, x21, x20",
Expand Down Expand Up @@ -2367,20 +2366,18 @@
]
},
"div bx": {
- "ExpectedInstructionCount": 12,
+ "ExpectedInstructionCount": 10,
"Comment": "GROUP2 0xf7 /6",
"ExpectedArm64ASM": [
"uxth w20, w6",
- "uxth w21, w4",
- "uxth w22, w5",
- "uxth w0, w21",
- "bfi w0, w22, #16, #16",
- "udiv w23, w0, w20",
- "uxth w0, w21",
- "bfi w0, w22, #16, #16",
+ "uxth w0, w4",
+ "bfi w0, w5, #16, #16",
+ "udiv w21, w0, w20",
+ "uxth w0, w4",
+ "bfi w0, w5, #16, #16",
"udiv w1, w0, w20",
"msub w20, w1, w20, w0",
- "bfxil x4, x23, #0, #16",
+ "bfxil x4, x21, #0, #16",
"bfxil x5, x20, #0, #16"
]
},
Expand Down
75 changes: 34 additions & 41 deletions unittests/InstructionCountCI/PrimaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -2588,12 +2588,11 @@
]
},
"idiv bl": {
- "ExpectedInstructionCount": 9,
+ "ExpectedInstructionCount": 8,
"Comment": "GROUP2 0xf6 /7",
"ExpectedArm64ASM": [
"uxtb w20, w6",
- "uxth w21, w4",
- "sxth x21, w21",
+ "sxth x21, w4",
"sxtb x20, w20",
"sdiv x22, x21, x20",
"sdiv x0, x21, x20",
Expand Down Expand Up @@ -2786,39 +2785,36 @@
]
},
"div bx": {
- "ExpectedInstructionCount": 12,
+ "ExpectedInstructionCount": 10,
"Comment": "GROUP2 0xf7 /6",
"ExpectedArm64ASM": [
"uxth w20, w6",
- "uxth w21, w4",
- "uxth w22, w5",
- "uxth w0, w21",
- "bfi w0, w22, #16, #16",
- "udiv w23, w0, w20",
- "uxth w0, w21",
- "bfi w0, w22, #16, #16",
+ "uxth w0, w4",
+ "bfi w0, w5, #16, #16",
+ "udiv w21, w0, w20",
+ "uxth w0, w4",
+ "bfi w0, w5, #16, #16",
"udiv w1, w0, w20",
"msub w20, w1, w20, w0",
- "bfxil x4, x23, #0, #16",
+ "bfxil x4, x21, #0, #16",
"bfxil x5, x20, #0, #16"
]
},
"div ebx": {
- "ExpectedInstructionCount": 12,
+ "ExpectedInstructionCount": 11,
"Comment": "GROUP2 0xf7 /6",
"ExpectedArm64ASM": [
"mov w20, w6",
- "mov w21, w4",
- "mov w22, w5",
- "mov x0, x21",
- "bfi x0, x22, #32, #32",
- "udiv x23, x0, x20",
- "mov w4, w23",
- "mov x0, x21",
- "bfi x0, x22, #32, #32",
+ "mov x0, x4",
+ "bfi x0, x5, #32, #32",
+ "udiv x21, x0, x20",
+ "mov w21, w21",
+ "mov x0, x4",
+ "bfi x0, x5, #32, #32",
"udiv x1, x0, x20",
"msub x20, x1, x20, x0",
- "mov w5, w20"
+ "mov w5, w20",
+ "mov x4, x21"
]
},
"div rbx": {
Expand Down Expand Up @@ -2852,43 +2848,40 @@
]
},
"idiv bx": {
- "ExpectedInstructionCount": 14,
+ "ExpectedInstructionCount": 12,
"Comment": "GROUP2 0xf7 /7",
"ExpectedArm64ASM": [
"uxth w20, w6",
- "uxth w21, w4",
- "uxth w22, w5",
- "uxth w0, w21",
- "bfi w0, w22, #16, #16",
+ "uxth w0, w4",
+ "bfi w0, w5, #16, #16",
"sxth w1, w20",
- "sdiv w23, w0, w1",
- "uxth w0, w21",
- "bfi w0, w22, #16, #16",
+ "sdiv w21, w0, w1",
+ "uxth w0, w4",
+ "bfi w0, w5, #16, #16",
"sxth w1, w20",
"sdiv w2, w0, w1",
"msub w20, w2, w1, w0",
- "bfxil x4, x23, #0, #16",
+ "bfxil x4, x21, #0, #16",
"bfxil x5, x20, #0, #16"
]
},
"idiv ebx": {
- "ExpectedInstructionCount": 14,
+ "ExpectedInstructionCount": 13,
"Comment": "GROUP2 0xf7 /7",
"ExpectedArm64ASM": [
"mov w20, w6",
- "mov w21, w4",
- "mov w22, w5",
- "mov x0, x21",
- "bfi x0, x22, #32, #32",
+ "mov x0, x4",
+ "bfi x0, x5, #32, #32",
"sxtw x1, w20",
- "sdiv x23, x0, x1",
- "mov w4, w23",
- "mov x0, x21",
- "bfi x0, x22, #32, #32",
+ "sdiv x21, x0, x1",
+ "mov w21, w21",
+ "mov x0, x4",
+ "bfi x0, x5, #32, #32",
"sxtw x2, w20",
"sdiv x1, x0, x2",
"msub x20, x1, x2, x0",
- "mov w5, w20"
+ "mov w5, w20",
+ "mov x4, x21"
]
},
"idiv rbx": {
Expand Down
Loading