Skip to content

Commit

Permalink
Merge pull request #4231 from Sonicadvance1/minor_div_opt
Browse files Browse the repository at this point in the history
OpcodeDispatcher: Minor division improvement
  • Loading branch information
Sonicadvance1 authored Dec 31, 2024
2 parents 15a1a0f + ffb745b commit 04e785e
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 62 deletions.
18 changes: 9 additions & 9 deletions FEXCore/Source/Interface/Core/OpcodeDispatcher.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3613,16 +3613,16 @@ void OpDispatchBuilder::DIVOp(OpcodeArgs) {
auto ResultAX = _Bfi(GPRSize, 8, 8, UDivOp, URemOp);
StoreGPRRegister(X86State::REG_RAX, ResultAX, OpSize::i16Bit);
} else if (Size == OpSize::i16Bit) {
Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
Ref Src2 = LoadGPRRegister(X86State::REG_RDX);
auto UDivOp = _LUDiv(OpSize::i16Bit, Src1, Src2, Divisor);
auto URemOp = _LURem(OpSize::i16Bit, Src1, Src2, Divisor);

StoreGPRRegister(X86State::REG_RAX, UDivOp, Size);
StoreGPRRegister(X86State::REG_RDX, URemOp, Size);
} else if (Size == OpSize::i32Bit) {
Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
Ref Src2 = LoadGPRRegister(X86State::REG_RDX);

Ref UDivOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LUDiv(OpSize::i32Bit, Src1, Src2, Divisor));
Ref URemOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LURem(OpSize::i32Bit, Src1, Src2, Divisor));
Expand Down Expand Up @@ -3654,7 +3654,7 @@ void OpDispatchBuilder::IDIVOp(OpcodeArgs) {
const auto Size = OpSizeFromSrc(Op);

if (Size == OpSize::i8Bit) {
Ref Src1 = LoadGPRRegister(X86State::REG_RAX, OpSize::i16Bit);
Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
Src1 = _Sbfe(OpSize::i64Bit, 16, 0, Src1);
Divisor = _Sbfe(OpSize::i64Bit, 8, 0, Divisor);

Expand All @@ -3665,16 +3665,16 @@ void OpDispatchBuilder::IDIVOp(OpcodeArgs) {
auto ResultAX = _Bfi(GPRSize, 8, 8, UDivOp, URemOp);
StoreGPRRegister(X86State::REG_RAX, ResultAX, OpSize::i16Bit);
} else if (Size == OpSize::i16Bit) {
Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
Ref Src2 = LoadGPRRegister(X86State::REG_RDX);
auto UDivOp = _LDiv(OpSize::i16Bit, Src1, Src2, Divisor);
auto URemOp = _LRem(OpSize::i16Bit, Src1, Src2, Divisor);

StoreGPRRegister(X86State::REG_RAX, UDivOp, Size);
StoreGPRRegister(X86State::REG_RDX, URemOp, Size);
} else if (Size == OpSize::i32Bit) {
Ref Src1 = LoadGPRRegister(X86State::REG_RAX, Size);
Ref Src2 = LoadGPRRegister(X86State::REG_RDX, Size);
Ref Src1 = LoadGPRRegister(X86State::REG_RAX);
Ref Src2 = LoadGPRRegister(X86State::REG_RDX);

Ref UDivOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LDiv(OpSize::i32Bit, Src1, Src2, Divisor));
Ref URemOp = _Bfe(OpSize::i32Bit, IR::OpSizeAsBits(Size), 0, _LRem(OpSize::i32Bit, Src1, Src2, Divisor));
Expand Down
21 changes: 9 additions & 12 deletions unittests/InstructionCountCI/FlagM/PrimaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -2196,12 +2196,11 @@
]
},
"idiv bl": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 8,
"Comment": "GROUP2 0xf6 /7",
"ExpectedArm64ASM": [
"uxtb w20, w6",
"uxth w21, w4",
"sxth x21, w21",
"sxth x21, w4",
"sxtb x20, w20",
"sdiv x22, x21, x20",
"sdiv x0, x21, x20",
Expand Down Expand Up @@ -2367,20 +2366,18 @@
]
},
"div bx": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 10,
"Comment": "GROUP2 0xf7 /6",
"ExpectedArm64ASM": [
"uxth w20, w6",
"uxth w21, w4",
"uxth w22, w5",
"uxth w0, w21",
"bfi w0, w22, #16, #16",
"udiv w23, w0, w20",
"uxth w0, w21",
"bfi w0, w22, #16, #16",
"uxth w0, w4",
"bfi w0, w5, #16, #16",
"udiv w21, w0, w20",
"uxth w0, w4",
"bfi w0, w5, #16, #16",
"udiv w1, w0, w20",
"msub w20, w1, w20, w0",
"bfxil x4, x23, #0, #16",
"bfxil x4, x21, #0, #16",
"bfxil x5, x20, #0, #16"
]
},
Expand Down
75 changes: 34 additions & 41 deletions unittests/InstructionCountCI/PrimaryGroup.json
Original file line number Diff line number Diff line change
Expand Up @@ -2588,12 +2588,11 @@
]
},
"idiv bl": {
"ExpectedInstructionCount": 9,
"ExpectedInstructionCount": 8,
"Comment": "GROUP2 0xf6 /7",
"ExpectedArm64ASM": [
"uxtb w20, w6",
"uxth w21, w4",
"sxth x21, w21",
"sxth x21, w4",
"sxtb x20, w20",
"sdiv x22, x21, x20",
"sdiv x0, x21, x20",
Expand Down Expand Up @@ -2786,39 +2785,36 @@
]
},
"div bx": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 10,
"Comment": "GROUP2 0xf7 /6",
"ExpectedArm64ASM": [
"uxth w20, w6",
"uxth w21, w4",
"uxth w22, w5",
"uxth w0, w21",
"bfi w0, w22, #16, #16",
"udiv w23, w0, w20",
"uxth w0, w21",
"bfi w0, w22, #16, #16",
"uxth w0, w4",
"bfi w0, w5, #16, #16",
"udiv w21, w0, w20",
"uxth w0, w4",
"bfi w0, w5, #16, #16",
"udiv w1, w0, w20",
"msub w20, w1, w20, w0",
"bfxil x4, x23, #0, #16",
"bfxil x4, x21, #0, #16",
"bfxil x5, x20, #0, #16"
]
},
"div ebx": {
"ExpectedInstructionCount": 12,
"ExpectedInstructionCount": 11,
"Comment": "GROUP2 0xf7 /6",
"ExpectedArm64ASM": [
"mov w20, w6",
"mov w21, w4",
"mov w22, w5",
"mov x0, x21",
"bfi x0, x22, #32, #32",
"udiv x23, x0, x20",
"mov w4, w23",
"mov x0, x21",
"bfi x0, x22, #32, #32",
"mov x0, x4",
"bfi x0, x5, #32, #32",
"udiv x21, x0, x20",
"mov w21, w21",
"mov x0, x4",
"bfi x0, x5, #32, #32",
"udiv x1, x0, x20",
"msub x20, x1, x20, x0",
"mov w5, w20"
"mov w5, w20",
"mov x4, x21"
]
},
"div rbx": {
Expand Down Expand Up @@ -2852,43 +2848,40 @@
]
},
"idiv bx": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 12,
"Comment": "GROUP2 0xf7 /7",
"ExpectedArm64ASM": [
"uxth w20, w6",
"uxth w21, w4",
"uxth w22, w5",
"uxth w0, w21",
"bfi w0, w22, #16, #16",
"uxth w0, w4",
"bfi w0, w5, #16, #16",
"sxth w1, w20",
"sdiv w23, w0, w1",
"uxth w0, w21",
"bfi w0, w22, #16, #16",
"sdiv w21, w0, w1",
"uxth w0, w4",
"bfi w0, w5, #16, #16",
"sxth w1, w20",
"sdiv w2, w0, w1",
"msub w20, w2, w1, w0",
"bfxil x4, x23, #0, #16",
"bfxil x4, x21, #0, #16",
"bfxil x5, x20, #0, #16"
]
},
"idiv ebx": {
"ExpectedInstructionCount": 14,
"ExpectedInstructionCount": 13,
"Comment": "GROUP2 0xf7 /7",
"ExpectedArm64ASM": [
"mov w20, w6",
"mov w21, w4",
"mov w22, w5",
"mov x0, x21",
"bfi x0, x22, #32, #32",
"mov x0, x4",
"bfi x0, x5, #32, #32",
"sxtw x1, w20",
"sdiv x23, x0, x1",
"mov w4, w23",
"mov x0, x21",
"bfi x0, x22, #32, #32",
"sdiv x21, x0, x1",
"mov w21, w21",
"mov x0, x4",
"bfi x0, x5, #32, #32",
"sxtw x2, w20",
"sdiv x1, x0, x2",
"msub x20, x1, x2, x0",
"mov w5, w20"
"mov w5, w20",
"mov x4, x21"
]
},
"idiv rbx": {
Expand Down

0 comments on commit 04e785e

Please sign in to comment.