Skip to content

Commit 922b5b5

Browse files
committed
x86_64: implement 128-bit integer multiply and divide
1 parent d559c61 commit 922b5b5

File tree

2 files changed: +102 / -32 lines changed

2 files changed: +102 / -32 lines changed

src/arch/x86_64/CodeGen.zig

Lines changed: 101 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -3586,7 +3586,7 @@ fn airMulWithOverflow(self: *Self, inst: Air.Inst.Index) !void {
35863586
const bin_op = self.air.extraData(Air.Bin, ty_pl.payload).data;
35873587
const dst_ty = self.typeOf(bin_op.lhs);
35883588
const result: MCValue = switch (dst_ty.zigTypeTag(mod)) {
3589-
.Vector => return self.fail("TODO implement mul_with_overflow for Vector type", .{}),
3589+
.Vector => return self.fail("TODO implement airMulWithOverflow for {}", .{dst_ty.fmt(mod)}),
35903590
.Int => result: {
35913591
try self.spillEflagsIfOccupied();
35923592
try self.spillRegisters(&.{ .rax, .rdx });
@@ -6847,29 +6847,99 @@ fn genMulDivBinOp(
68476847
maybe_inst: ?Air.Inst.Index,
68486848
dst_ty: Type,
68496849
src_ty: Type,
6850-
lhs: MCValue,
6851-
rhs: MCValue,
6850+
lhs_mcv: MCValue,
6851+
rhs_mcv: MCValue,
68526852
) !MCValue {
68536853
const mod = self.bin_file.options.module.?;
68546854
if (dst_ty.zigTypeTag(mod) == .Vector or dst_ty.zigTypeTag(mod) == .Float) return self.fail(
6855-
"TODO implement genMulDivBinOp for {}",
6856-
.{dst_ty.fmt(mod)},
6855+
"TODO implement genMulDivBinOp for {s} from {} to {}",
6856+
.{ @tagName(tag), src_ty.fmt(mod), dst_ty.fmt(mod) },
68576857
);
68586858
const dst_abi_size: u32 = @intCast(dst_ty.abiSize(mod));
68596859
const src_abi_size: u32 = @intCast(src_ty.abiSize(mod));
6860+
6861+
assert(self.register_manager.isRegFree(.rax));
6862+
assert(self.register_manager.isRegFree(.rdx));
6863+
assert(self.eflags_inst == null);
6864+
6865+
if (dst_abi_size == 16 and src_abi_size == 16) {
6866+
switch (tag) {
6867+
else => unreachable,
6868+
.mul, .mul_wrap => {},
6869+
.div_trunc, .div_floor, .div_exact, .rem, .mod => {
6870+
const signed = dst_ty.isSignedInt(mod);
6871+
if (signed) switch (tag) {
6872+
.div_floor, .mod => return self.fail(
6873+
"TODO implement genMulDivBinOp for {s} from {} to {}",
6874+
.{ @tagName(tag), src_ty.fmt(mod), dst_ty.fmt(mod) },
6875+
),
6876+
else => {},
6877+
};
6878+
var callee: ["__udiv?i3".len]u8 = undefined;
6879+
return try self.genCall(.{ .lib = .{
6880+
.return_type = dst_ty.toIntern(),
6881+
.param_types = &.{ src_ty.toIntern(), src_ty.toIntern() },
6882+
.callee = std.fmt.bufPrint(&callee, "__{s}{s}{c}i3", .{
6883+
if (signed) "" else "u",
6884+
switch (tag) {
6885+
.div_trunc, .div_exact => "div",
6886+
.div_floor => if (signed) unreachable else "div",
6887+
.rem => "mod",
6888+
.mod => if (signed) unreachable else "mod",
6889+
else => unreachable,
6890+
},
6891+
intCompilerRtAbiName(@intCast(dst_ty.bitSize(mod))),
6892+
}) catch unreachable,
6893+
} }, &.{ src_ty, src_ty }, &.{ lhs_mcv, rhs_mcv });
6894+
},
6895+
}
6896+
6897+
const reg_locks = self.register_manager.lockRegsAssumeUnused(2, .{ .rax, .rdx });
6898+
defer for (reg_locks) |lock| self.register_manager.unlockReg(lock);
6899+
6900+
const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);
6901+
const tmp_lock = self.register_manager.lockRegAssumeUnused(tmp_reg);
6902+
defer self.register_manager.unlockReg(tmp_lock);
6903+
6904+
if (lhs_mcv.isMemory())
6905+
try self.asmRegisterMemory(.{ ._, .mov }, .rax, lhs_mcv.mem(.qword))
6906+
else
6907+
try self.asmRegisterRegister(.{ ._, .mov }, .rax, lhs_mcv.register_pair[0]);
6908+
if (rhs_mcv.isMemory()) try self.asmRegisterMemory(
6909+
.{ ._, .mov },
6910+
tmp_reg,
6911+
rhs_mcv.address().offset(8).deref().mem(.qword),
6912+
) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, rhs_mcv.register_pair[1]);
6913+
try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, .rax);
6914+
if (rhs_mcv.isMemory())
6915+
try self.asmMemory(.{ ._, .mul }, rhs_mcv.mem(.qword))
6916+
else
6917+
try self.asmRegister(.{ ._, .mul }, rhs_mcv.register_pair[0]);
6918+
try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
6919+
if (lhs_mcv.isMemory()) try self.asmRegisterMemory(
6920+
.{ ._, .mov },
6921+
tmp_reg,
6922+
lhs_mcv.address().offset(8).deref().mem(.qword),
6923+
) else try self.asmRegisterRegister(.{ ._, .mov }, tmp_reg, lhs_mcv.register_pair[1]);
6924+
if (rhs_mcv.isMemory())
6925+
try self.asmRegisterMemory(.{ .i_, .mul }, tmp_reg, rhs_mcv.mem(.qword))
6926+
else
6927+
try self.asmRegisterRegister(.{ .i_, .mul }, tmp_reg, rhs_mcv.register_pair[0]);
6928+
try self.asmRegisterRegister(.{ ._, .add }, .rdx, tmp_reg);
6929+
return .{ .register_pair = .{ .rax, .rdx } };
6930+
}
6931+
68606932
if (switch (tag) {
68616933
else => unreachable,
68626934
.mul, .mul_wrap => dst_abi_size != src_abi_size and dst_abi_size != src_abi_size * 2,
68636935
.div_trunc, .div_floor, .div_exact, .rem, .mod => dst_abi_size != src_abi_size,
6864-
} or src_abi_size > 8) return self.fail("TODO implement genMulDivBinOp from {} to {}", .{
6865-
src_ty.fmt(mod), dst_ty.fmt(mod),
6866-
});
6936+
} or src_abi_size > 8) return self.fail(
6937+
"TODO implement genMulDivBinOp for {s} from {} to {}",
6938+
.{ @tagName(tag), src_ty.fmt(mod), dst_ty.fmt(mod) },
6939+
);
68676940
const ty = if (dst_abi_size <= 8) dst_ty else src_ty;
68686941
const abi_size = if (dst_abi_size <= 8) dst_abi_size else src_abi_size;
68696942

6870-
assert(self.register_manager.isRegFree(.rax));
6871-
assert(self.register_manager.isRegFree(.rdx));
6872-
68736943
const reg_locks = self.register_manager.lockRegs(2, .{ .rax, .rdx });
68746944
defer for (reg_locks) |reg_lock| if (reg_lock) |lock| self.register_manager.unlockReg(lock);
68756945

@@ -6904,7 +6974,7 @@ fn genMulDivBinOp(
69046974
.div_trunc, .div_exact, .rem => .{ ._, .div },
69056975
else => unreachable,
69066976
},
6907-
}, ty, lhs, rhs);
6977+
}, ty, lhs_mcv, rhs_mcv);
69086978

69096979
if (dst_abi_size <= 8) return .{ .register = registerAlias(switch (tag) {
69106980
.mul, .mul_wrap, .div_trunc, .div_exact => .rax,
@@ -6938,37 +7008,37 @@ fn genMulDivBinOp(
69387008

69397009
switch (signedness) {
69407010
.signed => {
6941-
const lhs_lock = switch (lhs) {
7011+
const lhs_lock = switch (lhs_mcv) {
69427012
.register => |reg| self.register_manager.lockReg(reg),
69437013
else => null,
69447014
};
69457015
defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
6946-
const rhs_lock = switch (rhs) {
7016+
const rhs_lock = switch (rhs_mcv) {
69477017
.register => |reg| self.register_manager.lockReg(reg),
69487018
else => null,
69497019
};
69507020
defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
69517021

69527022
// hack around hazard between rhs and div_floor by copying rhs to another register
6953-
const rhs_copy = try self.copyToTmpRegister(ty, rhs);
7023+
const rhs_copy = try self.copyToTmpRegister(ty, rhs_mcv);
69547024
const rhs_copy_lock = self.register_manager.lockRegAssumeUnused(rhs_copy);
69557025
defer self.register_manager.unlockReg(rhs_copy_lock);
69567026

6957-
const div_floor = try self.genInlineIntDivFloor(ty, lhs, rhs);
7027+
const div_floor = try self.genInlineIntDivFloor(ty, lhs_mcv, rhs_mcv);
69587028
try self.genIntMulComplexOpMir(ty, div_floor, .{ .register = rhs_copy });
69597029
const div_floor_lock = self.register_manager.lockReg(div_floor.register);
69607030
defer if (div_floor_lock) |lock| self.register_manager.unlockReg(lock);
69617031

69627032
const result: MCValue = if (maybe_inst) |inst|
6963-
try self.copyToRegisterWithInstTracking(inst, ty, lhs)
7033+
try self.copyToRegisterWithInstTracking(inst, ty, lhs_mcv)
69647034
else
6965-
.{ .register = try self.copyToTmpRegister(ty, lhs) };
7035+
.{ .register = try self.copyToTmpRegister(ty, lhs_mcv) };
69667036
try self.genBinOpMir(.{ ._, .sub }, ty, result, div_floor);
69677037

69687038
return result;
69697039
},
69707040
.unsigned => {
6971-
try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, rhs);
7041+
try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs_mcv, rhs_mcv);
69727042
return .{ .register = registerAlias(.rdx, abi_size) };
69737043
},
69747044
}
@@ -6978,39 +7048,39 @@ fn genMulDivBinOp(
69787048
try self.register_manager.getReg(.rax, if (signedness == .unsigned) maybe_inst else null);
69797049
try self.register_manager.getReg(.rdx, null);
69807050

6981-
const lhs_lock: ?RegisterLock = switch (lhs) {
7051+
const lhs_lock: ?RegisterLock = switch (lhs_mcv) {
69827052
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
69837053
else => null,
69847054
};
69857055
defer if (lhs_lock) |lock| self.register_manager.unlockReg(lock);
69867056

6987-
const actual_rhs: MCValue = blk: {
7057+
const actual_rhs_mcv: MCValue = blk: {
69887058
switch (signedness) {
69897059
.signed => {
6990-
const rhs_lock: ?RegisterLock = switch (rhs) {
7060+
const rhs_lock: ?RegisterLock = switch (rhs_mcv) {
69917061
.register => |reg| self.register_manager.lockRegAssumeUnused(reg),
69927062
else => null,
69937063
};
69947064
defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
69957065

69967066
if (maybe_inst) |inst| {
6997-
break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs);
7067+
break :blk try self.copyToRegisterWithInstTracking(inst, ty, rhs_mcv);
69987068
}
6999-
break :blk MCValue{ .register = try self.copyToTmpRegister(ty, rhs) };
7069+
break :blk MCValue{ .register = try self.copyToTmpRegister(ty, rhs_mcv) };
70007070
},
7001-
.unsigned => break :blk rhs,
7071+
.unsigned => break :blk rhs_mcv,
70027072
}
70037073
};
7004-
const rhs_lock: ?RegisterLock = switch (actual_rhs) {
7074+
const rhs_lock: ?RegisterLock = switch (actual_rhs_mcv) {
70057075
.register => |reg| self.register_manager.lockReg(reg),
70067076
else => null,
70077077
};
70087078
defer if (rhs_lock) |lock| self.register_manager.unlockReg(lock);
70097079

70107080
switch (signedness) {
7011-
.signed => return try self.genInlineIntDivFloor(ty, lhs, actual_rhs),
7081+
.signed => return try self.genInlineIntDivFloor(ty, lhs_mcv, actual_rhs_mcv),
70127082
.unsigned => {
7013-
try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs, actual_rhs);
7083+
try self.genIntMulDivOpMir(.{ ._, .div }, ty, lhs_mcv, actual_rhs_mcv);
70147084
return .{ .register = registerAlias(.rax, abi_size) };
70157085
},
70167086
}
@@ -7038,7 +7108,7 @@ fn genBinOp(
70387108
80, 128 => true,
70397109
else => unreachable,
70407110
})) {
7041-
var callee: ["__add?f3".len]u8 = undefined;
7111+
var callee: ["__mod?f3".len]u8 = undefined;
70427112
const result = try self.genCall(.{ .lib = .{
70437113
.return_type = lhs_ty.toIntern(),
70447114
.param_types = &.{ lhs_ty.toIntern(), rhs_ty.toIntern() },
@@ -7259,15 +7329,15 @@ fn genBinOp(
72597329
abi.RegisterClass.gp,
72607330
);
72617331
const dst_regs_locks = self.register_manager.lockRegs(2, dst_regs);
7262-
for (dst_regs_locks) |dst_lock| if (dst_lock) |lock|
7332+
defer for (dst_regs_locks) |dst_lock| if (dst_lock) |lock|
72637333
self.register_manager.unlockReg(lock);
72647334

72657335
try self.genCopy(lhs_ty, .{ .register_pair = dst_regs }, dst_mcv);
72667336
break :dst dst_regs;
72677337
},
72687338
};
72697339
const dst_regs_locks = self.register_manager.lockRegs(2, dst_regs);
7270-
for (dst_regs_locks) |dst_lock| if (dst_lock) |lock|
7340+
defer for (dst_regs_locks) |dst_lock| if (dst_lock) |lock|
72717341
self.register_manager.unlockReg(lock);
72727342

72737343
const tmp_reg = try self.register_manager.allocReg(null, abi.RegisterClass.gp);

test/behavior/eval.zig

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -515,12 +515,12 @@ const foo_ref = &foo_contents;
515515

516516
test "runtime 128 bit integer division" {
517517
if (builtin.zig_backend == .stage2_wasm) return error.SkipZigTest; // TODO
518-
if (builtin.zig_backend == .stage2_x86_64) return error.SkipZigTest; // TODO
519518
if (builtin.zig_backend == .stage2_aarch64) return error.SkipZigTest; // TODO
520519
if (builtin.zig_backend == .stage2_arm) return error.SkipZigTest; // TODO
521520
if (builtin.zig_backend == .stage2_sparc64) return error.SkipZigTest; // TODO
522521
if (builtin.zig_backend == .stage2_spirv64) return error.SkipZigTest;
523522
if (builtin.zig_backend == .stage2_c and comptime builtin.cpu.arch.isArmOrThumb()) return error.SkipZigTest;
523+
if (builtin.zig_backend == .stage2_x86_64 and builtin.target.ofmt != .elf) return error.SkipZigTest;
524524

525525
var a: u128 = 152313999999999991610955792383;
526526
var b: u128 = 10000000000000000000;

0 commit comments

Comments
 (0)