Skip to content

Commit ee538a0

Browse files
committed
wip
1 parent f285275 commit ee538a0

File tree

2 files changed

+122
-5
lines changed

2 files changed

+122
-5
lines changed

cranelift/codegen/src/isa/riscv64/inst.isle

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2871,9 +2871,9 @@
28712871
;; zero-extension for the `Value` input.
28722872
(decl put_value_in_reg_for_icmp (IntCC Value) XReg)
28732873

2874-
(decl madd_overflow64 (XReg XReg XReg) ValueRegs)
2874+
(decl umadd_overflow64 (XReg XReg XReg) ValueRegs)
28752875

2876-
(rule (madd_overflow64 x y z)
2876+
(rule (umadd_overflow64 x y z)
28772877
(let ((one XReg (imm $I8 1))
28782878
(hi XReg (rv_mulhu x y))
28792879
(m XReg (rv_mul x y))
@@ -2883,6 +2883,24 @@
28832883
(of XReg (rv_or of_mul of_add)))
28842884
(value_regs sum of)))
28852885

2886+
;; Signed 64-bit multiply-add with overflow detection.
;;
;; Computes `x * y + z`, treating all three inputs as signed 64-bit values,
;; and returns two registers:
;;   0: the low 64 bits of `x * y + z`
;;   1: a flag that is 1 when either the multiplication or the addition left
;;      the signed 64-bit range, and 0 otherwise.
(decl smadd_overflow64 (XReg XReg XReg) ValueRegs)

(rule (smadd_overflow64 x y z)
  (let ((one XReg (imm $I8 1))
        (mul_lo XReg (rv_mul x y))
        (mul_hi XReg (rv_mulh x y))

        ;; The 128-bit signed product fits in 64 bits only when its high half
        ;; is the sign-extension of its low half. Comparing just the two sign
        ;; bits is not enough (e.g. high = 1, low = 0), so every bit of
        ;; `mul_hi` is compared against the replicated sign of `mul_lo`.
        (sign_mul_lo XReg (rv_srai mul_lo (imm12_const 63)))
        (mul_mismatch XReg (rv_xor mul_hi sign_mul_lo))
        (mul_overflow XReg (gen_select_xreg (cmp_eqz mul_mismatch) (zero_reg) one))

        (sum XReg (rv_add mul_lo z))

        ;; The addition overflows when the sign of `sum` differs from the sign
        ;; of both addends; that is bit 63 of `(mul_lo ^ sum) & (z ^ sum)`.
        ;; The logical shift leaves exactly 0 or 1.
        (lhs_flip XReg (rv_xor mul_lo sum))
        (rhs_flip XReg (rv_xor z sum))
        (add_overflow XReg (rv_srli (rv_and lhs_flip rhs_flip) (imm12_const 63)))

        (overflow XReg (rv_or mul_overflow add_overflow)))
    (value_regs sum overflow)))
28862904

28872905
;; Base cases, use the `cc` to determine whether to zero or sign extend.
28882906
(rule 0 (put_value_in_reg_for_icmp cc val)

cranelift/codegen/src/isa/riscv64/lower.isle

Lines changed: 102 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -364,7 +364,6 @@
364364
(value_reg res)
365365
(value_reg of))))
366366

367-
;; TODO
368367
(rule 2 (lower (has_type $I128 (umul_overflow x y)))
369368
(let
370369
((x_regs ValueRegs x)
@@ -387,8 +386,8 @@
387386
;; madd dst_lo, x_lo, y_lo, zero
388387
(dst_hi1 XReg (rv_mulhu x_lo y_lo))
389388
(one XReg (imm $I32 1))
390-
(dst_hi2 ValueRegs (madd_overflow64 x_lo y_hi (value_regs_get dst_hi1 0)))
391-
(dst_hi ValueRegs (madd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0)))
389+
(dst_hi2 ValueRegs (umadd_overflow64 x_lo y_hi (value_regs_get dst_hi1 0)))
390+
(dst_hi ValueRegs (umadd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0)))
392391
(dst_lo XReg (madd x_lo y_lo (zero_reg)))
393392

394393
(of_res1 XReg (rv_or (value_regs_get dst_hi1 1) (value_regs_get dst_hi2 1)))
@@ -397,6 +396,106 @@
397396
(value_regs dst_lo (value_regs_get dst_hi 0))
398397
(value_reg of_res))))
399398

399+
;;; Rules for `smul_overflow` ;;;;;;;;;;;;;
400+
401+
;; Signed multiply with overflow for types of at most 32 bits.
;;
;; Both operands are sign-extended to 64 bits, so the 64-bit product is exact.
;; The first result is that product; the second is 1 when the product does not
;; fit in a signed `ty`, and 0 otherwise.
(rule 0 (lower (has_type (fits_in_32 ty) (smul_overflow x y)))
  (let ((tmp_x XReg (sext x))
        (tmp_y XReg (sext y))
        (res XReg (rv_mul tmp_x tmp_y))
        ;; `res` fits in a signed `ty` exactly when bits `ty_bits - 1` through
        ;; 63 are all equal. XOR-ing `res` with itself shifted left by one
        ;; leaves, at each bit position, whether that bit differs from the bit
        ;; below it, so any set bit at position `ty_bits` or above means the
        ;; product is out of range.
        (adjacent_diff XReg (rv_xor res (rv_slli res (imm12_const 1))))
        (hi XReg (rv_srli adjacent_diff (imm12_const (ty_bits ty))))
        (one XReg (imm $I8 1))
        (of XReg (gen_select_xreg (cmp_eqz hi) (zero_reg) one)))
    (output_pair
      (value_reg res)
      (value_reg of))))
411+
412+
;; Signed 64-bit multiply with overflow.
;;
;; The first result is the low 64 bits of the product; the second is 1 when
;; the full 128-bit signed product does not fit in 64 bits, and 0 otherwise.
(rule 1 (lower (has_type $I64 (smul_overflow x y)))
  (let ((hi XReg (rv_mulh x y))
        (res XReg (rv_mul x y))
        ;; For a signed product the high half is not zero in the in-range
        ;; case: it is the sign-extension of the low half (all ones for a
        ;; negative result). Overflow is any disagreement between the two.
        (sign_fill XReg (rv_srai res (imm12_const 63)))
        (mismatch XReg (rv_xor hi sign_fill))
        (one XReg (imm $I8 1))
        (of XReg (gen_select_xreg (cmp_eqz mismatch) (zero_reg) one)))
    (output_pair
      (value_reg res)
      (value_reg of))))
420+
421+
;; Signed 128-bit multiply with overflow.
;;
;; The first result is the 128-bit product as a (low, high) register pair;
;; the second is the OR of the overflow flags reported by the two cross-term
;; multiply-adds.
;;
;; NOTE(review): the flag only reflects what `smadd_overflow64` reports for
;; the two cross terms. `x_lo`/`y_lo` are the unsigned low halves but are fed
;; to a signed multiply-add, and the `x_hi * y_hi` term is not examined at
;; all. Confirm this is a complete signed 128-bit overflow check before
;; relying on the flag.
(rule 2 (lower (has_type $I128 (smul_overflow x y)))
  (let
      ((x_regs ValueRegs x)
       (x_lo XReg (value_regs_get x_regs 0))
       (x_hi XReg (value_regs_get x_regs 1))

       ;; Get the high/low registers for `y`.
       (y_regs ValueRegs y)
       (y_lo XReg (value_regs_get y_regs 0))
       (y_hi XReg (value_regs_get y_regs 1))

       ;; 128bit mul formula:
       ;;   dst_lo = x_lo * y_lo
       ;;   dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
       ;;
       ;; We can convert the above formula into the following
       ;;   mulhu dst_hi, x_lo, y_lo
       ;;   madd  dst_hi, x_lo, y_hi, dst_hi
       ;;   madd  dst_hi, x_hi, y_lo, dst_hi
       ;;   madd  dst_lo, x_lo, y_lo, zero
       ;;
       ;; `dst_hi1` is a single register with no overflow flag attached, so
       ;; it is passed on directly and contributes nothing to the final flag.
       (dst_hi1 XReg (rv_mulhu x_lo y_lo))
       (dst_hi2 ValueRegs (smadd_overflow64 x_lo y_hi dst_hi1))
       (dst_hi ValueRegs (smadd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0)))
       (dst_lo XReg (madd x_lo y_lo (zero_reg)))

       ;; Register 1 of each multiply-add result is its overflow flag.
       (of_res XReg (rv_or (value_regs_get dst_hi2 1) (value_regs_get dst_hi 1))))
    (output_pair
      (value_regs dst_lo (value_regs_get dst_hi 0))
      (value_reg of_res))))
452+
453+
;;; Rules for `sadd_overflow` ;;;;;;;;;;;;;
454+
455+
;; Signed add with overflow for types of at most 32 bits.
;;
;; Both operands are sign-extended to 64 bits, so the 64-bit sum is exact.
;; The first result is that sum; the second is 1 when the sum does not fit in
;; a signed `ty`, and 0 otherwise.
(rule 0 (lower (has_type (fits_in_32 ty) (sadd_overflow x y)))
  (let ((tmp_x XReg (sext x))
        (tmp_y XReg (sext y))
        (sum XReg (rv_add tmp_x tmp_y))
        ;; The exact sum of two sign-extended `ty` values needs at most
        ;; `ty_bits + 1` bits, so bits `ty_bits` and above are all copies of
        ;; its true sign. It fits in `ty` exactly when bit `ty_bits - 1`
        ;; agrees with bit `ty_bits`. XOR-ing `sum` with itself shifted left
        ;; by one places that comparison at bit `ty_bits` (and zeros above
        ;; it), so the logical shift yields exactly 0 or 1.
        (adjacent_diff XReg (rv_xor sum (rv_slli sum (imm12_const 1))))
        (overflow XReg (rv_srli adjacent_diff (imm12_const (ty_bits ty)))))
    (output_pair
      (value_reg sum)
      (value_reg overflow))))
469+
470+
;; Signed 64-bit add with overflow.
;;
;; The first result is the wrapped 64-bit sum; the second is 1 when the
;; addition overflowed the signed 64-bit range, and 0 otherwise.
(rule 1 (lower (has_type $I64 (sadd_overflow x y)))
  (let ((lhs XReg x)
        (rhs XReg y)
        (sum XReg (rv_add lhs rhs))
        ;; Overflow happens when the sign of `sum` differs from the sign of
        ;; both operands, i.e. bit 63 of `(lhs ^ sum) & (rhs ^ sum)` is set.
        ;; The logical shift turns that bit into a 0/1 flag.
        (lhs_flip XReg (rv_xor lhs sum))
        (rhs_flip XReg (rv_xor rhs sum))
        (overflow XReg (rv_srli (rv_and lhs_flip rhs_flip) (imm12_const 63))))
    (output_pair
      (value_reg sum)
      (value_reg overflow))))
482+
483+
;; Signed 128-bit add with overflow.
;;
;; The first result is the wrapped 128-bit sum as a (low, high) register
;; pair; the second is 1 when the addition overflowed the signed 128-bit
;; range, and 0 otherwise.
(rule 2 (lower (has_type $I128 (sadd_overflow x y)))
  (let ((x_regs ValueRegs x)
        (x_lo XReg (value_regs_get x_regs 0))
        (x_hi XReg (value_regs_get x_regs 1))
        (y_regs ValueRegs y)
        (y_lo XReg (value_regs_get y_regs 0))
        (y_hi XReg (value_regs_get y_regs 1))

        (low XReg (rv_add x_lo y_lo))
        ;; The low limbs are unsigned: a carry out occurred exactly when the
        ;; wrapped sum is below one of the addends in an unsigned compare.
        (carry XReg (rv_sltu low y_lo))
        (high_tmp XReg (rv_add x_hi y_hi))
        (high XReg (rv_add high_tmp carry))

        ;; The sign of the 128-bit value lives in bit 63 of the high limb.
        ;; Overflow happens when the result's sign differs from the sign of
        ;; both operands; the logical shift turns that bit into a 0/1 flag.
        (x_flip XReg (rv_xor x_hi high))
        (y_flip XReg (rv_xor y_hi high))
        (overflow XReg (rv_srli (rv_and x_flip y_flip) (imm12_const 63))))
    (output_pair
      (value_regs low high)
      (value_reg overflow))))
498+
400499
;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
401500
;; Base case, simply subtracting things in registers.
402501

0 commit comments

Comments
 (0)