|
364 | 364 | (value_reg res)
|
365 | 365 | (value_reg of))))
|
366 | 366 |
|
367 |
| -;; TODO |
368 | 367 | (rule 2 (lower (has_type $I128 (umul_overflow x y)))
|
369 | 368 | (let
|
370 | 369 | ((x_regs ValueRegs x)
|
|
387 | 386 | ;; madd dst_lo, x_lo, y_lo, zero
|
388 | 387 | (dst_hi1 XReg (rv_mulhu x_lo y_lo))
|
389 | 388 | (one XReg (imm $I32 1))
|
390 |
| - (dst_hi2 ValueRegs (madd_overflow64 x_lo y_hi (value_regs_get dst_hi1 0))) |
391 |
| - (dst_hi ValueRegs (madd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0))) |
| 389 | + (dst_hi2 ValueRegs (umadd_overflow64 x_lo y_hi (value_regs_get dst_hi1 0))) |
| 390 | + (dst_hi ValueRegs (umadd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0))) |
392 | 391 | (dst_lo XReg (madd x_lo y_lo (zero_reg)))
|
393 | 392 |
|
394 | 393 | (of_res1 XReg (rv_or (value_regs_get dst_hi1 1) (value_regs_get dst_hi2 1)))
|
|
397 | 396 | (value_regs dst_lo (value_regs_get dst_hi 0))
|
398 | 397 | (value_reg of_res))))
|
399 | 398 |
|
| 399 | +;;; Rules for `smul_overflow` ;;;;;;;;;;;;; |
| 400 | + |
;; Signed multiply with overflow flag for types of at most 32 bits.
;;
;; Both operands are sign-extended with `sext` and multiplied with `rv_mul`.
;; Assuming `sext` widens to the full 64-bit register (TODO confirm), the
;; product `res` of two <=32-bit values is exact in 64 bits.
;;
;; Overflow happens iff `res` is not representable in `bits = (ty_bits ty)`
;; signed bits, i.e. iff `res` is outside [-2^(bits-1), 2^(bits-1) - 1].
;; Doubling `res` moves that boundary to bit `bits`, so
;; `(res + res) >> bits` (arithmetic) is 0 for an in-range non-negative
;; product, -1 for an in-range negative product, and anything else on
;; overflow. Comparing it with the sign-fill of `res` therefore gives the
;; flag. The previous check (`res >> bits != 0`) flagged every negative
;; product and missed positive overflow into bit `bits - 1`.
(rule 0 (lower (has_type (fits_in_32 ty) (smul_overflow x y)))
  (let ((tmp_x XReg (sext x))
        (tmp_y XReg (sext y))
        (res XReg (rv_mul tmp_x tmp_y))
        ;; 0 when `res` is non-negative, -1 (all ones) when it is negative.
        (sign XReg (rv_srai res (imm12_const 63)))
        ;; Bits of `2 * res` above the type width; equals `sign` iff in range.
        (hi XReg (rv_srai (rv_add res res) (imm12_const (ty_bits ty))))
        ;; Non-zero exactly when the product does not fit in `ty`.
        (mismatch XReg (rv_xor hi sign))
        (one XReg (imm $I8 1))
        ;; Same select shape as before: zero when the test value is zero,
        ;; otherwise `one`.
        (of XReg (gen_select_xreg (cmp_eqz mismatch) (zero_reg) one)))
    (output_pair
      (value_reg res)
      (value_reg of))))
| 411 | + |
;; Signed 64-bit multiply with overflow flag.
;;
;; `res` is the low 64 bits of the product (`rv_mul`) and `hi` is produced by
;; `rv_mulh` (presumably the RISC-V `mulh` instruction, i.e. the upper 64 bits
;; of the signed 128-bit product -- confirm against the helper definition).
;;
;; The product fits in 64 signed bits iff the upper half is a pure sign
;; extension of the lower half, i.e. `hi == res >> 63` (arithmetic shift).
;; The previous check (`hi != 0`) reported overflow for every negative
;; in-range product, whose upper half is -1 rather than 0.
(rule 1 (lower (has_type $I64 (smul_overflow x y)))
  (let ((res XReg (rv_mul x y))
        (hi XReg (rv_mulh x y))
        ;; 0 when `res` is non-negative, -1 (all ones) when it is negative.
        (sign XReg (rv_srai res (imm12_const 63)))
        ;; Non-zero exactly when `hi` is not the sign extension of `res`.
        (mismatch XReg (rv_xor hi sign))
        (one XReg (imm $I8 1))
        ;; Zero when `mismatch` is zero, otherwise `one`.
        (of XReg (gen_select_xreg (cmp_eqz mismatch) (zero_reg) one)))
    (output_pair
      (value_reg res)
      (value_reg of))))
| 420 | + |
;; Signed 128-bit multiply with overflow flag.
;;
;; The operands are split into 64-bit halves and the low 128 bits of the
;; product are assembled as
;;   dst_lo = x_lo * y_lo
;;   dst_hi = mulhu(x_lo, y_lo) + (x_lo * y_hi) + (x_hi * y_lo)
;; The two cross terms are accumulated through `smadd_overflow64`, which is
;; used here as returning a two-lane value: lane 0 the accumulated result and
;; lane 1 an overflow indicator (semantics defined elsewhere -- confirm).
;;
;; Changes relative to the previous version:
;;  * `dst_hi1` is a single `XReg` produced by `rv_mulhu`; it has no lane 1,
;;    so it no longer contributes an "overflow" lane to the final flag.
;;  * The unused `one` binding was dropped.
;;
;; NOTE(review): this rule never inspects `x_hi * y_hi`, and no helper call
;; receives both high halves. For x = y = 2^64 every term that is computed is
;; zero, so overflow cannot be reported although the true product (2^128)
;; does not fit. The overflow computation likely needs to be reworked.
(rule 2 (lower (has_type $I128 (smul_overflow x y)))
  (let
    (;; Get the high/low registers for `x`.
     (x_regs ValueRegs x)
     (x_lo XReg (value_regs_get x_regs 0))
     (x_hi XReg (value_regs_get x_regs 1))

     ;; Get the high/low registers for `y`.
     (y_regs ValueRegs y)
     (y_lo XReg (value_regs_get y_regs 0))
     (y_hi XReg (value_regs_get y_regs 1))

     ;; Upper 64 bits of the unsigned product of the two low halves.
     (dst_hi1 XReg (rv_mulhu x_lo y_lo))
     ;; Accumulate the cross terms into the high half.
     (dst_hi2 ValueRegs (smadd_overflow64 x_lo y_hi dst_hi1))
     (dst_hi ValueRegs (smadd_overflow64 x_hi y_lo (value_regs_get dst_hi2 0)))
     ;; Low half of the result.
     (dst_lo XReg (madd x_lo y_lo (zero_reg)))

     ;; Combine the overflow lanes reported by the two accumulation steps.
     (of_res XReg (rv_or (value_regs_get dst_hi2 1) (value_regs_get dst_hi 1))))
    (output_pair
      (value_regs dst_lo (value_regs_get dst_hi 0))
      (value_reg of_res))))
| 452 | + |
| 453 | +;;; Rules for `sadd_overflow` ;;;;;;;;;;;;; |
| 454 | + |
;; Signed add with overflow flag for types of at most 32 bits.
;;
;; Both operands are sign-extended with `sext` and added with `rv_add`.
;; Assuming `sext` widens to the full 64-bit register (TODO confirm), `sum`
;; is the exact mathematical sum.
;;
;; Overflow happens iff `sum` is outside [-2^(bits-1), 2^(bits-1) - 1] where
;; `bits = (ty_bits ty)`. Doubling `sum` moves that boundary to bit `bits`,
;; so `(sum + sum) >> bits` (arithmetic) is 0 for an in-range non-negative
;; sum, -1 for an in-range negative sum, and anything else on overflow.
;;
;; The previous version compared the operand sign with `sum >> bits`; since
;; `sum` is exact, that value always matches the operands' sign when they
;; agree, so overflow was never reported.
(rule 0 (lower (has_type (fits_in_32 ty) (sadd_overflow x y)))
  (let ((tmp_x XReg (sext x))
        (tmp_y XReg (sext y))
        (sum XReg (rv_add tmp_x tmp_y))
        ;; 0 when `sum` is non-negative, -1 (all ones) when it is negative.
        (sign XReg (rv_srai sum (imm12_const 63)))
        ;; Bits of `2 * sum` above the type width; equals `sign` iff in range.
        (hi XReg (rv_srai (rv_add sum sum) (imm12_const (ty_bits ty))))
        ;; Non-zero exactly when the sum does not fit in `ty`.
        (mismatch XReg (rv_xor hi sign))
        (one XReg (imm $I8 1))
        ;; Zero when `mismatch` is zero, otherwise `one`, matching the flag
        ;; encoding used by the `smul_overflow` rules.
        (overflow XReg (gen_select_xreg (cmp_eqz mismatch) (zero_reg) one)))
    (output_pair
      (value_reg sum)
      (value_reg overflow))))
| 469 | + |
;; Signed 64-bit add with overflow flag.
;;
;; `sum` is the wrapping 64-bit sum. Signed overflow is detected with two
;; signed comparisons:
;;   * when `y >= 0` the sum must not be smaller than `x`;
;;   * when `y <  0` the sum must be smaller than `x`.
;; Hence overflow == (sum < x) XOR (y < 0).
;;
;; Both comparison results are 0 or 1, so the flag is 0 or 1. The previous
;; version built the flag from `srai ... 63` sign masks, which yields an
;; all-ones value instead of 1 on overflow and is inconsistent with the
;; `smul_overflow` rules; it also bound an unused `one`.
(rule 1 (lower (has_type $I64 (sadd_overflow x y)))
  (let ((sum XReg (rv_add x y))
        ;; 1 when the wrapped sum compares below `x` (signed).
        (sum_below_x XReg (rv_slt sum x))
        ;; 1 when `y` is negative.
        (y_negative XReg (rv_slt y (zero_reg)))
        (overflow XReg (rv_xor sum_below_x y_negative)))
    (output_pair
      (value_reg sum)
      (value_reg overflow))))
| 482 | + |
;; Signed 128-bit add with overflow flag.
;;
;; The operands are split into 64-bit halves. The low halves are added first;
;; a carry out of that addition occurred iff the wrapped low sum is
;; *unsigned*-less-than one of the addends, so the carry is computed with
;; `rv_sltu` (the previous signed `rv_slt` gave the wrong carry whenever the
;; compared values had bit 63 set, e.g. x_lo = 1, y_lo = all ones).
;;
;; Signed overflow of the full 128-bit add occurs iff both operands have the
;; same sign and the result's sign differs, i.e. iff bit 63 of
;; `(x_hi ^ high) & (y_hi ^ high)` is set. Testing that with a signed
;; compare against zero yields a 0/1 flag (the previous version produced an
;; all-ones mask and bound an unused `one`).
(rule 2 (lower (has_type $I128 (sadd_overflow x y)))
  (let (;; Get the high/low registers for `x`.
        (x_regs ValueRegs x)
        (x_lo XReg (value_regs_get x_regs 0))
        (x_hi XReg (value_regs_get x_regs 1))

        ;; Get the high/low registers for `y`.
        (y_regs ValueRegs y)
        (y_lo XReg (value_regs_get y_regs 0))
        (y_hi XReg (value_regs_get y_regs 1))

        ;; Low half and its carry-out.
        (low XReg (rv_add x_lo y_lo))
        (carry XReg (rv_sltu low y_lo))

        ;; High half including the carry from the low half.
        (high_tmp XReg (rv_add x_hi y_hi))
        (high XReg (rv_add high_tmp carry))

        ;; Bit 63 is set in each of these when the result's sign differs from
        ;; the corresponding operand's sign.
        (x_flip XReg (rv_xor x_hi high))
        (y_flip XReg (rv_xor y_hi high))
        ;; 1 iff the result's sign differs from both operands' signs.
        (overflow XReg (rv_slt (rv_and x_flip y_flip) (zero_reg))))
    (output_pair
      (value_regs low high)
      (value_reg overflow))))
| 498 | + |
400 | 499 | ;;;; Rules for `isub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
401 | 500 | ;; Base case, simply subtracting things in registers.
|
402 | 501 |
|
|
0 commit comments