Skip to content

arc: Use intrinsics for __builtin_mul_overflow () #134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: arc-2024.12
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gcc/config/arc/arc-modes.def
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ along with GCC; see the file COPYING3. If not see

CC_MODE (CC_ZN);
CC_MODE (CC_Z);
CC_MODE (CC_V);
CC_MODE (CC_C);
CC_MODE (CC_FP_GT);
CC_MODE (CC_FP_GE);
Expand Down
1 change: 1 addition & 0 deletions gcc/config/arc/arc-protos.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ extern bool arc_check_mov_const (HOST_WIDE_INT );
extern bool arc_split_mov_const (rtx *);
extern bool arc_can_use_return_insn (void);
extern bool arc_split_move_p (rtx *);
extern void arc_gen_unlikely_cbranch (enum rtx_code, machine_mode, rtx);
#endif /* RTX_CODE */


Expand Down
33 changes: 32 additions & 1 deletion gcc/config/arc/arc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1438,6 +1438,13 @@ get_arc_condition_code (rtx comparison)
case GEU : return ARC_CC_NC;
default : gcc_unreachable ();
}
case E_CC_Vmode:
switch (GET_CODE (comparison))
{
case EQ : return ARC_CC_NV;
case NE : return ARC_CC_V;
default : gcc_unreachable ();
}
case E_CC_FP_GTmode:
if (TARGET_ARGONAUT_SET && TARGET_SPFP)
switch (GET_CODE (comparison))
Expand Down Expand Up @@ -1548,6 +1555,13 @@ arc_select_cc_mode (enum rtx_code op, rtx x, rtx y)
machine_mode mode = GET_MODE (x);
rtx x1;

/* Matches all instructions which can do .f and clobber only the Z flag. */
if (GET_MODE_CLASS (mode) == MODE_INT
&& y == const0_rtx
&& GET_CODE (x) == MULT
&& (op == EQ || op == NE))
return CC_Zmode;

/* For an operation that sets the condition codes as a side-effect, the
C and V flags is not set as for cmp, so we can only use comparisons where
this doesn't matter. (For LT and GE we can use "mi" and "pl"
Expand Down Expand Up @@ -1768,7 +1782,7 @@ arc_init_reg_tables (void)
/* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so
we must explicitly check for them here. */
if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode
|| i == (int) CC_Cmode
|| i == (int) CC_Cmode || i == (int) CC_Vmode
|| i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode
|| i == CC_FPUmode || i == CC_FPUEmode || i == CC_FPU_UNEQmode)
arc_mode_class[i] = 1 << (int) C_MODE;
Expand Down Expand Up @@ -11554,6 +11568,23 @@ arc_libm_function_max_error (unsigned cfn, machine_mode mode,
return default_libm_function_max_error (cfn, mode, boundary_p);
}

/* Generate RTL for conditional branch with rtx comparison CODE in mode
CC_MODE. */

void
arc_gen_unlikely_cbranch (enum rtx_code cmp, machine_mode cc_mode, rtx label)
{
rtx cc_reg, x;

cc_reg = gen_rtx_REG (cc_mode, CC_REG);
label = gen_rtx_LABEL_REF (VOIDmode, label);

x = gen_rtx_fmt_ee (cmp, VOIDmode, cc_reg, const0_rtx);
x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, label, pc_rtx);

emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
}

#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
#define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p

Expand Down
164 changes: 160 additions & 4 deletions gcc/config/arc/arc.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@
simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc,
simd_valign, simd_valign_with_acc, simd_vcontrol,
simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma, mul16_em, div_rem,
fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block"
fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block, mpy"
(cond [(eq_attr "is_sfunc" "yes")
(cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call")
(match_test "flag_pic") (const_string "sfunc")]
Expand Down Expand Up @@ -842,6 +842,9 @@ archs4x, archs4xd"
; Optab prefix for sign/zero-extending operations: the empty string for
; sign_extend and "u" for zero_extend.
(define_code_attr su_optab [(sign_extend "") (zero_extend "u")])

;; Code iterator for sign/zero extension.  A pattern written with ANY_EXTEND
;; is instantiated once with sign_extend and once with zero_extend.
(define_code_iterator ANY_EXTEND [sign_extend zero_extend])

(define_insn "*<SEZ_prefix>xt<SQH_postfix>_cmp0_noout"
[(set (match_operand 0 "cc_set_register" "")
(compare:CC_ZN (SEZ:SI (match_operand:SQH 1 "register_operand" "r"))
Expand Down Expand Up @@ -1068,11 +1071,67 @@ archs4x, archs4xd"
(set_attr "cond" "set_zn")
(set_attr "length" "*,4,4,4,8")])

;; The next two patterns are for plus, ior, xor, and, and mult.
;; Expander for mulvsi4 / umulvsi4 (one instance per ANY_EXTEND code; the
;; name prefix comes from <su_optab>).
;; Operands 0, 1 and 2 are SImode registers; operand 3 is a label.
;; The body emits the flag-setting multiply <su_optab>mulsi3_Vcmp on
;; operands 0..2 and then, via arc_gen_unlikely_cbranch, a jump to operand 3
;; taken when CC_REG read in CC_Vmode compares NE against zero.
;; Only available when TARGET_MPY holds.
(define_expand "<su_optab>mulvsi4"
[(ANY_EXTEND:DI (match_operand:SI 0 "register_operand"))
(ANY_EXTEND:DI (match_operand:SI 1 "register_operand"))
(ANY_EXTEND:DI (match_operand:SI 2 "register_operand"))
(label_ref (match_operand 3 "" ""))]
"TARGET_MPY"
{
emit_insn (gen_<su_optab>mulsi3_Vcmp (operands[0], operands[1],
operands[2]));
arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
DONE;
})

;; SImode multiply that also records overflow in the flags.
;; Operand 0 receives the SImode product of operands 1 and 2.  In parallel,
;; CC_REG is set in CC_Vmode from comparing the DImode product of the
;; extended operands with the extension of the SImode product.  One insn is
;; generated per ANY_EXTEND code; <su_optab> selects between the "mpy.f" and
;; "mpyu.f" mnemonics.
;; The insn is gated on TARGET_MPY, like the other mpy.f patterns and the
;; <su_optab>mulvsi4 expander that emits it.  No extra operand check is
;; needed in the condition because operand 1's predicate already requires a
;; register.
(define_insn "<su_optab>mulsi3_Vcmp"
  [(parallel
    [(set
      (reg:CC_V CC_REG)
      (compare:CC_V
       (mult:DI
	(ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "%0,r,r,r"))
	(ANY_EXTEND:DI (match_operand:SI 2 "nonmemory_operand" "I,L,r,C32")))
       (ANY_EXTEND:DI (mult:SI (match_dup 1) (match_dup 2)))))
     (set (match_operand:SI 0 "register_operand" "=r,r,r,r")
	  (mult:SI (match_dup 1) (match_dup 2)))])]
  "TARGET_MPY"
  "mpy<su_optab>.f\\t%0,%1,%2"
  [(set_attr "length" "4,4,4,8")
   (set_attr "type" "mpy")])

;; SImode multiply whose result is also compared against zero.
;; CC_REG is set in CC_Zmode from comparing (mult:SI op1 op2) with 0, and
;; operand 0 receives the same product.  Emitted as "mpy%?.f"; enabled only
;; under TARGET_MPY.  The third alternative (immediate "i") has length 8.
(define_insn "*mulsi3_cmp0"
[(set (reg:CC_Z CC_REG)
(compare:CC_Z
(mult:SI
(match_operand:SI 1 "register_operand" "%r,0,r")
(match_operand:SI 2 "nonmemory_operand" "rL,I,i"))
(const_int 0)))
(set (match_operand:SI 0 "register_operand" "=r,r,r")
(mult:SI (match_dup 1) (match_dup 2)))]
"TARGET_MPY"
"mpy%?.f\\t%0,%1,%2"
[(set_attr "length" "4,4,8")
(set_attr "type" "mpy")])

;; Flags-only variant of *mulsi3_cmp0: CC_REG is set in CC_Zmode from
;; comparing (mult:SI op0 op1) with 0, and no register result is recorded in
;; the RTL.  The output template writes a literal 0 in the destination
;; position of "mpy%?.f".  Enabled only under TARGET_MPY.
(define_insn "*mulsi3_cmp0_noout"
[(set (reg:CC_Z CC_REG)
(compare:CC_Z
(mult:SI
(match_operand:SI 0 "register_operand" "%r,r,r")
(match_operand:SI 1 "nonmemory_operand" "rL,I,i"))
(const_int 0)))]
"TARGET_MPY"
"mpy%?.f\\t0,%0,%1"
[(set_attr "length" "4,4,8")
(set_attr "type" "mpy")])

;; The next two patterns are for plus, ior, xor, and.
(define_insn "*commutative_binary_cmp0_noout"
[(set (match_operand 0 "cc_set_register" "")
(match_operator 4 "zn_compare_operator"
[(match_operator:SI 3 "commutative_operator"
[(match_operator:SI 3 "commutative_operator_sans_mult"
[(match_operand:SI 1 "register_operand" "%r,r")
(match_operand:SI 2 "nonmemory_operand" "rL,Cal")])
(const_int 0)]))]
Expand All @@ -1085,7 +1144,7 @@ archs4x, archs4xd"
(define_insn "*commutative_binary_cmp0"
[(set (match_operand 3 "cc_set_register" "")
(match_operator 5 "zn_compare_operator"
[(match_operator:SI 4 "commutative_operator"
[(match_operator:SI 4 "commutative_operator_sans_mult"
[(match_operand:SI 1 "register_operand" "%0, 0,r,r")
(match_operand:SI 2 "nonmemory_operand" "rL,rI,r,Cal")])
(const_int 0)]))
Expand Down Expand Up @@ -2679,6 +2738,55 @@ archs4x, archs4xd"
operands[2] = force_reg (SImode, operands[2]);
})

;; SImode add that also sets CC_REG in CC_Vmode.
;; Operand 0 = operand 1 + operand 2.  The flags value is the comparison of
;; the sign-extended SImode sum with the DImode sum of the sign-extended
;; operands.  Emitted as "add.f"; the last alternative (C32) has length 8.
(define_insn "addsi3_v"
[(set (match_operand:SI 0 "register_operand" "=r,r,r, r")
(plus:SI (match_operand:SI 1 "register_operand" "r,r,0, r")
(match_operand:SI 2 "nonmemory_operand" "r,L,I,C32")))
(set (reg:CC_V CC_REG)
(compare:CC_V (sign_extend:DI (plus:SI (match_dup 1)
(match_dup 2)))
(plus:DI (sign_extend:DI (match_dup 1))
(sign_extend:DI (match_dup 2)))))]
""
"add.f\\t%0,%1,%2"
[(set_attr "cond" "set")
(set_attr "type" "compare")
(set_attr "length" "4,4,4,8")])

;; Expander for addvsi4.  Operands 0 and 1 are SImode registers, operand 2 is
;; an SImode nonmemory operand, operand 3 is a label.  Emits addsi3_v on
;; operands 0..2, then (via arc_gen_unlikely_cbranch) a jump to operand 3
;; taken when CC_REG read in CC_Vmode compares NE against zero.
(define_expand "addvsi4"
[(match_operand:SI 0 "register_operand")
(match_operand:SI 1 "register_operand")
(match_operand:SI 2 "nonmemory_operand")
(label_ref (match_operand 3 "" ""))]
""
"emit_insn (gen_addsi3_v (operands[0], operands[1], operands[2]));
arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
DONE;")

;; SImode add that also sets CC_REG in CC_Cmode.
;; Operand 0 = operand 1 + operand 2.  The flags value is the comparison of
;; the SImode sum with operand 1.  Emitted as "add.f"; the last alternative
;; (C32) has length 8.
(define_insn "addsi3_c"
[(set (match_operand:SI 0 "register_operand" "=r,r,r, r")
(plus:SI (match_operand:SI 1 "register_operand" "r,r,0, r")
(match_operand:SI 2 "nonmemory_operand" "r,L,I,C32")))
(set (reg:CC_C CC_REG)
(compare:CC_C (plus:SI (match_dup 1)
(match_dup 2))
(match_dup 1)))]
""
"add.f\\t%0,%1,%2"
[(set_attr "cond" "set")
(set_attr "type" "compare")
(set_attr "length" "4,4,4,8")])

;; Expander for uaddvsi4.  Operands 0 and 1 are SImode registers, operand 2
;; is an SImode nonmemory operand, operand 3 is a label.  Emits addsi3_c on
;; operands 0..2, then (via arc_gen_unlikely_cbranch) a jump to operand 3
;; taken on the LTU condition of CC_REG read in CC_Cmode.
(define_expand "uaddvsi4"
[(match_operand:SI 0 "register_operand")
(match_operand:SI 1 "register_operand")
(match_operand:SI 2 "nonmemory_operand")
(label_ref (match_operand 3 "" ""))]
""
"emit_insn (gen_addsi3_c (operands[0], operands[1], operands[2]));
arc_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
DONE;")

(define_expand "adddi3"
[(parallel
[(set (match_operand:DI 0 "register_operand" "")
Expand Down Expand Up @@ -2914,6 +3022,54 @@ archs4x, archs4xd"
(set_attr "cpu_facility" "*,cd,*,*,*,*,*,*,*,*")
])

;; SImode subtract that also sets CC_REG in CC_Vmode.
;; Operand 0 = operand 1 - operand 2.  The flags value is the comparison of
;; the sign-extended SImode difference with the DImode difference of the
;; sign-extended operands.  Emitted as "sub.f"; the last alternative (C32)
;; has length 8.
(define_insn "subsi3_v"
[(set (match_operand:SI 0 "register_operand" "=r,r,r, r")
(minus:SI (match_operand:SI 1 "register_operand" "r,r,0, r")
(match_operand:SI 2 "nonmemory_operand" "r,L,I,C32")))
(set (reg:CC_V CC_REG)
(compare:CC_V (sign_extend:DI (minus:SI (match_dup 1)
(match_dup 2)))
(minus:DI (sign_extend:DI (match_dup 1))
(sign_extend:DI (match_dup 2)))))]
""
"sub.f\\t%0,%1,%2"
[(set_attr "cond" "set")
(set_attr "type" "compare")
(set_attr "length" "4,4,4,8")])

;; Expander for subvsi4.  Operands 0 and 1 are SImode registers, operand 2 is
;; an SImode nonmemory operand, operand 3 is a label.  Emits subsi3_v on
;; operands 0..2, then (via arc_gen_unlikely_cbranch) a jump to operand 3
;; taken when CC_REG read in CC_Vmode compares NE against zero.
(define_expand "subvsi4"
[(match_operand:SI 0 "register_operand")
(match_operand:SI 1 "register_operand")
(match_operand:SI 2 "nonmemory_operand")
(label_ref (match_operand 3 "" ""))]
""
"emit_insn (gen_subsi3_v (operands[0], operands[1], operands[2]));
arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
DONE;")

;; SImode subtract that also sets CC_REG in CC_Cmode.
;; Operand 0 = operand 1 - operand 2.  The flags value is the comparison of
;; operand 1 with operand 2.  Emitted as "sub.f"; the last alternative (C32)
;; has length 8.
(define_insn "subsi3_c"
[(set (match_operand:SI 0 "register_operand" "=r,r,r, r")
(minus:SI (match_operand:SI 1 "register_operand" "r,r,0, r")
(match_operand:SI 2 "nonmemory_operand" "r,L,I,C32")))
(set (reg:CC_C CC_REG)
(compare:CC_C (match_dup 1)
(match_dup 2)))]
""
"sub.f\\t%0,%1,%2"
[(set_attr "cond" "set")
(set_attr "type" "compare")
(set_attr "length" "4,4,4,8")])

;; Expander for usubvsi4.  Operands 0 and 1 are SImode registers, operand 2
;; is an SImode nonmemory operand, operand 3 is a label.  Emits subsi3_c on
;; operands 0..2, then (via arc_gen_unlikely_cbranch) a jump to operand 3
;; taken on the LTU condition of CC_REG read in CC_Cmode.
(define_expand "usubvsi4"
[(match_operand:SI 0 "register_operand")
(match_operand:SI 1 "register_operand")
(match_operand:SI 2 "nonmemory_operand")
(label_ref (match_operand 3 "" ""))]
""
"emit_insn (gen_subsi3_c (operands[0], operands[1], operands[2]));
arc_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]);
DONE;")

(define_expand "subdi3"
[(set (match_operand:DI 0 "register_operand" "")
(minus:DI (match_operand:DI 1 "register_operand" "")
Expand Down
14 changes: 11 additions & 3 deletions gcc/config/arc/predicates.md
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,8 @@
return code == EQ || code == NE;
case E_CC_Cmode:
return code == LTU || code == GEU;
case E_CC_Vmode:
return code == EQ || code == NE;
case E_CC_FP_GTmode:
return code == GT || code == UNLE;
case E_CC_FP_GEmode:
Expand Down Expand Up @@ -451,7 +453,12 @@
})

(define_predicate "equality_comparison_operator"
(match_code "eq, ne"))
(match_code "eq, ne")
{
machine_mode opmode = GET_MODE (XEXP (op, 0));
return opmode != CC_Vmode;
}
)

(define_predicate "ge_lt_comparison_operator"
(match_code "ge, lt"))
Expand Down Expand Up @@ -504,7 +511,8 @@
|| (mode == CC_ZNmode && rmode == CC_Zmode)
|| (mode == CCmode && rmode == CC_Zmode)
|| (mode == CCmode && rmode == CC_ZNmode)
|| (mode == CCmode && rmode == CC_Cmode))
|| (mode == CCmode && rmode == CC_Cmode)
|| (mode == CCmode && rmode == CC_Vmode))
return TRUE;

return FALSE;
Expand All @@ -524,7 +532,7 @@
if (GET_MODE (op) == CC_ZNmode)
return 1;
/* Fall through. */
case E_CC_ZNmode: case E_CC_Cmode:
case E_CC_ZNmode: case E_CC_Cmode: case E_CC_Vmode:
return GET_MODE (op) == CCmode;
default:
gcc_unreachable ();
Expand Down
15 changes: 10 additions & 5 deletions gcc/fwprop.cc
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ namespace

bool changed_mem_p () const { return result_flags & CHANGED_MEM; }
bool folded_to_constants_p () const;
bool profitable_p () const;
bool likely_profitable_p () const;

bool check_mem (int, rtx) final override;
void note_simplification (int, uint16_t, rtx, rtx) final override;
Expand Down Expand Up @@ -323,7 +323,7 @@ fwprop_propagation::folded_to_constants_p () const
false if it would increase the complexity of the pattern too much. */

bool
fwprop_propagation::profitable_p () const
fwprop_propagation::likely_profitable_p () const
{
if (changed_mem_p ())
return true;
Expand Down Expand Up @@ -398,7 +398,7 @@ try_fwprop_subst_note (insn_info *use_insn, set_info *def,
}
else
{
if (!prop.folded_to_constants_p () && !prop.profitable_p ())
if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ())
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "cannot propagate from insn %d into"
Expand Down Expand Up @@ -449,7 +449,11 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
if (prop.num_replacements == 0)
return false;

if (!prop.profitable_p ())
if (!prop.likely_profitable_p ()
&& (prop.changed_mem_p ()
|| contains_mem_rtx_p (src)
|| use_insn->is_asm ()
|| !single_set (use_rtl)))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "cannot propagate from insn %d into"
Expand Down Expand Up @@ -481,7 +485,8 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change,
redo_changes (0);
auto new_cost = set_src_cost (SET_SRC (use_set),
GET_MODE (SET_DEST (use_set)), speed);
if (new_cost > old_cost)
if (new_cost > old_cost
|| (new_cost == old_cost && !prop.likely_profitable_p ()))
{
if (dump_file)
fprintf (dump_file, "change not profitable"
Expand Down
Loading