From 9f95d5c217f2d12d8228c7ee2a869252ea7b68f9 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Tue, 17 Sep 2024 13:42:30 +0100 Subject: [PATCH 1/6] Revert "Revert "PR rtl-optimization/111267: Improved forward propagation."" This reverts commit 8a4f766221bf9cf019d6b416fa25294def891363. Signed-off-by: Luis Silva --- gcc/fwprop.cc | 15 ++++++++++----- gcc/testsuite/gcc.target/i386/pr111267.c | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 5 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/pr111267.c diff --git a/gcc/fwprop.cc b/gcc/fwprop.cc index 11a3dfc9546d..de543923b92f 100644 --- a/gcc/fwprop.cc +++ b/gcc/fwprop.cc @@ -180,7 +180,7 @@ namespace bool changed_mem_p () const { return result_flags & CHANGED_MEM; } bool folded_to_constants_p () const; - bool profitable_p () const; + bool likely_profitable_p () const; bool check_mem (int, rtx) final override; void note_simplification (int, uint16_t, rtx, rtx) final override; @@ -323,7 +323,7 @@ fwprop_propagation::folded_to_constants_p () const false if it would increase the complexity of the pattern too much. 
*/ bool -fwprop_propagation::profitable_p () const +fwprop_propagation::likely_profitable_p () const { if (changed_mem_p ()) return true; @@ -398,7 +398,7 @@ try_fwprop_subst_note (insn_info *use_insn, set_info *def, } else { - if (!prop.folded_to_constants_p () && !prop.profitable_p ()) + if (!prop.folded_to_constants_p () && !prop.likely_profitable_p ()) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "cannot propagate from insn %d into" @@ -449,7 +449,11 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change, if (prop.num_replacements == 0) return false; - if (!prop.profitable_p ()) + if (!prop.likely_profitable_p () + && (prop.changed_mem_p () + || contains_mem_rtx_p (src) + || use_insn->is_asm () + || !single_set (use_rtl))) { if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "cannot propagate from insn %d into" @@ -481,7 +485,8 @@ try_fwprop_subst_pattern (obstack_watermark &attempt, insn_change &use_change, redo_changes (0); auto new_cost = set_src_cost (SET_SRC (use_set), GET_MODE (SET_DEST (use_set)), speed); - if (new_cost > old_cost) + if (new_cost > old_cost + || (new_cost == old_cost && !prop.likely_profitable_p ())) { if (dump_file) fprintf (dump_file, "change not profitable" diff --git a/gcc/testsuite/gcc.target/i386/pr111267.c b/gcc/testsuite/gcc.target/i386/pr111267.c new file mode 100644 index 000000000000..e3d549d8cbaf --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr111267.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ +struct S { float a, b, c, d; }; + +int +bar (struct S x, struct S y) +{ + return x.b <= y.d && x.c >= y.a; +} + +/* { dg-final { scan-assembler-not "movq" } } */ +/* { dg-final { scan-assembler-not "xchgq" } } */ +/* { dg-final { scan-assembler-not "shrq" } } */ +/* { dg-final { scan-assembler-not "movd" } } */ From 1b8394bdae055fa8bc1f4fad0ce0ceeb817566e2 Mon Sep 17 00:00:00 2001 From: Shahab Vahedi Date: Wed, 6 Sep 2023 14:31:22 +0200 
Subject: [PATCH 2/6] ARC: Use intrinsics for __builtin_add_overflow*() This patch covers signed and unsigned additions. The generated code would be something along these lines: signed: add.f r0, r1, r2 b.v @label unsigned: add.f r0, r1, r2 b.c @label gcc/ChangeLog: * config/arc/arc-modes.def: Add CC_V mode. * config/arc/predicates.md (proper_comparison_operator): Handle E_CC_Vmode. (equality_comparison_operator): Exclude CC_Vmode from eq/ne. (cc_set_register): Handle CC_Vmode. (cc_use_register): Likewise. * config/arc/arc.md (addsi3_v): New insn. (addvsi4): New expand. (addsi3_c): New insn. (uaddvsi4): New expand. * config/arc/arc-protos.h (arc_gen_unlikely_cbranch): New. * config/arc/arc.cc (arc_gen_unlikely_cbranch): New. (get_arc_condition_code): Handle E_CC_Vmode. (arc_init_reg_tables): Handle CC_Vmode. gcc/testsuite/ChangeLog: * gcc.target/arc/overflow-1.c: New. Signed-off-by: Shahab Vahedi --- gcc/config/arc/arc-modes.def | 1 + gcc/config/arc/arc-protos.h | 1 + gcc/config/arc/arc.cc | 26 +++++- gcc/config/arc/arc.md | 49 +++++++++++ gcc/config/arc/predicates.md | 14 ++- gcc/testsuite/gcc.target/arc/overflow-1.c | 100 ++++++++++++++++++++++ 6 files changed, 187 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arc/overflow-1.c diff --git a/gcc/config/arc/arc-modes.def b/gcc/config/arc/arc-modes.def index 8017ec53b5e8..8f0d8553a2be 100644 --- a/gcc/config/arc/arc-modes.def +++ b/gcc/config/arc/arc-modes.def @@ -24,6 +24,7 @@ along with GCC; see the file COPYING3. 
If not see CC_MODE (CC_ZN); CC_MODE (CC_Z); +CC_MODE (CC_V); CC_MODE (CC_C); CC_MODE (CC_FP_GT); CC_MODE (CC_FP_GE); diff --git a/gcc/config/arc/arc-protos.h b/gcc/config/arc/arc-protos.h index 281cdfc4ea96..f90f2e3d7b96 100644 --- a/gcc/config/arc/arc-protos.h +++ b/gcc/config/arc/arc-protos.h @@ -55,6 +55,7 @@ extern bool arc_check_mov_const (HOST_WIDE_INT ); extern bool arc_split_mov_const (rtx *); extern bool arc_can_use_return_insn (void); extern bool arc_split_move_p (rtx *); +extern void arc_gen_unlikely_cbranch (enum rtx_code, machine_mode, rtx); #endif /* RTX_CODE */ diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index 686de0ff2d57..e3d535767684 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -1438,6 +1438,13 @@ get_arc_condition_code (rtx comparison) case GEU : return ARC_CC_NC; default : gcc_unreachable (); } + case E_CC_Vmode: + switch (GET_CODE (comparison)) + { + case EQ : return ARC_CC_NV; + case NE : return ARC_CC_V; + default : gcc_unreachable (); + } case E_CC_FP_GTmode: if (TARGET_ARGONAUT_SET && TARGET_SPFP) switch (GET_CODE (comparison)) @@ -1768,7 +1775,7 @@ arc_init_reg_tables (void) /* mode_class hasn't been initialized yet for EXTRA_CC_MODES, so we must explicitly check for them here. */ if (i == (int) CCmode || i == (int) CC_ZNmode || i == (int) CC_Zmode - || i == (int) CC_Cmode + || i == (int) CC_Cmode || i == (int) CC_Vmode || i == CC_FP_GTmode || i == CC_FP_GEmode || i == CC_FP_ORDmode || i == CC_FPUmode || i == CC_FPUEmode || i == CC_FPU_UNEQmode) arc_mode_class[i] = 1 << (int) C_MODE; @@ -11554,6 +11561,23 @@ arc_libm_function_max_error (unsigned cfn, machine_mode mode, return default_libm_function_max_error (cfn, mode, boundary_p); } +/* Generate RTL for conditional branch with rtx comparison CODE in mode + CC_MODE. 
*/ + +void +arc_gen_unlikely_cbranch (enum rtx_code cmp, machine_mode cc_mode, rtx label) +{ + rtx cc_reg, x; + + cc_reg = gen_rtx_REG (cc_mode, CC_REG); + label = gen_rtx_LABEL_REF (VOIDmode, label); + + x = gen_rtx_fmt_ee (cmp, VOIDmode, cc_reg, const0_rtx); + x = gen_rtx_IF_THEN_ELSE (VOIDmode, x, label, pc_rtx); + + emit_unlikely_jump (gen_rtx_SET (pc_rtx, x)); +} + #undef TARGET_USE_ANCHORS_FOR_SYMBOL_P #define TARGET_USE_ANCHORS_FOR_SYMBOL_P arc_use_anchors_for_symbol_p diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 9004b6085a23..b5981c2caada 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -2679,6 +2679,55 @@ archs4x, archs4xd" operands[2] = force_reg (SImode, operands[2]); }) +(define_insn "addsi3_v" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r") + (plus:SI (match_operand:SI 1 "register_operand" "r,r,0, r") + (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32"))) + (set (reg:CC_V CC_REG) + (compare:CC_V (sign_extend:DI (plus:SI (match_dup 1) + (match_dup 2))) + (plus:DI (sign_extend:DI (match_dup 1)) + (sign_extend:DI (match_dup 2)))))] + "" + "add.f\\t%0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,4,8")]) + +(define_expand "addvsi4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand") + (label_ref (match_operand 3 "" ""))] + "" + "emit_insn (gen_addsi3_v (operands[0], operands[1], operands[2])); + arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE;") + +(define_insn "addsi3_c" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r") + (plus:SI (match_operand:SI 1 "register_operand" "r,r,0, r") + (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32"))) + (set (reg:CC_C CC_REG) + (compare:CC_C (plus:SI (match_dup 1) + (match_dup 2)) + (match_dup 1)))] + "" + "add.f\\t%0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,4,8")]) + 
+(define_expand "uaddvsi4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand") + (label_ref (match_operand 3 "" ""))] + "" + "emit_insn (gen_addsi3_c (operands[0], operands[1], operands[2])); + arc_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]); + DONE;") + (define_expand "adddi3" [(parallel [(set (match_operand:DI 0 "register_operand" "") diff --git a/gcc/config/arc/predicates.md b/gcc/config/arc/predicates.md index afcb8e6eb62f..7e016c05b6fb 100644 --- a/gcc/config/arc/predicates.md +++ b/gcc/config/arc/predicates.md @@ -419,6 +419,8 @@ return code == EQ || code == NE; case E_CC_Cmode: return code == LTU || code == GEU; + case E_CC_Vmode: + return code == EQ || code == NE; case E_CC_FP_GTmode: return code == GT || code == UNLE; case E_CC_FP_GEmode: @@ -451,7 +453,12 @@ }) (define_predicate "equality_comparison_operator" - (match_code "eq, ne")) + (match_code "eq, ne") + { + machine_mode opmode = GET_MODE (XEXP (op, 0)); + return opmode != CC_Vmode; + } +) (define_predicate "ge_lt_comparison_operator" (match_code "ge, lt")) @@ -504,7 +511,8 @@ || (mode == CC_ZNmode && rmode == CC_Zmode) || (mode == CCmode && rmode == CC_Zmode) || (mode == CCmode && rmode == CC_ZNmode) - || (mode == CCmode && rmode == CC_Cmode)) + || (mode == CCmode && rmode == CC_Cmode) + || (mode == CCmode && rmode == CC_Vmode)) return TRUE; return FALSE; @@ -524,7 +532,7 @@ if (GET_MODE (op) == CC_ZNmode) return 1; /* Fall through. 
*/ - case E_CC_ZNmode: case E_CC_Cmode: + case E_CC_ZNmode: case E_CC_Cmode: case E_CC_Vmode: return GET_MODE (op) == CCmode; default: gcc_unreachable (); diff --git a/gcc/testsuite/gcc.target/arc/overflow-1.c b/gcc/testsuite/gcc.target/arc/overflow-1.c new file mode 100644 index 000000000000..01b3e8ad0fab --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/overflow-1.c @@ -0,0 +1,100 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <stdbool.h> +#include <stdint.h> + +/* + * add.f r0,r0,r1 + * st_s r0,[r2] + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool add_overflow (int32_t a, int32_t b, int32_t *res) +{ + return __builtin_add_overflow (a, b, res); +} + +/* + * add.f r0,r0,-1234 + * st_s r0,[r1] + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool addi_overflow (int32_t a, int32_t *res) +{ + return __builtin_add_overflow (a, -1234, res); +} + +/* + * add.f r0,r0,r1 + * st_s r0,[r2] + * mov_s r0,1 + * j_s.d [blink] + * mov.hs r0,0 + */ +bool uadd_overflow (uint32_t a, uint32_t b, uint32_t *res) +{ + return __builtin_add_overflow (a, b, res); +} + +/* + * add.f r2,r0, 4321 + * seths r0,r0,-4321 + * j_s.d [blink] + * st_s r2,[r1] + */ +bool uaddi_overflow (uint32_t a, uint32_t *res) +{ + return __builtin_add_overflow (a, 4321, res); +} + +/* + * add.f r0,r0,r1 + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool add_overflow_p (int32_t a, int32_t b, int32_t res) +{ + return __builtin_add_overflow_p (a, b, res); +} + +/* + * add.f r0,r0,-1000 + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool addi_overflow_p (int32_t a, int32_t res) +{ + return __builtin_add_overflow_p (a, -1000, res); +} + +/* + * add.f 0,r0,r1 + * mov_s r0,1 + * j_s.d [blink] + * mov.hs r0,0 + */ +bool uadd_overflow_p (uint32_t a, uint32_t b, uint32_t res) +{ + return __builtin_add_overflow_p (a, b, res); +} + +/* + * j_s.d [blink] + * seths r0,r0,-2000 + */ +bool uaddi_overflow_p (uint32_t a, uint32_t res) +{ + return __builtin_add_overflow_p (a, 2000, res); +} + +/* { 
dg-final { scan-assembler-times "add.f\\s\+" 7 } } */ +/* { dg-final { scan-assembler-times "mov\.nv\\s\+" 4 } } */ +/* { dg-final { scan-assembler-times "mov\.hs\\s\+" 2 } } */ +/* { dg-final { scan-assembler-times "seths\\s\+" 2 } } */ +/* { dg-final { scan-assembler-not "cmp" } } */ From e6f28c66d931e183853d3776f2a3d62fd104b475 Mon Sep 17 00:00:00 2001 From: Shahab Vahedi Date: Wed, 6 Sep 2023 14:30:31 +0200 Subject: [PATCH 3/6] ARC: Use intrinsics for __builtin_sub_overflow*() This patch covers signed and unsigned subtractions. The generated code would be something along these lines: signed: sub.f r0, r1, r2 b.v @label unsigned: sub.f r0, r1, r2 b.c @label gcc/ChangeLog: * config/arc/arc.md (subsi3_v): New insn. (subvsi4): New expand. (subsi3_c): New insn. (usubvsi4): New expand. gcc/testsuite/ChangeLog: * gcc.target/arc/overflow-2.c: New. Signed-off-by: Shahab Vahedi --- gcc/config/arc/arc.md | 48 +++++++++++ gcc/testsuite/gcc.target/arc/overflow-2.c | 97 +++++++++++++++++++++++ 2 files changed, 145 insertions(+) create mode 100644 gcc/testsuite/gcc.target/arc/overflow-2.c diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index b5981c2caada..49dfc9d35af6 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -2963,6 +2963,54 @@ archs4x, archs4xd" (set_attr "cpu_facility" "*,cd,*,*,*,*,*,*,*,*") ]) +(define_insn "subsi3_v" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r") + (minus:SI (match_operand:SI 1 "register_operand" "r,r,0, r") + (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32"))) + (set (reg:CC_V CC_REG) + (compare:CC_V (sign_extend:DI (minus:SI (match_dup 1) + (match_dup 2))) + (minus:DI (sign_extend:DI (match_dup 1)) + (sign_extend:DI (match_dup 2)))))] + "" + "sub.f\\t%0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,4,8")]) + +(define_expand "subvsi4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SI 2 
"nonmemory_operand") + (label_ref (match_operand 3 "" ""))] + "" + "emit_insn (gen_subsi3_v (operands[0], operands[1], operands[2])); + arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE;") + +(define_insn "subsi3_c" + [(set (match_operand:SI 0 "register_operand" "=r,r,r, r") + (minus:SI (match_operand:SI 1 "register_operand" "r,r,0, r") + (match_operand:SI 2 "nonmemory_operand" "r,L,I,C32"))) + (set (reg:CC_C CC_REG) + (compare:CC_C (match_dup 1) + (match_dup 2)))] + "" + "sub.f\\t%0,%1,%2" + [(set_attr "cond" "set") + (set_attr "type" "compare") + (set_attr "length" "4,4,4,8")]) + +(define_expand "usubvsi4" + [(match_operand:SI 0 "register_operand") + (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "nonmemory_operand") + (label_ref (match_operand 3 "" ""))] + "" + "emit_insn (gen_subsi3_c (operands[0], operands[1], operands[2])); + arc_gen_unlikely_cbranch (LTU, CC_Cmode, operands[3]); + DONE;") + (define_expand "subdi3" [(set (match_operand:DI 0 "register_operand" "") (minus:DI (match_operand:DI 1 "register_operand" "") diff --git a/gcc/testsuite/gcc.target/arc/overflow-2.c b/gcc/testsuite/gcc.target/arc/overflow-2.c new file mode 100644 index 000000000000..b4de8c03b228 --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/overflow-2.c @@ -0,0 +1,97 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +#include <stdbool.h> +#include <stdint.h> + +/* + * sub.f r0,r0,r1 + * st_s r0,[r2] + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool sub_overflow (int32_t a, int32_t b, int32_t *res) +{ + return __builtin_sub_overflow (a, b, res); +} + +/* + * sub.f r0,r0,-1234 + * st_s r0,[r1] + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool subi_overflow (int32_t a, int32_t *res) +{ + return __builtin_sub_overflow (a, -1234, res); +} + +/* + * sub.f r3,r0,r1 + * st_s r3,[r2] + * j_s.d [blink] + * setlo r0,r0,r1 + */ +bool usub_overflow (uint32_t a, uint32_t b, uint32_t *res) +{ + return __builtin_sub_overflow (a, b, res); +} + +/* + * sub.f 
r2,r0,4321 + * 
seths r0,4320,r0 + * j_s.d [blink] + * st_s r2,[r1] + */ +bool usubi_overflow (uint32_t a, uint32_t *res) +{ + return __builtin_sub_overflow (a, 4321, res); +} + +/* + * sub.f r0,r0,r1 + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool sub_overflow_p (int32_t a, int32_t b, int32_t res) +{ + return __builtin_sub_overflow_p (a, b, res); +} + +/* + * sub.f r0,r0,-1000 + * mov_s r0,1 + * j_s.d [blink] + * mov.nv r0,0 + */ +bool subi_overflow_p (int32_t a, int32_t res) +{ + return __builtin_sub_overflow_p (a, -1000, res); +} + +/* + * j_s.d [blink] + * setlo r0,r0,r1 + */ +bool usub_overflow_p (uint32_t a, uint32_t b, uint32_t res) +{ + return __builtin_sub_overflow_p (a, b, res); +} + +/* + * seths r0,1999,r0 + * j_s.d [blink] + */ +bool usubi_overflow_p (uint32_t a, uint32_t res) +{ + return __builtin_sub_overflow_p (a, 2000, res); +} + +/* { dg-final { scan-assembler-times "sub.f\\s\+" 6 } } */ +/* { dg-final { scan-assembler-times "mov\.nv\\s\+" 4 } } */ +/* { dg-final { scan-assembler-times "setlo\\s\+" 2 } } */ +/* { dg-final { scan-assembler-times "seths\\s\+" 2 } } */ +/* { dg-final { scan-assembler-not "cmp" } } */ From 8b0ca0d5f47842f965741b27695dd65c821fdc29 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Wed, 27 Nov 2024 09:46:25 -0800 Subject: [PATCH 4/6] arc: testsuite: Scan "rlc" instead of "mov.hs". Due to the patch by Roger Sayle, 09881218137f4af9b7c894c2d350cf2ff8e0ee23, which introduces the use of the `rlc rX,0` instruction in place of the `mov.hs`, the add overflow test case needs to be updated. The previous test case was validating the `mov.hs` instruction, but now it must validate the `rlc` instruction as the new behavior. gcc/testsuite/ChangeLog: * gcc.target/arc/overflow-1.c: Replace mov.hs with rlc. 
Signed-off-by: Luis Silva --- gcc/testsuite/gcc.target/arc/overflow-1.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/gcc/testsuite/gcc.target/arc/overflow-1.c b/gcc/testsuite/gcc.target/arc/overflow-1.c index 01b3e8ad0fab..694c25cfe665 100644 --- a/gcc/testsuite/gcc.target/arc/overflow-1.c +++ b/gcc/testsuite/gcc.target/arc/overflow-1.c @@ -31,9 +31,8 @@ bool addi_overflow (int32_t a, int32_t *res) /* * add.f r0,r0,r1 * st_s r0,[r2] - * mov_s r0,1 * j_s.d [blink] - * mov.hs r0,0 + * rlc r0,0 */ bool uadd_overflow (uint32_t a, uint32_t b, uint32_t *res) { @@ -75,9 +74,8 @@ bool addi_overflow_p (int32_t a, int32_t res) /* * add.f 0,r0,r1 - * mov_s r0,1 * j_s.d [blink] - * mov.hs r0,0 + * rlc r0,0 */ bool uadd_overflow_p (uint32_t a, uint32_t b, uint32_t res) { @@ -95,6 +93,6 @@ bool uaddi_overflow_p (uint32_t a, uint32_t res) /* { dg-final { scan-assembler-times "add.f\\s\+" 7 } } */ /* { dg-final { scan-assembler-times "mov\.nv\\s\+" 4 } } */ -/* { dg-final { scan-assembler-times "mov\.hs\\s\+" 2 } } */ +/* { dg-final { scan-assembler-times "rlc\\s\+" 2 } } */ /* { dg-final { scan-assembler-times "seths\\s\+" 2 } } */ /* { dg-final { scan-assembler-not "cmp" } } */ From 4ccc386791bb43c78038a47b90ecfc13662f8853 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Wed, 18 Sep 2024 16:09:43 +0100 Subject: [PATCH 5/6] arc: Add commutative multiplication patterns. This patch introduces two new instruction patterns: `*mulsi3_cmp0`: This pattern performs a multiplication and sets the CC_Z register based on the result, while also storing the result of the multiplication in a general-purpose register. `*mulsi3_cmp0_noout`: This pattern performs a multiplication and sets the CC_Z register based on the result without storing the result in a general-purpose register. These patterns are optimized to generate code using the `mpy.f` instruction, specifically used where the result is compared to zero. 
In addition, the previous commutative multiplication implementation was removed. It took the negative flag into account, which is wrong. This new implementation only considers the zero flag. A test case has been added to verify the correctness of these changes. gcc/ChangeLog: * config/arc/arc.cc (arc_select_cc_mode): Handle multiplication results compared against zero, selecting CC_Zmode. * config/arc/arc.md (*mulsi3_cmp0): New define_insn. (*mulsi3_cmp0_noout): New define_insn. gcc/testsuite/ChangeLog: * gcc.target/arc/mult-cmp0.c: New test. Signed-off-by: Luis Silva --- gcc/config/arc/arc.cc | 7 +++ gcc/config/arc/arc.md | 34 ++++++++++-- gcc/testsuite/gcc.target/arc/mult-cmp0.c | 66 ++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 4 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arc/mult-cmp0.c diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc index e3d535767684..8ad5649adc06 100644 --- a/gcc/config/arc/arc.cc +++ b/gcc/config/arc/arc.cc @@ -1555,6 +1555,13 @@ arc_select_cc_mode (enum rtx_code op, rtx x, rtx y) machine_mode mode = GET_MODE (x); rtx x1; + /* A multiplication compared against zero (EQ/NE) only uses the Z flag. */ + if (GET_MODE_CLASS (mode) == MODE_INT + && y == const0_rtx + && GET_CODE (x) == MULT + && (op == EQ || op == NE)) + return CC_Zmode; + /* For an operation that sets the condition codes as a side-effect, the C and V flags is not set as for cmp, so we can only use comparisons where this doesn't matter. 
(For LT and GE we can use "mi" and "pl" diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index 49dfc9d35af6..bc2e8fadd91a 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -253,7 +253,7 @@ simd_vcompare, simd_vpermute, simd_vpack, simd_vpack_with_acc, simd_valign, simd_valign_with_acc, simd_vcontrol, simd_vspecial_3cycle, simd_vspecial_4cycle, simd_dma, mul16_em, div_rem, - fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block" + fpu, fpu_fuse, fpu_sdiv, fpu_ddiv, fpu_cvt, block, mpy" (cond [(eq_attr "is_sfunc" "yes") (cond [(match_test "!TARGET_LONG_CALLS_SET && (!TARGET_MEDIUM_CALLS || GET_CODE (PATTERN (insn)) != COND_EXEC)") (const_string "call") (match_test "flag_pic") (const_string "sfunc")] @@ -1068,11 +1068,37 @@ archs4x, archs4xd" (set_attr "cond" "set_zn") (set_attr "length" "*,4,4,4,8")]) -;; The next two patterns are for plos, ior, xor, and, and mult. +(define_insn "*mulsi3_cmp0" + [(set (reg:CC_Z CC_REG) + (compare:CC_Z + (mult:SI + (match_operand:SI 1 "register_operand" "%r,0,r") + (match_operand:SI 2 "nonmemory_operand" "rL,I,i")) + (const_int 0))) + (set (match_operand:SI 0 "register_operand" "=r,r,r") + (mult:SI (match_dup 1) (match_dup 2)))] + "TARGET_MPY" + "mpy%?.f\\t%0,%1,%2" + [(set_attr "length" "4,4,8") + (set_attr "type" "mpy")]) + +(define_insn "*mulsi3_cmp0_noout" + [(set (reg:CC_Z CC_REG) + (compare:CC_Z + (mult:SI + (match_operand:SI 0 "register_operand" "%r,r,r") + (match_operand:SI 1 "nonmemory_operand" "rL,I,i")) + (const_int 0)))] + "TARGET_MPY" + "mpy%?.f\\t0,%0,%1" + [(set_attr "length" "4,4,8") + (set_attr "type" "mpy")]) + +;; The next two patterns are for plus, ior, xor, and. 
(define_insn "*commutative_binary_cmp0_noout" [(set (match_operand 0 "cc_set_register" "") (match_operator 4 "zn_compare_operator" - [(match_operator:SI 3 "commutative_operator" + [(match_operator:SI 3 "commutative_operator_sans_mult" [(match_operand:SI 1 "register_operand" "%r,r") (match_operand:SI 2 "nonmemory_operand" "rL,Cal")]) (const_int 0)]))] @@ -1085,7 +1111,7 @@ archs4x, archs4xd" (define_insn "*commutative_binary_cmp0" [(set (match_operand 3 "cc_set_register" "") (match_operator 5 "zn_compare_operator" - [(match_operator:SI 4 "commutative_operator" + [(match_operator:SI 4 "commutative_operator_sans_mult" [(match_operand:SI 1 "register_operand" "%0, 0,r,r") (match_operand:SI 2 "nonmemory_operand" "rL,rI,r,Cal")]) (const_int 0)])) diff --git a/gcc/testsuite/gcc.target/arc/mult-cmp0.c b/gcc/testsuite/gcc.target/arc/mult-cmp0.c new file mode 100644 index 000000000000..680c72eaa6de --- /dev/null +++ b/gcc/testsuite/gcc.target/arc/mult-cmp0.c @@ -0,0 +1,66 @@ +/* { dg-do compile } */ +/* { dg-options "-O1" } */ + +/* mpy.f r1,r0,r1 + mov_s r0,5 ;3 + j_s.d [blink] + mov.ne r0,r1 */ +unsigned int +ubar (unsigned int a, unsigned int b) +{ + unsigned int c = a * b; + if (c == 0) + { + return 5; + } + return c; +} + +/* mpy.f r1,r0,r1 + mov_s r0,5 ;3 + j_s.d [blink] + mov.ne r0,r1 */ +signed int +bar (signed int a, signed int b) +{ + signed int c = a * b; + if (c == 0) + { + return 5; + } + return c; +} + +/* mpy.f 0,r0,r1 + mov_s r0,1 ;3 + j_s.d [blink] + mov.eq r0,5 */ +unsigned int +ufoo (unsigned int a, unsigned int b) +{ + if (a * b == 0) + { + return 5; + } + return 1; +} + +/* mpy.f 0,r0,r1 + mov_s r0,1 ;3 + j_s.d [blink] + mov.eq r0,5 */ +unsigned int +foo (signed int a, signed int b) +{ + if (a * b == 0) + { + return 5; + } + return 1; +} + +/* { dg-final { scan-assembler-times "mpy\\.f\\s+0" 2 } } */ +/* { dg-final { scan-assembler-times "mov\\.ne\\s+" 2 } } */ +/* { dg-final { scan-assembler-times "mpy\\.f\\s+r" 2 } } */ +/* { dg-final { 
scan-assembler-times "mov\\.eq\\s+" 2 } } */ + From e8915da03d19cf42f003d06fdf58dfc522303005 Mon Sep 17 00:00:00 2001 From: Luis Silva Date: Mon, 18 Nov 2024 07:38:26 -0800 Subject: [PATCH 6/6] arc: Use intrinsics for __builtin_mul_overflow () This patch handles both signed and unsigned builtin multiplication overflow. Uses the "mpy.f" instruction to set the condition codes based on the result. In the event of an overflow, the V flag is set, triggering a conditional move depending on the V flag status. For example, set "1" to "r0" in case of overflow: mov_s r0,1 mpy.f r0,r0,r1 j_s.d [blink] mov.nv r0,0 gcc/ChangeLog: * config/arc/arc.md (<su_optab>mulvsi4): New define_expand. (<su_optab>mulsi3_Vcmp): New define_insn. Signed-off-by: Luis Silva --- gcc/config/arc/arc.md | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/gcc/config/arc/arc.md b/gcc/config/arc/arc.md index bc2e8fadd91a..dd245d1813c7 100644 --- a/gcc/config/arc/arc.md +++ b/gcc/config/arc/arc.md @@ -842,6 +842,9 @@ archs4x, archs4xd" ; Optab prefix for sign/zero-extending operations (define_code_attr su_optab [(sign_extend "") (zero_extend "u")]) +;; Code iterator for sign/zero extension +(define_code_iterator ANY_EXTEND [sign_extend zero_extend]) + (define_insn "*xt_cmp0_noout" [(set (match_operand 0 "cc_set_register" "") (compare:CC_ZN (SEZ:SI (match_operand:SQH 1 "register_operand" "r")) @@ -1068,6 +1071,36 @@ archs4x, archs4xd" (set_attr "cond" "set_zn") (set_attr "length" "*,4,4,4,8")]) +(define_expand "<su_optab>mulvsi4" + [(ANY_EXTEND:DI (match_operand:SI 0 "register_operand")) + (ANY_EXTEND:DI (match_operand:SI 1 "register_operand")) + (ANY_EXTEND:DI (match_operand:SI 2 "register_operand")) + (label_ref (match_operand 3 "" ""))] + "TARGET_MPY" + { + emit_insn (gen_<su_optab>mulsi3_Vcmp (operands[0], operands[1], + operands[2])); + arc_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]); + DONE; + }) + +(define_insn "<su_optab>mulsi3_Vcmp" + [(parallel + [(set + (reg:CC_V 
CC_REG) + (compare:CC_V + (mult:DI + 
(ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "%0,r,r,r")) + (ANY_EXTEND:DI (match_operand:SI 2 "nonmemory_operand" "I,L,r,C32"))) + (ANY_EXTEND:DI (mult:SI (match_dup 1) (match_dup 2))))) + (set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (mult:SI (match_dup 1) (match_dup 2)))])] + "register_operand (operands[1], SImode) + || register_operand (operands[2], SImode)" + "mpy<su_optab>.f\\t%0,%1,%2" + [(set_attr "length" "4,4,4,8") + (set_attr "type" "mpy")]) + (define_insn "*mulsi3_cmp0" [(set (reg:CC_Z CC_REG) (compare:CC_Z