xtensa: Optimize boolean evaluation or branching when EQ/NE to zero i…

…n S[IF]mode

This patch optimizes the boolean evaluation of EQ/NE against zero by adding two insn_and_split patterns similar to the SImode conditional store:

"eq_zero":
	op0 = (op1 == 0) ? 1 : 0;
	op0 = clz(op1) >> 5;  /* optimized (requires TARGET_NSA) */

"movsicc_ne0_reg_zero":
	op0 = (op1 != 0) ? op2 : 0;
	op0 = op2; if (op1 == 0) op0 = op1;  /* optimized */

    /* example #1 */
    int bool_eqSI(int x) {
      return x == 0;
    }
    int bool_neSI(int x) {
      return x != 0;
    }

    ;; after (TARGET_NSA)
    bool_eqSI:
	nsau	a2, a2
	srli	a2, a2, 5
	ret.n
    bool_neSI:
	mov.n	a9, a2
	movi.n	a2, 1
	moveqz	a2, a9, a9
	ret.n

These also work in SFmode by ignoring the sign bit, and furthermore, the branch on EQ/NE against zero in SFmode is done in the same manner.

The reasons for this optimization in SFmode are:

  - Only zero values (negative or non-negative) have no 1 bits in both the exponent and the mantissa.
  - EQ/NE comparisons involving NaNs produce no signal even if the NaNs are signaling.
  - Even if the use of the IEEE 754 single-precision floating-point coprocessor is configured (TARGET_HARD_FLOAT is true), the comparison still takes the following steps:
	1. Load zero value to FP register
	2. Possibly, additional FP move if the comparison target is an address register
	3. FP equality check instruction
	4. Read the boolean register containing the result, or conditional branch
    As listed above, a considerable number of instructions are still generated.
    /* example #2 */
    int bool_eqSF(float x) {
      return x == 0;
    }
    int bool_neSF(float x) {
      return x != 0;
    }
    int bool_ltSF(float x) {
      return x < 0;
    }
    extern void foo(void);
    void cb_eqSF(float x) {
      if(x != 0)
        foo();
    }
    void cb_neSF(float x) {
      if(x == 0)
        foo();
    }
    void cb_geSF(float x) {
      if(x < 0)
        foo();
    }

    ;; after
    ;; (TARGET_NSA, TARGET_BOOLEANS and TARGET_HARD_FLOAT)
    bool_eqSF:
	add.n	a2, a2, a2
	nsau	a2, a2
	srli	a2, a2, 5
	ret.n
    bool_neSF:
	add.n	a9, a2, a2
	movi.n	a2, 1
	moveqz	a2, a9, a9
	ret.n
    bool_ltSF:
	movi.n	a9, 0
	wfr	f0, a2
	wfr	f1, a9
	olt.s	b0, f0, f1
	movi.n	a9, 0
	movi.n	a2, 1
	movf	a2, a9, b0
	ret.n
    cb_eqSF:
	add.n	a2, a2, a2
	beqz.n	a2, .L6
	j.l	foo, a9
    .L6:
	ret.n
    cb_neSF:
	add.n	a2, a2, a2
	bnez.n	a2, .L8
	j.l	foo, a9
    .L8:
	ret.n
    cb_geSF:
	addi	sp, sp, -16
	movi.n	a3, 0
	s32i.n	a12, sp, 8
	s32i.n	a0, sp, 12
	mov.n	a12, a2
	call0	__unordsf2
	bnez.n	a2, .L10
	movi.n	a3, 0
	mov.n	a2, a12
	call0	__gesf2
	bnei	a2, -1, .L10
	l32i.n	a0, sp, 12
	l32i.n	a12, sp, 8
	addi	sp, sp, 16
	j.l	foo, a9
    .L10:
	l32i.n	a0, sp, 12
	l32i.n	a12, sp, 8
	addi	sp, sp, 16
	ret.n

gcc/ChangeLog:

	* config/xtensa/predicates.md (const_float_0_operand):
	Rename from obsolete "const_float_1_operand" and change the
	constant to compare.
	(cstoresf_cbranchsf_operand, cstoresf_cbranchsf_operator):
	New.
	* config/xtensa/xtensa.cc (xtensa_expand_conditional_branch):
	Add code for EQ/NE comparison with constant zero in SFmode.
	(xtensa_expand_scc): Add code to derive boolean evaluation
	of EQ/NE with constant zero for comparison in SFmode.
	(xtensa_rtx_costs): Change cost of CONST_DOUBLE with value
	zero inside "cbranchsf4" to 0.
	* config/xtensa/xtensa.md (cbranchsf4, cstoresf4):
	Change "match_operator" and the third "match_operand" to the
	ones mentioned above.
	(movsicc_ne0_reg_zero, eq_zero): New.
jcmvbkbc · Jun 5, 2023 · 72a5c16 · 72a5c16
1 parent 830d36b
commit 72a5c16
Show file tree

Hide file tree

Showing 3 changed files with 106 additions and 9 deletions.
diff --git a/gcc/config/xtensa/predicates.md b/gcc/config/xtensa/predicates.md
@@ -155,11 +155,11 @@
 			    && CONSTANT_P (op)
 			    && GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))))
 
-;; Accept the floating point constant 1 in the appropriate mode.
-(define_predicate "const_float_1_operand"
+;; Accept the floating point constant 0 in the appropriate mode, i.e.
+;; a CONST_DOUBLE whose real value compares equal to dconst0.
+(define_predicate "const_float_0_operand"
   (match_code "const_double")
 {
-  return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
+  return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst0);
 })
 
 (define_predicate "fpmem_offset_operand"
@@ -179,6 +179,11 @@
   return false;
 })
 
+;; Accept the second comparison operand of "cbranchsf4" and "cstoresf4":
+;; a register when TARGET_HARD_FLOAT is enabled, or (regardless of
+;; TARGET_HARD_FLOAT) the floating point constant 0.
+(define_predicate "cstoresf_cbranchsf_operand"
+  (ior (and (match_test "TARGET_HARD_FLOAT")
+	    (match_operand 0 "register_operand"))
+       (match_operand 0 "const_float_0_operand")))
+
 (define_predicate "branch_operator"
   (match_code "eq,ne,lt,ge"))
 
@@ -197,6 +202,12 @@
 (define_predicate "xtensa_cstoresi_operator"
   (match_code "eq,ne,gt,ge,lt,le"))
 
+;; Accept the comparison operator of "cbranchsf4" and "cstoresf4".
+;; With TARGET_HARD_FLOAT, any comparison operator is accepted as long
+;; as its second operand is an SFmode register.  Independently of
+;; TARGET_HARD_FLOAT, whatever "boolean_operator" accepts is allowed too
+;; (that predicate is defined elsewhere; presumably EQ/NE -- confirm).
+(define_predicate "cstoresf_cbranchsf_operator"
+  (ior (and (match_test "TARGET_HARD_FLOAT")
+	    (and (match_operand 0 "comparison_operator")
+		 (match_test "register_operand (XEXP (op, 1), SFmode)")))
+       (match_operand 0 "boolean_operator")))
+
 (define_predicate "xtensa_shift_per_byte_operator"
   (match_code "ashift,ashiftrt,lshiftrt"))
 

diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
@@ -865,6 +865,16 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode)
   switch (mode)
     {
     case E_SFmode:
+      if ((test_code == EQ || test_code == NE)
+	  && const_float_0_operand (cmp1, SFmode))
+	{
+	  emit_move_insn (cmp1 = gen_reg_rtx (SImode),
+			  simplify_gen_subreg (SImode, cmp0, SFmode, 0));
+	  emit_insn (gen_addsi3 (cmp1, cmp1, cmp1));
+	  cmp = gen_int_relational (test_code, cmp1, const0_rtx);
+	  break;
+	}
+
       if (TARGET_HARD_FLOAT)
 	{
 	  cmp = gen_float_relational (test_code, cmp0, cmp1);
@@ -996,6 +1006,36 @@ xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
   rtx one_tmp, zero_tmp;
   rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);
 
+  if (cmp_mode == SFmode)
+    {
+      if (const_float_0_operand (operands[3], SFmode))
+	switch (GET_CODE (operands[1]))
+	  {
+	  case EQ:
+	    emit_move_insn (cmp = gen_reg_rtx (SImode),
+			    simplify_gen_subreg (SImode, operands[2],
+						 SFmode, 0));
+	    emit_insn (gen_addsi3 (cmp, cmp, cmp));
+	    emit_insn (gen_eq_zero (dest, cmp));
+	    return 1;
+
+	  case NE:
+	    emit_move_insn (cmp = gen_reg_rtx (SImode),
+			    simplify_gen_subreg (SImode, operands[2],
+						 SFmode, 0));
+	    emit_insn (gen_addsi3 (cmp, cmp, cmp));
+	    one_tmp = force_reg (SImode, const1_rtx);
+	    emit_insn (gen_movsicc_ne0_reg_zero (dest, cmp, one_tmp));
+	    return 1;
+
+	  default:
+	    return 0;
+	  }
+
+      if (! register_operand (operands[3], SFmode))
+	return 0;
+    }
+
   if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
 				    operands[2], operands[3])))
     return 0;
@@ -4438,6 +4478,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code,
       return true;
 
     case CONST_DOUBLE:
+      if (outer_code == COMPARE && const_float_0_operand (x, SFmode))
+	{
+	  *total = 0;
+	  return true;
+	}
       if (TARGET_CONST16)
 	*total = COSTS_N_INSNS (4);
       else

diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
@@ -1906,11 +1906,11 @@
 })
 
 (define_expand "cbranchsf4"
-  [(match_operator 0 "comparison_operator"
+  [(match_operator 0 "cstoresf_cbranchsf_operator"
     [(match_operand:SF 1 "register_operand")
-     (match_operand:SF 2 "register_operand")])
+     (match_operand:SF 2 "cstoresf_cbranchsf_operand")])
    (match_operand 3 "")]
-  "TARGET_HARD_FLOAT"
+  ""
 {
   xtensa_expand_conditional_branch (operands, SFmode);
   DONE;
@@ -2395,10 +2395,10 @@
 
 (define_expand "cstoresf4"
   [(match_operand:SI 0 "register_operand")
-   (match_operator:SI 1 "comparison_operator"
+   (match_operator:SI 1 "cstoresf_cbranchsf_operator"
     [(match_operand:SF 2 "register_operand")
-     (match_operand:SF 3 "register_operand")])]
-  "TARGET_HARD_FLOAT"
+     (match_operand:SF 3 "cstoresf_cbranchsf_operand")])]
+  ""
 {
   if (!xtensa_expand_scc (operands, SFmode))
     FAIL;
@@ -2463,6 +2463,30 @@
    (set_attr "mode"	"SI")
    (set_attr "length"	"3,3")])
 
+;; Conditional store of a register or zero:
+;;   op0 = (op1 != 0) ? op2 : 0
+;; The "#" output template means the insn has to be split; it is
+;; replaced by a plain move followed by a conditional move:
+;;   op0 = op2;
+;;   op0 = (op1 != 0) ? op0 : op1;
+;; so that op0 receives op1 (which is zero) exactly when op1 == 0.
+;; The length attribute is 5 with TARGET_DENSITY and 6 otherwise.
+;; NOTE(review): the first split insn writes op0 before the second one
+;; reads op1, and op0 is not marked earlyclobber -- the split looks
+;; wrong if op0 and op1 ever share a register; confirm that cannot
+;; happen.
+(define_insn_and_split "movsicc_ne0_reg_zero"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(if_then_else:SI (ne (match_operand:SI 1 "register_operand" "r")
+			     (const_int 0))
+			 (match_operand:SI 2 "register_operand" "r")
+			 (const_int 0)))]
+  ""
+  "#"
+  ""
+  [(set (match_dup 0)
+	(match_dup 2))
+   (set (match_dup 0)
+	(if_then_else:SI (ne (match_dup 1)
+			     (const_int 0))
+			 (match_dup 0)
+			 (match_dup 1)))]
+  ""
+  [(set_attr "type"	"move")
+   (set_attr "mode"	"SI")
+   (set (attr "length")
+	(if_then_else (match_test "TARGET_DENSITY")
+		      (const_int 5)
+		      (const_int 6)))])
+
 (define_insn "movsfcc_internal0"
   [(set (match_operand:SF 0 "register_operand" "=a,a,f,f")
 	(if_then_else:SF (match_operator 4 "branch_operator"
@@ -3222,6 +3246,23 @@
 				    (const_int 5)
 				    (const_int 6))))])
 
+;; Boolean evaluation of "equal to zero":
+;;   op0 = (op1 == 0)
+;; Only available when TARGET_NSA is enabled.  The "#" output template
+;; means the insn has to be split; it is replaced by
+;;   op0 = clz (op1);
+;;   op0 = op0 >> 5;	(logical shift)
+;; NOTE(review): this presumably relies on clz yielding 32 for a zero
+;; input (and less than 32 otherwise), so that bit 5 is the result --
+;; confirm for this target.
+;; NOTE(review): the SFmode EQ path added to xtensa_expand_scc emits
+;; this pattern via gen_eq_zero without a visible TARGET_NSA check --
+;; confirm that path is safe when TARGET_NSA is disabled.
+(define_insn_and_split "eq_zero"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(eq:SI (match_operand:SI 1 "register_operand" "r")
+	       (const_int 0)))]
+  "TARGET_NSA"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(clz:SI (match_dup 1)))
+   (set (match_dup 0)
+	(lshiftrt:SI (match_dup 0)
+		     (const_int 5)))]
+  ""
+  [(set_attr "type"	"move")
+   (set_attr "mode"	"SI")
+   (set_attr "length"	"6")])
+
 (define_peephole2
   [(set (match_operand:SI 0 "register_operand")
 	(match_operand:SI 6 "reload_operand"))