Skip to content

Commit

Permalink
xtensa: Optimize boolean evaluation or branching when EQ/NE to zero i…
Browse files Browse the repository at this point in the history
…n S[IF]mode

This patch optimizes the boolean evaluation of EQ/NE against zero
by adding two insn_and_split patterns similar to SImode conditional
store:

"eq_zero":
	op0 = (op1 == 0) ? 1 : 0;
	op0 = clz(op1) >> 5;  /* optimized (requires TARGET_NSA) */

"movsicc_ne0_reg_zero":
	op0 = (op1 != 0) ? op2 : 0;
	op0 = op2; if (op1 == 0) op0 = op1;  /* optimized */

    /* example #1 */
    int bool_eqSI(int x) {
      return x == 0;
    }
    int bool_neSI(int x) {
      return x != 0;
    }

    ;; after (TARGET_NSA)
    bool_eqSI:
	nsau	a2, a2
	srli	a2, a2, 5
	ret.n
    bool_neSI:
	mov.n	a9, a2
	movi.n	a2, 1
	moveqz	a2, a9, a9
	ret.n

These also work in SFmode by ignoring the sign bit; furthermore,
branches on EQ/NE against zero in SFmode are handled in the same
manner.

The reasons for this optimization in SFmode are:

  - Only zero values (negative or positive zero) have no 1 bits in
    either the exponent or the mantissa.
  - EQ/NE comparisons involving NaNs produce no signal even if they
    are signaling.
  - Even if the IEEE 754 single-precision floating-point coprocessor
    is configured (TARGET_HARD_FLOAT is true), the comparison still
    takes the following steps:
	1. Load zero value to FP register
        2. Possibly, additional FP move if the comparison target is
	   an address register
	3. FP equality check instruction
	4. Read the boolean register containing the result, or condi-
	   tional branch
    As noted above, a considerable number of instructions are still
    generated.

    /* example #2 */
    int bool_eqSF(float x) {
      return x == 0;
    }
    int bool_neSF(float x) {
      return x != 0;
    }
    int bool_ltSF(float x) {
      return x < 0;
    }
    extern void foo(void);
    void cb_eqSF(float x) {
      if(x != 0)
        foo();
    }
    void cb_neSF(float x) {
      if(x == 0)
        foo();
    }
    void cb_geSF(float x) {
      if(x < 0)
        foo();
    }

    ;; after
    ;; (TARGET_NSA, TARGET_BOOLEANS and TARGET_HARD_FLOAT)
    bool_eqSF:
	add.n	a2, a2, a2
	nsau	a2, a2
	srli	a2, a2, 5
	ret.n
    bool_neSF:
	add.n	a9, a2, a2
	movi.n	a2, 1
	moveqz	a2, a9, a9
	ret.n
    bool_ltSF:
	movi.n	a9, 0
	wfr	f0, a2
	wfr	f1, a9
	olt.s	b0, f0, f1
	movi.n	a9, 0
	movi.n	a2, 1
	movf	a2, a9, b0
	ret.n
    cb_eqSF:
	add.n	a2, a2, a2
	beqz.n	a2, .L6
	j.l	foo, a9
    .L6:
	ret.n
    cb_neSF:
	add.n	a2, a2, a2
	bnez.n	a2, .L8
	j.l	foo, a9
    .L8:
	ret.n
    cb_geSF:
	addi	sp, sp, -16
	movi.n	a3, 0
	s32i.n	a12, sp, 8
	s32i.n	a0, sp, 12
	mov.n	a12, a2
	call0	__unordsf2
	bnez.n	a2, .L10
	movi.n	a3, 0
	mov.n	a2, a12
	call0	__gesf2
	bnei	a2, -1, .L10
	l32i.n	a0, sp, 12
	l32i.n	a12, sp, 8
	addi	sp, sp, 16
	j.l	foo, a9
    .L10:
	l32i.n	a0, sp, 12
	l32i.n	a12, sp, 8
	addi	sp, sp, 16
	ret.n

gcc/ChangeLog:

	* config/xtensa/predicates.md (const_float_0_operand):
	Rename from obsolete "const_float_1_operand" and change the
	constant to compare.
	(cstoresf_cbranchsf_operand, cstoresf_cbranchsf_operator):
	New.
	* config/xtensa/xtensa.cc (xtensa_expand_conditional_branch):
	Add code for EQ/NE comparison with constant zero in SFmode.
	(xtensa_expand_scc): Add code to derive boolean evaluation
	of EQ/NE with constant zero for comparison in SFmode.
	(xtensa_rtx_costs): Change cost of CONST_DOUBLE with value
	zero inside "cbranchsf4" to 0.
	* config/xtensa/xtensa.md (cbranchsf4, cstoresf4):
	Change "match_operator" and the third "match_operand" to the
	ones mentioned above.
	(movsicc_ne0_reg_zero, eq_zero): New.
  • Loading branch information
jjsuwa-sys3175 authored and jcmvbkbc committed Jun 5, 2023
1 parent 830d36b commit 72a5c16
Show file tree
Hide file tree
Showing 3 changed files with 106 additions and 9 deletions.
17 changes: 14 additions & 3 deletions gcc/config/xtensa/predicates.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,11 @@
&& CONSTANT_P (op)
&& GET_MODE_SIZE (mode) % UNITS_PER_WORD == 0")))))

;; Accept the floating point constant 1 in the appropriate mode.
(define_predicate "const_float_1_operand"
;; Accept the floating point constant 0 in the appropriate mode.
(define_predicate "const_float_0_operand"
(match_code "const_double")
{
return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst0);
})

(define_predicate "fpmem_offset_operand"
Expand All @@ -179,6 +179,11 @@
return false;
})

;; Accept an operand usable as the right-hand side of the SFmode compare
;; in "cbranchsf4" and "cstoresf4": any register operand when
;; TARGET_HARD_FLOAT is set, or the floating point constant 0 regardless
;; of TARGET_HARD_FLOAT.
(define_predicate "cstoresf_cbranchsf_operand"
(ior (and (match_test "TARGET_HARD_FLOAT")
(match_operand 0 "register_operand"))
(match_operand 0 "const_float_0_operand")))

(define_predicate "branch_operator"
(match_code "eq,ne,lt,ge"))

Expand All @@ -197,6 +202,12 @@
(define_predicate "xtensa_cstoresi_operator"
(match_code "eq,ne,gt,ge,lt,le"))

;; Accept the comparison operator of "cbranchsf4" and "cstoresf4".
;; With TARGET_HARD_FLOAT, any comparison operator is accepted provided
;; its second operand (XEXP (op, 1)) is an SFmode register.  Independently
;; of TARGET_HARD_FLOAT, an operator satisfying "boolean_operator" is
;; accepted as well.
;; NOTE(review): "boolean_operator" is defined elsewhere; presumably it
;; matches EQ/NE only -- confirm against its definition.
(define_predicate "cstoresf_cbranchsf_operator"
(ior (and (match_test "TARGET_HARD_FLOAT")
(and (match_operand 0 "comparison_operator")
(match_test "register_operand (XEXP (op, 1), SFmode)")))
(match_operand 0 "boolean_operator")))

(define_predicate "xtensa_shift_per_byte_operator"
(match_code "ashift,ashiftrt,lshiftrt"))

Expand Down
45 changes: 45 additions & 0 deletions gcc/config/xtensa/xtensa.cc
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,16 @@ xtensa_expand_conditional_branch (rtx *operands, machine_mode mode)
switch (mode)
{
case E_SFmode:
if ((test_code == EQ || test_code == NE)
&& const_float_0_operand (cmp1, SFmode))
{
emit_move_insn (cmp1 = gen_reg_rtx (SImode),
simplify_gen_subreg (SImode, cmp0, SFmode, 0));
emit_insn (gen_addsi3 (cmp1, cmp1, cmp1));
cmp = gen_int_relational (test_code, cmp1, const0_rtx);
break;
}

if (TARGET_HARD_FLOAT)
{
cmp = gen_float_relational (test_code, cmp0, cmp1);
Expand Down Expand Up @@ -996,6 +1006,36 @@ xtensa_expand_scc (rtx operands[4], machine_mode cmp_mode)
rtx one_tmp, zero_tmp;
rtx (*gen_fn) (rtx, rtx, rtx, rtx, rtx);

if (cmp_mode == SFmode)
{
if (const_float_0_operand (operands[3], SFmode))
switch (GET_CODE (operands[1]))
{
case EQ:
emit_move_insn (cmp = gen_reg_rtx (SImode),
simplify_gen_subreg (SImode, operands[2],
SFmode, 0));
emit_insn (gen_addsi3 (cmp, cmp, cmp));
emit_insn (gen_eq_zero (dest, cmp));
return 1;

case NE:
emit_move_insn (cmp = gen_reg_rtx (SImode),
simplify_gen_subreg (SImode, operands[2],
SFmode, 0));
emit_insn (gen_addsi3 (cmp, cmp, cmp));
one_tmp = force_reg (SImode, const1_rtx);
emit_insn (gen_movsicc_ne0_reg_zero (dest, cmp, one_tmp));
return 1;

default:
return 0;
}

if (! register_operand (operands[3], SFmode))
return 0;
}

if (!(cmp = gen_conditional_move (GET_CODE (operands[1]), cmp_mode,
operands[2], operands[3])))
return 0;
Expand Down Expand Up @@ -4438,6 +4478,11 @@ xtensa_rtx_costs (rtx x, machine_mode mode, int outer_code,
return true;

case CONST_DOUBLE:
if (outer_code == COMPARE && const_float_0_operand (x, SFmode))
{
*total = 0;
return true;
}
if (TARGET_CONST16)
*total = COSTS_N_INSNS (4);
else
Expand Down
53 changes: 47 additions & 6 deletions gcc/config/xtensa/xtensa.md
Original file line number Diff line number Diff line change
Expand Up @@ -1906,11 +1906,11 @@
})

(define_expand "cbranchsf4"
[(match_operator 0 "comparison_operator"
[(match_operator 0 "cstoresf_cbranchsf_operator"
[(match_operand:SF 1 "register_operand")
(match_operand:SF 2 "register_operand")])
(match_operand:SF 2 "cstoresf_cbranchsf_operand")])
(match_operand 3 "")]
"TARGET_HARD_FLOAT"
""
{
xtensa_expand_conditional_branch (operands, SFmode);
DONE;
Expand Down Expand Up @@ -2395,10 +2395,10 @@

(define_expand "cstoresf4"
[(match_operand:SI 0 "register_operand")
(match_operator:SI 1 "comparison_operator"
(match_operator:SI 1 "cstoresf_cbranchsf_operator"
[(match_operand:SF 2 "register_operand")
(match_operand:SF 3 "register_operand")])]
"TARGET_HARD_FLOAT"
(match_operand:SF 3 "cstoresf_cbranchsf_operand")])]
""
{
if (!xtensa_expand_scc (operands, SFmode))
FAIL;
Expand Down Expand Up @@ -2463,6 +2463,30 @@
(set_attr "mode" "SI")
(set_attr "length" "3,3")])

;; Conditional store of a register or zero, selected by NE against zero:
;;	op0 = (op1 != 0) ? op2 : 0;
;; The insn is never output as is (template "#"); it is split into
;;	op0 = op2;
;;	op0 = (op1 != 0) ? op0 : op1;
;; where the second insn stores op1 (known to be 0 on that path) into op0
;; when op1 == 0.
;; NOTE(review): operand 0 is not marked earlyclobber, yet the first split
;; insn writes operand 0 before operand 1 is read by the second one --
;; confirm that operand 0 can never be the same register as operand 1.
(define_insn_and_split "movsicc_ne0_reg_zero"
[(set (match_operand:SI 0 "register_operand" "=a")
(if_then_else:SI (ne (match_operand:SI 1 "register_operand" "r")
(const_int 0))
(match_operand:SI 2 "register_operand" "r")
(const_int 0)))]
""
"#"
""
;; Replacement sequence: unconditional copy followed by a conditional move.
[(set (match_dup 0)
(match_dup 2))
(set (match_dup 0)
(if_then_else:SI (ne (match_dup 1)
(const_int 0))
(match_dup 0)
(match_dup 1)))]
""
;; Two instructions; the length is 5 rather than 6 when TARGET_DENSITY
;; is set.
[(set_attr "type" "move")
(set_attr "mode" "SI")
(set (attr "length")
(if_then_else (match_test "TARGET_DENSITY")
(const_int 5)
(const_int 6)))])

(define_insn "movsfcc_internal0"
[(set (match_operand:SF 0 "register_operand" "=a,a,f,f")
(if_then_else:SF (match_operator 4 "branch_operator"
Expand Down Expand Up @@ -3222,6 +3246,23 @@
(const_int 5)
(const_int 6))))])

;; Boolean evaluation of EQ against zero in SImode:
;;	op0 = (op1 == 0) ? 1 : 0;
;; Only available with TARGET_NSA.  The insn is never output as is
;; (template "#"); it is always split ("&& 1") into
;;	op0 = clz (op1);
;;	op0 = op0 >> 5;		/* logical shift */
;; NOTE(review): this relies on clz yielding 32 for a zero input (and
;; less than 32 otherwise), so that bit 5 is set exactly when op1 == 0 --
;; confirm against the target's clz/NSAU semantics.
(define_insn_and_split "eq_zero"
[(set (match_operand:SI 0 "register_operand" "=a")
(eq:SI (match_operand:SI 1 "register_operand" "r")
(const_int 0)))]
"TARGET_NSA"
"#"
"&& 1"
;; Replacement sequence: count leading zeros, then shift right by 5.
[(set (match_dup 0)
(clz:SI (match_dup 1)))
(set (match_dup 0)
(lshiftrt:SI (match_dup 0)
(const_int 5)))]
""
[(set_attr "type" "move")
(set_attr "mode" "SI")
(set_attr "length" "6")])

(define_peephole2
[(set (match_operand:SI 0 "register_operand")
(match_operand:SI 6 "reload_operand"))
Expand Down

0 comments on commit 72a5c16

Please sign in to comment.