Skip to content

Commit

Permalink
aarch64: Fix bogus cnot optimisation [PR114603]
Browse files Browse the repository at this point in the history
aarch64-sve.md had a pattern that combined:

	cmpeq	pb.T, pa/z, zc.T, #0
	mov	zd.T, pb/z, #1

into:

	cnot	zd.T, pa/m, zc.T

But this is only valid if pa is a ptrue (an all-true predicate).  In
other cases, the original sequence would set the inactive elements of
zd.T to 0, whereas the combined form would copy those elements from zc.T.

gcc/
	PR target/114603
	* config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace
	with...
	(@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be
	a ptrue.
	(*cnot<mode>): Require operand 1 to be a ptrue.
	* config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand):
	Use aarch64_ptrue_cnot<mode> for _x operations that are predicated
	with a ptrue.  Represent other _x operations as fully-defined _m
	operations.

gcc/testsuite/
	PR target/114603
	* gcc.target/aarch64/sve/acle/general/cnot_1.c: New test.

(cherry picked from commit 67cbb1c)
  • Loading branch information
rsandifo-arm committed Aug 16, 2024
1 parent 22c6a11 commit 959d652
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 19 deletions.
25 changes: 16 additions & 9 deletions gcc/config/aarch64/aarch64-sve-builtins-base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -496,15 +496,22 @@ class svcnot_impl : public function_base
expand (function_expander &e) const override
{
machine_mode mode = e.vector_mode (0);
if (e.pred == PRED_x)
{
/* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
a ptrue hint. */
e.add_ptrue_hint (0, e.gp_mode (0));
return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
}

return e.use_cond_insn (code_for_cond_cnot (mode), 0);
machine_mode pred_mode = e.gp_mode (0);
/* The underlying _x pattern is effectively:
dst = src == 0 ? 1 : 0
rather than an UNSPEC_PRED_X. Using this form allows autovec
constructs to be matched by combine, but it means that the
predicate on the src == 0 comparison must be all-true.
For simplicity, represent other _x operations as fully-defined _m
operations rather than using a separate bespoke pattern. */
if (e.pred == PRED_x
&& gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode))
return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode));
return e.use_cond_insn (code_for_cond_cnot (mode),
e.pred == PRED_x ? 1 : 0);
}
};

Expand Down
20 changes: 10 additions & 10 deletions gcc/config/aarch64/aarch64-sve.md
Original file line number Diff line number Diff line change
Expand Up @@ -3225,24 +3225,24 @@
;; - CNOT
;; -------------------------------------------------------------------------

;; Predicated logical inverse.
(define_expand "@aarch64_pred_cnot<mode>"
;; Logical inverse, predicated with a ptrue.
(define_expand "@aarch64_ptrue_cnot<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
(const_int SVE_KNOWN_PTRUE)
(eq:<VPRED>
(match_operand:SVE_FULL_I 3 "register_operand")
(match_dup 4))]
(match_operand:SVE_FULL_I 2 "register_operand")
(match_dup 3))]
UNSPEC_PRED_Z)
(match_dup 5)
(match_dup 4)]
(match_dup 4)
(match_dup 3)]
UNSPEC_SEL))]
"TARGET_SVE"
{
operands[4] = CONST0_RTX (<MODE>mode);
operands[5] = CONST1_RTX (<MODE>mode);
operands[3] = CONST0_RTX (<MODE>mode);
operands[4] = CONST1_RTX (<MODE>mode);
}
)

Expand All @@ -3251,7 +3251,7 @@
(unspec:SVE_I
[(unspec:<VPRED>
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
(const_int SVE_KNOWN_PTRUE)
(eq:<VPRED>
(match_operand:SVE_I 2 "register_operand" "0, w")
(match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
Expand Down
23 changes: 23 additions & 0 deletions gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/* { dg-options "-O2" } */
/* { dg-final { check-function-bodies "**" "" } } */

#include <arm_sve.h>

#ifdef __cplusplus
extern "C" {
#endif

/*
** foo:
** cmpeq (p[0-7])\.s, p0/z, z0\.s, #0
** mov z0\.s, \1/z, #1
** ret
*/
svint32_t foo(svbool_t pg, svint32_t y)
{
/* Regression test for PR target/114603.  PG is an arbitrary function
   argument, so it is not known to be all-true.  Lanes that are inactive
   in PG compare as false and must therefore select the 0 operand of the
   svsel.  The expected body above requires this to stay as a CMPEQ
   followed by a zeroing MOV rather than being combined into a single
   merging CNOT.  */
return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0));
}

#ifdef __cplusplus
}
#endif

0 comments on commit 959d652

Please sign in to comment.