mirrored from git://gcc.gnu.org/git/gcc.git
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
aarch64: Emit ADD X, Y, Y instead of SHL X, Y, #1 for Advanced SIMD
On many cores, including Neoverse V2, the throughput of vector ADD instructions is higher than that of vector shifts like SHL. We can lean on that to emit code like: add v0.4s, v0.4s, v0.4s instead of: shl v0.4s, v0.4s, 1 LLVM already does this trick. In RTL the code gets canonicalised from (plus x x) to (ashift x 1), so I opted to instead do this at the final assembly printing stage, similar to how we emit CMLT instead of SSHR elsewhere in the backend. I'd like to also do this for SVE shifts, but those will have to be separate patches. Signed-off-by: Kyrylo Tkachov <ktkachov@nvidia.com> gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_simd_imm_shl<mode><vczle><vczbe>): Rewrite to new syntax. Add =w,w,vs1 alternative. * config/aarch64/constraints.md (vs1): New constraint. gcc/testsuite/ChangeLog: * gcc.target/aarch64/advsimd_shl_add.c: New test.
- Loading branch information
Showing
3 changed files
with
77 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/* { dg-do compile } */ | ||
/* { dg-additional-options "--save-temps -O1" } */ | ||
/* { dg-final { check-function-bodies "**" "" "" } } */ | ||
|
||
/* GNU vector types (vector_size of 16 and of 8) used as the argument and
   return types of the test functions below.  */
typedef __INT64_TYPE__ __attribute__ ((vector_size (16))) v2di; | ||
typedef int __attribute__ ((vector_size (16))) v4si; | ||
typedef short __attribute__ ((vector_size (16))) v8hi; | ||
typedef char __attribute__ ((vector_size (16))) v16qi; | ||
typedef short __attribute__ ((vector_size (8))) v4hi; | ||
typedef char __attribute__ ((vector_size (8))) v8qi; | ||
|
||
/* Define a function foo_<S> that takes a vector A of type S and returns
   A << 1.  The function name is formed by token-pasting S, so S must be a
   single identifier such as one of the vector typedef names.  */
#define FUNC(S) \ | ||
S \ | ||
foo_##S (S a) \ | ||
{ return a << 1; } | ||
|
||
/* | ||
** foo_v2di: | ||
** add v0.2d, v0.2d, v0.2d | ||
** ret | ||
*/ | ||
|
||
FUNC (v2di) | ||
|
||
/* | ||
** foo_v4si: | ||
** add v0.4s, v0.4s, v0.4s | ||
** ret | ||
*/ | ||
|
||
FUNC (v4si) | ||
|
||
/* | ||
** foo_v8hi: | ||
** add v0.8h, v0.8h, v0.8h | ||
** ret | ||
*/ | ||
|
||
FUNC (v8hi) | ||
|
||
/* | ||
** foo_v16qi: | ||
** add v0.16b, v0.16b, v0.16b | ||
** ret | ||
*/ | ||
|
||
FUNC (v16qi) | ||
|
||
/* | ||
** foo_v4hi: | ||
** add v0.4h, v0.4h, v0.4h | ||
** ret | ||
*/ | ||
|
||
FUNC (v4hi) | ||
|
||
/* | ||
** foo_v8qi: | ||
** add v0.8b, v0.8b, v0.8b | ||
** ret | ||
*/ | ||
|
||
FUNC (v8qi) | ||
|