mirrored from git://gcc.gnu.org/git/gcc.git
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
aarch64: Add vector popcount besides QImode [PR113859]
This patch improves GCC’s vectorization of __builtin_popcount for aarch64 target by adding popcount patterns for vector modes besides QImode, i.e., HImode, SImode and DImode. With this patch, we now generate the following for V8HI: cnt v1.16b, v0.16b uaddlp v2.8h, v1.16b For V4HI, we generate: cnt v1.8b, v0.8b uaddlp v2.4h, v1.8b For V4SI, we generate: cnt v1.16b, v0.16b uaddlp v2.8h, v1.16b uaddlp v3.4s, v2.8h For V4SI with TARGET_DOTPROD, we generate the following instead: movi v0.4s, #0 movi v1.16b, #1 cnt v3.16b, v2.16b udot v0.4s, v3.16b, v1.16b For V2SI, we generate: cnt v1.8b, v0.8b uaddlp v2.4h, v1.8b uaddlp v3.2s, v2.4h For V2SI with TARGET_DOTPROD, we generate the following instead: movi v0.8b, #0 movi v1.8b, #1 cnt v3.8b, v2.8b udot v0.2s, v3.8b, v1.8b For V2DI, we generate: cnt v1.16b, v0.16b uaddlp v2.8h, v1.16b uaddlp v3.4s, v2.8h uaddlp v4.2d, v3.4s For V2DI with TARGET_DOTPROD, we generate the following instead: movi v0.4s, #0 movi v1.16b, #1 cnt v3.16b, v2.16b udot v0.4s, v3.16b, v1.16b uaddlp v0.2d, v0.4s PR target/113859 gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_<su>addlp<mode>): Rename to... (@aarch64_<su>addlp<mode>): ... This. (popcount<mode>2): New define_expand. gcc/testsuite/ChangeLog: * gcc.target/aarch64/popcnt-udot.c: New test. * gcc.target/aarch64/popcnt-vec.c: New test. Signed-off-by: Pengxuan Zheng <quic_pzheng@quicinc.com>
- Loading branch information
Showing
3 changed files
with
167 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/* { dg-do compile } */ | ||
/* { dg-options "-O2 -march=armv8.2-a+dotprod -fno-vect-cost-model -fno-schedule-insns -fno-schedule-insns2" } */ | ||
|
||
/* | ||
** bar: | ||
** movi v([0-9]+).16b, 0x1 | ||
** movi v([0-9]+).4s, 0 | ||
** ldr q([0-9]+), \[x0\] | ||
** cnt v([0-9]+).16b, v\3.16b | ||
** udot v\2.4s, v\4.16b, v\1.16b | ||
** str q\2, \[x1\] | ||
** ret | ||
*/ | ||
/* Straight-line (no loop) popcounts of four adjacent unsigned int elements.
   The expected body above matches one cnt on .16b followed by a udot that
   accumulates into a zeroed .4s register, with a q-register load and store.
   Keep the four statements as written: the matched assembly depends on
   this exact shape.  */
void | ||
bar (unsigned int *__restrict b, unsigned int *__restrict d) | ||
{ | ||
d[0] = __builtin_popcount (b[0]); | ||
d[1] = __builtin_popcount (b[1]); | ||
d[2] = __builtin_popcount (b[2]); | ||
d[3] = __builtin_popcount (b[3]); | ||
} | ||
|
||
/* | ||
** bar1: | ||
** movi v([0-9]+).8b, 0x1 | ||
** movi v([0-9]+).2s, 0 | ||
** ldr d([0-9]+), \[x0\] | ||
** cnt v([0-9]+).8b, v\3.8b | ||
** udot v\2.2s, v\4.8b, v\1.8b | ||
** str d\2, \[x1\] | ||
** ret | ||
*/ | ||
/* Straight-line popcounts of two adjacent unsigned int elements.
   The expected body above matches cnt on .8b followed by a udot into a
   zeroed .2s register, using d-register (64-bit) load and store.  */
void | ||
bar1 (unsigned int *__restrict b, unsigned int *__restrict d) | ||
{ | ||
d[0] = __builtin_popcount (b[0]); | ||
d[1] = __builtin_popcount (b[1]); | ||
} | ||
|
||
/* | ||
** bar2: | ||
** movi v([0-9]+).16b, 0x1 | ||
** movi v([0-9]+).4s, 0 | ||
** ldr q([0-9]+), \[x0\] | ||
** cnt v([0-9]+).16b, v\3.16b | ||
** udot v\2.4s, v\4.16b, v\1.16b | ||
** uaddlp v\2.2d, v\2.4s | ||
** str q\2, \[x1\] | ||
** ret | ||
*/ | ||
/* Straight-line popcounts of two adjacent unsigned long long elements.
   The expected body above matches cnt on .16b and a udot into a zeroed .4s
   register, then one uaddlp that widens the .4s sums to .2d.  */
void | ||
bar2 (unsigned long long *__restrict b, unsigned long long *__restrict d) | ||
{ | ||
d[0] = __builtin_popcountll (b[0]); | ||
d[1] = __builtin_popcountll (b[1]); | ||
} | ||
|
||
/* { dg-final { check-function-bodies "**" "" "" } } */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,69 @@ | ||
/* { dg-do compile } */ | ||
/* { dg-options "-O2 -fno-vect-cost-model" } */ | ||
|
||
/* Loop over 1024 unsigned char elements. | ||
   This function should produce cnt v.16b.  No uaddlp is expected here; | ||
   the uaddlp total in the dg-final checks at the end of this file | ||
   counts only the wider-element functions. */ | ||
void | ||
bar (unsigned char *__restrict b, unsigned char *__restrict d) | ||
{ | ||
for (int i = 0; i < 1024; i++) | ||
d[i] = __builtin_popcount (b[i]); | ||
} | ||
|
||
/* Loop over 1024 unsigned short elements. | ||
   This function should produce cnt v.16b and uaddlp (Add Long Pairwise): | ||
   one uaddlp, widening the per-byte counts (.16b) to halfwords (.8h). */ | ||
void | ||
bar1 (unsigned short *__restrict b, unsigned short *__restrict d) | ||
{ | ||
for (int i = 0; i < 1024; i++) | ||
d[i] = __builtin_popcount (b[i]); | ||
} | ||
|
||
/* Loop over 1024 unsigned int elements. | ||
   This function should produce cnt v.16b and 2 uaddlp (Add Long Pairwise), | ||
   widening .16b -> .8h -> .4s.  NOTE(review): this presumes the udot | ||
   sequence is not selected under this file's dg-options; confirm if the | ||
   default target flags enable dotprod. */ | ||
void | ||
bar2 (unsigned int *__restrict b, unsigned int *__restrict d) | ||
{ | ||
for (int i = 0; i < 1024; i++) | ||
d[i] = __builtin_popcount (b[i]); | ||
} | ||
|
||
/* Loop over 1024 unsigned long long elements, using __builtin_popcountll. | ||
   This function should produce cnt v.16b and 3 uaddlp (Add Long Pairwise), | ||
   widening .16b -> .8h -> .4s -> .2d. */ | ||
void | ||
bar3 (unsigned long long *__restrict b, unsigned long long *__restrict d) | ||
{ | ||
for (int i = 0; i < 1024; i++) | ||
d[i] = __builtin_popcountll (b[i]); | ||
} | ||
|
||
/* SLP | ||
   Straight-line popcounts of four adjacent unsigned short elements | ||
   (64 bits of data in total). | ||
This function should produce cnt v.8b and uaddlp (Add Long Pairwise). */ | ||
void | ||
bar4 (unsigned short *__restrict b, unsigned short *__restrict d) | ||
{ | ||
d[0] = __builtin_popcount (b[0]); | ||
d[1] = __builtin_popcount (b[1]); | ||
d[2] = __builtin_popcount (b[2]); | ||
d[3] = __builtin_popcount (b[3]); | ||
} | ||
|
||
/* SLP | ||
   Straight-line popcounts of two adjacent unsigned int elements | ||
   (64 bits of data in total); the widening is .8b -> .4h -> .2s. | ||
This function should produce cnt v.8b and 2 uaddlp (Add Long Pairwise). */ | ||
void | ||
bar5 (unsigned int *__restrict b, unsigned int *__restrict d) | ||
{ | ||
d[0] = __builtin_popcount (b[0]); | ||
d[1] = __builtin_popcount (b[1]); | ||
} | ||
|
||
/* SLP | ||
   Straight-line popcounts of two adjacent unsigned long long elements | ||
   (128 bits of data in total), using __builtin_popcountll; the widening | ||
   is .16b -> .8h -> .4s -> .2d. | ||
This function should produce cnt v.16b and 3 uaddlp (Add Long Pairwise). */ | ||
void | ||
bar6 (unsigned long long *__restrict b, unsigned long long *__restrict d) | ||
{ | ||
d[0] = __builtin_popcountll (b[0]); | ||
d[1] = __builtin_popcountll (b[1]); | ||
} | ||
|
||
/* { dg-final { scan-assembler-not {\tbl\tpopcount} } } */ | ||
/* { dg-final { scan-assembler-times {cnt\t} 7 } } */ | ||
/* { dg-final { scan-assembler-times {uaddlp\t} 12 } } */ | ||
/* { dg-final { scan-assembler-times {ldr\tq} 5 } } */ | ||
/* { dg-final { scan-assembler-times {ldr\td} 2 } } */ |