Skip to content

Commit

Permalink
only apply asm for shuffle for single precision
Browse files Browse the repository at this point in the history
  • Loading branch information
fancyIX committed Nov 1, 2023
1 parent 27648f5 commit ed32af0
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 6 deletions.
10 changes: 5 additions & 5 deletions src/kernels/level3/xgemm_part1.opencl
Original file line number Diff line number Diff line change
Expand Up @@ -139,15 +139,15 @@ R"(
#endif

#if USE_SUBGROUP_SHUFFLING == 1 && SUBGROUP_SHUFFLING_GCN == 1
#define SUBGROUP_SIZE 32 // Assumes subgroup size is always 4 on AMD GCN GPUs
#define SUBGROUP_SIZE 32 // Assumes subgroup size is always 32 on AMD Navi GPUs
#define NAVI_SHFL(s0, l) \
{ \
__asm ( \
"ds_bpermute_b32 %[dos0], %[ol0], %[os0]\n" \
"ds_bpermute_b32 %[d], %[l], %[s]\n" \
"s_waitcnt lgkmcnt(0)\n" \
: [dos0] "=&v" (s0) \
: [ol0] "v" (l), \
[os0] "0" (s0)); \
: [d] "=&v" (s0) \
: [l] "v" (l), \
[s] "0" (s0)); \
}
#define NAVI_LID() \
if (get_work_dim() == 2) { \
Expand Down
3 changes: 2 additions & 1 deletion src/utilities/compile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,8 @@ std::shared_ptr<Program> CompileFromSource(
}
}

if (device.IsGPU() && device.IsAMD() && device.Name().find("gfx1") != std::string::npos) {
if (device.IsGPU() && device.IsAMD() && device.Name().find("gfx1") != std::string::npos &&
precision == Precision::kSingle) { // only for Navi cards (gfx1XXX)
header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
header_string += "#define SUBGROUP_SHUFFLING_GCN 1\n";
}
Expand Down

0 comments on commit ed32af0

Please sign in to comment.