[NVPTX][NFC] Update tests to use bfloat type #101493

hdelan · 2024-08-01T14:51:24Z

As of commit 250f2bb, these intrinsics are defined with the bfloat type rather than the i16 and i32 storage types. As such, the explicit declarations are no longer needed once the tests use the correct types.

llvmbot · 2024-08-01T14:51:56Z

@llvm/pr-subscribers-backend-nvptx

Author: Hugh Delaney (hdelan)

Changes

These intrinsics are defined with the bfloat type rather than the i16 and i32 storage types. As such, the explicit declarations are no longer needed once the tests use the correct types.

Full diff: https://github.com/llvm/llvm-project/pull/101493.diff

2 Files Affected:

(modified) llvm/test/CodeGen/NVPTX/math-intrins-sm80-ptx70-autoupgrade.ll (+48-92)
(modified) llvm/test/CodeGen/NVPTX/math-intrins-sm86-ptx72-autoupgrade.ll (+24-58)

diff --git a/llvm/test/CodeGen/NVPTX/math-intrins-sm80-ptx70-autoupgrade.ll b/llvm/test/CodeGen/NVPTX/math-intrins-sm80-ptx70-autoupgrade.ll
index 34b9c08509326..783fd22b892a9 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins-sm80-ptx70-autoupgrade.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins-sm80-ptx70-autoupgrade.ll
@@ -1,80 +1,36 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | FileCheck %s
 ; RUN: %if ptxas-11.0 %{ llc < %s -march=nvptx64 -mcpu=sm_80 -mattr=+ptx70 | %ptxas-verify -arch=sm_80 %}
 
-declare i16 @llvm.nvvm.abs.bf16(i16)
-declare i32 @llvm.nvvm.abs.bf16x2(i32)
-declare i16 @llvm.nvvm.neg.bf16(i16)
-declare i32 @llvm.nvvm.neg.bf16x2(i32)
-
-declare float @llvm.nvvm.fmin.nan.f(float, float)
-declare float @llvm.nvvm.fmin.ftz.nan.f(float, float)
-declare half @llvm.nvvm.fmin.f16(half, half)
-declare half @llvm.nvvm.fmin.ftz.f16(half, half)
-declare half @llvm.nvvm.fmin.nan.f16(half, half)
-declare half @llvm.nvvm.fmin.ftz.nan.f16(half, half)
-declare <2 x half> @llvm.nvvm.fmin.f16x2(<2 x half>, <2 x half>)
-declare <2 x half> @llvm.nvvm.fmin.ftz.f16x2(<2 x half>, <2 x half>)
-declare <2 x half> @llvm.nvvm.fmin.nan.f16x2(<2 x half>, <2 x half>)
-declare <2 x half> @llvm.nvvm.fmin.ftz.nan.f16x2(<2 x half>, <2 x half>)
-declare i16 @llvm.nvvm.fmin.bf16(i16, i16)
-declare i16 @llvm.nvvm.fmin.nan.bf16(i16, i16)
-declare i32 @llvm.nvvm.fmin.bf16x2(i32, i32)
-declare i32 @llvm.nvvm.fmin.nan.bf16x2(i32, i32)
-
-declare float @llvm.nvvm.fmax.nan.f(float, float)
-declare float @llvm.nvvm.fmax.ftz.nan.f(float, float)
-declare half @llvm.nvvm.fmax.f16(half, half)
-declare half @llvm.nvvm.fmax.ftz.f16(half, half)
-declare half @llvm.nvvm.fmax.nan.f16(half, half)
-declare half @llvm.nvvm.fmax.ftz.nan.f16(half, half)
-declare <2 x half> @llvm.nvvm.fmax.f16x2(<2 x half>, <2 x half>)
-declare <2 x half> @llvm.nvvm.fmax.ftz.f16x2(<2 x half>, <2 x half>)
-declare <2 x half> @llvm.nvvm.fmax.nan.f16x2(<2 x half>, <2 x half>)
-declare <2 x half> @llvm.nvvm.fmax.ftz.nan.f16x2(<2 x half>, <2 x half>)
-declare i16 @llvm.nvvm.fmax.bf16(i16, i16)
-declare i16 @llvm.nvvm.fmax.nan.bf16(i16, i16)
-declare i32 @llvm.nvvm.fmax.bf16x2(i32, i32)
-declare i32 @llvm.nvvm.fmax.nan.bf16x2(i32, i32)
-
-declare half @llvm.nvvm.fma.rn.relu.f16(half, half, half)
-declare half @llvm.nvvm.fma.rn.ftz.relu.f16(half, half, half)
-declare <2 x half> @llvm.nvvm.fma.rn.relu.f16x2(<2 x half>, <2 x half>, <2 x half>)
-declare <2 x half> @llvm.nvvm.fma.rn.ftz.relu.f16x2(<2 x half>, <2 x half>, <2 x half>)
-declare i16 @llvm.nvvm.fma.rn.bf16(i16, i16, i16)
-declare i16 @llvm.nvvm.fma.rn.relu.bf16(i16, i16, i16)
-declare i32 @llvm.nvvm.fma.rn.bf16x2(i32, i32, i32)
-declare i32 @llvm.nvvm.fma.rn.relu.bf16x2(i32, i32, i32)
-
 ; CHECK-LABEL: abs_bf16
-define i16 @abs_bf16(i16 %0) {
+define bfloat @abs_bf16(bfloat %0) {
   ; CHECK-NOT: call
   ; CHECK: abs.bf16
-  %res = call i16 @llvm.nvvm.abs.bf16(i16 %0);
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.abs.bf16(bfloat %0);
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: abs_bf16x2
-define i32 @abs_bf16x2(i32 %0) {
+define <2 x bfloat> @abs_bf16x2(<2 x bfloat> %0) {
   ; CHECK-NOT: call
   ; CHECK: abs.bf16x2
-  %res = call i32 @llvm.nvvm.abs.bf16x2(i32 %0);
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.abs.bf16x2(<2 x bfloat> %0);
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: neg_bf16
-define i16 @neg_bf16(i16 %0) {
+define bfloat @neg_bf16(bfloat %0) {
   ; CHECK-NOT: call
   ; CHECK: neg.bf16
-  %res = call i16 @llvm.nvvm.neg.bf16(i16 %0);
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.neg.bf16(bfloat %0);
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: neg_bf16x2
-define i32 @neg_bf16x2(i32 %0) {
+define <2 x bfloat> @neg_bf16x2(<2 x bfloat> %0) {
   ; CHECK-NOT: call
   ; CHECK: neg.bf16x2
-  %res = call i32 @llvm.nvvm.neg.bf16x2(i32 %0);
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.neg.bf16x2(<2 x bfloat> %0);
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fmin_nan_f
@@ -158,35 +114,35 @@ define <2 x half> @fmin_ftz_nan_f16x2(<2 x half> %0, <2 x half> %1) {
 }
 
 ; CHECK-LABEL: fmin_bf16
-define i16 @fmin_bf16(i16 %0, i16 %1) {
+define bfloat @fmin_bf16(bfloat %0, bfloat %1) {
   ; CHECK-NOT: call
   ; CHECK: min.bf16
-  %res = call i16 @llvm.nvvm.fmin.bf16(i16 %0, i16 %1)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fmin.bf16(bfloat %0, bfloat %1)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fmin_nan_bf16
-define i16 @fmin_nan_bf16(i16 %0, i16 %1) {
+define bfloat @fmin_nan_bf16(bfloat %0, bfloat %1) {
   ; CHECK-NOT: call
   ; CHECK: min.NaN.bf16
-  %res = call i16 @llvm.nvvm.fmin.nan.bf16(i16 %0, i16 %1)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fmin.nan.bf16(bfloat %0, bfloat %1)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fmin_bf16x2
-define i32 @fmin_bf16x2(i32 %0, i32 %1) {
+define <2 x bfloat> @fmin_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1) {
   ; CHECK-NOT: call
   ; CHECK: min.bf16x2
-  %res = call i32 @llvm.nvvm.fmin.bf16x2(i32 %0, i32 %1)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fmin.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1)
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fmin_nan_bf16x2
-define i32 @fmin_nan_bf16x2(i32 %0, i32 %1) {
+define <2 x bfloat> @fmin_nan_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1) {
   ; CHECK-NOT: call
   ; CHECK: min.NaN.bf16x2
-  %res = call i32 @llvm.nvvm.fmin.nan.bf16x2(i32 %0, i32 %1)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fmin.nan.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1)
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fmax_nan_f
@@ -270,35 +226,35 @@ define <2 x half> @fmax_ftz_nan_f16x2(<2 x half> %0, <2 x half> %1) {
 }
 
 ; CHECK-LABEL: fmax_bf16
-define i16 @fmax_bf16(i16 %0, i16 %1) {
+define bfloat @fmax_bf16(bfloat %0, bfloat %1) {
   ; CHECK-NOT: call
   ; CHECK: max.bf16
-  %res = call i16 @llvm.nvvm.fmax.bf16(i16 %0, i16 %1)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fmax.bf16(bfloat %0, bfloat %1)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fmax_nan_bf16
-define i16 @fmax_nan_bf16(i16 %0, i16 %1) {
+define bfloat @fmax_nan_bf16(bfloat %0, bfloat %1) {
   ; CHECK-NOT: call
   ; CHECK: max.NaN.bf16
-  %res = call i16 @llvm.nvvm.fmax.nan.bf16(i16 %0, i16 %1)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fmax.nan.bf16(bfloat %0, bfloat %1)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fmax_bf16x2
-define i32 @fmax_bf16x2(i32 %0, i32 %1) {
+define <2 x bfloat> @fmax_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1) {
   ; CHECK-NOT: call
   ; CHECK: max.bf16x2
-  %res = call i32 @llvm.nvvm.fmax.bf16x2(i32 %0, i32 %1)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fmax.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1)
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fmax_nan_bf16x2
-define i32 @fmax_nan_bf16x2(i32 %0, i32 %1) {
+define <2 x bfloat> @fmax_nan_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1) {
   ; CHECK-NOT: call
   ; CHECK: max.NaN.bf16x2
-  %res = call i32 @llvm.nvvm.fmax.nan.bf16x2(i32 %0, i32 %1)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fmax.nan.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1)
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fma_rn_relu_f16
@@ -334,33 +290,33 @@ define <2 x half> @fma_rn_ftz_relu_f16x2(<2 x half> %0, <2 x half> %1, <2 x half
 }
 
 ; CHECK-LABEL: fma_rn_bf16
-define i16 @fma_rn_bf16(i16 %0, i16 %1, i16 %2) {
+define bfloat @fma_rn_bf16(bfloat %0, bfloat %1, bfloat %2) {
   ; CHECK-NOT: call
   ; CHECK: fma.rn.bf16
-  %res = call i16 @llvm.nvvm.fma.rn.bf16(i16 %0, i16 %1, i16 %2)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fma.rn.bf16(bfloat %0, bfloat %1, bfloat %2)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fma_rn_relu_bf16
-define i16 @fma_rn_relu_bf16(i16 %0, i16 %1, i16 %2) {
+define bfloat @fma_rn_relu_bf16(bfloat %0, bfloat %1, bfloat %2) {
   ; CHECK-NOT: call
   ; CHECK: fma.rn.relu.bf16
-  %res = call i16 @llvm.nvvm.fma.rn.relu.bf16(i16 %0, i16 %1, i16 %2)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fma.rn.relu.bf16(bfloat %0, bfloat %1, bfloat %2)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fma_rn_bf16x2
-define i32 @fma_rn_bf16x2(i32 %0, i32 %1, i32 %2) {
+define <2 x bfloat> @fma_rn_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1, <2 x bfloat> %2) {
   ; CHECK-NOT: call
   ; CHECK: fma.rn.bf16x2
-  %res = call i32 @llvm.nvvm.fma.rn.bf16x2(i32 %0, i32 %1, i32 %2)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fma.rn.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1, <2 x bfloat> %2)
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fma_rn_relu_bf16x2
-define i32 @fma_rn_relu_bf16x2(i32 %0, i32 %1, i32 %2) {
+define <2 x bfloat> @fma_rn_relu_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1, <2 x bfloat> %2) {
   ; CHECK-NOT: call
   ; CHECK: fma.rn.relu.bf16x2
-  %res = call i32 @llvm.nvvm.fma.rn.relu.bf16x2(i32 %0, i32 %1, i32 %2)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fma.rn.relu.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1, <2 x bfloat> %2)
+  ret <2 x bfloat> %res
 }
diff --git a/llvm/test/CodeGen/NVPTX/math-intrins-sm86-ptx72-autoupgrade.ll b/llvm/test/CodeGen/NVPTX/math-intrins-sm86-ptx72-autoupgrade.ll
index b745df484bab2..4070fac67ac9b 100644
--- a/llvm/test/CodeGen/NVPTX/math-intrins-sm86-ptx72-autoupgrade.ll
+++ b/llvm/test/CodeGen/NVPTX/math-intrins-sm86-ptx72-autoupgrade.ll
@@ -1,40 +1,6 @@
 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_86 -mattr=+ptx72 | FileCheck %s
 ; RUN: %if ptxas-11.2 %{ llc < %s -march=nvptx64 -mcpu=sm_86 -mattr=+ptx72 | %ptxas-verify -arch=sm_86 %}
 
-declare half @llvm.nvvm.fmin.xorsign.abs.f16(half, half)
-declare half @llvm.nvvm.fmin.ftz.xorsign.abs.f16(half, half)
-declare half @llvm.nvvm.fmin.nan.xorsign.abs.f16(half, half)
-declare half @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16(half, half)
-declare <2 x half> @llvm.nvvm.fmin.xorsign.abs.f16x2(<2 x half> , <2 x half>)
-declare <2 x half> @llvm.nvvm.fmin.ftz.xorsign.abs.f16x2(<2 x half> , <2 x half>)
-declare <2 x half> @llvm.nvvm.fmin.nan.xorsign.abs.f16x2(<2 x half> , <2 x half>)
-declare <2 x half> @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f16x2(<2 x half> , <2 x half>)
-declare i16 @llvm.nvvm.fmin.xorsign.abs.bf16(i16, i16)
-declare i16 @llvm.nvvm.fmin.nan.xorsign.abs.bf16(i16, i16)
-declare i32 @llvm.nvvm.fmin.xorsign.abs.bf16x2(i32, i32)
-declare i32 @llvm.nvvm.fmin.nan.xorsign.abs.bf16x2(i32, i32)
-declare float @llvm.nvvm.fmin.xorsign.abs.f(float, float)
-declare float @llvm.nvvm.fmin.ftz.xorsign.abs.f(float, float)
-declare float @llvm.nvvm.fmin.nan.xorsign.abs.f(float, float)
-declare float @llvm.nvvm.fmin.ftz.nan.xorsign.abs.f(float, float)
-
-declare half @llvm.nvvm.fmax.xorsign.abs.f16(half, half)
-declare half @llvm.nvvm.fmax.ftz.xorsign.abs.f16(half, half)
-declare half @llvm.nvvm.fmax.nan.xorsign.abs.f16(half, half)
-declare half @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16(half, half)
-declare <2 x half> @llvm.nvvm.fmax.xorsign.abs.f16x2(<2 x half> , <2 x half>)
-declare <2 x half> @llvm.nvvm.fmax.ftz.xorsign.abs.f16x2(<2 x half> , <2 x half>)
-declare <2 x half> @llvm.nvvm.fmax.nan.xorsign.abs.f16x2(<2 x half> , <2 x half>)
-declare <2 x half> @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f16x2(<2 x half> , <2 x half>)
-declare i16 @llvm.nvvm.fmax.xorsign.abs.bf16(i16, i16)
-declare i16 @llvm.nvvm.fmax.nan.xorsign.abs.bf16(i16, i16)
-declare i32 @llvm.nvvm.fmax.xorsign.abs.bf16x2(i32, i32)
-declare i32 @llvm.nvvm.fmax.nan.xorsign.abs.bf16x2(i32, i32)
-declare float @llvm.nvvm.fmax.xorsign.abs.f(float, float)
-declare float @llvm.nvvm.fmax.ftz.xorsign.abs.f(float, float)
-declare float @llvm.nvvm.fmax.nan.xorsign.abs.f(float, float)
-declare float @llvm.nvvm.fmax.ftz.nan.xorsign.abs.f(float, float)
-
 ; CHECK-LABEL: fmin_xorsign_abs_f16
 define half @fmin_xorsign_abs_f16(half %0, half %1) {
   ; CHECK-NOT: call
@@ -100,35 +66,35 @@ define <2 x half> @fmin_ftz_nan_xorsign_abs_f16x2(<2 x half> %0, <2 x half> %1)
 }
 
 ; CHECK-LABEL: fmin_xorsign_abs_bf16
-define i16 @fmin_xorsign_abs_bf16(i16 %0, i16 %1) {
+define bfloat @fmin_xorsign_abs_bf16(bfloat %0, bfloat %1) {
   ; CHECK-NOT: call
   ; CHECK: min.xorsign.abs.bf16
-  %res = call i16 @llvm.nvvm.fmin.xorsign.abs.bf16(i16 %0, i16 %1)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fmin.xorsign.abs.bf16(bfloat %0, bfloat %1)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fmin_nan_xorsign_abs_bf16
-define i16 @fmin_nan_xorsign_abs_bf16(i16 %0, i16 %1) {
+define bfloat @fmin_nan_xorsign_abs_bf16(bfloat %0, bfloat %1) {
   ; CHECK-NOT: call
   ; CHECK: min.NaN.xorsign.abs.bf16
-  %res = call i16 @llvm.nvvm.fmin.nan.xorsign.abs.bf16(i16 %0, i16 %1)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fmin.nan.xorsign.abs.bf16(bfloat %0, bfloat %1)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fmin_xorsign_abs_bf16x2
-define i32 @fmin_xorsign_abs_bf16x2(i32 %0, i32 %1) {
+define <2 x bfloat> @fmin_xorsign_abs_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1) {
   ; CHECK-NOT: call
   ; CHECK: min.xorsign.abs.bf16x2
-  %res = call i32 @llvm.nvvm.fmin.xorsign.abs.bf16x2(i32 %0, i32 %1)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fmin.xorsign.abs.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1)
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fmin_nan_xorsign_abs_bf16x2
-define i32 @fmin_nan_xorsign_abs_bf16x2(i32 %0, i32 %1) {
+define <2 x bfloat> @fmin_nan_xorsign_abs_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1) {
   ; CHECK-NOT: call
   ; CHECK: min.NaN.xorsign.abs.bf16x2
-  %res = call i32 @llvm.nvvm.fmin.nan.xorsign.abs.bf16x2(i32 %0, i32 %1)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fmin.nan.xorsign.abs.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1)
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fmin_xorsign_abs_f
@@ -228,35 +194,35 @@ define <2 x half> @fmax_ftz_nan_xorsign_abs_f16x2(<2 x half> %0, <2 x half> %1)
 }
 
 ; CHECK-LABEL: fmax_xorsign_abs_bf16
-define i16 @fmax_xorsign_abs_bf16(i16 %0, i16 %1) {
+define bfloat @fmax_xorsign_abs_bf16(bfloat %0, bfloat %1) {
   ; CHECK-NOT: call
   ; CHECK: max.xorsign.abs.bf16
-  %res = call i16 @llvm.nvvm.fmax.xorsign.abs.bf16(i16 %0, i16 %1)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fmax.xorsign.abs.bf16(bfloat %0, bfloat %1)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fmax_nan_xorsign_abs_bf16
-define i16 @fmax_nan_xorsign_abs_bf16(i16 %0, i16 %1) {
+define bfloat @fmax_nan_xorsign_abs_bf16(bfloat %0, bfloat %1) {
   ; CHECK-NOT: call
   ; CHECK: max.NaN.xorsign.abs.bf16
-  %res = call i16 @llvm.nvvm.fmax.nan.xorsign.abs.bf16(i16 %0, i16 %1)
-  ret i16 %res
+  %res = call bfloat @llvm.nvvm.fmax.nan.xorsign.abs.bf16(bfloat %0, bfloat %1)
+  ret bfloat %res
 }
 
 ; CHECK-LABEL: fmax_xorsign_abs_bf16x2
-define i32 @fmax_xorsign_abs_bf16x2(i32 %0, i32 %1) {
+define <2 x bfloat> @fmax_xorsign_abs_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1) {
   ; CHECK-NOT: call
   ; CHECK: max.xorsign.abs.bf16x2
-  %res = call i32 @llvm.nvvm.fmax.xorsign.abs.bf16x2(i32 %0, i32 %1)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fmax.xorsign.abs.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1)
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fmax_nan_xorsign_abs_bf16x2
-define i32 @fmax_nan_xorsign_abs_bf16x2(i32 %0, i32 %1) {
+define <2 x bfloat> @fmax_nan_xorsign_abs_bf16x2(<2 x bfloat> %0, <2 x bfloat> %1) {
   ; CHECK-NOT: call
   ; CHECK: max.NaN.xorsign.abs.bf16x2
-  %res = call i32 @llvm.nvvm.fmax.nan.xorsign.abs.bf16x2(i32 %0, i32 %1)
-  ret i32 %res
+  %res = call <2 x bfloat> @llvm.nvvm.fmax.nan.xorsign.abs.bf16x2(<2 x bfloat> %0, <2 x bfloat> %1)
+  ret <2 x bfloat> %res
 }
 
 ; CHECK-LABEL: fmax_xorsign_abs_f

As of commit 250f2bb, these intrinsics are defined with the bfloat type rather than the i16 and i32 storage types. As such, the explicit declarations are no longer needed once the tests use the correct types.

justinfargnoli · 2024-08-09T01:00:39Z

LGTM, but please wait 24 hours to merge to give other members of the community time to review this.

hdelan · 2024-08-09T20:53:58Z

Thanks @justinfargnoli and @Artem-B. It would be great if someone could land this, as I don’t have commit permissions.

justinfargnoli · 2024-08-09T21:02:01Z

@hdelan, if you're interested, the documentation on obtaining commit access is in the "Obtaining Commit Access" section of the LLVM Developer Policy.

* 'main' of https://github.com/llvm/llvm-project: (700 commits) [SandboxIR][NFC] SingleLLVMInstructionImpl class (llvm#102687) [ThinLTO]Clean up 'import-assume-unique-local' flag. (llvm#102424) [nsan] Make #include more conventional [SandboxIR][NFC] Use Tracker.emplaceIfTracking() [libc] Moved range_reduction_double ifdef statement (llvm#102659) [libc] Fix CFP long double and add tests (llvm#102660) [TargetLowering] Handle vector types in expandFixedPointMul (llvm#102635) [compiler-rt][NFC] Replace environment variable with %t (llvm#102197) [UnitTests] Convert a test to use opaque pointers (llvm#102668) [CodeGen][NFCI] Don't re-implement parts of ASTContext::getIntWidth (llvm#101765) [SandboxIR] Clean up tracking code with the help of emplaceIfTracking() (llvm#102406) [mlir][bazel] remove extra blanks in mlir-tblgen test [NVPTX][NFC] Update tests to use bfloat type (llvm#101493) [mlir] Add support for parsing nested PassPipelineOptions (llvm#101118) [mlir][bazel] add missing td dependency in mlir-tblgen test [flang][cuda] Fix lib dependency [libc] Clean up remaining use of *_WIDTH macros in printf (llvm#102679) [flang][cuda] Convert cuf.alloc for box to fir.alloca in device context (llvm#102662) [SandboxIR] Implement the InsertElementInst class (llvm#102404) [libc] Fix use of cpp::numeric_limits<...>::digits (llvm#102674) [mlir][ODS] Verify type constraints in Types and Attributes (llvm#102326) [LTO] enable `ObjCARCContractPass` only on optimized build (llvm#101114) [mlir][ODS] Consistent `cppType` / `cppClassName` usage (llvm#102657) [lldb] Move definition of SBSaveCoreOptions dtor out of header (llvm#102539) [libc] Use cpp::numeric_limits in preference to C23 <limits.h> macros (llvm#102665) [clang] Implement -fptrauth-auth-traps. 
(llvm#102417) [LLVM][rtsan] rtsan transform to preserve CFGAnalyses (llvm#102651) Revert "[AMDGPU] Move `AMDGPUAttributorPass` to full LTO post link stage (llvm#102086)" [RISCV][GISel] Add missing tests for G_CTLZ/CTTZ instruction selection. NFC Return available function types for BindingDecls. (llvm#102196) [clang] Wire -fptrauth-returns to "ptrauth-returns" fn attribute. (llvm#102416) [RISCV] Remove riscv-experimental-rv64-legal-i32. (llvm#102509) [RISCV] Move PseudoVSET(I)VLI expansion to use PseudoInstExpansion. (llvm#102496) [NVPTX] support switch statement with brx.idx (reland) (llvm#102550) [libc][newhdrgen]sorted function names in yaml (llvm#102544) [GlobalIsel] Combine G_ADD and G_SUB with constants (llvm#97771) Suppress spurious warnings due to R_RISCV_SET_ULEB128 [scudo] Separated committed and decommitted entries. (llvm#101409) [MIPS] Fix missing ANDI optimization (llvm#97689) [Clang] Add env var for nvptx-arch/amdgpu-arch timeout (llvm#102521) [asan] Switch allocator to dynamic base address (llvm#98511) [AMDGPU] Move `AMDGPUAttributorPass` to full LTO post link stage (llvm#102086) [libc][math][c23] Add fadd{l,f128} C23 math functions (llvm#102531) [mlir][bazel] revert bazel rule change for DLTITransformOps [msan] Support vst{2,3,4}_lane instructions (llvm#101215) Revert "[MLIR][DLTI][Transform] Introduce transform.dlti.query (llvm#101561)" [X86] pr57673.ll - generate MIR test checks [mlir][vector][test] Split tests from vector-transfer-flatten.mlir (llvm#102584) [mlir][bazel] add bazel rule for DLTITransformOps OpenMPOpt: Remove dead include [IR] Add method to GlobalVariable to change type of initializer. (llvm#102553) [flang][cuda] Force default allocator in device code (llvm#102238) [llvm] Construct SmallVector<SDValue> with ArrayRef (NFC) (llvm#102578) [MLIR][DLTI][Transform] Introduce transform.dlti.query (llvm#101561) [AMDGPU][AsmParser][NFC] Remove a misleading comment. 
(llvm#102604) [Arm][AArch64][Clang] Respect function's branch protection attributes. (llvm#101978) [mlir] Verifier: steal bit to track seen instead of set. (llvm#102626) [Clang] Fix Handling of Init Capture with Parameter Packs in LambdaScopeForCallOperatorInstantiationRAII (llvm#100766) [X86] Convert truncsat clamping patterns to use SDPatternMatch. NFC. [gn] Give two scripts argparse.RawDescriptionHelpFormatter [bazel] Add missing dep for the SPIRVToLLVM target [Clang] Simplify specifying passes via -Xoffload-linker (llvm#102483) [bazel] Port for d45de80 [SelectionDAG] Use unaligned store/load to move AVX registers onto stack for `insertelement` (llvm#82130) [Clang][OMPX] Add the code generation for multi-dim `num_teams` (llvm#101407) [ARM] Regenerate big-endian-vmov.ll. NFC [AMDGPU][AsmParser][NFCI] All NamedIntOperands to be of the i32 type. (llvm#102616) [libc][math][c23] Add totalorderl function. (llvm#102564) [mlir][spirv] Support `memref` in `convert-to-spirv` pass (llvm#102534) [MLIR][GPU-LLVM] Convert `gpu.func` to `llvm.func` (llvm#101664) Fix a unit test input file (llvm#102567) [llvm-readobj][COFF] Dump hybrid objects for ARM64X files. (llvm#102245) AMDGPU/NewPM: Port SIFixSGPRCopies to new pass manager (llvm#102614) [MemoryBuiltins] Simplify getCalledFunction() helper (NFC) [AArch64] Add invalid 1 x vscale costs for reductions and reduction-operations. 
(llvm#102105) [MemoryBuiltins] Handle allocator attributes on call-site LSV/test/AArch64: add missing lit.local.cfg; fix build (llvm#102607) Revert "Enable logf128 constant folding for hosts with 128bit floats (llvm#96287)" [RISCV] Add Syntacore SCR5 RV32/64 processors definition (llvm#102285) [InstCombine] Remove unnecessary RUN line from test (NFC) [flang][OpenMP] Handle multiple ranges in `num_teams` clause (llvm#102535) [mlir][vector] Add tests for scalable vectors in one-shot-bufferize.mlir (llvm#102361) [mlir][vector] Disable `vector.matrix_multiply` for scalable vectors (llvm#102573) [clang] Implement CWG2627 Bit-fields and narrowing conversions (llvm#78112) [NFC] Use references to avoid copying (llvm#99863) Revert "[mlir][ArmSME] Pattern to swap shape_cast(tranpose) with transpose(shape_cast) (llvm#100731)" (llvm#102457) [IRBuilder] Generate nuw GEPs for struct member accesses (llvm#99538) [bazel] Port for 9b06e25 [CodeGen][NewPM] Improve start/stop pass error message CodeGenPassBuilder (llvm#102591) [AArch64] Implement TRBMPAM_EL1 system register (llvm#102485) [InstCombine] Fixing wrong select folding in vectors with undef elements (llvm#102244) [AArch64] Sink operands to fmuladd. (llvm#102297) LSV: document hang reported in llvm#37865 (llvm#102479) Enable logf128 constant folding for hosts with 128bit floats (llvm#96287) [RISCV][clang] Remove bfloat base type in non-zvfbfmin vcreate (llvm#102146) [RISCV][clang] Add missing `zvfbfmin` to `vget_v` intrinsic (llvm#102149) [mlir][vector] Add mask elimination transform (llvm#99314) [Clang][Interp] Fix display of syntactically-invalid note for member function calls (llvm#102170) [bazel] Port for 3fffa6d [DebugInfo][RemoveDIs] Use iterator-inserters in clang (llvm#102006) ... Signed-off-by: Edwiin Kusuma Jaya <kutemeikito0905@gmail.com>

llvmbot added the backend:NVPTX label Aug 1, 2024

hdelan changed the title ~~[NVPTX][NFC] Update test to use bfloat type~~ [NVPTX][NFC] Update tests to use bfloat type Aug 1, 2024

hdelan force-pushed the cleanup-test branch from d763f1d to a77d4df Compare August 1, 2024 14:55

Remove unneeded declarations in test

88b4b49

As of commit 250f2bb, these intrinsics are defined with the bfloat type rather than the i16 and i32 storage types. As such, the explicit declarations are no longer needed once the tests use the correct types.

hdelan force-pushed the cleanup-test branch from a77d4df to 88b4b49 Compare August 1, 2024 15:13

justinfargnoli assigned hdelan Aug 9, 2024

justinfargnoli requested review from justinfargnoli and Artem-B August 9, 2024 00:55

justinfargnoli approved these changes Aug 9, 2024

View reviewed changes

Artem-B approved these changes Aug 9, 2024

View reviewed changes

justinfargnoli merged commit 8a5e179 into llvm:main Aug 9, 2024
8 checks passed

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[NVPTX][NFC] Update tests to use bfloat type #101493

[NVPTX][NFC] Update tests to use bfloat type #101493

hdelan commented Aug 1, 2024 •

edited by justinfargnoli

Loading

llvmbot commented Aug 1, 2024

justinfargnoli commented Aug 9, 2024 •

edited

Loading

hdelan commented Aug 9, 2024

justinfargnoli commented Aug 9, 2024

[NVPTX][NFC] Update tests to use bfloat type #101493

[NVPTX][NFC] Update tests to use bfloat type #101493

Conversation

hdelan commented Aug 1, 2024 • edited by justinfargnoli Loading

llvmbot commented Aug 1, 2024

justinfargnoli commented Aug 9, 2024 • edited Loading

hdelan commented Aug 9, 2024

justinfargnoli commented Aug 9, 2024

hdelan commented Aug 1, 2024 •

edited by justinfargnoli

Loading

justinfargnoli commented Aug 9, 2024 •

edited

Loading