[msan] Support most Arm NEON vector shift instructions #102507
Conversation
This adds support for the Arm NEON vector shift instructions that follow the same pattern as the x86 handler (handleVectorShiftIntrinsic). VSLI is not supported because it does not follow the two-argument pattern expected by handleVectorShiftIntrinsic. This patch also updates the arm64-vshift.ll MSan test that was introduced in llvm@5d0a12d.
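For context, the propagation rule that handleVectorShiftIntrinsic emits (and that the updated CHECK lines below spell out) is: apply the same shift intrinsic to the first operand's shadow using the concrete shift amount, then OR in an all-ones value if any shadow bit of the shift amount is set, since an uninitialized amount makes every result bit unknowable. Below is a minimal standalone model of that rule, with illustrative names and a plain << standing in for the NEON saturating shift:

#include <array>
#include <cstdint>
#include <cstdio>

using V8x8 = std::array<uint8_t, 8>; // models an <8 x i8> vector

// Shadow convention: a set bit means the corresponding value bit is
// uninitialized ("poisoned").
V8x8 shiftShadow(const V8x8 &shadow1, uint64_t shiftAmtShadow, int shiftAmt) {
  V8x8 out{};
  // Corresponds to the icmp ne i64 ..., 0 / sext i1 pair in the
  // instrumented IR: is any bit of the shift amount poisoned?
  bool amtPoisoned = (shiftAmtShadow != 0);
  for (size_t i = 0; i < out.size(); ++i) {
    // MSan re-invokes the original intrinsic on the shadow with the real
    // shift amount; a plain shift approximates that here.
    uint8_t shifted = static_cast<uint8_t>(shadow1[i] << shiftAmt);
    // An unknown shift amount poisons the whole lane.
    out[i] = shifted | (amtPoisoned ? 0xFF : 0x00);
  }
  return out;
}

int main() {
  V8x8 s1{0x01}; // one poisoned bit in lane 0, remaining lanes clean
  V8x8 r = shiftShadow(s1, /*shiftAmtShadow=*/0, /*shiftAmt=*/3);
  std::printf("lane 0 shadow: 0x%02x\n", r[0]); // 0x08: poison moved with the data
}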
@llvm/pr-subscribers-compiler-rt-sanitizer @llvm/pr-subscribers-llvm-transforms

Author: Thurston Dang (thurstond)

Changes: This adds support for the Arm NEON vector shift instructions that follow the same pattern as the x86 handler (handleVectorShiftIntrinsic). VSLI is not supported because it does not follow the two-argument pattern expected by handleVectorShiftIntrinsic. This patch also updates the arm64-vshift.ll MSan test that was introduced in 5d0a12d.

Patch is 222.49 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/102507.diff

2 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 45b3edf2b8f231..a1632a93966c89 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4116,6 +4116,23 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_mmx_psrli_q:
case Intrinsic::x86_mmx_psrai_w:
case Intrinsic::x86_mmx_psrai_d:
+ case Intrinsic::aarch64_neon_rshrn:
+ case Intrinsic::aarch64_neon_sqrshl:
+ case Intrinsic::aarch64_neon_sqrshrn:
+ case Intrinsic::aarch64_neon_sqrshrun:
+ case Intrinsic::aarch64_neon_sqshl:
+ case Intrinsic::aarch64_neon_sqshlu:
+ case Intrinsic::aarch64_neon_sqshrn:
+ case Intrinsic::aarch64_neon_sqshrun:
+ case Intrinsic::aarch64_neon_srshl:
+ case Intrinsic::aarch64_neon_sshl:
+ case Intrinsic::aarch64_neon_uqrshl:
+ case Intrinsic::aarch64_neon_uqrshrn:
+ case Intrinsic::aarch64_neon_uqshl:
+ case Intrinsic::aarch64_neon_uqshrn:
+ case Intrinsic::aarch64_neon_urshl:
+ case Intrinsic::aarch64_neon_ushl:
+ // Not handled here: aarch64_neon_vsli (vector shift left and insert)
handleVectorShiftIntrinsic(I, /* Variable */ false);
break;
case Intrinsic::x86_avx2_psllv_d:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
index a755562d683fbd..4cc038f03ff2c3 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/AArch64/arm64-vshift.ll
@@ -35,7 +35,12 @@ define <8 x i8> @sqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
+; CHECK-NEXT: [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.sqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -74,7 +79,12 @@ define <4 x i16> @sqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.sqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -113,7 +123,12 @@ define <2 x i32> @sqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
+; CHECK-NEXT: [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.sqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -152,7 +167,12 @@ define <1 x i64> @sqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <1 x i64>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[_MSLD1]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <1 x i64>
+; CHECK-NEXT: [[TMP17:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP17]], [[TMP16]]
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> [[TMP2]])
; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <1 x i64> [[TMP3]]
@@ -179,7 +199,8 @@ define <1 x i64> @sqshl1d_constant(ptr %A) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <1 x i64>, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[_MSLD]], <1 x i64> <i64 1>)
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <1 x i64> [[TMP7]], zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = call <1 x i64> @llvm.aarch64.neon.sqshl.v1i64(<1 x i64> [[TMP1]], <1 x i64> <i64 1>)
; CHECK-NEXT: store <1 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <1 x i64> [[TMP3]]
@@ -217,7 +238,10 @@ define i64 @sqshl_scalar(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load i64, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = icmp ne i64 [[_MSLD1]], 0
+; CHECK-NEXT: [[TMP14:%.*]] = sext i1 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[_MSLD]], i64 [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP15]], [[TMP14]]
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 [[TMP2]])
; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i64 [[TMP3]]
@@ -244,7 +268,8 @@ define i64 @sqshl_scalar_constant(ptr %A) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 193514046488576
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP6]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[_MSLD]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[_MSLD]], i64 1)
+; CHECK-NEXT: [[_MSPROP:%.*]] = or i64 [[TMP7]], 0
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.aarch64.neon.sqshl.i64(i64 [[TMP1]], i64 1)
; CHECK-NEXT: store i64 [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret i64 [[TMP3]]
@@ -282,7 +307,12 @@ define <8 x i8> @uqshl8b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i8>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i8> [[_MSLD1]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <8 x i8>
+; CHECK-NEXT: [[TMP17:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[_MSLD]], <8 x i8> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i8> [[TMP17]], [[TMP16]]
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.aarch64.neon.uqshl.v8i8(<8 x i8> [[TMP1]], <8 x i8> [[TMP2]])
; CHECK-NEXT: store <8 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i8> [[TMP3]]
@@ -321,7 +351,12 @@ define <4 x i16> @uqshl4h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i16>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i16> [[_MSLD1]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <4 x i16>
+; CHECK-NEXT: [[TMP17:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[_MSLD]], <4 x i16> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i16> [[TMP17]], [[TMP16]]
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i16> @llvm.aarch64.neon.uqshl.v4i16(<4 x i16> [[TMP1]], <4 x i16> [[TMP2]])
; CHECK-NEXT: store <4 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i16> [[TMP3]]
@@ -360,7 +395,12 @@ define <2 x i32> @uqshl2s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i32>, ptr [[TMP12]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i32> [[_MSLD1]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = icmp ne i64 [[TMP13]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = sext i1 [[TMP14]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[TMP15]] to <2 x i32>
+; CHECK-NEXT: [[TMP17:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[_MSLD]], <2 x i32> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i32> [[TMP17]], [[TMP16]]
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i32> @llvm.aarch64.neon.uqshl.v2i32(<2 x i32> [[TMP1]], <2 x i32> [[TMP2]])
; CHECK-NEXT: store <2 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i32> [[TMP3]]
@@ -399,7 +439,13 @@ define <16 x i8> @sqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
+; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.sqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[TMP3]]
@@ -438,7 +484,13 @@ define <8 x i16> @sqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
+; CHECK-NEXT: [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.sqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
@@ -477,7 +529,13 @@ define <4 x i32> @sqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.sqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -516,7 +574,13 @@ define <2 x i64> @sqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
+; CHECK-NEXT: [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.sqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
@@ -555,7 +619,13 @@ define <16 x i8> @uqshl16b(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <16 x i8>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i8> [[_MSLD1]] to i128
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <16 x i8>
+; CHECK-NEXT: [[TMP18:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[_MSLD]], <16 x i8> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i8> [[TMP18]], [[TMP17]]
; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.aarch64.neon.uqshl.v16i8(<16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
; CHECK-NEXT: store <16 x i8> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[TMP3]]
@@ -594,7 +664,13 @@ define <8 x i16> @uqshl8h(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <8 x i16>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i16> [[_MSLD1]] to i128
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <8 x i16>
+; CHECK-NEXT: [[TMP18:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[_MSLD]], <8 x i16> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP18]], [[TMP17]]
; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.aarch64.neon.uqshl.v8i16(<8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
; CHECK-NEXT: store <8 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[TMP3]]
@@ -633,7 +709,13 @@ define <4 x i32> @uqshl4s(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <4 x i32>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[_MSLD1]] to i128
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <4 x i32>
+; CHECK-NEXT: [[TMP18:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[_MSLD]], <4 x i32> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <4 x i32> [[TMP18]], [[TMP17]]
; CHECK-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.aarch64.neon.uqshl.v4i32(<4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP3]]
@@ -672,7 +754,13 @@ define <2 x i64> @uqshl2d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xor i64 [[TMP10]], 193514046488576
; CHECK-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP11]] to ptr
; CHECK-NEXT: [[_MSLD1:%.*]] = load <2 x i64>, ptr [[TMP12]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[_MSLD]], [[_MSLD1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[_MSLD1]] to i128
+; CHECK-NEXT: [[TMP14:%.*]] = trunc i128 [[TMP13]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = sext i1 [[TMP15]] to i128
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i128 [[TMP16]] to <2 x i64>
+; CHECK-NEXT: [[TMP18:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[_MSLD]], <2 x i64> [[TMP2]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <2 x i64> [[TMP18]], [[TMP17]]
; CHECK-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.aarch64.neon.uqshl.v2i64(<2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP3]]
@@ -711,7 +799,12 @@ define <1 x i64> @uqshl1d(ptr %A, ptr %B) nounwind sanitize_memory {
; CHECK-NEXT: [[TMP11:%.*]] = xo...
[truncated]
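One detail worth noting in the 128-bit test cases above: the shift-amount shadow is bitcast to i128 and then truncated to i64 before the poisoned-amount test, so only the low 64 bits of the amount's shadow are inspected. This mirrors the shared x86 shift handling, where the shift amount occupies the low 64 bits of the vector. A small model of that trunc + icmp + sext sequence (names are illustrative, not from the LLVM source):

#include <cstdint>

struct U128 { uint64_t lo, hi; }; // two little-endian halves of an i128

// Returns all-ones if any of the low 64 shadow bits of the shift amount
// is set, else zero -- the trunc i128->i64 / icmp ne 0 / sext i1->i128
// sequence from the CHECK lines above.
U128 lower64ShadowExtend(U128 amtShadow) {
  uint64_t mask = (amtShadow.lo != 0) ? ~0ull : 0ull;
  return {mask, mask};
}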
LLVM Buildbot has detected a new failure. Full details are available at: https://lab.llvm.org/buildbot/#/builders/123/builds/3344