From 8fa6d36636ffc01b9ef734d14bb31ee7441d7583 Mon Sep 17 00:00:00 2001 From: Miha Zupan Date: Sat, 14 Sep 2024 01:39:41 +0200 Subject: [PATCH] Improve TeddyHelper.RightShift helpers for AVX512 --- .../AsciiStringSearchValuesTeddyBase.cs | 8 ++--- .../Strings/Helpers/TeddyHelper.cs | 31 +++++++------------ 2 files changed, 16 insertions(+), 23 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs index df6c8a90ac47fb..1a0c3d80863aec 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/AsciiStringSearchValuesTeddyBase.cs @@ -154,7 +154,7 @@ protected int IndexOfAnyN2(ReadOnlySpan span) { // The behavior of the rest of the function remains the same if Avx2 or Avx512BW aren't supported #pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough - if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && span.Length >= CharsPerIterationAvx512 + MatchStartOffsetN2) + if (Vector512.IsHardwareAccelerated && Avx512Vbmi.IsSupported && span.Length >= CharsPerIterationAvx512 + MatchStartOffsetN2) { return IndexOfAnyN2Avx512(span); } @@ -174,7 +174,7 @@ protected int IndexOfAnyN3(ReadOnlySpan span) { // The behavior of the rest of the function remains the same if Avx2 or Avx512BW aren't supported #pragma warning disable IntrinsicsInSystemPrivateCoreLibAttributeNotSpecificEnough - if (Vector512.IsHardwareAccelerated && Avx512BW.IsSupported && span.Length >= CharsPerIterationAvx512 + MatchStartOffsetN3) + if (Vector512.IsHardwareAccelerated && Avx512Vbmi.IsSupported && span.Length >= CharsPerIterationAvx512 + MatchStartOffsetN3) { return IndexOfAnyN3Avx512(span); } @@ -296,7 +296,7 @@ private int IndexOfAnyN2Avx2(ReadOnlySpan span) goto ContinueLoop; } - [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512Vbmi))] private int IndexOfAnyN2Avx512(ReadOnlySpan span) { // See comments in 'IndexOfAnyN3Vector128' below. @@ -476,7 +476,7 @@ private int IndexOfAnyN3Avx2(ReadOnlySpan span) goto ContinueLoop; } - [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512Vbmi))] private int IndexOfAnyN3Avx512(ReadOnlySpan span) { // See comments in 'IndexOfAnyN3Vector128' above. diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs index a162bad30bcf45..0f734ad4338a25 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/Strings/Helpers/TeddyHelper.cs @@ -66,7 +66,7 @@ public static (Vector256 Result, Vector256 Prev0) ProcessInputN2( } [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512Vbmi))] public static (Vector512 Result, Vector512 Prev0) ProcessInputN2( Vector512 input, Vector512 prev0, @@ -183,7 +183,7 @@ public static (Vector256 Result, Vector256 Prev0, Vector256 Pr } [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512Vbmi))] public static (Vector512 Result, Vector512 Prev0, Vector512 Prev1) ProcessInputN3( Vector512 input, Vector512 prev0, Vector512 prev1, @@ -381,7 +381,7 @@ private static Vector256 RightShift2(Vector256 left, Vector256 } [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512Vbmi))] private static Vector512 RightShift1(Vector512 left, Vector512 right) { // Given input vectors like @@ -389,28 +389,21 @@ private static Vector512 RightShift1(Vector512 left, Vector512 // right: 64, 65, 66, 67, 68, 69, ... , 122, 123, 124, 125, 126, 127 // We want to shift the last element of left (63) to be the first element of the result // result: [63], 64, 65, 66, 67, 68, ... , 121, 122, 123, 124, 125, 126 - // - // Avx512BW.AlignRight acts like four separate Ssse3.AlignRight calls on each 128-bit pair of the of the source operands. - // Result of Avx512BW.AlignRight(right, left, 15) is - // lower: [15], 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, [31], 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, - // upper: [47], 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, [63], 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126 - // note how elements at indexes 0, 16, 32 and 48 are off by 48 places. - // We want to read 63 instead of 15, 79 instead of 31, 95 instead of 47, and 111 instead of 63. - // - // Similar to Avx2 above, we create a temporary value where we shift these positions by 48 places - shift 8-byte values by 6 places (PermuteVar8x64x2). - // The indices vector below could be [6, 7, 8, 9, 10, 11, 12, 13], but we only care about the last byte in each 128-bit block (positions with value 0 don't affect the result). - Vector512 leftShifted = Avx512F.PermuteVar8x64x2(left.AsInt64(), Vector512.Create(0, 7, 0, 9, 0, 11, 0, 13), right.AsInt64()).AsByte(); - return Avx512BW.AlignRight(right, leftShifted, 15); + return Avx512Vbmi.PermuteVar64x8x2(left, Vector512.CreateSequence(63, 1), right); } [MethodImpl(MethodImplOptions.AggressiveInlining)] - [CompExactlyDependsOn(typeof(Avx512BW))] + [CompExactlyDependsOn(typeof(Avx512Vbmi))] private static Vector512 RightShift2(Vector512 left, Vector512 right) { - // See comments in 'RightShift1(Vector512 left, Vector512 right)' above. - Vector512 leftShifted = Avx512F.PermuteVar8x64x2(left.AsInt64(), Vector512.Create(0, 7, 0, 9, 0, 11, 0, 13), right.AsInt64()).AsByte(); - return Avx512BW.AlignRight(right, leftShifted, 14); + // Given input vectors like + // left: 0, 1, 2, 3, 4, 5, ... , 58, 59, 60, 61, [62], [63] + // right: 64, 65, 66, 67, 68, 69, ... , 122, 123, 124, 125, 126, 127 + // We want to shift the last two elements of left (62, 63) to be the first elements of the result + // result: [62], [63], 64, 65, 66, 67, 68, ... , 121, 122, 123, 124, 125 + + return Avx512Vbmi.PermuteVar64x8x2(left, Vector512.CreateSequence(62, 1), right); } } }