From b9557bc4e73cf6a25d5674b5704f02b2cabde023 Mon Sep 17 00:00:00 2001 From: Prashanth Govindarajan Date: Mon, 10 Aug 2020 16:48:35 -0700 Subject: [PATCH] Widen ascii to utf16 (#39510) * WidenAsciiToUtf16 * sq * Address nits --- .../src/System/Text/ASCIIUtility.cs | 112 ++++++++++++++---- 1 file changed, 92 insertions(+), 20 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs index 76075a5e66dc48..0dde692dc68d5e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs @@ -1478,11 +1478,11 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while // this method is running. - if (Sse2.IsSupported) + if (BitConverter.IsLittleEndian && (Sse2.IsSupported || AdvSimd.Arm64.IsSupported)) { if (elementCount >= 2 * (uint)Unsafe.SizeOf>()) { - currentOffset = WidenAsciiToUtf16_Sse2(pAsciiBuffer, pUtf16Buffer, elementCount); + currentOffset = WidenAsciiToUtf16_Intrinsified(pAsciiBuffer, pUtf16Buffer, elementCount); } } else if (Vector.IsHardwareAccelerated) @@ -1597,7 +1597,18 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf goto Finish; } - private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool ContainsNonAsciiByte(Vector128 value) + { + if (!AdvSimd.Arm64.IsSupported) + { + throw new PlatformNotSupportedException(); + } + value = AdvSimd.Arm64.MaxPairwise(value, value); + return (value.AsUInt64().ToScalar() & 0x8080808080808080) != 0; + } + + private static unsafe nuint WidenAsciiToUtf16_Intrinsified(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount) { // JIT turns the below into constants @@ -1608,7 +1619,7 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt // jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII // data, we jump out of the hot paths to targets at the end of the method. - Debug.Assert(Sse2.IsSupported); + Debug.Assert(Sse2.IsSupported || AdvSimd.Arm64.IsSupported); Debug.Assert(BitConverter.IsLittleEndian); Debug.Assert(elementCount >= 2 * SizeOfVector128); @@ -1617,16 +1628,28 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt Vector128 asciiVector; Vector128 utf16FirstHalfVector; - uint mask; + bool containsNonAsciiBytes; // First, perform an unaligned read of the first part of the input buffer. - asciiVector = Sse2.LoadVector128(pAsciiBuffer); // unaligned load - mask = (uint)Sse2.MoveMask(asciiVector); + if (Sse2.IsSupported) + { + asciiVector = Sse2.LoadVector128(pAsciiBuffer); // unaligned load + containsNonAsciiBytes = (uint)Sse2.MoveMask(asciiVector) != 0; + } + else if (AdvSimd.Arm64.IsSupported) + { + asciiVector = AdvSimd.LoadVector128(pAsciiBuffer); + containsNonAsciiBytes = ContainsNonAsciiByte(asciiVector); + } + else + { + throw new PlatformNotSupportedException(); + } // If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do. - if ((byte)mask != 0) + if (containsNonAsciiBytes) { return 0; } @@ -1635,8 +1658,20 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt Vector128 zeroVector = Vector128.Zero; - utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); - Sse2.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned + if (Sse2.IsSupported) + { + utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); + Sse2.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned + } + else if (AdvSimd.IsSupported) + { + utf16FirstHalfVector = AdvSimd.ZeroExtendWideningLower(asciiVector.GetLower()).AsByte(); + AdvSimd.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned + } + else + { + throw new PlatformNotSupportedException(); + } // Calculate how many elements we wrote in order to get pOutputBuffer to its next alignment // point, then use that as the base offset going forward. Remember the >> 1 to account for @@ -1658,20 +1693,45 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt { // In a loop, perform an unaligned read, widen to two vectors, then aligned write the two vectors. - asciiVector = Sse2.LoadVector128(pAsciiBuffer + currentOffset); // unaligned load - mask = (uint)Sse2.MoveMask(asciiVector); + if (Sse2.IsSupported) + { + asciiVector = Sse2.LoadVector128(pAsciiBuffer + currentOffset); // unaligned load + containsNonAsciiBytes = (uint)Sse2.MoveMask(asciiVector) != 0; + } + else if (AdvSimd.Arm64.IsSupported) + { + asciiVector = AdvSimd.LoadVector128(pAsciiBuffer + currentOffset); + containsNonAsciiBytes = ContainsNonAsciiByte(asciiVector); + } + else + { + throw new PlatformNotSupportedException(); + } - if (mask != 0) + if (containsNonAsciiBytes) { // non-ASCII byte somewhere goto NonAsciiDataSeenInInnerLoop; } - Vector128 low = Sse2.UnpackLow(asciiVector, zeroVector); - Sse2.StoreAligned((byte*)pCurrentWriteAddress, low); + if (Sse2.IsSupported) + { + Vector128 low = Sse2.UnpackLow(asciiVector, zeroVector); + Sse2.StoreAligned((byte*)pCurrentWriteAddress, low); - Vector128 high = Sse2.UnpackHigh(asciiVector, zeroVector); - Sse2.StoreAligned((byte*)pCurrentWriteAddress + SizeOfVector128, high); + Vector128 high = Sse2.UnpackHigh(asciiVector, zeroVector); + Sse2.StoreAligned((byte*)pCurrentWriteAddress + SizeOfVector128, high); + } + else if (AdvSimd.Arm64.IsSupported) + { + Vector128 low = AdvSimd.ZeroExtendWideningLower(asciiVector.GetLower()); + Vector128 high = AdvSimd.ZeroExtendWideningUpper(asciiVector); + AdvSimd.Arm64.StorePair((ushort*)pCurrentWriteAddress, low, high); + } + else + { + throw new PlatformNotSupportedException(); + } currentOffset += SizeOfVector128; pCurrentWriteAddress += SizeOfVector128; @@ -1685,11 +1745,23 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt // Can we at least widen the first part of the vector? - if ((byte)mask == 0) + if (!containsNonAsciiBytes) { // First part was all ASCII, widen - utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); - Sse2.StoreAligned((byte*)(pUtf16Buffer + currentOffset), utf16FirstHalfVector); + if (Sse2.IsSupported) + { + utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector); + Sse2.StoreAligned((byte*)(pUtf16Buffer + currentOffset), utf16FirstHalfVector); + } + else if (AdvSimd.Arm64.IsSupported) + { + Vector128 lower = AdvSimd.ZeroExtendWideningLower(asciiVector.GetLower()); + AdvSimd.Store((ushort*)(pUtf16Buffer + currentOffset), lower); + } + else + { + throw new PlatformNotSupportedException(); + } currentOffset += SizeOfVector128 / 2; }