Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Widen ascii to utf16 #39510

Merged
merged 3 commits into from
Aug 10, 2020
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 92 additions & 20 deletions src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1478,11 +1478,11 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf
// pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
// this method is running.

if (Sse2.IsSupported)
if (BitConverter.IsLittleEndian && (Sse2.IsSupported || AdvSimd.Arm64.IsSupported))
{
if (elementCount >= 2 * (uint)Unsafe.SizeOf<Vector128<byte>>())
{
currentOffset = WidenAsciiToUtf16_Sse2(pAsciiBuffer, pUtf16Buffer, elementCount);
currentOffset = WidenAsciiToUtf16_Intrinsified(pAsciiBuffer, pUtf16Buffer, elementCount);
}
}
else if (Vector.IsHardwareAccelerated)
Expand Down Expand Up @@ -1597,7 +1597,18 @@ public static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buf
goto Finish;
}

private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount)
/// <summary>
/// Determines whether any byte of <paramref name="value"/> has its most significant
/// bit set, i.e. lies outside the 7-bit ASCII range.
/// </summary>
/// <param name="value">The 16 bytes to inspect.</param>
/// <returns>
/// <see langword="true"/> if at least one byte is 0x80 or greater; otherwise <see langword="false"/>.
/// </returns>
/// <exception cref="PlatformNotSupportedException">
/// <see cref="AdvSimd.Arm64"/> intrinsics are not supported on the current hardware.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool ContainsNonAsciiValue(Vector128<byte> value)
{
    if (AdvSimd.Arm64.IsSupported)
    {
        // Pairwise max of (value, value) folds each adjacent byte pair into a single byte,
        // so the lower 64 bits of the result carry the maximum of every pair of the input.
        Vector128<byte> pairwiseMax = AdvSimd.Arm64.MaxPairwise(value, value);

        // A byte is non-ASCII exactly when its top bit is set; test all eight folded bytes at once.
        ulong highBits = pairwiseMax.AsUInt64().ToScalar() & 0x8080808080808080;
        return highBits != 0;
    }

    throw new PlatformNotSupportedException();
}

private static unsafe nuint WidenAsciiToUtf16_Intrinsified(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount)
{
// JIT turns the below into constants

Expand All @@ -1608,7 +1619,7 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt
// jumps as much as possible in the optimistic case of "all ASCII". If we see non-ASCII
// data, we jump out of the hot paths to targets at the end of the method.

Debug.Assert(Sse2.IsSupported);
Debug.Assert(Sse2.IsSupported || AdvSimd.Arm64.IsSupported);
Debug.Assert(BitConverter.IsLittleEndian);
Debug.Assert(elementCount >= 2 * SizeOfVector128);

Expand All @@ -1617,16 +1628,28 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt

Vector128<byte> asciiVector;
Vector128<byte> utf16FirstHalfVector;
uint mask;
bool containsNonAsciiBytes;

// First, perform an unaligned read of the first part of the input buffer.

asciiVector = Sse2.LoadVector128(pAsciiBuffer); // unaligned load
mask = (uint)Sse2.MoveMask(asciiVector);
if (Sse2.IsSupported)
{
asciiVector = Sse2.LoadVector128(pAsciiBuffer); // unaligned load
containsNonAsciiBytes = (uint)Sse2.MoveMask(asciiVector) != 0;
}
else if (AdvSimd.Arm64.IsSupported)
{
asciiVector = AdvSimd.LoadVector128(pAsciiBuffer);
containsNonAsciiBytes = ContainsNonAsciiValue(asciiVector);
}
else
{
throw new PlatformNotSupportedException();
}

// If there's non-ASCII data in the first 8 elements of the vector, there's nothing we can do.

if ((byte)mask != 0)
if (containsNonAsciiBytes)
{
return 0;
}
Expand All @@ -1635,8 +1658,20 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt

Vector128<byte> zeroVector = Vector128<byte>.Zero;

utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector);
Sse2.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned
if (Sse2.IsSupported)
{
utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector);
Sse2.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned
}
else if (AdvSimd.Arm64.IsSupported)
{
utf16FirstHalfVector = AdvSimd.ZeroExtendWideningLower(asciiVector.GetLower()).AsByte();
AdvSimd.Store((byte*)pUtf16Buffer, utf16FirstHalfVector); // unaligned
}
else
{
throw new PlatformNotSupportedException();
}

// Calculate how many elements we wrote in order to get pOutputBuffer to its next alignment
// point, then use that as the base offset going forward. Remember the >> 1 to account for
Expand All @@ -1658,20 +1693,45 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt
{
// In a loop, perform an unaligned read, widen to two vectors, then aligned write the two vectors.

asciiVector = Sse2.LoadVector128(pAsciiBuffer + currentOffset); // unaligned load
mask = (uint)Sse2.MoveMask(asciiVector);
if (Sse2.IsSupported)
{
asciiVector = Sse2.LoadVector128(pAsciiBuffer + currentOffset); // unaligned load
containsNonAsciiBytes = (uint)Sse2.MoveMask(asciiVector) != 0;
}
else if (AdvSimd.Arm64.IsSupported)
{
asciiVector = AdvSimd.LoadVector128(pAsciiBuffer + currentOffset);
containsNonAsciiBytes = ContainsNonAsciiValue(asciiVector);
}
else
{
throw new PlatformNotSupportedException();
}

if (mask != 0)
if (containsNonAsciiBytes)
{
// non-ASCII byte somewhere
goto NonAsciiDataSeenInInnerLoop;
}

Vector128<byte> low = Sse2.UnpackLow(asciiVector, zeroVector);
Sse2.StoreAligned((byte*)pCurrentWriteAddress, low);
if (Sse2.IsSupported)
{
Vector128<byte> low = Sse2.UnpackLow(asciiVector, zeroVector);
Sse2.StoreAligned((byte*)pCurrentWriteAddress, low);

Vector128<byte> high = Sse2.UnpackHigh(asciiVector, zeroVector);
Sse2.StoreAligned((byte*)pCurrentWriteAddress + SizeOfVector128, high);
Vector128<byte> high = Sse2.UnpackHigh(asciiVector, zeroVector);
Sse2.StoreAligned((byte*)pCurrentWriteAddress + SizeOfVector128, high);
}
else if (AdvSimd.Arm64.IsSupported)
{
Vector128<ushort> low = AdvSimd.ZeroExtendWideningLower(asciiVector.GetLower());
Vector128<ushort> high = AdvSimd.ZeroExtendWideningUpper(asciiVector);
AdvSimd.Arm64.StorePair((ushort*)pCurrentWriteAddress, low, high);
}
else
{
throw new PlatformNotSupportedException();
}

currentOffset += SizeOfVector128;
pCurrentWriteAddress += SizeOfVector128;
Expand All @@ -1685,11 +1745,23 @@ private static unsafe nuint WidenAsciiToUtf16_Sse2(byte* pAsciiBuffer, char* pUt

// Can we at least widen the first part of the vector?

if ((byte)mask == 0)
if (!containsNonAsciiBytes)
{
// First part was all ASCII, widen
utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector);
Sse2.StoreAligned((byte*)(pUtf16Buffer + currentOffset), utf16FirstHalfVector);
if (Sse2.IsSupported)
{
utf16FirstHalfVector = Sse2.UnpackLow(asciiVector, zeroVector);
Sse2.StoreAligned((byte*)(pUtf16Buffer + currentOffset), utf16FirstHalfVector);
}
else if (AdvSimd.Arm64.IsSupported)
{
Vector128<ushort> lower = AdvSimd.ZeroExtendWideningLower(asciiVector.GetLower());
AdvSimd.Store((ushort*)(pUtf16Buffer + currentOffset), lower);
}
else
{
throw new PlatformNotSupportedException();
}
currentOffset += SizeOfVector128 / 2;
}

Expand Down