Skip to content

Commit

Permalink
Solve recently introduce ASCII regression (#80709)
Browse files Browse the repository at this point in the history
* call the helper methods directly to reduce overhead (important for small inputs)

* inline the helper that was supposed to be public into it's only caller
  • Loading branch information
adamsitnik authored Jan 17, 2023
1 parent a024a97 commit f3a0f4f
Show file tree
Hide file tree
Showing 6 changed files with 60 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,7 @@ private protected sealed override unsafe int GetByteCountFast(char* pChars, int
{
// Unrecognized fallback mechanism - count chars manually.

int firstNonAsciiIndex = Ascii.GetIndexOfFirstNonAsciiChar(new ReadOnlySpan<char>(pChars, charsLength));
byteCount = firstNonAsciiIndex < 0 ? charsLength : firstNonAsciiIndex;
byteCount = (int)Ascii.GetIndexOfFirstNonAsciiChar(pChars, (uint)charsLength);
}

charsConsumed = byteCount;
Expand Down Expand Up @@ -355,8 +354,10 @@ private unsafe int GetBytesCommon(char* pChars, int charCount, byte* pBytes, int
[MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetBytesCommon
private protected sealed override unsafe int GetBytesFast(char* pChars, int charsLength, byte* pBytes, int bytesLength, out int charsConsumed)
{
Ascii.FromUtf16(new ReadOnlySpan<char>(pChars, charsLength), new Span<byte>(pBytes, bytesLength), out charsConsumed);
return charsConsumed;
int bytesWritten = (int)Ascii.NarrowUtf16ToAscii(pChars, pBytes, (uint)Math.Min(charsLength, bytesLength));

charsConsumed = bytesWritten;
return bytesWritten;
}

private protected sealed override unsafe int GetBytesWithFallback(ReadOnlySpan<char> chars, int originalCharsLength, Span<byte> bytes, int originalBytesLength, EncoderNLS? encoder)
Expand Down Expand Up @@ -513,8 +514,7 @@ private protected sealed override unsafe int GetCharCountFast(byte* pBytes, int
{
// Unrecognized fallback mechanism - count bytes manually.

int indexOfFirstNonAscii = Ascii.GetIndexOfFirstNonAsciiByte(new ReadOnlySpan<byte>(pBytes, bytesLength));
charCount = indexOfFirstNonAscii < 0 ? bytesLength : indexOfFirstNonAscii;
charCount = (int)Ascii.GetIndexOfFirstNonAsciiByte(pBytes, (uint)bytesLength);
}

bytesConsumed = charCount;
Expand Down Expand Up @@ -627,7 +627,7 @@ private unsafe int GetCharsCommon(byte* pBytes, int byteCount, char* pChars, int
[MethodImpl(MethodImplOptions.AggressiveInlining)] // called directly by GetCharsCommon
private protected sealed override unsafe int GetCharsFast(byte* pBytes, int bytesLength, char* pChars, int charsLength, out int bytesConsumed)
{
Ascii.ToUtf16(new ReadOnlySpan<byte>(pBytes, bytesLength), new Span<char>(pChars, charsLength), out bytesConsumed);
bytesConsumed = (int)Ascii.WidenAsciiToUtf16(pBytes, pChars, (uint)Math.Min(charsLength, bytesLength));
return bytesConsumed;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ private static bool FirstCharInUInt32IsAscii(uint value)
/// </summary>
/// <returns>An ASCII byte is defined as 0x00 - 0x7F, inclusive.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength)
internal static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint bufferLength)
{
// If SSE2 is supported, use those specific intrinsics instead of the generic vectorized
// code below. This has two benefits: (a) we can take advantage of specific instructions like
Expand Down Expand Up @@ -617,7 +617,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Intrinsified(byte* pBuff
/// </summary>
/// <returns>An ASCII char is defined as 0x0000 - 0x007F, inclusive.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */)
internal static unsafe nuint GetIndexOfFirstNonAsciiChar(char* pBuffer, nuint bufferLength /* in chars */)
{
// If SSE2/ASIMD is supported, use those specific intrinsics instead of the generic vectorized
// code below. This has two benefits: (a) we can take advantage of specific instructions like
Expand Down Expand Up @@ -1152,7 +1152,7 @@ private static void NarrowTwoUtf16CharsToAsciiAndWriteToBuffer(ref byte outputBu
/// or once <paramref name="elementCount"/> elements have been converted. Returns the total number
/// of elements that were able to be converted.
/// </summary>
private static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount)
internal static unsafe nuint NarrowUtf16ToAscii(char* pUtf16Buffer, byte* pAsciiBuffer, nuint elementCount)
{
nuint currentOffset = 0;

Expand Down Expand Up @@ -1576,7 +1576,7 @@ private static unsafe nuint NarrowUtf16ToAscii_Intrinsified(char* pUtf16Buffer,
/// or once <paramref name="elementCount"/> elements have been converted. Returns the total number
/// of elements that were able to be converted.
/// </summary>
private static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount)
internal static unsafe nuint WidenAsciiToUtf16(byte* pAsciiBuffer, char* pUtf16Buffer, nuint elementCount)
{
// Intrinsified in mono interpreter
nuint currentOffset = 0;
Expand Down
56 changes: 20 additions & 36 deletions src/libraries/System.Private.CoreLib/src/System/Text/Ascii.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,65 +9,49 @@ namespace System.Text
public static partial class Ascii
{
/// <summary>
/// Returns the index of the first non-ASCII byte in a buffer.
/// Determines whether the provided value contains only ASCII bytes.
/// </summary>
/// <param name="buffer">The buffer to scan.</param>
/// <returns>The index in <paramref name="buffer"/> where the first non-ASCII
/// byte appears, or -1 if the buffer contains only ASCII bytes.</returns>
internal static unsafe int GetIndexOfFirstNonAsciiByte(ReadOnlySpan<byte> buffer)
/// <param name="value">The value to inspect.</param>
/// <returns>True if <paramref name="value"/> contains only ASCII bytes or is
/// empty; False otherwise.</returns>
public static unsafe bool IsValid(ReadOnlySpan<byte> value)
{
if (buffer.IsEmpty)
if (value.IsEmpty)
{
return -1;
return true;
}

nuint bufferLength = (uint)buffer.Length;
fixed (byte* pBuffer = &MemoryMarshal.GetReference(buffer))
nuint bufferLength = (uint)value.Length;
fixed (byte* pBuffer = &MemoryMarshal.GetReference(value))
{
nuint idxOfFirstNonAsciiElement = GetIndexOfFirstNonAsciiByte(pBuffer, bufferLength);
Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength);
return (idxOfFirstNonAsciiElement == bufferLength) ? -1 : (int)idxOfFirstNonAsciiElement;
return idxOfFirstNonAsciiElement == bufferLength;
}
}

/// <summary>
/// Returns the index of the first non-ASCII char in a buffer.
/// Determines whether the provided value contains only ASCII chars.
/// </summary>
/// <param name="buffer">The buffer to scan.</param>
/// <returns>The index in <paramref name="buffer"/> where the first non-ASCII
/// char appears, or -1 if the buffer contains only ASCII char.</returns>
internal static unsafe int GetIndexOfFirstNonAsciiChar(ReadOnlySpan<char> buffer)
/// <param name="value">The value to inspect.</param>
/// <returns>True if <paramref name="value"/> contains only ASCII chars or is
/// empty; False otherwise.</returns>
public static unsafe bool IsValid(ReadOnlySpan<char> value)
{
if (buffer.IsEmpty)
if (value.IsEmpty)
{
return -1;
return true;
}

nuint bufferLength = (uint)buffer.Length;
fixed (char* pBuffer = &MemoryMarshal.GetReference(buffer))
nuint bufferLength = (uint)value.Length;
fixed (char* pBuffer = &MemoryMarshal.GetReference(value))
{
nuint idxOfFirstNonAsciiElement = GetIndexOfFirstNonAsciiChar(pBuffer, bufferLength);
Debug.Assert(idxOfFirstNonAsciiElement <= bufferLength);
return (idxOfFirstNonAsciiElement == bufferLength) ? -1 : (int)idxOfFirstNonAsciiElement;
return idxOfFirstNonAsciiElement == bufferLength;
}
}

/// <summary>
/// Determines whether the provided value contains only ASCII bytes.
/// </summary>
/// <param name="value">The value to inspect.</param>
/// <returns>True if <paramref name="value"/> contains only ASCII bytes or is
/// empty; False otherwise.</returns>
public static unsafe bool IsValid(ReadOnlySpan<byte> value) => GetIndexOfFirstNonAsciiByte(value) < 0;

/// <summary>
/// Determines whether the provided value contains only ASCII chars.
/// </summary>
/// <param name="value">The value to inspect.</param>
/// <returns>True if <paramref name="value"/> contains only ASCII chars or is
/// empty; False otherwise.</returns>
public static unsafe bool IsValid(ReadOnlySpan<char> value) => GetIndexOfFirstNonAsciiChar(value) < 0;

/// <summary>
/// Determines whether the provided value is ASCII byte.
/// </summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ internal static unsafe partial class Utf16Utility
// First, we'll handle the common case of all-ASCII. If this is able to
// consume the entire buffer, we'll skip the remainder of this method's logic.

int firstNonAsciiIndex = Ascii.GetIndexOfFirstNonAsciiChar(new ReadOnlySpan<char>(pInputBuffer, inputLength));
int numAsciiCharsConsumedJustNow = firstNonAsciiIndex < 0 ? inputLength : firstNonAsciiIndex;
int numAsciiCharsConsumedJustNow = (int)Ascii.GetIndexOfFirstNonAsciiChar(pInputBuffer, (uint)inputLength);
Debug.Assert(0 <= numAsciiCharsConsumedJustNow && numAsciiCharsConsumedJustNow <= inputLength);

pInputBuffer += (uint)numAsciiCharsConsumedJustNow;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,23 +26,25 @@ public static OperationStatus TranscodeToUtf16(byte* pInputBuffer, int inputLeng
Debug.Assert(pOutputBuffer != null || outputCharsRemaining == 0, "Destination length must be zero if destination buffer pointer is null.");

// First, try vectorized conversion.
OperationStatus status = Ascii.ToUtf16(new ReadOnlySpan<byte>(pInputBuffer, inputLength), new Span<char>(pOutputBuffer, outputCharsRemaining), out int bytesConsumed);
{
nuint numElementsConverted = Ascii.WidenAsciiToUtf16(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputCharsRemaining));

pInputBuffer += bytesConsumed;
pOutputBuffer += bytesConsumed;
pInputBuffer += numElementsConverted;
pOutputBuffer += numElementsConverted;

// Quick check - did we just end up consuming the entire input buffer?
// If so, short-circuit the remainder of the method.
// Quick check - did we just end up consuming the entire input buffer?
// If so, short-circuit the remainder of the method.

if (status == OperationStatus.Done)
{
pInputBufferRemaining = pInputBuffer;
pOutputBufferRemaining = pOutputBuffer;
return OperationStatus.Done;
}
if ((int)numElementsConverted == inputLength)
{
pInputBufferRemaining = pInputBuffer;
pOutputBufferRemaining = pOutputBuffer;
return OperationStatus.Done;
}

inputLength -= bytesConsumed;
outputCharsRemaining -= bytesConsumed;
inputLength -= (int)numElementsConverted;
outputCharsRemaining -= (int)numElementsConverted;
}

if (inputLength < sizeof(uint))
{
Expand Down Expand Up @@ -846,23 +848,23 @@ public static OperationStatus TranscodeToUtf8(char* pInputBuffer, int inputLengt
// First, try vectorized conversion.

{
OperationStatus status = Ascii.FromUtf16(new ReadOnlySpan<char>(pInputBuffer, inputLength), new Span<byte>(pOutputBuffer, outputBytesRemaining), out int charsConsumed);
nuint numElementsConverted = Ascii.NarrowUtf16ToAscii(pInputBuffer, pOutputBuffer, (uint)Math.Min(inputLength, outputBytesRemaining));

pInputBuffer += charsConsumed;
pOutputBuffer += charsConsumed;
pInputBuffer += numElementsConverted;
pOutputBuffer += numElementsConverted;

// Quick check - did we just end up consuming the entire input buffer?
// If so, short-circuit the remainder of the method.

if (status == OperationStatus.Done)
if ((int)numElementsConverted == inputLength)
{
pInputBufferRemaining = pInputBuffer;
pOutputBufferRemaining = pOutputBuffer;
return OperationStatus.Done;
}

inputLength -= charsConsumed;
outputBytesRemaining -= charsConsumed;
inputLength -= (int)numElementsConverted;
outputBytesRemaining -= (int)numElementsConverted;
}

if (inputLength < CharsPerDWord)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,20 @@ internal static unsafe partial class Utf8Utility
Debug.Assert(pInputBuffer != null || inputLength == 0, "Input length must be zero if input buffer pointer is null.");

// First, try to drain off as many ASCII bytes as we can from the beginning.
int indexOfFirstNonAscii = Ascii.GetIndexOfFirstNonAsciiByte(new ReadOnlySpan<byte>(pInputBuffer, inputLength));
nuint numAsciiBytesCounted = Ascii.GetIndexOfFirstNonAsciiByte(pInputBuffer, (uint)inputLength);
pInputBuffer += numAsciiBytesCounted;

// Quick check - did we just end up consuming the entire input buffer?
// If so, short-circuit the remainder of the method.
if (indexOfFirstNonAscii < 0)

inputLength -= (int)numAsciiBytesCounted;
if (inputLength == 0)
{
utf16CodeUnitCountAdjustment = 0;
scalarCountAdjustment = 0;
return pInputBuffer + inputLength;
return pInputBuffer;
}

pInputBuffer += indexOfFirstNonAscii;
inputLength -= indexOfFirstNonAscii;

#if DEBUG
// Keep these around for final validation at the end of the method.
byte* pOriginalInputBuffer = pInputBuffer;
Expand Down

0 comments on commit f3a0f4f

Please sign in to comment.