Skip to content

Commit

Permalink
Rewrite SpanHelpers.IndexOfNullByte/IndexOfNullCharacter to unmanaged…
Browse files Browse the repository at this point in the history
… pointers (#81347)
  • Loading branch information
EgorBo authored Jan 31, 2023
1 parent e831dab commit 0043b6e
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 68 deletions.
52 changes: 26 additions & 26 deletions src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ private static void ThrowMustBeNullTerminatedString()
// IndexOfNullByte processes memory in aligned chunks, and thus it won't crash even if it accesses memory beyond the null terminator.
// This behavior is an implementation detail of the runtime and callers outside System.Private.CoreLib must not depend on it.
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
internal static unsafe int IndexOfNullByte(ref byte searchSpace)
internal static unsafe int IndexOfNullByte(byte* searchSpace)
{
const int Length = int.MaxValue;

Expand All @@ -354,32 +354,32 @@ internal static unsafe int IndexOfNullByte(ref byte searchSpace)
if (Vector128.IsHardwareAccelerated)
{
// Avx2 branch also operates on Sse2 sizes, so check is combined.
lengthToExamine = UnalignedCountVector128(ref searchSpace);
lengthToExamine = UnalignedCountVector128(searchSpace);
}
else if (Vector.IsHardwareAccelerated)
{
lengthToExamine = UnalignedCountVector(ref searchSpace);
lengthToExamine = UnalignedCountVector(searchSpace);
}
SequentialScan:
while (lengthToExamine >= 8)
{
lengthToExamine -= 8;

if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
if (uValue == searchSpace[offset])
goto Found;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
if (uValue == searchSpace[offset + 1])
goto Found1;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
if (uValue == searchSpace[offset + 2])
goto Found2;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
if (uValue == searchSpace[offset + 3])
goto Found3;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4))
if (uValue == searchSpace[offset + 4])
goto Found4;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5))
if (uValue == searchSpace[offset + 5])
goto Found5;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6))
if (uValue == searchSpace[offset + 6])
goto Found6;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7))
if (uValue == searchSpace[offset + 7])
goto Found7;

offset += 8;
Expand All @@ -389,13 +389,13 @@ internal static unsafe int IndexOfNullByte(ref byte searchSpace)
{
lengthToExamine -= 4;

if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
if (uValue == searchSpace[offset])
goto Found;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1))
if (uValue == searchSpace[offset + 1])
goto Found1;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2))
if (uValue == searchSpace[offset + 2])
goto Found2;
if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3))
if (uValue == searchSpace[offset + 3])
goto Found3;

offset += 4;
Expand All @@ -405,7 +405,7 @@ internal static unsafe int IndexOfNullByte(ref byte searchSpace)
{
lengthToExamine -= 1;

if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset))
if (uValue == searchSpace[offset])
goto Found;

offset += 1;
Expand All @@ -418,13 +418,13 @@ internal static unsafe int IndexOfNullByte(ref byte searchSpace)
{
if (offset < (nuint)(uint)Length)
{
if ((((nuint)(uint)Unsafe.AsPointer(ref searchSpace) + offset) & (nuint)(Vector256<byte>.Count - 1)) != 0)
if ((((nuint)(uint)searchSpace + offset) & (nuint)(Vector256<byte>.Count - 1)) != 0)
{
// Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches
// with no upper bound e.g. String.strlen.
// Start with a check on Vector128 to align to Vector256, before moving to processing Vector256.
// This ensures we do not fault across memory pages while searching for an end of string.
Vector128<byte> search = Vector128.LoadUnsafe(ref searchSpace, offset);
Vector128<byte> search = Vector128.Load(searchSpace + offset);

// Same method as below
uint matches = Vector128.Equals(Vector128<byte>.Zero, search).ExtractMostSignificantBits();
Expand All @@ -445,7 +445,7 @@ internal static unsafe int IndexOfNullByte(ref byte searchSpace)
{
do
{
Vector256<byte> search = Vector256.LoadUnsafe(ref searchSpace, offset);
Vector256<byte> search = Vector256.Load(searchSpace + offset);
uint matches = Vector256.Equals(Vector256<byte>.Zero, search).ExtractMostSignificantBits();
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
// So the bit position in 'matches' corresponds to the element offset.
Expand All @@ -464,7 +464,7 @@ internal static unsafe int IndexOfNullByte(ref byte searchSpace)
lengthToExamine = GetByteVector128SpanLength(offset, Length);
if (lengthToExamine > offset)
{
Vector128<byte> search = Vector128.LoadUnsafe(ref searchSpace, offset);
Vector128<byte> search = Vector128.Load(searchSpace + offset);

// Same method as above
uint matches = Vector128.Equals(Vector128<byte>.Zero, search).ExtractMostSignificantBits();
Expand Down Expand Up @@ -495,7 +495,7 @@ internal static unsafe int IndexOfNullByte(ref byte searchSpace)

while (lengthToExamine > offset)
{
Vector128<byte> search = Vector128.LoadUnsafe(ref searchSpace, offset);
Vector128<byte> search = Vector128.Load(searchSpace + offset);

// Same method as above
Vector128<byte> compareResult = Vector128.Equals(Vector128<byte>.Zero, search);
Expand Down Expand Up @@ -526,7 +526,7 @@ internal static unsafe int IndexOfNullByte(ref byte searchSpace)

while (lengthToExamine > offset)
{
var matches = Vector.Equals(Vector<byte>.Zero, LoadVector(ref searchSpace, offset));
Vector<byte> matches = Vector.Equals(Vector<byte>.Zero, Vector.Load(searchSpace + offset));
if (Vector<byte>.Zero.Equals(matches))
{
offset += (nuint)Vector<byte>.Count;
Expand Down Expand Up @@ -1123,16 +1123,16 @@ private static nuint GetByteVector256SpanLength(nuint offset, int length)
=> (nuint)(uint)((length - (int)offset) & ~(Vector256<byte>.Count - 1));

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe nuint UnalignedCountVector(ref byte searchSpace)
private static unsafe nuint UnalignedCountVector(byte* searchSpace)
{
nint unaligned = (nint)Unsafe.AsPointer(ref searchSpace) & (Vector<byte>.Count - 1);
nint unaligned = (nint)searchSpace & (Vector<byte>.Count - 1);
return (nuint)((Vector<byte>.Count - unaligned) & (Vector<byte>.Count - 1));
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe nuint UnalignedCountVector128(ref byte searchSpace)
private static unsafe nuint UnalignedCountVector128(byte* searchSpace)
{
nint unaligned = (nint)Unsafe.AsPointer(ref searchSpace) & (Vector128<byte>.Count - 1);
nint unaligned = (nint)searchSpace & (Vector128<byte>.Count - 1);
return (nuint)(uint)((Vector128<byte>.Count - unaligned) & (Vector128<byte>.Count - 1));
}

Expand Down
60 changes: 20 additions & 40 deletions src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
Original file line number Diff line number Diff line change
Expand Up @@ -425,28 +425,28 @@ public static unsafe int SequenceCompareTo(ref char first, int firstLength, ref
// IndexOfNullCharacter processes memory in aligned chunks, and thus it won't crash even if it accesses memory beyond the null terminator.
// This behavior is an implementation detail of the runtime and callers outside System.Private.CoreLib must not depend on it.
[MethodImpl(MethodImplOptions.AggressiveOptimization)]
public static unsafe int IndexOfNullCharacter(ref char searchSpace)
public static unsafe int IndexOfNullCharacter(char* searchSpace)
{
const char value = '\0';
const int length = int.MaxValue;

nint offset = 0;
nint lengthToExamine = length;

if (((int)Unsafe.AsPointer(ref searchSpace) & 1) != 0)
if (((int)searchSpace & 1) != 0)
{
// Input isn't char aligned, we won't be able to align it to a Vector
}
else if (Vector128.IsHardwareAccelerated)
{
// Avx2 branch also operates on Sse2 sizes, so check is combined.
// Needs to be double length to allow us to align the data first.
lengthToExamine = UnalignedCountVector128(ref searchSpace);
lengthToExamine = UnalignedCountVector128(searchSpace);
}
else if (Vector.IsHardwareAccelerated)
{
// Needs to be double length to allow us to align the data first.
lengthToExamine = UnalignedCountVector(ref searchSpace);
lengthToExamine = UnalignedCountVector(searchSpace);
}

SequentialScan:
Expand All @@ -456,15 +456,13 @@ public static unsafe int IndexOfNullCharacter(ref char searchSpace)
// remaining data that is shorter than a Vector length.
while (lengthToExamine >= 4)
{
ref char current = ref Unsafe.Add(ref searchSpace, offset);

if (value == current)
if (value == searchSpace[offset])
goto Found;
if (value == Unsafe.Add(ref current, 1))
if (value == searchSpace[offset + 1])
goto Found1;
if (value == Unsafe.Add(ref current, 2))
if (value == searchSpace[offset + 2])
goto Found2;
if (value == Unsafe.Add(ref current, 3))
if (value == searchSpace[offset + 3])
goto Found3;

offset += 4;
Expand All @@ -473,7 +471,7 @@ public static unsafe int IndexOfNullCharacter(ref char searchSpace)

while (lengthToExamine > 0)
{
if (value == Unsafe.Add(ref searchSpace, offset))
if (value == searchSpace[offset])
goto Found;

offset++;
Expand All @@ -487,25 +485,19 @@ public static unsafe int IndexOfNullCharacter(ref char searchSpace)
if (offset < length)
{
Debug.Assert(length - offset >= Vector128<ushort>.Count);
ref ushort ushortSearchSpace = ref Unsafe.As<char, ushort>(ref searchSpace);
if (((nint)Unsafe.AsPointer(ref Unsafe.Add(ref searchSpace, (nint)offset)) & (nint)(Vector256<byte>.Count - 1)) != 0)
if (((nint)(searchSpace + (nint)offset) & (nint)(Vector256<byte>.Count - 1)) != 0)
{
// Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches
// with no upper bound e.g. String.wcslen. Start with a check on Vector128 to align to Vector256,
// before moving to processing Vector256.

// If the input searchSpan has been fixed or pinned, this ensures we do not fault across memory pages
// This ensures we do not fault across memory pages
// while searching for an end of string. Specifically that this assumes that the length is either correct
// or that the data is pinned otherwise it may cause an AccessViolation from crossing a page boundary into an
// unowned page. If the search is unbounded (e.g. null terminator in wcslen) and the search value is not found,
// again this will likely cause an AccessViolation. However, correctly bounded searches will return -1 rather
// than ever causing an AV.

// If the searchSpan has not been fixed or pinned the GC can relocate it during the execution of this
// method, so the alignment only acts as best endeavour. The GC cost is likely to dominate over
// the misalignment that may occur after; to we default to giving the GC a free hand to relocate and
// its up to the caller whether they are operating over fixed data.
Vector128<ushort> search = Vector128.LoadUnsafe(ref ushortSearchSpace, (nuint)offset);
Vector128<ushort> search = *(Vector128<ushort>*)(searchSpace + (nuint)offset);

// Same method as below
uint matches = Vector128.Equals(Vector128<ushort>.Zero, search).AsByte().ExtractMostSignificantBits();
Expand All @@ -528,7 +520,7 @@ public static unsafe int IndexOfNullCharacter(ref char searchSpace)
{
Debug.Assert(lengthToExamine >= Vector256<ushort>.Count);

Vector256<ushort> search = Vector256.LoadUnsafe(ref ushortSearchSpace, (nuint)offset);
Vector256<ushort> search = *(Vector256<ushort>*)(searchSpace + (nuint)offset);
uint matches = Vector256.Equals(Vector256<ushort>.Zero, search).AsByte().ExtractMostSignificantBits();
// Note that MoveMask has converted the equal vector elements into a set of bit flags,
// So the bit position in 'matches' corresponds to the element offset.
Expand All @@ -551,7 +543,7 @@ public static unsafe int IndexOfNullCharacter(ref char searchSpace)
{
Debug.Assert(lengthToExamine >= Vector128<ushort>.Count);

Vector128<ushort> search = Vector128.LoadUnsafe(ref ushortSearchSpace, (nuint)offset);
Vector128<ushort> search = *(Vector128<ushort>*)(searchSpace + (nuint)offset);

// Same method as above
uint matches = Vector128.Equals(Vector128<ushort>.Zero, search).AsByte().ExtractMostSignificantBits();
Expand Down Expand Up @@ -581,7 +573,6 @@ public static unsafe int IndexOfNullCharacter(ref char searchSpace)
if (offset < length)
{
Debug.Assert(length - offset >= Vector128<ushort>.Count);
ref ushort ushortSearchSpace = ref Unsafe.As<char, ushort>(ref searchSpace);

lengthToExamine = GetCharVector128SpanLength(offset, length);
if (lengthToExamine > 0)
Expand All @@ -590,7 +581,7 @@ public static unsafe int IndexOfNullCharacter(ref char searchSpace)
{
Debug.Assert(lengthToExamine >= Vector128<ushort>.Count);

Vector128<ushort> search = Vector128.LoadUnsafe(ref ushortSearchSpace, (uint)offset);
Vector128<ushort> search = *(Vector128<ushort>*)(searchSpace + (nuint)offset);

// Same method as above
Vector128<ushort> compareResult = Vector128.Equals(Vector128<ushort>.Zero, search);
Expand Down Expand Up @@ -630,7 +621,7 @@ public static unsafe int IndexOfNullCharacter(ref char searchSpace)
{
Debug.Assert(lengthToExamine >= Vector<ushort>.Count);

var matches = Vector.Equals(Vector<ushort>.Zero, LoadVector(ref searchSpace, offset));
var matches = Vector.Equals(Vector<ushort>.Zero, *(Vector<ushort>*)(searchSpace + (nuint)offset));
if (Vector<ushort>.Zero.Equals(matches))
{
offset += Vector<ushort>.Count;
Expand Down Expand Up @@ -708,28 +699,17 @@ private static nint GetCharVector256SpanLength(nint offset, nint length)
=> (length - offset) & ~(Vector256<ushort>.Count - 1);

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe nint UnalignedCountVector(ref char searchSpace)
private static unsafe nint UnalignedCountVector(char* searchSpace)
{
const int ElementsPerByte = sizeof(ushort) / sizeof(byte);
// Figure out how many characters to read sequentially until we are vector aligned
// This is equivalent to:
// unaligned = ((int)pCh % sizeof(Vector<ushort>) / ElementsPerByte
// length = (Vector<ushort>.Count - unaligned) % Vector<ushort>.Count

// This alignment is only valid if the GC does not relocate; so we use ReadUnaligned to get the data.
// If a GC does occur and alignment is lost, the GC cost will outweigh any gains from alignment so it
// isn't too important to pin to maintain the alignment.
return (nint)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte) & (Vector<ushort>.Count - 1);
return (nint)(uint)(-(int)searchSpace / ElementsPerByte) & (Vector<ushort>.Count - 1);
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static unsafe nint UnalignedCountVector128(ref char searchSpace)
private static unsafe nint UnalignedCountVector128(char* searchSpace)
{
const int ElementsPerByte = sizeof(ushort) / sizeof(byte);
// This alignment is only valid if the GC does not relocate; so we use ReadUnaligned to get the data.
// If a GC does occur and alignment is lost, the GC cost will outweigh any gains from alignment so it
// isn't too important to pin to maintain the alignment.
return (nint)(uint)(-(int)Unsafe.AsPointer(ref searchSpace) / ElementsPerByte) & (Vector128<ushort>.Count - 1);
return (nint)(uint)(-(int)searchSpace / ElementsPerByte) & (Vector128<ushort>.Count - 1);
}

public static void Reverse(ref char buf, nuint length)
Expand Down
4 changes: 2 additions & 2 deletions src/libraries/System.Private.CoreLib/src/System/String.cs
Original file line number Diff line number Diff line change
Expand Up @@ -575,9 +575,9 @@ public StringRuneEnumerator EnumerateRunes()
return new StringRuneEnumerator(this);
}

internal static unsafe int wcslen(char* ptr) => SpanHelpers.IndexOfNullCharacter(ref *ptr);
internal static unsafe int wcslen(char* ptr) => SpanHelpers.IndexOfNullCharacter(ptr);

internal static unsafe int strlen(byte* ptr) => SpanHelpers.IndexOfNullByte(ref *ptr);
internal static unsafe int strlen(byte* ptr) => SpanHelpers.IndexOfNullByte(ptr);

//
// IConvertible implementation
Expand Down

0 comments on commit 0043b6e

Please sign in to comment.