Skip to content

Commit

Permalink
Optimize CharacterMap further
Browse files Browse the repository at this point in the history
  • Loading branch information
MihaZupan committed Apr 7, 2020
1 parent 4852e08 commit 256b4d3
Show file tree
Hide file tree
Showing 2 changed files with 100 additions and 62 deletions.
160 changes: 99 additions & 61 deletions src/Markdig/Helpers/CharacterMap.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ namespace Markdig.Helpers
public class CharacterMap<T> where T : class
{
private readonly T[] asciiMap;
private readonly Dictionary<char, T> nonAsciiMap;
private readonly BitVector128 isOpeningCharacter;
private readonly Dictionary<uint, T> nonAsciiMap;
private readonly BoolVector128 isOpeningCharacter;

/// <summary>
/// Initializes a new instance of the <see cref="CharacterMap{T}"/> class.
Expand All @@ -34,49 +34,31 @@ public CharacterMap(IEnumerable<KeyValuePair<char, T>> maps)
foreach (var map in maps)
{
var openingChar = map.Key;

charSet.Add(openingChar);

if (openingChar < 128 && openingChar > maxChar)
{
maxChar = openingChar;
}
else if (openingChar >= 128 && nonAsciiMap == null)
{
// Initialize only if we have an actual non-ASCII opening character
nonAsciiMap = new Dictionary<char, T>();
}
maxChar = Math.Max(maxChar, openingChar);
}

OpeningCharacters = charSet.ToArray();
Array.Sort(OpeningCharacters);

asciiMap = new T[maxChar + 1];

This comment has been minimized.

Copy link
@KrisVandermotten

KrisVandermotten Apr 7, 2020

Contributor

What about when maxChar > 128? That used to be prevented, but not any more. Should we not ensure that the array cannot grow without bounds?

var isOpeningCharacter = new BitVector128();

if (maxChar >= 128)
nonAsciiMap = new Dictionary<uint, T>();

foreach (var state in maps)
{
var openingChar = state.Key;
T stateByChar;
char openingChar = state.Key;
if (openingChar < 128)
{
stateByChar = asciiMap[openingChar];

if (stateByChar == null)
{
asciiMap[openingChar] = state.Value;
}
asciiMap[openingChar] ??= state.Value;
isOpeningCharacter.Set(openingChar);
}
else
else if (!nonAsciiMap.ContainsKey(openingChar))
{
if (!nonAsciiMap.TryGetValue(openingChar, out stateByChar))
{
nonAsciiMap[openingChar] = state.Value;
}
nonAsciiMap[openingChar] = state.Value;
}
}

this.isOpeningCharacter = isOpeningCharacter;
}

/// <summary>
Expand All @@ -89,24 +71,26 @@ public CharacterMap(IEnumerable<KeyValuePair<char, T>> maps)
/// </summary>
/// <param name="openingChar">The opening character.</param>
/// <returns>A list of parsers valid for the specified opening character, or null if no parsers are registered.</returns>
public T this[char openingChar]
public T this[uint openingChar]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
T map = null;
if (openingChar < asciiMap.Length)
T[] asciiMap = this.asciiMap;
if (openingChar < (uint)asciiMap.Length)
{
map = asciiMap[openingChar];
return asciiMap[openingChar];
}
else if (nonAsciiMap != null)
else
{
nonAsciiMap.TryGetValue(openingChar, out map);
T map = null;
nonAsciiMap?.TryGetValue(openingChar, out map);
return map;
}
return map;
}
}


/// <summary>
/// Searches for an opening character from a registered parser in the specified string.
/// </summary>
Expand All @@ -116,57 +100,111 @@ public T this[char openingChar]
/// <returns>Index position within the string of the first opening character found in the specified text; if not found, returns -1</returns>
public int IndexOfOpeningCharacter(string text, int start, int end)
{
var openingChars = isOpeningCharacter;

unsafe
if (nonAsciiMap is null)
{
fixed (char* pText = text)
#if NETCOREAPP3_1
ref char textRef = ref Unsafe.AsRef(in text.GetPinnableReference());
for (; start <= end; start++)
{
if (nonAsciiMap == null)
if (IntPtr.Size == 4)
{
for (int i = start; i <= end; i++)
uint c = Unsafe.Add(ref textRef, start);
if (c < 128 && isOpeningCharacter[c])
{
var c = pText[i];
if (c < 128 && openingChars[c])
{
return i;
}
return start;
}
}
else
{
ulong c = Unsafe.Add(ref textRef, start);
if (c < 128 && isOpeningCharacter[c])
{
return start;
}
}
}
#else
unsafe
{
fixed (char* pText = text)
{
for (int i = start; i <= end; i++)
{
var c = pText[i];
if (c < 128 ? openingChars[c] : nonAsciiMap.ContainsKey(c))
char c = pText[i];
if (c < 128 && isOpeningCharacter[c])
{
return i;
}
}
}
}
#endif
return -1;
}
else
{
return IndexOfOpeningCharacterNonAscii(text, start, end);
}
}

/// <summary>
/// Scans the specified string for the first opening character, consulting both the
/// ASCII lookup vector and the non-ASCII dictionary.
/// </summary>
/// <param name="text">The text to scan.</param>
/// <param name="start">Index of the first character to examine.</param>
/// <param name="end">Index of the last character to examine (inclusive).</param>
/// <returns>The index of the first matching character, or -1 if none is found in the range.</returns>
private int IndexOfOpeningCharacterNonAscii(string text, int start, int end)
{
#if NETCOREAPP3_1
    // Characters are read through a raw reference; the range is not validated here.
    ref char origin = ref Unsafe.AsRef(in text.GetPinnableReference());
    int position = start;
    while (position <= end)
    {
        char current = Unsafe.Add(ref origin, position);
        bool isOpening;
        if (current < 128)
        {
            // ASCII range: answered by the 128-entry lookup vector.
            isOpening = isOpeningCharacter[current];
        }
        else
        {
            // Everything else: answered by the dictionary of non-ASCII characters.
            isOpening = nonAsciiMap.ContainsKey(current);
        }

        if (isOpening)
        {
            return position;
        }

        position++;
    }
#else
    unsafe
    {
        // Characters are read through a pinned pointer; the range is not validated here.
        fixed (char* chars = text)
        {
            int position = start;
            while (position <= end)
            {
                char current = chars[position];
                bool isOpening;
                if (current < 128)
                {
                    // ASCII range: answered by the 128-entry lookup vector.
                    isOpening = isOpeningCharacter[current];
                }
                else
                {
                    // Everything else: answered by the dictionary of non-ASCII characters.
                    isOpening = nonAsciiMap.ContainsKey(current);
                }

                if (isOpening)
                {
                    return position;
                }

                position++;
            }
        }
    }
#endif
    return -1;
}
}

internal unsafe struct BoolVector128
{
private fixed bool values[128];

internal unsafe struct BitVector128
public void Set(char c)
{
fixed uint values[4];
Debug.Assert(c < 128);
values[c] = true;
}

public void Set(char c)
public readonly bool this[uint c]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
Debug.Assert(c < 128);
values[c >> 5] |= (uint)1 << c;
return values[c];
}

public readonly bool this[char c]
}
public readonly bool this[ulong c]
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
[MethodImpl(MethodImplOptions.AggressiveInlining)]
get
{
Debug.Assert(c < 128);
return (values[c >> 5] & (uint)1 << c) != 0;
}
Debug.Assert(c < 128 && IntPtr.Size == 8);
return values[c];
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion src/Markdig/Parsers/ParserList.cs
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ protected ParserList(IEnumerable<T> parsersArg) : base(parsersArg)
/// <param name="openingChar">The opening character.</param>
/// <returns>A list of parsers valid for the specified opening character, or null if no parsers are registered.</returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public T[] GetParsersForOpeningCharacter(char openingChar)
public T[] GetParsersForOpeningCharacter(uint openingChar)
{
return charMap[openingChar];
}
Expand Down

0 comments on commit 256b4d3

Please sign in to comment.