Skip to content

Commit

Permalink
Cleanup regex diagnostic output (dotnet#1947)
Browse files Browse the repository at this point in the history
* Include actual pattern in diagnostic output

* Fix char category name in diagnostic output

* Tighten Boyer Moore debug output

* Clean up diagnostic output from RegexCode and RegexNode

* Apply suggestions from code review

Co-Authored-By: Stephen Toub <stoub@microsoft.com>

* more

* display options and timeout

* Some random nits

* Update src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs

Co-Authored-By: Stephen Toub <stoub@microsoft.com>

* Github typo

Co-authored-by: Stephen Toub <stoub@microsoft.com>
  • Loading branch information
danmoseley and stephentoub committed Jan 21, 2020
1 parent 86bfd8d commit 539d9e6
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 36 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,18 @@ private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, C
roptions = options;
internalMatchTimeout = matchTimeout;

#if DEBUG
if (Debug)
{
System.Diagnostics.Debug.Write($"Pattern: {pattern}");
RegexOptions displayOptions = options & ~RegexOptions.Debug;
if (displayOptions != RegexOptions.None)
System.Diagnostics.Debug.Write($"Options: {displayOptions}");
if (matchTimeout != Regex.InfiniteMatchTimeout)
System.Diagnostics.Debug.Write($"Timeout: {matchTimeout}");
}
#endif

// Parse the input
RegexTree tree = RegexParser.Parse(pattern, roptions, culture ?? ((options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,26 +347,28 @@ public string Dump(string indent)
{
StringBuilder sb = new StringBuilder();

sb.Append(indent + "BM Pattern: " + Pattern + "\n");
sb.AppendLine($"{indent}BM Pattern: {Pattern}");
sb.Append(indent + "Positive: ");
for (int i = 0; i < Positive.Length; i++)
{
sb.Append(Positive[i].ToString(CultureInfo.InvariantCulture) + " ");
}
sb.Append("\n");
sb.AppendLine();

if (NegativeASCII != null)
{
sb.Append(indent + "Negative table\n");
sb.Append(indent + "Negative table: ");
for (int i = 0; i < NegativeASCII.Length; i++)
{
if (NegativeASCII[i] != Pattern.Length)
{
sb.Append(indent + " " + Regex.Escape(Convert.ToString((char)i, CultureInfo.InvariantCulture)) + " " + NegativeASCII[i].ToString(CultureInfo.InvariantCulture) + "\n");
sb.Append(" {" + Regex.Escape(Convert.ToString((char)i, CultureInfo.InvariantCulture)) + " " + NegativeASCII[i].ToString(CultureInfo.InvariantCulture) + "}");
}
}
}

sb.AppendLine();

return sb.ToString();
}
#endif
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1522,14 +1522,17 @@ private static ReadOnlySpan<char> SetFromProperty(string capname, bool invert, s

#if DEBUG
public static readonly char[] Hex = new char[] { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
public static readonly string[] Categories = new string[] {"Lu", "Ll", "Lt", "Lm", "Lo", InternalRegexIgnoreCase,
"Mn", "Mc", "Me",
"Nd", "Nl", "No",
"Zs", "Zl", "Zp",
"Cc", "Cf", "Cs", "Co",
"Pc", "Pd", "Ps", "Pe", "Pi", "Pf", "Po",
"Sm", "Sc", "Sk", "So",
"Cn" };
public static readonly string[] CategoryIdToName = PopulateCategoryIdToName();

/// <summary>
/// Builds the reverse lookup stored in <see cref="CategoryIdToName"/>, which the
/// diagnostic output uses to turn a category id back into its name.
/// </summary>
/// <returns>
/// The keys of every <c>s_definedCategories</c> entry whose value is exactly one
/// character long, ordered by that character reinterpreted as a <see cref="short"/>.
/// </returns>
private static string[] PopulateCategoryIdToName()
{
    // Start from a private copy of the defined categories and keep only the
    // entries whose value is a single character.
    var singleCharCategories = new List<KeyValuePair<string, string>>(s_definedCategories);
    singleCharCategories.RemoveAll(entry => entry.Value.Length != 1);

    // Order by the single value character, compared as a signed 16-bit number.
    singleCharCategories.Sort((left, right) =>
    {
        short leftId = (short)left.Value[0];
        short rightId = (short)right.Value[0];
        return leftId.CompareTo(rightId);
    });

    // Project the sorted entries onto their names.
    // NOTE(review): CategoryDescription indexes the result with (id - 1), which
    // presumably relies on the ids being contiguous starting at 1 - confirm.
    string[] names = new string[singleCharCategories.Count];
    for (int i = 0; i < names.Length; i++)
    {
        names[i] = singleCharCategories[i].Key;
    }

    return names;
}

/// <summary>
/// Produces a human-readable description for a set string.
Expand Down Expand Up @@ -1684,10 +1687,10 @@ private static string CategoryDescription(char ch)

if ((short)ch < 0)
{
return "\\P{" + Categories[(-((short)ch) - 1)] + "}";
return "\\P{" + CategoryIdToName[(-((short)ch) - 1)] + "}";
}

return "\\p{" + Categories[(ch - 1)] + "}";
return "\\p{" + CategoryIdToName[(ch - 1)] + "}";
}
#endif

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
// See the LICENSE file in the project root for more information.

// This RegexCode class is internal to the regular expression package.
// It provides operator constants for use by the Builder and the Machine.

// Implementation notes:
//
Expand Down Expand Up @@ -292,7 +291,7 @@ public string OpcodeDescription(int offset)
sb.AppendFormat("{0:D6} ", offset);
sb.Append(OpcodeBacktracks(opcode & Mask) ? '*' : ' ');
sb.Append(OperatorDescription(opcode));
sb.Append('(');
sb.Append(Indent());

opcode &= Mask;

Expand All @@ -308,7 +307,6 @@ public string OpcodeDescription(int offset)
case Notoneloopatomic:
case Onelazy:
case Notonelazy:
sb.Append("Ch = ");
sb.Append(RegexCharClass.CharDescription((char)Codes[offset + 1]));
break;

Expand All @@ -317,34 +315,32 @@ public string OpcodeDescription(int offset)
case Setloop:
case Setloopatomic:
case Setlazy:
sb.Append("Set = ");
sb.Append(RegexCharClass.SetDescription(Strings[Codes[offset + 1]]));
break;

case Multi:
sb.Append("String = ");
sb.Append(Strings[Codes[offset + 1]]);
break;

case Ref:
case Testref:
sb.Append("Index = ");
sb.Append("index = ");
sb.Append(Codes[offset + 1]);
break;

case Capturemark:
sb.Append("Index = ");
sb.Append("index = ");
sb.Append(Codes[offset + 1]);
if (Codes[offset + 2] != -1)
{
sb.Append(", Unindex = ");
sb.Append(", unindex = ");
sb.Append(Codes[offset + 2]);
}
break;

case Nullcount:
case Setcount:
sb.Append("Value = ");
sb.Append("value = ");
sb.Append(Codes[offset + 1]);
break;

Expand All @@ -354,7 +350,7 @@ public string OpcodeDescription(int offset)
case Lazybranchmark:
case Branchcount:
case Lazybranchcount:
sb.Append("Addr = ");
sb.Append("addr = ");
sb.Append(Codes[offset + 1]);
break;
}
Expand All @@ -373,7 +369,7 @@ public string OpcodeDescription(int offset)
case Setloop:
case Setloopatomic:
case Setlazy:
sb.Append(", Rep = ");
sb.Append(", rep = ");
if (Codes[offset + 2] == int.MaxValue)
sb.Append("inf");
else
Expand All @@ -382,15 +378,15 @@ public string OpcodeDescription(int offset)

case Branchcount:
case Lazybranchcount:
sb.Append(", Limit = ");
sb.Append(", limit = ");
if (Codes[offset + 2] == int.MaxValue)
sb.Append("inf");
else
sb.Append(Codes[offset + 2]);
break;
}

sb.Append(')');
string Indent() => new string(' ', Math.Max(1, 25 - sb.Length));

return sb.ToString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1354,6 +1354,8 @@ public string Description()
if ((Options & RegexOptions.IgnorePatternWhitespace) != 0) argSb.Append("-X");
if ((Options & RegexOptions.ECMAScript) != 0) argSb.Append("-E");

argSb.Append(Indent());

switch (Type)
{
case Oneloop:
Expand All @@ -1364,23 +1366,25 @@ public string Description()
case Notonelazy:
case One:
case Notone:
argSb.Append("(Ch = " + RegexCharClass.CharDescription(Ch) + ")");
argSb.Append(RegexCharClass.CharDescription(Ch));
break;
case Capture:
argSb.Append("(index = " + M.ToString(CultureInfo.InvariantCulture) + ", unindex = " + N.ToString(CultureInfo.InvariantCulture) + ")");
argSb.Append("index = " + M);
if (N != -1)
argSb.Append(", unindex = " + N);
break;
case Ref:
case Testref:
argSb.Append("(index = " + M.ToString(CultureInfo.InvariantCulture) + ")");
argSb.Append("index = " + M);
break;
case Multi:
argSb.Append("(String = " + Str + ")");
argSb.Append(Str);
break;
case Set:
case Setloop:
case Setloopatomic:
case Setlazy:
argSb.Append("(Set = " + RegexCharClass.SetDescription(Str!) + ")");
argSb.Append(RegexCharClass.SetDescription(Str!));
break;
}

Expand All @@ -1397,10 +1401,18 @@ public string Description()
case Setlazy:
case Loop:
case Lazyloop:
argSb.Append("(Min = " + M.ToString(CultureInfo.InvariantCulture) + ", Max = " + (N == int.MaxValue ? "inf" : Convert.ToString(N, CultureInfo.InvariantCulture)) + ")");
if (argSb[^1] != ' ')
argSb.Append(", ");
argSb.Append("min = " + M + ", max = ");
if (N == int.MaxValue)
argSb.Append("inf");
else
argSb.Append(N);
break;
}

string Indent() => new string(' ', Math.Max(1, 25 - argSb.Length));

return argSb.ToString();
}

Expand All @@ -1421,7 +1433,7 @@ public void Dump()
curNode = curNode.Child(curChild);
curChild = 0;

Debug.WriteLine(new string(' ', stack.Count) + curNode.Description());
Debug.WriteLine(new string(' ', stack.Count * 2) + curNode.Description());
}
else
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1874,7 +1874,7 @@ private void CountCaptures()
else
{
// Simple (unnamed) capture group.
// Add unnamend parentheses if ExplicitCapture is not set
// Add unnamed parentheses if ExplicitCapture is not set
// and the next parentheses is not ignored.
if (!UseOptionN() && !_ignoreNextParen)
{
Expand Down Expand Up @@ -2313,7 +2313,7 @@ private RegexParseException MakeException(RegexParseError error, string message)
private char CharAt(int i) => _pattern[i];

/// <summary>Returns the char right of the current parsing position.</summary>
internal char RightChar() => _pattern[_currentPos];
private char RightChar() => _pattern[_currentPos];

/// <summary>Returns the char i chars right of the current parsing position.</summary>
private char RightChar(int i) => _pattern[_currentPos + i];
Expand Down

0 comments on commit 539d9e6

Please sign in to comment.