diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs index 9c68d11ba6d157..a29209beced54b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs @@ -81,9 +81,10 @@ internal void Reset(Regex regex, string text, int textbeg, int textend, int text _textend = textend; _textstart = textstart; - for (int i = 0; i < _matchcount.Length; i++) + int[] matchcount = _matchcount; + for (int i = 0; i < matchcount.Length; i++) { - _matchcount[i] = 0; + matchcount[i] = 0; } _balancing = false; @@ -170,21 +171,23 @@ public static Match Synchronized(Match inner) internal void AddMatch(int cap, int start, int len) { _matches[cap] ??= new int[2]; + int[][] matches = _matches; - int capcount = _matchcount[cap]; + int[] matchcount = _matchcount; + int capcount = matchcount[cap]; - if (capcount * 2 + 2 > _matches[cap].Length) + if (capcount * 2 + 2 > matches[cap].Length) { - int[] oldmatches = _matches[cap]; + int[] oldmatches = matches[cap]; int[] newmatches = new int[capcount * 8]; for (int j = 0; j < capcount * 2; j++) newmatches[j] = oldmatches[j]; - _matches[cap] = newmatches; + matches[cap] = newmatches; } - _matches[cap][capcount * 2] = start; - _matches[cap][capcount * 2 + 1] = len; - _matchcount[cap] = capcount + 1; + matches[cap][capcount * 2] = start; + matches[cap][capcount * 2 + 1] = len; + matchcount[cap] = capcount + 1; } /* @@ -204,15 +207,16 @@ internal void BalanceMatch(int cap) // first see if it is negative, and therefore is a reference to the next available // capture group for balancing. If it is, we'll reset target to point to that capture. 
- if (_matches[cap][target] < 0) - target = -3 - _matches[cap][target]; + int[][] matches = _matches; + if (matches[cap][target] < 0) + target = -3 - matches[cap][target]; // move back to the previous capture target -= 2; // if the previous capture is a reference, just copy that reference to the end. Otherwise, point to it. - if (target >= 0 && _matches[cap][target] < 0) - AddMatch(cap, _matches[cap][target], _matches[cap][target + 1]); + if (target >= 0 && matches[cap][target] < 0) + AddMatch(cap, matches[cap][target], matches[cap][target + 1]); else AddMatch(cap, -3 - target, -4 - target /* == -3 - (target + 1) */ ); } @@ -230,7 +234,8 @@ internal void RemoveMatch(int cap) /// internal bool IsMatched(int cap) { - return cap < _matchcount.Length && _matchcount[cap] > 0 && _matches[cap][_matchcount[cap] * 2 - 1] != (-3 + 1); + int[] matchcount = _matchcount; + return (uint)cap < (uint)matchcount.Length && matchcount[cap] > 0 && _matches[cap][matchcount[cap] * 2 - 1] != (-3 + 1); } /// @@ -238,11 +243,13 @@ internal bool IsMatched(int cap) /// internal int MatchIndex(int cap) { - int i = _matches[cap][_matchcount[cap] * 2 - 2]; + int[][] matches = _matches; + + int i = matches[cap][_matchcount[cap] * 2 - 2]; if (i >= 0) return i; - return _matches[cap][-3 - i]; + return matches[cap][-3 - i]; } /// @@ -250,11 +257,13 @@ internal int MatchIndex(int cap) /// internal int MatchLength(int cap) { - int i = _matches[cap][_matchcount[cap] * 2 - 1]; + int[][] matches = _matches; + + int i = matches[cap][_matchcount[cap] * 2 - 1]; if (i >= 0) return i; - return _matches[cap][-3 - i]; + return matches[cap][-3 - i]; } /// @@ -262,11 +271,15 @@ internal int MatchLength(int cap) /// internal void Tidy(int textpos) { - int[] interval = _matches[0]; + int[][] matches = _matches; + + int[] interval = matches[0]; Index = interval[0]; Length = interval[1]; _textpos = textpos; - _capcount = _matchcount[0]; + + int[] matchcount = _matchcount; + _capcount = matchcount[0]; if (_balancing) 
{ @@ -276,13 +289,13 @@ internal void Tidy(int textpos) // until we find a balance captures. Then we check each subsequent entry. If it's a balance // capture (it's negative), we decrement j. If it's a real capture, we increment j and copy // it down to the last free position. - for (int cap = 0; cap < _matchcount.Length; cap++) + for (int cap = 0; cap < matchcount.Length; cap++) { int limit; int[] matcharray; - limit = _matchcount[cap] * 2; - matcharray = _matches[cap]; + limit = matchcount[cap] * 2; + matcharray = matches[cap]; int i = 0; int j; @@ -310,7 +323,7 @@ internal void Tidy(int textpos) } } - _matchcount[cap] = j / 2; + matchcount[cap] = j / 2; } _balancing = false; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs index cc427df359c3e5..c142f5d5a62740 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs @@ -205,23 +205,6 @@ public RegexBoyerMoore(string pattern, bool caseInsensitive, bool rightToLeft, C } } - private bool MatchPattern(string text, int index) - { - if (CaseInsensitive) - { - if (text.Length - index < Pattern.Length) - { - return false; - } - - return (0 == string.Compare(Pattern, 0, text, index, Pattern.Length, CaseInsensitive, _culture)); - } - else - { - return (0 == string.CompareOrdinal(Pattern, 0, text, index, Pattern.Length)); - } - } - /// /// When a regex is anchored, we can do a quick IsMatch test instead of a Scan /// @@ -231,16 +214,21 @@ public bool IsMatch(string text, int index, int beglimit, int endlimit) { if (index < beglimit || endlimit - index < Pattern.Length) return false; - - return MatchPattern(text, index); } else { if (index > endlimit || index - beglimit < Pattern.Length) return false; - 
return MatchPattern(text, index - Pattern.Length); + index -= Pattern.Length; } + + if (CaseInsensitive) + { + return string.Compare(Pattern, 0, text, index, Pattern.Length, ignoreCase: true, _culture) == 0; + } + + return Pattern.AsSpan().SequenceEqual(text.AsSpan(index, Pattern.Length)); } /// diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs index 26e44966008311..2c66ad68366f83 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs @@ -5,6 +5,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Globalization; +using System.Threading; namespace System.Text.RegularExpressions { @@ -732,7 +733,7 @@ public static string ConvertOldStringsToClass(string set, string category) /// public static char SingletonChar(string set) { - Debug.Assert(IsSingleton(set) || IsSingletonInverse(set), "Tried to get the singleton char out of a non singleton character class"); + Debug.Assert(IsSingletonInverse(set), "Tried to get the singleton char out of a non singleton character class"); return set[SetStartIndex]; } @@ -747,14 +748,6 @@ public static bool IsEmpty(string charClass) => !IsNegated(charClass) && !IsSubtraction(charClass); - /// true if the set contains a single character only - public static bool IsSingleton(string set) => - set[CategoryLengthIndex] == 0 && - set[SetLengthIndex] == 2 && - !IsNegated(set) && - !IsSubtraction(set) && - (set[SetStartIndex] == LastChar || set[SetStartIndex] + 1 == set[SetStartIndex + 1]); - public static bool IsSingletonInverse(string set) => set[CategoryLengthIndex] == 0 && set[SetLengthIndex] == 2 && @@ -823,6 +816,68 @@ public static bool IsWordChar(char ch) } } + public static bool CharInClass(char 
ch, string set, ref int[]? asciiResultCache) + { + // The int[] contains 8 ints, or 256 bits. These are laid out as pairs, where the first bit ("known") in the pair + // says whether the second bit ("value") in the pair has already been computed. Once a value is computed, it's never + // changed, so since Int32s are written/read atomically, we can trust the value bit if we see that the known bit + // has been set. If the known bit hasn't been set, then we proceed to look it up, and then swap in the result. + const int CacheArrayLength = 8; + Debug.Assert(asciiResultCache is null || asciiResultCache.Length == CacheArrayLength, "set lookup should be able to store two bits for each of the first 128 characters"); + + if (ch < 128) + { + // Lazily-initialize the cache for this set. + if (asciiResultCache is null) + { + Interlocked.CompareExchange(ref asciiResultCache, new int[CacheArrayLength], null); + } + + // Determine which int in the lookup array contains the known and value bits for this character, + // and compute their bit numbers. + ref int slot = ref asciiResultCache[ch >> 4]; + int knownBit = 1 << ((ch & 0xF) << 1); + int valueBit = knownBit << 1; + + // If the value for this bit has already been computed, use it. + int current = slot; + if ((current & knownBit) != 0) + { + return (current & valueBit) != 0; + } + + // (After warm-up, we should find ourselves rarely getting here.) + + // Otherwise, compute it normally. + bool isInClass = CharInClass(ch, set); + + // Determine which bits to write back to the array. + int bitsToSet = knownBit; + if (isInClass) + { + bitsToSet |= valueBit; + } + + // "or" the bits back in a thread-safe manner. + while (true) + { + int oldValue = Interlocked.CompareExchange(ref slot, current | bitsToSet, current); + if (oldValue == current) + { + break; + } + + current = oldValue; + } + + // Return the computed value. + return isInClass; + } + + // Non-ASCII. Fall back to computing the answer. 
+ return CharInClassRecursive(ch, set, 0); + } + public static bool CharInClass(char ch, string set) => CharInClassRecursive(ch, set, 0); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs index 818a058939312d..33ef2f07728151 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs @@ -18,7 +18,6 @@ using System.Collections; using System.Collections.Generic; using System.Diagnostics; -using System.Globalization; namespace System.Text.RegularExpressions { @@ -91,10 +90,12 @@ internal sealed class RegexCode public readonly int[] Codes; // the code public readonly string[] Strings; // the string/set table + public readonly int[]?[] StringsAsciiLookup; // the ASCII lookup table optimization for the sets in Strings public readonly int TrackCount; // how many instructions use backtracking public readonly Hashtable? Caps; // mapping of user group numbers -> impl group slots public readonly int CapSize; // number of impl group slots public readonly RegexPrefix? FCPrefix; // the set of candidate first characters (may be null) + public int[]? FCPrefixAsciiLookup; // the ASCII lookup table optimization for the set of candidate first characters if there are any public readonly RegexBoyerMoore? 
BMPrefix; // the fixed prefix string as a Boyer-Moore machine (may be null) public readonly int Anchors; // the set of zero-length start anchors (RegexFCD.Bol, etc) public readonly bool RightToLeft; // true if right to left @@ -109,6 +110,7 @@ public RegexCode(int[] codes, List<string> stringlist, int trackcount, Codes = codes; Strings = stringlist.ToArray(); + StringsAsciiLookup = new int[Strings.Length][]; TrackCount = trackcount; Caps = caps; CapSize = capsize; diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs index 4ece103bf74808..1f7fe5d4837e69 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs @@ -1163,29 +1163,14 @@ protected void GenerateFindFirstChar() CallToLower(); } - if (!RegexCharClass.IsSingleton(_fcPrefix.GetValueOrDefault().Prefix)) - { - EmitCallCharInClass(_fcPrefix.GetValueOrDefault().Prefix, charInClassV); - BrtrueFar(l2); - } - else - { - Ldc(RegexCharClass.SingletonChar(_fcPrefix.GetValueOrDefault().Prefix)); - Beq(l2); - } + EmitCallCharInClass(_fcPrefix.GetValueOrDefault().Prefix, charInClassV); + BrtrueFar(l2); MarkLabel(l5); Ldloc(cV); Ldc(0); - if (!RegexCharClass.IsSingleton(_fcPrefix.GetValueOrDefault().Prefix)) - { - BgtFar(l1); - } - else - { - Bgt(l1); - } + BgtFar(l1); Ldc(0); BrFar(l3); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs index 067fb04ed470cf..4cc0457de842f6 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs +++ 
b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs @@ -7,6 +7,7 @@ using System.Diagnostics; using System.Globalization; +using System.Runtime.CompilerServices; namespace System.Text.RegularExpressions { @@ -83,36 +84,61 @@ private void TrackPush() private void TrackPush(int I1) { - runtrack![--runtrackpos] = I1; - runtrack[--runtrackpos] = _codepos; + int[] localruntrack = runtrack!; + int localruntrackpos = runtrackpos; + + localruntrack[--localruntrackpos] = I1; + localruntrack[--localruntrackpos] = _codepos; + + runtrackpos = localruntrackpos; } private void TrackPush(int I1, int I2) { - runtrack![--runtrackpos] = I1; - runtrack[--runtrackpos] = I2; - runtrack[--runtrackpos] = _codepos; + int[] localruntrack = runtrack!; + int localruntrackpos = runtrackpos; + + localruntrack[--localruntrackpos] = I1; + localruntrack[--localruntrackpos] = I2; + localruntrack[--localruntrackpos] = _codepos; + + runtrackpos = localruntrackpos; } private void TrackPush(int I1, int I2, int I3) { - runtrack![--runtrackpos] = I1; - runtrack[--runtrackpos] = I2; - runtrack[--runtrackpos] = I3; - runtrack[--runtrackpos] = _codepos; + int[] localruntrack = runtrack!; + int localruntrackpos = runtrackpos; + + localruntrack[--localruntrackpos] = I1; + localruntrack[--localruntrackpos] = I2; + localruntrack[--localruntrackpos] = I3; + localruntrack[--localruntrackpos] = _codepos; + + runtrackpos = localruntrackpos; } private void TrackPush2(int I1) { - runtrack![--runtrackpos] = I1; - runtrack[--runtrackpos] = -_codepos; + int[] localruntrack = runtrack!; + int localruntrackpos = runtrackpos; + + localruntrack[--localruntrackpos] = I1; + localruntrack[--localruntrackpos] = -_codepos; + + runtrackpos = localruntrackpos; } private void TrackPush2(int I1, int I2) { - runtrack![--runtrackpos] = I1; - runtrack[--runtrackpos] = I2; - runtrack[--runtrackpos] = -_codepos; + int[] localruntrack = runtrack!; + int localruntrackpos = runtrackpos; + + 
localruntrack[--localruntrackpos] = I1; + localruntrack[--localruntrackpos] = I2; + localruntrack[--localruntrackpos] = -_codepos; + + runtrackpos = localruntrackpos; } private void Backtrack() @@ -145,6 +171,7 @@ private void Backtrack() _codepos = newpos; } + [MethodImpl(MethodImplOptions.AggressiveInlining)] private void SetOperator(int op) { _caseInsensitive = (0 != (op & RegexCode.Ci)); @@ -186,8 +213,13 @@ private void StackPush(int I1) private void StackPush(int I1, int I2) { - runstack![--runstackpos] = I1; - runstack[--runstackpos] = I2; + int[] localrunstack = runstack!; + int localrunstackpos = runstackpos; + + localrunstack[--localrunstackpos] = I1; + localrunstack[--localrunstackpos] = I2; + + runstackpos = localrunstackpos; } private void StackPop() @@ -241,16 +273,13 @@ private int Bump() return _rightToLeft ? -1 : 1; } - private int Forwardchars() - { - return _rightToLeft ? runtextpos - runtextbeg : runtextend - runtextpos; - } + private int Forwardchars() => _rightToLeft ? runtextpos - runtextbeg : runtextend - runtextpos; private char Forwardcharnext() { - char ch = (_rightToLeft ? runtext![--runtextpos] : runtext![runtextpos++]); + char ch = _rightToLeft ? runtext![--runtextpos] : runtext![runtextpos++]; - return (_caseInsensitive ? _culture.TextInfo.ToLower(ch) : ch); + return _caseInsensitive ? 
_culture.TextInfo.ToLower(ch) : ch; } private bool Stringmatch(string str) @@ -281,8 +310,9 @@ private bool Stringmatch(string str) } else { + TextInfo ti = _culture.TextInfo; while (c != 0) - if (str[--c] != _culture.TextInfo.ToLower(runtext![--pos])) + if (str[--c] != ti.ToLower(runtext![--pos])) return false; } @@ -328,8 +358,9 @@ private bool Refmatch(int index, int len) } else { + TextInfo ti = _culture.TextInfo; while (c-- != 0) - if (_culture.TextInfo.ToLower(runtext![--cmpos]) != _culture.TextInfo.ToLower(runtext[--pos])) + if (ti.ToLower(runtext![--cmpos]) != ti.ToLower(runtext[--pos])) return false; } @@ -418,27 +449,67 @@ protected override bool FindFirstChar() _caseInsensitive = _code.FCPrefix.GetValueOrDefault().CaseInsensitive; string set = _code.FCPrefix.GetValueOrDefault().Prefix; - if (RegexCharClass.IsSingleton(set)) - { - char ch = RegexCharClass.SingletonChar(set); + // We now loop through looking for the first matching character. This is a hot loop, so we lift out as many + // branches as we can. Each operation requires knowing whether this is a) right-to-left vs left-to-right, and + // b) case-sensitive vs case-insensitive. So, we split it all out into 4 loops, for each combination of these. + // It's duplicated code, but it allows the inner loop to be much tighter than if everything were combined with + // multiple branches on each operation. We can also then use spans to avoid bounds checks in at least the forward + // iteration direction where the JIT is able to detect the pattern. 
- for (int i = Forwardchars(); i > 0; i--) + if (!_rightToLeft) + { + ReadOnlySpan<char> span = runtext.AsSpan(runtextpos, runtextend - runtextpos); + if (!_caseInsensitive) + { + // left-to-right, case-sensitive + for (int i = 0; i < span.Length; i++) + { + if (RegexCharClass.CharInClass(span[i], set, ref _code.FCPrefixAsciiLookup)) + { + runtextpos += i; + return true; + } + } + } + else { - if (ch == Forwardcharnext()) + // left-to-right, case-insensitive + TextInfo ti = _culture.TextInfo; + for (int i = 0; i < span.Length; i++) { - Backwardnext(); - return true; + if (RegexCharClass.CharInClass(ti.ToLower(span[i]), set, ref _code.FCPrefixAsciiLookup)) + { + runtextpos += i; + return true; + } } } } else { - for (int i = Forwardchars(); i > 0; i--) + if (!_caseInsensitive) + { + // right-to-left, case-sensitive + for (int i = runtextpos - 1; i >= runtextbeg; i--) + { + if (RegexCharClass.CharInClass(runtext![i], set, ref _code.FCPrefixAsciiLookup)) + { + runtextpos = i + 1; + return true; + } + } + } + else { - if (RegexCharClass.CharInClass(Forwardcharnext(), set)) + // right-to-left, case-insensitive + TextInfo ti = _culture.TextInfo; + for (int i = runtextpos - 1; i >= runtextbeg; i--) { - Backwardnext(); - return true; + if (RegexCharClass.CharInClass(ti.ToLower(runtext![i]), set, ref _code.FCPrefixAsciiLookup)) + { + runtextpos = i + 1; + return true; + } } } } @@ -887,9 +958,15 @@ protected override void Go() continue; case RegexCode.Set: - if (Forwardchars() < 1 || !RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[Operand(0)])) + if (Forwardchars() < 1) break; + { + int operand = Operand(0); + if (!RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[operand], ref _code.StringsAsciiLookup[operand])) + break; + } + advance = 1; continue; @@ -962,7 +1039,9 @@ protected override void Go() if (Forwardchars() < c) break; - string set = _code.Strings[Operand(0)]; + int operand0 = Operand(0); + string set = _code.Strings[operand0]; + ref int[]? 
setLookup = ref _code.StringsAsciiLookup[operand0]; while (c-- > 0) { @@ -974,7 +1053,7 @@ protected override void Go() CheckTimeout(); } - if (!RegexCharClass.CharInClass(Forwardcharnext(), set)) + if (!RegexCharClass.CharInClass(Forwardcharnext(), set, ref setLookup)) goto BreakBackward; } @@ -986,8 +1065,9 @@ protected override void Go() { int c = Operand(1); - if (c > Forwardchars()) - c = Forwardchars(); + int fc = Forwardchars(); + if (c > fc) + c = fc; char ch = (char)Operand(0); int i; @@ -1012,8 +1092,9 @@ protected override void Go() { int c = Operand(1); - if (c > Forwardchars()) - c = Forwardchars(); + int fc = Forwardchars(); + if (c > fc) + c = fc; char ch = (char)Operand(0); int i; @@ -1038,10 +1119,13 @@ protected override void Go() { int c = Operand(1); - if (c > Forwardchars()) - c = Forwardchars(); + int fc = Forwardchars(); + if (c > fc) + c = fc; - string set = _code.Strings[Operand(0)]; + int operand0 = Operand(0); + string set = _code.Strings[operand0]; + ref int[]? 
setLookup = ref _code.StringsAsciiLookup[operand0]; int i; for (i = c; i > 0; i--) @@ -1054,7 +1138,7 @@ protected override void Go() CheckTimeout(); } - if (!RegexCharClass.CharInClass(Forwardcharnext(), set)) + if (!RegexCharClass.CharInClass(Forwardcharnext(), set, ref setLookup)) { Backwardnext(); break; @@ -1104,8 +1188,9 @@ protected override void Go() { int c = Operand(1); - if (c > Forwardchars()) - c = Forwardchars(); + int fc = Forwardchars(); + if (c > fc) + c = fc; if (c > 0) TrackPush(c - 1, Textpos()); @@ -1118,8 +1203,9 @@ protected override void Go() { int c = Operand(1); - if (c > Forwardchars()) - c = Forwardchars(); + int fc = Forwardchars(); + if (c > fc) + c = fc; if (c > 0) TrackPush(c - 1, Textpos()); @@ -1170,7 +1256,8 @@ protected override void Go() int pos = TrackPeek(1); Textto(pos); - if (!RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[Operand(0)])) + int operand0 = Operand(0); + if (!RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[operand0], ref _code.StringsAsciiLookup[operand0])) break; int i = TrackPeek(); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 13cff7efde8d2f..e9e48724d05f37 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -279,8 +279,7 @@ private RegexNode ReduceRep() } /// - /// Simple optimization. If a set is a singleton, an inverse singleton, - /// or empty, it's transformed accordingly. + /// Simple optimization. If a set is an inverse singleton or empty, it's transformed accordingly. 
/// private RegexNode ReduceSet() { @@ -293,12 +292,6 @@ private RegexNode ReduceSet() NType = Nothing; Str = null; } - else if (RegexCharClass.IsSingleton(Str)) - { - Ch = RegexCharClass.SingletonChar(Str); - Str = null; - NType += (One - Set); - } else if (RegexCharClass.IsSingletonInverse(Str)) { Ch = RegexCharClass.SingletonChar(Str); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs index af52dccb664e1a..db3d8b4c82b436 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs @@ -342,9 +342,12 @@ private void InitMatch() /// protected void EnsureStorage() { - if (runstackpos < runtrackcount * 4) + int limit = runtrackcount * 4; + + if (runstackpos < limit) DoubleStack(); - if (runtrackpos < runtrackcount * 4) + + if (runtrackpos < limit) DoubleTrack(); } @@ -382,9 +385,7 @@ protected static bool CharInClass(char ch, string charClass) /// protected void DoubleTrack() { - int[] newtrack; - - newtrack = new int[runtrack!.Length * 2]; + int[] newtrack = new int[runtrack!.Length * 2]; Array.Copy(runtrack, 0, newtrack, runtrack.Length, runtrack.Length); runtrackpos += runtrack.Length; @@ -397,9 +398,7 @@ protected void DoubleTrack() /// protected void DoubleStack() { - int[] newstack; - - newstack = new int[runstack!.Length * 2]; + int[] newstack = new int[runstack!.Length * 2]; Array.Copy(runstack, 0, newstack, runstack.Length, runstack.Length); runstackpos += runstack.Length; @@ -411,9 +410,7 @@ protected void DoubleStack() /// protected void DoubleCrawl() { - int[] newcrawl; - - newcrawl = new int[runcrawl!.Length * 2]; + int[] newcrawl = new int[runcrawl!.Length * 2]; Array.Copy(runcrawl, 0, newcrawl, runcrawl.Length, runcrawl.Length); runcrawlpos += 
runcrawl.Length; @@ -456,11 +453,9 @@ protected void Capture(int capnum, int start, int end) { if (end < start) { - int T; - - T = end; + int t = end; end = start; - start = T; + start = t; } Crawl(capnum); @@ -474,22 +469,17 @@ protected void Capture(int capnum, int start, int end) /// protected void TransferCapture(int capnum, int uncapnum, int start, int end) { - int start2; - int end2; - - // these are the two intervals that are cancelling each other + // these are the two intervals that are canceling each other if (end < start) { - int T; - - T = end; + int t = end; end = start; - start = T; + start = t; } - start2 = MatchIndex(uncapnum); - end2 = start2 + MatchLength(uncapnum); + int start2 = MatchIndex(uncapnum); + int end2 = start2 + MatchLength(uncapnum); // The new capture gets the innermost defined interval