Skip to content

Commit

Permalink
Change regex usage to manual parsing
Browse files Browse the repository at this point in the history
* use spans when possible
  • Loading branch information
lahma committed Jun 9, 2023
1 parent 5996a73 commit da33d7d
Show file tree
Hide file tree
Showing 7 changed files with 360 additions and 128 deletions.
172 changes: 172 additions & 0 deletions main/CellReferenceParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
using System;

namespace NPOI;

/// <summary>
/// Allocation-free, span based replacement for the regular expressions that were used to
/// recognise cell, column and row references such as <c>A1</c>, <c>$A$1</c>, <c>$AB</c> or <c>$12</c>.
/// Only ASCII letters (<c>A-Z</c>, <c>a-z</c>) and ASCII digits (<c>0-9</c>) are recognised,
/// mirroring the character classes of the patterns quoted on each method.
/// </summary>
internal static class CellReferenceParser
{
    /// <summary>
    /// Lenient parse of a reference with an optional column part and an optional row part,
    /// modelled on <c>(\$?[A-Za-z]+)?(\$?[0-9]+)?</c>.
    /// </summary>
    /// <param name="input">Text to parse.</param>
    /// <param name="columnPrefix">'$' when the column part is prefixed, <see cref="char.MinValue" /> otherwise.</param>
    /// <param name="column">Column letters without prefix, empty if none.</param>
    /// <param name="rowPrefix">'$' when the row part is prefixed, <see cref="char.MinValue" /> otherwise.</param>
    /// <param name="row">Row digits without prefix, empty if none.</param>
    /// <returns>
    /// <c>true</c> when the input could be split and every prefix found is either '$' or absent.
    /// Either part may be empty; callers that need a part must check its length.
    /// </returns>
    public static bool TryParseCellReference(ReadOnlySpan<char> input, out char columnPrefix, out ReadOnlySpan<char> column, out char rowPrefix, out ReadOnlySpan<char> row)
    {
        return TryParse(input, out columnPrefix, out column, out rowPrefix, out row)
               && IsDollarOrAbsent(columnPrefix)
               && IsDollarOrAbsent(rowPrefix);
    }

    /// <summary>
    /// Matches references where both column and row are present: a run of one or more letters
    /// followed by a run of one or more digits, each optionally preceded by a single '$'.
    /// Equivalent pattern: <c>^\$?([A-Za-z]+)\$?([0-9]+)$</c>. The '$' signs are not part of the outputs.
    /// </summary>
    /// <param name="input">Text to parse.</param>
    /// <param name="column">Column letters, without prefix.</param>
    /// <param name="row">Row digits, without prefix.</param>
    /// <returns><c>true</c> when both a column and a row were found.</returns>
    public static bool TryParseStrictCellReference(ReadOnlySpan<char> input, out ReadOnlySpan<char> column, out ReadOnlySpan<char> row)
    {
        return TryParse(input, out var columnPrefix, out column, out var rowPrefix, out row)
               && IsDollarOrAbsent(columnPrefix)
               && column.Length > 0
               && IsDollarOrAbsent(rowPrefix)
               && row.Length > 0;
    }

    /// <summary>
    /// Matches a run of one or more letters, optionally preceded by a single '$'.
    /// Equivalent pattern: <c>^\$?([A-Za-z]+)$</c>. The '$' sign is not part of the output.
    /// </summary>
    /// <param name="input">Text to parse.</param>
    /// <param name="column">Column letters, without prefix.</param>
    /// <returns><c>true</c> when the input is a column-only reference.</returns>
    public static bool TryParseColumnReference(ReadOnlySpan<char> input, out ReadOnlySpan<char> column)
    {
        return TryParse(input, out var columnPrefix, out column, out var rowPrefix, out var row)
               && IsDollarOrAbsent(columnPrefix)
               && column.Length > 0
               && rowPrefix is char.MinValue
               && row.Length == 0;
    }

    /// <summary>
    /// Matches a run of one or more digits, optionally preceded by a single '$'.
    /// Equivalent pattern: <c>^\$?([0-9]+)$</c>. The '$' sign is not part of the output.
    /// </summary>
    /// <param name="input">Text to parse.</param>
    /// <param name="row">Row digits, without prefix.</param>
    /// <returns><c>true</c> when the input is a row-only reference.</returns>
    public static bool TryParseRowReference(ReadOnlySpan<char> input, out ReadOnlySpan<char> row)
    {
        // A prefix directly in front of digits is always reported as the row prefix, so a
        // column prefix can only appear here for inputs like "$$1", which must be rejected.
        return TryParse(input, out var columnPrefix, out var column, out var rowPrefix, out row)
               && columnPrefix is char.MinValue
               && column.Length == 0
               && IsDollarOrAbsent(rowPrefix)
               && row.Length > 0;
    }

    /// <summary>
    /// Generic parsing logic that extracts reference information.
    /// </summary>
    /// <remarks>
    /// Prefix characters are reported as found and are not validated here; callers decide
    /// which prefixes are acceptable. At most one prefix character is consumed per part.
    /// </remarks>
    /// <param name="input">Input to parse.</param>
    /// <param name="columnPrefix">Possible column prefix like '$', <see cref="char.MinValue" /> if none.</param>
    /// <param name="column">Column name string, empty if none.</param>
    /// <param name="rowPrefix">Possible row prefix like '$', <see cref="char.MinValue" /> if none.</param>
    /// <param name="row">Row data, empty if none.</param>
    /// <returns>
    /// <c>false</c> when the input is empty or when something other than digits follows the
    /// (optional) row prefix; <c>true</c> otherwise.
    /// </returns>
    private static bool TryParse(
        ReadOnlySpan<char> input,
        out char columnPrefix,
        out ReadOnlySpan<char> column,
        out char rowPrefix,
        out ReadOnlySpan<char> row)
    {
        column = default;
        columnPrefix = char.MinValue;
        row = default;
        rowPrefix = char.MinValue;

        if (input.Length == 0)
        {
            return false;
        }

        var firstChar = input[0];

        // Fast path for the very common single letter + single digit case ("A1").
        if (input.Length == 2 && IsAsciiLetter(firstChar) && IsAsciiDigit(input[1]))
        {
            column = input.Slice(0, 1);
            row = input.Slice(1);
            return true;
        }

        int position = 0;
        if (!IsAsciiLetter(firstChar) && !IsAsciiDigit(firstChar))
        {
            // A leading non-alphanumeric character is a prefix. When a digit follows directly
            // it belongs to the row part and is picked up further below; otherwise it is
            // recorded as the column prefix.
            bool digitFollows = input.Length > 1 && IsAsciiDigit(input[1]);
            if (!digitFollows)
            {
                columnPrefix = firstChar;
                position = 1;
            }
        }

        // Column part: the run of letters starting at the current position (possibly empty).
        int columnStart = position;
        while (position < input.Length && IsAsciiLetter(input[position]))
        {
            position++;
        }

        int columnLength = position - columnStart;

        // Row part: a single optional prefix character, consumed exactly once...
        if (position < input.Length && !IsAsciiDigit(input[position]))
        {
            rowPrefix = input[position];
            position++;
        }

        // ...followed by nothing but digits up to the end of the input.
        for (int i = position; i < input.Length; ++i)
        {
            if (!IsAsciiDigit(input[i]))
            {
                return false;
            }
        }

        if (columnLength > 0)
        {
            column = input.Slice(columnStart, columnLength);
        }

        row = input.Slice(position);
        return true;
    }

    /// <summary>
    /// Parses a run of ASCII digits into a non-negative <see cref="int" /> without allocating.
    /// </summary>
    /// <param name="s">Digits to parse; no sign, whitespace or separators are accepted.</param>
    /// <param name="result">The parsed value on success, <c>-1</c> on failure.</param>
    /// <returns>
    /// <c>false</c> when a character other than <c>0-9</c> is encountered or when the value
    /// does not fit into an <see cref="int" />; <c>true</c> otherwise. An empty input yields
    /// <c>true</c> with a result of <c>0</c>.
    /// </returns>
    public static bool TryParsePositiveInt32Fast(ReadOnlySpan<char> s, out int result)
    {
        int value = 0;
        foreach (var c in s)
        {
            if (!IsAsciiDigit(c))
            {
                result = -1;
                return false;
            }

            int digit = c - '0';

            // Reject instead of silently wrapping around when 10 * value + digit would overflow.
            if (value > (int.MaxValue - digit) / 10)
            {
                result = -1;
                return false;
            }

            value = 10 * value + digit;
        }

        result = value;
        return true;
    }

    // True for the only accepted prefix states: an absolute marker '$' or no prefix at all.
    private static bool IsDollarOrAbsent(char prefix) => prefix is '$' or char.MinValue;

    // ASCII-only letter test; char.IsLetter would also accept non-ASCII letters.
    private static bool IsAsciiLetter(char c) => (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');

    // ASCII-only digit test; char.IsDigit would also accept non-ASCII decimal digits.
    private static bool IsAsciiDigit(char c) => c >= '0' && c <= '9';
}
26 changes: 9 additions & 17 deletions main/SS/Formula/FormulaParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1173,9 +1173,6 @@ private static AreaReference CreateAreaRef(SimpleRangePart part1, SimpleRangePar
}
return new AreaReference(part1.CellReference, part2.CellReference);
}
private string CELL_REF_PATTERN = "(\\$?[A-Za-z]+)?(\\$?[0-9]+)?";




/**
Expand Down Expand Up @@ -1213,11 +1210,9 @@ private SimpleRangePart ParseSimpleRangePart()
{
return null;
}
String rep = _formulaString.Substring(_pointer - 1, ptr - _pointer + 1);
ReadOnlySpan<char> rep = _formulaString.AsSpan(_pointer - 1, ptr - _pointer + 1);

Regex pattern = new Regex(CELL_REF_PATTERN);

if (!pattern.IsMatch(rep))
if (!CellReferenceParser.TryParseCellReference(rep, out _, out var column, out _, out var row))
{
return null;
}
Expand All @@ -1231,19 +1226,14 @@ private SimpleRangePart ParseSimpleRangePart()
}
else if (hasLetters)
{
if (!CellReference.IsColumnWithinRange(rep.Replace("$", ""), _ssVersion))
if (!CellReference.IsColumnWithinRange(column, _ssVersion))
{
return null;
}
}
else if (hasDigits)
{
int i;
try
{
i = Int32.Parse(rep.Replace("$", ""), CultureInfo.InvariantCulture);
}
catch
if (!CellReferenceParser.TryParsePositiveInt32Fast(row, out int i))
{
return null;
}
Expand All @@ -1260,7 +1250,7 @@ private SimpleRangePart ParseSimpleRangePart()


ResetPointer(ptr + 1); // stepping forward
return new SimpleRangePart(rep, hasLetters, hasDigits);
return new SimpleRangePart(rep.ToString(), hasLetters, hasDigits);
}


Expand Down Expand Up @@ -1544,7 +1534,9 @@ private void ResetPointer(int ptr)
/**
* @return <c>true</c> if the specified name is a valid cell reference
*/
private bool IsValidCellReference(String str)
private bool IsValidCellReference(String str) => IsValidCellReference(str.AsSpan());

private bool IsValidCellReference(ReadOnlySpan<char> str)
{
//check range bounds against grid max
bool result = CellReference.ClassifyCellReference(str, _ssVersion) == NameType.Cell;
Expand All @@ -1557,7 +1549,7 @@ private bool IsValidCellReference(String str)
* (b) LOG10 + 1
* In (a) LOG10 is a name of a built-in function. In (b) LOG10 is a cell reference
*/
bool isFunc = FunctionMetadataRegistry.GetFunctionByName(str.ToUpper()) != null;
bool isFunc = FunctionMetadataRegistry.GetFunctionByName(str.ToString().ToUpper()) != null;
if (isFunc)
{
int savePointer = _pointer;
Expand Down
6 changes: 2 additions & 4 deletions main/SS/Formula/Function/FunctionMetadataRegistry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,8 @@ public static short LookupIndexByName(String name)

private FunctionMetadata GetFunctionByNameInternal(String name)
{
if (_functionDataByName.ContainsKey(name))
return _functionDataByName[name];
else
return null;
_functionDataByName.TryGetValue(name, out var metadata);
return metadata;
}


Expand Down
7 changes: 4 additions & 3 deletions main/SS/Util/CellAddress.cs
Original file line number Diff line number Diff line change
Expand Up @@ -78,11 +78,12 @@ public CellAddress(String address)
}
}

String sCol = address.Substring(0, loc).ToUpper();
String sRow = address.Substring(loc);
ReadOnlySpan<char> sCol = address.AsSpan(0, loc);
ReadOnlySpan<char> sRow = address.AsSpan(loc);

// FIXME: breaks if Address Contains a sheet name or dollar signs from an absolute CellReference
this._row = int.Parse(sRow) - 1;
CellReferenceParser.TryParsePositiveInt32Fast(sRow, out var rowNumber);
this._row = rowNumber - 1;
this._col = CellReference.ConvertColStringToIndex(sCol);
}

Expand Down
4 changes: 2 additions & 2 deletions main/SS/Util/CellRangeAddress.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,8 @@ public static CellRangeAddress ValueOf(String reference)
}
else
{
a = new CellReference(reference.Substring(0, sep));
b = new CellReference(reference.Substring(sep + 1));
a = new CellReference(reference.AsSpan(0, sep));
b = new CellReference(reference.AsSpan(sep + 1));
}
return new CellRangeAddress(a.Row, b.Row, a.Col, b.Col);
}
Expand Down
Loading

0 comments on commit da33d7d

Please sign in to comment.