Skip to content

Commit

Permalink
Change regex usage to manual parsing
Browse files Browse the repository at this point in the history
* use spans when possible
  • Loading branch information
lahma committed Jun 8, 2023
1 parent f6fa08e commit 01009b5
Show file tree
Hide file tree
Showing 8 changed files with 338 additions and 114 deletions.
163 changes: 163 additions & 0 deletions main/CellReferenceParser.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
using System;

namespace NPOI;

/// <summary>
/// Allocation-free replacements for the regular expressions that were previously used to
/// recognise cell, column and row references. All methods work on spans and only accept
/// ASCII letters (<c>A-Z</c>, <c>a-z</c>) and ASCII digits (<c>0-9</c>).
/// </summary>
internal static class CellReferenceParser
{
    /// <summary>
    /// The only prefix character accepted in front of the column letters or the row digits.
    /// </summary>
    private const char PrefixChar = '$';

    /// <summary>
    /// Parses a reference made of an optional column part and an optional row part,
    /// i.e. the whole input must match <c>^(\$?[A-Za-z]+)?(\$?[0-9]+)?$</c>.
    /// </summary>
    /// <param name="input">Text to parse. Empty input is rejected.</param>
    /// <param name="column">Column letters without the '$' prefix, empty if the input has no column part.</param>
    /// <param name="row">Row digits without the '$' prefix, empty if the input has no row part.</param>
    /// <returns><c>true</c> if the whole input is a column, a row, or a column followed by a row.</returns>
    public static bool TryParseCellReference(ReadOnlySpan<char> input, out ReadOnlySpan<char> column, out ReadOnlySpan<char> row)
    {
        return TryParse(input, out column, out row);
    }

    /// <summary>
    /// Matches references only where both column and row are included: a run of one or more
    /// letters followed by a run of one or more digits, each optionally preceded by a single '$'
    /// (<c>^\$?([A-Za-z]+)\$?([0-9]+)$</c>). Letters of either case are accepted.
    /// If a reference does not match, it might still match
    /// <see cref="TryParseColumnReference" /> or <see cref="TryParseRowReference" />.
    /// </summary>
    /// <param name="input">Text to parse.</param>
    /// <param name="column">Column letters without the '$' prefix.</param>
    /// <param name="row">Row digits without the '$' prefix.</param>
    /// <returns><c>true</c> if both a column and a row were found.</returns>
    public static bool TryParseStrictCellReference(ReadOnlySpan<char> input, out ReadOnlySpan<char> column, out ReadOnlySpan<char> row)
    {
        return TryParse(input, out column, out row)
               && !column.IsEmpty
               && !row.IsEmpty;
    }

    /// <summary>
    /// Matches a run of one or more letters, optionally preceded by a single '$'
    /// (<c>^\$?([A-Za-z]+)$</c>). The '$' is not part of <paramref name="column" />.
    /// </summary>
    /// <param name="input">Text to parse.</param>
    /// <param name="column">Column letters without the '$' prefix.</param>
    /// <returns><c>true</c> if the input consists of a column part only.</returns>
    public static bool TryParseColumnReference(ReadOnlySpan<char> input, out ReadOnlySpan<char> column)
    {
        return TryParse(input, out column, out var row)
               && !column.IsEmpty
               && row.IsEmpty;
    }

    /// <summary>
    /// Matches a run of one or more digits, optionally preceded by a single '$'
    /// (<c>^\$?([0-9]+)$</c>). The '$' is not part of <paramref name="row" />.
    /// </summary>
    /// <param name="input">Text to parse.</param>
    /// <param name="row">Row digits without the '$' prefix.</param>
    /// <returns><c>true</c> if the input consists of a row part only.</returns>
    public static bool TryParseRowReference(ReadOnlySpan<char> input, out ReadOnlySpan<char> row)
    {
        return TryParse(input, out var column, out row)
               && column.IsEmpty
               && !row.IsEmpty;
    }

    /// <summary>
    /// Generic single-pass scanner shared by all reference parsers. Accepts
    /// <c>[$]letters[$]digits</c> where either group (together with its prefix) may be missing,
    /// but not both, and nothing else may surround them.
    /// </summary>
    /// <param name="input">Input to parse.</param>
    /// <param name="column">Column name, empty if none. Only assigned a slice on success.</param>
    /// <param name="row">Row digits, empty if none. Only assigned a slice on success.</param>
    /// <returns>
    /// <c>false</c> for empty input, for any character outside the grammar, for more than one
    /// '$' in front of a group, and for a '$' that is not followed by its group.
    /// </returns>
    private static bool TryParse(
        ReadOnlySpan<char> input,
        out ReadOnlySpan<char> column,
        out ReadOnlySpan<char> row)
    {
        column = default;
        row = default;

        if (input.IsEmpty)
        {
            return false;
        }

        int position = 0;

        // A leading '$' belongs to the column when letters follow, otherwise to the row.
        bool hasLeadingPrefix = input[0] == PrefixChar;
        if (hasLeadingPrefix)
        {
            position++;
        }

        int columnStart = position;
        while (position < input.Length && IsAsciiLetter(input[position]))
        {
            position++;
        }

        int columnLength = position - columnStart;

        bool rowHasPrefix;
        if (columnLength > 0)
        {
            // At most ONE prefix may separate the column letters from the row digits.
            rowHasPrefix = position < input.Length && input[position] == PrefixChar;
            if (rowHasPrefix)
            {
                position++;
            }
        }
        else
        {
            // No letters were found, so the leading '$' (if any) is the row prefix.
            rowHasPrefix = hasLeadingPrefix;
        }

        int rowStart = position;
        while (position < input.Length && IsAsciiDigit(input[position]))
        {
            position++;
        }

        int rowLength = position - rowStart;

        // Trailing garbage (including a second '$') or a prefix without digits is invalid.
        if (position != input.Length || (rowHasPrefix && rowLength == 0))
        {
            return false;
        }

        column = input.Slice(columnStart, columnLength);
        row = input.Slice(rowStart, rowLength);
        return true;
    }

    /// <summary>
    /// Parses a non-negative decimal integer made of ASCII digits only (no sign, no whitespace,
    /// no separators).
    /// </summary>
    /// <param name="s">Digits to parse.</param>
    /// <param name="result">The parsed value on success, <c>-1</c> on failure.</param>
    /// <returns>
    /// <c>false</c> if <paramref name="s" /> is empty, contains anything other than
    /// <c>0-9</c>, or does not fit into an <see cref="int" />.
    /// </returns>
    public static bool TryParseInt32Fast(ReadOnlySpan<char> s, out int result)
    {
        result = -1;

        if (s.IsEmpty)
        {
            return false;
        }

        int value = 0;
        foreach (var c in s)
        {
            // char.IsDigit would also accept non-ASCII decimal digits, for which c - '0' is wrong.
            if (!IsAsciiDigit(c))
            {
                return false;
            }

            int digit = c - '0';

            // Reject instead of silently wrapping around on overflow.
            if (value > (int.MaxValue - digit) / 10)
            {
                return false;
            }

            value = 10 * value + digit;
        }

        result = value;
        return true;
    }

    /// <summary>Returns <c>true</c> for <c>A-Z</c> and <c>a-z</c> only.</summary>
    private static bool IsAsciiLetter(char c) => c is >= 'A' and <= 'Z' or >= 'a' and <= 'z';

    /// <summary>Returns <c>true</c> for <c>0-9</c> only.</summary>
    private static bool IsAsciiDigit(char c) => c is >= '0' and <= '9';
}
50 changes: 20 additions & 30 deletions main/SS/Formula/FormulaParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,8 @@ public static Area3DPxg ParseStructuredReference(String tableText, IFormulaParsi
/** Read New Character From Input Stream */
private void GetChar()
{
// The intersection operator is a space. We track whether the run of
// whitespace preceeding "look" counts as an intersection operator.
// The intersection operator is a space. We track whether the run of
// whitespace preceeding "look" counts as an intersection operator.
if (IsWhite(look))
{
if (look == ' ')
Expand Down Expand Up @@ -1173,9 +1173,6 @@ private static AreaReference CreateAreaRef(SimpleRangePart part1, SimpleRangePar
}
return new AreaReference(part1.CellReference, part2.CellReference);
}
private string CELL_REF_PATTERN = "(\\$?[A-Za-z]+)?(\\$?[0-9]+)?";




/**
Expand Down Expand Up @@ -1213,11 +1210,9 @@ private SimpleRangePart ParseSimpleRangePart()
{
return null;
}
String rep = _formulaString.Substring(_pointer - 1, ptr - _pointer + 1);
ReadOnlySpan<char> rep = _formulaString.AsSpan(_pointer - 1, ptr - _pointer + 1);

Regex pattern = new Regex(CELL_REF_PATTERN);

if (!pattern.IsMatch(rep))
if (!CellReferenceParser.TryParseCellReference(rep, out var column, out var row))
{
return null;
}
Expand All @@ -1231,19 +1226,14 @@ private SimpleRangePart ParseSimpleRangePart()
}
else if (hasLetters)
{
if (!CellReference.IsColumnWithinRange(rep.Replace("$", ""), _ssVersion))
if (!CellReference.IsColumnWithinRange(column, _ssVersion))
{
return null;
}
}
else if (hasDigits)
{
int i;
try
{
i = Int32.Parse(rep.Replace("$", ""), CultureInfo.InvariantCulture);
}
catch
if (!CellReferenceParser.TryParseInt32Fast(row, out int i))
{
return null;
}
Expand All @@ -1260,23 +1250,21 @@ private SimpleRangePart ParseSimpleRangePart()


ResetPointer(ptr + 1); // stepping forward
return new SimpleRangePart(rep, hasLetters, hasDigits);
return new SimpleRangePart(rep.ToString(), hasLetters, hasDigits);
}



/**
*
* "A1", "B3" -> "A1:B3"
*
* "A1", "B3" -> "A1:B3"
* "sheet1!A1", "B3" -> "sheet1!A1:B3"
*
*
* @return <c>null</c> if the range expression cannot / shouldn't be reduced.
*/
private static Ptg ReduceRangeExpression(Ptg ptgA, Ptg ptgB)
{
if (!(ptgB is RefPtg))
{
// only when second ref is simple 2-D ref can the range
// only when second ref is simple 2-D ref can the range
// expression be converted To an area ref
return null;
}
Expand All @@ -1295,7 +1283,7 @@ private static Ptg ReduceRangeExpression(Ptg ptgA, Ptg ptgB)
refA.IsRowRelative, refB.IsRowRelative, refA.IsColRelative, refB.IsColRelative,
refA.ExternSheetIndex);
}
// Note - other operand types (like AreaPtg) which probably can't evaluate
// Note - other operand types (like AreaPtg) which probably can't evaluate
// do not cause validation errors at Parse time
return null;
}
Expand Down Expand Up @@ -1496,7 +1484,7 @@ private SheetIdentifier ParseSheetName()
}

/**
* If we have something that looks like [book]Sheet1: or
* If we have something that looks like [book]Sheet1: or
* Sheet1, see if it's actually a range eg Sheet1:Sheet2!
*/
private SheetIdentifier ParseSheetRange(String bookname, NameIdentifier sheet1Name)
Expand Down Expand Up @@ -1544,7 +1532,9 @@ private void ResetPointer(int ptr)
/**
* @return <c>true</c> if the specified name is a valid cell reference
*/
private bool IsValidCellReference(String str)
private bool IsValidCellReference(String str) => IsValidCellReference(str.AsSpan());

private bool IsValidCellReference(ReadOnlySpan<char> str)
{
//check range bounds against grid max
bool result = CellReference.ClassifyCellReference(str, _ssVersion) == NameType.Cell;
Expand All @@ -1557,7 +1547,7 @@ private bool IsValidCellReference(String str)
* (b) LOG10 + 1
* In (a) LOG10 is a name of a built-in function. In (b) LOG10 is a cell reference
*/
bool isFunc = FunctionMetadataRegistry.GetFunctionByName(str.ToUpper()) != null;
bool isFunc = FunctionMetadataRegistry.GetFunctionByName(str.ToString().ToUpper()) != null;
if (isFunc)
{
int savePointer = _pointer;
Expand All @@ -1577,7 +1567,7 @@ private bool IsValidCellReference(String str)
* Note - Excel Function names are 'case aware but not case sensitive'. This method may end
* up creating a defined name record in the workbook if the specified name is not an internal
* Excel Function, and Has not been encountered before.
*
*
* Side effect: creates workbook name if name is not recognized (name is probably a UDF)
*
* @param name case preserved Function name (as it was entered/appeared in the formula).
Expand Down Expand Up @@ -1661,7 +1651,7 @@ private void AddName(String functionName)
}
/**
* Generates the variable Function ptg for the formula.
*
*
* For IF Formulas, Additional PTGs are Added To the Tokens
* @param name a {@link NamePtg} or {@link NameXPtg} or <code>null</code>
* @return Ptg a null is returned if we're in an IF formula, it needs extreme manipulation and is handled in this Function
Expand Down Expand Up @@ -2379,7 +2369,7 @@ private ParseNode AdditiveExpression()

/**
* API call To execute the parsing of the formula
*
*
*/
private void Parse()
{
Expand Down
10 changes: 4 additions & 6 deletions main/SS/Formula/Function/FunctionMetadataRegistry.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace NPOI.SS.Formula.Function

/**
* Allows clients to Get <c>FunctionMetadata</c> instances for any built-in function of Excel.
*
*
* @author Josh Micich
*/
public class FunctionMetadataRegistry
Expand Down Expand Up @@ -76,7 +76,7 @@ private FunctionMetadata GetFunctionByIndexInternal(int index)
return _functionDataByIndex[index];
}
/**
* Resolves a built-in function index.
* Resolves a built-in function index.
* @param name uppercase function name
* @return a negative value if the function name is not found.
* This typically occurs for external functions.
Expand All @@ -93,10 +93,8 @@ public static short LookupIndexByName(String name)

private FunctionMetadata GetFunctionByNameInternal(String name)
{
if (_functionDataByName.ContainsKey(name))
return _functionDataByName[name];
else
return null;
_functionDataByName.TryGetValue(name, out var metadata);
return metadata;
}


Expand Down
12 changes: 6 additions & 6 deletions main/SS/Formula/OperationEvaluationContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ public class OperationEvaluationContext
private bool _isSingleValue;
private WorkbookEvaluator _bookEvaluator;
private bool _isInArrayContext;

public OperationEvaluationContext(WorkbookEvaluator bookEvaluator, IEvaluationWorkbook workbook, int sheetIndex, int srcRowNum,
int srcColNum, EvaluationTracker tracker)
int srcColNum, EvaluationTracker tracker)
: this(bookEvaluator, workbook, sheetIndex, srcRowNum, srcColNum, tracker, isSingleValue: true)
{
}

public OperationEvaluationContext(WorkbookEvaluator bookEvaluator, IEvaluationWorkbook workbook, int sheetIndex, int srcRowNum,
int srcColNum, EvaluationTracker tracker, bool isSingleValue)
{
Expand Down Expand Up @@ -243,7 +243,7 @@ public ValueEval GetDynamicReference(String workbookName, String sheetName, Stri
// ugly typecast - TODO - make spReadsheet version more easily accessible
SpreadsheetVersion ssVersion = ((IFormulaParsingWorkbook)_workbook).GetSpreadsheetVersion();

NameType part1refType = ClassifyCellReference(refStrPart1, ssVersion);
NameType part1refType = ClassifyCellReference(refStrPart1.AsSpan(), ssVersion);
switch (part1refType)
{
case NameType.BadCellOrNamedRange:
Expand All @@ -270,7 +270,7 @@ public ValueEval GetDynamicReference(String workbookName, String sheetName, Stri
}
throw new InvalidOperationException("Unexpected reference classification of '" + refStrPart1 + "'.");
}
NameType part2refType = ClassifyCellReference(refStrPart1, ssVersion);
NameType part2refType = ClassifyCellReference(refStrPart1.AsSpan(), ssVersion);
switch (part2refType)
{
case NameType.BadCellOrNamedRange:
Expand Down Expand Up @@ -344,7 +344,7 @@ private static int ParseColRef(String refStrPart)
return Int32.Parse(refStrPart, CultureInfo.InvariantCulture) - 1;
}

private static NameType ClassifyCellReference(String str, SpreadsheetVersion ssVersion)
private static NameType ClassifyCellReference(ReadOnlySpan<char> str, SpreadsheetVersion ssVersion)
{
int len = str.Length;
if (len < 1)
Expand Down
Loading

0 comments on commit 01009b5

Please sign in to comment.