From 0ed9812908411c67c90dcfc08c696640b726d8ac Mon Sep 17 00:00:00 2001 From: jkronegg Date: Mon, 5 Dec 2022 07:53:21 +0100 Subject: [PATCH 1/5] Update ExpressionFactory.java Improved expression creation performance by about 50%. --- .../ExpressionFactory.java | 29 ++++++++++++------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/java/src/main/java/io/cucumber/cucumberexpressions/ExpressionFactory.java b/java/src/main/java/io/cucumber/cucumberexpressions/ExpressionFactory.java index 6565bc861..2e6489a85 100644 --- a/java/src/main/java/io/cucumber/cucumberexpressions/ExpressionFactory.java +++ b/java/src/main/java/io/cucumber/cucumberexpressions/ExpressionFactory.java @@ -11,15 +11,13 @@ * using heuristics. This is particularly useful for languages that don't have a * literal syntax for regular expressions. In Java, a regular expression has to be represented as a String. * - * A string that starts with `^` and/or ends with `$` is considered a regular expression. + * A string that starts with `^` and/or ends with `$` (or written in script style, i.e. starting with `/` + * and ending with `/`) is considered a regular expression. * Everything else is considered a Cucumber expression. */ @API(status = API.Status.STABLE) public final class ExpressionFactory { - private static final Pattern BEGIN_ANCHOR = Pattern.compile("^\\^.*"); - private static final Pattern END_ANCHOR = Pattern.compile(".*\\$$"); - private static final Pattern SCRIPT_STYLE_REGEXP = Pattern.compile("^/(.*)/$"); private static final Pattern PARAMETER_PATTERN = Pattern.compile("((?:\\\\){0,2})\\{([^}]*)\\}"); private final ParameterTypeRegistry parameterTypeRegistry; @@ -29,14 +27,25 @@ public ExpressionFactory(ParameterTypeRegistry parameterTypeRegistry) { } public Expression createExpression(String expressionString) { - if (BEGIN_ANCHOR.matcher(expressionString).find() || END_ANCHOR.matcher(expressionString).find()) { - return createRegularExpressionWithAnchors(expressionString); + int length = expressionString.length(); + int lastCharIndex = 0; + char firstChar = 0; + char lastChar = 0; + if (length > 0) { + lastCharIndex = length - 1; + firstChar = expressionString.charAt(0); + lastChar = expressionString.charAt(lastCharIndex); } - Matcher m = SCRIPT_STYLE_REGEXP.matcher(expressionString); - if (m.find()) { - return new RegularExpression(Pattern.compile(m.group(1)), parameterTypeRegistry); + if (firstChar == '^' || lastChar == '$') { + // java regexp => create from regexp + return this.createRegularExpressionWithAnchors(expressionString); + } else if (firstChar == '/' && lastChar == '/') { + // script style regexp => create from regexp + return new RegularExpression(Pattern.compile(expressionString.substring(1, lastCharIndex)), this.parameterTypeRegistry); + } else { + // otherwise create cucumber style expression + return new CucumberExpression(expressionString, this.parameterTypeRegistry); } - return new CucumberExpression(expressionString, parameterTypeRegistry); } private RegularExpression createRegularExpressionWithAnchors(String expressionString) { From a92393b9ec296a78d3d94de346f84412938a00f4 Mon Sep 17 00:00:00 2001 From: jkronegg Date: Mon, 5 Dec 2022 08:10:30 +0100 Subject: [PATCH 2/5] Update ExpressionFactoryTest.java Added test case for empty cucumber expression. --- .../cucumberexpressions/ExpressionFactoryTest.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/java/src/test/java/io/cucumber/cucumberexpressions/ExpressionFactoryTest.java b/java/src/test/java/io/cucumber/cucumberexpressions/ExpressionFactoryTest.java index ee0826651..52169139e 100644 --- a/java/src/test/java/io/cucumber/cucumberexpressions/ExpressionFactoryTest.java +++ b/java/src/test/java/io/cucumber/cucumberexpressions/ExpressionFactoryTest.java @@ -13,7 +13,12 @@ import static org.junit.jupiter.api.Assertions.assertThrows; public class ExpressionFactoryTest { - + + @Test + public void creates_cucumber_expression_for_empty() { + assertCucumberExpression(""); + } + @Test public void creates_cucumber_expression_by_default() { assertCucumberExpression("strings are cukexp by default"); From 1a1a642a8a1f626c72c06d597012f1ca2695c390 Mon Sep 17 00:00:00 2001 From: jkronegg Date: Mon, 5 Dec 2022 16:30:31 +0100 Subject: [PATCH 3/5] Update ExpressionFactory.java Refactored code according to PR comments. --- .../ExpressionFactory.java | 29 +++++++++++-------- 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/java/src/main/java/io/cucumber/cucumberexpressions/ExpressionFactory.java b/java/src/main/java/io/cucumber/cucumberexpressions/ExpressionFactory.java index 2e6489a85..e5ebb2e6f 100644 --- a/java/src/main/java/io/cucumber/cucumberexpressions/ExpressionFactory.java +++ b/java/src/main/java/io/cucumber/cucumberexpressions/ExpressionFactory.java @@ -27,25 +27,30 @@ public ExpressionFactory(ParameterTypeRegistry parameterTypeRegistry) { } public Expression createExpression(String expressionString) { + /* This method is called often (typically about number_of_steps x + * nbr_test_scenarios), thus performance is more important than + * readability here. + * Consequently, we check the first and last expressionString + * characters to determine whether we need to create a + * RegularExpression or a CucumberExpression (because character + * matching is faster than startsWith/endsWith and regexp matching). + */ int length = expressionString.length(); - int lastCharIndex = 0; - char firstChar = 0; - char lastChar = 0; - if (length > 0) { - lastCharIndex = length - 1; - firstChar = expressionString.charAt(0); - lastChar = expressionString.charAt(lastCharIndex); + if (length == 0) { + return new CucumberExpression(expressionString, this.parameterTypeRegistry); } + + int lastCharIndex = length - 1; + char firstChar = expressionString.charAt(0); + char lastChar = expressionString.charAt(lastCharIndex); + if (firstChar == '^' || lastChar == '$') { - // java regexp => create from regexp return this.createRegularExpressionWithAnchors(expressionString); } else if (firstChar == '/' && lastChar == '/') { - // script style regexp => create from regexp return new RegularExpression(Pattern.compile(expressionString.substring(1, lastCharIndex)), this.parameterTypeRegistry); - } else { - // otherwise create cucumber style expression - return new CucumberExpression(expressionString, this.parameterTypeRegistry); } + + return new CucumberExpression(expressionString, this.parameterTypeRegistry); } private RegularExpression createRegularExpressionWithAnchors(String expressionString) { From 88dde3a104dcaaf15744bf2aecbe6307950a8654 Mon Sep 17 00:00:00 2001 From: jkronegg Date: Tue, 6 Dec 2022 15:24:05 +0100 Subject: [PATCH 4/5] Improve CucumberExpression creation performance MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I made a benchmark with JMH which shows that the modified method is about 15% faster (see escapeRegex2 below) : Benchmark Mode Cnt Score Error Units CucumberExpressionBenchmark.escapeRegex0 thrpt 25 726563,091 ± 15558,020 ops/s CucumberExpressionBenchmark.escapeRegex1 thrpt 25 782046,026 ± 10993,804 ops/s CucumberExpressionBenchmark.escapeRegex2 thrpt 25 833512,235 ± 25088,968 ops/s The `CucumberExpression` class looks extensively tested, thus I didn't added more test cases. Note: for completeness, I tested another variant (escapeRegex1) which replaces the `ESCAPE_PATTERN.matcher(text).replaceAll(String)` with multiple `String.replace(String,String)` : the performance is not that good (only 8% faster than the original method) and code readability/maintenance is bad. But it was interesting to have the numbers to compare... --- .../io/cucumber/cucumberexpressions/CucumberExpression.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/io/cucumber/cucumberexpressions/CucumberExpression.java b/java/src/main/java/io/cucumber/cucumberexpressions/CucumberExpression.java index d744705b9..d23b3d4b5 100644 --- a/java/src/main/java/io/cucumber/cucumberexpressions/CucumberExpression.java +++ b/java/src/main/java/io/cucumber/cucumberexpressions/CucumberExpression.java @@ -24,7 +24,7 @@ @API(status = API.Status.STABLE) public final class CucumberExpression implements Expression { - private static final Pattern ESCAPE_PATTERN = Pattern.compile("([\\\\^\\[({$.|?*+})\\]])"); + private static final Pattern ESCAPE_PATTERN = Pattern.compile("[\\\\^\\[({$.|?*+})\\]]"); private final List> parameterTypes = new ArrayList<>(); private final String source; @@ -62,7 +62,7 @@ private String rewriteToRegex(Node node) { } private static String escapeRegex(String text) { - return ESCAPE_PATTERN.matcher(text).replaceAll("\\\\$1"); + return ESCAPE_PATTERN.matcher(text).replaceAll("\\\\$0"); } private String rewriteOptional(Node node) { From dd679d6ea4fa58abb72fb57cec14a6d877e54afe Mon Sep 17 00:00:00 2001 From: "M.P. Korstanje" Date: Thu, 8 Dec 2022 16:39:55 +0100 Subject: [PATCH 5/5] Update CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bacb7924c..b42f598d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +### Fixed +- [Java] Improve expression creation performance ([#187](https://github.com/cucumber/cucumber-expressions/pull/187), [#189](https://github.com/cucumber/cucumber-expressions/pull/189)) ## [16.1.0] - 2022-11-28 ### Added