diff --git a/src/main/java/com/networknt/schema/format/RegexFormat.java b/src/main/java/com/networknt/schema/format/RegexFormat.java
index c6f778b9b..191e6364f 100644
--- a/src/main/java/com/networknt/schema/format/RegexFormat.java
+++ b/src/main/java/com/networknt/schema/format/RegexFormat.java
@@ -13,24 +13,25 @@
  */
 package com.networknt.schema.format;
 
-import java.util.regex.Pattern;
-import java.util.regex.PatternSyntaxException;
-
 import com.networknt.schema.ExecutionContext;
 import com.networknt.schema.Format;
+import com.networknt.schema.ValidationContext;
+import com.networknt.schema.regex.RegularExpression;
 
 /**
  * Format for regex.
  */
 public class RegexFormat implements Format {
     @Override
-    public boolean matches(ExecutionContext executionContext, String value) {
+    public boolean matches(ExecutionContext executionContext, ValidationContext validationContext, String value) {
         if (null == value) return true;
         try {
-            Pattern.compile(value);
+            RegularExpression.compile(value, validationContext);
             return true;
-
-        } catch (PatternSyntaxException e) {
+        } catch (RuntimeException ignored) {
+            // Deliberately broad: RegularExpression has several implementations and they do not
+            // share one exception type for a malformed pattern, so any unchecked failure here
+            // is treated as "not a valid regex". NOTE(review): confirm against RegularExpression.compile.
             return false;
         }
     }
diff --git a/src/main/java/com/networknt/schema/regex/JDKRegularExpression.java b/src/main/java/com/networknt/schema/regex/JDKRegularExpression.java index 6be0c7eb7..408117f80 100644 --- a/src/main/java/com/networknt/schema/regex/JDKRegularExpression.java +++ b/src/main/java/com/networknt/schema/regex/JDKRegularExpression.java @@ -2,6 +2,9 @@ import java.util.regex.Pattern; +/** + * JDK {@link RegularExpression}.
+ */ class JDKRegularExpression implements RegularExpression { private final Pattern pattern;
diff --git a/src/main/java/com/networknt/schema/regex/JoniRegularExpression.java b/src/main/java/com/networknt/schema/regex/JoniRegularExpression.java
index bad42510d..5e205e4d1 100644
--- a/src/main/java/com/networknt/schema/regex/JoniRegularExpression.java
+++ b/src/main/java/com/networknt/schema/regex/JoniRegularExpression.java
@@ -1,16 +1,33 @@
 package com.networknt.schema.regex;
 
 import java.nio.charset.StandardCharsets;
+import java.util.regex.Pattern;
 
 import org.jcodings.specific.UTF8Encoding;
 import org.joni.Option;
 import org.joni.Regex;
 import org.joni.Syntax;
+import org.joni.exception.SyntaxException;
 
+/**
+ * ECMAScript {@link RegularExpression}.
+ */
 class JoniRegularExpression implements RegularExpression {
+    /**
+     * Matches a backslash followed by an escape that {@link #validate(String)} rejects.
+     * Compiled once for the class and searched with {@code find()}, so it carries no padding.
+     */
+    private static final Pattern INVALID_ESCAPE_PATTERN = Pattern.compile(
+            "\\\\([aeg-moqyzACE-OQ-RT-VX-Z1-9]|c$|[pP]([^{]|$)|u([^{0-9]|$)|x([0-9a-fA-F][^0-9a-fA-F]|[^0-9a-fA-F][0-9a-fA-F]|[^0-9a-fA-F][^0-9a-fA-F]|.?$))");
+
     private final Regex pattern;
 
     JoniRegularExpression(String regex) {
+        this(regex, Syntax.ECMAScript);
+    }
+
+    JoniRegularExpression(String regex, Syntax syntax) {
+        validate(regex);
         // Joni is too liberal on some constructs
         String s = regex
                 .replace("\\d", "[0-9]")
@@ -21,7 +38,28 @@ class JoniRegularExpression implements RegularExpression {
                 .replace("\\S", "[^ \\f\\n\\r\\t\\v\\u00a0\\u1680\\u2000-\\u200a\\u2028\\u2029\\u202f\\u205f\\u3000\\ufeff]");
 
         byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
-        this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
+        this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, syntax);
+    }
+
+    /**
+     * Rejects escape sequences that Joni would accept but this class treats as invalid.
+     *
+     * @param regex the regular expression source to check
+     * @throws SyntaxException with the message "Invalid escape" when such an escape is found
+     */
+    protected void validate(String regex) {
+        // Mask escaped backslashes so the second "\" of a pair is not read as the start of
+        // another escape; "_", like "\", is neither a hex digit nor "{" for the checks below.
+        String candidate = regex.replace("\\\\", "__");
+        // Joni is not strict with escapes
+        if (INVALID_ESCAPE_PATTERN.matcher(candidate).find()) {
+            /*
+             * One option considered was a custom Encoding implementation that rejects
+             * certain code points but it is unable to distinguish \a vs \cG for instance as
+             * both translate to BEL
+             */
+            throw new SyntaxException("Invalid escape");
+        }
     }
 
     @Override
diff --git a/src/main/java/com/networknt/schema/regex/RegularExpression.java b/src/main/java/com/networknt/schema/regex/RegularExpression.java index 64c1ba2ca..83f4d22e0 100644 --- a/src/main/java/com/networknt/schema/regex/RegularExpression.java +++ b/src/main/java/com/networknt/schema/regex/RegularExpression.java @@ -2,6 +2,9 @@ import com.networknt.schema.ValidationContext; +/** + * Regular expression. + */ @FunctionalInterface public interface RegularExpression { boolean matches(String value); diff --git a/src/test/java/com/networknt/schema/AbstractJsonSchemaTestSuite.java b/src/test/java/com/networknt/schema/AbstractJsonSchemaTestSuite.java index 66a6856a4..3852f6340 100644 --- a/src/test/java/com/networknt/schema/AbstractJsonSchemaTestSuite.java +++ b/src/test/java/com/networknt/schema/AbstractJsonSchemaTestSuite.java @@ -55,7 +55,9 @@ private static String toForwardSlashPath(Path file) { private static void executeTest(JsonSchema schema, TestSpec testSpec) { Set errors = schema.validate(testSpec.getData(), OutputFormat.DEFAULT, (executionContext, validationContext) -> { - if (testSpec.getTestCase().getSource().getPath().getParent().toString().endsWith("format")) { + if (testSpec.getTestCase().getSource().getPath().getParent().toString().endsWith("format") + || "ecmascript-regex.json" + .equals(testSpec.getTestCase().getSource().getPath().getFileName().toString())) { executionContext.getExecutionConfig().setFormatAssertionsEnabled(true); } }); diff --git a/src/test/java/com/networknt/schema/regex/JoniRegularExpressionTest.java b/src/test/java/com/networknt/schema/regex/JoniRegularExpressionTest.java new file mode 100644 index 000000000..fc1f89975 --- /dev/null +++ b/src/test/java/com/networknt/schema/regex/JoniRegularExpressionTest.java @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2024 the original
author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.networknt.schema.regex; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import org.joni.exception.SyntaxException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.EnumSource; + +/** + * Tests for JoniRegularExpression. 
+ */
+class JoniRegularExpressionTest {
+
+    enum InvalidEscapeInput {
+        A("\\a"),
+        HELLOA("hello\\a"), // invalid escape preceded by ordinary text
+        C("\\c"), // control escape with no letter after it
+        E("\\e"),
+        G("\\g"),
+        H("\\h"),
+        I("\\i"),
+        J("\\j"),
+        K("\\k"),
+        L("\\l"),
+        M("\\m"),
+        O("\\o"),
+        Q("\\q"),
+        U("\\u"),
+        X("\\x"),
+        X1("\\x1"), // only one hex digit
+        XGG("\\xgg"), // no hex digits
+        X1G("\\x1g"), // second character is not a hex digit
+        Y("\\y"),
+        Z("\\z"),
+        _1("\\1"),
+        _2("\\2"),
+        _3("\\3"),
+        _4("\\4"),
+        _5("\\5"),
+        _6("\\6"),
+        _7("\\7"),
+        _8("\\8"),
+        _9("\\9");
+
+        private final String value;
+
+        InvalidEscapeInput(String value) {
+            this.value = value;
+        }
+    }
+
+    @ParameterizedTest
+    @EnumSource(InvalidEscapeInput.class)
+    void invalidEscape(InvalidEscapeInput input) {
+        SyntaxException e = assertThrows(SyntaxException.class, () -> new JoniRegularExpression(input.value));
+        assertEquals("Invalid escape", e.getMessage());
+    }
+
+    enum ValidEscapeInput {
+        B("\\b"),
+        D("\\d"),
+        CAP_D("\\D"),
+        W("\\w"),
+        CAP_W("\\W"),
+        S("\\s"),
+        CAP_S("\\S"),
+        T("\\t"),
+        U1234("\\u1234"),
+        R("\\r"),
+        N("\\n"),
+        V("\\v"),
+        F("\\f"),
+        X12("\\x12"),
+        X1F("\\x1f"),
+        X1234("\\x1234"), // two hex digits followed by the plain text "34"
+        P("\\p{Letter}cole"), // unicode property
+        CAP_P("\\P{Letter}cole"), // unicode property
+        _0("\\0"), // the only digit escape accepted; 1-9 are in InvalidEscapeInput
+        CA("\\cA"), // control
+        CB("\\cB"), // control
+        CC("\\cC"), // control
+        CG("\\cG"); // control
+
+        private final String value;
+
+        ValidEscapeInput(String value) {
+            this.value = value;
+        }
+    }
+
+    @ParameterizedTest
+    @EnumSource(ValidEscapeInput.class)
+    void validEscape(ValidEscapeInput input) {
+        assertDoesNotThrow(() -> new JoniRegularExpression(input.value));
+    }
+
+    @Test
+    void invalidPropertyName() {
+        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\p"));
+        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\P"));
+        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\pa"));
+        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\Pa"));
+    }
+}
diff --git a/src/test/suite/tests/draft-next/optional/ecmascript-regex.json
b/src/test/suite/tests/draft-next/optional/ecmascript-regex.json index 272114503..c96d4e052 100644 --- a/src/test/suite/tests/draft-next/optional/ecmascript-regex.json +++ b/src/test/suite/tests/draft-next/optional/ecmascript-regex.json @@ -409,12 +409,12 @@ "description": "\\a is not an ECMA 262 control escape", "schema": { "$schema": "https://json-schema.org/draft/next/schema", - "$ref": "https://json-schema.org/draft/next/schema" + "format": "regex" }, "tests": [ { "description": "when used as a pattern", - "data": { "pattern": "\\a" }, + "data": "\\a", "valid": false } ] diff --git a/src/test/suite/tests/draft2020-12/optional/ecmascript-regex.json b/src/test/suite/tests/draft2020-12/optional/ecmascript-regex.json index 41eaafe5e..976495312 100644 --- a/src/test/suite/tests/draft2020-12/optional/ecmascript-regex.json +++ b/src/test/suite/tests/draft2020-12/optional/ecmascript-regex.json @@ -409,15 +409,13 @@ "description": "\\a is not an ECMA 262 control escape", "schema": { "$schema": "https://json-schema.org/draft/2020-12/schema", - "$ref": "https://json-schema.org/draft/2020-12/schema" + "format": "regex" }, "tests": [ { "description": "when used as a pattern", - "data": { "pattern": "\\a" }, - "valid": false, - "disabled": true, - "reason": "TODO: RegexFormat does not support ECMA 262 regular expressions" + "data": "\\a", + "valid": false } ] },