Skip to content

Commit

Permalink
Detect invalid escape for ecmascript
Browse files Browse the repository at this point in the history
  • Loading branch information
justin-tay committed Jun 6, 2024
1 parent 564d8f6 commit e11488a
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 16 deletions.
12 changes: 5 additions & 7 deletions src/main/java/com/networknt/schema/format/RegexFormat.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,22 @@
*/
package com.networknt.schema.format;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import com.networknt.schema.ExecutionContext;
import com.networknt.schema.Format;
import com.networknt.schema.ValidationContext;
import com.networknt.schema.regex.RegularExpression;

/**
* Format for regex.
*/
public class RegexFormat implements Format {
/**
* Checks whether the value can be compiled as a regular expression.
*
* @param executionContext the execution context (not read by this check)
* @param validationContext the validation context handed to {@code RegularExpression.compile}
* @param value the candidate regular expression; {@code null} is accepted
* @return {@code true} if the value is {@code null} or compiles, {@code false} if compilation throws
*/
@Override
public boolean matches(ExecutionContext executionContext, String value) {
public boolean matches(ExecutionContext executionContext, ValidationContext validationContext, String value) {
if (null == value) return true;
try {
Pattern.compile(value);
RegularExpression.compile(value, validationContext);
return true;

} catch (PatternSyntaxException e) {
} catch (RuntimeException e) {
// A failure to compile means the value is not a usable regex; report a non-match instead of propagating.
return false;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import java.util.regex.Pattern;

/**
* JDK {@link RegularExpression}.
*/
class JDKRegularExpression implements RegularExpression {
private final Pattern pattern;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,28 @@
package com.networknt.schema.regex;

import java.nio.charset.StandardCharsets;
import java.util.regex.Pattern;

import org.jcodings.specific.UTF8Encoding;
import org.joni.Option;
import org.joni.Regex;
import org.joni.Syntax;
import org.joni.exception.SyntaxException;

/**
* ECMAScript {@link RegularExpression}.
*/
class JoniRegularExpression implements RegularExpression {
private final Regex pattern;
/**
 * Detects escape sequences that this class rejects even though Joni would
 * accept them. A backslash is reported when it is followed by:
 * <ul>
 * <li>one of the listed letters or a digit 1-9 (for example {@code \a},
 * {@code \e}, {@code \1}),</li>
 * <li>{@code c} at the end of the input,</li>
 * <li>{@code p} or {@code P} that is not followed by an opening brace,</li>
 * <li>{@code u} that is not followed by an opening brace or a hexadecimal
 * digit (so {@code \u1234} and {@code \uFEFF} are both accepted),</li>
 * <li>{@code x} that is not followed by two hexadecimal digits.</li>
 * </ul>
 * Declared {@code static} so the expression is compiled once for the class
 * instead of once per instance.
 * <p>
 * NOTE(review): the leading {@code .*\\} does not skip escaped backslashes, so
 * a literal backslash followed by one of the listed characters (regex
 * {@code \\a}) also appears to be reported - confirm whether that is intended.
 */
private static final Pattern INVALID_ESCAPE_PATTERN = Pattern.compile(
        ".*\\\\([aeg-moqyzACE-OQ-RT-VX-Z1-9]|c$|[pP]([^{]|$)|u([^{0-9a-fA-F]|$)|x([0-9a-fA-F][^0-9a-fA-F]|[^0-9a-fA-F][0-9a-fA-F]|[^0-9a-fA-F][^0-9a-fA-F]|.?$)).*");

/**
* Creates a regular expression using {@code Syntax.ECMAScript}.
*
* @param regex the regular expression source
*/
JoniRegularExpression(String regex) {
this(regex, Syntax.ECMAScript);
}

JoniRegularExpression(String regex, Syntax syntax) {
validate(regex);
// Joni is too liberal on some constructs
String s = regex
.replace("\\d", "[0-9]")
Expand All @@ -21,7 +33,19 @@ class JoniRegularExpression implements RegularExpression {
.replace("\\S", "[^ \\f\\n\\r\\t\\v\\u00a0\\u1680\\u2000-\\u200a\\u2028\\u2029\\u202f\\u205f\\u3000\\ufeff]");

byte[] bytes = s.getBytes(StandardCharsets.UTF_8);
this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, Syntax.ECMAScript);
this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, syntax);
}

/**
 * Rejects regular expressions that contain an escape sequence matched by
 * {@code INVALID_ESCAPE_PATTERN}.
 * <p>
 * Joni is not strict with escapes, so this check is performed separately from
 * compiling the expression.
 *
 * @param regex the regular expression source to check
 * @throws SyntaxException with the message {@code "Invalid escape"} when an
 *                         invalid escape is found
 */
protected void validate(String regex) {
    /*
     * One option considered was a custom Encoding implementation that rejects
     * certain code points but it is unable to distinguish \a vs \cG for instance as
     * both translate to BEL
     */
    // find() rather than matches(): '.' in the pattern does not match line
    // terminators, so matches() could never span an expression containing a
    // line break and an invalid escape in such an expression went unreported.
    if (INVALID_ESCAPE_PATTERN.matcher(regex).find()) {
        throw new SyntaxException("Invalid escape");
    }
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import com.networknt.schema.ValidationContext;

/**
* Regular expression.
*/
@FunctionalInterface
public interface RegularExpression {
boolean matches(String value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@ private static String toForwardSlashPath(Path file) {

private static void executeTest(JsonSchema schema, TestSpec testSpec) {
Set<ValidationMessage> errors = schema.validate(testSpec.getData(), OutputFormat.DEFAULT, (executionContext, validationContext) -> {
if (testSpec.getTestCase().getSource().getPath().getParent().toString().endsWith("format")) {
if (testSpec.getTestCase().getSource().getPath().getParent().toString().endsWith("format")
|| "ecmascript-regex.json"
.equals(testSpec.getTestCase().getSource().getPath().getFileName().toString())) {
executionContext.getExecutionConfig().setFormatAssertionsEnabled(true);
}
});
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* Copyright (c) 2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.networknt.schema.regex;

import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertThrows;

import org.joni.exception.SyntaxException;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.EnumSource;

/**
* Tests for JoniRegularExpression.
*/
class JoniRegularExpressionTest {

    /**
     * Expressions whose escape sequence must be rejected with "Invalid escape".
     */
    enum InvalidEscapeInput {
        A("\\a"),
        HELLOA("hello\\a"),
        C("\\c"),
        E("\\e"),
        G("\\g"),
        H("\\h"),
        I("\\i"),
        J("\\j"),
        K("\\k"),
        L("\\l"),
        M("\\m"),
        O("\\o"),
        Q("\\q"),
        U("\\u"),
        X("\\x"),
        X1("\\x1"),
        XGG("\\xgg"),
        X1G("\\x1g"),
        Y("\\y"),
        Z("\\z"),
        _1("\\1"),
        _2("\\2"),
        _3("\\3"),
        _4("\\4"),
        _5("\\5"),
        _6("\\6"),
        _7("\\7"),
        _8("\\8"),
        _9("\\9");

        /** The regular expression source under test; fixed per constant. */
        private final String value;

        InvalidEscapeInput(String value) {
            this.value = value;
        }
    }

    /**
     * Every {@link InvalidEscapeInput} must fail construction with a
     * {@link SyntaxException} carrying the message "Invalid escape".
     */
    @ParameterizedTest
    @EnumSource(InvalidEscapeInput.class)
    void invalidEscape(InvalidEscapeInput input) {
        SyntaxException e = assertThrows(SyntaxException.class, () -> new JoniRegularExpression(input.value));
        assertEquals("Invalid escape", e.getMessage());
    }

    /**
     * Expressions whose escape sequence must be accepted.
     */
    enum ValidEscapeInput {
        B("\\b"),
        D("\\d"),
        CAP_D("\\D"),
        W("\\w"),
        CAP_W("\\W"),
        S("\\s"),
        CAP_S("\\S"),
        T("\\t"),
        U1234("\\u1234"),
        R("\\r"),
        N("\\n"),
        V("\\v"),
        F("\\f"),
        X12("\\x12"),
        X1F("\\x1f"),
        X1234("\\x1234"),
        P("\\p{Letter}cole"), // unicode property
        CAP_P("\\P{Letter}cole"), // unicode property
        _0("\\0"),
        CA("\\cA"), // control
        CB("\\cB"), // control
        CC("\\cC"), // control
        CG("\\cG"); // control

        /** The regular expression source under test; fixed per constant. */
        private final String value;

        ValidEscapeInput(String value) {
            this.value = value;
        }
    }

    /**
     * Every {@link ValidEscapeInput} must construct without throwing.
     */
    @ParameterizedTest
    @EnumSource(ValidEscapeInput.class)
    void validEscape(ValidEscapeInput input) {
        assertDoesNotThrow(() -> new JoniRegularExpression(input.value));
    }

    /**
     * A property escape without a braced property name must be rejected.
     */
    @Test
    void invalidPropertyName() {
        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\p"));
        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\P"));
        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\pa"));
        assertThrows(SyntaxException.class, () -> new JoniRegularExpression("\\Pa"));
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -409,12 +409,12 @@
"description": "\\a is not an ECMA 262 control escape",
"schema": {
"$schema": "https://json-schema.org/draft/next/schema",
"$ref": "https://json-schema.org/draft/next/schema"
"format": "regex"
},
"tests": [
{
"description": "when used as a pattern",
"data": { "pattern": "\\a" },
"data": "\\a",
"valid": false
}
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -409,15 +409,13 @@
"description": "\\a is not an ECMA 262 control escape",
"schema": {
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$ref": "https://json-schema.org/draft/2020-12/schema"
"format": "regex"
},
"tests": [
{
"description": "when used as a pattern",
"data": { "pattern": "\\a" },
"valid": false,
"disabled": true,
"reason": "TODO: RegexFormat does not support ECMA 262 regular expressions"
"data": "\\a",
"valid": false
}
]
},
Expand Down

0 comments on commit e11488a

Please sign in to comment.