diff --git a/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/library/DefaultFunctionLibrary.java b/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/library/DefaultFunctionLibrary.java index 0d6efaa53..f3ef8e49e 100644 --- a/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/library/DefaultFunctionLibrary.java +++ b/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/library/DefaultFunctionLibrary.java @@ -102,6 +102,8 @@ public DefaultFunctionLibrary() { // NOPMD - intentional // P1: https://www.w3.org/TR/xpath-functions-31/#func-last // P1: https://www.w3.org/TR/xpath-functions-31/#func-lower-case // P1: https://www.w3.org/TR/xpath-functions-31/#func-matches + registerFunction(FnMatches.SIGNATURE_TWO_ARG); + registerFunction(FnMatches.SIGNATURE_THREE_ARG); // https://www.w3.org/TR/xpath-functions-31/#func-max registerFunction(FnMinMax.SIGNATURE_MAX); // https://www.w3.org/TR/xpath-functions-31/#func-min diff --git a/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/library/FnMatches.java b/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/library/FnMatches.java new file mode 100644 index 000000000..8851faecd --- /dev/null +++ b/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/library/FnMatches.java @@ -0,0 +1,166 @@ + +package gov.nist.secauto.metaschema.core.metapath.function.library; + +import gov.nist.secauto.metaschema.core.metapath.DynamicContext; +import gov.nist.secauto.metaschema.core.metapath.ISequence; +import gov.nist.secauto.metaschema.core.metapath.MetapathConstants; +import gov.nist.secauto.metaschema.core.metapath.function.FunctionUtils; +import gov.nist.secauto.metaschema.core.metapath.function.IArgument; +import gov.nist.secauto.metaschema.core.metapath.function.IFunction; +import gov.nist.secauto.metaschema.core.metapath.function.regex.RegexUtil; +import 
gov.nist.secauto.metaschema.core.metapath.function.regex.RegularExpressionMetapathException;
+import gov.nist.secauto.metaschema.core.metapath.item.IItem;
+import gov.nist.secauto.metaschema.core.metapath.item.atomic.IBooleanItem;
+import gov.nist.secauto.metaschema.core.metapath.item.atomic.IStringItem;
+import gov.nist.secauto.metaschema.core.util.ObjectUtils;
+
+import java.util.List;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import edu.umd.cs.findbugs.annotations.NonNull;
+import edu.umd.cs.findbugs.annotations.Nullable;
+
+/**
+ * Implements the XPath 3.1 fn:matches function (two- and three-argument forms).
+ */
+public final class FnMatches {
+  @NonNull
+  static final IFunction SIGNATURE_TWO_ARG = IFunction.builder()
+      .name("matches")
+      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
+      .deterministic()
+      .contextIndependent()
+      .focusIndependent()
+      .argument(IArgument.builder()
+          .name("input")
+          .type(IStringItem.class)
+          .zeroOrOne()
+          .build())
+      .argument(IArgument.builder()
+          .name("pattern")
+          .type(IStringItem.class)
+          .one()
+          .build())
+      .returnType(IBooleanItem.class)
+      .returnOne()
+      .functionHandler(FnMatches::executeTwoArg)
+      .build();
+
+  @NonNull
+  static final IFunction SIGNATURE_THREE_ARG = IFunction.builder()
+      .name("matches")
+      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
+      .deterministic()
+      .contextIndependent()
+      .focusIndependent()
+      .argument(IArgument.builder()
+          .name("input")
+          .type(IStringItem.class)
+          .zeroOrOne()
+          .build())
+      .argument(IArgument.builder()
+          .name("pattern")
+          .type(IStringItem.class)
+          .one()
+          .build())
+      .argument(IArgument.builder()
+          .name("flags")
+          .type(IStringItem.class)
+          .one()
+          .build())
+      .returnType(IBooleanItem.class)
+      .returnOne()
+      .functionHandler(FnMatches::executeThreeArg)
+      .build();
+
+  @SuppressWarnings("unused")
+  @NonNull
+  private static ISequence<IBooleanItem> executeTwoArg(
+      @NonNull IFunction function,
+      @NonNull List<ISequence<?>> arguments,
+      @NonNull DynamicContext dynamicContext,
+      IItem focus) {
+    IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
+    IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));
+
+    return execute(input, pattern, IStringItem.valueOf("")); // two-argument form: no flags supplied
+  }
+
+  @SuppressWarnings("unused")
+
+  @NonNull
+  private static ISequence<IBooleanItem> executeThreeArg(
+      @NonNull IFunction function,
+      @NonNull List<ISequence<?>> arguments,
+      @NonNull DynamicContext dynamicContext,
+      IItem focus) {
+
+    IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
+    IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));
+    IStringItem flags = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true)));
+
+    return execute(input, pattern, flags);
+  }
+
+  @SuppressWarnings("PMD.OnlyOneReturn")
+  @NonNull
+  private static ISequence<IBooleanItem> execute(
+      @Nullable IStringItem input,
+      @NonNull IStringItem pattern,
+      @NonNull IStringItem flags) {
+    // XPath F&O 3.1 fn:matches: an empty $input is interpreted as the zero-length
+    // string, so exactly one boolean is always returned (as returnOne() declares).
+    IStringItem subject = input == null ? IStringItem.valueOf("") : input;
+
+    return ISequence.of(fnMatches(subject, pattern, flags));
+  }
+
+  /**
+   * Tests whether the input, or any part of it, matches the pattern.
+   *
+   * @param input
+   *          the string to match against
+   * @param pattern
+   *          the regular expression to use for matching
+   * @param flags
+   *          matching options
+   * @return {@link IBooleanItem#TRUE} if the pattern matches or
+   *         {@link IBooleanItem#FALSE} otherwise
+   */
+  public static IBooleanItem fnMatches(
+      @NonNull IStringItem input,
+      @NonNull IStringItem pattern,
+      @NonNull IStringItem flags) {
+    return IBooleanItem.valueOf(fnMatches(input.asString(), pattern.asString(), flags.asString()));
+  }
+
+  /**
+   * Implements fn:matches.
+   *
+   * @param input
+   *          the text to search for a match
+   * @param pattern
+   *          the regular expression to search with
+   * @param flags
+   *          the flag characters that adjust matching
+   * @return {@code true} if any part of the input matches the pattern, or {@code false} otherwise
+   */
+  public static boolean fnMatches(@NonNull String input, @NonNull String pattern, @NonNull String flags) {
+    try {
+      int options = RegexUtil.parseFlags(flags);
+      return Pattern.compile(pattern, options).matcher(input).find();
+    } catch (PatternSyntaxException cause) {
+      throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_EXPRESSION, cause);
+    } catch (IllegalArgumentException cause) {
+      throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_FLAG, cause);
+    }
+  }
+
+  private FnMatches() {
+    // disable construction
+  }
+}
diff --git a/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/regex/RegexUtil.java b/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/regex/RegexUtil.java
new file mode 100644
index 000000000..47ebec2b7
--- /dev/null
+++ b/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/regex/RegexUtil.java
@@ -0,0 +1,54 @@
+
+package gov.nist.secauto.metaschema.core.metapath.function.regex;
+
+import java.util.regex.Pattern;
+
+import edu.umd.cs.findbugs.annotations.NonNull;
+
+public final class RegexUtil {
+
+  /**
+   * Parse the regular expression flags according to the XPath Functions and
+   * Operators 3.1 specification (flag characters i, m, q, s and x),
+   * producing a bitmask suitable for use in
+   * {@link Pattern#compile(String, int)}.
+   *
+   * @param flags the flag characters to process
+   * @return the bitwise OR of the {@link Pattern} flag constants for each character
+   * @throws RegularExpressionMetapathException if a character is not a supported flag
+   */
+  public static int parseFlags(@NonNull String flags) {
+    return flags.chars()
+        .map(i -> characterToFlag((char) i))
+        .reduce(0, (mask, flag) -> mask | flag);
+  }
+
+  private static int characterToFlag(char ch) {
+    int retval;
+    switch (ch) {
+    case 's':
+      retval = Pattern.DOTALL;
+      break;
+    case 'm':
+      retval = Pattern.MULTILINE;
+      break;
+    case 'i': // XPath case-insensitivity uses Unicode case mappings; CASE_INSENSITIVE alone is US-ASCII only
+      retval = Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE;
+      break;
+    case 'x': // NOTE(review): COMMENTS also treats '#' as a comment start, unlike the XPath x flag - confirm
+      retval = Pattern.COMMENTS;
+      break;
+    case 'q':
+      retval = Pattern.LITERAL;
+      break;
+    default:
+      throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_FLAG,
+          String.format("Invalid flag '%s'.", ch));
+    }
+    return retval;
+  }
+
+  private RegexUtil() {
+    // disable construction
+  }
+}
diff --git a/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/regex/RegularExpressionMetapathException.java b/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/regex/RegularExpressionMetapathException.java
new file mode 100644
index 000000000..d245c8e71
--- /dev/null
+++ b/core/src/main/java/gov/nist/secauto/metaschema/core/metapath/function/regex/RegularExpressionMetapathException.java
@@ -0,0 +1,97 @@
+
+package gov.nist.secauto.metaschema.core.metapath.function.regex;
+
+import gov.nist.secauto.metaschema.core.metapath.AbstractCodedMetapathException;
+
+public class RegularExpressionMetapathException
+    extends AbstractCodedMetapathException {
+  /**
+   * the serial version UID.
+   */
+  private static final long serialVersionUID = 1L;
+  /**
+   * err:MPRX0001:
+   * Raised by regular expression functions such as fn:matches and
+   * fn:replace if
+   * the regular expression flags contain a character other than i, m, q, s, or x.
+ */ + public static final int INVALID_FLAG = 1; + /** + * err:MPRX0002: + * Raised by regular expression functions such as fn:matches and + * fn:replace if + * the regular expression is syntactically invalid. + */ + public static final int INVALID_EXPRESSION = 2; + /** + * err:MPRX0003: For + * functions such as fn:replace and + * fn:tokenize, + * raises an error if the supplied regular expression is capable of matching a + * zero length string. + */ + public static final int MATCHES_ZERO_LENGTH_STRING = 3; + /** + * err:MPRX0004: + * Raised by fn:replace to + * report errors in the replacement string. + */ + public static final int INVALID_REPLACEMENT_STRING = 4; + + /** + * Constructs a new exception with the provided {@code code}, {@code message}, + * and {@code cause}. + * + * @param code + * the error code value + * @param message + * the exception message + * @param cause + * the original exception cause + */ + public RegularExpressionMetapathException(int code, String message, Throwable cause) { + super(code, message, cause); + } + + /** + * Constructs a new exception with the provided {@code code}, {@code message}, + * and no cause. + * + * @param code + * the error code value + * @param message + * the exception message + */ + public RegularExpressionMetapathException(int code, String message) { + super(code, message); + } + + /** + * Constructs a new exception with the provided {@code code}, no message, and + * the {@code cause}. 
+ * + * @param code + * the error code value + * @param cause + * the original exception cause + */ + public RegularExpressionMetapathException(int code, Throwable cause) { + super(code, cause); + } + + @Override + public String getCodePrefix() { + return "MPRX"; + } +} diff --git a/core/src/main/java/module-info.java b/core/src/main/java/module-info.java index 0c5adffc0..baa726659 100644 --- a/core/src/main/java/module-info.java +++ b/core/src/main/java/module-info.java @@ -64,6 +64,7 @@ exports gov.nist.secauto.metaschema.core.metapath.format; exports gov.nist.secauto.metaschema.core.metapath.function; exports gov.nist.secauto.metaschema.core.metapath.function.library; + exports gov.nist.secauto.metaschema.core.metapath.function.regex; exports gov.nist.secauto.metaschema.core.metapath.item; exports gov.nist.secauto.metaschema.core.metapath.item.atomic; exports gov.nist.secauto.metaschema.core.metapath.item.function; diff --git a/core/src/test/java/gov/nist/secauto/metaschema/core/metapath/function/library/FnMatchesTest.java b/core/src/test/java/gov/nist/secauto/metaschema/core/metapath/function/library/FnMatchesTest.java new file mode 100644 index 000000000..6e888f971 --- /dev/null +++ b/core/src/test/java/gov/nist/secauto/metaschema/core/metapath/function/library/FnMatchesTest.java @@ -0,0 +1,125 @@ +/* + * SPDX-FileCopyrightText: none + * SPDX-License-Identifier: CC0-1.0 + */ + +package gov.nist.secauto.metaschema.core.metapath.function.library; + +import static gov.nist.secauto.metaschema.core.metapath.TestUtils.bool; +import static gov.nist.secauto.metaschema.core.metapath.TestUtils.sequence; +import static gov.nist.secauto.metaschema.core.metapath.TestUtils.string; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +import gov.nist.secauto.metaschema.core.metapath.DynamicContext; +import gov.nist.secauto.metaschema.core.metapath.ExpressionTestBase; +import 
gov.nist.secauto.metaschema.core.metapath.ISequence; +import gov.nist.secauto.metaschema.core.metapath.MetapathException; +import gov.nist.secauto.metaschema.core.metapath.MetapathExpression; +import gov.nist.secauto.metaschema.core.metapath.function.regex.RegularExpressionMetapathException; +import gov.nist.secauto.metaschema.core.metapath.item.atomic.IBooleanItem; +import gov.nist.secauto.metaschema.core.metapath.item.atomic.IStringItem; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; + +import java.util.List; +import java.util.stream.Stream; + +import javax.xml.namespace.QName; + +import edu.umd.cs.findbugs.annotations.NonNull; + +class FnMatchesTest + extends ExpressionTestBase { + private static final String POEM = "Kaum hat dies der Hahn gesehen,\n" + + "Fängt er auch schon an zu krähen:\n" + + "Kikeriki! Kikikerikih!!\n" + + "Tak, tak, tak! 
- da kommen sie."; + + private static Stream provideValues() { // NOPMD - false positive + return Stream.of( + Arguments.of( + bool(true), + "matches(\"abracadabra\", \"bra\")"), + Arguments.of( + bool(true), + "matches(\"abracadabra\", \"^a.*a$\")"), + Arguments.of( + bool(false), + "matches(\"abracadabra\", \"^bra\")"), + Arguments.of( + bool(false), + "matches($poem, \"Kaum.*krähen\")"), + Arguments.of( + bool(true), + "matches($poem, \"Kaum.*krähen\", \"s\")"), + Arguments.of( + bool(true), + "matches($poem, \"^Kaum.*gesehen,$\", \"m\")"), + Arguments.of( + bool(false), + "matches($poem, \"^Kaum.*gesehen,$\")"), + Arguments.of( + bool(true), + "matches($poem, \"kiki\", \"i\")")); + } + + @ParameterizedTest + @MethodSource("provideValues") + void test(@NonNull IBooleanItem expected, @NonNull String metapath) { + assertEquals(expected, MetapathExpression.compile(metapath) + .evaluateAs(null, MetapathExpression.ResultType.ITEM, + newDynamicContext())); + } + + /** + * Construct a new dynamic context for testing. 
+ * + * @return the dynamic context + */ + @NonNull + protected static DynamicContext newDynamicContext() { + DynamicContext retval = ExpressionTestBase.newDynamicContext(); + + retval.bindVariableValue(new QName("poem"), ISequence.of(IStringItem.valueOf(POEM))); + + return retval; + } + + @Test + void testInvalidPattern() { + RegularExpressionMetapathException throwable = assertThrows(RegularExpressionMetapathException.class, + () -> { + try { + FunctionTestBase.executeFunction( + FnMatches.SIGNATURE_TWO_ARG, + newDynamicContext(), + ISequence.empty(), + List.of(sequence(string("input")), sequence(string("pattern[")))); + } catch (MetapathException ex) { + throw ex.getCause(); + } + }); + assertEquals(RegularExpressionMetapathException.INVALID_EXPRESSION, throwable.getCode()); + } + + @Test + void testInvalidFlag() { + RegularExpressionMetapathException throwable = assertThrows(RegularExpressionMetapathException.class, + () -> { + try { + FunctionTestBase.executeFunction( + FnMatches.SIGNATURE_THREE_ARG, + newDynamicContext(), + ISequence.empty(), + List.of(sequence(string("input")), sequence(string("pattern")), sequence(string("dsm")))); + } catch (MetapathException ex) { + throw ex.getCause(); + } + }); + assertEquals(RegularExpressionMetapathException.INVALID_FLAG, throwable.getCode()); + } +}