Skip to content

Commit

Permalink
Added support for the tokenize Metapath function. Resolves #135.
Browse files Browse the repository at this point in the history
  • Loading branch information
david-waltermire committed Oct 4, 2024
1 parent 42cd173 commit cc7c378
Show file tree
Hide file tree
Showing 10 changed files with 432 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ public DefaultFunctionLibrary() { // NOPMD - intentional
// https://www.w3.org/TR/xpath-functions-31/#func-iri-to-uri
// P1: https://www.w3.org/TR/xpath-functions-31/#func-last
// P1: https://www.w3.org/TR/xpath-functions-31/#func-lower-case
// P1: https://www.w3.org/TR/xpath-functions-31/#func-matches
// https://www.w3.org/TR/xpath-functions-31/#func-matches
registerFunction(FnMatches.SIGNATURE_TWO_ARG);
registerFunction(FnMatches.SIGNATURE_THREE_ARG);
// https://www.w3.org/TR/xpath-functions-31/#func-max
Expand Down Expand Up @@ -164,7 +164,10 @@ public DefaultFunctionLibrary() { // NOPMD - intentional
// https://www.w3.org/TR/xpath-functions-31/#func-timezone-from-date
// https://www.w3.org/TR/xpath-functions-31/#func-timezone-from-dateTime
// https://www.w3.org/TR/xpath-functions-31/#func-timezone-from-time
// P1: https://www.w3.org/TR/xpath-functions-31/#func-tokenize
// https://www.w3.org/TR/xpath-functions-31/#func-tokenize
registerFunction(FnTokenize.SIGNATURE_ONE_ARG);
registerFunction(FnTokenize.SIGNATURE_TWO_ARG);
registerFunction(FnTokenize.SIGNATURE_THREE_ARG);
// P1: https://www.w3.org/TR/xpath-functions-31/#func-translate
// https://www.w3.org/TR/xpath-functions-31/#func-true
registerFunction(FnTrue.SIGNATURE);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/*
* SPDX-FileCopyrightText: none
* SPDX-License-Identifier: CC0-1.0
*/

package gov.nist.secauto.metaschema.core.metapath.function.library;

Expand Down Expand Up @@ -75,66 +79,41 @@ public final class FnMatches {
.functionHandler(FnMatches::executeThreeArg)
.build();

@SuppressWarnings("unused")
@NonNull
private static ISequence<IBooleanItem> executeTwoArg(
@NonNull IFunction function,
@SuppressWarnings("unused") @NonNull IFunction function,
@NonNull List<ISequence<?>> arguments,
@NonNull DynamicContext dynamicContext,
IItem focus) {
@SuppressWarnings("unused") @NonNull DynamicContext dynamicContext,
@SuppressWarnings("unused") IItem focus) {
IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));

return execute(input, pattern, IStringItem.valueOf(""));
}

@SuppressWarnings("unused")

@NonNull
private static ISequence<IBooleanItem> executeThreeArg(
@NonNull IFunction function,
@SuppressWarnings("unused") @NonNull IFunction function,
@NonNull List<ISequence<?>> arguments,
@NonNull DynamicContext dynamicContext,
IItem focus) {

@SuppressWarnings("unused") @NonNull DynamicContext dynamicContext,
@SuppressWarnings("unused") IItem focus) {
IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));
IStringItem flags = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true)));

return execute(input, pattern, flags);
}

@SuppressWarnings("PMD.OnlyOneReturn")
@NonNull
private static ISequence<IBooleanItem> execute(
@Nullable IStringItem input,
@NonNull IStringItem pattern,
@NonNull IStringItem flags) {
if (input == null) {
return ISequence.empty();
}

return ISequence.of(fnMatches(input, pattern, flags));
}

/**
* Implements <a href=
* "https://www.w3.org/TR/xpath-functions-31/#func-matches">fn:matches</a>.
*
* @param input
* the string to match against
* @param pattern
* the regular expression to use for matching
* @param flags
* matching options
* @return {@link IBooleanItem#TRUE} if the pattern matches or
* {@link IBooleanItem#FALSE} otherwise
*/
public static IBooleanItem fnMatches(
@NonNull IStringItem input,
@NonNull IStringItem pattern,
@NonNull IStringItem flags) {
return IBooleanItem.valueOf(fnMatches(input.asString(), pattern.asString(), flags.asString()));
return input == null
? ISequence.empty()
: ISequence.of(
IBooleanItem.valueOf(
fnMatches(input.asString(), pattern.asString(), flags.asString())));
}

/**
Expand All @@ -154,9 +133,15 @@ public static boolean fnMatches(@NonNull String input, @NonNull String pattern,
return Pattern.compile(pattern, RegexUtil.parseFlags(flags))
.matcher(input).find();
} catch (PatternSyntaxException ex) {
throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_EXPRESSION, ex);
throw new RegularExpressionMetapathException(
RegularExpressionMetapathException.INVALID_EXPRESSION,
"Invalid regular expression pattern: '" + pattern + "'",
ex);
} catch (IllegalArgumentException ex) {
throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_FLAG, ex);
throw new RegularExpressionMetapathException(
RegularExpressionMetapathException.INVALID_FLAG,
"Invalid regular expression flags: '" + flags + "'",
ex);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
/*
* SPDX-FileCopyrightText: none
* SPDX-License-Identifier: CC0-1.0
*/

package gov.nist.secauto.metaschema.core.metapath.function.library;

import gov.nist.secauto.metaschema.core.metapath.DynamicContext;
import gov.nist.secauto.metaschema.core.metapath.ISequence;
import gov.nist.secauto.metaschema.core.metapath.MetapathConstants;
import gov.nist.secauto.metaschema.core.metapath.function.FunctionUtils;
import gov.nist.secauto.metaschema.core.metapath.function.IArgument;
import gov.nist.secauto.metaschema.core.metapath.function.IFunction;
import gov.nist.secauto.metaschema.core.metapath.function.regex.RegexUtil;
import gov.nist.secauto.metaschema.core.metapath.function.regex.RegularExpressionMetapathException;
import gov.nist.secauto.metaschema.core.metapath.item.IItem;
import gov.nist.secauto.metaschema.core.metapath.item.atomic.IStringItem;
import gov.nist.secauto.metaschema.core.util.CollectionUtil;
import gov.nist.secauto.metaschema.core.util.ObjectUtils;

import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import edu.umd.cs.findbugs.annotations.NonNull;
import edu.umd.cs.findbugs.annotations.Nullable;

/**
* Implements <a href=
* "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>.
*/
public final class FnTokenize {
  /**
   * Signature for the one-argument form, {@code tokenize($input)}.
   * <p>
   * The handler normalizes the whitespace of the input and then splits it on a
   * single space character.
   */
  @NonNull
  static final IFunction SIGNATURE_ONE_ARG = IFunction.builder()
      .name("tokenize")
      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
      .deterministic()
      .contextIndependent()
      .focusIndependent()
      .argument(IArgument.builder()
          .name("input")
          .type(IStringItem.class)
          .zeroOrOne()
          .build())
      .returnType(IStringItem.class)
      .returnZeroOrMore()
      .functionHandler(FnTokenize::executeOneArg)
      .build();

  /**
   * Signature for the two-argument form, {@code tokenize($input, $pattern)}.
   * <p>
   * The handler tokenizes using the provided pattern and an empty flags string.
   */
  @NonNull
  static final IFunction SIGNATURE_TWO_ARG = IFunction.builder()
      .name("tokenize")
      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
      .deterministic()
      .contextIndependent()
      .focusIndependent()
      .argument(IArgument.builder()
          .name("input")
          .type(IStringItem.class)
          .zeroOrOne()
          .build())
      .argument(IArgument.builder()
          .name("pattern")
          .type(IStringItem.class)
          .one()
          .build())
      .returnType(IStringItem.class)
      .returnZeroOrMore()
      .functionHandler(FnTokenize::executeTwoArg)
      .build();

  /**
   * Signature for the three-argument form,
   * {@code tokenize($input, $pattern, $flags)}.
   */
  @NonNull
  static final IFunction SIGNATURE_THREE_ARG = IFunction.builder()
      .name("tokenize")
      .namespace(MetapathConstants.NS_METAPATH_FUNCTIONS)
      .deterministic()
      .contextIndependent()
      .focusIndependent()
      .argument(IArgument.builder()
          .name("input")
          .type(IStringItem.class)
          .zeroOrOne()
          .build())
      .argument(IArgument.builder()
          .name("pattern")
          .type(IStringItem.class)
          .one()
          .build())
      .argument(IArgument.builder()
          .name("flags")
          .type(IStringItem.class)
          .one()
          .build())
      .returnType(IStringItem.class)
      .returnZeroOrMore()
      .functionHandler(FnTokenize::executeThreeArg)
      .build();

  /**
   * Handler for {@link #SIGNATURE_ONE_ARG}.
   *
   * @param function
   *          the function being executed (unused)
   * @param arguments
   *          the call arguments; index 0 holds the optional input string
   * @param dynamicContext
   *          the evaluation context (unused)
   * @param focus
   *          the focus item (unused)
   * @return the tokens, or an empty sequence if no input item was provided
   */
  @NonNull
  private static ISequence<IStringItem> executeOneArg(
      @SuppressWarnings("unused") @NonNull IFunction function,
      @NonNull List<ISequence<?>> arguments,
      @SuppressWarnings("unused") @NonNull DynamicContext dynamicContext,
      @SuppressWarnings("unused") IItem focus) {
    IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));

    // the one-argument form splits the whitespace-normalized input on a single
    // space, using no flags
    return input == null
        ? ISequence.empty()
        : toSequence(fnTokenize(input.normalizeSpace().asString(), " ", ""));
  }

  /**
   * Handler for {@link #SIGNATURE_TWO_ARG}.
   *
   * @param function
   *          the function being executed (unused)
   * @param arguments
   *          the call arguments; index 0 holds the optional input string and
   *          index 1 holds the required pattern
   * @param dynamicContext
   *          the evaluation context (unused)
   * @param focus
   *          the focus item (unused)
   * @return the tokens, or an empty sequence if no input item was provided
   */
  @NonNull
  private static ISequence<IStringItem> executeTwoArg(
      @SuppressWarnings("unused") @NonNull IFunction function,
      @NonNull List<ISequence<?>> arguments,
      @SuppressWarnings("unused") @NonNull DynamicContext dynamicContext,
      @SuppressWarnings("unused") IItem focus) {
    IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
    IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));

    return execute(input, pattern, IStringItem.valueOf(""));
  }

  /**
   * Handler for {@link #SIGNATURE_THREE_ARG}.
   *
   * @param function
   *          the function being executed (unused)
   * @param arguments
   *          the call arguments; index 0 holds the optional input string, index
   *          1 holds the required pattern, and index 2 holds the required flags
   * @param dynamicContext
   *          the evaluation context (unused)
   * @param focus
   *          the focus item (unused)
   * @return the tokens, or an empty sequence if no input item was provided
   */
  @NonNull
  private static ISequence<IStringItem> executeThreeArg(
      @SuppressWarnings("unused") @NonNull IFunction function,
      @NonNull List<ISequence<?>> arguments,
      @SuppressWarnings("unused") @NonNull DynamicContext dynamicContext,
      @SuppressWarnings("unused") IItem focus) {
    IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true));
    IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true)));
    IStringItem flags = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true)));

    return execute(input, pattern, flags);
  }

  /**
   * Shared implementation for the two- and three-argument handlers.
   *
   * @param input
   *          the string to tokenize, or {@code null} if no input item was
   *          provided
   * @param pattern
   *          the regular expression to use for identifying token boundaries
   * @param flags
   *          matching options
   * @return the tokens, or an empty sequence if {@code input} is {@code null}
   */
  @NonNull
  private static ISequence<IStringItem> execute(
      @Nullable IStringItem input,
      @NonNull IStringItem pattern,
      @NonNull IStringItem flags) {
    return input == null
        ? ISequence.empty()
        : fnTokenize(input, pattern, flags);
  }

  /**
   * Implements <a href=
   * "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>.
   *
   * @param input
   *          the string to tokenize
   * @param pattern
   *          the regular expression to use for identifying token boundaries
   * @param flags
   *          matching options
   * @return the sequence of tokens
   */
  @NonNull
  public static ISequence<IStringItem> fnTokenize(
      @NonNull IStringItem input,
      @NonNull IStringItem pattern,
      @NonNull IStringItem flags) {
    return toSequence(fnTokenize(input.asString(), pattern.asString(), flags.asString()));
  }

  /**
   * Implements <a href=
   * "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>.
   * <p>
   * An empty input produces an empty list without the pattern being compiled.
   * Otherwise, each non-empty match of the pattern is treated as a separator. A
   * separator at the very start or very end of the input produces an empty
   * leading or trailing token respectively.
   *
   * @param input
   *          the string to tokenize
   * @param pattern
   *          the regular expression to use for identifying token boundaries
   * @param flags
   *          matching options
   * @return the list of tokens, in the order they appear in the input
   * @throws RegularExpressionMetapathException
   *           if compiling the pattern fails with a
   *           {@link PatternSyntaxException} (reported as an invalid
   *           expression) or with any other {@link IllegalArgumentException}
   *           (reported as an invalid flag), or if a zero-length match is
   *           encountered while scanning the input
   */
  @SuppressWarnings("PMD.OnlyOneReturn")
  @NonNull
  public static List<String> fnTokenize(@NonNull String input, @NonNull String pattern, @NonNull String flags) {
    if (input.isEmpty()) {
      return CollectionUtil.emptyList();
    }

    Matcher matcher = compilePattern(pattern, flags).matcher(input);

    List<String> result = new LinkedList<>();
    int lastPosition = 0;
    while (matcher.find()) {
      if (matcher.group().isEmpty()) {
        throw new RegularExpressionMetapathException(RegularExpressionMetapathException.MATCHES_ZERO_LENGTH_STRING,
            String.format("Pattern '%s' will match a zero-length string.", pattern));
      }

      // the token is the text between the previous separator and this one; this
      // is the empty string when a separator starts the input or directly
      // follows another separator
      result.add(input.substring(lastPosition, matcher.start()));
      lastPosition = matcher.end();
    }

    // the text following the last separator; this is the empty string when the
    // input ends with a separator, or the whole input when nothing matched
    result.add(input.substring(lastPosition));

    return result;
  }

  /**
   * Compiles the provided pattern using the provided flags, translating
   * compilation failures into Metapath regular expression errors that identify
   * the offending pattern or flags.
   *
   * @param pattern
   *          the regular expression to compile
   * @param flags
   *          matching options
   * @return the compiled pattern
   * @throws RegularExpressionMetapathException
   *           if compilation fails
   */
  @NonNull
  private static Pattern compilePattern(@NonNull String pattern, @NonNull String flags) {
    try {
      return ObjectUtils.notNull(Pattern.compile(pattern, RegexUtil.parseFlags(flags)));
    } catch (PatternSyntaxException ex) {
      throw new RegularExpressionMetapathException(
          RegularExpressionMetapathException.INVALID_EXPRESSION,
          "Invalid regular expression pattern: '" + pattern + "'",
          ex);
    } catch (IllegalArgumentException ex) {
      throw new RegularExpressionMetapathException(
          RegularExpressionMetapathException.INVALID_FLAG,
          "Invalid regular expression flags: '" + flags + "'",
          ex);
    }
  }

  /**
   * Wraps each token in a string item and collects the items into a sequence.
   *
   * @param tokens
   *          the tokens to wrap
   * @return the sequence of string items, in the same order as the tokens
   */
  @NonNull
  private static ISequence<IStringItem> toSequence(@NonNull List<String> tokens) {
    return ISequence.of(ObjectUtils.notNull(
        tokens.stream()
            .map(IStringItem::valueOf)));
  }

  private FnTokenize() {
    // disable construction
  }
}
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/*
* SPDX-FileCopyrightText: none
* SPDX-License-Identifier: CC0-1.0
*/

package gov.nist.secauto.metaschema.core.metapath.function.regex;

Expand All @@ -18,12 +22,12 @@ public final class RegexUtil {
* @return the bitmask
*/
public static int parseFlags(@NonNull String flags) {
return flags.chars()
return flags.codePoints()
.map(i -> characterToFlag((char) i))
.reduce(0, (mask, flag) -> mask | flag);
}

private static int characterToFlag(Character ch) {
private static int characterToFlag(char ch) {
int retval;
switch (ch) {
case 's':
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
/*
* SPDX-FileCopyrightText: none
* SPDX-License-Identifier: CC0-1.0
*/

package gov.nist.secauto.metaschema.core.metapath.function.regex;

Expand Down
Loading

0 comments on commit cc7c378

Please sign in to comment.