-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added support for the tokenize Metapath function. Resolves #135.
- Loading branch information
1 parent
42cd173
commit cc7c378
Showing
10 changed files
with
432 additions
and
44 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
234 changes: 234 additions & 0 deletions
234
.../src/main/java/gov/nist/secauto/metaschema/core/metapath/function/library/FnTokenize.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,234 @@ | ||
/* | ||
* SPDX-FileCopyrightText: none | ||
* SPDX-License-Identifier: CC0-1.0 | ||
*/ | ||
|
||
package gov.nist.secauto.metaschema.core.metapath.function.library; | ||
|
||
import gov.nist.secauto.metaschema.core.metapath.DynamicContext; | ||
import gov.nist.secauto.metaschema.core.metapath.ISequence; | ||
import gov.nist.secauto.metaschema.core.metapath.MetapathConstants; | ||
import gov.nist.secauto.metaschema.core.metapath.function.FunctionUtils; | ||
import gov.nist.secauto.metaschema.core.metapath.function.IArgument; | ||
import gov.nist.secauto.metaschema.core.metapath.function.IFunction; | ||
import gov.nist.secauto.metaschema.core.metapath.function.regex.RegexUtil; | ||
import gov.nist.secauto.metaschema.core.metapath.function.regex.RegularExpressionMetapathException; | ||
import gov.nist.secauto.metaschema.core.metapath.item.IItem; | ||
import gov.nist.secauto.metaschema.core.metapath.item.atomic.IStringItem; | ||
import gov.nist.secauto.metaschema.core.util.CollectionUtil; | ||
import gov.nist.secauto.metaschema.core.util.ObjectUtils; | ||
|
||
import java.util.LinkedList; | ||
import java.util.List; | ||
import java.util.regex.Matcher; | ||
import java.util.regex.Pattern; | ||
import java.util.regex.PatternSyntaxException; | ||
|
||
import edu.umd.cs.findbugs.annotations.NonNull; | ||
import edu.umd.cs.findbugs.annotations.Nullable; | ||
|
||
/** | ||
* Implements <a href= | ||
* "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>. | ||
*/ | ||
public final class FnTokenize { | ||
@NonNull | ||
static final IFunction SIGNATURE_ONE_ARG = IFunction.builder() | ||
.name("tokenize") | ||
.namespace(MetapathConstants.NS_METAPATH_FUNCTIONS) | ||
.deterministic() | ||
.contextIndependent() | ||
.focusIndependent() | ||
.argument(IArgument.builder() | ||
.name("input") | ||
.type(IStringItem.class) | ||
.zeroOrOne() | ||
.build()) | ||
.returnType(IStringItem.class) | ||
.returnZeroOrMore() | ||
.functionHandler(FnTokenize::executeOneArg) | ||
.build(); | ||
@NonNull | ||
static final IFunction SIGNATURE_TWO_ARG = IFunction.builder() | ||
.name("tokenize") | ||
.namespace(MetapathConstants.NS_METAPATH_FUNCTIONS) | ||
.deterministic() | ||
.contextIndependent() | ||
.focusIndependent() | ||
.argument(IArgument.builder() | ||
.name("input") | ||
.type(IStringItem.class) | ||
.zeroOrOne() | ||
.build()) | ||
.argument(IArgument.builder() | ||
.name("pattern") | ||
.type(IStringItem.class) | ||
.one() | ||
.build()) | ||
.returnType(IStringItem.class) | ||
.returnZeroOrMore() | ||
.functionHandler(FnTokenize::executeTwoArg) | ||
.build(); | ||
|
||
@NonNull | ||
static final IFunction SIGNATURE_THREE_ARG = IFunction.builder() | ||
.name("tokenize") | ||
.namespace(MetapathConstants.NS_METAPATH_FUNCTIONS) | ||
.deterministic() | ||
.contextIndependent() | ||
.focusIndependent() | ||
.argument(IArgument.builder() | ||
.name("input") | ||
.type(IStringItem.class) | ||
.zeroOrOne() | ||
.build()) | ||
.argument(IArgument.builder() | ||
.name("pattern") | ||
.type(IStringItem.class) | ||
.one() | ||
.build()) | ||
.argument(IArgument.builder() | ||
.name("flags") | ||
.type(IStringItem.class) | ||
.one() | ||
.build()) | ||
.returnType(IStringItem.class) | ||
.returnZeroOrMore() | ||
.functionHandler(FnTokenize::executeThreeArg) | ||
.build(); | ||
|
||
@NonNull | ||
private static ISequence<IStringItem> executeOneArg( | ||
@SuppressWarnings("unused") @NonNull IFunction function, | ||
@NonNull List<ISequence<?>> arguments, | ||
@SuppressWarnings("unused") @NonNull DynamicContext dynamicContext, | ||
@SuppressWarnings("unused") IItem focus) { | ||
IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true)); | ||
|
||
return input == null | ||
? ISequence.empty() | ||
: ISequence.of(ObjectUtils.notNull( | ||
fnTokenize(input.normalizeSpace().asString(), " ", "").stream() | ||
.map(IStringItem::valueOf))); | ||
} | ||
|
||
@NonNull | ||
private static ISequence<IStringItem> executeTwoArg( | ||
@SuppressWarnings("unused") @NonNull IFunction function, | ||
@NonNull List<ISequence<?>> arguments, | ||
@SuppressWarnings("unused") @NonNull DynamicContext dynamicContext, | ||
@SuppressWarnings("unused") IItem focus) { | ||
IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true)); | ||
IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true))); | ||
|
||
return execute(input, pattern, IStringItem.valueOf("")); | ||
} | ||
|
||
@NonNull | ||
private static ISequence<IStringItem> executeThreeArg( | ||
@SuppressWarnings("unused") @NonNull IFunction function, | ||
@NonNull List<ISequence<?>> arguments, | ||
@SuppressWarnings("unused") @NonNull DynamicContext dynamicContext, | ||
@SuppressWarnings("unused") IItem focus) { | ||
|
||
IStringItem input = FunctionUtils.asTypeOrNull(arguments.get(0).getFirstItem(true)); | ||
IStringItem pattern = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(1).getFirstItem(true))); | ||
IStringItem flags = ObjectUtils.requireNonNull(FunctionUtils.asTypeOrNull(arguments.get(2).getFirstItem(true))); | ||
|
||
return execute(input, pattern, flags); | ||
} | ||
|
||
@SuppressWarnings("PMD.OnlyOneReturn") | ||
@NonNull | ||
private static ISequence<IStringItem> execute( | ||
@Nullable IStringItem input, | ||
@NonNull IStringItem pattern, | ||
@NonNull IStringItem flags) { | ||
return input == null | ||
? ISequence.empty() | ||
: fnTokenize(input, pattern, flags); | ||
} | ||
|
||
/** | ||
* Implements <a href= | ||
* "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>. | ||
* | ||
* @param input | ||
* the string to tokenize | ||
* @param pattern | ||
* the regular expression to use for identifying token boundaries | ||
* @param flags | ||
* matching options | ||
* @return the sequence of tokens | ||
*/ | ||
@NonNull | ||
public static ISequence<IStringItem> fnTokenize( | ||
@NonNull IStringItem input, | ||
@NonNull IStringItem pattern, | ||
@NonNull IStringItem flags) { | ||
return ISequence.of(ObjectUtils.notNull( | ||
fnTokenize(input.asString(), pattern.asString(), flags.asString()).stream() | ||
.map(IStringItem::valueOf))); | ||
} | ||
|
||
/** | ||
* Implements <a href= | ||
* "https://www.w3.org/TR/xpath-functions-31/#func-tokenize">fn:tokenize</a>. | ||
* | ||
* @param input | ||
* the string to match against | ||
* @param pattern | ||
* the regular expression to use for matching | ||
* @param flags | ||
* matching options | ||
* @return the stream of tokens | ||
*/ | ||
@SuppressWarnings({ "PMD.OnlyOneReturn", "PMD.CyclomaticComplexity" }) | ||
@NonNull | ||
public static List<String> fnTokenize(@NonNull String input, @NonNull String pattern, @NonNull String flags) { | ||
if (input.isEmpty()) { | ||
return CollectionUtil.emptyList(); | ||
} | ||
|
||
try { | ||
Matcher matcher = Pattern.compile(pattern, RegexUtil.parseFlags(flags)).matcher(input); | ||
|
||
int lastPosition = 0; | ||
int length = input.length(); | ||
|
||
List<String> result = new LinkedList<>(); | ||
while (matcher.find()) { | ||
String group = matcher.group(); | ||
if (group.isEmpty()) { | ||
throw new RegularExpressionMetapathException(RegularExpressionMetapathException.MATCHES_ZERO_LENGTH_STRING, | ||
String.format("Pattern '%s' will match a zero-length string.", pattern)); | ||
} | ||
|
||
int start = matcher.start(); | ||
if (start == 0) { | ||
result.add(""); | ||
} else { | ||
result.add(input.substring(lastPosition, start)); | ||
} | ||
|
||
lastPosition = matcher.end(); | ||
} | ||
|
||
if (lastPosition == length) { | ||
result.add(""); | ||
} else { | ||
result.add(input.substring(lastPosition, length)); | ||
} | ||
|
||
return result; | ||
} catch (PatternSyntaxException ex) { | ||
throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_EXPRESSION, ex); | ||
} catch (IllegalArgumentException ex) { | ||
throw new RegularExpressionMetapathException(RegularExpressionMetapathException.INVALID_FLAG, ex); | ||
} | ||
} | ||
|
||
private FnTokenize() { | ||
// disable construction | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 4 additions & 0 deletions
4
...t/secauto/metaschema/core/metapath/function/regex/RegularExpressionMetapathException.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.