From 76eaf7759f8ab57934909df3765b35cee4320acf Mon Sep 17 00:00:00 2001
From: Saif Addin
Date: Sat, 17 Aug 2019 22:35:49 -0300
Subject: [PATCH] Added missing defaults in Tokenizer

---
Reviewer notes (placed after the three-dash separator, outside the commit message):
  This patch adds a single setDefault(...) call to the body of class Tokenizer.
  It registers default values for three params:
    - targetPattern            -> "\\S+"
    - contextChars             -> an Array of twelve punctuation strings
    - caseSensitiveExceptions  -> true
  No existing lines of Tokenizer.scala are modified; the hunk is insert-only.
  NOTE(review): the declarations of these three params are not visible in this
  patch -- confirm their types match the values supplied here.

 .../scala/com/johnsnowlabs/nlp/annotators/Tokenizer.scala | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/main/scala/com/johnsnowlabs/nlp/annotators/Tokenizer.scala b/src/main/scala/com/johnsnowlabs/nlp/annotators/Tokenizer.scala
index 385f778b90099d..da9d603fe6b47f 100644
--- a/src/main/scala/com/johnsnowlabs/nlp/annotators/Tokenizer.scala
+++ b/src/main/scala/com/johnsnowlabs/nlp/annotators/Tokenizer.scala
@@ -96,6 +96,12 @@ class Tokenizer(override val uid: String) extends AnnotatorApproach[TokenizerMod
     $(splitChars)
   }
 
+  setDefault(
+    targetPattern -> "\\S+",
+    contextChars -> Array(".", ",", ";", ":", "!", "?", "*", "-", "(", ")", "\"", "'"),
+    caseSensitiveExceptions -> true
+  )
+
   def buildRuleFactory: RuleFactory = {
     val rules = ArrayBuffer.empty[String]
 