diff --git a/src/Nest/Analysis/Tokenizers/NoriTokenizer.cs b/src/Nest/Analysis/Tokenizers/NoriTokenizer.cs
index d277be9607c..f6f44ced826 100644
--- a/src/Nest/Analysis/Tokenizers/NoriTokenizer.cs
+++ b/src/Nest/Analysis/Tokenizers/NoriTokenizer.cs
@@ -1,4 +1,5 @@
-using System.Runtime.Serialization;
+using System.Collections.Generic;
+using System.Runtime.Serialization;
using Newtonsoft.Json;
using Newtonsoft.Json.Converters;
@@ -32,10 +33,20 @@ public interface INoriTokenizer : ITokenizer
///
/// The Nori tokenizer uses the mecab-ko-dic dictionary by default. A user_dictionary with custom nouns (NNG) may be appended to
- /// the default dictionary. This property allows you to specify this file on disk
+ /// the default dictionary. This property allows you to specify a path to this file on disk
///
[JsonProperty("user_dictionary")]
string UserDictionary { get; set; }
+
+ /// <summary>
+ /// The Nori tokenizer uses the mecab-ko-dic dictionary by default. A user_dictionary with custom nouns (NNG)
+ /// can be specified inline with this property, one rule per entry.
+ /// </summary>
+ /// <remarks>
+ /// Valid for Elasticsearch 6.6.0+
+ /// </remarks>
+ [JsonProperty("user_dictionary_rules")]
+ IEnumerable<string> UserDictionaryRules { get; set; }
}
///
@@ -48,6 +59,9 @@ public class NoriTokenizer : TokenizerBase, INoriTokenizer
///
public string UserDictionary { get; set; }
+
+ /// <inheritdoc />
+ public IEnumerable<string> UserDictionaryRules { get; set; }
}
///
@@ -58,11 +72,18 @@ public class NoriTokenizerDescriptor
NoriDecompoundMode? INoriTokenizer.DecompoundMode { get; set; }
string INoriTokenizer.UserDictionary { get; set; }
+ IEnumerable<string> INoriTokenizer.UserDictionaryRules { get; set; }
///
public NoriTokenizerDescriptor DecompoundMode(NoriDecompoundMode? mode) => Assign(mode, (a, v) => a.DecompoundMode = v);
///
public NoriTokenizerDescriptor UserDictionary(string path) => Assign(path, (a, v) => a.UserDictionary = v);
+
+ /// <inheritdoc cref="INoriTokenizer.UserDictionaryRules" />
+ public NoriTokenizerDescriptor UserDictionaryRules(params string[] rules) => Assign(rules, (a, v) => a.UserDictionaryRules = v);
+
+ /// <inheritdoc cref="INoriTokenizer.UserDictionaryRules" />
+ public NoriTokenizerDescriptor UserDictionaryRules(IEnumerable<string> rules) => Assign(rules, (a, v) => a.UserDictionaryRules = v);
}
}
diff --git a/src/Tests/Tests/Analysis/Tokenizers/TokenizerTests.cs b/src/Tests/Tests/Analysis/Tokenizers/TokenizerTests.cs
index bb9a8e9858c..c124b1ca055 100644
--- a/src/Tests/Tests/Analysis/Tokenizers/TokenizerTests.cs
+++ b/src/Tests/Tests/Analysis/Tokenizers/TokenizerTests.cs
@@ -222,6 +222,29 @@ public class NoriTests : TokenizerAssertionBase
public override string Name => "nori";
}
+ // Nori tokenizer case using inline user dictionary rules: supplies the fluent form, the initializer form and the matching JSON.
+ [SkipVersion("<6.6.0", "inline user dictionary rules introduced in 6.6.0")]
+ public class NoriWithUserDictionaryTests : TokenizerAssertionBase
+ {
+ public override FuncTokenizer Fluent => (name, tokenizers) => tokenizers.Nori(name, nori => nori
+ .DecompoundMode(NoriDecompoundMode.Mixed)
+ .UserDictionaryRules("c++", "C샤프", "세종", "세종시 세종 시"));
+
+ public override ITokenizer Initializer => new NoriTokenizer
+ {
+ DecompoundMode = NoriDecompoundMode.Mixed,
+ UserDictionaryRules = new string[] { "c++", "C샤프", "세종", "세종시 세종 시" }
+ };
+
+ public override object Json => new
+ {
+ type = "nori_tokenizer",
+ decompound_mode = "mixed",
+ user_dictionary_rules = new string[] { "c++", "C샤프", "세종", "세종시 세종 시" }
+ };
+ public override string Name => "nori_userdictionary";
+ }
+
[SkipVersion("<6.4.0", "char_group introduced in 6.4.0")]
public class CharGroupTests : TokenizerAssertionBase
{