From b0f65746dcb247efb10e55f50a86e590650ca551 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Vl=C4=8Dek?= Date: Tue, 24 Oct 2023 16:40:52 +0200 Subject: [PATCH] Deprecate CamelCase PathHierarchy tokenizer name MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Deprecate CamelCase PathHierarchy tokenizer name in favor of lowercase path_hierarchy. Signed-off-by: Lukáš Vlček --- CHANGELOG.md | 1 + .../common/CommonAnalysisModulePlugin.java | 33 +++++++++++++++++-- .../test/analysis-common/30_tokenizers.yml | 25 -------------- 3 files changed, 31 insertions(+), 28 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b40878066960a..2bd8c11307c97 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Return 409 Conflict HTTP status instead of 503 on failure to concurrently execute snapshots ([#8986](https://github.com/opensearch-project/OpenSearch/pull/5855)) - Add task completion count in search backpressure stats API ([#10028](https://github.com/opensearch-project/OpenSearch/pull/10028/)) - Performance improvement for Datetime field caching ([#4558](https://github.com/opensearch-project/OpenSearch/issues/4558)) +- Deprecate CamelCase `PathHierarchy` tokenizer name in favor of lowercase `path_hierarchy` ([#10894](https://github.com/opensearch-project/OpenSearch/pull/10894)) ### Deprecated diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java index edb8c37c2dbdd..5450e86ce473c 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java @@ -394,7 +394,21 @@ public Map> getTokenizers() { // TODO deprecate and 
remove in API tokenizers.put("lowercase", XLowerCaseTokenizerFactory::new); tokenizers.put("path_hierarchy", PathHierarchyTokenizerFactory::new); - tokenizers.put("PathHierarchy", PathHierarchyTokenizerFactory::new); + tokenizers.put("PathHierarchy", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> { + if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_3_0_0)) { + throw new IllegalArgumentException( + "The [PathHierarchy] tokenizer name was deprecated. " + + "Please change the tokenizer name to [path_hierarchy] for indices created in versions 3.0 or higher instead." + ); + } else { + deprecationLogger.deprecate( + "PathHierarchy_tokenizer_deprecation", + "The [PathHierarchy] tokenizer name is deprecated and will be removed in a future version. " + + "Please change the tokenizer name to [path_hierarchy] instead." + ); + } + return new PathHierarchyTokenizerFactory(indexSettings, environment, name, settings); + }); tokenizers.put("pattern", PatternTokenizerFactory::new); tokenizers.put("uax_url_email", UAX29URLEmailTokenizerFactory::new); tokenizers.put("whitespace", WhitespaceTokenizerFactory::new); @@ -662,8 +676,21 @@ public List getPreConfiguredTokenizers() { } return new EdgeNGramTokenizer(NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE, NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE); })); - tokenizers.add(PreConfiguredTokenizer.singleton("PathHierarchy", PathHierarchyTokenizer::new)); - + tokenizers.add(PreConfiguredTokenizer.openSearchVersion("PathHierarchy", (version) -> { + if (version.onOrAfter(Version.V_3_0_0)) { + throw new IllegalArgumentException( + "The [PathHierarchy] tokenizer name was deprecated. " + + "Please change the tokenizer name to [path_hierarchy] for indices created in versions 3.0 or higher instead." + ); + } else { + deprecationLogger.deprecate( + "PathHierarchy_tokenizer_deprecation", + "The [PathHierarchy] tokenizer name is deprecated and will be removed in a future version. 
" + + "Please change the tokenizer name to [path_hierarchy] instead." + ); + } + return new PathHierarchyTokenizer(); + })); return tokenizers; } } diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml index 56ed2175df60a..fbbdc5e13198c 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml @@ -311,19 +311,6 @@ - match: { detail.tokenizer.tokens.1.token: a/b } - match: { detail.tokenizer.tokens.2.token: a/b/c } - - do: - indices.analyze: - body: - text: "a/b/c" - explain: true - tokenizer: - type: PathHierarchy - - length: { detail.tokenizer.tokens: 3 } - - match: { detail.tokenizer.name: __anonymous__PathHierarchy } - - match: { detail.tokenizer.tokens.0.token: a } - - match: { detail.tokenizer.tokens.1.token: a/b } - - match: { detail.tokenizer.tokens.2.token: a/b/c } - - do: indices.analyze: body: @@ -336,18 +323,6 @@ - match: { detail.tokenizer.tokens.1.token: a/b } - match: { detail.tokenizer.tokens.2.token: a/b/c } - - do: - indices.analyze: - body: - text: "a/b/c" - explain: true - tokenizer: PathHierarchy - - length: { detail.tokenizer.tokens: 3 } - - match: { detail.tokenizer.name: PathHierarchy } - - match: { detail.tokenizer.tokens.0.token: a } - - match: { detail.tokenizer.tokens.1.token: a/b } - - match: { detail.tokenizer.tokens.2.token: a/b/c } - --- "pattern": - do: