-
Notifications
You must be signed in to change notification settings - Fork 25k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Check for deprecations when analyzers are built #50908
Changes from 2 commits
54c659c
1ca8b8b
bcdd2b0
4941470
9a18f62
d938eaa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,7 @@ | |
import org.elasticsearch.index.analysis.AnalysisRegistry; | ||
import org.elasticsearch.index.analysis.CharFilterFactory; | ||
import org.elasticsearch.index.analysis.IndexAnalyzers; | ||
import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory; | ||
import org.elasticsearch.index.analysis.PreConfiguredCharFilter; | ||
import org.elasticsearch.index.analysis.TokenFilterFactory; | ||
import org.elasticsearch.index.analysis.TokenizerFactory; | ||
|
@@ -108,6 +109,25 @@ public TokenStream create(TokenStream tokenStream) { | |
} | ||
} | ||
|
||
class DeprecatedTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory { | ||
|
||
DeprecatedTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { | ||
super(indexSettings, name, settings); | ||
} | ||
|
||
@Override | ||
public TokenStream create(TokenStream tokenStream) { | ||
deprecationLogger.deprecated("Using deprecated token filter [deprecated]"); | ||
return tokenStream; | ||
} | ||
|
||
@Override | ||
public TokenStream normalize(TokenStream tokenStream) { | ||
deprecationLogger.deprecated("Using deprecated token filter [deprecated]"); | ||
return tokenStream; | ||
} | ||
} | ||
|
||
class AppendCharFilterFactory extends AbstractCharFilterFactory { | ||
|
||
final String suffix; | ||
|
@@ -136,7 +156,7 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() { | |
|
||
@Override | ||
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() { | ||
return singletonMap("mock", MockFactory::new); | ||
return Map.of("mock", MockFactory::new, "deprecated", DeprecatedTokenFilterFactory::new); | ||
} | ||
|
||
@Override | ||
|
@@ -492,4 +512,28 @@ public void testExceedSetMaxTokenLimit() { | |
assertEquals(e.getMessage(), "The number of tokens produced by calling _analyze has exceeded the allowed maximum of [" | ||
+ idxMaxTokenCount + "]." + " This limit can be set by changing the [index.analyze.max_token_count] index level setting."); | ||
} | ||
|
||
public void testDeprecationWarnings() throws IOException { | ||
AnalyzeAction.Request req = new AnalyzeAction.Request(); | ||
req.tokenizer("standard"); | ||
req.addTokenFilter("lowercase"); | ||
req.addTokenFilter("deprecated"); | ||
req.text("test text"); | ||
|
||
AnalyzeAction.Response analyze = | ||
TransportAnalyzeAction.analyze(req, registry, mockIndexService(), maxTokenCount); | ||
assertEquals(2, analyze.getTokens().size()); | ||
assertWarnings("Using deprecated token filter [deprecated]"); | ||
|
||
// normalizer | ||
req = new AnalyzeAction.Request(); | ||
req.addTokenFilter("lowercase"); | ||
req.addTokenFilter("deprecated"); | ||
req.text("text"); | ||
|
||
analyze = | ||
TransportAnalyzeAction.analyze(req, registry, mockIndexService(), maxTokenCount); | ||
assertEquals(1, analyze.getTokens().size()); | ||
assertWarnings("Using deprecated token filter [deprecated]"); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Just a small observation: I thought this checks that the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because the default implementation of There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. So that sounds like the "normalize()" method isn't actually tested here? I saw we cover that in some other test though, so fine with whatever you decide doing here, I was just curious if this could be checked here through the "_analyze" API as well, but no problem if it's too tricky. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, it's tested, but in order to make it fail you can't just remove the method (because that then delegates to There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Oh, I've just realised that we're talking about the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
This can most likely be done in a follow-up PR though, I just wanted to understand what's going on here and which of the two methods in the DeprecatedTokenFilterFactory in this test is supposed to fire the warning; currently they emit the same text so it's hard to tell which code path is checked. |
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,12 +20,13 @@ | |
package org.elasticsearch.index.analysis; | ||
|
||
import com.carrotsearch.randomizedtesting.generators.RandomPicks; | ||
|
||
import org.apache.lucene.analysis.Analyzer; | ||
import org.apache.lucene.analysis.MockTokenFilter; | ||
import org.apache.lucene.analysis.TokenStream; | ||
import org.apache.lucene.analysis.Tokenizer; | ||
import org.apache.lucene.analysis.en.EnglishAnalyzer; | ||
import org.apache.lucene.analysis.standard.StandardAnalyzer; | ||
import org.apache.lucene.analysis.standard.StandardTokenizer; | ||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | ||
import org.elasticsearch.Version; | ||
import org.elasticsearch.cluster.metadata.IndexMetaData; | ||
|
@@ -108,19 +109,25 @@ public void testOverrideDefaultAnalyzer() throws IOException { | |
public void testOverrideDefaultAnalyzerWithoutAnalysisModeAll() throws IOException { | ||
Version version = VersionUtils.randomVersion(random()); | ||
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); | ||
TokenFilterFactory tokenFilter = new AbstractTokenFilterFactory(IndexSettingsModule.newIndexSettings("index", settings), | ||
"my_filter", Settings.EMPTY) { | ||
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", settings); | ||
TokenFilterFactory tokenFilter = new AbstractTokenFilterFactory(indexSettings, "my_filter", Settings.EMPTY) { | ||
@Override | ||
public AnalysisMode getAnalysisMode() { | ||
return randomFrom(AnalysisMode.SEARCH_TIME, AnalysisMode.INDEX_TIME); | ||
} | ||
|
||
@Override | ||
public TokenStream create(TokenStream tokenStream) { | ||
return null; | ||
return tokenStream; | ||
} | ||
}; | ||
TokenizerFactory tokenizer = new AbstractTokenizerFactory(indexSettings, Settings.EMPTY, "my_tokenizer") { | ||
@Override | ||
public Tokenizer create() { | ||
return new StandardTokenizer(); | ||
} | ||
}; | ||
Analyzer analyzer = new CustomAnalyzer(null, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter }); | ||
Analyzer analyzer = new CustomAnalyzer(tokenizer, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter }); | ||
MapperException ex = expectThrows(MapperException.class, | ||
() -> emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings), | ||
singletonMap("default", new PreBuiltAnalyzerProvider("default", AnalyzerScope.INDEX, analyzer)), emptyMap(), | ||
|
@@ -264,4 +271,83 @@ public void testEnsureCloseInvocationProperlyDelegated() throws IOException { | |
registry.close(); | ||
verify(mock).close(); | ||
} | ||
|
||
public void testDeprecations() throws IOException { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you also add a test that throws an exception? |
||
|
||
AnalysisPlugin plugin = new AnalysisPlugin() { | ||
|
||
class MockFactory extends AbstractTokenFilterFactory { | ||
MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { | ||
super(indexSettings, name, settings); | ||
} | ||
|
||
@Override | ||
public TokenStream create(TokenStream tokenStream) { | ||
deprecationLogger.deprecated("Using deprecated token filter [deprecated]"); | ||
return tokenStream; | ||
} | ||
} | ||
|
||
class UnusedMockFactory extends AbstractTokenFilterFactory { | ||
UnusedMockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { | ||
super(indexSettings, name, settings); | ||
} | ||
|
||
@Override | ||
public TokenStream create(TokenStream tokenStream) { | ||
deprecationLogger.deprecated("Using deprecated token filter [unused]"); | ||
return tokenStream; | ||
} | ||
} | ||
|
||
class NormalizerFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory { | ||
|
||
NormalizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { | ||
super(indexSettings, name, settings); | ||
} | ||
|
||
@Override | ||
public TokenStream create(TokenStream tokenStream) { | ||
deprecationLogger.deprecated("Using deprecated token filter [deprecated_normalizer]"); | ||
return tokenStream; | ||
} | ||
|
||
} | ||
|
||
@Override | ||
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() { | ||
return Map.of("deprecated", MockFactory::new, "unused", UnusedMockFactory::new, | ||
"deprecated_normalizer", NormalizerFactory::new); | ||
} | ||
}; | ||
|
||
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build(); | ||
Settings indexSettings = Settings.builder() | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) | ||
.put("index.analysis.filter.deprecated.type", "deprecated") | ||
.put("index.analysis.analyzer.custom.tokenizer", "standard") | ||
.putList("index.analysis.analyzer.custom.filter", "lowercase", "deprecated") | ||
.build(); | ||
|
||
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); | ||
|
||
new AnalysisModule(TestEnvironment.newEnvironment(settings), | ||
singletonList(plugin)).getAnalysisRegistry().build(idxSettings); | ||
|
||
// We should only get a warning from the token filter that is referenced in settings | ||
assertWarnings("Using deprecated token filter [deprecated]"); | ||
|
||
indexSettings = Settings.builder() | ||
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) | ||
.put("index.analysis.filter.deprecated.type", "deprecated_normalizer") | ||
.putList("index.analysis.normalizer.custom.filter", "lowercase", "deprecated_normalizer") | ||
.build(); | ||
idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); | ||
|
||
new AnalysisModule(TestEnvironment.newEnvironment(settings), | ||
singletonList(plugin)).getAnalysisRegistry().build(idxSettings); | ||
|
||
assertWarnings("Using deprecated token filter [deprecated_normalizer]"); | ||
|
||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We also throw exceptions in some cases, so it's not only about deprecations?