Check for deprecations when analyzers are built #50908

Merged Jan 14, 2020 (6 commits)
Changes shown from 2 commits
File: CommonAnalysisPluginTests.java

@@ -19,20 +19,15 @@

package org.elasticsearch.analysis.common;

-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.Tokenizer;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
-import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import org.elasticsearch.test.VersionUtils;

import java.io.IOException;
-import java.io.StringReader;
-import java.util.Map;

public class CommonAnalysisPluginTests extends ESTestCase {

@@ -51,13 +46,8 @@ public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException
.build();

try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
-            Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings),
-                settings, commonAnalysisPlugin).tokenFilter;
-            TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
-            Tokenizer tokenizer = new MockTokenizer();
-            tokenizer.setReader(new StringReader("foo bar"));
-
-            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
+            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
+                () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
assertEquals("The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [ngram] instead.", ex.getMessage());
}
@@ -69,12 +59,7 @@ public void testNGramFilterInCustomAnalyzerDeprecationError() throws IOException
.putList("index.analysis.analyzer.custom_analyzer.filter", "my_ngram").put("index.analysis.filter.my_ngram.type", "nGram")
.build();
try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
-            Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7),
-                settingsPre7, commonAnalysisPlugin).tokenFilter;
-            TokenFilterFactory tokenFilterFactory = tokenFilters.get("nGram");
-            Tokenizer tokenizer = new MockTokenizer();
-            tokenizer.setReader(new StringReader("foo bar"));
-            assertNotNull(tokenFilterFactory.create(tokenizer));
+            createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7), settingsPre7, commonAnalysisPlugin);
assertWarnings("The [nGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [ngram] instead.");
}
@@ -95,13 +80,8 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException
.build();

try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
-            Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings),
-                settings, commonAnalysisPlugin).tokenFilter;
-            TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
-            Tokenizer tokenizer = new MockTokenizer();
-            tokenizer.setReader(new StringReader("foo bar"));
-
-            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> tokenFilterFactory.create(tokenizer));
+            IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
+                () -> createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settings), settings, commonAnalysisPlugin));
assertEquals("The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
+ "Please change the filter name to [edge_ngram] instead.", ex.getMessage());
}
@@ -116,12 +96,8 @@ public void testEdgeNGramFilterInCustomAnalyzerDeprecationError() throws IOException
.build();

try (CommonAnalysisPlugin commonAnalysisPlugin = new CommonAnalysisPlugin()) {
-            Map<String, TokenFilterFactory> tokenFilters = createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7),
-                settingsPre7, commonAnalysisPlugin).tokenFilter;
-            TokenFilterFactory tokenFilterFactory = tokenFilters.get("edgeNGram");
-            Tokenizer tokenizer = new MockTokenizer();
-            tokenizer.setReader(new StringReader("foo bar"));
-            assertNotNull(tokenFilterFactory.create(tokenizer));
+            createTestAnalysis(IndexSettingsModule.newIndexSettings("index", settingsPre7),
+                settingsPre7, commonAnalysisPlugin);
assertWarnings("The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
+ "Please change the filter name to [edge_ngram] instead.");
}
File: AnalysisRegistry.java

@@ -19,6 +19,7 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.ElasticsearchException;
@@ -536,6 +537,10 @@ public IndexAnalyzers build(IndexSettings indexSettings,
tokenFilterFactoryFactories, charFilterFactoryFactories);
}

+        for (Analyzer analyzer : normalizers.values()) {
+            analyzer.normalize("", ""); // check for deprecations
+        }
+
if (!analyzers.containsKey(DEFAULT_ANALYZER_NAME)) {
analyzers.put(DEFAULT_ANALYZER_NAME,
produceAnalyzer(DEFAULT_ANALYZER_NAME,
@@ -599,6 +604,7 @@ private static NamedAnalyzer produceAnalyzer(String name,
} else {
analyzer = new NamedAnalyzer(name, analyzerFactory.scope(), analyzerF, overridePositionIncrementGap);
}
+        checkDeprecations(analyzer);
return analyzer;
}

@@ -626,4 +632,17 @@ private void processNormalizerFactory(
NamedAnalyzer normalizer = new NamedAnalyzer(name, normalizerFactory.scope(), normalizerF);
normalizers.put(name, normalizer);
}

+    // Deprecation warnings are emitted when actual TokenStreams are built; this is usually
+    // too late to be useful, so we build an empty tokenstream at construction time and
+    // use it, to ensure that warnings are emitted immediately.
[Review comment · Contributor] We also throw exceptions in some cases, so it's not only about deprecations?

+    private static void checkDeprecations(Analyzer analyzer) {
+        try (TokenStream ts = analyzer.tokenStream("", "")) {
+            ts.reset();
+            while (ts.incrementToken()) {}
+            ts.end();
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
}
File: TransportAnalyzeActionTests.java

@@ -38,6 +38,7 @@
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.IndexAnalyzers;
+import org.elasticsearch.index.analysis.NormalizingTokenFilterFactory;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
@@ -108,6 +109,25 @@ public TokenStream create(TokenStream tokenStream) {
}
}

+    class DeprecatedTokenFilterFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
+
+        DeprecatedTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+            super(indexSettings, name, settings);
+        }
+
+        @Override
+        public TokenStream create(TokenStream tokenStream) {
+            deprecationLogger.deprecated("Using deprecated token filter [deprecated]");
+            return tokenStream;
+        }
+
+        @Override
+        public TokenStream normalize(TokenStream tokenStream) {
+            deprecationLogger.deprecated("Using deprecated token filter [deprecated]");
+            return tokenStream;
+        }
+    }
+
class AppendCharFilterFactory extends AbstractCharFilterFactory {

final String suffix;
@@ -136,7 +156,7 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {

@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
-        return singletonMap("mock", MockFactory::new);
+        return Map.of("mock", MockFactory::new, "deprecated", DeprecatedTokenFilterFactory::new);
}

@Override
@@ -492,4 +512,28 @@ public void testExceedSetMaxTokenLimit() {
assertEquals(e.getMessage(), "The number of tokens produced by calling _analyze has exceeded the allowed maximum of ["
+ idxMaxTokenCount + "]." + " This limit can be set by changing the [index.analyze.max_token_count] index level setting.");
}

+    public void testDeprecationWarnings() throws IOException {
+        AnalyzeAction.Request req = new AnalyzeAction.Request();
+        req.tokenizer("standard");
+        req.addTokenFilter("lowercase");
+        req.addTokenFilter("deprecated");
+        req.text("test text");
+
+        AnalyzeAction.Response analyze =
+            TransportAnalyzeAction.analyze(req, registry, mockIndexService(), maxTokenCount);
+        assertEquals(2, analyze.getTokens().size());
+        assertWarnings("Using deprecated token filter [deprecated]");
+
+        // normalizer
+        req = new AnalyzeAction.Request();
+        req.addTokenFilter("lowercase");
+        req.addTokenFilter("deprecated");
+        req.text("text");
+
+        analyze =
+            TransportAnalyzeAction.analyze(req, registry, mockIndexService(), maxTokenCount);
+        assertEquals(1, analyze.getTokens().size());
+        assertWarnings("Using deprecated token filter [deprecated]");
[Review comment · Member] Just a small observation: I thought this checks the normalize() method in DeprecatedTokenFilterFactory, so I tried changing it, but the test didn't fail. Do you know why?

[Reply · Contributor (author)] Because the default implementation of normalize() delegates to create(), we get the warning from there instead.
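A minimal sketch of the delegation being described, assuming the NormalizingTokenFilterFactory shape in this codebase (illustration only, not part of the diff):

    public interface NormalizingTokenFilterFactory extends TokenFilterFactory {
        // Default path: normalize() delegates to create(), so a factory that only
        // warns in create() still emits the warning on the normalizer code path.
        default TokenStream normalize(TokenStream tokenStream) {
            return create(tokenStream);
        }
    }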

[Review comment · Member] So it sounds like the normalize() method isn't actually tested here? I saw we cover that in some other test, so I'm fine with whatever you decide here. I was just curious whether this could also be checked through the _analyze API, but no problem if it's too tricky.

[Reply · Contributor (author)] No, it's tested, but in order to make it fail you can't just remove the method (because that then delegates to create()); you have to have an implementation that doesn't emit a warning.
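That is, the normalizer assertion would only fail with a hypothetical override that stays silent on the normalize path, such as:

    @Override
    public TokenStream normalize(TokenStream tokenStream) {
        // Hypothetical variant: no deprecation warning emitted here, so the
        // assertWarnings call in the test above would fail.
        return tokenStream;
    }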

[Reply · Contributor (author)] Oh, I've just realised that we're talking about the AnalyzeAction: it always uses create() rather than normalize(), and we should probably change that.

[Review comment · Member]
> we should probably change that

This can most likely be done in a follow-up PR, though. I just wanted to understand what's going on here and which of the two methods in the DeprecatedTokenFilterFactory in this test is supposed to fire the warning; currently they emit the same text, so it's hard to tell which code path is checked.

+    }
}
File: IndexModuleTests.java

@@ -19,6 +19,7 @@
package org.elasticsearch.index;

import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.index.AssertingDirectoryReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInvertState;
@@ -442,7 +443,7 @@ public Analyzer get() {
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
-                throw new AssertionError("should not be here");
+                return new TokenStreamComponents(new StandardTokenizer());
}

@Override
File: AnalysisRegistryTests.java

@@ -20,12 +20,13 @@
package org.elasticsearch.index.analysis;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -108,19 +109,25 @@ public void testOverrideDefaultAnalyzer() throws IOException {
public void testOverrideDefaultAnalyzerWithoutAnalysisModeAll() throws IOException {
Version version = VersionUtils.randomVersion(random());
Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build();
-        TokenFilterFactory tokenFilter = new AbstractTokenFilterFactory(IndexSettingsModule.newIndexSettings("index", settings),
-            "my_filter", Settings.EMPTY) {
+        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("index", settings);
+        TokenFilterFactory tokenFilter = new AbstractTokenFilterFactory(indexSettings, "my_filter", Settings.EMPTY) {
@Override
public AnalysisMode getAnalysisMode() {
return randomFrom(AnalysisMode.SEARCH_TIME, AnalysisMode.INDEX_TIME);
}

@Override
public TokenStream create(TokenStream tokenStream) {
-                return null;
+                return tokenStream;
}
};
+        TokenizerFactory tokenizer = new AbstractTokenizerFactory(indexSettings, Settings.EMPTY, "my_tokenizer") {
+            @Override
+            public Tokenizer create() {
+                return new StandardTokenizer();
+            }
+        };
-        Analyzer analyzer = new CustomAnalyzer(null, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter });
+        Analyzer analyzer = new CustomAnalyzer(tokenizer, new CharFilterFactory[0], new TokenFilterFactory[] { tokenFilter });
MapperException ex = expectThrows(MapperException.class,
() -> emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings),
singletonMap("default", new PreBuiltAnalyzerProvider("default", AnalyzerScope.INDEX, analyzer)), emptyMap(),
@@ -264,4 +271,83 @@ public void testEnsureCloseInvocationProperlyDelegated() throws IOException {
registry.close();
verify(mock).close();
}

+    public void testDeprecations() throws IOException {
[Review comment · Contributor] Can you also add a test that throws an exception?

+        AnalysisPlugin plugin = new AnalysisPlugin() {
+
+            class MockFactory extends AbstractTokenFilterFactory {
+                MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+                    super(indexSettings, name, settings);
+                }
+
+                @Override
+                public TokenStream create(TokenStream tokenStream) {
+                    deprecationLogger.deprecated("Using deprecated token filter [deprecated]");
+                    return tokenStream;
+                }
+            }
+
+            class UnusedMockFactory extends AbstractTokenFilterFactory {
+                UnusedMockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+                    super(indexSettings, name, settings);
+                }
+
+                @Override
+                public TokenStream create(TokenStream tokenStream) {
+                    deprecationLogger.deprecated("Using deprecated token filter [unused]");
+                    return tokenStream;
+                }
+            }
+
+            class NormalizerFactory extends AbstractTokenFilterFactory implements NormalizingTokenFilterFactory {
+
+                NormalizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
+                    super(indexSettings, name, settings);
+                }
+
+                @Override
+                public TokenStream create(TokenStream tokenStream) {
+                    deprecationLogger.deprecated("Using deprecated token filter [deprecated_normalizer]");
+                    return tokenStream;
+                }
+            }
+
+            @Override
+            public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
+                return Map.of("deprecated", MockFactory::new, "unused", UnusedMockFactory::new,
+                    "deprecated_normalizer", NormalizerFactory::new);
+            }
+        };
+
+        Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
+        Settings indexSettings = Settings.builder()
+            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put("index.analysis.filter.deprecated.type", "deprecated")
+            .put("index.analysis.analyzer.custom.tokenizer", "standard")
+            .putList("index.analysis.analyzer.custom.filter", "lowercase", "deprecated")
+            .build();
+
+        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
+
+        new AnalysisModule(TestEnvironment.newEnvironment(settings),
+            singletonList(plugin)).getAnalysisRegistry().build(idxSettings);
+
+        // We should only get a warning from the token filter that is referenced in settings
+        assertWarnings("Using deprecated token filter [deprecated]");
+
+        indexSettings = Settings.builder()
+            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
+            .put("index.analysis.filter.deprecated.type", "deprecated_normalizer")
+            .putList("index.analysis.normalizer.custom.filter", "lowercase", "deprecated_normalizer")
+            .build();
+        idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
+
+        new AnalysisModule(TestEnvironment.newEnvironment(settings),
+            singletonList(plugin)).getAnalysisRegistry().build(idxSettings);
+
+        assertWarnings("Using deprecated token filter [deprecated_normalizer]");
+
+    }
}
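The exception case requested in the review thread above is not covered by these two commits. A minimal sketch of such a test, following the conventions of testDeprecations and using hypothetical names (ThrowingFactory, "throwing"), could look like:

    public void testExceptionsAreSurfacedEagerly() throws IOException {
        AnalysisPlugin plugin = new AnalysisPlugin() {

            class ThrowingFactory extends AbstractTokenFilterFactory {
                ThrowingFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
                    super(indexSettings, name, settings);
                }

                @Override
                public TokenStream create(TokenStream tokenStream) {
                    // Hypothetical filter that fails outright when the chain is built
                    throw new IllegalArgumentException("The [throwing] token filter cannot be used");
                }
            }

            @Override
            public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
                return Map.of("throwing", ThrowingFactory::new);
            }
        };

        Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
        Settings indexSettings = Settings.builder()
            .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
            .put("index.analysis.filter.throwing.type", "throwing")
            .put("index.analysis.analyzer.custom.tokenizer", "standard")
            .putList("index.analysis.analyzer.custom.filter", "throwing")
            .build();
        IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);

        // With analyzers now exercised at construction time, the exception should
        // surface from build() itself rather than from a later tokenStream() call.
        IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
            () -> new AnalysisModule(TestEnvironment.newEnvironment(settings),
                singletonList(plugin)).getAnalysisRegistry().build(idxSettings));
        assertEquals("The [throwing] token filter cannot be used", e.getMessage());
    }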
File: AnalysisModuleTests.java

@@ -228,14 +228,13 @@ public void testUnderscoreInAnalyzerName() throws IOException {
}
}

-    public void testStandardFilterBWC() throws IOException {
+    public void testStandardFilterBWC() {
Version version = VersionUtils.randomVersionBetween(random(), Version.V_7_0_0, Version.CURRENT);
final Settings settings = Settings.builder().put("index.analysis.analyzer.my_standard.tokenizer", "standard")
.put("index.analysis.analyzer.my_standard.filter", "standard")
.put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).put(IndexMetaData.SETTING_VERSION_CREATED, version)
.build();
-        IndexAnalyzers analyzers = getIndexAnalyzers(settings);
-        IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> analyzers.get("my_standard").tokenStream("", ""));
+        IllegalArgumentException exc = expectThrows(IllegalArgumentException.class, () -> getIndexAnalyzers(settings));
assertThat(exc.getMessage(), equalTo("The [standard] token filter has been removed."));
}
