Skip to content

Commit

Permalink
LUCENE-10352: Convert TestAllAnalyzersHaveFactories and TestRandomCha…
Browse files Browse the repository at this point in the history
…ins to a global integration test and discover classes to check from module system (#582)

Co-authored-by: Robert Muir <rmuir@apache.org>
  • Loading branch information
uschindler and rmuir authored Jan 5, 2022
1 parent 2381192 commit 475fbd0
Show file tree
Hide file tree
Showing 73 changed files with 2,600 additions and 1,222 deletions.
2 changes: 1 addition & 1 deletion gradle/java/modules.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ allprojects {
}

// Configure (tasks.test, sourceSets.test)
tasks.matching { it.name == "test" }.all { Test task ->
tasks.matching { it.name ==~ /test(_[0-9]+)?/ }.all { Test task ->
configureTestTaskForSourceSet(task, task.project.sourceSets.test)
}

Expand Down
1 change: 1 addition & 0 deletions gradle/validation/rat-sources.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ allprojects {
break

case ":lucene:analysis:common":
case ":lucene:analysis.tests":
exclude "src/**/*.aff"
exclude "src/**/*.dic"
exclude "src/**/*.good"
Expand Down
13 changes: 13 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,12 @@ Bug Fixes
* LUCENE-10349: Fix all analyzers to behave according to their documentation:
getDefaultStopSet() methods now return unmodifiable CharArraySets. (Uwe Schindler)

* LUCENE-10352: Add missing service provider entries: KoreanNumberFilterFactory,
DaitchMokotoffSoundexFilterFactory (Uwe Schindler, Robert Muir)

* LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter,
DoubleMetaphoneFilter (Uwe Schindler, Robert Muir)

Other
---------------------

Expand All @@ -163,6 +169,13 @@ Other
* LUCENE-10310: TestXYDocValuesQueries#doRandomDistanceTest no longer produces random circles with a
radius of '0'.

* LUCENE-10352: Removed duplicate instances of StringMockResourceLoader and migrated class to
test-framework. (Uwe Schindler, Robert Muir)

* LUCENE-10352: Convert TestAllAnalyzersHaveFactories and TestRandomChains to a global integration test
and discover classes to check from module system. The test now checks all analyzer modules,
so it may discover new bugs outside of analysis:common module. (Uwe Schindler, Robert Muir)

======================= Lucene 9.0.0 =======================

New Features
Expand Down
33 changes: 33 additions & 0 deletions lucene/analysis.tests/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

// Build script for the analysis integration-test project. It only declares test-side
// dependencies; no production dependencies are declared in this script.
apply plugin: 'java-library'

description = 'Module integration tests for all :lucene:analysis modules'

// One entry per analysis module covered by the integration tests, plus the shared
// test framework. The list mirrors the "requires" directives in src/test/module-info.java,
// so a new analysis module most likely needs to be added in both places.
// NOTE(review): 'moduleTestImplementation' is not declared in this script; presumably it is
// a custom configuration set up by the root build's Java module support -- confirm.
dependencies {
moduleTestImplementation project(':lucene:analysis:common')
moduleTestImplementation project(':lucene:analysis:icu')
moduleTestImplementation project(':lucene:analysis:kuromoji')
moduleTestImplementation project(':lucene:analysis:morfologik')
moduleTestImplementation project(':lucene:analysis:nori')
moduleTestImplementation project(':lucene:analysis:opennlp')
moduleTestImplementation project(':lucene:analysis:phonetic')
moduleTestImplementation project(':lucene:analysis:smartcn')
moduleTestImplementation project(':lucene:analysis:stempel')
moduleTestImplementation project(':lucene:test-framework')
}
39 changes: 39 additions & 0 deletions lucene/analysis.tests/src/test/module-info.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
 * Test module for global integration tests of all {@code org.apache.lucene.analysis}
 * packages/modules.
 *
 * <p>The module requires Lucene core, every analysis module under test, the Lucene test
 * framework and JUnit, and exports the single test package
 * {@code org.apache.lucene.analysis.tests}.
 */
// The "requires-automatic" lint category warns when a module requires an automatic module.
// NOTE(review): presumably "junit" is the automatic module that triggers it -- confirm.
@SuppressWarnings({"requires-automatic"})
module org.apache.lucene.analysis.tests {
// NOTE(review): the reason for requiring java.xml is not visible in this file -- confirm.
requires java.xml;
requires org.apache.lucene.core;
// Analysis modules under test; keep in sync with the dependencies in build.gradle.
requires org.apache.lucene.analysis.common;
requires org.apache.lucene.analysis.icu;
requires org.apache.lucene.analysis.kuromoji;
requires org.apache.lucene.analysis.morfologik;
requires org.apache.lucene.analysis.nori;
requires org.apache.lucene.analysis.opennlp;
requires org.apache.lucene.analysis.phonetic;
requires org.apache.lucene.analysis.smartcn;
requires org.apache.lucene.analysis.stempel;
// Test infrastructure.
requires org.apache.lucene.test_framework;
requires junit;

// NOTE(review): presumably exported so the test runner can access the test classes -- confirm.
exports org.apache.lucene.analysis.tests;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.tests;

import java.io.IOException;
import java.lang.module.ResolvedModule;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.function.Predicate;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Assert;

/**
 * Discovers all classes from the module graph and loads them (without initialization).
 *
 * <p>On class initialization the module layer of this class and all of its parent layers are
 * scanned for {@code org.apache.lucene.core} and every module whose name starts with {@code
 * org.apache.lucene.analysis.}; the module containing this class is excluded. The result is kept
 * sorted by module name so that discovery order is deterministic.
 *
 * <p>This class must be loaded from a named module; otherwise the static initializer fails with an
 * assertion error.
 */
abstract class ModuleClassDiscovery {

  private static final Module THIS_MODULE = ModuleClassDiscovery.class.getModule();
  private static final ModuleLayer LAYER = THIS_MODULE.getLayer();
  private static final SortedMap<String, ResolvedModule> ALL_ANALYSIS_MODULES;

  /** File name suffix identifying class file entries in a module's resource listing. */
  private static final String CLASS_SUFFIX = ".class";

  /** Accepts Lucene core and all modules below the {@code org.apache.lucene.analysis.} prefix. */
  private static final Predicate<String> ALLOW_MODULES =
      name ->
          name.equals("org.apache.lucene.core") || name.startsWith("org.apache.lucene.analysis.");

  static {
    Assert.assertTrue(
        "Analysis integration tests must run in Java Module System as named module",
        THIS_MODULE.isNamed());
    Assert.assertNotNull("Module layer is missing", LAYER);

    var mods = new TreeMap<String, ResolvedModule>();
    discoverAnalysisModules(LAYER, mods);
    ALL_ANALYSIS_MODULES = Collections.unmodifiableSortedMap(mods);
    if (LuceneTestCase.VERBOSE) {
      System.out.println(
          "Discovered the following analysis modules: " + ALL_ANALYSIS_MODULES.keySet());
    }
  }

  /**
   * Recursively collects all allowed modules of the given layer and of its parent layers.
   *
   * @param layer the module layer to inspect
   * @param result receives the matching modules, keyed by module name
   */
  private static void discoverAnalysisModules(
      ModuleLayer layer, Map<String, ResolvedModule> result) {
    for (var mod : layer.configuration().modules()) {
      String name = mod.name();
      // never report the test module itself, although its name matches the analysis prefix
      if (ALLOW_MODULES.test(name) && !Objects.equals(name, THIS_MODULE.getName())) {
        result.put(name, mod);
      }
    }
    for (var parent : layer.parents()) {
      discoverAnalysisModules(parent, result);
    }
  }

  /**
   * Finds all classes in package across all analysis modules.
   *
   * <p>Classes of sub-packages are included, because matching is done on the class name prefix
   * {@code pkgname + "."}. Classes are loaded through {@link Class#forName(Module, String)}.
   *
   * @param pkgname the package name, without trailing dot
   * @return the loaded classes, grouped by module (in module name order) and sorted by class name
   *     within each module; never empty
   * @throws IOException if the contents of a module cannot be opened or listed
   */
  public static List<Class<?>> getClassesForPackage(String pkgname) throws IOException {
    final var prefix = pkgname.concat(".");
    final var classes = new ArrayList<Class<?>>();
    for (var resolvedModule : ALL_ANALYSIS_MODULES.values()) {
      final var module = LAYER.findModule(resolvedModule.name()).orElseThrow();
      // The stream returned by ModuleReader#list() may keep directories open, so it is closed
      // explicitly together with the reader instead of relying on the reader alone.
      try (var reader = resolvedModule.reference().open();
          var entries = reader.list()) {
        entries
            .filter(entry -> entry.endsWith(CLASS_SUFFIX))
            .map(
                entry ->
                    entry.substring(0, entry.length() - CLASS_SUFFIX.length()).replace('/', '.'))
            .filter(clazzname -> clazzname.startsWith(prefix))
            .sorted()
            .map(
                clazzname ->
                    // Class#forName(Module, String) returns null instead of throwing
                    Objects.requireNonNull(
                        Class.forName(module, clazzname),
                        "Class '" + clazzname + "' not found in module '" + module.getName() + "'"))
            .forEach(classes::add);
      }
    }
    Assert.assertFalse("No classes found in package:" + pkgname, classes.isEmpty());
    return classes;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,12 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.core;
package org.apache.lucene.analysis.tests;

import java.io.Reader;
import java.io.StringReader;
import java.lang.reflect.Modifier;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
Expand All @@ -34,27 +31,17 @@
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.TokenizerFactory;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.core.UnicodeWhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.sr.SerbianNormalizationRegularFilter;
import org.apache.lucene.analysis.util.StringMockResourceLoader;
import org.apache.lucene.tests.analysis.CrankyTokenFilter;
import org.apache.lucene.tests.analysis.MockCharFilter;
import org.apache.lucene.tests.analysis.MockFixedLengthPayloadFilter;
import org.apache.lucene.tests.analysis.MockGraphTokenFilter;
import org.apache.lucene.tests.analysis.MockHoleInjectingTokenFilter;
import org.apache.lucene.tests.analysis.MockLowerCaseFilter;
import org.apache.lucene.tests.analysis.MockRandomLookaheadTokenFilter;
import org.apache.lucene.tests.analysis.MockSynonymFilter;
import org.apache.lucene.tests.analysis.MockTokenFilter;
import org.apache.lucene.tests.analysis.MockTokenizer;
import org.apache.lucene.tests.analysis.MockVariableLengthPayloadFilter;
import org.apache.lucene.tests.analysis.SimplePayloadFilter;
import org.apache.lucene.tests.analysis.ValidatingTokenFilter;
import org.apache.lucene.analysis.stempel.StempelFilter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.StringMockResourceLoader;
import org.apache.lucene.util.ResourceLoader;
import org.apache.lucene.util.ResourceLoaderAware;
import org.apache.lucene.util.Version;
Expand All @@ -65,71 +52,37 @@
*/
public class TestAllAnalyzersHaveFactories extends LuceneTestCase {

// these are test-only components (e.g. test-framework)
private static final Set<Class<?>> testComponents =
Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());

static {
Collections.<Class<?>>addAll(
testComponents,
MockTokenizer.class,
MockCharFilter.class,
MockFixedLengthPayloadFilter.class,
MockGraphTokenFilter.class,
MockHoleInjectingTokenFilter.class,
MockLowerCaseFilter.class,
MockRandomLookaheadTokenFilter.class,
MockSynonymFilter.class,
MockTokenFilter.class,
MockVariableLengthPayloadFilter.class,
ValidatingTokenFilter.class,
CrankyTokenFilter.class,
SimplePayloadFilter.class);
}

// these are 'crazy' components like cachingtokenfilter. does it make sense to add factories for
// these?
private static final Set<Class<?>> crazyComponents =
Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());

static {
Collections.<Class<?>>addAll(
crazyComponents, CachingTokenFilter.class, TeeSinkTokenFilter.class);
}
Set.of(CachingTokenFilter.class, TeeSinkTokenFilter.class);

// these are oddly-named (either the actual analyzer, or its factory)
// they do actually have factories.
// TODO: clean this up!
private static final Set<Class<?>> oddlyNamedComponents =
Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());

static {
Collections.<Class<?>>addAll(
oddlyNamedComponents,
// this is supported via an option to PathHierarchyTokenizer's factory
ReversePathHierarchyTokenizer.class,
SnowballFilter.class, // this is called SnowballPorterFilterFactory
PatternKeywordMarkerFilter.class,
SetKeywordMarkerFilter.class,
UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
// class from core, but StopFilterFactory creates one from this module
org.apache.lucene.analysis.StopFilter.class,
// class from core, but LowerCaseFilterFactory creates one from this module
org.apache.lucene.analysis.LowerCaseFilter.class);
}
Set.of(
// this is supported via an option to PathHierarchyTokenizer's factory
ReversePathHierarchyTokenizer.class,
SnowballFilter.class, // this is called SnowballPorterFilterFactory
StempelFilter.class, // this is called StempelPolishStemFilterFactory
PatternKeywordMarkerFilter.class,
SetKeywordMarkerFilter.class,
UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
// class from core, but StopFilterFactory creates one from this module
org.apache.lucene.analysis.StopFilter.class,
// class from core, but LowerCaseFilterFactory creates one from this module
org.apache.lucene.analysis.LowerCaseFilter.class);

// The following token filters are excused from having their factory.
private static final Set<Class<?>> tokenFiltersWithoutFactory = new HashSet<>();

static {
tokenFiltersWithoutFactory.add(SerbianNormalizationRegularFilter.class);
}
private static final Set<Class<?>> tokenFiltersWithoutFactory =
Set.of(SerbianNormalizationRegularFilter.class);

private static final ResourceLoader loader = new StringMockResourceLoader("");

public void test() throws Exception {
List<Class<?>> analysisClasses =
TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
ModuleClassDiscovery.getClassesForPackage("org.apache.lucene.analysis");

for (final Class<?> c : analysisClasses) {
final int modifiers = c.getModifiers();
Expand All @@ -141,7 +94,6 @@ public void test() throws Exception {
|| c.isAnonymousClass()
|| c.isMemberClass()
|| c.isInterface()
|| testComponents.contains(c)
|| crazyComponents.contains(c)
|| oddlyNamedComponents.contains(c)
|| tokenFiltersWithoutFactory.contains(c)
Expand Down
Loading

0 comments on commit 475fbd0

Please sign in to comment.