LUCENE-10352: Convert TestAllAnalyzersHaveFactories and TestRandomChains to a global integration test and discover classes to check from module system (#582)

Co-authored-by: Robert Muir <rmuir@apache.org>
apache · Jan 5, 2022 · 475fbd0 · 475fbd0
1 parent 2381192
commit 475fbd0
Show file tree

Hide file tree

Showing 73 changed files with 2,600 additions and 1,222 deletions.
diff --git a/gradle/java/modules.gradle b/gradle/java/modules.gradle
@@ -214,7 +214,7 @@ allprojects {
     }
 
     // Configure (tasks.test, sourceSets.test)
-    tasks.matching { it.name == "test" }.all { Test task ->
+    tasks.matching { it.name ==~ /test(_[0-9]+)?/ }.all { Test task ->
       configureTestTaskForSourceSet(task, task.project.sourceSets.test)
     }
 

diff --git a/gradle/validation/rat-sources.gradle b/gradle/validation/rat-sources.gradle
@@ -102,6 +102,7 @@ allprojects {
                     break
 
                 case ":lucene:analysis:common":
+                case ":lucene:analysis.tests":
                     exclude "src/**/*.aff"
                     exclude "src/**/*.dic"
                     exclude "src/**/*.good"

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -153,6 +153,12 @@ Bug Fixes
 * LUCENE-10349: Fix all analyzers to behave according to their documentation:
   getDefaultStopSet() methods now return unmodifiable CharArraySets.  (Uwe Schindler)
 
+* LUCENE-10352: Add missing service provider entries: KoreanNumberFilterFactory,
+  DaitchMokotoffSoundexFilterFactory (Uwe Schindler, Robert Muir)
+
+* LUCENE-10352: Fixed ctor argument checks: JapaneseKatakanaStemFilter,
+  DoubleMetaphoneFilter (Uwe Schindler, Robert Muir)
+
 Other
 ---------------------
 
@@ -163,6 +169,13 @@ Other
 * LUCENE-10310: TestXYDocValuesQueries#doRandomDistanceTest does not produce random circles with radius
   with '0' value any longer.
 
+* LUCENE-10352: Removed duplicate instances of StringMockResourceLoader and migrated class to
+  test-framework.  (Uwe Schindler, Robert Muir)
+
+* LUCENE-10352: Convert TestAllAnalyzersHaveFactories and TestRandomChains to a global integration test
+  and discover classes to check from module system. The test now checks all analyzer modules,
+  so it may discover new bugs outside of analysis:common module.  (Uwe Schindler, Robert Muir)
+
 ======================= Lucene 9.0.0 =======================
 
 New Features

diff --git a/lucene/analysis.tests/build.gradle b/lucene/analysis.tests/build.gradle
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+apply plugin: 'java-library'
+
+description = 'Module integration tests for all :lucene:analysis modules'
+
+dependencies {
+  moduleTestImplementation project(':lucene:analysis:common')
+  moduleTestImplementation project(':lucene:analysis:icu')
+  moduleTestImplementation project(':lucene:analysis:kuromoji')
+  moduleTestImplementation project(':lucene:analysis:morfologik')
+  moduleTestImplementation project(':lucene:analysis:nori')
+  moduleTestImplementation project(':lucene:analysis:opennlp')
+  moduleTestImplementation project(':lucene:analysis:phonetic')
+  moduleTestImplementation project(':lucene:analysis:smartcn')
+  moduleTestImplementation project(':lucene:analysis:stempel')
+  moduleTestImplementation project(':lucene:test-framework')
+}
diff --git a/lucene/analysis.tests/src/test/module-info.java b/lucene/analysis.tests/src/test/module-info.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Test module for global integration tests of all {@code org.apache.lucene.analysis}
+ * packages/modules.
+ */
+@SuppressWarnings({"requires-automatic"})
+module org.apache.lucene.analysis.tests {
+  requires java.xml;
+  requires org.apache.lucene.core;
+  requires org.apache.lucene.analysis.common;
+  requires org.apache.lucene.analysis.icu;
+  requires org.apache.lucene.analysis.kuromoji;
+  requires org.apache.lucene.analysis.morfologik;
+  requires org.apache.lucene.analysis.nori;
+  requires org.apache.lucene.analysis.opennlp;
+  requires org.apache.lucene.analysis.phonetic;
+  requires org.apache.lucene.analysis.smartcn;
+  requires org.apache.lucene.analysis.stempel;
+  requires org.apache.lucene.test_framework;
+  requires junit;
+
+  exports org.apache.lucene.analysis.tests;
+}
diff --git a/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/ModuleClassDiscovery.java b/lucene/analysis.tests/src/test/org/apache/lucene/analysis/tests/ModuleClassDiscovery.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.analysis.tests;
+
+import java.io.IOException;
+import java.lang.module.ResolvedModule;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.SortedMap;
+import java.util.TreeMap;
+import java.util.function.Predicate;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.junit.Assert;
+
+/** Discovers all classes from the module graph and loads them (without initialization) */
+abstract class ModuleClassDiscovery {
+
+  private static final Module THIS_MODULE = ModuleClassDiscovery.class.getModule();
+  private static final ModuleLayer LAYER = THIS_MODULE.getLayer();
+  private static final SortedMap<String, ResolvedModule> ALL_ANALYSIS_MODULES;
+
+  private static final Predicate<String> ALLOW_MODULES =
+      name ->
+          name.equals("org.apache.lucene.core") || name.startsWith("org.apache.lucene.analysis.");
+
+  static {
+    Assert.assertTrue(
+        "Analysis integration tests must run in Java Module System as named module",
+        THIS_MODULE.isNamed());
+    Assert.assertNotNull("Module layer is missing", LAYER);
+
+    var mods = new TreeMap<String, ResolvedModule>();
+    discoverAnalysisModules(LAYER, mods);
+    ALL_ANALYSIS_MODULES = Collections.unmodifiableSortedMap(mods);
+    if (LuceneTestCase.VERBOSE) {
+      System.out.println(
+          "Discovered the following analysis modules: " + ALL_ANALYSIS_MODULES.keySet());
+    }
+  }
+
+  private static void discoverAnalysisModules(
+      ModuleLayer layer, Map<String, ResolvedModule> result) {
+    for (var mod : layer.configuration().modules()) {
+      String name = mod.name();
+      if (ALLOW_MODULES.test(name) && !Objects.equals(name, THIS_MODULE.getName())) {
+        result.put(name, mod);
+      }
+    }
+    for (var parent : layer.parents()) {
+      discoverAnalysisModules(parent, result);
+    }
+  }
+
+  /** Finds all classes in the given package (and subpackages) across all analysis modules. */
+  public static List<Class<?>> getClassesForPackage(String pkgname) throws IOException {
+    final var prefix = pkgname.concat(".");
+    final var classes = new ArrayList<Class<?>>();
+    for (var resolvedModule : ALL_ANALYSIS_MODULES.values()) {
+      final var module = LAYER.findModule(resolvedModule.name()).orElseThrow();
+      try (var reader = resolvedModule.reference().open()) {
+        reader
+            .list()
+            .filter(entry -> entry.endsWith(".class"))
+            .map(entry -> entry.substring(0, entry.length() - 6).replace('/', '.'))
+            .filter(clazzname -> clazzname.startsWith(prefix))
+            .sorted()
+            .map(
+                clazzname ->
+                    Objects.requireNonNull(
+                        Class.forName(module, clazzname),
+                        "Class '" + clazzname + "' not found in module '" + module.getName() + "'"))
+            .forEach(classes::add);
+      }
+    }
+    Assert.assertFalse("No classes found in package:" + pkgname, classes.isEmpty());
+    return classes;
+  }
+}
diff --git a/...s/core/TestAllAnalyzersHaveFactories.java → .../tests/TestAllAnalyzersHaveFactories.java b/...s/core/TestAllAnalyzersHaveFactories.java → .../tests/TestAllAnalyzersHaveFactories.java
@@ -14,15 +14,12 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.lucene.analysis.core;
+package org.apache.lucene.analysis.tests;
 
 import java.io.Reader;
 import java.io.StringReader;
 import java.lang.reflect.Modifier;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.HashSet;
-import java.util.IdentityHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -34,27 +31,17 @@
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.TokenizerFactory;
+import org.apache.lucene.analysis.core.KeywordTokenizer;
+import org.apache.lucene.analysis.core.UnicodeWhitespaceTokenizer;
 import org.apache.lucene.analysis.miscellaneous.PatternKeywordMarkerFilter;
 import org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter;
 import org.apache.lucene.analysis.path.ReversePathHierarchyTokenizer;
 import org.apache.lucene.analysis.sinks.TeeSinkTokenFilter;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.sr.SerbianNormalizationRegularFilter;
-import org.apache.lucene.analysis.util.StringMockResourceLoader;
-import org.apache.lucene.tests.analysis.CrankyTokenFilter;
-import org.apache.lucene.tests.analysis.MockCharFilter;
-import org.apache.lucene.tests.analysis.MockFixedLengthPayloadFilter;
-import org.apache.lucene.tests.analysis.MockGraphTokenFilter;
-import org.apache.lucene.tests.analysis.MockHoleInjectingTokenFilter;
-import org.apache.lucene.tests.analysis.MockLowerCaseFilter;
-import org.apache.lucene.tests.analysis.MockRandomLookaheadTokenFilter;
-import org.apache.lucene.tests.analysis.MockSynonymFilter;
-import org.apache.lucene.tests.analysis.MockTokenFilter;
-import org.apache.lucene.tests.analysis.MockTokenizer;
-import org.apache.lucene.tests.analysis.MockVariableLengthPayloadFilter;
-import org.apache.lucene.tests.analysis.SimplePayloadFilter;
-import org.apache.lucene.tests.analysis.ValidatingTokenFilter;
+import org.apache.lucene.analysis.stempel.StempelFilter;
 import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.tests.util.StringMockResourceLoader;
 import org.apache.lucene.util.ResourceLoader;
 import org.apache.lucene.util.ResourceLoaderAware;
 import org.apache.lucene.util.Version;
@@ -65,71 +52,37 @@
  */
 public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
 
-  // these are test-only components (e.g. test-framework)
-  private static final Set<Class<?>> testComponents =
-      Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());
-
-  static {
-    Collections.<Class<?>>addAll(
-        testComponents,
-        MockTokenizer.class,
-        MockCharFilter.class,
-        MockFixedLengthPayloadFilter.class,
-        MockGraphTokenFilter.class,
-        MockHoleInjectingTokenFilter.class,
-        MockLowerCaseFilter.class,
-        MockRandomLookaheadTokenFilter.class,
-        MockSynonymFilter.class,
-        MockTokenFilter.class,
-        MockVariableLengthPayloadFilter.class,
-        ValidatingTokenFilter.class,
-        CrankyTokenFilter.class,
-        SimplePayloadFilter.class);
-  }
-
   // these are 'crazy' components like cachingtokenfilter. does it make sense to add factories for
   // these?
   private static final Set<Class<?>> crazyComponents =
-      Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());
-
-  static {
-    Collections.<Class<?>>addAll(
-        crazyComponents, CachingTokenFilter.class, TeeSinkTokenFilter.class);
-  }
+      Set.of(CachingTokenFilter.class, TeeSinkTokenFilter.class);
 
   // these are oddly-named (either the actual analyzer, or its factory)
   // they do actually have factories.
   // TODO: clean this up!
   private static final Set<Class<?>> oddlyNamedComponents =
-      Collections.newSetFromMap(new IdentityHashMap<Class<?>, Boolean>());
-
-  static {
-    Collections.<Class<?>>addAll(
-        oddlyNamedComponents,
-        // this is supported via an option to PathHierarchyTokenizer's factory
-        ReversePathHierarchyTokenizer.class,
-        SnowballFilter.class, // this is called SnowballPorterFilterFactory
-        PatternKeywordMarkerFilter.class,
-        SetKeywordMarkerFilter.class,
-        UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
-        // class from core, but StopFilterFactory creates one from this module
-        org.apache.lucene.analysis.StopFilter.class,
-        // class from core, but LowerCaseFilterFactory creates one from this module
-        org.apache.lucene.analysis.LowerCaseFilter.class);
-  }
+      Set.of(
+          // this is supported via an option to PathHierarchyTokenizer's factory
+          ReversePathHierarchyTokenizer.class,
+          SnowballFilter.class, // this is called SnowballPorterFilterFactory
+          StempelFilter.class, // this is called StempelPolishStemFilterFactory
+          PatternKeywordMarkerFilter.class,
+          SetKeywordMarkerFilter.class,
+          UnicodeWhitespaceTokenizer.class, // a supported option via WhitespaceTokenizerFactory
+          // class from core, but StopFilterFactory creates one from this module
+          org.apache.lucene.analysis.StopFilter.class,
+          // class from core, but LowerCaseFilterFactory creates one from this module
+          org.apache.lucene.analysis.LowerCaseFilter.class);
 
   // The following token filters are excused from having their factory.
-  private static final Set<Class<?>> tokenFiltersWithoutFactory = new HashSet<>();
-
-  static {
-    tokenFiltersWithoutFactory.add(SerbianNormalizationRegularFilter.class);
-  }
+  private static final Set<Class<?>> tokenFiltersWithoutFactory =
+      Set.of(SerbianNormalizationRegularFilter.class);
 
   private static final ResourceLoader loader = new StringMockResourceLoader("");
 
   public void test() throws Exception {
     List<Class<?>> analysisClasses =
-        TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
+        ModuleClassDiscovery.getClassesForPackage("org.apache.lucene.analysis");
 
     for (final Class<?> c : analysisClasses) {
       final int modifiers = c.getModifiers();
@@ -141,7 +94,6 @@ public void test() throws Exception {
           || c.isAnonymousClass()
           || c.isMemberClass()
           || c.isInterface()
-          || testComponents.contains(c)
           || crazyComponents.contains(c)
           || oddlyNamedComponents.contains(c)
           || tokenFiltersWithoutFactory.contains(c)