diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 5d08c8518ad6..40660f722fb7 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -14,6 +14,11 @@ API Changes taxoEpoch decide. Add a test case that demonstrates the inconsistencies caused when you reuse taxoArrays on older checkpoints. (Gautam Worah) +* LUCENE-10558: Add new constructors to Kuromoji and Nori dictionary classes to support classpath / + module system usage. It is now possible to use JDK's Class/ClassLoader/Module#getResource(...) apis + and pass their returned URL to dictionary constructors to load resources from Classpath or Module + resources. (Uwe Schindler, Tomoko Uchida, Mike Sokolov) + New Features --------------------- @@ -110,6 +115,10 @@ Bug Fixes * LUCENE-10518: Relax field consistency check for old indices (Nhat Nguyen) +* LUCENE-10558: Restore behaviour of deprecated Kuromoji and Nori dictionary constructors for + custom dictionary support. Please also use new URL-based constructors for classpath/module + system ressources. (Uwe Schindler, Tomoko Uchida, Mike Sokolov) + Build --------------------- diff --git a/lucene/MIGRATE.md b/lucene/MIGRATE.md index 9cf569d48c8d..378cce193fe0 100644 --- a/lucene/MIGRATE.md +++ b/lucene/MIGRATE.md @@ -51,6 +51,19 @@ the [Log4j JDK Logging Adapter](https://logging.apache.org/log4j/2.x/log4j-jul/i in combination with the corresponding system property: `java.util.logging.manager=org.apache.logging.log4j.jul.LogManager`. +### Kuromoji and Nori analysis component constructors for custom dictionaries + +The Kuromoji and Nori analysis modules had some way to customize the backing dictionaries +by passing a path to file or classpath resources using some inconsistently implemented +APIs. This was buggy from the beginning, but some users made use of it. Due to move to Java +module system, especially the resource lookup on classpath stopped to work correctly. +The Lucene team therefore implemented new APIs to create dictionary implementations +with custom data files. Unfortunately there were some shortcomings in the 9.1 version, +also when using the now deprecated ctors, so users are advised to upgrade to +Lucene 9.2 or stay with 9.0. + +See LUCENE-10558 for more details and workarounds. + ## Migration from Lucene 8.x to Lucene 9.0 ### Rename of binary artifacts from '**-analyzers-**' to '**-analysis-**' (LUCENE-9562) diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java index 4b75bed4ea6a..78255b0b4500 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java @@ -25,6 +25,7 @@ import java.nio.channels.ReadableByteChannel; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.Objects; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.InputStreamDataInput; @@ -140,7 +141,11 @@ public static final InputStream getResource(ResourceScheme scheme, String path) throws IOException { switch (scheme) { case CLASSPATH: - return getClassResource(path); + Objects.requireNonNull( + path, + "Deprecated API no longer works with null paths, to load default resources use default ctors."); + return IOUtils.requireResourceNonNull( + BinaryDictionary.class.getClassLoader().getResourceAsStream(path), path); case FILE: return Files.newInputStream(Paths.get(path)); default: @@ -148,11 +153,6 @@ public static final InputStream getResource(ResourceScheme scheme, String path) } } - @Deprecated(forRemoval = true, since = "9.1") - private static InputStream getClassResource(String path) throws IOException { - return IOUtils.requireResourceNonNull(BinaryDictionary.class.getResourceAsStream(path), path); - } - public void lookupWordIds(int sourceId, IntsRef ref) { ref.ints = targetMap; ref.offset = targetMapOffsets[sourceId]; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java index 16084c644a53..8adc2e740766 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java @@ -19,10 +19,10 @@ import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URL; import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.InputStreamDataInput; @@ -42,15 +42,13 @@ public final class ConnectionCosts { /** * @param scheme - scheme for loading resources (FILE or CLASSPATH). * @param path - where to load resources from, without the ".dat" suffix - * @deprecated replaced by {@link #ConnectionCosts(Path)} + * @deprecated replaced by {@link #ConnectionCosts(Path)} for files and {@link + * #ConnectionCosts(URL)} for classpath/module resources. */ @Deprecated(forRemoval = true, since = "9.1") @SuppressWarnings("removal") public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String path) throws IOException { - this( - scheme == BinaryDictionary.ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(path + FILENAME_SUFFIX)) - : ConnectionCosts::getClassResource); + this(() -> BinaryDictionary.getResource(scheme, path.replace('.', '/') + FILENAME_SUFFIX)); } /** @@ -63,6 +61,17 @@ public ConnectionCosts(Path connectionCostsFile) throws IOException { this(() -> Files.newInputStream(connectionCostsFile)); } + /** + * Create a {@link ConnectionCosts} from an external resource URL (e.g. from Classpath with {@link + * ClassLoader#getResource(String)}). + * + * @param connectionCostsUrl where to load connection costs resource + * @throws IOException if resource was not found or broken + */ + public ConnectionCosts(URL connectionCostsUrl) throws IOException { + this(() -> connectionCostsUrl.openStream()); + } + private ConnectionCosts() throws IOException { this(ConnectionCosts::getClassResource); } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java index 55b4633b4488..42fa603d11c5 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java @@ -19,9 +19,9 @@ import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOSupplier; @@ -41,27 +41,20 @@ public final class TokenInfoDictionary extends BinaryDictionary { /** * @param resourceScheme - scheme for loading resources (FILE or CLASSPATH). - * @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH - * scheme only, use this class's name as the path. - * @deprecated replaced by {@link #TokenInfoDictionary(Path, Path, Path, Path)} + * @param resourcePath - where to load resources (dictionaries) from. + * @deprecated replaced by {@link #TokenInfoDictionary(Path, Path, Path, Path)} for files and + * {@link #TokenInfoDictionary(URL, URL, URL, URL)} for classpath/module resources */ @Deprecated(forRemoval = true, since = "9.1") @SuppressWarnings("removal") public TokenInfoDictionary(ResourceScheme resourceScheme, String resourcePath) throws IOException { this( - resourceScheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + TARGETMAP_FILENAME_SUFFIX)) - : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX), - resourceScheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + POSDICT_FILENAME_SUFFIX)) - : () -> getClassResource(POSDICT_FILENAME_SUFFIX), - resourceScheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + DICT_FILENAME_SUFFIX)) - : () -> getClassResource(DICT_FILENAME_SUFFIX), - resourceScheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + FST_FILENAME_SUFFIX)) - : () -> getClassResource(FST_FILENAME_SUFFIX)); + () -> + BinaryDictionary.getResource(resourceScheme, resourcePath + TARGETMAP_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(resourceScheme, resourcePath + POSDICT_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(resourceScheme, resourcePath + DICT_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(resourceScheme, resourcePath + FST_FILENAME_SUFFIX)); } /** @@ -82,6 +75,25 @@ public TokenInfoDictionary(Path targetMapFile, Path posDictFile, Path dictFile, () -> Files.newInputStream(fstFile)); } + /** + * Create a {@link TokenInfoDictionary} from an external resource URL (e.g. from Classpath with + * {@link ClassLoader#getResource(String)}). + * + * @param targetMapUrl where to load target map resource + * @param posDictUrl where to load POS dictionary resource + * @param dictUrl where to load dictionary entries resource + * @param fstUrl where to load encoded FST data resource + * @throws IOException if resource was not found or broken + */ + public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl) + throws IOException { + this( + () -> targetMapUrl.openStream(), + () -> posDictUrl.openStream(), + () -> dictUrl.openStream(), + () -> fstUrl.openStream()); + } + private TokenInfoDictionary() throws IOException { this( () -> getClassResource(TARGETMAP_FILENAME_SUFFIX), diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java index e5cff9209e36..bb7c3b5b4b57 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java @@ -18,9 +18,9 @@ import java.io.IOException; import java.io.InputStream; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import org.apache.lucene.util.IOUtils; /** Dictionary for unknown-word handling. */ @@ -32,21 +32,16 @@ public final class UnknownDictionary extends BinaryDictionary { * @param scheme scheme for loading resources (FILE or CLASSPATH). * @param path where to load resources from; a path, including the file base name without * extension; this is used to match multiple files with the same base name. - * @deprecated replaced by {@link #UnknownDictionary(Path, Path, Path)} + * @deprecated replaced by {@link #UnknownDictionary(Path, Path, Path)} for files and {@link + * #UnknownDictionary(URL, URL, URL)} for classpath/module resources */ @Deprecated(forRemoval = true, since = "9.1") @SuppressWarnings("removal") public UnknownDictionary(ResourceScheme scheme, String path) throws IOException { super( - scheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(path + TARGETMAP_FILENAME_SUFFIX)) - : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX), - scheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(path + POSDICT_FILENAME_SUFFIX)) - : () -> getClassResource(POSDICT_FILENAME_SUFFIX), - scheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(path + DICT_FILENAME_SUFFIX)) - : () -> getClassResource(DICT_FILENAME_SUFFIX)); + () -> BinaryDictionary.getResource(scheme, path + TARGETMAP_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(scheme, path + POSDICT_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(scheme, path + DICT_FILENAME_SUFFIX)); } /** @@ -64,6 +59,20 @@ public UnknownDictionary(Path targetMapFile, Path posDictFile, Path dictFile) th () -> Files.newInputStream(dictFile)); } + /** + * Create a {@link UnknownDictionary} from an external resource URL (e.g. from Classpath with + * {@link ClassLoader#getResource(String)}). + * + * @param targetMapUrl where to load target map resource + * @param posDictUrl where to load POS dictionary resource + * @param dictUrl where to load dictionary entries resource + * @throws IOException if resource was not found or broken + */ + public UnknownDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl) throws IOException { + super( + () -> targetMapUrl.openStream(), () -> posDictUrl.openStream(), () -> dictUrl.openStream()); + } + private UnknownDictionary() throws IOException { super( () -> getClassResource(TARGETMAP_FILENAME_SUFFIX), diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java index bc44723996ac..0209154d09d8 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java @@ -32,6 +32,7 @@ public class TestExternalDictionary extends LuceneTestCase { private Path dir; + private ClassLoader loader = getClass().getClassLoader(); @Override @Before @@ -98,4 +99,59 @@ public void testLoadExternalConnectionCosts() throws Exception { new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX)); assertEquals(1, cc.get(0, 1)); } + + public void testLoadExternalUrlTokenInfoDictionary() throws Exception { + String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/'); + TokenInfoDictionary dict = + new TokenInfoDictionary( + loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + FST_FILENAME_SUFFIX)); + assertNotNull(dict.getFST()); + } + + public void testLoadExternalUrlUnknownDictionary() throws Exception { + String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/'); + UnknownDictionary dict = + new UnknownDictionary( + loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX)); + assertNotNull(dict.getCharacterDefinition()); + } + + public void testLoadExternalUrlConnectionCosts() throws Exception { + String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/'); + ConnectionCosts cc = + new ConnectionCosts(loader.getResource(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX)); + assertEquals(1, cc.get(0, 1)); + } + + @Deprecated(forRemoval = true, since = "9.1") + @SuppressWarnings("removal") + public void testDeprecatedLoadExternalTokenInfoDictionary() throws Exception { + String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/'); + TokenInfoDictionary dict = + new TokenInfoDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath); + assertNotNull(dict.getFST()); + } + + @Deprecated(forRemoval = true, since = "9.1") + @SuppressWarnings("removal") + public void testDeprecatedLoadExternalUnknownDictionary() throws Exception { + String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/'); + UnknownDictionary dict = + new UnknownDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath); + assertNotNull(dict.getCharacterDefinition()); + } + + @Deprecated(forRemoval = true, since = "9.1") + @SuppressWarnings("removal") + public void testDeprecatedLoadExternalConnectionCosts() throws Exception { + String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/'); + ConnectionCosts cc = + new ConnectionCosts(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath); + assertEquals(1, cc.get(0, 1)); + } } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java index aba6782182bb..767eba2e6fda 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java @@ -23,11 +23,15 @@ import java.nio.ByteBuffer; import java.nio.channels.Channels; import java.nio.channels.ReadableByteChannel; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Objects; import org.apache.lucene.analysis.ko.POS; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOSupplier; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IntsRef; /** Base class for a binary-encoded in-memory dictionary. */ @@ -114,6 +118,23 @@ private static void populateTargetMap(DataInput in, int[] targetMap, int[] targe targetMapOffsets[sourceId] = targetMap.length; } + @Deprecated(forRemoval = true, since = "9.1") + public static final InputStream getResource(ResourceScheme scheme, String path) + throws IOException { + switch (scheme) { + case CLASSPATH: + Objects.requireNonNull( + path, + "Deprecated API no longer works with null paths, to load default resources use default ctors."); + return IOUtils.requireResourceNonNull( + BinaryDictionary.class.getClassLoader().getResourceAsStream(path), path); + case FILE: + return Files.newInputStream(Paths.get(path)); + default: + throw new IllegalStateException("unknown resource scheme " + scheme); + } + } + public void lookupWordIds(int sourceId, IntsRef ref) { ref.ints = targetMap; ref.offset = targetMapOffsets[sourceId]; diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java index 41dc4aa713ad..6e68963a286b 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java @@ -19,10 +19,10 @@ import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URL; import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.InputStreamDataInput; @@ -41,16 +41,14 @@ public final class ConnectionCosts { /** * @param scheme - scheme for loading resources (FILE or CLASSPATH). - * @param resourcePath - where to load resources from, without the ".dat" suffix + * @param path - where to load resources from, without the ".dat" suffix + * @deprecated replaced by {@link #ConnectionCosts(Path)} for files and {@link + * #ConnectionCosts(URL)} for classpath/module resources. */ @Deprecated(forRemoval = true, since = "9.1") @SuppressWarnings("removal") - public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String resourcePath) - throws IOException { - this( - scheme == BinaryDictionary.ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + FILENAME_SUFFIX)) - : ConnectionCosts::getClassResource); + public ConnectionCosts(BinaryDictionary.ResourceScheme scheme, String path) throws IOException { + this(() -> BinaryDictionary.getResource(scheme, path.replace('.', '/') + FILENAME_SUFFIX)); } /** @@ -63,6 +61,17 @@ public ConnectionCosts(Path connectionCostsFile) throws IOException { this(() -> Files.newInputStream(connectionCostsFile)); } + /** + * Create a {@link ConnectionCosts} from an external resource URL (e.g. from Classpath with {@link + * ClassLoader#getResource(String)}). + * + * @param connectionCostsUrl where to load connection costs resource + * @throws IOException if resource was not found or broken + */ + public ConnectionCosts(URL connectionCostsUrl) throws IOException { + this(() -> connectionCostsUrl.openStream()); + } + private ConnectionCosts() throws IOException { this(ConnectionCosts::getClassResource); } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java index c5182a5123b1..910ad3f4f0ac 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java @@ -19,9 +19,9 @@ import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOSupplier; @@ -49,26 +49,20 @@ private TokenInfoDictionary() throws IOException { /** * @param resourceScheme - scheme for loading resources (FILE or CLASSPATH). - * @param resourcePath - where to load resources (dictionaries) from. If null, with CLASSPATH - * scheme only, use this class's name as the path. + * @param resourcePath - where to load resources (dictionaries) from. + * @deprecated replaced by {@link #TokenInfoDictionary(Path, Path, Path, Path)} for files and + * {@link #TokenInfoDictionary(URL, URL, URL, URL)} for classpath/module resources */ @Deprecated(forRemoval = true, since = "9.1") @SuppressWarnings("removal") public TokenInfoDictionary(ResourceScheme resourceScheme, String resourcePath) throws IOException { this( - resourceScheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + TARGETMAP_FILENAME_SUFFIX)) - : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX), - resourceScheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + POSDICT_FILENAME_SUFFIX)) - : () -> getClassResource(POSDICT_FILENAME_SUFFIX), - resourceScheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + DICT_FILENAME_SUFFIX)) - : () -> getClassResource(DICT_FILENAME_SUFFIX), - resourceScheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + FST_FILENAME_SUFFIX)) - : () -> getClassResource(FST_FILENAME_SUFFIX)); + () -> + BinaryDictionary.getResource(resourceScheme, resourcePath + TARGETMAP_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(resourceScheme, resourcePath + POSDICT_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(resourceScheme, resourcePath + DICT_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(resourceScheme, resourcePath + FST_FILENAME_SUFFIX)); } /** @@ -89,6 +83,25 @@ public TokenInfoDictionary(Path targetMapFile, Path posDictFile, Path dictFile, () -> Files.newInputStream(fstFile)); } + /** + * Create a {@link TokenInfoDictionary} from an external resource URL (e.g. from Classpath with + * {@link ClassLoader#getResource(String)}). + * + * @param targetMapUrl where to load target map resource + * @param posDictUrl where to load POS dictionary resource + * @param dictUrl where to load dictionary entries resource + * @param fstUrl where to load encoded FST data resource + * @throws IOException if resource was not found or broken + */ + public TokenInfoDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl, URL fstUrl) + throws IOException { + this( + () -> targetMapUrl.openStream(), + () -> posDictUrl.openStream(), + () -> dictUrl.openStream(), + () -> fstUrl.openStream()); + } + private TokenInfoDictionary( IOSupplier targetMapResource, IOSupplier posResource, diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java index 10eb1611b824..d7ad14de2cd4 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java @@ -18,9 +18,9 @@ import java.io.IOException; import java.io.InputStream; +import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; -import java.nio.file.Paths; import org.apache.lucene.util.IOUtils; /** Dictionary for unknown-word handling. */ @@ -31,20 +31,16 @@ public final class UnknownDictionary extends BinaryDictionary { * @param scheme scheme for loading resources (FILE or CLASSPATH). * @param resourcePath where to load resources from; a path, including the file base name without * extension; this is used to match multiple files with the same base name. + * @deprecated replaced by {@link #UnknownDictionary(Path, Path, Path)} for files and {@link + * #UnknownDictionary(URL, URL, URL)} for classpath/module resources */ @Deprecated(forRemoval = true, since = "9.1") @SuppressWarnings("removal") public UnknownDictionary(ResourceScheme scheme, String resourcePath) throws IOException { super( - scheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + TARGETMAP_FILENAME_SUFFIX)) - : () -> getClassResource(TARGETMAP_FILENAME_SUFFIX), - scheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + POSDICT_FILENAME_SUFFIX)) - : () -> getClassResource(POSDICT_FILENAME_SUFFIX), - scheme == ResourceScheme.FILE - ? () -> Files.newInputStream(Paths.get(resourcePath + DICT_FILENAME_SUFFIX)) - : () -> getClassResource(DICT_FILENAME_SUFFIX)); + () -> BinaryDictionary.getResource(scheme, resourcePath + TARGETMAP_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(scheme, resourcePath + POSDICT_FILENAME_SUFFIX), + () -> BinaryDictionary.getResource(scheme, resourcePath + DICT_FILENAME_SUFFIX)); } /** @@ -62,6 +58,20 @@ public UnknownDictionary(Path targetMapFile, Path posDictFile, Path dictFile) th () -> Files.newInputStream(dictFile)); } + /** + * Create a {@link UnknownDictionary} from an external resource URL (e.g. from Classpath with + * {@link ClassLoader#getResource(String)}). + * + * @param targetMapUrl where to load target map resource + * @param posDictUrl where to load POS dictionary resource + * @param dictUrl where to load dictionary entries resource + * @throws IOException if resource was not found or broken + */ + public UnknownDictionary(URL targetMapUrl, URL posDictUrl, URL dictUrl) throws IOException { + super( + () -> targetMapUrl.openStream(), () -> posDictUrl.openStream(), () -> dictUrl.openStream()); + } + private UnknownDictionary() throws IOException { super( () -> getClassResource(TARGETMAP_FILENAME_SUFFIX), diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java index 5f8edab8934b..ba749e924611 100644 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java @@ -32,6 +32,7 @@ public class TestExternalDictionary extends LuceneTestCase { private Path dir; + private ClassLoader loader = getClass().getClassLoader(); @Override @Before @@ -98,4 +99,59 @@ public void testLoadExternalConnectionCosts() throws Exception { new ConnectionCosts(dir.resolve(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX)); assertEquals(0, cc.get(1, 1)); } + + public void testLoadExternalUrlTokenInfoDictionary() throws Exception { + String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/'); + TokenInfoDictionary dict = + new TokenInfoDictionary( + loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + FST_FILENAME_SUFFIX)); + assertNotNull(dict.getFST()); + } + + public void testLoadExternalUrlUnknownDictionary() throws Exception { + String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/'); + UnknownDictionary dict = + new UnknownDictionary( + loader.getResource(dictionaryPath + TARGETMAP_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + POSDICT_FILENAME_SUFFIX), + loader.getResource(dictionaryPath + DICT_FILENAME_SUFFIX)); + assertNotNull(dict.getCharacterDefinition()); + } + + public void testLoadExternalUrlConnectionCosts() throws Exception { + String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/'); + ConnectionCosts cc = + new ConnectionCosts(loader.getResource(dictionaryPath + ConnectionCosts.FILENAME_SUFFIX)); + assertEquals(0, cc.get(1, 1)); + } + + @Deprecated(forRemoval = true, since = "9.1") + @SuppressWarnings("removal") + public void testDeprecatedLoadExternalTokenInfoDictionary() throws Exception { + String dictionaryPath = TokenInfoDictionary.class.getName().replace('.', '/'); + TokenInfoDictionary dict = + new TokenInfoDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath); + assertNotNull(dict.getFST()); + } + + @Deprecated(forRemoval = true, since = "9.1") + @SuppressWarnings("removal") + public void testDeprecatedLoadExternalUnknownDictionary() throws Exception { + String dictionaryPath = UnknownDictionary.class.getName().replace('.', '/'); + UnknownDictionary dict = + new UnknownDictionary(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath); + assertNotNull(dict.getCharacterDefinition()); + } + + @Deprecated(forRemoval = true, since = "9.1") + @SuppressWarnings("removal") + public void testDeprecatedLoadExternalConnectionCosts() throws Exception { + String dictionaryPath = ConnectionCosts.class.getName().replace('.', '/'); + ConnectionCosts cc = + new ConnectionCosts(BinaryDictionary.ResourceScheme.CLASSPATH, dictionaryPath); + assertEquals(0, cc.get(1, 1)); + } }