Skip to content

Commit

Permalink
merge .util package to .dict
Browse files: browse the repository at this point in the history
  • Loading branch information
mocobeta committed Mar 19, 2022
1 parent 4387d3f commit 67ed016
Show file tree
Hide file tree
Showing 43 changed files with 55 additions and 122 deletions.
2 changes: 1 addition & 1 deletion gradle/generation/kuromoji.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

def recompileDictionary(project, dictionaryName, Closure closure) {
project.javaexec {
main = "org.apache.lucene.analysis.ja.util.DictionaryBuilder"
main = "org.apache.lucene.analysis.ja.dict.DictionaryBuilder"
classpath = project.sourceSets.main.runtimeClasspath

jvmArgs '-Xmx1G'
Expand Down
2 changes: 1 addition & 1 deletion gradle/generation/nori.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

def recompileDictionary(project, dictionaryName, Closure closure) {
project.javaexec {
main = "org.apache.lucene.analysis.ko.util.DictionaryBuilder"
main = "org.apache.lucene.analysis.ko.dict.DictionaryBuilder"
classpath = project.sourceSets.main.runtimeClasspath

jvmArgs '-Xmx1G'
Expand Down
1 change: 0 additions & 1 deletion lucene/analysis/kuromoji/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
exports org.apache.lucene.analysis.ja.completion;
exports org.apache.lucene.analysis.ja.dict;
exports org.apache.lucene.analysis.ja.tokenattributes;
exports org.apache.lucene.analysis.ja.util;

opens org.apache.lucene.analysis.ja to
org.apache.lucene.core;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ja.dict.ToStringUtil;
import org.apache.lucene.analysis.ja.tokenattributes.ReadingAttribute;
import org.apache.lucene.analysis.ja.util.ToStringUtil;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ja.dict.CharacterDefinition;
import org.apache.lucene.analysis.ja.dict.ConnectionCosts;
import org.apache.lucene.analysis.ja.dict.DictionaryBuilder;
import org.apache.lucene.analysis.ja.dict.JaMorphData;
import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary;
import org.apache.lucene.analysis.ja.dict.TokenInfoFST;
Expand Down Expand Up @@ -264,8 +265,7 @@ public JapaneseTokenizer(
/**
* Create a new JapaneseTokenizer, supplying a custom system dictionary and unknown dictionary.
* This constructor provides an entry point for users that want to construct custom language
* models that can be used as input to {@link
* org.apache.lucene.analysis.ja.util.DictionaryBuilder}.
* models that can be used as input to {@link DictionaryBuilder}.
*
* @param factory the AttributeFactory to use
* @param systemDictionary a custom known token dictionary
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/** Utility class for parsing CSV text */
public final class CSVUtil {
final class CSVUtil {
private static final char QUOTE = '"';

private static final char COMMA = ',';
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.io.IOException;
import java.io.LineNumberReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.analysis.ja.dict.ConnectionCosts;
import org.apache.lucene.analysis.morph.ConnectionCostsWriter;

class ConnectionCostsBuilder {
final class ConnectionCostsBuilder {

private ConnectionCostsBuilder() {}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,18 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Locale;
import org.apache.lucene.analysis.ja.dict.DictionaryConstants;

/**
* Tool to build dictionaries. Usage:
*
* <pre>
* java -cp [lucene classpath] org.apache.lucene.analysis.ja.util.DictionaryBuilder \
* java -cp [lucene classpath] org.apache.lucene.analysis.ja.dict.DictionaryBuilder \
* ${inputDir} ${outputDir} ${encoding} ${normalizeEntry}
* </pre>
*
Expand All @@ -44,7 +43,7 @@
*
* @lucene.experimental
*/
public class DictionaryBuilder {
public final class DictionaryBuilder {

/** Format of the dictionary. */
public enum DictionaryFormat {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.lucene.analysis.ja.dict;

/** Dictionary constants */
public final class DictionaryConstants {
final class DictionaryConstants {
/** Codec header of the dictionary file. */
public static final String DICT_HEADER = "kuromoji_dict";
/** Codec header of the dictionary mapping file. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.io.IOException;
import java.util.HashMap;

/** Utility class for English translations of morphological data, used only for debugging. */
public class ToStringUtil {
public final class ToStringUtil {
// a translation map for parts of speech, only used for reflectWith
private static final HashMap<String, String> posTranslations = new HashMap<>();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.io.BufferedReader;
import java.io.IOException;
Expand All @@ -28,14 +28,14 @@
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
import org.apache.lucene.analysis.ja.dict.DictionaryBuilder.DictionaryFormat;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.PositiveIntOutputs;

/** */
class TokenInfoDictionaryBuilder {
final class TokenInfoDictionaryBuilder {

private final String encoding;
private final Normalizer.Form normalForm;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,18 +14,17 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import org.apache.lucene.analysis.ja.dict.TokenInfoMorphData;
import org.apache.lucene.analysis.morph.DictionaryEntryWriter;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.util.ArrayUtil;

/** Writes system dictionary entries */
class TokenInfoDictionaryEntryWriter extends DictionaryEntryWriter {
final class TokenInfoDictionaryEntryWriter extends DictionaryEntryWriter {
private static final int ID_LIMIT = 8192;

TokenInfoDictionaryEntryWriter(int size) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,15 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
import org.apache.lucene.analysis.ja.dict.DictionaryConstants;
import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary;
import org.apache.lucene.util.fst.FST;

class TokenInfoDictionaryWriter
final class TokenInfoDictionaryWriter
extends org.apache.lucene.analysis.morph.BinaryDictionaryWriter<TokenInfoDictionary> {
private FST<Long> fst;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.apache.lucene.util.IOSupplier;

/** Morphological information for system dictionary. */
public class TokenInfoMorphData implements JaMorphData {
class TokenInfoMorphData implements JaMorphData {

private final ByteBuffer buffer;
private final String[] posDict;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.io.IOException;
import java.io.LineNumberReader;
Expand All @@ -25,9 +25,8 @@
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.ja.dict.CharacterDefinition;

class UnknownDictionaryBuilder {
final class UnknownDictionaryBuilder {
private static final String NGRAM_DICTIONARY_ENTRY = "NGRAM,5,5,-32768,記号,一般,*,*,*,*,*,*,*";

private final String encoding;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.io.IOException;
import java.nio.file.Path;
import org.apache.lucene.analysis.ja.dict.CharacterDefinition;
import org.apache.lucene.analysis.ja.dict.DictionaryConstants;
import org.apache.lucene.analysis.ja.dict.UnknownDictionary;
import org.apache.lucene.analysis.morph.BinaryDictionaryWriter;
import org.apache.lucene.analysis.morph.CharacterDefinitionWriter;

class UnknownDictionaryWriter extends BinaryDictionaryWriter<UnknownDictionary> {
final class UnknownDictionaryWriter extends BinaryDictionaryWriter<UnknownDictionary> {
private final CharacterDefinitionWriter<CharacterDefinition> characterDefinition =
new CharacterDefinitionWriter<>(
CharacterDefinition.class,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.analysis.ja.util.CSVUtil;
import org.apache.lucene.analysis.morph.Dictionary;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.FST;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import static org.apache.lucene.analysis.ja.dict.UserDictionary.CUSTOM_DICTIONARY_WORD_ID_OFFSET;
import static org.apache.lucene.analysis.ja.dict.UserDictionary.INTERNAL_SEPARATOR;

import org.apache.lucene.analysis.ja.util.CSVUtil;

/** Morphological information for user dictionary. */
final class UserMorphData implements JaMorphData {
public static final int WORD_COST = -100000;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.lucene.analysis.ja.tokenattributes;

import org.apache.lucene.analysis.ja.Token;
import org.apache.lucene.analysis.ja.util.ToStringUtil;
import org.apache.lucene.analysis.ja.dict.ToStringUtil;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.lucene.analysis.ja.tokenattributes;

import org.apache.lucene.analysis.ja.Token;
import org.apache.lucene.analysis.ja.util.ToStringUtil;
import org.apache.lucene.analysis.ja.dict.ToStringUtil;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
package org.apache.lucene.analysis.ja.tokenattributes;

import org.apache.lucene.analysis.ja.Token;
import org.apache.lucene.analysis.ja.util.ToStringUtil;
import org.apache.lucene.analysis.ja.dict.ToStringUtil;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.util.AttributeReflector;

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,9 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja;
package org.apache.lucene.analysis.ja.dict;

import java.io.IOException;
import org.apache.lucene.analysis.ja.util.CSVUtil;
import org.apache.lucene.tests.util.LuceneTestCase;

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.analysis.ja.util.DictionaryBuilder;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Before;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import java.util.HashMap;
import java.util.Map;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,7 @@
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import org.apache.lucene.analysis.ja.util.DictionaryBuilder;
import org.apache.lucene.analysis.ja.util.DictionaryBuilder.DictionaryFormat;
import org.apache.lucene.analysis.ja.util.ToStringUtil;
import org.apache.lucene.analysis.ja.dict.DictionaryBuilder.DictionaryFormat;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.ja.util;
package org.apache.lucene.analysis.ja.dict;

import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.Test;
Expand Down
1 change: 0 additions & 1 deletion lucene/analysis/nori/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
exports org.apache.lucene.analysis.ko;
exports org.apache.lucene.analysis.ko.dict;
exports org.apache.lucene.analysis.ko.tokenattributes;
exports org.apache.lucene.analysis.ko.util;

provides org.apache.lucene.analysis.TokenizerFactory with
org.apache.lucene.analysis.ko.KoreanTokenizerFactory;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,7 @@ public KoreanTokenizer(
/**
* Create a new KoreanTokenizer supplying a custom system dictionary and unknown dictionary. This
* constructor provides an entry point for users that want to construct custom language models
* that can be used as input to {@link org.apache.lucene.analysis.ko.util.DictionaryBuilder}.
* that can be used as input to {@link org.apache.lucene.analysis.ko.dict.DictionaryBuilder}.
*
* @param factory the AttributeFactory to use
* @param systemDictionary a custom known token dictionary
Expand Down
Loading

0 comments on commit 67ed016

Please sign in to comment.