From bd22f199dec5ab767da7481f1ea9c1cd0195bb9f Mon Sep 17 00:00:00 2001 From: Tomoko Uchida Date: Fri, 25 Mar 2022 18:44:36 +0900 Subject: [PATCH] LUCENE-10393: Unify binary dictionary and dictionary writer in kuromoji and nori (#740) --- lucene/CHANGES.txt | 3 + .../analysis/common/src/java/module-info.java | 1 + .../analysis/morph/BinaryDictionary.java | 100 ++++++ .../morph/BinaryDictionaryWriter.java | 148 ++++++++ .../analysis/morph/CharacterDefinition.java | 73 ++++ .../morph}/CharacterDefinitionWriter.java | 52 ++- .../analysis/morph/ConnectionCosts.java | 66 ++++ .../morph}/ConnectionCostsWriter.java | 23 +- .../lucene/analysis/morph/Dictionary.java | 49 +++ .../analysis/morph/DictionaryEntryWriter.java | 79 +++++ .../lucene/analysis/morph/MorphData.java | 41 +++ .../lucene/analysis/morph/package-info.java | 19 + .../lucene/analysis/ja/GraphvizFormatter.java | 5 +- .../lucene/analysis/ja/JapaneseTokenizer.java | 55 +-- .../org/apache/lucene/analysis/ja/Token.java | 22 +- .../analysis/ja/dict/CharacterDefinition.java | 43 +-- .../analysis/ja/dict/ConnectionCosts.java | 40 +-- .../analysis/ja/dict/DictionaryConstants.java | 33 ++ .../{Dictionary.java => JaMorphData.java} | 50 +-- .../analysis/ja/dict/TokenInfoDictionary.java | 18 +- ...ictionary.java => TokenInfoMorphData.java} | 133 ++----- .../analysis/ja/dict/UnknownDictionary.java | 43 ++- .../analysis/ja/dict/UnknownMorphData.java | 44 +++ .../analysis/ja/dict/UserDictionary.java | 96 +---- .../analysis/ja/dict/UserMorphData.java | 110 ++++++ .../ja/util/BinaryDictionaryWriter.java | 334 ------------------ .../ja/util/CharacterDefinitionWriter.java | 83 ----- .../ja/util/ConnectionCostsBuilder.java | 7 +- .../analysis/ja/util/DictionaryBuilder.java | 4 +- .../util/TokenInfoDictionaryEntryWriter.java | 237 +++++++++++++ .../ja/util/TokenInfoDictionaryWriter.java | 18 +- .../ja/util/UnknownDictionaryWriter.java | 27 +- .../ja/dict/TestExternalDictionary.java | 6 +- .../ja/dict/TestTokenInfoDictionary.java | 
19 +- .../analysis/ja/dict/TestUserDictionary.java | 11 +- .../lucene/analysis/ko/DecompoundToken.java | 4 +- .../lucene/analysis/ko/DictionaryToken.java | 24 +- .../lucene/analysis/ko/GraphvizFormatter.java | 5 +- .../lucene/analysis/ko/KoreanTokenizer.java | 43 ++- .../org/apache/lucene/analysis/ko/Token.java | 9 +- .../analysis/ko/dict/BinaryDictionary.java | 220 ------------ .../analysis/ko/dict/CharacterDefinition.java | 43 +-- .../analysis/ko/dict/ConnectionCosts.java | 40 +-- .../analysis/ko/dict/DictionaryConstants.java | 33 ++ .../{Dictionary.java => KoMorphData.java} | 46 ++- .../analysis/ko/dict/TokenInfoDictionary.java | 17 +- .../analysis/ko/dict/TokenInfoMorphData.java | 155 ++++++++ .../analysis/ko/dict/UnknownDictionary.java | 32 +- .../analysis/ko/dict/UnknownMorphData.java | 39 ++ .../analysis/ko/dict/UserDictionary.java | 89 +---- .../analysis/ko/dict/UserMorphData.java | 90 +++++ .../PartOfSpeechAttribute.java | 9 +- .../PartOfSpeechAttributeImpl.java | 8 +- .../ko/util/ConnectionCostsBuilder.java | 7 +- .../ko/util/ConnectionCostsWriter.java | 68 ---- .../analysis/ko/util/DictionaryBuilder.java | 4 +- ...va => TokenInfoDictionaryEntryWriter.java} | 156 ++------ .../ko/util/TokenInfoDictionaryWriter.java | 18 +- .../ko/util/UnknownDictionaryWriter.java | 27 +- .../ko/dict/TestExternalDictionary.java | 6 +- .../ko/dict/TestTokenInfoDictionary.java | 19 +- .../analysis/ko/dict/TestUserDictionary.java | 10 +- 62 files changed, 1821 insertions(+), 1492 deletions(-) create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/BinaryDictionary.java create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/BinaryDictionaryWriter.java create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/CharacterDefinition.java rename lucene/analysis/{nori/src/java/org/apache/lucene/analysis/ko/util => common/src/java/org/apache/lucene/analysis/morph}/CharacterDefinitionWriter.java (57%) 
create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/ConnectionCosts.java rename lucene/analysis/{kuromoji/src/java/org/apache/lucene/analysis/ja/util => common/src/java/org/apache/lucene/analysis/morph}/ConnectionCostsWriter.java (77%) create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/Dictionary.java create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/DictionaryEntryWriter.java create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/MorphData.java create mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/package-info.java create mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/DictionaryConstants.java rename lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/{Dictionary.java => JaMorphData.java} (60%) rename lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/{BinaryDictionary.java => TokenInfoMorphData.java} (56%) create mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownMorphData.java create mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java delete mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java delete mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CharacterDefinitionWriter.java create mode 100644 lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryEntryWriter.java delete mode 100644 lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java create mode 100644 lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/DictionaryConstants.java rename lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/{Dictionary.java => KoMorphData.java} (52%) create mode 100644 
lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoMorphData.java create mode 100644 lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownMorphData.java create mode 100644 lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java delete mode 100644 lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/ConnectionCostsWriter.java rename lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/{BinaryDictionaryWriter.java => TokenInfoDictionaryEntryWriter.java} (50%) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 435434aaf30b..cda9ad881cc9 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -49,6 +49,9 @@ Other * LUCENE-10253: The @BadApple annotation has been removed from the test framework. (Adrien Grand) +* LUCENE-10393: Unify binary dictionary and dictionary writer in Kuromoji and Nori. + (Tomoko Uchida, Robert Muir) + ======================= Lucene 9.2.0 ======================= API Changes --------------------- diff --git a/lucene/analysis/common/src/java/module-info.java b/lucene/analysis/common/src/java/module-info.java index ec837952e7b5..f8e7bf085546 100644 --- a/lucene/analysis/common/src/java/module-info.java +++ b/lucene/analysis/common/src/java/module-info.java @@ -60,6 +60,7 @@ exports org.apache.lucene.analysis.lv; exports org.apache.lucene.analysis.minhash; exports org.apache.lucene.analysis.miscellaneous; + exports org.apache.lucene.analysis.morph; exports org.apache.lucene.analysis.ne; exports org.apache.lucene.analysis.ngram; exports org.apache.lucene.analysis.nl; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/BinaryDictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/BinaryDictionary.java new file mode 100644 index 000000000000..f72ed913d351 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/BinaryDictionary.java @@ -0,0 +1,100 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.morph; + +import java.io.BufferedInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.InputStreamDataInput; +import org.apache.lucene.util.IOSupplier; +import org.apache.lucene.util.IntsRef; + +/** Abstract dictionary base class. 
*/ +public abstract class BinaryDictionary implements Dictionary { + public static final String DICT_FILENAME_SUFFIX = "$buffer.dat"; + public static final String TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat"; + public static final String POSDICT_FILENAME_SUFFIX = "$posDict.dat"; + + private final int[] targetMapOffsets, targetMap; + protected final ByteBuffer buffer; + + protected BinaryDictionary( + IOSupplier targetMapResource, + IOSupplier dictResource, + String targetMapCodecHeader, + String dictCodecHeader, + int dictCodecVersion) + throws IOException { + try (InputStream mapIS = new BufferedInputStream(targetMapResource.get())) { + final DataInput in = new InputStreamDataInput(mapIS); + CodecUtil.checkHeader(in, targetMapCodecHeader, dictCodecVersion, dictCodecVersion); + this.targetMap = new int[in.readVInt()]; + this.targetMapOffsets = new int[in.readVInt()]; + populateTargetMap(in, this.targetMap, this.targetMapOffsets); + } + + // no buffering here, as we load in one large buffer + try (InputStream dictIS = dictResource.get()) { + final DataInput in = new InputStreamDataInput(dictIS); + CodecUtil.checkHeader(in, dictCodecHeader, dictCodecVersion, dictCodecVersion); + final int size = in.readVInt(); + final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size); + final ReadableByteChannel channel = Channels.newChannel(dictIS); + final int read = channel.read(tmpBuffer); + if (read != size) { + throw new EOFException("Cannot read whole dictionary"); + } + this.buffer = tmpBuffer.asReadOnlyBuffer(); + } + } + + private static void populateTargetMap(DataInput in, int[] targetMap, int[] targetMapOffsets) + throws IOException { + int accum = 0, sourceId = 0; + for (int ofs = 0; ofs < targetMap.length; ofs++) { + final int val = in.readVInt(); + if ((val & 0x01) != 0) { + targetMapOffsets[sourceId] = ofs; + sourceId++; + } + accum += val >>> 1; + targetMap[ofs] = accum; + } + if (sourceId + 1 != targetMapOffsets.length) + throw new IOException( + "targetMap 
file format broken; targetMap.length=" + + targetMap.length + + ", targetMapOffsets.length=" + + targetMapOffsets.length + + ", sourceId=" + + sourceId); + targetMapOffsets[sourceId] = targetMap.length; + } + + public void lookupWordIds(int sourceId, IntsRef ref) { + ref.ints = targetMap; + ref.offset = targetMapOffsets[sourceId]; + // targetMapOffsets always has one more entry pointing behind last: + ref.length = targetMapOffsets[sourceId + 1] - ref.offset; + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/BinaryDictionaryWriter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/BinaryDictionaryWriter.java new file mode 100644 index 000000000000..bb0a5c096b31 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/BinaryDictionaryWriter.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.analysis.morph; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.OutputStreamDataOutput; +import org.apache.lucene.util.ArrayUtil; + +/** Abstract base dictionary writer class. */ +public abstract class BinaryDictionaryWriter> { + private final Class implClazz; + private int targetMapEndOffset = 0, lastWordId = -1, lastSourceId = -1; + private int[] targetMap = new int[8192]; + private int[] targetMapOffsets = new int[8192]; + protected final DictionaryEntryWriter entryWriter; + + protected BinaryDictionaryWriter(Class implClazz, DictionaryEntryWriter entryWriter) { + this.implClazz = implClazz; + this.entryWriter = entryWriter; + } + + /** + * put the entry in map + * + * @return current position of buffer, which will be wordId of next entry + */ + public int put(String[] entry) { + return entryWriter.putEntry(entry); + } + + /** + * Write whole dictionary in a directory. 
+ * + * @throws IOException if an I/O error occurs writing the dictionary files + */ + public abstract void write(Path baseDir) throws IOException; + + protected void addMapping(int sourceId, int wordId) { + if (wordId <= lastWordId) { + throw new IllegalStateException( + "words out of order: " + wordId + " vs lastID: " + lastWordId); + } + + if (sourceId > lastSourceId) { + targetMapOffsets = ArrayUtil.grow(targetMapOffsets, sourceId + 1); + for (int i = lastSourceId + 1; i <= sourceId; i++) { + targetMapOffsets[i] = targetMapEndOffset; + } + } else if (sourceId != lastSourceId) { + throw new IllegalStateException( + "source ids not in increasing order: lastSourceId=" + + lastSourceId + + " vs sourceId=" + + sourceId); + } + + targetMap = ArrayUtil.grow(targetMap, targetMapEndOffset + 1); + targetMap[targetMapEndOffset] = wordId; + targetMapEndOffset++; + + lastSourceId = sourceId; + lastWordId = wordId; + } + + /** + * Write dictionary in file Dictionary format is: [Size of dictionary(int)], [entry:{left + * id(short)}{right id(short)}{word cost(short)}{length of pos info(short)}{pos info(char)}], + * [entry...], [entry...]..... 
+ * + * @throws IOException if an I/O error occurs writing the dictionary files + */ + protected void write( + Path baseDir, + String targetMapCodecHeader, + String posDictCodecHeader, + String dictCodecHeader, + int dictCodecVersion) + throws IOException { + final String baseName = getBaseFileName(); + entryWriter.writeDictionary( + baseDir.resolve(baseName + BinaryDictionary.DICT_FILENAME_SUFFIX), + dictCodecHeader, + dictCodecVersion); + entryWriter.writePosDict( + baseDir.resolve(baseName + BinaryDictionary.POSDICT_FILENAME_SUFFIX), + posDictCodecHeader, + dictCodecVersion); + writeTargetMap( + baseDir.resolve(baseName + BinaryDictionary.TARGETMAP_FILENAME_SUFFIX), + targetMapCodecHeader, + dictCodecVersion); + } + + protected final String getBaseFileName() { + return implClazz.getName().replace('.', '/'); + } + + // TODO: maybe this int[] should instead be the output to the FST... + private void writeTargetMap(Path path, String targetMapCodecHeader, int dictCodecVersion) + throws IOException { + Files.createDirectories(path.getParent()); + try (OutputStream os = Files.newOutputStream(path); + OutputStream bos = new BufferedOutputStream(os)) { + final DataOutput out = new OutputStreamDataOutput(bos); + CodecUtil.writeHeader(out, targetMapCodecHeader, dictCodecVersion); + + final int numSourceIds = lastSourceId + 1; + out.writeVInt(targetMapEndOffset); // <-- size of main array + out.writeVInt(numSourceIds + 1); // <-- size of offset array (+ 1 more entry) + int prev = 0, sourceId = 0; + for (int ofs = 0; ofs < targetMapEndOffset; ofs++) { + final int val = targetMap[ofs], delta = val - prev; + assert delta >= 0; + if (ofs == targetMapOffsets[sourceId]) { + out.writeVInt((delta << 1) | 0x01); + sourceId++; + } else { + out.writeVInt((delta << 1)); + } + prev += delta; + } + if (sourceId != numSourceIds) { + throw new IllegalStateException( + "sourceId:" + sourceId + " != numSourceIds:" + numSourceIds); + } + } + } +} diff --git 
a/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/CharacterDefinition.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/CharacterDefinition.java new file mode 100644 index 000000000000..29c1b0122132 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/CharacterDefinition.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.morph; + +import java.io.IOException; +import java.io.InputStream; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.InputStreamDataInput; +import org.apache.lucene.util.IOSupplier; + +/** Character category data. 
*/ +public abstract class CharacterDefinition { + + public static final String FILENAME_SUFFIX = ".dat"; + + protected final byte[] characterCategoryMap = new byte[0x10000]; + private final boolean[] invokeMap; + private final boolean[] groupMap; + + protected CharacterDefinition( + IOSupplier charDefResource, + String charDefCodecHeader, + int charDefCodecVersion, + int classCount) + throws IOException { + try (InputStream is = charDefResource.get()) { + final DataInput in = new InputStreamDataInput(is); + CodecUtil.checkHeader(in, charDefCodecHeader, charDefCodecVersion, charDefCodecVersion); + in.readBytes(characterCategoryMap, 0, characterCategoryMap.length); + this.invokeMap = new boolean[classCount]; + this.groupMap = new boolean[classCount]; + for (int i = 0; i < classCount; i++) { + final byte b = in.readByte(); + invokeMap[i] = (b & 0x01) != 0; + groupMap[i] = (b & 0x02) != 0; + } + } + } + + public byte getCharacterClass(char c) { + return characterCategoryMap[c]; + } + + public boolean isInvoke(char c) { + return invokeMap[characterCategoryMap[c]]; + } + + public boolean isGroup(char c) { + return groupMap[characterCategoryMap[c]]; + } + + /** Functional interface to lookup character class */ + @FunctionalInterface + public interface LookupCharacterClass { + /** looks up character class for given class name */ + byte lookupCharacterClass(String characterClassName); + } +} diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CharacterDefinitionWriter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/CharacterDefinitionWriter.java similarity index 57% rename from lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CharacterDefinitionWriter.java rename to lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/CharacterDefinitionWriter.java index cbd3f7686857..1f4fc3d13a91 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/CharacterDefinitionWriter.java +++ 
b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/CharacterDefinitionWriter.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.analysis.ko.util; +package org.apache.lucene.analysis.morph; import java.io.BufferedOutputStream; import java.io.IOException; @@ -22,21 +22,33 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Arrays; -import org.apache.lucene.analysis.ko.dict.CharacterDefinition; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.OutputStreamDataOutput; -final class CharacterDefinitionWriter { +/** Writes character definition file */ +public final class CharacterDefinitionWriter { - private final byte[] characterCategoryMap = new byte[0x10000]; + private final Class implClazz; - private final boolean[] invokeMap = new boolean[CharacterDefinition.CLASS_COUNT]; - private final boolean[] groupMap = new boolean[CharacterDefinition.CLASS_COUNT]; + private final byte[] characterCategoryMap = new byte[0x10000]; + private final int classCount; + private final boolean[] invokeMap; + private final boolean[] groupMap; + private final CharacterDefinition.LookupCharacterClass lookupCharClass; /** Constructor for building. 
TODO: remove write access */ - CharacterDefinitionWriter() { - Arrays.fill(characterCategoryMap, CharacterDefinition.DEFAULT); + public CharacterDefinitionWriter( + Class implClazz, + byte defaultValue, + int classCount, + CharacterDefinition.LookupCharacterClass lookupCharClass) { + this.implClazz = implClazz; + Arrays.fill(characterCategoryMap, defaultValue); + this.invokeMap = new boolean[classCount]; + this.groupMap = new boolean[classCount]; + this.classCount = classCount; + this.lookupCharClass = lookupCharClass; } /** @@ -45,7 +57,7 @@ final class CharacterDefinitionWriter { * @param codePoint code point * @param characterClassName character class name */ - void putCharacterCategory(int codePoint, String characterClassName) { + public void putCharacterCategory(int codePoint, String characterClassName) { characterClassName = characterClassName.split(" ")[0]; // use first // category // class @@ -54,27 +66,29 @@ void putCharacterCategory(int codePoint, String characterClassName) { if (codePoint == 0x30FB) { characterClassName = "SYMBOL"; } - characterCategoryMap[codePoint] = CharacterDefinition.lookupCharacterClass(characterClassName); + characterCategoryMap[codePoint] = lookupCharClass.lookupCharacterClass(characterClassName); } - void putInvokeDefinition(String characterClassName, int invoke, int group, int length) { - final byte characterClass = CharacterDefinition.lookupCharacterClass(characterClassName); + public void putInvokeDefinition(String characterClassName, int invoke, int group, int length) { + final byte characterClass = lookupCharClass.lookupCharacterClass(characterClassName); invokeMap[characterClass] = invoke == 1; groupMap[characterClass] = group == 1; // TODO: length def ignored } - public void write(Path baseDir) throws IOException { - Path path = - baseDir.resolve( - CharacterDefinition.class.getName().replace('.', '/') - + CharacterDefinition.FILENAME_SUFFIX); + private String getBaseFileName() { + return implClazz.getName().replace('.', 
'/'); + } + + public void write(Path baseDir, String charDefCodecHeader, int charDefCodecVersion) + throws IOException { + Path path = baseDir.resolve(getBaseFileName() + CharacterDefinition.FILENAME_SUFFIX); Files.createDirectories(path.getParent()); try (OutputStream os = new BufferedOutputStream(Files.newOutputStream(path))) { final DataOutput out = new OutputStreamDataOutput(os); - CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION); + CodecUtil.writeHeader(out, charDefCodecHeader, charDefCodecVersion); out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length); - for (int i = 0; i < CharacterDefinition.CLASS_COUNT; i++) { + for (int i = 0; i < classCount; i++) { final byte b = (byte) ((invokeMap[i] ? 0x01 : 0x00) | (groupMap[i] ? 0x02 : 0x00)); out.writeByte(b); } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/ConnectionCosts.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/ConnectionCosts.java new file mode 100644 index 000000000000..70c581611965 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/ConnectionCosts.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.analysis.morph; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.InputStreamDataInput; +import org.apache.lucene.util.IOSupplier; + +/** n-gram connection cost data */ +public abstract class ConnectionCosts { + + public static final String FILENAME_SUFFIX = ".dat"; + + private final ByteBuffer buffer; + private final int forwardSize; + + protected ConnectionCosts( + IOSupplier connectionCostResource, + String connectionCostsCodecHeader, + int dictCodecVersion) + throws IOException { + try (InputStream is = new BufferedInputStream(connectionCostResource.get())) { + final DataInput in = new InputStreamDataInput(is); + CodecUtil.checkHeader(in, connectionCostsCodecHeader, dictCodecVersion, dictCodecVersion); + forwardSize = in.readVInt(); + int backwardSize = in.readVInt(); + int size = forwardSize * backwardSize; + + // copy the matrix into a direct byte buffer + final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size * 2); + int accum = 0; + for (int j = 0; j < backwardSize; j++) { + for (int i = 0; i < forwardSize; i++) { + accum += in.readZInt(); + tmpBuffer.putShort((short) accum); + } + } + buffer = tmpBuffer.asReadOnlyBuffer(); + } + } + + public int get(int forwardId, int backwardId) { + // map 2d matrix into a single dimension short array + int offset = (backwardId * forwardSize + forwardId) * 2; + return buffer.getShort(offset); + } +} diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/ConnectionCostsWriter.java similarity index 77% rename from lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java rename to 
lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/ConnectionCostsWriter.java index 8d081f77bfcd..f4f0a51c8a8e 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsWriter.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/ConnectionCostsWriter.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.lucene.analysis.ja.util; +package org.apache.lucene.analysis.morph; import java.io.BufferedOutputStream; import java.io.IOException; @@ -22,20 +22,23 @@ import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; -import org.apache.lucene.analysis.ja.dict.ConnectionCosts; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.OutputStreamDataOutput; -final class ConnectionCostsWriter { +/** Writes connection costs */ +public final class ConnectionCostsWriter { + private final Class implClazz; private final ByteBuffer costs; // array is backward IDs first since get is called using the same backward ID // consecutively. maybe doesn't matter. private final int forwardSize; private final int backwardSize; + /** Constructor for building. 
TODO: remove write access */ - ConnectionCostsWriter(int forwardSize, int backwardSize) { + public ConnectionCostsWriter(Class implClazz, int forwardSize, int backwardSize) { + this.implClazz = implClazz; this.forwardSize = forwardSize; this.backwardSize = backwardSize; this.costs = ByteBuffer.allocateDirect(2 * backwardSize * forwardSize); @@ -46,14 +49,18 @@ public void add(int forwardId, int backwardId, int cost) { costs.putShort(offset, (short) cost); } - public void write(Path baseDir) throws IOException { + private String getBaseFileName() { + return implClazz.getName().replace('.', '/'); + } + + public void write(Path baseDir, String connectionCostsCodecHeader, int dictCodecVersion) + throws IOException { Files.createDirectories(baseDir); - String fileName = - ConnectionCosts.class.getName().replace('.', '/') + ConnectionCosts.FILENAME_SUFFIX; + String fileName = getBaseFileName() + ConnectionCosts.FILENAME_SUFFIX; try (OutputStream os = Files.newOutputStream(baseDir.resolve(fileName)); OutputStream bos = new BufferedOutputStream(os)) { final DataOutput out = new OutputStreamDataOutput(bos); - CodecUtil.writeHeader(out, ConnectionCosts.HEADER, ConnectionCosts.VERSION); + CodecUtil.writeHeader(out, connectionCostsCodecHeader, dictCodecVersion); out.writeVInt(forwardSize); out.writeVInt(backwardSize); int last = 0; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/Dictionary.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/Dictionary.java new file mode 100644 index 000000000000..5ff714c63f7b --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/Dictionary.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.morph; + +/** High-level dictionary interface for morphological analyzers. */ +public interface Dictionary { + /** + * Get left id of specified word + * + * @return left id + */ + default int getLeftId(int morphId) { + return getMorphAttributes().getLeftId(morphId); + } + + /** + * Get right id of specified word + * + * @return right id + */ + default int getRightId(int morphId) { + return getMorphAttributes().getRightId(morphId); + } + + /** + * Get word cost of specified word + * + * @return word's cost + */ + default int getWordCost(int morphId) { + return getMorphAttributes().getWordCost(morphId); + } + + T getMorphAttributes(); +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/DictionaryEntryWriter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/DictionaryEntryWriter.java new file mode 100644 index 000000000000..76fb44ea603b --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/DictionaryEntryWriter.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.morph; + +import java.io.BufferedOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.WritableByteChannel; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.store.OutputStreamDataOutput; + +/** Abstract writer class to write dictionary entries. */ +public abstract class DictionaryEntryWriter { + + protected ByteBuffer buffer; + protected final List posDict; + + protected DictionaryEntryWriter(int size) { + this.buffer = ByteBuffer.allocate(size); + this.posDict = new ArrayList<>(); + } + + /** Writes an entry. */ + protected abstract int putEntry(String[] entry); + + /** Flush POS dictionary data. 
*/ + protected abstract void writePosDict(OutputStream bos, DataOutput out) throws IOException; + + void writePosDict(Path path, String posDictCodecHeader, int dictCodecVersion) throws IOException { + Files.createDirectories(path.getParent()); + try (OutputStream os = Files.newOutputStream(path); + OutputStream bos = new BufferedOutputStream(os)) { + final DataOutput out = new OutputStreamDataOutput(bos); + CodecUtil.writeHeader(out, posDictCodecHeader, dictCodecVersion); + writePosDict(bos, out); + } + } + + void writeDictionary(Path path, String dictCodecHeader, int dictCodecVersion) throws IOException { + Files.createDirectories(path.getParent()); + try (OutputStream os = Files.newOutputStream(path); + OutputStream bos = new BufferedOutputStream(os)) { + final DataOutput out = new OutputStreamDataOutput(bos); + CodecUtil.writeHeader(out, dictCodecHeader, dictCodecVersion); + out.writeVInt(buffer.position()); + final WritableByteChannel channel = Channels.newChannel(bos); + // Write Buffer + buffer.flip(); // set position to 0, set limit to current position + channel.write(buffer); + assert buffer.remaining() == 0L; + } + } + + /** Returns current word id. */ + public int currentPosition() { + return buffer.position(); + } +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/MorphData.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/MorphData.java new file mode 100644 index 000000000000..1cafe9af71aa --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/MorphData.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.morph; + +/** High-level interface that represents morphological information in a dictionary */ +public interface MorphData { + /** + * Get left id of specified word + * + * @return left id + */ + int getLeftId(int morphId); + + /** + * Get right id of specified word + * + * @return right id + */ + int getRightId(int morphId); + + /** + * Get word cost of specified word + * + * @return word's cost + */ + int getWordCost(int morphId); +} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/package-info.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/package-info.java new file mode 100644 index 000000000000..7e53020588f7 --- /dev/null +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/morph/package-info.java @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** Abstract classes for morphological analyzers. */ +package org.apache.lucene.analysis.morph; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/GraphvizFormatter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/GraphvizFormatter.java index de3748b26664..4538daa362f6 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/GraphvizFormatter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/GraphvizFormatter.java @@ -21,7 +21,8 @@ import org.apache.lucene.analysis.ja.JapaneseTokenizer.Position; import org.apache.lucene.analysis.ja.JapaneseTokenizer.WrappedPositionArray; import org.apache.lucene.analysis.ja.dict.ConnectionCosts; -import org.apache.lucene.analysis.ja.dict.Dictionary; +import org.apache.lucene.analysis.ja.dict.JaMorphData; +import org.apache.lucene.analysis.morph.Dictionary; // TODO: would be nice to show 2nd best path in a diff't // color... 
@@ -140,7 +141,7 @@ private String formatNodes( attrs = ""; } - final Dictionary dict = tok.getDict(posData.backType[idx]); + final Dictionary dict = tok.getDict(posData.backType[idx]); final int wordCost = dict.getWordCost(posData.backID[idx]); final int bgCost = costs.get( diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java index 47cb8d19297f..a933525b2f04 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/JapaneseTokenizer.java @@ -28,12 +28,13 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ja.dict.CharacterDefinition; import org.apache.lucene.analysis.ja.dict.ConnectionCosts; -import org.apache.lucene.analysis.ja.dict.Dictionary; +import org.apache.lucene.analysis.ja.dict.JaMorphData; import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary; import org.apache.lucene.analysis.ja.dict.TokenInfoFST; import org.apache.lucene.analysis.ja.dict.UnknownDictionary; import org.apache.lucene.analysis.ja.dict.UserDictionary; import org.apache.lucene.analysis.ja.tokenattributes.*; +import org.apache.lucene.analysis.morph.Dictionary; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -116,7 +117,8 @@ public enum Type { private static final int MAX_UNKNOWN_WORD_LENGTH = 1024; private static final int MAX_BACKTRACE_GAP = 1024; - private final EnumMap dictionaryMap = new EnumMap<>(Type.class); + private final EnumMap> dictionaryMap = + new EnumMap<>(Type.class); private final TokenInfoFST fst; private final TokenInfoDictionary dictionary; @@ -494,10 +496,15 @@ public void reset() { } private void add( - Dictionary dict, 
Position fromPosData, int endPos, int wordID, Type type, boolean addPenalty) + JaMorphData morphAtts, + Position fromPosData, + int endPos, + int wordID, + Type type, + boolean addPenalty) throws IOException { - final int wordCost = dict.getWordCost(wordID); - final int leftID = dict.getLeftId(wordID); + final int wordCost = morphAtts.getWordCost(wordID); + final int leftID = morphAtts.getLeftId(wordID); int leastCost = Integer.MAX_VALUE; int leastIDX = -1; assert fromPosData.count > 0; @@ -560,7 +567,7 @@ private void add( // positions.get(endPos).add(leastCost, dict.getRightId(wordID), fromPosData.pos, leastIDX, // wordID, type); - assert leftID == dict.getRightId(wordID); + assert leftID == morphAtts.getRightId(wordID); positions.get(endPos).add(leastCost, leftID, fromPosData.pos, leastIDX, wordID, type); } @@ -895,7 +902,7 @@ private void parse() throws IOException { + (posAhead + 1)); } add( - userDictionary, + userDictionary.getMorphAttributes(), posData, posAhead + 1, output + arc.nextFinalOutput().intValue(), @@ -948,7 +955,7 @@ private void parse() throws IOException { } for (int ofs = 0; ofs < wordIdRef.length; ofs++) { add( - dictionary, + dictionary.getMorphAttributes(), posData, posAhead + 1, wordIdRef.ints[wordIdRef.offset + ofs], @@ -1004,7 +1011,7 @@ && isPunctuation((char) ch) == isPunct) { } for (int ofs = 0; ofs < wordIdRef.length; ofs++) { add( - unkDictionary, + unkDictionary.getMorphAttributes(), posData, posData.pos + unknownWordLength, wordIdRef.ints[wordIdRef.offset + ofs], @@ -1126,7 +1133,7 @@ private void pruneAndRescore(int startPos, int endPos, int bestStartIDX) throws final int pathCost = posData.costs[bestStartIDX]; for (int forwardArcIDX = 0; forwardArcIDX < posData.forwardCount; forwardArcIDX++) { final Type forwardType = posData.forwardType[forwardArcIDX]; - final Dictionary dict2 = getDict(forwardType); + final Dictionary dict2 = getDict(forwardType); final int wordID = posData.forwardID[forwardArcIDX]; final int toPos = 
posData.forwardPos[forwardArcIDX]; final int newCost = @@ -1169,7 +1176,7 @@ private void pruneAndRescore(int startPos, int endPos, int bestStartIDX) throws + toPos); } add( - getDict(forwardType), + getDict(forwardType).getMorphAttributes(), posData, toPos, posData.forwardID[forwardArcIDX], @@ -1184,7 +1191,7 @@ private void pruneAndRescore(int startPos, int endPos, int bestStartIDX) throws // yet another lattice data structure private static final class Lattice { char[] fragment; - EnumMap dictionaryMap; + EnumMap> dictionaryMap; boolean useEOS; int rootCapacity = 0; @@ -1296,7 +1303,7 @@ private int addNode(Type dicType, int wordID, int left, int right) { nodeLeftID[node] = 0; nodeRightID[node] = 0; } else { - Dictionary dic = dictionaryMap.get(dicType); + Dictionary dic = dictionaryMap.get(dicType); nodeWordCost[node] = dic.getWordCost(wordID); nodeLeftID[node] = dic.getLeftId(wordID); nodeRightID[node] = dic.getRightId(wordID); @@ -1338,7 +1345,7 @@ private int positionCount(WrappedPositionArray positions, int beg, int end) { void setup( char[] fragment, - EnumMap dictionaryMap, + EnumMap> dictionaryMap, WrappedPositionArray positions, int prevOffset, int endOffset, @@ -1565,7 +1572,7 @@ private void registerNode(int node, char[] fragment) { right - left, Type.USER, lattice.rootBase + left, - userDictionary)); + userDictionary.getMorphAttributes())); // Output compound int current = 0; for (int j = 1; j < wordIDAndLength.length; j++) { @@ -1579,7 +1586,7 @@ private void registerNode(int node, char[] fragment) { len, Type.USER, lattice.rootBase + current + left, - userDictionary)); + userDictionary.getMorphAttributes())); } current += len; } @@ -1592,7 +1599,7 @@ private void registerNode(int node, char[] fragment) { right - left, type, lattice.rootBase + left, - getDict(type))); + getDict(type).getMorphAttributes())); } } } @@ -1921,7 +1928,7 @@ private void backtrace(final Position endPosData, final int fromIDX) throws IOEx length, backType, backPos, - 
getDict(backType)); + getDict(backType).getMorphAttributes()); // Redirect our backtrace to 2nd best: bestIDX = leastIDX; @@ -1980,7 +1987,7 @@ private void backtrace(final Position endPosData, final int fromIDX) throws IOEx altToken = null; } - final Dictionary dict = getDict(backType); + final Dictionary dict = getDict(backType); if (backType == Type.USER) { @@ -2000,7 +2007,7 @@ private void backtrace(final Position endPosData, final int fromIDX) throws IOEx len, Type.USER, current + backPos, - dict)); + dict.getMorphAttributes())); if (VERBOSE) { System.out.println(" add USER token=" + pending.get(pending.size() - 1)); } @@ -2037,14 +2044,16 @@ private void backtrace(final Position endPosData, final int fromIDX) throws IOEx charLen, Type.UNKNOWN, backPos + i, - unkDictionary)); + unkDictionary.getMorphAttributes())); unigramTokenCount++; } } backCount += unigramTokenCount; } else if (!discardPunctuation || length == 0 || !isPunctuation(fragment[offset])) { - pending.add(new Token(backID, fragment, offset, length, backType, backPos, dict)); + pending.add( + new Token( + backID, fragment, offset, length, backType, backPos, dict.getMorphAttributes())); if (VERBOSE) { System.out.println(" add token=" + pending.get(pending.size() - 1)); } @@ -2073,7 +2082,7 @@ private void backtrace(final Position endPosData, final int fromIDX) throws IOEx positions.freeBefore(endPos); } - Dictionary getDict(Type type) { + Dictionary getDict(Type type) { return dictionaryMap.get(type); } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Token.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Token.java index 5a10b50817b4..e9ae9490125e 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Token.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/Token.java @@ -17,11 +17,11 @@ package org.apache.lucene.analysis.ja; import org.apache.lucene.analysis.ja.JapaneseTokenizer.Type; -import 
org.apache.lucene.analysis.ja.dict.Dictionary; +import org.apache.lucene.analysis.ja.dict.JaMorphData; /** Analyzed token with morphological data from its dictionary. */ public class Token { - private final Dictionary dictionary; + private final JaMorphData morphAtts; private final int wordId; @@ -41,14 +41,14 @@ public Token( int length, Type type, int position, - Dictionary dictionary) { + JaMorphData morphAtts) { this.wordId = wordId; this.surfaceForm = surfaceForm; this.offset = offset; this.length = length; this.type = type; this.position = position; - this.dictionary = dictionary; + this.morphAtts = morphAtts; } @Override @@ -66,7 +66,7 @@ public String toString() { + " wordId=" + wordId + " leftID=" - + dictionary.getLeftId(wordId) + + morphAtts.getLeftId(wordId) + ")"; } @@ -92,32 +92,32 @@ public String getSurfaceFormString() { /** @return reading. null if token doesn't have reading. */ public String getReading() { - return dictionary.getReading(wordId, surfaceForm, offset, length); + return morphAtts.getReading(wordId, surfaceForm, offset, length); } /** @return pronunciation. null if token doesn't have pronunciation. */ public String getPronunciation() { - return dictionary.getPronunciation(wordId, surfaceForm, offset, length); + return morphAtts.getPronunciation(wordId, surfaceForm, offset, length); } /** @return part of speech. 
*/ public String getPartOfSpeech() { - return dictionary.getPartOfSpeech(wordId); + return morphAtts.getPartOfSpeech(wordId); } /** @return inflection type or null */ public String getInflectionType() { - return dictionary.getInflectionType(wordId); + return morphAtts.getInflectionType(wordId); } /** @return inflection form or null */ public String getInflectionForm() { - return dictionary.getInflectionForm(wordId); + return morphAtts.getInflectionForm(wordId); } /** @return base form or null if token is not inflected */ public String getBaseForm() { - return dictionary.getBaseForm(wordId, surfaceForm, offset, length); + return morphAtts.getBaseForm(wordId, surfaceForm, offset, length); } /** diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CharacterDefinition.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CharacterDefinition.java index 36bb825bc2d2..be29ebbe3674 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CharacterDefinition.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/CharacterDefinition.java @@ -16,20 +16,13 @@ */ package org.apache.lucene.analysis.ja.dict; -import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOUtils; /** Character category data. 
*/ -public final class CharacterDefinition { - - public static final String FILENAME_SUFFIX = ".dat"; - public static final String HEADER = "kuromoji_cd"; - public static final int VERSION = 1; +public final class CharacterDefinition + extends org.apache.lucene.analysis.morph.CharacterDefinition { public static final int CLASS_COUNT = CharacterClass.values().length; @@ -49,11 +42,6 @@ private enum CharacterClass { KANJINUMERIC; } - private final byte[] characterCategoryMap = new byte[0x10000]; - - private final boolean[] invokeMap = new boolean[CLASS_COUNT]; - private final boolean[] groupMap = new boolean[CLASS_COUNT]; - // the classes: public static final byte NGRAM = (byte) CharacterClass.NGRAM.ordinal(); public static final byte DEFAULT = (byte) CharacterClass.DEFAULT.ordinal(); @@ -69,16 +57,11 @@ private enum CharacterClass { public static final byte KANJINUMERIC = (byte) CharacterClass.KANJINUMERIC.ordinal(); private CharacterDefinition() throws IOException { - try (InputStream is = new BufferedInputStream(getClassResource())) { - final DataInput in = new InputStreamDataInput(is); - CodecUtil.checkHeader(in, HEADER, VERSION, VERSION); - in.readBytes(characterCategoryMap, 0, characterCategoryMap.length); - for (int i = 0; i < CLASS_COUNT; i++) { - final byte b = in.readByte(); - invokeMap[i] = (b & 0x01) != 0; - groupMap[i] = (b & 0x02) != 0; - } - } + super( + CharacterDefinition::getClassResource, + DictionaryConstants.CHARDEF_HEADER, + DictionaryConstants.VERSION, + CharacterClass.values().length); } private static InputStream getClassResource() throws IOException { @@ -87,18 +70,6 @@ private static InputStream getClassResource() throws IOException { CharacterDefinition.class.getResourceAsStream(resourcePath), resourcePath); } - public byte getCharacterClass(char c) { - return characterCategoryMap[c]; - } - - public boolean isInvoke(char c) { - return invokeMap[characterCategoryMap[c]]; - } - - public boolean isGroup(char c) { - return 
groupMap[characterCategoryMap[c]]; - } - public boolean isKanji(char c) { final byte characterClass = characterCategoryMap[c]; return characterClass == KANJI || characterClass == KANJINUMERIC; diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java index dc5fabe6c544..c11b9ee716a7 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/ConnectionCosts.java @@ -16,27 +16,15 @@ */ package org.apache.lucene.analysis.ja.dict; -import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; -import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOSupplier; import org.apache.lucene.util.IOUtils; /** n-gram connection cost data */ -public final class ConnectionCosts { - - public static final String FILENAME_SUFFIX = ".dat"; - public static final String HEADER = "kuromoji_cc"; - public static final int VERSION = 1; - - private final ByteBuffer buffer; - private final int forwardSize; +public final class ConnectionCosts extends org.apache.lucene.analysis.morph.ConnectionCosts { /** * Create a {@link ConnectionCosts} from an external resource path. 
@@ -53,24 +41,8 @@ private ConnectionCosts() throws IOException { } private ConnectionCosts(IOSupplier connectionCostResource) throws IOException { - try (InputStream is = new BufferedInputStream(connectionCostResource.get())) { - final DataInput in = new InputStreamDataInput(is); - CodecUtil.checkHeader(in, HEADER, VERSION, VERSION); - forwardSize = in.readVInt(); - int backwardSize = in.readVInt(); - int size = forwardSize * backwardSize; - - // copy the matrix into a direct byte buffer - final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size * 2); - int accum = 0; - for (int j = 0; j < backwardSize; j++) { - for (int i = 0; i < forwardSize; i++) { - accum += in.readZInt(); - tmpBuffer.putShort((short) accum); - } - } - buffer = tmpBuffer.asReadOnlyBuffer(); - } + super( + connectionCostResource, DictionaryConstants.CONN_COSTS_HEADER, DictionaryConstants.VERSION); } private static InputStream getClassResource() throws IOException { @@ -79,12 +51,6 @@ private static InputStream getClassResource() throws IOException { ConnectionCosts.class.getResourceAsStream(resourcePath), resourcePath); } - public int get(int forwardId, int backwardId) { - // map 2d matrix into a single dimension short array - int offset = (backwardId * forwardSize + forwardId) * 2; - return buffer.getShort(offset); - } - public static ConnectionCosts getInstance() { return SingletonHolder.INSTANCE; } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/DictionaryConstants.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/DictionaryConstants.java new file mode 100644 index 000000000000..3046beef3e63 --- /dev/null +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/DictionaryConstants.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.ja.dict; + +/** Dictionary constants */ +public final class DictionaryConstants { + /** Codec header of the dictionary file. */ + public static final String DICT_HEADER = "kuromoji_dict"; + /** Codec header of the dictionary mapping file. */ + public static final String TARGETMAP_HEADER = "kuromoji_dict_map"; + /** Codec header of the POS dictionary file. */ + public static final String POSDICT_HEADER = "kuromoji_dict_pos"; + /** Codec header of the connection costs. */ + public static final String CONN_COSTS_HEADER = "kuromoji_cc"; + /** Codec header of the character definition file. 
*/ + public static final String CHARDEF_HEADER = "kuromoji_cd"; + /** Codec version of the binary dictionary */ + public static final int VERSION = 1; +} diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/JaMorphData.java similarity index 60% rename from lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java rename to lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/JaMorphData.java index cfe11b30cd90..8865f6ad09f5 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/Dictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/JaMorphData.java @@ -16,71 +16,49 @@ */ package org.apache.lucene.analysis.ja.dict; -/** Dictionary interface for retrieving morphological data by id. */ -public interface Dictionary { - - public static final String INTERNAL_SEPARATOR = "\u0000"; - - /** - * Get left id of specified word - * - * @return left id - */ - public int getLeftId(int wordId); - - /** - * Get right id of specified word - * - * @return right id - */ - public int getRightId(int wordId); - - /** - * Get word cost of specified word - * - * @return word's cost - */ - public int getWordCost(int wordId); +import org.apache.lucene.analysis.morph.MorphData; +/** Represents Japanese morphological information. 
*/ +public interface JaMorphData extends MorphData { /** * Get Part-Of-Speech of tokens * - * @param wordId word ID of token + * @param morphId word ID of token * @return Part-Of-Speech of the token */ - public String getPartOfSpeech(int wordId); + String getPartOfSpeech(int morphId); /** * Get reading of tokens * - * @param wordId word ID of token + * @param morphId word ID of token * @return Reading of the token */ - public String getReading(int wordId, char[] surface, int off, int len); + String getReading(int morphId, char[] surface, int off, int len); /** * Get base form of word * - * @param wordId word ID of token + * @param morphId word ID of token * @return Base form (only different for inflected words, otherwise null) */ - public String getBaseForm(int wordId, char[] surface, int off, int len); + String getBaseForm(int morphId, char[] surface, int off, int len); /** * Get pronunciation of tokens * - * @param wordId word ID of token + * @param morphId word ID of token * @return Pronunciation of the token */ - public String getPronunciation(int wordId, char[] surface, int off, int len); + String getPronunciation(int morphId, char[] surface, int off, int len); /** * Get inflection type of tokens * - * @param wordId word ID of token + * @param morphId word ID of token * @return inflection type, or null */ - public String getInflectionType(int wordId); + String getInflectionType(int morphId); /** * Get inflection form of tokens @@ -88,7 +66,7 @@ public interface Dictionary { * @param wordId word ID of token * @return inflection form, or null */ - public String getInflectionForm(int wordId); + String getInflectionForm(int wordId); // TODO: maybe we should have a optimal method, a non-typesafe // 'getAdditionalData' if other dictionaries like unidic have additional data } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java 
b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java index d9e50f86a759..c769587829b6 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoDictionary.java @@ -21,6 +21,7 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import org.apache.lucene.analysis.morph.BinaryDictionary; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOSupplier; @@ -32,11 +33,12 @@ * Binary dictionary implementation for a known-word dictionary model: Words are encoded into an FST * mapping to a list of wordIDs. */ -public final class TokenInfoDictionary extends BinaryDictionary { +public final class TokenInfoDictionary extends BinaryDictionary { public static final String FST_FILENAME_SUFFIX = "$fst.dat"; private final TokenInfoFST fst; + private final TokenInfoMorphData morphAtts; /** * Create a {@link TokenInfoDictionary} from an external resource path. 
@@ -70,7 +72,14 @@ private TokenInfoDictionary( IOSupplier dictResource, IOSupplier fstResource) throws IOException { - super(targetMapResource, posResource, dictResource); + super( + targetMapResource, + dictResource, + DictionaryConstants.TARGETMAP_HEADER, + DictionaryConstants.DICT_HEADER, + DictionaryConstants.VERSION); + this.morphAtts = new TokenInfoMorphData(buffer, posResource); + FST fst; try (InputStream is = new BufferedInputStream(fstResource.get())) { DataInput in = new InputStreamDataInput(is); @@ -86,6 +95,11 @@ private static InputStream getClassResource(String suffix) throws IOException { TokenInfoDictionary.class.getResourceAsStream(resourcePath), resourcePath); } + @Override + public TokenInfoMorphData getMorphAttributes() { + return morphAtts; + } + public TokenInfoFST getFST() { return fst; } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoMorphData.java similarity index 56% rename from lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java rename to lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoMorphData.java index 9ad0d8137783..d743a7faca67 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/BinaryDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/TokenInfoMorphData.java @@ -17,95 +17,37 @@ package org.apache.lucene.analysis.ja.dict; import java.io.BufferedInputStream; -import java.io.EOFException; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.nio.channels.ReadableByteChannel; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOSupplier; -import 
org.apache.lucene.util.IntsRef; -/** Base class for a binary-encoded in-memory dictionary. */ -public abstract class BinaryDictionary implements Dictionary { - - public static final String DICT_FILENAME_SUFFIX = "$buffer.dat"; - public static final String TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat"; - public static final String POSDICT_FILENAME_SUFFIX = "$posDict.dat"; - - public static final String DICT_HEADER = "kuromoji_dict"; - public static final String TARGETMAP_HEADER = "kuromoji_dict_map"; - public static final String POSDICT_HEADER = "kuromoji_dict_pos"; - public static final int VERSION = 1; +/** Morphological information for system dictionary. */ +public class TokenInfoMorphData implements JaMorphData { private final ByteBuffer buffer; - private final int[] targetMapOffsets, targetMap; private final String[] posDict; private final String[] inflTypeDict; private final String[] inflFormDict; - protected BinaryDictionary( - IOSupplier targetMapResource, - IOSupplier posResource, - IOSupplier dictResource) - throws IOException { - try (InputStream mapIS = new BufferedInputStream(targetMapResource.get())) { - final DataInput in = new InputStreamDataInput(mapIS); - CodecUtil.checkHeader(in, TARGETMAP_HEADER, VERSION, VERSION); - this.targetMap = new int[in.readVInt()]; - this.targetMapOffsets = new int[in.readVInt()]; - populateTargetMap(in, this.targetMap, this.targetMapOffsets); - } - + TokenInfoMorphData(ByteBuffer buffer, IOSupplier posResource) throws IOException { + this.buffer = buffer; try (InputStream posIS = new BufferedInputStream(posResource.get())) { final DataInput in = new InputStreamDataInput(posIS); - CodecUtil.checkHeader(in, POSDICT_HEADER, VERSION, VERSION); + CodecUtil.checkHeader( + in, + DictionaryConstants.POSDICT_HEADER, + DictionaryConstants.VERSION, + DictionaryConstants.VERSION); final int posSize = in.readVInt(); this.posDict = new String[posSize]; this.inflTypeDict = new String[posSize]; this.inflFormDict = new String[posSize]; - 
populatePosDict(in, posSize, this.posDict, this.inflTypeDict, this.inflFormDict); - } - - // no buffering here, as we load in one large buffer - try (InputStream dictIS = dictResource.get()) { - final DataInput in = new InputStreamDataInput(dictIS); - CodecUtil.checkHeader(in, DICT_HEADER, VERSION, VERSION); - final int size = in.readVInt(); - final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size); - final ReadableByteChannel channel = Channels.newChannel(dictIS); - final int read = channel.read(tmpBuffer); - if (read != size) { - throw new EOFException("Cannot read whole dictionary"); - } - this.buffer = tmpBuffer.asReadOnlyBuffer(); - } - } - - private static void populateTargetMap(DataInput in, int[] targetMap, int[] targetMapOffsets) - throws IOException { - int accum = 0, sourceId = 0; - for (int ofs = 0; ofs < targetMap.length; ofs++) { - final int val = in.readVInt(); - if ((val & 0x01) != 0) { - targetMapOffsets[sourceId] = ofs; - sourceId++; - } - accum += val >>> 1; - targetMap[ofs] = accum; + populatePosDict(in, posSize, posDict, inflTypeDict, inflFormDict); } - if (sourceId + 1 != targetMapOffsets.length) - throw new IOException( - "targetMap file format broken; targetMap.length=" - + targetMap.length - + ", targetMapOffsets.length=" - + targetMapOffsets.length - + ", sourceId=" - + sourceId); - targetMapOffsets[sourceId] = targetMap.length; } private static void populatePosDict( @@ -125,32 +67,25 @@ private static void populatePosDict( } } - public void lookupWordIds(int sourceId, IntsRef ref) { - ref.ints = targetMap; - ref.offset = targetMapOffsets[sourceId]; - // targetMapOffsets always has one more entry pointing behind last: - ref.length = targetMapOffsets[sourceId + 1] - ref.offset; - } - @Override - public int getLeftId(int wordId) { - return (buffer.getShort(wordId) & 0xffff) >>> 3; + public int getLeftId(int morphId) { + return (buffer.getShort(morphId) & 0xffff) >>> 3; } @Override - public int getRightId(int wordId) { - return 
(buffer.getShort(wordId) & 0xffff) >>> 3; + public int getRightId(int morphId) { + return (buffer.getShort(morphId) & 0xffff) >>> 3; } @Override - public int getWordCost(int wordId) { - return buffer.getShort(wordId + 2); // Skip id + public int getWordCost(int morphId) { + return buffer.getShort(morphId + 2); // Skip id } @Override - public String getBaseForm(int wordId, char[] surfaceForm, int off, int len) { - if (hasBaseFormData(wordId)) { - int offset = baseFormOffset(wordId); + public String getBaseForm(int morphId, char[] surfaceForm, int off, int len) { + if (hasBaseFormData(morphId)) { + int offset = baseFormOffset(morphId); int data = buffer.get(offset++) & 0xff; int prefix = data >>> 4; int suffix = data & 0xF; @@ -166,9 +101,9 @@ public String getBaseForm(int wordId, char[] surfaceForm, int off, int len) { } @Override - public String getReading(int wordId, char[] surface, int off, int len) { - if (hasReadingData(wordId)) { - int offset = readingOffset(wordId); + public String getReading(int morphId, char[] surface, int off, int len) { + if (hasReadingData(morphId)) { + int offset = readingOffset(morphId); int readingData = buffer.get(offset++) & 0xff; return readString(offset, readingData >>> 1, (readingData & 1) == 1); } else { @@ -187,24 +122,24 @@ public String getReading(int wordId, char[] surface, int off, int len) { } @Override - public String getPartOfSpeech(int wordId) { - return posDict[getLeftId(wordId)]; + public String getPartOfSpeech(int morphId) { + return posDict[getLeftId(morphId)]; } @Override - public String getPronunciation(int wordId, char[] surface, int off, int len) { - if (hasPronunciationData(wordId)) { - int offset = pronunciationOffset(wordId); + public String getPronunciation(int morphId, char[] surface, int off, int len) { + if (hasPronunciationData(morphId)) { + int offset = pronunciationOffset(morphId); int pronunciationData = buffer.get(offset++) & 0xff; return readString(offset, pronunciationData >>> 1, (pronunciationData 
& 1) == 1); } else { - return getReading(wordId, surface, off, len); // same as the reading + return getReading(morphId, surface, off, len); // same as the reading } } @Override - public String getInflectionType(int wordId) { - return inflTypeDict[getLeftId(wordId)]; + public String getInflectionType(int morphId) { + return inflTypeDict[getLeftId(morphId)]; } @Override @@ -212,10 +147,6 @@ public String getInflectionForm(int wordId) { return inflFormDict[getLeftId(wordId)]; } - private static int baseFormOffset(int wordId) { - return wordId + 4; - } - private int readingOffset(int wordId) { int offset = baseFormOffset(wordId); if (hasBaseFormData(wordId)) { @@ -242,6 +173,10 @@ private int pronunciationOffset(int wordId) { } } + private static int baseFormOffset(int wordId) { + return wordId + 4; + } + private boolean hasBaseFormData(int wordId) { return (buffer.getShort(wordId) & HAS_BASEFORM) != 0; } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java index f9bdc62f49e4..b4a9012e3ee6 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownDictionary.java @@ -20,12 +20,15 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import org.apache.lucene.analysis.morph.BinaryDictionary; +import org.apache.lucene.util.IOSupplier; import org.apache.lucene.util.IOUtils; /** Dictionary for unknown-word handling. */ -public final class UnknownDictionary extends BinaryDictionary { +public final class UnknownDictionary extends BinaryDictionary { private final CharacterDefinition characterDefinition = CharacterDefinition.getInstance(); + private final UnknownMorphData morphAtts; /** * Create a {@link UnknownDictionary} from an external resource path. 
@@ -36,25 +39,44 @@ public final class UnknownDictionary extends BinaryDictionary { * @throws IOException if resource was not found or broken */ public UnknownDictionary(Path targetMapFile, Path posDictFile, Path dictFile) throws IOException { - super( + this( () -> Files.newInputStream(targetMapFile), () -> Files.newInputStream(posDictFile), () -> Files.newInputStream(dictFile)); } private UnknownDictionary() throws IOException { - super( + this( () -> getClassResource(TARGETMAP_FILENAME_SUFFIX), () -> getClassResource(POSDICT_FILENAME_SUFFIX), () -> getClassResource(DICT_FILENAME_SUFFIX)); } + private UnknownDictionary( + IOSupplier targetMapResource, + IOSupplier posResource, + IOSupplier dictResource) + throws IOException { + super( + targetMapResource, + dictResource, + DictionaryConstants.TARGETMAP_HEADER, + DictionaryConstants.DICT_HEADER, + DictionaryConstants.VERSION); + this.morphAtts = new UnknownMorphData(buffer, posResource); + } + private static InputStream getClassResource(String suffix) throws IOException { final String resourcePath = UnknownDictionary.class.getSimpleName() + suffix; return IOUtils.requireResourceNonNull( UnknownDictionary.class.getResourceAsStream(resourcePath), resourcePath); } + @Override + public UnknownMorphData getMorphAttributes() { + return morphAtts; + } + public int lookup(char[] text, int offset, int len) { if (!characterDefinition.isGroup(text[offset])) { return 1; @@ -79,21 +101,6 @@ public CharacterDefinition getCharacterDefinition() { return characterDefinition; } - @Override - public String getReading(int wordId, char[] surface, int off, int len) { - return null; - } - - @Override - public String getInflectionType(int wordId) { - return null; - } - - @Override - public String getInflectionForm(int wordId) { - return null; - } - public static UnknownDictionary getInstance() { return SingletonHolder.INSTANCE; } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownMorphData.java 
b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownMorphData.java new file mode 100644 index 000000000000..0810346e2183 --- /dev/null +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UnknownMorphData.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.ja.dict; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import org.apache.lucene.util.IOSupplier; + +/** Morphological information for unk dictionary. 
*/ +final class UnknownMorphData extends TokenInfoMorphData { + UnknownMorphData(ByteBuffer buffer, IOSupplier posResource) throws IOException { + super(buffer, posResource); + } + + @Override + public String getReading(int morphId, char[] surface, int off, int len) { + return null; + } + + @Override + public String getInflectionType(int morphId) { + return null; + } + + @Override + public String getInflectionForm(int wordId) { + return null; + } +} diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java index 859806e5ef00..12dd27d95845 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserDictionary.java @@ -26,13 +26,16 @@ import java.util.Map; import java.util.TreeMap; import org.apache.lucene.analysis.ja.util.CSVUtil; +import org.apache.lucene.analysis.morph.Dictionary; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FSTCompiler; import org.apache.lucene.util.fst.PositiveIntOutputs; /** Class for building a User Dictionary. This class allows for custom segmentation of phrases. 
*/ -public final class UserDictionary implements Dictionary { +public final class UserDictionary implements Dictionary { + + public static final String INTERNAL_SEPARATOR = "\u0000"; // phrase text -> phrase ID private final TokenInfoFST fst; @@ -41,15 +44,9 @@ public final class UserDictionary implements Dictionary { private final int[][] segmentations; // holds readings and POS, indexed by wordid - private final String[] data; - - private static final int CUSTOM_DICTIONARY_WORD_ID_OFFSET = 100000000; - - public static final int WORD_COST = -100000; + private final UserMorphData morphAtts; - public static final int LEFT_ID = 5; - - public static final int RIGHT_ID = 5; + static final int CUSTOM_DICTIONARY_WORD_ID_OFFSET = 100000000; public static UserDictionary open(Reader reader) throws IOException { @@ -150,10 +147,15 @@ public int compare(String[] left, String[] right) { ord++; } this.fst = new TokenInfoFST(fstCompiler.compile(), false); - this.data = data.toArray(new String[data.size()]); + this.morphAtts = new UserMorphData(data.toArray(new String[0])); this.segmentations = segmentations.toArray(new int[segmentations.size()][]); } + @Override + public UserMorphData getMorphAttributes() { + return morphAtts; + } + /** * Lookup words in text * @@ -222,78 +224,4 @@ private int[][] toIndexArray(Map input) { public int[] lookupSegmentation(int phraseID) { return segmentations[phraseID]; } - - @Override - public int getLeftId(int wordId) { - return LEFT_ID; - } - - @Override - public int getRightId(int wordId) { - return RIGHT_ID; - } - - @Override - public int getWordCost(int wordId) { - return WORD_COST; - } - - @Override - public String getReading(int wordId, char[] surface, int off, int len) { - return getFeature(wordId, 0); - } - - @Override - public String getPartOfSpeech(int wordId) { - return getFeature(wordId, 1); - } - - @Override - public String getBaseForm(int wordId, char[] surface, int off, int len) { - return null; // TODO: add support? 
- } - - @Override - public String getPronunciation(int wordId, char[] surface, int off, int len) { - return null; // TODO: add support? - } - - @Override - public String getInflectionType(int wordId) { - return null; // TODO: add support? - } - - @Override - public String getInflectionForm(int wordId) { - return null; // TODO: add support? - } - - private String[] getAllFeaturesArray(int wordId) { - String allFeatures = data[wordId - CUSTOM_DICTIONARY_WORD_ID_OFFSET]; - if (allFeatures == null) { - return null; - } - - return allFeatures.split(INTERNAL_SEPARATOR); - } - - private String getFeature(int wordId, int... fields) { - String[] allFeatures = getAllFeaturesArray(wordId); - if (allFeatures == null) { - return null; - } - StringBuilder sb = new StringBuilder(); - if (fields.length == 0) { // All features - for (String feature : allFeatures) { - sb.append(CSVUtil.quoteEscape(feature)).append(","); - } - } else if (fields.length == 1) { // One feature doesn't need to escape value - sb.append(allFeatures[fields[0]]).append(","); - } else { - for (int field : fields) { - sb.append(CSVUtil.quoteEscape(allFeatures[field])).append(","); - } - } - return sb.deleteCharAt(sb.length() - 1).toString(); - } } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java new file mode 100644 index 000000000000..1f6907052e0f --- /dev/null +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/dict/UserMorphData.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.ja.dict; + +import static org.apache.lucene.analysis.ja.dict.UserDictionary.CUSTOM_DICTIONARY_WORD_ID_OFFSET; +import static org.apache.lucene.analysis.ja.dict.UserDictionary.INTERNAL_SEPARATOR; + +import org.apache.lucene.analysis.ja.util.CSVUtil; + +/** Morphological information for user dictionary. */ +final class UserMorphData implements JaMorphData { + public static final int WORD_COST = -100000; + public static final int LEFT_ID = 5; + public static final int RIGHT_ID = 5; + + // holds readings and POS, indexed by wordid + private final String[] data; + + UserMorphData(String[] data) { + this.data = data; + } + + @Override + public int getLeftId(int wordId) { + return LEFT_ID; + } + + @Override + public int getRightId(int wordId) { + return RIGHT_ID; + } + + @Override + public int getWordCost(int wordId) { + return WORD_COST; + } + + @Override + public String getReading(int morphId, char[] surface, int off, int len) { + return getFeature(morphId, 0); + } + + @Override + public String getPartOfSpeech(int morphId) { + return getFeature(morphId, 1); + } + + @Override + public String getBaseForm(int morphId, char[] surface, int off, int len) { + return null; // TODO: add support? + } + + @Override + public String getPronunciation(int morphId, char[] surface, int off, int len) { + return null; // TODO: add support? + } + + @Override + public String getInflectionType(int morphId) { + return null; // TODO: add support? 
+ } + + @Override + public String getInflectionForm(int wordId) { + return null; // TODO: add support? + } + + private String[] getAllFeaturesArray(int wordId) { + String allFeatures = data[wordId - CUSTOM_DICTIONARY_WORD_ID_OFFSET]; + if (allFeatures == null) { + return null; + } + + return allFeatures.split(INTERNAL_SEPARATOR); + } + + private String getFeature(int wordId, int... fields) { + String[] allFeatures = getAllFeaturesArray(wordId); + if (allFeatures == null) { + return null; + } + StringBuilder sb = new StringBuilder(); + if (fields.length == 0) { // All features + for (String feature : allFeatures) { + sb.append(CSVUtil.quoteEscape(feature)).append(","); + } + } else if (fields.length == 1) { // One feature doesn't need to escape value + sb.append(allFeatures[fields[0]]).append(","); + } else { + for (int field : fields) { + sb.append(CSVUtil.quoteEscape(allFeatures[field])).append(","); + } + } + return sb.deleteCharAt(sb.length() - 1).toString(); + } +} diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java deleted file mode 100644 index bf157b9abb25..000000000000 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/BinaryDictionaryWriter.java +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.analysis.ja.util; - -import java.io.BufferedOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.nio.channels.WritableByteChannel; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.ArrayList; -import org.apache.lucene.analysis.ja.dict.BinaryDictionary; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.OutputStreamDataOutput; -import org.apache.lucene.util.ArrayUtil; - -abstract class BinaryDictionaryWriter { - private static final int ID_LIMIT = 8192; - - private final Class implClazz; - protected ByteBuffer buffer; - private int targetMapEndOffset = 0, lastWordId = -1, lastSourceId = -1; - private int[] targetMap = new int[8192]; - private int[] targetMapOffsets = new int[8192]; - private final ArrayList posDict = new ArrayList<>(); - - BinaryDictionaryWriter(Class implClazz, int size) { - this.implClazz = implClazz; - buffer = ByteBuffer.allocateDirect(size); - } - - /** - * put the entry in map - * - * @return current position of buffer, which will be wordId of next entry - */ - public int put(String[] entry) { - short leftId = Short.parseShort(entry[1]); - short rightId = Short.parseShort(entry[2]); - short wordCost = Short.parseShort(entry[3]); - - StringBuilder sb = new StringBuilder(); - - // build up the POS string - for (int i = 4; i < 8; i++) { - String part = entry[i]; - assert part.length() > 0; - if (!"*".equals(part)) 
{ - if (sb.length() > 0) { - sb.append('-'); - } - sb.append(part); - } - } - - String posData = sb.toString(); - if (posData.isEmpty()) { - throw new IllegalArgumentException("POS fields are empty"); - } - sb.setLength(0); - sb.append(CSVUtil.quoteEscape(posData)); - sb.append(','); - if (!"*".equals(entry[8])) { - sb.append(CSVUtil.quoteEscape(entry[8])); - } - sb.append(','); - if (!"*".equals(entry[9])) { - sb.append(CSVUtil.quoteEscape(entry[9])); - } - String fullPOSData = sb.toString(); - - String baseForm = entry[10]; - String reading = entry[11]; - String pronunciation = entry[12]; - - // extend buffer if necessary - int left = buffer.remaining(); - // worst case: two short, 3 bytes, and features (all as utf-16) - int worstCase = 4 + 3 + 2 * (baseForm.length() + reading.length() + pronunciation.length()); - if (worstCase > left) { - ByteBuffer newBuffer = - ByteBuffer.allocateDirect(ArrayUtil.oversize(buffer.limit() + worstCase - left, 1)); - buffer.flip(); - newBuffer.put(buffer); - buffer = newBuffer; - } - - int flags = 0; - if (baseForm.isEmpty()) { - throw new IllegalArgumentException("base form is empty"); - } - if (!("*".equals(baseForm) || baseForm.equals(entry[0]))) { - flags |= BinaryDictionary.HAS_BASEFORM; - } - if (!reading.equals(toKatakana(entry[0]))) { - flags |= BinaryDictionary.HAS_READING; - } - if (!pronunciation.equals(reading)) { - flags |= BinaryDictionary.HAS_PRONUNCIATION; - } - - if (leftId != rightId) { - throw new IllegalArgumentException("rightId != leftId: " + rightId + " " + leftId); - } - if (leftId >= ID_LIMIT) { - throw new IllegalArgumentException("leftId >= " + ID_LIMIT + ": " + leftId); - } - // add pos mapping - int toFill = 1 + leftId - posDict.size(); - for (int i = 0; i < toFill; i++) { - posDict.add(null); - } - - String existing = posDict.get(leftId); - if (existing != null && existing.equals(fullPOSData) == false) { - // TODO: test me - throw new IllegalArgumentException("Multiple entries found for leftID=" + 
leftId); - } - posDict.set(leftId, fullPOSData); - - buffer.putShort((short) (leftId << 3 | flags)); - buffer.putShort(wordCost); - - if ((flags & BinaryDictionary.HAS_BASEFORM) != 0) { - if (baseForm.length() >= 16) { - throw new IllegalArgumentException("Length of base form " + baseForm + " is >= 16"); - } - int shared = sharedPrefix(entry[0], baseForm); - int suffix = baseForm.length() - shared; - buffer.put((byte) (shared << 4 | suffix)); - for (int i = shared; i < baseForm.length(); i++) { - buffer.putChar(baseForm.charAt(i)); - } - } - - if ((flags & BinaryDictionary.HAS_READING) != 0) { - if (isKatakana(reading)) { - buffer.put((byte) (reading.length() << 1 | 1)); - writeKatakana(reading); - } else { - buffer.put((byte) (reading.length() << 1)); - for (int i = 0; i < reading.length(); i++) { - buffer.putChar(reading.charAt(i)); - } - } - } - - if ((flags & BinaryDictionary.HAS_PRONUNCIATION) != 0) { - // we can save 150KB here, but it makes the reader a little complicated. - // int shared = sharedPrefix(reading, pronunciation); - // buffer.put((byte) shared); - // pronunciation = pronunciation.substring(shared); - if (isKatakana(pronunciation)) { - buffer.put((byte) (pronunciation.length() << 1 | 1)); - writeKatakana(pronunciation); - } else { - buffer.put((byte) (pronunciation.length() << 1)); - for (int i = 0; i < pronunciation.length(); i++) { - buffer.putChar(pronunciation.charAt(i)); - } - } - } - - return buffer.position(); - } - - private boolean isKatakana(String s) { - for (int i = 0; i < s.length(); i++) { - char ch = s.charAt(i); - if (ch < 0x30A0 || ch > 0x30FF) { - return false; - } - } - return true; - } - - private void writeKatakana(String s) { - for (int i = 0; i < s.length(); i++) { - buffer.put((byte) (s.charAt(i) - 0x30A0)); - } - } - - private String toKatakana(String s) { - char[] text = new char[s.length()]; - for (int i = 0; i < s.length(); i++) { - char ch = s.charAt(i); - if (ch > 0x3040 && ch < 0x3097) { - text[i] = (char) (ch + 
0x60); - } else { - text[i] = ch; - } - } - return new String(text); - } - - private static int sharedPrefix(String left, String right) { - int len = left.length() < right.length() ? left.length() : right.length(); - for (int i = 0; i < len; i++) if (left.charAt(i) != right.charAt(i)) return i; - return len; - } - - void addMapping(int sourceId, int wordId) { - if (wordId <= lastWordId) { - throw new IllegalStateException( - "words out of order: " + wordId + " vs lastID: " + lastWordId); - } - - if (sourceId > lastSourceId) { - targetMapOffsets = ArrayUtil.grow(targetMapOffsets, sourceId + 1); - for (int i = lastSourceId + 1; i <= sourceId; i++) { - targetMapOffsets[i] = targetMapEndOffset; - } - } else if (sourceId != lastSourceId) { - throw new IllegalStateException( - "source ids not in increasing order: lastSourceId=" - + lastSourceId - + " vs sourceId=" - + sourceId); - } - - targetMap = ArrayUtil.grow(targetMap, targetMapEndOffset + 1); - targetMap[targetMapEndOffset] = wordId; - targetMapEndOffset++; - - lastSourceId = sourceId; - lastWordId = wordId; - } - - final String getBaseFileName() { - return implClazz.getName().replace('.', '/'); - } - - /** - * Write dictionary in file Dictionary format is: [Size of dictionary(int)], [entry:{left - * id(short)}{right id(short)}{word cost(short)}{length of pos info(short)}{pos info(char)}], - * [entry...], [entry...]..... - * - * @throws IOException if an I/O error occurs writing the dictionary files - */ - public void write(Path baseDir) throws IOException { - final String baseName = getBaseFileName(); - writeDictionary(baseDir.resolve(baseName + BinaryDictionary.DICT_FILENAME_SUFFIX)); - writeTargetMap(baseDir.resolve(baseName + BinaryDictionary.TARGETMAP_FILENAME_SUFFIX)); - writePosDict(baseDir.resolve(baseName + BinaryDictionary.POSDICT_FILENAME_SUFFIX)); - } - - // TODO: maybe this int[] should instead be the output to the FST... 
- private void writeTargetMap(Path path) throws IOException { - Files.createDirectories(path.getParent()); - try (OutputStream os = Files.newOutputStream(path); - OutputStream bos = new BufferedOutputStream(os)) { - final DataOutput out = new OutputStreamDataOutput(bos); - CodecUtil.writeHeader(out, BinaryDictionary.TARGETMAP_HEADER, BinaryDictionary.VERSION); - - final int numSourceIds = lastSourceId + 1; - out.writeVInt(targetMapEndOffset); // <-- size of main array - out.writeVInt(numSourceIds + 1); // <-- size of offset array (+ 1 more entry) - int prev = 0, sourceId = 0; - for (int ofs = 0; ofs < targetMapEndOffset; ofs++) { - final int val = targetMap[ofs], delta = val - prev; - assert delta >= 0; - if (ofs == targetMapOffsets[sourceId]) { - out.writeVInt((delta << 1) | 0x01); - sourceId++; - } else { - out.writeVInt((delta << 1)); - } - prev += delta; - } - if (sourceId != numSourceIds) { - throw new IllegalStateException( - "sourceId:" + sourceId + " != numSourceIds:" + numSourceIds); - } - } - } - - private void writePosDict(Path path) throws IOException { - Files.createDirectories(path.getParent()); - try (OutputStream os = Files.newOutputStream(path); - OutputStream bos = new BufferedOutputStream(os)) { - final DataOutput out = new OutputStreamDataOutput(bos); - CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION); - out.writeVInt(posDict.size()); - for (String s : posDict) { - if (s == null) { - out.writeByte((byte) 0); - out.writeByte((byte) 0); - out.writeByte((byte) 0); - } else { - String[] data = CSVUtil.parse(s); - if (data.length != 3) { - throw new IllegalArgumentException( - "Malformed pos/inflection: " + s + "; expected 3 characters"); - } - out.writeString(data[0]); - out.writeString(data[1]); - out.writeString(data[2]); - } - } - } - } - - private void writeDictionary(Path path) throws IOException { - Files.createDirectories(path.getParent()); - try (OutputStream os = Files.newOutputStream(path); - 
OutputStream bos = new BufferedOutputStream(os)) { - final DataOutput out = new OutputStreamDataOutput(bos); - CodecUtil.writeHeader(out, BinaryDictionary.DICT_HEADER, BinaryDictionary.VERSION); - out.writeVInt(buffer.position()); - final WritableByteChannel channel = Channels.newChannel(bos); - // Write Buffer - buffer.flip(); // set position to 0, set limit to current position - channel.write(buffer); - assert buffer.remaining() == 0L; - } - } -} diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CharacterDefinitionWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CharacterDefinitionWriter.java deleted file mode 100644 index 0afadeda83df..000000000000 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/CharacterDefinitionWriter.java +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.analysis.ja.util; - -import java.io.BufferedOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.file.Files; -import java.nio.file.Path; -import java.util.Arrays; -import org.apache.lucene.analysis.ja.dict.CharacterDefinition; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.OutputStreamDataOutput; - -final class CharacterDefinitionWriter { - - private final byte[] characterCategoryMap = new byte[0x10000]; - - private final boolean[] invokeMap = new boolean[CharacterDefinition.CLASS_COUNT]; - private final boolean[] groupMap = new boolean[CharacterDefinition.CLASS_COUNT]; - - /** Constructor for building. TODO: remove write access */ - CharacterDefinitionWriter() { - Arrays.fill(characterCategoryMap, CharacterDefinition.DEFAULT); - } - - /** - * Put mapping from unicode code point to character class. - * - * @param codePoint code point - * @param characterClassName character class name - */ - void putCharacterCategory(int codePoint, String characterClassName) { - characterClassName = characterClassName.split(" ")[0]; // use first - // category - // class - - // Override Nakaguro - if (codePoint == 0x30FB) { - characterClassName = "SYMBOL"; - } - characterCategoryMap[codePoint] = CharacterDefinition.lookupCharacterClass(characterClassName); - } - - void putInvokeDefinition(String characterClassName, int invoke, int group, int length) { - final byte characterClass = CharacterDefinition.lookupCharacterClass(characterClassName); - invokeMap[characterClass] = invoke == 1; - groupMap[characterClass] = group == 1; - // TODO: length def ignored - } - - public void write(Path baseDir) throws IOException { - Path path = - baseDir.resolve( - CharacterDefinition.class.getName().replace('.', '/') - + CharacterDefinition.FILENAME_SUFFIX); - Files.createDirectories(path.getParent()); - try (OutputStream os = new 
BufferedOutputStream(Files.newOutputStream(path))) { - final DataOutput out = new OutputStreamDataOutput(os); - CodecUtil.writeHeader(out, CharacterDefinition.HEADER, CharacterDefinition.VERSION); - out.writeBytes(characterCategoryMap, 0, characterCategoryMap.length); - for (int i = 0; i < CharacterDefinition.CLASS_COUNT; i++) { - final byte b = (byte) ((invokeMap[i] ? 0x01 : 0x00) | (groupMap[i] ? 0x02 : 0x00)); - out.writeByte(b); - } - } - } -} diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java index 8b518ee991a0..bfd8dd0d17c9 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/ConnectionCostsBuilder.java @@ -22,12 +22,14 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import org.apache.lucene.analysis.ja.dict.ConnectionCosts; +import org.apache.lucene.analysis.morph.ConnectionCostsWriter; class ConnectionCostsBuilder { private ConnectionCostsBuilder() {} - public static ConnectionCostsWriter build(Path path) throws IOException { + public static ConnectionCostsWriter build(Path path) throws IOException { try (Reader reader = Files.newBufferedReader(path, StandardCharsets.US_ASCII); LineNumberReader lineReader = new LineNumberReader(reader)) { @@ -41,7 +43,8 @@ public static ConnectionCostsWriter build(Path path) throws IOException { assert forwardSize > 0 && backwardSize > 0; - ConnectionCostsWriter costs = new ConnectionCostsWriter(forwardSize, backwardSize); + ConnectionCostsWriter costs = + new ConnectionCostsWriter<>(ConnectionCosts.class, forwardSize, backwardSize); while ((line = lineReader.readLine()) != null) { String[] fields = line.split("\\s+"); diff --git 
a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java index 72920baf9840..d547dcb122d9 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/DictionaryBuilder.java @@ -20,6 +20,7 @@ import java.nio.file.Path; import java.nio.file.Paths; import java.util.Locale; +import org.apache.lucene.analysis.ja.dict.DictionaryConstants; /** * Tool to build dictionaries. Usage: @@ -68,7 +69,8 @@ public static void build( new UnknownDictionaryBuilder(encoding).build(inputDir).write(outputDir); - ConnectionCostsBuilder.build(inputDir.resolve("matrix.def")).write(outputDir); + ConnectionCostsBuilder.build(inputDir.resolve("matrix.def")) + .write(outputDir, DictionaryConstants.CONN_COSTS_HEADER, DictionaryConstants.VERSION); } public static void main(String[] args) throws IOException { diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryEntryWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryEntryWriter.java new file mode 100644 index 000000000000..618047825f2e --- /dev/null +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryEntryWriter.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.ja.util; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import org.apache.lucene.analysis.ja.dict.TokenInfoMorphData; +import org.apache.lucene.analysis.morph.DictionaryEntryWriter; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.ArrayUtil; + +/** Writes system dictionary entries */ +class TokenInfoDictionaryEntryWriter extends DictionaryEntryWriter { + private static final int ID_LIMIT = 8192; + + TokenInfoDictionaryEntryWriter(int size) { + super(size); + } + + /** + * put the entry in map + * + *

mecab-ipadic features + * + *

+   * 0   - surface
+   * 1   - left cost
+   * 2   - right cost
+   * 3   - word cost
+   * 4-9 - pos
+   * 10  - base form
+   * 11  - reading
+   * 12  - pronunciation
+   * 
+ */ + @Override + protected int putEntry(String[] entry) { + short leftId = Short.parseShort(entry[1]); + short rightId = Short.parseShort(entry[2]); + short wordCost = Short.parseShort(entry[3]); + + StringBuilder sb = new StringBuilder(); + + // build up the POS string + for (int i = 4; i < 8; i++) { + String part = entry[i]; + assert part.length() > 0; + if (!"*".equals(part)) { + if (sb.length() > 0) { + sb.append('-'); + } + sb.append(part); + } + } + + String posData = sb.toString(); + if (posData.isEmpty()) { + throw new IllegalArgumentException("POS fields are empty"); + } + sb.setLength(0); + sb.append(CSVUtil.quoteEscape(posData)); + sb.append(','); + if (!"*".equals(entry[8])) { + sb.append(CSVUtil.quoteEscape(entry[8])); + } + sb.append(','); + if (!"*".equals(entry[9])) { + sb.append(CSVUtil.quoteEscape(entry[9])); + } + String fullPOSData = sb.toString(); + + String baseForm = entry[10]; + String reading = entry[11]; + String pronunciation = entry[12]; + + // extend buffer if necessary + int left = buffer.remaining(); + // worst case: two short, 3 bytes, and features (all as utf-16) + int worstCase = 4 + 3 + 2 * (baseForm.length() + reading.length() + pronunciation.length()); + if (worstCase > left) { + ByteBuffer newBuffer = + ByteBuffer.allocateDirect(ArrayUtil.oversize(buffer.limit() + worstCase - left, 1)); + buffer.flip(); + newBuffer.put(buffer); + buffer = newBuffer; + } + + int flags = 0; + if (baseForm.isEmpty()) { + throw new IllegalArgumentException("base form is empty"); + } + if (!("*".equals(baseForm) || baseForm.equals(entry[0]))) { + flags |= TokenInfoMorphData.HAS_BASEFORM; + } + if (!reading.equals(toKatakana(entry[0]))) { + flags |= TokenInfoMorphData.HAS_READING; + } + if (!pronunciation.equals(reading)) { + flags |= TokenInfoMorphData.HAS_PRONUNCIATION; + } + + if (leftId != rightId) { + throw new IllegalArgumentException("rightId != leftId: " + rightId + " " + leftId); + } + if (leftId >= ID_LIMIT) { + throw new 
IllegalArgumentException("leftId >= " + ID_LIMIT + ": " + leftId); + } + // add pos mapping + int toFill = 1 + leftId - posDict.size(); + for (int i = 0; i < toFill; i++) { + posDict.add(null); + } + + String existing = posDict.get(leftId); + if (existing != null && existing.equals(fullPOSData) == false) { + // TODO: test me + throw new IllegalArgumentException("Multiple entries found for leftID=" + leftId); + } + posDict.set(leftId, fullPOSData); + + buffer.putShort((short) (leftId << 3 | flags)); + buffer.putShort(wordCost); + + if ((flags & TokenInfoMorphData.HAS_BASEFORM) != 0) { + if (baseForm.length() >= 16) { + throw new IllegalArgumentException("Length of base form " + baseForm + " is >= 16"); + } + int shared = sharedPrefix(entry[0], baseForm); + int suffix = baseForm.length() - shared; + buffer.put((byte) (shared << 4 | suffix)); + for (int i = shared; i < baseForm.length(); i++) { + buffer.putChar(baseForm.charAt(i)); + } + } + + if ((flags & TokenInfoMorphData.HAS_READING) != 0) { + if (isKatakana(reading)) { + buffer.put((byte) (reading.length() << 1 | 1)); + writeKatakana(reading, buffer); + } else { + buffer.put((byte) (reading.length() << 1)); + for (int i = 0; i < reading.length(); i++) { + buffer.putChar(reading.charAt(i)); + } + } + } + + if ((flags & TokenInfoMorphData.HAS_PRONUNCIATION) != 0) { + // we can save 150KB here, but it makes the reader a little complicated. 
+ // int shared = sharedPrefix(reading, pronunciation); + // buffer.put((byte) shared); + // pronunciation = pronunciation.substring(shared); + if (isKatakana(pronunciation)) { + buffer.put((byte) (pronunciation.length() << 1 | 1)); + writeKatakana(pronunciation, buffer); + } else { + buffer.put((byte) (pronunciation.length() << 1)); + for (int i = 0; i < pronunciation.length(); i++) { + buffer.putChar(pronunciation.charAt(i)); + } + } + } + + return buffer.position(); + } + + private boolean isKatakana(String s) { + for (int i = 0; i < s.length(); i++) { + char ch = s.charAt(i); + if (ch < 0x30A0 || ch > 0x30FF) { + return false; + } + } + return true; + } + + private void writeKatakana(String s, ByteBuffer buffer) { + for (int i = 0; i < s.length(); i++) { + buffer.put((byte) (s.charAt(i) - 0x30A0)); + } + } + + private String toKatakana(String s) { + char[] text = new char[s.length()]; + for (int i = 0; i < s.length(); i++) { + char ch = s.charAt(i); + if (ch > 0x3040 && ch < 0x3097) { + text[i] = (char) (ch + 0x60); + } else { + text[i] = ch; + } + } + return new String(text); + } + + private static int sharedPrefix(String left, String right) { + int len = left.length() < right.length() ? 
left.length() : right.length(); + for (int i = 0; i < len; i++) if (left.charAt(i) != right.charAt(i)) return i; + return len; + } + + @Override + protected void writePosDict(OutputStream bos, DataOutput out) throws IOException { + out.writeVInt(posDict.size()); + for (String s : posDict) { + if (s == null) { + out.writeByte((byte) 0); + out.writeByte((byte) 0); + out.writeByte((byte) 0); + } else { + String[] data = CSVUtil.parse(s); + if (data.length != 3) { + throw new IllegalArgumentException( + "Malformed pos/inflection: " + s + "; expected 3 characters"); + } + out.writeString(data[0]); + out.writeString(data[1]); + out.writeString(data[2]); + } + } + } +} diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java index 400c834cadfc..fbdf65f34ce7 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/TokenInfoDictionaryWriter.java @@ -20,14 +20,16 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Objects; +import org.apache.lucene.analysis.ja.dict.DictionaryConstants; import org.apache.lucene.analysis.ja.dict.TokenInfoDictionary; import org.apache.lucene.util.fst.FST; -class TokenInfoDictionaryWriter extends BinaryDictionaryWriter { +class TokenInfoDictionaryWriter + extends org.apache.lucene.analysis.morph.BinaryDictionaryWriter { private FST fst; TokenInfoDictionaryWriter(int size) { - super(TokenInfoDictionary.class, size); + super(TokenInfoDictionary.class, new TokenInfoDictionaryEntryWriter(size)); } public void setFST(FST fst) { @@ -35,9 +37,19 @@ public void setFST(FST fst) { this.fst = fst; } + @Override + protected void addMapping(int sourceId, int wordId) { + super.addMapping(sourceId, wordId); + } + @Override public void write(Path baseDir) 
throws IOException { - super.write(baseDir); + super.write( + baseDir, + DictionaryConstants.TARGETMAP_HEADER, + DictionaryConstants.POSDICT_HEADER, + DictionaryConstants.DICT_HEADER, + DictionaryConstants.VERSION); writeFST(baseDir.resolve(getBaseFileName() + TokenInfoDictionary.FST_FILENAME_SUFFIX)); } diff --git a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryWriter.java b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryWriter.java index 6d80f513feca..84000fdf0de3 100644 --- a/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryWriter.java +++ b/lucene/analysis/kuromoji/src/java/org/apache/lucene/analysis/ja/util/UnknownDictionaryWriter.java @@ -19,19 +19,28 @@ import java.io.IOException; import java.nio.file.Path; import org.apache.lucene.analysis.ja.dict.CharacterDefinition; +import org.apache.lucene.analysis.ja.dict.DictionaryConstants; import org.apache.lucene.analysis.ja.dict.UnknownDictionary; +import org.apache.lucene.analysis.morph.BinaryDictionaryWriter; +import org.apache.lucene.analysis.morph.CharacterDefinitionWriter; -class UnknownDictionaryWriter extends BinaryDictionaryWriter { - private final CharacterDefinitionWriter characterDefinition = new CharacterDefinitionWriter(); +class UnknownDictionaryWriter extends BinaryDictionaryWriter { + private final CharacterDefinitionWriter characterDefinition = + new CharacterDefinitionWriter<>( + CharacterDefinition.class, + CharacterDefinition.DEFAULT, + CharacterDefinition.CLASS_COUNT, + CharacterDefinition::lookupCharacterClass); public UnknownDictionaryWriter(int size) { - super(UnknownDictionary.class, size); + super(UnknownDictionary.class, new TokenInfoDictionaryEntryWriter(size)); } @Override public int put(String[] entry) { // Get wordId of current entry - int wordId = buffer.position(); + // int wordId = buffer.position(); + int wordId = entryWriter.currentPosition(); // Put entry int 
result = super.put(entry); @@ -58,7 +67,13 @@ public void putInvokeDefinition(String characterClassName, int invoke, int group @Override public void write(Path baseDir) throws IOException { - super.write(baseDir); - characterDefinition.write(baseDir); + super.write( + baseDir, + DictionaryConstants.TARGETMAP_HEADER, + DictionaryConstants.POSDICT_HEADER, + DictionaryConstants.DICT_HEADER, + DictionaryConstants.VERSION); + characterDefinition.write( + baseDir, DictionaryConstants.CHARDEF_HEADER, DictionaryConstants.VERSION); } } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java index bc44723996ac..5423d7a76cbf 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestExternalDictionary.java @@ -16,10 +16,10 @@ */ package org.apache.lucene.analysis.ja.dict; -import static org.apache.lucene.analysis.ja.dict.BinaryDictionary.DICT_FILENAME_SUFFIX; -import static org.apache.lucene.analysis.ja.dict.BinaryDictionary.POSDICT_FILENAME_SUFFIX; -import static org.apache.lucene.analysis.ja.dict.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX; import static org.apache.lucene.analysis.ja.dict.TokenInfoDictionary.FST_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.DICT_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.POSDICT_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX; import java.io.BufferedWriter; import java.nio.charset.StandardCharsets; diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestTokenInfoDictionary.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestTokenInfoDictionary.java index 
11de60c05fb4..0320d6e5f13b 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestTokenInfoDictionary.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestTokenInfoDictionary.java @@ -16,10 +16,10 @@ */ package org.apache.lucene.analysis.ja.dict; -import static org.apache.lucene.analysis.ja.dict.BinaryDictionary.DICT_FILENAME_SUFFIX; -import static org.apache.lucene.analysis.ja.dict.BinaryDictionary.POSDICT_FILENAME_SUFFIX; -import static org.apache.lucene.analysis.ja.dict.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX; import static org.apache.lucene.analysis.ja.dict.TokenInfoDictionary.FST_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.DICT_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.POSDICT_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX; import java.io.OutputStream; import java.io.OutputStreamWriter; @@ -137,17 +137,17 @@ public void testEnumerateAll() throws Exception { assertTrue(wordId > lastWordId); lastWordId = wordId; - String baseForm = tid.getBaseForm(wordId, chars, 0, chars.length); + String baseForm = tid.getMorphAttributes().getBaseForm(wordId, chars, 0, chars.length); assertTrue(baseForm == null || UnicodeUtil.validUTF16String(baseForm)); - String inflectionForm = tid.getInflectionForm(wordId); + String inflectionForm = tid.getMorphAttributes().getInflectionForm(wordId); assertTrue(inflectionForm == null || UnicodeUtil.validUTF16String(inflectionForm)); if (inflectionForm != null) { // check that it's actually an ipadic inflection form assertNotNull(ToStringUtil.getInflectedFormTranslation(inflectionForm)); } - String inflectionType = tid.getInflectionType(wordId); + String inflectionType = tid.getMorphAttributes().getInflectionType(wordId); assertTrue(inflectionType == null || UnicodeUtil.validUTF16String(inflectionType)); if (inflectionType != null) { // check 
that it's actually an ipadic inflection type @@ -161,17 +161,18 @@ public void testEnumerateAll() throws Exception { tid.getWordCost(wordId); - String pos = tid.getPartOfSpeech(wordId); + String pos = tid.getMorphAttributes().getPartOfSpeech(wordId); assertNotNull(pos); assertTrue(UnicodeUtil.validUTF16String(pos)); // check that it's actually an ipadic pos tag assertNotNull(ToStringUtil.getPOSTranslation(pos)); - String pronunciation = tid.getPronunciation(wordId, chars, 0, chars.length); + String pronunciation = + tid.getMorphAttributes().getPronunciation(wordId, chars, 0, chars.length); assertNotNull(pronunciation); assertTrue(UnicodeUtil.validUTF16String(pronunciation)); - String reading = tid.getReading(wordId, chars, 0, chars.length); + String reading = tid.getMorphAttributes().getReading(wordId, chars, 0, chars.length); assertNotNull(reading); assertTrue(UnicodeUtil.validUTF16String(reading)); } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUserDictionary.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUserDictionary.java index b08d75344f0f..537d621016e6 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUserDictionary.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/dict/TestUserDictionary.java @@ -54,12 +54,15 @@ public void testReadings() throws IOException { int[][] result = dictionary.lookup("日本経済新聞".toCharArray(), 0, 6); assertEquals(3, result.length); int wordIdNihon = result[0][0]; // wordId of 日本 in 日本経済新聞 - assertEquals("ニホン", dictionary.getReading(wordIdNihon, "日本".toCharArray(), 0, 2)); + assertEquals( + "ニホン", dictionary.getMorphAttributes().getReading(wordIdNihon, "日本".toCharArray(), 0, 2)); result = dictionary.lookup("朝青龍".toCharArray(), 0, 3); assertEquals(1, result.length); int wordIdAsashoryu = result[0][0]; // wordId for 朝青龍 - assertEquals("アサショウリュウ", dictionary.getReading(wordIdAsashoryu, 
"朝青龍".toCharArray(), 0, 3)); + assertEquals( + "アサショウリュウ", + dictionary.getMorphAttributes().getReading(wordIdAsashoryu, "朝青龍".toCharArray(), 0, 3)); } @Test @@ -68,7 +71,7 @@ public void testPartOfSpeech() throws IOException { int[][] result = dictionary.lookup("日本経済新聞".toCharArray(), 0, 6); assertEquals(3, result.length); int wordIdKeizai = result[1][0]; // wordId of 経済 in 日本経済新聞 - assertEquals("カスタム名詞", dictionary.getPartOfSpeech(wordIdKeizai)); + assertEquals("カスタム名詞", dictionary.getMorphAttributes().getPartOfSpeech(wordIdKeizai)); } @Test @@ -109,7 +112,7 @@ public void testSharp() throws IOException { for (String input : inputs) { System.out.println(input); int[][] result = dictionary.lookup(input.toCharArray(), 0, input.length()); - assertEquals("カスタム名刺", dictionary.getPartOfSpeech(result[0][0])); + assertEquals("カスタム名刺", dictionary.getMorphAttributes().getPartOfSpeech(result[0][0])); } } } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java index 731378c9c4ae..a6af55f482e4 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DecompoundToken.java @@ -16,7 +16,7 @@ */ package org.apache.lucene.analysis.ko; -import org.apache.lucene.analysis.ko.dict.Dictionary; +import org.apache.lucene.analysis.ko.dict.KoMorphData; /** A token that was generated from a compound. 
*/ public class DecompoundToken extends Token { @@ -71,7 +71,7 @@ public String getReading() { } @Override - public Dictionary.Morpheme[] getMorphemes() { + public KoMorphData.Morpheme[] getMorphemes() { return null; } } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java index 82fe981c24df..f548f8007d30 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/DictionaryToken.java @@ -16,17 +16,17 @@ */ package org.apache.lucene.analysis.ko; -import org.apache.lucene.analysis.ko.dict.Dictionary; +import org.apache.lucene.analysis.ko.dict.KoMorphData; -/** A token stored in a {@link Dictionary}. */ +/** A token stored in a {@link KoMorphData}. */ public class DictionaryToken extends Token { private final int wordId; private final KoreanTokenizer.Type type; - private final Dictionary dictionary; + private final KoMorphData morphAtts; public DictionaryToken( KoreanTokenizer.Type type, - Dictionary dictionary, + KoMorphData morphAtts, int wordId, char[] surfaceForm, int offset, @@ -35,7 +35,7 @@ public DictionaryToken( int endOffset) { super(surfaceForm, offset, length, startOffset, endOffset); this.type = type; - this.dictionary = dictionary; + this.morphAtts = morphAtts; this.wordId = wordId; } @@ -54,7 +54,7 @@ public String toString() { + " wordId=" + wordId + " leftID=" - + dictionary.getLeftId(wordId) + + morphAtts.getLeftId(wordId) + ")"; } @@ -96,26 +96,26 @@ public boolean isUser() { @Override public POS.Type getPOSType() { - return dictionary.getPOSType(wordId); + return morphAtts.getPOSType(wordId); } @Override public POS.Tag getLeftPOS() { - return dictionary.getLeftPOS(wordId); + return morphAtts.getLeftPOS(wordId); } @Override public POS.Tag getRightPOS() { - return dictionary.getRightPOS(wordId); + return 
morphAtts.getRightPOS(wordId); } @Override public String getReading() { - return dictionary.getReading(wordId); + return morphAtts.getReading(wordId); } @Override - public Dictionary.Morpheme[] getMorphemes() { - return dictionary.getMorphemes(wordId, getSurfaceForm(), getOffset(), getLength()); + public KoMorphData.Morpheme[] getMorphemes() { + return morphAtts.getMorphemes(wordId, getSurfaceForm(), getOffset(), getLength()); } } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java index d8e3f7a9fafc..9beaf2e5b9b5 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/GraphvizFormatter.java @@ -21,7 +21,8 @@ import org.apache.lucene.analysis.ko.KoreanTokenizer.Position; import org.apache.lucene.analysis.ko.KoreanTokenizer.WrappedPositionArray; import org.apache.lucene.analysis.ko.dict.ConnectionCosts; -import org.apache.lucene.analysis.ko.dict.Dictionary; +import org.apache.lucene.analysis.ko.dict.KoMorphData; +import org.apache.lucene.analysis.morph.Dictionary; // TODO: would be nice to show 2nd best path in a diff't // color... 
@@ -140,7 +141,7 @@ private String formatNodes( attrs = ""; } - final Dictionary dict = tok.getDict(posData.backType[idx]); + final Dictionary dict = tok.getDict(posData.backType[idx]); final int wordCost = dict.getWordCost(posData.backID[idx]); final int bgCost = costs.get( diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java index 325fae710b90..028d9dd54d1f 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/KoreanTokenizer.java @@ -26,13 +26,14 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ko.dict.CharacterDefinition; import org.apache.lucene.analysis.ko.dict.ConnectionCosts; -import org.apache.lucene.analysis.ko.dict.Dictionary; +import org.apache.lucene.analysis.ko.dict.KoMorphData; import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary; import org.apache.lucene.analysis.ko.dict.TokenInfoFST; import org.apache.lucene.analysis.ko.dict.UnknownDictionary; import org.apache.lucene.analysis.ko.dict.UserDictionary; import org.apache.lucene.analysis.ko.tokenattributes.PartOfSpeechAttribute; import org.apache.lucene.analysis.ko.tokenattributes.ReadingAttribute; +import org.apache.lucene.analysis.morph.Dictionary; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -97,7 +98,8 @@ public enum DecompoundMode { private static final int MAX_UNKNOWN_WORD_LENGTH = 1024; private static final int MAX_BACKTRACE_GAP = 1024; - private final EnumMap dictionaryMap = new EnumMap<>(Type.class); + private final EnumMap> dictionaryMap = + new EnumMap<>(Type.class); private final TokenInfoFST fst; private final TokenInfoDictionary dictionary; @@ -406,10 
+408,10 @@ private int computeSpacePenalty(POS.Tag leftPOS, int numSpaces) { } private void add( - Dictionary dict, Position fromPosData, int wordPos, int endPos, int wordID, Type type) { - final POS.Tag leftPOS = dict.getLeftPOS(wordID); - final int wordCost = dict.getWordCost(wordID); - final int leftID = dict.getLeftId(wordID); + KoMorphData morphAtts, Position fromPosData, int wordPos, int endPos, int wordID, Type type) { + final POS.Tag leftPOS = morphAtts.getLeftPOS(wordID); + final int wordCost = morphAtts.getWordCost(wordID); + final int leftID = morphAtts.getLeftId(wordID); int leastCost = Integer.MAX_VALUE; int leastIDX = -1; assert fromPosData.count > 0; @@ -472,7 +474,14 @@ private void add( positions .get(endPos) - .add(leastCost, dict.getRightId(wordID), fromPosData.pos, wordPos, leastIDX, wordID, type); + .add( + leastCost, + morphAtts.getRightId(wordID), + fromPosData.pos, + wordPos, + leastIDX, + wordID, + type); } @Override @@ -796,7 +805,7 @@ private void parse() throws IOException { + (maxPosAhead + 1)); } add( - userDictionary, + userDictionary.getMorphAttributes(), posData, pos, maxPosAhead + 1, @@ -848,7 +857,7 @@ private void parse() throws IOException { } for (int ofs = 0; ofs < wordIdRef.length; ofs++) { add( - dictionary, + dictionary.getMorphAttributes(), posData, pos, posAhead + 1, @@ -922,7 +931,7 @@ && isPunctuation(ch, chType) == isPunct } for (int ofs = 0; ofs < wordIdRef.length; ofs++) { add( - unkDictionary, + unkDictionary.getMorphAttributes(), posData, pos, pos + unknownWordLength, @@ -1016,7 +1025,7 @@ private void backtrace(final Position endPosData, final int fromIDX) { final int fragmentOffset = backWordPos - lastBackTracePos; assert fragmentOffset >= 0; - final Dictionary dict = getDict(backType); + final Dictionary dict = getDict(backType); if (outputUnknownUnigrams && backType == Type.UNKNOWN) { // outputUnknownUnigrams converts unknown word into unigrams: @@ -1029,7 +1038,7 @@ private void backtrace(final Position 
endPosData, final int fromIDX) { final DictionaryToken token = new DictionaryToken( Type.UNKNOWN, - unkDictionary, + unkDictionary.getMorphAttributes(), CharacterDefinition.NGRAM, fragment, fragmentOffset + i, @@ -1045,7 +1054,7 @@ private void backtrace(final Position endPosData, final int fromIDX) { final DictionaryToken token = new DictionaryToken( backType, - dict, + dict.getMorphAttributes(), backID, fragment, fragmentOffset, @@ -1060,7 +1069,7 @@ private void backtrace(final Position endPosData, final int fromIDX) { } } } else { - Dictionary.Morpheme[] morphemes = token.getMorphemes(); + KoMorphData.Morpheme[] morphemes = token.getMorphemes(); if (morphemes == null) { pending.add(token); if (VERBOSE) { @@ -1071,7 +1080,7 @@ private void backtrace(final Position endPosData, final int fromIDX) { int posLen = 0; // decompose the compound for (int i = morphemes.length - 1; i >= 0; i--) { - final Dictionary.Morpheme morpheme = morphemes[i]; + final KoMorphData.Morpheme morpheme = morphemes[i]; final Token compoundToken; if (token.getPOSType() == POS.Type.COMPOUND) { assert endOffset - morpheme.surfaceForm.length() >= 0; @@ -1119,7 +1128,7 @@ private void backtrace(final Position endPosData, final int fromIDX) { DictionaryToken spaceToken = new DictionaryToken( Type.UNKNOWN, - unkDictionary, + unkDictionary.getMorphAttributes(), wordIdRef.ints[wordIdRef.offset], fragment, offset, @@ -1144,7 +1153,7 @@ private void backtrace(final Position endPosData, final int fromIDX) { positions.freeBefore(endPos); } - Dictionary getDict(Type type) { + Dictionary getDict(Type type) { return dictionaryMap.get(type); } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java index 730262907d89..ed2d7ff6ba73 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/Token.java @@ -16,7 +16,7 @@ 
*/ package org.apache.lucene.analysis.ko; -import org.apache.lucene.analysis.ko.dict.Dictionary.Morpheme; +import org.apache.lucene.analysis.ko.dict.KoMorphData; /** Analyzed token with morphological data. */ public abstract class Token { @@ -70,8 +70,11 @@ public String getSurfaceFormString() { /** Get the reading of the token. */ public abstract String getReading(); - /** Get the {@link Morpheme} decomposition of the token. */ - public abstract Morpheme[] getMorphemes(); + /** + * Get the {@link org.apache.lucene.analysis.ko.dict.KoMorphData.Morpheme} decomposition of the + * token. + */ + public abstract KoMorphData.Morpheme[] getMorphemes(); /** Get the start offset of the term in the analyzed text. */ public int getStartOffset() { diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java deleted file mode 100644 index 4d10e5200556..000000000000 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/BinaryDictionary.java +++ /dev/null @@ -1,220 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.analysis.ko.dict; - -import java.io.BufferedInputStream; -import java.io.EOFException; -import java.io.IOException; -import java.io.InputStream; -import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.nio.channels.ReadableByteChannel; -import org.apache.lucene.analysis.ko.POS; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.InputStreamDataInput; -import org.apache.lucene.util.IOSupplier; -import org.apache.lucene.util.IntsRef; - -/** Base class for a binary-encoded in-memory dictionary. */ -public abstract class BinaryDictionary implements Dictionary { - - public static final String TARGETMAP_FILENAME_SUFFIX = "$targetMap.dat"; - public static final String DICT_FILENAME_SUFFIX = "$buffer.dat"; - public static final String POSDICT_FILENAME_SUFFIX = "$posDict.dat"; - - public static final String DICT_HEADER = "ko_dict"; - public static final String TARGETMAP_HEADER = "ko_dict_map"; - public static final String POSDICT_HEADER = "ko_dict_pos"; - public static final int VERSION = 1; - - private final ByteBuffer buffer; - private final int[] targetMapOffsets, targetMap; - private final POS.Tag[] posDict; - - protected BinaryDictionary( - IOSupplier targetMapResource, - IOSupplier posResource, - IOSupplier dictResource) - throws IOException { - try (InputStream mapIS = new BufferedInputStream(targetMapResource.get())) { - DataInput in = new InputStreamDataInput(mapIS); - CodecUtil.checkHeader(in, TARGETMAP_HEADER, VERSION, VERSION); - this.targetMap = new int[in.readVInt()]; - this.targetMapOffsets = new int[in.readVInt()]; - populateTargetMap(in, this.targetMap, this.targetMapOffsets); - } - - try (InputStream posIS = new BufferedInputStream(posResource.get())) { - DataInput in = new InputStreamDataInput(posIS); - CodecUtil.checkHeader(in, POSDICT_HEADER, VERSION, VERSION); - int posSize = in.readVInt(); - this.posDict = new POS.Tag[posSize]; - for 
(int j = 0; j < posSize; j++) { - posDict[j] = POS.resolveTag(in.readByte()); - } - } - - // no buffering here, as we load in one large buffer - try (InputStream dictIS = dictResource.get()) { - DataInput in = new InputStreamDataInput(dictIS); - CodecUtil.checkHeader(in, DICT_HEADER, VERSION, VERSION); - final int size = in.readVInt(); - final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size); - final ReadableByteChannel channel = Channels.newChannel(dictIS); - final int read = channel.read(tmpBuffer); - if (read != size) { - throw new EOFException("Cannot read whole dictionary"); - } - this.buffer = tmpBuffer.asReadOnlyBuffer(); - } - } - - private static void populateTargetMap(DataInput in, int[] targetMap, int[] targetMapOffsets) - throws IOException { - int accum = 0, sourceId = 0; - for (int ofs = 0; ofs < targetMap.length; ofs++) { - final int val = in.readVInt(); - if ((val & 0x01) != 0) { - targetMapOffsets[sourceId] = ofs; - sourceId++; - } - accum += val >>> 1; - targetMap[ofs] = accum; - } - if (sourceId + 1 != targetMapOffsets.length) - throw new IOException( - "targetMap file format broken; targetMap.length=" - + targetMap.length - + ", targetMapOffsets.length=" - + targetMapOffsets.length - + ", sourceId=" - + sourceId); - targetMapOffsets[sourceId] = targetMap.length; - } - - public void lookupWordIds(int sourceId, IntsRef ref) { - ref.ints = targetMap; - ref.offset = targetMapOffsets[sourceId]; - // targetMapOffsets always has one more entry pointing behind last: - ref.length = targetMapOffsets[sourceId + 1] - ref.offset; - } - - @Override - public int getLeftId(int wordId) { - return buffer.getShort(wordId) >>> 2; - } - - @Override - public int getRightId(int wordId) { - return buffer.getShort(wordId + 2) >>> 2; // Skip left id - } - - @Override - public int getWordCost(int wordId) { - return buffer.getShort(wordId + 4); // Skip left and right id - } - - @Override - public POS.Type getPOSType(int wordId) { - byte value = (byte) 
(buffer.getShort(wordId) & 3); - return POS.resolveType(value); - } - - @Override - public POS.Tag getLeftPOS(int wordId) { - return posDict[getLeftId(wordId)]; - } - - @Override - public POS.Tag getRightPOS(int wordId) { - POS.Type type = getPOSType(wordId); - if (type == POS.Type.MORPHEME || type == POS.Type.COMPOUND || hasSinglePOS(wordId)) { - return getLeftPOS(wordId); - } else { - byte value = buffer.get(wordId + 6); - return POS.resolveTag(value); - } - } - - @Override - public String getReading(int wordId) { - if (hasReadingData(wordId)) { - int offset = wordId + 6; - return readString(offset); - } - return null; - } - - @Override - public Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len) { - POS.Type posType = getPOSType(wordId); - if (posType == POS.Type.MORPHEME) { - return null; - } - int offset = wordId + 6; - boolean hasSinglePos = hasSinglePOS(wordId); - if (hasSinglePos == false) { - offset++; // skip rightPOS - } - int length = buffer.get(offset++); - if (length == 0) { - return null; - } - Morpheme[] morphemes = new Morpheme[length]; - int surfaceOffset = 0; - final POS.Tag leftPOS = getLeftPOS(wordId); - for (int i = 0; i < length; i++) { - final String form; - final POS.Tag tag = hasSinglePos ? 
leftPOS : POS.resolveTag(buffer.get(offset++)); - if (posType == POS.Type.INFLECT) { - form = readString(offset); - offset += form.length() * 2 + 1; - } else { - int formLen = buffer.get(offset++); - form = new String(surfaceForm, off + surfaceOffset, formLen); - surfaceOffset += formLen; - } - morphemes[i] = new Morpheme(tag, form); - } - return morphemes; - } - - private String readString(int offset) { - int strOffset = offset; - int len = buffer.get(strOffset++); - char[] text = new char[len]; - for (int i = 0; i < len; i++) { - text[i] = buffer.getChar(strOffset + (i << 1)); - } - return new String(text); - } - - private boolean hasSinglePOS(int wordId) { - return (buffer.getShort(wordId + 2) & HAS_SINGLE_POS) != 0; - } - - private boolean hasReadingData(int wordId) { - return (buffer.getShort(wordId + 2) & HAS_READING) != 0; - } - - /** flag that the entry has a single part of speech (leftPOS) */ - public static final int HAS_SINGLE_POS = 1; - - /** flag that the entry has reading data. otherwise reading is surface form */ - public static final int HAS_READING = 2; -} diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java index 5e2e48932bab..bcdcfde2f064 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/CharacterDefinition.java @@ -16,20 +16,13 @@ */ package org.apache.lucene.analysis.ko.dict; -import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOUtils; /** Character category data. 
*/ -public final class CharacterDefinition { - - public static final String FILENAME_SUFFIX = ".dat"; - public static final String HEADER = "ko_cd"; - public static final int VERSION = 1; +public final class CharacterDefinition + extends org.apache.lucene.analysis.morph.CharacterDefinition { public static final int CLASS_COUNT = CharacterClass.values().length; @@ -51,11 +44,6 @@ enum CharacterClass { HANJANUMERIC } - private final byte[] characterCategoryMap = new byte[0x10000]; - - private final boolean[] invokeMap = new boolean[CLASS_COUNT]; - private final boolean[] groupMap = new boolean[CLASS_COUNT]; - // the classes: public static final byte NGRAM = (byte) CharacterClass.NGRAM.ordinal(); public static final byte DEFAULT = (byte) CharacterClass.DEFAULT.ordinal(); @@ -73,16 +61,11 @@ enum CharacterClass { public static final byte HANJANUMERIC = (byte) CharacterClass.HANJANUMERIC.ordinal(); private CharacterDefinition() throws IOException { - try (InputStream is = new BufferedInputStream(getClassResource())) { - final DataInput in = new InputStreamDataInput(is); - CodecUtil.checkHeader(in, HEADER, VERSION, VERSION); - in.readBytes(characterCategoryMap, 0, characterCategoryMap.length); - for (int i = 0; i < CLASS_COUNT; i++) { - final byte b = in.readByte(); - invokeMap[i] = (b & 0x01) != 0; - groupMap[i] = (b & 0x02) != 0; - } - } + super( + CharacterDefinition::getClassResource, + DictionaryConstants.CHARDEF_HEADER, + DictionaryConstants.VERSION, + CharacterDefinition.CLASS_COUNT); } private static InputStream getClassResource() throws IOException { @@ -91,18 +74,6 @@ private static InputStream getClassResource() throws IOException { CharacterDefinition.class.getResourceAsStream(resourcePath), resourcePath); } - public byte getCharacterClass(char c) { - return characterCategoryMap[c]; - } - - public boolean isInvoke(char c) { - return invokeMap[characterCategoryMap[c]]; - } - - public boolean isGroup(char c) { - return groupMap[characterCategoryMap[c]]; - } - 
public boolean isHanja(char c) { final byte characterClass = getCharacterClass(c); return characterClass == HANJA || characterClass == HANJANUMERIC; diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java index 61579aef8450..3b13a86b90a7 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/ConnectionCosts.java @@ -16,27 +16,15 @@ */ package org.apache.lucene.analysis.ko.dict; -import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; -import java.nio.ByteBuffer; import java.nio.file.Files; import java.nio.file.Path; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.store.DataInput; -import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOSupplier; import org.apache.lucene.util.IOUtils; /** n-gram connection cost data */ -public final class ConnectionCosts { - - public static final String FILENAME_SUFFIX = ".dat"; - public static final String HEADER = "ko_cc"; - public static final int VERSION = 1; - - private final ByteBuffer buffer; - private final int forwardSize; +public final class ConnectionCosts extends org.apache.lucene.analysis.morph.ConnectionCosts { /** * Create a {@link ConnectionCosts} from an external resource path. 
@@ -53,24 +41,8 @@ private ConnectionCosts() throws IOException { } private ConnectionCosts(IOSupplier connectionCostResource) throws IOException { - try (InputStream is = new BufferedInputStream(connectionCostResource.get())) { - final DataInput in = new InputStreamDataInput(is); - CodecUtil.checkHeader(in, HEADER, VERSION, VERSION); - this.forwardSize = in.readVInt(); - int backwardSize = in.readVInt(); - int size = forwardSize * backwardSize; - - // copy the matrix into a direct byte buffer - final ByteBuffer tmpBuffer = ByteBuffer.allocateDirect(size * 2); - int accum = 0; - for (int j = 0; j < backwardSize; j++) { - for (int i = 0; i < forwardSize; i++) { - accum += in.readZInt(); - tmpBuffer.putShort((short) accum); - } - } - buffer = tmpBuffer.asReadOnlyBuffer(); - } + super( + connectionCostResource, DictionaryConstants.CONN_COSTS_HEADER, DictionaryConstants.VERSION); } private static InputStream getClassResource() throws IOException { @@ -79,12 +51,6 @@ private static InputStream getClassResource() throws IOException { ConnectionCosts.class.getResourceAsStream(resourcePath), resourcePath); } - public int get(int forwardId, int backwardId) { - // map 2d matrix into a single dimension short array - int offset = (backwardId * forwardSize + forwardId) * 2; - return buffer.getShort(offset); - } - public static ConnectionCosts getInstance() { return SingletonHolder.INSTANCE; } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/DictionaryConstants.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/DictionaryConstants.java new file mode 100644 index 000000000000..4f8761e59e7e --- /dev/null +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/DictionaryConstants.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.ko.dict; + +/** Dictionary constants */ +public final class DictionaryConstants { + /** Codec header of the dictionary file. */ + public static final String DICT_HEADER = "ko_dict"; + /** Codec header of the dictionary mapping file. */ + public static final String TARGETMAP_HEADER = "ko_dict_map"; + /** Codec header of the POS dictionary file. */ + public static final String POSDICT_HEADER = "ko_dict_pos"; + /** Codec header of the connection costs file. 
*/ + public static final String CONN_COSTS_HEADER = "ko_cc"; + /** Codec header of the character definition file */ + public static final String CHARDEF_HEADER = "ko_cd"; + /** Codec version of the binary dictionary */ + public static final int VERSION = 1; +} diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/Dictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java similarity index 52% rename from lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/Dictionary.java rename to lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java index b25e1b5ff5ed..0887a7f0c428 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/Dictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/KoMorphData.java @@ -16,51 +16,47 @@ */ package org.apache.lucene.analysis.ko.dict; -import org.apache.lucene.analysis.ko.POS.Tag; -import org.apache.lucene.analysis.ko.POS.Type; +import org.apache.lucene.analysis.ko.POS; +import org.apache.lucene.analysis.morph.MorphData; -/** Dictionary interface for retrieving morphological data by id. */ -public interface Dictionary { +/** Represents Korean morphological information. */ +public interface KoMorphData extends MorphData { /** A morpheme extracted from a compound token. 
*/ class Morpheme { - public final Tag posTag; + public final POS.Tag posTag; public final String surfaceForm; - public Morpheme(Tag posTag, String surfaceForm) { + public Morpheme(POS.Tag posTag, String surfaceForm) { this.posTag = posTag; this.surfaceForm = surfaceForm; } } - /** Get left id of specified word */ - int getLeftId(int wordId); - - /** Get right id of specified word */ - int getRightId(int wordId); - - /** Get word cost of specified word */ - int getWordCost(int wordId); - - /** Get the {@link Type} of specified word (morpheme, compound, inflect or pre-analysis) */ - Type getPOSType(int wordId); + /** + * Get the {@link org.apache.lucene.analysis.ko.POS.Type} of specified word (morpheme, compound, + * inflect or pre-analysis) + */ + POS.Type getPOSType(int morphId); /** - * Get the left {@link Tag} of specfied word. + * Get the left {@link org.apache.lucene.analysis.ko.POS.Tag} of specified word. * - *

For {@link Type#MORPHEME} and {@link Type#COMPOUND} the left and right POS are the same. + *

For {@link org.apache.lucene.analysis.ko.POS.Type#MORPHEME} and {@link + * org.apache.lucene.analysis.ko.POS.Type#COMPOUND} the left and right POS are the same. */ - Tag getLeftPOS(int wordId); + POS.Tag getLeftPOS(int morphId); /** - * Get the right {@link Tag} of specfied word. + * Get the right {@link org.apache.lucene.analysis.ko.POS.Tag} of specified word. * - *

For {@link Type#MORPHEME} and {@link Type#COMPOUND} the left and right POS are the same. + *

For {@link org.apache.lucene.analysis.ko.POS.Type#MORPHEME} and {@link + * org.apache.lucene.analysis.ko.POS.Type#COMPOUND} the left and right POS are the same. */ - Tag getRightPOS(int wordId); + POS.Tag getRightPOS(int morphId); /** Get the reading of specified word (mainly used for Hanja to Hangul conversion). */ - String getReading(int wordId); + String getReading(int morphId); /** Get the morphemes of specified word (e.g. 가깝으나: 가깝 + 으나). */ - Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len); + Morpheme[] getMorphemes(int morphId, char[] surfaceForm, int off, int len); } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java index 1f936b44724b..b8132cea61b5 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoDictionary.java @@ -21,6 +21,7 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import org.apache.lucene.analysis.morph.BinaryDictionary; import org.apache.lucene.store.DataInput; import org.apache.lucene.store.InputStreamDataInput; import org.apache.lucene.util.IOSupplier; @@ -32,11 +33,12 @@ * Binary dictionary implementation for a known-word dictionary model: Words are encoded into an FST * mapping to a list of wordIDs. 
*/ -public final class TokenInfoDictionary extends BinaryDictionary { +public final class TokenInfoDictionary extends BinaryDictionary { public static final String FST_FILENAME_SUFFIX = "$fst.dat"; private final TokenInfoFST fst; + private final TokenInfoMorphData morphAtts; private TokenInfoDictionary() throws IOException { this( @@ -70,7 +72,13 @@ private TokenInfoDictionary( IOSupplier dictResource, IOSupplier fstResource) throws IOException { - super(targetMapResource, posResource, dictResource); + super( + targetMapResource, + dictResource, + DictionaryConstants.TARGETMAP_HEADER, + DictionaryConstants.DICT_HEADER, + DictionaryConstants.VERSION); + this.morphAtts = new TokenInfoMorphData(buffer, posResource); FST fst; try (InputStream is = new BufferedInputStream(fstResource.get())) { DataInput in = new InputStreamDataInput(is); @@ -93,6 +101,11 @@ public static TokenInfoDictionary getInstance() { return SingletonHolder.INSTANCE; } + @Override + public TokenInfoMorphData getMorphAttributes() { + return morphAtts; + } + private static class SingletonHolder { static final TokenInfoDictionary INSTANCE; diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoMorphData.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoMorphData.java new file mode 100644 index 000000000000..c8046b2e6041 --- /dev/null +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/TokenInfoMorphData.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.ko.dict; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import org.apache.lucene.analysis.ko.POS; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.InputStreamDataInput; +import org.apache.lucene.util.IOSupplier; + +/** Morphological information for system dictionary. */ +public class TokenInfoMorphData implements KoMorphData { + + private final ByteBuffer buffer; + private final POS.Tag[] posDict; + + TokenInfoMorphData(ByteBuffer buffer, IOSupplier posResource) throws IOException { + this.buffer = buffer; + try (InputStream posIS = new BufferedInputStream(posResource.get())) { + DataInput in = new InputStreamDataInput(posIS); + CodecUtil.checkHeader( + in, + DictionaryConstants.POSDICT_HEADER, + DictionaryConstants.VERSION, + DictionaryConstants.VERSION); + int posSize = in.readVInt(); + this.posDict = new POS.Tag[posSize]; + for (int j = 0; j < posSize; j++) { + posDict[j] = POS.resolveTag(in.readByte()); + } + } + } + + @Override + public int getLeftId(int morphId) { + return buffer.getShort(morphId) >>> 2; + } + + @Override + public int getRightId(int morphId) { + return buffer.getShort(morphId + 2) >>> 2; // Skip left id + } + + @Override + public int getWordCost(int morphId) { + return buffer.getShort(morphId + 4); // Skip left and right id + } + + @Override + public POS.Type getPOSType(int morphId) { + byte value = (byte) (buffer.getShort(morphId) & 3); + 
return POS.resolveType(value); + } + + @Override + public POS.Tag getLeftPOS(int morphId) { + return posDict[getLeftId(morphId)]; + } + + @Override + public POS.Tag getRightPOS(int morphId) { + POS.Type type = getPOSType(morphId); + if (type == POS.Type.MORPHEME || type == POS.Type.COMPOUND || hasSinglePOS(morphId)) { + return getLeftPOS(morphId); + } else { + byte value = buffer.get(morphId + 6); + return POS.resolveTag(value); + } + } + + @Override + public String getReading(int morphId) { + if (hasReadingData(morphId)) { + int offset = morphId + 6; + return readString(offset); + } + return null; + } + + @Override + public Morpheme[] getMorphemes(int morphId, char[] surfaceForm, int off, int len) { + POS.Type posType = getPOSType(morphId); + if (posType == POS.Type.MORPHEME) { + return null; + } + int offset = morphId + 6; + boolean hasSinglePos = hasSinglePOS(morphId); + if (hasSinglePos == false) { + offset++; // skip rightPOS + } + int length = buffer.get(offset++); + if (length == 0) { + return null; + } + Morpheme[] morphemes = new Morpheme[length]; + int surfaceOffset = 0; + final POS.Tag leftPOS = getLeftPOS(morphId); + for (int i = 0; i < length; i++) { + final String form; + final POS.Tag tag = hasSinglePos ? 
leftPOS : POS.resolveTag(buffer.get(offset++)); + if (posType == POS.Type.INFLECT) { + form = readString(offset); + offset += form.length() * 2 + 1; + } else { + int formLen = buffer.get(offset++); + form = new String(surfaceForm, off + surfaceOffset, formLen); + surfaceOffset += formLen; + } + morphemes[i] = new Morpheme(tag, form); + } + return morphemes; + } + + private String readString(int offset) { + int strOffset = offset; + int len = buffer.get(strOffset++); + char[] text = new char[len]; + for (int i = 0; i < len; i++) { + text[i] = buffer.getChar(strOffset + (i << 1)); + } + return new String(text); + } + + private boolean hasSinglePOS(int wordId) { + return (buffer.getShort(wordId + 2) & HAS_SINGLE_POS) != 0; + } + + private boolean hasReadingData(int wordId) { + return (buffer.getShort(wordId + 2) & HAS_READING) != 0; + } + + /** flag that the entry has a single part of speech (leftPOS) */ + public static final int HAS_SINGLE_POS = 1; + + /** flag that the entry has reading data. otherwise reading is surface form */ + public static final int HAS_READING = 2; +} diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java index f2aa89ee8c54..4b45fd332585 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownDictionary.java @@ -20,11 +20,14 @@ import java.io.InputStream; import java.nio.file.Files; import java.nio.file.Path; +import org.apache.lucene.analysis.morph.BinaryDictionary; +import org.apache.lucene.util.IOSupplier; import org.apache.lucene.util.IOUtils; /** Dictionary for unknown-word handling. 
*/ -public final class UnknownDictionary extends BinaryDictionary { +public final class UnknownDictionary extends BinaryDictionary { private final CharacterDefinition characterDefinition = CharacterDefinition.getInstance(); + private final UnknownMorphData morphAtts; /** * Create a {@link UnknownDictionary} from an external resource path. @@ -35,19 +38,33 @@ public final class UnknownDictionary extends BinaryDictionary { * @throws IOException if resource was not found or broken */ public UnknownDictionary(Path targetMapFile, Path posDictFile, Path dictFile) throws IOException { - super( + this( () -> Files.newInputStream(targetMapFile), () -> Files.newInputStream(posDictFile), () -> Files.newInputStream(dictFile)); } private UnknownDictionary() throws IOException { - super( + this( () -> getClassResource(TARGETMAP_FILENAME_SUFFIX), () -> getClassResource(POSDICT_FILENAME_SUFFIX), () -> getClassResource(DICT_FILENAME_SUFFIX)); } + private UnknownDictionary( + IOSupplier targetMapResource, + IOSupplier posResource, + IOSupplier dictResource) + throws IOException { + super( + targetMapResource, + dictResource, + DictionaryConstants.TARGETMAP_HEADER, + DictionaryConstants.DICT_HEADER, + DictionaryConstants.VERSION); + this.morphAtts = new UnknownMorphData(buffer, posResource); + } + private static InputStream getClassResource(String suffix) throws IOException { final String resourcePath = UnknownDictionary.class.getSimpleName() + suffix; return IOUtils.requireResourceNonNull( @@ -63,13 +80,8 @@ public static UnknownDictionary getInstance() { } @Override - public String getReading(int wordId) { - return null; - } - - @Override - public Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len) { - return null; + public UnknownMorphData getMorphAttributes() { + return morphAtts; } private static class SingletonHolder { diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownMorphData.java 
b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownMorphData.java new file mode 100644 index 000000000000..dafb174f8d55 --- /dev/null +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UnknownMorphData.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.ko.dict; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import org.apache.lucene.util.IOSupplier; + +/** Morphological information for unk dictionary. 
*/ +final class UnknownMorphData extends TokenInfoMorphData { + UnknownMorphData(ByteBuffer buffer, IOSupplier posResource) throws IOException { + super(buffer, posResource); + } + + @Override + public String getReading(int wordId) { + return null; + } + + @Override + public Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len) { + return null; + } +} diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java index f8fcefdd295b..58a233112aa5 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserDictionary.java @@ -22,7 +22,7 @@ import java.util.ArrayList; import java.util.Comparator; import java.util.List; -import org.apache.lucene.analysis.ko.POS; +import org.apache.lucene.analysis.morph.Dictionary; import org.apache.lucene.util.IntsRefBuilder; import org.apache.lucene.util.fst.FST; import org.apache.lucene.util.fst.FSTCompiler; @@ -32,15 +32,10 @@ * Class for building a User Dictionary. This class allows for adding custom nouns (세종) or compounds * (세종시 세종 시). */ -public final class UserDictionary implements Dictionary { +public final class UserDictionary implements Dictionary { // text -> wordID private final TokenInfoFST fst; - private static final int WORD_COST = -100000; - - // NNG left - private static final short LEFT_ID = 1781; - // NNG right private static final short RIGHT_ID = 3533; // NNG right with hangul and a coda on the last char @@ -48,9 +43,7 @@ public final class UserDictionary implements Dictionary { // NNG right with hangul and no coda on the last char private static final short RIGHT_ID_F = 3534; - // length, length... 
indexed by compound ID or null for simple noun - private final int[][] segmentations; - private final short[] rightIds; + private UserMorphData morphAtts; public static UserDictionary open(Reader reader) throws IOException { @@ -86,8 +79,8 @@ private UserDictionary(List entries) throws IOException { IntsRefBuilder scratch = new IntsRefBuilder(); String lastToken = null; - List segmentations = new ArrayList<>(entries.size()); - List rightIds = new ArrayList<>(entries.size()); + List _segmentations = new ArrayList<>(entries.size()); + List _rightIds = new ArrayList<>(entries.size()); long ord = 0; for (String entry : entries) { String[] splits = entry.split("\\s+"); @@ -98,16 +91,16 @@ private UserDictionary(List entries) throws IOException { char lastChar = entry.charAt(entry.length() - 1); if (charDef.isHangul(lastChar)) { if (charDef.hasCoda(lastChar)) { - rightIds.add(RIGHT_ID_T); + _rightIds.add(RIGHT_ID_T); } else { - rightIds.add(RIGHT_ID_F); + _rightIds.add(RIGHT_ID_F); } } else { - rightIds.add(RIGHT_ID); + _rightIds.add(RIGHT_ID); } if (splits.length == 1) { - segmentations.add(null); + _segmentations.add(null); } else { int[] length = new int[splits.length - 1]; int offset = 0; @@ -123,7 +116,7 @@ private UserDictionary(List entries) throws IOException { + token + ")"); } - segmentations.add(length); + _segmentations.add(length); } // add mapping to FST @@ -137,11 +130,12 @@ private UserDictionary(List entries) throws IOException { ord++; } this.fst = new TokenInfoFST(fstCompiler.compile()); - this.segmentations = segmentations.toArray(new int[segmentations.size()][]); - this.rightIds = new short[rightIds.size()]; - for (int i = 0; i < rightIds.size(); i++) { - this.rightIds[i] = rightIds.get(i); + int[][] segmentations = _segmentations.toArray(new int[_segmentations.size()][]); + short[] rightIds = new short[_rightIds.size()]; + for (int i = 0; i < _rightIds.size(); i++) { + rightIds[i] = _rightIds.get(i); } + this.morphAtts = new 
UserMorphData(segmentations, rightIds); } public TokenInfoFST getFST() { @@ -149,57 +143,8 @@ public TokenInfoFST getFST() { } @Override - public int getLeftId(int wordId) { - return LEFT_ID; - } - - @Override - public int getRightId(int wordId) { - return rightIds[wordId]; - } - - @Override - public int getWordCost(int wordId) { - return WORD_COST; - } - - @Override - public POS.Type getPOSType(int wordId) { - if (segmentations[wordId] == null) { - return POS.Type.MORPHEME; - } else { - return POS.Type.COMPOUND; - } - } - - @Override - public POS.Tag getLeftPOS(int wordId) { - return POS.Tag.NNG; - } - - @Override - public POS.Tag getRightPOS(int wordId) { - return POS.Tag.NNG; - } - - @Override - public String getReading(int wordId) { - return null; - } - - @Override - public Morpheme[] getMorphemes(int wordId, char[] surfaceForm, int off, int len) { - int[] segs = segmentations[wordId]; - if (segs == null) { - return null; - } - int offset = 0; - Morpheme[] morphemes = new Morpheme[segs.length]; - for (int i = 0; i < segs.length; i++) { - morphemes[i] = new Morpheme(POS.Tag.NNG, new String(surfaceForm, off + offset, segs[i])); - offset += segs[i]; - } - return morphemes; + public UserMorphData getMorphAttributes() { + return morphAtts; } /** diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java new file mode 100644 index 000000000000..2056bd0829c9 --- /dev/null +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/dict/UserMorphData.java @@ -0,0 +1,90 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.analysis.ko.dict; + +import org.apache.lucene.analysis.ko.POS; + +/** Morphological information for user dictionary. */ +final class UserMorphData implements KoMorphData { + private static final int WORD_COST = -100000; + + // NNG left + private static final short LEFT_ID = 1781; + + // length, length... indexed by compound ID or null for simple noun + private final int[][] segmentations; + private final short[] rightIds; + + UserMorphData(int[][] segmentations, short[] rightIds) { + this.segmentations = segmentations; + this.rightIds = rightIds; + } + + @Override + public int getLeftId(int morphId) { + return LEFT_ID; + } + + @Override + public int getRightId(int morphId) { + return rightIds[morphId]; + } + + @Override + public int getWordCost(int morphId) { + return WORD_COST; + } + + @Override + public POS.Type getPOSType(int morphId) { + if (segmentations[morphId] == null) { + return POS.Type.MORPHEME; + } else { + return POS.Type.COMPOUND; + } + } + + @Override + public POS.Tag getLeftPOS(int morphId) { + return POS.Tag.NNG; + } + + @Override + public POS.Tag getRightPOS(int morphId) { + return POS.Tag.NNG; + } + + @Override + public String getReading(int morphId) { + return null; + } + + @Override + public Morpheme[] getMorphemes(int morphId, char[] surfaceForm, int off, int len) { + int[] segs = segmentations[morphId]; + if (segs == null) { + return 
null; + } + int offset = 0; + Morpheme[] morphemes = new Morpheme[segs.length]; + for (int i = 0; i < segs.length; i++) { + morphemes[i] = new Morpheme(POS.Tag.NNG, new String(surfaceForm, off + offset, segs[i])); + offset += segs[i]; + } + return morphemes; + } +} diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttribute.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttribute.java index c9fb33b17c2d..1e28155e667e 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttribute.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttribute.java @@ -19,7 +19,7 @@ import org.apache.lucene.analysis.ko.POS.Tag; import org.apache.lucene.analysis.ko.POS.Type; import org.apache.lucene.analysis.ko.Token; -import org.apache.lucene.analysis.ko.dict.Dictionary.Morpheme; +import org.apache.lucene.analysis.ko.dict.KoMorphData; import org.apache.lucene.util.Attribute; /** @@ -37,8 +37,11 @@ public interface PartOfSpeechAttribute extends Attribute { /** Get the right part of speech of the token. */ Tag getRightPOS(); - /** Get the {@link Morpheme} decomposition of the token. */ - Morpheme[] getMorphemes(); + /** + * Get the {@link org.apache.lucene.analysis.ko.dict.KoMorphData.Morpheme} decomposition of the + * token. + */ + KoMorphData.Morpheme[] getMorphemes(); /** Set the current token. 
*/ void setToken(Token token); diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java index a1b04cb7f057..a59999db557f 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/tokenattributes/PartOfSpeechAttributeImpl.java @@ -19,7 +19,7 @@ import org.apache.lucene.analysis.ko.POS.Tag; import org.apache.lucene.analysis.ko.POS.Type; import org.apache.lucene.analysis.ko.Token; -import org.apache.lucene.analysis.ko.dict.Dictionary.Morpheme; +import org.apache.lucene.analysis.ko.dict.KoMorphData; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.util.AttributeReflector; @@ -47,7 +47,7 @@ public Tag getRightPOS() { } @Override - public Morpheme[] getMorphemes() { + public KoMorphData.Morpheme[] getMorphemes() { return token == null ? 
null : token.getMorphemes(); } @@ -76,12 +76,12 @@ public void reflectWith(AttributeReflector reflector) { reflector.reflect(PartOfSpeechAttribute.class, "morphemes", displayMorphemes(getMorphemes())); } - private String displayMorphemes(Morpheme[] morphemes) { + private String displayMorphemes(KoMorphData.Morpheme[] morphemes) { if (morphemes == null) { return null; } StringBuilder builder = new StringBuilder(); - for (Morpheme morpheme : morphemes) { + for (KoMorphData.Morpheme morpheme : morphemes) { if (builder.length() > 0) { builder.append("+"); } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/ConnectionCostsBuilder.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/ConnectionCostsBuilder.java index 4a6fd6d353c7..31505fb45c8b 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/ConnectionCostsBuilder.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/ConnectionCostsBuilder.java @@ -22,12 +22,14 @@ import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; +import org.apache.lucene.analysis.ko.dict.ConnectionCosts; +import org.apache.lucene.analysis.morph.ConnectionCostsWriter; class ConnectionCostsBuilder { private ConnectionCostsBuilder() {} - public static ConnectionCostsWriter build(Path path) throws IOException { + public static ConnectionCostsWriter build(Path path) throws IOException { try (Reader reader = Files.newBufferedReader(path, StandardCharsets.US_ASCII); LineNumberReader lineReader = new LineNumberReader(reader)) { @@ -41,7 +43,8 @@ public static ConnectionCostsWriter build(Path path) throws IOException { assert forwardSize > 0 && backwardSize > 0; - ConnectionCostsWriter costs = new ConnectionCostsWriter(forwardSize, backwardSize); + ConnectionCostsWriter costs = + new ConnectionCostsWriter<>(ConnectionCosts.class, forwardSize, backwardSize); while ((line = lineReader.readLine()) != null) { String[] fields 
= line.split("\\s+"); diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/ConnectionCostsWriter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/ConnectionCostsWriter.java deleted file mode 100644 index 7265ef84dbba..000000000000 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/ConnectionCostsWriter.java +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.analysis.ko.util; - -import java.io.BufferedOutputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.nio.ByteBuffer; -import java.nio.file.Files; -import java.nio.file.Path; -import org.apache.lucene.analysis.ko.dict.ConnectionCosts; -import org.apache.lucene.codecs.CodecUtil; -import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.OutputStreamDataOutput; - -final class ConnectionCostsWriter { - - private final ByteBuffer - costs; // array is backward IDs first since get is called using the same backward ID - // consecutively. maybe doesn't matter. - private final int forwardSize; - private final int backwardSize; - /** Constructor for building. 
TODO: remove write access */ - ConnectionCostsWriter(int forwardSize, int backwardSize) { - this.forwardSize = forwardSize; - this.backwardSize = backwardSize; - this.costs = ByteBuffer.allocateDirect(2 * backwardSize * forwardSize); - } - - public void add(int forwardId, int backwardId, int cost) { - int offset = (backwardId * forwardSize + forwardId) * 2; - costs.putShort(offset, (short) cost); - } - - public void write(Path baseDir) throws IOException { - Files.createDirectories(baseDir); - String fileName = - ConnectionCosts.class.getName().replace('.', '/') + ConnectionCosts.FILENAME_SUFFIX; - try (OutputStream os = Files.newOutputStream(baseDir.resolve(fileName)); - OutputStream bos = new BufferedOutputStream(os)) { - final DataOutput out = new OutputStreamDataOutput(bos); - CodecUtil.writeHeader(out, ConnectionCosts.HEADER, ConnectionCosts.VERSION); - out.writeVInt(forwardSize); - out.writeVInt(backwardSize); - int last = 0; - for (int i = 0; i < costs.limit() / 2; i++) { - short cost = costs.getShort(i * 2); - int delta = (int) cost - last; - out.writeZInt(delta); - last = cost; - } - } - } -} diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/DictionaryBuilder.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/DictionaryBuilder.java index e4c3b20c9b6b..0e8f11f0cf8e 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/DictionaryBuilder.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/DictionaryBuilder.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; +import org.apache.lucene.analysis.ko.dict.DictionaryConstants; /** Tool to build dictionaries. 
*/ public class DictionaryBuilder { @@ -34,7 +35,8 @@ public static void build(Path inputDir, Path outputDir, String encoding, boolean new UnknownDictionaryBuilder(encoding).build(inputDir).write(outputDir); // Build Connection Cost - ConnectionCostsBuilder.build(inputDir.resolve("matrix.def")).write(outputDir); + ConnectionCostsBuilder.build(inputDir.resolve("matrix.def")) + .write(outputDir, DictionaryConstants.CONN_COSTS_HEADER, DictionaryConstants.VERSION); } public static void main(String[] args) throws IOException { diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryEntryWriter.java similarity index 50% rename from lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java rename to lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryEntryWriter.java index 37ed157f8c8a..e18cae357d34 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/BinaryDictionaryWriter.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryEntryWriter.java @@ -16,38 +16,25 @@ */ package org.apache.lucene.analysis.ko.util; -import java.io.BufferedOutputStream; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; -import java.nio.channels.Channels; -import java.nio.channels.WritableByteChannel; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import org.apache.lucene.analysis.ko.POS; -import org.apache.lucene.analysis.ko.dict.BinaryDictionary; -import org.apache.lucene.analysis.ko.dict.Dictionary; -import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.analysis.ko.dict.KoMorphData; +import org.apache.lucene.analysis.ko.dict.TokenInfoMorphData; +import 
org.apache.lucene.analysis.morph.DictionaryEntryWriter; import org.apache.lucene.store.DataOutput; -import org.apache.lucene.store.OutputStreamDataOutput; import org.apache.lucene.util.ArrayUtil; -abstract class BinaryDictionaryWriter { +/** Writes system dictionary entries. */ +class TokenInfoDictionaryEntryWriter extends DictionaryEntryWriter { private static final int ID_LIMIT = 8192; - private final Class implClazz; - protected ByteBuffer buffer; - private int targetMapEndOffset = 0, lastWordId = -1, lastSourceId = -1; - private int[] targetMap = new int[8192]; - private int[] targetMapOffsets = new int[8192]; - private final ArrayList posDict = new ArrayList<>(); - - BinaryDictionaryWriter(Class implClazz, int size) { - this.implClazz = implClazz; - buffer = ByteBuffer.allocateDirect(size); + TokenInfoDictionaryEntryWriter(int size) { + super(size); } /** @@ -72,7 +59,8 @@ abstract class BinaryDictionaryWriter { * * @return current position of buffer, which will be wordId of next entry */ - public int put(String[] entry) { + @Override + protected int putEntry(String[] entry) { short leftId = Short.parseShort(entry[1]); short rightId = Short.parseShort(entry[2]); short wordCost = Short.parseShort(entry[3]); @@ -113,7 +101,7 @@ public int put(String[] entry) { assert existing == null || existing.equals(fullPOSData); posDict.set(leftId, fullPOSData); - final List morphemes = new ArrayList<>(); + final List morphemes = new ArrayList<>(); // true if the POS and decompounds of the token are all the same. 
boolean hasSinglePOS = (leftPOS == rightPOS); if (posType != POS.Type.MORPHEME && expression.length() > 0) { @@ -124,7 +112,7 @@ public int put(String[] entry) { String surfaceForm = tokenSplit[0].trim(); if (surfaceForm.isEmpty() == false) { POS.Tag exprTag = POS.resolveTag(tokenSplit[1]); - morphemes.add(new Dictionary.Morpheme(exprTag, tokenSplit[0])); + morphemes.add(new KoMorphData.Morpheme(exprTag, tokenSplit[0])); if (leftPOS != exprTag) { hasSinglePOS = false; } @@ -134,10 +122,10 @@ public int put(String[] entry) { int flags = 0; if (hasSinglePOS) { - flags |= BinaryDictionary.HAS_SINGLE_POS; + flags |= TokenInfoMorphData.HAS_SINGLE_POS; } if (posType == POS.Type.MORPHEME && reading.length() > 0) { - flags |= BinaryDictionary.HAS_READING; + flags |= TokenInfoMorphData.HAS_READING; } if (leftId >= ID_LIMIT) { @@ -161,7 +149,7 @@ public int put(String[] entry) { } buffer.put((byte) morphemes.size()); int compoundOffset = 0; - for (Dictionary.Morpheme morpheme : morphemes) { + for (KoMorphData.Morpheme morpheme : morphemes) { if (hasSinglePOS == false) { buffer.put((byte) morpheme.posTag.ordinal()); } @@ -184,112 +172,20 @@ private void writeString(String s) { } } - void addMapping(int sourceId, int wordId) { - if (wordId <= lastWordId) { - throw new IllegalStateException( - "words out of order: " + wordId + " vs lastID: " + lastWordId); - } - - if (sourceId > lastSourceId) { - targetMapOffsets = ArrayUtil.grow(targetMapOffsets, sourceId + 1); - for (int i = lastSourceId + 1; i <= sourceId; i++) { - targetMapOffsets[i] = targetMapEndOffset; - } - } else if (sourceId != lastSourceId) { - throw new IllegalStateException( - "source ids not in increasing order: lastSourceId=" - + lastSourceId - + " vs sourceId=" - + sourceId); - } - - targetMap = ArrayUtil.grow(targetMap, targetMapEndOffset + 1); - targetMap[targetMapEndOffset] = wordId; - targetMapEndOffset++; - - lastSourceId = sourceId; - lastWordId = wordId; - } - - final String getBaseFileName() { - return 
implClazz.getName().replace('.', '/'); - } - - /** - * Write dictionary in file - * - * @throws IOException if an I/O error occurs writing the dictionary files - */ - public void write(Path baseDir) throws IOException { - final String baseName = getBaseFileName(); - writeDictionary(baseDir.resolve(baseName + BinaryDictionary.DICT_FILENAME_SUFFIX)); - writeTargetMap(baseDir.resolve(baseName + BinaryDictionary.TARGETMAP_FILENAME_SUFFIX)); - writePosDict(baseDir.resolve(baseName + BinaryDictionary.POSDICT_FILENAME_SUFFIX)); - } - - private void writeTargetMap(Path path) throws IOException { - Files.createDirectories(path.getParent()); - try (OutputStream os = Files.newOutputStream(path); - OutputStream bos = new BufferedOutputStream(os)) { - final DataOutput out = new OutputStreamDataOutput(bos); - CodecUtil.writeHeader(out, BinaryDictionary.TARGETMAP_HEADER, BinaryDictionary.VERSION); - - final int numSourceIds = lastSourceId + 1; - out.writeVInt(targetMapEndOffset); // <-- size of main array - out.writeVInt(numSourceIds + 1); // <-- size of offset array (+ 1 more entry) - int prev = 0, sourceId = 0; - for (int ofs = 0; ofs < targetMapEndOffset; ofs++) { - final int val = targetMap[ofs], delta = val - prev; - assert delta >= 0; - if (ofs == targetMapOffsets[sourceId]) { - out.writeVInt((delta << 1) | 0x01); - sourceId++; - } else { - out.writeVInt((delta << 1)); - } - prev += delta; - } - if (sourceId != numSourceIds) { - throw new IllegalStateException( - "sourceId:" + sourceId + " != numSourceIds:" + numSourceIds); - } - } - } - - private void writePosDict(Path path) throws IOException { - Files.createDirectories(path.getParent()); - try (OutputStream os = Files.newOutputStream(path); - OutputStream bos = new BufferedOutputStream(os)) { - final DataOutput out = new OutputStreamDataOutput(bos); - CodecUtil.writeHeader(out, BinaryDictionary.POSDICT_HEADER, BinaryDictionary.VERSION); - out.writeVInt(posDict.size()); - for (String s : posDict) { - if (s == null) { - 
out.writeByte((byte) POS.Tag.UNKNOWN.ordinal()); - } else { - String[] data = CSVUtil.parse(s); - if (data.length != 2) { - throw new IllegalArgumentException( - "Malformed pos/inflection: " + s + "; expected 2 characters"); - } - out.writeByte((byte) POS.Tag.valueOf(data[0]).ordinal()); + @Override + protected void writePosDict(OutputStream bos, DataOutput out) throws IOException { + out.writeVInt(posDict.size()); + for (String s : posDict) { + if (s == null) { + out.writeByte((byte) POS.Tag.UNKNOWN.ordinal()); + } else { + String[] data = CSVUtil.parse(s); + if (data.length != 2) { + throw new IllegalArgumentException( + "Malformed pos/inflection: " + s + "; expected 2 characters"); } + out.writeByte((byte) POS.Tag.valueOf(data[0]).ordinal()); } } } - - private void writeDictionary(Path path) throws IOException { - Files.createDirectories(path.getParent()); - try (OutputStream os = Files.newOutputStream(path); - OutputStream bos = new BufferedOutputStream(os)) { - final DataOutput out = new OutputStreamDataOutput(bos); - CodecUtil.writeHeader(out, BinaryDictionary.DICT_HEADER, BinaryDictionary.VERSION); - out.writeVInt(buffer.position()); - final WritableByteChannel channel = Channels.newChannel(bos); - // Write Buffer - buffer.flip(); // set position to 0, set limit to current position - channel.write(buffer); - assert buffer.remaining() == 0L; - } - } } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryWriter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryWriter.java index 316e1b6232ab..78a9acaff8a3 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryWriter.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/TokenInfoDictionaryWriter.java @@ -20,14 +20,16 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.Objects; +import org.apache.lucene.analysis.ko.dict.DictionaryConstants; 
import org.apache.lucene.analysis.ko.dict.TokenInfoDictionary; +import org.apache.lucene.analysis.morph.BinaryDictionaryWriter; import org.apache.lucene.util.fst.FST; -class TokenInfoDictionaryWriter extends BinaryDictionaryWriter { +class TokenInfoDictionaryWriter extends BinaryDictionaryWriter { private FST fst; TokenInfoDictionaryWriter(int size) { - super(TokenInfoDictionary.class, size); + super(TokenInfoDictionary.class, new TokenInfoDictionaryEntryWriter(size)); } public void setFST(FST fst) { @@ -35,9 +37,19 @@ public void setFST(FST fst) { this.fst = fst; } + @Override + protected void addMapping(int sourceId, int wordId) { + super.addMapping(sourceId, wordId); + } + @Override public void write(Path baseDir) throws IOException { - super.write(baseDir); + super.write( + baseDir, + DictionaryConstants.TARGETMAP_HEADER, + DictionaryConstants.POSDICT_HEADER, + DictionaryConstants.DICT_HEADER, + DictionaryConstants.VERSION); writeFST(baseDir.resolve(getBaseFileName() + TokenInfoDictionary.FST_FILENAME_SUFFIX)); } diff --git a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryWriter.java b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryWriter.java index 97b525d60f0a..82285e1c78b3 100644 --- a/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryWriter.java +++ b/lucene/analysis/nori/src/java/org/apache/lucene/analysis/ko/util/UnknownDictionaryWriter.java @@ -19,20 +19,29 @@ import java.io.IOException; import java.nio.file.Path; import org.apache.lucene.analysis.ko.dict.CharacterDefinition; +import org.apache.lucene.analysis.ko.dict.DictionaryConstants; import org.apache.lucene.analysis.ko.dict.UnknownDictionary; +import org.apache.lucene.analysis.morph.BinaryDictionaryWriter; +import org.apache.lucene.analysis.morph.CharacterDefinitionWriter; -class UnknownDictionaryWriter extends BinaryDictionaryWriter { +class UnknownDictionaryWriter extends BinaryDictionaryWriter { 
- private final CharacterDefinitionWriter characterDefinition = new CharacterDefinitionWriter(); + private final org.apache.lucene.analysis.morph.CharacterDefinitionWriter + characterDefinition = + new CharacterDefinitionWriter<>( + CharacterDefinition.class, + CharacterDefinition.DEFAULT, + CharacterDefinition.CLASS_COUNT, + CharacterDefinition::lookupCharacterClass); public UnknownDictionaryWriter(int size) { - super(UnknownDictionary.class, size); + super(UnknownDictionary.class, new TokenInfoDictionaryEntryWriter(size)); } @Override public int put(String[] entry) { // Get wordId of current entry - int wordId = buffer.position(); + int wordId = entryWriter.currentPosition(); // Put entry int result = super.put(entry); @@ -59,7 +68,13 @@ public void putInvokeDefinition(String characterClassName, int invoke, int group @Override public void write(Path baseDir) throws IOException { - super.write(baseDir); - characterDefinition.write(baseDir); + super.write( + baseDir, + DictionaryConstants.TARGETMAP_HEADER, + DictionaryConstants.POSDICT_HEADER, + DictionaryConstants.DICT_HEADER, + DictionaryConstants.VERSION); + characterDefinition.write( + baseDir, DictionaryConstants.CHARDEF_HEADER, DictionaryConstants.VERSION); } } diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java index 5f8edab8934b..2ba7cee6a37c 100644 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestExternalDictionary.java @@ -16,10 +16,10 @@ */ package org.apache.lucene.analysis.ko.dict; -import static org.apache.lucene.analysis.ko.dict.BinaryDictionary.DICT_FILENAME_SUFFIX; -import static org.apache.lucene.analysis.ko.dict.BinaryDictionary.POSDICT_FILENAME_SUFFIX; -import static 
org.apache.lucene.analysis.ko.dict.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX; import static org.apache.lucene.analysis.ko.dict.TokenInfoDictionary.FST_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.DICT_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.POSDICT_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX; import java.io.BufferedWriter; import java.nio.charset.StandardCharsets; diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestTokenInfoDictionary.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestTokenInfoDictionary.java index 39fc55065d96..6948c4aa851e 100644 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestTokenInfoDictionary.java +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestTokenInfoDictionary.java @@ -16,10 +16,10 @@ */ package org.apache.lucene.analysis.ko.dict; -import static org.apache.lucene.analysis.ko.dict.BinaryDictionary.DICT_FILENAME_SUFFIX; -import static org.apache.lucene.analysis.ko.dict.BinaryDictionary.POSDICT_FILENAME_SUFFIX; -import static org.apache.lucene.analysis.ko.dict.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX; import static org.apache.lucene.analysis.ko.dict.TokenInfoDictionary.FST_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.DICT_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.POSDICT_FILENAME_SUFFIX; +import static org.apache.lucene.analysis.morph.BinaryDictionary.TARGETMAP_FILENAME_SUFFIX; import java.io.OutputStream; import java.io.OutputStreamWriter; @@ -141,13 +141,13 @@ public void testEnumerateAll() throws Exception { tid.getWordCost(wordId); - POS.Type type = tid.getPOSType(wordId); - POS.Tag leftPOS = tid.getLeftPOS(wordId); - POS.Tag rightPOS = tid.getRightPOS(wordId); + POS.Type type = 
tid.getMorphAttributes().getPOSType(wordId); + POS.Tag leftPOS = tid.getMorphAttributes().getLeftPOS(wordId); + POS.Tag rightPOS = tid.getMorphAttributes().getRightPOS(wordId); if (type == POS.Type.MORPHEME) { assertSame(leftPOS, rightPOS); - String reading = tid.getReading(wordId); + String reading = tid.getMorphAttributes().getReading(wordId); boolean isHanja = charDef.isHanja(surfaceForm.charAt(0)); if (isHanja) { assertNotNull(reading); @@ -163,10 +163,11 @@ public void testEnumerateAll() throws Exception { assertSame(leftPOS, rightPOS); assertTrue(leftPOS == POS.Tag.NNG || rightPOS == POS.Tag.NNP); } - Dictionary.Morpheme[] decompound = tid.getMorphemes(wordId, chars, 0, chars.length); + KoMorphData.Morpheme[] decompound = + tid.getMorphAttributes().getMorphemes(wordId, chars, 0, chars.length); if (decompound != null) { int offset = 0; - for (Dictionary.Morpheme morph : decompound) { + for (KoMorphData.Morpheme morph : decompound) { assertTrue(UnicodeUtil.validUTF16String(morph.surfaceForm)); assertFalse(morph.surfaceForm.isEmpty()); assertEquals(morph.surfaceForm.trim(), morph.surfaceForm); diff --git a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUserDictionary.java b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUserDictionary.java index 7f97d0520e8e..8fb2827a336b 100644 --- a/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUserDictionary.java +++ b/lucene/analysis/nori/src/test/org/apache/lucene/analysis/ko/dict/TestUserDictionary.java @@ -31,16 +31,16 @@ public void testLookup() throws IOException { char[] sArray = s.toCharArray(); List wordIds = dictionary.lookup(sArray, 0, s.length()); assertEquals(1, wordIds.size()); - assertNull(dictionary.getMorphemes(wordIds.get(0), sArray, 0, s.length())); + assertNull(dictionary.getMorphAttributes().getMorphemes(wordIds.get(0), sArray, 0, s.length())); s = "세종시"; sArray = s.toCharArray(); wordIds = dictionary.lookup(sArray, 0, s.length()); 
assertEquals(2, wordIds.size()); - assertNull(dictionary.getMorphemes(wordIds.get(0), sArray, 0, s.length())); + assertNull(dictionary.getMorphAttributes().getMorphemes(wordIds.get(0), sArray, 0, s.length())); - Dictionary.Morpheme[] decompound = - dictionary.getMorphemes(wordIds.get(1), sArray, 0, s.length()); + KoMorphData.Morpheme[] decompound = + dictionary.getMorphAttributes().getMorphemes(wordIds.get(1), sArray, 0, s.length()); assertNotNull(decompound); assertEquals(2, decompound.length); assertEquals(decompound[0].posTag, POS.Tag.NNG); @@ -52,7 +52,7 @@ public void testLookup() throws IOException { sArray = s.toCharArray(); wordIds = dictionary.lookup(sArray, 0, s.length()); assertEquals(1, wordIds.size()); - assertNull(dictionary.getMorphemes(wordIds.get(0), sArray, 0, s.length())); + assertNull(dictionary.getMorphAttributes().getMorphemes(wordIds.get(0), sArray, 0, s.length())); } @Test