Skip to content

Commit

Permalink
Update codecs to Apache Lucene 9.12.0
Browse files Browse the repository at this point in the history
Signed-off-by: Andriy Redko <andriy.redko@aiven.io>
  • Loading branch information
reta committed Oct 10, 2024
1 parent 5b5d693 commit e501bff
Show file tree
Hide file tree
Showing 34 changed files with 1,306 additions and 177 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
run: |
# https://github.com/opensearch-project/opensearch-build/issues/4191
chown -R ci-runner:ci-runner `pwd`
su ci-runner -c "source /etc/profile.d/java_home.sh && ./gradlew check -Dorg.gradle.java.home=/opt/java/openjdk-${{ matrix.java }}"
su ci-runner -c "source /etc/profile.d/java_home.sh && ./gradlew test check -Dorg.gradle.java.home=/opt/java/openjdk-${{ matrix.java }}"
- name: Run Gradle (assemble)
run: |
# https://github.com/opensearch-project/opensearch-build/issues/4191
Expand All @@ -53,7 +53,7 @@ jobs:
cache: gradle
- name: Run Gradle (check)
run: |
./gradlew check
./gradlew test check
- name: Run Gradle (assemble)
run: |
./gradlew assemble
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.common.Strings;
import org.opensearch.index.codec.customcodecs.Lucene99QatCodec;
import org.opensearch.index.codec.customcodecs.Lucene912QatCodec;
import org.opensearch.index.codec.customcodecs.QatZipperFactory;
import org.opensearch.test.rest.OpenSearchRestTestCase;

Expand Down Expand Up @@ -103,7 +103,10 @@ public void testCreateIndexWithQatSPICodecWithQatHardwareUnavailable() throws IO
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1)
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
.put("index.codec", randomFrom(Lucene99QatCodec.Mode.QAT_LZ4.getCodec(), Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))
.put(
"index.codec",
randomFrom(Lucene912QatCodec.Mode.QAT_LZ4.getCodec(), Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())
)
.put("index.codec.compression_level", randomIntBetween(1, 6))
.build()
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import org.opensearch.common.settings.Setting;
import org.opensearch.index.IndexSettings;
import org.opensearch.index.codec.CodecServiceFactory;
import org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec;
import org.opensearch.index.engine.EngineConfig;
import org.opensearch.plugins.EnginePlugin;
import org.opensearch.plugins.Plugin;
Expand Down Expand Up @@ -49,12 +50,19 @@ public Optional<CodecServiceFactory> getCustomCodecServiceFactory(final IndexSet
|| codecName.equals(CustomCodecService.QAT_DEFLATE_CODEC)) {
return Optional.of(new CustomCodecServiceFactory());
} else {
if (!QatZipperFactory.isQatAvailable()
&& (codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec()))) {
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
if (codecName.equals(Lucene99QatCodec.Mode.QAT_LZ4.getCodec())
|| codecName.equals(Lucene99QatCodec.Mode.QAT_DEFLATE.getCodec())) {
if (!QatZipperFactory.isQatAvailable()) {
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
}
}

if (codecName.equals(Lucene912QatCodec.Mode.QAT_LZ4.getCodec())
|| codecName.equals(Lucene912QatCodec.Mode.QAT_DEFLATE.getCodec())) {
if (!QatZipperFactory.isQatAvailable()) {
throw new IllegalArgumentException("QAT codecs are not supported. Please create indices with a different codec.");
}
}
}
return Optional.empty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.util.Map;
import java.util.stream.Stream;

import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING;
import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;

/** CustomCodecService provides ZSTD, ZSTD_NO_DICT, QAT_LZ4, and QAT_DEFLATE compression codecs. */
Expand Down Expand Up @@ -49,25 +50,26 @@ public CustomCodecService(MapperService mapperService, IndexSettings indexSettin
int compressionLevel = indexSettings.getValue(INDEX_CODEC_COMPRESSION_LEVEL_SETTING);
final MapBuilder<String, Codec> codecs = MapBuilder.<String, Codec>newMapBuilder();
if (mapperService == null) {
codecs.put(ZSTD_CODEC, new Zstd99Codec(compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(compressionLevel));
codecs.put(ZSTD_CODEC, new Zstd912Codec(compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(compressionLevel));
if (QatZipperFactory.isQatAvailable()) {
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
}));
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
codecs.put(
QAT_LZ4_CODEC,
new QatLz4912Codec(compressionLevel, () -> { return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING); })
);
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(compressionLevel, () -> {
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
}));
}
} else {
codecs.put(ZSTD_CODEC, new Zstd99Codec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict99Codec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_CODEC, new Zstd912Codec(mapperService, logger, compressionLevel));
codecs.put(ZSTD_NO_DICT_CODEC, new ZstdNoDict912Codec(mapperService, logger, compressionLevel));
if (QatZipperFactory.isQatAvailable()) {
codecs.put(QAT_LZ4_CODEC, new QatLz499Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
codecs.put(QAT_LZ4_CODEC, new QatLz4912Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
}));
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate99Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(Lucene99QatCodec.INDEX_CODEC_QAT_MODE_SETTING);
codecs.put(QAT_DEFLATE_CODEC, new QatDeflate912Codec(mapperService, logger, compressionLevel, () -> {
return indexSettings.getValue(INDEX_CODEC_QAT_MODE_SETTING);
}));
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@
import org.apache.logging.log4j.Logger;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.opensearch.common.settings.Settings;
import org.apache.lucene.codecs.lucene912.Lucene912Codec;
import org.opensearch.index.codec.PerFieldMappingPostingFormatCodec;
import org.opensearch.index.mapper.MapperService;

import java.util.Set;

import static org.opensearch.index.engine.EngineConfig.INDEX_CODEC_COMPRESSION_LEVEL_SETTING;
import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;

/**
*
Expand All @@ -28,21 +27,18 @@
*
* @opensearch.internal
*/
public abstract class Lucene99CustomCodec extends FilterCodec {

/** Default compression level used for compression */
public static final int DEFAULT_COMPRESSION_LEVEL = INDEX_CODEC_COMPRESSION_LEVEL_SETTING.getDefault(Settings.EMPTY);
public abstract class Lucene912CustomCodec extends FilterCodec {

/** Each mode represents a compression algorithm. */
public enum Mode {
/**
* ZStandard mode with dictionary
*/
ZSTD("ZSTD99", Set.of("zstd")),
ZSTD("ZSTD912", Set.of("zstd")),
/**
* ZStandard mode without dictionary
*/
ZSTD_NO_DICT("ZSTDNODICT99", Set.of("zstd_no_dict"));
ZSTD_NO_DICT("ZSTDNODICT912", Set.of("zstd_no_dict"));

private final String codec;
private final Set<String> aliases;
Expand Down Expand Up @@ -74,7 +70,7 @@ public Set<String> getAliases() {
*
* @param mode The compression codec (ZSTD or ZSTDNODICT).
*/
public Lucene99CustomCodec(Mode mode) {
public Lucene912CustomCodec(Mode mode) {
this(mode, DEFAULT_COMPRESSION_LEVEL);
}

Expand All @@ -86,9 +82,9 @@ public Lucene99CustomCodec(Mode mode) {
* @param mode The compression codec (ZSTD or ZSTDNODICT).
* @param compressionLevel The compression level.
*/
public Lucene99CustomCodec(Mode mode, int compressionLevel) {
super(mode.getCodec(), new Lucene99Codec());
this.storedFieldsFormat = new Lucene99CustomStoredFieldsFormat(mode, compressionLevel);
public Lucene912CustomCodec(Mode mode, int compressionLevel) {
super(mode.getCodec(), new Lucene912Codec());
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
}

/**
Expand All @@ -101,9 +97,9 @@ public Lucene99CustomCodec(Mode mode, int compressionLevel) {
* @param mapperService The mapper service.
* @param logger The logger.
*/
public Lucene99CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene99Codec.Mode.BEST_SPEED, mapperService, logger));
this.storedFieldsFormat = new Lucene99CustomStoredFieldsFormat(mode, compressionLevel);
public Lucene912CustomCodec(Mode mode, int compressionLevel, MapperService mapperService, Logger logger) {
super(mode.getCodec(), new PerFieldMappingPostingFormatCodec(Lucene912Codec.Mode.BEST_SPEED, mapperService, logger));
this.storedFieldsFormat = new Lucene912CustomStoredFieldsFormat(mode, compressionLevel);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.codec.customcodecs;

import org.apache.lucene.codecs.StoredFieldsFormat;
import org.apache.lucene.codecs.StoredFieldsReader;
import org.apache.lucene.codecs.StoredFieldsWriter;
import org.apache.lucene.codecs.compressing.CompressionMode;
import org.apache.lucene.codecs.lucene90.compressing.Lucene90CompressingStoredFieldsFormat;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;

import java.io.IOException;
import java.util.Objects;

import static org.opensearch.index.codec.customcodecs.backward_codecs.lucene99.Lucene99CustomCodec.DEFAULT_COMPRESSION_LEVEL;

/** Stored field format used by pluggable codec */
public class Lucene912CustomStoredFieldsFormat extends StoredFieldsFormat {

/** A key that we use to map to a mode */
public static final String MODE_KEY = Lucene912CustomStoredFieldsFormat.class.getSimpleName() + ".mode";

protected static final int ZSTD_BLOCK_LENGTH = 10 * 48 * 1024;
protected static final int ZSTD_MAX_DOCS_PER_BLOCK = 4096;
protected static final int ZSTD_BLOCK_SHIFT = 10;

private final CompressionMode zstdCompressionMode;
private final CompressionMode zstdNoDictCompressionMode;

private final Lucene912CustomCodec.Mode mode;
private final int compressionLevel;

/** default constructor */
public Lucene912CustomStoredFieldsFormat() {
this(Lucene912CustomCodec.Mode.ZSTD, DEFAULT_COMPRESSION_LEVEL);
}

/**
* Creates a new instance.
*
* @param mode The mode represents ZSTD or ZSTDNODICT
*/
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode) {
this(mode, DEFAULT_COMPRESSION_LEVEL);
}

/**
* Creates a new instance with the specified mode and compression level.
*
* @param mode The mode represents ZSTD or ZSTDNODICT
* @param compressionLevel The compression level for the mode.
*/
public Lucene912CustomStoredFieldsFormat(Lucene912CustomCodec.Mode mode, int compressionLevel) {
this.mode = Objects.requireNonNull(mode);
this.compressionLevel = compressionLevel;
zstdCompressionMode = new ZstdCompressionMode(compressionLevel);
zstdNoDictCompressionMode = new ZstdNoDictCompressionMode(compressionLevel);
}

/**
* Returns a {@link StoredFieldsReader} to load stored fields.
* @param directory The index directory.
* @param si The SegmentInfo that stores segment information.
* @param fn The fieldInfos.
* @param context The IOContext that holds additional details on the merge/search context.
*/
@Override
public StoredFieldsReader fieldsReader(Directory directory, SegmentInfo si, FieldInfos fn, IOContext context) throws IOException {
if (si.getAttribute(MODE_KEY) != null) {
String value = si.getAttribute(MODE_KEY);
Lucene912CustomCodec.Mode mode = Lucene912CustomCodec.Mode.valueOf(value);
return impl(mode).fieldsReader(directory, si, fn, context);
} else {
throw new IllegalStateException("missing value for " + MODE_KEY + " for segment: " + si.name);
}
}

/**
* Returns a {@link StoredFieldsReader} to write stored fields.
* @param directory The index directory.
* @param si The SegmentInfo that stores segment information.
* @param context The IOContext that holds additional details on the merge/search context.
*/
@Override
public StoredFieldsWriter fieldsWriter(Directory directory, SegmentInfo si, IOContext context) throws IOException {
String previous = si.putAttribute(MODE_KEY, mode.name());
if (previous != null && previous.equals(mode.name()) == false) {
throw new IllegalStateException(
"found existing value for " + MODE_KEY + " for segment: " + si.name + " old = " + previous + ", new = " + mode.name()
);
}
return impl(mode).fieldsWriter(directory, si, context);
}

StoredFieldsFormat impl(Lucene912CustomCodec.Mode mode) {
switch (mode) {
case ZSTD:
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstd", this.zstdCompressionMode);
case ZSTD_NO_DICT:
return getCustomCompressingStoredFieldsFormat("CustomStoredFieldsZstdNoDict", this.zstdNoDictCompressionMode);
default:
throw new IllegalStateException("Unsupported compression mode: " + mode);
}
}

private StoredFieldsFormat getCustomCompressingStoredFieldsFormat(String formatName, CompressionMode compressionMode) {
return new Lucene90CompressingStoredFieldsFormat(
formatName,
compressionMode,
ZSTD_BLOCK_LENGTH,
ZSTD_MAX_DOCS_PER_BLOCK,
ZSTD_BLOCK_SHIFT
);
}

public Lucene912CustomCodec.Mode getMode() {
return mode;
}

/**
* Returns the compression level.
*/
public int getCompressionLevel() {
return compressionLevel;
}

public CompressionMode getCompressionMode() {
return mode == Lucene912CustomCodec.Mode.ZSTD_NO_DICT ? zstdNoDictCompressionMode : zstdCompressionMode;
}

}
Loading

0 comments on commit e501bff

Please sign in to comment.