Skip to content

Commit

Permalink
Startree fileformat (#31)
Browse files Browse the repository at this point in the history
* star tree file formats

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* refactored meta, test fixes

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

* refactored packages, minor fixes

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>

---------

Signed-off-by: Sarthak Aggarwal <sarthagg@amazon.com>
Co-authored-by: Sarthak Aggarwal <sarthagg@amazon.com>
  • Loading branch information
bharath-techie and sarthakaggarwal97 authored Jul 28, 2024
1 parent 59302a3 commit 9db1fa9
Show file tree
Hide file tree
Showing 46 changed files with 2,998 additions and 286 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.codecs.lucene90;

import org.apache.lucene.codecs.DocValuesConsumer;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.SegmentWriteState;

import java.io.IOException;

/**
* This class is an abstraction of the {@link DocValuesConsumer} for the Star Tree index structure.
* It is responsible for consuming various types of document values (numeric, binary, sorted, sorted numeric,
* and sorted set) for fields in the Star Tree index.
*
* @opensearch.experimental
*/
public class Lucene90DocValuesConsumerWrapper extends DocValuesConsumer {

    /** Underlying Lucene 9.0 consumer that every call on this wrapper is forwarded to. */
    Lucene90DocValuesConsumer lucene90DocValuesConsumer;

    /**
     * Creates the wrapper and the {@link Lucene90DocValuesConsumer} it delegates to.
     * All arguments are passed through unchanged to the delegate's constructor.
     *
     * @param state         segment write state handed to the delegate
     * @param dataCodec     data codec name handed to the delegate
     * @param dataExtension data file extension handed to the delegate
     * @param metaCodec     meta codec name handed to the delegate
     * @param metaExtension meta file extension handed to the delegate
     * @throws IOException if constructing the delegate fails
     */
    public Lucene90DocValuesConsumerWrapper(
        SegmentWriteState state,
        String dataCodec,
        String dataExtension,
        String metaCodec,
        String metaExtension
    ) throws IOException {
        lucene90DocValuesConsumer = new Lucene90DocValuesConsumer(state, dataCodec, dataExtension, metaCodec, metaExtension);
    }

    /**
     * Closes the underlying consumer.
     *
     * @throws IOException if the delegate fails to close
     */
    @Override
    public void close() throws IOException {
        lucene90DocValuesConsumer.close();
    }

    /**
     * Forwards a numeric doc values field to the underlying consumer.
     *
     * @param fieldInfo         field being written
     * @param docValuesProducer source of the values to write
     * @throws IOException if the delegate fails
     */
    @Override
    public void addNumericField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        lucene90DocValuesConsumer.addNumericField(fieldInfo, docValuesProducer);
    }

    /**
     * Forwards a binary doc values field to the underlying consumer.
     *
     * @param fieldInfo         field being written
     * @param docValuesProducer source of the values to write
     * @throws IOException if the delegate fails
     */
    @Override
    public void addBinaryField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        // Must go through the binary path; this previously delegated to addNumericField by mistake.
        lucene90DocValuesConsumer.addBinaryField(fieldInfo, docValuesProducer);
    }

    /**
     * Forwards a sorted doc values field to the underlying consumer.
     *
     * @param fieldInfo         field being written
     * @param docValuesProducer source of the values to write
     * @throws IOException if the delegate fails
     */
    @Override
    public void addSortedField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        lucene90DocValuesConsumer.addSortedField(fieldInfo, docValuesProducer);
    }

    /**
     * Forwards a sorted numeric doc values field to the underlying consumer.
     *
     * @param fieldInfo         field being written
     * @param docValuesProducer source of the values to write
     * @throws IOException if the delegate fails
     */
    @Override
    public void addSortedNumericField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        lucene90DocValuesConsumer.addSortedNumericField(fieldInfo, docValuesProducer);
    }

    /**
     * Forwards a sorted set doc values field to the underlying consumer.
     *
     * @param fieldInfo         field being written
     * @param docValuesProducer source of the values to write
     * @throws IOException if the delegate fails
     */
    @Override
    public void addSortedSetField(FieldInfo fieldInfo, DocValuesProducer docValuesProducer) throws IOException {
        lucene90DocValuesConsumer.addSortedSetField(fieldInfo, docValuesProducer);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.codecs.lucene90;

import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;

import java.io.IOException;

/**
* This class is a custom abstraction of the {@link DocValuesProducer} for the Star Tree index structure.
* It is responsible for providing access to various types of document values (numeric, binary, sorted, sorted numeric,
* and sorted set) for fields in the Star Tree index.
*
* @opensearch.experimental
*/
public class Lucene90DocValuesProducerWrapper extends DocValuesProducer {

    /** Underlying Lucene 9.0 producer that every read on this wrapper is forwarded to. */
    Lucene90DocValuesProducer lucene90DocValuesProducer;

    /** Segment read state supplied at construction; used to resolve field names to {@link FieldInfo}. */
    SegmentReadState state;

    /**
     * Creates the wrapper and the {@link Lucene90DocValuesProducer} it delegates to.
     * All arguments are passed through unchanged to the delegate's constructor.
     *
     * @param state         segment read state handed to the delegate and retained by this wrapper
     * @param dataCodec     data codec name handed to the delegate
     * @param dataExtension data file extension handed to the delegate
     * @param metaCodec     meta codec name handed to the delegate
     * @param metaExtension meta file extension handed to the delegate
     * @throws IOException if constructing the delegate fails
     */
    public Lucene90DocValuesProducerWrapper(
        SegmentReadState state,
        String dataCodec,
        String dataExtension,
        String metaCodec,
        String metaExtension
    ) throws IOException {
        this.state = state;
        this.lucene90DocValuesProducer = new Lucene90DocValuesProducer(state, dataCodec, dataExtension, metaCodec, metaExtension);
    }

    /** Returns the delegate's numeric doc values for {@code field}. */
    @Override
    public NumericDocValues getNumeric(FieldInfo field) throws IOException {
        return lucene90DocValuesProducer.getNumeric(field);
    }

    /** Returns the delegate's binary doc values for {@code field}. */
    @Override
    public BinaryDocValues getBinary(FieldInfo field) throws IOException {
        return lucene90DocValuesProducer.getBinary(field);
    }

    /** Returns the delegate's sorted doc values for {@code field}. */
    @Override
    public SortedDocValues getSorted(FieldInfo field) throws IOException {
        return lucene90DocValuesProducer.getSorted(field);
    }

    /** Returns the delegate's sorted numeric doc values for {@code field}. */
    @Override
    public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOException {
        return lucene90DocValuesProducer.getSortedNumeric(field);
    }

    /**
     * Returns the delegate's sorted numeric doc values for the field with the given name.
     * The name is resolved to a {@link FieldInfo} through the field infos of the stored segment read state.
     * NOTE(review): the lookup result is passed to the delegate unchecked; confirm callers only pass known fields.
     *
     * @param fieldName name of the field to look up
     * @return the sorted numeric doc values returned by the delegate
     * @throws IOException if the delegate fails
     */
    public SortedNumericDocValues getSortedNumeric(String fieldName) throws IOException {
        final FieldInfo resolved = state.fieldInfos.fieldInfo(fieldName);
        return lucene90DocValuesProducer.getSortedNumeric(resolved);
    }

    /** Returns the delegate's sorted set doc values for {@code field}. */
    @Override
    public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException {
        return lucene90DocValuesProducer.getSortedSet(field);
    }

    /** Runs the delegate's integrity check. */
    @Override
    public void checkIntegrity() throws IOException {
        lucene90DocValuesProducer.checkIntegrity();
    }

    /** Closes the underlying producer. */
    @Override
    public void close() throws IOException {
        lucene90DocValuesProducer.close();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.util.Counter;

/**
* A wrapper class for writing sorted numeric doc values.
* <p>
* This class provides a convenient way to add sorted numeric doc values to a field
* and retrieve the corresponding {@link SortedNumericDocValues} instance.
*
* @opensearch.experimental
*/
public class SortedNumericDocValuesWriterWrapper {

    /** Writer that receives every value added through this wrapper. */
    private final SortedNumericDocValuesWriter delegate;

    /**
     * Builds a wrapper around a freshly created {@link SortedNumericDocValuesWriter}.
     *
     * @param fieldInfo field information passed to the underlying writer
     * @param counter   counter passed to the underlying writer
     */
    public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter) {
        this.delegate = new SortedNumericDocValuesWriter(fieldInfo, counter);
    }

    /**
     * Records {@code value} for document {@code docID} in the underlying writer.
     *
     * @param docID the document ID the value belongs to
     * @param value the value to record
     */
    public void addValue(int docID, long value) {
        delegate.addValue(docID, value);
    }

    /**
     * Exposes the values held by the underlying writer.
     *
     * @return the {@link SortedNumericDocValues} obtained from the underlying writer
     */
    public SortedNumericDocValues getDocValues() {
        return delegate.getDocValues();
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,36 @@ public class Composite99DocValuesFormat extends DocValuesFormat {
private final DocValuesFormat delegate;
private final MapperService mapperService;

/** Data codec name for Composite Doc Values Format */
public static final String DATA_CODEC_NAME = "Composite99FormatData";

/** Meta codec name for Composite Doc Values Format */
public static final String META_CODEC_NAME = "Composite99FormatMeta";

/** Filename extension for the composite index data */
public static final String DATA_EXTENSION = "cid";

/** Filename extension for the composite index meta */
public static final String META_EXTENSION = "cim";

/** Data doc values codec name for Composite Doc Values Format */
public static final String DATA_DOC_VALUES_CODEC = "Composite99DocValuesData";

/** Meta doc values codec name for Composite Doc Values Format */
public static final String META_DOC_VALUES_CODEC = "Composite99DocValuesMetadata";

/** Filename extension for the composite index data doc values */
public static final String DATA_DOC_VALUES_EXTENSION = "cidvd";

/** Filename extension for the composite index meta doc values */
public static final String META_DOC_VALUES_EXTENSION = "cidvm";

/** Initial version for the Composite99DocValuesFormat */
public static final int VERSION_START = 0;

/** Current version for the Composite99DocValuesFormat; currently the same as {@link #VERSION_START} */
public static final int VERSION_CURRENT = VERSION_START;

// needed for SPI
public Composite99DocValuesFormat() {
this(new Lucene90DocValuesFormat(), null);
Expand Down
Loading

0 comments on commit 9db1fa9

Please sign in to comment.