Skip to content

Commit

Permalink
Removes Array based vector serialization
Browse files Browse the repository at this point in the history
Previously, we serialized vectors via an array-based method. This was
inefficient and was removed in PR #253, which launched in 1.3.0. Since then,
we have not serialized new segment data via the array-based serializer.
Now that we are at 3.0, and because the oldest index that can be upgraded is
from 1.3.0, we no longer need to handle array-based serialization. So,
we can remove all of it.

Signed-off-by: John Mazanec <jmazane@amazon.com>
  • Loading branch information
jmazanec15 committed Mar 6, 2025
1 parent 8faf388 commit 4f7a238
Show file tree
Hide file tree
Showing 18 changed files with 38 additions and 249 deletions.
4 changes: 2 additions & 2 deletions src/main/java/org/opensearch/knn/index/VectorDataType.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.BytesRef;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.memory.NativeMemoryAllocation;
import org.opensearch.knn.jni.JNICommons;
import org.opensearch.knn.training.BinaryTrainingDataConsumer;
Expand Down Expand Up @@ -105,7 +105,7 @@ public FieldType createKnnVectorFieldType(int dimension, KNNVectorSimilarityFunc

@Override
public float[] getVectorFromBytesRef(BytesRef binaryValue) {
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getSerializerByBytesRef(binaryValue);
final KNNVectorSerializer vectorSerializer = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE;
return vectorSerializer.byteToFloatArray(binaryValue);
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/opensearch/knn/index/VectorField.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.util.BytesRef;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;

public class VectorField extends Field {

public VectorField(String name, float[] value, IndexableFieldType type) {
super(name, new BytesRef(), type);
try {
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getDefaultSerializer();
final KNNVectorSerializer vectorSerializer = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE;
final byte[] floatToByte = vectorSerializer.floatToByteArray(value);
this.setBytesValue(floatToByte);
} catch (Exception e) {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
public class KNNVectorAsCollectionOfFloatsSerializer implements KNNVectorSerializer {
private static final int BYTES_IN_FLOAT = 4;

public static final KNNVectorAsCollectionOfFloatsSerializer INSTANCE = new KNNVectorAsCollectionOfFloatsSerializer();

@Override
public byte[] floatToByteArray(float[] input) {
final ByteBuffer bb = ByteBuffer.allocate(input.length * BYTES_IN_FLOAT).order(ByteOrder.BIG_ENDIAN);
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import org.opensearch.knn.index.KnnCircuitBreakerException;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.engine.KNNMethodContext;
import org.opensearch.knn.index.engine.MethodComponentContext;
Expand Down Expand Up @@ -74,7 +74,7 @@ public static StoredField createStoredFieldForByteVector(String name, byte[] vec
* @param vector vector to be added to stored field
*/
public static StoredField createStoredFieldForFloatVector(String name, float[] vector) {
return new StoredField(name, KNNVectorSerializerFactory.getDefaultSerializer().floatToByteArray(vector));
return new StoredField(name, KNNVectorAsCollectionOfFloatsSerializer.INSTANCE.floatToByteArray(vector));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;

import java.io.IOException;

Expand Down Expand Up @@ -121,7 +121,7 @@ private <T> T extractFromKnnVectorValues(
}

private float[] getFloatVectorFromByteRef(final BytesRef bytesRef) {
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getSerializerByBytesRef(bytesRef);
final KNNVectorSerializer vectorSerializer = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE;
return vectorSerializer.byteToFloatArray(bytesRef);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import org.apache.lucene.store.Directory;
import org.opensearch.index.fielddata.ScriptDocValues;
import org.junit.Before;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;

import java.io.IOException;

Expand All @@ -43,7 +43,7 @@ private void createKNNVectorDocument(Directory directory) throws IOException {
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
IndexWriter writer = new IndexWriter(directory, conf);
Document knnDocument = new Document();
byte[] vectorBinary = KNNVectorSerializerFactory.getDefaultSerializer().floatToByteArray(new float[] { 1.0f, 2.0f });
byte[] vectorBinary = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE.floatToByteArray(new float[] { 1.0f, 2.0f });
knnDocument.add(new BinaryDocValuesField(MOCK_INDEX_FIELD_NAME, new BytesRef(vectorBinary)));
knnDocument.add(new NumericDocValuesField(MOCK_NUMERIC_INDEX_FIELD_NAME, 1000));
writer.addDocument(knnDocument);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import org.junit.Before;
import org.junit.After;
import org.junit.Test;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;

import java.io.IOException;

Expand Down Expand Up @@ -167,7 +167,7 @@ private void createKNNVectorDocument(Directory directory, Class<?> valuesClass)
Field field;

if (BinaryDocValues.class.equals(valuesClass)) {
byte[] vectorBinary = KNNVectorSerializerFactory.getDefaultSerializer().floatToByteArray(SAMPLE_VECTOR_DATA);
byte[] vectorBinary = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE.floatToByteArray(SAMPLE_VECTOR_DATA);
field = new BinaryDocValuesField(MOCK_INDEX_FIELD_NAME, new BytesRef(vectorBinary));
} else if (ByteVectorValues.class.equals(valuesClass)) {
field = new KnnByteVectorField(MOCK_INDEX_FIELD_NAME, SAMPLE_BYTE_VECTOR_DATA);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import org.apache.lucene.util.BytesRef;
import org.junit.Assert;
import org.opensearch.knn.KNNTestCase;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;

import java.io.IOException;

Expand Down Expand Up @@ -82,7 +82,7 @@ private void createKNNFloatVectorDocument(Directory directory) throws IOExceptio
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
IndexWriter writer = new IndexWriter(directory, conf);
Document knnDocument = new Document();
BytesRef bytesRef = new BytesRef(KNNVectorSerializerFactory.getDefaultSerializer().floatToByteArray(SAMPLE_FLOAT_VECTOR_DATA));
BytesRef bytesRef = new BytesRef(KNNVectorAsCollectionOfFloatsSerializer.INSTANCE.floatToByteArray(SAMPLE_FLOAT_VECTOR_DATA));
knnDocument.add(new BinaryDocValuesField(MOCK_FLOAT_INDEX_FIELD_NAME, bytesRef));
writer.addDocument(knnDocument);
writer.commit();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,73 +10,13 @@

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.ObjectOutputStream;
import java.util.Random;
import java.util.stream.IntStream;

public class KNNVectorSerializerTests extends KNNTestCase {

Random random = new Random();

public void testVectorSerializerFactory() throws Exception {
// check that default serializer can work with array of floats
// setup
final float[] vector = getArrayOfRandomFloats(20);
final ByteArrayOutputStream bas = new ByteArrayOutputStream();
final DataOutputStream ds = new DataOutputStream(bas);
for (float f : vector)
ds.writeFloat(f);
final BytesRef vectorAsCollectionOfFloats = new BytesRef(bas.toByteArray());
final KNNVectorSerializer defaultSerializer = KNNVectorSerializerFactory.getDefaultSerializer();
assertNotNull(defaultSerializer);

final float[] actualDeserializedVector = defaultSerializer.byteToFloatArray(vectorAsCollectionOfFloats);
assertNotNull(actualDeserializedVector);
assertArrayEquals(vector, actualDeserializedVector, 0.1f);

final KNNVectorSerializer arraySerializer = KNNVectorSerializerFactory.getSerializerBySerializationMode(SerializationMode.ARRAY);
assertNotNull(arraySerializer);

final KNNVectorSerializer collectionOfFloatsSerializer = KNNVectorSerializerFactory.getSerializerBySerializationMode(
SerializationMode.COLLECTION_OF_FLOATS
);
assertNotNull(collectionOfFloatsSerializer);
}

public void testVectorSerializerFactory_throwExceptionForBytesWithUnsupportedDataType() throws Exception {
// prepare array of chars that is not supported by serializer factory. expected behavior is to fail
final char[] arrayOfChars = new char[] { 'a', 'b', 'c' };
final ByteArrayOutputStream bas = new ByteArrayOutputStream();
final DataOutputStream ds = new DataOutputStream(bas);
for (char ch : arrayOfChars)
ds.writeChar(ch);
final BytesRef vectorAsCollectionOfChars = new BytesRef(bas.toByteArray());

expectThrows(RuntimeException.class, () -> KNNVectorSerializerFactory.getSerializerByBytesRef(vectorAsCollectionOfChars));
}

public void testVectorAsArraySerializer() throws Exception {
final float[] vector = getArrayOfRandomFloats(20);

final ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
final ObjectOutputStream objectStream = new ObjectOutputStream(byteStream);
objectStream.writeObject(vector);
final BytesRef serializedVector = new BytesRef(byteStream.toByteArray());
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getSerializerByBytesRef(serializedVector);

// testing serialization
final byte[] actualSerializedVector = vectorSerializer.floatToByteArray(vector);

assertNotNull(actualSerializedVector);
assertArrayEquals(serializedVector.bytes, actualSerializedVector);

// testing deserialization
final float[] actualDeserializedVector = vectorSerializer.byteToFloatArray(serializedVector);

assertNotNull(actualDeserializedVector);
assertArrayEquals(vector, actualDeserializedVector, 0.1f);
}

public void testVectorAsCollectionOfFloatsSerializer() throws Exception {
// setup
final float[] vector = getArrayOfRandomFloats(20);
Expand All @@ -86,7 +26,7 @@ public void testVectorAsCollectionOfFloatsSerializer() throws Exception {
for (float f : vector)
ds.writeFloat(f);
final BytesRef vectorAsCollectionOfFloats = new BytesRef(bas.toByteArray());
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getSerializerByBytesRef(vectorAsCollectionOfFloats);
final KNNVectorSerializer vectorSerializer = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE;

// testing serialization
final byte[] actualSerializedVector = vectorSerializer.floatToByteArray(vector);
Expand All @@ -104,16 +44,14 @@ public void testVectorAsCollectionOfFloatsSerializer() throws Exception {
public void testVectorSerializer_whenVectorBytesOffset_thenSuccess() {
final float[] vector = getArrayOfRandomFloats(20);
int offset = randomInt(4);
for (SerializationMode serializationMode : SerializationMode.values()) {
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getSerializerBySerializationMode(serializationMode);
assertNotNull(vectorSerializer);
byte[] bytes = vectorSerializer.floatToByteArray(vector);
byte[] bytesWithOffset = new byte[bytes.length + 2 * offset];
System.arraycopy(bytes, 0, bytesWithOffset, offset, bytes.length);
BytesRef serializedVector = new BytesRef(bytesWithOffset, offset, bytes.length);
float[] deserializedVector = vectorSerializer.byteToFloatArray(serializedVector);
assertArrayEquals(vector, deserializedVector, 0.1f);
}
final KNNVectorSerializer vectorSerializer = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE;
assertNotNull(vectorSerializer);
byte[] bytes = vectorSerializer.floatToByteArray(vector);
byte[] bytesWithOffset = new byte[bytes.length + 2 * offset];
System.arraycopy(bytes, 0, bytesWithOffset, offset, bytes.length);
BytesRef serializedVector = new BytesRef(bytesWithOffset, offset, bytes.length);
float[] deserializedVector = vectorSerializer.byteToFloatArray(serializedVector);
assertArrayEquals(vector, deserializedVector, 0.1f);
}

private float[] getArrayOfRandomFloats(int arrayLength) {
Expand Down
Loading

0 comments on commit 4f7a238

Please sign in to comment.