Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Backport nmslib 2.0.11 upgrade to 1.12 #305

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ build
*.log
out/
buildSrc/es-knn-offline-repo-*
buildSrc/libKNNIndexV2_0_6*
buildSrc/libKNNIndex*
oss/*
*.iml
jni/CMakeCache.txt
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -458,7 +458,7 @@ Second, it should first be confirmed that all of the graphs of interest are able
## Scoring
During k-NN search, for each graph, NMSLIB will return up to `k` results. These results contain both the [document ID and the NMSLIB score](https://github.com/opendistro-for-elasticsearch/k-NN/blob/master/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNQueryResult.java#L21).

The score NMSLIB assigns to a result is related to the space type that is selected. For example, for cosine similarity, NMSLIB will return [`1 - normScalarProduct`](https://github.com/nmslib/nmslib/blob/master/similarity_search/src/method/hnsw_distfunc_opt.cc#L372). For euclidean distance, in almost all cases, it will return the euclidean distance between [the result and the query vector](https://github.com/nmslib/nmslib/blob/master/similarity_search/src/method/hnsw_distfunc_opt.cc#L131). However, when the dimension of the vector is divisible by 16 (i.e. `dimension % 16 == 0`), the score returned will actually be the [square of the euclidean distance](https://github.com/nmslib/nmslib/blob/master/similarity_search/src/method/hnsw_distfunc_opt.cc#L50).
The score NMSLIB assigns to a result depends on the space type that is selected. For example, for cosine similarity, NMSLIB will return [`1 - normScalarProduct`](https://github.com/nmslib/nmslib/blob/master/similarity_search/src/method/hnsw_distfunc_opt.cc#L372). For `l2`, the score returned is the square of the Euclidean distance between the result and the query vector.

From the k-NN and NMSLIB perspective, a lower score equates to a closer and better result. This is the opposite of how Elasticsearch scores results, where a greater score equates to a better result. In order to convert from the NMSLIB score to the Elasticsearch score, we perform [the following conversion](https://github.com/opendistro-for-elasticsearch/k-NN/blob/master/src/main/java/com/amazon/opendistroforelasticsearch/knn/index/KNNWeight.java#L113):
```
Expand Down
6 changes: 3 additions & 3 deletions jni/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

cmake_minimum_required(VERSION 2.8)

project(KNNIndexV2_0_6)
project(KNNIndexV2_0_11)

# Corner case. For CMake 2.8, there is no option to specify set(CMAKE_CXX_STANDARD 11). Instead, the flag manually needs
# to be set.
Expand All @@ -27,7 +27,7 @@ else()
endif()

# Target Library to be built
set(KNN_INDEX KNNIndexV2_0_6)
set(KNN_INDEX KNNIndexV2_0_11)
set(KNN_PACKAGE_NAME opendistro-knnlib)

# Check if similarity search exists
Expand Down Expand Up @@ -55,7 +55,7 @@ else()
endif()

# Compile the library
add_library(${KNN_INDEX} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/src/com_amazon_opendistroforelasticsearch_knn_index_v206_KNNIndex.cpp)
add_library(${KNN_INDEX} SHARED ${CMAKE_CURRENT_SOURCE_DIR}/src/com_amazon_opendistroforelasticsearch_knn_index_v2011_KNNIndex.cpp)
target_link_libraries(${KNN_INDEX} NonMetricSpaceLib)
target_include_directories(${KNN_INDEX} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/include $ENV{JAVA_HOME}/include $ENV{JAVA_HOME}/include/${JVM_OS_TYPE} ${CMAKE_CURRENT_SOURCE_DIR}/external/nmslib/similarity_search/include)

Expand Down
2 changes: 1 addition & 1 deletion jni/external/nmslib
Submodule nmslib updated 251 files

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
Expand All @@ -13,7 +13,7 @@
* permissions and limitations under the License.
*/

#include "com_amazon_opendistroforelasticsearch_knn_index_v206_KNNIndex.h"
#include "com_amazon_opendistroforelasticsearch_knn_index_v2011_KNNIndex.h"

#include "init.h"
#include "index.h"
Expand Down Expand Up @@ -85,7 +85,7 @@ void catch_cpp_exception_and_throw_java(JNIEnv* env)
}
}

JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v206_KNNIndex_saveIndex(JNIEnv* env, jclass cls, jintArray ids, jobjectArray vectors, jstring indexPath, jobjectArray algoParams, jstring spaceType)
JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v2011_KNNIndex_saveIndex(JNIEnv* env, jclass cls, jintArray ids, jobjectArray vectors, jstring indexPath, jobjectArray algoParams, jstring spaceType)
{
Space<float>* space = NULL;
ObjectVector dataset;
Expand Down Expand Up @@ -144,7 +144,7 @@ JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v206
}
}

JNIEXPORT jobjectArray JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v206_KNNIndex_queryIndex(JNIEnv* env, jclass cls, jlong indexPointer, jfloatArray queryVector, jint k)
JNIEXPORT jobjectArray JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v2011_KNNIndex_queryIndex(JNIEnv* env, jclass cls, jlong indexPointer, jfloatArray queryVector, jint k)
{
try {
IndexWrapper *indexWrapper = reinterpret_cast<IndexWrapper*>(indexPointer);
Expand Down Expand Up @@ -175,7 +175,7 @@ JNIEXPORT jobjectArray JNICALL Java_com_amazon_opendistroforelasticsearch_knn_in
return NULL;
}

JNIEXPORT jlong JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v206_KNNIndex_init(JNIEnv* env, jclass cls, jstring indexPath, jobjectArray algoParams, jstring spaceType)
JNIEXPORT jlong JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v2011_KNNIndex_init(JNIEnv* env, jclass cls, jstring indexPath, jobjectArray algoParams, jstring spaceType)
{
IndexWrapper *indexWrapper = NULL;
try {
Expand Down Expand Up @@ -215,7 +215,7 @@ JNIEXPORT jlong JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v20
return NULL;
}

JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v206_KNNIndex_gc(JNIEnv* env, jclass cls, jlong indexPointer)
JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v2011_KNNIndex_gc(JNIEnv* env, jclass cls, jlong indexPointer)
{
try {
IndexWrapper *indexWrapper = reinterpret_cast<IndexWrapper*>(indexPointer);
Expand All @@ -228,7 +228,7 @@ JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v206
}
}

JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v206_KNNIndex_initLibrary(JNIEnv *, jclass)
JNIEXPORT void JNICALL Java_com_amazon_opendistroforelasticsearch_knn_index_v2011_KNNIndex_initLibrary(JNIEnv *, jclass)
{
initLibrary();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

package com.amazon.opendistroforelasticsearch.knn.index;

import com.amazon.opendistroforelasticsearch.knn.index.v206.KNNIndex;
import com.amazon.opendistroforelasticsearch.knn.index.v2011.KNNIndex;
import com.amazon.opendistroforelasticsearch.knn.plugin.stats.StatNames;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
package com.amazon.opendistroforelasticsearch.knn.index;

import com.amazon.opendistroforelasticsearch.knn.index.codec.KNNCodecUtil;
import com.amazon.opendistroforelasticsearch.knn.index.v206.KNNIndex;
import com.amazon.opendistroforelasticsearch.knn.index.v2011.KNNIndex;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.FilterLeafReader;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
package com.amazon.opendistroforelasticsearch.knn.index;

import com.amazon.opendistroforelasticsearch.knn.index.codec.KNNCodecUtil;
import com.amazon.opendistroforelasticsearch.knn.index.v206.KNNIndex;
import com.amazon.opendistroforelasticsearch.knn.index.v2011.KNNIndex;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.FieldInfo;
Expand Down Expand Up @@ -92,7 +92,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {

/**
* TODO Add logic to pick up the right nmslib version based on the version
* in the name of the file. As of now we have one version 2.0.6
* in the name of the file. As of now we have only one version, 2.0.11,
* so this is deferred to future releases.
*/

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import com.amazon.opendistroforelasticsearch.knn.index.KNNVectorFieldMapper;
import com.amazon.opendistroforelasticsearch.knn.index.util.KNNConstants;
import com.amazon.opendistroforelasticsearch.knn.index.util.NmsLibVersion;
import com.amazon.opendistroforelasticsearch.knn.index.v206.KNNIndex;
import com.amazon.opendistroforelasticsearch.knn.index.v2011.KNNIndex;

import java.io.Closeable;
import java.io.IOException;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
Expand All @@ -20,14 +20,14 @@ public enum NmsLibVersion {
/**
* Latest available nmslib version
*/
V206("206"){
V2011("2011"){
@Override
public String indexLibraryVersion() {
return "KNNIndexV2_0_6";
return "KNNIndexV2_0_11";
}
};

public static final NmsLibVersion LATEST = V206;
public static final NmsLibVersion LATEST = V2011;

public String buildVersion;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
Expand All @@ -13,7 +13,7 @@
* permissions and limitations under the License.
*/

package com.amazon.opendistroforelasticsearch.knn.index.v206;
package com.amazon.opendistroforelasticsearch.knn.index.v2011;

import com.amazon.opendistroforelasticsearch.knn.index.KNNQueryResult;
import com.amazon.opendistroforelasticsearch.knn.index.util.NmsLibVersion;
Expand All @@ -29,16 +29,16 @@

/**
* JNI layer to communicate with the nmslib
* This class refers to the nms library build with version tag 2.0.6
* See <a href="https://github.com/nmslib/nmslib/tree/v2.0.6">tag2.0.6</a>
* This class refers to the nms library built with version tag 2.0.11
* See <a href="https://github.com/nmslib/nmslib/tree/v2.0.11">tag2.0.11</a>
*/
public class KNNIndex implements AutoCloseable {
public static NmsLibVersion VERSION = NmsLibVersion.V206;
public static NmsLibVersion VERSION = NmsLibVersion.V2011;

static {
AccessController.doPrivileged(new PrivilegedAction<Void>() {
public Void run() {
System.loadLibrary(NmsLibVersion.V206.indexLibraryVersion());
System.loadLibrary(NmsLibVersion.LATEST.indexLibraryVersion());
return null;
}
});
Expand Down Expand Up @@ -153,6 +153,6 @@ private static long computeFileSize(String indexPath) {
// Deletes memory pointed to by index pointer (needs write lock)
private static native void gc(long indexPointer);

// Calls nmslib's initLibrary function: https://github.com/nmslib/nmslib/blob/v2.0.6/similarity_search/include/init.h#L27
// Calls nmslib's initLibrary function: https://github.com/nmslib/nmslib/blob/v2.0.11/similarity_search/include/init.h#L27
private static native void initLibrary();
}
2 changes: 1 addition & 1 deletion src/main/plugin-metadata/plugin-security.policy
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
grant {
permission java.lang.RuntimePermission "loadLibrary.KNNIndexV2_0_6";
permission java.lang.RuntimePermission "loadLibrary.KNNIndexV2_0_11";
};
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

package com.amazon.opendistroforelasticsearch.knn.index;

import com.amazon.opendistroforelasticsearch.knn.index.v206.KNNIndex;
import com.amazon.opendistroforelasticsearch.knn.index.v2011.KNNIndex;
import com.amazon.opendistroforelasticsearch.knn.KNNTestCase;
import com.amazon.opendistroforelasticsearch.knn.plugin.KNNPlugin;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingRequest;
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
* Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
Expand All @@ -16,7 +16,7 @@
package com.amazon.opendistroforelasticsearch.knn.index;

import com.amazon.opendistroforelasticsearch.knn.KNNTestCase;
import com.amazon.opendistroforelasticsearch.knn.index.v206.KNNIndex;
import com.amazon.opendistroforelasticsearch.knn.index.v2011.KNNIndex;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
Expand Down Expand Up @@ -102,12 +102,12 @@ public Void run() {
/*
* scores are evaluated using squared Euclidean distance. The squared distances of the
* documents with respect to the query vector are as follows
* doc0 = 11.224972, doc1 = 3.7416575, doc2 = 19.131126
* doc0 = 126, doc1 = 14, doc2 = 366
* Nearest neighbor is doc1 then doc0 then doc2
*/
assertEquals(scores.get(0), 11.224972, 0.1);
assertEquals(scores.get(1), 3.7416575, 0.1);
assertEquals(scores.get(2), 19.131126, 0.1);
assertEquals(126.0, scores.get(0), 0.001);
assertEquals(14.0, scores.get(1), 0.001);
assertEquals(366.0, scores.get(2), 0.001);
dir.close();
}

Expand Down Expand Up @@ -225,12 +225,12 @@ public Void run() {
/*
* scores are evaluated using squared Euclidean distance. The squared distances of the
* documents with respect to the query vector are as follows
* doc0 = 11.224972, doc1 = 3.7416575, doc2 = 19.131126
* doc0 = 126, doc1 = 14, doc2 = 366
* Nearest neighbor is doc1 then doc0 then doc2
*/
assertEquals(scores.get(0), 11.224972, 0.1);
assertEquals(scores.get(1), 3.7416575, 0.1);
assertEquals(scores.get(2), 19.131126, 0.1);
assertEquals(126.0, scores.get(0), 0.001);
assertEquals(14.0, scores.get(1), 0.001);
assertEquals(366.0, scores.get(2), 0.001);
dir.close();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,8 +153,8 @@ public void testMultiFieldsKnnIndex(Codec codec) throws Exception {
IndexSearcher searcher = new IndexSearcher(reader);
float score = searcher.search(new KNNQuery("test_vector", new float[] {1.0f, 0.0f, 0.0f}, 1, "dummy"), 10).scoreDocs[0].score;
float score1 = searcher.search(new KNNQuery("my_vector", new float[] {1.0f, 2.0f}, 1, "dummy"), 10).scoreDocs[0].score;
assertEquals(score, 0.1667f, 0.01f);
assertEquals(score1, 0.0714f, 0.01f);
assertEquals(1.0f/(1 + 25), score, 0.01f);
assertEquals(1.0f/(1 + 169), score1, 0.01f);

// query to determine the hits
assertEquals(1, searcher.count(new KNNQuery("test_vector", new float[] {1.0f, 0.0f, 0.0f}, 1, "dummy")));
Expand Down