From 17aefef249e4caed9353683b3d4ab5788cf35d96 Mon Sep 17 00:00:00 2001 From: Michael Sokolov Date: Wed, 6 Apr 2022 19:13:14 +0000 Subject: [PATCH 1/3] LUCENE-10504: KnnGraphTester to use KnnVectorQuery --- .../lucene/util/hnsw/KnnGraphTester.java | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java index 822ef78197c3..533582b0e157 100644 --- a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java +++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java @@ -32,8 +32,10 @@ import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; +import java.nio.file.attribute.FileTime; import java.util.HashSet; import java.util.Locale; +import java.util.Objects; import java.util.Set; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; @@ -55,6 +57,8 @@ import org.apache.lucene.index.RandomAccessVectorValues; import org.apache.lucene.index.RandomAccessVectorValuesProducer; import org.apache.lucene.index.VectorSimilarityFunction; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.KnnVectorQuery; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; @@ -349,8 +353,9 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][] TopDocs[] results = new TopDocs[numIters]; long elapsed, totalCpuTime, totalVisited = 0; try (FileChannel q = FileChannel.open(queryPath)) { + int bufferSize = Math.max(numIters, warmCount) * dim * Float.BYTES; FloatBuffer targets = - q.map(FileChannel.MapMode.READ_ONLY, 0, numIters * dim * Float.BYTES) + q.map(FileChannel.MapMode.READ_ONLY, 0, bufferSize) .order(ByteOrder.LITTLE_ENDIAN) .asFloatBuffer(); float[] target = new float[dim]; @@ -362,18 +367,19 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][] long cpuTimeStartNs; try (Directory dir = FSDirectory.open(indexPath); DirectoryReader reader = DirectoryReader.open(dir)) { + IndexSearcher searcher = new IndexSearcher(reader); numDocs = reader.maxDoc(); for (int i = 0; i < warmCount; i++) { // warm up targets.get(target); - results[i] = doKnnSearch(reader, KNN_FIELD, target, topK, fanout); + doKnnSearch(reader, KNN_FIELD, target, topK, fanout); } targets.position(0); start = System.nanoTime(); cpuTimeStartNs = bean.getCurrentThreadCpuTime(); for (int i = 0; i < numIters; i++) { targets.get(target); - results[i] = doKnnSearch(reader, KNN_FIELD, target, topK, fanout); + results[i] = doKnnVectorQuery(searcher, KNN_FIELD, target, topK, fanout); } totalCpuTime = (bean.getCurrentThreadCpuTime() - cpuTimeStartNs) / 1_000_000; elapsed = (System.nanoTime() - start) / 1_000_000; // ns -> ms @@ -430,6 +436,11 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][] } } + private static TopDocs doKnnVectorQuery( + IndexSearcher searcher, String field, float[] vector, int k, int fanout) throws IOException { + return searcher.search(new KnnVectorQuery(field, vector, k + fanout), k); + } + private static TopDocs doKnnSearch( IndexReader reader, String field, float[] vector, int k, int fanout) throws IOException { TopDocs[] results = new TopDocs[reader.leaves().size()]; @@ -487,9 +498,10 @@ private int compareNN(int[] expected, TopDocs results) { private int[][] getNN(Path docPath, Path queryPath) throws IOException { // look in working directory for cached nn file - String nnFileName = "nn-" + numDocs + "-" + numIters + "-" + topK + "-" + dim + ".bin"; + String hash = Integer.toString(Objects.hash(docPath, queryPath, numDocs, numIters, topK), 36); + String nnFileName = "nn-" + hash + ".bin"; Path nnPath = Paths.get(nnFileName); - if (Files.exists(nnPath)) { + if (Files.exists(nnPath) && isNewer(nnPath, docPath, queryPath)) { return readNN(nnPath); } else { int[][] nn = computeNN(docPath, queryPath); @@ -498,6 +510,16 @@ private int[][] getNN(Path docPath, Path queryPath) throws IOException { } } + private boolean isNewer(Path path, Path... others) throws IOException { + FileTime modified = Files.getLastModifiedTime(path); + for (Path other : others) { + if (Files.getLastModifiedTime(other).compareTo(modified) >= 0) { + return false; + } + } + return true; + } + private int[][] readNN(Path nnPath) throws IOException { int[][] result = new int[numIters][]; try (FileChannel in = FileChannel.open(nnPath)) { From baf3bd82735980aab5ccc4b98b578d8f225c0f28 Mon Sep 17 00:00:00 2001 From: Michael Sokolov Date: Fri, 29 Apr 2022 20:28:04 +0000 Subject: [PATCH 2/3] remove KnnGraphTester -warm parameter; just use numIters --- .../test/org/apache/lucene/util/hnsw/KnnGraphTester.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java index 533582b0e157..28bed17d809d 100644 --- a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java +++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java @@ -83,7 +83,6 @@ public class KnnGraphTester { private int numDocs; private int dim; private int topK; - private int warmCount; private int numIters; private int fanout; private Path indexPath; @@ -102,7 +101,6 @@ private KnnGraphTester() { numIters = 1000; dim = 256; topK = 100; - warmCount = 1000; fanout = topK; similarityFunction = VectorSimilarityFunction.DOT_PRODUCT; } @@ -182,9 +180,6 @@ private void run(String... args) throws Exception { case "-out": outputPath = Paths.get(args[++iarg]); break; - case "-warm": - warmCount = Integer.parseInt(args[++iarg]); - break; case "-docs": docVectorsPath = Paths.get(args[++iarg]); break; @@ -353,7 +348,7 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][] TopDocs[] results = new TopDocs[numIters]; long elapsed, totalCpuTime, totalVisited = 0; try (FileChannel q = FileChannel.open(queryPath)) { - int bufferSize = Math.max(numIters, warmCount) * dim * Float.BYTES; + int bufferSize = numIters * dim * Float.BYTES; FloatBuffer targets = q.map(FileChannel.MapMode.READ_ONLY, 0, bufferSize) .order(ByteOrder.LITTLE_ENDIAN) @@ -369,7 +364,7 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][] DirectoryReader reader = DirectoryReader.open(dir)) { IndexSearcher searcher = new IndexSearcher(reader); numDocs = reader.maxDoc(); - for (int i = 0; i < warmCount; i++) { + for (int i = 0; i < numIters; i++) { // warm up targets.get(target); doKnnSearch(reader, KNN_FIELD, target, topK, fanout); From 7563390d147386398b5af98d9adbae24dde0bfda Mon Sep 17 00:00:00 2001 From: Michael Sokolov Date: Wed, 4 May 2022 21:17:50 +0000 Subject: [PATCH 3/3] also use KnnVectorQuery for warming --- .../lucene/util/hnsw/KnnGraphTester.java | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java index 28bed17d809d..b7fe2c13d95a 100644 --- a/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java +++ b/lucene/core/src/test/org/apache/lucene/util/hnsw/KnnGraphTester.java @@ -49,7 +49,6 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.index.CodecReader; import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.LeafReader; @@ -63,7 +62,6 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntroSorter; import org.apache.lucene.util.PrintStreamInfoStream; @@ -367,7 +365,7 @@ private void testSearch(Path indexPath, Path queryPath, Path outputPath, int[][] for (int i = 0; i < numIters; i++) { // warm up targets.get(target); - doKnnSearch(reader, KNN_FIELD, target, topK, fanout); + doKnnVectorQuery(searcher, KNN_FIELD, target, topK, fanout); } targets.position(0); start = System.nanoTime(); @@ -436,21 +434,6 @@ private static TopDocs doKnnVectorQuery( return searcher.search(new KnnVectorQuery(field, vector, k + fanout), k); } - private static TopDocs doKnnSearch( - IndexReader reader, String field, float[] vector, int k, int fanout) throws IOException { - TopDocs[] results = new TopDocs[reader.leaves().size()]; - for (LeafReaderContext ctx : reader.leaves()) { - Bits liveDocs = ctx.reader().getLiveDocs(); - results[ctx.ord] = - ctx.reader().searchNearestVectors(field, vector, k + fanout, liveDocs, Integer.MAX_VALUE); - int docBase = ctx.docBase; - for (ScoreDoc scoreDoc : results[ctx.ord].scoreDocs) { - scoreDoc.doc += docBase; - } - } - return TopDocs.merge(k, results); - } - private float checkResults(TopDocs[] results, int[][] nn) { int totalMatches = 0; int totalResults = 0;