From d5f8853fda8f488d73aed09d376e1c1c3c9ea85f Mon Sep 17 00:00:00 2001
From: Benjamin Trent
Date: Wed, 20 Mar 2024 11:49:30 -0400
Subject: [PATCH] Fix NPE for when fields are missing in `searchNearestVectors` (#13195)

Related to: https://github.com/apache/lucene/pull/13162

Since this is unreleased, no changelog entry is necessary.
---
 .../org/apache/lucene/index/LeafReader.java   |  6 ++++
 .../TestPerFieldKnnVectorsFormat.java         | 34 +++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
index 4856feedca3c..5c3c9f8da5e1 100644
--- a/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/LeafReader.java
@@ -246,6 +246,9 @@ public final PostingsEnum postings(Term term) throws IOException {
   public final TopDocs searchNearestVectors(
       String field, float[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException {
     FieldInfo fi = getFieldInfos().fieldInfo(field);
+    if (fi == null || fi.getVectorDimension() == 0) {
+      return TopDocsCollector.EMPTY_TOPDOCS;
+    }
     FloatVectorValues floatVectorValues = getFloatVectorValues(fi.name);
     if (floatVectorValues == null) {
       return TopDocsCollector.EMPTY_TOPDOCS;
@@ -287,6 +290,9 @@ public final TopDocs searchNearestVectors(
   public final TopDocs searchNearestVectors(
       String field, byte[] target, int k, Bits acceptDocs, int visitedLimit) throws IOException {
     FieldInfo fi = getFieldInfos().fieldInfo(field);
+    if (fi == null || fi.getVectorDimension() == 0) {
+      return TopDocsCollector.EMPTY_TOPDOCS;
+    }
     ByteVectorValues byteVectorValues = getByteVectorValues(fi.name);
     if (byteVectorValues == null) {
       return TopDocsCollector.EMPTY_TOPDOCS;
diff --git a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java
index c51f51f5f113..45814144d10a 100644
--- a/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java
+++ b/lucene/core/src/test/org/apache/lucene/codecs/perfield/TestPerFieldKnnVectorsFormat.java
@@ -71,6 +71,40 @@ protected Codec getCodec() {
     return codec;
   }
 
+  public void testMissingFieldReturnsNoResults() throws IOException {
+    try (Directory directory = newDirectory()) {
+      IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
+      iwc.setCodec(
+          new AssertingCodec() {
+            @Override
+            public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
+              return TestUtil.getDefaultKnnVectorsFormat();
+            }
+          });
+      try (IndexWriter iwriter = new IndexWriter(directory, iwc)) {
+        Document doc = new Document();
+        doc.add(newTextField("id", "1", Field.Store.YES));
+        iwriter.addDocument(doc);
+      }
+
+      try (IndexReader ireader = DirectoryReader.open(directory)) {
+        LeafReader reader = ireader.leaves().get(0).reader();
+        TopDocs hits =
+            reader.searchNearestVectors(
+                "missing_field",
+                new float[] {1, 2, 3},
+                10,
+                reader.getLiveDocs(),
+                Integer.MAX_VALUE);
+        assertEquals(0, hits.scoreDocs.length);
+        hits =
+            reader.searchNearestVectors(
+                "id", new float[] {1, 2, 3}, 10, reader.getLiveDocs(), Integer.MAX_VALUE);
+        assertEquals(0, hits.scoreDocs.length);
+      }
+    }
+  }
+
   public void testTwoFieldsTwoFormats() throws IOException {
     try (Directory directory = newDirectory()) {
       // we don't use RandomIndexWriter because it might add more values than we expect !!!!1