From 6846008f950e483be924a76e56bdcd4d36bb47d9 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Sun, 1 Jan 2023 10:44:57 +0100 Subject: [PATCH 1/6] Introduce a new `KeywordField`. `KeywordField` is a combination of `StringField` and `SortedSetDocValuesField`, similarly to how `LongField` is a combination of `LongPoint` and `SortedNumericDocValuesField`. This makes it easier for users to create fields that can be used for filtering, sorting and faceting. --- .../apache/lucene/document/KeywordField.java | 127 ++++++++++++++++++ .../lucene/document/TestKeywordField.java | 88 ++++++++++++ .../lucene/search/TestSortOptimization.java | 40 +++--- .../lucene/search/TestSortedSetSortField.java | 30 ++--- .../org/apache/lucene/demo/IndexFiles.java | 8 +- 5 files changed, 255 insertions(+), 38 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/document/KeywordField.java create mode 100644 lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java new file mode 100644 index 000000000000..a283eb2db3b7 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.document; + +import java.util.Objects; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.search.SortedSetSortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.util.BytesRef; + +/** + * Field that indexes a per-document {@link BytesRef} into an inverted index for fast filtering and + * stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc values for sorting + * and faceting. This field does not support scoring: queries produce constant scores. If you also + * need to store the value, you should add a separate {@link StoredField} instance. If you need more + * fine-grained control you can use {@link StringField} and {@link SortedDocValuesField} or {@link + * SortedSetDocValuesField}. + * + *

This field defines static factory methods for creating common query objects: + * + *

+ */ +public class KeywordField extends Field { + + private static final FieldType FIELD_TYPE = new FieldType(); + + static { + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); + FIELD_TYPE.setOmitNorms(true); + FIELD_TYPE.setTokenized(false); + FIELD_TYPE.setDocValuesType(DocValuesType.SORTED_SET); + FIELD_TYPE.freeze(); + } + + /** + * Creates a new KeywordField. + * + * @param name field name + * @param value the BytesRef value + * @throws IllegalArgumentException if the field name or value is null. + */ + public KeywordField(String name, BytesRef value) { + super(name, value, FIELD_TYPE); + } + + /** + * Creates a new KeywordField from a String value, by indexing its UTF-8 representation. + * + * @param name field name + * @param value the BytesRef value + * @throws IllegalArgumentException if the field name or value is null. + */ + public KeywordField(String name, String value) { + super(name, value, FIELD_TYPE); + } + + @Override + public BytesRef binaryValue() { + BytesRef binaryValue = super.binaryValue(); + if (binaryValue != null) { + return binaryValue; + } else { + return new BytesRef(stringValue()); + } + } + + /** + * Create a query for matching an exact {@link BytesRef} value. + * + * @param field field name. must not be {@code null}. + * @param value exact value + * @throws IllegalArgumentException if {@code field} is null. + * @return a query matching documents with this exact value + */ + public static Query newExactQuery(String field, BytesRef value) { + Objects.requireNonNull(field, "field must not be null"); + Objects.requireNonNull(value, "value must not be null"); + return new ConstantScoreQuery(new TermQuery(new Term(field, value))); + } + + /** + * Create a query for matching an exact {@link String} value. + * + * @param field field name. must not be {@code null}. + * @param value exact value + * @throws IllegalArgumentException if {@code field} is null. + * @return a query matching documents with this exact value + */ + public static Query newExactQuery(String field, String value) { + return newExactQuery(field, new BytesRef(value)); + } + + /** + * Create a new {@link SortField} for {@link BytesRef} values. + * + * @param field field name. must not be {@code null}. + * @param reverse true if natural order should be reversed. + * @param selector custom selector type for choosing the sort value from the set. + */ + public static SortField newSortField( + String field, boolean reverse, SortedSetSelector.Type selector) { + return new SortedSetSortField(field, reverse, selector); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java new file mode 100644 index 000000000000..ca1be9e4b2da --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java @@ -0,0 +1,88 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.document; + +import java.io.IOException; +import java.util.Collections; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.store.Directory; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.util.BytesRef; + +public class TestKeywordField extends LuceneTestCase { + + public void testSetBytesValue() { + KeywordField field = new KeywordField("name", newBytesRef("value")); + assertEquals(newBytesRef("value"), field.binaryValue()); + assertNull(field.stringValue()); + field.setBytesValue(newBytesRef("value2")); + assertEquals(newBytesRef("value2"), field.binaryValue()); + assertNull(field.stringValue()); + } + + public void testSetStringValue() { + KeywordField field = new KeywordField("name", "value"); + assertEquals("value", field.stringValue()); + assertEquals(newBytesRef("value"), field.binaryValue()); + field.setStringValue("value2"); + assertEquals("value2", field.stringValue()); + assertEquals(newBytesRef("value2"), field.binaryValue()); + } + + public void testIndexBytesValue() throws IOException { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); + w.addDocument(Collections.singleton(new KeywordField("field", newBytesRef("value")))); + IndexReader reader = DirectoryReader.open(w); + w.close(); + LeafReader leaf = getOnlyLeafReader(reader); + TermsEnum terms = leaf.terms("field").iterator(); + assertEquals(new BytesRef("value"), terms.next()); + assertNull(terms.next()); + SortedSetDocValues values = leaf.getSortedSetDocValues("field"); + assertTrue(values.advanceExact(0)); + assertEquals(1, values.docValueCount()); + assertEquals(0L, values.nextOrd()); + assertEquals(new BytesRef("value"), values.lookupOrd(0)); + reader.close(); + dir.close(); + } + + public void testIndexStringValue() throws IOException { + Directory dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); + w.addDocument(Collections.singleton(new KeywordField("field", "value"))); + IndexReader reader = DirectoryReader.open(w); + w.close(); + LeafReader leaf = getOnlyLeafReader(reader); + TermsEnum terms = leaf.terms("field").iterator(); + assertEquals(new BytesRef("value"), terms.next()); + assertNull(terms.next()); + SortedSetDocValues values = leaf.getSortedSetDocValues("field"); + assertTrue(values.advanceExact(0)); + assertEquals(1, values.docValueCount()); + assertEquals(0L, values.nextOrd()); + assertEquals(new BytesRef("value"), values.lookupOrd(0)); + reader.close(); + dir.close(); + } +} diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java index 5c0aad74d97f..8d7ab1d65792 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java @@ -26,15 +26,14 @@ import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.FloatDocValuesField; import org.apache.lucene.document.FloatPoint; import org.apache.lucene.document.IntPoint; import org.apache.lucene.document.IntRange; +import org.apache.lucene.document.KeywordField; import org.apache.lucene.document.LongField; import org.apache.lucene.document.LongPoint; import org.apache.lucene.document.NumericDocValuesField; -import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.StoredField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; @@ -891,8 +890,7 @@ public void testStringSortOptimization() throws IOException { for (int i = 0; i < numDocs; ++i) { final Document doc = new Document(); final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000))); - doc.add(new StringField("my_field", value, Store.NO)); - doc.add(new SortedDocValuesField("my_field", value)); + doc.add(new KeywordField("my_field", value)); writer.addDocument(doc); if (i % 2000 == 0) writer.flush(); // multiple segments } @@ -916,8 +914,7 @@ public void testStringSortOptimizationWithMissingValues() throws IOException { final Document doc = new Document(); if (random().nextInt(2) == 0) { final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000))); - doc.add(new StringField("my_field", value, Store.NO)); - doc.add(new SortedDocValuesField("my_field", value)); + doc.add(new KeywordField("my_field", value)); } writer.addDocument(doc); } @@ -936,7 +933,8 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep final int numHits = 5; { // simple ascending sort - SortField sortField = new SortField("my_field", SortField.Type.STRING); + SortField sortField = + KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_LAST); Sort sort = new Sort(sortField); TopDocs topDocs = assertSort(reader, sort, numHits, null); @@ -944,7 +942,7 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep } { // simple descending sort - SortField sortField = new SortField("my_field", SortField.Type.STRING, true); + SortField sortField = KeywordField.newSortField("my_field", true, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_FIRST); Sort sort = new Sort(sortField); TopDocs topDocs = assertSort(reader, sort, numHits, null); @@ -952,21 +950,23 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep } { // ascending sort that returns missing values first - SortField sortField = new SortField("my_field", SortField.Type.STRING); + SortField sortField = + KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_FIRST); Sort sort = new Sort(sortField); assertSort(reader, sort, numHits, null); } { // descending sort that returns missing values last - SortField sortField = new SortField("my_field", SortField.Type.STRING, true); + SortField sortField = KeywordField.newSortField("my_field", true, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_LAST); Sort sort = new Sort(sortField); assertSort(reader, sort, numHits, null); } { // paging ascending sort with after - SortField sortField = new SortField("my_field", SortField.Type.STRING); + SortField sortField = + KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_LAST); Sort sort = new Sort(sortField); BytesRef afterValue = new BytesRef(random().nextBoolean() ? "23" : "230000000"); @@ -976,7 +976,7 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep } { // paging descending sort with after - SortField sortField = new SortField("my_field", SortField.Type.STRING, true); + SortField sortField = KeywordField.newSortField("my_field", true, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_FIRST); Sort sort = new Sort(sortField); BytesRef afterValue = new BytesRef(random().nextBoolean() ? "17" : "170000000"); @@ -986,7 +986,8 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep } { // paging ascending sort with after that returns missing values first - SortField sortField = new SortField("my_field", SortField.Type.STRING); + SortField sortField = + KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_FIRST); Sort sort = new Sort(sortField); BytesRef afterValue = new BytesRef(random().nextBoolean() ? "23" : "230000000"); @@ -996,7 +997,7 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep } { // paging descending sort with after that returns missing values first - SortField sortField = new SortField("my_field", SortField.Type.STRING, true); + SortField sortField = KeywordField.newSortField("my_field", true, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_LAST); Sort sort = new Sort(sortField); BytesRef afterValue = new BytesRef(random().nextBoolean() ? "17" : "170000000"); @@ -1006,7 +1007,8 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep } { // test that if there is the secondary sort on _score, hits are still skipped - SortField sortField = new SortField("my_field", SortField.Type.STRING); + SortField sortField = + KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_LAST); Sort sort = new Sort(sortField, FIELD_SCORE); TopDocs topDocs = assertSort(reader, sort, numHits, null); @@ -1014,7 +1016,8 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep } { // test that if string field is a secondary sort, no optimization is run - SortField sortField = new SortField("my_field", SortField.Type.STRING); + SortField sortField = + KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_LAST); Sort sort = new Sort(FIELD_SCORE, sortField); TopDocs topDocs = assertSort(reader, sort, numHits, null); @@ -1025,10 +1028,7 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep } public void doTestStringSortOptimizationDisabled(DirectoryReader reader) throws IOException { - SortField sortField = - random().nextBoolean() - ? new SortedSetSortField("my_field", false) - : new SortField("my_field", SortField.Type.STRING); + SortField sortField = KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN); sortField.setMissingValue(SortField.STRING_LAST); sortField.setOptimizeSortWithIndexedData(false); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java b/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java index aad6552ccf83..df78f5bafcd6 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java @@ -18,7 +18,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.document.KeywordField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; @@ -64,12 +64,12 @@ public void testForward() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"))); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("foo"))); - doc.add(new SortedSetDocValuesField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("foo"))); + doc.add(new KeywordField("value", newBytesRef("bar"))); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); IndexReader ir = writer.getReader(); @@ -92,12 +92,12 @@ public void testReverse() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("foo"))); - doc.add(new SortedSetDocValuesField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("foo"))); + doc.add(new KeywordField("value", newBytesRef("bar"))); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"))); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); @@ -121,12 +121,12 @@ public void testMissingFirst() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"))); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("foo"))); - doc.add(new SortedSetDocValuesField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("foo"))); + doc.add(new KeywordField("value", newBytesRef("bar"))); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); @@ -156,12 +156,12 @@ public void testMissingLast() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"))); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("foo"))); - doc.add(new SortedSetDocValuesField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("foo"))); + doc.add(new KeywordField("value", newBytesRef("bar"))); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); @@ -191,11 +191,11 @@ public void testSingleton() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"))); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new SortedSetDocValuesField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("bar"))); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); IndexReader ir = writer.getReader(); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java index 7b172d65d410..2cd8e56b22b5 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java @@ -28,15 +28,17 @@ import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.Date; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.demo.knn.DemoEmbeddings; import org.apache.lucene.demo.knn.KnnVectorDict; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.KeywordField; import org.apache.lucene.document.KnnFloatVectorField; import org.apache.lucene.document.LongField; -import org.apache.lucene.document.StringField; +import org.apache.lucene.document.StoredField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; @@ -234,8 +236,8 @@ void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOExcepti // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: - Field pathField = new StringField("path", file.toString(), Field.Store.YES); - doc.add(pathField); + doc.add(new KeywordField("path", file.toString())); + doc.add(new StoredField("path", file.toString())); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed with points and doc values, and is efficient From 5ebeceeaf629775a4da4c6544783a24344a598ab Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 6 Feb 2023 17:42:56 +0100 Subject: [PATCH 2/6] Iterate. --- .../apache/lucene/document/KeywordField.java | 84 ++++++++++++++++--- .../lucene/document/TestKeywordField.java | 62 ++++++++++---- .../lucene/search/TestSortOptimization.java | 8 +- .../lucene/search/TestSortedSetSortField.java | 28 +++---- .../org/apache/lucene/demo/IndexFiles.java | 4 +- 5 files changed, 140 insertions(+), 46 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java index a283eb2db3b7..caae7cdb598e 100644 --- a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java +++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java @@ -16,7 +16,9 @@ */ package org.apache.lucene.document; +import java.util.Collection; import java.util.Objects; + import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; @@ -25,27 +27,31 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.search.SortedSetSortField; +import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; /** - * Field that indexes a per-document {@link BytesRef} into an inverted index for fast filtering and - * stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc values for sorting - * and faceting. This field does not support scoring: queries produce constant scores. If you also + * Field that indexes a per-document String or {@link BytesRef} into an inverted index for fast + * filtering, stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc + * values for sorting and faceting, and optionally stores values as stored fields for top-hits + * retrieval. This field does not support scoring: queries produce constant scores. If you also * need to store the value, you should add a separate {@link StoredField} instance. If you need more - * fine-grained control you can use {@link StringField} and {@link SortedDocValuesField} or {@link - * SortedSetDocValuesField}. + * fine-grained control you can use {@link StringField}, {@link SortedDocValuesField} or + * {@link SortedSetDocValuesField}, and {@link StoredField}. * *

This field defines static factory methods for creating common query objects: * *

*/ public class KeywordField extends Field { private static final FieldType FIELD_TYPE = new FieldType(); + private static final FieldType FIELD_TYPE_STORED; static { FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); @@ -53,17 +59,29 @@ public class KeywordField extends Field { FIELD_TYPE.setTokenized(false); FIELD_TYPE.setDocValuesType(DocValuesType.SORTED_SET); FIELD_TYPE.freeze(); + + FIELD_TYPE_STORED = new FieldType(FIELD_TYPE); + FIELD_TYPE_STORED.setStored(true); + FIELD_TYPE_STORED.freeze(); } + private final StoredValue storedValue; + /** * Creates a new KeywordField. * * @param name field name * @param value the BytesRef value + * @param stored whether to store the field * @throws IllegalArgumentException if the field name or value is null. */ - public KeywordField(String name, BytesRef value) { - super(name, value, FIELD_TYPE); + public KeywordField(String name, BytesRef value, Store stored) { + super(name, value, stored == Field.Store.YES ? FIELD_TYPE_STORED : FIELD_TYPE); + if (stored == Store.YES) { + storedValue = new StoredValue(value); + } else { + storedValue = null; + } } /** @@ -71,10 +89,16 @@ public KeywordField(String name, BytesRef value) { * * @param name field name * @param value the BytesRef value + * @param stored whether to store the field * @throws IllegalArgumentException if the field name or value is null. */ - public KeywordField(String name, String value) { - super(name, value, FIELD_TYPE); + public KeywordField(String name, String value, Store stored) { + super(name, value, stored == Field.Store.YES ? FIELD_TYPE_STORED : FIELD_TYPE); + if (stored == Store.YES) { + storedValue = new StoredValue(value); + } else { + storedValue = null; + } } @Override @@ -87,12 +111,33 @@ public BytesRef binaryValue() { } } + @Override + public void setStringValue(String value) { + super.setStringValue(value); + if (storedValue != null) { + storedValue.setStringValue(value); + } + } + + @Override + public void setBytesValue(BytesRef value) { + super.setBytesValue(value); + if (storedValue != null) { + storedValue.setBinaryValue(value); + } + } + + @Override + public StoredValue storedValue() { + return storedValue; + } + /** * Create a query for matching an exact {@link BytesRef} value. * * @param field field name. must not be {@code null}. * @param value exact value - * @throws IllegalArgumentException if {@code field} is null. + * @throws NullPointerException if {@code field} is null. * @return a query matching documents with this exact value */ public static Query newExactQuery(String field, BytesRef value) { @@ -106,13 +151,28 @@ public static Query newExactQuery(String field, BytesRef value) { * * @param field field name. must not be {@code null}. * @param value exact value - * @throws IllegalArgumentException if {@code field} is null. + * @throws NullPointerException if {@code field} is null. * @return a query matching documents with this exact value */ public static Query newExactQuery(String field, String value) { + Objects.requireNonNull(value, "value must not be null"); return newExactQuery(field, new BytesRef(value)); } + /** + * Create a query for matching any of a set of provided {@link BytesRef} values. + * + * @param field field name. must not be {@code null}. + * @param values the set of values to match + * @throws NullPointerException if {@code field} is null. + * @return a query matching documents with this exact value + */ + public static Query newSetQuery(String field, Collection values) { + Objects.requireNonNull(field, "field must not be null"); + Objects.requireNonNull(values, "values must not be null"); + return new TermInSetQuery(field, values); + } + /** * Create a new {@link SortField} for {@link BytesRef} values. * @@ -122,6 +182,8 @@ public static Query newExactQuery(String field, String value) { */ public static SortField newSortField( String field, boolean reverse, SortedSetSelector.Type selector) { + Objects.requireNonNull(field, "field must not be null"); + Objects.requireNonNull(selector, "selector must not be null"); return new SortedSetSortField(field, reverse, selector); } } diff --git a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java index ca1be9e4b2da..4601e1f7ec49 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java @@ -31,27 +31,57 @@ public class TestKeywordField extends LuceneTestCase { public void testSetBytesValue() { - KeywordField field = new KeywordField("name", newBytesRef("value")); - assertEquals(newBytesRef("value"), field.binaryValue()); - assertNull(field.stringValue()); - field.setBytesValue(newBytesRef("value2")); - assertEquals(newBytesRef("value2"), field.binaryValue()); - assertNull(field.stringValue()); + Field[] fields = new Field[] { + new KeywordField("name", newBytesRef("value"), Field.Store.NO), + new KeywordField("name", newBytesRef("value"), Field.Store.YES) + }; + for (Field field : fields) { + assertEquals(newBytesRef("value"), field.binaryValue()); + assertNull(field.stringValue()); + if (field.fieldType().stored()) { + assertEquals(newBytesRef("value"), field.storedValue().getBinaryValue()); + } else { + assertNull(field.storedValue()); + } + field.setBytesValue(newBytesRef("value2")); + assertEquals(newBytesRef("value2"), field.binaryValue()); + assertNull(field.stringValue()); + if (field.fieldType().stored()) { + assertEquals(newBytesRef("value2"), field.storedValue().getBinaryValue()); + } else { + assertNull(field.storedValue()); + } + } } public void testSetStringValue() { - KeywordField field = new KeywordField("name", "value"); - assertEquals("value", field.stringValue()); - assertEquals(newBytesRef("value"), field.binaryValue()); - field.setStringValue("value2"); - assertEquals("value2", field.stringValue()); - assertEquals(newBytesRef("value2"), field.binaryValue()); + Field[] fields = new Field[] { + new KeywordField("name", "value", Field.Store.NO), + new KeywordField("name", "value", Field.Store.YES) + }; + for (Field field : fields) { + assertEquals("value", field.stringValue()); + assertEquals(newBytesRef("value"), field.binaryValue()); + if (field.fieldType().stored()) { + assertEquals("value", field.storedValue().getStringValue()); + } else { + assertNull(field.storedValue()); + } + field.setStringValue("value2"); + assertEquals("value2", field.stringValue()); + assertEquals(newBytesRef("value2"), field.binaryValue()); + if (field.fieldType().stored()) { + assertEquals("value2", field.storedValue().getStringValue()); + } else { + assertNull(field.storedValue()); + } + } } public void testIndexBytesValue() throws IOException { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); - w.addDocument(Collections.singleton(new KeywordField("field", newBytesRef("value")))); + w.addDocument(Collections.singleton(new KeywordField("field", newBytesRef("value"), Field.Store.YES))); IndexReader reader = DirectoryReader.open(w); w.close(); LeafReader leaf = getOnlyLeafReader(reader); @@ -63,6 +93,8 @@ public void testIndexBytesValue() throws IOException { assertEquals(1, values.docValueCount()); assertEquals(0L, values.nextOrd()); assertEquals(new BytesRef("value"), values.lookupOrd(0)); + Document storedDoc = leaf.storedFields().document(0); + assertEquals(new BytesRef("value"), storedDoc.getBinaryValue("field")); reader.close(); dir.close(); } @@ -70,7 +102,7 @@ public void testIndexBytesValue() throws IOException { public void testIndexStringValue() throws IOException { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); - w.addDocument(Collections.singleton(new KeywordField("field", "value"))); + w.addDocument(Collections.singleton(new KeywordField("field", "value", Field.Store.YES))); IndexReader reader = DirectoryReader.open(w); w.close(); LeafReader leaf = getOnlyLeafReader(reader); @@ -82,6 +114,8 @@ public void testIndexStringValue() throws IOException { assertEquals(1, values.docValueCount()); assertEquals(0L, values.nextOrd()); assertEquals(new BytesRef("value"), values.lookupOrd(0)); + Document storedDoc = leaf.storedFields().document(0); + assertEquals("value", storedDoc.get("field")); reader.close(); dir.close(); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java index 8d7ab1d65792..d30146f39a3c 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java @@ -808,8 +808,8 @@ public void testSortOptimizationOnSortedNumericField() throws IOException { int value = random().nextInt(); int value2 = random().nextInt(); final Document doc = new Document(); - doc.add(new LongField("my_field", value, Store.NO)); - doc.add(new LongField("my_field", value2, Store.NO)); + doc.add(new LongField("my_field", value, Field.Store.NO)); + doc.add(new LongField("my_field", value2, Field.Store.NO)); writer.addDocument(doc); } final IndexReader reader = DirectoryReader.open(writer); @@ -890,7 +890,7 @@ public void testStringSortOptimization() throws IOException { for (int i = 0; i < numDocs; ++i) { final Document doc = new Document(); final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000))); - doc.add(new KeywordField("my_field", value)); + doc.add(new KeywordField("my_field", value, Field.Store.NO)); writer.addDocument(doc); if (i % 2000 == 0) writer.flush(); // multiple segments } @@ -914,7 +914,7 @@ public void testStringSortOptimizationWithMissingValues() throws IOException { final Document doc = new Document(); if (random().nextInt(2) == 0) { final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000))); - doc.add(new KeywordField("my_field", value)); + doc.add(new KeywordField("my_field", value, Field.Store.NO)); } writer.addDocument(doc); } diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java b/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java index df78f5bafcd6..873d948373f1 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java @@ -64,12 +64,12 @@ public void testForward() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO)); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("foo"))); - doc.add(new KeywordField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("foo"), Field.Store.NO)); + doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO)); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); IndexReader ir = writer.getReader(); @@ -92,12 +92,12 @@ public void testReverse() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("foo"))); - doc.add(new KeywordField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("foo"), Field.Store.NO)); + doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO)); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO)); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); @@ -121,12 +121,12 @@ public void testMissingFirst() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO)); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("foo"))); - doc.add(new KeywordField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("foo"), Field.Store.NO)); + doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO)); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); @@ -156,12 +156,12 @@ public void testMissingLast() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO)); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("foo"))); - doc.add(new KeywordField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("foo"), Field.Store.NO)); + doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO)); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); @@ -191,11 +191,11 @@ public void testSingleton() throws Exception { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); Document doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("baz"))); + doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO)); doc.add(newStringField("id", "2", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); - doc.add(new KeywordField("value", newBytesRef("bar"))); + doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO)); doc.add(newStringField("id", "1", Field.Store.YES)); writer.addDocument(doc); IndexReader ir = writer.getReader(); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java index 2cd8e56b22b5..2c19e3acc88b 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java @@ -38,7 +38,6 @@ import org.apache.lucene.document.KeywordField; import org.apache.lucene.document.KnnFloatVectorField; import org.apache.lucene.document.LongField; -import org.apache.lucene.document.StoredField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; @@ -236,8 +235,7 @@ void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOExcepti // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: - doc.add(new KeywordField("path", file.toString())); - doc.add(new StoredField("path", file.toString())); + doc.add(new KeywordField("path", file.toString(), Field.Store.YES)); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed with points and doc values, and is efficient From 3e2d421f2b5431a6ca5c9001cb457af248c3a93c Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Mon, 6 Feb 2023 18:25:45 +0100 Subject: [PATCH 3/6] spotless --- .../apache/lucene/document/KeywordField.java | 13 ++++++------ .../lucene/document/TestKeywordField.java | 21 +++++++++++-------- .../org/apache/lucene/demo/IndexFiles.java | 1 - 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java index caae7cdb598e..0e372b48e92b 100644 --- a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java +++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java @@ -18,7 +18,6 @@ import java.util.Collection; import java.util.Objects; - import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; @@ -33,12 +32,12 @@ /** * Field that indexes a per-document String or {@link BytesRef} into an inverted index for fast - * filtering, stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc - * values for sorting and faceting, and optionally stores values as stored fields for top-hits - * retrieval. This field does not support scoring: queries produce constant scores. If you also - * need to store the value, you should add a separate {@link StoredField} instance. If you need more - * fine-grained control you can use {@link StringField}, {@link SortedDocValuesField} or - * {@link SortedSetDocValuesField}, and {@link StoredField}. + * filtering, stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc values + * for sorting and faceting, and optionally stores values as stored fields for top-hits retrieval. + * This field does not support scoring: queries produce constant scores. If you also need to store + * the value, you should add a separate {@link StoredField} instance. If you need more fine-grained + * control you can use {@link StringField}, {@link SortedDocValuesField} or {@link + * SortedSetDocValuesField}, and {@link StoredField}. * *

This field defines static factory methods for creating common query objects: * diff --git a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java index 4601e1f7ec49..6593a4509c6e 100644 --- a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java +++ b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java @@ -31,10 +31,11 @@ public class TestKeywordField extends LuceneTestCase { public void testSetBytesValue() { - Field[] fields = new Field[] { - new KeywordField("name", newBytesRef("value"), Field.Store.NO), - new KeywordField("name", newBytesRef("value"), Field.Store.YES) - }; + Field[] fields = + new Field[] { + new KeywordField("name", newBytesRef("value"), Field.Store.NO), + new KeywordField("name", newBytesRef("value"), Field.Store.YES) + }; for (Field field : fields) { assertEquals(newBytesRef("value"), field.binaryValue()); assertNull(field.stringValue()); @@ -55,10 +56,11 @@ public void testSetBytesValue() { } public void testSetStringValue() { - Field[] fields = new Field[] { - new KeywordField("name", "value", Field.Store.NO), - new KeywordField("name", "value", Field.Store.YES) - }; + Field[] fields = + new Field[] { + new KeywordField("name", "value", Field.Store.NO), + new KeywordField("name", "value", Field.Store.YES) + }; for (Field field : fields) { assertEquals("value", field.stringValue()); assertEquals(newBytesRef("value"), field.binaryValue()); @@ -81,7 +83,8 @@ public void testSetStringValue() { public void testIndexBytesValue() throws IOException { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig()); - w.addDocument(Collections.singleton(new KeywordField("field", newBytesRef("value"), Field.Store.YES))); + w.addDocument( + Collections.singleton(new KeywordField("field", newBytesRef("value"), Field.Store.YES))); IndexReader reader = DirectoryReader.open(w); w.close(); LeafReader leaf = getOnlyLeafReader(reader); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java index 2c19e3acc88b..9c683d3937c9 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java @@ -28,7 +28,6 @@ import java.nio.file.SimpleFileVisitor; import java.nio.file.attribute.BasicFileAttributes; import java.util.Date; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.demo.knn.DemoEmbeddings; From 7dc8cbae11e7eb696cd96bba72019762b0ecf71d Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 7 Feb 2023 14:58:23 +0100 Subject: [PATCH 4/6] feedback --- .../src/java/org/apache/lucene/document/KeywordField.java | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java index 0e372b48e92b..4d6303dc25fd 100644 --- a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java +++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.document; -import java.util.Collection; import java.util.Objects; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; @@ -34,9 +33,8 @@ * Field that indexes a per-document String or {@link BytesRef} into an inverted index for fast * filtering, stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc values * for sorting and faceting, and optionally stores values as stored fields for top-hits retrieval. - * This field does not support scoring: queries produce constant scores. If you also need to store - * the value, you should add a separate {@link StoredField} instance. If you need more fine-grained - * control you can use {@link StringField}, {@link SortedDocValuesField} or {@link + * This field does not support scoring: queries produce constant scores. If you need more + * fine-grained control you can use {@link StringField}, {@link SortedDocValuesField} or {@link * SortedSetDocValuesField}, and {@link StoredField}. * *

This field defines static factory methods for creating common query objects: @@ -166,7 +164,7 @@ public static Query newExactQuery(String field, String value) { * @throws NullPointerException if {@code field} is null. * @return a query matching documents with this exact value */ - public static Query newSetQuery(String field, Collection values) { + public static Query newSetQuery(String field, BytesRef... values) { Objects.requireNonNull(field, "field must not be null"); Objects.requireNonNull(values, "values must not be null"); return new TermInSetQuery(field, values); From 1b9b5db572072a918d10a1d80c54b9c7e6979c8e Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 7 Feb 2023 15:00:24 +0100 Subject: [PATCH 5/6] CHANGES --- lucene/CHANGES.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index da89bc1789b6..1bc6cfe3ccd9 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -114,7 +114,9 @@ API Changes New Features --------------------- -(No changes) + +* GITHUB#12054: Introduce a new KeywordField for simple and efficient + filtering, sorting and faceting. (Adrien Grand) Improvements --------------------- From e552d1c738ea2200ffb696911087616e1cdd07b8 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Tue, 7 Feb 2023 15:15:24 +0100 Subject: [PATCH 6/6] feedback --- .../src/java/org/apache/lucene/document/KeywordField.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java index 4d6303dc25fd..70b27ad671af 100644 --- a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java +++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java @@ -21,6 +21,7 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedSetSelector; @@ -167,7 +168,8 @@ public static Query newExactQuery(String field, String value) { public static Query newSetQuery(String field, BytesRef... values) { Objects.requireNonNull(field, "field must not be null"); Objects.requireNonNull(values, "values must not be null"); - return new TermInSetQuery(field, values); + return new IndexOrDocValuesQuery( + new TermInSetQuery(field, values), new SortedSetDocValuesSetQuery(field, values)); } /**