From 6846008f950e483be924a76e56bdcd4d36bb47d9 Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Sun, 1 Jan 2023 10:44:57 +0100
Subject: [PATCH 1/6] Introduce a new `KeywordField`.

`KeywordField` is a combination of `StringField` and `SortedSetDocValuesField`,
similarly to how `LongField` is a combination of `LongPoint` and
`SortedNumericDocValuesField`. This makes it easier for users to create fields
that can be used for filtering, sorting and faceting.
---
 .../apache/lucene/document/KeywordField.java  | 127 ++++++++++++++++++
 .../lucene/document/TestKeywordField.java     |  88 ++++++++++++
 .../lucene/search/TestSortOptimization.java   |  40 +++---
 .../lucene/search/TestSortedSetSortField.java |  30 ++---
 .../org/apache/lucene/demo/IndexFiles.java    |   8 +-
 5 files changed, 255 insertions(+), 38 deletions(-)
 create mode 100644 lucene/core/src/java/org/apache/lucene/document/KeywordField.java
 create mode 100644 lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
new file mode 100644
index 000000000000..a283eb2db3b7
--- /dev/null
+++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import java.util.Objects;
+import org.apache.lucene.index.DocValuesType;
+import org.apache.lucene.index.IndexOptions;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.SortedSetSelector;
+import org.apache.lucene.search.SortedSetSortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Field that indexes a per-document {@link BytesRef} into an inverted index for fast filtering and
+ * stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc values for sorting
+ * and faceting. This field does not support scoring: queries produce constant scores. If you also
+ * need to store the value, you should add a separate {@link StoredField} instance. If you need more
+ * fine-grained control you can use {@link StringField} and {@link SortedDocValuesField} or {@link
+ * SortedSetDocValuesField}.
+ *
+ * <p>This field defines static factory methods for creating common query objects:
+ *
+ * <ul>
+ *   <li>{@link #newExactQuery} for matching a value.
+ *   <li>{@link #newSortField} for matching a value.
+ * </ul>
+ */
+public class KeywordField extends Field {
+
+  private static final FieldType FIELD_TYPE = new FieldType();
+
+  static {
+    FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
+    FIELD_TYPE.setOmitNorms(true);
+    FIELD_TYPE.setTokenized(false);
+    FIELD_TYPE.setDocValuesType(DocValuesType.SORTED_SET);
+    FIELD_TYPE.freeze();
+  }
+
+  /**
+   * Creates a new KeywordField.
+   *
+   * @param name field name
+   * @param value the BytesRef value
+   * @throws IllegalArgumentException if the field name or value is null.
+   */
+  public KeywordField(String name, BytesRef value) {
+    super(name, value, FIELD_TYPE);
+  }
+
+  /**
+   * Creates a new KeywordField from a String value, by indexing its UTF-8 representation.
+   *
+   * @param name field name
+   * @param value the BytesRef value
+   * @throws IllegalArgumentException if the field name or value is null.
+   */
+  public KeywordField(String name, String value) {
+    super(name, value, FIELD_TYPE);
+  }
+
+  @Override
+  public BytesRef binaryValue() {
+    BytesRef binaryValue = super.binaryValue();
+    if (binaryValue != null) {
+      return binaryValue;
+    } else {
+      return new BytesRef(stringValue());
+    }
+  }
+
+  /**
+   * Create a query for matching an exact {@link BytesRef} value.
+   *
+   * @param field field name. must not be {@code null}.
+   * @param value exact value
+   * @throws IllegalArgumentException if {@code field} is null.
+   * @return a query matching documents with this exact value
+   */
+  public static Query newExactQuery(String field, BytesRef value) {
+    Objects.requireNonNull(field, "field must not be null");
+    Objects.requireNonNull(value, "value must not be null");
+    return new ConstantScoreQuery(new TermQuery(new Term(field, value)));
+  }
+
+  /**
+   * Create a query for matching an exact {@link String} value.
+   *
+   * @param field field name. must not be {@code null}.
+   * @param value exact value
+   * @throws IllegalArgumentException if {@code field} is null.
+   * @return a query matching documents with this exact value
+   */
+  public static Query newExactQuery(String field, String value) {
+    return newExactQuery(field, new BytesRef(value));
+  }
+
+  /**
+   * Create a new {@link SortField} for {@link BytesRef} values.
+   *
+   * @param field field name. must not be {@code null}.
+   * @param reverse true if natural order should be reversed.
+   * @param selector custom selector type for choosing the sort value from the set.
+   */
+  public static SortField newSortField(
+      String field, boolean reverse, SortedSetSelector.Type selector) {
+    return new SortedSetSortField(field, reverse, selector);
+  }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
new file mode 100644
index 000000000000..ca1be9e4b2da
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
@@ -0,0 +1,88 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.document;
+
+import java.io.IOException;
+import java.util.Collections;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.LeafReader;
+import org.apache.lucene.index.SortedSetDocValues;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.tests.util.LuceneTestCase;
+import org.apache.lucene.util.BytesRef;
+
+public class TestKeywordField extends LuceneTestCase {
+
+  public void testSetBytesValue() {
+    KeywordField field = new KeywordField("name", newBytesRef("value"));
+    assertEquals(newBytesRef("value"), field.binaryValue());
+    assertNull(field.stringValue());
+    field.setBytesValue(newBytesRef("value2"));
+    assertEquals(newBytesRef("value2"), field.binaryValue());
+    assertNull(field.stringValue());
+  }
+
+  public void testSetStringValue() {
+    KeywordField field = new KeywordField("name", "value");
+    assertEquals("value", field.stringValue());
+    assertEquals(newBytesRef("value"), field.binaryValue());
+    field.setStringValue("value2");
+    assertEquals("value2", field.stringValue());
+    assertEquals(newBytesRef("value2"), field.binaryValue());
+  }
+
+  public void testIndexBytesValue() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+    w.addDocument(Collections.singleton(new KeywordField("field", newBytesRef("value"))));
+    IndexReader reader = DirectoryReader.open(w);
+    w.close();
+    LeafReader leaf = getOnlyLeafReader(reader);
+    TermsEnum terms = leaf.terms("field").iterator();
+    assertEquals(new BytesRef("value"), terms.next());
+    assertNull(terms.next());
+    SortedSetDocValues values = leaf.getSortedSetDocValues("field");
+    assertTrue(values.advanceExact(0));
+    assertEquals(1, values.docValueCount());
+    assertEquals(0L, values.nextOrd());
+    assertEquals(new BytesRef("value"), values.lookupOrd(0));
+    reader.close();
+    dir.close();
+  }
+
+  public void testIndexStringValue() throws IOException {
+    Directory dir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
+    w.addDocument(Collections.singleton(new KeywordField("field", "value")));
+    IndexReader reader = DirectoryReader.open(w);
+    w.close();
+    LeafReader leaf = getOnlyLeafReader(reader);
+    TermsEnum terms = leaf.terms("field").iterator();
+    assertEquals(new BytesRef("value"), terms.next());
+    assertNull(terms.next());
+    SortedSetDocValues values = leaf.getSortedSetDocValues("field");
+    assertTrue(values.advanceExact(0));
+    assertEquals(1, values.docValueCount());
+    assertEquals(0L, values.nextOrd());
+    assertEquals(new BytesRef("value"), values.lookupOrd(0));
+    reader.close();
+    dir.close();
+  }
+}
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
index 5c0aad74d97f..8d7ab1d65792 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
@@ -26,15 +26,14 @@
 import java.util.List;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.document.FloatDocValuesField;
 import org.apache.lucene.document.FloatPoint;
 import org.apache.lucene.document.IntPoint;
 import org.apache.lucene.document.IntRange;
+import org.apache.lucene.document.KeywordField;
 import org.apache.lucene.document.LongField;
 import org.apache.lucene.document.LongPoint;
 import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.SortedDocValuesField;
 import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.DirectoryReader;
@@ -891,8 +890,7 @@ public void testStringSortOptimization() throws IOException {
     for (int i = 0; i < numDocs; ++i) {
       final Document doc = new Document();
       final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
-      doc.add(new StringField("my_field", value, Store.NO));
-      doc.add(new SortedDocValuesField("my_field", value));
+      doc.add(new KeywordField("my_field", value));
       writer.addDocument(doc);
       if (i % 2000 == 0) writer.flush(); // multiple segments
     }
@@ -916,8 +914,7 @@ public void testStringSortOptimizationWithMissingValues() throws IOException {
       final Document doc = new Document();
       if (random().nextInt(2) == 0) {
         final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
-        doc.add(new StringField("my_field", value, Store.NO));
-        doc.add(new SortedDocValuesField("my_field", value));
+        doc.add(new KeywordField("my_field", value));
       }
       writer.addDocument(doc);
     }
@@ -936,7 +933,8 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep
     final int numHits = 5;
 
     { // simple ascending sort
-      SortField sortField = new SortField("my_field", SortField.Type.STRING);
+      SortField sortField =
+          KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_LAST);
       Sort sort = new Sort(sortField);
       TopDocs topDocs = assertSort(reader, sort, numHits, null);
@@ -944,7 +942,7 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep
     }
 
     { // simple descending sort
-      SortField sortField = new SortField("my_field", SortField.Type.STRING, true);
+      SortField sortField = KeywordField.newSortField("my_field", true, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_FIRST);
       Sort sort = new Sort(sortField);
       TopDocs topDocs = assertSort(reader, sort, numHits, null);
@@ -952,21 +950,23 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep
     }
 
     { // ascending sort that returns missing values first
-      SortField sortField = new SortField("my_field", SortField.Type.STRING);
+      SortField sortField =
+          KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_FIRST);
       Sort sort = new Sort(sortField);
       assertSort(reader, sort, numHits, null);
     }
 
     { // descending sort that returns missing values last
-      SortField sortField = new SortField("my_field", SortField.Type.STRING, true);
+      SortField sortField = KeywordField.newSortField("my_field", true, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_LAST);
       Sort sort = new Sort(sortField);
       assertSort(reader, sort, numHits, null);
     }
 
     { // paging ascending sort with after
-      SortField sortField = new SortField("my_field", SortField.Type.STRING);
+      SortField sortField =
+          KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_LAST);
       Sort sort = new Sort(sortField);
       BytesRef afterValue = new BytesRef(random().nextBoolean() ? "23" : "230000000");
@@ -976,7 +976,7 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep
     }
 
     { // paging descending sort with after
-      SortField sortField = new SortField("my_field", SortField.Type.STRING, true);
+      SortField sortField = KeywordField.newSortField("my_field", true, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_FIRST);
       Sort sort = new Sort(sortField);
       BytesRef afterValue = new BytesRef(random().nextBoolean() ? "17" : "170000000");
@@ -986,7 +986,8 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep
     }
 
     { // paging ascending sort with after that returns missing values first
-      SortField sortField = new SortField("my_field", SortField.Type.STRING);
+      SortField sortField =
+          KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_FIRST);
       Sort sort = new Sort(sortField);
       BytesRef afterValue = new BytesRef(random().nextBoolean() ? "23" : "230000000");
@@ -996,7 +997,7 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep
     }
 
     { // paging descending sort with after that returns missing values first
-      SortField sortField = new SortField("my_field", SortField.Type.STRING, true);
+      SortField sortField = KeywordField.newSortField("my_field", true, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_LAST);
       Sort sort = new Sort(sortField);
       BytesRef afterValue = new BytesRef(random().nextBoolean() ? "17" : "170000000");
@@ -1006,7 +1007,8 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep
     }
 
     { // test that if there is the secondary sort on _score, hits are still skipped
-      SortField sortField = new SortField("my_field", SortField.Type.STRING);
+      SortField sortField =
+          KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_LAST);
       Sort sort = new Sort(sortField, FIELD_SCORE);
       TopDocs topDocs = assertSort(reader, sort, numHits, null);
@@ -1014,7 +1016,8 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep
     }
 
     { // test that if string field is a secondary sort, no optimization is run
-      SortField sortField = new SortField("my_field", SortField.Type.STRING);
+      SortField sortField =
+          KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN);
       sortField.setMissingValue(SortField.STRING_LAST);
       Sort sort = new Sort(FIELD_SCORE, sortField);
       TopDocs topDocs = assertSort(reader, sort, numHits, null);
@@ -1025,10 +1028,7 @@ private void doTestStringSortOptimization(DirectoryReader reader) throws IOExcep
   }
 
   public void doTestStringSortOptimizationDisabled(DirectoryReader reader) throws IOException {
-    SortField sortField =
-        random().nextBoolean()
-            ? new SortedSetSortField("my_field", false)
-            : new SortField("my_field", SortField.Type.STRING);
+    SortField sortField = KeywordField.newSortField("my_field", false, SortedSetSelector.Type.MIN);
     sortField.setMissingValue(SortField.STRING_LAST);
     sortField.setOptimizeSortWithIndexedData(false);
 
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java b/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java
index aad6552ccf83..df78f5bafcd6 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java
@@ -18,7 +18,7 @@
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.SortedSetDocValuesField;
+import org.apache.lucene.document.KeywordField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.MultiReader;
 import org.apache.lucene.index.Term;
@@ -64,12 +64,12 @@ public void testForward() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz")));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("foo")));
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("foo")));
+    doc.add(new KeywordField("value", newBytesRef("bar")));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     IndexReader ir = writer.getReader();
@@ -92,12 +92,12 @@ public void testReverse() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("foo")));
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("foo")));
+    doc.add(new KeywordField("value", newBytesRef("bar")));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz")));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
 
@@ -121,12 +121,12 @@ public void testMissingFirst() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz")));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("foo")));
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("foo")));
+    doc.add(new KeywordField("value", newBytesRef("bar")));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
@@ -156,12 +156,12 @@ public void testMissingLast() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz")));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("foo")));
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("foo")));
+    doc.add(new KeywordField("value", newBytesRef("bar")));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
@@ -191,11 +191,11 @@ public void testSingleton() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz")));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new SortedSetDocValuesField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("bar")));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     IndexReader ir = writer.getReader();
diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
index 7b172d65d410..2cd8e56b22b5 100644
--- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
+++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
@@ -28,15 +28,17 @@
 import java.nio.file.SimpleFileVisitor;
 import java.nio.file.attribute.BasicFileAttributes;
 import java.util.Date;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.demo.knn.DemoEmbeddings;
 import org.apache.lucene.demo.knn.KnnVectorDict;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.KeywordField;
 import org.apache.lucene.document.KnnFloatVectorField;
 import org.apache.lucene.document.LongField;
-import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@@ -234,8 +236,8 @@ void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOExcepti
       // field that is indexed (i.e. searchable), but don't tokenize
       // the field into separate words and don't index term frequency
       // or positional information:
-      Field pathField = new StringField("path", file.toString(), Field.Store.YES);
-      doc.add(pathField);
+      doc.add(new KeywordField("path", file.toString()));
+      doc.add(new StoredField("path", file.toString()));
 
       // Add the last modified date of the file a field named "modified".
       // Use a LongField that is indexed with points and doc values, and is efficient

From 5ebeceeaf629775a4da4c6544783a24344a598ab Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Mon, 6 Feb 2023 17:42:56 +0100
Subject: [PATCH 2/6] Iterate.

---
 .../apache/lucene/document/KeywordField.java  | 84 ++++++++++++++++---
 .../lucene/document/TestKeywordField.java     | 62 ++++++++++----
 .../lucene/search/TestSortOptimization.java   |  8 +-
 .../lucene/search/TestSortedSetSortField.java | 28 +++----
 .../org/apache/lucene/demo/IndexFiles.java    |  4 +-
 5 files changed, 140 insertions(+), 46 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
index a283eb2db3b7..caae7cdb598e 100644
--- a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
@@ -16,7 +16,9 @@
  */
 package org.apache.lucene.document;
 
+import java.util.Collection;
 import java.util.Objects;
+
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.Term;
@@ -25,27 +27,31 @@
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedSetSelector;
 import org.apache.lucene.search.SortedSetSortField;
+import org.apache.lucene.search.TermInSetQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.util.BytesRef;
 
 /**
- * Field that indexes a per-document {@link BytesRef} into an inverted index for fast filtering and
- * stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc values for sorting
- * and faceting. This field does not support scoring: queries produce constant scores. If you also
+ * Field that indexes a per-document String or {@link BytesRef} into an inverted index for fast
+ * filtering, stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc
+ * values for sorting and faceting, and optionally stores values as stored fields for top-hits
+ * retrieval. This field does not support scoring: queries produce constant scores. If you also
  * need to store the value, you should add a separate {@link StoredField} instance. If you need more
- * fine-grained control you can use {@link StringField} and {@link SortedDocValuesField} or {@link
- * SortedSetDocValuesField}.
+ * fine-grained control you can use {@link StringField}, {@link SortedDocValuesField} or
+ * {@link SortedSetDocValuesField}, and {@link StoredField}.
  *
  * <p>This field defines static factory methods for creating common query objects:
  *
  * <ul>
  *   <li>{@link #newExactQuery} for matching a value.
+ *   <li>{@link #newSetQuery} for matching any of the values coming from a set.
  *   <li>{@link #newSortField} for matching a value.
  * </ul>
  */
 public class KeywordField extends Field {
 
   private static final FieldType FIELD_TYPE = new FieldType();
+  private static final FieldType FIELD_TYPE_STORED;
 
   static {
     FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
@@ -53,17 +59,29 @@ public class KeywordField extends Field {
     FIELD_TYPE.setTokenized(false);
     FIELD_TYPE.setDocValuesType(DocValuesType.SORTED_SET);
     FIELD_TYPE.freeze();
+
+    FIELD_TYPE_STORED = new FieldType(FIELD_TYPE);
+    FIELD_TYPE_STORED.setStored(true);
+    FIELD_TYPE_STORED.freeze();
   }
 
+  private final StoredValue storedValue;
+
   /**
    * Creates a new KeywordField.
    *
    * @param name field name
    * @param value the BytesRef value
+   * @param stored whether to store the field
    * @throws IllegalArgumentException if the field name or value is null.
    */
-  public KeywordField(String name, BytesRef value) {
-    super(name, value, FIELD_TYPE);
+  public KeywordField(String name, BytesRef value, Store stored) {
+    super(name, value, stored == Field.Store.YES ? FIELD_TYPE_STORED : FIELD_TYPE);
+    if (stored == Store.YES) {
+      storedValue = new StoredValue(value);
+    } else {
+      storedValue = null;
+    }
   }
 
   /**
@@ -71,10 +89,16 @@ public KeywordField(String name, BytesRef value) {
    *
    * @param name field name
    * @param value the BytesRef value
+   * @param stored whether to store the field
    * @throws IllegalArgumentException if the field name or value is null.
    */
-  public KeywordField(String name, String value) {
-    super(name, value, FIELD_TYPE);
+  public KeywordField(String name, String value, Store stored) {
+    super(name, value, stored == Field.Store.YES ? FIELD_TYPE_STORED : FIELD_TYPE);
+    if (stored == Store.YES) {
+      storedValue = new StoredValue(value);
+    } else {
+      storedValue = null;
+    }
   }
 
   @Override
@@ -87,12 +111,33 @@ public BytesRef binaryValue() {
     }
   }
 
+  @Override
+  public void setStringValue(String value) {
+    super.setStringValue(value);
+    if (storedValue != null) {
+      storedValue.setStringValue(value);
+    }
+  }
+
+  @Override
+  public void setBytesValue(BytesRef value) {
+    super.setBytesValue(value);
+    if (storedValue != null) {
+      storedValue.setBinaryValue(value);
+    }
+  }
+
+  @Override
+  public StoredValue storedValue() {
+    return storedValue;
+  }
+
   /**
    * Create a query for matching an exact {@link BytesRef} value.
    *
    * @param field field name. must not be {@code null}.
    * @param value exact value
-   * @throws IllegalArgumentException if {@code field} is null.
+   * @throws NullPointerException if {@code field} or {@code value} is null.
    * @return a query matching documents with this exact value
    */
   public static Query newExactQuery(String field, BytesRef value) {
@@ -106,13 +151,28 @@ public static Query newExactQuery(String field, BytesRef value) {
    *
    * @param field field name. must not be {@code null}.
    * @param value exact value
-   * @throws IllegalArgumentException if {@code field} is null.
+   * @throws NullPointerException if {@code field} or {@code value} is null.
    * @return a query matching documents with this exact value
    */
   public static Query newExactQuery(String field, String value) {
+    Objects.requireNonNull(value, "value must not be null");
     return newExactQuery(field, new BytesRef(value));
   }
 
+  /**
+   * Create a query for matching any of a set of provided {@link BytesRef} values.
+   *
+   * @param field field name. must not be {@code null}.
+   * @param values the set of values to match
+   * @throws NullPointerException if {@code field} or {@code values} is null.
+   * @return a query matching documents with any of the given values
+   */
+  public static Query newSetQuery(String field, Collection<BytesRef> values) {
+    Objects.requireNonNull(field, "field must not be null");
+    Objects.requireNonNull(values, "values must not be null");
+    return new TermInSetQuery(field, values);
+  }
+
   /**
    * Create a new {@link SortField} for {@link BytesRef} values.
    *
@@ -122,6 +182,8 @@ public static Query newExactQuery(String field, String value) {
    */
   public static SortField newSortField(
       String field, boolean reverse, SortedSetSelector.Type selector) {
+    Objects.requireNonNull(field, "field must not be null");
+    Objects.requireNonNull(selector, "selector must not be null");
     return new SortedSetSortField(field, reverse, selector);
   }
 }
diff --git a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
index ca1be9e4b2da..4601e1f7ec49 100644
--- a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
@@ -31,27 +31,57 @@
 public class TestKeywordField extends LuceneTestCase {
 
   public void testSetBytesValue() {
-    KeywordField field = new KeywordField("name", newBytesRef("value"));
-    assertEquals(newBytesRef("value"), field.binaryValue());
-    assertNull(field.stringValue());
-    field.setBytesValue(newBytesRef("value2"));
-    assertEquals(newBytesRef("value2"), field.binaryValue());
-    assertNull(field.stringValue());
+    Field[] fields = new Field[] {
+        new KeywordField("name", newBytesRef("value"), Field.Store.NO),
+        new KeywordField("name", newBytesRef("value"), Field.Store.YES)
+    };
+    for (Field field : fields) {
+      assertEquals(newBytesRef("value"), field.binaryValue());
+      assertNull(field.stringValue());
+      if (field.fieldType().stored()) {
+        assertEquals(newBytesRef("value"), field.storedValue().getBinaryValue());
+      } else {
+        assertNull(field.storedValue());
+      }
+      field.setBytesValue(newBytesRef("value2"));
+      assertEquals(newBytesRef("value2"), field.binaryValue());
+      assertNull(field.stringValue());
+      if (field.fieldType().stored()) {
+        assertEquals(newBytesRef("value2"), field.storedValue().getBinaryValue());
+      } else {
+        assertNull(field.storedValue());
+      }
+    }
   }
 
   public void testSetStringValue() {
-    KeywordField field = new KeywordField("name", "value");
-    assertEquals("value", field.stringValue());
-    assertEquals(newBytesRef("value"), field.binaryValue());
-    field.setStringValue("value2");
-    assertEquals("value2", field.stringValue());
-    assertEquals(newBytesRef("value2"), field.binaryValue());
+    Field[] fields = new Field[] {
+        new KeywordField("name", "value", Field.Store.NO),
+        new KeywordField("name", "value", Field.Store.YES)
+    };
+    for (Field field : fields) {
+      assertEquals("value", field.stringValue());
+      assertEquals(newBytesRef("value"), field.binaryValue());
+      if (field.fieldType().stored()) {
+        assertEquals("value", field.storedValue().getStringValue());
+      } else {
+        assertNull(field.storedValue());
+      }
+      field.setStringValue("value2");
+      assertEquals("value2", field.stringValue());
+      assertEquals(newBytesRef("value2"), field.binaryValue());
+      if (field.fieldType().stored()) {
+        assertEquals("value2", field.storedValue().getStringValue());
+      } else {
+        assertNull(field.storedValue());
+      }
+    }
   }
 
   public void testIndexBytesValue() throws IOException {
     Directory dir = newDirectory();
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
-    w.addDocument(Collections.singleton(new KeywordField("field", newBytesRef("value"))));
+    w.addDocument(Collections.singleton(new KeywordField("field", newBytesRef("value"), Field.Store.YES)));
     IndexReader reader = DirectoryReader.open(w);
     w.close();
     LeafReader leaf = getOnlyLeafReader(reader);
@@ -63,6 +93,8 @@ public void testIndexBytesValue() throws IOException {
     assertEquals(1, values.docValueCount());
     assertEquals(0L, values.nextOrd());
     assertEquals(new BytesRef("value"), values.lookupOrd(0));
+    Document storedDoc = leaf.storedFields().document(0);
+    assertEquals(new BytesRef("value"), storedDoc.getBinaryValue("field"));
     reader.close();
     dir.close();
   }
@@ -70,7 +102,7 @@ public void testIndexBytesValue() throws IOException {
   public void testIndexStringValue() throws IOException {
     Directory dir = newDirectory();
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
-    w.addDocument(Collections.singleton(new KeywordField("field", "value")));
+    w.addDocument(Collections.singleton(new KeywordField("field", "value", Field.Store.YES)));
     IndexReader reader = DirectoryReader.open(w);
     w.close();
     LeafReader leaf = getOnlyLeafReader(reader);
@@ -82,6 +114,8 @@ public void testIndexStringValue() throws IOException {
     assertEquals(1, values.docValueCount());
     assertEquals(0L, values.nextOrd());
     assertEquals(new BytesRef("value"), values.lookupOrd(0));
+    Document storedDoc = leaf.storedFields().document(0);
+    assertEquals("value", storedDoc.get("field"));
     reader.close();
     dir.close();
   }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
index 8d7ab1d65792..d30146f39a3c 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortOptimization.java
@@ -808,8 +808,8 @@ public void testSortOptimizationOnSortedNumericField() throws IOException {
       int value = random().nextInt();
       int value2 = random().nextInt();
       final Document doc = new Document();
-      doc.add(new LongField("my_field", value, Store.NO));
-      doc.add(new LongField("my_field", value2, Store.NO));
+      doc.add(new LongField("my_field", value, Field.Store.NO));
+      doc.add(new LongField("my_field", value2, Field.Store.NO));
       writer.addDocument(doc);
     }
     final IndexReader reader = DirectoryReader.open(writer);
@@ -890,7 +890,7 @@ public void testStringSortOptimization() throws IOException {
     for (int i = 0; i < numDocs; ++i) {
       final Document doc = new Document();
       final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
-      doc.add(new KeywordField("my_field", value));
+      doc.add(new KeywordField("my_field", value, Field.Store.NO));
       writer.addDocument(doc);
       if (i % 2000 == 0) writer.flush(); // multiple segments
     }
@@ -914,7 +914,7 @@ public void testStringSortOptimizationWithMissingValues() throws IOException {
       final Document doc = new Document();
       if (random().nextInt(2) == 0) {
         final BytesRef value = new BytesRef(Integer.toString(random().nextInt(1000)));
-        doc.add(new KeywordField("my_field", value));
+        doc.add(new KeywordField("my_field", value, Field.Store.NO));
       }
       writer.addDocument(doc);
     }
diff --git a/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java b/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java
index df78f5bafcd6..873d948373f1 100644
--- a/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java
+++ b/lucene/core/src/test/org/apache/lucene/search/TestSortedSetSortField.java
@@ -64,12 +64,12 @@ public void testForward() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("foo")));
-    doc.add(new KeywordField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("foo"), Field.Store.NO));
+    doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     IndexReader ir = writer.getReader();
@@ -92,12 +92,12 @@ public void testReverse() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("foo")));
-    doc.add(new KeywordField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("foo"), Field.Store.NO));
+    doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
 
@@ -121,12 +121,12 @@ public void testMissingFirst() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("foo")));
-    doc.add(new KeywordField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("foo"), Field.Store.NO));
+    doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
@@ -156,12 +156,12 @@ public void testMissingLast() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("foo")));
-    doc.add(new KeywordField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("foo"), Field.Store.NO));
+    doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
@@ -191,11 +191,11 @@ public void testSingleton() throws Exception {
     Directory dir = newDirectory();
     RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
     Document doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("baz")));
+    doc.add(new KeywordField("value", newBytesRef("baz"), Field.Store.NO));
     doc.add(newStringField("id", "2", Field.Store.YES));
     writer.addDocument(doc);
     doc = new Document();
-    doc.add(new KeywordField("value", newBytesRef("bar")));
+    doc.add(new KeywordField("value", newBytesRef("bar"), Field.Store.NO));
     doc.add(newStringField("id", "1", Field.Store.YES));
     writer.addDocument(doc);
     IndexReader ir = writer.getReader();
diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
index 2cd8e56b22b5..2c19e3acc88b 100644
--- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
+++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
@@ -38,7 +38,6 @@
 import org.apache.lucene.document.KeywordField;
 import org.apache.lucene.document.KnnFloatVectorField;
 import org.apache.lucene.document.LongField;
-import org.apache.lucene.document.StoredField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
@@ -236,8 +235,7 @@ void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOExcepti
       // field that is indexed (i.e. searchable), but don't tokenize
       // the field into separate words and don't index term frequency
       // or positional information:
-      doc.add(new KeywordField("path", file.toString()));
-      doc.add(new StoredField("path", file.toString()));
+      doc.add(new KeywordField("path", file.toString(), Field.Store.YES));
 
       // Add the last modified date of the file a field named "modified".
       // Use a LongField that is indexed with points and doc values, and is efficient

From 3e2d421f2b5431a6ca5c9001cb457af248c3a93c Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Mon, 6 Feb 2023 18:25:45 +0100
Subject: [PATCH 3/6] spotless

---
 .../apache/lucene/document/KeywordField.java  | 13 ++++++------
 .../lucene/document/TestKeywordField.java     | 21 +++++++++++--------
 .../org/apache/lucene/demo/IndexFiles.java    |  1 -
 3 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
index caae7cdb598e..0e372b48e92b 100644
--- a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
@@ -18,7 +18,6 @@
 
 import java.util.Collection;
 import java.util.Objects;
-
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.Term;
@@ -33,12 +32,12 @@
 
 /**
  * Field that indexes a per-document String or {@link BytesRef} into an inverted index for fast
- * filtering, stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc
- * values for sorting and faceting, and optionally stores values as stored fields for top-hits
- * retrieval. This field does not support scoring: queries produce constant scores. If you also
- * need to store the value, you should add a separate {@link StoredField} instance. If you need more
- * fine-grained control you can use {@link StringField}, {@link SortedDocValuesField} or
- * {@link SortedSetDocValuesField}, and {@link StoredField}.
+ * filtering, stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc values
+ * for sorting and faceting, and optionally stores values as stored fields for top-hits retrieval.
+ * This field does not support scoring: queries produce constant scores. If you also need to store
+ * the value, you should add a separate {@link StoredField} instance. If you need more fine-grained
+ * control you can use {@link StringField}, {@link SortedDocValuesField} or {@link
+ * SortedSetDocValuesField}, and {@link StoredField}.
  *
  * <p>This field defines static factory methods for creating common query objects:
  *
diff --git a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
index 4601e1f7ec49..6593a4509c6e 100644
--- a/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
+++ b/lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
@@ -31,10 +31,11 @@
 public class TestKeywordField extends LuceneTestCase {
 
   public void testSetBytesValue() {
-    Field[] fields = new Field[] {
-        new KeywordField("name", newBytesRef("value"), Field.Store.NO),
-        new KeywordField("name", newBytesRef("value"), Field.Store.YES)
-    };
+    Field[] fields =
+        new Field[] {
+          new KeywordField("name", newBytesRef("value"), Field.Store.NO),
+          new KeywordField("name", newBytesRef("value"), Field.Store.YES)
+        };
     for (Field field : fields) {
       assertEquals(newBytesRef("value"), field.binaryValue());
       assertNull(field.stringValue());
@@ -55,10 +56,11 @@ public void testSetBytesValue() {
   }
 
   public void testSetStringValue() {
-    Field[] fields = new Field[] {
-        new KeywordField("name", "value", Field.Store.NO),
-        new KeywordField("name", "value", Field.Store.YES)
-    };
+    Field[] fields =
+        new Field[] {
+          new KeywordField("name", "value", Field.Store.NO),
+          new KeywordField("name", "value", Field.Store.YES)
+        };
     for (Field field : fields) {
       assertEquals("value", field.stringValue());
       assertEquals(newBytesRef("value"), field.binaryValue());
@@ -81,7 +83,8 @@ public void testSetStringValue() {
   public void testIndexBytesValue() throws IOException {
     Directory dir = newDirectory();
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig());
-    w.addDocument(Collections.singleton(new KeywordField("field", newBytesRef("value"), Field.Store.YES)));
+    w.addDocument(
+        Collections.singleton(new KeywordField("field", newBytesRef("value"), Field.Store.YES)));
     IndexReader reader = DirectoryReader.open(w);
     w.close();
     LeafReader leaf = getOnlyLeafReader(reader);
diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
index 2c19e3acc88b..9c683d3937c9 100644
--- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
+++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java
@@ -28,7 +28,6 @@
 import java.nio.file.SimpleFileVisitor;
 import java.nio.file.attribute.BasicFileAttributes;
 import java.util.Date;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.demo.knn.DemoEmbeddings;

From 7dc8cbae11e7eb696cd96bba72019762b0ecf71d Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Tue, 7 Feb 2023 14:58:23 +0100
Subject: [PATCH 4/6] feedback

---
 .../src/java/org/apache/lucene/document/KeywordField.java | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
index 0e372b48e92b..4d6303dc25fd 100644
--- a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
@@ -16,7 +16,6 @@
  */
 package org.apache.lucene.document;
 
-import java.util.Collection;
 import java.util.Objects;
 import org.apache.lucene.index.DocValuesType;
 import org.apache.lucene.index.IndexOptions;
@@ -34,9 +33,8 @@
  * Field that indexes a per-document String or {@link BytesRef} into an inverted index for fast
  * filtering, stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc values
  * for sorting and faceting, and optionally stores values as stored fields for top-hits retrieval.
- * This field does not support scoring: queries produce constant scores. If you also need to store
- * the value, you should add a separate {@link StoredField} instance. If you need more fine-grained
- * control you can use {@link StringField}, {@link SortedDocValuesField} or {@link
+ * This field does not support scoring: queries produce constant scores. If you need more
+ * fine-grained control you can use {@link StringField}, {@link SortedDocValuesField} or {@link
  * SortedSetDocValuesField}, and {@link StoredField}.
  *
  * <p>This field defines static factory methods for creating common query objects:
@@ -166,7 +164,7 @@ public static Query newExactQuery(String field, String value) {
    * @throws NullPointerException if {@code field} is null.
    * @return a query matching documents with this exact value
    */
-  public static Query newSetQuery(String field, Collection<BytesRef> values) {
+  public static Query newSetQuery(String field, BytesRef... values) {
     Objects.requireNonNull(field, "field must not be null");
     Objects.requireNonNull(values, "values must not be null");
     return new TermInSetQuery(field, values);

From 1b9b5db572072a918d10a1d80c54b9c7e6979c8e Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Tue, 7 Feb 2023 15:00:24 +0100
Subject: [PATCH 5/6] CHANGES

---
 lucene/CHANGES.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index da89bc1789b6..1bc6cfe3ccd9 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -114,7 +114,9 @@ API Changes
 
 New Features
 ---------------------
-(No changes)
+
+* GITHUB#12054: Introduce a new KeywordField for simple and efficient
+  filtering, sorting and faceting. (Adrien Grand)
 
 Improvements
 ---------------------

From e552d1c738ea2200ffb696911087616e1cdd07b8 Mon Sep 17 00:00:00 2001
From: Adrien Grand <jpountz@gmail.com>
Date: Tue, 7 Feb 2023 15:15:24 +0100
Subject: [PATCH 6/6] feedback

---
 .../src/java/org/apache/lucene/document/KeywordField.java     | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
index 4d6303dc25fd..70b27ad671af 100644
--- a/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
+++ b/lucene/core/src/java/org/apache/lucene/document/KeywordField.java
@@ -21,6 +21,7 @@
 import org.apache.lucene.index.IndexOptions;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.IndexOrDocValuesQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.SortedSetSelector;
@@ -167,7 +168,8 @@ public static Query newExactQuery(String field, String value) {
   public static Query newSetQuery(String field, BytesRef... values) {
     Objects.requireNonNull(field, "field must not be null");
     Objects.requireNonNull(values, "values must not be null");
-    return new TermInSetQuery(field, values);
+    return new IndexOrDocValuesQuery(
+        new TermInSetQuery(field, values), new SortedSetDocValuesSetQuery(field, values));
   }
 
   /**