Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce a new KeywordField. #12054

Merged
merged 8 commits into from
Feb 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,9 @@ API Changes

New Features
---------------------
(No changes)

* GITHUB#12054: Introduce a new KeywordField for simple and efficient
filtering, sorting and faceting. (Adrien Grand)

Improvements
---------------------
Expand Down
188 changes: 188 additions & 0 deletions lucene/core/src/java/org/apache/lucene/document/KeywordField.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;

import java.util.Objects;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedSetSelector;
import org.apache.lucene.search.SortedSetSortField;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;

/**
 * Field that indexes a per-document String or {@link BytesRef} into an inverted index for fast
 * filtering, stores values in a columnar fashion using {@link DocValuesType#SORTED_SET} doc values
 * for sorting and faceting, and optionally stores values as stored fields for top-hits retrieval.
 * This field does not support scoring: queries produce constant scores. If you need more
 * fine-grained control you can use {@link StringField}, {@link SortedDocValuesField} or {@link
 * SortedSetDocValuesField}, and {@link StoredField}.
 *
 * <p>This field defines static factory methods for creating common query and sort objects:
 *
 * <ul>
 *   <li>{@link #newExactQuery} for matching a value.
 *   <li>{@link #newSetQuery} for matching any of the values coming from a set.
 *   <li>{@link #newSortField} for sorting by the values of this field.
 * </ul>
 */
public class KeywordField extends Field {

  /** Type used when the value is indexed and has doc values, but is not stored. */
  private static final FieldType FIELD_TYPE = new FieldType();

  /** Same configuration as {@link #FIELD_TYPE}, with stored fields enabled. */
  private static final FieldType FIELD_TYPE_STORED;

  static {
    FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
    FIELD_TYPE.setOmitNorms(true);
    FIELD_TYPE.setTokenized(false);
    FIELD_TYPE.setDocValuesType(DocValuesType.SORTED_SET);
    FIELD_TYPE.freeze();

    // Derive the stored variant from the frozen base type so both stay in sync.
    FIELD_TYPE_STORED = new FieldType(FIELD_TYPE);
    FIELD_TYPE_STORED.setStored(true);
    FIELD_TYPE_STORED.freeze();
  }

  /** Stored-field view of the current value; {@code null} when the field is not stored. */
  private final StoredValue storedValue;

  /**
   * Creates a new KeywordField from a {@link BytesRef} value.
   *
   * @param name field name
   * @param value the BytesRef value
   * @param stored whether to store the field
   * @throws IllegalArgumentException if the field name or value is null.
   */
  public KeywordField(String name, BytesRef value, Store stored) {
    super(name, value, stored == Field.Store.YES ? FIELD_TYPE_STORED : FIELD_TYPE);
    this.storedValue = stored == Store.YES ? new StoredValue(value) : null;
  }

  /**
   * Creates a new KeywordField from a String value, by indexing its UTF-8 representation.
   *
   * @param name field name
   * @param value the String value
   * @param stored whether to store the field
   * @throws IllegalArgumentException if the field name or value is null.
   */
  public KeywordField(String name, String value, Store stored) {
    super(name, value, stored == Field.Store.YES ? FIELD_TYPE_STORED : FIELD_TYPE);
    this.storedValue = stored == Store.YES ? new StoredValue(value) : null;
  }

  /**
   * Returns the value of this field as a {@link BytesRef}.
   *
   * <p>When the parent class reports no binary value, a new {@link BytesRef} is built from {@link
   * #stringValue()} so that callers always get a binary representation.
   */
  @Override
  public BytesRef binaryValue() {
    BytesRef binaryValue = super.binaryValue();
    if (binaryValue != null) {
      return binaryValue;
    }
    return new BytesRef(stringValue());
  }

  /** Updates the String value and, when this field is stored, the stored value as well. */
  @Override
  public void setStringValue(String value) {
    // Let the parent run first: if it rejects the value, the stored value is left untouched.
    super.setStringValue(value);
    if (storedValue != null) {
      storedValue.setStringValue(value);
    }
  }

  /** Updates the binary value and, when this field is stored, the stored value as well. */
  @Override
  public void setBytesValue(BytesRef value) {
    // Let the parent run first: if it rejects the value, the stored value is left untouched.
    super.setBytesValue(value);
    if (storedValue != null) {
      storedValue.setBinaryValue(value);
    }
  }

  /** Returns the stored value of this field, or {@code null} if the field is not stored. */
  @Override
  public StoredValue storedValue() {
    return storedValue;
  }

  /**
   * Create a query for matching an exact {@link BytesRef} value.
   *
   * @param field field name. must not be {@code null}.
   * @param value exact value. must not be {@code null}.
   * @throws NullPointerException if {@code field} or {@code value} is null.
   * @return a constant-score query matching documents with this exact value
   */
  public static Query newExactQuery(String field, BytesRef value) {
    Objects.requireNonNull(field, "field must not be null");
    Objects.requireNonNull(value, "value must not be null");
    return new ConstantScoreQuery(new TermQuery(new Term(field, value)));
  }

  /**
   * Create a query for matching an exact {@link String} value.
   *
   * @param field field name. must not be {@code null}.
   * @param value exact value. must not be {@code null}.
   * @throws NullPointerException if {@code field} or {@code value} is null.
   * @return a constant-score query matching documents with this exact value
   */
  public static Query newExactQuery(String field, String value) {
    // Validate in the same order as the other factories, and before converting the value.
    Objects.requireNonNull(field, "field must not be null");
    Objects.requireNonNull(value, "value must not be null");
    return newExactQuery(field, new BytesRef(value));
  }

  /**
   * Create a query for matching any of a set of provided {@link BytesRef} values.
   *
   * @param field field name. must not be {@code null}.
   * @param values the set of values to match. must not be {@code null}.
   * @throws NullPointerException if {@code field} or {@code values} is null.
   * @return a query matching documents that have at least one of the provided values
   */
  public static Query newSetQuery(String field, BytesRef... values) {
    Objects.requireNonNull(field, "field must not be null");
    Objects.requireNonNull(values, "values must not be null");
    // Provide both an index-based and a doc-values-based form of the same set query.
    Query indexQuery = new TermInSetQuery(field, values);
    Query docValuesQuery = new SortedSetDocValuesSetQuery(field, values);
    return new IndexOrDocValuesQuery(indexQuery, docValuesQuery);
  }

  /**
   * Create a new {@link SortField} for {@link BytesRef} values.
   *
   * @param field field name. must not be {@code null}.
   * @param reverse true if natural order should be reversed.
   * @param selector custom selector type for choosing the sort value from the set. must not be
   *     {@code null}.
   * @throws NullPointerException if {@code field} or {@code selector} is null.
   * @return a sort field that sorts on the selected value of {@code field}
   */
  public static SortField newSortField(
      String field, boolean reverse, SortedSetSelector.Type selector) {
    Objects.requireNonNull(field, "field must not be null");
    Objects.requireNonNull(selector, "selector must not be null");
    return new SortedSetSortField(field, reverse, selector);
  }
}
125 changes: 125 additions & 0 deletions lucene/core/src/test/org/apache/lucene/document/TestKeywordField.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.document;

import java.io.IOException;
import java.util.Collections;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;

/** Tests for {@link KeywordField}: value setters and indexing of BytesRef and String values. */
public class TestKeywordField extends LuceneTestCase {

  /** Checks that setBytesValue is reflected by binaryValue() and, if stored, by storedValue(). */
  public void testSetBytesValue() {
    Field[] fields =
        new Field[] {
          new KeywordField("name", newBytesRef("value"), Field.Store.NO),
          new KeywordField("name", newBytesRef("value"), Field.Store.YES)
        };
    for (Field field : fields) {
      // Initial state as set by the constructor.
      assertEquals(newBytesRef("value"), field.binaryValue());
      assertNull(field.stringValue());
      if (field.fieldType().stored()) {
        assertEquals(newBytesRef("value"), field.storedValue().getBinaryValue());
      } else {
        assertNull(field.storedValue());
      }

      // State after replacing the value.
      field.setBytesValue(newBytesRef("value2"));
      assertEquals(newBytesRef("value2"), field.binaryValue());
      assertNull(field.stringValue());
      if (field.fieldType().stored()) {
        assertEquals(newBytesRef("value2"), field.storedValue().getBinaryValue());
      } else {
        assertNull(field.storedValue());
      }
    }
  }

  /** Checks that setStringValue is reflected by stringValue(), binaryValue() and storedValue(). */
  public void testSetStringValue() {
    Field[] fields =
        new Field[] {
          new KeywordField("name", "value", Field.Store.NO),
          new KeywordField("name", "value", Field.Store.YES)
        };
    for (Field field : fields) {
      // Initial state as set by the constructor.
      assertEquals("value", field.stringValue());
      assertEquals(newBytesRef("value"), field.binaryValue());
      if (field.fieldType().stored()) {
        assertEquals("value", field.storedValue().getStringValue());
      } else {
        assertNull(field.storedValue());
      }

      // State after replacing the value.
      field.setStringValue("value2");
      assertEquals("value2", field.stringValue());
      assertEquals(newBytesRef("value2"), field.binaryValue());
      if (field.fieldType().stored()) {
        assertEquals("value2", field.storedValue().getStringValue());
      } else {
        assertNull(field.storedValue());
      }
    }
  }

  /** Indexes a stored BytesRef value and checks the terms, doc values and stored field. */
  public void testIndexBytesValue() throws IOException {
    // try-with-resources closes the reader, writer and directory even if an assertion fails.
    try (Directory dir = newDirectory();
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
      w.addDocument(
          Collections.singleton(new KeywordField("field", newBytesRef("value"), Field.Store.YES)));
      try (IndexReader reader = DirectoryReader.open(w)) {
        LeafReader leaf = getOnlyLeafReader(reader);
        assertSingleValueIndexed(leaf);
        Document storedDoc = leaf.storedFields().document(0);
        assertEquals(new BytesRef("value"), storedDoc.getBinaryValue("field"));
      }
    }
  }

  /** Indexes a stored String value and checks the terms, doc values and stored field. */
  public void testIndexStringValue() throws IOException {
    // try-with-resources closes the reader, writer and directory even if an assertion fails.
    try (Directory dir = newDirectory();
        IndexWriter w = new IndexWriter(dir, newIndexWriterConfig())) {
      w.addDocument(Collections.singleton(new KeywordField("field", "value", Field.Store.YES)));
      try (IndexReader reader = DirectoryReader.open(w)) {
        LeafReader leaf = getOnlyLeafReader(reader);
        assertSingleValueIndexed(leaf);
        Document storedDoc = leaf.storedFields().document(0);
        assertEquals("value", storedDoc.get("field"));
      }
    }
  }

  /**
   * Asserts that "field" has exactly one term, "value", and that document 0 has exactly one
   * sorted-set doc value whose ordinal 0 resolves to "value".
   */
  private static void assertSingleValueIndexed(LeafReader leaf) throws IOException {
    TermsEnum terms = leaf.terms("field").iterator();
    assertEquals(new BytesRef("value"), terms.next());
    assertNull(terms.next());

    SortedSetDocValues values = leaf.getSortedSetDocValues("field");
    assertTrue(values.advanceExact(0));
    assertEquals(1, values.docValueCount());
    assertEquals(0L, values.nextOrd());
    assertEquals(new BytesRef("value"), values.lookupOrd(0));
  }
}
Loading