From d76dd269ad196e2353547dcf6557164e13d4d282 Mon Sep 17 00:00:00 2001
From: Patrick Zhai
Date: Sat, 3 Jun 2023 20:16:16 -0700
Subject: [PATCH] Add updateDocuments API which accept a query (reopen) (#12346)

---
 lucene/CHANGES.txt                            |  3 +++
 .../index/DocumentsWriterDeleteQueue.java     | 21 +++++++++++++++++++
 .../org/apache/lucene/index/IndexWriter.java  | 13 ++++++++++++
 .../apache/lucene/index/TestIndexWriter.java  |  7 ++++++-
 .../lucene/tests/index/RandomIndexWriter.java |  8 +++++++-
 5 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index b61ede95d8e8..d169535d4db1 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -21,6 +21,9 @@ API Changes
 * GITHUB#12268: Add BitSet.clear() without parameters for clearing the entire set
   (Jonathan Ellis)
 
+* GITHUB#12346: add new IndexWriter#updateDocuments(Query, Iterable) API
+  to update documents atomically, with respect to refresh and commit using a query. (Patrick Zhai)
+
 New Features
 ---------------------
 
diff --git a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java
index ed86416a2b19..4d8bfd054654 100644
--- a/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java
+++ b/lucene/core/src/java/org/apache/lucene/index/DocumentsWriterDeleteQueue.java
@@ -142,6 +142,10 @@ static Node<Term> newNode(Term term) {
     return new TermNode(term);
   }
 
+  static Node<Query> newNode(Query query) {
+    return new QueryNode(query);
+  }
+
   static Node<DocValuesUpdate[]> newNode(DocValuesUpdate... updates) {
     return new DocValuesUpdatesNode(updates);
   }
@@ -437,6 +441,23 @@ public String toString() {
     }
   }
 
+  private static final class QueryNode extends Node<Query> {
+
+    QueryNode(Query query) {
+      super(query);
+    }
+
+    @Override
+    void apply(BufferedUpdates bufferedDeletes, int docIDUpto) {
+      bufferedDeletes.addQuery(item, docIDUpto);
+    }
+
+    @Override
+    public String toString() {
+      return "del=" + item;
+    }
+  }
+
   private static final class QueryArrayNode extends Node<Query[]> {
     QueryArrayNode(Query[] query) {
       super(query);
diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
index f488e1f83f62..467dc5758c83 100644
--- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -1522,6 +1522,19 @@ public long updateDocuments(
         delTerm == null ? null : DocumentsWriterDeleteQueue.newNode(delTerm), docs);
   }
 
+  /**
+   * Similar to {@link #updateDocuments(Term, Iterable)}, but take a query instead of a term to
+   * identify the documents to be updated
+   *
+   * @lucene.experimental
+   */
+  public long updateDocuments(
+      Query delQuery, Iterable<? extends Iterable<? extends IndexableField>> docs)
+      throws IOException {
+    return updateDocuments(
+        delQuery == null ? null : DocumentsWriterDeleteQueue.newNode(delQuery), docs);
+  }
+
   private long updateDocuments(
       final DocumentsWriterDeleteQueue.Node<?> delNode,
       Iterable<? extends Iterable<? extends IndexableField>> docs)
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
index e9a2fd9141dd..9c7b7c8a5290 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestIndexWriter.java
@@ -3473,7 +3473,12 @@ public int numDeletesToMerge(
                     Document doc = new Document();
                     doc.add(new StringField("id", id, Field.Store.YES));
                     if (mixDeletes && random().nextBoolean()) {
-                      writer.updateDocuments(new Term("id", id), Arrays.asList(doc, doc));
+                      if (random().nextBoolean()) {
+                        writer.updateDocuments(new Term("id", id), Arrays.asList(doc, doc));
+                      } else {
+                        writer.updateDocuments(
+                            new TermQuery(new Term("id", id)), Arrays.asList(doc, doc));
+                      }
                     } else {
                       writer.softUpdateDocuments(
                           new Term("id", id),
diff --git a/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomIndexWriter.java b/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomIndexWriter.java
index 11cd73a2aaf5..2988957ba295 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomIndexWriter.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/tests/index/RandomIndexWriter.java
@@ -39,6 +39,7 @@
 import org.apache.lucene.internal.tests.IndexWriterAccess;
 import org.apache.lucene.internal.tests.TestSecrets;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.analysis.MockAnalyzer;
 import org.apache.lucene.tests.util.LuceneTestCase;
@@ -283,7 +284,12 @@ public long updateDocuments(
           w.softUpdateDocuments(
               delTerm, docs, new NumericDocValuesField(config.getSoftDeletesField(), 1));
     } else {
-      seqNo = w.updateDocuments(delTerm, docs);
+      if (r.nextInt(10) < 3) {
+        // 30% chance
+        seqNo = w.updateDocuments(new TermQuery(delTerm), docs);
+      } else {
+        seqNo = w.updateDocuments(delTerm, docs);
+      }
     }
     maybeFlushOrCommit();
     return seqNo;