From 1107aa2c4570dc028c9efda8fb00c6f65d3f5022 Mon Sep 17 00:00:00 2001 From: fudongying <30896830+fudongyingluck@users.noreply.github.com> Date: Fri, 9 Jun 2023 17:41:28 +0800 Subject: [PATCH] feat: soft delete optimize (#12339) --- lucene/CHANGES.txt | 2 + .../lucene/index/CachingMergeContext.java | 61 ++++++++++++++++ .../org/apache/lucene/index/IndexWriter.java | 13 ++-- .../lucene/index/TestCachingMergeContext.java | 69 +++++++++++++++++++ 4 files changed, 141 insertions(+), 4 deletions(-) create mode 100644 lucene/core/src/java/org/apache/lucene/index/CachingMergeContext.java create mode 100644 lucene/core/src/test/org/apache/lucene/index/TestCachingMergeContext.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index d5542d11850e..8702cfa4892e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -68,6 +68,8 @@ Optimizations * GITHUB#12328: Optimize ConjunctionDISI.createConjunction (Armin Braun) +* GITHUB#12339: Optimize part of duplicate calculation numDeletesToMerge in merge phase (fudongying) + Bug Fixes --------------------- diff --git a/lucene/core/src/java/org/apache/lucene/index/CachingMergeContext.java b/lucene/core/src/java/org/apache/lucene/index/CachingMergeContext.java new file mode 100644 index 000000000000..a9974c64e0b1 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/index/CachingMergeContext.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Set; +import org.apache.lucene.util.InfoStream; + +/** + * a wrapper of IndexWriter MergeContext. Try to cache the {@link + * #numDeletesToMerge(SegmentCommitInfo)} result in merge phase, to avoid duplicate calculation + */ +class CachingMergeContext implements MergePolicy.MergeContext { + final MergePolicy.MergeContext mergeContext; + final HashMap cachedNumDeletesToMerge = new HashMap<>(); + + CachingMergeContext(MergePolicy.MergeContext mergeContext) { + this.mergeContext = mergeContext; + } + + @Override + public final int numDeletesToMerge(SegmentCommitInfo info) throws IOException { + Integer numDeletesToMerge = cachedNumDeletesToMerge.get(info); + if (numDeletesToMerge != null) { + return numDeletesToMerge; + } + numDeletesToMerge = mergeContext.numDeletesToMerge(info); + cachedNumDeletesToMerge.put(info, numDeletesToMerge); + return numDeletesToMerge; + } + + @Override + public final int numDeletedDocs(SegmentCommitInfo info) { + return mergeContext.numDeletedDocs(info); + } + + @Override + public final InfoStream getInfoStream() { + return mergeContext.getInfoStream(); + } + + @Override + public final Set getMergingSegments() { + return mergeContext.getMergingSegments(); + } +} diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java index 467dc5758c83..07dca8945548 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexWriter.java @@ -2215,10 +2215,11 @@ public void forceMergeDeletes(boolean doWait) throws IOException { } final MergePolicy mergePolicy = config.getMergePolicy(); + final CachingMergeContext cachingMergeContext = new CachingMergeContext(this); MergePolicy.MergeSpecification spec; boolean newMergesFound = false; synchronized (this) { - spec = mergePolicy.findForcedDeletesMerges(segmentInfos, this); + spec = mergePolicy.findForcedDeletesMerges(segmentInfos, cachingMergeContext); newMergesFound = spec != null; if (newMergesFound) { final int numMerges = spec.merges.size(); @@ -2328,6 +2329,7 @@ private synchronized MergePolicy.MergeSpecification updatePendingMerges( } final MergePolicy.MergeSpecification spec; + final CachingMergeContext cachingMergeContext = new CachingMergeContext(this); if (maxNumSegments != UNBOUNDED_MAX_MERGE_SEGMENTS) { assert trigger == MergeTrigger.EXPLICIT || trigger == MergeTrigger.MERGE_FINISHED : "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: " @@ -2335,7 +2337,10 @@ private synchronized MergePolicy.MergeSpecification updatePendingMerges( spec = mergePolicy.findForcedMerges( - segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge), this); + segmentInfos, + maxNumSegments, + Collections.unmodifiableMap(segmentsToMerge), + cachingMergeContext); if (spec != null) { final int numMerges = spec.merges.size(); for (int i = 0; i < numMerges; i++) { @@ -2347,7 +2352,7 @@ private synchronized MergePolicy.MergeSpecification updatePendingMerges( switch (trigger) { case GET_READER: case COMMIT: - spec = mergePolicy.findFullFlushMerges(trigger, segmentInfos, this); + spec = mergePolicy.findFullFlushMerges(trigger, segmentInfos, cachingMergeContext); break; case ADD_INDEXES: throw new IllegalStateException( @@ -2359,7 +2364,7 @@ private synchronized MergePolicy.MergeSpecification updatePendingMerges( case SEGMENT_FLUSH: case CLOSING: default: - spec = mergePolicy.findMerges(trigger, segmentInfos, this); + spec = mergePolicy.findMerges(trigger, segmentInfos, cachingMergeContext); } } if (spec != null) { diff --git a/lucene/core/src/test/org/apache/lucene/index/TestCachingMergeContext.java b/lucene/core/src/test/org/apache/lucene/index/TestCachingMergeContext.java new file mode 100644 index 000000000000..3210d54540d7 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/index/TestCachingMergeContext.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.Set; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.util.InfoStream; + +public class TestCachingMergeContext extends LuceneTestCase { + public void testNumDeletesToMerge() throws IOException { + MockMergeContext mergeContext = new MockMergeContext(); + CachingMergeContext cachingMergeContext = new CachingMergeContext(mergeContext); + assertEquals(cachingMergeContext.numDeletesToMerge(null), 1); + assertEquals(cachingMergeContext.cachedNumDeletesToMerge.size(), 1); + assertEquals( + cachingMergeContext.cachedNumDeletesToMerge.getOrDefault(null, -1), Integer.valueOf(1)); + assertEquals(mergeContext.count, 1); + + // increase the mock count + mergeContext.numDeletesToMerge(null); + assertEquals(mergeContext.count, 2); + + // assert the cache result still one + assertEquals(cachingMergeContext.numDeletesToMerge(null), 1); + assertEquals(cachingMergeContext.cachedNumDeletesToMerge.size(), 1); + assertEquals( + cachingMergeContext.cachedNumDeletesToMerge.getOrDefault(null, -1), Integer.valueOf(1)); + } + + private static final class MockMergeContext implements MergePolicy.MergeContext { + int count = 0; + + @Override + public final int numDeletesToMerge(SegmentCommitInfo info) throws IOException { + this.count += 1; + return this.count; + } + + @Override + public int numDeletedDocs(SegmentCommitInfo info) { + return 0; + } + + @Override + public InfoStream getInfoStream() { + return null; + } + + @Override + public Set getMergingSegments() { + return null; + } + } +}