Skip to content

Commit

Permalink
feat: soft delete optimize (#12339)
Browse files Browse the repository at this point in the history
  • Loading branch information
fudongyingluck authored and jpountz committed Jun 9, 2023
1 parent 84ea3aa commit 1107aa2
Show file tree
Hide file tree
Showing 4 changed files with 141 additions and 4 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ Optimizations

* GITHUB#12328: Optimize ConjunctionDISI.createConjunction (Armin Braun)

* GITHUB#12339: Avoid duplicate numDeletesToMerge calculations in the merge phase by caching the results (fudongying)

Bug Fixes
---------------------

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import java.io.IOException;
import java.util.HashMap;
import java.util.Set;
import org.apache.lucene.util.InfoStream;

/**
 * A {@link MergePolicy.MergeContext} decorator that forwards every call to the wrapped context and
 * remembers the answers of {@link #numDeletesToMerge(SegmentCommitInfo)}, so that repeated lookups
 * for the same segment while merges are being selected do not recompute the value.
 *
 * <p>Cached entries are never evicted or refreshed by this class; a fresh instance starts with an
 * empty cache.
 */
class CachingMergeContext implements MergePolicy.MergeContext {
  /** The wrapped context that all calls are delegated to. */
  final MergePolicy.MergeContext mergeContext;

  /** Results of {@code numDeletesToMerge} computed so far, keyed by segment. */
  final HashMap<SegmentCommitInfo, Integer> cachedNumDeletesToMerge = new HashMap<>();

  /**
   * Creates a caching view over the given context.
   *
   * @param mergeContext the context to delegate to
   */
  CachingMergeContext(MergePolicy.MergeContext mergeContext) {
    this.mergeContext = mergeContext;
  }

  /**
   * Returns the cached value for {@code info} when one exists; otherwise asks the wrapped context,
   * stores its answer and returns it.
   *
   * @param info the segment to look up
   * @return the number of deletes to merge as reported by the wrapped context on first lookup
   * @throws IOException if the wrapped context fails while computing the value
   */
  @Override
  public final int numDeletesToMerge(SegmentCommitInfo info) throws IOException {
    final Integer cached = cachedNumDeletesToMerge.get(info);
    if (cached == null) {
      // cache miss: compute once through the delegate and remember the result
      final int computed = mergeContext.numDeletesToMerge(info);
      cachedNumDeletesToMerge.put(info, computed);
      return computed;
    }
    return cached;
  }

  /** Delegates to the wrapped context without caching. */
  @Override
  public final int numDeletedDocs(SegmentCommitInfo info) {
    return mergeContext.numDeletedDocs(info);
  }

  /** Delegates to the wrapped context. */
  @Override
  public final InfoStream getInfoStream() {
    return mergeContext.getInfoStream();
  }

  /** Delegates to the wrapped context. */
  @Override
  public final Set<SegmentCommitInfo> getMergingSegments() {
    return mergeContext.getMergingSegments();
  }
}
13 changes: 9 additions & 4 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -2215,10 +2215,11 @@ public void forceMergeDeletes(boolean doWait) throws IOException {
}

final MergePolicy mergePolicy = config.getMergePolicy();
final CachingMergeContext cachingMergeContext = new CachingMergeContext(this);
MergePolicy.MergeSpecification spec;
boolean newMergesFound = false;
synchronized (this) {
spec = mergePolicy.findForcedDeletesMerges(segmentInfos, this);
spec = mergePolicy.findForcedDeletesMerges(segmentInfos, cachingMergeContext);
newMergesFound = spec != null;
if (newMergesFound) {
final int numMerges = spec.merges.size();
Expand Down Expand Up @@ -2328,14 +2329,18 @@ private synchronized MergePolicy.MergeSpecification updatePendingMerges(
}

final MergePolicy.MergeSpecification spec;
final CachingMergeContext cachingMergeContext = new CachingMergeContext(this);
if (maxNumSegments != UNBOUNDED_MAX_MERGE_SEGMENTS) {
assert trigger == MergeTrigger.EXPLICIT || trigger == MergeTrigger.MERGE_FINISHED
: "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: "
+ trigger.name();

spec =
mergePolicy.findForcedMerges(
segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge), this);
segmentInfos,
maxNumSegments,
Collections.unmodifiableMap(segmentsToMerge),
cachingMergeContext);
if (spec != null) {
final int numMerges = spec.merges.size();
for (int i = 0; i < numMerges; i++) {
Expand All @@ -2347,7 +2352,7 @@ private synchronized MergePolicy.MergeSpecification updatePendingMerges(
switch (trigger) {
case GET_READER:
case COMMIT:
spec = mergePolicy.findFullFlushMerges(trigger, segmentInfos, this);
spec = mergePolicy.findFullFlushMerges(trigger, segmentInfos, cachingMergeContext);
break;
case ADD_INDEXES:
throw new IllegalStateException(
Expand All @@ -2359,7 +2364,7 @@ private synchronized MergePolicy.MergeSpecification updatePendingMerges(
case SEGMENT_FLUSH:
case CLOSING:
default:
spec = mergePolicy.findMerges(trigger, segmentInfos, this);
spec = mergePolicy.findMerges(trigger, segmentInfos, cachingMergeContext);
}
}
if (spec != null) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import java.io.IOException;
import java.util.Set;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.InfoStream;

/**
 * Checks that {@link CachingMergeContext} computes {@code numDeletesToMerge} through its delegate
 * only once per key and serves later lookups from its cache.
 */
public class TestCachingMergeContext extends LuceneTestCase {

  /**
   * The first lookup must reach the delegate and populate the cache; a later lookup for the same
   * key must return the cached value even though the delegate would now answer differently.
   */
  public void testNumDeletesToMerge() throws IOException {
    MockMergeContext mergeContext = new MockMergeContext();
    CachingMergeContext cachingMergeContext = new CachingMergeContext(mergeContext);

    // cache miss: the delegate is invoked once and its answer (1) is stored
    assertEquals(1, cachingMergeContext.numDeletesToMerge(null));
    assertEquals(1, cachingMergeContext.cachedNumDeletesToMerge.size());
    assertEquals(
        Integer.valueOf(1), cachingMergeContext.cachedNumDeletesToMerge.getOrDefault(null, -1));
    assertEquals(1, mergeContext.count);

    // bump the mock's counter directly, so an uncached lookup would now return 3
    mergeContext.numDeletesToMerge(null);
    assertEquals(2, mergeContext.count);

    // cache hit: the stored value is returned and the cache is unchanged
    assertEquals(1, cachingMergeContext.numDeletesToMerge(null));
    assertEquals(1, cachingMergeContext.cachedNumDeletesToMerge.size());
    assertEquals(
        Integer.valueOf(1), cachingMergeContext.cachedNumDeletesToMerge.getOrDefault(null, -1));
    // the delegate must not have been consulted again
    assertEquals(2, mergeContext.count);
  }

  /**
   * Merge context stub whose {@code numDeletesToMerge} returns the number of times it has been
   * called, which makes every delegate invocation observable.
   */
  private static final class MockMergeContext implements MergePolicy.MergeContext {
    /** Number of {@code numDeletesToMerge} invocations so far. */
    int count = 0;

    @Override
    public int numDeletesToMerge(SegmentCommitInfo info) throws IOException {
      this.count += 1;
      return this.count;
    }

    @Override
    public int numDeletedDocs(SegmentCommitInfo info) {
      return 0;
    }

    @Override
    public InfoStream getInfoStream() {
      return null;
    }

    @Override
    public Set<SegmentCommitInfo> getMergingSegments() {
      return null;
    }
  }
}

0 comments on commit 1107aa2

Please sign in to comment.