Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace boolean flags on IOContext with an enum. #13219

Merged
merged 4 commits into from
Mar 27, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
Expand All @@ -53,7 +54,7 @@ public VariableGapTermsIndexReader(SegmentReadState state) throws IOException {
state.segmentInfo.name,
state.segmentSuffix,
VariableGapTermsIndexWriter.TERMS_INDEX_EXTENSION);
final IndexInput in = state.directory.openInput(fileName, state.context.toReadOnce());
final IndexInput in = state.directory.openInput(fileName, IOContext.READONCE);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A change to this class wouldn't be necessary anymore once #13216 is merged.

boolean success = false;

try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public FSTTermsReader(SegmentReadState state, PostingsReaderBase postingsReader)
state.segmentInfo.name, state.segmentSuffix, FSTTermsWriter.TERMS_EXTENSION);

this.postingsReader = postingsReader;
this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.LOAD);
this.fstTermsInput = state.directory.openInput(termsFileName, IOContext.PRELOAD);

IndexInput in = this.fstTermsInput;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ public Lucene90PointsReader(SegmentReadState readState) throws IOException {

boolean success = false;
try {
indexIn = readState.directory.openInput(indexFileName, IOContext.LOAD);
indexIn = readState.directory.openInput(indexFileName, IOContext.PRELOAD);
CodecUtil.checkIndexHeader(
indexIn,
Lucene90PointsFormat.INDEX_CODEC_NAME,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ public Lucene90BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentRe

String indexName =
IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
indexIn = state.directory.openInput(indexName, IOContext.LOAD);
indexIn = state.directory.openInput(indexName, IOContext.PRELOAD);
CodecUtil.checkIndexHeader(
indexIn,
TERMS_INDEX_CODEC_NAME,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ final class FieldsIndexReader extends FieldsIndex {
maxPointer = metaIn.readLong();

indexInput =
dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.LOAD);
dir.openInput(IndexFileNames.segmentFileName(name, suffix, extension), IOContext.PRELOAD);
boolean success = false;
try {
CodecUtil.checkIndexHeader(
Expand Down
64 changes: 22 additions & 42 deletions lucene/core/src/java/org/apache/lucene/store/IOContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,10 @@
* @param context An object of a enumerator Context type
* @param mergeInfo must be given when {@code context == MERGE}
* @param flushInfo must be given when {@code context == FLUSH}
* @param readOnce This flag indicates that the file will be opened, then fully read sequentially
* then closed.
* @param load This flag is used for files that are a small fraction of the total index size and are
* expected to be heavily accessed in random-access fashion. Some {@link Directory}
* implementations may choose to load such files into physical memory (e.g. Java heap) as a way
* to provide stronger guarantees on query latency.
* @param randomAccess This flag indicates that the file will be accessed randomly. If this flag is
* set, then readOnce will be false.
* @param readAdvice Advice regarding the read access pattern
*/
public record IOContext(
Context context,
MergeInfo mergeInfo,
FlushInfo flushInfo,
boolean readOnce,
boolean load,
boolean randomAccess) {
Context context, MergeInfo mergeInfo, FlushInfo flushInfo, ReadAdvice readAdvice) {

/**
* Context is a enumerator which specifies the context in which the Directory is being used for.
Expand All @@ -54,58 +42,50 @@ public enum Context {
DEFAULT
};

public static final IOContext DEFAULT = new IOContext(Context.DEFAULT);
public static final IOContext DEFAULT =
new IOContext(Context.DEFAULT, null, null, ReadAdvice.NORMAL);

public static final IOContext READONCE = new IOContext(true, false, false);
public static final IOContext READONCE = new IOContext(ReadAdvice.SEQUENTIAL);

public static final IOContext READ = new IOContext(false, false, false);
public static final IOContext READ = new IOContext(ReadAdvice.NORMAL);

public static final IOContext LOAD = new IOContext(false, true, true);
public static final IOContext PRELOAD = new IOContext(ReadAdvice.RANDOM_PRELOAD);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This may need a MIGRATE.md entry.


public static final IOContext RANDOM = new IOContext(false, false, true);
public static final IOContext RANDOM = new IOContext(ReadAdvice.RANDOM);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FWIW kept these constant names as-is rather than align them with ReadAdvice constant names on purpose as they convey stronger expectations.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can still refactor and rename them a bit. The LOAD should really be PRELOAD.


@SuppressWarnings("incomplete-switch")
public IOContext {
Objects.requireNonNull(context, "context must not be null");
Objects.requireNonNull(readAdvice, "readAdvice must not be null");
switch (context) {
case MERGE -> Objects.requireNonNull(
mergeInfo, "mergeInfo must not be null if context is MERGE");
case FLUSH -> Objects.requireNonNull(
flushInfo, "flushInfo must not be null if context is FLUSH");
}
if (load && readOnce) {
throw new IllegalArgumentException("load and readOnce are mutually exclusive");
if (context == Context.MERGE && readAdvice != ReadAdvice.SEQUENTIAL) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is really a good idea! It makes code much easier and the merge case needs no special handling in MMapDir.

throw new IllegalArgumentException(
"The MERGE context must use the SEQUENTIAL read access advice");
}
if (readOnce && randomAccess) {
throw new IllegalArgumentException("readOnce and randomAccess are mutually exclusive");
if ((context == Context.FLUSH || context == Context.DEFAULT)
&& readAdvice != ReadAdvice.NORMAL) {
throw new IllegalArgumentException(
"The FLUSH and DEFAULT contexts must use the NORMAL read access advice");
}
if (load && randomAccess == false) {
throw new IllegalArgumentException("cannot be load but not randomAccess");
}
}

private IOContext(boolean readOnce, boolean load, boolean randomAccess) {
this(Context.READ, null, null, readOnce, load, randomAccess);
}

private IOContext(Context context) {
this(context, null, null, false, false, false);
private IOContext(ReadAdvice accessAdvice) {
this(Context.READ, null, null, accessAdvice);
}

/** Creates an IOContext for flushing. */
public IOContext(FlushInfo flushInfo) {
this(Context.FLUSH, null, flushInfo, false, false, false);
this(Context.FLUSH, null, flushInfo, ReadAdvice.NORMAL);
}

/** Creates an IOContext for merging. */
public IOContext(MergeInfo mergeInfo) {
this(Context.MERGE, mergeInfo, null, false, false, false);
}

/**
* Return a copy of this IOContext with {@link #readOnce} set to {@code true}. The {@link #load}
* flag is set to {@code false}.
*/
public IOContext toReadOnce() {
return new IOContext(context, mergeInfo, flushInfo, true, false, randomAccess);
// Merges read input segments sequentially.
this(Context.MERGE, mergeInfo, null, ReadAdvice.SEQUENTIAL);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,10 @@ public class MMapDirectory extends FSDirectory {

/**
* Argument for {@link #setPreload(BiPredicate)} that configures files to be preloaded upon
* opening them if they use the {@link IOContext#LOAD} I/O context.
* opening them if they use the {@link IOContext#PRELOAD} I/O context.
*/
public static final BiPredicate<String, IOContext> BASED_ON_LOAD_IO_CONTEXT =
(filename, context) -> context.load();
(filename, context) -> context.readAdvice() == ReadAdvice.RANDOM_PRELOAD;

private BiPredicate<String, IOContext> preload = NO_FILES;

Expand Down
44 changes: 44 additions & 0 deletions lucene/core/src/java/org/apache/lucene/store/ReadAdvice.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.store;

/** Advice regarding the read access pattern. */
public enum ReadAdvice {
/**
* Normal behavior. Data is expected to be read mostly sequentially. The system is expected to
* cache the hottest pages.
*/
NORMAL,
/**
* Data is expected to be read in a random-access fashion, either by {@link IndexInput#seek(long)
* seeking} often and reading relatively short sequences of bytes at once, or by reading data
* through the {@link RandomAccessInput} abstraction in random order.
*/
RANDOM,
/**
* Data is expected to be read sequentially with very little seeking at most. The system may read
* ahead aggressively and free pages soon after they are accessed.
*/
SEQUENTIAL,
/**
* Data is treated as random-access memory in practice. {@link Directory} implementations may
* explicitly load the content of the file in memory, or provide hints to the system so that it
* loads the content of the file into the page cache at open time. This should only be used on
* very small files that can be expected to fit in RAM with very high confidence.
*/
RANDOM_PRELOAD
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,14 @@ public IndexInput openInput(Path path, IOContext context, int chunkSizePower, bo
MemorySegmentIndexInput.newInstance(
resourceDescription,
arena,
map(arena, resourceDescription, fc, context, chunkSizePower, preload, fileSize),
map(
arena,
resourceDescription,
fc,
context.readAdvice(),
chunkSizePower,
preload,
fileSize),
fileSize,
chunkSizePower);
success = true;
Expand All @@ -78,7 +85,7 @@ private final MemorySegment[] map(
Arena arena,
String resourceDescription,
FileChannel fc,
IOContext context,
ReadAdvice readAdvice,
int chunkSizePower,
boolean preload,
long length)
Expand Down Expand Up @@ -108,7 +115,7 @@ private final MemorySegment[] map(
if (preload) {
segment.load();
} else if (nativeAccess.isPresent() && chunkSizePower >= 21) {
nativeAccess.get().madvise(segment, context);
nativeAccess.get().madvise(segment, readAdvice);
}
segments[segNr] = segment;
startOffset += segSize;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
abstract class NativeAccess {

/** Invoke the {@code madvise} call for the given {@link MemorySegment}. */
public abstract void madvise(MemorySegment segment, IOContext context) throws IOException;
public abstract void madvise(MemorySegment segment, ReadAdvice readAdvice) throws IOException;

/**
* Return the NativeAccess instance for this platform. At moment we only support Linux and MacOS
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import java.util.Locale;
import java.util.Optional;
import java.util.logging.Logger;
import org.apache.lucene.store.IOContext.Context;

@SuppressWarnings("preview")
final class PosixNativeAccess extends NativeAccess {
Expand Down Expand Up @@ -110,12 +109,12 @@ private static MethodHandle findFunction(
}

@Override
public void madvise(MemorySegment segment, IOContext context) throws IOException {
public void madvise(MemorySegment segment, ReadAdvice readAdvice) throws IOException {
// Note: madvise is bypassed if the segment should be preloaded via MemorySegment#load.
if (segment.byteSize() == 0L) {
return; // empty segments should be excluded, because they may have no address at all
}
final Integer advice = mapIOContext(context);
final Integer advice = mapReadAdvice(readAdvice);
if (advice == null) {
return; // do nothing
}
Expand All @@ -136,18 +135,12 @@ public void madvise(MemorySegment segment, IOContext context) throws IOException
}
}

private Integer mapIOContext(IOContext ctx) {
// Merging always wins and implies sequential access, because kernel is advised to free pages
// after use:
if (ctx.context() == Context.MERGE) {
return POSIX_MADV_SEQUENTIAL;
}
if (ctx.randomAccess()) {
return POSIX_MADV_RANDOM;
}
if (ctx.readOnce()) {
return POSIX_MADV_SEQUENTIAL;
}
return null;
private Integer mapReadAdvice(ReadAdvice readAdvice) {
return switch (readAdvice) {
case NORMAL -> null;
case RANDOM -> POSIX_MADV_RANDOM;
case SEQUENTIAL -> POSIX_MADV_SEQUENTIAL;
case RANDOM_PRELOAD -> null;
};
}
}