Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better paging when random reads go backwards #12357

Merged
merged 8 commits into from
Jun 9, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ public abstract class BufferedIndexInput extends IndexInput implements RandomAcc
/** A buffer size for merges set to {@value #MERGE_BUFFER_SIZE}. */
public static final int MERGE_BUFFER_SIZE = 4096;

private int bufferSize = BUFFER_SIZE;
private final int bufferSize;
romseygeek marked this conversation as resolved.
Show resolved Hide resolved

private ByteBuffer buffer = EMPTY_BYTEBUFFER;

Expand Down Expand Up @@ -72,7 +72,7 @@ public BufferedIndexInput(String resourceDesc, int bufferSize) {
this.bufferSize = bufferSize;
}

/** Returns buffer size. @see #setBufferSize */
/** Returns buffer size */
public final int getBufferSize() {
return bufferSize;
}
Expand Down Expand Up @@ -163,11 +163,14 @@ public final long readLong() throws IOException {
public final byte readByte(long pos) throws IOException {
long index = pos - bufferStart;
if (index < 0 || index >= buffer.limit()) {
bufferStart = pos;
// if we're moving backwards, then try and fill up the previous page rather than
// starting again at the current pos, to avoid successive backwards reads reloading
// the same data over and over again
bufferStart = index < 0 ? Math.min(pos, Math.max(0, bufferStart - bufferSize)) : pos;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's add a comment explaining that this is trying to avoid backward reading being an adversarial case?

buffer.limit(0); // trigger refill() on read
seekInternal(pos);
seekInternal(bufferStart);
refill();
index = 0;
index = pos - bufferStart;
}
return buffer.get((int) index);
}
Expand All @@ -176,11 +179,17 @@ public final byte readByte(long pos) throws IOException {
public final short readShort(long pos) throws IOException {
long index = pos - bufferStart;
if (index < 0 || index >= buffer.limit() - 1) {
bufferStart = pos;
// if we're moving backwards, then try and fill up the previous page rather than
// starting again at the current pos, to avoid successive backwards reads reloading
// the same data over and over again
bufferStart = index < 0 ? Math.min(pos, Math.max(0, bufferStart - bufferSize)) : pos;
if (bufferStart + bufferSize - pos <= 1) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: use Short.BYTES to make it clearier we're checking we can read a whole short?

Suggested change
if (bufferStart + bufferSize - pos <= 1) {
if (bufferStart + bufferSize - pos < Short.BYTES) {

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I'm not mistaken, this can only happen when index < 0 so maybe we should rewrite the above ternary expression into an if/else block and only have the above check when index < 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've rewritten this to extract a maybeRefillBuffer() method that should be easier to read.

bufferStart++; // adjust buffer to include the whole short value at pos
}
buffer.limit(0); // trigger refill() on read
seekInternal(pos);
seekInternal(bufferStart);
refill();
index = 0;
index = pos - bufferStart;
}
return buffer.getShort((int) index);
}
Expand All @@ -189,11 +198,17 @@ public final short readShort(long pos) throws IOException {
public final int readInt(long pos) throws IOException {
long index = pos - bufferStart;
if (index < 0 || index >= buffer.limit() - 3) {
bufferStart = pos;
// if we're moving backwards, then try and fill up the previous page rather than
// starting again at the current pos, to avoid successive backwards reads reloading
// the same data over and over again
bufferStart = index < 0 ? Math.min(pos, Math.max(0, bufferStart - bufferSize)) : pos;
if (bufferStart + bufferSize - pos <= 3) {
bufferStart += 4; // adjust buffer to include the whole int value at pos
}
buffer.limit(0); // trigger refill() on read
seekInternal(pos);
seekInternal(bufferStart);
refill();
index = 0;
index = pos - bufferStart;
}
return buffer.getInt((int) index);
}
Expand All @@ -202,11 +217,17 @@ public final int readInt(long pos) throws IOException {
public final long readLong(long pos) throws IOException {
long index = pos - bufferStart;
if (index < 0 || index >= buffer.limit() - 7) {
bufferStart = pos;
// if we're moving backwards, then try and fill up the previous page rather than
// starting again at the current pos, to avoid successive backwards reads reloading
// the same data over and over again
bufferStart = index < 0 ? Math.min(pos, Math.max(0, bufferStart - bufferSize)) : pos;
if (bufferStart + bufferSize - pos <= 7) {
bufferStart += 8; // adjust buffer to include the whole long value at pos
}
buffer.limit(0); // trigger refill() on read
seekInternal(pos);
seekInternal(bufferStart);
refill();
index = 0;
index = pos - bufferStart;
}
return buffer.getLong((int) index);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Random;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.ArrayUtil;
Expand Down Expand Up @@ -142,6 +143,69 @@ public void testEOF() throws Exception {
});
}

// Test that when reading backwards, we page backwards rather than refilling
// on every call
public void testBackwardsByteReads() throws IOException {
MyBufferedIndexInput input = new MyBufferedIndexInput(1024 * 8);
for (int i = 2048; i > 0; i -= random().nextInt(16)) {
assertEquals(byten(i), input.readByte(i));
}
assertEquals(3, input.readCount);
}

public void testBackwardsShortReads() throws IOException {
MyBufferedIndexInput input = new MyBufferedIndexInput(1024 * 8);
ByteBuffer bb = ByteBuffer.allocate(2);
bb.order(ByteOrder.LITTLE_ENDIAN);
for (int i = 2048; i > 0; i -= (random().nextInt(16) + 1)) {
bb.clear();
bb.put(byten(i));
bb.put(byten(i + 1));
assertEquals(bb.getShort(0), input.readShort(i));
}
// readCount can be three or four, depending on whether or not we had to adjust the bufferStart
// to include a whole short
assertTrue(input.readCount == 4 || input.readCount == 3);
}

public void testBackwardsIntReads() throws IOException {
MyBufferedIndexInput input = new MyBufferedIndexInput(1024 * 8);
ByteBuffer bb = ByteBuffer.allocate(4);
bb.order(ByteOrder.LITTLE_ENDIAN);
for (int i = 2048; i > 0; i -= (random().nextInt(16) + 3)) {
bb.clear();
bb.put(byten(i));
bb.put(byten(i + 1));
bb.put(byten(i + 2));
bb.put(byten(i + 3));
assertEquals(bb.getInt(0), input.readInt(i));
}
// readCount can be three or four, depending on whether or not we had to adjust the bufferStart
// to include a whole int
assertTrue(input.readCount == 4 || input.readCount == 3);
}

public void testBackwardsLongReads() throws IOException {
MyBufferedIndexInput input = new MyBufferedIndexInput(1024 * 8);
ByteBuffer bb = ByteBuffer.allocate(8);
bb.order(ByteOrder.LITTLE_ENDIAN);
for (int i = 2048; i > 0; i -= (random().nextInt(16) + 7)) {
bb.clear();
bb.put(byten(i));
bb.put(byten(i + 1));
bb.put(byten(i + 2));
bb.put(byten(i + 3));
bb.put(byten(i + 4));
bb.put(byten(i + 5));
bb.put(byten(i + 6));
bb.put(byten(i + 7));
assertEquals(bb.getLong(0), input.readLong(i));
}
// readCount can be three or four, depending on whether or not we had to adjust the bufferStart
// to include a whole long
assertTrue(input.readCount == 4 || input.readCount == 3);
}

// byten emulates a file - byten(n) returns the n'th byte in that file.
// MyBufferedIndexInput reads this "file".
private static byte byten(long n) {
Expand All @@ -150,7 +214,8 @@ private static byte byten(long n) {

private static class MyBufferedIndexInput extends BufferedIndexInput {
private long pos;
private long len;
private final long len;
private long readCount = 0;

public MyBufferedIndexInput(long len) {
super("MyBufferedIndexInput(len=" + len + ")", BufferedIndexInput.BUFFER_SIZE);
Expand All @@ -164,14 +229,15 @@ public MyBufferedIndexInput() {
}

@Override
protected void readInternal(ByteBuffer b) throws IOException {
protected void readInternal(ByteBuffer b) {
readCount++;
while (b.hasRemaining()) {
b.put(byten(pos++));
}
}

@Override
protected void seekInternal(long pos) throws IOException {
protected void seekInternal(long pos) {
this.pos = pos;
}

Expand Down