Skip to content

Commit

Permalink
Fix page retained size by accounting slice base object once for a page
Browse files Browse the repository at this point in the history
Fix page retained size by accounting slice base object once for a page

Co-Authored-By: Karol Sobczak <napewnotrafi@gmail.com>
  • Loading branch information
2 people authored and highker committed Apr 15, 2022
1 parent fb68c37 commit bdb2184
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 5 deletions.
17 changes: 13 additions & 4 deletions presto-common/src/main/java/com/facebook/presto/common/Page.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,17 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicLong;

import static com.facebook.presto.common.block.DictionaryId.randomDictionaryId;
import static io.airlift.slice.SizeOf.sizeOf;
import static java.lang.Math.min;
import static java.lang.String.format;
import static java.util.Collections.newSetFromMap;
import static java.util.Objects.requireNonNull;

public final class Page
Expand Down Expand Up @@ -418,12 +422,17 @@ public Page dropColumn(int channelIndex)

private long updateRetainedSize()
{
long retainedSizeInBytes = INSTANCE_SIZE + sizeOf(blocks);
AtomicLong retainedSizeInBytes = new AtomicLong(INSTANCE_SIZE + sizeOf(blocks));
Set<Object> referenceSet = newSetFromMap(new IdentityHashMap<>());
for (Block block : blocks) {
retainedSizeInBytes += block.getRetainedSizeInBytes();
block.retainedBytesForEachPart((object, size) -> {
if (referenceSet.add(object)) {
retainedSizeInBytes.addAndGet(size);
}
});
}
this.retainedSizeInBytes = retainedSizeInBytes;
return retainedSizeInBytes;
this.retainedSizeInBytes = retainedSizeInBytes.longValue();
return retainedSizeInBytes.longValue();
}

private static class DictionaryBlockIndexes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import static com.facebook.presto.common.block.BlockUtil.compactSlice;
import static com.facebook.presto.common.block.BlockUtil.internalPositionInRange;
import static io.airlift.slice.SizeOf.sizeOf;
import static io.airlift.slice.Slices.EMPTY_SLICE;
import static java.lang.String.format;

public class VariableWidthBlock
Expand Down Expand Up @@ -164,7 +165,16 @@ public long getRetainedSizeInBytes()
@Override
public void retainedBytesForEachPart(ObjLongConsumer<Object> consumer)
{
consumer.accept(slice, slice.getRetainedSize());
// For VariableWidthBlocks created from deserialized pages, it refers to byte array of whole page.
// When a page size is calculated, this byte array gets counted x number of times resulting in incorrect page size
// This problem is solved by accounting slice memory & underlying byte array separately while ensuring same object is counted once
if (slice.getBase() != null && slice.hasByteArray()) {
consumer.accept(slice, EMPTY_SLICE.getRetainedSize());
consumer.accept(slice.getBase(), sizeOf((byte[]) slice.getBase()));
}
else {
consumer.accept(slice, slice.getRetainedSize());
}
consumer.accept(offsets, sizeOf(offsets));
if (valueIsNull != null) {
consumer.accept(valueIsNull, sizeOf(valueIsNull));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,18 @@
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.block.DictionaryBlock;
import com.facebook.presto.common.block.DictionaryId;
import com.facebook.presto.common.block.VariableWidthBlock;
import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.Slice;
import org.testng.annotations.Test;

import java.util.UUID;
import java.util.stream.LongStream;

import static com.facebook.presto.common.block.DictionaryId.randomDictionaryId;
import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.VarbinaryType.VARBINARY;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static org.testng.Assert.assertEquals;
Expand Down Expand Up @@ -131,6 +136,24 @@ public void testGetPositions()
}
}

@Test
public void testRetainedSizeIsCorrect()
{
BlockBuilder variableWidthBlockBuilder = VARCHAR.createBlockBuilder(null, 256);

LongStream.range(0, 100).forEach(value -> VARCHAR.writeString(variableWidthBlockBuilder, UUID.randomUUID().toString()));
VariableWidthBlock variableWidthBlock = (VariableWidthBlock) variableWidthBlockBuilder.build();
Page page = new Page(
variableWidthBlock, // Original block
variableWidthBlock, // Same block twice
variableWidthBlock.getRegion(0, 50), // Block with same underlying slice
variableWidthBlockBuilder.getRegion(51, 25)); // Block with slice having same underlying base object/byte array
// Account for extra overhead of objects to be around 20%.
// Close attention should be paid when this needs to be updated to 2x or higher as that case may introduce double counting
double expectedMaximumSizeOfPage = variableWidthBlock.getRawSlice(0).getRetainedSize() * 1.2;
assertTrue(page.getRetainedSizeInBytes() < expectedMaximumSizeOfPage, "Expected slice & underlying object to be counted once");
}

private static Slice[] createExpectedValues(int positionCount)
{
Slice[] expectedValues = new Slice[positionCount];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@

import java.util.Iterator;
import java.util.List;
import java.util.UUID;
import java.util.stream.LongStream;

import static com.facebook.presto.common.type.BigintType.BIGINT;
import static com.facebook.presto.common.type.VarcharType.VARCHAR;
Expand All @@ -33,6 +35,7 @@
import static com.facebook.presto.spi.page.PagesSerdeUtil.writePages;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertFalse;
import static org.testng.Assert.assertTrue;

public class TestPagesSerde
{
Expand Down Expand Up @@ -107,6 +110,34 @@ public void testVarcharSerializedSize()
assertEquals(secondValueSize, 4 + 3); // length + "bob" (null shared with first entry)
}

@Test
public void testRoundTripSizeForCompactPageStaysWithinTwentyPercent()
{
PagesSerde serde = new TestingPagesSerdeFactory().createPagesSerde();
BlockBuilder variableWidthBlockBuilder1 = VARCHAR.createBlockBuilder(null, 128);
BlockBuilder variableWidthBlockBuilder2 = VARCHAR.createBlockBuilder(null, 256);
BlockBuilder bigintBlockBuilder = BIGINT.createBlockBuilder(null, 128);
Block emptyVariableWidthBlock = VARCHAR.createBlockBuilder(null, 128).build();

LongStream.range(0, 100).forEach(value -> {
VARCHAR.writeString(variableWidthBlockBuilder1, UUID.randomUUID().toString());
VARCHAR.writeString(variableWidthBlockBuilder2, UUID.randomUUID().toString());
VARCHAR.writeString(variableWidthBlockBuilder2, UUID.randomUUID().toString());
BIGINT.writeLong(bigintBlockBuilder, value);
});
Page compactPage = new Page(
emptyVariableWidthBlock,
variableWidthBlockBuilder1.build(),
variableWidthBlockBuilder2.build(),
bigintBlockBuilder.build())
.compact();
Page deserializedPage = serde.deserialize(serde.serialize(compactPage));

double expectedMaxSize = compactPage.getRetainedSizeInBytes() * 1.2; // 120%
double actualSize = deserializedPage.getRetainedSizeInBytes();
assertTrue(actualSize < expectedMaxSize, "Expected round trip size difference less than 20% of original page");
}

private static int serializedSize(List<? extends Type> types, Page expectedPage)
{
PagesSerde serde = new TestingPagesSerdeFactory().createPagesSerde();
Expand Down

0 comments on commit bdb2184

Please sign in to comment.