diff --git a/presto-common/src/main/java/com/facebook/presto/common/Page.java b/presto-common/src/main/java/com/facebook/presto/common/Page.java index 31d5e062cbaae..d7ccfe7fac33f 100644 --- a/presto-common/src/main/java/com/facebook/presto/common/Page.java +++ b/presto-common/src/main/java/com/facebook/presto/common/Page.java @@ -21,13 +21,17 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.IdentityHashMap; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicLong; import static com.facebook.presto.common.block.DictionaryId.randomDictionaryId; import static io.airlift.slice.SizeOf.sizeOf; import static java.lang.Math.min; import static java.lang.String.format; +import static java.util.Collections.newSetFromMap; import static java.util.Objects.requireNonNull; public final class Page @@ -418,12 +422,17 @@ public Page dropColumn(int channelIndex) private long updateRetainedSize() { - long retainedSizeInBytes = INSTANCE_SIZE + sizeOf(blocks); + AtomicLong retainedSizeInBytes = new AtomicLong(INSTANCE_SIZE + sizeOf(blocks)); + Set referenceSet = newSetFromMap(new IdentityHashMap<>()); for (Block block : blocks) { - retainedSizeInBytes += block.getRetainedSizeInBytes(); + block.retainedBytesForEachPart((object, size) -> { + if (referenceSet.add(object)) { + retainedSizeInBytes.addAndGet(size); + } + }); } - this.retainedSizeInBytes = retainedSizeInBytes; - return retainedSizeInBytes; + this.retainedSizeInBytes = retainedSizeInBytes.longValue(); + return retainedSizeInBytes.longValue(); } private static class DictionaryBlockIndexes diff --git a/presto-common/src/main/java/com/facebook/presto/common/block/VariableWidthBlock.java b/presto-common/src/main/java/com/facebook/presto/common/block/VariableWidthBlock.java index fc9f636476287..eeaacf44c68e7 100644 --- a/presto-common/src/main/java/com/facebook/presto/common/block/VariableWidthBlock.java +++ b/presto-common/src/main/java/com/facebook/presto/common/block/VariableWidthBlock.java @@ -35,6 +35,7 @@ import static com.facebook.presto.common.block.BlockUtil.compactSlice; import static com.facebook.presto.common.block.BlockUtil.internalPositionInRange; import static io.airlift.slice.SizeOf.sizeOf; +import static io.airlift.slice.Slices.EMPTY_SLICE; import static java.lang.String.format; public class VariableWidthBlock @@ -164,7 +165,16 @@ public long getRetainedSizeInBytes() @Override public void retainedBytesForEachPart(ObjLongConsumer consumer) { - consumer.accept(slice, slice.getRetainedSize()); + // For VariableWidthBlocks created from deserialized pages, it refers to byte array of whole page. + // When a page size is calculated, this byte array gets counted x number of times resulting in incorrect page size + // This problem is solved by accounting slice memory & underlying byte array separately while ensuring same object is counted once + if (slice.getBase() != null && slice.hasByteArray()) { + consumer.accept(slice, EMPTY_SLICE.getRetainedSize()); + consumer.accept(slice.getBase(), sizeOf((byte[]) slice.getBase())); + } + else { + consumer.accept(slice, slice.getRetainedSize()); + } consumer.accept(offsets, sizeOf(offsets)); if (valueIsNull != null) { consumer.accept(valueIsNull, sizeOf(valueIsNull)); diff --git a/presto-common/src/test/java/com/facebook/presto/common/TestPage.java b/presto-common/src/test/java/com/facebook/presto/common/TestPage.java index 1c2222dd87670..c9559d304683a 100644 --- a/presto-common/src/test/java/com/facebook/presto/common/TestPage.java +++ b/presto-common/src/test/java/com/facebook/presto/common/TestPage.java @@ -17,13 +17,18 @@ import com.facebook.presto.common.block.BlockBuilder; import com.facebook.presto.common.block.DictionaryBlock; import com.facebook.presto.common.block.DictionaryId; +import com.facebook.presto.common.block.VariableWidthBlock; import io.airlift.slice.DynamicSliceOutput; import io.airlift.slice.Slice; import org.testng.annotations.Test; +import java.util.UUID; +import java.util.stream.LongStream; + import static com.facebook.presto.common.block.DictionaryId.randomDictionaryId; import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.VarbinaryType.VARBINARY; +import static com.facebook.presto.common.type.VarcharType.VARCHAR; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Verify.verify; import static org.testng.Assert.assertEquals; @@ -131,6 +136,24 @@ public void testGetPositions() } } + @Test + public void testRetainedSizeIsCorrect() + { + BlockBuilder variableWidthBlockBuilder = VARCHAR.createBlockBuilder(null, 256); + + LongStream.range(0, 100).forEach(value -> VARCHAR.writeString(variableWidthBlockBuilder, UUID.randomUUID().toString())); + VariableWidthBlock variableWidthBlock = (VariableWidthBlock) variableWidthBlockBuilder.build(); + Page page = new Page( + variableWidthBlock, // Original block + variableWidthBlock, // Same block twice + variableWidthBlock.getRegion(0, 50), // Block with same underlying slice + variableWidthBlockBuilder.getRegion(51, 25)); // Block with slice having same underlying base object/byte array + // Account for extra overhead of objects to be around 20%. + // Close attention should be paid when this needs to be updated to 2x or higher as that case may introduce double counting + double expectedMaximumSizeOfPage = variableWidthBlock.getRawSlice(0).getRetainedSize() * 1.2; + assertTrue(page.getRetainedSizeInBytes() < expectedMaximumSizeOfPage, "Expected slice & underlying object to be counted once"); + } + private static Slice[] createExpectedValues(int positionCount) { Slice[] expectedValues = new Slice[positionCount]; diff --git a/presto-main/src/test/java/com/facebook/presto/execution/buffer/TestPagesSerde.java b/presto-main/src/test/java/com/facebook/presto/execution/buffer/TestPagesSerde.java index c4d1872d982f1..d78a97821bb41 100644 --- a/presto-main/src/test/java/com/facebook/presto/execution/buffer/TestPagesSerde.java +++ b/presto-main/src/test/java/com/facebook/presto/execution/buffer/TestPagesSerde.java @@ -25,6 +25,8 @@ import java.util.Iterator; import java.util.List; +import java.util.UUID; +import java.util.stream.LongStream; import static com.facebook.presto.common.type.BigintType.BIGINT; import static com.facebook.presto.common.type.VarcharType.VARCHAR; @@ -33,6 +35,7 @@ import static com.facebook.presto.spi.page.PagesSerdeUtil.writePages; import static org.testng.Assert.assertEquals; import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; public class TestPagesSerde { @@ -107,6 +110,34 @@ public void testVarcharSerializedSize() assertEquals(secondValueSize, 4 + 3); // length + "bob" (null shared with first entry) } + @Test + public void testRoundTripSizeForCompactPageStaysWithinTwentyPercent() + { + PagesSerde serde = new TestingPagesSerdeFactory().createPagesSerde(); + BlockBuilder variableWidthBlockBuilder1 = VARCHAR.createBlockBuilder(null, 128); + BlockBuilder variableWidthBlockBuilder2 = VARCHAR.createBlockBuilder(null, 256); + BlockBuilder bigintBlockBuilder = BIGINT.createBlockBuilder(null, 128); + Block emptyVariableWidthBlock = VARCHAR.createBlockBuilder(null, 128).build(); + + LongStream.range(0, 100).forEach(value -> { + VARCHAR.writeString(variableWidthBlockBuilder1, UUID.randomUUID().toString()); + VARCHAR.writeString(variableWidthBlockBuilder2, UUID.randomUUID().toString()); + VARCHAR.writeString(variableWidthBlockBuilder2, UUID.randomUUID().toString()); + BIGINT.writeLong(bigintBlockBuilder, value); + }); + Page compactPage = new Page( + emptyVariableWidthBlock, + variableWidthBlockBuilder1.build(), + variableWidthBlockBuilder2.build(), + bigintBlockBuilder.build()) + .compact(); + Page deserializedPage = serde.deserialize(serde.serialize(compactPage)); + + double expectedMaxSize = compactPage.getRetainedSizeInBytes() * 1.2; // 120% + double actualSize = deserializedPage.getRetainedSizeInBytes(); + assertTrue(actualSize < expectedMaxSize, "Expected round trip size difference less than 20% of original page"); + } + private static int serializedSize(List types, Page expectedPage) { PagesSerde serde = new TestingPagesSerdeFactory().createPagesSerde();