Skip to content

Commit

Permalink
Extend Block interface to include fixed width size information
Browse files Browse the repository at this point in the history
Adds a positionCount argument to Block#getPositionsSizeInBytes
and adds a new method: Block#fixedSizeInBytesPerPosition() to
reduce the overhead associated with calculating DictionaryBlock
size in bytes when the underlying dictionary size in bytes can
be calculated without specific information about which positions
are referenced.
  • Loading branch information
pettyjamesm authored and losipiuk committed Mar 1, 2022
1 parent 3010518 commit 5a43d04
Show file tree
Hide file tree
Showing 34 changed files with 552 additions and 187 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.openjdk.jol.info.ClassLayout;

import java.util.List;
import java.util.OptionalInt;
import java.util.function.BiConsumer;

import static com.google.common.base.MoreObjects.toStringHelper;
Expand Down Expand Up @@ -64,9 +65,15 @@ public long getRegionSizeInBytes(int positionOffset, int length)
}

@Override
public long getPositionsSizeInBytes(boolean[] positions)
public OptionalInt fixedSizeInBytesPerPosition()
{
return block.getPositionsSizeInBytes(positions);
return block.fixedSizeInBytesPerPosition();
}

@Override
public long getPositionsSizeInBytes(boolean[] positions, int selectedPositionCount)
{
    // Pure pass-through: the wrapped block receives the selection mask and the selected count unchanged
    long delegateSizeInBytes = block.getPositionsSizeInBytes(positions, selectedPositionCount);
    return delegateSizeInBytes;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,11 +223,11 @@ private void assertBlockSize(Block block)

boolean[] positions = new boolean[block.getPositionCount()];
fill(positions, 0, firstHalf.getPositionCount(), true);
assertEquals(block.getPositionsSizeInBytes(positions), expectedFirstHalfSize);
assertEquals(block.getPositionsSizeInBytes(positions, firstHalf.getPositionCount()), expectedFirstHalfSize);
fill(positions, true);
assertEquals(block.getPositionsSizeInBytes(positions), expectedBlockSize);
assertEquals(block.getPositionsSizeInBytes(positions, positions.length), expectedBlockSize);
fill(positions, 0, firstHalf.getPositionCount(), false);
assertEquals(block.getPositionsSizeInBytes(positions), expectedSecondHalfSize);
assertEquals(block.getPositionsSizeInBytes(positions, positions.length - firstHalf.getPositionCount()), expectedSecondHalfSize);
}

// expectedValueType is required since otherwise the expected value type is unknown when expectedValue is null.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -252,12 +252,12 @@ public void testNestedCompact()

assertEquals(
dictionary.getSizeInBytes(),
valuesBlock.getPositionsSizeInBytes(new boolean[] {true, false, true, false, false, false}) + 4 * Integer.BYTES);
valuesBlock.getPositionsSizeInBytes(new boolean[] {true, false, true, false, false, false}, 2) + 4 * Integer.BYTES);
assertFalse(dictionary.isCompact());

assertEquals(
dictionaryWithAllPositionsUsed.getSizeInBytes(),
valuesBlock.getPositionsSizeInBytes(new boolean[] {true, true, true, false, true, true}) + 6 * Integer.BYTES);
valuesBlock.getPositionsSizeInBytes(new boolean[] {true, true, true, false, true, true}, 5) + 6 * Integer.BYTES);
// dictionary is not compact (even though all positions were used) because it's unnested
assertFalse(dictionaryWithAllPositionsUsed.isCompact());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.testng.annotations.Test;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

public class TestRunLengthEncodedBlock
extends AbstractTestBlock
Expand Down Expand Up @@ -61,6 +62,26 @@ private static BlockBuilder createBlockBuilder()
return new VariableWidthBlockBuilder(null, 1, 1);
}

@Test
public void testPositionsSizeInBytes()
{
    Block singleValue = createSingleValueBlock(createExpectedValue(10));
    Block runLengthEncoded = new RunLengthEncodedBlock(singleValue, 10);

    // The RLE block is expected not to advertise a fixed per-position size
    assertTrue(runLengthEncoded.fixedSizeInBytesPerPosition().isEmpty());

    // An explicit selection mask with two positions marked must be accepted
    boolean[] selection = new boolean[runLengthEncoded.getPositionCount()];
    selection[0] = true;
    selection[1] = true;
    assertEquals(runLengthEncoded.getPositionsSizeInBytes(selection, 2), singleValue.getSizeInBytes());

    // A null mask accompanied only by a selected count must be accepted as well
    assertEquals(runLengthEncoded.getPositionsSizeInBytes(null, 2), singleValue.getSizeInBytes());

    // The reported size must match the value block's size for every selected count below the position count
    int selectedCount = 0;
    while (selectedCount < runLengthEncoded.getPositionCount()) {
        assertEquals(runLengthEncoded.getPositionsSizeInBytes(null, selectedCount), singleValue.getSizeInBytes());
        selectedCount++;
    }
}

@Test
public void testBuildingFromLongArrayBlockBuilder()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@
import javax.annotation.Nullable;

import java.util.List;
import java.util.OptionalInt;

import static io.trino.spi.block.ArrayBlock.createArrayBlockInternal;
import static io.trino.spi.block.BlockUtil.checkArrayRange;
import static io.trino.spi.block.BlockUtil.checkValidPositions;
import static io.trino.spi.block.BlockUtil.checkValidRegion;
import static io.trino.spi.block.BlockUtil.compactArray;
import static io.trino.spi.block.BlockUtil.compactOffsets;
import static io.trino.spi.block.BlockUtil.countAndMarkSelectedPositionsFromOffsets;
import static io.trino.spi.block.BlockUtil.countSelectedPositionsFromOffsets;
import static java.util.Collections.singletonList;

public abstract class AbstractArrayBlock
Expand Down Expand Up @@ -103,6 +106,12 @@ public Block getRegion(int position, int length)
getRawElementBlock());
}

/**
 * Reports that array blocks have no fixed per-position size.
 *
 * @return always {@link OptionalInt#empty()}
 */
@Override
public OptionalInt fixedSizeInBytesPerPosition()
{
    // The size of a position varies with the number of entries in its array, so no single value applies
    OptionalInt noFixedSize = OptionalInt.empty();
    return noFixedSize;
}

@Override
public long getRegionSizeInBytes(int position, int length)
{
Expand All @@ -116,22 +125,36 @@ public long getRegionSizeInBytes(int position, int length)
}

@Override
public long getPositionsSizeInBytes(boolean[] positions)
{
checkValidPositions(positions, getPositionCount());
boolean[] used = new boolean[getRawElementBlock().getPositionCount()];
int usedPositionCount = 0;
for (int i = 0; i < positions.length; ++i) {
if (positions[i]) {
usedPositionCount++;
int valueStart = getOffsets()[getOffsetBase() + i];
int valueEnd = getOffsets()[getOffsetBase() + i + 1];
for (int j = valueStart; j < valueEnd; ++j) {
used[j] = true;
}
}
public final long getPositionsSizeInBytes(boolean[] positions, int selectedArrayPositions)
{
int positionCount = getPositionCount();
checkValidPositions(positions, positionCount);
if (selectedArrayPositions == 0) {
return 0;
}
if (selectedArrayPositions == positionCount) {
return getSizeInBytes();
}

Block rawElementBlock = getRawElementBlock();
OptionalInt fixedPerElementSizeInBytes = rawElementBlock.fixedSizeInBytesPerPosition();
int[] offsets = getOffsets();
int offsetBase = getOffsetBase();
long elementsSizeInBytes;

if (fixedPerElementSizeInBytes.isPresent()) {
elementsSizeInBytes = fixedPerElementSizeInBytes.getAsInt() * (long) countSelectedPositionsFromOffsets(positions, offsets, offsetBase);
}
else if (rawElementBlock instanceof RunLengthEncodedBlock) {
// RLE blocks don't have fixed size per position, but accept null for the positions array
elementsSizeInBytes = rawElementBlock.getPositionsSizeInBytes(null, countSelectedPositionsFromOffsets(positions, offsets, offsetBase));
}
else {
boolean[] selectedElements = new boolean[rawElementBlock.getPositionCount()];
int selectedElementCount = countAndMarkSelectedPositionsFromOffsets(positions, offsets, offsetBase, selectedElements);
elementsSizeInBytes = rawElementBlock.getPositionsSizeInBytes(selectedElements, selectedElementCount);
}
return getRawElementBlock().getPositionsSizeInBytes(used) + ((Integer.BYTES + Byte.BYTES) * (long) usedPositionCount);
return elementsSizeInBytes + ((Integer.BYTES + Byte.BYTES) * (long) selectedArrayPositions);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,15 @@
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;

import static io.trino.spi.block.BlockUtil.checkArrayRange;
import static io.trino.spi.block.BlockUtil.checkValidPositions;
import static io.trino.spi.block.BlockUtil.checkValidRegion;
import static io.trino.spi.block.BlockUtil.compactArray;
import static io.trino.spi.block.BlockUtil.compactOffsets;
import static io.trino.spi.block.BlockUtil.countAndMarkSelectedPositionsFromOffsets;
import static io.trino.spi.block.BlockUtil.countSelectedPositionsFromOffsets;
import static io.trino.spi.block.MapBlock.createMapBlockInternal;
import static io.trino.spi.block.MapHashTables.HASH_MULTIPLIER;
import static java.util.Objects.requireNonNull;
Expand Down Expand Up @@ -180,33 +183,63 @@ public long getRegionSizeInBytes(int position, int length)
}

@Override
public long getPositionsSizeInBytes(boolean[] positions)
public OptionalInt fixedSizeInBytesPerPosition()
{
    // No fixed size can be reported: the size of a row varies with the number of entries it holds
    OptionalInt noFixedSize = OptionalInt.empty();
    return noFixedSize;
}

/**
 * Combines the fixed per-position sizes reported by the raw key block and the raw value block.
 *
 * @return the sum of both fixed sizes, or {@link OptionalInt#empty()} when either block
 *         does not report a fixed per-position size
 */
private OptionalInt keyAndValueFixedSizeInBytesPerRow()
{
    OptionalInt keySize = getRawKeyBlock().fixedSizeInBytesPerPosition();
    if (keySize.isPresent()) {
        // The value block is consulted only once the key block is known to have a fixed size
        OptionalInt valueSize = getRawValueBlock().fixedSizeInBytesPerPosition();
        if (valueSize.isPresent()) {
            return OptionalInt.of(keySize.getAsInt() + valueSize.getAsInt());
        }
    }
    return OptionalInt.empty();
}

@Override
public final long getPositionsSizeInBytes(boolean[] positions, int selectedMapPositions)
{
// We can use either the getRegionSizeInBytes or getPositionsSizeInBytes
// from the underlying raw blocks to implement this function. We chose
// getPositionsSizeInBytes with the assumption that constructing a
// positions array is cheaper than calling getRegionSizeInBytes for each
// used position.
int positionCount = getPositionCount();
checkValidPositions(positions, positionCount);
boolean[] entryPositions = new boolean[getRawKeyBlock().getPositionCount()];
int usedEntryCount = 0;
int usedPositionCount = 0;
for (int i = 0; i < positions.length; ++i) {
if (positions[i]) {
usedPositionCount++;
int entriesStart = getOffsets()[getOffsetBase() + i];
int entriesEnd = getOffsets()[getOffsetBase() + i + 1];
for (int j = entriesStart; j < entriesEnd; j++) {
entryPositions[j] = true;
}
usedEntryCount += (entriesEnd - entriesStart);
}
if (selectedMapPositions == 0) {
return 0;
}
return getRawKeyBlock().getPositionsSizeInBytes(entryPositions) +
getRawValueBlock().getPositionsSizeInBytes(entryPositions) +
(Integer.BYTES + Byte.BYTES) * (long) usedPositionCount +
Integer.BYTES * HASH_MULTIPLIER * (long) usedEntryCount;
if (selectedMapPositions == positionCount) {
return getSizeInBytes();
}

int[] offsets = getOffsets();
int offsetBase = getOffsetBase();
OptionalInt fixedKeyAndValueSizePerRow = keyAndValueFixedSizeInBytesPerRow();

int selectedEntryCount;
long keyAndValuesSizeInBytes;
if (fixedKeyAndValueSizePerRow.isPresent()) {
// no new positions array need be created, we can just count the number of elements
selectedEntryCount = countSelectedPositionsFromOffsets(positions, offsets, offsetBase);
keyAndValuesSizeInBytes = fixedKeyAndValueSizePerRow.getAsInt() * (long) selectedEntryCount;
}
else {
// We can use either the getRegionSizeInBytes or getPositionsSizeInBytes
// from the underlying raw blocks to implement this function. We chose
// getPositionsSizeInBytes with the assumption that constructing a
// positions array is cheaper than calling getRegionSizeInBytes for each
// used position.
boolean[] entryPositions = new boolean[getRawKeyBlock().getPositionCount()];
selectedEntryCount = countAndMarkSelectedPositionsFromOffsets(positions, offsets, offsetBase, entryPositions);
keyAndValuesSizeInBytes = getRawKeyBlock().getPositionsSizeInBytes(entryPositions, selectedEntryCount) +
getRawValueBlock().getPositionsSizeInBytes(entryPositions, selectedEntryCount);
}

return keyAndValuesSizeInBytes +
(Integer.BYTES + Byte.BYTES) * (long) selectedMapPositions +
Integer.BYTES * HASH_MULTIPLIER * (long) selectedEntryCount;
}

@Override
Expand Down
Loading

0 comments on commit 5a43d04

Please sign in to comment.