Skip to content

Commit

Permalink
Optimize copyPositions method in DictionaryBlock
Browse files Browse the repository at this point in the history
DictionaryBlock.copyPositions is used heavily from the performance
profiles captured in the production. Optimizing the method using
putIfAbsent instead of contains/put/get pattern.

Avoid additional boxing on one of the integers repeatedly. Using
a primitive map from fastutil would have been better, but did not
put lot of effort into it.
  • Loading branch information
Arunachalam Thirupathi committed Dec 2, 2022
1 parent 1cb5fff commit b12080e
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -401,14 +401,18 @@ public Block copyPositions(int[] positions, int offset, int length)
Map<Integer, Integer> oldIndexToNewIndex = new HashMap<>();
int[] newIds = new int[length];

// Using a boxed integer to avoid repeated boxing.
Integer nextIndex = 0;
for (int i = 0; i < length; i++) {
int position = positions[offset + i];
int oldIndex = getId(position);
if (!oldIndexToNewIndex.containsKey(oldIndex)) {
oldIndexToNewIndex.put(oldIndex, positionsToCopy.size());
Integer newIndex = oldIndexToNewIndex.putIfAbsent(oldIndex, nextIndex);
if (newIndex == null) {
newIndex = nextIndex;
positionsToCopy.add(oldIndex);
nextIndex = nextIndex + 1;
}
newIds[i] = oldIndexToNewIndex.get(oldIndex);
newIds[i] = newIndex;
}
return new DictionaryBlock(
length,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,14 +201,14 @@ public void testCompact()
Slice[] expectedValues = createExpectedValues(5);
DictionaryBlock dictionaryBlock = createDictionaryBlockWithUnreferencedKeys(expectedValues, 10);

assertEquals(dictionaryBlock.isCompact(), false);
assertFalse(dictionaryBlock.isCompact());
DictionaryBlock compactBlock = dictionaryBlock.compact();
assertNotEquals(dictionaryBlock.getDictionarySourceId(), compactBlock.getDictionarySourceId());

assertEquals(compactBlock.getDictionary().getPositionCount(), (expectedValues.length / 2) + 1);
assertBlock(compactBlock.getDictionary(), TestDictionaryBlock::createBlockBuilder, new Slice[] {expectedValues[0], expectedValues[1], expectedValues[3]});
assertDictionaryIds(compactBlock, 0, 1, 1, 2, 2, 0, 1, 1, 2, 2);
assertEquals(compactBlock.isCompact(), true);
assertTrue(compactBlock.isCompact());

DictionaryBlock reCompactedBlock = compactBlock.compact();
assertEquals(reCompactedBlock.getDictionarySourceId(), compactBlock.getDictionarySourceId());
Expand All @@ -227,7 +227,7 @@ public void testCompactAllKeysReferenced()
for (int position = 0; position < compactBlock.getPositionCount(); position++) {
assertEquals(compactBlock.getId(position), dictionaryBlock.getId(position));
}
assertEquals(compactBlock.isCompact(), true);
assertTrue(compactBlock.isCompact());
}

@Test
Expand Down

0 comments on commit b12080e

Please sign in to comment.