Skip to content

Commit

Permalink
Use hex string as the representation of byte array for queries (apach…
Browse files Browse the repository at this point in the history
…e#4041) (#4)

* Use hex string as the representation of byte array for queries

* Adding test for indexing hexstring

* Address comments
  • Loading branch information
xiangfu0 authored Apr 1, 2019
1 parent 2995f9c commit 80c633b
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@
import java.io.IOException;
import java.util.Arrays;
import javax.annotation.Nonnull;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.pinot.common.Utils;
import org.apache.pinot.common.utils.primitive.ByteArray;
import org.apache.pinot.core.io.readerwriter.PinotDataBufferMemoryManager;
import org.apache.pinot.core.io.writer.impl.MutableOffHeapByteArrayStore;
Expand All @@ -30,6 +33,7 @@
* OffHeap mutable dictionary for Bytes data type.
*/
public class BytesOffHeapMutableDictionary extends BaseOffHeapMutableDictionary {

private final MutableOffHeapByteArrayStore _byteStore;

private ByteArray _min = null;
Expand All @@ -52,8 +56,19 @@ public BytesOffHeapMutableDictionary(int estimatedCardinality, int maxOverflowHa

@Override
public int indexOf(Object rawValue) {
assert rawValue instanceof byte[];
byte[] bytes = (byte[]) rawValue;
byte[] bytes = null;
// Convert hex string to byte[].
if (rawValue instanceof byte[]) {
bytes = (byte[]) rawValue;
} else if (rawValue instanceof String) {
try {
bytes = Hex.decodeHex(((String) rawValue).toCharArray());
} catch (DecoderException e) {
Utils.rethrowException(e);
}
} else {
assert rawValue instanceof byte[];
}
return getDictId(new ByteArray(bytes), bytes);
}

Expand All @@ -80,8 +95,18 @@ protected void setRawValueAt(int dictId, Object rawValue, byte[] serializedValue

@Override
public void index(@Nonnull Object rawValue) {
assert rawValue instanceof byte[];
byte[] bytes = (byte[]) rawValue;
byte[] bytes = null;
// Convert hex string to byte[].
if (rawValue instanceof String) {
try {
bytes = Hex.decodeHex(((String) rawValue).toCharArray());
} catch (DecoderException e) {
Utils.rethrowException(e);
}
} else {
assert rawValue instanceof byte[];
bytes = (byte[]) rawValue;
}
ByteArray byteArray = new ByteArray(bytes);
indexValue(byteArray, bytes);
updateMinMax(byteArray);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,36 @@

import java.util.Arrays;
import javax.annotation.Nonnull;
import org.apache.commons.codec.DecoderException;
import org.apache.commons.codec.binary.Hex;
import org.apache.pinot.common.Utils;
import org.apache.pinot.common.utils.primitive.ByteArray;


/**
* OnHeap mutable dictionary of Bytes type.
*/
public class BytesOnHeapMutableDictionary extends BaseOnHeapMutableDictionary {

private ByteArray _min = null;
private ByteArray _max = null;

@Override
public int indexOf(Object rawValue) {
assert rawValue instanceof byte[];
return getDictId(new ByteArray((byte[]) rawValue));
byte[] bytes = null;
// Convert hex string to byte[].
if (rawValue instanceof byte[]) {
bytes = (byte[]) rawValue;
} else if (rawValue instanceof String) {
try {
bytes = Hex.decodeHex(((String) rawValue).toCharArray());
} catch (DecoderException e) {
Utils.rethrowException(e);
}
} else {
assert rawValue instanceof byte[];
}
return getDictId(new ByteArray(bytes));
}

@Override
Expand All @@ -48,8 +64,19 @@ public byte[] getBytesValue(int dictId) {

@Override
public void index(@Nonnull Object rawValue) {
assert rawValue instanceof byte[];
ByteArray byteArray = new ByteArray((byte[]) rawValue);
byte[] bytes = null;
// Convert hex string to byte[].
if (rawValue instanceof String) {
try {
bytes = Hex.decodeHex(((String) rawValue).toCharArray());
} catch (DecoderException e) {
Utils.rethrowException(e);
}
} else {
assert rawValue instanceof byte[];
bytes = (byte[]) rawValue;
}
ByteArray byteArray = new ByteArray(bytes);
indexValue(byteArray);
updateMinMax(byteArray);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
import org.apache.commons.codec.binary.Hex;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.pinot.common.data.FieldSpec;
import org.apache.pinot.common.utils.primitive.ByteArray;
Expand Down Expand Up @@ -174,7 +175,8 @@ private void testMutableDictionary(MutableDictionary dictionary, FieldSpec.DataT
Comparable value =
(i == 0 && dataType == FieldSpec.DataType.INT) ? Integer.MIN_VALUE : makeRandomObjectOfType(dataType);

Object rawValue = dataType == FieldSpec.DataType.BYTES ? ((ByteArray) value).getBytes() : value;
Object rawValue = dataType == FieldSpec.DataType.BYTES ? ((i % 2 == 0) ? ((ByteArray) value).getBytes()
: Hex.encodeHexString(((ByteArray) value).getBytes())) : value;
if (valueToDictId.containsKey(value)) {
Assert.assertEquals(dictionary.indexOf(rawValue), (int) valueToDictId.get(value));
} else {
Expand Down

0 comments on commit 80c633b

Please sign in to comment.