diff --git a/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java b/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java index 816e0deae418..5220749da2b9 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/dictionary/BytesOffHeapMutableDictionary.java @@ -21,6 +21,9 @@ import java.io.IOException; import java.util.Arrays; import javax.annotation.Nonnull; +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Hex; +import org.apache.pinot.common.Utils; import org.apache.pinot.common.utils.primitive.ByteArray; import org.apache.pinot.core.io.readerwriter.PinotDataBufferMemoryManager; import org.apache.pinot.core.io.writer.impl.MutableOffHeapByteArrayStore; @@ -30,6 +33,7 @@ * OffHeap mutable dictionary for Bytes data type. */ public class BytesOffHeapMutableDictionary extends BaseOffHeapMutableDictionary { + private final MutableOffHeapByteArrayStore _byteStore; private ByteArray _min = null; @@ -52,8 +56,19 @@ public BytesOffHeapMutableDictionary(int estimatedCardinality, int maxOverflowHa @Override public int indexOf(Object rawValue) { - assert rawValue instanceof byte[]; - byte[] bytes = (byte[]) rawValue; + byte[] bytes = null; + // Convert hex string to byte[]. + if (rawValue instanceof byte[]) { + bytes = (byte[]) rawValue; + } else if (rawValue instanceof String) { + try { + bytes = Hex.decodeHex(((String) rawValue).toCharArray()); + } catch (DecoderException e) { + Utils.rethrowException(e); + } + } else { + assert rawValue instanceof byte[]; + } return getDictId(new ByteArray(bytes), bytes); } @@ -80,8 +95,18 @@ protected void setRawValueAt(int dictId, Object rawValue, byte[] serializedValue @Override public void index(@Nonnull Object rawValue) { - assert rawValue instanceof byte[]; - byte[] bytes = (byte[]) rawValue; + byte[] bytes = null; + // Convert hex string to byte[]. + if (rawValue instanceof String) { + try { + bytes = Hex.decodeHex(((String) rawValue).toCharArray()); + } catch (DecoderException e) { + Utils.rethrowException(e); + } + } else { + assert rawValue instanceof byte[]; + bytes = (byte[]) rawValue; + } ByteArray byteArray = new ByteArray(bytes); indexValue(byteArray, bytes); updateMinMax(byteArray); diff --git a/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java b/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java index df3609824252..37236c3f45ff 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/realtime/impl/dictionary/BytesOnHeapMutableDictionary.java @@ -20,6 +20,9 @@ import java.util.Arrays; import javax.annotation.Nonnull; +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.binary.Hex; +import org.apache.pinot.common.Utils; import org.apache.pinot.common.utils.primitive.ByteArray; @@ -27,13 +30,26 @@ * OnHeap mutable dictionary of Bytes type. */ public class BytesOnHeapMutableDictionary extends BaseOnHeapMutableDictionary { + private ByteArray _min = null; private ByteArray _max = null; @Override public int indexOf(Object rawValue) { - assert rawValue instanceof byte[]; - return getDictId(new ByteArray((byte[]) rawValue)); + byte[] bytes = null; + // Convert hex string to byte[]. + if (rawValue instanceof byte[]) { + bytes = (byte[]) rawValue; + } else if (rawValue instanceof String) { + try { + bytes = Hex.decodeHex(((String) rawValue).toCharArray()); + } catch (DecoderException e) { + Utils.rethrowException(e); + } + } else { + assert rawValue instanceof byte[]; + } + return getDictId(new ByteArray(bytes)); } @Override @@ -48,8 +64,19 @@ public byte[] getBytesValue(int dictId) { @Override public void index(@Nonnull Object rawValue) { - assert rawValue instanceof byte[]; - ByteArray byteArray = new ByteArray((byte[]) rawValue); + byte[] bytes = null; + // Convert hex string to byte[]. + if (rawValue instanceof String) { + try { + bytes = Hex.decodeHex(((String) rawValue).toCharArray()); + } catch (DecoderException e) { + Utils.rethrowException(e); + } + } else { + assert rawValue instanceof byte[]; + bytes = (byte[]) rawValue; + } + ByteArray byteArray = new ByteArray(bytes); indexValue(byteArray); updateMinMax(byteArray); } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/dictionary/MutableDictionaryTest.java b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/dictionary/MutableDictionaryTest.java index ce79b10a027a..99a28d243155 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/dictionary/MutableDictionaryTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/realtime/impl/dictionary/MutableDictionaryTest.java @@ -30,6 +30,7 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.stream.Collectors; +import org.apache.commons.codec.binary.Hex; import org.apache.commons.lang.RandomStringUtils; import org.apache.pinot.common.data.FieldSpec; import org.apache.pinot.common.utils.primitive.ByteArray; @@ -174,7 +175,8 @@ private void testMutableDictionary(MutableDictionary dictionary, FieldSpec.DataT Comparable value = (i == 0 && dataType == FieldSpec.DataType.INT) ? Integer.MIN_VALUE : makeRandomObjectOfType(dataType); - Object rawValue = dataType == FieldSpec.DataType.BYTES ? ((ByteArray) value).getBytes() : value; + Object rawValue = dataType == FieldSpec.DataType.BYTES ? ((i % 2 == 0) ? ((ByteArray) value).getBytes() + : Hex.encodeHexString(((ByteArray) value).getBytes())) : value; if (valueToDictId.containsKey(value)) { Assert.assertEquals(dictionary.indexOf(rawValue), (int) valueToDictId.get(value)); } else {