Skip to content

Commit

Permalink
Make ArrowColumnVector reusable.
Browse files Browse the repository at this point in the history
  • Loading branch information
ueshin committed Aug 22, 2017
1 parent 5782cc4 commit 9eb88a8
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 40 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,23 +32,34 @@
public final class ArrowColumnVector extends ColumnVector {

private final ArrowVectorAccessor accessor;
private final int valueCount;
private ArrowColumnVector[] childColumns;

private void ensureAccessible(int index) {
int valueCount = accessor.getValueCount();
if (index < 0 || index >= valueCount) {
throw new IndexOutOfBoundsException(
String.format("index: %d, valueCount: %d", index, valueCount));
}
}

private void ensureAccessible(int index, int count) {
int valueCount = accessor.getValueCount();
if (index < 0 || index + count > valueCount) {
throw new IndexOutOfBoundsException(
String.format("index range: [%d, %d), valueCount: %d", index, index + count, valueCount));
}
}

@Override
public int numNulls() {
return accessor.getNullCount();
}

@Override
public boolean anyNullsSet() {
return numNulls() > 0;
}

@Override
public long nullsNativeAddress() {
throw new RuntimeException("Cannot get native address for arrow column");
Expand Down Expand Up @@ -288,7 +299,7 @@ public byte[] getBinary(int rowId) {
public ArrowColumnVector getChildColumn(int ordinal) { return childColumns[ordinal]; }

public ArrowColumnVector(ValueVector vector) {
super(vector.getValueCapacity(), ArrowUtils.fromArrowField(vector.getField()));
super(ArrowUtils.fromArrowField(vector.getField()));

if (vector instanceof NullableBitVector) {
accessor = new BooleanAccessor((NullableBitVector) vector);
Expand Down Expand Up @@ -329,36 +340,28 @@ public ArrowColumnVector(ValueVector vector) {
} else {
throw new UnsupportedOperationException();
}
valueCount = accessor.getValueCount();
numNulls = accessor.getNullCount();
anyNullsSet = numNulls > 0;
}

private abstract static class ArrowVectorAccessor {

private final ValueVector vector;
private final ValueVector.Accessor nulls;

private final int valueCount;
private final int nullCount;

ArrowVectorAccessor(ValueVector vector) {
this.vector = vector;
this.nulls = vector.getAccessor();
this.valueCount = nulls.getValueCount();
this.nullCount = nulls.getNullCount();
}

final boolean isNullAt(int rowId) {
return nulls.isNull(rowId);
}

final int getValueCount() {
return valueCount;
return nulls.getValueCount();
}

final int getNullCount() {
return nullCount;
return nulls.getNullCount();
}

final void close() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -261,13 +261,13 @@ public Object get(int ordinal, DataType dataType) {
/**
* Returns the number of nulls in this column.
*/
public final int numNulls() { return numNulls; }
public abstract int numNulls();

/**
* Returns true if any of the nulls indicator are set for this column. This can be used
* as an optimization to prevent setting nulls.
*/
public final boolean anyNullsSet() { return anyNullsSet; }
public abstract boolean anyNullsSet();

/**
* Returns the off heap ptr for the arrays backing the NULLs and values buffer. Only valid
Expand Down Expand Up @@ -436,27 +436,11 @@ public MapData getMap(int ordinal) {
*/
public final boolean isArray() { return resultArray != null; }

/**
* Maximum number of rows that can be stored in this column.
*/
protected int capacity;

/**
* Data type for this column.
*/
protected DataType type;

/**
* Number of nulls in this column. This is an optimization for the reader, to skip NULL checks.
*/
protected int numNulls;

/**
* True if there is at least one NULL byte set. This is an optimization for the writer, to skip
* having to clear NULL bits.
*/
protected boolean anyNullsSet;

/**
* Reusable Array holder for getArray().
*/
Expand Down Expand Up @@ -495,8 +479,7 @@ public ColumnVector getDictionaryIds() {
* Sets up the common state and also handles creating the child columns if this is a nested
* type.
*/
protected ColumnVector(int capacity, DataType type) {
this.capacity = capacity;
protected ColumnVector(DataType type) {
this.type = type;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ public void setNumRows(int numRows) {
this.numRows = numRows;

for (int ordinal : nullFilteredColumns) {
if (columns[ordinal].numNulls != 0) {
if (columns[ordinal].numNulls() != 0) {
for (int rowId = 0; rowId < numRows; rowId++) {
if (!filteredRows[rowId] && columns[ordinal].isNullAt(rowId)) {
filteredRows[rowId] = true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,12 @@ private void throwUnsupportedException(int requiredCapacity, Throwable cause) {
throw new RuntimeException(message, cause);
}

@Override
public int numNulls() { return numNulls; }

@Override
public boolean anyNullsSet() { return anyNullsSet; }

/**
* Ensures that there is enough storage to store capacity elements. That is, the put() APIs
* must work for all rowIds < capacity.
Expand Down Expand Up @@ -544,12 +550,34 @@ public final int appendStruct(boolean isNull) {
*/
public final void setIsConstant() { isConstant = true; }

/**
* Maximum number of rows that can be stored in this column.
*/
protected int capacity;

/**
* Upper limit for the maximum capacity for this column.
*/
@VisibleForTesting
protected int MAX_CAPACITY = Integer.MAX_VALUE;

/**
* Number of nulls in this column. This is an optimization for the reader, to skip NULL checks.
*/
protected int numNulls;

/**
* True if there is at least one NULL byte set. This is an optimization for the writer, to skip
* having to clear NULL bits.
*/
protected boolean anyNullsSet;

/**
* True if this column's values are fixed. This means the column values never change, even
* across resets.
*/
protected boolean isConstant;

/**
* Default size of each array length value. This grows as necessary.
*/
Expand All @@ -565,12 +593,6 @@ public final int appendStruct(boolean isNull) {
*/
protected WritableColumnVector[] childColumns;

/**
* True if this column's values are fixed. This means the column values never change, even
* across resets.
*/
protected boolean isConstant;

/**
* Update the dictionary.
*/
Expand Down Expand Up @@ -611,7 +633,8 @@ public WritableColumnVector getDictionaryIds() {
* type.
*/
protected WritableColumnVector(int capacity, DataType type) {
super(capacity, type);
super(type);
this.capacity = capacity;

if (type instanceof ArrayType || type instanceof BinaryType || type instanceof StringType
|| DecimalType.isByteArrayDecimalType(type)) {
Expand Down

0 comments on commit 9eb88a8

Please sign in to comment.