Skip to content

Commit

Permalink
Add utility methods for managing null bitmap directly to ArrayBuilder
Browse files Browse the repository at this point in the history
  • Loading branch information
emkornfield committed Apr 13, 2016
1 parent cc7f851 commit 01c50be
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 2 deletions.
56 changes: 56 additions & 0 deletions cpp/src/arrow/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,25 @@

namespace arrow {

// Appends one slot to the null bitmap, growing the bitmap first when the
// builder is exactly at capacity. A failed Resize is propagated to the
// caller; otherwise Status::OK() is returned.
Status ArrayBuilder::AppendToBitmap(bool is_null) {
  const bool at_capacity = (length_ == capacity_);
  if (at_capacity) {
    // Grow to util::next_power2(capacity_ + 1) so there is room for the slot.
    // TODO(emkornfield) doubling isn't great default allocation practice
    // see https://github.com/facebook/folly/blob/master/folly/docs/FBVector.md
    // for discussion
    const int32_t grown_capacity = util::next_power2(capacity_ + 1);
    RETURN_NOT_OK(Resize(grown_capacity));
  }
  UnsafeAppendToBitmap(is_null);
  return Status::OK();
}

// Vector append to the null bitmap with capacity checking.
//
// valid_bytes: one byte per slot; a zero byte marks the slot as null. When
//              nullptr, all `length` slots are treated as valid.
// length:      number of slots to append.
//
// Returns the failing Status if reserving space fails (in which case the
// bitmap is not written to), Status::OK() otherwise.
Status ArrayBuilder::AppendToBitmap(const uint8_t* valid_bytes, int32_t length) {
  // The reservation result must be checked: the unsafe append below writes
  // into the bitmap without verifying capacity.
  RETURN_NOT_OK(Reserve(length));

  UnsafeAppendToBitmap(valid_bytes, length);
  return Status::OK();
}

Status ArrayBuilder::Init(int32_t capacity) {
capacity_ = capacity;
int32_t to_alloc = util::ceil_byte(capacity) / 8;
Expand All @@ -36,6 +55,7 @@ Status ArrayBuilder::Init(int32_t capacity) {
}

Status ArrayBuilder::Resize(int32_t new_bits) {
if (!null_bitmap_) { return Init(new_bits); }
int32_t new_bytes = util::ceil_byte(new_bits) / 8;
int32_t old_bytes = null_bitmap_->size();
RETURN_NOT_OK(null_bitmap_->Resize(new_bytes));
Expand All @@ -56,10 +76,46 @@ Status ArrayBuilder::Advance(int32_t elements) {

// Makes sure the builder has room for `elements` additional slots beyond the
// current length. Does nothing when the existing capacity already suffices;
// otherwise resizes to util::next_power2(length_ + elements) and returns the
// Status of that Resize.
Status ArrayBuilder::Reserve(int32_t elements) {
  const int32_t required = length_ + elements;
  if (required <= capacity_) {
    return Status::OK();
  }
  // TODO(emkornfield) power of 2 growth is potentially suboptimal
  const int32_t grown_capacity = util::next_power2(required);
  return Resize(grown_capacity);
}

// Marks the next `length` slots as not null (i.e. valid), reserving space for
// them first. A failed reservation is returned to the caller before any bits
// are set.
Status ArrayBuilder::SetNotNull(int32_t length) {
  // Capacity-checked step; the unsafe setter below does no checking itself.
  RETURN_NOT_OK(Reserve(length));

  UnsafeSetNotNull(length);
  return Status::OK();
}

// Appends one slot to the null bitmap without checking capacity.
// A valid slot gets its bit set at index length_; a null slot leaves the bit
// untouched and bumps null_count_. Either way length_ advances by one.
void ArrayBuilder::UnsafeAppendToBitmap(bool is_null) {
  if (!is_null) {
    util::set_bit(null_bitmap_data_, length_);
  } else {
    ++null_count_;
  }
  ++length_;
}

void ArrayBuilder::UnsafeAppendToBitmap(const uint8_t* valid_bytes, int32_t length) {
if (valid_bytes == nullptr) {
UnsafeSetNotNull(length);
return;
}
for (int32_t i = 0; i < length; ++i) {
// TODO(emkornfield) Optimize for large values of length?
AppendToBitmap(valid_bytes[i] == 0);
}
}

// Sets the bitmap bits for the next `length` slots (starting at length_) and
// advances length_ past them. Performs no capacity check.
void ArrayBuilder::UnsafeSetNotNull(int32_t length) {
  const int32_t end_slot = length + length_;
  // TODO(emkornfield) Optimize for large values of length?
  int32_t slot = length_;
  while (slot < end_slot) {
    util::set_bit(null_bitmap_data_, slot);
    ++slot;
  }
  length_ = end_slot;
}

} // namespace arrow
27 changes: 25 additions & 2 deletions cpp/src/arrow/builder.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ class PoolBuffer;

static constexpr int32_t MIN_BUILDER_CAPACITY = 1 << 5;

// Base class for all data array builders
// Base class for all data array builders.
// This class provides facilities for incrementally building the null bitmap
// (see Append methods) and, as a side effect, tracks the current number of
// slots and the null count.
class ArrayBuilder {
public:
explicit ArrayBuilder(MemoryPool* pool, const TypePtr& type)
Expand All @@ -58,6 +61,14 @@ class ArrayBuilder {
int32_t null_count() const { return null_count_; }
int32_t capacity() const { return capacity_; }

// Append to null bitmap
Status AppendToBitmap(bool is_null);
// Vector append. Treat each zero byte as a null. If valid_bytes is null
// assume all of length bits are valid.
Status AppendToBitmap(const uint8_t* valid_bytes, int32_t length);
// Set the next length bits to not null (i.e. valid).
Status SetNotNull(int32_t length);

// Allocates the required memory at this level, but children need to be
// initialized independently
Status Init(int32_t capacity);
Expand All @@ -75,7 +86,7 @@ class ArrayBuilder {
const std::shared_ptr<PoolBuffer>& null_bitmap() const { return null_bitmap_; }

// Creates new array object to hold the contents of the builder and transfers
// ownership of the data
// ownership of the data. This resets all variables on the builder.
virtual std::shared_ptr<Array> Finish() = 0;

const std::shared_ptr<DataType>& type() const { return type_; }
Expand All @@ -97,6 +108,18 @@ class ArrayBuilder {
// Child value array builders. These are owned by this class
std::vector<std::unique_ptr<ArrayBuilder>> children_;

//
// Unsafe operations (don't check capacity/don't resize)
//

// Append to null bitmap.
void UnsafeAppendToBitmap(bool is_null);
// Vector append. Treat each zero byte as a null. If valid_bytes is null
// assume all of length bits are valid.
void UnsafeAppendToBitmap(const uint8_t* valid_bytes, int32_t length);
// Set the next length bits to not null (i.e. valid).
void UnsafeSetNotNull(int32_t length);

private:
DISALLOW_COPY_AND_ASSIGN(ArrayBuilder);
};
Expand Down

0 comments on commit 01c50be

Please sign in to comment.