From 39c57edeebbf9679ac0a83f759d5cae2d4a57df5 Mon Sep 17 00:00:00 2001
From: Micah Kornfield
Date: Wed, 13 Apr 2016 08:09:21 +0000
Subject: [PATCH] add potentially useful methods for generative arrays to ipc test-common

---
 cpp/src/arrow/ipc/test-common.h | 74 +++++++++++++++++++++++++++++++++
 1 file changed, 74 insertions(+)

diff --git a/cpp/src/arrow/ipc/test-common.h b/cpp/src/arrow/ipc/test-common.h
index 65c837dc8b141..00688ad1f059f 100644
--- a/cpp/src/arrow/ipc/test-common.h
+++ b/cpp/src/arrow/ipc/test-common.h
@@ -23,6 +23,13 @@
 #include <string>
 #include <vector>
 
+#include "arrow/array.h"
+#include "arrow/test-util.h"
+#include "arrow/types/primitive.h"
+#include "arrow/types/list.h"
+#include "arrow/util/buffer.h"
+#include "arrow/util/memory-pool.h"
+
 namespace arrow {
 namespace ipc {
 
@@ -45,6 +52,73 @@ class MemoryMapFixture {
   std::vector<std::string> tmp_files_;
 };
 
+// Builds an Int32 array of `length` values and stores it in `*array`.
+//
+// The values are read from the buffer filled by test::MakeRandomInt32PoolBuffer.
+// When `include_nulls` is true, the bytes filled by test::MakeRandomBytePoolBuffer
+// are passed to the builder as the third Append argument. Returns the failing
+// Status if Append fails, Status::OK() otherwise.
+inline Status MakeRandomInt32Array(
+    int32_t length, bool include_nulls, MemoryPool* pool, std::shared_ptr<Array>* array) {
+  std::shared_ptr<PoolBuffer> data;
+  // NOTE(review): the values returned by the MakeRandom*PoolBuffer helpers are
+  // discarded; if they are Status objects, wrap the calls in RETURN_NOT_OK.
+  test::MakeRandomInt32PoolBuffer(length, pool, &data);
+  const auto INT32 = std::make_shared<Int32Type>();
+  Int32Builder builder(pool, INT32);
+  const auto* values = reinterpret_cast<const int32_t*>(data->data());
+  if (include_nulls) {
+    std::shared_ptr<PoolBuffer> valid_bytes;
+    test::MakeRandomBytePoolBuffer(length, pool, &valid_bytes);
+    RETURN_NOT_OK(builder.Append(values, length, valid_bytes->data()));
+  } else {
+    RETURN_NOT_OK(builder.Append(values, length));
+  }
+  *array = builder.Finish();
+  return Status::OK();
+}
+
+// Builds a list array of `num_lists` lists over `child_array`, stores it in
+// `*array`, and returns the Status of validating the result.
+//
+// test::random_null_bytes is called with a null fraction of 0.1; any list whose
+// validity byte is 0 is forced to size 0. The other sizes come from
+// test::rand_uniform_int with bounds 0 and max_list_size, and every offset is
+// clamped so that it never exceeds the child array's length.
+inline Status MakeRandomListArray(const std::shared_ptr<Array>& child_array,
+    int num_lists, MemoryPool* pool, std::shared_ptr<Array>* array) {
+  // Create the null list values
+  std::vector<uint8_t> valid_lists(num_lists);
+  const double null_percent = 0.1;
+  test::random_null_bytes(num_lists, null_percent, valid_lists.data());
+
+  // Create list offsets
+  const int max_list_size = 10;
+
+  std::vector<int32_t> list_sizes(num_lists, 0);
+  std::vector<int32_t> offsets(
+      num_lists + 1, 0);  // +1 so we can shift for nulls. See partial sum below.
+  const int seed = child_array->length();
+  test::rand_uniform_int(num_lists, seed, 0, max_list_size, list_sizes.data());
+  // make sure sizes are consistent with null
+  std::transform(list_sizes.begin(), list_sizes.end(), valid_lists.begin(),
+      list_sizes.begin(),
+      [](int32_t size, int32_t valid) { return valid == 0 ? 0 : size; });
+  // offsets[i + 1] = list_sizes[0] + ... + list_sizes[i]; offsets[0] stays 0.
+  std::partial_sum(list_sizes.begin(), list_sizes.end(), offsets.begin() + 1);
+
+  // Force invariants
+  const int child_length = child_array->length();
+  offsets[0] = 0;
+  std::replace_if(offsets.begin(), offsets.end(),
+      [child_length](int32_t offset) { return offset > child_length; }, child_length);
+
+  ListBuilder builder(pool, child_array);
+  RETURN_NOT_OK(builder.Append(offsets.data(), num_lists, valid_lists.data()));
+  *array = builder.Finish();
+  return (*array)->Validate();
+}
+
 }  // namespace ipc
 }  // namespace arrow
 