Skip to content

Commit

Permalink
Merge branch 'apache:master' into bkmgit-update-dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
bkmgit authored Nov 17, 2021
2 parents dd0b00c + 463b5dd commit 0477e9d
Show file tree
Hide file tree
Showing 336 changed files with 2,687 additions and 4,382 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ perf.data.old
cpp/.idea/
.clangd/
cpp/.clangd/
.cache/clangd/
cpp/.cache/clangd/
cpp/apidoc/xml/
docs/example.gz
docs/example1.dat
Expand Down
1 change: 1 addition & 0 deletions cpp/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
thirdparty/*.tar*
CMakeFiles/
CMakeCache.txt
CMakeUserPresets.json
CTestTestfile.cmake
Makefile
cmake_install.cmake
Expand Down
1 change: 1 addition & 0 deletions cpp/Brewfile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# under the License.

brew "automake"
brew "aws-sdk-cpp"
brew "boost"
brew "brotli"
brew "c-ares"
Expand Down
165 changes: 165 additions & 0 deletions cpp/CMakePresets.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
{
"version": 2,
"cmakeMinimumRequired": {
"major": 3,
"minor": 20,
"patch": 0
},
"configurePresets": [
{
"name": "ninja-benchmarks",
"description": "Build for benchmarks",
"inherits": "ninja-release",
"cacheVariables": {
"ARROW_BUILD_BENCHMARKS": "ON",
"ARROW_BUILD_BENCHMARKS_REFERENCE": "ON",
"ARROW_BUILD_TESTS": "OFF"
}
},
{
"name": "ninja-debug",
"description": "Debug configuration with basic build",
"binaryDir": "${sourceDir}/build/${presetName}",
"generator": "Ninja",
"cacheVariables": {
"ARROW_BUILD_BENCHMARKS": {
"type": "BOOL",
"value": "OFF"
},
"ARROW_BUILD_TESTS": {
"type": "BOOL",
"value": "ON"
},
"ARROW_COMPUTE": {
"type": "BOOL",
"value": "ON"
},
"ARROW_CSV": {
"type": "BOOL",
"value": "ON"
},
"ARROW_CUDA": {
"type": "BOOL",
"value": "OFF"
},
"ARROW_DATASET": {
"type": "BOOL",
"value": "OFF"
},
"ARROW_GANDIVA": {
"type": "BOOL",
"value": "OFF"
},
"ARROW_GANDIVA_JAVA": {
"type": "BOOL",
"value": "OFF"
},
"ARROW_GANDIVA_JNI": {
"type": "BOOL",
"value": "OFF"
},
"ARROW_FILESYSTEM": {
"type": "BOOL",
"value": "ON"
},
"ARROW_IPC": {
"type": "BOOL",
"value": "ON"
},
"ARROW_PARQUET": {
"type": "BOOL",
"value": "OFF"
},
"ARROW_PLASMA_JAVA_CLIENT": {
"type": "BOOL",
"value": "OFF"
},
"ARROW_PYTHON": {
"type": "BOOL",
"value": "ON"
},
"ARROW_SKYHOOK": {
"type": "BOOL",
"value": "OFF"
},
"ARROW_WITH_RE2": {
"type": "BOOL",
"value": "ON"
},
"CMAKE_BUILD_TYPE": {
"type": "STRING",
"value": "Debug"
},
"CMAKE_INSTALL_PREFIX": {
"type": "PATH",
"value": "${sourceDir}/build/${presetName}/pkg"
}
}
},
{
"name": "ninja-debug-cuda",
"description": "Debug Arrow build with CUDA extensions (requires CUDA toolkit)",
"inherits": "ninja-debug",
"cacheVariables": {
"ARROW_CUDA": "ON"
}
},
{
"name": "ninja-debug-dataset",
"description": "Builds Arrow Dataset modules",
"inherits": "ninja-debug",
"cacheVariables": {
"ARROW_DATASET": "ON"
}
},
{
"name": "ninja-debug-gandiva",
"description": "Builds Gandiva libraries",
"inherits": "ninja-debug",
"cacheVariables": {
"ARROW_GANDIVA": "ON"
}
},
{
"name": "ninja-debug-parquet",
"description": "Builds Parquet libraries",
"inherits": "ninja-debug",
"cacheVariables": {
"ARROW_PARQUET": "ON"
}
},
{
"name": "ninja-debug-skyhook",
"description": "Builds Skyhook libraries",

"inherits": "ninja-debug",
"cacheVariables": {
"ARROW_SKYHOOK": "ON"
}
},
{
"name": "ninja-release",
"description": "Release configuration",
"inherits": "ninja-debug",
"cacheVariables": {
"CMAKE_BUILD_TYPE": "Release"
}
},
{
"name": "ninja-release-gandiva",
"description": "Release configuration with Gandiva",
"inherits": "ninja-release",
"cacheVariables": {
"ARROW_GANDIVA": "ON"
}
},
{
"name": "ninja-release-parquet",
"description": "Release configuration with Parquet",
"inherits": "ninja-release",
"cacheVariables": {
"ARROW_PARQUET": "ON"
}
}
]
}
34 changes: 17 additions & 17 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -377,11 +377,21 @@ if(ARROW_COMPUTE)
compute/exec/exec_plan.cc
compute/exec/expression.cc
compute/exec/filter_node.cc
compute/exec/hash_join.cc
compute/exec/hash_join_dict.cc
compute/exec/hash_join_node.cc
compute/exec/ir_consumer.cc
compute/exec/key_compare.cc
compute/exec/key_encode.cc
compute/exec/key_hash.cc
compute/exec/key_map.cc
compute/exec/order_by_impl.cc
compute/exec/project_node.cc
compute/exec/source_node.cc
compute/exec/sink_node.cc
compute/exec/order_by_impl.cc
compute/exec/source_node.cc
compute/exec/task_util.cc
compute/exec/union_node.cc
compute/exec/util.cc
compute/function.cc
compute/function_internal.cc
compute/kernel.cc
Expand All @@ -393,6 +403,7 @@ if(ARROW_COMPUTE)
compute/kernels/aggregate_var_std.cc
compute/kernels/codegen_internal.cc
compute/kernels/hash_aggregate.cc
compute/kernels/row_encoder.cc
compute/kernels/scalar_arithmetic.cc
compute/kernels/scalar_boolean.cc
compute/kernels/scalar_cast_boolean.cc
Expand All @@ -403,39 +414,28 @@ if(ARROW_COMPUTE)
compute/kernels/scalar_cast_string.cc
compute/kernels/scalar_cast_temporal.cc
compute/kernels/scalar_compare.cc
compute/kernels/scalar_if_else.cc
compute/kernels/scalar_nested.cc
compute/kernels/scalar_set_lookup.cc
compute/kernels/scalar_string.cc
compute/kernels/scalar_temporal_binary.cc
compute/kernels/scalar_temporal_unary.cc
compute/kernels/scalar_validity.cc
compute/kernels/scalar_if_else.cc
compute/kernels/util_internal.cc
compute/kernels/vector_array_sort.cc
compute/kernels/vector_hash.cc
compute/kernels/vector_nested.cc
compute/kernels/vector_replace.cc
compute/kernels/vector_selection.cc
compute/kernels/vector_sort.cc
compute/kernels/row_encoder.cc
compute/exec/union_node.cc
compute/exec/key_hash.cc
compute/exec/key_map.cc
compute/exec/key_compare.cc
compute/exec/key_encode.cc
compute/exec/util.cc
compute/exec/hash_join_dict.cc
compute/exec/hash_join.cc
compute/exec/hash_join_node.cc
compute/exec/task_util.cc)
compute/kernels/vector_sort.cc)

append_avx2_src(compute/kernels/aggregate_basic_avx2.cc)
append_avx512_src(compute/kernels/aggregate_basic_avx512.cc)

append_avx2_src(compute/exec/key_hash_avx2.cc)
append_avx2_src(compute/exec/key_map_avx2.cc)
append_avx2_src(compute/exec/key_compare_avx2.cc)
append_avx2_src(compute/exec/key_encode_avx2.cc)
append_avx2_src(compute/exec/key_hash_avx2.cc)
append_avx2_src(compute/exec/key_map_avx2.cc)
append_avx2_src(compute/exec/util_avx2.cc)

list(APPEND ARROW_TESTING_SRCS compute/exec/test_util.cc)
Expand Down
5 changes: 1 addition & 4 deletions cpp/src/arrow/array/array_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -305,9 +305,6 @@ Status Array::Accept(ArrayVisitor* visitor) const {

// Validates this array by forwarding it to internal::ValidateArray and
// returning that call's Status unchanged.
Status Array::Validate() const {
  return internal::ValidateArray(*this);
}

Status Array::ValidateFull() const {
RETURN_NOT_OK(internal::ValidateArray(*this));
return internal::ValidateArrayFull(*this);
}
// Validates this array by forwarding it to internal::ValidateArrayFull and
// returning that call's Status unchanged. No other check is performed here.
Status Array::ValidateFull() const {
  return internal::ValidateArrayFull(*this);
}

} // namespace arrow
29 changes: 24 additions & 5 deletions cpp/src/arrow/array/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ TEST_F(TestArray, TestNullToString) {
auto data = std::make_shared<Buffer>(nullptr, 400);

std::unique_ptr<Int32Array> arr(new Int32Array(100, data));
ASSERT_EQ(arr->ToString(), "<Invalid array: Missing values buffer in non-empty array>");
ASSERT_EQ(arr->ToString(),
"<Invalid array: Missing values buffer in non-empty fixed-width array>");
}

TEST_F(TestArray, TestSliceSafe) {
Expand Down Expand Up @@ -332,6 +333,10 @@ TEST_F(TestArray, BuildLargeInMemoryArray) {
}

TEST_F(TestArray, TestMakeArrayOfNull) {
FieldVector union_fields1({field("a", utf8()), field("b", int32())});
FieldVector union_fields2({field("a", null()), field("b", list(large_utf8()))});
std::vector<int8_t> union_type_codes{7, 42};

std::shared_ptr<DataType> types[] = {
// clang-format off
null(),
Expand All @@ -354,19 +359,33 @@ TEST_F(TestArray, TestMakeArrayOfNull) {
fixed_size_list(int64(), 4),
dictionary(int32(), utf8()),
struct_({field("a", utf8()), field("b", int32())}),
sparse_union(union_fields1, union_type_codes),
sparse_union(union_fields2, union_type_codes),
dense_union(union_fields1, union_type_codes),
dense_union(union_fields2, union_type_codes),
smallint(), // extension type
// clang-format on
};

for (int64_t length : {0, 1, 16, 133}) {
for (auto type : types) {
ARROW_SCOPED_TRACE("type = ", type->ToString());
ASSERT_OK_AND_ASSIGN(auto array, MakeArrayOfNull(type, length));
ASSERT_OK(array->ValidateFull());
ASSERT_EQ(array->length(), length);
ASSERT_EQ(array->null_count(), length);
for (int64_t i = 0; i < length; ++i) {
ASSERT_TRUE(array->IsNull(i));
ASSERT_FALSE(array->IsValid(i));
if (is_union(type->id())) {
// For unions, MakeArrayOfNull places the nulls in the children
ASSERT_EQ(array->null_count(), 0);
const auto& union_array = checked_cast<const UnionArray&>(*array);
for (int i = 0; i < union_array.num_fields(); ++i) {
ASSERT_EQ(union_array.field(i)->null_count(), union_array.field(i)->length());
}
} else {
ASSERT_EQ(array->null_count(), length);
for (int64_t i = 0; i < length; ++i) {
ASSERT_TRUE(array->IsNull(i));
ASSERT_FALSE(array->IsValid(i));
}
}
}
}
Expand Down
19 changes: 14 additions & 5 deletions cpp/src/arrow/array/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -363,19 +363,28 @@ class NullArrayFactory {
return Status::OK();
}

Status Visit(const UnionType& type) {
Status Visit(const SparseUnionType& type) {
// type codes
RETURN_NOT_OK(MaxOf(length_));
if (type.mode() == UnionMode::DENSE) {
// offsets
RETURN_NOT_OK(MaxOf(sizeof(int32_t) * length_));
}
// will create children of the same length as the union
for (const auto& child : type.fields()) {
RETURN_NOT_OK(MaxOf(GetBufferLength(child->type(), length_)));
}
return Status::OK();
}

// Registers the buffer sizes needed for a dense union of `length_` elements.
//
// Each size is reported through MaxOf, and the first non-OK Status is
// returned immediately. NOTE(review): MaxOf presumably keeps the largest
// size seen so far -- confirm against its definition.
Status Visit(const DenseUnionType& type) {
  // Type codes buffer: `length_` is passed as the required size.
  const auto type_codes_size = length_;
  RETURN_NOT_OK(MaxOf(type_codes_size));

  // Offsets buffer: one int32_t entry per union element.
  const auto offsets_size = sizeof(int32_t) * length_;
  RETURN_NOT_OK(MaxOf(offsets_size));

  // Children are created with length 1, so each child type is sized for a
  // single element rather than for `length_`.
  for (const auto& union_field : type.fields()) {
    RETURN_NOT_OK(MaxOf(GetBufferLength(union_field->type(), 1)));
  }

  return Status::OK();
}

Status Visit(const DictionaryType& type) {
RETURN_NOT_OK(MaxOf(GetBufferLength(type.value_type(), length_)));
return MaxOf(GetBufferLength(type.index_type(), length_));
Expand Down
Loading

0 comments on commit 0477e9d

Please sign in to comment.