forked from facebookincubator/velox
-
Notifications
You must be signed in to change notification settings - Fork 6
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0baf13f
commit bec6db1
Showing
9 changed files
with
479 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#include <folly/container/F14Set.h> | ||
|
||
#include "velox/expression/EvalCtx.h" | ||
#include "velox/expression/Expr.h" | ||
#include "velox/expression/VectorFunction.h" | ||
#include "velox/functions/lib/LambdaFunctionUtil.h" | ||
#include "velox/functions/prestosql/CheckedArithmetic.h" | ||
|
||
namespace facebook::velox::functions { | ||
namespace { | ||
/// | ||
/// Implements the array_sum function. | ||
/// See documentation at https://prestodb.io/docs/current/functions/array.html | ||
/// | ||
template <typename TInput, typename TOutput> | ||
class ArraySumFunction : public exec::VectorFunction { | ||
public: | ||
template <bool mayHaveNulls> | ||
void applyFlat( | ||
const SelectivityVector& rows, | ||
ArrayVector* arrayVector, | ||
const uint64_t* rawNulls, | ||
const TInput* rawElements, | ||
FlatVector<TOutput>* resultValues) const { | ||
rows.template applyToSelected([&](vector_size_t row) { | ||
auto start = arrayVector->offsetAt(row); | ||
auto end = start + arrayVector->sizeAt(row); | ||
TOutput sum = 0; | ||
for (; start < end; start++) { | ||
if constexpr (mayHaveNulls) { | ||
bool isNull = bits::isBitNull(rawNulls, start); | ||
if (!isNull) { | ||
if constexpr (std::is_same<TOutput, int64_t>::value) { | ||
sum = checkedPlus<TOutput>(sum, rawElements[start]); | ||
} else { | ||
sum += rawElements[start]; | ||
} | ||
} | ||
} else { | ||
if constexpr (std::is_same<TOutput, int64_t>::value) { | ||
sum = checkedPlus<TOutput>(sum, rawElements[start]); | ||
} else { | ||
sum += rawElements[start]; | ||
} | ||
} | ||
} | ||
resultValues->set(row, sum); | ||
}); | ||
} | ||
|
||
template <bool mayHaveNulls> | ||
void applyNonFlat( | ||
const SelectivityVector& rows, | ||
ArrayVector* arrayVector, | ||
exec::LocalDecodedVector& elements, | ||
FlatVector<TOutput>* resultValues) const { | ||
rows.template applyToSelected([&](vector_size_t row) { | ||
auto start = arrayVector->offsetAt(row); | ||
auto end = start + arrayVector->sizeAt(row); | ||
TOutput sum = 0; | ||
for (; start < end; start++) { | ||
if constexpr (mayHaveNulls) { | ||
if (!elements->isNullAt(start)) { | ||
if constexpr (std::is_same<TOutput, int64_t>::value) { | ||
sum = checkedPlus<TOutput>( | ||
sum, elements->template valueAt<TInput>(start)); | ||
} else { | ||
sum += elements->template valueAt<TInput>(start); | ||
} | ||
} | ||
} else { | ||
if constexpr (std::is_same<TOutput, int64_t>::value) { | ||
sum = checkedPlus<TOutput>( | ||
sum, elements->template valueAt<TInput>(start)); | ||
} else { | ||
sum += elements->template valueAt<TInput>(start); | ||
} | ||
} | ||
} | ||
resultValues->set(row, sum); | ||
}); | ||
} | ||
|
||
void apply( | ||
const SelectivityVector& rows, | ||
std::vector<VectorPtr>& args, // Not using const ref so we can reuse args | ||
const TypePtr& outputType, | ||
exec::EvalCtx* context, | ||
VectorPtr* result) const override { | ||
// Prepare result vector for writing | ||
BaseVector::ensureWritable(rows, outputType, context->pool(), result); | ||
auto resultValues = (*result)->template asFlatVector<TOutput>(); | ||
|
||
// Acquire the array elements vector. | ||
auto arrayVector = args[0]->as<ArrayVector>(); | ||
VELOX_CHECK(arrayVector); | ||
auto elementsVector = arrayVector->elements(); | ||
|
||
if (elementsVector->encoding() == VectorEncoding::Simple::FLAT) { | ||
const TInput* __restrict rawElements = | ||
elementsVector->as<FlatVector<TInput>>()->rawValues(); | ||
const uint64_t* __restrict rawNulls = elementsVector->rawNulls(); | ||
|
||
if (elementsVector->mayHaveNulls()) { | ||
applyFlat<true>(rows, arrayVector, rawNulls, rawElements, resultValues); | ||
} else { | ||
applyFlat<false>( | ||
rows, arrayVector, rawNulls, rawElements, resultValues); | ||
} | ||
} else { | ||
SelectivityVector elementsRows(elementsVector->size()); | ||
exec::LocalDecodedVector elements(context, *elementsVector, elementsRows); | ||
|
||
if (elementsVector->mayHaveNulls()) { | ||
applyNonFlat<true>(rows, arrayVector, elements, resultValues); | ||
} else { | ||
applyNonFlat<false>(rows, arrayVector, elements, resultValues); | ||
} | ||
} | ||
} | ||
}; | ||
|
||
// Create function. | ||
std::shared_ptr<exec::VectorFunction> create( | ||
const std::string& /* name */, | ||
const std::vector<exec::VectorFunctionArg>& inputArgs) { | ||
auto elementType = inputArgs.front().type->childAt(0); | ||
|
||
switch (elementType->kind()) { | ||
case TypeKind::TINYINT: { | ||
return std::make_shared<ArraySumFunction< | ||
TypeTraits<TypeKind::TINYINT>::NativeType, | ||
int64_t>>(); | ||
} | ||
case TypeKind::SMALLINT: { | ||
return std::make_shared<ArraySumFunction< | ||
TypeTraits<TypeKind::SMALLINT>::NativeType, | ||
int64_t>>(); | ||
} | ||
case TypeKind::INTEGER: { | ||
return std::make_shared<ArraySumFunction< | ||
TypeTraits<TypeKind::INTEGER>::NativeType, | ||
int64_t>>(); | ||
} | ||
case TypeKind::BIGINT: { | ||
return std::make_shared<ArraySumFunction< | ||
TypeTraits<TypeKind::BIGINT>::NativeType, | ||
int64_t>>(); | ||
} | ||
case TypeKind::REAL: { | ||
return std::make_shared< | ||
ArraySumFunction<TypeTraits<TypeKind::REAL>::NativeType, double>>(); | ||
} | ||
case TypeKind::DOUBLE: { | ||
return std::make_shared< | ||
ArraySumFunction<TypeTraits<TypeKind::DOUBLE>::NativeType, double>>(); | ||
} | ||
default: { | ||
VELOX_FAIL("Unsupported Type") | ||
} | ||
} | ||
} | ||
|
||
// Define function signature.
// array(T1) -> T2 where T1 must be coercible to bigint or double, and
// T2 is bigint or double.
// One signature is produced per supported element type, in the key order of
// the map below.
std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() {
  // Element type name -> name of the type the sum is returned as.
  static const std::map<std::string, std::string> kSumTypeByElementType{
      {"bigint", "bigint"},
      {"integer", "bigint"},
      {"smallint", "bigint"},
      {"tinyint", "bigint"},
      {"double", "double"},
      {"real", "double"}};

  std::vector<std::shared_ptr<exec::FunctionSignature>> result;
  result.reserve(kSumTypeByElementType.size());
  for (const auto& entry : kSumTypeByElementType) {
    const std::string& elementType = entry.first;
    const std::string& sumType = entry.second;
    auto signature = exec::FunctionSignatureBuilder()
                         .returnType(sumType)
                         .argumentType(fmt::format("array({})", elementType))
                         .build();
    result.push_back(std::move(signature));
  }
  return result;
}
} // namespace | ||
|
||
// Register function.
// Declares the stateful vector function tagged udf_array_sum, pairing the
// signatures() list with the create() factory defined above.
VELOX_DECLARE_STATEFUL_VECTOR_FUNCTION(
    udf_array_sum,
    signatures(),
    create);
|
||
} // namespace facebook::velox::functions |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
104 changes: 104 additions & 0 deletions
104
velox/functions/prestosql/benchmarks/ArraySumBenchmark.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#include <folly/Benchmark.h> | ||
#include "velox/expression/VectorFunction.h" | ||
#include "velox/functions/Macros.h" | ||
#include "velox/functions/lib/LambdaFunctionUtil.h" | ||
#include "velox/functions/lib/benchmarks/FunctionBenchmarkBase.h" | ||
#include "velox/functions/prestosql/registration/RegistrationFunctions.h" | ||
|
||
using namespace facebook::velox; | ||
using namespace facebook::velox::exec; | ||
using namespace facebook::velox::functions; | ||
|
||
namespace { | ||
|
||
class ArraySumBenchmark : public functions::test::FunctionBenchmarkBase { | ||
public: | ||
ArraySumBenchmark() : FunctionBenchmarkBase() { | ||
functions::prestosql::registerArrayFunctions(); | ||
functions::prestosql::registerGeneralFunctions(); | ||
} | ||
|
||
void runInteger(const std::string& functionName) { | ||
folly::BenchmarkSuspender suspender; | ||
vector_size_t size = 10'000; | ||
auto arrayVector = vectorMaker_.arrayVector<int32_t>( | ||
size, | ||
[](auto row) { return row % 5; }, | ||
[](auto row) { return row % 23; }); | ||
|
||
auto rowVector = vectorMaker_.rowVector({arrayVector}); | ||
auto exprSet = compileExpression( | ||
fmt::format("{}(c0)", functionName), rowVector->type()); | ||
suspender.dismiss(); | ||
|
||
doRun(exprSet, rowVector); | ||
} | ||
|
||
void runIntegerNulls(const std::string& functionName) { | ||
folly::BenchmarkSuspender suspender; | ||
vector_size_t size = 10'000; | ||
auto arrayVector = vectorMaker_.arrayVector<int32_t>( | ||
size, | ||
[](auto row) { return row % 5; }, | ||
[](auto row) { return row % 23; }, | ||
[](auto row) { return (row % 513) == 0; }, | ||
[](auto row) { return (row % 13) == 0; }); | ||
|
||
auto rowVector = vectorMaker_.rowVector({arrayVector}); | ||
auto exprSet = compileExpression( | ||
fmt::format("{}(c0)", functionName), rowVector->type()); | ||
suspender.dismiss(); | ||
|
||
doRun(exprSet, rowVector); | ||
} | ||
|
||
void doRun(ExprSet& exprSet, const RowVectorPtr& rowVector) { | ||
int cnt = 0; | ||
for (auto i = 0; i < 100; i++) { | ||
cnt += evaluate(exprSet, rowVector)->size(); | ||
} | ||
folly::doNotOptimizeAway(cnt); | ||
} | ||
}; | ||
|
||
// Baseline for the null-free input: the function named "array_sum_alt".
BENCHMARK(SimpleFunction) {
  ArraySumBenchmark().runInteger("array_sum_alt");
}

// "array_sum" on the same input, reported relative to the baseline above.
BENCHMARK_RELATIVE(VectorFunction) {
  ArraySumBenchmark().runInteger("array_sum");
}
|
||
// Baseline for the input with nulls: the function named "array_sum_alt".
BENCHMARK(SimpleFunctionNulls) {
  ArraySumBenchmark().runIntegerNulls("array_sum_alt");
}

// "array_sum" on the same input, reported relative to the baseline above.
BENCHMARK_RELATIVE(VectorFunctionNulls) {
  ArraySumBenchmark().runIntegerNulls("array_sum");
}
|
||
} // namespace | ||
|
||
// Runs every benchmark registered in this binary. Command-line arguments are
// accepted but not inspected.
int main(int, char**) {
  folly::runBenchmarks();
  return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.