-
Notifications
You must be signed in to change notification settings - Fork 1.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add transform_keys and transform_values Presto functions #2245
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#include "velox/expression/Expr.h" | ||
#include "velox/expression/VectorFunction.h" | ||
#include "velox/functions/lib/LambdaFunctionUtil.h" | ||
#include "velox/vector/FunctionVector.h" | ||
|
||
namespace facebook::velox::functions { | ||
namespace { | ||
|
||
// See documentation at https://prestodb.io/docs/current/functions/map.html | ||
// Vector function behind transform_keys. For every entry of the input map it | ||
// evaluates a lambda over (key, value) and builds a result map whose keys are | ||
// the lambda outputs and whose values are the original values. Fails with a | ||
// user error if the transformed keys of a row contain a duplicate. | ||
class TransformKeysFunction : public exec::VectorFunction { | ||
public: | ||
bool isDefaultNullBehavior() const override { | ||
// transform_keys is null preserving for the map. But | ||
// since an expr tree with a lambda depends on all named fields, including | ||
// captures, a null in a capture does not automatically make a | ||
// null result. | ||
return false; | ||
} | ||
|
||
// Evaluates the function for the selected rows. | ||
// rows: rows to compute. | ||
// args: exactly two vectors; args[0] is the input map and args[1] is | ||
// accessed as a FunctionVector holding the lambda(s). | ||
// outputType: type given to the resulting MapVector. | ||
// context: evaluation context used for decoding and for storing the result. | ||
// result: receives the output via context->moveOrCopyResult. | ||
void apply( | ||
const SelectivityVector& rows, | ||
std::vector<VectorPtr>& args, | ||
const TypePtr& outputType, | ||
exec::EvalCtx* context, | ||
VectorPtr* result) const override { | ||
VELOX_CHECK_EQ(args.size(), 2); | ||
|
||
// Flatten input map. | ||
exec::LocalDecodedVector mapDecoder(context, *args[0], rows); | ||
auto& decodedMap = *mapDecoder.get(); | ||
|
||
auto flatMap = flattenMap(rows, args[0], decodedMap); | ||
|
||
// The lambda takes two arguments: the key vector and the value vector of | ||
// the flattened map. | ||
std::vector<VectorPtr> lambdaArgs = { | ||
flatMap->mapKeys(), flatMap->mapValues()}; | ||
auto numKeys = flatMap->mapKeys()->size(); | ||
|
||
// Shared output vector; every lambda invocation below writes into it. | ||
VectorPtr transformedKeys; | ||
|
||
// Loop over lambda functions and apply these to keys of the map. | ||
// In most cases there will be only one function and the loop will run once. | ||
// NOTE(review): asUnchecked performs no type check here; the caller is | ||
// presumably responsible for passing a FunctionVector - confirm. | ||
auto it = args[1]->asUnchecked<FunctionVector>()->iterator(&rows); | ||
while (auto entry = it.next()) { | ||
// keyRows: presumably the positions in the keys vector that belong to the | ||
// maps selected by entry.rows - see toElementRows in LambdaFunctionUtil.h. | ||
auto keyRows = | ||
toElementRows<MapVector>(numKeys, *entry.rows, flatMap.get()); | ||
// wrapCapture: presumably maps element positions back to top-level rows | ||
// so captured columns line up with the elements - see toWrapCapture. | ||
auto wrapCapture = toWrapCapture<MapVector>( | ||
numKeys, entry.callable, *entry.rows, flatMap); | ||
|
||
entry.callable->apply( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: One question: across multiple lambdas , how do we ensure the type of keys remains same ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the job of the query planner. We assume the types are correct similar to different "then" and "else" branches of a switch statement. |
||
keyRows, wrapCapture, context, lambdaArgs, &transformedKeys); | ||
} | ||
|
||
// Build the result from the flattened input: nulls, size, offsets, sizes and | ||
// values are reused as is; only the keys vector is replaced. | ||
auto localResult = std::make_shared<MapVector>( | ||
flatMap->pool(), | ||
outputType, | ||
flatMap->nulls(), | ||
flatMap->size(), | ||
flatMap->offsets(), | ||
flatMap->sizes(), | ||
transformedKeys, | ||
flatMap->mapValues()); | ||
|
||
// Distinct input keys may be transformed into equal keys, so validate. | ||
checkDuplicateKeys(localResult, rows); | ||
|
||
context->moveOrCopyResult(localResult, rows, result); | ||
} | ||
|
||
// Returns the single supported signature. K1, K2 and V are type variables: | ||
// the input map has type map(K1,V), the lambda maps (K1,V) to K2 and the | ||
// result has type map(K2,V). | ||
static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() { | ||
// map(K1, V), function(K1, V) -> K2 -> map(K2, V) | ||
return {exec::FunctionSignatureBuilder() | ||
.typeVariable("K1") | ||
.typeVariable("K2") | ||
.typeVariable("V") | ||
.returnType("map(K2,V)") | ||
.argumentType("map(K1,V)") | ||
.argumentType("function(K1,V,K2)") | ||
.build()}; | ||
} | ||
|
||
private: | ||
// Raises a user error if any selected row of mapVector holds two equal keys. | ||
// Note that mapVector is passed to MapVector::canonicalize first. | ||
void checkDuplicateKeys( | ||
const MapVectorPtr& mapVector, | ||
const SelectivityVector& rows) const { | ||
static const char* kDuplicateKey = "Duplicate map keys are not allowed"; | ||
|
||
// Presumably reorders the entries of each map so that equal keys become | ||
// adjacent, which the neighbor comparison below relies on - confirm | ||
// against MapVector::canonicalize. | ||
MapVector::canonicalize(mapVector); | ||
|
||
auto offsets = mapVector->rawOffsets(); | ||
auto sizes = mapVector->rawSizes(); | ||
auto mapKeys = mapVector->mapKeys(); | ||
// For each selected row, compare every key with its predecessor within the | ||
// [offset, offset + size) range of that row. | ||
rows.applyToSelected([&](auto row) { | ||
auto offset = offsets[row]; | ||
auto size = sizes[row]; | ||
for (auto i = 1; i < size; i++) { | ||
if (mapKeys->equalValueAt(mapKeys.get(), offset + i, offset + i - 1)) { | ||
VELOX_USER_FAIL("{}", kDuplicateKey); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: Maybe also print out the value of the duplicate key? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a good idea. Let me add this in a follow-up PR as it should be added to the map() function as well. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @kagamiori #2260 adds duplicate key to the message. |
||
} | ||
} | ||
}); | ||
} | ||
}; | ||
} // namespace | ||
|
||
// Declares the vector function udf_transform_keys, passing the supported | ||
// signatures and a TransformKeysFunction instance to the declaration macro. | ||
VELOX_DECLARE_VECTOR_FUNCTION( | ||
udf_transform_keys, | ||
TransformKeysFunction::signatures(), | ||
std::make_unique<TransformKeysFunction>()); | ||
|
||
} // namespace facebook::velox::functions |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
/* | ||
* Copyright (c) Facebook, Inc. and its affiliates. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
#include "velox/expression/Expr.h" | ||
#include "velox/expression/VectorFunction.h" | ||
#include "velox/functions/lib/LambdaFunctionUtil.h" | ||
#include "velox/vector/FunctionVector.h" | ||
|
||
namespace facebook::velox::functions { | ||
namespace { | ||
|
||
// See documentation at https://prestodb.io/docs/current/functions/map.html | ||
// Vector function behind transform_values. For every entry of the input map | ||
// it evaluates a lambda over (key, value) and builds a result map that keeps | ||
// the original keys and uses the lambda outputs as values. | ||
class TransformValuesFunction : public exec::VectorFunction { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: It seems values/key transforms can be templatized to one function, however this makes easier reading personally to me at expense of code duplication. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking about that and decided not to use templates for readability. |
||
public: | ||
bool isDefaultNullBehavior() const override { | ||
// transform_values is null preserving for the map. But | ||
// since an expr tree with a lambda depends on all named fields, including | ||
// captures, a null in a capture does not automatically make a | ||
// null result. | ||
return false; | ||
} | ||
|
||
// Evaluates the function for the selected rows. | ||
// rows: rows to compute. | ||
// args: exactly two vectors; args[0] is the input map and args[1] is | ||
// accessed as a FunctionVector holding the lambda(s). | ||
// outputType: type given to the resulting MapVector. | ||
// context: evaluation context used for decoding and for storing the result. | ||
// result: receives the output via context->moveOrCopyResult. | ||
void apply( | ||
const SelectivityVector& rows, | ||
std::vector<VectorPtr>& args, | ||
const TypePtr& outputType, | ||
exec::EvalCtx* context, | ||
VectorPtr* result) const override { | ||
VELOX_CHECK_EQ(args.size(), 2); | ||
|
||
// Flatten input map. | ||
exec::LocalDecodedVector mapDecoder(context, *args[0], rows); | ||
auto& decodedMap = *mapDecoder.get(); | ||
|
||
auto flatMap = flattenMap(rows, args[0], decodedMap); | ||
|
||
// The lambda takes two arguments: the key vector and the value vector of | ||
// the flattened map. | ||
std::vector<VectorPtr> lambdaArgs = { | ||
flatMap->mapKeys(), flatMap->mapValues()}; | ||
auto numValues = flatMap->mapValues()->size(); | ||
|
||
// Shared output vector; every lambda invocation below writes into it. | ||
VectorPtr transformedValues; | ||
|
||
// Loop over lambda functions and apply these to values of the map. | ||
// In most cases there will be only one function and the loop will run once. | ||
// NOTE(review): asUnchecked performs no type check here; the caller is | ||
// presumably responsible for passing a FunctionVector - confirm. | ||
auto it = args[1]->asUnchecked<FunctionVector>()->iterator(&rows); | ||
while (auto entry = it.next()) { | ||
// valueRows: presumably the positions in the values vector that belong to | ||
// the maps selected by entry.rows - see toElementRows. | ||
auto valueRows = | ||
toElementRows<MapVector>(numValues, *entry.rows, flatMap.get()); | ||
// wrapCapture: presumably maps element positions back to top-level rows | ||
// so captured columns line up with the elements - see toWrapCapture. | ||
auto wrapCapture = toWrapCapture<MapVector>( | ||
numValues, entry.callable, *entry.rows, flatMap); | ||
|
||
entry.callable->apply( | ||
valueRows, wrapCapture, context, lambdaArgs, &transformedValues); | ||
} | ||
|
||
// Build the result from the flattened input: nulls, size, offsets, sizes and | ||
// keys are reused as is; only the values vector is replaced. Keys are | ||
// unchanged, so no duplicate-key check is performed here. | ||
auto localResult = std::make_shared<MapVector>( | ||
flatMap->pool(), | ||
outputType, | ||
flatMap->nulls(), | ||
flatMap->size(), | ||
flatMap->offsets(), | ||
flatMap->sizes(), | ||
flatMap->mapKeys(), | ||
transformedValues); | ||
context->moveOrCopyResult(localResult, rows, result); | ||
} | ||
|
||
// Returns the single supported signature. K, V1 and V2 are type variables: | ||
// the input map has type map(K,V1), the lambda maps (K,V1) to V2 and the | ||
// result has type map(K,V2). | ||
static std::vector<std::shared_ptr<exec::FunctionSignature>> signatures() { | ||
// map(K, V1), function(K, V1) -> V2 -> map(K, V2) | ||
return {exec::FunctionSignatureBuilder() | ||
.typeVariable("K") | ||
.typeVariable("V1") | ||
.typeVariable("V2") | ||
.returnType("map(K,V2)") | ||
.argumentType("map(K,V1)") | ||
.argumentType("function(K,V1,V2)") | ||
.build()}; | ||
} | ||
}; | ||
} // namespace | ||
|
||
// Declares the vector function udf_transform_values, passing the supported | ||
// signatures and a TransformValuesFunction instance to the declaration macro. | ||
VELOX_DECLARE_VECTOR_FUNCTION( | ||
udf_transform_values, | ||
TransformValuesFunction::signatures(), | ||
std::make_unique<TransformValuesFunction>()); | ||
|
||
} // namespace facebook::velox::functions |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do keyRows change across lambdas? I would presume that entry.rows would typically be similar.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the general case, there is only one lambda function and entry.rows == rows. However, when there are multiple lambdas, each lambda applies to a unique subset of rows; hence, keyRows are non-overlapping between lambdas.