Skip to content

Commit

Permalink
Check determinism using function name only
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Jun 18, 2024
1 parent b37ceb4 commit c2cea96
Show file tree
Hide file tree
Showing 4 changed files with 86 additions and 63 deletions.
10 changes: 10 additions & 0 deletions velox/expression/VectorFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ std::optional<std::vector<FunctionSignaturePtr>> getVectorFunctionSignatures(
});
}

std::optional<
std::pair<VectorFunctionMetadata, std::vector<FunctionSignaturePtr>>>
getVectorFunctionSignaturesAndMetadata(const std::string& name) {
return applyToVectorFunctionEntry<
std::pair<VectorFunctionMetadata, std::vector<FunctionSignaturePtr>>>(
name, [&](const auto& /*name*/, const auto& entry) {
return std::make_pair(entry.metadata, entry.signatures);
});
}

TypePtr resolveVectorFunction(
const std::string& functionName,
const std::vector<TypePtr>& argTypes) {
Expand Down
7 changes: 7 additions & 0 deletions velox/expression/VectorFunction.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,13 @@ class SimpleFunctionAdapterFactory {
std::optional<std::vector<FunctionSignaturePtr>> getVectorFunctionSignatures(
const std::string& name);

/// Returns a pair of function metadata and a list of signatures supported by
/// VectorFunction with the specified name. The metadata is the first element
/// of the pair and the list of signatures is the second. Returns std::nullopt
/// if there is no function with the specified name.
std::optional<
std::pair<VectorFunctionMetadata, std::vector<FunctionSignaturePtr>>>
getVectorFunctionSignaturesAndMetadata(const std::string& name);

/// Given name of vector function and argument types, returns
/// the return type if function exists and have a signature that binds to the
/// input types otherwise returns nullptr.
Expand Down
124 changes: 61 additions & 63 deletions velox/expression/fuzzer/ExpressionFuzzer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,32 +341,6 @@ static void appendSpecialForms(
}
}

/// Reports whether `functionName` called with `argTypes` is deterministic.
/// The answer is true when the function cannot be resolved, i.e. when
/// determinism cannot be established.
bool isDeterministic(
    const std::string& functionName,
    const std::vector<TypePtr>& argTypes) {
  // 'and', 'or', 'coalesce', 'if', 'switch' and 'cast' are special forms that
  // are known to be deterministic. They are not real functions, so the lookup
  // below cannot resolve them; answer for them directly.
  const bool isKnownSpecialForm = functionName == "and" ||
      functionName == "or" || functionName == "coalesce" ||
      functionName == "if" || functionName == "switch" ||
      functionName == "cast";
  if (isKnownSpecialForm) {
    return true;
  }

  const auto resolved = resolveFunctionWithMetadata(functionName, argTypes);
  if (!resolved) {
    // Nothing was resolved, so functionName must be a special form.
    LOG(WARNING) << "Unable to determine if '" << functionName
                 << "' is deterministic or not. Assuming it is.";
    return true;
  }

  // The metadata is the second element of the resolved result.
  return resolved->second.deterministic;
}

std::optional<CallableSignature> processConcreteSignature(
const std::string& functionName,
const std::vector<TypePtr>& argTypes,
Expand Down Expand Up @@ -522,6 +496,39 @@ uint32_t levelOfNesting(const TypePtr& type) {

} // namespace

// Decides determinism from the function name alone. Every simple-function
// entry and the vector-function entry registered under 'functionName' must be
// deterministic for the result to be true. Names that resolve to nothing are
// assumed to be deterministic.
bool detail::isDeterministic(const std::string& functionName) {
  // 'and', 'or', 'coalesce', 'if', 'switch' and 'cast' are special forms that
  // are known to be deterministic. They are not real functions, so the
  // registry lookups below cannot resolve them; answer for them directly.
  const bool isKnownSpecialForm = functionName == "and" ||
      functionName == "or" || functionName == "coalesce" ||
      functionName == "if" || functionName == "switch" ||
      functionName == "cast";
  if (isKnownSpecialForm) {
    return true;
  }

  const auto simpleEntries =
      exec::simpleFunctions().getFunctionSignaturesAndMetadata(functionName);
  const auto vectorEntry =
      exec::getVectorFunctionSignaturesAndMetadata(functionName);

  const bool foundAnywhere = !simpleEntries.empty() || vectorEntry.has_value();
  if (!foundAnywhere) {
    // Neither registry knows the name, so it must be a special form.
    LOG(WARNING) << "Unable to determine if '" << functionName
                 << "' is deterministic or not. Assuming it is.";
    return true;
  }

  // A single non-deterministic entry in either registry is enough to reject
  // the name.
  if (vectorEntry && !vectorEntry->first.deterministic) {
    return false;
  }
  for (const auto& [entryMetadata, _] : simpleEntries) {
    if (!entryMetadata.deterministic) {
      return false;
    }
  }
  return true;
}

ExpressionFuzzer::ExpressionFuzzer(
FunctionSignatureMap signatureMap,
size_t initialSeed,
Expand Down Expand Up @@ -567,13 +574,26 @@ ExpressionFuzzer::ExpressionFuzzer(
continue;
}

// Determine a list of concrete argument types that can bind to the
// signature. For non-parameterized signatures, these argument types will
// be used to create a callable signature. For parameterized signatures,
// these argument types are only used to fetch the function instance to
// get their determinism.
std::vector<TypePtr> argTypes;
if (signature->variables().empty()) {
if (!detail::isDeterministic(function.first)) {
LOG(WARNING) << "Skipping non-deterministic function: "
<< function.first << signature->toString();
continue;
}

if (!signature->variables().empty()) {
std::unordered_set<std::string> typeVariables;
for (const auto& [name, _] : signature->variables()) {
typeVariables.insert(name);
}
atLeastOneSupported = true;
++supportedFunctionSignatures;
signatureTemplates_.emplace_back(SignatureTemplate{
function.first, signature, std::move(typeVariables)});
} else {
// Determine a list of concrete argument types that can bind to the
// signature. For non-parameterized signatures, these argument types
// will be used to create a callable signature.
std::vector<TypePtr> argTypes;
bool supportedSignature = true;
for (const auto& arg : signature->argumentTypes()) {
auto resolvedType = SignatureBinder::tryResolveType(arg, {}, {});
Expand All @@ -589,37 +609,15 @@ ExpressionFuzzer::ExpressionFuzzer(
<< function.first << signature->toString();
continue;
}
} else {
ArgumentTypeFuzzer typeFuzzer{*signature, localRng};
typeFuzzer.fuzzReturnType();
VELOX_CHECK_EQ(
typeFuzzer.fuzzArgumentTypes(options_.maxNumVarArgs), true);
argTypes = typeFuzzer.argumentTypes();
}
if (!isDeterministic(function.first, argTypes)) {
LOG(WARNING) << "Skipping non-deterministic function: "
<< function.first << signature->toString();
continue;
}

if (!signature->variables().empty()) {
std::unordered_set<std::string> typeVariables;
for (const auto& [name, _] : signature->variables()) {
typeVariables.insert(name);
if (auto callableFunction = processConcreteSignature(
function.first,
argTypes,
*signature,
options_.enableComplexTypes)) {
atLeastOneSupported = true;
++supportedFunctionSignatures;
signatures_.emplace_back(*callableFunction);
}
atLeastOneSupported = true;
++supportedFunctionSignatures;
signatureTemplates_.emplace_back(SignatureTemplate{
function.first, signature, std::move(typeVariables)});
} else if (
auto callableFunction = processConcreteSignature(
function.first,
argTypes,
*signature,
options_.enableComplexTypes)) {
atLeastOneSupported = true;
++supportedFunctionSignatures;
signatures_.emplace_back(*callableFunction);
}
}

Expand Down
8 changes: 8 additions & 0 deletions velox/expression/fuzzer/ExpressionFuzzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@
#include "velox/vector/tests/utils/VectorMaker.h"

namespace facebook::velox::fuzzer {
namespace detail {

// Returns whether the function with the given name is deterministic. The
// decision is made from the function name only: all simple-function registry
// entries and the vector-function registry entry for that name are fetched,
// and the result is false if any of them is not deterministic. The special
// forms 'and', 'or', 'coalesce', 'if', 'switch' and 'cast' are always
// reported as deterministic. Returns true (and logs a warning) if the
// function was not found in either registry.
bool isDeterministic(const std::string& functionName);
} // namespace detail

// A tool that can be used to generate random expressions.
class ExpressionFuzzer {
Expand Down

0 comments on commit c2cea96

Please sign in to comment.