Skip to content

Commit

Permalink
Extend assertEqualResults to compare floating-point columns in comple…
Browse files Browse the repository at this point in the history
…x-type columns with epsilon (#10927)

Summary:
Pull Request resolved: #10927

This diff addresses the limitation in #4481.

One limitation is still left that floating-point in map keys are currently not compared with
epsilon. This is because fuzzers can generate floating-point keys that are very close to
each other (with in epsilon), making one key incorrectly match another during comparison.

Reviewed By: xiaoxmeng

Differential Revision: D62222471

fbshipit-source-id: 4bba6ff01c61dc6f9c1a719cc3543b37b1ab84e8
  • Loading branch information
kagamiori authored and facebook-github-bot committed Sep 14, 2024
1 parent 167dcf2 commit 079ed31
Show file tree
Hide file tree
Showing 3 changed files with 170 additions and 24 deletions.
75 changes: 75 additions & 0 deletions velox/exec/tests/QueryAssertionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,81 @@ TEST_F(QueryAssertionsTest, multiFloatColumnWithNonUniqueKeys) {
"971 extra rows, 971 missing rows");
}

TEST_F(QueryAssertionsTest, complexTypesContainingFloat) {
auto size = 1000;
// Test array.
auto expected = makeRowVector(
{makeArrayVector<float>(
size,
[](auto /*row*/) { return 2; },
[](auto row) { return row % 2 + 0.01; },
[](auto row) { return row % 7 == 0; }),
makeFlatVector<int64_t>(size, [](auto row) { return row; })});
auto actual = makeRowVector(
{makeArrayVector<float>(
size,
[&](auto /*row*/) { return 2; },
[&](auto row) {
auto value = row % 2 + 0.01;
return value + value * FLT_EPSILON;
},
[&](auto row) { return (size - row - 1) % 7 == 0; }),
makeFlatVector<int64_t>(
size, [&](auto row) { return size - row - 1; })});
EXPECT_TRUE(assertEqualResults({expected}, {actual}));

// Test map and row.
expected = makeRowVector(
{makeMapVector<int64_t, float>(
size,
[](auto /*row*/) { return 2; },
[](auto row) { return row; },
[](auto row) { return row % 5 + 0.01; },
[](auto /*row*/) { return false; },
[](auto row) { return row % 6 == 0; }),
makeRowVector(
{makeFlatVector<float>(
size, [](auto row) { return row % 5 + 0.01; }),
makeFlatVector<int64_t>(size, [](auto row) { return row % 5; }),
makeArrayVector<float>(
size,
[](auto /*row*/) { return 2; },
[](auto row) { return row % 2 + 0.01; },
[](auto row) { return row % 7 == 0; })}),
makeFlatVector<int64_t>(size, [](auto row) { return row; })});
actual = makeRowVector(
{makeMapVector<int64_t, float>(
size,
[](auto /*row*/) { return 2; },
[&](auto row) { return (size * 2 - row - 1); },
[&](auto row) {
auto value = (size * 2 - row - 1) % 5 + 0.01;
return value + value * FLT_EPSILON;
},
[&](auto /*row*/) { return false; },
[&](auto row) { return (size * 2 - row - 1) % 6 == 0; }),
makeRowVector(
{makeFlatVector<float>(
size,
[&](auto row) {
auto value = (size - row - 1) % 5 + 0.01;
return value + value * FLT_EPSILON;
}),
makeFlatVector<int64_t>(
size, [&](auto row) { return (size - row - 1) % 5; }),
makeArrayVector<float>(
size,
[](auto /*row*/) { return 2; },
[&](auto row) {
auto value = row % 2 + 0.01;
return value + value * FLT_EPSILON;
},
[&](auto row) { return (size - row - 1) % 7 == 0; })}),
makeFlatVector<int64_t>(
size, [&](auto row) { return (size - row - 1); })});
EXPECT_TRUE(assertEqualResults({expected}, {actual}));
}

TEST_F(QueryAssertionsTest, nullDecimalValue) {
auto shortDecimal = makeRowVector(
{makeNullableFlatVector<int64_t>({std::nullopt}, DECIMAL(5, 2))});
Expand Down
63 changes: 42 additions & 21 deletions velox/exec/tests/utils/QueryAssertions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1068,26 +1068,44 @@ void assertEqualTypeAndNumRows(
EXPECT_EQ(expectedNumRows, actualNumRows);
}

bool containsFloatingPoint(const TypePtr& type) {
if (type->isPrimitiveType()) {
return type->isReal() || type->isDouble();
} else if (type->isArray()) {
return containsFloatingPoint(type->as<TypeKind::ARRAY>().elementType());
} else if (type->isMap()) {
// We currently don't support comparing maps with floating-point keys with
// epsilon. This is because fuzzer can generate floating-point keys that are
// very close, causing one key incorrectly match another during the
// comparison.
return containsFloatingPoint(type->as<TypeKind::MAP>().valueType());
} else if (type->isRow()) {
for (auto& child : type->as<TypeKind::ROW>().children()) {
if (containsFloatingPoint(child)) {
return true;
}
}
}
return false;
}

/// Returns the number of floating-point columns and a list of columns indices
/// with floating-point columns placed at the end.
std::tuple<uint32_t, std::vector<velox::column_index_t>>
findFloatingPointColumns(const MaterializedRow& row) {
auto isFloatingPointColumn = [&](size_t i) {
return row[i].kind() == TypeKind::REAL || row[i].kind() == TypeKind::DOUBLE;
};

findFloatingPointColumns(const TypePtr& type) {
const auto rowType = asRowType(type);
uint32_t numFloatingPointColumns = 0;
std::vector<velox::column_index_t> indices;
for (auto i = 0; i < row.size(); ++i) {
if (isFloatingPointColumn(i)) {
for (auto i = 0; i < rowType->children().size(); ++i) {
if (containsFloatingPoint(rowType->childAt(i))) {
++numFloatingPointColumns;
} else {
indices.push_back(i);
}
}

for (auto i = 0; i < row.size(); ++i) {
if (isFloatingPointColumn(i)) {
for (auto i = 0; i < rowType->children().size(); ++i) {
if (containsFloatingPoint(rowType->childAt(i))) {
indices.push_back(i);
}
}
Expand All @@ -1107,8 +1125,8 @@ bool assertEqualResults(
const MaterializedRowMultiset& actualRows,
const TypePtr& actualType,
const std::string& message) {
const auto& type = (!expectedRows.empty()) ? expectedType : actualType;
if (expectedRows.empty() != actualRows.empty()) {
const auto& type = (!expectedRows.empty()) ? expectedType : actualType;
ADD_FAILURE() << generateUserFriendlyDiff(expectedRows, actualRows, type)
<< message;
return false;
Expand All @@ -1125,8 +1143,7 @@ bool assertEqualResults(
return false;
}

auto [numFloatingPointColumns, columns] =
findFloatingPointColumns(*expectedRows.begin());
auto [numFloatingPointColumns, columns] = findFloatingPointColumns(type);
if (numFloatingPointColumns) {
MaterializedRowEpsilonComparator comparator{
numFloatingPointColumns, columns};
Expand Down Expand Up @@ -1214,10 +1231,11 @@ using OrderedPartition = std::pair<MaterializedRow, MaterializedRowMultiset>;

// Special function to compare ordered partitions in a way that
// we compare all floating point values inside using 'epsilon' constant.
// Returns true if equal.
// Returns true if equal. valueType is the type of expected.second.
static bool compareOrderedPartitions(
const OrderedPartition& expected,
const OrderedPartition& actual) {
const OrderedPartition& actual,
const RowTypePtr& valueType) {
if (expected.first.size() != actual.first.size() or
expected.second.size() != actual.second.size()) {
return false;
Expand All @@ -1238,8 +1256,9 @@ static bool compareOrderedPartitions(
return false;
}

auto [numFloatingPointColumns, columns] =
findFloatingPointColumns(*expected.second.begin());
// valueType is needed by findFloatingPointColumns() to avoid having to infer
// the type from expected.second.
auto [numFloatingPointColumns, columns] = findFloatingPointColumns(valueType);
if (numFloatingPointColumns) {
MaterializedRowEpsilonComparator comparator{
numFloatingPointColumns, columns};
Expand All @@ -1255,16 +1274,17 @@ static bool compareOrderedPartitions(

// Special function to compare vectors of ordered partitions in a way that
// we compare all floating point values inside using 'epsilon' constant.
// Returns true if equal.
// Returns true if equal. valueType is the type of expected[i].second.
static bool compareOrderedPartitionsVectors(
const std::vector<OrderedPartition>& expected,
const std::vector<OrderedPartition>& actual) {
const std::vector<OrderedPartition>& actual,
const RowTypePtr& valueType) {
if (expected.size() != actual.size()) {
return false;
}

for (size_t i = 0; i < expected.size(); ++i) {
if (not compareOrderedPartitions(expected[i], actual[i])) {
if (not compareOrderedPartitions(expected[i], actual[i], valueType)) {
return false;
}
}
Expand Down Expand Up @@ -1302,12 +1322,13 @@ void assertResultsOrdered(
}

if (not compareOrderedPartitionsVectors(
expectedPartitions, actualPartitions)) {
expectedPartitions, actualPartitions, resultType)) {
auto actualPartIter = actualPartitions.begin();
auto expectedPartIter = expectedPartitions.begin();
while (expectedPartIter != expectedPartitions.end() &&
actualPartIter != actualPartitions.end()) {
if (not compareOrderedPartitions(*expectedPartIter, *actualPartIter)) {
if (not compareOrderedPartitions(
*expectedPartIter, *actualPartIter, resultType)) {
break;
}
++expectedPartIter;
Expand Down
56 changes: 53 additions & 3 deletions velox/type/Variant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -858,20 +858,70 @@ bool variant::lessThanWithEpsilon(const variant& other) const {
return VELOX_DYNAMIC_TYPE_DISPATCH_ALL(lessThan, kind_, *this, other);
}

namespace {

// Compare variants of Array or Row type.
template <TypeKind KIND>
bool compareComplexTypeWithEpsilon(const variant& left, const variant& right) {
auto& leftContainer = left.value<KIND>();
auto& rightContainer = right.value<KIND>();
if (leftContainer.size() != rightContainer.size()) {
return false;
}
for (int32_t i = 0; i < leftContainer.size(); i++) {
if (!leftContainer[i].equalsWithEpsilon(rightContainer[i])) {
return false;
}
}
return true;
}

// Compare variants of Map type.
template <>
bool compareComplexTypeWithEpsilon<TypeKind::MAP>(
const variant& left,
const variant& right) {
auto& leftMap = left.value<TypeKind::MAP>();
auto& rightMap = right.value<TypeKind::MAP>();
if (leftMap.size() != rightMap.size()) {
return false;
}
for (auto it = leftMap.begin(); it != leftMap.end(); ++it) {
auto otherIt = rightMap.find(it->first);
if (otherIt == rightMap.end()) {
return false;
}
if (!it->second.equalsWithEpsilon(otherIt->second)) {
return false;
}
}
return true;
}
} // namespace

// Uses kEpsilon to compare floating point types (REAL and DOUBLE).
// For testing purposes.
bool variant::equalsWithEpsilon(const variant& other) const {
if (other.kind_ != this->kind_) {
return false;
}
if (other.isNull()) {
return this->isNull();
if (other.isNull() || this->isNull()) {
return other.isNull() && this->isNull();
}
if ((kind_ == TypeKind::REAL) or (kind_ == TypeKind::DOUBLE)) {
return equalsFloatingPointWithEpsilon(*this, other);
}

return VELOX_DYNAMIC_TYPE_DISPATCH_ALL(equals, kind_, *this, other);
switch (kind_) {
case TypeKind::ARRAY:
return compareComplexTypeWithEpsilon<TypeKind::ARRAY>(*this, other);
case TypeKind::MAP:
return compareComplexTypeWithEpsilon<TypeKind::MAP>(*this, other);
case TypeKind::ROW:
return compareComplexTypeWithEpsilon<TypeKind::ROW>(*this, other);
default:
return VELOX_DYNAMIC_TYPE_DISPATCH_ALL(equals, kind_, *this, other);
}
}

void variant::verifyArrayElements(const std::vector<variant>& inputs) {
Expand Down

0 comments on commit 079ed31

Please sign in to comment.