Skip to content

Commit

Permalink
Fix bug in PrestoVectorSerializers when serializing empty Vectors wit…
Browse files Browse the repository at this point in the history
…h complex types (#9157)

Summary:
Pull Request resolved: #9157

PrestoIterativeVectorSerializer currently only supports serializing empty Vectors/selections when the columns are flat
primitives.  If the columns are complex attempting to serialize them will trigger a SIGSEGV.

PrestoBatchVectorSerializer doesn't support serializing empty Vectors/selections at all.

The fixes VectorStream to initialize itself and its children correctly when the Vector is empty.  It also updates
PrestoBatchVectorSerializer to only call serialize on the children if the Vector is non-empty.  With these two changes both
serializers support any column types in empty Vectors.

Reviewed By: aozturk

Differential Revision: D55090408

fbshipit-source-id: 720bba19c265f0e1ee7dc55245e344aa4cda29fa
  • Loading branch information
Kevin Wilfong authored and facebook-github-bot committed Mar 20, 2024
1 parent 29d0f31 commit 1d95a47
Show file tree
Hide file tree
Showing 2 changed files with 134 additions and 2 deletions.
22 changes: 21 additions & 1 deletion velox/serializers/PrestoSerializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,24 @@ class VectorStream {
values_(streamArena) {
if (initialNumRows == 0) {
initializeHeader(typeToEncodingName(type), *streamArena);
if (type_->size() > 0) {
hasLengths_ = true;
children_.resize(type_->size());
for (int32_t i = 0; i < type_->size(); ++i) {
children_[i] = std::make_unique<VectorStream>(
type_->childAt(i),
std::nullopt,
getChildAt(vector, i),
streamArena_,
initialNumRows,
opts_);
}

// The first element in the offsets in the wire format is always 0 for
// nested types.
lengths_.startWrite(sizeof(vector_size_t));
lengths_.appendOne<int32_t>(0);
}
return;
}

Expand Down Expand Up @@ -3632,7 +3650,9 @@ class PrestoBatchVectorSerializer : public BatchVectorSerializer {
numRows,
opts_);

serializeColumn(vector->childAt(i), ranges, streams[i].get(), scratch);
if (numRows > 0) {
serializeColumn(vector->childAt(i), ranges, streams[i].get(), scratch);
}
}

flushStreams(
Expand Down
114 changes: 113 additions & 1 deletion velox/serializers/tests/PrestoSerializerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,25 @@ class PrestoSerializerTest
alphabetSize / factor);
}

RowVectorPtr makeEmptyTestVector() {
return makeRowVector(
{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"},
{makeFlatVector<bool>({}),
makeFlatVector<int8_t>({}),
makeFlatVector<int16_t>({}),
makeFlatVector<int32_t>({}),
makeFlatVector<int64_t>({}),
makeFlatVector<float>({}),
makeFlatVector<double>({}),
makeFlatVector<StringView>({}),
makeFlatVector<Timestamp>({}),
makeRowVector(
{"1", "2"},
{makeFlatVector<StringView>({}), makeFlatVector<int32_t>({})}),
makeArrayVector<int32_t>({}),
makeMapVector<StringView, int32_t>({})});
}

std::unique_ptr<serializer::presto::PrestoVectorSerde> serde_;
folly::Random::DefaultGenerator rng_;
};
Expand Down Expand Up @@ -771,7 +790,7 @@ TEST_P(PrestoSerializerTest, dictionaryWithExtraNulls) {
}

TEST_P(PrestoSerializerTest, emptyPage) {
auto rowVector = makeRowVector(ROW({"a"}, {BIGINT()}), 0);
auto rowVector = makeEmptyTestVector();

std::ostringstream out;
serialize(rowVector, &out, nullptr);
Expand All @@ -781,6 +800,50 @@ TEST_P(PrestoSerializerTest, emptyPage) {
assertEqualVectors(deserialized, rowVector);
}

TEST_P(PrestoSerializerTest, serializeNoRowsSelected) {
std::ostringstream out;
facebook::velox::serializer::presto::PrestoOutputStreamListener listener;
OStreamOutputStream output(&out, &listener);
auto paramOptions = getParamSerdeOptions(nullptr);
auto arena = std::make_unique<StreamArena>(pool_.get());
auto rowVector = makeRowVector(
{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"},
{makeFlatVector<bool>({true, false, true, false}),
makeFlatVector<int8_t>({1, 2, 3, 4}),
makeFlatVector<int16_t>({5, 6, 7, 8}),
makeFlatVector<int32_t>({9, 10, 11, 12}),
makeFlatVector<int64_t>({13, 14, 15, 16}),
makeFlatVector<float>({17.0, 18.0, 19.0, 20.0}),
makeFlatVector<double>({21.0, 22.0, 23.0, 24.0}),
makeFlatVector<StringView>({"25", "26", "27", "28"}),
makeFlatVector<Timestamp>({{29, 30}, {32, 32}, {33, 34}, {35, 36}}),
makeRowVector(
{"1", "2"},
{makeFlatVector<StringView>({"37", "38", "39", "40"}),
makeFlatVector<int32_t>({41, 42, 43, 44})}),
makeArrayVector<int32_t>({{45, 46}, {47, 48}, {49, 50}, {51, 52}}),
makeMapVector<StringView, int32_t>(
{{{"53", 54}, {"55", 56}},
{{"57", 58}, {"59", 60}},
{{"61", 62}, {"63", 64}},
{{"65", 66}, {"67", 68}}})});
const IndexRange noRows{0, 0};

auto serializer = serde_->createIterativeSerializer(
asRowType(rowVector->type()),
rowVector->size(),
arena.get(),
&paramOptions);

serializer->append(rowVector, folly::Range(&noRows, 1));
serializer->flush(&output);

auto expected = makeEmptyTestVector();
auto rowType = asRowType(expected->type());
auto deserialized = deserialize(rowType, out.str(), nullptr);
assertEqualVectors(deserialized, expected);
}

TEST_P(PrestoSerializerTest, emptyArray) {
auto arrayVector = makeArrayVector<int32_t>(
1'000,
Expand Down Expand Up @@ -1074,6 +1137,55 @@ TEST_P(PrestoSerializerTest, dictionaryEncodingTurnedOff) {
ASSERT_EQ(deserialized->childAt(9)->encoding(), VectorEncoding::Simple::FLAT);
}

TEST_P(PrestoSerializerTest, emptyVectorBatchVectorSerializer) {
// Serialize an empty RowVector.
auto rowVector = makeEmptyTestVector();

std::ostringstream out;
serializeBatch(rowVector, &out, nullptr);

auto rowType = asRowType(rowVector->type());
auto deserialized = deserialize(rowType, out.str(), nullptr);
assertEqualVectors(deserialized, rowVector);
}

TEST_P(PrestoSerializerTest, serializeNoRowsSelectedBatchVectorSerializer) {
std::ostringstream out;
facebook::velox::serializer::presto::PrestoOutputStreamListener listener;
OStreamOutputStream output(&out, &listener);
auto paramOptions = getParamSerdeOptions(nullptr);
auto serializer = serde_->createBatchSerializer(pool_.get(), &paramOptions);
auto rowVector = makeRowVector(
{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"},
{makeFlatVector<bool>({true, false, true, false}),
makeFlatVector<int8_t>({1, 2, 3, 4}),
makeFlatVector<int16_t>({5, 6, 7, 8}),
makeFlatVector<int32_t>({9, 10, 11, 12}),
makeFlatVector<int64_t>({13, 14, 15, 16}),
makeFlatVector<float>({17.0, 18.0, 19.0, 20.0}),
makeFlatVector<double>({21.0, 22.0, 23.0, 24.0}),
makeFlatVector<StringView>({"25", "26", "27", "28"}),
makeFlatVector<Timestamp>({{29, 30}, {32, 32}, {33, 34}, {35, 36}}),
makeRowVector(
{"1", "2"},
{makeFlatVector<StringView>({"37", "38", "39", "40"}),
makeFlatVector<int32_t>({41, 42, 43, 44})}),
makeArrayVector<int32_t>({{45, 46}, {47, 48}, {49, 50}, {51, 52}}),
makeMapVector<StringView, int32_t>(
{{{"53", 54}, {"55", 56}},
{{"57", 58}, {"59", 60}},
{{"61", 62}, {"63", 64}},
{{"65", 66}, {"67", 68}}})});
const IndexRange noRows{0, 0};

serializer->serialize(rowVector, folly::Range(&noRows, 1), &output);

auto expected = makeEmptyTestVector();
auto rowType = asRowType(expected->type());
auto deserialized = deserialize(rowType, out.str(), nullptr);
assertEqualVectors(deserialized, expected);
}

TEST_P(PrestoSerializerTest, lazy) {
constexpr int kSize = 1000;
auto rowVector = makeTestVector(kSize);
Expand Down

0 comments on commit 1d95a47

Please sign in to comment.