diff --git a/velox/serializers/PrestoSerializer.cpp b/velox/serializers/PrestoSerializer.cpp index 60f7049c2a2e..97b8cd52e100 100644 --- a/velox/serializers/PrestoSerializer.cpp +++ b/velox/serializers/PrestoSerializer.cpp @@ -1371,6 +1371,24 @@ class VectorStream { values_(streamArena) { if (initialNumRows == 0) { initializeHeader(typeToEncodingName(type), *streamArena); + if (type_->size() > 0) { + hasLengths_ = true; + children_.resize(type_->size()); + for (int32_t i = 0; i < type_->size(); ++i) { + children_[i] = std::make_unique( + type_->childAt(i), + std::nullopt, + getChildAt(vector, i), + streamArena_, + initialNumRows, + opts_); + } + + // The first element in the offsets in the wire format is always 0 for + // nested types. + lengths_.startWrite(sizeof(vector_size_t)); + lengths_.appendOne(0); + } return; } @@ -3632,7 +3650,9 @@ class PrestoBatchVectorSerializer : public BatchVectorSerializer { numRows, opts_); - serializeColumn(vector->childAt(i), ranges, streams[i].get(), scratch); + if (numRows > 0) { + serializeColumn(vector->childAt(i), ranges, streams[i].get(), scratch); + } } flushStreams( diff --git a/velox/serializers/tests/PrestoSerializerTest.cpp b/velox/serializers/tests/PrestoSerializerTest.cpp index b76cc2560d98..fedf8d33867b 100644 --- a/velox/serializers/tests/PrestoSerializerTest.cpp +++ b/velox/serializers/tests/PrestoSerializerTest.cpp @@ -735,6 +735,25 @@ class PrestoSerializerTest alphabetSize / factor); } + RowVectorPtr makeEmptyTestVector() { + return makeRowVector( + {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"}, + {makeFlatVector({}), + makeFlatVector({}), + makeFlatVector({}), + makeFlatVector({}), + makeFlatVector({}), + makeFlatVector({}), + makeFlatVector({}), + makeFlatVector({}), + makeFlatVector({}), + makeRowVector( + {"1", "2"}, + {makeFlatVector({}), makeFlatVector({})}), + makeArrayVector({}), + makeMapVector({})}); + } + std::unique_ptr serde_; folly::Random::DefaultGenerator rng_; }; @@ -771,7 +790,7 @@ TEST_P(PrestoSerializerTest, dictionaryWithExtraNulls) { } TEST_P(PrestoSerializerTest, emptyPage) { - auto rowVector = makeRowVector(ROW({"a"}, {BIGINT()}), 0); + auto rowVector = makeEmptyTestVector(); std::ostringstream out; serialize(rowVector, &out, nullptr); @@ -781,6 +800,50 @@ TEST_P(PrestoSerializerTest, emptyPage) { assertEqualVectors(deserialized, rowVector); } +TEST_P(PrestoSerializerTest, serializeNoRowsSelected) { + std::ostringstream out; + facebook::velox::serializer::presto::PrestoOutputStreamListener listener; + OStreamOutputStream output(&out, &listener); + auto paramOptions = getParamSerdeOptions(nullptr); + auto arena = std::make_unique(pool_.get()); + auto rowVector = makeRowVector( + {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"}, + {makeFlatVector({true, false, true, false}), + makeFlatVector({1, 2, 3, 4}), + makeFlatVector({5, 6, 7, 8}), + makeFlatVector({9, 10, 11, 12}), + makeFlatVector({13, 14, 15, 16}), + makeFlatVector({17.0, 18.0, 19.0, 20.0}), + makeFlatVector({21.0, 22.0, 23.0, 24.0}), + makeFlatVector({"25", "26", "27", "28"}), + makeFlatVector({{29, 30}, {32, 32}, {33, 34}, {35, 36}}), + makeRowVector( + {"1", "2"}, + {makeFlatVector({"37", "38", "39", "40"}), + makeFlatVector({41, 42, 43, 44})}), + makeArrayVector({{45, 46}, {47, 48}, {49, 50}, {51, 52}}), + makeMapVector( + {{{"53", 54}, {"55", 56}}, + {{"57", 58}, {"59", 60}}, + {{"61", 62}, {"63", 64}}, + {{"65", 66}, {"67", 68}}})}); + const IndexRange noRows{0, 0}; + + auto serializer = serde_->createIterativeSerializer( + asRowType(rowVector->type()), + rowVector->size(), + arena.get(), + ¶mOptions); + + serializer->append(rowVector, folly::Range(&noRows, 1)); + serializer->flush(&output); + + auto expected = makeEmptyTestVector(); + auto rowType = asRowType(expected->type()); + auto deserialized = deserialize(rowType, out.str(), nullptr); + assertEqualVectors(deserialized, expected); +} + TEST_P(PrestoSerializerTest, emptyArray) { auto arrayVector = makeArrayVector( 1'000, @@ -1074,6 +1137,55 @@ TEST_P(PrestoSerializerTest, dictionaryEncodingTurnedOff) { ASSERT_EQ(deserialized->childAt(9)->encoding(), VectorEncoding::Simple::FLAT); } +TEST_P(PrestoSerializerTest, emptyVectorBatchVectorSerializer) { + // Serialize an empty RowVector. + auto rowVector = makeEmptyTestVector(); + + std::ostringstream out; + serializeBatch(rowVector, &out, nullptr); + + auto rowType = asRowType(rowVector->type()); + auto deserialized = deserialize(rowType, out.str(), nullptr); + assertEqualVectors(deserialized, rowVector); +} + +TEST_P(PrestoSerializerTest, serializeNoRowsSelectedBatchVectorSerializer) { + std::ostringstream out; + facebook::velox::serializer::presto::PrestoOutputStreamListener listener; + OStreamOutputStream output(&out, &listener); + auto paramOptions = getParamSerdeOptions(nullptr); + auto serializer = serde_->createBatchSerializer(pool_.get(), ¶mOptions); + auto rowVector = makeRowVector( + {"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"}, + {makeFlatVector({true, false, true, false}), + makeFlatVector({1, 2, 3, 4}), + makeFlatVector({5, 6, 7, 8}), + makeFlatVector({9, 10, 11, 12}), + makeFlatVector({13, 14, 15, 16}), + makeFlatVector({17.0, 18.0, 19.0, 20.0}), + makeFlatVector({21.0, 22.0, 23.0, 24.0}), + makeFlatVector({"25", "26", "27", "28"}), + makeFlatVector({{29, 30}, {32, 32}, {33, 34}, {35, 36}}), + makeRowVector( + {"1", "2"}, + {makeFlatVector({"37", "38", "39", "40"}), + makeFlatVector({41, 42, 43, 44})}), + makeArrayVector({{45, 46}, {47, 48}, {49, 50}, {51, 52}}), + makeMapVector( + {{{"53", 54}, {"55", 56}}, + {{"57", 58}, {"59", 60}}, + {{"61", 62}, {"63", 64}}, + {{"65", 66}, {"67", 68}}})}); + const IndexRange noRows{0, 0}; + + serializer->serialize(rowVector, folly::Range(&noRows, 1), &output); + + auto expected = makeEmptyTestVector(); + auto rowType = asRowType(expected->type()); + auto deserialized = deserialize(rowType, out.str(), nullptr); + assertEqualVectors(deserialized, expected); +} + TEST_P(PrestoSerializerTest, lazy) { constexpr int kSize = 1000; auto rowVector = makeTestVector(kSize);