Skip to content

Commit

Permalink
misc: Surface clear parsing error for invalid prestopage header (face…
Browse files Browse the repository at this point in the history
…bookincubator#11552)

Summary:
Pull Request resolved: facebookincubator#11552

During deserialization, PrestoPage header is parsed with the assumption of enough bytes in the provided input stream. This causes exceptions that are not easy to interpret when there is invalid data provided. This change validates existence of enough bytes before parsing the header and surfaces the parsing error with a clear exception message.

Reviewed By: kevinwilfong

Differential Revision: D66003722

fbshipit-source-id: 678c8103010d5d4d184510e3be7c3c6b76d1af9c
  • Loading branch information
Abdullah Ozturk authored and athmaja-n committed Jan 10, 2025
1 parent 4b415ea commit 6b0e665
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 8 deletions.
28 changes: 23 additions & 5 deletions velox/serializers/PrestoSerializer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,17 +223,30 @@ struct PrestoHeader {
int32_t compressedSize;
int64_t checksum;

static PrestoHeader read(ByteInputStream* source) {
static Expected<PrestoHeader> read(ByteInputStream* source) {
if (source->remainingSize() < kHeaderSize) {
return folly::makeUnexpected(Status::Invalid(
fmt::format("{} bytes for header", source->remainingSize())));
}
PrestoHeader header;
header.numRows = source->read<int32_t>();
header.pageCodecMarker = source->read<int8_t>();
header.uncompressedSize = source->read<int32_t>();
header.compressedSize = source->read<int32_t>();
header.checksum = source->read<int64_t>();

VELOX_CHECK_GE(header.numRows, 0);
VELOX_CHECK_GE(header.uncompressedSize, 0);
VELOX_CHECK_GE(header.compressedSize, 0);
if (header.numRows < 0) {
return folly::makeUnexpected(
Status::Invalid(fmt::format("negative numRows: {}", header.numRows)));
}
if (header.uncompressedSize < 0) {
return folly::makeUnexpected(Status::Invalid(fmt::format(
"negative uncompressedSize: {}", header.uncompressedSize)));
}
if (header.compressedSize < 0) {
return folly::makeUnexpected(Status::Invalid(
fmt::format("negative compressedSize: {}", header.compressedSize)));
}

return header;
}
Expand Down Expand Up @@ -4286,7 +4299,12 @@ void PrestoVectorSerde::deserialize(
const auto prestoOptions = toPrestoOptions(options);
const auto codec =
common::compressionKindToCodec(prestoOptions.compressionKind);
auto const header = PrestoHeader::read(source);
auto maybeHeader = PrestoHeader::read(source);
VELOX_CHECK(
maybeHeader.hasValue(),
fmt::format(
"PrestoPage header is invalid: {}", maybeHeader.error().message()));
auto const header = std::move(maybeHeader.value());

int64_t actualCheckSum = 0;
if (isChecksumBitSet(header.pageCodecMarker)) {
Expand Down
21 changes: 18 additions & 3 deletions velox/serializers/tests/PrestoSerializerTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -242,11 +242,13 @@ class PrestoSerializerTest
RowVectorPtr deserialize(
const RowTypePtr& rowType,
const std::string& input,
const serializer::presto::PrestoVectorSerde::PrestoOptions*
serdeOptions) {
const serializer::presto::PrestoVectorSerde::PrestoOptions* serdeOptions,
bool skipLexer = false) {
auto byteStream = toByteStream(input);
auto paramOptions = getParamSerdeOptions(serdeOptions);
validateLexer(input, paramOptions);
if (!skipLexer) {
validateLexer(input, paramOptions);
}
RowVectorPtr result;
serde_->deserialize(
byteStream.get(), pool_.get(), rowType, &result, 0, &paramOptions);
Expand Down Expand Up @@ -879,6 +881,19 @@ TEST_P(PrestoSerializerTest, emptyPage) {
assertEqualVectors(deserialized, rowVector);
}

TEST_P(PrestoSerializerTest, invalidPage) {
auto rowVector = makeEmptyTestVector();

std::ostringstream out;
serialize(rowVector, &out, nullptr);

auto invalidPage = ""; // empty string
auto rowType = asRowType(rowVector->type());
VELOX_ASSERT_THROW(
deserialize(rowType, invalidPage, nullptr, true /*skipLexer*/),
"PrestoPage header is invalid: 0 bytes for header");
}

TEST_P(PrestoSerializerTest, initMemory) {
const auto numRows = 100;
auto testFunc = [&](TypePtr type, int64_t expectedBytes) {
Expand Down

0 comments on commit 6b0e665

Please sign in to comment.