From 1eb29b8257a3c3e9e40cbcb16b968df7538ab61c Mon Sep 17 00:00:00 2001 From: Josh Humphries Date: Thu, 15 Sep 2022 10:26:45 -0400 Subject: [PATCH 1/4] validate reserved names are identifiers --- src/google/protobuf/compiler/parser.cc | 39 ++++++++++++------- src/google/protobuf/compiler/parser.h | 1 + .../protobuf/compiler/parser_unittest.cc | 17 ++++++++ 3 files changed, 44 insertions(+), 13 deletions(-) diff --git a/src/google/protobuf/compiler/parser.cc b/src/google/protobuf/compiler/parser.cc index 4cae1a11874ce..8158b9724eb81 100644 --- a/src/google/protobuf/compiler/parser.cc +++ b/src/google/protobuf/compiler/parser.cc @@ -51,6 +51,7 @@ #include "absl/strings/ascii.h" #include "absl/strings/escaping.h" #include "absl/strings/str_cat.h" +#include "absl/strings/str_format.h" #include "google/protobuf/descriptor.h" #include "google/protobuf/descriptor.pb.h" #include "google/protobuf/io/tokenizer.h" @@ -1728,11 +1729,23 @@ bool Parser::ParseReserved(DescriptorProto* message, } } +bool Parser::ParseReservedName(std::string* name, const char* error_message) { + // capture position of token + int line = input_->current().line; + int col = input_->current().column; + DO(ConsumeString(name, error_message)); + if (!io::Tokenizer::IsIdentifier(*name)) { + AddError(line, col, absl::StrFormat("Reserved name \"%s\" is not a valid identifier.", *name)); + return false; + } + return true; +} + bool Parser::ParseReservedNames(DescriptorProto* message, const LocationRecorder& parent_location) { do { LocationRecorder location(parent_location, message->reserved_name_size()); - DO(ConsumeString(message->add_reserved_name(), "Expected field name.")); + DO(ParseReservedName(message->add_reserved_name(), "Expected field name.")); } while (TryConsume(",")); DO(ConsumeEndOfDeclaration(";", &parent_location)); return true; @@ -1787,42 +1800,42 @@ bool Parser::ParseReservedNumbers(DescriptorProto* message, return true; } -bool Parser::ParseReserved(EnumDescriptorProto* message, - const LocationRecorder& message_location) { +bool Parser::ParseReserved(EnumDescriptorProto* proto, + const LocationRecorder& enum_location) { io::Tokenizer::Token start_token = input_->current(); // Parse the declaration. DO(Consume("reserved")); if (LookingAtType(io::Tokenizer::TYPE_STRING)) { - LocationRecorder location(message_location, + LocationRecorder location(enum_location, EnumDescriptorProto::kReservedNameFieldNumber); location.StartAt(start_token); - return ParseReservedNames(message, location); + return ParseReservedNames(proto, location); } else { - LocationRecorder location(message_location, + LocationRecorder location(enum_location, EnumDescriptorProto::kReservedRangeFieldNumber); location.StartAt(start_token); - return ParseReservedNumbers(message, location); + return ParseReservedNumbers(proto, location); } } -bool Parser::ParseReservedNames(EnumDescriptorProto* message, +bool Parser::ParseReservedNames(EnumDescriptorProto* proto, const LocationRecorder& parent_location) { do { - LocationRecorder location(parent_location, message->reserved_name_size()); - DO(ConsumeString(message->add_reserved_name(), "Expected enum value.")); + LocationRecorder location(parent_location, proto->reserved_name_size()); + DO(ParseReservedName(proto->add_reserved_name(), "Expected enum value.")); } while (TryConsume(",")); DO(ConsumeEndOfDeclaration(";", &parent_location)); return true; } -bool Parser::ParseReservedNumbers(EnumDescriptorProto* message, +bool Parser::ParseReservedNumbers(EnumDescriptorProto* proto, const LocationRecorder& parent_location) { bool first = true; do { - LocationRecorder location(parent_location, message->reserved_range_size()); + LocationRecorder location(parent_location, proto->reserved_range_size()); EnumDescriptorProto::EnumReservedRange* range = - message->add_reserved_range(); + proto->add_reserved_range(); int start, end; io::Tokenizer::Token start_token; { diff --git a/src/google/protobuf/compiler/parser.h b/src/google/protobuf/compiler/parser.h index ccd3e5a5f7780..b7dfec6e259a3 100644 --- a/src/google/protobuf/compiler/parser.h +++ b/src/google/protobuf/compiler/parser.h @@ -394,6 +394,7 @@ class PROTOBUF_EXPORT Parser { const LocationRecorder& message_location); bool ParseReservedNames(DescriptorProto* message, const LocationRecorder& parent_location); + bool ParseReservedName(std::string* name, const char* error_message); bool ParseReservedNumbers(DescriptorProto* message, const LocationRecorder& parent_location); bool ParseReserved(EnumDescriptorProto* message, diff --git a/src/google/protobuf/compiler/parser_unittest.cc b/src/google/protobuf/compiler/parser_unittest.cc index 3b32451916597..c6dae84494d70 100644 --- a/src/google/protobuf/compiler/parser_unittest.cc +++ b/src/google/protobuf/compiler/parser_unittest.cc @@ -1675,6 +1675,15 @@ TEST_F(ParseErrorTest, EnumReservedMissingQuotes) { "2:11: Expected enum value or number range.\n"); } +TEST_F(ParseErrorTest, EnumReservedInvalidIdentifier) { + ExpectHasErrors( + "enum TestEnum {\n" + " FOO = 1;\n" + " reserved \"foo bar\";\n" + "}\n", + "2:11: Reserved name \"foo bar\" is not a valid identifier.\n"); +} + // ------------------------------------------------------------------- // Reserved field number errors @@ -1702,6 +1711,14 @@ TEST_F(ParseErrorTest, ReservedMissingQuotes) { "1:11: Expected field name or number range.\n"); } +TEST_F(ParseErrorTest, ReservedInvalidIdentifier) { + ExpectHasErrors( + "message Foo {\n" + " reserved \"foo bar\";\n" + "}\n", + "1:11: Reserved name \"foo bar\" is not a valid identifier.\n"); +} + TEST_F(ParseErrorTest, ReservedNegativeNumber) { ExpectHasErrors( "message Foo {\n" From e293b5cc43f3416706c61e8c839e6e9ab4d0e853 Mon Sep 17 00:00:00 2001 From: Joshua Humphries Date: Thu, 15 Sep 2022 15:27:38 -0400 Subject: [PATCH 2/4] clarify comment --- src/google/protobuf/compiler/parser.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/google/protobuf/compiler/parser.cc b/src/google/protobuf/compiler/parser.cc index 8158b9724eb81..e053eca7e5c25 100644 --- a/src/google/protobuf/compiler/parser.cc +++ b/src/google/protobuf/compiler/parser.cc @@ -1730,7 +1730,8 @@ bool Parser::ParseReserved(DescriptorProto* message, } bool Parser::ParseReservedName(std::string* name, const char* error_message) { - // capture position of token + // Capture the position of the token, in case we have to report an + // error after it is consumed. int line = input_->current().line; int col = input_->current().column; DO(ConsumeString(name, error_message)); From 072fb8f58a63f36b34b5cb05242903ae71245b2c Mon Sep 17 00:00:00 2001 From: Josh Humphries Date: Wed, 21 Sep 2022 18:24:59 -0400 Subject: [PATCH 3/4] downgrade to a warning --- src/google/protobuf/compiler/parser.cc | 11 +++--- src/google/protobuf/compiler/parser.h | 3 ++ .../protobuf/compiler/parser_unittest.cc | 36 +++++++++++++------ 3 files changed, 35 insertions(+), 15 deletions(-) diff --git a/src/google/protobuf/compiler/parser.cc b/src/google/protobuf/compiler/parser.cc index e053eca7e5c25..b937fa25b910a 100644 --- a/src/google/protobuf/compiler/parser.cc +++ b/src/google/protobuf/compiler/parser.cc @@ -390,13 +390,16 @@ void Parser::AddError(const std::string& error) { AddError(input_->current().line, input_->current().column, error); } -void Parser::AddWarning(const std::string& warning) { +void Parser::AddWarning(int line, int column, const std::string& warning) { if (error_collector_ != nullptr) { - error_collector_->AddWarning(input_->current().line, - input_->current().column, warning); + error_collector_->AddWarning(line, column, warning); } } +void Parser::AddWarning(const std::string& warning) { + AddWarning(input_->current().line, input_->current().column, warning); +} + // ------------------------------------------------------------------- Parser::LocationRecorder::LocationRecorder(Parser* parser) @@ -1736,7 +1739,7 @@ bool Parser::ParseReservedName(std::string* name, const char* error_message) { int col = input_->current().column; DO(ConsumeString(name, error_message)); if (!io::Tokenizer::IsIdentifier(*name)) { - AddError(line, col, absl::StrFormat("Reserved name \"%s\" is not a valid identifier.", *name)); + AddWarning(line, col, absl::StrFormat("Reserved name \"%s\" is not a valid identifier.", *name)); return false; } return true; diff --git a/src/google/protobuf/compiler/parser.h b/src/google/protobuf/compiler/parser.h index b7dfec6e259a3..600fef6adbd15 100644 --- a/src/google/protobuf/compiler/parser.h +++ b/src/google/protobuf/compiler/parser.h @@ -213,6 +213,9 @@ class PROTOBUF_EXPORT Parser { // of the current token. void AddError(const std::string& error); + // Invokes error_collector_->AddWarning(), if error_collector_ is not NULL. + void AddWarning(int line, int column, const std::string& warning); + // Invokes error_collector_->AddWarning() with the line and column number // of the current token. void AddWarning(const std::string& warning); diff --git a/src/google/protobuf/compiler/parser_unittest.cc b/src/google/protobuf/compiler/parser_unittest.cc index c6dae84494d70..9cd34f155c756 100644 --- a/src/google/protobuf/compiler/parser_unittest.cc +++ b/src/google/protobuf/compiler/parser_unittest.cc @@ -147,6 +147,16 @@ class ParserTest : public testing::Test { EXPECT_EQ(io::Tokenizer::TYPE_END, input_->current().type); } + // Parse the text and expect that the given warnings are reported. + void ExpectHasWarnings(const char* text, const char* expected_warnings) { + SetupParser(text); + FileDescriptorProto file; + parser_->Parse(input_.get(), &file); + EXPECT_EQ(io::Tokenizer::TYPE_END, input_->current().type); + ASSERT_EQ("", error_collector_.text_); + EXPECT_EQ(expected_warnings, error_collector_.warning_); + } + // Same as above but does not expect that the parser parses the complete // input. void ExpectHasEarlyExitErrors(const char* text, const char* expected_errors) { @@ -1676,12 +1686,14 @@ TEST_F(ParseErrorTest, EnumReservedMissingQuotes) { } TEST_F(ParseErrorTest, EnumReservedInvalidIdentifier) { - ExpectHasErrors( - "enum TestEnum {\n" - " FOO = 1;\n" - " reserved \"foo bar\";\n" - "}\n", - "2:11: Reserved name \"foo bar\" is not a valid identifier.\n"); + ExpectHasWarnings( + R"pb( + enum TestEnum { + FOO = 1; + reserved "foo bar"; + } + )pb", + "3:17: Reserved name \"foo bar\" is not a valid identifier.\n"); } // ------------------------------------------------------------------- @@ -1712,11 +1724,13 @@ TEST_F(ParseErrorTest, ReservedMissingQuotes) { } TEST_F(ParseErrorTest, ReservedInvalidIdentifier) { - ExpectHasErrors( - "message Foo {\n" - " reserved \"foo bar\";\n" - "}\n", - "1:11: Reserved name \"foo bar\" is not a valid identifier.\n"); + ExpectHasWarnings( + R"pb( + message Foo { + reserved "foo bar"; + } + )pb", + "2:17: Reserved name \"foo bar\" is not a valid identifier.\n"); } TEST_F(ParseErrorTest, ReservedNegativeNumber) { From 8f17f578552215bd1a3563fe4c2538775064f351 Mon Sep 17 00:00:00 2001 From: Josh Humphries Date: Wed, 21 Sep 2022 22:12:49 -0400 Subject: [PATCH 4/4] don't return false and abort parsing of current statement --- src/google/protobuf/compiler/parser.cc | 1 - src/google/protobuf/compiler/parser_unittest.cc | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/src/google/protobuf/compiler/parser.cc b/src/google/protobuf/compiler/parser.cc index b937fa25b910a..fafd3c57f6eb8 100644 --- a/src/google/protobuf/compiler/parser.cc +++ b/src/google/protobuf/compiler/parser.cc @@ -1740,7 +1740,6 @@ bool Parser::ParseReservedName(std::string* name, const char* error_message) { DO(ConsumeString(name, error_message)); if (!io::Tokenizer::IsIdentifier(*name)) { AddWarning(line, col, absl::StrFormat("Reserved name \"%s\" is not a valid identifier.", *name)); - return false; } return true; } diff --git a/src/google/protobuf/compiler/parser_unittest.cc b/src/google/protobuf/compiler/parser_unittest.cc index 9cd34f155c756..4e261433a6c6a 100644 --- a/src/google/protobuf/compiler/parser_unittest.cc +++ b/src/google/protobuf/compiler/parser_unittest.cc @@ -151,7 +151,7 @@ class ParserTest : public testing::Test { void ExpectHasWarnings(const char* text, const char* expected_warnings) { SetupParser(text); FileDescriptorProto file; - parser_->Parse(input_.get(), &file); + ASSERT_TRUE(parser_->Parse(input_.get(), &file)); EXPECT_EQ(io::Tokenizer::TYPE_END, input_->current().type); ASSERT_EQ("", error_collector_.text_); EXPECT_EQ(expected_warnings, error_collector_.warning_);