Skip to content

Commit c4644b7

Browse files
authored Sep 15, 2022
Merge pull request #10555 from jhump/jh/fix-consistency-with-very-large-decimal-numbers
protoc: fix consistency with parsing very large decimal numbers
2 parents 0d0164f + 7e745c4 commit c4644b7

File tree

7 files changed

+155
-48
lines changed

7 files changed

+155
-48
lines changed
 

‎src/google/protobuf/compiler/parser.cc

+31-13
Original file line number | Diff line number | Diff line change
@@ -288,6 +288,16 @@ bool Parser::ConsumeInteger64(uint64_t max_value, uint64_t* output,
288288
}
289289
}
290290

291+
bool Parser::TryConsumeInteger64(uint64_t max_value, uint64_t* output) {
292+
if (LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
293+
io::Tokenizer::ParseInteger(input_->current().text, max_value,
294+
output)) {
295+
input_->Next();
296+
return true;
297+
}
298+
return false;
299+
}
300+
291301
bool Parser::ConsumeNumber(double* output, const char* error) {
292302
if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
293303
*output = io::Tokenizer::ParseFloat(input_->current().text);
@@ -296,13 +306,19 @@ bool Parser::ConsumeNumber(double* output, const char* error) {
296306
} else if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
297307
// Also accept integers.
298308
uint64_t value = 0;
299-
if (!io::Tokenizer::ParseInteger(input_->current().text,
309+
if (io::Tokenizer::ParseInteger(input_->current().text,
300310
std::numeric_limits<uint64_t>::max(),
301311
&value)) {
312+
*output = value;
313+
} else if (input_->current().text[0] == '0') {
314+
// octal or hexadecimal; don't bother parsing as float
315+
AddError("Integer out of range.");
316+
// We still return true because we did, in fact, parse a number.
317+
} else if (!io::Tokenizer::TryParseFloat(input_->current().text, output)) {
318+
// out of int range, and not valid float? 🤷
302319
AddError("Integer out of range.");
303320
// We still return true because we did, in fact, parse a number.
304321
}
305-
*output = value;
306322
input_->Next();
307323
return true;
308324
} else if (LookingAt("inf")) {
@@ -1575,18 +1591,20 @@ bool Parser::ParseOption(Message* options,
15751591
is_negative
15761592
? static_cast<uint64_t>(std::numeric_limits<int64_t>::max()) + 1
15771593
: std::numeric_limits<uint64_t>::max();
1578-
DO(ConsumeInteger64(max_value, &value, "Expected integer."));
1579-
if (is_negative) {
1580-
value_location.AddPath(
1581-
UninterpretedOption::kNegativeIntValueFieldNumber);
1582-
uninterpreted_option->set_negative_int_value(
1583-
static_cast<int64_t>(0 - value));
1584-
} else {
1585-
value_location.AddPath(
1586-
UninterpretedOption::kPositiveIntValueFieldNumber);
1587-
uninterpreted_option->set_positive_int_value(value);
1594+
if (TryConsumeInteger64(max_value, &value)) {
1595+
if (is_negative) {
1596+
value_location.AddPath(
1597+
UninterpretedOption::kNegativeIntValueFieldNumber);
1598+
uninterpreted_option->set_negative_int_value(
1599+
static_cast<int64_t>(0 - value));
1600+
} else {
1601+
value_location.AddPath(
1602+
UninterpretedOption::kPositiveIntValueFieldNumber);
1603+
uninterpreted_option->set_positive_int_value(value);
1604+
}
1605+
break;
15881606
}
1589-
break;
1607+
// value too large for an integer; fall through below to treat as floating point
15901608
}
15911609

15921610
case io::Tokenizer::TYPE_FLOAT: {

‎src/google/protobuf/compiler/parser.h

+3
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,9 @@ class PROTOBUF_EXPORT Parser {
180180
// is greater than max_value, an error will be reported.
181181
bool ConsumeInteger64(uint64_t max_value, uint64_t* output,
182182
const char* error);
183+
// Try to consume a 64-bit integer and store its value in "output". No
184+
// error is reported on failure, allowing caller to consume token another way.
185+
bool TryConsumeInteger64(uint64_t max_value, uint64_t* output);
183186
// Consume a number and store its value in "output". This will accept
184187
// tokens of either INTEGER or FLOAT type.
185188
bool ConsumeNumber(double* output, const char* error);

‎src/google/protobuf/compiler/parser_unittest.cc

+66
Original file line number | Diff line number | Diff line change
@@ -592,6 +592,56 @@ TEST_F(ParseMessageTest, FieldOptions) {
592592
"}");
593593
}
594594

595+
// Checks that a decimal integer literal whose magnitude is one past the
// uint64 maximum (18446744073709551616, both positive and negated) is
// accepted for double-typed fields. Fields "a" and "b" use it as a
// `default`, fields "c" and "d" use it as the value of the custom option
// "(f)". The expected descriptor carries the value in floating-point form
// (1.8446744073709552e+19): as `default_value` text for a/b and as
// `double_value` of the uninterpreted option for c/d.
TEST_F(ParseMessageTest, FieldOptionsSupportLargeDecimalLiteral) {
596+
// decimal integer literal > uint64 max
597+
ExpectParsesTo(
598+
"import \"google/protobuf/descriptor.proto\";\n"
599+
"extend google.protobuf.FieldOptions {\n"
600+
" optional double f = 10101;\n"
601+
"}\n"
602+
"message TestMessage {\n"
603+
" optional double a = 1 [default = 18446744073709551616];\n"
604+
" optional double b = 2 [default = -18446744073709551616];\n"
605+
" optional double c = 3 [(f) = 18446744073709551616];\n"
606+
" optional double d = 4 [(f) = -18446744073709551616];\n"
607+
"}\n",
608+
609+
"dependency: \"google/protobuf/descriptor.proto\""
610+
"extension {"
611+
" name: \"f\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 10101"
612+
" extendee: \"google.protobuf.FieldOptions\""
613+
"}"
614+
"message_type {"
615+
" name: \"TestMessage\""
616+
" field {"
617+
" name: \"a\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 1"
618+
" default_value: \"1.8446744073709552e+19\""
619+
" }"
620+
" field {"
621+
" name: \"b\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 2"
622+
" default_value: \"-1.8446744073709552e+19\""
623+
" }"
624+
" field {"
625+
" name: \"c\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 3"
626+
" options{"
627+
" uninterpreted_option{"
628+
" name{ name_part: \"f\" is_extension: true }"
629+
" double_value: 1.8446744073709552e+19"
630+
" }"
631+
" }"
632+
" }"
633+
" field {"
634+
" name: \"d\" label: LABEL_OPTIONAL type: TYPE_DOUBLE number: 4"
635+
" options{"
636+
" uninterpreted_option{"
637+
" name{ name_part: \"f\" is_extension: true }"
638+
" double_value: -1.8446744073709552e+19"
639+
" }"
640+
" }"
641+
" }"
642+
"}");
643+
}
644+
595645
TEST_F(ParseMessageTest, Oneof) {
596646
ExpectParsesTo(
597647
"message TestMessage {\n"
@@ -1891,6 +1941,22 @@ TEST_F(ParserValidationErrorTest, FieldDefaultValueError) {
18911941
"2:32: Enum type \"Baz\" has no value named \"NO_SUCH_VALUE\".\n");
18921942
}
18931943

1944+
// Checks that a hexadecimal default value one past the uint64 maximum
// (0x10000000000000000) on a double field is rejected with
// "Integer out of range." at position 1:37, i.e. that hexadecimal literals
// are not reinterpreted as floating-point values the way oversized decimal
// literals are.
TEST_F(ParserValidationErrorTest, FieldDefaultIntegerOutOfRange) {
1945+
ExpectHasErrors(
1946+
"message Foo {\n"
1947+
" optional double bar = 1 [default = 0x10000000000000000];\n"
1948+
"}\n",
1949+
"1:37: Integer out of range.\n");
1950+
}
1951+
1952+
// Checks that a hexadecimal option value one past the uint64 maximum
// (0x10000000000000000) given for the option "foo" is rejected with
// "Integer out of range." at position 1:33.
// NOTE(review): presumably the error comes from the number-parsing path the
// option parser falls through to for oversized integers -- confirm.
TEST_F(ParserValidationErrorTest, FieldOptionOutOfRange) {
1953+
ExpectHasErrors(
1954+
"message Foo {\n"
1955+
" optional double bar = 1 [foo = 0x10000000000000000];\n"
1956+
"}\n",
1957+
"1:33: Integer out of range.\n");
1958+
}
1959+
18941960
TEST_F(ParserValidationErrorTest, FileOptionNameError) {
18951961
ExpectHasValidationErrors(
18961962
"option foo = 5;",

‎src/google/protobuf/descriptor.cc

+30-20
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,7 @@
5858
#include "absl/strings/ascii.h"
5959
#include "absl/strings/escaping.h"
6060
#include "absl/strings/str_cat.h"
61+
#include "absl/strings/str_format.h"
6162
#include "google/protobuf/stubs/stringprintf.h"
6263
#include "absl/strings/str_join.h"
6364
#include "absl/strings/str_split.h"
@@ -7675,6 +7676,27 @@ bool DescriptorBuilder::OptionInterpreter::ExamineIfOptionIsSet(
76757676
return true;
76767677
}
76777678

7679+
namespace {
7680+
// Helpers for method below
7681+
7682+
template <typename T> std::string ValueOutOfRange(
7683+
absl::string_view type_name, absl::string_view option_name) {
7684+
return absl::StrFormat(
7685+
"Value out of range, %d to %d, for %s option \"%s\".", \
7686+
std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
7687+
type_name, option_name);
7688+
}
7689+
7690+
template <typename T> std::string ValueMustBeInt(
7691+
absl::string_view type_name, absl::string_view option_name) {
7692+
return absl::StrFormat(
7693+
"Value must be integer, from %d to %d, for %s option \"%s\".", \
7694+
std::numeric_limits<T>::min(), std::numeric_limits<T>::max(),
7695+
type_name, option_name);
7696+
}
7697+
7698+
} // namespace
7699+
76787700
bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
76797701
const FieldDescriptor* option_field, UnknownFieldSet* unknown_fields) {
76807702
// We switch on the CppType to validate.
@@ -7683,8 +7705,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
76837705
if (uninterpreted_option_->has_positive_int_value()) {
76847706
if (uninterpreted_option_->positive_int_value() >
76857707
static_cast<uint64_t>(std::numeric_limits<int32_t>::max())) {
7686-
return AddValueError("Value out of range for int32 option \"" +
7687-
option_field->full_name() + "\".");
7708+
return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
76887709
} else {
76897710
SetInt32(option_field->number(),
76907711
uninterpreted_option_->positive_int_value(),
@@ -7693,25 +7714,22 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
76937714
} else if (uninterpreted_option_->has_negative_int_value()) {
76947715
if (uninterpreted_option_->negative_int_value() <
76957716
static_cast<int64_t>(std::numeric_limits<int32_t>::min())) {
7696-
return AddValueError("Value out of range for int32 option \"" +
7697-
option_field->full_name() + "\".");
7717+
return AddValueError(ValueOutOfRange<int32_t>("int32", option_field->full_name()));
76987718
} else {
76997719
SetInt32(option_field->number(),
77007720
uninterpreted_option_->negative_int_value(),
77017721
option_field->type(), unknown_fields);
77027722
}
77037723
} else {
7704-
return AddValueError("Value must be integer for int32 option \"" +
7705-
option_field->full_name() + "\".");
7724+
return AddValueError(ValueMustBeInt<int32_t>("int32", option_field->full_name()));
77067725
}
77077726
break;
77087727

77097728
case FieldDescriptor::CPPTYPE_INT64:
77107729
if (uninterpreted_option_->has_positive_int_value()) {
77117730
if (uninterpreted_option_->positive_int_value() >
77127731
static_cast<uint64_t>(std::numeric_limits<int64_t>::max())) {
7713-
return AddValueError("Value out of range for int64 option \"" +
7714-
option_field->full_name() + "\".");
7732+
return AddValueError(ValueOutOfRange<int64_t>("int64", option_field->full_name()));
77157733
} else {
77167734
SetInt64(option_field->number(),
77177735
uninterpreted_option_->positive_int_value(),
@@ -7722,27 +7740,22 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
77227740
uninterpreted_option_->negative_int_value(),
77237741
option_field->type(), unknown_fields);
77247742
} else {
7725-
return AddValueError("Value must be integer for int64 option \"" +
7726-
option_field->full_name() + "\".");
7743+
return AddValueError(ValueMustBeInt<int64_t>("int64", option_field->full_name()));
77277744
}
77287745
break;
77297746

77307747
case FieldDescriptor::CPPTYPE_UINT32:
77317748
if (uninterpreted_option_->has_positive_int_value()) {
77327749
if (uninterpreted_option_->positive_int_value() >
77337750
std::numeric_limits<uint32_t>::max()) {
7734-
return AddValueError("Value out of range for uint32 option \"" +
7735-
option_field->name() + "\".");
7751+
return AddValueError(ValueOutOfRange<uint32_t>("uint32", option_field->full_name()));
77367752
} else {
77377753
SetUInt32(option_field->number(),
77387754
uninterpreted_option_->positive_int_value(),
77397755
option_field->type(), unknown_fields);
77407756
}
77417757
} else {
7742-
return AddValueError(
7743-
"Value must be non-negative integer for uint32 "
7744-
"option \"" +
7745-
option_field->full_name() + "\".");
7758+
return AddValueError(ValueMustBeInt<uint32_t>("uint32", option_field->full_name()));
77467759
}
77477760
break;
77487761

@@ -7752,10 +7765,7 @@ bool DescriptorBuilder::OptionInterpreter::SetOptionValue(
77527765
uninterpreted_option_->positive_int_value(),
77537766
option_field->type(), unknown_fields);
77547767
} else {
7755-
return AddValueError(
7756-
"Value must be non-negative integer for uint64 "
7757-
"option \"" +
7758-
option_field->full_name() + "\".");
7768+
return AddValueError(ValueMustBeInt<uint64_t>("uint64", option_field->full_name()));
77597769
}
77607770
break;
77617771

‎src/google/protobuf/descriptor_unittest.cc

+8-8
Original file line number | Diff line number | Diff line change
@@ -5557,7 +5557,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfPositiveRange) {
55575557
" positive_int_value: 0x80000000 } "
55585558
"}",
55595559

5560-
"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
5560+
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
55615561
"for int32 option \"foo\".\n");
55625562
}
55635563

@@ -5574,7 +5574,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueOutOfNegativeRange) {
55745574
" negative_int_value: -0x80000001 } "
55755575
"}",
55765576

5577-
"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
5577+
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, -2147483648 to 2147483647, "
55785578
"for int32 option \"foo\".\n");
55795579
}
55805580

@@ -5590,7 +5590,7 @@ TEST_F(ValidationErrorTest, Int32OptionValueIsNotPositiveInt) {
55905590
" is_extension: true } "
55915591
" string_value: \"5\" } }",
55925592

5593-
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer "
5593+
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -2147483648 to 2147483647, "
55945594
"for int32 option \"foo\".\n");
55955595
}
55965596

@@ -5608,7 +5608,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueOutOfRange) {
56085608
"} "
56095609
"}",
56105610

5611-
"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
5611+
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, -9223372036854775808 to 9223372036854775807, "
56125612
"for int64 option \"foo\".\n");
56135613
}
56145614

@@ -5624,7 +5624,7 @@ TEST_F(ValidationErrorTest, Int64OptionValueIsNotPositiveInt) {
56245624
" is_extension: true } "
56255625
" identifier_value: \"5\" } }",
56265626

5627-
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer "
5627+
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from -9223372036854775808 to 9223372036854775807, "
56285628
"for int64 option \"foo\".\n");
56295629
}
56305630

@@ -5640,7 +5640,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueOutOfRange) {
56405640
" is_extension: true } "
56415641
" positive_int_value: 0x100000000 } }",
56425642

5643-
"foo.proto: foo.proto: OPTION_VALUE: Value out of range "
5643+
"foo.proto: foo.proto: OPTION_VALUE: Value out of range, 0 to 4294967295, "
56445644
"for uint32 option \"foo\".\n");
56455645
}
56465646

@@ -5656,7 +5656,7 @@ TEST_F(ValidationErrorTest, UInt32OptionValueIsNotPositiveInt) {
56565656
" is_extension: true } "
56575657
" double_value: -5.6 } }",
56585658

5659-
"foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer "
5659+
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 4294967295, "
56605660
"for uint32 option \"foo\".\n");
56615661
}
56625662

@@ -5672,7 +5672,7 @@ TEST_F(ValidationErrorTest, UInt64OptionValueIsNotPositiveInt) {
56725672
" is_extension: true } "
56735673
" negative_int_value: -5 } }",
56745674

5675-
"foo.proto: foo.proto: OPTION_VALUE: Value must be non-negative integer "
5675+
"foo.proto: foo.proto: OPTION_VALUE: Value must be integer, from 0 to 18446744073709551615, "
56765676
"for uint64 option \"foo\".\n");
56775677
}
56785678

‎src/google/protobuf/io/tokenizer.cc

+13-7
Original file line number | Diff line number | Diff line change
@@ -1002,9 +1002,20 @@ bool Tokenizer::ParseInteger(const std::string& text, uint64_t max_value,
10021002
}
10031003

10041004
double Tokenizer::ParseFloat(const std::string& text) {
1005+
double result = 0;
1006+
if (!TryParseFloat(text, &result)) {
1007+
GOOGLE_LOG(DFATAL)
1008+
<< " Tokenizer::ParseFloat() passed text that could not have been"
1009+
" tokenized as a float: "
1010+
<< absl::CEscape(text);
1011+
}
1012+
return result;
1013+
}
1014+
1015+
bool Tokenizer::TryParseFloat(const std::string& text, double* result) {
10051016
const char* start = text.c_str();
10061017
char* end;
1007-
double result = NoLocaleStrtod(start, &end);
1018+
*result = NoLocaleStrtod(start, &end);
10081019

10091020
// "1e" is not a valid float, but if the tokenizer reads it, it will
10101021
// report an error but still return it as a valid token. We need to
@@ -1020,12 +1031,7 @@ double Tokenizer::ParseFloat(const std::string& text) {
10201031
++end;
10211032
}
10221033

1023-
GOOGLE_LOG_IF(DFATAL,
1024-
static_cast<size_t>(end - start) != text.size() || *start == '-')
1025-
<< " Tokenizer::ParseFloat() passed text that could not have been"
1026-
" tokenized as a float: "
1027-
<< absl::CEscape(text);
1028-
return result;
1034+
return static_cast<size_t>(end - start) == text.size() && *start != '-';
10291035
}
10301036

10311037
// Helper to append a Unicode code point to a string as UTF8, without bringing

‎src/google/protobuf/io/tokenizer.h

+4
Original file line number | Diff line number | Diff line change
@@ -214,6 +214,10 @@ class PROTOBUF_EXPORT Tokenizer {
214214
// result is undefined (possibly an assert failure).
215215
static double ParseFloat(const std::string& text);
216216

217+
// Parses given text as if it were a TYPE_FLOAT token. Returns false if the
218+
// given text is not actually a valid float literal.
219+
static bool TryParseFloat(const std::string& text, double* result);
220+
217221
// Parses a TYPE_STRING token. This never fails, so long as the text actually
218222
// comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the
219223
// result is undefined (possibly an assert failure).

0 commit comments

Comments
 (0)
Please sign in to comment.