Skip to content

Commit

Permalink
fix crash with pathologically-nested inputs (closes #100)
Browse files Browse the repository at this point in the history
also:
- fixed parse_result natvis
- added parse_result default constructor
- added nested value limit example to error printer
  • Loading branch information
marzer committed May 17, 2021
1 parent c4e00f9 commit a29ecda
Show file tree
Hide file tree
Showing 10 changed files with 174 additions and 32 deletions.
10 changes: 5 additions & 5 deletions .github/ISSUE_TEMPLATE/bug_report.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ assignees: marzer

<!--
Replace the HTML comments below with the requested information.
DO NOT delete this template and roll your own!
Please DO NOT delete this template and roll your own!
Thanks for contributing!
-->
Expand All @@ -21,7 +21,7 @@ assignees: marzer
<!--
If you're using the single-header version of the library, the version number is right at the top of the file.
Otherwise you can find it by opening toml++/toml_version.h; it'll be represented by three defines -
TOML_LANG_MAJOR, TOML_LANG_MINOR and TOML_LANG_PATCH.
TOML_LIB_MAJOR, TOML_LIB_MINOR and TOML_LIB_PATCH.
If you're not using any particular release and are instead just living large at HEAD of master, the commit hash
would be super helpful too, though it's not critical.
Expand All @@ -36,14 +36,14 @@ assignees: marzer



**C++ standard mode (e.g. 17, 20, 'latest'):**
**C++ standard mode:**
<!--
The C++ standard level you were targeting, e.g. C++17
The C++ standard level you were targeting, e.g. 17, 20, 'latest'
-->



**Target arch (e.g. x64):**
**Target arch:**
<!--
The architecture you were targeting, e.g. x86, x64, ARM
-->
Expand Down
2 changes: 1 addition & 1 deletion .github/ISSUE_TEMPLATE/feature_request.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ assignees: marzer

<!--
Replace the HTML comments below with the requested information.
DO NOT delete this template and roll your own!
Please DO NOT delete this template and roll your own!
Thanks for contributing!
-->
Expand Down
16 changes: 14 additions & 2 deletions examples/error_printer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ namespace
"[foo] [bar]"sv,
"[foo]\n[foo]"sv,
"? = 'foo' ?"sv,
"[ [foo] ]"sv
"[ [foo] ]"sv,

"########## arrays"sv,
"val = [,]"sv,
Expand All @@ -68,6 +68,7 @@ namespace
"########## values"sv,
"val = _"sv,
"val = G"sv,
"PATHOLOGICALLY_NESTED"sv, // generated inline

"########## strings"sv,
"val = \" \r \""sv,
Expand Down Expand Up @@ -136,7 +137,18 @@ int main(int /*argc*/, char** /*argv*/)
}
else
{
auto result = toml::parse(str);
toml::parse_result result;

if (str == "PATHOLOGICALLY_NESTED"sv)
{
std::string s(1000_sz, '[');
constexpr auto start = "array = "sv;
memcpy(s.data(), start.data(), start.length());
result = toml::parse(s);
}
else
result = toml::parse(str);

if (!result)
{
std::cout << result.error();
Expand Down
9 changes: 8 additions & 1 deletion include/toml++/toml_parse_result.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ TOML_NAMESPACE_START
static constexpr size_t align_ =
(alignof(toml::table) < alignof(parse_error) ? alignof(parse_error) : alignof(toml::table));

alignas(align_) unsigned char bytes[size_ + 1u];
alignas(align_) unsigned char bytes[size_];
};

mutable storage_t storage_;
Expand Down Expand Up @@ -162,6 +162,13 @@ TOML_NAMESPACE_START
/// \brief Returns the internal toml::parse_error (const lvalue overload).
[[nodiscard]] explicit operator const parse_error& () const noexcept { return error(); }

TOML_NODISCARD_CTOR
parse_result() noexcept
: err_{ true }
{
::new (static_cast<void*>(storage_.bytes)) parse_error{ std::string{}, source_region{} };
}

TOML_NODISCARD_CTOR
explicit parse_result(toml::table&& tbl) noexcept
: err_{ false }
Expand Down
45 changes: 45 additions & 0 deletions include/toml++/toml_parser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,11 @@ TOML_ANON_NAMESPACE_START
};
#endif
}

error_builder(const error_builder&) = delete;
error_builder(error_builder&&) = delete;
error_builder& operator=(const error_builder&) = delete;
error_builder& operator=(error_builder&&) = delete;
};

struct parse_scope final
Expand All @@ -253,6 +258,11 @@ TOML_ANON_NAMESPACE_START
{
storage_ = parent_;
}

parse_scope(const parse_scope&) = delete;
parse_scope(parse_scope&&) = delete;
parse_scope& operator=(const parse_scope&) = delete;
parse_scope& operator=(parse_scope&&) = delete;
};
#define push_parse_scope_2(scope, line) parse_scope ps_##line{ current_scope, scope }
#define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line)
Expand Down Expand Up @@ -296,13 +306,15 @@ TOML_ANON_NAMESPACE_START

[[nodiscard]]
TOML_ATTR(pure)
TOML_ALWAYS_INLINE
operator bool() const noexcept
{
return node_ != nullptr;
}

[[nodiscard]]
TOML_ATTR(pure)
TOML_ALWAYS_INLINE
toml::node* get() const noexcept
{
return node_;
Expand All @@ -328,6 +340,28 @@ TOML_ANON_NAMESPACE_START
node_ptr value;
};

struct parse_depth_counter final
{
size_t& depth_;

TOML_NODISCARD_CTOR
explicit parse_depth_counter(size_t& depth) noexcept
: depth_{ depth }
{
depth_++;
}

~parse_depth_counter() noexcept
{
depth_--;
}

parse_depth_counter(const parse_depth_counter&) = delete;
parse_depth_counter(parse_depth_counter&&) = delete;
parse_depth_counter& operator=(const parse_depth_counter&) = delete;
parse_depth_counter& operator=(parse_depth_counter&&) = delete;
};

}
TOML_ANON_NAMESPACE_END;

Expand Down Expand Up @@ -383,6 +417,8 @@ TOML_IMPL_NAMESPACE_START
class parser final
{
private:
static constexpr size_t max_nested_values = TOML_MAX_NESTED_VALUES;

utf8_buffered_reader reader;
table root;
source_position prev_pos = { 1, 1 };
Expand All @@ -393,6 +429,7 @@ TOML_IMPL_NAMESPACE_START
std::string recording_buffer; //for diagnostics
bool recording = false, recording_whitespace = true;
std::string_view current_scope;
size_t nested_values = {};
#if !TOML_EXCEPTIONS
mutable optional<toml::parse_error> err;
#endif
Expand Down Expand Up @@ -1885,6 +1922,14 @@ TOML_IMPL_NAMESPACE_START
assert_or_assume(!is_value_terminator(*cp));
push_parse_scope("value"sv);

const parse_depth_counter depth_counter{ nested_values };
if (nested_values > max_nested_values)
set_error_and_return_default(
"exceeded maximum nested value depth of "sv,
static_cast<uint64_t>(max_nested_values),
" (TOML_MAX_NESTED_VALUES)"sv
);

// check if it begins with some control character
// (note that this will also fail for whitespace but we're assuming we've
// called consume_leading_whitespace() before calling parse_value())
Expand Down
6 changes: 6 additions & 0 deletions include/toml++/toml_preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -386,6 +386,12 @@
#define TOML_PARSER 1
#endif

#ifndef TOML_MAX_NESTED_VALUES
#define TOML_MAX_NESTED_VALUES 256
// this refers to the depth of nested values, e.g. inline tables and arrays.
// 256 is crazy high! if you're hitting this limit with real input, TOML is probably the wrong tool for the job...
#endif

#ifndef DOXYGEN
#if defined(_WIN32) && !defined(TOML_WINDOWS_COMPAT)
#define TOML_WINDOWS_COMPAT 1
Expand Down
14 changes: 14 additions & 0 deletions tests/user_feedback.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,5 +162,19 @@ b = []
});
}

SECTION("github/issues/100") // https://github.com/marzer/tomlplusplus/issues/100
{
// this tests for two separate things that should fail gracefully, not crash:
// 1. pathologically-nested inputs
// 2. a particular sequence of malformed UTF-8

parsing_should_fail(FILE_LINE_ARGS, "fl =[ [[[[[[[[[[[[[[[\x36\x80\x86\x00\x00\x00\x2D\x36\x9F\x20\x00"sv);

std::string s(2048_sz, '[');
constexpr auto start = "fl =[ "sv;
memcpy(s.data(), start.data(), start.length());
parsing_should_fail(FILE_LINE_ARGS, std::string_view{ s });
}

}

9 changes: 4 additions & 5 deletions toml++.natvis
Original file line number Diff line number Diff line change
Expand Up @@ -128,12 +128,11 @@
</Type>

<Type Name="toml::v2::noex::parse_result">
<DisplayString Condition="!is_err">{*reinterpret_cast&lt;toml::v2::table*&gt;(&amp;storage)}</DisplayString>
<DisplayString Condition="is_err">{*reinterpret_cast&lt;toml::v2::noex::parse_error*&gt;(&amp;storage)}</DisplayString>
<DisplayString Condition="!err_">{*reinterpret_cast&lt;toml::v2::table*&gt;(&amp;storage_.bytes)}</DisplayString>
<DisplayString Condition="err_">{*reinterpret_cast&lt;toml::v2::noex::parse_error*&gt;(&amp;storage_.bytes)}</DisplayString>
<Expand>
<Item Name="[table]" Condition="!is_err">*reinterpret_cast&lt;toml::v2::table*&gt;(&amp;storage)</Item>
<Item Name="[error]" Condition="is_err">*reinterpret_cast&lt;toml::v2::noex::parse_error*&gt;(&amp;storage)</Item>
<Item Name="is_err" ExcludeView="simple">is_err</Item>
<Item Name="[table]" Condition="!err_">*reinterpret_cast&lt;toml::v2::table*&gt;(&amp;storage_.bytes)</Item>
<Item Name="[error]" Condition="err_">*reinterpret_cast&lt;toml::v2::noex::parse_error*&gt;(&amp;storage_.bytes)</Item>
</Expand>
</Type>

Expand Down
60 changes: 59 additions & 1 deletion toml.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,12 @@
#define TOML_PARSER 1
#endif

#ifndef TOML_MAX_NESTED_VALUES
#define TOML_MAX_NESTED_VALUES 256
// this refers to the depth of nested values, e.g. inline tables and arrays.
// 256 is crazy high! if you're hitting this limit with real input, TOML is probably the wrong tool for the job...
#endif

#ifndef DOXYGEN
#if defined(_WIN32) && !defined(TOML_WINDOWS_COMPAT)
#define TOML_WINDOWS_COMPAT 1
Expand Down Expand Up @@ -6257,7 +6263,7 @@ TOML_NAMESPACE_START
static constexpr size_t align_ =
(alignof(toml::table) < alignof(parse_error) ? alignof(parse_error) : alignof(toml::table));

alignas(align_) unsigned char bytes[size_ + 1u];
alignas(align_) unsigned char bytes[size_];
};

mutable storage_t storage_;
Expand Down Expand Up @@ -6336,6 +6342,13 @@ TOML_NAMESPACE_START
[[nodiscard]] explicit operator parse_error && () noexcept { return std::move(error()); }
[[nodiscard]] explicit operator const parse_error& () const noexcept { return error(); }

TOML_NODISCARD_CTOR
parse_result() noexcept
: err_{ true }
{
::new (static_cast<void*>(storage_.bytes)) parse_error{ std::string{}, source_region{} };
}

TOML_NODISCARD_CTOR
explicit parse_result(toml::table&& tbl) noexcept
: err_{ false }
Expand Down Expand Up @@ -8934,6 +8947,11 @@ TOML_ANON_NAMESPACE_START
};
#endif
}

error_builder(const error_builder&) = delete;
error_builder(error_builder&&) = delete;
error_builder& operator=(const error_builder&) = delete;
error_builder& operator=(error_builder&&) = delete;
};

struct parse_scope final
Expand All @@ -8953,6 +8971,11 @@ TOML_ANON_NAMESPACE_START
{
storage_ = parent_;
}

parse_scope(const parse_scope&) = delete;
parse_scope(parse_scope&&) = delete;
parse_scope& operator=(const parse_scope&) = delete;
parse_scope& operator=(parse_scope&&) = delete;
};
#define push_parse_scope_2(scope, line) parse_scope ps_##line{ current_scope, scope }
#define push_parse_scope_1(scope, line) push_parse_scope_2(scope, line)
Expand Down Expand Up @@ -8996,13 +9019,15 @@ TOML_ANON_NAMESPACE_START

[[nodiscard]]
TOML_ATTR(pure)
TOML_ALWAYS_INLINE
operator bool() const noexcept
{
return node_ != nullptr;
}

[[nodiscard]]
TOML_ATTR(pure)
TOML_ALWAYS_INLINE
toml::node* get() const noexcept
{
return node_;
Expand All @@ -9028,6 +9053,28 @@ TOML_ANON_NAMESPACE_START
node_ptr value;
};

struct parse_depth_counter final
{
size_t& depth_;

TOML_NODISCARD_CTOR
explicit parse_depth_counter(size_t& depth) noexcept
: depth_{ depth }
{
depth_++;
}

~parse_depth_counter() noexcept
{
depth_--;
}

parse_depth_counter(const parse_depth_counter&) = delete;
parse_depth_counter(parse_depth_counter&&) = delete;
parse_depth_counter& operator=(const parse_depth_counter&) = delete;
parse_depth_counter& operator=(parse_depth_counter&&) = delete;
};

}
TOML_ANON_NAMESPACE_END;

Expand Down Expand Up @@ -9083,6 +9130,8 @@ TOML_IMPL_NAMESPACE_START
class parser final
{
private:
static constexpr size_t max_nested_values = TOML_MAX_NESTED_VALUES;

utf8_buffered_reader reader;
table root;
source_position prev_pos = { 1, 1 };
Expand All @@ -9093,6 +9142,7 @@ TOML_IMPL_NAMESPACE_START
std::string recording_buffer; //for diagnostics
bool recording = false, recording_whitespace = true;
std::string_view current_scope;
size_t nested_values = {};
#if !TOML_EXCEPTIONS
mutable optional<toml::parse_error> err;
#endif
Expand Down Expand Up @@ -10583,6 +10633,14 @@ TOML_IMPL_NAMESPACE_START
assert_or_assume(!is_value_terminator(*cp));
push_parse_scope("value"sv);

const parse_depth_counter depth_counter{ nested_values };
if (nested_values > max_nested_values)
set_error_and_return_default(
"exceeded maximum nested value depth of "sv,
static_cast<uint64_t>(max_nested_values),
" (TOML_MAX_NESTED_VALUES)"sv
);

// check if it begins with some control character
// (note that this will also fail for whitespace but we're assuming we've
// called consume_leading_whitespace() before calling parse_value())
Expand Down
Loading

0 comments on commit a29ecda

Please sign in to comment.