diff --git a/.phpstorm.meta.php b/.phpstorm.meta.php index 0b56ab530d..5eb9270da0 100644 --- a/.phpstorm.meta.php +++ b/.phpstorm.meta.php @@ -31,6 +31,7 @@ 'html_input', 'allow_unsafe_links', 'max_nesting_level', + 'max_delimiters_per_line', 'renderer', 'renderer/block_separator', 'renderer/inner_separator', diff --git a/CHANGELOG.md b/CHANGELOG.md index 7024144cb4..7fd3207bcd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,14 @@ Updates should follow the [Keep a CHANGELOG](https://keepachangelog.com/) princi ### Added +- Added `max_delimiters_per_line` config option to prevent denial of service attacks when parsing malicious input +- Added `table/max_autocompleted_cells` config option to prevent denial of service attacks when parsing large tables +- The `AttributesExtension` now supports attributes without values (#985, #986) +- The `AutolinkExtension` exposes two new configuration options to override the default behavior (#969, #987): + - `autolink/allowed_protocols` - an array of protocols to allow autolinking for + - `autolink/default_protocol` - the default protocol to use when none is specified - Added `RegexHelper::isWhitespace()` method to check if a given character is an ASCII whitespace character - Added `CacheableDelimiterProcessorInterface` to ensure linear complexity for dynamic delimiter processing -- Added `table/max_autocompleted_cells` config option to prevent denial of service attacks when parsing large tables - Added `Bracket` delimiter type to optimize bracket parsing ### Changed diff --git a/docs/2.5/configuration.md b/docs/2.5/configuration.md index 472b99ff56..5991f9e1cf 100644 --- a/docs/2.5/configuration.md +++ b/docs/2.5/configuration.md @@ -27,6 +27,7 @@ $config = [ 'html_input' => 'escape', 'allow_unsafe_links' => false, 'max_nesting_level' => PHP_INT_MAX, + 'max_delimiters_per_line' => PHP_INT_MAX, 'slug_normalizer' => [ 'max_length' => 255, ], @@ -73,6 +74,7 @@ Here's a list of the core configuration options available: - `escape` - Escape all HTML - `allow_unsafe_links` - Remove risky link and image URLs by setting this to `false` (default: `true`) - `max_nesting_level` - The maximum nesting level for blocks (default: `PHP_INT_MAX`). Setting this to a positive integer can help protect against long parse times and/or segfaults if blocks are too deeply-nested. +- `max_delimiters_per_line` - The maximum number of delimiters (e.g. `*` or `_`) allowed in a single line (default: `PHP_INT_MAX`). Setting this to a positive integer can help protect against long parse times and/or segfaults if lines are too long. - `slug_normalizer` - Array of options for configuring how URL-safe slugs are created; see [the slug normalizer docs](/2.5/customization/slug-normalizer/#configuration) for more details - `instance` - An alternative normalizer to use (defaults to the included `SlugNormalizer`) - `max_length` - Limits the size of generated slugs (defaults to 255 characters) diff --git a/docs/2.5/security.md b/docs/2.5/security.md index b4bd8ab598..8573e69352 100644 --- a/docs/2.5/security.md +++ b/docs/2.5/security.md @@ -11,7 +11,8 @@ In order to be fully compliant with the CommonMark spec, certain security settin - `html_input`: How to handle raw HTML - `allow_unsafe_links`: Whether unsafe links are permitted -- `max_nesting_level`: Protected against long render times or segfaults +- `max_nesting_level`: Protect against long render times or segfaults +- `max_delimiters_per_line`: Protect against long parse times or rendering segfaults Further information about each option can be found below. @@ -88,6 +89,25 @@ echo $converter->convert($markdown); See the [configuration](/2.5/configuration/) section for more information. +## Max Delimiters Per Line + +Similarly to the maximum nesting level, **no maximum number of delimiters per line is enforced by default.** Delimiters can be nested (like `*a **b** c*`) or un-nested (like `*a* *b* *c*`) - in either case, having too many in a single line can result in long parse times. We therefore have a separate option to limit the number of delimiters per line. + +If you need to parse untrusted input, consider setting a reasonable `max_delimiters_per_line` (perhaps 100-1000) depending on your needs. Once this level is hit, any subsequent delimiters on that line will be rendered as plain text. + +### Example - Prevent too many delimiters + +```php +use League\CommonMark\CommonMarkConverter; + +$markdown = '*a* **b *c **d** c* b**'; // 8 delimiters (* and **) + +$converter = new CommonMarkConverter(['max_delimiters_per_line' => 6]); +echo $converter->convert($markdown); + +//

a **b *c d c* b**

+``` + ## Additional Filtering Although this library does offer these security features out-of-the-box, some users may opt to also run the HTML output through additional filtering layers (like HTMLPurifier). If you do this, make sure you **thoroughly** test your additional post-processing steps and configure them to work properly with the types of HTML elements and attributes that converted Markdown might produce, otherwise, you may end up with weird behavior like missing images, broken links, mismatched HTML tags, etc. diff --git a/docs/2.6/upgrading.md b/docs/2.6/upgrading.md index d19d1760a5..5d6e91c3c6 100644 --- a/docs/2.6/upgrading.md +++ b/docs/2.6/upgrading.md @@ -7,6 +7,13 @@ redirect_from: /upgrading/ # Upgrading from 2.5 to 2.6 +## `max_delimiters_per_line` Configuration Option + +The `max_delimiters_per_line` configuration option was added in 2.6 to help protect against malicious input that could +cause excessive memory usage or denial of service attacks. It defaults to `PHP_INT_MAX` (no limit) for backwards +compatibility, which is safe when parsing trusted input. However, if you're parsing untrusted input from users, you +should probably set this to a reasonable value (somewhere between `100` and `1000`) to protect against malicious inputs. + ## Custom Delimiter Processors If you're implementing a custom delimiter processor, and `getDelimiterUse()` has more logic than just a diff --git a/src/Delimiter/DelimiterStack.php b/src/Delimiter/DelimiterStack.php index 665b03799e..cf2a41e524 100644 --- a/src/Delimiter/DelimiterStack.php +++ b/src/Delimiter/DelimiterStack.php @@ -39,8 +39,13 @@ final class DelimiterStack */ private $missingIndexCache; - public function __construct() + + private int $remainingDelimiters = 0; + + public function __construct(int $maximumStackSize = PHP_INT_MAX) { + $this->remainingDelimiters = $maximumStackSize; + if (\PHP_VERSION_ID >= 80000) { /** @psalm-suppress PropertyTypeCoercion */ $this->missingIndexCache = new \WeakMap(); // @phpstan-ignore-line @@ -51,6 +56,10 @@ public function __construct() public function push(DelimiterInterface $newDelimiter): void { + if ($this->remainingDelimiters-- <= 0) { + return; + } + $newDelimiter->setPrevious($this->top); if ($this->top !== null) { diff --git a/src/Environment/Environment.php b/src/Environment/Environment.php index 3c24749bbb..a8112967e4 100644 --- a/src/Environment/Environment.php +++ b/src/Environment/Environment.php @@ -432,6 +432,7 @@ public static function createDefaultConfiguration(): Configuration 'html_input' => Expect::anyOf(HtmlFilter::STRIP, HtmlFilter::ALLOW, HtmlFilter::ESCAPE)->default(HtmlFilter::ALLOW), 'allow_unsafe_links' => Expect::bool(true), 'max_nesting_level' => Expect::type('int')->default(PHP_INT_MAX), + 'max_delimiters_per_line' => Expect::type('int')->default(PHP_INT_MAX), 'renderer' => Expect::structure([ 'block_separator' => Expect::string("\n"), 'inner_separator' => Expect::string("\n"), diff --git a/src/Parser/InlineParserContext.php b/src/Parser/InlineParserContext.php index 796f2f388c..9372904281 100644 --- a/src/Parser/InlineParserContext.php +++ b/src/Parser/InlineParserContext.php @@ -42,12 +42,12 @@ final class InlineParserContext */ private array $matches; - public function __construct(Cursor $contents, AbstractBlock $container, ReferenceMapInterface $referenceMap) + public function __construct(Cursor $contents, AbstractBlock $container, ReferenceMapInterface $referenceMap, int $maxDelimitersPerLine = PHP_INT_MAX) { $this->referenceMap = $referenceMap; $this->container = $container; $this->cursor = $contents; - $this->delimiterStack = new DelimiterStack(); + $this->delimiterStack = new DelimiterStack($maxDelimitersPerLine); } public function getContainer(): AbstractBlock diff --git a/src/Parser/InlineParserEngine.php b/src/Parser/InlineParserEngine.php index b91a63f72f..6a26979329 100644 --- a/src/Parser/InlineParserEngine.php +++ b/src/Parser/InlineParserEngine.php @@ -59,7 +59,7 @@ public function parse(string $contents, AbstractBlock $block): void $contents = \trim($contents); $cursor = new Cursor($contents); - $inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap); + $inlineParserContext = new InlineParserContext($cursor, $block, $this->referenceMap, $this->environment->getConfiguration()->get('max_delimiters_per_line')); // Have all parsers look at the line to determine what they might want to parse and what positions they exist at foreach ($this->matchParsers($contents) as $matchPosition => $parsers) { diff --git a/tests/functional/MaxDelimitersPerLineTest.php b/tests/functional/MaxDelimitersPerLineTest.php new file mode 100644 index 0000000000..1de9697dc5 --- /dev/null +++ b/tests/functional/MaxDelimitersPerLineTest.php @@ -0,0 +1,50 @@ + + * + * For the full copyright and license information, please view the LICENSE + * file that was distributed with this source code. + */ + +namespace League\CommonMark\Tests\Functional; + +use League\CommonMark\CommonMarkConverter; +use PHPUnit\Framework\TestCase; + +final class MaxDelimitersPerLineTest extends TestCase +{ + /** + * @dataProvider provideTestCases + */ + public function testIt(string $input, int $maxDelimsPerLine, string $expectedOutput): void + { + $converter = new CommonMarkConverter(['max_delimiters_per_line' => $maxDelimsPerLine]); + + $this->assertEquals($expectedOutput, \trim($converter->convert($input)->getContent())); + } + + /** + * @return iterable> + */ + public function provideTestCases(): iterable + { + yield ['*a* **b *c* b**', 6, '

a b c b

']; + + yield ['*a* **b *c **d** c* b**', 0, '

*a* **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 1, '

*a* **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 2, '

a **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 3, '

a **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 4, '

a **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 5, '

a **b *c **d** c* b**

']; + yield ['*a* **b *c **d** c* b**', 6, '

a **b *c d c* b**

']; + yield ['*a* **b *c **d** c* b**', 7, '

a **b c d c b**

']; + yield ['*a* **b *c **d** c* b**', 8, '

a b c d c b

']; + yield ['*a* **b *c **d** c* b**', 9, '

a b c d c b

']; + yield ['*a* **b *c **d** c* b**', 100, '

a b c d c b

']; + } +}