From dbf0aa8cd58593ebc6942d47df42cfbc80f060d1 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Mon, 6 Nov 2023 17:56:11 -0400 Subject: [PATCH 1/8] don't parse nodes inside a comment tag Co-authored-by: Alex Coco --- lib/liquid/tags/comment.rb | 53 ++++++++++++++++ test/unit/tags/comment_tag_unit_test.rb | 82 +++++++++++++++++++++++++ 2 files changed, 135 insertions(+) create mode 100644 test/unit/tags/comment_tag_unit_test.rb diff --git a/lib/liquid/tags/comment.rb b/lib/liquid/tags/comment.rb index 9922ee0dd..b60ef01b6 100644 --- a/lib/liquid/tags/comment.rb +++ b/lib/liquid/tags/comment.rb @@ -25,6 +25,59 @@ def unknown_tag(_tag, _markup, _tokens) def blank? true end + + private + + def parse_body(body, tokens) + if parse_context.depth >= MAX_DEPTH + raise StackLevelError, "Nesting too deep" + end + + parse_context.depth += 1 + comment_tag_depth = 1 + + begin + # Consume tokens without creating child nodes. + # The children tag doesn't require to be a valid Liquid except the comment and raw tag. + # The child comment and raw tag must be closed. + while (token = tokens.send(:shift)) + tag_name_match = BlockBody::FullToken.match(token) + + next if tag_name_match.nil? + + tag_name = tag_name_match[2] + + if tag_name == "raw" + # raw tags are required to be closed + parse_raw_tag_body(tokens) + next + end + + if tag_name_match[2] == "comment" + comment_tag_depth += 1 + next + elsif tag_name_match[2] == "endcomment" + comment_tag_depth -= 1 + + return false if comment_tag_depth.zero? + end + end + + raise_tag_never_closed(block_name) + ensure + parse_context.depth -= 1 + end + + false + end + + def parse_raw_tag_body(tokens) + while (token = tokens.send(:shift)) + return if token =~ Raw::FullTokenPossiblyInvalid && "endraw" == Regexp.last_match(2) + end + + raise_tag_never_closed("raw") + end end Template.register_tag('comment', Comment) diff --git a/test/unit/tags/comment_tag_unit_test.rb b/test/unit/tags/comment_tag_unit_test.rb new file mode 100644 index 000000000..457d224ad --- /dev/null +++ b/test/unit/tags/comment_tag_unit_test.rb @@ -0,0 +1,82 @@ +# frozen_string_literal: true + +require 'test_helper' + +class CommentTagUnitTest < Minitest::Test + def test_does_not_parse_nodes_inside_a_comment + template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + {% comment %} + {% if true %} + {% if ... %} + {%- for ? -%} + {% while true %} + {% + unless if + %} + {% endcase %} + {% endcomment %} + LIQUID + + assert_equal("", template.render) + end + + def test_child_comment_tags_need_to_be_closed + template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + {% comment %} + {% comment %} + {% comment %}{% endcomment %} + {% endcomment %} + {% endcomment %} + LIQUID + + assert_equal("", template.render) + + assert_raises(Liquid::SyntaxError) do + Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + {% comment %} + {% comment %} + {% comment %} + {% endcomment %} + {% endcomment %} + LIQUID + end + end + + def test_child_raw_tags_need_to_be_closed + template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + {% comment %} + {% raw %} + {% endcomment %} + {% endraw %} + {% endcomment %} + LIQUID + + assert_equal("", template.render) + + assert_raises(Liquid::SyntaxError) do + Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + {% comment %} + {% raw %} + {% endcomment %} + {% endcomment %} + LIQUID + end + end + + def test_error_line_number_is_correct + template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + {% comment %} + {% if true %} + {% endcomment %} + {{ errors.standard_error }} + LIQUID + + output = template.render('errors' => ErrorDrop.new) + expected = <<~TEXT.chomp + + Liquid error (line 4): standard error + TEXT + + assert_equal(expected, output) + end +end From eada2b65a2aad7eb291e10cfdb1242f767c395cc Mon Sep 17 00:00:00 2001 From: Michael Go Date: Wed, 8 Nov 2023 11:13:13 -0400 Subject: [PATCH 2/8] clean up comment tag body parsing Co-authored-by: Peter Zhu --- lib/liquid/tags/comment.rb | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/lib/liquid/tags/comment.rb b/lib/liquid/tags/comment.rb index b60ef01b6..5c7315e0e 100644 --- a/lib/liquid/tags/comment.rb +++ b/lib/liquid/tags/comment.rb @@ -47,16 +47,12 @@ def parse_body(body, tokens) tag_name = tag_name_match[2] - if tag_name == "raw" - # raw tags are required to be closed + case tag_name + when "raw" parse_raw_tag_body(tokens) - next - end - - if tag_name_match[2] == "comment" + when "comment" comment_tag_depth += 1 - next - elsif tag_name_match[2] == "endcomment" + when "endcomment" comment_tag_depth -= 1 return false if comment_tag_depth.zero? From 2abf52d5465004a30dc303a280838de862b84388 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Wed, 8 Nov 2023 11:23:00 -0400 Subject: [PATCH 3/8] add a quirky comment tag unit test --- test/unit/tags/comment_tag_unit_test.rb | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/unit/tags/comment_tag_unit_test.rb b/test/unit/tags/comment_tag_unit_test.rb index 457d224ad..f201e2777 100644 --- a/test/unit/tags/comment_tag_unit_test.rb +++ b/test/unit/tags/comment_tag_unit_test.rb @@ -20,6 +20,16 @@ def test_does_not_parse_nodes_inside_a_comment assert_equal("", template.render) end + def test_allows_incomplete_tags_inside_a_comment + template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + {% comment %} + {% assign foo = "1" + {% endcomment %} + LIQUID + + assert_equal("", template.render) + end + def test_child_comment_tags_need_to_be_closed template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) {% comment %} From a681e73aec7768b7a50778882781e1a9aee1eedc Mon Sep 17 00:00:00 2001 From: Michael Go Date: Wed, 8 Nov 2023 16:45:11 -0400 Subject: [PATCH 4/8] refactor comment tag unit test to use test helper --- test/unit/tags/comment_tag_unit_test.rb | 31 ++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/test/unit/tags/comment_tag_unit_test.rb b/test/unit/tags/comment_tag_unit_test.rb index f201e2777..8093cd42f 100644 --- a/test/unit/tags/comment_tag_unit_test.rb +++ b/test/unit/tags/comment_tag_unit_test.rb @@ -4,7 +4,7 @@ class CommentTagUnitTest < Minitest::Test def test_does_not_parse_nodes_inside_a_comment - template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + assert_template_result("", <<~LIQUID.chomp) {% comment %} {% if true %} {% if ... %} @@ -16,22 +16,31 @@ def test_does_not_parse_nodes_inside_a_comment {% endcase %} {% endcomment %} LIQUID - - assert_equal("", template.render) end def test_allows_incomplete_tags_inside_a_comment - template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + assert_template_result("", <<~LIQUID.chomp) {% comment %} {% assign foo = "1" {% endcomment %} LIQUID - assert_equal("", template.render) + assert_template_result("", <<~LIQUID.chomp) + {% comment %} + {% comment %} + {% invalid + {% endcomment %} + {% endcomment %} + LIQUID + + assert_template_result("", <<~LIQUID.chomp) + {% comment %} + {% {{ {%- endcomment %} + LIQUID end def test_child_comment_tags_need_to_be_closed - template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + assert_template_result("", <<~LIQUID.chomp) {% comment %} {% comment %} {% comment %}{% endcomment %} @@ -39,10 +48,8 @@ def test_child_comment_tags_need_to_be_closed {% endcomment %} LIQUID - assert_equal("", template.render) - assert_raises(Liquid::SyntaxError) do - Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + assert_template_result("", <<~LIQUID.chomp) {% comment %} {% comment %} {% comment %} @@ -53,7 +60,7 @@ def test_child_comment_tags_need_to_be_closed end def test_child_raw_tags_need_to_be_closed - template = Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + assert_template_result("", <<~LIQUID.chomp) {% comment %} {% raw %} {% endcomment %} @@ -61,10 +68,8 @@ def test_child_raw_tags_need_to_be_closed {% endcomment %} LIQUID - assert_equal("", template.render) - assert_raises(Liquid::SyntaxError) do - Liquid::Template.parse(<<~LIQUID.chomp, line_numbers: true) + Liquid::Template.parse(<<~LIQUID.chomp) {% comment %} {% raw %} {% endcomment %} From e180535784b0375340dfc3fd7696d117f9f2a2ec Mon Sep 17 00:00:00 2001 From: Michael Go Date: Wed, 8 Nov 2023 16:47:24 -0400 Subject: [PATCH 5/8] allow incomplete tags inside a comment tag --- lib/liquid/block_body.rb | 1 + lib/liquid/tags/comment.rb | 4 ++-- lib/liquid/tags/raw.rb | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/liquid/block_body.rb b/lib/liquid/block_body.rb index 33a6a99f5..61096de80 100644 --- a/lib/liquid/block_body.rb +++ b/lib/liquid/block_body.rb @@ -6,6 +6,7 @@ module Liquid class BlockBody LiquidTagToken = /\A\s*(#{TagName})\s*(.*?)\z/o FullToken = /\A#{TagStart}#{WhitespaceControl}?(\s*)(#{TagName})(\s*)(.*?)#{WhitespaceControl}?#{TagEnd}\z/om + FullTokenPossiblyInvalid = /\A(.*)#{TagStart}#{WhitespaceControl}?\s*(\w+)\s*(.*)?#{WhitespaceControl}?#{TagEnd}\z/om ContentOfVariable = /\A#{VariableStart}#{WhitespaceControl}?(.*?)#{WhitespaceControl}?#{VariableEnd}\z/om WhitespaceOrNothing = /\A\s*\z/ TAGSTART = "{%" diff --git a/lib/liquid/tags/comment.rb b/lib/liquid/tags/comment.rb index 5c7315e0e..df7b294c3 100644 --- a/lib/liquid/tags/comment.rb +++ b/lib/liquid/tags/comment.rb @@ -41,7 +41,7 @@ def parse_body(body, tokens) # The children tag doesn't require to be a valid Liquid except the comment and raw tag. # The child comment and raw tag must be closed. while (token = tokens.send(:shift)) - tag_name_match = BlockBody::FullToken.match(token) + tag_name_match = BlockBody::FullTokenPossiblyInvalid.match(token) next if tag_name_match.nil? @@ -69,7 +69,7 @@ def parse_body(body, tokens) def parse_raw_tag_body(tokens) while (token = tokens.send(:shift)) - return if token =~ Raw::FullTokenPossiblyInvalid && "endraw" == Regexp.last_match(2) + return if token =~ BlockBody::FullTokenPossiblyInvalid && "endraw" == Regexp.last_match(2) end raise_tag_never_closed("raw") diff --git a/lib/liquid/tags/raw.rb b/lib/liquid/tags/raw.rb index 7f3dec9b1..02ee2b3cb 100644 --- a/lib/liquid/tags/raw.rb +++ b/lib/liquid/tags/raw.rb @@ -14,7 +14,6 @@ module Liquid # @liquid_syntax_keyword expression The expression to be output without being rendered. class Raw < Block Syntax = /\A\s*\z/ - FullTokenPossiblyInvalid = /\A(.*)#{TagStart}#{WhitespaceControl}?\s*(\w+)\s*(.*)?#{WhitespaceControl}?#{TagEnd}\z/om def initialize(tag_name, markup, parse_context) super @@ -25,7 +24,7 @@ def initialize(tag_name, markup, parse_context) def parse(tokens) @body = +'' while (token = tokens.shift) - if token =~ FullTokenPossiblyInvalid && block_delimiter == Regexp.last_match(2) + if token =~ BlockBody::FullTokenPossiblyInvalid && block_delimiter == Regexp.last_match(2) parse_context.trim_whitespace = (token[-3] == WhitespaceControl) @body << Regexp.last_match(1) if Regexp.last_match(1) != "" return From 41f65173b0ac8f2e18a5d9e15e50f4c934f3f20a Mon Sep 17 00:00:00 2001 From: Michael Go Date: Thu, 9 Nov 2023 16:26:49 -0400 Subject: [PATCH 6/8] fix parsing comment block body inside a liquid tag --- lib/liquid/tags/comment.rb | 26 +++++++++++++++++-------- test/unit/tags/comment_tag_unit_test.rb | 13 +++++++++++++ 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/lib/liquid/tags/comment.rb b/lib/liquid/tags/comment.rb index df7b294c3..060a0293c 100644 --- a/lib/liquid/tags/comment.rb +++ b/lib/liquid/tags/comment.rb @@ -28,7 +28,7 @@ def blank? private - def parse_body(body, tokens) + def parse_body(body, tokenizer) if parse_context.depth >= MAX_DEPTH raise StackLevelError, "Nesting too deep" end @@ -40,16 +40,26 @@ def parse_body(body, tokens) # Consume tokens without creating child nodes. # The children tag doesn't require to be a valid Liquid except the comment and raw tag. # The child comment and raw tag must be closed. - while (token = tokens.send(:shift)) - tag_name_match = BlockBody::FullTokenPossiblyInvalid.match(token) + while (token = tokenizer.send(:shift)) + tag_name = if tokenizer.for_liquid_tag + next if token.empty? || token.match?(BlockBody::WhitespaceOrNothing) - next if tag_name_match.nil? + tag_name_match = BlockBody::LiquidTagToken.match(token) - tag_name = tag_name_match[2] + next if tag_name_match.nil? + + tag_name_match[1] + else + tag_name_match = BlockBody::FullTokenPossiblyInvalid.match(token) + + next if tag_name_match.nil? + + tag_name_match[2] + end case tag_name when "raw" - parse_raw_tag_body(tokens) + parse_raw_tag_body(tokenizer) when "comment" comment_tag_depth += 1 when "endcomment" @@ -67,8 +77,8 @@ def parse_body(body, tokens) false end - def parse_raw_tag_body(tokens) - while (token = tokens.send(:shift)) + def parse_raw_tag_body(tokenizer) + while (token = tokenizer.send(:shift)) return if token =~ BlockBody::FullTokenPossiblyInvalid && "endraw" == Regexp.last_match(2) end diff --git a/test/unit/tags/comment_tag_unit_test.rb b/test/unit/tags/comment_tag_unit_test.rb index 8093cd42f..64f96f3a1 100644 --- a/test/unit/tags/comment_tag_unit_test.rb +++ b/test/unit/tags/comment_tag_unit_test.rb @@ -3,6 +3,19 @@ require 'test_helper' class CommentTagUnitTest < Minitest::Test + def test_comment_inside_liquid_tag + assert_template_result("", <<~LIQUID.chomp) + {% liquid + if 1 != 1 + comment + else + echo 123 + endcomment + endif + %} + LIQUID + end + def test_does_not_parse_nodes_inside_a_comment assert_template_result("", <<~LIQUID.chomp) {% comment %} From 6dafc19b6d9186a2d0ee2f330f9f349567150ee8 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Fri, 10 Nov 2023 15:42:54 -0400 Subject: [PATCH 7/8] fix parsing comment tag delimiter with extra strings --- lib/liquid/tags/comment.rb | 5 +++++ test/unit/tags/comment_tag_unit_test.rb | 14 ++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/lib/liquid/tags/comment.rb b/lib/liquid/tags/comment.rb index 060a0293c..d6f855711 100644 --- a/lib/liquid/tags/comment.rb +++ b/lib/liquid/tags/comment.rb @@ -15,6 +15,8 @@ module Liquid # {% endcomment %} # @liquid_syntax_keyword content The content of the comment. class Comment < Block + TagDelimiter = /\A(.*)#{TagStart}#{WhitespaceControl}?\s*(endcomment)\s*(.*)?#{WhitespaceControl}?#{TagEnd}\z/om + def render_to_output_buffer(_context, output) output end @@ -49,6 +51,9 @@ def parse_body(body, tokenizer) next if tag_name_match.nil? tag_name_match[1] + elsif TagDelimiter.match?(token) + # aggressively match comment delimiter first + "endcomment" else tag_name_match = BlockBody::FullTokenPossiblyInvalid.match(token) diff --git a/test/unit/tags/comment_tag_unit_test.rb b/test/unit/tags/comment_tag_unit_test.rb index 64f96f3a1..11d2fd152 100644 --- a/test/unit/tags/comment_tag_unit_test.rb +++ b/test/unit/tags/comment_tag_unit_test.rb @@ -107,4 +107,18 @@ def test_error_line_number_is_correct assert_equal(expected, output) end + + def test_comment_tag_delimiter_with_extra_strings + assert_template_result( + '', + <<~LIQUID.chomp, + {% comment %} + {% comment %} + {% endcomment + {% if true %} + {% endif %} + {% endcomment %} + LIQUID + ) + end end From 6a5ebb0e856cc481ee64f802e4caf940a9d90488 Mon Sep 17 00:00:00 2001 From: Michael Go Date: Sat, 11 Nov 2023 12:19:44 -0400 Subject: [PATCH 8/8] fix parsing nested comment tag with extra strings --- lib/liquid/tags/comment.rb | 9 ++++++--- test/unit/tags/comment_tag_unit_test.rb | 14 ++++++++++++++ 2 files changed, 20 insertions(+), 3 deletions(-) diff --git a/lib/liquid/tags/comment.rb b/lib/liquid/tags/comment.rb index d6f855711..0446685ba 100644 --- a/lib/liquid/tags/comment.rb +++ b/lib/liquid/tags/comment.rb @@ -15,7 +15,7 @@ module Liquid # {% endcomment %} # @liquid_syntax_keyword content The content of the comment. class Comment < Block - TagDelimiter = /\A(.*)#{TagStart}#{WhitespaceControl}?\s*(endcomment)\s*(.*)?#{WhitespaceControl}?#{TagEnd}\z/om + TAG_DELIMITER = /\A(.*)#{TagStart}#{WhitespaceControl}?\s*(endcomment)\s*(.*)?#{WhitespaceControl}?#{TagEnd}\z/om def render_to_output_buffer(_context, output) output @@ -51,9 +51,12 @@ def parse_body(body, tokenizer) next if tag_name_match.nil? tag_name_match[1] - elsif TagDelimiter.match?(token) - # aggressively match comment delimiter first + elsif TAG_DELIMITER.match?(token) + # aggressively match comment delimiter "endcomment" + elsif token =~ BlockBody::FullToken && Regexp.last_match(2) == "comment" + # aggressively match comment tag + "comment" else tag_name_match = BlockBody::FullTokenPossiblyInvalid.match(token) diff --git a/test/unit/tags/comment_tag_unit_test.rb b/test/unit/tags/comment_tag_unit_test.rb index 11d2fd152..9f812b71b 100644 --- a/test/unit/tags/comment_tag_unit_test.rb +++ b/test/unit/tags/comment_tag_unit_test.rb @@ -121,4 +121,18 @@ def test_comment_tag_delimiter_with_extra_strings LIQUID ) end + + def test_nested_comment_tag_with_extra_strings + assert_template_result( + '', + <<~LIQUID.chomp, + {% comment %} + {% comment + {% assign foo = 1 %} + {% endcomment + {% assign foo = 1 %} + {% endcomment %} + LIQUID + ) + end end