diff --git a/test/html5/test_sanitizer.rb b/test/html5/test_sanitizer.rb index 91b5d6f..0799a6d 100755 --- a/test/html5/test_sanitizer.rb +++ b/test/html5/test_sanitizer.rb @@ -17,18 +17,16 @@ def sanitize_html(stream) Loofah.fragment(stream).scrub!(:escape).to_html end - def check_sanitization(input, htmloutput, xhtmloutput, rexmloutput) - ## libxml uses double-quotes, so let's swappo-boppo our quotes before comparing. + def check_sanitization(input, *possible_answers) + # shotgun approach - if any of the possible answers match, we win + + # libxml uses double-quotes, so let's swappo-boppo our quotes before comparing. sane = sanitize_html(input).gsub('"', "'") - htmloutput = htmloutput.gsub('"', "'") - xhtmloutput = xhtmloutput.gsub('"', "'") - rexmloutput = rexmloutput.gsub('"', "'") + possible_output = possible_answers.compact.map do |possible_answer| + possible_answer.gsub('"', "'") + end - ## HTML5's parsers are shit. there's so much inconsistency with what has closing tags, etc, that - ## it would require a lot of manual hacking to make the tests match libxml's output. - ## instead, I'm taking the shotgun approach, and trying to match any of the described outputs. - assert((htmloutput == sane) || (rexmloutput == sane) || (xhtmloutput == sane), - %Q{given: "#{input}"\nexpected: "#{htmloutput}"\ngot: "#{sane}"}) + assert_includes(possible_output, sane) end def assert_completes_in_reasonable_time(&block) @@ -81,7 +79,7 @@ def assert_completes_in_reasonable_time(&block) # define_method "test_should_forbid_#{tag_name.upcase}_tag" do # input = "<#{tag_name.upcase} title='1'>foo bar baz" # output = "<#{tag_name.upcase} title=\"1\">foo <bad>bar</bad> baz</#{tag_name.upcase}>" - # check_sanitization(input, output, output, output) + # check_sanitization(input, output) # end # end @@ -96,7 +94,7 @@ def assert_completes_in_reasonable_time(&block) output = "

foo <bad>bar</bad> baz

" htmloutput = "

foo <bad>bar</bad> baz

" end - check_sanitization(input, htmloutput, output, output) + check_sanitization(input, htmloutput, output) end end @@ -104,28 +102,28 @@ def test_should_allow_data_attributes input = "

foo bar baz

" output = "

foo <bad>bar</bad> baz

" - check_sanitization(input, output, output, output) + check_sanitization(input, output) end def test_should_allow_multi_word_data_attributes input = "

foo bar baz

" output = "

foo <bad>bar</bad> baz

" - check_sanitization(input, output, output, output) + check_sanitization(input, output) end def test_should_allow_empty_data_attributes input = "

foo bar baz

" output = "

foo <bad>bar</bad> baz

" - check_sanitization(input, output, output, output) + check_sanitization(input, output) end def test_should_allow_contenteditable input = '

Hi!

' output = '

Hi!

' - check_sanitization(input, output, output, output) + check_sanitization(input, output) end ## @@ -135,7 +133,7 @@ def test_should_allow_contenteditable # define_method "test_should_forbid_#{attribute_name.upcase}_attribute" do # input = "

foo bar baz

" # output = "

foo <bad>bar</bad> baz

" - # check_sanitization(input, output, output, output) + # check_sanitization(input, output) # end # end @@ -143,7 +141,7 @@ def test_should_allow_contenteditable define_method "test_should_allow_#{protocol}_uris" do input = %(foo) output = "foo" - check_sanitization(input, output, output, output) + check_sanitization(input, output) end end @@ -151,7 +149,7 @@ def test_should_allow_contenteditable define_method "test_should_allow_uppercase_#{protocol}_uris" do input = %(foo) output = "foo" - check_sanitization(input, output, output, output) + check_sanitization(input, output) end end @@ -159,42 +157,44 @@ def test_should_allow_contenteditable define_method "test_should_allow_data_#{data_uri_type}_uris" do input = %(foo) output = "foo" - check_sanitization(input, output, output, output) + check_sanitization(input, output) input = %(foo) output = "foo" - check_sanitization(input, output, output, output) + check_sanitization(input, output) end define_method "test_should_allow_uppercase_data_#{data_uri_type}_uris" do input = %(foo) output = "foo" - check_sanitization(input, output, output, output) + check_sanitization(input, output) end end def test_should_disallow_other_uri_mediatypes input = %(foo) output = "foo" - check_sanitization(input, output, output, output) + check_sanitization(input, output) input = %(foo) output = "foo" - check_sanitization(input, output, output, output) + check_sanitization(input, output) input = %(foo) output = "foo" - check_sanitization(input, output, output, output) + + check_sanitization(input, output) input = %(foo) output = "foo" - check_sanitization(input, output, output, output) + check_sanitization(input, output) # https://hackerone.com/bugs?report_id=1694173 # https://github.com/w3c/svgwg/issues/266 input = %() output = "" - check_sanitization(input, output, output, output) + + check_sanitization(input, output) end HTML5::SafeList::SVG_ALLOW_LOCAL_HREF.each do |tag_name| @@ -239,18 +239,18 @@ def test_figure_element_is_valid # def test_should_handle_astral_plane_characters # input = "

𝒵 𝔸

" # output = "

\360\235\222\265 \360\235\224\270

" - # check_sanitization(input, output, output, output) + # check_sanitization(input, output) # input = "

\360\235\224\270 a

" # output = "

\360\235\224\270 a

" - # check_sanitization(input, output, output, output) + # check_sanitization(input, output) # end # This affects only NS4. Is it worth fixing? # def test_javascript_includes # input = %(
foo
) # output = "
foo
" - # check_sanitization(input, output, output, output) + # check_sanitization(input, output) # end ## @@ -263,12 +263,11 @@ def test_figure_element_is_valid Dir[File.join(File.dirname(__FILE__), "..", "assets", "testdata_sanitizer_tests1.dat")].each do |filename| JSON::parse(open(filename).read).each do |test| it "testdata sanitizer #{test["name"]}" do - check_sanitization( - test["input"], - test["output"], - test["xhtml"] || test["output"], - test["rexml"] || test["output"] - ) + test.delete("name") + test.delete("commentary") + input = test.delete("input") + outputs = test.keys.sort.map { |k| test[k] } + check_sanitization(input, *outputs) end end end @@ -278,13 +277,13 @@ def test_figure_element_is_valid define_method "test_allow_uri_refs_in_svg_attribute_#{attr_name}" do input = "" output = "" - check_sanitization(input, output, output, output) + check_sanitization(input, output) end define_method "test_disallow_absolute_uri_refs_in_svg_attribute_#{attr_name}" do input = "" output = "" - check_sanitization(input, output, output, output) + check_sanitization(input, output) end end