diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index f76aed07..84fc5ca5 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -83,9 +83,6 @@ class BaseParser ATTDEF_RE = /#{ATTDEF}/ ATTLISTDECL_START = /\A\s*/um - NOTATIONDECL_START = /\A\s*/um - SYSTEM = /\A\s*/um TEXT_PATTERN = /\A([^<]*)/um @@ -103,6 +100,10 @@ class BaseParser GEDECL = "" ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um + NOTATIONDECL_START = /\A\s*/um + SYSTEM = /\A\s*/um + EREFERENCE = /&(?!#{NAME};)/ DEFAULT_ENTITIES = { @@ -315,12 +316,22 @@ def pull_event md = nil if @source.match( PUBLIC ) md = @source.match( PUBLIC, true ) - vals = [md[1],md[2],md[4],md[6]] + pubid = system = nil + pubid_literal = md[3] + pubid = pubid_literal[1..-2] if pubid_literal # Remove quote + system_literal = md[4] + system = system_literal[1..-2] if system_literal # Remove quote + vals = [md[1], md[2], pubid, system] elsif @source.match( SYSTEM ) md = @source.match( SYSTEM, true ) - vals = [md[1],md[2],nil,md[4]] + system = nil + system_literal = md[3] + system = system_literal[1..-2] if system_literal # Remove quote + vals = [md[1], md[2], nil, system] else - raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source ) + details = notation_decl_invalid_details + message = "Malformed notation declaration: #{details}" + raise REXML::ParseException.new(message, @source) end return [ :notationdecl, *vals ] when DOCTYPE_END @@ -569,6 +580,42 @@ def parse_attributes(prefixes, curr_ns) end return attributes, closed end + + def notation_decl_invalid_details + name = /#{NOTATIONDECL_START}\s+#{NAME}/um + public = /#{name}\s+PUBLIC/um + system = /#{name}\s+SYSTEM/um + if @source.match(/#{NOTATIONDECL_START}\s*>/um) + return "name is missing" + elsif not @source.match(/#{name}[\s>]/um) + return "invalid name" + elsif @source.match(/#{name}\s*>/um) + return "ID type is missing" + elsif not @source.match(/#{name}\s+(?:PUBLIC|SYSTEM)[\s>]/um) + return "invalid ID type" + elsif @source.match(/#{public}/um) + if @source.match(/#{public}\s*>/um) + return "public ID literal is missing" + elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}/um) + return "invalid public ID literal" + elsif @source.match(/#{public}\s+#{PUBIDLITERAL}[^\s>]/um) + return "garbage after public ID literal" + elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um) + return "invalid system literal" + elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*>/um) + return "garbage after system literal" + end + elsif @source.match(/#{system}/um) + if @source.match(/#{system}\s*>/um) + return "system literal is missing" + elsif not @source.match(/#{system}\s+#{SYSTEMLITERAL}/um) + return "invalid system literal" + elsif not @source.match(/#{system}\s+#{SYSTEMLITERAL}\s*>/um) + return "garbage after system literal" + end + end + "end > is missing" + end end end end diff --git a/test/parse/test_notation_declaration.rb b/test/parse/test_notation_declaration.rb index 0d29f0d8..fbd29e2a 100644 --- a/test/parse/test_notation_declaration.rb +++ b/test/parse/test_notation_declaration.rb @@ -23,10 +23,100 @@ def test_name doctype = parse("") assert_equal("name", doctype.notation("name").name) end + + def test_no_name + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: name is missing +Line: 5 +Position: 72 +Last 80 unconsumed characters: + ]> + DETAIL + end + + def test_invalid_name + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid name +Line: 5 +Position: 74 +Last 80 unconsumed characters: + ]> + DETAIL + end + + def test_no_id_type + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: ID type is missing +Line: 5 +Position: 77 +Last 80 unconsumed characters: + ]> + DETAIL + end + + def test_invalid_id_type + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid ID type +Line: 5 +Position: 85 +Last 80 unconsumed characters: + ]> + DETAIL + end end class TestExternalID < self class TestSystem < self + def test_no_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: system literal is missing +Line: 5 +Position: 84 +Last 80 unconsumed characters: + ]> + DETAIL + end + + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: garbage after system literal +Line: 5 +Position: 103 +Last 80 unconsumed characters: + ]> + DETAIL + end + def test_single_quote doctype = parse(<<-INTERNAL_SUBSET) @@ -44,6 +134,21 @@ def test_double_quote class TestPublic < self class TestPublicIDLiteral < self + def test_content_double_quote + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid public ID literal +Line: 5 +Position: 129 +Last 80 unconsumed characters: + ]> + DETAIL + end + def test_single_quote doctype = parse(<<-INTERNAL_SUBSET) @@ -60,6 +165,21 @@ def test_double_quote end class TestSystemLiteral < self + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: garbage after system literal +Line: 5 +Position: 123 +Last 80 unconsumed characters: + ]> + DETAIL + end + def test_single_quote doctype = parse(<<-INTERNAL_SUBSET) @@ -96,5 +216,66 @@ def test_public_system end end end + + class TestPublicID < self + def test_no_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: public ID literal is missing +Line: 5 +Position: 84 +Last 80 unconsumed characters: + ]> + DETAIL + end + + def test_literal_content_double_quote + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: invalid public ID literal +Line: 5 +Position: 128 +Last 80 unconsumed characters: + ]> + DETAIL + end + + def test_garbage_after_literal + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: garbage after public ID literal +Line: 5 +Position: 106 +Last 80 unconsumed characters: + ]> + DETAIL + end + + def test_literal_single_quote + doctype = parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + assert_equal("public-id-literal", doctype.notation("name").public) + end + + def test_literal_double_quote + doctype = parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + assert_equal("public-id-literal", doctype.notation("name").public) + end + end end end