diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb
index f76aed07..84fc5ca5 100644
--- a/lib/rexml/parsers/baseparser.rb
+++ b/lib/rexml/parsers/baseparser.rb
@@ -83,9 +83,6 @@ class BaseParser
ATTDEF_RE = /#{ATTDEF}/
ATTLISTDECL_START = /\A\s*/um
- NOTATIONDECL_START = /\A\s*/um
- SYSTEM = /\A\s*/um
TEXT_PATTERN = /\A([^<]*)/um
@@ -103,6 +100,10 @@ class BaseParser
GEDECL = ""
ENTITYDECL = /\s*(?:#{GEDECL})|(?:#{PEDECL})/um
+ NOTATIONDECL_START = /\A\s*/um
+ SYSTEM = /\A\s*/um
+
EREFERENCE = /&(?!#{NAME};)/
DEFAULT_ENTITIES = {
@@ -315,12 +316,22 @@ def pull_event
md = nil
if @source.match( PUBLIC )
md = @source.match( PUBLIC, true )
- vals = [md[1],md[2],md[4],md[6]]
+ pubid = system = nil
+ pubid_literal = md[3]
+ pubid = pubid_literal[1..-2] if pubid_literal # Remove quote
+ system_literal = md[4]
+ system = system_literal[1..-2] if system_literal # Remove quote
+ vals = [md[1], md[2], pubid, system]
elsif @source.match( SYSTEM )
md = @source.match( SYSTEM, true )
- vals = [md[1],md[2],nil,md[4]]
+ system = nil
+ system_literal = md[3]
+ system = system_literal[1..-2] if system_literal # Remove quote
+ vals = [md[1], md[2], nil, system]
else
- raise REXML::ParseException.new( "error parsing notation: no matching pattern", @source )
+ details = notation_decl_invalid_details
+ message = "Malformed notation declaration: #{details}"
+ raise REXML::ParseException.new(message, @source)
end
return [ :notationdecl, *vals ]
when DOCTYPE_END
@@ -569,6 +580,42 @@ def parse_attributes(prefixes, curr_ns)
end
return attributes, closed
end
+
+ def notation_decl_invalid_details
+ name = /#{NOTATIONDECL_START}\s+#{NAME}/um
+ public = /#{name}\s+PUBLIC/um
+ system = /#{name}\s+SYSTEM/um
+ if @source.match(/#{NOTATIONDECL_START}\s*>/um)
+ return "name is missing"
+ elsif not @source.match(/#{name}[\s>]/um)
+ return "invalid name"
+ elsif @source.match(/#{name}\s*>/um)
+ return "ID type is missing"
+ elsif not @source.match(/#{name}\s+(?:PUBLIC|SYSTEM)[\s>]/um)
+ return "invalid ID type"
+ elsif @source.match(/#{public}/um)
+ if @source.match(/#{public}\s*>/um)
+ return "public ID literal is missing"
+ elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}/um)
+ return "invalid public ID literal"
+ elsif @source.match(/#{public}\s+#{PUBIDLITERAL}[^\s>]/um)
+ return "garbage after public ID literal"
+ elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}/um)
+ return "invalid system literal"
+ elsif not @source.match(/#{public}\s+#{PUBIDLITERAL}\s+#{SYSTEMLITERAL}\s*>/um)
+ return "garbage after system literal"
+ end
+ elsif @source.match(/#{system}/um)
+ if @source.match(/#{system}\s*>/um)
+ return "system literal is missing"
+ elsif not @source.match(/#{system}\s+#{SYSTEMLITERAL}/um)
+ return "invalid system literal"
+ elsif not @source.match(/#{system}\s+#{SYSTEMLITERAL}\s*>/um)
+ return "garbage after system literal"
+ end
+ end
+ "end > is missing"
+ end
end
end
end
diff --git a/test/parse/test_notation_declaration.rb b/test/parse/test_notation_declaration.rb
index 0d29f0d8..fbd29e2a 100644
--- a/test/parse/test_notation_declaration.rb
+++ b/test/parse/test_notation_declaration.rb
@@ -23,10 +23,100 @@ def test_name
doctype = parse("")
assert_equal("name", doctype.notation("name").name)
end
+
+ def test_no_name
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: name is missing
+Line: 5
+Position: 72
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
+ def test_invalid_name
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: invalid name
+Line: 5
+Position: 74
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
+ def test_no_id_type
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: ID type is missing
+Line: 5
+Position: 77
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
+ def test_invalid_id_type
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: invalid ID type
+Line: 5
+Position: 85
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
end
class TestExternalID < self
class TestSystem < self
+ def test_no_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: system literal is missing
+Line: 5
+Position: 84
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
+ def test_garbage_after_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: garbage after system literal
+Line: 5
+Position: 103
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
@@ -44,6 +134,21 @@ def test_double_quote
class TestPublic < self
class TestPublicIDLiteral < self
+ def test_content_double_quote
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: invalid public ID literal
+Line: 5
+Position: 129
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
@@ -60,6 +165,21 @@ def test_double_quote
end
class TestSystemLiteral < self
+ def test_garbage_after_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: garbage after system literal
+Line: 5
+Position: 123
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
def test_single_quote
doctype = parse(<<-INTERNAL_SUBSET)
@@ -96,5 +216,66 @@ def test_public_system
end
end
end
+
+ class TestPublicID < self
+ def test_no_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: public ID literal is missing
+Line: 5
+Position: 84
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
+ def test_literal_content_double_quote
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: invalid public ID literal
+Line: 5
+Position: 128
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
+ def test_garbage_after_literal
+ exception = assert_raise(REXML::ParseException) do
+ parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ end
+ assert_equal(<<-DETAIL.chomp, exception.to_s)
+Malformed notation declaration: garbage after public ID literal
+Line: 5
+Position: 106
+Last 80 unconsumed characters:
+ ]>
+ DETAIL
+ end
+
+ def test_literal_single_quote
+ doctype = parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ assert_equal("public-id-literal", doctype.notation("name").public)
+ end
+
+ def test_literal_double_quote
+ doctype = parse(<<-INTERNAL_SUBSET)
+
+ INTERNAL_SUBSET
+ assert_equal("public-id-literal", doctype.notation("name").public)
+ end
+ end
end
end