Skip to content
This repository has been archived by the owner on Aug 26, 2023. It is now read-only.

Commit

Permalink
Rename :max_parse_errors to :max_errors
Browse files Browse the repository at this point in the history
  • Loading branch information
stevecheckoway committed Aug 23, 2018
1 parent 014bb81 commit 3a0c307
Show file tree
Hide file tree
Showing 8 changed files with 32 additions and 31 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Changed
- Integrated [Gumbo parser](https://github.com/google/gumbo-parser) into
Nokogumbo. A system version will not be used.
- The undocumented (but publicly mentioned) `:max_parse_errors` option has been renamed to
`:max_errors`; `:max_parse_errors` is deprecated and will be removed.

### Deprecated
- `:max_parse_errors`; use `:max_errors` instead.

### Removed

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ doc = Nokogiri::HTML5.get(uri)
## Error reporting
Nokogumbo contains an experimental parse error reporting facility. By default,
no parse errors are reported but this can be configured by passing the
`:max_parse_errors` option to `::parse` or `::fragment`.
`:max_errors` option to `::parse` or `::fragment`.

```ruby
require 'nokogumbo'
doc = Nokogiri::HTML5.parse('Hi there!<body>', max_parse_errors: 10)
doc = Nokogiri::HTML5.parse('Hi there!<body>', max_errors: 10)
doc.errors.each do |err|
puts err
end
Expand Down
4 changes: 2 additions & 2 deletions ext/nokogumbo/nokogumbo.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,9 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) {
}

// Parse a string using gumbo_parse into a Nokogiri document
static VALUE parse(VALUE self, VALUE string, VALUE max_parse_errors) {
static VALUE parse(VALUE self, VALUE string, VALUE max_errors) {
GumboOptions options = kGumboDefaultOptions;
options.max_errors = NUM2INT(max_parse_errors);
options.max_errors = NUM2INT(max_errors);

const char *input = RSTRING_PTR(string);
size_t input_len = RSTRING_LEN(string);
Expand Down
6 changes: 4 additions & 2 deletions lib/nokogumbo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ module HTML5
# may also be an IO-like object. Returns a +Nokogiri::HTML::Document+.
def self.parse(string, options={})
string = read_and_encode(string)
document = Nokogumbo.parse(string.to_s, options[:max_parse_errors] || 0)
max_errors = options[:max_errors] || options[:max_parse_errors] || 0
document = Nokogumbo.parse(string.to_s, max_errors)
document.encoding = 'UTF-8'
document
end
Expand Down Expand Up @@ -95,7 +96,8 @@ def self.fragment(tags, options = {})
else
path = "/html/body/node()"
end
temp_doc = Nokogumbo.parse("<!DOCTYPE html><html><body>#{tags}", options[:max_parse_errors] || 0)
max_errors = options[:max_errors] || options[:max_parse_errors] || 0
temp_doc = Nokogumbo.parse("<!DOCTYPE html><html><body>#{tags}", max_errors)
temp_doc.xpath(path).each { |child| child.parent = frag }
frag.errors = temp_doc.errors
frag
Expand Down
2 changes: 1 addition & 1 deletion test/test_encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_charset_sniff_to_html
</body>
</html>
EOF
doc = Nokogiri::HTML5(html, max_parse_errors: 10)
doc = Nokogiri::HTML5(html, max_errors: 10)
assert_equal 0, doc.errors.length
refute_equal '', doc.to_html
end
Expand Down
28 changes: 17 additions & 11 deletions test/test_nokogumbo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,13 @@ def test_html5_doctype
end

def test_fragment_no_errors
doc = Nokogiri::HTML5.fragment("no missing DOCTYPE errors", max_parse_errors: 10)
doc = Nokogiri::HTML5.fragment("no missing DOCTYPE errors", max_errors: 10)
assert_equal 0, doc.errors.length
end

# This should be deleted when `:max_parse_errors` is removed.
def test_fragment_max_parse_errors
doc = Nokogiri::HTML5.fragment("testing deprecated :max_parse_errors", max_parse_errors: 10)
assert_equal 0, doc.errors.length
end

Expand Down Expand Up @@ -99,28 +105,28 @@ def test_root_comments
end

def test_parse_errors
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_parse_errors: 10)
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_errors: 10)
assert_equal doc.errors.length, 2
doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_parse_errors: 10)
doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_errors: 10)
assert_empty doc.errors
end

def test_max_parse_errors
def test_max_errors
# This document contains 2 parse errors, but we force limit to 1.
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_parse_errors: 1)
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_errors: 1)
assert_equal 1, doc.errors.length
doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_parse_errors: 1)
doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_errors: 1)
assert_empty doc.errors
end

def test_default_max_parse_errors
def test_default_max_errors
# This document contains 200 parse errors, but default limit is 0.
doc = Nokogiri::HTML5("<!DOCTYPE html><html>" + "</p>" * 200)
assert_equal 0, doc.errors.length
end

def test_parse_fragment_errors
doc = Nokogiri::HTML5.fragment("<\r\n", max_parse_errors: 10)
doc = Nokogiri::HTML5.fragment("<\r\n", max_errors: 10)
refute_empty doc.errors
end

Expand All @@ -140,13 +146,13 @@ def test_document_encoding
assert_equal "Кирилические символы", doc.at('body').text.gsub(/\n\s+/,'')
end

def test_fragment_max_parse_errors
def test_fragment_max_errors
# This fragment contains 3 parse errors, but we force limit to 1.
doc = Nokogiri::HTML5.fragment("<!-- -- --></a>", max_parse_errors: 1)
doc = Nokogiri::HTML5.fragment("<!-- -- --></a>", max_errors: 1)
assert_equal 1, doc.errors.length
end

def test_fragment_default_max_parse_errors
def test_fragment_default_max_errors
# This fragment contains 201 parse errors, but default limit is 0.
doc = Nokogiri::HTML5.fragment("</p>" * 200)
assert_equal 0, doc.errors.length
Expand Down
4 changes: 2 additions & 2 deletions test/test_null.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class TestNull < Minitest::Test
def fragment(s)
Nokogiri::HTML5.fragment(s, max_parse_errors: 10)
Nokogiri::HTML5.fragment(s, max_errors: 10)
end

def test_null_char_ref
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_comment_state
def test_doctype_name_states
# There are two missing here for double quoted PUBLIC and SYSTEM values.
doc = Nokogiri::HTML5.parse("<!DOCTYPE \u0000\u0000 PUBLIC '\u0000' SYSTEM '\u0000' \u0000>",
max_parse_errors: 10)
max_errors: 10)
# 12.2.5.54 Before DOCTYPE name state: unexpected-null-character parse
# error
# 12.2.5.55 DOCTYPE name state: unexpected-null-character parse error
Expand Down
12 changes: 1 addition & 11 deletions test/test_tree-construction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,6 @@
require 'nokogumbo'
require 'minitest/autorun'

# class TestTreeConstructionBase < Minitest::Test
# def fragment(s)
# Nokogiri::HTML5.fragment(s, context, max_parse_errors: 100)
# end
#
# def parse(s)
# Nokogiri::HTML5.parse(s, max_parse_errors: 100)
# end
# end

def parse_test(test_data)
test = { script: :both }
#index = test_data.start_with?("#errors\n") ? 0 : test_data.index("\n#errors\n")
Expand Down Expand Up @@ -185,7 +175,7 @@ def compare_nodes(node, ng_node)
def run_test
skip "Scripting tests not supported" if @test[:script] == :on
skip "Fragment tests not supported" unless @test[:context].nil?
doc = Nokogiri::HTML5.parse(@test[:data], max_parse_errors: @test[:errors].length + 1)
doc = Nokogiri::HTML5.parse(@test[:data], max_errors: @test[:errors].length + 1)
# assert_equal doc.errors.length, @test[:errors].length

# Walk the tree.
Expand Down

0 comments on commit 3a0c307

Please sign in to comment.