Skip to content
This repository has been archived by the owner on Aug 26, 2023. It is now read-only.

Rename :max_parse_errors to :max_errors #88

Merged
merged 1 commit into from
Aug 23, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
### Changed
- Integrated [Gumbo parser](https://github.com/google/gumbo-parser) into
Nokogumbo. A system version will not be used.
- The undocumented (but publicly mentioned) `:max_parse_errors` option has been renamed to `:max_errors`;
`:max_parse_errors` is deprecated and will be removed in a future release

### Deprecated
- `:max_parse_errors`; use `:max_errors`

### Removed

Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,11 @@ doc = Nokogiri::HTML5.get(uri)
## Error reporting
Nokogumbo contains an experimental parse error reporting facility. By default,
no parse errors are reported but this can be configured by passing the
`:max_parse_errors` option to `::parse` or `::fragment`.
`:max_errors` option to `::parse` or `::fragment`.

```ruby
require 'nokogumbo'
doc = Nokogiri::HTML5.parse('Hi there!<body>', max_parse_errors: 10)
doc = Nokogiri::HTML5.parse('Hi there!<body>', max_errors: 10)
doc.errors.each do |err|
puts err
end
Expand Down
4 changes: 2 additions & 2 deletions ext/nokogumbo/nokogumbo.c
Original file line number Diff line number Diff line change
Expand Up @@ -242,9 +242,9 @@ static xmlNodePtr walk_tree(xmlDocPtr document, GumboNode *node) {
}

// Parse a string using gumbo_parse into a Nokogiri document
static VALUE parse(VALUE self, VALUE string, VALUE max_parse_errors) {
static VALUE parse(VALUE self, VALUE string, VALUE max_errors) {
GumboOptions options = kGumboDefaultOptions;
options.max_errors = NUM2INT(max_parse_errors);
options.max_errors = NUM2INT(max_errors);

const char *input = RSTRING_PTR(string);
size_t input_len = RSTRING_LEN(string);
Expand Down
6 changes: 4 additions & 2 deletions lib/nokogumbo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ module HTML5
# may also be an IO-like object. Returns a +Nokogiri::HTML::Document+.
def self.parse(string, options={})
string = read_and_encode(string)
document = Nokogumbo.parse(string.to_s, options[:max_parse_errors] || 0)
max_errors = options[:max_errors] || options[:max_parse_errors] || 0
document = Nokogumbo.parse(string.to_s, max_errors)
document.encoding = 'UTF-8'
document
end
Expand Down Expand Up @@ -95,7 +96,8 @@ def self.fragment(tags, options = {})
else
path = "/html/body/node()"
end
temp_doc = Nokogumbo.parse("<!DOCTYPE html><html><body>#{tags}", options[:max_parse_errors] || 0)
max_errors = options[:max_errors] || options[:max_parse_errors] || 0
temp_doc = Nokogumbo.parse("<!DOCTYPE html><html><body>#{tags}", max_errors)
temp_doc.xpath(path).each { |child| child.parent = frag }
frag.errors = temp_doc.errors
frag
Expand Down
2 changes: 1 addition & 1 deletion test/test_encoding.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def test_charset_sniff_to_html
</body>
</html>
EOF
doc = Nokogiri::HTML5(html, max_parse_errors: 10)
doc = Nokogiri::HTML5(html, max_errors: 10)
assert_equal 0, doc.errors.length
refute_equal '', doc.to_html
end
Expand Down
28 changes: 17 additions & 11 deletions test/test_nokogumbo.rb
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,13 @@ def test_html5_doctype
end

def test_fragment_no_errors
doc = Nokogiri::HTML5.fragment("no missing DOCTYPE errors", max_parse_errors: 10)
doc = Nokogiri::HTML5.fragment("no missing DOCTYPE errors", max_errors: 10)
assert_equal 0, doc.errors.length
end

# Backward-compatibility check: the deprecated `:max_parse_errors` option must
# still be accepted by `Nokogiri::HTML5.fragment`.
# This should be deleted when `:max_parse_errors` is removed.
def test_fragment_max_parse_errors
doc = Nokogiri::HTML5.fragment("testing deprecated :max_parse_errors", max_parse_errors: 10)
assert_equal 0, doc.errors.length
end

Expand Down Expand Up @@ -99,28 +105,28 @@ def test_root_comments
end

def test_parse_errors
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_parse_errors: 10)
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_errors: 10)
assert_equal doc.errors.length, 2
doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_parse_errors: 10)
doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_errors: 10)
assert_empty doc.errors
end

def test_max_parse_errors
def test_max_errors
# This document contains 2 parse errors, but we force limit to 1.
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_parse_errors: 1)
doc = Nokogiri::HTML5("<!DOCTYPE html><html><!-- -- --></a>", max_errors: 1)
assert_equal 1, doc.errors.length
doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_parse_errors: 1)
doc = Nokogiri::HTML5("<!DOCTYPE html><html>", max_errors: 1)
assert_empty doc.errors
end

def test_default_max_parse_errors
def test_default_max_errors
# This document contains 200 parse errors, but the default limit is 0.
doc = Nokogiri::HTML5("<!DOCTYPE html><html>" + "</p>" * 200)
assert_equal 0, doc.errors.length
end

def test_parse_fragment_errors
doc = Nokogiri::HTML5.fragment("<\r\n", max_parse_errors: 10)
doc = Nokogiri::HTML5.fragment("<\r\n", max_errors: 10)
refute_empty doc.errors
end

Expand All @@ -140,13 +146,13 @@ def test_document_encoding
assert_equal "Кирилические символы", doc.at('body').text.gsub(/\n\s+/,'')
end

def test_fragment_max_parse_errors
def test_fragment_max_errors
# This fragment contains 3 parse errors, but we force limit to 1.
doc = Nokogiri::HTML5.fragment("<!-- -- --></a>", max_parse_errors: 1)
doc = Nokogiri::HTML5.fragment("<!-- -- --></a>", max_errors: 1)
assert_equal 1, doc.errors.length
end

def test_fragment_default_max_parse_errors
def test_fragment_default_max_errors
# This fragment contains 201 parse errors, but default limit is 0.
doc = Nokogiri::HTML5.fragment("</p>" * 200)
assert_equal 0, doc.errors.length
Expand Down
4 changes: 2 additions & 2 deletions test/test_null.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class TestNull < Minitest::Test
def fragment(s)
Nokogiri::HTML5.fragment(s, max_parse_errors: 10)
Nokogiri::HTML5.fragment(s, max_errors: 10)
end

def test_null_char_ref
Expand Down Expand Up @@ -83,7 +83,7 @@ def test_comment_state
def test_doctype_name_states
# There are two missing here for double quoted PUBLIC and SYSTEM values.
doc = Nokogiri::HTML5.parse("<!DOCTYPE \u0000\u0000 PUBLIC '\u0000' SYSTEM '\u0000' \u0000>",
max_parse_errors: 10)
max_errors: 10)
# 12.2.5.54 Before DOCTYPE name state: unexpected-null-character parse
# error
# 12.2.5.55 DOCTYPE name state: unexpected-null-character parse error
Expand Down
12 changes: 1 addition & 11 deletions test/test_tree-construction.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,6 @@
require 'nokogumbo'
require 'minitest/autorun'

# class TestTreeConstructionBase < Minitest::Test
# def fragment(s)
# Nokogiri::HTML5.fragment(s, context, max_parse_errors: 100)
# end
#
# def parse(s)
# Nokogiri::HTML5.parse(s, max_parse_errors: 100)
# end
# end

def parse_test(test_data)
test = { script: :both }
#index = test_data.start_with?("#errors\n") ? 0 : test_data.index("\n#errors\n")
Expand Down Expand Up @@ -185,7 +175,7 @@ def compare_nodes(node, ng_node)
def run_test
skip "Scripting tests not supported" if @test[:script] == :on
skip "Fragment tests not supported" unless @test[:context].nil?
doc = Nokogiri::HTML5.parse(@test[:data], max_parse_errors: @test[:errors].length + 1)
doc = Nokogiri::HTML5.parse(@test[:data], max_errors: @test[:errors].length + 1)
# assert_equal doc.errors.length, @test[:errors].length

# Walk the tree.
Expand Down