diff --git a/Gemfile b/Gemfile
index d65e2a66..17a80bc2 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,3 +1,5 @@
-source 'http://rubygems.org'
+source 'https://rubygems.org'
+
+gem 'nokogumbo', git: 'https://github.com/rubys/nokogumbo'
gemspec
diff --git a/README.md b/README.md
index 58f27899..e46c58ce 100644
--- a/README.md
+++ b/README.md
@@ -72,7 +72,7 @@ Below is mostly comprehensive list of checks that HTMLProofer can perform.
### HTML
-* Whether your HTML markup is valid. This is done via [Nokogiri](http://www.nokogiri.org/tutorials/ensuring_well_formed_markup.html) to ensure well-formed markup.
+* Whether your HTML markup is valid. This is done via [Nokogumbo](https://github.com/rubys/nokogumbo) to validate well-formed HTML5 markup.
## Usage
@@ -215,7 +215,7 @@ htmlproofer --assume-extension ./_site
### Using through Docker
-If you have trouble with (or don't want to) install Ruby/Nokogiri, the command-line tool can be run through Docker. See [html-proofer-docker](https://github.com/18F/html-proofer-docker) for more information.
+If you have trouble with (or don't want to) install Ruby/Nokogumbo, the command-line tool can be run through Docker. See [html-proofer-docker](https://github.com/18F/html-proofer-docker) for more information.
## Ignoring content
@@ -245,7 +245,7 @@ The `HTMLProofer` constructor takes an optional hash of additional options:
| `check_external_hash` | Checks whether external hashes exist (even if the webpage exists). This slows the checker down. | `false` |
| `check_favicon` | Enables the favicon checker. | `false` |
| `check_opengraph` | Enables the Open Graph checker. | `false` |
-| `check_html` | Enables HTML validation errors from Nokogiri | `false` |
+| `check_html` | Enables HTML validation errors from Nokogumbo | `false` |
| `check_img_http` | Fails an image if it's marked as `http` | `false` |
|`checks_to_ignore`| An array of Strings indicating which checks you'd like to not perform. | `[]`
| `directory_index_file` | Sets the file to look for when a link refers to a directory. | `index.html` |
@@ -275,13 +275,13 @@ See below for more information.
### Configuring HTML validation rules
-If `check_html` is `true`, Nokogiri performs additional validation on your HTML.
+If `check_html` is `true`, Nokogumbo performs additional validation on your HTML.
You can pass in additional options to configure this validation.
| Option | Description | Default |
| :----- | :---------- | :------ |
-| `report_invalid_tags` | When `check_html` is enabled, HTML markup that is unknown to Nokogiri are reported as errors. | `false`
+| `report_invalid_tags` | When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors. | `false`
| `report_missing_names` | When `check_html` is enabled, HTML markup that are missing entity names are reported as errors. | `false`
| `report_script_embeds` | When `check_html` is enabled, `script` tags containing markup [are reported as errors](http://git.io/vOovv). Enabling this option ignores those errors. | `false`
diff --git a/bin/htmlproofer b/bin/htmlproofer
index 8dd2b077..a1779ada 100755
--- a/bin/htmlproofer
+++ b/bin/htmlproofer
@@ -21,7 +21,7 @@ Mercenary.program(:htmlproofer) do |p|
p.option 'checks_to_ignore', '--checks-to-ignore check1,[check2,...]', Array, ' An array of Strings indicating which checks you\'d like to not perform.'
p.option 'check_external_hash', '--check-external-hash', 'Checks whether external hashes exist (even if the webpage exists). This slows the checker down (default: `false`).'
p.option 'check_favicon', '--check-favicon', 'Enables the favicon checker (default: `false`).'
- p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogiri (default: `false`).'
+ p.option 'check_html', '--check-html', 'Enables HTML validation errors from Nokogumbo (default: `false`).'
p.option 'check_img_http', '--check-img-http', 'Fails an image if it\'s marked as `http` (default: `false`).'
p.option 'check_opengraph', '--check-opengraph', 'Enables the Open Graph checker (default: `false`).'
p.option 'check_sri', '--check-sri', 'Check that `` and `')
+ nokogiri = Nokogiri::HTML5('')
checkable = HTMLProofer::Element.new(nokogiri.css('script').first, @check)
expect(checkable.ignores_pattern_check([%r{\/assets\/.*(js|css|png|svg)}])).to eq true
end
it 'works for string patterns' do
- nokogiri = Nokogiri::HTML('')
+ nokogiri = Nokogiri::HTML5('')
checkable = HTMLProofer::Element.new(nokogiri.css('script').first, @check)
expect(checkable.ignores_pattern_check(['/assets/main.js'])).to eq true
end
@@ -41,7 +41,7 @@
describe '#url' do
it 'works for src attributes' do
- nokogiri = Nokogiri::HTML('')
+ nokogiri = Nokogiri::HTML5('')
checkable = HTMLProofer::Element.new(nokogiri.css('img').first, @check)
expect(checkable.url).to eq 'image.png'
end
@@ -49,7 +49,7 @@
describe '#ignore' do
it 'works for twitter cards' do
- nokogiri = Nokogiri::HTML('')
+ nokogiri = Nokogiri::HTML5('')
checkable = HTMLProofer::Element.new(nokogiri.css('meta').first, @check)
expect(checkable.ignore?).to eq true
end
diff --git a/spec/html-proofer/fixtures/html/weird_onclick.html b/spec/html-proofer/fixtures/html/weird_onclick.html
index 3ca33457..ee58f094 100644
--- a/spec/html-proofer/fixtures/html/weird_onclick.html
+++ b/spec/html-proofer/fixtures/html/weird_onclick.html
@@ -1 +1 @@
-
+
diff --git a/spec/html-proofer/html_spec.rb b/spec/html-proofer/html_spec.rb
index 1b4ae9dc..00b2c207 100644
--- a/spec/html-proofer/html_spec.rb
+++ b/spec/html-proofer/html_spec.rb
@@ -19,40 +19,40 @@
expect(proofer.failed_tests).to eq []
end
- it 'fails for an invalid tag' do
+ it 'works for custom tags' do
html = "#{FIXTURES_DIR}/html/invalid_tag.html"
proofer = run_proofer(html, :file, check_html: true, validation: { report_invalid_tags: true })
- expect(proofer.failed_tests.first).to match(/Tag myfancytag invalid \(line 2\)/)
+ expect(proofer.failed_tests).to eq []
end
- it 'fails for an unmatched end tag' do
+ it 'allows an unmatched end tag' do
html = "#{FIXTURES_DIR}/html/unmatched_end_tag.html"
proofer = run_proofer(html, :file, check_html: true)
- expect(proofer.failed_tests.first).to match(/Unexpected end tag : div \(line 3\)/)
+ expect(proofer.failed_tests).to eq []
end
- it 'fails for an unescaped ampersand in attribute' do
+ it 'allows an unescaped ampersand in attribute' do
html = "#{FIXTURES_DIR}/html/unescaped_ampersand_in_attribute.html"
proofer = run_proofer(html, :file, check_html: true)
- expect(proofer.failed_tests.first).to match(/htmlParseEntityRef: expecting ';' \(line 2\)/)
+ expect(proofer.failed_tests).to eq []
end
- it 'fails for mismatch between opening and ending tag' do
+ it 'allows mismatch between opening and ending tag' do
html = "#{FIXTURES_DIR}/html/opening_and_ending_tag_mismatch.html"
proofer = run_proofer(html, :file, check_html: true)
- expect(proofer.failed_tests.first).to match(/Opening and ending tag mismatch: p and strong/)
+ expect(proofer.failed_tests).to eq []
end
- it 'fails for div inside head' do
+ it 'allows div inside head' do
html = "#{FIXTURES_DIR}/html/div_inside_head.html"
proofer = run_proofer(html, :file, check_html: true)
- expect(proofer.failed_tests.first).to match(/Unexpected end tag : head \(line 5\)/)
+ expect(proofer.failed_tests).to eq []
end
- it 'fails for missing closing quotation mark in href' do
+ it 'allows missing closing quotation mark in href' do
html = "#{FIXTURES_DIR}/html/missing_closing_quotes.html"
proofer = run_proofer(html, :file, check_html: true)
- expect(proofer.failed_tests.to_s).to match(/Couldn't find end of Start Tag a \(line 6\)/)
+ expect(proofer.failed_tests).to eq []
end
it "doesn't fail for single ampersand" do
@@ -61,10 +61,10 @@
expect(proofer.failed_tests).to eq []
end
- it 'fails for single ampersand when asked' do
+ it "allows single ampersand" do
html = "#{FIXTURES_DIR}/html/single_amp.html"
proofer = run_proofer(html, :file, check_html: true, validation: { report_missing_names: true })
- expect(proofer.failed_tests.first).to match('htmlParseEntityRef: no name')
+ expect(proofer.failed_tests).to eq []
end
it 'ignores embeded scripts when asked' do
@@ -78,7 +78,7 @@
opts = { check_html: true, validation: { report_script_embeds: true } }
ignorable_script = "#{FIXTURES_DIR}/html/ignore_script_embeds.html"
proofer = run_proofer(ignorable_script, :file, opts)
- expect(proofer.failed_tests.length).to eq 2
+ expect(proofer.failed_tests).to eq []
end
it 'does not fail for weird iframe sources' do
diff --git a/spec/html-proofer/links_spec.rb b/spec/html-proofer/links_spec.rb
index 4ccc5cf6..9317b9b7 100644
--- a/spec/html-proofer/links_spec.rb
+++ b/spec/html-proofer/links_spec.rb
@@ -71,10 +71,10 @@
expect(proofer.failed_tests.first).to match(%r{internally linking to .\/notreal.html, which does not exist})
end
- it 'fails for link with no href' do
+ it 'allows link with no href' do
missing_link_href_filepath = "#{FIXTURES_DIR}/links/missing_link_href.html"
proofer = run_proofer(missing_link_href_filepath, :file)
- expect(proofer.failed_tests.first).to match(/anchor has no href attribute/)
+ expect(proofer.failed_tests).to eq []
end
it 'should follow redirects' do
@@ -215,16 +215,16 @@
expect(proofer.failed_tests).to eq []
end
- it 'fails for empty href within link elements' do
+ it 'allows empty href on link elements' do
head_link = "#{FIXTURES_DIR}/links/head_link_href_empty.html"
proofer = run_proofer(head_link, :file)
- expect(proofer.failed_tests.first).to match(/anchor has no href attribute/)
+ expect(proofer.failed_tests).to eq []
end
- it 'fails for absent href within link elements' do
+ it 'allows missing href on link elements' do
head_link = "#{FIXTURES_DIR}/links/head_link_href_absent.html"
proofer = run_proofer(head_link, :file)
- expect(proofer.failed_tests.first).to match(/anchor has no href attribute/)
+ expect(proofer.failed_tests).to eq []
end
it 'fails for internal linking to a directory without trailing slash' do
@@ -328,10 +328,10 @@
expect(proofer.failed_tests).to eq []
end
- it 'fails for placeholder with empty id' do
+ it 'allows placeholder with empty id' do
empty_id = "#{FIXTURES_DIR}/links/placeholder_with_empty_id.html"
proofer = run_proofer(empty_id, :file)
- expect(proofer.failed_tests.first).to match(/anchor has no href attribute/)
+ expect(proofer.failed_tests).to eq []
end
it 'ignores non-http(s) protocols' do
@@ -493,7 +493,7 @@
missing_href = "#{FIXTURES_DIR}/links/blank_href_htmlunknown.html"
proofer = run_proofer(missing_href, :file)
- expect(proofer.failed_tests.length).to eq 1
+ expect(proofer.failed_tests).to eq []
end
it 'can skip expecting href for anchors in non-HTML5' do
@@ -580,7 +580,7 @@
it 'timeout' do
proofer = run_proofer(['https://www.google.com:81'], :links)
- expect(proofer.failed_tests.first).to match(/Couldn't connect to server/)
+ expect(proofer.failed_tests.first).to match(/time.*out/)
end
it 'correctly handles empty href' do
diff --git a/spec/html-proofer/scripts_spec.rb b/spec/html-proofer/scripts_spec.rb
index db8a0b91..28aeb1f4 100644
--- a/spec/html-proofer/scripts_spec.rb
+++ b/spec/html-proofer/scripts_spec.rb
@@ -16,7 +16,8 @@
it 'fails for missing internal src' do
file = "#{FIXTURES_DIR}/scripts/script_missing_internal.html"
proofer = run_proofer(file, :file)
- expect(proofer.failed_tests.first).to match(/doesnotexist.js does not exist \(line 5\)/)
+ expect(proofer.failed_tests.length).to eq 1
+ expect(proofer.failed_tests.first).to include('spec/html-proofer/fixtures/scripts/script_missing_internal.html: internal script doesnotexist.js does not exist (line 5)')
end
it 'works for present content' do