diff --git a/.travis.yml b/.travis.yml index 03afed4d..d0a0dfda 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: ruby rvm: - - 2.3.6 - 2.4.3 - 2.5.0 - 2.6.0 diff --git a/README.md b/README.md index a5164965..156d0193 100644 --- a/README.md +++ b/README.md @@ -321,7 +321,9 @@ You can pass in additional options to configure this validation. | Option | Description | Default | | :----- | :---------- | :------ | +| `report_eof_tags` | When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors | `false` | `report_invalid_tags` | When `check_html` is enabled, HTML markup that is unknown to Nokogumbo are reported as errors. | `false` +| `report_mismatched_tags` | When `check_html` is enabled, HTML markup with mismatched tags are reported as errors | `false` | `report_missing_doctype` | When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors. | `false` | `report_missing_names` | When `check_html` is enabled, HTML markup that are missing entity names are reported as errors. | `false` | `report_script_embeds` | When `check_html` is enabled, `script` tags containing markup are reported as errors. 
| `false` diff --git a/Rakefile b/Rakefile index 87a1fa5a..d2ce39a4 100644 --- a/Rakefile +++ b/Rakefile @@ -13,6 +13,10 @@ RuboCop::RakeTask.new(:rubocop) task default: %i[spec proof_readme] +task :test do + Rake::Task['spec'].invoke +end + task :proof_readme do require 'html-proofer' require 'redcarpet' diff --git a/bin/htmlproofer b/bin/htmlproofer index dcbc5a0c..d971ce7e 100755 --- a/bin/htmlproofer +++ b/bin/htmlproofer @@ -41,6 +41,8 @@ Mercenary.program(:htmlproofer) do |p| p.option 'report_missing_names', '--report-missing-names', 'When `check_html` is enabled, HTML markup that are missing entity names are reported as errors (default: `false`)' p.option 'report_script_embeds', '--report-script-embeds', 'When `check_html` is enabled, `script` tags containing markup are reported as errors (default: `false`)' p.option 'report_missing_doctype', '--report-missing-doctype', 'When `check_html` is enabled, HTML markup with missing or out-of-order `DOCTYPE` are reported as errors (default: `false`)' + p.option 'report_eof_tags', '--report-eof-tags', 'When `check_html` is enabled, HTML markup with tags that are malformed are reported as errors (default: `false`)' + p.option 'report_mismatched_tags', '--report-mismatched-tags', 'When `check_html` is enabled, HTML markup with mismatched tags are reported as errors (default: `false`)' p.option 'log_level', '--log-level ', String, 'Sets the logging level, as determined by Yell. One of `:debug`, `:info`, `:warn`, `:error`, or `:fatal`. (default: `:info`)' p.option 'only_4xx', '--only-4xx', 'Only reports errors for links that fall within the 4xx status code range' p.option 'storage_dir', '--storage-dir PATH', String, 'Directory where to store the cache log (default: "tmp/.htmlproofer")' @@ -82,6 +84,8 @@ Mercenary.program(:htmlproofer) do |p| options[:validation][:report_missing_names] = opts['report_missing_names'] unless opts['report_missing_names'].nil? 
options[:validation][:report_invalid_tags] = opts['report_invalid_tags'] unless opts['report_invalid_tags'].nil? options[:validation][:report_missing_doctype] = opts['report_missing_doctype'] unless opts['report_missing_doctype'].nil? + options[:validation][:report_eof_tags] = opts['report_eof_tags'] unless opts['report_eof_tags'].nil? + options[:validation][:report_mismatched_tags] = opts['report_mismatched_tags'] unless opts['report_mismatched_tags'].nil? options[:typhoeus] = HTMLProofer::Configuration.parse_json_option('typhoeus_config', opts['typhoeus_config']) unless opts['typhoeus_config'].nil? diff --git a/lib/html-proofer/check.rb b/lib/html-proofer/check.rb index 1104b6e4..60e9a749 100644 --- a/lib/html-proofer/check.rb +++ b/lib/html-proofer/check.rb @@ -5,10 +5,11 @@ module HTMLProofer class Check attr_reader :node, :html, :element, :src, :path, :options, :issues, :external_urls - def initialize(src, path, html, options) + def initialize(src, path, html, logger, options) @src = src @path = path @html = remove_ignored(html) + @logger = logger @options = options @issues = [] @external_urls = {} @@ -16,7 +17,7 @@ def initialize(src, path, html, options) def create_element(node) @node = node - Element.new(node, self) + Element.new(node, self, @logger) end def run diff --git a/lib/html-proofer/check/html.rb b/lib/html-proofer/check/html.rb index ea6b64c3..b6512f18 100644 --- a/lib/html-proofer/check/html.rb +++ b/lib/html-proofer/check/html.rb @@ -6,7 +6,9 @@ class HtmlCheck < ::HTMLProofer::Check INVALID_TAG_MSG = /Tag ([\w\-:]+) invalid/.freeze INVALID_PREFIX = /Namespace prefix/.freeze PARSE_ENTITY_REF = /htmlParseEntityRef: no name/.freeze - DOCTYPE_MSG = /The doctype must be the first token in the document/.freeze + DOCTYPE_MSG = /Expected a doctype token/.freeze + EOF_IN_TAG = /End of input in tag/.freeze + MISMATCHED_TAGS = /That tag isn't allowed here/.freeze def run @html.errors.each do |error| @@ -24,6 +26,10 @@ def report?(message) 
options[:validation][:report_missing_names] when DOCTYPE_MSG options[:validation][:report_missing_doctype] + when EOF_IN_TAG + options[:validation][:report_eof_tags] + when MISMATCHED_TAGS + options[:validation][:report_mismatched_tags] else true end diff --git a/lib/html-proofer/check/opengraph.rb b/lib/html-proofer/check/opengraph.rb index 751ce460..058c8607 100644 --- a/lib/html-proofer/check/opengraph.rb +++ b/lib/html-proofer/check/opengraph.rb @@ -3,8 +3,8 @@ class OpenGraphElement < ::HTMLProofer::Element attr_reader :src - def initialize(obj, check) - super(obj, check) + def initialize(obj, check, logger) + super(obj, check, logger) # Fake up src from the content attribute instance_variable_set('@src', @content) @@ -23,7 +23,7 @@ def empty_src? def run @html.css('meta[property="og:url"], meta[property="og:image"]').each do |m| - @opengraph = OpenGraphElement.new(m, self) + @opengraph = OpenGraphElement.new(m, self, @logger) next if @opengraph.ignore? diff --git a/lib/html-proofer/configuration.rb b/lib/html-proofer/configuration.rb index d8ccc09b..51f63e90 100644 --- a/lib/html-proofer/configuration.rb +++ b/lib/html-proofer/configuration.rb @@ -52,7 +52,9 @@ module Configuration report_script_embeds: false, report_missing_names: false, report_invalid_tags: false, - report_missing_doctype: false + report_missing_doctype: false, + report_eof_tags: false, + report_mismatched_tags: false }.freeze CACHE_DEFAULTS = {}.freeze diff --git a/lib/html-proofer/element.rb b/lib/html-proofer/element.rb index dffb6b96..661f7bc7 100644 --- a/lib/html-proofer/element.rb +++ b/lib/html-proofer/element.rb @@ -10,12 +10,18 @@ class Element attr_reader :id, :name, :alt, :href, :link, :src, :line, :data_proofer_ignore - def initialize(obj, check) + def initialize(obj, check, logger) + @logger = logger # Construct readable ivars for every element - obj.attributes.each_pair do |attribute, value| - name = attribute.tr('-:.', '_').to_s.to_sym - (class << self; self; 
end).send(:attr_reader, name) - instance_variable_set("@#{name}", value.value) + begin + obj.attributes.each_pair do |attribute, value| + name = attribute.tr('-:.;', '_').to_s.to_sym + (class << self; self; end).send(:attr_reader, name) + instance_variable_set("@#{name}", value.value) + end + rescue NameError => e + @logger.log :error, "Attribute set `#{obj}` contains an error!" + raise e end @aria_hidden = defined?(@aria_hidden) && @aria_hidden == 'true' ? true : false diff --git a/lib/html-proofer/middleware.rb b/lib/html-proofer/middleware.rb index 5339d547..64f625e8 100644 --- a/lib/html-proofer/middleware.rb +++ b/lib/html-proofer/middleware.rb @@ -21,7 +21,8 @@ def self.options allow_hash_href: true, check_external_hash: true, check_html: true, - url_ignore: [/.*/] # Don't try to check local files exist + url_ignore: [/.*/], # Don't try to check if local files exist + validation: { report_eof_tags: true } } end diff --git a/lib/html-proofer/runner.rb b/lib/html-proofer/runner.rb index 1d7f2b9d..1ca2b015 100644 --- a/lib/html-proofer/runner.rb +++ b/lib/html-proofer/runner.rb @@ -100,7 +100,7 @@ def check_parsed(html, path) @src.each do |src| checks.each do |klass| @logger.log :debug, "Checking #{klass.to_s.downcase} on #{path} ..." - check = Object.const_get(klass).new(src, path, html, @options) + check = Object.const_get(klass).new(src, path, html, @logger, @options) check.run external_urls = check.external_urls external_urls = Hash[check.external_urls.map { |url, file| [swap(url, @options[:url_swap]), file] }] if @options[:url_swap] @@ -147,6 +147,8 @@ def ignore_file?(file) def checks return @checks if defined?(@checks) && !@checks.nil? 
+ return (@checks = ['LinkCheck']) if @type == :links + @checks = HTMLProofer::Check.subchecks.map(&:name) @checks.delete('FaviconCheck') unless @options[:check_favicon] @checks.delete('HtmlCheck') unless @options[:check_html] diff --git a/lib/html-proofer/utils.rb b/lib/html-proofer/utils.rb index 523df91a..3733c529 100644 --- a/lib/html-proofer/utils.rb +++ b/lib/html-proofer/utils.rb @@ -15,7 +15,7 @@ def create_nokogiri(path) path end - Nokogiri::HTML5(content) + Nokogiri::HTML5(content, max_errors: -1) end def swap(href, replacement) diff --git a/lib/html-proofer/version.rb b/lib/html-proofer/version.rb index f1e492a2..57de6780 100644 --- a/lib/html-proofer/version.rb +++ b/lib/html-proofer/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module HTMLProofer - VERSION = '3.15.1' + VERSION = '3.15.2' end diff --git a/spec/html-proofer/command_spec.rb b/spec/html-proofer/command_spec.rb index 0eafddde..d61059c7 100644 --- a/spec/html-proofer/command_spec.rb +++ b/spec/html-proofer/command_spec.rb @@ -89,9 +89,9 @@ end it 'works with check-html' do - broken = "#{FIXTURES_DIR}/html/unmatched_end_tag.html" - output = make_bin('--check-html --report-invalid-tags', broken) - expect(output).to match('HTML-Proofer finished successfully') + broken = "#{FIXTURES_DIR}/html/missing_closing_quotes.html" + output = make_bin('--check-html --report-eof-tags', broken) + expect(output).to match('1 failure') end it 'works with empty-alt-ignore' do diff --git a/spec/html-proofer/element_spec.rb b/spec/html-proofer/element_spec.rb index 92fa5007..c5ce7013 100644 --- a/spec/html-proofer/element_spec.rb +++ b/spec/html-proofer/element_spec.rb @@ -4,25 +4,25 @@ describe HTMLProofer::Element do before(:each) do - @check = HTMLProofer::Check.new('', '', Nokogiri::HTML5(''), HTMLProofer::Configuration::PROOFER_DEFAULTS) + @check = HTMLProofer::Check.new('', '', Nokogiri::HTML5(''), nil, HTMLProofer::Configuration::PROOFER_DEFAULTS) end describe '#initialize' do it 'accepts the 
xmlns attribute' do nokogiri = Nokogiri::HTML5('Creative Commons') - checkable = HTMLProofer::Element.new(nokogiri.css('a').first, @check) + checkable = HTMLProofer::Element.new(nokogiri.css('a').first, @check, nil) expect(checkable.instance_variable_get(:@xmlns_cc)).to eq 'http://creativecommons.org/ns#' end it 'assignes the text node' do nokogiri = Nokogiri::HTML5('

One') - checkable = HTMLProofer::Element.new(nokogiri.css('p').first, @check) + checkable = HTMLProofer::Element.new(nokogiri.css('p').first, @check, nil) expect(checkable.instance_variable_get(:@text)).to eq 'One' end it 'accepts the content attribute' do nokogiri = Nokogiri::HTML5('') - checkable = HTMLProofer::Element.new(nokogiri.css('meta').first, @check) + checkable = HTMLProofer::Element.new(nokogiri.css('meta').first, @check, nil) expect(checkable.instance_variable_get(:@content)).to eq 'summary' end end @@ -30,13 +30,13 @@ describe '#ignores_pattern_check' do it 'works for regex patterns' do nokogiri = Nokogiri::HTML5('') - checkable = HTMLProofer::Element.new(nokogiri.css('script').first, @check) + checkable = HTMLProofer::Element.new(nokogiri.css('script').first, @check, nil) expect(checkable.ignores_pattern_check([%r{\/assets\/.*(js|css|png|svg)}])).to eq true end it 'works for string patterns' do nokogiri = Nokogiri::HTML5('') - checkable = HTMLProofer::Element.new(nokogiri.css('script').first, @check) + checkable = HTMLProofer::Element.new(nokogiri.css('script').first, @check, nil) expect(checkable.ignores_pattern_check(['/assets/main.js'])).to eq true end end @@ -44,7 +44,7 @@ describe '#url' do it 'works for src attributes' do nokogiri = Nokogiri::HTML5('') - checkable = HTMLProofer::Element.new(nokogiri.css('img').first, @check) + checkable = HTMLProofer::Element.new(nokogiri.css('img').first, @check, nil) expect(checkable.url).to eq 'image.png' end end @@ -52,7 +52,7 @@ describe '#ignore' do it 'works for twitter cards' do nokogiri = Nokogiri::HTML5('') - checkable = HTMLProofer::Element.new(nokogiri.css('meta').first, @check) + checkable = HTMLProofer::Element.new(nokogiri.css('meta').first, @check, nil) expect(checkable.ignore?).to eq true end end diff --git a/spec/html-proofer/fixtures/html/parse_failure.html b/spec/html-proofer/fixtures/html/parse_failure.html new file mode 100644 index 00000000..0b3330aa --- /dev/null +++ 
b/spec/html-proofer/fixtures/html/parse_failure.html @@ -0,0 +1 @@ + diff --git a/spec/html-proofer/fixtures/images/semicolon.html b/spec/html-proofer/fixtures/images/semicolon.html new file mode 100644 index 00000000..710604d7 --- /dev/null +++ b/spec/html-proofer/fixtures/images/semicolon.html @@ -0,0 +1,9 @@ +Frank Blank
diff --git a/spec/html-proofer/html_spec.rb b/spec/html-proofer/html_spec.rb index 76cbe909..027598d1 100644 --- a/spec/html-proofer/html_spec.rb +++ b/spec/html-proofer/html_spec.rb @@ -124,4 +124,11 @@ proofer = run_proofer(file, :file, opts) expect(proofer.failed_tests).to eq [] end + + it 'reports failures' do + opts = { check_html: true, validation: { report_mismatched_tags: true } } + file = "#{FIXTURES_DIR}/html/parse_failure.html" + proofer = run_proofer(file, :file, opts) + expect(proofer.failed_tests.first).to match(/ERROR: That tag isn't allowed here/) + end end diff --git a/spec/html-proofer/images_spec.rb b/spec/html-proofer/images_spec.rb index a8c9f8a7..50511837 100644 --- a/spec/html-proofer/images_spec.rb +++ b/spec/html-proofer/images_spec.rb @@ -201,4 +201,10 @@ proofer = run_proofer(relative_images, :file) expect(proofer.failed_tests).to eq [] end + + it 'ignores semicolon outside attribute name' do + relative_images = "#{FIXTURES_DIR}/images/semicolon.html" + proofer = run_proofer(relative_images, :file) + expect(proofer.failed_tests).to eq [] + end end