diff --git a/html-proofer.gemspec b/html-proofer.gemspec index 33d29bd0..0c3cff3f 100644 --- a/html-proofer.gemspec +++ b/html-proofer.gemspec @@ -27,6 +27,7 @@ Gem::Specification.new do |spec| spec.add_dependency("addressable", "~> 2.3") spec.add_dependency("async", "~> 2.1") spec.add_dependency("nokogiri", "~> 1.13") + spec.add_dependency("pdf-reader", "~> 2.11") spec.add_dependency("rainbow", "~> 3.0") spec.add_dependency("typhoeus", "~> 1.3") spec.add_dependency("yell", "~> 2.0") diff --git a/lib/html_proofer/attribute/url.rb b/lib/html_proofer/attribute/url.rb index 5c2f73f3..b554c90e 100644 --- a/lib/html_proofer/attribute/url.rb +++ b/lib/html_proofer/attribute/url.rb @@ -141,12 +141,16 @@ def file_path # either overwrite with root_dir; or, if source is directory, use that; or, just get the current file's dirname @runner.options[:root_dir] || (File.directory?(@runner.current_source) ? @runner.current_source : File.dirname(@runner.current_source)) # relative links, path is a file - elsif File.exist?(File.expand_path(path, - @runner.current_source)) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source)) + elsif File.exist?(File.expand_path( + path, + @runner.current_source, + )) || File.exist?(File.expand_path(path_dot_ext, @runner.current_source)) File.dirname(@runner.current_filename) # relative links in nested dir, path is a file - elsif File.exist?(File.join(File.dirname(@runner.current_filename), - path)) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext)) + elsif File.exist?(File.join( + File.dirname(@runner.current_filename), + path, + )) || File.exist?(File.join(File.dirname(@runner.current_filename), path_dot_ext)) File.dirname(@runner.current_filename) # relative link, path is a directory else diff --git a/lib/html_proofer/check.rb b/lib/html_proofer/check.rb index ac89b06d..32e40a5f 100644 --- a/lib/html_proofer/check.rb +++ b/lib/html_proofer/check.rb @@ -25,8 +25,14 @@ def run end def add_failure(description, line: nil, status: nil, content: nil) - @failures << Failure.new(@runner.current_filename, short_name, description, line: line, status: status, - content: content) + @failures << Failure.new( + @runner.current_filename, + short_name, + description, + line: line, + status: status, + content: content, + ) end def short_name diff --git a/lib/html_proofer/check/favicon.rb b/lib/html_proofer/check/favicon.rb index cf11909e..0c22d34d 100644 --- a/lib/html_proofer/check/favicon.rb +++ b/lib/html_proofer/check/favicon.rb @@ -17,13 +17,19 @@ def run if found if @favicon.url.protocol_relative? - add_failure("favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead", - line: @favicon.line, content: @favicon.content) + add_failure( + "favicon link #{@favicon.url} is a protocol-relative URL, use explicit https:// instead", + line: @favicon.line, + content: @favicon.content, + ) elsif @favicon.url.remote? add_to_external_urls(@favicon.url, @favicon.line) elsif !@favicon.url.exists? - add_failure("internal favicon #{@favicon.url.raw_attribute} does not exist", line: @favicon.line, - content: @favicon.content) + add_failure( + "internal favicon #{@favicon.url.raw_attribute} does not exist", + line: @favicon.line, + content: @favicon.content, + ) end else add_failure("no favicon provided") diff --git a/lib/html_proofer/check/images.rb b/lib/html_proofer/check/images.rb index 82e74744..b5b34aab 100644 --- a/lib/html_proofer/check/images.rb +++ b/lib/html_proofer/check/images.rb @@ -12,27 +12,39 @@ def run next if @img.ignore? # screenshot filenames should return because of terrible names - add_failure("image has a terrible filename (#{@img.url.raw_attribute})", line: @img.line, - content: @img.content) if terrible_filename? + add_failure( + "image has a terrible filename (#{@img.url.raw_attribute})", + line: @img.line, + content: @img.content, + ) if terrible_filename? # does the image exist? if missing_src? add_failure("image has no src or srcset attribute", line: @img.line, content: @img.content) elsif @img.url.protocol_relative? - add_failure("image link #{@img.url} is a protocol-relative URL, use explicit https:// instead", - line: @img.line, content: @img.content) + add_failure( + "image link #{@img.url} is a protocol-relative URL, use explicit https:// instead", + line: @img.line, + content: @img.content, + ) elsif @img.url.remote? add_to_external_urls(@img.url, @img.line) elsif !@img.url.exists? && !@img.multiple_srcsets? && !@img.multiple_sizes? - add_failure("internal image #{@img.url.raw_attribute} does not exist", line: @img.line, - content: @img.content) + add_failure( + "internal image #{@img.url.raw_attribute} does not exist", + line: @img.line, + content: @img.content, + ) elsif @img.multiple_srcsets? || @img.multiple_sizes? @img.srcsets_wo_sizes.each do |srcset| srcset_url = HTMLProofer::Attribute::Url.new(@runner, srcset, base_url: @img.base_url, extract_size: true) if srcset_url.protocol_relative? - add_failure("image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead", - line: @img.line, content: @img.content) + add_failure( + "image link #{srcset_url.url} is a protocol-relative URL, use explicit https:// instead", + line: @img.line, + content: @img.content, + ) elsif srcset_url.remote? add_to_external_urls(srcset_url.url, @img.line) elsif !srcset_url.exists? @@ -44,16 +56,25 @@ def run # if this is an img element, check that the alt attribute is present if @img.img_tag? && !ignore_element? if missing_alt_tag? && !ignore_missing_alt? - add_failure("image #{@img.url.raw_attribute} does not have an alt attribute", line: @img.line, - content: @img.content) + add_failure( + "image #{@img.url.raw_attribute} does not have an alt attribute", + line: @img.line, + content: @img.content, + ) elsif (empty_alt_tag? || alt_all_spaces?) && !ignore_empty_alt? - add_failure("image #{@img.url.raw_attribute} has an alt attribute, but no content", line: @img.line, - content: @img.content) + add_failure( + "image #{@img.url.raw_attribute} has an alt attribute, but no content", + line: @img.line, + content: @img.content, + ) end end - add_failure("image #{@img.url.raw_attribute} uses the http scheme", line: @img.line, - content: @img.content) if @runner.enforce_https? && @img.url.http? + add_failure( + "image #{@img.url.raw_attribute} uses the http scheme", + line: @img.line, + content: @img.content, + ) if @runner.enforce_https? && @img.url.http? end external_urls diff --git a/lib/html_proofer/check/links.rb b/lib/html_proofer/check/links.rb index 89e5f118..ec72f001 100644 --- a/lib/html_proofer/check/links.rb +++ b/lib/html_proofer/check/links.rb @@ -29,8 +29,11 @@ def run end if @link.url.protocol_relative? - add_failure("#{@link.url} is a protocol-relative URL, use explicit https:// instead", - line: @link.line, content: @link.content) + add_failure( + "#{@link.url} is a protocol-relative URL, use explicit https:// instead", + line: @link.line, + content: @link.content, + ) next end @@ -55,8 +58,11 @@ def run elsif @link.url.internal? # does the local directory have a trailing slash? if @link.url.unslashed_directory?(@link.url.absolute_path) - add_failure("internally linking to a directory #{@link.url.raw_attribute} without trailing slash", - line: @link.line, content: @link.content) + add_failure( + "internally linking to a directory #{@link.url.raw_attribute} without trailing slash", + line: @link.line, + content: @link.content, + ) next end @@ -88,17 +94,26 @@ def check_schemes def handle_mailto if @link.url.path.empty? - add_failure("#{@link.url.raw_attribute} contains no email address", line: @link.line, - content: @link.content) unless ignore_empty_mailto? + add_failure( + "#{@link.url.raw_attribute} contains no email address", + line: @link.line, + content: @link.content, + ) unless ignore_empty_mailto? elsif !/#{URI::MailTo::EMAIL_REGEXP}/o.match?(@link.url.path) - add_failure("#{@link.url.raw_attribute} contains an invalid email address", line: @link.line, - content: @link.content) + add_failure( + "#{@link.url.raw_attribute} contains an invalid email address", + line: @link.line, + content: @link.content, + ) end end def handle_tel - add_failure("#{@link.url.raw_attribute} contains no phone number", line: @link.line, - content: @link.content) if @link.url.path.empty? + add_failure( + "#{@link.url.raw_attribute} contains no phone number", + line: @link.line, + content: @link.content, + ) if @link.url.path.empty? end def ignore_empty_mailto? @@ -113,13 +128,19 @@ def check_sri return unless SRI_REL_TYPES.include?(@link.node["rel"]) if blank?(@link.node["integrity"]) && blank?(@link.node["crossorigin"]) - add_failure("SRI and CORS not provided in: #{@link.url.raw_attribute}", line: @link.line, - content: @link.content) + add_failure( + "SRI and CORS not provided in: #{@link.url.raw_attribute}", + line: @link.line, + content: @link.content, + ) elsif blank?(@link.node["integrity"]) add_failure("Integrity is missing in: #{@link.url.raw_attribute}", line: @link.line, content: @link.content) elsif blank?(@link.node["crossorigin"]) - add_failure("CORS not provided for external resource in: #{@link.link.url.raw_attribute}", line: @link.line, - content: @link.content) + add_failure( + "CORS not provided for external resource in: #{@link.link.url.raw_attribute}", + line: @link.line, + content: @link.content, + ) end end diff --git a/lib/html_proofer/check/open_graph.rb b/lib/html_proofer/check/open_graph.rb index dc85fae7..fd32aef2 100644 --- a/lib/html_proofer/check/open_graph.rb +++ b/lib/html_proofer/check/open_graph.rb @@ -17,13 +17,19 @@ def run elsif !@open_graph.url.valid? add_failure("#{@open_graph.src} is an invalid URL", line: @open_graph.line) elsif @open_graph.url.protocol_relative? - add_failure("open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead", - line: @open_graph.line, content: @open_graph.content) + add_failure( + "open graph link #{@open_graph.url} is a protocol-relative URL, use explicit https:// instead", + line: @open_graph.line, + content: @open_graph.content, + ) elsif @open_graph.url.remote? add_to_external_urls(@open_graph.url, @open_graph.line) else - add_failure("internal open graph #{@open_graph.url.raw_attribute} does not exist", line: @open_graph.line, - content: @open_graph.content) unless @open_graph.url.exists? + add_failure( + "internal open graph #{@open_graph.url.raw_attribute} does not exist", + line: @open_graph.line, + content: @open_graph.content, + ) unless @open_graph.url.exists? end end diff --git a/lib/html_proofer/check/scripts.rb b/lib/html_proofer/check/scripts.rb index 01f9b564..d07999fc 100644 --- a/lib/html_proofer/check/scripts.rb +++ b/lib/html_proofer/check/scripts.rb @@ -14,14 +14,20 @@ def run if missing_src? add_failure("script is empty and has no src attribute", line: @script.line, content: @script.content) elsif @script.url.protocol_relative? - add_failure("script link #{@script.url} is a protocol-relative URL, use explicit https:// instead", - line: @script.line, content: @script.content) + add_failure( + "script link #{@script.url} is a protocol-relative URL, use explicit https:// instead", + line: @script.line, + content: @script.content, + ) elsif @script.url.remote? add_to_external_urls(@script.url, @script.line) check_sri if @runner.check_sri? elsif !@script.url.exists? - add_failure("internal script reference #{@script.src} does not exist", line: @script.line, - content: @script.content) + add_failure( + "internal script reference #{@script.src} does not exist", + line: @script.line, + content: @script.content, + ) end end @@ -34,14 +40,23 @@ def missing_src? def check_sri if blank?(@script.node["integrity"]) && blank?(@script.node["crossorigin"]) - add_failure("SRI and CORS not provided in: #{@script.url.raw_attribute}", line: @script.line, - content: @script.content) + add_failure( + "SRI and CORS not provided in: #{@script.url.raw_attribute}", + line: @script.line, + content: @script.content, + ) elsif blank?(@script.node["integrity"]) - add_failure("Integrity is missing in: #{@script.url.raw_attribute}", line: @script.line, - content: @script.content) + add_failure( + "Integrity is missing in: #{@script.url.raw_attribute}", + line: @script.line, + content: @script.content, + ) elsif blank?(@script.node["crossorigin"]) - add_failure("CORS not provided for external resource in: #{@script.url.raw_attribute}", line: @script.line, - content: @script.content) + add_failure( + "CORS not provided for external resource in: #{@script.url.raw_attribute}", + line: @script.line, + content: @script.content, + ) end end end diff --git a/lib/html_proofer/configuration.rb b/lib/html_proofer/configuration.rb index 4f789ea3..88147a5c 100644 --- a/lib/html_proofer/configuration.rb +++ b/lib/html_proofer/configuration.rb @@ -280,17 +280,25 @@ def parse_json_option(option_name, config, symbolize_names: true) module ConfigurationHelp TEXT = { as_links: ["Assumes that `PATH` is a comma-separated array of links to check."], - assume_extension: ["Automatically add specified extension to files for internal links, ", - "to allow extensionless URLs (as supported by most servers) (default: `.html`).",], + assume_extension: [ + "Automatically add specified extension to files for internal links, ", + "to allow extensionless URLs (as supported by most servers) (default: `.html`).", + ], directory_index_file: ["Sets the file to look for when a link refers to a directory. (default: `index.html`)."], - extensions: ["A comma-separated list of Strings indicating the file extensions you", - "would like to check (default: `.html`)",], + extensions: [ + "A comma-separated list of Strings indicating the file extensions you", + "would like to check (default: `.html`)", + ], allow_hash_href: ['"If `true`, assumes `href="#"` anchors are valid (default: `true`)"'], - allow_missing_href: ["If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically ", - "allowed, but could also be human error. (default: `false`)",], - checks: ["A comma-separated list of Strings indicating which checks you", - "want to run (default: `[\"Links\", \"Images\", \"Scripts\"]",], + allow_missing_href: [ + "If `true`, does not flag `a` tags missing `href`. In HTML5, this is technically ", + "allowed, but could also be human error. (default: `false`)", + ], + checks: [ + "A comma-separated list of Strings indicating which checks you", + "want to run (default: `[\"Links\", \"Images\", \"Scripts\"]", + ], check_external_hash: ["Checks whether external hashes exist (even if the webpage exists) (default: `true`)."], check_internal_hash: ["Checks whether internal hashes exist (even if the webpage exists) (default: `true`)."], check_sri: ["Check that `` and `